1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/systm.h> 28 #include <sys/archsystm.h> 29 #include <sys/machparam.h> 30 #include <sys/machsystm.h> 31 #include <sys/cpu.h> 32 #include <sys/elf_SPARC.h> 33 #include <vm/hat_sfmmu.h> 34 #include <vm/seg_kpm.h> 35 #include <vm/page.h> 36 #include <vm/vm_dep.h> 37 #include <sys/cpuvar.h> 38 #include <sys/spitregs.h> 39 #include <sys/async.h> 40 #include <sys/cmn_err.h> 41 #include <sys/debug.h> 42 #include <sys/dditypes.h> 43 #include <sys/sunddi.h> 44 #include <sys/cpu_module.h> 45 #include <sys/prom_debug.h> 46 #include <sys/vmsystm.h> 47 #include <sys/prom_plat.h> 48 #include <sys/sysmacros.h> 49 #include <sys/intreg.h> 50 #include <sys/machtrap.h> 51 #include <sys/ontrap.h> 52 #include <sys/ivintr.h> 53 #include <sys/atomic.h> 54 #include <sys/panic.h> 55 #include <sys/ndifm.h> 56 #include <sys/fm/protocol.h> 57 #include <sys/fm/util.h> 58 #include <sys/fm/cpu/UltraSPARC-II.h> 59 #include <sys/ddi.h> 60 #include <sys/ecc_kstat.h> 61 #include <sys/watchpoint.h> 62 #include <sys/dtrace.h> 63 #include <sys/errclassify.h> 64 65 uint_t cpu_impl_dual_pgsz = 0; 66 67 /* 68 * Structure for the 8 byte ecache data dump and the associated AFSR state. 69 * There will be 8 of these structures used to dump an ecache line (64 bytes). 
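 * In other words, the flt_ec_data[8] array in spitf_async_flt below holds a
 * full 64-byte E$ line as eight 8-byte words, each paired with the AFSR value
 * that was observed when that word was read.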
70 */ 71 typedef struct sf_ec_data_elm { 72 uint64_t ec_d8; 73 uint64_t ec_afsr; 74 } ec_data_t; 75 76 /* 77 * Define spitfire (Ultra I/II) specific asynchronous error structure 78 */ 79 typedef struct spitfire_async_flt { 80 struct async_flt cmn_asyncflt; /* common - see sun4u/sys/async.h */ 81 ushort_t flt_type; /* types of faults - cpu specific */ 82 ec_data_t flt_ec_data[8]; /* for E$ or mem dump/state */ 83 uint64_t flt_ec_tag; /* E$ tag info */ 84 int flt_ec_lcnt; /* number of bad E$ lines */ 85 ushort_t flt_sdbh; /* UDBH reg */ 86 ushort_t flt_sdbl; /* UDBL reg */ 87 } spitf_async_flt; 88 89 /* 90 * Prototypes for support routines in spitfire_asm.s: 91 */ 92 extern void flush_ecache(uint64_t physaddr, size_t size, size_t linesize); 93 extern uint64_t get_lsu(void); 94 extern void set_lsu(uint64_t ncc); 95 extern void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag, 96 uint64_t *oafsr, uint64_t *acc_afsr); 97 extern uint64_t check_ecache_line(uint32_t id, uint64_t *acc_afsr); 98 extern uint64_t get_ecache_tag(uint32_t id, uint64_t *nafsr, 99 uint64_t *acc_afsr); 100 extern uint64_t read_and_clear_afsr(); 101 extern void write_ec_tag_parity(uint32_t id); 102 extern void write_hb_ec_tag_parity(uint32_t id); 103 104 /* 105 * Spitfire module routines: 106 */ 107 static void cpu_async_log_err(void *flt); 108 /*PRINTFLIKE6*/ 109 static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, 110 uint_t logflags, const char *endstr, const char *fmt, ...); 111 112 static void cpu_read_paddr(struct async_flt *aflt, short verbose, short ce_err); 113 static void cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum); 114 static void cpu_log_ecmem_info(spitf_async_flt *spf_flt); 115 116 static void log_ce_err(struct async_flt *aflt, char *unum); 117 static void log_ue_err(struct async_flt *aflt, char *unum); 118 static void check_misc_err(spitf_async_flt *spf_flt); 119 static ushort_t ecc_gen(uint_t high_bytes, uint_t low_bytes); 120 static int check_ecc(struct async_flt *aflt); 121 static uint_t get_cpu_status(uint64_t arg); 122 static uint64_t clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr); 123 static void scan_ecache(uint64_t *afar, ec_data_t *data, uint64_t *tag, 124 int *m, uint64_t *afsr); 125 static void ecache_kstat_init(struct cpu *cp); 126 static void ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, 127 uint64_t paddr, int mpb, uint64_t); 128 static uint64_t ecache_scrub_misc_err(int, uint64_t); 129 static void ecache_scrub_tag_err(uint64_t, uchar_t, uint32_t); 130 static void ecache_page_retire(void *); 131 static int ecc_kstat_update(kstat_t *ksp, int rw); 132 static int ce_count_unum(int status, int len, char *unum); 133 static void add_leaky_bucket_timeout(void); 134 static int synd_to_synd_code(int synd_status, ushort_t synd); 135 136 extern uint_t read_all_memscrub; 137 extern void memscrub_run(void); 138 139 static uchar_t isus2i; /* set if sabre */ 140 static uchar_t isus2e; /* set if hummingbird */ 141 142 /* 143 * Default ecache mask and shift settings for Spitfire. If we detect a 144 * different CPU implementation, we will modify these values at boot time. 
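 * (The overrides happen in cpu_aflt_size() below, which substitutes the SB_*
 * values on Sabre and the HB_* values on Hummingbird once cpunodes[] has been
 * initialized.)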
145 */ 146 static uint64_t cpu_ec_tag_mask = S_ECTAG_MASK; 147 static uint64_t cpu_ec_state_mask = S_ECSTATE_MASK; 148 static uint64_t cpu_ec_par_mask = S_ECPAR_MASK; 149 static int cpu_ec_par_shift = S_ECPAR_SHIFT; 150 static int cpu_ec_tag_shift = S_ECTAG_SHIFT; 151 static int cpu_ec_state_shift = S_ECSTATE_SHIFT; 152 static uchar_t cpu_ec_state_exl = S_ECSTATE_EXL; 153 static uchar_t cpu_ec_state_mod = S_ECSTATE_MOD; 154 static uchar_t cpu_ec_state_shr = S_ECSTATE_SHR; 155 static uchar_t cpu_ec_state_own = S_ECSTATE_OWN; 156 157 /* 158 * Default ecache state bits for Spitfire. These individual bits indicate if 159 * the given line is in any of the valid or modified states, respectively. 160 * Again, we modify these at boot if we detect a different CPU. 161 */ 162 static uchar_t cpu_ec_state_valid = S_ECSTATE_VALID; 163 static uchar_t cpu_ec_state_dirty = S_ECSTATE_DIRTY; 164 static uchar_t cpu_ec_parity = S_EC_PARITY; 165 static uchar_t cpu_ec_state_parity = S_ECSTATE_PARITY; 166 167 /* 168 * This table is used to determine which bit(s) is(are) bad when an ECC 169 * error occurrs. The array is indexed an 8-bit syndrome. The entries 170 * of this array have the following semantics: 171 * 172 * 00-63 The number of the bad bit, when only one bit is bad. 173 * 64 ECC bit C0 is bad. 174 * 65 ECC bit C1 is bad. 175 * 66 ECC bit C2 is bad. 176 * 67 ECC bit C3 is bad. 177 * 68 ECC bit C4 is bad. 178 * 69 ECC bit C5 is bad. 179 * 70 ECC bit C6 is bad. 180 * 71 ECC bit C7 is bad. 181 * 72 Two bits are bad. 182 * 73 Three bits are bad. 183 * 74 Four bits are bad. 184 * 75 More than Four bits are bad. 185 * 76 NO bits are bad. 186 * Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28. 187 */ 188 189 #define C0 64 190 #define C1 65 191 #define C2 66 192 #define C3 67 193 #define C4 68 194 #define C5 69 195 #define C6 70 196 #define C7 71 197 #define M2 72 198 #define M3 73 199 #define M4 74 200 #define MX 75 201 #define NA 76 202 203 #define SYND_IS_SINGLE_BIT_DATA(synd_code) ((synd_code >= 0) && \ 204 (synd_code < C0)) 205 #define SYND_IS_SINGLE_BIT_CHK(synd_code) ((synd_code >= C0) && \ 206 (synd_code <= C7)) 207 208 static char ecc_syndrome_tab[] = 209 { 210 NA, C0, C1, M2, C2, M2, M2, M3, C3, M2, M2, M3, M2, M3, M3, M4, 211 C4, M2, M2, 32, M2, 57, MX, M2, M2, 37, 49, M2, 40, M2, M2, 44, 212 C5, M2, M2, 33, M2, 61, 4, M2, M2, MX, 53, M2, 45, M2, M2, 41, 213 M2, 0, 1, M2, 10, M2, M2, MX, 15, M2, M2, MX, M2, M3, M3, M2, 214 C6, M2, M2, 42, M2, 59, 39, M2, M2, MX, 51, M2, 34, M2, M2, 46, 215 M2, 25, 29, M2, 27, M4, M2, MX, 31, M2, M4, MX, M2, MX, MX, M2, 216 M2, MX, 36, M2, 7, M2, M2, 54, MX, M2, M2, 62, M2, 48, 56, M2, 217 M3, M2, M2, MX, M2, MX, 22, M2, M2, 18, MX, M2, M3, M2, M2, MX, 218 C7, M2, M2, 47, M2, 63, MX, M2, M2, 6, 55, M2, 35, M2, M2, 43, 219 M2, 5, MX, M2, MX, M2, M2, 50, 38, M2, M2, 58, M2, 52, 60, M2, 220 M2, 17, 21, M2, 19, M4, M2, MX, 23, M2, M4, MX, M2, MX, MX, M2, 221 M3, M2, M2, MX, M2, MX, 30, M2, M2, 26, MX, M2, M3, M2, M2, MX, 222 M2, 8, 13, M2, 2, M2, M2, M3, 3, M2, M2, M3, M2, MX, MX, M2, 223 M3, M2, M2, M3, M2, MX, 16, M2, M2, 20, MX, M2, MX, M2, M2, MX, 224 M3, M2, M2, M3, M2, MX, 24, M2, M2, 28, MX, M2, MX, M2, M2, MX, 225 M4, 12, 9, M2, 14, M2, M2, MX, 11, M2, M2, MX, M2, MX, MX, M4 226 }; 227 228 #define SYND_TBL_SIZE 256 229 230 /* 231 * Hack for determining UDBH/UDBL, for later cpu-specific error reporting. 232 * Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird. 
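 * Instead, bit 15 of the 16-bit syndrome value is used as the UDBL flag and
 * bits <14:0> hold the syndrome itself; e.g. a flt_synd value of 0x8043
 * decodes as UDBL(0x8043) == 1 (i.e. UDBL) and SYND(0x8043) == 0x43.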
 */
#define	UDBL_REG	0x8000
#define	UDBL(synd)	((synd & UDBL_REG) >> 15)
#define	SYND(synd)	(synd & 0x7FFF)

/*
 * These error types are specific to Spitfire and are used internally for the
 * spitfire fault structure flt_type field.
 */
#define	CPU_UE_ERR		0	/* uncorrectable errors - UEs */
#define	CPU_EDP_LDP_ERR		1	/* LDP or EDP parity error */
#define	CPU_WP_ERR		2	/* WP parity error */
#define	CPU_BTO_BERR_ERR	3	/* bus timeout errors */
#define	CPU_PANIC_CP_ERR	4	/* cp error from panic polling */
#define	CPU_TRAPPING_CP_ERR	5	/* for sabre/hbird only, cp error */
#define	CPU_BADLINE_CI_ERR	6	/* E$ clean_bad line when idle */
#define	CPU_BADLINE_CB_ERR	7	/* E$ clean_bad line when busy */
#define	CPU_BADLINE_DI_ERR	8	/* E$ dirty_bad line when idle */
#define	CPU_BADLINE_DB_ERR	9	/* E$ dirty_bad line when busy */
#define	CPU_ORPHAN_CP_ERR	10	/* Orphan CP error */
#define	CPU_ECACHE_ADDR_PAR_ERR	11	/* Ecache Address parity error */
#define	CPU_ECACHE_STATE_ERR	12	/* Ecache state error */
#define	CPU_ECACHE_ETP_ETS_ERR	13	/* ETP set but ETS is zero */
#define	CPU_ECACHE_TAG_ERR	14	/* Scrub the E$ tag, if state clean */
#define	CPU_ADDITIONAL_ERR	15	/* Additional errors occurred */

/*
 * Macro to access the "Spitfire cpu private" data structure.
 */
#define	CPU_PRIVATE_PTR(cp, x)	(&(((spitfire_private_t *)CPU_PRIVATE(cp))->x))

/*
 * set to 0 to disable automatic retiring of pages on
 * DIMMs that have excessive soft errors
 */
int automatic_page_removal = 1;

/*
 * Heuristic for figuring out which module to replace.
 * Relative likelihood that this P_SYND indicates that this module is bad.
 * We call it a "score", though, not a relative likelihood.
 *
 * Step 1.
 * Assign a score to each byte of P_SYND according to the following rules:
 *	If no bits on (0x00) or all bits on (0xFF), then give it a 5.
 *	If one bit on, give it a 95.
 *	If seven bits on, give it a 10.
 *	If two bits on:
 *		in different nybbles, a 90
 *		in same nybble, but unaligned, 85
 *		in same nybble and as an aligned pair, 80
 *	If six bits on, look at the bits that are off:
 *		in same nybble and as an aligned pair, 15
 *		in same nybble, but unaligned, 20
 *		in different nybbles, a 25
 *	If three bits on:
 *		in different nybbles, no aligned pairs, 75
 *		in different nybbles, one aligned pair, 70
 *		in the same nybble, 65
 *	If five bits on, look at the bits that are off:
 *		in the same nybble, 30
 *		in different nybbles, one aligned pair, 35
 *		in different nybbles, no aligned pairs, 40
 *	If four bits on:
 *		all in one nybble, 45
 *		as two aligned pairs, 50
 *		one aligned pair, 55
 *		no aligned pairs, 60
 *
 * Step 2:
 * Take the higher of the two scores (one for each byte) as the score
 * for the module.
 *
 * Print the score for each module, and field service should replace the
 * module with the highest score.
 */

/*
 * In the table below, the first row/column comment indicates the
 * number of bits on in that nybble; the second row/column comment is
 * the hex digit.
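 *
 * As a worked example, a P_SYND of 0x03FF scores max(table[0xFF], table[0x03])
 * = max(5, 80) = 80 in ecc_psynd_score() below: the low byte has all bits on
 * (score 5), while the high byte is an aligned pair within one nybble (80).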
314 */ 315 316 static int 317 p_synd_score_table[256] = { 318 /* 0 1 1 2 1 2 2 3 1 2 2 3 2 3 3 4 */ 319 /* 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F */ 320 /* 0 0 */ 5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45, 321 /* 1 1 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30, 322 /* 1 2 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30, 323 /* 2 3 */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15, 324 /* 1 4 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30, 325 /* 2 5 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20, 326 /* 2 6 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20, 327 /* 3 7 */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10, 328 /* 1 8 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30, 329 /* 2 9 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20, 330 /* 2 A */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20, 331 /* 3 B */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10, 332 /* 2 C */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15, 333 /* 3 D */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10, 334 /* 3 E */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10, 335 /* 4 F */ 45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10, 5, 336 }; 337 338 int 339 ecc_psynd_score(ushort_t p_synd) 340 { 341 int i, j, a, b; 342 343 i = p_synd & 0xFF; 344 j = (p_synd >> 8) & 0xFF; 345 346 a = p_synd_score_table[i]; 347 b = p_synd_score_table[j]; 348 349 return (a > b ? a : b); 350 } 351 352 /* 353 * Async Fault Logging 354 * 355 * To ease identifying, reading, and filtering async fault log messages, the 356 * label [AFT#] is now prepended to each async fault message. These messages 357 * and the logging rules are implemented by cpu_aflt_log(), below. 358 * 359 * [AFT0] - Tag for log messages that are associated with corrected ECC errors. 360 * This includes both corrected ECC memory and ecache faults. 361 * 362 * [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything 363 * else except CE errors) with a priority of 1 (highest). This tag 364 * is also used for panic messages that result from an async fault. 365 * 366 * [AFT2] - These are lower priority diagnostic messages for uncorrected ECC 367 * [AFT3] or parity errors. For example, AFT2 is used for the actual dump 368 * of the E-$ data and tags. 369 * 370 * In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not 371 * printed on the console. To send all AFT logs to both the log and the 372 * console, set aft_verbose = 1. 
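 * (aft_verbose is an ordinary kernel global, so it would normally be tuned
 * from /etc/system with "set aft_verbose = 1".)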
373 */ 374 375 #define CPU_FLTCPU 0x0001 /* print flt_inst as a CPU id */ 376 #define CPU_SPACE 0x0002 /* print flt_status (data or instr) */ 377 #define CPU_ERRID 0x0004 /* print flt_id */ 378 #define CPU_TL 0x0008 /* print flt_tl */ 379 #define CPU_ERRID_FIRST 0x0010 /* print flt_id first in message */ 380 #define CPU_AFSR 0x0020 /* print flt_stat as decoded %afsr */ 381 #define CPU_AFAR 0x0040 /* print flt_addr as %afar */ 382 #define CPU_AF_PSYND 0x0080 /* print flt_stat %afsr.PSYND */ 383 #define CPU_AF_ETS 0x0100 /* print flt_stat %afsr.ETS */ 384 #define CPU_UDBH 0x0200 /* print flt_sdbh and syndrome */ 385 #define CPU_UDBL 0x0400 /* print flt_sdbl and syndrome */ 386 #define CPU_FAULTPC 0x0800 /* print flt_pc */ 387 #define CPU_SYND 0x1000 /* print flt_synd and unum */ 388 389 #define CMN_LFLAGS (CPU_FLTCPU | CPU_SPACE | CPU_ERRID | CPU_TL | \ 390 CPU_AFSR | CPU_AFAR | CPU_AF_PSYND | \ 391 CPU_AF_ETS | CPU_UDBH | CPU_UDBL | \ 392 CPU_FAULTPC) 393 #define UE_LFLAGS (CMN_LFLAGS | CPU_SYND) 394 #define CE_LFLAGS (UE_LFLAGS & ~CPU_UDBH & ~CPU_UDBL & ~CPU_TL & \ 395 ~CPU_SPACE) 396 #define PARERR_LFLAGS (CMN_LFLAGS) 397 #define WP_LFLAGS (CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL) 398 #define CP_LFLAGS (CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL & \ 399 ~CPU_FLTCPU & ~CPU_FAULTPC) 400 #define BERRTO_LFLAGS (CMN_LFLAGS) 401 #define NO_LFLAGS (0) 402 403 #define AFSR_FMTSTR0 "\020\1ME" 404 #define AFSR_FMTSTR1 "\020\040PRIV\037ISAP\036ETP\035IVUE\034TO" \ 405 "\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE" 406 #define UDB_FMTSTR "\020\012UE\011CE" 407 408 /* 409 * Save the cache bootup state for use when internal 410 * caches are to be re-enabled after an error occurs. 411 */ 412 uint64_t cache_boot_state = 0; 413 414 /* 415 * PA[31:0] represent Displacement in UPA configuration space. 416 */ 417 uint_t root_phys_addr_lo_mask = 0xffffffff; 418 419 /* 420 * Spitfire legacy globals 421 */ 422 int itlb_entries; 423 int dtlb_entries; 424 425 void 426 cpu_setup(void) 427 { 428 extern int page_retire_messages; 429 extern int page_retire_first_ue; 430 extern int at_flags; 431 #if defined(SF_ERRATA_57) 432 extern caddr_t errata57_limit; 433 #endif 434 cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT); 435 436 at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1; 437 438 /* 439 * Spitfire isn't currently FMA-aware, so we have to enable the 440 * page retirement messages. We also change the default policy 441 * for UE retirement to allow clearing of transient errors. 442 */ 443 page_retire_messages = 1; 444 page_retire_first_ue = 0; 445 446 /* 447 * save the cache bootup state. 448 */ 449 cache_boot_state = get_lsu() & (LSU_IC | LSU_DC); 450 451 if (use_page_coloring) { 452 do_pg_coloring = 1; 453 } 454 455 /* 456 * Tune pp_slots to use up to 1/8th of the tlb entries. 457 */ 458 pp_slots = MIN(8, MAXPP_SLOTS); 459 460 /* 461 * Block stores invalidate all pages of the d$ so pagecopy 462 * et. al. do not need virtual translations with virtual 463 * coloring taken into consideration. 464 */ 465 pp_consistent_coloring = 0; 466 467 isa_list = 468 "sparcv9+vis sparcv9 " 469 "sparcv8plus+vis sparcv8plus " 470 "sparcv8 sparcv8-fsmuld sparcv7 sparc"; 471 472 cpu_hwcap_flags = AV_SPARC_VIS; 473 474 /* 475 * On Spitfire, there's a hole in the address space 476 * that we must never map (the hardware only support 44-bits of 477 * virtual address). Later CPUs are expected to have wider 478 * supported address ranges. 479 * 480 * See address map on p23 of the UltraSPARC 1 user's manual. 
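	 * With 44-bit virtual addresses the hole spans
	 * [0x0000080000000000, 0xfffff80000000000), which is exactly what
	 * hole_start/hole_end are set to below, before the extra 4GB of
	 * slack is applied for Spitfire Errata #21.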
481 */ 482 hole_start = (caddr_t)0x80000000000ull; 483 hole_end = (caddr_t)0xfffff80000000000ull; 484 485 /* 486 * A spitfire call bug requires us to be a further 4Gbytes of 487 * firewall from the spec. 488 * 489 * See Spitfire Errata #21 490 */ 491 hole_start = (caddr_t)((uintptr_t)hole_start - (1ul << 32)); 492 hole_end = (caddr_t)((uintptr_t)hole_end + (1ul << 32)); 493 494 /* 495 * The kpm mapping window. 496 * kpm_size: 497 * The size of a single kpm range. 498 * The overall size will be: kpm_size * vac_colors. 499 * kpm_vbase: 500 * The virtual start address of the kpm range within the kernel 501 * virtual address space. kpm_vbase has to be kpm_size aligned. 502 */ 503 kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */ 504 kpm_size_shift = 41; 505 kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */ 506 507 /* 508 * All UltraSPARC platforms should use small kpm page as default, as 509 * the KPM large page VAC conflict code has no value to maintain. The 510 * new generation of SPARC no longer have VAC conflict issue. 511 */ 512 kpm_smallpages = 1; 513 514 #if defined(SF_ERRATA_57) 515 errata57_limit = (caddr_t)0x80000000ul; 516 #endif 517 518 /* 519 * Disable text by default. 520 * Note that the other defaults are set in sun4u/vm/mach_vm_dep.c. 521 */ 522 max_utext_lpsize = MMU_PAGESIZE; 523 } 524 525 static int 526 getintprop(pnode_t node, char *name, int deflt) 527 { 528 int value; 529 530 switch (prom_getproplen(node, name)) { 531 case 0: 532 value = 1; /* boolean properties */ 533 break; 534 535 case sizeof (int): 536 (void) prom_getprop(node, name, (caddr_t)&value); 537 break; 538 539 default: 540 value = deflt; 541 break; 542 } 543 544 return (value); 545 } 546 547 /* 548 * Set the magic constants of the implementation. 549 */ 550 void 551 cpu_fiximp(pnode_t dnode) 552 { 553 extern int vac_size, vac_shift; 554 extern uint_t vac_mask; 555 extern int dcache_line_mask; 556 int i, a; 557 static struct { 558 char *name; 559 int *var; 560 } prop[] = { 561 "dcache-size", &dcache_size, 562 "dcache-line-size", &dcache_linesize, 563 "icache-size", &icache_size, 564 "icache-line-size", &icache_linesize, 565 "ecache-size", &ecache_size, 566 "ecache-line-size", &ecache_alignsize, 567 "ecache-associativity", &ecache_associativity, 568 "#itlb-entries", &itlb_entries, 569 "#dtlb-entries", &dtlb_entries, 570 }; 571 572 for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) { 573 if ((a = getintprop(dnode, prop[i].name, -1)) != -1) { 574 *prop[i].var = a; 575 } 576 } 577 578 ecache_setsize = ecache_size / ecache_associativity; 579 580 vac_size = S_VAC_SIZE; 581 vac_mask = MMU_PAGEMASK & (vac_size - 1); 582 i = 0; a = vac_size; 583 while (a >>= 1) 584 ++i; 585 vac_shift = i; 586 shm_alignment = vac_size; 587 vac = 1; 588 589 dcache_line_mask = (dcache_size - 1) & ~(dcache_linesize - 1); 590 591 /* 592 * UltraSPARC I & II have ecache sizes running 593 * as follows: .25 MB, .5 MB, 1 MB, 2 MB, 4 MB 594 * and 8 MB. Adjust the copyin/copyout limits 595 * according to the cache size. The magic number 596 * of VIS_COPY_THRESHOLD comes from the copyin/copyout code 597 * and its floor of VIS_COPY_THRESHOLD bytes before it will use 598 * VIS instructions. 599 * 600 * We assume that all CPUs on the system have the same size 601 * ecache. We're also called very early in the game. 602 * /etc/system will be parsed *after* we're called so 603 * these values can be overwritten. 
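	 * For example, a 1 MB (1048576 byte) E$ ends up with
	 * hw_copy_limit_2/4/8 of 1024/1280/1536 bytes, while anything larger
	 * than 4 MB gets 2560/3072/3584.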
	 */

	hw_copy_limit_1 = VIS_COPY_THRESHOLD;
	if (ecache_size <= 524288) {
		hw_copy_limit_2 = VIS_COPY_THRESHOLD;
		hw_copy_limit_4 = VIS_COPY_THRESHOLD;
		hw_copy_limit_8 = VIS_COPY_THRESHOLD;
	} else if (ecache_size == 1048576) {
		hw_copy_limit_2 = 1024;
		hw_copy_limit_4 = 1280;
		hw_copy_limit_8 = 1536;
	} else if (ecache_size == 2097152) {
		hw_copy_limit_2 = 1536;
		hw_copy_limit_4 = 2048;
		hw_copy_limit_8 = 2560;
	} else if (ecache_size == 4194304) {
		hw_copy_limit_2 = 2048;
		hw_copy_limit_4 = 2560;
		hw_copy_limit_8 = 3072;
	} else {
		hw_copy_limit_2 = 2560;
		hw_copy_limit_4 = 3072;
		hw_copy_limit_8 = 3584;
	}
}

/*
 * Called by setcpudelay
 */
void
cpu_init_tick_freq(void)
{
	/*
	 * Determine the cpu frequency by calling
	 * tod_get_cpufrequency. Use an approximate frequency
	 * value computed by the prom if the tod module
	 * is not initialized and loaded yet.
	 */
	if (tod_ops.tod_get_cpufrequency != NULL) {
		mutex_enter(&tod_lock);
		sys_tick_freq = tod_ops.tod_get_cpufrequency();
		mutex_exit(&tod_lock);
	} else {
#if defined(HUMMINGBIRD)
		/*
		 * the hummingbird version of %stick is used as the basis for
		 * low level timing; this provides an independent constant-rate
		 * clock for general system use, and frees power mgmt to set
		 * various cpu clock speeds.
		 */
		if (system_clock_freq == 0)
			cmn_err(CE_PANIC, "invalid system_clock_freq 0x%lx",
			    system_clock_freq);
		sys_tick_freq = system_clock_freq;
#else /* SPITFIRE */
		sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq;
#endif
	}
}


void shipit(int upaid);
extern uint64_t xc_tick_limit;
extern uint64_t xc_tick_jump_limit;

#ifdef SEND_MONDO_STATS
uint64_t x_early[NCPU][64];
#endif

/*
 * Note: A version of this function is used by the debugger via the KDI,
 * and must be kept in sync with this version.  Any changes made to this
 * function to support new chips or to accommodate errata must also be
 * included in the KDI-specific version.  See spitfire_kdi.c.
 */
void
send_one_mondo(int cpuid)
{
	uint64_t idsr, starttick, endtick;
	int upaid, busy, nack;
	uint64_t tick, tick_prev;
	ulong_t ticks;

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
	upaid = CPUID_TO_UPAID(cpuid);
	tick = starttick = gettick();
	shipit(upaid);
	endtick = starttick + xc_tick_limit;
	busy = nack = 0;
	for (;;) {
		idsr = getidsr();
		if (idsr == 0)
			break;
		/*
		 * When we detect an irregular tick jump, we adjust
		 * the timer window to the current tick value.
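		 * That is, if the gap between successive tick reads exceeds
		 * xc_tick_jump_limit, we re-arm endtick to
		 * (tick + xc_tick_limit) rather than declaring a mondo
		 * timeout.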
700 */ 701 tick_prev = tick; 702 tick = gettick(); 703 ticks = tick - tick_prev; 704 if (ticks > xc_tick_jump_limit) { 705 endtick = tick + xc_tick_limit; 706 } else if (tick > endtick) { 707 if (panic_quiesce) 708 return; 709 cmn_err(CE_PANIC, 710 "send mondo timeout (target 0x%x) [%d NACK %d " 711 "BUSY]", upaid, nack, busy); 712 } 713 if (idsr & IDSR_BUSY) { 714 busy++; 715 continue; 716 } 717 drv_usecwait(1); 718 shipit(upaid); 719 nack++; 720 busy = 0; 721 } 722 #ifdef SEND_MONDO_STATS 723 x_early[getprocessorid()][highbit(gettick() - starttick) - 1]++; 724 #endif 725 } 726 727 void 728 send_mondo_set(cpuset_t set) 729 { 730 int i; 731 732 for (i = 0; i < NCPU; i++) 733 if (CPU_IN_SET(set, i)) { 734 send_one_mondo(i); 735 CPUSET_DEL(set, i); 736 if (CPUSET_ISNULL(set)) 737 break; 738 } 739 } 740 741 void 742 syncfpu(void) 743 { 744 } 745 746 /* 747 * Determine the size of the CPU module's error structure in bytes. This is 748 * called once during boot to initialize the error queues. 749 */ 750 int 751 cpu_aflt_size(void) 752 { 753 /* 754 * We need to determine whether this is a sabre, Hummingbird or a 755 * Spitfire/Blackbird impl and set the appropriate state variables for 756 * ecache tag manipulation. We can't do this in cpu_setup() as it is 757 * too early in the boot flow and the cpunodes are not initialized. 758 * This routine will be called once after cpunodes[] is ready, so do 759 * it here. 760 */ 761 if (cpunodes[CPU->cpu_id].implementation == SABRE_IMPL) { 762 isus2i = 1; 763 cpu_ec_tag_mask = SB_ECTAG_MASK; 764 cpu_ec_state_mask = SB_ECSTATE_MASK; 765 cpu_ec_par_mask = SB_ECPAR_MASK; 766 cpu_ec_par_shift = SB_ECPAR_SHIFT; 767 cpu_ec_tag_shift = SB_ECTAG_SHIFT; 768 cpu_ec_state_shift = SB_ECSTATE_SHIFT; 769 cpu_ec_state_exl = SB_ECSTATE_EXL; 770 cpu_ec_state_mod = SB_ECSTATE_MOD; 771 772 /* These states do not exist in sabre - set to 0xFF */ 773 cpu_ec_state_shr = 0xFF; 774 cpu_ec_state_own = 0xFF; 775 776 cpu_ec_state_valid = SB_ECSTATE_VALID; 777 cpu_ec_state_dirty = SB_ECSTATE_DIRTY; 778 cpu_ec_state_parity = SB_ECSTATE_PARITY; 779 cpu_ec_parity = SB_EC_PARITY; 780 } else if (cpunodes[CPU->cpu_id].implementation == HUMMBRD_IMPL) { 781 isus2e = 1; 782 cpu_ec_tag_mask = HB_ECTAG_MASK; 783 cpu_ec_state_mask = HB_ECSTATE_MASK; 784 cpu_ec_par_mask = HB_ECPAR_MASK; 785 cpu_ec_par_shift = HB_ECPAR_SHIFT; 786 cpu_ec_tag_shift = HB_ECTAG_SHIFT; 787 cpu_ec_state_shift = HB_ECSTATE_SHIFT; 788 cpu_ec_state_exl = HB_ECSTATE_EXL; 789 cpu_ec_state_mod = HB_ECSTATE_MOD; 790 791 /* These states do not exist in hummingbird - set to 0xFF */ 792 cpu_ec_state_shr = 0xFF; 793 cpu_ec_state_own = 0xFF; 794 795 cpu_ec_state_valid = HB_ECSTATE_VALID; 796 cpu_ec_state_dirty = HB_ECSTATE_DIRTY; 797 cpu_ec_state_parity = HB_ECSTATE_PARITY; 798 cpu_ec_parity = HB_EC_PARITY; 799 } 800 801 return (sizeof (spitf_async_flt)); 802 } 803 804 805 /* 806 * Correctable ecc error trap handler 807 */ 808 /*ARGSUSED*/ 809 void 810 cpu_ce_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr, 811 uint_t p_afsr_high, uint_t p_afar_high) 812 { 813 ushort_t sdbh, sdbl; 814 ushort_t e_syndh, e_syndl; 815 spitf_async_flt spf_flt; 816 struct async_flt *ecc; 817 int queue = 1; 818 819 uint64_t t_afar = p_afar; 820 uint64_t t_afsr = p_afsr; 821 822 /* 823 * Note: the Spitfire data buffer error registers 824 * (upper and lower halves) are or'ed into the upper 825 * word of the afsr by ce_err(). 
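	 * Specifically, bits <42:33> of the incoming value hold the UDBH
	 * image and bits <52:43> hold the UDBL image; the shifts and masks
	 * below unpack them into sdbh and sdbl.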
	 */
	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);

	e_syndh = (uchar_t)(sdbh & (uint_t)P_DER_E_SYND);
	e_syndl = (uchar_t)(sdbl & (uint_t)P_DER_E_SYND);

	t_afsr &= S_AFSR_MASK;
	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */

	/* Setup the async fault structure */
	bzero(&spf_flt, sizeof (spitf_async_flt));
	ecc = (struct async_flt *)&spf_flt;
	ecc->flt_id = gethrtime_waitfree();
	ecc->flt_stat = t_afsr;
	ecc->flt_addr = t_afar;
	ecc->flt_status = ECC_C_TRAP;
	ecc->flt_bus_id = getprocessorid();
	ecc->flt_inst = CPU->cpu_id;
	ecc->flt_pc = (caddr_t)rp->r_pc;
	ecc->flt_func = log_ce_err;
	ecc->flt_in_memory =
	    (pf_is_memory(ecc->flt_addr >> MMU_PAGESHIFT)) ? 1: 0;
	spf_flt.flt_sdbh = sdbh;
	spf_flt.flt_sdbl = sdbl;

	/*
	 * Check for fatal conditions.
	 */
	check_misc_err(&spf_flt);

	/*
	 * Paranoid checks for valid AFSR and UDBs
	 */
	if ((t_afsr & P_AFSR_CE) == 0) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
		    "** Panic due to CE bit not set in the AFSR",
		    " Corrected Memory Error on");
	}

	/*
	 * We want to skip logging only if ALL the following
	 * conditions are true:
	 *
	 *	1. There is only one error
	 *	2. That error is a correctable memory error
	 *	3. The error is caused by the memory scrubber (in which case
	 *	   the error will have occurred under on_trap protection)
	 *	4. The error is on a retired page
	 *
	 * Note: OT_DATA_EC is used in places other than the memory scrubber.
	 * However, none of those errors should occur on a retired page.
	 */
	if ((ecc->flt_stat & (S_AFSR_ALL_ERRS & ~P_AFSR_ME)) == P_AFSR_CE &&
	    curthread->t_ontrap != NULL) {

		if (curthread->t_ontrap->ot_prot & OT_DATA_EC) {
			if (page_retire_check(ecc->flt_addr, NULL) == 0) {
				queue = 0;
			}
		}
	}

	if (((sdbh & P_DER_CE) == 0) && ((sdbl & P_DER_CE) == 0)) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
		    "** Panic due to CE bits not set in the UDBs",
		    " Corrected Memory Error on");
	}

	if ((sdbh >> 8) & 1) {
		ecc->flt_synd = e_syndh;
		ce_scrub(ecc);
		if (queue) {
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
		}
	}

	if ((sdbl >> 8) & 1) {
		ecc->flt_addr = t_afar | 0x8;	/* Sabres do not have a UDBL */
		ecc->flt_synd = e_syndl | UDBL_REG;
		ce_scrub(ecc);
		if (queue) {
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
		}
	}

	/*
	 * Re-enable all error trapping (CEEN currently cleared).
916 */ 917 clr_datapath(); 918 set_asyncflt(P_AFSR_CE); 919 set_error_enable(EER_ENABLE); 920 } 921 922 /* 923 * Cpu specific CE logging routine 924 */ 925 static void 926 log_ce_err(struct async_flt *aflt, char *unum) 927 { 928 spitf_async_flt spf_flt; 929 930 if ((aflt->flt_stat & P_AFSR_CE) && (ce_verbose_memory == 0)) { 931 return; 932 } 933 934 spf_flt.cmn_asyncflt = *aflt; 935 cpu_aflt_log(CE_CONT, 0, &spf_flt, CE_LFLAGS, unum, 936 " Corrected Memory Error detected by"); 937 } 938 939 /* 940 * Spitfire does not perform any further CE classification refinement 941 */ 942 /*ARGSUSED*/ 943 int 944 ce_scrub_xdiag_recirc(struct async_flt *ecc, errorq_t *eqp, errorq_elem_t *eqep, 945 size_t afltoffset) 946 { 947 return (0); 948 } 949 950 char * 951 flt_to_error_type(struct async_flt *aflt) 952 { 953 if (aflt->flt_status & ECC_INTERMITTENT) 954 return (ERR_TYPE_DESC_INTERMITTENT); 955 if (aflt->flt_status & ECC_PERSISTENT) 956 return (ERR_TYPE_DESC_PERSISTENT); 957 if (aflt->flt_status & ECC_STICKY) 958 return (ERR_TYPE_DESC_STICKY); 959 return (ERR_TYPE_DESC_UNKNOWN); 960 } 961 962 /* 963 * Called by correctable ecc error logging code to print out 964 * the stick/persistent/intermittent status of the error. 965 */ 966 static void 967 cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum) 968 { 969 ushort_t status; 970 char *status1_str = "Memory"; 971 char *status2_str = "Intermittent"; 972 struct async_flt *aflt = (struct async_flt *)spf_flt; 973 974 status = aflt->flt_status; 975 976 if (status & ECC_ECACHE) 977 status1_str = "Ecache"; 978 979 if (status & ECC_STICKY) 980 status2_str = "Sticky"; 981 else if (status & ECC_PERSISTENT) 982 status2_str = "Persistent"; 983 984 cpu_aflt_log(CE_CONT, 0, spf_flt, CPU_ERRID_FIRST, 985 NULL, " Corrected %s Error on %s is %s", 986 status1_str, unum, status2_str); 987 } 988 989 /* 990 * check for a valid ce syndrome, then call the 991 * displacement flush scrubbing code, and then check the afsr to see if 992 * the error was persistent or intermittent. Reread the afar/afsr to see 993 * if the error was not scrubbed successfully, and is therefore sticky. 994 */ 995 /*ARGSUSED1*/ 996 void 997 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t triedcpulogout) 998 { 999 uint64_t eer, afsr; 1000 ushort_t status; 1001 1002 ASSERT(getpil() > LOCK_LEVEL); 1003 1004 /* 1005 * It is possible that the flt_addr is not a valid 1006 * physical address. To deal with this, we disable 1007 * NCEEN while we scrub that address. If this causes 1008 * a TIMEOUT/BERR, we know this is an invalid 1009 * memory location. 1010 */ 1011 kpreempt_disable(); 1012 eer = get_error_enable(); 1013 if (eer & (EER_CEEN | EER_NCEEN)) 1014 set_error_enable(eer & ~(EER_CEEN | EER_NCEEN)); 1015 1016 /* 1017 * To check if the error detected by IO is persistent, sticky or 1018 * intermittent. 1019 */ 1020 if (ecc->flt_status & ECC_IOBUS) { 1021 ecc->flt_stat = P_AFSR_CE; 1022 } 1023 1024 scrubphys(P2ALIGN(ecc->flt_addr, 64), 1025 cpunodes[CPU->cpu_id].ecache_size); 1026 1027 get_asyncflt(&afsr); 1028 if (afsr & (P_AFSR_TO | P_AFSR_BERR)) { 1029 /* 1030 * Must ensure that we don't get the TIMEOUT/BERR 1031 * when we reenable NCEEN, so we clear the AFSR. 1032 */ 1033 set_asyncflt(afsr & (P_AFSR_TO | P_AFSR_BERR)); 1034 if (eer & (EER_CEEN | EER_NCEEN)) 1035 set_error_enable(eer); 1036 kpreempt_enable(); 1037 return; 1038 } 1039 1040 if (eer & EER_NCEEN) 1041 set_error_enable(eer & ~EER_CEEN); 1042 1043 /* 1044 * Check and clear any ECC errors from the scrub. 
If the scrub did 1045 * not trip over the error, mark it intermittent. If the scrub did 1046 * trip the error again and it did not scrub away, mark it sticky. 1047 * Otherwise mark it persistent. 1048 */ 1049 if (check_ecc(ecc) != 0) { 1050 cpu_read_paddr(ecc, 0, 1); 1051 1052 if (check_ecc(ecc) != 0) 1053 status = ECC_STICKY; 1054 else 1055 status = ECC_PERSISTENT; 1056 } else 1057 status = ECC_INTERMITTENT; 1058 1059 if (eer & (EER_CEEN | EER_NCEEN)) 1060 set_error_enable(eer); 1061 kpreempt_enable(); 1062 1063 ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY); 1064 ecc->flt_status |= status; 1065 } 1066 1067 /* 1068 * get the syndrome and unum, and then call the routines 1069 * to check the other cpus and iobuses, and then do the error logging. 1070 */ 1071 /*ARGSUSED1*/ 1072 void 1073 cpu_ce_log_err(struct async_flt *ecc, errorq_elem_t *eqep) 1074 { 1075 char unum[UNUM_NAMLEN]; 1076 int len = 0; 1077 int ce_verbose = 0; 1078 int err; 1079 1080 ASSERT(ecc->flt_func != NULL); 1081 1082 /* Get the unum string for logging purposes */ 1083 (void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, ecc, unum, 1084 UNUM_NAMLEN, &len); 1085 1086 /* Call specific error logging routine */ 1087 (void) (*ecc->flt_func)(ecc, unum); 1088 1089 /* 1090 * Count errors per unum. 1091 * Non-memory errors are all counted via a special unum string. 1092 */ 1093 if ((err = ce_count_unum(ecc->flt_status, len, unum)) != PR_OK && 1094 automatic_page_removal) { 1095 (void) page_retire(ecc->flt_addr, err); 1096 } 1097 1098 if (ecc->flt_panic) { 1099 ce_verbose = 1; 1100 } else if ((ecc->flt_class == BUS_FAULT) || 1101 (ecc->flt_stat & P_AFSR_CE)) { 1102 ce_verbose = (ce_verbose_memory > 0); 1103 } else { 1104 ce_verbose = 1; 1105 } 1106 1107 if (ce_verbose) { 1108 spitf_async_flt sflt; 1109 int synd_code; 1110 1111 sflt.cmn_asyncflt = *ecc; /* for cpu_aflt_log() */ 1112 1113 cpu_ce_log_status(&sflt, unum); 1114 1115 synd_code = synd_to_synd_code(AFLT_STAT_VALID, 1116 SYND(ecc->flt_synd)); 1117 1118 if (SYND_IS_SINGLE_BIT_DATA(synd_code)) { 1119 cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST, 1120 NULL, " ECC Data Bit %2d was in error " 1121 "and corrected", synd_code); 1122 } else if (SYND_IS_SINGLE_BIT_CHK(synd_code)) { 1123 cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST, 1124 NULL, " ECC Check Bit %2d was in error " 1125 "and corrected", synd_code - C0); 1126 } else { 1127 /* 1128 * These are UE errors - we shouldn't be getting CE 1129 * traps for these; handle them in case of bad h/w. 1130 */ 1131 switch (synd_code) { 1132 case M2: 1133 cpu_aflt_log(CE_CONT, 0, &sflt, 1134 CPU_ERRID_FIRST, NULL, 1135 " Two ECC Bits were in error"); 1136 break; 1137 case M3: 1138 cpu_aflt_log(CE_CONT, 0, &sflt, 1139 CPU_ERRID_FIRST, NULL, 1140 " Three ECC Bits were in error"); 1141 break; 1142 case M4: 1143 cpu_aflt_log(CE_CONT, 0, &sflt, 1144 CPU_ERRID_FIRST, NULL, 1145 " Four ECC Bits were in error"); 1146 break; 1147 case MX: 1148 cpu_aflt_log(CE_CONT, 0, &sflt, 1149 CPU_ERRID_FIRST, NULL, 1150 " More than Four ECC bits were " 1151 "in error"); 1152 break; 1153 default: 1154 cpu_aflt_log(CE_CONT, 0, &sflt, 1155 CPU_ERRID_FIRST, NULL, 1156 " Unknown fault syndrome %d", 1157 synd_code); 1158 break; 1159 } 1160 } 1161 } 1162 1163 /* Display entire cache line, if valid address */ 1164 if (ce_show_data && ecc->flt_addr != AFLT_INV_ADDR) 1165 read_ecc_data(ecc, 1, 1); 1166 } 1167 1168 /* 1169 * We route all errors through a single switch statement. 
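 * CPU-detected faults are passed to cpu_async_log_err() and bus/nexus faults
 * to bus_async_log_err(); anything with an unrecognized fault class is
 * discarded with a warning.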
1170 */ 1171 void 1172 cpu_ue_log_err(struct async_flt *aflt) 1173 { 1174 1175 switch (aflt->flt_class) { 1176 case CPU_FAULT: 1177 cpu_async_log_err(aflt); 1178 break; 1179 1180 case BUS_FAULT: 1181 bus_async_log_err(aflt); 1182 break; 1183 1184 default: 1185 cmn_err(CE_WARN, "discarding async error 0x%p with invalid " 1186 "fault class (0x%x)", (void *)aflt, aflt->flt_class); 1187 break; 1188 } 1189 } 1190 1191 /* Values for action variable in cpu_async_error() */ 1192 #define ACTION_NONE 0 1193 #define ACTION_TRAMPOLINE 1 1194 #define ACTION_AST_FLAGS 2 1195 1196 /* 1197 * Access error trap handler for asynchronous cpu errors. This routine is 1198 * called to handle a data or instruction access error. All fatal errors are 1199 * completely handled by this routine (by panicking). Non fatal error logging 1200 * is queued for later processing either via AST or softint at a lower PIL. 1201 * In case of panic, the error log queue will also be processed as part of the 1202 * panic flow to ensure all errors are logged. This routine is called with all 1203 * errors disabled at PIL15. The AFSR bits are cleared and the UDBL and UDBH 1204 * error bits are also cleared. The hardware has also disabled the I and 1205 * D-caches for us, so we must re-enable them before returning. 1206 * 1207 * A summary of the handling of tl=0 UE/LDP/EDP/TO/BERR/WP/CP: 1208 * 1209 * _______________________________________________________________ 1210 * | Privileged tl0 | Unprivileged | 1211 * | Protected | Unprotected | Protected | Unprotected | 1212 * |on_trap|lofault| | | | 1213 * -------------|-------|-------+---------------+---------------+-------------| 1214 * | | | | | | 1215 * UE/LDP/EDP | L,T,p | L,R,p | L,P | n/a | L,R,p | 1216 * | | | | | | 1217 * TO/BERR | T | S | L,P | n/a | S | 1218 * | | | | | | 1219 * WP | L,M,p | L,M,p | L,M,p | n/a | L,M,p | 1220 * | | | | | | 1221 * CP (IIi/IIe) | L,P | L,P | L,P | n/a | L,P | 1222 * ____________________________________________________________________________ 1223 * 1224 * 1225 * Action codes: 1226 * 1227 * L - log 1228 * M - kick off memscrubber if flt_in_memory 1229 * P - panic 1230 * p - panic if US-IIi or US-IIe (Sabre); overrides R and M 1231 * R - i) if aft_panic is set, panic 1232 * ii) otherwise, send hwerr event to contract and SIGKILL to process 1233 * S - send SIGBUS to process 1234 * T - trampoline 1235 * 1236 * Special cases: 1237 * 1238 * 1) if aft_testfatal is set, all faults result in a panic regardless 1239 * of type (even WP), protection (even on_trap), or privilege. 1240 */ 1241 /*ARGSUSED*/ 1242 void 1243 cpu_async_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr, 1244 uint_t p_afsr_high, uint_t p_afar_high) 1245 { 1246 ushort_t sdbh, sdbl, ttype, tl; 1247 spitf_async_flt spf_flt; 1248 struct async_flt *aflt; 1249 char pr_reason[28]; 1250 uint64_t oafsr; 1251 uint64_t acc_afsr = 0; /* accumulated afsr */ 1252 int action = ACTION_NONE; 1253 uint64_t t_afar = p_afar; 1254 uint64_t t_afsr = p_afsr; 1255 int expected = DDI_FM_ERR_UNEXPECTED; 1256 ddi_acc_hdl_t *hp; 1257 1258 /* 1259 * We need to look at p_flag to determine if the thread detected an 1260 * error while dumping core. We can't grab p_lock here, but it's ok 1261 * because we just need a consistent snapshot and we know that everyone 1262 * else will store a consistent set of bits while holding p_lock. We 1263 * don't have to worry about a race because SDOCORE is set once prior 1264 * to doing i/o from the process's address space and is never cleared. 
1265 */ 1266 uint_t pflag = ttoproc(curthread)->p_flag; 1267 1268 pr_reason[0] = '\0'; 1269 1270 /* 1271 * Note: the Spitfire data buffer error registers 1272 * (upper and lower halves) are or'ed into the upper 1273 * word of the afsr by async_err() if P_AFSR_UE is set. 1274 */ 1275 sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF); 1276 sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF); 1277 1278 /* 1279 * Grab the ttype encoded in <63:53> of the saved 1280 * afsr passed from async_err() 1281 */ 1282 ttype = (ushort_t)((t_afsr >> 53) & 0x1FF); 1283 tl = (ushort_t)(t_afsr >> 62); 1284 1285 t_afsr &= S_AFSR_MASK; 1286 t_afar &= SABRE_AFAR_PA; /* must use Sabre AFAR mask */ 1287 1288 /* 1289 * Initialize most of the common and CPU-specific structure. We derive 1290 * aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit. The 1291 * initial setting of aflt->flt_panic is based on TL: we must panic if 1292 * the error occurred at TL > 0. We also set flt_panic if the test/demo 1293 * tuneable aft_testfatal is set (not the default). 1294 */ 1295 bzero(&spf_flt, sizeof (spitf_async_flt)); 1296 aflt = (struct async_flt *)&spf_flt; 1297 aflt->flt_id = gethrtime_waitfree(); 1298 aflt->flt_stat = t_afsr; 1299 aflt->flt_addr = t_afar; 1300 aflt->flt_bus_id = getprocessorid(); 1301 aflt->flt_inst = CPU->cpu_id; 1302 aflt->flt_pc = (caddr_t)rp->r_pc; 1303 aflt->flt_prot = AFLT_PROT_NONE; 1304 aflt->flt_class = CPU_FAULT; 1305 aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0; 1306 aflt->flt_tl = (uchar_t)tl; 1307 aflt->flt_panic = (tl != 0 || aft_testfatal != 0); 1308 aflt->flt_core = (pflag & SDOCORE) ? 1 : 0; 1309 1310 /* 1311 * Set flt_status based on the trap type. If we end up here as the 1312 * result of a UE detected by the CE handling code, leave status 0. 1313 */ 1314 switch (ttype) { 1315 case T_DATA_ERROR: 1316 aflt->flt_status = ECC_D_TRAP; 1317 break; 1318 case T_INSTR_ERROR: 1319 aflt->flt_status = ECC_I_TRAP; 1320 break; 1321 } 1322 1323 spf_flt.flt_sdbh = sdbh; 1324 spf_flt.flt_sdbl = sdbl; 1325 1326 /* 1327 * Check for fatal async errors. 1328 */ 1329 check_misc_err(&spf_flt); 1330 1331 /* 1332 * If the trap occurred in privileged mode at TL=0, we need to check to 1333 * see if we were executing in the kernel under on_trap() or t_lofault 1334 * protection. If so, modify the saved registers so that we return 1335 * from the trap to the appropriate trampoline routine. 
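	 * (Concretely, the code below points rp->r_pc at the on_trap
	 * trampoline, or at curthread->t_lofault with rp->r_g1 set to EFAULT,
	 * and sets rp->r_npc to rp->r_pc + 4.)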
1336 */ 1337 if (aflt->flt_priv && tl == 0) { 1338 if (curthread->t_ontrap != NULL) { 1339 on_trap_data_t *otp = curthread->t_ontrap; 1340 1341 if (otp->ot_prot & OT_DATA_EC) { 1342 aflt->flt_prot = AFLT_PROT_EC; 1343 otp->ot_trap |= OT_DATA_EC; 1344 rp->r_pc = otp->ot_trampoline; 1345 rp->r_npc = rp->r_pc + 4; 1346 action = ACTION_TRAMPOLINE; 1347 } 1348 1349 if ((t_afsr & (P_AFSR_TO | P_AFSR_BERR)) && 1350 (otp->ot_prot & OT_DATA_ACCESS)) { 1351 aflt->flt_prot = AFLT_PROT_ACCESS; 1352 otp->ot_trap |= OT_DATA_ACCESS; 1353 rp->r_pc = otp->ot_trampoline; 1354 rp->r_npc = rp->r_pc + 4; 1355 action = ACTION_TRAMPOLINE; 1356 /* 1357 * for peeks and caut_gets errors are expected 1358 */ 1359 hp = (ddi_acc_hdl_t *)otp->ot_handle; 1360 if (!hp) 1361 expected = DDI_FM_ERR_PEEK; 1362 else if (hp->ah_acc.devacc_attr_access == 1363 DDI_CAUTIOUS_ACC) 1364 expected = DDI_FM_ERR_EXPECTED; 1365 } 1366 1367 } else if (curthread->t_lofault) { 1368 aflt->flt_prot = AFLT_PROT_COPY; 1369 rp->r_g1 = EFAULT; 1370 rp->r_pc = curthread->t_lofault; 1371 rp->r_npc = rp->r_pc + 4; 1372 action = ACTION_TRAMPOLINE; 1373 } 1374 } 1375 1376 /* 1377 * Determine if this error needs to be treated as fatal. Note that 1378 * multiple errors detected upon entry to this trap handler does not 1379 * necessarily warrant a panic. We only want to panic if the trap 1380 * happened in privileged mode and not under t_ontrap or t_lofault 1381 * protection. The exception is WP: if we *only* get WP, it is not 1382 * fatal even if the trap occurred in privileged mode, except on Sabre. 1383 * 1384 * aft_panic, if set, effectively makes us treat usermode 1385 * UE/EDP/LDP faults as if they were privileged - so we we will 1386 * panic instead of sending a contract event. A lofault-protected 1387 * fault will normally follow the contract event; if aft_panic is 1388 * set this will be changed to a panic. 1389 * 1390 * For usermode BERR/BTO errors, eg from processes performing device 1391 * control through mapped device memory, we need only deliver 1392 * a SIGBUS to the offending process. 1393 * 1394 * Some additional flt_panic reasons (eg, WP on Sabre) will be 1395 * checked later; for now we implement the common reasons. 1396 */ 1397 if (aflt->flt_prot == AFLT_PROT_NONE) { 1398 /* 1399 * Beware - multiple bits may be set in AFSR 1400 */ 1401 if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) { 1402 if (aflt->flt_priv || aft_panic) 1403 aflt->flt_panic = 1; 1404 } 1405 1406 if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) { 1407 if (aflt->flt_priv) 1408 aflt->flt_panic = 1; 1409 } 1410 } else if (aflt->flt_prot == AFLT_PROT_COPY && aft_panic) { 1411 aflt->flt_panic = 1; 1412 } 1413 1414 /* 1415 * UE/BERR/TO: Call our bus nexus friends to check for 1416 * IO errors that may have resulted in this trap. 1417 */ 1418 if (t_afsr & (P_AFSR_TO | P_AFSR_BERR | P_AFSR_UE)) { 1419 cpu_run_bus_error_handlers(aflt, expected); 1420 } 1421 1422 /* 1423 * Handle UE: If the UE is in memory, we need to flush the bad line from 1424 * the E-cache. We also need to query the bus nexus for fatal errors. 1425 * For sabre, we will panic on UEs. Attempts to do diagnostic read on 1426 * caches may introduce more parity errors (especially when the module 1427 * is bad) and in sabre there is no guarantee that such errors 1428 * (if introduced) are written back as poisoned data. 
1429 */ 1430 if (t_afsr & P_AFSR_UE) { 1431 int i; 1432 1433 (void) strcat(pr_reason, "UE "); 1434 1435 spf_flt.flt_type = CPU_UE_ERR; 1436 aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >> 1437 MMU_PAGESHIFT)) ? 1: 0; 1438 1439 /* 1440 * With UE, we have the PA of the fault. 1441 * Let do a diagnostic read to get the ecache 1442 * data and tag info of the bad line for logging. 1443 */ 1444 if (aflt->flt_in_memory) { 1445 uint32_t ec_set_size; 1446 uchar_t state; 1447 uint32_t ecache_idx; 1448 uint64_t faultpa = P2ALIGN(aflt->flt_addr, 64); 1449 1450 /* touch the line to put it in ecache */ 1451 acc_afsr |= read_and_clear_afsr(); 1452 (void) lddphys(faultpa); 1453 acc_afsr |= (read_and_clear_afsr() & 1454 ~(P_AFSR_EDP | P_AFSR_UE)); 1455 1456 ec_set_size = cpunodes[CPU->cpu_id].ecache_size / 1457 ecache_associativity; 1458 1459 for (i = 0; i < ecache_associativity; i++) { 1460 ecache_idx = i * ec_set_size + 1461 (aflt->flt_addr % ec_set_size); 1462 get_ecache_dtag(P2ALIGN(ecache_idx, 64), 1463 (uint64_t *)&spf_flt.flt_ec_data[0], 1464 &spf_flt.flt_ec_tag, &oafsr, &acc_afsr); 1465 acc_afsr |= oafsr; 1466 1467 state = (uchar_t)((spf_flt.flt_ec_tag & 1468 cpu_ec_state_mask) >> cpu_ec_state_shift); 1469 1470 if ((state & cpu_ec_state_valid) && 1471 ((spf_flt.flt_ec_tag & cpu_ec_tag_mask) == 1472 ((uint64_t)aflt->flt_addr >> 1473 cpu_ec_tag_shift))) 1474 break; 1475 } 1476 1477 /* 1478 * Check to see if the ecache tag is valid for the 1479 * fault PA. In the very unlikely event where the 1480 * line could be victimized, no ecache info will be 1481 * available. If this is the case, capture the line 1482 * from memory instead. 1483 */ 1484 if ((state & cpu_ec_state_valid) == 0 || 1485 (spf_flt.flt_ec_tag & cpu_ec_tag_mask) != 1486 ((uint64_t)aflt->flt_addr >> cpu_ec_tag_shift)) { 1487 for (i = 0; i < 8; i++, faultpa += 8) { 1488 ec_data_t *ecdptr; 1489 1490 ecdptr = &spf_flt.flt_ec_data[i]; 1491 acc_afsr |= read_and_clear_afsr(); 1492 ecdptr->ec_d8 = lddphys(faultpa); 1493 acc_afsr |= (read_and_clear_afsr() & 1494 ~(P_AFSR_EDP | P_AFSR_UE)); 1495 ecdptr->ec_afsr = 0; 1496 /* null afsr value */ 1497 } 1498 1499 /* 1500 * Mark tag invalid to indicate mem dump 1501 * when we print out the info. 1502 */ 1503 spf_flt.flt_ec_tag = AFLT_INV_ADDR; 1504 } 1505 spf_flt.flt_ec_lcnt = 1; 1506 1507 /* 1508 * Flush out the bad line 1509 */ 1510 flushecacheline(P2ALIGN(aflt->flt_addr, 64), 1511 cpunodes[CPU->cpu_id].ecache_size); 1512 1513 acc_afsr |= clear_errors(NULL, NULL); 1514 } 1515 1516 /* 1517 * Ask our bus nexus friends if they have any fatal errors. If 1518 * so, they will log appropriate error messages and panic as a 1519 * result. We then queue an event for each UDB that reports a 1520 * UE. Each UE reported in a UDB will have its own log message. 1521 * 1522 * Note from kbn: In the case where there are multiple UEs 1523 * (ME bit is set) - the AFAR address is only accurate to 1524 * the 16-byte granularity. One cannot tell whether the AFAR 1525 * belongs to the UDBH or UDBL syndromes. In this case, we 1526 * always report the AFAR address to be 16-byte aligned. 1527 * 1528 * If we're on a Sabre, there is no SDBL, but it will always 1529 * read as zero, so the sdbl test below will safely fail. 
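		 * (This is why the UDBL case below ORs the 0x8 offset into
		 * flt_addr only when P_AFSR_ME is not set.)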
1530 */ 1531 if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL || isus2i || isus2e) 1532 aflt->flt_panic = 1; 1533 1534 if (sdbh & P_DER_UE) { 1535 aflt->flt_synd = sdbh & P_DER_E_SYND; 1536 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE, 1537 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1538 aflt->flt_panic); 1539 } 1540 if (sdbl & P_DER_UE) { 1541 aflt->flt_synd = sdbl & P_DER_E_SYND; 1542 aflt->flt_synd |= UDBL_REG; /* indicates UDBL */ 1543 if (!(aflt->flt_stat & P_AFSR_ME)) 1544 aflt->flt_addr |= 0x8; 1545 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE, 1546 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1547 aflt->flt_panic); 1548 } 1549 1550 /* 1551 * We got a UE and are panicking, save the fault PA in a known 1552 * location so that the platform specific panic code can check 1553 * for copyback errors. 1554 */ 1555 if (aflt->flt_panic && aflt->flt_in_memory) { 1556 panic_aflt = *aflt; 1557 } 1558 } 1559 1560 /* 1561 * Handle EDP and LDP: Locate the line with bad parity and enqueue an 1562 * async error for logging. For Sabre, we panic on EDP or LDP. 1563 */ 1564 if (t_afsr & (P_AFSR_EDP | P_AFSR_LDP)) { 1565 spf_flt.flt_type = CPU_EDP_LDP_ERR; 1566 1567 if (t_afsr & P_AFSR_EDP) 1568 (void) strcat(pr_reason, "EDP "); 1569 1570 if (t_afsr & P_AFSR_LDP) 1571 (void) strcat(pr_reason, "LDP "); 1572 1573 /* 1574 * Here we have no PA to work with. 1575 * Scan each line in the ecache to look for 1576 * the one with bad parity. 1577 */ 1578 aflt->flt_addr = AFLT_INV_ADDR; 1579 scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0], 1580 &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr); 1581 acc_afsr |= (oafsr & ~P_AFSR_WP); 1582 1583 /* 1584 * If we found a bad PA, update the state to indicate if it is 1585 * memory or I/O space. This code will be important if we ever 1586 * support cacheable frame buffers. 1587 */ 1588 if (aflt->flt_addr != AFLT_INV_ADDR) { 1589 aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >> 1590 MMU_PAGESHIFT)) ? 1 : 0; 1591 } 1592 1593 if (isus2i || isus2e) 1594 aflt->flt_panic = 1; 1595 1596 cpu_errorq_dispatch((t_afsr & P_AFSR_EDP) ? 1597 FM_EREPORT_CPU_USII_EDP : FM_EREPORT_CPU_USII_LDP, 1598 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1599 aflt->flt_panic); 1600 } 1601 1602 /* 1603 * Timeout and bus error handling. There are two cases to consider: 1604 * 1605 * (1) If we are in the kernel protected by ddi_peek or ddi_poke,we 1606 * have already modified the saved registers so that we will return 1607 * from the trap to the appropriate trampoline routine; otherwise panic. 1608 * 1609 * (2) In user mode, we can simply use our AST mechanism to deliver 1610 * a SIGBUS. We do not log the occurence - processes performing 1611 * device control would generate lots of uninteresting messages. 1612 */ 1613 if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) { 1614 if (t_afsr & P_AFSR_TO) 1615 (void) strcat(pr_reason, "BTO "); 1616 1617 if (t_afsr & P_AFSR_BERR) 1618 (void) strcat(pr_reason, "BERR "); 1619 1620 spf_flt.flt_type = CPU_BTO_BERR_ERR; 1621 if (aflt->flt_priv && aflt->flt_prot == AFLT_PROT_NONE) { 1622 cpu_errorq_dispatch((t_afsr & P_AFSR_TO) ? 1623 FM_EREPORT_CPU_USII_TO : FM_EREPORT_CPU_USII_BERR, 1624 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1625 aflt->flt_panic); 1626 } 1627 } 1628 1629 /* 1630 * Handle WP: WP happens when the ecache is victimized and a parity 1631 * error was detected on a writeback. The data in question will be 1632 * poisoned as a UE will be written back. The PA is not logged and 1633 * it is possible that it doesn't belong to the trapped thread. 
The 1634 * WP trap is not fatal, but it could be fatal to someone that 1635 * subsequently accesses the toxic page. We set read_all_memscrub 1636 * to force the memscrubber to read all of memory when it awakens. 1637 * For Sabre/Hummingbird, WP is fatal because the HW doesn't write a 1638 * UE back to poison the data. 1639 */ 1640 if (t_afsr & P_AFSR_WP) { 1641 (void) strcat(pr_reason, "WP "); 1642 if (isus2i || isus2e) { 1643 aflt->flt_panic = 1; 1644 } else { 1645 read_all_memscrub = 1; 1646 } 1647 spf_flt.flt_type = CPU_WP_ERR; 1648 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_WP, 1649 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1650 aflt->flt_panic); 1651 } 1652 1653 /* 1654 * Handle trapping CP error: In Sabre/Hummingbird, parity error in 1655 * the ecache on a copyout due to a PCI DMA read is signaled as a CP. 1656 * This is fatal. 1657 */ 1658 1659 if (t_afsr & P_AFSR_CP) { 1660 if (isus2i || isus2e) { 1661 (void) strcat(pr_reason, "CP "); 1662 aflt->flt_panic = 1; 1663 spf_flt.flt_type = CPU_TRAPPING_CP_ERR; 1664 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP, 1665 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1666 aflt->flt_panic); 1667 } else { 1668 /* 1669 * Orphan CP: Happens due to signal integrity problem 1670 * on a CPU, where a CP is reported, without reporting 1671 * its associated UE. This is handled by locating the 1672 * bad parity line and would kick off the memscrubber 1673 * to find the UE if in memory or in another's cache. 1674 */ 1675 spf_flt.flt_type = CPU_ORPHAN_CP_ERR; 1676 (void) strcat(pr_reason, "ORPHAN_CP "); 1677 1678 /* 1679 * Here we have no PA to work with. 1680 * Scan each line in the ecache to look for 1681 * the one with bad parity. 1682 */ 1683 aflt->flt_addr = AFLT_INV_ADDR; 1684 scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0], 1685 &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, 1686 &oafsr); 1687 acc_afsr |= oafsr; 1688 1689 /* 1690 * If we found a bad PA, update the state to indicate 1691 * if it is memory or I/O space. 1692 */ 1693 if (aflt->flt_addr != AFLT_INV_ADDR) { 1694 aflt->flt_in_memory = 1695 (pf_is_memory(aflt->flt_addr >> 1696 MMU_PAGESHIFT)) ? 1 : 0; 1697 } 1698 read_all_memscrub = 1; 1699 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP, 1700 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1701 aflt->flt_panic); 1702 1703 } 1704 } 1705 1706 /* 1707 * If we queued an error other than WP or CP and we are going to return 1708 * from the trap and the error was in user mode or inside of a 1709 * copy routine, set AST flag so the queue will be drained before 1710 * returning to user mode. 1711 * 1712 * For UE/LDP/EDP, the AST processing will SIGKILL the process 1713 * and send an event to its process contract. 1714 * 1715 * For BERR/BTO, the AST processing will SIGBUS the process. There 1716 * will have been no error queued in this case. 1717 */ 1718 if ((t_afsr & 1719 (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP | P_AFSR_BERR | P_AFSR_TO)) && 1720 (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY)) { 1721 int pcb_flag = 0; 1722 1723 if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) 1724 pcb_flag |= ASYNC_HWERR; 1725 1726 if (t_afsr & P_AFSR_BERR) 1727 pcb_flag |= ASYNC_BERR; 1728 1729 if (t_afsr & P_AFSR_TO) 1730 pcb_flag |= ASYNC_BTO; 1731 1732 ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag; 1733 aston(curthread); 1734 action = ACTION_AST_FLAGS; 1735 } 1736 1737 /* 1738 * In response to a deferred error, we must do one of three things: 1739 * (1) set the AST flags, (2) trampoline, or (3) panic. 
action is 1740 * set in cases (1) and (2) - check that either action is set or 1741 * (3) is true. 1742 * 1743 * On II, the WP writes poisoned data back to memory, which will 1744 * cause a UE and a panic or reboot when read. In this case, we 1745 * don't need to panic at this time. On IIi and IIe, 1746 * aflt->flt_panic is already set above. 1747 */ 1748 ASSERT((aflt->flt_panic != 0) || (action != ACTION_NONE) || 1749 (t_afsr & P_AFSR_WP)); 1750 1751 /* 1752 * Make a final sanity check to make sure we did not get any more async 1753 * errors and accumulate the afsr. 1754 */ 1755 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2, 1756 cpunodes[CPU->cpu_id].ecache_linesize); 1757 (void) clear_errors(&spf_flt, NULL); 1758 1759 /* 1760 * Take care of a special case: If there is a UE in the ecache flush 1761 * area, we'll see it in flush_ecache(). This will trigger the 1762 * CPU_ADDITIONAL_ERRORS case below. 1763 * 1764 * This could occur if the original error was a UE in the flush area, 1765 * or if the original error was an E$ error that was flushed out of 1766 * the E$ in scan_ecache(). 1767 * 1768 * If it's at the same address that we're already logging, then it's 1769 * probably one of these cases. Clear the bit so we don't trip over 1770 * it on the additional errors case, which could cause an unnecessary 1771 * panic. 1772 */ 1773 if ((aflt->flt_stat & P_AFSR_UE) && aflt->flt_addr == t_afar) 1774 acc_afsr |= aflt->flt_stat & ~P_AFSR_UE; 1775 else 1776 acc_afsr |= aflt->flt_stat; 1777 1778 /* 1779 * Check the acumulated afsr for the important bits. 1780 * Make sure the spf_flt.flt_type value is set, and 1781 * enque an error. 1782 */ 1783 if (acc_afsr & 1784 (P_AFSR_LEVEL1 | P_AFSR_IVUE | P_AFSR_ETP | P_AFSR_ISAP)) { 1785 if (acc_afsr & (P_AFSR_UE | P_AFSR_EDP | P_AFSR_LDP | 1786 P_AFSR_BERR | P_AFSR_TO | P_AFSR_IVUE | P_AFSR_ETP | 1787 P_AFSR_ISAP)) 1788 aflt->flt_panic = 1; 1789 1790 spf_flt.flt_type = CPU_ADDITIONAL_ERR; 1791 aflt->flt_stat = acc_afsr; 1792 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UNKNOWN, 1793 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1794 aflt->flt_panic); 1795 } 1796 1797 /* 1798 * If aflt->flt_panic is set at this point, we need to panic as the 1799 * result of a trap at TL > 0, or an error we determined to be fatal. 1800 * We've already enqueued the error in one of the if-clauses above, 1801 * and it will be dequeued and logged as part of the panic flow. 1802 */ 1803 if (aflt->flt_panic) { 1804 cpu_aflt_log(CE_PANIC, 1, &spf_flt, CPU_ERRID_FIRST, 1805 "See previous message(s) for details", " %sError(s)", 1806 pr_reason); 1807 } 1808 1809 /* 1810 * Before returning, we must re-enable errors, and 1811 * reset the caches to their boot-up state. 1812 */ 1813 set_lsu(get_lsu() | cache_boot_state); 1814 set_error_enable(EER_ENABLE); 1815 } 1816 1817 /* 1818 * Check for miscellaneous fatal errors and call CE_PANIC if any are seen. 1819 * This routine is shared by the CE and UE handling code. 1820 */ 1821 static void 1822 check_misc_err(spitf_async_flt *spf_flt) 1823 { 1824 struct async_flt *aflt = (struct async_flt *)spf_flt; 1825 char *fatal_str = NULL; 1826 1827 /* 1828 * The ISAP and ETP errors are supposed to cause a POR 1829 * from the system, so in theory we never, ever see these messages. 1830 * ISAP, ETP and IVUE are considered to be fatal. 
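 *
 * Purely as an illustration of the check below (not a replacement for
 * it), the same first-match bit-to-message mapping could be written as
 * a small table walk; fatal_tab is a hypothetical name:
 *
 *	static struct { uint64_t bit; char *str; } fatal_tab[] = {
 *		{ P_AFSR_ISAP, " System Address Parity Error on" },
 *		{ P_AFSR_ETP,  " Ecache Tag Parity Error on" },
 *		{ P_AFSR_IVUE, " Interrupt Vector Uncorrectable Error on" }
 *	};
 *	int i;
 *
 *	for (i = 0; i < 3; i++) {
 *		if (aflt->flt_stat & fatal_tab[i].bit) {
 *			fatal_str = fatal_tab[i].str;
 *			break;
 *		}
 *	}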
1831 */ 1832 if (aflt->flt_stat & P_AFSR_ISAP) 1833 fatal_str = " System Address Parity Error on"; 1834 else if (aflt->flt_stat & P_AFSR_ETP) 1835 fatal_str = " Ecache Tag Parity Error on"; 1836 else if (aflt->flt_stat & P_AFSR_IVUE) 1837 fatal_str = " Interrupt Vector Uncorrectable Error on"; 1838 if (fatal_str != NULL) { 1839 cpu_aflt_log(CE_PANIC, 1, spf_flt, CMN_LFLAGS, 1840 NULL, fatal_str); 1841 } 1842 } 1843 1844 /* 1845 * Routine to convert a syndrome into a syndrome code. 1846 */ 1847 static int 1848 synd_to_synd_code(int synd_status, ushort_t synd) 1849 { 1850 if (synd_status != AFLT_STAT_VALID) 1851 return (-1); 1852 1853 /* 1854 * Use the 8-bit syndrome to index the ecc_syndrome_tab 1855 * to get the code indicating which bit(s) is(are) bad. 1856 */ 1857 if ((synd == 0) || (synd >= SYND_TBL_SIZE)) 1858 return (-1); 1859 else 1860 return (ecc_syndrome_tab[synd]); 1861 } 1862 1863 /* ARGSUSED */ 1864 int 1865 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp) 1866 { 1867 return (ENOTSUP); 1868 } 1869 1870 /* ARGSUSED */ 1871 int 1872 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp) 1873 { 1874 return (ENOTSUP); 1875 } 1876 1877 /* ARGSUSED */ 1878 int 1879 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp) 1880 { 1881 return (ENOTSUP); 1882 } 1883 1884 /* 1885 * Routine to return a string identifying the physical name 1886 * associated with a memory/cache error. 1887 */ 1888 /* ARGSUSED */ 1889 int 1890 cpu_get_mem_unum(int synd_status, ushort_t synd, uint64_t afsr, 1891 uint64_t afar, int cpuid, int flt_in_memory, ushort_t flt_status, 1892 char *buf, int buflen, int *lenp) 1893 { 1894 short synd_code; 1895 int ret; 1896 1897 if (flt_in_memory) { 1898 synd_code = synd_to_synd_code(synd_status, synd); 1899 if (synd_code == -1) { 1900 ret = EINVAL; 1901 } else if (prom_get_unum(synd_code, P2ALIGN(afar, 8), 1902 buf, buflen, lenp) != 0) { 1903 ret = EIO; 1904 } else if (*lenp <= 1) { 1905 ret = EINVAL; 1906 } else { 1907 ret = 0; 1908 } 1909 } else { 1910 ret = ENOTSUP; 1911 } 1912 1913 if (ret != 0) { 1914 buf[0] = '\0'; 1915 *lenp = 0; 1916 } 1917 1918 return (ret); 1919 } 1920 1921 /* 1922 * Wrapper for cpu_get_mem_unum() routine that takes an 1923 * async_flt struct rather than explicit arguments. 1924 */ 1925 int 1926 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt, 1927 char *buf, int buflen, int *lenp) 1928 { 1929 return (cpu_get_mem_unum(synd_status, SYND(aflt->flt_synd), 1930 aflt->flt_stat, aflt->flt_addr, aflt->flt_bus_id, 1931 aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp)); 1932 } 1933 1934 /* 1935 * This routine is a more generic interface to cpu_get_mem_unum(), 1936 * that may be used by other modules (e.g. mm). 1937 */ 1938 int 1939 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar, 1940 char *buf, int buflen, int *lenp) 1941 { 1942 int synd_status, flt_in_memory, ret; 1943 char unum[UNUM_NAMLEN]; 1944 1945 /* 1946 * Check for an invalid address. 1947 */ 1948 if (afar == (uint64_t)-1) 1949 return (ENXIO); 1950 1951 if (synd == (uint64_t)-1) 1952 synd_status = AFLT_STAT_INVALID; 1953 else 1954 synd_status = AFLT_STAT_VALID; 1955 1956 flt_in_memory = (pf_is_memory(afar >> MMU_PAGESHIFT)) ? 
1 : 0; 1957 1958 if ((ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar, 1959 CPU->cpu_id, flt_in_memory, 0, unum, UNUM_NAMLEN, lenp)) 1960 != 0) 1961 return (ret); 1962 1963 if (*lenp >= buflen) 1964 return (ENAMETOOLONG); 1965 1966 (void) strncpy(buf, unum, buflen); 1967 1968 return (0); 1969 } 1970 1971 /* 1972 * Routine to return memory information associated 1973 * with a physical address and syndrome. 1974 */ 1975 /* ARGSUSED */ 1976 int 1977 cpu_get_mem_info(uint64_t synd, uint64_t afar, 1978 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 1979 int *segsp, int *banksp, int *mcidp) 1980 { 1981 return (ENOTSUP); 1982 } 1983 1984 /* 1985 * Routine to return a string identifying the physical 1986 * name associated with a cpuid. 1987 */ 1988 /* ARGSUSED */ 1989 int 1990 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp) 1991 { 1992 return (ENOTSUP); 1993 } 1994 1995 /* 1996 * This routine returns the size of the kernel's FRU name buffer. 1997 */ 1998 size_t 1999 cpu_get_name_bufsize() 2000 { 2001 return (UNUM_NAMLEN); 2002 } 2003 2004 /* 2005 * Cpu specific log func for UEs. 2006 */ 2007 static void 2008 log_ue_err(struct async_flt *aflt, char *unum) 2009 { 2010 spitf_async_flt *spf_flt = (spitf_async_flt *)aflt; 2011 int len = 0; 2012 2013 #ifdef DEBUG 2014 int afsr_priv = (aflt->flt_stat & P_AFSR_PRIV) ? 1 : 0; 2015 2016 /* 2017 * Paranoid Check for priv mismatch 2018 * Only applicable for UEs 2019 */ 2020 if (afsr_priv != aflt->flt_priv) { 2021 /* 2022 * The priv bits in %tstate and %afsr did not match; we expect 2023 * this to be very rare, so flag it with a message. 2024 */ 2025 cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, NULL, 2026 ": PRIV bit in TSTATE and AFSR mismatched; " 2027 "TSTATE.PRIV=%d used", (aflt->flt_priv) ? 1 : 0); 2028 2029 /* update saved afsr to reflect the correct priv */ 2030 aflt->flt_stat &= ~P_AFSR_PRIV; 2031 if (aflt->flt_priv) 2032 aflt->flt_stat |= P_AFSR_PRIV; 2033 } 2034 #endif /* DEBUG */ 2035 2036 (void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum, 2037 UNUM_NAMLEN, &len); 2038 2039 cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum, 2040 " Uncorrectable Memory Error on"); 2041 2042 if (SYND(aflt->flt_synd) == 0x3) { 2043 cpu_aflt_log(CE_WARN, 1, spf_flt, CPU_ERRID_FIRST, NULL, 2044 " Syndrome 0x3 indicates that this may not be a " 2045 "memory module problem"); 2046 } 2047 2048 if (aflt->flt_in_memory) 2049 cpu_log_ecmem_info(spf_flt); 2050 } 2051 2052 2053 /* 2054 * The cpu_async_log_err() function is called via the ue_drain() function to 2055 * handle logging for CPU events that are dequeued. As such, it can be invoked 2056 * from softint context, from AST processing in the trap() flow, or from the 2057 * panic flow. We decode the CPU-specific data, and log appropriate messages. 2058 */ 2059 static void 2060 cpu_async_log_err(void *flt) 2061 { 2062 spitf_async_flt *spf_flt = (spitf_async_flt *)flt; 2063 struct async_flt *aflt = (struct async_flt *)flt; 2064 char unum[UNUM_NAMLEN]; 2065 char *space; 2066 char *ecache_scrub_logstr = NULL; 2067 2068 switch (spf_flt->flt_type) { 2069 case CPU_UE_ERR: 2070 /* 2071 * We want to skip logging only if ALL the following 2072 * conditions are true: 2073 * 2074 * 1. We are not panicking 2075 * 2. There is only one error 2076 * 3. That error is a memory error 2077 * 4. The error is caused by the memory scrubber (in 2078 * which case the error will have occurred under 2079 * on_trap protection) 2080 * 5. 
The error is on a retired page 2081 * 2082 * Note 1: AFLT_PROT_EC is used places other than the memory 2083 * scrubber. However, none of those errors should occur 2084 * on a retired page. 2085 * 2086 * Note 2: In the CE case, these errors are discarded before 2087 * the errorq. In the UE case, we must wait until now -- 2088 * softcall() grabs a mutex, which we can't do at a high PIL. 2089 */ 2090 if (!panicstr && 2091 (aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE && 2092 aflt->flt_prot == AFLT_PROT_EC) { 2093 if (page_retire_check(aflt->flt_addr, NULL) == 0) { 2094 /* Zero the address to clear the error */ 2095 softcall(ecc_page_zero, (void *)aflt->flt_addr); 2096 return; 2097 } 2098 } 2099 2100 /* 2101 * Log the UE and check for causes of this UE error that 2102 * don't cause a trap (Copyback error). cpu_async_error() 2103 * has already checked the i/o buses for us. 2104 */ 2105 log_ue_err(aflt, unum); 2106 if (aflt->flt_in_memory) 2107 cpu_check_allcpus(aflt); 2108 break; 2109 2110 case CPU_EDP_LDP_ERR: 2111 if (aflt->flt_stat & P_AFSR_EDP) 2112 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, 2113 NULL, " EDP event on"); 2114 2115 if (aflt->flt_stat & P_AFSR_LDP) 2116 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, 2117 NULL, " LDP event on"); 2118 2119 /* Log ecache info if exist */ 2120 if (spf_flt->flt_ec_lcnt > 0) { 2121 cpu_log_ecmem_info(spf_flt); 2122 2123 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, 2124 NULL, " AFAR was derived from E$Tag"); 2125 } else { 2126 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, 2127 NULL, " No error found in ecache (No fault " 2128 "PA available)"); 2129 } 2130 break; 2131 2132 case CPU_WP_ERR: 2133 /* 2134 * If the memscrub thread hasn't yet read 2135 * all of memory, as we requested in the 2136 * trap handler, then give it a kick to 2137 * make sure it does. 2138 */ 2139 if (!isus2i && !isus2e && read_all_memscrub) 2140 memscrub_run(); 2141 2142 cpu_aflt_log(CE_WARN, 1, spf_flt, WP_LFLAGS, NULL, 2143 " WP event on"); 2144 return; 2145 2146 case CPU_BTO_BERR_ERR: 2147 /* 2148 * A bus timeout or error occurred that was in user mode or not 2149 * in a protected kernel code region. 2150 */ 2151 if (aflt->flt_stat & P_AFSR_BERR) { 2152 cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2, 2153 spf_flt, BERRTO_LFLAGS, NULL, 2154 " Bus Error on System Bus in %s mode from", 2155 aflt->flt_priv ? "privileged" : "user"); 2156 } 2157 2158 if (aflt->flt_stat & P_AFSR_TO) { 2159 cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2, 2160 spf_flt, BERRTO_LFLAGS, NULL, 2161 " Timeout on System Bus in %s mode from", 2162 aflt->flt_priv ? "privileged" : "user"); 2163 } 2164 2165 return; 2166 2167 case CPU_PANIC_CP_ERR: 2168 /* 2169 * Process the Copyback (CP) error info (if any) obtained from 2170 * polling all the cpus in the panic flow. This case is only 2171 * entered if we are panicking. 2172 */ 2173 ASSERT(panicstr != NULL); 2174 ASSERT(aflt->flt_id == panic_aflt.flt_id); 2175 2176 /* See which space - this info may not exist */ 2177 if (panic_aflt.flt_status & ECC_D_TRAP) 2178 space = "Data "; 2179 else if (panic_aflt.flt_status & ECC_I_TRAP) 2180 space = "Instruction "; 2181 else 2182 space = ""; 2183 2184 cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL, 2185 " AFAR was derived from UE report," 2186 " CP event on CPU%d (caused %saccess error on %s%d)", 2187 aflt->flt_inst, space, (panic_aflt.flt_status & ECC_IOBUS) ? 
2188 "IOBUS" : "CPU", panic_aflt.flt_bus_id); 2189 2190 if (spf_flt->flt_ec_lcnt > 0) 2191 cpu_log_ecmem_info(spf_flt); 2192 else 2193 cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, 2194 NULL, " No cache dump available"); 2195 2196 return; 2197 2198 case CPU_TRAPPING_CP_ERR: 2199 /* 2200 * For sabre only. This is a copyback ecache parity error due 2201 * to a PCI DMA read. We should be panicking if we get here. 2202 */ 2203 ASSERT(panicstr != NULL); 2204 cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL, 2205 " AFAR was derived from UE report," 2206 " CP event on CPU%d (caused Data access error " 2207 "on PCIBus)", aflt->flt_inst); 2208 return; 2209 2210 /* 2211 * We log the ecache lines of the following states, 2212 * clean_bad_idle, clean_bad_busy, dirty_bad_idle and 2213 * dirty_bad_busy if ecache_scrub_verbose is set and panic 2214 * in addition to logging if ecache_scrub_panic is set. 2215 */ 2216 case CPU_BADLINE_CI_ERR: 2217 ecache_scrub_logstr = "CBI"; 2218 /* FALLTHRU */ 2219 2220 case CPU_BADLINE_CB_ERR: 2221 if (ecache_scrub_logstr == NULL) 2222 ecache_scrub_logstr = "CBB"; 2223 /* FALLTHRU */ 2224 2225 case CPU_BADLINE_DI_ERR: 2226 if (ecache_scrub_logstr == NULL) 2227 ecache_scrub_logstr = "DBI"; 2228 /* FALLTHRU */ 2229 2230 case CPU_BADLINE_DB_ERR: 2231 if (ecache_scrub_logstr == NULL) 2232 ecache_scrub_logstr = "DBB"; 2233 2234 cpu_aflt_log(CE_NOTE, 2, spf_flt, 2235 (CPU_ERRID_FIRST | CPU_FLTCPU), NULL, 2236 " %s event on", ecache_scrub_logstr); 2237 cpu_log_ecmem_info(spf_flt); 2238 2239 return; 2240 2241 case CPU_ORPHAN_CP_ERR: 2242 /* 2243 * Orphan CPs, where the CP bit is set, but when a CPU 2244 * doesn't report a UE. 2245 */ 2246 if (read_all_memscrub) 2247 memscrub_run(); 2248 2249 cpu_aflt_log(CE_NOTE, 2, spf_flt, (CP_LFLAGS | CPU_FLTCPU), 2250 NULL, " Orphan CP event on"); 2251 2252 /* Log ecache info if exist */ 2253 if (spf_flt->flt_ec_lcnt > 0) 2254 cpu_log_ecmem_info(spf_flt); 2255 else 2256 cpu_aflt_log(CE_NOTE, 2, spf_flt, 2257 (CP_LFLAGS | CPU_FLTCPU), NULL, 2258 " No error found in ecache (No fault " 2259 "PA available"); 2260 return; 2261 2262 case CPU_ECACHE_ADDR_PAR_ERR: 2263 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2264 " E$ Tag Address Parity error on"); 2265 cpu_log_ecmem_info(spf_flt); 2266 return; 2267 2268 case CPU_ECACHE_STATE_ERR: 2269 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2270 " E$ Tag State Parity error on"); 2271 cpu_log_ecmem_info(spf_flt); 2272 return; 2273 2274 case CPU_ECACHE_TAG_ERR: 2275 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2276 " E$ Tag scrub event on"); 2277 cpu_log_ecmem_info(spf_flt); 2278 return; 2279 2280 case CPU_ECACHE_ETP_ETS_ERR: 2281 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2282 " AFSR.ETP is set and AFSR.ETS is zero on"); 2283 cpu_log_ecmem_info(spf_flt); 2284 return; 2285 2286 2287 case CPU_ADDITIONAL_ERR: 2288 cpu_aflt_log(CE_WARN, 1, spf_flt, CMN_LFLAGS & ~CPU_SPACE, NULL, 2289 " Additional errors detected during error processing on"); 2290 return; 2291 2292 default: 2293 cmn_err(CE_WARN, "cpu_async_log_err: fault %p has unknown " 2294 "fault type %x", (void *)spf_flt, spf_flt->flt_type); 2295 return; 2296 } 2297 2298 /* ... 
fall through from the UE, EDP, or LDP cases */ 2299 2300 if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) { 2301 if (!panicstr) { 2302 (void) page_retire(aflt->flt_addr, PR_UE); 2303 } else { 2304 /* 2305 * Clear UEs on panic so that we don't 2306 * get haunted by them during panic or 2307 * after reboot 2308 */ 2309 clearphys(P2ALIGN(aflt->flt_addr, 64), 2310 cpunodes[CPU->cpu_id].ecache_size, 2311 cpunodes[CPU->cpu_id].ecache_linesize); 2312 2313 (void) clear_errors(NULL, NULL); 2314 } 2315 } 2316 2317 /* 2318 * Log final recover message 2319 */ 2320 if (!panicstr) { 2321 if (!aflt->flt_priv) { 2322 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, 2323 NULL, " Above Error is in User Mode" 2324 "\n and is fatal: " 2325 "will SIGKILL process and notify contract"); 2326 } else if (aflt->flt_prot == AFLT_PROT_COPY && aflt->flt_core) { 2327 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, 2328 NULL, " Above Error detected while dumping core;" 2329 "\n core file will be truncated"); 2330 } else if (aflt->flt_prot == AFLT_PROT_COPY) { 2331 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, 2332 NULL, " Above Error is due to Kernel access" 2333 "\n to User space and is fatal: " 2334 "will SIGKILL process and notify contract"); 2335 } else if (aflt->flt_prot == AFLT_PROT_EC) { 2336 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, NULL, 2337 " Above Error detected by protected Kernel code" 2338 "\n that will try to clear error from system"); 2339 } 2340 } 2341 } 2342 2343 2344 /* 2345 * Check all cpus for non-trapping UE-causing errors 2346 * In Ultra I/II, we look for copyback errors (CPs) 2347 */ 2348 void 2349 cpu_check_allcpus(struct async_flt *aflt) 2350 { 2351 spitf_async_flt cp; 2352 spitf_async_flt *spf_cpflt = &cp; 2353 struct async_flt *cpflt = (struct async_flt *)&cp; 2354 int pix; 2355 2356 cpflt->flt_id = aflt->flt_id; 2357 cpflt->flt_addr = aflt->flt_addr; 2358 2359 for (pix = 0; pix < NCPU; pix++) { 2360 if (CPU_XCALL_READY(pix)) { 2361 xc_one(pix, (xcfunc_t *)get_cpu_status, 2362 (uint64_t)cpflt, 0); 2363 2364 if (cpflt->flt_stat & P_AFSR_CP) { 2365 char *space; 2366 2367 /* See which space - this info may not exist */ 2368 if (aflt->flt_status & ECC_D_TRAP) 2369 space = "Data "; 2370 else if (aflt->flt_status & ECC_I_TRAP) 2371 space = "Instruction "; 2372 else 2373 space = ""; 2374 2375 cpu_aflt_log(CE_WARN, 1, spf_cpflt, CP_LFLAGS, 2376 NULL, " AFAR was derived from UE report," 2377 " CP event on CPU%d (caused %saccess " 2378 "error on %s%d)", pix, space, 2379 (aflt->flt_status & ECC_IOBUS) ? 2380 "IOBUS" : "CPU", aflt->flt_bus_id); 2381 2382 if (spf_cpflt->flt_ec_lcnt > 0) 2383 cpu_log_ecmem_info(spf_cpflt); 2384 else 2385 cpu_aflt_log(CE_WARN, 2, spf_cpflt, 2386 CPU_ERRID_FIRST, NULL, 2387 " No cache dump available"); 2388 } 2389 } 2390 } 2391 } 2392 2393 #ifdef DEBUG 2394 int test_mp_cp = 0; 2395 #endif 2396 2397 /* 2398 * Cross-call callback routine to tell a CPU to read its own %afsr to check 2399 * for copyback errors and capture relevant information. 
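 *
 * For reference, the callers above invoke this routine on a specific
 * CPU with the usual cross-call pattern; a minimal sketch of that use,
 * as in cpu_check_allcpus() (cpflt is a caller-owned async_flt):
 *
 *	if (CPU_XCALL_READY(pix)) {
 *		xc_one(pix, (xcfunc_t *)get_cpu_status, (uint64_t)cpflt, 0);
 *		if (cpflt->flt_stat & P_AFSR_CP) {
 *			... the target CPU saw a copyback error; log it ...
 *		}
 *	}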
2400 */ 2401 static uint_t 2402 get_cpu_status(uint64_t arg) 2403 { 2404 struct async_flt *aflt = (struct async_flt *)arg; 2405 spitf_async_flt *spf_flt = (spitf_async_flt *)arg; 2406 uint64_t afsr; 2407 uint32_t ec_idx; 2408 uint64_t sdbh, sdbl; 2409 int i; 2410 uint32_t ec_set_size; 2411 uchar_t valid; 2412 ec_data_t ec_data[8]; 2413 uint64_t ec_tag, flt_addr_tag, oafsr; 2414 uint64_t *acc_afsr = NULL; 2415 2416 get_asyncflt(&afsr); 2417 if (CPU_PRIVATE(CPU) != NULL) { 2418 acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 2419 afsr |= *acc_afsr; 2420 *acc_afsr = 0; 2421 } 2422 2423 #ifdef DEBUG 2424 if (test_mp_cp) 2425 afsr |= P_AFSR_CP; 2426 #endif 2427 aflt->flt_stat = afsr; 2428 2429 if (afsr & P_AFSR_CP) { 2430 /* 2431 * Capture the UDBs 2432 */ 2433 get_udb_errors(&sdbh, &sdbl); 2434 spf_flt->flt_sdbh = (ushort_t)(sdbh & 0x3FF); 2435 spf_flt->flt_sdbl = (ushort_t)(sdbl & 0x3FF); 2436 2437 /* 2438 * Clear CP bit before capturing ecache data 2439 * and AFSR info. 2440 */ 2441 set_asyncflt(P_AFSR_CP); 2442 2443 /* 2444 * See if we can capture the ecache line for the 2445 * fault PA. 2446 * 2447 * Return a valid matching ecache line, if any. 2448 * Otherwise, return the first matching ecache 2449 * line marked invalid. 2450 */ 2451 flt_addr_tag = aflt->flt_addr >> cpu_ec_tag_shift; 2452 ec_set_size = cpunodes[CPU->cpu_id].ecache_size / 2453 ecache_associativity; 2454 spf_flt->flt_ec_lcnt = 0; 2455 2456 for (i = 0, ec_idx = (aflt->flt_addr % ec_set_size); 2457 i < ecache_associativity; i++, ec_idx += ec_set_size) { 2458 get_ecache_dtag(P2ALIGN(ec_idx, 64), 2459 (uint64_t *)&ec_data[0], &ec_tag, &oafsr, 2460 acc_afsr); 2461 2462 if ((ec_tag & cpu_ec_tag_mask) != flt_addr_tag) 2463 continue; 2464 2465 valid = cpu_ec_state_valid & 2466 (uchar_t)((ec_tag & cpu_ec_state_mask) >> 2467 cpu_ec_state_shift); 2468 2469 if (valid || spf_flt->flt_ec_lcnt == 0) { 2470 spf_flt->flt_ec_tag = ec_tag; 2471 bcopy(&ec_data, &spf_flt->flt_ec_data, 2472 sizeof (ec_data)); 2473 spf_flt->flt_ec_lcnt = 1; 2474 2475 if (valid) 2476 break; 2477 } 2478 } 2479 } 2480 return (0); 2481 } 2482 2483 /* 2484 * CPU-module callback for the non-panicking CPUs. This routine is invoked 2485 * from panic_idle() as part of the other CPUs stopping themselves when a 2486 * panic occurs. We need to be VERY careful what we do here, since panicstr 2487 * is NOT set yet and we cannot blow through locks. If panic_aflt is set 2488 * (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for 2489 * CP error information. 2490 */ 2491 void 2492 cpu_async_panic_callb(void) 2493 { 2494 spitf_async_flt cp; 2495 struct async_flt *aflt = (struct async_flt *)&cp; 2496 uint64_t *scrub_afsr; 2497 2498 if (panic_aflt.flt_id != 0) { 2499 aflt->flt_addr = panic_aflt.flt_addr; 2500 (void) get_cpu_status((uint64_t)aflt); 2501 2502 if (CPU_PRIVATE(CPU) != NULL) { 2503 scrub_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 2504 if (*scrub_afsr & P_AFSR_CP) { 2505 aflt->flt_stat |= *scrub_afsr; 2506 *scrub_afsr = 0; 2507 } 2508 } 2509 if (aflt->flt_stat & P_AFSR_CP) { 2510 aflt->flt_id = panic_aflt.flt_id; 2511 aflt->flt_panic = 1; 2512 aflt->flt_inst = CPU->cpu_id; 2513 aflt->flt_class = CPU_FAULT; 2514 cp.flt_type = CPU_PANIC_CP_ERR; 2515 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP, 2516 (void *)&cp, sizeof (cp), ue_queue, 2517 aflt->flt_panic); 2518 } 2519 } 2520 } 2521 2522 /* 2523 * Turn off all cpu error detection, normally only used for panics. 
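 *
 * A hypothetical caller (this pattern is not taken from this file)
 * that must not have errors reported asynchronously while poking
 * hardware could bracket the access with the pair of routines
 * defined here:
 *
 *	cpu_disable_errors();
 *	... access which must not be reported asynchronously ...
 *	cpu_enable_errors();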
2524 */ 2525 void 2526 cpu_disable_errors(void) 2527 { 2528 xt_all(set_error_enable_tl1, EER_DISABLE, EER_SET_ABSOLUTE); 2529 } 2530 2531 /* 2532 * Enable errors. 2533 */ 2534 void 2535 cpu_enable_errors(void) 2536 { 2537 xt_all(set_error_enable_tl1, EER_ENABLE, EER_SET_ABSOLUTE); 2538 } 2539 2540 static void 2541 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err) 2542 { 2543 uint64_t aligned_addr = P2ALIGN(ecc->flt_addr, 8); 2544 int i, loop = 1; 2545 ushort_t ecc_0; 2546 uint64_t paddr; 2547 uint64_t data; 2548 2549 if (verbose) 2550 loop = 8; 2551 for (i = 0; i < loop; i++) { 2552 paddr = aligned_addr + (i * 8); 2553 data = lddphys(paddr); 2554 if (verbose) { 2555 if (ce_err) { 2556 ecc_0 = ecc_gen((uint32_t)(data>>32), 2557 (uint32_t)data); 2558 cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS, 2559 NULL, " Paddr 0x%" PRIx64 ", " 2560 "Data 0x%08x.%08x, ECC 0x%x", paddr, 2561 (uint32_t)(data>>32), (uint32_t)data, 2562 ecc_0); 2563 } else { 2564 cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS, 2565 NULL, " Paddr 0x%" PRIx64 ", " 2566 "Data 0x%08x.%08x", paddr, 2567 (uint32_t)(data>>32), (uint32_t)data); 2568 } 2569 } 2570 } 2571 } 2572 2573 static struct { /* sec-ded-s4ed ecc code */ 2574 uint_t hi, lo; 2575 } ecc_code[8] = { 2576 { 0xee55de23U, 0x16161161U }, 2577 { 0x55eede93U, 0x61612212U }, 2578 { 0xbb557b8cU, 0x49494494U }, 2579 { 0x55bb7b6cU, 0x94948848U }, 2580 { 0x16161161U, 0xee55de23U }, 2581 { 0x61612212U, 0x55eede93U }, 2582 { 0x49494494U, 0xbb557b8cU }, 2583 { 0x94948848U, 0x55bb7b6cU } 2584 }; 2585 2586 static ushort_t 2587 ecc_gen(uint_t high_bytes, uint_t low_bytes) 2588 { 2589 int i, j; 2590 uchar_t checker, bit_mask; 2591 struct { 2592 uint_t hi, lo; 2593 } hex_data, masked_data[8]; 2594 2595 hex_data.hi = high_bytes; 2596 hex_data.lo = low_bytes; 2597 2598 /* mask out bits according to sec-ded-s4ed ecc code */ 2599 for (i = 0; i < 8; i++) { 2600 masked_data[i].hi = hex_data.hi & ecc_code[i].hi; 2601 masked_data[i].lo = hex_data.lo & ecc_code[i].lo; 2602 } 2603 2604 /* 2605 * xor all bits in masked_data[i] to get bit_i of checker, 2606 * where i = 0 to 7 2607 */ 2608 checker = 0; 2609 for (i = 0; i < 8; i++) { 2610 bit_mask = 1 << i; 2611 for (j = 0; j < 32; j++) { 2612 if (masked_data[i].lo & 1) checker ^= bit_mask; 2613 if (masked_data[i].hi & 1) checker ^= bit_mask; 2614 masked_data[i].hi >>= 1; 2615 masked_data[i].lo >>= 1; 2616 } 2617 } 2618 return (checker); 2619 } 2620 2621 /* 2622 * Flush the entire ecache using displacement flush by reading through a 2623 * physical address range as large as the ecache. 2624 */ 2625 void 2626 cpu_flush_ecache(void) 2627 { 2628 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2, 2629 cpunodes[CPU->cpu_id].ecache_linesize); 2630 } 2631 2632 /* 2633 * read and display the data in the cache line where the 2634 * original ce error occurred. 2635 * This routine is mainly used for debugging new hardware. 
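 *
 * For reference, cpu_read_paddr() above recomputes the expected check
 * bits for each aligned 8-byte word with ecc_gen(); a minimal sketch
 * of that computation for a single word (paddr is assumed to be
 * 8-byte aligned):
 *
 *	uint64_t data = lddphys(paddr);
 *	ushort_t ecc = ecc_gen((uint32_t)(data >> 32), (uint32_t)data);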
2636 */ 2637 void 2638 read_ecc_data(struct async_flt *ecc, short verbose, short ce_err) 2639 { 2640 kpreempt_disable(); 2641 /* disable ECC error traps */ 2642 set_error_enable(EER_ECC_DISABLE); 2643 2644 /* 2645 * flush the ecache 2646 * read the data 2647 * check to see if an ECC error occured 2648 */ 2649 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2, 2650 cpunodes[CPU->cpu_id].ecache_linesize); 2651 set_lsu(get_lsu() | cache_boot_state); 2652 cpu_read_paddr(ecc, verbose, ce_err); 2653 (void) check_ecc(ecc); 2654 2655 /* enable ECC error traps */ 2656 set_error_enable(EER_ENABLE); 2657 kpreempt_enable(); 2658 } 2659 2660 /* 2661 * Check the AFSR bits for UE/CE persistence. 2662 * If UE or CE errors are detected, the routine will 2663 * clears all the AFSR sticky bits (except CP for 2664 * spitfire/blackbird) and the UDBs. 2665 * if ce_debug or ue_debug is set, log any ue/ce errors detected. 2666 */ 2667 static int 2668 check_ecc(struct async_flt *ecc) 2669 { 2670 uint64_t t_afsr; 2671 uint64_t t_afar; 2672 uint64_t udbh; 2673 uint64_t udbl; 2674 ushort_t udb; 2675 int persistent = 0; 2676 2677 /* 2678 * Capture the AFSR, AFAR and UDBs info 2679 */ 2680 get_asyncflt(&t_afsr); 2681 get_asyncaddr(&t_afar); 2682 t_afar &= SABRE_AFAR_PA; 2683 get_udb_errors(&udbh, &udbl); 2684 2685 if ((t_afsr & P_AFSR_UE) || (t_afsr & P_AFSR_CE)) { 2686 /* 2687 * Clear the errors 2688 */ 2689 clr_datapath(); 2690 2691 if (isus2i || isus2e) 2692 set_asyncflt(t_afsr); 2693 else 2694 set_asyncflt(t_afsr & ~P_AFSR_CP); 2695 2696 /* 2697 * determine whether to check UDBH or UDBL for persistence 2698 */ 2699 if (ecc->flt_synd & UDBL_REG) { 2700 udb = (ushort_t)udbl; 2701 t_afar |= 0x8; 2702 } else { 2703 udb = (ushort_t)udbh; 2704 } 2705 2706 if (ce_debug || ue_debug) { 2707 spitf_async_flt spf_flt; /* for logging */ 2708 struct async_flt *aflt = 2709 (struct async_flt *)&spf_flt; 2710 2711 /* Package the info nicely in the spf_flt struct */ 2712 bzero(&spf_flt, sizeof (spitf_async_flt)); 2713 aflt->flt_stat = t_afsr; 2714 aflt->flt_addr = t_afar; 2715 spf_flt.flt_sdbh = (ushort_t)(udbh & 0x3FF); 2716 spf_flt.flt_sdbl = (ushort_t)(udbl & 0x3FF); 2717 2718 cpu_aflt_log(CE_CONT, 0, &spf_flt, (CPU_AFSR | 2719 CPU_AFAR | CPU_UDBH | CPU_UDBL), NULL, 2720 " check_ecc: Dumping captured error states ..."); 2721 } 2722 2723 /* 2724 * if the fault addresses don't match, not persistent 2725 */ 2726 if (t_afar != ecc->flt_addr) { 2727 return (persistent); 2728 } 2729 2730 /* 2731 * check for UE persistence 2732 * since all DIMMs in the bank are identified for a UE, 2733 * there's no reason to check the syndrome 2734 */ 2735 if ((ecc->flt_stat & P_AFSR_UE) && (t_afsr & P_AFSR_UE)) { 2736 persistent = 1; 2737 } 2738 2739 /* 2740 * check for CE persistence 2741 */ 2742 if ((ecc->flt_stat & P_AFSR_CE) && (t_afsr & P_AFSR_CE)) { 2743 if ((udb & P_DER_E_SYND) == 2744 (ecc->flt_synd & P_DER_E_SYND)) { 2745 persistent = 1; 2746 } 2747 } 2748 } 2749 return (persistent); 2750 } 2751 2752 #ifdef HUMMINGBIRD 2753 #define HB_FULL_DIV 1 2754 #define HB_HALF_DIV 2 2755 #define HB_LOWEST_DIV 8 2756 #define HB_ECLK_INVALID 0xdeadbad 2757 static uint64_t hb_eclk[HB_LOWEST_DIV + 1] = { 2758 HB_ECLK_INVALID, HB_ECLK_1, HB_ECLK_2, HB_ECLK_INVALID, 2759 HB_ECLK_4, HB_ECLK_INVALID, HB_ECLK_6, HB_ECLK_INVALID, 2760 HB_ECLK_8 }; 2761 2762 #define HB_SLOW_DOWN 0 2763 #define HB_SPEED_UP 1 2764 2765 #define SET_ESTAR_MODE(mode) \ 2766 stdphysio(HB_ESTAR_MODE, (mode)); \ 2767 /* \ 2768 * PLL logic requires minimum of 16 clock \ 
2769 * cycles to lock to the new clock speed. \ 2770 * Wait 1 usec to satisfy this requirement. \ 2771 */ \ 2772 drv_usecwait(1); 2773 2774 #define CHANGE_REFRESH_COUNT(direction, cur_div, new_div) \ 2775 { \ 2776 volatile uint64_t data; \ 2777 uint64_t count, new_count; \ 2778 clock_t delay; \ 2779 data = lddphysio(HB_MEM_CNTRL0); \ 2780 count = (data & HB_REFRESH_COUNT_MASK) >> \ 2781 HB_REFRESH_COUNT_SHIFT; \ 2782 new_count = (HB_REFRESH_INTERVAL * \ 2783 cpunodes[CPU->cpu_id].clock_freq) / \ 2784 (HB_REFRESH_CLOCKS_PER_COUNT * (new_div) * NANOSEC);\ 2785 data = (data & ~HB_REFRESH_COUNT_MASK) | \ 2786 (new_count << HB_REFRESH_COUNT_SHIFT); \ 2787 stdphysio(HB_MEM_CNTRL0, data); \ 2788 data = lddphysio(HB_MEM_CNTRL0); \ 2789 /* \ 2790 * If we are slowing down the cpu and Memory \ 2791 * Self Refresh is not enabled, it is required \ 2792 * to wait for old refresh count to count-down and \ 2793 * new refresh count to go into effect (let new value \ 2794 * counts down once). \ 2795 */ \ 2796 if ((direction) == HB_SLOW_DOWN && \ 2797 (data & HB_SELF_REFRESH_MASK) == 0) { \ 2798 /* \ 2799 * Each count takes 64 cpu clock cycles \ 2800 * to decrement. Wait for current refresh \ 2801 * count plus new refresh count at current \ 2802 * cpu speed to count down to zero. Round \ 2803 * up the delay time. \ 2804 */ \ 2805 delay = ((HB_REFRESH_CLOCKS_PER_COUNT * \ 2806 (count + new_count) * MICROSEC * (cur_div)) /\ 2807 cpunodes[CPU->cpu_id].clock_freq) + 1; \ 2808 drv_usecwait(delay); \ 2809 } \ 2810 } 2811 2812 #define SET_SELF_REFRESH(bit) \ 2813 { \ 2814 volatile uint64_t data; \ 2815 data = lddphysio(HB_MEM_CNTRL0); \ 2816 data = (data & ~HB_SELF_REFRESH_MASK) | \ 2817 ((bit) << HB_SELF_REFRESH_SHIFT); \ 2818 stdphysio(HB_MEM_CNTRL0, data); \ 2819 data = lddphysio(HB_MEM_CNTRL0); \ 2820 } 2821 #endif /* HUMMINGBIRD */ 2822 2823 /* ARGSUSED */ 2824 void 2825 cpu_change_speed(uint64_t new_divisor, uint64_t arg2) 2826 { 2827 #ifdef HUMMINGBIRD 2828 uint64_t cur_mask, cur_divisor = 0; 2829 volatile uint64_t reg; 2830 processor_info_t *pi = &(CPU->cpu_type_info); 2831 int index; 2832 2833 if ((new_divisor < HB_FULL_DIV || new_divisor > HB_LOWEST_DIV) || 2834 (hb_eclk[new_divisor] == HB_ECLK_INVALID)) { 2835 cmn_err(CE_WARN, "cpu_change_speed: bad divisor 0x%lx", 2836 new_divisor); 2837 return; 2838 } 2839 2840 reg = lddphysio(HB_ESTAR_MODE); 2841 cur_mask = reg & HB_ECLK_MASK; 2842 for (index = HB_FULL_DIV; index <= HB_LOWEST_DIV; index++) { 2843 if (hb_eclk[index] == cur_mask) { 2844 cur_divisor = index; 2845 break; 2846 } 2847 } 2848 2849 if (cur_divisor == 0) 2850 cmn_err(CE_PANIC, "cpu_change_speed: current divisor " 2851 "can't be determined!"); 2852 2853 /* 2854 * If we are already at the requested divisor speed, just 2855 * return. 2856 */ 2857 if (cur_divisor == new_divisor) 2858 return; 2859 2860 if (cur_divisor == HB_FULL_DIV && new_divisor == HB_HALF_DIV) { 2861 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor); 2862 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2863 SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE); 2864 2865 } else if (cur_divisor == HB_HALF_DIV && new_divisor == HB_FULL_DIV) { 2866 SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE); 2867 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2868 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2869 CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor); 2870 2871 } else if (cur_divisor == HB_FULL_DIV && new_divisor > HB_HALF_DIV) { 2872 /* 2873 * Transition to 1/2 speed first, then to 2874 * lower speed. 
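 *
 * (For reference, the CHANGE_REFRESH_COUNT() steps below rescale the
 * memory refresh counter for the target divisor using the formula in
 * the macro above, roughly
 *
 *	new_count = (HB_REFRESH_INTERVAL * clock_freq) /
 *	    (HB_REFRESH_CLOCKS_PER_COUNT * new_div * NANOSEC)
 *
 * so a larger divisor, i.e. a slower E$ clock, yields a proportionally
 * smaller refresh count.)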
2875 */ 2876 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, HB_HALF_DIV); 2877 SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]); 2878 SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE); 2879 2880 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, HB_HALF_DIV, new_divisor); 2881 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2882 2883 } else if (cur_divisor > HB_HALF_DIV && new_divisor == HB_FULL_DIV) { 2884 /* 2885 * Transition to 1/2 speed first, then to 2886 * full speed. 2887 */ 2888 SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]); 2889 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2890 CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, HB_HALF_DIV); 2891 2892 SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE); 2893 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2894 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2895 CHANGE_REFRESH_COUNT(HB_SPEED_UP, HB_HALF_DIV, new_divisor); 2896 2897 } else if (cur_divisor < new_divisor) { 2898 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor); 2899 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2900 2901 } else if (cur_divisor > new_divisor) { 2902 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2903 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2904 CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor); 2905 } 2906 CPU->cpu_m.divisor = (uchar_t)new_divisor; 2907 CPU->cpu_curr_clock = 2908 (((uint64_t)pi->pi_clock * 1000000) / new_divisor); 2909 #endif 2910 } 2911 2912 /* 2913 * Clear the AFSR sticky bits and the UDBs. For Sabre/Spitfire/Blackbird, 2914 * we clear all the sticky bits. If a non-null pointer to a async fault 2915 * structure argument is passed in, the captured error state (AFSR, AFAR, UDBs) 2916 * info will be returned in the structure. If a non-null pointer to a 2917 * uint64_t is passed in, this will be updated if the CP bit is set in the 2918 * AFSR. The afsr will be returned. 2919 */ 2920 static uint64_t 2921 clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr) 2922 { 2923 struct async_flt *aflt = (struct async_flt *)spf_flt; 2924 uint64_t afsr; 2925 uint64_t udbh, udbl; 2926 2927 get_asyncflt(&afsr); 2928 2929 if ((acc_afsr != NULL) && (afsr & P_AFSR_CP)) 2930 *acc_afsr |= afsr; 2931 2932 if (spf_flt != NULL) { 2933 aflt->flt_stat = afsr; 2934 get_asyncaddr(&aflt->flt_addr); 2935 aflt->flt_addr &= SABRE_AFAR_PA; 2936 2937 get_udb_errors(&udbh, &udbl); 2938 spf_flt->flt_sdbh = (ushort_t)(udbh & 0x3FF); 2939 spf_flt->flt_sdbl = (ushort_t)(udbl & 0x3FF); 2940 } 2941 2942 set_asyncflt(afsr); /* clear afsr */ 2943 clr_datapath(); /* clear udbs */ 2944 return (afsr); 2945 } 2946 2947 /* 2948 * Scan the ecache to look for bad lines. If found, the afsr, afar, e$ data 2949 * tag of the first bad line will be returned. We also return the old-afsr 2950 * (before clearing the sticky bits). The linecnt data will be updated to 2951 * indicate the number of bad lines detected. 
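 *
 * Because the scan walks E$ indices rather than starting from an AFAR,
 * the fault PA has to be rebuilt from the tag and the index; a minimal
 * sketch of the reconstruction used below (i is the byte offset of the
 * line within the E$, j the 8-byte chunk within the line):
 *
 *	ec_set_size = ecache_sz / ecache_associativity;
 *	errpa = ((t_etag & cpu_ec_tag_mask) << cpu_ec_tag_shift) |
 *	    ((i | (j << 3)) % ec_set_size);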
2952 */ 2953 static void 2954 scan_ecache(uint64_t *t_afar, ec_data_t *ecache_data, 2955 uint64_t *ecache_tag, int *linecnt, uint64_t *t_afsr) 2956 { 2957 ec_data_t t_ecdata[8]; 2958 uint64_t t_etag, oafsr; 2959 uint64_t pa = AFLT_INV_ADDR; 2960 uint32_t i, j, ecache_sz; 2961 uint64_t acc_afsr = 0; 2962 uint64_t *cpu_afsr = NULL; 2963 2964 if (CPU_PRIVATE(CPU) != NULL) 2965 cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 2966 2967 *linecnt = 0; 2968 ecache_sz = cpunodes[CPU->cpu_id].ecache_size; 2969 2970 for (i = 0; i < ecache_sz; i += 64) { 2971 get_ecache_dtag(i, (uint64_t *)&t_ecdata[0], &t_etag, &oafsr, 2972 cpu_afsr); 2973 acc_afsr |= oafsr; 2974 2975 /* 2976 * Scan through the whole 64 bytes line in 8 8-byte chunks 2977 * looking for the first occurrence of an EDP error. The AFSR 2978 * info is captured for each 8-byte chunk. Note that for 2979 * Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in 2980 * 16-byte chunk granularity (i.e. the AFSR will be the same 2981 * for the high and low 8-byte words within the 16-byte chunk). 2982 * For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte 2983 * granularity and only PSYND bits [7:0] are used. 2984 */ 2985 for (j = 0; j < 8; j++) { 2986 ec_data_t *ecdptr = &t_ecdata[j]; 2987 2988 if (ecdptr->ec_afsr & P_AFSR_EDP) { 2989 uint64_t errpa; 2990 ushort_t psynd; 2991 uint32_t ec_set_size = ecache_sz / 2992 ecache_associativity; 2993 2994 /* 2995 * For Spitfire/Blackbird, we need to look at 2996 * the PSYND to make sure that this 8-byte chunk 2997 * is the right one. PSYND bits [15:8] belong 2998 * to the upper 8-byte (even) chunk. Bits 2999 * [7:0] belong to the lower 8-byte chunk (odd). 3000 */ 3001 psynd = ecdptr->ec_afsr & P_AFSR_P_SYND; 3002 if (!isus2i && !isus2e) { 3003 if (j & 0x1) 3004 psynd = psynd & 0xFF; 3005 else 3006 psynd = psynd >> 8; 3007 3008 if (!psynd) 3009 continue; /* wrong chunk */ 3010 } 3011 3012 /* Construct the PA */ 3013 errpa = ((t_etag & cpu_ec_tag_mask) << 3014 cpu_ec_tag_shift) | ((i | (j << 3)) % 3015 ec_set_size); 3016 3017 /* clean up the cache line */ 3018 flushecacheline(P2ALIGN(errpa, 64), 3019 cpunodes[CPU->cpu_id].ecache_size); 3020 3021 oafsr = clear_errors(NULL, cpu_afsr); 3022 acc_afsr |= oafsr; 3023 3024 (*linecnt)++; 3025 3026 /* 3027 * Capture the PA for the first bad line found. 3028 * Return the ecache dump and tag info. 3029 */ 3030 if (pa == AFLT_INV_ADDR) { 3031 int k; 3032 3033 pa = errpa; 3034 for (k = 0; k < 8; k++) 3035 ecache_data[k] = t_ecdata[k]; 3036 *ecache_tag = t_etag; 3037 } 3038 break; 3039 } 3040 } 3041 } 3042 *t_afar = pa; 3043 *t_afsr = acc_afsr; 3044 } 3045 3046 static void 3047 cpu_log_ecmem_info(spitf_async_flt *spf_flt) 3048 { 3049 struct async_flt *aflt = (struct async_flt *)spf_flt; 3050 uint64_t ecache_tag = spf_flt->flt_ec_tag; 3051 char linestr[30]; 3052 char *state_str; 3053 int i; 3054 3055 /* 3056 * Check the ecache tag to make sure it 3057 * is valid. If invalid, a memory dump was 3058 * captured instead of a ecache dump. 
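 *
 * Further down, the dump loop picks the parity syndrome lane for each
 * captured 8-byte chunk; a minimal sketch of that selection (i is the
 * chunk number within the line):
 *
 *	psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
 *	if (isus2i || isus2e || (i & 0x1))
 *		bad = psynd & 0x00FF;	(low byte lane)
 *	else
 *		bad = psynd & 0xFF00;	(high byte lane of 16-byte chunk)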
3059 */ 3060 if (spf_flt->flt_ec_tag != AFLT_INV_ADDR) { 3061 uchar_t eparity = (uchar_t) 3062 ((ecache_tag & cpu_ec_par_mask) >> cpu_ec_par_shift); 3063 3064 uchar_t estate = (uchar_t) 3065 ((ecache_tag & cpu_ec_state_mask) >> cpu_ec_state_shift); 3066 3067 if (estate == cpu_ec_state_shr) 3068 state_str = "Shared"; 3069 else if (estate == cpu_ec_state_exl) 3070 state_str = "Exclusive"; 3071 else if (estate == cpu_ec_state_own) 3072 state_str = "Owner"; 3073 else if (estate == cpu_ec_state_mod) 3074 state_str = "Modified"; 3075 else 3076 state_str = "Invalid"; 3077 3078 if (spf_flt->flt_ec_lcnt > 1) { 3079 (void) snprintf(linestr, sizeof (linestr), 3080 "Badlines found=%d", spf_flt->flt_ec_lcnt); 3081 } else { 3082 linestr[0] = '\0'; 3083 } 3084 3085 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL, 3086 " PA=0x%08x.%08x\n E$tag 0x%08x.%08x E$State: %s " 3087 "E$parity 0x%02x %s", (uint32_t)(aflt->flt_addr >> 32), 3088 (uint32_t)aflt->flt_addr, (uint32_t)(ecache_tag >> 32), 3089 (uint32_t)ecache_tag, state_str, 3090 (uint32_t)eparity, linestr); 3091 } else { 3092 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL, 3093 " E$tag != PA from AFAR; E$line was victimized" 3094 "\n dumping memory from PA 0x%08x.%08x instead", 3095 (uint32_t)(P2ALIGN(aflt->flt_addr, 64) >> 32), 3096 (uint32_t)P2ALIGN(aflt->flt_addr, 64)); 3097 } 3098 3099 /* 3100 * Dump out all 8 8-byte ecache data captured 3101 * For each 8-byte data captured, we check the 3102 * captured afsr's parity syndrome to find out 3103 * which 8-byte chunk is bad. For memory dump, the 3104 * AFSR values were initialized to 0. 3105 */ 3106 for (i = 0; i < 8; i++) { 3107 ec_data_t *ecdptr; 3108 uint_t offset; 3109 ushort_t psynd; 3110 ushort_t bad; 3111 uint64_t edp; 3112 3113 offset = i << 3; /* multiply by 8 */ 3114 ecdptr = &spf_flt->flt_ec_data[i]; 3115 psynd = ecdptr->ec_afsr & P_AFSR_P_SYND; 3116 edp = ecdptr->ec_afsr & P_AFSR_EDP; 3117 3118 /* 3119 * For Sabre/Hummingbird, parity synd is captured only 3120 * in [7:0] of AFSR.PSYND for each 8-byte chunk. 3121 * For spitfire/blackbird, AFSR.PSYND is captured 3122 * in 16-byte granularity. [15:8] represent 3123 * the upper 8 byte and [7:0] the lower 8 byte. 3124 */ 3125 if (isus2i || isus2e || (i & 0x1)) 3126 bad = (psynd & 0xFF); /* check bits [7:0] */ 3127 else 3128 bad = (psynd & 0xFF00); /* check bits [15:8] */ 3129 3130 if (bad && edp) { 3131 cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL, 3132 " E$Data (0x%02x): 0x%08x.%08x " 3133 "*Bad* PSYND=0x%04x", offset, 3134 (uint32_t)(ecdptr->ec_d8 >> 32), 3135 (uint32_t)ecdptr->ec_d8, psynd); 3136 } else { 3137 cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL, 3138 " E$Data (0x%02x): 0x%08x.%08x", offset, 3139 (uint32_t)(ecdptr->ec_d8 >> 32), 3140 (uint32_t)ecdptr->ec_d8); 3141 } 3142 } 3143 } 3144 3145 /* 3146 * Common logging function for all cpu async errors. This function allows the 3147 * caller to generate a single cmn_err() call that logs the appropriate items 3148 * from the fault structure, and implements our rules for AFT logging levels. 3149 * 3150 * ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT) 3151 * tagnum: 0, 1, 2, .. generate the [AFT#] tag 3152 * spflt: pointer to spitfire async fault structure 3153 * logflags: bitflags indicating what to output 3154 * endstr: a end string to appear at the end of this log 3155 * fmt: a format string to appear at the beginning of the log 3156 * 3157 * The logflags allows the construction of predetermined output from the spflt 3158 * structure. 
The individual data items always appear in a consistent order. 3159 * Note that either or both of the spflt structure pointer and logflags may be 3160 * NULL or zero respectively, indicating that the predetermined output 3161 * substrings are not requested in this log. The output looks like this: 3162 * 3163 * [AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU> 3164 * <CPU_SPACE><CPU_ERRID> 3165 * newline+4spaces<CPU_AFSR><CPU_AFAR> 3166 * newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC> 3167 * newline+4spaces<CPU_UDBH><CPU_UDBL> 3168 * newline+4spaces<CPU_SYND> 3169 * newline+4spaces<endstr> 3170 * 3171 * Note that <endstr> may not start on a newline if we are logging <CPU_PSYND>; 3172 * it is assumed that <endstr> will be the unum string in this case. The size 3173 * of our intermediate formatting buf[] is based on the worst case of all flags 3174 * being enabled. We pass the caller's varargs directly to vcmn_err() for 3175 * formatting so we don't need additional stack space to format them here. 3176 */ 3177 /*PRINTFLIKE6*/ 3178 static void 3179 cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, uint_t logflags, 3180 const char *endstr, const char *fmt, ...) 3181 { 3182 struct async_flt *aflt = (struct async_flt *)spflt; 3183 char buf[400], *p, *q; /* see comments about buf[] size above */ 3184 va_list ap; 3185 int console_log_flag; 3186 3187 if ((aflt == NULL) || ((aflt->flt_class == CPU_FAULT) && 3188 (aflt->flt_stat & P_AFSR_LEVEL1)) || 3189 (aflt->flt_panic)) { 3190 console_log_flag = (tagnum < 2) || aft_verbose; 3191 } else { 3192 int verbose = ((aflt->flt_class == BUS_FAULT) || 3193 (aflt->flt_stat & P_AFSR_CE)) ? 3194 ce_verbose_memory : ce_verbose_other; 3195 3196 if (!verbose) 3197 return; 3198 3199 console_log_flag = (verbose > 1); 3200 } 3201 3202 if (console_log_flag) 3203 (void) sprintf(buf, "[AFT%d]", tagnum); 3204 else 3205 (void) sprintf(buf, "![AFT%d]", tagnum); 3206 3207 p = buf + strlen(buf); /* current buffer position */ 3208 q = buf + sizeof (buf); /* pointer past end of buffer */ 3209 3210 if (spflt != NULL && (logflags & CPU_ERRID_FIRST)) { 3211 (void) snprintf(p, (size_t)(q - p), " errID 0x%08x.%08x", 3212 (uint32_t)(aflt->flt_id >> 32), (uint32_t)aflt->flt_id); 3213 p += strlen(p); 3214 } 3215 3216 /* 3217 * Copy the caller's format string verbatim into buf[]. It will be 3218 * formatted by the call to vcmn_err() at the end of this function. 3219 */ 3220 if (fmt != NULL && p < q) { 3221 (void) strncpy(p, fmt, (size_t)(q - p - 1)); 3222 buf[sizeof (buf) - 1] = '\0'; 3223 p += strlen(p); 3224 } 3225 3226 if (spflt != NULL) { 3227 if (logflags & CPU_FLTCPU) { 3228 (void) snprintf(p, (size_t)(q - p), " CPU%d", 3229 aflt->flt_inst); 3230 p += strlen(p); 3231 } 3232 3233 if (logflags & CPU_SPACE) { 3234 if (aflt->flt_status & ECC_D_TRAP) 3235 (void) snprintf(p, (size_t)(q - p), 3236 " Data access"); 3237 else if (aflt->flt_status & ECC_I_TRAP) 3238 (void) snprintf(p, (size_t)(q - p), 3239 " Instruction access"); 3240 p += strlen(p); 3241 } 3242 3243 if (logflags & CPU_TL) { 3244 (void) snprintf(p, (size_t)(q - p), " at TL%s", 3245 aflt->flt_tl ? 
">0" : "=0"); 3246 p += strlen(p); 3247 } 3248 3249 if (logflags & CPU_ERRID) { 3250 (void) snprintf(p, (size_t)(q - p), 3251 ", errID 0x%08x.%08x", 3252 (uint32_t)(aflt->flt_id >> 32), 3253 (uint32_t)aflt->flt_id); 3254 p += strlen(p); 3255 } 3256 3257 if (logflags & CPU_AFSR) { 3258 (void) snprintf(p, (size_t)(q - p), 3259 "\n AFSR 0x%08b.%08b", 3260 (uint32_t)(aflt->flt_stat >> 32), AFSR_FMTSTR0, 3261 (uint32_t)aflt->flt_stat, AFSR_FMTSTR1); 3262 p += strlen(p); 3263 } 3264 3265 if (logflags & CPU_AFAR) { 3266 (void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x", 3267 (uint32_t)(aflt->flt_addr >> 32), 3268 (uint32_t)aflt->flt_addr); 3269 p += strlen(p); 3270 } 3271 3272 if (logflags & CPU_AF_PSYND) { 3273 ushort_t psynd = (ushort_t) 3274 (aflt->flt_stat & P_AFSR_P_SYND); 3275 3276 (void) snprintf(p, (size_t)(q - p), 3277 "\n AFSR.PSYND 0x%04x(Score %02d)", 3278 psynd, ecc_psynd_score(psynd)); 3279 p += strlen(p); 3280 } 3281 3282 if (logflags & CPU_AF_ETS) { 3283 (void) snprintf(p, (size_t)(q - p), " AFSR.ETS 0x%02x", 3284 (uchar_t)((aflt->flt_stat & P_AFSR_ETS) >> 16)); 3285 p += strlen(p); 3286 } 3287 3288 if (logflags & CPU_FAULTPC) { 3289 (void) snprintf(p, (size_t)(q - p), " Fault_PC 0x%p", 3290 (void *)aflt->flt_pc); 3291 p += strlen(p); 3292 } 3293 3294 if (logflags & CPU_UDBH) { 3295 (void) snprintf(p, (size_t)(q - p), 3296 "\n UDBH 0x%04b UDBH.ESYND 0x%02x", 3297 spflt->flt_sdbh, UDB_FMTSTR, 3298 spflt->flt_sdbh & 0xFF); 3299 p += strlen(p); 3300 } 3301 3302 if (logflags & CPU_UDBL) { 3303 (void) snprintf(p, (size_t)(q - p), 3304 " UDBL 0x%04b UDBL.ESYND 0x%02x", 3305 spflt->flt_sdbl, UDB_FMTSTR, 3306 spflt->flt_sdbl & 0xFF); 3307 p += strlen(p); 3308 } 3309 3310 if (logflags & CPU_SYND) { 3311 ushort_t synd = SYND(aflt->flt_synd); 3312 3313 (void) snprintf(p, (size_t)(q - p), 3314 "\n %s Syndrome 0x%x Memory Module ", 3315 UDBL(aflt->flt_synd) ? "UDBL" : "UDBH", synd); 3316 p += strlen(p); 3317 } 3318 } 3319 3320 if (endstr != NULL) { 3321 if (!(logflags & CPU_SYND)) 3322 (void) snprintf(p, (size_t)(q - p), "\n %s", endstr); 3323 else 3324 (void) snprintf(p, (size_t)(q - p), "%s", endstr); 3325 p += strlen(p); 3326 } 3327 3328 if (ce_code == CE_CONT && (p < q - 1)) 3329 (void) strcpy(p, "\n"); /* add final \n if needed */ 3330 3331 va_start(ap, fmt); 3332 vcmn_err(ce_code, buf, ap); 3333 va_end(ap); 3334 } 3335 3336 /* 3337 * Ecache Scrubbing 3338 * 3339 * The basic idea is to prevent lines from sitting in the ecache long enough 3340 * to build up soft errors which can lead to ecache parity errors. 3341 * 3342 * The following rules are observed when flushing the ecache: 3343 * 3344 * 1. When the system is busy, flush bad clean lines 3345 * 2. When the system is idle, flush all clean lines 3346 * 3. When the system is idle, flush good dirty lines 3347 * 4. Never flush bad dirty lines. 3348 * 3349 * modify parity busy idle 3350 * ---------------------------- 3351 * clean good X 3352 * clean bad X X 3353 * dirty good X 3354 * dirty bad 3355 * 3356 * Bad or good refers to whether a line has an E$ parity error or not. 3357 * Clean or dirty refers to the state of the modified bit. We currently 3358 * default the scan rate to 100 (scan 10% of the cache per second). 3359 * 3360 * The following are E$ states and actions. 
3361 * 3362 * We encode our state as a 3-bit number, consisting of: 3363 * ECACHE_STATE_MODIFIED (0=clean, 1=dirty) 3364 * ECACHE_STATE_PARITY (0=good, 1=bad) 3365 * ECACHE_STATE_BUSY (0=idle, 1=busy) 3366 * 3367 * We associate a flushing and a logging action with each state. 3368 * 3369 * E$ actions are different for Spitfire and Sabre/Hummingbird modules. 3370 * MIRROR_FLUSH indicates that an E$ line will be flushed for the mirrored 3371 * E$ only, in addition to value being set by ec_flush. 3372 */ 3373 3374 #define ALWAYS_FLUSH 0x1 /* flush E$ line on all E$ types */ 3375 #define NEVER_FLUSH 0x0 /* never the flush the E$ line */ 3376 #define MIRROR_FLUSH 0xF /* flush E$ line on mirrored E$ only */ 3377 3378 struct { 3379 char ec_flush; /* whether to flush or not */ 3380 char ec_log; /* ecache logging */ 3381 char ec_log_type; /* log type info */ 3382 } ec_action[] = { /* states of the E$ line in M P B */ 3383 { ALWAYS_FLUSH, 0, 0 }, /* 0 0 0 clean_good_idle */ 3384 { MIRROR_FLUSH, 0, 0 }, /* 0 0 1 clean_good_busy */ 3385 { ALWAYS_FLUSH, 1, CPU_BADLINE_CI_ERR }, /* 0 1 0 clean_bad_idle */ 3386 { ALWAYS_FLUSH, 1, CPU_BADLINE_CB_ERR }, /* 0 1 1 clean_bad_busy */ 3387 { ALWAYS_FLUSH, 0, 0 }, /* 1 0 0 dirty_good_idle */ 3388 { MIRROR_FLUSH, 0, 0 }, /* 1 0 1 dirty_good_busy */ 3389 { NEVER_FLUSH, 1, CPU_BADLINE_DI_ERR }, /* 1 1 0 dirty_bad_idle */ 3390 { NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR } /* 1 1 1 dirty_bad_busy */ 3391 }; 3392 3393 /* 3394 * Offsets into the ec_action[] that determines clean_good_busy and 3395 * dirty_good_busy lines. 3396 */ 3397 #define ECACHE_CGB_LINE 1 /* E$ clean_good_busy line */ 3398 #define ECACHE_DGB_LINE 5 /* E$ dirty_good_busy line */ 3399 3400 /* 3401 * We are flushing lines which are Clean_Good_Busy and also the lines 3402 * Dirty_Good_Busy. And we only follow it for non-mirrored E$. 3403 */ 3404 #define CGB(x, m) (((x) == ECACHE_CGB_LINE) && (m != ECACHE_CPU_MIRROR)) 3405 #define DGB(x, m) (((x) == ECACHE_DGB_LINE) && (m != ECACHE_CPU_MIRROR)) 3406 3407 #define ECACHE_STATE_MODIFIED 0x4 3408 #define ECACHE_STATE_PARITY 0x2 3409 #define ECACHE_STATE_BUSY 0x1 3410 3411 /* 3412 * If ecache is mirrored ecache_calls_a_sec and ecache_scan_rate are reduced. 3413 */ 3414 int ecache_calls_a_sec_mirrored = 1; 3415 int ecache_lines_per_call_mirrored = 1; 3416 3417 int ecache_scrub_enable = 1; /* ecache scrubbing is on by default */ 3418 int ecache_scrub_verbose = 1; /* prints clean and dirty lines */ 3419 int ecache_scrub_panic = 0; /* panics on a clean and dirty line */ 3420 int ecache_calls_a_sec = 100; /* scrubber calls per sec */ 3421 int ecache_scan_rate = 100; /* scan rate (in tenths of a percent) */ 3422 int ecache_idle_factor = 1; /* increase the scan rate when idle */ 3423 int ecache_flush_clean_good_busy = 50; /* flush rate (in percent) */ 3424 int ecache_flush_dirty_good_busy = 100; /* flush rate (in percent) */ 3425 3426 volatile int ec_timeout_calls = 1; /* timeout calls */ 3427 3428 /* 3429 * Interrupt number and pil for ecache scrubber cross-trap calls. 3430 */ 3431 static uint64_t ecache_scrub_inum; 3432 uint_t ecache_scrub_pil = PIL_9; 3433 3434 /* 3435 * Kstats for the E$ scrubber. 
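 *
 * The first eight counters below are laid out in the same order as the
 * 3-bit M P B state encoding above, which lets the scrubber bump the
 * matching counter by indexing with that state; a minimal sketch of
 * that use from scrub_ecache_line():
 *
 *	ec_knp = (kstat_named_t *)ec_ksp + mpb;
 *	ec_knp->value.ul++;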
3436 */ 3437 typedef struct ecache_kstat { 3438 kstat_named_t clean_good_idle; /* # of lines scrubbed */ 3439 kstat_named_t clean_good_busy; /* # of lines skipped */ 3440 kstat_named_t clean_bad_idle; /* # of lines scrubbed */ 3441 kstat_named_t clean_bad_busy; /* # of lines scrubbed */ 3442 kstat_named_t dirty_good_idle; /* # of lines scrubbed */ 3443 kstat_named_t dirty_good_busy; /* # of lines skipped */ 3444 kstat_named_t dirty_bad_idle; /* # of lines skipped */ 3445 kstat_named_t dirty_bad_busy; /* # of lines skipped */ 3446 kstat_named_t invalid_lines; /* # of invalid lines */ 3447 kstat_named_t clean_good_busy_flush; /* # of lines scrubbed */ 3448 kstat_named_t dirty_good_busy_flush; /* # of lines scrubbed */ 3449 kstat_named_t tags_cleared; /* # of E$ tags cleared */ 3450 } ecache_kstat_t; 3451 3452 static ecache_kstat_t ec_kstat_template = { 3453 { "clean_good_idle", KSTAT_DATA_ULONG }, 3454 { "clean_good_busy", KSTAT_DATA_ULONG }, 3455 { "clean_bad_idle", KSTAT_DATA_ULONG }, 3456 { "clean_bad_busy", KSTAT_DATA_ULONG }, 3457 { "dirty_good_idle", KSTAT_DATA_ULONG }, 3458 { "dirty_good_busy", KSTAT_DATA_ULONG }, 3459 { "dirty_bad_idle", KSTAT_DATA_ULONG }, 3460 { "dirty_bad_busy", KSTAT_DATA_ULONG }, 3461 { "invalid_lines", KSTAT_DATA_ULONG }, 3462 { "clean_good_busy_flush", KSTAT_DATA_ULONG }, 3463 { "dirty_good_busy_flush", KSTAT_DATA_ULONG }, 3464 { "ecache_tags_cleared", KSTAT_DATA_ULONG } 3465 }; 3466 3467 struct kmem_cache *sf_private_cache; 3468 3469 /* 3470 * Called periodically on each CPU to scan the ecache once a sec. 3471 * adjusting the ecache line index appropriately 3472 */ 3473 void 3474 scrub_ecache_line() 3475 { 3476 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc); 3477 int cpuid = CPU->cpu_id; 3478 uint32_t index = ssmp->ecache_flush_index; 3479 uint64_t ec_size = cpunodes[cpuid].ecache_size; 3480 size_t ec_linesize = cpunodes[cpuid].ecache_linesize; 3481 int nlines = ssmp->ecache_nlines; 3482 uint32_t ec_set_size = ec_size / ecache_associativity; 3483 int ec_mirror = ssmp->ecache_mirror; 3484 ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data; 3485 3486 int line, scan_lines, flush_clean_busy = 0, flush_dirty_busy = 0; 3487 int mpb; /* encode Modified, Parity, Busy for action */ 3488 uchar_t state; 3489 uint64_t ec_tag, paddr, oafsr, tafsr, nafsr; 3490 uint64_t *acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 3491 ec_data_t ec_data[8]; 3492 kstat_named_t *ec_knp; 3493 3494 switch (ec_mirror) { 3495 default: 3496 case ECACHE_CPU_NON_MIRROR: 3497 /* 3498 * The E$ scan rate is expressed in units of tenths of 3499 * a percent. ecache_scan_rate = 1000 (100%) means the 3500 * whole cache is scanned every second. 3501 */ 3502 scan_lines = (nlines * ecache_scan_rate) / 3503 (1000 * ecache_calls_a_sec); 3504 if (!(ssmp->ecache_busy)) { 3505 if (ecache_idle_factor > 0) { 3506 scan_lines *= ecache_idle_factor; 3507 } 3508 } else { 3509 flush_clean_busy = (scan_lines * 3510 ecache_flush_clean_good_busy) / 100; 3511 flush_dirty_busy = (scan_lines * 3512 ecache_flush_dirty_good_busy) / 100; 3513 } 3514 3515 ec_timeout_calls = (ecache_calls_a_sec ? 3516 ecache_calls_a_sec : 1); 3517 break; 3518 3519 case ECACHE_CPU_MIRROR: 3520 scan_lines = ecache_lines_per_call_mirrored; 3521 ec_timeout_calls = (ecache_calls_a_sec_mirrored ? 
3522 ecache_calls_a_sec_mirrored : 1); 3523 break; 3524 } 3525 3526 /* 3527 * The ecache scrubber algorithm operates by reading and 3528 * decoding the E$ tag to determine whether the corresponding E$ line 3529 * can be scrubbed. There is a implicit assumption in the scrubber 3530 * logic that the E$ tag is valid. Unfortunately, this assertion is 3531 * flawed since the E$ tag may also be corrupted and have parity errors 3532 * The scrubber logic is enhanced to check the validity of the E$ tag 3533 * before scrubbing. When a parity error is detected in the E$ tag, 3534 * it is possible to recover and scrub the tag under certain conditions 3535 * so that a ETP error condition can be avoided. 3536 */ 3537 3538 for (mpb = line = 0; line < scan_lines; line++, mpb = 0) { 3539 /* 3540 * We get the old-AFSR before clearing the AFSR sticky bits 3541 * in {get_ecache_tag, check_ecache_line, get_ecache_dtag} 3542 * If CP bit is set in the old-AFSR, we log an Orphan CP event. 3543 */ 3544 ec_tag = get_ecache_tag(index, &nafsr, acc_afsr); 3545 state = (uchar_t)((ec_tag & cpu_ec_state_mask) >> 3546 cpu_ec_state_shift); 3547 3548 /* 3549 * ETP is set try to scrub the ecache tag. 3550 */ 3551 if (nafsr & P_AFSR_ETP) { 3552 ecache_scrub_tag_err(nafsr, state, index); 3553 } else if (state & cpu_ec_state_valid) { 3554 /* 3555 * ETP is not set, E$ tag is valid. 3556 * Proceed with the E$ scrubbing. 3557 */ 3558 if (state & cpu_ec_state_dirty) 3559 mpb |= ECACHE_STATE_MODIFIED; 3560 3561 tafsr = check_ecache_line(index, acc_afsr); 3562 3563 if (tafsr & P_AFSR_EDP) { 3564 mpb |= ECACHE_STATE_PARITY; 3565 3566 if (ecache_scrub_verbose || 3567 ecache_scrub_panic) { 3568 get_ecache_dtag(P2ALIGN(index, 64), 3569 (uint64_t *)&ec_data[0], 3570 &ec_tag, &oafsr, acc_afsr); 3571 } 3572 } 3573 3574 if (ssmp->ecache_busy) 3575 mpb |= ECACHE_STATE_BUSY; 3576 3577 ec_knp = (kstat_named_t *)ec_ksp + mpb; 3578 ec_knp->value.ul++; 3579 3580 paddr = ((ec_tag & cpu_ec_tag_mask) << 3581 cpu_ec_tag_shift) | (index % ec_set_size); 3582 3583 /* 3584 * We flush the E$ lines depending on the ec_flush, 3585 * we additionally flush clean_good_busy and 3586 * dirty_good_busy lines for mirrored E$. 3587 */ 3588 if (ec_action[mpb].ec_flush == ALWAYS_FLUSH) { 3589 flushecacheline(paddr, ec_size); 3590 } else if ((ec_mirror == ECACHE_CPU_MIRROR) && 3591 (ec_action[mpb].ec_flush == MIRROR_FLUSH)) { 3592 flushecacheline(paddr, ec_size); 3593 } else if (ec_action[mpb].ec_flush == NEVER_FLUSH) { 3594 softcall(ecache_page_retire, (void *)paddr); 3595 } 3596 3597 /* 3598 * Conditionally flush both the clean_good and 3599 * dirty_good lines when busy. 
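 * The budgets used here were computed above as a percentage of
 * scan_lines:
 *
 *	flush_clean_busy = (scan_lines * ecache_flush_clean_good_busy) / 100;
 *	flush_dirty_busy = (scan_lines * ecache_flush_dirty_good_busy) / 100;
 *
 * so with the default rates of 50 and 100 percent and a hypothetical
 * scan_lines of 80, up to 40 clean_good_busy and 80 dirty_good_busy
 * lines are flushed per call.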
3600 */ 3601 if (CGB(mpb, ec_mirror) && (flush_clean_busy > 0)) { 3602 flush_clean_busy--; 3603 flushecacheline(paddr, ec_size); 3604 ec_ksp->clean_good_busy_flush.value.ul++; 3605 } else if (DGB(mpb, ec_mirror) && 3606 (flush_dirty_busy > 0)) { 3607 flush_dirty_busy--; 3608 flushecacheline(paddr, ec_size); 3609 ec_ksp->dirty_good_busy_flush.value.ul++; 3610 } 3611 3612 if (ec_action[mpb].ec_log && (ecache_scrub_verbose || 3613 ecache_scrub_panic)) { 3614 ecache_scrub_log(ec_data, ec_tag, paddr, mpb, 3615 tafsr); 3616 } 3617 3618 } else { 3619 ec_ksp->invalid_lines.value.ul++; 3620 } 3621 3622 if ((index += ec_linesize) >= ec_size) 3623 index = 0; 3624 3625 } 3626 3627 /* 3628 * set the ecache scrub index for the next time around 3629 */ 3630 ssmp->ecache_flush_index = index; 3631 3632 if (*acc_afsr & P_AFSR_CP) { 3633 uint64_t ret_afsr; 3634 3635 ret_afsr = ecache_scrub_misc_err(CPU_ORPHAN_CP_ERR, *acc_afsr); 3636 if ((ret_afsr & P_AFSR_CP) == 0) 3637 *acc_afsr = 0; 3638 } 3639 } 3640 3641 /* 3642 * Handler for ecache_scrub_inum softint. Call scrub_ecache_line until 3643 * we decrement the outstanding request count to zero. 3644 */ 3645 3646 /*ARGSUSED*/ 3647 uint_t 3648 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2) 3649 { 3650 int i; 3651 int outstanding; 3652 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc); 3653 uint32_t *countp = &ssmp->ec_scrub_outstanding; 3654 3655 do { 3656 outstanding = *countp; 3657 ASSERT(outstanding > 0); 3658 for (i = 0; i < outstanding; i++) 3659 scrub_ecache_line(); 3660 } while (atomic_add_32_nv(countp, -outstanding)); 3661 3662 return (DDI_INTR_CLAIMED); 3663 } 3664 3665 /* 3666 * force each cpu to perform an ecache scrub, called from a timeout 3667 */ 3668 extern xcfunc_t ecache_scrubreq_tl1; 3669 3670 void 3671 do_scrub_ecache_line(void) 3672 { 3673 long delta; 3674 3675 if (ecache_calls_a_sec > hz) 3676 ecache_calls_a_sec = hz; 3677 else if (ecache_calls_a_sec <= 0) 3678 ecache_calls_a_sec = 1; 3679 3680 if (ecache_calls_a_sec_mirrored > hz) 3681 ecache_calls_a_sec_mirrored = hz; 3682 else if (ecache_calls_a_sec_mirrored <= 0) 3683 ecache_calls_a_sec_mirrored = 1; 3684 3685 if (ecache_scrub_enable) { 3686 xt_all(ecache_scrubreq_tl1, ecache_scrub_inum, 0); 3687 delta = hz / ec_timeout_calls; 3688 } else { 3689 delta = hz; 3690 } 3691 3692 (void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0, 3693 delta); 3694 } 3695 3696 /* 3697 * initialization for ecache scrubbing 3698 * This routine is called AFTER all cpus have had cpu_init_private called 3699 * to initialize their private data areas. 3700 */ 3701 void 3702 cpu_init_cache_scrub(void) 3703 { 3704 if (ecache_calls_a_sec > hz) { 3705 cmn_err(CE_NOTE, "ecache_calls_a_sec set too high (%d); " 3706 "resetting to hz (%d)", ecache_calls_a_sec, hz); 3707 ecache_calls_a_sec = hz; 3708 } 3709 3710 /* 3711 * Register softint for ecache scrubbing. 3712 */ 3713 ecache_scrub_inum = add_softintr(ecache_scrub_pil, 3714 scrub_ecache_line_intr, NULL, SOFTINT_MT); 3715 3716 /* 3717 * kick off the scrubbing using realtime timeout 3718 */ 3719 (void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0, 3720 hz / ecache_calls_a_sec); 3721 } 3722 3723 /* 3724 * Unset the busy flag for this cpu. 3725 */ 3726 void 3727 cpu_idle_ecache_scrub(struct cpu *cp) 3728 { 3729 if (CPU_PRIVATE(cp) != NULL) { 3730 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, 3731 sfpr_scrub_misc); 3732 ssmp->ecache_busy = ECACHE_CPU_IDLE; 3733 } 3734 } 3735 3736 /* 3737 * Set the busy flag for this cpu. 
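 * The busy/idle flag is consulted by scrub_ecache_line() above: when the
 * CPU is idle the number of lines scanned per call may be scaled up by
 * ecache_idle_factor, and when busy only a percentage of the clean-good
 * and dirty-good lines scanned is also flushed.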
3738 */ 3739 void 3740 cpu_busy_ecache_scrub(struct cpu *cp) 3741 { 3742 if (CPU_PRIVATE(cp) != NULL) { 3743 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, 3744 sfpr_scrub_misc); 3745 ssmp->ecache_busy = ECACHE_CPU_BUSY; 3746 } 3747 } 3748 3749 /* 3750 * initialize the ecache scrubber data structures 3751 * The global entry point cpu_init_private replaces this entry point. 3752 * 3753 */ 3754 static void 3755 cpu_init_ecache_scrub_dr(struct cpu *cp) 3756 { 3757 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc); 3758 int cpuid = cp->cpu_id; 3759 3760 /* 3761 * intialize bookkeeping for cache scrubbing 3762 */ 3763 bzero(ssmp, sizeof (spitfire_scrub_misc_t)); 3764 3765 ssmp->ecache_flush_index = 0; 3766 3767 ssmp->ecache_nlines = 3768 cpunodes[cpuid].ecache_size / cpunodes[cpuid].ecache_linesize; 3769 3770 /* 3771 * Determine whether we are running on mirrored SRAM 3772 */ 3773 3774 if (cpunodes[cpuid].msram == ECACHE_CPU_MIRROR) 3775 ssmp->ecache_mirror = ECACHE_CPU_MIRROR; 3776 else 3777 ssmp->ecache_mirror = ECACHE_CPU_NON_MIRROR; 3778 3779 cpu_busy_ecache_scrub(cp); 3780 3781 /* 3782 * initialize the kstats 3783 */ 3784 ecache_kstat_init(cp); 3785 } 3786 3787 /* 3788 * uninitialize the ecache scrubber data structures 3789 * The global entry point cpu_uninit_private replaces this entry point. 3790 */ 3791 static void 3792 cpu_uninit_ecache_scrub_dr(struct cpu *cp) 3793 { 3794 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc); 3795 3796 if (ssmp->ecache_ksp != NULL) { 3797 kstat_delete(ssmp->ecache_ksp); 3798 ssmp->ecache_ksp = NULL; 3799 } 3800 3801 /* 3802 * un-initialize bookkeeping for cache scrubbing 3803 */ 3804 bzero(ssmp, sizeof (spitfire_scrub_misc_t)); 3805 3806 cpu_idle_ecache_scrub(cp); 3807 } 3808 3809 struct kmem_cache *sf_private_cache; 3810 3811 /* 3812 * Cpu private initialization. This includes allocating the cpu_private 3813 * data structure, initializing it, and initializing the scrubber for this 3814 * cpu. This is called once for EVERY cpu, including CPU 0. This function 3815 * calls cpu_init_ecache_scrub_dr to init the scrubber. 3816 * We use kmem_cache_create for the spitfire private data structure because it 3817 * needs to be allocated on a S_ECACHE_MAX_LSIZE (64) byte boundary. 3818 */ 3819 void 3820 cpu_init_private(struct cpu *cp) 3821 { 3822 spitfire_private_t *sfprp; 3823 3824 ASSERT(CPU_PRIVATE(cp) == NULL); 3825 3826 /* 3827 * If the sf_private_cache has not been created, create it. 3828 */ 3829 if (sf_private_cache == NULL) { 3830 sf_private_cache = kmem_cache_create("sf_private_cache", 3831 sizeof (spitfire_private_t), S_ECACHE_MAX_LSIZE, NULL, 3832 NULL, NULL, NULL, NULL, 0); 3833 ASSERT(sf_private_cache); 3834 } 3835 3836 sfprp = CPU_PRIVATE(cp) = kmem_cache_alloc(sf_private_cache, KM_SLEEP); 3837 3838 bzero(sfprp, sizeof (spitfire_private_t)); 3839 3840 cpu_init_ecache_scrub_dr(cp); 3841 } 3842 3843 /* 3844 * Cpu private unitialization. Uninitialize the Ecache scrubber and 3845 * deallocate the scrubber data structures and cpu_private data structure. 3846 * For now, this function just calls cpu_unint_ecache_scrub_dr to uninit 3847 * the scrubber for the specified cpu. 
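 * Once the scrubber state is torn down, the spitfire_private_t is returned
 * to sf_private_cache and CPU_PRIVATE(cp) is cleared.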
3848 */ 3849 void 3850 cpu_uninit_private(struct cpu *cp) 3851 { 3852 ASSERT(CPU_PRIVATE(cp)); 3853 3854 cpu_uninit_ecache_scrub_dr(cp); 3855 kmem_cache_free(sf_private_cache, CPU_PRIVATE(cp)); 3856 CPU_PRIVATE(cp) = NULL; 3857 } 3858 3859 /* 3860 * initialize the ecache kstats for each cpu 3861 */ 3862 static void 3863 ecache_kstat_init(struct cpu *cp) 3864 { 3865 struct kstat *ksp; 3866 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc); 3867 3868 ASSERT(ssmp != NULL); 3869 3870 if ((ksp = kstat_create("unix", cp->cpu_id, "ecache_kstat", "misc", 3871 KSTAT_TYPE_NAMED, 3872 sizeof (ecache_kstat_t) / sizeof (kstat_named_t), 3873 KSTAT_FLAG_WRITABLE)) == NULL) { 3874 ssmp->ecache_ksp = NULL; 3875 cmn_err(CE_NOTE, "!ecache_kstat_init(%d) failed\n", cp->cpu_id); 3876 return; 3877 } 3878 3879 ssmp->ecache_ksp = ksp; 3880 bcopy(&ec_kstat_template, ksp->ks_data, sizeof (ecache_kstat_t)); 3881 kstat_install(ksp); 3882 } 3883 3884 /* 3885 * log the bad ecache information 3886 */ 3887 static void 3888 ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, uint64_t paddr, int mpb, 3889 uint64_t afsr) 3890 { 3891 spitf_async_flt spf_flt; 3892 struct async_flt *aflt; 3893 int i; 3894 char *class; 3895 3896 bzero(&spf_flt, sizeof (spitf_async_flt)); 3897 aflt = &spf_flt.cmn_asyncflt; 3898 3899 for (i = 0; i < 8; i++) { 3900 spf_flt.flt_ec_data[i] = ec_data[i]; 3901 } 3902 3903 spf_flt.flt_ec_tag = ec_tag; 3904 3905 if (mpb < (sizeof (ec_action) / sizeof (ec_action[0]))) { 3906 spf_flt.flt_type = ec_action[mpb].ec_log_type; 3907 } else spf_flt.flt_type = (ushort_t)mpb; 3908 3909 aflt->flt_inst = CPU->cpu_id; 3910 aflt->flt_class = CPU_FAULT; 3911 aflt->flt_id = gethrtime_waitfree(); 3912 aflt->flt_addr = paddr; 3913 aflt->flt_stat = afsr; 3914 aflt->flt_panic = (uchar_t)ecache_scrub_panic; 3915 3916 switch (mpb) { 3917 case CPU_ECACHE_TAG_ERR: 3918 case CPU_ECACHE_ADDR_PAR_ERR: 3919 case CPU_ECACHE_ETP_ETS_ERR: 3920 case CPU_ECACHE_STATE_ERR: 3921 class = FM_EREPORT_CPU_USII_ESCRUB_TAG; 3922 break; 3923 default: 3924 class = FM_EREPORT_CPU_USII_ESCRUB_DATA; 3925 break; 3926 } 3927 3928 cpu_errorq_dispatch(class, (void *)&spf_flt, sizeof (spf_flt), 3929 ue_queue, aflt->flt_panic); 3930 3931 if (aflt->flt_panic) 3932 cmn_err(CE_PANIC, "ecache_scrub_panic set and bad E$" 3933 "line detected"); 3934 } 3935 3936 /* 3937 * Process an ecache error that occured during the E$ scrubbing. 3938 * We do the ecache scan to find the bad line, flush the bad line 3939 * and start the memscrubber to find any UE (in memory or in another cache) 3940 */ 3941 static uint64_t 3942 ecache_scrub_misc_err(int type, uint64_t afsr) 3943 { 3944 spitf_async_flt spf_flt; 3945 struct async_flt *aflt; 3946 uint64_t oafsr; 3947 3948 bzero(&spf_flt, sizeof (spitf_async_flt)); 3949 aflt = &spf_flt.cmn_asyncflt; 3950 3951 /* 3952 * Scan each line in the cache to look for the one 3953 * with bad parity 3954 */ 3955 aflt->flt_addr = AFLT_INV_ADDR; 3956 scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0], 3957 &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr); 3958 3959 if (oafsr & P_AFSR_CP) { 3960 uint64_t *cp_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 3961 *cp_afsr |= oafsr; 3962 } 3963 3964 /* 3965 * If we found a bad PA, update the state to indicate if it is 3966 * memory or I/O space. 3967 */ 3968 if (aflt->flt_addr != AFLT_INV_ADDR) { 3969 aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >> 3970 MMU_PAGESHIFT)) ? 
1 : 0; 3971 } 3972 3973 spf_flt.flt_type = (ushort_t)type; 3974 3975 aflt->flt_inst = CPU->cpu_id; 3976 aflt->flt_class = CPU_FAULT; 3977 aflt->flt_id = gethrtime_waitfree(); 3978 aflt->flt_status = afsr; 3979 aflt->flt_panic = (uchar_t)ecache_scrub_panic; 3980 3981 /* 3982 * We have the bad line, flush that line and start 3983 * the memscrubber. 3984 */ 3985 if (spf_flt.flt_ec_lcnt > 0) { 3986 flushecacheline(P2ALIGN(aflt->flt_addr, 64), 3987 cpunodes[CPU->cpu_id].ecache_size); 3988 read_all_memscrub = 1; 3989 memscrub_run(); 3990 } 3991 3992 cpu_errorq_dispatch((type == CPU_ORPHAN_CP_ERR) ? 3993 FM_EREPORT_CPU_USII_CP : FM_EREPORT_CPU_USII_UNKNOWN, 3994 (void *)&spf_flt, sizeof (spf_flt), ue_queue, aflt->flt_panic); 3995 3996 return (oafsr); 3997 } 3998 3999 static void 4000 ecache_scrub_tag_err(uint64_t afsr, uchar_t state, uint32_t index) 4001 { 4002 ushort_t afsr_ets = (afsr & P_AFSR_ETS) >> P_AFSR_ETS_SHIFT; 4003 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc); 4004 ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data; 4005 uint64_t ec_tag, paddr, oafsr; 4006 ec_data_t ec_data[8]; 4007 int cpuid = CPU->cpu_id; 4008 uint32_t ec_set_size = cpunodes[cpuid].ecache_size / 4009 ecache_associativity; 4010 uint64_t *cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 4011 4012 get_ecache_dtag(P2ALIGN(index, 64), (uint64_t *)&ec_data[0], &ec_tag, 4013 &oafsr, cpu_afsr); 4014 paddr = ((ec_tag & cpu_ec_tag_mask) << cpu_ec_tag_shift) | 4015 (index % ec_set_size); 4016 4017 /* 4018 * E$ tag state has good parity 4019 */ 4020 if ((afsr_ets & cpu_ec_state_parity) == 0) { 4021 if (afsr_ets & cpu_ec_parity) { 4022 /* 4023 * E$ tag state bits indicate the line is clean, 4024 * invalidate the E$ tag and continue. 4025 */ 4026 if (!(state & cpu_ec_state_dirty)) { 4027 /* 4028 * Zero the tag and mark the state invalid 4029 * with good parity for the tag. 
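				 * The tag is rewritten below with
				 * write_hb_ec_tag_parity() or
				 * write_ec_tag_parity(), depending on the
				 * CPU variant, and the whole E$ is then
				 * flushed to keep the dual tag in sync.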
4030 */ 4031 if (isus2i || isus2e) 4032 write_hb_ec_tag_parity(index); 4033 else 4034 write_ec_tag_parity(index); 4035 4036 /* Sync with the dual tag */ 4037 flushecacheline(0, 4038 cpunodes[CPU->cpu_id].ecache_size); 4039 ec_ksp->tags_cleared.value.ul++; 4040 ecache_scrub_log(ec_data, ec_tag, paddr, 4041 CPU_ECACHE_TAG_ERR, afsr); 4042 return; 4043 } else { 4044 ecache_scrub_log(ec_data, ec_tag, paddr, 4045 CPU_ECACHE_ADDR_PAR_ERR, afsr); 4046 cmn_err(CE_PANIC, " E$ tag address has bad" 4047 " parity"); 4048 } 4049 } else if ((afsr_ets & cpu_ec_parity) == 0) { 4050 /* 4051 * ETS is zero but ETP is set 4052 */ 4053 ecache_scrub_log(ec_data, ec_tag, paddr, 4054 CPU_ECACHE_ETP_ETS_ERR, afsr); 4055 cmn_err(CE_PANIC, "AFSR.ETP is set and" 4056 " AFSR.ETS is zero"); 4057 } 4058 } else { 4059 /* 4060 * E$ tag state bit has a bad parity 4061 */ 4062 ecache_scrub_log(ec_data, ec_tag, paddr, 4063 CPU_ECACHE_STATE_ERR, afsr); 4064 cmn_err(CE_PANIC, "E$ tag state has bad parity"); 4065 } 4066 } 4067 4068 static void 4069 ecache_page_retire(void *arg) 4070 { 4071 uint64_t paddr = (uint64_t)arg; 4072 (void) page_retire(paddr, PR_UE); 4073 } 4074 4075 void 4076 sticksync_slave(void) 4077 {} 4078 4079 void 4080 sticksync_master(void) 4081 {} 4082 4083 /*ARGSUSED*/ 4084 void 4085 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t bpp) 4086 {} 4087 4088 void 4089 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected) 4090 { 4091 int status; 4092 ddi_fm_error_t de; 4093 4094 bzero(&de, sizeof (ddi_fm_error_t)); 4095 4096 de.fme_version = DDI_FME_VERSION; 4097 de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, 4098 FM_ENA_FMT1); 4099 de.fme_flag = expected; 4100 de.fme_bus_specific = (void *)aflt->flt_addr; 4101 status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de); 4102 4103 if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL)) 4104 aflt->flt_panic = 1; 4105 } 4106 4107 /*ARGSUSED*/ 4108 void 4109 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz, 4110 errorq_t *eqp, uint_t flag) 4111 { 4112 struct async_flt *aflt = (struct async_flt *)payload; 4113 4114 aflt->flt_erpt_class = error_class; 4115 errorq_dispatch(eqp, payload, payload_sz, flag); 4116 } 4117 4118 #define MAX_SIMM 8 4119 4120 struct ce_info { 4121 char name[UNUM_NAMLEN]; 4122 uint64_t intermittent_total; 4123 uint64_t persistent_total; 4124 uint64_t sticky_total; 4125 unsigned short leaky_bucket_cnt; 4126 }; 4127 4128 /* 4129 * Separately-defined structure for use in reporting the ce_info 4130 * to SunVTS without exposing the internal layout and implementation 4131 * of struct ce_info. 4132 */ 4133 static struct ecc_error_info ecc_error_info_data = { 4134 { "version", KSTAT_DATA_UINT32 }, 4135 { "maxcount", KSTAT_DATA_UINT32 }, 4136 { "count", KSTAT_DATA_UINT32 } 4137 }; 4138 static const size_t ecc_error_info_ndata = sizeof (ecc_error_info_data) / 4139 sizeof (struct kstat_named); 4140 4141 #if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN 4142 #error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN" 4143 #endif 4144 4145 struct ce_info *mem_ce_simm = NULL; 4146 size_t mem_ce_simm_size = 0; 4147 4148 /* 4149 * Default values for the number of CE's allowed per interval. 
4150 * Interval is defined in minutes 4151 * SOFTERR_MIN_TIMEOUT is defined in microseconds 4152 */ 4153 #define SOFTERR_LIMIT_DEFAULT 2 4154 #define SOFTERR_INTERVAL_DEFAULT 1440 /* This is 24 hours */ 4155 #define SOFTERR_MIN_TIMEOUT (60 * MICROSEC) /* This is 1 minute */ 4156 #define TIMEOUT_NONE ((timeout_id_t)0) 4157 #define TIMEOUT_SET ((timeout_id_t)1) 4158 4159 /* 4160 * timeout identifer for leaky_bucket 4161 */ 4162 static timeout_id_t leaky_bucket_timeout_id = TIMEOUT_NONE; 4163 4164 /* 4165 * Tunables for maximum number of allowed CE's in a given time 4166 */ 4167 int ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT; 4168 int ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT; 4169 4170 void 4171 cpu_mp_init(void) 4172 { 4173 size_t size = cpu_aflt_size(); 4174 size_t i; 4175 kstat_t *ksp; 4176 4177 /* 4178 * Initialize the CE error handling buffers. 4179 */ 4180 mem_ce_simm_size = MAX_SIMM * max_ncpus; 4181 size = sizeof (struct ce_info) * mem_ce_simm_size; 4182 mem_ce_simm = kmem_zalloc(size, KM_SLEEP); 4183 4184 ksp = kstat_create("unix", 0, "ecc-info", "misc", 4185 KSTAT_TYPE_NAMED, ecc_error_info_ndata, KSTAT_FLAG_VIRTUAL); 4186 if (ksp != NULL) { 4187 ksp->ks_data = (struct kstat_named *)&ecc_error_info_data; 4188 ecc_error_info_data.version.value.ui32 = KSTAT_CE_INFO_VER; 4189 ecc_error_info_data.maxcount.value.ui32 = mem_ce_simm_size; 4190 ecc_error_info_data.count.value.ui32 = 0; 4191 kstat_install(ksp); 4192 } 4193 4194 for (i = 0; i < mem_ce_simm_size; i++) { 4195 struct kstat_ecc_mm_info *kceip; 4196 4197 kceip = kmem_zalloc(sizeof (struct kstat_ecc_mm_info), 4198 KM_SLEEP); 4199 ksp = kstat_create("mm", i, "ecc-info", "misc", 4200 KSTAT_TYPE_NAMED, 4201 sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t), 4202 KSTAT_FLAG_VIRTUAL); 4203 if (ksp != NULL) { 4204 /* 4205 * Re-declare ks_data_size to include room for the 4206 * UNUM name since we don't have KSTAT_FLAG_VAR_SIZE 4207 * set. 4208 */ 4209 ksp->ks_data_size = sizeof (struct kstat_ecc_mm_info) + 4210 KSTAT_CE_UNUM_NAMLEN; 4211 ksp->ks_data = kceip; 4212 kstat_named_init(&kceip->name, 4213 "name", KSTAT_DATA_STRING); 4214 kstat_named_init(&kceip->intermittent_total, 4215 "intermittent_total", KSTAT_DATA_UINT64); 4216 kstat_named_init(&kceip->persistent_total, 4217 "persistent_total", KSTAT_DATA_UINT64); 4218 kstat_named_init(&kceip->sticky_total, 4219 "sticky_total", KSTAT_DATA_UINT64); 4220 /* 4221 * Use the default snapshot routine as it knows how to 4222 * deal with named kstats with long strings. 4223 */ 4224 ksp->ks_update = ecc_kstat_update; 4225 kstat_install(ksp); 4226 } else { 4227 kmem_free(kceip, sizeof (struct kstat_ecc_mm_info)); 4228 } 4229 } 4230 } 4231 4232 /*ARGSUSED*/ 4233 static void 4234 leaky_bucket_timeout(void *arg) 4235 { 4236 int i; 4237 struct ce_info *psimm = mem_ce_simm; 4238 4239 for (i = 0; i < mem_ce_simm_size; i++) { 4240 if (psimm[i].leaky_bucket_cnt > 0) 4241 atomic_add_16(&psimm[i].leaky_bucket_cnt, -1); 4242 } 4243 add_leaky_bucket_timeout(); 4244 } 4245 4246 static void 4247 add_leaky_bucket_timeout(void) 4248 { 4249 long timeout_in_microsecs; 4250 4251 /* 4252 * create timeout for next leak. 4253 * 4254 * The timeout interval is calculated as follows 4255 * 4256 * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit 4257 * 4258 * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds 4259 * in a minute), then multiply this by MICROSEC to get the interval 4260 * in microseconds. 
Divide this total by ecc_softerr_limit so that 4261 * the timeout interval is accurate to within a few microseconds. 4262 */ 4263 4264 if (ecc_softerr_limit <= 0) 4265 ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT; 4266 if (ecc_softerr_interval <= 0) 4267 ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT; 4268 4269 timeout_in_microsecs = ((int64_t)ecc_softerr_interval * 60 * MICROSEC) / 4270 ecc_softerr_limit; 4271 4272 if (timeout_in_microsecs < SOFTERR_MIN_TIMEOUT) 4273 timeout_in_microsecs = SOFTERR_MIN_TIMEOUT; 4274 4275 leaky_bucket_timeout_id = timeout(leaky_bucket_timeout, 4276 (void *)NULL, drv_usectohz((clock_t)timeout_in_microsecs)); 4277 } 4278 4279 /* 4280 * Legacy Correctable ECC Error Hash 4281 * 4282 * All of the code below this comment is used to implement a legacy array 4283 * which counted intermittent, persistent, and sticky CE errors by unum, 4284 * and then was later extended to publish the data as a kstat for SunVTS. 4285 * All of this code is replaced by FMA, and remains here until such time 4286 * that the UltraSPARC-I/II CPU code is converted to FMA, or is EOLed. 4287 * 4288 * Errors are saved in three buckets per-unum: 4289 * (1) sticky - scrub was unsuccessful, cannot be scrubbed 4290 * This could represent a problem, and is immediately printed out. 4291 * (2) persistent - was successfully scrubbed 4292 * These errors use the leaky bucket algorithm to determine 4293 * if there is a serious problem. 4294 * (3) intermittent - may have originated from the cpu or upa/safari bus, 4295 * and does not necessarily indicate any problem with the dimm itself, 4296 * is critical information for debugging new hardware. 4297 * Because we do not know if it came from the dimm, it would be 4298 * inappropriate to include these in the leaky bucket counts. 4299 * 4300 * If the E$ line was modified before the scrub operation began, then the 4301 * displacement flush at the beginning of scrubphys() will cause the modified 4302 * line to be written out, which will clean up the CE. Then, any subsequent 4303 * read will not cause an error, which will cause persistent errors to be 4304 * identified as intermittent. 4305 * 4306 * If a DIMM is going bad, it will produce true persistents as well as 4307 * false intermittents, so these intermittents can be safely ignored. 4308 * 4309 * If the error count is excessive for a DIMM, this function will return 4310 * PR_MCE, and the CPU module may then decide to remove that page from use. 4311 */ 4312 static int 4313 ce_count_unum(int status, int len, char *unum) 4314 { 4315 int i; 4316 struct ce_info *psimm = mem_ce_simm; 4317 int page_status = PR_OK; 4318 4319 ASSERT(psimm != NULL); 4320 4321 if (len <= 0 || 4322 (status & (ECC_STICKY | ECC_PERSISTENT | ECC_INTERMITTENT)) == 0) 4323 return (page_status); 4324 4325 /* 4326 * Initialize the leaky_bucket timeout 4327 */ 4328 if (casptr(&leaky_bucket_timeout_id, 4329 TIMEOUT_NONE, TIMEOUT_SET) == TIMEOUT_NONE) 4330 add_leaky_bucket_timeout(); 4331 4332 for (i = 0; i < mem_ce_simm_size; i++) { 4333 if (psimm[i].name[0] == '\0') { 4334 /* 4335 * Hit the end of the valid entries, add 4336 * a new one. 4337 */ 4338 (void) strncpy(psimm[i].name, unum, len); 4339 if (status & ECC_STICKY) { 4340 /* 4341 * Sticky - the leaky bucket is used to track 4342 * soft errors. Since a sticky error is a 4343 * hard error and likely to be retired soon, 4344 * we do not count it in the leaky bucket. 
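				 * PR_MCE is returned below so that the
				 * caller (cpu_ce_count_unum()) can retire
				 * the page backing this address, subject to
				 * automatic_page_removal.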
4345 */ 4346 psimm[i].leaky_bucket_cnt = 0; 4347 psimm[i].intermittent_total = 0; 4348 psimm[i].persistent_total = 0; 4349 psimm[i].sticky_total = 1; 4350 cmn_err(CE_NOTE, 4351 "[AFT0] Sticky Softerror encountered " 4352 "on Memory Module %s\n", unum); 4353 page_status = PR_MCE; 4354 } else if (status & ECC_PERSISTENT) { 4355 psimm[i].leaky_bucket_cnt = 1; 4356 psimm[i].intermittent_total = 0; 4357 psimm[i].persistent_total = 1; 4358 psimm[i].sticky_total = 0; 4359 } else { 4360 /* 4361 * Intermittent - Because the scrub operation 4362 * cannot find the error in the DIMM, we will 4363 * not count these in the leaky bucket 4364 */ 4365 psimm[i].leaky_bucket_cnt = 0; 4366 psimm[i].intermittent_total = 1; 4367 psimm[i].persistent_total = 0; 4368 psimm[i].sticky_total = 0; 4369 } 4370 ecc_error_info_data.count.value.ui32++; 4371 break; 4372 } else if (strncmp(unum, psimm[i].name, len) == 0) { 4373 /* 4374 * Found an existing entry for the current 4375 * memory module, adjust the counts. 4376 */ 4377 if (status & ECC_STICKY) { 4378 psimm[i].sticky_total++; 4379 cmn_err(CE_NOTE, 4380 "[AFT0] Sticky Softerror encountered " 4381 "on Memory Module %s\n", unum); 4382 page_status = PR_MCE; 4383 } else if (status & ECC_PERSISTENT) { 4384 int new_value; 4385 4386 new_value = atomic_add_16_nv( 4387 &psimm[i].leaky_bucket_cnt, 1); 4388 psimm[i].persistent_total++; 4389 if (new_value > ecc_softerr_limit) { 4390 cmn_err(CE_NOTE, "[AFT0] Most recent %d" 4391 " soft errors from Memory Module" 4392 " %s exceed threshold (N=%d," 4393 " T=%dh:%02dm) triggering page" 4394 " retire", new_value, unum, 4395 ecc_softerr_limit, 4396 ecc_softerr_interval / 60, 4397 ecc_softerr_interval % 60); 4398 atomic_add_16( 4399 &psimm[i].leaky_bucket_cnt, -1); 4400 page_status = PR_MCE; 4401 } 4402 } else { /* Intermittent */ 4403 psimm[i].intermittent_total++; 4404 } 4405 break; 4406 } 4407 } 4408 4409 if (i >= mem_ce_simm_size) 4410 cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of " 4411 "space.\n"); 4412 4413 return (page_status); 4414 } 4415 4416 /* 4417 * Function to support counting of IO detected CEs. 4418 */ 4419 void 4420 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum) 4421 { 4422 int err; 4423 4424 err = ce_count_unum(ecc->flt_status, len, unum); 4425 if (err != PR_OK && automatic_page_removal) { 4426 (void) page_retire(ecc->flt_addr, err); 4427 } 4428 } 4429 4430 static int 4431 ecc_kstat_update(kstat_t *ksp, int rw) 4432 { 4433 struct kstat_ecc_mm_info *kceip = ksp->ks_data; 4434 struct ce_info *ceip = mem_ce_simm; 4435 int i = ksp->ks_instance; 4436 4437 if (rw == KSTAT_WRITE) 4438 return (EACCES); 4439 4440 ASSERT(ksp->ks_data != NULL); 4441 ASSERT(i < mem_ce_simm_size && i >= 0); 4442 4443 /* 4444 * Since we're not using locks, make sure that we don't get partial 4445 * data. The name is always copied before the counters are incremented 4446 * so only do this update routine if at least one of the counters is 4447 * non-zero, which ensures that ce_count_unum() is done, and the 4448 * string is fully copied. 4449 */ 4450 if (ceip[i].intermittent_total == 0 && 4451 ceip[i].persistent_total == 0 && 4452 ceip[i].sticky_total == 0) { 4453 /* 4454 * Uninitialized or partially initialized. Ignore. 4455 * The ks_data buffer was allocated via kmem_zalloc, 4456 * so no need to bzero it. 
4457 */ 4458 return (0); 4459 } 4460 4461 kstat_named_setstr(&kceip->name, ceip[i].name); 4462 kceip->intermittent_total.value.ui64 = ceip[i].intermittent_total; 4463 kceip->persistent_total.value.ui64 = ceip[i].persistent_total; 4464 kceip->sticky_total.value.ui64 = ceip[i].sticky_total; 4465 4466 return (0); 4467 } 4468 4469 #define VIS_BLOCKSIZE 64 4470 4471 int 4472 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data) 4473 { 4474 int ret, watched; 4475 4476 watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 4477 ret = dtrace_blksuword32(addr, data, 0); 4478 if (watched) 4479 watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 4480 4481 return (ret); 4482 } 4483 4484 /*ARGSUSED*/ 4485 void 4486 cpu_faulted_enter(struct cpu *cp) 4487 { 4488 } 4489 4490 /*ARGSUSED*/ 4491 void 4492 cpu_faulted_exit(struct cpu *cp) 4493 { 4494 } 4495 4496 /*ARGSUSED*/ 4497 void 4498 mmu_init_kernel_pgsz(struct hat *hat) 4499 { 4500 } 4501 4502 size_t 4503 mmu_get_kernel_lpsize(size_t lpsize) 4504 { 4505 uint_t tte; 4506 4507 if (lpsize == 0) { 4508 /* no setting for segkmem_lpsize in /etc/system: use default */ 4509 return (MMU_PAGESIZE4M); 4510 } 4511 4512 for (tte = TTE8K; tte <= TTE4M; tte++) { 4513 if (lpsize == TTEBYTES(tte)) 4514 return (lpsize); 4515 } 4516 4517 return (TTEBYTES(TTE8K)); 4518 } 4519