/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/cpu.h>
#include <sys/elf_SPARC.h>
#include <vm/hat_sfmmu.h>
#include <vm/page.h>
#include <vm/vm_dep.h>
#include <sys/cpuvar.h>
#include <sys/spitregs.h>
#include <sys/async.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/dditypes.h>
#include <sys/sunddi.h>
#include <sys/cpu_module.h>
#include <sys/prom_debug.h>
#include <sys/vmsystm.h>
#include <sys/prom_plat.h>
#include <sys/sysmacros.h>
#include <sys/intreg.h>
#include <sys/machtrap.h>
#include <sys/ontrap.h>
#include <sys/ivintr.h>
#include <sys/atomic.h>
#include <sys/panic.h>
#include <sys/ndifm.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/fm/cpu/UltraSPARC-II.h>
#include <sys/ddi.h>
#include <sys/ecc_kstat.h>
#include <sys/watchpoint.h>
#include <sys/dtrace.h>
#include <sys/errclassify.h>

uint_t	cpu_impl_dual_pgsz = 0;

/*
 * Structure for the 8 byte ecache data dump and the associated AFSR state.
 * There will be 8 of these structures used to dump an ecache line (64 bytes).
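 * Each element pairs one 8-byte data word (ec_d8) with its associated AFSR
 * value (ec_afsr).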
 */
typedef struct sf_ec_data_elm {
	uint64_t ec_d8;
	uint64_t ec_afsr;
} ec_data_t;

/*
 * Define spitfire (Ultra I/II) specific asynchronous error structure
 */
typedef struct spitfire_async_flt {
	struct async_flt cmn_asyncflt;	/* common - see sun4u/sys/async.h */
	ushort_t flt_type;		/* types of faults - cpu specific */
	ec_data_t flt_ec_data[8];	/* for E$ or mem dump/state */
	uint64_t flt_ec_tag;		/* E$ tag info */
	int flt_ec_lcnt;		/* number of bad E$ lines */
	ushort_t flt_sdbh;		/* UDBH reg */
	ushort_t flt_sdbl;		/* UDBL reg */
} spitf_async_flt;

/*
 * Prototypes for support routines in spitfire_asm.s:
 */
extern void flush_ecache(uint64_t physaddr, size_t size, size_t linesize);
extern uint64_t get_lsu(void);
extern void set_lsu(uint64_t ncc);
extern void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
    uint64_t *oafsr, uint64_t *acc_afsr);
extern uint64_t check_ecache_line(uint32_t id, uint64_t *acc_afsr);
extern uint64_t get_ecache_tag(uint32_t id, uint64_t *nafsr,
    uint64_t *acc_afsr);
extern uint64_t read_and_clear_afsr();
extern void write_ec_tag_parity(uint32_t id);
extern void write_hb_ec_tag_parity(uint32_t id);

/*
 * Spitfire module routines:
 */
static void cpu_async_log_err(void *flt);
/*PRINTFLIKE6*/
static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt,
    uint_t logflags, const char *endstr, const char *fmt, ...);

static void cpu_read_paddr(struct async_flt *aflt, short verbose, short ce_err);
static void cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum);
static void cpu_log_ecmem_info(spitf_async_flt *spf_flt);

static void log_ce_err(struct async_flt *aflt, char *unum);
static void log_ue_err(struct async_flt *aflt, char *unum);
static void check_misc_err(spitf_async_flt *spf_flt);
static ushort_t ecc_gen(uint_t high_bytes, uint_t low_bytes);
static int check_ecc(struct async_flt *aflt);
static uint_t get_cpu_status(uint64_t arg);
static uint64_t clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr);
static void scan_ecache(uint64_t *afar, ec_data_t *data, uint64_t *tag,
    int *m, uint64_t *afsr);
static void ecache_kstat_init(struct cpu *cp);
static void ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag,
    uint64_t paddr, int mpb, uint64_t);
static uint64_t ecache_scrub_misc_err(int, uint64_t);
static void ecache_scrub_tag_err(uint64_t, uchar_t, uint32_t);
static void ecache_page_retire(void *);
static int ecc_kstat_update(kstat_t *ksp, int rw);
static int ce_count_unum(int status, int len, char *unum);
static void add_leaky_bucket_timeout(void);
static int synd_to_synd_code(int synd_status, ushort_t synd);

extern uint_t read_all_memscrub;
extern void memscrub_run(void);

static uchar_t	isus2i;			/* set if sabre */
static uchar_t	isus2e;			/* set if hummingbird */

/*
 * Default ecache mask and shift settings for Spitfire.  If we detect a
 * different CPU implementation, we will modify these values at boot time.
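 * (cpu_aflt_size() below switches these to the Sabre or Hummingbird values.)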
 */
static uint64_t cpu_ec_tag_mask = S_ECTAG_MASK;
static uint64_t cpu_ec_state_mask = S_ECSTATE_MASK;
static uint64_t cpu_ec_par_mask = S_ECPAR_MASK;
static int cpu_ec_par_shift = S_ECPAR_SHIFT;
static int cpu_ec_tag_shift = S_ECTAG_SHIFT;
static int cpu_ec_state_shift = S_ECSTATE_SHIFT;
static uchar_t cpu_ec_state_exl = S_ECSTATE_EXL;
static uchar_t cpu_ec_state_mod = S_ECSTATE_MOD;
static uchar_t cpu_ec_state_shr = S_ECSTATE_SHR;
static uchar_t cpu_ec_state_own = S_ECSTATE_OWN;

/*
 * Default ecache state bits for Spitfire.  These individual bits indicate if
 * the given line is in any of the valid or modified states, respectively.
 * Again, we modify these at boot if we detect a different CPU.
 */
static uchar_t cpu_ec_state_valid = S_ECSTATE_VALID;
static uchar_t cpu_ec_state_dirty = S_ECSTATE_DIRTY;
static uchar_t cpu_ec_parity = S_EC_PARITY;
static uchar_t cpu_ec_state_parity = S_ECSTATE_PARITY;

/*
 * This table is used to determine which bit(s) is(are) bad when an ECC
 * error occurs.  The array is indexed by an 8-bit syndrome.  The entries
 * of this array have the following semantics:
 *
 *	00-63	The number of the bad bit, when only one bit is bad.
 *	64	ECC bit C0 is bad.
 *	65	ECC bit C1 is bad.
 *	66	ECC bit C2 is bad.
 *	67	ECC bit C3 is bad.
 *	68	ECC bit C4 is bad.
 *	69	ECC bit C5 is bad.
 *	70	ECC bit C6 is bad.
 *	71	ECC bit C7 is bad.
 *	72	Two bits are bad.
 *	73	Three bits are bad.
 *	74	Four bits are bad.
 *	75	More than Four bits are bad.
 *	76	NO bits are bad.
 * Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28.
 */

#define	C0	64
#define	C1	65
#define	C2	66
#define	C3	67
#define	C4	68
#define	C5	69
#define	C6	70
#define	C7	71
#define	M2	72
#define	M3	73
#define	M4	74
#define	MX	75
#define	NA	76

#define	SYND_IS_SINGLE_BIT_DATA(synd_code)	((synd_code >= 0) && \
						    (synd_code < C0))
#define	SYND_IS_SINGLE_BIT_CHK(synd_code)	((synd_code >= C0) && \
						    (synd_code <= C7))

static char ecc_syndrome_tab[] =
{
	NA, C0, C1, M2, C2, M2, M2, M3, C3, M2, M2, M3, M2, M3, M3, M4,
	C4, M2, M2, 32, M2, 57, MX, M2, M2, 37, 49, M2, 40, M2, M2, 44,
	C5, M2, M2, 33, M2, 61,  4, M2, M2, MX, 53, M2, 45, M2, M2, 41,
	M2,  0,  1, M2, 10, M2, M2, MX, 15, M2, M2, MX, M2, M3, M3, M2,
	C6, M2, M2, 42, M2, 59, 39, M2, M2, MX, 51, M2, 34, M2, M2, 46,
	M2, 25, 29, M2, 27, M4, M2, MX, 31, M2, M4, MX, M2, MX, MX, M2,
	M2, MX, 36, M2,  7, M2, M2, 54, MX, M2, M2, 62, M2, 48, 56, M2,
	M3, M2, M2, MX, M2, MX, 22, M2, M2, 18, MX, M2, M3, M2, M2, MX,
	C7, M2, M2, 47, M2, 63, MX, M2, M2,  6, 55, M2, 35, M2, M2, 43,
	M2,  5, MX, M2, MX, M2, M2, 50, 38, M2, M2, 58, M2, 52, 60, M2,
	M2, 17, 21, M2, 19, M4, M2, MX, 23, M2, M4, MX, M2, MX, MX, M2,
	M3, M2, M2, MX, M2, MX, 30, M2, M2, 26, MX, M2, M3, M2, M2, MX,
	M2,  8, 13, M2,  2, M2, M2, M3,  3, M2, M2, M3, M2, MX, MX, M2,
	M3, M2, M2, M3, M2, MX, 16, M2, M2, 20, MX, M2, MX, M2, M2, MX,
	M3, M2, M2, M3, M2, MX, 24, M2, M2, 28, MX, M2, MX, M2, M2, MX,
	M4, 12,  9, M2, 14, M2, M2, MX, 11, M2, M2, MX, M2, MX, MX, M4
};

#define	SYND_TBL_SIZE	256

/*
 * Hack for determining UDBH/UDBL, for later cpu-specific error reporting.
 * Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird.
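 * Bit 15 of the recorded syndrome is used instead: UDBL() extracts it and
 * SYND() strips it off before the syndrome is used for table lookups.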
 */
#define	UDBL_REG	0x8000
#define	UDBL(synd)	((synd & UDBL_REG) >> 15)
#define	SYND(synd)	(synd & 0x7FFF)

/*
 * These error types are specific to Spitfire and are used internally for the
 * spitfire fault structure flt_type field.
 */
#define	CPU_UE_ERR		0	/* uncorrectable errors - UEs */
#define	CPU_EDP_LDP_ERR		1	/* LDP or EDP parity error */
#define	CPU_WP_ERR		2	/* WP parity error */
#define	CPU_BTO_BERR_ERR	3	/* bus timeout errors */
#define	CPU_PANIC_CP_ERR	4	/* cp error from panic polling */
#define	CPU_TRAPPING_CP_ERR	5	/* for sabre/hbird only, cp error */
#define	CPU_BADLINE_CI_ERR	6	/* E$ clean_bad line when idle */
#define	CPU_BADLINE_CB_ERR	7	/* E$ clean_bad line when busy */
#define	CPU_BADLINE_DI_ERR	8	/* E$ dirty_bad line when idle */
#define	CPU_BADLINE_DB_ERR	9	/* E$ dirty_bad line when busy */
#define	CPU_ORPHAN_CP_ERR	10	/* Orphan CP error */
#define	CPU_ECACHE_ADDR_PAR_ERR	11	/* Ecache Address parity error */
#define	CPU_ECACHE_STATE_ERR	12	/* Ecache state error */
#define	CPU_ECACHE_ETP_ETS_ERR	13	/* ETP set but ETS is zero */
#define	CPU_ECACHE_TAG_ERR	14	/* Scrub the E$ tag, if state clean */
#define	CPU_ADDITIONAL_ERR	15	/* Additional errors occurred */

/*
 * Macro to access the "Spitfire cpu private" data structure.
 */
#define	CPU_PRIVATE_PTR(cp, x)	(&(((spitfire_private_t *)CPU_PRIVATE(cp))->x))

/*
 * set to 0 to disable automatic retiring of pages on
 * DIMMs that have excessive soft errors
 */
int automatic_page_removal = 1;

/*
 * Heuristic for figuring out which module to replace.
 * Relative likelihood that this P_SYND indicates that this module is bad.
 * We call it a "score", though, not a relative likelihood.
 *
 * Step 1.
 * Assign a score to each byte of P_SYND according to the following rules:
 *	If no bits on (0x00) or all bits on (0xFF), then give it a 5.
 *	If one bit on, give it a 95.
 *	If seven bits on, give it a 10.
 *	If two bits on:
 *		in different nybbles, a 90
 *		in same nybble, but unaligned, 85
 *		in same nybble and as an aligned pair, 80
 *	If six bits on, look at the bits that are off:
 *		in same nybble and as an aligned pair, 15
 *		in same nybble, but unaligned, 20
 *		in different nybbles, a 25
 *	If three bits on:
 *		in different nybbles, no aligned pairs, 75
 *		in different nybbles, one aligned pair, 70
 *		in the same nybble, 65
 *	If five bits on, look at the bits that are off:
 *		in the same nybble, 30
 *		in different nybbles, one aligned pair, 35
 *		in different nybbles, no aligned pairs, 40
 *	If four bits on:
 *		all in one nybble, 45
 *		as two aligned pairs, 50
 *		one aligned pair, 55
 *		no aligned pairs, 60
 *
 * Step 2:
 * Take the higher of the two scores (one for each byte) as the score
 * for the module.
 *
 * Print the score for each module, and field service should replace the
 * module with the highest score.
 */

/*
 * In the table below, the first row/column comment indicates the
 * number of bits on in that nybble; the second row/column comment is
 * the hex digit.
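 *
 * For example, a P_SYND byte of 0x03 (two bits on, an aligned pair in one
 * nybble) scores 80, while 0x01 (one bit on) scores 95; per Step 2 above the
 * module score is the larger of the two, 95.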
 */

static int
p_synd_score_table[256] = {
	/* 0   1   1   2   1   2   2   3   1   2   2   3   2   3   3   4 */
	/* 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  A,  B,  C,  D,  E,  F */
/* 0 0 */  5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45,
/* 1 1 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 1 2 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 3 */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
/* 1 4 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 5 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 2 6 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 3 7 */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 1 8 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 9 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 2 A */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 3 B */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 2 C */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
/* 3 D */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 3 E */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 4 F */ 45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10,  5,
};

int
ecc_psynd_score(ushort_t p_synd)
{
	int i, j, a, b;

	i = p_synd & 0xFF;
	j = (p_synd >> 8) & 0xFF;

	a = p_synd_score_table[i];
	b = p_synd_score_table[j];

	return (a > b ? a : b);
}

/*
 * Async Fault Logging
 *
 * To ease identifying, reading, and filtering async fault log messages, the
 * label [AFT#] is now prepended to each async fault message.  These messages
 * and the logging rules are implemented by cpu_aflt_log(), below.
 *
 * [AFT0] - Tag for log messages that are associated with corrected ECC errors.
 *          This includes both corrected ECC memory and ecache faults.
 *
 * [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything
 *          else except CE errors) with a priority of 1 (highest).  This tag
 *          is also used for panic messages that result from an async fault.
 *
 * [AFT2] - These are lower priority diagnostic messages for uncorrected ECC
 * [AFT3]   or parity errors.  For example, AFT2 is used for the actual dump
 *          of the E-$ data and tags.
 *
 * In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not
 * printed on the console.  To send all AFT logs to both the log and the
 * console, set aft_verbose = 1.
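 *
 * The AFT tag number is passed to cpu_aflt_log() as its tagnum argument.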
 */

#define	CPU_FLTCPU		0x0001	/* print flt_inst as a CPU id */
#define	CPU_SPACE		0x0002	/* print flt_status (data or instr) */
#define	CPU_ERRID		0x0004	/* print flt_id */
#define	CPU_TL			0x0008	/* print flt_tl */
#define	CPU_ERRID_FIRST		0x0010	/* print flt_id first in message */
#define	CPU_AFSR		0x0020	/* print flt_stat as decoded %afsr */
#define	CPU_AFAR		0x0040	/* print flt_addr as %afar */
#define	CPU_AF_PSYND		0x0080	/* print flt_stat %afsr.PSYND */
#define	CPU_AF_ETS		0x0100	/* print flt_stat %afsr.ETS */
#define	CPU_UDBH		0x0200	/* print flt_sdbh and syndrome */
#define	CPU_UDBL		0x0400	/* print flt_sdbl and syndrome */
#define	CPU_FAULTPC		0x0800	/* print flt_pc */
#define	CPU_SYND		0x1000	/* print flt_synd and unum */

#define	CMN_LFLAGS	(CPU_FLTCPU | CPU_SPACE | CPU_ERRID | CPU_TL |	\
				CPU_AFSR | CPU_AFAR | CPU_AF_PSYND |	\
				CPU_AF_ETS | CPU_UDBH | CPU_UDBL |	\
				CPU_FAULTPC)
#define	UE_LFLAGS	(CMN_LFLAGS | CPU_SYND)
#define	CE_LFLAGS	(UE_LFLAGS & ~CPU_UDBH & ~CPU_UDBL & ~CPU_TL & \
				~CPU_SPACE)
#define	PARERR_LFLAGS	(CMN_LFLAGS)
#define	WP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL)
#define	CP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL &	\
				~CPU_FLTCPU & ~CPU_FAULTPC)
#define	BERRTO_LFLAGS	(CMN_LFLAGS)
#define	NO_LFLAGS	(0)

#define	AFSR_FMTSTR0	"\020\1ME"
#define	AFSR_FMTSTR1	"\020\040PRIV\037ISAP\036ETP\035IVUE\034TO" \
			"\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE"
#define	UDB_FMTSTR	"\020\012UE\011CE"

/*
 * Save the cache bootup state for use when internal
 * caches are to be re-enabled after an error occurs.
 */
uint64_t	cache_boot_state = 0;

/*
 * PA[31:0] represent Displacement in UPA configuration space.
 */
uint_t	root_phys_addr_lo_mask = 0xffffffff;

/*
 * Spitfire legacy globals
 */
int	itlb_entries;
int	dtlb_entries;

void
cpu_setup(void)
{
	extern int page_retire_messages;
	extern int page_retire_first_ue;
	extern int at_flags;
#if defined(SF_ERRATA_57)
	extern caddr_t errata57_limit;
#endif
	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);

	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1;

	/*
	 * Spitfire isn't currently FMA-aware, so we have to enable the
	 * page retirement messages.  We also change the default policy
	 * for UE retirement to allow clearing of transient errors.
	 */
	page_retire_messages = 1;
	page_retire_first_ue = 0;

	/*
	 * save the cache bootup state.
	 */
	cache_boot_state = get_lsu() & (LSU_IC | LSU_DC);

	if (use_page_coloring) {
		do_pg_coloring = 1;
	}

	/*
	 * Tune pp_slots to use up to 1/8th of the tlb entries.
	 */
	pp_slots = MIN(8, MAXPP_SLOTS);

	/*
	 * Block stores invalidate all pages of the d$ so pagecopy
	 * et al. do not need virtual translations with virtual
	 * coloring taken into consideration.
	 */
	pp_consistent_coloring = 0;

	isa_list =
	    "sparcv9+vis sparcv9 "
	    "sparcv8plus+vis sparcv8plus "
	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";

	cpu_hwcap_flags = AV_SPARC_VIS;

	/*
	 * On Spitfire, there's a hole in the address space
	 * that we must never map (the hardware only supports 44 bits of
	 * virtual address).  Later CPUs are expected to have wider
	 * supported address ranges.
	 *
	 * See address map on p23 of the UltraSPARC 1 user's manual.
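	 *
	 * With 44-bit virtual addresses the hole is [0x0000080000000000,
	 * 0xFFFFF80000000000), which is the range hole_start/hole_end are
	 * set to below before the Errata #21 adjustment.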
	 */
	hole_start = (caddr_t)0x80000000000ull;
	hole_end = (caddr_t)0xfffff80000000000ull;

	/*
	 * A spitfire call bug requires us to be a further 4Gbytes of
	 * firewall from the spec.
	 *
	 * See Spitfire Errata #21
	 */
	hole_start = (caddr_t)((uintptr_t)hole_start - (1ul << 32));
	hole_end = (caddr_t)((uintptr_t)hole_end + (1ul << 32));

	/*
	 * The kpm mapping window.
	 * kpm_size:
	 *	The size of a single kpm range.
	 *	The overall size will be: kpm_size * vac_colors.
	 * kpm_vbase:
	 *	The virtual start address of the kpm range within the kernel
	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
	 */
	kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */
	kpm_size_shift = 41;
	kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */

#if defined(SF_ERRATA_57)
	errata57_limit = (caddr_t)0x80000000ul;
#endif

	/*
	 * Disable text by default.
	 * Note that the other defaults are set in sun4u/vm/mach_vm_dep.c.
	 */
	max_utext_lpsize = MMU_PAGESIZE;
}

static int
getintprop(pnode_t node, char *name, int deflt)
{
	int	value;

	switch (prom_getproplen(node, name)) {
	case 0:
		value = 1;	/* boolean properties */
		break;

	case sizeof (int):
		(void) prom_getprop(node, name, (caddr_t)&value);
		break;

	default:
		value = deflt;
		break;
	}

	return (value);
}

/*
 * Set the magic constants of the implementation.
 */
void
cpu_fiximp(pnode_t dnode)
{
	extern int vac_size, vac_shift;
	extern uint_t vac_mask;
	extern int dcache_line_mask;
	int i, a;
	static struct {
		char	*name;
		int	*var;
	} prop[] = {
		"dcache-size",		&dcache_size,
		"dcache-line-size",	&dcache_linesize,
		"icache-size",		&icache_size,
		"icache-line-size",	&icache_linesize,
		"ecache-size",		&ecache_size,
		"ecache-line-size",	&ecache_alignsize,
		"ecache-associativity",	&ecache_associativity,
		"#itlb-entries",	&itlb_entries,
		"#dtlb-entries",	&dtlb_entries,
	};

	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) {
		if ((a = getintprop(dnode, prop[i].name, -1)) != -1) {
			*prop[i].var = a;
		}
	}

	ecache_setsize = ecache_size / ecache_associativity;

	vac_size = S_VAC_SIZE;
	vac_mask = MMU_PAGEMASK & (vac_size - 1);
	i = 0; a = vac_size;
	while (a >>= 1)
		++i;
	vac_shift = i;
	shm_alignment = vac_size;
	vac = 1;

	dcache_line_mask = (dcache_size - 1) & ~(dcache_linesize - 1);

	/*
	 * UltraSPARC I & II have ecache sizes running
	 * as follows: .25 MB, .5 MB, 1 MB, 2 MB, 4 MB
	 * and 8 MB. Adjust the copyin/copyout limits
	 * according to the cache size. The magic number
	 * of VIS_COPY_THRESHOLD comes from the copyin/copyout code
	 * and its floor of VIS_COPY_THRESHOLD bytes before it will use
	 * VIS instructions.
	 *
	 * We assume that all CPUs on the system have the same size
	 * ecache. We're also called very early in the game.
	 * /etc/system will be parsed *after* we're called so
	 * these values can be overwritten.
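	 *
	 * For example, a 1 MB ecache gets hw_copy_limit_2/_4/_8 of
	 * 1024/1280/1536 bytes, while anything larger than 4 MB uses the
	 * largest limits below.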
	 */

	hw_copy_limit_1 = VIS_COPY_THRESHOLD;
	if (ecache_size <= 524288) {
		hw_copy_limit_2 = VIS_COPY_THRESHOLD;
		hw_copy_limit_4 = VIS_COPY_THRESHOLD;
		hw_copy_limit_8 = VIS_COPY_THRESHOLD;
	} else if (ecache_size == 1048576) {
		hw_copy_limit_2 = 1024;
		hw_copy_limit_4 = 1280;
		hw_copy_limit_8 = 1536;
	} else if (ecache_size == 2097152) {
		hw_copy_limit_2 = 1536;
		hw_copy_limit_4 = 2048;
		hw_copy_limit_8 = 2560;
	} else if (ecache_size == 4194304) {
		hw_copy_limit_2 = 2048;
		hw_copy_limit_4 = 2560;
		hw_copy_limit_8 = 3072;
	} else {
		hw_copy_limit_2 = 2560;
		hw_copy_limit_4 = 3072;
		hw_copy_limit_8 = 3584;
	}
}

/*
 * Called by setcpudelay
 */
void
cpu_init_tick_freq(void)
{
	/*
	 * Determine the cpu frequency by calling
	 * tod_get_cpufrequency.  Use an approximate frequency
	 * value computed by the prom if the tod module
	 * is not initialized and loaded yet.
	 */
	if (tod_ops.tod_get_cpufrequency != NULL) {
		mutex_enter(&tod_lock);
		sys_tick_freq = tod_ops.tod_get_cpufrequency();
		mutex_exit(&tod_lock);
	} else {
#if defined(HUMMINGBIRD)
		/*
		 * the hummingbird version of %stick is used as the basis for
		 * low level timing; this provides an independent constant-rate
		 * clock for general system use, and frees power mgmt to set
		 * various cpu clock speeds.
		 */
		if (system_clock_freq == 0)
			cmn_err(CE_PANIC, "invalid system_clock_freq 0x%lx",
			    system_clock_freq);
		sys_tick_freq = system_clock_freq;
#else /* SPITFIRE */
		sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq;
#endif
	}
}


void shipit(int upaid);
extern uint64_t xc_tick_limit;
extern uint64_t xc_tick_jump_limit;

#ifdef SEND_MONDO_STATS
uint64_t x_early[NCPU][64];
#endif

/*
 * Note: A version of this function is used by the debugger via the KDI,
 * and must be kept in sync with this version.  Any changes made to this
 * function to support new chips or to accommodate errata must also be included
 * in the KDI-specific version.  See spitfire_kdi.c.
 */
void
send_one_mondo(int cpuid)
{
	uint64_t idsr, starttick, endtick;
	int upaid, busy, nack;
	uint64_t tick, tick_prev;
	ulong_t ticks;

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
	upaid = CPUID_TO_UPAID(cpuid);
	tick = starttick = gettick();
	shipit(upaid);
	endtick = starttick + xc_tick_limit;
	busy = nack = 0;
	for (;;) {
		idsr = getidsr();
		if (idsr == 0)
			break;
		/*
		 * When we detect an irregular tick jump, we adjust
		 * the timer window to the current tick value.
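		 * (A jump larger than xc_tick_jump_limit restarts the
		 * xc_tick_limit timeout window from the current tick rather
		 * than declaring a mondo timeout.)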
		 */
		tick_prev = tick;
		tick = gettick();
		ticks = tick - tick_prev;
		if (ticks > xc_tick_jump_limit) {
			endtick = tick + xc_tick_limit;
		} else if (tick > endtick) {
			if (panic_quiesce)
				return;
			cmn_err(CE_PANIC,
			    "send mondo timeout (target 0x%x) [%d NACK %d "
			    "BUSY]", upaid, nack, busy);
		}
		if (idsr & IDSR_BUSY) {
			busy++;
			continue;
		}
		drv_usecwait(1);
		shipit(upaid);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	x_early[getprocessorid()][highbit(gettick() - starttick) - 1]++;
#endif
}

void
send_mondo_set(cpuset_t set)
{
	int i;

	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			send_one_mondo(i);
			CPUSET_DEL(set, i);
			if (CPUSET_ISNULL(set))
				break;
		}
}

void
syncfpu(void)
{
}

/*
 * Determine the size of the CPU module's error structure in bytes.  This is
 * called once during boot to initialize the error queues.
 */
int
cpu_aflt_size(void)
{
	/*
	 * We need to determine whether this is a sabre, Hummingbird or a
	 * Spitfire/Blackbird impl and set the appropriate state variables for
	 * ecache tag manipulation.  We can't do this in cpu_setup() as it is
	 * too early in the boot flow and the cpunodes are not initialized.
	 * This routine will be called once after cpunodes[] is ready, so do
	 * it here.
	 */
	if (cpunodes[CPU->cpu_id].implementation == SABRE_IMPL) {
		isus2i = 1;
		cpu_ec_tag_mask = SB_ECTAG_MASK;
		cpu_ec_state_mask = SB_ECSTATE_MASK;
		cpu_ec_par_mask = SB_ECPAR_MASK;
		cpu_ec_par_shift = SB_ECPAR_SHIFT;
		cpu_ec_tag_shift = SB_ECTAG_SHIFT;
		cpu_ec_state_shift = SB_ECSTATE_SHIFT;
		cpu_ec_state_exl = SB_ECSTATE_EXL;
		cpu_ec_state_mod = SB_ECSTATE_MOD;

		/* These states do not exist in sabre - set to 0xFF */
		cpu_ec_state_shr = 0xFF;
		cpu_ec_state_own = 0xFF;

		cpu_ec_state_valid = SB_ECSTATE_VALID;
		cpu_ec_state_dirty = SB_ECSTATE_DIRTY;
		cpu_ec_state_parity = SB_ECSTATE_PARITY;
		cpu_ec_parity = SB_EC_PARITY;
	} else if (cpunodes[CPU->cpu_id].implementation == HUMMBRD_IMPL) {
		isus2e = 1;
		cpu_ec_tag_mask = HB_ECTAG_MASK;
		cpu_ec_state_mask = HB_ECSTATE_MASK;
		cpu_ec_par_mask = HB_ECPAR_MASK;
		cpu_ec_par_shift = HB_ECPAR_SHIFT;
		cpu_ec_tag_shift = HB_ECTAG_SHIFT;
		cpu_ec_state_shift = HB_ECSTATE_SHIFT;
		cpu_ec_state_exl = HB_ECSTATE_EXL;
		cpu_ec_state_mod = HB_ECSTATE_MOD;

		/* These states do not exist in hummingbird - set to 0xFF */
		cpu_ec_state_shr = 0xFF;
		cpu_ec_state_own = 0xFF;

		cpu_ec_state_valid = HB_ECSTATE_VALID;
		cpu_ec_state_dirty = HB_ECSTATE_DIRTY;
		cpu_ec_state_parity = HB_ECSTATE_PARITY;
		cpu_ec_parity = HB_EC_PARITY;
	}

	return (sizeof (spitf_async_flt));
}


/*
 * Correctable ecc error trap handler
 */
/*ARGSUSED*/
void
cpu_ce_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
    uint_t p_afsr_high, uint_t p_afar_high)
{
	ushort_t sdbh, sdbl;
	ushort_t e_syndh, e_syndl;
	spitf_async_flt spf_flt;
	struct async_flt *ecc;
	int queue = 1;

	uint64_t t_afar = p_afar;
	uint64_t t_afsr = p_afsr;

	/*
	 * Note: the Spitfire data buffer error registers
	 * (upper and lower halves) are or'ed into the upper
	 * word of the afsr by ce_err().
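	 * UDBH is taken from bits <42:33> and UDBL from bits <52:43> of the
	 * value passed in, as the shifts below show.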
	 */
	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);

	e_syndh = (uchar_t)(sdbh & (uint_t)P_DER_E_SYND);
	e_syndl = (uchar_t)(sdbl & (uint_t)P_DER_E_SYND);

	t_afsr &= S_AFSR_MASK;
	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */

	/* Setup the async fault structure */
	bzero(&spf_flt, sizeof (spitf_async_flt));
	ecc = (struct async_flt *)&spf_flt;
	ecc->flt_id = gethrtime_waitfree();
	ecc->flt_stat = t_afsr;
	ecc->flt_addr = t_afar;
	ecc->flt_status = ECC_C_TRAP;
	ecc->flt_bus_id = getprocessorid();
	ecc->flt_inst = CPU->cpu_id;
	ecc->flt_pc = (caddr_t)rp->r_pc;
	ecc->flt_func = log_ce_err;
	ecc->flt_in_memory =
	    (pf_is_memory(ecc->flt_addr >> MMU_PAGESHIFT)) ? 1 : 0;
	spf_flt.flt_sdbh = sdbh;
	spf_flt.flt_sdbl = sdbl;

	/*
	 * Check for fatal conditions.
	 */
	check_misc_err(&spf_flt);

	/*
	 * Paranoid checks for valid AFSR and UDBs
	 */
	if ((t_afsr & P_AFSR_CE) == 0) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
		    "** Panic due to CE bit not set in the AFSR",
		    " Corrected Memory Error on");
	}

	/*
	 * We want to skip logging only if ALL the following
	 * conditions are true:
	 *
	 *	1. There is only one error
	 *	2. That error is a correctable memory error
	 *	3. The error is caused by the memory scrubber (in which case
	 *	   the error will have occurred under on_trap protection)
	 *	4. The error is on a retired page
	 *
	 * Note: OT_DATA_EC is used places other than the memory scrubber.
	 * However, none of those errors should occur on a retired page.
	 */
	if ((ecc->flt_stat & (S_AFSR_ALL_ERRS & ~P_AFSR_ME)) == P_AFSR_CE &&
	    curthread->t_ontrap != NULL) {

		if (curthread->t_ontrap->ot_prot & OT_DATA_EC) {
			if (page_retire_check(ecc->flt_addr, NULL) == 0) {
				queue = 0;
			}
		}
	}

	if (((sdbh & P_DER_CE) == 0) && ((sdbl & P_DER_CE) == 0)) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
		    "** Panic due to CE bits not set in the UDBs",
		    " Corrected Memory Error on");
	}

	if ((sdbh >> 8) & 1) {
		ecc->flt_synd = e_syndh;
		ce_scrub(ecc);
		if (queue) {
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
		}
	}

	if ((sdbl >> 8) & 1) {
		ecc->flt_addr = t_afar | 0x8;	/* Sabres do not have a UDBL */
		ecc->flt_synd = e_syndl | UDBL_REG;
		ce_scrub(ecc);
		if (queue) {
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
		}
	}

	/*
	 * Re-enable all error trapping (CEEN currently cleared).
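	 * clr_datapath() and set_asyncflt(P_AFSR_CE) below clear the recorded
	 * error state before set_error_enable() turns error traps back on.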
	 */
	clr_datapath();
	set_asyncflt(P_AFSR_CE);
	set_error_enable(EER_ENABLE);
}

/*
 * Cpu specific CE logging routine
 */
static void
log_ce_err(struct async_flt *aflt, char *unum)
{
	spitf_async_flt spf_flt;

	if ((aflt->flt_stat & P_AFSR_CE) && (ce_verbose_memory == 0)) {
		return;
	}

	spf_flt.cmn_asyncflt = *aflt;
	cpu_aflt_log(CE_CONT, 0, &spf_flt, CE_LFLAGS, unum,
	    " Corrected Memory Error detected by");
}

/*
 * Spitfire does not perform any further CE classification refinement
 */
/*ARGSUSED*/
int
ce_scrub_xdiag_recirc(struct async_flt *ecc, errorq_t *eqp, errorq_elem_t *eqep,
    size_t afltoffset)
{
	return (0);
}

char *
flt_to_error_type(struct async_flt *aflt)
{
	if (aflt->flt_status & ECC_INTERMITTENT)
		return (ERR_TYPE_DESC_INTERMITTENT);
	if (aflt->flt_status & ECC_PERSISTENT)
		return (ERR_TYPE_DESC_PERSISTENT);
	if (aflt->flt_status & ECC_STICKY)
		return (ERR_TYPE_DESC_STICKY);
	return (ERR_TYPE_DESC_UNKNOWN);
}

/*
 * Called by correctable ecc error logging code to print out
 * the sticky/persistent/intermittent status of the error.
 */
static void
cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum)
{
	ushort_t status;
	char *status1_str = "Memory";
	char *status2_str = "Intermittent";
	struct async_flt *aflt = (struct async_flt *)spf_flt;

	status = aflt->flt_status;

	if (status & ECC_ECACHE)
		status1_str = "Ecache";

	if (status & ECC_STICKY)
		status2_str = "Sticky";
	else if (status & ECC_PERSISTENT)
		status2_str = "Persistent";

	cpu_aflt_log(CE_CONT, 0, spf_flt, CPU_ERRID_FIRST,
	    NULL, " Corrected %s Error on %s is %s",
	    status1_str, unum, status2_str);
}

/*
 * check for a valid ce syndrome, then call the
 * displacement flush scrubbing code, and then check the afsr to see if
 * the error was persistent or intermittent.  Reread the afar/afsr to see
 * if the error was not scrubbed successfully, and is therefore sticky.
 */
/*ARGSUSED1*/
void
cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t triedcpulogout)
{
	uint64_t eer, afsr;
	ushort_t status;

	ASSERT(getpil() > LOCK_LEVEL);

	/*
	 * It is possible that the flt_addr is not a valid
	 * physical address.  To deal with this, we disable
	 * NCEEN while we scrub that address.  If this causes
	 * a TIMEOUT/BERR, we know this is an invalid
	 * memory location.
	 */
	kpreempt_disable();
	eer = get_error_enable();
	if (eer & (EER_CEEN | EER_NCEEN))
		set_error_enable(eer & ~(EER_CEEN | EER_NCEEN));

	/*
	 * To check if the error detected by IO is persistent, sticky or
	 * intermittent.
	 */
	if (ecc->flt_status & ECC_IOBUS) {
		ecc->flt_stat = P_AFSR_CE;
	}

	scrubphys(P2ALIGN(ecc->flt_addr, 64),
	    cpunodes[CPU->cpu_id].ecache_size);

	get_asyncflt(&afsr);
	if (afsr & (P_AFSR_TO | P_AFSR_BERR)) {
		/*
		 * Must ensure that we don't get the TIMEOUT/BERR
		 * when we reenable NCEEN, so we clear the AFSR.
		 */
		set_asyncflt(afsr & (P_AFSR_TO | P_AFSR_BERR));
		if (eer & (EER_CEEN | EER_NCEEN))
			set_error_enable(eer);
		kpreempt_enable();
		return;
	}

	if (eer & EER_NCEEN)
		set_error_enable(eer & ~EER_CEEN);

	/*
	 * Check and clear any ECC errors from the scrub.  If the scrub did
	 * not trip over the error, mark it intermittent.  If the scrub did
	 * trip the error again and it did not scrub away, mark it sticky.
	 * Otherwise mark it persistent.
	 */
	if (check_ecc(ecc) != 0) {
		cpu_read_paddr(ecc, 0, 1);

		if (check_ecc(ecc) != 0)
			status = ECC_STICKY;
		else
			status = ECC_PERSISTENT;
	} else
		status = ECC_INTERMITTENT;

	if (eer & (EER_CEEN | EER_NCEEN))
		set_error_enable(eer);
	kpreempt_enable();

	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
	ecc->flt_status |= status;
}

/*
 * get the syndrome and unum, and then call the routines
 * to check the other cpus and iobuses, and then do the error logging.
 */
/*ARGSUSED1*/
void
cpu_ce_log_err(struct async_flt *ecc, errorq_elem_t *eqep)
{
	char unum[UNUM_NAMLEN];
	int len = 0;
	int ce_verbose = 0;
	int err;

	ASSERT(ecc->flt_func != NULL);

	/* Get the unum string for logging purposes */
	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, ecc, unum,
	    UNUM_NAMLEN, &len);

	/* Call specific error logging routine */
	(void) (*ecc->flt_func)(ecc, unum);

	/*
	 * Count errors per unum.
	 * Non-memory errors are all counted via a special unum string.
	 */
	if ((err = ce_count_unum(ecc->flt_status, len, unum)) != PR_OK &&
	    automatic_page_removal) {
		(void) page_retire(ecc->flt_addr, err);
	}

	if (ecc->flt_panic) {
		ce_verbose = 1;
	} else if ((ecc->flt_class == BUS_FAULT) ||
	    (ecc->flt_stat & P_AFSR_CE)) {
		ce_verbose = (ce_verbose_memory > 0);
	} else {
		ce_verbose = 1;
	}

	if (ce_verbose) {
		spitf_async_flt sflt;
		int synd_code;

		sflt.cmn_asyncflt = *ecc;	/* for cpu_aflt_log() */

		cpu_ce_log_status(&sflt, unum);

		synd_code = synd_to_synd_code(AFLT_STAT_VALID,
		    SYND(ecc->flt_synd));

		if (SYND_IS_SINGLE_BIT_DATA(synd_code)) {
			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
			    NULL, " ECC Data Bit %2d was in error "
			    "and corrected", synd_code);
		} else if (SYND_IS_SINGLE_BIT_CHK(synd_code)) {
			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
			    NULL, " ECC Check Bit %2d was in error "
			    "and corrected", synd_code - C0);
		} else {
			/*
			 * These are UE errors - we shouldn't be getting CE
			 * traps for these; handle them in case of bad h/w.
			 */
			switch (synd_code) {
			case M2:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " Two ECC Bits were in error");
				break;
			case M3:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " Three ECC Bits were in error");
				break;
			case M4:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " Four ECC Bits were in error");
				break;
			case MX:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " More than Four ECC bits were "
				    "in error");
				break;
			default:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " Unknown fault syndrome %d",
				    synd_code);
				break;
			}
		}
	}

	/* Display entire cache line, if valid address */
	if (ce_show_data && ecc->flt_addr != AFLT_INV_ADDR)
		read_ecc_data(ecc, 1, 1);
}

/*
 * We route all errors through a single switch statement.
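 * cpu_async_log_err() handles CPU_FAULT errors, bus_async_log_err() handles
 * BUS_FAULT errors, and anything else is discarded with a warning.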
 */
void
cpu_ue_log_err(struct async_flt *aflt)
{

	switch (aflt->flt_class) {
	case CPU_FAULT:
		cpu_async_log_err(aflt);
		break;

	case BUS_FAULT:
		bus_async_log_err(aflt);
		break;

	default:
		cmn_err(CE_WARN, "discarding async error 0x%p with invalid "
		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
		break;
	}
}

/* Values for action variable in cpu_async_error() */
#define	ACTION_NONE		0
#define	ACTION_TRAMPOLINE	1
#define	ACTION_AST_FLAGS	2

/*
 * Access error trap handler for asynchronous cpu errors.  This routine is
 * called to handle a data or instruction access error.  All fatal errors are
 * completely handled by this routine (by panicking).  Non-fatal error logging
 * is queued for later processing either via AST or softint at a lower PIL.
 * In case of panic, the error log queue will also be processed as part of the
 * panic flow to ensure all errors are logged.  This routine is called with all
 * errors disabled at PIL15.  The AFSR bits are cleared and the UDBL and UDBH
 * error bits are also cleared.  The hardware has also disabled the I and
 * D-caches for us, so we must re-enable them before returning.
 *
 * A summary of the handling of tl=0 UE/LDP/EDP/TO/BERR/WP/CP:
 *
 *               _______________________________________________________________
 *              |        Privileged tl0         |         Unprivileged          |
 *              |   Protected   |  Unprotected  |   Protected   |  Unprotected  |
 *              |on_trap|lofault|               |               |               |
 * -------------|-------|-------+---------------+---------------+---------------|
 *              |       |       |               |               |               |
 * UE/LDP/EDP   | L,T,p | L,R,p |      L,P      |      n/a      |     L,R,p     |
 *              |       |       |               |               |               |
 * TO/BERR      |   T   |   S   |      L,P      |      n/a      |       S       |
 *              |       |       |               |               |               |
 * WP           | L,M,p | L,M,p |     L,M,p     |      n/a      |     L,M,p     |
 *              |       |       |               |               |               |
 * CP (IIi/IIe) |  L,P  |  L,P  |      L,P      |      n/a      |      L,P      |
 * ____________________________________________________________________________
 *
 *
 * Action codes:
 *
 * L - log
 * M - kick off memscrubber if flt_in_memory
 * P - panic
 * p - panic if US-IIi or US-IIe (Sabre); overrides R and M
 * R - i)  if aft_panic is set, panic
 *     ii) otherwise, send hwerr event to contract and SIGKILL to process
 * S - send SIGBUS to process
 * T - trampoline
 *
 * Special cases:
 *
 * 1) if aft_testfatal is set, all faults result in a panic regardless
 *    of type (even WP), protection (even on_trap), or privilege.
 */
/*ARGSUSED*/
void
cpu_async_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
    uint_t p_afsr_high, uint_t p_afar_high)
{
	ushort_t sdbh, sdbl, ttype, tl;
	spitf_async_flt spf_flt;
	struct async_flt *aflt;
	char pr_reason[28];
	uint64_t oafsr;
	uint64_t acc_afsr = 0;			/* accumulated afsr */
	int action = ACTION_NONE;
	uint64_t t_afar = p_afar;
	uint64_t t_afsr = p_afsr;
	int expected = DDI_FM_ERR_UNEXPECTED;
	ddi_acc_hdl_t *hp;

	/*
	 * We need to look at p_flag to determine if the thread detected an
	 * error while dumping core.  We can't grab p_lock here, but it's ok
	 * because we just need a consistent snapshot and we know that everyone
	 * else will store a consistent set of bits while holding p_lock.  We
	 * don't have to worry about a race because SDOCORE is set once prior
	 * to doing i/o from the process's address space and is never cleared.
	 */
	uint_t pflag = ttoproc(curthread)->p_flag;

	pr_reason[0] = '\0';

	/*
	 * Note: the Spitfire data buffer error registers
	 * (upper and lower halves) are or'ed into the upper
	 * word of the afsr by async_err() if P_AFSR_UE is set.
	 */
	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);

	/*
	 * Grab the ttype encoded in <63:53> of the saved
	 * afsr passed from async_err()
	 */
	ttype = (ushort_t)((t_afsr >> 53) & 0x1FF);
	tl = (ushort_t)(t_afsr >> 62);

	t_afsr &= S_AFSR_MASK;
	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */

	/*
	 * Initialize most of the common and CPU-specific structure.  We derive
	 * aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit.  The
	 * initial setting of aflt->flt_panic is based on TL: we must panic if
	 * the error occurred at TL > 0.  We also set flt_panic if the test/demo
	 * tuneable aft_testfatal is set (not the default).
	 */
	bzero(&spf_flt, sizeof (spitf_async_flt));
	aflt = (struct async_flt *)&spf_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_stat = t_afsr;
	aflt->flt_addr = t_afar;
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
	aflt->flt_tl = (uchar_t)tl;
	aflt->flt_panic = (tl != 0 || aft_testfatal != 0);
	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;

	/*
	 * Set flt_status based on the trap type.  If we end up here as the
	 * result of a UE detected by the CE handling code, leave status 0.
	 */
	switch (ttype) {
	case T_DATA_ERROR:
		aflt->flt_status = ECC_D_TRAP;
		break;
	case T_INSTR_ERROR:
		aflt->flt_status = ECC_I_TRAP;
		break;
	}

	spf_flt.flt_sdbh = sdbh;
	spf_flt.flt_sdbl = sdbl;

	/*
	 * Check for fatal async errors.
	 */
	check_misc_err(&spf_flt);

	/*
	 * If the trap occurred in privileged mode at TL=0, we need to check to
	 * see if we were executing in the kernel under on_trap() or t_lofault
	 * protection.  If so, modify the saved registers so that we return
	 * from the trap to the appropriate trampoline routine.
	 */
	if (aflt->flt_priv && tl == 0) {
		if (curthread->t_ontrap != NULL) {
			on_trap_data_t *otp = curthread->t_ontrap;

			if (otp->ot_prot & OT_DATA_EC) {
				aflt->flt_prot = AFLT_PROT_EC;
				otp->ot_trap |= OT_DATA_EC;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				action = ACTION_TRAMPOLINE;
			}

			if ((t_afsr & (P_AFSR_TO | P_AFSR_BERR)) &&
			    (otp->ot_prot & OT_DATA_ACCESS)) {
				aflt->flt_prot = AFLT_PROT_ACCESS;
				otp->ot_trap |= OT_DATA_ACCESS;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				action = ACTION_TRAMPOLINE;
				/*
				 * for peeks and caut_gets errors are expected
				 */
				hp = (ddi_acc_hdl_t *)otp->ot_handle;
				if (!hp)
					expected = DDI_FM_ERR_PEEK;
				else if (hp->ah_acc.devacc_attr_access ==
				    DDI_CAUTIOUS_ACC)
					expected = DDI_FM_ERR_EXPECTED;
			}

		} else if (curthread->t_lofault) {
			aflt->flt_prot = AFLT_PROT_COPY;
			rp->r_g1 = EFAULT;
			rp->r_pc = curthread->t_lofault;
			rp->r_npc = rp->r_pc + 4;
			action = ACTION_TRAMPOLINE;
		}
	}

	/*
	 * Determine if this error needs to be treated as fatal.  Note that
	 * multiple errors detected upon entry to this trap handler does not
	 * necessarily warrant a panic.  We only want to panic if the trap
	 * happened in privileged mode and not under t_ontrap or t_lofault
	 * protection.  The exception is WP: if we *only* get WP, it is not
	 * fatal even if the trap occurred in privileged mode, except on Sabre.
	 *
	 * aft_panic, if set, effectively makes us treat usermode
	 * UE/EDP/LDP faults as if they were privileged - so we will
	 * panic instead of sending a contract event.  A lofault-protected
	 * fault will normally follow the contract event; if aft_panic is
	 * set this will be changed to a panic.
	 *
	 * For usermode BERR/BTO errors, eg from processes performing device
	 * control through mapped device memory, we need only deliver
	 * a SIGBUS to the offending process.
	 *
	 * Some additional flt_panic reasons (eg, WP on Sabre) will be
	 * checked later; for now we implement the common reasons.
	 */
	if (aflt->flt_prot == AFLT_PROT_NONE) {
		/*
		 * Beware - multiple bits may be set in AFSR
		 */
		if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) {
			if (aflt->flt_priv || aft_panic)
				aflt->flt_panic = 1;
		}

		if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
			if (aflt->flt_priv)
				aflt->flt_panic = 1;
		}
	} else if (aflt->flt_prot == AFLT_PROT_COPY && aft_panic) {
		aflt->flt_panic = 1;
	}

	/*
	 * UE/BERR/TO: Call our bus nexus friends to check for
	 * IO errors that may have resulted in this trap.
	 */
	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR | P_AFSR_UE)) {
		cpu_run_bus_error_handlers(aflt, expected);
	}

	/*
	 * Handle UE: If the UE is in memory, we need to flush the bad line from
	 * the E-cache.  We also need to query the bus nexus for fatal errors.
	 * For sabre, we will panic on UEs.  Attempts to do diagnostic read on
	 * caches may introduce more parity errors (especially when the module
	 * is bad) and in sabre there is no guarantee that such errors
	 * (if introduced) are written back as poisoned data.
	 */
	if (t_afsr & P_AFSR_UE) {
		int i;

		(void) strcat(pr_reason, "UE ");

		spf_flt.flt_type = CPU_UE_ERR;
		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
		    MMU_PAGESHIFT)) ? 1 : 0;

		/*
		 * With UE, we have the PA of the fault.
		 * Let's do a diagnostic read to get the ecache
		 * data and tag info of the bad line for logging.
		 */
		if (aflt->flt_in_memory) {
			uint32_t ec_set_size;
			uchar_t state;
			uint32_t ecache_idx;
			uint64_t faultpa = P2ALIGN(aflt->flt_addr, 64);

			/* touch the line to put it in ecache */
			acc_afsr |= read_and_clear_afsr();
			(void) lddphys(faultpa);
			acc_afsr |= (read_and_clear_afsr() &
			    ~(P_AFSR_EDP | P_AFSR_UE));

			ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
			    ecache_associativity;

			for (i = 0; i < ecache_associativity; i++) {
				ecache_idx = i * ec_set_size +
				    (aflt->flt_addr % ec_set_size);
				get_ecache_dtag(P2ALIGN(ecache_idx, 64),
				    (uint64_t *)&spf_flt.flt_ec_data[0],
				    &spf_flt.flt_ec_tag, &oafsr, &acc_afsr);
				acc_afsr |= oafsr;

				state = (uchar_t)((spf_flt.flt_ec_tag &
				    cpu_ec_state_mask) >> cpu_ec_state_shift);

				if ((state & cpu_ec_state_valid) &&
				    ((spf_flt.flt_ec_tag & cpu_ec_tag_mask) ==
				    ((uint64_t)aflt->flt_addr >>
				    cpu_ec_tag_shift)))
					break;
			}

			/*
			 * Check to see if the ecache tag is valid for the
			 * fault PA.  In the very unlikely event where the
			 * line could be victimized, no ecache info will be
			 * available.  If this is the case, capture the line
			 * from memory instead.
			 */
			if ((state & cpu_ec_state_valid) == 0 ||
			    (spf_flt.flt_ec_tag & cpu_ec_tag_mask) !=
			    ((uint64_t)aflt->flt_addr >> cpu_ec_tag_shift)) {
				for (i = 0; i < 8; i++, faultpa += 8) {
					ec_data_t *ecdptr;

					ecdptr = &spf_flt.flt_ec_data[i];
					acc_afsr |= read_and_clear_afsr();
					ecdptr->ec_d8 = lddphys(faultpa);
					acc_afsr |= (read_and_clear_afsr() &
					    ~(P_AFSR_EDP | P_AFSR_UE));
					ecdptr->ec_afsr = 0;
					/* null afsr value */
				}

				/*
				 * Mark tag invalid to indicate mem dump
				 * when we print out the info.
				 */
				spf_flt.flt_ec_tag = AFLT_INV_ADDR;
			}
			spf_flt.flt_ec_lcnt = 1;

			/*
			 * Flush out the bad line
			 */
			flushecacheline(P2ALIGN(aflt->flt_addr, 64),
			    cpunodes[CPU->cpu_id].ecache_size);

			acc_afsr |= clear_errors(NULL, NULL);
		}

		/*
		 * Ask our bus nexus friends if they have any fatal errors.  If
		 * so, they will log appropriate error messages and panic as a
		 * result.  We then queue an event for each UDB that reports a
		 * UE.  Each UE reported in a UDB will have its own log message.
		 *
		 * Note from kbn: In the case where there are multiple UEs
		 * (ME bit is set) - the AFAR address is only accurate to
		 * the 16-byte granularity.  One cannot tell whether the AFAR
		 * belongs to the UDBH or UDBL syndromes.  In this case, we
		 * always report the AFAR address to be 16-byte aligned.
		 *
		 * If we're on a Sabre, there is no SDBL, but it will always
		 * read as zero, so the sdbl test below will safely fail.
		 */
		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL || isus2i || isus2e)
			aflt->flt_panic = 1;

		if (sdbh & P_DER_UE) {
			aflt->flt_synd = sdbh & P_DER_E_SYND;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		}
		if (sdbl & P_DER_UE) {
			aflt->flt_synd = sdbl & P_DER_E_SYND;
			aflt->flt_synd |= UDBL_REG;	/* indicates UDBL */
			if (!(aflt->flt_stat & P_AFSR_ME))
				aflt->flt_addr |= 0x8;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		}

		/*
		 * We got a UE and are panicking, save the fault PA in a known
		 * location so that the platform specific panic code can check
		 * for copyback errors.
		 */
		if (aflt->flt_panic && aflt->flt_in_memory) {
			panic_aflt = *aflt;
		}
	}

	/*
	 * Handle EDP and LDP: Locate the line with bad parity and enqueue an
	 * async error for logging.  For Sabre, we panic on EDP or LDP.
	 */
	if (t_afsr & (P_AFSR_EDP | P_AFSR_LDP)) {
		spf_flt.flt_type = CPU_EDP_LDP_ERR;

		if (t_afsr & P_AFSR_EDP)
			(void) strcat(pr_reason, "EDP ");

		if (t_afsr & P_AFSR_LDP)
			(void) strcat(pr_reason, "LDP ");

		/*
		 * Here we have no PA to work with.
		 * Scan each line in the ecache to look for
		 * the one with bad parity.
		 */
		aflt->flt_addr = AFLT_INV_ADDR;
		scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
		    &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
		acc_afsr |= (oafsr & ~P_AFSR_WP);

		/*
		 * If we found a bad PA, update the state to indicate if it is
		 * memory or I/O space.  This code will be important if we ever
		 * support cacheable frame buffers.
		 */
		if (aflt->flt_addr != AFLT_INV_ADDR) {
			aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
			    MMU_PAGESHIFT)) ? 1 : 0;
		}

		if (isus2i || isus2e)
			aflt->flt_panic = 1;

		cpu_errorq_dispatch((t_afsr & P_AFSR_EDP) ?
		    FM_EREPORT_CPU_USII_EDP : FM_EREPORT_CPU_USII_LDP,
		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Timeout and bus error handling.  There are two cases to consider:
	 *
	 * (1) If we are in the kernel protected by ddi_peek or ddi_poke, we
	 * have already modified the saved registers so that we will return
	 * from the trap to the appropriate trampoline routine; otherwise panic.
	 *
	 * (2) In user mode, we can simply use our AST mechanism to deliver
	 * a SIGBUS.  We do not log the occurrence - processes performing
	 * device control would generate lots of uninteresting messages.
	 */
	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
		if (t_afsr & P_AFSR_TO)
			(void) strcat(pr_reason, "BTO ");

		if (t_afsr & P_AFSR_BERR)
			(void) strcat(pr_reason, "BERR ");

		spf_flt.flt_type = CPU_BTO_BERR_ERR;
		if (aflt->flt_priv && aflt->flt_prot == AFLT_PROT_NONE) {
			cpu_errorq_dispatch((t_afsr & P_AFSR_TO) ?
			    FM_EREPORT_CPU_USII_TO : FM_EREPORT_CPU_USII_BERR,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		}
	}

	/*
	 * Handle WP: WP happens when the ecache is victimized and a parity
	 * error was detected on a writeback.  The data in question will be
	 * poisoned as a UE will be written back.  The PA is not logged and
	 * it is possible that it doesn't belong to the trapped thread.  The
	 * WP trap is not fatal, but it could be fatal to someone that
	 * subsequently accesses the toxic page.  We set read_all_memscrub
	 * to force the memscrubber to read all of memory when it awakens.
	 * For Sabre/Hummingbird, WP is fatal because the HW doesn't write a
	 * UE back to poison the data.
	 */
	if (t_afsr & P_AFSR_WP) {
		(void) strcat(pr_reason, "WP ");
		if (isus2i || isus2e) {
			aflt->flt_panic = 1;
		} else {
			read_all_memscrub = 1;
		}
		spf_flt.flt_type = CPU_WP_ERR;
		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_WP,
		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Handle trapping CP error: In Sabre/Hummingbird, parity error in
	 * the ecache on a copyout due to a PCI DMA read is signaled as a CP.
	 * This is fatal.
	 */

	if (t_afsr & P_AFSR_CP) {
		if (isus2i || isus2e) {
			(void) strcat(pr_reason, "CP ");
			aflt->flt_panic = 1;
			spf_flt.flt_type = CPU_TRAPPING_CP_ERR;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		} else {
			/*
			 * Orphan CP: Happens due to signal integrity problem
			 * on a CPU, where a CP is reported, without reporting
			 * its associated UE.  This is handled by locating the
			 * bad parity line and would kick off the memscrubber
			 * to find the UE if in memory or in another's cache.
			 */
			spf_flt.flt_type = CPU_ORPHAN_CP_ERR;
			(void) strcat(pr_reason, "ORPHAN_CP ");

			/*
			 * Here we have no PA to work with.
			 * Scan each line in the ecache to look for
			 * the one with bad parity.
			 */
			aflt->flt_addr = AFLT_INV_ADDR;
			scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
			    &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt,
			    &oafsr);
			acc_afsr |= oafsr;

			/*
			 * If we found a bad PA, update the state to indicate
			 * if it is memory or I/O space.
			 */
			if (aflt->flt_addr != AFLT_INV_ADDR) {
				aflt->flt_in_memory =
				    (pf_is_memory(aflt->flt_addr >>
				    MMU_PAGESHIFT)) ? 1 : 0;
			}
			read_all_memscrub = 1;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);

		}
	}

	/*
	 * If we queued an error other than WP or CP and we are going to return
	 * from the trap and the error was in user mode or inside of a
	 * copy routine, set AST flag so the queue will be drained before
	 * returning to user mode.
	 *
	 * For UE/LDP/EDP, the AST processing will SIGKILL the process
	 * and send an event to its process contract.
	 *
	 * For BERR/BTO, the AST processing will SIGBUS the process.  There
	 * will have been no error queued in this case.
	 */
	if ((t_afsr &
	    (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP | P_AFSR_BERR | P_AFSR_TO)) &&
	    (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY)) {
		int pcb_flag = 0;

		if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP))
			pcb_flag |= ASYNC_HWERR;

		if (t_afsr & P_AFSR_BERR)
			pcb_flag |= ASYNC_BERR;

		if (t_afsr & P_AFSR_TO)
			pcb_flag |= ASYNC_BTO;

		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
		aston(curthread);
		action = ACTION_AST_FLAGS;
	}

	/*
	 * In response to a deferred error, we must do one of three things:
	 * (1) set the AST flags, (2) trampoline, or (3) panic.  action is
action is
1734 * set in cases (1) and (2) - check that either action is set or
1735 * (3) is true.
1736 *
1737 * On II, the WP writes poisoned data back to memory, which will
1738 * cause a UE and a panic or reboot when read. In this case, we
1739 * don't need to panic at this time. On IIi and IIe,
1740 * aflt->flt_panic is already set above.
1741 */
1742 ASSERT((aflt->flt_panic != 0) || (action != ACTION_NONE) ||
1743 (t_afsr & P_AFSR_WP));
1744
1745 /*
1746 * Make a final sanity check to make sure we did not get any more async
1747 * errors and accumulate the afsr.
1748 */
1749 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
1750 cpunodes[CPU->cpu_id].ecache_linesize);
1751 (void) clear_errors(&spf_flt, NULL);
1752
1753 /*
1754 * Take care of a special case: If there is a UE in the ecache flush
1755 * area, we'll see it in flush_ecache(). This will trigger the
1756 * CPU_ADDITIONAL_ERRORS case below.
1757 *
1758 * This could occur if the original error was a UE in the flush area,
1759 * or if the original error was an E$ error that was flushed out of
1760 * the E$ in scan_ecache().
1761 *
1762 * If it's at the same address that we're already logging, then it's
1763 * probably one of these cases. Clear the bit so we don't trip over
1764 * it on the additional errors case, which could cause an unnecessary
1765 * panic.
1766 */
1767 if ((aflt->flt_stat & P_AFSR_UE) && aflt->flt_addr == t_afar)
1768 acc_afsr |= aflt->flt_stat & ~P_AFSR_UE;
1769 else
1770 acc_afsr |= aflt->flt_stat;
1771
1772 /*
1773 * Check the accumulated afsr for the important bits.
1774 * Make sure the spf_flt.flt_type value is set, and
1775 * enqueue an error.
1776 */
1777 if (acc_afsr &
1778 (P_AFSR_LEVEL1 | P_AFSR_IVUE | P_AFSR_ETP | P_AFSR_ISAP)) {
1779 if (acc_afsr & (P_AFSR_UE | P_AFSR_EDP | P_AFSR_LDP |
1780 P_AFSR_BERR | P_AFSR_TO | P_AFSR_IVUE | P_AFSR_ETP |
1781 P_AFSR_ISAP))
1782 aflt->flt_panic = 1;
1783
1784 spf_flt.flt_type = CPU_ADDITIONAL_ERR;
1785 aflt->flt_stat = acc_afsr;
1786 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UNKNOWN,
1787 (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1788 aflt->flt_panic);
1789 }
1790
1791 /*
1792 * If aflt->flt_panic is set at this point, we need to panic as the
1793 * result of a trap at TL > 0, or an error we determined to be fatal.
1794 * We've already enqueued the error in one of the if-clauses above,
1795 * and it will be dequeued and logged as part of the panic flow.
1796 */
1797 if (aflt->flt_panic) {
1798 cpu_aflt_log(CE_PANIC, 1, &spf_flt, CPU_ERRID_FIRST,
1799 "See previous message(s) for details", " %sError(s)",
1800 pr_reason);
1801 }
1802
1803 /*
1804 * Before returning, we must re-enable errors, and
1805 * reset the caches to their boot-up state.
1806 */
1807 set_lsu(get_lsu() | cache_boot_state);
1808 set_error_enable(EER_ENABLE);
1809 }
1810
1811 /*
1812 * Check for miscellaneous fatal errors and call CE_PANIC if any are seen.
1813 * This routine is shared by the CE and UE handling code.
1814 */
1815 static void
1816 check_misc_err(spitf_async_flt *spf_flt)
1817 {
1818 struct async_flt *aflt = (struct async_flt *)spf_flt;
1819 char *fatal_str = NULL;
1820
1821 /*
1822 * The ISAP and ETP errors are supposed to cause a POR
1823 * from the system, so in theory we never, ever see these messages.
1824 * ISAP, ETP and IVUE are considered to be fatal.
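 * (ISAP is a system address parity error, ETP an E$ tag parity error and
 * IVUE an uncorrectable error on an interrupt vector, matching the message
 * strings used below.)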
1825 */ 1826 if (aflt->flt_stat & P_AFSR_ISAP) 1827 fatal_str = " System Address Parity Error on"; 1828 else if (aflt->flt_stat & P_AFSR_ETP) 1829 fatal_str = " Ecache Tag Parity Error on"; 1830 else if (aflt->flt_stat & P_AFSR_IVUE) 1831 fatal_str = " Interrupt Vector Uncorrectable Error on"; 1832 if (fatal_str != NULL) { 1833 cpu_aflt_log(CE_PANIC, 1, spf_flt, CMN_LFLAGS, 1834 NULL, fatal_str); 1835 } 1836 } 1837 1838 /* 1839 * Routine to convert a syndrome into a syndrome code. 1840 */ 1841 static int 1842 synd_to_synd_code(int synd_status, ushort_t synd) 1843 { 1844 if (synd_status != AFLT_STAT_VALID) 1845 return (-1); 1846 1847 /* 1848 * Use the 8-bit syndrome to index the ecc_syndrome_tab 1849 * to get the code indicating which bit(s) is(are) bad. 1850 */ 1851 if ((synd == 0) || (synd >= SYND_TBL_SIZE)) 1852 return (-1); 1853 else 1854 return (ecc_syndrome_tab[synd]); 1855 } 1856 1857 /* ARGSUSED */ 1858 int 1859 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp) 1860 { 1861 return (ENOTSUP); 1862 } 1863 1864 /* ARGSUSED */ 1865 int 1866 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp) 1867 { 1868 return (ENOTSUP); 1869 } 1870 1871 /* ARGSUSED */ 1872 int 1873 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp) 1874 { 1875 return (ENOTSUP); 1876 } 1877 1878 /* 1879 * Routine to return a string identifying the physical name 1880 * associated with a memory/cache error. 1881 */ 1882 /* ARGSUSED */ 1883 int 1884 cpu_get_mem_unum(int synd_status, ushort_t synd, uint64_t afsr, 1885 uint64_t afar, int cpuid, int flt_in_memory, ushort_t flt_status, 1886 char *buf, int buflen, int *lenp) 1887 { 1888 short synd_code; 1889 int ret; 1890 1891 if (flt_in_memory) { 1892 synd_code = synd_to_synd_code(synd_status, synd); 1893 if (synd_code == -1) { 1894 ret = EINVAL; 1895 } else if (prom_get_unum(synd_code, P2ALIGN(afar, 8), 1896 buf, buflen, lenp) != 0) { 1897 ret = EIO; 1898 } else if (*lenp <= 1) { 1899 ret = EINVAL; 1900 } else { 1901 ret = 0; 1902 } 1903 } else { 1904 ret = ENOTSUP; 1905 } 1906 1907 if (ret != 0) { 1908 buf[0] = '\0'; 1909 *lenp = 0; 1910 } 1911 1912 return (ret); 1913 } 1914 1915 /* 1916 * Wrapper for cpu_get_mem_unum() routine that takes an 1917 * async_flt struct rather than explicit arguments. 1918 */ 1919 int 1920 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt, 1921 char *buf, int buflen, int *lenp) 1922 { 1923 return (cpu_get_mem_unum(synd_status, SYND(aflt->flt_synd), 1924 aflt->flt_stat, aflt->flt_addr, aflt->flt_bus_id, 1925 aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp)); 1926 } 1927 1928 /* 1929 * This routine is a more generic interface to cpu_get_mem_unum(), 1930 * that may be used by other modules (e.g. mm). 1931 */ 1932 int 1933 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar, 1934 char *buf, int buflen, int *lenp) 1935 { 1936 int synd_status, flt_in_memory, ret; 1937 char unum[UNUM_NAMLEN]; 1938 1939 /* 1940 * Check for an invalid address. 1941 */ 1942 if (afar == (uint64_t)-1) 1943 return (ENXIO); 1944 1945 if (synd == (uint64_t)-1) 1946 synd_status = AFLT_STAT_INVALID; 1947 else 1948 synd_status = AFLT_STAT_VALID; 1949 1950 flt_in_memory = (pf_is_memory(afar >> MMU_PAGESHIFT)) ? 
1 : 0; 1951 1952 if ((ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar, 1953 CPU->cpu_id, flt_in_memory, 0, unum, UNUM_NAMLEN, lenp)) 1954 != 0) 1955 return (ret); 1956 1957 if (*lenp >= buflen) 1958 return (ENAMETOOLONG); 1959 1960 (void) strncpy(buf, unum, buflen); 1961 1962 return (0); 1963 } 1964 1965 /* 1966 * Routine to return memory information associated 1967 * with a physical address and syndrome. 1968 */ 1969 /* ARGSUSED */ 1970 int 1971 cpu_get_mem_info(uint64_t synd, uint64_t afar, 1972 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 1973 int *segsp, int *banksp, int *mcidp) 1974 { 1975 return (ENOTSUP); 1976 } 1977 1978 /* 1979 * Routine to return a string identifying the physical 1980 * name associated with a cpuid. 1981 */ 1982 /* ARGSUSED */ 1983 int 1984 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp) 1985 { 1986 return (ENOTSUP); 1987 } 1988 1989 /* 1990 * This routine returns the size of the kernel's FRU name buffer. 1991 */ 1992 size_t 1993 cpu_get_name_bufsize() 1994 { 1995 return (UNUM_NAMLEN); 1996 } 1997 1998 /* 1999 * Cpu specific log func for UEs. 2000 */ 2001 static void 2002 log_ue_err(struct async_flt *aflt, char *unum) 2003 { 2004 spitf_async_flt *spf_flt = (spitf_async_flt *)aflt; 2005 int len = 0; 2006 2007 #ifdef DEBUG 2008 int afsr_priv = (aflt->flt_stat & P_AFSR_PRIV) ? 1 : 0; 2009 2010 /* 2011 * Paranoid Check for priv mismatch 2012 * Only applicable for UEs 2013 */ 2014 if (afsr_priv != aflt->flt_priv) { 2015 /* 2016 * The priv bits in %tstate and %afsr did not match; we expect 2017 * this to be very rare, so flag it with a message. 2018 */ 2019 cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, NULL, 2020 ": PRIV bit in TSTATE and AFSR mismatched; " 2021 "TSTATE.PRIV=%d used", (aflt->flt_priv) ? 1 : 0); 2022 2023 /* update saved afsr to reflect the correct priv */ 2024 aflt->flt_stat &= ~P_AFSR_PRIV; 2025 if (aflt->flt_priv) 2026 aflt->flt_stat |= P_AFSR_PRIV; 2027 } 2028 #endif /* DEBUG */ 2029 2030 (void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum, 2031 UNUM_NAMLEN, &len); 2032 2033 cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum, 2034 " Uncorrectable Memory Error on"); 2035 2036 if (SYND(aflt->flt_synd) == 0x3) { 2037 cpu_aflt_log(CE_WARN, 1, spf_flt, CPU_ERRID_FIRST, NULL, 2038 " Syndrome 0x3 indicates that this may not be a " 2039 "memory module problem"); 2040 } 2041 2042 if (aflt->flt_in_memory) 2043 cpu_log_ecmem_info(spf_flt); 2044 } 2045 2046 2047 /* 2048 * The cpu_async_log_err() function is called via the ue_drain() function to 2049 * handle logging for CPU events that are dequeued. As such, it can be invoked 2050 * from softint context, from AST processing in the trap() flow, or from the 2051 * panic flow. We decode the CPU-specific data, and log appropriate messages. 2052 */ 2053 static void 2054 cpu_async_log_err(void *flt) 2055 { 2056 spitf_async_flt *spf_flt = (spitf_async_flt *)flt; 2057 struct async_flt *aflt = (struct async_flt *)flt; 2058 char unum[UNUM_NAMLEN]; 2059 char *space; 2060 char *ecache_scrub_logstr = NULL; 2061 2062 switch (spf_flt->flt_type) { 2063 case CPU_UE_ERR: 2064 /* 2065 * We want to skip logging only if ALL the following 2066 * conditions are true: 2067 * 2068 * 1. We are not panicking 2069 * 2. There is only one error 2070 * 3. That error is a memory error 2071 * 4. The error is caused by the memory scrubber (in 2072 * which case the error will have occurred under 2073 * on_trap protection) 2074 * 5. 
The error is on a retired page 2075 * 2076 * Note 1: AFLT_PROT_EC is used places other than the memory 2077 * scrubber. However, none of those errors should occur 2078 * on a retired page. 2079 * 2080 * Note 2: In the CE case, these errors are discarded before 2081 * the errorq. In the UE case, we must wait until now -- 2082 * softcall() grabs a mutex, which we can't do at a high PIL. 2083 */ 2084 if (!panicstr && 2085 (aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE && 2086 aflt->flt_prot == AFLT_PROT_EC) { 2087 if (page_retire_check(aflt->flt_addr, NULL) == 0) { 2088 /* Zero the address to clear the error */ 2089 softcall(ecc_page_zero, (void *)aflt->flt_addr); 2090 return; 2091 } 2092 } 2093 2094 /* 2095 * Log the UE and check for causes of this UE error that 2096 * don't cause a trap (Copyback error). cpu_async_error() 2097 * has already checked the i/o buses for us. 2098 */ 2099 log_ue_err(aflt, unum); 2100 if (aflt->flt_in_memory) 2101 cpu_check_allcpus(aflt); 2102 break; 2103 2104 case CPU_EDP_LDP_ERR: 2105 if (aflt->flt_stat & P_AFSR_EDP) 2106 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, 2107 NULL, " EDP event on"); 2108 2109 if (aflt->flt_stat & P_AFSR_LDP) 2110 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, 2111 NULL, " LDP event on"); 2112 2113 /* Log ecache info if exist */ 2114 if (spf_flt->flt_ec_lcnt > 0) { 2115 cpu_log_ecmem_info(spf_flt); 2116 2117 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, 2118 NULL, " AFAR was derived from E$Tag"); 2119 } else { 2120 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, 2121 NULL, " No error found in ecache (No fault " 2122 "PA available)"); 2123 } 2124 break; 2125 2126 case CPU_WP_ERR: 2127 /* 2128 * If the memscrub thread hasn't yet read 2129 * all of memory, as we requested in the 2130 * trap handler, then give it a kick to 2131 * make sure it does. 2132 */ 2133 if (!isus2i && !isus2e && read_all_memscrub) 2134 memscrub_run(); 2135 2136 cpu_aflt_log(CE_WARN, 1, spf_flt, WP_LFLAGS, NULL, 2137 " WP event on"); 2138 return; 2139 2140 case CPU_BTO_BERR_ERR: 2141 /* 2142 * A bus timeout or error occurred that was in user mode or not 2143 * in a protected kernel code region. 2144 */ 2145 if (aflt->flt_stat & P_AFSR_BERR) { 2146 cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2, 2147 spf_flt, BERRTO_LFLAGS, NULL, 2148 " Bus Error on System Bus in %s mode from", 2149 aflt->flt_priv ? "privileged" : "user"); 2150 } 2151 2152 if (aflt->flt_stat & P_AFSR_TO) { 2153 cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2, 2154 spf_flt, BERRTO_LFLAGS, NULL, 2155 " Timeout on System Bus in %s mode from", 2156 aflt->flt_priv ? "privileged" : "user"); 2157 } 2158 2159 return; 2160 2161 case CPU_PANIC_CP_ERR: 2162 /* 2163 * Process the Copyback (CP) error info (if any) obtained from 2164 * polling all the cpus in the panic flow. This case is only 2165 * entered if we are panicking. 2166 */ 2167 ASSERT(panicstr != NULL); 2168 ASSERT(aflt->flt_id == panic_aflt.flt_id); 2169 2170 /* See which space - this info may not exist */ 2171 if (panic_aflt.flt_status & ECC_D_TRAP) 2172 space = "Data "; 2173 else if (panic_aflt.flt_status & ECC_I_TRAP) 2174 space = "Instruction "; 2175 else 2176 space = ""; 2177 2178 cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL, 2179 " AFAR was derived from UE report," 2180 " CP event on CPU%d (caused %saccess error on %s%d)", 2181 aflt->flt_inst, space, (panic_aflt.flt_status & ECC_IOBUS) ? 
2182 "IOBUS" : "CPU", panic_aflt.flt_bus_id); 2183 2184 if (spf_flt->flt_ec_lcnt > 0) 2185 cpu_log_ecmem_info(spf_flt); 2186 else 2187 cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, 2188 NULL, " No cache dump available"); 2189 2190 return; 2191 2192 case CPU_TRAPPING_CP_ERR: 2193 /* 2194 * For sabre only. This is a copyback ecache parity error due 2195 * to a PCI DMA read. We should be panicking if we get here. 2196 */ 2197 ASSERT(panicstr != NULL); 2198 cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL, 2199 " AFAR was derived from UE report," 2200 " CP event on CPU%d (caused Data access error " 2201 "on PCIBus)", aflt->flt_inst); 2202 return; 2203 2204 /* 2205 * We log the ecache lines of the following states, 2206 * clean_bad_idle, clean_bad_busy, dirty_bad_idle and 2207 * dirty_bad_busy if ecache_scrub_verbose is set and panic 2208 * in addition to logging if ecache_scrub_panic is set. 2209 */ 2210 case CPU_BADLINE_CI_ERR: 2211 ecache_scrub_logstr = "CBI"; 2212 /* FALLTHRU */ 2213 2214 case CPU_BADLINE_CB_ERR: 2215 if (ecache_scrub_logstr == NULL) 2216 ecache_scrub_logstr = "CBB"; 2217 /* FALLTHRU */ 2218 2219 case CPU_BADLINE_DI_ERR: 2220 if (ecache_scrub_logstr == NULL) 2221 ecache_scrub_logstr = "DBI"; 2222 /* FALLTHRU */ 2223 2224 case CPU_BADLINE_DB_ERR: 2225 if (ecache_scrub_logstr == NULL) 2226 ecache_scrub_logstr = "DBB"; 2227 2228 cpu_aflt_log(CE_NOTE, 2, spf_flt, 2229 (CPU_ERRID_FIRST | CPU_FLTCPU), NULL, 2230 " %s event on", ecache_scrub_logstr); 2231 cpu_log_ecmem_info(spf_flt); 2232 2233 return; 2234 2235 case CPU_ORPHAN_CP_ERR: 2236 /* 2237 * Orphan CPs, where the CP bit is set, but when a CPU 2238 * doesn't report a UE. 2239 */ 2240 if (read_all_memscrub) 2241 memscrub_run(); 2242 2243 cpu_aflt_log(CE_NOTE, 2, spf_flt, (CP_LFLAGS | CPU_FLTCPU), 2244 NULL, " Orphan CP event on"); 2245 2246 /* Log ecache info if exist */ 2247 if (spf_flt->flt_ec_lcnt > 0) 2248 cpu_log_ecmem_info(spf_flt); 2249 else 2250 cpu_aflt_log(CE_NOTE, 2, spf_flt, 2251 (CP_LFLAGS | CPU_FLTCPU), NULL, 2252 " No error found in ecache (No fault " 2253 "PA available"); 2254 return; 2255 2256 case CPU_ECACHE_ADDR_PAR_ERR: 2257 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2258 " E$ Tag Address Parity error on"); 2259 cpu_log_ecmem_info(spf_flt); 2260 return; 2261 2262 case CPU_ECACHE_STATE_ERR: 2263 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2264 " E$ Tag State Parity error on"); 2265 cpu_log_ecmem_info(spf_flt); 2266 return; 2267 2268 case CPU_ECACHE_TAG_ERR: 2269 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2270 " E$ Tag scrub event on"); 2271 cpu_log_ecmem_info(spf_flt); 2272 return; 2273 2274 case CPU_ECACHE_ETP_ETS_ERR: 2275 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2276 " AFSR.ETP is set and AFSR.ETS is zero on"); 2277 cpu_log_ecmem_info(spf_flt); 2278 return; 2279 2280 2281 case CPU_ADDITIONAL_ERR: 2282 cpu_aflt_log(CE_WARN, 1, spf_flt, CMN_LFLAGS & ~CPU_SPACE, NULL, 2283 " Additional errors detected during error processing on"); 2284 return; 2285 2286 default: 2287 cmn_err(CE_WARN, "cpu_async_log_err: fault %p has unknown " 2288 "fault type %x", (void *)spf_flt, spf_flt->flt_type); 2289 return; 2290 } 2291 2292 /* ... 
fall through from the UE, EDP, or LDP cases */ 2293 2294 if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) { 2295 if (!panicstr) { 2296 (void) page_retire(aflt->flt_addr, PR_UE); 2297 } else { 2298 /* 2299 * Clear UEs on panic so that we don't 2300 * get haunted by them during panic or 2301 * after reboot 2302 */ 2303 clearphys(P2ALIGN(aflt->flt_addr, 64), 2304 cpunodes[CPU->cpu_id].ecache_size, 2305 cpunodes[CPU->cpu_id].ecache_linesize); 2306 2307 (void) clear_errors(NULL, NULL); 2308 } 2309 } 2310 2311 /* 2312 * Log final recover message 2313 */ 2314 if (!panicstr) { 2315 if (!aflt->flt_priv) { 2316 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, 2317 NULL, " Above Error is in User Mode" 2318 "\n and is fatal: " 2319 "will SIGKILL process and notify contract"); 2320 } else if (aflt->flt_prot == AFLT_PROT_COPY && aflt->flt_core) { 2321 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, 2322 NULL, " Above Error detected while dumping core;" 2323 "\n core file will be truncated"); 2324 } else if (aflt->flt_prot == AFLT_PROT_COPY) { 2325 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, 2326 NULL, " Above Error is due to Kernel access" 2327 "\n to User space and is fatal: " 2328 "will SIGKILL process and notify contract"); 2329 } else if (aflt->flt_prot == AFLT_PROT_EC) { 2330 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, NULL, 2331 " Above Error detected by protected Kernel code" 2332 "\n that will try to clear error from system"); 2333 } 2334 } 2335 } 2336 2337 2338 /* 2339 * Check all cpus for non-trapping UE-causing errors 2340 * In Ultra I/II, we look for copyback errors (CPs) 2341 */ 2342 void 2343 cpu_check_allcpus(struct async_flt *aflt) 2344 { 2345 spitf_async_flt cp; 2346 spitf_async_flt *spf_cpflt = &cp; 2347 struct async_flt *cpflt = (struct async_flt *)&cp; 2348 int pix; 2349 2350 cpflt->flt_id = aflt->flt_id; 2351 cpflt->flt_addr = aflt->flt_addr; 2352 2353 for (pix = 0; pix < NCPU; pix++) { 2354 if (CPU_XCALL_READY(pix)) { 2355 xc_one(pix, (xcfunc_t *)get_cpu_status, 2356 (uint64_t)cpflt, 0); 2357 2358 if (cpflt->flt_stat & P_AFSR_CP) { 2359 char *space; 2360 2361 /* See which space - this info may not exist */ 2362 if (aflt->flt_status & ECC_D_TRAP) 2363 space = "Data "; 2364 else if (aflt->flt_status & ECC_I_TRAP) 2365 space = "Instruction "; 2366 else 2367 space = ""; 2368 2369 cpu_aflt_log(CE_WARN, 1, spf_cpflt, CP_LFLAGS, 2370 NULL, " AFAR was derived from UE report," 2371 " CP event on CPU%d (caused %saccess " 2372 "error on %s%d)", pix, space, 2373 (aflt->flt_status & ECC_IOBUS) ? 2374 "IOBUS" : "CPU", aflt->flt_bus_id); 2375 2376 if (spf_cpflt->flt_ec_lcnt > 0) 2377 cpu_log_ecmem_info(spf_cpflt); 2378 else 2379 cpu_aflt_log(CE_WARN, 2, spf_cpflt, 2380 CPU_ERRID_FIRST, NULL, 2381 " No cache dump available"); 2382 } 2383 } 2384 } 2385 } 2386 2387 #ifdef DEBUG 2388 int test_mp_cp = 0; 2389 #endif 2390 2391 /* 2392 * Cross-call callback routine to tell a CPU to read its own %afsr to check 2393 * for copyback errors and capture relevant information. 
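 *
 * The argument is really a struct async_flt pointer cast to a uint64_t; the
 * caller in cpu_check_allcpus() above invokes it as
 *
 *	xc_one(pix, (xcfunc_t *)get_cpu_status, (uint64_t)cpflt, 0);
 *
 * and then checks cpflt->flt_stat for P_AFSR_CP to see whether that CPU had
 * a copyback error (and, if so, a captured E$ line in the enclosing
 * spitf_async_flt).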
2394 */ 2395 static uint_t 2396 get_cpu_status(uint64_t arg) 2397 { 2398 struct async_flt *aflt = (struct async_flt *)arg; 2399 spitf_async_flt *spf_flt = (spitf_async_flt *)arg; 2400 uint64_t afsr; 2401 uint32_t ec_idx; 2402 uint64_t sdbh, sdbl; 2403 int i; 2404 uint32_t ec_set_size; 2405 uchar_t valid; 2406 ec_data_t ec_data[8]; 2407 uint64_t ec_tag, flt_addr_tag, oafsr; 2408 uint64_t *acc_afsr = NULL; 2409 2410 get_asyncflt(&afsr); 2411 if (CPU_PRIVATE(CPU) != NULL) { 2412 acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 2413 afsr |= *acc_afsr; 2414 *acc_afsr = 0; 2415 } 2416 2417 #ifdef DEBUG 2418 if (test_mp_cp) 2419 afsr |= P_AFSR_CP; 2420 #endif 2421 aflt->flt_stat = afsr; 2422 2423 if (afsr & P_AFSR_CP) { 2424 /* 2425 * Capture the UDBs 2426 */ 2427 get_udb_errors(&sdbh, &sdbl); 2428 spf_flt->flt_sdbh = (ushort_t)(sdbh & 0x3FF); 2429 spf_flt->flt_sdbl = (ushort_t)(sdbl & 0x3FF); 2430 2431 /* 2432 * Clear CP bit before capturing ecache data 2433 * and AFSR info. 2434 */ 2435 set_asyncflt(P_AFSR_CP); 2436 2437 /* 2438 * See if we can capture the ecache line for the 2439 * fault PA. 2440 * 2441 * Return a valid matching ecache line, if any. 2442 * Otherwise, return the first matching ecache 2443 * line marked invalid. 2444 */ 2445 flt_addr_tag = aflt->flt_addr >> cpu_ec_tag_shift; 2446 ec_set_size = cpunodes[CPU->cpu_id].ecache_size / 2447 ecache_associativity; 2448 spf_flt->flt_ec_lcnt = 0; 2449 2450 for (i = 0, ec_idx = (aflt->flt_addr % ec_set_size); 2451 i < ecache_associativity; i++, ec_idx += ec_set_size) { 2452 get_ecache_dtag(P2ALIGN(ec_idx, 64), 2453 (uint64_t *)&ec_data[0], &ec_tag, &oafsr, 2454 acc_afsr); 2455 2456 if ((ec_tag & cpu_ec_tag_mask) != flt_addr_tag) 2457 continue; 2458 2459 valid = cpu_ec_state_valid & 2460 (uchar_t)((ec_tag & cpu_ec_state_mask) >> 2461 cpu_ec_state_shift); 2462 2463 if (valid || spf_flt->flt_ec_lcnt == 0) { 2464 spf_flt->flt_ec_tag = ec_tag; 2465 bcopy(&ec_data, &spf_flt->flt_ec_data, 2466 sizeof (ec_data)); 2467 spf_flt->flt_ec_lcnt = 1; 2468 2469 if (valid) 2470 break; 2471 } 2472 } 2473 } 2474 return (0); 2475 } 2476 2477 /* 2478 * CPU-module callback for the non-panicking CPUs. This routine is invoked 2479 * from panic_idle() as part of the other CPUs stopping themselves when a 2480 * panic occurs. We need to be VERY careful what we do here, since panicstr 2481 * is NOT set yet and we cannot blow through locks. If panic_aflt is set 2482 * (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for 2483 * CP error information. 2484 */ 2485 void 2486 cpu_async_panic_callb(void) 2487 { 2488 spitf_async_flt cp; 2489 struct async_flt *aflt = (struct async_flt *)&cp; 2490 uint64_t *scrub_afsr; 2491 2492 if (panic_aflt.flt_id != 0) { 2493 aflt->flt_addr = panic_aflt.flt_addr; 2494 (void) get_cpu_status((uint64_t)aflt); 2495 2496 if (CPU_PRIVATE(CPU) != NULL) { 2497 scrub_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 2498 if (*scrub_afsr & P_AFSR_CP) { 2499 aflt->flt_stat |= *scrub_afsr; 2500 *scrub_afsr = 0; 2501 } 2502 } 2503 if (aflt->flt_stat & P_AFSR_CP) { 2504 aflt->flt_id = panic_aflt.flt_id; 2505 aflt->flt_panic = 1; 2506 aflt->flt_inst = CPU->cpu_id; 2507 aflt->flt_class = CPU_FAULT; 2508 cp.flt_type = CPU_PANIC_CP_ERR; 2509 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP, 2510 (void *)&cp, sizeof (cp), ue_queue, 2511 aflt->flt_panic); 2512 } 2513 } 2514 } 2515 2516 /* 2517 * Turn off all cpu error detection, normally only used for panics. 
2518 */ 2519 void 2520 cpu_disable_errors(void) 2521 { 2522 xt_all(set_error_enable_tl1, EER_DISABLE, EER_SET_ABSOLUTE); 2523 } 2524 2525 /* 2526 * Enable errors. 2527 */ 2528 void 2529 cpu_enable_errors(void) 2530 { 2531 xt_all(set_error_enable_tl1, EER_ENABLE, EER_SET_ABSOLUTE); 2532 } 2533 2534 static void 2535 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err) 2536 { 2537 uint64_t aligned_addr = P2ALIGN(ecc->flt_addr, 8); 2538 int i, loop = 1; 2539 ushort_t ecc_0; 2540 uint64_t paddr; 2541 uint64_t data; 2542 2543 if (verbose) 2544 loop = 8; 2545 for (i = 0; i < loop; i++) { 2546 paddr = aligned_addr + (i * 8); 2547 data = lddphys(paddr); 2548 if (verbose) { 2549 if (ce_err) { 2550 ecc_0 = ecc_gen((uint32_t)(data>>32), 2551 (uint32_t)data); 2552 cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS, 2553 NULL, " Paddr 0x%" PRIx64 ", " 2554 "Data 0x%08x.%08x, ECC 0x%x", paddr, 2555 (uint32_t)(data>>32), (uint32_t)data, 2556 ecc_0); 2557 } else { 2558 cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS, 2559 NULL, " Paddr 0x%" PRIx64 ", " 2560 "Data 0x%08x.%08x", paddr, 2561 (uint32_t)(data>>32), (uint32_t)data); 2562 } 2563 } 2564 } 2565 } 2566 2567 static struct { /* sec-ded-s4ed ecc code */ 2568 uint_t hi, lo; 2569 } ecc_code[8] = { 2570 { 0xee55de23U, 0x16161161U }, 2571 { 0x55eede93U, 0x61612212U }, 2572 { 0xbb557b8cU, 0x49494494U }, 2573 { 0x55bb7b6cU, 0x94948848U }, 2574 { 0x16161161U, 0xee55de23U }, 2575 { 0x61612212U, 0x55eede93U }, 2576 { 0x49494494U, 0xbb557b8cU }, 2577 { 0x94948848U, 0x55bb7b6cU } 2578 }; 2579 2580 static ushort_t 2581 ecc_gen(uint_t high_bytes, uint_t low_bytes) 2582 { 2583 int i, j; 2584 uchar_t checker, bit_mask; 2585 struct { 2586 uint_t hi, lo; 2587 } hex_data, masked_data[8]; 2588 2589 hex_data.hi = high_bytes; 2590 hex_data.lo = low_bytes; 2591 2592 /* mask out bits according to sec-ded-s4ed ecc code */ 2593 for (i = 0; i < 8; i++) { 2594 masked_data[i].hi = hex_data.hi & ecc_code[i].hi; 2595 masked_data[i].lo = hex_data.lo & ecc_code[i].lo; 2596 } 2597 2598 /* 2599 * xor all bits in masked_data[i] to get bit_i of checker, 2600 * where i = 0 to 7 2601 */ 2602 checker = 0; 2603 for (i = 0; i < 8; i++) { 2604 bit_mask = 1 << i; 2605 for (j = 0; j < 32; j++) { 2606 if (masked_data[i].lo & 1) checker ^= bit_mask; 2607 if (masked_data[i].hi & 1) checker ^= bit_mask; 2608 masked_data[i].hi >>= 1; 2609 masked_data[i].lo >>= 1; 2610 } 2611 } 2612 return (checker); 2613 } 2614 2615 /* 2616 * Flush the entire ecache using displacement flush by reading through a 2617 * physical address range as large as the ecache. 2618 */ 2619 void 2620 cpu_flush_ecache(void) 2621 { 2622 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2, 2623 cpunodes[CPU->cpu_id].ecache_linesize); 2624 } 2625 2626 /* 2627 * read and display the data in the cache line where the 2628 * original ce error occurred. 2629 * This routine is mainly used for debugging new hardware. 
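 *
 * The body below disables ECC error traps, displacement-flushes the E$ so
 * the data is re-fetched from memory, dumps eight 8-byte words starting at
 * the 8-byte-aligned fault address via cpu_read_paddr() (for a CE the check
 * bits recomputed by ecc_gen() are shown as well), runs check_ecc() to
 * examine and clear anything the reads tripped, and finally re-enables
 * error traps.  A debugging caller wanting the verbose CE dump would pass
 * both verbose and ce_err as 1, e.g. read_ecc_data(aflt, 1, 1) -- shown
 * here for illustration only.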
2630 */ 2631 void 2632 read_ecc_data(struct async_flt *ecc, short verbose, short ce_err) 2633 { 2634 kpreempt_disable(); 2635 /* disable ECC error traps */ 2636 set_error_enable(EER_ECC_DISABLE); 2637 2638 /* 2639 * flush the ecache 2640 * read the data 2641 * check to see if an ECC error occured 2642 */ 2643 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2, 2644 cpunodes[CPU->cpu_id].ecache_linesize); 2645 set_lsu(get_lsu() | cache_boot_state); 2646 cpu_read_paddr(ecc, verbose, ce_err); 2647 (void) check_ecc(ecc); 2648 2649 /* enable ECC error traps */ 2650 set_error_enable(EER_ENABLE); 2651 kpreempt_enable(); 2652 } 2653 2654 /* 2655 * Check the AFSR bits for UE/CE persistence. 2656 * If UE or CE errors are detected, the routine will 2657 * clears all the AFSR sticky bits (except CP for 2658 * spitfire/blackbird) and the UDBs. 2659 * if ce_debug or ue_debug is set, log any ue/ce errors detected. 2660 */ 2661 static int 2662 check_ecc(struct async_flt *ecc) 2663 { 2664 uint64_t t_afsr; 2665 uint64_t t_afar; 2666 uint64_t udbh; 2667 uint64_t udbl; 2668 ushort_t udb; 2669 int persistent = 0; 2670 2671 /* 2672 * Capture the AFSR, AFAR and UDBs info 2673 */ 2674 get_asyncflt(&t_afsr); 2675 get_asyncaddr(&t_afar); 2676 t_afar &= SABRE_AFAR_PA; 2677 get_udb_errors(&udbh, &udbl); 2678 2679 if ((t_afsr & P_AFSR_UE) || (t_afsr & P_AFSR_CE)) { 2680 /* 2681 * Clear the errors 2682 */ 2683 clr_datapath(); 2684 2685 if (isus2i || isus2e) 2686 set_asyncflt(t_afsr); 2687 else 2688 set_asyncflt(t_afsr & ~P_AFSR_CP); 2689 2690 /* 2691 * determine whether to check UDBH or UDBL for persistence 2692 */ 2693 if (ecc->flt_synd & UDBL_REG) { 2694 udb = (ushort_t)udbl; 2695 t_afar |= 0x8; 2696 } else { 2697 udb = (ushort_t)udbh; 2698 } 2699 2700 if (ce_debug || ue_debug) { 2701 spitf_async_flt spf_flt; /* for logging */ 2702 struct async_flt *aflt = 2703 (struct async_flt *)&spf_flt; 2704 2705 /* Package the info nicely in the spf_flt struct */ 2706 bzero(&spf_flt, sizeof (spitf_async_flt)); 2707 aflt->flt_stat = t_afsr; 2708 aflt->flt_addr = t_afar; 2709 spf_flt.flt_sdbh = (ushort_t)(udbh & 0x3FF); 2710 spf_flt.flt_sdbl = (ushort_t)(udbl & 0x3FF); 2711 2712 cpu_aflt_log(CE_CONT, 0, &spf_flt, (CPU_AFSR | 2713 CPU_AFAR | CPU_UDBH | CPU_UDBL), NULL, 2714 " check_ecc: Dumping captured error states ..."); 2715 } 2716 2717 /* 2718 * if the fault addresses don't match, not persistent 2719 */ 2720 if (t_afar != ecc->flt_addr) { 2721 return (persistent); 2722 } 2723 2724 /* 2725 * check for UE persistence 2726 * since all DIMMs in the bank are identified for a UE, 2727 * there's no reason to check the syndrome 2728 */ 2729 if ((ecc->flt_stat & P_AFSR_UE) && (t_afsr & P_AFSR_UE)) { 2730 persistent = 1; 2731 } 2732 2733 /* 2734 * check for CE persistence 2735 */ 2736 if ((ecc->flt_stat & P_AFSR_CE) && (t_afsr & P_AFSR_CE)) { 2737 if ((udb & P_DER_E_SYND) == 2738 (ecc->flt_synd & P_DER_E_SYND)) { 2739 persistent = 1; 2740 } 2741 } 2742 } 2743 return (persistent); 2744 } 2745 2746 #ifdef HUMMINGBIRD 2747 #define HB_FULL_DIV 1 2748 #define HB_HALF_DIV 2 2749 #define HB_LOWEST_DIV 8 2750 #define HB_ECLK_INVALID 0xdeadbad 2751 static uint64_t hb_eclk[HB_LOWEST_DIV + 1] = { 2752 HB_ECLK_INVALID, HB_ECLK_1, HB_ECLK_2, HB_ECLK_INVALID, 2753 HB_ECLK_4, HB_ECLK_INVALID, HB_ECLK_6, HB_ECLK_INVALID, 2754 HB_ECLK_8 }; 2755 2756 #define HB_SLOW_DOWN 0 2757 #define HB_SPEED_UP 1 2758 2759 #define SET_ESTAR_MODE(mode) \ 2760 stdphysio(HB_ESTAR_MODE, (mode)); \ 2761 /* \ 2762 * PLL logic requires minimum of 16 clock \ 
2763 * cycles to lock to the new clock speed. \ 2764 * Wait 1 usec to satisfy this requirement. \ 2765 */ \ 2766 drv_usecwait(1); 2767 2768 #define CHANGE_REFRESH_COUNT(direction, cur_div, new_div) \ 2769 { \ 2770 volatile uint64_t data; \ 2771 uint64_t count, new_count; \ 2772 clock_t delay; \ 2773 data = lddphysio(HB_MEM_CNTRL0); \ 2774 count = (data & HB_REFRESH_COUNT_MASK) >> \ 2775 HB_REFRESH_COUNT_SHIFT; \ 2776 new_count = (HB_REFRESH_INTERVAL * \ 2777 cpunodes[CPU->cpu_id].clock_freq) / \ 2778 (HB_REFRESH_CLOCKS_PER_COUNT * (new_div) * NANOSEC);\ 2779 data = (data & ~HB_REFRESH_COUNT_MASK) | \ 2780 (new_count << HB_REFRESH_COUNT_SHIFT); \ 2781 stdphysio(HB_MEM_CNTRL0, data); \ 2782 data = lddphysio(HB_MEM_CNTRL0); \ 2783 /* \ 2784 * If we are slowing down the cpu and Memory \ 2785 * Self Refresh is not enabled, it is required \ 2786 * to wait for old refresh count to count-down and \ 2787 * new refresh count to go into effect (let new value \ 2788 * counts down once). \ 2789 */ \ 2790 if ((direction) == HB_SLOW_DOWN && \ 2791 (data & HB_SELF_REFRESH_MASK) == 0) { \ 2792 /* \ 2793 * Each count takes 64 cpu clock cycles \ 2794 * to decrement. Wait for current refresh \ 2795 * count plus new refresh count at current \ 2796 * cpu speed to count down to zero. Round \ 2797 * up the delay time. \ 2798 */ \ 2799 delay = ((HB_REFRESH_CLOCKS_PER_COUNT * \ 2800 (count + new_count) * MICROSEC * (cur_div)) /\ 2801 cpunodes[CPU->cpu_id].clock_freq) + 1; \ 2802 drv_usecwait(delay); \ 2803 } \ 2804 } 2805 2806 #define SET_SELF_REFRESH(bit) \ 2807 { \ 2808 volatile uint64_t data; \ 2809 data = lddphysio(HB_MEM_CNTRL0); \ 2810 data = (data & ~HB_SELF_REFRESH_MASK) | \ 2811 ((bit) << HB_SELF_REFRESH_SHIFT); \ 2812 stdphysio(HB_MEM_CNTRL0, data); \ 2813 data = lddphysio(HB_MEM_CNTRL0); \ 2814 } 2815 #endif /* HUMMINGBIRD */ 2816 2817 /* ARGSUSED */ 2818 void 2819 cpu_change_speed(uint64_t new_divisor, uint64_t arg2) 2820 { 2821 #ifdef HUMMINGBIRD 2822 uint64_t cur_mask, cur_divisor = 0; 2823 volatile uint64_t reg; 2824 processor_info_t *pi = &(CPU->cpu_type_info); 2825 int index; 2826 2827 if ((new_divisor < HB_FULL_DIV || new_divisor > HB_LOWEST_DIV) || 2828 (hb_eclk[new_divisor] == HB_ECLK_INVALID)) { 2829 cmn_err(CE_WARN, "cpu_change_speed: bad divisor 0x%lx", 2830 new_divisor); 2831 return; 2832 } 2833 2834 reg = lddphysio(HB_ESTAR_MODE); 2835 cur_mask = reg & HB_ECLK_MASK; 2836 for (index = HB_FULL_DIV; index <= HB_LOWEST_DIV; index++) { 2837 if (hb_eclk[index] == cur_mask) { 2838 cur_divisor = index; 2839 break; 2840 } 2841 } 2842 2843 if (cur_divisor == 0) 2844 cmn_err(CE_PANIC, "cpu_change_speed: current divisor " 2845 "can't be determined!"); 2846 2847 /* 2848 * If we are already at the requested divisor speed, just 2849 * return. 2850 */ 2851 if (cur_divisor == new_divisor) 2852 return; 2853 2854 if (cur_divisor == HB_FULL_DIV && new_divisor == HB_HALF_DIV) { 2855 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor); 2856 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2857 SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE); 2858 2859 } else if (cur_divisor == HB_HALF_DIV && new_divisor == HB_FULL_DIV) { 2860 SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE); 2861 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2862 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2863 CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor); 2864 2865 } else if (cur_divisor == HB_FULL_DIV && new_divisor > HB_HALF_DIV) { 2866 /* 2867 * Transition to 1/2 speed first, then to 2868 * lower speed. 
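 * For example, a request to go from full speed (divisor 1) to divisor 6
 * is performed as 1 -> 2 -> 6: the refresh count is recomputed and the
 * HB_ESTAR_MODE register rewritten at each step, with memory self refresh
 * enabled once the clock is at half speed or below.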
2869 */ 2870 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, HB_HALF_DIV); 2871 SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]); 2872 SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE); 2873 2874 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, HB_HALF_DIV, new_divisor); 2875 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2876 2877 } else if (cur_divisor > HB_HALF_DIV && new_divisor == HB_FULL_DIV) { 2878 /* 2879 * Transition to 1/2 speed first, then to 2880 * full speed. 2881 */ 2882 SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]); 2883 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2884 CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, HB_HALF_DIV); 2885 2886 SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE); 2887 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2888 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2889 CHANGE_REFRESH_COUNT(HB_SPEED_UP, HB_HALF_DIV, new_divisor); 2890 2891 } else if (cur_divisor < new_divisor) { 2892 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor); 2893 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2894 2895 } else if (cur_divisor > new_divisor) { 2896 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2897 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2898 CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor); 2899 } 2900 CPU->cpu_m.divisor = (uchar_t)new_divisor; 2901 CPU->cpu_curr_clock = 2902 (((uint64_t)pi->pi_clock * 1000000) / new_divisor); 2903 #endif 2904 } 2905 2906 /* 2907 * Clear the AFSR sticky bits and the UDBs. For Sabre/Spitfire/Blackbird, 2908 * we clear all the sticky bits. If a non-null pointer to a async fault 2909 * structure argument is passed in, the captured error state (AFSR, AFAR, UDBs) 2910 * info will be returned in the structure. If a non-null pointer to a 2911 * uint64_t is passed in, this will be updated if the CP bit is set in the 2912 * AFSR. The afsr will be returned. 2913 */ 2914 static uint64_t 2915 clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr) 2916 { 2917 struct async_flt *aflt = (struct async_flt *)spf_flt; 2918 uint64_t afsr; 2919 uint64_t udbh, udbl; 2920 2921 get_asyncflt(&afsr); 2922 2923 if ((acc_afsr != NULL) && (afsr & P_AFSR_CP)) 2924 *acc_afsr |= afsr; 2925 2926 if (spf_flt != NULL) { 2927 aflt->flt_stat = afsr; 2928 get_asyncaddr(&aflt->flt_addr); 2929 aflt->flt_addr &= SABRE_AFAR_PA; 2930 2931 get_udb_errors(&udbh, &udbl); 2932 spf_flt->flt_sdbh = (ushort_t)(udbh & 0x3FF); 2933 spf_flt->flt_sdbl = (ushort_t)(udbl & 0x3FF); 2934 } 2935 2936 set_asyncflt(afsr); /* clear afsr */ 2937 clr_datapath(); /* clear udbs */ 2938 return (afsr); 2939 } 2940 2941 /* 2942 * Scan the ecache to look for bad lines. If found, the afsr, afar, e$ data 2943 * tag of the first bad line will be returned. We also return the old-afsr 2944 * (before clearing the sticky bits). The linecnt data will be updated to 2945 * indicate the number of bad lines detected. 
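 *
 * A typical caller (see the EDP/LDP and orphan-CP handling above) seeds the
 * address with AFLT_INV_ADDR and checks whether it was updated:
 *
 *	aflt->flt_addr = AFLT_INV_ADDR;
 *	scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
 *	    &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
 *	if (aflt->flt_addr != AFLT_INV_ADDR)
 *		-- a bad line was found; flt_addr now holds its PA --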
2946 */ 2947 static void 2948 scan_ecache(uint64_t *t_afar, ec_data_t *ecache_data, 2949 uint64_t *ecache_tag, int *linecnt, uint64_t *t_afsr) 2950 { 2951 ec_data_t t_ecdata[8]; 2952 uint64_t t_etag, oafsr; 2953 uint64_t pa = AFLT_INV_ADDR; 2954 uint32_t i, j, ecache_sz; 2955 uint64_t acc_afsr = 0; 2956 uint64_t *cpu_afsr = NULL; 2957 2958 if (CPU_PRIVATE(CPU) != NULL) 2959 cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 2960 2961 *linecnt = 0; 2962 ecache_sz = cpunodes[CPU->cpu_id].ecache_size; 2963 2964 for (i = 0; i < ecache_sz; i += 64) { 2965 get_ecache_dtag(i, (uint64_t *)&t_ecdata[0], &t_etag, &oafsr, 2966 cpu_afsr); 2967 acc_afsr |= oafsr; 2968 2969 /* 2970 * Scan through the whole 64 bytes line in 8 8-byte chunks 2971 * looking for the first occurrence of an EDP error. The AFSR 2972 * info is captured for each 8-byte chunk. Note that for 2973 * Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in 2974 * 16-byte chunk granularity (i.e. the AFSR will be the same 2975 * for the high and low 8-byte words within the 16-byte chunk). 2976 * For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte 2977 * granularity and only PSYND bits [7:0] are used. 2978 */ 2979 for (j = 0; j < 8; j++) { 2980 ec_data_t *ecdptr = &t_ecdata[j]; 2981 2982 if (ecdptr->ec_afsr & P_AFSR_EDP) { 2983 uint64_t errpa; 2984 ushort_t psynd; 2985 uint32_t ec_set_size = ecache_sz / 2986 ecache_associativity; 2987 2988 /* 2989 * For Spitfire/Blackbird, we need to look at 2990 * the PSYND to make sure that this 8-byte chunk 2991 * is the right one. PSYND bits [15:8] belong 2992 * to the upper 8-byte (even) chunk. Bits 2993 * [7:0] belong to the lower 8-byte chunk (odd). 2994 */ 2995 psynd = ecdptr->ec_afsr & P_AFSR_P_SYND; 2996 if (!isus2i && !isus2e) { 2997 if (j & 0x1) 2998 psynd = psynd & 0xFF; 2999 else 3000 psynd = psynd >> 8; 3001 3002 if (!psynd) 3003 continue; /* wrong chunk */ 3004 } 3005 3006 /* Construct the PA */ 3007 errpa = ((t_etag & cpu_ec_tag_mask) << 3008 cpu_ec_tag_shift) | ((i | (j << 3)) % 3009 ec_set_size); 3010 3011 /* clean up the cache line */ 3012 flushecacheline(P2ALIGN(errpa, 64), 3013 cpunodes[CPU->cpu_id].ecache_size); 3014 3015 oafsr = clear_errors(NULL, cpu_afsr); 3016 acc_afsr |= oafsr; 3017 3018 (*linecnt)++; 3019 3020 /* 3021 * Capture the PA for the first bad line found. 3022 * Return the ecache dump and tag info. 3023 */ 3024 if (pa == AFLT_INV_ADDR) { 3025 int k; 3026 3027 pa = errpa; 3028 for (k = 0; k < 8; k++) 3029 ecache_data[k] = t_ecdata[k]; 3030 *ecache_tag = t_etag; 3031 } 3032 break; 3033 } 3034 } 3035 } 3036 *t_afar = pa; 3037 *t_afsr = acc_afsr; 3038 } 3039 3040 static void 3041 cpu_log_ecmem_info(spitf_async_flt *spf_flt) 3042 { 3043 struct async_flt *aflt = (struct async_flt *)spf_flt; 3044 uint64_t ecache_tag = spf_flt->flt_ec_tag; 3045 char linestr[30]; 3046 char *state_str; 3047 int i; 3048 3049 /* 3050 * Check the ecache tag to make sure it 3051 * is valid. If invalid, a memory dump was 3052 * captured instead of a ecache dump. 
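 *
 * In the memory-dump case the ec_afsr fields of flt_ec_data[] were left at
 * zero when the data was captured, so the per-chunk PSYND/EDP checks in the
 * loop below simply print each 8-byte word without the "*Bad*" annotation.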
3053 */ 3054 if (spf_flt->flt_ec_tag != AFLT_INV_ADDR) { 3055 uchar_t eparity = (uchar_t) 3056 ((ecache_tag & cpu_ec_par_mask) >> cpu_ec_par_shift); 3057 3058 uchar_t estate = (uchar_t) 3059 ((ecache_tag & cpu_ec_state_mask) >> cpu_ec_state_shift); 3060 3061 if (estate == cpu_ec_state_shr) 3062 state_str = "Shared"; 3063 else if (estate == cpu_ec_state_exl) 3064 state_str = "Exclusive"; 3065 else if (estate == cpu_ec_state_own) 3066 state_str = "Owner"; 3067 else if (estate == cpu_ec_state_mod) 3068 state_str = "Modified"; 3069 else 3070 state_str = "Invalid"; 3071 3072 if (spf_flt->flt_ec_lcnt > 1) { 3073 (void) snprintf(linestr, sizeof (linestr), 3074 "Badlines found=%d", spf_flt->flt_ec_lcnt); 3075 } else { 3076 linestr[0] = '\0'; 3077 } 3078 3079 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL, 3080 " PA=0x%08x.%08x\n E$tag 0x%08x.%08x E$State: %s " 3081 "E$parity 0x%02x %s", (uint32_t)(aflt->flt_addr >> 32), 3082 (uint32_t)aflt->flt_addr, (uint32_t)(ecache_tag >> 32), 3083 (uint32_t)ecache_tag, state_str, 3084 (uint32_t)eparity, linestr); 3085 } else { 3086 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL, 3087 " E$tag != PA from AFAR; E$line was victimized" 3088 "\n dumping memory from PA 0x%08x.%08x instead", 3089 (uint32_t)(P2ALIGN(aflt->flt_addr, 64) >> 32), 3090 (uint32_t)P2ALIGN(aflt->flt_addr, 64)); 3091 } 3092 3093 /* 3094 * Dump out all 8 8-byte ecache data captured 3095 * For each 8-byte data captured, we check the 3096 * captured afsr's parity syndrome to find out 3097 * which 8-byte chunk is bad. For memory dump, the 3098 * AFSR values were initialized to 0. 3099 */ 3100 for (i = 0; i < 8; i++) { 3101 ec_data_t *ecdptr; 3102 uint_t offset; 3103 ushort_t psynd; 3104 ushort_t bad; 3105 uint64_t edp; 3106 3107 offset = i << 3; /* multiply by 8 */ 3108 ecdptr = &spf_flt->flt_ec_data[i]; 3109 psynd = ecdptr->ec_afsr & P_AFSR_P_SYND; 3110 edp = ecdptr->ec_afsr & P_AFSR_EDP; 3111 3112 /* 3113 * For Sabre/Hummingbird, parity synd is captured only 3114 * in [7:0] of AFSR.PSYND for each 8-byte chunk. 3115 * For spitfire/blackbird, AFSR.PSYND is captured 3116 * in 16-byte granularity. [15:8] represent 3117 * the upper 8 byte and [7:0] the lower 8 byte. 3118 */ 3119 if (isus2i || isus2e || (i & 0x1)) 3120 bad = (psynd & 0xFF); /* check bits [7:0] */ 3121 else 3122 bad = (psynd & 0xFF00); /* check bits [15:8] */ 3123 3124 if (bad && edp) { 3125 cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL, 3126 " E$Data (0x%02x): 0x%08x.%08x " 3127 "*Bad* PSYND=0x%04x", offset, 3128 (uint32_t)(ecdptr->ec_d8 >> 32), 3129 (uint32_t)ecdptr->ec_d8, psynd); 3130 } else { 3131 cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL, 3132 " E$Data (0x%02x): 0x%08x.%08x", offset, 3133 (uint32_t)(ecdptr->ec_d8 >> 32), 3134 (uint32_t)ecdptr->ec_d8); 3135 } 3136 } 3137 } 3138 3139 /* 3140 * Common logging function for all cpu async errors. This function allows the 3141 * caller to generate a single cmn_err() call that logs the appropriate items 3142 * from the fault structure, and implements our rules for AFT logging levels. 3143 * 3144 * ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT) 3145 * tagnum: 0, 1, 2, .. generate the [AFT#] tag 3146 * spflt: pointer to spitfire async fault structure 3147 * logflags: bitflags indicating what to output 3148 * endstr: a end string to appear at the end of this log 3149 * fmt: a format string to appear at the beginning of the log 3150 * 3151 * The logflags allows the construction of predetermined output from the spflt 3152 * structure. 
The individual data items always appear in a consistent order. 3153 * Note that either or both of the spflt structure pointer and logflags may be 3154 * NULL or zero respectively, indicating that the predetermined output 3155 * substrings are not requested in this log. The output looks like this: 3156 * 3157 * [AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU> 3158 * <CPU_SPACE><CPU_ERRID> 3159 * newline+4spaces<CPU_AFSR><CPU_AFAR> 3160 * newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC> 3161 * newline+4spaces<CPU_UDBH><CPU_UDBL> 3162 * newline+4spaces<CPU_SYND> 3163 * newline+4spaces<endstr> 3164 * 3165 * Note that <endstr> may not start on a newline if we are logging <CPU_PSYND>; 3166 * it is assumed that <endstr> will be the unum string in this case. The size 3167 * of our intermediate formatting buf[] is based on the worst case of all flags 3168 * being enabled. We pass the caller's varargs directly to vcmn_err() for 3169 * formatting so we don't need additional stack space to format them here. 3170 */ 3171 /*PRINTFLIKE6*/ 3172 static void 3173 cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, uint_t logflags, 3174 const char *endstr, const char *fmt, ...) 3175 { 3176 struct async_flt *aflt = (struct async_flt *)spflt; 3177 char buf[400], *p, *q; /* see comments about buf[] size above */ 3178 va_list ap; 3179 int console_log_flag; 3180 3181 if ((aflt == NULL) || ((aflt->flt_class == CPU_FAULT) && 3182 (aflt->flt_stat & P_AFSR_LEVEL1)) || 3183 (aflt->flt_panic)) { 3184 console_log_flag = (tagnum < 2) || aft_verbose; 3185 } else { 3186 int verbose = ((aflt->flt_class == BUS_FAULT) || 3187 (aflt->flt_stat & P_AFSR_CE)) ? 3188 ce_verbose_memory : ce_verbose_other; 3189 3190 if (!verbose) 3191 return; 3192 3193 console_log_flag = (verbose > 1); 3194 } 3195 3196 if (console_log_flag) 3197 (void) sprintf(buf, "[AFT%d]", tagnum); 3198 else 3199 (void) sprintf(buf, "![AFT%d]", tagnum); 3200 3201 p = buf + strlen(buf); /* current buffer position */ 3202 q = buf + sizeof (buf); /* pointer past end of buffer */ 3203 3204 if (spflt != NULL && (logflags & CPU_ERRID_FIRST)) { 3205 (void) snprintf(p, (size_t)(q - p), " errID 0x%08x.%08x", 3206 (uint32_t)(aflt->flt_id >> 32), (uint32_t)aflt->flt_id); 3207 p += strlen(p); 3208 } 3209 3210 /* 3211 * Copy the caller's format string verbatim into buf[]. It will be 3212 * formatted by the call to vcmn_err() at the end of this function. 3213 */ 3214 if (fmt != NULL && p < q) { 3215 (void) strncpy(p, fmt, (size_t)(q - p - 1)); 3216 buf[sizeof (buf) - 1] = '\0'; 3217 p += strlen(p); 3218 } 3219 3220 if (spflt != NULL) { 3221 if (logflags & CPU_FLTCPU) { 3222 (void) snprintf(p, (size_t)(q - p), " CPU%d", 3223 aflt->flt_inst); 3224 p += strlen(p); 3225 } 3226 3227 if (logflags & CPU_SPACE) { 3228 if (aflt->flt_status & ECC_D_TRAP) 3229 (void) snprintf(p, (size_t)(q - p), 3230 " Data access"); 3231 else if (aflt->flt_status & ECC_I_TRAP) 3232 (void) snprintf(p, (size_t)(q - p), 3233 " Instruction access"); 3234 p += strlen(p); 3235 } 3236 3237 if (logflags & CPU_TL) { 3238 (void) snprintf(p, (size_t)(q - p), " at TL%s", 3239 aflt->flt_tl ? 
">0" : "=0"); 3240 p += strlen(p); 3241 } 3242 3243 if (logflags & CPU_ERRID) { 3244 (void) snprintf(p, (size_t)(q - p), 3245 ", errID 0x%08x.%08x", 3246 (uint32_t)(aflt->flt_id >> 32), 3247 (uint32_t)aflt->flt_id); 3248 p += strlen(p); 3249 } 3250 3251 if (logflags & CPU_AFSR) { 3252 (void) snprintf(p, (size_t)(q - p), 3253 "\n AFSR 0x%08b.%08b", 3254 (uint32_t)(aflt->flt_stat >> 32), AFSR_FMTSTR0, 3255 (uint32_t)aflt->flt_stat, AFSR_FMTSTR1); 3256 p += strlen(p); 3257 } 3258 3259 if (logflags & CPU_AFAR) { 3260 (void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x", 3261 (uint32_t)(aflt->flt_addr >> 32), 3262 (uint32_t)aflt->flt_addr); 3263 p += strlen(p); 3264 } 3265 3266 if (logflags & CPU_AF_PSYND) { 3267 ushort_t psynd = (ushort_t) 3268 (aflt->flt_stat & P_AFSR_P_SYND); 3269 3270 (void) snprintf(p, (size_t)(q - p), 3271 "\n AFSR.PSYND 0x%04x(Score %02d)", 3272 psynd, ecc_psynd_score(psynd)); 3273 p += strlen(p); 3274 } 3275 3276 if (logflags & CPU_AF_ETS) { 3277 (void) snprintf(p, (size_t)(q - p), " AFSR.ETS 0x%02x", 3278 (uchar_t)((aflt->flt_stat & P_AFSR_ETS) >> 16)); 3279 p += strlen(p); 3280 } 3281 3282 if (logflags & CPU_FAULTPC) { 3283 (void) snprintf(p, (size_t)(q - p), " Fault_PC 0x%p", 3284 (void *)aflt->flt_pc); 3285 p += strlen(p); 3286 } 3287 3288 if (logflags & CPU_UDBH) { 3289 (void) snprintf(p, (size_t)(q - p), 3290 "\n UDBH 0x%04b UDBH.ESYND 0x%02x", 3291 spflt->flt_sdbh, UDB_FMTSTR, 3292 spflt->flt_sdbh & 0xFF); 3293 p += strlen(p); 3294 } 3295 3296 if (logflags & CPU_UDBL) { 3297 (void) snprintf(p, (size_t)(q - p), 3298 " UDBL 0x%04b UDBL.ESYND 0x%02x", 3299 spflt->flt_sdbl, UDB_FMTSTR, 3300 spflt->flt_sdbl & 0xFF); 3301 p += strlen(p); 3302 } 3303 3304 if (logflags & CPU_SYND) { 3305 ushort_t synd = SYND(aflt->flt_synd); 3306 3307 (void) snprintf(p, (size_t)(q - p), 3308 "\n %s Syndrome 0x%x Memory Module ", 3309 UDBL(aflt->flt_synd) ? "UDBL" : "UDBH", synd); 3310 p += strlen(p); 3311 } 3312 } 3313 3314 if (endstr != NULL) { 3315 if (!(logflags & CPU_SYND)) 3316 (void) snprintf(p, (size_t)(q - p), "\n %s", endstr); 3317 else 3318 (void) snprintf(p, (size_t)(q - p), "%s", endstr); 3319 p += strlen(p); 3320 } 3321 3322 if (ce_code == CE_CONT && (p < q - 1)) 3323 (void) strcpy(p, "\n"); /* add final \n if needed */ 3324 3325 va_start(ap, fmt); 3326 vcmn_err(ce_code, buf, ap); 3327 va_end(ap); 3328 } 3329 3330 /* 3331 * Ecache Scrubbing 3332 * 3333 * The basic idea is to prevent lines from sitting in the ecache long enough 3334 * to build up soft errors which can lead to ecache parity errors. 3335 * 3336 * The following rules are observed when flushing the ecache: 3337 * 3338 * 1. When the system is busy, flush bad clean lines 3339 * 2. When the system is idle, flush all clean lines 3340 * 3. When the system is idle, flush good dirty lines 3341 * 4. Never flush bad dirty lines. 3342 * 3343 * modify parity busy idle 3344 * ---------------------------- 3345 * clean good X 3346 * clean bad X X 3347 * dirty good X 3348 * dirty bad 3349 * 3350 * Bad or good refers to whether a line has an E$ parity error or not. 3351 * Clean or dirty refers to the state of the modified bit. We currently 3352 * default the scan rate to 100 (scan 10% of the cache per second). 3353 * 3354 * The following are E$ states and actions. 
3355 *
3356 * We encode our state as a 3-bit number, consisting of:
3357 * ECACHE_STATE_MODIFIED (0=clean, 1=dirty)
3358 * ECACHE_STATE_PARITY (0=good, 1=bad)
3359 * ECACHE_STATE_BUSY (0=idle, 1=busy)
3360 *
3361 * We associate a flushing and a logging action with each state.
3362 *
3363 * E$ actions are different for Spitfire and Sabre/Hummingbird modules.
3364 * MIRROR_FLUSH indicates that an E$ line will be flushed for the mirrored
3365 * E$ only, in addition to the value set by ec_flush.
3366 */
3367
3368 #define ALWAYS_FLUSH 0x1 /* flush E$ line on all E$ types */
3369 #define NEVER_FLUSH 0x0 /* never flush the E$ line */
3370 #define MIRROR_FLUSH 0xF /* flush E$ line on mirrored E$ only */
3371
3372 struct {
3373 char ec_flush; /* whether to flush or not */
3374 char ec_log; /* ecache logging */
3375 char ec_log_type; /* log type info */
3376 } ec_action[] = { /* states of the E$ line in M P B */
3377 { ALWAYS_FLUSH, 0, 0 }, /* 0 0 0 clean_good_idle */
3378 { MIRROR_FLUSH, 0, 0 }, /* 0 0 1 clean_good_busy */
3379 { ALWAYS_FLUSH, 1, CPU_BADLINE_CI_ERR }, /* 0 1 0 clean_bad_idle */
3380 { ALWAYS_FLUSH, 1, CPU_BADLINE_CB_ERR }, /* 0 1 1 clean_bad_busy */
3381 { ALWAYS_FLUSH, 0, 0 }, /* 1 0 0 dirty_good_idle */
3382 { MIRROR_FLUSH, 0, 0 }, /* 1 0 1 dirty_good_busy */
3383 { NEVER_FLUSH, 1, CPU_BADLINE_DI_ERR }, /* 1 1 0 dirty_bad_idle */
3384 { NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR } /* 1 1 1 dirty_bad_busy */
3385 };
3386
3387 /*
3388 * Offsets into ec_action[] that identify the clean_good_busy and
3389 * dirty_good_busy lines.
3390 */
3391 #define ECACHE_CGB_LINE 1 /* E$ clean_good_busy line */
3392 #define ECACHE_DGB_LINE 5 /* E$ dirty_good_busy line */
3393
3394 /*
3395 * We flush lines that are Clean_Good_Busy as well as Dirty_Good_Busy,
3396 * but only for non-mirrored E$.
3397 */
3398 #define CGB(x, m) (((x) == ECACHE_CGB_LINE) && (m != ECACHE_CPU_MIRROR))
3399 #define DGB(x, m) (((x) == ECACHE_DGB_LINE) && (m != ECACHE_CPU_MIRROR))
3400
3401 #define ECACHE_STATE_MODIFIED 0x4
3402 #define ECACHE_STATE_PARITY 0x2
3403 #define ECACHE_STATE_BUSY 0x1
3404
3405 /*
3406 * If the ecache is mirrored, ecache_calls_a_sec and ecache_scan_rate are reduced.
3407 */
3408 int ecache_calls_a_sec_mirrored = 1;
3409 int ecache_lines_per_call_mirrored = 1;
3410
3411 int ecache_scrub_enable = 1; /* ecache scrubbing is on by default */
3412 int ecache_scrub_verbose = 1; /* prints clean_bad and dirty_bad lines */
3413 int ecache_scrub_panic = 0; /* panics on a clean_bad or dirty_bad line */
3414 int ecache_calls_a_sec = 100; /* scrubber calls per sec */
3415 int ecache_scan_rate = 100; /* scan rate (in tenths of a percent) */
3416 int ecache_idle_factor = 1; /* increase the scan rate when idle */
3417 int ecache_flush_clean_good_busy = 50; /* flush rate (in percent) */
3418 int ecache_flush_dirty_good_busy = 100; /* flush rate (in percent) */
3419
3420 volatile int ec_timeout_calls = 1; /* timeout calls */
3421
3422 /*
3423 * Interrupt number and pil for ecache scrubber cross-trap calls.
3424 */
3425 static uint64_t ecache_scrub_inum;
3426 uint_t ecache_scrub_pil = PIL_9;
3427
3428 /*
3429 * Kstats for the E$ scrubber.
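 * Each counter below is bumped once per line examined by
 * scrub_ecache_line(), indexed by the line's M/P/B state; the *_flush and
 * tags_cleared counters track the extra flush and tag-clear actions taken.
 * The counters can be read with kstat(1M) (presumably under the name
 * registered by ecache_kstat_init(); see that routine for the actual
 * module and name).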
3430 */ 3431 typedef struct ecache_kstat { 3432 kstat_named_t clean_good_idle; /* # of lines scrubbed */ 3433 kstat_named_t clean_good_busy; /* # of lines skipped */ 3434 kstat_named_t clean_bad_idle; /* # of lines scrubbed */ 3435 kstat_named_t clean_bad_busy; /* # of lines scrubbed */ 3436 kstat_named_t dirty_good_idle; /* # of lines scrubbed */ 3437 kstat_named_t dirty_good_busy; /* # of lines skipped */ 3438 kstat_named_t dirty_bad_idle; /* # of lines skipped */ 3439 kstat_named_t dirty_bad_busy; /* # of lines skipped */ 3440 kstat_named_t invalid_lines; /* # of invalid lines */ 3441 kstat_named_t clean_good_busy_flush; /* # of lines scrubbed */ 3442 kstat_named_t dirty_good_busy_flush; /* # of lines scrubbed */ 3443 kstat_named_t tags_cleared; /* # of E$ tags cleared */ 3444 } ecache_kstat_t; 3445 3446 static ecache_kstat_t ec_kstat_template = { 3447 { "clean_good_idle", KSTAT_DATA_ULONG }, 3448 { "clean_good_busy", KSTAT_DATA_ULONG }, 3449 { "clean_bad_idle", KSTAT_DATA_ULONG }, 3450 { "clean_bad_busy", KSTAT_DATA_ULONG }, 3451 { "dirty_good_idle", KSTAT_DATA_ULONG }, 3452 { "dirty_good_busy", KSTAT_DATA_ULONG }, 3453 { "dirty_bad_idle", KSTAT_DATA_ULONG }, 3454 { "dirty_bad_busy", KSTAT_DATA_ULONG }, 3455 { "invalid_lines", KSTAT_DATA_ULONG }, 3456 { "clean_good_busy_flush", KSTAT_DATA_ULONG }, 3457 { "dirty_good_busy_flush", KSTAT_DATA_ULONG }, 3458 { "ecache_tags_cleared", KSTAT_DATA_ULONG } 3459 }; 3460 3461 struct kmem_cache *sf_private_cache; 3462 3463 /* 3464 * Called periodically on each CPU to scan the ecache once a sec. 3465 * adjusting the ecache line index appropriately 3466 */ 3467 void 3468 scrub_ecache_line() 3469 { 3470 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc); 3471 int cpuid = CPU->cpu_id; 3472 uint32_t index = ssmp->ecache_flush_index; 3473 uint64_t ec_size = cpunodes[cpuid].ecache_size; 3474 size_t ec_linesize = cpunodes[cpuid].ecache_linesize; 3475 int nlines = ssmp->ecache_nlines; 3476 uint32_t ec_set_size = ec_size / ecache_associativity; 3477 int ec_mirror = ssmp->ecache_mirror; 3478 ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data; 3479 3480 int line, scan_lines, flush_clean_busy = 0, flush_dirty_busy = 0; 3481 int mpb; /* encode Modified, Parity, Busy for action */ 3482 uchar_t state; 3483 uint64_t ec_tag, paddr, oafsr, tafsr, nafsr; 3484 uint64_t *acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 3485 ec_data_t ec_data[8]; 3486 kstat_named_t *ec_knp; 3487 3488 switch (ec_mirror) { 3489 default: 3490 case ECACHE_CPU_NON_MIRROR: 3491 /* 3492 * The E$ scan rate is expressed in units of tenths of 3493 * a percent. ecache_scan_rate = 1000 (100%) means the 3494 * whole cache is scanned every second. 3495 */ 3496 scan_lines = (nlines * ecache_scan_rate) / 3497 (1000 * ecache_calls_a_sec); 3498 if (!(ssmp->ecache_busy)) { 3499 if (ecache_idle_factor > 0) { 3500 scan_lines *= ecache_idle_factor; 3501 } 3502 } else { 3503 flush_clean_busy = (scan_lines * 3504 ecache_flush_clean_good_busy) / 100; 3505 flush_dirty_busy = (scan_lines * 3506 ecache_flush_dirty_good_busy) / 100; 3507 } 3508 3509 ec_timeout_calls = (ecache_calls_a_sec ? 3510 ecache_calls_a_sec : 1); 3511 break; 3512 3513 case ECACHE_CPU_MIRROR: 3514 scan_lines = ecache_lines_per_call_mirrored; 3515 ec_timeout_calls = (ecache_calls_a_sec_mirrored ? 
3516 ecache_calls_a_sec_mirrored : 1);
3517 break;
3518 }
3519
3520 /*
3521 * The ecache scrubber algorithm operates by reading and
3522 * decoding the E$ tag to determine whether the corresponding E$ line
3523 * can be scrubbed. There is an implicit assumption in the scrubber
3524 * logic that the E$ tag is valid. Unfortunately, this assumption is
3525 * flawed since the E$ tag may also be corrupted and have parity errors.
3526 * The scrubber logic is enhanced to check the validity of the E$ tag
3527 * before scrubbing. When a parity error is detected in the E$ tag,
3528 * it is possible to recover and scrub the tag under certain conditions
3529 * so that an ETP error condition can be avoided.
3530 */
3531
3532 for (mpb = line = 0; line < scan_lines; line++, mpb = 0) {
3533 /*
3534 * We get the old-AFSR before clearing the AFSR sticky bits
3535 * in {get_ecache_tag, check_ecache_line, get_ecache_dtag}.
3536 * If the CP bit is set in the old-AFSR, we log an Orphan CP event.
3537 */
3538 ec_tag = get_ecache_tag(index, &nafsr, acc_afsr);
3539 state = (uchar_t)((ec_tag & cpu_ec_state_mask) >>
3540 cpu_ec_state_shift);
3541
3542 /*
3543 * If ETP is set, try to scrub the ecache tag.
3544 */
3545 if (nafsr & P_AFSR_ETP) {
3546 ecache_scrub_tag_err(nafsr, state, index);
3547 } else if (state & cpu_ec_state_valid) {
3548 /*
3549 * ETP is not set, E$ tag is valid.
3550 * Proceed with the E$ scrubbing.
3551 */
3552 if (state & cpu_ec_state_dirty)
3553 mpb |= ECACHE_STATE_MODIFIED;
3554
3555 tafsr = check_ecache_line(index, acc_afsr);
3556
3557 if (tafsr & P_AFSR_EDP) {
3558 mpb |= ECACHE_STATE_PARITY;
3559
3560 if (ecache_scrub_verbose ||
3561 ecache_scrub_panic) {
3562 get_ecache_dtag(P2ALIGN(index, 64),
3563 (uint64_t *)&ec_data[0],
3564 &ec_tag, &oafsr, acc_afsr);
3565 }
3566 }
3567
3568 if (ssmp->ecache_busy)
3569 mpb |= ECACHE_STATE_BUSY;
3570
3571 ec_knp = (kstat_named_t *)ec_ksp + mpb;
3572 ec_knp->value.ul++;
3573
3574 paddr = ((ec_tag & cpu_ec_tag_mask) <<
3575 cpu_ec_tag_shift) | (index % ec_set_size);
3576
3577 /*
3578 * We flush the E$ lines depending on the ec_flush action;
3579 * in addition, clean_good_busy and
3580 * dirty_good_busy lines are flushed for mirrored E$.
3581 */
3582 if (ec_action[mpb].ec_flush == ALWAYS_FLUSH) {
3583 flushecacheline(paddr, ec_size);
3584 } else if ((ec_mirror == ECACHE_CPU_MIRROR) &&
3585 (ec_action[mpb].ec_flush == MIRROR_FLUSH)) {
3586 flushecacheline(paddr, ec_size);
3587 } else if (ec_action[mpb].ec_flush == NEVER_FLUSH) {
3588 softcall(ecache_page_retire, (void *)paddr);
3589 }
3590
3591 /*
3592 * Conditionally flush both the clean_good and
3593 * dirty_good lines when busy.
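 *
 * As a worked example (the figures are illustrative; the real line count
 * comes from cpunodes[].ecache_size): with the default tunables
 * ecache_scan_rate = 100 (10%) and ecache_calls_a_sec = 100, a 4MB E$ with
 * 64-byte lines (65536 lines) gives scan_lines = (65536 * 100) /
 * (1000 * 100) = 65 per call; with ecache_flush_clean_good_busy = 50 and
 * ecache_flush_dirty_good_busy = 100, at most 65 * 50 / 100 = 32
 * clean_good_busy lines and 65 dirty_good_busy lines are then flushed per
 * call on a busy, non-mirrored E$.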
3594 */ 3595 if (CGB(mpb, ec_mirror) && (flush_clean_busy > 0)) { 3596 flush_clean_busy--; 3597 flushecacheline(paddr, ec_size); 3598 ec_ksp->clean_good_busy_flush.value.ul++; 3599 } else if (DGB(mpb, ec_mirror) && 3600 (flush_dirty_busy > 0)) { 3601 flush_dirty_busy--; 3602 flushecacheline(paddr, ec_size); 3603 ec_ksp->dirty_good_busy_flush.value.ul++; 3604 } 3605 3606 if (ec_action[mpb].ec_log && (ecache_scrub_verbose || 3607 ecache_scrub_panic)) { 3608 ecache_scrub_log(ec_data, ec_tag, paddr, mpb, 3609 tafsr); 3610 } 3611 3612 } else { 3613 ec_ksp->invalid_lines.value.ul++; 3614 } 3615 3616 if ((index += ec_linesize) >= ec_size) 3617 index = 0; 3618 3619 } 3620 3621 /* 3622 * set the ecache scrub index for the next time around 3623 */ 3624 ssmp->ecache_flush_index = index; 3625 3626 if (*acc_afsr & P_AFSR_CP) { 3627 uint64_t ret_afsr; 3628 3629 ret_afsr = ecache_scrub_misc_err(CPU_ORPHAN_CP_ERR, *acc_afsr); 3630 if ((ret_afsr & P_AFSR_CP) == 0) 3631 *acc_afsr = 0; 3632 } 3633 } 3634 3635 /* 3636 * Handler for ecache_scrub_inum softint. Call scrub_ecache_line until 3637 * we decrement the outstanding request count to zero. 3638 */ 3639 3640 /*ARGSUSED*/ 3641 uint_t 3642 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2) 3643 { 3644 int i; 3645 int outstanding; 3646 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc); 3647 uint32_t *countp = &ssmp->ec_scrub_outstanding; 3648 3649 do { 3650 outstanding = *countp; 3651 ASSERT(outstanding > 0); 3652 for (i = 0; i < outstanding; i++) 3653 scrub_ecache_line(); 3654 } while (atomic_add_32_nv(countp, -outstanding)); 3655 3656 return (DDI_INTR_CLAIMED); 3657 } 3658 3659 /* 3660 * force each cpu to perform an ecache scrub, called from a timeout 3661 */ 3662 extern xcfunc_t ecache_scrubreq_tl1; 3663 3664 void 3665 do_scrub_ecache_line(void) 3666 { 3667 long delta; 3668 3669 if (ecache_calls_a_sec > hz) 3670 ecache_calls_a_sec = hz; 3671 else if (ecache_calls_a_sec <= 0) 3672 ecache_calls_a_sec = 1; 3673 3674 if (ecache_calls_a_sec_mirrored > hz) 3675 ecache_calls_a_sec_mirrored = hz; 3676 else if (ecache_calls_a_sec_mirrored <= 0) 3677 ecache_calls_a_sec_mirrored = 1; 3678 3679 if (ecache_scrub_enable) { 3680 xt_all(ecache_scrubreq_tl1, ecache_scrub_inum, 0); 3681 delta = hz / ec_timeout_calls; 3682 } else { 3683 delta = hz; 3684 } 3685 3686 (void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0, 3687 delta); 3688 } 3689 3690 /* 3691 * initialization for ecache scrubbing 3692 * This routine is called AFTER all cpus have had cpu_init_private called 3693 * to initialize their private data areas. 3694 */ 3695 void 3696 cpu_init_cache_scrub(void) 3697 { 3698 if (ecache_calls_a_sec > hz) { 3699 cmn_err(CE_NOTE, "ecache_calls_a_sec set too high (%d); " 3700 "resetting to hz (%d)", ecache_calls_a_sec, hz); 3701 ecache_calls_a_sec = hz; 3702 } 3703 3704 /* 3705 * Register softint for ecache scrubbing. 3706 */ 3707 ecache_scrub_inum = add_softintr(ecache_scrub_pil, 3708 scrub_ecache_line_intr, NULL, SOFTINT_MT); 3709 3710 /* 3711 * kick off the scrubbing using realtime timeout 3712 */ 3713 (void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0, 3714 hz / ecache_calls_a_sec); 3715 } 3716 3717 /* 3718 * Unset the busy flag for this cpu. 3719 */ 3720 void 3721 cpu_idle_ecache_scrub(struct cpu *cp) 3722 { 3723 if (CPU_PRIVATE(cp) != NULL) { 3724 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, 3725 sfpr_scrub_misc); 3726 ssmp->ecache_busy = ECACHE_CPU_IDLE; 3727 } 3728 } 3729 3730 /* 3731 * Set the busy flag for this cpu. 
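 * scrub_ecache_line() consults this flag: an idle CPU scans more lines per call (scaled by ecache_idle_factor), while a busy CPU only flushes a limited budget of clean/dirty good lines per call.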
3732 */ 3733 void 3734 cpu_busy_ecache_scrub(struct cpu *cp) 3735 { 3736 if (CPU_PRIVATE(cp) != NULL) { 3737 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, 3738 sfpr_scrub_misc); 3739 ssmp->ecache_busy = ECACHE_CPU_BUSY; 3740 } 3741 } 3742 3743 /* 3744 * initialize the ecache scrubber data structures 3745 * The global entry point cpu_init_private replaces this entry point. 3746 * 3747 */ 3748 static void 3749 cpu_init_ecache_scrub_dr(struct cpu *cp) 3750 { 3751 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc); 3752 int cpuid = cp->cpu_id; 3753 3754 /* 3755 * initialize bookkeeping for cache scrubbing 3756 */ 3757 bzero(ssmp, sizeof (spitfire_scrub_misc_t)); 3758 3759 ssmp->ecache_flush_index = 0; 3760 3761 ssmp->ecache_nlines = 3762 cpunodes[cpuid].ecache_size / cpunodes[cpuid].ecache_linesize; 3763 3764 /* 3765 * Determine whether we are running on mirrored SRAM 3766 */ 3767 3768 if (cpunodes[cpuid].msram == ECACHE_CPU_MIRROR) 3769 ssmp->ecache_mirror = ECACHE_CPU_MIRROR; 3770 else 3771 ssmp->ecache_mirror = ECACHE_CPU_NON_MIRROR; 3772 3773 cpu_busy_ecache_scrub(cp); 3774 3775 /* 3776 * initialize the kstats 3777 */ 3778 ecache_kstat_init(cp); 3779 } 3780 3781 /* 3782 * uninitialize the ecache scrubber data structures 3783 * The global entry point cpu_uninit_private replaces this entry point. 3784 */ 3785 static void 3786 cpu_uninit_ecache_scrub_dr(struct cpu *cp) 3787 { 3788 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc); 3789 3790 if (ssmp->ecache_ksp != NULL) { 3791 kstat_delete(ssmp->ecache_ksp); 3792 ssmp->ecache_ksp = NULL; 3793 } 3794 3795 /* 3796 * un-initialize bookkeeping for cache scrubbing 3797 */ 3798 bzero(ssmp, sizeof (spitfire_scrub_misc_t)); 3799 3800 cpu_idle_ecache_scrub(cp); 3801 } 3802 3803 struct kmem_cache *sf_private_cache; 3804 3805 /* 3806 * Cpu private initialization. This includes allocating the cpu_private 3807 * data structure, initializing it, and initializing the scrubber for this 3808 * cpu. This is called once for EVERY cpu, including CPU 0. This function 3809 * calls cpu_init_ecache_scrub_dr to init the scrubber. 3810 * We use kmem_cache_create for the spitfire private data structure because it 3811 * needs to be allocated on a S_ECACHE_MAX_LSIZE (64) byte boundary. 3812 */ 3813 void 3814 cpu_init_private(struct cpu *cp) 3815 { 3816 spitfire_private_t *sfprp; 3817 3818 ASSERT(CPU_PRIVATE(cp) == NULL); 3819 3820 /* 3821 * If the sf_private_cache has not been created, create it. 3822 */ 3823 if (sf_private_cache == NULL) { 3824 sf_private_cache = kmem_cache_create("sf_private_cache", 3825 sizeof (spitfire_private_t), S_ECACHE_MAX_LSIZE, NULL, 3826 NULL, NULL, NULL, NULL, 0); 3827 ASSERT(sf_private_cache); 3828 } 3829 3830 sfprp = CPU_PRIVATE(cp) = kmem_cache_alloc(sf_private_cache, KM_SLEEP); 3831 3832 bzero(sfprp, sizeof (spitfire_private_t)); 3833 3834 cpu_init_ecache_scrub_dr(cp); 3835 } 3836 3837 /* 3838 * Cpu private uninitialization. Uninitialize the Ecache scrubber and 3839 * deallocate the scrubber data structures and cpu_private data structure. 3840 * For now, this function just calls cpu_uninit_ecache_scrub_dr to uninit 3841 * the scrubber for the specified cpu.
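 * The private data is returned to sf_private_cache rather than freed outright, and the cache itself is never destroyed, so it can be reused if the cpu is later re-attached (e.g. via DR).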
3842 */ 3843 void 3844 cpu_uninit_private(struct cpu *cp) 3845 { 3846 ASSERT(CPU_PRIVATE(cp)); 3847 3848 cpu_uninit_ecache_scrub_dr(cp); 3849 kmem_cache_free(sf_private_cache, CPU_PRIVATE(cp)); 3850 CPU_PRIVATE(cp) = NULL; 3851 } 3852 3853 /* 3854 * initialize the ecache kstats for each cpu 3855 */ 3856 static void 3857 ecache_kstat_init(struct cpu *cp) 3858 { 3859 struct kstat *ksp; 3860 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc); 3861 3862 ASSERT(ssmp != NULL); 3863 3864 if ((ksp = kstat_create("unix", cp->cpu_id, "ecache_kstat", "misc", 3865 KSTAT_TYPE_NAMED, 3866 sizeof (ecache_kstat_t) / sizeof (kstat_named_t), 3867 KSTAT_FLAG_WRITABLE)) == NULL) { 3868 ssmp->ecache_ksp = NULL; 3869 cmn_err(CE_NOTE, "!ecache_kstat_init(%d) failed\n", cp->cpu_id); 3870 return; 3871 } 3872 3873 ssmp->ecache_ksp = ksp; 3874 bcopy(&ec_kstat_template, ksp->ks_data, sizeof (ecache_kstat_t)); 3875 kstat_install(ksp); 3876 } 3877 3878 /* 3879 * log the bad ecache information 3880 */ 3881 static void 3882 ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, uint64_t paddr, int mpb, 3883 uint64_t afsr) 3884 { 3885 spitf_async_flt spf_flt; 3886 struct async_flt *aflt; 3887 int i; 3888 char *class; 3889 3890 bzero(&spf_flt, sizeof (spitf_async_flt)); 3891 aflt = &spf_flt.cmn_asyncflt; 3892 3893 for (i = 0; i < 8; i++) { 3894 spf_flt.flt_ec_data[i] = ec_data[i]; 3895 } 3896 3897 spf_flt.flt_ec_tag = ec_tag; 3898 3899 if (mpb < (sizeof (ec_action) / sizeof (ec_action[0]))) { 3900 spf_flt.flt_type = ec_action[mpb].ec_log_type; 3901 } else spf_flt.flt_type = (ushort_t)mpb; 3902 3903 aflt->flt_inst = CPU->cpu_id; 3904 aflt->flt_class = CPU_FAULT; 3905 aflt->flt_id = gethrtime_waitfree(); 3906 aflt->flt_addr = paddr; 3907 aflt->flt_stat = afsr; 3908 aflt->flt_panic = (uchar_t)ecache_scrub_panic; 3909 3910 switch (mpb) { 3911 case CPU_ECACHE_TAG_ERR: 3912 case CPU_ECACHE_ADDR_PAR_ERR: 3913 case CPU_ECACHE_ETP_ETS_ERR: 3914 case CPU_ECACHE_STATE_ERR: 3915 class = FM_EREPORT_CPU_USII_ESCRUB_TAG; 3916 break; 3917 default: 3918 class = FM_EREPORT_CPU_USII_ESCRUB_DATA; 3919 break; 3920 } 3921 3922 cpu_errorq_dispatch(class, (void *)&spf_flt, sizeof (spf_flt), 3923 ue_queue, aflt->flt_panic); 3924 3925 if (aflt->flt_panic) 3926 cmn_err(CE_PANIC, "ecache_scrub_panic set and bad E$ " 3927 "line detected"); 3928 } 3929 3930 /* 3931 * Process an ecache error that occurred during the E$ scrubbing. 3932 * We do the ecache scan to find the bad line, flush the bad line 3933 * and start the memscrubber to find any UE (in memory or in another cache). 3934 */ 3935 static uint64_t 3936 ecache_scrub_misc_err(int type, uint64_t afsr) 3937 { 3938 spitf_async_flt spf_flt; 3939 struct async_flt *aflt; 3940 uint64_t oafsr; 3941 3942 bzero(&spf_flt, sizeof (spitf_async_flt)); 3943 aflt = &spf_flt.cmn_asyncflt; 3944 3945 /* 3946 * Scan each line in the cache to look for the one 3947 * with bad parity. 3948 */ 3949 aflt->flt_addr = AFLT_INV_ADDR; 3950 scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0], 3951 &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr); 3952 3953 if (oafsr & P_AFSR_CP) { 3954 uint64_t *cp_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 3955 *cp_afsr |= oafsr; 3956 } 3957 3958 /* 3959 * If we found a bad PA, update the state to indicate if it is 3960 * memory or I/O space. 3961 */ 3962 if (aflt->flt_addr != AFLT_INV_ADDR) { 3963 aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >> 3964 MMU_PAGESHIFT)) ?
1 : 0; 3965 } 3966 3967 spf_flt.flt_type = (ushort_t)type; 3968 3969 aflt->flt_inst = CPU->cpu_id; 3970 aflt->flt_class = CPU_FAULT; 3971 aflt->flt_id = gethrtime_waitfree(); 3972 aflt->flt_status = afsr; 3973 aflt->flt_panic = (uchar_t)ecache_scrub_panic; 3974 3975 /* 3976 * We have the bad line, flush that line and start 3977 * the memscrubber. 3978 */ 3979 if (spf_flt.flt_ec_lcnt > 0) { 3980 flushecacheline(P2ALIGN(aflt->flt_addr, 64), 3981 cpunodes[CPU->cpu_id].ecache_size); 3982 read_all_memscrub = 1; 3983 memscrub_run(); 3984 } 3985 3986 cpu_errorq_dispatch((type == CPU_ORPHAN_CP_ERR) ? 3987 FM_EREPORT_CPU_USII_CP : FM_EREPORT_CPU_USII_UNKNOWN, 3988 (void *)&spf_flt, sizeof (spf_flt), ue_queue, aflt->flt_panic); 3989 3990 return (oafsr); 3991 } 3992 3993 static void 3994 ecache_scrub_tag_err(uint64_t afsr, uchar_t state, uint32_t index) 3995 { 3996 ushort_t afsr_ets = (afsr & P_AFSR_ETS) >> P_AFSR_ETS_SHIFT; 3997 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc); 3998 ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data; 3999 uint64_t ec_tag, paddr, oafsr; 4000 ec_data_t ec_data[8]; 4001 int cpuid = CPU->cpu_id; 4002 uint32_t ec_set_size = cpunodes[cpuid].ecache_size / 4003 ecache_associativity; 4004 uint64_t *cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 4005 4006 get_ecache_dtag(P2ALIGN(index, 64), (uint64_t *)&ec_data[0], &ec_tag, 4007 &oafsr, cpu_afsr); 4008 paddr = ((ec_tag & cpu_ec_tag_mask) << cpu_ec_tag_shift) | 4009 (index % ec_set_size); 4010 4011 /* 4012 * E$ tag state has good parity 4013 */ 4014 if ((afsr_ets & cpu_ec_state_parity) == 0) { 4015 if (afsr_ets & cpu_ec_parity) { 4016 /* 4017 * E$ tag state bits indicate the line is clean, 4018 * invalidate the E$ tag and continue. 4019 */ 4020 if (!(state & cpu_ec_state_dirty)) { 4021 /* 4022 * Zero the tag and mark the state invalid 4023 * with good parity for the tag. 
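 * The tag is rewritten with good parity by write_ec_tag_parity() (or write_hb_ec_tag_parity() on the CPUs flagged by isus2i/isus2e), and the full-size flush that follows keeps the dual tag consistent.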
4024 */ 4025 if (isus2i || isus2e) 4026 write_hb_ec_tag_parity(index); 4027 else 4028 write_ec_tag_parity(index); 4029 4030 /* Sync with the dual tag */ 4031 flushecacheline(0, 4032 cpunodes[CPU->cpu_id].ecache_size); 4033 ec_ksp->tags_cleared.value.ul++; 4034 ecache_scrub_log(ec_data, ec_tag, paddr, 4035 CPU_ECACHE_TAG_ERR, afsr); 4036 return; 4037 } else { 4038 ecache_scrub_log(ec_data, ec_tag, paddr, 4039 CPU_ECACHE_ADDR_PAR_ERR, afsr); 4040 cmn_err(CE_PANIC, " E$ tag address has bad" 4041 " parity"); 4042 } 4043 } else if ((afsr_ets & cpu_ec_parity) == 0) { 4044 /* 4045 * ETS is zero but ETP is set 4046 */ 4047 ecache_scrub_log(ec_data, ec_tag, paddr, 4048 CPU_ECACHE_ETP_ETS_ERR, afsr); 4049 cmn_err(CE_PANIC, "AFSR.ETP is set and" 4050 " AFSR.ETS is zero"); 4051 } 4052 } else { 4053 /* 4054 * E$ tag state bit has a bad parity 4055 */ 4056 ecache_scrub_log(ec_data, ec_tag, paddr, 4057 CPU_ECACHE_STATE_ERR, afsr); 4058 cmn_err(CE_PANIC, "E$ tag state has bad parity"); 4059 } 4060 } 4061 4062 static void 4063 ecache_page_retire(void *arg) 4064 { 4065 uint64_t paddr = (uint64_t)arg; 4066 (void) page_retire(paddr, PR_UE); 4067 } 4068 4069 void 4070 sticksync_slave(void) 4071 {} 4072 4073 void 4074 sticksync_master(void) 4075 {} 4076 4077 /*ARGSUSED*/ 4078 void 4079 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t bpp) 4080 {} 4081 4082 void 4083 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected) 4084 { 4085 int status; 4086 ddi_fm_error_t de; 4087 4088 bzero(&de, sizeof (ddi_fm_error_t)); 4089 4090 de.fme_version = DDI_FME_VERSION; 4091 de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, 4092 FM_ENA_FMT1); 4093 de.fme_flag = expected; 4094 de.fme_bus_specific = (void *)aflt->flt_addr; 4095 status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de); 4096 4097 if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL)) 4098 aflt->flt_panic = 1; 4099 } 4100 4101 /*ARGSUSED*/ 4102 void 4103 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz, 4104 errorq_t *eqp, uint_t flag) 4105 { 4106 struct async_flt *aflt = (struct async_flt *)payload; 4107 4108 aflt->flt_erpt_class = error_class; 4109 errorq_dispatch(eqp, payload, payload_sz, flag); 4110 } 4111 4112 #define MAX_SIMM 8 4113 4114 struct ce_info { 4115 char name[UNUM_NAMLEN]; 4116 uint64_t intermittent_total; 4117 uint64_t persistent_total; 4118 uint64_t sticky_total; 4119 unsigned short leaky_bucket_cnt; 4120 }; 4121 4122 /* 4123 * Separately-defined structure for use in reporting the ce_info 4124 * to SunVTS without exposing the internal layout and implementation 4125 * of struct ce_info. 4126 */ 4127 static struct ecc_error_info ecc_error_info_data = { 4128 { "version", KSTAT_DATA_UINT32 }, 4129 { "maxcount", KSTAT_DATA_UINT32 }, 4130 { "count", KSTAT_DATA_UINT32 } 4131 }; 4132 static const size_t ecc_error_info_ndata = sizeof (ecc_error_info_data) / 4133 sizeof (struct kstat_named); 4134 4135 #if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN 4136 #error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN" 4137 #endif 4138 4139 struct ce_info *mem_ce_simm = NULL; 4140 size_t mem_ce_simm_size = 0; 4141 4142 /* 4143 * Default values for the number of CE's allowed per interval. 
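 * With the defaults below this works out to at most 2 CEs per memory module per 24 hours before page retirement is considered.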
4144 * Interval is defined in minutes 4145 * SOFTERR_MIN_TIMEOUT is defined in microseconds 4146 */ 4147 #define SOFTERR_LIMIT_DEFAULT 2 4148 #define SOFTERR_INTERVAL_DEFAULT 1440 /* This is 24 hours */ 4149 #define SOFTERR_MIN_TIMEOUT (60 * MICROSEC) /* This is 1 minute */ 4150 #define TIMEOUT_NONE ((timeout_id_t)0) 4151 #define TIMEOUT_SET ((timeout_id_t)1) 4152 4153 /* 4154 * timeout identifier for leaky_bucket 4155 */ 4156 static timeout_id_t leaky_bucket_timeout_id = TIMEOUT_NONE; 4157 4158 /* 4159 * Tunables for maximum number of allowed CE's in a given time 4160 */ 4161 int ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT; 4162 int ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT; 4163 4164 void 4165 cpu_mp_init(void) 4166 { 4167 size_t size = cpu_aflt_size(); 4168 size_t i; 4169 kstat_t *ksp; 4170 4171 /* 4172 * Initialize the CE error handling buffers. 4173 */ 4174 mem_ce_simm_size = MAX_SIMM * max_ncpus; 4175 size = sizeof (struct ce_info) * mem_ce_simm_size; 4176 mem_ce_simm = kmem_zalloc(size, KM_SLEEP); 4177 4178 ksp = kstat_create("unix", 0, "ecc-info", "misc", 4179 KSTAT_TYPE_NAMED, ecc_error_info_ndata, KSTAT_FLAG_VIRTUAL); 4180 if (ksp != NULL) { 4181 ksp->ks_data = (struct kstat_named *)&ecc_error_info_data; 4182 ecc_error_info_data.version.value.ui32 = KSTAT_CE_INFO_VER; 4183 ecc_error_info_data.maxcount.value.ui32 = mem_ce_simm_size; 4184 ecc_error_info_data.count.value.ui32 = 0; 4185 kstat_install(ksp); 4186 } 4187 4188 for (i = 0; i < mem_ce_simm_size; i++) { 4189 struct kstat_ecc_mm_info *kceip; 4190 4191 kceip = kmem_zalloc(sizeof (struct kstat_ecc_mm_info), 4192 KM_SLEEP); 4193 ksp = kstat_create("mm", i, "ecc-info", "misc", 4194 KSTAT_TYPE_NAMED, 4195 sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t), 4196 KSTAT_FLAG_VIRTUAL); 4197 if (ksp != NULL) { 4198 /* 4199 * Re-declare ks_data_size to include room for the 4200 * UNUM name since we don't have KSTAT_FLAG_VAR_SIZE 4201 * set. 4202 */ 4203 ksp->ks_data_size = sizeof (struct kstat_ecc_mm_info) + 4204 KSTAT_CE_UNUM_NAMLEN; 4205 ksp->ks_data = kceip; 4206 kstat_named_init(&kceip->name, 4207 "name", KSTAT_DATA_STRING); 4208 kstat_named_init(&kceip->intermittent_total, 4209 "intermittent_total", KSTAT_DATA_UINT64); 4210 kstat_named_init(&kceip->persistent_total, 4211 "persistent_total", KSTAT_DATA_UINT64); 4212 kstat_named_init(&kceip->sticky_total, 4213 "sticky_total", KSTAT_DATA_UINT64); 4214 /* 4215 * Use the default snapshot routine as it knows how to 4216 * deal with named kstats with long strings. 4217 */ 4218 ksp->ks_update = ecc_kstat_update; 4219 kstat_install(ksp); 4220 } else { 4221 kmem_free(kceip, sizeof (struct kstat_ecc_mm_info)); 4222 } 4223 } 4224 } 4225 4226 /*ARGSUSED*/ 4227 static void 4228 leaky_bucket_timeout(void *arg) 4229 { 4230 int i; 4231 struct ce_info *psimm = mem_ce_simm; 4232 4233 for (i = 0; i < mem_ce_simm_size; i++) { 4234 if (psimm[i].leaky_bucket_cnt > 0) 4235 atomic_add_16(&psimm[i].leaky_bucket_cnt, -1); 4236 } 4237 add_leaky_bucket_timeout(); 4238 } 4239 4240 static void 4241 add_leaky_bucket_timeout(void) 4242 { 4243 long timeout_in_microsecs; 4244 4245 /* 4246 * create timeout for next leak. 4247 * 4248 * The timeout interval is calculated as follows 4249 * 4250 * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit 4251 * 4252 * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds 4253 * in a minute), then multiply this by MICROSEC to get the interval 4254 * in microseconds.
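 * (With the defaults, that total is 1440 * 60 * 1000000 = 86,400,000,000 microseconds, i.e. 24 hours.)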
Divide this total by ecc_softerr_limit so that 4255 * the timeout interval is accurate to within a few microseconds. 4256 */ 4257 4258 if (ecc_softerr_limit <= 0) 4259 ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT; 4260 if (ecc_softerr_interval <= 0) 4261 ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT; 4262 4263 timeout_in_microsecs = ((int64_t)ecc_softerr_interval * 60 * MICROSEC) / 4264 ecc_softerr_limit; 4265 4266 if (timeout_in_microsecs < SOFTERR_MIN_TIMEOUT) 4267 timeout_in_microsecs = SOFTERR_MIN_TIMEOUT; 4268 4269 leaky_bucket_timeout_id = timeout(leaky_bucket_timeout, 4270 (void *)NULL, drv_usectohz((clock_t)timeout_in_microsecs)); 4271 } 4272 4273 /* 4274 * Legacy Correctable ECC Error Hash 4275 * 4276 * All of the code below this comment is used to implement a legacy array 4277 * which counted intermittent, persistent, and sticky CE errors by unum, 4278 * and then was later extended to publish the data as a kstat for SunVTS. 4279 * All of this code is replaced by FMA, and remains here until such time 4280 * that the UltraSPARC-I/II CPU code is converted to FMA, or is EOLed. 4281 * 4282 * Errors are saved in three buckets per-unum: 4283 * (1) sticky - scrub was unsuccessful, cannot be scrubbed 4284 * This could represent a problem, and is immediately printed out. 4285 * (2) persistent - was successfully scrubbed 4286 * These errors use the leaky bucket algorithm to determine 4287 * if there is a serious problem. 4288 * (3) intermittent - may have originated from the cpu or upa/safari bus, 4289 * and does not necessarily indicate any problem with the dimm itself, 4290 * but is critical information for debugging new hardware. 4291 * Because we do not know if it came from the dimm, it would be 4292 * inappropriate to include these in the leaky bucket counts. 4293 * 4294 * If the E$ line was modified before the scrub operation began, then the 4295 * displacement flush at the beginning of scrubphys() will cause the modified 4296 * line to be written out, which will clean up the CE. Then, any subsequent 4297 * read will not cause an error, which will cause persistent errors to be 4298 * identified as intermittent. 4299 * 4300 * If a DIMM is going bad, it will produce true persistents as well as 4301 * false intermittents, so these intermittents can be safely ignored. 4302 * 4303 * If the error count is excessive for a DIMM, this function will return 4304 * PR_MCE, and the CPU module may then decide to remove that page from use. 4305 */ 4306 static int 4307 ce_count_unum(int status, int len, char *unum) 4308 { 4309 int i; 4310 struct ce_info *psimm = mem_ce_simm; 4311 int page_status = PR_OK; 4312 4313 ASSERT(psimm != NULL); 4314 4315 if (len <= 0 || 4316 (status & (ECC_STICKY | ECC_PERSISTENT | ECC_INTERMITTENT)) == 0) 4317 return (page_status); 4318 4319 /* 4320 * Initialize the leaky_bucket timeout 4321 */ 4322 if (casptr(&leaky_bucket_timeout_id, 4323 TIMEOUT_NONE, TIMEOUT_SET) == TIMEOUT_NONE) 4324 add_leaky_bucket_timeout(); 4325 4326 for (i = 0; i < mem_ce_simm_size; i++) { 4327 if (psimm[i].name[0] == '\0') { 4328 /* 4329 * Hit the end of the valid entries, add 4330 * a new one. 4331 */ 4332 (void) strncpy(psimm[i].name, unum, len); 4333 if (status & ECC_STICKY) { 4334 /* 4335 * Sticky - the leaky bucket is used to track 4336 * soft errors. Since a sticky error is a 4337 * hard error and likely to be retired soon, 4338 * we do not count it in the leaky bucket.
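 * Returning PR_MCE below lets the caller (cpu_ce_count_unum) retire the affected page, subject to automatic_page_removal.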
4339 */ 4340 psimm[i].leaky_bucket_cnt = 0; 4341 psimm[i].intermittent_total = 0; 4342 psimm[i].persistent_total = 0; 4343 psimm[i].sticky_total = 1; 4344 cmn_err(CE_WARN, 4345 "[AFT0] Sticky Softerror encountered " 4346 "on Memory Module %s\n", unum); 4347 page_status = PR_MCE; 4348 } else if (status & ECC_PERSISTENT) { 4349 psimm[i].leaky_bucket_cnt = 1; 4350 psimm[i].intermittent_total = 0; 4351 psimm[i].persistent_total = 1; 4352 psimm[i].sticky_total = 0; 4353 } else { 4354 /* 4355 * Intermittent - Because the scrub operation 4356 * cannot find the error in the DIMM, we will 4357 * not count these in the leaky bucket 4358 */ 4359 psimm[i].leaky_bucket_cnt = 0; 4360 psimm[i].intermittent_total = 1; 4361 psimm[i].persistent_total = 0; 4362 psimm[i].sticky_total = 0; 4363 } 4364 ecc_error_info_data.count.value.ui32++; 4365 break; 4366 } else if (strncmp(unum, psimm[i].name, len) == 0) { 4367 /* 4368 * Found an existing entry for the current 4369 * memory module, adjust the counts. 4370 */ 4371 if (status & ECC_STICKY) { 4372 psimm[i].sticky_total++; 4373 cmn_err(CE_WARN, 4374 "[AFT0] Sticky Softerror encountered " 4375 "on Memory Module %s\n", unum); 4376 page_status = PR_MCE; 4377 } else if (status & ECC_PERSISTENT) { 4378 int new_value; 4379 4380 new_value = atomic_add_16_nv( 4381 &psimm[i].leaky_bucket_cnt, 1); 4382 psimm[i].persistent_total++; 4383 if (new_value > ecc_softerr_limit) { 4384 cmn_err(CE_WARN, "[AFT0] Most recent %d" 4385 " soft errors from Memory Module" 4386 " %s exceed threshold (N=%d," 4387 " T=%dh:%02dm) triggering page" 4388 " retire", new_value, unum, 4389 ecc_softerr_limit, 4390 ecc_softerr_interval / 60, 4391 ecc_softerr_interval % 60); 4392 atomic_add_16( 4393 &psimm[i].leaky_bucket_cnt, -1); 4394 page_status = PR_MCE; 4395 } 4396 } else { /* Intermittent */ 4397 psimm[i].intermittent_total++; 4398 } 4399 break; 4400 } 4401 } 4402 4403 if (i >= mem_ce_simm_size) 4404 cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of " 4405 "space.\n"); 4406 4407 return (page_status); 4408 } 4409 4410 /* 4411 * Function to support counting of IO detected CEs. 4412 */ 4413 void 4414 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum) 4415 { 4416 int err; 4417 4418 err = ce_count_unum(ecc->flt_status, len, unum); 4419 if (err != PR_OK && automatic_page_removal) { 4420 (void) page_retire(ecc->flt_addr, err); 4421 } 4422 } 4423 4424 static int 4425 ecc_kstat_update(kstat_t *ksp, int rw) 4426 { 4427 struct kstat_ecc_mm_info *kceip = ksp->ks_data; 4428 struct ce_info *ceip = mem_ce_simm; 4429 int i = ksp->ks_instance; 4430 4431 if (rw == KSTAT_WRITE) 4432 return (EACCES); 4433 4434 ASSERT(ksp->ks_data != NULL); 4435 ASSERT(i < mem_ce_simm_size && i >= 0); 4436 4437 /* 4438 * Since we're not using locks, make sure that we don't get partial 4439 * data. The name is always copied before the counters are incremented 4440 * so only do this update routine if at least one of the counters is 4441 * non-zero, which ensures that ce_count_unum() is done, and the 4442 * string is fully copied. 4443 */ 4444 if (ceip[i].intermittent_total == 0 && 4445 ceip[i].persistent_total == 0 && 4446 ceip[i].sticky_total == 0) { 4447 /* 4448 * Uninitialized or partially initialized. Ignore. 4449 * The ks_data buffer was allocated via kmem_zalloc, 4450 * so no need to bzero it. 
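 * Note that the three totals are only ever incremented and the name is written before the first increment, so once an entry passes this check its name cannot change underneath us.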
4451 */ 4452 return (0); 4453 } 4454 4455 kstat_named_setstr(&kceip->name, ceip[i].name); 4456 kceip->intermittent_total.value.ui64 = ceip[i].intermittent_total; 4457 kceip->persistent_total.value.ui64 = ceip[i].persistent_total; 4458 kceip->sticky_total.value.ui64 = ceip[i].sticky_total; 4459 4460 return (0); 4461 } 4462 4463 #define VIS_BLOCKSIZE 64 4464 4465 int 4466 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data) 4467 { 4468 int ret, watched; 4469 4470 watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 4471 ret = dtrace_blksuword32(addr, data, 0); 4472 if (watched) 4473 watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 4474 4475 return (ret); 4476 } 4477 4478 /*ARGSUSED*/ 4479 void 4480 cpu_faulted_enter(struct cpu *cp) 4481 { 4482 } 4483 4484 /*ARGSUSED*/ 4485 void 4486 cpu_faulted_exit(struct cpu *cp) 4487 { 4488 } 4489 4490 /*ARGSUSED*/ 4491 void 4492 mmu_init_kernel_pgsz(struct hat *hat) 4493 { 4494 } 4495 4496 size_t 4497 mmu_get_kernel_lpsize(size_t lpsize) 4498 { 4499 uint_t tte; 4500 4501 if (lpsize == 0) { 4502 /* no setting for segkmem_lpsize in /etc/system: use default */ 4503 return (MMU_PAGESIZE4M); 4504 } 4505 4506 for (tte = TTE8K; tte <= TTE4M; tte++) { 4507 if (lpsize == TTEBYTES(tte)) 4508 return (lpsize); 4509 } 4510 4511 return (TTEBYTES(TTE8K)); 4512 } 4513