1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/systm.h> 30 #include <sys/archsystm.h> 31 #include <sys/machparam.h> 32 #include <sys/machsystm.h> 33 #include <sys/cpu.h> 34 #include <sys/elf_SPARC.h> 35 #include <vm/hat_sfmmu.h> 36 #include <vm/page.h> 37 #include <vm/vm_dep.h> 38 #include <sys/cpuvar.h> 39 #include <sys/spitregs.h> 40 #include <sys/async.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/dditypes.h> 44 #include <sys/sunddi.h> 45 #include <sys/cpu_module.h> 46 #include <sys/prom_debug.h> 47 #include <sys/vmsystm.h> 48 #include <sys/prom_plat.h> 49 #include <sys/sysmacros.h> 50 #include <sys/intreg.h> 51 #include <sys/machtrap.h> 52 #include <sys/ontrap.h> 53 #include <sys/ivintr.h> 54 #include <sys/atomic.h> 55 #include <sys/panic.h> 56 #include <sys/ndifm.h> 57 #include <sys/fm/protocol.h> 58 #include <sys/fm/util.h> 59 #include <sys/fm/cpu/UltraSPARC-II.h> 60 #include <sys/ddi.h> 61 #include <sys/ecc_kstat.h> 62 #include <sys/watchpoint.h> 63 #include <sys/dtrace.h> 64 #include <sys/errclassify.h> 65 66 uint_t cpu_impl_dual_pgsz = 0; 67 68 /* 69 * Structure for the 8 byte ecache data dump and the associated AFSR state. 70 * There will be 8 of these structures used to dump an ecache line (64 bytes). 
71 */ 72 typedef struct sf_ec_data_elm { 73 uint64_t ec_d8; 74 uint64_t ec_afsr; 75 } ec_data_t; 76 77 /* 78 * Define spitfire (Ultra I/II) specific asynchronous error structure 79 */ 80 typedef struct spitfire_async_flt { 81 struct async_flt cmn_asyncflt; /* common - see sun4u/sys/async.h */ 82 ushort_t flt_type; /* types of faults - cpu specific */ 83 ec_data_t flt_ec_data[8]; /* for E$ or mem dump/state */ 84 uint64_t flt_ec_tag; /* E$ tag info */ 85 int flt_ec_lcnt; /* number of bad E$ lines */ 86 ushort_t flt_sdbh; /* UDBH reg */ 87 ushort_t flt_sdbl; /* UDBL reg */ 88 } spitf_async_flt; 89 90 /* 91 * Prototypes for support routines in spitfire_asm.s: 92 */ 93 extern void flush_ecache(uint64_t physaddr, size_t size, size_t linesize); 94 extern uint64_t get_lsu(void); 95 extern void set_lsu(uint64_t ncc); 96 extern void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag, 97 uint64_t *oafsr, uint64_t *acc_afsr); 98 extern uint64_t check_ecache_line(uint32_t id, uint64_t *acc_afsr); 99 extern uint64_t get_ecache_tag(uint32_t id, uint64_t *nafsr, 100 uint64_t *acc_afsr); 101 extern uint64_t read_and_clear_afsr(); 102 extern void write_ec_tag_parity(uint32_t id); 103 extern void write_hb_ec_tag_parity(uint32_t id); 104 105 /* 106 * Spitfire module routines: 107 */ 108 static void cpu_async_log_err(void *flt); 109 /*PRINTFLIKE6*/ 110 static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, 111 uint_t logflags, const char *endstr, const char *fmt, ...); 112 113 static void cpu_read_paddr(struct async_flt *aflt, short verbose, short ce_err); 114 static void cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum); 115 static void cpu_log_ecmem_info(spitf_async_flt *spf_flt); 116 117 static void log_ce_err(struct async_flt *aflt, char *unum); 118 static void log_ue_err(struct async_flt *aflt, char *unum); 119 static void check_misc_err(spitf_async_flt *spf_flt); 120 static ushort_t ecc_gen(uint_t high_bytes, uint_t low_bytes); 121 static int check_ecc(struct async_flt *aflt); 122 static uint_t get_cpu_status(uint64_t arg); 123 static uint64_t clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr); 124 static void scan_ecache(uint64_t *afar, ec_data_t *data, uint64_t *tag, 125 int *m, uint64_t *afsr); 126 static void ecache_kstat_init(struct cpu *cp); 127 static void ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, 128 uint64_t paddr, int mpb, uint64_t); 129 static uint64_t ecache_scrub_misc_err(int, uint64_t); 130 static void ecache_scrub_tag_err(uint64_t, uchar_t, uint32_t); 131 static void ecache_page_retire(void *); 132 static int ecc_kstat_update(kstat_t *ksp, int rw); 133 static int ce_count_unum(int status, int len, char *unum); 134 static void add_leaky_bucket_timeout(void); 135 static int synd_to_synd_code(int synd_status, ushort_t synd); 136 137 extern uint_t read_all_memscrub; 138 extern void memscrub_run(void); 139 140 static uchar_t isus2i; /* set if sabre */ 141 static uchar_t isus2e; /* set if hummingbird */ 142 143 /* 144 * Default ecache mask and shift settings for Spitfire. If we detect a 145 * different CPU implementation, we will modify these values at boot time. 
146 */ 147 static uint64_t cpu_ec_tag_mask = S_ECTAG_MASK; 148 static uint64_t cpu_ec_state_mask = S_ECSTATE_MASK; 149 static uint64_t cpu_ec_par_mask = S_ECPAR_MASK; 150 static int cpu_ec_par_shift = S_ECPAR_SHIFT; 151 static int cpu_ec_tag_shift = S_ECTAG_SHIFT; 152 static int cpu_ec_state_shift = S_ECSTATE_SHIFT; 153 static uchar_t cpu_ec_state_exl = S_ECSTATE_EXL; 154 static uchar_t cpu_ec_state_mod = S_ECSTATE_MOD; 155 static uchar_t cpu_ec_state_shr = S_ECSTATE_SHR; 156 static uchar_t cpu_ec_state_own = S_ECSTATE_OWN; 157 158 /* 159 * Default ecache state bits for Spitfire. These individual bits indicate if 160 * the given line is in any of the valid or modified states, respectively. 161 * Again, we modify these at boot if we detect a different CPU. 162 */ 163 static uchar_t cpu_ec_state_valid = S_ECSTATE_VALID; 164 static uchar_t cpu_ec_state_dirty = S_ECSTATE_DIRTY; 165 static uchar_t cpu_ec_parity = S_EC_PARITY; 166 static uchar_t cpu_ec_state_parity = S_ECSTATE_PARITY; 167 168 /* 169 * This table is used to determine which bit(s) is(are) bad when an ECC 170 * error occurrs. The array is indexed an 8-bit syndrome. The entries 171 * of this array have the following semantics: 172 * 173 * 00-63 The number of the bad bit, when only one bit is bad. 174 * 64 ECC bit C0 is bad. 175 * 65 ECC bit C1 is bad. 176 * 66 ECC bit C2 is bad. 177 * 67 ECC bit C3 is bad. 178 * 68 ECC bit C4 is bad. 179 * 69 ECC bit C5 is bad. 180 * 70 ECC bit C6 is bad. 181 * 71 ECC bit C7 is bad. 182 * 72 Two bits are bad. 183 * 73 Three bits are bad. 184 * 74 Four bits are bad. 185 * 75 More than Four bits are bad. 186 * 76 NO bits are bad. 187 * Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28. 188 */ 189 190 #define C0 64 191 #define C1 65 192 #define C2 66 193 #define C3 67 194 #define C4 68 195 #define C5 69 196 #define C6 70 197 #define C7 71 198 #define M2 72 199 #define M3 73 200 #define M4 74 201 #define MX 75 202 #define NA 76 203 204 #define SYND_IS_SINGLE_BIT_DATA(synd_code) ((synd_code >= 0) && \ 205 (synd_code < C0)) 206 #define SYND_IS_SINGLE_BIT_CHK(synd_code) ((synd_code >= C0) && \ 207 (synd_code <= C7)) 208 209 static char ecc_syndrome_tab[] = 210 { 211 NA, C0, C1, M2, C2, M2, M2, M3, C3, M2, M2, M3, M2, M3, M3, M4, 212 C4, M2, M2, 32, M2, 57, MX, M2, M2, 37, 49, M2, 40, M2, M2, 44, 213 C5, M2, M2, 33, M2, 61, 4, M2, M2, MX, 53, M2, 45, M2, M2, 41, 214 M2, 0, 1, M2, 10, M2, M2, MX, 15, M2, M2, MX, M2, M3, M3, M2, 215 C6, M2, M2, 42, M2, 59, 39, M2, M2, MX, 51, M2, 34, M2, M2, 46, 216 M2, 25, 29, M2, 27, M4, M2, MX, 31, M2, M4, MX, M2, MX, MX, M2, 217 M2, MX, 36, M2, 7, M2, M2, 54, MX, M2, M2, 62, M2, 48, 56, M2, 218 M3, M2, M2, MX, M2, MX, 22, M2, M2, 18, MX, M2, M3, M2, M2, MX, 219 C7, M2, M2, 47, M2, 63, MX, M2, M2, 6, 55, M2, 35, M2, M2, 43, 220 M2, 5, MX, M2, MX, M2, M2, 50, 38, M2, M2, 58, M2, 52, 60, M2, 221 M2, 17, 21, M2, 19, M4, M2, MX, 23, M2, M4, MX, M2, MX, MX, M2, 222 M3, M2, M2, MX, M2, MX, 30, M2, M2, 26, MX, M2, M3, M2, M2, MX, 223 M2, 8, 13, M2, 2, M2, M2, M3, 3, M2, M2, M3, M2, MX, MX, M2, 224 M3, M2, M2, M3, M2, MX, 16, M2, M2, 20, MX, M2, MX, M2, M2, MX, 225 M3, M2, M2, M3, M2, MX, 24, M2, M2, 28, MX, M2, MX, M2, M2, MX, 226 M4, 12, 9, M2, 14, M2, M2, MX, 11, M2, M2, MX, M2, MX, MX, M4 227 }; 228 229 #define SYND_TBL_SIZE 256 230 231 /* 232 * Hack for determining UDBH/UDBL, for later cpu-specific error reporting. 233 * Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird. 
234 */ 235 #define UDBL_REG 0x8000 236 #define UDBL(synd) ((synd & UDBL_REG) >> 15) 237 #define SYND(synd) (synd & 0x7FFF) 238 239 /* 240 * These error types are specific to Spitfire and are used internally for the 241 * spitfire fault structure flt_type field. 242 */ 243 #define CPU_UE_ERR 0 /* uncorrectable errors - UEs */ 244 #define CPU_EDP_LDP_ERR 1 /* LDP or EDP parity error */ 245 #define CPU_WP_ERR 2 /* WP parity error */ 246 #define CPU_BTO_BERR_ERR 3 /* bus timeout errors */ 247 #define CPU_PANIC_CP_ERR 4 /* cp error from panic polling */ 248 #define CPU_TRAPPING_CP_ERR 5 /* for sabre/hbird only, cp error */ 249 #define CPU_BADLINE_CI_ERR 6 /* E$ clean_bad line when idle */ 250 #define CPU_BADLINE_CB_ERR 7 /* E$ clean_bad line when busy */ 251 #define CPU_BADLINE_DI_ERR 8 /* E$ dirty_bad line when idle */ 252 #define CPU_BADLINE_DB_ERR 9 /* E$ dirty_bad line when busy */ 253 #define CPU_ORPHAN_CP_ERR 10 /* Orphan CP error */ 254 #define CPU_ECACHE_ADDR_PAR_ERR 11 /* Ecache Address parity error */ 255 #define CPU_ECACHE_STATE_ERR 12 /* Ecache state error */ 256 #define CPU_ECACHE_ETP_ETS_ERR 13 /* ETP set but ETS is zero */ 257 #define CPU_ECACHE_TAG_ERR 14 /* Scrub the E$ tag, if state clean */ 258 #define CPU_ADDITIONAL_ERR 15 /* Additional errors occurred */ 259 260 /* 261 * Macro to access the "Spitfire cpu private" data structure. 262 */ 263 #define CPU_PRIVATE_PTR(cp, x) (&(((spitfire_private_t *)CPU_PRIVATE(cp))->x)) 264 265 /* 266 * set to 0 to disable automatic retiring of pages on 267 * DIMMs that have excessive soft errors 268 */ 269 int automatic_page_removal = 1; 270 271 /* 272 * Heuristic for figuring out which module to replace. 273 * Relative likelihood that this P_SYND indicates that this module is bad. 274 * We call it a "score", though, not a relative likelihood. 275 * 276 * Step 1. 277 * Assign a score to each byte of P_SYND according to the following rules: 278 * If no bits on (0x00) or all bits on (0xFF), then give it a 5. 279 * If one bit on, give it a 95. 280 * If seven bits on, give it a 10. 281 * If two bits on: 282 * in different nybbles, a 90 283 * in same nybble, but unaligned, 85 284 * in same nybble and as an aligned pair, 80 285 * If six bits on, look at the bits that are off: 286 * in same nybble and as an aligned pair, 15 287 * in same nybble, but unaligned, 20 288 * in different nybbles, a 25 289 * If three bits on: 290 * in diferent nybbles, no aligned pairs, 75 291 * in diferent nybbles, one aligned pair, 70 292 * in the same nybble, 65 293 * If five bits on, look at the bits that are off: 294 * in the same nybble, 30 295 * in diferent nybbles, one aligned pair, 35 296 * in diferent nybbles, no aligned pairs, 40 297 * If four bits on: 298 * all in one nybble, 45 299 * as two aligned pairs, 50 300 * one aligned pair, 55 301 * no aligned pairs, 60 302 * 303 * Step 2: 304 * Take the higher of the two scores (one for each byte) as the score 305 * for the module. 306 * 307 * Print the score for each module, and field service should replace the 308 * module with the highest score. 309 */ 310 311 /* 312 * In the table below, the first row/column comment indicates the 313 * number of bits on in that nybble; the second row/column comment is 314 * the hex digit. 
315 */ 316 317 static int 318 p_synd_score_table[256] = { 319 /* 0 1 1 2 1 2 2 3 1 2 2 3 2 3 3 4 */ 320 /* 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F */ 321 /* 0 0 */ 5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45, 322 /* 1 1 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30, 323 /* 1 2 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30, 324 /* 2 3 */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15, 325 /* 1 4 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30, 326 /* 2 5 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20, 327 /* 2 6 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20, 328 /* 3 7 */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10, 329 /* 1 8 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30, 330 /* 2 9 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20, 331 /* 2 A */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20, 332 /* 3 B */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10, 333 /* 2 C */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15, 334 /* 3 D */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10, 335 /* 3 E */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10, 336 /* 4 F */ 45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10, 5, 337 }; 338 339 int 340 ecc_psynd_score(ushort_t p_synd) 341 { 342 int i, j, a, b; 343 344 i = p_synd & 0xFF; 345 j = (p_synd >> 8) & 0xFF; 346 347 a = p_synd_score_table[i]; 348 b = p_synd_score_table[j]; 349 350 return (a > b ? a : b); 351 } 352 353 /* 354 * Async Fault Logging 355 * 356 * To ease identifying, reading, and filtering async fault log messages, the 357 * label [AFT#] is now prepended to each async fault message. These messages 358 * and the logging rules are implemented by cpu_aflt_log(), below. 359 * 360 * [AFT0] - Tag for log messages that are associated with corrected ECC errors. 361 * This includes both corrected ECC memory and ecache faults. 362 * 363 * [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything 364 * else except CE errors) with a priority of 1 (highest). This tag 365 * is also used for panic messages that result from an async fault. 366 * 367 * [AFT2] - These are lower priority diagnostic messages for uncorrected ECC 368 * [AFT3] or parity errors. For example, AFT2 is used for the actual dump 369 * of the E-$ data and tags. 370 * 371 * In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not 372 * printed on the console. To send all AFT logs to both the log and the 373 * console, set aft_verbose = 1. 
374 */ 375 376 #define CPU_FLTCPU 0x0001 /* print flt_inst as a CPU id */ 377 #define CPU_SPACE 0x0002 /* print flt_status (data or instr) */ 378 #define CPU_ERRID 0x0004 /* print flt_id */ 379 #define CPU_TL 0x0008 /* print flt_tl */ 380 #define CPU_ERRID_FIRST 0x0010 /* print flt_id first in message */ 381 #define CPU_AFSR 0x0020 /* print flt_stat as decoded %afsr */ 382 #define CPU_AFAR 0x0040 /* print flt_addr as %afar */ 383 #define CPU_AF_PSYND 0x0080 /* print flt_stat %afsr.PSYND */ 384 #define CPU_AF_ETS 0x0100 /* print flt_stat %afsr.ETS */ 385 #define CPU_UDBH 0x0200 /* print flt_sdbh and syndrome */ 386 #define CPU_UDBL 0x0400 /* print flt_sdbl and syndrome */ 387 #define CPU_FAULTPC 0x0800 /* print flt_pc */ 388 #define CPU_SYND 0x1000 /* print flt_synd and unum */ 389 390 #define CMN_LFLAGS (CPU_FLTCPU | CPU_SPACE | CPU_ERRID | CPU_TL | \ 391 CPU_AFSR | CPU_AFAR | CPU_AF_PSYND | \ 392 CPU_AF_ETS | CPU_UDBH | CPU_UDBL | \ 393 CPU_FAULTPC) 394 #define UE_LFLAGS (CMN_LFLAGS | CPU_SYND) 395 #define CE_LFLAGS (UE_LFLAGS & ~CPU_UDBH & ~CPU_UDBL & ~CPU_TL & \ 396 ~CPU_SPACE) 397 #define PARERR_LFLAGS (CMN_LFLAGS) 398 #define WP_LFLAGS (CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL) 399 #define CP_LFLAGS (CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL & \ 400 ~CPU_FLTCPU & ~CPU_FAULTPC) 401 #define BERRTO_LFLAGS (CMN_LFLAGS) 402 #define NO_LFLAGS (0) 403 404 #define AFSR_FMTSTR0 "\020\1ME" 405 #define AFSR_FMTSTR1 "\020\040PRIV\037ISAP\036ETP\035IVUE\034TO" \ 406 "\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE" 407 #define UDB_FMTSTR "\020\012UE\011CE" 408 409 /* 410 * Save the cache bootup state for use when internal 411 * caches are to be re-enabled after an error occurs. 412 */ 413 uint64_t cache_boot_state = 0; 414 415 /* 416 * PA[31:0] represent Displacement in UPA configuration space. 417 */ 418 uint_t root_phys_addr_lo_mask = 0xffffffff; 419 420 /* 421 * Spitfire legacy globals 422 */ 423 int itlb_entries; 424 int dtlb_entries; 425 426 void 427 cpu_setup(void) 428 { 429 extern int page_retire_messages; 430 extern int page_retire_first_ue; 431 extern int at_flags; 432 #if defined(SF_ERRATA_57) 433 extern caddr_t errata57_limit; 434 #endif 435 cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT); 436 437 at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1; 438 439 /* 440 * Spitfire isn't currently FMA-aware, so we have to enable the 441 * page retirement messages. We also change the default policy 442 * for UE retirement to allow clearing of transient errors. 443 */ 444 page_retire_messages = 1; 445 page_retire_first_ue = 0; 446 447 /* 448 * save the cache bootup state. 449 */ 450 cache_boot_state = get_lsu() & (LSU_IC | LSU_DC); 451 452 if (use_page_coloring) { 453 do_pg_coloring = 1; 454 if (use_virtual_coloring) 455 do_virtual_coloring = 1; 456 } 457 458 /* 459 * Tune pp_slots to use up to 1/8th of the tlb entries. 460 */ 461 pp_slots = MIN(8, MAXPP_SLOTS); 462 463 /* 464 * Block stores invalidate all pages of the d$ so pagecopy 465 * et. al. do not need virtual translations with virtual 466 * coloring taken into consideration. 467 */ 468 pp_consistent_coloring = 0; 469 470 isa_list = 471 "sparcv9+vis sparcv9 " 472 "sparcv8plus+vis sparcv8plus " 473 "sparcv8 sparcv8-fsmuld sparcv7 sparc"; 474 475 cpu_hwcap_flags = AV_SPARC_VIS; 476 477 /* 478 * On Spitfire, there's a hole in the address space 479 * that we must never map (the hardware only support 44-bits of 480 * virtual address). Later CPUs are expected to have wider 481 * supported address ranges. 
482 * 483 * See address map on p23 of the UltraSPARC 1 user's manual. 484 */ 485 hole_start = (caddr_t)0x80000000000ull; 486 hole_end = (caddr_t)0xfffff80000000000ull; 487 488 /* 489 * A spitfire call bug requires us to be a further 4Gbytes of 490 * firewall from the spec. 491 * 492 * See Spitfire Errata #21 493 */ 494 hole_start = (caddr_t)((uintptr_t)hole_start - (1ul << 32)); 495 hole_end = (caddr_t)((uintptr_t)hole_end + (1ul << 32)); 496 497 /* 498 * The kpm mapping window. 499 * kpm_size: 500 * The size of a single kpm range. 501 * The overall size will be: kpm_size * vac_colors. 502 * kpm_vbase: 503 * The virtual start address of the kpm range within the kernel 504 * virtual address space. kpm_vbase has to be kpm_size aligned. 505 */ 506 kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */ 507 kpm_size_shift = 41; 508 kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */ 509 510 #if defined(SF_ERRATA_57) 511 errata57_limit = (caddr_t)0x80000000ul; 512 #endif 513 514 /* 515 * Disable text by default. 516 * Note that the other defaults are set in sun4u/vm/mach_vm_dep.c. 517 */ 518 max_utext_lpsize = MMU_PAGESIZE; 519 } 520 521 static int 522 getintprop(pnode_t node, char *name, int deflt) 523 { 524 int value; 525 526 switch (prom_getproplen(node, name)) { 527 case 0: 528 value = 1; /* boolean properties */ 529 break; 530 531 case sizeof (int): 532 (void) prom_getprop(node, name, (caddr_t)&value); 533 break; 534 535 default: 536 value = deflt; 537 break; 538 } 539 540 return (value); 541 } 542 543 /* 544 * Set the magic constants of the implementation. 545 */ 546 void 547 cpu_fiximp(pnode_t dnode) 548 { 549 extern int vac_size, vac_shift; 550 extern uint_t vac_mask; 551 extern int dcache_line_mask; 552 int i, a; 553 static struct { 554 char *name; 555 int *var; 556 } prop[] = { 557 "dcache-size", &dcache_size, 558 "dcache-line-size", &dcache_linesize, 559 "icache-size", &icache_size, 560 "icache-line-size", &icache_linesize, 561 "ecache-size", &ecache_size, 562 "ecache-line-size", &ecache_alignsize, 563 "ecache-associativity", &ecache_associativity, 564 "#itlb-entries", &itlb_entries, 565 "#dtlb-entries", &dtlb_entries, 566 }; 567 568 for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) { 569 if ((a = getintprop(dnode, prop[i].name, -1)) != -1) { 570 *prop[i].var = a; 571 } 572 } 573 574 ecache_setsize = ecache_size / ecache_associativity; 575 576 vac_size = S_VAC_SIZE; 577 vac_mask = MMU_PAGEMASK & (vac_size - 1); 578 i = 0; a = vac_size; 579 while (a >>= 1) 580 ++i; 581 vac_shift = i; 582 shm_alignment = vac_size; 583 vac = 1; 584 585 dcache_line_mask = (dcache_size - 1) & ~(dcache_linesize - 1); 586 587 /* 588 * UltraSPARC I & II have ecache sizes running 589 * as follows: .25 MB, .5 MB, 1 MB, 2 MB, 4 MB 590 * and 8 MB. Adjust the copyin/copyout limits 591 * according to the cache size. The magic number 592 * of VIS_COPY_THRESHOLD comes from the copyin/copyout code 593 * and its floor of VIS_COPY_THRESHOLD bytes before it will use 594 * VIS instructions. 595 * 596 * We assume that all CPUs on the system have the same size 597 * ecache. We're also called very early in the game. 598 * /etc/system will be parsed *after* we're called so 599 * these values can be overwritten. 
600 */ 601 602 hw_copy_limit_1 = VIS_COPY_THRESHOLD; 603 if (ecache_size <= 524288) { 604 hw_copy_limit_2 = VIS_COPY_THRESHOLD; 605 hw_copy_limit_4 = VIS_COPY_THRESHOLD; 606 hw_copy_limit_8 = VIS_COPY_THRESHOLD; 607 } else if (ecache_size == 1048576) { 608 hw_copy_limit_2 = 1024; 609 hw_copy_limit_4 = 1280; 610 hw_copy_limit_8 = 1536; 611 } else if (ecache_size == 2097152) { 612 hw_copy_limit_2 = 1536; 613 hw_copy_limit_4 = 2048; 614 hw_copy_limit_8 = 2560; 615 } else if (ecache_size == 4194304) { 616 hw_copy_limit_2 = 2048; 617 hw_copy_limit_4 = 2560; 618 hw_copy_limit_8 = 3072; 619 } else { 620 hw_copy_limit_2 = 2560; 621 hw_copy_limit_4 = 3072; 622 hw_copy_limit_8 = 3584; 623 } 624 } 625 626 /* 627 * Called by setcpudelay 628 */ 629 void 630 cpu_init_tick_freq(void) 631 { 632 /* 633 * Determine the cpu frequency by calling 634 * tod_get_cpufrequency. Use an approximate freqency 635 * value computed by the prom if the tod module 636 * is not initialized and loaded yet. 637 */ 638 if (tod_ops.tod_get_cpufrequency != NULL) { 639 mutex_enter(&tod_lock); 640 sys_tick_freq = tod_ops.tod_get_cpufrequency(); 641 mutex_exit(&tod_lock); 642 } else { 643 #if defined(HUMMINGBIRD) 644 /* 645 * the hummingbird version of %stick is used as the basis for 646 * low level timing; this provides an independent constant-rate 647 * clock for general system use, and frees power mgmt to set 648 * various cpu clock speeds. 649 */ 650 if (system_clock_freq == 0) 651 cmn_err(CE_PANIC, "invalid system_clock_freq 0x%lx", 652 system_clock_freq); 653 sys_tick_freq = system_clock_freq; 654 #else /* SPITFIRE */ 655 sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq; 656 #endif 657 } 658 } 659 660 661 void shipit(int upaid); 662 extern uint64_t xc_tick_limit; 663 extern uint64_t xc_tick_jump_limit; 664 665 #ifdef SEND_MONDO_STATS 666 uint64_t x_early[NCPU][64]; 667 #endif 668 669 /* 670 * Note: A version of this function is used by the debugger via the KDI, 671 * and must be kept in sync with this version. Any changes made to this 672 * function to support new chips or to accomodate errata must also be included 673 * in the KDI-specific version. See spitfire_kdi.c. 674 */ 675 void 676 send_one_mondo(int cpuid) 677 { 678 uint64_t idsr, starttick, endtick; 679 int upaid, busy, nack; 680 uint64_t tick, tick_prev; 681 ulong_t ticks; 682 683 CPU_STATS_ADDQ(CPU, sys, xcalls, 1); 684 upaid = CPUID_TO_UPAID(cpuid); 685 tick = starttick = gettick(); 686 shipit(upaid); 687 endtick = starttick + xc_tick_limit; 688 busy = nack = 0; 689 for (;;) { 690 idsr = getidsr(); 691 if (idsr == 0) 692 break; 693 /* 694 * When we detect an irregular tick jump, we adjust 695 * the timer window to the current tick value. 
696 */ 697 tick_prev = tick; 698 tick = gettick(); 699 ticks = tick - tick_prev; 700 if (ticks > xc_tick_jump_limit) { 701 endtick = tick + xc_tick_limit; 702 } else if (tick > endtick) { 703 if (panic_quiesce) 704 return; 705 cmn_err(CE_PANIC, 706 "send mondo timeout (target 0x%x) [%d NACK %d BUSY]", 707 upaid, nack, busy); 708 } 709 if (idsr & IDSR_BUSY) { 710 busy++; 711 continue; 712 } 713 drv_usecwait(1); 714 shipit(upaid); 715 nack++; 716 busy = 0; 717 } 718 #ifdef SEND_MONDO_STATS 719 x_early[getprocessorid()][highbit(gettick() - starttick) - 1]++; 720 #endif 721 } 722 723 void 724 send_mondo_set(cpuset_t set) 725 { 726 int i; 727 728 for (i = 0; i < NCPU; i++) 729 if (CPU_IN_SET(set, i)) { 730 send_one_mondo(i); 731 CPUSET_DEL(set, i); 732 if (CPUSET_ISNULL(set)) 733 break; 734 } 735 } 736 737 void 738 syncfpu(void) 739 { 740 } 741 742 /* 743 * Determine the size of the CPU module's error structure in bytes. This is 744 * called once during boot to initialize the error queues. 745 */ 746 int 747 cpu_aflt_size(void) 748 { 749 /* 750 * We need to determine whether this is a sabre, Hummingbird or a 751 * Spitfire/Blackbird impl and set the appropriate state variables for 752 * ecache tag manipulation. We can't do this in cpu_setup() as it is 753 * too early in the boot flow and the cpunodes are not initialized. 754 * This routine will be called once after cpunodes[] is ready, so do 755 * it here. 756 */ 757 if (cpunodes[CPU->cpu_id].implementation == SABRE_IMPL) { 758 isus2i = 1; 759 cpu_ec_tag_mask = SB_ECTAG_MASK; 760 cpu_ec_state_mask = SB_ECSTATE_MASK; 761 cpu_ec_par_mask = SB_ECPAR_MASK; 762 cpu_ec_par_shift = SB_ECPAR_SHIFT; 763 cpu_ec_tag_shift = SB_ECTAG_SHIFT; 764 cpu_ec_state_shift = SB_ECSTATE_SHIFT; 765 cpu_ec_state_exl = SB_ECSTATE_EXL; 766 cpu_ec_state_mod = SB_ECSTATE_MOD; 767 768 /* These states do not exist in sabre - set to 0xFF */ 769 cpu_ec_state_shr = 0xFF; 770 cpu_ec_state_own = 0xFF; 771 772 cpu_ec_state_valid = SB_ECSTATE_VALID; 773 cpu_ec_state_dirty = SB_ECSTATE_DIRTY; 774 cpu_ec_state_parity = SB_ECSTATE_PARITY; 775 cpu_ec_parity = SB_EC_PARITY; 776 } else if (cpunodes[CPU->cpu_id].implementation == HUMMBRD_IMPL) { 777 isus2e = 1; 778 cpu_ec_tag_mask = HB_ECTAG_MASK; 779 cpu_ec_state_mask = HB_ECSTATE_MASK; 780 cpu_ec_par_mask = HB_ECPAR_MASK; 781 cpu_ec_par_shift = HB_ECPAR_SHIFT; 782 cpu_ec_tag_shift = HB_ECTAG_SHIFT; 783 cpu_ec_state_shift = HB_ECSTATE_SHIFT; 784 cpu_ec_state_exl = HB_ECSTATE_EXL; 785 cpu_ec_state_mod = HB_ECSTATE_MOD; 786 787 /* These states do not exist in hummingbird - set to 0xFF */ 788 cpu_ec_state_shr = 0xFF; 789 cpu_ec_state_own = 0xFF; 790 791 cpu_ec_state_valid = HB_ECSTATE_VALID; 792 cpu_ec_state_dirty = HB_ECSTATE_DIRTY; 793 cpu_ec_state_parity = HB_ECSTATE_PARITY; 794 cpu_ec_parity = HB_EC_PARITY; 795 } 796 797 return (sizeof (spitf_async_flt)); 798 } 799 800 801 /* 802 * Correctable ecc error trap handler 803 */ 804 /*ARGSUSED*/ 805 void 806 cpu_ce_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr, 807 uint_t p_afsr_high, uint_t p_afar_high) 808 { 809 ushort_t sdbh, sdbl; 810 ushort_t e_syndh, e_syndl; 811 spitf_async_flt spf_flt; 812 struct async_flt *ecc; 813 int queue = 1; 814 815 uint64_t t_afar = p_afar; 816 uint64_t t_afsr = p_afsr; 817 818 /* 819 * Note: the Spitfire data buffer error registers 820 * (upper and lower halves) are or'ed into the upper 821 * word of the afsr by ce_err(). 
822 */ 823 sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF); 824 sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF); 825 826 e_syndh = (uchar_t)(sdbh & (uint_t)P_DER_E_SYND); 827 e_syndl = (uchar_t)(sdbl & (uint_t)P_DER_E_SYND); 828 829 t_afsr &= S_AFSR_MASK; 830 t_afar &= SABRE_AFAR_PA; /* must use Sabre AFAR mask */ 831 832 /* Setup the async fault structure */ 833 bzero(&spf_flt, sizeof (spitf_async_flt)); 834 ecc = (struct async_flt *)&spf_flt; 835 ecc->flt_id = gethrtime_waitfree(); 836 ecc->flt_stat = t_afsr; 837 ecc->flt_addr = t_afar; 838 ecc->flt_status = ECC_C_TRAP; 839 ecc->flt_bus_id = getprocessorid(); 840 ecc->flt_inst = CPU->cpu_id; 841 ecc->flt_pc = (caddr_t)rp->r_pc; 842 ecc->flt_func = log_ce_err; 843 ecc->flt_in_memory = 844 (pf_is_memory(ecc->flt_addr >> MMU_PAGESHIFT)) ? 1: 0; 845 spf_flt.flt_sdbh = sdbh; 846 spf_flt.flt_sdbl = sdbl; 847 848 /* 849 * Check for fatal conditions. 850 */ 851 check_misc_err(&spf_flt); 852 853 /* 854 * Pananoid checks for valid AFSR and UDBs 855 */ 856 if ((t_afsr & P_AFSR_CE) == 0) { 857 cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS, 858 "** Panic due to CE bit not set in the AFSR", 859 " Corrected Memory Error on"); 860 } 861 862 /* 863 * We want to skip logging only if ALL the following 864 * conditions are true: 865 * 866 * 1. There is only one error 867 * 2. That error is a correctable memory error 868 * 3. The error is caused by the memory scrubber (in which case 869 * the error will have occurred under on_trap protection) 870 * 4. The error is on a retired page 871 * 872 * Note: OT_DATA_EC is used places other than the memory scrubber. 873 * However, none of those errors should occur on a retired page. 874 */ 875 if ((ecc->flt_stat & (S_AFSR_ALL_ERRS & ~P_AFSR_ME)) == P_AFSR_CE && 876 curthread->t_ontrap != NULL) { 877 878 if (curthread->t_ontrap->ot_prot & OT_DATA_EC) { 879 if (page_retire_check(ecc->flt_addr, NULL) == 0) { 880 queue = 0; 881 } 882 } 883 } 884 885 if (((sdbh & P_DER_CE) == 0) && ((sdbl & P_DER_CE) == 0)) { 886 cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS, 887 "** Panic due to CE bits not set in the UDBs", 888 " Corrected Memory Error on"); 889 } 890 891 if ((sdbh >> 8) & 1) { 892 ecc->flt_synd = e_syndh; 893 ce_scrub(ecc); 894 if (queue) { 895 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc, 896 sizeof (*ecc), ce_queue, ERRORQ_ASYNC); 897 } 898 } 899 900 if ((sdbl >> 8) & 1) { 901 ecc->flt_addr = t_afar | 0x8; /* Sabres do not have a UDBL */ 902 ecc->flt_synd = e_syndl | UDBL_REG; 903 ce_scrub(ecc); 904 if (queue) { 905 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc, 906 sizeof (*ecc), ce_queue, ERRORQ_ASYNC); 907 } 908 } 909 910 /* 911 * Re-enable all error trapping (CEEN currently cleared). 
912 */ 913 clr_datapath(); 914 set_asyncflt(P_AFSR_CE); 915 set_error_enable(EER_ENABLE); 916 } 917 918 /* 919 * Cpu specific CE logging routine 920 */ 921 static void 922 log_ce_err(struct async_flt *aflt, char *unum) 923 { 924 spitf_async_flt spf_flt; 925 926 if ((aflt->flt_stat & P_AFSR_CE) && (ce_verbose_memory == 0)) { 927 return; 928 } 929 930 spf_flt.cmn_asyncflt = *aflt; 931 cpu_aflt_log(CE_CONT, 0, &spf_flt, CE_LFLAGS, unum, 932 " Corrected Memory Error detected by"); 933 } 934 935 /* 936 * Spitfire does not perform any further CE classification refinement 937 */ 938 /*ARGSUSED*/ 939 int 940 ce_scrub_xdiag_recirc(struct async_flt *ecc, errorq_t *eqp, errorq_elem_t *eqep, 941 size_t afltoffset) 942 { 943 return (0); 944 } 945 946 char * 947 flt_to_error_type(struct async_flt *aflt) 948 { 949 if (aflt->flt_status & ECC_INTERMITTENT) 950 return (ERR_TYPE_DESC_INTERMITTENT); 951 if (aflt->flt_status & ECC_PERSISTENT) 952 return (ERR_TYPE_DESC_PERSISTENT); 953 if (aflt->flt_status & ECC_STICKY) 954 return (ERR_TYPE_DESC_STICKY); 955 return (ERR_TYPE_DESC_UNKNOWN); 956 } 957 958 /* 959 * Called by correctable ecc error logging code to print out 960 * the stick/persistent/intermittent status of the error. 961 */ 962 static void 963 cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum) 964 { 965 ushort_t status; 966 char *status1_str = "Memory"; 967 char *status2_str = "Intermittent"; 968 struct async_flt *aflt = (struct async_flt *)spf_flt; 969 970 status = aflt->flt_status; 971 972 if (status & ECC_ECACHE) 973 status1_str = "Ecache"; 974 975 if (status & ECC_STICKY) 976 status2_str = "Sticky"; 977 else if (status & ECC_PERSISTENT) 978 status2_str = "Persistent"; 979 980 cpu_aflt_log(CE_CONT, 0, spf_flt, CPU_ERRID_FIRST, 981 NULL, " Corrected %s Error on %s is %s", 982 status1_str, unum, status2_str); 983 } 984 985 /* 986 * check for a valid ce syndrome, then call the 987 * displacement flush scrubbing code, and then check the afsr to see if 988 * the error was persistent or intermittent. Reread the afar/afsr to see 989 * if the error was not scrubbed successfully, and is therefore sticky. 990 */ 991 /*ARGSUSED1*/ 992 void 993 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t triedcpulogout) 994 { 995 uint64_t eer, afsr; 996 ushort_t status; 997 998 ASSERT(getpil() > LOCK_LEVEL); 999 1000 /* 1001 * It is possible that the flt_addr is not a valid 1002 * physical address. To deal with this, we disable 1003 * NCEEN while we scrub that address. If this causes 1004 * a TIMEOUT/BERR, we know this is an invalid 1005 * memory location. 1006 */ 1007 kpreempt_disable(); 1008 eer = get_error_enable(); 1009 if (eer & (EER_CEEN | EER_NCEEN)) 1010 set_error_enable(eer & ~(EER_CEEN | EER_NCEEN)); 1011 1012 /* 1013 * To check if the error detected by IO is persistent, sticky or 1014 * intermittent. 1015 */ 1016 if (ecc->flt_status & ECC_IOBUS) { 1017 ecc->flt_stat = P_AFSR_CE; 1018 } 1019 1020 scrubphys(P2ALIGN(ecc->flt_addr, 64), 1021 cpunodes[CPU->cpu_id].ecache_size); 1022 1023 get_asyncflt(&afsr); 1024 if (afsr & (P_AFSR_TO | P_AFSR_BERR)) { 1025 /* 1026 * Must ensure that we don't get the TIMEOUT/BERR 1027 * when we reenable NCEEN, so we clear the AFSR. 1028 */ 1029 set_asyncflt(afsr & (P_AFSR_TO | P_AFSR_BERR)); 1030 if (eer & (EER_CEEN | EER_NCEEN)) 1031 set_error_enable(eer); 1032 kpreempt_enable(); 1033 return; 1034 } 1035 1036 if (eer & EER_NCEEN) 1037 set_error_enable(eer & ~EER_CEEN); 1038 1039 /* 1040 * Check and clear any ECC errors from the scrub. 
If the scrub did 1041 * not trip over the error, mark it intermittent. If the scrub did 1042 * trip the error again and it did not scrub away, mark it sticky. 1043 * Otherwise mark it persistent. 1044 */ 1045 if (check_ecc(ecc) != 0) { 1046 cpu_read_paddr(ecc, 0, 1); 1047 1048 if (check_ecc(ecc) != 0) 1049 status = ECC_STICKY; 1050 else 1051 status = ECC_PERSISTENT; 1052 } else 1053 status = ECC_INTERMITTENT; 1054 1055 if (eer & (EER_CEEN | EER_NCEEN)) 1056 set_error_enable(eer); 1057 kpreempt_enable(); 1058 1059 ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY); 1060 ecc->flt_status |= status; 1061 } 1062 1063 /* 1064 * get the syndrome and unum, and then call the routines 1065 * to check the other cpus and iobuses, and then do the error logging. 1066 */ 1067 /*ARGSUSED1*/ 1068 void 1069 cpu_ce_log_err(struct async_flt *ecc, errorq_elem_t *eqep) 1070 { 1071 char unum[UNUM_NAMLEN]; 1072 int len = 0; 1073 int ce_verbose = 0; 1074 int err; 1075 1076 ASSERT(ecc->flt_func != NULL); 1077 1078 /* Get the unum string for logging purposes */ 1079 (void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, ecc, unum, 1080 UNUM_NAMLEN, &len); 1081 1082 /* Call specific error logging routine */ 1083 (void) (*ecc->flt_func)(ecc, unum); 1084 1085 /* 1086 * Count errors per unum. 1087 * Non-memory errors are all counted via a special unum string. 1088 */ 1089 if ((err = ce_count_unum(ecc->flt_status, len, unum)) != PR_OK && 1090 automatic_page_removal) { 1091 (void) page_retire(ecc->flt_addr, err); 1092 } 1093 1094 if (ecc->flt_panic) { 1095 ce_verbose = 1; 1096 } else if ((ecc->flt_class == BUS_FAULT) || 1097 (ecc->flt_stat & P_AFSR_CE)) { 1098 ce_verbose = (ce_verbose_memory > 0); 1099 } else { 1100 ce_verbose = 1; 1101 } 1102 1103 if (ce_verbose) { 1104 spitf_async_flt sflt; 1105 int synd_code; 1106 1107 sflt.cmn_asyncflt = *ecc; /* for cpu_aflt_log() */ 1108 1109 cpu_ce_log_status(&sflt, unum); 1110 1111 synd_code = synd_to_synd_code(AFLT_STAT_VALID, 1112 SYND(ecc->flt_synd)); 1113 1114 if (SYND_IS_SINGLE_BIT_DATA(synd_code)) { 1115 cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST, 1116 NULL, " ECC Data Bit %2d was in error " 1117 "and corrected", synd_code); 1118 } else if (SYND_IS_SINGLE_BIT_CHK(synd_code)) { 1119 cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST, 1120 NULL, " ECC Check Bit %2d was in error " 1121 "and corrected", synd_code - C0); 1122 } else { 1123 /* 1124 * These are UE errors - we shouldn't be getting CE 1125 * traps for these; handle them in case of bad h/w. 1126 */ 1127 switch (synd_code) { 1128 case M2: 1129 cpu_aflt_log(CE_CONT, 0, &sflt, 1130 CPU_ERRID_FIRST, NULL, 1131 " Two ECC Bits were in error"); 1132 break; 1133 case M3: 1134 cpu_aflt_log(CE_CONT, 0, &sflt, 1135 CPU_ERRID_FIRST, NULL, 1136 " Three ECC Bits were in error"); 1137 break; 1138 case M4: 1139 cpu_aflt_log(CE_CONT, 0, &sflt, 1140 CPU_ERRID_FIRST, NULL, 1141 " Four ECC Bits were in error"); 1142 break; 1143 case MX: 1144 cpu_aflt_log(CE_CONT, 0, &sflt, 1145 CPU_ERRID_FIRST, NULL, 1146 " More than Four ECC bits were " 1147 "in error"); 1148 break; 1149 default: 1150 cpu_aflt_log(CE_CONT, 0, &sflt, 1151 CPU_ERRID_FIRST, NULL, 1152 " Unknown fault syndrome %d", 1153 synd_code); 1154 break; 1155 } 1156 } 1157 } 1158 1159 /* Display entire cache line, if valid address */ 1160 if (ce_show_data && ecc->flt_addr != AFLT_INV_ADDR) 1161 read_ecc_data(ecc, 1, 1); 1162 } 1163 1164 /* 1165 * We route all errors through a single switch statement. 
1166 */ 1167 void 1168 cpu_ue_log_err(struct async_flt *aflt) 1169 { 1170 1171 switch (aflt->flt_class) { 1172 case CPU_FAULT: 1173 cpu_async_log_err(aflt); 1174 break; 1175 1176 case BUS_FAULT: 1177 bus_async_log_err(aflt); 1178 break; 1179 1180 default: 1181 cmn_err(CE_WARN, "discarding async error 0x%p with invalid " 1182 "fault class (0x%x)", (void *)aflt, aflt->flt_class); 1183 break; 1184 } 1185 } 1186 1187 /* Values for action variable in cpu_async_error() */ 1188 #define ACTION_NONE 0 1189 #define ACTION_TRAMPOLINE 1 1190 #define ACTION_AST_FLAGS 2 1191 1192 /* 1193 * Access error trap handler for asynchronous cpu errors. This routine is 1194 * called to handle a data or instruction access error. All fatal errors are 1195 * completely handled by this routine (by panicking). Non fatal error logging 1196 * is queued for later processing either via AST or softint at a lower PIL. 1197 * In case of panic, the error log queue will also be processed as part of the 1198 * panic flow to ensure all errors are logged. This routine is called with all 1199 * errors disabled at PIL15. The AFSR bits are cleared and the UDBL and UDBH 1200 * error bits are also cleared. The hardware has also disabled the I and 1201 * D-caches for us, so we must re-enable them before returning. 1202 * 1203 * A summary of the handling of tl=0 UE/LDP/EDP/TO/BERR/WP/CP: 1204 * 1205 * _______________________________________________________________ 1206 * | Privileged tl0 | Unprivileged | 1207 * | Protected | Unprotected | Protected | Unprotected | 1208 * |on_trap|lofault| | | | 1209 * -------------|-------|-------+---------------+---------------+-------------| 1210 * | | | | | | 1211 * UE/LDP/EDP | L,T,p | L,R,p | L,P | n/a | L,R,p | 1212 * | | | | | | 1213 * TO/BERR | T | S | L,P | n/a | S | 1214 * | | | | | | 1215 * WP | L,M,p | L,M,p | L,M,p | n/a | L,M,p | 1216 * | | | | | | 1217 * CP (IIi/IIe) | L,P | L,P | L,P | n/a | L,P | 1218 * ____________________________________________________________________________ 1219 * 1220 * 1221 * Action codes: 1222 * 1223 * L - log 1224 * M - kick off memscrubber if flt_in_memory 1225 * P - panic 1226 * p - panic if US-IIi or US-IIe (Sabre); overrides R and M 1227 * R - i) if aft_panic is set, panic 1228 * ii) otherwise, send hwerr event to contract and SIGKILL to process 1229 * S - send SIGBUS to process 1230 * T - trampoline 1231 * 1232 * Special cases: 1233 * 1234 * 1) if aft_testfatal is set, all faults result in a panic regardless 1235 * of type (even WP), protection (even on_trap), or privilege. 1236 */ 1237 /*ARGSUSED*/ 1238 void 1239 cpu_async_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr, 1240 uint_t p_afsr_high, uint_t p_afar_high) 1241 { 1242 ushort_t sdbh, sdbl, ttype, tl; 1243 spitf_async_flt spf_flt; 1244 struct async_flt *aflt; 1245 char pr_reason[28]; 1246 uint64_t oafsr; 1247 uint64_t acc_afsr = 0; /* accumulated afsr */ 1248 int action = ACTION_NONE; 1249 uint64_t t_afar = p_afar; 1250 uint64_t t_afsr = p_afsr; 1251 int expected = DDI_FM_ERR_UNEXPECTED; 1252 ddi_acc_hdl_t *hp; 1253 1254 /* 1255 * We need to look at p_flag to determine if the thread detected an 1256 * error while dumping core. We can't grab p_lock here, but it's ok 1257 * because we just need a consistent snapshot and we know that everyone 1258 * else will store a consistent set of bits while holding p_lock. We 1259 * don't have to worry about a race because SDOCORE is set once prior 1260 * to doing i/o from the process's address space and is never cleared. 
1261 */ 1262 uint_t pflag = ttoproc(curthread)->p_flag; 1263 1264 pr_reason[0] = '\0'; 1265 1266 /* 1267 * Note: the Spitfire data buffer error registers 1268 * (upper and lower halves) are or'ed into the upper 1269 * word of the afsr by async_err() if P_AFSR_UE is set. 1270 */ 1271 sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF); 1272 sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF); 1273 1274 /* 1275 * Grab the ttype encoded in <63:53> of the saved 1276 * afsr passed from async_err() 1277 */ 1278 ttype = (ushort_t)((t_afsr >> 53) & 0x1FF); 1279 tl = (ushort_t)(t_afsr >> 62); 1280 1281 t_afsr &= S_AFSR_MASK; 1282 t_afar &= SABRE_AFAR_PA; /* must use Sabre AFAR mask */ 1283 1284 /* 1285 * Initialize most of the common and CPU-specific structure. We derive 1286 * aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit. The 1287 * initial setting of aflt->flt_panic is based on TL: we must panic if 1288 * the error occurred at TL > 0. We also set flt_panic if the test/demo 1289 * tuneable aft_testfatal is set (not the default). 1290 */ 1291 bzero(&spf_flt, sizeof (spitf_async_flt)); 1292 aflt = (struct async_flt *)&spf_flt; 1293 aflt->flt_id = gethrtime_waitfree(); 1294 aflt->flt_stat = t_afsr; 1295 aflt->flt_addr = t_afar; 1296 aflt->flt_bus_id = getprocessorid(); 1297 aflt->flt_inst = CPU->cpu_id; 1298 aflt->flt_pc = (caddr_t)rp->r_pc; 1299 aflt->flt_prot = AFLT_PROT_NONE; 1300 aflt->flt_class = CPU_FAULT; 1301 aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0; 1302 aflt->flt_tl = (uchar_t)tl; 1303 aflt->flt_panic = (tl != 0 || aft_testfatal != 0); 1304 aflt->flt_core = (pflag & SDOCORE) ? 1 : 0; 1305 1306 /* 1307 * Set flt_status based on the trap type. If we end up here as the 1308 * result of a UE detected by the CE handling code, leave status 0. 1309 */ 1310 switch (ttype) { 1311 case T_DATA_ERROR: 1312 aflt->flt_status = ECC_D_TRAP; 1313 break; 1314 case T_INSTR_ERROR: 1315 aflt->flt_status = ECC_I_TRAP; 1316 break; 1317 } 1318 1319 spf_flt.flt_sdbh = sdbh; 1320 spf_flt.flt_sdbl = sdbl; 1321 1322 /* 1323 * Check for fatal async errors. 1324 */ 1325 check_misc_err(&spf_flt); 1326 1327 /* 1328 * If the trap occurred in privileged mode at TL=0, we need to check to 1329 * see if we were executing in the kernel under on_trap() or t_lofault 1330 * protection. If so, modify the saved registers so that we return 1331 * from the trap to the appropriate trampoline routine. 
1332 */ 1333 if (aflt->flt_priv && tl == 0) { 1334 if (curthread->t_ontrap != NULL) { 1335 on_trap_data_t *otp = curthread->t_ontrap; 1336 1337 if (otp->ot_prot & OT_DATA_EC) { 1338 aflt->flt_prot = AFLT_PROT_EC; 1339 otp->ot_trap |= OT_DATA_EC; 1340 rp->r_pc = otp->ot_trampoline; 1341 rp->r_npc = rp->r_pc + 4; 1342 action = ACTION_TRAMPOLINE; 1343 } 1344 1345 if ((t_afsr & (P_AFSR_TO | P_AFSR_BERR)) && 1346 (otp->ot_prot & OT_DATA_ACCESS)) { 1347 aflt->flt_prot = AFLT_PROT_ACCESS; 1348 otp->ot_trap |= OT_DATA_ACCESS; 1349 rp->r_pc = otp->ot_trampoline; 1350 rp->r_npc = rp->r_pc + 4; 1351 action = ACTION_TRAMPOLINE; 1352 /* 1353 * for peeks and caut_gets errors are expected 1354 */ 1355 hp = (ddi_acc_hdl_t *)otp->ot_handle; 1356 if (!hp) 1357 expected = DDI_FM_ERR_PEEK; 1358 else if (hp->ah_acc.devacc_attr_access == 1359 DDI_CAUTIOUS_ACC) 1360 expected = DDI_FM_ERR_EXPECTED; 1361 } 1362 1363 } else if (curthread->t_lofault) { 1364 aflt->flt_prot = AFLT_PROT_COPY; 1365 rp->r_g1 = EFAULT; 1366 rp->r_pc = curthread->t_lofault; 1367 rp->r_npc = rp->r_pc + 4; 1368 action = ACTION_TRAMPOLINE; 1369 } 1370 } 1371 1372 /* 1373 * Determine if this error needs to be treated as fatal. Note that 1374 * multiple errors detected upon entry to this trap handler does not 1375 * necessarily warrant a panic. We only want to panic if the trap 1376 * happened in privileged mode and not under t_ontrap or t_lofault 1377 * protection. The exception is WP: if we *only* get WP, it is not 1378 * fatal even if the trap occurred in privileged mode, except on Sabre. 1379 * 1380 * aft_panic, if set, effectively makes us treat usermode 1381 * UE/EDP/LDP faults as if they were privileged - so we we will 1382 * panic instead of sending a contract event. A lofault-protected 1383 * fault will normally follow the contract event; if aft_panic is 1384 * set this will be changed to a panic. 1385 * 1386 * For usermode BERR/BTO errors, eg from processes performing device 1387 * control through mapped device memory, we need only deliver 1388 * a SIGBUS to the offending process. 1389 * 1390 * Some additional flt_panic reasons (eg, WP on Sabre) will be 1391 * checked later; for now we implement the common reasons. 1392 */ 1393 if (aflt->flt_prot == AFLT_PROT_NONE) { 1394 /* 1395 * Beware - multiple bits may be set in AFSR 1396 */ 1397 if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) { 1398 if (aflt->flt_priv || aft_panic) 1399 aflt->flt_panic = 1; 1400 } 1401 1402 if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) { 1403 if (aflt->flt_priv) 1404 aflt->flt_panic = 1; 1405 } 1406 } else if (aflt->flt_prot == AFLT_PROT_COPY && aft_panic) { 1407 aflt->flt_panic = 1; 1408 } 1409 1410 /* 1411 * UE/BERR/TO: Call our bus nexus friends to check for 1412 * IO errors that may have resulted in this trap. 1413 */ 1414 if (t_afsr & (P_AFSR_TO | P_AFSR_BERR | P_AFSR_UE)) { 1415 cpu_run_bus_error_handlers(aflt, expected); 1416 } 1417 1418 /* 1419 * Handle UE: If the UE is in memory, we need to flush the bad line from 1420 * the E-cache. We also need to query the bus nexus for fatal errors. 1421 * For sabre, we will panic on UEs. Attempts to do diagnostic read on 1422 * caches may introduce more parity errors (especially when the module 1423 * is bad) and in sabre there is no guarantee that such errors 1424 * (if introduced) are written back as poisoned data. 
1425 */ 1426 if (t_afsr & P_AFSR_UE) { 1427 int i; 1428 1429 (void) strcat(pr_reason, "UE "); 1430 1431 spf_flt.flt_type = CPU_UE_ERR; 1432 aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >> 1433 MMU_PAGESHIFT)) ? 1: 0; 1434 1435 /* 1436 * With UE, we have the PA of the fault. 1437 * Let do a diagnostic read to get the ecache 1438 * data and tag info of the bad line for logging. 1439 */ 1440 if (aflt->flt_in_memory) { 1441 uint32_t ec_set_size; 1442 uchar_t state; 1443 uint32_t ecache_idx; 1444 uint64_t faultpa = P2ALIGN(aflt->flt_addr, 64); 1445 1446 /* touch the line to put it in ecache */ 1447 acc_afsr |= read_and_clear_afsr(); 1448 (void) lddphys(faultpa); 1449 acc_afsr |= (read_and_clear_afsr() & 1450 ~(P_AFSR_EDP | P_AFSR_UE)); 1451 1452 ec_set_size = cpunodes[CPU->cpu_id].ecache_size / 1453 ecache_associativity; 1454 1455 for (i = 0; i < ecache_associativity; i++) { 1456 ecache_idx = i * ec_set_size + 1457 (aflt->flt_addr % ec_set_size); 1458 get_ecache_dtag(P2ALIGN(ecache_idx, 64), 1459 (uint64_t *)&spf_flt.flt_ec_data[0], 1460 &spf_flt.flt_ec_tag, &oafsr, &acc_afsr); 1461 acc_afsr |= oafsr; 1462 1463 state = (uchar_t)((spf_flt.flt_ec_tag & 1464 cpu_ec_state_mask) >> cpu_ec_state_shift); 1465 1466 if ((state & cpu_ec_state_valid) && 1467 ((spf_flt.flt_ec_tag & cpu_ec_tag_mask) == 1468 ((uint64_t)aflt->flt_addr >> 1469 cpu_ec_tag_shift))) 1470 break; 1471 } 1472 1473 /* 1474 * Check to see if the ecache tag is valid for the 1475 * fault PA. In the very unlikely event where the 1476 * line could be victimized, no ecache info will be 1477 * available. If this is the case, capture the line 1478 * from memory instead. 1479 */ 1480 if ((state & cpu_ec_state_valid) == 0 || 1481 (spf_flt.flt_ec_tag & cpu_ec_tag_mask) != 1482 ((uint64_t)aflt->flt_addr >> cpu_ec_tag_shift)) { 1483 for (i = 0; i < 8; i++, faultpa += 8) { 1484 ec_data_t *ecdptr; 1485 1486 ecdptr = &spf_flt.flt_ec_data[i]; 1487 acc_afsr |= read_and_clear_afsr(); 1488 ecdptr->ec_d8 = lddphys(faultpa); 1489 acc_afsr |= (read_and_clear_afsr() & 1490 ~(P_AFSR_EDP | P_AFSR_UE)); 1491 ecdptr->ec_afsr = 0; 1492 /* null afsr value */ 1493 } 1494 1495 /* 1496 * Mark tag invalid to indicate mem dump 1497 * when we print out the info. 1498 */ 1499 spf_flt.flt_ec_tag = AFLT_INV_ADDR; 1500 } 1501 spf_flt.flt_ec_lcnt = 1; 1502 1503 /* 1504 * Flush out the bad line 1505 */ 1506 flushecacheline(P2ALIGN(aflt->flt_addr, 64), 1507 cpunodes[CPU->cpu_id].ecache_size); 1508 1509 acc_afsr |= clear_errors(NULL, NULL); 1510 } 1511 1512 /* 1513 * Ask our bus nexus friends if they have any fatal errors. If 1514 * so, they will log appropriate error messages and panic as a 1515 * result. We then queue an event for each UDB that reports a 1516 * UE. Each UE reported in a UDB will have its own log message. 1517 * 1518 * Note from kbn: In the case where there are multiple UEs 1519 * (ME bit is set) - the AFAR address is only accurate to 1520 * the 16-byte granularity. One cannot tell whether the AFAR 1521 * belongs to the UDBH or UDBL syndromes. In this case, we 1522 * always report the AFAR address to be 16-byte aligned. 1523 * 1524 * If we're on a Sabre, there is no SDBL, but it will always 1525 * read as zero, so the sdbl test below will safely fail. 
1526 */ 1527 if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL || isus2i || isus2e) 1528 aflt->flt_panic = 1; 1529 1530 if (sdbh & P_DER_UE) { 1531 aflt->flt_synd = sdbh & P_DER_E_SYND; 1532 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE, 1533 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1534 aflt->flt_panic); 1535 } 1536 if (sdbl & P_DER_UE) { 1537 aflt->flt_synd = sdbl & P_DER_E_SYND; 1538 aflt->flt_synd |= UDBL_REG; /* indicates UDBL */ 1539 if (!(aflt->flt_stat & P_AFSR_ME)) 1540 aflt->flt_addr |= 0x8; 1541 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE, 1542 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1543 aflt->flt_panic); 1544 } 1545 1546 /* 1547 * We got a UE and are panicking, save the fault PA in a known 1548 * location so that the platform specific panic code can check 1549 * for copyback errors. 1550 */ 1551 if (aflt->flt_panic && aflt->flt_in_memory) { 1552 panic_aflt = *aflt; 1553 } 1554 } 1555 1556 /* 1557 * Handle EDP and LDP: Locate the line with bad parity and enqueue an 1558 * async error for logging. For Sabre, we panic on EDP or LDP. 1559 */ 1560 if (t_afsr & (P_AFSR_EDP | P_AFSR_LDP)) { 1561 spf_flt.flt_type = CPU_EDP_LDP_ERR; 1562 1563 if (t_afsr & P_AFSR_EDP) 1564 (void) strcat(pr_reason, "EDP "); 1565 1566 if (t_afsr & P_AFSR_LDP) 1567 (void) strcat(pr_reason, "LDP "); 1568 1569 /* 1570 * Here we have no PA to work with. 1571 * Scan each line in the ecache to look for 1572 * the one with bad parity. 1573 */ 1574 aflt->flt_addr = AFLT_INV_ADDR; 1575 scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0], 1576 &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr); 1577 acc_afsr |= (oafsr & ~P_AFSR_WP); 1578 1579 /* 1580 * If we found a bad PA, update the state to indicate if it is 1581 * memory or I/O space. This code will be important if we ever 1582 * support cacheable frame buffers. 1583 */ 1584 if (aflt->flt_addr != AFLT_INV_ADDR) { 1585 aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >> 1586 MMU_PAGESHIFT)) ? 1 : 0; 1587 } 1588 1589 if (isus2i || isus2e) 1590 aflt->flt_panic = 1; 1591 1592 cpu_errorq_dispatch((t_afsr & P_AFSR_EDP) ? 1593 FM_EREPORT_CPU_USII_EDP : FM_EREPORT_CPU_USII_LDP, 1594 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1595 aflt->flt_panic); 1596 } 1597 1598 /* 1599 * Timeout and bus error handling. There are two cases to consider: 1600 * 1601 * (1) If we are in the kernel protected by ddi_peek or ddi_poke,we 1602 * have already modified the saved registers so that we will return 1603 * from the trap to the appropriate trampoline routine; otherwise panic. 1604 * 1605 * (2) In user mode, we can simply use our AST mechanism to deliver 1606 * a SIGBUS. We do not log the occurence - processes performing 1607 * device control would generate lots of uninteresting messages. 1608 */ 1609 if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) { 1610 if (t_afsr & P_AFSR_TO) 1611 (void) strcat(pr_reason, "BTO "); 1612 1613 if (t_afsr & P_AFSR_BERR) 1614 (void) strcat(pr_reason, "BERR "); 1615 1616 spf_flt.flt_type = CPU_BTO_BERR_ERR; 1617 if (aflt->flt_priv && aflt->flt_prot == AFLT_PROT_NONE) { 1618 cpu_errorq_dispatch((t_afsr & P_AFSR_TO) ? 1619 FM_EREPORT_CPU_USII_TO : FM_EREPORT_CPU_USII_BERR, 1620 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1621 aflt->flt_panic); 1622 } 1623 } 1624 1625 /* 1626 * Handle WP: WP happens when the ecache is victimized and a parity 1627 * error was detected on a writeback. The data in question will be 1628 * poisoned as a UE will be written back. The PA is not logged and 1629 * it is possible that it doesn't belong to the trapped thread. 
The 1630 * WP trap is not fatal, but it could be fatal to someone that 1631 * subsequently accesses the toxic page. We set read_all_memscrub 1632 * to force the memscrubber to read all of memory when it awakens. 1633 * For Sabre/Hummingbird, WP is fatal because the HW doesn't write a 1634 * UE back to poison the data. 1635 */ 1636 if (t_afsr & P_AFSR_WP) { 1637 (void) strcat(pr_reason, "WP "); 1638 if (isus2i || isus2e) { 1639 aflt->flt_panic = 1; 1640 } else { 1641 read_all_memscrub = 1; 1642 } 1643 spf_flt.flt_type = CPU_WP_ERR; 1644 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_WP, 1645 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1646 aflt->flt_panic); 1647 } 1648 1649 /* 1650 * Handle trapping CP error: In Sabre/Hummingbird, parity error in 1651 * the ecache on a copyout due to a PCI DMA read is signaled as a CP. 1652 * This is fatal. 1653 */ 1654 1655 if (t_afsr & P_AFSR_CP) { 1656 if (isus2i || isus2e) { 1657 (void) strcat(pr_reason, "CP "); 1658 aflt->flt_panic = 1; 1659 spf_flt.flt_type = CPU_TRAPPING_CP_ERR; 1660 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP, 1661 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1662 aflt->flt_panic); 1663 } else { 1664 /* 1665 * Orphan CP: Happens due to signal integrity problem 1666 * on a CPU, where a CP is reported, without reporting 1667 * its associated UE. This is handled by locating the 1668 * bad parity line and would kick off the memscrubber 1669 * to find the UE if in memory or in another's cache. 1670 */ 1671 spf_flt.flt_type = CPU_ORPHAN_CP_ERR; 1672 (void) strcat(pr_reason, "ORPHAN_CP "); 1673 1674 /* 1675 * Here we have no PA to work with. 1676 * Scan each line in the ecache to look for 1677 * the one with bad parity. 1678 */ 1679 aflt->flt_addr = AFLT_INV_ADDR; 1680 scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0], 1681 &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, 1682 &oafsr); 1683 acc_afsr |= oafsr; 1684 1685 /* 1686 * If we found a bad PA, update the state to indicate 1687 * if it is memory or I/O space. 1688 */ 1689 if (aflt->flt_addr != AFLT_INV_ADDR) { 1690 aflt->flt_in_memory = 1691 (pf_is_memory(aflt->flt_addr >> 1692 MMU_PAGESHIFT)) ? 1 : 0; 1693 } 1694 read_all_memscrub = 1; 1695 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP, 1696 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1697 aflt->flt_panic); 1698 1699 } 1700 } 1701 1702 /* 1703 * If we queued an error other than WP or CP and we are going to return 1704 * from the trap and the error was in user mode or inside of a 1705 * copy routine, set AST flag so the queue will be drained before 1706 * returning to user mode. 1707 * 1708 * For UE/LDP/EDP, the AST processing will SIGKILL the process 1709 * and send an event to its process contract. 1710 * 1711 * For BERR/BTO, the AST processing will SIGBUS the process. There 1712 * will have been no error queued in this case. 1713 */ 1714 if ((t_afsr & 1715 (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP | P_AFSR_BERR | P_AFSR_TO)) && 1716 (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY)) { 1717 int pcb_flag = 0; 1718 1719 if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) 1720 pcb_flag |= ASYNC_HWERR; 1721 1722 if (t_afsr & P_AFSR_BERR) 1723 pcb_flag |= ASYNC_BERR; 1724 1725 if (t_afsr & P_AFSR_TO) 1726 pcb_flag |= ASYNC_BTO; 1727 1728 ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag; 1729 aston(curthread); 1730 action = ACTION_AST_FLAGS; 1731 } 1732 1733 /* 1734 * In response to a deferred error, we must do one of three things: 1735 * (1) set the AST flags, (2) trampoline, or (3) panic. 
action is 1736 * set in cases (1) and (2) - check that either action is set or 1737 * (3) is true. 1738 * 1739 * On II, the WP writes poisoned data back to memory, which will 1740 * cause a UE and a panic or reboot when read. In this case, we 1741 * don't need to panic at this time. On IIi and IIe, 1742 * aflt->flt_panic is already set above. 1743 */ 1744 ASSERT((aflt->flt_panic != 0) || (action != ACTION_NONE) || 1745 (t_afsr & P_AFSR_WP)); 1746 1747 /* 1748 * Make a final sanity check to make sure we did not get any more async 1749 * errors and accumulate the afsr. 1750 */ 1751 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2, 1752 cpunodes[CPU->cpu_id].ecache_linesize); 1753 (void) clear_errors(&spf_flt, NULL); 1754 1755 /* 1756 * Take care of a special case: If there is a UE in the ecache flush 1757 * area, we'll see it in flush_ecache(). This will trigger the 1758 * CPU_ADDITIONAL_ERRORS case below. 1759 * 1760 * This could occur if the original error was a UE in the flush area, 1761 * or if the original error was an E$ error that was flushed out of 1762 * the E$ in scan_ecache(). 1763 * 1764 * If it's at the same address that we're already logging, then it's 1765 * probably one of these cases. Clear the bit so we don't trip over 1766 * it on the additional errors case, which could cause an unnecessary 1767 * panic. 1768 */ 1769 if ((aflt->flt_stat & P_AFSR_UE) && aflt->flt_addr == t_afar) 1770 acc_afsr |= aflt->flt_stat & ~P_AFSR_UE; 1771 else 1772 acc_afsr |= aflt->flt_stat; 1773 1774 /* 1775 * Check the acumulated afsr for the important bits. 1776 * Make sure the spf_flt.flt_type value is set, and 1777 * enque an error. 1778 */ 1779 if (acc_afsr & 1780 (P_AFSR_LEVEL1 | P_AFSR_IVUE | P_AFSR_ETP | P_AFSR_ISAP)) { 1781 if (acc_afsr & (P_AFSR_UE | P_AFSR_EDP | P_AFSR_LDP | 1782 P_AFSR_BERR | P_AFSR_TO | P_AFSR_IVUE | P_AFSR_ETP | 1783 P_AFSR_ISAP)) 1784 aflt->flt_panic = 1; 1785 1786 spf_flt.flt_type = CPU_ADDITIONAL_ERR; 1787 aflt->flt_stat = acc_afsr; 1788 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UNKNOWN, 1789 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1790 aflt->flt_panic); 1791 } 1792 1793 /* 1794 * If aflt->flt_panic is set at this point, we need to panic as the 1795 * result of a trap at TL > 0, or an error we determined to be fatal. 1796 * We've already enqueued the error in one of the if-clauses above, 1797 * and it will be dequeued and logged as part of the panic flow. 1798 */ 1799 if (aflt->flt_panic) { 1800 cpu_aflt_log(CE_PANIC, 1, &spf_flt, CPU_ERRID_FIRST, 1801 "See previous message(s) for details", " %sError(s)", 1802 pr_reason); 1803 } 1804 1805 /* 1806 * Before returning, we must re-enable errors, and 1807 * reset the caches to their boot-up state. 1808 */ 1809 set_lsu(get_lsu() | cache_boot_state); 1810 set_error_enable(EER_ENABLE); 1811 } 1812 1813 /* 1814 * Check for miscellaneous fatal errors and call CE_PANIC if any are seen. 1815 * This routine is shared by the CE and UE handling code. 1816 */ 1817 static void 1818 check_misc_err(spitf_async_flt *spf_flt) 1819 { 1820 struct async_flt *aflt = (struct async_flt *)spf_flt; 1821 char *fatal_str = NULL; 1822 1823 /* 1824 * The ISAP and ETP errors are supposed to cause a POR 1825 * from the system, so in theory we never, ever see these messages. 1826 * ISAP, ETP and IVUE are considered to be fatal. 
1827 */ 1828 if (aflt->flt_stat & P_AFSR_ISAP) 1829 fatal_str = " System Address Parity Error on"; 1830 else if (aflt->flt_stat & P_AFSR_ETP) 1831 fatal_str = " Ecache Tag Parity Error on"; 1832 else if (aflt->flt_stat & P_AFSR_IVUE) 1833 fatal_str = " Interrupt Vector Uncorrectable Error on"; 1834 if (fatal_str != NULL) { 1835 cpu_aflt_log(CE_PANIC, 1, spf_flt, CMN_LFLAGS, 1836 NULL, fatal_str); 1837 } 1838 } 1839 1840 /* 1841 * Routine to convert a syndrome into a syndrome code. 1842 */ 1843 static int 1844 synd_to_synd_code(int synd_status, ushort_t synd) 1845 { 1846 if (synd_status != AFLT_STAT_VALID) 1847 return (-1); 1848 1849 /* 1850 * Use the 8-bit syndrome to index the ecc_syndrome_tab 1851 * to get the code indicating which bit(s) is(are) bad. 1852 */ 1853 if ((synd == 0) || (synd >= SYND_TBL_SIZE)) 1854 return (-1); 1855 else 1856 return (ecc_syndrome_tab[synd]); 1857 } 1858 1859 /* ARGSUSED */ 1860 int 1861 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp) 1862 { 1863 return (ENOTSUP); 1864 } 1865 1866 /* ARGSUSED */ 1867 int 1868 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp) 1869 { 1870 return (ENOTSUP); 1871 } 1872 1873 /* ARGSUSED */ 1874 int 1875 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp) 1876 { 1877 return (ENOTSUP); 1878 } 1879 1880 /* 1881 * Routine to return a string identifying the physical name 1882 * associated with a memory/cache error. 1883 */ 1884 /* ARGSUSED */ 1885 int 1886 cpu_get_mem_unum(int synd_status, ushort_t synd, uint64_t afsr, 1887 uint64_t afar, int cpuid, int flt_in_memory, ushort_t flt_status, 1888 char *buf, int buflen, int *lenp) 1889 { 1890 short synd_code; 1891 int ret; 1892 1893 if (flt_in_memory) { 1894 synd_code = synd_to_synd_code(synd_status, synd); 1895 if (synd_code == -1) { 1896 ret = EINVAL; 1897 } else if (prom_get_unum(synd_code, P2ALIGN(afar, 8), 1898 buf, buflen, lenp) != 0) { 1899 ret = EIO; 1900 } else if (*lenp <= 1) { 1901 ret = EINVAL; 1902 } else { 1903 ret = 0; 1904 } 1905 } else { 1906 ret = ENOTSUP; 1907 } 1908 1909 if (ret != 0) { 1910 buf[0] = '\0'; 1911 *lenp = 0; 1912 } 1913 1914 return (ret); 1915 } 1916 1917 /* 1918 * Wrapper for cpu_get_mem_unum() routine that takes an 1919 * async_flt struct rather than explicit arguments. 1920 */ 1921 int 1922 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt, 1923 char *buf, int buflen, int *lenp) 1924 { 1925 return (cpu_get_mem_unum(synd_status, SYND(aflt->flt_synd), 1926 aflt->flt_stat, aflt->flt_addr, aflt->flt_bus_id, 1927 aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp)); 1928 } 1929 1930 /* 1931 * This routine is a more generic interface to cpu_get_mem_unum(), 1932 * that may be used by other modules (e.g. mm). 1933 */ 1934 int 1935 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar, 1936 char *buf, int buflen, int *lenp) 1937 { 1938 int synd_status, flt_in_memory, ret; 1939 char unum[UNUM_NAMLEN]; 1940 1941 /* 1942 * Check for an invalid address. 1943 */ 1944 if (afar == (uint64_t)-1) 1945 return (ENXIO); 1946 1947 if (synd == (uint64_t)-1) 1948 synd_status = AFLT_STAT_INVALID; 1949 else 1950 synd_status = AFLT_STAT_VALID; 1951 1952 flt_in_memory = (pf_is_memory(afar >> MMU_PAGESHIFT)) ? 
1 : 0; 1953 1954 if ((ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar, 1955 CPU->cpu_id, flt_in_memory, 0, unum, UNUM_NAMLEN, lenp)) 1956 != 0) 1957 return (ret); 1958 1959 if (*lenp >= buflen) 1960 return (ENAMETOOLONG); 1961 1962 (void) strncpy(buf, unum, buflen); 1963 1964 return (0); 1965 } 1966 1967 /* 1968 * Routine to return memory information associated 1969 * with a physical address and syndrome. 1970 */ 1971 /* ARGSUSED */ 1972 int 1973 cpu_get_mem_info(uint64_t synd, uint64_t afar, 1974 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 1975 int *segsp, int *banksp, int *mcidp) 1976 { 1977 return (ENOTSUP); 1978 } 1979 1980 /* 1981 * Routine to return a string identifying the physical 1982 * name associated with a cpuid. 1983 */ 1984 /* ARGSUSED */ 1985 int 1986 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp) 1987 { 1988 return (ENOTSUP); 1989 } 1990 1991 /* 1992 * This routine returns the size of the kernel's FRU name buffer. 1993 */ 1994 size_t 1995 cpu_get_name_bufsize() 1996 { 1997 return (UNUM_NAMLEN); 1998 } 1999 2000 /* 2001 * Cpu specific log func for UEs. 2002 */ 2003 static void 2004 log_ue_err(struct async_flt *aflt, char *unum) 2005 { 2006 spitf_async_flt *spf_flt = (spitf_async_flt *)aflt; 2007 int len = 0; 2008 2009 #ifdef DEBUG 2010 int afsr_priv = (aflt->flt_stat & P_AFSR_PRIV) ? 1 : 0; 2011 2012 /* 2013 * Paranoid Check for priv mismatch 2014 * Only applicable for UEs 2015 */ 2016 if (afsr_priv != aflt->flt_priv) { 2017 /* 2018 * The priv bits in %tstate and %afsr did not match; we expect 2019 * this to be very rare, so flag it with a message. 2020 */ 2021 cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, NULL, 2022 ": PRIV bit in TSTATE and AFSR mismatched; " 2023 "TSTATE.PRIV=%d used", (aflt->flt_priv) ? 1 : 0); 2024 2025 /* update saved afsr to reflect the correct priv */ 2026 aflt->flt_stat &= ~P_AFSR_PRIV; 2027 if (aflt->flt_priv) 2028 aflt->flt_stat |= P_AFSR_PRIV; 2029 } 2030 #endif /* DEBUG */ 2031 2032 (void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum, 2033 UNUM_NAMLEN, &len); 2034 2035 cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum, 2036 " Uncorrectable Memory Error on"); 2037 2038 if (SYND(aflt->flt_synd) == 0x3) { 2039 cpu_aflt_log(CE_WARN, 1, spf_flt, CPU_ERRID_FIRST, NULL, 2040 " Syndrome 0x3 indicates that this may not be a " 2041 "memory module problem"); 2042 } 2043 2044 if (aflt->flt_in_memory) 2045 cpu_log_ecmem_info(spf_flt); 2046 } 2047 2048 2049 /* 2050 * The cpu_async_log_err() function is called via the ue_drain() function to 2051 * handle logging for CPU events that are dequeued. As such, it can be invoked 2052 * from softint context, from AST processing in the trap() flow, or from the 2053 * panic flow. We decode the CPU-specific data, and log appropriate messages. 2054 */ 2055 static void 2056 cpu_async_log_err(void *flt) 2057 { 2058 spitf_async_flt *spf_flt = (spitf_async_flt *)flt; 2059 struct async_flt *aflt = (struct async_flt *)flt; 2060 char unum[UNUM_NAMLEN]; 2061 char *space; 2062 char *ecache_scrub_logstr = NULL; 2063 2064 switch (spf_flt->flt_type) { 2065 case CPU_UE_ERR: 2066 /* 2067 * We want to skip logging only if ALL the following 2068 * conditions are true: 2069 * 2070 * 1. We are not panicking 2071 * 2. There is only one error 2072 * 3. That error is a memory error 2073 * 4. The error is caused by the memory scrubber (in 2074 * which case the error will have occurred under 2075 * on_trap protection) 2076 * 5. 
The error is on a retired page 2077 * 2078 * Note 1: AFLT_PROT_EC is used places other than the memory 2079 * scrubber. However, none of those errors should occur 2080 * on a retired page. 2081 * 2082 * Note 2: In the CE case, these errors are discarded before 2083 * the errorq. In the UE case, we must wait until now -- 2084 * softcall() grabs a mutex, which we can't do at a high PIL. 2085 */ 2086 if (!panicstr && 2087 (aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE && 2088 aflt->flt_prot == AFLT_PROT_EC) { 2089 if (page_retire_check(aflt->flt_addr, NULL) == 0) { 2090 /* Zero the address to clear the error */ 2091 softcall(ecc_page_zero, (void *)aflt->flt_addr); 2092 return; 2093 } 2094 } 2095 2096 /* 2097 * Log the UE and check for causes of this UE error that 2098 * don't cause a trap (Copyback error). cpu_async_error() 2099 * has already checked the i/o buses for us. 2100 */ 2101 log_ue_err(aflt, unum); 2102 if (aflt->flt_in_memory) 2103 cpu_check_allcpus(aflt); 2104 break; 2105 2106 case CPU_EDP_LDP_ERR: 2107 if (aflt->flt_stat & P_AFSR_EDP) 2108 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, 2109 NULL, " EDP event on"); 2110 2111 if (aflt->flt_stat & P_AFSR_LDP) 2112 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, 2113 NULL, " LDP event on"); 2114 2115 /* Log ecache info if exist */ 2116 if (spf_flt->flt_ec_lcnt > 0) { 2117 cpu_log_ecmem_info(spf_flt); 2118 2119 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, 2120 NULL, " AFAR was derived from E$Tag"); 2121 } else { 2122 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, 2123 NULL, " No error found in ecache (No fault " 2124 "PA available)"); 2125 } 2126 break; 2127 2128 case CPU_WP_ERR: 2129 /* 2130 * If the memscrub thread hasn't yet read 2131 * all of memory, as we requested in the 2132 * trap handler, then give it a kick to 2133 * make sure it does. 2134 */ 2135 if (!isus2i && !isus2e && read_all_memscrub) 2136 memscrub_run(); 2137 2138 cpu_aflt_log(CE_WARN, 1, spf_flt, WP_LFLAGS, NULL, 2139 " WP event on"); 2140 return; 2141 2142 case CPU_BTO_BERR_ERR: 2143 /* 2144 * A bus timeout or error occurred that was in user mode or not 2145 * in a protected kernel code region. 2146 */ 2147 if (aflt->flt_stat & P_AFSR_BERR) { 2148 cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2, 2149 spf_flt, BERRTO_LFLAGS, NULL, 2150 " Bus Error on System Bus in %s mode from", 2151 aflt->flt_priv ? "privileged" : "user"); 2152 } 2153 2154 if (aflt->flt_stat & P_AFSR_TO) { 2155 cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2, 2156 spf_flt, BERRTO_LFLAGS, NULL, 2157 " Timeout on System Bus in %s mode from", 2158 aflt->flt_priv ? "privileged" : "user"); 2159 } 2160 2161 return; 2162 2163 case CPU_PANIC_CP_ERR: 2164 /* 2165 * Process the Copyback (CP) error info (if any) obtained from 2166 * polling all the cpus in the panic flow. This case is only 2167 * entered if we are panicking. 2168 */ 2169 ASSERT(panicstr != NULL); 2170 ASSERT(aflt->flt_id == panic_aflt.flt_id); 2171 2172 /* See which space - this info may not exist */ 2173 if (panic_aflt.flt_status & ECC_D_TRAP) 2174 space = "Data "; 2175 else if (panic_aflt.flt_status & ECC_I_TRAP) 2176 space = "Instruction "; 2177 else 2178 space = ""; 2179 2180 cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL, 2181 " AFAR was derived from UE report," 2182 " CP event on CPU%d (caused %saccess error on %s%d)", 2183 aflt->flt_inst, space, (panic_aflt.flt_status & ECC_IOBUS) ? 
2184 "IOBUS" : "CPU", panic_aflt.flt_bus_id); 2185 2186 if (spf_flt->flt_ec_lcnt > 0) 2187 cpu_log_ecmem_info(spf_flt); 2188 else 2189 cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, 2190 NULL, " No cache dump available"); 2191 2192 return; 2193 2194 case CPU_TRAPPING_CP_ERR: 2195 /* 2196 * For sabre only. This is a copyback ecache parity error due 2197 * to a PCI DMA read. We should be panicking if we get here. 2198 */ 2199 ASSERT(panicstr != NULL); 2200 cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL, 2201 " AFAR was derived from UE report," 2202 " CP event on CPU%d (caused Data access error " 2203 "on PCIBus)", aflt->flt_inst); 2204 return; 2205 2206 /* 2207 * We log the ecache lines of the following states, 2208 * clean_bad_idle, clean_bad_busy, dirty_bad_idle and 2209 * dirty_bad_busy if ecache_scrub_verbose is set and panic 2210 * in addition to logging if ecache_scrub_panic is set. 2211 */ 2212 case CPU_BADLINE_CI_ERR: 2213 ecache_scrub_logstr = "CBI"; 2214 /* FALLTHRU */ 2215 2216 case CPU_BADLINE_CB_ERR: 2217 if (ecache_scrub_logstr == NULL) 2218 ecache_scrub_logstr = "CBB"; 2219 /* FALLTHRU */ 2220 2221 case CPU_BADLINE_DI_ERR: 2222 if (ecache_scrub_logstr == NULL) 2223 ecache_scrub_logstr = "DBI"; 2224 /* FALLTHRU */ 2225 2226 case CPU_BADLINE_DB_ERR: 2227 if (ecache_scrub_logstr == NULL) 2228 ecache_scrub_logstr = "DBB"; 2229 2230 cpu_aflt_log(CE_NOTE, 2, spf_flt, 2231 (CPU_ERRID_FIRST | CPU_FLTCPU), NULL, 2232 " %s event on", ecache_scrub_logstr); 2233 cpu_log_ecmem_info(spf_flt); 2234 2235 return; 2236 2237 case CPU_ORPHAN_CP_ERR: 2238 /* 2239 * Orphan CPs, where the CP bit is set, but when a CPU 2240 * doesn't report a UE. 2241 */ 2242 if (read_all_memscrub) 2243 memscrub_run(); 2244 2245 cpu_aflt_log(CE_NOTE, 2, spf_flt, (CP_LFLAGS | CPU_FLTCPU), 2246 NULL, " Orphan CP event on"); 2247 2248 /* Log ecache info if exist */ 2249 if (spf_flt->flt_ec_lcnt > 0) 2250 cpu_log_ecmem_info(spf_flt); 2251 else 2252 cpu_aflt_log(CE_NOTE, 2, spf_flt, 2253 (CP_LFLAGS | CPU_FLTCPU), NULL, 2254 " No error found in ecache (No fault " 2255 "PA available"); 2256 return; 2257 2258 case CPU_ECACHE_ADDR_PAR_ERR: 2259 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2260 " E$ Tag Address Parity error on"); 2261 cpu_log_ecmem_info(spf_flt); 2262 return; 2263 2264 case CPU_ECACHE_STATE_ERR: 2265 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2266 " E$ Tag State Parity error on"); 2267 cpu_log_ecmem_info(spf_flt); 2268 return; 2269 2270 case CPU_ECACHE_TAG_ERR: 2271 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2272 " E$ Tag scrub event on"); 2273 cpu_log_ecmem_info(spf_flt); 2274 return; 2275 2276 case CPU_ECACHE_ETP_ETS_ERR: 2277 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2278 " AFSR.ETP is set and AFSR.ETS is zero on"); 2279 cpu_log_ecmem_info(spf_flt); 2280 return; 2281 2282 2283 case CPU_ADDITIONAL_ERR: 2284 cpu_aflt_log(CE_WARN, 1, spf_flt, CMN_LFLAGS & ~CPU_SPACE, NULL, 2285 " Additional errors detected during error processing on"); 2286 return; 2287 2288 default: 2289 cmn_err(CE_WARN, "cpu_async_log_err: fault %p has unknown " 2290 "fault type %x", (void *)spf_flt, spf_flt->flt_type); 2291 return; 2292 } 2293 2294 /* ... 
fall through from the UE, EDP, or LDP cases */ 2295 2296 if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) { 2297 if (!panicstr) { 2298 (void) page_retire(aflt->flt_addr, PR_UE); 2299 } else { 2300 /* 2301 * Clear UEs on panic so that we don't 2302 * get haunted by them during panic or 2303 * after reboot 2304 */ 2305 clearphys(P2ALIGN(aflt->flt_addr, 64), 2306 cpunodes[CPU->cpu_id].ecache_size, 2307 cpunodes[CPU->cpu_id].ecache_linesize); 2308 2309 (void) clear_errors(NULL, NULL); 2310 } 2311 } 2312 2313 /* 2314 * Log final recover message 2315 */ 2316 if (!panicstr) { 2317 if (!aflt->flt_priv) { 2318 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, 2319 NULL, " Above Error is in User Mode" 2320 "\n and is fatal: " 2321 "will SIGKILL process and notify contract"); 2322 } else if (aflt->flt_prot == AFLT_PROT_COPY && aflt->flt_core) { 2323 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, 2324 NULL, " Above Error detected while dumping core;" 2325 "\n core file will be truncated"); 2326 } else if (aflt->flt_prot == AFLT_PROT_COPY) { 2327 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, 2328 NULL, " Above Error is due to Kernel access" 2329 "\n to User space and is fatal: " 2330 "will SIGKILL process and notify contract"); 2331 } else if (aflt->flt_prot == AFLT_PROT_EC) { 2332 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, NULL, 2333 " Above Error detected by protected Kernel code" 2334 "\n that will try to clear error from system"); 2335 } 2336 } 2337 } 2338 2339 2340 /* 2341 * Check all cpus for non-trapping UE-causing errors 2342 * In Ultra I/II, we look for copyback errors (CPs) 2343 */ 2344 void 2345 cpu_check_allcpus(struct async_flt *aflt) 2346 { 2347 spitf_async_flt cp; 2348 spitf_async_flt *spf_cpflt = &cp; 2349 struct async_flt *cpflt = (struct async_flt *)&cp; 2350 int pix; 2351 2352 cpflt->flt_id = aflt->flt_id; 2353 cpflt->flt_addr = aflt->flt_addr; 2354 2355 for (pix = 0; pix < NCPU; pix++) { 2356 if (CPU_XCALL_READY(pix)) { 2357 xc_one(pix, (xcfunc_t *)get_cpu_status, 2358 (uint64_t)cpflt, 0); 2359 2360 if (cpflt->flt_stat & P_AFSR_CP) { 2361 char *space; 2362 2363 /* See which space - this info may not exist */ 2364 if (aflt->flt_status & ECC_D_TRAP) 2365 space = "Data "; 2366 else if (aflt->flt_status & ECC_I_TRAP) 2367 space = "Instruction "; 2368 else 2369 space = ""; 2370 2371 cpu_aflt_log(CE_WARN, 1, spf_cpflt, CP_LFLAGS, 2372 NULL, " AFAR was derived from UE report," 2373 " CP event on CPU%d (caused %saccess " 2374 "error on %s%d)", pix, space, 2375 (aflt->flt_status & ECC_IOBUS) ? 2376 "IOBUS" : "CPU", aflt->flt_bus_id); 2377 2378 if (spf_cpflt->flt_ec_lcnt > 0) 2379 cpu_log_ecmem_info(spf_cpflt); 2380 else 2381 cpu_aflt_log(CE_WARN, 2, spf_cpflt, 2382 CPU_ERRID_FIRST, NULL, 2383 " No cache dump available"); 2384 } 2385 } 2386 } 2387 } 2388 2389 #ifdef DEBUG 2390 int test_mp_cp = 0; 2391 #endif 2392 2393 /* 2394 * Cross-call callback routine to tell a CPU to read its own %afsr to check 2395 * for copyback errors and capture relevant information. 
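* Note: this routine is reached in two ways: it is cross-called on a target CPU from cpu_check_allcpus() above, e.g. xc_one(pix, (xcfunc_t *)get_cpu_status, (uint64_t)cpflt, 0), and it is called directly on the local CPU from cpu_async_panic_callb() below. The argument is a pointer to a spitf_async_flt whose flt_addr holds the fault PA to match against the E$ tags.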
2396 */ 2397 static uint_t 2398 get_cpu_status(uint64_t arg) 2399 { 2400 struct async_flt *aflt = (struct async_flt *)arg; 2401 spitf_async_flt *spf_flt = (spitf_async_flt *)arg; 2402 uint64_t afsr; 2403 uint32_t ec_idx; 2404 uint64_t sdbh, sdbl; 2405 int i; 2406 uint32_t ec_set_size; 2407 uchar_t valid; 2408 ec_data_t ec_data[8]; 2409 uint64_t ec_tag, flt_addr_tag, oafsr; 2410 uint64_t *acc_afsr = NULL; 2411 2412 get_asyncflt(&afsr); 2413 if (CPU_PRIVATE(CPU) != NULL) { 2414 acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 2415 afsr |= *acc_afsr; 2416 *acc_afsr = 0; 2417 } 2418 2419 #ifdef DEBUG 2420 if (test_mp_cp) 2421 afsr |= P_AFSR_CP; 2422 #endif 2423 aflt->flt_stat = afsr; 2424 2425 if (afsr & P_AFSR_CP) { 2426 /* 2427 * Capture the UDBs 2428 */ 2429 get_udb_errors(&sdbh, &sdbl); 2430 spf_flt->flt_sdbh = (ushort_t)(sdbh & 0x3FF); 2431 spf_flt->flt_sdbl = (ushort_t)(sdbl & 0x3FF); 2432 2433 /* 2434 * Clear CP bit before capturing ecache data 2435 * and AFSR info. 2436 */ 2437 set_asyncflt(P_AFSR_CP); 2438 2439 /* 2440 * See if we can capture the ecache line for the 2441 * fault PA. 2442 * 2443 * Return a valid matching ecache line, if any. 2444 * Otherwise, return the first matching ecache 2445 * line marked invalid. 2446 */ 2447 flt_addr_tag = aflt->flt_addr >> cpu_ec_tag_shift; 2448 ec_set_size = cpunodes[CPU->cpu_id].ecache_size / 2449 ecache_associativity; 2450 spf_flt->flt_ec_lcnt = 0; 2451 2452 for (i = 0, ec_idx = (aflt->flt_addr % ec_set_size); 2453 i < ecache_associativity; i++, ec_idx += ec_set_size) { 2454 get_ecache_dtag(P2ALIGN(ec_idx, 64), 2455 (uint64_t *)&ec_data[0], &ec_tag, &oafsr, 2456 acc_afsr); 2457 2458 if ((ec_tag & cpu_ec_tag_mask) != flt_addr_tag) 2459 continue; 2460 2461 valid = cpu_ec_state_valid & 2462 (uchar_t)((ec_tag & cpu_ec_state_mask) >> 2463 cpu_ec_state_shift); 2464 2465 if (valid || spf_flt->flt_ec_lcnt == 0) { 2466 spf_flt->flt_ec_tag = ec_tag; 2467 bcopy(&ec_data, &spf_flt->flt_ec_data, 2468 sizeof (ec_data)); 2469 spf_flt->flt_ec_lcnt = 1; 2470 2471 if (valid) 2472 break; 2473 } 2474 } 2475 } 2476 return (0); 2477 } 2478 2479 /* 2480 * CPU-module callback for the non-panicking CPUs. This routine is invoked 2481 * from panic_idle() as part of the other CPUs stopping themselves when a 2482 * panic occurs. We need to be VERY careful what we do here, since panicstr 2483 * is NOT set yet and we cannot blow through locks. If panic_aflt is set 2484 * (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for 2485 * CP error information. 2486 */ 2487 void 2488 cpu_async_panic_callb(void) 2489 { 2490 spitf_async_flt cp; 2491 struct async_flt *aflt = (struct async_flt *)&cp; 2492 uint64_t *scrub_afsr; 2493 2494 if (panic_aflt.flt_id != 0) { 2495 aflt->flt_addr = panic_aflt.flt_addr; 2496 (void) get_cpu_status((uint64_t)aflt); 2497 2498 if (CPU_PRIVATE(CPU) != NULL) { 2499 scrub_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 2500 if (*scrub_afsr & P_AFSR_CP) { 2501 aflt->flt_stat |= *scrub_afsr; 2502 *scrub_afsr = 0; 2503 } 2504 } 2505 if (aflt->flt_stat & P_AFSR_CP) { 2506 aflt->flt_id = panic_aflt.flt_id; 2507 aflt->flt_panic = 1; 2508 aflt->flt_inst = CPU->cpu_id; 2509 aflt->flt_class = CPU_FAULT; 2510 cp.flt_type = CPU_PANIC_CP_ERR; 2511 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP, 2512 (void *)&cp, sizeof (cp), ue_queue, 2513 aflt->flt_panic); 2514 } 2515 } 2516 } 2517 2518 /* 2519 * Turn off all cpu error detection, normally only used for panics. 
2520 */ 2521 void 2522 cpu_disable_errors(void) 2523 { 2524 xt_all(set_error_enable_tl1, EER_DISABLE, EER_SET_ABSOLUTE); 2525 } 2526 2527 /* 2528 * Enable errors. 2529 */ 2530 void 2531 cpu_enable_errors(void) 2532 { 2533 xt_all(set_error_enable_tl1, EER_ENABLE, EER_SET_ABSOLUTE); 2534 } 2535 2536 static void 2537 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err) 2538 { 2539 uint64_t aligned_addr = P2ALIGN(ecc->flt_addr, 8); 2540 int i, loop = 1; 2541 ushort_t ecc_0; 2542 uint64_t paddr; 2543 uint64_t data; 2544 2545 if (verbose) 2546 loop = 8; 2547 for (i = 0; i < loop; i++) { 2548 paddr = aligned_addr + (i * 8); 2549 data = lddphys(paddr); 2550 if (verbose) { 2551 if (ce_err) { 2552 ecc_0 = ecc_gen((uint32_t)(data>>32), 2553 (uint32_t)data); 2554 cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS, 2555 NULL, " Paddr 0x%" PRIx64 ", " 2556 "Data 0x%08x.%08x, ECC 0x%x", paddr, 2557 (uint32_t)(data>>32), (uint32_t)data, ecc_0); 2558 } else { 2559 cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS, 2560 NULL, " Paddr 0x%" PRIx64 ", " 2561 "Data 0x%08x.%08x", paddr, 2562 (uint32_t)(data>>32), (uint32_t)data); 2563 } 2564 } 2565 } 2566 } 2567 2568 static struct { /* sec-ded-s4ed ecc code */ 2569 uint_t hi, lo; 2570 } ecc_code[8] = { 2571 { 0xee55de23U, 0x16161161U }, 2572 { 0x55eede93U, 0x61612212U }, 2573 { 0xbb557b8cU, 0x49494494U }, 2574 { 0x55bb7b6cU, 0x94948848U }, 2575 { 0x16161161U, 0xee55de23U }, 2576 { 0x61612212U, 0x55eede93U }, 2577 { 0x49494494U, 0xbb557b8cU }, 2578 { 0x94948848U, 0x55bb7b6cU } 2579 }; 2580 2581 static ushort_t 2582 ecc_gen(uint_t high_bytes, uint_t low_bytes) 2583 { 2584 int i, j; 2585 uchar_t checker, bit_mask; 2586 struct { 2587 uint_t hi, lo; 2588 } hex_data, masked_data[8]; 2589 2590 hex_data.hi = high_bytes; 2591 hex_data.lo = low_bytes; 2592 2593 /* mask out bits according to sec-ded-s4ed ecc code */ 2594 for (i = 0; i < 8; i++) { 2595 masked_data[i].hi = hex_data.hi & ecc_code[i].hi; 2596 masked_data[i].lo = hex_data.lo & ecc_code[i].lo; 2597 } 2598 2599 /* 2600 * xor all bits in masked_data[i] to get bit_i of checker, 2601 * where i = 0 to 7 2602 */ 2603 checker = 0; 2604 for (i = 0; i < 8; i++) { 2605 bit_mask = 1 << i; 2606 for (j = 0; j < 32; j++) { 2607 if (masked_data[i].lo & 1) checker ^= bit_mask; 2608 if (masked_data[i].hi & 1) checker ^= bit_mask; 2609 masked_data[i].hi >>= 1; 2610 masked_data[i].lo >>= 1; 2611 } 2612 } 2613 return (checker); 2614 } 2615 2616 /* 2617 * Flush the entire ecache using displacement flush by reading through a 2618 * physical address range as large as the ecache. 2619 */ 2620 void 2621 cpu_flush_ecache(void) 2622 { 2623 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2, 2624 cpunodes[CPU->cpu_id].ecache_linesize); 2625 } 2626 2627 /* 2628 * read and display the data in the cache line where the 2629 * original ce error occurred. 2630 * This routine is mainly used for debugging new hardware. 
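* In verbose mode, cpu_read_paddr() dumps the eight consecutive doublewords starting at the 8-byte-aligned fault address; for CE errors it also recomputes the expected check bits with ecc_gen(), where check bit i is the parity (XOR) of the data bits selected by the ecc_code[i] mask pair.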
2631 */ 2632 void 2633 read_ecc_data(struct async_flt *ecc, short verbose, short ce_err) 2634 { 2635 kpreempt_disable(); 2636 /* disable ECC error traps */ 2637 set_error_enable(EER_ECC_DISABLE); 2638 2639 /* 2640 * flush the ecache 2641 * read the data 2642 * check to see if an ECC error occured 2643 */ 2644 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2, 2645 cpunodes[CPU->cpu_id].ecache_linesize); 2646 set_lsu(get_lsu() | cache_boot_state); 2647 cpu_read_paddr(ecc, verbose, ce_err); 2648 (void) check_ecc(ecc); 2649 2650 /* enable ECC error traps */ 2651 set_error_enable(EER_ENABLE); 2652 kpreempt_enable(); 2653 } 2654 2655 /* 2656 * Check the AFSR bits for UE/CE persistence. 2657 * If UE or CE errors are detected, the routine will 2658 * clears all the AFSR sticky bits (except CP for 2659 * spitfire/blackbird) and the UDBs. 2660 * if ce_debug or ue_debug is set, log any ue/ce errors detected. 2661 */ 2662 static int 2663 check_ecc(struct async_flt *ecc) 2664 { 2665 uint64_t t_afsr; 2666 uint64_t t_afar; 2667 uint64_t udbh; 2668 uint64_t udbl; 2669 ushort_t udb; 2670 int persistent = 0; 2671 2672 /* 2673 * Capture the AFSR, AFAR and UDBs info 2674 */ 2675 get_asyncflt(&t_afsr); 2676 get_asyncaddr(&t_afar); 2677 t_afar &= SABRE_AFAR_PA; 2678 get_udb_errors(&udbh, &udbl); 2679 2680 if ((t_afsr & P_AFSR_UE) || (t_afsr & P_AFSR_CE)) { 2681 /* 2682 * Clear the errors 2683 */ 2684 clr_datapath(); 2685 2686 if (isus2i || isus2e) 2687 set_asyncflt(t_afsr); 2688 else 2689 set_asyncflt(t_afsr & ~P_AFSR_CP); 2690 2691 /* 2692 * determine whether to check UDBH or UDBL for persistence 2693 */ 2694 if (ecc->flt_synd & UDBL_REG) { 2695 udb = (ushort_t)udbl; 2696 t_afar |= 0x8; 2697 } else { 2698 udb = (ushort_t)udbh; 2699 } 2700 2701 if (ce_debug || ue_debug) { 2702 spitf_async_flt spf_flt; /* for logging */ 2703 struct async_flt *aflt = 2704 (struct async_flt *)&spf_flt; 2705 2706 /* Package the info nicely in the spf_flt struct */ 2707 bzero(&spf_flt, sizeof (spitf_async_flt)); 2708 aflt->flt_stat = t_afsr; 2709 aflt->flt_addr = t_afar; 2710 spf_flt.flt_sdbh = (ushort_t)(udbh & 0x3FF); 2711 spf_flt.flt_sdbl = (ushort_t)(udbl & 0x3FF); 2712 2713 cpu_aflt_log(CE_CONT, 0, &spf_flt, (CPU_AFSR | 2714 CPU_AFAR | CPU_UDBH | CPU_UDBL), NULL, 2715 " check_ecc: Dumping captured error states ..."); 2716 } 2717 2718 /* 2719 * if the fault addresses don't match, not persistent 2720 */ 2721 if (t_afar != ecc->flt_addr) { 2722 return (persistent); 2723 } 2724 2725 /* 2726 * check for UE persistence 2727 * since all DIMMs in the bank are identified for a UE, 2728 * there's no reason to check the syndrome 2729 */ 2730 if ((ecc->flt_stat & P_AFSR_UE) && (t_afsr & P_AFSR_UE)) { 2731 persistent = 1; 2732 } 2733 2734 /* 2735 * check for CE persistence 2736 */ 2737 if ((ecc->flt_stat & P_AFSR_CE) && (t_afsr & P_AFSR_CE)) { 2738 if ((udb & P_DER_E_SYND) == 2739 (ecc->flt_synd & P_DER_E_SYND)) { 2740 persistent = 1; 2741 } 2742 } 2743 } 2744 return (persistent); 2745 } 2746 2747 #ifdef HUMMINGBIRD 2748 #define HB_FULL_DIV 1 2749 #define HB_HALF_DIV 2 2750 #define HB_LOWEST_DIV 8 2751 #define HB_ECLK_INVALID 0xdeadbad 2752 static uint64_t hb_eclk[HB_LOWEST_DIV + 1] = { 2753 HB_ECLK_INVALID, HB_ECLK_1, HB_ECLK_2, HB_ECLK_INVALID, 2754 HB_ECLK_4, HB_ECLK_INVALID, HB_ECLK_6, HB_ECLK_INVALID, 2755 HB_ECLK_8 }; 2756 2757 #define HB_SLOW_DOWN 0 2758 #define HB_SPEED_UP 1 2759 2760 #define SET_ESTAR_MODE(mode) \ 2761 stdphysio(HB_ESTAR_MODE, (mode)); \ 2762 /* \ 2763 * PLL logic requires minimum of 16 clock \ 
2764 * cycles to lock to the new clock speed. \ 2765 * Wait 1 usec to satisfy this requirement. \ 2766 */ \ 2767 drv_usecwait(1); 2768 2769 #define CHANGE_REFRESH_COUNT(direction, cur_div, new_div) \ 2770 { \ 2771 volatile uint64_t data; \ 2772 uint64_t count, new_count; \ 2773 clock_t delay; \ 2774 data = lddphysio(HB_MEM_CNTRL0); \ 2775 count = (data & HB_REFRESH_COUNT_MASK) >> \ 2776 HB_REFRESH_COUNT_SHIFT; \ 2777 new_count = (HB_REFRESH_INTERVAL * \ 2778 cpunodes[CPU->cpu_id].clock_freq) / \ 2779 (HB_REFRESH_CLOCKS_PER_COUNT * (new_div) * NANOSEC);\ 2780 data = (data & ~HB_REFRESH_COUNT_MASK) | \ 2781 (new_count << HB_REFRESH_COUNT_SHIFT); \ 2782 stdphysio(HB_MEM_CNTRL0, data); \ 2783 data = lddphysio(HB_MEM_CNTRL0); \ 2784 /* \ 2785 * If we are slowing down the cpu and Memory \ 2786 * Self Refresh is not enabled, it is required \ 2787 * to wait for old refresh count to count-down and \ 2788 * new refresh count to go into effect (let new value \ 2789 * counts down once). \ 2790 */ \ 2791 if ((direction) == HB_SLOW_DOWN && \ 2792 (data & HB_SELF_REFRESH_MASK) == 0) { \ 2793 /* \ 2794 * Each count takes 64 cpu clock cycles \ 2795 * to decrement. Wait for current refresh \ 2796 * count plus new refresh count at current \ 2797 * cpu speed to count down to zero. Round \ 2798 * up the delay time. \ 2799 */ \ 2800 delay = ((HB_REFRESH_CLOCKS_PER_COUNT * \ 2801 (count + new_count) * MICROSEC * (cur_div)) /\ 2802 cpunodes[CPU->cpu_id].clock_freq) + 1; \ 2803 drv_usecwait(delay); \ 2804 } \ 2805 } 2806 2807 #define SET_SELF_REFRESH(bit) \ 2808 { \ 2809 volatile uint64_t data; \ 2810 data = lddphysio(HB_MEM_CNTRL0); \ 2811 data = (data & ~HB_SELF_REFRESH_MASK) | \ 2812 ((bit) << HB_SELF_REFRESH_SHIFT); \ 2813 stdphysio(HB_MEM_CNTRL0, data); \ 2814 data = lddphysio(HB_MEM_CNTRL0); \ 2815 } 2816 #endif /* HUMMINGBIRD */ 2817 2818 /* ARGSUSED */ 2819 void 2820 cpu_change_speed(uint64_t new_divisor, uint64_t arg2) 2821 { 2822 #ifdef HUMMINGBIRD 2823 uint64_t cur_mask, cur_divisor = 0; 2824 volatile uint64_t reg; 2825 int index; 2826 2827 if ((new_divisor < HB_FULL_DIV || new_divisor > HB_LOWEST_DIV) || 2828 (hb_eclk[new_divisor] == HB_ECLK_INVALID)) { 2829 cmn_err(CE_WARN, "cpu_change_speed: bad divisor 0x%lx", 2830 new_divisor); 2831 return; 2832 } 2833 2834 reg = lddphysio(HB_ESTAR_MODE); 2835 cur_mask = reg & HB_ECLK_MASK; 2836 for (index = HB_FULL_DIV; index <= HB_LOWEST_DIV; index++) { 2837 if (hb_eclk[index] == cur_mask) { 2838 cur_divisor = index; 2839 break; 2840 } 2841 } 2842 2843 if (cur_divisor == 0) 2844 cmn_err(CE_PANIC, "cpu_change_speed: current divisor " 2845 "can't be determined!"); 2846 2847 /* 2848 * If we are already at the requested divisor speed, just 2849 * return. 2850 */ 2851 if (cur_divisor == new_divisor) 2852 return; 2853 2854 if (cur_divisor == HB_FULL_DIV && new_divisor == HB_HALF_DIV) { 2855 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor); 2856 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2857 SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE); 2858 2859 } else if (cur_divisor == HB_HALF_DIV && new_divisor == HB_FULL_DIV) { 2860 SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE); 2861 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2862 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2863 CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor); 2864 2865 } else if (cur_divisor == HB_FULL_DIV && new_divisor > HB_HALF_DIV) { 2866 /* 2867 * Transition to 1/2 speed first, then to 2868 * lower speed. 
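* For example, a request to go from full speed (divisor 1) directly to divisor 6 is performed as 1 -> 2 -> 6: the DRAM refresh count is adjusted before each step down and memory self-refresh is enabled at the half-speed step. The reverse transition below runs 6 -> 2 -> 1, switching the clock divisor first and adjusting the refresh count afterwards.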
2869 */ 2870 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, HB_HALF_DIV); 2871 SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]); 2872 SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE); 2873 2874 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, HB_HALF_DIV, new_divisor); 2875 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2876 2877 } else if (cur_divisor > HB_HALF_DIV && new_divisor == HB_FULL_DIV) { 2878 /* 2879 * Transition to 1/2 speed first, then to 2880 * full speed. 2881 */ 2882 SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]); 2883 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2884 CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, HB_HALF_DIV); 2885 2886 SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE); 2887 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2888 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2889 CHANGE_REFRESH_COUNT(HB_SPEED_UP, HB_HALF_DIV, new_divisor); 2890 2891 } else if (cur_divisor < new_divisor) { 2892 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor); 2893 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2894 2895 } else if (cur_divisor > new_divisor) { 2896 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2897 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2898 CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor); 2899 } 2900 CPU->cpu_m.divisor = (uchar_t)new_divisor; 2901 #endif 2902 } 2903 2904 /* 2905 * Clear the AFSR sticky bits and the UDBs. For Sabre/Spitfire/Blackbird, 2906 * we clear all the sticky bits. If a non-null pointer to a async fault 2907 * structure argument is passed in, the captured error state (AFSR, AFAR, UDBs) 2908 * info will be returned in the structure. If a non-null pointer to a 2909 * uint64_t is passed in, this will be updated if the CP bit is set in the 2910 * AFSR. The afsr will be returned. 2911 */ 2912 static uint64_t 2913 clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr) 2914 { 2915 struct async_flt *aflt = (struct async_flt *)spf_flt; 2916 uint64_t afsr; 2917 uint64_t udbh, udbl; 2918 2919 get_asyncflt(&afsr); 2920 2921 if ((acc_afsr != NULL) && (afsr & P_AFSR_CP)) 2922 *acc_afsr |= afsr; 2923 2924 if (spf_flt != NULL) { 2925 aflt->flt_stat = afsr; 2926 get_asyncaddr(&aflt->flt_addr); 2927 aflt->flt_addr &= SABRE_AFAR_PA; 2928 2929 get_udb_errors(&udbh, &udbl); 2930 spf_flt->flt_sdbh = (ushort_t)(udbh & 0x3FF); 2931 spf_flt->flt_sdbl = (ushort_t)(udbl & 0x3FF); 2932 } 2933 2934 set_asyncflt(afsr); /* clear afsr */ 2935 clr_datapath(); /* clear udbs */ 2936 return (afsr); 2937 } 2938 2939 /* 2940 * Scan the ecache to look for bad lines. If found, the afsr, afar, e$ data 2941 * tag of the first bad line will be returned. We also return the old-afsr 2942 * (before clearing the sticky bits). The linecnt data will be updated to 2943 * indicate the number of bad lines detected. 2944 */ 2945 static void 2946 scan_ecache(uint64_t *t_afar, ec_data_t *ecache_data, 2947 uint64_t *ecache_tag, int *linecnt, uint64_t *t_afsr) 2948 { 2949 ec_data_t t_ecdata[8]; 2950 uint64_t t_etag, oafsr; 2951 uint64_t pa = AFLT_INV_ADDR; 2952 uint32_t i, j, ecache_sz; 2953 uint64_t acc_afsr = 0; 2954 uint64_t *cpu_afsr = NULL; 2955 2956 if (CPU_PRIVATE(CPU) != NULL) 2957 cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 2958 2959 *linecnt = 0; 2960 ecache_sz = cpunodes[CPU->cpu_id].ecache_size; 2961 2962 for (i = 0; i < ecache_sz; i += 64) { 2963 get_ecache_dtag(i, (uint64_t *)&t_ecdata[0], &t_etag, &oafsr, 2964 cpu_afsr); 2965 acc_afsr |= oafsr; 2966 2967 /* 2968 * Scan through the whole 64 bytes line in 8 8-byte chunks 2969 * looking for the first occurrence of an EDP error. The AFSR 2970 * info is captured for each 8-byte chunk. 
Note that for 2971 * Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in 2972 * 16-byte chunk granularity (i.e. the AFSR will be the same 2973 * for the high and low 8-byte words within the 16-byte chunk). 2974 * For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte 2975 * granularity and only PSYND bits [7:0] are used. 2976 */ 2977 for (j = 0; j < 8; j++) { 2978 ec_data_t *ecdptr = &t_ecdata[j]; 2979 2980 if (ecdptr->ec_afsr & P_AFSR_EDP) { 2981 uint64_t errpa; 2982 ushort_t psynd; 2983 uint32_t ec_set_size = ecache_sz / 2984 ecache_associativity; 2985 2986 /* 2987 * For Spitfire/Blackbird, we need to look at 2988 * the PSYND to make sure that this 8-byte chunk 2989 * is the right one. PSYND bits [15:8] belong 2990 * to the upper 8-byte (even) chunk. Bits 2991 * [7:0] belong to the lower 8-byte chunk (odd). 2992 */ 2993 psynd = ecdptr->ec_afsr & P_AFSR_P_SYND; 2994 if (!isus2i && !isus2e) { 2995 if (j & 0x1) 2996 psynd = psynd & 0xFF; 2997 else 2998 psynd = psynd >> 8; 2999 3000 if (!psynd) 3001 continue; /* wrong chunk */ 3002 } 3003 3004 /* Construct the PA */ 3005 errpa = ((t_etag & cpu_ec_tag_mask) << 3006 cpu_ec_tag_shift) | ((i | (j << 3)) % 3007 ec_set_size); 3008 3009 /* clean up the cache line */ 3010 flushecacheline(P2ALIGN(errpa, 64), 3011 cpunodes[CPU->cpu_id].ecache_size); 3012 3013 oafsr = clear_errors(NULL, cpu_afsr); 3014 acc_afsr |= oafsr; 3015 3016 (*linecnt)++; 3017 3018 /* 3019 * Capture the PA for the first bad line found. 3020 * Return the ecache dump and tag info. 3021 */ 3022 if (pa == AFLT_INV_ADDR) { 3023 int k; 3024 3025 pa = errpa; 3026 for (k = 0; k < 8; k++) 3027 ecache_data[k] = t_ecdata[k]; 3028 *ecache_tag = t_etag; 3029 } 3030 break; 3031 } 3032 } 3033 } 3034 *t_afar = pa; 3035 *t_afsr = acc_afsr; 3036 } 3037 3038 static void 3039 cpu_log_ecmem_info(spitf_async_flt *spf_flt) 3040 { 3041 struct async_flt *aflt = (struct async_flt *)spf_flt; 3042 uint64_t ecache_tag = spf_flt->flt_ec_tag; 3043 char linestr[30]; 3044 char *state_str; 3045 int i; 3046 3047 /* 3048 * Check the ecache tag to make sure it 3049 * is valid. If invalid, a memory dump was 3050 * captured instead of a ecache dump. 
3051 */ 3052 if (spf_flt->flt_ec_tag != AFLT_INV_ADDR) { 3053 uchar_t eparity = (uchar_t) 3054 ((ecache_tag & cpu_ec_par_mask) >> cpu_ec_par_shift); 3055 3056 uchar_t estate = (uchar_t) 3057 ((ecache_tag & cpu_ec_state_mask) >> cpu_ec_state_shift); 3058 3059 if (estate == cpu_ec_state_shr) 3060 state_str = "Shared"; 3061 else if (estate == cpu_ec_state_exl) 3062 state_str = "Exclusive"; 3063 else if (estate == cpu_ec_state_own) 3064 state_str = "Owner"; 3065 else if (estate == cpu_ec_state_mod) 3066 state_str = "Modified"; 3067 else 3068 state_str = "Invalid"; 3069 3070 if (spf_flt->flt_ec_lcnt > 1) { 3071 (void) snprintf(linestr, sizeof (linestr), 3072 "Badlines found=%d", spf_flt->flt_ec_lcnt); 3073 } else { 3074 linestr[0] = '\0'; 3075 } 3076 3077 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL, 3078 " PA=0x%08x.%08x\n E$tag 0x%08x.%08x E$State: %s " 3079 "E$parity 0x%02x %s", (uint32_t)(aflt->flt_addr >> 32), 3080 (uint32_t)aflt->flt_addr, (uint32_t)(ecache_tag >> 32), 3081 (uint32_t)ecache_tag, state_str, 3082 (uint32_t)eparity, linestr); 3083 } else { 3084 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL, 3085 " E$tag != PA from AFAR; E$line was victimized" 3086 "\n dumping memory from PA 0x%08x.%08x instead", 3087 (uint32_t)(P2ALIGN(aflt->flt_addr, 64) >> 32), 3088 (uint32_t)P2ALIGN(aflt->flt_addr, 64)); 3089 } 3090 3091 /* 3092 * Dump out all 8 8-byte ecache data captured 3093 * For each 8-byte data captured, we check the 3094 * captured afsr's parity syndrome to find out 3095 * which 8-byte chunk is bad. For memory dump, the 3096 * AFSR values were initialized to 0. 3097 */ 3098 for (i = 0; i < 8; i++) { 3099 ec_data_t *ecdptr; 3100 uint_t offset; 3101 ushort_t psynd; 3102 ushort_t bad; 3103 uint64_t edp; 3104 3105 offset = i << 3; /* multiply by 8 */ 3106 ecdptr = &spf_flt->flt_ec_data[i]; 3107 psynd = ecdptr->ec_afsr & P_AFSR_P_SYND; 3108 edp = ecdptr->ec_afsr & P_AFSR_EDP; 3109 3110 /* 3111 * For Sabre/Hummingbird, parity synd is captured only 3112 * in [7:0] of AFSR.PSYND for each 8-byte chunk. 3113 * For spitfire/blackbird, AFSR.PSYND is captured 3114 * in 16-byte granularity. [15:8] represent 3115 * the upper 8 byte and [7:0] the lower 8 byte. 3116 */ 3117 if (isus2i || isus2e || (i & 0x1)) 3118 bad = (psynd & 0xFF); /* check bits [7:0] */ 3119 else 3120 bad = (psynd & 0xFF00); /* check bits [15:8] */ 3121 3122 if (bad && edp) { 3123 cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL, 3124 " E$Data (0x%02x): 0x%08x.%08x " 3125 "*Bad* PSYND=0x%04x", offset, 3126 (uint32_t)(ecdptr->ec_d8 >> 32), 3127 (uint32_t)ecdptr->ec_d8, psynd); 3128 } else { 3129 cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL, 3130 " E$Data (0x%02x): 0x%08x.%08x", offset, 3131 (uint32_t)(ecdptr->ec_d8 >> 32), 3132 (uint32_t)ecdptr->ec_d8); 3133 } 3134 } 3135 } 3136 3137 /* 3138 * Common logging function for all cpu async errors. This function allows the 3139 * caller to generate a single cmn_err() call that logs the appropriate items 3140 * from the fault structure, and implements our rules for AFT logging levels. 3141 * 3142 * ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT) 3143 * tagnum: 0, 1, 2, .. generate the [AFT#] tag 3144 * spflt: pointer to spitfire async fault structure 3145 * logflags: bitflags indicating what to output 3146 * endstr: a end string to appear at the end of this log 3147 * fmt: a format string to appear at the beginning of the log 3148 * 3149 * The logflags allows the construction of predetermined output from the spflt 3150 * structure. 
The individual data items always appear in a consistent order. 3151 * Note that either or both of the spflt structure pointer and logflags may be 3152 * NULL or zero respectively, indicating that the predetermined output 3153 * substrings are not requested in this log. The output looks like this: 3154 * 3155 * [AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU> 3156 * <CPU_SPACE><CPU_ERRID> 3157 * newline+4spaces<CPU_AFSR><CPU_AFAR> 3158 * newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC> 3159 * newline+4spaces<CPU_UDBH><CPU_UDBL> 3160 * newline+4spaces<CPU_SYND> 3161 * newline+4spaces<endstr> 3162 * 3163 * Note that <endstr> may not start on a newline if we are logging <CPU_PSYND>; 3164 * it is assumed that <endstr> will be the unum string in this case. The size 3165 * of our intermediate formatting buf[] is based on the worst case of all flags 3166 * being enabled. We pass the caller's varargs directly to vcmn_err() for 3167 * formatting so we don't need additional stack space to format them here. 3168 */ 3169 /*PRINTFLIKE6*/ 3170 static void 3171 cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, uint_t logflags, 3172 const char *endstr, const char *fmt, ...) 3173 { 3174 struct async_flt *aflt = (struct async_flt *)spflt; 3175 char buf[400], *p, *q; /* see comments about buf[] size above */ 3176 va_list ap; 3177 int console_log_flag; 3178 3179 if ((aflt == NULL) || ((aflt->flt_class == CPU_FAULT) && 3180 (aflt->flt_stat & P_AFSR_LEVEL1)) || 3181 (aflt->flt_panic)) { 3182 console_log_flag = (tagnum < 2) || aft_verbose; 3183 } else { 3184 int verbose = ((aflt->flt_class == BUS_FAULT) || 3185 (aflt->flt_stat & P_AFSR_CE)) ? 3186 ce_verbose_memory : ce_verbose_other; 3187 3188 if (!verbose) 3189 return; 3190 3191 console_log_flag = (verbose > 1); 3192 } 3193 3194 if (console_log_flag) 3195 (void) sprintf(buf, "[AFT%d]", tagnum); 3196 else 3197 (void) sprintf(buf, "![AFT%d]", tagnum); 3198 3199 p = buf + strlen(buf); /* current buffer position */ 3200 q = buf + sizeof (buf); /* pointer past end of buffer */ 3201 3202 if (spflt != NULL && (logflags & CPU_ERRID_FIRST)) { 3203 (void) snprintf(p, (size_t)(q - p), " errID 0x%08x.%08x", 3204 (uint32_t)(aflt->flt_id >> 32), (uint32_t)aflt->flt_id); 3205 p += strlen(p); 3206 } 3207 3208 /* 3209 * Copy the caller's format string verbatim into buf[]. It will be 3210 * formatted by the call to vcmn_err() at the end of this function. 3211 */ 3212 if (fmt != NULL && p < q) { 3213 (void) strncpy(p, fmt, (size_t)(q - p - 1)); 3214 buf[sizeof (buf) - 1] = '\0'; 3215 p += strlen(p); 3216 } 3217 3218 if (spflt != NULL) { 3219 if (logflags & CPU_FLTCPU) { 3220 (void) snprintf(p, (size_t)(q - p), " CPU%d", 3221 aflt->flt_inst); 3222 p += strlen(p); 3223 } 3224 3225 if (logflags & CPU_SPACE) { 3226 if (aflt->flt_status & ECC_D_TRAP) 3227 (void) snprintf(p, (size_t)(q - p), 3228 " Data access"); 3229 else if (aflt->flt_status & ECC_I_TRAP) 3230 (void) snprintf(p, (size_t)(q - p), 3231 " Instruction access"); 3232 p += strlen(p); 3233 } 3234 3235 if (logflags & CPU_TL) { 3236 (void) snprintf(p, (size_t)(q - p), " at TL%s", 3237 aflt->flt_tl ? 
">0" : "=0"); 3238 p += strlen(p); 3239 } 3240 3241 if (logflags & CPU_ERRID) { 3242 (void) snprintf(p, (size_t)(q - p), 3243 ", errID 0x%08x.%08x", 3244 (uint32_t)(aflt->flt_id >> 32), 3245 (uint32_t)aflt->flt_id); 3246 p += strlen(p); 3247 } 3248 3249 if (logflags & CPU_AFSR) { 3250 (void) snprintf(p, (size_t)(q - p), 3251 "\n AFSR 0x%08b.%08b", 3252 (uint32_t)(aflt->flt_stat >> 32), AFSR_FMTSTR0, 3253 (uint32_t)aflt->flt_stat, AFSR_FMTSTR1); 3254 p += strlen(p); 3255 } 3256 3257 if (logflags & CPU_AFAR) { 3258 (void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x", 3259 (uint32_t)(aflt->flt_addr >> 32), 3260 (uint32_t)aflt->flt_addr); 3261 p += strlen(p); 3262 } 3263 3264 if (logflags & CPU_AF_PSYND) { 3265 ushort_t psynd = (ushort_t) 3266 (aflt->flt_stat & P_AFSR_P_SYND); 3267 3268 (void) snprintf(p, (size_t)(q - p), 3269 "\n AFSR.PSYND 0x%04x(Score %02d)", 3270 psynd, ecc_psynd_score(psynd)); 3271 p += strlen(p); 3272 } 3273 3274 if (logflags & CPU_AF_ETS) { 3275 (void) snprintf(p, (size_t)(q - p), " AFSR.ETS 0x%02x", 3276 (uchar_t)((aflt->flt_stat & P_AFSR_ETS) >> 16)); 3277 p += strlen(p); 3278 } 3279 3280 if (logflags & CPU_FAULTPC) { 3281 (void) snprintf(p, (size_t)(q - p), " Fault_PC 0x%p", 3282 (void *)aflt->flt_pc); 3283 p += strlen(p); 3284 } 3285 3286 if (logflags & CPU_UDBH) { 3287 (void) snprintf(p, (size_t)(q - p), 3288 "\n UDBH 0x%04b UDBH.ESYND 0x%02x", 3289 spflt->flt_sdbh, UDB_FMTSTR, 3290 spflt->flt_sdbh & 0xFF); 3291 p += strlen(p); 3292 } 3293 3294 if (logflags & CPU_UDBL) { 3295 (void) snprintf(p, (size_t)(q - p), 3296 " UDBL 0x%04b UDBL.ESYND 0x%02x", 3297 spflt->flt_sdbl, UDB_FMTSTR, 3298 spflt->flt_sdbl & 0xFF); 3299 p += strlen(p); 3300 } 3301 3302 if (logflags & CPU_SYND) { 3303 ushort_t synd = SYND(aflt->flt_synd); 3304 3305 (void) snprintf(p, (size_t)(q - p), 3306 "\n %s Syndrome 0x%x Memory Module ", 3307 UDBL(aflt->flt_synd) ? "UDBL" : "UDBH", synd); 3308 p += strlen(p); 3309 } 3310 } 3311 3312 if (endstr != NULL) { 3313 if (!(logflags & CPU_SYND)) 3314 (void) snprintf(p, (size_t)(q - p), "\n %s", endstr); 3315 else 3316 (void) snprintf(p, (size_t)(q - p), "%s", endstr); 3317 p += strlen(p); 3318 } 3319 3320 if (ce_code == CE_CONT && (p < q - 1)) 3321 (void) strcpy(p, "\n"); /* add final \n if needed */ 3322 3323 va_start(ap, fmt); 3324 vcmn_err(ce_code, buf, ap); 3325 va_end(ap); 3326 } 3327 3328 /* 3329 * Ecache Scrubbing 3330 * 3331 * The basic idea is to prevent lines from sitting in the ecache long enough 3332 * to build up soft errors which can lead to ecache parity errors. 3333 * 3334 * The following rules are observed when flushing the ecache: 3335 * 3336 * 1. When the system is busy, flush bad clean lines 3337 * 2. When the system is idle, flush all clean lines 3338 * 3. When the system is idle, flush good dirty lines 3339 * 4. Never flush bad dirty lines. 3340 * 3341 * modify parity busy idle 3342 * ---------------------------- 3343 * clean good X 3344 * clean bad X X 3345 * dirty good X 3346 * dirty bad 3347 * 3348 * Bad or good refers to whether a line has an E$ parity error or not. 3349 * Clean or dirty refers to the state of the modified bit. We currently 3350 * default the scan rate to 100 (scan 10% of the cache per second). 3351 * 3352 * The following are E$ states and actions. 
3353 * 3354 * We encode our state as a 3-bit number, consisting of: 3355 * ECACHE_STATE_MODIFIED (0=clean, 1=dirty) 3356 * ECACHE_STATE_PARITY (0=good, 1=bad) 3357 * ECACHE_STATE_BUSY (0=idle, 1=busy) 3358 * 3359 * We associate a flushing and a logging action with each state. 3360 * 3361 * E$ actions are different for Spitfire and Sabre/Hummingbird modules. 3362 * MIRROR_FLUSH indicates that an E$ line will be flushed for the mirrored 3363 * E$ only, in addition to the value set by ec_flush. 3364 */ 3365 3366 #define ALWAYS_FLUSH 0x1 /* flush E$ line on all E$ types */ 3367 #define NEVER_FLUSH 0x0 /* never flush the E$ line */ 3368 #define MIRROR_FLUSH 0xF /* flush E$ line on mirrored E$ only */ 3369 3370 struct { 3371 char ec_flush; /* whether to flush or not */ 3372 char ec_log; /* ecache logging */ 3373 char ec_log_type; /* log type info */ 3374 } ec_action[] = { /* states of the E$ line in M P B */ 3375 { ALWAYS_FLUSH, 0, 0 }, /* 0 0 0 clean_good_idle */ 3376 { MIRROR_FLUSH, 0, 0 }, /* 0 0 1 clean_good_busy */ 3377 { ALWAYS_FLUSH, 1, CPU_BADLINE_CI_ERR }, /* 0 1 0 clean_bad_idle */ 3378 { ALWAYS_FLUSH, 1, CPU_BADLINE_CB_ERR }, /* 0 1 1 clean_bad_busy */ 3379 { ALWAYS_FLUSH, 0, 0 }, /* 1 0 0 dirty_good_idle */ 3380 { MIRROR_FLUSH, 0, 0 }, /* 1 0 1 dirty_good_busy */ 3381 { NEVER_FLUSH, 1, CPU_BADLINE_DI_ERR }, /* 1 1 0 dirty_bad_idle */ 3382 { NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR } /* 1 1 1 dirty_bad_busy */ 3383 }; 3384 3385 /* 3386 * Offsets into the ec_action[] table that identify the clean_good_busy and 3387 * dirty_good_busy lines. 3388 */ 3389 #define ECACHE_CGB_LINE 1 /* E$ clean_good_busy line */ 3390 #define ECACHE_DGB_LINE 5 /* E$ dirty_good_busy line */ 3391 3392 /* 3393 * We flush lines that are Clean_Good_Busy as well as lines that are 3394 * Dirty_Good_Busy, but only for non-mirrored E$. 3395 */ 3396 #define CGB(x, m) (((x) == ECACHE_CGB_LINE) && (m != ECACHE_CPU_MIRROR)) 3397 #define DGB(x, m) (((x) == ECACHE_DGB_LINE) && (m != ECACHE_CPU_MIRROR)) 3398 3399 #define ECACHE_STATE_MODIFIED 0x4 3400 #define ECACHE_STATE_PARITY 0x2 3401 #define ECACHE_STATE_BUSY 0x1 3402 3403 /* 3404 * If ecache is mirrored, ecache_calls_a_sec and ecache_scan_rate are reduced. 3405 */ 3406 int ecache_calls_a_sec_mirrored = 1; 3407 int ecache_lines_per_call_mirrored = 1; 3408 3409 int ecache_scrub_enable = 1; /* ecache scrubbing is on by default */ 3410 int ecache_scrub_verbose = 1; /* prints clean and dirty lines */ 3411 int ecache_scrub_panic = 0; /* panics on a clean and dirty line */ 3412 int ecache_calls_a_sec = 100; /* scrubber calls per sec */ 3413 int ecache_scan_rate = 100; /* scan rate (in tenths of a percent) */ 3414 int ecache_idle_factor = 1; /* increase the scan rate when idle */ 3415 int ecache_flush_clean_good_busy = 50; /* flush rate (in percent) */ 3416 int ecache_flush_dirty_good_busy = 100; /* flush rate (in percent) */ 3417 3418 volatile int ec_timeout_calls = 1; /* timeout calls */ 3419 3420 /* 3421 * Interrupt number and pil for ecache scrubber cross-trap calls. 3422 */ 3423 static uint64_t ecache_scrub_inum; 3424 uint_t ecache_scrub_pil = PIL_9; 3425 3426 /* 3427 * Kstats for the E$ scrubber.
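* Note that the first eight counters below are indexed directly by the 3-bit M/P/B state assembled in scrub_ecache_line() (ec_knp = (kstat_named_t *)ec_ksp + mpb), so their order must match the ECACHE_STATE_* encoding and the ec_action[] table above. For example, a dirty line with bad parity found while the CPU is busy encodes as mpb = ECACHE_STATE_MODIFIED | ECACHE_STATE_PARITY | ECACHE_STATE_BUSY = 0x4 | 0x2 | 0x1 = 0x7, which selects ec_action[7] (dirty_bad_busy, never flushed) and increments the dirty_bad_busy kstat.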
3428 */ 3429 typedef struct ecache_kstat { 3430 kstat_named_t clean_good_idle; /* # of lines scrubbed */ 3431 kstat_named_t clean_good_busy; /* # of lines skipped */ 3432 kstat_named_t clean_bad_idle; /* # of lines scrubbed */ 3433 kstat_named_t clean_bad_busy; /* # of lines scrubbed */ 3434 kstat_named_t dirty_good_idle; /* # of lines scrubbed */ 3435 kstat_named_t dirty_good_busy; /* # of lines skipped */ 3436 kstat_named_t dirty_bad_idle; /* # of lines skipped */ 3437 kstat_named_t dirty_bad_busy; /* # of lines skipped */ 3438 kstat_named_t invalid_lines; /* # of invalid lines */ 3439 kstat_named_t clean_good_busy_flush; /* # of lines scrubbed */ 3440 kstat_named_t dirty_good_busy_flush; /* # of lines scrubbed */ 3441 kstat_named_t tags_cleared; /* # of E$ tags cleared */ 3442 } ecache_kstat_t; 3443 3444 static ecache_kstat_t ec_kstat_template = { 3445 { "clean_good_idle", KSTAT_DATA_ULONG }, 3446 { "clean_good_busy", KSTAT_DATA_ULONG }, 3447 { "clean_bad_idle", KSTAT_DATA_ULONG }, 3448 { "clean_bad_busy", KSTAT_DATA_ULONG }, 3449 { "dirty_good_idle", KSTAT_DATA_ULONG }, 3450 { "dirty_good_busy", KSTAT_DATA_ULONG }, 3451 { "dirty_bad_idle", KSTAT_DATA_ULONG }, 3452 { "dirty_bad_busy", KSTAT_DATA_ULONG }, 3453 { "invalid_lines", KSTAT_DATA_ULONG }, 3454 { "clean_good_busy_flush", KSTAT_DATA_ULONG }, 3455 { "dirty_good_busy_flush", KSTAT_DATA_ULONG }, 3456 { "ecache_tags_cleared", KSTAT_DATA_ULONG } 3457 }; 3458 3459 struct kmem_cache *sf_private_cache; 3460 3461 /* 3462 * Called periodically on each CPU to scan the ecache once a sec. 3463 * adjusting the ecache line index appropriately 3464 */ 3465 void 3466 scrub_ecache_line() 3467 { 3468 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc); 3469 int cpuid = CPU->cpu_id; 3470 uint32_t index = ssmp->ecache_flush_index; 3471 uint64_t ec_size = cpunodes[cpuid].ecache_size; 3472 size_t ec_linesize = cpunodes[cpuid].ecache_linesize; 3473 int nlines = ssmp->ecache_nlines; 3474 uint32_t ec_set_size = ec_size / ecache_associativity; 3475 int ec_mirror = ssmp->ecache_mirror; 3476 ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data; 3477 3478 int line, scan_lines, flush_clean_busy = 0, flush_dirty_busy = 0; 3479 int mpb; /* encode Modified, Parity, Busy for action */ 3480 uchar_t state; 3481 uint64_t ec_tag, paddr, oafsr, tafsr, nafsr; 3482 uint64_t *acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 3483 ec_data_t ec_data[8]; 3484 kstat_named_t *ec_knp; 3485 3486 switch (ec_mirror) { 3487 default: 3488 case ECACHE_CPU_NON_MIRROR: 3489 /* 3490 * The E$ scan rate is expressed in units of tenths of 3491 * a percent. ecache_scan_rate = 1000 (100%) means the 3492 * whole cache is scanned every second. 3493 */ 3494 scan_lines = (nlines * ecache_scan_rate) / 3495 (1000 * ecache_calls_a_sec); 3496 if (!(ssmp->ecache_busy)) { 3497 if (ecache_idle_factor > 0) { 3498 scan_lines *= ecache_idle_factor; 3499 } 3500 } else { 3501 flush_clean_busy = (scan_lines * 3502 ecache_flush_clean_good_busy) / 100; 3503 flush_dirty_busy = (scan_lines * 3504 ecache_flush_dirty_good_busy) / 100; 3505 } 3506 3507 ec_timeout_calls = (ecache_calls_a_sec ? 3508 ecache_calls_a_sec : 1); 3509 break; 3510 3511 case ECACHE_CPU_MIRROR: 3512 scan_lines = ecache_lines_per_call_mirrored; 3513 ec_timeout_calls = (ecache_calls_a_sec_mirrored ? 
3514 ecache_calls_a_sec_mirrored : 1); 3515 break; 3516 } 3517 3518 /* 3519 * The ecache scrubber algorithm operates by reading and 3520 * decoding the E$ tag to determine whether the corresponding E$ line 3521 * can be scrubbed. There is an implicit assumption in the scrubber 3522 * logic that the E$ tag is valid. Unfortunately, this assumption is 3523 * flawed since the E$ tag may also be corrupted and have parity errors. 3524 * The scrubber logic is enhanced to check the validity of the E$ tag 3525 * before scrubbing. When a parity error is detected in the E$ tag, 3526 * it is possible to recover and scrub the tag under certain conditions 3527 * so that an ETP error condition can be avoided. 3528 */ 3529 3530 for (mpb = line = 0; line < scan_lines; line++, mpb = 0) { 3531 /* 3532 * We get the old-AFSR before clearing the AFSR sticky bits 3533 * in {get_ecache_tag, check_ecache_line, get_ecache_dtag}. 3534 * If the CP bit is set in the old-AFSR, we log an Orphan CP event. 3535 */ 3536 ec_tag = get_ecache_tag(index, &nafsr, acc_afsr); 3537 state = (uchar_t)((ec_tag & cpu_ec_state_mask) >> 3538 cpu_ec_state_shift); 3539 3540 /* 3541 * ETP is set; try to scrub the ecache tag. 3542 */ 3543 if (nafsr & P_AFSR_ETP) { 3544 ecache_scrub_tag_err(nafsr, state, index); 3545 } else if (state & cpu_ec_state_valid) { 3546 /* 3547 * ETP is not set and the E$ tag is valid. 3548 * Proceed with the E$ scrubbing. 3549 */ 3550 if (state & cpu_ec_state_dirty) 3551 mpb |= ECACHE_STATE_MODIFIED; 3552 3553 tafsr = check_ecache_line(index, acc_afsr); 3554 3555 if (tafsr & P_AFSR_EDP) { 3556 mpb |= ECACHE_STATE_PARITY; 3557 3558 if (ecache_scrub_verbose || 3559 ecache_scrub_panic) { 3560 get_ecache_dtag(P2ALIGN(index, 64), 3561 (uint64_t *)&ec_data[0], 3562 &ec_tag, &oafsr, acc_afsr); 3563 } 3564 } 3565 3566 if (ssmp->ecache_busy) 3567 mpb |= ECACHE_STATE_BUSY; 3568 3569 ec_knp = (kstat_named_t *)ec_ksp + mpb; 3570 ec_knp->value.ul++; 3571 3572 paddr = ((ec_tag & cpu_ec_tag_mask) << 3573 cpu_ec_tag_shift) | (index % ec_set_size); 3574 3575 /* 3576 * We flush the E$ line depending on ec_flush; for mirrored 3577 * E$ we additionally flush clean_good_busy and 3578 * dirty_good_busy lines. 3579 */ 3580 if (ec_action[mpb].ec_flush == ALWAYS_FLUSH) { 3581 flushecacheline(paddr, ec_size); 3582 } else if ((ec_mirror == ECACHE_CPU_MIRROR) && 3583 (ec_action[mpb].ec_flush == MIRROR_FLUSH)) { 3584 flushecacheline(paddr, ec_size); 3585 } else if (ec_action[mpb].ec_flush == NEVER_FLUSH) { 3586 softcall(ecache_page_retire, (void *)paddr); 3587 } 3588 3589 /* 3590 * Conditionally flush both the clean_good and 3591 * dirty_good lines when busy.
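* As an illustration, assuming a 512KB non-mirrored E$ (8192 64-byte lines) and the defaults above (ecache_scan_rate = 100, ecache_calls_a_sec = 100, ecache_flush_clean_good_busy = 50, ecache_flush_dirty_good_busy = 100), each call scans scan_lines = (8192 * 100) / (1000 * 100) = 8 lines, and while busy the flush budgets are flush_clean_busy = (8 * 50) / 100 = 4 and flush_dirty_busy = (8 * 100) / 100 = 8 lines per call.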
3592 */ 3593 if (CGB(mpb, ec_mirror) && (flush_clean_busy > 0)) { 3594 flush_clean_busy--; 3595 flushecacheline(paddr, ec_size); 3596 ec_ksp->clean_good_busy_flush.value.ul++; 3597 } else if (DGB(mpb, ec_mirror) && 3598 (flush_dirty_busy > 0)) { 3599 flush_dirty_busy--; 3600 flushecacheline(paddr, ec_size); 3601 ec_ksp->dirty_good_busy_flush.value.ul++; 3602 } 3603 3604 if (ec_action[mpb].ec_log && (ecache_scrub_verbose || 3605 ecache_scrub_panic)) { 3606 ecache_scrub_log(ec_data, ec_tag, paddr, mpb, 3607 tafsr); 3608 } 3609 3610 } else { 3611 ec_ksp->invalid_lines.value.ul++; 3612 } 3613 3614 if ((index += ec_linesize) >= ec_size) 3615 index = 0; 3616 3617 } 3618 3619 /* 3620 * set the ecache scrub index for the next time around 3621 */ 3622 ssmp->ecache_flush_index = index; 3623 3624 if (*acc_afsr & P_AFSR_CP) { 3625 uint64_t ret_afsr; 3626 3627 ret_afsr = ecache_scrub_misc_err(CPU_ORPHAN_CP_ERR, *acc_afsr); 3628 if ((ret_afsr & P_AFSR_CP) == 0) 3629 *acc_afsr = 0; 3630 } 3631 } 3632 3633 /* 3634 * Handler for ecache_scrub_inum softint. Call scrub_ecache_line until 3635 * we decrement the outstanding request count to zero. 3636 */ 3637 3638 /*ARGSUSED*/ 3639 uint_t 3640 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2) 3641 { 3642 int i; 3643 int outstanding; 3644 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc); 3645 uint32_t *countp = &ssmp->ec_scrub_outstanding; 3646 3647 do { 3648 outstanding = *countp; 3649 ASSERT(outstanding > 0); 3650 for (i = 0; i < outstanding; i++) 3651 scrub_ecache_line(); 3652 } while (atomic_add_32_nv(countp, -outstanding)); 3653 3654 return (DDI_INTR_CLAIMED); 3655 } 3656 3657 /* 3658 * force each cpu to perform an ecache scrub, called from a timeout 3659 */ 3660 extern xcfunc_t ecache_scrubreq_tl1; 3661 3662 void 3663 do_scrub_ecache_line(void) 3664 { 3665 long delta; 3666 3667 if (ecache_calls_a_sec > hz) 3668 ecache_calls_a_sec = hz; 3669 else if (ecache_calls_a_sec <= 0) 3670 ecache_calls_a_sec = 1; 3671 3672 if (ecache_calls_a_sec_mirrored > hz) 3673 ecache_calls_a_sec_mirrored = hz; 3674 else if (ecache_calls_a_sec_mirrored <= 0) 3675 ecache_calls_a_sec_mirrored = 1; 3676 3677 if (ecache_scrub_enable) { 3678 xt_all(ecache_scrubreq_tl1, ecache_scrub_inum, 0); 3679 delta = hz / ec_timeout_calls; 3680 } else { 3681 delta = hz; 3682 } 3683 3684 (void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0, 3685 delta); 3686 } 3687 3688 /* 3689 * initialization for ecache scrubbing 3690 * This routine is called AFTER all cpus have had cpu_init_private called 3691 * to initialize their private data areas. 3692 */ 3693 void 3694 cpu_init_cache_scrub(void) 3695 { 3696 if (ecache_calls_a_sec > hz) { 3697 cmn_err(CE_NOTE, "ecache_calls_a_sec set too high (%d); " 3698 "resetting to hz (%d)", ecache_calls_a_sec, hz); 3699 ecache_calls_a_sec = hz; 3700 } 3701 3702 /* 3703 * Register softint for ecache scrubbing. 3704 */ 3705 ecache_scrub_inum = add_softintr(ecache_scrub_pil, 3706 scrub_ecache_line_intr, NULL, SOFTINT_MT); 3707 3708 /* 3709 * kick off the scrubbing using realtime timeout 3710 */ 3711 (void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0, 3712 hz / ecache_calls_a_sec); 3713 } 3714 3715 /* 3716 * Unset the busy flag for this cpu. 3717 */ 3718 void 3719 cpu_idle_ecache_scrub(struct cpu *cp) 3720 { 3721 if (CPU_PRIVATE(cp) != NULL) { 3722 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, 3723 sfpr_scrub_misc); 3724 ssmp->ecache_busy = ECACHE_CPU_IDLE; 3725 } 3726 } 3727 3728 /* 3729 * Set the busy flag for this cpu. 
 */
void
cpu_busy_ecache_scrub(struct cpu *cp)
{
	if (CPU_PRIVATE(cp) != NULL) {
		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
		    sfpr_scrub_misc);
		ssmp->ecache_busy = ECACHE_CPU_BUSY;
	}
}

/*
 * initialize the ecache scrubber data structures
 * The global entry point cpu_init_private replaces this entry point.
 */
static void
cpu_init_ecache_scrub_dr(struct cpu *cp)
{
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
	int cpuid = cp->cpu_id;

	/*
	 * initialize bookkeeping for cache scrubbing
	 */
	bzero(ssmp, sizeof (spitfire_scrub_misc_t));

	ssmp->ecache_flush_index = 0;

	ssmp->ecache_nlines =
	    cpunodes[cpuid].ecache_size / cpunodes[cpuid].ecache_linesize;

	/*
	 * Determine whether we are running on mirrored SRAM
	 */

	if (cpunodes[cpuid].msram == ECACHE_CPU_MIRROR)
		ssmp->ecache_mirror = ECACHE_CPU_MIRROR;
	else
		ssmp->ecache_mirror = ECACHE_CPU_NON_MIRROR;

	cpu_busy_ecache_scrub(cp);

	/*
	 * initialize the kstats
	 */
	ecache_kstat_init(cp);
}

/*
 * uninitialize the ecache scrubber data structures
 * The global entry point cpu_uninit_private replaces this entry point.
 */
static void
cpu_uninit_ecache_scrub_dr(struct cpu *cp)
{
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);

	if (ssmp->ecache_ksp != NULL) {
		kstat_delete(ssmp->ecache_ksp);
		ssmp->ecache_ksp = NULL;
	}

	/*
	 * un-initialize bookkeeping for cache scrubbing
	 */
	bzero(ssmp, sizeof (spitfire_scrub_misc_t));

	cpu_idle_ecache_scrub(cp);
}

struct kmem_cache *sf_private_cache;

/*
 * Cpu private initialization. This includes allocating the cpu_private
 * data structure, initializing it, and initializing the scrubber for this
 * cpu. This is called once for EVERY cpu, including CPU 0. This function
 * calls cpu_init_ecache_scrub_dr to init the scrubber.
 * We use kmem_cache_create for the spitfire private data structure because it
 * needs to be allocated on a S_ECACHE_MAX_LSIZE (64) byte boundary.
 */
void
cpu_init_private(struct cpu *cp)
{
	spitfire_private_t *sfprp;

	ASSERT(CPU_PRIVATE(cp) == NULL);

	/*
	 * If the sf_private_cache has not been created, create it.
	 */
	if (sf_private_cache == NULL) {
		sf_private_cache = kmem_cache_create("sf_private_cache",
		    sizeof (spitfire_private_t), S_ECACHE_MAX_LSIZE, NULL,
		    NULL, NULL, NULL, NULL, 0);
		ASSERT(sf_private_cache);
	}

	sfprp = CPU_PRIVATE(cp) = kmem_cache_alloc(sf_private_cache, KM_SLEEP);

	bzero(sfprp, sizeof (spitfire_private_t));

	cpu_init_ecache_scrub_dr(cp);
}

/*
 * Cpu private uninitialization. Uninitialize the Ecache scrubber and
 * deallocate the scrubber data structures and cpu_private data structure.
 * For now, this function just calls cpu_uninit_ecache_scrub_dr to uninit
 * the scrubber for the specified cpu.
 */
void
cpu_uninit_private(struct cpu *cp)
{
	ASSERT(CPU_PRIVATE(cp));

	cpu_uninit_ecache_scrub_dr(cp);
	kmem_cache_free(sf_private_cache, CPU_PRIVATE(cp));
	CPU_PRIVATE(cp) = NULL;
}

/*
 * initialize the ecache kstats for each cpu
 */
static void
ecache_kstat_init(struct cpu *cp)
{
	struct kstat *ksp;
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);

	ASSERT(ssmp != NULL);

	if ((ksp = kstat_create("unix", cp->cpu_id, "ecache_kstat", "misc",
	    KSTAT_TYPE_NAMED,
	    sizeof (ecache_kstat_t) / sizeof (kstat_named_t),
	    KSTAT_FLAG_WRITABLE)) == NULL) {
		ssmp->ecache_ksp = NULL;
		cmn_err(CE_NOTE, "!ecache_kstat_init(%d) failed\n", cp->cpu_id);
		return;
	}

	ssmp->ecache_ksp = ksp;
	bcopy(&ec_kstat_template, ksp->ks_data, sizeof (ecache_kstat_t));
	kstat_install(ksp);
}

/*
 * log the bad ecache information
 */
static void
ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, uint64_t paddr, int mpb,
	uint64_t afsr)
{
	spitf_async_flt spf_flt;
	struct async_flt *aflt;
	int i;
	char *class;

	bzero(&spf_flt, sizeof (spitf_async_flt));
	aflt = &spf_flt.cmn_asyncflt;

	for (i = 0; i < 8; i++) {
		spf_flt.flt_ec_data[i] = ec_data[i];
	}

	spf_flt.flt_ec_tag = ec_tag;

	if (mpb < (sizeof (ec_action) / sizeof (ec_action[0]))) {
		spf_flt.flt_type = ec_action[mpb].ec_log_type;
	} else
		spf_flt.flt_type = (ushort_t)mpb;

	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_addr = paddr;
	aflt->flt_stat = afsr;
	aflt->flt_panic = (uchar_t)ecache_scrub_panic;

	switch (mpb) {
	case CPU_ECACHE_TAG_ERR:
	case CPU_ECACHE_ADDR_PAR_ERR:
	case CPU_ECACHE_ETP_ETS_ERR:
	case CPU_ECACHE_STATE_ERR:
		class = FM_EREPORT_CPU_USII_ESCRUB_TAG;
		break;
	default:
		class = FM_EREPORT_CPU_USII_ESCRUB_DATA;
		break;
	}

	cpu_errorq_dispatch(class, (void *)&spf_flt, sizeof (spf_flt),
	    ue_queue, aflt->flt_panic);

	if (aflt->flt_panic)
		cmn_err(CE_PANIC, "ecache_scrub_panic set and bad E$ "
		    "line detected");
}

/*
 * Process an ecache error that occurred during the E$ scrubbing.
 * We do the ecache scan to find the bad line, flush the bad line
 * and start the memscrubber to find any UE (in memory or in another cache).
 */
static uint64_t
ecache_scrub_misc_err(int type, uint64_t afsr)
{
	spitf_async_flt spf_flt;
	struct async_flt *aflt;
	uint64_t oafsr;

	bzero(&spf_flt, sizeof (spitf_async_flt));
	aflt = &spf_flt.cmn_asyncflt;

	/*
	 * Scan each line in the cache to look for the one
	 * with bad parity.
	 */
	aflt->flt_addr = AFLT_INV_ADDR;
	scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
	    &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);

	if (oafsr & P_AFSR_CP) {
		uint64_t *cp_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
		*cp_afsr |= oafsr;
	}

	/*
	 * If we found a bad PA, update the state to indicate if it is
	 * memory or I/O space.
	 */
	if (aflt->flt_addr != AFLT_INV_ADDR) {
		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
		    MMU_PAGESHIFT)) ?
1 : 0; 3963 } 3964 3965 spf_flt.flt_type = (ushort_t)type; 3966 3967 aflt->flt_inst = CPU->cpu_id; 3968 aflt->flt_class = CPU_FAULT; 3969 aflt->flt_id = gethrtime_waitfree(); 3970 aflt->flt_status = afsr; 3971 aflt->flt_panic = (uchar_t)ecache_scrub_panic; 3972 3973 /* 3974 * We have the bad line, flush that line and start 3975 * the memscrubber. 3976 */ 3977 if (spf_flt.flt_ec_lcnt > 0) { 3978 flushecacheline(P2ALIGN(aflt->flt_addr, 64), 3979 cpunodes[CPU->cpu_id].ecache_size); 3980 read_all_memscrub = 1; 3981 memscrub_run(); 3982 } 3983 3984 cpu_errorq_dispatch((type == CPU_ORPHAN_CP_ERR) ? 3985 FM_EREPORT_CPU_USII_CP : FM_EREPORT_CPU_USII_UNKNOWN, 3986 (void *)&spf_flt, sizeof (spf_flt), ue_queue, aflt->flt_panic); 3987 3988 return (oafsr); 3989 } 3990 3991 static void 3992 ecache_scrub_tag_err(uint64_t afsr, uchar_t state, uint32_t index) 3993 { 3994 ushort_t afsr_ets = (afsr & P_AFSR_ETS) >> P_AFSR_ETS_SHIFT; 3995 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc); 3996 ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data; 3997 uint64_t ec_tag, paddr, oafsr; 3998 ec_data_t ec_data[8]; 3999 int cpuid = CPU->cpu_id; 4000 uint32_t ec_set_size = cpunodes[cpuid].ecache_size / 4001 ecache_associativity; 4002 uint64_t *cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 4003 4004 get_ecache_dtag(P2ALIGN(index, 64), (uint64_t *)&ec_data[0], &ec_tag, 4005 &oafsr, cpu_afsr); 4006 paddr = ((ec_tag & cpu_ec_tag_mask) << cpu_ec_tag_shift) | 4007 (index % ec_set_size); 4008 4009 /* 4010 * E$ tag state has good parity 4011 */ 4012 if ((afsr_ets & cpu_ec_state_parity) == 0) { 4013 if (afsr_ets & cpu_ec_parity) { 4014 /* 4015 * E$ tag state bits indicate the line is clean, 4016 * invalidate the E$ tag and continue. 4017 */ 4018 if (!(state & cpu_ec_state_dirty)) { 4019 /* 4020 * Zero the tag and mark the state invalid 4021 * with good parity for the tag. 
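				 * (Sabre (isus2i) and Hummingbird (isus2e)
				 * parts use the write_hb_ec_tag_parity()
				 * variant below; other Spitfire-class CPUs
				 * use write_ec_tag_parity().)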
4022 */ 4023 if (isus2i || isus2e) 4024 write_hb_ec_tag_parity(index); 4025 else 4026 write_ec_tag_parity(index); 4027 4028 /* Sync with the dual tag */ 4029 flushecacheline(0, 4030 cpunodes[CPU->cpu_id].ecache_size); 4031 ec_ksp->tags_cleared.value.ul++; 4032 ecache_scrub_log(ec_data, ec_tag, paddr, 4033 CPU_ECACHE_TAG_ERR, afsr); 4034 return; 4035 } else { 4036 ecache_scrub_log(ec_data, ec_tag, paddr, 4037 CPU_ECACHE_ADDR_PAR_ERR, afsr); 4038 cmn_err(CE_PANIC, " E$ tag address has bad" 4039 " parity"); 4040 } 4041 } else if ((afsr_ets & cpu_ec_parity) == 0) { 4042 /* 4043 * ETS is zero but ETP is set 4044 */ 4045 ecache_scrub_log(ec_data, ec_tag, paddr, 4046 CPU_ECACHE_ETP_ETS_ERR, afsr); 4047 cmn_err(CE_PANIC, "AFSR.ETP is set and" 4048 " AFSR.ETS is zero"); 4049 } 4050 } else { 4051 /* 4052 * E$ tag state bit has a bad parity 4053 */ 4054 ecache_scrub_log(ec_data, ec_tag, paddr, 4055 CPU_ECACHE_STATE_ERR, afsr); 4056 cmn_err(CE_PANIC, "E$ tag state has bad parity"); 4057 } 4058 } 4059 4060 static void 4061 ecache_page_retire(void *arg) 4062 { 4063 uint64_t paddr = (uint64_t)arg; 4064 (void) page_retire(paddr, PR_UE); 4065 } 4066 4067 void 4068 sticksync_slave(void) 4069 {} 4070 4071 void 4072 sticksync_master(void) 4073 {} 4074 4075 /*ARGSUSED*/ 4076 void 4077 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t bpp) 4078 {} 4079 4080 void 4081 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected) 4082 { 4083 int status; 4084 ddi_fm_error_t de; 4085 4086 bzero(&de, sizeof (ddi_fm_error_t)); 4087 4088 de.fme_version = DDI_FME_VERSION; 4089 de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, 4090 FM_ENA_FMT1); 4091 de.fme_flag = expected; 4092 de.fme_bus_specific = (void *)aflt->flt_addr; 4093 status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de); 4094 4095 if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL)) 4096 aflt->flt_panic = 1; 4097 } 4098 4099 /*ARGSUSED*/ 4100 void 4101 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz, 4102 errorq_t *eqp, uint_t flag) 4103 { 4104 struct async_flt *aflt = (struct async_flt *)payload; 4105 4106 aflt->flt_erpt_class = error_class; 4107 errorq_dispatch(eqp, payload, payload_sz, flag); 4108 } 4109 4110 #define MAX_SIMM 8 4111 4112 struct ce_info { 4113 char name[UNUM_NAMLEN]; 4114 uint64_t intermittent_total; 4115 uint64_t persistent_total; 4116 uint64_t sticky_total; 4117 unsigned short leaky_bucket_cnt; 4118 }; 4119 4120 /* 4121 * Separately-defined structure for use in reporting the ce_info 4122 * to SunVTS without exposing the internal layout and implementation 4123 * of struct ce_info. 4124 */ 4125 static struct ecc_error_info ecc_error_info_data = { 4126 { "version", KSTAT_DATA_UINT32 }, 4127 { "maxcount", KSTAT_DATA_UINT32 }, 4128 { "count", KSTAT_DATA_UINT32 } 4129 }; 4130 static const size_t ecc_error_info_ndata = sizeof (ecc_error_info_data) / 4131 sizeof (struct kstat_named); 4132 4133 #if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN 4134 #error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN" 4135 #endif 4136 4137 struct ce_info *mem_ce_simm = NULL; 4138 size_t mem_ce_simm_size = 0; 4139 4140 /* 4141 * Default values for the number of CE's allowed per interval. 
4142 * Interval is defined in minutes 4143 * SOFTERR_MIN_TIMEOUT is defined in microseconds 4144 */ 4145 #define SOFTERR_LIMIT_DEFAULT 2 4146 #define SOFTERR_INTERVAL_DEFAULT 1440 /* This is 24 hours */ 4147 #define SOFTERR_MIN_TIMEOUT (60 * MICROSEC) /* This is 1 minute */ 4148 #define TIMEOUT_NONE ((timeout_id_t)0) 4149 #define TIMEOUT_SET ((timeout_id_t)1) 4150 4151 /* 4152 * timeout identifer for leaky_bucket 4153 */ 4154 static timeout_id_t leaky_bucket_timeout_id = TIMEOUT_NONE; 4155 4156 /* 4157 * Tunables for maximum number of allowed CE's in a given time 4158 */ 4159 int ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT; 4160 int ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT; 4161 4162 void 4163 cpu_mp_init(void) 4164 { 4165 size_t size = cpu_aflt_size(); 4166 size_t i; 4167 kstat_t *ksp; 4168 4169 /* 4170 * Initialize the CE error handling buffers. 4171 */ 4172 mem_ce_simm_size = MAX_SIMM * max_ncpus; 4173 size = sizeof (struct ce_info) * mem_ce_simm_size; 4174 mem_ce_simm = kmem_zalloc(size, KM_SLEEP); 4175 4176 ksp = kstat_create("unix", 0, "ecc-info", "misc", 4177 KSTAT_TYPE_NAMED, ecc_error_info_ndata, KSTAT_FLAG_VIRTUAL); 4178 if (ksp != NULL) { 4179 ksp->ks_data = (struct kstat_named *)&ecc_error_info_data; 4180 ecc_error_info_data.version.value.ui32 = KSTAT_CE_INFO_VER; 4181 ecc_error_info_data.maxcount.value.ui32 = mem_ce_simm_size; 4182 ecc_error_info_data.count.value.ui32 = 0; 4183 kstat_install(ksp); 4184 } 4185 4186 for (i = 0; i < mem_ce_simm_size; i++) { 4187 struct kstat_ecc_mm_info *kceip; 4188 4189 kceip = kmem_zalloc(sizeof (struct kstat_ecc_mm_info), 4190 KM_SLEEP); 4191 ksp = kstat_create("mm", i, "ecc-info", "misc", 4192 KSTAT_TYPE_NAMED, 4193 sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t), 4194 KSTAT_FLAG_VIRTUAL); 4195 if (ksp != NULL) { 4196 /* 4197 * Re-declare ks_data_size to include room for the 4198 * UNUM name since we don't have KSTAT_FLAG_VAR_SIZE 4199 * set. 4200 */ 4201 ksp->ks_data_size = sizeof (struct kstat_ecc_mm_info) + 4202 KSTAT_CE_UNUM_NAMLEN; 4203 ksp->ks_data = kceip; 4204 kstat_named_init(&kceip->name, 4205 "name", KSTAT_DATA_STRING); 4206 kstat_named_init(&kceip->intermittent_total, 4207 "intermittent_total", KSTAT_DATA_UINT64); 4208 kstat_named_init(&kceip->persistent_total, 4209 "persistent_total", KSTAT_DATA_UINT64); 4210 kstat_named_init(&kceip->sticky_total, 4211 "sticky_total", KSTAT_DATA_UINT64); 4212 /* 4213 * Use the default snapshot routine as it knows how to 4214 * deal with named kstats with long strings. 4215 */ 4216 ksp->ks_update = ecc_kstat_update; 4217 kstat_install(ksp); 4218 } else { 4219 kmem_free(kceip, sizeof (struct kstat_ecc_mm_info)); 4220 } 4221 } 4222 } 4223 4224 /*ARGSUSED*/ 4225 static void 4226 leaky_bucket_timeout(void *arg) 4227 { 4228 int i; 4229 struct ce_info *psimm = mem_ce_simm; 4230 4231 for (i = 0; i < mem_ce_simm_size; i++) { 4232 if (psimm[i].leaky_bucket_cnt > 0) 4233 atomic_add_16(&psimm[i].leaky_bucket_cnt, -1); 4234 } 4235 add_leaky_bucket_timeout(); 4236 } 4237 4238 static void 4239 add_leaky_bucket_timeout(void) 4240 { 4241 long timeout_in_microsecs; 4242 4243 /* 4244 * create timeout for next leak. 4245 * 4246 * The timeout interval is calculated as follows 4247 * 4248 * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit 4249 * 4250 * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds 4251 * in a minute), then multiply this by MICROSEC to get the interval 4252 * in microseconds. 
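	 * (For example, with the default ecc_softerr_interval of 1440
	 * minutes this works out to 1440 * 60 * MICROSEC, i.e.
	 * 86,400,000,000 microseconds.)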
Divide this total by ecc_softerr_limit so that 4253 * the timeout interval is accurate to within a few microseconds. 4254 */ 4255 4256 if (ecc_softerr_limit <= 0) 4257 ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT; 4258 if (ecc_softerr_interval <= 0) 4259 ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT; 4260 4261 timeout_in_microsecs = ((int64_t)ecc_softerr_interval * 60 * MICROSEC) / 4262 ecc_softerr_limit; 4263 4264 if (timeout_in_microsecs < SOFTERR_MIN_TIMEOUT) 4265 timeout_in_microsecs = SOFTERR_MIN_TIMEOUT; 4266 4267 leaky_bucket_timeout_id = timeout(leaky_bucket_timeout, 4268 (void *)NULL, drv_usectohz((clock_t)timeout_in_microsecs)); 4269 } 4270 4271 /* 4272 * Legacy Correctable ECC Error Hash 4273 * 4274 * All of the code below this comment is used to implement a legacy array 4275 * which counted intermittent, persistent, and sticky CE errors by unum, 4276 * and then was later extended to publish the data as a kstat for SunVTS. 4277 * All of this code is replaced by FMA, and remains here until such time 4278 * that the UltraSPARC-I/II CPU code is converted to FMA, or is EOLed. 4279 * 4280 * Errors are saved in three buckets per-unum: 4281 * (1) sticky - scrub was unsuccessful, cannot be scrubbed 4282 * This could represent a problem, and is immediately printed out. 4283 * (2) persistent - was successfully scrubbed 4284 * These errors use the leaky bucket algorithm to determine 4285 * if there is a serious problem. 4286 * (3) intermittent - may have originated from the cpu or upa/safari bus, 4287 * and does not necessarily indicate any problem with the dimm itself, 4288 * is critical information for debugging new hardware. 4289 * Because we do not know if it came from the dimm, it would be 4290 * inappropriate to include these in the leaky bucket counts. 4291 * 4292 * If the E$ line was modified before the scrub operation began, then the 4293 * displacement flush at the beginning of scrubphys() will cause the modified 4294 * line to be written out, which will clean up the CE. Then, any subsequent 4295 * read will not cause an error, which will cause persistent errors to be 4296 * identified as intermittent. 4297 * 4298 * If a DIMM is going bad, it will produce true persistents as well as 4299 * false intermittents, so these intermittents can be safely ignored. 4300 * 4301 * If the error count is excessive for a DIMM, this function will return 4302 * PR_MCE, and the CPU module may then decide to remove that page from use. 4303 */ 4304 static int 4305 ce_count_unum(int status, int len, char *unum) 4306 { 4307 int i; 4308 struct ce_info *psimm = mem_ce_simm; 4309 int page_status = PR_OK; 4310 4311 ASSERT(psimm != NULL); 4312 4313 if (len <= 0 || 4314 (status & (ECC_STICKY | ECC_PERSISTENT | ECC_INTERMITTENT)) == 0) 4315 return (page_status); 4316 4317 /* 4318 * Initialize the leaky_bucket timeout 4319 */ 4320 if (casptr(&leaky_bucket_timeout_id, 4321 TIMEOUT_NONE, TIMEOUT_SET) == TIMEOUT_NONE) 4322 add_leaky_bucket_timeout(); 4323 4324 for (i = 0; i < mem_ce_simm_size; i++) { 4325 if (psimm[i].name[0] == '\0') { 4326 /* 4327 * Hit the end of the valid entries, add 4328 * a new one. 4329 */ 4330 (void) strncpy(psimm[i].name, unum, len); 4331 if (status & ECC_STICKY) { 4332 /* 4333 * Sticky - the leaky bucket is used to track 4334 * soft errors. Since a sticky error is a 4335 * hard error and likely to be retired soon, 4336 * we do not count it in the leaky bucket. 
4337 */ 4338 psimm[i].leaky_bucket_cnt = 0; 4339 psimm[i].intermittent_total = 0; 4340 psimm[i].persistent_total = 0; 4341 psimm[i].sticky_total = 1; 4342 cmn_err(CE_WARN, 4343 "[AFT0] Sticky Softerror encountered " 4344 "on Memory Module %s\n", unum); 4345 page_status = PR_MCE; 4346 } else if (status & ECC_PERSISTENT) { 4347 psimm[i].leaky_bucket_cnt = 1; 4348 psimm[i].intermittent_total = 0; 4349 psimm[i].persistent_total = 1; 4350 psimm[i].sticky_total = 0; 4351 } else { 4352 /* 4353 * Intermittent - Because the scrub operation 4354 * cannot find the error in the DIMM, we will 4355 * not count these in the leaky bucket 4356 */ 4357 psimm[i].leaky_bucket_cnt = 0; 4358 psimm[i].intermittent_total = 1; 4359 psimm[i].persistent_total = 0; 4360 psimm[i].sticky_total = 0; 4361 } 4362 ecc_error_info_data.count.value.ui32++; 4363 break; 4364 } else if (strncmp(unum, psimm[i].name, len) == 0) { 4365 /* 4366 * Found an existing entry for the current 4367 * memory module, adjust the counts. 4368 */ 4369 if (status & ECC_STICKY) { 4370 psimm[i].sticky_total++; 4371 cmn_err(CE_WARN, 4372 "[AFT0] Sticky Softerror encountered " 4373 "on Memory Module %s\n", unum); 4374 page_status = PR_MCE; 4375 } else if (status & ECC_PERSISTENT) { 4376 int new_value; 4377 4378 new_value = atomic_add_16_nv( 4379 &psimm[i].leaky_bucket_cnt, 1); 4380 psimm[i].persistent_total++; 4381 if (new_value > ecc_softerr_limit) { 4382 cmn_err(CE_WARN, "[AFT0] Most recent %d" 4383 " soft errors from Memory Module" 4384 " %s exceed threshold (N=%d," 4385 " T=%dh:%02dm) triggering page" 4386 " retire", new_value, unum, 4387 ecc_softerr_limit, 4388 ecc_softerr_interval / 60, 4389 ecc_softerr_interval % 60); 4390 atomic_add_16( 4391 &psimm[i].leaky_bucket_cnt, -1); 4392 page_status = PR_MCE; 4393 } 4394 } else { /* Intermittent */ 4395 psimm[i].intermittent_total++; 4396 } 4397 break; 4398 } 4399 } 4400 4401 if (i >= mem_ce_simm_size) 4402 cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of " 4403 "space.\n"); 4404 4405 return (page_status); 4406 } 4407 4408 /* 4409 * Function to support counting of IO detected CEs. 4410 */ 4411 void 4412 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum) 4413 { 4414 int err; 4415 4416 err = ce_count_unum(ecc->flt_status, len, unum); 4417 if (err != PR_OK && automatic_page_removal) { 4418 (void) page_retire(ecc->flt_addr, err); 4419 } 4420 } 4421 4422 static int 4423 ecc_kstat_update(kstat_t *ksp, int rw) 4424 { 4425 struct kstat_ecc_mm_info *kceip = ksp->ks_data; 4426 struct ce_info *ceip = mem_ce_simm; 4427 int i = ksp->ks_instance; 4428 4429 if (rw == KSTAT_WRITE) 4430 return (EACCES); 4431 4432 ASSERT(ksp->ks_data != NULL); 4433 ASSERT(i < mem_ce_simm_size && i >= 0); 4434 4435 /* 4436 * Since we're not using locks, make sure that we don't get partial 4437 * data. The name is always copied before the counters are incremented 4438 * so only do this update routine if at least one of the counters is 4439 * non-zero, which ensures that ce_count_unum() is done, and the 4440 * string is fully copied. 4441 */ 4442 if (ceip[i].intermittent_total == 0 && 4443 ceip[i].persistent_total == 0 && 4444 ceip[i].sticky_total == 0) { 4445 /* 4446 * Uninitialized or partially initialized. Ignore. 4447 * The ks_data buffer was allocated via kmem_zalloc, 4448 * so no need to bzero it. 
4449 */ 4450 return (0); 4451 } 4452 4453 kstat_named_setstr(&kceip->name, ceip[i].name); 4454 kceip->intermittent_total.value.ui64 = ceip[i].intermittent_total; 4455 kceip->persistent_total.value.ui64 = ceip[i].persistent_total; 4456 kceip->sticky_total.value.ui64 = ceip[i].sticky_total; 4457 4458 return (0); 4459 } 4460 4461 #define VIS_BLOCKSIZE 64 4462 4463 int 4464 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data) 4465 { 4466 int ret, watched; 4467 4468 watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 4469 ret = dtrace_blksuword32(addr, data, 0); 4470 if (watched) 4471 watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 4472 4473 return (ret); 4474 } 4475 4476 /*ARGSUSED*/ 4477 void 4478 cpu_faulted_enter(struct cpu *cp) 4479 { 4480 } 4481 4482 /*ARGSUSED*/ 4483 void 4484 cpu_faulted_exit(struct cpu *cp) 4485 { 4486 } 4487 4488 /*ARGSUSED*/ 4489 void 4490 mmu_init_kernel_pgsz(struct hat *hat) 4491 { 4492 } 4493 4494 size_t 4495 mmu_get_kernel_lpsize(size_t lpsize) 4496 { 4497 uint_t tte; 4498 4499 if (lpsize == 0) { 4500 /* no setting for segkmem_lpsize in /etc/system: use default */ 4501 return (MMU_PAGESIZE4M); 4502 } 4503 4504 for (tte = TTE8K; tte <= TTE4M; tte++) { 4505 if (lpsize == TTEBYTES(tte)) 4506 return (lpsize); 4507 } 4508 4509 return (TTEBYTES(TTE8K)); 4510 } 4511