/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/cpu.h>
#include <sys/elf_SPARC.h>
#include <vm/hat_sfmmu.h>
#include <vm/page.h>
#include <sys/cpuvar.h>
#include <sys/spitregs.h>
#include <sys/async.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/dditypes.h>
#include <sys/sunddi.h>
#include <sys/cpu_module.h>
#include <sys/prom_debug.h>
#include <sys/vmsystm.h>
#include <sys/prom_plat.h>
#include <sys/sysmacros.h>
#include <sys/intreg.h>
#include <sys/machtrap.h>
#include <sys/ontrap.h>
#include <sys/ivintr.h>
#include <sys/atomic.h>
#include <sys/panic.h>
#include <sys/ndifm.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/fm/cpu/UltraSPARC-II.h>
#include <sys/ddi.h>
#include <sys/ecc_kstat.h>
#include <sys/watchpoint.h>
#include <sys/dtrace.h>
#include <sys/errclassify.h>

uchar_t *ctx_pgsz_array = NULL;

/*
 * Structure for the 8 byte ecache data dump and the associated AFSR state.
 * There will be 8 of these structures used to dump an ecache line (64 bytes).
 */
typedef struct sf_ec_data_elm {
	uint64_t ec_d8;
	uint64_t ec_afsr;
} ec_data_t;

/*
 * Define spitfire (Ultra I/II) specific asynchronous error structure
 */
typedef struct spitfire_async_flt {
	struct async_flt cmn_asyncflt;	/* common - see sun4u/sys/async.h */
	ushort_t flt_type;		/* types of faults - cpu specific */
	ec_data_t flt_ec_data[8];	/* for E$ or mem dump/state */
	uint64_t flt_ec_tag;		/* E$ tag info */
	int flt_ec_lcnt;		/* number of bad E$ lines */
	ushort_t flt_sdbh;		/* UDBH reg */
	ushort_t flt_sdbl;		/* UDBL reg */
} spitf_async_flt;

/*
 * Prototypes for support routines in spitfire_asm.s:
 */
extern void flush_ecache(uint64_t physaddr, size_t size, size_t linesize);
extern uint64_t get_lsu(void);
extern void set_lsu(uint64_t ncc);
extern void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
    uint64_t *oafsr, uint64_t *acc_afsr);
extern uint64_t check_ecache_line(uint32_t id, uint64_t *acc_afsr);
extern uint64_t get_ecache_tag(uint32_t id, uint64_t *nafsr,
    uint64_t *acc_afsr);
extern uint64_t read_and_clear_afsr();
extern void write_ec_tag_parity(uint32_t id);
extern void write_hb_ec_tag_parity(uint32_t id);

/*
 * Spitfire module routines:
 */
static void cpu_async_log_err(void *flt);
/*PRINTFLIKE6*/
static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt,
    uint_t logflags, const char *endstr, const char *fmt, ...);

static void cpu_read_paddr(struct async_flt *aflt, short verbose, short ce_err);
static void cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum);
static void cpu_log_ecmem_info(spitf_async_flt *spf_flt);

static void log_ce_err(struct async_flt *aflt, char *unum);
static void log_ue_err(struct async_flt *aflt, char *unum);
static void check_misc_err(spitf_async_flt *spf_flt);
static ushort_t ecc_gen(uint_t high_bytes, uint_t low_bytes);
static int check_ecc(struct async_flt *aflt);
static uint_t get_cpu_status(uint64_t arg);
static uint64_t clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr);
static void scan_ecache(uint64_t *afar, ec_data_t *data, uint64_t *tag,
    int *m, uint64_t *afsr);
static void ecache_kstat_init(struct cpu *cp);
static void ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag,
    uint64_t paddr, int mpb, uint64_t);
static uint64_t ecache_scrub_misc_err(int, uint64_t);
static void ecache_scrub_tag_err(uint64_t, uchar_t, uint32_t);
static void ecache_page_retire(void *);
static int ecc_kstat_update(kstat_t *ksp, int rw);
static int ce_count_unum(int status, int len, char *unum);
static void add_leaky_bucket_timeout(void);
static int synd_to_synd_code(int synd_status, ushort_t synd);

extern uint_t read_all_memscrub;
extern void memscrub_run(void);

static uchar_t	isus2i;			/* set if sabre */
static uchar_t	isus2e;			/* set if hummingbird */

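/*
 * Note (cross-reference): isus2i and isus2e cannot be determined until
 * cpunodes[] is initialized; they are set in cpu_aflt_size() below when a
 * Sabre (SABRE_IMPL) or Hummingbird (HUMMBRD_IMPL) implementation is
 * detected, at which point the cpu_ec_* defaults that follow are also
 * overridden with the SB_/HB_ values.
 */
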
/*
 * Default ecache mask and shift settings for Spitfire.  If we detect a
 * different CPU implementation, we will modify these values at boot time.
 */
static uint64_t cpu_ec_tag_mask		= S_ECTAG_MASK;
static uint64_t cpu_ec_state_mask	= S_ECSTATE_MASK;
static uint64_t cpu_ec_par_mask		= S_ECPAR_MASK;
static int cpu_ec_par_shift		= S_ECPAR_SHIFT;
static int cpu_ec_tag_shift		= S_ECTAG_SHIFT;
static int cpu_ec_state_shift		= S_ECSTATE_SHIFT;
static uchar_t cpu_ec_state_exl		= S_ECSTATE_EXL;
static uchar_t cpu_ec_state_mod		= S_ECSTATE_MOD;
static uchar_t cpu_ec_state_shr		= S_ECSTATE_SHR;
static uchar_t cpu_ec_state_own		= S_ECSTATE_OWN;

/*
 * Default ecache state bits for Spitfire.  These individual bits indicate if
 * the given line is in any of the valid or modified states, respectively.
 * Again, we modify these at boot if we detect a different CPU.
 */
static uchar_t cpu_ec_state_valid	= S_ECSTATE_VALID;
static uchar_t cpu_ec_state_dirty	= S_ECSTATE_DIRTY;
static uchar_t cpu_ec_parity		= S_EC_PARITY;
static uchar_t cpu_ec_state_parity	= S_ECSTATE_PARITY;

/*
 * This table is used to determine which bit(s) is(are) bad when an ECC
 * error occurs.  The array is indexed by an 8-bit syndrome.  The entries
 * of this array have the following semantics:
 *
 *	00-63	The number of the bad bit, when only one bit is bad.
 *	64	ECC bit C0 is bad.
 *	65	ECC bit C1 is bad.
 *	66	ECC bit C2 is bad.
 *	67	ECC bit C3 is bad.
 *	68	ECC bit C4 is bad.
 *	69	ECC bit C5 is bad.
 *	70	ECC bit C6 is bad.
 *	71	ECC bit C7 is bad.
 *	72	Two bits are bad.
 *	73	Three bits are bad.
 *	74	Four bits are bad.
 *	75	More than Four bits are bad.
 *	76	NO bits are bad.
 * Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28.
 */

#define	C0	64
#define	C1	65
#define	C2	66
#define	C3	67
#define	C4	68
#define	C5	69
#define	C6	70
#define	C7	71
#define	M2	72
#define	M3	73
#define	M4	74
#define	MX	75
#define	NA	76

#define	SYND_IS_SINGLE_BIT_DATA(synd_code)	((synd_code >= 0) && \
						    (synd_code < C0))
#define	SYND_IS_SINGLE_BIT_CHK(synd_code)	((synd_code >= C0) && \
						    (synd_code <= C7))

static char ecc_syndrome_tab[] =
{
	NA, C0, C1, M2, C2, M2, M2, M3, C3, M2, M2, M3, M2, M3, M3, M4,
	C4, M2, M2, 32, M2, 57, MX, M2, M2, 37, 49, M2, 40, M2, M2, 44,
	C5, M2, M2, 33, M2, 61,  4, M2, M2, MX, 53, M2, 45, M2, M2, 41,
	M2,  0,  1, M2, 10, M2, M2, MX, 15, M2, M2, MX, M2, M3, M3, M2,
	C6, M2, M2, 42, M2, 59, 39, M2, M2, MX, 51, M2, 34, M2, M2, 46,
	M2, 25, 29, M2, 27, M4, M2, MX, 31, M2, M4, MX, M2, MX, MX, M2,
	M2, MX, 36, M2,  7, M2, M2, 54, MX, M2, M2, 62, M2, 48, 56, M2,
	M3, M2, M2, MX, M2, MX, 22, M2, M2, 18, MX, M2, M3, M2, M2, MX,
	C7, M2, M2, 47, M2, 63, MX, M2, M2,  6, 55, M2, 35, M2, M2, 43,
	M2,  5, MX, M2, MX, M2, M2, 50, 38, M2, M2, 58, M2, 52, 60, M2,
	M2, 17, 21, M2, 19, M4, M2, MX, 23, M2, M4, MX, M2, MX, MX, M2,
	M3, M2, M2, MX, M2, MX, 30, M2, M2, 26, MX, M2, M3, M2, M2, MX,
	M2,  8, 13, M2,  2, M2, M2, M3,  3, M2, M2, M3, M2, MX, MX, M2,
	M3, M2, M2, M3, M2, MX, 16, M2, M2, 20, MX, M2, MX, M2, M2, MX,
	M3, M2, M2, M3, M2, MX, 24, M2, M2, 28, MX, M2, MX, M2, M2, MX,
	M4, 12,  9, M2, 14, M2, M2, MX, 11, M2, M2, MX, M2, MX, MX, M4
};

#define	SYND_TBL_SIZE	256

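/*
 * Worked example (for illustration only): a syndrome of 0x01 indexes
 * ecc_syndrome_tab[0x01] == C0, meaning ECC check bit C0 is bad; a syndrome
 * of 0x13 yields 32, meaning data bit 32 is bad; a syndrome of 0x03 yields
 * M2, i.e. a two-bit (uncorrectable) error.  SYND_IS_SINGLE_BIT_DATA() and
 * SYND_IS_SINGLE_BIT_CHK() classify the resulting code when the CE logging
 * path reports which bit was corrected.
 */
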
/*
 * Hack for determining UDBH/UDBL, for later cpu-specific error reporting.
 * Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird.
 */
#define	UDBL_REG	0x8000
#define	UDBL(synd)	((synd & UDBL_REG) >> 15)
#define	SYND(synd)	(synd & 0x7FFF)

/*
 * These error types are specific to Spitfire and are used internally for the
 * spitfire fault structure flt_type field.
 */
#define	CPU_UE_ERR		0	/* uncorrectable errors - UEs */
#define	CPU_EDP_LDP_ERR		1	/* LDP or EDP parity error */
#define	CPU_WP_ERR		2	/* WP parity error */
#define	CPU_BTO_BERR_ERR	3	/* bus timeout errors */
#define	CPU_PANIC_CP_ERR	4	/* cp error from panic polling */
#define	CPU_TRAPPING_CP_ERR	5	/* for sabre/hbird only, cp error */
#define	CPU_BADLINE_CI_ERR	6	/* E$ clean_bad line when idle */
#define	CPU_BADLINE_CB_ERR	7	/* E$ clean_bad line when busy */
#define	CPU_BADLINE_DI_ERR	8	/* E$ dirty_bad line when idle */
#define	CPU_BADLINE_DB_ERR	9	/* E$ dirty_bad line when busy */
#define	CPU_ORPHAN_CP_ERR	10	/* Orphan CP error */
#define	CPU_ECACHE_ADDR_PAR_ERR	11	/* Ecache Address parity error */
#define	CPU_ECACHE_STATE_ERR	12	/* Ecache state error */
#define	CPU_ECACHE_ETP_ETS_ERR	13	/* ETP set but ETS is zero */
#define	CPU_ECACHE_TAG_ERR	14	/* Scrub the E$ tag, if state clean */
#define	CPU_ADDITIONAL_ERR	15	/* Additional errors occurred */

/*
 * Macro to access the "Spitfire cpu private" data structure.
 */
#define	CPU_PRIVATE_PTR(cp, x)	(&(((spitfire_private_t *)CPU_PRIVATE(cp))->x))

/*
 * set to 0 to disable automatic retiring of pages on
 * DIMMs that have excessive soft errors
 */
int automatic_page_removal = 1;

/*
 * Heuristic for figuring out which module to replace.
 * Relative likelihood that this P_SYND indicates that this module is bad.
 * We call it a "score", though, not a relative likelihood.
 *
 * Step 1.
 * Assign a score to each byte of P_SYND according to the following rules:
 *	If no bits on (0x00) or all bits on (0xFF), then give it a 5.
 *	If one bit on, give it a 95.
 *	If seven bits on, give it a 10.
 *	If two bits on:
 *	    in different nybbles, a 90
 *	    in same nybble, but unaligned, 85
 *	    in same nybble and as an aligned pair, 80
 *	If six bits on, look at the bits that are off:
 *	    in same nybble and as an aligned pair, 15
 *	    in same nybble, but unaligned, 20
 *	    in different nybbles, a 25
 *	If three bits on:
 *	    in different nybbles, no aligned pairs, 75
 *	    in different nybbles, one aligned pair, 70
 *	    in the same nybble, 65
 *	If five bits on, look at the bits that are off:
 *	    in the same nybble, 30
 *	    in different nybbles, one aligned pair, 35
 *	    in different nybbles, no aligned pairs, 40
 *	If four bits on:
 *	    all in one nybble, 45
 *	    as two aligned pairs, 50
 *	    one aligned pair, 55
 *	    no aligned pairs, 60
 *
 * Step 2:
 * Take the higher of the two scores (one for each byte) as the score
 * for the module.
 *
 * Print the score for each module, and field service should replace the
 * module with the highest score.
 */

/*
 * In the table below, the first row/column comment indicates the
 * number of bits on in that nybble; the second row/column comment is
 * the hex digit.
 */

static int
p_synd_score_table[256] = {
	/* 0   1   1   2   1   2   2   3   1   2   2   3   2   3   3   4 */
	/* 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  A,  B,  C,  D,  E,  F */
/* 0 0 */  5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45,
/* 1 1 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 1 2 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 3 */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
/* 1 4 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 5 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 2 6 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 3 7 */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 1 8 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 9 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 2 A */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 3 B */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 2 C */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
/* 3 D */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 3 E */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 4 F */ 45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10,  5,
};

int
ecc_psynd_score(ushort_t p_synd)
{
	int i, j, a, b;

	i = p_synd & 0xFF;
	j = (p_synd >> 8) & 0xFF;

	a = p_synd_score_table[i];
	b = p_synd_score_table[j];

	return (a > b ? a : b);
}

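/*
 * Worked example (for illustration only): with P_SYND == 0x8003, the low
 * byte 0x03 has two bits on in the same nybble as an aligned pair and
 * scores 80, while the high byte 0x80 has a single bit on and scores 95;
 * ecc_psynd_score() therefore reports 95 for that module.
 */
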
/*
 * Async Fault Logging
 *
 * To ease identifying, reading, and filtering async fault log messages, the
 * label [AFT#] is now prepended to each async fault message.  These messages
 * and the logging rules are implemented by cpu_aflt_log(), below.
 *
 * [AFT0] - Tag for log messages that are associated with corrected ECC errors.
 *	    This includes both corrected ECC memory and ecache faults.
 *
 * [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything
 *	    else except CE errors) with a priority of 1 (highest).  This tag
 *	    is also used for panic messages that result from an async fault.
 *
 * [AFT2] - These are lower priority diagnostic messages for uncorrected ECC
 * [AFT3]   or parity errors.  For example, AFT2 is used for the actual dump
 *	    of the E-$ data and tags.
 *
 * In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not
 * printed on the console.  To send all AFT logs to both the log and the
 * console, set aft_verbose = 1.
 */

#define	CPU_FLTCPU	0x0001	/* print flt_inst as a CPU id */
#define	CPU_SPACE	0x0002	/* print flt_status (data or instr) */
#define	CPU_ERRID	0x0004	/* print flt_id */
#define	CPU_TL		0x0008	/* print flt_tl */
#define	CPU_ERRID_FIRST	0x0010	/* print flt_id first in message */
#define	CPU_AFSR	0x0020	/* print flt_stat as decoded %afsr */
#define	CPU_AFAR	0x0040	/* print flt_addr as %afar */
#define	CPU_AF_PSYND	0x0080	/* print flt_stat %afsr.PSYND */
#define	CPU_AF_ETS	0x0100	/* print flt_stat %afsr.ETS */
#define	CPU_UDBH	0x0200	/* print flt_sdbh and syndrome */
#define	CPU_UDBL	0x0400	/* print flt_sdbl and syndrome */
#define	CPU_FAULTPC	0x0800	/* print flt_pc */
#define	CPU_SYND	0x1000	/* print flt_synd and unum */

#define	CMN_LFLAGS	(CPU_FLTCPU | CPU_SPACE | CPU_ERRID | CPU_TL |	\
				CPU_AFSR | CPU_AFAR | CPU_AF_PSYND |	\
				CPU_AF_ETS | CPU_UDBH | CPU_UDBL |	\
				CPU_FAULTPC)
#define	UE_LFLAGS	(CMN_LFLAGS | CPU_SYND)
#define	CE_LFLAGS	(UE_LFLAGS & ~CPU_UDBH & ~CPU_UDBL & ~CPU_TL &	\
				~CPU_SPACE)
#define	PARERR_LFLAGS	(CMN_LFLAGS)
#define	WP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL)
#define	CP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL &	\
				~CPU_FLTCPU & ~CPU_FAULTPC)
#define	BERRTO_LFLAGS	(CMN_LFLAGS)
#define	NO_LFLAGS	(0)

#define	AFSR_FMTSTR0	"\020\1ME"
#define	AFSR_FMTSTR1	"\020\040PRIV\037ISAP\036ETP\035IVUE\034TO" \
			"\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE"
#define	UDB_FMTSTR	"\020\012UE\011CE"

/*
 * Maximum number of contexts for Spitfire.
 */
#define	MAX_NCTXS	(1 << 13)

/*
 * Save the cache bootup state for use when internal
 * caches are to be re-enabled after an error occurs.
 */
uint64_t cache_boot_state = 0;

/*
 * PA[31:0] represent Displacement in UPA configuration space.
 */
uint_t	root_phys_addr_lo_mask = 0xffffffff;

/*
 * Spitfire legacy globals
 */
int	itlb_entries;
int	dtlb_entries;

void
cpu_setup(void)
{
	extern int page_retire_messages;
	extern int page_retire_first_ue;
	extern int at_flags;
#if defined(SF_ERRATA_57)
	extern caddr_t errata57_limit;
#endif
	extern int disable_text_largepages;
	extern int disable_initdata_largepages;

	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);

	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1;

	/*
	 * Spitfire isn't currently FMA-aware, so we have to enable the
	 * page retirement messages.  We also change the default policy
	 * for UE retirement to allow clearing of transient errors.
	 */
	page_retire_messages = 1;
	page_retire_first_ue = 0;

	/*
	 * save the cache bootup state.
	 */
	cache_boot_state = get_lsu() & (LSU_IC | LSU_DC);

	/*
	 * Use the maximum number of contexts available for Spitfire unless
	 * it has been tuned for debugging.
	 * We are checking against 0 here since this value can be patched
	 * while booting.  It can not be patched via /etc/system since it
	 * will be patched too late and thus cause the system to panic.
	 */
	if (nctxs == 0)
		nctxs = MAX_NCTXS;

	if (use_page_coloring) {
		do_pg_coloring = 1;
		if (use_virtual_coloring)
			do_virtual_coloring = 1;
	}

	/*
	 * Tune pp_slots to use up to 1/8th of the tlb entries.
	 */
	pp_slots = MIN(8, MAXPP_SLOTS);

	/*
	 * Block stores invalidate all pages of the d$ so pagecopy
	 * et al. do not need virtual translations with virtual
	 * coloring taken into consideration.
	 */
	pp_consistent_coloring = 0;

	isa_list =
	    "sparcv9+vis sparcv9 "
	    "sparcv8plus+vis sparcv8plus "
	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";

	cpu_hwcap_flags = AV_SPARC_VIS;

	/*
	 * On Spitfire, there's a hole in the address space
	 * that we must never map (the hardware only supports 44 bits of
	 * virtual address).  Later CPUs are expected to have wider
	 * supported address ranges.
	 *
	 * See address map on p23 of the UltraSPARC 1 user's manual.
	 */
	hole_start = (caddr_t)0x80000000000ull;
	hole_end = (caddr_t)0xfffff80000000000ull;

	/*
	 * A spitfire call bug requires us to be a further 4Gbytes of
	 * firewall from the spec.
	 *
	 * See Spitfire Errata #21
	 */
	hole_start = (caddr_t)((uintptr_t)hole_start - (1ul << 32));
	hole_end = (caddr_t)((uintptr_t)hole_end + (1ul << 32));

	/*
	 * The kpm mapping window.
	 * kpm_size:
	 *	The size of a single kpm range.
	 *	The overall size will be: kpm_size * vac_colors.
	 * kpm_vbase:
	 *	The virtual start address of the kpm range within the kernel
	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
	 */
	kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */
	kpm_size_shift = 41;
	kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */

#if defined(SF_ERRATA_57)
	errata57_limit = (caddr_t)0x80000000ul;
#endif

	/*
	 * Allow only 8K, 64K and 4M pages for text by default.
	 * Allow only 8K and 64K page for initialized data segments by
	 * default.
	 */
	disable_text_largepages = (1 << TTE512K) | (1 << TTE32M) |
	    (1 << TTE256M);
	disable_initdata_largepages = (1 << TTE512K) | (1 << TTE4M) |
	    (1 << TTE32M) | (1 << TTE256M);
}

static int
getintprop(pnode_t node, char *name, int deflt)
{
	int	value;

	switch (prom_getproplen(node, name)) {
	case 0:
		value = 1;	/* boolean properties */
		break;

	case sizeof (int):
		(void) prom_getprop(node, name, (caddr_t)&value);
		break;

	default:
		value = deflt;
		break;
	}

	return (value);
}

/*
 * Set the magic constants of the implementation.
 */
void
cpu_fiximp(pnode_t dnode)
{
	extern int vac_size, vac_shift;
	extern uint_t vac_mask;
	extern int dcache_line_mask;
	int i, a;
	static struct {
		char	*name;
		int	*var;
	} prop[] = {
		"dcache-size",		&dcache_size,
		"dcache-line-size",	&dcache_linesize,
		"icache-size",		&icache_size,
		"icache-line-size",	&icache_linesize,
		"ecache-size",		&ecache_size,
		"ecache-line-size",	&ecache_alignsize,
		"ecache-associativity",	&ecache_associativity,
		"#itlb-entries",	&itlb_entries,
		"#dtlb-entries",	&dtlb_entries,
	};

	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) {
		if ((a = getintprop(dnode, prop[i].name, -1)) != -1) {
			*prop[i].var = a;
		}
	}

	ecache_setsize = ecache_size / ecache_associativity;

	vac_size = S_VAC_SIZE;
	vac_mask = MMU_PAGEMASK & (vac_size - 1);
	i = 0; a = vac_size;
	while (a >>= 1)
		++i;
	vac_shift = i;
	shm_alignment = vac_size;
	vac = 1;
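
	/*
	 * Note: the loop above counts how many times vac_size can be shifted
	 * right before it reaches zero, i.e. the index of its highest set
	 * bit, so vac_shift ends up as log2(vac_size) for the power-of-two
	 * VAC size, and shm_alignment matches the VAC size.
	 */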

	dcache_line_mask = (dcache_size - 1) & ~(dcache_linesize - 1);

	/*
	 * UltraSPARC I & II have ecache sizes running
	 * as follows: .25 MB, .5 MB, 1 MB, 2 MB, 4 MB
	 * and 8 MB.  Adjust the copyin/copyout limits
	 * according to the cache size.  The magic number
	 * of VIS_COPY_THRESHOLD comes from the copyin/copyout code
	 * and its floor of VIS_COPY_THRESHOLD bytes before it will use
	 * VIS instructions.
	 *
	 * We assume that all CPUs on the system have the same size
	 * ecache.  We're also called very early in the game.
	 * /etc/system will be parsed *after* we're called so
	 * these values can be overwritten.
	 */

	hw_copy_limit_1 = VIS_COPY_THRESHOLD;
	if (ecache_size <= 524288) {
		hw_copy_limit_2 = VIS_COPY_THRESHOLD;
		hw_copy_limit_4 = VIS_COPY_THRESHOLD;
		hw_copy_limit_8 = VIS_COPY_THRESHOLD;
	} else if (ecache_size == 1048576) {
		hw_copy_limit_2 = 1024;
		hw_copy_limit_4 = 1280;
		hw_copy_limit_8 = 1536;
	} else if (ecache_size == 2097152) {
		hw_copy_limit_2 = 1536;
		hw_copy_limit_4 = 2048;
		hw_copy_limit_8 = 2560;
	} else if (ecache_size == 4194304) {
		hw_copy_limit_2 = 2048;
		hw_copy_limit_4 = 2560;
		hw_copy_limit_8 = 3072;
	} else {
		hw_copy_limit_2 = 2560;
		hw_copy_limit_4 = 3072;
		hw_copy_limit_8 = 3584;
	}
}

/*
 * Called by setcpudelay
 */
void
cpu_init_tick_freq(void)
{
	/*
	 * Determine the cpu frequency by calling
	 * tod_get_cpufrequency.  Use an approximate frequency
	 * value computed by the prom if the tod module
	 * is not initialized and loaded yet.
	 */
	if (tod_ops.tod_get_cpufrequency != NULL) {
		mutex_enter(&tod_lock);
		sys_tick_freq = tod_ops.tod_get_cpufrequency();
		mutex_exit(&tod_lock);
	} else {
#if defined(HUMMINGBIRD)
		/*
		 * the hummingbird version of %stick is used as the basis for
		 * low level timing; this provides an independent constant-rate
		 * clock for general system use, and frees power mgmt to set
		 * various cpu clock speeds.
		 */
		if (system_clock_freq == 0)
			cmn_err(CE_PANIC, "invalid system_clock_freq 0x%lx",
			    system_clock_freq);
		sys_tick_freq = system_clock_freq;
#else /* SPITFIRE */
		sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq;
#endif
	}
}


void shipit(int upaid);
extern uint64_t xc_tick_limit;
extern uint64_t xc_tick_jump_limit;

#ifdef SEND_MONDO_STATS
uint64_t x_early[NCPU][64];
#endif

/*
 * Note: A version of this function is used by the debugger via the KDI,
 * and must be kept in sync with this version.  Any changes made to this
 * function to support new chips or to accommodate errata must also be included
 * in the KDI-specific version.  See spitfire_kdi.c.
 */
void
send_one_mondo(int cpuid)
{
	uint64_t idsr, starttick, endtick;
	int upaid, busy, nack;
	uint64_t tick, tick_prev;
	ulong_t ticks;

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
	upaid = CPUID_TO_UPAID(cpuid);
	tick = starttick = gettick();
	shipit(upaid);
	endtick = starttick + xc_tick_limit;
	busy = nack = 0;
	for (;;) {
		idsr = getidsr();
		if (idsr == 0)
			break;
		/*
		 * When we detect an irregular tick jump, we adjust
		 * the timer window to the current tick value.
		 */
		tick_prev = tick;
		tick = gettick();
		ticks = tick - tick_prev;
		if (ticks > xc_tick_jump_limit) {
			endtick = tick + xc_tick_limit;
		} else if (tick > endtick) {
			if (panic_quiesce)
				return;
			cmn_err(CE_PANIC,
			    "send mondo timeout (target 0x%x) [%d NACK %d BUSY]",
			    upaid, nack, busy);
		}
		if (idsr & IDSR_BUSY) {
			busy++;
			continue;
		}
		drv_usecwait(1);
		shipit(upaid);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	x_early[getprocessorid()][highbit(gettick() - starttick) - 1]++;
#endif
}

void
send_mondo_set(cpuset_t set)
{
	int i;

	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			send_one_mondo(i);
			CPUSET_DEL(set, i);
			if (CPUSET_ISNULL(set))
				break;
		}
}

void
syncfpu(void)
{
}

/*
 * Determine the size of the CPU module's error structure in bytes.  This is
 * called once during boot to initialize the error queues.
 */
int
cpu_aflt_size(void)
{
	/*
	 * We need to determine whether this is a sabre, Hummingbird or a
	 * Spitfire/Blackbird impl and set the appropriate state variables for
	 * ecache tag manipulation.  We can't do this in cpu_setup() as it is
	 * too early in the boot flow and the cpunodes are not initialized.
	 * This routine will be called once after cpunodes[] is ready, so do
	 * it here.
	 */
	if (cpunodes[CPU->cpu_id].implementation == SABRE_IMPL) {
		isus2i = 1;
		cpu_ec_tag_mask = SB_ECTAG_MASK;
		cpu_ec_state_mask = SB_ECSTATE_MASK;
		cpu_ec_par_mask = SB_ECPAR_MASK;
		cpu_ec_par_shift = SB_ECPAR_SHIFT;
		cpu_ec_tag_shift = SB_ECTAG_SHIFT;
		cpu_ec_state_shift = SB_ECSTATE_SHIFT;
		cpu_ec_state_exl = SB_ECSTATE_EXL;
		cpu_ec_state_mod = SB_ECSTATE_MOD;

		/* These states do not exist in sabre - set to 0xFF */
		cpu_ec_state_shr = 0xFF;
		cpu_ec_state_own = 0xFF;

		cpu_ec_state_valid = SB_ECSTATE_VALID;
		cpu_ec_state_dirty = SB_ECSTATE_DIRTY;
		cpu_ec_state_parity = SB_ECSTATE_PARITY;
		cpu_ec_parity = SB_EC_PARITY;
	} else if (cpunodes[CPU->cpu_id].implementation == HUMMBRD_IMPL) {
		isus2e = 1;
		cpu_ec_tag_mask = HB_ECTAG_MASK;
		cpu_ec_state_mask = HB_ECSTATE_MASK;
		cpu_ec_par_mask = HB_ECPAR_MASK;
		cpu_ec_par_shift = HB_ECPAR_SHIFT;
		cpu_ec_tag_shift = HB_ECTAG_SHIFT;
		cpu_ec_state_shift = HB_ECSTATE_SHIFT;
		cpu_ec_state_exl = HB_ECSTATE_EXL;
		cpu_ec_state_mod = HB_ECSTATE_MOD;

		/* These states do not exist in hummingbird - set to 0xFF */
		cpu_ec_state_shr = 0xFF;
		cpu_ec_state_own = 0xFF;

		cpu_ec_state_valid = HB_ECSTATE_VALID;
		cpu_ec_state_dirty = HB_ECSTATE_DIRTY;
		cpu_ec_state_parity = HB_ECSTATE_PARITY;
		cpu_ec_parity = HB_EC_PARITY;
	}

	return (sizeof (spitf_async_flt));
}


/*
 * Correctable ecc error trap handler
 */
/*ARGSUSED*/
void
cpu_ce_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
	uint_t p_afsr_high, uint_t p_afar_high)
{
	ushort_t sdbh, sdbl;
	ushort_t e_syndh, e_syndl;
	spitf_async_flt spf_flt;
	struct async_flt *ecc;
	int queue = 1;

	uint64_t t_afar = p_afar;
	uint64_t t_afsr = p_afsr;

	/*
	 * Note: the Spitfire data buffer error registers
	 * (upper and lower halves) are or'ed into the upper
	 * word of the afsr by ce_err().
	 */
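	/*
	 * Given that packing, the extraction below pulls the UDBH contents
	 * from bits <42:33> and the UDBL contents from bits <52:43> of the
	 * value passed in (10 bits each).
	 */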
	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);

	e_syndh = (uchar_t)(sdbh & (uint_t)P_DER_E_SYND);
	e_syndl = (uchar_t)(sdbl & (uint_t)P_DER_E_SYND);

	t_afsr &= S_AFSR_MASK;
	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */

	/* Setup the async fault structure */
	bzero(&spf_flt, sizeof (spitf_async_flt));
	ecc = (struct async_flt *)&spf_flt;
	ecc->flt_id = gethrtime_waitfree();
	ecc->flt_stat = t_afsr;
	ecc->flt_addr = t_afar;
	ecc->flt_status = ECC_C_TRAP;
	ecc->flt_bus_id = getprocessorid();
	ecc->flt_inst = CPU->cpu_id;
	ecc->flt_pc = (caddr_t)rp->r_pc;
	ecc->flt_func = log_ce_err;
	ecc->flt_in_memory =
	    (pf_is_memory(ecc->flt_addr >> MMU_PAGESHIFT)) ? 1: 0;
	spf_flt.flt_sdbh = sdbh;
	spf_flt.flt_sdbl = sdbl;

	/*
	 * Check for fatal conditions.
	 */
	check_misc_err(&spf_flt);

	/*
	 * Paranoid checks for valid AFSR and UDBs
	 */
	if ((t_afsr & P_AFSR_CE) == 0) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
		    "** Panic due to CE bit not set in the AFSR",
		    " Corrected Memory Error on");
	}

	/*
	 * We want to skip logging only if ALL the following
	 * conditions are true:
	 *
	 *	1. There is only one error
	 *	2. That error is a correctable memory error
	 *	3. The error is caused by the memory scrubber (in which case
	 *	    the error will have occurred under on_trap protection)
	 *	4. The error is on a retired page
	 *
	 * Note: OT_DATA_EC is used places other than the memory scrubber.
	 * However, none of those errors should occur on a retired page.
	 */
	if ((ecc->flt_stat & (S_AFSR_ALL_ERRS & ~P_AFSR_ME)) == P_AFSR_CE &&
	    curthread->t_ontrap != NULL) {

		if (curthread->t_ontrap->ot_prot & OT_DATA_EC) {
			if (page_retire_check(ecc->flt_addr, NULL) == 0) {
				queue = 0;
			}
		}
	}

	if (((sdbh & P_DER_CE) == 0) && ((sdbl & P_DER_CE) == 0)) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
		    "** Panic due to CE bits not set in the UDBs",
		    " Corrected Memory Error on");
	}

	if ((sdbh >> 8) & 1) {
		ecc->flt_synd = e_syndh;
		ce_scrub(ecc);
		if (queue) {
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
		}
	}

	if ((sdbl >> 8) & 1) {
		ecc->flt_addr = t_afar | 0x8;	/* Sabres do not have a UDBL */
		ecc->flt_synd = e_syndl | UDBL_REG;
		ce_scrub(ecc);
		if (queue) {
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
		}
	}

	/*
	 * Re-enable all error trapping (CEEN currently cleared).
	 */
	clr_datapath();
	set_asyncflt(P_AFSR_CE);
	set_error_enable(EER_ENABLE);
}
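
/*
 * CE handling, in outline: cpu_ce_error() above captures the AFSR/AFAR and
 * UDB syndromes, scrubs the data via ce_scrub() (cpu_ce_scrub_mem_err()
 * below does the classification into intermittent, persistent or sticky),
 * and queues the fault to ce_queue.  When that queue is drained,
 * cpu_ce_log_err() invokes flt_func (log_ce_err() below) and drives the
 * per-unum error counting (ce_count_unum()) and automatic page retirement.
 */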

/*
 * Cpu specific CE logging routine
 */
static void
log_ce_err(struct async_flt *aflt, char *unum)
{
	spitf_async_flt spf_flt;

	if ((aflt->flt_stat & P_AFSR_CE) && (ce_verbose_memory == 0)) {
		return;
	}

	spf_flt.cmn_asyncflt = *aflt;
	cpu_aflt_log(CE_CONT, 0, &spf_flt, CE_LFLAGS, unum,
	    " Corrected Memory Error detected by");
}

/*
 * Spitfire does not perform any further CE classification refinement
 */
/*ARGSUSED*/
int
ce_scrub_xdiag_recirc(struct async_flt *ecc, errorq_t *eqp, errorq_elem_t *eqep,
    size_t afltoffset)
{
	return (0);
}

char *
flt_to_error_type(struct async_flt *aflt)
{
	if (aflt->flt_status & ECC_INTERMITTENT)
		return (ERR_TYPE_DESC_INTERMITTENT);
	if (aflt->flt_status & ECC_PERSISTENT)
		return (ERR_TYPE_DESC_PERSISTENT);
	if (aflt->flt_status & ECC_STICKY)
		return (ERR_TYPE_DESC_STICKY);
	return (ERR_TYPE_DESC_UNKNOWN);
}

/*
 * Called by correctable ecc error logging code to print out
 * the sticky/persistent/intermittent status of the error.
 */
static void
cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum)
{
	ushort_t status;
	char *status1_str = "Memory";
	char *status2_str = "Intermittent";
	struct async_flt *aflt = (struct async_flt *)spf_flt;

	status = aflt->flt_status;

	if (status & ECC_ECACHE)
		status1_str = "Ecache";

	if (status & ECC_STICKY)
		status2_str = "Sticky";
	else if (status & ECC_PERSISTENT)
		status2_str = "Persistent";

	cpu_aflt_log(CE_CONT, 0, spf_flt, CPU_ERRID_FIRST,
	    NULL, " Corrected %s Error on %s is %s",
	    status1_str, unum, status2_str);
}

/*
 * check for a valid ce syndrome, then call the
 * displacement flush scrubbing code, and then check the afsr to see if
 * the error was persistent or intermittent.  Reread the afar/afsr to see
 * if the error was not scrubbed successfully, and is therefore sticky.
 */
/*ARGSUSED1*/
void
cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t triedcpulogout)
{
	uint64_t eer, afsr;
	ushort_t status;

	ASSERT(getpil() > LOCK_LEVEL);

	/*
	 * It is possible that the flt_addr is not a valid
	 * physical address.  To deal with this, we disable
	 * NCEEN while we scrub that address.  If this causes
	 * a TIMEOUT/BERR, we know this is an invalid
	 * memory location.
	 */
	kpreempt_disable();
	eer = get_error_enable();
	if (eer & (EER_CEEN | EER_NCEEN))
		set_error_enable(eer & ~(EER_CEEN | EER_NCEEN));

	/*
	 * To check if the error detected by IO is persistent, sticky or
	 * intermittent.
	 */
	if (ecc->flt_status & ECC_IOBUS) {
		ecc->flt_stat = P_AFSR_CE;
	}

	scrubphys(P2ALIGN(ecc->flt_addr, 64),
	    cpunodes[CPU->cpu_id].ecache_size);

	get_asyncflt(&afsr);
	if (afsr & (P_AFSR_TO | P_AFSR_BERR)) {
		/*
		 * Must ensure that we don't get the TIMEOUT/BERR
		 * when we reenable NCEEN, so we clear the AFSR.
		 */
		set_asyncflt(afsr & (P_AFSR_TO | P_AFSR_BERR));
		if (eer & (EER_CEEN | EER_NCEEN))
			set_error_enable(eer);
		kpreempt_enable();
		return;
	}

	if (eer & EER_NCEEN)
		set_error_enable(eer & ~EER_CEEN);

	/*
	 * Check and clear any ECC errors from the scrub.  If the scrub did
	 * not trip over the error, mark it intermittent.  If the scrub did
	 * trip the error again and it did not scrub away, mark it sticky.
	 * Otherwise mark it persistent.
	 */
	if (check_ecc(ecc) != 0) {
		cpu_read_paddr(ecc, 0, 1);

		if (check_ecc(ecc) != 0)
			status = ECC_STICKY;
		else
			status = ECC_PERSISTENT;
	} else
		status = ECC_INTERMITTENT;

	if (eer & (EER_CEEN | EER_NCEEN))
		set_error_enable(eer);
	kpreempt_enable();

	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
	ecc->flt_status |= status;
}

/*
 * get the syndrome and unum, and then call the routines
 * to check the other cpus and iobuses, and then do the error logging.
 */
/*ARGSUSED1*/
void
cpu_ce_log_err(struct async_flt *ecc, errorq_elem_t *eqep)
{
	char unum[UNUM_NAMLEN];
	int len = 0;
	int ce_verbose = 0;
	int err;

	ASSERT(ecc->flt_func != NULL);

	/* Get the unum string for logging purposes */
	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, ecc, unum,
	    UNUM_NAMLEN, &len);

	/* Call specific error logging routine */
	(void) (*ecc->flt_func)(ecc, unum);

	/*
	 * Count errors per unum.
	 * Non-memory errors are all counted via a special unum string.
	 */
	if ((err = ce_count_unum(ecc->flt_status, len, unum)) != PR_OK &&
	    automatic_page_removal) {
		(void) page_retire(ecc->flt_addr, err);
	}

	if (ecc->flt_panic) {
		ce_verbose = 1;
	} else if ((ecc->flt_class == BUS_FAULT) ||
	    (ecc->flt_stat & P_AFSR_CE)) {
		ce_verbose = (ce_verbose_memory > 0);
	} else {
		ce_verbose = 1;
	}

	if (ce_verbose) {
		spitf_async_flt sflt;
		int synd_code;

		sflt.cmn_asyncflt = *ecc;	/* for cpu_aflt_log() */

		cpu_ce_log_status(&sflt, unum);

		synd_code = synd_to_synd_code(AFLT_STAT_VALID,
		    SYND(ecc->flt_synd));

		if (SYND_IS_SINGLE_BIT_DATA(synd_code)) {
			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
			    NULL, " ECC Data Bit %2d was in error "
			    "and corrected", synd_code);
		} else if (SYND_IS_SINGLE_BIT_CHK(synd_code)) {
			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
			    NULL, " ECC Check Bit %2d was in error "
			    "and corrected", synd_code - C0);
		} else {
			/*
			 * These are UE errors - we shouldn't be getting CE
			 * traps for these; handle them in case of bad h/w.
			 */
			switch (synd_code) {
			case M2:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " Two ECC Bits were in error");
				break;
			case M3:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " Three ECC Bits were in error");
				break;
			case M4:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " Four ECC Bits were in error");
				break;
			case MX:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " More than Four ECC bits were "
				    "in error");
				break;
			default:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " Unknown fault syndrome %d",
				    synd_code);
				break;
			}
		}
	}

	/* Display entire cache line, if valid address */
	if (ce_show_data && ecc->flt_addr != AFLT_INV_ADDR)
		read_ecc_data(ecc, 1, 1);
}

/*
 * We route all errors through a single switch statement.
 */
void
cpu_ue_log_err(struct async_flt *aflt)
{

	switch (aflt->flt_class) {
	case CPU_FAULT:
		cpu_async_log_err(aflt);
		break;

	case BUS_FAULT:
		bus_async_log_err(aflt);
		break;

	default:
		cmn_err(CE_WARN, "discarding async error 0x%p with invalid "
		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
		break;
	}
}

/* Values for action variable in cpu_async_error() */
#define	ACTION_NONE		0
#define	ACTION_TRAMPOLINE	1
#define	ACTION_AST_FLAGS	2

/*
 * Access error trap handler for asynchronous cpu errors.  This routine is
 * called to handle a data or instruction access error.  All fatal errors are
 * completely handled by this routine (by panicking).  Non fatal error logging
 * is queued for later processing either via AST or softint at a lower PIL.
 * In case of panic, the error log queue will also be processed as part of the
 * panic flow to ensure all errors are logged.  This routine is called with all
 * errors disabled at PIL15.  The AFSR bits are cleared and the UDBL and UDBH
 * error bits are also cleared.  The hardware has also disabled the I and
 * D-caches for us, so we must re-enable them before returning.
 *
 * A summary of the handling of tl=0 UE/LDP/EDP/TO/BERR/WP/CP:
 *
 *   _______________________________________________________________
 *  |         Privileged tl0        |          Unprivileged         |
 *  | Protected         |Unprotected| Protected     | Unprotected   |
 *  |on_trap  |lofault  |           |               |               |
 * -----------|---------|-----------+---------------+---------------|
 *            |         |           |               |               |
 * UE/LDP/EDP | L,T,p   | L,R,p     | L,P           | n/a    | L,R,p |
 *            |         |           |               |               |
 * TO/BERR    | T       | S         | L,P           | n/a    | S     |
 *            |         |           |               |               |
 * WP         | L,M,p   | L,M,p     | L,M,p         | n/a    | L,M,p |
 *            |         |           |               |               |
 * CP(IIi/IIe)| L,P     | L,P       | L,P           | n/a    | L,P   |
 * ________________________________________________________________________
 *
 *
 * Action codes:
 *
 * L - log
 * M - kick off memscrubber if flt_in_memory
 * P - panic
 * p - panic if US-IIi or US-IIe (Sabre); overrides R and M
 * R - i)  if aft_panic is set, panic
 *     ii) otherwise, send hwerr event to contract and SIGKILL to process
 * S - send SIGBUS to process
 * T - trampoline
 *
 * Special cases:
 *
 * 1) if aft_testfatal is set, all faults result in a panic regardless
 *    of type (even WP), protection (even on_trap), or privilege.
 */
/*ARGSUSED*/
void
cpu_async_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
	uint_t p_afsr_high, uint_t p_afar_high)
{
	ushort_t sdbh, sdbl, ttype, tl;
	spitf_async_flt spf_flt;
	struct async_flt *aflt;
	char pr_reason[28];
	uint64_t oafsr;
	uint64_t acc_afsr = 0;			/* accumulated afsr */
	int action = ACTION_NONE;
	uint64_t t_afar = p_afar;
	uint64_t t_afsr = p_afsr;
	int expected = DDI_FM_ERR_UNEXPECTED;
	ddi_acc_hdl_t *hp;

	/*
	 * We need to look at p_flag to determine if the thread detected an
	 * error while dumping core.  We can't grab p_lock here, but it's ok
	 * because we just need a consistent snapshot and we know that everyone
	 * else will store a consistent set of bits while holding p_lock.  We
	 * don't have to worry about a race because SDOCORE is set once prior
	 * to doing i/o from the process's address space and is never cleared.
	 */
	uint_t pflag = ttoproc(curthread)->p_flag;

	pr_reason[0] = '\0';

	/*
	 * Note: the Spitfire data buffer error registers
	 * (upper and lower halves) are or'ed into the upper
	 * word of the afsr by async_err() if P_AFSR_UE is set.
	 */
	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);

	/*
	 * Grab the ttype encoded in <63:53> of the saved
	 * afsr passed from async_err()
	 */
	ttype = (ushort_t)((t_afsr >> 53) & 0x1FF);
	tl = (ushort_t)(t_afsr >> 62);

	t_afsr &= S_AFSR_MASK;
	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */

	/*
	 * Initialize most of the common and CPU-specific structure.  We derive
	 * aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit.  The
	 * initial setting of aflt->flt_panic is based on TL: we must panic if
	 * the error occurred at TL > 0.  We also set flt_panic if the test/demo
	 * tuneable aft_testfatal is set (not the default).
	 */
	bzero(&spf_flt, sizeof (spitf_async_flt));
	aflt = (struct async_flt *)&spf_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_stat = t_afsr;
	aflt->flt_addr = t_afar;
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
	aflt->flt_tl = (uchar_t)tl;
	aflt->flt_panic = (tl != 0 || aft_testfatal != 0);
	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;

	/*
	 * Set flt_status based on the trap type.  If we end up here as the
	 * result of a UE detected by the CE handling code, leave status 0.
	 */
	switch (ttype) {
	case T_DATA_ERROR:
		aflt->flt_status = ECC_D_TRAP;
		break;
	case T_INSTR_ERROR:
		aflt->flt_status = ECC_I_TRAP;
		break;
	}

	spf_flt.flt_sdbh = sdbh;
	spf_flt.flt_sdbl = sdbl;

	/*
	 * Check for fatal async errors.
	 */
	check_misc_err(&spf_flt);

	/*
	 * If the trap occurred in privileged mode at TL=0, we need to check to
	 * see if we were executing in the kernel under on_trap() or t_lofault
	 * protection.  If so, modify the saved registers so that we return
	 * from the trap to the appropriate trampoline routine.
	 */
	if (aflt->flt_priv && tl == 0) {
		if (curthread->t_ontrap != NULL) {
			on_trap_data_t *otp = curthread->t_ontrap;

			if (otp->ot_prot & OT_DATA_EC) {
				aflt->flt_prot = AFLT_PROT_EC;
				otp->ot_trap |= OT_DATA_EC;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				action = ACTION_TRAMPOLINE;
			}

			if ((t_afsr & (P_AFSR_TO | P_AFSR_BERR)) &&
			    (otp->ot_prot & OT_DATA_ACCESS)) {
				aflt->flt_prot = AFLT_PROT_ACCESS;
				otp->ot_trap |= OT_DATA_ACCESS;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				action = ACTION_TRAMPOLINE;
				/*
				 * for peeks and caut_gets errors are expected
				 */
				hp = (ddi_acc_hdl_t *)otp->ot_handle;
				if (!hp)
					expected = DDI_FM_ERR_PEEK;
				else if (hp->ah_acc.devacc_attr_access ==
				    DDI_CAUTIOUS_ACC)
					expected = DDI_FM_ERR_EXPECTED;
			}

		} else if (curthread->t_lofault) {
			aflt->flt_prot = AFLT_PROT_COPY;
			rp->r_g1 = EFAULT;
			rp->r_pc = curthread->t_lofault;
			rp->r_npc = rp->r_pc + 4;
			action = ACTION_TRAMPOLINE;
		}
	}

	/*
	 * Determine if this error needs to be treated as fatal.  Note that
	 * multiple errors detected upon entry to this trap handler do not
	 * necessarily warrant a panic.  We only want to panic if the trap
	 * happened in privileged mode and not under t_ontrap or t_lofault
	 * protection.  The exception is WP: if we *only* get WP, it is not
	 * fatal even if the trap occurred in privileged mode, except on Sabre.
	 *
	 * aft_panic, if set, effectively makes us treat usermode
	 * UE/EDP/LDP faults as if they were privileged - so we will
	 * panic instead of sending a contract event.  A lofault-protected
	 * fault will normally follow the contract event; if aft_panic is
	 * set this will be changed to a panic.
	 *
	 * For usermode BERR/BTO errors, eg from processes performing device
	 * control through mapped device memory, we need only deliver
	 * a SIGBUS to the offending process.
	 *
	 * Some additional flt_panic reasons (eg, WP on Sabre) will be
	 * checked later; for now we implement the common reasons.
	 */
	if (aflt->flt_prot == AFLT_PROT_NONE) {
		/*
		 * Beware - multiple bits may be set in AFSR
		 */
		if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) {
			if (aflt->flt_priv || aft_panic)
				aflt->flt_panic = 1;
		}

		if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
			if (aflt->flt_priv)
				aflt->flt_panic = 1;
		}
	} else if (aflt->flt_prot == AFLT_PROT_COPY && aft_panic) {
		aflt->flt_panic = 1;
	}

	/*
	 * UE/BERR/TO: Call our bus nexus friends to check for
	 * IO errors that may have resulted in this trap.
	 */
	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR | P_AFSR_UE)) {
		cpu_run_bus_error_handlers(aflt, expected);
	}

	/*
	 * Handle UE: If the UE is in memory, we need to flush the bad line from
	 * the E-cache.  We also need to query the bus nexus for fatal errors.
	 * For sabre, we will panic on UEs.  Attempts to do diagnostic read on
	 * caches may introduce more parity errors (especially when the module
	 * is bad) and in sabre there is no guarantee that such errors
	 * (if introduced) are written back as poisoned data.
	 */
	if (t_afsr & P_AFSR_UE) {
		int i;

		(void) strcat(pr_reason, "UE ");

		spf_flt.flt_type = CPU_UE_ERR;
		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
		    MMU_PAGESHIFT)) ? 1: 0;

		/*
		 * With UE, we have the PA of the fault.
		 * Let's do a diagnostic read to get the ecache
		 * data and tag info of the bad line for logging.
		 */
		if (aflt->flt_in_memory) {
			uint32_t ec_set_size;
			uchar_t state;
			uint32_t ecache_idx;
			uint64_t faultpa = P2ALIGN(aflt->flt_addr, 64);

			/* touch the line to put it in ecache */
			acc_afsr |= read_and_clear_afsr();
			(void) lddphys(faultpa);
			acc_afsr |= (read_and_clear_afsr() &
			    ~(P_AFSR_EDP | P_AFSR_UE));

			ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
			    ecache_associativity;

			for (i = 0; i < ecache_associativity; i++) {
				ecache_idx = i * ec_set_size +
				    (aflt->flt_addr % ec_set_size);
				get_ecache_dtag(P2ALIGN(ecache_idx, 64),
				    (uint64_t *)&spf_flt.flt_ec_data[0],
				    &spf_flt.flt_ec_tag, &oafsr, &acc_afsr);
				acc_afsr |= oafsr;

				state = (uchar_t)((spf_flt.flt_ec_tag &
				    cpu_ec_state_mask) >> cpu_ec_state_shift);

				if ((state & cpu_ec_state_valid) &&
				    ((spf_flt.flt_ec_tag & cpu_ec_tag_mask) ==
				    ((uint64_t)aflt->flt_addr >>
				    cpu_ec_tag_shift)))
					break;
			}

			/*
			 * Check to see if the ecache tag is valid for the
			 * fault PA.  In the very unlikely event where the
			 * line could be victimized, no ecache info will be
			 * available.  If this is the case, capture the line
			 * from memory instead.
			 */
			if ((state & cpu_ec_state_valid) == 0 ||
			    (spf_flt.flt_ec_tag & cpu_ec_tag_mask) !=
			    ((uint64_t)aflt->flt_addr >> cpu_ec_tag_shift)) {
				for (i = 0; i < 8; i++, faultpa += 8) {
					ec_data_t *ecdptr;

					ecdptr = &spf_flt.flt_ec_data[i];
					acc_afsr |= read_and_clear_afsr();
					ecdptr->ec_d8 = lddphys(faultpa);
					acc_afsr |= (read_and_clear_afsr() &
					    ~(P_AFSR_EDP | P_AFSR_UE));
					ecdptr->ec_afsr = 0;
					/* null afsr value */
				}

				/*
				 * Mark tag invalid to indicate mem dump
				 * when we print out the info.
				 */
				spf_flt.flt_ec_tag = AFLT_INV_ADDR;
			}
			spf_flt.flt_ec_lcnt = 1;

			/*
			 * Flush out the bad line
			 */
			flushecacheline(P2ALIGN(aflt->flt_addr, 64),
			    cpunodes[CPU->cpu_id].ecache_size);

			acc_afsr |= clear_errors(NULL, NULL);
		}

		/*
		 * Ask our bus nexus friends if they have any fatal errors.  If
		 * so, they will log appropriate error messages and panic as a
		 * result.  We then queue an event for each UDB that reports a
		 * UE.  Each UE reported in a UDB will have its own log message.
		 *
		 * Note from kbn: In the case where there are multiple UEs
		 * (ME bit is set) - the AFAR address is only accurate to
		 * the 16-byte granularity.  One cannot tell whether the AFAR
		 * belongs to the UDBH or UDBL syndromes.  In this case, we
		 * always report the AFAR address to be 16-byte aligned.
		 *
		 * If we're on a Sabre, there is no SDBL, but it will always
		 * read as zero, so the sdbl test below will safely fail.
		 */
		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL || isus2i || isus2e)
			aflt->flt_panic = 1;

		if (sdbh & P_DER_UE) {
			aflt->flt_synd = sdbh & P_DER_E_SYND;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		}
		if (sdbl & P_DER_UE) {
			aflt->flt_synd = sdbl & P_DER_E_SYND;
			aflt->flt_synd |= UDBL_REG;	/* indicates UDBL */
			if (!(aflt->flt_stat & P_AFSR_ME))
				aflt->flt_addr |= 0x8;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		}

		/*
		 * We got a UE and are panicking, save the fault PA in a known
		 * location so that the platform specific panic code can check
		 * for copyback errors.
		 */
		if (aflt->flt_panic && aflt->flt_in_memory) {
			panic_aflt = *aflt;
		}
	}

	/*
	 * Handle EDP and LDP: Locate the line with bad parity and enqueue an
	 * async error for logging.  For Sabre, we panic on EDP or LDP.
	 */
	if (t_afsr & (P_AFSR_EDP | P_AFSR_LDP)) {
		spf_flt.flt_type = CPU_EDP_LDP_ERR;

		if (t_afsr & P_AFSR_EDP)
			(void) strcat(pr_reason, "EDP ");

		if (t_afsr & P_AFSR_LDP)
			(void) strcat(pr_reason, "LDP ");

		/*
		 * Here we have no PA to work with.
		 * Scan each line in the ecache to look for
		 * the one with bad parity.
		 */
		aflt->flt_addr = AFLT_INV_ADDR;
		scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
		    &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
		acc_afsr |= (oafsr & ~P_AFSR_WP);

		/*
		 * If we found a bad PA, update the state to indicate if it is
		 * memory or I/O space.  This code will be important if we ever
		 * support cacheable frame buffers.
		 */
		if (aflt->flt_addr != AFLT_INV_ADDR) {
			aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
			    MMU_PAGESHIFT)) ? 1 : 0;
		}

		if (isus2i || isus2e)
			aflt->flt_panic = 1;

		cpu_errorq_dispatch((t_afsr & P_AFSR_EDP) ?
		    FM_EREPORT_CPU_USII_EDP : FM_EREPORT_CPU_USII_LDP,
		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Timeout and bus error handling.  There are two cases to consider:
	 *
	 * (1) If we are in the kernel protected by ddi_peek or ddi_poke, we
	 * have already modified the saved registers so that we will return
	 * from the trap to the appropriate trampoline routine; otherwise panic.
	 *
	 * (2) In user mode, we can simply use our AST mechanism to deliver
	 * a SIGBUS.  We do not log the occurrence - processes performing
	 * device control would generate lots of uninteresting messages.
	 */
	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
		if (t_afsr & P_AFSR_TO)
			(void) strcat(pr_reason, "BTO ");

		if (t_afsr & P_AFSR_BERR)
			(void) strcat(pr_reason, "BERR ");

		spf_flt.flt_type = CPU_BTO_BERR_ERR;
		if (aflt->flt_priv && aflt->flt_prot == AFLT_PROT_NONE) {
			cpu_errorq_dispatch((t_afsr & P_AFSR_TO) ?
			    FM_EREPORT_CPU_USII_TO : FM_EREPORT_CPU_USII_BERR,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		}
	}

	/*
	 * Handle WP: WP happens when the ecache is victimized and a parity
	 * error was detected on a writeback.  The data in question will be
	 * poisoned as a UE will be written back.  The PA is not logged and
	 * it is possible that it doesn't belong to the trapped thread.  The
	 * WP trap is not fatal, but it could be fatal to someone that
	 * subsequently accesses the toxic page.  We set read_all_memscrub
	 * to force the memscrubber to read all of memory when it awakens.
	 * For Sabre/Hummingbird, WP is fatal because the HW doesn't write a
	 * UE back to poison the data.
	 */
	if (t_afsr & P_AFSR_WP) {
		(void) strcat(pr_reason, "WP ");
		if (isus2i || isus2e) {
			aflt->flt_panic = 1;
		} else {
			read_all_memscrub = 1;
		}
		spf_flt.flt_type = CPU_WP_ERR;
		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_WP,
		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Handle trapping CP error: In Sabre/Hummingbird, parity error in
	 * the ecache on a copyout due to a PCI DMA read is signaled as a CP.
	 * This is fatal.
	 */

	if (t_afsr & P_AFSR_CP) {
		if (isus2i || isus2e) {
			(void) strcat(pr_reason, "CP ");
			aflt->flt_panic = 1;
			spf_flt.flt_type = CPU_TRAPPING_CP_ERR;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		} else {
			/*
			 * Orphan CP: Happens due to signal integrity problem
			 * on a CPU, where a CP is reported, without reporting
			 * its associated UE.  This is handled by locating the
			 * bad parity line and would kick off the memscrubber
			 * to find the UE if in memory or in another's cache.
			 */
			spf_flt.flt_type = CPU_ORPHAN_CP_ERR;
			(void) strcat(pr_reason, "ORPHAN_CP ");

			/*
			 * Here we have no PA to work with.
			 * Scan each line in the ecache to look for
			 * the one with bad parity.
			 */
			aflt->flt_addr = AFLT_INV_ADDR;
			scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
			    &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt,
			    &oafsr);
			acc_afsr |= oafsr;

			/*
			 * If we found a bad PA, update the state to indicate
			 * if it is memory or I/O space.
			 */
			if (aflt->flt_addr != AFLT_INV_ADDR) {
				aflt->flt_in_memory =
				    (pf_is_memory(aflt->flt_addr >>
				    MMU_PAGESHIFT)) ? 1 : 0;
			}
			read_all_memscrub = 1;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);

		}
	}

	/*
	 * If we queued an error other than WP or CP and we are going to return
	 * from the trap and the error was in user mode or inside of a
	 * copy routine, set AST flag so the queue will be drained before
	 * returning to user mode.
	 *
	 * For UE/LDP/EDP, the AST processing will SIGKILL the process
	 * and send an event to its process contract.
	 *
	 * For BERR/BTO, the AST processing will SIGBUS the process.  There
	 * will have been no error queued in this case.
	 */
	if ((t_afsr &
	    (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP | P_AFSR_BERR | P_AFSR_TO)) &&
	    (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY)) {
		int pcb_flag = 0;

		if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP))
			pcb_flag |= ASYNC_HWERR;

		if (t_afsr & P_AFSR_BERR)
			pcb_flag |= ASYNC_BERR;

		if (t_afsr & P_AFSR_TO)
			pcb_flag |= ASYNC_BTO;

		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
		aston(curthread);
		action = ACTION_AST_FLAGS;
	}

	/*
	 * In response to a deferred error, we must do one of three things:
	 * (1) set the AST flags, (2) trampoline, or (3) panic.  action is
action is 1758 * set in cases (1) and (2) - check that either action is set or 1759 * (3) is true. 1760 * 1761 * On II, the WP writes poisoned data back to memory, which will 1762 * cause a UE and a panic or reboot when read. In this case, we 1763 * don't need to panic at this time. On IIi and IIe, 1764 * aflt->flt_panic is already set above. 1765 */ 1766 ASSERT((aflt->flt_panic != 0) || (action != ACTION_NONE) || 1767 (t_afsr & P_AFSR_WP)); 1768 1769 /* 1770 * Make a final sanity check that we did not get any more async 1771 * errors, and accumulate the afsr. 1772 */ 1773 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2, 1774 cpunodes[CPU->cpu_id].ecache_linesize); 1775 (void) clear_errors(&spf_flt, NULL); 1776 1777 /* 1778 * Take care of a special case: If there is a UE in the ecache flush 1779 * area, we'll see it in flush_ecache(). This will trigger the 1780 * CPU_ADDITIONAL_ERRORS case below. 1781 * 1782 * This could occur if the original error was a UE in the flush area, 1783 * or if the original error was an E$ error that was flushed out of 1784 * the E$ in scan_ecache(). 1785 * 1786 * If it's at the same address that we're already logging, then it's 1787 * probably one of these cases. Clear the bit so we don't trip over 1788 * it on the additional errors case, which could cause an unnecessary 1789 * panic. 1790 */ 1791 if ((aflt->flt_stat & P_AFSR_UE) && aflt->flt_addr == t_afar) 1792 acc_afsr |= aflt->flt_stat & ~P_AFSR_UE; 1793 else 1794 acc_afsr |= aflt->flt_stat; 1795 1796 /* 1797 * Check the accumulated afsr for the important bits. 1798 * Make sure the spf_flt.flt_type value is set, and 1799 * enqueue an error. 1800 */ 1801 if (acc_afsr & 1802 (P_AFSR_LEVEL1 | P_AFSR_IVUE | P_AFSR_ETP | P_AFSR_ISAP)) { 1803 if (acc_afsr & (P_AFSR_UE | P_AFSR_EDP | P_AFSR_LDP | 1804 P_AFSR_BERR | P_AFSR_TO | P_AFSR_IVUE | P_AFSR_ETP | 1805 P_AFSR_ISAP)) 1806 aflt->flt_panic = 1; 1807 1808 spf_flt.flt_type = CPU_ADDITIONAL_ERR; 1809 aflt->flt_stat = acc_afsr; 1810 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UNKNOWN, 1811 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1812 aflt->flt_panic); 1813 } 1814 1815 /* 1816 * If aflt->flt_panic is set at this point, we need to panic as the 1817 * result of a trap at TL > 0, or an error we determined to be fatal. 1818 * We've already enqueued the error in one of the if-clauses above, 1819 * and it will be dequeued and logged as part of the panic flow. 1820 */ 1821 if (aflt->flt_panic) { 1822 cpu_aflt_log(CE_PANIC, 1, &spf_flt, CPU_ERRID_FIRST, 1823 "See previous message(s) for details", " %sError(s)", 1824 pr_reason); 1825 } 1826 1827 /* 1828 * Before returning, we must re-enable errors, and 1829 * reset the caches to their boot-up state. 1830 */ 1831 set_lsu(get_lsu() | cache_boot_state); 1832 set_error_enable(EER_ENABLE); 1833 } 1834 1835 /* 1836 * Check for miscellaneous fatal errors and call CE_PANIC if any are seen. 1837 * This routine is shared by the CE and UE handling code. 1838 */ 1839 static void 1840 check_misc_err(spitf_async_flt *spf_flt) 1841 { 1842 struct async_flt *aflt = (struct async_flt *)spf_flt; 1843 char *fatal_str = NULL; 1844 1845 /* 1846 * The ISAP and ETP errors are supposed to cause a POR 1847 * from the system, so in theory we never, ever see these messages. 1848 * ISAP, ETP and IVUE are considered to be fatal.
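 *
 * These bits are checked in priority order (ISAP, then ETP, then IVUE)
 * and only the first one found is reported.  Since cpu_aflt_log() is
 * called with CE_PANIC when one of them is present, it does not return
 * in that case; reaching the end of this routine therefore means none
 * of these bits were set in the captured AFSR.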
1849 */ 1850 if (aflt->flt_stat & P_AFSR_ISAP) 1851 fatal_str = " System Address Parity Error on"; 1852 else if (aflt->flt_stat & P_AFSR_ETP) 1853 fatal_str = " Ecache Tag Parity Error on"; 1854 else if (aflt->flt_stat & P_AFSR_IVUE) 1855 fatal_str = " Interrupt Vector Uncorrectable Error on"; 1856 if (fatal_str != NULL) { 1857 cpu_aflt_log(CE_PANIC, 1, spf_flt, CMN_LFLAGS, 1858 NULL, fatal_str); 1859 } 1860 } 1861 1862 /* 1863 * Routine to convert a syndrome into a syndrome code. 1864 */ 1865 static int 1866 synd_to_synd_code(int synd_status, ushort_t synd) 1867 { 1868 if (synd_status != AFLT_STAT_VALID) 1869 return (-1); 1870 1871 /* 1872 * Use the 8-bit syndrome to index the ecc_syndrome_tab 1873 * to get the code indicating which bit(s) is(are) bad. 1874 */ 1875 if ((synd == 0) || (synd >= SYND_TBL_SIZE)) 1876 return (-1); 1877 else 1878 return (ecc_syndrome_tab[synd]); 1879 } 1880 1881 /* ARGSUSED */ 1882 int 1883 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp) 1884 { 1885 return (ENOTSUP); 1886 } 1887 1888 /* ARGSUSED */ 1889 int 1890 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp) 1891 { 1892 return (ENOTSUP); 1893 } 1894 1895 /* ARGSUSED */ 1896 int 1897 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp) 1898 { 1899 return (ENOTSUP); 1900 } 1901 1902 /* 1903 * Routine to return a string identifying the physical name 1904 * associated with a memory/cache error. 1905 */ 1906 /* ARGSUSED */ 1907 int 1908 cpu_get_mem_unum(int synd_status, ushort_t synd, uint64_t afsr, 1909 uint64_t afar, int cpuid, int flt_in_memory, ushort_t flt_status, 1910 char *buf, int buflen, int *lenp) 1911 { 1912 short synd_code; 1913 int ret; 1914 1915 if (flt_in_memory) { 1916 synd_code = synd_to_synd_code(synd_status, synd); 1917 if (synd_code == -1) { 1918 ret = EINVAL; 1919 } else if (prom_get_unum(synd_code, P2ALIGN(afar, 8), 1920 buf, buflen, lenp) != 0) { 1921 ret = EIO; 1922 } else if (*lenp <= 1) { 1923 ret = EINVAL; 1924 } else { 1925 ret = 0; 1926 } 1927 } else { 1928 ret = ENOTSUP; 1929 } 1930 1931 if (ret != 0) { 1932 buf[0] = '\0'; 1933 *lenp = 0; 1934 } 1935 1936 return (ret); 1937 } 1938 1939 /* 1940 * Wrapper for cpu_get_mem_unum() routine that takes an 1941 * async_flt struct rather than explicit arguments. 1942 */ 1943 int 1944 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt, 1945 char *buf, int buflen, int *lenp) 1946 { 1947 return (cpu_get_mem_unum(synd_status, SYND(aflt->flt_synd), 1948 aflt->flt_stat, aflt->flt_addr, aflt->flt_bus_id, 1949 aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp)); 1950 } 1951 1952 /* 1953 * This routine is a more generic interface to cpu_get_mem_unum(), 1954 * that may be used by other modules (e.g. mm). 1955 */ 1956 int 1957 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar, 1958 char *buf, int buflen, int *lenp) 1959 { 1960 int synd_status, flt_in_memory, ret; 1961 char unum[UNUM_NAMLEN]; 1962 1963 /* 1964 * Check for an invalid address. 1965 */ 1966 if (afar == (uint64_t)-1) 1967 return (ENXIO); 1968 1969 if (synd == (uint64_t)-1) 1970 synd_status = AFLT_STAT_INVALID; 1971 else 1972 synd_status = AFLT_STAT_VALID; 1973 1974 flt_in_memory = (pf_is_memory(afar >> MMU_PAGESHIFT)) ? 
1 : 0; 1975 1976 if ((ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar, 1977 CPU->cpu_id, flt_in_memory, 0, unum, UNUM_NAMLEN, lenp)) 1978 != 0) 1979 return (ret); 1980 1981 if (*lenp >= buflen) 1982 return (ENAMETOOLONG); 1983 1984 (void) strncpy(buf, unum, buflen); 1985 1986 return (0); 1987 } 1988 1989 /* 1990 * Routine to return memory information associated 1991 * with a physical address and syndrome. 1992 */ 1993 /* ARGSUSED */ 1994 int 1995 cpu_get_mem_info(uint64_t synd, uint64_t afar, 1996 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 1997 int *segsp, int *banksp, int *mcidp) 1998 { 1999 return (ENOTSUP); 2000 } 2001 2002 /* 2003 * Routine to return a string identifying the physical 2004 * name associated with a cpuid. 2005 */ 2006 /* ARGSUSED */ 2007 int 2008 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp) 2009 { 2010 return (ENOTSUP); 2011 } 2012 2013 /* 2014 * This routine returns the size of the kernel's FRU name buffer. 2015 */ 2016 size_t 2017 cpu_get_name_bufsize() 2018 { 2019 return (UNUM_NAMLEN); 2020 } 2021 2022 /* 2023 * Cpu specific log func for UEs. 2024 */ 2025 static void 2026 log_ue_err(struct async_flt *aflt, char *unum) 2027 { 2028 spitf_async_flt *spf_flt = (spitf_async_flt *)aflt; 2029 int len = 0; 2030 2031 #ifdef DEBUG 2032 int afsr_priv = (aflt->flt_stat & P_AFSR_PRIV) ? 1 : 0; 2033 2034 /* 2035 * Paranoid Check for priv mismatch 2036 * Only applicable for UEs 2037 */ 2038 if (afsr_priv != aflt->flt_priv) { 2039 /* 2040 * The priv bits in %tstate and %afsr did not match; we expect 2041 * this to be very rare, so flag it with a message. 2042 */ 2043 cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, NULL, 2044 ": PRIV bit in TSTATE and AFSR mismatched; " 2045 "TSTATE.PRIV=%d used", (aflt->flt_priv) ? 1 : 0); 2046 2047 /* update saved afsr to reflect the correct priv */ 2048 aflt->flt_stat &= ~P_AFSR_PRIV; 2049 if (aflt->flt_priv) 2050 aflt->flt_stat |= P_AFSR_PRIV; 2051 } 2052 #endif /* DEBUG */ 2053 2054 (void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum, 2055 UNUM_NAMLEN, &len); 2056 2057 cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum, 2058 " Uncorrectable Memory Error on"); 2059 2060 if (SYND(aflt->flt_synd) == 0x3) { 2061 cpu_aflt_log(CE_WARN, 1, spf_flt, CPU_ERRID_FIRST, NULL, 2062 " Syndrome 0x3 indicates that this may not be a " 2063 "memory module problem"); 2064 } 2065 2066 if (aflt->flt_in_memory) 2067 cpu_log_ecmem_info(spf_flt); 2068 } 2069 2070 2071 /* 2072 * The cpu_async_log_err() function is called via the ue_drain() function to 2073 * handle logging for CPU events that are dequeued. As such, it can be invoked 2074 * from softint context, from AST processing in the trap() flow, or from the 2075 * panic flow. We decode the CPU-specific data, and log appropriate messages. 2076 */ 2077 static void 2078 cpu_async_log_err(void *flt) 2079 { 2080 spitf_async_flt *spf_flt = (spitf_async_flt *)flt; 2081 struct async_flt *aflt = (struct async_flt *)flt; 2082 char unum[UNUM_NAMLEN]; 2083 char *space; 2084 char *ecache_scrub_logstr = NULL; 2085 2086 switch (spf_flt->flt_type) { 2087 case CPU_UE_ERR: 2088 /* 2089 * We want to skip logging only if ALL the following 2090 * conditions are true: 2091 * 2092 * 1. We are not panicking 2093 * 2. There is only one error 2094 * 3. That error is a memory error 2095 * 4. The error is caused by the memory scrubber (in 2096 * which case the error will have occurred under 2097 * on_trap protection) 2098 * 5. 
The error is on a retired page 2099 * 2100 * Note 1: AFLT_PROT_EC is used places other than the memory 2101 * scrubber. However, none of those errors should occur 2102 * on a retired page. 2103 * 2104 * Note 2: In the CE case, these errors are discarded before 2105 * the errorq. In the UE case, we must wait until now -- 2106 * softcall() grabs a mutex, which we can't do at a high PIL. 2107 */ 2108 if (!panicstr && 2109 (aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE && 2110 aflt->flt_prot == AFLT_PROT_EC) { 2111 if (page_retire_check(aflt->flt_addr, NULL) == 0) { 2112 /* Zero the address to clear the error */ 2113 softcall(ecc_page_zero, (void *)aflt->flt_addr); 2114 return; 2115 } 2116 } 2117 2118 /* 2119 * Log the UE and check for causes of this UE error that 2120 * don't cause a trap (Copyback error). cpu_async_error() 2121 * has already checked the i/o buses for us. 2122 */ 2123 log_ue_err(aflt, unum); 2124 if (aflt->flt_in_memory) 2125 cpu_check_allcpus(aflt); 2126 break; 2127 2128 case CPU_EDP_LDP_ERR: 2129 if (aflt->flt_stat & P_AFSR_EDP) 2130 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, 2131 NULL, " EDP event on"); 2132 2133 if (aflt->flt_stat & P_AFSR_LDP) 2134 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, 2135 NULL, " LDP event on"); 2136 2137 /* Log ecache info if exist */ 2138 if (spf_flt->flt_ec_lcnt > 0) { 2139 cpu_log_ecmem_info(spf_flt); 2140 2141 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, 2142 NULL, " AFAR was derived from E$Tag"); 2143 } else { 2144 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, 2145 NULL, " No error found in ecache (No fault " 2146 "PA available)"); 2147 } 2148 break; 2149 2150 case CPU_WP_ERR: 2151 /* 2152 * If the memscrub thread hasn't yet read 2153 * all of memory, as we requested in the 2154 * trap handler, then give it a kick to 2155 * make sure it does. 2156 */ 2157 if (!isus2i && !isus2e && read_all_memscrub) 2158 memscrub_run(); 2159 2160 cpu_aflt_log(CE_WARN, 1, spf_flt, WP_LFLAGS, NULL, 2161 " WP event on"); 2162 return; 2163 2164 case CPU_BTO_BERR_ERR: 2165 /* 2166 * A bus timeout or error occurred that was in user mode or not 2167 * in a protected kernel code region. 2168 */ 2169 if (aflt->flt_stat & P_AFSR_BERR) { 2170 cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2, 2171 spf_flt, BERRTO_LFLAGS, NULL, 2172 " Bus Error on System Bus in %s mode from", 2173 aflt->flt_priv ? "privileged" : "user"); 2174 } 2175 2176 if (aflt->flt_stat & P_AFSR_TO) { 2177 cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2, 2178 spf_flt, BERRTO_LFLAGS, NULL, 2179 " Timeout on System Bus in %s mode from", 2180 aflt->flt_priv ? "privileged" : "user"); 2181 } 2182 2183 return; 2184 2185 case CPU_PANIC_CP_ERR: 2186 /* 2187 * Process the Copyback (CP) error info (if any) obtained from 2188 * polling all the cpus in the panic flow. This case is only 2189 * entered if we are panicking. 2190 */ 2191 ASSERT(panicstr != NULL); 2192 ASSERT(aflt->flt_id == panic_aflt.flt_id); 2193 2194 /* See which space - this info may not exist */ 2195 if (panic_aflt.flt_status & ECC_D_TRAP) 2196 space = "Data "; 2197 else if (panic_aflt.flt_status & ECC_I_TRAP) 2198 space = "Instruction "; 2199 else 2200 space = ""; 2201 2202 cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL, 2203 " AFAR was derived from UE report," 2204 " CP event on CPU%d (caused %saccess error on %s%d)", 2205 aflt->flt_inst, space, (panic_aflt.flt_status & ECC_IOBUS) ? 
2206 "IOBUS" : "CPU", panic_aflt.flt_bus_id); 2207 2208 if (spf_flt->flt_ec_lcnt > 0) 2209 cpu_log_ecmem_info(spf_flt); 2210 else 2211 cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, 2212 NULL, " No cache dump available"); 2213 2214 return; 2215 2216 case CPU_TRAPPING_CP_ERR: 2217 /* 2218 * For sabre only. This is a copyback ecache parity error due 2219 * to a PCI DMA read. We should be panicking if we get here. 2220 */ 2221 ASSERT(panicstr != NULL); 2222 cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL, 2223 " AFAR was derived from UE report," 2224 " CP event on CPU%d (caused Data access error " 2225 "on PCIBus)", aflt->flt_inst); 2226 return; 2227 2228 /* 2229 * We log the ecache lines of the following states, 2230 * clean_bad_idle, clean_bad_busy, dirty_bad_idle and 2231 * dirty_bad_busy if ecache_scrub_verbose is set and panic 2232 * in addition to logging if ecache_scrub_panic is set. 2233 */ 2234 case CPU_BADLINE_CI_ERR: 2235 ecache_scrub_logstr = "CBI"; 2236 /* FALLTHRU */ 2237 2238 case CPU_BADLINE_CB_ERR: 2239 if (ecache_scrub_logstr == NULL) 2240 ecache_scrub_logstr = "CBB"; 2241 /* FALLTHRU */ 2242 2243 case CPU_BADLINE_DI_ERR: 2244 if (ecache_scrub_logstr == NULL) 2245 ecache_scrub_logstr = "DBI"; 2246 /* FALLTHRU */ 2247 2248 case CPU_BADLINE_DB_ERR: 2249 if (ecache_scrub_logstr == NULL) 2250 ecache_scrub_logstr = "DBB"; 2251 2252 cpu_aflt_log(CE_NOTE, 2, spf_flt, 2253 (CPU_ERRID_FIRST | CPU_FLTCPU), NULL, 2254 " %s event on", ecache_scrub_logstr); 2255 cpu_log_ecmem_info(spf_flt); 2256 2257 return; 2258 2259 case CPU_ORPHAN_CP_ERR: 2260 /* 2261 * Orphan CPs, where the CP bit is set, but when a CPU 2262 * doesn't report a UE. 2263 */ 2264 if (read_all_memscrub) 2265 memscrub_run(); 2266 2267 cpu_aflt_log(CE_NOTE, 2, spf_flt, (CP_LFLAGS | CPU_FLTCPU), 2268 NULL, " Orphan CP event on"); 2269 2270 /* Log ecache info if exist */ 2271 if (spf_flt->flt_ec_lcnt > 0) 2272 cpu_log_ecmem_info(spf_flt); 2273 else 2274 cpu_aflt_log(CE_NOTE, 2, spf_flt, 2275 (CP_LFLAGS | CPU_FLTCPU), NULL, 2276 " No error found in ecache (No fault " 2277 "PA available"); 2278 return; 2279 2280 case CPU_ECACHE_ADDR_PAR_ERR: 2281 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2282 " E$ Tag Address Parity error on"); 2283 cpu_log_ecmem_info(spf_flt); 2284 return; 2285 2286 case CPU_ECACHE_STATE_ERR: 2287 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2288 " E$ Tag State Parity error on"); 2289 cpu_log_ecmem_info(spf_flt); 2290 return; 2291 2292 case CPU_ECACHE_TAG_ERR: 2293 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2294 " E$ Tag scrub event on"); 2295 cpu_log_ecmem_info(spf_flt); 2296 return; 2297 2298 case CPU_ECACHE_ETP_ETS_ERR: 2299 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2300 " AFSR.ETP is set and AFSR.ETS is zero on"); 2301 cpu_log_ecmem_info(spf_flt); 2302 return; 2303 2304 2305 case CPU_ADDITIONAL_ERR: 2306 cpu_aflt_log(CE_WARN, 1, spf_flt, CMN_LFLAGS & ~CPU_SPACE, NULL, 2307 " Additional errors detected during error processing on"); 2308 return; 2309 2310 default: 2311 cmn_err(CE_WARN, "cpu_async_log_err: fault %p has unknown " 2312 "fault type %x", (void *)spf_flt, spf_flt->flt_type); 2313 return; 2314 } 2315 2316 /* ... 
fall through from the UE, EDP, or LDP cases */ 2317 2318 if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) { 2319 if (!panicstr) { 2320 (void) page_retire(aflt->flt_addr, PR_UE); 2321 } else { 2322 /* 2323 * Clear UEs on panic so that we don't 2324 * get haunted by them during panic or 2325 * after reboot 2326 */ 2327 clearphys(P2ALIGN(aflt->flt_addr, 64), 2328 cpunodes[CPU->cpu_id].ecache_size, 2329 cpunodes[CPU->cpu_id].ecache_linesize); 2330 2331 (void) clear_errors(NULL, NULL); 2332 } 2333 } 2334 2335 /* 2336 * Log final recover message 2337 */ 2338 if (!panicstr) { 2339 if (!aflt->flt_priv) { 2340 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, 2341 NULL, " Above Error is in User Mode" 2342 "\n and is fatal: " 2343 "will SIGKILL process and notify contract"); 2344 } else if (aflt->flt_prot == AFLT_PROT_COPY && aflt->flt_core) { 2345 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, 2346 NULL, " Above Error detected while dumping core;" 2347 "\n core file will be truncated"); 2348 } else if (aflt->flt_prot == AFLT_PROT_COPY) { 2349 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, 2350 NULL, " Above Error is due to Kernel access" 2351 "\n to User space and is fatal: " 2352 "will SIGKILL process and notify contract"); 2353 } else if (aflt->flt_prot == AFLT_PROT_EC) { 2354 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, NULL, 2355 " Above Error detected by protected Kernel code" 2356 "\n that will try to clear error from system"); 2357 } 2358 } 2359 } 2360 2361 2362 /* 2363 * Check all cpus for non-trapping UE-causing errors 2364 * In Ultra I/II, we look for copyback errors (CPs) 2365 */ 2366 void 2367 cpu_check_allcpus(struct async_flt *aflt) 2368 { 2369 spitf_async_flt cp; 2370 spitf_async_flt *spf_cpflt = &cp; 2371 struct async_flt *cpflt = (struct async_flt *)&cp; 2372 int pix; 2373 2374 cpflt->flt_id = aflt->flt_id; 2375 cpflt->flt_addr = aflt->flt_addr; 2376 2377 for (pix = 0; pix < NCPU; pix++) { 2378 if (CPU_XCALL_READY(pix)) { 2379 xc_one(pix, (xcfunc_t *)get_cpu_status, 2380 (uint64_t)cpflt, 0); 2381 2382 if (cpflt->flt_stat & P_AFSR_CP) { 2383 char *space; 2384 2385 /* See which space - this info may not exist */ 2386 if (aflt->flt_status & ECC_D_TRAP) 2387 space = "Data "; 2388 else if (aflt->flt_status & ECC_I_TRAP) 2389 space = "Instruction "; 2390 else 2391 space = ""; 2392 2393 cpu_aflt_log(CE_WARN, 1, spf_cpflt, CP_LFLAGS, 2394 NULL, " AFAR was derived from UE report," 2395 " CP event on CPU%d (caused %saccess " 2396 "error on %s%d)", pix, space, 2397 (aflt->flt_status & ECC_IOBUS) ? 2398 "IOBUS" : "CPU", aflt->flt_bus_id); 2399 2400 if (spf_cpflt->flt_ec_lcnt > 0) 2401 cpu_log_ecmem_info(spf_cpflt); 2402 else 2403 cpu_aflt_log(CE_WARN, 2, spf_cpflt, 2404 CPU_ERRID_FIRST, NULL, 2405 " No cache dump available"); 2406 } 2407 } 2408 } 2409 } 2410 2411 #ifdef DEBUG 2412 int test_mp_cp = 0; 2413 #endif 2414 2415 /* 2416 * Cross-call callback routine to tell a CPU to read its own %afsr to check 2417 * for copyback errors and capture relevant information. 
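 *
 * The uint64_t argument is really a pointer to a spitf_async_flt; the
 * caller must fill in flt_addr beforehand, since it is used below to
 * match E$ tags.  This routine is reached two ways: as a cross-call
 * target from cpu_check_allcpus(), e.g.
 *
 *	xc_one(pix, (xcfunc_t *)get_cpu_status, (uint64_t)cpflt, 0);
 *
 * (shown here purely for illustration, mirroring the existing caller),
 * and by a direct call from cpu_async_panic_callb() on the panicking
 * path.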
2418 */ 2419 static uint_t 2420 get_cpu_status(uint64_t arg) 2421 { 2422 struct async_flt *aflt = (struct async_flt *)arg; 2423 spitf_async_flt *spf_flt = (spitf_async_flt *)arg; 2424 uint64_t afsr; 2425 uint32_t ec_idx; 2426 uint64_t sdbh, sdbl; 2427 int i; 2428 uint32_t ec_set_size; 2429 uchar_t valid; 2430 ec_data_t ec_data[8]; 2431 uint64_t ec_tag, flt_addr_tag, oafsr; 2432 uint64_t *acc_afsr = NULL; 2433 2434 get_asyncflt(&afsr); 2435 if (CPU_PRIVATE(CPU) != NULL) { 2436 acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 2437 afsr |= *acc_afsr; 2438 *acc_afsr = 0; 2439 } 2440 2441 #ifdef DEBUG 2442 if (test_mp_cp) 2443 afsr |= P_AFSR_CP; 2444 #endif 2445 aflt->flt_stat = afsr; 2446 2447 if (afsr & P_AFSR_CP) { 2448 /* 2449 * Capture the UDBs 2450 */ 2451 get_udb_errors(&sdbh, &sdbl); 2452 spf_flt->flt_sdbh = (ushort_t)(sdbh & 0x3FF); 2453 spf_flt->flt_sdbl = (ushort_t)(sdbl & 0x3FF); 2454 2455 /* 2456 * Clear CP bit before capturing ecache data 2457 * and AFSR info. 2458 */ 2459 set_asyncflt(P_AFSR_CP); 2460 2461 /* 2462 * See if we can capture the ecache line for the 2463 * fault PA. 2464 * 2465 * Return a valid matching ecache line, if any. 2466 * Otherwise, return the first matching ecache 2467 * line marked invalid. 2468 */ 2469 flt_addr_tag = aflt->flt_addr >> cpu_ec_tag_shift; 2470 ec_set_size = cpunodes[CPU->cpu_id].ecache_size / 2471 ecache_associativity; 2472 spf_flt->flt_ec_lcnt = 0; 2473 2474 for (i = 0, ec_idx = (aflt->flt_addr % ec_set_size); 2475 i < ecache_associativity; i++, ec_idx += ec_set_size) { 2476 get_ecache_dtag(P2ALIGN(ec_idx, 64), 2477 (uint64_t *)&ec_data[0], &ec_tag, &oafsr, 2478 acc_afsr); 2479 2480 if ((ec_tag & cpu_ec_tag_mask) != flt_addr_tag) 2481 continue; 2482 2483 valid = cpu_ec_state_valid & 2484 (uchar_t)((ec_tag & cpu_ec_state_mask) >> 2485 cpu_ec_state_shift); 2486 2487 if (valid || spf_flt->flt_ec_lcnt == 0) { 2488 spf_flt->flt_ec_tag = ec_tag; 2489 bcopy(&ec_data, &spf_flt->flt_ec_data, 2490 sizeof (ec_data)); 2491 spf_flt->flt_ec_lcnt = 1; 2492 2493 if (valid) 2494 break; 2495 } 2496 } 2497 } 2498 return (0); 2499 } 2500 2501 /* 2502 * CPU-module callback for the non-panicking CPUs. This routine is invoked 2503 * from panic_idle() as part of the other CPUs stopping themselves when a 2504 * panic occurs. We need to be VERY careful what we do here, since panicstr 2505 * is NOT set yet and we cannot blow through locks. If panic_aflt is set 2506 * (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for 2507 * CP error information. 2508 */ 2509 void 2510 cpu_async_panic_callb(void) 2511 { 2512 spitf_async_flt cp; 2513 struct async_flt *aflt = (struct async_flt *)&cp; 2514 uint64_t *scrub_afsr; 2515 2516 if (panic_aflt.flt_id != 0) { 2517 aflt->flt_addr = panic_aflt.flt_addr; 2518 (void) get_cpu_status((uint64_t)aflt); 2519 2520 if (CPU_PRIVATE(CPU) != NULL) { 2521 scrub_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 2522 if (*scrub_afsr & P_AFSR_CP) { 2523 aflt->flt_stat |= *scrub_afsr; 2524 *scrub_afsr = 0; 2525 } 2526 } 2527 if (aflt->flt_stat & P_AFSR_CP) { 2528 aflt->flt_id = panic_aflt.flt_id; 2529 aflt->flt_panic = 1; 2530 aflt->flt_inst = CPU->cpu_id; 2531 aflt->flt_class = CPU_FAULT; 2532 cp.flt_type = CPU_PANIC_CP_ERR; 2533 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP, 2534 (void *)&cp, sizeof (cp), ue_queue, 2535 aflt->flt_panic); 2536 } 2537 } 2538 } 2539 2540 /* 2541 * Turn off all cpu error detection, normally only used for panics. 
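 *
 * This is the cross-trap counterpart of the local
 * set_error_enable(EER_ENABLE) calls made on the trap-handler return
 * paths; cpu_enable_errors() below restores detection on all CPUs by
 * sending set_error_enable_tl1 with EER_ENABLE.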
2542 */ 2543 void 2544 cpu_disable_errors(void) 2545 { 2546 xt_all(set_error_enable_tl1, EER_DISABLE, EER_SET_ABSOLUTE); 2547 } 2548 2549 /* 2550 * Enable errors. 2551 */ 2552 void 2553 cpu_enable_errors(void) 2554 { 2555 xt_all(set_error_enable_tl1, EER_ENABLE, EER_SET_ABSOLUTE); 2556 } 2557 2558 static void 2559 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err) 2560 { 2561 uint64_t aligned_addr = P2ALIGN(ecc->flt_addr, 8); 2562 int i, loop = 1; 2563 ushort_t ecc_0; 2564 uint64_t paddr; 2565 uint64_t data; 2566 2567 if (verbose) 2568 loop = 8; 2569 for (i = 0; i < loop; i++) { 2570 paddr = aligned_addr + (i * 8); 2571 data = lddphys(paddr); 2572 if (verbose) { 2573 if (ce_err) { 2574 ecc_0 = ecc_gen((uint32_t)(data>>32), 2575 (uint32_t)data); 2576 cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS, 2577 NULL, " Paddr 0x%" PRIx64 ", " 2578 "Data 0x%08x.%08x, ECC 0x%x", paddr, 2579 (uint32_t)(data>>32), (uint32_t)data, ecc_0); 2580 } else { 2581 cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS, 2582 NULL, " Paddr 0x%" PRIx64 ", " 2583 "Data 0x%08x.%08x", paddr, 2584 (uint32_t)(data>>32), (uint32_t)data); 2585 } 2586 } 2587 } 2588 } 2589 2590 static struct { /* sec-ded-s4ed ecc code */ 2591 uint_t hi, lo; 2592 } ecc_code[8] = { 2593 { 0xee55de23U, 0x16161161U }, 2594 { 0x55eede93U, 0x61612212U }, 2595 { 0xbb557b8cU, 0x49494494U }, 2596 { 0x55bb7b6cU, 0x94948848U }, 2597 { 0x16161161U, 0xee55de23U }, 2598 { 0x61612212U, 0x55eede93U }, 2599 { 0x49494494U, 0xbb557b8cU }, 2600 { 0x94948848U, 0x55bb7b6cU } 2601 }; 2602 2603 static ushort_t 2604 ecc_gen(uint_t high_bytes, uint_t low_bytes) 2605 { 2606 int i, j; 2607 uchar_t checker, bit_mask; 2608 struct { 2609 uint_t hi, lo; 2610 } hex_data, masked_data[8]; 2611 2612 hex_data.hi = high_bytes; 2613 hex_data.lo = low_bytes; 2614 2615 /* mask out bits according to sec-ded-s4ed ecc code */ 2616 for (i = 0; i < 8; i++) { 2617 masked_data[i].hi = hex_data.hi & ecc_code[i].hi; 2618 masked_data[i].lo = hex_data.lo & ecc_code[i].lo; 2619 } 2620 2621 /* 2622 * xor all bits in masked_data[i] to get bit_i of checker, 2623 * where i = 0 to 7 2624 */ 2625 checker = 0; 2626 for (i = 0; i < 8; i++) { 2627 bit_mask = 1 << i; 2628 for (j = 0; j < 32; j++) { 2629 if (masked_data[i].lo & 1) checker ^= bit_mask; 2630 if (masked_data[i].hi & 1) checker ^= bit_mask; 2631 masked_data[i].hi >>= 1; 2632 masked_data[i].lo >>= 1; 2633 } 2634 } 2635 return (checker); 2636 } 2637 2638 /* 2639 * Flush the entire ecache using displacement flush by reading through a 2640 * physical address range as large as the ecache. 2641 */ 2642 void 2643 cpu_flush_ecache(void) 2644 { 2645 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2, 2646 cpunodes[CPU->cpu_id].ecache_linesize); 2647 } 2648 2649 /* 2650 * read and display the data in the cache line where the 2651 * original ce error occurred. 2652 * This routine is mainly used for debugging new hardware. 
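 *
 * The sequence below is: disable ECC traps, displacement-flush the E$
 * so the data must be re-fetched from memory, display the line via
 * cpu_read_paddr(), and check persistence with check_ecc() before
 * re-enabling traps.  An illustrative debugging call would be
 *
 *	read_ecc_data(aflt, 1, 1);
 *
 * which dumps all eight 8-byte words of the line together with the
 * ECC check bits recomputed by ecc_gen().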
2653 */ 2654 void 2655 read_ecc_data(struct async_flt *ecc, short verbose, short ce_err) 2656 { 2657 kpreempt_disable(); 2658 /* disable ECC error traps */ 2659 set_error_enable(EER_ECC_DISABLE); 2660 2661 /* 2662 * flush the ecache 2663 * read the data 2664 * check to see if an ECC error occured 2665 */ 2666 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2, 2667 cpunodes[CPU->cpu_id].ecache_linesize); 2668 set_lsu(get_lsu() | cache_boot_state); 2669 cpu_read_paddr(ecc, verbose, ce_err); 2670 (void) check_ecc(ecc); 2671 2672 /* enable ECC error traps */ 2673 set_error_enable(EER_ENABLE); 2674 kpreempt_enable(); 2675 } 2676 2677 /* 2678 * Check the AFSR bits for UE/CE persistence. 2679 * If UE or CE errors are detected, the routine will 2680 * clears all the AFSR sticky bits (except CP for 2681 * spitfire/blackbird) and the UDBs. 2682 * if ce_debug or ue_debug is set, log any ue/ce errors detected. 2683 */ 2684 static int 2685 check_ecc(struct async_flt *ecc) 2686 { 2687 uint64_t t_afsr; 2688 uint64_t t_afar; 2689 uint64_t udbh; 2690 uint64_t udbl; 2691 ushort_t udb; 2692 int persistent = 0; 2693 2694 /* 2695 * Capture the AFSR, AFAR and UDBs info 2696 */ 2697 get_asyncflt(&t_afsr); 2698 get_asyncaddr(&t_afar); 2699 t_afar &= SABRE_AFAR_PA; 2700 get_udb_errors(&udbh, &udbl); 2701 2702 if ((t_afsr & P_AFSR_UE) || (t_afsr & P_AFSR_CE)) { 2703 /* 2704 * Clear the errors 2705 */ 2706 clr_datapath(); 2707 2708 if (isus2i || isus2e) 2709 set_asyncflt(t_afsr); 2710 else 2711 set_asyncflt(t_afsr & ~P_AFSR_CP); 2712 2713 /* 2714 * determine whether to check UDBH or UDBL for persistence 2715 */ 2716 if (ecc->flt_synd & UDBL_REG) { 2717 udb = (ushort_t)udbl; 2718 t_afar |= 0x8; 2719 } else { 2720 udb = (ushort_t)udbh; 2721 } 2722 2723 if (ce_debug || ue_debug) { 2724 spitf_async_flt spf_flt; /* for logging */ 2725 struct async_flt *aflt = 2726 (struct async_flt *)&spf_flt; 2727 2728 /* Package the info nicely in the spf_flt struct */ 2729 bzero(&spf_flt, sizeof (spitf_async_flt)); 2730 aflt->flt_stat = t_afsr; 2731 aflt->flt_addr = t_afar; 2732 spf_flt.flt_sdbh = (ushort_t)(udbh & 0x3FF); 2733 spf_flt.flt_sdbl = (ushort_t)(udbl & 0x3FF); 2734 2735 cpu_aflt_log(CE_CONT, 0, &spf_flt, (CPU_AFSR | 2736 CPU_AFAR | CPU_UDBH | CPU_UDBL), NULL, 2737 " check_ecc: Dumping captured error states ..."); 2738 } 2739 2740 /* 2741 * if the fault addresses don't match, not persistent 2742 */ 2743 if (t_afar != ecc->flt_addr) { 2744 return (persistent); 2745 } 2746 2747 /* 2748 * check for UE persistence 2749 * since all DIMMs in the bank are identified for a UE, 2750 * there's no reason to check the syndrome 2751 */ 2752 if ((ecc->flt_stat & P_AFSR_UE) && (t_afsr & P_AFSR_UE)) { 2753 persistent = 1; 2754 } 2755 2756 /* 2757 * check for CE persistence 2758 */ 2759 if ((ecc->flt_stat & P_AFSR_CE) && (t_afsr & P_AFSR_CE)) { 2760 if ((udb & P_DER_E_SYND) == 2761 (ecc->flt_synd & P_DER_E_SYND)) { 2762 persistent = 1; 2763 } 2764 } 2765 } 2766 return (persistent); 2767 } 2768 2769 #ifdef HUMMINGBIRD 2770 #define HB_FULL_DIV 1 2771 #define HB_HALF_DIV 2 2772 #define HB_LOWEST_DIV 8 2773 #define HB_ECLK_INVALID 0xdeadbad 2774 static uint64_t hb_eclk[HB_LOWEST_DIV + 1] = { 2775 HB_ECLK_INVALID, HB_ECLK_1, HB_ECLK_2, HB_ECLK_INVALID, 2776 HB_ECLK_4, HB_ECLK_INVALID, HB_ECLK_6, HB_ECLK_INVALID, 2777 HB_ECLK_8 }; 2778 2779 #define HB_SLOW_DOWN 0 2780 #define HB_SPEED_UP 1 2781 2782 #define SET_ESTAR_MODE(mode) \ 2783 stdphysio(HB_ESTAR_MODE, (mode)); \ 2784 /* \ 2785 * PLL logic requires minimum of 16 clock \ 
2786 * cycles to lock to the new clock speed. \ 2787 * Wait 1 usec to satisfy this requirement. \ 2788 */ \ 2789 drv_usecwait(1); 2790 2791 #define CHANGE_REFRESH_COUNT(direction, cur_div, new_div) \ 2792 { \ 2793 volatile uint64_t data; \ 2794 uint64_t count, new_count; \ 2795 clock_t delay; \ 2796 data = lddphysio(HB_MEM_CNTRL0); \ 2797 count = (data & HB_REFRESH_COUNT_MASK) >> \ 2798 HB_REFRESH_COUNT_SHIFT; \ 2799 new_count = (HB_REFRESH_INTERVAL * \ 2800 cpunodes[CPU->cpu_id].clock_freq) / \ 2801 (HB_REFRESH_CLOCKS_PER_COUNT * (new_div) * NANOSEC);\ 2802 data = (data & ~HB_REFRESH_COUNT_MASK) | \ 2803 (new_count << HB_REFRESH_COUNT_SHIFT); \ 2804 stdphysio(HB_MEM_CNTRL0, data); \ 2805 data = lddphysio(HB_MEM_CNTRL0); \ 2806 /* \ 2807 * If we are slowing down the cpu and Memory \ 2808 * Self Refresh is not enabled, it is required \ 2809 * to wait for old refresh count to count-down and \ 2810 * new refresh count to go into effect (let new value \ 2811 * counts down once). \ 2812 */ \ 2813 if ((direction) == HB_SLOW_DOWN && \ 2814 (data & HB_SELF_REFRESH_MASK) == 0) { \ 2815 /* \ 2816 * Each count takes 64 cpu clock cycles \ 2817 * to decrement. Wait for current refresh \ 2818 * count plus new refresh count at current \ 2819 * cpu speed to count down to zero. Round \ 2820 * up the delay time. \ 2821 */ \ 2822 delay = ((HB_REFRESH_CLOCKS_PER_COUNT * \ 2823 (count + new_count) * MICROSEC * (cur_div)) /\ 2824 cpunodes[CPU->cpu_id].clock_freq) + 1; \ 2825 drv_usecwait(delay); \ 2826 } \ 2827 } 2828 2829 #define SET_SELF_REFRESH(bit) \ 2830 { \ 2831 volatile uint64_t data; \ 2832 data = lddphysio(HB_MEM_CNTRL0); \ 2833 data = (data & ~HB_SELF_REFRESH_MASK) | \ 2834 ((bit) << HB_SELF_REFRESH_SHIFT); \ 2835 stdphysio(HB_MEM_CNTRL0, data); \ 2836 data = lddphysio(HB_MEM_CNTRL0); \ 2837 } 2838 #endif /* HUMMINGBIRD */ 2839 2840 /* ARGSUSED */ 2841 void 2842 cpu_change_speed(uint64_t new_divisor, uint64_t arg2) 2843 { 2844 #ifdef HUMMINGBIRD 2845 uint64_t cur_mask, cur_divisor = 0; 2846 volatile uint64_t reg; 2847 int index; 2848 2849 if ((new_divisor < HB_FULL_DIV || new_divisor > HB_LOWEST_DIV) || 2850 (hb_eclk[new_divisor] == HB_ECLK_INVALID)) { 2851 cmn_err(CE_WARN, "cpu_change_speed: bad divisor 0x%lx", 2852 new_divisor); 2853 return; 2854 } 2855 2856 reg = lddphysio(HB_ESTAR_MODE); 2857 cur_mask = reg & HB_ECLK_MASK; 2858 for (index = HB_FULL_DIV; index <= HB_LOWEST_DIV; index++) { 2859 if (hb_eclk[index] == cur_mask) { 2860 cur_divisor = index; 2861 break; 2862 } 2863 } 2864 2865 if (cur_divisor == 0) 2866 cmn_err(CE_PANIC, "cpu_change_speed: current divisor " 2867 "can't be determined!"); 2868 2869 /* 2870 * If we are already at the requested divisor speed, just 2871 * return. 2872 */ 2873 if (cur_divisor == new_divisor) 2874 return; 2875 2876 if (cur_divisor == HB_FULL_DIV && new_divisor == HB_HALF_DIV) { 2877 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor); 2878 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2879 SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE); 2880 2881 } else if (cur_divisor == HB_HALF_DIV && new_divisor == HB_FULL_DIV) { 2882 SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE); 2883 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2884 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2885 CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor); 2886 2887 } else if (cur_divisor == HB_FULL_DIV && new_divisor > HB_HALF_DIV) { 2888 /* 2889 * Transition to 1/2 speed first, then to 2890 * lower speed. 
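		 *
		 * For example, a request to go from full speed (divisor 1)
		 * to divisor 6 is performed as 1 -> 2 -> 6: the refresh
		 * count is adjusted and memory self-refresh enabled at the
		 * 1/2-speed step before dropping to the final divisor.
		 * (Illustrative; the only valid divisors in hb_eclk[] are
		 * 1, 2, 4, 6 and 8.)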
2891 */ 2892 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, HB_HALF_DIV); 2893 SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]); 2894 SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE); 2895 2896 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, HB_HALF_DIV, new_divisor); 2897 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2898 2899 } else if (cur_divisor > HB_HALF_DIV && new_divisor == HB_FULL_DIV) { 2900 /* 2901 * Transition to 1/2 speed first, then to 2902 * full speed. 2903 */ 2904 SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]); 2905 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2906 CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, HB_HALF_DIV); 2907 2908 SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE); 2909 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2910 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2911 CHANGE_REFRESH_COUNT(HB_SPEED_UP, HB_HALF_DIV, new_divisor); 2912 2913 } else if (cur_divisor < new_divisor) { 2914 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor); 2915 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2916 2917 } else if (cur_divisor > new_divisor) { 2918 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2919 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2920 CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor); 2921 } 2922 CPU->cpu_m.divisor = (uchar_t)new_divisor; 2923 #endif 2924 } 2925 2926 /* 2927 * Clear the AFSR sticky bits and the UDBs. For Sabre/Spitfire/Blackbird, 2928 * we clear all the sticky bits. If a non-null pointer to a async fault 2929 * structure argument is passed in, the captured error state (AFSR, AFAR, UDBs) 2930 * info will be returned in the structure. If a non-null pointer to a 2931 * uint64_t is passed in, this will be updated if the CP bit is set in the 2932 * AFSR. The afsr will be returned. 2933 */ 2934 static uint64_t 2935 clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr) 2936 { 2937 struct async_flt *aflt = (struct async_flt *)spf_flt; 2938 uint64_t afsr; 2939 uint64_t udbh, udbl; 2940 2941 get_asyncflt(&afsr); 2942 2943 if ((acc_afsr != NULL) && (afsr & P_AFSR_CP)) 2944 *acc_afsr |= afsr; 2945 2946 if (spf_flt != NULL) { 2947 aflt->flt_stat = afsr; 2948 get_asyncaddr(&aflt->flt_addr); 2949 aflt->flt_addr &= SABRE_AFAR_PA; 2950 2951 get_udb_errors(&udbh, &udbl); 2952 spf_flt->flt_sdbh = (ushort_t)(udbh & 0x3FF); 2953 spf_flt->flt_sdbl = (ushort_t)(udbl & 0x3FF); 2954 } 2955 2956 set_asyncflt(afsr); /* clear afsr */ 2957 clr_datapath(); /* clear udbs */ 2958 return (afsr); 2959 } 2960 2961 /* 2962 * Scan the ecache to look for bad lines. If found, the afsr, afar, e$ data 2963 * tag of the first bad line will be returned. We also return the old-afsr 2964 * (before clearing the sticky bits). The linecnt data will be updated to 2965 * indicate the number of bad lines detected. 2966 */ 2967 static void 2968 scan_ecache(uint64_t *t_afar, ec_data_t *ecache_data, 2969 uint64_t *ecache_tag, int *linecnt, uint64_t *t_afsr) 2970 { 2971 ec_data_t t_ecdata[8]; 2972 uint64_t t_etag, oafsr; 2973 uint64_t pa = AFLT_INV_ADDR; 2974 uint32_t i, j, ecache_sz; 2975 uint64_t acc_afsr = 0; 2976 uint64_t *cpu_afsr = NULL; 2977 2978 if (CPU_PRIVATE(CPU) != NULL) 2979 cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 2980 2981 *linecnt = 0; 2982 ecache_sz = cpunodes[CPU->cpu_id].ecache_size; 2983 2984 for (i = 0; i < ecache_sz; i += 64) { 2985 get_ecache_dtag(i, (uint64_t *)&t_ecdata[0], &t_etag, &oafsr, 2986 cpu_afsr); 2987 acc_afsr |= oafsr; 2988 2989 /* 2990 * Scan through the whole 64 bytes line in 8 8-byte chunks 2991 * looking for the first occurrence of an EDP error. The AFSR 2992 * info is captured for each 8-byte chunk. 
Note that for 2993 * Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in 2994 * 16-byte chunk granularity (i.e. the AFSR will be the same 2995 * for the high and low 8-byte words within the 16-byte chunk). 2996 * For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte 2997 * granularity and only PSYND bits [7:0] are used. 2998 */ 2999 for (j = 0; j < 8; j++) { 3000 ec_data_t *ecdptr = &t_ecdata[j]; 3001 3002 if (ecdptr->ec_afsr & P_AFSR_EDP) { 3003 uint64_t errpa; 3004 ushort_t psynd; 3005 uint32_t ec_set_size = ecache_sz / 3006 ecache_associativity; 3007 3008 /* 3009 * For Spitfire/Blackbird, we need to look at 3010 * the PSYND to make sure that this 8-byte chunk 3011 * is the right one. PSYND bits [15:8] belong 3012 * to the upper 8-byte (even) chunk. Bits 3013 * [7:0] belong to the lower 8-byte chunk (odd). 3014 */ 3015 psynd = ecdptr->ec_afsr & P_AFSR_P_SYND; 3016 if (!isus2i && !isus2e) { 3017 if (j & 0x1) 3018 psynd = psynd & 0xFF; 3019 else 3020 psynd = psynd >> 8; 3021 3022 if (!psynd) 3023 continue; /* wrong chunk */ 3024 } 3025 3026 /* Construct the PA */ 3027 errpa = ((t_etag & cpu_ec_tag_mask) << 3028 cpu_ec_tag_shift) | ((i | (j << 3)) % 3029 ec_set_size); 3030 3031 /* clean up the cache line */ 3032 flushecacheline(P2ALIGN(errpa, 64), 3033 cpunodes[CPU->cpu_id].ecache_size); 3034 3035 oafsr = clear_errors(NULL, cpu_afsr); 3036 acc_afsr |= oafsr; 3037 3038 (*linecnt)++; 3039 3040 /* 3041 * Capture the PA for the first bad line found. 3042 * Return the ecache dump and tag info. 3043 */ 3044 if (pa == AFLT_INV_ADDR) { 3045 int k; 3046 3047 pa = errpa; 3048 for (k = 0; k < 8; k++) 3049 ecache_data[k] = t_ecdata[k]; 3050 *ecache_tag = t_etag; 3051 } 3052 break; 3053 } 3054 } 3055 } 3056 *t_afar = pa; 3057 *t_afsr = acc_afsr; 3058 } 3059 3060 static void 3061 cpu_log_ecmem_info(spitf_async_flt *spf_flt) 3062 { 3063 struct async_flt *aflt = (struct async_flt *)spf_flt; 3064 uint64_t ecache_tag = spf_flt->flt_ec_tag; 3065 char linestr[30]; 3066 char *state_str; 3067 int i; 3068 3069 /* 3070 * Check the ecache tag to make sure it 3071 * is valid. If invalid, a memory dump was 3072 * captured instead of a ecache dump. 
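	 *
	 * In the memory-dump case the per-chunk AFSR values in
	 * flt_ec_data[] were left as zero, so the PSYND/EDP checks in the
	 * dump loop below will not flag any 8-byte chunk as bad.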
3073 */ 3074 if (spf_flt->flt_ec_tag != AFLT_INV_ADDR) { 3075 uchar_t eparity = (uchar_t) 3076 ((ecache_tag & cpu_ec_par_mask) >> cpu_ec_par_shift); 3077 3078 uchar_t estate = (uchar_t) 3079 ((ecache_tag & cpu_ec_state_mask) >> cpu_ec_state_shift); 3080 3081 if (estate == cpu_ec_state_shr) 3082 state_str = "Shared"; 3083 else if (estate == cpu_ec_state_exl) 3084 state_str = "Exclusive"; 3085 else if (estate == cpu_ec_state_own) 3086 state_str = "Owner"; 3087 else if (estate == cpu_ec_state_mod) 3088 state_str = "Modified"; 3089 else 3090 state_str = "Invalid"; 3091 3092 if (spf_flt->flt_ec_lcnt > 1) { 3093 (void) snprintf(linestr, sizeof (linestr), 3094 "Badlines found=%d", spf_flt->flt_ec_lcnt); 3095 } else { 3096 linestr[0] = '\0'; 3097 } 3098 3099 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL, 3100 " PA=0x%08x.%08x\n E$tag 0x%08x.%08x E$State: %s " 3101 "E$parity 0x%02x %s", (uint32_t)(aflt->flt_addr >> 32), 3102 (uint32_t)aflt->flt_addr, (uint32_t)(ecache_tag >> 32), 3103 (uint32_t)ecache_tag, state_str, 3104 (uint32_t)eparity, linestr); 3105 } else { 3106 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL, 3107 " E$tag != PA from AFAR; E$line was victimized" 3108 "\n dumping memory from PA 0x%08x.%08x instead", 3109 (uint32_t)(P2ALIGN(aflt->flt_addr, 64) >> 32), 3110 (uint32_t)P2ALIGN(aflt->flt_addr, 64)); 3111 } 3112 3113 /* 3114 * Dump out all 8 8-byte ecache data captured 3115 * For each 8-byte data captured, we check the 3116 * captured afsr's parity syndrome to find out 3117 * which 8-byte chunk is bad. For memory dump, the 3118 * AFSR values were initialized to 0. 3119 */ 3120 for (i = 0; i < 8; i++) { 3121 ec_data_t *ecdptr; 3122 uint_t offset; 3123 ushort_t psynd; 3124 ushort_t bad; 3125 uint64_t edp; 3126 3127 offset = i << 3; /* multiply by 8 */ 3128 ecdptr = &spf_flt->flt_ec_data[i]; 3129 psynd = ecdptr->ec_afsr & P_AFSR_P_SYND; 3130 edp = ecdptr->ec_afsr & P_AFSR_EDP; 3131 3132 /* 3133 * For Sabre/Hummingbird, parity synd is captured only 3134 * in [7:0] of AFSR.PSYND for each 8-byte chunk. 3135 * For spitfire/blackbird, AFSR.PSYND is captured 3136 * in 16-byte granularity. [15:8] represent 3137 * the upper 8 byte and [7:0] the lower 8 byte. 3138 */ 3139 if (isus2i || isus2e || (i & 0x1)) 3140 bad = (psynd & 0xFF); /* check bits [7:0] */ 3141 else 3142 bad = (psynd & 0xFF00); /* check bits [15:8] */ 3143 3144 if (bad && edp) { 3145 cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL, 3146 " E$Data (0x%02x): 0x%08x.%08x " 3147 "*Bad* PSYND=0x%04x", offset, 3148 (uint32_t)(ecdptr->ec_d8 >> 32), 3149 (uint32_t)ecdptr->ec_d8, psynd); 3150 } else { 3151 cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL, 3152 " E$Data (0x%02x): 0x%08x.%08x", offset, 3153 (uint32_t)(ecdptr->ec_d8 >> 32), 3154 (uint32_t)ecdptr->ec_d8); 3155 } 3156 } 3157 } 3158 3159 /* 3160 * Common logging function for all cpu async errors. This function allows the 3161 * caller to generate a single cmn_err() call that logs the appropriate items 3162 * from the fault structure, and implements our rules for AFT logging levels. 3163 * 3164 * ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT) 3165 * tagnum: 0, 1, 2, .. generate the [AFT#] tag 3166 * spflt: pointer to spitfire async fault structure 3167 * logflags: bitflags indicating what to output 3168 * endstr: a end string to appear at the end of this log 3169 * fmt: a format string to appear at the beginning of the log 3170 * 3171 * The logflags allows the construction of predetermined output from the spflt 3172 * structure. 
The individual data items always appear in a consistent order. 3173 * Note that either or both of the spflt structure pointer and logflags may be 3174 * NULL or zero respectively, indicating that the predetermined output 3175 * substrings are not requested in this log. The output looks like this: 3176 * 3177 * [AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU> 3178 * <CPU_SPACE><CPU_ERRID> 3179 * newline+4spaces<CPU_AFSR><CPU_AFAR> 3180 * newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC> 3181 * newline+4spaces<CPU_UDBH><CPU_UDBL> 3182 * newline+4spaces<CPU_SYND> 3183 * newline+4spaces<endstr> 3184 * 3185 * Note that <endstr> may not start on a newline if we are logging <CPU_PSYND>; 3186 * it is assumed that <endstr> will be the unum string in this case. The size 3187 * of our intermediate formatting buf[] is based on the worst case of all flags 3188 * being enabled. We pass the caller's varargs directly to vcmn_err() for 3189 * formatting so we don't need additional stack space to format them here. 3190 */ 3191 /*PRINTFLIKE6*/ 3192 static void 3193 cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, uint_t logflags, 3194 const char *endstr, const char *fmt, ...) 3195 { 3196 struct async_flt *aflt = (struct async_flt *)spflt; 3197 char buf[400], *p, *q; /* see comments about buf[] size above */ 3198 va_list ap; 3199 int console_log_flag; 3200 3201 if ((aflt == NULL) || ((aflt->flt_class == CPU_FAULT) && 3202 (aflt->flt_stat & P_AFSR_LEVEL1)) || 3203 (aflt->flt_panic)) { 3204 console_log_flag = (tagnum < 2) || aft_verbose; 3205 } else { 3206 int verbose = ((aflt->flt_class == BUS_FAULT) || 3207 (aflt->flt_stat & P_AFSR_CE)) ? 3208 ce_verbose_memory : ce_verbose_other; 3209 3210 if (!verbose) 3211 return; 3212 3213 console_log_flag = (verbose > 1); 3214 } 3215 3216 if (console_log_flag) 3217 (void) sprintf(buf, "[AFT%d]", tagnum); 3218 else 3219 (void) sprintf(buf, "![AFT%d]", tagnum); 3220 3221 p = buf + strlen(buf); /* current buffer position */ 3222 q = buf + sizeof (buf); /* pointer past end of buffer */ 3223 3224 if (spflt != NULL && (logflags & CPU_ERRID_FIRST)) { 3225 (void) snprintf(p, (size_t)(q - p), " errID 0x%08x.%08x", 3226 (uint32_t)(aflt->flt_id >> 32), (uint32_t)aflt->flt_id); 3227 p += strlen(p); 3228 } 3229 3230 /* 3231 * Copy the caller's format string verbatim into buf[]. It will be 3232 * formatted by the call to vcmn_err() at the end of this function. 3233 */ 3234 if (fmt != NULL && p < q) { 3235 (void) strncpy(p, fmt, (size_t)(q - p - 1)); 3236 buf[sizeof (buf) - 1] = '\0'; 3237 p += strlen(p); 3238 } 3239 3240 if (spflt != NULL) { 3241 if (logflags & CPU_FLTCPU) { 3242 (void) snprintf(p, (size_t)(q - p), " CPU%d", 3243 aflt->flt_inst); 3244 p += strlen(p); 3245 } 3246 3247 if (logflags & CPU_SPACE) { 3248 if (aflt->flt_status & ECC_D_TRAP) 3249 (void) snprintf(p, (size_t)(q - p), 3250 " Data access"); 3251 else if (aflt->flt_status & ECC_I_TRAP) 3252 (void) snprintf(p, (size_t)(q - p), 3253 " Instruction access"); 3254 p += strlen(p); 3255 } 3256 3257 if (logflags & CPU_TL) { 3258 (void) snprintf(p, (size_t)(q - p), " at TL%s", 3259 aflt->flt_tl ? 
">0" : "=0"); 3260 p += strlen(p); 3261 } 3262 3263 if (logflags & CPU_ERRID) { 3264 (void) snprintf(p, (size_t)(q - p), 3265 ", errID 0x%08x.%08x", 3266 (uint32_t)(aflt->flt_id >> 32), 3267 (uint32_t)aflt->flt_id); 3268 p += strlen(p); 3269 } 3270 3271 if (logflags & CPU_AFSR) { 3272 (void) snprintf(p, (size_t)(q - p), 3273 "\n AFSR 0x%08b.%08b", 3274 (uint32_t)(aflt->flt_stat >> 32), AFSR_FMTSTR0, 3275 (uint32_t)aflt->flt_stat, AFSR_FMTSTR1); 3276 p += strlen(p); 3277 } 3278 3279 if (logflags & CPU_AFAR) { 3280 (void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x", 3281 (uint32_t)(aflt->flt_addr >> 32), 3282 (uint32_t)aflt->flt_addr); 3283 p += strlen(p); 3284 } 3285 3286 if (logflags & CPU_AF_PSYND) { 3287 ushort_t psynd = (ushort_t) 3288 (aflt->flt_stat & P_AFSR_P_SYND); 3289 3290 (void) snprintf(p, (size_t)(q - p), 3291 "\n AFSR.PSYND 0x%04x(Score %02d)", 3292 psynd, ecc_psynd_score(psynd)); 3293 p += strlen(p); 3294 } 3295 3296 if (logflags & CPU_AF_ETS) { 3297 (void) snprintf(p, (size_t)(q - p), " AFSR.ETS 0x%02x", 3298 (uchar_t)((aflt->flt_stat & P_AFSR_ETS) >> 16)); 3299 p += strlen(p); 3300 } 3301 3302 if (logflags & CPU_FAULTPC) { 3303 (void) snprintf(p, (size_t)(q - p), " Fault_PC 0x%p", 3304 (void *)aflt->flt_pc); 3305 p += strlen(p); 3306 } 3307 3308 if (logflags & CPU_UDBH) { 3309 (void) snprintf(p, (size_t)(q - p), 3310 "\n UDBH 0x%04b UDBH.ESYND 0x%02x", 3311 spflt->flt_sdbh, UDB_FMTSTR, 3312 spflt->flt_sdbh & 0xFF); 3313 p += strlen(p); 3314 } 3315 3316 if (logflags & CPU_UDBL) { 3317 (void) snprintf(p, (size_t)(q - p), 3318 " UDBL 0x%04b UDBL.ESYND 0x%02x", 3319 spflt->flt_sdbl, UDB_FMTSTR, 3320 spflt->flt_sdbl & 0xFF); 3321 p += strlen(p); 3322 } 3323 3324 if (logflags & CPU_SYND) { 3325 ushort_t synd = SYND(aflt->flt_synd); 3326 3327 (void) snprintf(p, (size_t)(q - p), 3328 "\n %s Syndrome 0x%x Memory Module ", 3329 UDBL(aflt->flt_synd) ? "UDBL" : "UDBH", synd); 3330 p += strlen(p); 3331 } 3332 } 3333 3334 if (endstr != NULL) { 3335 if (!(logflags & CPU_SYND)) 3336 (void) snprintf(p, (size_t)(q - p), "\n %s", endstr); 3337 else 3338 (void) snprintf(p, (size_t)(q - p), "%s", endstr); 3339 p += strlen(p); 3340 } 3341 3342 if (ce_code == CE_CONT && (p < q - 1)) 3343 (void) strcpy(p, "\n"); /* add final \n if needed */ 3344 3345 va_start(ap, fmt); 3346 vcmn_err(ce_code, buf, ap); 3347 va_end(ap); 3348 } 3349 3350 /* 3351 * Ecache Scrubbing 3352 * 3353 * The basic idea is to prevent lines from sitting in the ecache long enough 3354 * to build up soft errors which can lead to ecache parity errors. 3355 * 3356 * The following rules are observed when flushing the ecache: 3357 * 3358 * 1. When the system is busy, flush bad clean lines 3359 * 2. When the system is idle, flush all clean lines 3360 * 3. When the system is idle, flush good dirty lines 3361 * 4. Never flush bad dirty lines. 3362 * 3363 * modify parity busy idle 3364 * ---------------------------- 3365 * clean good X 3366 * clean bad X X 3367 * dirty good X 3368 * dirty bad 3369 * 3370 * Bad or good refers to whether a line has an E$ parity error or not. 3371 * Clean or dirty refers to the state of the modified bit. We currently 3372 * default the scan rate to 100 (scan 10% of the cache per second). 3373 * 3374 * The following are E$ states and actions. 
3375 * 3376 * We encode our state as a 3-bit number, consisting of: 3377 * ECACHE_STATE_MODIFIED (0=clean, 1=dirty) 3378 * ECACHE_STATE_PARITY (0=good, 1=bad) 3379 * ECACHE_STATE_BUSY (0=idle, 1=busy) 3380 * 3381 * We associate a flushing and a logging action with each state. 3382 * 3383 * E$ actions are different for Spitfire and Sabre/Hummingbird modules. 3384 * MIRROR_FLUSH indicates that an E$ line will be flushed for the mirrored 3385 * E$ only, in addition to the value set by ec_flush. 3386 */ 3387 3388 #define ALWAYS_FLUSH 0x1 /* flush E$ line on all E$ types */ 3389 #define NEVER_FLUSH 0x0 /* never flush the E$ line */ 3390 #define MIRROR_FLUSH 0xF /* flush E$ line on mirrored E$ only */ 3391 3392 struct { 3393 char ec_flush; /* whether to flush or not */ 3394 char ec_log; /* ecache logging */ 3395 char ec_log_type; /* log type info */ 3396 } ec_action[] = { /* states of the E$ line in M P B */ 3397 { ALWAYS_FLUSH, 0, 0 }, /* 0 0 0 clean_good_idle */ 3398 { MIRROR_FLUSH, 0, 0 }, /* 0 0 1 clean_good_busy */ 3399 { ALWAYS_FLUSH, 1, CPU_BADLINE_CI_ERR }, /* 0 1 0 clean_bad_idle */ 3400 { ALWAYS_FLUSH, 1, CPU_BADLINE_CB_ERR }, /* 0 1 1 clean_bad_busy */ 3401 { ALWAYS_FLUSH, 0, 0 }, /* 1 0 0 dirty_good_idle */ 3402 { MIRROR_FLUSH, 0, 0 }, /* 1 0 1 dirty_good_busy */ 3403 { NEVER_FLUSH, 1, CPU_BADLINE_DI_ERR }, /* 1 1 0 dirty_bad_idle */ 3404 { NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR } /* 1 1 1 dirty_bad_busy */ 3405 }; 3406 3407 /* 3408 * Offsets into the ec_action[] that determine the clean_good_busy and 3409 * dirty_good_busy lines. 3410 */ 3411 #define ECACHE_CGB_LINE 1 /* E$ clean_good_busy line */ 3412 #define ECACHE_DGB_LINE 5 /* E$ dirty_good_busy line */ 3413 3414 /* 3415 * We flush lines which are Clean_Good_Busy as well as lines which are 3416 * Dirty_Good_Busy, but only for non-mirrored E$. 3417 */ 3418 #define CGB(x, m) (((x) == ECACHE_CGB_LINE) && (m != ECACHE_CPU_MIRROR)) 3419 #define DGB(x, m) (((x) == ECACHE_DGB_LINE) && (m != ECACHE_CPU_MIRROR)) 3420 3421 #define ECACHE_STATE_MODIFIED 0x4 3422 #define ECACHE_STATE_PARITY 0x2 3423 #define ECACHE_STATE_BUSY 0x1 3424 3425 /* 3426 * For mirrored ecache, ecache_calls_a_sec and ecache_scan_rate are reduced. 3427 */ 3428 int ecache_calls_a_sec_mirrored = 1; 3429 int ecache_lines_per_call_mirrored = 1; 3430 3431 int ecache_scrub_enable = 1; /* ecache scrubbing is on by default */ 3432 int ecache_scrub_verbose = 1; /* prints clean and dirty lines */ 3433 int ecache_scrub_panic = 0; /* panics on a clean and dirty line */ 3434 int ecache_calls_a_sec = 100; /* scrubber calls per sec */ 3435 int ecache_scan_rate = 100; /* scan rate (in tenths of a percent) */ 3436 int ecache_idle_factor = 1; /* increase the scan rate when idle */ 3437 int ecache_flush_clean_good_busy = 50; /* flush rate (in percent) */ 3438 int ecache_flush_dirty_good_busy = 100; /* flush rate (in percent) */ 3439 3440 volatile int ec_timeout_calls = 1; /* timeout calls */ 3441 3442 /* 3443 * Interrupt number and pil for ecache scrubber cross-trap calls. 3444 */ 3445 static uint_t ecache_scrub_inum; 3446 uint_t ecache_scrub_pil = PIL_9; 3447 3448 /* 3449 * Kstats for the E$ scrubber.
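 *
 * Note that scrub_ecache_line() indexes the first eight counters
 * directly by the 3-bit modified/parity/busy state
 * (ec_knp = (kstat_named_t *)ec_ksp + mpb), so their order must match
 * the ec_action[] table above.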
3450 */ 3451 typedef struct ecache_kstat { 3452 kstat_named_t clean_good_idle; /* # of lines scrubbed */ 3453 kstat_named_t clean_good_busy; /* # of lines skipped */ 3454 kstat_named_t clean_bad_idle; /* # of lines scrubbed */ 3455 kstat_named_t clean_bad_busy; /* # of lines scrubbed */ 3456 kstat_named_t dirty_good_idle; /* # of lines scrubbed */ 3457 kstat_named_t dirty_good_busy; /* # of lines skipped */ 3458 kstat_named_t dirty_bad_idle; /* # of lines skipped */ 3459 kstat_named_t dirty_bad_busy; /* # of lines skipped */ 3460 kstat_named_t invalid_lines; /* # of invalid lines */ 3461 kstat_named_t clean_good_busy_flush; /* # of lines scrubbed */ 3462 kstat_named_t dirty_good_busy_flush; /* # of lines scrubbed */ 3463 kstat_named_t tags_cleared; /* # of E$ tags cleared */ 3464 } ecache_kstat_t; 3465 3466 static ecache_kstat_t ec_kstat_template = { 3467 { "clean_good_idle", KSTAT_DATA_ULONG }, 3468 { "clean_good_busy", KSTAT_DATA_ULONG }, 3469 { "clean_bad_idle", KSTAT_DATA_ULONG }, 3470 { "clean_bad_busy", KSTAT_DATA_ULONG }, 3471 { "dirty_good_idle", KSTAT_DATA_ULONG }, 3472 { "dirty_good_busy", KSTAT_DATA_ULONG }, 3473 { "dirty_bad_idle", KSTAT_DATA_ULONG }, 3474 { "dirty_bad_busy", KSTAT_DATA_ULONG }, 3475 { "invalid_lines", KSTAT_DATA_ULONG }, 3476 { "clean_good_busy_flush", KSTAT_DATA_ULONG }, 3477 { "dirty_good_busy_flush", KSTAT_DATA_ULONG }, 3478 { "ecache_tags_cleared", KSTAT_DATA_ULONG } 3479 }; 3480 3481 struct kmem_cache *sf_private_cache; 3482 3483 /* 3484 * Called periodically on each CPU to scan the ecache once a sec. 3485 * adjusting the ecache line index appropriately 3486 */ 3487 void 3488 scrub_ecache_line() 3489 { 3490 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc); 3491 int cpuid = CPU->cpu_id; 3492 uint32_t index = ssmp->ecache_flush_index; 3493 uint64_t ec_size = cpunodes[cpuid].ecache_size; 3494 size_t ec_linesize = cpunodes[cpuid].ecache_linesize; 3495 int nlines = ssmp->ecache_nlines; 3496 uint32_t ec_set_size = ec_size / ecache_associativity; 3497 int ec_mirror = ssmp->ecache_mirror; 3498 ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data; 3499 3500 int line, scan_lines, flush_clean_busy = 0, flush_dirty_busy = 0; 3501 int mpb; /* encode Modified, Parity, Busy for action */ 3502 uchar_t state; 3503 uint64_t ec_tag, paddr, oafsr, tafsr, nafsr; 3504 uint64_t *acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 3505 ec_data_t ec_data[8]; 3506 kstat_named_t *ec_knp; 3507 3508 switch (ec_mirror) { 3509 default: 3510 case ECACHE_CPU_NON_MIRROR: 3511 /* 3512 * The E$ scan rate is expressed in units of tenths of 3513 * a percent. ecache_scan_rate = 1000 (100%) means the 3514 * whole cache is scanned every second. 3515 */ 3516 scan_lines = (nlines * ecache_scan_rate) / 3517 (1000 * ecache_calls_a_sec); 3518 if (!(ssmp->ecache_busy)) { 3519 if (ecache_idle_factor > 0) { 3520 scan_lines *= ecache_idle_factor; 3521 } 3522 } else { 3523 flush_clean_busy = (scan_lines * 3524 ecache_flush_clean_good_busy) / 100; 3525 flush_dirty_busy = (scan_lines * 3526 ecache_flush_dirty_good_busy) / 100; 3527 } 3528 3529 ec_timeout_calls = (ecache_calls_a_sec ? 3530 ecache_calls_a_sec : 1); 3531 break; 3532 3533 case ECACHE_CPU_MIRROR: 3534 scan_lines = ecache_lines_per_call_mirrored; 3535 ec_timeout_calls = (ecache_calls_a_sec_mirrored ? 
3536 ecache_calls_a_sec_mirrored : 1); 3537 break; 3538 } 3539 3540 /* 3541 * The ecache scrubber algorithm operates by reading and 3542 * decoding the E$ tag to determine whether the corresponding E$ line 3543 * can be scrubbed. There is an implicit assumption in the scrubber 3544 * logic that the E$ tag is valid. Unfortunately, this assumption is 3545 * flawed since the E$ tag may also be corrupted and have parity errors. 3546 * The scrubber logic is enhanced to check the validity of the E$ tag 3547 * before scrubbing. When a parity error is detected in the E$ tag, 3548 * it is possible to recover and scrub the tag under certain conditions 3549 * so that an ETP error condition can be avoided. 3550 */ 3551 3552 for (mpb = line = 0; line < scan_lines; line++, mpb = 0) { 3553 /* 3554 * We get the old-AFSR before clearing the AFSR sticky bits 3555 * in {get_ecache_tag, check_ecache_line, get_ecache_dtag}. 3556 * If the CP bit is set in the old-AFSR, we log an Orphan CP event. 3557 */ 3558 ec_tag = get_ecache_tag(index, &nafsr, acc_afsr); 3559 state = (uchar_t)((ec_tag & cpu_ec_state_mask) >> 3560 cpu_ec_state_shift); 3561 3562 /* 3563 * ETP is set; try to scrub the ecache tag. 3564 */ 3565 if (nafsr & P_AFSR_ETP) { 3566 ecache_scrub_tag_err(nafsr, state, index); 3567 } else if (state & cpu_ec_state_valid) { 3568 /* 3569 * ETP is not set and the E$ tag is valid. 3570 * Proceed with the E$ scrubbing. 3571 */ 3572 if (state & cpu_ec_state_dirty) 3573 mpb |= ECACHE_STATE_MODIFIED; 3574 3575 tafsr = check_ecache_line(index, acc_afsr); 3576 3577 if (tafsr & P_AFSR_EDP) { 3578 mpb |= ECACHE_STATE_PARITY; 3579 3580 if (ecache_scrub_verbose || 3581 ecache_scrub_panic) { 3582 get_ecache_dtag(P2ALIGN(index, 64), 3583 (uint64_t *)&ec_data[0], 3584 &ec_tag, &oafsr, acc_afsr); 3585 } 3586 } 3587 3588 if (ssmp->ecache_busy) 3589 mpb |= ECACHE_STATE_BUSY; 3590 3591 ec_knp = (kstat_named_t *)ec_ksp + mpb; 3592 ec_knp->value.ul++; 3593 3594 paddr = ((ec_tag & cpu_ec_tag_mask) << 3595 cpu_ec_tag_shift) | (index % ec_set_size); 3596 3597 /* 3598 * We flush the E$ lines depending on ec_flush; we 3599 * additionally flush clean_good_busy and 3600 * dirty_good_busy lines for mirrored E$. 3601 */ 3602 if (ec_action[mpb].ec_flush == ALWAYS_FLUSH) { 3603 flushecacheline(paddr, ec_size); 3604 } else if ((ec_mirror == ECACHE_CPU_MIRROR) && 3605 (ec_action[mpb].ec_flush == MIRROR_FLUSH)) { 3606 flushecacheline(paddr, ec_size); 3607 } else if (ec_action[mpb].ec_flush == NEVER_FLUSH) { 3608 softcall(ecache_page_retire, (void *)paddr); 3609 } 3610 3611 /* 3612 * Conditionally flush both the clean_good and 3613 * dirty_good lines when busy.
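			 *
			 * The budgets were computed above as a percentage
			 * of scan_lines: with the defaults
			 * (ecache_flush_clean_good_busy = 50,
			 * ecache_flush_dirty_good_busy = 100) a pass that
			 * scans, say, 8 lines flushes at most 4
			 * clean_good_busy lines and up to 8 dirty_good_busy
			 * lines (numbers purely illustrative).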
3614 */ 3615 if (CGB(mpb, ec_mirror) && (flush_clean_busy > 0)) { 3616 flush_clean_busy--; 3617 flushecacheline(paddr, ec_size); 3618 ec_ksp->clean_good_busy_flush.value.ul++; 3619 } else if (DGB(mpb, ec_mirror) && 3620 (flush_dirty_busy > 0)) { 3621 flush_dirty_busy--; 3622 flushecacheline(paddr, ec_size); 3623 ec_ksp->dirty_good_busy_flush.value.ul++; 3624 } 3625 3626 if (ec_action[mpb].ec_log && (ecache_scrub_verbose || 3627 ecache_scrub_panic)) { 3628 ecache_scrub_log(ec_data, ec_tag, paddr, mpb, 3629 tafsr); 3630 } 3631 3632 } else { 3633 ec_ksp->invalid_lines.value.ul++; 3634 } 3635 3636 if ((index += ec_linesize) >= ec_size) 3637 index = 0; 3638 3639 } 3640 3641 /* 3642 * set the ecache scrub index for the next time around 3643 */ 3644 ssmp->ecache_flush_index = index; 3645 3646 if (*acc_afsr & P_AFSR_CP) { 3647 uint64_t ret_afsr; 3648 3649 ret_afsr = ecache_scrub_misc_err(CPU_ORPHAN_CP_ERR, *acc_afsr); 3650 if ((ret_afsr & P_AFSR_CP) == 0) 3651 *acc_afsr = 0; 3652 } 3653 } 3654 3655 /* 3656 * Handler for ecache_scrub_inum softint. Call scrub_ecache_line until 3657 * we decrement the outstanding request count to zero. 3658 */ 3659 3660 /*ARGSUSED*/ 3661 uint_t 3662 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2) 3663 { 3664 int i; 3665 int outstanding; 3666 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc); 3667 uint32_t *countp = &ssmp->ec_scrub_outstanding; 3668 3669 do { 3670 outstanding = *countp; 3671 ASSERT(outstanding > 0); 3672 for (i = 0; i < outstanding; i++) 3673 scrub_ecache_line(); 3674 } while (atomic_add_32_nv(countp, -outstanding)); 3675 3676 return (DDI_INTR_CLAIMED); 3677 } 3678 3679 /* 3680 * force each cpu to perform an ecache scrub, called from a timeout 3681 */ 3682 extern xcfunc_t ecache_scrubreq_tl1; 3683 3684 void 3685 do_scrub_ecache_line(void) 3686 { 3687 long delta; 3688 3689 if (ecache_calls_a_sec > hz) 3690 ecache_calls_a_sec = hz; 3691 else if (ecache_calls_a_sec <= 0) 3692 ecache_calls_a_sec = 1; 3693 3694 if (ecache_calls_a_sec_mirrored > hz) 3695 ecache_calls_a_sec_mirrored = hz; 3696 else if (ecache_calls_a_sec_mirrored <= 0) 3697 ecache_calls_a_sec_mirrored = 1; 3698 3699 if (ecache_scrub_enable) { 3700 xt_all(ecache_scrubreq_tl1, ecache_scrub_inum, 0); 3701 delta = hz / ec_timeout_calls; 3702 } else { 3703 delta = hz; 3704 } 3705 3706 (void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0, 3707 delta); 3708 } 3709 3710 /* 3711 * initialization for ecache scrubbing 3712 * This routine is called AFTER all cpus have had cpu_init_private called 3713 * to initialize their private data areas. 3714 */ 3715 void 3716 cpu_init_cache_scrub(void) 3717 { 3718 if (ecache_calls_a_sec > hz) { 3719 cmn_err(CE_NOTE, "ecache_calls_a_sec set too high (%d); " 3720 "resetting to hz (%d)", ecache_calls_a_sec, hz); 3721 ecache_calls_a_sec = hz; 3722 } 3723 3724 /* 3725 * Register softint for ecache scrubbing. 3726 */ 3727 ecache_scrub_inum = add_softintr(ecache_scrub_pil, 3728 scrub_ecache_line_intr, NULL); 3729 3730 /* 3731 * kick off the scrubbing using realtime timeout 3732 */ 3733 (void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0, 3734 hz / ecache_calls_a_sec); 3735 } 3736 3737 /* 3738 * Unset the busy flag for this cpu. 3739 */ 3740 void 3741 cpu_idle_ecache_scrub(struct cpu *cp) 3742 { 3743 if (CPU_PRIVATE(cp) != NULL) { 3744 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, 3745 sfpr_scrub_misc); 3746 ssmp->ecache_busy = ECACHE_CPU_IDLE; 3747 } 3748 } 3749 3750 /* 3751 * Set the busy flag for this cpu. 
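* While the flag is set, scrub_ecache_line() folds ECACHE_STATE_BUSY into its action index and skips the ecache_idle_factor multiplier that is applied only when the CPU is idle, so busy CPUs are typically scanned less aggressively.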
3752 */ 3753 void 3754 cpu_busy_ecache_scrub(struct cpu *cp) 3755 { 3756 if (CPU_PRIVATE(cp) != NULL) { 3757 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, 3758 sfpr_scrub_misc); 3759 ssmp->ecache_busy = ECACHE_CPU_BUSY; 3760 } 3761 } 3762 3763 /* 3764 * Initialize the ecache scrubber data structures. 3765 * The global entry point cpu_init_private replaces this entry point. 3766 * 3767 */ 3768 static void 3769 cpu_init_ecache_scrub_dr(struct cpu *cp) 3770 { 3771 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc); 3772 int cpuid = cp->cpu_id; 3773 3774 /* 3775 * Initialize bookkeeping for cache scrubbing. 3776 */ 3777 bzero(ssmp, sizeof (spitfire_scrub_misc_t)); 3778 3779 ssmp->ecache_flush_index = 0; 3780 3781 ssmp->ecache_nlines = 3782 cpunodes[cpuid].ecache_size / cpunodes[cpuid].ecache_linesize; 3783 3784 /* 3785 * Determine whether we are running on mirrored SRAM. 3786 */ 3787 3788 if (cpunodes[cpuid].msram == ECACHE_CPU_MIRROR) 3789 ssmp->ecache_mirror = ECACHE_CPU_MIRROR; 3790 else 3791 ssmp->ecache_mirror = ECACHE_CPU_NON_MIRROR; 3792 3793 cpu_busy_ecache_scrub(cp); 3794 3795 /* 3796 * Initialize the kstats. 3797 */ 3798 ecache_kstat_init(cp); 3799 } 3800 3801 /* 3802 * Uninitialize the ecache scrubber data structures. 3803 * The global entry point cpu_uninit_private replaces this entry point. 3804 */ 3805 static void 3806 cpu_uninit_ecache_scrub_dr(struct cpu *cp) 3807 { 3808 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc); 3809 3810 if (ssmp->ecache_ksp != NULL) { 3811 kstat_delete(ssmp->ecache_ksp); 3812 ssmp->ecache_ksp = NULL; 3813 } 3814 3815 /* 3816 * Un-initialize bookkeeping for cache scrubbing. 3817 */ 3818 bzero(ssmp, sizeof (spitfire_scrub_misc_t)); 3819 3820 cpu_idle_ecache_scrub(cp); 3821 } 3822 3823 struct kmem_cache *sf_private_cache; 3824 3825 /* 3826 * Cpu private initialization. This includes allocating the cpu_private 3827 * data structure, initializing it, and initializing the scrubber for this 3828 * cpu. This is called once for EVERY cpu, including CPU 0. This function 3829 * calls cpu_init_ecache_scrub_dr to init the scrubber. 3830 * We use kmem_cache_create for the spitfire private data structure because it 3831 * needs to be allocated on an S_ECACHE_MAX_LSIZE (64) byte boundary. 3832 */ 3833 void 3834 cpu_init_private(struct cpu *cp) 3835 { 3836 spitfire_private_t *sfprp; 3837 3838 ASSERT(CPU_PRIVATE(cp) == NULL); 3839 3840 /* 3841 * If the sf_private_cache has not been created, create it. 3842 */ 3843 if (sf_private_cache == NULL) { 3844 sf_private_cache = kmem_cache_create("sf_private_cache", 3845 sizeof (spitfire_private_t), S_ECACHE_MAX_LSIZE, NULL, 3846 NULL, NULL, NULL, NULL, 0); 3847 ASSERT(sf_private_cache); 3848 } 3849 3850 sfprp = CPU_PRIVATE(cp) = kmem_cache_alloc(sf_private_cache, KM_SLEEP); 3851 3852 bzero(sfprp, sizeof (spitfire_private_t)); 3853 3854 cpu_init_ecache_scrub_dr(cp); 3855 } 3856 3857 /* 3858 * Cpu private uninitialization. Uninitialize the Ecache scrubber and 3859 * deallocate the scrubber data structures and cpu_private data structure. 3860 * For now, this function calls cpu_uninit_ecache_scrub_dr to uninit 3861 * the scrubber for the specified cpu.
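* It then frees the private data back to sf_private_cache and clears CPU_PRIVATE(cp) so no stale pointer remains.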
3862 */ 3863 void 3864 cpu_uninit_private(struct cpu *cp) 3865 { 3866 ASSERT(CPU_PRIVATE(cp)); 3867 3868 cpu_uninit_ecache_scrub_dr(cp); 3869 kmem_cache_free(sf_private_cache, CPU_PRIVATE(cp)); 3870 CPU_PRIVATE(cp) = NULL; 3871 } 3872 3873 /* 3874 * Initialize the ecache kstats for each cpu. 3875 */ 3876 static void 3877 ecache_kstat_init(struct cpu *cp) 3878 { 3879 struct kstat *ksp; 3880 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc); 3881 3882 ASSERT(ssmp != NULL); 3883 3884 if ((ksp = kstat_create("unix", cp->cpu_id, "ecache_kstat", "misc", 3885 KSTAT_TYPE_NAMED, 3886 sizeof (ecache_kstat_t) / sizeof (kstat_named_t), 3887 KSTAT_FLAG_WRITABLE)) == NULL) { 3888 ssmp->ecache_ksp = NULL; 3889 cmn_err(CE_NOTE, "!ecache_kstat_init(%d) failed\n", cp->cpu_id); 3890 return; 3891 } 3892 3893 ssmp->ecache_ksp = ksp; 3894 bcopy(&ec_kstat_template, ksp->ks_data, sizeof (ecache_kstat_t)); 3895 kstat_install(ksp); 3896 } 3897 3898 /* 3899 * Log the bad ecache information. 3900 */ 3901 static void 3902 ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, uint64_t paddr, int mpb, 3903 uint64_t afsr) 3904 { 3905 spitf_async_flt spf_flt; 3906 struct async_flt *aflt; 3907 int i; 3908 char *class; 3909 3910 bzero(&spf_flt, sizeof (spitf_async_flt)); 3911 aflt = &spf_flt.cmn_asyncflt; 3912 3913 for (i = 0; i < 8; i++) { 3914 spf_flt.flt_ec_data[i] = ec_data[i]; 3915 } 3916 3917 spf_flt.flt_ec_tag = ec_tag; 3918 3919 if (mpb < (sizeof (ec_action) / sizeof (ec_action[0]))) { 3920 spf_flt.flt_type = ec_action[mpb].ec_log_type; 3921 } else spf_flt.flt_type = (ushort_t)mpb; 3922 3923 aflt->flt_inst = CPU->cpu_id; 3924 aflt->flt_class = CPU_FAULT; 3925 aflt->flt_id = gethrtime_waitfree(); 3926 aflt->flt_addr = paddr; 3927 aflt->flt_stat = afsr; 3928 aflt->flt_panic = (uchar_t)ecache_scrub_panic; 3929 3930 switch (mpb) { 3931 case CPU_ECACHE_TAG_ERR: 3932 case CPU_ECACHE_ADDR_PAR_ERR: 3933 case CPU_ECACHE_ETP_ETS_ERR: 3934 case CPU_ECACHE_STATE_ERR: 3935 class = FM_EREPORT_CPU_USII_ESCRUB_TAG; 3936 break; 3937 default: 3938 class = FM_EREPORT_CPU_USII_ESCRUB_DATA; 3939 break; 3940 } 3941 3942 cpu_errorq_dispatch(class, (void *)&spf_flt, sizeof (spf_flt), 3943 ue_queue, aflt->flt_panic); 3944 3945 if (aflt->flt_panic) 3946 cmn_err(CE_PANIC, "ecache_scrub_panic set and bad E$ " 3947 "line detected"); 3948 } 3949 3950 /* 3951 * Process an ecache error that occurred during E$ scrubbing. 3952 * We do the ecache scan to find the bad line, flush the bad line, 3953 * and start the memscrubber to find any UE (in memory or in another cache). 3954 */ 3955 static uint64_t 3956 ecache_scrub_misc_err(int type, uint64_t afsr) 3957 { 3958 spitf_async_flt spf_flt; 3959 struct async_flt *aflt; 3960 uint64_t oafsr; 3961 3962 bzero(&spf_flt, sizeof (spitf_async_flt)); 3963 aflt = &spf_flt.cmn_asyncflt; 3964 3965 /* 3966 * Scan each line in the cache to look for the one 3967 * with bad parity. 3968 */ 3969 aflt->flt_addr = AFLT_INV_ADDR; 3970 scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0], 3971 &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr); 3972 3973 if (oafsr & P_AFSR_CP) { 3974 uint64_t *cp_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 3975 *cp_afsr |= oafsr; 3976 } 3977 3978 /* 3979 * If we found a bad PA, update the state to indicate if it is 3980 * memory or I/O space. 3981 */ 3982 if (aflt->flt_addr != AFLT_INV_ADDR) { 3983 aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >> 3984 MMU_PAGESHIFT)) ?
1 : 0; 3985 } 3986 3987 spf_flt.flt_type = (ushort_t)type; 3988 3989 aflt->flt_inst = CPU->cpu_id; 3990 aflt->flt_class = CPU_FAULT; 3991 aflt->flt_id = gethrtime_waitfree(); 3992 aflt->flt_status = afsr; 3993 aflt->flt_panic = (uchar_t)ecache_scrub_panic; 3994 3995 /* 3996 * We have the bad line, flush that line and start 3997 * the memscrubber. 3998 */ 3999 if (spf_flt.flt_ec_lcnt > 0) { 4000 flushecacheline(P2ALIGN(aflt->flt_addr, 64), 4001 cpunodes[CPU->cpu_id].ecache_size); 4002 read_all_memscrub = 1; 4003 memscrub_run(); 4004 } 4005 4006 cpu_errorq_dispatch((type == CPU_ORPHAN_CP_ERR) ? 4007 FM_EREPORT_CPU_USII_CP : FM_EREPORT_CPU_USII_UNKNOWN, 4008 (void *)&spf_flt, sizeof (spf_flt), ue_queue, aflt->flt_panic); 4009 4010 return (oafsr); 4011 } 4012 4013 static void 4014 ecache_scrub_tag_err(uint64_t afsr, uchar_t state, uint32_t index) 4015 { 4016 ushort_t afsr_ets = (afsr & P_AFSR_ETS) >> P_AFSR_ETS_SHIFT; 4017 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc); 4018 ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data; 4019 uint64_t ec_tag, paddr, oafsr; 4020 ec_data_t ec_data[8]; 4021 int cpuid = CPU->cpu_id; 4022 uint32_t ec_set_size = cpunodes[cpuid].ecache_size / 4023 ecache_associativity; 4024 uint64_t *cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 4025 4026 get_ecache_dtag(P2ALIGN(index, 64), (uint64_t *)&ec_data[0], &ec_tag, 4027 &oafsr, cpu_afsr); 4028 paddr = ((ec_tag & cpu_ec_tag_mask) << cpu_ec_tag_shift) | 4029 (index % ec_set_size); 4030 4031 /* 4032 * E$ tag state has good parity 4033 */ 4034 if ((afsr_ets & cpu_ec_state_parity) == 0) { 4035 if (afsr_ets & cpu_ec_parity) { 4036 /* 4037 * E$ tag state bits indicate the line is clean, 4038 * invalidate the E$ tag and continue. 4039 */ 4040 if (!(state & cpu_ec_state_dirty)) { 4041 /* 4042 * Zero the tag and mark the state invalid 4043 * with good parity for the tag. 
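* Since the line is known to be clean, no modified data is lost by invalidating it; the data will simply be refetched on a later miss.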
4044 */ 4045 if (isus2i || isus2e) 4046 write_hb_ec_tag_parity(index); 4047 else 4048 write_ec_tag_parity(index); 4049 4050 /* Sync with the dual tag */ 4051 flushecacheline(0, 4052 cpunodes[CPU->cpu_id].ecache_size); 4053 ec_ksp->tags_cleared.value.ul++; 4054 ecache_scrub_log(ec_data, ec_tag, paddr, 4055 CPU_ECACHE_TAG_ERR, afsr); 4056 return; 4057 } else { 4058 ecache_scrub_log(ec_data, ec_tag, paddr, 4059 CPU_ECACHE_ADDR_PAR_ERR, afsr); 4060 cmn_err(CE_PANIC, " E$ tag address has bad" 4061 " parity"); 4062 } 4063 } else if ((afsr_ets & cpu_ec_parity) == 0) { 4064 /* 4065 * ETS is zero but ETP is set 4066 */ 4067 ecache_scrub_log(ec_data, ec_tag, paddr, 4068 CPU_ECACHE_ETP_ETS_ERR, afsr); 4069 cmn_err(CE_PANIC, "AFSR.ETP is set and" 4070 " AFSR.ETS is zero"); 4071 } 4072 } else { 4073 /* 4074 * E$ tag state bit has a bad parity 4075 */ 4076 ecache_scrub_log(ec_data, ec_tag, paddr, 4077 CPU_ECACHE_STATE_ERR, afsr); 4078 cmn_err(CE_PANIC, "E$ tag state has bad parity"); 4079 } 4080 } 4081 4082 static void 4083 ecache_page_retire(void *arg) 4084 { 4085 uint64_t paddr = (uint64_t)arg; 4086 (void) page_retire(paddr, PR_UE); 4087 } 4088 4089 void 4090 sticksync_slave(void) 4091 {} 4092 4093 void 4094 sticksync_master(void) 4095 {} 4096 4097 /*ARGSUSED*/ 4098 void 4099 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t bpp) 4100 {} 4101 4102 void 4103 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected) 4104 { 4105 int status; 4106 ddi_fm_error_t de; 4107 4108 bzero(&de, sizeof (ddi_fm_error_t)); 4109 4110 de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, 4111 FM_ENA_FMT1); 4112 de.fme_flag = expected; 4113 de.fme_bus_specific = (void *)aflt->flt_addr; 4114 status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de); 4115 4116 if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL)) 4117 aflt->flt_panic = 1; 4118 } 4119 4120 /*ARGSUSED*/ 4121 void 4122 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz, 4123 errorq_t *eqp, uint_t flag) 4124 { 4125 struct async_flt *aflt = (struct async_flt *)payload; 4126 4127 aflt->flt_erpt_class = error_class; 4128 errorq_dispatch(eqp, payload, payload_sz, flag); 4129 } 4130 4131 #define MAX_SIMM 8 4132 4133 struct ce_info { 4134 char name[UNUM_NAMLEN]; 4135 uint64_t intermittent_total; 4136 uint64_t persistent_total; 4137 uint64_t sticky_total; 4138 unsigned short leaky_bucket_cnt; 4139 }; 4140 4141 /* 4142 * Separately-defined structure for use in reporting the ce_info 4143 * to SunVTS without exposing the internal layout and implementation 4144 * of struct ce_info. 4145 */ 4146 static struct ecc_error_info ecc_error_info_data = { 4147 { "version", KSTAT_DATA_UINT32 }, 4148 { "maxcount", KSTAT_DATA_UINT32 }, 4149 { "count", KSTAT_DATA_UINT32 } 4150 }; 4151 static const size_t ecc_error_info_ndata = sizeof (ecc_error_info_data) / 4152 sizeof (struct kstat_named); 4153 4154 #if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN 4155 #error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN" 4156 #endif 4157 4158 struct ce_info *mem_ce_simm = NULL; 4159 size_t mem_ce_simm_size = 0; 4160 4161 /* 4162 * Default values for the number of CE's allowed per interval. 
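* These defaults seed the ecc_softerr_limit and ecc_softerr_interval tunables used by the leaky bucket code below.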
4163 * Interval is defined in minutes. 4164 * SOFTERR_MIN_TIMEOUT is defined in microseconds. 4165 */ 4166 #define SOFTERR_LIMIT_DEFAULT 2 4167 #define SOFTERR_INTERVAL_DEFAULT 1440 /* This is 24 hours */ 4168 #define SOFTERR_MIN_TIMEOUT (60 * MICROSEC) /* This is 1 minute */ 4169 #define TIMEOUT_NONE ((timeout_id_t)0) 4170 #define TIMEOUT_SET ((timeout_id_t)1) 4171 4172 /* 4173 * Timeout identifier for leaky_bucket. 4174 */ 4175 static timeout_id_t leaky_bucket_timeout_id = TIMEOUT_NONE; 4176 4177 /* 4178 * Tunables for maximum number of allowed CE's in a given time interval. 4179 */ 4180 int ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT; 4181 int ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT; 4182 4183 void 4184 cpu_mp_init(void) 4185 { 4186 size_t size = cpu_aflt_size(); 4187 size_t i; 4188 kstat_t *ksp; 4189 4190 /* 4191 * Initialize the CE error handling buffers. 4192 */ 4193 mem_ce_simm_size = MAX_SIMM * max_ncpus; 4194 size = sizeof (struct ce_info) * mem_ce_simm_size; 4195 mem_ce_simm = kmem_zalloc(size, KM_SLEEP); 4196 4197 ksp = kstat_create("unix", 0, "ecc-info", "misc", 4198 KSTAT_TYPE_NAMED, ecc_error_info_ndata, KSTAT_FLAG_VIRTUAL); 4199 if (ksp != NULL) { 4200 ksp->ks_data = (struct kstat_named *)&ecc_error_info_data; 4201 ecc_error_info_data.version.value.ui32 = KSTAT_CE_INFO_VER; 4202 ecc_error_info_data.maxcount.value.ui32 = mem_ce_simm_size; 4203 ecc_error_info_data.count.value.ui32 = 0; 4204 kstat_install(ksp); 4205 } 4206 4207 for (i = 0; i < mem_ce_simm_size; i++) { 4208 struct kstat_ecc_mm_info *kceip; 4209 4210 kceip = kmem_zalloc(sizeof (struct kstat_ecc_mm_info), 4211 KM_SLEEP); 4212 ksp = kstat_create("mm", i, "ecc-info", "misc", 4213 KSTAT_TYPE_NAMED, 4214 sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t), 4215 KSTAT_FLAG_VIRTUAL); 4216 if (ksp != NULL) { 4217 /* 4218 * Re-declare ks_data_size to include room for the 4219 * UNUM name since we don't have KSTAT_FLAG_VAR_SIZE 4220 * set. 4221 */ 4222 ksp->ks_data_size = sizeof (struct kstat_ecc_mm_info) + 4223 KSTAT_CE_UNUM_NAMLEN; 4224 ksp->ks_data = kceip; 4225 kstat_named_init(&kceip->name, 4226 "name", KSTAT_DATA_STRING); 4227 kstat_named_init(&kceip->intermittent_total, 4228 "intermittent_total", KSTAT_DATA_UINT64); 4229 kstat_named_init(&kceip->persistent_total, 4230 "persistent_total", KSTAT_DATA_UINT64); 4231 kstat_named_init(&kceip->sticky_total, 4232 "sticky_total", KSTAT_DATA_UINT64); 4233 /* 4234 * Use the default snapshot routine as it knows how to 4235 * deal with named kstats with long strings. 4236 */ 4237 ksp->ks_update = ecc_kstat_update; 4238 kstat_install(ksp); 4239 } else { 4240 kmem_free(kceip, sizeof (struct kstat_ecc_mm_info)); 4241 } 4242 } 4243 } 4244 4245 /*ARGSUSED*/ 4246 static void 4247 leaky_bucket_timeout(void *arg) 4248 { 4249 int i; 4250 struct ce_info *psimm = mem_ce_simm; 4251 4252 for (i = 0; i < mem_ce_simm_size; i++) { 4253 if (psimm[i].leaky_bucket_cnt > 0) 4254 atomic_add_16(&psimm[i].leaky_bucket_cnt, -1); 4255 } 4256 add_leaky_bucket_timeout(); 4257 } 4258 4259 static void 4260 add_leaky_bucket_timeout(void) 4261 { 4262 long timeout_in_microsecs; 4263 4264 /* 4265 * Create the timeout for the next leak. 4266 * 4267 * The timeout interval is calculated as follows: 4268 * 4269 * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit 4270 * 4271 * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds 4272 * in a minute), then multiply this by MICROSEC to get the interval 4273 * in microseconds.
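* (For example, with the defaults above this works out to (1440 * 60 * MICROSEC) / 2, i.e. one leak every 12 hours, which allows roughly ecc_softerr_limit persistent CEs per entry per 24-hour interval.)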
Divide this total by ecc_softerr_limit so that 4274 * the timeout interval is accurate to within a few microseconds. 4275 */ 4276 4277 if (ecc_softerr_limit <= 0) 4278 ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT; 4279 if (ecc_softerr_interval <= 0) 4280 ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT; 4281 4282 timeout_in_microsecs = ((int64_t)ecc_softerr_interval * 60 * MICROSEC) / 4283 ecc_softerr_limit; 4284 4285 if (timeout_in_microsecs < SOFTERR_MIN_TIMEOUT) 4286 timeout_in_microsecs = SOFTERR_MIN_TIMEOUT; 4287 4288 leaky_bucket_timeout_id = timeout(leaky_bucket_timeout, 4289 (void *)NULL, drv_usectohz((clock_t)timeout_in_microsecs)); 4290 } 4291 4292 /* 4293 * Legacy Correctable ECC Error Hash 4294 * 4295 * All of the code below this comment is used to implement a legacy array 4296 * which counted intermittent, persistent, and sticky CE errors by unum, 4297 * and then was later extended to publish the data as a kstat for SunVTS. 4298 * All of this code is replaced by FMA, and remains here until such time 4299 * that the UltraSPARC-I/II CPU code is converted to FMA, or is EOLed. 4300 * 4301 * Errors are saved in three buckets per-unum: 4302 * (1) sticky - scrub was unsuccessful, cannot be scrubbed 4303 * This could represent a problem, and is immediately printed out. 4304 * (2) persistent - was successfully scrubbed 4305 * These errors use the leaky bucket algorithm to determine 4306 * if there is a serious problem. 4307 * (3) intermittent - may have originated from the cpu or upa/safari bus, 4308 * and does not necessarily indicate any problem with the dimm itself, 4309 * is critical information for debugging new hardware. 4310 * Because we do not know if it came from the dimm, it would be 4311 * inappropriate to include these in the leaky bucket counts. 4312 * 4313 * If the E$ line was modified before the scrub operation began, then the 4314 * displacement flush at the beginning of scrubphys() will cause the modified 4315 * line to be written out, which will clean up the CE. Then, any subsequent 4316 * read will not cause an error, which will cause persistent errors to be 4317 * identified as intermittent. 4318 * 4319 * If a DIMM is going bad, it will produce true persistents as well as 4320 * false intermittents, so these intermittents can be safely ignored. 4321 * 4322 * If the error count is excessive for a DIMM, this function will return 4323 * PR_MCE, and the CPU module may then decide to remove that page from use. 4324 */ 4325 static int 4326 ce_count_unum(int status, int len, char *unum) 4327 { 4328 int i; 4329 struct ce_info *psimm = mem_ce_simm; 4330 int page_status = PR_OK; 4331 4332 ASSERT(psimm != NULL); 4333 4334 if (len <= 0 || 4335 (status & (ECC_STICKY | ECC_PERSISTENT | ECC_INTERMITTENT)) == 0) 4336 return (page_status); 4337 4338 /* 4339 * Initialize the leaky_bucket timeout 4340 */ 4341 if (casptr(&leaky_bucket_timeout_id, 4342 TIMEOUT_NONE, TIMEOUT_SET) == TIMEOUT_NONE) 4343 add_leaky_bucket_timeout(); 4344 4345 for (i = 0; i < mem_ce_simm_size; i++) { 4346 if (psimm[i].name[0] == '\0') { 4347 /* 4348 * Hit the end of the valid entries, add 4349 * a new one. 4350 */ 4351 (void) strncpy(psimm[i].name, unum, len); 4352 if (status & ECC_STICKY) { 4353 /* 4354 * Sticky - the leaky bucket is used to track 4355 * soft errors. Since a sticky error is a 4356 * hard error and likely to be retired soon, 4357 * we do not count it in the leaky bucket. 
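* Instead, the error is recorded in sticky_total, a warning is logged, and PR_MCE is returned so the caller can retire the page.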
4358 */ 4359 psimm[i].leaky_bucket_cnt = 0; 4360 psimm[i].intermittent_total = 0; 4361 psimm[i].persistent_total = 0; 4362 psimm[i].sticky_total = 1; 4363 cmn_err(CE_WARN, 4364 "[AFT0] Sticky Softerror encountered " 4365 "on Memory Module %s\n", unum); 4366 page_status = PR_MCE; 4367 } else if (status & ECC_PERSISTENT) { 4368 psimm[i].leaky_bucket_cnt = 1; 4369 psimm[i].intermittent_total = 0; 4370 psimm[i].persistent_total = 1; 4371 psimm[i].sticky_total = 0; 4372 } else { 4373 /* 4374 * Intermittent - Because the scrub operation 4375 * cannot find the error in the DIMM, we will 4376 * not count these in the leaky bucket 4377 */ 4378 psimm[i].leaky_bucket_cnt = 0; 4379 psimm[i].intermittent_total = 1; 4380 psimm[i].persistent_total = 0; 4381 psimm[i].sticky_total = 0; 4382 } 4383 ecc_error_info_data.count.value.ui32++; 4384 break; 4385 } else if (strncmp(unum, psimm[i].name, len) == 0) { 4386 /* 4387 * Found an existing entry for the current 4388 * memory module, adjust the counts. 4389 */ 4390 if (status & ECC_STICKY) { 4391 psimm[i].sticky_total++; 4392 cmn_err(CE_WARN, 4393 "[AFT0] Sticky Softerror encountered " 4394 "on Memory Module %s\n", unum); 4395 page_status = PR_MCE; 4396 } else if (status & ECC_PERSISTENT) { 4397 int new_value; 4398 4399 new_value = atomic_add_16_nv( 4400 &psimm[i].leaky_bucket_cnt, 1); 4401 psimm[i].persistent_total++; 4402 if (new_value > ecc_softerr_limit) { 4403 cmn_err(CE_WARN, "[AFT0] Most recent %d" 4404 " soft errors from Memory Module" 4405 " %s exceed threshold (N=%d," 4406 " T=%dh:%02dm) triggering page" 4407 " retire", new_value, unum, 4408 ecc_softerr_limit, 4409 ecc_softerr_interval / 60, 4410 ecc_softerr_interval % 60); 4411 atomic_add_16( 4412 &psimm[i].leaky_bucket_cnt, -1); 4413 page_status = PR_MCE; 4414 } 4415 } else { /* Intermittent */ 4416 psimm[i].intermittent_total++; 4417 } 4418 break; 4419 } 4420 } 4421 4422 if (i >= mem_ce_simm_size) 4423 cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of " 4424 "space.\n"); 4425 4426 return (page_status); 4427 } 4428 4429 /* 4430 * Function to support counting of IO detected CEs. 4431 */ 4432 void 4433 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum) 4434 { 4435 int err; 4436 4437 err = ce_count_unum(ecc->flt_status, len, unum); 4438 if (err != PR_OK && automatic_page_removal) { 4439 (void) page_retire(ecc->flt_addr, err); 4440 } 4441 } 4442 4443 static int 4444 ecc_kstat_update(kstat_t *ksp, int rw) 4445 { 4446 struct kstat_ecc_mm_info *kceip = ksp->ks_data; 4447 struct ce_info *ceip = mem_ce_simm; 4448 int i = ksp->ks_instance; 4449 4450 if (rw == KSTAT_WRITE) 4451 return (EACCES); 4452 4453 ASSERT(ksp->ks_data != NULL); 4454 ASSERT(i < mem_ce_simm_size && i >= 0); 4455 4456 /* 4457 * Since we're not using locks, make sure that we don't get partial 4458 * data. The name is always copied before the counters are incremented 4459 * so only do this update routine if at least one of the counters is 4460 * non-zero, which ensures that ce_count_unum() is done, and the 4461 * string is fully copied. 4462 */ 4463 if (ceip[i].intermittent_total == 0 && 4464 ceip[i].persistent_total == 0 && 4465 ceip[i].sticky_total == 0) { 4466 /* 4467 * Uninitialized or partially initialized. Ignore. 4468 * The ks_data buffer was allocated via kmem_zalloc, 4469 * so no need to bzero it. 
4470 */ 4471 return (0); 4472 } 4473 4474 kstat_named_setstr(&kceip->name, ceip[i].name); 4475 kceip->intermittent_total.value.ui64 = ceip[i].intermittent_total; 4476 kceip->persistent_total.value.ui64 = ceip[i].persistent_total; 4477 kceip->sticky_total.value.ui64 = ceip[i].sticky_total; 4478 4479 return (0); 4480 } 4481 4482 #define VIS_BLOCKSIZE 64 4483 4484 int 4485 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data) 4486 { 4487 int ret, watched; 4488 4489 watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 4490 ret = dtrace_blksuword32(addr, data, 0); 4491 if (watched) 4492 watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 4493 4494 return (ret); 4495 } 4496 4497 /*ARGSUSED*/ 4498 void 4499 cpu_faulted_enter(struct cpu *cp) 4500 { 4501 } 4502 4503 /*ARGSUSED*/ 4504 void 4505 cpu_faulted_exit(struct cpu *cp) 4506 { 4507 } 4508 4509 static int mmu_disable_ism_large_pages = ((1 << TTE512K) | 4510 (1 << TTE32M) | (1 << TTE256M)); 4511 static int mmu_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M)); 4512 4513 /* 4514 * The function returns the US_II mmu-specific values for the 4515 * hat's disable_large_pages and disable_ism_large_pages variables. 4516 */ 4517 int 4518 mmu_large_pages_disabled(uint_t flag) 4519 { 4520 int pages_disable = 0; 4521 4522 if (flag == HAT_LOAD) { 4523 pages_disable = mmu_disable_large_pages; 4524 } else if (flag == HAT_LOAD_SHARE) { 4525 pages_disable = mmu_disable_ism_large_pages; 4526 } 4527 return (pages_disable); 4528 } 4529 4530 /*ARGSUSED*/ 4531 void 4532 mmu_init_kernel_pgsz(struct hat *hat) 4533 { 4534 } 4535 4536 size_t 4537 mmu_get_kernel_lpsize(size_t lpsize) 4538 { 4539 uint_t tte; 4540 4541 if (lpsize == 0) { 4542 /* no setting for segkmem_lpsize in /etc/system: use default */ 4543 return (MMU_PAGESIZE4M); 4544 } 4545 4546 for (tte = TTE8K; tte <= TTE4M; tte++) { 4547 if (lpsize == TTEBYTES(tte)) 4548 return (lpsize); 4549 } 4550 4551 return (TTEBYTES(TTE8K)); 4552 } 4553