1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/systm.h> 30 #include <sys/archsystm.h> 31 #include <sys/machparam.h> 32 #include <sys/machsystm.h> 33 #include <sys/cpu.h> 34 #include <sys/elf_SPARC.h> 35 #include <vm/hat_sfmmu.h> 36 #include <vm/page.h> 37 #include <sys/cpuvar.h> 38 #include <sys/spitregs.h> 39 #include <sys/async.h> 40 #include <sys/cmn_err.h> 41 #include <sys/debug.h> 42 #include <sys/dditypes.h> 43 #include <sys/sunddi.h> 44 #include <sys/cpu_module.h> 45 #include <sys/prom_debug.h> 46 #include <sys/vmsystm.h> 47 #include <sys/prom_plat.h> 48 #include <sys/sysmacros.h> 49 #include <sys/intreg.h> 50 #include <sys/machtrap.h> 51 #include <sys/ontrap.h> 52 #include <sys/ivintr.h> 53 #include <sys/atomic.h> 54 #include <sys/panic.h> 55 #include <sys/ndifm.h> 56 #include <sys/fm/protocol.h> 57 #include <sys/fm/util.h> 58 #include <sys/fm/cpu/UltraSPARC-II.h> 59 #include <sys/ddi.h> 60 #include <sys/ecc_kstat.h> 61 #include <sys/watchpoint.h> 62 #include <sys/dtrace.h> 63 #include <sys/errclassify.h> 64 65 uint_t cpu_impl_dual_pgsz = 0; 66 67 /* 68 * Structure for the 8 byte ecache data dump and the associated AFSR state. 69 * There will be 8 of these structures used to dump an ecache line (64 bytes). 
70 */ 71 typedef struct sf_ec_data_elm { 72 uint64_t ec_d8; 73 uint64_t ec_afsr; 74 } ec_data_t; 75 76 /* 77 * Define spitfire (Ultra I/II) specific asynchronous error structure 78 */ 79 typedef struct spitfire_async_flt { 80 struct async_flt cmn_asyncflt; /* common - see sun4u/sys/async.h */ 81 ushort_t flt_type; /* types of faults - cpu specific */ 82 ec_data_t flt_ec_data[8]; /* for E$ or mem dump/state */ 83 uint64_t flt_ec_tag; /* E$ tag info */ 84 int flt_ec_lcnt; /* number of bad E$ lines */ 85 ushort_t flt_sdbh; /* UDBH reg */ 86 ushort_t flt_sdbl; /* UDBL reg */ 87 } spitf_async_flt; 88 89 /* 90 * Prototypes for support routines in spitfire_asm.s: 91 */ 92 extern void flush_ecache(uint64_t physaddr, size_t size, size_t linesize); 93 extern uint64_t get_lsu(void); 94 extern void set_lsu(uint64_t ncc); 95 extern void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag, 96 uint64_t *oafsr, uint64_t *acc_afsr); 97 extern uint64_t check_ecache_line(uint32_t id, uint64_t *acc_afsr); 98 extern uint64_t get_ecache_tag(uint32_t id, uint64_t *nafsr, 99 uint64_t *acc_afsr); 100 extern uint64_t read_and_clear_afsr(); 101 extern void write_ec_tag_parity(uint32_t id); 102 extern void write_hb_ec_tag_parity(uint32_t id); 103 104 /* 105 * Spitfire module routines: 106 */ 107 static void cpu_async_log_err(void *flt); 108 /*PRINTFLIKE6*/ 109 static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, 110 uint_t logflags, const char *endstr, const char *fmt, ...); 111 112 static void cpu_read_paddr(struct async_flt *aflt, short verbose, short ce_err); 113 static void cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum); 114 static void cpu_log_ecmem_info(spitf_async_flt *spf_flt); 115 116 static void log_ce_err(struct async_flt *aflt, char *unum); 117 static void log_ue_err(struct async_flt *aflt, char *unum); 118 static void check_misc_err(spitf_async_flt *spf_flt); 119 static ushort_t ecc_gen(uint_t high_bytes, uint_t low_bytes); 120 static int check_ecc(struct async_flt *aflt); 121 static uint_t get_cpu_status(uint64_t arg); 122 static uint64_t clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr); 123 static void scan_ecache(uint64_t *afar, ec_data_t *data, uint64_t *tag, 124 int *m, uint64_t *afsr); 125 static void ecache_kstat_init(struct cpu *cp); 126 static void ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, 127 uint64_t paddr, int mpb, uint64_t); 128 static uint64_t ecache_scrub_misc_err(int, uint64_t); 129 static void ecache_scrub_tag_err(uint64_t, uchar_t, uint32_t); 130 static void ecache_page_retire(void *); 131 static int ecc_kstat_update(kstat_t *ksp, int rw); 132 static int ce_count_unum(int status, int len, char *unum); 133 static void add_leaky_bucket_timeout(void); 134 static int synd_to_synd_code(int synd_status, ushort_t synd); 135 136 extern uint_t read_all_memscrub; 137 extern void memscrub_run(void); 138 139 static uchar_t isus2i; /* set if sabre */ 140 static uchar_t isus2e; /* set if hummingbird */ 141 142 /* 143 * Default ecache mask and shift settings for Spitfire. If we detect a 144 * different CPU implementation, we will modify these values at boot time. 
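 *
 * These globals are consumed as mask-then-shift accessors on a raw E$ tag
 * word.  A sketch of the usage (mirroring the UE handler further below;
 * ec_tag and paddr are illustrative names):
 *
 *	uchar_t state = (uchar_t)((ec_tag & cpu_ec_state_mask) >>
 *	    cpu_ec_state_shift);
 *	int valid = (state & cpu_ec_state_valid) != 0;
 *	int tag_matches = ((ec_tag & cpu_ec_tag_mask) ==
 *	    ((uint64_t)paddr >> cpu_ec_tag_shift));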
 */
static uint64_t cpu_ec_tag_mask = S_ECTAG_MASK;
static uint64_t cpu_ec_state_mask = S_ECSTATE_MASK;
static uint64_t cpu_ec_par_mask = S_ECPAR_MASK;
static int cpu_ec_par_shift = S_ECPAR_SHIFT;
static int cpu_ec_tag_shift = S_ECTAG_SHIFT;
static int cpu_ec_state_shift = S_ECSTATE_SHIFT;
static uchar_t cpu_ec_state_exl = S_ECSTATE_EXL;
static uchar_t cpu_ec_state_mod = S_ECSTATE_MOD;
static uchar_t cpu_ec_state_shr = S_ECSTATE_SHR;
static uchar_t cpu_ec_state_own = S_ECSTATE_OWN;

/*
 * Default ecache state bits for Spitfire. These individual bits indicate if
 * the given line is in any of the valid or modified states, respectively.
 * Again, we modify these at boot if we detect a different CPU.
 */
static uchar_t cpu_ec_state_valid = S_ECSTATE_VALID;
static uchar_t cpu_ec_state_dirty = S_ECSTATE_DIRTY;
static uchar_t cpu_ec_parity = S_EC_PARITY;
static uchar_t cpu_ec_state_parity = S_ECSTATE_PARITY;

/*
 * This table is used to determine which bit(s) is(are) bad when an ECC
 * error occurs. The array is indexed by an 8-bit syndrome. The entries
 * of this array have the following semantics:
 *
 *	00-63	The number of the bad bit, when only one bit is bad.
 *	64	ECC bit C0 is bad.
 *	65	ECC bit C1 is bad.
 *	66	ECC bit C2 is bad.
 *	67	ECC bit C3 is bad.
 *	68	ECC bit C4 is bad.
 *	69	ECC bit C5 is bad.
 *	70	ECC bit C6 is bad.
 *	71	ECC bit C7 is bad.
 *	72	Two bits are bad.
 *	73	Three bits are bad.
 *	74	Four bits are bad.
 *	75	More than Four bits are bad.
 *	76	NO bits are bad.
 * Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28.
 */

#define	C0	64
#define	C1	65
#define	C2	66
#define	C3	67
#define	C4	68
#define	C5	69
#define	C6	70
#define	C7	71
#define	M2	72
#define	M3	73
#define	M4	74
#define	MX	75
#define	NA	76

#define	SYND_IS_SINGLE_BIT_DATA(synd_code)	((synd_code >= 0) && \
						(synd_code < C0))
#define	SYND_IS_SINGLE_BIT_CHK(synd_code)	((synd_code >= C0) && \
						(synd_code <= C7))

static char ecc_syndrome_tab[] =
{
	NA, C0, C1, M2, C2, M2, M2, M3, C3, M2, M2, M3, M2, M3, M3, M4,
	C4, M2, M2, 32, M2, 57, MX, M2, M2, 37, 49, M2, 40, M2, M2, 44,
	C5, M2, M2, 33, M2, 61, 4, M2, M2, MX, 53, M2, 45, M2, M2, 41,
	M2, 0, 1, M2, 10, M2, M2, MX, 15, M2, M2, MX, M2, M3, M3, M2,
	C6, M2, M2, 42, M2, 59, 39, M2, M2, MX, 51, M2, 34, M2, M2, 46,
	M2, 25, 29, M2, 27, M4, M2, MX, 31, M2, M4, MX, M2, MX, MX, M2,
	M2, MX, 36, M2, 7, M2, M2, 54, MX, M2, M2, 62, M2, 48, 56, M2,
	M3, M2, M2, MX, M2, MX, 22, M2, M2, 18, MX, M2, M3, M2, M2, MX,
	C7, M2, M2, 47, M2, 63, MX, M2, M2, 6, 55, M2, 35, M2, M2, 43,
	M2, 5, MX, M2, MX, M2, M2, 50, 38, M2, M2, 58, M2, 52, 60, M2,
	M2, 17, 21, M2, 19, M4, M2, MX, 23, M2, M4, MX, M2, MX, MX, M2,
	M3, M2, M2, MX, M2, MX, 30, M2, M2, 26, MX, M2, M3, M2, M2, MX,
	M2, 8, 13, M2, 2, M2, M2, M3, 3, M2, M2, M3, M2, MX, MX, M2,
	M3, M2, M2, M3, M2, MX, 16, M2, M2, 20, MX, M2, MX, M2, M2, MX,
	M3, M2, M2, M3, M2, MX, 24, M2, M2, 28, MX, M2, MX, M2, M2, MX,
	M4, 12, 9, M2, 14, M2, M2, MX, 11, M2, M2, MX, M2, MX, MX, M4
};

#define	SYND_TBL_SIZE	256

/*
 * Hack for determining UDBH/UDBL, for later cpu-specific error reporting.
 * Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird.
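 *
 * Bit 15 of the software flt_synd word (UDBL_REG, defined below) is used as
 * the UDBL indicator instead, with the low bits holding the ECC syndrome.
 * A sketch of how the handlers below pack and later unpack it (in_udbl is
 * an illustrative name):
 *
 *	aflt->flt_synd = e_syndl | UDBL_REG;	packed by the CE/UE handlers
 *	ushort_t synd = SYND(aflt->flt_synd);	the 8-bit ECC syndrome
 *	int in_udbl = UDBL(aflt->flt_synd);	1 if the error came from UDBL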
 */
#define	UDBL_REG	0x8000
#define	UDBL(synd)	((synd & UDBL_REG) >> 15)
#define	SYND(synd)	(synd & 0x7FFF)

/*
 * These error types are specific to Spitfire and are used internally for the
 * spitfire fault structure flt_type field.
 */
#define	CPU_UE_ERR		0	/* uncorrectable errors - UEs */
#define	CPU_EDP_LDP_ERR		1	/* LDP or EDP parity error */
#define	CPU_WP_ERR		2	/* WP parity error */
#define	CPU_BTO_BERR_ERR	3	/* bus timeout errors */
#define	CPU_PANIC_CP_ERR	4	/* cp error from panic polling */
#define	CPU_TRAPPING_CP_ERR	5	/* for sabre/hbird only, cp error */
#define	CPU_BADLINE_CI_ERR	6	/* E$ clean_bad line when idle */
#define	CPU_BADLINE_CB_ERR	7	/* E$ clean_bad line when busy */
#define	CPU_BADLINE_DI_ERR	8	/* E$ dirty_bad line when idle */
#define	CPU_BADLINE_DB_ERR	9	/* E$ dirty_bad line when busy */
#define	CPU_ORPHAN_CP_ERR	10	/* Orphan CP error */
#define	CPU_ECACHE_ADDR_PAR_ERR	11	/* Ecache Address parity error */
#define	CPU_ECACHE_STATE_ERR	12	/* Ecache state error */
#define	CPU_ECACHE_ETP_ETS_ERR	13	/* ETP set but ETS is zero */
#define	CPU_ECACHE_TAG_ERR	14	/* Scrub the E$ tag, if state clean */
#define	CPU_ADDITIONAL_ERR	15	/* Additional errors occurred */

/*
 * Macro to access the "Spitfire cpu private" data structure.
 */
#define	CPU_PRIVATE_PTR(cp, x)	(&(((spitfire_private_t *)CPU_PRIVATE(cp))->x))

/*
 * Set to 0 to disable automatic retiring of pages on
 * DIMMs that have excessive soft errors.
 */
int automatic_page_removal = 1;

/*
 * Heuristic for figuring out which module to replace.
 * Relative likelihood that this P_SYND indicates that this module is bad.
 * We call it a "score", though, not a relative likelihood.
 *
 * Step 1.
 * Assign a score to each byte of P_SYND according to the following rules:
 *	If no bits on (0x00) or all bits on (0xFF), then give it a 5.
 *	If one bit on, give it a 95.
 *	If seven bits on, give it a 10.
 *	If two bits on:
 *	    in different nybbles, a 90
 *	    in same nybble, but unaligned, 85
 *	    in same nybble and as an aligned pair, 80
 *	If six bits on, look at the bits that are off:
 *	    in same nybble and as an aligned pair, 15
 *	    in same nybble, but unaligned, 20
 *	    in different nybbles, a 25
 *	If three bits on:
 *	    in different nybbles, no aligned pairs, 75
 *	    in different nybbles, one aligned pair, 70
 *	    in the same nybble, 65
 *	If five bits on, look at the bits that are off:
 *	    in the same nybble, 30
 *	    in different nybbles, one aligned pair, 35
 *	    in different nybbles, no aligned pairs, 40
 *	If four bits on:
 *	    all in one nybble, 45
 *	    as two aligned pairs, 50
 *	    one aligned pair, 55
 *	    no aligned pairs, 60
 *
 * Step 2:
 * Take the higher of the two scores (one for each byte) as the score
 * for the module.
 *
 * Print the score for each module, and field service should replace the
 * module with the highest score.
 */

/*
 * In the table below, the first row/column comment indicates the
 * number of bits on in that nybble; the second row/column comment is
 * the hex digit.
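 *
 * Worked example, using the heuristic above and the table entries: for
 * P_SYND = 0x0301, the low byte 0x01 has a single bit on and scores 95,
 * while the high byte 0x03 is an aligned pair within one nybble and scores
 * 80; ecc_psynd_score() therefore returns the larger score, 95.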
314 */ 315 316 static int 317 p_synd_score_table[256] = { 318 /* 0 1 1 2 1 2 2 3 1 2 2 3 2 3 3 4 */ 319 /* 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F */ 320 /* 0 0 */ 5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45, 321 /* 1 1 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30, 322 /* 1 2 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30, 323 /* 2 3 */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15, 324 /* 1 4 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30, 325 /* 2 5 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20, 326 /* 2 6 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20, 327 /* 3 7 */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10, 328 /* 1 8 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30, 329 /* 2 9 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20, 330 /* 2 A */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20, 331 /* 3 B */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10, 332 /* 2 C */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15, 333 /* 3 D */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10, 334 /* 3 E */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10, 335 /* 4 F */ 45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10, 5, 336 }; 337 338 int 339 ecc_psynd_score(ushort_t p_synd) 340 { 341 int i, j, a, b; 342 343 i = p_synd & 0xFF; 344 j = (p_synd >> 8) & 0xFF; 345 346 a = p_synd_score_table[i]; 347 b = p_synd_score_table[j]; 348 349 return (a > b ? a : b); 350 } 351 352 /* 353 * Async Fault Logging 354 * 355 * To ease identifying, reading, and filtering async fault log messages, the 356 * label [AFT#] is now prepended to each async fault message. These messages 357 * and the logging rules are implemented by cpu_aflt_log(), below. 358 * 359 * [AFT0] - Tag for log messages that are associated with corrected ECC errors. 360 * This includes both corrected ECC memory and ecache faults. 361 * 362 * [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything 363 * else except CE errors) with a priority of 1 (highest). This tag 364 * is also used for panic messages that result from an async fault. 365 * 366 * [AFT2] - These are lower priority diagnostic messages for uncorrected ECC 367 * [AFT3] or parity errors. For example, AFT2 is used for the actual dump 368 * of the E-$ data and tags. 369 * 370 * In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not 371 * printed on the console. To send all AFT logs to both the log and the 372 * console, set aft_verbose = 1. 
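 *
 * The '#' in the label corresponds to the tagnum argument passed to
 * cpu_aflt_log(): the corrected-error reports below are logged with
 * tagnum 0 and appear as [AFT0], while the panic-level reports are logged
 * with tagnum 1 and appear as [AFT1].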
373 */ 374 375 #define CPU_FLTCPU 0x0001 /* print flt_inst as a CPU id */ 376 #define CPU_SPACE 0x0002 /* print flt_status (data or instr) */ 377 #define CPU_ERRID 0x0004 /* print flt_id */ 378 #define CPU_TL 0x0008 /* print flt_tl */ 379 #define CPU_ERRID_FIRST 0x0010 /* print flt_id first in message */ 380 #define CPU_AFSR 0x0020 /* print flt_stat as decoded %afsr */ 381 #define CPU_AFAR 0x0040 /* print flt_addr as %afar */ 382 #define CPU_AF_PSYND 0x0080 /* print flt_stat %afsr.PSYND */ 383 #define CPU_AF_ETS 0x0100 /* print flt_stat %afsr.ETS */ 384 #define CPU_UDBH 0x0200 /* print flt_sdbh and syndrome */ 385 #define CPU_UDBL 0x0400 /* print flt_sdbl and syndrome */ 386 #define CPU_FAULTPC 0x0800 /* print flt_pc */ 387 #define CPU_SYND 0x1000 /* print flt_synd and unum */ 388 389 #define CMN_LFLAGS (CPU_FLTCPU | CPU_SPACE | CPU_ERRID | CPU_TL | \ 390 CPU_AFSR | CPU_AFAR | CPU_AF_PSYND | \ 391 CPU_AF_ETS | CPU_UDBH | CPU_UDBL | \ 392 CPU_FAULTPC) 393 #define UE_LFLAGS (CMN_LFLAGS | CPU_SYND) 394 #define CE_LFLAGS (UE_LFLAGS & ~CPU_UDBH & ~CPU_UDBL & ~CPU_TL & \ 395 ~CPU_SPACE) 396 #define PARERR_LFLAGS (CMN_LFLAGS) 397 #define WP_LFLAGS (CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL) 398 #define CP_LFLAGS (CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL & \ 399 ~CPU_FLTCPU & ~CPU_FAULTPC) 400 #define BERRTO_LFLAGS (CMN_LFLAGS) 401 #define NO_LFLAGS (0) 402 403 #define AFSR_FMTSTR0 "\020\1ME" 404 #define AFSR_FMTSTR1 "\020\040PRIV\037ISAP\036ETP\035IVUE\034TO" \ 405 "\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE" 406 #define UDB_FMTSTR "\020\012UE\011CE" 407 408 /* 409 * Save the cache bootup state for use when internal 410 * caches are to be re-enabled after an error occurs. 411 */ 412 uint64_t cache_boot_state = 0; 413 414 /* 415 * PA[31:0] represent Displacement in UPA configuration space. 416 */ 417 uint_t root_phys_addr_lo_mask = 0xffffffff; 418 419 /* 420 * Spitfire legacy globals 421 */ 422 int itlb_entries; 423 int dtlb_entries; 424 425 void 426 cpu_setup(void) 427 { 428 extern int page_retire_messages; 429 extern int page_retire_first_ue; 430 extern int at_flags; 431 #if defined(SF_ERRATA_57) 432 extern caddr_t errata57_limit; 433 #endif 434 extern int disable_text_largepages; 435 extern int disable_initdata_largepages; 436 437 cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT); 438 439 at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1; 440 441 /* 442 * Spitfire isn't currently FMA-aware, so we have to enable the 443 * page retirement messages. We also change the default policy 444 * for UE retirement to allow clearing of transient errors. 445 */ 446 page_retire_messages = 1; 447 page_retire_first_ue = 0; 448 449 /* 450 * save the cache bootup state. 451 */ 452 cache_boot_state = get_lsu() & (LSU_IC | LSU_DC); 453 454 if (use_page_coloring) { 455 do_pg_coloring = 1; 456 if (use_virtual_coloring) 457 do_virtual_coloring = 1; 458 } 459 460 /* 461 * Tune pp_slots to use up to 1/8th of the tlb entries. 462 */ 463 pp_slots = MIN(8, MAXPP_SLOTS); 464 465 /* 466 * Block stores invalidate all pages of the d$ so pagecopy 467 * et. al. do not need virtual translations with virtual 468 * coloring taken into consideration. 469 */ 470 pp_consistent_coloring = 0; 471 472 isa_list = 473 "sparcv9+vis sparcv9 " 474 "sparcv8plus+vis sparcv8plus " 475 "sparcv8 sparcv8-fsmuld sparcv7 sparc"; 476 477 cpu_hwcap_flags = AV_SPARC_VIS; 478 479 /* 480 * On Spitfire, there's a hole in the address space 481 * that we must never map (the hardware only support 44-bits of 482 * virtual address). 
Later CPUs are expected to have wider 483 * supported address ranges. 484 * 485 * See address map on p23 of the UltraSPARC 1 user's manual. 486 */ 487 hole_start = (caddr_t)0x80000000000ull; 488 hole_end = (caddr_t)0xfffff80000000000ull; 489 490 /* 491 * A spitfire call bug requires us to be a further 4Gbytes of 492 * firewall from the spec. 493 * 494 * See Spitfire Errata #21 495 */ 496 hole_start = (caddr_t)((uintptr_t)hole_start - (1ul << 32)); 497 hole_end = (caddr_t)((uintptr_t)hole_end + (1ul << 32)); 498 499 /* 500 * The kpm mapping window. 501 * kpm_size: 502 * The size of a single kpm range. 503 * The overall size will be: kpm_size * vac_colors. 504 * kpm_vbase: 505 * The virtual start address of the kpm range within the kernel 506 * virtual address space. kpm_vbase has to be kpm_size aligned. 507 */ 508 kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */ 509 kpm_size_shift = 41; 510 kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */ 511 512 #if defined(SF_ERRATA_57) 513 errata57_limit = (caddr_t)0x80000000ul; 514 #endif 515 516 /* 517 * Allow only 8K, 64K and 4M pages for text by default. 518 * Allow only 8K and 64K page for initialized data segments by 519 * default. 520 */ 521 disable_text_largepages = (1 << TTE512K) | (1 << TTE32M) | 522 (1 << TTE256M); 523 disable_initdata_largepages = (1 << TTE512K) | (1 << TTE4M) | 524 (1 << TTE32M) | (1 << TTE256M); 525 } 526 527 static int 528 getintprop(pnode_t node, char *name, int deflt) 529 { 530 int value; 531 532 switch (prom_getproplen(node, name)) { 533 case 0: 534 value = 1; /* boolean properties */ 535 break; 536 537 case sizeof (int): 538 (void) prom_getprop(node, name, (caddr_t)&value); 539 break; 540 541 default: 542 value = deflt; 543 break; 544 } 545 546 return (value); 547 } 548 549 /* 550 * Set the magic constants of the implementation. 551 */ 552 void 553 cpu_fiximp(pnode_t dnode) 554 { 555 extern int vac_size, vac_shift; 556 extern uint_t vac_mask; 557 extern int dcache_line_mask; 558 int i, a; 559 static struct { 560 char *name; 561 int *var; 562 } prop[] = { 563 "dcache-size", &dcache_size, 564 "dcache-line-size", &dcache_linesize, 565 "icache-size", &icache_size, 566 "icache-line-size", &icache_linesize, 567 "ecache-size", &ecache_size, 568 "ecache-line-size", &ecache_alignsize, 569 "ecache-associativity", &ecache_associativity, 570 "#itlb-entries", &itlb_entries, 571 "#dtlb-entries", &dtlb_entries, 572 }; 573 574 for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) { 575 if ((a = getintprop(dnode, prop[i].name, -1)) != -1) { 576 *prop[i].var = a; 577 } 578 } 579 580 ecache_setsize = ecache_size / ecache_associativity; 581 582 vac_size = S_VAC_SIZE; 583 vac_mask = MMU_PAGEMASK & (vac_size - 1); 584 i = 0; a = vac_size; 585 while (a >>= 1) 586 ++i; 587 vac_shift = i; 588 shm_alignment = vac_size; 589 vac = 1; 590 591 dcache_line_mask = (dcache_size - 1) & ~(dcache_linesize - 1); 592 593 /* 594 * UltraSPARC I & II have ecache sizes running 595 * as follows: .25 MB, .5 MB, 1 MB, 2 MB, 4 MB 596 * and 8 MB. Adjust the copyin/copyout limits 597 * according to the cache size. The magic number 598 * of VIS_COPY_THRESHOLD comes from the copyin/copyout code 599 * and its floor of VIS_COPY_THRESHOLD bytes before it will use 600 * VIS instructions. 601 * 602 * We assume that all CPUs on the system have the same size 603 * ecache. We're also called very early in the game. 604 * /etc/system will be parsed *after* we're called so 605 * these values can be overwritten. 
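 *
 * For example, an /etc/system entry along the lines of
 *
 *	set hw_copy_limit_8 = 0x1000
 *
 * would replace the value selected below.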
606 */ 607 608 hw_copy_limit_1 = VIS_COPY_THRESHOLD; 609 if (ecache_size <= 524288) { 610 hw_copy_limit_2 = VIS_COPY_THRESHOLD; 611 hw_copy_limit_4 = VIS_COPY_THRESHOLD; 612 hw_copy_limit_8 = VIS_COPY_THRESHOLD; 613 } else if (ecache_size == 1048576) { 614 hw_copy_limit_2 = 1024; 615 hw_copy_limit_4 = 1280; 616 hw_copy_limit_8 = 1536; 617 } else if (ecache_size == 2097152) { 618 hw_copy_limit_2 = 1536; 619 hw_copy_limit_4 = 2048; 620 hw_copy_limit_8 = 2560; 621 } else if (ecache_size == 4194304) { 622 hw_copy_limit_2 = 2048; 623 hw_copy_limit_4 = 2560; 624 hw_copy_limit_8 = 3072; 625 } else { 626 hw_copy_limit_2 = 2560; 627 hw_copy_limit_4 = 3072; 628 hw_copy_limit_8 = 3584; 629 } 630 } 631 632 /* 633 * Called by setcpudelay 634 */ 635 void 636 cpu_init_tick_freq(void) 637 { 638 /* 639 * Determine the cpu frequency by calling 640 * tod_get_cpufrequency. Use an approximate freqency 641 * value computed by the prom if the tod module 642 * is not initialized and loaded yet. 643 */ 644 if (tod_ops.tod_get_cpufrequency != NULL) { 645 mutex_enter(&tod_lock); 646 sys_tick_freq = tod_ops.tod_get_cpufrequency(); 647 mutex_exit(&tod_lock); 648 } else { 649 #if defined(HUMMINGBIRD) 650 /* 651 * the hummingbird version of %stick is used as the basis for 652 * low level timing; this provides an independent constant-rate 653 * clock for general system use, and frees power mgmt to set 654 * various cpu clock speeds. 655 */ 656 if (system_clock_freq == 0) 657 cmn_err(CE_PANIC, "invalid system_clock_freq 0x%lx", 658 system_clock_freq); 659 sys_tick_freq = system_clock_freq; 660 #else /* SPITFIRE */ 661 sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq; 662 #endif 663 } 664 } 665 666 667 void shipit(int upaid); 668 extern uint64_t xc_tick_limit; 669 extern uint64_t xc_tick_jump_limit; 670 671 #ifdef SEND_MONDO_STATS 672 uint64_t x_early[NCPU][64]; 673 #endif 674 675 /* 676 * Note: A version of this function is used by the debugger via the KDI, 677 * and must be kept in sync with this version. Any changes made to this 678 * function to support new chips or to accomodate errata must also be included 679 * in the KDI-specific version. See spitfire_kdi.c. 680 */ 681 void 682 send_one_mondo(int cpuid) 683 { 684 uint64_t idsr, starttick, endtick; 685 int upaid, busy, nack; 686 uint64_t tick, tick_prev; 687 ulong_t ticks; 688 689 CPU_STATS_ADDQ(CPU, sys, xcalls, 1); 690 upaid = CPUID_TO_UPAID(cpuid); 691 tick = starttick = gettick(); 692 shipit(upaid); 693 endtick = starttick + xc_tick_limit; 694 busy = nack = 0; 695 for (;;) { 696 idsr = getidsr(); 697 if (idsr == 0) 698 break; 699 /* 700 * When we detect an irregular tick jump, we adjust 701 * the timer window to the current tick value. 
702 */ 703 tick_prev = tick; 704 tick = gettick(); 705 ticks = tick - tick_prev; 706 if (ticks > xc_tick_jump_limit) { 707 endtick = tick + xc_tick_limit; 708 } else if (tick > endtick) { 709 if (panic_quiesce) 710 return; 711 cmn_err(CE_PANIC, 712 "send mondo timeout (target 0x%x) [%d NACK %d BUSY]", 713 upaid, nack, busy); 714 } 715 if (idsr & IDSR_BUSY) { 716 busy++; 717 continue; 718 } 719 drv_usecwait(1); 720 shipit(upaid); 721 nack++; 722 busy = 0; 723 } 724 #ifdef SEND_MONDO_STATS 725 x_early[getprocessorid()][highbit(gettick() - starttick) - 1]++; 726 #endif 727 } 728 729 void 730 send_mondo_set(cpuset_t set) 731 { 732 int i; 733 734 for (i = 0; i < NCPU; i++) 735 if (CPU_IN_SET(set, i)) { 736 send_one_mondo(i); 737 CPUSET_DEL(set, i); 738 if (CPUSET_ISNULL(set)) 739 break; 740 } 741 } 742 743 void 744 syncfpu(void) 745 { 746 } 747 748 /* 749 * Determine the size of the CPU module's error structure in bytes. This is 750 * called once during boot to initialize the error queues. 751 */ 752 int 753 cpu_aflt_size(void) 754 { 755 /* 756 * We need to determine whether this is a sabre, Hummingbird or a 757 * Spitfire/Blackbird impl and set the appropriate state variables for 758 * ecache tag manipulation. We can't do this in cpu_setup() as it is 759 * too early in the boot flow and the cpunodes are not initialized. 760 * This routine will be called once after cpunodes[] is ready, so do 761 * it here. 762 */ 763 if (cpunodes[CPU->cpu_id].implementation == SABRE_IMPL) { 764 isus2i = 1; 765 cpu_ec_tag_mask = SB_ECTAG_MASK; 766 cpu_ec_state_mask = SB_ECSTATE_MASK; 767 cpu_ec_par_mask = SB_ECPAR_MASK; 768 cpu_ec_par_shift = SB_ECPAR_SHIFT; 769 cpu_ec_tag_shift = SB_ECTAG_SHIFT; 770 cpu_ec_state_shift = SB_ECSTATE_SHIFT; 771 cpu_ec_state_exl = SB_ECSTATE_EXL; 772 cpu_ec_state_mod = SB_ECSTATE_MOD; 773 774 /* These states do not exist in sabre - set to 0xFF */ 775 cpu_ec_state_shr = 0xFF; 776 cpu_ec_state_own = 0xFF; 777 778 cpu_ec_state_valid = SB_ECSTATE_VALID; 779 cpu_ec_state_dirty = SB_ECSTATE_DIRTY; 780 cpu_ec_state_parity = SB_ECSTATE_PARITY; 781 cpu_ec_parity = SB_EC_PARITY; 782 } else if (cpunodes[CPU->cpu_id].implementation == HUMMBRD_IMPL) { 783 isus2e = 1; 784 cpu_ec_tag_mask = HB_ECTAG_MASK; 785 cpu_ec_state_mask = HB_ECSTATE_MASK; 786 cpu_ec_par_mask = HB_ECPAR_MASK; 787 cpu_ec_par_shift = HB_ECPAR_SHIFT; 788 cpu_ec_tag_shift = HB_ECTAG_SHIFT; 789 cpu_ec_state_shift = HB_ECSTATE_SHIFT; 790 cpu_ec_state_exl = HB_ECSTATE_EXL; 791 cpu_ec_state_mod = HB_ECSTATE_MOD; 792 793 /* These states do not exist in hummingbird - set to 0xFF */ 794 cpu_ec_state_shr = 0xFF; 795 cpu_ec_state_own = 0xFF; 796 797 cpu_ec_state_valid = HB_ECSTATE_VALID; 798 cpu_ec_state_dirty = HB_ECSTATE_DIRTY; 799 cpu_ec_state_parity = HB_ECSTATE_PARITY; 800 cpu_ec_parity = HB_EC_PARITY; 801 } 802 803 return (sizeof (spitf_async_flt)); 804 } 805 806 807 /* 808 * Correctable ecc error trap handler 809 */ 810 /*ARGSUSED*/ 811 void 812 cpu_ce_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr, 813 uint_t p_afsr_high, uint_t p_afar_high) 814 { 815 ushort_t sdbh, sdbl; 816 ushort_t e_syndh, e_syndl; 817 spitf_async_flt spf_flt; 818 struct async_flt *ecc; 819 int queue = 1; 820 821 uint64_t t_afar = p_afar; 822 uint64_t t_afsr = p_afsr; 823 824 /* 825 * Note: the Spitfire data buffer error registers 826 * (upper and lower halves) are or'ed into the upper 827 * word of the afsr by ce_err(). 
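 *
 * Each UDB half is 10 bits wide: bits <7:0> hold the ECC syndrome
 * (P_DER_E_SYND), bit 8 is the CE flag and bit 9 is the UE flag; hence the
 * 0x3FF masks below and the (sdbh >> 8) & 1 tests used to see which half
 * reported the CE.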
828 */ 829 sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF); 830 sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF); 831 832 e_syndh = (uchar_t)(sdbh & (uint_t)P_DER_E_SYND); 833 e_syndl = (uchar_t)(sdbl & (uint_t)P_DER_E_SYND); 834 835 t_afsr &= S_AFSR_MASK; 836 t_afar &= SABRE_AFAR_PA; /* must use Sabre AFAR mask */ 837 838 /* Setup the async fault structure */ 839 bzero(&spf_flt, sizeof (spitf_async_flt)); 840 ecc = (struct async_flt *)&spf_flt; 841 ecc->flt_id = gethrtime_waitfree(); 842 ecc->flt_stat = t_afsr; 843 ecc->flt_addr = t_afar; 844 ecc->flt_status = ECC_C_TRAP; 845 ecc->flt_bus_id = getprocessorid(); 846 ecc->flt_inst = CPU->cpu_id; 847 ecc->flt_pc = (caddr_t)rp->r_pc; 848 ecc->flt_func = log_ce_err; 849 ecc->flt_in_memory = 850 (pf_is_memory(ecc->flt_addr >> MMU_PAGESHIFT)) ? 1: 0; 851 spf_flt.flt_sdbh = sdbh; 852 spf_flt.flt_sdbl = sdbl; 853 854 /* 855 * Check for fatal conditions. 856 */ 857 check_misc_err(&spf_flt); 858 859 /* 860 * Pananoid checks for valid AFSR and UDBs 861 */ 862 if ((t_afsr & P_AFSR_CE) == 0) { 863 cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS, 864 "** Panic due to CE bit not set in the AFSR", 865 " Corrected Memory Error on"); 866 } 867 868 /* 869 * We want to skip logging only if ALL the following 870 * conditions are true: 871 * 872 * 1. There is only one error 873 * 2. That error is a correctable memory error 874 * 3. The error is caused by the memory scrubber (in which case 875 * the error will have occurred under on_trap protection) 876 * 4. The error is on a retired page 877 * 878 * Note: OT_DATA_EC is used places other than the memory scrubber. 879 * However, none of those errors should occur on a retired page. 880 */ 881 if ((ecc->flt_stat & (S_AFSR_ALL_ERRS & ~P_AFSR_ME)) == P_AFSR_CE && 882 curthread->t_ontrap != NULL) { 883 884 if (curthread->t_ontrap->ot_prot & OT_DATA_EC) { 885 if (page_retire_check(ecc->flt_addr, NULL) == 0) { 886 queue = 0; 887 } 888 } 889 } 890 891 if (((sdbh & P_DER_CE) == 0) && ((sdbl & P_DER_CE) == 0)) { 892 cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS, 893 "** Panic due to CE bits not set in the UDBs", 894 " Corrected Memory Error on"); 895 } 896 897 if ((sdbh >> 8) & 1) { 898 ecc->flt_synd = e_syndh; 899 ce_scrub(ecc); 900 if (queue) { 901 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc, 902 sizeof (*ecc), ce_queue, ERRORQ_ASYNC); 903 } 904 } 905 906 if ((sdbl >> 8) & 1) { 907 ecc->flt_addr = t_afar | 0x8; /* Sabres do not have a UDBL */ 908 ecc->flt_synd = e_syndl | UDBL_REG; 909 ce_scrub(ecc); 910 if (queue) { 911 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc, 912 sizeof (*ecc), ce_queue, ERRORQ_ASYNC); 913 } 914 } 915 916 /* 917 * Re-enable all error trapping (CEEN currently cleared). 
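 * This is done in three steps below: clr_datapath() clears the error bits
 * latched in the UDBs, set_asyncflt(P_AFSR_CE) clears the CE bit from the
 * AFSR, and set_error_enable(EER_ENABLE) turns CEEN (and the other error
 * enables) back on.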
918 */ 919 clr_datapath(); 920 set_asyncflt(P_AFSR_CE); 921 set_error_enable(EER_ENABLE); 922 } 923 924 /* 925 * Cpu specific CE logging routine 926 */ 927 static void 928 log_ce_err(struct async_flt *aflt, char *unum) 929 { 930 spitf_async_flt spf_flt; 931 932 if ((aflt->flt_stat & P_AFSR_CE) && (ce_verbose_memory == 0)) { 933 return; 934 } 935 936 spf_flt.cmn_asyncflt = *aflt; 937 cpu_aflt_log(CE_CONT, 0, &spf_flt, CE_LFLAGS, unum, 938 " Corrected Memory Error detected by"); 939 } 940 941 /* 942 * Spitfire does not perform any further CE classification refinement 943 */ 944 /*ARGSUSED*/ 945 int 946 ce_scrub_xdiag_recirc(struct async_flt *ecc, errorq_t *eqp, errorq_elem_t *eqep, 947 size_t afltoffset) 948 { 949 return (0); 950 } 951 952 char * 953 flt_to_error_type(struct async_flt *aflt) 954 { 955 if (aflt->flt_status & ECC_INTERMITTENT) 956 return (ERR_TYPE_DESC_INTERMITTENT); 957 if (aflt->flt_status & ECC_PERSISTENT) 958 return (ERR_TYPE_DESC_PERSISTENT); 959 if (aflt->flt_status & ECC_STICKY) 960 return (ERR_TYPE_DESC_STICKY); 961 return (ERR_TYPE_DESC_UNKNOWN); 962 } 963 964 /* 965 * Called by correctable ecc error logging code to print out 966 * the stick/persistent/intermittent status of the error. 967 */ 968 static void 969 cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum) 970 { 971 ushort_t status; 972 char *status1_str = "Memory"; 973 char *status2_str = "Intermittent"; 974 struct async_flt *aflt = (struct async_flt *)spf_flt; 975 976 status = aflt->flt_status; 977 978 if (status & ECC_ECACHE) 979 status1_str = "Ecache"; 980 981 if (status & ECC_STICKY) 982 status2_str = "Sticky"; 983 else if (status & ECC_PERSISTENT) 984 status2_str = "Persistent"; 985 986 cpu_aflt_log(CE_CONT, 0, spf_flt, CPU_ERRID_FIRST, 987 NULL, " Corrected %s Error on %s is %s", 988 status1_str, unum, status2_str); 989 } 990 991 /* 992 * check for a valid ce syndrome, then call the 993 * displacement flush scrubbing code, and then check the afsr to see if 994 * the error was persistent or intermittent. Reread the afar/afsr to see 995 * if the error was not scrubbed successfully, and is therefore sticky. 996 */ 997 /*ARGSUSED1*/ 998 void 999 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t triedcpulogout) 1000 { 1001 uint64_t eer, afsr; 1002 ushort_t status; 1003 1004 ASSERT(getpil() > LOCK_LEVEL); 1005 1006 /* 1007 * It is possible that the flt_addr is not a valid 1008 * physical address. To deal with this, we disable 1009 * NCEEN while we scrub that address. If this causes 1010 * a TIMEOUT/BERR, we know this is an invalid 1011 * memory location. 1012 */ 1013 kpreempt_disable(); 1014 eer = get_error_enable(); 1015 if (eer & (EER_CEEN | EER_NCEEN)) 1016 set_error_enable(eer & ~(EER_CEEN | EER_NCEEN)); 1017 1018 /* 1019 * To check if the error detected by IO is persistent, sticky or 1020 * intermittent. 1021 */ 1022 if (ecc->flt_status & ECC_IOBUS) { 1023 ecc->flt_stat = P_AFSR_CE; 1024 } 1025 1026 scrubphys(P2ALIGN(ecc->flt_addr, 64), 1027 cpunodes[CPU->cpu_id].ecache_size); 1028 1029 get_asyncflt(&afsr); 1030 if (afsr & (P_AFSR_TO | P_AFSR_BERR)) { 1031 /* 1032 * Must ensure that we don't get the TIMEOUT/BERR 1033 * when we reenable NCEEN, so we clear the AFSR. 1034 */ 1035 set_asyncflt(afsr & (P_AFSR_TO | P_AFSR_BERR)); 1036 if (eer & (EER_CEEN | EER_NCEEN)) 1037 set_error_enable(eer); 1038 kpreempt_enable(); 1039 return; 1040 } 1041 1042 if (eer & EER_NCEEN) 1043 set_error_enable(eer & ~EER_CEEN); 1044 1045 /* 1046 * Check and clear any ECC errors from the scrub. 
If the scrub did 1047 * not trip over the error, mark it intermittent. If the scrub did 1048 * trip the error again and it did not scrub away, mark it sticky. 1049 * Otherwise mark it persistent. 1050 */ 1051 if (check_ecc(ecc) != 0) { 1052 cpu_read_paddr(ecc, 0, 1); 1053 1054 if (check_ecc(ecc) != 0) 1055 status = ECC_STICKY; 1056 else 1057 status = ECC_PERSISTENT; 1058 } else 1059 status = ECC_INTERMITTENT; 1060 1061 if (eer & (EER_CEEN | EER_NCEEN)) 1062 set_error_enable(eer); 1063 kpreempt_enable(); 1064 1065 ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY); 1066 ecc->flt_status |= status; 1067 } 1068 1069 /* 1070 * get the syndrome and unum, and then call the routines 1071 * to check the other cpus and iobuses, and then do the error logging. 1072 */ 1073 /*ARGSUSED1*/ 1074 void 1075 cpu_ce_log_err(struct async_flt *ecc, errorq_elem_t *eqep) 1076 { 1077 char unum[UNUM_NAMLEN]; 1078 int len = 0; 1079 int ce_verbose = 0; 1080 int err; 1081 1082 ASSERT(ecc->flt_func != NULL); 1083 1084 /* Get the unum string for logging purposes */ 1085 (void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, ecc, unum, 1086 UNUM_NAMLEN, &len); 1087 1088 /* Call specific error logging routine */ 1089 (void) (*ecc->flt_func)(ecc, unum); 1090 1091 /* 1092 * Count errors per unum. 1093 * Non-memory errors are all counted via a special unum string. 1094 */ 1095 if ((err = ce_count_unum(ecc->flt_status, len, unum)) != PR_OK && 1096 automatic_page_removal) { 1097 (void) page_retire(ecc->flt_addr, err); 1098 } 1099 1100 if (ecc->flt_panic) { 1101 ce_verbose = 1; 1102 } else if ((ecc->flt_class == BUS_FAULT) || 1103 (ecc->flt_stat & P_AFSR_CE)) { 1104 ce_verbose = (ce_verbose_memory > 0); 1105 } else { 1106 ce_verbose = 1; 1107 } 1108 1109 if (ce_verbose) { 1110 spitf_async_flt sflt; 1111 int synd_code; 1112 1113 sflt.cmn_asyncflt = *ecc; /* for cpu_aflt_log() */ 1114 1115 cpu_ce_log_status(&sflt, unum); 1116 1117 synd_code = synd_to_synd_code(AFLT_STAT_VALID, 1118 SYND(ecc->flt_synd)); 1119 1120 if (SYND_IS_SINGLE_BIT_DATA(synd_code)) { 1121 cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST, 1122 NULL, " ECC Data Bit %2d was in error " 1123 "and corrected", synd_code); 1124 } else if (SYND_IS_SINGLE_BIT_CHK(synd_code)) { 1125 cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST, 1126 NULL, " ECC Check Bit %2d was in error " 1127 "and corrected", synd_code - C0); 1128 } else { 1129 /* 1130 * These are UE errors - we shouldn't be getting CE 1131 * traps for these; handle them in case of bad h/w. 1132 */ 1133 switch (synd_code) { 1134 case M2: 1135 cpu_aflt_log(CE_CONT, 0, &sflt, 1136 CPU_ERRID_FIRST, NULL, 1137 " Two ECC Bits were in error"); 1138 break; 1139 case M3: 1140 cpu_aflt_log(CE_CONT, 0, &sflt, 1141 CPU_ERRID_FIRST, NULL, 1142 " Three ECC Bits were in error"); 1143 break; 1144 case M4: 1145 cpu_aflt_log(CE_CONT, 0, &sflt, 1146 CPU_ERRID_FIRST, NULL, 1147 " Four ECC Bits were in error"); 1148 break; 1149 case MX: 1150 cpu_aflt_log(CE_CONT, 0, &sflt, 1151 CPU_ERRID_FIRST, NULL, 1152 " More than Four ECC bits were " 1153 "in error"); 1154 break; 1155 default: 1156 cpu_aflt_log(CE_CONT, 0, &sflt, 1157 CPU_ERRID_FIRST, NULL, 1158 " Unknown fault syndrome %d", 1159 synd_code); 1160 break; 1161 } 1162 } 1163 } 1164 1165 /* Display entire cache line, if valid address */ 1166 if (ce_show_data && ecc->flt_addr != AFLT_INV_ADDR) 1167 read_ecc_data(ecc, 1, 1); 1168 } 1169 1170 /* 1171 * We route all errors through a single switch statement. 
1172 */ 1173 void 1174 cpu_ue_log_err(struct async_flt *aflt) 1175 { 1176 1177 switch (aflt->flt_class) { 1178 case CPU_FAULT: 1179 cpu_async_log_err(aflt); 1180 break; 1181 1182 case BUS_FAULT: 1183 bus_async_log_err(aflt); 1184 break; 1185 1186 default: 1187 cmn_err(CE_WARN, "discarding async error 0x%p with invalid " 1188 "fault class (0x%x)", (void *)aflt, aflt->flt_class); 1189 break; 1190 } 1191 } 1192 1193 /* Values for action variable in cpu_async_error() */ 1194 #define ACTION_NONE 0 1195 #define ACTION_TRAMPOLINE 1 1196 #define ACTION_AST_FLAGS 2 1197 1198 /* 1199 * Access error trap handler for asynchronous cpu errors. This routine is 1200 * called to handle a data or instruction access error. All fatal errors are 1201 * completely handled by this routine (by panicking). Non fatal error logging 1202 * is queued for later processing either via AST or softint at a lower PIL. 1203 * In case of panic, the error log queue will also be processed as part of the 1204 * panic flow to ensure all errors are logged. This routine is called with all 1205 * errors disabled at PIL15. The AFSR bits are cleared and the UDBL and UDBH 1206 * error bits are also cleared. The hardware has also disabled the I and 1207 * D-caches for us, so we must re-enable them before returning. 1208 * 1209 * A summary of the handling of tl=0 UE/LDP/EDP/TO/BERR/WP/CP: 1210 * 1211 * _______________________________________________________________ 1212 * | Privileged tl0 | Unprivileged | 1213 * | Protected | Unprotected | Protected | Unprotected | 1214 * |on_trap|lofault| | | | 1215 * -------------|-------|-------+---------------+---------------+-------------| 1216 * | | | | | | 1217 * UE/LDP/EDP | L,T,p | L,R,p | L,P | n/a | L,R,p | 1218 * | | | | | | 1219 * TO/BERR | T | S | L,P | n/a | S | 1220 * | | | | | | 1221 * WP | L,M,p | L,M,p | L,M,p | n/a | L,M,p | 1222 * | | | | | | 1223 * CP (IIi/IIe) | L,P | L,P | L,P | n/a | L,P | 1224 * ____________________________________________________________________________ 1225 * 1226 * 1227 * Action codes: 1228 * 1229 * L - log 1230 * M - kick off memscrubber if flt_in_memory 1231 * P - panic 1232 * p - panic if US-IIi or US-IIe (Sabre); overrides R and M 1233 * R - i) if aft_panic is set, panic 1234 * ii) otherwise, send hwerr event to contract and SIGKILL to process 1235 * S - send SIGBUS to process 1236 * T - trampoline 1237 * 1238 * Special cases: 1239 * 1240 * 1) if aft_testfatal is set, all faults result in a panic regardless 1241 * of type (even WP), protection (even on_trap), or privilege. 1242 */ 1243 /*ARGSUSED*/ 1244 void 1245 cpu_async_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr, 1246 uint_t p_afsr_high, uint_t p_afar_high) 1247 { 1248 ushort_t sdbh, sdbl, ttype, tl; 1249 spitf_async_flt spf_flt; 1250 struct async_flt *aflt; 1251 char pr_reason[28]; 1252 uint64_t oafsr; 1253 uint64_t acc_afsr = 0; /* accumulated afsr */ 1254 int action = ACTION_NONE; 1255 uint64_t t_afar = p_afar; 1256 uint64_t t_afsr = p_afsr; 1257 int expected = DDI_FM_ERR_UNEXPECTED; 1258 ddi_acc_hdl_t *hp; 1259 1260 /* 1261 * We need to look at p_flag to determine if the thread detected an 1262 * error while dumping core. We can't grab p_lock here, but it's ok 1263 * because we just need a consistent snapshot and we know that everyone 1264 * else will store a consistent set of bits while holding p_lock. We 1265 * don't have to worry about a race because SDOCORE is set once prior 1266 * to doing i/o from the process's address space and is never cleared. 
1267 */ 1268 uint_t pflag = ttoproc(curthread)->p_flag; 1269 1270 pr_reason[0] = '\0'; 1271 1272 /* 1273 * Note: the Spitfire data buffer error registers 1274 * (upper and lower halves) are or'ed into the upper 1275 * word of the afsr by async_err() if P_AFSR_UE is set. 1276 */ 1277 sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF); 1278 sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF); 1279 1280 /* 1281 * Grab the ttype encoded in <63:53> of the saved 1282 * afsr passed from async_err() 1283 */ 1284 ttype = (ushort_t)((t_afsr >> 53) & 0x1FF); 1285 tl = (ushort_t)(t_afsr >> 62); 1286 1287 t_afsr &= S_AFSR_MASK; 1288 t_afar &= SABRE_AFAR_PA; /* must use Sabre AFAR mask */ 1289 1290 /* 1291 * Initialize most of the common and CPU-specific structure. We derive 1292 * aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit. The 1293 * initial setting of aflt->flt_panic is based on TL: we must panic if 1294 * the error occurred at TL > 0. We also set flt_panic if the test/demo 1295 * tuneable aft_testfatal is set (not the default). 1296 */ 1297 bzero(&spf_flt, sizeof (spitf_async_flt)); 1298 aflt = (struct async_flt *)&spf_flt; 1299 aflt->flt_id = gethrtime_waitfree(); 1300 aflt->flt_stat = t_afsr; 1301 aflt->flt_addr = t_afar; 1302 aflt->flt_bus_id = getprocessorid(); 1303 aflt->flt_inst = CPU->cpu_id; 1304 aflt->flt_pc = (caddr_t)rp->r_pc; 1305 aflt->flt_prot = AFLT_PROT_NONE; 1306 aflt->flt_class = CPU_FAULT; 1307 aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0; 1308 aflt->flt_tl = (uchar_t)tl; 1309 aflt->flt_panic = (tl != 0 || aft_testfatal != 0); 1310 aflt->flt_core = (pflag & SDOCORE) ? 1 : 0; 1311 1312 /* 1313 * Set flt_status based on the trap type. If we end up here as the 1314 * result of a UE detected by the CE handling code, leave status 0. 1315 */ 1316 switch (ttype) { 1317 case T_DATA_ERROR: 1318 aflt->flt_status = ECC_D_TRAP; 1319 break; 1320 case T_INSTR_ERROR: 1321 aflt->flt_status = ECC_I_TRAP; 1322 break; 1323 } 1324 1325 spf_flt.flt_sdbh = sdbh; 1326 spf_flt.flt_sdbl = sdbl; 1327 1328 /* 1329 * Check for fatal async errors. 1330 */ 1331 check_misc_err(&spf_flt); 1332 1333 /* 1334 * If the trap occurred in privileged mode at TL=0, we need to check to 1335 * see if we were executing in the kernel under on_trap() or t_lofault 1336 * protection. If so, modify the saved registers so that we return 1337 * from the trap to the appropriate trampoline routine. 
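 *
 * For reference, a sketch of the caller-side pattern that establishes
 * on_trap() protection (the memory scrubber uses OT_DATA_EC, the peek and
 * cautious-access paths use OT_DATA_ACCESS; names here are illustrative):
 *
 *	on_trap_data_t otd;
 *	int failed = 0;
 *
 *	if (!on_trap(&otd, OT_DATA_EC)) {
 *		(void) lddphys(pa);	the access that may trap
 *	} else {
 *		failed = 1;		trampolined back here on an error
 *	}
 *	no_trap();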
1338 */ 1339 if (aflt->flt_priv && tl == 0) { 1340 if (curthread->t_ontrap != NULL) { 1341 on_trap_data_t *otp = curthread->t_ontrap; 1342 1343 if (otp->ot_prot & OT_DATA_EC) { 1344 aflt->flt_prot = AFLT_PROT_EC; 1345 otp->ot_trap |= OT_DATA_EC; 1346 rp->r_pc = otp->ot_trampoline; 1347 rp->r_npc = rp->r_pc + 4; 1348 action = ACTION_TRAMPOLINE; 1349 } 1350 1351 if ((t_afsr & (P_AFSR_TO | P_AFSR_BERR)) && 1352 (otp->ot_prot & OT_DATA_ACCESS)) { 1353 aflt->flt_prot = AFLT_PROT_ACCESS; 1354 otp->ot_trap |= OT_DATA_ACCESS; 1355 rp->r_pc = otp->ot_trampoline; 1356 rp->r_npc = rp->r_pc + 4; 1357 action = ACTION_TRAMPOLINE; 1358 /* 1359 * for peeks and caut_gets errors are expected 1360 */ 1361 hp = (ddi_acc_hdl_t *)otp->ot_handle; 1362 if (!hp) 1363 expected = DDI_FM_ERR_PEEK; 1364 else if (hp->ah_acc.devacc_attr_access == 1365 DDI_CAUTIOUS_ACC) 1366 expected = DDI_FM_ERR_EXPECTED; 1367 } 1368 1369 } else if (curthread->t_lofault) { 1370 aflt->flt_prot = AFLT_PROT_COPY; 1371 rp->r_g1 = EFAULT; 1372 rp->r_pc = curthread->t_lofault; 1373 rp->r_npc = rp->r_pc + 4; 1374 action = ACTION_TRAMPOLINE; 1375 } 1376 } 1377 1378 /* 1379 * Determine if this error needs to be treated as fatal. Note that 1380 * multiple errors detected upon entry to this trap handler does not 1381 * necessarily warrant a panic. We only want to panic if the trap 1382 * happened in privileged mode and not under t_ontrap or t_lofault 1383 * protection. The exception is WP: if we *only* get WP, it is not 1384 * fatal even if the trap occurred in privileged mode, except on Sabre. 1385 * 1386 * aft_panic, if set, effectively makes us treat usermode 1387 * UE/EDP/LDP faults as if they were privileged - so we we will 1388 * panic instead of sending a contract event. A lofault-protected 1389 * fault will normally follow the contract event; if aft_panic is 1390 * set this will be changed to a panic. 1391 * 1392 * For usermode BERR/BTO errors, eg from processes performing device 1393 * control through mapped device memory, we need only deliver 1394 * a SIGBUS to the offending process. 1395 * 1396 * Some additional flt_panic reasons (eg, WP on Sabre) will be 1397 * checked later; for now we implement the common reasons. 1398 */ 1399 if (aflt->flt_prot == AFLT_PROT_NONE) { 1400 /* 1401 * Beware - multiple bits may be set in AFSR 1402 */ 1403 if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) { 1404 if (aflt->flt_priv || aft_panic) 1405 aflt->flt_panic = 1; 1406 } 1407 1408 if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) { 1409 if (aflt->flt_priv) 1410 aflt->flt_panic = 1; 1411 } 1412 } else if (aflt->flt_prot == AFLT_PROT_COPY && aft_panic) { 1413 aflt->flt_panic = 1; 1414 } 1415 1416 /* 1417 * UE/BERR/TO: Call our bus nexus friends to check for 1418 * IO errors that may have resulted in this trap. 1419 */ 1420 if (t_afsr & (P_AFSR_TO | P_AFSR_BERR | P_AFSR_UE)) { 1421 cpu_run_bus_error_handlers(aflt, expected); 1422 } 1423 1424 /* 1425 * Handle UE: If the UE is in memory, we need to flush the bad line from 1426 * the E-cache. We also need to query the bus nexus for fatal errors. 1427 * For sabre, we will panic on UEs. Attempts to do diagnostic read on 1428 * caches may introduce more parity errors (especially when the module 1429 * is bad) and in sabre there is no guarantee that such errors 1430 * (if introduced) are written back as poisoned data. 
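 *
 * The diagnostic read below probes every E$ way that could hold the fault
 * PA: with set size ec_set_size = ecache_size / ecache_associativity, way i
 * is probed at index i * ec_set_size + (flt_addr % ec_set_size), aligned
 * down to the 64-byte line.  On a direct-mapped 512KB E$ that reduces to
 * (flt_addr % 512KB) & ~63.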
1431 */ 1432 if (t_afsr & P_AFSR_UE) { 1433 int i; 1434 1435 (void) strcat(pr_reason, "UE "); 1436 1437 spf_flt.flt_type = CPU_UE_ERR; 1438 aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >> 1439 MMU_PAGESHIFT)) ? 1: 0; 1440 1441 /* 1442 * With UE, we have the PA of the fault. 1443 * Let do a diagnostic read to get the ecache 1444 * data and tag info of the bad line for logging. 1445 */ 1446 if (aflt->flt_in_memory) { 1447 uint32_t ec_set_size; 1448 uchar_t state; 1449 uint32_t ecache_idx; 1450 uint64_t faultpa = P2ALIGN(aflt->flt_addr, 64); 1451 1452 /* touch the line to put it in ecache */ 1453 acc_afsr |= read_and_clear_afsr(); 1454 (void) lddphys(faultpa); 1455 acc_afsr |= (read_and_clear_afsr() & 1456 ~(P_AFSR_EDP | P_AFSR_UE)); 1457 1458 ec_set_size = cpunodes[CPU->cpu_id].ecache_size / 1459 ecache_associativity; 1460 1461 for (i = 0; i < ecache_associativity; i++) { 1462 ecache_idx = i * ec_set_size + 1463 (aflt->flt_addr % ec_set_size); 1464 get_ecache_dtag(P2ALIGN(ecache_idx, 64), 1465 (uint64_t *)&spf_flt.flt_ec_data[0], 1466 &spf_flt.flt_ec_tag, &oafsr, &acc_afsr); 1467 acc_afsr |= oafsr; 1468 1469 state = (uchar_t)((spf_flt.flt_ec_tag & 1470 cpu_ec_state_mask) >> cpu_ec_state_shift); 1471 1472 if ((state & cpu_ec_state_valid) && 1473 ((spf_flt.flt_ec_tag & cpu_ec_tag_mask) == 1474 ((uint64_t)aflt->flt_addr >> 1475 cpu_ec_tag_shift))) 1476 break; 1477 } 1478 1479 /* 1480 * Check to see if the ecache tag is valid for the 1481 * fault PA. In the very unlikely event where the 1482 * line could be victimized, no ecache info will be 1483 * available. If this is the case, capture the line 1484 * from memory instead. 1485 */ 1486 if ((state & cpu_ec_state_valid) == 0 || 1487 (spf_flt.flt_ec_tag & cpu_ec_tag_mask) != 1488 ((uint64_t)aflt->flt_addr >> cpu_ec_tag_shift)) { 1489 for (i = 0; i < 8; i++, faultpa += 8) { 1490 ec_data_t *ecdptr; 1491 1492 ecdptr = &spf_flt.flt_ec_data[i]; 1493 acc_afsr |= read_and_clear_afsr(); 1494 ecdptr->ec_d8 = lddphys(faultpa); 1495 acc_afsr |= (read_and_clear_afsr() & 1496 ~(P_AFSR_EDP | P_AFSR_UE)); 1497 ecdptr->ec_afsr = 0; 1498 /* null afsr value */ 1499 } 1500 1501 /* 1502 * Mark tag invalid to indicate mem dump 1503 * when we print out the info. 1504 */ 1505 spf_flt.flt_ec_tag = AFLT_INV_ADDR; 1506 } 1507 spf_flt.flt_ec_lcnt = 1; 1508 1509 /* 1510 * Flush out the bad line 1511 */ 1512 flushecacheline(P2ALIGN(aflt->flt_addr, 64), 1513 cpunodes[CPU->cpu_id].ecache_size); 1514 1515 acc_afsr |= clear_errors(NULL, NULL); 1516 } 1517 1518 /* 1519 * Ask our bus nexus friends if they have any fatal errors. If 1520 * so, they will log appropriate error messages and panic as a 1521 * result. We then queue an event for each UDB that reports a 1522 * UE. Each UE reported in a UDB will have its own log message. 1523 * 1524 * Note from kbn: In the case where there are multiple UEs 1525 * (ME bit is set) - the AFAR address is only accurate to 1526 * the 16-byte granularity. One cannot tell whether the AFAR 1527 * belongs to the UDBH or UDBL syndromes. In this case, we 1528 * always report the AFAR address to be 16-byte aligned. 1529 * 1530 * If we're on a Sabre, there is no SDBL, but it will always 1531 * read as zero, so the sdbl test below will safely fail. 
1532 */ 1533 if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL || isus2i || isus2e) 1534 aflt->flt_panic = 1; 1535 1536 if (sdbh & P_DER_UE) { 1537 aflt->flt_synd = sdbh & P_DER_E_SYND; 1538 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE, 1539 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1540 aflt->flt_panic); 1541 } 1542 if (sdbl & P_DER_UE) { 1543 aflt->flt_synd = sdbl & P_DER_E_SYND; 1544 aflt->flt_synd |= UDBL_REG; /* indicates UDBL */ 1545 if (!(aflt->flt_stat & P_AFSR_ME)) 1546 aflt->flt_addr |= 0x8; 1547 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE, 1548 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1549 aflt->flt_panic); 1550 } 1551 1552 /* 1553 * We got a UE and are panicking, save the fault PA in a known 1554 * location so that the platform specific panic code can check 1555 * for copyback errors. 1556 */ 1557 if (aflt->flt_panic && aflt->flt_in_memory) { 1558 panic_aflt = *aflt; 1559 } 1560 } 1561 1562 /* 1563 * Handle EDP and LDP: Locate the line with bad parity and enqueue an 1564 * async error for logging. For Sabre, we panic on EDP or LDP. 1565 */ 1566 if (t_afsr & (P_AFSR_EDP | P_AFSR_LDP)) { 1567 spf_flt.flt_type = CPU_EDP_LDP_ERR; 1568 1569 if (t_afsr & P_AFSR_EDP) 1570 (void) strcat(pr_reason, "EDP "); 1571 1572 if (t_afsr & P_AFSR_LDP) 1573 (void) strcat(pr_reason, "LDP "); 1574 1575 /* 1576 * Here we have no PA to work with. 1577 * Scan each line in the ecache to look for 1578 * the one with bad parity. 1579 */ 1580 aflt->flt_addr = AFLT_INV_ADDR; 1581 scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0], 1582 &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr); 1583 acc_afsr |= (oafsr & ~P_AFSR_WP); 1584 1585 /* 1586 * If we found a bad PA, update the state to indicate if it is 1587 * memory or I/O space. This code will be important if we ever 1588 * support cacheable frame buffers. 1589 */ 1590 if (aflt->flt_addr != AFLT_INV_ADDR) { 1591 aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >> 1592 MMU_PAGESHIFT)) ? 1 : 0; 1593 } 1594 1595 if (isus2i || isus2e) 1596 aflt->flt_panic = 1; 1597 1598 cpu_errorq_dispatch((t_afsr & P_AFSR_EDP) ? 1599 FM_EREPORT_CPU_USII_EDP : FM_EREPORT_CPU_USII_LDP, 1600 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1601 aflt->flt_panic); 1602 } 1603 1604 /* 1605 * Timeout and bus error handling. There are two cases to consider: 1606 * 1607 * (1) If we are in the kernel protected by ddi_peek or ddi_poke,we 1608 * have already modified the saved registers so that we will return 1609 * from the trap to the appropriate trampoline routine; otherwise panic. 1610 * 1611 * (2) In user mode, we can simply use our AST mechanism to deliver 1612 * a SIGBUS. We do not log the occurence - processes performing 1613 * device control would generate lots of uninteresting messages. 1614 */ 1615 if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) { 1616 if (t_afsr & P_AFSR_TO) 1617 (void) strcat(pr_reason, "BTO "); 1618 1619 if (t_afsr & P_AFSR_BERR) 1620 (void) strcat(pr_reason, "BERR "); 1621 1622 spf_flt.flt_type = CPU_BTO_BERR_ERR; 1623 if (aflt->flt_priv && aflt->flt_prot == AFLT_PROT_NONE) { 1624 cpu_errorq_dispatch((t_afsr & P_AFSR_TO) ? 1625 FM_EREPORT_CPU_USII_TO : FM_EREPORT_CPU_USII_BERR, 1626 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1627 aflt->flt_panic); 1628 } 1629 } 1630 1631 /* 1632 * Handle WP: WP happens when the ecache is victimized and a parity 1633 * error was detected on a writeback. The data in question will be 1634 * poisoned as a UE will be written back. The PA is not logged and 1635 * it is possible that it doesn't belong to the trapped thread. 
The 1636 * WP trap is not fatal, but it could be fatal to someone that 1637 * subsequently accesses the toxic page. We set read_all_memscrub 1638 * to force the memscrubber to read all of memory when it awakens. 1639 * For Sabre/Hummingbird, WP is fatal because the HW doesn't write a 1640 * UE back to poison the data. 1641 */ 1642 if (t_afsr & P_AFSR_WP) { 1643 (void) strcat(pr_reason, "WP "); 1644 if (isus2i || isus2e) { 1645 aflt->flt_panic = 1; 1646 } else { 1647 read_all_memscrub = 1; 1648 } 1649 spf_flt.flt_type = CPU_WP_ERR; 1650 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_WP, 1651 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1652 aflt->flt_panic); 1653 } 1654 1655 /* 1656 * Handle trapping CP error: In Sabre/Hummingbird, parity error in 1657 * the ecache on a copyout due to a PCI DMA read is signaled as a CP. 1658 * This is fatal. 1659 */ 1660 1661 if (t_afsr & P_AFSR_CP) { 1662 if (isus2i || isus2e) { 1663 (void) strcat(pr_reason, "CP "); 1664 aflt->flt_panic = 1; 1665 spf_flt.flt_type = CPU_TRAPPING_CP_ERR; 1666 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP, 1667 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1668 aflt->flt_panic); 1669 } else { 1670 /* 1671 * Orphan CP: Happens due to signal integrity problem 1672 * on a CPU, where a CP is reported, without reporting 1673 * its associated UE. This is handled by locating the 1674 * bad parity line and would kick off the memscrubber 1675 * to find the UE if in memory or in another's cache. 1676 */ 1677 spf_flt.flt_type = CPU_ORPHAN_CP_ERR; 1678 (void) strcat(pr_reason, "ORPHAN_CP "); 1679 1680 /* 1681 * Here we have no PA to work with. 1682 * Scan each line in the ecache to look for 1683 * the one with bad parity. 1684 */ 1685 aflt->flt_addr = AFLT_INV_ADDR; 1686 scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0], 1687 &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, 1688 &oafsr); 1689 acc_afsr |= oafsr; 1690 1691 /* 1692 * If we found a bad PA, update the state to indicate 1693 * if it is memory or I/O space. 1694 */ 1695 if (aflt->flt_addr != AFLT_INV_ADDR) { 1696 aflt->flt_in_memory = 1697 (pf_is_memory(aflt->flt_addr >> 1698 MMU_PAGESHIFT)) ? 1 : 0; 1699 } 1700 read_all_memscrub = 1; 1701 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP, 1702 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1703 aflt->flt_panic); 1704 1705 } 1706 } 1707 1708 /* 1709 * If we queued an error other than WP or CP and we are going to return 1710 * from the trap and the error was in user mode or inside of a 1711 * copy routine, set AST flag so the queue will be drained before 1712 * returning to user mode. 1713 * 1714 * For UE/LDP/EDP, the AST processing will SIGKILL the process 1715 * and send an event to its process contract. 1716 * 1717 * For BERR/BTO, the AST processing will SIGBUS the process. There 1718 * will have been no error queued in this case. 1719 */ 1720 if ((t_afsr & 1721 (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP | P_AFSR_BERR | P_AFSR_TO)) && 1722 (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY)) { 1723 int pcb_flag = 0; 1724 1725 if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) 1726 pcb_flag |= ASYNC_HWERR; 1727 1728 if (t_afsr & P_AFSR_BERR) 1729 pcb_flag |= ASYNC_BERR; 1730 1731 if (t_afsr & P_AFSR_TO) 1732 pcb_flag |= ASYNC_BTO; 1733 1734 ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag; 1735 aston(curthread); 1736 action = ACTION_AST_FLAGS; 1737 } 1738 1739 /* 1740 * In response to a deferred error, we must do one of three things: 1741 * (1) set the AST flags, (2) trampoline, or (3) panic. 
action is 1742 * set in cases (1) and (2) - check that either action is set or 1743 * (3) is true. 1744 * 1745 * On II, the WP writes poisoned data back to memory, which will 1746 * cause a UE and a panic or reboot when read. In this case, we 1747 * don't need to panic at this time. On IIi and IIe, 1748 * aflt->flt_panic is already set above. 1749 */ 1750 ASSERT((aflt->flt_panic != 0) || (action != ACTION_NONE) || 1751 (t_afsr & P_AFSR_WP)); 1752 1753 /* 1754 * Make a final sanity check to make sure we did not get any more async 1755 * errors and accumulate the afsr. 1756 */ 1757 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2, 1758 cpunodes[CPU->cpu_id].ecache_linesize); 1759 (void) clear_errors(&spf_flt, NULL); 1760 1761 /* 1762 * Take care of a special case: If there is a UE in the ecache flush 1763 * area, we'll see it in flush_ecache(). This will trigger the 1764 * CPU_ADDITIONAL_ERRORS case below. 1765 * 1766 * This could occur if the original error was a UE in the flush area, 1767 * or if the original error was an E$ error that was flushed out of 1768 * the E$ in scan_ecache(). 1769 * 1770 * If it's at the same address that we're already logging, then it's 1771 * probably one of these cases. Clear the bit so we don't trip over 1772 * it on the additional errors case, which could cause an unnecessary 1773 * panic. 1774 */ 1775 if ((aflt->flt_stat & P_AFSR_UE) && aflt->flt_addr == t_afar) 1776 acc_afsr |= aflt->flt_stat & ~P_AFSR_UE; 1777 else 1778 acc_afsr |= aflt->flt_stat; 1779 1780 /* 1781 * Check the acumulated afsr for the important bits. 1782 * Make sure the spf_flt.flt_type value is set, and 1783 * enque an error. 1784 */ 1785 if (acc_afsr & 1786 (P_AFSR_LEVEL1 | P_AFSR_IVUE | P_AFSR_ETP | P_AFSR_ISAP)) { 1787 if (acc_afsr & (P_AFSR_UE | P_AFSR_EDP | P_AFSR_LDP | 1788 P_AFSR_BERR | P_AFSR_TO | P_AFSR_IVUE | P_AFSR_ETP | 1789 P_AFSR_ISAP)) 1790 aflt->flt_panic = 1; 1791 1792 spf_flt.flt_type = CPU_ADDITIONAL_ERR; 1793 aflt->flt_stat = acc_afsr; 1794 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UNKNOWN, 1795 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1796 aflt->flt_panic); 1797 } 1798 1799 /* 1800 * If aflt->flt_panic is set at this point, we need to panic as the 1801 * result of a trap at TL > 0, or an error we determined to be fatal. 1802 * We've already enqueued the error in one of the if-clauses above, 1803 * and it will be dequeued and logged as part of the panic flow. 1804 */ 1805 if (aflt->flt_panic) { 1806 cpu_aflt_log(CE_PANIC, 1, &spf_flt, CPU_ERRID_FIRST, 1807 "See previous message(s) for details", " %sError(s)", 1808 pr_reason); 1809 } 1810 1811 /* 1812 * Before returning, we must re-enable errors, and 1813 * reset the caches to their boot-up state. 1814 */ 1815 set_lsu(get_lsu() | cache_boot_state); 1816 set_error_enable(EER_ENABLE); 1817 } 1818 1819 /* 1820 * Check for miscellaneous fatal errors and call CE_PANIC if any are seen. 1821 * This routine is shared by the CE and UE handling code. 1822 */ 1823 static void 1824 check_misc_err(spitf_async_flt *spf_flt) 1825 { 1826 struct async_flt *aflt = (struct async_flt *)spf_flt; 1827 char *fatal_str = NULL; 1828 1829 /* 1830 * The ISAP and ETP errors are supposed to cause a POR 1831 * from the system, so in theory we never, ever see these messages. 1832 * ISAP, ETP and IVUE are considered to be fatal. 
1833 */ 1834 if (aflt->flt_stat & P_AFSR_ISAP) 1835 fatal_str = " System Address Parity Error on"; 1836 else if (aflt->flt_stat & P_AFSR_ETP) 1837 fatal_str = " Ecache Tag Parity Error on"; 1838 else if (aflt->flt_stat & P_AFSR_IVUE) 1839 fatal_str = " Interrupt Vector Uncorrectable Error on"; 1840 if (fatal_str != NULL) { 1841 cpu_aflt_log(CE_PANIC, 1, spf_flt, CMN_LFLAGS, 1842 NULL, fatal_str); 1843 } 1844 } 1845 1846 /* 1847 * Routine to convert a syndrome into a syndrome code. 1848 */ 1849 static int 1850 synd_to_synd_code(int synd_status, ushort_t synd) 1851 { 1852 if (synd_status != AFLT_STAT_VALID) 1853 return (-1); 1854 1855 /* 1856 * Use the 8-bit syndrome to index the ecc_syndrome_tab 1857 * to get the code indicating which bit(s) is(are) bad. 1858 */ 1859 if ((synd == 0) || (synd >= SYND_TBL_SIZE)) 1860 return (-1); 1861 else 1862 return (ecc_syndrome_tab[synd]); 1863 } 1864 1865 /* ARGSUSED */ 1866 int 1867 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp) 1868 { 1869 return (ENOTSUP); 1870 } 1871 1872 /* ARGSUSED */ 1873 int 1874 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp) 1875 { 1876 return (ENOTSUP); 1877 } 1878 1879 /* ARGSUSED */ 1880 int 1881 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp) 1882 { 1883 return (ENOTSUP); 1884 } 1885 1886 /* 1887 * Routine to return a string identifying the physical name 1888 * associated with a memory/cache error. 1889 */ 1890 /* ARGSUSED */ 1891 int 1892 cpu_get_mem_unum(int synd_status, ushort_t synd, uint64_t afsr, 1893 uint64_t afar, int cpuid, int flt_in_memory, ushort_t flt_status, 1894 char *buf, int buflen, int *lenp) 1895 { 1896 short synd_code; 1897 int ret; 1898 1899 if (flt_in_memory) { 1900 synd_code = synd_to_synd_code(synd_status, synd); 1901 if (synd_code == -1) { 1902 ret = EINVAL; 1903 } else if (prom_get_unum(synd_code, P2ALIGN(afar, 8), 1904 buf, buflen, lenp) != 0) { 1905 ret = EIO; 1906 } else if (*lenp <= 1) { 1907 ret = EINVAL; 1908 } else { 1909 ret = 0; 1910 } 1911 } else { 1912 ret = ENOTSUP; 1913 } 1914 1915 if (ret != 0) { 1916 buf[0] = '\0'; 1917 *lenp = 0; 1918 } 1919 1920 return (ret); 1921 } 1922 1923 /* 1924 * Wrapper for cpu_get_mem_unum() routine that takes an 1925 * async_flt struct rather than explicit arguments. 1926 */ 1927 int 1928 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt, 1929 char *buf, int buflen, int *lenp) 1930 { 1931 return (cpu_get_mem_unum(synd_status, SYND(aflt->flt_synd), 1932 aflt->flt_stat, aflt->flt_addr, aflt->flt_bus_id, 1933 aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp)); 1934 } 1935 1936 /* 1937 * This routine is a more generic interface to cpu_get_mem_unum(), 1938 * that may be used by other modules (e.g. mm). 1939 */ 1940 int 1941 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar, 1942 char *buf, int buflen, int *lenp) 1943 { 1944 int synd_status, flt_in_memory, ret; 1945 char unum[UNUM_NAMLEN]; 1946 1947 /* 1948 * Check for an invalid address. 1949 */ 1950 if (afar == (uint64_t)-1) 1951 return (ENXIO); 1952 1953 if (synd == (uint64_t)-1) 1954 synd_status = AFLT_STAT_INVALID; 1955 else 1956 synd_status = AFLT_STAT_VALID; 1957 1958 flt_in_memory = (pf_is_memory(afar >> MMU_PAGESHIFT)) ? 
1 : 0; 1959 1960 if ((ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar, 1961 CPU->cpu_id, flt_in_memory, 0, unum, UNUM_NAMLEN, lenp)) 1962 != 0) 1963 return (ret); 1964 1965 if (*lenp >= buflen) 1966 return (ENAMETOOLONG); 1967 1968 (void) strncpy(buf, unum, buflen); 1969 1970 return (0); 1971 } 1972 1973 /* 1974 * Routine to return memory information associated 1975 * with a physical address and syndrome. 1976 */ 1977 /* ARGSUSED */ 1978 int 1979 cpu_get_mem_info(uint64_t synd, uint64_t afar, 1980 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 1981 int *segsp, int *banksp, int *mcidp) 1982 { 1983 return (ENOTSUP); 1984 } 1985 1986 /* 1987 * Routine to return a string identifying the physical 1988 * name associated with a cpuid. 1989 */ 1990 /* ARGSUSED */ 1991 int 1992 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp) 1993 { 1994 return (ENOTSUP); 1995 } 1996 1997 /* 1998 * This routine returns the size of the kernel's FRU name buffer. 1999 */ 2000 size_t 2001 cpu_get_name_bufsize() 2002 { 2003 return (UNUM_NAMLEN); 2004 } 2005 2006 /* 2007 * Cpu specific log func for UEs. 2008 */ 2009 static void 2010 log_ue_err(struct async_flt *aflt, char *unum) 2011 { 2012 spitf_async_flt *spf_flt = (spitf_async_flt *)aflt; 2013 int len = 0; 2014 2015 #ifdef DEBUG 2016 int afsr_priv = (aflt->flt_stat & P_AFSR_PRIV) ? 1 : 0; 2017 2018 /* 2019 * Paranoid Check for priv mismatch 2020 * Only applicable for UEs 2021 */ 2022 if (afsr_priv != aflt->flt_priv) { 2023 /* 2024 * The priv bits in %tstate and %afsr did not match; we expect 2025 * this to be very rare, so flag it with a message. 2026 */ 2027 cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, NULL, 2028 ": PRIV bit in TSTATE and AFSR mismatched; " 2029 "TSTATE.PRIV=%d used", (aflt->flt_priv) ? 1 : 0); 2030 2031 /* update saved afsr to reflect the correct priv */ 2032 aflt->flt_stat &= ~P_AFSR_PRIV; 2033 if (aflt->flt_priv) 2034 aflt->flt_stat |= P_AFSR_PRIV; 2035 } 2036 #endif /* DEBUG */ 2037 2038 (void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum, 2039 UNUM_NAMLEN, &len); 2040 2041 cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum, 2042 " Uncorrectable Memory Error on"); 2043 2044 if (SYND(aflt->flt_synd) == 0x3) { 2045 cpu_aflt_log(CE_WARN, 1, spf_flt, CPU_ERRID_FIRST, NULL, 2046 " Syndrome 0x3 indicates that this may not be a " 2047 "memory module problem"); 2048 } 2049 2050 if (aflt->flt_in_memory) 2051 cpu_log_ecmem_info(spf_flt); 2052 } 2053 2054 2055 /* 2056 * The cpu_async_log_err() function is called via the ue_drain() function to 2057 * handle logging for CPU events that are dequeued. As such, it can be invoked 2058 * from softint context, from AST processing in the trap() flow, or from the 2059 * panic flow. We decode the CPU-specific data, and log appropriate messages. 2060 */ 2061 static void 2062 cpu_async_log_err(void *flt) 2063 { 2064 spitf_async_flt *spf_flt = (spitf_async_flt *)flt; 2065 struct async_flt *aflt = (struct async_flt *)flt; 2066 char unum[UNUM_NAMLEN]; 2067 char *space; 2068 char *ecache_scrub_logstr = NULL; 2069 2070 switch (spf_flt->flt_type) { 2071 case CPU_UE_ERR: 2072 /* 2073 * We want to skip logging only if ALL the following 2074 * conditions are true: 2075 * 2076 * 1. We are not panicking 2077 * 2. There is only one error 2078 * 3. That error is a memory error 2079 * 4. The error is caused by the memory scrubber (in 2080 * which case the error will have occurred under 2081 * on_trap protection) 2082 * 5. 
The error is on a retired page 2083 * 2084 * Note 1: AFLT_PROT_EC is used places other than the memory 2085 * scrubber. However, none of those errors should occur 2086 * on a retired page. 2087 * 2088 * Note 2: In the CE case, these errors are discarded before 2089 * the errorq. In the UE case, we must wait until now -- 2090 * softcall() grabs a mutex, which we can't do at a high PIL. 2091 */ 2092 if (!panicstr && 2093 (aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE && 2094 aflt->flt_prot == AFLT_PROT_EC) { 2095 if (page_retire_check(aflt->flt_addr, NULL) == 0) { 2096 /* Zero the address to clear the error */ 2097 softcall(ecc_page_zero, (void *)aflt->flt_addr); 2098 return; 2099 } 2100 } 2101 2102 /* 2103 * Log the UE and check for causes of this UE error that 2104 * don't cause a trap (Copyback error). cpu_async_error() 2105 * has already checked the i/o buses for us. 2106 */ 2107 log_ue_err(aflt, unum); 2108 if (aflt->flt_in_memory) 2109 cpu_check_allcpus(aflt); 2110 break; 2111 2112 case CPU_EDP_LDP_ERR: 2113 if (aflt->flt_stat & P_AFSR_EDP) 2114 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, 2115 NULL, " EDP event on"); 2116 2117 if (aflt->flt_stat & P_AFSR_LDP) 2118 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, 2119 NULL, " LDP event on"); 2120 2121 /* Log ecache info if exist */ 2122 if (spf_flt->flt_ec_lcnt > 0) { 2123 cpu_log_ecmem_info(spf_flt); 2124 2125 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, 2126 NULL, " AFAR was derived from E$Tag"); 2127 } else { 2128 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, 2129 NULL, " No error found in ecache (No fault " 2130 "PA available)"); 2131 } 2132 break; 2133 2134 case CPU_WP_ERR: 2135 /* 2136 * If the memscrub thread hasn't yet read 2137 * all of memory, as we requested in the 2138 * trap handler, then give it a kick to 2139 * make sure it does. 2140 */ 2141 if (!isus2i && !isus2e && read_all_memscrub) 2142 memscrub_run(); 2143 2144 cpu_aflt_log(CE_WARN, 1, spf_flt, WP_LFLAGS, NULL, 2145 " WP event on"); 2146 return; 2147 2148 case CPU_BTO_BERR_ERR: 2149 /* 2150 * A bus timeout or error occurred that was in user mode or not 2151 * in a protected kernel code region. 2152 */ 2153 if (aflt->flt_stat & P_AFSR_BERR) { 2154 cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2, 2155 spf_flt, BERRTO_LFLAGS, NULL, 2156 " Bus Error on System Bus in %s mode from", 2157 aflt->flt_priv ? "privileged" : "user"); 2158 } 2159 2160 if (aflt->flt_stat & P_AFSR_TO) { 2161 cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2, 2162 spf_flt, BERRTO_LFLAGS, NULL, 2163 " Timeout on System Bus in %s mode from", 2164 aflt->flt_priv ? "privileged" : "user"); 2165 } 2166 2167 return; 2168 2169 case CPU_PANIC_CP_ERR: 2170 /* 2171 * Process the Copyback (CP) error info (if any) obtained from 2172 * polling all the cpus in the panic flow. This case is only 2173 * entered if we are panicking. 2174 */ 2175 ASSERT(panicstr != NULL); 2176 ASSERT(aflt->flt_id == panic_aflt.flt_id); 2177 2178 /* See which space - this info may not exist */ 2179 if (panic_aflt.flt_status & ECC_D_TRAP) 2180 space = "Data "; 2181 else if (panic_aflt.flt_status & ECC_I_TRAP) 2182 space = "Instruction "; 2183 else 2184 space = ""; 2185 2186 cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL, 2187 " AFAR was derived from UE report," 2188 " CP event on CPU%d (caused %saccess error on %s%d)", 2189 aflt->flt_inst, space, (panic_aflt.flt_status & ECC_IOBUS) ? 
2190 "IOBUS" : "CPU", panic_aflt.flt_bus_id); 2191 2192 if (spf_flt->flt_ec_lcnt > 0) 2193 cpu_log_ecmem_info(spf_flt); 2194 else 2195 cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, 2196 NULL, " No cache dump available"); 2197 2198 return; 2199 2200 case CPU_TRAPPING_CP_ERR: 2201 /* 2202 * For sabre only. This is a copyback ecache parity error due 2203 * to a PCI DMA read. We should be panicking if we get here. 2204 */ 2205 ASSERT(panicstr != NULL); 2206 cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL, 2207 " AFAR was derived from UE report," 2208 " CP event on CPU%d (caused Data access error " 2209 "on PCIBus)", aflt->flt_inst); 2210 return; 2211 2212 /* 2213 * We log the ecache lines of the following states, 2214 * clean_bad_idle, clean_bad_busy, dirty_bad_idle and 2215 * dirty_bad_busy if ecache_scrub_verbose is set and panic 2216 * in addition to logging if ecache_scrub_panic is set. 2217 */ 2218 case CPU_BADLINE_CI_ERR: 2219 ecache_scrub_logstr = "CBI"; 2220 /* FALLTHRU */ 2221 2222 case CPU_BADLINE_CB_ERR: 2223 if (ecache_scrub_logstr == NULL) 2224 ecache_scrub_logstr = "CBB"; 2225 /* FALLTHRU */ 2226 2227 case CPU_BADLINE_DI_ERR: 2228 if (ecache_scrub_logstr == NULL) 2229 ecache_scrub_logstr = "DBI"; 2230 /* FALLTHRU */ 2231 2232 case CPU_BADLINE_DB_ERR: 2233 if (ecache_scrub_logstr == NULL) 2234 ecache_scrub_logstr = "DBB"; 2235 2236 cpu_aflt_log(CE_NOTE, 2, spf_flt, 2237 (CPU_ERRID_FIRST | CPU_FLTCPU), NULL, 2238 " %s event on", ecache_scrub_logstr); 2239 cpu_log_ecmem_info(spf_flt); 2240 2241 return; 2242 2243 case CPU_ORPHAN_CP_ERR: 2244 /* 2245 * Orphan CPs, where the CP bit is set, but when a CPU 2246 * doesn't report a UE. 2247 */ 2248 if (read_all_memscrub) 2249 memscrub_run(); 2250 2251 cpu_aflt_log(CE_NOTE, 2, spf_flt, (CP_LFLAGS | CPU_FLTCPU), 2252 NULL, " Orphan CP event on"); 2253 2254 /* Log ecache info if exist */ 2255 if (spf_flt->flt_ec_lcnt > 0) 2256 cpu_log_ecmem_info(spf_flt); 2257 else 2258 cpu_aflt_log(CE_NOTE, 2, spf_flt, 2259 (CP_LFLAGS | CPU_FLTCPU), NULL, 2260 " No error found in ecache (No fault " 2261 "PA available"); 2262 return; 2263 2264 case CPU_ECACHE_ADDR_PAR_ERR: 2265 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2266 " E$ Tag Address Parity error on"); 2267 cpu_log_ecmem_info(spf_flt); 2268 return; 2269 2270 case CPU_ECACHE_STATE_ERR: 2271 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2272 " E$ Tag State Parity error on"); 2273 cpu_log_ecmem_info(spf_flt); 2274 return; 2275 2276 case CPU_ECACHE_TAG_ERR: 2277 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2278 " E$ Tag scrub event on"); 2279 cpu_log_ecmem_info(spf_flt); 2280 return; 2281 2282 case CPU_ECACHE_ETP_ETS_ERR: 2283 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2284 " AFSR.ETP is set and AFSR.ETS is zero on"); 2285 cpu_log_ecmem_info(spf_flt); 2286 return; 2287 2288 2289 case CPU_ADDITIONAL_ERR: 2290 cpu_aflt_log(CE_WARN, 1, spf_flt, CMN_LFLAGS & ~CPU_SPACE, NULL, 2291 " Additional errors detected during error processing on"); 2292 return; 2293 2294 default: 2295 cmn_err(CE_WARN, "cpu_async_log_err: fault %p has unknown " 2296 "fault type %x", (void *)spf_flt, spf_flt->flt_type); 2297 return; 2298 } 2299 2300 /* ... 
fall through from the UE, EDP, or LDP cases */ 2301 2302 if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) { 2303 if (!panicstr) { 2304 (void) page_retire(aflt->flt_addr, PR_UE); 2305 } else { 2306 /* 2307 * Clear UEs on panic so that we don't 2308 * get haunted by them during panic or 2309 * after reboot 2310 */ 2311 clearphys(P2ALIGN(aflt->flt_addr, 64), 2312 cpunodes[CPU->cpu_id].ecache_size, 2313 cpunodes[CPU->cpu_id].ecache_linesize); 2314 2315 (void) clear_errors(NULL, NULL); 2316 } 2317 } 2318 2319 /* 2320 * Log final recover message 2321 */ 2322 if (!panicstr) { 2323 if (!aflt->flt_priv) { 2324 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, 2325 NULL, " Above Error is in User Mode" 2326 "\n and is fatal: " 2327 "will SIGKILL process and notify contract"); 2328 } else if (aflt->flt_prot == AFLT_PROT_COPY && aflt->flt_core) { 2329 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, 2330 NULL, " Above Error detected while dumping core;" 2331 "\n core file will be truncated"); 2332 } else if (aflt->flt_prot == AFLT_PROT_COPY) { 2333 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, 2334 NULL, " Above Error is due to Kernel access" 2335 "\n to User space and is fatal: " 2336 "will SIGKILL process and notify contract"); 2337 } else if (aflt->flt_prot == AFLT_PROT_EC) { 2338 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, NULL, 2339 " Above Error detected by protected Kernel code" 2340 "\n that will try to clear error from system"); 2341 } 2342 } 2343 } 2344 2345 2346 /* 2347 * Check all cpus for non-trapping UE-causing errors 2348 * In Ultra I/II, we look for copyback errors (CPs) 2349 */ 2350 void 2351 cpu_check_allcpus(struct async_flt *aflt) 2352 { 2353 spitf_async_flt cp; 2354 spitf_async_flt *spf_cpflt = &cp; 2355 struct async_flt *cpflt = (struct async_flt *)&cp; 2356 int pix; 2357 2358 cpflt->flt_id = aflt->flt_id; 2359 cpflt->flt_addr = aflt->flt_addr; 2360 2361 for (pix = 0; pix < NCPU; pix++) { 2362 if (CPU_XCALL_READY(pix)) { 2363 xc_one(pix, (xcfunc_t *)get_cpu_status, 2364 (uint64_t)cpflt, 0); 2365 2366 if (cpflt->flt_stat & P_AFSR_CP) { 2367 char *space; 2368 2369 /* See which space - this info may not exist */ 2370 if (aflt->flt_status & ECC_D_TRAP) 2371 space = "Data "; 2372 else if (aflt->flt_status & ECC_I_TRAP) 2373 space = "Instruction "; 2374 else 2375 space = ""; 2376 2377 cpu_aflt_log(CE_WARN, 1, spf_cpflt, CP_LFLAGS, 2378 NULL, " AFAR was derived from UE report," 2379 " CP event on CPU%d (caused %saccess " 2380 "error on %s%d)", pix, space, 2381 (aflt->flt_status & ECC_IOBUS) ? 2382 "IOBUS" : "CPU", aflt->flt_bus_id); 2383 2384 if (spf_cpflt->flt_ec_lcnt > 0) 2385 cpu_log_ecmem_info(spf_cpflt); 2386 else 2387 cpu_aflt_log(CE_WARN, 2, spf_cpflt, 2388 CPU_ERRID_FIRST, NULL, 2389 " No cache dump available"); 2390 } 2391 } 2392 } 2393 } 2394 2395 #ifdef DEBUG 2396 int test_mp_cp = 0; 2397 #endif 2398 2399 /* 2400 * Cross-call callback routine to tell a CPU to read its own %afsr to check 2401 * for copyback errors and capture relevant information. 
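 *
 * For reference, the E$ probe below walks every way of the set that could
 * hold the fault PA.  A worked example of the index math, assuming a
 * purely hypothetical geometry of a direct-mapped 1MB E$ with 64-byte
 * lines (ecache_associativity == 1, so ec_set_size == 0x100000):
 *
 *	flt_addr     = 0x12345678			(example PA)
 *	flt_addr_tag = flt_addr >> cpu_ec_tag_shift
 *	ec_idx       = flt_addr % ec_set_size		= 0x45678
 *	probe at       P2ALIGN(ec_idx, 64)		= 0x45640
 *
 * With a set-associative E$ the loop repeats, stepping ec_idx by
 * ec_set_size for each additional way.  A line is captured only if its
 * tag matches flt_addr_tag, and a valid matching line is preferred over
 * an invalid one.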
2402 */ 2403 static uint_t 2404 get_cpu_status(uint64_t arg) 2405 { 2406 struct async_flt *aflt = (struct async_flt *)arg; 2407 spitf_async_flt *spf_flt = (spitf_async_flt *)arg; 2408 uint64_t afsr; 2409 uint32_t ec_idx; 2410 uint64_t sdbh, sdbl; 2411 int i; 2412 uint32_t ec_set_size; 2413 uchar_t valid; 2414 ec_data_t ec_data[8]; 2415 uint64_t ec_tag, flt_addr_tag, oafsr; 2416 uint64_t *acc_afsr = NULL; 2417 2418 get_asyncflt(&afsr); 2419 if (CPU_PRIVATE(CPU) != NULL) { 2420 acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 2421 afsr |= *acc_afsr; 2422 *acc_afsr = 0; 2423 } 2424 2425 #ifdef DEBUG 2426 if (test_mp_cp) 2427 afsr |= P_AFSR_CP; 2428 #endif 2429 aflt->flt_stat = afsr; 2430 2431 if (afsr & P_AFSR_CP) { 2432 /* 2433 * Capture the UDBs 2434 */ 2435 get_udb_errors(&sdbh, &sdbl); 2436 spf_flt->flt_sdbh = (ushort_t)(sdbh & 0x3FF); 2437 spf_flt->flt_sdbl = (ushort_t)(sdbl & 0x3FF); 2438 2439 /* 2440 * Clear CP bit before capturing ecache data 2441 * and AFSR info. 2442 */ 2443 set_asyncflt(P_AFSR_CP); 2444 2445 /* 2446 * See if we can capture the ecache line for the 2447 * fault PA. 2448 * 2449 * Return a valid matching ecache line, if any. 2450 * Otherwise, return the first matching ecache 2451 * line marked invalid. 2452 */ 2453 flt_addr_tag = aflt->flt_addr >> cpu_ec_tag_shift; 2454 ec_set_size = cpunodes[CPU->cpu_id].ecache_size / 2455 ecache_associativity; 2456 spf_flt->flt_ec_lcnt = 0; 2457 2458 for (i = 0, ec_idx = (aflt->flt_addr % ec_set_size); 2459 i < ecache_associativity; i++, ec_idx += ec_set_size) { 2460 get_ecache_dtag(P2ALIGN(ec_idx, 64), 2461 (uint64_t *)&ec_data[0], &ec_tag, &oafsr, 2462 acc_afsr); 2463 2464 if ((ec_tag & cpu_ec_tag_mask) != flt_addr_tag) 2465 continue; 2466 2467 valid = cpu_ec_state_valid & 2468 (uchar_t)((ec_tag & cpu_ec_state_mask) >> 2469 cpu_ec_state_shift); 2470 2471 if (valid || spf_flt->flt_ec_lcnt == 0) { 2472 spf_flt->flt_ec_tag = ec_tag; 2473 bcopy(&ec_data, &spf_flt->flt_ec_data, 2474 sizeof (ec_data)); 2475 spf_flt->flt_ec_lcnt = 1; 2476 2477 if (valid) 2478 break; 2479 } 2480 } 2481 } 2482 return (0); 2483 } 2484 2485 /* 2486 * CPU-module callback for the non-panicking CPUs. This routine is invoked 2487 * from panic_idle() as part of the other CPUs stopping themselves when a 2488 * panic occurs. We need to be VERY careful what we do here, since panicstr 2489 * is NOT set yet and we cannot blow through locks. If panic_aflt is set 2490 * (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for 2491 * CP error information. 2492 */ 2493 void 2494 cpu_async_panic_callb(void) 2495 { 2496 spitf_async_flt cp; 2497 struct async_flt *aflt = (struct async_flt *)&cp; 2498 uint64_t *scrub_afsr; 2499 2500 if (panic_aflt.flt_id != 0) { 2501 aflt->flt_addr = panic_aflt.flt_addr; 2502 (void) get_cpu_status((uint64_t)aflt); 2503 2504 if (CPU_PRIVATE(CPU) != NULL) { 2505 scrub_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 2506 if (*scrub_afsr & P_AFSR_CP) { 2507 aflt->flt_stat |= *scrub_afsr; 2508 *scrub_afsr = 0; 2509 } 2510 } 2511 if (aflt->flt_stat & P_AFSR_CP) { 2512 aflt->flt_id = panic_aflt.flt_id; 2513 aflt->flt_panic = 1; 2514 aflt->flt_inst = CPU->cpu_id; 2515 aflt->flt_class = CPU_FAULT; 2516 cp.flt_type = CPU_PANIC_CP_ERR; 2517 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP, 2518 (void *)&cp, sizeof (cp), ue_queue, 2519 aflt->flt_panic); 2520 } 2521 } 2522 } 2523 2524 /* 2525 * Turn off all cpu error detection, normally only used for panics. 
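 *
 * Both this routine and cpu_enable_errors() below broadcast
 * set_error_enable_tl1 to every CPU via xt_all(), so the change takes
 * effect machine-wide.  A minimal, hypothetical usage sketch (the actual
 * callers are outside this excerpt):
 *
 *	cpu_disable_errors();	   stop async error reporting on all CPUs
 *	... inspect or capture state that might otherwise re-trap ...
 *	cpu_enable_errors();	   restore normal error detection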
2526 */ 2527 void 2528 cpu_disable_errors(void) 2529 { 2530 xt_all(set_error_enable_tl1, EER_DISABLE, EER_SET_ABSOLUTE); 2531 } 2532 2533 /* 2534 * Enable errors. 2535 */ 2536 void 2537 cpu_enable_errors(void) 2538 { 2539 xt_all(set_error_enable_tl1, EER_ENABLE, EER_SET_ABSOLUTE); 2540 } 2541 2542 static void 2543 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err) 2544 { 2545 uint64_t aligned_addr = P2ALIGN(ecc->flt_addr, 8); 2546 int i, loop = 1; 2547 ushort_t ecc_0; 2548 uint64_t paddr; 2549 uint64_t data; 2550 2551 if (verbose) 2552 loop = 8; 2553 for (i = 0; i < loop; i++) { 2554 paddr = aligned_addr + (i * 8); 2555 data = lddphys(paddr); 2556 if (verbose) { 2557 if (ce_err) { 2558 ecc_0 = ecc_gen((uint32_t)(data>>32), 2559 (uint32_t)data); 2560 cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS, 2561 NULL, " Paddr 0x%" PRIx64 ", " 2562 "Data 0x%08x.%08x, ECC 0x%x", paddr, 2563 (uint32_t)(data>>32), (uint32_t)data, ecc_0); 2564 } else { 2565 cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS, 2566 NULL, " Paddr 0x%" PRIx64 ", " 2567 "Data 0x%08x.%08x", paddr, 2568 (uint32_t)(data>>32), (uint32_t)data); 2569 } 2570 } 2571 } 2572 } 2573 2574 static struct { /* sec-ded-s4ed ecc code */ 2575 uint_t hi, lo; 2576 } ecc_code[8] = { 2577 { 0xee55de23U, 0x16161161U }, 2578 { 0x55eede93U, 0x61612212U }, 2579 { 0xbb557b8cU, 0x49494494U }, 2580 { 0x55bb7b6cU, 0x94948848U }, 2581 { 0x16161161U, 0xee55de23U }, 2582 { 0x61612212U, 0x55eede93U }, 2583 { 0x49494494U, 0xbb557b8cU }, 2584 { 0x94948848U, 0x55bb7b6cU } 2585 }; 2586 2587 static ushort_t 2588 ecc_gen(uint_t high_bytes, uint_t low_bytes) 2589 { 2590 int i, j; 2591 uchar_t checker, bit_mask; 2592 struct { 2593 uint_t hi, lo; 2594 } hex_data, masked_data[8]; 2595 2596 hex_data.hi = high_bytes; 2597 hex_data.lo = low_bytes; 2598 2599 /* mask out bits according to sec-ded-s4ed ecc code */ 2600 for (i = 0; i < 8; i++) { 2601 masked_data[i].hi = hex_data.hi & ecc_code[i].hi; 2602 masked_data[i].lo = hex_data.lo & ecc_code[i].lo; 2603 } 2604 2605 /* 2606 * xor all bits in masked_data[i] to get bit_i of checker, 2607 * where i = 0 to 7 2608 */ 2609 checker = 0; 2610 for (i = 0; i < 8; i++) { 2611 bit_mask = 1 << i; 2612 for (j = 0; j < 32; j++) { 2613 if (masked_data[i].lo & 1) checker ^= bit_mask; 2614 if (masked_data[i].hi & 1) checker ^= bit_mask; 2615 masked_data[i].hi >>= 1; 2616 masked_data[i].lo >>= 1; 2617 } 2618 } 2619 return (checker); 2620 } 2621 2622 /* 2623 * Flush the entire ecache using displacement flush by reading through a 2624 * physical address range as large as the ecache. 2625 */ 2626 void 2627 cpu_flush_ecache(void) 2628 { 2629 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2, 2630 cpunodes[CPU->cpu_id].ecache_linesize); 2631 } 2632 2633 /* 2634 * read and display the data in the cache line where the 2635 * original ce error occurred. 2636 * This routine is mainly used for debugging new hardware. 
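 *
 * A minimal usage sketch (hypothetical caller; ce_afar, ce_afsr and
 * ce_synd are illustrative names for values saved from the original CE
 * report, and only the fields consumed by cpu_read_paddr() and
 * check_ecc() are filled in):
 *
 *	struct async_flt flt;
 *
 *	bzero(&flt, sizeof (flt));
 *	flt.flt_addr = ce_afar;		PA from the original CE report
 *	flt.flt_stat = ce_afsr;		saved AFSR (e.g. P_AFSR_CE set)
 *	flt.flt_synd = ce_synd;		saved syndrome, incl. UDBL_REG bit
 *	read_ecc_data(&flt, 1, 1);	verbose dump with regenerated ECC
 *
 * With verbose set, all eight 8-byte words of the line are read and
 * logged; with ce_err set, ecc_gen() recomputes the check bits for each
 * word so they can be compared against what the DIMMs returned.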
2637 */ 2638 void 2639 read_ecc_data(struct async_flt *ecc, short verbose, short ce_err) 2640 { 2641 kpreempt_disable(); 2642 /* disable ECC error traps */ 2643 set_error_enable(EER_ECC_DISABLE); 2644 2645 /* 2646 * flush the ecache 2647 * read the data 2648 * check to see if an ECC error occured 2649 */ 2650 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2, 2651 cpunodes[CPU->cpu_id].ecache_linesize); 2652 set_lsu(get_lsu() | cache_boot_state); 2653 cpu_read_paddr(ecc, verbose, ce_err); 2654 (void) check_ecc(ecc); 2655 2656 /* enable ECC error traps */ 2657 set_error_enable(EER_ENABLE); 2658 kpreempt_enable(); 2659 } 2660 2661 /* 2662 * Check the AFSR bits for UE/CE persistence. 2663 * If UE or CE errors are detected, the routine will 2664 * clears all the AFSR sticky bits (except CP for 2665 * spitfire/blackbird) and the UDBs. 2666 * if ce_debug or ue_debug is set, log any ue/ce errors detected. 2667 */ 2668 static int 2669 check_ecc(struct async_flt *ecc) 2670 { 2671 uint64_t t_afsr; 2672 uint64_t t_afar; 2673 uint64_t udbh; 2674 uint64_t udbl; 2675 ushort_t udb; 2676 int persistent = 0; 2677 2678 /* 2679 * Capture the AFSR, AFAR and UDBs info 2680 */ 2681 get_asyncflt(&t_afsr); 2682 get_asyncaddr(&t_afar); 2683 t_afar &= SABRE_AFAR_PA; 2684 get_udb_errors(&udbh, &udbl); 2685 2686 if ((t_afsr & P_AFSR_UE) || (t_afsr & P_AFSR_CE)) { 2687 /* 2688 * Clear the errors 2689 */ 2690 clr_datapath(); 2691 2692 if (isus2i || isus2e) 2693 set_asyncflt(t_afsr); 2694 else 2695 set_asyncflt(t_afsr & ~P_AFSR_CP); 2696 2697 /* 2698 * determine whether to check UDBH or UDBL for persistence 2699 */ 2700 if (ecc->flt_synd & UDBL_REG) { 2701 udb = (ushort_t)udbl; 2702 t_afar |= 0x8; 2703 } else { 2704 udb = (ushort_t)udbh; 2705 } 2706 2707 if (ce_debug || ue_debug) { 2708 spitf_async_flt spf_flt; /* for logging */ 2709 struct async_flt *aflt = 2710 (struct async_flt *)&spf_flt; 2711 2712 /* Package the info nicely in the spf_flt struct */ 2713 bzero(&spf_flt, sizeof (spitf_async_flt)); 2714 aflt->flt_stat = t_afsr; 2715 aflt->flt_addr = t_afar; 2716 spf_flt.flt_sdbh = (ushort_t)(udbh & 0x3FF); 2717 spf_flt.flt_sdbl = (ushort_t)(udbl & 0x3FF); 2718 2719 cpu_aflt_log(CE_CONT, 0, &spf_flt, (CPU_AFSR | 2720 CPU_AFAR | CPU_UDBH | CPU_UDBL), NULL, 2721 " check_ecc: Dumping captured error states ..."); 2722 } 2723 2724 /* 2725 * if the fault addresses don't match, not persistent 2726 */ 2727 if (t_afar != ecc->flt_addr) { 2728 return (persistent); 2729 } 2730 2731 /* 2732 * check for UE persistence 2733 * since all DIMMs in the bank are identified for a UE, 2734 * there's no reason to check the syndrome 2735 */ 2736 if ((ecc->flt_stat & P_AFSR_UE) && (t_afsr & P_AFSR_UE)) { 2737 persistent = 1; 2738 } 2739 2740 /* 2741 * check for CE persistence 2742 */ 2743 if ((ecc->flt_stat & P_AFSR_CE) && (t_afsr & P_AFSR_CE)) { 2744 if ((udb & P_DER_E_SYND) == 2745 (ecc->flt_synd & P_DER_E_SYND)) { 2746 persistent = 1; 2747 } 2748 } 2749 } 2750 return (persistent); 2751 } 2752 2753 #ifdef HUMMINGBIRD 2754 #define HB_FULL_DIV 1 2755 #define HB_HALF_DIV 2 2756 #define HB_LOWEST_DIV 8 2757 #define HB_ECLK_INVALID 0xdeadbad 2758 static uint64_t hb_eclk[HB_LOWEST_DIV + 1] = { 2759 HB_ECLK_INVALID, HB_ECLK_1, HB_ECLK_2, HB_ECLK_INVALID, 2760 HB_ECLK_4, HB_ECLK_INVALID, HB_ECLK_6, HB_ECLK_INVALID, 2761 HB_ECLK_8 }; 2762 2763 #define HB_SLOW_DOWN 0 2764 #define HB_SPEED_UP 1 2765 2766 #define SET_ESTAR_MODE(mode) \ 2767 stdphysio(HB_ESTAR_MODE, (mode)); \ 2768 /* \ 2769 * PLL logic requires minimum of 16 clock \ 
2770 * cycles to lock to the new clock speed. \ 2771 * Wait 1 usec to satisfy this requirement. \ 2772 */ \ 2773 drv_usecwait(1); 2774 2775 #define CHANGE_REFRESH_COUNT(direction, cur_div, new_div) \ 2776 { \ 2777 volatile uint64_t data; \ 2778 uint64_t count, new_count; \ 2779 clock_t delay; \ 2780 data = lddphysio(HB_MEM_CNTRL0); \ 2781 count = (data & HB_REFRESH_COUNT_MASK) >> \ 2782 HB_REFRESH_COUNT_SHIFT; \ 2783 new_count = (HB_REFRESH_INTERVAL * \ 2784 cpunodes[CPU->cpu_id].clock_freq) / \ 2785 (HB_REFRESH_CLOCKS_PER_COUNT * (new_div) * NANOSEC);\ 2786 data = (data & ~HB_REFRESH_COUNT_MASK) | \ 2787 (new_count << HB_REFRESH_COUNT_SHIFT); \ 2788 stdphysio(HB_MEM_CNTRL0, data); \ 2789 data = lddphysio(HB_MEM_CNTRL0); \ 2790 /* \ 2791 * If we are slowing down the cpu and Memory \ 2792 * Self Refresh is not enabled, it is required \ 2793 * to wait for old refresh count to count-down and \ 2794 * new refresh count to go into effect (let new value \ 2795 * counts down once). \ 2796 */ \ 2797 if ((direction) == HB_SLOW_DOWN && \ 2798 (data & HB_SELF_REFRESH_MASK) == 0) { \ 2799 /* \ 2800 * Each count takes 64 cpu clock cycles \ 2801 * to decrement. Wait for current refresh \ 2802 * count plus new refresh count at current \ 2803 * cpu speed to count down to zero. Round \ 2804 * up the delay time. \ 2805 */ \ 2806 delay = ((HB_REFRESH_CLOCKS_PER_COUNT * \ 2807 (count + new_count) * MICROSEC * (cur_div)) /\ 2808 cpunodes[CPU->cpu_id].clock_freq) + 1; \ 2809 drv_usecwait(delay); \ 2810 } \ 2811 } 2812 2813 #define SET_SELF_REFRESH(bit) \ 2814 { \ 2815 volatile uint64_t data; \ 2816 data = lddphysio(HB_MEM_CNTRL0); \ 2817 data = (data & ~HB_SELF_REFRESH_MASK) | \ 2818 ((bit) << HB_SELF_REFRESH_SHIFT); \ 2819 stdphysio(HB_MEM_CNTRL0, data); \ 2820 data = lddphysio(HB_MEM_CNTRL0); \ 2821 } 2822 #endif /* HUMMINGBIRD */ 2823 2824 /* ARGSUSED */ 2825 void 2826 cpu_change_speed(uint64_t new_divisor, uint64_t arg2) 2827 { 2828 #ifdef HUMMINGBIRD 2829 uint64_t cur_mask, cur_divisor = 0; 2830 volatile uint64_t reg; 2831 int index; 2832 2833 if ((new_divisor < HB_FULL_DIV || new_divisor > HB_LOWEST_DIV) || 2834 (hb_eclk[new_divisor] == HB_ECLK_INVALID)) { 2835 cmn_err(CE_WARN, "cpu_change_speed: bad divisor 0x%lx", 2836 new_divisor); 2837 return; 2838 } 2839 2840 reg = lddphysio(HB_ESTAR_MODE); 2841 cur_mask = reg & HB_ECLK_MASK; 2842 for (index = HB_FULL_DIV; index <= HB_LOWEST_DIV; index++) { 2843 if (hb_eclk[index] == cur_mask) { 2844 cur_divisor = index; 2845 break; 2846 } 2847 } 2848 2849 if (cur_divisor == 0) 2850 cmn_err(CE_PANIC, "cpu_change_speed: current divisor " 2851 "can't be determined!"); 2852 2853 /* 2854 * If we are already at the requested divisor speed, just 2855 * return. 2856 */ 2857 if (cur_divisor == new_divisor) 2858 return; 2859 2860 if (cur_divisor == HB_FULL_DIV && new_divisor == HB_HALF_DIV) { 2861 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor); 2862 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2863 SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE); 2864 2865 } else if (cur_divisor == HB_HALF_DIV && new_divisor == HB_FULL_DIV) { 2866 SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE); 2867 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2868 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2869 CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor); 2870 2871 } else if (cur_divisor == HB_FULL_DIV && new_divisor > HB_HALF_DIV) { 2872 /* 2873 * Transition to 1/2 speed first, then to 2874 * lower speed. 
2875 */ 2876 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, HB_HALF_DIV); 2877 SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]); 2878 SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE); 2879 2880 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, HB_HALF_DIV, new_divisor); 2881 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2882 2883 } else if (cur_divisor > HB_HALF_DIV && new_divisor == HB_FULL_DIV) { 2884 /* 2885 * Transition to 1/2 speed first, then to 2886 * full speed. 2887 */ 2888 SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]); 2889 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2890 CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, HB_HALF_DIV); 2891 2892 SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE); 2893 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2894 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2895 CHANGE_REFRESH_COUNT(HB_SPEED_UP, HB_HALF_DIV, new_divisor); 2896 2897 } else if (cur_divisor < new_divisor) { 2898 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor); 2899 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2900 2901 } else if (cur_divisor > new_divisor) { 2902 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2903 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2904 CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor); 2905 } 2906 CPU->cpu_m.divisor = (uchar_t)new_divisor; 2907 #endif 2908 } 2909 2910 /* 2911 * Clear the AFSR sticky bits and the UDBs. For Sabre/Spitfire/Blackbird, 2912 * we clear all the sticky bits. If a non-null pointer to a async fault 2913 * structure argument is passed in, the captured error state (AFSR, AFAR, UDBs) 2914 * info will be returned in the structure. If a non-null pointer to a 2915 * uint64_t is passed in, this will be updated if the CP bit is set in the 2916 * AFSR. The afsr will be returned. 2917 */ 2918 static uint64_t 2919 clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr) 2920 { 2921 struct async_flt *aflt = (struct async_flt *)spf_flt; 2922 uint64_t afsr; 2923 uint64_t udbh, udbl; 2924 2925 get_asyncflt(&afsr); 2926 2927 if ((acc_afsr != NULL) && (afsr & P_AFSR_CP)) 2928 *acc_afsr |= afsr; 2929 2930 if (spf_flt != NULL) { 2931 aflt->flt_stat = afsr; 2932 get_asyncaddr(&aflt->flt_addr); 2933 aflt->flt_addr &= SABRE_AFAR_PA; 2934 2935 get_udb_errors(&udbh, &udbl); 2936 spf_flt->flt_sdbh = (ushort_t)(udbh & 0x3FF); 2937 spf_flt->flt_sdbl = (ushort_t)(udbl & 0x3FF); 2938 } 2939 2940 set_asyncflt(afsr); /* clear afsr */ 2941 clr_datapath(); /* clear udbs */ 2942 return (afsr); 2943 } 2944 2945 /* 2946 * Scan the ecache to look for bad lines. If found, the afsr, afar, e$ data 2947 * tag of the first bad line will be returned. We also return the old-afsr 2948 * (before clearing the sticky bits). The linecnt data will be updated to 2949 * indicate the number of bad lines detected. 2950 */ 2951 static void 2952 scan_ecache(uint64_t *t_afar, ec_data_t *ecache_data, 2953 uint64_t *ecache_tag, int *linecnt, uint64_t *t_afsr) 2954 { 2955 ec_data_t t_ecdata[8]; 2956 uint64_t t_etag, oafsr; 2957 uint64_t pa = AFLT_INV_ADDR; 2958 uint32_t i, j, ecache_sz; 2959 uint64_t acc_afsr = 0; 2960 uint64_t *cpu_afsr = NULL; 2961 2962 if (CPU_PRIVATE(CPU) != NULL) 2963 cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 2964 2965 *linecnt = 0; 2966 ecache_sz = cpunodes[CPU->cpu_id].ecache_size; 2967 2968 for (i = 0; i < ecache_sz; i += 64) { 2969 get_ecache_dtag(i, (uint64_t *)&t_ecdata[0], &t_etag, &oafsr, 2970 cpu_afsr); 2971 acc_afsr |= oafsr; 2972 2973 /* 2974 * Scan through the whole 64 bytes line in 8 8-byte chunks 2975 * looking for the first occurrence of an EDP error. The AFSR 2976 * info is captured for each 8-byte chunk. 
Note that for 2977 * Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in 2978 * 16-byte chunk granularity (i.e. the AFSR will be the same 2979 * for the high and low 8-byte words within the 16-byte chunk). 2980 * For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte 2981 * granularity and only PSYND bits [7:0] are used. 2982 */ 2983 for (j = 0; j < 8; j++) { 2984 ec_data_t *ecdptr = &t_ecdata[j]; 2985 2986 if (ecdptr->ec_afsr & P_AFSR_EDP) { 2987 uint64_t errpa; 2988 ushort_t psynd; 2989 uint32_t ec_set_size = ecache_sz / 2990 ecache_associativity; 2991 2992 /* 2993 * For Spitfire/Blackbird, we need to look at 2994 * the PSYND to make sure that this 8-byte chunk 2995 * is the right one. PSYND bits [15:8] belong 2996 * to the upper 8-byte (even) chunk. Bits 2997 * [7:0] belong to the lower 8-byte chunk (odd). 2998 */ 2999 psynd = ecdptr->ec_afsr & P_AFSR_P_SYND; 3000 if (!isus2i && !isus2e) { 3001 if (j & 0x1) 3002 psynd = psynd & 0xFF; 3003 else 3004 psynd = psynd >> 8; 3005 3006 if (!psynd) 3007 continue; /* wrong chunk */ 3008 } 3009 3010 /* Construct the PA */ 3011 errpa = ((t_etag & cpu_ec_tag_mask) << 3012 cpu_ec_tag_shift) | ((i | (j << 3)) % 3013 ec_set_size); 3014 3015 /* clean up the cache line */ 3016 flushecacheline(P2ALIGN(errpa, 64), 3017 cpunodes[CPU->cpu_id].ecache_size); 3018 3019 oafsr = clear_errors(NULL, cpu_afsr); 3020 acc_afsr |= oafsr; 3021 3022 (*linecnt)++; 3023 3024 /* 3025 * Capture the PA for the first bad line found. 3026 * Return the ecache dump and tag info. 3027 */ 3028 if (pa == AFLT_INV_ADDR) { 3029 int k; 3030 3031 pa = errpa; 3032 for (k = 0; k < 8; k++) 3033 ecache_data[k] = t_ecdata[k]; 3034 *ecache_tag = t_etag; 3035 } 3036 break; 3037 } 3038 } 3039 } 3040 *t_afar = pa; 3041 *t_afsr = acc_afsr; 3042 } 3043 3044 static void 3045 cpu_log_ecmem_info(spitf_async_flt *spf_flt) 3046 { 3047 struct async_flt *aflt = (struct async_flt *)spf_flt; 3048 uint64_t ecache_tag = spf_flt->flt_ec_tag; 3049 char linestr[30]; 3050 char *state_str; 3051 int i; 3052 3053 /* 3054 * Check the ecache tag to make sure it 3055 * is valid. If invalid, a memory dump was 3056 * captured instead of a ecache dump. 
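 *
 * For reference, in the EDP/LDP and orphan-CP paths the PA logged below
 * was not taken from the AFAR; it was reconstructed by scan_ecache()
 * above from this tag and the line index, roughly:
 *
 *	errpa = ((t_etag & cpu_ec_tag_mask) << cpu_ec_tag_shift) |
 *	    ((i | (j << 3)) % ec_set_size);
 *
 * e.g. (hypothetical geometry: direct-mapped 512KB E$, ec_set_size
 * 0x80000) a bad 8-byte chunk j == 3 of the line at byte offset
 * i == 0x1a40 contributes low bits (0x1a40 | 0x18) == 0x1a58, with the
 * high bits supplied by the shifted tag.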
3057 */ 3058 if (spf_flt->flt_ec_tag != AFLT_INV_ADDR) { 3059 uchar_t eparity = (uchar_t) 3060 ((ecache_tag & cpu_ec_par_mask) >> cpu_ec_par_shift); 3061 3062 uchar_t estate = (uchar_t) 3063 ((ecache_tag & cpu_ec_state_mask) >> cpu_ec_state_shift); 3064 3065 if (estate == cpu_ec_state_shr) 3066 state_str = "Shared"; 3067 else if (estate == cpu_ec_state_exl) 3068 state_str = "Exclusive"; 3069 else if (estate == cpu_ec_state_own) 3070 state_str = "Owner"; 3071 else if (estate == cpu_ec_state_mod) 3072 state_str = "Modified"; 3073 else 3074 state_str = "Invalid"; 3075 3076 if (spf_flt->flt_ec_lcnt > 1) { 3077 (void) snprintf(linestr, sizeof (linestr), 3078 "Badlines found=%d", spf_flt->flt_ec_lcnt); 3079 } else { 3080 linestr[0] = '\0'; 3081 } 3082 3083 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL, 3084 " PA=0x%08x.%08x\n E$tag 0x%08x.%08x E$State: %s " 3085 "E$parity 0x%02x %s", (uint32_t)(aflt->flt_addr >> 32), 3086 (uint32_t)aflt->flt_addr, (uint32_t)(ecache_tag >> 32), 3087 (uint32_t)ecache_tag, state_str, 3088 (uint32_t)eparity, linestr); 3089 } else { 3090 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL, 3091 " E$tag != PA from AFAR; E$line was victimized" 3092 "\n dumping memory from PA 0x%08x.%08x instead", 3093 (uint32_t)(P2ALIGN(aflt->flt_addr, 64) >> 32), 3094 (uint32_t)P2ALIGN(aflt->flt_addr, 64)); 3095 } 3096 3097 /* 3098 * Dump out all 8 8-byte ecache data captured 3099 * For each 8-byte data captured, we check the 3100 * captured afsr's parity syndrome to find out 3101 * which 8-byte chunk is bad. For memory dump, the 3102 * AFSR values were initialized to 0. 3103 */ 3104 for (i = 0; i < 8; i++) { 3105 ec_data_t *ecdptr; 3106 uint_t offset; 3107 ushort_t psynd; 3108 ushort_t bad; 3109 uint64_t edp; 3110 3111 offset = i << 3; /* multiply by 8 */ 3112 ecdptr = &spf_flt->flt_ec_data[i]; 3113 psynd = ecdptr->ec_afsr & P_AFSR_P_SYND; 3114 edp = ecdptr->ec_afsr & P_AFSR_EDP; 3115 3116 /* 3117 * For Sabre/Hummingbird, parity synd is captured only 3118 * in [7:0] of AFSR.PSYND for each 8-byte chunk. 3119 * For spitfire/blackbird, AFSR.PSYND is captured 3120 * in 16-byte granularity. [15:8] represent 3121 * the upper 8 byte and [7:0] the lower 8 byte. 3122 */ 3123 if (isus2i || isus2e || (i & 0x1)) 3124 bad = (psynd & 0xFF); /* check bits [7:0] */ 3125 else 3126 bad = (psynd & 0xFF00); /* check bits [15:8] */ 3127 3128 if (bad && edp) { 3129 cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL, 3130 " E$Data (0x%02x): 0x%08x.%08x " 3131 "*Bad* PSYND=0x%04x", offset, 3132 (uint32_t)(ecdptr->ec_d8 >> 32), 3133 (uint32_t)ecdptr->ec_d8, psynd); 3134 } else { 3135 cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL, 3136 " E$Data (0x%02x): 0x%08x.%08x", offset, 3137 (uint32_t)(ecdptr->ec_d8 >> 32), 3138 (uint32_t)ecdptr->ec_d8); 3139 } 3140 } 3141 } 3142 3143 /* 3144 * Common logging function for all cpu async errors. This function allows the 3145 * caller to generate a single cmn_err() call that logs the appropriate items 3146 * from the fault structure, and implements our rules for AFT logging levels. 3147 * 3148 * ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT) 3149 * tagnum: 0, 1, 2, .. generate the [AFT#] tag 3150 * spflt: pointer to spitfire async fault structure 3151 * logflags: bitflags indicating what to output 3152 * endstr: a end string to appear at the end of this log 3153 * fmt: a format string to appear at the beginning of the log 3154 * 3155 * The logflags allows the construction of predetermined output from the spflt 3156 * structure. 
The individual data items always appear in a consistent order. 3157 * Note that either or both of the spflt structure pointer and logflags may be 3158 * NULL or zero respectively, indicating that the predetermined output 3159 * substrings are not requested in this log. The output looks like this: 3160 * 3161 * [AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU> 3162 * <CPU_SPACE><CPU_ERRID> 3163 * newline+4spaces<CPU_AFSR><CPU_AFAR> 3164 * newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC> 3165 * newline+4spaces<CPU_UDBH><CPU_UDBL> 3166 * newline+4spaces<CPU_SYND> 3167 * newline+4spaces<endstr> 3168 * 3169 * Note that <endstr> may not start on a newline if we are logging <CPU_PSYND>; 3170 * it is assumed that <endstr> will be the unum string in this case. The size 3171 * of our intermediate formatting buf[] is based on the worst case of all flags 3172 * being enabled. We pass the caller's varargs directly to vcmn_err() for 3173 * formatting so we don't need additional stack space to format them here. 3174 */ 3175 /*PRINTFLIKE6*/ 3176 static void 3177 cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, uint_t logflags, 3178 const char *endstr, const char *fmt, ...) 3179 { 3180 struct async_flt *aflt = (struct async_flt *)spflt; 3181 char buf[400], *p, *q; /* see comments about buf[] size above */ 3182 va_list ap; 3183 int console_log_flag; 3184 3185 if ((aflt == NULL) || ((aflt->flt_class == CPU_FAULT) && 3186 (aflt->flt_stat & P_AFSR_LEVEL1)) || 3187 (aflt->flt_panic)) { 3188 console_log_flag = (tagnum < 2) || aft_verbose; 3189 } else { 3190 int verbose = ((aflt->flt_class == BUS_FAULT) || 3191 (aflt->flt_stat & P_AFSR_CE)) ? 3192 ce_verbose_memory : ce_verbose_other; 3193 3194 if (!verbose) 3195 return; 3196 3197 console_log_flag = (verbose > 1); 3198 } 3199 3200 if (console_log_flag) 3201 (void) sprintf(buf, "[AFT%d]", tagnum); 3202 else 3203 (void) sprintf(buf, "![AFT%d]", tagnum); 3204 3205 p = buf + strlen(buf); /* current buffer position */ 3206 q = buf + sizeof (buf); /* pointer past end of buffer */ 3207 3208 if (spflt != NULL && (logflags & CPU_ERRID_FIRST)) { 3209 (void) snprintf(p, (size_t)(q - p), " errID 0x%08x.%08x", 3210 (uint32_t)(aflt->flt_id >> 32), (uint32_t)aflt->flt_id); 3211 p += strlen(p); 3212 } 3213 3214 /* 3215 * Copy the caller's format string verbatim into buf[]. It will be 3216 * formatted by the call to vcmn_err() at the end of this function. 3217 */ 3218 if (fmt != NULL && p < q) { 3219 (void) strncpy(p, fmt, (size_t)(q - p - 1)); 3220 buf[sizeof (buf) - 1] = '\0'; 3221 p += strlen(p); 3222 } 3223 3224 if (spflt != NULL) { 3225 if (logflags & CPU_FLTCPU) { 3226 (void) snprintf(p, (size_t)(q - p), " CPU%d", 3227 aflt->flt_inst); 3228 p += strlen(p); 3229 } 3230 3231 if (logflags & CPU_SPACE) { 3232 if (aflt->flt_status & ECC_D_TRAP) 3233 (void) snprintf(p, (size_t)(q - p), 3234 " Data access"); 3235 else if (aflt->flt_status & ECC_I_TRAP) 3236 (void) snprintf(p, (size_t)(q - p), 3237 " Instruction access"); 3238 p += strlen(p); 3239 } 3240 3241 if (logflags & CPU_TL) { 3242 (void) snprintf(p, (size_t)(q - p), " at TL%s", 3243 aflt->flt_tl ? 
">0" : "=0"); 3244 p += strlen(p); 3245 } 3246 3247 if (logflags & CPU_ERRID) { 3248 (void) snprintf(p, (size_t)(q - p), 3249 ", errID 0x%08x.%08x", 3250 (uint32_t)(aflt->flt_id >> 32), 3251 (uint32_t)aflt->flt_id); 3252 p += strlen(p); 3253 } 3254 3255 if (logflags & CPU_AFSR) { 3256 (void) snprintf(p, (size_t)(q - p), 3257 "\n AFSR 0x%08b.%08b", 3258 (uint32_t)(aflt->flt_stat >> 32), AFSR_FMTSTR0, 3259 (uint32_t)aflt->flt_stat, AFSR_FMTSTR1); 3260 p += strlen(p); 3261 } 3262 3263 if (logflags & CPU_AFAR) { 3264 (void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x", 3265 (uint32_t)(aflt->flt_addr >> 32), 3266 (uint32_t)aflt->flt_addr); 3267 p += strlen(p); 3268 } 3269 3270 if (logflags & CPU_AF_PSYND) { 3271 ushort_t psynd = (ushort_t) 3272 (aflt->flt_stat & P_AFSR_P_SYND); 3273 3274 (void) snprintf(p, (size_t)(q - p), 3275 "\n AFSR.PSYND 0x%04x(Score %02d)", 3276 psynd, ecc_psynd_score(psynd)); 3277 p += strlen(p); 3278 } 3279 3280 if (logflags & CPU_AF_ETS) { 3281 (void) snprintf(p, (size_t)(q - p), " AFSR.ETS 0x%02x", 3282 (uchar_t)((aflt->flt_stat & P_AFSR_ETS) >> 16)); 3283 p += strlen(p); 3284 } 3285 3286 if (logflags & CPU_FAULTPC) { 3287 (void) snprintf(p, (size_t)(q - p), " Fault_PC 0x%p", 3288 (void *)aflt->flt_pc); 3289 p += strlen(p); 3290 } 3291 3292 if (logflags & CPU_UDBH) { 3293 (void) snprintf(p, (size_t)(q - p), 3294 "\n UDBH 0x%04b UDBH.ESYND 0x%02x", 3295 spflt->flt_sdbh, UDB_FMTSTR, 3296 spflt->flt_sdbh & 0xFF); 3297 p += strlen(p); 3298 } 3299 3300 if (logflags & CPU_UDBL) { 3301 (void) snprintf(p, (size_t)(q - p), 3302 " UDBL 0x%04b UDBL.ESYND 0x%02x", 3303 spflt->flt_sdbl, UDB_FMTSTR, 3304 spflt->flt_sdbl & 0xFF); 3305 p += strlen(p); 3306 } 3307 3308 if (logflags & CPU_SYND) { 3309 ushort_t synd = SYND(aflt->flt_synd); 3310 3311 (void) snprintf(p, (size_t)(q - p), 3312 "\n %s Syndrome 0x%x Memory Module ", 3313 UDBL(aflt->flt_synd) ? "UDBL" : "UDBH", synd); 3314 p += strlen(p); 3315 } 3316 } 3317 3318 if (endstr != NULL) { 3319 if (!(logflags & CPU_SYND)) 3320 (void) snprintf(p, (size_t)(q - p), "\n %s", endstr); 3321 else 3322 (void) snprintf(p, (size_t)(q - p), "%s", endstr); 3323 p += strlen(p); 3324 } 3325 3326 if (ce_code == CE_CONT && (p < q - 1)) 3327 (void) strcpy(p, "\n"); /* add final \n if needed */ 3328 3329 va_start(ap, fmt); 3330 vcmn_err(ce_code, buf, ap); 3331 va_end(ap); 3332 } 3333 3334 /* 3335 * Ecache Scrubbing 3336 * 3337 * The basic idea is to prevent lines from sitting in the ecache long enough 3338 * to build up soft errors which can lead to ecache parity errors. 3339 * 3340 * The following rules are observed when flushing the ecache: 3341 * 3342 * 1. When the system is busy, flush bad clean lines 3343 * 2. When the system is idle, flush all clean lines 3344 * 3. When the system is idle, flush good dirty lines 3345 * 4. Never flush bad dirty lines. 3346 * 3347 * modify parity busy idle 3348 * ---------------------------- 3349 * clean good X 3350 * clean bad X X 3351 * dirty good X 3352 * dirty bad 3353 * 3354 * Bad or good refers to whether a line has an E$ parity error or not. 3355 * Clean or dirty refers to the state of the modified bit. We currently 3356 * default the scan rate to 100 (scan 10% of the cache per second). 3357 * 3358 * The following are E$ states and actions. 
3359 * 3360 * We encode our state as a 3-bit number, consisting of: 3361 * ECACHE_STATE_MODIFIED (0=clean, 1=dirty) 3362 * ECACHE_STATE_PARITY (0=good, 1=bad) 3363 * ECACHE_STATE_BUSY (0=idle, 1=busy) 3364 * 3365 * We associate a flushing and a logging action with each state. 3366 * 3367 * E$ actions are different for Spitfire and Sabre/Hummingbird modules. 3368 * MIRROR_FLUSH indicates that an E$ line will be flushed for the mirrored 3369 * E$ only, in addition to value being set by ec_flush. 3370 */ 3371 3372 #define ALWAYS_FLUSH 0x1 /* flush E$ line on all E$ types */ 3373 #define NEVER_FLUSH 0x0 /* never the flush the E$ line */ 3374 #define MIRROR_FLUSH 0xF /* flush E$ line on mirrored E$ only */ 3375 3376 struct { 3377 char ec_flush; /* whether to flush or not */ 3378 char ec_log; /* ecache logging */ 3379 char ec_log_type; /* log type info */ 3380 } ec_action[] = { /* states of the E$ line in M P B */ 3381 { ALWAYS_FLUSH, 0, 0 }, /* 0 0 0 clean_good_idle */ 3382 { MIRROR_FLUSH, 0, 0 }, /* 0 0 1 clean_good_busy */ 3383 { ALWAYS_FLUSH, 1, CPU_BADLINE_CI_ERR }, /* 0 1 0 clean_bad_idle */ 3384 { ALWAYS_FLUSH, 1, CPU_BADLINE_CB_ERR }, /* 0 1 1 clean_bad_busy */ 3385 { ALWAYS_FLUSH, 0, 0 }, /* 1 0 0 dirty_good_idle */ 3386 { MIRROR_FLUSH, 0, 0 }, /* 1 0 1 dirty_good_busy */ 3387 { NEVER_FLUSH, 1, CPU_BADLINE_DI_ERR }, /* 1 1 0 dirty_bad_idle */ 3388 { NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR } /* 1 1 1 dirty_bad_busy */ 3389 }; 3390 3391 /* 3392 * Offsets into the ec_action[] that determines clean_good_busy and 3393 * dirty_good_busy lines. 3394 */ 3395 #define ECACHE_CGB_LINE 1 /* E$ clean_good_busy line */ 3396 #define ECACHE_DGB_LINE 5 /* E$ dirty_good_busy line */ 3397 3398 /* 3399 * We are flushing lines which are Clean_Good_Busy and also the lines 3400 * Dirty_Good_Busy. And we only follow it for non-mirrored E$. 3401 */ 3402 #define CGB(x, m) (((x) == ECACHE_CGB_LINE) && (m != ECACHE_CPU_MIRROR)) 3403 #define DGB(x, m) (((x) == ECACHE_DGB_LINE) && (m != ECACHE_CPU_MIRROR)) 3404 3405 #define ECACHE_STATE_MODIFIED 0x4 3406 #define ECACHE_STATE_PARITY 0x2 3407 #define ECACHE_STATE_BUSY 0x1 3408 3409 /* 3410 * If ecache is mirrored ecache_calls_a_sec and ecache_scan_rate are reduced. 3411 */ 3412 int ecache_calls_a_sec_mirrored = 1; 3413 int ecache_lines_per_call_mirrored = 1; 3414 3415 int ecache_scrub_enable = 1; /* ecache scrubbing is on by default */ 3416 int ecache_scrub_verbose = 1; /* prints clean and dirty lines */ 3417 int ecache_scrub_panic = 0; /* panics on a clean and dirty line */ 3418 int ecache_calls_a_sec = 100; /* scrubber calls per sec */ 3419 int ecache_scan_rate = 100; /* scan rate (in tenths of a percent) */ 3420 int ecache_idle_factor = 1; /* increase the scan rate when idle */ 3421 int ecache_flush_clean_good_busy = 50; /* flush rate (in percent) */ 3422 int ecache_flush_dirty_good_busy = 100; /* flush rate (in percent) */ 3423 3424 volatile int ec_timeout_calls = 1; /* timeout calls */ 3425 3426 /* 3427 * Interrupt number and pil for ecache scrubber cross-trap calls. 3428 */ 3429 static uint64_t ecache_scrub_inum; 3430 uint_t ecache_scrub_pil = PIL_9; 3431 3432 /* 3433 * Kstats for the E$ scrubber. 
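 *
 * The first eight counters mirror the M/P/B encoding used by the
 * ec_action[] table above: scrub_ecache_line() below treats this struct
 * as an array of kstat_named_t and indexes it directly with the 3-bit
 * state.  A sketch (dirty, parity_bad and busy are illustrative names
 * only):
 *
 *	mpb = 0;
 *	if (dirty)
 *		mpb |= ECACHE_STATE_MODIFIED;	0x4
 *	if (parity_bad)
 *		mpb |= ECACHE_STATE_PARITY;	0x2
 *	if (busy)
 *		mpb |= ECACHE_STATE_BUSY;	0x1
 *	((kstat_named_t *)ec_ksp + mpb)->value.ul++;
 *
 * so mpb == 3 (clean, bad parity, busy) bumps clean_bad_busy.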
3434 */ 3435 typedef struct ecache_kstat { 3436 kstat_named_t clean_good_idle; /* # of lines scrubbed */ 3437 kstat_named_t clean_good_busy; /* # of lines skipped */ 3438 kstat_named_t clean_bad_idle; /* # of lines scrubbed */ 3439 kstat_named_t clean_bad_busy; /* # of lines scrubbed */ 3440 kstat_named_t dirty_good_idle; /* # of lines scrubbed */ 3441 kstat_named_t dirty_good_busy; /* # of lines skipped */ 3442 kstat_named_t dirty_bad_idle; /* # of lines skipped */ 3443 kstat_named_t dirty_bad_busy; /* # of lines skipped */ 3444 kstat_named_t invalid_lines; /* # of invalid lines */ 3445 kstat_named_t clean_good_busy_flush; /* # of lines scrubbed */ 3446 kstat_named_t dirty_good_busy_flush; /* # of lines scrubbed */ 3447 kstat_named_t tags_cleared; /* # of E$ tags cleared */ 3448 } ecache_kstat_t; 3449 3450 static ecache_kstat_t ec_kstat_template = { 3451 { "clean_good_idle", KSTAT_DATA_ULONG }, 3452 { "clean_good_busy", KSTAT_DATA_ULONG }, 3453 { "clean_bad_idle", KSTAT_DATA_ULONG }, 3454 { "clean_bad_busy", KSTAT_DATA_ULONG }, 3455 { "dirty_good_idle", KSTAT_DATA_ULONG }, 3456 { "dirty_good_busy", KSTAT_DATA_ULONG }, 3457 { "dirty_bad_idle", KSTAT_DATA_ULONG }, 3458 { "dirty_bad_busy", KSTAT_DATA_ULONG }, 3459 { "invalid_lines", KSTAT_DATA_ULONG }, 3460 { "clean_good_busy_flush", KSTAT_DATA_ULONG }, 3461 { "dirty_good_busy_flush", KSTAT_DATA_ULONG }, 3462 { "ecache_tags_cleared", KSTAT_DATA_ULONG } 3463 }; 3464 3465 struct kmem_cache *sf_private_cache; 3466 3467 /* 3468 * Called periodically on each CPU to scan the ecache once a sec. 3469 * adjusting the ecache line index appropriately 3470 */ 3471 void 3472 scrub_ecache_line() 3473 { 3474 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc); 3475 int cpuid = CPU->cpu_id; 3476 uint32_t index = ssmp->ecache_flush_index; 3477 uint64_t ec_size = cpunodes[cpuid].ecache_size; 3478 size_t ec_linesize = cpunodes[cpuid].ecache_linesize; 3479 int nlines = ssmp->ecache_nlines; 3480 uint32_t ec_set_size = ec_size / ecache_associativity; 3481 int ec_mirror = ssmp->ecache_mirror; 3482 ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data; 3483 3484 int line, scan_lines, flush_clean_busy = 0, flush_dirty_busy = 0; 3485 int mpb; /* encode Modified, Parity, Busy for action */ 3486 uchar_t state; 3487 uint64_t ec_tag, paddr, oafsr, tafsr, nafsr; 3488 uint64_t *acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 3489 ec_data_t ec_data[8]; 3490 kstat_named_t *ec_knp; 3491 3492 switch (ec_mirror) { 3493 default: 3494 case ECACHE_CPU_NON_MIRROR: 3495 /* 3496 * The E$ scan rate is expressed in units of tenths of 3497 * a percent. ecache_scan_rate = 1000 (100%) means the 3498 * whole cache is scanned every second. 3499 */ 3500 scan_lines = (nlines * ecache_scan_rate) / 3501 (1000 * ecache_calls_a_sec); 3502 if (!(ssmp->ecache_busy)) { 3503 if (ecache_idle_factor > 0) { 3504 scan_lines *= ecache_idle_factor; 3505 } 3506 } else { 3507 flush_clean_busy = (scan_lines * 3508 ecache_flush_clean_good_busy) / 100; 3509 flush_dirty_busy = (scan_lines * 3510 ecache_flush_dirty_good_busy) / 100; 3511 } 3512 3513 ec_timeout_calls = (ecache_calls_a_sec ? 3514 ecache_calls_a_sec : 1); 3515 break; 3516 3517 case ECACHE_CPU_MIRROR: 3518 scan_lines = ecache_lines_per_call_mirrored; 3519 ec_timeout_calls = (ecache_calls_a_sec_mirrored ? 
3520 ecache_calls_a_sec_mirrored : 1); 3521 break; 3522 } 3523 3524 /* 3525 * The ecache scrubber algorithm operates by reading and 3526 * decoding the E$ tag to determine whether the corresponding E$ line 3527 * can be scrubbed. There is an implicit assumption in the scrubber 3528 * logic that the E$ tag is valid. Unfortunately, this assumption is 3529 * flawed since the E$ tag may also be corrupted and have parity errors. 3530 * The scrubber logic is therefore enhanced to check the validity of the E$ tag 3531 * before scrubbing. When a parity error is detected in the E$ tag, 3532 * it is possible to recover and scrub the tag under certain conditions 3533 * so that an ETP error condition can be avoided. 3534 */ 3535 3536 for (mpb = line = 0; line < scan_lines; line++, mpb = 0) { 3537 /* 3538 * We get the old-AFSR before clearing the AFSR sticky bits 3539 * in {get_ecache_tag, check_ecache_line, get_ecache_dtag}. 3540 * If the CP bit is set in the old-AFSR, we log an Orphan CP event. 3541 */ 3542 ec_tag = get_ecache_tag(index, &nafsr, acc_afsr); 3543 state = (uchar_t)((ec_tag & cpu_ec_state_mask) >> 3544 cpu_ec_state_shift); 3545 3546 /* 3547 * If ETP is set, try to scrub the ecache tag. 3548 */ 3549 if (nafsr & P_AFSR_ETP) { 3550 ecache_scrub_tag_err(nafsr, state, index); 3551 } else if (state & cpu_ec_state_valid) { 3552 /* 3553 * ETP is not set and the E$ tag is valid. 3554 * Proceed with the E$ scrubbing. 3555 */ 3556 if (state & cpu_ec_state_dirty) 3557 mpb |= ECACHE_STATE_MODIFIED; 3558 3559 tafsr = check_ecache_line(index, acc_afsr); 3560 3561 if (tafsr & P_AFSR_EDP) { 3562 mpb |= ECACHE_STATE_PARITY; 3563 3564 if (ecache_scrub_verbose || 3565 ecache_scrub_panic) { 3566 get_ecache_dtag(P2ALIGN(index, 64), 3567 (uint64_t *)&ec_data[0], 3568 &ec_tag, &oafsr, acc_afsr); 3569 } 3570 } 3571 3572 if (ssmp->ecache_busy) 3573 mpb |= ECACHE_STATE_BUSY; 3574 3575 ec_knp = (kstat_named_t *)ec_ksp + mpb; 3576 ec_knp->value.ul++; 3577 3578 paddr = ((ec_tag & cpu_ec_tag_mask) << 3579 cpu_ec_tag_shift) | (index % ec_set_size); 3580 3581 /* 3582 * We flush the E$ lines depending on ec_flush; 3583 * additionally, we flush clean_good_busy and 3584 * dirty_good_busy lines for mirrored E$. 3585 */ 3586 if (ec_action[mpb].ec_flush == ALWAYS_FLUSH) { 3587 flushecacheline(paddr, ec_size); 3588 } else if ((ec_mirror == ECACHE_CPU_MIRROR) && 3589 (ec_action[mpb].ec_flush == MIRROR_FLUSH)) { 3590 flushecacheline(paddr, ec_size); 3591 } else if (ec_action[mpb].ec_flush == NEVER_FLUSH) { 3592 softcall(ecache_page_retire, (void *)paddr); 3593 } 3594 3595 /* 3596 * Conditionally flush both the clean_good and 3597 * dirty_good lines when busy.
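 * As a worked example of where these quotas come from (hypothetical
 * geometry: 512KB E$ with 64-byte lines, i.e. nlines == 8192), using the
 * defaults ecache_scan_rate == 100 and ecache_calls_a_sec == 100:
 *
 *	scan_lines       = (8192 * 100) / (1000 * 100)	= 8 lines per call
 *				(~800 lines/sec, i.e. ~10% of the E$ per sec)
 *	flush_clean_busy = (8 * 50) / 100		= 4
 *	flush_dirty_busy = (8 * 100) / 100		= 8
 *
 * so while the CPU is busy, at most 4 clean_good_busy and 8
 * dirty_good_busy lines are displacement flushed per call on a
 * non-mirrored E$.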
3598 */ 3599 if (CGB(mpb, ec_mirror) && (flush_clean_busy > 0)) { 3600 flush_clean_busy--; 3601 flushecacheline(paddr, ec_size); 3602 ec_ksp->clean_good_busy_flush.value.ul++; 3603 } else if (DGB(mpb, ec_mirror) && 3604 (flush_dirty_busy > 0)) { 3605 flush_dirty_busy--; 3606 flushecacheline(paddr, ec_size); 3607 ec_ksp->dirty_good_busy_flush.value.ul++; 3608 } 3609 3610 if (ec_action[mpb].ec_log && (ecache_scrub_verbose || 3611 ecache_scrub_panic)) { 3612 ecache_scrub_log(ec_data, ec_tag, paddr, mpb, 3613 tafsr); 3614 } 3615 3616 } else { 3617 ec_ksp->invalid_lines.value.ul++; 3618 } 3619 3620 if ((index += ec_linesize) >= ec_size) 3621 index = 0; 3622 3623 } 3624 3625 /* 3626 * set the ecache scrub index for the next time around 3627 */ 3628 ssmp->ecache_flush_index = index; 3629 3630 if (*acc_afsr & P_AFSR_CP) { 3631 uint64_t ret_afsr; 3632 3633 ret_afsr = ecache_scrub_misc_err(CPU_ORPHAN_CP_ERR, *acc_afsr); 3634 if ((ret_afsr & P_AFSR_CP) == 0) 3635 *acc_afsr = 0; 3636 } 3637 } 3638 3639 /* 3640 * Handler for ecache_scrub_inum softint. Call scrub_ecache_line until 3641 * we decrement the outstanding request count to zero. 3642 */ 3643 3644 /*ARGSUSED*/ 3645 uint_t 3646 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2) 3647 { 3648 int i; 3649 int outstanding; 3650 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc); 3651 uint32_t *countp = &ssmp->ec_scrub_outstanding; 3652 3653 do { 3654 outstanding = *countp; 3655 ASSERT(outstanding > 0); 3656 for (i = 0; i < outstanding; i++) 3657 scrub_ecache_line(); 3658 } while (atomic_add_32_nv(countp, -outstanding)); 3659 3660 return (DDI_INTR_CLAIMED); 3661 } 3662 3663 /* 3664 * force each cpu to perform an ecache scrub, called from a timeout 3665 */ 3666 extern xcfunc_t ecache_scrubreq_tl1; 3667 3668 void 3669 do_scrub_ecache_line(void) 3670 { 3671 long delta; 3672 3673 if (ecache_calls_a_sec > hz) 3674 ecache_calls_a_sec = hz; 3675 else if (ecache_calls_a_sec <= 0) 3676 ecache_calls_a_sec = 1; 3677 3678 if (ecache_calls_a_sec_mirrored > hz) 3679 ecache_calls_a_sec_mirrored = hz; 3680 else if (ecache_calls_a_sec_mirrored <= 0) 3681 ecache_calls_a_sec_mirrored = 1; 3682 3683 if (ecache_scrub_enable) { 3684 xt_all(ecache_scrubreq_tl1, ecache_scrub_inum, 0); 3685 delta = hz / ec_timeout_calls; 3686 } else { 3687 delta = hz; 3688 } 3689 3690 (void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0, 3691 delta); 3692 } 3693 3694 /* 3695 * initialization for ecache scrubbing 3696 * This routine is called AFTER all cpus have had cpu_init_private called 3697 * to initialize their private data areas. 3698 */ 3699 void 3700 cpu_init_cache_scrub(void) 3701 { 3702 if (ecache_calls_a_sec > hz) { 3703 cmn_err(CE_NOTE, "ecache_calls_a_sec set too high (%d); " 3704 "resetting to hz (%d)", ecache_calls_a_sec, hz); 3705 ecache_calls_a_sec = hz; 3706 } 3707 3708 /* 3709 * Register softint for ecache scrubbing. 3710 */ 3711 ecache_scrub_inum = add_softintr(ecache_scrub_pil, 3712 scrub_ecache_line_intr, NULL, SOFTINT_MT); 3713 3714 /* 3715 * kick off the scrubbing using realtime timeout 3716 */ 3717 (void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0, 3718 hz / ecache_calls_a_sec); 3719 } 3720 3721 /* 3722 * Unset the busy flag for this cpu. 3723 */ 3724 void 3725 cpu_idle_ecache_scrub(struct cpu *cp) 3726 { 3727 if (CPU_PRIVATE(cp) != NULL) { 3728 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, 3729 sfpr_scrub_misc); 3730 ssmp->ecache_busy = ECACHE_CPU_IDLE; 3731 } 3732 } 3733 3734 /* 3735 * Set the busy flag for this cpu. 
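 * scrub_ecache_line() consults this flag: when the CPU is marked busy
 * the scrubber charges clean_good_busy/dirty_good_busy flushes against
 * the per-call budgets computed there, while an idle CPU may scan up
 * to ecache_idle_factor times as many lines per call.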
3736 */ 3737 void 3738 cpu_busy_ecache_scrub(struct cpu *cp) 3739 { 3740 if (CPU_PRIVATE(cp) != NULL) { 3741 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, 3742 sfpr_scrub_misc); 3743 ssmp->ecache_busy = ECACHE_CPU_BUSY; 3744 } 3745 } 3746 3747 /* 3748 * initialize the ecache scrubber data structures 3749 * The global entry point cpu_init_private replaces this entry point. 3750 * 3751 */ 3752 static void 3753 cpu_init_ecache_scrub_dr(struct cpu *cp) 3754 { 3755 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc); 3756 int cpuid = cp->cpu_id; 3757 3758 /* 3759 * initialize bookkeeping for cache scrubbing 3760 */ 3761 bzero(ssmp, sizeof (spitfire_scrub_misc_t)); 3762 3763 ssmp->ecache_flush_index = 0; 3764 3765 ssmp->ecache_nlines = 3766 cpunodes[cpuid].ecache_size / cpunodes[cpuid].ecache_linesize; 3767 3768 /* 3769 * Determine whether we are running on mirrored SRAM. 3770 */ 3771 3772 if (cpunodes[cpuid].msram == ECACHE_CPU_MIRROR) 3773 ssmp->ecache_mirror = ECACHE_CPU_MIRROR; 3774 else 3775 ssmp->ecache_mirror = ECACHE_CPU_NON_MIRROR; 3776 3777 cpu_busy_ecache_scrub(cp); 3778 3779 /* 3780 * initialize the kstats 3781 */ 3782 ecache_kstat_init(cp); 3783 } 3784 3785 /* 3786 * uninitialize the ecache scrubber data structures 3787 * The global entry point cpu_uninit_private replaces this entry point. 3788 */ 3789 static void 3790 cpu_uninit_ecache_scrub_dr(struct cpu *cp) 3791 { 3792 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc); 3793 3794 if (ssmp->ecache_ksp != NULL) { 3795 kstat_delete(ssmp->ecache_ksp); 3796 ssmp->ecache_ksp = NULL; 3797 } 3798 3799 /* 3800 * uninitialize bookkeeping for cache scrubbing 3801 */ 3802 bzero(ssmp, sizeof (spitfire_scrub_misc_t)); 3803 3804 cpu_idle_ecache_scrub(cp); 3805 } 3806 3807 struct kmem_cache *sf_private_cache; 3808 3809 /* 3810 * Cpu private initialization. This includes allocating the cpu_private 3811 * data structure, initializing it, and initializing the scrubber for this 3812 * cpu. This is called once for EVERY cpu, including CPU 0. This function 3813 * calls cpu_init_ecache_scrub_dr to init the scrubber. 3814 * We use kmem_cache_create for the spitfire private data structure because it 3815 * needs to be allocated on an S_ECACHE_MAX_LSIZE (64) byte boundary. 3816 */ 3817 void 3818 cpu_init_private(struct cpu *cp) 3819 { 3820 spitfire_private_t *sfprp; 3821 3822 ASSERT(CPU_PRIVATE(cp) == NULL); 3823 3824 /* 3825 * If the sf_private_cache has not been created, create it. 3826 */ 3827 if (sf_private_cache == NULL) { 3828 sf_private_cache = kmem_cache_create("sf_private_cache", 3829 sizeof (spitfire_private_t), S_ECACHE_MAX_LSIZE, NULL, 3830 NULL, NULL, NULL, NULL, 0); 3831 ASSERT(sf_private_cache); 3832 } 3833 3834 sfprp = CPU_PRIVATE(cp) = kmem_cache_alloc(sf_private_cache, KM_SLEEP); 3835 3836 bzero(sfprp, sizeof (spitfire_private_t)); 3837 3838 cpu_init_ecache_scrub_dr(cp); 3839 } 3840 3841 /* 3842 * Cpu private uninitialization. Uninitialize the Ecache scrubber and 3843 * deallocate the scrubber data structures and cpu_private data structure. 3844 * For now, this function just calls cpu_uninit_ecache_scrub_dr to uninit 3845 * the scrubber for the specified cpu.
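 * The private block itself is then handed back to sf_private_cache
 * and the cpu_private pointer is cleared, undoing the work done by
 * cpu_init_private() above.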
3846 */ 3847 void 3848 cpu_uninit_private(struct cpu *cp) 3849 { 3850 ASSERT(CPU_PRIVATE(cp)); 3851 3852 cpu_uninit_ecache_scrub_dr(cp); 3853 kmem_cache_free(sf_private_cache, CPU_PRIVATE(cp)); 3854 CPU_PRIVATE(cp) = NULL; 3855 } 3856 3857 /* 3858 * initialize the ecache kstats for each cpu 3859 */ 3860 static void 3861 ecache_kstat_init(struct cpu *cp) 3862 { 3863 struct kstat *ksp; 3864 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc); 3865 3866 ASSERT(ssmp != NULL); 3867 3868 if ((ksp = kstat_create("unix", cp->cpu_id, "ecache_kstat", "misc", 3869 KSTAT_TYPE_NAMED, 3870 sizeof (ecache_kstat_t) / sizeof (kstat_named_t), 3871 KSTAT_FLAG_WRITABLE)) == NULL) { 3872 ssmp->ecache_ksp = NULL; 3873 cmn_err(CE_NOTE, "!ecache_kstat_init(%d) failed\n", cp->cpu_id); 3874 return; 3875 } 3876 3877 ssmp->ecache_ksp = ksp; 3878 bcopy(&ec_kstat_template, ksp->ks_data, sizeof (ecache_kstat_t)); 3879 kstat_install(ksp); 3880 } 3881 3882 /* 3883 * log the bad ecache information 3884 */ 3885 static void 3886 ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, uint64_t paddr, int mpb, 3887 uint64_t afsr) 3888 { 3889 spitf_async_flt spf_flt; 3890 struct async_flt *aflt; 3891 int i; 3892 char *class; 3893 3894 bzero(&spf_flt, sizeof (spitf_async_flt)); 3895 aflt = &spf_flt.cmn_asyncflt; 3896 3897 for (i = 0; i < 8; i++) { 3898 spf_flt.flt_ec_data[i] = ec_data[i]; 3899 } 3900 3901 spf_flt.flt_ec_tag = ec_tag; 3902 3903 if (mpb < (sizeof (ec_action) / sizeof (ec_action[0]))) { 3904 spf_flt.flt_type = ec_action[mpb].ec_log_type; 3905 } else spf_flt.flt_type = (ushort_t)mpb; 3906 3907 aflt->flt_inst = CPU->cpu_id; 3908 aflt->flt_class = CPU_FAULT; 3909 aflt->flt_id = gethrtime_waitfree(); 3910 aflt->flt_addr = paddr; 3911 aflt->flt_stat = afsr; 3912 aflt->flt_panic = (uchar_t)ecache_scrub_panic; 3913 3914 switch (mpb) { 3915 case CPU_ECACHE_TAG_ERR: 3916 case CPU_ECACHE_ADDR_PAR_ERR: 3917 case CPU_ECACHE_ETP_ETS_ERR: 3918 case CPU_ECACHE_STATE_ERR: 3919 class = FM_EREPORT_CPU_USII_ESCRUB_TAG; 3920 break; 3921 default: 3922 class = FM_EREPORT_CPU_USII_ESCRUB_DATA; 3923 break; 3924 } 3925 3926 cpu_errorq_dispatch(class, (void *)&spf_flt, sizeof (spf_flt), 3927 ue_queue, aflt->flt_panic); 3928 3929 if (aflt->flt_panic) 3930 cmn_err(CE_PANIC, "ecache_scrub_panic set and bad E$" 3931 "line detected"); 3932 } 3933 3934 /* 3935 * Process an ecache error that occured during the E$ scrubbing. 3936 * We do the ecache scan to find the bad line, flush the bad line 3937 * and start the memscrubber to find any UE (in memory or in another cache) 3938 */ 3939 static uint64_t 3940 ecache_scrub_misc_err(int type, uint64_t afsr) 3941 { 3942 spitf_async_flt spf_flt; 3943 struct async_flt *aflt; 3944 uint64_t oafsr; 3945 3946 bzero(&spf_flt, sizeof (spitf_async_flt)); 3947 aflt = &spf_flt.cmn_asyncflt; 3948 3949 /* 3950 * Scan each line in the cache to look for the one 3951 * with bad parity 3952 */ 3953 aflt->flt_addr = AFLT_INV_ADDR; 3954 scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0], 3955 &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr); 3956 3957 if (oafsr & P_AFSR_CP) { 3958 uint64_t *cp_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 3959 *cp_afsr |= oafsr; 3960 } 3961 3962 /* 3963 * If we found a bad PA, update the state to indicate if it is 3964 * memory or I/O space. 3965 */ 3966 if (aflt->flt_addr != AFLT_INV_ADDR) { 3967 aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >> 3968 MMU_PAGESHIFT)) ? 
1 : 0; 3969 } 3970 3971 spf_flt.flt_type = (ushort_t)type; 3972 3973 aflt->flt_inst = CPU->cpu_id; 3974 aflt->flt_class = CPU_FAULT; 3975 aflt->flt_id = gethrtime_waitfree(); 3976 aflt->flt_status = afsr; 3977 aflt->flt_panic = (uchar_t)ecache_scrub_panic; 3978 3979 /* 3980 * We have the bad line, flush that line and start 3981 * the memscrubber. 3982 */ 3983 if (spf_flt.flt_ec_lcnt > 0) { 3984 flushecacheline(P2ALIGN(aflt->flt_addr, 64), 3985 cpunodes[CPU->cpu_id].ecache_size); 3986 read_all_memscrub = 1; 3987 memscrub_run(); 3988 } 3989 3990 cpu_errorq_dispatch((type == CPU_ORPHAN_CP_ERR) ? 3991 FM_EREPORT_CPU_USII_CP : FM_EREPORT_CPU_USII_UNKNOWN, 3992 (void *)&spf_flt, sizeof (spf_flt), ue_queue, aflt->flt_panic); 3993 3994 return (oafsr); 3995 } 3996 3997 static void 3998 ecache_scrub_tag_err(uint64_t afsr, uchar_t state, uint32_t index) 3999 { 4000 ushort_t afsr_ets = (afsr & P_AFSR_ETS) >> P_AFSR_ETS_SHIFT; 4001 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc); 4002 ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data; 4003 uint64_t ec_tag, paddr, oafsr; 4004 ec_data_t ec_data[8]; 4005 int cpuid = CPU->cpu_id; 4006 uint32_t ec_set_size = cpunodes[cpuid].ecache_size / 4007 ecache_associativity; 4008 uint64_t *cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 4009 4010 get_ecache_dtag(P2ALIGN(index, 64), (uint64_t *)&ec_data[0], &ec_tag, 4011 &oafsr, cpu_afsr); 4012 paddr = ((ec_tag & cpu_ec_tag_mask) << cpu_ec_tag_shift) | 4013 (index % ec_set_size); 4014 4015 /* 4016 * E$ tag state has good parity 4017 */ 4018 if ((afsr_ets & cpu_ec_state_parity) == 0) { 4019 if (afsr_ets & cpu_ec_parity) { 4020 /* 4021 * E$ tag state bits indicate the line is clean, 4022 * invalidate the E$ tag and continue. 4023 */ 4024 if (!(state & cpu_ec_state_dirty)) { 4025 /* 4026 * Zero the tag and mark the state invalid 4027 * with good parity for the tag. 
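 * The rewrite is done with write_hb_ec_tag_parity() on Sabre and
 * Hummingbird parts (isus2i/isus2e) and write_ec_tag_parity()
 * otherwise, and the full E$ flush that follows keeps the dual tag
 * consistent with the cleared tag.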
4028 */ 4029 if (isus2i || isus2e) 4030 write_hb_ec_tag_parity(index); 4031 else 4032 write_ec_tag_parity(index); 4033 4034 /* Sync with the dual tag */ 4035 flushecacheline(0, 4036 cpunodes[CPU->cpu_id].ecache_size); 4037 ec_ksp->tags_cleared.value.ul++; 4038 ecache_scrub_log(ec_data, ec_tag, paddr, 4039 CPU_ECACHE_TAG_ERR, afsr); 4040 return; 4041 } else { 4042 ecache_scrub_log(ec_data, ec_tag, paddr, 4043 CPU_ECACHE_ADDR_PAR_ERR, afsr); 4044 cmn_err(CE_PANIC, " E$ tag address has bad" 4045 " parity"); 4046 } 4047 } else if ((afsr_ets & cpu_ec_parity) == 0) { 4048 /* 4049 * ETS is zero but ETP is set 4050 */ 4051 ecache_scrub_log(ec_data, ec_tag, paddr, 4052 CPU_ECACHE_ETP_ETS_ERR, afsr); 4053 cmn_err(CE_PANIC, "AFSR.ETP is set and" 4054 " AFSR.ETS is zero"); 4055 } 4056 } else { 4057 /* 4058 * E$ tag state bit has a bad parity 4059 */ 4060 ecache_scrub_log(ec_data, ec_tag, paddr, 4061 CPU_ECACHE_STATE_ERR, afsr); 4062 cmn_err(CE_PANIC, "E$ tag state has bad parity"); 4063 } 4064 } 4065 4066 static void 4067 ecache_page_retire(void *arg) 4068 { 4069 uint64_t paddr = (uint64_t)arg; 4070 (void) page_retire(paddr, PR_UE); 4071 } 4072 4073 void 4074 sticksync_slave(void) 4075 {} 4076 4077 void 4078 sticksync_master(void) 4079 {} 4080 4081 /*ARGSUSED*/ 4082 void 4083 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t bpp) 4084 {} 4085 4086 void 4087 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected) 4088 { 4089 int status; 4090 ddi_fm_error_t de; 4091 4092 bzero(&de, sizeof (ddi_fm_error_t)); 4093 4094 de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, 4095 FM_ENA_FMT1); 4096 de.fme_flag = expected; 4097 de.fme_bus_specific = (void *)aflt->flt_addr; 4098 status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de); 4099 4100 if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL)) 4101 aflt->flt_panic = 1; 4102 } 4103 4104 /*ARGSUSED*/ 4105 void 4106 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz, 4107 errorq_t *eqp, uint_t flag) 4108 { 4109 struct async_flt *aflt = (struct async_flt *)payload; 4110 4111 aflt->flt_erpt_class = error_class; 4112 errorq_dispatch(eqp, payload, payload_sz, flag); 4113 } 4114 4115 #define MAX_SIMM 8 4116 4117 struct ce_info { 4118 char name[UNUM_NAMLEN]; 4119 uint64_t intermittent_total; 4120 uint64_t persistent_total; 4121 uint64_t sticky_total; 4122 unsigned short leaky_bucket_cnt; 4123 }; 4124 4125 /* 4126 * Separately-defined structure for use in reporting the ce_info 4127 * to SunVTS without exposing the internal layout and implementation 4128 * of struct ce_info. 4129 */ 4130 static struct ecc_error_info ecc_error_info_data = { 4131 { "version", KSTAT_DATA_UINT32 }, 4132 { "maxcount", KSTAT_DATA_UINT32 }, 4133 { "count", KSTAT_DATA_UINT32 } 4134 }; 4135 static const size_t ecc_error_info_ndata = sizeof (ecc_error_info_data) / 4136 sizeof (struct kstat_named); 4137 4138 #if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN 4139 #error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN" 4140 #endif 4141 4142 struct ce_info *mem_ce_simm = NULL; 4143 size_t mem_ce_simm_size = 0; 4144 4145 /* 4146 * Default values for the number of CE's allowed per interval. 
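 * (For example, with the defaults below, 2 CEs per 1440 minutes,
 * add_leaky_bucket_timeout() arms a timeout of
 * (1440 * 60 * MICROSEC) / 2, so one leaky bucket count drains away
 * roughly every 12 hours; the timeout is never allowed to be shorter
 * than SOFTERR_MIN_TIMEOUT.)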
4147 * Interval is defined in minutes 4148 * SOFTERR_MIN_TIMEOUT is defined in microseconds 4149 */ 4150 #define SOFTERR_LIMIT_DEFAULT 2 4151 #define SOFTERR_INTERVAL_DEFAULT 1440 /* This is 24 hours */ 4152 #define SOFTERR_MIN_TIMEOUT (60 * MICROSEC) /* This is 1 minute */ 4153 #define TIMEOUT_NONE ((timeout_id_t)0) 4154 #define TIMEOUT_SET ((timeout_id_t)1) 4155 4156 /* 4157 * timeout identifer for leaky_bucket 4158 */ 4159 static timeout_id_t leaky_bucket_timeout_id = TIMEOUT_NONE; 4160 4161 /* 4162 * Tunables for maximum number of allowed CE's in a given time 4163 */ 4164 int ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT; 4165 int ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT; 4166 4167 void 4168 cpu_mp_init(void) 4169 { 4170 size_t size = cpu_aflt_size(); 4171 size_t i; 4172 kstat_t *ksp; 4173 4174 /* 4175 * Initialize the CE error handling buffers. 4176 */ 4177 mem_ce_simm_size = MAX_SIMM * max_ncpus; 4178 size = sizeof (struct ce_info) * mem_ce_simm_size; 4179 mem_ce_simm = kmem_zalloc(size, KM_SLEEP); 4180 4181 ksp = kstat_create("unix", 0, "ecc-info", "misc", 4182 KSTAT_TYPE_NAMED, ecc_error_info_ndata, KSTAT_FLAG_VIRTUAL); 4183 if (ksp != NULL) { 4184 ksp->ks_data = (struct kstat_named *)&ecc_error_info_data; 4185 ecc_error_info_data.version.value.ui32 = KSTAT_CE_INFO_VER; 4186 ecc_error_info_data.maxcount.value.ui32 = mem_ce_simm_size; 4187 ecc_error_info_data.count.value.ui32 = 0; 4188 kstat_install(ksp); 4189 } 4190 4191 for (i = 0; i < mem_ce_simm_size; i++) { 4192 struct kstat_ecc_mm_info *kceip; 4193 4194 kceip = kmem_zalloc(sizeof (struct kstat_ecc_mm_info), 4195 KM_SLEEP); 4196 ksp = kstat_create("mm", i, "ecc-info", "misc", 4197 KSTAT_TYPE_NAMED, 4198 sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t), 4199 KSTAT_FLAG_VIRTUAL); 4200 if (ksp != NULL) { 4201 /* 4202 * Re-declare ks_data_size to include room for the 4203 * UNUM name since we don't have KSTAT_FLAG_VAR_SIZE 4204 * set. 4205 */ 4206 ksp->ks_data_size = sizeof (struct kstat_ecc_mm_info) + 4207 KSTAT_CE_UNUM_NAMLEN; 4208 ksp->ks_data = kceip; 4209 kstat_named_init(&kceip->name, 4210 "name", KSTAT_DATA_STRING); 4211 kstat_named_init(&kceip->intermittent_total, 4212 "intermittent_total", KSTAT_DATA_UINT64); 4213 kstat_named_init(&kceip->persistent_total, 4214 "persistent_total", KSTAT_DATA_UINT64); 4215 kstat_named_init(&kceip->sticky_total, 4216 "sticky_total", KSTAT_DATA_UINT64); 4217 /* 4218 * Use the default snapshot routine as it knows how to 4219 * deal with named kstats with long strings. 4220 */ 4221 ksp->ks_update = ecc_kstat_update; 4222 kstat_install(ksp); 4223 } else { 4224 kmem_free(kceip, sizeof (struct kstat_ecc_mm_info)); 4225 } 4226 } 4227 } 4228 4229 /*ARGSUSED*/ 4230 static void 4231 leaky_bucket_timeout(void *arg) 4232 { 4233 int i; 4234 struct ce_info *psimm = mem_ce_simm; 4235 4236 for (i = 0; i < mem_ce_simm_size; i++) { 4237 if (psimm[i].leaky_bucket_cnt > 0) 4238 atomic_add_16(&psimm[i].leaky_bucket_cnt, -1); 4239 } 4240 add_leaky_bucket_timeout(); 4241 } 4242 4243 static void 4244 add_leaky_bucket_timeout(void) 4245 { 4246 long timeout_in_microsecs; 4247 4248 /* 4249 * create timeout for next leak. 4250 * 4251 * The timeout interval is calculated as follows 4252 * 4253 * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit 4254 * 4255 * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds 4256 * in a minute), then multiply this by MICROSEC to get the interval 4257 * in microseconds. 
Divide this total by ecc_softerr_limit so that 4258 * the timeout interval is accurate to within a few microseconds. 4259 */ 4260 4261 if (ecc_softerr_limit <= 0) 4262 ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT; 4263 if (ecc_softerr_interval <= 0) 4264 ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT; 4265 4266 timeout_in_microsecs = ((int64_t)ecc_softerr_interval * 60 * MICROSEC) / 4267 ecc_softerr_limit; 4268 4269 if (timeout_in_microsecs < SOFTERR_MIN_TIMEOUT) 4270 timeout_in_microsecs = SOFTERR_MIN_TIMEOUT; 4271 4272 leaky_bucket_timeout_id = timeout(leaky_bucket_timeout, 4273 (void *)NULL, drv_usectohz((clock_t)timeout_in_microsecs)); 4274 } 4275 4276 /* 4277 * Legacy Correctable ECC Error Hash 4278 * 4279 * All of the code below this comment is used to implement a legacy array 4280 * which counted intermittent, persistent, and sticky CE errors by unum, 4281 * and then was later extended to publish the data as a kstat for SunVTS. 4282 * All of this code is replaced by FMA, and remains here until such time 4283 * that the UltraSPARC-I/II CPU code is converted to FMA, or is EOLed. 4284 * 4285 * Errors are saved in three buckets per-unum: 4286 * (1) sticky - scrub was unsuccessful, cannot be scrubbed 4287 * This could represent a problem, and is immediately printed out. 4288 * (2) persistent - was successfully scrubbed 4289 * These errors use the leaky bucket algorithm to determine 4290 * if there is a serious problem. 4291 * (3) intermittent - may have originated from the cpu or upa/safari bus, 4292 * and does not necessarily indicate any problem with the dimm itself, 4293 * is critical information for debugging new hardware. 4294 * Because we do not know if it came from the dimm, it would be 4295 * inappropriate to include these in the leaky bucket counts. 4296 * 4297 * If the E$ line was modified before the scrub operation began, then the 4298 * displacement flush at the beginning of scrubphys() will cause the modified 4299 * line to be written out, which will clean up the CE. Then, any subsequent 4300 * read will not cause an error, which will cause persistent errors to be 4301 * identified as intermittent. 4302 * 4303 * If a DIMM is going bad, it will produce true persistents as well as 4304 * false intermittents, so these intermittents can be safely ignored. 4305 * 4306 * If the error count is excessive for a DIMM, this function will return 4307 * PR_MCE, and the CPU module may then decide to remove that page from use. 4308 */ 4309 static int 4310 ce_count_unum(int status, int len, char *unum) 4311 { 4312 int i; 4313 struct ce_info *psimm = mem_ce_simm; 4314 int page_status = PR_OK; 4315 4316 ASSERT(psimm != NULL); 4317 4318 if (len <= 0 || 4319 (status & (ECC_STICKY | ECC_PERSISTENT | ECC_INTERMITTENT)) == 0) 4320 return (page_status); 4321 4322 /* 4323 * Initialize the leaky_bucket timeout 4324 */ 4325 if (casptr(&leaky_bucket_timeout_id, 4326 TIMEOUT_NONE, TIMEOUT_SET) == TIMEOUT_NONE) 4327 add_leaky_bucket_timeout(); 4328 4329 for (i = 0; i < mem_ce_simm_size; i++) { 4330 if (psimm[i].name[0] == '\0') { 4331 /* 4332 * Hit the end of the valid entries, add 4333 * a new one. 4334 */ 4335 (void) strncpy(psimm[i].name, unum, len); 4336 if (status & ECC_STICKY) { 4337 /* 4338 * Sticky - the leaky bucket is used to track 4339 * soft errors. Since a sticky error is a 4340 * hard error and likely to be retired soon, 4341 * we do not count it in the leaky bucket. 
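 * Instead the new entry records sticky_total = 1, a warning
 * naming the memory module is logged, and PR_MCE is returned so
 * the caller can retire the page.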
4342 */ 4343 psimm[i].leaky_bucket_cnt = 0; 4344 psimm[i].intermittent_total = 0; 4345 psimm[i].persistent_total = 0; 4346 psimm[i].sticky_total = 1; 4347 cmn_err(CE_WARN, 4348 "[AFT0] Sticky Softerror encountered " 4349 "on Memory Module %s\n", unum); 4350 page_status = PR_MCE; 4351 } else if (status & ECC_PERSISTENT) { 4352 psimm[i].leaky_bucket_cnt = 1; 4353 psimm[i].intermittent_total = 0; 4354 psimm[i].persistent_total = 1; 4355 psimm[i].sticky_total = 0; 4356 } else { 4357 /* 4358 * Intermittent - Because the scrub operation 4359 * cannot find the error in the DIMM, we will 4360 * not count these in the leaky bucket 4361 */ 4362 psimm[i].leaky_bucket_cnt = 0; 4363 psimm[i].intermittent_total = 1; 4364 psimm[i].persistent_total = 0; 4365 psimm[i].sticky_total = 0; 4366 } 4367 ecc_error_info_data.count.value.ui32++; 4368 break; 4369 } else if (strncmp(unum, psimm[i].name, len) == 0) { 4370 /* 4371 * Found an existing entry for the current 4372 * memory module, adjust the counts. 4373 */ 4374 if (status & ECC_STICKY) { 4375 psimm[i].sticky_total++; 4376 cmn_err(CE_WARN, 4377 "[AFT0] Sticky Softerror encountered " 4378 "on Memory Module %s\n", unum); 4379 page_status = PR_MCE; 4380 } else if (status & ECC_PERSISTENT) { 4381 int new_value; 4382 4383 new_value = atomic_add_16_nv( 4384 &psimm[i].leaky_bucket_cnt, 1); 4385 psimm[i].persistent_total++; 4386 if (new_value > ecc_softerr_limit) { 4387 cmn_err(CE_WARN, "[AFT0] Most recent %d" 4388 " soft errors from Memory Module" 4389 " %s exceed threshold (N=%d," 4390 " T=%dh:%02dm) triggering page" 4391 " retire", new_value, unum, 4392 ecc_softerr_limit, 4393 ecc_softerr_interval / 60, 4394 ecc_softerr_interval % 60); 4395 atomic_add_16( 4396 &psimm[i].leaky_bucket_cnt, -1); 4397 page_status = PR_MCE; 4398 } 4399 } else { /* Intermittent */ 4400 psimm[i].intermittent_total++; 4401 } 4402 break; 4403 } 4404 } 4405 4406 if (i >= mem_ce_simm_size) 4407 cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of " 4408 "space.\n"); 4409 4410 return (page_status); 4411 } 4412 4413 /* 4414 * Function to support counting of IO detected CEs. 4415 */ 4416 void 4417 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum) 4418 { 4419 int err; 4420 4421 err = ce_count_unum(ecc->flt_status, len, unum); 4422 if (err != PR_OK && automatic_page_removal) { 4423 (void) page_retire(ecc->flt_addr, err); 4424 } 4425 } 4426 4427 static int 4428 ecc_kstat_update(kstat_t *ksp, int rw) 4429 { 4430 struct kstat_ecc_mm_info *kceip = ksp->ks_data; 4431 struct ce_info *ceip = mem_ce_simm; 4432 int i = ksp->ks_instance; 4433 4434 if (rw == KSTAT_WRITE) 4435 return (EACCES); 4436 4437 ASSERT(ksp->ks_data != NULL); 4438 ASSERT(i < mem_ce_simm_size && i >= 0); 4439 4440 /* 4441 * Since we're not using locks, make sure that we don't get partial 4442 * data. The name is always copied before the counters are incremented 4443 * so only do this update routine if at least one of the counters is 4444 * non-zero, which ensures that ce_count_unum() is done, and the 4445 * string is fully copied. 4446 */ 4447 if (ceip[i].intermittent_total == 0 && 4448 ceip[i].persistent_total == 0 && 4449 ceip[i].sticky_total == 0) { 4450 /* 4451 * Uninitialized or partially initialized. Ignore. 4452 * The ks_data buffer was allocated via kmem_zalloc, 4453 * so no need to bzero it. 
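 * Once ce_count_unum() has stored the unum and bumped at least
 * one of the totals, the copy below publishes the name and the
 * counters through the named kstats.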
4454 */ 4455 return (0); 4456 } 4457 4458 kstat_named_setstr(&kceip->name, ceip[i].name); 4459 kceip->intermittent_total.value.ui64 = ceip[i].intermittent_total; 4460 kceip->persistent_total.value.ui64 = ceip[i].persistent_total; 4461 kceip->sticky_total.value.ui64 = ceip[i].sticky_total; 4462 4463 return (0); 4464 } 4465 4466 #define VIS_BLOCKSIZE 64 4467 4468 int 4469 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data) 4470 { 4471 int ret, watched; 4472 4473 watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 4474 ret = dtrace_blksuword32(addr, data, 0); 4475 if (watched) 4476 watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 4477 4478 return (ret); 4479 } 4480 4481 /*ARGSUSED*/ 4482 void 4483 cpu_faulted_enter(struct cpu *cp) 4484 { 4485 } 4486 4487 /*ARGSUSED*/ 4488 void 4489 cpu_faulted_exit(struct cpu *cp) 4490 { 4491 } 4492 4493 static int mmu_disable_ism_large_pages = ((1 << TTE512K) | 4494 (1 << TTE32M) | (1 << TTE256M)); 4495 static int mmu_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M)); 4496 4497 /* 4498 * The function returns the US_II mmu-specific values for the 4499 * hat's disable_large_pages and disable_ism_large_pages variables. 4500 */ 4501 int 4502 mmu_large_pages_disabled(uint_t flag) 4503 { 4504 int pages_disable = 0; 4505 4506 if (flag == HAT_LOAD) { 4507 pages_disable = mmu_disable_large_pages; 4508 } else if (flag == HAT_LOAD_SHARE) { 4509 pages_disable = mmu_disable_ism_large_pages; 4510 } 4511 return (pages_disable); 4512 } 4513 4514 /*ARGSUSED*/ 4515 void 4516 mmu_init_kernel_pgsz(struct hat *hat) 4517 { 4518 } 4519 4520 size_t 4521 mmu_get_kernel_lpsize(size_t lpsize) 4522 { 4523 uint_t tte; 4524 4525 if (lpsize == 0) { 4526 /* no setting for segkmem_lpsize in /etc/system: use default */ 4527 return (MMU_PAGESIZE4M); 4528 } 4529 4530 for (tte = TTE8K; tte <= TTE4M; tte++) { 4531 if (lpsize == TTEBYTES(tte)) 4532 return (lpsize); 4533 } 4534 4535 return (TTEBYTES(TTE8K)); 4536 } 4537
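
/*
 * For illustration, mmu_get_kernel_lpsize() above maps a segkmem_lpsize
 * setting from /etc/system onto a large page size this MMU supports,
 * e.g.:
 *
 *	segkmem_lpsize			returned kernel large page size
 *	0 (not set)			MMU_PAGESIZE4M (the default)
 *	TTEBYTES(TTE64K), i.e. 64K	64K (a supported TTE size)
 *	1M (not a TTE size here)	TTEBYTES(TTE8K), i.e. 8K fallback
 */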