/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/cpu.h>
#include <sys/elf_SPARC.h>
#include <vm/hat_sfmmu.h>
#include <vm/page.h>
#include <sys/cpuvar.h>
#include <sys/spitregs.h>
#include <sys/async.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/dditypes.h>
#include <sys/sunddi.h>
#include <sys/cpu_module.h>
#include <sys/prom_debug.h>
#include <sys/vmsystm.h>
#include <sys/prom_plat.h>
#include <sys/sysmacros.h>
#include <sys/intreg.h>
#include <sys/machtrap.h>
#include <sys/ontrap.h>
#include <sys/ivintr.h>
#include <sys/atomic.h>
#include <sys/panic.h>
#include <sys/ndifm.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/fm/cpu/UltraSPARC-II.h>
#include <sys/ddi.h>
#include <sys/ecc_kstat.h>
#include <sys/watchpoint.h>
#include <sys/dtrace.h>
#include <sys/errclassify.h>

uchar_t	*ctx_pgsz_array = NULL;
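/*
 * Overview: this is the CPU module for Spitfire (UltraSPARC-I/II) and its
 * Sabre (US-IIi) and Hummingbird (US-IIe) derivatives.  It provides the
 * boot-time CPU setup routines plus the asynchronous fault handling:
 * decoding AFSR/AFAR and UDB state, logging and scrubbing correctable
 * errors, and panicking or trampolining on uncorrectable ones.
 */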
/*
 * Structure for the 8 byte ecache data dump and the associated AFSR state.
 * There will be 8 of these structures used to dump an ecache line (64 bytes).
 */
typedef struct sf_ec_data_elm {
	uint64_t ec_d8;
	uint64_t ec_afsr;
} ec_data_t;

/*
 * Define spitfire (Ultra I/II) specific asynchronous error structure
 */
typedef struct spitfire_async_flt {
	struct async_flt cmn_asyncflt;	/* common - see sun4u/sys/async.h */
	ushort_t flt_type;		/* types of faults - cpu specific */
	ec_data_t flt_ec_data[8];	/* for E$ or mem dump/state */
	uint64_t flt_ec_tag;		/* E$ tag info */
	int flt_ec_lcnt;		/* number of bad E$ lines */
	ushort_t flt_sdbh;		/* UDBH reg */
	ushort_t flt_sdbl;		/* UDBL reg */
} spitf_async_flt;

/*
 * Prototypes for support routines in spitfire_asm.s:
 */
extern void flush_ecache(uint64_t physaddr, size_t size, size_t linesize);
extern uint64_t get_lsu(void);
extern void set_lsu(uint64_t ncc);
extern void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
    uint64_t *oafsr, uint64_t *acc_afsr);
extern uint64_t check_ecache_line(uint32_t id, uint64_t *acc_afsr);
extern uint64_t get_ecache_tag(uint32_t id, uint64_t *nafsr,
    uint64_t *acc_afsr);
extern uint64_t read_and_clear_afsr();
extern void write_ec_tag_parity(uint32_t id);
extern void write_hb_ec_tag_parity(uint32_t id);

/*
 * Spitfire module routines:
 */
static void cpu_async_log_err(void *flt);
/*PRINTFLIKE6*/
static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt,
    uint_t logflags, const char *endstr, const char *fmt, ...);

static void cpu_read_paddr(struct async_flt *aflt, short verbose, short ce_err);
static void cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum);
static void cpu_log_ecmem_info(spitf_async_flt *spf_flt);

static void log_ce_err(struct async_flt *aflt, char *unum);
static void log_ue_err(struct async_flt *aflt, char *unum);
static void check_misc_err(spitf_async_flt *spf_flt);
static ushort_t ecc_gen(uint_t high_bytes, uint_t low_bytes);
static int check_ecc(struct async_flt *aflt);
static uint_t get_cpu_status(uint64_t arg);
static uint64_t clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr);
static void scan_ecache(uint64_t *afar, ec_data_t *data, uint64_t *tag,
    int *m, uint64_t *afsr);
static void ecache_kstat_init(struct cpu *cp);
static void ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag,
    uint64_t paddr, int mpb, uint64_t);
static uint64_t ecache_scrub_misc_err(int, uint64_t);
static void ecache_scrub_tag_err(uint64_t, uchar_t, uint32_t);
static void ecache_page_retire(void *);
static int ecc_kstat_update(kstat_t *ksp, int rw);
static int ce_count_unum(int status, int len, char *unum);
static void add_leaky_bucket_timeout(void);
static int synd_to_synd_code(int synd_status, ushort_t synd);

extern uint_t read_all_memscrub;
extern void memscrub_run(void);

static uchar_t	isus2i;			/* set if sabre */
static uchar_t	isus2e;			/* set if hummingbird */
/*
 * Default ecache mask and shift settings for Spitfire.  If we detect a
 * different CPU implementation, we will modify these values at boot time.
 */
static uint64_t cpu_ec_tag_mask		= S_ECTAG_MASK;
static uint64_t cpu_ec_state_mask	= S_ECSTATE_MASK;
static uint64_t cpu_ec_par_mask		= S_ECPAR_MASK;
static int cpu_ec_par_shift		= S_ECPAR_SHIFT;
static int cpu_ec_tag_shift		= S_ECTAG_SHIFT;
static int cpu_ec_state_shift		= S_ECSTATE_SHIFT;
static uchar_t cpu_ec_state_exl		= S_ECSTATE_EXL;
static uchar_t cpu_ec_state_mod		= S_ECSTATE_MOD;
static uchar_t cpu_ec_state_shr		= S_ECSTATE_SHR;
static uchar_t cpu_ec_state_own		= S_ECSTATE_OWN;

/*
 * Default ecache state bits for Spitfire.  These individual bits indicate if
 * the given line is in any of the valid or modified states, respectively.
 * Again, we modify these at boot if we detect a different CPU.
 */
static uchar_t cpu_ec_state_valid	= S_ECSTATE_VALID;
static uchar_t cpu_ec_state_dirty	= S_ECSTATE_DIRTY;
static uchar_t cpu_ec_parity		= S_EC_PARITY;
static uchar_t cpu_ec_state_parity	= S_ECSTATE_PARITY;

/*
 * This table is used to determine which bit(s) is(are) bad when an ECC
 * error occurs.  The array is indexed by an 8-bit syndrome.  The entries
 * of this array have the following semantics:
 *
 *	00-63	The number of the bad bit, when only one bit is bad.
 *	64	ECC bit C0 is bad.
 *	65	ECC bit C1 is bad.
 *	66	ECC bit C2 is bad.
 *	67	ECC bit C3 is bad.
 *	68	ECC bit C4 is bad.
 *	69	ECC bit C5 is bad.
 *	70	ECC bit C6 is bad.
 *	71	ECC bit C7 is bad.
 *	72	Two bits are bad.
 *	73	Three bits are bad.
 *	74	Four bits are bad.
 *	75	More than Four bits are bad.
 *	76	NO bits are bad.
 * Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28.
 */

#define	C0	64
#define	C1	65
#define	C2	66
#define	C3	67
#define	C4	68
#define	C5	69
#define	C6	70
#define	C7	71
#define	M2	72
#define	M3	73
#define	M4	74
#define	MX	75
#define	NA	76

#define	SYND_IS_SINGLE_BIT_DATA(synd_code)	((synd_code >= 0) && \
						    (synd_code < C0))
#define	SYND_IS_SINGLE_BIT_CHK(synd_code)	((synd_code >= C0) && \
						    (synd_code <= C7))

static char ecc_syndrome_tab[] =
{
	NA, C0, C1, M2, C2, M2, M2, M3, C3, M2, M2, M3, M2, M3, M3, M4,
	C4, M2, M2, 32, M2, 57, MX, M2, M2, 37, 49, M2, 40, M2, M2, 44,
	C5, M2, M2, 33, M2, 61,  4, M2, M2, MX, 53, M2, 45, M2, M2, 41,
	M2,  0,  1, M2, 10, M2, M2, MX, 15, M2, M2, MX, M2, M3, M3, M2,
	C6, M2, M2, 42, M2, 59, 39, M2, M2, MX, 51, M2, 34, M2, M2, 46,
	M2, 25, 29, M2, 27, M4, M2, MX, 31, M2, M4, MX, M2, MX, MX, M2,
	M2, MX, 36, M2,  7, M2, M2, 54, MX, M2, M2, 62, M2, 48, 56, M2,
	M3, M2, M2, MX, M2, MX, 22, M2, M2, 18, MX, M2, M3, M2, M2, MX,
	C7, M2, M2, 47, M2, 63, MX, M2, M2,  6, 55, M2, 35, M2, M2, 43,
	M2,  5, MX, M2, MX, M2, M2, 50, 38, M2, M2, 58, M2, 52, 60, M2,
	M2, 17, 21, M2, 19, M4, M2, MX, 23, M2, M4, MX, M2, MX, MX, M2,
	M3, M2, M2, MX, M2, MX, 30, M2, M2, 26, MX, M2, M3, M2, M2, MX,
	M2,  8, 13, M2,  2, M2, M2, M3,  3, M2, M2, M3, M2, MX, MX, M2,
	M3, M2, M2, M3, M2, MX, 16, M2, M2, 20, MX, M2, MX, M2, M2, MX,
	M3, M2, M2, M3, M2, MX, 24, M2, M2, 28, MX, M2, MX, M2, M2, MX,
	M4, 12,  9, M2, 14, M2, M2, MX, 11, M2, M2, MX, M2, MX, MX, M4
};

#define	SYND_TBL_SIZE	256
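/*
 * Worked example (illustrative): a reported 8-bit syndrome of 0x01 indexes
 * entry 1 of ecc_syndrome_tab, which is C0 (64), i.e. ECC check bit C0 was
 * bad; syndrome 0x13 yields 32, i.e. data bit 32 was the single bad bit;
 * syndrome 0x03 yields M2 (72), a double-bit error.  The
 * SYND_IS_SINGLE_BIT_DATA/CHK macros classify the resulting code.
 */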
/*
 * Hack for determining UDBH/UDBL, for later cpu-specific error reporting.
 * Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird.
 */
#define	UDBL_REG	0x8000
#define	UDBL(synd)	((synd & UDBL_REG) >> 15)
#define	SYND(synd)	(synd & 0x7FFF)

/*
 * These error types are specific to Spitfire and are used internally for the
 * spitfire fault structure flt_type field.
 */
#define	CPU_UE_ERR		0	/* uncorrectable errors - UEs */
#define	CPU_EDP_LDP_ERR		1	/* LDP or EDP parity error */
#define	CPU_WP_ERR		2	/* WP parity error */
#define	CPU_BTO_BERR_ERR	3	/* bus timeout errors */
#define	CPU_PANIC_CP_ERR	4	/* cp error from panic polling */
#define	CPU_TRAPPING_CP_ERR	5	/* for sabre/hbird only, cp error */
#define	CPU_BADLINE_CI_ERR	6	/* E$ clean_bad line when idle */
#define	CPU_BADLINE_CB_ERR	7	/* E$ clean_bad line when busy */
#define	CPU_BADLINE_DI_ERR	8	/* E$ dirty_bad line when idle */
#define	CPU_BADLINE_DB_ERR	9	/* E$ dirty_bad line when busy */
#define	CPU_ORPHAN_CP_ERR	10	/* Orphan CP error */
#define	CPU_ECACHE_ADDR_PAR_ERR	11	/* Ecache Address parity error */
#define	CPU_ECACHE_STATE_ERR	12	/* Ecache state error */
#define	CPU_ECACHE_ETP_ETS_ERR	13	/* ETP set but ETS is zero */
#define	CPU_ECACHE_TAG_ERR	14	/* Scrub the E$ tag, if state clean */
#define	CPU_ADDITIONAL_ERR	15	/* Additional errors occurred */

/*
 * Macro to access the "Spitfire cpu private" data structure.
 */
#define	CPU_PRIVATE_PTR(cp, x)	(&(((spitfire_private_t *)CPU_PRIVATE(cp))->x))

/*
 * set to 0 to disable automatic retiring of pages on
 * DIMMs that have excessive soft errors
 */
int automatic_page_removal = 1;

/*
 * Heuristic for figuring out which module to replace.
 * Relative likelihood that this P_SYND indicates that this module is bad.
 * We call it a "score", though, not a relative likelihood.
 *
 * Step 1.
 * Assign a score to each byte of P_SYND according to the following rules:
 *	If no bits on (0x00) or all bits on (0xFF), then give it a 5.
 *	If one bit on, give it a 95.
 *	If seven bits on, give it a 10.
 *	If two bits on:
 *		in different nybbles, a 90
 *		in same nybble, but unaligned, 85
 *		in same nybble and as an aligned pair, 80
 *	If six bits on, look at the bits that are off:
 *		in same nybble and as an aligned pair, 15
 *		in same nybble, but unaligned, 20
 *		in different nybbles, a 25
 *	If three bits on:
 *		in different nybbles, no aligned pairs, 75
 *		in different nybbles, one aligned pair, 70
 *		in the same nybble, 65
 *	If five bits on, look at the bits that are off:
 *		in the same nybble, 30
 *		in different nybbles, one aligned pair, 35
 *		in different nybbles, no aligned pairs, 40
 *	If four bits on:
 *		all in one nybble, 45
 *		as two aligned pairs, 50
 *		one aligned pair, 55
 *		no aligned pairs, 60
 *
 * Step 2:
 * Take the higher of the two scores (one for each byte) as the score
 * for the module.
 *
 * Print the score for each module, and field service should replace the
 * module with the highest score.
 */
/*
 * In the table below, the first row/column comment indicates the
 * number of bits on in that nybble; the second row/column comment is
 * the hex digit.
 */

static int
p_synd_score_table[256] = {
	/* 0   1   1   2   1   2   2   3   1   2   2   3   2   3   3   4 */
	/* 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  A,  B,  C,  D,  E,  F */
/* 0 0 */  5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45,
/* 1 1 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 1 2 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 3 */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
/* 1 4 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 5 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 2 6 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 3 7 */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 1 8 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 9 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 2 A */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 3 B */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 2 C */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
/* 3 D */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 3 E */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 4 F */ 45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10,  5,
};

int
ecc_psynd_score(ushort_t p_synd)
{
	int i, j, a, b;

	i = p_synd & 0xFF;
	j = (p_synd >> 8) & 0xFF;

	a = p_synd_score_table[i];
	b = p_synd_score_table[j];

	return (a > b ? a : b);
}
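/*
 * Worked example (illustrative): for p_synd 0x0311 the low byte is 0x11
 * (two bits on in different nybbles, rule score 90) and the high byte is
 * 0x03 (an aligned pair within one nybble, rule score 80), so
 * ecc_psynd_score(0x0311) returns the larger of the two byte scores, 90.
 */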
/*
 * Async Fault Logging
 *
 * To ease identifying, reading, and filtering async fault log messages, the
 * label [AFT#] is now prepended to each async fault message.  These messages
 * and the logging rules are implemented by cpu_aflt_log(), below.
 *
 * [AFT0] - Tag for log messages that are associated with corrected ECC errors.
 *          This includes both corrected ECC memory and ecache faults.
 *
 * [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything
 *          else except CE errors) with a priority of 1 (highest).  This tag
 *          is also used for panic messages that result from an async fault.
 *
 * [AFT2] - These are lower priority diagnostic messages for uncorrected ECC
 * [AFT3]   or parity errors.  For example, AFT2 is used for the actual dump
 *          of the E-$ data and tags.
 *
 * In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not
 * printed on the console.  To send all AFT logs to both the log and the
 * console, set aft_verbose = 1.
 */

#define	CPU_FLTCPU		0x0001	/* print flt_inst as a CPU id */
#define	CPU_SPACE		0x0002	/* print flt_status (data or instr) */
#define	CPU_ERRID		0x0004	/* print flt_id */
#define	CPU_TL			0x0008	/* print flt_tl */
#define	CPU_ERRID_FIRST		0x0010	/* print flt_id first in message */
#define	CPU_AFSR		0x0020	/* print flt_stat as decoded %afsr */
#define	CPU_AFAR		0x0040	/* print flt_addr as %afar */
#define	CPU_AF_PSYND		0x0080	/* print flt_stat %afsr.PSYND */
#define	CPU_AF_ETS		0x0100	/* print flt_stat %afsr.ETS */
#define	CPU_UDBH		0x0200	/* print flt_sdbh and syndrome */
#define	CPU_UDBL		0x0400	/* print flt_sdbl and syndrome */
#define	CPU_FAULTPC		0x0800	/* print flt_pc */
#define	CPU_SYND		0x1000	/* print flt_synd and unum */

#define	CMN_LFLAGS	(CPU_FLTCPU | CPU_SPACE | CPU_ERRID | CPU_TL |	\
				CPU_AFSR | CPU_AFAR | CPU_AF_PSYND |	\
				CPU_AF_ETS | CPU_UDBH | CPU_UDBL |	\
				CPU_FAULTPC)
#define	UE_LFLAGS	(CMN_LFLAGS | CPU_SYND)
#define	CE_LFLAGS	(UE_LFLAGS & ~CPU_UDBH & ~CPU_UDBL & ~CPU_TL & \
				~CPU_SPACE)
#define	PARERR_LFLAGS	(CMN_LFLAGS)
#define	WP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL)
#define	CP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL &	\
				~CPU_FLTCPU & ~CPU_FAULTPC)
#define	BERRTO_LFLAGS	(CMN_LFLAGS)
#define	NO_LFLAGS	(0)

#define	AFSR_FMTSTR0	"\020\1ME"
#define	AFSR_FMTSTR1	"\020\040PRIV\037ISAP\036ETP\035IVUE\034TO" \
			"\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE"
#define	UDB_FMTSTR	"\020\012UE\011CE"
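/*
 * Note on the format strings above: they follow the kernel printf/cmn_err
 * %b bit-decoding convention, where the leading \020 selects hexadecimal
 * output and each subsequent \<n>NAME pair names the 1-indexed bit <n> of
 * the value; they are presumably consumed by %b conversions in
 * cpu_aflt_log() when decoding the AFSR and UDB registers.
 */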
/*
 * Maximum number of contexts for Spitfire.
 */
#define	MAX_NCTXS	(1 << 13)

/*
 * Save the cache bootup state for use when internal
 * caches are to be re-enabled after an error occurs.
 */
uint64_t	cache_boot_state = 0;

/*
 * PA[31:0] represent Displacement in UPA configuration space.
 */
uint_t	root_phys_addr_lo_mask = 0xffffffff;

/*
 * Spitfire legacy globals
 */
int	itlb_entries;
int	dtlb_entries;

void
cpu_setup(void)
{
	extern int page_retire_messages;
	extern int at_flags;
#if defined(SF_ERRATA_57)
	extern caddr_t errata57_limit;
#endif
	extern int disable_text_largepages;
	extern int disable_initdata_largepages;

	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);

	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1;

	/*
	 * Spitfire isn't currently FMA-aware, so we have to enable the
	 * page retirement messages.
	 */
	page_retire_messages = 1;

	/*
	 * save the cache bootup state.
	 */
	cache_boot_state = get_lsu() & (LSU_IC | LSU_DC);

	/*
	 * Use the maximum number of contexts available for Spitfire unless
	 * it has been tuned for debugging.
	 * We are checking against 0 here since this value can be patched
	 * while booting.  It cannot be patched via /etc/system since it
	 * will be patched too late and thus cause the system to panic.
	 */
	if (nctxs == 0)
		nctxs = MAX_NCTXS;

	if (use_page_coloring) {
		do_pg_coloring = 1;
		if (use_virtual_coloring)
			do_virtual_coloring = 1;
	}

	/*
	 * Tune pp_slots to use up to 1/8th of the tlb entries.
	 */
	pp_slots = MIN(8, MAXPP_SLOTS);

	/*
	 * Block stores invalidate all pages of the d$ so pagecopy
	 * et al. do not need virtual translations with virtual
	 * coloring taken into consideration.
	 */
	pp_consistent_coloring = 0;

	isa_list =
	    "sparcv9+vis sparcv9 "
	    "sparcv8plus+vis sparcv8plus "
	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";

	cpu_hwcap_flags = AV_SPARC_VIS;

	/*
	 * On Spitfire, there's a hole in the address space
	 * that we must never map (the hardware only supports 44 bits of
	 * virtual address).  Later CPUs are expected to have wider
	 * supported address ranges.
	 *
	 * See address map on p23 of the UltraSPARC 1 user's manual.
	 */
	hole_start = (caddr_t)0x80000000000ull;
	hole_end = (caddr_t)0xfffff80000000000ull;

	/*
	 * A spitfire call bug requires us to be a further 4Gbytes of
	 * firewall from the spec.
	 *
	 * See Spitfire Errata #21
	 */
	hole_start = (caddr_t)((uintptr_t)hole_start - (1ul << 32));
	hole_end = (caddr_t)((uintptr_t)hole_end + (1ul << 32));

	/*
	 * The kpm mapping window.
	 * kpm_size:
	 *	The size of a single kpm range.
	 *	The overall size will be: kpm_size * vac_colors.
	 * kpm_vbase:
	 *	The virtual start address of the kpm range within the kernel
	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
	 */
	kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */
	kpm_size_shift = 41;
	kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */

#if defined(SF_ERRATA_57)
	errata57_limit = (caddr_t)0x80000000ul;
#endif

	/*
	 * Allow only 8K, 64K and 4M pages for text by default.
	 * Allow only 8K and 64K page for initialized data segments by
	 * default.
	 */
	disable_text_largepages = (1 << TTE512K) | (1 << TTE32M) |
	    (1 << TTE256M);
	disable_initdata_largepages = (1 << TTE512K) | (1 << TTE4M) |
	    (1 << TTE32M) | (1 << TTE256M);
}

static int
getintprop(pnode_t node, char *name, int deflt)
{
	int	value;

	switch (prom_getproplen(node, name)) {
	case 0:
		value = 1;	/* boolean properties */
		break;

	case sizeof (int):
		(void) prom_getprop(node, name, (caddr_t)&value);
		break;

	default:
		value = deflt;
		break;
	}

	return (value);
}
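/*
 * Usage note: getintprop(dnode, "ecache-size", -1), for example, returns
 * the integer value of an existing "ecache-size" PROM property, 1 for a
 * zero-length (boolean) property, and the caller's default (-1 here) when
 * the property is absent or has an unexpected length.
 */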
/*
 * Set the magic constants of the implementation.
 */
void
cpu_fiximp(pnode_t dnode)
{
	extern int vac_size, vac_shift;
	extern uint_t vac_mask;
	extern int dcache_line_mask;
	int i, a;
	static struct {
		char	*name;
		int	*var;
	} prop[] = {
		"dcache-size",		&dcache_size,
		"dcache-line-size",	&dcache_linesize,
		"icache-size",		&icache_size,
		"icache-line-size",	&icache_linesize,
		"ecache-size",		&ecache_size,
		"ecache-line-size",	&ecache_alignsize,
		"ecache-associativity",	&ecache_associativity,
		"#itlb-entries",	&itlb_entries,
		"#dtlb-entries",	&dtlb_entries,
	};

	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) {
		if ((a = getintprop(dnode, prop[i].name, -1)) != -1) {
			*prop[i].var = a;
		}
	}

	ecache_setsize = ecache_size / ecache_associativity;

	vac_size = S_VAC_SIZE;
	vac_mask = MMU_PAGEMASK & (vac_size - 1);
	i = 0; a = vac_size;
	while (a >>= 1)
		++i;
	vac_shift = i;
	shm_alignment = vac_size;
	vac = 1;

	dcache_line_mask = (dcache_size - 1) & ~(dcache_linesize - 1);

	/*
	 * UltraSPARC I & II have ecache sizes running
	 * as follows: .25 MB, .5 MB, 1 MB, 2 MB, 4 MB
	 * and 8 MB. Adjust the copyin/copyout limits
	 * according to the cache size.  The magic number
	 * of VIS_COPY_THRESHOLD comes from the copyin/copyout code
	 * and its floor of VIS_COPY_THRESHOLD bytes before it will use
	 * VIS instructions.
	 *
	 * We assume that all CPUs on the system have the same size
	 * ecache. We're also called very early in the game.
	 * /etc/system will be parsed *after* we're called so
	 * these values can be overwritten.
	 */

	hw_copy_limit_1 = VIS_COPY_THRESHOLD;
	if (ecache_size <= 524288) {
		hw_copy_limit_2 = VIS_COPY_THRESHOLD;
		hw_copy_limit_4 = VIS_COPY_THRESHOLD;
		hw_copy_limit_8 = VIS_COPY_THRESHOLD;
	} else if (ecache_size == 1048576) {
		hw_copy_limit_2 = 1024;
		hw_copy_limit_4 = 1280;
		hw_copy_limit_8 = 1536;
	} else if (ecache_size == 2097152) {
		hw_copy_limit_2 = 1536;
		hw_copy_limit_4 = 2048;
		hw_copy_limit_8 = 2560;
	} else if (ecache_size == 4194304) {
		hw_copy_limit_2 = 2048;
		hw_copy_limit_4 = 2560;
		hw_copy_limit_8 = 3072;
	} else {
		hw_copy_limit_2 = 2560;
		hw_copy_limit_4 = 3072;
		hw_copy_limit_8 = 3584;
	}
}

/*
 * Called by setcpudelay
 */
void
cpu_init_tick_freq(void)
{
	/*
	 * Determine the cpu frequency by calling
	 * tod_get_cpufrequency.  Use an approximate frequency
	 * value computed by the prom if the tod module
	 * is not initialized and loaded yet.
	 */
	if (tod_ops.tod_get_cpufrequency != NULL) {
		mutex_enter(&tod_lock);
		sys_tick_freq = tod_ops.tod_get_cpufrequency();
		mutex_exit(&tod_lock);
	} else {
#if defined(HUMMINGBIRD)
		/*
		 * the hummingbird version of %stick is used as the basis for
		 * low level timing; this provides an independent constant-rate
		 * clock for general system use, and frees power mgmt to set
		 * various cpu clock speeds.
		 */
		if (system_clock_freq == 0)
			cmn_err(CE_PANIC, "invalid system_clock_freq 0x%lx",
			    system_clock_freq);
		sys_tick_freq = system_clock_freq;
#else /* SPITFIRE */
		sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq;
#endif
	}
}


void shipit(int upaid);
extern uint64_t xc_tick_limit;
extern uint64_t xc_tick_jump_limit;

#ifdef SEND_MONDO_STATS
uint64_t x_early[NCPU][64];
#endif
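/*
 * Note: shipit(), declared above, launches a mondo interrupt at the CPU
 * with the given UPA port id.  In send_one_mondo() below, xc_tick_limit
 * bounds how long the IDSR is polled for the dispatch to complete, and
 * xc_tick_jump_limit is the threshold for detecting an irregular %tick
 * jump, after which the timeout window is re-armed from the current tick.
 */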
/*
 * Note: A version of this function is used by the debugger via the KDI,
 * and must be kept in sync with this version.  Any changes made to this
 * function to support new chips or to accommodate errata must also be
 * included in the KDI-specific version.  See spitfire_kdi.c.
 */
void
send_one_mondo(int cpuid)
{
	uint64_t idsr, starttick, endtick;
	int upaid, busy, nack;
	uint64_t tick, tick_prev;
	ulong_t ticks;

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
	upaid = CPUID_TO_UPAID(cpuid);
	tick = starttick = gettick();
	shipit(upaid);
	endtick = starttick + xc_tick_limit;
	busy = nack = 0;
	for (;;) {
		idsr = getidsr();
		if (idsr == 0)
			break;
		/*
		 * When we detect an irregular tick jump, we adjust
		 * the timer window to the current tick value.
		 */
		tick_prev = tick;
		tick = gettick();
		ticks = tick - tick_prev;
		if (ticks > xc_tick_jump_limit) {
			endtick = tick + xc_tick_limit;
		} else if (tick > endtick) {
			if (panic_quiesce)
				return;
			cmn_err(CE_PANIC,
			    "send mondo timeout (target 0x%x) [%d NACK %d BUSY]",
			    upaid, nack, busy);
		}
		if (idsr & IDSR_BUSY) {
			busy++;
			continue;
		}
		drv_usecwait(1);
		shipit(upaid);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	x_early[getprocessorid()][highbit(gettick() - starttick) - 1]++;
#endif
}

void
send_mondo_set(cpuset_t set)
{
	int i;

	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			send_one_mondo(i);
			CPUSET_DEL(set, i);
			if (CPUSET_ISNULL(set))
				break;
		}
}

void
syncfpu(void)
{
}

/*
 * Determine the size of the CPU module's error structure in bytes.  This is
 * called once during boot to initialize the error queues.
 */
int
cpu_aflt_size(void)
{
	/*
	 * We need to determine whether this is a sabre, Hummingbird or a
	 * Spitfire/Blackbird impl and set the appropriate state variables for
	 * ecache tag manipulation.  We can't do this in cpu_setup() as it is
	 * too early in the boot flow and the cpunodes are not initialized.
	 * This routine will be called once after cpunodes[] is ready, so do
	 * it here.
	 */
	if (cpunodes[CPU->cpu_id].implementation == SABRE_IMPL) {
		isus2i = 1;
		cpu_ec_tag_mask = SB_ECTAG_MASK;
		cpu_ec_state_mask = SB_ECSTATE_MASK;
		cpu_ec_par_mask = SB_ECPAR_MASK;
		cpu_ec_par_shift = SB_ECPAR_SHIFT;
		cpu_ec_tag_shift = SB_ECTAG_SHIFT;
		cpu_ec_state_shift = SB_ECSTATE_SHIFT;
		cpu_ec_state_exl = SB_ECSTATE_EXL;
		cpu_ec_state_mod = SB_ECSTATE_MOD;

		/* These states do not exist in sabre - set to 0xFF */
		cpu_ec_state_shr = 0xFF;
		cpu_ec_state_own = 0xFF;

		cpu_ec_state_valid = SB_ECSTATE_VALID;
		cpu_ec_state_dirty = SB_ECSTATE_DIRTY;
		cpu_ec_state_parity = SB_ECSTATE_PARITY;
		cpu_ec_parity = SB_EC_PARITY;
	} else if (cpunodes[CPU->cpu_id].implementation == HUMMBRD_IMPL) {
		isus2e = 1;
		cpu_ec_tag_mask = HB_ECTAG_MASK;
		cpu_ec_state_mask = HB_ECSTATE_MASK;
		cpu_ec_par_mask = HB_ECPAR_MASK;
		cpu_ec_par_shift = HB_ECPAR_SHIFT;
		cpu_ec_tag_shift = HB_ECTAG_SHIFT;
		cpu_ec_state_shift = HB_ECSTATE_SHIFT;
		cpu_ec_state_exl = HB_ECSTATE_EXL;
		cpu_ec_state_mod = HB_ECSTATE_MOD;

		/* These states do not exist in hummingbird - set to 0xFF */
		cpu_ec_state_shr = 0xFF;
		cpu_ec_state_own = 0xFF;

		cpu_ec_state_valid = HB_ECSTATE_VALID;
		cpu_ec_state_dirty = HB_ECSTATE_DIRTY;
		cpu_ec_state_parity = HB_ECSTATE_PARITY;
		cpu_ec_parity = HB_EC_PARITY;
	}

	return (sizeof (spitf_async_flt));
}
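/*
 * Note: besides returning the fault-structure size, cpu_aflt_size() is
 * where the Sabre (US-IIi) and Hummingbird (US-IIe) variants are detected,
 * since cpunodes[] is populated by the time it runs.  The isus2i/isus2e
 * flags latched here select the per-implementation E$ tag geometry above
 * and the stricter panic policy applied by the error handlers below.
 */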
/*
 * Correctable ecc error trap handler
 */
/*ARGSUSED*/
void
cpu_ce_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
	uint_t p_afsr_high, uint_t p_afar_high)
{
	ushort_t sdbh, sdbl;
	ushort_t e_syndh, e_syndl;
	spitf_async_flt spf_flt;
	struct async_flt *ecc;
	int queue = 1;

	uint64_t t_afar = p_afar;
	uint64_t t_afsr = p_afsr;

	/*
	 * Note: the Spitfire data buffer error registers
	 * (upper and lower halves) are or'ed into the upper
	 * word of the afsr by ce_err().
	 */
	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);

	e_syndh = (uchar_t)(sdbh & (uint_t)P_DER_E_SYND);
	e_syndl = (uchar_t)(sdbl & (uint_t)P_DER_E_SYND);

	t_afsr &= S_AFSR_MASK;
	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */

	/* Setup the async fault structure */
	bzero(&spf_flt, sizeof (spitf_async_flt));
	ecc = (struct async_flt *)&spf_flt;
	ecc->flt_id = gethrtime_waitfree();
	ecc->flt_stat = t_afsr;
	ecc->flt_addr = t_afar;
	ecc->flt_status = ECC_C_TRAP;
	ecc->flt_bus_id = getprocessorid();
	ecc->flt_inst = CPU->cpu_id;
	ecc->flt_pc = (caddr_t)rp->r_pc;
	ecc->flt_func = log_ce_err;
	ecc->flt_in_memory =
	    (pf_is_memory(ecc->flt_addr >> MMU_PAGESHIFT)) ? 1 : 0;
	spf_flt.flt_sdbh = sdbh;
	spf_flt.flt_sdbl = sdbl;

	/*
	 * Check for fatal conditions.
	 */
	check_misc_err(&spf_flt);

	/*
	 * Paranoid checks for valid AFSR and UDBs
	 */
	if ((t_afsr & P_AFSR_CE) == 0) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
		    "** Panic due to CE bit not set in the AFSR",
		    " Corrected Memory Error on");
	}

	/*
	 * We want to skip logging only if ALL the following
	 * conditions are true:
	 *
	 *	1. There is only one error
	 *	2. That error is a correctable memory error
	 *	3. The error is caused by the memory scrubber (in which case
	 *	    the error will have occurred under on_trap protection)
	 *	4. The error is on a retired page
	 *
	 * Note: OT_DATA_EC is used in places other than the memory scrubber.
	 * However, none of those errors should occur on a retired page.
	 */
	if ((ecc->flt_stat & (S_AFSR_ALL_ERRS & ~P_AFSR_ME)) == P_AFSR_CE &&
	    curthread->t_ontrap != NULL) {

		if (curthread->t_ontrap->ot_prot & OT_DATA_EC) {
			page_t *pp = page_numtopp_nolock((pfn_t)
			    (ecc->flt_addr >> MMU_PAGESHIFT));

			if (pp != NULL && page_isretired(pp)) {
				queue = 0;
			}
		}
	}

	if (((sdbh & P_DER_CE) == 0) && ((sdbl & P_DER_CE) == 0)) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
		    "** Panic due to CE bits not set in the UDBs",
		    " Corrected Memory Error on");
	}

	if ((sdbh >> 8) & 1) {
		ecc->flt_synd = e_syndh;
		ce_scrub(ecc);
		if (queue) {
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
		}
	}

	if ((sdbl >> 8) & 1) {
		ecc->flt_addr = t_afar | 0x8;	/* Sabres do not have a UDBL */
		ecc->flt_synd = e_syndl | UDBL_REG;
		ce_scrub(ecc);
		if (queue) {
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
		}
	}
	/*
	 * Re-enable all error trapping (CEEN currently cleared).
	 */
	clr_datapath();
	set_asyncflt(P_AFSR_CE);
	set_error_enable(EER_ENABLE);
}

/*
 * Cpu specific CE logging routine
 */
static void
log_ce_err(struct async_flt *aflt, char *unum)
{
	spitf_async_flt spf_flt;

	if ((aflt->flt_stat & P_AFSR_CE) && (ce_verbose_memory == 0)) {
		return;
	}

	spf_flt.cmn_asyncflt = *aflt;
	cpu_aflt_log(CE_CONT, 0, &spf_flt, CE_LFLAGS, unum,
	    " Corrected Memory Error detected by");
}

/*
 * Spitfire does not perform any further CE classification refinement
 */
/*ARGSUSED*/
int
ce_scrub_xdiag_recirc(struct async_flt *ecc, errorq_t *eqp, errorq_elem_t *eqep,
    size_t afltoffset)
{
	return (0);
}

char *
flt_to_error_type(struct async_flt *aflt)
{
	if (aflt->flt_status & ECC_INTERMITTENT)
		return (ERR_TYPE_DESC_INTERMITTENT);
	if (aflt->flt_status & ECC_PERSISTENT)
		return (ERR_TYPE_DESC_PERSISTENT);
	if (aflt->flt_status & ECC_STICKY)
		return (ERR_TYPE_DESC_STICKY);
	return (ERR_TYPE_DESC_UNKNOWN);
}

/*
 * Called by correctable ecc error logging code to print out
 * the sticky/persistent/intermittent status of the error.
 */
static void
cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum)
{
	ushort_t status;
	char *status1_str = "Memory";
	char *status2_str = "Intermittent";
	struct async_flt *aflt = (struct async_flt *)spf_flt;

	status = aflt->flt_status;

	if (status & ECC_ECACHE)
		status1_str = "Ecache";

	if (status & ECC_STICKY)
		status2_str = "Sticky";
	else if (status & ECC_PERSISTENT)
		status2_str = "Persistent";

	cpu_aflt_log(CE_CONT, 0, spf_flt, CPU_ERRID_FIRST,
	    NULL, " Corrected %s Error on %s is %s",
	    status1_str, unum, status2_str);
}

/*
 * check for a valid ce syndrome, then call the
 * displacement flush scrubbing code, and then check the afsr to see if
 * the error was persistent or intermittent.  Reread the afar/afsr to see
 * if the error was not scrubbed successfully, and is therefore sticky.
 */
/*ARGSUSED1*/
void
cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t triedcpulogout)
{
	uint64_t eer, afsr;
	ushort_t status;

	ASSERT(getpil() > LOCK_LEVEL);

	/*
	 * It is possible that the flt_addr is not a valid
	 * physical address.  To deal with this, we disable
	 * NCEEN while we scrub that address.  If this causes
	 * a TIMEOUT/BERR, we know this is an invalid
	 * memory location.
	 */
	kpreempt_disable();
	eer = get_error_enable();
	if (eer & (EER_CEEN | EER_NCEEN))
		set_error_enable(eer & ~(EER_CEEN | EER_NCEEN));

	/*
	 * To check if the error detected by IO is persistent, sticky or
	 * intermittent.
	 */
	if (ecc->flt_status & ECC_IOBUS) {
		ecc->flt_stat = P_AFSR_CE;
	}

	scrubphys(P2ALIGN(ecc->flt_addr, 64),
	    cpunodes[CPU->cpu_id].ecache_size);

	get_asyncflt(&afsr);
	if (afsr & (P_AFSR_TO | P_AFSR_BERR)) {
		/*
		 * Must ensure that we don't get the TIMEOUT/BERR
		 * when we reenable NCEEN, so we clear the AFSR.
		 */
		set_asyncflt(afsr & (P_AFSR_TO | P_AFSR_BERR));
		if (eer & (EER_CEEN | EER_NCEEN))
			set_error_enable(eer);
		kpreempt_enable();
		return;
	}

	if (eer & EER_NCEEN)
		set_error_enable(eer & ~EER_CEEN);

	/*
	 * Check and clear any ECC errors from the scrub.  If the scrub did
	 * not trip over the error, mark it intermittent.  If the scrub did
	 * trip the error again and it did not scrub away, mark it sticky.
	 * Otherwise mark it persistent.
	 */
	if (check_ecc(ecc) != 0) {
		cpu_read_paddr(ecc, 0, 1);

		if (check_ecc(ecc) != 0)
			status = ECC_STICKY;
		else
			status = ECC_PERSISTENT;
	} else
		status = ECC_INTERMITTENT;

	if (eer & (EER_CEEN | EER_NCEEN))
		set_error_enable(eer);
	kpreempt_enable();

	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
	ecc->flt_status |= status;
}
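/*
 * Summary of the classification above: if the first check_ecc() after the
 * displacement flush sees no error, the CE was intermittent; if the error
 * reappears and is still present after cpu_read_paddr() re-reads the
 * location, it is sticky; if it reappears but is gone after the re-read,
 * it is persistent.
 */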
/*
 * get the syndrome and unum, and then call the routines
 * to check the other cpus and iobuses, and then do the error logging.
 */
/*ARGSUSED1*/
void
cpu_ce_log_err(struct async_flt *ecc, errorq_elem_t *eqep)
{
	char unum[UNUM_NAMLEN];
	int len = 0;
	int ce_verbose = 0;

	ASSERT(ecc->flt_func != NULL);

	/* Get the unum string for logging purposes */
	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, ecc, unum,
	    UNUM_NAMLEN, &len);

	/* Call specific error logging routine */
	(void) (*ecc->flt_func)(ecc, unum);

	/*
	 * Count errors per unum.
	 * Non-memory errors are all counted via a special unum string.
	 */
	if (ce_count_unum(ecc->flt_status, len, unum) == PAGE_IS_FAILING &&
	    automatic_page_removal) {
		page_t *pp = page_numtopp_nolock((pfn_t)
		    (ecc->flt_addr >> MMU_PAGESHIFT));

		if (pp) {
			page_settoxic(pp, PAGE_IS_FAULTY);
			(void) page_retire(pp, PAGE_IS_FAILING);
		}
	}

	if (ecc->flt_panic) {
		ce_verbose = 1;
	} else if ((ecc->flt_class == BUS_FAULT) ||
	    (ecc->flt_stat & P_AFSR_CE)) {
		ce_verbose = (ce_verbose_memory > 0);
	} else {
		ce_verbose = 1;
	}

	if (ce_verbose) {
		spitf_async_flt sflt;
		int synd_code;

		sflt.cmn_asyncflt = *ecc;	/* for cpu_aflt_log() */

		cpu_ce_log_status(&sflt, unum);

		synd_code = synd_to_synd_code(AFLT_STAT_VALID,
		    SYND(ecc->flt_synd));

		if (SYND_IS_SINGLE_BIT_DATA(synd_code)) {
			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
			    NULL, " ECC Data Bit %2d was in error "
			    "and corrected", synd_code);
		} else if (SYND_IS_SINGLE_BIT_CHK(synd_code)) {
			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
			    NULL, " ECC Check Bit %2d was in error "
			    "and corrected", synd_code - C0);
		} else {
			/*
			 * These are UE errors - we shouldn't be getting CE
			 * traps for these; handle them in case of bad h/w.
			 */
			switch (synd_code) {
			case M2:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " Two ECC Bits were in error");
				break;
			case M3:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " Three ECC Bits were in error");
				break;
			case M4:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " Four ECC Bits were in error");
				break;
			case MX:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " More than Four ECC bits were "
				    "in error");
				break;
			default:
				cpu_aflt_log(CE_CONT, 0, &sflt,
				    CPU_ERRID_FIRST, NULL,
				    " Unknown fault syndrome %d",
				    synd_code);
				break;
			}
		}
	}

	/* Display entire cache line, if valid address */
	if (ce_show_data && ecc->flt_addr != AFLT_INV_ADDR)
		read_ecc_data(ecc, 1, 1);
}
/*
 * We route all errors through a single switch statement.
 */
void
cpu_ue_log_err(struct async_flt *aflt)
{

	switch (aflt->flt_class) {
	case CPU_FAULT:
		cpu_async_log_err(aflt);
		break;

	case BUS_FAULT:
		bus_async_log_err(aflt);
		break;

	default:
		cmn_err(CE_WARN, "discarding async error 0x%p with invalid "
		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
		break;
	}
}

/* Values for action variable in cpu_async_error() */
#define	ACTION_NONE		0
#define	ACTION_TRAMPOLINE	1
#define	ACTION_AST_FLAGS	2

/*
 * Access error trap handler for asynchronous cpu errors.  This routine is
 * called to handle a data or instruction access error.  All fatal errors are
 * completely handled by this routine (by panicking).  Non-fatal error logging
 * is queued for later processing either via AST or softint at a lower PIL.
 * In case of panic, the error log queue will also be processed as part of the
 * panic flow to ensure all errors are logged.  This routine is called with all
 * errors disabled at PIL15.  The AFSR bits are cleared and the UDBL and UDBH
 * error bits are also cleared.  The hardware has also disabled the I and
 * D-caches for us, so we must re-enable them before returning.
 *
 * A summary of the handling of tl=0 UE/LDP/EDP/TO/BERR/WP/CP:
 *
 * ____________________________________________________________________________
 *              |        Privileged tl0         |       Unprivileged          |
 *              |   Protected   |  Unprotected  |  Protected  |  Unprotected  |
 *              |on_trap|lofault|               |             |               |
 * -------------|-------|-------|---------------|-------------|---------------|
 *              |       |       |               |             |               |
 * UE/LDP/EDP   | L,T,p | L,R,p |      L,P      |     n/a     |     L,R,p     |
 *              |       |       |               |             |               |
 * TO/BERR      |   T   |   S   |      L,P      |     n/a     |       S       |
 *              |       |       |               |             |               |
 * WP           | L,M,p | L,M,p |     L,M,p     |     n/a     |     L,M,p     |
 *              |       |       |               |             |               |
 * CP (IIi/IIe) |  L,P  |  L,P  |      L,P      |     n/a     |      L,P      |
 * ____________________________________________________________________________
 *
 *
 * Action codes:
 *
 * L - log
 * M - kick off memscrubber if flt_in_memory
 * P - panic
 * p - panic if US-IIi or US-IIe (Sabre); overrides R and M
 * R - i)  if aft_panic is set, panic
 *     ii) otherwise, send hwerr event to contract and SIGKILL to process
 * S - send SIGBUS to process
 * T - trampoline
 *
 * Special cases:
 *
 * 1) if aft_testfatal is set, all faults result in a panic regardless
 *    of type (even WP), protection (even on_trap), or privilege.
 */
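/*
 * Reading the table (example): a UE taken in privileged mode under on_trap
 * protection is logged and trampolined, and additionally panics on
 * US-IIi/US-IIe (L,T,p); an unprivileged, unprotected BERR simply results
 * in a SIGBUS to the offending process (S).
 */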
/*ARGSUSED*/
void
cpu_async_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
	uint_t p_afsr_high, uint_t p_afar_high)
{
	ushort_t sdbh, sdbl, ttype, tl;
	spitf_async_flt spf_flt;
	struct async_flt *aflt;
	char pr_reason[28];
	uint64_t oafsr;
	uint64_t acc_afsr = 0;			/* accumulated afsr */
	int action = ACTION_NONE;
	uint64_t t_afar = p_afar;
	uint64_t t_afsr = p_afsr;
	int expected = DDI_FM_ERR_UNEXPECTED;
	ddi_acc_hdl_t *hp;

	/*
	 * We need to look at p_flag to determine if the thread detected an
	 * error while dumping core.  We can't grab p_lock here, but it's ok
	 * because we just need a consistent snapshot and we know that everyone
	 * else will store a consistent set of bits while holding p_lock.  We
	 * don't have to worry about a race because SDOCORE is set once prior
	 * to doing i/o from the process's address space and is never cleared.
	 */
	uint_t pflag = ttoproc(curthread)->p_flag;

	pr_reason[0] = '\0';

	/*
	 * Note: the Spitfire data buffer error registers
	 * (upper and lower halves) are or'ed into the upper
	 * word of the afsr by async_err() if P_AFSR_UE is set.
	 */
	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);

	/*
	 * Grab the ttype encoded in <63:53> of the saved
	 * afsr passed from async_err()
	 */
	ttype = (ushort_t)((t_afsr >> 53) & 0x1FF);
	tl = (ushort_t)(t_afsr >> 62);

	t_afsr &= S_AFSR_MASK;
	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */

	/*
	 * Initialize most of the common and CPU-specific structure.  We derive
	 * aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit.  The
	 * initial setting of aflt->flt_panic is based on TL: we must panic if
	 * the error occurred at TL > 0.  We also set flt_panic if the test/demo
	 * tuneable aft_testfatal is set (not the default).
	 */
	bzero(&spf_flt, sizeof (spitf_async_flt));
	aflt = (struct async_flt *)&spf_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_stat = t_afsr;
	aflt->flt_addr = t_afar;
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
	aflt->flt_tl = (uchar_t)tl;
	aflt->flt_panic = (tl != 0 || aft_testfatal != 0);
	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;

	/*
	 * Set flt_status based on the trap type.  If we end up here as the
	 * result of a UE detected by the CE handling code, leave status 0.
	 */
	switch (ttype) {
	case T_DATA_ERROR:
		aflt->flt_status = ECC_D_TRAP;
		break;
	case T_INSTR_ERROR:
		aflt->flt_status = ECC_I_TRAP;
		break;
	}

	spf_flt.flt_sdbh = sdbh;
	spf_flt.flt_sdbl = sdbl;

	/*
	 * Check for fatal async errors.
	 */
	check_misc_err(&spf_flt);

	/*
	 * If the trap occurred in privileged mode at TL=0, we need to check to
	 * see if we were executing in the kernel under on_trap() or t_lofault
	 * protection.  If so, modify the saved registers so that we return
	 * from the trap to the appropriate trampoline routine.
	 */
	if (aflt->flt_priv && tl == 0) {
		if (curthread->t_ontrap != NULL) {
			on_trap_data_t *otp = curthread->t_ontrap;

			if (otp->ot_prot & OT_DATA_EC) {
				aflt->flt_prot = AFLT_PROT_EC;
				otp->ot_trap |= OT_DATA_EC;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				action = ACTION_TRAMPOLINE;
			}

			if ((t_afsr & (P_AFSR_TO | P_AFSR_BERR)) &&
			    (otp->ot_prot & OT_DATA_ACCESS)) {
				aflt->flt_prot = AFLT_PROT_ACCESS;
				otp->ot_trap |= OT_DATA_ACCESS;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				action = ACTION_TRAMPOLINE;
				/*
				 * for peeks and caut_gets errors are expected
				 */
				hp = (ddi_acc_hdl_t *)otp->ot_handle;
				if (!hp)
					expected = DDI_FM_ERR_PEEK;
				else if (hp->ah_acc.devacc_attr_access ==
				    DDI_CAUTIOUS_ACC)
					expected = DDI_FM_ERR_EXPECTED;
			}

		} else if (curthread->t_lofault) {
			aflt->flt_prot = AFLT_PROT_COPY;
			rp->r_g1 = EFAULT;
			rp->r_pc = curthread->t_lofault;
			rp->r_npc = rp->r_pc + 4;
			action = ACTION_TRAMPOLINE;
		}
	}

	/*
	 * Determine if this error needs to be treated as fatal.  Note that
	 * multiple errors detected upon entry to this trap handler does not
	 * necessarily warrant a panic.  We only want to panic if the trap
	 * happened in privileged mode and not under t_ontrap or t_lofault
	 * protection.  The exception is WP: if we *only* get WP, it is not
	 * fatal even if the trap occurred in privileged mode, except on Sabre.
	 *
	 * aft_panic, if set, effectively makes us treat usermode
	 * UE/EDP/LDP faults as if they were privileged - so we will
	 * panic instead of sending a contract event.  A lofault-protected
	 * fault will normally follow the contract event; if aft_panic is
	 * set this will be changed to a panic.
	 *
	 * For usermode BERR/BTO errors, eg from processes performing device
	 * control through mapped device memory, we need only deliver
	 * a SIGBUS to the offending process.
	 *
	 * Some additional flt_panic reasons (eg, WP on Sabre) will be
	 * checked later; for now we implement the common reasons.
	 */
	if (aflt->flt_prot == AFLT_PROT_NONE) {
		/*
		 * Beware - multiple bits may be set in AFSR
		 */
		if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) {
			if (aflt->flt_priv || aft_panic)
				aflt->flt_panic = 1;
		}

		if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
			if (aflt->flt_priv)
				aflt->flt_panic = 1;
		}
	} else if (aflt->flt_prot == AFLT_PROT_COPY && aft_panic) {
		aflt->flt_panic = 1;
	}

	/*
	 * UE/BERR/TO: Call our bus nexus friends to check for
	 * IO errors that may have resulted in this trap.
	 */
	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR | P_AFSR_UE)) {
		cpu_run_bus_error_handlers(aflt, expected);
	}

	/*
	 * Handle UE: If the UE is in memory, we need to flush the bad line from
	 * the E-cache.  We also need to query the bus nexus for fatal errors.
	 * For sabre, we will panic on UEs. Attempts to do diagnostic read on
	 * caches may introduce more parity errors (especially when the module
	 * is bad) and in sabre there is no guarantee that such errors
	 * (if introduced) are written back as poisoned data.
	 */
	if (t_afsr & P_AFSR_UE) {
		int i;

		(void) strcat(pr_reason, "UE ");

		spf_flt.flt_type = CPU_UE_ERR;
		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
		    MMU_PAGESHIFT)) ? 1 : 0;

		/*
		 * With UE, we have the PA of the fault.
		 * Let's do a diagnostic read to get the ecache
		 * data and tag info of the bad line for logging.
		 */
		if (aflt->flt_in_memory) {
			uint32_t ec_set_size;
			uchar_t state;
			uint32_t ecache_idx;
			uint64_t faultpa = P2ALIGN(aflt->flt_addr, 64);

			/* touch the line to put it in ecache */
			acc_afsr |= read_and_clear_afsr();
			(void) lddphys(faultpa);
			acc_afsr |= (read_and_clear_afsr() &
			    ~(P_AFSR_EDP | P_AFSR_UE));

			ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
			    ecache_associativity;

			for (i = 0; i < ecache_associativity; i++) {
				ecache_idx = i * ec_set_size +
				    (aflt->flt_addr % ec_set_size);
				get_ecache_dtag(P2ALIGN(ecache_idx, 64),
				    (uint64_t *)&spf_flt.flt_ec_data[0],
				    &spf_flt.flt_ec_tag, &oafsr, &acc_afsr);
				acc_afsr |= oafsr;

				state = (uchar_t)((spf_flt.flt_ec_tag &
				    cpu_ec_state_mask) >> cpu_ec_state_shift);

				if ((state & cpu_ec_state_valid) &&
				    ((spf_flt.flt_ec_tag & cpu_ec_tag_mask) ==
				    ((uint64_t)aflt->flt_addr >>
				    cpu_ec_tag_shift)))
					break;
			}

			/*
			 * Check to see if the ecache tag is valid for the
			 * fault PA. In the very unlikely event where the
			 * line could be victimized, no ecache info will be
			 * available. If this is the case, capture the line
			 * from memory instead.
			 */
			if ((state & cpu_ec_state_valid) == 0 ||
			    (spf_flt.flt_ec_tag & cpu_ec_tag_mask) !=
			    ((uint64_t)aflt->flt_addr >> cpu_ec_tag_shift)) {
				for (i = 0; i < 8; i++, faultpa += 8) {
					ec_data_t *ecdptr;

					ecdptr = &spf_flt.flt_ec_data[i];
					acc_afsr |= read_and_clear_afsr();
					ecdptr->ec_d8 = lddphys(faultpa);
					acc_afsr |= (read_and_clear_afsr() &
					    ~(P_AFSR_EDP | P_AFSR_UE));
					ecdptr->ec_afsr = 0;
					/* null afsr value */
				}

				/*
				 * Mark tag invalid to indicate mem dump
				 * when we print out the info.
				 */
				spf_flt.flt_ec_tag = AFLT_INV_ADDR;
			}
			spf_flt.flt_ec_lcnt = 1;

			/*
			 * Flush out the bad line
			 */
			flushecacheline(P2ALIGN(aflt->flt_addr, 64),
			    cpunodes[CPU->cpu_id].ecache_size);

			acc_afsr |= clear_errors(NULL, NULL);
		}
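		/*
		 * Note: the probe above walks each way of the E$ (set index =
		 * fault PA modulo the set size) looking for a valid tag that
		 * matches the fault PA; if the line was victimized in the
		 * meantime, the 64-byte line is captured from memory instead
		 * and flt_ec_tag is set to AFLT_INV_ADDR so the logging code
		 * prints it as a memory dump.
		 */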
		/*
		 * Ask our bus nexus friends if they have any fatal errors.  If
		 * so, they will log appropriate error messages and panic as a
		 * result.  We then queue an event for each UDB that reports a
		 * UE.  Each UE reported in a UDB will have its own log message.
		 *
		 * Note from kbn: In the case where there are multiple UEs
		 * (ME bit is set) - the AFAR address is only accurate to
		 * the 16-byte granularity.  One cannot tell whether the AFAR
		 * belongs to the UDBH or UDBL syndromes.  In this case, we
		 * always report the AFAR address to be 16-byte aligned.
		 *
		 * If we're on a Sabre, there is no SDBL, but it will always
		 * read as zero, so the sdbl test below will safely fail.
		 */
		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL || isus2i || isus2e)
			aflt->flt_panic = 1;

		if (sdbh & P_DER_UE) {
			aflt->flt_synd = sdbh & P_DER_E_SYND;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		}
		if (sdbl & P_DER_UE) {
			aflt->flt_synd = sdbl & P_DER_E_SYND;
			aflt->flt_synd |= UDBL_REG;	/* indicates UDBL */
			if (!(aflt->flt_stat & P_AFSR_ME))
				aflt->flt_addr |= 0x8;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		}

		/*
		 * We got a UE and are panicking, save the fault PA in a known
		 * location so that the platform specific panic code can check
		 * for copyback errors.
		 */
		if (aflt->flt_panic && aflt->flt_in_memory) {
			panic_aflt = *aflt;
		}
	}

	/*
	 * Handle EDP and LDP: Locate the line with bad parity and enqueue an
	 * async error for logging.  For Sabre, we panic on EDP or LDP.
	 */
	if (t_afsr & (P_AFSR_EDP | P_AFSR_LDP)) {
		spf_flt.flt_type = CPU_EDP_LDP_ERR;

		if (t_afsr & P_AFSR_EDP)
			(void) strcat(pr_reason, "EDP ");

		if (t_afsr & P_AFSR_LDP)
			(void) strcat(pr_reason, "LDP ");

		/*
		 * Here we have no PA to work with.
		 * Scan each line in the ecache to look for
		 * the one with bad parity.
		 */
		aflt->flt_addr = AFLT_INV_ADDR;
		scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
		    &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
		acc_afsr |= (oafsr & ~P_AFSR_WP);

		/*
		 * If we found a bad PA, update the state to indicate if it is
		 * memory or I/O space.  This code will be important if we ever
		 * support cacheable frame buffers.
		 */
		if (aflt->flt_addr != AFLT_INV_ADDR) {
			aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
			    MMU_PAGESHIFT)) ? 1 : 0;
		}

		if (isus2i || isus2e)
			aflt->flt_panic = 1;

		cpu_errorq_dispatch((t_afsr & P_AFSR_EDP) ?
		    FM_EREPORT_CPU_USII_EDP : FM_EREPORT_CPU_USII_LDP,
		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Timeout and bus error handling.  There are two cases to consider:
	 *
	 * (1) If we are in the kernel protected by ddi_peek or ddi_poke, we
	 * have already modified the saved registers so that we will return
	 * from the trap to the appropriate trampoline routine; otherwise panic.
	 *
	 * (2) In user mode, we can simply use our AST mechanism to deliver
	 * a SIGBUS.  We do not log the occurrence - processes performing
	 * device control would generate lots of uninteresting messages.
	 */
	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
		if (t_afsr & P_AFSR_TO)
			(void) strcat(pr_reason, "BTO ");

		if (t_afsr & P_AFSR_BERR)
			(void) strcat(pr_reason, "BERR ");

		spf_flt.flt_type = CPU_BTO_BERR_ERR;
		if (aflt->flt_priv && aflt->flt_prot == AFLT_PROT_NONE) {
			cpu_errorq_dispatch((t_afsr & P_AFSR_TO) ?
			    FM_EREPORT_CPU_USII_TO : FM_EREPORT_CPU_USII_BERR,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		}
	}

	/*
	 * Handle WP: WP happens when the ecache is victimized and a parity
	 * error was detected on a writeback.  The data in question will be
	 * poisoned as a UE will be written back.  The PA is not logged and
	 * it is possible that it doesn't belong to the trapped thread.  The
	 * WP trap is not fatal, but it could be fatal to someone that
	 * subsequently accesses the toxic page.  We set read_all_memscrub
	 * to force the memscrubber to read all of memory when it awakens.
	 * For Sabre/Hummingbird, WP is fatal because the HW doesn't write a
	 * UE back to poison the data.
	 */
	if (t_afsr & P_AFSR_WP) {
		(void) strcat(pr_reason, "WP ");
		if (isus2i || isus2e) {
			aflt->flt_panic = 1;
		} else {
			read_all_memscrub = 1;
		}
		spf_flt.flt_type = CPU_WP_ERR;
		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_WP,
		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Handle trapping CP error: In Sabre/Hummingbird, parity error in
	 * the ecache on a copyout due to a PCI DMA read is signaled as a CP.
	 * This is fatal.
	 */

	if (t_afsr & P_AFSR_CP) {
		if (isus2i || isus2e) {
			(void) strcat(pr_reason, "CP ");
			aflt->flt_panic = 1;
			spf_flt.flt_type = CPU_TRAPPING_CP_ERR;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		} else {
			/*
			 * Orphan CP: Happens due to signal integrity problem
			 * on a CPU, where a CP is reported, without reporting
			 * its associated UE.  This is handled by locating the
			 * bad parity line and would kick off the memscrubber
			 * to find the UE if in memory or in another's cache.
			 */
			spf_flt.flt_type = CPU_ORPHAN_CP_ERR;
			(void) strcat(pr_reason, "ORPHAN_CP ");

			/*
			 * Here we have no PA to work with.
			 * Scan each line in the ecache to look for
			 * the one with bad parity.
			 */
			aflt->flt_addr = AFLT_INV_ADDR;
			scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
			    &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt,
			    &oafsr);
			acc_afsr |= oafsr;

			/*
			 * If we found a bad PA, update the state to indicate
			 * if it is memory or I/O space.
			 */
			if (aflt->flt_addr != AFLT_INV_ADDR) {
				aflt->flt_in_memory =
				    (pf_is_memory(aflt->flt_addr >>
				    MMU_PAGESHIFT)) ? 1 : 0;
			}
			read_all_memscrub = 1;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);

		}
	}

	/*
	 * If we queued an error other than WP or CP and we are going to return
	 * from the trap and the error was in user mode or inside of a
	 * copy routine, set AST flag so the queue will be drained before
	 * returning to user mode.
	 *
	 * For UE/LDP/EDP, the AST processing will SIGKILL the process
	 * and send an event to its process contract.
	 *
	 * For BERR/BTO, the AST processing will SIGBUS the process.  There
	 * will have been no error queued in this case.
	 */
	if ((t_afsr &
	    (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP | P_AFSR_BERR | P_AFSR_TO)) &&
	    (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY)) {
		int pcb_flag = 0;

		if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP))
			pcb_flag |= ASYNC_HWERR;

		if (t_afsr & P_AFSR_BERR)
			pcb_flag |= ASYNC_BERR;

		if (t_afsr & P_AFSR_TO)
			pcb_flag |= ASYNC_BTO;

		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
		aston(curthread);
		action = ACTION_AST_FLAGS;
	}
action is 1763 * set in cases (1) and (2) - check that either action is set or 1764 * (3) is true. 1765 * 1766 * On II, the WP writes poisoned data back to memory, which will 1767 * cause a UE and a panic or reboot when read. In this case, we 1768 * don't need to panic at this time. On IIi and IIe, 1769 * aflt->flt_panic is already set above. 1770 */ 1771 ASSERT((aflt->flt_panic != 0) || (action != ACTION_NONE) || 1772 (t_afsr & P_AFSR_WP)); 1773 1774 /* 1775 * Make a final sanity check to make sure we did not get any more async 1776 * errors and accumulate the afsr. 1777 */ 1778 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2, 1779 cpunodes[CPU->cpu_id].ecache_linesize); 1780 (void) clear_errors(&spf_flt, NULL); 1781 1782 /* 1783 * Take care of a special case: If there is a UE in the ecache flush 1784 * area, we'll see it in flush_ecache(). This will trigger the 1785 * CPU_ADDITIONAL_ERRORS case below. 1786 * 1787 * This could occur if the original error was a UE in the flush area, 1788 * or if the original error was an E$ error that was flushed out of 1789 * the E$ in scan_ecache(). 1790 * 1791 * If it's at the same address that we're already logging, then it's 1792 * probably one of these cases. Clear the bit so we don't trip over 1793 * it on the additional errors case, which could cause an unnecessary 1794 * panic. 1795 */ 1796 if ((aflt->flt_stat & P_AFSR_UE) && aflt->flt_addr == t_afar) 1797 acc_afsr |= aflt->flt_stat & ~P_AFSR_UE; 1798 else 1799 acc_afsr |= aflt->flt_stat; 1800 1801 /* 1802 * Check the acumulated afsr for the important bits. 1803 * Make sure the spf_flt.flt_type value is set, and 1804 * enque an error. 1805 */ 1806 if (acc_afsr & 1807 (P_AFSR_LEVEL1 | P_AFSR_IVUE | P_AFSR_ETP | P_AFSR_ISAP)) { 1808 if (acc_afsr & (P_AFSR_UE | P_AFSR_EDP | P_AFSR_LDP | 1809 P_AFSR_BERR | P_AFSR_TO | P_AFSR_IVUE | P_AFSR_ETP | 1810 P_AFSR_ISAP)) 1811 aflt->flt_panic = 1; 1812 1813 spf_flt.flt_type = CPU_ADDITIONAL_ERR; 1814 aflt->flt_stat = acc_afsr; 1815 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UNKNOWN, 1816 (void *)&spf_flt, sizeof (spf_flt), ue_queue, 1817 aflt->flt_panic); 1818 } 1819 1820 /* 1821 * If aflt->flt_panic is set at this point, we need to panic as the 1822 * result of a trap at TL > 0, or an error we determined to be fatal. 1823 * We've already enqueued the error in one of the if-clauses above, 1824 * and it will be dequeued and logged as part of the panic flow. 1825 */ 1826 if (aflt->flt_panic) { 1827 cpu_aflt_log(CE_PANIC, 1, &spf_flt, CPU_ERRID_FIRST, 1828 "See previous message(s) for details", " %sError(s)", 1829 pr_reason); 1830 } 1831 1832 /* 1833 * Before returning, we must re-enable errors, and 1834 * reset the caches to their boot-up state. 1835 */ 1836 set_lsu(get_lsu() | cache_boot_state); 1837 set_error_enable(EER_ENABLE); 1838 } 1839 1840 /* 1841 * Check for miscellaneous fatal errors and call CE_PANIC if any are seen. 1842 * This routine is shared by the CE and UE handling code. 1843 */ 1844 static void 1845 check_misc_err(spitf_async_flt *spf_flt) 1846 { 1847 struct async_flt *aflt = (struct async_flt *)spf_flt; 1848 char *fatal_str = NULL; 1849 1850 /* 1851 * The ISAP and ETP errors are supposed to cause a POR 1852 * from the system, so in theory we never, ever see these messages. 1853 * ISAP, ETP and IVUE are considered to be fatal. 
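* Note that the if/else-if chain below names at most one of these conditions in the panic message; ISAP takes precedence over ETP, and ETP over IVUE.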
1854 */ 1855 if (aflt->flt_stat & P_AFSR_ISAP) 1856 fatal_str = " System Address Parity Error on"; 1857 else if (aflt->flt_stat & P_AFSR_ETP) 1858 fatal_str = " Ecache Tag Parity Error on"; 1859 else if (aflt->flt_stat & P_AFSR_IVUE) 1860 fatal_str = " Interrupt Vector Uncorrectable Error on"; 1861 if (fatal_str != NULL) { 1862 cpu_aflt_log(CE_PANIC, 1, spf_flt, CMN_LFLAGS, 1863 NULL, fatal_str); 1864 } 1865 } 1866 1867 /* 1868 * Routine to convert a syndrome into a syndrome code. 1869 */ 1870 static int 1871 synd_to_synd_code(int synd_status, ushort_t synd) 1872 { 1873 if (synd_status != AFLT_STAT_VALID) 1874 return (-1); 1875 1876 /* 1877 * Use the 8-bit syndrome to index the ecc_syndrome_tab 1878 * to get the code indicating which bit(s) is(are) bad. 1879 */ 1880 if ((synd == 0) || (synd >= SYND_TBL_SIZE)) 1881 return (-1); 1882 else 1883 return (ecc_syndrome_tab[synd]); 1884 } 1885 1886 /* 1887 * Routine to return a string identifying the physical name 1888 * associated with a memory/cache error. 1889 */ 1890 /* ARGSUSED */ 1891 int 1892 cpu_get_mem_unum(int synd_status, ushort_t synd, uint64_t afsr, 1893 uint64_t afar, int cpuid, int flt_in_memory, ushort_t flt_status, 1894 char *buf, int buflen, int *lenp) 1895 { 1896 short synd_code; 1897 int ret; 1898 1899 if (flt_in_memory) { 1900 synd_code = synd_to_synd_code(synd_status, synd); 1901 if (synd_code == -1) { 1902 ret = EINVAL; 1903 } else if (prom_get_unum(synd_code, P2ALIGN(afar, 8), 1904 buf, buflen, lenp) != 0) { 1905 ret = EIO; 1906 } else if (*lenp <= 1) { 1907 ret = EINVAL; 1908 } else { 1909 ret = 0; 1910 } 1911 } else { 1912 ret = ENOTSUP; 1913 } 1914 1915 if (ret != 0) { 1916 buf[0] = '\0'; 1917 *lenp = 0; 1918 } 1919 1920 return (ret); 1921 } 1922 1923 /* 1924 * Wrapper for cpu_get_mem_unum() routine that takes an 1925 * async_flt struct rather than explicit arguments. 1926 */ 1927 int 1928 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt, 1929 char *buf, int buflen, int *lenp) 1930 { 1931 return (cpu_get_mem_unum(synd_status, SYND(aflt->flt_synd), 1932 aflt->flt_stat, aflt->flt_addr, aflt->flt_bus_id, 1933 aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp)); 1934 } 1935 1936 /* 1937 * This routine is a more generic interface to cpu_get_mem_unum(), 1938 * that may be used by other modules (e.g. mm). 1939 */ 1940 int 1941 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar, 1942 char *buf, int buflen, int *lenp) 1943 { 1944 int synd_status, flt_in_memory, ret; 1945 char unum[UNUM_NAMLEN]; 1946 1947 /* 1948 * Check for an invalid address. 1949 */ 1950 if (afar == (uint64_t)-1) 1951 return (ENXIO); 1952 1953 if (synd == (uint64_t)-1) 1954 synd_status = AFLT_STAT_INVALID; 1955 else 1956 synd_status = AFLT_STAT_VALID; 1957 1958 flt_in_memory = (pf_is_memory(afar >> MMU_PAGESHIFT)) ? 1 : 0; 1959 1960 if ((ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar, 1961 CPU->cpu_id, flt_in_memory, 0, unum, UNUM_NAMLEN, lenp)) 1962 != 0) 1963 return (ret); 1964 1965 if (*lenp >= buflen) 1966 return (ENAMETOOLONG); 1967 1968 (void) strncpy(buf, unum, buflen); 1969 1970 return (0); 1971 } 1972 1973 /* 1974 * Routine to return memory information associated 1975 * with a physical address and syndrome. 
1976 */ 1977 /* ARGSUSED */ 1978 int 1979 cpu_get_mem_info(uint64_t synd, uint64_t afar, 1980 uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, 1981 int *segsp, int *banksp, int *mcidp) 1982 { 1983 return (ENOTSUP); 1984 } 1985 1986 /* 1987 * Routine to return a string identifying the physical 1988 * name associated with a cpuid. 1989 */ 1990 /* ARGSUSED */ 1991 int 1992 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp) 1993 { 1994 return (ENOTSUP); 1995 } 1996 1997 /* 1998 * This routine returns the size of the kernel's FRU name buffer. 1999 */ 2000 size_t 2001 cpu_get_name_bufsize() 2002 { 2003 return (UNUM_NAMLEN); 2004 } 2005 2006 /* 2007 * Cpu specific log func for UEs. 2008 */ 2009 static void 2010 log_ue_err(struct async_flt *aflt, char *unum) 2011 { 2012 spitf_async_flt *spf_flt = (spitf_async_flt *)aflt; 2013 int len = 0; 2014 2015 #ifdef DEBUG 2016 int afsr_priv = (aflt->flt_stat & P_AFSR_PRIV) ? 1 : 0; 2017 2018 /* 2019 * Paranoid Check for priv mismatch 2020 * Only applicable for UEs 2021 */ 2022 if (afsr_priv != aflt->flt_priv) { 2023 /* 2024 * The priv bits in %tstate and %afsr did not match; we expect 2025 * this to be very rare, so flag it with a message. 2026 */ 2027 cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, NULL, 2028 ": PRIV bit in TSTATE and AFSR mismatched; " 2029 "TSTATE.PRIV=%d used", (aflt->flt_priv) ? 1 : 0); 2030 2031 /* update saved afsr to reflect the correct priv */ 2032 aflt->flt_stat &= ~P_AFSR_PRIV; 2033 if (aflt->flt_priv) 2034 aflt->flt_stat |= P_AFSR_PRIV; 2035 } 2036 #endif /* DEBUG */ 2037 2038 (void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum, 2039 UNUM_NAMLEN, &len); 2040 2041 cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum, 2042 " Uncorrectable Memory Error on"); 2043 2044 if (SYND(aflt->flt_synd) == 0x3) { 2045 cpu_aflt_log(CE_WARN, 1, spf_flt, CPU_ERRID_FIRST, NULL, 2046 " Syndrome 0x3 indicates that this may not be a " 2047 "memory module problem"); 2048 } 2049 2050 if (aflt->flt_in_memory) 2051 cpu_log_ecmem_info(spf_flt); 2052 } 2053 2054 2055 /* 2056 * The cpu_async_log_err() function is called via the ue_drain() function to 2057 * handle logging for CPU events that are dequeued. As such, it can be invoked 2058 * from softint context, from AST processing in the trap() flow, or from the 2059 * panic flow. We decode the CPU-specific data, and log appropriate messages. 2060 */ 2061 static void 2062 cpu_async_log_err(void *flt) 2063 { 2064 spitf_async_flt *spf_flt = (spitf_async_flt *)flt; 2065 struct async_flt *aflt = (struct async_flt *)flt; 2066 char unum[UNUM_NAMLEN]; 2067 char *space; 2068 char *ecache_scrub_logstr = NULL; 2069 2070 switch (spf_flt->flt_type) { 2071 case CPU_UE_ERR: 2072 /* 2073 * We want to skip logging only if ALL the following 2074 * conditions are true: 2075 * 2076 * 1. We are not panicking 2077 * 2. There is only one error 2078 * 3. That error is a memory error 2079 * 4. The error is caused by the memory scrubber (in 2080 * which case the error will have occurred under 2081 * on_trap protection) 2082 * 5. The error is on a retired page 2083 * 2084 * Note 1: AFLT_PROT_EC is used places other than the memory 2085 * scrubber. However, none of those errors should occur 2086 * on a retired page. 2087 * 2088 * Note 2: In the CE case, these errors are discarded before 2089 * the errorq. In the UE case, we must wait until now -- 2090 * softcall() grabs a mutex, which we can't do at a high PIL. 
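* To illustrate the case being skipped: a UE taken by the memory scrubber (flt_prot == AFLT_PROT_EC) on a page that has already been retired is handled entirely by the softcall() of ecc_page_zero below, and nothing is logged.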
2091 */ 2092 if (!panicstr && 2093 (aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE && 2094 aflt->flt_prot == AFLT_PROT_EC) { 2095 page_t *pp = page_numtopp_nolock((pfn_t) 2096 (aflt->flt_addr >> MMU_PAGESHIFT)); 2097 2098 if (pp != NULL && page_isretired(pp)) { 2099 2100 /* Zero the address to clear the error */ 2101 softcall(ecc_page_zero, (void *)aflt->flt_addr); 2102 return; 2103 } 2104 } 2105 2106 /* 2107 * Log the UE and check for causes of this UE error that 2108 * don't cause a trap (Copyback error). cpu_async_error() 2109 * has already checked the i/o buses for us. 2110 */ 2111 log_ue_err(aflt, unum); 2112 if (aflt->flt_in_memory) 2113 cpu_check_allcpus(aflt); 2114 break; 2115 2116 case CPU_EDP_LDP_ERR: 2117 if (aflt->flt_stat & P_AFSR_EDP) 2118 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, 2119 NULL, " EDP event on"); 2120 2121 if (aflt->flt_stat & P_AFSR_LDP) 2122 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, 2123 NULL, " LDP event on"); 2124 2125 /* Log ecache info if exist */ 2126 if (spf_flt->flt_ec_lcnt > 0) { 2127 cpu_log_ecmem_info(spf_flt); 2128 2129 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, 2130 NULL, " AFAR was derived from E$Tag"); 2131 } else { 2132 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, 2133 NULL, " No error found in ecache (No fault " 2134 "PA available)"); 2135 } 2136 break; 2137 2138 case CPU_WP_ERR: 2139 /* 2140 * If the memscrub thread hasn't yet read 2141 * all of memory, as we requested in the 2142 * trap handler, then give it a kick to 2143 * make sure it does. 2144 */ 2145 if (!isus2i && !isus2e && read_all_memscrub) 2146 memscrub_run(); 2147 2148 cpu_aflt_log(CE_WARN, 1, spf_flt, WP_LFLAGS, NULL, 2149 " WP event on"); 2150 return; 2151 2152 case CPU_BTO_BERR_ERR: 2153 /* 2154 * A bus timeout or error occurred that was in user mode or not 2155 * in a protected kernel code region. 2156 */ 2157 if (aflt->flt_stat & P_AFSR_BERR) { 2158 cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2, 2159 spf_flt, BERRTO_LFLAGS, NULL, 2160 " Bus Error on System Bus in %s mode from", 2161 aflt->flt_priv ? "privileged" : "user"); 2162 } 2163 2164 if (aflt->flt_stat & P_AFSR_TO) { 2165 cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2, 2166 spf_flt, BERRTO_LFLAGS, NULL, 2167 " Timeout on System Bus in %s mode from", 2168 aflt->flt_priv ? "privileged" : "user"); 2169 } 2170 2171 return; 2172 2173 case CPU_PANIC_CP_ERR: 2174 /* 2175 * Process the Copyback (CP) error info (if any) obtained from 2176 * polling all the cpus in the panic flow. This case is only 2177 * entered if we are panicking. 2178 */ 2179 ASSERT(panicstr != NULL); 2180 ASSERT(aflt->flt_id == panic_aflt.flt_id); 2181 2182 /* See which space - this info may not exist */ 2183 if (panic_aflt.flt_status & ECC_D_TRAP) 2184 space = "Data "; 2185 else if (panic_aflt.flt_status & ECC_I_TRAP) 2186 space = "Instruction "; 2187 else 2188 space = ""; 2189 2190 cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL, 2191 " AFAR was derived from UE report," 2192 " CP event on CPU%d (caused %saccess error on %s%d)", 2193 aflt->flt_inst, space, (panic_aflt.flt_status & ECC_IOBUS) ? 2194 "IOBUS" : "CPU", panic_aflt.flt_bus_id); 2195 2196 if (spf_flt->flt_ec_lcnt > 0) 2197 cpu_log_ecmem_info(spf_flt); 2198 else 2199 cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, 2200 NULL, " No cache dump available"); 2201 2202 return; 2203 2204 case CPU_TRAPPING_CP_ERR: 2205 /* 2206 * For sabre only. This is a copyback ecache parity error due 2207 * to a PCI DMA read. We should be panicking if we get here. 
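* (The trap handler sets flt_panic for this case before dispatching the event, which is why the ASSERT below expects panicstr to be non-NULL.)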
2208 */ 2209 ASSERT(panicstr != NULL); 2210 cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL, 2211 " AFAR was derived from UE report," 2212 " CP event on CPU%d (caused Data access error " 2213 "on PCIBus)", aflt->flt_inst); 2214 return; 2215 2216 /* 2217 * We log the ecache lines of the following states, 2218 * clean_bad_idle, clean_bad_busy, dirty_bad_idle and 2219 * dirty_bad_busy if ecache_scrub_verbose is set and panic 2220 * in addition to logging if ecache_scrub_panic is set. 2221 */ 2222 case CPU_BADLINE_CI_ERR: 2223 ecache_scrub_logstr = "CBI"; 2224 /* FALLTHRU */ 2225 2226 case CPU_BADLINE_CB_ERR: 2227 if (ecache_scrub_logstr == NULL) 2228 ecache_scrub_logstr = "CBB"; 2229 /* FALLTHRU */ 2230 2231 case CPU_BADLINE_DI_ERR: 2232 if (ecache_scrub_logstr == NULL) 2233 ecache_scrub_logstr = "DBI"; 2234 /* FALLTHRU */ 2235 2236 case CPU_BADLINE_DB_ERR: 2237 if (ecache_scrub_logstr == NULL) 2238 ecache_scrub_logstr = "DBB"; 2239 2240 cpu_aflt_log(CE_NOTE, 2, spf_flt, 2241 (CPU_ERRID_FIRST | CPU_FLTCPU), NULL, 2242 " %s event on", ecache_scrub_logstr); 2243 cpu_log_ecmem_info(spf_flt); 2244 2245 return; 2246 2247 case CPU_ORPHAN_CP_ERR: 2248 /* 2249 * Orphan CPs, where the CP bit is set, but when a CPU 2250 * doesn't report a UE. 2251 */ 2252 if (read_all_memscrub) 2253 memscrub_run(); 2254 2255 cpu_aflt_log(CE_NOTE, 2, spf_flt, (CP_LFLAGS | CPU_FLTCPU), 2256 NULL, " Orphan CP event on"); 2257 2258 /* Log ecache info if exist */ 2259 if (spf_flt->flt_ec_lcnt > 0) 2260 cpu_log_ecmem_info(spf_flt); 2261 else 2262 cpu_aflt_log(CE_NOTE, 2, spf_flt, 2263 (CP_LFLAGS | CPU_FLTCPU), NULL, 2264 " No error found in ecache (No fault " 2265 "PA available"); 2266 return; 2267 2268 case CPU_ECACHE_ADDR_PAR_ERR: 2269 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2270 " E$ Tag Address Parity error on"); 2271 cpu_log_ecmem_info(spf_flt); 2272 return; 2273 2274 case CPU_ECACHE_STATE_ERR: 2275 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2276 " E$ Tag State Parity error on"); 2277 cpu_log_ecmem_info(spf_flt); 2278 return; 2279 2280 case CPU_ECACHE_TAG_ERR: 2281 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2282 " E$ Tag scrub event on"); 2283 cpu_log_ecmem_info(spf_flt); 2284 return; 2285 2286 case CPU_ECACHE_ETP_ETS_ERR: 2287 cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL, 2288 " AFSR.ETP is set and AFSR.ETS is zero on"); 2289 cpu_log_ecmem_info(spf_flt); 2290 return; 2291 2292 2293 case CPU_ADDITIONAL_ERR: 2294 cpu_aflt_log(CE_WARN, 1, spf_flt, CMN_LFLAGS & ~CPU_SPACE, NULL, 2295 " Additional errors detected during error processing on"); 2296 return; 2297 2298 default: 2299 cmn_err(CE_WARN, "cpu_async_log_err: fault %p has unknown " 2300 "fault type %x", (void *)spf_flt, spf_flt->flt_type); 2301 return; 2302 } 2303 2304 /* ... 
fall through from the UE, EDP, or LDP cases */ 2305 2306 if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) { 2307 if (!panicstr) { 2308 /* 2309 * Retire the bad page that caused the error 2310 */ 2311 page_t *pp = page_numtopp_nolock((pfn_t) 2312 (aflt->flt_addr >> MMU_PAGESHIFT)); 2313 2314 if (pp != NULL) { 2315 page_settoxic(pp, PAGE_IS_FAULTY); 2316 (void) page_retire(pp, PAGE_IS_TOXIC); 2317 } else { 2318 uint64_t pa = 2319 P2ALIGN(aflt->flt_addr, MMU_PAGESIZE); 2320 2321 cpu_aflt_log(CE_CONT, 3, spf_flt, 2322 CPU_ERRID_FIRST, NULL, 2323 ": cannot schedule clearing of error on " 2324 "page 0x%08x.%08x; page not in VM system", 2325 (uint32_t)(pa >> 32), (uint32_t)pa); 2326 } 2327 } else { 2328 /* 2329 * Clear UEs on panic so that we don't 2330 * get haunted by them during panic or 2331 * after reboot 2332 */ 2333 clearphys(P2ALIGN(aflt->flt_addr, 64), 2334 cpunodes[CPU->cpu_id].ecache_size, 2335 cpunodes[CPU->cpu_id].ecache_linesize); 2336 2337 (void) clear_errors(NULL, NULL); 2338 } 2339 } 2340 2341 /* 2342 * Log final recover message 2343 */ 2344 if (!panicstr) { 2345 if (!aflt->flt_priv) { 2346 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, 2347 NULL, " Above Error is in User Mode" 2348 "\n and is fatal: " 2349 "will SIGKILL process and notify contract"); 2350 } else if (aflt->flt_prot == AFLT_PROT_COPY && aflt->flt_core) { 2351 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, 2352 NULL, " Above Error detected while dumping core;" 2353 "\n core file will be truncated"); 2354 } else if (aflt->flt_prot == AFLT_PROT_COPY) { 2355 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, 2356 NULL, " Above Error is due to Kernel access" 2357 "\n to User space and is fatal: " 2358 "will SIGKILL process and notify contract"); 2359 } else if (aflt->flt_prot == AFLT_PROT_EC) { 2360 cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, NULL, 2361 " Above Error detected by protected Kernel code" 2362 "\n that will try to clear error from system"); 2363 } 2364 } 2365 } 2366 2367 2368 /* 2369 * Check all cpus for non-trapping UE-causing errors 2370 * In Ultra I/II, we look for copyback errors (CPs) 2371 */ 2372 void 2373 cpu_check_allcpus(struct async_flt *aflt) 2374 { 2375 spitf_async_flt cp; 2376 spitf_async_flt *spf_cpflt = &cp; 2377 struct async_flt *cpflt = (struct async_flt *)&cp; 2378 int pix; 2379 2380 cpflt->flt_id = aflt->flt_id; 2381 cpflt->flt_addr = aflt->flt_addr; 2382 2383 for (pix = 0; pix < NCPU; pix++) { 2384 if (CPU_XCALL_READY(pix)) { 2385 xc_one(pix, (xcfunc_t *)get_cpu_status, 2386 (uint64_t)cpflt, 0); 2387 2388 if (cpflt->flt_stat & P_AFSR_CP) { 2389 char *space; 2390 2391 /* See which space - this info may not exist */ 2392 if (aflt->flt_status & ECC_D_TRAP) 2393 space = "Data "; 2394 else if (aflt->flt_status & ECC_I_TRAP) 2395 space = "Instruction "; 2396 else 2397 space = ""; 2398 2399 cpu_aflt_log(CE_WARN, 1, spf_cpflt, CP_LFLAGS, 2400 NULL, " AFAR was derived from UE report," 2401 " CP event on CPU%d (caused %saccess " 2402 "error on %s%d)", pix, space, 2403 (aflt->flt_status & ECC_IOBUS) ? 2404 "IOBUS" : "CPU", aflt->flt_bus_id); 2405 2406 if (spf_cpflt->flt_ec_lcnt > 0) 2407 cpu_log_ecmem_info(spf_cpflt); 2408 else 2409 cpu_aflt_log(CE_WARN, 2, spf_cpflt, 2410 CPU_ERRID_FIRST, NULL, 2411 " No cache dump available"); 2412 } 2413 } 2414 } 2415 } 2416 2417 #ifdef DEBUG 2418 int test_mp_cp = 0; 2419 #endif 2420 2421 /* 2422 * Cross-call callback routine to tell a CPU to read its own %afsr to check 2423 * for copyback errors and capture relevant information. 
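* It is reached two ways: cross-called via xc_one() from cpu_check_allcpus() above, or called directly by cpu_async_panic_callb() below; either way arg is really a pointer to a spitf_async_flt whose flt_addr has already been filled in.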
2424 */ 2425 static uint_t 2426 get_cpu_status(uint64_t arg) 2427 { 2428 struct async_flt *aflt = (struct async_flt *)arg; 2429 spitf_async_flt *spf_flt = (spitf_async_flt *)arg; 2430 uint64_t afsr; 2431 uint32_t ec_idx; 2432 uint64_t sdbh, sdbl; 2433 int i; 2434 uint32_t ec_set_size; 2435 uchar_t valid; 2436 ec_data_t ec_data[8]; 2437 uint64_t ec_tag, flt_addr_tag, oafsr; 2438 uint64_t *acc_afsr = NULL; 2439 2440 get_asyncflt(&afsr); 2441 if (CPU_PRIVATE(CPU) != NULL) { 2442 acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 2443 afsr |= *acc_afsr; 2444 *acc_afsr = 0; 2445 } 2446 2447 #ifdef DEBUG 2448 if (test_mp_cp) 2449 afsr |= P_AFSR_CP; 2450 #endif 2451 aflt->flt_stat = afsr; 2452 2453 if (afsr & P_AFSR_CP) { 2454 /* 2455 * Capture the UDBs 2456 */ 2457 get_udb_errors(&sdbh, &sdbl); 2458 spf_flt->flt_sdbh = (ushort_t)(sdbh & 0x3FF); 2459 spf_flt->flt_sdbl = (ushort_t)(sdbl & 0x3FF); 2460 2461 /* 2462 * Clear CP bit before capturing ecache data 2463 * and AFSR info. 2464 */ 2465 set_asyncflt(P_AFSR_CP); 2466 2467 /* 2468 * See if we can capture the ecache line for the 2469 * fault PA. 2470 * 2471 * Return a valid matching ecache line, if any. 2472 * Otherwise, return the first matching ecache 2473 * line marked invalid. 2474 */ 2475 flt_addr_tag = aflt->flt_addr >> cpu_ec_tag_shift; 2476 ec_set_size = cpunodes[CPU->cpu_id].ecache_size / 2477 ecache_associativity; 2478 spf_flt->flt_ec_lcnt = 0; 2479 2480 for (i = 0, ec_idx = (aflt->flt_addr % ec_set_size); 2481 i < ecache_associativity; i++, ec_idx += ec_set_size) { 2482 get_ecache_dtag(P2ALIGN(ec_idx, 64), 2483 (uint64_t *)&ec_data[0], &ec_tag, &oafsr, 2484 acc_afsr); 2485 2486 if ((ec_tag & cpu_ec_tag_mask) != flt_addr_tag) 2487 continue; 2488 2489 valid = cpu_ec_state_valid & 2490 (uchar_t)((ec_tag & cpu_ec_state_mask) >> 2491 cpu_ec_state_shift); 2492 2493 if (valid || spf_flt->flt_ec_lcnt == 0) { 2494 spf_flt->flt_ec_tag = ec_tag; 2495 bcopy(&ec_data, &spf_flt->flt_ec_data, 2496 sizeof (ec_data)); 2497 spf_flt->flt_ec_lcnt = 1; 2498 2499 if (valid) 2500 break; 2501 } 2502 } 2503 } 2504 return (0); 2505 } 2506 2507 /* 2508 * CPU-module callback for the non-panicking CPUs. This routine is invoked 2509 * from panic_idle() as part of the other CPUs stopping themselves when a 2510 * panic occurs. We need to be VERY careful what we do here, since panicstr 2511 * is NOT set yet and we cannot blow through locks. If panic_aflt is set 2512 * (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for 2513 * CP error information. 2514 */ 2515 void 2516 cpu_async_panic_callb(void) 2517 { 2518 spitf_async_flt cp; 2519 struct async_flt *aflt = (struct async_flt *)&cp; 2520 uint64_t *scrub_afsr; 2521 2522 if (panic_aflt.flt_id != 0) { 2523 aflt->flt_addr = panic_aflt.flt_addr; 2524 (void) get_cpu_status((uint64_t)aflt); 2525 2526 if (CPU_PRIVATE(CPU) != NULL) { 2527 scrub_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 2528 if (*scrub_afsr & P_AFSR_CP) { 2529 aflt->flt_stat |= *scrub_afsr; 2530 *scrub_afsr = 0; 2531 } 2532 } 2533 if (aflt->flt_stat & P_AFSR_CP) { 2534 aflt->flt_id = panic_aflt.flt_id; 2535 aflt->flt_panic = 1; 2536 aflt->flt_inst = CPU->cpu_id; 2537 aflt->flt_class = CPU_FAULT; 2538 cp.flt_type = CPU_PANIC_CP_ERR; 2539 cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP, 2540 (void *)&cp, sizeof (cp), ue_queue, 2541 aflt->flt_panic); 2542 } 2543 } 2544 } 2545 2546 /* 2547 * Turn off all cpu error detection, normally only used for panics. 
2548 */ 2549 void 2550 cpu_disable_errors(void) 2551 { 2552 xt_all(set_error_enable_tl1, EER_DISABLE, EER_SET_ABSOLUTE); 2553 } 2554 2555 /* 2556 * Enable errors. 2557 */ 2558 void 2559 cpu_enable_errors(void) 2560 { 2561 xt_all(set_error_enable_tl1, EER_ENABLE, EER_SET_ABSOLUTE); 2562 } 2563 2564 static void 2565 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err) 2566 { 2567 uint64_t aligned_addr = P2ALIGN(ecc->flt_addr, 8); 2568 int i, loop = 1; 2569 ushort_t ecc_0; 2570 uint64_t paddr; 2571 uint64_t data; 2572 2573 if (verbose) 2574 loop = 8; 2575 for (i = 0; i < loop; i++) { 2576 paddr = aligned_addr + (i * 8); 2577 data = lddphys(paddr); 2578 if (verbose) { 2579 if (ce_err) { 2580 ecc_0 = ecc_gen((uint32_t)(data>>32), 2581 (uint32_t)data); 2582 cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS, 2583 NULL, " Paddr 0x%" PRIx64 ", " 2584 "Data 0x%08x.%08x, ECC 0x%x", paddr, 2585 (uint32_t)(data>>32), (uint32_t)data, ecc_0); 2586 } else { 2587 cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS, 2588 NULL, " Paddr 0x%" PRIx64 ", " 2589 "Data 0x%08x.%08x", paddr, 2590 (uint32_t)(data>>32), (uint32_t)data); 2591 } 2592 } 2593 } 2594 } 2595 2596 static struct { /* sec-ded-s4ed ecc code */ 2597 uint_t hi, lo; 2598 } ecc_code[8] = { 2599 { 0xee55de23U, 0x16161161U }, 2600 { 0x55eede93U, 0x61612212U }, 2601 { 0xbb557b8cU, 0x49494494U }, 2602 { 0x55bb7b6cU, 0x94948848U }, 2603 { 0x16161161U, 0xee55de23U }, 2604 { 0x61612212U, 0x55eede93U }, 2605 { 0x49494494U, 0xbb557b8cU }, 2606 { 0x94948848U, 0x55bb7b6cU } 2607 }; 2608 2609 static ushort_t 2610 ecc_gen(uint_t high_bytes, uint_t low_bytes) 2611 { 2612 int i, j; 2613 uchar_t checker, bit_mask; 2614 struct { 2615 uint_t hi, lo; 2616 } hex_data, masked_data[8]; 2617 2618 hex_data.hi = high_bytes; 2619 hex_data.lo = low_bytes; 2620 2621 /* mask out bits according to sec-ded-s4ed ecc code */ 2622 for (i = 0; i < 8; i++) { 2623 masked_data[i].hi = hex_data.hi & ecc_code[i].hi; 2624 masked_data[i].lo = hex_data.lo & ecc_code[i].lo; 2625 } 2626 2627 /* 2628 * xor all bits in masked_data[i] to get bit_i of checker, 2629 * where i = 0 to 7 2630 */ 2631 checker = 0; 2632 for (i = 0; i < 8; i++) { 2633 bit_mask = 1 << i; 2634 for (j = 0; j < 32; j++) { 2635 if (masked_data[i].lo & 1) checker ^= bit_mask; 2636 if (masked_data[i].hi & 1) checker ^= bit_mask; 2637 masked_data[i].hi >>= 1; 2638 masked_data[i].lo >>= 1; 2639 } 2640 } 2641 return (checker); 2642 } 2643 2644 /* 2645 * Flush the entire ecache using displacement flush by reading through a 2646 * physical address range as large as the ecache. 2647 */ 2648 void 2649 cpu_flush_ecache(void) 2650 { 2651 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2, 2652 cpunodes[CPU->cpu_id].ecache_linesize); 2653 } 2654 2655 /* 2656 * read and display the data in the cache line where the 2657 * original ce error occurred. 2658 * This routine is mainly used for debugging new hardware. 
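* A hypothetical debugging call such as read_ecc_data(aflt, 1, 1) (aflt being a previously captured async_flt) reads and logs eight consecutive 8-byte words starting at the 8-byte-aligned fault address; with ce_err set, the ECC that ecc_gen() computes for each word is shown as well.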
2659 */ 2660 void 2661 read_ecc_data(struct async_flt *ecc, short verbose, short ce_err) 2662 { 2663 kpreempt_disable(); 2664 /* disable ECC error traps */ 2665 set_error_enable(EER_ECC_DISABLE); 2666 2667 /* 2668 * flush the ecache 2669 * read the data 2670 * check to see if an ECC error occured 2671 */ 2672 flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2, 2673 cpunodes[CPU->cpu_id].ecache_linesize); 2674 set_lsu(get_lsu() | cache_boot_state); 2675 cpu_read_paddr(ecc, verbose, ce_err); 2676 (void) check_ecc(ecc); 2677 2678 /* enable ECC error traps */ 2679 set_error_enable(EER_ENABLE); 2680 kpreempt_enable(); 2681 } 2682 2683 /* 2684 * Check the AFSR bits for UE/CE persistence. 2685 * If UE or CE errors are detected, the routine will 2686 * clears all the AFSR sticky bits (except CP for 2687 * spitfire/blackbird) and the UDBs. 2688 * if ce_debug or ue_debug is set, log any ue/ce errors detected. 2689 */ 2690 static int 2691 check_ecc(struct async_flt *ecc) 2692 { 2693 uint64_t t_afsr; 2694 uint64_t t_afar; 2695 uint64_t udbh; 2696 uint64_t udbl; 2697 ushort_t udb; 2698 int persistent = 0; 2699 2700 /* 2701 * Capture the AFSR, AFAR and UDBs info 2702 */ 2703 get_asyncflt(&t_afsr); 2704 get_asyncaddr(&t_afar); 2705 t_afar &= SABRE_AFAR_PA; 2706 get_udb_errors(&udbh, &udbl); 2707 2708 if ((t_afsr & P_AFSR_UE) || (t_afsr & P_AFSR_CE)) { 2709 /* 2710 * Clear the errors 2711 */ 2712 clr_datapath(); 2713 2714 if (isus2i || isus2e) 2715 set_asyncflt(t_afsr); 2716 else 2717 set_asyncflt(t_afsr & ~P_AFSR_CP); 2718 2719 /* 2720 * determine whether to check UDBH or UDBL for persistence 2721 */ 2722 if (ecc->flt_synd & UDBL_REG) { 2723 udb = (ushort_t)udbl; 2724 t_afar |= 0x8; 2725 } else { 2726 udb = (ushort_t)udbh; 2727 } 2728 2729 if (ce_debug || ue_debug) { 2730 spitf_async_flt spf_flt; /* for logging */ 2731 struct async_flt *aflt = 2732 (struct async_flt *)&spf_flt; 2733 2734 /* Package the info nicely in the spf_flt struct */ 2735 bzero(&spf_flt, sizeof (spitf_async_flt)); 2736 aflt->flt_stat = t_afsr; 2737 aflt->flt_addr = t_afar; 2738 spf_flt.flt_sdbh = (ushort_t)(udbh & 0x3FF); 2739 spf_flt.flt_sdbl = (ushort_t)(udbl & 0x3FF); 2740 2741 cpu_aflt_log(CE_CONT, 0, &spf_flt, (CPU_AFSR | 2742 CPU_AFAR | CPU_UDBH | CPU_UDBL), NULL, 2743 " check_ecc: Dumping captured error states ..."); 2744 } 2745 2746 /* 2747 * if the fault addresses don't match, not persistent 2748 */ 2749 if (t_afar != ecc->flt_addr) { 2750 return (persistent); 2751 } 2752 2753 /* 2754 * check for UE persistence 2755 * since all DIMMs in the bank are identified for a UE, 2756 * there's no reason to check the syndrome 2757 */ 2758 if ((ecc->flt_stat & P_AFSR_UE) && (t_afsr & P_AFSR_UE)) { 2759 persistent = 1; 2760 } 2761 2762 /* 2763 * check for CE persistence 2764 */ 2765 if ((ecc->flt_stat & P_AFSR_CE) && (t_afsr & P_AFSR_CE)) { 2766 if ((udb & P_DER_E_SYND) == 2767 (ecc->flt_synd & P_DER_E_SYND)) { 2768 persistent = 1; 2769 } 2770 } 2771 } 2772 return (persistent); 2773 } 2774 2775 #ifdef HUMMINGBIRD 2776 #define HB_FULL_DIV 1 2777 #define HB_HALF_DIV 2 2778 #define HB_LOWEST_DIV 8 2779 #define HB_ECLK_INVALID 0xdeadbad 2780 static uint64_t hb_eclk[HB_LOWEST_DIV + 1] = { 2781 HB_ECLK_INVALID, HB_ECLK_1, HB_ECLK_2, HB_ECLK_INVALID, 2782 HB_ECLK_4, HB_ECLK_INVALID, HB_ECLK_6, HB_ECLK_INVALID, 2783 HB_ECLK_8 }; 2784 2785 #define HB_SLOW_DOWN 0 2786 #define HB_SPEED_UP 1 2787 2788 #define SET_ESTAR_MODE(mode) \ 2789 stdphysio(HB_ESTAR_MODE, (mode)); \ 2790 /* \ 2791 * PLL logic requires minimum of 16 clock \ 
2792 * cycles to lock to the new clock speed. \ 2793 * Wait 1 usec to satisfy this requirement. \ 2794 */ \ 2795 drv_usecwait(1); 2796 2797 #define CHANGE_REFRESH_COUNT(direction, cur_div, new_div) \ 2798 { \ 2799 volatile uint64_t data; \ 2800 uint64_t count, new_count; \ 2801 clock_t delay; \ 2802 data = lddphysio(HB_MEM_CNTRL0); \ 2803 count = (data & HB_REFRESH_COUNT_MASK) >> \ 2804 HB_REFRESH_COUNT_SHIFT; \ 2805 new_count = (HB_REFRESH_INTERVAL * \ 2806 cpunodes[CPU->cpu_id].clock_freq) / \ 2807 (HB_REFRESH_CLOCKS_PER_COUNT * (new_div) * NANOSEC);\ 2808 data = (data & ~HB_REFRESH_COUNT_MASK) | \ 2809 (new_count << HB_REFRESH_COUNT_SHIFT); \ 2810 stdphysio(HB_MEM_CNTRL0, data); \ 2811 data = lddphysio(HB_MEM_CNTRL0); \ 2812 /* \ 2813 * If we are slowing down the cpu and Memory \ 2814 * Self Refresh is not enabled, it is required \ 2815 * to wait for old refresh count to count-down and \ 2816 * new refresh count to go into effect (let new value \ 2817 * counts down once). \ 2818 */ \ 2819 if ((direction) == HB_SLOW_DOWN && \ 2820 (data & HB_SELF_REFRESH_MASK) == 0) { \ 2821 /* \ 2822 * Each count takes 64 cpu clock cycles \ 2823 * to decrement. Wait for current refresh \ 2824 * count plus new refresh count at current \ 2825 * cpu speed to count down to zero. Round \ 2826 * up the delay time. \ 2827 */ \ 2828 delay = ((HB_REFRESH_CLOCKS_PER_COUNT * \ 2829 (count + new_count) * MICROSEC * (cur_div)) /\ 2830 cpunodes[CPU->cpu_id].clock_freq) + 1; \ 2831 drv_usecwait(delay); \ 2832 } \ 2833 } 2834 2835 #define SET_SELF_REFRESH(bit) \ 2836 { \ 2837 volatile uint64_t data; \ 2838 data = lddphysio(HB_MEM_CNTRL0); \ 2839 data = (data & ~HB_SELF_REFRESH_MASK) | \ 2840 ((bit) << HB_SELF_REFRESH_SHIFT); \ 2841 stdphysio(HB_MEM_CNTRL0, data); \ 2842 data = lddphysio(HB_MEM_CNTRL0); \ 2843 } 2844 #endif /* HUMMINGBIRD */ 2845 2846 /* ARGSUSED */ 2847 void 2848 cpu_change_speed(uint64_t new_divisor, uint64_t arg2) 2849 { 2850 #ifdef HUMMINGBIRD 2851 uint64_t cur_mask, cur_divisor = 0; 2852 volatile uint64_t reg; 2853 int index; 2854 2855 if ((new_divisor < HB_FULL_DIV || new_divisor > HB_LOWEST_DIV) || 2856 (hb_eclk[new_divisor] == HB_ECLK_INVALID)) { 2857 cmn_err(CE_WARN, "cpu_change_speed: bad divisor 0x%lx", 2858 new_divisor); 2859 return; 2860 } 2861 2862 reg = lddphysio(HB_ESTAR_MODE); 2863 cur_mask = reg & HB_ECLK_MASK; 2864 for (index = HB_FULL_DIV; index <= HB_LOWEST_DIV; index++) { 2865 if (hb_eclk[index] == cur_mask) { 2866 cur_divisor = index; 2867 break; 2868 } 2869 } 2870 2871 if (cur_divisor == 0) 2872 cmn_err(CE_PANIC, "cpu_change_speed: current divisor " 2873 "can't be determined!"); 2874 2875 /* 2876 * If we are already at the requested divisor speed, just 2877 * return. 2878 */ 2879 if (cur_divisor == new_divisor) 2880 return; 2881 2882 if (cur_divisor == HB_FULL_DIV && new_divisor == HB_HALF_DIV) { 2883 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor); 2884 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2885 SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE); 2886 2887 } else if (cur_divisor == HB_HALF_DIV && new_divisor == HB_FULL_DIV) { 2888 SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE); 2889 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2890 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2891 CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor); 2892 2893 } else if (cur_divisor == HB_FULL_DIV && new_divisor > HB_HALF_DIV) { 2894 /* 2895 * Transition to 1/2 speed first, then to 2896 * lower speed. 
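* For example, a request to go from full speed (divisor 1) straight to divisor 6 is carried out as 1 -> 2 -> 6, adjusting the memory refresh count before each step down and enabling memory self refresh after the first step.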
2897 */ 2898 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, HB_HALF_DIV); 2899 SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]); 2900 SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE); 2901 2902 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, HB_HALF_DIV, new_divisor); 2903 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2904 2905 } else if (cur_divisor > HB_HALF_DIV && new_divisor == HB_FULL_DIV) { 2906 /* 2907 * Transition to 1/2 speed first, then to 2908 * full speed. 2909 */ 2910 SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]); 2911 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2912 CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, HB_HALF_DIV); 2913 2914 SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE); 2915 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2916 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2917 CHANGE_REFRESH_COUNT(HB_SPEED_UP, HB_HALF_DIV, new_divisor); 2918 2919 } else if (cur_divisor < new_divisor) { 2920 CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor); 2921 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2922 2923 } else if (cur_divisor > new_divisor) { 2924 SET_ESTAR_MODE(hb_eclk[new_divisor]); 2925 /* LINTED: E_FALSE_LOGICAL_EXPR */ 2926 CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor); 2927 } 2928 CPU->cpu_m.divisor = (uchar_t)new_divisor; 2929 #endif 2930 } 2931 2932 /* 2933 * Clear the AFSR sticky bits and the UDBs. For Sabre/Spitfire/Blackbird, 2934 * we clear all the sticky bits. If a non-null pointer to a async fault 2935 * structure argument is passed in, the captured error state (AFSR, AFAR, UDBs) 2936 * info will be returned in the structure. If a non-null pointer to a 2937 * uint64_t is passed in, this will be updated if the CP bit is set in the 2938 * AFSR. The afsr will be returned. 2939 */ 2940 static uint64_t 2941 clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr) 2942 { 2943 struct async_flt *aflt = (struct async_flt *)spf_flt; 2944 uint64_t afsr; 2945 uint64_t udbh, udbl; 2946 2947 get_asyncflt(&afsr); 2948 2949 if ((acc_afsr != NULL) && (afsr & P_AFSR_CP)) 2950 *acc_afsr |= afsr; 2951 2952 if (spf_flt != NULL) { 2953 aflt->flt_stat = afsr; 2954 get_asyncaddr(&aflt->flt_addr); 2955 aflt->flt_addr &= SABRE_AFAR_PA; 2956 2957 get_udb_errors(&udbh, &udbl); 2958 spf_flt->flt_sdbh = (ushort_t)(udbh & 0x3FF); 2959 spf_flt->flt_sdbl = (ushort_t)(udbl & 0x3FF); 2960 } 2961 2962 set_asyncflt(afsr); /* clear afsr */ 2963 clr_datapath(); /* clear udbs */ 2964 return (afsr); 2965 } 2966 2967 /* 2968 * Scan the ecache to look for bad lines. If found, the afsr, afar, e$ data 2969 * tag of the first bad line will be returned. We also return the old-afsr 2970 * (before clearing the sticky bits). The linecnt data will be updated to 2971 * indicate the number of bad lines detected. 2972 */ 2973 static void 2974 scan_ecache(uint64_t *t_afar, ec_data_t *ecache_data, 2975 uint64_t *ecache_tag, int *linecnt, uint64_t *t_afsr) 2976 { 2977 ec_data_t t_ecdata[8]; 2978 uint64_t t_etag, oafsr; 2979 uint64_t pa = AFLT_INV_ADDR; 2980 uint32_t i, j, ecache_sz; 2981 uint64_t acc_afsr = 0; 2982 uint64_t *cpu_afsr = NULL; 2983 2984 if (CPU_PRIVATE(CPU) != NULL) 2985 cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 2986 2987 *linecnt = 0; 2988 ecache_sz = cpunodes[CPU->cpu_id].ecache_size; 2989 2990 for (i = 0; i < ecache_sz; i += 64) { 2991 get_ecache_dtag(i, (uint64_t *)&t_ecdata[0], &t_etag, &oafsr, 2992 cpu_afsr); 2993 acc_afsr |= oafsr; 2994 2995 /* 2996 * Scan through the whole 64 bytes line in 8 8-byte chunks 2997 * looking for the first occurrence of an EDP error. The AFSR 2998 * info is captured for each 8-byte chunk. 
Note that for 2999 * Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in 3000 * 16-byte chunk granularity (i.e. the AFSR will be the same 3001 * for the high and low 8-byte words within the 16-byte chunk). 3002 * For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte 3003 * granularity and only PSYND bits [7:0] are used. 3004 */ 3005 for (j = 0; j < 8; j++) { 3006 ec_data_t *ecdptr = &t_ecdata[j]; 3007 3008 if (ecdptr->ec_afsr & P_AFSR_EDP) { 3009 uint64_t errpa; 3010 ushort_t psynd; 3011 uint32_t ec_set_size = ecache_sz / 3012 ecache_associativity; 3013 3014 /* 3015 * For Spitfire/Blackbird, we need to look at 3016 * the PSYND to make sure that this 8-byte chunk 3017 * is the right one. PSYND bits [15:8] belong 3018 * to the upper 8-byte (even) chunk. Bits 3019 * [7:0] belong to the lower 8-byte chunk (odd). 3020 */ 3021 psynd = ecdptr->ec_afsr & P_AFSR_P_SYND; 3022 if (!isus2i && !isus2e) { 3023 if (j & 0x1) 3024 psynd = psynd & 0xFF; 3025 else 3026 psynd = psynd >> 8; 3027 3028 if (!psynd) 3029 continue; /* wrong chunk */ 3030 } 3031 3032 /* Construct the PA */ 3033 errpa = ((t_etag & cpu_ec_tag_mask) << 3034 cpu_ec_tag_shift) | ((i | (j << 3)) % 3035 ec_set_size); 3036 3037 /* clean up the cache line */ 3038 flushecacheline(P2ALIGN(errpa, 64), 3039 cpunodes[CPU->cpu_id].ecache_size); 3040 3041 oafsr = clear_errors(NULL, cpu_afsr); 3042 acc_afsr |= oafsr; 3043 3044 (*linecnt)++; 3045 3046 /* 3047 * Capture the PA for the first bad line found. 3048 * Return the ecache dump and tag info. 3049 */ 3050 if (pa == AFLT_INV_ADDR) { 3051 int k; 3052 3053 pa = errpa; 3054 for (k = 0; k < 8; k++) 3055 ecache_data[k] = t_ecdata[k]; 3056 *ecache_tag = t_etag; 3057 } 3058 break; 3059 } 3060 } 3061 } 3062 *t_afar = pa; 3063 *t_afsr = acc_afsr; 3064 } 3065 3066 static void 3067 cpu_log_ecmem_info(spitf_async_flt *spf_flt) 3068 { 3069 struct async_flt *aflt = (struct async_flt *)spf_flt; 3070 uint64_t ecache_tag = spf_flt->flt_ec_tag; 3071 char linestr[30]; 3072 char *state_str; 3073 int i; 3074 3075 /* 3076 * Check the ecache tag to make sure it 3077 * is valid. If invalid, a memory dump was 3078 * captured instead of a ecache dump. 
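* (A memory dump is easy to recognize in the output below: its flt_ec_data[] AFSR values were left zero, so no chunk is flagged *Bad*.)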
3079 */ 3080 if (spf_flt->flt_ec_tag != AFLT_INV_ADDR) { 3081 uchar_t eparity = (uchar_t) 3082 ((ecache_tag & cpu_ec_par_mask) >> cpu_ec_par_shift); 3083 3084 uchar_t estate = (uchar_t) 3085 ((ecache_tag & cpu_ec_state_mask) >> cpu_ec_state_shift); 3086 3087 if (estate == cpu_ec_state_shr) 3088 state_str = "Shared"; 3089 else if (estate == cpu_ec_state_exl) 3090 state_str = "Exclusive"; 3091 else if (estate == cpu_ec_state_own) 3092 state_str = "Owner"; 3093 else if (estate == cpu_ec_state_mod) 3094 state_str = "Modified"; 3095 else 3096 state_str = "Invalid"; 3097 3098 if (spf_flt->flt_ec_lcnt > 1) { 3099 (void) snprintf(linestr, sizeof (linestr), 3100 "Badlines found=%d", spf_flt->flt_ec_lcnt); 3101 } else { 3102 linestr[0] = '\0'; 3103 } 3104 3105 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL, 3106 " PA=0x%08x.%08x\n E$tag 0x%08x.%08x E$State: %s " 3107 "E$parity 0x%02x %s", (uint32_t)(aflt->flt_addr >> 32), 3108 (uint32_t)aflt->flt_addr, (uint32_t)(ecache_tag >> 32), 3109 (uint32_t)ecache_tag, state_str, 3110 (uint32_t)eparity, linestr); 3111 } else { 3112 cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL, 3113 " E$tag != PA from AFAR; E$line was victimized" 3114 "\n dumping memory from PA 0x%08x.%08x instead", 3115 (uint32_t)(P2ALIGN(aflt->flt_addr, 64) >> 32), 3116 (uint32_t)P2ALIGN(aflt->flt_addr, 64)); 3117 } 3118 3119 /* 3120 * Dump out all 8 8-byte ecache data captured 3121 * For each 8-byte data captured, we check the 3122 * captured afsr's parity syndrome to find out 3123 * which 8-byte chunk is bad. For memory dump, the 3124 * AFSR values were initialized to 0. 3125 */ 3126 for (i = 0; i < 8; i++) { 3127 ec_data_t *ecdptr; 3128 uint_t offset; 3129 ushort_t psynd; 3130 ushort_t bad; 3131 uint64_t edp; 3132 3133 offset = i << 3; /* multiply by 8 */ 3134 ecdptr = &spf_flt->flt_ec_data[i]; 3135 psynd = ecdptr->ec_afsr & P_AFSR_P_SYND; 3136 edp = ecdptr->ec_afsr & P_AFSR_EDP; 3137 3138 /* 3139 * For Sabre/Hummingbird, parity synd is captured only 3140 * in [7:0] of AFSR.PSYND for each 8-byte chunk. 3141 * For spitfire/blackbird, AFSR.PSYND is captured 3142 * in 16-byte granularity. [15:8] represent 3143 * the upper 8 byte and [7:0] the lower 8 byte. 3144 */ 3145 if (isus2i || isus2e || (i & 0x1)) 3146 bad = (psynd & 0xFF); /* check bits [7:0] */ 3147 else 3148 bad = (psynd & 0xFF00); /* check bits [15:8] */ 3149 3150 if (bad && edp) { 3151 cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL, 3152 " E$Data (0x%02x): 0x%08x.%08x " 3153 "*Bad* PSYND=0x%04x", offset, 3154 (uint32_t)(ecdptr->ec_d8 >> 32), 3155 (uint32_t)ecdptr->ec_d8, psynd); 3156 } else { 3157 cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL, 3158 " E$Data (0x%02x): 0x%08x.%08x", offset, 3159 (uint32_t)(ecdptr->ec_d8 >> 32), 3160 (uint32_t)ecdptr->ec_d8); 3161 } 3162 } 3163 } 3164 3165 /* 3166 * Common logging function for all cpu async errors. This function allows the 3167 * caller to generate a single cmn_err() call that logs the appropriate items 3168 * from the fault structure, and implements our rules for AFT logging levels. 3169 * 3170 * ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT) 3171 * tagnum: 0, 1, 2, .. generate the [AFT#] tag 3172 * spflt: pointer to spitfire async fault structure 3173 * logflags: bitflags indicating what to output 3174 * endstr: a end string to appear at the end of this log 3175 * fmt: a format string to appear at the beginning of the log 3176 * 3177 * The logflags allows the construction of predetermined output from the spflt 3178 * structure. 
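* (For instance, log_ue_err() above passes UE_LFLAGS together with the unum string as endstr; see the note on <endstr> and CPU_SYND below.)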
The individual data items always appear in a consistent order. 3179 * Note that either or both of the spflt structure pointer and logflags may be 3180 * NULL or zero respectively, indicating that the predetermined output 3181 * substrings are not requested in this log. The output looks like this: 3182 * 3183 * [AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU> 3184 * <CPU_SPACE><CPU_ERRID> 3185 * newline+4spaces<CPU_AFSR><CPU_AFAR> 3186 * newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC> 3187 * newline+4spaces<CPU_UDBH><CPU_UDBL> 3188 * newline+4spaces<CPU_SYND> 3189 * newline+4spaces<endstr> 3190 * 3191 * Note that <endstr> may not start on a newline if we are logging <CPU_PSYND>; 3192 * it is assumed that <endstr> will be the unum string in this case. The size 3193 * of our intermediate formatting buf[] is based on the worst case of all flags 3194 * being enabled. We pass the caller's varargs directly to vcmn_err() for 3195 * formatting so we don't need additional stack space to format them here. 3196 */ 3197 /*PRINTFLIKE6*/ 3198 static void 3199 cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, uint_t logflags, 3200 const char *endstr, const char *fmt, ...) 3201 { 3202 struct async_flt *aflt = (struct async_flt *)spflt; 3203 char buf[400], *p, *q; /* see comments about buf[] size above */ 3204 va_list ap; 3205 int console_log_flag; 3206 3207 if ((aflt == NULL) || ((aflt->flt_class == CPU_FAULT) && 3208 (aflt->flt_stat & P_AFSR_LEVEL1)) || 3209 (aflt->flt_panic)) { 3210 console_log_flag = (tagnum < 2) || aft_verbose; 3211 } else { 3212 int verbose = ((aflt->flt_class == BUS_FAULT) || 3213 (aflt->flt_stat & P_AFSR_CE)) ? 3214 ce_verbose_memory : ce_verbose_other; 3215 3216 if (!verbose) 3217 return; 3218 3219 console_log_flag = (verbose > 1); 3220 } 3221 3222 if (console_log_flag) 3223 (void) sprintf(buf, "[AFT%d]", tagnum); 3224 else 3225 (void) sprintf(buf, "![AFT%d]", tagnum); 3226 3227 p = buf + strlen(buf); /* current buffer position */ 3228 q = buf + sizeof (buf); /* pointer past end of buffer */ 3229 3230 if (spflt != NULL && (logflags & CPU_ERRID_FIRST)) { 3231 (void) snprintf(p, (size_t)(q - p), " errID 0x%08x.%08x", 3232 (uint32_t)(aflt->flt_id >> 32), (uint32_t)aflt->flt_id); 3233 p += strlen(p); 3234 } 3235 3236 /* 3237 * Copy the caller's format string verbatim into buf[]. It will be 3238 * formatted by the call to vcmn_err() at the end of this function. 3239 */ 3240 if (fmt != NULL && p < q) { 3241 (void) strncpy(p, fmt, (size_t)(q - p - 1)); 3242 buf[sizeof (buf) - 1] = '\0'; 3243 p += strlen(p); 3244 } 3245 3246 if (spflt != NULL) { 3247 if (logflags & CPU_FLTCPU) { 3248 (void) snprintf(p, (size_t)(q - p), " CPU%d", 3249 aflt->flt_inst); 3250 p += strlen(p); 3251 } 3252 3253 if (logflags & CPU_SPACE) { 3254 if (aflt->flt_status & ECC_D_TRAP) 3255 (void) snprintf(p, (size_t)(q - p), 3256 " Data access"); 3257 else if (aflt->flt_status & ECC_I_TRAP) 3258 (void) snprintf(p, (size_t)(q - p), 3259 " Instruction access"); 3260 p += strlen(p); 3261 } 3262 3263 if (logflags & CPU_TL) { 3264 (void) snprintf(p, (size_t)(q - p), " at TL%s", 3265 aflt->flt_tl ? 
">0" : "=0"); 3266 p += strlen(p); 3267 } 3268 3269 if (logflags & CPU_ERRID) { 3270 (void) snprintf(p, (size_t)(q - p), 3271 ", errID 0x%08x.%08x", 3272 (uint32_t)(aflt->flt_id >> 32), 3273 (uint32_t)aflt->flt_id); 3274 p += strlen(p); 3275 } 3276 3277 if (logflags & CPU_AFSR) { 3278 (void) snprintf(p, (size_t)(q - p), 3279 "\n AFSR 0x%08b.%08b", 3280 (uint32_t)(aflt->flt_stat >> 32), AFSR_FMTSTR0, 3281 (uint32_t)aflt->flt_stat, AFSR_FMTSTR1); 3282 p += strlen(p); 3283 } 3284 3285 if (logflags & CPU_AFAR) { 3286 (void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x", 3287 (uint32_t)(aflt->flt_addr >> 32), 3288 (uint32_t)aflt->flt_addr); 3289 p += strlen(p); 3290 } 3291 3292 if (logflags & CPU_AF_PSYND) { 3293 ushort_t psynd = (ushort_t) 3294 (aflt->flt_stat & P_AFSR_P_SYND); 3295 3296 (void) snprintf(p, (size_t)(q - p), 3297 "\n AFSR.PSYND 0x%04x(Score %02d)", 3298 psynd, ecc_psynd_score(psynd)); 3299 p += strlen(p); 3300 } 3301 3302 if (logflags & CPU_AF_ETS) { 3303 (void) snprintf(p, (size_t)(q - p), " AFSR.ETS 0x%02x", 3304 (uchar_t)((aflt->flt_stat & P_AFSR_ETS) >> 16)); 3305 p += strlen(p); 3306 } 3307 3308 if (logflags & CPU_FAULTPC) { 3309 (void) snprintf(p, (size_t)(q - p), " Fault_PC 0x%p", 3310 (void *)aflt->flt_pc); 3311 p += strlen(p); 3312 } 3313 3314 if (logflags & CPU_UDBH) { 3315 (void) snprintf(p, (size_t)(q - p), 3316 "\n UDBH 0x%04b UDBH.ESYND 0x%02x", 3317 spflt->flt_sdbh, UDB_FMTSTR, 3318 spflt->flt_sdbh & 0xFF); 3319 p += strlen(p); 3320 } 3321 3322 if (logflags & CPU_UDBL) { 3323 (void) snprintf(p, (size_t)(q - p), 3324 " UDBL 0x%04b UDBL.ESYND 0x%02x", 3325 spflt->flt_sdbl, UDB_FMTSTR, 3326 spflt->flt_sdbl & 0xFF); 3327 p += strlen(p); 3328 } 3329 3330 if (logflags & CPU_SYND) { 3331 ushort_t synd = SYND(aflt->flt_synd); 3332 3333 (void) snprintf(p, (size_t)(q - p), 3334 "\n %s Syndrome 0x%x Memory Module ", 3335 UDBL(aflt->flt_synd) ? "UDBL" : "UDBH", synd); 3336 p += strlen(p); 3337 } 3338 } 3339 3340 if (endstr != NULL) { 3341 if (!(logflags & CPU_SYND)) 3342 (void) snprintf(p, (size_t)(q - p), "\n %s", endstr); 3343 else 3344 (void) snprintf(p, (size_t)(q - p), "%s", endstr); 3345 p += strlen(p); 3346 } 3347 3348 if (ce_code == CE_CONT && (p < q - 1)) 3349 (void) strcpy(p, "\n"); /* add final \n if needed */ 3350 3351 va_start(ap, fmt); 3352 vcmn_err(ce_code, buf, ap); 3353 va_end(ap); 3354 } 3355 3356 /* 3357 * Ecache Scrubbing 3358 * 3359 * The basic idea is to prevent lines from sitting in the ecache long enough 3360 * to build up soft errors which can lead to ecache parity errors. 3361 * 3362 * The following rules are observed when flushing the ecache: 3363 * 3364 * 1. When the system is busy, flush bad clean lines 3365 * 2. When the system is idle, flush all clean lines 3366 * 3. When the system is idle, flush good dirty lines 3367 * 4. Never flush bad dirty lines. 3368 * 3369 * modify parity busy idle 3370 * ---------------------------- 3371 * clean good X 3372 * clean bad X X 3373 * dirty good X 3374 * dirty bad 3375 * 3376 * Bad or good refers to whether a line has an E$ parity error or not. 3377 * Clean or dirty refers to the state of the modified bit. We currently 3378 * default the scan rate to 100 (scan 10% of the cache per second). 3379 * 3380 * The following are E$ states and actions. 
3381 * 3382 * We encode our state as a 3-bit number, consisting of: 3383 * ECACHE_STATE_MODIFIED (0=clean, 1=dirty) 3384 * ECACHE_STATE_PARITY (0=good, 1=bad) 3385 * ECACHE_STATE_BUSY (0=idle, 1=busy) 3386 * 3387 * We associate a flushing and a logging action with each state. 3388 * 3389 * E$ actions are different for Spitfire and Sabre/Hummingbird modules. 3390 * MIRROR_FLUSH indicates that an E$ line will be flushed for the mirrored 3391 * E$ only, in addition to value being set by ec_flush. 3392 */ 3393 3394 #define ALWAYS_FLUSH 0x1 /* flush E$ line on all E$ types */ 3395 #define NEVER_FLUSH 0x0 /* never the flush the E$ line */ 3396 #define MIRROR_FLUSH 0xF /* flush E$ line on mirrored E$ only */ 3397 3398 struct { 3399 char ec_flush; /* whether to flush or not */ 3400 char ec_log; /* ecache logging */ 3401 char ec_log_type; /* log type info */ 3402 } ec_action[] = { /* states of the E$ line in M P B */ 3403 { ALWAYS_FLUSH, 0, 0 }, /* 0 0 0 clean_good_idle */ 3404 { MIRROR_FLUSH, 0, 0 }, /* 0 0 1 clean_good_busy */ 3405 { ALWAYS_FLUSH, 1, CPU_BADLINE_CI_ERR }, /* 0 1 0 clean_bad_idle */ 3406 { ALWAYS_FLUSH, 1, CPU_BADLINE_CB_ERR }, /* 0 1 1 clean_bad_busy */ 3407 { ALWAYS_FLUSH, 0, 0 }, /* 1 0 0 dirty_good_idle */ 3408 { MIRROR_FLUSH, 0, 0 }, /* 1 0 1 dirty_good_busy */ 3409 { NEVER_FLUSH, 1, CPU_BADLINE_DI_ERR }, /* 1 1 0 dirty_bad_idle */ 3410 { NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR } /* 1 1 1 dirty_bad_busy */ 3411 }; 3412 3413 /* 3414 * Offsets into the ec_action[] that determines clean_good_busy and 3415 * dirty_good_busy lines. 3416 */ 3417 #define ECACHE_CGB_LINE 1 /* E$ clean_good_busy line */ 3418 #define ECACHE_DGB_LINE 5 /* E$ dirty_good_busy line */ 3419 3420 /* 3421 * We are flushing lines which are Clean_Good_Busy and also the lines 3422 * Dirty_Good_Busy. And we only follow it for non-mirrored E$. 3423 */ 3424 #define CGB(x, m) (((x) == ECACHE_CGB_LINE) && (m != ECACHE_CPU_MIRROR)) 3425 #define DGB(x, m) (((x) == ECACHE_DGB_LINE) && (m != ECACHE_CPU_MIRROR)) 3426 3427 #define ECACHE_STATE_MODIFIED 0x4 3428 #define ECACHE_STATE_PARITY 0x2 3429 #define ECACHE_STATE_BUSY 0x1 3430 3431 /* 3432 * If ecache is mirrored ecache_calls_a_sec and ecache_scan_rate are reduced. 3433 */ 3434 int ecache_calls_a_sec_mirrored = 1; 3435 int ecache_lines_per_call_mirrored = 1; 3436 3437 int ecache_scrub_enable = 1; /* ecache scrubbing is on by default */ 3438 int ecache_scrub_verbose = 1; /* prints clean and dirty lines */ 3439 int ecache_scrub_panic = 0; /* panics on a clean and dirty line */ 3440 int ecache_calls_a_sec = 100; /* scrubber calls per sec */ 3441 int ecache_scan_rate = 100; /* scan rate (in tenths of a percent) */ 3442 int ecache_idle_factor = 1; /* increase the scan rate when idle */ 3443 int ecache_flush_clean_good_busy = 50; /* flush rate (in percent) */ 3444 int ecache_flush_dirty_good_busy = 100; /* flush rate (in percent) */ 3445 3446 volatile int ec_timeout_calls = 1; /* timeout calls */ 3447 3448 /* 3449 * Interrupt number and pil for ecache scrubber cross-trap calls. 3450 */ 3451 static uint_t ecache_scrub_inum; 3452 uint_t ecache_scrub_pil = PIL_9; 3453 3454 /* 3455 * Kstats for the E$ scrubber. 
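* The first eight counters must remain in M/P/B order: scrub_ecache_line() below selects which one to bump by adding the 3-bit mpb state directly to the base of this block (ec_knp = (kstat_named_t *)ec_ksp + mpb), so e.g. a dirty line with bad parity found while busy (mpb = 7) bumps dirty_bad_busy.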
3456 */ 3457 typedef struct ecache_kstat { 3458 kstat_named_t clean_good_idle; /* # of lines scrubbed */ 3459 kstat_named_t clean_good_busy; /* # of lines skipped */ 3460 kstat_named_t clean_bad_idle; /* # of lines scrubbed */ 3461 kstat_named_t clean_bad_busy; /* # of lines scrubbed */ 3462 kstat_named_t dirty_good_idle; /* # of lines scrubbed */ 3463 kstat_named_t dirty_good_busy; /* # of lines skipped */ 3464 kstat_named_t dirty_bad_idle; /* # of lines skipped */ 3465 kstat_named_t dirty_bad_busy; /* # of lines skipped */ 3466 kstat_named_t invalid_lines; /* # of invalid lines */ 3467 kstat_named_t clean_good_busy_flush; /* # of lines scrubbed */ 3468 kstat_named_t dirty_good_busy_flush; /* # of lines scrubbed */ 3469 kstat_named_t tags_cleared; /* # of E$ tags cleared */ 3470 } ecache_kstat_t; 3471 3472 static ecache_kstat_t ec_kstat_template = { 3473 { "clean_good_idle", KSTAT_DATA_ULONG }, 3474 { "clean_good_busy", KSTAT_DATA_ULONG }, 3475 { "clean_bad_idle", KSTAT_DATA_ULONG }, 3476 { "clean_bad_busy", KSTAT_DATA_ULONG }, 3477 { "dirty_good_idle", KSTAT_DATA_ULONG }, 3478 { "dirty_good_busy", KSTAT_DATA_ULONG }, 3479 { "dirty_bad_idle", KSTAT_DATA_ULONG }, 3480 { "dirty_bad_busy", KSTAT_DATA_ULONG }, 3481 { "invalid_lines", KSTAT_DATA_ULONG }, 3482 { "clean_good_busy_flush", KSTAT_DATA_ULONG }, 3483 { "dirty_good_busy_flush", KSTAT_DATA_ULONG }, 3484 { "ecache_tags_cleared", KSTAT_DATA_ULONG } 3485 }; 3486 3487 struct kmem_cache *sf_private_cache; 3488 3489 /* 3490 * Called periodically on each CPU to scan the ecache once a sec. 3491 * adjusting the ecache line index appropriately 3492 */ 3493 void 3494 scrub_ecache_line() 3495 { 3496 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc); 3497 int cpuid = CPU->cpu_id; 3498 uint32_t index = ssmp->ecache_flush_index; 3499 uint64_t ec_size = cpunodes[cpuid].ecache_size; 3500 size_t ec_linesize = cpunodes[cpuid].ecache_linesize; 3501 int nlines = ssmp->ecache_nlines; 3502 uint32_t ec_set_size = ec_size / ecache_associativity; 3503 int ec_mirror = ssmp->ecache_mirror; 3504 ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data; 3505 3506 int line, scan_lines, flush_clean_busy = 0, flush_dirty_busy = 0; 3507 int mpb; /* encode Modified, Parity, Busy for action */ 3508 uchar_t state; 3509 uint64_t ec_tag, paddr, oafsr, tafsr, nafsr; 3510 uint64_t *acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 3511 ec_data_t ec_data[8]; 3512 kstat_named_t *ec_knp; 3513 3514 switch (ec_mirror) { 3515 default: 3516 case ECACHE_CPU_NON_MIRROR: 3517 /* 3518 * The E$ scan rate is expressed in units of tenths of 3519 * a percent. ecache_scan_rate = 1000 (100%) means the 3520 * whole cache is scanned every second. 3521 */ 3522 scan_lines = (nlines * ecache_scan_rate) / 3523 (1000 * ecache_calls_a_sec); 3524 if (!(ssmp->ecache_busy)) { 3525 if (ecache_idle_factor > 0) { 3526 scan_lines *= ecache_idle_factor; 3527 } 3528 } else { 3529 flush_clean_busy = (scan_lines * 3530 ecache_flush_clean_good_busy) / 100; 3531 flush_dirty_busy = (scan_lines * 3532 ecache_flush_dirty_good_busy) / 100; 3533 } 3534 3535 ec_timeout_calls = (ecache_calls_a_sec ? 3536 ecache_calls_a_sec : 1); 3537 break; 3538 3539 case ECACHE_CPU_MIRROR: 3540 scan_lines = ecache_lines_per_call_mirrored; 3541 ec_timeout_calls = (ecache_calls_a_sec_mirrored ? 
3542 ecache_calls_a_sec_mirrored : 1); 3543 break; 3544 } 3545 3546 /* 3547 * The ecache scrubber algorithm operates by reading and 3548 * decoding the E$ tag to determine whether the corresponding E$ line 3549 * can be scrubbed. There is an implicit assumption in the scrubber 3550 * logic that the E$ tag is valid. Unfortunately, this assertion is 3551 * flawed since the E$ tag may also be corrupted and have parity errors. 3552 * The scrubber logic is enhanced to check the validity of the E$ tag 3553 * before scrubbing. When a parity error is detected in the E$ tag, 3554 * it is possible to recover and scrub the tag under certain conditions 3555 * so that an ETP error condition can be avoided. 3556 */ 3557 3558 for (mpb = line = 0; line < scan_lines; line++, mpb = 0) { 3559 /* 3560 * We get the old-AFSR before clearing the AFSR sticky bits 3561 * in {get_ecache_tag, check_ecache_line, get_ecache_dtag}. 3562 * If CP bit is set in the old-AFSR, we log an Orphan CP event. 3563 */ 3564 ec_tag = get_ecache_tag(index, &nafsr, acc_afsr); 3565 state = (uchar_t)((ec_tag & cpu_ec_state_mask) >> 3566 cpu_ec_state_shift); 3567 3568 /* 3569 * If ETP is set, try to scrub the ecache tag. 3570 */ 3571 if (nafsr & P_AFSR_ETP) { 3572 ecache_scrub_tag_err(nafsr, state, index); 3573 } else if (state & cpu_ec_state_valid) { 3574 /* 3575 * ETP is not set and the E$ tag is valid. 3576 * Proceed with the E$ scrubbing. 3577 */ 3578 if (state & cpu_ec_state_dirty) 3579 mpb |= ECACHE_STATE_MODIFIED; 3580 3581 tafsr = check_ecache_line(index, acc_afsr); 3582 3583 if (tafsr & P_AFSR_EDP) { 3584 mpb |= ECACHE_STATE_PARITY; 3585 3586 if (ecache_scrub_verbose || 3587 ecache_scrub_panic) { 3588 get_ecache_dtag(P2ALIGN(index, 64), 3589 (uint64_t *)&ec_data[0], 3590 &ec_tag, &oafsr, acc_afsr); 3591 } 3592 } 3593 3594 if (ssmp->ecache_busy) 3595 mpb |= ECACHE_STATE_BUSY; 3596 3597 ec_knp = (kstat_named_t *)ec_ksp + mpb; 3598 ec_knp->value.ul++; 3599 3600 paddr = ((ec_tag & cpu_ec_tag_mask) << 3601 cpu_ec_tag_shift) | (index % ec_set_size); 3602 3603 /* 3604 * We flush the E$ lines depending on ec_flush; for mirrored 3605 * E$ we additionally flush the clean_good_busy and 3606 * dirty_good_busy lines. 3607 */ 3608 if (ec_action[mpb].ec_flush == ALWAYS_FLUSH) { 3609 flushecacheline(paddr, ec_size); 3610 } else if ((ec_mirror == ECACHE_CPU_MIRROR) && 3611 (ec_action[mpb].ec_flush == MIRROR_FLUSH)) { 3612 flushecacheline(paddr, ec_size); 3613 } else if (ec_action[mpb].ec_flush == NEVER_FLUSH) { 3614 softcall(ecache_page_retire, (void *)paddr); 3615 } 3616 3617 /* 3618 * Conditionally flush both the clean_good and 3619 * dirty_good lines when busy.
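* As a rough worked example under the default tunables above (scan rate 100 = 10%/sec, 100 calls/sec, 50% and 100% flush rates), and assuming a hypothetical 512KB E$ with 64-byte lines (8192 lines): each call scans 8 lines and, when busy, permits at most 4 clean_good_busy and 8 dirty_good_busy flushes.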
3620 */ 3621 if (CGB(mpb, ec_mirror) && (flush_clean_busy > 0)) { 3622 flush_clean_busy--; 3623 flushecacheline(paddr, ec_size); 3624 ec_ksp->clean_good_busy_flush.value.ul++; 3625 } else if (DGB(mpb, ec_mirror) && 3626 (flush_dirty_busy > 0)) { 3627 flush_dirty_busy--; 3628 flushecacheline(paddr, ec_size); 3629 ec_ksp->dirty_good_busy_flush.value.ul++; 3630 } 3631 3632 if (ec_action[mpb].ec_log && (ecache_scrub_verbose || 3633 ecache_scrub_panic)) { 3634 ecache_scrub_log(ec_data, ec_tag, paddr, mpb, 3635 tafsr); 3636 } 3637 3638 } else { 3639 ec_ksp->invalid_lines.value.ul++; 3640 } 3641 3642 if ((index += ec_linesize) >= ec_size) 3643 index = 0; 3644 3645 } 3646 3647 /* 3648 * set the ecache scrub index for the next time around 3649 */ 3650 ssmp->ecache_flush_index = index; 3651 3652 if (*acc_afsr & P_AFSR_CP) { 3653 uint64_t ret_afsr; 3654 3655 ret_afsr = ecache_scrub_misc_err(CPU_ORPHAN_CP_ERR, *acc_afsr); 3656 if ((ret_afsr & P_AFSR_CP) == 0) 3657 *acc_afsr = 0; 3658 } 3659 } 3660 3661 /* 3662 * Handler for ecache_scrub_inum softint. Call scrub_ecache_line until 3663 * we decrement the outstanding request count to zero. 3664 */ 3665 3666 /*ARGSUSED*/ 3667 uint_t 3668 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2) 3669 { 3670 int i; 3671 int outstanding; 3672 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc); 3673 uint32_t *countp = &ssmp->ec_scrub_outstanding; 3674 3675 do { 3676 outstanding = *countp; 3677 ASSERT(outstanding > 0); 3678 for (i = 0; i < outstanding; i++) 3679 scrub_ecache_line(); 3680 } while (atomic_add_32_nv(countp, -outstanding)); 3681 3682 return (DDI_INTR_CLAIMED); 3683 } 3684 3685 /* 3686 * force each cpu to perform an ecache scrub, called from a timeout 3687 */ 3688 extern xcfunc_t ecache_scrubreq_tl1; 3689 3690 void 3691 do_scrub_ecache_line(void) 3692 { 3693 long delta; 3694 3695 if (ecache_calls_a_sec > hz) 3696 ecache_calls_a_sec = hz; 3697 else if (ecache_calls_a_sec <= 0) 3698 ecache_calls_a_sec = 1; 3699 3700 if (ecache_calls_a_sec_mirrored > hz) 3701 ecache_calls_a_sec_mirrored = hz; 3702 else if (ecache_calls_a_sec_mirrored <= 0) 3703 ecache_calls_a_sec_mirrored = 1; 3704 3705 if (ecache_scrub_enable) { 3706 xt_all(ecache_scrubreq_tl1, ecache_scrub_inum, 0); 3707 delta = hz / ec_timeout_calls; 3708 } else { 3709 delta = hz; 3710 } 3711 3712 (void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0, 3713 delta); 3714 } 3715 3716 /* 3717 * initialization for ecache scrubbing 3718 * This routine is called AFTER all cpus have had cpu_init_private called 3719 * to initialize their private data areas. 3720 */ 3721 void 3722 cpu_init_cache_scrub(void) 3723 { 3724 if (ecache_calls_a_sec > hz) { 3725 cmn_err(CE_NOTE, "ecache_calls_a_sec set too high (%d); " 3726 "resetting to hz (%d)", ecache_calls_a_sec, hz); 3727 ecache_calls_a_sec = hz; 3728 } 3729 3730 /* 3731 * Register softint for ecache scrubbing. 3732 */ 3733 ecache_scrub_inum = add_softintr(ecache_scrub_pil, 3734 scrub_ecache_line_intr, NULL); 3735 3736 /* 3737 * kick off the scrubbing using realtime timeout 3738 */ 3739 (void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0, 3740 hz / ecache_calls_a_sec); 3741 } 3742 3743 /* 3744 * Unset the busy flag for this cpu. 3745 */ 3746 void 3747 cpu_idle_ecache_scrub(struct cpu *cp) 3748 { 3749 if (CPU_PRIVATE(cp) != NULL) { 3750 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, 3751 sfpr_scrub_misc); 3752 ssmp->ecache_busy = ECACHE_CPU_IDLE; 3753 } 3754 } 3755 3756 /* 3757 * Set the busy flag for this cpu. 
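 * scrub_ecache_line() consults this flag to skip the idle-time scan-rate boost and to ration the busy-line flushes.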
3758 */ 3759 void 3760 cpu_busy_ecache_scrub(struct cpu *cp) 3761 { 3762 if (CPU_PRIVATE(cp) != NULL) { 3763 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, 3764 sfpr_scrub_misc); 3765 ssmp->ecache_busy = ECACHE_CPU_BUSY; 3766 } 3767 } 3768 3769 /* 3770 * initialize the ecache scrubber data structures 3771 * The global entry point cpu_init_private replaces this entry point. 3772 * 3773 */ 3774 static void 3775 cpu_init_ecache_scrub_dr(struct cpu *cp) 3776 { 3777 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc); 3778 int cpuid = cp->cpu_id; 3779 3780 /* 3781 * initialize bookkeeping for cache scrubbing 3782 */ 3783 bzero(ssmp, sizeof (spitfire_scrub_misc_t)); 3784 3785 ssmp->ecache_flush_index = 0; 3786 3787 ssmp->ecache_nlines = 3788 cpunodes[cpuid].ecache_size / cpunodes[cpuid].ecache_linesize; 3789 3790 /* 3791 * Determine whether we are running on mirrored SRAM 3792 */ 3793 3794 if (cpunodes[cpuid].msram == ECACHE_CPU_MIRROR) 3795 ssmp->ecache_mirror = ECACHE_CPU_MIRROR; 3796 else 3797 ssmp->ecache_mirror = ECACHE_CPU_NON_MIRROR; 3798 3799 cpu_busy_ecache_scrub(cp); 3800 3801 /* 3802 * initialize the kstats 3803 */ 3804 ecache_kstat_init(cp); 3805 } 3806 3807 /* 3808 * uninitialize the ecache scrubber data structures 3809 * The global entry point cpu_uninit_private replaces this entry point. 3810 */ 3811 static void 3812 cpu_uninit_ecache_scrub_dr(struct cpu *cp) 3813 { 3814 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc); 3815 3816 if (ssmp->ecache_ksp != NULL) { 3817 kstat_delete(ssmp->ecache_ksp); 3818 ssmp->ecache_ksp = NULL; 3819 } 3820 3821 /* 3822 * un-initialize bookkeeping for cache scrubbing 3823 */ 3824 bzero(ssmp, sizeof (spitfire_scrub_misc_t)); 3825 3826 cpu_idle_ecache_scrub(cp); 3827 } 3828 3829 struct kmem_cache *sf_private_cache; 3830 3831 /* 3832 * Cpu private initialization. This includes allocating the cpu_private 3833 * data structure, initializing it, and initializing the scrubber for this 3834 * cpu. This is called once for EVERY cpu, including CPU 0. This function 3835 * calls cpu_init_ecache_scrub_dr to init the scrubber. 3836 * We use kmem_cache_create for the spitfire private data structure because it 3837 * needs to be allocated on a S_ECACHE_MAX_LSIZE (64) byte boundary. 3838 */ 3839 void 3840 cpu_init_private(struct cpu *cp) 3841 { 3842 spitfire_private_t *sfprp; 3843 3844 ASSERT(CPU_PRIVATE(cp) == NULL); 3845 3846 /* 3847 * If the sf_private_cache has not been created, create it. 3848 */ 3849 if (sf_private_cache == NULL) { 3850 sf_private_cache = kmem_cache_create("sf_private_cache", 3851 sizeof (spitfire_private_t), S_ECACHE_MAX_LSIZE, NULL, 3852 NULL, NULL, NULL, NULL, 0); 3853 ASSERT(sf_private_cache); 3854 } 3855 3856 sfprp = CPU_PRIVATE(cp) = kmem_cache_alloc(sf_private_cache, KM_SLEEP); 3857 3858 bzero(sfprp, sizeof (spitfire_private_t)); 3859 3860 cpu_init_ecache_scrub_dr(cp); 3861 } 3862 3863 /* 3864 * Cpu private uninitialization. Uninitialize the Ecache scrubber and 3865 * deallocate the scrubber data structures and cpu_private data structure. 3866 * For now, this function just calls cpu_uninit_ecache_scrub_dr to uninit 3867 * the scrubber for the specified cpu.
3868 */ 3869 void 3870 cpu_uninit_private(struct cpu *cp) 3871 { 3872 ASSERT(CPU_PRIVATE(cp)); 3873 3874 cpu_uninit_ecache_scrub_dr(cp); 3875 kmem_cache_free(sf_private_cache, CPU_PRIVATE(cp)); 3876 CPU_PRIVATE(cp) = NULL; 3877 } 3878 3879 /* 3880 * initialize the ecache kstats for each cpu 3881 */ 3882 static void 3883 ecache_kstat_init(struct cpu *cp) 3884 { 3885 struct kstat *ksp; 3886 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc); 3887 3888 ASSERT(ssmp != NULL); 3889 3890 if ((ksp = kstat_create("unix", cp->cpu_id, "ecache_kstat", "misc", 3891 KSTAT_TYPE_NAMED, 3892 sizeof (ecache_kstat_t) / sizeof (kstat_named_t), 3893 KSTAT_FLAG_WRITABLE)) == NULL) { 3894 ssmp->ecache_ksp = NULL; 3895 cmn_err(CE_NOTE, "!ecache_kstat_init(%d) failed\n", cp->cpu_id); 3896 return; 3897 } 3898 3899 ssmp->ecache_ksp = ksp; 3900 bcopy(&ec_kstat_template, ksp->ks_data, sizeof (ecache_kstat_t)); 3901 kstat_install(ksp); 3902 } 3903 3904 /* 3905 * log the bad ecache information 3906 */ 3907 static void 3908 ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, uint64_t paddr, int mpb, 3909 uint64_t afsr) 3910 { 3911 spitf_async_flt spf_flt; 3912 struct async_flt *aflt; 3913 int i; 3914 char *class; 3915 3916 bzero(&spf_flt, sizeof (spitf_async_flt)); 3917 aflt = &spf_flt.cmn_asyncflt; 3918 3919 for (i = 0; i < 8; i++) { 3920 spf_flt.flt_ec_data[i] = ec_data[i]; 3921 } 3922 3923 spf_flt.flt_ec_tag = ec_tag; 3924 3925 if (mpb < (sizeof (ec_action) / sizeof (ec_action[0]))) { 3926 spf_flt.flt_type = ec_action[mpb].ec_log_type; 3927 } else spf_flt.flt_type = (ushort_t)mpb; 3928 3929 aflt->flt_inst = CPU->cpu_id; 3930 aflt->flt_class = CPU_FAULT; 3931 aflt->flt_id = gethrtime_waitfree(); 3932 aflt->flt_addr = paddr; 3933 aflt->flt_stat = afsr; 3934 aflt->flt_panic = (uchar_t)ecache_scrub_panic; 3935 3936 switch (mpb) { 3937 case CPU_ECACHE_TAG_ERR: 3938 case CPU_ECACHE_ADDR_PAR_ERR: 3939 case CPU_ECACHE_ETP_ETS_ERR: 3940 case CPU_ECACHE_STATE_ERR: 3941 class = FM_EREPORT_CPU_USII_ESCRUB_TAG; 3942 break; 3943 default: 3944 class = FM_EREPORT_CPU_USII_ESCRUB_DATA; 3945 break; 3946 } 3947 3948 cpu_errorq_dispatch(class, (void *)&spf_flt, sizeof (spf_flt), 3949 ue_queue, aflt->flt_panic); 3950 3951 if (aflt->flt_panic) 3952 cmn_err(CE_PANIC, "ecache_scrub_panic set and bad E$" 3953 " line detected"); 3954 } 3955 3956 /* 3957 * Process an ecache error that occurred during the E$ scrubbing. 3958 * We do the ecache scan to find the bad line, flush the bad line 3959 * and start the memscrubber to find any UE (in memory or in another cache). 3960 */ 3961 static uint64_t 3962 ecache_scrub_misc_err(int type, uint64_t afsr) 3963 { 3964 spitf_async_flt spf_flt; 3965 struct async_flt *aflt; 3966 uint64_t oafsr; 3967 3968 bzero(&spf_flt, sizeof (spitf_async_flt)); 3969 aflt = &spf_flt.cmn_asyncflt; 3970 3971 /* 3972 * Scan each line in the cache to look for the one 3973 * with bad parity. 3974 */ 3975 aflt->flt_addr = AFLT_INV_ADDR; 3976 scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0], 3977 &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr); 3978 3979 if (oafsr & P_AFSR_CP) { 3980 uint64_t *cp_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 3981 *cp_afsr |= oafsr; 3982 } 3983 3984 /* 3985 * If we found a bad PA, update the state to indicate if it is 3986 * memory or I/O space. 3987 */ 3988 if (aflt->flt_addr != AFLT_INV_ADDR) { 3989 aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >> 3990 MMU_PAGESHIFT)) ?
1 : 0; 3991 } 3992 3993 spf_flt.flt_type = (ushort_t)type; 3994 3995 aflt->flt_inst = CPU->cpu_id; 3996 aflt->flt_class = CPU_FAULT; 3997 aflt->flt_id = gethrtime_waitfree(); 3998 aflt->flt_status = afsr; 3999 aflt->flt_panic = (uchar_t)ecache_scrub_panic; 4000 4001 /* 4002 * We have the bad line, flush that line and start 4003 * the memscrubber. 4004 */ 4005 if (spf_flt.flt_ec_lcnt > 0) { 4006 flushecacheline(P2ALIGN(aflt->flt_addr, 64), 4007 cpunodes[CPU->cpu_id].ecache_size); 4008 read_all_memscrub = 1; 4009 memscrub_run(); 4010 } 4011 4012 cpu_errorq_dispatch((type == CPU_ORPHAN_CP_ERR) ? 4013 FM_EREPORT_CPU_USII_CP : FM_EREPORT_CPU_USII_UNKNOWN, 4014 (void *)&spf_flt, sizeof (spf_flt), ue_queue, aflt->flt_panic); 4015 4016 return (oafsr); 4017 } 4018 4019 static void 4020 ecache_scrub_tag_err(uint64_t afsr, uchar_t state, uint32_t index) 4021 { 4022 ushort_t afsr_ets = (afsr & P_AFSR_ETS) >> P_AFSR_ETS_SHIFT; 4023 spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc); 4024 ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data; 4025 uint64_t ec_tag, paddr, oafsr; 4026 ec_data_t ec_data[8]; 4027 int cpuid = CPU->cpu_id; 4028 uint32_t ec_set_size = cpunodes[cpuid].ecache_size / 4029 ecache_associativity; 4030 uint64_t *cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr); 4031 4032 get_ecache_dtag(P2ALIGN(index, 64), (uint64_t *)&ec_data[0], &ec_tag, 4033 &oafsr, cpu_afsr); 4034 paddr = ((ec_tag & cpu_ec_tag_mask) << cpu_ec_tag_shift) | 4035 (index % ec_set_size); 4036 4037 /* 4038 * E$ tag state has good parity 4039 */ 4040 if ((afsr_ets & cpu_ec_state_parity) == 0) { 4041 if (afsr_ets & cpu_ec_parity) { 4042 /* 4043 * E$ tag state bits indicate the line is clean, 4044 * invalidate the E$ tag and continue. 4045 */ 4046 if (!(state & cpu_ec_state_dirty)) { 4047 /* 4048 * Zero the tag and mark the state invalid 4049 * with good parity for the tag. 
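 * Sabre (isus2i) and Hummingbird (isus2e) use write_hb_ec_tag_parity() for this; other UltraSPARC-II modules use write_ec_tag_parity().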
4050 */ 4051 if (isus2i || isus2e) 4052 write_hb_ec_tag_parity(index); 4053 else 4054 write_ec_tag_parity(index); 4055 4056 /* Sync with the dual tag */ 4057 flushecacheline(0, 4058 cpunodes[CPU->cpu_id].ecache_size); 4059 ec_ksp->tags_cleared.value.ul++; 4060 ecache_scrub_log(ec_data, ec_tag, paddr, 4061 CPU_ECACHE_TAG_ERR, afsr); 4062 return; 4063 } else { 4064 ecache_scrub_log(ec_data, ec_tag, paddr, 4065 CPU_ECACHE_ADDR_PAR_ERR, afsr); 4066 cmn_err(CE_PANIC, " E$ tag address has bad" 4067 " parity"); 4068 } 4069 } else if ((afsr_ets & cpu_ec_parity) == 0) { 4070 /* 4071 * ETS is zero but ETP is set 4072 */ 4073 ecache_scrub_log(ec_data, ec_tag, paddr, 4074 CPU_ECACHE_ETP_ETS_ERR, afsr); 4075 cmn_err(CE_PANIC, "AFSR.ETP is set and" 4076 " AFSR.ETS is zero"); 4077 } 4078 } else { 4079 /* 4080 * E$ tag state bit has a bad parity 4081 */ 4082 ecache_scrub_log(ec_data, ec_tag, paddr, 4083 CPU_ECACHE_STATE_ERR, afsr); 4084 cmn_err(CE_PANIC, "E$ tag state has bad parity"); 4085 } 4086 } 4087 4088 static void 4089 ecache_page_retire(void *arg) 4090 { 4091 uint64_t paddr = (uint64_t)arg; 4092 page_t *pp = page_numtopp_nolock((pfn_t)(paddr >> MMU_PAGESHIFT)); 4093 4094 if (pp) { 4095 page_settoxic(pp, PAGE_IS_FAULTY); 4096 (void) page_retire(pp, PAGE_IS_TOXIC); 4097 } 4098 } 4099 4100 void 4101 sticksync_slave(void) 4102 {} 4103 4104 void 4105 sticksync_master(void) 4106 {} 4107 4108 /*ARGSUSED*/ 4109 void 4110 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t bpp) 4111 {} 4112 4113 void 4114 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected) 4115 { 4116 int status; 4117 ddi_fm_error_t de; 4118 4119 bzero(&de, sizeof (ddi_fm_error_t)); 4120 4121 de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, 4122 FM_ENA_FMT1); 4123 de.fme_flag = expected; 4124 de.fme_bus_specific = (void *)aflt->flt_addr; 4125 status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de); 4126 4127 if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL)) 4128 aflt->flt_panic = 1; 4129 } 4130 4131 /*ARGSUSED*/ 4132 void 4133 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz, 4134 errorq_t *eqp, uint_t flag) 4135 { 4136 struct async_flt *aflt = (struct async_flt *)payload; 4137 4138 aflt->flt_erpt_class = error_class; 4139 errorq_dispatch(eqp, payload, payload_sz, flag); 4140 } 4141 4142 #define MAX_SIMM 8 4143 4144 struct ce_info { 4145 char name[UNUM_NAMLEN]; 4146 uint64_t intermittent_total; 4147 uint64_t persistent_total; 4148 uint64_t sticky_total; 4149 unsigned short leaky_bucket_cnt; 4150 }; 4151 4152 /* 4153 * Separately-defined structure for use in reporting the ce_info 4154 * to SunVTS without exposing the internal layout and implementation 4155 * of struct ce_info. 4156 */ 4157 static struct ecc_error_info ecc_error_info_data = { 4158 { "version", KSTAT_DATA_UINT32 }, 4159 { "maxcount", KSTAT_DATA_UINT32 }, 4160 { "count", KSTAT_DATA_UINT32 } 4161 }; 4162 static const size_t ecc_error_info_ndata = sizeof (ecc_error_info_data) / 4163 sizeof (struct kstat_named); 4164 4165 #if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN 4166 #error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN" 4167 #endif 4168 4169 struct ce_info *mem_ce_simm = NULL; 4170 size_t mem_ce_simm_size = 0; 4171 4172 /* 4173 * Default values for the number of CE's allowed per interval. 
4174 * Interval is defined in minutes 4175 * SOFTERR_MIN_TIMEOUT is defined in microseconds 4176 */ 4177 #define SOFTERR_LIMIT_DEFAULT 2 4178 #define SOFTERR_INTERVAL_DEFAULT 1440 /* This is 24 hours */ 4179 #define SOFTERR_MIN_TIMEOUT (60 * MICROSEC) /* This is 1 minute */ 4180 #define TIMEOUT_NONE ((timeout_id_t)0) 4181 #define TIMEOUT_SET ((timeout_id_t)1) 4182 4183 /* 4184 * timeout identifer for leaky_bucket 4185 */ 4186 static timeout_id_t leaky_bucket_timeout_id = TIMEOUT_NONE; 4187 4188 /* 4189 * Tunables for maximum number of allowed CE's in a given time 4190 */ 4191 int ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT; 4192 int ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT; 4193 4194 void 4195 cpu_mp_init(void) 4196 { 4197 size_t size = cpu_aflt_size(); 4198 size_t i; 4199 kstat_t *ksp; 4200 4201 /* 4202 * Initialize the CE error handling buffers. 4203 */ 4204 mem_ce_simm_size = MAX_SIMM * max_ncpus; 4205 size = sizeof (struct ce_info) * mem_ce_simm_size; 4206 mem_ce_simm = kmem_zalloc(size, KM_SLEEP); 4207 4208 ksp = kstat_create("unix", 0, "ecc-info", "misc", 4209 KSTAT_TYPE_NAMED, ecc_error_info_ndata, KSTAT_FLAG_VIRTUAL); 4210 if (ksp != NULL) { 4211 ksp->ks_data = (struct kstat_named *)&ecc_error_info_data; 4212 ecc_error_info_data.version.value.ui32 = KSTAT_CE_INFO_VER; 4213 ecc_error_info_data.maxcount.value.ui32 = mem_ce_simm_size; 4214 ecc_error_info_data.count.value.ui32 = 0; 4215 kstat_install(ksp); 4216 } 4217 4218 for (i = 0; i < mem_ce_simm_size; i++) { 4219 struct kstat_ecc_mm_info *kceip; 4220 4221 kceip = kmem_zalloc(sizeof (struct kstat_ecc_mm_info), 4222 KM_SLEEP); 4223 ksp = kstat_create("mm", i, "ecc-info", "misc", 4224 KSTAT_TYPE_NAMED, 4225 sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t), 4226 KSTAT_FLAG_VIRTUAL); 4227 if (ksp != NULL) { 4228 /* 4229 * Re-declare ks_data_size to include room for the 4230 * UNUM name since we don't have KSTAT_FLAG_VAR_SIZE 4231 * set. 4232 */ 4233 ksp->ks_data_size = sizeof (struct kstat_ecc_mm_info) + 4234 KSTAT_CE_UNUM_NAMLEN; 4235 ksp->ks_data = kceip; 4236 kstat_named_init(&kceip->name, 4237 "name", KSTAT_DATA_STRING); 4238 kstat_named_init(&kceip->intermittent_total, 4239 "intermittent_total", KSTAT_DATA_UINT64); 4240 kstat_named_init(&kceip->persistent_total, 4241 "persistent_total", KSTAT_DATA_UINT64); 4242 kstat_named_init(&kceip->sticky_total, 4243 "sticky_total", KSTAT_DATA_UINT64); 4244 /* 4245 * Use the default snapshot routine as it knows how to 4246 * deal with named kstats with long strings. 4247 */ 4248 ksp->ks_update = ecc_kstat_update; 4249 kstat_install(ksp); 4250 } else { 4251 kmem_free(kceip, sizeof (struct kstat_ecc_mm_info)); 4252 } 4253 } 4254 } 4255 4256 /*ARGSUSED*/ 4257 static void 4258 leaky_bucket_timeout(void *arg) 4259 { 4260 int i; 4261 struct ce_info *psimm = mem_ce_simm; 4262 4263 for (i = 0; i < mem_ce_simm_size; i++) { 4264 if (psimm[i].leaky_bucket_cnt > 0) 4265 atomic_add_16(&psimm[i].leaky_bucket_cnt, -1); 4266 } 4267 add_leaky_bucket_timeout(); 4268 } 4269 4270 static void 4271 add_leaky_bucket_timeout(void) 4272 { 4273 long timeout_in_microsecs; 4274 4275 /* 4276 * create timeout for next leak. 4277 * 4278 * The timeout interval is calculated as follows 4279 * 4280 * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit 4281 * 4282 * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds 4283 * in a minute), then multiply this by MICROSEC to get the interval 4284 * in microseconds. 
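(For example, the default ecc_softerr_interval of 1440 minutes gives 1440 * 60 * MICROSEC = 86,400,000,000 microseconds.)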
Divide this total by ecc_softerr_limit so that 4285 * the timeout interval is accurate to within a few microseconds. 4286 */ 4287 4288 if (ecc_softerr_limit <= 0) 4289 ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT; 4290 if (ecc_softerr_interval <= 0) 4291 ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT; 4292 4293 timeout_in_microsecs = ((int64_t)ecc_softerr_interval * 60 * MICROSEC) / 4294 ecc_softerr_limit; 4295 4296 if (timeout_in_microsecs < SOFTERR_MIN_TIMEOUT) 4297 timeout_in_microsecs = SOFTERR_MIN_TIMEOUT; 4298 4299 leaky_bucket_timeout_id = timeout(leaky_bucket_timeout, 4300 (void *)NULL, drv_usectohz((clock_t)timeout_in_microsecs)); 4301 } 4302 4303 /* 4304 * Legacy Correctable ECC Error Hash 4305 * 4306 * All of the code below this comment is used to implement a legacy array 4307 * which counted intermittent, persistent, and sticky CE errors by unum, 4308 * and then was later extended to publish the data as a kstat for SunVTS. 4309 * All of this code is replaced by FMA, and remains here until such time 4310 * that the UltraSPARC-I/II CPU code is converted to FMA, or is EOLed. 4311 * 4312 * Errors are saved in three buckets per-unum: 4313 * (1) sticky - scrub was unsuccessful, cannot be scrubbed 4314 * This could represent a problem, and is immediately printed out. 4315 * (2) persistent - was successfully scrubbed 4316 * These errors use the leaky bucket algorithm to determine 4317 * if there is a serious problem. 4318 * (3) intermittent - may have originated from the cpu or upa/safari bus, 4319 * and does not necessarily indicate any problem with the dimm itself, 4320 * is critical information for debugging new hardware. 4321 * Because we do not know if it came from the dimm, it would be 4322 * inappropriate to include these in the leaky bucket counts. 4323 * 4324 * If the E$ line was modified before the scrub operation began, then the 4325 * displacement flush at the beginning of scrubphys() will cause the modified 4326 * line to be written out, which will clean up the CE. Then, any subsequent 4327 * read will not cause an error, which will cause persistent errors to be 4328 * identified as intermittent. 4329 * 4330 * If a DIMM is going bad, it will produce true persistents as well as 4331 * false intermittents, so these intermittents can be safely ignored. 4332 * 4333 * If the error count is excessive for a DIMM, this function will return 4334 * PAGE_IS_FAILING, and the CPU module may then decide to remove that page 4335 * from use. 4336 */ 4337 static int 4338 ce_count_unum(int status, int len, char *unum) 4339 { 4340 int i; 4341 struct ce_info *psimm = mem_ce_simm; 4342 int page_status = PAGE_IS_OK; 4343 4344 ASSERT(psimm != NULL); 4345 4346 if (len <= 0 || 4347 (status & (ECC_STICKY | ECC_PERSISTENT | ECC_INTERMITTENT)) == 0) 4348 return (page_status); 4349 4350 /* 4351 * Initialize the leaky_bucket timeout 4352 */ 4353 if (casptr(&leaky_bucket_timeout_id, 4354 TIMEOUT_NONE, TIMEOUT_SET) == TIMEOUT_NONE) 4355 add_leaky_bucket_timeout(); 4356 4357 for (i = 0; i < mem_ce_simm_size; i++) { 4358 if (psimm[i].name[0] == '\0') { 4359 /* 4360 * Hit the end of the valid entries, add 4361 * a new one. 4362 */ 4363 (void) strncpy(psimm[i].name, unum, len); 4364 if (status & ECC_STICKY) { 4365 /* 4366 * Sticky - the leaky bucket is used to track 4367 * soft errors. Since a sticky error is a 4368 * hard error and likely to be retired soon, 4369 * we do not count it in the leaky bucket. 
4370 */ 4371 psimm[i].leaky_bucket_cnt = 0; 4372 psimm[i].intermittent_total = 0; 4373 psimm[i].persistent_total = 0; 4374 psimm[i].sticky_total = 1; 4375 cmn_err(CE_WARN, 4376 "[AFT0] Sticky Softerror encountered " 4377 "on Memory Module %s\n", unum); 4378 page_status = PAGE_IS_FAILING; 4379 } else if (status & ECC_PERSISTENT) { 4380 psimm[i].leaky_bucket_cnt = 1; 4381 psimm[i].intermittent_total = 0; 4382 psimm[i].persistent_total = 1; 4383 psimm[i].sticky_total = 0; 4384 } else { 4385 /* 4386 * Intermittent - Because the scrub operation 4387 * cannot find the error in the DIMM, we will 4388 * not count these in the leaky bucket 4389 */ 4390 psimm[i].leaky_bucket_cnt = 0; 4391 psimm[i].intermittent_total = 1; 4392 psimm[i].persistent_total = 0; 4393 psimm[i].sticky_total = 0; 4394 } 4395 ecc_error_info_data.count.value.ui32++; 4396 break; 4397 } else if (strncmp(unum, psimm[i].name, len) == 0) { 4398 /* 4399 * Found an existing entry for the current 4400 * memory module, adjust the counts. 4401 */ 4402 if (status & ECC_STICKY) { 4403 psimm[i].sticky_total++; 4404 cmn_err(CE_WARN, 4405 "[AFT0] Sticky Softerror encountered " 4406 "on Memory Module %s\n", unum); 4407 page_status = PAGE_IS_FAILING; 4408 } else if (status & ECC_PERSISTENT) { 4409 int new_value; 4410 4411 new_value = atomic_add_16_nv( 4412 &psimm[i].leaky_bucket_cnt, 1); 4413 psimm[i].persistent_total++; 4414 if (new_value > ecc_softerr_limit) { 4415 cmn_err(CE_WARN, "[AFT0] Most recent %d" 4416 " soft errors from Memory Module" 4417 " %s exceed threshold (N=%d," 4418 " T=%dh:%02dm) triggering page" 4419 " retire", new_value, unum, 4420 ecc_softerr_limit, 4421 ecc_softerr_interval / 60, 4422 ecc_softerr_interval % 60); 4423 atomic_add_16( 4424 &psimm[i].leaky_bucket_cnt, -1); 4425 page_status = PAGE_IS_FAILING; 4426 } 4427 } else { /* Intermittent */ 4428 psimm[i].intermittent_total++; 4429 } 4430 break; 4431 } 4432 } 4433 4434 if (i >= mem_ce_simm_size) 4435 cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of " 4436 "space.\n"); 4437 4438 return (page_status); 4439 } 4440 4441 /* 4442 * Function to support counting of IO detected CEs. 4443 */ 4444 void 4445 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum) 4446 { 4447 if (ce_count_unum(ecc->flt_status, len, unum) == PAGE_IS_FAILING && 4448 automatic_page_removal) { 4449 page_t *pp = page_numtopp_nolock((pfn_t) 4450 (ecc->flt_addr >> MMU_PAGESHIFT)); 4451 4452 if (pp) { 4453 page_settoxic(pp, PAGE_IS_FAULTY); 4454 (void) page_retire(pp, PAGE_IS_FAILING); 4455 } 4456 } 4457 } 4458 4459 static int 4460 ecc_kstat_update(kstat_t *ksp, int rw) 4461 { 4462 struct kstat_ecc_mm_info *kceip = ksp->ks_data; 4463 struct ce_info *ceip = mem_ce_simm; 4464 int i = ksp->ks_instance; 4465 4466 if (rw == KSTAT_WRITE) 4467 return (EACCES); 4468 4469 ASSERT(ksp->ks_data != NULL); 4470 ASSERT(i < mem_ce_simm_size && i >= 0); 4471 4472 /* 4473 * Since we're not using locks, make sure that we don't get partial 4474 * data. The name is always copied before the counters are incremented 4475 * so only do this update routine if at least one of the counters is 4476 * non-zero, which ensures that ce_count_unum() is done, and the 4477 * string is fully copied. 4478 */ 4479 if (ceip[i].intermittent_total == 0 && 4480 ceip[i].persistent_total == 0 && 4481 ceip[i].sticky_total == 0) { 4482 /* 4483 * Uninitialized or partially initialized. Ignore. 4484 * The ks_data buffer was allocated via kmem_zalloc, 4485 * so no need to bzero it. 
4486 */ 4487 return (0); 4488 } 4489 4490 kstat_named_setstr(&kceip->name, ceip[i].name); 4491 kceip->intermittent_total.value.ui64 = ceip[i].intermittent_total; 4492 kceip->persistent_total.value.ui64 = ceip[i].persistent_total; 4493 kceip->sticky_total.value.ui64 = ceip[i].sticky_total; 4494 4495 return (0); 4496 } 4497 4498 #define VIS_BLOCKSIZE 64 4499 4500 int 4501 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data) 4502 { 4503 int ret, watched; 4504 4505 watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 4506 ret = dtrace_blksuword32(addr, data, 0); 4507 if (watched) 4508 watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE); 4509 4510 return (ret); 4511 } 4512 4513 /*ARGSUSED*/ 4514 void 4515 cpu_faulted_enter(struct cpu *cp) 4516 { 4517 } 4518 4519 /*ARGSUSED*/ 4520 void 4521 cpu_faulted_exit(struct cpu *cp) 4522 { 4523 } 4524 4525 static int mmu_disable_ism_large_pages = ((1 << TTE512K) | 4526 (1 << TTE32M) | (1 << TTE256M)); 4527 static int mmu_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M)); 4528 4529 /* 4530 * The function returns the US_II mmu-specific values for the 4531 * hat's disable_large_pages and disable_ism_large_pages variables. 4532 */ 4533 int 4534 mmu_large_pages_disabled(uint_t flag) 4535 { 4536 int pages_disable = 0; 4537 4538 if (flag == HAT_LOAD) { 4539 pages_disable = mmu_disable_large_pages; 4540 } else if (flag == HAT_LOAD_SHARE) { 4541 pages_disable = mmu_disable_ism_large_pages; 4542 } 4543 return (pages_disable); 4544 } 4545 4546 /*ARGSUSED*/ 4547 void 4548 mmu_init_kernel_pgsz(struct hat *hat) 4549 { 4550 } 4551 4552 size_t 4553 mmu_get_kernel_lpsize(size_t lpsize) 4554 { 4555 uint_t tte; 4556 4557 if (lpsize == 0) { 4558 /* no setting for segkmem_lpsize in /etc/system: use default */ 4559 return (MMU_PAGESIZE4M); 4560 } 4561 4562 for (tte = TTE8K; tte <= TTE4M; tte++) { 4563 if (lpsize == TTEBYTES(tte)) 4564 return (lpsize); 4565 } 4566 4567 return (TTEBYTES(TTE8K)); 4568 } 4569