/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/pci_impl.h>
#include <sys/cpuvar.h>
#include <sys/x86_archext.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/chip.h>
#include <sys/cyclic.h>
#include <sys/cpu_module_impl.h>
#include <sys/pci_cfgspace_impl.h>
#include <sys/sysevent.h>
#include <sys/smbios.h>
#include <sys/mca_x86.h>
#include <sys/mca_amd.h>
#include <sys/mc.h>
#include <sys/psw.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sdt.h>
#include <sys/fm/util.h>
#include <sys/fm/protocol.h>
#include <sys/fm/cpu/AMD.h>

#include "ao.h"
#include "ao_mca_disp.h"

errorq_t *ao_mca_queue;		/* machine-check ereport queue */
int ao_mca_stack_flag = 0;	/* record stack trace in ereports */
int ao_mca_smi_disable = 1;	/* attempt to disable SMI polling */

ao_bank_regs_t ao_bank_regs[AMD_MCA_BANK_COUNT] = {
	{ AMD_MSR_DC_STATUS, AMD_MSR_DC_ADDR },
	{ AMD_MSR_IC_STATUS, AMD_MSR_IC_ADDR },
	{ AMD_MSR_BU_STATUS, AMD_MSR_BU_ADDR },
	{ AMD_MSR_LS_STATUS, AMD_MSR_LS_ADDR },
	{ AMD_MSR_NB_STATUS, AMD_MSR_NB_ADDR }
};

typedef struct ao_bank_cfg {
	uint_t bank_ctl;
	uint_t bank_ctl_mask;
	uint64_t bank_ctl_init;
	uint_t bank_status;
	uint_t bank_addr;
} ao_bank_cfg_t;

static const ao_bank_cfg_t ao_bank_cfgs[] = {
	{ AMD_MSR_DC_CTL, AMD_MSR_DC_MASK, AMD_DC_CTL_INIT, AMD_MSR_DC_STATUS,
	    AMD_MSR_DC_ADDR },
	{ AMD_MSR_IC_CTL, AMD_MSR_IC_MASK, AMD_IC_CTL_INIT, AMD_MSR_IC_STATUS,
	    AMD_MSR_IC_ADDR },
	{ AMD_MSR_BU_CTL, AMD_MSR_BU_MASK, AMD_BU_CTL_INIT, AMD_MSR_BU_STATUS,
	    AMD_MSR_BU_ADDR },
	{ AMD_MSR_LS_CTL, AMD_MSR_LS_MASK, AMD_LS_CTL_INIT, AMD_MSR_LS_STATUS,
	    AMD_MSR_LS_ADDR },
	{ AMD_MSR_NB_CTL, AMD_MSR_NB_MASK, AMD_NB_CTL_INIT, AMD_MSR_NB_STATUS,
	    AMD_MSR_NB_ADDR }
};

static const ao_error_disp_t ao_disp_unknown = {
	FM_EREPORT_CPU_AMD_UNKNOWN,
	FM_EREPORT_PAYLOAD_FLAGS_CPU_AMD_UNKNOWN
};

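/*
 * Per-bank error dispositions live in the ao_error_disp tables (see
 * ao_mca_disp.h).  ao_disp_match() below walks the table for a given bank
 * until it finds an entry matching the observed status, or reaches the
 * terminating entry (aed_stat_mask == 0), in which case the catch-all
 * ao_disp_unknown above is returned.
 */
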
/*
 * This is quite awful but necessary to work around x86 system vendors' view
 * of the world.  Other operating systems (you know who you are) don't
 * understand Opteron-specific error handling, so BIOS and system vendors
 * often hide these conditions from them by using SMI polling to copy out
 * any errors from the machine-check registers.  When Solaris runs on a
 * system with this feature, we want to disable the SMI polling so we can
 * use FMA instead.  Sadly, there isn't even a standard self-describing way
 * to express the whole situation, so we have to resort to hard-coded
 * values.  This should all be changed to be a self-describing
 * vendor-specific SMBIOS structure in the future.
 */
static const struct ao_smi_disable {
	const char *asd_sys_vendor;	/* SMB_TYPE_SYSTEM vendor prefix */
	const char *asd_bios_vendor;	/* SMB_TYPE_BIOS vendor prefix */
	uint32_t asd_port;		/* output port for SMI disable */
	uint32_t asd_code;		/* output code for SMI disable */
} ao_smi_disable[] = {
	{ "Sun Microsystems", "American Megatrends", 0x502F, 0x59 },
	{ NULL, NULL, 0, 0 }
};

static int
ao_disp_match_r4(uint16_t ref, uint8_t r4)
{
	static const uint16_t ao_r4_map[] = {
		AO_MCA_R4_BIT_GEN,	/* AMD_ERRCODE_R4_GEN */
		AO_MCA_R4_BIT_RD,	/* AMD_ERRCODE_R4_RD */
		AO_MCA_R4_BIT_WR,	/* AMD_ERRCODE_R4_WR */
		AO_MCA_R4_BIT_DRD,	/* AMD_ERRCODE_R4_DRD */
		AO_MCA_R4_BIT_DWR,	/* AMD_ERRCODE_R4_DWR */
		AO_MCA_R4_BIT_IRD,	/* AMD_ERRCODE_R4_IRD */
		AO_MCA_R4_BIT_PREFETCH,	/* AMD_ERRCODE_R4_PREFETCH */
		AO_MCA_R4_BIT_EVICT,	/* AMD_ERRCODE_R4_EVICT */
		AO_MCA_R4_BIT_SNOOP	/* AMD_ERRCODE_R4_SNOOP */
	};

	ASSERT(r4 < sizeof (ao_r4_map) / sizeof (uint16_t));

	return ((ref & ao_r4_map[r4]) != 0);
}

static int
ao_disp_match_pp(uint8_t ref, uint8_t pp)
{
	static const uint8_t ao_pp_map[] = {
		AO_MCA_PP_BIT_SRC,	/* AMD_ERRCODE_PP_SRC */
		AO_MCA_PP_BIT_RSP,	/* AMD_ERRCODE_PP_RSP */
		AO_MCA_PP_BIT_OBS,	/* AMD_ERRCODE_PP_OBS */
		AO_MCA_PP_BIT_GEN	/* AMD_ERRCODE_PP_GEN */
	};

	ASSERT(pp < sizeof (ao_pp_map) / sizeof (uint8_t));

	return ((ref & ao_pp_map[pp]) != 0);
}

static int
ao_disp_match_ii(uint8_t ref, uint8_t ii)
{
	static const uint8_t ao_ii_map[] = {
		AO_MCA_II_BIT_MEM,	/* AMD_ERRCODE_II_MEM */
		0,
		AO_MCA_II_BIT_IO,	/* AMD_ERRCODE_II_IO */
		AO_MCA_II_BIT_GEN	/* AMD_ERRCODE_II_GEN */
	};

	ASSERT(ii < sizeof (ao_ii_map) / sizeof (uint8_t));

	return ((ref & ao_ii_map[ii]) != 0);
}

static uint8_t
bit_strip(uint16_t *codep, uint16_t mask, uint16_t shift)
{
	uint8_t val = (*codep & mask) >> shift;
	*codep &= ~mask;
	return (val);
}

#define	BIT_STRIP(codep, name) \
	bit_strip(codep, AMD_ERRCODE_##name##_MASK, AMD_ERRCODE_##name##_SHIFT)

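/*
 * Worked example (illustrative; field layout per our reading of the BKDG):
 * a memory-hierarchy error code has the binary form 0000 0001 RRRR TTLL.
 * For a code of 0x0137, BIT_STRIP(&code, R4) returns the RRRR field
 * (0x3, DRD) and clears those bits, leaving 0x0107.  ao_disp_match_one()
 * below then checks DRD against aed_stat_r4_bits and compares the stripped
 * code directly with aed_stat_code.
 */
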
static int
ao_disp_match_one(const ao_error_disp_t *aed, uint64_t status)
{
	uint16_t code = status & AMD_ERRCODE_MASK;
	uint8_t extcode = (status & AMD_ERREXT_MASK) >> AMD_ERREXT_SHIFT;
	uint64_t stat_mask = aed->aed_stat_mask;
	uint64_t stat_mask_res = aed->aed_stat_mask_res;

	/*
	 * If the bank's status register indicates overflow, then we can no
	 * longer rely on the value of CECC: our experience with actual fault
	 * injection has shown that multiple CEs overwriting each other can
	 * leave both AMD_BANK_STAT_CECC and AMD_BANK_STAT_UECC clear.  This
	 * should be clarified in a future BKDG or by the Revision Guide.
	 */
	if (status & AMD_BANK_STAT_OVER) {
		stat_mask &= ~AMD_BANK_STAT_CECC;
		stat_mask_res &= ~AMD_BANK_STAT_CECC;
	}

	if ((status & stat_mask) != stat_mask_res)
		return (0);

	/*
	 * The r4 and pp bits are stored separately, so we mask off and
	 * compare them for the code types that use them.  Once we've taken
	 * the r4 and pp bits out of the equation, we can directly compare
	 * the resulting code with the one stored in the ao_error_disp_t.
	 */
	if (AMD_ERRCODE_ISMEM(code)) {
		uint8_t r4 = BIT_STRIP(&code, R4);

		if (!ao_disp_match_r4(aed->aed_stat_r4_bits, r4))
			return (0);

	} else if (AMD_ERRCODE_ISBUS(code)) {
		uint8_t r4 = BIT_STRIP(&code, R4);
		uint8_t pp = BIT_STRIP(&code, PP);
		uint8_t ii = BIT_STRIP(&code, II);

		if (!ao_disp_match_r4(aed->aed_stat_r4_bits, r4) ||
		    !ao_disp_match_pp(aed->aed_stat_pp_bits, pp) ||
		    !ao_disp_match_ii(aed->aed_stat_ii_bits, ii))
			return (0);
	}

	return (code == aed->aed_stat_code &&
	    extcode == aed->aed_stat_extcode);
}

static const ao_error_disp_t *
ao_disp_match(uint_t bankno, uint64_t status)
{
	const ao_error_disp_t *aed;

	for (aed = ao_error_disp[bankno]; aed->aed_stat_mask != 0; aed++) {
		if (ao_disp_match_one(aed, status))
			return (aed);
	}

	return (&ao_disp_unknown);
}

void
ao_pcicfg_write(uint_t chipid, uint_t func, uint_t reg, uint32_t val)
{
	ASSERT(chipid + 24 <= 31);
	ASSERT((func & 7) == func);
	ASSERT((reg & 3) == 0 && reg < 256);

	pci_mech1_putl(0, chipid + 24, func, reg, val);
}

uint32_t
ao_pcicfg_read(uint_t chipid, uint_t func, uint_t reg)
{
	ASSERT(chipid + 24 <= 31);
	ASSERT((func & 7) == func);
	ASSERT((reg & 3) == 0 && reg < 256);

	return (pci_mech1_getl(0, chipid + 24, func, reg));
}

/*
 * Set up the individual bank detectors after stashing their BIOS settings.
 */
static void
ao_bank_cfg(ao_mca_t *mca)
{
	ao_bios_cfg_t *bioscfg = &mca->ao_mca_bios_cfg;
	const ao_bank_cfg_t *bankcfg = ao_bank_cfgs;
	int i;

	for (i = 0; i < AMD_MCA_BANK_COUNT; i++, bankcfg++) {
		bioscfg->bcfg_bank_ctl[i] = rdmsr(bankcfg->bank_ctl);
		bioscfg->bcfg_bank_mask[i] = rdmsr(bankcfg->bank_ctl_mask);
		wrmsr(bankcfg->bank_ctl, bankcfg->bank_ctl_init);
	}
}

/*
 * Bits to be added to the NorthBridge (NB) configuration register.
 * See BKDG 3.29 Section 3.6.4.2 for more information.
 */
uint32_t ao_nb_cfg_add =
    AMD_NB_CFG_NBMCATOMSTCPUEN |
    AMD_NB_CFG_DISPCICFGCPUERRRSP |
    AMD_NB_CFG_SYNCONUCECCEN |
    AMD_NB_CFG_CPUECCERREN;

/*
 * Bits to be cleared from the NorthBridge (NB) configuration register.
 * See BKDG 3.29 Section 3.6.4.2 for more information.
 */
uint32_t ao_nb_cfg_remove =
    AMD_NB_CFG_IORDDATERREN |
    AMD_NB_CFG_SYNCONANYERREN |
    AMD_NB_CFG_SYNCONWDOGEN |
    AMD_NB_CFG_IOERRDIS |
    AMD_NB_CFG_IOMSTABORTDIS |
    AMD_NB_CFG_SYNCPKTPROPDIS |
    AMD_NB_CFG_SYNCPKTGENDIS;

/*
 * Bits to be used if we configure the NorthBridge (NB) Watchdog.  The
 * watchdog triggers a machine check exception when no response to an NB
 * system access occurs within a specified time interval.  If the BIOS
 * (i.e. platform design) has enabled the watchdog, we leave its rate alone.
 * If the BIOS has not enabled the watchdog, we enable it and set the rate
 * as specified below.  To disable the watchdog, add the
 * AMD_NB_CFG_WDOGTMRDIS bit to ao_nb_cfg_add.
 */
uint32_t ao_nb_cfg_wdog =
    AMD_NB_CFG_WDOGTMRCNTSEL_4095 |
    AMD_NB_CFG_WDOGTMRBASESEL_1MS;

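/*
 * With ao_nb_cfg_wdog above, the watchdog counts 4095 ticks on a 1-ms time
 * base, so an NB access that goes unanswered for roughly four seconds will
 * raise a machine-check exception.
 */
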
static void
ao_nb_cfg(ao_mca_t *mca)
{
	uint_t chipid = chip_plat_get_chipid(CPU);
	uint32_t val;

	if (chip_plat_get_clogid(CPU) != 0)
		return;	/* only core 0 of each chip configures the NB */

	/*
	 * Read the NorthBridge (NB) configuration register in PCI space,
	 * modify the settings accordingly, and store the new value back.
	 */
	mca->ao_mca_bios_cfg.bcfg_nb_cfg = val =
	    ao_pcicfg_read(chipid, AMD_NB_FUNC, AMD_NB_REG_CFG);

	/*
	 * If the watchdog was disabled, enable it according to the policy
	 * described above.  Then apply the ao_nb_cfg_[add|remove] masks.
	 */
	if (val & AMD_NB_CFG_WDOGTMRDIS) {
		val &= ~AMD_NB_CFG_WDOGTMRBASESEL_MASK;
		val &= ~AMD_NB_CFG_WDOGTMRCNTSEL_MASK;
		val &= ~AMD_NB_CFG_WDOGTMRDIS;
		val |= ao_nb_cfg_wdog;
	}

	val &= ~ao_nb_cfg_remove;
	val |= ao_nb_cfg_add;

	ao_pcicfg_write(chipid, AMD_NB_FUNC, AMD_NB_REG_CFG, val);
}

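/*
 * Error flow at a glance: ao_mca_trap() (#mc exceptions) and the poller
 * started from ao_mca_post_init() both funnel into ao_mca_logout(), which
 * snapshots and clears the bank registers and dispatches a copy of the
 * logout area onto ao_mca_queue.  ao_mca_drain() later matches each valid
 * bank against its dispositions and posts an ereport via ao_ereport_post().
 */
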
/*
 * Capture the machine-check exception state into our per-CPU logout area,
 * and dispatch a copy of the logout area to our error queue for ereport
 * creation.  If 'rp' is non-NULL, we're being called from trap context;
 * otherwise we're being polled or poked by the injector.  We return the
 * number of errors found through 'np', and a boolean indicating whether the
 * error is fatal.  The caller is expected to call fm_panic() if we return
 * fatal (non-zero).
 */
int
ao_mca_logout(ao_cpu_logout_t *acl, struct regs *rp, int *np)
{
	int i, fatal = 0, n = 0;

	acl->acl_timestamp = gethrtime_waitfree();
	acl->acl_mcg_status = rdmsr(IA32_MSR_MCG_STATUS);
	acl->acl_ip = rp ? rp->r_pc : 0;
	acl->acl_flags = 0;

	/*
	 * Iterate over the banks of machine-check registers, read the
	 * address and status registers into the logout area, and clear them
	 * as we go.
	 */
	for (i = 0; i < AMD_MCA_BANK_COUNT; i++) {
		ao_bank_logout_t *abl = &acl->acl_banks[i];

		abl->abl_addr = rdmsr(ao_bank_regs[i].abr_addr);
		abl->abl_status = rdmsr(ao_bank_regs[i].abr_status);

		if (abl->abl_status & AMD_BANK_STAT_VALID)
			wrmsr(ao_bank_regs[i].abr_status, 0);
	}

	if (rp == NULL || !USERMODE(rp->r_cs))
		acl->acl_flags |= AO_ACL_F_PRIV;

	if (ao_mca_stack_flag)
		acl->acl_stackdepth = getpcstack(acl->acl_stack, FM_STK_DEPTH);
	else
		acl->acl_stackdepth = 0;

	/*
	 * Clear MCG_STATUS, indicating that machine-check trap processing is
	 * complete.  Once we do this, another machine-check trap can occur.
	 */
	wrmsr(IA32_MSR_MCG_STATUS, 0);

	/*
	 * If we took a machine-check trap, then the error is fatal if the
	 * MCG_STATUS global register indicates that the return instruction
	 * pointer is not valid (RIPV is clear).
	 */
	if (rp != NULL && !(acl->acl_mcg_status & MCG_STATUS_RIPV))
		fatal++;

	/*
	 * Now iterate over the saved logout area, determining whether the
	 * error that we saw is fatal or not based upon our dispositions
	 * and the hardware's indicators of whether or not we can resume.
	 */
	for (i = 0; i < AMD_MCA_BANK_COUNT; i++) {
		ao_bank_logout_t *abl = &acl->acl_banks[i];
		const ao_error_disp_t *aed;
		uint8_t when;

		if (!(abl->abl_status & AMD_BANK_STAT_VALID))
			continue;

		aed = ao_disp_match(i, abl->abl_status);
		if ((when = aed->aed_panic_when) != AO_AED_PANIC_NEVER) {
			if ((when & AO_AED_PANIC_ALWAYS) ||
			    ((when & AO_AED_PANIC_IFMCE) && rp != NULL)) {
				fatal++;
			}
		}

		/*
		 * If we are taking a machine-check exception and the overflow
		 * bit is set or our context is corrupt, then we must die.
		 * NOTE: This code assumes that if the overflow bit is set and
		 * we didn't take a #mc exception (i.e. the poller found it),
		 * then multiple correctable errors overwrote each other.
		 * This will need to change if we eventually use the Opteron
		 * Rev E exception mechanism for detecting correctable errors.
		 */
		if (rp != NULL && (abl->abl_status &
		    (AMD_BANK_STAT_OVER | AMD_BANK_STAT_PCC)))
			fatal++;

		/*
		 * If we are taking a machine-check exception and we don't
		 * recognize the error case at all, then assume it's fatal.
		 * This will need to change if we eventually use the Opteron
		 * Rev E exception mechanism for detecting correctable errors.
		 */
		if (rp != NULL && aed == &ao_disp_unknown)
			fatal++;

		n++;
	}

	if (n > 0) {
		errorq_dispatch(ao_mca_queue, acl, sizeof (ao_cpu_logout_t),
		    fatal && cmi_panic_on_uncorrectable_error ?
		    ERRORQ_SYNC : ERRORQ_ASYNC);
	}

	if (np != NULL)
		*np = n;	/* return number of errors found to caller */

	return (fatal != 0);
}

static uint_t
ao_ereport_synd(ao_mca_t *mca,
    const ao_bank_logout_t *abl, uint_t *typep, int is_nb)
{
	if (is_nb) {
		if ((mca->ao_mca_bios_cfg.bcfg_nb_cfg &
		    AMD_NB_CFG_CHIPKILLECCEN) != 0) {
			*typep = AMD_SYNDTYPE_CHIPKILL;
			return (AMD_NB_STAT_CKSYND(abl->abl_status));
		} else {
			*typep = AMD_SYNDTYPE_ECC;
			return (AMD_BANK_SYND(abl->abl_status));
		}
	} else {
		*typep = AMD_SYNDTYPE_ECC;
		return (AMD_BANK_SYND(abl->abl_status));
	}
}

static void
ao_ereport_create_resource_elem(nvlist_t **nvlp, nv_alloc_t *nva,
    mc_unum_t *unump, int dimmnum)
{
	nvlist_t *snvl;

	*nvlp = fm_nvlist_create(nva);		/* freed by caller */

	snvl = fm_nvlist_create(nva);

	(void) nvlist_add_uint64(snvl, FM_FMRI_HC_SPECIFIC_OFFSET,
	    unump->unum_offset);

	fm_fmri_hc_set(*nvlp, FM_HC_SCHEME_VERSION, NULL, snvl, 4,
	    "motherboard", unump->unum_board,
	    "chip", unump->unum_chip,
	    "memory-controller", unump->unum_mc,
	    "dimm", unump->unum_dimms[dimmnum]);

	fm_nvlist_destroy(snvl, FM_NVA_FREE);
}

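/*
 * The element built above is an hc-scheme FMRI naming a DIMM, e.g.
 * (illustrative) hc:///motherboard=0/chip=1/memory-controller=0/dimm=2,
 * with the address offset carried in the hc-specific member.
 */
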
static void
ao_ereport_add_resource(nvlist_t *payload, nv_alloc_t *nva, mc_unum_t *unump)
{
	nvlist_t *elems[MC_UNUM_NDIMM];
	int nelems = 0;
	int i;

	for (i = 0; i < MC_UNUM_NDIMM; i++) {
		if (unump->unum_dimms[i] == -1)
			break;
		ao_ereport_create_resource_elem(&elems[nelems++], nva,
		    unump, i);
	}

	fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE,
	    DATA_TYPE_NVLIST_ARRAY, nelems, elems, NULL);

	for (i = 0; i < nelems; i++)
		fm_nvlist_destroy(elems[i], FM_NVA_FREE);
}

static void
ao_ereport_add_logout(ao_data_t *ao, nvlist_t *payload, nv_alloc_t *nva,
    const ao_cpu_logout_t *acl, uint_t bankno, const ao_error_disp_t *aed)
{
	uint64_t members = aed->aed_ereport_members;
	ao_mca_t *mca = &ao->ao_mca;
	const ao_bank_logout_t *abl = &acl->acl_banks[bankno];
	uint_t synd, syndtype;

	synd = ao_ereport_synd(mca, abl, &syndtype, bankno == AMD_MCA_BANK_NB);

	if (members & FM_EREPORT_PAYLOAD_FLAG_BANK_STAT) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_BANK_STAT,
		    DATA_TYPE_UINT64, abl->abl_status, NULL);
	}

	if (members & FM_EREPORT_PAYLOAD_FLAG_BANK_NUM) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_BANK_NUM,
		    DATA_TYPE_UINT8, bankno, NULL);
	}

	if (members & FM_EREPORT_PAYLOAD_FLAG_ADDR) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ADDR,
		    DATA_TYPE_UINT64, abl->abl_addr, NULL);
	}

	if (members & FM_EREPORT_PAYLOAD_FLAG_ADDR_VALID) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ADDR_VALID,
		    DATA_TYPE_BOOLEAN_VALUE, (abl->abl_status &
		    AMD_BANK_STAT_ADDRV) ? B_TRUE : B_FALSE, NULL);
	}

	if (members & FM_EREPORT_PAYLOAD_FLAG_SYND) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
		    DATA_TYPE_UINT16, synd, NULL);
	}

	if (members & FM_EREPORT_PAYLOAD_FLAG_SYND_TYPE) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_TYPE,
		    DATA_TYPE_STRING, (syndtype == AMD_SYNDTYPE_CHIPKILL ?
		    "C" : "E"), NULL);
	}

	if (members & FM_EREPORT_PAYLOAD_FLAG_IP) {
		uint64_t ip = (acl->acl_mcg_status & MCG_STATUS_EIPV) ?
		    acl->acl_ip : 0;
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_IP,
		    DATA_TYPE_UINT64, ip, NULL);
	}

	if (members & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
		    DATA_TYPE_BOOLEAN_VALUE, (acl->acl_flags & AO_ACL_F_PRIV) ?
		    B_TRUE : B_FALSE, NULL);
	}

	if (members & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) {
		mc_unum_t unum;
		int addrvalid;

		addrvalid = (members & FM_EREPORT_PAYLOAD_FLAG_ADDR) &&
		    (members & FM_EREPORT_PAYLOAD_FLAG_ADDR_VALID) &&
		    (abl->abl_status & AMD_BANK_STAT_ADDRV);

		if (addrvalid && ao_mc_patounum(ao, abl->abl_addr, synd,
		    syndtype, &unum))
			ao_ereport_add_resource(payload, nva, &unum);
	}

	if (ao_mca_stack_flag && (members & FM_EREPORT_PAYLOAD_FLAG_STACK)) {
		fm_payload_stack_add(payload, acl->acl_stack,
		    acl->acl_stackdepth);
	}
}

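/*
 * Post an ereport for a single bank of the logout area.  The class below is
 * assembled as cpu.amd.<aed_class>, and fm_ereport_set() adds the "ereport."
 * prefix; a hypothetical disposition whose aed_class is "nb.mem_ce" would
 * therefore be posted as ereport.cpu.amd.nb.mem_ce.
 */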
static void
ao_ereport_post(const ao_cpu_logout_t *acl,
    int bankno, const ao_error_disp_t *aed)
{
	ao_data_t *ao = acl->acl_ao;
	errorq_elem_t *eqep;
	nvlist_t *ereport, *detector;
	nv_alloc_t *nva = NULL;
	char buf[FM_MAX_CLASS];

	if (panicstr) {
		if ((eqep = errorq_reserve(ereport_errorq)) == NULL)
			return;
		ereport = errorq_elem_nvl(ereport_errorq, eqep);
		nva = errorq_elem_nva(ereport_errorq, eqep);
	} else {
		ereport = fm_nvlist_create(nva);
	}

	/*
	 * Create the scheme "cpu" FMRI.
	 */
	detector = ao_fmri_create(ao, nva);

	/*
	 * Encode all the common data into the ereport.
	 */
	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
	    FM_ERROR_CPU, "amd", aed->aed_class);

	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
	    fm_ena_generate_cpu(acl->acl_timestamp, ao->ao_cpu->cpu_id,
	    FM_ENA_FMT1), detector, NULL);

	/*
	 * Encode the error-specific data that was saved in the logout area.
	 */
	ao_ereport_add_logout(ao, ereport, nva, acl, bankno, aed);

	if (panicstr) {
		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
	} else {
		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
		fm_nvlist_destroy(ereport, FM_NVA_FREE);
		fm_nvlist_destroy(detector, FM_NVA_FREE);
	}
}

/*ARGSUSED*/
void
ao_mca_drain(void *ignored, const void *data, const errorq_elem_t *eqe)
{
	const ao_cpu_logout_t *acl = data;
	int i;

	for (i = 0; i < AMD_MCA_BANK_COUNT; i++) {
		const ao_bank_logout_t *abl = &acl->acl_banks[i];
		const ao_error_disp_t *aed;

		if (abl->abl_status & AMD_BANK_STAT_VALID) {
			aed = ao_disp_match(i, abl->abl_status);
			ao_ereport_post(acl, i, aed);
		}
	}
}

int
ao_mca_trap(void *data, struct regs *rp)
{
	ao_data_t *ao = data;
	ao_mca_t *mca = &ao->ao_mca;
	ao_cpu_logout_t *acl = &mca->ao_mca_logout[AO_MCA_LOGOUT_EXCEPTION];

	return (ao_mca_logout(acl, rp, NULL));
}

/*ARGSUSED*/
int
ao_mca_inject(void *data, cmi_mca_regs_t *regs, uint_t nregs)
{
	uint64_t hwcr, oldhwcr;
	int i;

	/*
	 * Enable writes to the MCi_STATUS registers for the duration of the
	 * injection, then restore the previous HWCR setting.
	 */
	oldhwcr = rdmsr(MSR_AMD_HWCR);
	hwcr = oldhwcr | AMD_HWCR_MCI_STATUS_WREN;
	wrmsr(MSR_AMD_HWCR, hwcr);

	for (i = 0; i < nregs; i++)
		wrmsr(regs[i].cmr_msrnum, regs[i].cmr_msrval);

	wrmsr(MSR_AMD_HWCR, oldhwcr);

	return (0);
}

void
ao_mca_init(void *data)
{
	ao_data_t *ao = data;
	ao_mca_t *mca = &ao->ao_mca;
	uint64_t cap;
	int i;

	ao_mca_poll_init(mca);

	ASSERT(x86_feature & X86_MCA);
	cap = rdmsr(IA32_MSR_MCG_CAP);
	ASSERT(cap & MCG_CAP_CTL_P);

	/*
	 * If the hardware's bank count is different from what we expect,
	 * then we're running on some Opteron variant that we don't
	 * understand yet.
	 */
	if ((cap & MCG_CAP_COUNT_MASK) != AMD_MCA_BANK_COUNT) {
		cmn_err(CE_WARN, "CPU %d has %llu MCA banks; expected %u: "
		    "disabling MCA on this CPU", ao->ao_cpu->cpu_id,
		    (u_longlong_t)(cap & MCG_CAP_COUNT_MASK),
		    AMD_MCA_BANK_COUNT);
		return;
	}

	/*
	 * Configure the logout areas.  We preset every logout area's acl_ao
	 * pointer to refer back to our per-CPU state for errorq drain usage.
	 */
	for (i = 0; i < AO_MCA_LOGOUT_NUM; i++)
		mca->ao_mca_logout[i].acl_ao = ao;

	ao_bank_cfg(mca);
	ao_nb_cfg(mca);

	wrmsr(IA32_MSR_MCG_CTL, AMD_MCG_EN_ALL);

	/*
	 * Throw away all existing bank state.  We do this because some
	 * BIOSes, perhaps during POST, do things to the machine that cause
	 * MCA state to be updated.  If we interpret this state as an actual
	 * error, we may end up indicting something that's not actually
	 * broken.
	 */
	for (i = 0; i < sizeof (ao_bank_cfgs) / sizeof (ao_bank_cfg_t); i++)
		wrmsr(ao_bank_cfgs[i].bank_status, 0ULL);

	wrmsr(IA32_MSR_MCG_STATUS, 0ULL);
	membar_producer();

	setcr4(getcr4() | CR4_MCE);	/* enable #mc exceptions */
}

/*ARGSUSED*/
void
ao_mca_post_init(void *data)
{
	const struct ao_smi_disable *asd;
	id_t id;

	smbios_system_t sy;
	smbios_bios_t sb;
	smbios_info_t si;

	/*
	 * Fetch the System and BIOS vendor strings from SMBIOS and see if
	 * they match a value in our table.  If so, disable SMI error polling.
	 * This is grotesque and should be replaced by self-describing
	 * vendor-specific SMBIOS data or a specification enhancement instead.
	 */
	if (ao_mca_smi_disable && ksmbios != NULL &&
	    smbios_info_bios(ksmbios, &sb) != SMB_ERR &&
	    (id = smbios_info_system(ksmbios, &sy)) != SMB_ERR &&
	    smbios_info_common(ksmbios, id, &si) != SMB_ERR) {

		for (asd = ao_smi_disable; asd->asd_sys_vendor != NULL;
		    asd++) {
			if (strncmp(asd->asd_sys_vendor, si.smbi_manufacturer,
			    strlen(asd->asd_sys_vendor)) != 0 ||
			    strncmp(asd->asd_bios_vendor, sb.smbb_vendor,
			    strlen(asd->asd_bios_vendor)) != 0)
				continue;

			cmn_err(CE_CONT, "?SMI polling disabled in favor of "
			    "Solaris Fault Management for AMD Processors");

			outl(asd->asd_port, asd->asd_code);
			break;
		}
	}

	ao_mca_poll_start();
}

/*
 * Called after a CPU has been marked with CPU_FAULTED.  Not called on the
 * faulted CPU.  cpu_lock is held.
 */
/*ARGSUSED*/
void
ao_faulted_enter(void *data)
{
	/*
	 * Nothing to do here.  We'd like to turn off the faulted CPU's
	 * correctable error detectors, but that can only be done by the
	 * faulted CPU itself.  cpu_get_state() will now return P_FAULTED,
	 * allowing the poller to skip this CPU until it is re-enabled.
	 */
}

/*
 * Called after the CPU_FAULTED bit has been cleared from a previously-faulted
 * CPU.  Not called on the faulted CPU.  cpu_lock is held.
 */
void
ao_faulted_exit(void *data)
{
	ao_data_t *ao = data;

	/*
	 * We'd like to clear the faulted CPU's MCi_STATUS registers so as to
	 * avoid generating ereports for errors which occurred while the CPU
	 * was officially faulted.  Unfortunately, those registers can only be
	 * cleared by the CPU itself, so we can't do it here.
	 *
	 * We're going to set the UNFAULTING bit on the formerly-faulted CPU's
	 * MCA state.  This will tell the poller that the MCi_STATUS registers
	 * can't yet be trusted.  The poller, which is the first thing we
	 * control that'll execute on that CPU, will clear the registers, and
	 * will then clear the bit.
	 */

	ao->ao_mca.ao_mca_flags |= AO_MCA_F_UNFAULTING;
}