1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Intel model-specific support. Right now all this conists of is 29 * to modify the ereport subclass to produce different ereport classes 30 * so that we can have different diagnosis rules and corresponding faults. 31 */ 32 33 #include <sys/types.h> 34 #include <sys/cmn_err.h> 35 #include <sys/modctl.h> 36 #include <sys/mca_x86.h> 37 #include <sys/cpu_module_ms_impl.h> 38 #include <sys/mc_intel.h> 39 #include <sys/pci_cfgspace.h> 40 #include <sys/fm/protocol.h> 41 42 int gintel_ms_support_disable = 0; 43 int gintel_error_action_return = 0; 44 int gintel_ms_unconstrained = 0; 45 46 int quickpath; 47 int max_bus_number = 0xff; 48 49 #define ERR_COUNTER_INDEX 2 50 #define MAX_CPU_NODES 2 51 #define N_MC_COR_ECC_CNT 6 52 uint32_t err_counter_array[MAX_CPU_NODES][ERR_COUNTER_INDEX][N_MC_COR_ECC_CNT]; 53 uint8_t err_counter_index[MAX_CPU_NODES]; 54 55 #define MAX_BUS_NUMBER max_bus_number 56 #define SOCKET_BUS(cpu) (MAX_BUS_NUMBER - (cpu)) 57 58 #define MC_COR_ECC_CNT(chipid, reg) (*pci_getl_func)(SOCKET_BUS(chipid), \ 59 NEHALEM_EP_MEMORY_CONTROLLER_DEV, NEHALEM_EP_MEMORY_CONTROLLER_FUNC, \ 60 0x80 + (reg) * 4) 61 62 #define MSCOD_MEM_ECC_READ 0x1 63 #define MSCOD_MEM_ECC_SCRUB 0x2 64 #define MSCOD_MEM_WR_PARITY 0x4 65 #define MSCOD_MEM_REDUNDANT_MEM 0x8 66 #define MSCOD_MEM_SPARE_MEM 0x10 67 #define MSCOD_MEM_ILLEGAL_ADDR 0x20 68 #define MSCOD_MEM_BAD_ID 0x40 69 #define MSCOD_MEM_ADDR_PARITY 0x80 70 #define MSCOD_MEM_BYTE_PARITY 0x100 71 72 #define GINTEL_ERROR_MEM 0x1000 73 #define GINTEL_ERROR_QUICKPATH 0x2000 74 75 #define GINTEL_ERR_SPARE_MEM (GINTEL_ERROR_MEM | 1) 76 #define GINTEL_ERR_MEM_UE (GINTEL_ERROR_MEM | 2) 77 #define GINTEL_ERR_MEM_CE (GINTEL_ERROR_MEM | 3) 78 #define GINTEL_ERR_MEM_PARITY (GINTEL_ERROR_MEM | 4) 79 #define GINTEL_ERR_MEM_ADDR_PARITY (GINTEL_ERROR_MEM | 5) 80 #define GINTEL_ERR_MEM_REDUNDANT (GINTEL_ERROR_MEM | 6) 81 #define GINTEL_ERR_MEM_BAD_ADDR (GINTEL_ERROR_MEM | 7) 82 #define GINTEL_ERR_MEM_BAD_ID (GINTEL_ERROR_MEM | 8) 83 #define GINTEL_ERR_MEM_UNKNOWN (GINTEL_ERROR_MEM | 0xfff) 84 85 #define MSR_MC_MISC_MEM_CHANNEL_MASK 0x00000000000c0000ULL 86 #define MSR_MC_MISC_MEM_CHANNEL_SHIFT 18 87 #define MSR_MC_MISC_MEM_DIMM_MASK 0x0000000000030000ULL 88 #define MSR_MC_MISC_MEM_DIMM_SHIFT 16 89 #define MSR_MC_MISC_MEM_SYNDROME_MASK 0xffffffff00000000ULL 90 #define MSR_MC_MISC_MEM_SYNDROME_SHIFT 32 91 92 #define CPU_GENERATION_DONT_CARE 0 93 #define CPU_GENERATION_NEHALEM_EP 1 94 95 #define INTEL_NEHALEM_CPU_FAMILY_ID 0x6 96 #define INTEL_NEHALEM_CPU_MODEL_ID 0x1A 97 98 #define NEHALEM_EP_MEMORY_CONTROLLER_DEV 0x3 99 #define NEHALEM_EP_MEMORY_CONTROLLER_FUNC 0x2 100 101 /*ARGSUSED*/ 102 int 103 gintel_init(cmi_hdl_t hdl, void **datap) 104 { 105 uint32_t nb_chipset; 106 107 if (gintel_ms_support_disable) 108 return (ENOTSUP); 109 110 if (!(x86_feature & X86_MCA)) 111 return (ENOTSUP); 112 113 nb_chipset = (*pci_getl_func)(0, 0, 0, 0x0); 114 switch (nb_chipset) { 115 case INTEL_NB_7300: 116 case INTEL_NB_5000P: 117 case INTEL_NB_5000X: 118 case INTEL_NB_5000V: 119 case INTEL_NB_5000Z: 120 case INTEL_NB_5400: 121 case INTEL_NB_5400A: 122 case INTEL_NB_5400B: 123 if (!gintel_ms_unconstrained) 124 gintel_error_action_return |= CMS_ERRSCOPE_POISONED; 125 break; 126 case INTEL_QP_IO: 127 case INTEL_QP_WP: 128 case INTEL_QP_36D: 129 case INTEL_QP_24D: 130 case INTEL_QP_U1: 131 case INTEL_QP_U2: 132 case INTEL_QP_U3: 133 case INTEL_QP_U4: 134 case INTEL_QP_JF: 135 case INTEL_QP_JF0: 136 case INTEL_QP_JF1: 137 case INTEL_QP_JF2: 138 case INTEL_QP_JF3: 139 case INTEL_QP_JF4: 140 case INTEL_QP_JF5: 141 case INTEL_QP_JF6: 142 case INTEL_QP_JF7: 143 case INTEL_QP_JF8: 144 case INTEL_QP_JF9: 145 case INTEL_QP_JFa: 146 case INTEL_QP_JFb: 147 case INTEL_QP_JFc: 148 case INTEL_QP_JFd: 149 case INTEL_QP_JFe: 150 case INTEL_QP_JFf: 151 quickpath = 1; 152 break; 153 default: 154 break; 155 } 156 return (0); 157 } 158 159 /*ARGSUSED*/ 160 uint32_t 161 gintel_error_action(cmi_hdl_t hdl, int ismc, int bank, 162 uint64_t status, uint64_t addr, uint64_t misc, void *mslogout) 163 { 164 if ((status & MSR_MC_STATUS_PCC) == 0) 165 return (gintel_error_action_return); 166 else 167 return (gintel_error_action_return & ~CMS_ERRSCOPE_POISONED); 168 } 169 170 /*ARGSUSED*/ 171 cms_cookie_t 172 gintel_disp_match(cmi_hdl_t hdl, int bank, uint64_t status, 173 uint64_t addr, uint64_t misc, void *mslogout) 174 { 175 cms_cookie_t rt = (cms_cookie_t)NULL; 176 uint16_t mcacode = MCAX86_ERRCODE(status); 177 uint16_t mscode = MCAX86_MSERRCODE(status); 178 179 if (MCAX86_ERRCODE_ISMEMORY_CONTROLLER(mcacode)) { 180 /* 181 * memory controller errors 182 */ 183 if (mscode & MSCOD_MEM_SPARE_MEM) { 184 rt = (cms_cookie_t)GINTEL_ERR_SPARE_MEM; 185 } else if (mscode & (MSCOD_MEM_ECC_READ | 186 MSCOD_MEM_ECC_SCRUB)) { 187 if (status & MSR_MC_STATUS_UC) 188 rt = (cms_cookie_t)GINTEL_ERR_MEM_UE; 189 else 190 rt = (cms_cookie_t)GINTEL_ERR_MEM_CE; 191 } else if (mscode & (MSCOD_MEM_WR_PARITY | 192 MSCOD_MEM_BYTE_PARITY)) { 193 rt = (cms_cookie_t)GINTEL_ERR_MEM_PARITY; 194 } else if (mscode & MSCOD_MEM_ADDR_PARITY) { 195 rt = (cms_cookie_t)GINTEL_ERR_MEM_ADDR_PARITY; 196 } else if (mscode & MSCOD_MEM_REDUNDANT_MEM) { 197 rt = (cms_cookie_t)GINTEL_ERR_MEM_REDUNDANT; 198 } else if (mscode & MSCOD_MEM_ILLEGAL_ADDR) { 199 rt = (cms_cookie_t)GINTEL_ERR_MEM_BAD_ADDR; 200 } else if (mscode & MSCOD_MEM_BAD_ID) { 201 rt = (cms_cookie_t)GINTEL_ERR_MEM_BAD_ID; 202 } else { 203 rt = (cms_cookie_t)GINTEL_ERR_MEM_UNKNOWN; 204 } 205 } else if (quickpath && 206 MCAX86_ERRCODE_ISBUS_INTERCONNECT(MCAX86_ERRCODE(status))) { 207 rt = (cms_cookie_t)GINTEL_ERROR_QUICKPATH; 208 } 209 return (rt); 210 } 211 212 /*ARGSUSED*/ 213 void 214 gintel_ereport_class(cmi_hdl_t hdl, cms_cookie_t mscookie, 215 const char **cpuclsp, const char **leafclsp) 216 { 217 *cpuclsp = FM_EREPORT_CPU_INTEL; 218 switch ((uintptr_t)mscookie) { 219 case GINTEL_ERROR_QUICKPATH: 220 *leafclsp = "quickpath.interconnect"; 221 break; 222 case GINTEL_ERR_SPARE_MEM: 223 *leafclsp = "quickpath.mem_spare"; 224 break; 225 case GINTEL_ERR_MEM_UE: 226 *leafclsp = "quickpath.mem_ue"; 227 break; 228 case GINTEL_ERR_MEM_CE: 229 *leafclsp = "quickpath.mem_ce"; 230 break; 231 case GINTEL_ERR_MEM_PARITY: 232 *leafclsp = "quickpath.mem_parity"; 233 break; 234 case GINTEL_ERR_MEM_ADDR_PARITY: 235 *leafclsp = "quickpath.mem_addr_parity"; 236 break; 237 case GINTEL_ERR_MEM_REDUNDANT: 238 *leafclsp = "quickpath.mem_redundant"; 239 break; 240 case GINTEL_ERR_MEM_BAD_ADDR: 241 *leafclsp = "quickpath.mem_bad_addr"; 242 break; 243 case GINTEL_ERR_MEM_BAD_ID: 244 *leafclsp = "quickpath.mem_bad_id"; 245 break; 246 case GINTEL_ERR_MEM_UNKNOWN: 247 *leafclsp = "quickpath.mem_unknown"; 248 break; 249 } 250 } 251 252 /*ARGSUSED*/ 253 nvlist_t * 254 gintel_ereport_detector(cmi_hdl_t hdl, int bankno, cms_cookie_t mscookie, 255 nv_alloc_t *nva) 256 { 257 nvlist_t *nvl = (nvlist_t *)NULL; 258 259 if (mscookie) { 260 if ((nvl = fm_nvlist_create(nva)) == NULL) 261 return (NULL); 262 if ((uintptr_t)mscookie & GINTEL_ERROR_QUICKPATH) { 263 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, NULL, 2, 264 "motherboard", 0, 265 "chip", cmi_hdl_chipid(hdl)); 266 } else { 267 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, NULL, 3, 268 "motherboard", 0, 269 "chip", cmi_hdl_chipid(hdl), 270 "memory-controller", 0); 271 } 272 } 273 return (nvl); 274 } 275 276 static nvlist_t * 277 gintel_ereport_create_resource_elem(nv_alloc_t *nva, mc_unum_t *unump) 278 { 279 nvlist_t *nvl, *snvl; 280 281 if ((nvl = fm_nvlist_create(nva)) == NULL) /* freed by caller */ 282 return (NULL); 283 284 if ((snvl = fm_nvlist_create(nva)) == NULL) { 285 fm_nvlist_destroy(nvl, nva ? FM_NVA_RETAIN : FM_NVA_FREE); 286 return (NULL); 287 } 288 289 (void) nvlist_add_uint64(snvl, FM_FMRI_HC_SPECIFIC_OFFSET, 290 unump->unum_offset); 291 292 if (unump->unum_chan == -1) { 293 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 3, 294 "motherboard", unump->unum_board, 295 "chip", unump->unum_chip, 296 "memory-controller", unump->unum_mc); 297 } else if (unump->unum_cs == -1) { 298 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 4, 299 "motherboard", unump->unum_board, 300 "chip", unump->unum_chip, 301 "memory-controller", unump->unum_mc, 302 "dram-channel", unump->unum_chan); 303 } else if (unump->unum_rank == -1) { 304 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 5, 305 "motherboard", unump->unum_board, 306 "chip", unump->unum_chip, 307 "memory-controller", unump->unum_mc, 308 "dram-channel", unump->unum_chan, 309 "dimm", unump->unum_cs); 310 } else { 311 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 6, 312 "motherboard", unump->unum_board, 313 "chip", unump->unum_chip, 314 "memory-controller", unump->unum_mc, 315 "dram-channel", unump->unum_chan, 316 "dimm", unump->unum_cs, 317 "rank", unump->unum_rank); 318 } 319 320 fm_nvlist_destroy(snvl, nva ? FM_NVA_RETAIN : FM_NVA_FREE); 321 322 return (nvl); 323 } 324 325 static void 326 nehalem_ep_ereport_add_memory_error_counter(uint_t chipid, 327 uint32_t *this_err_counter_array) 328 { 329 int index; 330 331 for (index = 0; index < N_MC_COR_ECC_CNT; index ++) 332 this_err_counter_array[index] = MC_COR_ECC_CNT(chipid, index); 333 } 334 335 static int 336 gintel_cpu_generation(cmi_hdl_t hdl) 337 { 338 int cpu_generation = CPU_GENERATION_DONT_CARE; 339 340 if ((cmi_hdl_family(hdl) == INTEL_NEHALEM_CPU_FAMILY_ID) && 341 (cmi_hdl_model(hdl) == INTEL_NEHALEM_CPU_MODEL_ID)) 342 cpu_generation = CPU_GENERATION_NEHALEM_EP; 343 344 return (cpu_generation); 345 } 346 347 /*ARGSUSED*/ 348 void 349 gintel_ereport_add_logout(cmi_hdl_t hdl, nvlist_t *ereport, 350 nv_alloc_t *nva, int banknum, uint64_t status, uint64_t addr, 351 uint64_t misc, void *mslogout, cms_cookie_t mscookie) 352 { 353 mc_unum_t unum; 354 nvlist_t *resource; 355 uint32_t synd = 0; 356 int chan = MCAX86_ERRCODE_CCCC(status); 357 uint8_t last_index, this_index; 358 int chipid; 359 360 if (chan == 0xf) 361 chan = -1; 362 363 if ((uintptr_t)mscookie & GINTEL_ERROR_MEM) { 364 unum.unum_board = 0; 365 unum.unum_chip = cmi_hdl_chipid(hdl); 366 unum.unum_mc = 0; 367 unum.unum_chan = chan; 368 unum.unum_cs = -1; 369 unum.unum_rank = -1; 370 unum.unum_offset = -1ULL; 371 if (status & MSR_MC_STATUS_MISCV) { 372 unum.unum_chan = 373 (misc & MSR_MC_MISC_MEM_CHANNEL_MASK) >> 374 MSR_MC_MISC_MEM_CHANNEL_SHIFT; 375 unum.unum_cs = 376 (misc & MSR_MC_MISC_MEM_DIMM_MASK) >> 377 MSR_MC_MISC_MEM_DIMM_SHIFT; 378 synd = (misc & MSR_MC_MISC_MEM_SYNDROME_MASK) >> 379 MSR_MC_MISC_MEM_SYNDROME_SHIFT; 380 fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ECC_SYND, 381 DATA_TYPE_UINT32, synd, 0); 382 } 383 if (status & MSR_MC_STATUS_ADDRV) { 384 fm_payload_set(ereport, FM_FMRI_MEM_PHYSADDR, 385 DATA_TYPE_UINT64, addr, NULL); 386 (void) cmi_mc_patounum(addr, 0, 0, synd, 0, &unum); 387 if (unum.unum_offset != -1ULL && 388 (unum.unum_offset & OFFSET_ROW_BANK_COL) != 0) { 389 fm_payload_set(ereport, 390 FM_EREPORT_PAYLOAD_NAME_BANK, 391 DATA_TYPE_INT32, 392 TCODE_OFFSET_BANK(unum.unum_offset), NULL); 393 fm_payload_set(ereport, 394 FM_EREPORT_PAYLOAD_NAME_CAS, 395 DATA_TYPE_INT32, 396 TCODE_OFFSET_CAS(unum.unum_offset), NULL); 397 fm_payload_set(ereport, 398 FM_EREPORT_PAYLOAD_NAME_RAS, 399 DATA_TYPE_INT32, 400 TCODE_OFFSET_RAS(unum.unum_offset), NULL); 401 } 402 } 403 resource = gintel_ereport_create_resource_elem(nva, &unum); 404 fm_payload_set(ereport, FM_EREPORT_PAYLOAD_NAME_RESOURCE, 405 DATA_TYPE_NVLIST_ARRAY, 1, &resource, NULL); 406 fm_nvlist_destroy(resource, nva ? FM_NVA_RETAIN:FM_NVA_FREE); 407 408 if (gintel_cpu_generation(hdl) == CPU_GENERATION_NEHALEM_EP) { 409 410 chipid = unum.unum_chip; 411 if (chipid < MAX_CPU_NODES) { 412 last_index = err_counter_index[chipid]; 413 this_index = 414 (last_index + 1) % ERR_COUNTER_INDEX; 415 err_counter_index[chipid] = this_index; 416 nehalem_ep_ereport_add_memory_error_counter( 417 chipid, 418 err_counter_array[chipid][this_index]); 419 fm_payload_set(ereport, 420 FM_EREPORT_PAYLOAD_MEM_ECC_COUNTER_THIS, 421 DATA_TYPE_UINT32_ARRAY, N_MC_COR_ECC_CNT, 422 err_counter_array[chipid][this_index], 423 NULL); 424 fm_payload_set(ereport, 425 FM_EREPORT_PAYLOAD_MEM_ECC_COUNTER_LAST, 426 DATA_TYPE_UINT32_ARRAY, N_MC_COR_ECC_CNT, 427 err_counter_array[chipid][last_index], 428 NULL); 429 } 430 } 431 } 432 } 433 434 boolean_t 435 gintel_bankctl_skipinit(cmi_hdl_t hdl, int banknum) 436 { 437 /* 438 * On Intel family 6 before QuickPath we must not enable machine check 439 * from bank 0 detectors. bank 0 is reserved for the platform 440 */ 441 442 if (banknum == 0 && 443 cmi_hdl_family(hdl) == INTEL_NEHALEM_CPU_FAMILY_ID && 444 cmi_hdl_model(hdl) < INTEL_NEHALEM_CPU_MODEL_ID) 445 return (1); 446 else 447 return (0); 448 } 449 450 cms_api_ver_t _cms_api_version = CMS_API_VERSION_0; 451 452 const cms_ops_t _cms_ops = { 453 gintel_init, /* cms_init */ 454 NULL, /* cms_post_startup */ 455 NULL, /* cms_post_mpstartup */ 456 NULL, /* cms_logout_size */ 457 NULL, /* cms_mcgctl_val */ 458 gintel_bankctl_skipinit, /* cms_bankctl_skipinit */ 459 NULL, /* cms_bankctl_val */ 460 NULL, /* cms_bankstatus_skipinit */ 461 NULL, /* cms_bankstatus_val */ 462 NULL, /* cms_mca_init */ 463 NULL, /* cms_poll_ownermask */ 464 NULL, /* cms_bank_logout */ 465 gintel_error_action, /* cms_error_action */ 466 gintel_disp_match, /* cms_disp_match */ 467 gintel_ereport_class, /* cms_ereport_class */ 468 gintel_ereport_detector, /* cms_ereport_detector */ 469 NULL, /* cms_ereport_includestack */ 470 gintel_ereport_add_logout, /* cms_ereport_add_logout */ 471 NULL, /* cms_msrinject */ 472 NULL, /* cms_fini */ 473 }; 474 475 static struct modlcpu modlcpu = { 476 &mod_cpuops, 477 "Generic Intel model-specific MCA" 478 }; 479 480 static struct modlinkage modlinkage = { 481 MODREV_1, 482 (void *)&modlcpu, 483 NULL 484 }; 485 486 int 487 _init(void) 488 { 489 return (mod_install(&modlinkage)); 490 } 491 492 int 493 _info(struct modinfo *modinfop) 494 { 495 return (mod_info(&modlinkage, modinfop)); 496 } 497 498 int 499 _fini(void) 500 { 501 return (mod_remove(&modlinkage)); 502 } 503