1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Intel model-specific support. Right now all this conists of is 29 * to modify the ereport subclass to produce different ereport classes 30 * so that we can have different diagnosis rules and corresponding faults. 31 */ 32 33 #include <sys/types.h> 34 #include <sys/cmn_err.h> 35 #include <sys/modctl.h> 36 #include <sys/mca_x86.h> 37 #include <sys/cpu_module_ms_impl.h> 38 #include <sys/mc_intel.h> 39 #include <sys/pci_cfgspace.h> 40 #include <sys/fm/protocol.h> 41 42 int gintel_ms_support_disable = 0; 43 int gintel_error_action_return = 0; 44 int gintel_ms_unconstrained = 0; 45 46 int quickpath; 47 int max_bus_number = 0xff; 48 49 #define ERR_COUNTER_INDEX 2 50 #define MAX_CPU_NODES 2 51 #define N_MC_COR_ECC_CNT 6 52 uint32_t err_counter_array[MAX_CPU_NODES][ERR_COUNTER_INDEX][N_MC_COR_ECC_CNT]; 53 uint8_t err_counter_index[MAX_CPU_NODES]; 54 55 #define MAX_BUS_NUMBER max_bus_number 56 #define SOCKET_BUS(cpu) (MAX_BUS_NUMBER - (cpu)) 57 58 #define MC_COR_ECC_CNT(chipid, reg) (*pci_getl_func)(SOCKET_BUS(chipid), \ 59 NEHALEM_EP_MEMORY_CONTROLLER_DEV, NEHALEM_EP_MEMORY_CONTROLLER_FUNC, \ 60 0x80 + (reg) * 4) 61 62 #define MSCOD_MEM_ECC_READ 0x1 63 #define MSCOD_MEM_ECC_SCRUB 0x2 64 #define MSCOD_MEM_WR_PARITY 0x4 65 #define MSCOD_MEM_REDUNDANT_MEM 0x8 66 #define MSCOD_MEM_SPARE_MEM 0x10 67 #define MSCOD_MEM_ILLEGAL_ADDR 0x20 68 #define MSCOD_MEM_BAD_ID 0x40 69 #define MSCOD_MEM_ADDR_PARITY 0x80 70 #define MSCOD_MEM_BYTE_PARITY 0x100 71 72 #define GINTEL_ERROR_MEM 0x1000 73 #define GINTEL_ERROR_QUICKPATH 0x2000 74 75 #define GINTEL_ERR_SPARE_MEM (GINTEL_ERROR_MEM | 1) 76 #define GINTEL_ERR_MEM_UE (GINTEL_ERROR_MEM | 2) 77 #define GINTEL_ERR_MEM_CE (GINTEL_ERROR_MEM | 3) 78 #define GINTEL_ERR_MEM_PARITY (GINTEL_ERROR_MEM | 4) 79 #define GINTEL_ERR_MEM_ADDR_PARITY (GINTEL_ERROR_MEM | 5) 80 #define GINTEL_ERR_MEM_REDUNDANT (GINTEL_ERROR_MEM | 6) 81 #define GINTEL_ERR_MEM_BAD_ADDR (GINTEL_ERROR_MEM | 7) 82 #define GINTEL_ERR_MEM_BAD_ID (GINTEL_ERROR_MEM | 8) 83 #define GINTEL_ERR_MEM_UNKNOWN (GINTEL_ERROR_MEM | 0xfff) 84 85 #define MSR_MC_MISC_MEM_CHANNEL_MASK 0x00000000000c0000ULL 86 #define MSR_MC_MISC_MEM_CHANNEL_SHIFT 18 87 #define MSR_MC_MISC_MEM_DIMM_MASK 0x0000000000030000ULL 88 #define MSR_MC_MISC_MEM_DIMM_SHIFT 16 89 #define MSR_MC_MISC_MEM_SYNDROME_MASK 0xffffffff00000000ULL 90 #define MSR_MC_MISC_MEM_SYNDROME_SHIFT 32 91 92 #define CPU_GENERATION_DONT_CARE 0 93 #define CPU_GENERATION_NEHALEM_EP 1 94 95 #define INTEL_NEHALEM_CPU_FAMILY_ID 0x6 96 #define INTEL_NEHALEM_CPU_MODEL_ID 0x1A 97 98 #define NEHALEM_EP_MEMORY_CONTROLLER_DEV 0x3 99 #define NEHALEM_EP_MEMORY_CONTROLLER_FUNC 0x2 100 101 /*ARGSUSED*/ 102 int 103 gintel_init(cmi_hdl_t hdl, void **datap) 104 { 105 uint32_t nb_chipset; 106 107 if (gintel_ms_support_disable) 108 return (ENOTSUP); 109 110 if (!(x86_feature & X86_MCA)) 111 return (ENOTSUP); 112 113 nb_chipset = (*pci_getl_func)(0, 0, 0, 0x0); 114 switch (nb_chipset) { 115 case INTEL_NB_7300: 116 case INTEL_NB_5000P: 117 case INTEL_NB_5000X: 118 case INTEL_NB_5000V: 119 case INTEL_NB_5000Z: 120 case INTEL_NB_5400: 121 case INTEL_NB_5400A: 122 case INTEL_NB_5400B: 123 if (!gintel_ms_unconstrained) 124 gintel_error_action_return |= CMS_ERRSCOPE_POISONED; 125 break; 126 case INTEL_QP_IO: 127 case INTEL_QP_WP: 128 case INTEL_QP_36D: 129 case INTEL_QP_24D: 130 case INTEL_QP_U1: 131 case INTEL_QP_U2: 132 case INTEL_QP_U3: 133 case INTEL_QP_U4: 134 case INTEL_QP_JF: 135 case INTEL_QP_JF0: 136 case INTEL_QP_JF1: 137 case INTEL_QP_JF2: 138 case INTEL_QP_JF3: 139 case INTEL_QP_JF4: 140 case INTEL_QP_JF5: 141 case INTEL_QP_JF6: 142 case INTEL_QP_JF7: 143 case INTEL_QP_JF8: 144 case INTEL_QP_JF9: 145 case INTEL_QP_JFa: 146 case INTEL_QP_JFb: 147 case INTEL_QP_JFc: 148 case INTEL_QP_JFd: 149 case INTEL_QP_JFe: 150 case INTEL_QP_JFf: 151 quickpath = 1; 152 break; 153 default: 154 break; 155 } 156 return (0); 157 } 158 159 /*ARGSUSED*/ 160 uint32_t 161 gintel_error_action(cmi_hdl_t hdl, int ismc, int bank, 162 uint64_t status, uint64_t addr, uint64_t misc, void *mslogout) 163 { 164 if ((status & MSR_MC_STATUS_PCC) == 0) 165 return (gintel_error_action_return); 166 else 167 return (gintel_error_action_return & ~CMS_ERRSCOPE_POISONED); 168 } 169 170 /*ARGSUSED*/ 171 cms_cookie_t 172 gintel_disp_match(cmi_hdl_t hdl, int bank, uint64_t status, 173 uint64_t addr, uint64_t misc, void *mslogout) 174 { 175 cms_cookie_t rt = (cms_cookie_t)NULL; 176 uint16_t mcacode = MCAX86_ERRCODE(status); 177 uint16_t mscode = MCAX86_MSERRCODE(status); 178 179 if (MCAX86_ERRCODE_ISMEMORY_CONTROLLER(mcacode)) { 180 /* 181 * memory controller errors 182 */ 183 if (mscode & MSCOD_MEM_SPARE_MEM) { 184 rt = (cms_cookie_t)GINTEL_ERR_SPARE_MEM; 185 } else if (mscode & (MSCOD_MEM_ECC_READ | 186 MSCOD_MEM_ECC_SCRUB)) { 187 if (status & MSR_MC_STATUS_UC) 188 rt = (cms_cookie_t)GINTEL_ERR_MEM_UE; 189 else 190 rt = (cms_cookie_t)GINTEL_ERR_MEM_CE; 191 } else if (mscode & (MSCOD_MEM_WR_PARITY | 192 MSCOD_MEM_BYTE_PARITY)) { 193 rt = (cms_cookie_t)GINTEL_ERR_MEM_PARITY; 194 } else if (mscode & MSCOD_MEM_ADDR_PARITY) { 195 rt = (cms_cookie_t)GINTEL_ERR_MEM_ADDR_PARITY; 196 } else if (mscode & MSCOD_MEM_REDUNDANT_MEM) { 197 rt = (cms_cookie_t)GINTEL_ERR_MEM_REDUNDANT; 198 } else if (mscode & MSCOD_MEM_ILLEGAL_ADDR) { 199 rt = (cms_cookie_t)GINTEL_ERR_MEM_BAD_ADDR; 200 } else if (mscode & MSCOD_MEM_BAD_ID) { 201 rt = (cms_cookie_t)GINTEL_ERR_MEM_BAD_ID; 202 } else { 203 rt = (cms_cookie_t)GINTEL_ERR_MEM_UNKNOWN; 204 } 205 } else if (quickpath && 206 MCAX86_ERRCODE_ISBUS_INTERCONNECT(MCAX86_ERRCODE(status))) { 207 rt = (cms_cookie_t)GINTEL_ERROR_QUICKPATH; 208 } 209 return (rt); 210 } 211 212 /*ARGSUSED*/ 213 void 214 gintel_ereport_class(cmi_hdl_t hdl, cms_cookie_t mscookie, 215 const char **cpuclsp, const char **leafclsp) 216 { 217 *cpuclsp = FM_EREPORT_CPU_INTEL; 218 switch ((uintptr_t)mscookie) { 219 case GINTEL_ERROR_QUICKPATH: 220 *leafclsp = "quickpath.interconnect"; 221 break; 222 case GINTEL_ERR_SPARE_MEM: 223 *leafclsp = "quickpath.mem_spare"; 224 break; 225 case GINTEL_ERR_MEM_UE: 226 *leafclsp = "quickpath.mem_ue"; 227 break; 228 case GINTEL_ERR_MEM_CE: 229 *leafclsp = "quickpath.mem_ce"; 230 break; 231 case GINTEL_ERR_MEM_PARITY: 232 *leafclsp = "quickpath.mem_parity"; 233 break; 234 case GINTEL_ERR_MEM_ADDR_PARITY: 235 *leafclsp = "quickpath.mem_addr_parity"; 236 break; 237 case GINTEL_ERR_MEM_REDUNDANT: 238 *leafclsp = "quickpath.mem_redundant"; 239 break; 240 case GINTEL_ERR_MEM_BAD_ADDR: 241 *leafclsp = "quickpath.mem_bad_addr"; 242 break; 243 case GINTEL_ERR_MEM_BAD_ID: 244 *leafclsp = "quickpath.mem_bad_id"; 245 break; 246 case GINTEL_ERR_MEM_UNKNOWN: 247 *leafclsp = "quickpath.mem_unknown"; 248 break; 249 } 250 } 251 252 nvlist_t * 253 gintel_ereport_detector(cmi_hdl_t hdl, cms_cookie_t mscookie, nv_alloc_t *nva) 254 { 255 nvlist_t *nvl = (nvlist_t *)NULL; 256 257 if (mscookie) { 258 if ((nvl = fm_nvlist_create(nva)) == NULL) 259 return (NULL); 260 if ((uintptr_t)mscookie & GINTEL_ERROR_QUICKPATH) { 261 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, NULL, 2, 262 "motherboard", 0, 263 "chip", cmi_hdl_chipid(hdl)); 264 } else { 265 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, NULL, 3, 266 "motherboard", 0, 267 "chip", cmi_hdl_chipid(hdl), 268 "memory-controller", 0); 269 } 270 } 271 return (nvl); 272 } 273 274 static nvlist_t * 275 gintel_ereport_create_resource_elem(nv_alloc_t *nva, mc_unum_t *unump) 276 { 277 nvlist_t *nvl, *snvl; 278 279 if ((nvl = fm_nvlist_create(nva)) == NULL) /* freed by caller */ 280 return (NULL); 281 282 if ((snvl = fm_nvlist_create(nva)) == NULL) { 283 fm_nvlist_destroy(nvl, nva ? FM_NVA_RETAIN : FM_NVA_FREE); 284 return (NULL); 285 } 286 287 (void) nvlist_add_uint64(snvl, FM_FMRI_HC_SPECIFIC_OFFSET, 288 unump->unum_offset); 289 290 if (unump->unum_chan == -1) { 291 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 3, 292 "motherboard", unump->unum_board, 293 "chip", unump->unum_chip, 294 "memory-controller", unump->unum_mc); 295 } else if (unump->unum_cs == -1) { 296 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 4, 297 "motherboard", unump->unum_board, 298 "chip", unump->unum_chip, 299 "memory-controller", unump->unum_mc, 300 "dram-channel", unump->unum_chan); 301 } else if (unump->unum_rank == -1) { 302 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 5, 303 "motherboard", unump->unum_board, 304 "chip", unump->unum_chip, 305 "memory-controller", unump->unum_mc, 306 "dram-channel", unump->unum_chan, 307 "dimm", unump->unum_cs); 308 } else { 309 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 6, 310 "motherboard", unump->unum_board, 311 "chip", unump->unum_chip, 312 "memory-controller", unump->unum_mc, 313 "dram-channel", unump->unum_chan, 314 "dimm", unump->unum_cs, 315 "rank", unump->unum_rank); 316 } 317 318 fm_nvlist_destroy(snvl, nva ? FM_NVA_RETAIN : FM_NVA_FREE); 319 320 return (nvl); 321 } 322 323 static void 324 nehalem_ep_ereport_add_memory_error_counter(uint_t chipid, 325 uint32_t *this_err_counter_array) 326 { 327 int index; 328 329 for (index = 0; index < N_MC_COR_ECC_CNT; index ++) 330 this_err_counter_array[index] = MC_COR_ECC_CNT(chipid, index); 331 } 332 333 static int 334 gintel_cpu_generation(cmi_hdl_t hdl) 335 { 336 int cpu_generation = CPU_GENERATION_DONT_CARE; 337 338 if ((cmi_hdl_family(hdl) == INTEL_NEHALEM_CPU_FAMILY_ID) && 339 (cmi_hdl_model(hdl) == INTEL_NEHALEM_CPU_MODEL_ID)) 340 cpu_generation = CPU_GENERATION_NEHALEM_EP; 341 342 return (cpu_generation); 343 } 344 345 /*ARGSUSED*/ 346 void 347 gintel_ereport_add_logout(cmi_hdl_t hdl, nvlist_t *ereport, 348 nv_alloc_t *nva, int banknum, uint64_t status, uint64_t addr, 349 uint64_t misc, void *mslogout, cms_cookie_t mscookie) 350 { 351 mc_unum_t unum; 352 nvlist_t *resource; 353 uint32_t synd = 0; 354 int chan = MCAX86_ERRCODE_CCCC(status); 355 uint8_t last_index, this_index; 356 int chipid; 357 358 if (chan == 0xf) 359 chan = -1; 360 361 if ((uintptr_t)mscookie & GINTEL_ERROR_MEM) { 362 unum.unum_board = 0; 363 unum.unum_chip = cmi_hdl_chipid(hdl); 364 unum.unum_mc = 0; 365 unum.unum_chan = chan; 366 unum.unum_cs = -1; 367 unum.unum_rank = -1; 368 unum.unum_offset = -1ULL; 369 if (status & MSR_MC_STATUS_MISCV) { 370 unum.unum_chan = 371 (misc & MSR_MC_MISC_MEM_CHANNEL_MASK) >> 372 MSR_MC_MISC_MEM_CHANNEL_SHIFT; 373 unum.unum_cs = 374 (misc & MSR_MC_MISC_MEM_DIMM_MASK) >> 375 MSR_MC_MISC_MEM_DIMM_SHIFT; 376 synd = (misc & MSR_MC_MISC_MEM_SYNDROME_MASK) >> 377 MSR_MC_MISC_MEM_SYNDROME_SHIFT; 378 fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ECC_SYND, 379 DATA_TYPE_UINT32, synd, 0); 380 } 381 if (status & MSR_MC_STATUS_ADDRV) { 382 fm_payload_set(ereport, FM_FMRI_MEM_PHYSADDR, 383 DATA_TYPE_UINT64, addr, NULL); 384 (void) cmi_mc_patounum(addr, 0, 0, synd, 0, &unum); 385 } 386 resource = gintel_ereport_create_resource_elem(nva, &unum); 387 fm_payload_set(ereport, FM_EREPORT_PAYLOAD_NAME_RESOURCE, 388 DATA_TYPE_NVLIST_ARRAY, 1, &resource, NULL); 389 fm_nvlist_destroy(resource, nva ? FM_NVA_RETAIN:FM_NVA_FREE); 390 391 if (gintel_cpu_generation(hdl) == CPU_GENERATION_NEHALEM_EP) { 392 393 chipid = unum.unum_chip; 394 if (chipid < MAX_CPU_NODES) { 395 last_index = err_counter_index[chipid]; 396 this_index = 397 (last_index + 1) % ERR_COUNTER_INDEX; 398 err_counter_index[chipid] = this_index; 399 nehalem_ep_ereport_add_memory_error_counter( 400 chipid, 401 err_counter_array[chipid][this_index]); 402 fm_payload_set(ereport, 403 FM_EREPORT_PAYLOAD_MEM_ECC_COUNTER_THIS, 404 DATA_TYPE_UINT32_ARRAY, N_MC_COR_ECC_CNT, 405 err_counter_array[chipid][this_index], 406 NULL); 407 fm_payload_set(ereport, 408 FM_EREPORT_PAYLOAD_MEM_ECC_COUNTER_LAST, 409 DATA_TYPE_UINT32_ARRAY, N_MC_COR_ECC_CNT, 410 err_counter_array[chipid][last_index], 411 NULL); 412 } 413 } 414 } 415 } 416 417 boolean_t 418 gintel_bankctl_skipinit(cmi_hdl_t hdl, int banknum) 419 { 420 /* 421 * On Intel family 6 before QuickPath we must not enable machine check 422 * from bank 0 detectors. bank 0 is reserved for the platform 423 */ 424 425 if (banknum == 0 && 426 cmi_hdl_family(hdl) == INTEL_NEHALEM_CPU_FAMILY_ID && 427 cmi_hdl_model(hdl) < INTEL_NEHALEM_CPU_MODEL_ID) 428 return (1); 429 else 430 return (0); 431 } 432 433 cms_api_ver_t _cms_api_version = CMS_API_VERSION_0; 434 435 const cms_ops_t _cms_ops = { 436 gintel_init, /* cms_init */ 437 NULL, /* cms_post_startup */ 438 NULL, /* cms_post_mpstartup */ 439 NULL, /* cms_logout_size */ 440 NULL, /* cms_mcgctl_val */ 441 gintel_bankctl_skipinit, /* cms_bankctl_skipinit */ 442 NULL, /* cms_bankctl_val */ 443 NULL, /* cms_bankstatus_skipinit */ 444 NULL, /* cms_bankstatus_val */ 445 NULL, /* cms_mca_init */ 446 NULL, /* cms_poll_ownermask */ 447 NULL, /* cms_bank_logout */ 448 gintel_error_action, /* cms_error_action */ 449 gintel_disp_match, /* cms_disp_match */ 450 gintel_ereport_class, /* cms_ereport_class */ 451 gintel_ereport_detector, /* cms_ereport_detector */ 452 NULL, /* cms_ereport_includestack */ 453 gintel_ereport_add_logout, /* cms_ereport_add_logout */ 454 NULL, /* cms_msrinject */ 455 NULL, /* cms_fini */ 456 }; 457 458 static struct modlcpu modlcpu = { 459 &mod_cpuops, 460 "Generic Intel model-specific MCA" 461 }; 462 463 static struct modlinkage modlinkage = { 464 MODREV_1, 465 (void *)&modlcpu, 466 NULL 467 }; 468 469 int 470 _init(void) 471 { 472 return (mod_install(&modlinkage)); 473 } 474 475 int 476 _info(struct modinfo *modinfop) 477 { 478 return (mod_info(&modlinkage, modinfop)); 479 } 480 481 int 482 _fini(void) 483 { 484 return (mod_remove(&modlinkage)); 485 } 486