/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Intel model-specific support.  Right now all this consists of is
 * modifying the ereport subclass to produce different ereport classes
 * so that we can have different diagnosis rules and corresponding faults.
 */

#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/modctl.h>
#include <sys/mca_x86.h>
#include <sys/cpu_module_ms_impl.h>
#include <sys/mc_intel.h>
#include <sys/pci_cfgspace.h>
#include <sys/fm/protocol.h>

/*
 * Module-wide knobs and state.
 *
 * gintel_ms_support_disable	- when nonzero, gintel_init() returns ENOTSUP.
 * gintel_error_action_return	- value handed back by gintel_error_action();
 *				  gintel_init() ORs CMS_ERRSCOPE_POISONED into
 *				  it for the INTEL_NB_* chipsets it recognizes.
 * gintel_ms_unconstrained	- when nonzero, gintel_init() skips that
 *				  CMS_ERRSCOPE_POISONED update.
 */
int gintel_ms_support_disable = 0;
int gintel_error_action_return = 0;
int gintel_ms_unconstrained = 0;

/*
 * quickpath is set to 1 by gintel_init() when the chipset id it reads matches
 * one of the INTEL_QP_* values; gintel_disp_match() consults it.
 * max_bus_number is the base from which SOCKET_BUS() derives a per-chip PCI
 * bus number.
 */
int quickpath;
int max_bus_number = 0xff;

/*
 * Storage for the correctable-ECC counter snapshots taken by
 * gintel_ereport_add_logout(): for each chip (up to MAX_CPU_NODES) there are
 * ERR_COUNTER_INDEX slots of N_MC_COR_ECC_CNT 32-bit values, and
 * err_counter_index[] records which slot was filled most recently.
 */
#define	ERR_COUNTER_INDEX	2
#define	MAX_CPU_NODES		2
#define	N_MC_COR_ECC_CNT	6
uint32_t err_counter_array[MAX_CPU_NODES][ERR_COUNTER_INDEX][N_MC_COR_ECC_CNT];
uint8_t	err_counter_index[MAX_CPU_NODES];

/* PCI bus number used for a given chip: counts down from max_bus_number. */
#define	MAX_BUS_NUMBER	max_bus_number
#define	SOCKET_BUS(cpu)	(MAX_BUS_NUMBER - (cpu))

/*
 * Read 32-bit counter register number "reg" for chip "chipid" through
 * pci_getl_func, at config-space offset 0x80 + 4 * reg of the
 * NEHALEM_EP_MEMORY_CONTROLLER_DEV/FUNC device on that chip's SOCKET_BUS().
 */
#define	MC_COR_ECC_CNT(chipid, reg)	(*pci_getl_func)(SOCKET_BUS(chipid), \
    NEHALEM_EP_MEMORY_CONTROLLER_DEV, NEHALEM_EP_MEMORY_CONTROLLER_FUNC, \
    0x80 + (reg) * 4)

/*
 * Bits tested by gintel_disp_match() in the model-specific error code
 * (MCAX86_MSERRCODE(status)) of a memory controller error.
 */
#define	MSCOD_MEM_ECC_READ	0x1
#define	MSCOD_MEM_ECC_SCRUB	0x2
#define	MSCOD_MEM_WR_PARITY	0x4
#define	MSCOD_MEM_REDUNDANT_MEM	0x8
#define	MSCOD_MEM_SPARE_MEM	0x10
#define	MSCOD_MEM_ILLEGAL_ADDR	0x20
#define	MSCOD_MEM_BAD_ID	0x40
#define	MSCOD_MEM_ADDR_PARITY	0x80
#define	MSCOD_MEM_BYTE_PARITY	0x100

/*
 * Values carried in the cms_cookie_t returned by gintel_disp_match().
 * GINTEL_ERROR_MEM and GINTEL_ERROR_QUICKPATH are class bits; each
 * GINTEL_ERR_* memory cookie is GINTEL_ERROR_MEM ORed with a small code.
 */
#define	GINTEL_ERROR_MEM	0x1000
#define	GINTEL_ERROR_QUICKPATH	0x2000

#define	GINTEL_ERR_SPARE_MEM	(GINTEL_ERROR_MEM | 1)
#define	GINTEL_ERR_MEM_UE	(GINTEL_ERROR_MEM | 2)
#define	GINTEL_ERR_MEM_CE	(GINTEL_ERROR_MEM | 3)
#define	GINTEL_ERR_MEM_PARITY	(GINTEL_ERROR_MEM | 4)
#define	GINTEL_ERR_MEM_ADDR_PARITY	(GINTEL_ERROR_MEM | 5)
#define	GINTEL_ERR_MEM_REDUNDANT	(GINTEL_ERROR_MEM | 6)
#define	GINTEL_ERR_MEM_BAD_ADDR	(GINTEL_ERROR_MEM | 7)
#define	GINTEL_ERR_MEM_BAD_ID	(GINTEL_ERROR_MEM | 8)
#define	GINTEL_ERR_MEM_UNKNOWN	(GINTEL_ERROR_MEM | 0xfff)

/*
 * Field masks/shifts applied to the bank's MISC value by
 * gintel_ereport_add_logout() when the status reports MISC as valid.
 */
#define	MSR_MC_MISC_MEM_CHANNEL_MASK	0x00000000000c0000ULL
#define	MSR_MC_MISC_MEM_CHANNEL_SHIFT	18
#define	MSR_MC_MISC_MEM_DIMM_MASK	0x0000000000030000ULL
88 #define MSR_MC_MISC_MEM_DIMM_SHIFT 16 89 #define MSR_MC_MISC_MEM_SYNDROME_MASK 0xffffffff00000000ULL 90 #define MSR_MC_MISC_MEM_SYNDROME_SHIFT 32 91 92 #define CPU_GENERATION_DONT_CARE 0 93 #define CPU_GENERATION_NEHALEM_EP 1 94 95 #define INTEL_NEHALEM_CPU_FAMILY_ID 0x6 96 #define INTEL_NEHALEM_CPU_MODEL_ID 0x1A 97 98 #define NEHALEM_EP_MEMORY_CONTROLLER_DEV 0x3 99 #define NEHALEM_EP_MEMORY_CONTROLLER_FUNC 0x2 100 101 /*ARGSUSED*/ 102 int 103 gintel_init(cmi_hdl_t hdl, void **datap) 104 { 105 uint32_t nb_chipset; 106 107 if (gintel_ms_support_disable) 108 return (ENOTSUP); 109 110 if (!(x86_feature & X86_MCA)) 111 return (ENOTSUP); 112 113 nb_chipset = (*pci_getl_func)(0, 0, 0, 0x0); 114 switch (nb_chipset) { 115 case INTEL_NB_7300: 116 case INTEL_NB_5000P: 117 case INTEL_NB_5000X: 118 case INTEL_NB_5000V: 119 case INTEL_NB_5000Z: 120 case INTEL_NB_5400: 121 case INTEL_NB_5400A: 122 case INTEL_NB_5400B: 123 if (!gintel_ms_unconstrained) 124 gintel_error_action_return |= CMS_ERRSCOPE_POISONED; 125 break; 126 case INTEL_QP_IO: 127 case INTEL_QP_WP: 128 case INTEL_QP_36D: 129 case INTEL_QP_24D: 130 case INTEL_QP_U1: 131 case INTEL_QP_U2: 132 case INTEL_QP_U3: 133 case INTEL_QP_U4: 134 quickpath = 1; 135 break; 136 default: 137 break; 138 } 139 return (0); 140 } 141 142 /*ARGSUSED*/ 143 uint32_t 144 gintel_error_action(cmi_hdl_t hdl, int ismc, int bank, 145 uint64_t status, uint64_t addr, uint64_t misc, void *mslogout) 146 { 147 if ((status & MSR_MC_STATUS_PCC) == 0) 148 return (gintel_error_action_return); 149 else 150 return (gintel_error_action_return & ~CMS_ERRSCOPE_POISONED); 151 } 152 153 /*ARGSUSED*/ 154 cms_cookie_t 155 gintel_disp_match(cmi_hdl_t hdl, int bank, uint64_t status, 156 uint64_t addr, uint64_t misc, void *mslogout) 157 { 158 cms_cookie_t rt = (cms_cookie_t)NULL; 159 uint16_t mcacode = MCAX86_ERRCODE(status); 160 uint16_t mscode = MCAX86_MSERRCODE(status); 161 162 if (MCAX86_ERRCODE_ISMEMORY_CONTROLLER(mcacode)) { 163 /* 164 * memory controller 
errors 165 */ 166 if (mscode & MSCOD_MEM_SPARE_MEM) { 167 rt = (cms_cookie_t)GINTEL_ERR_SPARE_MEM; 168 } else if (mscode & (MSCOD_MEM_ECC_READ | 169 MSCOD_MEM_ECC_SCRUB)) { 170 if (status & MSR_MC_STATUS_UC) 171 rt = (cms_cookie_t)GINTEL_ERR_MEM_UE; 172 else 173 rt = (cms_cookie_t)GINTEL_ERR_MEM_CE; 174 } else if (mscode & (MSCOD_MEM_WR_PARITY | 175 MSCOD_MEM_BYTE_PARITY)) { 176 rt = (cms_cookie_t)GINTEL_ERR_MEM_PARITY; 177 } else if (mscode & MSCOD_MEM_ADDR_PARITY) { 178 rt = (cms_cookie_t)GINTEL_ERR_MEM_ADDR_PARITY; 179 } else if (mscode & MSCOD_MEM_REDUNDANT_MEM) { 180 rt = (cms_cookie_t)GINTEL_ERR_MEM_REDUNDANT; 181 } else if (mscode & MSCOD_MEM_ILLEGAL_ADDR) { 182 rt = (cms_cookie_t)GINTEL_ERR_MEM_BAD_ADDR; 183 } else if (mscode & MSCOD_MEM_BAD_ID) { 184 rt = (cms_cookie_t)GINTEL_ERR_MEM_BAD_ID; 185 } else { 186 rt = (cms_cookie_t)GINTEL_ERR_MEM_UNKNOWN; 187 } 188 } else if (quickpath && 189 MCAX86_ERRCODE_ISBUS_INTERCONNECT(MCAX86_ERRCODE(status))) { 190 rt = (cms_cookie_t)GINTEL_ERROR_QUICKPATH; 191 } 192 return (rt); 193 } 194 195 /*ARGSUSED*/ 196 void 197 gintel_ereport_class(cmi_hdl_t hdl, cms_cookie_t mscookie, 198 const char **cpuclsp, const char **leafclsp) 199 { 200 *cpuclsp = FM_EREPORT_CPU_INTEL; 201 switch ((uintptr_t)mscookie) { 202 case GINTEL_ERROR_QUICKPATH: 203 *leafclsp = "quickpath.interconnect"; 204 break; 205 case GINTEL_ERR_SPARE_MEM: 206 *leafclsp = "quickpath.mem_spare"; 207 break; 208 case GINTEL_ERR_MEM_UE: 209 *leafclsp = "quickpath.mem_ue"; 210 break; 211 case GINTEL_ERR_MEM_CE: 212 *leafclsp = "quickpath.mem_ce"; 213 break; 214 case GINTEL_ERR_MEM_PARITY: 215 *leafclsp = "quickpath.mem_parity"; 216 break; 217 case GINTEL_ERR_MEM_ADDR_PARITY: 218 *leafclsp = "quickpath.mem_addr_parity"; 219 break; 220 case GINTEL_ERR_MEM_REDUNDANT: 221 *leafclsp = "quickpath.mem_redundant"; 222 break; 223 case GINTEL_ERR_MEM_BAD_ADDR: 224 *leafclsp = "quickpath.mem_bad_addr"; 225 break; 226 case GINTEL_ERR_MEM_BAD_ID: 227 *leafclsp = 
"quickpath.mem_bad_id"; 228 break; 229 case GINTEL_ERR_MEM_UNKNOWN: 230 *leafclsp = "quickpath.mem_unknown"; 231 break; 232 } 233 } 234 235 nvlist_t * 236 gintel_ereport_detector(cmi_hdl_t hdl, cms_cookie_t mscookie, nv_alloc_t *nva) 237 { 238 nvlist_t *nvl = (nvlist_t *)NULL; 239 240 if (mscookie) { 241 if ((nvl = fm_nvlist_create(nva)) == NULL) 242 return (NULL); 243 if ((uintptr_t)mscookie & GINTEL_ERROR_QUICKPATH) { 244 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, NULL, 2, 245 "motherboard", 0, 246 "chip", cmi_hdl_chipid(hdl)); 247 } else { 248 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, NULL, 3, 249 "motherboard", 0, 250 "chip", cmi_hdl_chipid(hdl), 251 "memory-controller", 0); 252 } 253 } 254 return (nvl); 255 } 256 257 static nvlist_t * 258 gintel_ereport_create_resource_elem(nv_alloc_t *nva, mc_unum_t *unump) 259 { 260 nvlist_t *nvl, *snvl; 261 262 if ((nvl = fm_nvlist_create(nva)) == NULL) /* freed by caller */ 263 return (NULL); 264 265 if ((snvl = fm_nvlist_create(nva)) == NULL) { 266 fm_nvlist_destroy(nvl, nva ? 
FM_NVA_RETAIN : FM_NVA_FREE); 267 return (NULL); 268 } 269 270 (void) nvlist_add_uint64(snvl, FM_FMRI_HC_SPECIFIC_OFFSET, 271 unump->unum_offset); 272 273 if (unump->unum_chan == -1) { 274 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 3, 275 "motherboard", unump->unum_board, 276 "chip", unump->unum_chip, 277 "memory-controller", unump->unum_mc); 278 } else if (unump->unum_cs == -1) { 279 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 4, 280 "motherboard", unump->unum_board, 281 "chip", unump->unum_chip, 282 "memory-controller", unump->unum_mc, 283 "dram-channel", unump->unum_chan); 284 } else if (unump->unum_rank == -1) { 285 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 5, 286 "motherboard", unump->unum_board, 287 "chip", unump->unum_chip, 288 "memory-controller", unump->unum_mc, 289 "dram-channel", unump->unum_chan, 290 "dimm", unump->unum_cs); 291 } else { 292 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 6, 293 "motherboard", unump->unum_board, 294 "chip", unump->unum_chip, 295 "memory-controller", unump->unum_mc, 296 "dram-channel", unump->unum_chan, 297 "dimm", unump->unum_cs, 298 "rank", unump->unum_rank); 299 } 300 301 fm_nvlist_destroy(snvl, nva ? 
FM_NVA_RETAIN : FM_NVA_FREE); 302 303 return (nvl); 304 } 305 306 static void 307 nehalem_ep_ereport_add_memory_error_counter(uint_t chipid, 308 uint32_t *this_err_counter_array) 309 { 310 int index; 311 312 for (index = 0; index < N_MC_COR_ECC_CNT; index ++) 313 this_err_counter_array[index] = MC_COR_ECC_CNT(chipid, index); 314 } 315 316 static int 317 gintel_cpu_generation(cmi_hdl_t hdl) 318 { 319 int cpu_generation = CPU_GENERATION_DONT_CARE; 320 321 if ((cmi_hdl_family(hdl) == INTEL_NEHALEM_CPU_FAMILY_ID) && 322 (cmi_hdl_model(hdl) == INTEL_NEHALEM_CPU_MODEL_ID)) 323 cpu_generation = CPU_GENERATION_NEHALEM_EP; 324 325 return (cpu_generation); 326 } 327 328 /*ARGSUSED*/ 329 void 330 gintel_ereport_add_logout(cmi_hdl_t hdl, nvlist_t *ereport, 331 nv_alloc_t *nva, int banknum, uint64_t status, uint64_t addr, 332 uint64_t misc, void *mslogout, cms_cookie_t mscookie) 333 { 334 mc_unum_t unum; 335 nvlist_t *resource; 336 uint32_t synd = 0; 337 int chan = MCAX86_ERRCODE_CCCC(status); 338 uint8_t last_index, this_index; 339 int chipid; 340 341 if (chan == 0xf) 342 chan = -1; 343 344 if ((uintptr_t)mscookie & GINTEL_ERROR_MEM) { 345 unum.unum_board = 0; 346 unum.unum_chip = cmi_hdl_chipid(hdl); 347 unum.unum_mc = 0; 348 unum.unum_chan = chan; 349 unum.unum_cs = -1; 350 unum.unum_rank = -1; 351 unum.unum_offset = -1ULL; 352 if (status & MSR_MC_STATUS_MISCV) { 353 unum.unum_chan = 354 (misc & MSR_MC_MISC_MEM_CHANNEL_MASK) >> 355 MSR_MC_MISC_MEM_CHANNEL_SHIFT; 356 unum.unum_cs = 357 (misc & MSR_MC_MISC_MEM_DIMM_MASK) >> 358 MSR_MC_MISC_MEM_DIMM_SHIFT; 359 synd = (misc & MSR_MC_MISC_MEM_SYNDROME_MASK) >> 360 MSR_MC_MISC_MEM_SYNDROME_SHIFT; 361 fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ECC_SYND, 362 DATA_TYPE_UINT32, synd, 0); 363 } 364 if (status & MSR_MC_STATUS_ADDRV) { 365 fm_payload_set(ereport, FM_FMRI_MEM_PHYSADDR, 366 DATA_TYPE_UINT64, addr, NULL); 367 (void) cmi_mc_patounum(addr, 0, 0, synd, 0, &unum); 368 } 369 resource = gintel_ereport_create_resource_elem(nva, 
&unum); 370 fm_payload_set(ereport, FM_EREPORT_PAYLOAD_NAME_RESOURCE, 371 DATA_TYPE_NVLIST_ARRAY, 1, &resource, NULL); 372 fm_nvlist_destroy(resource, nva ? FM_NVA_RETAIN:FM_NVA_FREE); 373 374 if (gintel_cpu_generation(hdl) == CPU_GENERATION_NEHALEM_EP) { 375 376 chipid = unum.unum_chip; 377 if (chipid < MAX_CPU_NODES) { 378 last_index = err_counter_index[chipid]; 379 this_index = 380 (last_index + 1) % ERR_COUNTER_INDEX; 381 err_counter_index[chipid] = this_index; 382 nehalem_ep_ereport_add_memory_error_counter( 383 chipid, 384 err_counter_array[chipid][this_index]); 385 fm_payload_set(ereport, 386 FM_EREPORT_PAYLOAD_MEM_ECC_COUNTER_THIS, 387 DATA_TYPE_UINT32_ARRAY, N_MC_COR_ECC_CNT, 388 err_counter_array[chipid][this_index], 389 NULL); 390 fm_payload_set(ereport, 391 FM_EREPORT_PAYLOAD_MEM_ECC_COUNTER_LAST, 392 DATA_TYPE_UINT32_ARRAY, N_MC_COR_ECC_CNT, 393 err_counter_array[chipid][last_index], 394 NULL); 395 } 396 } 397 } 398 } 399 400 boolean_t 401 gintel_bankctl_skipinit(cmi_hdl_t hdl, int banknum) 402 { 403 /* 404 * On Intel family 6 before QuickPath we must not enable machine check 405 * from bank 0 detectors. 
bank 0 is reserved for the platform 406 */ 407 408 if (banknum == 0 && 409 cmi_hdl_family(hdl) == INTEL_NEHALEM_CPU_FAMILY_ID && 410 cmi_hdl_model(hdl) < INTEL_NEHALEM_CPU_MODEL_ID) 411 return (1); 412 else 413 return (0); 414 } 415 416 cms_api_ver_t _cms_api_version = CMS_API_VERSION_0; 417 418 const cms_ops_t _cms_ops = { 419 gintel_init, /* cms_init */ 420 NULL, /* cms_post_startup */ 421 NULL, /* cms_post_mpstartup */ 422 NULL, /* cms_logout_size */ 423 NULL, /* cms_mcgctl_val */ 424 gintel_bankctl_skipinit, /* cms_bankctl_skipinit */ 425 NULL, /* cms_bankctl_val */ 426 NULL, /* cms_bankstatus_skipinit */ 427 NULL, /* cms_bankstatus_val */ 428 NULL, /* cms_mca_init */ 429 NULL, /* cms_poll_ownermask */ 430 NULL, /* cms_bank_logout */ 431 gintel_error_action, /* cms_error_action */ 432 gintel_disp_match, /* cms_disp_match */ 433 gintel_ereport_class, /* cms_ereport_class */ 434 gintel_ereport_detector, /* cms_ereport_detector */ 435 NULL, /* cms_ereport_includestack */ 436 gintel_ereport_add_logout, /* cms_ereport_add_logout */ 437 NULL, /* cms_msrinject */ 438 NULL, /* cms_fini */ 439 }; 440 441 static struct modlcpu modlcpu = { 442 &mod_cpuops, 443 "Generic Intel model-specific MCA" 444 }; 445 446 static struct modlinkage modlinkage = { 447 MODREV_1, 448 (void *)&modlcpu, 449 NULL 450 }; 451 452 int 453 _init(void) 454 { 455 return (mod_install(&modlinkage)); 456 } 457 458 int 459 _info(struct modinfo *modinfop) 460 { 461 return (mod_info(&modlinkage, modinfop)); 462 } 463 464 int 465 _fini(void) 466 { 467 return (mod_remove(&modlinkage)); 468 } 469