/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Intel model-specific support.  Right now all this consists of is
 * to modify the ereport subclass to produce different ereport classes
 * so that we can have different diagnosis rules and corresponding faults.
 */

#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/modctl.h>
#include <sys/mca_x86.h>
#include <sys/cpu_module_ms_impl.h>
#include <sys/mc_intel.h>
#include <sys/pci_cfgspace.h>
#include <sys/fm/protocol.h>

/* When nonzero, gintel_init() returns ENOTSUP without doing anything else. */
int gintel_ms_support_disable = 0;
/*
 * Value returned by gintel_error_action().  gintel_init() ORs in
 * CMS_ERRSCOPE_POISONED for the INTEL_NB_* chipsets it recognises.
 */
int gintel_error_action_return = 0;
/*
 * When nonzero, gintel_init() does not set CMS_ERRSCOPE_POISONED in
 * gintel_error_action_return for the INTEL_NB_* chipsets.
 */
int gintel_ms_unconstrained = 0;

/* Set to 1 by gintel_init() when the chipset id matches an INTEL_QP_* value. */
int quickpath;
/* Base value from which SOCKET_BUS() derives a per-chip bus number. */
int max_bus_number = 0xff;

/* Number of counter snapshots kept per chip (current and previous). */
#define	ERR_COUNTER_INDEX	2
/* Number of chips for which counter snapshots are kept. */
#define	MAX_CPU_NODES		2
/* Number of MC_COR_ECC_CNT registers read for each snapshot. */
#define	N_MC_COR_ECC_CNT	6
/* Counter snapshots, indexed [chip][snapshot slot][register]. */
uint32_t err_counter_array[MAX_CPU_NODES][ERR_COUNTER_INDEX][N_MC_COR_ECC_CNT];
/* Per chip: the snapshot slot in err_counter_array written most recently. */
uint8_t	err_counter_index[MAX_CPU_NODES];

#define	MAX_BUS_NUMBER	max_bus_number
/* Bus number used for chip "cpu": counts down from MAX_BUS_NUMBER. */
#define	SOCKET_BUS(cpu)	(MAX_BUS_NUMBER - (cpu))

/*
 * Read register "reg" of the correctable ECC counter set for "chipid" with
 * pci_getl_func, at offset 0x80 + reg * 4 of the Nehalem EP memory
 * controller device/function on SOCKET_BUS(chipid).
 */
#define	MC_COR_ECC_CNT(chipid, reg)	(*pci_getl_func)(SOCKET_BUS(chipid), \
    NEHALEM_EP_MEMORY_CONTROLLER_DEV, NEHALEM_EP_MEMORY_CONTROLLER_FUNC, \
    0x80 + (reg) * 4)

/*
 * Bits tested by gintel_disp_match() in the model-specific error code
 * (MCAX86_MSERRCODE(status)) of a memory controller error.
 */
#define	MSCOD_MEM_ECC_READ	0x1
#define	MSCOD_MEM_ECC_SCRUB	0x2
#define	MSCOD_MEM_WR_PARITY	0x4
#define	MSCOD_MEM_REDUNDANT_MEM	0x8
#define	MSCOD_MEM_SPARE_MEM	0x10
#define	MSCOD_MEM_ILLEGAL_ADDR	0x20
#define	MSCOD_MEM_BAD_ID	0x40
#define	MSCOD_MEM_ADDR_PARITY	0x80
#define	MSCOD_MEM_BYTE_PARITY	0x100

/*
 * Class bits of the cookie values returned by gintel_disp_match(): memory
 * cookies carry GINTEL_ERROR_MEM, the interconnect cookie is
 * GINTEL_ERROR_QUICKPATH itself.
 */
#define	GINTEL_ERROR_MEM	0x1000
#define	GINTEL_ERROR_QUICKPATH	0x2000

/* Memory error cookies; gintel_ereport_class() maps each to a leaf class. */
#define	GINTEL_ERR_SPARE_MEM	(GINTEL_ERROR_MEM | 1)
#define	GINTEL_ERR_MEM_UE	(GINTEL_ERROR_MEM | 2)
#define	GINTEL_ERR_MEM_CE	(GINTEL_ERROR_MEM | 3)
#define	GINTEL_ERR_MEM_PARITY	(GINTEL_ERROR_MEM | 4)
#define	GINTEL_ERR_MEM_ADDR_PARITY	(GINTEL_ERROR_MEM | 5)
#define	GINTEL_ERR_MEM_REDUNDANT	(GINTEL_ERROR_MEM | 6)
#define	GINTEL_ERR_MEM_BAD_ADDR	(GINTEL_ERROR_MEM | 7)
#define	GINTEL_ERR_MEM_BAD_ID	(GINTEL_ERROR_MEM | 8)
#define	GINTEL_ERR_MEM_UNKNOWN	(GINTEL_ERROR_MEM | 0xfff)

/*
 * Fields extracted from the "misc" value by gintel_ereport_add_logout()
 * when MSR_MC_STATUS_MISCV is set in the status value.
 */
#define	MSR_MC_MISC_MEM_CHANNEL_MASK	0x00000000000c0000ULL
#define	MSR_MC_MISC_MEM_CHANNEL_SHIFT	18
#define	MSR_MC_MISC_MEM_DIMM_MASK	0x0000000000030000ULL
88 #define MSR_MC_MISC_MEM_DIMM_SHIFT 16 89 #define MSR_MC_MISC_MEM_SYNDROME_MASK 0xffffffff00000000ULL 90 #define MSR_MC_MISC_MEM_SYNDROME_SHIFT 32 91 92 #define CPU_GENERATION_DONT_CARE 0 93 #define CPU_GENERATION_NEHALEM_EP 1 94 95 #define INTEL_NEHALEM_CPU_FAMILY_ID 0x6 96 #define INTEL_NEHALEM_CPU_MODEL_ID 0x1A 97 98 #define NEHALEM_EP_MEMORY_CONTROLLER_DEV 0x3 99 #define NEHALEM_EP_MEMORY_CONTROLLER_FUNC 0x2 100 101 /*ARGSUSED*/ 102 int 103 gintel_init(cmi_hdl_t hdl, void **datap) 104 { 105 uint32_t nb_chipset; 106 107 if (gintel_ms_support_disable) 108 return (ENOTSUP); 109 110 if (!(x86_feature & X86_MCA)) 111 return (ENOTSUP); 112 113 nb_chipset = (*pci_getl_func)(0, 0, 0, 0x0); 114 switch (nb_chipset) { 115 case INTEL_NB_7300: 116 case INTEL_NB_5000P: 117 case INTEL_NB_5000X: 118 case INTEL_NB_5000V: 119 case INTEL_NB_5000Z: 120 case INTEL_NB_5400: 121 case INTEL_NB_5400A: 122 case INTEL_NB_5400B: 123 if (!gintel_ms_unconstrained) 124 gintel_error_action_return |= CMS_ERRSCOPE_POISONED; 125 break; 126 case INTEL_QP_IO: 127 case INTEL_QP_36D: 128 case INTEL_QP_24D: 129 quickpath = 1; 130 break; 131 default: 132 break; 133 } 134 return (0); 135 } 136 137 /*ARGSUSED*/ 138 uint32_t 139 gintel_error_action(cmi_hdl_t hdl, int ismc, int bank, 140 uint64_t status, uint64_t addr, uint64_t misc, void *mslogout) 141 { 142 if ((status & MSR_MC_STATUS_PCC) == 0) 143 return (gintel_error_action_return); 144 else 145 return (gintel_error_action_return & ~CMS_ERRSCOPE_POISONED); 146 } 147 148 /*ARGSUSED*/ 149 cms_cookie_t 150 gintel_disp_match(cmi_hdl_t hdl, int bank, uint64_t status, 151 uint64_t addr, uint64_t misc, void *mslogout) 152 { 153 cms_cookie_t rt = (cms_cookie_t)NULL; 154 uint16_t mcacode = MCAX86_ERRCODE(status); 155 uint16_t mscode = MCAX86_MSERRCODE(status); 156 157 if (MCAX86_ERRCODE_ISMEMORY_CONTROLLER(mcacode)) { 158 /* 159 * memory controller errors 160 */ 161 if (mscode & MSCOD_MEM_SPARE_MEM) { 162 rt = (cms_cookie_t)GINTEL_ERR_SPARE_MEM; 163 } else 
if (mscode & (MSCOD_MEM_ECC_READ | 164 MSCOD_MEM_ECC_SCRUB)) { 165 if (status & MSR_MC_STATUS_UC) 166 rt = (cms_cookie_t)GINTEL_ERR_MEM_UE; 167 else 168 rt = (cms_cookie_t)GINTEL_ERR_MEM_CE; 169 } else if (mscode & (MSCOD_MEM_WR_PARITY | 170 MSCOD_MEM_BYTE_PARITY)) { 171 rt = (cms_cookie_t)GINTEL_ERR_MEM_PARITY; 172 } else if (mscode & MSCOD_MEM_ADDR_PARITY) { 173 rt = (cms_cookie_t)GINTEL_ERR_MEM_ADDR_PARITY; 174 } else if (mscode & MSCOD_MEM_REDUNDANT_MEM) { 175 rt = (cms_cookie_t)GINTEL_ERR_MEM_REDUNDANT; 176 } else if (mscode & MSCOD_MEM_ILLEGAL_ADDR) { 177 rt = (cms_cookie_t)GINTEL_ERR_MEM_BAD_ADDR; 178 } else if (mscode & MSCOD_MEM_BAD_ID) { 179 rt = (cms_cookie_t)GINTEL_ERR_MEM_BAD_ID; 180 } else { 181 rt = (cms_cookie_t)GINTEL_ERR_MEM_UNKNOWN; 182 } 183 } else if (quickpath && 184 MCAX86_ERRCODE_ISBUS_INTERCONNECT(MCAX86_ERRCODE(status))) { 185 rt = (cms_cookie_t)GINTEL_ERROR_QUICKPATH; 186 } 187 return (rt); 188 } 189 190 /*ARGSUSED*/ 191 void 192 gintel_ereport_class(cmi_hdl_t hdl, cms_cookie_t mscookie, 193 const char **cpuclsp, const char **leafclsp) 194 { 195 *cpuclsp = FM_EREPORT_CPU_INTEL; 196 switch ((uintptr_t)mscookie) { 197 case GINTEL_ERROR_QUICKPATH: 198 *leafclsp = "quickpath.interconnect"; 199 break; 200 case GINTEL_ERR_SPARE_MEM: 201 *leafclsp = "quickpath.mem_spare"; 202 break; 203 case GINTEL_ERR_MEM_UE: 204 *leafclsp = "quickpath.mem_ue"; 205 break; 206 case GINTEL_ERR_MEM_CE: 207 *leafclsp = "quickpath.mem_ce"; 208 break; 209 case GINTEL_ERR_MEM_PARITY: 210 *leafclsp = "quickpath.mem_parity"; 211 break; 212 case GINTEL_ERR_MEM_ADDR_PARITY: 213 *leafclsp = "quickpath.mem_addr_parity"; 214 break; 215 case GINTEL_ERR_MEM_REDUNDANT: 216 *leafclsp = "quickpath.mem_redundant"; 217 break; 218 case GINTEL_ERR_MEM_BAD_ADDR: 219 *leafclsp = "quickpath.mem_bad_addr"; 220 break; 221 case GINTEL_ERR_MEM_BAD_ID: 222 *leafclsp = "quickpath.mem_bad_id"; 223 break; 224 case GINTEL_ERR_MEM_UNKNOWN: 225 *leafclsp = "quickpath.mem_unknown"; 226 break; 227 } 
228 } 229 230 nvlist_t * 231 gintel_ereport_detector(cmi_hdl_t hdl, cms_cookie_t mscookie, nv_alloc_t *nva) 232 { 233 nvlist_t *nvl = (nvlist_t *)NULL; 234 235 if (mscookie) { 236 if ((nvl = fm_nvlist_create(nva)) == NULL) 237 return (NULL); 238 if ((uintptr_t)mscookie & GINTEL_ERROR_QUICKPATH) { 239 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, NULL, 2, 240 "motherboard", 0, 241 "chip", cmi_hdl_chipid(hdl)); 242 } else { 243 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, NULL, 3, 244 "motherboard", 0, 245 "chip", cmi_hdl_chipid(hdl), 246 "memory-controller", 0); 247 } 248 } 249 return (nvl); 250 } 251 252 static nvlist_t * 253 gintel_ereport_create_resource_elem(nv_alloc_t *nva, mc_unum_t *unump) 254 { 255 nvlist_t *nvl, *snvl; 256 257 if ((nvl = fm_nvlist_create(nva)) == NULL) /* freed by caller */ 258 return (NULL); 259 260 if ((snvl = fm_nvlist_create(nva)) == NULL) { 261 fm_nvlist_destroy(nvl, nva ? FM_NVA_RETAIN : FM_NVA_FREE); 262 return (NULL); 263 } 264 265 (void) nvlist_add_uint64(snvl, FM_FMRI_HC_SPECIFIC_OFFSET, 266 unump->unum_offset); 267 268 if (unump->unum_chan == -1) { 269 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 3, 270 "motherboard", unump->unum_board, 271 "chip", unump->unum_chip, 272 "memory-controller", unump->unum_mc); 273 } else if (unump->unum_cs == -1) { 274 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 4, 275 "motherboard", unump->unum_board, 276 "chip", unump->unum_chip, 277 "memory-controller", unump->unum_mc, 278 "dram-channel", unump->unum_chan); 279 } else if (unump->unum_rank == -1) { 280 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 5, 281 "motherboard", unump->unum_board, 282 "chip", unump->unum_chip, 283 "memory-controller", unump->unum_mc, 284 "dram-channel", unump->unum_chan, 285 "dimm", unump->unum_cs); 286 } else { 287 fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 6, 288 "motherboard", unump->unum_board, 289 "chip", unump->unum_chip, 290 "memory-controller", unump->unum_mc, 291 
"dram-channel", unump->unum_chan, 292 "dimm", unump->unum_cs, 293 "rank", unump->unum_rank); 294 } 295 296 fm_nvlist_destroy(snvl, nva ? FM_NVA_RETAIN : FM_NVA_FREE); 297 298 return (nvl); 299 } 300 301 static void 302 nehalem_ep_ereport_add_memory_error_counter(uint_t chipid, 303 uint32_t *this_err_counter_array) 304 { 305 int index; 306 307 for (index = 0; index < N_MC_COR_ECC_CNT; index ++) 308 this_err_counter_array[index] = MC_COR_ECC_CNT(chipid, index); 309 } 310 311 static int 312 gintel_cpu_generation(cmi_hdl_t hdl) 313 { 314 int cpu_generation = CPU_GENERATION_DONT_CARE; 315 316 if ((cmi_hdl_family(hdl) == INTEL_NEHALEM_CPU_FAMILY_ID) && 317 (cmi_hdl_model(hdl) == INTEL_NEHALEM_CPU_MODEL_ID)) 318 cpu_generation = CPU_GENERATION_NEHALEM_EP; 319 320 return (cpu_generation); 321 } 322 323 /*ARGSUSED*/ 324 void 325 gintel_ereport_add_logout(cmi_hdl_t hdl, nvlist_t *ereport, 326 nv_alloc_t *nva, int banknum, uint64_t status, uint64_t addr, 327 uint64_t misc, void *mslogout, cms_cookie_t mscookie) 328 { 329 mc_unum_t unum; 330 nvlist_t *resource; 331 uint32_t synd = 0; 332 int chan = MCAX86_ERRCODE_CCCC(status); 333 uint8_t last_index, this_index; 334 int chipid; 335 336 if (chan == 0xf) 337 chan = -1; 338 339 if ((uintptr_t)mscookie & GINTEL_ERROR_MEM) { 340 unum.unum_board = 0; 341 unum.unum_chip = cmi_hdl_chipid(hdl); 342 unum.unum_mc = 0; 343 unum.unum_chan = chan; 344 unum.unum_cs = -1; 345 unum.unum_rank = -1; 346 unum.unum_offset = -1ULL; 347 if (status & MSR_MC_STATUS_MISCV) { 348 unum.unum_chan = 349 (misc & MSR_MC_MISC_MEM_CHANNEL_MASK) >> 350 MSR_MC_MISC_MEM_CHANNEL_SHIFT; 351 unum.unum_cs = 352 (misc & MSR_MC_MISC_MEM_DIMM_MASK) >> 353 MSR_MC_MISC_MEM_DIMM_SHIFT; 354 synd = (misc & MSR_MC_MISC_MEM_SYNDROME_MASK) >> 355 MSR_MC_MISC_MEM_SYNDROME_SHIFT; 356 fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ECC_SYND, 357 DATA_TYPE_UINT32, synd, 0); 358 } 359 if (status & MSR_MC_STATUS_ADDRV) { 360 fm_payload_set(ereport, FM_FMRI_MEM_PHYSADDR, 361 
DATA_TYPE_UINT64, addr, NULL); 362 (void) cmi_mc_patounum(addr, 0, 0, synd, 0, &unum); 363 } 364 resource = gintel_ereport_create_resource_elem(nva, &unum); 365 fm_payload_set(ereport, FM_EREPORT_PAYLOAD_NAME_RESOURCE, 366 DATA_TYPE_NVLIST_ARRAY, 1, &resource, NULL); 367 fm_nvlist_destroy(resource, nva ? FM_NVA_RETAIN:FM_NVA_FREE); 368 369 if (gintel_cpu_generation(hdl) == CPU_GENERATION_NEHALEM_EP) { 370 371 chipid = unum.unum_chip; 372 if (chipid < MAX_CPU_NODES) { 373 last_index = err_counter_index[chipid]; 374 this_index = 375 (last_index + 1) % ERR_COUNTER_INDEX; 376 err_counter_index[chipid] = this_index; 377 nehalem_ep_ereport_add_memory_error_counter( 378 chipid, 379 err_counter_array[chipid][this_index]); 380 fm_payload_set(ereport, 381 FM_EREPORT_PAYLOAD_MEM_ECC_COUNTER_THIS, 382 DATA_TYPE_UINT32_ARRAY, N_MC_COR_ECC_CNT, 383 err_counter_array[chipid][this_index], 384 NULL); 385 fm_payload_set(ereport, 386 FM_EREPORT_PAYLOAD_MEM_ECC_COUNTER_LAST, 387 DATA_TYPE_UINT32_ARRAY, N_MC_COR_ECC_CNT, 388 err_counter_array[chipid][last_index], 389 NULL); 390 } 391 } 392 } 393 } 394 395 boolean_t 396 gintel_bankctl_skipinit(cmi_hdl_t hdl, int banknum) 397 { 398 /* 399 * On Intel family 6 before QuickPath we must not enable machine check 400 * from bank 0 detectors. 
bank 0 is reserved for the platform 401 */ 402 403 if (banknum == 0 && 404 cmi_hdl_family(hdl) == INTEL_NEHALEM_CPU_FAMILY_ID && 405 cmi_hdl_model(hdl) < INTEL_NEHALEM_CPU_MODEL_ID) 406 return (1); 407 else 408 return (0); 409 } 410 411 cms_api_ver_t _cms_api_version = CMS_API_VERSION_0; 412 413 const cms_ops_t _cms_ops = { 414 gintel_init, /* cms_init */ 415 NULL, /* cms_post_startup */ 416 NULL, /* cms_post_mpstartup */ 417 NULL, /* cms_logout_size */ 418 NULL, /* cms_mcgctl_val */ 419 gintel_bankctl_skipinit, /* cms_bankctl_skipinit */ 420 NULL, /* cms_bankctl_val */ 421 NULL, /* cms_bankstatus_skipinit */ 422 NULL, /* cms_bankstatus_val */ 423 NULL, /* cms_mca_init */ 424 NULL, /* cms_poll_ownermask */ 425 NULL, /* cms_bank_logout */ 426 gintel_error_action, /* cms_error_action */ 427 gintel_disp_match, /* cms_disp_match */ 428 gintel_ereport_class, /* cms_ereport_class */ 429 gintel_ereport_detector, /* cms_ereport_detector */ 430 NULL, /* cms_ereport_includestack */ 431 gintel_ereport_add_logout, /* cms_ereport_add_logout */ 432 NULL, /* cms_msrinject */ 433 NULL, /* cms_fini */ 434 }; 435 436 static struct modlcpu modlcpu = { 437 &mod_cpuops, 438 "Generic Intel model-specific MCA" 439 }; 440 441 static struct modlinkage modlinkage = { 442 MODREV_1, 443 (void *)&modlcpu, 444 NULL 445 }; 446 447 int 448 _init(void) 449 { 450 return (mod_install(&modlinkage)); 451 } 452 453 int 454 _info(struct modinfo *modinfop) 455 { 456 return (mod_info(&modlinkage, modinfop)); 457 } 458 459 int 460 _fini(void) 461 { 462 return (mod_remove(&modlinkage)); 463 } 464