xref: /illumos-gate/usr/src/uts/i86pc/cpu/genuineintel/gintel_main.c (revision c90a5fbe436d54a1d69dbcf41c7f1cc7b2834a18)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Intel model-specific support.  Right now all this consists of is
29  * to modify the ereport subclass to produce different ereport classes
30  * so that we can have different diagnosis rules and corresponding faults.
31  */
32 
33 #include <sys/types.h>
34 #include <sys/cmn_err.h>
35 #include <sys/modctl.h>
36 #include <sys/mca_x86.h>
37 #include <sys/cpu_module_ms_impl.h>
38 #include <sys/mc_intel.h>
39 #include <sys/pci_cfgspace.h>
40 #include <sys/fm/protocol.h>
41 
/*
 * Module tunables.
 *
 * gintel_ms_support_disable: when non-zero, gintel_init() returns ENOTSUP.
 * gintel_error_action_return: base value returned by gintel_error_action();
 *	gintel_init() ORs CMS_ERRSCOPE_POISONED into it for the INTEL_NB_*
 *	chipsets unless gintel_ms_unconstrained is non-zero.
 * gintel_ms_unconstrained: when non-zero, suppresses that OR in gintel_init().
 */
42 int gintel_ms_support_disable = 0;
43 int gintel_error_action_return = 0;
44 int gintel_ms_unconstrained = 0;
45 
/*
 * quickpath is set to 1 by gintel_init() when the chipset identifier matches
 * one of the INTEL_QP_* values.  max_bus_number is the base from which
 * SOCKET_BUS() derives a per-chip PCI bus number.
 */
46 int quickpath;
47 int max_bus_number = 0xff;
48 
/*
 * Storage for the correctable-ECC counter snapshots.  For each of
 * MAX_CPU_NODES chips there are ERR_COUNTER_INDEX snapshots, each holding
 * N_MC_COR_ECC_CNT 32-bit register values.  err_counter_index[] records which
 * snapshot slot was written most recently for each chip.
 */
49 #define	ERR_COUNTER_INDEX	2
50 #define	MAX_CPU_NODES		2
51 #define	N_MC_COR_ECC_CNT	6
52 uint32_t err_counter_array[MAX_CPU_NODES][ERR_COUNTER_INDEX][N_MC_COR_ECC_CNT];
53 uint8_t	err_counter_index[MAX_CPU_NODES];
54 
/* PCI bus used for a given chip: max_bus_number minus the chip number. */
55 #define	MAX_BUS_NUMBER  max_bus_number
56 #define	SOCKET_BUS(cpu) (MAX_BUS_NUMBER - (cpu))
57 
/*
 * Read counter register 'reg' for 'chipid' through pci_getl_func: PCI
 * configuration offset 0x80 + reg * 4 of the memory controller
 * device/function on that chip's SOCKET_BUS().
 */
58 #define	MC_COR_ECC_CNT(chipid, reg)	(*pci_getl_func)(SOCKET_BUS(chipid), \
59     NEHALEM_EP_MEMORY_CONTROLLER_DEV, NEHALEM_EP_MEMORY_CONTROLLER_FUNC, \
60     0x80 + (reg) * 4)
61 
/*
 * Bits tested by gintel_disp_match() in the model-specific error code
 * (MCAX86_MSERRCODE(status)) of a memory controller error.
 */
62 #define	MSCOD_MEM_ECC_READ	0x1
63 #define	MSCOD_MEM_ECC_SCRUB	0x2
64 #define	MSCOD_MEM_WR_PARITY	0x4
65 #define	MSCOD_MEM_REDUNDANT_MEM	0x8
66 #define	MSCOD_MEM_SPARE_MEM	0x10
67 #define	MSCOD_MEM_ILLEGAL_ADDR	0x20
68 #define	MSCOD_MEM_BAD_ID	0x40
69 #define	MSCOD_MEM_ADDR_PARITY	0x80
70 #define	MSCOD_MEM_BYTE_PARITY	0x100
71 
/*
 * Cookie values produced by gintel_disp_match().  The two class bits are
 * tested directly by the ereport routines: GINTEL_ERROR_MEM marks memory
 * errors and GINTEL_ERROR_QUICKPATH marks interconnect errors.
 */
72 #define	GINTEL_ERROR_MEM	0x1000
73 #define	GINTEL_ERROR_QUICKPATH	0x2000
74 
/* Memory error cookies: GINTEL_ERROR_MEM combined with a subtype number. */
75 #define	GINTEL_ERR_SPARE_MEM	(GINTEL_ERROR_MEM | 1)
76 #define	GINTEL_ERR_MEM_UE	(GINTEL_ERROR_MEM | 2)
77 #define	GINTEL_ERR_MEM_CE	(GINTEL_ERROR_MEM | 3)
78 #define	GINTEL_ERR_MEM_PARITY	(GINTEL_ERROR_MEM | 4)
79 #define	GINTEL_ERR_MEM_ADDR_PARITY	(GINTEL_ERROR_MEM | 5)
80 #define	GINTEL_ERR_MEM_REDUNDANT (GINTEL_ERROR_MEM | 6)
81 #define	GINTEL_ERR_MEM_BAD_ADDR	(GINTEL_ERROR_MEM | 7)
82 #define	GINTEL_ERR_MEM_BAD_ID	(GINTEL_ERROR_MEM | 8)
83 #define	GINTEL_ERR_MEM_UNKNOWN	(GINTEL_ERROR_MEM | 0xfff)
84 
/*
 * Fields that gintel_ereport_add_logout() extracts from the 'misc' value of
 * a memory error when MSR_MC_STATUS_MISCV is set: channel in bits 19:18,
 * DIMM in bits 17:16 and the ECC syndrome in bits 63:32.
 */
85 #define	MSR_MC_MISC_MEM_CHANNEL_MASK	0x00000000000c0000ULL
86 #define	MSR_MC_MISC_MEM_CHANNEL_SHIFT	18
87 #define	MSR_MC_MISC_MEM_DIMM_MASK	0x0000000000030000ULL
88 #define	MSR_MC_MISC_MEM_DIMM_SHIFT	16
89 #define	MSR_MC_MISC_MEM_SYNDROME_MASK	0xffffffff00000000ULL
90 #define	MSR_MC_MISC_MEM_SYNDROME_SHIFT	32
91 
/* Values returned by gintel_cpu_generation(). */
92 #define	CPU_GENERATION_DONT_CARE	0
93 #define	CPU_GENERATION_NEHALEM_EP	1
94 
/*
 * Family/model pair that gintel_cpu_generation() reports as
 * CPU_GENERATION_NEHALEM_EP.
 */
95 #define	INTEL_NEHALEM_CPU_FAMILY_ID	0x6
96 #define	INTEL_NEHALEM_CPU_MODEL_ID	0x1A
97 
/* PCI device and function numbers used by MC_COR_ECC_CNT(). */
98 #define	NEHALEM_EP_MEMORY_CONTROLLER_DEV	0x3
99 #define	NEHALEM_EP_MEMORY_CONTROLLER_FUNC	0x2
100 
101 /*ARGSUSED*/
102 int
103 gintel_init(cmi_hdl_t hdl, void **datap)
104 {
105 	uint32_t nb_chipset;
106 
107 	if (gintel_ms_support_disable)
108 		return (ENOTSUP);
109 
110 	if (!(x86_feature & X86_MCA))
111 		return (ENOTSUP);
112 
113 	nb_chipset = (*pci_getl_func)(0, 0, 0, 0x0);
114 	switch (nb_chipset) {
115 	case INTEL_NB_7300:
116 	case INTEL_NB_5000P:
117 	case INTEL_NB_5000X:
118 	case INTEL_NB_5000V:
119 	case INTEL_NB_5000Z:
120 	case INTEL_NB_5400:
121 	case INTEL_NB_5400A:
122 	case INTEL_NB_5400B:
123 		if (!gintel_ms_unconstrained)
124 			gintel_error_action_return |= CMS_ERRSCOPE_POISONED;
125 		break;
126 	case INTEL_QP_IO:
127 	case INTEL_QP_WP:
128 	case INTEL_QP_36D:
129 	case INTEL_QP_24D:
130 	case INTEL_QP_U1:
131 	case INTEL_QP_U2:
132 	case INTEL_QP_U3:
133 	case INTEL_QP_U4:
134 	case INTEL_QP_JF:
135 	case INTEL_QP_JF0:
136 	case INTEL_QP_JF1:
137 	case INTEL_QP_JF2:
138 	case INTEL_QP_JF3:
139 	case INTEL_QP_JF4:
140 	case INTEL_QP_JF5:
141 	case INTEL_QP_JF6:
142 	case INTEL_QP_JF7:
143 	case INTEL_QP_JF8:
144 	case INTEL_QP_JF9:
145 	case INTEL_QP_JFa:
146 	case INTEL_QP_JFb:
147 	case INTEL_QP_JFc:
148 	case INTEL_QP_JFd:
149 	case INTEL_QP_JFe:
150 	case INTEL_QP_JFf:
151 		quickpath = 1;
152 		break;
153 	default:
154 		break;
155 	}
156 	return (0);
157 }
158 
159 /*ARGSUSED*/
160 uint32_t
161 gintel_error_action(cmi_hdl_t hdl, int ismc, int bank,
162     uint64_t status, uint64_t addr, uint64_t misc, void *mslogout)
163 {
164 	if ((status & MSR_MC_STATUS_PCC) == 0)
165 		return (gintel_error_action_return);
166 	else
167 		return (gintel_error_action_return & ~CMS_ERRSCOPE_POISONED);
168 }
169 
170 /*ARGSUSED*/
171 cms_cookie_t
172 gintel_disp_match(cmi_hdl_t hdl, int bank, uint64_t status,
173     uint64_t addr, uint64_t misc, void *mslogout)
174 {
175 	cms_cookie_t rt = (cms_cookie_t)NULL;
176 	uint16_t mcacode = MCAX86_ERRCODE(status);
177 	uint16_t mscode = MCAX86_MSERRCODE(status);
178 
179 	if (MCAX86_ERRCODE_ISMEMORY_CONTROLLER(mcacode)) {
180 		/*
181 		 * memory controller errors
182 		 */
183 		if (mscode & MSCOD_MEM_SPARE_MEM) {
184 			rt = (cms_cookie_t)GINTEL_ERR_SPARE_MEM;
185 		} else if (mscode & (MSCOD_MEM_ECC_READ |
186 		    MSCOD_MEM_ECC_SCRUB)) {
187 			if (status & MSR_MC_STATUS_UC)
188 				rt = (cms_cookie_t)GINTEL_ERR_MEM_UE;
189 			else
190 				rt = (cms_cookie_t)GINTEL_ERR_MEM_CE;
191 		} else if (mscode & (MSCOD_MEM_WR_PARITY |
192 		    MSCOD_MEM_BYTE_PARITY)) {
193 			rt = (cms_cookie_t)GINTEL_ERR_MEM_PARITY;
194 		} else if (mscode & MSCOD_MEM_ADDR_PARITY) {
195 			rt = (cms_cookie_t)GINTEL_ERR_MEM_ADDR_PARITY;
196 		} else if (mscode & MSCOD_MEM_REDUNDANT_MEM) {
197 			rt = (cms_cookie_t)GINTEL_ERR_MEM_REDUNDANT;
198 		} else if (mscode & MSCOD_MEM_ILLEGAL_ADDR) {
199 			rt = (cms_cookie_t)GINTEL_ERR_MEM_BAD_ADDR;
200 		} else if (mscode & MSCOD_MEM_BAD_ID) {
201 			rt = (cms_cookie_t)GINTEL_ERR_MEM_BAD_ID;
202 		} else {
203 			rt = (cms_cookie_t)GINTEL_ERR_MEM_UNKNOWN;
204 		}
205 	} else if (quickpath &&
206 	    MCAX86_ERRCODE_ISBUS_INTERCONNECT(MCAX86_ERRCODE(status))) {
207 		rt = (cms_cookie_t)GINTEL_ERROR_QUICKPATH;
208 	}
209 	return (rt);
210 }
211 
212 /*ARGSUSED*/
213 void
214 gintel_ereport_class(cmi_hdl_t hdl, cms_cookie_t mscookie,
215     const char **cpuclsp, const char **leafclsp)
216 {
217 	*cpuclsp = FM_EREPORT_CPU_INTEL;
218 	switch ((uintptr_t)mscookie) {
219 	case GINTEL_ERROR_QUICKPATH:
220 		*leafclsp = "quickpath.interconnect";
221 		break;
222 	case GINTEL_ERR_SPARE_MEM:
223 		*leafclsp = "quickpath.mem_spare";
224 		break;
225 	case GINTEL_ERR_MEM_UE:
226 		*leafclsp = "quickpath.mem_ue";
227 		break;
228 	case GINTEL_ERR_MEM_CE:
229 		*leafclsp = "quickpath.mem_ce";
230 		break;
231 	case GINTEL_ERR_MEM_PARITY:
232 		*leafclsp = "quickpath.mem_parity";
233 		break;
234 	case GINTEL_ERR_MEM_ADDR_PARITY:
235 		*leafclsp = "quickpath.mem_addr_parity";
236 		break;
237 	case GINTEL_ERR_MEM_REDUNDANT:
238 		*leafclsp = "quickpath.mem_redundant";
239 		break;
240 	case GINTEL_ERR_MEM_BAD_ADDR:
241 		*leafclsp = "quickpath.mem_bad_addr";
242 		break;
243 	case GINTEL_ERR_MEM_BAD_ID:
244 		*leafclsp = "quickpath.mem_bad_id";
245 		break;
246 	case GINTEL_ERR_MEM_UNKNOWN:
247 		*leafclsp = "quickpath.mem_unknown";
248 		break;
249 	}
250 }
251 
252 nvlist_t *
253 gintel_ereport_detector(cmi_hdl_t hdl, cms_cookie_t mscookie, nv_alloc_t *nva)
254 {
255 	nvlist_t *nvl = (nvlist_t *)NULL;
256 
257 	if (mscookie) {
258 		if ((nvl = fm_nvlist_create(nva)) == NULL)
259 			return (NULL);
260 		if ((uintptr_t)mscookie & GINTEL_ERROR_QUICKPATH) {
261 			fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, NULL, 2,
262 			    "motherboard", 0,
263 			    "chip", cmi_hdl_chipid(hdl));
264 		} else {
265 			fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, NULL, 3,
266 			    "motherboard", 0,
267 			    "chip", cmi_hdl_chipid(hdl),
268 			    "memory-controller", 0);
269 		}
270 	}
271 	return (nvl);
272 }
273 
274 static nvlist_t *
275 gintel_ereport_create_resource_elem(nv_alloc_t *nva, mc_unum_t *unump)
276 {
277 	nvlist_t *nvl, *snvl;
278 
279 	if ((nvl = fm_nvlist_create(nva)) == NULL)	/* freed by caller */
280 		return (NULL);
281 
282 	if ((snvl = fm_nvlist_create(nva)) == NULL) {
283 		fm_nvlist_destroy(nvl, nva ? FM_NVA_RETAIN : FM_NVA_FREE);
284 		return (NULL);
285 	}
286 
287 	(void) nvlist_add_uint64(snvl, FM_FMRI_HC_SPECIFIC_OFFSET,
288 	    unump->unum_offset);
289 
290 	if (unump->unum_chan == -1) {
291 		fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 3,
292 		    "motherboard", unump->unum_board,
293 		    "chip", unump->unum_chip,
294 		    "memory-controller", unump->unum_mc);
295 	} else if (unump->unum_cs == -1) {
296 		fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 4,
297 		    "motherboard", unump->unum_board,
298 		    "chip", unump->unum_chip,
299 		    "memory-controller", unump->unum_mc,
300 		    "dram-channel", unump->unum_chan);
301 	} else if (unump->unum_rank == -1) {
302 		fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 5,
303 		    "motherboard", unump->unum_board,
304 		    "chip", unump->unum_chip,
305 		    "memory-controller", unump->unum_mc,
306 		    "dram-channel", unump->unum_chan,
307 		    "dimm", unump->unum_cs);
308 	} else {
309 		fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 6,
310 		    "motherboard", unump->unum_board,
311 		    "chip", unump->unum_chip,
312 		    "memory-controller", unump->unum_mc,
313 		    "dram-channel", unump->unum_chan,
314 		    "dimm", unump->unum_cs,
315 		    "rank", unump->unum_rank);
316 	}
317 
318 	fm_nvlist_destroy(snvl, nva ? FM_NVA_RETAIN : FM_NVA_FREE);
319 
320 	return (nvl);
321 }
322 
323 static void
324 nehalem_ep_ereport_add_memory_error_counter(uint_t  chipid,
325     uint32_t *this_err_counter_array)
326 {
327 	int	index;
328 
329 	for (index = 0; index < N_MC_COR_ECC_CNT; index ++)
330 		this_err_counter_array[index] = MC_COR_ECC_CNT(chipid, index);
331 }
332 
333 static int
334 gintel_cpu_generation(cmi_hdl_t hdl)
335 {
336 	int	cpu_generation = CPU_GENERATION_DONT_CARE;
337 
338 	if ((cmi_hdl_family(hdl) == INTEL_NEHALEM_CPU_FAMILY_ID) &&
339 	    (cmi_hdl_model(hdl) == INTEL_NEHALEM_CPU_MODEL_ID))
340 		cpu_generation = CPU_GENERATION_NEHALEM_EP;
341 
342 	return (cpu_generation);
343 }
344 
345 /*ARGSUSED*/
346 void
347 gintel_ereport_add_logout(cmi_hdl_t hdl, nvlist_t *ereport,
348     nv_alloc_t *nva, int banknum, uint64_t status, uint64_t addr,
349     uint64_t misc, void *mslogout, cms_cookie_t mscookie)
350 {
351 	mc_unum_t unum;
352 	nvlist_t *resource;
353 	uint32_t synd = 0;
354 	int  chan = MCAX86_ERRCODE_CCCC(status);
355 	uint8_t last_index, this_index;
356 	int chipid;
357 
358 	if (chan == 0xf)
359 		chan = -1;
360 
361 	if ((uintptr_t)mscookie & GINTEL_ERROR_MEM) {
362 		unum.unum_board = 0;
363 		unum.unum_chip = cmi_hdl_chipid(hdl);
364 		unum.unum_mc = 0;
365 		unum.unum_chan = chan;
366 		unum.unum_cs = -1;
367 		unum.unum_rank = -1;
368 		unum.unum_offset = -1ULL;
369 		if (status & MSR_MC_STATUS_MISCV) {
370 			unum.unum_chan =
371 			    (misc & MSR_MC_MISC_MEM_CHANNEL_MASK) >>
372 			    MSR_MC_MISC_MEM_CHANNEL_SHIFT;
373 			unum.unum_cs =
374 			    (misc & MSR_MC_MISC_MEM_DIMM_MASK) >>
375 			    MSR_MC_MISC_MEM_DIMM_SHIFT;
376 			synd = (misc & MSR_MC_MISC_MEM_SYNDROME_MASK) >>
377 			    MSR_MC_MISC_MEM_SYNDROME_SHIFT;
378 			fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ECC_SYND,
379 			    DATA_TYPE_UINT32, synd, 0);
380 		}
381 		if (status & MSR_MC_STATUS_ADDRV) {
382 			fm_payload_set(ereport, FM_FMRI_MEM_PHYSADDR,
383 			    DATA_TYPE_UINT64, addr, NULL);
384 			(void) cmi_mc_patounum(addr, 0, 0, synd, 0, &unum);
385 		}
386 		resource = gintel_ereport_create_resource_elem(nva, &unum);
387 		fm_payload_set(ereport, FM_EREPORT_PAYLOAD_NAME_RESOURCE,
388 		    DATA_TYPE_NVLIST_ARRAY, 1, &resource, NULL);
389 		fm_nvlist_destroy(resource, nva ? FM_NVA_RETAIN:FM_NVA_FREE);
390 
391 		if (gintel_cpu_generation(hdl) == CPU_GENERATION_NEHALEM_EP) {
392 
393 			chipid = unum.unum_chip;
394 			if (chipid < MAX_CPU_NODES) {
395 				last_index = err_counter_index[chipid];
396 				this_index =
397 				    (last_index + 1) % ERR_COUNTER_INDEX;
398 				err_counter_index[chipid] = this_index;
399 				nehalem_ep_ereport_add_memory_error_counter(
400 				    chipid,
401 				    err_counter_array[chipid][this_index]);
402 				fm_payload_set(ereport,
403 				    FM_EREPORT_PAYLOAD_MEM_ECC_COUNTER_THIS,
404 				    DATA_TYPE_UINT32_ARRAY, N_MC_COR_ECC_CNT,
405 				    err_counter_array[chipid][this_index],
406 				    NULL);
407 				fm_payload_set(ereport,
408 				    FM_EREPORT_PAYLOAD_MEM_ECC_COUNTER_LAST,
409 				    DATA_TYPE_UINT32_ARRAY, N_MC_COR_ECC_CNT,
410 				    err_counter_array[chipid][last_index],
411 				    NULL);
412 			}
413 		}
414 	}
415 }
416 
417 boolean_t
418 gintel_bankctl_skipinit(cmi_hdl_t hdl, int banknum)
419 {
420 	/*
421 	 * On Intel family 6 before QuickPath we must not enable machine check
422 	 * from bank 0 detectors. bank 0 is reserved for the platform
423 	 */
424 
425 	if (banknum == 0 &&
426 	    cmi_hdl_family(hdl) == INTEL_NEHALEM_CPU_FAMILY_ID &&
427 	    cmi_hdl_model(hdl) < INTEL_NEHALEM_CPU_MODEL_ID)
428 		return (1);
429 	else
430 		return (0);
431 }
432 
/* Version of the cms interface that this module declares. */
433 cms_api_ver_t _cms_api_version = CMS_API_VERSION_0;
434 
/*
 * Model-specific operations vector.  Entries are positional; the trailing
 * comment on each line names the slot.  A NULL entry means this module
 * supplies no implementation for that slot.
 */
435 const cms_ops_t _cms_ops = {
436 	gintel_init,		/* cms_init */
437 	NULL,			/* cms_post_startup */
438 	NULL,			/* cms_post_mpstartup */
439 	NULL,			/* cms_logout_size */
440 	NULL,			/* cms_mcgctl_val */
441 	gintel_bankctl_skipinit, /* cms_bankctl_skipinit */
442 	NULL,			/* cms_bankctl_val */
443 	NULL,			/* cms_bankstatus_skipinit */
444 	NULL,			/* cms_bankstatus_val */
445 	NULL,			/* cms_mca_init */
446 	NULL,			/* cms_poll_ownermask */
447 	NULL,			/* cms_bank_logout */
448 	gintel_error_action,	/* cms_error_action */
449 	gintel_disp_match,	/* cms_disp_match */
450 	gintel_ereport_class,	/* cms_ereport_class */
451 	gintel_ereport_detector,	/* cms_ereport_detector */
452 	NULL,			/* cms_ereport_includestack */
453 	gintel_ereport_add_logout,	/* cms_ereport_add_logout */
454 	NULL,			/* cms_msrinject */
455 	NULL,			/* cms_fini */
456 };
457 
/* Module description: mod_cpuops plus the module's name string. */
458 static struct modlcpu modlcpu = {
459 	&mod_cpuops,
460 	"Generic Intel model-specific MCA"
461 };
462 
/*
 * Linkage structure handed to mod_install(), mod_info() and mod_remove()
 * by _init(), _info() and _fini(); it references modlcpu above.
 */
463 static struct modlinkage modlinkage = {
464 	MODREV_1,
465 	(void *)&modlcpu,
466 	NULL
467 };
468 
469 int
470 _init(void)
471 {
472 	return (mod_install(&modlinkage));
473 }
474 
475 int
476 _info(struct modinfo *modinfop)
477 {
478 	return (mod_info(&modlinkage, modinfop));
479 }
480 
481 int
482 _fini(void)
483 {
484 	return (mod_remove(&modlinkage));
485 }
486