xref: /titanic_44/usr/src/uts/i86pc/cpu/genuineintel/gintel_main.c (revision 530f2c280d739b194cfbb75f25352b75bb99b4b2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Intel model-specific support.  Right now all this consists of is
29  * to modify the ereport subclass to produce different ereport classes
30  * so that we can have different diagnosis rules and corresponding faults.
31  */
32 
33 #include <sys/types.h>
34 #include <sys/cmn_err.h>
35 #include <sys/modctl.h>
36 #include <sys/mca_x86.h>
37 #include <sys/cpu_module_ms_impl.h>
38 #include <sys/mc_intel.h>
39 #include <sys/pci_cfgspace.h>
40 #include <sys/fm/protocol.h>
41 
/*
 * Module-wide knobs and state.
 *
 * gintel_ms_support_disable	nonzero makes gintel_init() return ENOTSUP
 * gintel_error_action_return	value handed back by gintel_error_action();
 *				gintel_init() may OR CMS_ERRSCOPE_POISONED
 *				into it
 * gintel_ms_unconstrained	nonzero stops gintel_init() from adding
 *				CMS_ERRSCOPE_POISONED
 */
int gintel_ms_support_disable = 0;
int gintel_error_action_return = 0;
int gintel_ms_unconstrained = 0;

/* Set to 1 by gintel_init() when a QuickPath chipset ID is recognised. */
int quickpath;

/* Bus number from which SOCKET_BUS() counts down, one bus per chip. */
int max_bus_number = 0xff;
48 
/*
 * Per-chip storage for memory error counter snapshots.  Each chip keeps
 * ERR_COUNTER_INDEX snapshots of N_MC_COR_ECC_CNT 32-bit registers, and
 * err_counter_index[] remembers which snapshot slot was written last.
 */
#define	ERR_COUNTER_INDEX	2
#define	MAX_CPU_NODES		2
#define	N_MC_COR_ECC_CNT	6

uint32_t err_counter_array[MAX_CPU_NODES][ERR_COUNTER_INDEX][N_MC_COR_ECC_CNT];
uint8_t	err_counter_index[MAX_CPU_NODES];

/* PCI bus used for a given chip: counts down from max_bus_number. */
#define	MAX_BUS_NUMBER	max_bus_number
#define	SOCKET_BUS(cpu)	(MAX_BUS_NUMBER - (cpu))

/*
 * Read 32-bit counter register 'reg' of chip 'chipid': config offset
 * 0x80 + 4 * reg of the memory controller device/function on that
 * chip's bus.
 */
#define	MC_COR_ECC_CNT(chipid, reg) \
	(*pci_getl_func)(SOCKET_BUS(chipid), \
	NEHALEM_EP_MEMORY_CONTROLLER_DEV, \
	NEHALEM_EP_MEMORY_CONTROLLER_FUNC, \
	0x80 + (reg) * 4)
61 
/*
 * Bits tested in the model-specific error code of a memory controller
 * machine-check status (see gintel_disp_match()).
 */
#define	MSCOD_MEM_ECC_READ	0x1
#define	MSCOD_MEM_ECC_SCRUB	0x2
#define	MSCOD_MEM_WR_PARITY	0x4
#define	MSCOD_MEM_REDUNDANT_MEM	0x8
#define	MSCOD_MEM_SPARE_MEM	0x10
#define	MSCOD_MEM_ILLEGAL_ADDR	0x20
#define	MSCOD_MEM_BAD_ID	0x40
#define	MSCOD_MEM_ADDR_PARITY	0x80
#define	MSCOD_MEM_BYTE_PARITY	0x100

/*
 * Disposition cookies returned by gintel_disp_match().  The top-level
 * class bit (memory vs. QuickPath) is tested on its own by the ereport
 * routines; the low bits select the memory error subtype.
 */
#define	GINTEL_ERROR_MEM	0x1000
#define	GINTEL_ERROR_QUICKPATH	0x2000

#define	GINTEL_ERR_SPARE_MEM	(GINTEL_ERROR_MEM | 1)
#define	GINTEL_ERR_MEM_UE	(GINTEL_ERROR_MEM | 2)
#define	GINTEL_ERR_MEM_CE	(GINTEL_ERROR_MEM | 3)
#define	GINTEL_ERR_MEM_PARITY	(GINTEL_ERROR_MEM | 4)
#define	GINTEL_ERR_MEM_ADDR_PARITY	(GINTEL_ERROR_MEM | 5)
#define	GINTEL_ERR_MEM_REDUNDANT (GINTEL_ERROR_MEM | 6)
#define	GINTEL_ERR_MEM_BAD_ADDR	(GINTEL_ERROR_MEM | 7)
#define	GINTEL_ERR_MEM_BAD_ID	(GINTEL_ERROR_MEM | 8)
#define	GINTEL_ERR_MEM_UNKNOWN	(GINTEL_ERROR_MEM | 0xfff)

/*
 * Fields extracted from the MISC value of a memory error:
 * channel in bits 19:18, DIMM in bits 17:16, syndrome in bits 63:32.
 */
#define	MSR_MC_MISC_MEM_CHANNEL_MASK	0x00000000000c0000ULL
#define	MSR_MC_MISC_MEM_CHANNEL_SHIFT	18
#define	MSR_MC_MISC_MEM_DIMM_MASK	0x0000000000030000ULL
#define	MSR_MC_MISC_MEM_DIMM_SHIFT	16
#define	MSR_MC_MISC_MEM_SYNDROME_MASK	0xffffffff00000000ULL
#define	MSR_MC_MISC_MEM_SYNDROME_SHIFT	32

/* Values returned by gintel_cpu_generation(). */
#define	CPU_GENERATION_DONT_CARE	0
#define	CPU_GENERATION_NEHALEM_EP	1

/* Family/model pair that identifies a Nehalem processor. */
#define	INTEL_NEHALEM_CPU_FAMILY_ID	0x6
#define	INTEL_NEHALEM_CPU_MODEL_ID	0x1A

/* PCI device/function of the Nehalem-EP memory controller registers. */
#define	NEHALEM_EP_MEMORY_CONTROLLER_DEV	0x3
#define	NEHALEM_EP_MEMORY_CONTROLLER_FUNC	0x2
100 
101 /*ARGSUSED*/
102 int
103 gintel_init(cmi_hdl_t hdl, void **datap)
104 {
105 	uint32_t nb_chipset;
106 
107 	if (gintel_ms_support_disable)
108 		return (ENOTSUP);
109 
110 	if (!(x86_feature & X86_MCA))
111 		return (ENOTSUP);
112 
113 	nb_chipset = (*pci_getl_func)(0, 0, 0, 0x0);
114 	switch (nb_chipset) {
115 	case INTEL_NB_7300:
116 	case INTEL_NB_5000P:
117 	case INTEL_NB_5000X:
118 	case INTEL_NB_5000V:
119 	case INTEL_NB_5000Z:
120 	case INTEL_NB_5400:
121 	case INTEL_NB_5400A:
122 	case INTEL_NB_5400B:
123 		if (!gintel_ms_unconstrained)
124 			gintel_error_action_return |= CMS_ERRSCOPE_POISONED;
125 		break;
126 	case INTEL_QP_IO:
127 	case INTEL_QP_WP:
128 	case INTEL_QP_36D:
129 	case INTEL_QP_24D:
130 	case INTEL_QP_U1:
131 	case INTEL_QP_U2:
132 	case INTEL_QP_U3:
133 	case INTEL_QP_U4:
134 	case INTEL_QP_JF:
135 	case INTEL_QP_JF0:
136 	case INTEL_QP_JF1:
137 	case INTEL_QP_JF2:
138 	case INTEL_QP_JF3:
139 	case INTEL_QP_JF4:
140 	case INTEL_QP_JF5:
141 	case INTEL_QP_JF6:
142 	case INTEL_QP_JF7:
143 	case INTEL_QP_JF8:
144 	case INTEL_QP_JF9:
145 	case INTEL_QP_JFa:
146 	case INTEL_QP_JFb:
147 	case INTEL_QP_JFc:
148 	case INTEL_QP_JFd:
149 	case INTEL_QP_JFe:
150 	case INTEL_QP_JFf:
151 		quickpath = 1;
152 		break;
153 	default:
154 		break;
155 	}
156 	return (0);
157 }
158 
159 /*ARGSUSED*/
160 uint32_t
161 gintel_error_action(cmi_hdl_t hdl, int ismc, int bank,
162     uint64_t status, uint64_t addr, uint64_t misc, void *mslogout)
163 {
164 	if ((status & MSR_MC_STATUS_PCC) == 0)
165 		return (gintel_error_action_return);
166 	else
167 		return (gintel_error_action_return & ~CMS_ERRSCOPE_POISONED);
168 }
169 
170 /*ARGSUSED*/
171 cms_cookie_t
172 gintel_disp_match(cmi_hdl_t hdl, int bank, uint64_t status,
173     uint64_t addr, uint64_t misc, void *mslogout)
174 {
175 	cms_cookie_t rt = (cms_cookie_t)NULL;
176 	uint16_t mcacode = MCAX86_ERRCODE(status);
177 	uint16_t mscode = MCAX86_MSERRCODE(status);
178 
179 	if (MCAX86_ERRCODE_ISMEMORY_CONTROLLER(mcacode)) {
180 		/*
181 		 * memory controller errors
182 		 */
183 		if (mscode & MSCOD_MEM_SPARE_MEM) {
184 			rt = (cms_cookie_t)GINTEL_ERR_SPARE_MEM;
185 		} else if (mscode & (MSCOD_MEM_ECC_READ |
186 		    MSCOD_MEM_ECC_SCRUB)) {
187 			if (status & MSR_MC_STATUS_UC)
188 				rt = (cms_cookie_t)GINTEL_ERR_MEM_UE;
189 			else
190 				rt = (cms_cookie_t)GINTEL_ERR_MEM_CE;
191 		} else if (mscode & (MSCOD_MEM_WR_PARITY |
192 		    MSCOD_MEM_BYTE_PARITY)) {
193 			rt = (cms_cookie_t)GINTEL_ERR_MEM_PARITY;
194 		} else if (mscode & MSCOD_MEM_ADDR_PARITY) {
195 			rt = (cms_cookie_t)GINTEL_ERR_MEM_ADDR_PARITY;
196 		} else if (mscode & MSCOD_MEM_REDUNDANT_MEM) {
197 			rt = (cms_cookie_t)GINTEL_ERR_MEM_REDUNDANT;
198 		} else if (mscode & MSCOD_MEM_ILLEGAL_ADDR) {
199 			rt = (cms_cookie_t)GINTEL_ERR_MEM_BAD_ADDR;
200 		} else if (mscode & MSCOD_MEM_BAD_ID) {
201 			rt = (cms_cookie_t)GINTEL_ERR_MEM_BAD_ID;
202 		} else {
203 			rt = (cms_cookie_t)GINTEL_ERR_MEM_UNKNOWN;
204 		}
205 	} else if (quickpath &&
206 	    MCAX86_ERRCODE_ISBUS_INTERCONNECT(MCAX86_ERRCODE(status))) {
207 		rt = (cms_cookie_t)GINTEL_ERROR_QUICKPATH;
208 	}
209 	return (rt);
210 }
211 
212 /*ARGSUSED*/
213 void
214 gintel_ereport_class(cmi_hdl_t hdl, cms_cookie_t mscookie,
215     const char **cpuclsp, const char **leafclsp)
216 {
217 	*cpuclsp = FM_EREPORT_CPU_INTEL;
218 	switch ((uintptr_t)mscookie) {
219 	case GINTEL_ERROR_QUICKPATH:
220 		*leafclsp = "quickpath.interconnect";
221 		break;
222 	case GINTEL_ERR_SPARE_MEM:
223 		*leafclsp = "quickpath.mem_spare";
224 		break;
225 	case GINTEL_ERR_MEM_UE:
226 		*leafclsp = "quickpath.mem_ue";
227 		break;
228 	case GINTEL_ERR_MEM_CE:
229 		*leafclsp = "quickpath.mem_ce";
230 		break;
231 	case GINTEL_ERR_MEM_PARITY:
232 		*leafclsp = "quickpath.mem_parity";
233 		break;
234 	case GINTEL_ERR_MEM_ADDR_PARITY:
235 		*leafclsp = "quickpath.mem_addr_parity";
236 		break;
237 	case GINTEL_ERR_MEM_REDUNDANT:
238 		*leafclsp = "quickpath.mem_redundant";
239 		break;
240 	case GINTEL_ERR_MEM_BAD_ADDR:
241 		*leafclsp = "quickpath.mem_bad_addr";
242 		break;
243 	case GINTEL_ERR_MEM_BAD_ID:
244 		*leafclsp = "quickpath.mem_bad_id";
245 		break;
246 	case GINTEL_ERR_MEM_UNKNOWN:
247 		*leafclsp = "quickpath.mem_unknown";
248 		break;
249 	}
250 }
251 
252 /*ARGSUSED*/
253 nvlist_t *
254 gintel_ereport_detector(cmi_hdl_t hdl, int bankno, cms_cookie_t mscookie,
255     nv_alloc_t *nva)
256 {
257 	nvlist_t *nvl = (nvlist_t *)NULL;
258 
259 	if (mscookie) {
260 		if ((nvl = fm_nvlist_create(nva)) == NULL)
261 			return (NULL);
262 		if ((uintptr_t)mscookie & GINTEL_ERROR_QUICKPATH) {
263 			fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, NULL, 2,
264 			    "motherboard", 0,
265 			    "chip", cmi_hdl_chipid(hdl));
266 		} else {
267 			fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, NULL, 3,
268 			    "motherboard", 0,
269 			    "chip", cmi_hdl_chipid(hdl),
270 			    "memory-controller", 0);
271 		}
272 	}
273 	return (nvl);
274 }
275 
276 static nvlist_t *
277 gintel_ereport_create_resource_elem(nv_alloc_t *nva, mc_unum_t *unump)
278 {
279 	nvlist_t *nvl, *snvl;
280 
281 	if ((nvl = fm_nvlist_create(nva)) == NULL)	/* freed by caller */
282 		return (NULL);
283 
284 	if ((snvl = fm_nvlist_create(nva)) == NULL) {
285 		fm_nvlist_destroy(nvl, nva ? FM_NVA_RETAIN : FM_NVA_FREE);
286 		return (NULL);
287 	}
288 
289 	(void) nvlist_add_uint64(snvl, FM_FMRI_HC_SPECIFIC_OFFSET,
290 	    unump->unum_offset);
291 
292 	if (unump->unum_chan == -1) {
293 		fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 3,
294 		    "motherboard", unump->unum_board,
295 		    "chip", unump->unum_chip,
296 		    "memory-controller", unump->unum_mc);
297 	} else if (unump->unum_cs == -1) {
298 		fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 4,
299 		    "motherboard", unump->unum_board,
300 		    "chip", unump->unum_chip,
301 		    "memory-controller", unump->unum_mc,
302 		    "dram-channel", unump->unum_chan);
303 	} else if (unump->unum_rank == -1) {
304 		fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 5,
305 		    "motherboard", unump->unum_board,
306 		    "chip", unump->unum_chip,
307 		    "memory-controller", unump->unum_mc,
308 		    "dram-channel", unump->unum_chan,
309 		    "dimm", unump->unum_cs);
310 	} else {
311 		fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 6,
312 		    "motherboard", unump->unum_board,
313 		    "chip", unump->unum_chip,
314 		    "memory-controller", unump->unum_mc,
315 		    "dram-channel", unump->unum_chan,
316 		    "dimm", unump->unum_cs,
317 		    "rank", unump->unum_rank);
318 	}
319 
320 	fm_nvlist_destroy(snvl, nva ? FM_NVA_RETAIN : FM_NVA_FREE);
321 
322 	return (nvl);
323 }
324 
325 static void
326 nehalem_ep_ereport_add_memory_error_counter(uint_t  chipid,
327     uint32_t *this_err_counter_array)
328 {
329 	int	index;
330 
331 	for (index = 0; index < N_MC_COR_ECC_CNT; index ++)
332 		this_err_counter_array[index] = MC_COR_ECC_CNT(chipid, index);
333 }
334 
335 static int
336 gintel_cpu_generation(cmi_hdl_t hdl)
337 {
338 	int	cpu_generation = CPU_GENERATION_DONT_CARE;
339 
340 	if ((cmi_hdl_family(hdl) == INTEL_NEHALEM_CPU_FAMILY_ID) &&
341 	    (cmi_hdl_model(hdl) == INTEL_NEHALEM_CPU_MODEL_ID))
342 		cpu_generation = CPU_GENERATION_NEHALEM_EP;
343 
344 	return (cpu_generation);
345 }
346 
347 /*ARGSUSED*/
348 void
349 gintel_ereport_add_logout(cmi_hdl_t hdl, nvlist_t *ereport,
350     nv_alloc_t *nva, int banknum, uint64_t status, uint64_t addr,
351     uint64_t misc, void *mslogout, cms_cookie_t mscookie)
352 {
353 	mc_unum_t unum;
354 	nvlist_t *resource;
355 	uint32_t synd = 0;
356 	int  chan = MCAX86_ERRCODE_CCCC(status);
357 	uint8_t last_index, this_index;
358 	int chipid;
359 
360 	if (chan == 0xf)
361 		chan = -1;
362 
363 	if ((uintptr_t)mscookie & GINTEL_ERROR_MEM) {
364 		unum.unum_board = 0;
365 		unum.unum_chip = cmi_hdl_chipid(hdl);
366 		unum.unum_mc = 0;
367 		unum.unum_chan = chan;
368 		unum.unum_cs = -1;
369 		unum.unum_rank = -1;
370 		unum.unum_offset = -1ULL;
371 		if (status & MSR_MC_STATUS_MISCV) {
372 			unum.unum_chan =
373 			    (misc & MSR_MC_MISC_MEM_CHANNEL_MASK) >>
374 			    MSR_MC_MISC_MEM_CHANNEL_SHIFT;
375 			unum.unum_cs =
376 			    (misc & MSR_MC_MISC_MEM_DIMM_MASK) >>
377 			    MSR_MC_MISC_MEM_DIMM_SHIFT;
378 			synd = (misc & MSR_MC_MISC_MEM_SYNDROME_MASK) >>
379 			    MSR_MC_MISC_MEM_SYNDROME_SHIFT;
380 			fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ECC_SYND,
381 			    DATA_TYPE_UINT32, synd, 0);
382 		}
383 		if (status & MSR_MC_STATUS_ADDRV) {
384 			fm_payload_set(ereport, FM_FMRI_MEM_PHYSADDR,
385 			    DATA_TYPE_UINT64, addr, NULL);
386 			(void) cmi_mc_patounum(addr, 0, 0, synd, 0, &unum);
387 			if (unum.unum_offset != -1ULL &&
388 			    (unum.unum_offset & OFFSET_ROW_BANK_COL) != 0) {
389 				fm_payload_set(ereport,
390 				    FM_EREPORT_PAYLOAD_NAME_BANK,
391 				    DATA_TYPE_INT32,
392 				    TCODE_OFFSET_BANK(unum.unum_offset), NULL);
393 				fm_payload_set(ereport,
394 				    FM_EREPORT_PAYLOAD_NAME_CAS,
395 				    DATA_TYPE_INT32,
396 				    TCODE_OFFSET_CAS(unum.unum_offset), NULL);
397 				fm_payload_set(ereport,
398 				    FM_EREPORT_PAYLOAD_NAME_RAS,
399 				    DATA_TYPE_INT32,
400 				    TCODE_OFFSET_RAS(unum.unum_offset), NULL);
401 			}
402 		}
403 		resource = gintel_ereport_create_resource_elem(nva, &unum);
404 		fm_payload_set(ereport, FM_EREPORT_PAYLOAD_NAME_RESOURCE,
405 		    DATA_TYPE_NVLIST_ARRAY, 1, &resource, NULL);
406 		fm_nvlist_destroy(resource, nva ? FM_NVA_RETAIN:FM_NVA_FREE);
407 
408 		if (gintel_cpu_generation(hdl) == CPU_GENERATION_NEHALEM_EP) {
409 
410 			chipid = unum.unum_chip;
411 			if (chipid < MAX_CPU_NODES) {
412 				last_index = err_counter_index[chipid];
413 				this_index =
414 				    (last_index + 1) % ERR_COUNTER_INDEX;
415 				err_counter_index[chipid] = this_index;
416 				nehalem_ep_ereport_add_memory_error_counter(
417 				    chipid,
418 				    err_counter_array[chipid][this_index]);
419 				fm_payload_set(ereport,
420 				    FM_EREPORT_PAYLOAD_MEM_ECC_COUNTER_THIS,
421 				    DATA_TYPE_UINT32_ARRAY, N_MC_COR_ECC_CNT,
422 				    err_counter_array[chipid][this_index],
423 				    NULL);
424 				fm_payload_set(ereport,
425 				    FM_EREPORT_PAYLOAD_MEM_ECC_COUNTER_LAST,
426 				    DATA_TYPE_UINT32_ARRAY, N_MC_COR_ECC_CNT,
427 				    err_counter_array[chipid][last_index],
428 				    NULL);
429 			}
430 		}
431 	}
432 }
433 
434 boolean_t
435 gintel_bankctl_skipinit(cmi_hdl_t hdl, int banknum)
436 {
437 	/*
438 	 * On Intel family 6 before QuickPath we must not enable machine check
439 	 * from bank 0 detectors. bank 0 is reserved for the platform
440 	 */
441 
442 	if (banknum == 0 &&
443 	    cmi_hdl_family(hdl) == INTEL_NEHALEM_CPU_FAMILY_ID &&
444 	    cmi_hdl_model(hdl) < INTEL_NEHALEM_CPU_MODEL_ID)
445 		return (1);
446 	else
447 		return (0);
448 }
449 
450 cms_api_ver_t _cms_api_version = CMS_API_VERSION_0;
451 
452 const cms_ops_t _cms_ops = {
453 	gintel_init,		/* cms_init */
454 	NULL,			/* cms_post_startup */
455 	NULL,			/* cms_post_mpstartup */
456 	NULL,			/* cms_logout_size */
457 	NULL,			/* cms_mcgctl_val */
458 	gintel_bankctl_skipinit, /* cms_bankctl_skipinit */
459 	NULL,			/* cms_bankctl_val */
460 	NULL,			/* cms_bankstatus_skipinit */
461 	NULL,			/* cms_bankstatus_val */
462 	NULL,			/* cms_mca_init */
463 	NULL,			/* cms_poll_ownermask */
464 	NULL,			/* cms_bank_logout */
465 	gintel_error_action,	/* cms_error_action */
466 	gintel_disp_match,	/* cms_disp_match */
467 	gintel_ereport_class,	/* cms_ereport_class */
468 	gintel_ereport_detector,	/* cms_ereport_detector */
469 	NULL,			/* cms_ereport_includestack */
470 	gintel_ereport_add_logout,	/* cms_ereport_add_logout */
471 	NULL,			/* cms_msrinject */
472 	NULL,			/* cms_fini */
473 };
474 
475 static struct modlcpu modlcpu = {
476 	&mod_cpuops,
477 	"Generic Intel model-specific MCA"
478 };
479 
480 static struct modlinkage modlinkage = {
481 	MODREV_1,
482 	(void *)&modlcpu,
483 	NULL
484 };
485 
486 int
487 _init(void)
488 {
489 	return (mod_install(&modlinkage));
490 }
491 
492 int
493 _info(struct modinfo *modinfop)
494 {
495 	return (mod_info(&modlinkage, modinfop));
496 }
497 
498 int
499 _fini(void)
500 {
501 	return (mod_remove(&modlinkage));
502 }
503