xref: /illumos-gate/usr/src/uts/i86pc/cpu/genuineintel/gintel_main.c (revision a6d4d7d5d0e34964282f736f7bade0574645f1fd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
/*
 * Intel model-specific support.  Right now all this consists of is
 * modifying the ereport subclass to produce different ereport classes
 * so that we can have different diagnosis rules and corresponding faults.
 */
32 
33 #include <sys/types.h>
34 #include <sys/cmn_err.h>
35 #include <sys/modctl.h>
36 #include <sys/mca_x86.h>
37 #include <sys/cpu_module_ms_impl.h>
38 #include <sys/mc_intel.h>
39 #include <sys/pci_cfgspace.h>
40 #include <sys/fm/protocol.h>
41 
/*
 * Dimensions of the correctable-ECC counter snapshots kept below:
 * MAX_CPU_NODES chips, ERR_COUNTER_INDEX snapshot slots per chip and
 * N_MC_COR_ECC_CNT counter registers per snapshot.
 */
#define	MAX_CPU_NODES		2
#define	ERR_COUNTER_INDEX	2
#define	N_MC_COR_ECC_CNT	6

/*
 * PCI bus on which a chip's devices are looked up: chip N is addressed
 * on bus (max_bus_number - N).
 */
#define	MAX_BUS_NUMBER  max_bus_number
#define	SOCKET_BUS(cpu) (MAX_BUS_NUMBER - (cpu))

/*
 * Read 32-bit config-space register 0x80 + 4 * reg of the memory
 * controller device/function on the bus belonging to chipid.
 */
#define	MC_COR_ECC_CNT(chipid, reg)	(*pci_getl_func)(SOCKET_BUS(chipid), \
    NEHALEM_EP_MEMORY_CONTROLLER_DEV, NEHALEM_EP_MEMORY_CONTROLLER_FUNC, \
    0x80 + (reg) * 4)

/*
 * Module-wide settings.
 *
 * gintel_ms_support_disable	non-zero makes gintel_init() return ENOTSUP.
 * gintel_error_action_return	flag word handed back by
 *				gintel_error_action(); gintel_init() may OR
 *				CMS_ERRSCOPE_POISONED into it.
 * gintel_ms_unconstrained	non-zero stops gintel_init() from adding
 *				CMS_ERRSCOPE_POISONED.
 */
int gintel_ms_support_disable = 0;
int gintel_error_action_return = 0;
int gintel_ms_unconstrained = 0;

/* Set to 1 by gintel_init() when an INTEL_QP_* device id is found. */
int quickpath;

/* Highest PCI bus number; base for SOCKET_BUS(). */
int max_bus_number = 0xff;

/*
 * Per-chip counter snapshots and, for each chip, the slot that was
 * filled most recently (see gintel_ereport_add_logout()).
 */
uint32_t err_counter_array[MAX_CPU_NODES][ERR_COUNTER_INDEX][N_MC_COR_ECC_CNT];
uint8_t	err_counter_index[MAX_CPU_NODES];
61 
/*
 * Bits tested by gintel_disp_match() in the model-specific error code
 * (MCAX86_MSERRCODE) of a memory controller error.
 */
#define	MSCOD_MEM_ECC_READ	0x1
#define	MSCOD_MEM_ECC_SCRUB	0x2
#define	MSCOD_MEM_WR_PARITY	0x4
#define	MSCOD_MEM_REDUNDANT_MEM	0x8
#define	MSCOD_MEM_SPARE_MEM	0x10
#define	MSCOD_MEM_ILLEGAL_ADDR	0x20
#define	MSCOD_MEM_BAD_ID	0x40
#define	MSCOD_MEM_ADDR_PARITY	0x80
#define	MSCOD_MEM_BYTE_PARITY	0x100

/*
 * Cookie values produced by gintel_disp_match().  The upper bits select
 * the error family; the low 12 bits pick the memory error within it.
 */
#define	GINTEL_ERROR_MEM	0x1000
#define	GINTEL_ERROR_QUICKPATH	0x2000

#define	GINTEL_ERR_SPARE_MEM		(GINTEL_ERROR_MEM | 1)
#define	GINTEL_ERR_MEM_UE		(GINTEL_ERROR_MEM | 2)
#define	GINTEL_ERR_MEM_CE		(GINTEL_ERROR_MEM | 3)
#define	GINTEL_ERR_MEM_PARITY		(GINTEL_ERROR_MEM | 4)
#define	GINTEL_ERR_MEM_ADDR_PARITY	(GINTEL_ERROR_MEM | 5)
#define	GINTEL_ERR_MEM_REDUNDANT	(GINTEL_ERROR_MEM | 6)
#define	GINTEL_ERR_MEM_BAD_ADDR		(GINTEL_ERROR_MEM | 7)
#define	GINTEL_ERR_MEM_BAD_ID		(GINTEL_ERROR_MEM | 8)
#define	GINTEL_ERR_MEM_UNKNOWN		(GINTEL_ERROR_MEM | 0xfff)

/*
 * Fields extracted from the MISC value of a memory error: a 2-bit
 * channel at bit 18, a 2-bit dimm at bit 16 and a 32-bit syndrome in
 * the upper half.
 */
#define	MSR_MC_MISC_MEM_CHANNEL_SHIFT	18
#define	MSR_MC_MISC_MEM_CHANNEL_MASK	\
	(0x3ULL << MSR_MC_MISC_MEM_CHANNEL_SHIFT)
#define	MSR_MC_MISC_MEM_DIMM_SHIFT	16
#define	MSR_MC_MISC_MEM_DIMM_MASK	\
	(0x3ULL << MSR_MC_MISC_MEM_DIMM_SHIFT)
#define	MSR_MC_MISC_MEM_SYNDROME_SHIFT	32
#define	MSR_MC_MISC_MEM_SYNDROME_MASK	\
	(0xffffffffULL << MSR_MC_MISC_MEM_SYNDROME_SHIFT)

/* Values returned by gintel_cpu_generation(). */
#define	CPU_GENERATION_DONT_CARE	0
#define	CPU_GENERATION_NEHALEM_EP	1

/* cpuid family/model pair identifying Nehalem. */
#define	INTEL_NEHALEM_CPU_FAMILY_ID	0x6
#define	INTEL_NEHALEM_CPU_MODEL_ID	0x1A

/* PCI device/function used by MC_COR_ECC_CNT(). */
#define	NEHALEM_EP_MEMORY_CONTROLLER_DEV	0x3
#define	NEHALEM_EP_MEMORY_CONTROLLER_FUNC	0x2
100 
101 /*ARGSUSED*/
102 int
103 gintel_init(cmi_hdl_t hdl, void **datap)
104 {
105 	uint32_t nb_chipset;
106 
107 	if (gintel_ms_support_disable)
108 		return (ENOTSUP);
109 
110 	if (!(x86_feature & X86_MCA))
111 		return (ENOTSUP);
112 
113 	nb_chipset = (*pci_getl_func)(0, 0, 0, 0x0);
114 	switch (nb_chipset) {
115 	case INTEL_NB_7300:
116 	case INTEL_NB_5000P:
117 	case INTEL_NB_5000X:
118 	case INTEL_NB_5000V:
119 	case INTEL_NB_5000Z:
120 	case INTEL_NB_5400:
121 	case INTEL_NB_5400A:
122 	case INTEL_NB_5400B:
123 		if (!gintel_ms_unconstrained)
124 			gintel_error_action_return |= CMS_ERRSCOPE_POISONED;
125 		break;
126 	case INTEL_QP_IO:
127 	case INTEL_QP_36D:
128 	case INTEL_QP_24D:
129 		quickpath = 1;
130 		break;
131 	default:
132 		break;
133 	}
134 	return (0);
135 }
136 
137 /*ARGSUSED*/
138 uint32_t
139 gintel_error_action(cmi_hdl_t hdl, int ismc, int bank,
140     uint64_t status, uint64_t addr, uint64_t misc, void *mslogout)
141 {
142 	if ((status & MSR_MC_STATUS_PCC) == 0)
143 		return (gintel_error_action_return);
144 	else
145 		return (gintel_error_action_return & ~CMS_ERRSCOPE_POISONED);
146 }
147 
148 /*ARGSUSED*/
149 cms_cookie_t
150 gintel_disp_match(cmi_hdl_t hdl, int bank, uint64_t status,
151     uint64_t addr, uint64_t misc, void *mslogout)
152 {
153 	cms_cookie_t rt = (cms_cookie_t)NULL;
154 	uint16_t mcacode = MCAX86_ERRCODE(status);
155 	uint16_t mscode = MCAX86_MSERRCODE(status);
156 
157 	if (MCAX86_ERRCODE_ISMEMORY_CONTROLLER(mcacode)) {
158 		/*
159 		 * memory controller errors
160 		 */
161 		if (mscode & MSCOD_MEM_SPARE_MEM) {
162 			rt = (cms_cookie_t)GINTEL_ERR_SPARE_MEM;
163 		} else if (mscode & (MSCOD_MEM_ECC_READ |
164 		    MSCOD_MEM_ECC_SCRUB)) {
165 			if (status & MSR_MC_STATUS_UC)
166 				rt = (cms_cookie_t)GINTEL_ERR_MEM_UE;
167 			else
168 				rt = (cms_cookie_t)GINTEL_ERR_MEM_CE;
169 		} else if (mscode & (MSCOD_MEM_WR_PARITY |
170 		    MSCOD_MEM_BYTE_PARITY)) {
171 			rt = (cms_cookie_t)GINTEL_ERR_MEM_PARITY;
172 		} else if (mscode & MSCOD_MEM_ADDR_PARITY) {
173 			rt = (cms_cookie_t)GINTEL_ERR_MEM_ADDR_PARITY;
174 		} else if (mscode & MSCOD_MEM_REDUNDANT_MEM) {
175 			rt = (cms_cookie_t)GINTEL_ERR_MEM_REDUNDANT;
176 		} else if (mscode & MSCOD_MEM_ILLEGAL_ADDR) {
177 			rt = (cms_cookie_t)GINTEL_ERR_MEM_BAD_ADDR;
178 		} else if (mscode & MSCOD_MEM_BAD_ID) {
179 			rt = (cms_cookie_t)GINTEL_ERR_MEM_BAD_ID;
180 		} else {
181 			rt = (cms_cookie_t)GINTEL_ERR_MEM_UNKNOWN;
182 		}
183 	} else if (quickpath &&
184 	    MCAX86_ERRCODE_ISBUS_INTERCONNECT(MCAX86_ERRCODE(status))) {
185 		rt = (cms_cookie_t)GINTEL_ERROR_QUICKPATH;
186 	}
187 	return (rt);
188 }
189 
190 /*ARGSUSED*/
191 void
192 gintel_ereport_class(cmi_hdl_t hdl, cms_cookie_t mscookie,
193     const char **cpuclsp, const char **leafclsp)
194 {
195 	*cpuclsp = FM_EREPORT_CPU_INTEL;
196 	switch ((uintptr_t)mscookie) {
197 	case GINTEL_ERROR_QUICKPATH:
198 		*leafclsp = "quickpath.interconnect";
199 		break;
200 	case GINTEL_ERR_SPARE_MEM:
201 		*leafclsp = "quickpath.mem_spare";
202 		break;
203 	case GINTEL_ERR_MEM_UE:
204 		*leafclsp = "quickpath.mem_ue";
205 		break;
206 	case GINTEL_ERR_MEM_CE:
207 		*leafclsp = "quickpath.mem_ce";
208 		break;
209 	case GINTEL_ERR_MEM_PARITY:
210 		*leafclsp = "quickpath.mem_parity";
211 		break;
212 	case GINTEL_ERR_MEM_ADDR_PARITY:
213 		*leafclsp = "quickpath.mem_addr_parity";
214 		break;
215 	case GINTEL_ERR_MEM_REDUNDANT:
216 		*leafclsp = "quickpath.mem_redundant";
217 		break;
218 	case GINTEL_ERR_MEM_BAD_ADDR:
219 		*leafclsp = "quickpath.mem_bad_addr";
220 		break;
221 	case GINTEL_ERR_MEM_BAD_ID:
222 		*leafclsp = "quickpath.mem_bad_id";
223 		break;
224 	case GINTEL_ERR_MEM_UNKNOWN:
225 		*leafclsp = "quickpath.mem_unknown";
226 		break;
227 	}
228 }
229 
230 nvlist_t *
231 gintel_ereport_detector(cmi_hdl_t hdl, cms_cookie_t mscookie, nv_alloc_t *nva)
232 {
233 	nvlist_t *nvl = (nvlist_t *)NULL;
234 
235 	if (mscookie) {
236 		if ((nvl = fm_nvlist_create(nva)) == NULL)
237 			return (NULL);
238 		if ((uintptr_t)mscookie & GINTEL_ERROR_QUICKPATH) {
239 			fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, NULL, 2,
240 			    "motherboard", 0,
241 			    "chip", cmi_hdl_chipid(hdl));
242 		} else {
243 			fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, NULL, 3,
244 			    "motherboard", 0,
245 			    "chip", cmi_hdl_chipid(hdl),
246 			    "memory-controller", 0);
247 		}
248 	}
249 	return (nvl);
250 }
251 
252 static nvlist_t *
253 gintel_ereport_create_resource_elem(nv_alloc_t *nva, mc_unum_t *unump)
254 {
255 	nvlist_t *nvl, *snvl;
256 
257 	if ((nvl = fm_nvlist_create(nva)) == NULL)	/* freed by caller */
258 		return (NULL);
259 
260 	if ((snvl = fm_nvlist_create(nva)) == NULL) {
261 		fm_nvlist_destroy(nvl, nva ? FM_NVA_RETAIN : FM_NVA_FREE);
262 		return (NULL);
263 	}
264 
265 	(void) nvlist_add_uint64(snvl, FM_FMRI_HC_SPECIFIC_OFFSET,
266 	    unump->unum_offset);
267 
268 	if (unump->unum_chan == -1) {
269 		fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 3,
270 		    "motherboard", unump->unum_board,
271 		    "chip", unump->unum_chip,
272 		    "memory-controller", unump->unum_mc);
273 	} else if (unump->unum_cs == -1) {
274 		fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 4,
275 		    "motherboard", unump->unum_board,
276 		    "chip", unump->unum_chip,
277 		    "memory-controller", unump->unum_mc,
278 		    "dram-channel", unump->unum_chan);
279 	} else if (unump->unum_rank == -1) {
280 		fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 5,
281 		    "motherboard", unump->unum_board,
282 		    "chip", unump->unum_chip,
283 		    "memory-controller", unump->unum_mc,
284 		    "dram-channel", unump->unum_chan,
285 		    "dimm", unump->unum_cs);
286 	} else {
287 		fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 6,
288 		    "motherboard", unump->unum_board,
289 		    "chip", unump->unum_chip,
290 		    "memory-controller", unump->unum_mc,
291 		    "dram-channel", unump->unum_chan,
292 		    "dimm", unump->unum_cs,
293 		    "rank", unump->unum_rank);
294 	}
295 
296 	fm_nvlist_destroy(snvl, nva ? FM_NVA_RETAIN : FM_NVA_FREE);
297 
298 	return (nvl);
299 }
300 
301 static void
302 nehalem_ep_ereport_add_memory_error_counter(uint_t  chipid,
303     uint32_t *this_err_counter_array)
304 {
305 	int	index;
306 
307 	for (index = 0; index < N_MC_COR_ECC_CNT; index ++)
308 		this_err_counter_array[index] = MC_COR_ECC_CNT(chipid, index);
309 }
310 
311 static int
312 gintel_cpu_generation()
313 {
314 	int	cpu_generation = CPU_GENERATION_DONT_CARE;
315 
316 	if ((cpuid_getfamily(CPU) == INTEL_NEHALEM_CPU_FAMILY_ID) &&
317 	    (cpuid_getmodel(CPU) == INTEL_NEHALEM_CPU_MODEL_ID))
318 		cpu_generation = CPU_GENERATION_NEHALEM_EP;
319 
320 	return (cpu_generation);
321 }
322 
323 /*ARGSUSED*/
324 void
325 gintel_ereport_add_logout(cmi_hdl_t hdl, nvlist_t *ereport,
326     nv_alloc_t *nva, int banknum, uint64_t status, uint64_t addr,
327     uint64_t misc, void *mslogout, cms_cookie_t mscookie)
328 {
329 	mc_unum_t unum;
330 	nvlist_t *resource;
331 	uint32_t synd = 0;
332 	int  chan = MCAX86_ERRCODE_CCCC(status);
333 	uint8_t last_index, this_index;
334 	int chipid;
335 
336 	if (chan == 0xf)
337 		chan = -1;
338 
339 	if ((uintptr_t)mscookie & GINTEL_ERROR_MEM) {
340 		unum.unum_board = 0;
341 		unum.unum_chip = cmi_hdl_chipid(hdl);
342 		unum.unum_mc = 0;
343 		unum.unum_chan = chan;
344 		unum.unum_cs = -1;
345 		unum.unum_rank = -1;
346 		unum.unum_offset = -1ULL;
347 		if (status & MSR_MC_STATUS_MISCV) {
348 			unum.unum_chan =
349 			    (misc & MSR_MC_MISC_MEM_CHANNEL_MASK) >>
350 			    MSR_MC_MISC_MEM_CHANNEL_SHIFT;
351 			unum.unum_cs =
352 			    (misc & MSR_MC_MISC_MEM_DIMM_MASK) >>
353 			    MSR_MC_MISC_MEM_DIMM_SHIFT;
354 			synd = (misc & MSR_MC_MISC_MEM_SYNDROME_MASK) >>
355 			    MSR_MC_MISC_MEM_SYNDROME_SHIFT;
356 			fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ECC_SYND,
357 			    DATA_TYPE_UINT32, synd, 0);
358 		}
359 		if (status & MSR_MC_STATUS_ADDRV) {
360 			fm_payload_set(ereport, FM_FMRI_MEM_PHYSADDR,
361 			    DATA_TYPE_UINT64, addr, NULL);
362 			(void) cmi_mc_patounum(addr, 0, 0, synd, 0, &unum);
363 		}
364 		resource = gintel_ereport_create_resource_elem(nva, &unum);
365 		fm_payload_set(ereport, FM_EREPORT_PAYLOAD_NAME_RESOURCE,
366 		    DATA_TYPE_NVLIST_ARRAY, 1, &resource, NULL);
367 		fm_nvlist_destroy(resource, nva ? FM_NVA_RETAIN:FM_NVA_FREE);
368 
369 		if (gintel_cpu_generation() == CPU_GENERATION_NEHALEM_EP) {
370 
371 			chipid = cmi_ntv_hwchipid(CPU);
372 			if (chipid < MAX_CPU_NODES) {
373 				last_index = err_counter_index[chipid];
374 				this_index =
375 				    (last_index + 1) % ERR_COUNTER_INDEX;
376 				err_counter_index[chipid] = this_index;
377 				nehalem_ep_ereport_add_memory_error_counter(
378 				    chipid,
379 				    err_counter_array[chipid][this_index]);
380 				fm_payload_set(ereport,
381 				    FM_EREPORT_PAYLOAD_MEM_ECC_COUNTER_THIS,
382 				    DATA_TYPE_UINT32_ARRAY, N_MC_COR_ECC_CNT,
383 				    err_counter_array[chipid][this_index],
384 				    NULL);
385 				fm_payload_set(ereport,
386 				    FM_EREPORT_PAYLOAD_MEM_ECC_COUNTER_LAST,
387 				    DATA_TYPE_UINT32_ARRAY, N_MC_COR_ECC_CNT,
388 				    err_counter_array[chipid][last_index],
389 				    NULL);
390 			}
391 		}
392 	}
393 }
394 
395 boolean_t
396 gintel_bankctl_skipinit(cmi_hdl_t hdl, int banknum)
397 {
398 	/*
399 	 * On Intel family 6 before QuickPath we must not enable machine check
400 	 * from bank 0 detectors. bank 0 is reserved for the platform
401 	 */
402 
403 	if (banknum == 0 &&
404 	    cmi_hdl_family(hdl) == INTEL_NEHALEM_CPU_FAMILY_ID &&
405 	    cmi_hdl_model(hdl) < INTEL_NEHALEM_CPU_MODEL_ID)
406 		return (1);
407 	else
408 		return (0);
409 }
410 
411 cms_api_ver_t _cms_api_version = CMS_API_VERSION_0;
412 
413 const cms_ops_t _cms_ops = {
414 	gintel_init,		/* cms_init */
415 	NULL,			/* cms_post_startup */
416 	NULL,			/* cms_post_mpstartup */
417 	NULL,			/* cms_logout_size */
418 	NULL,			/* cms_mcgctl_val */
419 	gintel_bankctl_skipinit, /* cms_bankctl_skipinit */
420 	NULL,			/* cms_bankctl_val */
421 	NULL,			/* cms_bankstatus_skipinit */
422 	NULL,			/* cms_bankstatus_val */
423 	NULL,			/* cms_mca_init */
424 	NULL,			/* cms_poll_ownermask */
425 	NULL,			/* cms_bank_logout */
426 	gintel_error_action,	/* cms_error_action */
427 	gintel_disp_match,	/* cms_disp_match */
428 	gintel_ereport_class,	/* cms_ereport_class */
429 	gintel_ereport_detector,	/* cms_ereport_detector */
430 	NULL,			/* cms_ereport_includestack */
431 	gintel_ereport_add_logout,	/* cms_ereport_add_logout */
432 	NULL,			/* cms_msrinject */
433 	NULL,			/* cms_fini */
434 };
435 
436 static struct modlcpu modlcpu = {
437 	&mod_cpuops,
438 	"Generic Intel model-specific MCA"
439 };
440 
441 static struct modlinkage modlinkage = {
442 	MODREV_1,
443 	(void *)&modlcpu,
444 	NULL
445 };
446 
447 int
448 _init(void)
449 {
450 	return (mod_install(&modlinkage));
451 }
452 
453 int
454 _info(struct modinfo *modinfop)
455 {
456 	return (mod_info(&modlinkage, modinfop));
457 }
458 
459 int
460 _fini(void)
461 {
462 	return (mod_remove(&modlinkage));
463 }
464