xref: /illumos-gate/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c (revision 9dd828891378a0a6a509ab601b4c5c20ca5562ec)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 #pragma ident	"%Z%%M%	%I%	%E% SMI"
29 
30 #include <sys/mca_x86.h>
31 #include <sys/cpu_module_impl.h>
32 #include <sys/cmn_err.h>
33 #include <sys/cpuvar.h>
34 #include <sys/x86_archext.h>
35 #include <sys/controlregs.h>
36 #include <sys/sysmacros.h>
37 #include <sys/regset.h>
38 #include <sys/privregs.h>
39 #include <sys/systm.h>
40 #include <sys/types.h>
41 #include <sys/log.h>
42 #include <sys/psw.h>
43 
44 #include "gcpu.h"
45 
46 /*
47  * x86 architecture standard banks for IA32 and compatible processors.  These
48  * are effectively the lowest common denominators for the MCA architecture.
49  */
50 static const gcpu_mca_bank_t gcpu_mca_banks_ia32[] = {
51 { IA32_MSR_MC0_CTL, IA32_MSR_MC0_STATUS, IA32_MSR_MC0_ADDR, IA32_MSR_MC0_MISC },
52 { IA32_MSR_MC1_CTL, IA32_MSR_MC1_STATUS, IA32_MSR_MC1_ADDR, IA32_MSR_MC1_MISC },
53 { IA32_MSR_MC2_CTL, IA32_MSR_MC2_STATUS, IA32_MSR_MC2_ADDR, IA32_MSR_MC2_MISC },
54 { IA32_MSR_MC3_CTL, IA32_MSR_MC3_STATUS, IA32_MSR_MC3_ADDR, IA32_MSR_MC3_MISC },
55 };
56 
57 /*
58  * The P6-family processors have a different layout for their banks.  Note that
59  * MC4 comes *before* MC3 by design here (Intel's design that is, not ours).
60  */
61 static const gcpu_mca_bank_t gcpu_mca_banks_p6[] = {
62 { P6_MSR_MC0_CTL, P6_MSR_MC0_STATUS, P6_MSR_MC0_ADDR, P6_MSR_MC0_MISC },
63 { P6_MSR_MC1_CTL, P6_MSR_MC1_STATUS, P6_MSR_MC1_ADDR, P6_MSR_MC1_MISC },
64 { P6_MSR_MC2_CTL, P6_MSR_MC2_STATUS, P6_MSR_MC2_ADDR, P6_MSR_MC2_MISC },
65 { P6_MSR_MC4_CTL, P6_MSR_MC4_STATUS, P6_MSR_MC4_ADDR, P6_MSR_MC4_MISC },
66 { P6_MSR_MC3_CTL, P6_MSR_MC3_STATUS, P6_MSR_MC3_ADDR, P6_MSR_MC3_MISC },
67 };
68 
69 /*
70  * Initialize the Machine Check Architecture (MCA) for a generic x86 CPU.
71  * Refer to the IA-32 Intel Architecture Software Developer's Manual,
72  * Volume 3: System Programming Guide, Section 14.5 for more information.
73  */
74 void
75 gcpu_mca_init(void *data)
76 {
77 	gcpu_data_t *gcpu = data;
78 	gcpu_mca_t *mca = &gcpu->gcpu_mca;
79 	cpu_t *cp = CPU;
80 
81 	uint64_t cap;
82 	uint_t nbanks;
83 	int i;
84 
85 	/*
86 	 * We're only prepared to handle processors that have an MCG_CAP
87 	 * register.  P5, K6, and earlier processors, which have their own
88 	 * more primitive way of doing machine checks, are not supported.
89 	 */
90 	ASSERT(x86_feature & X86_MCA);
91 	cap = rdmsr(IA32_MSR_MCG_CAP);
92 
93 	if (!(cap & MCG_CAP_CTL_P))
94 		return; /* do nothing if IA32_MCG_CTL register is missing */
95 
96 	if (strcmp(cpuid_getvendorstr(cp), "GenuineIntel") == 0 &&
97 	    cpuid_getfamily(cp) == 6) {
98 		mca->gcpu_mca_banks = gcpu_mca_banks_p6;
99 		mca->gcpu_mca_nbanks = sizeof (gcpu_mca_banks_p6) /
100 		    sizeof (gcpu_mca_bank_t);
101 	} else {
102 		mca->gcpu_mca_banks = gcpu_mca_banks_ia32;
103 		mca->gcpu_mca_nbanks = sizeof (gcpu_mca_banks_ia32) /
104 		    sizeof (gcpu_mca_bank_t);
105 	}
106 
107 	mca->gcpu_mca_data = kmem_alloc(
108 	    mca->gcpu_mca_nbanks * sizeof (gcpu_mca_data_t), KM_SLEEP);
109 
110 	/*
111 	 * Unlike AMD's approach of assigning one MCG_CTL bit to each machine
112 	 * check register bank, Intel doesn't describe the layout of MCG_CTL or
113 	 * promise that each bit corresponds to a bank.  The generic guidance
114 	 * is simply to write all ones to MCG_CTL, enabling everything that is
115 	 * present (h/w ignores writes to the undefined bit positions).  The
116 	 * code right now only handles the original four banks or the P6 banks,
117 	 * so we may enable more than we know how to read on a future CPU.
118 	 * This code can be enhanced to dynamically allocate bank state based
119 	 * upon MCG_CAP.Count if RAS ever becomes important on non-AMD CPUs.
120 	 */
121 	nbanks = cap & MCG_CAP_COUNT_MASK;
122 	mca->gcpu_mca_nbanks = MIN(nbanks, mca->gcpu_mca_nbanks);
123 	wrmsr(IA32_MSR_MCG_CTL, 0ULL); /* disable features while we configure */
124 
125 	for (i = 0; i < mca->gcpu_mca_nbanks; i++) {
126 		const gcpu_mca_bank_t *bank = &mca->gcpu_mca_banks[i];
127 		wrmsr(bank->bank_ctl, -1ULL);
128 		wrmsr(bank->bank_status, 0ULL);
129 	}
130 
131 	wrmsr(IA32_MSR_MCG_CTL, -1ULL); /* enable all machine-check features */
132 	setcr4(getcr4() | CR4_MCE);	/* enable machine-check exceptions */
133 }
134 
135 /*
136  * Initialize the Machine Check Architecture (MCA) for a generic x86 CPU.
137  * Refer to the IA-32 Intel Architecture Software Developer's Manual,
138  * Volume 3: System Programming Guide, Section 14.7 for more information.
139  */
140 int
141 gcpu_mca_trap(void *data, struct regs *rp)
142 {
143 	gcpu_data_t *gcpu = data;
144 	gcpu_mca_t *mca = &gcpu->gcpu_mca;
145 	uint64_t gstatus = rdmsr(IA32_MSR_MCG_STATUS);
146 	int i, fatal = !(gstatus & MCG_STATUS_RIPV);
147 
148 	if (!(gstatus & MCG_STATUS_MCIP))
149 		return (0); /* spurious machine check trap */
150 
151 	/*
152 	 * Read out the bank status values, and the address and misc registers
153 	 * if they are valid.  Update our fatal status based on each bank.
154 	 * Clear the MCG_STATUS register when we're done reading the h/w state.
155 	 */
156 	for (i = 0; i < mca->gcpu_mca_nbanks; i++) {
157 		const gcpu_mca_bank_t *bank = &mca->gcpu_mca_banks[i];
158 		gcpu_mca_data_t *data = &mca->gcpu_mca_data[i];
159 		uint64_t bstatus = rdmsr(bank->bank_status);
160 
161 		data->bank_status_data = bstatus;
162 		data->bank_addr_data = 0;
163 		data->bank_misc_data = 0;
164 
165 		if (!(bstatus & MSR_MC_STATUS_VAL))
166 			continue;
167 
168 		if (bstatus & MSR_MC_STATUS_ADDRV)
169 			data->bank_addr_data = rdmsr(bank->bank_addr);
170 		if (bstatus & MSR_MC_STATUS_MISCV)
171 			data->bank_misc_data = rdmsr(bank->bank_misc);
172 
173 		if (bstatus & (MSR_MC_STATUS_PCC | MSR_MC_STATUS_O))
174 			fatal = 1; /* context corrupt or overflow */
175 
176 		wrmsr(bank->bank_status, 0ULL);
177 	}
178 
179 	wrmsr(IA32_MSR_MCG_STATUS, 0);
180 
181 	log_enter();
182 
183 	if (gstatus & MCG_STATUS_EIPV) {
184 		cmn_err(CE_WARN, "Machine-Check Exception at 0x%lx in %s mode",
185 		    (ulong_t)rp->r_pc, USERMODE(rp->r_cs) ? "user" : "kernel");
186 	} else {
187 		cmn_err(CE_WARN, "Machine-Check Exception in %s mode",
188 		    USERMODE(rp->r_cs) ? "user" : "kernel");
189 	}
190 
191 	/*
192 	 * Now go back through our saved state and report it using cmn_err().
193 	 * We don't bother attempting any kind of decoding here as the actual
194 	 * values are entirely specific to the actual processor in use.  We
195 	 * could break out the generic bit-fields, but you're only here if
196 	 * we didn't care enough to implement FMA support for this processor.
197 	 */
198 	for (i = 0; i < mca->gcpu_mca_nbanks; i++) {
199 		gcpu_mca_data_t *bank = &mca->gcpu_mca_data[i];
200 		uint64_t bstatus = bank->bank_status_data;
201 
202 		if (!(bstatus & MSR_MC_STATUS_VAL))
203 			continue;
204 
205 		switch (bstatus & (MSR_MC_STATUS_ADDRV | MSR_MC_STATUS_MISCV)) {
206 		case MSR_MC_STATUS_ADDRV | MSR_MC_STATUS_MISCV:
207 			cmn_err(CE_WARN, "%d STAT 0x%016llx ADDR 0x%016llx "
208 			    "MISC 0x%016llx", i, (u_longlong_t)bstatus,
209 			    (u_longlong_t)bank->bank_addr_data,
210 			    (u_longlong_t)bank->bank_misc_data);
211 			break;
212 		case MSR_MC_STATUS_ADDRV:
213 			cmn_err(CE_WARN, "%d STAT 0x%016llx ADDR 0x%016llx",
214 			    i, (u_longlong_t)bstatus,
215 			    (u_longlong_t)bank->bank_addr_data);
216 			break;
217 		case MSR_MC_STATUS_MISCV:
218 			cmn_err(CE_WARN, "%d STAT 0x%016llx MISC 0x%016llx",
219 			    i, (u_longlong_t)bstatus,
220 			    (u_longlong_t)bank->bank_misc_data);
221 			break;
222 		default:
223 			cmn_err(CE_WARN, "%d STAT 0x%016llx",
224 			    i, (u_longlong_t)bstatus);
225 		}
226 	}
227 
228 	log_exit();
229 	return (fatal);
230 }
231