xref: /titanic_50/usr/src/uts/i86pc/cpu/authenticamd/authamd_main.c (revision a93a1f58a8763fa69172980b98e3d24720c1136e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
/*
 * "Generic AMD" model-specific support.  If no more-specific support can
 * be found, or such a module declines to initialize, then for AuthenticAMD
 * cpus this module can have a crack at providing some AMD model-specific
 * support that at least goes beyond common MCA architectural features
 * if not down to the nitty-gritty level for a particular model.  We
 * are layered on top of a cpu module, likely cpu.generic, so there is no
 * need for us to perform common architecturally-accessible functions.
 */
38 
#include <sys/types.h>
#include <sys/atomic.h>
#include <sys/cmn_err.h>
#include <sys/modctl.h>
#include <sys/cpu_module.h>
#include <sys/mca_x86.h>
#include <sys/pci_cfgspace.h>
#include <sys/x86_archext.h>
#include <sys/mc_amd.h>
#include <sys/fm/protocol.h>
#include <sys/fm/cpu/GENAMD.h>
#include <sys/nvpair.h>
#include <sys/controlregs.h>
#include <sys/pghw.h>
#include <sys/sunddi.h>
#include <sys/cpu_module_ms_impl.h>
54 
55 #include "authamd.h"
56 
/*
 * Tunable: when non-zero authamd_init() returns ENOTSUP, so this module
 * declines to provide model-specific support.
 */
int authamd_ms_support_disable = 0;
58 
/* Union of the family 0xf revision B, C0, CG, D and E chiprev values. */
#define	AUTHAMD_F_REVS_BCDE \
	(X86_CHIPREV_AMD_F_REV_B | X86_CHIPREV_AMD_F_REV_C0 | \
	X86_CHIPREV_AMD_F_REV_CG | X86_CHIPREV_AMD_F_REV_D | \
	X86_CHIPREV_AMD_F_REV_E)

/* Union of the family 0xf revision F and G chiprev values. */
#define	AUTHAMD_F_REVS_FG \
	(X86_CHIPREV_AMD_F_REV_F | X86_CHIPREV_AMD_F_REV_G)

/* Union of the family 0x10 revision A and B chiprev values. */
#define	AUTHAMD_10_REVS_AB \
	(X86_CHIPREV_AMD_10_REV_A | X86_CHIPREV_AMD_10_REV_B)
69 
/*
 * Bitmasks of support for various features.  Try to enable features
 * via inclusion in one of these bitmasks and check that at the
 * feature implementation - that way new family support may often
 * simply need to update these bitmasks.
 */
76 
/*
 * Families that this module will provide some model-specific
 * support for (if no more-specific module claims it first).
 * We try to support whole families rather than differentiate down
 * to revision.  The argument is a family number and is evaluated
 * more than once.
 */
#define	AUTHAMD_SUPPORTED(fam) \
	((fam) == AUTHAMD_FAMILY_6 || (fam) == AUTHAMD_FAMILY_F || \
	(fam) == AUTHAMD_FAMILY_10)

/*
 * Families/revisions for which we can recognise main memory ECC errors:
 * family 0xf from revision B and family 0x10 from revision A.
 */
#define	AUTHAMD_MEMECC_RECOGNISED(rev) \
	(X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_F_REV_B) || \
	X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_10_REV_A))

/*
 * Families/revisions that have an Online Spare Control Register:
 * family 0xf from revision F and family 0x10 from revision A.
 */
#define	AUTHAMD_HAS_ONLINESPARECTL(rev) \
	(X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_F_REV_F) || \
	X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_10_REV_A))

/*
 * Families/revisions that have a NB misc register or registers -
 * evaluates to 0 if no support, otherwise the number of MC4_MISCj
 * (1 for family 0xf from revision F, 3 for family 0x10 from revision A).
 */
#define	AUTHAMD_NBMISC_NUM(rev) \
	(X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_F_REV_F)? 1 : \
	(X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_10_REV_A) ? 3 : 0))

/*
 * Families/revision for which we wish not to machine check for GART
 * table walk errors - bit 10 of NB CTL.
 */
#define	AUTHAMD_NOGARTTBLWLK_MC(rev) \
	(X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_F_REV_B) || \
	X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_10_REV_A))
116 
/*
 * We recognise main memory ECC errors for AUTHAMD_MEMECC_RECOGNISED
 * revisions as:
 *
 *	- being reported by the NB
 *	- being a compound bus/interconnect error (external to chip)
 *	- having LL of LG
 *	- having II of MEM (but could still be a master/target abort)
 *	- having CECC or UECC set
 *
 * We do not check the extended error code (first nibble of the
 * model-specific error code on AMD) since this has changed from
 * family 0xf to family 0x10 (ext code 0 now reserved on family 0x10).
 * Instead we use CECC/UECC to separate off the master/target
 * abort cases.
 *
 * We insist that the detector be the NorthBridge bank;  although
 * IC/DC can report some main memory errors, they do not capture
 * an address at sufficient resolution to be useful and the NB will
 * report most errors.
 *
 * Note that the status argument is evaluated several times, so it
 * must not have side effects.
 */
#define	AUTHAMD_IS_MEMECCERR(bank, status) \
	((bank) == AMD_MCA_BANK_NB && \
	MCAX86_ERRCODE_ISBUS_INTERCONNECT(MCAX86_ERRCODE(status)) && \
	MCAX86_ERRCODE_LL(MCAX86_ERRCODE(status)) == MCAX86_ERRCODE_LL_LG && \
	MCAX86_ERRCODE_II(MCAX86_ERRCODE(status)) == MCAX86_ERRCODE_II_MEM && \
	((status) & (AMD_BANK_STAT_CECC | AMD_BANK_STAT_UECC)))
144 
/*
 * Dispositions returned by authamd_disp_match() for main memory ECC
 * errors.  The initializers appear to be, in order, the ereport cpu
 * subclass, the ereport leaf class and the payload member flags (see the
 * aad_subclass, aad_leafclass and aad_ereport_members uses below).
 */

/* Correctable memory ECC error with extended error code 0. */
static authamd_error_disp_t authamd_memce_disp = {
	FM_EREPORT_CPU_GENAMD,
	FM_EREPORT_CPU_GENAMD_MEM_CE,
	FM_EREPORT_GENAMD_PAYLOAD_FLAGS_MEM_CE
};

/* Uncorrectable memory ECC error with extended error code 0. */
static authamd_error_disp_t authamd_memue_disp = {
	FM_EREPORT_CPU_GENAMD,
	FM_EREPORT_CPU_GENAMD_MEM_UE,
	FM_EREPORT_GENAMD_PAYLOAD_FLAGS_MEM_UE
};

/* Correctable memory ECC error with a non-zero extended error code. */
static authamd_error_disp_t authamd_ckmemce_disp = {
	FM_EREPORT_CPU_GENAMD,
	FM_EREPORT_CPU_GENAMD_CKMEM_CE,
	FM_EREPORT_GENAMD_PAYLOAD_FLAGS_CKMEM_CE
};

/* Uncorrectable memory ECC error with a non-zero extended error code. */
static authamd_error_disp_t authamd_ckmemue_disp = {
	FM_EREPORT_CPU_GENAMD,
	FM_EREPORT_CPU_GENAMD_CKMEM_UE,
	FM_EREPORT_GENAMD_PAYLOAD_FLAGS_CKMEM_UE
};
168 
/*
 * We recognise GART walk errors as:
 *
 *	- being reported by the NB
 *	- being a compound TLB error
 *	- having LL of LG and TT of GEN
 *	- having UC set
 *	- possibly having PCC set (if source CPU)
 *
 * PCC is not tested by the macro.  The status argument is evaluated
 * several times, so it must not have side effects.
 */
#define	AUTHAMD_IS_GARTERR(bank, status) \
	((bank) == AMD_MCA_BANK_NB && \
	MCAX86_ERRCODE_ISTLB(MCAX86_ERRCODE(status)) && \
	MCAX86_ERRCODE_LL(MCAX86_ERRCODE(status)) == MCAX86_ERRCODE_LL_LG && \
	MCAX86_ERRCODE_TT(MCAX86_ERRCODE(status)) == MCAX86_ERRCODE_TT_GEN && \
	(status) & MSR_MC_STATUS_UC)
184 
/*
 * Disposition returned by authamd_disp_match() for GART table walk
 * errors;  authamd_error_action() compares against its address to
 * decide that such an error should be ignored.
 */
static authamd_error_disp_t authamd_gart_disp = {
	FM_EREPORT_CPU_GENAMD,			/* use generic subclass */
	FM_EREPORT_CPU_GENADM_GARTTBLWLK,	/* use generic leafclass */
	0					/* no additional payload */
};
190 
191 
/*
 * Per-chip shared state, indexed by chipid.  Slots start out NULL and are
 * filled in by authamd_init() using atomic_cas_ptr().
 */
static struct authamd_chipshared *authamd_shared[AUTHAMD_MAX_CHIPS];
193 
194 static int
195 authamd_chip_once(authamd_data_t *authamd, enum authamd_cfgonce_bitnum what)
196 {
197 	return (atomic_set_long_excl(&authamd->amd_shared->acs_cfgonce,
198 	    what) == 0 ?  B_TRUE : B_FALSE);
199 }
200 
201 static void
202 authamd_pcicfg_write(uint_t chipid, uint_t func, uint_t reg, uint32_t val)
203 {
204 	ASSERT(chipid + 24 <= 31);
205 	ASSERT((func & 7) == func);
206 	ASSERT((reg & 3) == 0 && reg < 256);
207 
208 	cmi_pci_putl(0, chipid + 24, func, reg, 0, val);
209 }
210 
211 static uint32_t
212 authamd_pcicfg_read(uint_t chipid, uint_t func, uint_t reg)
213 {
214 	ASSERT(chipid + 24 <= 31);
215 	ASSERT((func & 7) == func);
216 	ASSERT((reg & 3) == 0 && reg < 256);
217 
218 	return (cmi_pci_getl(0, chipid + 24, func, reg, 0, 0));
219 }
220 
221 void
222 authamd_bankstatus_prewrite(cmi_hdl_t hdl, authamd_data_t *authamd)
223 {
224 	uint64_t hwcr;
225 
226 	if (cmi_hdl_rdmsr(hdl, MSR_AMD_HWCR, &hwcr) != CMI_SUCCESS)
227 		return;
228 
229 	authamd->amd_hwcr = hwcr;
230 
231 	if (!(hwcr & AMD_HWCR_MCI_STATUS_WREN)) {
232 		hwcr |= AMD_HWCR_MCI_STATUS_WREN;
233 		(void) cmi_hdl_wrmsr(hdl, MSR_AMD_HWCR, hwcr);
234 	}
235 }
236 
237 void
238 authamd_bankstatus_postwrite(cmi_hdl_t hdl, authamd_data_t *authamd)
239 {
240 	uint64_t hwcr = authamd->amd_hwcr;
241 
242 	if (!(hwcr & AMD_HWCR_MCI_STATUS_WREN)) {
243 		hwcr &= ~AMD_HWCR_MCI_STATUS_WREN;
244 		(void) cmi_hdl_wrmsr(hdl, MSR_AMD_HWCR, hwcr);
245 	}
246 }
247 
248 /*
249  * Read EccCnt repeatedly for all possible channel/chip-select combos:
250  *
251  *	- read sparectl register
252  *	- if EccErrCntWrEn is set, clear that bit in the just-read value
253  *	  and write it back to sparectl;  this *may* clobber the EccCnt
254  *	  for the channel/chip-select combination currently selected, so
255  *	  we leave this bit clear if we had to clear it
256  *	- cycle through all channel/chip-select combinations writing each
257  *	  combination to sparectl before reading the register back for
258  *	  EccCnt for that combination;  since EccErrCntWrEn is clear
259  *	  the writes to select what count to read will not themselves
260  *	  zero any counts
261  */
262 static int
263 authamd_read_ecccnt(authamd_data_t *authamd, struct authamd_logout *msl)
264 {
265 	union mcreg_sparectl sparectl;
266 	uint_t chipid = authamd->amd_shared->acs_chipid;
267 	uint_t family = authamd->amd_shared->acs_family;
268 	uint32_t rev = authamd->amd_shared->acs_rev;
269 	int chan, cs;
270 
271 	/*
272 	 * Check for feature support;  this macro will test down to the
273 	 * family revision number, whereafter we'll switch on family
274 	 * assuming that future revisions will use the same register
275 	 * format.
276 	 */
277 	if (!AUTHAMD_HAS_ONLINESPARECTL(rev)) {
278 		bzero(&msl->aal_eccerrcnt, sizeof (msl->aal_eccerrcnt));
279 		return (0);
280 	}
281 
282 	MCREG_VAL32(&sparectl) =
283 	    authamd_pcicfg_read(chipid, MC_FUNC_MISCCTL, MC_CTL_REG_SPARECTL);
284 
285 	switch (family) {
286 	case AUTHAMD_FAMILY_F:
287 		MCREG_FIELD_F_revFG(&sparectl, EccErrCntWrEn) = 0;
288 		break;
289 
290 	case AUTHAMD_FAMILY_10:
291 		MCREG_FIELD_10_revAB(&sparectl, EccErrCntWrEn) = 0;
292 		break;
293 	}
294 
295 	for (chan = 0; chan < AUTHAMD_DRAM_NCHANNEL; chan++) {
296 		switch (family) {
297 		case AUTHAMD_FAMILY_F:
298 			MCREG_FIELD_F_revFG(&sparectl, EccErrCntDramChan) =
299 			    chan;
300 			break;
301 
302 		case AUTHAMD_FAMILY_10:
303 			MCREG_FIELD_10_revAB(&sparectl, EccErrCntDramChan) =
304 			    chan;
305 			break;
306 		}
307 
308 		for (cs = 0; cs < AUTHAMD_DRAM_NCS; cs++) {
309 			switch (family) {
310 			case AUTHAMD_FAMILY_F:
311 				MCREG_FIELD_F_revFG(&sparectl,
312 				    EccErrCntDramCs) = cs;
313 				break;
314 
315 			case AUTHAMD_FAMILY_10:
316 				MCREG_FIELD_10_revAB(&sparectl,
317 				    EccErrCntDramCs) = cs;
318 				break;
319 			}
320 
321 			authamd_pcicfg_write(chipid, MC_FUNC_MISCCTL,
322 			    MC_CTL_REG_SPARECTL, MCREG_VAL32(&sparectl));
323 
324 			MCREG_VAL32(&sparectl) = authamd_pcicfg_read(chipid,
325 			    MC_FUNC_MISCCTL, MC_CTL_REG_SPARECTL);
326 
327 			switch (family) {
328 			case AUTHAMD_FAMILY_F:
329 				msl->aal_eccerrcnt[chan][cs] =
330 				    MCREG_FIELD_F_revFG(&sparectl, EccErrCnt);
331 				break;
332 			case AUTHAMD_FAMILY_10:
333 				msl->aal_eccerrcnt[chan][cs] =
334 				    MCREG_FIELD_10_revAB(&sparectl, EccErrCnt);
335 				break;
336 			}
337 		}
338 	}
339 
340 	return (1);
341 }
342 
343 /*
344  * Clear EccCnt for all possible channel/chip-select combos:
345  *
346  *	- set EccErrCntWrEn in sparectl, if necessary
347  *	- write 0 to EccCnt for all channel/chip-select combinations
348  *	- clear EccErrCntWrEn
349  *
350  * If requested also disable the interrupts taken on counter overflow
351  * and on swap done.
352  */
353 static void
354 authamd_clear_ecccnt(authamd_data_t *authamd, boolean_t clrint)
355 {
356 	union mcreg_sparectl sparectl;
357 	uint_t chipid = authamd->amd_shared->acs_chipid;
358 	uint_t family = authamd->amd_shared->acs_family;
359 	uint32_t rev = authamd->amd_shared->acs_rev;
360 	int chan, cs;
361 
362 	if (!AUTHAMD_HAS_ONLINESPARECTL(rev))
363 		return;
364 
365 	MCREG_VAL32(&sparectl) =
366 	    authamd_pcicfg_read(chipid, MC_FUNC_MISCCTL, MC_CTL_REG_SPARECTL);
367 
368 	switch (family) {
369 	case AUTHAMD_FAMILY_F:
370 		MCREG_FIELD_F_revFG(&sparectl, EccErrCntWrEn) = 1;
371 		if (clrint) {
372 			MCREG_FIELD_F_revFG(&sparectl, EccErrInt) = 0;
373 			MCREG_FIELD_F_revFG(&sparectl, SwapDoneInt) = 0;
374 		}
375 		break;
376 
377 	case AUTHAMD_FAMILY_10:
378 		MCREG_FIELD_10_revAB(&sparectl, EccErrCntWrEn) = 1;
379 		if (clrint) {
380 			MCREG_FIELD_10_revAB(&sparectl, EccErrInt) = 0;
381 			MCREG_FIELD_10_revAB(&sparectl, SwapDoneInt) = 0;
382 		}
383 		break;
384 	}
385 
386 	authamd_pcicfg_write(chipid, MC_FUNC_MISCCTL,
387 	    MC_CTL_REG_SPARECTL, MCREG_VAL32(&sparectl));
388 
389 	for (chan = 0; chan < AUTHAMD_DRAM_NCHANNEL; chan++) {
390 		switch (family) {
391 		case AUTHAMD_FAMILY_F:
392 			MCREG_FIELD_F_revFG(&sparectl, EccErrCntDramChan) =
393 			    chan;
394 			break;
395 
396 		case AUTHAMD_FAMILY_10:
397 			MCREG_FIELD_10_revAB(&sparectl, EccErrCntDramChan) =
398 			    chan;
399 			break;
400 		}
401 
402 		for (cs = 0; cs < AUTHAMD_DRAM_NCS; cs++) {
403 			switch (family) {
404 			case AUTHAMD_FAMILY_F:
405 				MCREG_FIELD_F_revFG(&sparectl,
406 				    EccErrCntDramCs) = cs;
407 				MCREG_FIELD_F_revFG(&sparectl,
408 				    EccErrCnt) = 0;
409 				break;
410 
411 			case AUTHAMD_FAMILY_10:
412 				MCREG_FIELD_10_revAB(&sparectl,
413 				    EccErrCntDramCs) = cs;
414 				MCREG_FIELD_10_revAB(&sparectl,
415 				    EccErrCnt) = 0;
416 				break;
417 			}
418 
419 			authamd_pcicfg_write(chipid, MC_FUNC_MISCCTL,
420 			    MC_CTL_REG_SPARECTL, MCREG_VAL32(&sparectl));
421 		}
422 	}
423 }
424 
425 /*
426  * cms_init entry point.
427  *
428  * This module provides broad model-specific support for AMD families
429  * 0x6, 0xf and 0x10.  Future families will have to be evaluated once their
430  * documentation is available.
431  */
432 int
433 authamd_init(cmi_hdl_t hdl, void **datap)
434 {
435 	uint_t chipid = cmi_hdl_chipid(hdl);
436 	struct authamd_chipshared *sp, *osp;
437 	uint_t family = cmi_hdl_family(hdl);
438 	authamd_data_t *authamd;
439 	uint64_t cap;
440 
441 	if (authamd_ms_support_disable || !AUTHAMD_SUPPORTED(family))
442 		return (ENOTSUP);
443 
444 	if (!(x86_feature & X86_MCA))
445 		return (ENOTSUP);
446 
447 	if (cmi_hdl_rdmsr(hdl, IA32_MSR_MCG_CAP, &cap) != CMI_SUCCESS)
448 		return (ENOTSUP);
449 
450 	if (!(cap & MCG_CAP_CTL_P))
451 		return (ENOTSUP);
452 
453 	authamd = *datap = kmem_zalloc(sizeof (authamd_data_t), KM_SLEEP);
454 	cmi_hdl_hold(hdl);	/* release in fini */
455 	authamd->amd_hdl = hdl;
456 
457 	if ((sp = authamd_shared[chipid]) == NULL) {
458 		sp = kmem_zalloc(sizeof (struct authamd_chipshared), KM_SLEEP);
459 		osp = atomic_cas_ptr(&authamd_shared[chipid], NULL, sp);
460 		if (osp != NULL) {
461 			kmem_free(sp, sizeof (struct authamd_chipshared));
462 			sp = osp;
463 		} else {
464 			sp->acs_chipid = chipid;
465 			sp->acs_family = family;
466 			sp->acs_rev = cmi_hdl_chiprev(hdl);
467 		}
468 	}
469 	authamd->amd_shared = sp;
470 
471 	return (0);
472 }
473 
474 /*
475  * cms_logout_size entry point.
476  */
477 /*ARGSUSED*/
478 size_t
479 authamd_logout_size(cmi_hdl_t hdl)
480 {
481 	return (sizeof (struct authamd_logout));
482 }
483 
484 /*
485  * cms_mcgctl_val entry point
486  *
487  * Instead of setting all bits to 1 we can set just those for the
488  * error detector banks known to exist.
489  */
490 /*ARGSUSED*/
491 uint64_t
492 authamd_mcgctl_val(cmi_hdl_t hdl, int nbanks, uint64_t proposed)
493 {
494 	return (nbanks < 64 ? (1ULL << nbanks) - 1 : proposed);
495 }
496 
497 /*
498  * cms_bankctl_skipinit entry point
499  *
500  * On K6 we do not initialize MC0_CTL since, reportedly, this bank (for DC)
501  * may produce spurious machine checks.
502  */
503 /*ARGSUSED*/
504 boolean_t
505 authamd_bankctl_skipinit(cmi_hdl_t hdl, int bank)
506 {
507 	authamd_data_t *authamd = cms_hdl_getcmsdata(hdl);
508 
509 	return (authamd->amd_shared->acs_family == AUTHAMD_FAMILY_6 &&
510 	    bank == 0 ?  B_TRUE : B_FALSE);
511 }
512 
513 /*
514  * cms_bankctl_val entry point
515  */
516 uint64_t
517 authamd_bankctl_val(cmi_hdl_t hdl, int bank, uint64_t proposed)
518 {
519 	authamd_data_t *authamd = cms_hdl_getcmsdata(hdl);
520 	uint32_t rev = authamd->amd_shared->acs_rev;
521 	uint64_t val = proposed;
522 
523 	/*
524 	 * The Intel MCA says we can write all 1's to enable #MC for
525 	 * all errors, and AMD docs say much the same.  But, depending
526 	 * perhaps on other config registers, taking machine checks
527 	 * for some errors such as GART TLB errors and master/target
528 	 * aborts may be bad - they set UC and sometime also PCC, but
529 	 * we should not always panic for these error types.
530 	 *
531 	 * Our cms_error_action entry point can suppress such panics,
532 	 * however we can also use the cms_bankctl_val entry point to
533 	 * veto enabling of some of the known villains in the first place.
534 	 */
535 	if (bank == AMD_MCA_BANK_NB && AUTHAMD_NOGARTTBLWLK_MC(rev))
536 		val &= ~AMD_NB_EN_GARTTBLWK;
537 
538 	return (val);
539 }
540 
541 /*
542  * cms_mca_init entry point.
543  */
544 /*ARGSUSED*/
545 void
546 authamd_mca_init(cmi_hdl_t hdl, int nbanks)
547 {
548 	authamd_data_t *authamd = cms_hdl_getcmsdata(hdl);
549 	uint32_t rev = authamd->amd_shared->acs_rev;
550 
551 	/*
552 	 * On chips with a NB online spare control register take control
553 	 * and clear ECC counts.
554 	 */
555 	if (AUTHAMD_HAS_ONLINESPARECTL(rev) &&
556 	    authamd_chip_once(authamd, AUTHAMD_CFGONCE_ONLNSPRCFG)) {
557 		authamd_clear_ecccnt(authamd, B_TRUE);
558 	}
559 
560 	/*
561 	 * And since we are claiming the telemetry stop the BIOS receiving
562 	 * an SMI on NB threshold overflow.
563 	 */
564 	if (AUTHAMD_NBMISC_NUM(rev) &&
565 	    authamd_chip_once(authamd, AUTHAMD_CFGONCE_NBTHRESH)) {
566 		union mcmsr_nbmisc nbm;
567 		int i;
568 
569 		authamd_bankstatus_prewrite(hdl, authamd);
570 
571 		for (i = 0; i < AUTHAMD_NBMISC_NUM(rev); i++) {
572 			if (cmi_hdl_rdmsr(hdl, MC_MSR_NB_MISC(i),
573 			    (uint64_t *)&nbm) != CMI_SUCCESS)
574 				continue;
575 
576 			if (X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_F_REV_F) &&
577 			    MCMSR_FIELD_F_revFG(&nbm, mcmisc_Valid) &&
578 			    MCMSR_FIELD_F_revFG(&nbm, mcmisc_CntP)) {
579 				MCMSR_FIELD_F_revFG(&nbm, mcmisc_IntType) = 0;
580 			} else if (X86_CHIPREV_ATLEAST(rev,
581 			    X86_CHIPREV_AMD_10_REV_A) &&
582 			    MCMSR_FIELD_10_revAB(&nbm, mcmisc_Valid) &&
583 			    MCMSR_FIELD_10_revAB(&nbm, mcmisc_CntP)) {
584 				MCMSR_FIELD_10_revAB(&nbm, mcmisc_IntType) = 0;
585 			}
586 
587 			(void) cmi_hdl_wrmsr(hdl, MC_MSR_NB_MISC(i),
588 			    MCMSR_VAL(&nbm));
589 		}
590 
591 		authamd_bankstatus_postwrite(hdl, authamd);
592 	}
593 }
594 
595 /*
596  * cms_bank_logout entry point.
597  */
598 /*ARGSUSED*/
599 void
600 authamd_bank_logout(cmi_hdl_t hdl, int bank, uint64_t status,
601     uint64_t addr, uint64_t misc, void *mslogout)
602 {
603 	authamd_data_t *authamd = cms_hdl_getcmsdata(hdl);
604 	struct authamd_logout *msl = mslogout;
605 	uint32_t rev = authamd->amd_shared->acs_rev;
606 
607 	if (msl == NULL)
608 		return;
609 
610 	/*
611 	 * For main memory ECC errors on revisions with an Online Spare
612 	 * Control Register grab the ECC counts by channel and chip-select
613 	 * and reset them to 0.
614 	 */
615 	if (AUTHAMD_MEMECC_RECOGNISED(rev) &&
616 	    AUTHAMD_IS_MEMECCERR(bank, status) &&
617 	    AUTHAMD_HAS_ONLINESPARECTL(rev)) {
618 		if (authamd_read_ecccnt(authamd, msl))
619 			authamd_clear_ecccnt(authamd, B_FALSE);
620 	}
621 }
622 
623 /*
624  * cms_error_action entry point
625  */
626 
627 int authamd_forgive_uc = 0;	/* For test/debug only */
628 int authamd_forgive_pcc = 0;	/* For test/debug only */
629 int authamd_fake_poison = 0;	/* For test/debug only */
630 
631 /*ARGSUSED*/
632 uint32_t
633 authamd_error_action(cmi_hdl_t hdl, int ismc, int bank,
634     uint64_t status, uint64_t addr, uint64_t misc, void *mslogout)
635 {
636 	authamd_error_disp_t *disp;
637 	uint32_t rv = 0;
638 
639 	if (authamd_forgive_uc)
640 		rv |= CMS_ERRSCOPE_CLEARED_UC;
641 
642 	if (authamd_forgive_pcc)
643 		rv |= CMS_ERRSCOPE_CURCONTEXT_OK;
644 
645 	if (authamd_fake_poison && status & MSR_MC_STATUS_UC)
646 		rv |= CMS_ERRSCOPE_POISONED;
647 
648 	if (rv)
649 		return (rv);
650 
651 	disp = authamd_disp_match(hdl, bank, status, addr, misc, mslogout);
652 
653 	if (disp == &authamd_gart_disp) {
654 		/*
655 		 * GART walk errors set UC and possibly PCC (if source CPU)
656 		 * but should not be regarded as terminal.
657 		 */
658 		return (CMS_ERRSCOPE_IGNORE_ERR);
659 	}
660 
661 	/*
662 	 * May also want to consider master abort and target abort.  These
663 	 * also set UC and PCC (if src CPU) but the requester gets -1
664 	 * and I believe the IO stuff in Solaris will handle that.
665 	 */
666 
667 	return (rv);
668 }
669 
670 /*
671  * cms_disp_match entry point
672  */
673 /*ARGSUSED*/
674 cms_cookie_t
675 authamd_disp_match(cmi_hdl_t hdl, int bank, uint64_t status,
676     uint64_t addr, uint64_t misc, void *mslogout)
677 {
678 	authamd_data_t *authamd = cms_hdl_getcmsdata(hdl);
679 	/* uint16_t errcode = MCAX86_ERRCODE(status); */
680 	uint16_t exterrcode = AMD_EXT_ERRCODE(status);
681 	uint32_t rev = authamd->amd_shared->acs_rev;
682 
683 	/*
684 	 * Recognise main memory ECC errors
685 	 */
686 	if (AUTHAMD_MEMECC_RECOGNISED(rev) &&
687 	    AUTHAMD_IS_MEMECCERR(bank, status)) {
688 		if (status & AMD_BANK_STAT_CECC) {
689 			return (exterrcode == 0 ? &authamd_memce_disp :
690 			    &authamd_ckmemce_disp);
691 		} else if (status & AMD_BANK_STAT_UECC) {
692 			return (exterrcode == 0 ? &authamd_memue_disp :
693 			    &authamd_ckmemue_disp);
694 		}
695 	}
696 
697 	/*
698 	 * Recognise GART walk errors
699 	 */
700 	if (AUTHAMD_NOGARTTBLWLK_MC(rev) && AUTHAMD_IS_GARTERR(bank, status))
701 		return (&authamd_gart_disp);
702 
703 	return (NULL);
704 }
705 
706 /*
707  * cms_ereport_class entry point
708  */
709 /*ARGSUSED*/
710 void
711 authamd_ereport_class(cmi_hdl_t hdl, cms_cookie_t mscookie,
712     const char **cpuclsp, const char **leafclsp)
713 {
714 	const authamd_error_disp_t *aed = mscookie;
715 
716 	if (aed == NULL)
717 		return;
718 
719 	if (aed->aad_subclass != NULL)
720 		*cpuclsp = aed->aad_subclass;
721 	if (aed->aad_leafclass != NULL)
722 		*leafclsp = aed->aad_leafclass;
723 }
724 
725 /*ARGSUSED*/
726 static void
727 authamd_ereport_add_resource(cmi_hdl_t hdl, authamd_data_t *authamd,
728     nvlist_t *ereport, nv_alloc_t *nva, void *mslogout)
729 {
730 	nvlist_t *elems[AUTHAMD_DRAM_NCHANNEL * AUTHAMD_DRAM_NCS];
731 	uint8_t counts[AUTHAMD_DRAM_NCHANNEL * AUTHAMD_DRAM_NCS];
732 	authamd_logout_t *msl;
733 	nvlist_t *nvl;
734 	int nelems = 0;
735 	int i, chan, cs;
736 
737 	if ((msl = mslogout) == NULL)
738 		return;
739 
740 	for (chan = 0; chan < AUTHAMD_DRAM_NCHANNEL; chan++) {
741 		for (cs = 0; cs < AUTHAMD_DRAM_NCS; cs++) {
742 			if (msl->aal_eccerrcnt[chan][cs] == 0)
743 				continue;
744 
745 			if ((nvl = fm_nvlist_create(nva)) == NULL)
746 				continue;
747 
748 			elems[nelems] = nvl;
749 			counts[nelems++] = msl->aal_eccerrcnt[chan][cs];
750 
751 			fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, NULL, 5,
752 			    "motherboard", 0,
753 			    "chip", authamd->amd_shared->acs_chipid,
754 			    "memory-controller", 0,
755 			    "dram-channel", chan,
756 			    "chip-select", cs);
757 		}
758 	}
759 
760 	if (nelems == 0)
761 		return;
762 
763 	fm_payload_set(ereport, FM_EREPORT_GENAMD_PAYLOAD_NAME_RESOURCE,
764 	    DATA_TYPE_NVLIST_ARRAY, nelems, elems,
765 	    NULL);
766 
767 	fm_payload_set(ereport, FM_EREPORT_GENAMD_PAYLOAD_NAME_RESOURCECNT,
768 	    DATA_TYPE_UINT8_ARRAY, nelems, &counts[0],
769 	    NULL);
770 
771 	for (i = 0; i < nelems; i++)
772 		fm_nvlist_destroy(elems[i], nva ? FM_NVA_RETAIN : FM_NVA_FREE);
773 }
774 
775 /*
776  * cms_ereport_add_logout entry point
777  */
778 /*ARGSUSED*/
779 void
780 authamd_ereport_add_logout(cmi_hdl_t hdl, nvlist_t *ereport, nv_alloc_t *nva,
781     int bank, uint64_t status, uint64_t addr, uint64_t misc,
782     void *mslogout, cms_cookie_t mscookie)
783 {
784 	authamd_data_t *authamd = cms_hdl_getcmsdata(hdl);
785 	const authamd_error_disp_t *aed = mscookie;
786 	uint64_t members;
787 
788 	if (aed == NULL)
789 		return;
790 
791 	members = aed->aad_ereport_members;
792 
793 	if (members & FM_EREPORT_GENAMD_PAYLOAD_FLAG_SYND) {
794 		fm_payload_set(ereport, FM_EREPORT_GENAMD_PAYLOAD_NAME_SYND,
795 		    DATA_TYPE_UINT16, (uint16_t)AMD_BANK_SYND(status),
796 		    NULL);
797 
798 		if (members & FM_EREPORT_GENAMD_PAYLOAD_FLAG_SYNDTYPE) {
799 			fm_payload_set(ereport,
800 			    FM_EREPORT_GENAMD_PAYLOAD_NAME_SYNDTYPE,
801 			    DATA_TYPE_STRING, "E",
802 			    NULL);
803 		}
804 	}
805 
806 	if (members & FM_EREPORT_GENAMD_PAYLOAD_FLAG_CKSYND) {
807 		fm_payload_set(ereport, FM_EREPORT_GENAMD_PAYLOAD_NAME_CKSYND,
808 		    DATA_TYPE_UINT16, (uint16_t)AMD_NB_STAT_CKSYND(status),
809 		    NULL);
810 
811 		if (members & FM_EREPORT_GENAMD_PAYLOAD_FLAG_SYNDTYPE) {
812 			fm_payload_set(ereport,
813 			    FM_EREPORT_GENAMD_PAYLOAD_NAME_SYNDTYPE,
814 			    DATA_TYPE_STRING, "C",
815 			    NULL);
816 		}
817 	}
818 
819 	if (members & FM_EREPORT_GENAMD_PAYLOAD_FLAG_RESOURCE &&
820 	    status & MSR_MC_STATUS_ADDRV) {
821 		authamd_ereport_add_resource(hdl, authamd, ereport, nva,
822 		    mslogout);
823 	}
824 }
825 
826 /*
827  * cms_msrinject entry point
828  */
829 cms_errno_t
830 authamd_msrinject(cmi_hdl_t hdl, uint_t msr, uint64_t val)
831 {
832 	authamd_data_t *authamd = cms_hdl_getcmsdata(hdl);
833 	cms_errno_t rv = CMSERR_BADMSRWRITE;
834 
835 	authamd_bankstatus_prewrite(hdl, authamd);
836 	if (cmi_hdl_wrmsr(hdl, msr, val) == CMI_SUCCESS)
837 		rv = CMS_SUCCESS;
838 	authamd_bankstatus_postwrite(hdl, authamd);
839 
840 	return (rv);
841 }
842 
/* Version of the cms interface that _cms_ops below is written to. */
cms_api_ver_t _cms_api_version = CMS_API_VERSION_0;

/*
 * Model-specific operations vector for this module.  Entry points that
 * this module does not implement are left NULL.
 */
const cms_ops_t _cms_ops = {
	authamd_init,			/* cms_init */
	NULL,				/* cms_post_startup */
	NULL,				/* cms_post_mpstartup */
	authamd_logout_size,		/* cms_logout_size */
	authamd_mcgctl_val,		/* cms_mcgctl_val */
	authamd_bankctl_skipinit,	/* cms_bankctl_skipinit */
	authamd_bankctl_val,		/* cms_bankctl_val */
	NULL,				/* cms_bankstatus_skipinit */
	NULL,				/* cms_bankstatus_val */
	authamd_mca_init,		/* cms_mca_init */
	NULL,				/* cms_poll_ownermask */
	authamd_bank_logout,		/* cms_bank_logout */
	authamd_error_action,		/* cms_error_action */
	authamd_disp_match,		/* cms_disp_match */
	authamd_ereport_class,		/* cms_ereport_class */
	NULL,				/* cms_ereport_detector */
	NULL,				/* cms_ereport_includestack */
	authamd_ereport_add_logout,	/* cms_ereport_add_logout */
	authamd_msrinject,		/* cms_msrinject */
	NULL,				/* cms_fini */
};
867 
/* Module linkage for a cpu module:  ops vector and descriptive name. */
static struct modlcpu modlcpu = {
	&mod_cpuops,
	"Generic AMD model-specific MCA"
};

/* Linkage structure handed to mod_install/mod_info/mod_remove below. */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modlcpu,
	NULL
};
878 
879 int
880 _init(void)
881 {
882 	return (mod_install(&modlinkage));
883 }
884 
885 int
886 _info(struct modinfo *modinfop)
887 {
888 	return (mod_info(&modlinkage, modinfop));
889 }
890 
891 int
892 _fini(void)
893 {
894 	return (mod_remove(&modlinkage));
895 }
896