xref: /titanic_52/usr/src/uts/i86pc/os/cmi_hw.c (revision 7f0b8309074a5d8e9f9d8ffe7aad7bb0b1ee6b1f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * CPU Module Interface - hardware abstraction.
29  */
30 
31 #include <sys/types.h>
32 #include <sys/cpu_module.h>
33 #include <sys/kmem.h>
34 #include <sys/x86_archext.h>
35 #include <sys/cpuvar.h>
36 #include <sys/ksynch.h>
37 #include <sys/x_call.h>
38 #include <sys/pghw.h>
39 #include <sys/pci_cfgspace.h>
40 #include <sys/archsystm.h>
41 #include <sys/ontrap.h>
42 #include <sys/controlregs.h>
43 #include <sys/sunddi.h>
44 #include <sys/trap.h>
45 #include <sys/mca_x86.h>
46 #include <sys/processor.h>
47 
48 #ifdef __xpv
49 #include <sys/hypervisor.h>
50 #endif
51 
52 /*
53  * Outside of this file consumers use the opaque cmi_hdl_t.  This
54  * definition is duplicated in the generic_cpu mdb module, so keep
55  * them in-sync when making changes.
56  */
/*
 * Implementation structure behind the opaque cmi_hdl_t.  cmi_hdl_create()
 * allocates one of these (zero-filled) for a (chipid, coreid, strandid)
 * tuple and publishes it through the cmi_hdl_arr slot for that tuple.
 */
typedef struct cmi_hdl_impl {
	enum cmi_hdl_class cmih_class;		/* Handle nature */
	const struct cmi_hdl_ops *cmih_ops;	/* Operations vector */
	uint_t cmih_chipid;			/* Chipid of cpu resource */
	uint_t cmih_coreid;			/* Core within die */
	uint_t cmih_strandid;			/* Thread within core */
	boolean_t cmih_mstrand;			/* cores are multithreaded */
	/* Points at cmae_refcnt in this handle's cmi_hdl_arr slot */
	volatile uint32_t *cmih_refcntp;	/* Reference count pointer */
	/* Combination of the CMI_MSR_FLAG_* values */
	uint64_t cmih_msrsrc;			/* MSR data source flags */
	/* cpu_t * when native, xen_mc_lcpu_cookie_t under __xpv (HDLPRIV) */
	void *cmih_hdlpriv;			/* cmi_hw.c private data */
	void *cmih_spec;			/* cmi_hdl_{set,get}_specific */
	/* The next two are still NULL when cmi_hdl_create() returns */
	void *cmih_cmi;				/* cpu mod control structure */
	void *cmih_cmidata;			/* cpu mod private data */
	const struct cmi_mc_ops *cmih_mcops;	/* Memory-controller ops */
	void *cmih_mcdata;			/* Memory-controller data */
	uint64_t cmih_flags;			/* See CMIH_F_* below */
} cmi_hdl_impl_t;
74 
/*
 * IMPLHDL converts the opaque handle given to consumers back into the
 * implementation structure.  HDLOPS names the ops vector member of an
 * implementation handle and is usable as an lvalue (cmi_hdl_create()
 * assigns through it).  Both macros parenthesize their argument so that
 * an arbitrary expression may be passed without the cast or the member
 * access binding to only part of it.
 */
#define	IMPLHDL(ophdl)	((cmi_hdl_impl_t *)(ophdl))
#define	HDLOPS(hdl)	((hdl)->cmih_ops)
77 
78 #define	CMIH_F_INJACTV		0x1ULL
79 
/*
 * Ops structure for handle operations.  Every operation receives the
 * implementation handle it applies to as its first argument.  The
 * native (ntv_*) and xVM dom0 (xpv_*) functions later in this file
 * have signatures matching these members.
 */
struct cmi_hdl_ops {
	/*
	 * These ops are required in an implementation.
	 */
	uint_t (*cmio_vendor)(cmi_hdl_impl_t *);
	const char *(*cmio_vendorstr)(cmi_hdl_impl_t *);
	uint_t (*cmio_family)(cmi_hdl_impl_t *);
	uint_t (*cmio_model)(cmi_hdl_impl_t *);
	uint_t (*cmio_stepping)(cmi_hdl_impl_t *);
	uint_t (*cmio_chipid)(cmi_hdl_impl_t *);
	uint_t (*cmio_coreid)(cmi_hdl_impl_t *);
	uint_t (*cmio_strandid)(cmi_hdl_impl_t *);
	uint32_t (*cmio_chiprev)(cmi_hdl_impl_t *);
	const char *(*cmio_chiprevstr)(cmi_hdl_impl_t *);
	uint32_t (*cmio_getsockettype)(cmi_hdl_impl_t *);
	id_t (*cmio_logical_id)(cmi_hdl_impl_t *);
	/*
	 * These ops are optional in an implementation.
	 */
	ulong_t (*cmio_getcr4)(cmi_hdl_impl_t *);
	void (*cmio_setcr4)(cmi_hdl_impl_t *, ulong_t);
	/* MSR number in, value out through the pointer */
	cmi_errno_t (*cmio_rdmsr)(cmi_hdl_impl_t *, uint_t, uint64_t *);
	/* MSR number and the value to write */
	cmi_errno_t (*cmio_wrmsr)(cmi_hdl_impl_t *, uint_t, uint64_t);
	/* MSR number and the value to interpose */
	cmi_errno_t (*cmio_msrinterpose)(cmi_hdl_impl_t *, uint_t, uint64_t);
	/* Raise the given interrupt/trap number (e.g. T_MCE) */
	void (*cmio_int)(cmi_hdl_impl_t *, int);
	/* New status in, previous status out through the pointer */
	int (*cmio_online)(cmi_hdl_impl_t *, int, int *);
};

/* Forward declaration of the ops vector installed by cmi_hdl_create(). */
static const struct cmi_hdl_ops cmi_hdl_ops;
112 
113 /*
114  * Handles are looked up from contexts such as polling, injection etc
115  * where the context is reasonably well defined (although a poller could
116  * interrupt any old thread holding any old lock).  They are also looked
117  * up by machine check handlers, which may strike at inconvenient times
118  * such as during handle initialization or destruction or during handle
119  * lookup (which the #MC handler itself will also have to perform).
120  *
121  * So keeping handles in a linked list makes locking difficult when we
122  * consider #MC handlers.  Our solution is to have an array indexed
123  * by that which uniquely identifies a handle - chip/core/strand id -
124  * with each array member a structure including a pointer to a handle
125  * structure for the resource, and a reference count for the handle.
126  * Reference counts are modified atomically.  The public cmi_hdl_hold
127  * always succeeds because this can only be used after handle creation
128  * and before the call to destruct, so the hold count is already at least one.
129  * In other functions that lookup a handle (cmi_hdl_lookup, cmi_hdl_any)
 * we must be certain that the count has not already decremented to zero
131  * before applying our hold.
132  *
133  * This array is allocated when first we want to populate an entry.
134  * When allocated it is maximal - ideally we should scale to the
135  * actual number of chips, cores per chip and strand per core but
136  * that info is not readily available if we are virtualized so
137  * for now we stick with the dumb approach.
138  */
/*
 * Number of index bits given to each component of a handle's identity.
 * cmi_hdl_create() rejects any id that does not fit in its field.
 */
#define	CMI_MAX_CHIPS_NBITS		4	/* 16 chip packages max */
#define	CMI_MAX_CORES_PER_CHIP_NBITS	3	/* 8 cores per chip max */
#define	CMI_MAX_STRANDS_PER_CORE_NBITS	1	/* 2 strands per core max */

/* Maximum counts implied by the bit widths above. */
#define	CMI_MAX_CHIPS			(1 << CMI_MAX_CHIPS_NBITS)
#define	CMI_MAX_CORES_PER_CHIP		(1 << CMI_MAX_CORES_PER_CHIP_NBITS)
#define	CMI_MAX_STRANDS_PER_CORE	(1 << CMI_MAX_STRANDS_PER_CORE_NBITS)

/*
 * Handle array indexing.
 *	[7:4] = Chip package.
 *	[3:1] = Core in package,
 *	[0:0] = Strand in core,
 */
/* Chip id, masked to its field width and shifted above core and strand. */
#define	CMI_HDL_ARR_IDX_CHIP(chipid) \
	(((chipid) & (CMI_MAX_CHIPS - 1)) << \
	(CMI_MAX_STRANDS_PER_CORE_NBITS + CMI_MAX_CORES_PER_CHIP_NBITS))

/* Core id, masked to its field width and shifted above the strand bits. */
#define	CMI_HDL_ARR_IDX_CORE(coreid) \
	(((coreid) & (CMI_MAX_CORES_PER_CHIP - 1)) << \
	CMI_MAX_STRANDS_PER_CORE_NBITS)

/* Strand id, masked to its field width; occupies the low-order bits. */
#define	CMI_HDL_ARR_IDX_STRAND(strandid) \
	(((strandid) & (CMI_MAX_STRANDS_PER_CORE - 1)))

/* Full array index: the three fields above or'ed together. */
#define	CMI_HDL_ARR_IDX(chipid, coreid, strandid) \
	(CMI_HDL_ARR_IDX_CHIP(chipid) | CMI_HDL_ARR_IDX_CORE(coreid) | \
	CMI_HDL_ARR_IDX_STRAND(strandid))

/* Number of entries in the (maximally sized) handle array. */
#define	CMI_HDL_ARR_SZ (CMI_MAX_CHIPS * CMI_MAX_CORES_PER_CHIP * \
    CMI_MAX_STRANDS_PER_CORE)
170 
/*
 * One slot of the handle array: the reference count for the handle and a
 * pointer to the handle itself.  A slot with a zero count and a NULL
 * pointer is treated as unassigned by cmi_hdl_create().
 */
struct cmi_hdl_arr_ent {
	volatile uint32_t cmae_refcnt;
	cmi_hdl_impl_t *cmae_hdlp;
};

/*
 * The handle array, indexed by CMI_HDL_ARR_IDX().  NULL until the first
 * cmi_hdl_create() call allocates it with CMI_HDL_ARR_SZ entries.
 */
static struct cmi_hdl_arr_ent *cmi_hdl_arr;
177 
/*
 * Controls where we will source PCI config space data.  The flag names
 * distinguish reads from writes, and the hardware from interposed values.
 */
#define	CMI_PCICFG_FLAG_RD_HWOK		0x0001
#define	CMI_PCICFG_FLAG_RD_INTERPOSEOK	0x0002
#define	CMI_PCICFG_FLAG_WR_HWOK		0x0004
#define	CMI_PCICFG_FLAG_WR_INTERPOSEOK	0x0008

/* Initially all four flags are set. */
static uint64_t cmi_pcicfg_flags =
    CMI_PCICFG_FLAG_RD_HWOK | CMI_PCICFG_FLAG_WR_HWOK |
    CMI_PCICFG_FLAG_RD_INTERPOSEOK | CMI_PCICFG_FLAG_WR_INTERPOSEOK;

/*
 * The equivalent flags for MSR access.  These are kept per cpu, in the
 * cmih_msrsrc member of each handle, rather than in a global.
 */
#define	CMI_MSR_FLAG_RD_HWOK		0x0001
#define	CMI_MSR_FLAG_RD_INTERPOSEOK	0x0002
#define	CMI_MSR_FLAG_WR_HWOK		0x0004
#define	CMI_MSR_FLAG_WR_INTERPOSEOK	0x0008
197 
198 int cmi_call_func_ntv_tries = 3;
199 
200 static cmi_errno_t
201 call_func_ntv(int cpuid, xc_func_t func, xc_arg_t arg1, xc_arg_t arg2)
202 {
203 	cmi_errno_t rc = -1;
204 	int i;
205 
206 	kpreempt_disable();
207 
208 	if (CPU->cpu_id == cpuid) {
209 		(*func)(arg1, arg2, (xc_arg_t)&rc);
210 	} else {
211 		/*
212 		 * This should not happen for a #MC trap or a poll, so
213 		 * this is likely an error injection or similar.
214 		 * We will try to cross call with xc_trycall - we
215 		 * can't guarantee success with xc_call because
216 		 * the interrupt code in the case of a #MC may
217 		 * already hold the xc mutex.
218 		 */
219 		for (i = 0; i < cmi_call_func_ntv_tries; i++) {
220 			cpuset_t cpus;
221 
222 			CPUSET_ONLY(cpus, cpuid);
223 			xc_trycall(arg1, arg2, (xc_arg_t)&rc, cpus, func);
224 			if (rc != -1)
225 				break;
226 
227 			DELAY(1);
228 		}
229 	}
230 
231 	kpreempt_enable();
232 
233 	return (rc != -1 ? rc : CMIERR_DEADLOCK);
234 }
235 
236 static uint64_t injcnt;
237 
238 void
239 cmi_hdl_inj_begin(cmi_hdl_t ophdl)
240 {
241 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
242 
243 	if (hdl != NULL)
244 		hdl->cmih_flags |= CMIH_F_INJACTV;
245 	if (injcnt++ == 0) {
246 		cmn_err(CE_NOTE, "Hardware error injection/simulation "
247 		    "activity noted");
248 	}
249 }
250 
251 void
252 cmi_hdl_inj_end(cmi_hdl_t ophdl)
253 {
254 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
255 
256 	ASSERT(hdl == NULL || hdl->cmih_flags & CMIH_F_INJACTV);
257 	if (hdl != NULL)
258 		hdl->cmih_flags &= ~CMIH_F_INJACTV;
259 }
260 
261 boolean_t
262 cmi_inj_tainted(void)
263 {
264 	return (injcnt != 0 ? B_TRUE : B_FALSE);
265 }
266 
267 /*
268  *	 =======================================================
269  *	|	MSR Interposition				|
270  *	|	-----------------				|
271  *	|							|
272  *	 -------------------------------------------------------
273  */
274 
/*
 * Hash of (handle pointer, MSR number) to a bucket index in msrihash.
 * The low three bits of the handle address are discarded before the MSR
 * number is added.  The shift must be parenthesized: '+' binds tighter
 * than '>>', so without the parentheses the handle would be shifted by
 * (3 + msr) bits - an undefined shift for realistic MSR numbers, and one
 * that leaves the MSR number out of the sum altogether.
 */
#define	CMI_MSRI_HASHSZ		16
#define	CMI_MSRI_HASHIDX(hdl, msr) \
	((((uintptr_t)(hdl) >> 3) + (msr)) % (CMI_MSRI_HASHSZ - 1))
278 
/* One hash bucket: a lock and the head of a doubly-linked entry chain. */
struct cmi_msri_bkt {
	kmutex_t msrib_lock;
	struct cmi_msri_hashent *msrib_head;
};

/* An interposed MSR value, keyed by (handle, MSR number). */
struct cmi_msri_hashent {
	struct cmi_msri_hashent *msrie_next;
	struct cmi_msri_hashent *msrie_prev;
	cmi_hdl_impl_t *msrie_hdl;
	uint_t msrie_msrnum;
	uint64_t msrie_msrval;
};

/* True if entry 'ent' is keyed by handle 'hdl' and MSR 'req_msr'. */
#define	CMI_MSRI_MATCH(ent, hdl, req_msr) \
	((ent)->msrie_hdl == (hdl) && (ent)->msrie_msrnum == (req_msr))

/* The interposition hash table, indexed by CMI_MSRI_HASHIDX(). */
static struct cmi_msri_bkt msrihash[CMI_MSRI_HASHSZ];
296 
297 static void
298 msri_addent(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t val)
299 {
300 	int idx = CMI_MSRI_HASHIDX(hdl, msr);
301 	struct cmi_msri_bkt *hbp = &msrihash[idx];
302 	struct cmi_msri_hashent *hep;
303 
304 	mutex_enter(&hbp->msrib_lock);
305 
306 	for (hep = hbp->msrib_head; hep != NULL; hep = hep->msrie_next) {
307 		if (CMI_MSRI_MATCH(hep, hdl, msr))
308 			break;
309 	}
310 
311 	if (hep != NULL) {
312 		hep->msrie_msrval = val;
313 	} else {
314 		hep = kmem_alloc(sizeof (*hep), KM_SLEEP);
315 		hep->msrie_hdl = hdl;
316 		hep->msrie_msrnum = msr;
317 		hep->msrie_msrval = val;
318 
319 		if (hbp->msrib_head != NULL)
320 			hbp->msrib_head->msrie_prev = hep;
321 		hep->msrie_next = hbp->msrib_head;
322 		hep->msrie_prev = NULL;
323 		hbp->msrib_head = hep;
324 	}
325 
326 	mutex_exit(&hbp->msrib_lock);
327 }
328 
329 /*
330  * Look for a match for the given hanlde and msr.  Return 1 with valp
331  * filled if a match is found, otherwise return 0 with valp untouched.
332  */
333 static int
334 msri_lookup(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t *valp)
335 {
336 	int idx = CMI_MSRI_HASHIDX(hdl, msr);
337 	struct cmi_msri_bkt *hbp = &msrihash[idx];
338 	struct cmi_msri_hashent *hep;
339 
340 	/*
341 	 * This function is called during #MC trap handling, so we should
342 	 * consider the possibility that the hash mutex is held by the
343 	 * interrupted thread.  This should not happen because interposition
344 	 * is an artificial injection mechanism and the #MC is requested
345 	 * after adding entries, but just in case of a real #MC at an
346 	 * unlucky moment we'll use mutex_tryenter here.
347 	 */
348 	if (!mutex_tryenter(&hbp->msrib_lock))
349 		return (0);
350 
351 	for (hep = hbp->msrib_head; hep != NULL; hep = hep->msrie_next) {
352 		if (CMI_MSRI_MATCH(hep, hdl, msr)) {
353 			*valp = hep->msrie_msrval;
354 			break;
355 		}
356 	}
357 
358 	mutex_exit(&hbp->msrib_lock);
359 
360 	return (hep != NULL);
361 }
362 
363 /*
364  * Remove any interposed value that matches.
365  */
366 static void
367 msri_rment(cmi_hdl_impl_t *hdl, uint_t msr)
368 {
369 
370 	int idx = CMI_MSRI_HASHIDX(hdl, msr);
371 	struct cmi_msri_bkt *hbp = &msrihash[idx];
372 	struct cmi_msri_hashent *hep;
373 
374 	if (!mutex_tryenter(&hbp->msrib_lock))
375 		return;
376 
377 	for (hep = hbp->msrib_head; hep != NULL; hep = hep->msrie_next) {
378 		if (CMI_MSRI_MATCH(hep, hdl, msr)) {
379 			if (hep->msrie_prev != NULL)
380 				hep->msrie_prev->msrie_next = hep->msrie_next;
381 
382 			if (hep->msrie_next != NULL)
383 				hep->msrie_next->msrie_prev = hep->msrie_prev;
384 
385 			if (hbp->msrib_head == hep)
386 				hbp->msrib_head = hep->msrie_next;
387 
388 			kmem_free(hep, sizeof (*hep));
389 			break;
390 		}
391 	}
392 
393 	mutex_exit(&hbp->msrib_lock);
394 }
395 
396 /*
397  *	 =======================================================
398  *	|	PCI Config Space Interposition			|
399  *	|	------------------------------			|
400  *	|							|
401  *	 -------------------------------------------------------
402  */
403 
404 /*
405  * Hash for interposed PCI config space values.  We lookup on bus/dev/fun/offset
406  * and then record whether the value stashed was made with a byte, word or
407  * doubleword access;  we will only return a hit for an access of the
408  * same size.  If you access say a 32-bit register using byte accesses
409  * and then attempt to read the full 32-bit value back you will not obtain
410  * any sort of merged result - you get a lookup miss.
411  */
412 
/*
 * Bucket index for a bus/dev/func/offset tuple: the four values are
 * summed and reduced modulo (CMI_PCII_HASHSZ - 1).
 */
#define	CMI_PCII_HASHSZ		16
#define	CMI_PCII_HASHIDX(b, d, f, o) \
	(((b) + (d) + (f) + (o)) % (CMI_PCII_HASHSZ - 1))

/* One hash bucket: a lock and the head of a doubly-linked entry chain. */
struct cmi_pcii_bkt {
	kmutex_t pciib_lock;
	struct cmi_pcii_hashent *pciib_head;
};

/*
 * An interposed PCI config space value.  The access size is part of the
 * key, so a value stashed with one size is only found by a lookup of
 * the same size.
 */
struct cmi_pcii_hashent {
	struct cmi_pcii_hashent *pcii_next;
	struct cmi_pcii_hashent *pcii_prev;
	int pcii_bus;
	int pcii_dev;
	int pcii_func;
	int pcii_reg;
	int pcii_asize;
	uint32_t pcii_val;
};

/* True if entry 'ent' has exactly the given bus/dev/func/reg/size key. */
#define	CMI_PCII_MATCH(ent, b, d, f, r, asz) \
	((ent)->pcii_bus == (b) && (ent)->pcii_dev == (d) && \
	(ent)->pcii_func == (f) && (ent)->pcii_reg == (r) && \
	(ent)->pcii_asize == (asz))

/* The interposition hash table, indexed by CMI_PCII_HASHIDX(). */
static struct cmi_pcii_bkt pciihash[CMI_PCII_HASHSZ];
439 
440 
441 /*
442  * Add a new entry to the PCI interpose hash, overwriting any existing
443  * entry that is found.
444  */
445 static void
446 pcii_addent(int bus, int dev, int func, int reg, uint32_t val, int asz)
447 {
448 	int idx = CMI_PCII_HASHIDX(bus, dev, func, reg);
449 	struct cmi_pcii_bkt *hbp = &pciihash[idx];
450 	struct cmi_pcii_hashent *hep;
451 
452 	cmi_hdl_inj_begin(NULL);
453 
454 	mutex_enter(&hbp->pciib_lock);
455 
456 	for (hep = hbp->pciib_head; hep != NULL; hep = hep->pcii_next) {
457 		if (CMI_PCII_MATCH(hep, bus, dev, func, reg, asz))
458 			break;
459 	}
460 
461 	if (hep != NULL) {
462 		hep->pcii_val = val;
463 	} else {
464 		hep = kmem_alloc(sizeof (*hep), KM_SLEEP);
465 		hep->pcii_bus = bus;
466 		hep->pcii_dev = dev;
467 		hep->pcii_func = func;
468 		hep->pcii_reg = reg;
469 		hep->pcii_asize = asz;
470 		hep->pcii_val = val;
471 
472 		if (hbp->pciib_head != NULL)
473 			hbp->pciib_head->pcii_prev = hep;
474 		hep->pcii_next = hbp->pciib_head;
475 		hep->pcii_prev = NULL;
476 		hbp->pciib_head = hep;
477 	}
478 
479 	mutex_exit(&hbp->pciib_lock);
480 
481 	cmi_hdl_inj_end(NULL);
482 }
483 
484 /*
485  * Look for a match for the given bus/dev/func/reg; return 1 with valp
486  * filled if a match is found, otherwise return 0 with valp untouched.
487  */
488 static int
489 pcii_lookup(int bus, int dev, int func, int reg, int asz, uint32_t *valp)
490 {
491 	int idx = CMI_PCII_HASHIDX(bus, dev, func, reg);
492 	struct cmi_pcii_bkt *hbp = &pciihash[idx];
493 	struct cmi_pcii_hashent *hep;
494 
495 	if (!mutex_tryenter(&hbp->pciib_lock))
496 		return (0);
497 
498 	for (hep = hbp->pciib_head; hep != NULL; hep = hep->pcii_next) {
499 		if (CMI_PCII_MATCH(hep, bus, dev, func, reg, asz)) {
500 			*valp = hep->pcii_val;
501 			break;
502 		}
503 	}
504 
505 	mutex_exit(&hbp->pciib_lock);
506 
507 	return (hep != NULL);
508 }
509 
510 static void
511 pcii_rment(int bus, int dev, int func, int reg, int asz)
512 {
513 	int idx = CMI_PCII_HASHIDX(bus, dev, func, reg);
514 	struct cmi_pcii_bkt *hbp = &pciihash[idx];
515 	struct cmi_pcii_hashent *hep;
516 
517 	mutex_enter(&hbp->pciib_lock);
518 
519 	for (hep = hbp->pciib_head; hep != NULL; hep = hep->pcii_next) {
520 		if (CMI_PCII_MATCH(hep, bus, dev, func, reg, asz)) {
521 			if (hep->pcii_prev != NULL)
522 				hep->pcii_prev->pcii_next = hep->pcii_next;
523 
524 			if (hep->pcii_next != NULL)
525 				hep->pcii_next->pcii_prev = hep->pcii_prev;
526 
527 			if (hbp->pciib_head == hep)
528 				hbp->pciib_head = hep->pcii_next;
529 
530 			kmem_free(hep, sizeof (*hep));
531 			break;
532 		}
533 	}
534 
535 	mutex_exit(&hbp->pciib_lock);
536 }
537 
538 #ifndef __xpv
539 
540 /*
541  *	 =======================================================
542  *	|	Native methods					|
543  *	|	--------------					|
544  *	|							|
545  *	| These are used when we are running native on bare-	|
546  *	| metal, or simply don't know any better.		|
547  *	---------------------------------------------------------
548  */
549 
550 #define	HDLPRIV(hdl)	((cpu_t *)(hdl)->cmih_hdlpriv)
551 
552 static uint_t
553 ntv_vendor(cmi_hdl_impl_t *hdl)
554 {
555 	return (cpuid_getvendor(HDLPRIV(hdl)));
556 }
557 
558 static const char *
559 ntv_vendorstr(cmi_hdl_impl_t *hdl)
560 {
561 	return (cpuid_getvendorstr(HDLPRIV(hdl)));
562 }
563 
564 static uint_t
565 ntv_family(cmi_hdl_impl_t *hdl)
566 {
567 	return (cpuid_getfamily(HDLPRIV(hdl)));
568 }
569 
570 static uint_t
571 ntv_model(cmi_hdl_impl_t *hdl)
572 {
573 	return (cpuid_getmodel(HDLPRIV(hdl)));
574 }
575 
576 static uint_t
577 ntv_stepping(cmi_hdl_impl_t *hdl)
578 {
579 	return (cpuid_getstep(HDLPRIV(hdl)));
580 }
581 
582 static uint_t
583 ntv_chipid(cmi_hdl_impl_t *hdl)
584 {
585 	return (hdl->cmih_chipid);
586 
587 }
588 
589 static uint_t
590 ntv_coreid(cmi_hdl_impl_t *hdl)
591 {
592 	return (hdl->cmih_coreid);
593 }
594 
595 static uint_t
596 ntv_strandid(cmi_hdl_impl_t *hdl)
597 {
598 	return (hdl->cmih_strandid);
599 }
600 
601 static uint32_t
602 ntv_chiprev(cmi_hdl_impl_t *hdl)
603 {
604 	return (cpuid_getchiprev(HDLPRIV(hdl)));
605 }
606 
607 static const char *
608 ntv_chiprevstr(cmi_hdl_impl_t *hdl)
609 {
610 	return (cpuid_getchiprevstr(HDLPRIV(hdl)));
611 }
612 
613 static uint32_t
614 ntv_getsockettype(cmi_hdl_impl_t *hdl)
615 {
616 	return (cpuid_getsockettype(HDLPRIV(hdl)));
617 }
618 
619 static id_t
620 ntv_logical_id(cmi_hdl_impl_t *hdl)
621 {
622 	return (HDLPRIV(hdl)->cpu_id);
623 }
624 
625 /*ARGSUSED*/
626 static int
627 ntv_getcr4_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
628 {
629 	ulong_t *dest = (ulong_t *)arg1;
630 	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
631 
632 	*dest = getcr4();
633 	*rcp = CMI_SUCCESS;
634 
635 	return (0);
636 }
637 
638 static ulong_t
639 ntv_getcr4(cmi_hdl_impl_t *hdl)
640 {
641 	cpu_t *cp = HDLPRIV(hdl);
642 	ulong_t val;
643 
644 	(void) call_func_ntv(cp->cpu_id, ntv_getcr4_xc, (xc_arg_t)&val, NULL);
645 
646 	return (val);
647 }
648 
649 /*ARGSUSED*/
650 static int
651 ntv_setcr4_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
652 {
653 	ulong_t val = (ulong_t)arg1;
654 	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
655 
656 	setcr4(val);
657 	*rcp = CMI_SUCCESS;
658 
659 	return (0);
660 }
661 
662 static void
663 ntv_setcr4(cmi_hdl_impl_t *hdl, ulong_t val)
664 {
665 	cpu_t *cp = HDLPRIV(hdl);
666 
667 	(void) call_func_ntv(cp->cpu_id, ntv_setcr4_xc, (xc_arg_t)val, NULL);
668 }
669 
670 volatile uint32_t cmi_trapped_rdmsr;
671 
672 /*ARGSUSED*/
673 static int
674 ntv_rdmsr_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
675 {
676 	uint_t msr = (uint_t)arg1;
677 	uint64_t *valp = (uint64_t *)arg2;
678 	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
679 
680 	on_trap_data_t otd;
681 
682 	if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
683 		if (checked_rdmsr(msr, valp) == 0)
684 			*rcp = CMI_SUCCESS;
685 		else
686 			*rcp = CMIERR_NOTSUP;
687 	} else {
688 		*rcp = CMIERR_MSRGPF;
689 		atomic_inc_32(&cmi_trapped_rdmsr);
690 	}
691 	no_trap();
692 
693 	return (0);
694 }
695 
696 static cmi_errno_t
697 ntv_rdmsr(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t *valp)
698 {
699 	cpu_t *cp = HDLPRIV(hdl);
700 
701 	if (!(hdl->cmih_msrsrc & CMI_MSR_FLAG_RD_HWOK))
702 		return (CMIERR_INTERPOSE);
703 
704 	return (call_func_ntv(cp->cpu_id, ntv_rdmsr_xc,
705 	    (xc_arg_t)msr, (xc_arg_t)valp));
706 }
707 
708 volatile uint32_t cmi_trapped_wrmsr;
709 
710 /*ARGSUSED*/
711 static int
712 ntv_wrmsr_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
713 {
714 	uint_t msr = (uint_t)arg1;
715 	uint64_t val = *((uint64_t *)arg2);
716 	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
717 	on_trap_data_t otd;
718 
719 	if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
720 		if (checked_wrmsr(msr, val) == 0)
721 			*rcp = CMI_SUCCESS;
722 		else
723 			*rcp = CMIERR_NOTSUP;
724 	} else {
725 		*rcp = CMIERR_MSRGPF;
726 		atomic_inc_32(&cmi_trapped_wrmsr);
727 	}
728 	no_trap();
729 
730 	return (0);
731 
732 }
733 
734 static cmi_errno_t
735 ntv_wrmsr(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t val)
736 {
737 	cpu_t *cp = HDLPRIV(hdl);
738 
739 	if (!(hdl->cmih_msrsrc & CMI_MSR_FLAG_WR_HWOK))
740 		return (CMI_SUCCESS);
741 
742 	return (call_func_ntv(cp->cpu_id, ntv_wrmsr_xc,
743 	    (xc_arg_t)msr, (xc_arg_t)&val));
744 }
745 
746 static cmi_errno_t
747 ntv_msrinterpose(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t val)
748 {
749 	msri_addent(hdl, msr, val);
750 	return (CMI_SUCCESS);
751 }
752 
753 /*ARGSUSED*/
754 static int
755 ntv_int_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
756 {
757 	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
758 	int int_no = (int)arg1;
759 
760 	if (int_no == T_MCE)
761 		int18();
762 	else
763 		int_cmci();
764 	*rcp = CMI_SUCCESS;
765 
766 	return (0);
767 }
768 
769 static void
770 ntv_int(cmi_hdl_impl_t *hdl, int int_no)
771 {
772 	cpu_t *cp = HDLPRIV(hdl);
773 
774 	(void) call_func_ntv(cp->cpu_id, ntv_int_xc, (xc_arg_t)int_no, NULL);
775 }
776 
777 static int
778 ntv_online(cmi_hdl_impl_t *hdl, int new_status, int *old_status)
779 {
780 	processorid_t cpuid = HDLPRIV(hdl)->cpu_id;
781 
782 	return (p_online_internal(cpuid, new_status, old_status));
783 }
784 
785 #else	/* __xpv */
786 
787 /*
788  *	 =======================================================
789  *	|	xVM dom0 methods				|
790  *	|	----------------				|
791  *	|							|
792  *	| These are used when we are running as dom0 in		|
793  *	| a Solaris xVM context.				|
794  *	---------------------------------------------------------
795  */
796 
797 #define	HDLPRIV(hdl)	((xen_mc_lcpu_cookie_t)(hdl)->cmih_hdlpriv)
798 
799 extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
800 
801 
802 static uint_t
803 xpv_vendor(cmi_hdl_impl_t *hdl)
804 {
805 	return (_cpuid_vendorstr_to_vendorcode((char *)xen_physcpu_vendorstr(
806 	    HDLPRIV(hdl))));
807 }
808 
809 static const char *
810 xpv_vendorstr(cmi_hdl_impl_t *hdl)
811 {
812 	return (xen_physcpu_vendorstr(HDLPRIV(hdl)));
813 }
814 
815 static uint_t
816 xpv_family(cmi_hdl_impl_t *hdl)
817 {
818 	return (xen_physcpu_family(HDLPRIV(hdl)));
819 }
820 
821 static uint_t
822 xpv_model(cmi_hdl_impl_t *hdl)
823 {
824 	return (xen_physcpu_model(HDLPRIV(hdl)));
825 }
826 
827 static uint_t
828 xpv_stepping(cmi_hdl_impl_t *hdl)
829 {
830 	return (xen_physcpu_stepping(HDLPRIV(hdl)));
831 }
832 
833 static uint_t
834 xpv_chipid(cmi_hdl_impl_t *hdl)
835 {
836 	return (hdl->cmih_chipid);
837 }
838 
839 static uint_t
840 xpv_coreid(cmi_hdl_impl_t *hdl)
841 {
842 	return (hdl->cmih_coreid);
843 }
844 
845 static uint_t
846 xpv_strandid(cmi_hdl_impl_t *hdl)
847 {
848 	return (hdl->cmih_strandid);
849 }
850 
851 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
852 
853 static uint32_t
854 xpv_chiprev(cmi_hdl_impl_t *hdl)
855 {
856 	return (_cpuid_chiprev(xpv_vendor(hdl), xpv_family(hdl),
857 	    xpv_model(hdl), xpv_stepping(hdl)));
858 }
859 
860 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
861 
862 static const char *
863 xpv_chiprevstr(cmi_hdl_impl_t *hdl)
864 {
865 	return (_cpuid_chiprevstr(xpv_vendor(hdl), xpv_family(hdl),
866 	    xpv_model(hdl), xpv_stepping(hdl)));
867 }
868 
869 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
870 
871 static uint32_t
872 xpv_getsockettype(cmi_hdl_impl_t *hdl)
873 {
874 	return (_cpuid_skt(xpv_vendor(hdl), xpv_family(hdl),
875 	    xpv_model(hdl), xpv_stepping(hdl)));
876 }
877 
878 static id_t
879 xpv_logical_id(cmi_hdl_impl_t *hdl)
880 {
881 	return (xen_physcpu_logical_id(HDLPRIV(hdl)));
882 }
883 
884 static cmi_errno_t
885 xpv_rdmsr(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t *valp)
886 {
887 	switch (msr) {
888 	case IA32_MSR_MCG_CAP:
889 		*valp = xen_physcpu_mcg_cap(HDLPRIV(hdl));
890 		break;
891 
892 	default:
893 		return (CMIERR_NOTSUP);
894 	}
895 
896 	return (CMI_SUCCESS);
897 }
898 
899 /*
900  * Request the hypervisor to write an MSR for us.  The hypervisor
901  * will only accept MCA-related MSRs, as this is for MCA error
902  * simulation purposes alone.  We will pre-screen MSRs for injection
903  * so we don't bother the HV with bogus requests.  We will permit
904  * injection to any MCA bank register, and to MCG_STATUS.
905  */
906 
907 #define	IS_MCA_INJ_MSR(msr) \
908 	(((msr) >= IA32_MSR_MC(0, CTL) && (msr) <= IA32_MSR_MC(10, MISC)) || \
909 	(msr) == IA32_MSR_MCG_STATUS)
910 
911 static cmi_errno_t
912 xpv_wrmsr_cmn(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t val, boolean_t intpose)
913 {
914 	struct xen_mc_msrinject mci;
915 
916 	if (!(hdl->cmih_flags & CMIH_F_INJACTV))
917 		return (CMIERR_NOTSUP);		/* for injection use only! */
918 
919 	if (!IS_MCA_INJ_MSR(msr))
920 		return (CMIERR_API);
921 
922 	if (panicstr)
923 		return (CMIERR_DEADLOCK);
924 
925 	mci.mcinj_cpunr = xen_physcpu_logical_id(HDLPRIV(hdl));
926 	mci.mcinj_flags = intpose ? MC_MSRINJ_F_INTERPOSE : 0;
927 	mci.mcinj_count = 1;	/* learn to batch sometime */
928 	mci.mcinj_msr[0].reg = msr;
929 	mci.mcinj_msr[0].value = val;
930 
931 	return (HYPERVISOR_mca(XEN_MC_CMD_msrinject, (xen_mc_arg_t *)&mci) ==
932 	    XEN_MC_HCALL_SUCCESS ?  CMI_SUCCESS : CMIERR_NOTSUP);
933 }
934 
935 static cmi_errno_t
936 xpv_wrmsr(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t val)
937 {
938 	return (xpv_wrmsr_cmn(hdl, msr, val, B_FALSE));
939 }
940 
941 
942 static cmi_errno_t
943 xpv_msrinterpose(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t val)
944 {
945 	return (xpv_wrmsr_cmn(hdl, msr, val, B_TRUE));
946 }
947 
948 static void
949 xpv_int(cmi_hdl_impl_t *hdl, int int_no)
950 {
951 	struct xen_mc_mceinject mce;
952 
953 	if (!(hdl->cmih_flags & CMIH_F_INJACTV))
954 		return;
955 
956 	if (int_no != T_MCE) {
957 		cmn_err(CE_WARN, "xpv_int: int_no %d unimplemented\n",
958 		    int_no);
959 	}
960 
961 	mce.mceinj_cpunr = xen_physcpu_logical_id(HDLPRIV(hdl));
962 
963 	(void) HYPERVISOR_mca(XEN_MC_CMD_mceinject, (xen_mc_arg_t *)&mce);
964 }
965 
966 #define	CSM_XLATE_SUNOS2XEN	1
967 #define	CSM_XLATE_XEN2SUNOS	2
968 
969 #define	CSM_MAPENT(suffix)	{ P_##suffix, MC_CPU_P_##suffix }
970 
971 static int
972 cpu_status_xlate(int in, int direction, int *outp)
973 {
974 	struct cpu_status_map {
975 		int csm_val[2];
976 	} map[] = {
977 		CSM_MAPENT(STATUS),
978 		CSM_MAPENT(ONLINE),
979 		CSM_MAPENT(OFFLINE),
980 		CSM_MAPENT(FAULTED),
981 		CSM_MAPENT(SPARE),
982 		CSM_MAPENT(POWEROFF)
983 	};
984 
985 	int cmpidx = (direction == CSM_XLATE_XEN2SUNOS);
986 	int i;
987 
988 	for (i = 0; i < sizeof (map) / sizeof (struct cpu_status_map); i++) {
989 		if (map[i].csm_val[cmpidx] == in) {
990 			*outp = map[i].csm_val[!cmpidx];
991 			return (1);
992 		}
993 	}
994 
995 	return (0);
996 }
997 
998 static int
999 xpv_online(cmi_hdl_impl_t *hdl, int new_status, int *old_status)
1000 {
1001 	struct xen_mc_offline mco;
1002 	int flag, rc;
1003 
1004 	new_status &= ~P_FORCED;
1005 
1006 	if (!cpu_status_xlate(new_status, CSM_XLATE_SUNOS2XEN, &flag))
1007 		return (ENOSYS);
1008 
1009 	mco.mco_cpu = xen_physcpu_logical_id(HDLPRIV(hdl));
1010 	mco.mco_flag = flag;
1011 
1012 	if ((rc = HYPERVISOR_mca(XEN_MC_CMD_offlinecpu,
1013 	    (xen_mc_arg_t *)&mco)) == XEN_MC_HCALL_SUCCESS) {
1014 		flag = mco.mco_flag;
1015 		if (!cpu_status_xlate(flag, CSM_XLATE_XEN2SUNOS, old_status))
1016 			cmn_err(CE_NOTE, "xpv_online: unknown status %d.",
1017 			    flag);
1018 	}
1019 
1020 	return (-rc);
1021 }
1022 
1023 #endif
1024 
1025 /*ARGSUSED*/
1026 static void *
1027 cpu_search(enum cmi_hdl_class class, uint_t chipid, uint_t coreid,
1028     uint_t strandid)
1029 {
1030 #ifdef __xpv
1031 	xen_mc_lcpu_cookie_t cpi;
1032 
1033 	for (cpi = xen_physcpu_next(NULL); cpi != NULL;
1034 	    cpi = xen_physcpu_next(cpi)) {
1035 		if (xen_physcpu_chipid(cpi) == chipid &&
1036 		    xen_physcpu_coreid(cpi) == coreid &&
1037 		    xen_physcpu_strandid(cpi) == strandid)
1038 			return ((void *)cpi);
1039 	}
1040 	return (NULL);
1041 
1042 #else	/* __xpv */
1043 
1044 	cpu_t *cp, *startcp;
1045 
1046 	kpreempt_disable();
1047 	cp = startcp = CPU;
1048 	do {
1049 		if (cmi_ntv_hwchipid(cp) == chipid &&
1050 		    cmi_ntv_hwcoreid(cp) == coreid &&
1051 		    cmi_ntv_hwstrandid(cp) == strandid) {
1052 			kpreempt_enable();
1053 			return ((void *)cp);
1054 		}
1055 
1056 		cp = cp->cpu_next;
1057 	} while (cp != startcp);
1058 	kpreempt_enable();
1059 	return (NULL);
1060 #endif	/* __ xpv */
1061 }
1062 
1063 static boolean_t
1064 cpu_is_cmt(void *priv)
1065 {
1066 #ifdef __xpv
1067 	return (xen_physcpu_is_cmt((xen_mc_lcpu_cookie_t)priv));
1068 #else /* __xpv */
1069 	cpu_t *cp = (cpu_t *)priv;
1070 
1071 	int strands_per_core = cpuid_get_ncpu_per_chip(cp) /
1072 	    cpuid_get_ncore_per_chip(cp);
1073 
1074 	return (strands_per_core > 1);
1075 #endif /* __xpv */
1076 }
1077 
1078 cmi_hdl_t
1079 cmi_hdl_create(enum cmi_hdl_class class, uint_t chipid, uint_t coreid,
1080     uint_t strandid)
1081 {
1082 	cmi_hdl_impl_t *hdl;
1083 	void *priv;
1084 	int idx;
1085 
1086 #ifdef __xpv
1087 	ASSERT(class == CMI_HDL_SOLARIS_xVM_MCA);
1088 #else
1089 	ASSERT(class == CMI_HDL_NATIVE);
1090 #endif
1091 
1092 	if (chipid > CMI_MAX_CHIPS - 1 || coreid > CMI_MAX_CORES_PER_CHIP - 1 ||
1093 	    strandid > CMI_MAX_STRANDS_PER_CORE - 1)
1094 		return (NULL);
1095 
1096 	if ((priv = cpu_search(class, chipid, coreid, strandid)) == NULL)
1097 		return (NULL);
1098 
1099 	hdl = kmem_zalloc(sizeof (*hdl), KM_SLEEP);
1100 
1101 	hdl->cmih_class = class;
1102 	HDLOPS(hdl) = &cmi_hdl_ops;
1103 	hdl->cmih_chipid = chipid;
1104 	hdl->cmih_coreid = coreid;
1105 	hdl->cmih_strandid = strandid;
1106 	hdl->cmih_mstrand = cpu_is_cmt(priv);
1107 	hdl->cmih_hdlpriv = priv;
1108 #ifdef __xpv
1109 	hdl->cmih_msrsrc = CMI_MSR_FLAG_RD_INTERPOSEOK |
1110 	    CMI_MSR_FLAG_WR_INTERPOSEOK;
1111 #else	/* __xpv */
1112 	hdl->cmih_msrsrc = CMI_MSR_FLAG_RD_HWOK | CMI_MSR_FLAG_RD_INTERPOSEOK |
1113 	    CMI_MSR_FLAG_WR_HWOK | CMI_MSR_FLAG_WR_INTERPOSEOK;
1114 #endif
1115 
1116 	if (cmi_hdl_arr == NULL) {
1117 		size_t sz = CMI_HDL_ARR_SZ * sizeof (struct cmi_hdl_arr_ent);
1118 		void *arr = kmem_zalloc(sz, KM_SLEEP);
1119 
1120 		if (atomic_cas_ptr(&cmi_hdl_arr, NULL, arr) != NULL)
1121 			kmem_free(arr, sz); /* someone beat us */
1122 	}
1123 
1124 	idx = CMI_HDL_ARR_IDX(chipid, coreid, strandid);
1125 	if (cmi_hdl_arr[idx].cmae_refcnt != 0 ||
1126 	    cmi_hdl_arr[idx].cmae_hdlp != NULL) {
1127 		/*
1128 		 * Somehow this (chipid, coreid, strandid) id tuple has
1129 		 * already been assigned!  This indicates that the
1130 		 * callers logic in determining these values is busted,
1131 		 * or perhaps undermined by bad BIOS setup.  Complain,
1132 		 * and refuse to initialize this tuple again as bad things
1133 		 * will happen.
1134 		 */
1135 		cmn_err(CE_NOTE, "cmi_hdl_create: chipid %d coreid %d "
1136 		    "strandid %d handle already allocated!",
1137 		    chipid, coreid, strandid);
1138 		kmem_free(hdl, sizeof (*hdl));
1139 		return (NULL);
1140 	}
1141 
1142 	/*
1143 	 * Once we store a nonzero reference count others can find this
1144 	 * handle via cmi_hdl_lookup etc.  This initial hold on the handle
1145 	 * is to be dropped only if some other part of cmi initialization
1146 	 * fails or, if it succeeds, at later cpu deconfigure.  Note the
1147 	 * the module private data we hold in cmih_cmi and cmih_cmidata
1148 	 * is still NULL at this point (the caller will fill it with
1149 	 * cmi_hdl_setcmi if it initializes) so consumers of handles
1150 	 * should always be ready for that possibility.
1151 	 */
1152 	cmi_hdl_arr[idx].cmae_hdlp = hdl;
1153 	hdl->cmih_refcntp = &cmi_hdl_arr[idx].cmae_refcnt;
1154 	cmi_hdl_arr[idx].cmae_refcnt = 1;
1155 
1156 	return ((cmi_hdl_t)hdl);
1157 }
1158 
1159 void
1160 cmi_hdl_hold(cmi_hdl_t ophdl)
1161 {
1162 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1163 
1164 	ASSERT(*hdl->cmih_refcntp != 0); /* must not be the initial hold */
1165 
1166 	atomic_inc_32(hdl->cmih_refcntp);
1167 }
1168 
1169 static int
1170 cmi_hdl_canref(int arridx)
1171 {
1172 	volatile uint32_t *refcntp;
1173 	uint32_t refcnt;
1174 
1175 	if (cmi_hdl_arr == NULL)
1176 		return (0);
1177 
1178 	refcntp = &cmi_hdl_arr[arridx].cmae_refcnt;
1179 	refcnt = *refcntp;
1180 
1181 	if (refcnt == 0) {
1182 		/*
1183 		 * Associated object never existed, is being destroyed,
1184 		 * or has been destroyed.
1185 		 */
1186 		return (0);
1187 	}
1188 
1189 	/*
1190 	 * We cannot use atomic increment here because once the reference
1191 	 * count reaches zero it must never be bumped up again.
1192 	 */
1193 	while (refcnt != 0) {
1194 		if (atomic_cas_32(refcntp, refcnt, refcnt + 1) == refcnt)
1195 			return (1);
1196 		refcnt = *refcntp;
1197 	}
1198 
1199 	/*
1200 	 * Somebody dropped the reference count to 0 after our initial
1201 	 * check.
1202 	 */
1203 	return (0);
1204 }
1205 
1206 
1207 void
1208 cmi_hdl_rele(cmi_hdl_t ophdl)
1209 {
1210 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1211 	int idx;
1212 
1213 	ASSERT(*hdl->cmih_refcntp > 0);
1214 
1215 	if (atomic_dec_32_nv(hdl->cmih_refcntp) > 0)
1216 		return;
1217 
1218 	idx = CMI_HDL_ARR_IDX(hdl->cmih_chipid, hdl->cmih_coreid,
1219 	    hdl->cmih_strandid);
1220 	cmi_hdl_arr[idx].cmae_hdlp = NULL;
1221 
1222 	kmem_free(hdl, sizeof (*hdl));
1223 }
1224 
1225 void
1226 cmi_hdl_setspecific(cmi_hdl_t ophdl, void *arg)
1227 {
1228 	IMPLHDL(ophdl)->cmih_spec = arg;
1229 }
1230 
1231 void *
1232 cmi_hdl_getspecific(cmi_hdl_t ophdl)
1233 {
1234 	return (IMPLHDL(ophdl)->cmih_spec);
1235 }
1236 
1237 void
1238 cmi_hdl_setmc(cmi_hdl_t ophdl, const struct cmi_mc_ops *mcops, void *mcdata)
1239 {
1240 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1241 
1242 	ASSERT(hdl->cmih_mcops == NULL && hdl->cmih_mcdata == NULL);
1243 	hdl->cmih_mcops = mcops;
1244 	hdl->cmih_mcdata = mcdata;
1245 }
1246 
1247 const struct cmi_mc_ops *
1248 cmi_hdl_getmcops(cmi_hdl_t ophdl)
1249 {
1250 	return (IMPLHDL(ophdl)->cmih_mcops);
1251 }
1252 
1253 void *
1254 cmi_hdl_getmcdata(cmi_hdl_t ophdl)
1255 {
1256 	return (IMPLHDL(ophdl)->cmih_mcdata);
1257 }
1258 
1259 cmi_hdl_t
1260 cmi_hdl_lookup(enum cmi_hdl_class class, uint_t chipid, uint_t coreid,
1261     uint_t strandid)
1262 {
1263 	int idx;
1264 
1265 	if (chipid > CMI_MAX_CHIPS - 1 || coreid > CMI_MAX_CORES_PER_CHIP - 1 ||
1266 	    strandid > CMI_MAX_STRANDS_PER_CORE - 1)
1267 		return (NULL);
1268 
1269 	idx = CMI_HDL_ARR_IDX(chipid, coreid, strandid);
1270 
1271 	if (class == CMI_HDL_NEUTRAL)
1272 #ifdef __xpv
1273 		class = CMI_HDL_SOLARIS_xVM_MCA;
1274 #else
1275 		class = CMI_HDL_NATIVE;
1276 #endif
1277 
1278 	if (!cmi_hdl_canref(idx))
1279 		return (NULL);
1280 
1281 	if (cmi_hdl_arr[idx].cmae_hdlp->cmih_class != class) {
1282 		cmi_hdl_rele((cmi_hdl_t)cmi_hdl_arr[idx].cmae_hdlp);
1283 		return (NULL);
1284 	}
1285 
1286 	return ((cmi_hdl_t)cmi_hdl_arr[idx].cmae_hdlp);
1287 }
1288 
1289 cmi_hdl_t
1290 cmi_hdl_any(void)
1291 {
1292 	int i;
1293 
1294 	for (i = 0; i < CMI_HDL_ARR_SZ; i++) {
1295 		if (cmi_hdl_canref(i))
1296 			return ((cmi_hdl_t)cmi_hdl_arr[i].cmae_hdlp);
1297 	}
1298 
1299 	return (NULL);
1300 }
1301 
1302 void
1303 cmi_hdl_walk(int (*cbfunc)(cmi_hdl_t, void *, void *, void *),
1304     void *arg1, void *arg2, void *arg3)
1305 {
1306 	int i;
1307 
1308 	for (i = 0; i < CMI_HDL_ARR_SZ; i++) {
1309 		if (cmi_hdl_canref(i)) {
1310 			cmi_hdl_impl_t *hdl = cmi_hdl_arr[i].cmae_hdlp;
1311 
1312 			if ((*cbfunc)((cmi_hdl_t)hdl, arg1, arg2, arg3) ==
1313 			    CMI_HDL_WALK_DONE) {
1314 				cmi_hdl_rele((cmi_hdl_t)hdl);
1315 				break;
1316 			}
1317 			cmi_hdl_rele((cmi_hdl_t)hdl);
1318 		}
1319 	}
1320 }
1321 
1322 void
1323 cmi_hdl_setcmi(cmi_hdl_t ophdl, void *cmi, void *cmidata)
1324 {
1325 	IMPLHDL(ophdl)->cmih_cmidata = cmidata;
1326 	IMPLHDL(ophdl)->cmih_cmi = cmi;
1327 }
1328 
1329 void *
1330 cmi_hdl_getcmi(cmi_hdl_t ophdl)
1331 {
1332 	return (IMPLHDL(ophdl)->cmih_cmi);
1333 }
1334 
1335 void *
1336 cmi_hdl_getcmidata(cmi_hdl_t ophdl)
1337 {
1338 	return (IMPLHDL(ophdl)->cmih_cmidata);
1339 }
1340 
1341 enum cmi_hdl_class
1342 cmi_hdl_class(cmi_hdl_t ophdl)
1343 {
1344 	return (IMPLHDL(ophdl)->cmih_class);
1345 }
1346 
1347 #define	CMI_HDL_OPFUNC(what, type)				\
1348 	type							\
1349 	cmi_hdl_##what(cmi_hdl_t ophdl)				\
1350 	{							\
1351 		return (HDLOPS(IMPLHDL(ophdl))->		\
1352 		    cmio_##what(IMPLHDL(ophdl)));		\
1353 	}
1354 
1355 CMI_HDL_OPFUNC(vendor, uint_t)
1356 CMI_HDL_OPFUNC(vendorstr, const char *)
1357 CMI_HDL_OPFUNC(family, uint_t)
1358 CMI_HDL_OPFUNC(model, uint_t)
1359 CMI_HDL_OPFUNC(stepping, uint_t)
1360 CMI_HDL_OPFUNC(chipid, uint_t)
1361 CMI_HDL_OPFUNC(coreid, uint_t)
1362 CMI_HDL_OPFUNC(strandid, uint_t)
1363 CMI_HDL_OPFUNC(chiprev, uint32_t)
1364 CMI_HDL_OPFUNC(chiprevstr, const char *)
1365 CMI_HDL_OPFUNC(getsockettype, uint32_t)
1366 CMI_HDL_OPFUNC(logical_id, id_t)
1367 
1368 boolean_t
1369 cmi_hdl_is_cmt(cmi_hdl_t ophdl)
1370 {
1371 	return (IMPLHDL(ophdl)->cmih_mstrand);
1372 }
1373 
1374 void
1375 cmi_hdl_int(cmi_hdl_t ophdl, int num)
1376 {
1377 	if (HDLOPS(IMPLHDL(ophdl))->cmio_int == NULL)
1378 		return;
1379 
1380 	cmi_hdl_inj_begin(ophdl);
1381 	HDLOPS(IMPLHDL(ophdl))->cmio_int(IMPLHDL(ophdl), num);
1382 	cmi_hdl_inj_end(NULL);
1383 }
1384 
1385 int
1386 cmi_hdl_online(cmi_hdl_t ophdl, int new_status, int *old_status)
1387 {
1388 	return (HDLOPS(IMPLHDL(ophdl))->cmio_online(IMPLHDL(ophdl),
1389 	    new_status, old_status));
1390 }
1391 
1392 #ifndef	__xpv
1393 /*
1394  * Return hardware chip instance; cpuid_get_chipid provides this directly.
1395  */
1396 uint_t
1397 cmi_ntv_hwchipid(cpu_t *cp)
1398 {
1399 	return (cpuid_get_chipid(cp));
1400 }
1401 
1402 /*
1403  * Return core instance within a single chip.
1404  */
1405 uint_t
1406 cmi_ntv_hwcoreid(cpu_t *cp)
1407 {
1408 	return (cpuid_get_pkgcoreid(cp));
1409 }
1410 
1411 /*
1412  * Return strand number within a single core.  cpuid_get_clogid numbers
1413  * all execution units (strands, or cores in unstranded models) sequentially
1414  * within a single chip.
1415  */
1416 uint_t
1417 cmi_ntv_hwstrandid(cpu_t *cp)
1418 {
1419 	int strands_per_core = cpuid_get_ncpu_per_chip(cp) /
1420 	    cpuid_get_ncore_per_chip(cp);
1421 
1422 	return (cpuid_get_clogid(cp) % strands_per_core);
1423 }
1424 #endif	/* __xpv */
1425 
1426 void
1427 cmi_hdlconf_rdmsr_nohw(cmi_hdl_t ophdl)
1428 {
1429 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1430 
1431 	hdl->cmih_msrsrc &= ~CMI_MSR_FLAG_RD_HWOK;
1432 }
1433 
1434 void
1435 cmi_hdlconf_wrmsr_nohw(cmi_hdl_t ophdl)
1436 {
1437 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1438 
1439 	hdl->cmih_msrsrc &= ~CMI_MSR_FLAG_WR_HWOK;
1440 }
1441 
1442 cmi_errno_t
1443 cmi_hdl_rdmsr(cmi_hdl_t ophdl, uint_t msr, uint64_t *valp)
1444 {
1445 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1446 
1447 	/*
1448 	 * Regardless of the handle class, we first check for am
1449 	 * interposed value.  In the xVM case you probably want to
1450 	 * place interposed values within the hypervisor itself, but
1451 	 * we still allow interposing them in dom0 for test and bringup
1452 	 * purposes.
1453 	 */
1454 	if ((hdl->cmih_msrsrc & CMI_MSR_FLAG_RD_INTERPOSEOK) &&
1455 	    msri_lookup(hdl, msr, valp))
1456 		return (CMI_SUCCESS);
1457 
1458 	if (HDLOPS(hdl)->cmio_rdmsr == NULL)
1459 		return (CMIERR_NOTSUP);
1460 
1461 	return (HDLOPS(hdl)->cmio_rdmsr(hdl, msr, valp));
1462 }
1463 
1464 cmi_errno_t
1465 cmi_hdl_wrmsr(cmi_hdl_t ophdl, uint_t msr, uint64_t val)
1466 {
1467 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1468 
1469 	/* Invalidate any interposed value */
1470 	msri_rment(hdl, msr);
1471 
1472 	if (HDLOPS(hdl)->cmio_wrmsr == NULL)
1473 		return (CMI_SUCCESS);	/* pretend all is ok */
1474 
1475 	return (HDLOPS(hdl)->cmio_wrmsr(hdl, msr, val));
1476 }
1477 
1478 void
1479 cmi_hdl_enable_mce(cmi_hdl_t ophdl)
1480 {
1481 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1482 	ulong_t cr4;
1483 
1484 	if (HDLOPS(hdl)->cmio_getcr4 == NULL ||
1485 	    HDLOPS(hdl)->cmio_setcr4 == NULL)
1486 		return;
1487 
1488 	cr4 = HDLOPS(hdl)->cmio_getcr4(hdl);
1489 
1490 	HDLOPS(hdl)->cmio_setcr4(hdl, cr4 | CR4_MCE);
1491 }
1492 
1493 void
1494 cmi_hdl_msrinterpose(cmi_hdl_t ophdl, cmi_mca_regs_t *regs, uint_t nregs)
1495 {
1496 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1497 	int i;
1498 
1499 	if (HDLOPS(hdl)->cmio_msrinterpose == NULL)
1500 		return;
1501 
1502 	cmi_hdl_inj_begin(ophdl);
1503 
1504 	for (i = 0; i < nregs; i++, regs++)
1505 		HDLOPS(hdl)->cmio_msrinterpose(hdl, regs->cmr_msrnum,
1506 		    regs->cmr_msrval);
1507 
1508 	cmi_hdl_inj_end(ophdl);
1509 }
1510 
1511 /*ARGSUSED*/
1512 void
1513 cmi_hdl_msrforward(cmi_hdl_t ophdl, cmi_mca_regs_t *regs, uint_t nregs)
1514 {
1515 #ifdef __xpv
1516 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1517 	int i;
1518 
1519 	for (i = 0; i < nregs; i++, regs++)
1520 		msri_addent(hdl, regs->cmr_msrnum, regs->cmr_msrval);
1521 #endif
1522 }
1523 
1524 
1525 void
1526 cmi_pcird_nohw(void)
1527 {
1528 	cmi_pcicfg_flags &= ~CMI_PCICFG_FLAG_RD_HWOK;
1529 }
1530 
1531 void
1532 cmi_pciwr_nohw(void)
1533 {
1534 	cmi_pcicfg_flags &= ~CMI_PCICFG_FLAG_WR_HWOK;
1535 }
1536 
1537 static uint32_t
1538 cmi_pci_get_cmn(int bus, int dev, int func, int reg, int asz,
1539     int *interpose, ddi_acc_handle_t hdl)
1540 {
1541 	uint32_t val;
1542 
1543 	if (cmi_pcicfg_flags & CMI_PCICFG_FLAG_RD_INTERPOSEOK &&
1544 	    pcii_lookup(bus, dev, func, reg, asz, &val)) {
1545 		if (interpose)
1546 			*interpose = 1;
1547 		return (val);
1548 	}
1549 	if (interpose)
1550 		*interpose = 0;
1551 
1552 	if (!(cmi_pcicfg_flags & CMI_PCICFG_FLAG_RD_HWOK))
1553 		return (0);
1554 
1555 	switch (asz) {
1556 	case 1:
1557 		if (hdl)
1558 			val = pci_config_get8(hdl, (off_t)reg);
1559 		else
1560 			val = (*pci_getb_func)(bus, dev, func, reg);
1561 		break;
1562 	case 2:
1563 		if (hdl)
1564 			val = pci_config_get16(hdl, (off_t)reg);
1565 		else
1566 			val = (*pci_getw_func)(bus, dev, func, reg);
1567 		break;
1568 	case 4:
1569 		if (hdl)
1570 			val = pci_config_get32(hdl, (off_t)reg);
1571 		else
1572 			val = (*pci_getl_func)(bus, dev, func, reg);
1573 		break;
1574 	default:
1575 		val = 0;
1576 	}
1577 	return (val);
1578 }
1579 
1580 uint8_t
1581 cmi_pci_getb(int bus, int dev, int func, int reg, int *interpose,
1582     ddi_acc_handle_t hdl)
1583 {
1584 	return ((uint8_t)cmi_pci_get_cmn(bus, dev, func, reg, 1, interpose,
1585 	    hdl));
1586 }
1587 
1588 uint16_t
1589 cmi_pci_getw(int bus, int dev, int func, int reg, int *interpose,
1590     ddi_acc_handle_t hdl)
1591 {
1592 	return ((uint16_t)cmi_pci_get_cmn(bus, dev, func, reg, 2, interpose,
1593 	    hdl));
1594 }
1595 
1596 uint32_t
1597 cmi_pci_getl(int bus, int dev, int func, int reg, int *interpose,
1598     ddi_acc_handle_t hdl)
1599 {
1600 	return (cmi_pci_get_cmn(bus, dev, func, reg, 4, interpose, hdl));
1601 }
1602 
1603 void
1604 cmi_pci_interposeb(int bus, int dev, int func, int reg, uint8_t val)
1605 {
1606 	pcii_addent(bus, dev, func, reg, val, 1);
1607 }
1608 
1609 void
1610 cmi_pci_interposew(int bus, int dev, int func, int reg, uint16_t val)
1611 {
1612 	pcii_addent(bus, dev, func, reg, val, 2);
1613 }
1614 
1615 void
1616 cmi_pci_interposel(int bus, int dev, int func, int reg, uint32_t val)
1617 {
1618 	pcii_addent(bus, dev, func, reg, val, 4);
1619 }
1620 
1621 static void
1622 cmi_pci_put_cmn(int bus, int dev, int func, int reg, int asz,
1623     ddi_acc_handle_t hdl, uint32_t val)
1624 {
1625 	/*
1626 	 * If there is an interposed value for this register invalidate it.
1627 	 */
1628 	pcii_rment(bus, dev, func, reg, asz);
1629 
1630 	if (!(cmi_pcicfg_flags & CMI_PCICFG_FLAG_WR_HWOK))
1631 		return;
1632 
1633 	switch (asz) {
1634 	case 1:
1635 		if (hdl)
1636 			pci_config_put8(hdl, (off_t)reg, (uint8_t)val);
1637 		else
1638 			(*pci_putb_func)(bus, dev, func, reg, (uint8_t)val);
1639 		break;
1640 
1641 	case 2:
1642 		if (hdl)
1643 			pci_config_put16(hdl, (off_t)reg, (uint16_t)val);
1644 		else
1645 			(*pci_putw_func)(bus, dev, func, reg, (uint16_t)val);
1646 		break;
1647 
1648 	case 4:
1649 		if (hdl)
1650 			pci_config_put32(hdl, (off_t)reg, val);
1651 		else
1652 			(*pci_putl_func)(bus, dev, func, reg, val);
1653 		break;
1654 
1655 	default:
1656 		break;
1657 	}
1658 }
1659 
1660 void
1661 cmi_pci_putb(int bus, int dev, int func, int reg, ddi_acc_handle_t hdl,
1662     uint8_t val)
1663 {
1664 	cmi_pci_put_cmn(bus, dev, func, reg, 1, hdl, val);
1665 }
1666 
1667 void
1668 cmi_pci_putw(int bus, int dev, int func, int reg, ddi_acc_handle_t hdl,
1669     uint16_t val)
1670 {
1671 	cmi_pci_put_cmn(bus, dev, func, reg, 2, hdl, val);
1672 }
1673 
1674 void
1675 cmi_pci_putl(int bus, int dev, int func, int reg, ddi_acc_handle_t hdl,
1676     uint32_t val)
1677 {
1678 	cmi_pci_put_cmn(bus, dev, func, reg, 4, hdl, val);
1679 }
1680 
/*
 * The operations vector that cmi_hdl_create installs in every handle.
 * Exactly one of the two tables below is compiled in, selected by
 * __xpv.  The initializer is positional, so entries must stay in the
 * same order as the members of struct cmi_hdl_ops; the trailing
 * comment on each line names the member being filled.
 */
static const struct cmi_hdl_ops cmi_hdl_ops = {
#ifdef __xpv
	/*
	 * CMI_HDL_SOLARIS_xVM_MCA - ops when we are an xVM dom0
	 *
	 * The cr4 accessors are NULL here, which makes
	 * cmi_hdl_enable_mce return without doing anything.
	 */
	xpv_vendor,		/* cmio_vendor */
	xpv_vendorstr,		/* cmio_vendorstr */
	xpv_family,		/* cmio_family */
	xpv_model,		/* cmio_model */
	xpv_stepping,		/* cmio_stepping */
	xpv_chipid,		/* cmio_chipid */
	xpv_coreid,		/* cmio_coreid */
	xpv_strandid,		/* cmio_strandid */
	xpv_chiprev,		/* cmio_chiprev */
	xpv_chiprevstr,		/* cmio_chiprevstr */
	xpv_getsockettype,	/* cmio_getsockettype */
	xpv_logical_id,		/* cmio_logical_id */
	NULL,			/* cmio_getcr4 */
	NULL,			/* cmio_setcr4 */
	xpv_rdmsr,		/* cmio_rdmsr */
	xpv_wrmsr,		/* cmio_wrmsr */
	xpv_msrinterpose,	/* cmio_msrinterpose */
	xpv_int,		/* cmio_int */
	xpv_online		/* cmio_online */

#else	/* __xpv */

	/*
	 * CMI_HDL_NATIVE - ops when apparently running on bare-metal
	 */
	ntv_vendor,		/* cmio_vendor */
	ntv_vendorstr,		/* cmio_vendorstr */
	ntv_family,		/* cmio_family */
	ntv_model,		/* cmio_model */
	ntv_stepping,		/* cmio_stepping */
	ntv_chipid,		/* cmio_chipid */
	ntv_coreid,		/* cmio_coreid */
	ntv_strandid,		/* cmio_strandid */
	ntv_chiprev,		/* cmio_chiprev */
	ntv_chiprevstr,		/* cmio_chiprevstr */
	ntv_getsockettype,	/* cmio_getsockettype */
	ntv_logical_id,		/* cmio_logical_id */
	ntv_getcr4,		/* cmio_getcr4 */
	ntv_setcr4,		/* cmio_setcr4 */
	ntv_rdmsr,		/* cmio_rdmsr */
	ntv_wrmsr,		/* cmio_wrmsr */
	ntv_msrinterpose,	/* cmio_msrinterpose */
	ntv_int,		/* cmio_int */
	ntv_online		/* cmio_online */
#endif
};
1732