xref: /titanic_41/usr/src/uts/i86pc/os/cmi_hw.c (revision de6f998e6d02b8c2b9fd7169f88e293848d5760f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * CPU Module Interface - hardware abstraction.
29  */
30 
31 #include <sys/types.h>
32 #include <sys/cpu_module.h>
33 #include <sys/kmem.h>
34 #include <sys/x86_archext.h>
35 #include <sys/cpuvar.h>
36 #include <sys/ksynch.h>
37 #include <sys/x_call.h>
38 #include <sys/pghw.h>
39 #include <sys/pci_cfgspace.h>
40 #include <sys/archsystm.h>
41 #include <sys/ontrap.h>
42 #include <sys/controlregs.h>
43 #include <sys/sunddi.h>
44 #include <sys/trap.h>
45 #include <sys/mca_x86.h>
46 #include <sys/processor.h>
47 
48 #ifdef __xpv
49 #include <sys/hypervisor.h>
50 #endif
51 
52 /*
53  * Outside of this file consumers use the opaque cmi_hdl_t.  This
54  * definition is duplicated in the generic_cpu mdb module, so keep
55  * them in sync when making changes.
56  */
57 typedef struct cmi_hdl_impl {
58 	enum cmi_hdl_class cmih_class;		/* Handle nature */
59 	const struct cmi_hdl_ops *cmih_ops;	/* Operations vector */
60 	uint_t cmih_chipid;			/* Chipid of cpu resource */
61 	uint_t cmih_coreid;			/* Core within die */
62 	uint_t cmih_strandid;			/* Thread within core */
63 	boolean_t cmih_mstrand;			/* cores are multithreaded */
64 	volatile uint32_t *cmih_refcntp;	/* Reference count pointer */
65 	uint64_t cmih_msrsrc;			/* MSR data source flags */
66 	void *cmih_hdlpriv;			/* cmi_hw.c private data */
67 	void *cmih_spec;			/* cmi_hdl_{set,get}_specific */
68 	void *cmih_cmi;				/* cpu mod control structure */
69 	void *cmih_cmidata;			/* cpu mod private data */
70 	const struct cmi_mc_ops *cmih_mcops;	/* Memory-controller ops */
71 	void *cmih_mcdata;			/* Memory-controller data */
72 	uint64_t cmih_flags;			/* See CMIH_F_* below */
73 } cmi_hdl_impl_t;
74 
75 #define	IMPLHDL(ophdl)	((cmi_hdl_impl_t *)ophdl)
76 #define	HDLOPS(hdl)	((hdl)->cmih_ops)
77 
78 #define	CMIH_F_INJACTV		0x1ULL
79 
80 /*
81  * Ops structure for handle operations.
82  */
83 struct cmi_hdl_ops {
84 	/*
85 	 * These ops are required in an implementation.
86 	 */
87 	uint_t (*cmio_vendor)(cmi_hdl_impl_t *);
88 	const char *(*cmio_vendorstr)(cmi_hdl_impl_t *);
89 	uint_t (*cmio_family)(cmi_hdl_impl_t *);
90 	uint_t (*cmio_model)(cmi_hdl_impl_t *);
91 	uint_t (*cmio_stepping)(cmi_hdl_impl_t *);
92 	uint_t (*cmio_chipid)(cmi_hdl_impl_t *);
93 	uint_t (*cmio_coreid)(cmi_hdl_impl_t *);
94 	uint_t (*cmio_strandid)(cmi_hdl_impl_t *);
95 	uint32_t (*cmio_chiprev)(cmi_hdl_impl_t *);
96 	const char *(*cmio_chiprevstr)(cmi_hdl_impl_t *);
97 	uint32_t (*cmio_getsockettype)(cmi_hdl_impl_t *);
98 	id_t (*cmio_logical_id)(cmi_hdl_impl_t *);
99 	/*
100 	 * These ops are optional in an implementation.
101 	 */
102 	ulong_t (*cmio_getcr4)(cmi_hdl_impl_t *);
103 	void (*cmio_setcr4)(cmi_hdl_impl_t *, ulong_t);
104 	cmi_errno_t (*cmio_rdmsr)(cmi_hdl_impl_t *, uint_t, uint64_t *);
105 	cmi_errno_t (*cmio_wrmsr)(cmi_hdl_impl_t *, uint_t, uint64_t);
106 	cmi_errno_t (*cmio_msrinterpose)(cmi_hdl_impl_t *, uint_t, uint64_t);
107 	void (*cmio_int)(cmi_hdl_impl_t *, int);
108 	int (*cmio_online)(cmi_hdl_impl_t *, int, int *);
109 };
110 
111 static const struct cmi_hdl_ops cmi_hdl_ops;
112 
113 /*
114  * Handles are looked up from contexts such as polling and injection,
115  * where the context is reasonably well defined (although a poller could
116  * interrupt any old thread holding any old lock).  They are also looked
117  * up by machine check handlers, which may strike at inconvenient times
118  * such as during handle initialization or destruction or during handle
119  * lookup (which the #MC handler itself will also have to perform).
120  *
121  * So keeping handles in a linked list makes locking difficult when we
122  * consider #MC handlers.  Our solution is to have a look-up table indexed
123  * by that which uniquely identifies a handle - chip/core/strand id -
124  * with each entry a structure including a pointer to a handle
125  * structure for the resource, and a reference count for the handle.
126  * Reference counts are modified atomically.  The public cmi_hdl_hold
127  * always succeeds because this can only be used after handle creation
128  * and before destruction, so the hold count is already at least one.
129  * In other functions that look up a handle (cmi_hdl_lookup, cmi_hdl_any)
130  * we must be certain that the count has not already been decremented to zero
131  * before applying our hold.
132  *
133  * The top-level table is a static array of CMI_CHIPID_ARR_SZ entries,
134  * indexed by chip id; the entry for a chip that is not present is NULL.
135  * Each non-NULL entry points to a per-chip array holding the handle
136  * entries for all strands of that chip.  A per-chip array, sized for
137  * CMI_MAX_STRANDS_PER_CHIP entries, is allocated the first time we want
138  * to populate one of its entries, i.e. when one of the chip's cpus starts.
139  *
140  * Ideally we would allocate only for the actual number of chips, cores
141  * per chip and strands per core.  But the number of chips is not known
142  * until all of them have been enumerated, and the core and strand counts
143  * are only partially available, so for now we stick with the maxima above.
144  */
145 #define	CMI_MAX_CHIPID_NBITS		6	/* max chipid of 63 */
146 #define	CMI_MAX_CORES_PER_CHIP_NBITS	4	/* 16 cores per chip max */
147 #define	CMI_MAX_STRANDS_PER_CORE_NBITS	3	/* 8 strands per core max */
148 
149 #define	CMI_MAX_CHIPID			((1 << (CMI_MAX_CHIPID_NBITS)) - 1)
150 #define	CMI_MAX_CORES_PER_CHIP		(1 << CMI_MAX_CORES_PER_CHIP_NBITS)
151 #define	CMI_MAX_STRANDS_PER_CORE	(1 << CMI_MAX_STRANDS_PER_CORE_NBITS)
152 #define	CMI_MAX_STRANDS_PER_CHIP	(CMI_MAX_CORES_PER_CHIP * \
153 					    CMI_MAX_STRANDS_PER_CORE)
154 
155 /*
156  * Handle array indexing within a per-chip table
157  *	[6:3] = Core in package,
158  *	[2:0] = Strand in core,
159  */
160 #define	CMI_HDL_ARR_IDX_CORE(coreid) \
161 	(((coreid) & (CMI_MAX_CORES_PER_CHIP - 1)) << \
162 	CMI_MAX_STRANDS_PER_CORE_NBITS)
163 
164 #define	CMI_HDL_ARR_IDX_STRAND(strandid) \
165 	(((strandid) & (CMI_MAX_STRANDS_PER_CORE - 1)))
166 
167 #define	CMI_HDL_ARR_IDX(coreid, strandid) \
168 	(CMI_HDL_ARR_IDX_CORE(coreid) | CMI_HDL_ARR_IDX_STRAND(strandid))
169 
170 #define	CMI_CHIPID_ARR_SZ		(1 << CMI_MAX_CHIPID_NBITS)
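
/*
 * Worked example (illustrative only): for coreid 5 and strandid 3,
 * CMI_HDL_ARR_IDX_CORE(5) is (5 & 0xf) << 3 = 40 and
 * CMI_HDL_ARR_IDX_STRAND(3) is 3, so CMI_HDL_ARR_IDX(5, 3) is 40 | 3 = 43
 * and the entry lives at cmi_chip_tab[chipid] + 43.
 */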
171 
172 typedef struct cmi_hdl_ent {
173 	volatile uint32_t cmae_refcnt;
174 	cmi_hdl_impl_t *cmae_hdlp;
175 } cmi_hdl_ent_t;
176 
177 static cmi_hdl_ent_t *cmi_chip_tab[CMI_CHIPID_ARR_SZ];
178 
179 /*
180  * Controls where we will source PCI config space data.
181  */
182 #define	CMI_PCICFG_FLAG_RD_HWOK		0x0001
183 #define	CMI_PCICFG_FLAG_RD_INTERPOSEOK	0x0002
184 #define	CMI_PCICFG_FLAG_WR_HWOK		0x0004
185 #define	CMI_PCICFG_FLAG_WR_INTERPOSEOK	0x0008
186 
187 static uint64_t cmi_pcicfg_flags =
188     CMI_PCICFG_FLAG_RD_HWOK | CMI_PCICFG_FLAG_RD_INTERPOSEOK |
189     CMI_PCICFG_FLAG_WR_HWOK | CMI_PCICFG_FLAG_WR_INTERPOSEOK;
190 
191 /*
192  * The flags for individual cpus are kept in their per-cpu handle cmih_msrsrc
193  */
194 #define	CMI_MSR_FLAG_RD_HWOK		0x0001
195 #define	CMI_MSR_FLAG_RD_INTERPOSEOK	0x0002
196 #define	CMI_MSR_FLAG_WR_HWOK		0x0004
197 #define	CMI_MSR_FLAG_WR_INTERPOSEOK	0x0008
198 
199 int cmi_call_func_ntv_tries = 3;
200 
201 static cmi_errno_t
202 call_func_ntv(int cpuid, xc_func_t func, xc_arg_t arg1, xc_arg_t arg2)
203 {
204 	cmi_errno_t rc = -1;
205 	int i;
206 
207 	kpreempt_disable();
208 
209 	if (CPU->cpu_id == cpuid) {
210 		(*func)(arg1, arg2, (xc_arg_t)&rc);
211 	} else {
212 		/*
213 		 * This should not happen for a #MC trap or a poll, so
214 		 * this is likely an error injection or similar.
215 		 * We will try to cross call with xc_trycall - we
216 		 * can't guarantee success with xc_call because
217 		 * the interrupt code in the case of a #MC may
218 		 * already hold the xc mutex.
219 		 */
220 		for (i = 0; i < cmi_call_func_ntv_tries; i++) {
221 			cpuset_t cpus;
222 
223 			CPUSET_ONLY(cpus, cpuid);
224 			xc_trycall(arg1, arg2, (xc_arg_t)&rc, cpus, func);
225 			if (rc != -1)
226 				break;
227 
228 			DELAY(1);
229 		}
230 	}
231 
232 	kpreempt_enable();
233 
234 	return (rc != -1 ? rc : CMIERR_DEADLOCK);
235 }
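
/*
 * A minimal sketch (illustrative, not part of this file's interfaces) of
 * the xc_func_t shape that call_func_ntv() dispatches; the ntv_*_xc
 * routines below all follow this pattern.  arg3 always carries a
 * cmi_errno_t pointer which the callee must fill, since a result of -1
 * is reserved to mean the cross call never ran (CMIERR_DEADLOCK above).
 *
 *	static int
 *	example_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
 *	{
 *		cmi_errno_t *rcp = (cmi_errno_t *)arg3;
 *
 *		... per-cpu work using arg1 and arg2 ...
 *		*rcp = CMI_SUCCESS;
 *		return (0);
 *	}
 */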
236 
237 static uint64_t injcnt;
238 
239 void
240 cmi_hdl_inj_begin(cmi_hdl_t ophdl)
241 {
242 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
243 
244 	if (hdl != NULL)
245 		hdl->cmih_flags |= CMIH_F_INJACTV;
246 	if (injcnt++ == 0) {
247 		cmn_err(CE_NOTE, "Hardware error injection/simulation "
248 		    "activity noted");
249 	}
250 }
251 
252 void
253 cmi_hdl_inj_end(cmi_hdl_t ophdl)
254 {
255 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
256 
257 	ASSERT(hdl == NULL || hdl->cmih_flags & CMIH_F_INJACTV);
258 	if (hdl != NULL)
259 		hdl->cmih_flags &= ~CMIH_F_INJACTV;
260 }
261 
262 boolean_t
263 cmi_inj_tainted(void)
264 {
265 	return (injcnt != 0 ? B_TRUE : B_FALSE);
266 }
267 
268 /*
269  *	 =======================================================
270  *	|	MSR Interposition				|
271  *	|	-----------------				|
272  *	|							|
273  *	 -------------------------------------------------------
274  */
275 
276 #define	CMI_MSRI_HASHSZ		16
277 #define	CMI_MSRI_HASHIDX(hdl, msr) \
278 	((((uintptr_t)(hdl) >> 3) + (msr)) % (CMI_MSRI_HASHSZ - 1))
279 
280 struct cmi_msri_bkt {
281 	kmutex_t msrib_lock;
282 	struct cmi_msri_hashent *msrib_head;
283 };
284 
285 struct cmi_msri_hashent {
286 	struct cmi_msri_hashent *msrie_next;
287 	struct cmi_msri_hashent *msrie_prev;
288 	cmi_hdl_impl_t *msrie_hdl;
289 	uint_t msrie_msrnum;
290 	uint64_t msrie_msrval;
291 };
292 
293 #define	CMI_MSRI_MATCH(ent, hdl, req_msr) \
294 	((ent)->msrie_hdl == (hdl) && (ent)->msrie_msrnum == (req_msr))
295 
296 static struct cmi_msri_bkt msrihash[CMI_MSRI_HASHSZ];
297 
298 static void
299 msri_addent(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t val)
300 {
301 	int idx = CMI_MSRI_HASHIDX(hdl, msr);
302 	struct cmi_msri_bkt *hbp = &msrihash[idx];
303 	struct cmi_msri_hashent *hep;
304 
305 	mutex_enter(&hbp->msrib_lock);
306 
307 	for (hep = hbp->msrib_head; hep != NULL; hep = hep->msrie_next) {
308 		if (CMI_MSRI_MATCH(hep, hdl, msr))
309 			break;
310 	}
311 
312 	if (hep != NULL) {
313 		hep->msrie_msrval = val;
314 	} else {
315 		hep = kmem_alloc(sizeof (*hep), KM_SLEEP);
316 		hep->msrie_hdl = hdl;
317 		hep->msrie_msrnum = msr;
318 		hep->msrie_msrval = val;
319 
320 		if (hbp->msrib_head != NULL)
321 			hbp->msrib_head->msrie_prev = hep;
322 		hep->msrie_next = hbp->msrib_head;
323 		hep->msrie_prev = NULL;
324 		hbp->msrib_head = hep;
325 	}
326 
327 	mutex_exit(&hbp->msrib_lock);
328 }
329 
330 /*
331  * Look for a match for the given handle and msr.  Return 1 with valp
332  * filled if a match is found, otherwise return 0 with valp untouched.
333  */
334 static int
335 msri_lookup(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t *valp)
336 {
337 	int idx = CMI_MSRI_HASHIDX(hdl, msr);
338 	struct cmi_msri_bkt *hbp = &msrihash[idx];
339 	struct cmi_msri_hashent *hep;
340 
341 	/*
342 	 * This function is called during #MC trap handling, so we should
343 	 * consider the possibility that the hash mutex is held by the
344 	 * interrupted thread.  This should not happen because interposition
345 	 * is an artificial injection mechanism and the #MC is requested
346 	 * after adding entries, but just in case of a real #MC at an
347 	 * unlucky moment we'll use mutex_tryenter here.
348 	 */
349 	if (!mutex_tryenter(&hbp->msrib_lock))
350 		return (0);
351 
352 	for (hep = hbp->msrib_head; hep != NULL; hep = hep->msrie_next) {
353 		if (CMI_MSRI_MATCH(hep, hdl, msr)) {
354 			*valp = hep->msrie_msrval;
355 			break;
356 		}
357 	}
358 
359 	mutex_exit(&hbp->msrib_lock);
360 
361 	return (hep != NULL);
362 }
363 
364 /*
365  * Remove any interposed value that matches.
366  */
367 static void
368 msri_rment(cmi_hdl_impl_t *hdl, uint_t msr)
369 {
371 	int idx = CMI_MSRI_HASHIDX(hdl, msr);
372 	struct cmi_msri_bkt *hbp = &msrihash[idx];
373 	struct cmi_msri_hashent *hep;
374 
375 	if (!mutex_tryenter(&hbp->msrib_lock))
376 		return;
377 
378 	for (hep = hbp->msrib_head; hep != NULL; hep = hep->msrie_next) {
379 		if (CMI_MSRI_MATCH(hep, hdl, msr)) {
380 			if (hep->msrie_prev != NULL)
381 				hep->msrie_prev->msrie_next = hep->msrie_next;
382 
383 			if (hep->msrie_next != NULL)
384 				hep->msrie_next->msrie_prev = hep->msrie_prev;
385 
386 			if (hbp->msrib_head == hep)
387 				hbp->msrib_head = hep->msrie_next;
388 
389 			kmem_free(hep, sizeof (*hep));
390 			break;
391 		}
392 	}
393 
394 	mutex_exit(&hbp->msrib_lock);
395 }
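
/*
 * Illustrative lifecycle of an interposed MSR value (a sketch using the
 * static routines above; consumers actually reach them through
 * cmi_hdl_msrinterpose() and cmi_hdl_rdmsr()/cmi_hdl_wrmsr() below):
 *
 *	msri_addent(hdl, msr, val);		stash value for (hdl, msr)
 *	(void) msri_lookup(hdl, msr, &cur);	hit: cur now equals val
 *	msri_rment(hdl, msr);			entry invalidated; later
 *						lookups miss
 */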
396 
397 /*
398  *	 =======================================================
399  *	|	PCI Config Space Interposition			|
400  *	|	------------------------------			|
401  *	|							|
402  *	 -------------------------------------------------------
403  */
404 
405 /*
406  * Hash for interposed PCI config space values.  We look up on
407  * bus/dev/func/offset and record whether the value was stashed with a
408  * byte, word or doubleword access; we will only return a hit for an
409  * access of the same size.  If you stash, say, a 32-bit register using
410  * byte accesses and then attempt to read the full 32-bit value back,
411  * you will not obtain any sort of merged result - you get a lookup miss.
412  */
413 
414 #define	CMI_PCII_HASHSZ		16
415 #define	CMI_PCII_HASHIDX(b, d, f, o) \
416 	(((b) + (d) + (f) + (o)) % (CMI_PCII_HASHSZ - 1))
417 
418 struct cmi_pcii_bkt {
419 	kmutex_t pciib_lock;
420 	struct cmi_pcii_hashent *pciib_head;
421 };
422 
423 struct cmi_pcii_hashent {
424 	struct cmi_pcii_hashent *pcii_next;
425 	struct cmi_pcii_hashent *pcii_prev;
426 	int pcii_bus;
427 	int pcii_dev;
428 	int pcii_func;
429 	int pcii_reg;
430 	int pcii_asize;
431 	uint32_t pcii_val;
432 };
433 
434 #define	CMI_PCII_MATCH(ent, b, d, f, r, asz) \
435 	((ent)->pcii_bus == (b) && (ent)->pcii_dev == (d) && \
436 	(ent)->pcii_func == (f) && (ent)->pcii_reg == (r) && \
437 	(ent)->pcii_asize == (asz))
438 
439 static struct cmi_pcii_bkt pciihash[CMI_PCII_HASHSZ];
440 
442 /*
443  * Add a new entry to the PCI interpose hash, overwriting any existing
444  * entry that is found.
445  */
446 static void
447 pcii_addent(int bus, int dev, int func, int reg, uint32_t val, int asz)
448 {
449 	int idx = CMI_PCII_HASHIDX(bus, dev, func, reg);
450 	struct cmi_pcii_bkt *hbp = &pciihash[idx];
451 	struct cmi_pcii_hashent *hep;
452 
453 	cmi_hdl_inj_begin(NULL);
454 
455 	mutex_enter(&hbp->pciib_lock);
456 
457 	for (hep = hbp->pciib_head; hep != NULL; hep = hep->pcii_next) {
458 		if (CMI_PCII_MATCH(hep, bus, dev, func, reg, asz))
459 			break;
460 	}
461 
462 	if (hep != NULL) {
463 		hep->pcii_val = val;
464 	} else {
465 		hep = kmem_alloc(sizeof (*hep), KM_SLEEP);
466 		hep->pcii_bus = bus;
467 		hep->pcii_dev = dev;
468 		hep->pcii_func = func;
469 		hep->pcii_reg = reg;
470 		hep->pcii_asize = asz;
471 		hep->pcii_val = val;
472 
473 		if (hbp->pciib_head != NULL)
474 			hbp->pciib_head->pcii_prev = hep;
475 		hep->pcii_next = hbp->pciib_head;
476 		hep->pcii_prev = NULL;
477 		hbp->pciib_head = hep;
478 	}
479 
480 	mutex_exit(&hbp->pciib_lock);
481 
482 	cmi_hdl_inj_end(NULL);
483 }
484 
485 /*
486  * Look for a match for the given bus/dev/func/reg; return 1 with valp
487  * filled if a match is found, otherwise return 0 with valp untouched.
488  */
489 static int
490 pcii_lookup(int bus, int dev, int func, int reg, int asz, uint32_t *valp)
491 {
492 	int idx = CMI_PCII_HASHIDX(bus, dev, func, reg);
493 	struct cmi_pcii_bkt *hbp = &pciihash[idx];
494 	struct cmi_pcii_hashent *hep;
495 
496 	if (!mutex_tryenter(&hbp->pciib_lock))
497 		return (0);
498 
499 	for (hep = hbp->pciib_head; hep != NULL; hep = hep->pcii_next) {
500 		if (CMI_PCII_MATCH(hep, bus, dev, func, reg, asz)) {
501 			*valp = hep->pcii_val;
502 			break;
503 		}
504 	}
505 
506 	mutex_exit(&hbp->pciib_lock);
507 
508 	return (hep != NULL);
509 }
510 
511 static void
512 pcii_rment(int bus, int dev, int func, int reg, int asz)
513 {
514 	int idx = CMI_PCII_HASHIDX(bus, dev, func, reg);
515 	struct cmi_pcii_bkt *hbp = &pciihash[idx];
516 	struct cmi_pcii_hashent *hep;
517 
518 	mutex_enter(&hbp->pciib_lock);
519 
520 	for (hep = hbp->pciib_head; hep != NULL; hep = hep->pcii_next) {
521 		if (CMI_PCII_MATCH(hep, bus, dev, func, reg, asz)) {
522 			if (hep->pcii_prev != NULL)
523 				hep->pcii_prev->pcii_next = hep->pcii_next;
524 
525 			if (hep->pcii_next != NULL)
526 				hep->pcii_next->pcii_prev = hep->pcii_prev;
527 
528 			if (hbp->pciib_head == hep)
529 				hbp->pciib_head = hep->pcii_next;
530 
531 			kmem_free(hep, sizeof (*hep));
532 			break;
533 		}
534 	}
535 
536 	mutex_exit(&hbp->pciib_lock);
537 }
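
/*
 * Illustrative sketch of the access-size matching described above (the
 * bus/dev/func/reg values here are made up): a byte-sized interpose is
 * only visible to byte-sized lookups of the same address.
 *
 *	pcii_addent(0, 24, 3, 0x10, 0xa5, 1);	1-byte interpose
 *	pcii_lookup(0, 24, 3, 0x10, 1, &v);	returns 1, v == 0xa5
 *	pcii_lookup(0, 24, 3, 0x10, 4, &v);	returns 0 - no merged result
 */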
538 
539 #ifndef __xpv
540 
541 /*
542  *	 =======================================================
543  *	|	Native methods					|
544  *	|	--------------					|
545  *	|							|
546  *	| These are used when we are running native on bare-	|
547  *	| metal, or simply don't know any better.		|
548  *	---------------------------------------------------------
549  */
550 
551 #define	HDLPRIV(hdl)	((cpu_t *)(hdl)->cmih_hdlpriv)
552 
553 static uint_t
554 ntv_vendor(cmi_hdl_impl_t *hdl)
555 {
556 	return (cpuid_getvendor(HDLPRIV(hdl)));
557 }
558 
559 static const char *
560 ntv_vendorstr(cmi_hdl_impl_t *hdl)
561 {
562 	return (cpuid_getvendorstr(HDLPRIV(hdl)));
563 }
564 
565 static uint_t
566 ntv_family(cmi_hdl_impl_t *hdl)
567 {
568 	return (cpuid_getfamily(HDLPRIV(hdl)));
569 }
570 
571 static uint_t
572 ntv_model(cmi_hdl_impl_t *hdl)
573 {
574 	return (cpuid_getmodel(HDLPRIV(hdl)));
575 }
576 
577 static uint_t
578 ntv_stepping(cmi_hdl_impl_t *hdl)
579 {
580 	return (cpuid_getstep(HDLPRIV(hdl)));
581 }
582 
583 static uint_t
584 ntv_chipid(cmi_hdl_impl_t *hdl)
585 {
586 	return (hdl->cmih_chipid);
588 }
589 
590 static uint_t
591 ntv_coreid(cmi_hdl_impl_t *hdl)
592 {
593 	return (hdl->cmih_coreid);
594 }
595 
596 static uint_t
597 ntv_strandid(cmi_hdl_impl_t *hdl)
598 {
599 	return (hdl->cmih_strandid);
600 }
601 
602 static uint32_t
603 ntv_chiprev(cmi_hdl_impl_t *hdl)
604 {
605 	return (cpuid_getchiprev(HDLPRIV(hdl)));
606 }
607 
608 static const char *
609 ntv_chiprevstr(cmi_hdl_impl_t *hdl)
610 {
611 	return (cpuid_getchiprevstr(HDLPRIV(hdl)));
612 }
613 
614 static uint32_t
615 ntv_getsockettype(cmi_hdl_impl_t *hdl)
616 {
617 	return (cpuid_getsockettype(HDLPRIV(hdl)));
618 }
619 
620 static id_t
621 ntv_logical_id(cmi_hdl_impl_t *hdl)
622 {
623 	return (HDLPRIV(hdl)->cpu_id);
624 }
625 
626 /*ARGSUSED*/
627 static int
628 ntv_getcr4_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
629 {
630 	ulong_t *dest = (ulong_t *)arg1;
631 	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
632 
633 	*dest = getcr4();
634 	*rcp = CMI_SUCCESS;
635 
636 	return (0);
637 }
638 
639 static ulong_t
640 ntv_getcr4(cmi_hdl_impl_t *hdl)
641 {
642 	cpu_t *cp = HDLPRIV(hdl);
643 	ulong_t val;
644 
645 	(void) call_func_ntv(cp->cpu_id, ntv_getcr4_xc, (xc_arg_t)&val, NULL);
646 
647 	return (val);
648 }
649 
650 /*ARGSUSED*/
651 static int
652 ntv_setcr4_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
653 {
654 	ulong_t val = (ulong_t)arg1;
655 	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
656 
657 	setcr4(val);
658 	*rcp = CMI_SUCCESS;
659 
660 	return (0);
661 }
662 
663 static void
664 ntv_setcr4(cmi_hdl_impl_t *hdl, ulong_t val)
665 {
666 	cpu_t *cp = HDLPRIV(hdl);
667 
668 	(void) call_func_ntv(cp->cpu_id, ntv_setcr4_xc, (xc_arg_t)val, NULL);
669 }
670 
671 volatile uint32_t cmi_trapped_rdmsr;
672 
673 /*ARGSUSED*/
674 static int
675 ntv_rdmsr_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
676 {
677 	uint_t msr = (uint_t)arg1;
678 	uint64_t *valp = (uint64_t *)arg2;
679 	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
680 
681 	on_trap_data_t otd;
682 
683 	if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
684 		if (checked_rdmsr(msr, valp) == 0)
685 			*rcp = CMI_SUCCESS;
686 		else
687 			*rcp = CMIERR_NOTSUP;
688 	} else {
689 		*rcp = CMIERR_MSRGPF;
690 		atomic_inc_32(&cmi_trapped_rdmsr);
691 	}
692 	no_trap();
693 
694 	return (0);
695 }
696 
697 static cmi_errno_t
698 ntv_rdmsr(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t *valp)
699 {
700 	cpu_t *cp = HDLPRIV(hdl);
701 
702 	if (!(hdl->cmih_msrsrc & CMI_MSR_FLAG_RD_HWOK))
703 		return (CMIERR_INTERPOSE);
704 
705 	return (call_func_ntv(cp->cpu_id, ntv_rdmsr_xc,
706 	    (xc_arg_t)msr, (xc_arg_t)valp));
707 }
708 
709 volatile uint32_t cmi_trapped_wrmsr;
710 
711 /*ARGSUSED*/
712 static int
713 ntv_wrmsr_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
714 {
715 	uint_t msr = (uint_t)arg1;
716 	uint64_t val = *((uint64_t *)arg2);
717 	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
718 	on_trap_data_t otd;
719 
720 	if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
721 		if (checked_wrmsr(msr, val) == 0)
722 			*rcp = CMI_SUCCESS;
723 		else
724 			*rcp = CMIERR_NOTSUP;
725 	} else {
726 		*rcp = CMIERR_MSRGPF;
727 		atomic_inc_32(&cmi_trapped_wrmsr);
728 	}
729 	no_trap();
730 
731 	return (0);
733 }
734 
735 static cmi_errno_t
736 ntv_wrmsr(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t val)
737 {
738 	cpu_t *cp = HDLPRIV(hdl);
739 
740 	if (!(hdl->cmih_msrsrc & CMI_MSR_FLAG_WR_HWOK))
741 		return (CMI_SUCCESS);
742 
743 	return (call_func_ntv(cp->cpu_id, ntv_wrmsr_xc,
744 	    (xc_arg_t)msr, (xc_arg_t)&val));
745 }
746 
747 static cmi_errno_t
748 ntv_msrinterpose(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t val)
749 {
750 	msri_addent(hdl, msr, val);
751 	return (CMI_SUCCESS);
752 }
753 
754 /*ARGSUSED*/
755 static int
756 ntv_int_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
757 {
758 	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
759 	int int_no = (int)arg1;
760 
761 	if (int_no == T_MCE)
762 		int18();
763 	else
764 		int_cmci();
765 	*rcp = CMI_SUCCESS;
766 
767 	return (0);
768 }
769 
770 static void
771 ntv_int(cmi_hdl_impl_t *hdl, int int_no)
772 {
773 	cpu_t *cp = HDLPRIV(hdl);
774 
775 	(void) call_func_ntv(cp->cpu_id, ntv_int_xc, (xc_arg_t)int_no, NULL);
776 }
777 
778 static int
779 ntv_online(cmi_hdl_impl_t *hdl, int new_status, int *old_status)
780 {
781 	processorid_t cpuid = HDLPRIV(hdl)->cpu_id;
782 
783 	return (p_online_internal(cpuid, new_status, old_status));
784 }
785 
786 #else	/* __xpv */
787 
788 /*
789  *	 =======================================================
790  *	|	xVM dom0 methods				|
791  *	|	----------------				|
792  *	|							|
793  *	| These are used when we are running as dom0 in		|
794  *	| a Solaris xVM context.				|
795  *	---------------------------------------------------------
796  */
797 
798 #define	HDLPRIV(hdl)	((xen_mc_lcpu_cookie_t)(hdl)->cmih_hdlpriv)
799 
800 extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
801 
803 static uint_t
804 xpv_vendor(cmi_hdl_impl_t *hdl)
805 {
806 	return (_cpuid_vendorstr_to_vendorcode((char *)xen_physcpu_vendorstr(
807 	    HDLPRIV(hdl))));
808 }
809 
810 static const char *
811 xpv_vendorstr(cmi_hdl_impl_t *hdl)
812 {
813 	return (xen_physcpu_vendorstr(HDLPRIV(hdl)));
814 }
815 
816 static uint_t
817 xpv_family(cmi_hdl_impl_t *hdl)
818 {
819 	return (xen_physcpu_family(HDLPRIV(hdl)));
820 }
821 
822 static uint_t
823 xpv_model(cmi_hdl_impl_t *hdl)
824 {
825 	return (xen_physcpu_model(HDLPRIV(hdl)));
826 }
827 
828 static uint_t
829 xpv_stepping(cmi_hdl_impl_t *hdl)
830 {
831 	return (xen_physcpu_stepping(HDLPRIV(hdl)));
832 }
833 
834 static uint_t
835 xpv_chipid(cmi_hdl_impl_t *hdl)
836 {
837 	return (hdl->cmih_chipid);
838 }
839 
840 static uint_t
841 xpv_coreid(cmi_hdl_impl_t *hdl)
842 {
843 	return (hdl->cmih_coreid);
844 }
845 
846 static uint_t
847 xpv_strandid(cmi_hdl_impl_t *hdl)
848 {
849 	return (hdl->cmih_strandid);
850 }
851 
852 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
853 
854 static uint32_t
855 xpv_chiprev(cmi_hdl_impl_t *hdl)
856 {
857 	return (_cpuid_chiprev(xpv_vendor(hdl), xpv_family(hdl),
858 	    xpv_model(hdl), xpv_stepping(hdl)));
859 }
860 
861 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
862 
863 static const char *
864 xpv_chiprevstr(cmi_hdl_impl_t *hdl)
865 {
866 	return (_cpuid_chiprevstr(xpv_vendor(hdl), xpv_family(hdl),
867 	    xpv_model(hdl), xpv_stepping(hdl)));
868 }
869 
870 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
871 
872 static uint32_t
873 xpv_getsockettype(cmi_hdl_impl_t *hdl)
874 {
875 	return (_cpuid_skt(xpv_vendor(hdl), xpv_family(hdl),
876 	    xpv_model(hdl), xpv_stepping(hdl)));
877 }
878 
879 static id_t
880 xpv_logical_id(cmi_hdl_impl_t *hdl)
881 {
882 	return (xen_physcpu_logical_id(HDLPRIV(hdl)));
883 }
884 
885 static cmi_errno_t
886 xpv_rdmsr(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t *valp)
887 {
888 	switch (msr) {
889 	case IA32_MSR_MCG_CAP:
890 		*valp = xen_physcpu_mcg_cap(HDLPRIV(hdl));
891 		break;
892 
893 	default:
894 		return (CMIERR_NOTSUP);
895 	}
896 
897 	return (CMI_SUCCESS);
898 }
899 
900 /*
901  * Request the hypervisor to write an MSR for us.  The hypervisor
902  * will only accept MCA-related MSRs, as this is for MCA error
903  * simulation purposes alone.  We will pre-screen MSRs for injection
904  * so we don't bother the HV with bogus requests.  We will permit
905  * injection to any MCA bank register, and to MCG_STATUS.
906  */
907 
908 #define	IS_MCA_INJ_MSR(msr) \
909 	(((msr) >= IA32_MSR_MC(0, CTL) && (msr) <= IA32_MSR_MC(10, MISC)) || \
910 	(msr) == IA32_MSR_MCG_STATUS)
911 
912 static cmi_errno_t
913 xpv_wrmsr_cmn(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t val, boolean_t intpose)
914 {
915 	struct xen_mc_msrinject mci;
916 
917 	if (!(hdl->cmih_flags & CMIH_F_INJACTV))
918 		return (CMIERR_NOTSUP);		/* for injection use only! */
919 
920 	if (!IS_MCA_INJ_MSR(msr))
921 		return (CMIERR_API);
922 
923 	if (panicstr)
924 		return (CMIERR_DEADLOCK);
925 
926 	mci.mcinj_cpunr = xen_physcpu_logical_id(HDLPRIV(hdl));
927 	mci.mcinj_flags = intpose ? MC_MSRINJ_F_INTERPOSE : 0;
928 	mci.mcinj_count = 1;	/* learn to batch sometime */
929 	mci.mcinj_msr[0].reg = msr;
930 	mci.mcinj_msr[0].value = val;
931 
932 	return (HYPERVISOR_mca(XEN_MC_CMD_msrinject, (xen_mc_arg_t *)&mci) ==
933 	    XEN_MC_HCALL_SUCCESS ?  CMI_SUCCESS : CMIERR_NOTSUP);
934 }
935 
936 static cmi_errno_t
937 xpv_wrmsr(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t val)
938 {
939 	return (xpv_wrmsr_cmn(hdl, msr, val, B_FALSE));
940 }
941 
943 static cmi_errno_t
944 xpv_msrinterpose(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t val)
945 {
946 	return (xpv_wrmsr_cmn(hdl, msr, val, B_TRUE));
947 }
948 
949 static void
950 xpv_int(cmi_hdl_impl_t *hdl, int int_no)
951 {
952 	struct xen_mc_mceinject mce;
953 
954 	if (!(hdl->cmih_flags & CMIH_F_INJACTV))
955 		return;
956 
957 	if (int_no != T_MCE) {
958 		cmn_err(CE_WARN, "xpv_int: int_no %d unimplemented", int_no);
959 		return;
960 	}
961 
962 	mce.mceinj_cpunr = xen_physcpu_logical_id(HDLPRIV(hdl));
963 
964 	(void) HYPERVISOR_mca(XEN_MC_CMD_mceinject, (xen_mc_arg_t *)&mce);
965 }
966 
967 #define	CSM_XLATE_SUNOS2XEN	1
968 #define	CSM_XLATE_XEN2SUNOS	2
969 
970 #define	CSM_MAPENT(suffix)	{ P_##suffix, MC_CPU_P_##suffix }
971 
972 static int
973 cpu_status_xlate(int in, int direction, int *outp)
974 {
975 	struct cpu_status_map {
976 		int csm_val[2];
977 	} map[] = {
978 		CSM_MAPENT(STATUS),
979 		CSM_MAPENT(ONLINE),
980 		CSM_MAPENT(OFFLINE),
981 		CSM_MAPENT(FAULTED),
982 		CSM_MAPENT(SPARE),
983 		CSM_MAPENT(POWEROFF)
984 	};
985 
986 	int cmpidx = (direction == CSM_XLATE_XEN2SUNOS);
987 	int i;
988 
989 	for (i = 0; i < sizeof (map) / sizeof (struct cpu_status_map); i++) {
990 		if (map[i].csm_val[cmpidx] == in) {
991 			*outp = map[i].csm_val[!cmpidx];
992 			return (1);
993 		}
994 	}
995 
996 	return (0);
997 }
998 
999 static int
1000 xpv_online(cmi_hdl_impl_t *hdl, int new_status, int *old_status)
1001 {
1002 	struct xen_mc_offline mco;
1003 	int flag, rc;
1004 
1005 	new_status &= ~P_FORCED;
1006 
1007 	if (!cpu_status_xlate(new_status, CSM_XLATE_SUNOS2XEN, &flag))
1008 		return (ENOSYS);
1009 
1010 	mco.mco_cpu = xen_physcpu_logical_id(HDLPRIV(hdl));
1011 	mco.mco_flag = flag;
1012 
1013 	if ((rc = HYPERVISOR_mca(XEN_MC_CMD_offlinecpu,
1014 	    (xen_mc_arg_t *)&mco)) == XEN_MC_HCALL_SUCCESS) {
1015 		flag = mco.mco_flag;
1016 		if (!cpu_status_xlate(flag, CSM_XLATE_XEN2SUNOS, old_status))
1017 			cmn_err(CE_NOTE, "xpv_online: unknown status %d.",
1018 			    flag);
1019 	}
1020 
1021 	return (-rc);
1022 }
1023 
1024 #endif
1025 
1026 /*ARGSUSED*/
1027 static void *
1028 cpu_search(enum cmi_hdl_class class, uint_t chipid, uint_t coreid,
1029     uint_t strandid)
1030 {
1031 #ifdef __xpv
1032 	xen_mc_lcpu_cookie_t cpi;
1033 
1034 	for (cpi = xen_physcpu_next(NULL); cpi != NULL;
1035 	    cpi = xen_physcpu_next(cpi)) {
1036 		if (xen_physcpu_chipid(cpi) == chipid &&
1037 		    xen_physcpu_coreid(cpi) == coreid &&
1038 		    xen_physcpu_strandid(cpi) == strandid)
1039 			return ((void *)cpi);
1040 	}
1041 	return (NULL);
1042 
1043 #else	/* __xpv */
1044 
1045 	cpu_t *cp, *startcp;
1046 
1047 	kpreempt_disable();
1048 	cp = startcp = CPU;
1049 	do {
1050 		if (cmi_ntv_hwchipid(cp) == chipid &&
1051 		    cmi_ntv_hwcoreid(cp) == coreid &&
1052 		    cmi_ntv_hwstrandid(cp) == strandid) {
1053 			kpreempt_enable();
1054 			return ((void *)cp);
1055 		}
1056 
1057 		cp = cp->cpu_next;
1058 	} while (cp != startcp);
1059 	kpreempt_enable();
1060 	return (NULL);
1061 #endif	/* __xpv */
1062 }
1063 
1064 static boolean_t
1065 cpu_is_cmt(void *priv)
1066 {
1067 #ifdef __xpv
1068 	return (xen_physcpu_is_cmt((xen_mc_lcpu_cookie_t)priv));
1069 #else /* __xpv */
1070 	cpu_t *cp = (cpu_t *)priv;
1071 
1072 	int strands_per_core = cpuid_get_ncpu_per_chip(cp) /
1073 	    cpuid_get_ncore_per_chip(cp);
1074 
1075 	return (strands_per_core > 1);
1076 #endif /* __xpv */
1077 }
1078 
1079 /*
1080  * Find the handle entry of a given cpu identified by a <chip,core,strand>
1081  * tuple.
1082  */
1083 static cmi_hdl_ent_t *
1084 cmi_hdl_ent_lookup(uint_t chipid, uint_t coreid, uint_t strandid)
1085 {
1086 	/*
1087 	 * Allocate the per-chip table, which holds the handle entries
1088 	 * for all strands of the chip.
1089 	 */
1090 	if (cmi_chip_tab[chipid] == NULL) {
1091 		size_t sz;
1092 		cmi_hdl_ent_t *pg;
1093 
1094 		sz = CMI_MAX_STRANDS_PER_CHIP * sizeof (cmi_hdl_ent_t);
1095 		pg = kmem_zalloc(sz, KM_SLEEP);
1096 
1097 		/* test and set the per-chip table if it is not allocated */
1098 		if (atomic_cas_ptr(&cmi_chip_tab[chipid], NULL, pg) != NULL)
1099 			kmem_free(pg, sz); /* someone beat us */
1100 	}
1101 
1102 	return (cmi_chip_tab[chipid] + CMI_HDL_ARR_IDX(coreid, strandid));
1103 }
1104 
1105 cmi_hdl_t
1106 cmi_hdl_create(enum cmi_hdl_class class, uint_t chipid, uint_t coreid,
1107     uint_t strandid)
1108 {
1109 	cmi_hdl_impl_t *hdl;
1110 	void *priv;
1111 	cmi_hdl_ent_t *ent;
1112 
1113 #ifdef __xpv
1114 	ASSERT(class == CMI_HDL_SOLARIS_xVM_MCA);
1115 #else
1116 	ASSERT(class == CMI_HDL_NATIVE);
1117 #endif
1118 
1119 	if (chipid > CMI_MAX_CHIPID ||
1120 	    coreid > CMI_MAX_CORES_PER_CHIP - 1 ||
1121 	    strandid > CMI_MAX_STRANDS_PER_CORE - 1)
1122 		return (NULL);
1123 
1124 	if ((priv = cpu_search(class, chipid, coreid, strandid)) == NULL)
1125 		return (NULL);
1126 
1127 	hdl = kmem_zalloc(sizeof (*hdl), KM_SLEEP);
1128 
1129 	hdl->cmih_class = class;
1130 	HDLOPS(hdl) = &cmi_hdl_ops;
1131 	hdl->cmih_chipid = chipid;
1132 	hdl->cmih_coreid = coreid;
1133 	hdl->cmih_strandid = strandid;
1134 	hdl->cmih_mstrand = cpu_is_cmt(priv);
1135 	hdl->cmih_hdlpriv = priv;
1136 #ifdef __xpv
1137 	hdl->cmih_msrsrc = CMI_MSR_FLAG_RD_INTERPOSEOK |
1138 	    CMI_MSR_FLAG_WR_INTERPOSEOK;
1139 #else	/* __xpv */
1140 	hdl->cmih_msrsrc = CMI_MSR_FLAG_RD_HWOK | CMI_MSR_FLAG_RD_INTERPOSEOK |
1141 	    CMI_MSR_FLAG_WR_HWOK | CMI_MSR_FLAG_WR_INTERPOSEOK;
1142 #endif
1143 
1144 	ent = cmi_hdl_ent_lookup(chipid, coreid, strandid);
1145 	if (ent->cmae_refcnt != 0 || ent->cmae_hdlp != NULL) {
1146 		/*
1147 		 * Somehow this (chipid, coreid, strandid) id tuple has
1148 		 * already been assigned!  This indicates that the
1149 		 * caller's logic in determining these values is busted,
1150 		 * or perhaps undermined by bad BIOS setup.  Complain,
1151 		 * and refuse to initialize this tuple again as bad things
1152 		 * will happen.
1153 		 */
1154 		cmn_err(CE_NOTE, "cmi_hdl_create: chipid %d coreid %d "
1155 		    "strandid %d handle already allocated!",
1156 		    chipid, coreid, strandid);
1157 		kmem_free(hdl, sizeof (*hdl));
1158 		return (NULL);
1159 	}
1160 
1161 	/*
1162 	 * Once we store a nonzero reference count others can find this
1163 	 * handle via cmi_hdl_lookup etc.  This initial hold on the handle
1164 	 * is to be dropped only if some other part of cmi initialization
1165 	 * fails or, if it succeeds, at later cpu deconfigure.  Note that
1166 	 * the module private data we hold in cmih_cmi and cmih_cmidata
1167 	 * is still NULL at this point (the caller will fill it with
1168 	 * cmi_hdl_setcmi if it initializes) so consumers of handles
1169 	 * should always be ready for that possibility.
1170 	 */
1171 	ent->cmae_hdlp = hdl;
1172 	hdl->cmih_refcntp = &ent->cmae_refcnt;
1173 	ent->cmae_refcnt = 1;
1174 
1175 	return ((cmi_hdl_t)hdl);
1176 }
1177 
1178 void
1179 cmi_hdl_hold(cmi_hdl_t ophdl)
1180 {
1181 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1182 
1183 	ASSERT(*hdl->cmih_refcntp != 0); /* must not be the initial hold */
1184 
1185 	atomic_inc_32(hdl->cmih_refcntp);
1186 }
1187 
1188 static int
1189 cmi_hdl_canref(cmi_hdl_ent_t *ent)
1190 {
1191 	volatile uint32_t *refcntp;
1192 	uint32_t refcnt;
1193 
1194 	refcntp = &ent->cmae_refcnt;
1195 	refcnt = *refcntp;
1196 
1197 	if (refcnt == 0) {
1198 		/*
1199 		 * Associated object never existed, is being destroyed,
1200 		 * or has been destroyed.
1201 		 */
1202 		return (0);
1203 	}
1204 
1205 	/*
1206 	 * We cannot use atomic increment here because once the reference
1207 	 * count reaches zero it must never be bumped up again.
1208 	 */
1209 	while (refcnt != 0) {
1210 		if (atomic_cas_32(refcntp, refcnt, refcnt + 1) == refcnt)
1211 			return (1);
1212 		refcnt = *refcntp;
1213 	}
1214 
1215 	/*
1216 	 * Somebody dropped the reference count to 0 after our initial
1217 	 * check.
1218 	 */
1219 	return (0);
1220 }
1221 
1222 
1223 void
1224 cmi_hdl_rele(cmi_hdl_t ophdl)
1225 {
1226 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1227 	cmi_hdl_ent_t *ent;
1228 
1229 	ASSERT(*hdl->cmih_refcntp > 0);
1230 
1231 	if (atomic_dec_32_nv(hdl->cmih_refcntp) > 0)
1232 		return;
1233 
1234 	ent = cmi_hdl_ent_lookup(hdl->cmih_chipid, hdl->cmih_coreid,
1235 	    hdl->cmih_strandid);
1236 	ent->cmae_hdlp = NULL;
1237 
1238 	kmem_free(hdl, sizeof (*hdl));
1239 }
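
/*
 * Typical lookup/use/release pattern (a sketch; cmi_hdl_walk() below
 * applies the same discipline): a successful cmi_hdl_lookup() returns
 * with a hold applied via cmi_hdl_canref(), which the caller must
 * balance with cmi_hdl_rele().
 *
 *	cmi_hdl_t hdl;
 *
 *	if ((hdl = cmi_hdl_lookup(CMI_HDL_NEUTRAL, chipid, coreid,
 *	    strandid)) != NULL) {
 *		... use the handle, e.g. cmi_hdl_rdmsr(hdl, msr, &val) ...
 *		cmi_hdl_rele(hdl);
 *	}
 */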
1240 
1241 void
1242 cmi_hdl_setspecific(cmi_hdl_t ophdl, void *arg)
1243 {
1244 	IMPLHDL(ophdl)->cmih_spec = arg;
1245 }
1246 
1247 void *
1248 cmi_hdl_getspecific(cmi_hdl_t ophdl)
1249 {
1250 	return (IMPLHDL(ophdl)->cmih_spec);
1251 }
1252 
1253 void
1254 cmi_hdl_setmc(cmi_hdl_t ophdl, const struct cmi_mc_ops *mcops, void *mcdata)
1255 {
1256 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1257 
1258 	ASSERT(hdl->cmih_mcops == NULL && hdl->cmih_mcdata == NULL);
1259 	hdl->cmih_mcops = mcops;
1260 	hdl->cmih_mcdata = mcdata;
1261 }
1262 
1263 const struct cmi_mc_ops *
1264 cmi_hdl_getmcops(cmi_hdl_t ophdl)
1265 {
1266 	return (IMPLHDL(ophdl)->cmih_mcops);
1267 }
1268 
1269 void *
1270 cmi_hdl_getmcdata(cmi_hdl_t ophdl)
1271 {
1272 	return (IMPLHDL(ophdl)->cmih_mcdata);
1273 }
1274 
1275 cmi_hdl_t
1276 cmi_hdl_lookup(enum cmi_hdl_class class, uint_t chipid, uint_t coreid,
1277     uint_t strandid)
1278 {
1279 	cmi_hdl_ent_t *ent;
1280 
1281 	if (chipid > CMI_MAX_CHIPID ||
1282 	    coreid > CMI_MAX_CORES_PER_CHIP - 1 ||
1283 	    strandid > CMI_MAX_STRANDS_PER_CORE - 1)
1284 		return (NULL);
1285 
1286 	ent = cmi_hdl_ent_lookup(chipid, coreid, strandid);
1287 
1288 	if (class == CMI_HDL_NEUTRAL)
1289 #ifdef __xpv
1290 		class = CMI_HDL_SOLARIS_xVM_MCA;
1291 #else
1292 		class = CMI_HDL_NATIVE;
1293 #endif
1294 
1295 	if (!cmi_hdl_canref(ent))
1296 		return (NULL);
1297 
1298 	if (ent->cmae_hdlp->cmih_class != class) {
1299 		cmi_hdl_rele((cmi_hdl_t)ent->cmae_hdlp);
1300 		return (NULL);
1301 	}
1302 
1303 	return ((cmi_hdl_t)ent->cmae_hdlp);
1304 }
1305 
1306 cmi_hdl_t
1307 cmi_hdl_any(void)
1308 {
1309 	int i, j;
1310 	cmi_hdl_ent_t *ent;
1311 
1312 	for (i = 0; i < CMI_CHIPID_ARR_SZ; i++) {
1313 		if (cmi_chip_tab[i] == NULL)
1314 			continue;
1315 		for (j = 0, ent = cmi_chip_tab[i]; j < CMI_MAX_STRANDS_PER_CHIP;
1316 		    j++, ent++) {
1317 			if (cmi_hdl_canref(ent))
1318 				return ((cmi_hdl_t)ent->cmae_hdlp);
1319 		}
1320 	}
1321 
1322 	return (NULL);
1323 }
1324 
1325 void
1326 cmi_hdl_walk(int (*cbfunc)(cmi_hdl_t, void *, void *, void *),
1327     void *arg1, void *arg2, void *arg3)
1328 {
1329 	int i, j;
1330 	cmi_hdl_ent_t *ent;
1331 
1332 	for (i = 0; i < CMI_CHIPID_ARR_SZ; i++) {
1333 		if (cmi_chip_tab[i] == NULL)
1334 			continue;
1335 		for (j = 0, ent = cmi_chip_tab[i]; j < CMI_MAX_STRANDS_PER_CHIP;
1336 		    j++, ent++) {
1337 			if (cmi_hdl_canref(ent)) {
1338 				cmi_hdl_impl_t *hdl = ent->cmae_hdlp;
1339 				if ((*cbfunc)((cmi_hdl_t)hdl, arg1, arg2, arg3)
1340 				    == CMI_HDL_WALK_DONE) {
1341 					cmi_hdl_rele((cmi_hdl_t)hdl);
1342 					return;
1343 				}
1344 				cmi_hdl_rele((cmi_hdl_t)hdl);
1345 			}
1346 		}
1347 	}
1348 }
1349 
1350 void
1351 cmi_hdl_setcmi(cmi_hdl_t ophdl, void *cmi, void *cmidata)
1352 {
1353 	IMPLHDL(ophdl)->cmih_cmidata = cmidata;
1354 	IMPLHDL(ophdl)->cmih_cmi = cmi;
1355 }
1356 
1357 void *
1358 cmi_hdl_getcmi(cmi_hdl_t ophdl)
1359 {
1360 	return (IMPLHDL(ophdl)->cmih_cmi);
1361 }
1362 
1363 void *
1364 cmi_hdl_getcmidata(cmi_hdl_t ophdl)
1365 {
1366 	return (IMPLHDL(ophdl)->cmih_cmidata);
1367 }
1368 
1369 enum cmi_hdl_class
1370 cmi_hdl_class(cmi_hdl_t ophdl)
1371 {
1372 	return (IMPLHDL(ophdl)->cmih_class);
1373 }
1374 
1375 #define	CMI_HDL_OPFUNC(what, type)				\
1376 	type							\
1377 	cmi_hdl_##what(cmi_hdl_t ophdl)				\
1378 	{							\
1379 		return (HDLOPS(IMPLHDL(ophdl))->		\
1380 		    cmio_##what(IMPLHDL(ophdl)));		\
1381 	}
1382 
1383 CMI_HDL_OPFUNC(vendor, uint_t)
1384 CMI_HDL_OPFUNC(vendorstr, const char *)
1385 CMI_HDL_OPFUNC(family, uint_t)
1386 CMI_HDL_OPFUNC(model, uint_t)
1387 CMI_HDL_OPFUNC(stepping, uint_t)
1388 CMI_HDL_OPFUNC(chipid, uint_t)
1389 CMI_HDL_OPFUNC(coreid, uint_t)
1390 CMI_HDL_OPFUNC(strandid, uint_t)
1391 CMI_HDL_OPFUNC(chiprev, uint32_t)
1392 CMI_HDL_OPFUNC(chiprevstr, const char *)
1393 CMI_HDL_OPFUNC(getsockettype, uint32_t)
1394 CMI_HDL_OPFUNC(logical_id, id_t)
1395 
1396 boolean_t
1397 cmi_hdl_is_cmt(cmi_hdl_t ophdl)
1398 {
1399 	return (IMPLHDL(ophdl)->cmih_mstrand);
1400 }
1401 
1402 void
1403 cmi_hdl_int(cmi_hdl_t ophdl, int num)
1404 {
1405 	if (HDLOPS(IMPLHDL(ophdl))->cmio_int == NULL)
1406 		return;
1407 
1408 	cmi_hdl_inj_begin(ophdl);
1409 	HDLOPS(IMPLHDL(ophdl))->cmio_int(IMPLHDL(ophdl), num);
1410 	cmi_hdl_inj_end(NULL);
1411 }
1412 
1413 int
1414 cmi_hdl_online(cmi_hdl_t ophdl, int new_status, int *old_status)
1415 {
1416 	return (HDLOPS(IMPLHDL(ophdl))->cmio_online(IMPLHDL(ophdl),
1417 	    new_status, old_status));
1418 }
1419 
1420 #ifndef	__xpv
1421 /*
1422  * Return hardware chip instance; cpuid_get_chipid provides this directly.
1423  */
1424 uint_t
1425 cmi_ntv_hwchipid(cpu_t *cp)
1426 {
1427 	return (cpuid_get_chipid(cp));
1428 }
1429 
1430 /*
1431  * Return core instance within a single chip.
1432  */
1433 uint_t
1434 cmi_ntv_hwcoreid(cpu_t *cp)
1435 {
1436 	return (cpuid_get_pkgcoreid(cp));
1437 }
1438 
1439 /*
1440  * Return strand number within a single core.  cpuid_get_clogid numbers
1441  * all execution units (strands, or cores in unstranded models) sequentially
1442  * within a single chip.
1443  */
1444 uint_t
1445 cmi_ntv_hwstrandid(cpu_t *cp)
1446 {
1447 	int strands_per_core = cpuid_get_ncpu_per_chip(cp) /
1448 	    cpuid_get_ncore_per_chip(cp);
1449 
1450 	return (cpuid_get_clogid(cp) % strands_per_core);
1451 }
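
/*
 * Worked example (illustrative): on a chip with 4 cores of 2 strands each,
 * cpuid_get_ncpu_per_chip() is 8 and cpuid_get_ncore_per_chip() is 4, so
 * strands_per_core is 2 and the cpu with clogid 5 is strand 5 % 2 = 1 of
 * its core.
 */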
1452 #endif	/* __xpv */
1453 
1454 void
1455 cmi_hdlconf_rdmsr_nohw(cmi_hdl_t ophdl)
1456 {
1457 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1458 
1459 	hdl->cmih_msrsrc &= ~CMI_MSR_FLAG_RD_HWOK;
1460 }
1461 
1462 void
1463 cmi_hdlconf_wrmsr_nohw(cmi_hdl_t ophdl)
1464 {
1465 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1466 
1467 	hdl->cmih_msrsrc &= ~CMI_MSR_FLAG_WR_HWOK;
1468 }
1469 
1470 cmi_errno_t
1471 cmi_hdl_rdmsr(cmi_hdl_t ophdl, uint_t msr, uint64_t *valp)
1472 {
1473 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1474 
1475 	/*
1476 	 * Regardless of the handle class, we first check for an
1477 	 * interposed value.  In the xVM case you probably want to
1478 	 * place interposed values within the hypervisor itself, but
1479 	 * we still allow interposing them in dom0 for test and bringup
1480 	 * purposes.
1481 	 */
1482 	if ((hdl->cmih_msrsrc & CMI_MSR_FLAG_RD_INTERPOSEOK) &&
1483 	    msri_lookup(hdl, msr, valp))
1484 		return (CMI_SUCCESS);
1485 
1486 	if (HDLOPS(hdl)->cmio_rdmsr == NULL)
1487 		return (CMIERR_NOTSUP);
1488 
1489 	return (HDLOPS(hdl)->cmio_rdmsr(hdl, msr, valp));
1490 }
1491 
1492 cmi_errno_t
1493 cmi_hdl_wrmsr(cmi_hdl_t ophdl, uint_t msr, uint64_t val)
1494 {
1495 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1496 
1497 	/* Invalidate any interposed value */
1498 	msri_rment(hdl, msr);
1499 
1500 	if (HDLOPS(hdl)->cmio_wrmsr == NULL)
1501 		return (CMI_SUCCESS);	/* pretend all is ok */
1502 
1503 	return (HDLOPS(hdl)->cmio_wrmsr(hdl, msr, val));
1504 }
1505 
1506 void
1507 cmi_hdl_enable_mce(cmi_hdl_t ophdl)
1508 {
1509 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1510 	ulong_t cr4;
1511 
1512 	if (HDLOPS(hdl)->cmio_getcr4 == NULL ||
1513 	    HDLOPS(hdl)->cmio_setcr4 == NULL)
1514 		return;
1515 
1516 	cr4 = HDLOPS(hdl)->cmio_getcr4(hdl);
1517 
1518 	HDLOPS(hdl)->cmio_setcr4(hdl, cr4 | CR4_MCE);
1519 }
1520 
1521 void
1522 cmi_hdl_msrinterpose(cmi_hdl_t ophdl, cmi_mca_regs_t *regs, uint_t nregs)
1523 {
1524 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1525 	int i;
1526 
1527 	if (HDLOPS(hdl)->cmio_msrinterpose == NULL)
1528 		return;
1529 
1530 	cmi_hdl_inj_begin(ophdl);
1531 
1532 	for (i = 0; i < nregs; i++, regs++)
1533 		HDLOPS(hdl)->cmio_msrinterpose(hdl, regs->cmr_msrnum,
1534 		    regs->cmr_msrval);
1535 
1536 	cmi_hdl_inj_end(ophdl);
1537 }
1538 
1539 /*ARGSUSED*/
1540 void
1541 cmi_hdl_msrforward(cmi_hdl_t ophdl, cmi_mca_regs_t *regs, uint_t nregs)
1542 {
1543 #ifdef __xpv
1544 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1545 	int i;
1546 
1547 	for (i = 0; i < nregs; i++, regs++)
1548 		msri_addent(hdl, regs->cmr_msrnum, regs->cmr_msrval);
1549 #endif
1550 }
1551 
1552 
1553 void
1554 cmi_pcird_nohw(void)
1555 {
1556 	cmi_pcicfg_flags &= ~CMI_PCICFG_FLAG_RD_HWOK;
1557 }
1558 
1559 void
1560 cmi_pciwr_nohw(void)
1561 {
1562 	cmi_pcicfg_flags &= ~CMI_PCICFG_FLAG_WR_HWOK;
1563 }
1564 
1565 static uint32_t
1566 cmi_pci_get_cmn(int bus, int dev, int func, int reg, int asz,
1567     int *interpose, ddi_acc_handle_t hdl)
1568 {
1569 	uint32_t val;
1570 
1571 	if (cmi_pcicfg_flags & CMI_PCICFG_FLAG_RD_INTERPOSEOK &&
1572 	    pcii_lookup(bus, dev, func, reg, asz, &val)) {
1573 		if (interpose)
1574 			*interpose = 1;
1575 		return (val);
1576 	}
1577 	if (interpose)
1578 		*interpose = 0;
1579 
1580 	if (!(cmi_pcicfg_flags & CMI_PCICFG_FLAG_RD_HWOK))
1581 		return (0);
1582 
1583 	switch (asz) {
1584 	case 1:
1585 		if (hdl)
1586 			val = pci_config_get8(hdl, (off_t)reg);
1587 		else
1588 			val = (*pci_getb_func)(bus, dev, func, reg);
1589 		break;
1590 	case 2:
1591 		if (hdl)
1592 			val = pci_config_get16(hdl, (off_t)reg);
1593 		else
1594 			val = (*pci_getw_func)(bus, dev, func, reg);
1595 		break;
1596 	case 4:
1597 		if (hdl)
1598 			val = pci_config_get32(hdl, (off_t)reg);
1599 		else
1600 			val = (*pci_getl_func)(bus, dev, func, reg);
1601 		break;
1602 	default:
1603 		val = 0;
1604 	}
1605 	return (val);
1606 }
1607 
1608 uint8_t
1609 cmi_pci_getb(int bus, int dev, int func, int reg, int *interpose,
1610     ddi_acc_handle_t hdl)
1611 {
1612 	return ((uint8_t)cmi_pci_get_cmn(bus, dev, func, reg, 1, interpose,
1613 	    hdl));
1614 }
1615 
1616 uint16_t
1617 cmi_pci_getw(int bus, int dev, int func, int reg, int *interpose,
1618     ddi_acc_handle_t hdl)
1619 {
1620 	return ((uint16_t)cmi_pci_get_cmn(bus, dev, func, reg, 2, interpose,
1621 	    hdl));
1622 }
1623 
1624 uint32_t
1625 cmi_pci_getl(int bus, int dev, int func, int reg, int *interpose,
1626     ddi_acc_handle_t hdl)
1627 {
1628 	return (cmi_pci_get_cmn(bus, dev, func, reg, 4, interpose, hdl));
1629 }
1630 
1631 void
1632 cmi_pci_interposeb(int bus, int dev, int func, int reg, uint8_t val)
1633 {
1634 	pcii_addent(bus, dev, func, reg, val, 1);
1635 }
1636 
1637 void
1638 cmi_pci_interposew(int bus, int dev, int func, int reg, uint16_t val)
1639 {
1640 	pcii_addent(bus, dev, func, reg, val, 2);
1641 }
1642 
1643 void
1644 cmi_pci_interposel(int bus, int dev, int func, int reg, uint32_t val)
1645 {
1646 	pcii_addent(bus, dev, func, reg, val, 4);
1647 }
1648 
1649 static void
1650 cmi_pci_put_cmn(int bus, int dev, int func, int reg, int asz,
1651     ddi_acc_handle_t hdl, uint32_t val)
1652 {
1653 	/*
1654 	 * If there is an interposed value for this register invalidate it.
1655 	 */
1656 	pcii_rment(bus, dev, func, reg, asz);
1657 
1658 	if (!(cmi_pcicfg_flags & CMI_PCICFG_FLAG_WR_HWOK))
1659 		return;
1660 
1661 	switch (asz) {
1662 	case 1:
1663 		if (hdl)
1664 			pci_config_put8(hdl, (off_t)reg, (uint8_t)val);
1665 		else
1666 			(*pci_putb_func)(bus, dev, func, reg, (uint8_t)val);
1667 		break;
1668 
1669 	case 2:
1670 		if (hdl)
1671 			pci_config_put16(hdl, (off_t)reg, (uint16_t)val);
1672 		else
1673 			(*pci_putw_func)(bus, dev, func, reg, (uint16_t)val);
1674 		break;
1675 
1676 	case 4:
1677 		if (hdl)
1678 			pci_config_put32(hdl, (off_t)reg, val);
1679 		else
1680 			(*pci_putl_func)(bus, dev, func, reg, val);
1681 		break;
1682 
1683 	default:
1684 		break;
1685 	}
1686 }
1687 
1688 void
1689 cmi_pci_putb(int bus, int dev, int func, int reg, ddi_acc_handle_t hdl,
1690     uint8_t val)
1691 {
1692 	cmi_pci_put_cmn(bus, dev, func, reg, 1, hdl, val);
1693 }
1694 
1695 void
1696 cmi_pci_putw(int bus, int dev, int func, int reg, ddi_acc_handle_t hdl,
1697     uint16_t val)
1698 {
1699 	cmi_pci_put_cmn(bus, dev, func, reg, 2, hdl, val);
1700 }
1701 
1702 void
1703 cmi_pci_putl(int bus, int dev, int func, int reg, ddi_acc_handle_t hdl,
1704     uint32_t val)
1705 {
1706 	cmi_pci_put_cmn(bus, dev, func, reg, 4, hdl, val);
1707 }
1708 
1709 static const struct cmi_hdl_ops cmi_hdl_ops = {
1710 #ifdef __xpv
1711 	/*
1712 	 * CMI_HDL_SOLARIS_xVM_MCA - ops when we are an xVM dom0
1713 	 */
1714 	xpv_vendor,		/* cmio_vendor */
1715 	xpv_vendorstr,		/* cmio_vendorstr */
1716 	xpv_family,		/* cmio_family */
1717 	xpv_model,		/* cmio_model */
1718 	xpv_stepping,		/* cmio_stepping */
1719 	xpv_chipid,		/* cmio_chipid */
1720 	xpv_coreid,		/* cmio_coreid */
1721 	xpv_strandid,		/* cmio_strandid */
1722 	xpv_chiprev,		/* cmio_chiprev */
1723 	xpv_chiprevstr,		/* cmio_chiprevstr */
1724 	xpv_getsockettype,	/* cmio_getsockettype */
1725 	xpv_logical_id,		/* cmio_logical_id */
1726 	NULL,			/* cmio_getcr4 */
1727 	NULL,			/* cmio_setcr4 */
1728 	xpv_rdmsr,		/* cmio_rdmsr */
1729 	xpv_wrmsr,		/* cmio_wrmsr */
1730 	xpv_msrinterpose,	/* cmio_msrinterpose */
1731 	xpv_int,		/* cmio_int */
1732 	xpv_online		/* cmio_online */
1733 
1734 #else	/* __xpv */
1735 
1736 	/*
1737 	 * CMI_HDL_NATIVE - ops when apparently running on bare-metal
1738 	 */
1739 	ntv_vendor,		/* cmio_vendor */
1740 	ntv_vendorstr,		/* cmio_vendorstr */
1741 	ntv_family,		/* cmio_family */
1742 	ntv_model,		/* cmio_model */
1743 	ntv_stepping,		/* cmio_stepping */
1744 	ntv_chipid,		/* cmio_chipid */
1745 	ntv_coreid,		/* cmio_coreid */
1746 	ntv_strandid,		/* cmio_strandid */
1747 	ntv_chiprev,		/* cmio_chiprev */
1748 	ntv_chiprevstr,		/* cmio_chiprevstr */
1749 	ntv_getsockettype,	/* cmio_getsockettype */
1750 	ntv_logical_id,		/* cmio_logical_id */
1751 	ntv_getcr4,		/* cmio_getcr4 */
1752 	ntv_setcr4,		/* cmio_setcr4 */
1753 	ntv_rdmsr,		/* cmio_rdmsr */
1754 	ntv_wrmsr,		/* cmio_wrmsr */
1755 	ntv_msrinterpose,	/* cmio_msrinterpose */
1756 	ntv_int,		/* cmio_int */
1757 	ntv_online		/* cmio_online */
1758 #endif
1759 };
1760