xref: /illumos-gate/usr/src/uts/i86pc/os/cmi_hw.c (revision 48f21d36693650e32c51fc8474dca1acc9b7376c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * CPU Module Interface - hardware abstraction.
29  */
30 
31 #include <sys/types.h>
32 #include <sys/cpu_module.h>
33 #include <sys/kmem.h>
34 #include <sys/x86_archext.h>
35 #include <sys/cpuvar.h>
36 #include <sys/ksynch.h>
37 #include <sys/x_call.h>
38 #include <sys/pghw.h>
39 #include <sys/pci_cfgspace.h>
40 #include <sys/archsystm.h>
41 #include <sys/ontrap.h>
42 #include <sys/controlregs.h>
43 #include <sys/sunddi.h>
44 #include <sys/trap.h>
45 #include <sys/mca_x86.h>
46 #include <sys/processor.h>
47 
48 #ifdef __xpv
49 #include <sys/hypervisor.h>
50 #endif
51 
52 /*
53  * Outside of this file consumers use the opaque cmi_hdl_t.  This
54  * definition is duplicated in the generic_cpu mdb module, so keep
55  * them in-sync when making changes.
56  */
typedef struct cmi_hdl_impl {
	enum cmi_hdl_class cmih_class;		/* Handle nature */
	const struct cmi_hdl_ops *cmih_ops;	/* Operations vector */
	uint_t cmih_chipid;			/* Chipid of cpu resource */
	uint_t cmih_coreid;			/* Core within die */
	uint_t cmih_strandid;			/* Thread within core */
	boolean_t cmih_mstrand;			/* cores are multithreaded */
	volatile uint32_t *cmih_refcntp;	/* Reference count pointer */
	uint64_t cmih_msrsrc;			/* MSR data source flags */
	void *cmih_hdlpriv;			/* cmi_hw.c private data */
	void *cmih_spec;			/* cmi_hdl_{set,get}_specific */
	void *cmih_cmi;				/* cpu mod control structure */
	void *cmih_cmidata;			/* cpu mod private data */
	const struct cmi_mc_ops *cmih_mcops;	/* Memory-controller ops */
	void *cmih_mcdata;			/* Memory-controller data */
	uint64_t cmih_flags;			/* See CMIH_F_* below */
} cmi_hdl_impl_t;

/* Convert the opaque public cmi_hdl_t to the implementation type. */
#define	IMPLHDL(ophdl)	((cmi_hdl_impl_t *)ophdl)

/* Retrieve the operations vector backing a handle. */
#define	HDLOPS(hdl)	((hdl)->cmih_ops)

/* cmih_flags: error injection via this handle is in progress. */
#define	CMIH_F_INJACTV		0x1ULL
79 
80 /*
81  * Ops structure for handle operations.
82  */
struct cmi_hdl_ops {
	/*
	 * These ops are required in an implementation.  They are
	 * identity/topology queries answered from cpuid data or from
	 * state cached in the handle.
	 */
	uint_t (*cmio_vendor)(cmi_hdl_impl_t *);
	const char *(*cmio_vendorstr)(cmi_hdl_impl_t *);
	uint_t (*cmio_family)(cmi_hdl_impl_t *);
	uint_t (*cmio_model)(cmi_hdl_impl_t *);
	uint_t (*cmio_stepping)(cmi_hdl_impl_t *);
	uint_t (*cmio_chipid)(cmi_hdl_impl_t *);
	uint_t (*cmio_coreid)(cmi_hdl_impl_t *);
	uint_t (*cmio_strandid)(cmi_hdl_impl_t *);
	uint32_t (*cmio_chiprev)(cmi_hdl_impl_t *);
	const char *(*cmio_chiprevstr)(cmi_hdl_impl_t *);
	uint32_t (*cmio_getsockettype)(cmi_hdl_impl_t *);
	const char *(*cmio_getsocketstr)(cmi_hdl_impl_t *);

	id_t (*cmio_logical_id)(cmi_hdl_impl_t *);
	/*
	 * These ops are optional in an implementation.
	 */
	ulong_t (*cmio_getcr4)(cmi_hdl_impl_t *);
	void (*cmio_setcr4)(cmi_hdl_impl_t *, ulong_t);
	cmi_errno_t (*cmio_rdmsr)(cmi_hdl_impl_t *, uint_t, uint64_t *);
	cmi_errno_t (*cmio_wrmsr)(cmi_hdl_impl_t *, uint_t, uint64_t);
	cmi_errno_t (*cmio_msrinterpose)(cmi_hdl_impl_t *, uint_t, uint64_t);
	void (*cmio_int)(cmi_hdl_impl_t *, int);
	int (*cmio_online)(cmi_hdl_impl_t *, int, int *);
};

/* Forward declaration; the vector itself is defined later in the file. */
static const struct cmi_hdl_ops cmi_hdl_ops;
114 
115 /*
116  * Handles are looked up from contexts such as polling, injection etc
117  * where the context is reasonably well defined (although a poller could
118  * interrupt any old thread holding any old lock).  They are also looked
119  * up by machine check handlers, which may strike at inconvenient times
120  * such as during handle initialization or destruction or during handle
121  * lookup (which the #MC handler itself will also have to perform).
122  *
123  * So keeping handles in a linked list makes locking difficult when we
124  * consider #MC handlers.  Our solution is to have a look-up table indexed
125  * by that which uniquely identifies a handle - chip/core/strand id -
126  * with each entry a structure including a pointer to a handle
127  * structure for the resource, and a reference count for the handle.
128  * Reference counts are modified atomically.  The public cmi_hdl_hold
129  * always succeeds because this can only be used after handle creation
130  * and before the call to destruct, so the hold count is already at least one.
131  * In other functions that lookup a handle (cmi_hdl_lookup, cmi_hdl_any)
 * we must be certain that the count has not already decremented to zero
133  * before applying our hold.
134  *
135  * The table is an array of maximum number of chips defined in
136  * CMI_CHIPID_ARR_SZ indexed by the chip id. If the chip is not present, the
137  * entry is NULL. Each entry is a pointer to another array which contains a
138  * list of all strands of the chip. This first level table is allocated when
139  * first we want to populate an entry. The size of the latter (per chip) table
140  * is CMI_MAX_STRANDS_PER_CHIP and it is populated when one of its cpus starts.
141  *
142  * Ideally we should only allocate to the actual number of chips, cores per
143  * chip and strand per core. The number of chips is not available until all
144  * of them are passed. The number of cores and strands are partially available.
145  * For now we stick with the above approach.
146  */
#define	CMI_MAX_CHIPID_NBITS		6	/* max chipid of 63 */
#define	CMI_MAX_CORES_PER_CHIP_NBITS	4	/* 16 cores per chip max */
#define	CMI_MAX_STRANDS_PER_CORE_NBITS	3	/* 8 strands per core max */

#define	CMI_MAX_CHIPID			((1 << (CMI_MAX_CHIPID_NBITS)) - 1)
#define	CMI_MAX_CORES_PER_CHIP		(1 << CMI_MAX_CORES_PER_CHIP_NBITS)
#define	CMI_MAX_STRANDS_PER_CORE	(1 << CMI_MAX_STRANDS_PER_CORE_NBITS)
#define	CMI_MAX_STRANDS_PER_CHIP	(CMI_MAX_CORES_PER_CHIP * \
					    CMI_MAX_STRANDS_PER_CORE)

/*
 * Handle array indexing within a per-chip table
 *	[6:3] = Core in package,
 *	[2:0] = Strand in core,
 */
#define	CMI_HDL_ARR_IDX_CORE(coreid) \
	(((coreid) & (CMI_MAX_CORES_PER_CHIP - 1)) << \
	CMI_MAX_STRANDS_PER_CORE_NBITS)

#define	CMI_HDL_ARR_IDX_STRAND(strandid) \
	(((strandid) & (CMI_MAX_STRANDS_PER_CORE - 1)))

#define	CMI_HDL_ARR_IDX(coreid, strandid) \
	(CMI_HDL_ARR_IDX_CORE(coreid) | CMI_HDL_ARR_IDX_STRAND(strandid))

#define	CMI_CHIPID_ARR_SZ		(1 << CMI_MAX_CHIPID_NBITS)

/*
 * One lookup-table entry per strand: a pointer to the strand's handle
 * (NULL until a handle is created for that strand) plus an atomically
 * manipulated reference count.
 */
typedef struct cmi_hdl_ent {
	volatile uint32_t cmae_refcnt;
	cmi_hdl_impl_t *cmae_hdlp;
} cmi_hdl_ent_t;

/*
 * First-level table: per-chip pointers to lazily allocated arrays of
 * CMI_MAX_STRANDS_PER_CHIP entries (see cmi_hdl_ent_lookup).
 */
static cmi_hdl_ent_t *cmi_chip_tab[CMI_CHIPID_ARR_SZ];
180 
181 /*
182  * Controls where we will source PCI config space data.
183  */
184 #define	CMI_PCICFG_FLAG_RD_HWOK		0x0001
185 #define	CMI_PCICFG_FLAG_RD_INTERPOSEOK	0X0002
186 #define	CMI_PCICFG_FLAG_WR_HWOK		0x0004
187 #define	CMI_PCICFG_FLAG_WR_INTERPOSEOK	0X0008
188 
189 static uint64_t cmi_pcicfg_flags =
190     CMI_PCICFG_FLAG_RD_HWOK | CMI_PCICFG_FLAG_RD_INTERPOSEOK |
191     CMI_PCICFG_FLAG_WR_HWOK | CMI_PCICFG_FLAG_WR_INTERPOSEOK;
192 
193 /*
194  * The flags for individual cpus are kept in their per-cpu handle cmih_msrsrc
195  */
196 #define	CMI_MSR_FLAG_RD_HWOK		0x0001
197 #define	CMI_MSR_FLAG_RD_INTERPOSEOK	0x0002
198 #define	CMI_MSR_FLAG_WR_HWOK		0x0004
199 #define	CMI_MSR_FLAG_WR_INTERPOSEOK	0x0008
200 
/* Number of xc_priority attempts before giving up (tunable). */
int cmi_call_func_ntv_tries = 3;

/*
 * Execute func(arg1, arg2, &rc) on the cpu with the given id,
 * cross-calling when the target is not the current cpu.  The called
 * function is expected to store a cmi_errno_t result through its third
 * argument; -1 is used as the "no result yet" sentinel.  Returns the
 * function's result, or CMIERR_DEADLOCK if the cross call never
 * delivered a result.
 */
static cmi_errno_t
call_func_ntv(int cpuid, xc_func_t func, xc_arg_t arg1, xc_arg_t arg2)
{
	cmi_errno_t rc = -1;
	int i;

	kpreempt_disable();

	if (CPU->cpu_id == cpuid) {
		/* Already on the target cpu - call directly. */
		(*func)(arg1, arg2, (xc_arg_t)&rc);
	} else {
		/*
		 * This should not happen for a #MC trap or a poll, so
		 * this is likely an error injection or similar.
		 * We will try to cross call with xc_priority - we
		 * can't guarantee success with xc_call because
		 * the interrupt code in the case of a #MC may
		 * already hold the xc mutex.
		 */
		for (i = 0; i < cmi_call_func_ntv_tries; i++) {
			cpuset_t cpus;

			CPUSET_ONLY(cpus, cpuid);
			xc_priority(arg1, arg2, (xc_arg_t)&rc,
			    CPUSET2BV(cpus), func);
			if (rc != -1)
				break;

			DELAY(1);
		}
	}

	kpreempt_enable();

	return (rc != -1 ? rc : CMIERR_DEADLOCK);
}
239 
240 static uint64_t injcnt;
241 
242 void
243 cmi_hdl_inj_begin(cmi_hdl_t ophdl)
244 {
245 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
246 
247 	if (hdl != NULL)
248 		hdl->cmih_flags |= CMIH_F_INJACTV;
249 	if (injcnt++ == 0) {
250 		cmn_err(CE_NOTE, "Hardware error injection/simulation "
251 		    "activity noted");
252 	}
253 }
254 
255 void
256 cmi_hdl_inj_end(cmi_hdl_t ophdl)
257 {
258 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
259 
260 	ASSERT(hdl == NULL || hdl->cmih_flags & CMIH_F_INJACTV);
261 	if (hdl != NULL)
262 		hdl->cmih_flags &= ~CMIH_F_INJACTV;
263 }
264 
265 boolean_t
266 cmi_inj_tainted(void)
267 {
268 	return (injcnt != 0 ? B_TRUE : B_FALSE);
269 }
270 
271 /*
272  *	 =======================================================
273  *	|	MSR Interposition				|
274  *	|	-----------------				|
275  *	|							|
276  *	 -------------------------------------------------------
277  */
278 
#define	CMI_MSRI_HASHSZ		16

/*
 * Hash on handle address and MSR number.  The previous form read
 * "hdl >> 3 + (msr)": since '+' binds tighter than '>>', the handle
 * was shifted right by (3 + msr) bits - an undefined-behavior shift
 * for any MSR number >= 61 (e.g. the MCA bank MSRs at 0x400 and up) -
 * and the MSR number never contributed additively to the hash.
 * Parenthesize the shift so we shift by 3 and then add the MSR number,
 * and fold over all CMI_MSRI_HASHSZ buckets rather than leaving the
 * last bucket permanently unused with "% (CMI_MSRI_HASHSZ - 1)".
 * All users (msri_addent, msri_lookup, msri_rment) go through this
 * macro, so changing the distribution is safe.
 */
#define	CMI_MSRI_HASHIDX(hdl, msr) \
	((((uintptr_t)(hdl) >> 3) + (msr)) % CMI_MSRI_HASHSZ)
282 
/* Hash bucket: lock plus head of a doubly-linked chain of entries. */
struct cmi_msri_bkt {
	kmutex_t msrib_lock;
	struct cmi_msri_hashent *msrib_head;
};

/* One interposed MSR value, keyed by <handle, MSR number>. */
struct cmi_msri_hashent {
	struct cmi_msri_hashent *msrie_next;
	struct cmi_msri_hashent *msrie_prev;
	cmi_hdl_impl_t *msrie_hdl;
	uint_t msrie_msrnum;
	uint64_t msrie_msrval;
};

/* Does this entry match the given handle and MSR number? */
#define	CMI_MSRI_MATCH(ent, hdl, req_msr) \
	((ent)->msrie_hdl == (hdl) && (ent)->msrie_msrnum == (req_msr))

static struct cmi_msri_bkt msrihash[CMI_MSRI_HASHSZ];
300 
301 static void
302 msri_addent(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t val)
303 {
304 	int idx = CMI_MSRI_HASHIDX(hdl, msr);
305 	struct cmi_msri_bkt *hbp = &msrihash[idx];
306 	struct cmi_msri_hashent *hep;
307 
308 	mutex_enter(&hbp->msrib_lock);
309 
310 	for (hep = hbp->msrib_head; hep != NULL; hep = hep->msrie_next) {
311 		if (CMI_MSRI_MATCH(hep, hdl, msr))
312 			break;
313 	}
314 
315 	if (hep != NULL) {
316 		hep->msrie_msrval = val;
317 	} else {
318 		hep = kmem_alloc(sizeof (*hep), KM_SLEEP);
319 		hep->msrie_hdl = hdl;
320 		hep->msrie_msrnum = msr;
321 		hep->msrie_msrval = val;
322 
323 		if (hbp->msrib_head != NULL)
324 			hbp->msrib_head->msrie_prev = hep;
325 		hep->msrie_next = hbp->msrib_head;
326 		hep->msrie_prev = NULL;
327 		hbp->msrib_head = hep;
328 	}
329 
330 	mutex_exit(&hbp->msrib_lock);
331 }
332 
333 /*
334  * Look for a match for the given hanlde and msr.  Return 1 with valp
335  * filled if a match is found, otherwise return 0 with valp untouched.
336  */
337 static int
338 msri_lookup(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t *valp)
339 {
340 	int idx = CMI_MSRI_HASHIDX(hdl, msr);
341 	struct cmi_msri_bkt *hbp = &msrihash[idx];
342 	struct cmi_msri_hashent *hep;
343 
344 	/*
345 	 * This function is called during #MC trap handling, so we should
346 	 * consider the possibility that the hash mutex is held by the
347 	 * interrupted thread.  This should not happen because interposition
348 	 * is an artificial injection mechanism and the #MC is requested
349 	 * after adding entries, but just in case of a real #MC at an
350 	 * unlucky moment we'll use mutex_tryenter here.
351 	 */
352 	if (!mutex_tryenter(&hbp->msrib_lock))
353 		return (0);
354 
355 	for (hep = hbp->msrib_head; hep != NULL; hep = hep->msrie_next) {
356 		if (CMI_MSRI_MATCH(hep, hdl, msr)) {
357 			*valp = hep->msrie_msrval;
358 			break;
359 		}
360 	}
361 
362 	mutex_exit(&hbp->msrib_lock);
363 
364 	return (hep != NULL);
365 }
366 
367 /*
368  * Remove any interposed value that matches.
369  */
370 static void
371 msri_rment(cmi_hdl_impl_t *hdl, uint_t msr)
372 {
373 
374 	int idx = CMI_MSRI_HASHIDX(hdl, msr);
375 	struct cmi_msri_bkt *hbp = &msrihash[idx];
376 	struct cmi_msri_hashent *hep;
377 
378 	if (!mutex_tryenter(&hbp->msrib_lock))
379 		return;
380 
381 	for (hep = hbp->msrib_head; hep != NULL; hep = hep->msrie_next) {
382 		if (CMI_MSRI_MATCH(hep, hdl, msr)) {
383 			if (hep->msrie_prev != NULL)
384 				hep->msrie_prev->msrie_next = hep->msrie_next;
385 
386 			if (hep->msrie_next != NULL)
387 				hep->msrie_next->msrie_prev = hep->msrie_prev;
388 
389 			if (hbp->msrib_head == hep)
390 				hbp->msrib_head = hep->msrie_next;
391 
392 			kmem_free(hep, sizeof (*hep));
393 			break;
394 		}
395 	}
396 
397 	mutex_exit(&hbp->msrib_lock);
398 }
399 
400 /*
401  *	 =======================================================
402  *	|	PCI Config Space Interposition			|
403  *	|	------------------------------			|
404  *	|							|
405  *	 -------------------------------------------------------
406  */
407 
408 /*
409  * Hash for interposed PCI config space values.  We lookup on bus/dev/fun/offset
410  * and then record whether the value stashed was made with a byte, word or
411  * doubleword access;  we will only return a hit for an access of the
412  * same size.  If you access say a 32-bit register using byte accesses
413  * and then attempt to read the full 32-bit value back you will not obtain
414  * any sort of merged result - you get a lookup miss.
415  */
416 
#define	CMI_PCII_HASHSZ		16

/*
 * Hash bus/dev/func/offset into a bucket index.  Fold over all
 * CMI_PCII_HASHSZ buckets: the previous "% (CMI_PCII_HASHSZ - 1)"
 * could never produce index 15, leaving the last bucket permanently
 * unused.  All users (pcii_addent, pcii_lookup, pcii_rment) share this
 * macro, so the redistribution is safe.
 */
#define	CMI_PCII_HASHIDX(b, d, f, o) \
	(((b) + (d) + (f) + (o)) % CMI_PCII_HASHSZ)
420 
/* Hash bucket: lock plus head of a doubly-linked chain of entries. */
struct cmi_pcii_bkt {
	kmutex_t pciib_lock;
	struct cmi_pcii_hashent *pciib_head;
};

/*
 * One interposed PCI config space value, keyed by bus/dev/func/offset
 * and the access size used when the value was stashed.
 */
struct cmi_pcii_hashent {
	struct cmi_pcii_hashent *pcii_next;
	struct cmi_pcii_hashent *pcii_prev;
	int pcii_bus;
	int pcii_dev;
	int pcii_func;
	int pcii_reg;
	int pcii_asize;
	uint32_t pcii_val;
};

/* Does this entry match the given bus/dev/func/reg and access size? */
#define	CMI_PCII_MATCH(ent, b, d, f, r, asz) \
	((ent)->pcii_bus == (b) && (ent)->pcii_dev == (d) && \
	(ent)->pcii_func == (f) && (ent)->pcii_reg == (r) && \
	(ent)->pcii_asize == (asz))

static struct cmi_pcii_bkt pciihash[CMI_PCII_HASHSZ];
443 
444 
445 /*
446  * Add a new entry to the PCI interpose hash, overwriting any existing
447  * entry that is found.
448  */
449 static void
450 pcii_addent(int bus, int dev, int func, int reg, uint32_t val, int asz)
451 {
452 	int idx = CMI_PCII_HASHIDX(bus, dev, func, reg);
453 	struct cmi_pcii_bkt *hbp = &pciihash[idx];
454 	struct cmi_pcii_hashent *hep;
455 
456 	cmi_hdl_inj_begin(NULL);
457 
458 	mutex_enter(&hbp->pciib_lock);
459 
460 	for (hep = hbp->pciib_head; hep != NULL; hep = hep->pcii_next) {
461 		if (CMI_PCII_MATCH(hep, bus, dev, func, reg, asz))
462 			break;
463 	}
464 
465 	if (hep != NULL) {
466 		hep->pcii_val = val;
467 	} else {
468 		hep = kmem_alloc(sizeof (*hep), KM_SLEEP);
469 		hep->pcii_bus = bus;
470 		hep->pcii_dev = dev;
471 		hep->pcii_func = func;
472 		hep->pcii_reg = reg;
473 		hep->pcii_asize = asz;
474 		hep->pcii_val = val;
475 
476 		if (hbp->pciib_head != NULL)
477 			hbp->pciib_head->pcii_prev = hep;
478 		hep->pcii_next = hbp->pciib_head;
479 		hep->pcii_prev = NULL;
480 		hbp->pciib_head = hep;
481 	}
482 
483 	mutex_exit(&hbp->pciib_lock);
484 
485 	cmi_hdl_inj_end(NULL);
486 }
487 
488 /*
489  * Look for a match for the given bus/dev/func/reg; return 1 with valp
490  * filled if a match is found, otherwise return 0 with valp untouched.
491  */
492 static int
493 pcii_lookup(int bus, int dev, int func, int reg, int asz, uint32_t *valp)
494 {
495 	int idx = CMI_PCII_HASHIDX(bus, dev, func, reg);
496 	struct cmi_pcii_bkt *hbp = &pciihash[idx];
497 	struct cmi_pcii_hashent *hep;
498 
499 	if (!mutex_tryenter(&hbp->pciib_lock))
500 		return (0);
501 
502 	for (hep = hbp->pciib_head; hep != NULL; hep = hep->pcii_next) {
503 		if (CMI_PCII_MATCH(hep, bus, dev, func, reg, asz)) {
504 			*valp = hep->pcii_val;
505 			break;
506 		}
507 	}
508 
509 	mutex_exit(&hbp->pciib_lock);
510 
511 	return (hep != NULL);
512 }
513 
514 static void
515 pcii_rment(int bus, int dev, int func, int reg, int asz)
516 {
517 	int idx = CMI_PCII_HASHIDX(bus, dev, func, reg);
518 	struct cmi_pcii_bkt *hbp = &pciihash[idx];
519 	struct cmi_pcii_hashent *hep;
520 
521 	mutex_enter(&hbp->pciib_lock);
522 
523 	for (hep = hbp->pciib_head; hep != NULL; hep = hep->pcii_next) {
524 		if (CMI_PCII_MATCH(hep, bus, dev, func, reg, asz)) {
525 			if (hep->pcii_prev != NULL)
526 				hep->pcii_prev->pcii_next = hep->pcii_next;
527 
528 			if (hep->pcii_next != NULL)
529 				hep->pcii_next->pcii_prev = hep->pcii_prev;
530 
531 			if (hbp->pciib_head == hep)
532 				hbp->pciib_head = hep->pcii_next;
533 
534 			kmem_free(hep, sizeof (*hep));
535 			break;
536 		}
537 	}
538 
539 	mutex_exit(&hbp->pciib_lock);
540 }
541 
542 #ifndef __xpv
543 
544 /*
545  *	 =======================================================
546  *	|	Native methods					|
547  *	|	--------------					|
548  *	|							|
549  *	| These are used when we are running native on bare-	|
550  *	| metal, or simply don't know any better.		|
551  *	---------------------------------------------------------
552  */
553 
#define	HDLPRIV(hdl)	((cpu_t *)(hdl)->cmih_hdlpriv)

/*
 * The native accessors below either delegate to the cpuid subsystem
 * (using the cpu_t stashed as handle-private data) or return identity
 * values cached in the handle at creation time.
 */

static uint_t
ntv_vendor(cmi_hdl_impl_t *hdl)
{
	return (cpuid_getvendor(HDLPRIV(hdl)));
}

static const char *
ntv_vendorstr(cmi_hdl_impl_t *hdl)
{
	return (cpuid_getvendorstr(HDLPRIV(hdl)));
}

static uint_t
ntv_family(cmi_hdl_impl_t *hdl)
{
	return (cpuid_getfamily(HDLPRIV(hdl)));
}

static uint_t
ntv_model(cmi_hdl_impl_t *hdl)
{
	return (cpuid_getmodel(HDLPRIV(hdl)));
}

static uint_t
ntv_stepping(cmi_hdl_impl_t *hdl)
{
	return (cpuid_getstep(HDLPRIV(hdl)));
}

static uint_t
ntv_chipid(cmi_hdl_impl_t *hdl)
{
	return (hdl->cmih_chipid);

}

static uint_t
ntv_coreid(cmi_hdl_impl_t *hdl)
{
	return (hdl->cmih_coreid);
}

static uint_t
ntv_strandid(cmi_hdl_impl_t *hdl)
{
	return (hdl->cmih_strandid);
}

static uint32_t
ntv_chiprev(cmi_hdl_impl_t *hdl)
{
	return (cpuid_getchiprev(HDLPRIV(hdl)));
}

static const char *
ntv_chiprevstr(cmi_hdl_impl_t *hdl)
{
	return (cpuid_getchiprevstr(HDLPRIV(hdl)));
}

static uint32_t
ntv_getsockettype(cmi_hdl_impl_t *hdl)
{
	return (cpuid_getsockettype(HDLPRIV(hdl)));
}

static const char *
ntv_getsocketstr(cmi_hdl_impl_t *hdl)
{
	return (cpuid_getsocketstr(HDLPRIV(hdl)));
}

/* The logical (Solaris) cpu id of the strand backing this handle. */
static id_t
ntv_logical_id(cmi_hdl_impl_t *hdl)
{
	return (HDLPRIV(hdl)->cpu_id);
}
634 
/*
 * Cross-call target: read %cr4 on the executing cpu into *arg1.
 */
/*ARGSUSED*/
static int
ntv_getcr4_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
	ulong_t *dest = (ulong_t *)arg1;
	cmi_errno_t *rcp = (cmi_errno_t *)arg3;

	*dest = getcr4();
	*rcp = CMI_SUCCESS;

	return (0);
}

/*
 * Read %cr4 of the cpu backing this handle, cross-calling if that is
 * not the current cpu.
 */
static ulong_t
ntv_getcr4(cmi_hdl_impl_t *hdl)
{
	cpu_t *cp = HDLPRIV(hdl);
	ulong_t val;

	(void) call_func_ntv(cp->cpu_id, ntv_getcr4_xc, (xc_arg_t)&val, NULL);

	return (val);
}

/*
 * Cross-call target: load arg1 into %cr4 on the executing cpu.
 */
/*ARGSUSED*/
static int
ntv_setcr4_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
	ulong_t val = (ulong_t)arg1;
	cmi_errno_t *rcp = (cmi_errno_t *)arg3;

	setcr4(val);
	*rcp = CMI_SUCCESS;

	return (0);
}

/*
 * Set %cr4 of the cpu backing this handle.
 */
static void
ntv_setcr4(cmi_hdl_impl_t *hdl, ulong_t val)
{
	cpu_t *cp = HDLPRIV(hdl);

	(void) call_func_ntv(cp->cpu_id, ntv_setcr4_xc, (xc_arg_t)val, NULL);
}
679 
/* Count of rdmsr attempts that trapped (#GP on an inaccessible MSR). */
volatile uint32_t cmi_trapped_rdmsr;

/*
 * Cross-call target: read MSR arg1 into *arg2 on the executing cpu.
 * The access is made under on_trap protection so that a #GP from an
 * unimplemented/inaccessible MSR becomes CMIERR_MSRGPF instead of a
 * panic.
 */
/*ARGSUSED*/
static int
ntv_rdmsr_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
	uint_t msr = (uint_t)arg1;
	uint64_t *valp = (uint64_t *)arg2;
	cmi_errno_t *rcp = (cmi_errno_t *)arg3;

	on_trap_data_t otd;

	if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
		if (checked_rdmsr(msr, valp) == 0)
			*rcp = CMI_SUCCESS;
		else
			*rcp = CMIERR_NOTSUP;
	} else {
		*rcp = CMIERR_MSRGPF;
		atomic_inc_32(&cmi_trapped_rdmsr);
	}
	no_trap();

	return (0);
}

/*
 * Read an MSR on the cpu backing this handle.  If hardware MSR reads
 * have been disabled for this handle (cmih_msrsrc), return
 * CMIERR_INTERPOSE - presumably directing the caller to the MSR
 * interposition hash instead (confirm against the caller in cmi.c).
 */
static cmi_errno_t
ntv_rdmsr(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t *valp)
{
	cpu_t *cp = HDLPRIV(hdl);

	if (!(hdl->cmih_msrsrc & CMI_MSR_FLAG_RD_HWOK))
		return (CMIERR_INTERPOSE);

	return (call_func_ntv(cp->cpu_id, ntv_rdmsr_xc,
	    (xc_arg_t)msr, (xc_arg_t)valp));
}
717 
/* Count of wrmsr attempts that trapped (#GP on an inaccessible MSR). */
volatile uint32_t cmi_trapped_wrmsr;

/*
 * Cross-call target: write *arg2 to MSR arg1 on the executing cpu,
 * under on_trap protection so that a #GP becomes CMIERR_MSRGPF.
 */
/*ARGSUSED*/
static int
ntv_wrmsr_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
	uint_t msr = (uint_t)arg1;
	uint64_t val = *((uint64_t *)arg2);
	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
	on_trap_data_t otd;

	if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
		if (checked_wrmsr(msr, val) == 0)
			*rcp = CMI_SUCCESS;
		else
			*rcp = CMIERR_NOTSUP;
	} else {
		*rcp = CMIERR_MSRGPF;
		atomic_inc_32(&cmi_trapped_wrmsr);
	}
	no_trap();

	return (0);

}

/*
 * Write an MSR on the cpu backing this handle.  Note the asymmetry
 * with ntv_rdmsr: if hardware MSR writes are disallowed for this
 * handle the write is silently dropped and CMI_SUCCESS is returned.
 */
static cmi_errno_t
ntv_wrmsr(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t val)
{
	cpu_t *cp = HDLPRIV(hdl);

	if (!(hdl->cmih_msrsrc & CMI_MSR_FLAG_WR_HWOK))
		return (CMI_SUCCESS);

	return (call_func_ntv(cp->cpu_id, ntv_wrmsr_xc,
	    (xc_arg_t)msr, (xc_arg_t)&val));
}
755 
/*
 * Record an interposed MSR value for this handle in the MSR
 * interposition hash (see msri_addent).  Always succeeds.
 */
static cmi_errno_t
ntv_msrinterpose(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t val)
{
	msri_addent(hdl, msr, val);
	return (CMI_SUCCESS);
}
762 
/*
 * Cross-call target: raise an interrupt on the executing cpu - #MC
 * via int18() when arg1 is T_MCE, otherwise CMCI via int_cmci().
 */
/*ARGSUSED*/
static int
ntv_int_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
	int int_no = (int)arg1;

	if (int_no == T_MCE)
		int18();
	else
		int_cmci();
	*rcp = CMI_SUCCESS;

	return (0);
}

/*
 * Raise interrupt int_no on the cpu backing this handle.
 */
static void
ntv_int(cmi_hdl_impl_t *hdl, int int_no)
{
	cpu_t *cp = HDLPRIV(hdl);

	(void) call_func_ntv(cp->cpu_id, ntv_int_xc, (xc_arg_t)int_no, NULL);
}
786 
/*
 * Change the online/offline status of the cpu backing this handle via
 * the standard p_online_internal() kernel interface.
 */
static int
ntv_online(cmi_hdl_impl_t *hdl, int new_status, int *old_status)
{
	processorid_t cpuid = HDLPRIV(hdl)->cpu_id;

	return (p_online_internal(cpuid, new_status, old_status));
}
794 
795 #else	/* __xpv */
796 
797 /*
798  *	 =======================================================
799  *	|	xVM dom0 methods				|
800  *	|	----------------				|
801  *	|							|
802  *	| These are used when we are running as dom0 in		|
803  *	| a Solaris xVM context.				|
804  *	---------------------------------------------------------
805  */
806 
#define	HDLPRIV(hdl)	((xen_mc_lcpu_cookie_t)(hdl)->cmih_hdlpriv)

extern uint_t _cpuid_vendorstr_to_vendorcode(char *);


/*
 * Identity accessors for the xVM dom0 case.  Physical cpu attributes
 * come from the hypervisor via the xen_physcpu_* interfaces; topology
 * ids are cached in the handle at creation time.
 */

static uint_t
xpv_vendor(cmi_hdl_impl_t *hdl)
{
	return (_cpuid_vendorstr_to_vendorcode((char *)xen_physcpu_vendorstr(
	    HDLPRIV(hdl))));
}

static const char *
xpv_vendorstr(cmi_hdl_impl_t *hdl)
{
	return (xen_physcpu_vendorstr(HDLPRIV(hdl)));
}

static uint_t
xpv_family(cmi_hdl_impl_t *hdl)
{
	return (xen_physcpu_family(HDLPRIV(hdl)));
}

static uint_t
xpv_model(cmi_hdl_impl_t *hdl)
{
	return (xen_physcpu_model(HDLPRIV(hdl)));
}

static uint_t
xpv_stepping(cmi_hdl_impl_t *hdl)
{
	return (xen_physcpu_stepping(HDLPRIV(hdl)));
}

static uint_t
xpv_chipid(cmi_hdl_impl_t *hdl)
{
	return (hdl->cmih_chipid);
}

static uint_t
xpv_coreid(cmi_hdl_impl_t *hdl)
{
	return (hdl->cmih_coreid);
}

static uint_t
xpv_strandid(cmi_hdl_impl_t *hdl)
{
	return (hdl->cmih_strandid);
}
860 
extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);

/*
 * The chip revision, socket type and related strings are computed from
 * the vendor/family/model/stepping tuple using cpuid helper routines,
 * since physical cpus are visible here only as hypervisor cookies
 * rather than cpu_t structures.
 */

static uint32_t
xpv_chiprev(cmi_hdl_impl_t *hdl)
{
	return (_cpuid_chiprev(xpv_vendor(hdl), xpv_family(hdl),
	    xpv_model(hdl), xpv_stepping(hdl)));
}

extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);

static const char *
xpv_chiprevstr(cmi_hdl_impl_t *hdl)
{
	return (_cpuid_chiprevstr(xpv_vendor(hdl), xpv_family(hdl),
	    xpv_model(hdl), xpv_stepping(hdl)));
}

extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);

static uint32_t
xpv_getsockettype(cmi_hdl_impl_t *hdl)
{
	return (_cpuid_skt(xpv_vendor(hdl), xpv_family(hdl),
	    xpv_model(hdl), xpv_stepping(hdl)));
}

extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);

static const char *
xpv_getsocketstr(cmi_hdl_impl_t *hdl)
{
	return (_cpuid_sktstr(xpv_vendor(hdl), xpv_family(hdl),
	    xpv_model(hdl), xpv_stepping(hdl)));
}

/* The hypervisor's logical id for the physical cpu behind this handle. */
static id_t
xpv_logical_id(cmi_hdl_impl_t *hdl)
{
	return (xen_physcpu_logical_id(HDLPRIV(hdl)));
}
901 }
902 
903 static cmi_errno_t
904 xpv_rdmsr(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t *valp)
905 {
906 	switch (msr) {
907 	case IA32_MSR_MCG_CAP:
908 		*valp = xen_physcpu_mcg_cap(HDLPRIV(hdl));
909 		break;
910 
911 	default:
912 		return (CMIERR_NOTSUP);
913 	}
914 
915 	return (CMI_SUCCESS);
916 }
917 
918 /*
919  * Request the hypervisor to write an MSR for us.  The hypervisor
920  * will only accept MCA-related MSRs, as this is for MCA error
921  * simulation purposes alone.  We will pre-screen MSRs for injection
922  * so we don't bother the HV with bogus requests.  We will permit
923  * injection to any MCA bank register, and to MCG_STATUS.
924  */
925 
926 #define	IS_MCA_INJ_MSR(msr) \
927 	(((msr) >= IA32_MSR_MC(0, CTL) && (msr) <= IA32_MSR_MC(10, MISC)) || \
928 	(msr) == IA32_MSR_MCG_STATUS)
929 
930 static cmi_errno_t
931 xpv_wrmsr_cmn(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t val, boolean_t intpose)
932 {
933 	struct xen_mc_msrinject mci;
934 
935 	if (!(hdl->cmih_flags & CMIH_F_INJACTV))
936 		return (CMIERR_NOTSUP);		/* for injection use only! */
937 
938 	if (!IS_MCA_INJ_MSR(msr))
939 		return (CMIERR_API);
940 
941 	if (panicstr)
942 		return (CMIERR_DEADLOCK);
943 
944 	mci.mcinj_cpunr = xen_physcpu_logical_id(HDLPRIV(hdl));
945 	mci.mcinj_flags = intpose ? MC_MSRINJ_F_INTERPOSE : 0;
946 	mci.mcinj_count = 1;	/* learn to batch sometime */
947 	mci.mcinj_msr[0].reg = msr;
948 	mci.mcinj_msr[0].value = val;
949 
950 	return (HYPERVISOR_mca(XEN_MC_CMD_msrinject, (xen_mc_arg_t *)&mci) ==
951 	    XEN_MC_HCALL_SUCCESS ?  CMI_SUCCESS : CMIERR_NOTSUP);
952 }
953 
/* Plain (non-interposing) MSR write via the hypervisor. */
static cmi_errno_t
xpv_wrmsr(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t val)
{
	return (xpv_wrmsr_cmn(hdl, msr, val, B_FALSE));
}


/* Ask the hypervisor to interpose the value rather than write hardware. */
static cmi_errno_t
xpv_msrinterpose(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t val)
{
	return (xpv_wrmsr_cmn(hdl, msr, val, B_TRUE));
}
966 
967 static void
968 xpv_int(cmi_hdl_impl_t *hdl, int int_no)
969 {
970 	struct xen_mc_mceinject mce;
971 
972 	if (!(hdl->cmih_flags & CMIH_F_INJACTV))
973 		return;
974 
975 	if (int_no != T_MCE) {
976 		cmn_err(CE_WARN, "xpv_int: int_no %d unimplemented\n",
977 		    int_no);
978 	}
979 
980 	mce.mceinj_cpunr = xen_physcpu_logical_id(HDLPRIV(hdl));
981 
982 	(void) HYPERVISOR_mca(XEN_MC_CMD_mceinject, (xen_mc_arg_t *)&mce);
983 }
984 
985 #define	CSM_XLATE_SUNOS2XEN	1
986 #define	CSM_XLATE_XEN2SUNOS	2
987 
988 #define	CSM_MAPENT(suffix)	{ P_##suffix, MC_CPU_P_##suffix }
989 
990 static int
991 cpu_status_xlate(int in, int direction, int *outp)
992 {
993 	struct cpu_status_map {
994 		int csm_val[2];
995 	} map[] = {
996 		CSM_MAPENT(STATUS),
997 		CSM_MAPENT(ONLINE),
998 		CSM_MAPENT(OFFLINE),
999 		CSM_MAPENT(FAULTED),
1000 		CSM_MAPENT(SPARE),
1001 		CSM_MAPENT(POWEROFF)
1002 	};
1003 
1004 	int cmpidx = (direction == CSM_XLATE_XEN2SUNOS);
1005 	int i;
1006 
1007 	for (i = 0; i < sizeof (map) / sizeof (struct cpu_status_map); i++) {
1008 		if (map[i].csm_val[cmpidx] == in) {
1009 			*outp = map[i].csm_val[!cmpidx];
1010 			return (1);
1011 		}
1012 	}
1013 
1014 	return (0);
1015 }
1016 
/*
 * Ask the hypervisor to change the status of a physical cpu.
 * new_status/*old_status use the Solaris P_* namespace and are
 * translated to/from the hypervisor's MC_CPU_P_* namespace.  Returns
 * ENOSYS if the requested status has no hypervisor equivalent,
 * otherwise the negated hypercall result.
 * NOTE(review): the return assumes XEN_MC_HCALL_SUCCESS is zero and
 * that failing hypercall results negate to positive errno-style
 * values; confirm against the XEN_MC_HCALL_* definitions.
 */
static int
xpv_online(cmi_hdl_impl_t *hdl, int new_status, int *old_status)
{
	struct xen_mc_offline mco;
	int flag, rc;

	new_status &= ~P_FORCED;	/* no P_FORCED entry in the map */

	if (!cpu_status_xlate(new_status, CSM_XLATE_SUNOS2XEN, &flag))
		return (ENOSYS);

	mco.mco_cpu = xen_physcpu_logical_id(HDLPRIV(hdl));
	mco.mco_flag = flag;

	if ((rc = HYPERVISOR_mca(XEN_MC_CMD_offlinecpu,
	    (xen_mc_arg_t *)&mco)) == XEN_MC_HCALL_SUCCESS) {
		/* Translate the resulting status back for the caller. */
		flag = mco.mco_flag;
		if (!cpu_status_xlate(flag, CSM_XLATE_XEN2SUNOS, old_status))
			cmn_err(CE_NOTE, "xpv_online: unknown status %d.",
			    flag);
	}

	return (-rc);
}
1041 
1042 #endif
1043 
/*
 * Locate the platform-private data for the cpu with the given
 * chip/core/strand ids: a xen_mc_lcpu_cookie_t when running under the
 * hypervisor, otherwise the cpu_t.  Returns NULL if no matching cpu
 * is found.
 */
/*ARGSUSED*/
static void *
cpu_search(enum cmi_hdl_class class, uint_t chipid, uint_t coreid,
    uint_t strandid)
{
#ifdef __xpv
	xen_mc_lcpu_cookie_t cpi;

	/* Walk the hypervisor's list of physical cpus. */
	for (cpi = xen_physcpu_next(NULL); cpi != NULL;
	    cpi = xen_physcpu_next(cpi)) {
		if (xen_physcpu_chipid(cpi) == chipid &&
		    xen_physcpu_coreid(cpi) == coreid &&
		    xen_physcpu_strandid(cpi) == strandid)
			return ((void *)cpi);
	}
	return (NULL);

#else	/* __xpv */

	cpu_t *cp, *startcp;

	/*
	 * Walk the circular cpu_next list, starting from the current
	 * cpu; preemption is disabled while we traverse.
	 */
	kpreempt_disable();
	cp = startcp = CPU;
	do {
		if (cmi_ntv_hwchipid(cp) == chipid &&
		    cmi_ntv_hwcoreid(cp) == coreid &&
		    cmi_ntv_hwstrandid(cp) == strandid) {
			kpreempt_enable();
			return ((void *)cp);
		}

		cp = cp->cpu_next;
	} while (cp != startcp);
	kpreempt_enable();
	return (NULL);
#endif	/* __ xpv */
}
1081 
1082 static boolean_t
1083 cpu_is_cmt(void *priv)
1084 {
1085 #ifdef __xpv
1086 	return (xen_physcpu_is_cmt((xen_mc_lcpu_cookie_t)priv));
1087 #else /* __xpv */
1088 	cpu_t *cp = (cpu_t *)priv;
1089 
1090 	int strands_per_core = cpuid_get_ncpu_per_chip(cp) /
1091 	    cpuid_get_ncore_per_chip(cp);
1092 
1093 	return (strands_per_core > 1);
1094 #endif /* __xpv */
1095 }
1096 
1097 /*
1098  * Find the handle entry of a given cpu identified by a <chip,core,strand>
1099  * tuple.
1100  */
1101 static cmi_hdl_ent_t *
1102 cmi_hdl_ent_lookup(uint_t chipid, uint_t coreid, uint_t strandid)
1103 {
1104 	/*
1105 	 * Allocate per-chip table which contains a list of handle of
1106 	 * all strands of the chip.
1107 	 */
1108 	if (cmi_chip_tab[chipid] == NULL) {
1109 		size_t sz;
1110 		cmi_hdl_ent_t *pg;
1111 
1112 		sz = CMI_MAX_STRANDS_PER_CHIP * sizeof (cmi_hdl_ent_t);
1113 		pg = kmem_zalloc(sz, KM_SLEEP);
1114 
1115 		/* test and set the per-chip table if it is not allocated */
1116 		if (atomic_cas_ptr(&cmi_chip_tab[chipid], NULL, pg) != NULL)
1117 			kmem_free(pg, sz); /* someone beat us */
1118 	}
1119 
1120 	return (cmi_chip_tab[chipid] + CMI_HDL_ARR_IDX(coreid, strandid));
1121 }
1122 
/*
 * Create and register a handle for the cpu identified by the
 * <chipid, coreid, strandid> tuple.  Returns NULL if any id is out of
 * range, no matching cpu is found, or a handle already exists for the
 * tuple.  On success the handle is recorded in the per-chip table with
 * an initial reference count of 1.
 */
cmi_hdl_t
cmi_hdl_create(enum cmi_hdl_class class, uint_t chipid, uint_t coreid,
    uint_t strandid)
{
	cmi_hdl_impl_t *hdl;
	void *priv;
	cmi_hdl_ent_t *ent;

	/* Exactly one handle class is valid per build environment. */
#ifdef __xpv
	ASSERT(class == CMI_HDL_SOLARIS_xVM_MCA);
#else
	ASSERT(class == CMI_HDL_NATIVE);
#endif

	/* Reject tuples that would index outside the per-chip tables. */
	if (chipid > CMI_MAX_CHIPID ||
	    coreid > CMI_MAX_CORES_PER_CHIP - 1 ||
	    strandid > CMI_MAX_STRANDS_PER_CORE - 1)
		return (NULL);

	/* Locate the class-specific private cpu structure for the tuple. */
	if ((priv = cpu_search(class, chipid, coreid, strandid)) == NULL)
		return (NULL);

	hdl = kmem_zalloc(sizeof (*hdl), KM_SLEEP);

	hdl->cmih_class = class;
	HDLOPS(hdl) = &cmi_hdl_ops;
	hdl->cmih_chipid = chipid;
	hdl->cmih_coreid = coreid;
	hdl->cmih_strandid = strandid;
	hdl->cmih_mstrand = cpu_is_cmt(priv);
	hdl->cmih_hdlpriv = priv;
	/*
	 * xVM handles permit only interposed MSR access; native handles
	 * additionally allow direct hardware MSR access.
	 */
#ifdef __xpv
	hdl->cmih_msrsrc = CMI_MSR_FLAG_RD_INTERPOSEOK |
	    CMI_MSR_FLAG_WR_INTERPOSEOK;
#else	/* __xpv */
	hdl->cmih_msrsrc = CMI_MSR_FLAG_RD_HWOK | CMI_MSR_FLAG_RD_INTERPOSEOK |
	    CMI_MSR_FLAG_WR_HWOK | CMI_MSR_FLAG_WR_INTERPOSEOK;
#endif

	ent = cmi_hdl_ent_lookup(chipid, coreid, strandid);
	if (ent->cmae_refcnt != 0 || ent->cmae_hdlp != NULL) {
		/*
		 * Somehow this (chipid, coreid, strandid) id tuple has
		 * already been assigned!  This indicates that the
		 * callers logic in determining these values is busted,
		 * or perhaps undermined by bad BIOS setup.  Complain,
		 * and refuse to initialize this tuple again as bad things
		 * will happen.
		 */
		cmn_err(CE_NOTE, "cmi_hdl_create: chipid %d coreid %d "
		    "strandid %d handle already allocated!",
		    chipid, coreid, strandid);
		kmem_free(hdl, sizeof (*hdl));
		return (NULL);
	}

	/*
	 * Once we store a nonzero reference count others can find this
	 * handle via cmi_hdl_lookup etc.  This initial hold on the handle
	 * is to be dropped only if some other part of cmi initialization
	 * fails or, if it succeeds, at later cpu deconfigure.  Note the
	 * the module private data we hold in cmih_cmi and cmih_cmidata
	 * is still NULL at this point (the caller will fill it with
	 * cmi_hdl_setcmi if it initializes) so consumers of handles
	 * should always be ready for that possibility.
	 */
	ent->cmae_hdlp = hdl;
	hdl->cmih_refcntp = &ent->cmae_refcnt;
	ent->cmae_refcnt = 1;

	return ((cmi_hdl_t)hdl);
}
1195 
1196 void
1197 cmi_hdl_hold(cmi_hdl_t ophdl)
1198 {
1199 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1200 
1201 	ASSERT(*hdl->cmih_refcntp != 0); /* must not be the initial hold */
1202 
1203 	atomic_inc_32(hdl->cmih_refcntp);
1204 }
1205 
/*
 * Attempt to take a reference on the handle tracked by 'ent'.  Returns
 * 1 with the reference count incremented on success, or 0 if the
 * associated handle never existed or has been (or is being) destroyed.
 * Lock-free: uses compare-and-swap so that a count which has dropped
 * to zero is never resurrected.
 */
static int
cmi_hdl_canref(cmi_hdl_ent_t *ent)
{
	volatile uint32_t *refcntp;
	uint32_t refcnt;

	refcntp = &ent->cmae_refcnt;
	refcnt = *refcntp;

	if (refcnt == 0) {
		/*
		 * Associated object never existed, is being destroyed,
		 * or has been destroyed.
		 */
		return (0);
	}

	/*
	 * We cannot use atomic increment here because once the reference
	 * count reaches zero it must never be bumped up again.
	 */
	while (refcnt != 0) {
		if (atomic_cas_32(refcntp, refcnt, refcnt + 1) == refcnt)
			return (1);
		/* CAS lost a race; reload and retry unless now zero. */
		refcnt = *refcntp;
	}

	/*
	 * Somebody dropped the reference count to 0 after our initial
	 * check.
	 */
	return (0);
}
1239 
1240 
/*
 * Drop a reference on a handle.  When the final reference is dropped
 * the handle is unhooked from the per-chip table and freed; the zero
 * refcnt then allows the tuple to be re-used by cmi_hdl_create.
 */
void
cmi_hdl_rele(cmi_hdl_t ophdl)
{
	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
	cmi_hdl_ent_t *ent;

	ASSERT(*hdl->cmih_refcntp > 0);

	/* Other references remain - nothing further to do. */
	if (atomic_dec_32_nv(hdl->cmih_refcntp) > 0)
		return;

	/* Last reference: clear the table entry and free the handle. */
	ent = cmi_hdl_ent_lookup(hdl->cmih_chipid, hdl->cmih_coreid,
	    hdl->cmih_strandid);
	ent->cmae_hdlp = NULL;

	kmem_free(hdl, sizeof (*hdl));
}
1258 
1259 void
1260 cmi_hdl_setspecific(cmi_hdl_t ophdl, void *arg)
1261 {
1262 	IMPLHDL(ophdl)->cmih_spec = arg;
1263 }
1264 
1265 void *
1266 cmi_hdl_getspecific(cmi_hdl_t ophdl)
1267 {
1268 	return (IMPLHDL(ophdl)->cmih_spec);
1269 }
1270 
1271 void
1272 cmi_hdl_setmc(cmi_hdl_t ophdl, const struct cmi_mc_ops *mcops, void *mcdata)
1273 {
1274 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1275 
1276 	ASSERT(hdl->cmih_mcops == NULL && hdl->cmih_mcdata == NULL);
1277 	hdl->cmih_mcops = mcops;
1278 	hdl->cmih_mcdata = mcdata;
1279 }
1280 
1281 const struct cmi_mc_ops *
1282 cmi_hdl_getmcops(cmi_hdl_t ophdl)
1283 {
1284 	return (IMPLHDL(ophdl)->cmih_mcops);
1285 }
1286 
1287 void *
1288 cmi_hdl_getmcdata(cmi_hdl_t ophdl)
1289 {
1290 	return (IMPLHDL(ophdl)->cmih_mcdata);
1291 }
1292 
/*
 * Look up and take a reference on the handle for the given
 * <chipid, coreid, strandid> tuple.  CMI_HDL_NEUTRAL matches whichever
 * class is appropriate to this build (xVM or native).  Returns NULL if
 * the tuple is out of range, no live handle exists, or the class does
 * not match; otherwise the caller must cmi_hdl_rele the result.
 */
cmi_hdl_t
cmi_hdl_lookup(enum cmi_hdl_class class, uint_t chipid, uint_t coreid,
    uint_t strandid)
{
	cmi_hdl_ent_t *ent;

	if (chipid > CMI_MAX_CHIPID ||
	    coreid > CMI_MAX_CORES_PER_CHIP - 1 ||
	    strandid > CMI_MAX_STRANDS_PER_CORE - 1)
		return (NULL);

	ent = cmi_hdl_ent_lookup(chipid, coreid, strandid);

	/*
	 * Each preprocessor arm below is a single statement, so the
	 * unbraced 'if' remains correct in both builds.
	 */
	if (class == CMI_HDL_NEUTRAL)
#ifdef __xpv
		class = CMI_HDL_SOLARIS_xVM_MCA;
#else
		class = CMI_HDL_NATIVE;
#endif

	if (!cmi_hdl_canref(ent))
		return (NULL);

	/* Class mismatch - drop the reference we just took. */
	if (ent->cmae_hdlp->cmih_class != class) {
		cmi_hdl_rele((cmi_hdl_t)ent->cmae_hdlp);
		return (NULL);
	}

	return ((cmi_hdl_t)ent->cmae_hdlp);
}
1323 
1324 cmi_hdl_t
1325 cmi_hdl_any(void)
1326 {
1327 	int i, j;
1328 	cmi_hdl_ent_t *ent;
1329 
1330 	for (i = 0; i < CMI_CHIPID_ARR_SZ; i++) {
1331 		if (cmi_chip_tab[i] == NULL)
1332 			continue;
1333 		for (j = 0, ent = cmi_chip_tab[i]; j < CMI_MAX_STRANDS_PER_CHIP;
1334 		    j++, ent++) {
1335 			if (cmi_hdl_canref(ent))
1336 				return ((cmi_hdl_t)ent->cmae_hdlp);
1337 		}
1338 	}
1339 
1340 	return (NULL);
1341 }
1342 
1343 void
1344 cmi_hdl_walk(int (*cbfunc)(cmi_hdl_t, void *, void *, void *),
1345     void *arg1, void *arg2, void *arg3)
1346 {
1347 	int i, j;
1348 	cmi_hdl_ent_t *ent;
1349 
1350 	for (i = 0; i < CMI_CHIPID_ARR_SZ; i++) {
1351 		if (cmi_chip_tab[i] == NULL)
1352 			continue;
1353 		for (j = 0, ent = cmi_chip_tab[i]; j < CMI_MAX_STRANDS_PER_CHIP;
1354 		    j++, ent++) {
1355 			if (cmi_hdl_canref(ent)) {
1356 				cmi_hdl_impl_t *hdl = ent->cmae_hdlp;
1357 				if ((*cbfunc)((cmi_hdl_t)hdl, arg1, arg2, arg3)
1358 				    == CMI_HDL_WALK_DONE) {
1359 					cmi_hdl_rele((cmi_hdl_t)hdl);
1360 					return;
1361 				}
1362 				cmi_hdl_rele((cmi_hdl_t)hdl);
1363 			}
1364 		}
1365 	}
1366 }
1367 
1368 void
1369 cmi_hdl_setcmi(cmi_hdl_t ophdl, void *cmi, void *cmidata)
1370 {
1371 	IMPLHDL(ophdl)->cmih_cmidata = cmidata;
1372 	IMPLHDL(ophdl)->cmih_cmi = cmi;
1373 }
1374 
1375 void *
1376 cmi_hdl_getcmi(cmi_hdl_t ophdl)
1377 {
1378 	return (IMPLHDL(ophdl)->cmih_cmi);
1379 }
1380 
1381 void *
1382 cmi_hdl_getcmidata(cmi_hdl_t ophdl)
1383 {
1384 	return (IMPLHDL(ophdl)->cmih_cmidata);
1385 }
1386 
1387 enum cmi_hdl_class
1388 cmi_hdl_class(cmi_hdl_t ophdl)
1389 {
1390 	return (IMPLHDL(ophdl)->cmih_class);
1391 }
1392 
/*
 * Generate trivial accessor functions - cmi_hdl_vendor(), etc - each
 * of which simply dispatches through the handle's class operations
 * vector to the corresponding cmio_* entry point.
 */
#define	CMI_HDL_OPFUNC(what, type)				\
	type							\
	cmi_hdl_##what(cmi_hdl_t ophdl)				\
	{							\
		return (HDLOPS(IMPLHDL(ophdl))->		\
		    cmio_##what(IMPLHDL(ophdl)));		\
	}

CMI_HDL_OPFUNC(vendor, uint_t)
CMI_HDL_OPFUNC(vendorstr, const char *)
CMI_HDL_OPFUNC(family, uint_t)
CMI_HDL_OPFUNC(model, uint_t)
CMI_HDL_OPFUNC(stepping, uint_t)
CMI_HDL_OPFUNC(chipid, uint_t)
CMI_HDL_OPFUNC(coreid, uint_t)
CMI_HDL_OPFUNC(strandid, uint_t)
CMI_HDL_OPFUNC(chiprev, uint32_t)
CMI_HDL_OPFUNC(chiprevstr, const char *)
CMI_HDL_OPFUNC(getsockettype, uint32_t)
CMI_HDL_OPFUNC(getsocketstr, const char *)
CMI_HDL_OPFUNC(logical_id, id_t)
1414 
1415 boolean_t
1416 cmi_hdl_is_cmt(cmi_hdl_t ophdl)
1417 {
1418 	return (IMPLHDL(ophdl)->cmih_mstrand);
1419 }
1420 
1421 void
1422 cmi_hdl_int(cmi_hdl_t ophdl, int num)
1423 {
1424 	if (HDLOPS(IMPLHDL(ophdl))->cmio_int == NULL)
1425 		return;
1426 
1427 	cmi_hdl_inj_begin(ophdl);
1428 	HDLOPS(IMPLHDL(ophdl))->cmio_int(IMPLHDL(ophdl), num);
1429 	cmi_hdl_inj_end(NULL);
1430 }
1431 
1432 int
1433 cmi_hdl_online(cmi_hdl_t ophdl, int new_status, int *old_status)
1434 {
1435 	return (HDLOPS(IMPLHDL(ophdl))->cmio_online(IMPLHDL(ophdl),
1436 	    new_status, old_status));
1437 }
1438 
1439 #ifndef	__xpv
1440 /*
1441  * Return hardware chip instance; cpuid_get_chipid provides this directly.
1442  */
1443 uint_t
1444 cmi_ntv_hwchipid(cpu_t *cp)
1445 {
1446 	return (cpuid_get_chipid(cp));
1447 }
1448 
1449 /*
1450  * Return core instance within a single chip.
1451  */
1452 uint_t
1453 cmi_ntv_hwcoreid(cpu_t *cp)
1454 {
1455 	return (cpuid_get_pkgcoreid(cp));
1456 }
1457 
1458 /*
1459  * Return strand number within a single core.  cpuid_get_clogid numbers
1460  * all execution units (strands, or cores in unstranded models) sequentially
1461  * within a single chip.
1462  */
1463 uint_t
1464 cmi_ntv_hwstrandid(cpu_t *cp)
1465 {
1466 	int strands_per_core = cpuid_get_ncpu_per_chip(cp) /
1467 	    cpuid_get_ncore_per_chip(cp);
1468 
1469 	return (cpuid_get_clogid(cp) % strands_per_core);
1470 }
1471 #endif	/* __xpv */
1472 
1473 void
1474 cmi_hdlconf_rdmsr_nohw(cmi_hdl_t ophdl)
1475 {
1476 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1477 
1478 	hdl->cmih_msrsrc &= ~CMI_MSR_FLAG_RD_HWOK;
1479 }
1480 
1481 void
1482 cmi_hdlconf_wrmsr_nohw(cmi_hdl_t ophdl)
1483 {
1484 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1485 
1486 	hdl->cmih_msrsrc &= ~CMI_MSR_FLAG_WR_HWOK;
1487 }
1488 
/*
 * Read an MSR via the handle, preferring any interposed value over the
 * class-specific read operation.  Returns CMIERR_NOTSUP if the handle
 * class provides no cmio_rdmsr operation.
 */
cmi_errno_t
cmi_hdl_rdmsr(cmi_hdl_t ophdl, uint_t msr, uint64_t *valp)
{
	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);

	/*
	 * Regardless of the handle class, we first check for an
	 * interposed value.  In the xVM case you probably want to
	 * place interposed values within the hypervisor itself, but
	 * we still allow interposing them in dom0 for test and bringup
	 * purposes.
	 */
	if ((hdl->cmih_msrsrc & CMI_MSR_FLAG_RD_INTERPOSEOK) &&
	    msri_lookup(hdl, msr, valp))
		return (CMI_SUCCESS);

	if (HDLOPS(hdl)->cmio_rdmsr == NULL)
		return (CMIERR_NOTSUP);

	return (HDLOPS(hdl)->cmio_rdmsr(hdl, msr, valp));
}
1510 
1511 cmi_errno_t
1512 cmi_hdl_wrmsr(cmi_hdl_t ophdl, uint_t msr, uint64_t val)
1513 {
1514 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1515 
1516 	/* Invalidate any interposed value */
1517 	msri_rment(hdl, msr);
1518 
1519 	if (HDLOPS(hdl)->cmio_wrmsr == NULL)
1520 		return (CMI_SUCCESS);	/* pretend all is ok */
1521 
1522 	return (HDLOPS(hdl)->cmio_wrmsr(hdl, msr, val));
1523 }
1524 
1525 void
1526 cmi_hdl_enable_mce(cmi_hdl_t ophdl)
1527 {
1528 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1529 	ulong_t cr4;
1530 
1531 	if (HDLOPS(hdl)->cmio_getcr4 == NULL ||
1532 	    HDLOPS(hdl)->cmio_setcr4 == NULL)
1533 		return;
1534 
1535 	cr4 = HDLOPS(hdl)->cmio_getcr4(hdl);
1536 
1537 	HDLOPS(hdl)->cmio_setcr4(hdl, cr4 | CR4_MCE);
1538 }
1539 
1540 void
1541 cmi_hdl_msrinterpose(cmi_hdl_t ophdl, cmi_mca_regs_t *regs, uint_t nregs)
1542 {
1543 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1544 	int i;
1545 
1546 	if (HDLOPS(hdl)->cmio_msrinterpose == NULL)
1547 		return;
1548 
1549 	cmi_hdl_inj_begin(ophdl);
1550 
1551 	for (i = 0; i < nregs; i++, regs++)
1552 		HDLOPS(hdl)->cmio_msrinterpose(hdl, regs->cmr_msrnum,
1553 		    regs->cmr_msrval);
1554 
1555 	cmi_hdl_inj_end(ophdl);
1556 }
1557 
/*
 * Record 'nregs' MSR values in the handle's MSR interpose store so
 * subsequent cmi_hdl_rdmsr calls will observe them.  Only compiled to
 * do work under xVM; on bare metal it is a no-op (hence ARGSUSED).
 */
/*ARGSUSED*/
void
cmi_hdl_msrforward(cmi_hdl_t ophdl, cmi_mca_regs_t *regs, uint_t nregs)
{
#ifdef __xpv
	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
	int i;

	for (i = 0; i < nregs; i++, regs++)
		msri_addent(hdl, regs->cmr_msrnum, regs->cmr_msrval);
#endif
}
1570 
1571 
/*
 * Disallow PCI config space reads from actual hardware; interposed
 * values remain readable (see cmi_pci_get_cmn).
 */
void
cmi_pcird_nohw(void)
{
	cmi_pcicfg_flags &= ~CMI_PCICFG_FLAG_RD_HWOK;
}
1577 
/*
 * Disallow PCI config space writes to actual hardware (see
 * cmi_pci_put_cmn).
 */
void
cmi_pciwr_nohw(void)
{
	cmi_pcicfg_flags &= ~CMI_PCICFG_FLAG_WR_HWOK;
}
1583 
1584 static uint32_t
1585 cmi_pci_get_cmn(int bus, int dev, int func, int reg, int asz,
1586     int *interpose, ddi_acc_handle_t hdl)
1587 {
1588 	uint32_t val;
1589 
1590 	if (cmi_pcicfg_flags & CMI_PCICFG_FLAG_RD_INTERPOSEOK &&
1591 	    pcii_lookup(bus, dev, func, reg, asz, &val)) {
1592 		if (interpose)
1593 			*interpose = 1;
1594 		return (val);
1595 	}
1596 	if (interpose)
1597 		*interpose = 0;
1598 
1599 	if (!(cmi_pcicfg_flags & CMI_PCICFG_FLAG_RD_HWOK))
1600 		return (0);
1601 
1602 	switch (asz) {
1603 	case 1:
1604 		if (hdl)
1605 			val = pci_config_get8(hdl, (off_t)reg);
1606 		else
1607 			val = (*pci_getb_func)(bus, dev, func, reg);
1608 		break;
1609 	case 2:
1610 		if (hdl)
1611 			val = pci_config_get16(hdl, (off_t)reg);
1612 		else
1613 			val = (*pci_getw_func)(bus, dev, func, reg);
1614 		break;
1615 	case 4:
1616 		if (hdl)
1617 			val = pci_config_get32(hdl, (off_t)reg);
1618 		else
1619 			val = (*pci_getl_func)(bus, dev, func, reg);
1620 		break;
1621 	default:
1622 		val = 0;
1623 	}
1624 	return (val);
1625 }
1626 
1627 uint8_t
1628 cmi_pci_getb(int bus, int dev, int func, int reg, int *interpose,
1629     ddi_acc_handle_t hdl)
1630 {
1631 	return ((uint8_t)cmi_pci_get_cmn(bus, dev, func, reg, 1, interpose,
1632 	    hdl));
1633 }
1634 
1635 uint16_t
1636 cmi_pci_getw(int bus, int dev, int func, int reg, int *interpose,
1637     ddi_acc_handle_t hdl)
1638 {
1639 	return ((uint16_t)cmi_pci_get_cmn(bus, dev, func, reg, 2, interpose,
1640 	    hdl));
1641 }
1642 
1643 uint32_t
1644 cmi_pci_getl(int bus, int dev, int func, int reg, int *interpose,
1645     ddi_acc_handle_t hdl)
1646 {
1647 	return (cmi_pci_get_cmn(bus, dev, func, reg, 4, interpose, hdl));
1648 }
1649 
/*
 * Interpose an 8-bit value over PCI config reads of bus/dev/func/reg
 * (returned in preference to hardware by cmi_pci_get_cmn).
 */
void
cmi_pci_interposeb(int bus, int dev, int func, int reg, uint8_t val)
{
	pcii_addent(bus, dev, func, reg, val, 1);
}
1655 
/*
 * Interpose a 16-bit value over PCI config reads of bus/dev/func/reg
 * (returned in preference to hardware by cmi_pci_get_cmn).
 */
void
cmi_pci_interposew(int bus, int dev, int func, int reg, uint16_t val)
{
	pcii_addent(bus, dev, func, reg, val, 2);
}
1661 
/*
 * Interpose a 32-bit value over PCI config reads of bus/dev/func/reg
 * (returned in preference to hardware by cmi_pci_get_cmn).
 */
void
cmi_pci_interposel(int bus, int dev, int func, int reg, uint32_t val)
{
	pcii_addent(bus, dev, func, reg, val, 4);
}
1667 
1668 static void
1669 cmi_pci_put_cmn(int bus, int dev, int func, int reg, int asz,
1670     ddi_acc_handle_t hdl, uint32_t val)
1671 {
1672 	/*
1673 	 * If there is an interposed value for this register invalidate it.
1674 	 */
1675 	pcii_rment(bus, dev, func, reg, asz);
1676 
1677 	if (!(cmi_pcicfg_flags & CMI_PCICFG_FLAG_WR_HWOK))
1678 		return;
1679 
1680 	switch (asz) {
1681 	case 1:
1682 		if (hdl)
1683 			pci_config_put8(hdl, (off_t)reg, (uint8_t)val);
1684 		else
1685 			(*pci_putb_func)(bus, dev, func, reg, (uint8_t)val);
1686 		break;
1687 
1688 	case 2:
1689 		if (hdl)
1690 			pci_config_put16(hdl, (off_t)reg, (uint16_t)val);
1691 		else
1692 			(*pci_putw_func)(bus, dev, func, reg, (uint16_t)val);
1693 		break;
1694 
1695 	case 4:
1696 		if (hdl)
1697 			pci_config_put32(hdl, (off_t)reg, val);
1698 		else
1699 			(*pci_putl_func)(bus, dev, func, reg, val);
1700 		break;
1701 
1702 	default:
1703 		break;
1704 	}
1705 }
1706 
/*
 * 8-bit PCI config space write; see cmi_pci_put_cmn.
 */
void
cmi_pci_putb(int bus, int dev, int func, int reg, ddi_acc_handle_t hdl,
    uint8_t val)
{
	cmi_pci_put_cmn(bus, dev, func, reg, 1, hdl, val);
}
1713 
/*
 * 16-bit PCI config space write; see cmi_pci_put_cmn.
 */
void
cmi_pci_putw(int bus, int dev, int func, int reg, ddi_acc_handle_t hdl,
    uint16_t val)
{
	cmi_pci_put_cmn(bus, dev, func, reg, 2, hdl, val);
}
1720 
/*
 * 32-bit PCI config space write; see cmi_pci_put_cmn.
 */
void
cmi_pci_putl(int bus, int dev, int func, int reg, ddi_acc_handle_t hdl,
    uint32_t val)
{
	cmi_pci_put_cmn(bus, dev, func, reg, 4, hdl, val);
}
1727 
/*
 * The single operations vector for this build: xVM dom0 ops when
 * compiled with __xpv, otherwise native bare-metal ops.  Installed
 * into every handle by cmi_hdl_create.
 */
static const struct cmi_hdl_ops cmi_hdl_ops = {
#ifdef __xpv
	/*
	 * CMI_HDL_SOLARIS_xVM_MCA - ops when we are an xVM dom0
	 */
	xpv_vendor,		/* cmio_vendor */
	xpv_vendorstr,		/* cmio_vendorstr */
	xpv_family,		/* cmio_family */
	xpv_model,		/* cmio_model */
	xpv_stepping,		/* cmio_stepping */
	xpv_chipid,		/* cmio_chipid */
	xpv_coreid,		/* cmio_coreid */
	xpv_strandid,		/* cmio_strandid */
	xpv_chiprev,		/* cmio_chiprev */
	xpv_chiprevstr,		/* cmio_chiprevstr */
	xpv_getsockettype,	/* cmio_getsockettype */
	xpv_getsocketstr,	/* cmio_getsocketstr */
	xpv_logical_id,		/* cmio_logical_id */
	NULL,			/* cmio_getcr4 */
	NULL,			/* cmio_setcr4 */
	xpv_rdmsr,		/* cmio_rdmsr */
	xpv_wrmsr,		/* cmio_wrmsr */
	xpv_msrinterpose,	/* cmio_msrinterpose */
	xpv_int,		/* cmio_int */
	xpv_online		/* cmio_online */

#else	/* __xpv */

	/*
	 * CMI_HDL_NATIVE - ops when apparently running on bare-metal
	 */
	ntv_vendor,		/* cmio_vendor */
	ntv_vendorstr,		/* cmio_vendorstr */
	ntv_family,		/* cmio_family */
	ntv_model,		/* cmio_model */
	ntv_stepping,		/* cmio_stepping */
	ntv_chipid,		/* cmio_chipid */
	ntv_coreid,		/* cmio_coreid */
	ntv_strandid,		/* cmio_strandid */
	ntv_chiprev,		/* cmio_chiprev */
	ntv_chiprevstr,		/* cmio_chiprevstr */
	ntv_getsockettype,	/* cmio_getsockettype */
	ntv_getsocketstr,	/* cmio_getsocketstr */
	ntv_logical_id,		/* cmio_logical_id */
	ntv_getcr4,		/* cmio_getcr4 */
	ntv_setcr4,		/* cmio_setcr4 */
	ntv_rdmsr,		/* cmio_rdmsr */
	ntv_wrmsr,		/* cmio_wrmsr */
	ntv_msrinterpose,	/* cmio_msrinterpose */
	ntv_int,		/* cmio_int */
	ntv_online		/* cmio_online */
#endif
};
1781