xref: /titanic_41/usr/src/uts/i86pc/os/cmi_hw.c (revision c9b6d37c673213b7ad91d849a105790cb469f95b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * CPU Module Interface - hardware abstraction.
31  */
32 
33 #include <sys/types.h>
34 #include <sys/cpu_module.h>
35 #include <sys/kmem.h>
36 #include <sys/x86_archext.h>
37 #include <sys/cpuvar.h>
38 #include <sys/ksynch.h>
39 #include <sys/x_call.h>
40 #include <sys/pghw.h>
41 #include <sys/pci_cfgspace.h>
42 #include <sys/archsystm.h>
43 #include <sys/ontrap.h>
44 #include <sys/controlregs.h>
45 #include <sys/sunddi.h>
46 
47 /*
48  * Outside of this file consumers use the opaque cmi_hdl_t.  This
49  * definition is duplicated in the generic_cpu mdb module, so keep
50  * them in-sync when making changes.
51  */
typedef struct cmi_hdl_impl {
	enum cmi_hdl_class cmih_class;		/* Handle nature */
	struct cmi_hdl_ops *cmih_ops;		/* Operations vector */
	uint_t cmih_chipid;			/* Chipid of cpu resource */
	uint_t cmih_coreid;			/* Core within die */
	uint_t cmih_strandid;			/* Thread within core */
	volatile uint32_t *cmih_refcntp;	/* Refcnt in handle hash entry */
	uint64_t cmih_msrsrc;			/* CMI_MSR_FLAG_* source bits */
	void *cmih_hdlpriv;			/* cmi_hw.c private data */
	void *cmih_spec;			/* cmi_hdl_{set,get}_specific */
	void *cmih_cmi;				/* cpu mod control structure */
	void *cmih_cmidata;			/* cpu mod private data */
	const struct cmi_mc_ops *cmih_mcops;	/* Memory-controller ops */
	void *cmih_mcdata;			/* Memory-controller data */
} cmi_hdl_impl_t;

/* Recover the implementation structure from an opaque cmi_hdl_t. */
#define	IMPLHDL(ophdl)	((cmi_hdl_impl_t *)ophdl)
69 
/*
 * Handles are looked up from contexts such as polling, injection etc
 * where the context is reasonably well defined (although a poller could
 * interrupt any old thread holding any old lock).  They are also looked
 * up by machine check handlers, which may strike at inconvenient times
 * such as during handle initialization or destruction or during handle
 * lookup (which the #MC handler itself will also have to perform).
 *
 * So keeping handles in a linked list makes locking difficult when we
 * consider #MC handlers.  Our solution is to have an array indexed
 * by that which uniquely identifies a handle - chip/core/strand id -
 * with each array member a structure including a pointer to a handle
 * structure for the resource, and a reference count for the handle.
 * Reference counts are modified atomically.  The public cmi_hdl_hold
 * always succeeds because this can only be used after handle creation
 * and before the call to destruct, so the hold count is already at least
 * one.  In other functions that look up a handle (cmi_hdl_lookup,
 * cmi_hdl_any) we must be certain that the count has not already
 * decremented to zero before applying our hold.
 *
 * This array is allocated when first we want to populate an entry.
 * When allocated it is maximal - ideally we should scale to the
 * actual number of chips, cores per chip and strands per core but
 * that info is not readily available if we are virtualized so
 * for now we stick with the dumb approach.
 */
/*
 * Exclusive upper bounds on the chip, core and strand ids accepted by
 * cmi_hdl_create, and the resulting number of entries in cmi_hdl_hash.
 */
#define	CMI_MAX_CHIPS			16
#define	CMI_MAX_CORES_PER_CHIP		4
#define	CMI_MAX_STRANDS_PER_CORE	2
#define	CMI_HDL_HASHSZ (CMI_MAX_CHIPS * CMI_MAX_CORES_PER_CHIP * \
    CMI_MAX_STRANDS_PER_CORE)
101 
/*
 * One slot of the handle array.  A zero cmhe_refcnt means no usable
 * handle lives in the slot (see cmi_hdl_canref).
 */
struct cmi_hdl_hashent {
	volatile uint32_t cmhe_refcnt;	/* holds on cmhe_hdlp; 0 = unusable */
	cmi_hdl_impl_t *cmhe_hdlp;	/* handle for this chip/core/strand */
};

/* Allocated with CMI_HDL_HASHSZ entries on first cmi_hdl_create. */
static struct cmi_hdl_hashent *cmi_hdl_hash;
108 
/*
 * Map a chip/core/strand triple to its slot in cmi_hdl_hash.  The ids are
 * combined as a mixed-radix number (chip most significant, strand least),
 * so every triple within the CMI_MAX_* bounds gets a distinct index in
 * the range [0, CMI_HDL_HASHSZ).
 */
#define	CMI_HDL_HASHIDX(chipid, coreid, strandid) \
	(((chipid) * CMI_MAX_CORES_PER_CHIP + (coreid)) * \
	CMI_MAX_STRANDS_PER_CORE + (strandid))
112 
113 /*
114  * Controls where we will source PCI config space data.
115  */
#define	CMI_PCICFG_FLAG_RD_HWOK		0x0001	/* reads may go to hardware */
#define	CMI_PCICFG_FLAG_RD_INTERPOSEOK	0X0002	/* reads may be interposed */
#define	CMI_PCICFG_FLAG_WR_HWOK		0x0004	/* writes may go to hardware */
#define	CMI_PCICFG_FLAG_WR_INTERPOSEOK	0X0008

/* All sources enabled by default; see cmi_pcird_nohw and cmi_pciwr_nohw. */
static uint64_t cmi_pcicfg_flags =
    CMI_PCICFG_FLAG_RD_HWOK | CMI_PCICFG_FLAG_RD_INTERPOSEOK |
    CMI_PCICFG_FLAG_WR_HWOK | CMI_PCICFG_FLAG_WR_INTERPOSEOK;
124 
125 /*
126  * The flags for individual cpus are kept in their per-cpu handle cmih_msrsrc
127  */
#define	CMI_MSR_FLAG_RD_HWOK		0x0001	/* rdmsr may go to hardware */
#define	CMI_MSR_FLAG_RD_INTERPOSEOK	0x0002	/* rdmsr may be interposed */
#define	CMI_MSR_FLAG_WR_HWOK		0x0004	/* wrmsr may go to hardware */
#define	CMI_MSR_FLAG_WR_INTERPOSEOK	0x0008
132 
133 int cmi_call_func_ntv_tries = 3;
134 
135 static cmi_errno_t
136 call_func_ntv(int cpuid, xc_func_t func, xc_arg_t arg1, xc_arg_t arg2)
137 {
138 	cmi_errno_t rc = -1;
139 	int i;
140 
141 	kpreempt_disable();
142 
143 	if (CPU->cpu_id == cpuid) {
144 		(*func)(arg1, arg2, (xc_arg_t)&rc);
145 	} else {
146 		/*
147 		 * This should not happen for a #MC trap or a poll, so
148 		 * this is likely an error injection or similar.
149 		 * We will try to cross call with xc_trycall - we
150 		 * can't guarantee success with xc_call because
151 		 * the interrupt code in the case of a #MC may
152 		 * already hold the xc mutex.
153 		 */
154 		for (i = 0; i < cmi_call_func_ntv_tries; i++) {
155 			cpuset_t cpus;
156 
157 			CPUSET_ONLY(cpus, cpuid);
158 			xc_trycall(arg1, arg2, (xc_arg_t)&rc, cpus, func);
159 			if (rc != -1)
160 				break;
161 
162 			DELAY(1);
163 		}
164 	}
165 
166 	kpreempt_enable();
167 
168 	return (rc != -1 ? rc : CMIERR_DEADLOCK);
169 }
170 
171 /*
172  *	 =======================================================
173  *	|	MSR Interposition				|
174  *	|	-----------------				|
175  *	|							|
176  *	 -------------------------------------------------------
177  */
178 
#define	CMI_MSRI_HASHSZ		16

/*
 * Bucket index for an interposed MSR value: the handle address (with its
 * low alignment bits dropped) plus the MSR number.  The shift must be
 * parenthesized - '+' binds tighter than '>>', and shifting by
 * (3 + msr) is undefined for real MSR numbers.  The modulus is
 * CMI_MSRI_HASHSZ - 1, so the last bucket is never selected.
 */
#define	CMI_MSRI_HASHIDX(hdl, msr) \
	((((uintptr_t)(hdl) >> 3) + (msr)) % (CMI_MSRI_HASHSZ - 1))
182 
/* One hash bucket: a doubly-linked chain of entries guarded by a mutex. */
struct cmi_msri_bkt {
	kmutex_t msrib_lock;			/* protects the chain */
	struct cmi_msri_hashent *msrib_head;	/* first entry or NULL */
};

/* An interposed value for one (handle, MSR number) pair. */
struct cmi_msri_hashent {
	struct cmi_msri_hashent *msrie_next;
	struct cmi_msri_hashent *msrie_prev;
	cmi_hdl_impl_t *msrie_hdl;	/* handle the value applies to */
	uint_t msrie_msrnum;		/* MSR number */
	uint64_t msrie_msrval;		/* value returned by msri_lookup */
};

/* True if entry ent is for handle hdl and MSR number req_msr. */
#define	CMI_MSRI_MATCH(ent, hdl, req_msr) \
	((ent)->msrie_hdl == (hdl) && (ent)->msrie_msrnum == (req_msr))

static struct cmi_msri_bkt msrihash[CMI_MSRI_HASHSZ];
200 
201 static void
202 msri_addent(cmi_hdl_impl_t *hdl, cmi_mca_regs_t *regp)
203 {
204 	int idx = CMI_MSRI_HASHIDX(hdl, regp->cmr_msrnum);
205 	struct cmi_msri_bkt *hbp = &msrihash[idx];
206 	struct cmi_msri_hashent *hep;
207 
208 	mutex_enter(&hbp->msrib_lock);
209 
210 	for (hep = hbp->msrib_head; hep != NULL; hep = hep->msrie_next) {
211 		if (CMI_MSRI_MATCH(hep, hdl, regp->cmr_msrnum))
212 			break;
213 	}
214 
215 	if (hep != NULL) {
216 		hep->msrie_msrval = regp->cmr_msrval;
217 	} else {
218 		hep = kmem_alloc(sizeof (*hep), KM_SLEEP);
219 		hep->msrie_hdl = hdl;
220 		hep->msrie_msrnum = regp->cmr_msrnum;
221 		hep->msrie_msrval = regp->cmr_msrval;
222 
223 		if (hbp->msrib_head != NULL)
224 			hbp->msrib_head->msrie_prev = hep;
225 		hep->msrie_next = hbp->msrib_head;
226 		hep->msrie_prev = NULL;
227 		hbp->msrib_head = hep;
228 	}
229 
230 	mutex_exit(&hbp->msrib_lock);
231 }
232 
233 /*
234  * Look for a match for the given hanlde and msr.  Return 1 with valp
235  * filled if a match is found, otherwise return 0 with valp untouched.
236  */
237 static int
238 msri_lookup(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t *valp)
239 {
240 	int idx = CMI_MSRI_HASHIDX(hdl, msr);
241 	struct cmi_msri_bkt *hbp = &msrihash[idx];
242 	struct cmi_msri_hashent *hep;
243 
244 	/*
245 	 * This function is called during #MC trap handling, so we should
246 	 * consider the possibility that the hash mutex is held by the
247 	 * interrupted thread.  This should not happen because interposition
248 	 * is an artificial injection mechanism and the #MC is requested
249 	 * after adding entries, but just in case of a real #MC at an
250 	 * unlucky moment we'll use mutex_tryenter here.
251 	 */
252 	if (!mutex_tryenter(&hbp->msrib_lock))
253 		return (0);
254 
255 	for (hep = hbp->msrib_head; hep != NULL; hep = hep->msrie_next) {
256 		if (CMI_MSRI_MATCH(hep, hdl, msr)) {
257 			*valp = hep->msrie_msrval;
258 			break;
259 		}
260 	}
261 
262 	mutex_exit(&hbp->msrib_lock);
263 
264 	return (hep != NULL);
265 }
266 
267 /*
268  * Remove any interposed value that matches.
269  */
270 static void
271 msri_rment(cmi_hdl_impl_t *hdl, uint_t msr)
272 {
273 
274 	int idx = CMI_MSRI_HASHIDX(hdl, msr);
275 	struct cmi_msri_bkt *hbp = &msrihash[idx];
276 	struct cmi_msri_hashent *hep;
277 
278 	if (!mutex_tryenter(&hbp->msrib_lock))
279 		return;
280 
281 	for (hep = hbp->msrib_head; hep != NULL; hep = hep->msrie_next) {
282 		if (CMI_MSRI_MATCH(hep, hdl, msr)) {
283 			if (hep->msrie_prev != NULL)
284 				hep->msrie_prev->msrie_next = hep->msrie_next;
285 
286 			if (hep->msrie_next != NULL)
287 				hep->msrie_next->msrie_prev = hep->msrie_prev;
288 
289 			if (hbp->msrib_head == hep)
290 				hbp->msrib_head = hep->msrie_next;
291 
292 			kmem_free(hep, sizeof (*hep));
293 			break;
294 		}
295 	}
296 
297 	mutex_exit(&hbp->msrib_lock);
298 }
299 
300 /*
301  *	 =======================================================
302  *	|	PCI Config Space Interposition			|
303  *	|	------------------------------			|
304  *	|							|
305  *	 -------------------------------------------------------
306  */
307 
308 /*
309  * Hash for interposed PCI config space values.  We lookup on bus/dev/fun/offset
310  * and then record whether the value stashed was made with a byte, word or
311  * doubleword access;  we will only return a hit for an access of the
312  * same size.  If you access say a 32-bit register using byte accesses
313  * and then attempt to read the full 32-bit value back you will not obtain
314  * any sort of merged result - you get a lookup miss.
315  */
316 
#define	CMI_PCII_HASHSZ		16

/*
 * Bucket index for a bus/dev/func/offset tuple: the sum of the four
 * values modulo (CMI_PCII_HASHSZ - 1), so the last bucket is never
 * selected.  The access size does not take part in the hash.
 */
#define	CMI_PCII_HASHIDX(b, d, f, o) \
	(((b) + (d) + (f) + (o)) % (CMI_PCII_HASHSZ - 1))
320 
/* One hash bucket: a doubly-linked chain of entries guarded by a mutex. */
struct cmi_pcii_bkt {
	kmutex_t pciib_lock;			/* protects the chain */
	struct cmi_pcii_hashent *pciib_head;	/* first entry or NULL */
};

/* An interposed PCI config space value. */
struct cmi_pcii_hashent {
	struct cmi_pcii_hashent *pcii_next;
	struct cmi_pcii_hashent *pcii_prev;
	int pcii_bus;
	int pcii_dev;
	int pcii_func;
	int pcii_reg;		/* config space offset */
	int pcii_asize;		/* access size in bytes: 1, 2 or 4 */
	uint32_t pcii_val;	/* interposed value */
};

/* True if ent matches bus/dev/func/reg and was stored with size asz. */
#define	CMI_PCII_MATCH(ent, b, d, f, r, asz) \
	((ent)->pcii_bus == (b) && (ent)->pcii_dev == (d) && \
	(ent)->pcii_func == (f) && (ent)->pcii_reg == (r) && \
	(ent)->pcii_asize == (asz))

static struct cmi_pcii_bkt pciihash[CMI_PCII_HASHSZ];
343 
344 
345 /*
346  * Add a new entry to the PCI interpose hash, overwriting any existing
347  * entry that is found.
348  */
349 static void
350 pcii_addent(int bus, int dev, int func, int reg, uint32_t val, int asz)
351 {
352 	int idx = CMI_PCII_HASHIDX(bus, dev, func, reg);
353 	struct cmi_pcii_bkt *hbp = &pciihash[idx];
354 	struct cmi_pcii_hashent *hep;
355 
356 	mutex_enter(&hbp->pciib_lock);
357 
358 	for (hep = hbp->pciib_head; hep != NULL; hep = hep->pcii_next) {
359 		if (CMI_PCII_MATCH(hep, bus, dev, func, reg, asz))
360 			break;
361 	}
362 
363 	if (hep != NULL) {
364 		hep->pcii_val = val;
365 	} else {
366 		hep = kmem_alloc(sizeof (*hep), KM_SLEEP);
367 		hep->pcii_bus = bus;
368 		hep->pcii_dev = dev;
369 		hep->pcii_func = func;
370 		hep->pcii_reg = reg;
371 		hep->pcii_asize = asz;
372 		hep->pcii_val = val;
373 
374 		if (hbp->pciib_head != NULL)
375 			hbp->pciib_head->pcii_prev = hep;
376 		hep->pcii_next = hbp->pciib_head;
377 		hep->pcii_prev = NULL;
378 		hbp->pciib_head = hep;
379 	}
380 
381 	mutex_exit(&hbp->pciib_lock);
382 }
383 
384 /*
385  * Look for a match for the given bus/dev/func/reg; return 1 with valp
386  * filled if a match is found, otherwise return 0 with valp untouched.
387  */
388 static int
389 pcii_lookup(int bus, int dev, int func, int reg, int asz, uint32_t *valp)
390 {
391 	int idx = CMI_PCII_HASHIDX(bus, dev, func, reg);
392 	struct cmi_pcii_bkt *hbp = &pciihash[idx];
393 	struct cmi_pcii_hashent *hep;
394 
395 	if (!mutex_tryenter(&hbp->pciib_lock))
396 		return (0);
397 
398 	for (hep = hbp->pciib_head; hep != NULL; hep = hep->pcii_next) {
399 		if (CMI_PCII_MATCH(hep, bus, dev, func, reg, asz)) {
400 			*valp = hep->pcii_val;
401 			break;
402 		}
403 	}
404 
405 	mutex_exit(&hbp->pciib_lock);
406 
407 	return (hep != NULL);
408 }
409 
410 static void
411 pcii_rment(int bus, int dev, int func, int reg, int asz)
412 {
413 	int idx = CMI_PCII_HASHIDX(bus, dev, func, reg);
414 	struct cmi_pcii_bkt *hbp = &pciihash[idx];
415 	struct cmi_pcii_hashent *hep;
416 
417 	mutex_enter(&hbp->pciib_lock);
418 
419 	for (hep = hbp->pciib_head; hep != NULL; hep = hep->pcii_next) {
420 		if (CMI_PCII_MATCH(hep, bus, dev, func, reg, asz)) {
421 			if (hep->pcii_prev != NULL)
422 				hep->pcii_prev->pcii_next = hep->pcii_next;
423 
424 			if (hep->pcii_next != NULL)
425 				hep->pcii_next->pcii_prev = hep->pcii_prev;
426 
427 			if (hbp->pciib_head == hep)
428 				hbp->pciib_head = hep->pcii_next;
429 
430 			kmem_free(hep, sizeof (*hep));
431 			break;
432 		}
433 	}
434 
435 	mutex_exit(&hbp->pciib_lock);
436 }
437 
438 /*
439  *	 =======================================================
440  *	|	Native methods					|
441  *	|	--------------					|
442  *	|							|
443  *	| These are used when we are running native on bare-	|
444  *	| metal, or simply don't know any better.		|
445  *	---------------------------------------------------------
446  */
447 
448 static uint_t
449 ntv_vendor(cmi_hdl_impl_t *hdl)
450 {
451 	return (cpuid_getvendor((cpu_t *)hdl->cmih_hdlpriv));
452 }
453 
454 static const char *
455 ntv_vendorstr(cmi_hdl_impl_t *hdl)
456 {
457 	return (cpuid_getvendorstr((cpu_t *)hdl->cmih_hdlpriv));
458 }
459 
460 static uint_t
461 ntv_family(cmi_hdl_impl_t *hdl)
462 {
463 	return (cpuid_getfamily((cpu_t *)hdl->cmih_hdlpriv));
464 }
465 
466 static uint_t
467 ntv_model(cmi_hdl_impl_t *hdl)
468 {
469 	return (cpuid_getmodel((cpu_t *)hdl->cmih_hdlpriv));
470 }
471 
472 static uint_t
473 ntv_stepping(cmi_hdl_impl_t *hdl)
474 {
475 	return (cpuid_getstep((cpu_t *)hdl->cmih_hdlpriv));
476 }
477 
478 static uint_t
479 ntv_chipid(cmi_hdl_impl_t *hdl)
480 {
481 	return (hdl->cmih_chipid);
482 
483 }
484 
485 static uint_t
486 ntv_coreid(cmi_hdl_impl_t *hdl)
487 {
488 	return (hdl->cmih_coreid);
489 }
490 
491 static uint_t
492 ntv_strandid(cmi_hdl_impl_t *hdl)
493 {
494 	return (hdl->cmih_strandid);
495 }
496 
497 static uint32_t
498 ntv_chiprev(cmi_hdl_impl_t *hdl)
499 {
500 	return (cpuid_getchiprev((cpu_t *)hdl->cmih_hdlpriv));
501 }
502 
503 static const char *
504 ntv_chiprevstr(cmi_hdl_impl_t *hdl)
505 {
506 	return (cpuid_getchiprevstr((cpu_t *)hdl->cmih_hdlpriv));
507 }
508 
509 static uint32_t
510 ntv_getsockettype(cmi_hdl_impl_t *hdl)
511 {
512 	return (cpuid_getsockettype((cpu_t *)hdl->cmih_hdlpriv));
513 }
514 
515 /*ARGSUSED*/
516 static int
517 ntv_getcr4_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
518 {
519 	ulong_t *dest = (ulong_t *)arg1;
520 	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
521 
522 	*dest = getcr4();
523 	*rcp = CMI_SUCCESS;
524 
525 	return (0);
526 }
527 
528 static ulong_t
529 ntv_getcr4(cmi_hdl_impl_t *hdl)
530 {
531 	cpu_t *cp = (cpu_t *)hdl->cmih_hdlpriv;
532 	ulong_t val;
533 
534 	(void) call_func_ntv(cp->cpu_id, ntv_getcr4_xc, (xc_arg_t)&val, NULL);
535 
536 	return (val);
537 }
538 
539 /*ARGSUSED*/
540 static int
541 ntv_setcr4_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
542 {
543 	ulong_t val = (ulong_t)arg1;
544 	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
545 
546 	setcr4(val);
547 	*rcp = CMI_SUCCESS;
548 
549 	return (0);
550 }
551 
552 static void
553 ntv_setcr4(cmi_hdl_impl_t *hdl, ulong_t val)
554 {
555 	cpu_t *cp = (cpu_t *)hdl->cmih_hdlpriv;
556 
557 	(void) call_func_ntv(cp->cpu_id, ntv_setcr4_xc, (xc_arg_t)val, NULL);
558 }
559 
560 /*ARGSUSED*/
561 static int
562 ntv_rdmsr_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
563 {
564 	uint_t msr = (uint_t)arg1;
565 	uint64_t *valp = (uint64_t *)arg2;
566 	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
567 
568 	on_trap_data_t otd;
569 
570 	if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
571 		if (checked_rdmsr(msr, valp) == 0)
572 			*rcp = CMI_SUCCESS;
573 		else
574 			*rcp = CMIERR_NOTSUP;
575 	} else {
576 		*rcp = CMIERR_MSRGPF;
577 	}
578 	no_trap();
579 
580 	return (0);
581 }
582 
583 static cmi_errno_t
584 ntv_rdmsr(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t *valp)
585 {
586 	cpu_t *cp = (cpu_t *)hdl->cmih_hdlpriv;
587 
588 	return (call_func_ntv(cp->cpu_id, ntv_rdmsr_xc,
589 	    (xc_arg_t)msr, (xc_arg_t)valp));
590 }
591 
592 /*ARGSUSED*/
593 static int
594 ntv_wrmsr_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
595 {
596 	uint_t msr = (uint_t)arg1;
597 	uint64_t val = *((uint64_t *)arg2);
598 	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
599 	on_trap_data_t otd;
600 
601 	if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
602 		if (checked_wrmsr(msr, val) == 0)
603 			*rcp = CMI_SUCCESS;
604 		else
605 			*rcp = CMIERR_NOTSUP;
606 	} else {
607 		*rcp = CMIERR_MSRGPF;
608 	}
609 	no_trap();
610 
611 	return (0);
612 
613 }
614 
615 static cmi_errno_t
616 ntv_wrmsr(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t val)
617 {
618 	cpu_t *cp = (cpu_t *)hdl->cmih_hdlpriv;
619 
620 	return (call_func_ntv(cp->cpu_id, ntv_wrmsr_xc,
621 	    (xc_arg_t)msr, (xc_arg_t)&val));
622 }
623 
624 /*ARGSUSED*/
625 static int
626 ntv_mcheck_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
627 {
628 	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
629 
630 	int18();
631 	*rcp = CMI_SUCCESS;
632 
633 	return (0);
634 }
635 
636 static void
637 ntv_mcheck(cmi_hdl_impl_t *hdl)
638 {
639 	cpu_t *cp = (cpu_t *)hdl->cmih_hdlpriv;
640 
641 	(void) call_func_ntv(cp->cpu_id, ntv_mcheck_xc, NULL, NULL);
642 }
643 
/*
 * Ops structure for handle operations.  The cmi_hdl_ops[] array is
 * indexed by handle class (cmi_hdl_create stores &cmi_hdl_ops[class] in
 * the handle), and the initializers below are positional, so their order
 * must match the member order of the structure.
 */
struct cmi_hdl_ops {
	uint_t (*cmio_vendor)(cmi_hdl_impl_t *);
	const char *(*cmio_vendorstr)(cmi_hdl_impl_t *);
	uint_t (*cmio_family)(cmi_hdl_impl_t *);
	uint_t (*cmio_model)(cmi_hdl_impl_t *);
	uint_t (*cmio_stepping)(cmi_hdl_impl_t *);
	uint_t (*cmio_chipid)(cmi_hdl_impl_t *);
	uint_t (*cmio_coreid)(cmi_hdl_impl_t *);
	uint_t (*cmio_strandid)(cmi_hdl_impl_t *);
	uint32_t (*cmio_chiprev)(cmi_hdl_impl_t *);
	const char *(*cmio_chiprevstr)(cmi_hdl_impl_t *);
	uint32_t (*cmio_getsockettype)(cmi_hdl_impl_t *);
	ulong_t (*cmio_getcr4)(cmi_hdl_impl_t *);
	void (*cmio_setcr4)(cmi_hdl_impl_t *, ulong_t);
	cmi_errno_t (*cmio_rdmsr)(cmi_hdl_impl_t *, uint_t, uint64_t *);
	cmi_errno_t (*cmio_wrmsr)(cmi_hdl_impl_t *, uint_t, uint64_t);
	void (*cmio_mcheck)(cmi_hdl_impl_t *);
} cmi_hdl_ops[] = {
	/*
	 * CMI_HDL_NATIVE - ops when apparently running on bare-metal
	 */
	{
		ntv_vendor,
		ntv_vendorstr,
		ntv_family,
		ntv_model,
		ntv_stepping,
		ntv_chipid,
		ntv_coreid,
		ntv_strandid,
		ntv_chiprev,
		ntv_chiprevstr,
		ntv_getsockettype,
		ntv_getcr4,
		ntv_setcr4,
		ntv_rdmsr,
		ntv_wrmsr,
		ntv_mcheck
	},
};
687 
688 #ifndef __xpv
689 static void *
690 cpu_search(enum cmi_hdl_class class, uint_t chipid, uint_t coreid,
691     uint_t strandid)
692 {
693 	switch (class) {
694 	case CMI_HDL_NATIVE: {
695 		cpu_t *cp, *startcp;
696 
697 		kpreempt_disable();
698 		cp = startcp = CPU;
699 		do {
700 			if (cmi_ntv_hwchipid(cp) == chipid &&
701 			    cmi_ntv_hwcoreid(cp) == coreid &&
702 			    cmi_ntv_hwstrandid(cp) == strandid) {
703 				kpreempt_enable();
704 				return ((void *)cp);
705 			}
706 
707 			cp = cp->cpu_next;
708 		} while (cp != startcp);
709 		kpreempt_enable();
710 		return (NULL);
711 	}
712 
713 	default:
714 		return (NULL);
715 	}
716 }
717 #endif
718 
719 cmi_hdl_t
720 cmi_hdl_create(enum cmi_hdl_class class, uint_t chipid, uint_t coreid,
721     uint_t strandid)
722 {
723 	cmi_hdl_impl_t *hdl;
724 	void *priv = NULL;
725 	int idx;
726 
727 	if (chipid > CMI_MAX_CHIPS - 1 || coreid > CMI_MAX_CORES_PER_CHIP - 1 ||
728 	    strandid > CMI_MAX_STRANDS_PER_CORE - 1)
729 		return (NULL);
730 
731 #ifndef __xpv
732 	if ((priv = cpu_search(class, chipid, coreid, strandid)) == NULL)
733 		return (NULL);
734 #endif
735 
736 	hdl = kmem_zalloc(sizeof (*hdl), KM_SLEEP);
737 
738 	hdl->cmih_class = class;
739 	hdl->cmih_ops = &cmi_hdl_ops[class];
740 	hdl->cmih_chipid = chipid;
741 	hdl->cmih_coreid = coreid;
742 	hdl->cmih_strandid = strandid;
743 	hdl->cmih_hdlpriv = priv;
744 	hdl->cmih_msrsrc = CMI_MSR_FLAG_RD_HWOK | CMI_MSR_FLAG_RD_INTERPOSEOK |
745 	    CMI_MSR_FLAG_WR_HWOK | CMI_MSR_FLAG_WR_INTERPOSEOK;
746 
747 	ASSERT(hdl->cmih_cmi == NULL && hdl->cmih_cmidata == NULL);
748 
749 	if (cmi_hdl_hash == NULL) {
750 		size_t sz = CMI_HDL_HASHSZ * sizeof (struct cmi_hdl_hashent);
751 		void *hash = kmem_zalloc(sz, KM_SLEEP);
752 
753 		if (atomic_cas_ptr(&cmi_hdl_hash, NULL, hash) != NULL)
754 			kmem_free(hash, sz); /* someone beat us */
755 	}
756 
757 	idx = CMI_HDL_HASHIDX(chipid, coreid, strandid);
758 	ASSERT(cmi_hdl_hash[idx].cmhe_refcnt == 0 &&
759 	    cmi_hdl_hash[idx].cmhe_hdlp == NULL);
760 
761 	/*
762 	 * Once we store a nonzero reference count others can find this
763 	 * handle via cmi_hdl_lookup etc.  This initial hold on the handle
764 	 * is to be dropped only if some other part of cmi initialization
765 	 * fails or, if it succeeds, at later cpu deconfigure.  Note the
766 	 * the module private data we hold in cmih_cmi and cmih_cmidata
767 	 * is still NULL at this point (the caller will fill it with
768 	 * cmi_hdl_setcmi if it initializes) so consumers of handles
769 	 * should always be ready for that possibility.
770 	 */
771 	cmi_hdl_hash[idx].cmhe_hdlp = hdl;
772 	hdl->cmih_refcntp = &cmi_hdl_hash[idx].cmhe_refcnt;
773 	cmi_hdl_hash[idx].cmhe_refcnt = 1;
774 
775 	return ((cmi_hdl_t)hdl);
776 }
777 
778 void
779 cmi_hdl_hold(cmi_hdl_t ophdl)
780 {
781 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
782 
783 	ASSERT(*hdl->cmih_refcntp != 0); /* must not be the initial hold */
784 
785 	atomic_inc_32(hdl->cmih_refcntp);
786 }
787 
788 static int
789 cmi_hdl_canref(int hashidx)
790 {
791 	volatile uint32_t *refcntp;
792 	uint32_t refcnt;
793 
794 	if (cmi_hdl_hash == NULL)
795 		return (0);
796 
797 	refcntp = &cmi_hdl_hash[hashidx].cmhe_refcnt;
798 	refcnt = *refcntp;
799 
800 	if (refcnt == 0) {
801 		/*
802 		 * Associated object never existed, is being destroyed,
803 		 * or has been destroyed.
804 		 */
805 		return (0);
806 	}
807 
808 	/*
809 	 * We cannot use atomic increment here because once the reference
810 	 * count reaches zero it must never be bumped up again.
811 	 */
812 	while (refcnt != 0) {
813 		if (atomic_cas_32(refcntp, refcnt, refcnt + 1) == refcnt)
814 			return (1);
815 		refcnt = *refcntp;
816 	}
817 
818 	/*
819 	 * Somebody dropped the reference count to 0 after our initial
820 	 * check.
821 	 */
822 	return (0);
823 }
824 
825 
826 void
827 cmi_hdl_rele(cmi_hdl_t ophdl)
828 {
829 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
830 	int idx;
831 
832 	ASSERT(*hdl->cmih_refcntp > 0);
833 
834 	if (atomic_dec_32_nv(hdl->cmih_refcntp) > 0)
835 		return;
836 
837 	idx = CMI_HDL_HASHIDX(hdl->cmih_chipid, hdl->cmih_coreid,
838 	    hdl->cmih_strandid);
839 	cmi_hdl_hash[idx].cmhe_hdlp = NULL;
840 
841 	kmem_free(hdl, sizeof (*hdl));
842 }
843 
844 void
845 cmi_hdl_setspecific(cmi_hdl_t ophdl, void *arg)
846 {
847 	IMPLHDL(ophdl)->cmih_spec = arg;
848 }
849 
850 void *
851 cmi_hdl_getspecific(cmi_hdl_t ophdl)
852 {
853 	return (IMPLHDL(ophdl)->cmih_spec);
854 }
855 
856 void
857 cmi_hdl_setmc(cmi_hdl_t ophdl, const struct cmi_mc_ops *mcops, void *mcdata)
858 {
859 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
860 
861 	ASSERT(hdl->cmih_mcops == NULL && hdl->cmih_mcdata == NULL);
862 	hdl->cmih_mcops = mcops;
863 	hdl->cmih_mcdata = mcdata;
864 }
865 
866 const struct cmi_mc_ops *
867 cmi_hdl_getmcops(cmi_hdl_t ophdl)
868 {
869 	return (IMPLHDL(ophdl)->cmih_mcops);
870 }
871 
872 void *
873 cmi_hdl_getmcdata(cmi_hdl_t ophdl)
874 {
875 	return (IMPLHDL(ophdl)->cmih_mcdata);
876 }
877 
878 cmi_hdl_t
879 cmi_hdl_lookup(enum cmi_hdl_class class, uint_t chipid, uint_t coreid,
880     uint_t strandid)
881 {
882 	int idx = CMI_HDL_HASHIDX(chipid, coreid, strandid);
883 
884 	if (!cmi_hdl_canref(idx))
885 		return (NULL);
886 
887 	if (cmi_hdl_hash[idx].cmhe_hdlp->cmih_class != class) {
888 		cmi_hdl_rele((cmi_hdl_t)cmi_hdl_hash[idx].cmhe_hdlp);
889 		return (NULL);
890 	}
891 
892 	return ((cmi_hdl_t)cmi_hdl_hash[idx].cmhe_hdlp);
893 }
894 
895 cmi_hdl_t
896 cmi_hdl_any(void)
897 {
898 	int i;
899 
900 	for (i = 0; i < CMI_HDL_HASHSZ; i++) {
901 		if (cmi_hdl_canref(i))
902 			return ((cmi_hdl_t)cmi_hdl_hash[i].cmhe_hdlp);
903 	}
904 
905 	return (NULL);
906 }
907 
908 void
909 cmi_hdl_walk(int (*cbfunc)(cmi_hdl_t, void *, void *, void *),
910     void *arg1, void *arg2, void *arg3)
911 {
912 	int i;
913 
914 	for (i = 0; i < CMI_HDL_HASHSZ; i++) {
915 		if (cmi_hdl_canref(i)) {
916 			cmi_hdl_impl_t *hdl = cmi_hdl_hash[i].cmhe_hdlp;
917 
918 			if ((*cbfunc)((cmi_hdl_t)hdl, arg1, arg2, arg3) ==
919 			    CMI_HDL_WALK_DONE) {
920 				cmi_hdl_rele((cmi_hdl_t)hdl);
921 				break;
922 			}
923 			cmi_hdl_rele((cmi_hdl_t)hdl);
924 		}
925 	}
926 }
927 
928 void
929 cmi_hdl_setcmi(cmi_hdl_t ophdl, void *cmi, void *cmidata)
930 {
931 	IMPLHDL(ophdl)->cmih_cmidata = cmidata;
932 	IMPLHDL(ophdl)->cmih_cmi = cmi;
933 }
934 
935 void *
936 cmi_hdl_getcmi(cmi_hdl_t ophdl)
937 {
938 	return (IMPLHDL(ophdl)->cmih_cmi);
939 }
940 
941 void *
942 cmi_hdl_getcmidata(cmi_hdl_t ophdl)
943 {
944 	return (IMPLHDL(ophdl)->cmih_cmidata);
945 }
946 
947 enum cmi_hdl_class
948 cmi_hdl_class(cmi_hdl_t ophdl)
949 {
950 	return (IMPLHDL(ophdl)->cmih_class);
951 }
952 
/*
 * Generate the public accessor cmi_hdl_<what>(), which takes an opaque
 * handle and returns the result of the cmio_<what> member of the
 * handle's ops vector, with return type 'type'.
 */
#define	CMI_HDL_OPFUNC(what, type)				\
	type							\
	cmi_hdl_##what(cmi_hdl_t ophdl)				\
	{							\
		return (IMPLHDL(ophdl)->cmih_ops->		\
		    cmio_##what(IMPLHDL(ophdl)));		\
	}

/* One public accessor per identification op. */
CMI_HDL_OPFUNC(vendor, uint_t)
CMI_HDL_OPFUNC(vendorstr, const char *)
CMI_HDL_OPFUNC(family, uint_t)
CMI_HDL_OPFUNC(model, uint_t)
CMI_HDL_OPFUNC(stepping, uint_t)
CMI_HDL_OPFUNC(chipid, uint_t)
CMI_HDL_OPFUNC(coreid, uint_t)
CMI_HDL_OPFUNC(strandid, uint_t)
CMI_HDL_OPFUNC(chiprev, uint32_t)
CMI_HDL_OPFUNC(chiprevstr, const char *)
CMI_HDL_OPFUNC(getsockettype, uint32_t)
972 
973 void
974 cmi_hdl_mcheck(cmi_hdl_t ophdl)
975 {
976 	IMPLHDL(ophdl)->cmih_ops->cmio_mcheck(IMPLHDL(ophdl));
977 }
978 
979 #ifndef	__xpv
980 /*
981  * Return hardware chip instance; cpuid_get_chipid provides this directly.
982  */
983 uint_t
984 cmi_ntv_hwchipid(cpu_t *cp)
985 {
986 	return (cpuid_get_chipid(cp));
987 }
988 
989 /*
990  * Return core instance within a single chip.  cpuid_get_coreid numbers cores
991  * across all chips with the same number of cores on each chip and counting
992  * all cores of chip N before moving on to count the cores of chip N + 1.
993  */
994 uint_t
995 cmi_ntv_hwcoreid(cpu_t *cp)
996 {
997 	return (cpuid_get_coreid(cp) % cpuid_get_ncore_per_chip(cp));
998 }
999 
1000 /*
1001  * Return strand number within a single core.  cpuid_get_clogid numbers
1002  * all execution units (strands, or cores in unstranded models) sequentially
1003  * within a single chip.
1004  */
1005 uint_t
1006 cmi_ntv_hwstrandid(cpu_t *cp)
1007 {
1008 	int strands_per_core = cpuid_get_ncpu_per_chip(cp) /
1009 	    cpuid_get_ncore_per_chip(cp);
1010 
1011 	return (cpuid_get_clogid(cp) % strands_per_core);
1012 }
1013 #endif	/* __xpv */
1014 
1015 void
1016 cmi_hdlconf_rdmsr_nohw(cmi_hdl_t ophdl)
1017 {
1018 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1019 
1020 	hdl->cmih_msrsrc &= ~CMI_MSR_FLAG_RD_HWOK;
1021 }
1022 
1023 void
1024 cmi_hdlconf_wrmsr_nohw(cmi_hdl_t ophdl)
1025 {
1026 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1027 
1028 	hdl->cmih_msrsrc &= ~CMI_MSR_FLAG_WR_HWOK;
1029 }
1030 
1031 cmi_errno_t
1032 cmi_hdl_rdmsr(cmi_hdl_t ophdl, uint_t msr, uint64_t *valp)
1033 {
1034 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1035 
1036 	/*
1037 	 * Regardless of the handle class, we first check for am
1038 	 * interposed value.  In the xVM case you probably want to
1039 	 * place interposed values within the hypervisor itself, but
1040 	 * we still allow interposing them in dom0 for test and bringup
1041 	 * purposes.
1042 	 */
1043 	if ((hdl->cmih_msrsrc & CMI_MSR_FLAG_RD_INTERPOSEOK) &&
1044 	    msri_lookup(hdl, msr, valp))
1045 		return (CMI_SUCCESS);
1046 
1047 	if (!(hdl->cmih_msrsrc & CMI_MSR_FLAG_RD_HWOK))
1048 		return (CMIERR_INTERPOSE);
1049 
1050 	return (hdl->cmih_ops->cmio_rdmsr(hdl, msr, valp));
1051 }
1052 
1053 cmi_errno_t
1054 cmi_hdl_wrmsr(cmi_hdl_t ophdl, uint_t msr, uint64_t val)
1055 {
1056 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1057 
1058 	/* Invalidate any interposed value */
1059 	msri_rment(hdl, msr);
1060 
1061 	if (!(hdl->cmih_msrsrc & CMI_MSR_FLAG_WR_HWOK))
1062 		return (CMI_SUCCESS);
1063 
1064 	return (hdl->cmih_ops->cmio_wrmsr(hdl, msr, val));
1065 }
1066 
1067 void
1068 cmi_hdl_enable_mce(cmi_hdl_t ophdl)
1069 {
1070 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1071 	ulong_t cr4 = hdl->cmih_ops->cmio_getcr4(hdl);
1072 
1073 	hdl->cmih_ops->cmio_setcr4(hdl, cr4 | CR4_MCE);
1074 }
1075 
1076 void
1077 cmi_hdl_msrinterpose(cmi_hdl_t ophdl, cmi_mca_regs_t *regs, uint_t nregs)
1078 {
1079 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1080 	int i;
1081 
1082 	for (i = 0; i < nregs; i++)
1083 		msri_addent(hdl, regs++);
1084 }
1085 
1086 void
1087 cmi_pcird_nohw(void)
1088 {
1089 	cmi_pcicfg_flags &= ~CMI_PCICFG_FLAG_RD_HWOK;
1090 }
1091 
1092 void
1093 cmi_pciwr_nohw(void)
1094 {
1095 	cmi_pcicfg_flags &= ~CMI_PCICFG_FLAG_WR_HWOK;
1096 }
1097 
1098 static uint32_t
1099 cmi_pci_get_cmn(int bus, int dev, int func, int reg, int asz,
1100     int *interpose, ddi_acc_handle_t hdl)
1101 {
1102 	uint32_t val;
1103 
1104 	if (cmi_pcicfg_flags & CMI_PCICFG_FLAG_RD_INTERPOSEOK &&
1105 	    pcii_lookup(bus, dev, func, reg, asz, &val)) {
1106 		if (interpose)
1107 			*interpose = 1;
1108 		return (val);
1109 	}
1110 	if (interpose)
1111 		*interpose = 0;
1112 
1113 	if (!(cmi_pcicfg_flags & CMI_PCICFG_FLAG_RD_HWOK))
1114 		return (0);
1115 
1116 	switch (asz) {
1117 	case 1:
1118 		if (hdl)
1119 			val = pci_config_get8(hdl, (off_t)reg);
1120 		else
1121 			val = (*pci_getb_func)(bus, dev, func, reg);
1122 		break;
1123 	case 2:
1124 		if (hdl)
1125 			val = pci_config_get16(hdl, (off_t)reg);
1126 		else
1127 			val = (*pci_getw_func)(bus, dev, func, reg);
1128 		break;
1129 	case 4:
1130 		if (hdl)
1131 			val = pci_config_get32(hdl, (off_t)reg);
1132 		else
1133 			val = (*pci_getl_func)(bus, dev, func, reg);
1134 		break;
1135 	default:
1136 		val = 0;
1137 	}
1138 	return (val);
1139 }
1140 
1141 uint8_t
1142 cmi_pci_getb(int bus, int dev, int func, int reg, int *interpose,
1143     ddi_acc_handle_t hdl)
1144 {
1145 	return ((uint8_t)cmi_pci_get_cmn(bus, dev, func, reg, 1, interpose,
1146 	    hdl));
1147 }
1148 
1149 uint16_t
1150 cmi_pci_getw(int bus, int dev, int func, int reg, int *interpose,
1151     ddi_acc_handle_t hdl)
1152 {
1153 	return ((uint16_t)cmi_pci_get_cmn(bus, dev, func, reg, 2, interpose,
1154 	    hdl));
1155 }
1156 
1157 uint32_t
1158 cmi_pci_getl(int bus, int dev, int func, int reg, int *interpose,
1159     ddi_acc_handle_t hdl)
1160 {
1161 	return (cmi_pci_get_cmn(bus, dev, func, reg, 4, interpose, hdl));
1162 }
1163 
1164 void
1165 cmi_pci_interposeb(int bus, int dev, int func, int reg, uint8_t val)
1166 {
1167 	pcii_addent(bus, dev, func, reg, val, 1);
1168 }
1169 
1170 void
1171 cmi_pci_interposew(int bus, int dev, int func, int reg, uint16_t val)
1172 {
1173 	pcii_addent(bus, dev, func, reg, val, 2);
1174 }
1175 
1176 void
1177 cmi_pci_interposel(int bus, int dev, int func, int reg, uint32_t val)
1178 {
1179 	pcii_addent(bus, dev, func, reg, val, 4);
1180 }
1181 
1182 static void
1183 cmi_pci_put_cmn(int bus, int dev, int func, int reg, int asz,
1184     ddi_acc_handle_t hdl, uint32_t val)
1185 {
1186 	/*
1187 	 * If there is an interposed value for this register invalidate it.
1188 	 */
1189 	pcii_rment(bus, dev, func, reg, asz);
1190 
1191 	if (!(cmi_pcicfg_flags & CMI_PCICFG_FLAG_WR_HWOK))
1192 		return;
1193 
1194 	switch (asz) {
1195 	case 1:
1196 		if (hdl)
1197 			pci_config_put8(hdl, (off_t)reg, (uint8_t)val);
1198 		else
1199 			(*pci_putb_func)(bus, dev, func, reg, (uint8_t)val);
1200 		break;
1201 
1202 	case 2:
1203 		if (hdl)
1204 			pci_config_put16(hdl, (off_t)reg, (uint16_t)val);
1205 		else
1206 			(*pci_putw_func)(bus, dev, func, reg, (uint16_t)val);
1207 		break;
1208 
1209 	case 4:
1210 		if (hdl)
1211 			pci_config_put32(hdl, (off_t)reg, val);
1212 		else
1213 			(*pci_putl_func)(bus, dev, func, reg, val);
1214 		break;
1215 
1216 	default:
1217 		break;
1218 	}
1219 }
1220 
1221 extern void
1222 cmi_pci_putb(int bus, int dev, int func, int reg, ddi_acc_handle_t hdl,
1223     uint8_t val)
1224 {
1225 	cmi_pci_put_cmn(bus, dev, func, reg, 1, hdl, val);
1226 }
1227 
1228 extern void
1229 cmi_pci_putw(int bus, int dev, int func, int reg, ddi_acc_handle_t hdl,
1230     uint16_t val)
1231 {
1232 	cmi_pci_put_cmn(bus, dev, func, reg, 2, hdl, val);
1233 }
1234 
1235 extern void
1236 cmi_pci_putl(int bus, int dev, int func, int reg, ddi_acc_handle_t hdl,
1237     uint32_t val)
1238 {
1239 	cmi_pci_put_cmn(bus, dev, func, reg, 4, hdl, val);
1240 }
1241