xref: /titanic_44/usr/src/uts/i86pc/os/cmi_hw.c (revision 29493bd8e037cbaea9095b34172305abb589cb6b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * CPU Module Interface - hardware abstraction.
31  */
32 
33 #include <sys/types.h>
34 #include <sys/cpu_module.h>
35 #include <sys/kmem.h>
36 #include <sys/x86_archext.h>
37 #include <sys/cpuvar.h>
38 #include <sys/ksynch.h>
39 #include <sys/x_call.h>
40 #include <sys/pghw.h>
41 #include <sys/pci_cfgspace.h>
42 #include <sys/archsystm.h>
43 #include <sys/ontrap.h>
44 #include <sys/controlregs.h>
45 #include <sys/sunddi.h>
46 
/*
 * Outside of this file consumers use the opaque cmi_hdl_t.  This
 * definition is duplicated in the generic_cpu mdb module, so keep
 * them in-sync when making changes.
 *
 * One handle is allocated by cmi_hdl_create() for a given
 * (chipid, coreid, strandid) tuple and is freed by cmi_hdl_rele() once
 * the count that cmih_refcntp points at drops to zero.
 */
typedef struct cmi_hdl_impl {
	enum cmi_hdl_class cmih_class;		/* Handle nature */
	struct cmi_hdl_ops *cmih_ops;		/* Operations vector */
	uint_t cmih_chipid;			/* Chipid of cpu resource */
	uint_t cmih_coreid;			/* Core within die */
	uint_t cmih_strandid;			/* Thread within core */
	volatile uint32_t *cmih_refcntp;	/* Reference count pointer */
	uint64_t cmih_msrsrc;			/* MSR data source flags */
	void *cmih_hdlpriv;			/* cmi_hw.c private data */
	void *cmih_spec;			/* cmi_hdl_{set,get}_specific */
	void *cmih_cmi;				/* cpu mod control structure */
	void *cmih_cmidata;			/* cpu mod private data */
	const struct cmi_mc_ops *cmih_mcops;	/* Memory-controller ops */
	void *cmih_mcdata;			/* Memory-controller data */
} cmi_hdl_impl_t;
67 
/*
 * Convert an opaque cmi_hdl_t into the implementation structure.  The
 * argument is parenthesized so that the cast applies to the whole
 * expression passed in, not just to its first operand.
 */
#define	IMPLHDL(ophdl)	((cmi_hdl_impl_t *)(ophdl))
69 
70 /*
71  * Handles are looked up from contexts such as polling, injection etc
72  * where the context is reasonably well defined (although a poller could
73  * interrupt any old thread holding any old lock).  They are also looked
74  * up by machine check handlers, which may strike at inconvenient times
75  * such as during handle initialization or destruction or during handle
76  * lookup (which the #MC handler itself will also have to perform).
77  *
78  * So keeping handles in a linked list makes locking difficult when we
79  * consider #MC handlers.  Our solution is to have an array indexed
80  * by that which uniquely identifies a handle - chip/core/strand id -
81  * with each array member a structure including a pointer to a handle
82  * structure for the resource, and a reference count for the handle.
83  * Reference counts are modified atomically.  The public cmi_hdl_hold
84  * always succeeds because this can only be used after handle creation
 * and before the call to destruct, so the hold count is already at least one.
 * In other functions that lookup a handle (cmi_hdl_lookup, cmi_hdl_any)
 * we must be certain that the count has not already decremented to zero
88  * before applying our hold.
89  *
90  * This array is allocated when first we want to populate an entry.
91  * When allocated it is maximal - ideally we should scale to the
92  * actual number of chips, cores per chip and strand per core but
93  * that info is not readily available if we are virtualized so
94  * for now we stick with the dumb approach.
95  */
#define	CMI_MAX_CHIPS			16
#define	CMI_MAX_CORES_PER_CHIP		4
#define	CMI_MAX_STRANDS_PER_CORE	2
/* Number of slots allocated for the cmi_hdl_hash array. */
#define	CMI_HDL_HASHSZ (CMI_MAX_CHIPS * CMI_MAX_CORES_PER_CHIP * \
    CMI_MAX_STRANDS_PER_CORE)

/*
 * One slot of the handle array.  cmhe_refcnt is the reference count that
 * the handle's cmih_refcntp points at; a value of zero means the slot
 * cannot be referenced (see cmi_hdl_canref).
 */
struct cmi_hdl_hashent {
	volatile uint32_t cmhe_refcnt;
	cmi_hdl_impl_t *cmhe_hdlp;
};

/* NULL until cmi_hdl_create() allocates the array on first use. */
static struct cmi_hdl_hashent *cmi_hdl_hash;
108 
/*
 * Map a (chipid, coreid, strandid) tuple to its slot in cmi_hdl_hash.
 * Each id is scaled by the number of slots beneath it (a chip spans
 * CMI_MAX_CORES_PER_CHIP * CMI_MAX_STRANDS_PER_CORE slots, a core spans
 * CMI_MAX_STRANDS_PER_CORE), so every in-range tuple gets a distinct
 * index in [0, CMI_HDL_HASHSZ).  Callers must range-check the ids first.
 */
#define	CMI_HDL_HASHIDX(chipid, coreid, strandid) \
	(((chipid) * CMI_MAX_CORES_PER_CHIP + (coreid)) * \
	CMI_MAX_STRANDS_PER_CORE + (strandid))
112 
/*
 * Controls where we will source PCI config space data.
 *
 *	RD_HWOK		cmi_pci_get_cmn may read the hardware
 *	RD_INTERPOSEOK	cmi_pci_get_cmn may return an interposed value
 *	WR_HWOK		cmi_pci_put_cmn may write the hardware
 *	WR_INTERPOSEOK	not consulted by the code visible in this file
 *
 * All four are set initially; cmi_pcird_nohw and cmi_pciwr_nohw clear
 * the respective HWOK bit.
 */
#define	CMI_PCICFG_FLAG_RD_HWOK		0x0001
#define	CMI_PCICFG_FLAG_RD_INTERPOSEOK	0X0002
#define	CMI_PCICFG_FLAG_WR_HWOK		0x0004
#define	CMI_PCICFG_FLAG_WR_INTERPOSEOK	0X0008

static uint64_t cmi_pcicfg_flags =
    CMI_PCICFG_FLAG_RD_HWOK | CMI_PCICFG_FLAG_RD_INTERPOSEOK |
    CMI_PCICFG_FLAG_WR_HWOK | CMI_PCICFG_FLAG_WR_INTERPOSEOK;
124 
/*
 * The flags for individual cpus are kept in their per-cpu handle cmih_msrsrc
 *
 *	RD_HWOK		cmi_hdl_rdmsr may read the hardware MSR
 *	RD_INTERPOSEOK	cmi_hdl_rdmsr may return an interposed value
 *	WR_HWOK		cmi_hdl_wrmsr may write the hardware MSR
 *	WR_INTERPOSEOK	not consulted by the code visible in this file
 *
 * cmi_hdl_create sets all four; the cmi_hdlconf_*_nohw functions clear
 * the respective HWOK bit.
 */
#define	CMI_MSR_FLAG_RD_HWOK		0x0001
#define	CMI_MSR_FLAG_RD_INTERPOSEOK	0x0002
#define	CMI_MSR_FLAG_WR_HWOK		0x0004
#define	CMI_MSR_FLAG_WR_INTERPOSEOK	0x0008
132 
133 int cmi_call_func_ntv_tries = 3;
134 
135 static cmi_errno_t
136 call_func_ntv(int cpuid, xc_func_t func, xc_arg_t arg1, xc_arg_t arg2)
137 {
138 	cmi_errno_t rc = -1;
139 	int i;
140 
141 	kpreempt_disable();
142 
143 	if (CPU->cpu_id == cpuid) {
144 		(*func)(arg1, arg2, (xc_arg_t)&rc);
145 	} else {
146 		/*
147 		 * This should not happen for a #MC trap or a poll, so
148 		 * this is likely an error injection or similar.
149 		 * We will try to cross call with xc_trycall - we
150 		 * can't guarantee success with xc_call because
151 		 * the interrupt code in the case of a #MC may
152 		 * already hold the xc mutex.
153 		 */
154 		for (i = 0; i < cmi_call_func_ntv_tries; i++) {
155 			cpuset_t cpus;
156 
157 			CPUSET_ONLY(cpus, cpuid);
158 			xc_trycall(arg1, arg2, (xc_arg_t)&rc, cpus, func);
159 			if (rc != -1)
160 				break;
161 
162 			DELAY(1);
163 		}
164 	}
165 
166 	kpreempt_enable();
167 
168 	return (rc != -1 ? rc : CMIERR_DEADLOCK);
169 }
170 
171 /*
172  *	 =======================================================
173  *	|	MSR Interposition				|
174  *	|	-----------------				|
175  *	|							|
176  *	 -------------------------------------------------------
177  */
178 
#define	CMI_MSRI_HASHSZ		16
/*
 * Bucket index for an interposed MSR value: the handle address (low
 * three bits discarded) plus the MSR number, reduced to a bucket.  The
 * shift is parenthesized explicitly because '+' binds tighter than '>>';
 * without that the handle would be shifted by (3 + msr), which is
 * undefined once the shift count reaches the width of uintptr_t.
 */
#define	CMI_MSRI_HASHIDX(hdl, msr) \
	((((uintptr_t)(hdl) >> 3) + (msr)) % (CMI_MSRI_HASHSZ - 1))
182 
/* One hash bucket: a lock and the head of a doubly-linked entry list. */
struct cmi_msri_bkt {
	kmutex_t msrib_lock;
	struct cmi_msri_hashent *msrib_head;
};

/* An interposed value (msrie_msrval) for MSR msrie_msrnum of one handle. */
struct cmi_msri_hashent {
	struct cmi_msri_hashent *msrie_next;
	struct cmi_msri_hashent *msrie_prev;
	cmi_hdl_impl_t *msrie_hdl;
	uint_t msrie_msrnum;
	uint64_t msrie_msrval;
};

/* True if entry ent is for MSR req_msr on handle hdl. */
#define	CMI_MSRI_MATCH(ent, hdl, req_msr) \
	((ent)->msrie_hdl == (hdl) && (ent)->msrie_msrnum == (req_msr))

/* Table of buckets, indexed by CMI_MSRI_HASHIDX. */
static struct cmi_msri_bkt msrihash[CMI_MSRI_HASHSZ];
200 
201 static void
202 msri_addent(cmi_hdl_impl_t *hdl, cmi_mca_regs_t *regp)
203 {
204 	int idx = CMI_MSRI_HASHIDX(hdl, regp->cmr_msrnum);
205 	struct cmi_msri_bkt *hbp = &msrihash[idx];
206 	struct cmi_msri_hashent *hep;
207 
208 	mutex_enter(&hbp->msrib_lock);
209 
210 	for (hep = hbp->msrib_head; hep != NULL; hep = hep->msrie_next) {
211 		if (CMI_MSRI_MATCH(hep, hdl, regp->cmr_msrnum))
212 			break;
213 	}
214 
215 	if (hep != NULL) {
216 		hep->msrie_msrval = regp->cmr_msrval;
217 	} else {
218 		hep = kmem_alloc(sizeof (*hep), KM_SLEEP);
219 		hep->msrie_hdl = hdl;
220 		hep->msrie_msrnum = regp->cmr_msrnum;
221 		hep->msrie_msrval = regp->cmr_msrval;
222 
223 		if (hbp->msrib_head != NULL)
224 			hbp->msrib_head->msrie_prev = hep;
225 		hep->msrie_next = hbp->msrib_head;
226 		hep->msrie_prev = NULL;
227 		hbp->msrib_head = hep;
228 	}
229 
230 	mutex_exit(&hbp->msrib_lock);
231 }
232 
233 /*
234  * Look for a match for the given hanlde and msr.  Return 1 with valp
235  * filled if a match is found, otherwise return 0 with valp untouched.
236  */
237 static int
238 msri_lookup(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t *valp)
239 {
240 	int idx = CMI_MSRI_HASHIDX(hdl, msr);
241 	struct cmi_msri_bkt *hbp = &msrihash[idx];
242 	struct cmi_msri_hashent *hep;
243 
244 	/*
245 	 * This function is called during #MC trap handling, so we should
246 	 * consider the possibility that the hash mutex is held by the
247 	 * interrupted thread.  This should not happen because interposition
248 	 * is an artificial injection mechanism and the #MC is requested
249 	 * after adding entries, but just in case of a real #MC at an
250 	 * unlucky moment we'll use mutex_tryenter here.
251 	 */
252 	if (!mutex_tryenter(&hbp->msrib_lock))
253 		return (0);
254 
255 	for (hep = hbp->msrib_head; hep != NULL; hep = hep->msrie_next) {
256 		if (CMI_MSRI_MATCH(hep, hdl, msr)) {
257 			*valp = hep->msrie_msrval;
258 			break;
259 		}
260 	}
261 
262 	mutex_exit(&hbp->msrib_lock);
263 
264 	return (hep != NULL);
265 }
266 
267 /*
268  * Remove any interposed value that matches.
269  */
270 static void
271 msri_rment(cmi_hdl_impl_t *hdl, uint_t msr)
272 {
273 
274 	int idx = CMI_MSRI_HASHIDX(hdl, msr);
275 	struct cmi_msri_bkt *hbp = &msrihash[idx];
276 	struct cmi_msri_hashent *hep;
277 
278 	if (!mutex_tryenter(&hbp->msrib_lock))
279 		return;
280 
281 	for (hep = hbp->msrib_head; hep != NULL; hep = hep->msrie_next) {
282 		if (CMI_MSRI_MATCH(hep, hdl, msr)) {
283 			if (hep->msrie_prev != NULL)
284 				hep->msrie_prev->msrie_next = hep->msrie_next;
285 
286 			if (hep->msrie_next != NULL)
287 				hep->msrie_next->msrie_prev = hep->msrie_prev;
288 
289 			if (hbp->msrib_head == hep)
290 				hbp->msrib_head = hep->msrie_next;
291 
292 			kmem_free(hep, sizeof (*hep));
293 			break;
294 		}
295 	}
296 
297 	mutex_exit(&hbp->msrib_lock);
298 }
299 
300 /*
301  *	 =======================================================
302  *	|	PCI Config Space Interposition			|
303  *	|	------------------------------			|
304  *	|							|
305  *	 -------------------------------------------------------
306  */
307 
308 /*
309  * Hash for interposed PCI config space values.  We lookup on bus/dev/fun/offset
310  * and then record whether the value stashed was made with a byte, word or
311  * doubleword access;  we will only return a hit for an access of the
312  * same size.  If you access say a 32-bit register using byte accesses
313  * and then attempt to read the full 32-bit value back you will not obtain
314  * any sort of merged result - you get a lookup miss.
315  */
316 
#define	CMI_PCII_HASHSZ		16
/*
 * Bucket index for a bus/dev/func/offset tuple.  The access size is not
 * part of the index, so different-sized accesses to one register share
 * a bucket.  Because the modulus is CMI_PCII_HASHSZ - 1, non-negative
 * inputs never select the last bucket.
 * NOTE(review): assumes all four arguments are non-negative - confirm.
 */
#define	CMI_PCII_HASHIDX(b, d, f, o) \
	(((b) + (d) + (f) + (o)) % (CMI_PCII_HASHSZ - 1))
320 
/* One hash bucket: a lock and the head of a doubly-linked entry list. */
struct cmi_pcii_bkt {
	kmutex_t pciib_lock;
	struct cmi_pcii_hashent *pciib_head;
};

/*
 * An interposed value for one config space register; pcii_asize is the
 * access size (1, 2 or 4 as passed by the cmi_pci_interpose* functions).
 */
struct cmi_pcii_hashent {
	struct cmi_pcii_hashent *pcii_next;
	struct cmi_pcii_hashent *pcii_prev;
	int pcii_bus;
	int pcii_dev;
	int pcii_func;
	int pcii_reg;
	int pcii_asize;
	uint32_t pcii_val;
};

/* True if ent was stashed for exactly this register and access size. */
#define	CMI_PCII_MATCH(ent, b, d, f, r, asz) \
	((ent)->pcii_bus == (b) && (ent)->pcii_dev == (d) && \
	(ent)->pcii_func == (f) && (ent)->pcii_reg == (r) && \
	(ent)->pcii_asize == (asz))

/* Table of buckets, indexed by CMI_PCII_HASHIDX. */
static struct cmi_pcii_bkt pciihash[CMI_PCII_HASHSZ];
343 
344 
345 /*
346  * Add a new entry to the PCI interpose hash, overwriting any existing
347  * entry that is found.
348  */
349 static void
350 pcii_addent(int bus, int dev, int func, int reg, uint32_t val, int asz)
351 {
352 	int idx = CMI_PCII_HASHIDX(bus, dev, func, reg);
353 	struct cmi_pcii_bkt *hbp = &pciihash[idx];
354 	struct cmi_pcii_hashent *hep;
355 
356 	mutex_enter(&hbp->pciib_lock);
357 
358 	for (hep = hbp->pciib_head; hep != NULL; hep = hep->pcii_next) {
359 		if (CMI_PCII_MATCH(hep, bus, dev, func, reg, asz))
360 			break;
361 	}
362 
363 	if (hep != NULL) {
364 		hep->pcii_val = val;
365 	} else {
366 		hep = kmem_alloc(sizeof (*hep), KM_SLEEP);
367 		hep->pcii_bus = bus;
368 		hep->pcii_dev = dev;
369 		hep->pcii_func = func;
370 		hep->pcii_reg = reg;
371 		hep->pcii_asize = asz;
372 		hep->pcii_val = val;
373 
374 		if (hbp->pciib_head != NULL)
375 			hbp->pciib_head->pcii_prev = hep;
376 		hep->pcii_next = hbp->pciib_head;
377 		hep->pcii_prev = NULL;
378 		hbp->pciib_head = hep;
379 	}
380 
381 	mutex_exit(&hbp->pciib_lock);
382 }
383 
384 /*
385  * Look for a match for the given bus/dev/func/reg; return 1 with valp
386  * filled if a match is found, otherwise return 0 with valp untouched.
387  */
388 static int
389 pcii_lookup(int bus, int dev, int func, int reg, int asz, uint32_t *valp)
390 {
391 	int idx = CMI_PCII_HASHIDX(bus, dev, func, reg);
392 	struct cmi_pcii_bkt *hbp = &pciihash[idx];
393 	struct cmi_pcii_hashent *hep;
394 
395 	if (!mutex_tryenter(&hbp->pciib_lock))
396 		return (0);
397 
398 	for (hep = hbp->pciib_head; hep != NULL; hep = hep->pcii_next) {
399 		if (CMI_PCII_MATCH(hep, bus, dev, func, reg, asz)) {
400 			*valp = hep->pcii_val;
401 			break;
402 		}
403 	}
404 
405 	mutex_exit(&hbp->pciib_lock);
406 
407 	return (hep != NULL);
408 }
409 
410 static void
411 pcii_rment(int bus, int dev, int func, int reg, int asz)
412 {
413 	int idx = CMI_PCII_HASHIDX(bus, dev, func, reg);
414 	struct cmi_pcii_bkt *hbp = &pciihash[idx];
415 	struct cmi_pcii_hashent *hep;
416 
417 	mutex_enter(&hbp->pciib_lock);
418 
419 	for (hep = hbp->pciib_head; hep != NULL; hep = hep->pcii_next) {
420 		if (CMI_PCII_MATCH(hep, bus, dev, func, reg, asz)) {
421 			if (hep->pcii_prev != NULL)
422 				hep->pcii_prev->pcii_next = hep->pcii_next;
423 
424 			if (hep->pcii_next != NULL)
425 				hep->pcii_next->pcii_prev = hep->pcii_prev;
426 
427 			if (hbp->pciib_head == hep)
428 				hbp->pciib_head = hep->pcii_next;
429 
430 			kmem_free(hep, sizeof (*hep));
431 			break;
432 		}
433 	}
434 
435 	mutex_exit(&hbp->pciib_lock);
436 }
437 
438 /*
439  *	 =======================================================
440  *	|	Native methods					|
441  *	|	--------------					|
442  *	|							|
443  *	| These are used when we are running native on bare-	|
444  *	| metal, or simply don't know any better.		|
445  *	---------------------------------------------------------
446  */
447 
448 static uint_t
449 ntv_vendor(cmi_hdl_impl_t *hdl)
450 {
451 	return (cpuid_getvendor((cpu_t *)hdl->cmih_hdlpriv));
452 }
453 
454 static const char *
455 ntv_vendorstr(cmi_hdl_impl_t *hdl)
456 {
457 	return (cpuid_getvendorstr((cpu_t *)hdl->cmih_hdlpriv));
458 }
459 
460 static uint_t
461 ntv_family(cmi_hdl_impl_t *hdl)
462 {
463 	return (cpuid_getfamily((cpu_t *)hdl->cmih_hdlpriv));
464 }
465 
466 static uint_t
467 ntv_model(cmi_hdl_impl_t *hdl)
468 {
469 	return (cpuid_getmodel((cpu_t *)hdl->cmih_hdlpriv));
470 }
471 
472 static uint_t
473 ntv_stepping(cmi_hdl_impl_t *hdl)
474 {
475 	return (cpuid_getstep((cpu_t *)hdl->cmih_hdlpriv));
476 }
477 
478 static uint_t
479 ntv_chipid(cmi_hdl_impl_t *hdl)
480 {
481 	return (hdl->cmih_chipid);
482 
483 }
484 
485 static uint_t
486 ntv_coreid(cmi_hdl_impl_t *hdl)
487 {
488 	return (hdl->cmih_coreid);
489 }
490 
491 static uint_t
492 ntv_strandid(cmi_hdl_impl_t *hdl)
493 {
494 	return (hdl->cmih_strandid);
495 }
496 
497 static uint32_t
498 ntv_chiprev(cmi_hdl_impl_t *hdl)
499 {
500 	return (cpuid_getchiprev((cpu_t *)hdl->cmih_hdlpriv));
501 }
502 
503 static const char *
504 ntv_chiprevstr(cmi_hdl_impl_t *hdl)
505 {
506 	return (cpuid_getchiprevstr((cpu_t *)hdl->cmih_hdlpriv));
507 }
508 
509 static uint32_t
510 ntv_getsockettype(cmi_hdl_impl_t *hdl)
511 {
512 	return (cpuid_getsockettype((cpu_t *)hdl->cmih_hdlpriv));
513 }
514 
515 /*ARGSUSED*/
516 static int
517 ntv_getcr4_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
518 {
519 	ulong_t *dest = (ulong_t *)arg1;
520 	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
521 
522 	*dest = getcr4();
523 	*rcp = CMI_SUCCESS;
524 
525 	return (0);
526 }
527 
528 static ulong_t
529 ntv_getcr4(cmi_hdl_impl_t *hdl)
530 {
531 	cpu_t *cp = (cpu_t *)hdl->cmih_hdlpriv;
532 	ulong_t val;
533 
534 	(void) call_func_ntv(cp->cpu_id, ntv_getcr4_xc, (xc_arg_t)&val, NULL);
535 
536 	return (val);
537 }
538 
539 /*ARGSUSED*/
540 static int
541 ntv_setcr4_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
542 {
543 	ulong_t val = (ulong_t)arg1;
544 	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
545 
546 	setcr4(val);
547 	*rcp = CMI_SUCCESS;
548 
549 	return (0);
550 }
551 
552 static void
553 ntv_setcr4(cmi_hdl_impl_t *hdl, ulong_t val)
554 {
555 	cpu_t *cp = (cpu_t *)hdl->cmih_hdlpriv;
556 
557 	(void) call_func_ntv(cp->cpu_id, ntv_setcr4_xc, (xc_arg_t)val, NULL);
558 }
559 
/* Count of MSR reads in ntv_rdmsr_xc that took the on_trap error path. */
volatile uint32_t cmi_trapped_rdmsr;

/*
 * Cross-call target for reading an MSR on the executing cpu.
 *
 *	arg1	MSR number
 *	arg2	uint64_t * through which checked_rdmsr stores the value
 *	arg3	cmi_errno_t * through which the outcome is reported
 *
 * The outcome is CMI_SUCCESS if checked_rdmsr returns 0, CMIERR_NOTSUP
 * if it returns non-zero, and CMIERR_MSRGPF if the on_trap protection
 * fires.  The function itself always returns 0.
 */
/*ARGSUSED*/
static int
ntv_rdmsr_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
	uint_t msr = (uint_t)arg1;
	uint64_t *valp = (uint64_t *)arg2;
	cmi_errno_t *rcp = (cmi_errno_t *)arg3;

	on_trap_data_t otd;

	/* on_trap returns non-zero when re-entered after a trapped access */
	if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
		if (checked_rdmsr(msr, valp) == 0)
			*rcp = CMI_SUCCESS;
		else
			*rcp = CMIERR_NOTSUP;
	} else {
		*rcp = CMIERR_MSRGPF;
		atomic_inc_32(&cmi_trapped_rdmsr);
	}
	no_trap();

	return (0);
}
585 
586 static cmi_errno_t
587 ntv_rdmsr(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t *valp)
588 {
589 	cpu_t *cp = (cpu_t *)hdl->cmih_hdlpriv;
590 
591 	return (call_func_ntv(cp->cpu_id, ntv_rdmsr_xc,
592 	    (xc_arg_t)msr, (xc_arg_t)valp));
593 }
594 
/* Count of MSR writes in ntv_wrmsr_xc that took the on_trap error path. */
volatile uint32_t cmi_trapped_wrmsr;

/*
 * Cross-call target for writing an MSR on the executing cpu.
 *
 *	arg1	MSR number
 *	arg2	uint64_t * pointing at the value to write
 *	arg3	cmi_errno_t * through which the outcome is reported
 *
 * The outcome is CMI_SUCCESS if checked_wrmsr returns 0, CMIERR_NOTSUP
 * if it returns non-zero, and CMIERR_MSRGPF if the on_trap protection
 * fires.  The function itself always returns 0.
 */
/*ARGSUSED*/
static int
ntv_wrmsr_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
	uint_t msr = (uint_t)arg1;
	uint64_t val = *((uint64_t *)arg2);
	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
	on_trap_data_t otd;

	/* on_trap returns non-zero when re-entered after a trapped access */
	if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
		if (checked_wrmsr(msr, val) == 0)
			*rcp = CMI_SUCCESS;
		else
			*rcp = CMIERR_NOTSUP;
	} else {
		*rcp = CMIERR_MSRGPF;
		atomic_inc_32(&cmi_trapped_wrmsr);
	}
	no_trap();

	return (0);

}
620 
621 static cmi_errno_t
622 ntv_wrmsr(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t val)
623 {
624 	cpu_t *cp = (cpu_t *)hdl->cmih_hdlpriv;
625 
626 	return (call_func_ntv(cp->cpu_id, ntv_wrmsr_xc,
627 	    (xc_arg_t)msr, (xc_arg_t)&val));
628 }
629 
630 /*ARGSUSED*/
631 static int
632 ntv_mcheck_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
633 {
634 	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
635 
636 	int18();
637 	*rcp = CMI_SUCCESS;
638 
639 	return (0);
640 }
641 
642 static void
643 ntv_mcheck(cmi_hdl_impl_t *hdl)
644 {
645 	cpu_t *cp = (cpu_t *)hdl->cmih_hdlpriv;
646 
647 	(void) call_func_ntv(cp->cpu_id, ntv_mcheck_xc, NULL, NULL);
648 }
649 
/*
 * Ops structure for handle operations.
 *
 * cmi_hdl_create points a handle's cmih_ops at &cmi_hdl_ops[class], so
 * the array is indexed by enum cmi_hdl_class and the initializers below
 * must stay in the same order as the function pointer members.
 */
struct cmi_hdl_ops {
	uint_t (*cmio_vendor)(cmi_hdl_impl_t *);
	const char *(*cmio_vendorstr)(cmi_hdl_impl_t *);
	uint_t (*cmio_family)(cmi_hdl_impl_t *);
	uint_t (*cmio_model)(cmi_hdl_impl_t *);
	uint_t (*cmio_stepping)(cmi_hdl_impl_t *);
	uint_t (*cmio_chipid)(cmi_hdl_impl_t *);
	uint_t (*cmio_coreid)(cmi_hdl_impl_t *);
	uint_t (*cmio_strandid)(cmi_hdl_impl_t *);
	uint32_t (*cmio_chiprev)(cmi_hdl_impl_t *);
	const char *(*cmio_chiprevstr)(cmi_hdl_impl_t *);
	uint32_t (*cmio_getsockettype)(cmi_hdl_impl_t *);
	ulong_t (*cmio_getcr4)(cmi_hdl_impl_t *);
	void (*cmio_setcr4)(cmi_hdl_impl_t *, ulong_t);
	cmi_errno_t (*cmio_rdmsr)(cmi_hdl_impl_t *, uint_t, uint64_t *);
	cmi_errno_t (*cmio_wrmsr)(cmi_hdl_impl_t *, uint_t, uint64_t);
	void (*cmio_mcheck)(cmi_hdl_impl_t *);
} cmi_hdl_ops[] = {
	/*
	 * CMI_HDL_NATIVE - ops when apparently running on bare-metal
	 */
	{
		ntv_vendor,
		ntv_vendorstr,
		ntv_family,
		ntv_model,
		ntv_stepping,
		ntv_chipid,
		ntv_coreid,
		ntv_strandid,
		ntv_chiprev,
		ntv_chiprevstr,
		ntv_getsockettype,
		ntv_getcr4,
		ntv_setcr4,
		ntv_rdmsr,
		ntv_wrmsr,
		ntv_mcheck
	},
};
693 
694 #ifndef __xpv
695 static void *
696 cpu_search(enum cmi_hdl_class class, uint_t chipid, uint_t coreid,
697     uint_t strandid)
698 {
699 	switch (class) {
700 	case CMI_HDL_NATIVE: {
701 		cpu_t *cp, *startcp;
702 
703 		kpreempt_disable();
704 		cp = startcp = CPU;
705 		do {
706 			if (cmi_ntv_hwchipid(cp) == chipid &&
707 			    cmi_ntv_hwcoreid(cp) == coreid &&
708 			    cmi_ntv_hwstrandid(cp) == strandid) {
709 				kpreempt_enable();
710 				return ((void *)cp);
711 			}
712 
713 			cp = cp->cpu_next;
714 		} while (cp != startcp);
715 		kpreempt_enable();
716 		return (NULL);
717 	}
718 
719 	default:
720 		return (NULL);
721 	}
722 }
723 #endif
724 
725 cmi_hdl_t
726 cmi_hdl_create(enum cmi_hdl_class class, uint_t chipid, uint_t coreid,
727     uint_t strandid)
728 {
729 	cmi_hdl_impl_t *hdl;
730 	void *priv = NULL;
731 	int idx;
732 
733 	if (chipid > CMI_MAX_CHIPS - 1 || coreid > CMI_MAX_CORES_PER_CHIP - 1 ||
734 	    strandid > CMI_MAX_STRANDS_PER_CORE - 1)
735 		return (NULL);
736 
737 #ifndef __xpv
738 	if ((priv = cpu_search(class, chipid, coreid, strandid)) == NULL)
739 		return (NULL);
740 #endif
741 
742 	hdl = kmem_zalloc(sizeof (*hdl), KM_SLEEP);
743 
744 	hdl->cmih_class = class;
745 	hdl->cmih_ops = &cmi_hdl_ops[class];
746 	hdl->cmih_chipid = chipid;
747 	hdl->cmih_coreid = coreid;
748 	hdl->cmih_strandid = strandid;
749 	hdl->cmih_hdlpriv = priv;
750 	hdl->cmih_msrsrc = CMI_MSR_FLAG_RD_HWOK | CMI_MSR_FLAG_RD_INTERPOSEOK |
751 	    CMI_MSR_FLAG_WR_HWOK | CMI_MSR_FLAG_WR_INTERPOSEOK;
752 
753 	if (cmi_hdl_hash == NULL) {
754 		size_t sz = CMI_HDL_HASHSZ * sizeof (struct cmi_hdl_hashent);
755 		void *hash = kmem_zalloc(sz, KM_SLEEP);
756 
757 		if (atomic_cas_ptr(&cmi_hdl_hash, NULL, hash) != NULL)
758 			kmem_free(hash, sz); /* someone beat us */
759 	}
760 
761 	idx = CMI_HDL_HASHIDX(chipid, coreid, strandid);
762 	if (cmi_hdl_hash[idx].cmhe_refcnt != 0 ||
763 	    cmi_hdl_hash[idx].cmhe_hdlp != NULL) {
764 		/*
765 		 * Somehow this (chipid, coreid, strandid) id tuple has
766 		 * already been assigned!  This indicates that the
767 		 * callers logic in determining these values is busted,
768 		 * or perhaps undermined by bad BIOS setup.  Complain,
769 		 * and refuse to initialize this tuple again as bad things
770 		 * will happen.
771 		 */
772 		cmn_err(CE_NOTE, "cmi_hdl_create: chipid %d coreid %d "
773 		    "strandid %d handle already allocated!",
774 		    chipid, coreid, strandid);
775 		kmem_free(hdl, sizeof (*hdl));
776 		return (NULL);
777 	}
778 
779 	/*
780 	 * Once we store a nonzero reference count others can find this
781 	 * handle via cmi_hdl_lookup etc.  This initial hold on the handle
782 	 * is to be dropped only if some other part of cmi initialization
783 	 * fails or, if it succeeds, at later cpu deconfigure.  Note the
784 	 * the module private data we hold in cmih_cmi and cmih_cmidata
785 	 * is still NULL at this point (the caller will fill it with
786 	 * cmi_hdl_setcmi if it initializes) so consumers of handles
787 	 * should always be ready for that possibility.
788 	 */
789 	cmi_hdl_hash[idx].cmhe_hdlp = hdl;
790 	hdl->cmih_refcntp = &cmi_hdl_hash[idx].cmhe_refcnt;
791 	cmi_hdl_hash[idx].cmhe_refcnt = 1;
792 
793 	return ((cmi_hdl_t)hdl);
794 }
795 
796 void
797 cmi_hdl_hold(cmi_hdl_t ophdl)
798 {
799 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
800 
801 	ASSERT(*hdl->cmih_refcntp != 0); /* must not be the initial hold */
802 
803 	atomic_inc_32(hdl->cmih_refcntp);
804 }
805 
806 static int
807 cmi_hdl_canref(int hashidx)
808 {
809 	volatile uint32_t *refcntp;
810 	uint32_t refcnt;
811 
812 	if (cmi_hdl_hash == NULL)
813 		return (0);
814 
815 	refcntp = &cmi_hdl_hash[hashidx].cmhe_refcnt;
816 	refcnt = *refcntp;
817 
818 	if (refcnt == 0) {
819 		/*
820 		 * Associated object never existed, is being destroyed,
821 		 * or has been destroyed.
822 		 */
823 		return (0);
824 	}
825 
826 	/*
827 	 * We cannot use atomic increment here because once the reference
828 	 * count reaches zero it must never be bumped up again.
829 	 */
830 	while (refcnt != 0) {
831 		if (atomic_cas_32(refcntp, refcnt, refcnt + 1) == refcnt)
832 			return (1);
833 		refcnt = *refcntp;
834 	}
835 
836 	/*
837 	 * Somebody dropped the reference count to 0 after our initial
838 	 * check.
839 	 */
840 	return (0);
841 }
842 
843 
844 void
845 cmi_hdl_rele(cmi_hdl_t ophdl)
846 {
847 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
848 	int idx;
849 
850 	ASSERT(*hdl->cmih_refcntp > 0);
851 
852 	if (atomic_dec_32_nv(hdl->cmih_refcntp) > 0)
853 		return;
854 
855 	idx = CMI_HDL_HASHIDX(hdl->cmih_chipid, hdl->cmih_coreid,
856 	    hdl->cmih_strandid);
857 	cmi_hdl_hash[idx].cmhe_hdlp = NULL;
858 
859 	kmem_free(hdl, sizeof (*hdl));
860 }
861 
862 void
863 cmi_hdl_setspecific(cmi_hdl_t ophdl, void *arg)
864 {
865 	IMPLHDL(ophdl)->cmih_spec = arg;
866 }
867 
868 void *
869 cmi_hdl_getspecific(cmi_hdl_t ophdl)
870 {
871 	return (IMPLHDL(ophdl)->cmih_spec);
872 }
873 
874 void
875 cmi_hdl_setmc(cmi_hdl_t ophdl, const struct cmi_mc_ops *mcops, void *mcdata)
876 {
877 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
878 
879 	ASSERT(hdl->cmih_mcops == NULL && hdl->cmih_mcdata == NULL);
880 	hdl->cmih_mcops = mcops;
881 	hdl->cmih_mcdata = mcdata;
882 }
883 
884 const struct cmi_mc_ops *
885 cmi_hdl_getmcops(cmi_hdl_t ophdl)
886 {
887 	return (IMPLHDL(ophdl)->cmih_mcops);
888 }
889 
890 void *
891 cmi_hdl_getmcdata(cmi_hdl_t ophdl)
892 {
893 	return (IMPLHDL(ophdl)->cmih_mcdata);
894 }
895 
896 cmi_hdl_t
897 cmi_hdl_lookup(enum cmi_hdl_class class, uint_t chipid, uint_t coreid,
898     uint_t strandid)
899 {
900 	int idx = CMI_HDL_HASHIDX(chipid, coreid, strandid);
901 
902 	if (!cmi_hdl_canref(idx))
903 		return (NULL);
904 
905 	if (cmi_hdl_hash[idx].cmhe_hdlp->cmih_class != class) {
906 		cmi_hdl_rele((cmi_hdl_t)cmi_hdl_hash[idx].cmhe_hdlp);
907 		return (NULL);
908 	}
909 
910 	return ((cmi_hdl_t)cmi_hdl_hash[idx].cmhe_hdlp);
911 }
912 
913 cmi_hdl_t
914 cmi_hdl_any(void)
915 {
916 	int i;
917 
918 	for (i = 0; i < CMI_HDL_HASHSZ; i++) {
919 		if (cmi_hdl_canref(i))
920 			return ((cmi_hdl_t)cmi_hdl_hash[i].cmhe_hdlp);
921 	}
922 
923 	return (NULL);
924 }
925 
926 void
927 cmi_hdl_walk(int (*cbfunc)(cmi_hdl_t, void *, void *, void *),
928     void *arg1, void *arg2, void *arg3)
929 {
930 	int i;
931 
932 	for (i = 0; i < CMI_HDL_HASHSZ; i++) {
933 		if (cmi_hdl_canref(i)) {
934 			cmi_hdl_impl_t *hdl = cmi_hdl_hash[i].cmhe_hdlp;
935 
936 			if ((*cbfunc)((cmi_hdl_t)hdl, arg1, arg2, arg3) ==
937 			    CMI_HDL_WALK_DONE) {
938 				cmi_hdl_rele((cmi_hdl_t)hdl);
939 				break;
940 			}
941 			cmi_hdl_rele((cmi_hdl_t)hdl);
942 		}
943 	}
944 }
945 
946 void
947 cmi_hdl_setcmi(cmi_hdl_t ophdl, void *cmi, void *cmidata)
948 {
949 	IMPLHDL(ophdl)->cmih_cmidata = cmidata;
950 	IMPLHDL(ophdl)->cmih_cmi = cmi;
951 }
952 
953 void *
954 cmi_hdl_getcmi(cmi_hdl_t ophdl)
955 {
956 	return (IMPLHDL(ophdl)->cmih_cmi);
957 }
958 
959 void *
960 cmi_hdl_getcmidata(cmi_hdl_t ophdl)
961 {
962 	return (IMPLHDL(ophdl)->cmih_cmidata);
963 }
964 
965 enum cmi_hdl_class
966 cmi_hdl_class(cmi_hdl_t ophdl)
967 {
968 	return (IMPLHDL(ophdl)->cmih_class);
969 }
970 
/*
 * Generate the public accessor cmi_hdl_<what>(), which returns a value
 * of the given type by calling the cmio_<what> member of the handle's
 * ops vector with the handle itself as the only argument.
 */
#define	CMI_HDL_OPFUNC(what, type)				\
	type							\
	cmi_hdl_##what(cmi_hdl_t ophdl)				\
	{							\
		return (IMPLHDL(ophdl)->cmih_ops->		\
		    cmio_##what(IMPLHDL(ophdl)));		\
	}

/* One public accessor per argument-less, value-returning op. */
CMI_HDL_OPFUNC(vendor, uint_t)
CMI_HDL_OPFUNC(vendorstr, const char *)
CMI_HDL_OPFUNC(family, uint_t)
CMI_HDL_OPFUNC(model, uint_t)
CMI_HDL_OPFUNC(stepping, uint_t)
CMI_HDL_OPFUNC(chipid, uint_t)
CMI_HDL_OPFUNC(coreid, uint_t)
CMI_HDL_OPFUNC(strandid, uint_t)
CMI_HDL_OPFUNC(chiprev, uint32_t)
CMI_HDL_OPFUNC(chiprevstr, const char *)
CMI_HDL_OPFUNC(getsockettype, uint32_t)
990 
991 void
992 cmi_hdl_mcheck(cmi_hdl_t ophdl)
993 {
994 	IMPLHDL(ophdl)->cmih_ops->cmio_mcheck(IMPLHDL(ophdl));
995 }
996 
997 #ifndef	__xpv
998 /*
999  * Return hardware chip instance; cpuid_get_chipid provides this directly.
1000  */
1001 uint_t
1002 cmi_ntv_hwchipid(cpu_t *cp)
1003 {
1004 	return (cpuid_get_chipid(cp));
1005 }
1006 
1007 /*
1008  * Return core instance within a single chip.
1009  */
1010 uint_t
1011 cmi_ntv_hwcoreid(cpu_t *cp)
1012 {
1013 	return (cpuid_get_pkgcoreid(cp));
1014 }
1015 
1016 /*
1017  * Return strand number within a single core.  cpuid_get_clogid numbers
1018  * all execution units (strands, or cores in unstranded models) sequentially
1019  * within a single chip.
1020  */
1021 uint_t
1022 cmi_ntv_hwstrandid(cpu_t *cp)
1023 {
1024 	int strands_per_core = cpuid_get_ncpu_per_chip(cp) /
1025 	    cpuid_get_ncore_per_chip(cp);
1026 
1027 	return (cpuid_get_clogid(cp) % strands_per_core);
1028 }
1029 #endif	/* __xpv */
1030 
1031 void
1032 cmi_hdlconf_rdmsr_nohw(cmi_hdl_t ophdl)
1033 {
1034 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1035 
1036 	hdl->cmih_msrsrc &= ~CMI_MSR_FLAG_RD_HWOK;
1037 }
1038 
1039 void
1040 cmi_hdlconf_wrmsr_nohw(cmi_hdl_t ophdl)
1041 {
1042 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1043 
1044 	hdl->cmih_msrsrc &= ~CMI_MSR_FLAG_WR_HWOK;
1045 }
1046 
1047 cmi_errno_t
1048 cmi_hdl_rdmsr(cmi_hdl_t ophdl, uint_t msr, uint64_t *valp)
1049 {
1050 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1051 
1052 	/*
1053 	 * Regardless of the handle class, we first check for am
1054 	 * interposed value.  In the xVM case you probably want to
1055 	 * place interposed values within the hypervisor itself, but
1056 	 * we still allow interposing them in dom0 for test and bringup
1057 	 * purposes.
1058 	 */
1059 	if ((hdl->cmih_msrsrc & CMI_MSR_FLAG_RD_INTERPOSEOK) &&
1060 	    msri_lookup(hdl, msr, valp))
1061 		return (CMI_SUCCESS);
1062 
1063 	if (!(hdl->cmih_msrsrc & CMI_MSR_FLAG_RD_HWOK))
1064 		return (CMIERR_INTERPOSE);
1065 
1066 	return (hdl->cmih_ops->cmio_rdmsr(hdl, msr, valp));
1067 }
1068 
1069 cmi_errno_t
1070 cmi_hdl_wrmsr(cmi_hdl_t ophdl, uint_t msr, uint64_t val)
1071 {
1072 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1073 
1074 	/* Invalidate any interposed value */
1075 	msri_rment(hdl, msr);
1076 
1077 	if (!(hdl->cmih_msrsrc & CMI_MSR_FLAG_WR_HWOK))
1078 		return (CMI_SUCCESS);
1079 
1080 	return (hdl->cmih_ops->cmio_wrmsr(hdl, msr, val));
1081 }
1082 
1083 void
1084 cmi_hdl_enable_mce(cmi_hdl_t ophdl)
1085 {
1086 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1087 	ulong_t cr4 = hdl->cmih_ops->cmio_getcr4(hdl);
1088 
1089 	hdl->cmih_ops->cmio_setcr4(hdl, cr4 | CR4_MCE);
1090 }
1091 
1092 void
1093 cmi_hdl_msrinterpose(cmi_hdl_t ophdl, cmi_mca_regs_t *regs, uint_t nregs)
1094 {
1095 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1096 	int i;
1097 
1098 	for (i = 0; i < nregs; i++)
1099 		msri_addent(hdl, regs++);
1100 }
1101 
1102 void
1103 cmi_pcird_nohw(void)
1104 {
1105 	cmi_pcicfg_flags &= ~CMI_PCICFG_FLAG_RD_HWOK;
1106 }
1107 
1108 void
1109 cmi_pciwr_nohw(void)
1110 {
1111 	cmi_pcicfg_flags &= ~CMI_PCICFG_FLAG_WR_HWOK;
1112 }
1113 
1114 static uint32_t
1115 cmi_pci_get_cmn(int bus, int dev, int func, int reg, int asz,
1116     int *interpose, ddi_acc_handle_t hdl)
1117 {
1118 	uint32_t val;
1119 
1120 	if (cmi_pcicfg_flags & CMI_PCICFG_FLAG_RD_INTERPOSEOK &&
1121 	    pcii_lookup(bus, dev, func, reg, asz, &val)) {
1122 		if (interpose)
1123 			*interpose = 1;
1124 		return (val);
1125 	}
1126 	if (interpose)
1127 		*interpose = 0;
1128 
1129 	if (!(cmi_pcicfg_flags & CMI_PCICFG_FLAG_RD_HWOK))
1130 		return (0);
1131 
1132 	switch (asz) {
1133 	case 1:
1134 		if (hdl)
1135 			val = pci_config_get8(hdl, (off_t)reg);
1136 		else
1137 			val = (*pci_getb_func)(bus, dev, func, reg);
1138 		break;
1139 	case 2:
1140 		if (hdl)
1141 			val = pci_config_get16(hdl, (off_t)reg);
1142 		else
1143 			val = (*pci_getw_func)(bus, dev, func, reg);
1144 		break;
1145 	case 4:
1146 		if (hdl)
1147 			val = pci_config_get32(hdl, (off_t)reg);
1148 		else
1149 			val = (*pci_getl_func)(bus, dev, func, reg);
1150 		break;
1151 	default:
1152 		val = 0;
1153 	}
1154 	return (val);
1155 }
1156 
1157 uint8_t
1158 cmi_pci_getb(int bus, int dev, int func, int reg, int *interpose,
1159     ddi_acc_handle_t hdl)
1160 {
1161 	return ((uint8_t)cmi_pci_get_cmn(bus, dev, func, reg, 1, interpose,
1162 	    hdl));
1163 }
1164 
1165 uint16_t
1166 cmi_pci_getw(int bus, int dev, int func, int reg, int *interpose,
1167     ddi_acc_handle_t hdl)
1168 {
1169 	return ((uint16_t)cmi_pci_get_cmn(bus, dev, func, reg, 2, interpose,
1170 	    hdl));
1171 }
1172 
1173 uint32_t
1174 cmi_pci_getl(int bus, int dev, int func, int reg, int *interpose,
1175     ddi_acc_handle_t hdl)
1176 {
1177 	return (cmi_pci_get_cmn(bus, dev, func, reg, 4, interpose, hdl));
1178 }
1179 
1180 void
1181 cmi_pci_interposeb(int bus, int dev, int func, int reg, uint8_t val)
1182 {
1183 	pcii_addent(bus, dev, func, reg, val, 1);
1184 }
1185 
1186 void
1187 cmi_pci_interposew(int bus, int dev, int func, int reg, uint16_t val)
1188 {
1189 	pcii_addent(bus, dev, func, reg, val, 2);
1190 }
1191 
1192 void
1193 cmi_pci_interposel(int bus, int dev, int func, int reg, uint32_t val)
1194 {
1195 	pcii_addent(bus, dev, func, reg, val, 4);
1196 }
1197 
1198 static void
1199 cmi_pci_put_cmn(int bus, int dev, int func, int reg, int asz,
1200     ddi_acc_handle_t hdl, uint32_t val)
1201 {
1202 	/*
1203 	 * If there is an interposed value for this register invalidate it.
1204 	 */
1205 	pcii_rment(bus, dev, func, reg, asz);
1206 
1207 	if (!(cmi_pcicfg_flags & CMI_PCICFG_FLAG_WR_HWOK))
1208 		return;
1209 
1210 	switch (asz) {
1211 	case 1:
1212 		if (hdl)
1213 			pci_config_put8(hdl, (off_t)reg, (uint8_t)val);
1214 		else
1215 			(*pci_putb_func)(bus, dev, func, reg, (uint8_t)val);
1216 		break;
1217 
1218 	case 2:
1219 		if (hdl)
1220 			pci_config_put16(hdl, (off_t)reg, (uint16_t)val);
1221 		else
1222 			(*pci_putw_func)(bus, dev, func, reg, (uint16_t)val);
1223 		break;
1224 
1225 	case 4:
1226 		if (hdl)
1227 			pci_config_put32(hdl, (off_t)reg, val);
1228 		else
1229 			(*pci_putl_func)(bus, dev, func, reg, val);
1230 		break;
1231 
1232 	default:
1233 		break;
1234 	}
1235 }
1236 
1237 extern void
1238 cmi_pci_putb(int bus, int dev, int func, int reg, ddi_acc_handle_t hdl,
1239     uint8_t val)
1240 {
1241 	cmi_pci_put_cmn(bus, dev, func, reg, 1, hdl, val);
1242 }
1243 
1244 extern void
1245 cmi_pci_putw(int bus, int dev, int func, int reg, ddi_acc_handle_t hdl,
1246     uint16_t val)
1247 {
1248 	cmi_pci_put_cmn(bus, dev, func, reg, 2, hdl, val);
1249 }
1250 
1251 extern void
1252 cmi_pci_putl(int bus, int dev, int func, int reg, ddi_acc_handle_t hdl,
1253     uint32_t val)
1254 {
1255 	cmi_pci_put_cmn(bus, dev, func, reg, 4, hdl, val);
1256 }
1257