xref: /titanic_44/usr/src/uts/i86pc/os/cmi_hw.c (revision 50e783325f49fdd425c3ad4611534d110980da2f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * CPU Module Interface - hardware abstraction.
31  */
32 
33 #include <sys/types.h>
34 #include <sys/cpu_module.h>
35 #include <sys/kmem.h>
36 #include <sys/x86_archext.h>
37 #include <sys/cpuvar.h>
38 #include <sys/ksynch.h>
39 #include <sys/x_call.h>
40 #include <sys/pghw.h>
41 #include <sys/pci_cfgspace.h>
42 #include <sys/archsystm.h>
43 #include <sys/ontrap.h>
44 #include <sys/controlregs.h>
45 #include <sys/sunddi.h>
46 
47 /*
48  * Outside of this file consumers use the opaque cmi_hdl_t.  This
49  * definition is duplicated in the generic_cpu mdb module, so keep
50  * them in-sync when making changes.
51  */
typedef struct cmi_hdl_impl {
	enum cmi_hdl_class cmih_class;		/* Handle nature */
	struct cmi_hdl_ops *cmih_ops;		/* Operations vector */
	uint_t cmih_chipid;			/* Chipid of cpu resource */
	uint_t cmih_coreid;			/* Core within die */
	uint_t cmih_strandid;			/* Thread within core */
	/* Points at the cmhe_refcnt of this handle's cmi_hdl_hash entry */
	volatile uint32_t *cmih_refcntp;	/* Reference count pointer */
	/* Bitmask of CMI_MSR_FLAG_* values */
	uint64_t cmih_msrsrc;			/* MSR data source flags */
	/* Set by cmi_hdl_create; the ntv_* ops treat it as a cpu_t pointer */
	void *cmih_hdlpriv;			/* cmi_hw.c private data */
	void *cmih_spec;			/* cmi_hdl_{set,get}_specific */
	/* NULL until cmi_hdl_setcmi is called */
	void *cmih_cmi;				/* cpu mod control structure */
	void *cmih_cmidata;			/* cpu mod private data */
	/* NULL until cmi_hdl_setmc is called */
	const struct cmi_mc_ops *cmih_mcops;	/* Memory-controller ops */
	void *cmih_mcdata;			/* Memory-controller data */
} cmi_hdl_impl_t;
67 
/*
 * Convert an opaque cmi_hdl_t to the implementation structure.  The
 * argument is parenthesized so that an expression argument is evaluated
 * in full before the cast is applied.
 */
#define	IMPLHDL(ophdl)	((cmi_hdl_impl_t *)(ophdl))
69 
70 /*
71  * Handles are looked up from contexts such as polling, injection etc
72  * where the context is reasonably well defined (although a poller could
73  * interrupt any old thread holding any old lock).  They are also looked
74  * up by machine check handlers, which may strike at inconvenient times
75  * such as during handle initialization or destruction or during handle
76  * lookup (which the #MC handler itself will also have to perform).
77  *
78  * So keeping handles in a linked list makes locking difficult when we
79  * consider #MC handlers.  Our solution is to have an array indexed
80  * by that which uniquely identifies a handle - chip/core/strand id -
81  * with each array member a structure including a pointer to a handle
82  * structure for the resource, and a reference count for the handle.
 * Reference counts are modified atomically.  The public cmi_hdl_hold
 * always succeeds because this can only be used after handle creation
 * and before the call to destruct, so the hold count is already at least one.
 * In other functions that lookup a handle (cmi_hdl_lookup, cmi_hdl_any)
 * we must be certain that the count has not already decremented to zero
 * before applying our hold.
89  *
90  * This array is allocated when first we want to populate an entry.
91  * When allocated it is maximal - ideally we should scale to the
92  * actual number of chips, cores per chip and strand per core but
93  * that info is not readily available if we are virtualized so
94  * for now we stick with the dumb approach.
95  */
#define	CMI_MAX_CHIPS			16
#define	CMI_MAX_CORES_PER_CHIP		4
#define	CMI_MAX_STRANDS_PER_CORE	2
#define	CMI_HDL_HASHSZ (CMI_MAX_CHIPS * CMI_MAX_CORES_PER_CHIP * \
    CMI_MAX_STRANDS_PER_CORE)

/*
 * One slot per possible chip/core/strand triple.  cmhe_refcnt is the
 * reference count of the handle in cmhe_hdlp; a handle's cmih_refcntp
 * points back at cmhe_refcnt.
 */
struct cmi_hdl_hashent {
	volatile uint32_t cmhe_refcnt;
	struct cmi_hdl_impl *cmhe_hdlp;	/* i.e. cmi_hdl_impl_t * */
};

static struct cmi_hdl_hashent *cmi_hdl_hash;

/*
 * Dense row-major index of a chip/core/strand triple.  For ids within
 * the CMI_MAX_* limits the result is unique per triple and lies in
 * [0, CMI_HDL_HASHSZ).  Each id must be scaled by the product of the
 * dimensions below it; scaling chipid by CMI_MAX_CHIPS and coreid by
 * CMI_MAX_CORES_PER_CHIP would index beyond the end of the array for
 * chipid >= 8.
 */
#define	CMI_HDL_HASHIDX(chipid, coreid, strandid) \
	(((chipid) * CMI_MAX_CORES_PER_CHIP + (coreid)) * \
	CMI_MAX_STRANDS_PER_CORE + (strandid))
112 
113 /*
114  * Controls where we will source PCI config space data.
115  */
#define	CMI_PCICFG_FLAG_RD_HWOK		0x0001	/* reads may go to hardware */
#define	CMI_PCICFG_FLAG_RD_INTERPOSEOK	0x0002	/* reads may hit interposed */
#define	CMI_PCICFG_FLAG_WR_HWOK		0x0004	/* writes may go to hardware */
#define	CMI_PCICFG_FLAG_WR_INTERPOSEOK	0x0008

/* Global (not per-handle) PCI config space policy; all sources enabled. */
static uint64_t cmi_pcicfg_flags = (CMI_PCICFG_FLAG_RD_HWOK |
    CMI_PCICFG_FLAG_RD_INTERPOSEOK | CMI_PCICFG_FLAG_WR_HWOK |
    CMI_PCICFG_FLAG_WR_INTERPOSEOK);

/*
 * MSR source flags.  Unlike the PCI flags these are kept per cpu, in the
 * cmih_msrsrc member of each handle.
 */
#define	CMI_MSR_FLAG_RD_HWOK		0x0001
#define	CMI_MSR_FLAG_RD_INTERPOSEOK	0x0002
#define	CMI_MSR_FLAG_WR_HWOK		0x0004
#define	CMI_MSR_FLAG_WR_INTERPOSEOK	0x0008

/* Number of xc_trycall attempts call_func_ntv makes before giving up. */
int cmi_call_func_ntv_tries = 3;
134 
135 static cmi_errno_t
136 call_func_ntv(int cpuid, xc_func_t func, xc_arg_t arg1, xc_arg_t arg2)
137 {
138 	cmi_errno_t rc = -1;
139 	int i;
140 
141 	kpreempt_disable();
142 
143 	if (CPU->cpu_id == cpuid) {
144 		(*func)(arg1, arg2, (xc_arg_t)&rc);
145 	} else {
146 		/*
147 		 * This should not happen for a #MC trap or a poll, so
148 		 * this is likely an error injection or similar.
149 		 * We will try to cross call with xc_trycall - we
150 		 * can't guarantee success with xc_call because
151 		 * the interrupt code in the case of a #MC may
152 		 * already hold the xc mutex.
153 		 */
154 		for (i = 0; i < cmi_call_func_ntv_tries; i++) {
155 			cpuset_t cpus;
156 
157 			CPUSET_ONLY(cpus, cpuid);
158 			xc_trycall(arg1, arg2, (xc_arg_t)&rc, cpus, func);
159 			if (rc != -1)
160 				break;
161 
162 			DELAY(1);
163 		}
164 	}
165 
166 	kpreempt_enable();
167 
168 	return (rc != -1 ? rc : CMIERR_DEADLOCK);
169 }
170 
171 /*
172  *	 =======================================================
173  *	|	MSR Interposition				|
174  *	|	-----------------				|
175  *	|							|
176  *	 -------------------------------------------------------
177  */
178 
#define	CMI_MSRI_HASHSZ		16
/*
 * Bucket index for a handle/msr pair: the handle address with its low
 * three bits discarded, plus the msr number, reduced modulo the table
 * size.  The shift must be parenthesized: '+' binds tighter than '>>',
 * so without the parentheses the msr number would become part of the
 * shift count.  The modulus is the full table size so that every bucket
 * of msrihash[] is usable.
 */
#define	CMI_MSRI_HASHIDX(hdl, msr) \
	((((uintptr_t)(hdl) >> 3) + (msr)) % CMI_MSRI_HASHSZ)
182 
/*
 * A hash bucket: a lock and the head of a doubly-linked list of
 * interposed MSR values.
 */
struct cmi_msri_bkt {
	kmutex_t msrib_lock;
	struct cmi_msri_hashent *msrib_head;
};

/*
 * One interposed MSR value, keyed by handle and MSR number.
 */
struct cmi_msri_hashent {
	struct cmi_msri_hashent *msrie_next;
	struct cmi_msri_hashent *msrie_prev;
	cmi_hdl_impl_t *msrie_hdl;
	uint_t msrie_msrnum;
	uint64_t msrie_msrval;
};

/* True if entry ent is for handle hdl and MSR number req_msr. */
#define	CMI_MSRI_MATCH(ent, hdl, req_msr) \
	((ent)->msrie_hdl == (hdl) && (ent)->msrie_msrnum == (req_msr))

/* The interposition table itself, indexed by CMI_MSRI_HASHIDX. */
static struct cmi_msri_bkt msrihash[CMI_MSRI_HASHSZ];
200 
201 static void
202 msri_addent(cmi_hdl_impl_t *hdl, cmi_mca_regs_t *regp)
203 {
204 	int idx = CMI_MSRI_HASHIDX(hdl, regp->cmr_msrnum);
205 	struct cmi_msri_bkt *hbp = &msrihash[idx];
206 	struct cmi_msri_hashent *hep;
207 
208 	mutex_enter(&hbp->msrib_lock);
209 
210 	for (hep = hbp->msrib_head; hep != NULL; hep = hep->msrie_next) {
211 		if (CMI_MSRI_MATCH(hep, hdl, regp->cmr_msrnum))
212 			break;
213 	}
214 
215 	if (hep != NULL) {
216 		hep->msrie_msrval = regp->cmr_msrval;
217 	} else {
218 		hep = kmem_alloc(sizeof (*hep), KM_SLEEP);
219 		hep->msrie_hdl = hdl;
220 		hep->msrie_msrnum = regp->cmr_msrnum;
221 		hep->msrie_msrval = regp->cmr_msrval;
222 
223 		if (hbp->msrib_head != NULL)
224 			hbp->msrib_head->msrie_prev = hep;
225 		hep->msrie_next = hbp->msrib_head;
226 		hep->msrie_prev = NULL;
227 		hbp->msrib_head = hep;
228 	}
229 
230 	mutex_exit(&hbp->msrib_lock);
231 }
232 
233 /*
234  * Look for a match for the given hanlde and msr.  Return 1 with valp
235  * filled if a match is found, otherwise return 0 with valp untouched.
236  */
237 static int
238 msri_lookup(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t *valp)
239 {
240 	int idx = CMI_MSRI_HASHIDX(hdl, msr);
241 	struct cmi_msri_bkt *hbp = &msrihash[idx];
242 	struct cmi_msri_hashent *hep;
243 
244 	/*
245 	 * This function is called during #MC trap handling, so we should
246 	 * consider the possibility that the hash mutex is held by the
247 	 * interrupted thread.  This should not happen because interposition
248 	 * is an artificial injection mechanism and the #MC is requested
249 	 * after adding entries, but just in case of a real #MC at an
250 	 * unlucky moment we'll use mutex_tryenter here.
251 	 */
252 	if (!mutex_tryenter(&hbp->msrib_lock))
253 		return (0);
254 
255 	for (hep = hbp->msrib_head; hep != NULL; hep = hep->msrie_next) {
256 		if (CMI_MSRI_MATCH(hep, hdl, msr)) {
257 			*valp = hep->msrie_msrval;
258 			break;
259 		}
260 	}
261 
262 	mutex_exit(&hbp->msrib_lock);
263 
264 	return (hep != NULL);
265 }
266 
267 /*
268  * Remove any interposed value that matches.
269  */
270 static void
271 msri_rment(cmi_hdl_impl_t *hdl, uint_t msr)
272 {
273 
274 	int idx = CMI_MSRI_HASHIDX(hdl, msr);
275 	struct cmi_msri_bkt *hbp = &msrihash[idx];
276 	struct cmi_msri_hashent *hep;
277 
278 	if (!mutex_tryenter(&hbp->msrib_lock))
279 		return;
280 
281 	for (hep = hbp->msrib_head; hep != NULL; hep = hep->msrie_next) {
282 		if (CMI_MSRI_MATCH(hep, hdl, msr)) {
283 			if (hep->msrie_prev != NULL)
284 				hep->msrie_prev->msrie_next = hep->msrie_next;
285 
286 			if (hep->msrie_next != NULL)
287 				hep->msrie_next->msrie_prev = hep->msrie_prev;
288 
289 			if (hbp->msrib_head == hep)
290 				hbp->msrib_head = hep->msrie_next;
291 
292 			kmem_free(hep, sizeof (*hep));
293 			break;
294 		}
295 	}
296 
297 	mutex_exit(&hbp->msrib_lock);
298 }
299 
300 /*
301  *	 =======================================================
302  *	|	PCI Config Space Interposition			|
303  *	|	------------------------------			|
304  *	|							|
305  *	 -------------------------------------------------------
306  */
307 
308 /*
309  * Hash for interposed PCI config space values.  We lookup on bus/dev/fun/offset
310  * and then record whether the value stashed was made with a byte, word or
311  * doubleword access;  we will only return a hit for an access of the
312  * same size.  If you access say a 32-bit register using byte accesses
313  * and then attempt to read the full 32-bit value back you will not obtain
314  * any sort of merged result - you get a lookup miss.
315  */
316 
#define	CMI_PCII_HASHSZ		16
/*
 * Bucket index for a bus/dev/func/offset tuple.  The sum is reduced as an
 * unsigned quantity so that the index can never be negative, and the
 * modulus is the full table size so that every bucket of pciihash[] is
 * usable.
 */
#define	CMI_PCII_HASHIDX(b, d, f, o) \
	(((unsigned int)((b) + (d) + (f) + (o))) % CMI_PCII_HASHSZ)
320 
/*
 * A hash bucket: a lock and the head of a doubly-linked list of
 * interposed PCI config space values.
 */
struct cmi_pcii_bkt {
	kmutex_t pciib_lock;
	struct cmi_pcii_hashent *pciib_head;
};

/*
 * One interposed PCI config space value, keyed by bus/dev/func/reg and
 * the access size (1, 2 or 4 as passed by the cmi_pci_interpose{b,w,l}
 * wrappers) with which it was stashed.
 */
struct cmi_pcii_hashent {
	struct cmi_pcii_hashent *pcii_next;
	struct cmi_pcii_hashent *pcii_prev;
	int pcii_bus;
	int pcii_dev;
	int pcii_func;
	int pcii_reg;
	int pcii_asize;
	uint32_t pcii_val;
};

/* True if entry ent matches on location and on access size. */
#define	CMI_PCII_MATCH(ent, b, d, f, r, asz) \
	((ent)->pcii_bus == (b) && (ent)->pcii_dev == (d) && \
	(ent)->pcii_func == (f) && (ent)->pcii_reg == (r) && \
	(ent)->pcii_asize == (asz))

/* The interposition table itself, indexed by CMI_PCII_HASHIDX. */
static struct cmi_pcii_bkt pciihash[CMI_PCII_HASHSZ];
343 
344 
345 /*
346  * Add a new entry to the PCI interpose hash, overwriting any existing
347  * entry that is found.
348  */
349 static void
350 pcii_addent(int bus, int dev, int func, int reg, uint32_t val, int asz)
351 {
352 	int idx = CMI_PCII_HASHIDX(bus, dev, func, reg);
353 	struct cmi_pcii_bkt *hbp = &pciihash[idx];
354 	struct cmi_pcii_hashent *hep;
355 
356 	mutex_enter(&hbp->pciib_lock);
357 
358 	for (hep = hbp->pciib_head; hep != NULL; hep = hep->pcii_next) {
359 		if (CMI_PCII_MATCH(hep, bus, dev, func, reg, asz))
360 			break;
361 	}
362 
363 	if (hep != NULL) {
364 		hep->pcii_val = val;
365 	} else {
366 		hep = kmem_alloc(sizeof (*hep), KM_SLEEP);
367 		hep->pcii_bus = bus;
368 		hep->pcii_dev = dev;
369 		hep->pcii_func = func;
370 		hep->pcii_reg = reg;
371 		hep->pcii_asize = asz;
372 		hep->pcii_val = val;
373 
374 		if (hbp->pciib_head != NULL)
375 			hbp->pciib_head->pcii_prev = hep;
376 		hep->pcii_next = hbp->pciib_head;
377 		hep->pcii_prev = NULL;
378 		hbp->pciib_head = hep;
379 	}
380 
381 	mutex_exit(&hbp->pciib_lock);
382 }
383 
384 /*
385  * Look for a match for the given bus/dev/func/reg; return 1 with valp
386  * filled if a match is found, otherwise return 0 with valp untouched.
387  */
388 static int
389 pcii_lookup(int bus, int dev, int func, int reg, int asz, uint32_t *valp)
390 {
391 	int idx = CMI_PCII_HASHIDX(bus, dev, func, reg);
392 	struct cmi_pcii_bkt *hbp = &pciihash[idx];
393 	struct cmi_pcii_hashent *hep;
394 
395 	if (!mutex_tryenter(&hbp->pciib_lock))
396 		return (0);
397 
398 	for (hep = hbp->pciib_head; hep != NULL; hep = hep->pcii_next) {
399 		if (CMI_PCII_MATCH(hep, bus, dev, func, reg, asz)) {
400 			*valp = hep->pcii_val;
401 			break;
402 		}
403 	}
404 
405 	mutex_exit(&hbp->pciib_lock);
406 
407 	return (hep != NULL);
408 }
409 
410 static void
411 pcii_rment(int bus, int dev, int func, int reg, int asz)
412 {
413 	int idx = CMI_PCII_HASHIDX(bus, dev, func, reg);
414 	struct cmi_pcii_bkt *hbp = &pciihash[idx];
415 	struct cmi_pcii_hashent *hep;
416 
417 	mutex_enter(&hbp->pciib_lock);
418 
419 	for (hep = hbp->pciib_head; hep != NULL; hep = hep->pcii_next) {
420 		if (CMI_PCII_MATCH(hep, bus, dev, func, reg, asz)) {
421 			if (hep->pcii_prev != NULL)
422 				hep->pcii_prev->pcii_next = hep->pcii_next;
423 
424 			if (hep->pcii_next != NULL)
425 				hep->pcii_next->pcii_prev = hep->pcii_prev;
426 
427 			if (hbp->pciib_head == hep)
428 				hbp->pciib_head = hep->pcii_next;
429 
430 			kmem_free(hep, sizeof (*hep));
431 			break;
432 		}
433 	}
434 
435 	mutex_exit(&hbp->pciib_lock);
436 }
437 
438 /*
439  *	 =======================================================
440  *	|	Native methods					|
441  *	|	--------------					|
442  *	|							|
443  *	| These are used when we are running native on bare-	|
444  *	| metal, or simply don't know any better.		|
445  *	---------------------------------------------------------
446  */
447 
448 static uint_t
449 ntv_vendor(cmi_hdl_impl_t *hdl)
450 {
451 	return (cpuid_getvendor((cpu_t *)hdl->cmih_hdlpriv));
452 }
453 
454 static const char *
455 ntv_vendorstr(cmi_hdl_impl_t *hdl)
456 {
457 	return (cpuid_getvendorstr((cpu_t *)hdl->cmih_hdlpriv));
458 }
459 
460 static uint_t
461 ntv_family(cmi_hdl_impl_t *hdl)
462 {
463 	return (cpuid_getfamily((cpu_t *)hdl->cmih_hdlpriv));
464 }
465 
466 static uint_t
467 ntv_model(cmi_hdl_impl_t *hdl)
468 {
469 	return (cpuid_getmodel((cpu_t *)hdl->cmih_hdlpriv));
470 }
471 
472 static uint_t
473 ntv_stepping(cmi_hdl_impl_t *hdl)
474 {
475 	return (cpuid_getstep((cpu_t *)hdl->cmih_hdlpriv));
476 }
477 
478 static uint_t
479 ntv_chipid(cmi_hdl_impl_t *hdl)
480 {
481 	return (hdl->cmih_chipid);
482 
483 }
484 
485 static uint_t
486 ntv_coreid(cmi_hdl_impl_t *hdl)
487 {
488 	return (hdl->cmih_coreid);
489 }
490 
491 static uint_t
492 ntv_strandid(cmi_hdl_impl_t *hdl)
493 {
494 	return (hdl->cmih_strandid);
495 }
496 
497 static uint32_t
498 ntv_chiprev(cmi_hdl_impl_t *hdl)
499 {
500 	return (cpuid_getchiprev((cpu_t *)hdl->cmih_hdlpriv));
501 }
502 
503 static const char *
504 ntv_chiprevstr(cmi_hdl_impl_t *hdl)
505 {
506 	return (cpuid_getchiprevstr((cpu_t *)hdl->cmih_hdlpriv));
507 }
508 
509 static uint32_t
510 ntv_getsockettype(cmi_hdl_impl_t *hdl)
511 {
512 	return (cpuid_getsockettype((cpu_t *)hdl->cmih_hdlpriv));
513 }
514 
515 /*ARGSUSED*/
516 static int
517 ntv_getcr4_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
518 {
519 	ulong_t *dest = (ulong_t *)arg1;
520 	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
521 
522 	*dest = getcr4();
523 	*rcp = CMI_SUCCESS;
524 
525 	return (0);
526 }
527 
528 static ulong_t
529 ntv_getcr4(cmi_hdl_impl_t *hdl)
530 {
531 	cpu_t *cp = (cpu_t *)hdl->cmih_hdlpriv;
532 	ulong_t val;
533 
534 	(void) call_func_ntv(cp->cpu_id, ntv_getcr4_xc, (xc_arg_t)&val, NULL);
535 
536 	return (val);
537 }
538 
539 /*ARGSUSED*/
540 static int
541 ntv_setcr4_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
542 {
543 	ulong_t val = (ulong_t)arg1;
544 	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
545 
546 	setcr4(val);
547 	*rcp = CMI_SUCCESS;
548 
549 	return (0);
550 }
551 
552 static void
553 ntv_setcr4(cmi_hdl_impl_t *hdl, ulong_t val)
554 {
555 	cpu_t *cp = (cpu_t *)hdl->cmih_hdlpriv;
556 
557 	(void) call_func_ntv(cp->cpu_id, ntv_setcr4_xc, (xc_arg_t)val, NULL);
558 }
559 
/* Count of rdmsr attempts that took the on_trap error path below. */
volatile uint32_t cmi_trapped_rdmsr;

/*
 * call_func_ntv target: read MSR number arg1 into the uint64_t that arg2
 * points to, and report the outcome through arg3 (a cmi_errno_t pointer):
 * CMI_SUCCESS if checked_rdmsr returns 0, CMIERR_NOTSUP if it returns
 * nonzero, CMIERR_MSRGPF if the on_trap error path is taken.
 */
/*ARGSUSED*/
static int
ntv_rdmsr_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
	uint_t msr = (uint_t)arg1;
	uint64_t *valp = (uint64_t *)arg2;
	cmi_errno_t *rcp = (cmi_errno_t *)arg3;

	on_trap_data_t otd;

	/*
	 * on_trap returning 0 is the normal path; presumably a nonzero
	 * return means a trap was taken during the protected access.
	 */
	if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
		if (checked_rdmsr(msr, valp) == 0)
			*rcp = CMI_SUCCESS;
		else
			*rcp = CMIERR_NOTSUP;
	} else {
		*rcp = CMIERR_MSRGPF;
		atomic_inc_32(&cmi_trapped_rdmsr);
	}
	/* End the on_trap protection on both paths. */
	no_trap();

	return (0);
}
585 
586 static cmi_errno_t
587 ntv_rdmsr(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t *valp)
588 {
589 	cpu_t *cp = (cpu_t *)hdl->cmih_hdlpriv;
590 
591 	return (call_func_ntv(cp->cpu_id, ntv_rdmsr_xc,
592 	    (xc_arg_t)msr, (xc_arg_t)valp));
593 }
594 
/* Count of wrmsr attempts that took the on_trap error path below. */
volatile uint32_t cmi_trapped_wrmsr;

/*
 * call_func_ntv target: write the uint64_t that arg2 points to into MSR
 * number arg1, and report the outcome through arg3 (a cmi_errno_t
 * pointer): CMI_SUCCESS if checked_wrmsr returns 0, CMIERR_NOTSUP if it
 * returns nonzero, CMIERR_MSRGPF if the on_trap error path is taken.
 */
/*ARGSUSED*/
static int
ntv_wrmsr_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
	uint_t msr = (uint_t)arg1;
	uint64_t val = *((uint64_t *)arg2);
	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
	on_trap_data_t otd;

	/*
	 * on_trap returning 0 is the normal path; presumably a nonzero
	 * return means a trap was taken during the protected access.
	 */
	if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
		if (checked_wrmsr(msr, val) == 0)
			*rcp = CMI_SUCCESS;
		else
			*rcp = CMIERR_NOTSUP;
	} else {
		*rcp = CMIERR_MSRGPF;
		atomic_inc_32(&cmi_trapped_wrmsr);
	}
	/* End the on_trap protection on both paths. */
	no_trap();

	return (0);

}
620 
621 static cmi_errno_t
622 ntv_wrmsr(cmi_hdl_impl_t *hdl, uint_t msr, uint64_t val)
623 {
624 	cpu_t *cp = (cpu_t *)hdl->cmih_hdlpriv;
625 
626 	return (call_func_ntv(cp->cpu_id, ntv_wrmsr_xc,
627 	    (xc_arg_t)msr, (xc_arg_t)&val));
628 }
629 
630 /*ARGSUSED*/
631 static int
632 ntv_mcheck_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
633 {
634 	cmi_errno_t *rcp = (cmi_errno_t *)arg3;
635 
636 	int18();
637 	*rcp = CMI_SUCCESS;
638 
639 	return (0);
640 }
641 
642 static void
643 ntv_mcheck(cmi_hdl_impl_t *hdl)
644 {
645 	cpu_t *cp = (cpu_t *)hdl->cmih_hdlpriv;
646 
647 	(void) call_func_ntv(cp->cpu_id, ntv_mcheck_xc, NULL, NULL);
648 }
649 
/*
 * Ops structure for handle operations.
 *
 * cmi_hdl_ops[] is indexed by handle class (cmi_hdl_create stores
 * &cmi_hdl_ops[class] in the handle).  The initializers are positional,
 * so each entry must list its functions in exactly the order in which
 * the members are declared.
 */
struct cmi_hdl_ops {
	uint_t (*cmio_vendor)(cmi_hdl_impl_t *);
	const char *(*cmio_vendorstr)(cmi_hdl_impl_t *);
	uint_t (*cmio_family)(cmi_hdl_impl_t *);
	uint_t (*cmio_model)(cmi_hdl_impl_t *);
	uint_t (*cmio_stepping)(cmi_hdl_impl_t *);
	uint_t (*cmio_chipid)(cmi_hdl_impl_t *);
	uint_t (*cmio_coreid)(cmi_hdl_impl_t *);
	uint_t (*cmio_strandid)(cmi_hdl_impl_t *);
	uint32_t (*cmio_chiprev)(cmi_hdl_impl_t *);
	const char *(*cmio_chiprevstr)(cmi_hdl_impl_t *);
	uint32_t (*cmio_getsockettype)(cmi_hdl_impl_t *);
	ulong_t (*cmio_getcr4)(cmi_hdl_impl_t *);
	void (*cmio_setcr4)(cmi_hdl_impl_t *, ulong_t);
	cmi_errno_t (*cmio_rdmsr)(cmi_hdl_impl_t *, uint_t, uint64_t *);
	cmi_errno_t (*cmio_wrmsr)(cmi_hdl_impl_t *, uint_t, uint64_t);
	void (*cmio_mcheck)(cmi_hdl_impl_t *);
} cmi_hdl_ops[] = {
	/*
	 * CMI_HDL_NATIVE - ops when apparently running on bare-metal
	 */
	{
		ntv_vendor,
		ntv_vendorstr,
		ntv_family,
		ntv_model,
		ntv_stepping,
		ntv_chipid,
		ntv_coreid,
		ntv_strandid,
		ntv_chiprev,
		ntv_chiprevstr,
		ntv_getsockettype,
		ntv_getcr4,
		ntv_setcr4,
		ntv_rdmsr,
		ntv_wrmsr,
		ntv_mcheck
	},
};
693 
694 #ifndef __xpv
695 static void *
696 cpu_search(enum cmi_hdl_class class, uint_t chipid, uint_t coreid,
697     uint_t strandid)
698 {
699 	switch (class) {
700 	case CMI_HDL_NATIVE: {
701 		cpu_t *cp, *startcp;
702 
703 		kpreempt_disable();
704 		cp = startcp = CPU;
705 		do {
706 			if (cmi_ntv_hwchipid(cp) == chipid &&
707 			    cmi_ntv_hwcoreid(cp) == coreid &&
708 			    cmi_ntv_hwstrandid(cp) == strandid) {
709 				kpreempt_enable();
710 				return ((void *)cp);
711 			}
712 
713 			cp = cp->cpu_next;
714 		} while (cp != startcp);
715 		kpreempt_enable();
716 		return (NULL);
717 	}
718 
719 	default:
720 		return (NULL);
721 	}
722 }
723 #endif
724 
725 cmi_hdl_t
726 cmi_hdl_create(enum cmi_hdl_class class, uint_t chipid, uint_t coreid,
727     uint_t strandid)
728 {
729 	cmi_hdl_impl_t *hdl;
730 	void *priv = NULL;
731 	int idx;
732 
733 	if (chipid > CMI_MAX_CHIPS - 1 || coreid > CMI_MAX_CORES_PER_CHIP - 1 ||
734 	    strandid > CMI_MAX_STRANDS_PER_CORE - 1)
735 		return (NULL);
736 
737 #ifndef __xpv
738 	if ((priv = cpu_search(class, chipid, coreid, strandid)) == NULL)
739 		return (NULL);
740 #endif
741 
742 	hdl = kmem_zalloc(sizeof (*hdl), KM_SLEEP);
743 
744 	hdl->cmih_class = class;
745 	hdl->cmih_ops = &cmi_hdl_ops[class];
746 	hdl->cmih_chipid = chipid;
747 	hdl->cmih_coreid = coreid;
748 	hdl->cmih_strandid = strandid;
749 	hdl->cmih_hdlpriv = priv;
750 	hdl->cmih_msrsrc = CMI_MSR_FLAG_RD_HWOK | CMI_MSR_FLAG_RD_INTERPOSEOK |
751 	    CMI_MSR_FLAG_WR_HWOK | CMI_MSR_FLAG_WR_INTERPOSEOK;
752 
753 	ASSERT(hdl->cmih_cmi == NULL && hdl->cmih_cmidata == NULL);
754 
755 	if (cmi_hdl_hash == NULL) {
756 		size_t sz = CMI_HDL_HASHSZ * sizeof (struct cmi_hdl_hashent);
757 		void *hash = kmem_zalloc(sz, KM_SLEEP);
758 
759 		if (atomic_cas_ptr(&cmi_hdl_hash, NULL, hash) != NULL)
760 			kmem_free(hash, sz); /* someone beat us */
761 	}
762 
763 	idx = CMI_HDL_HASHIDX(chipid, coreid, strandid);
764 	ASSERT(cmi_hdl_hash[idx].cmhe_refcnt == 0 &&
765 	    cmi_hdl_hash[idx].cmhe_hdlp == NULL);
766 
767 	/*
768 	 * Once we store a nonzero reference count others can find this
769 	 * handle via cmi_hdl_lookup etc.  This initial hold on the handle
770 	 * is to be dropped only if some other part of cmi initialization
771 	 * fails or, if it succeeds, at later cpu deconfigure.  Note the
772 	 * the module private data we hold in cmih_cmi and cmih_cmidata
773 	 * is still NULL at this point (the caller will fill it with
774 	 * cmi_hdl_setcmi if it initializes) so consumers of handles
775 	 * should always be ready for that possibility.
776 	 */
777 	cmi_hdl_hash[idx].cmhe_hdlp = hdl;
778 	hdl->cmih_refcntp = &cmi_hdl_hash[idx].cmhe_refcnt;
779 	cmi_hdl_hash[idx].cmhe_refcnt = 1;
780 
781 	return ((cmi_hdl_t)hdl);
782 }
783 
784 void
785 cmi_hdl_hold(cmi_hdl_t ophdl)
786 {
787 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
788 
789 	ASSERT(*hdl->cmih_refcntp != 0); /* must not be the initial hold */
790 
791 	atomic_inc_32(hdl->cmih_refcntp);
792 }
793 
794 static int
795 cmi_hdl_canref(int hashidx)
796 {
797 	volatile uint32_t *refcntp;
798 	uint32_t refcnt;
799 
800 	if (cmi_hdl_hash == NULL)
801 		return (0);
802 
803 	refcntp = &cmi_hdl_hash[hashidx].cmhe_refcnt;
804 	refcnt = *refcntp;
805 
806 	if (refcnt == 0) {
807 		/*
808 		 * Associated object never existed, is being destroyed,
809 		 * or has been destroyed.
810 		 */
811 		return (0);
812 	}
813 
814 	/*
815 	 * We cannot use atomic increment here because once the reference
816 	 * count reaches zero it must never be bumped up again.
817 	 */
818 	while (refcnt != 0) {
819 		if (atomic_cas_32(refcntp, refcnt, refcnt + 1) == refcnt)
820 			return (1);
821 		refcnt = *refcntp;
822 	}
823 
824 	/*
825 	 * Somebody dropped the reference count to 0 after our initial
826 	 * check.
827 	 */
828 	return (0);
829 }
830 
831 
832 void
833 cmi_hdl_rele(cmi_hdl_t ophdl)
834 {
835 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
836 	int idx;
837 
838 	ASSERT(*hdl->cmih_refcntp > 0);
839 
840 	if (atomic_dec_32_nv(hdl->cmih_refcntp) > 0)
841 		return;
842 
843 	idx = CMI_HDL_HASHIDX(hdl->cmih_chipid, hdl->cmih_coreid,
844 	    hdl->cmih_strandid);
845 	cmi_hdl_hash[idx].cmhe_hdlp = NULL;
846 
847 	kmem_free(hdl, sizeof (*hdl));
848 }
849 
850 void
851 cmi_hdl_setspecific(cmi_hdl_t ophdl, void *arg)
852 {
853 	IMPLHDL(ophdl)->cmih_spec = arg;
854 }
855 
856 void *
857 cmi_hdl_getspecific(cmi_hdl_t ophdl)
858 {
859 	return (IMPLHDL(ophdl)->cmih_spec);
860 }
861 
862 void
863 cmi_hdl_setmc(cmi_hdl_t ophdl, const struct cmi_mc_ops *mcops, void *mcdata)
864 {
865 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
866 
867 	ASSERT(hdl->cmih_mcops == NULL && hdl->cmih_mcdata == NULL);
868 	hdl->cmih_mcops = mcops;
869 	hdl->cmih_mcdata = mcdata;
870 }
871 
872 const struct cmi_mc_ops *
873 cmi_hdl_getmcops(cmi_hdl_t ophdl)
874 {
875 	return (IMPLHDL(ophdl)->cmih_mcops);
876 }
877 
878 void *
879 cmi_hdl_getmcdata(cmi_hdl_t ophdl)
880 {
881 	return (IMPLHDL(ophdl)->cmih_mcdata);
882 }
883 
884 cmi_hdl_t
885 cmi_hdl_lookup(enum cmi_hdl_class class, uint_t chipid, uint_t coreid,
886     uint_t strandid)
887 {
888 	int idx = CMI_HDL_HASHIDX(chipid, coreid, strandid);
889 
890 	if (!cmi_hdl_canref(idx))
891 		return (NULL);
892 
893 	if (cmi_hdl_hash[idx].cmhe_hdlp->cmih_class != class) {
894 		cmi_hdl_rele((cmi_hdl_t)cmi_hdl_hash[idx].cmhe_hdlp);
895 		return (NULL);
896 	}
897 
898 	return ((cmi_hdl_t)cmi_hdl_hash[idx].cmhe_hdlp);
899 }
900 
901 cmi_hdl_t
902 cmi_hdl_any(void)
903 {
904 	int i;
905 
906 	for (i = 0; i < CMI_HDL_HASHSZ; i++) {
907 		if (cmi_hdl_canref(i))
908 			return ((cmi_hdl_t)cmi_hdl_hash[i].cmhe_hdlp);
909 	}
910 
911 	return (NULL);
912 }
913 
914 void
915 cmi_hdl_walk(int (*cbfunc)(cmi_hdl_t, void *, void *, void *),
916     void *arg1, void *arg2, void *arg3)
917 {
918 	int i;
919 
920 	for (i = 0; i < CMI_HDL_HASHSZ; i++) {
921 		if (cmi_hdl_canref(i)) {
922 			cmi_hdl_impl_t *hdl = cmi_hdl_hash[i].cmhe_hdlp;
923 
924 			if ((*cbfunc)((cmi_hdl_t)hdl, arg1, arg2, arg3) ==
925 			    CMI_HDL_WALK_DONE) {
926 				cmi_hdl_rele((cmi_hdl_t)hdl);
927 				break;
928 			}
929 			cmi_hdl_rele((cmi_hdl_t)hdl);
930 		}
931 	}
932 }
933 
934 void
935 cmi_hdl_setcmi(cmi_hdl_t ophdl, void *cmi, void *cmidata)
936 {
937 	IMPLHDL(ophdl)->cmih_cmidata = cmidata;
938 	IMPLHDL(ophdl)->cmih_cmi = cmi;
939 }
940 
941 void *
942 cmi_hdl_getcmi(cmi_hdl_t ophdl)
943 {
944 	return (IMPLHDL(ophdl)->cmih_cmi);
945 }
946 
947 void *
948 cmi_hdl_getcmidata(cmi_hdl_t ophdl)
949 {
950 	return (IMPLHDL(ophdl)->cmih_cmidata);
951 }
952 
953 enum cmi_hdl_class
954 cmi_hdl_class(cmi_hdl_t ophdl)
955 {
956 	return (IMPLHDL(ophdl)->cmih_class);
957 }
958 
/*
 * CMI_HDL_OPFUNC(what, type) defines the public function
 * "type cmi_hdl_<what>(cmi_hdl_t)", which simply dispatches to the
 * cmio_<what> member of the handle's ops vector, passing the handle.
 */
#define	CMI_HDL_OPFUNC(what, type)				\
	type							\
	cmi_hdl_##what(cmi_hdl_t ophdl)				\
	{							\
		return (IMPLHDL(ophdl)->cmih_ops->		\
		    cmio_##what(IMPLHDL(ophdl)));		\
	}

/* One public accessor per value-returning, handle-only op. */
CMI_HDL_OPFUNC(vendor, uint_t)
CMI_HDL_OPFUNC(vendorstr, const char *)
CMI_HDL_OPFUNC(family, uint_t)
CMI_HDL_OPFUNC(model, uint_t)
CMI_HDL_OPFUNC(stepping, uint_t)
CMI_HDL_OPFUNC(chipid, uint_t)
CMI_HDL_OPFUNC(coreid, uint_t)
CMI_HDL_OPFUNC(strandid, uint_t)
CMI_HDL_OPFUNC(chiprev, uint32_t)
CMI_HDL_OPFUNC(chiprevstr, const char *)
CMI_HDL_OPFUNC(getsockettype, uint32_t)
978 
979 void
980 cmi_hdl_mcheck(cmi_hdl_t ophdl)
981 {
982 	IMPLHDL(ophdl)->cmih_ops->cmio_mcheck(IMPLHDL(ophdl));
983 }
984 
985 #ifndef	__xpv
986 /*
987  * Return hardware chip instance; cpuid_get_chipid provides this directly.
988  */
989 uint_t
990 cmi_ntv_hwchipid(cpu_t *cp)
991 {
992 	return (cpuid_get_chipid(cp));
993 }
994 
995 /*
996  * Return core instance within a single chip.  cpuid_get_coreid numbers cores
997  * across all chips with the same number of cores on each chip and counting
998  * all cores of chip N before moving on to count the cores of chip N + 1.
999  */
1000 uint_t
1001 cmi_ntv_hwcoreid(cpu_t *cp)
1002 {
1003 	return (cpuid_get_coreid(cp) % cpuid_get_ncore_per_chip(cp));
1004 }
1005 
1006 /*
1007  * Return strand number within a single core.  cpuid_get_clogid numbers
1008  * all execution units (strands, or cores in unstranded models) sequentially
1009  * within a single chip.
1010  */
1011 uint_t
1012 cmi_ntv_hwstrandid(cpu_t *cp)
1013 {
1014 	int strands_per_core = cpuid_get_ncpu_per_chip(cp) /
1015 	    cpuid_get_ncore_per_chip(cp);
1016 
1017 	return (cpuid_get_clogid(cp) % strands_per_core);
1018 }
1019 #endif	/* __xpv */
1020 
1021 void
1022 cmi_hdlconf_rdmsr_nohw(cmi_hdl_t ophdl)
1023 {
1024 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1025 
1026 	hdl->cmih_msrsrc &= ~CMI_MSR_FLAG_RD_HWOK;
1027 }
1028 
1029 void
1030 cmi_hdlconf_wrmsr_nohw(cmi_hdl_t ophdl)
1031 {
1032 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1033 
1034 	hdl->cmih_msrsrc &= ~CMI_MSR_FLAG_WR_HWOK;
1035 }
1036 
1037 cmi_errno_t
1038 cmi_hdl_rdmsr(cmi_hdl_t ophdl, uint_t msr, uint64_t *valp)
1039 {
1040 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1041 
1042 	/*
1043 	 * Regardless of the handle class, we first check for am
1044 	 * interposed value.  In the xVM case you probably want to
1045 	 * place interposed values within the hypervisor itself, but
1046 	 * we still allow interposing them in dom0 for test and bringup
1047 	 * purposes.
1048 	 */
1049 	if ((hdl->cmih_msrsrc & CMI_MSR_FLAG_RD_INTERPOSEOK) &&
1050 	    msri_lookup(hdl, msr, valp))
1051 		return (CMI_SUCCESS);
1052 
1053 	if (!(hdl->cmih_msrsrc & CMI_MSR_FLAG_RD_HWOK))
1054 		return (CMIERR_INTERPOSE);
1055 
1056 	return (hdl->cmih_ops->cmio_rdmsr(hdl, msr, valp));
1057 }
1058 
1059 cmi_errno_t
1060 cmi_hdl_wrmsr(cmi_hdl_t ophdl, uint_t msr, uint64_t val)
1061 {
1062 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1063 
1064 	/* Invalidate any interposed value */
1065 	msri_rment(hdl, msr);
1066 
1067 	if (!(hdl->cmih_msrsrc & CMI_MSR_FLAG_WR_HWOK))
1068 		return (CMI_SUCCESS);
1069 
1070 	return (hdl->cmih_ops->cmio_wrmsr(hdl, msr, val));
1071 }
1072 
1073 void
1074 cmi_hdl_enable_mce(cmi_hdl_t ophdl)
1075 {
1076 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1077 	ulong_t cr4 = hdl->cmih_ops->cmio_getcr4(hdl);
1078 
1079 	hdl->cmih_ops->cmio_setcr4(hdl, cr4 | CR4_MCE);
1080 }
1081 
1082 void
1083 cmi_hdl_msrinterpose(cmi_hdl_t ophdl, cmi_mca_regs_t *regs, uint_t nregs)
1084 {
1085 	cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
1086 	int i;
1087 
1088 	for (i = 0; i < nregs; i++)
1089 		msri_addent(hdl, regs++);
1090 }
1091 
1092 void
1093 cmi_pcird_nohw(void)
1094 {
1095 	cmi_pcicfg_flags &= ~CMI_PCICFG_FLAG_RD_HWOK;
1096 }
1097 
1098 void
1099 cmi_pciwr_nohw(void)
1100 {
1101 	cmi_pcicfg_flags &= ~CMI_PCICFG_FLAG_WR_HWOK;
1102 }
1103 
1104 static uint32_t
1105 cmi_pci_get_cmn(int bus, int dev, int func, int reg, int asz,
1106     int *interpose, ddi_acc_handle_t hdl)
1107 {
1108 	uint32_t val;
1109 
1110 	if (cmi_pcicfg_flags & CMI_PCICFG_FLAG_RD_INTERPOSEOK &&
1111 	    pcii_lookup(bus, dev, func, reg, asz, &val)) {
1112 		if (interpose)
1113 			*interpose = 1;
1114 		return (val);
1115 	}
1116 	if (interpose)
1117 		*interpose = 0;
1118 
1119 	if (!(cmi_pcicfg_flags & CMI_PCICFG_FLAG_RD_HWOK))
1120 		return (0);
1121 
1122 	switch (asz) {
1123 	case 1:
1124 		if (hdl)
1125 			val = pci_config_get8(hdl, (off_t)reg);
1126 		else
1127 			val = (*pci_getb_func)(bus, dev, func, reg);
1128 		break;
1129 	case 2:
1130 		if (hdl)
1131 			val = pci_config_get16(hdl, (off_t)reg);
1132 		else
1133 			val = (*pci_getw_func)(bus, dev, func, reg);
1134 		break;
1135 	case 4:
1136 		if (hdl)
1137 			val = pci_config_get32(hdl, (off_t)reg);
1138 		else
1139 			val = (*pci_getl_func)(bus, dev, func, reg);
1140 		break;
1141 	default:
1142 		val = 0;
1143 	}
1144 	return (val);
1145 }
1146 
1147 uint8_t
1148 cmi_pci_getb(int bus, int dev, int func, int reg, int *interpose,
1149     ddi_acc_handle_t hdl)
1150 {
1151 	return ((uint8_t)cmi_pci_get_cmn(bus, dev, func, reg, 1, interpose,
1152 	    hdl));
1153 }
1154 
1155 uint16_t
1156 cmi_pci_getw(int bus, int dev, int func, int reg, int *interpose,
1157     ddi_acc_handle_t hdl)
1158 {
1159 	return ((uint16_t)cmi_pci_get_cmn(bus, dev, func, reg, 2, interpose,
1160 	    hdl));
1161 }
1162 
1163 uint32_t
1164 cmi_pci_getl(int bus, int dev, int func, int reg, int *interpose,
1165     ddi_acc_handle_t hdl)
1166 {
1167 	return (cmi_pci_get_cmn(bus, dev, func, reg, 4, interpose, hdl));
1168 }
1169 
1170 void
1171 cmi_pci_interposeb(int bus, int dev, int func, int reg, uint8_t val)
1172 {
1173 	pcii_addent(bus, dev, func, reg, val, 1);
1174 }
1175 
1176 void
1177 cmi_pci_interposew(int bus, int dev, int func, int reg, uint16_t val)
1178 {
1179 	pcii_addent(bus, dev, func, reg, val, 2);
1180 }
1181 
1182 void
1183 cmi_pci_interposel(int bus, int dev, int func, int reg, uint32_t val)
1184 {
1185 	pcii_addent(bus, dev, func, reg, val, 4);
1186 }
1187 
1188 static void
1189 cmi_pci_put_cmn(int bus, int dev, int func, int reg, int asz,
1190     ddi_acc_handle_t hdl, uint32_t val)
1191 {
1192 	/*
1193 	 * If there is an interposed value for this register invalidate it.
1194 	 */
1195 	pcii_rment(bus, dev, func, reg, asz);
1196 
1197 	if (!(cmi_pcicfg_flags & CMI_PCICFG_FLAG_WR_HWOK))
1198 		return;
1199 
1200 	switch (asz) {
1201 	case 1:
1202 		if (hdl)
1203 			pci_config_put8(hdl, (off_t)reg, (uint8_t)val);
1204 		else
1205 			(*pci_putb_func)(bus, dev, func, reg, (uint8_t)val);
1206 		break;
1207 
1208 	case 2:
1209 		if (hdl)
1210 			pci_config_put16(hdl, (off_t)reg, (uint16_t)val);
1211 		else
1212 			(*pci_putw_func)(bus, dev, func, reg, (uint16_t)val);
1213 		break;
1214 
1215 	case 4:
1216 		if (hdl)
1217 			pci_config_put32(hdl, (off_t)reg, val);
1218 		else
1219 			(*pci_putl_func)(bus, dev, func, reg, val);
1220 		break;
1221 
1222 	default:
1223 		break;
1224 	}
1225 }
1226 
1227 extern void
1228 cmi_pci_putb(int bus, int dev, int func, int reg, ddi_acc_handle_t hdl,
1229     uint8_t val)
1230 {
1231 	cmi_pci_put_cmn(bus, dev, func, reg, 1, hdl, val);
1232 }
1233 
1234 extern void
1235 cmi_pci_putw(int bus, int dev, int func, int reg, ddi_acc_handle_t hdl,
1236     uint16_t val)
1237 {
1238 	cmi_pci_put_cmn(bus, dev, func, reg, 2, hdl, val);
1239 }
1240 
1241 extern void
1242 cmi_pci_putl(int bus, int dev, int func, int reg, ddi_acc_handle_t hdl,
1243     uint32_t val)
1244 {
1245 	cmi_pci_put_cmn(bus, dev, func, reg, 4, hdl, val);
1246 }
1247