xref: /freebsd/sys/dev/hwpmc/hwpmc_core.c (revision 7ef62cebc2f965b0f640263e179276928885e33d)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2008 Joseph Koshy
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * Intel Core PMCs.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 
36 #include <sys/param.h>
37 #include <sys/bus.h>
38 #include <sys/pmc.h>
39 #include <sys/pmckern.h>
40 #include <sys/smp.h>
41 #include <sys/systm.h>
42 
43 #include <machine/intr_machdep.h>
44 #include <x86/apicvar.h>
45 #include <machine/cpu.h>
46 #include <machine/cpufunc.h>
47 #include <machine/md_var.h>
48 #include <machine/specialreg.h>
49 
50 #define	CORE_CPUID_REQUEST		0xA
51 #define	CORE_CPUID_REQUEST_SIZE		0x4
52 #define	CORE_CPUID_EAX			0x0
53 #define	CORE_CPUID_EBX			0x1
54 #define	CORE_CPUID_ECX			0x2
55 #define	CORE_CPUID_EDX			0x3
56 
57 #define	IAF_PMC_CAPS			\
58 	(PMC_CAP_READ | PMC_CAP_WRITE | PMC_CAP_INTERRUPT | \
59 	 PMC_CAP_USER | PMC_CAP_SYSTEM)
60 #define	IAF_RI_TO_MSR(RI)		((RI) + (1 << 30))
61 
62 #define	IAP_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | PMC_CAP_SYSTEM | \
63     PMC_CAP_EDGE | PMC_CAP_THRESHOLD | PMC_CAP_READ | PMC_CAP_WRITE |	 \
64     PMC_CAP_INVERT | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE)
65 
66 #define	EV_IS_NOTARCH		0
67 #define	EV_IS_ARCH_SUPP		1
68 #define	EV_IS_ARCH_NOTSUPP	-1
69 
70 /*
71  * "Architectural" events defined by Intel.  The values of these
72  * symbols correspond to positions in the bitmask returned by
73  * the CPUID.0AH instruction.
74  */
75 enum core_arch_events {
76 	CORE_AE_BRANCH_INSTRUCTION_RETIRED	= 5,
77 	CORE_AE_BRANCH_MISSES_RETIRED		= 6,
78 	CORE_AE_INSTRUCTION_RETIRED		= 1,
79 	CORE_AE_LLC_MISSES			= 4,
80 	CORE_AE_LLC_REFERENCE			= 3,
81 	CORE_AE_UNHALTED_REFERENCE_CYCLES	= 2,
82 	CORE_AE_UNHALTED_CORE_CYCLES		= 0
83 };
84 
85 static enum pmc_cputype	core_cputype;
86 static int core_version;
87 
/*
 * Per-CPU driver state: software shadows of the fixed-function control
 * (IAF_CTRL) and global control (IA_GLOBAL_CTRL) MSRs, followed by one
 * pmc_hw descriptor per hardware counter.  Programmable counters occupy
 * slots [0, core_iap_npmc); fixed-function counters follow starting at
 * index core_iaf_ri (see core_pcpu_init() and pmc_core_initialize()).
 */
struct core_cpu {
	volatile uint32_t	pc_iafctrl;	/* Fixed function control. */
	volatile uint64_t	pc_globalctrl;	/* Global control register. */
	struct pmc_hw		pc_corepmcs[];	/* Flexible array, sized at allocation. */
};
93 
94 static struct core_cpu **core_pcpu;
95 
96 static uint32_t core_architectural_events;
97 static uint64_t core_pmcmask;
98 
99 static int core_iaf_ri;		/* relative index of fixed counters */
100 static int core_iaf_width;
101 static int core_iaf_npmc;
102 
103 static int core_iap_width;
104 static int core_iap_npmc;
105 static int core_iap_wroffset;
106 
107 static u_int pmc_alloc_refs;
108 static bool pmc_tsx_force_abort_set;
109 
/*
 * Per-CPU init/fini stub used by the IAF class: per-CPU setup and
 * teardown for both classes is done once by the IAP class hooks.
 */
static int
core_pcpu_noop(struct pmc_mdep *md, int cpu)
{
	/* Deliberately unused. */
	(void)md;
	(void)cpu;

	return (0);
}
117 
/*
 * Allocate and wire up the per-CPU state for 'cpu': one pmc_hw slot per
 * programmable counter, plus (on version >= 2 PMUs) one per fixed
 * counter, all cross-linked into pmc_pcpu[cpu]->pc_hwpmcs[].
 */
static int
core_pcpu_init(struct pmc_mdep *md, int cpu)
{
	struct pmc_cpu *pc;
	struct core_cpu *cc;
	struct pmc_hw *phw;
	int core_ri, n, npmc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[iaf,%d] insane cpu number %d", __LINE__, cpu));

	PMCDBG1(MDP,INI,1,"core-init cpu=%d", cpu);

	/* First row index and count of the programmable (IAP) counters. */
	core_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_ri;
	npmc = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_num;

	/* Fixed counters share this structure and follow the IAP rows. */
	if (core_version >= 2)
		npmc += md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF].pcd_num;

	cc = malloc(sizeof(struct core_cpu) + npmc * sizeof(struct pmc_hw),
	    M_PMC, M_WAITOK | M_ZERO);

	core_pcpu[cpu] = cc;
	pc = pmc_pcpu[cpu];

	KASSERT(pc != NULL && cc != NULL,
	    ("[core,%d] NULL per-cpu structures cpu=%d", __LINE__, cpu));

	/* Publish each hardware slot in the generic per-CPU table. */
	for (n = 0, phw = cc->pc_corepmcs; n < npmc; n++, phw++) {
		phw->phw_state 	  = PMC_PHW_FLAG_IS_ENABLED |
		    PMC_PHW_CPU_TO_STATE(cpu) |
		    PMC_PHW_INDEX_TO_STATE(n + core_ri);
		phw->phw_pmc	  = NULL;
		pc->pc_hwpmcs[n + core_ri]  = phw;
	}

	if (core_version >= 2 && vm_guest == VM_GUEST_NO) {
		/* Enable Freezing PMCs on PMI. */
		wrmsr(MSR_DEBUGCTLMSR, rdmsr(MSR_DEBUGCTLMSR) | 0x1000);
	}

	return (0);
}
161 
/*
 * Tear down the per-CPU state for 'cpu': quiesce all counter control
 * MSRs, unhook the pmc_hw slots from pmc_pcpu[cpu], and free the
 * core_cpu structure.  Idempotent if already finalized.
 */
static int
core_pcpu_fini(struct pmc_mdep *md, int cpu)
{
	int core_ri, n, npmc;
	struct pmc_cpu *pc;
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] insane cpu number (%d)", __LINE__, cpu));

	PMCDBG1(MDP,INI,1,"core-pcpu-fini cpu=%d", cpu);

	if ((cc = core_pcpu[cpu]) == NULL)
		return (0);

	core_pcpu[cpu] = NULL;

	pc = pmc_pcpu[cpu];

	KASSERT(pc != NULL, ("[core,%d] NULL per-cpu %d state", __LINE__,
		cpu));

	npmc = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_num;
	core_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_ri;

	/* Disable every programmable counter's event-select MSR. */
	for (n = 0; n < npmc; n++)
		wrmsr(IAP_EVSEL0 + n, 0);

	/* Disable the fixed counters too, and include them in the unhook. */
	if (core_version >= 2) {
		wrmsr(IAF_CTRL, 0);
		npmc += md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF].pcd_num;
	}

	for (n = 0; n < npmc; n++)
		pc->pc_hwpmcs[n + core_ri] = NULL;

	free(cc, M_PMC);

	return (0);
}
202 
203 /*
204  * Fixed function counters.
205  */
206 
207 static pmc_value_t
208 iaf_perfctr_value_to_reload_count(pmc_value_t v)
209 {
210 
211 	/* If the PMC has overflowed, return a reload count of zero. */
212 	if ((v & (1ULL << (core_iaf_width - 1))) == 0)
213 		return (0);
214 	v &= (1ULL << core_iaf_width) - 1;
215 	return (1ULL << core_iaf_width) - v;
216 }
217 
218 static pmc_value_t
219 iaf_reload_count_to_perfctr_value(pmc_value_t rlc)
220 {
221 	return (1ULL << core_iaf_width) - rlc;
222 }
223 
224 static int
225 iaf_allocate_pmc(int cpu, int ri, struct pmc *pm,
226     const struct pmc_op_pmcallocate *a)
227 {
228 	uint8_t ev, umask;
229 	uint32_t caps;
230 	uint64_t config, flags;
231 	const struct pmc_md_iap_op_pmcallocate *iap;
232 
233 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
234 	    ("[core,%d] illegal CPU %d", __LINE__, cpu));
235 
236 	PMCDBG2(MDP,ALL,1, "iaf-allocate ri=%d reqcaps=0x%x", ri, pm->pm_caps);
237 
238 	if (ri < 0 || ri > core_iaf_npmc)
239 		return (EINVAL);
240 
241 	if (a->pm_class != PMC_CLASS_IAF)
242 		return (EINVAL);
243 
244 	iap = &a->pm_md.pm_iap;
245 	config = iap->pm_iap_config;
246 	ev = IAP_EVSEL_GET(config);
247 	umask = IAP_UMASK_GET(config);
248 
249 	if (ev == 0x0) {
250 		if (umask != ri + 1)
251 			return (EINVAL);
252 	} else {
253 		switch (ri) {
254 		case 0:	/* INST_RETIRED.ANY */
255 			if (ev != 0xC0 || umask != 0x00)
256 				return (EINVAL);
257 			break;
258 		case 1:	/* CPU_CLK_UNHALTED.THREAD */
259 			if (ev != 0x3C || umask != 0x00)
260 				return (EINVAL);
261 			break;
262 		case 2:	/* CPU_CLK_UNHALTED.REF */
263 			if (ev != 0x3C || umask != 0x01)
264 				return (EINVAL);
265 			break;
266 		case 3:	/* TOPDOWN.SLOTS */
267 			if (ev != 0xA4 || umask != 0x01)
268 				return (EINVAL);
269 			break;
270 		default:
271 			return (EINVAL);
272 		}
273 	}
274 
275 	pmc_alloc_refs++;
276 	if ((cpu_stdext_feature3 & CPUID_STDEXT3_TSXFA) != 0 &&
277 	    !pmc_tsx_force_abort_set) {
278 		pmc_tsx_force_abort_set = true;
279 		x86_msr_op(MSR_TSX_FORCE_ABORT, MSR_OP_RENDEZVOUS_ALL |
280 		    MSR_OP_WRITE, 1, NULL);
281 	}
282 
283 	flags = 0;
284 	if (config & IAP_OS)
285 		flags |= IAF_OS;
286 	if (config & IAP_USR)
287 		flags |= IAF_USR;
288 	if (config & IAP_ANY)
289 		flags |= IAF_ANY;
290 	if (config & IAP_INT)
291 		flags |= IAF_PMI;
292 
293 	caps = a->pm_caps;
294 	if (caps & PMC_CAP_INTERRUPT)
295 		flags |= IAF_PMI;
296 	if (caps & PMC_CAP_SYSTEM)
297 		flags |= IAF_OS;
298 	if (caps & PMC_CAP_USER)
299 		flags |= IAF_USR;
300 	if ((caps & (PMC_CAP_USER | PMC_CAP_SYSTEM)) == 0)
301 		flags |= (IAF_OS | IAF_USR);
302 
303 	pm->pm_md.pm_iaf.pm_iaf_ctrl = (flags << (ri * 4));
304 
305 	PMCDBG1(MDP,ALL,2, "iaf-allocate config=0x%jx",
306 	    (uintmax_t) pm->pm_md.pm_iaf.pm_iaf_ctrl);
307 
308 	return (0);
309 }
310 
311 static int
312 iaf_config_pmc(int cpu, int ri, struct pmc *pm)
313 {
314 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
315 	    ("[core,%d] illegal CPU %d", __LINE__, cpu));
316 
317 	KASSERT(ri >= 0 && ri < core_iaf_npmc,
318 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
319 
320 	PMCDBG3(MDP,CFG,1, "iaf-config cpu=%d ri=%d pm=%p", cpu, ri, pm);
321 
322 	KASSERT(core_pcpu[cpu] != NULL, ("[core,%d] null per-cpu %d", __LINE__,
323 	    cpu));
324 
325 	core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc = pm;
326 
327 	return (0);
328 }
329 
330 static int
331 iaf_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
332 {
333 	struct pmc_hw *phw;
334 
335 	phw = &core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri];
336 
337 	snprintf(pi->pm_name, sizeof(pi->pm_name), "IAF-%d", ri);
338 	pi->pm_class = PMC_CLASS_IAF;
339 
340 	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
341 		pi->pm_enabled = TRUE;
342 		*ppmc          = phw->phw_pmc;
343 	} else {
344 		pi->pm_enabled = FALSE;
345 		*ppmc          = NULL;
346 	}
347 
348 	return (0);
349 }
350 
351 static int
352 iaf_get_config(int cpu, int ri, struct pmc **ppm)
353 {
354 	*ppm = core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc;
355 
356 	return (0);
357 }
358 
/*
 * Return the user-visible RDPMC selector for fixed-counter row 'ri'.
 * Fixed counters are addressed with bit 30 set (IAF_RI_TO_MSR).
 */
static int
iaf_get_msr(int ri, uint32_t *msr)
{
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[iaf,%d] ri %d out of range", __LINE__, ri));

	*msr = IAF_RI_TO_MSR(ri);

	return (0);
}
369 
/*
 * Read fixed-function counter 'ri'.  Sampling-mode values are converted
 * back into reload counts; counting-mode values are masked to the
 * counter's width.  Must run on the target CPU (uses RDPMC).
 */
static int
iaf_read_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t *v)
{
	pmc_value_t tmp;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	tmp = rdpmc(IAF_RI_TO_MSR(ri));

	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		*v = iaf_perfctr_value_to_reload_count(tmp);
	else
		*v = tmp & ((1ULL << core_iaf_width) - 1);

	PMCDBG4(MDP,REA,1, "iaf-read cpu=%d ri=%d msr=0x%x -> v=%jx", cpu, ri,
	    IAF_RI_TO_MSR(ri), *v);

	return (0);
}
392 
/*
 * Release fixed-function counter 'ri'.  Drops the driver-wide
 * allocation refcount taken in iaf_allocate_pmc(); when the last
 * reference goes away, the TSX_FORCE_ABORT MSR write made at first
 * allocation is undone.
 */
static int
iaf_release_pmc(int cpu, int ri, struct pmc *pmc)
{
	PMCDBG3(MDP,REL,1, "iaf-release cpu=%d ri=%d pm=%p", cpu, ri, pmc);

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	KASSERT(core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc == NULL,
	    ("[core,%d] PHW pmc non-NULL", __LINE__));

	/* Post-decrement: triggers when this was the last reference. */
	MPASS(pmc_alloc_refs > 0);
	if (pmc_alloc_refs-- == 1 && pmc_tsx_force_abort_set) {
		pmc_tsx_force_abort_set = false;
		x86_msr_op(MSR_TSX_FORCE_ABORT, MSR_OP_RENDEZVOUS_ALL |
		    MSR_OP_WRITE, 0, NULL);
	}

	return (0);
}
415 
/*
 * Start fixed-function counter 'ri': merge the pre-computed control
 * bits into the cached IAF_CTRL shadow, then enable the counter's bit
 * (offset by IAF_OFFSET) in the cached IA_GLOBAL_CTRL, pushing both
 * shadows to hardware.
 */
static int
iaf_start_pmc(int cpu, int ri, struct pmc *pm)
{
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	PMCDBG2(MDP,STA,1,"iaf-start cpu=%d ri=%d", cpu, ri);

	cc = core_pcpu[cpu];
	cc->pc_iafctrl |= pm->pm_md.pm_iaf.pm_iaf_ctrl;
	wrmsr(IAF_CTRL, cc->pc_iafctrl);

	cc->pc_globalctrl |= (1ULL << (ri + IAF_OFFSET));
	wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);

	PMCDBG4(MDP,STA,1,"iafctrl=%x(%x) globalctrl=%jx(%jx)",
	    cc->pc_iafctrl, (uint32_t) rdmsr(IAF_CTRL),
	    cc->pc_globalctrl, rdmsr(IA_GLOBAL_CTRL));

	return (0);
}
441 
/*
 * Stop fixed-function counter 'ri' by clearing its 4-bit control field
 * in the cached IAF_CTRL shadow and writing the shadow to hardware.
 * The IA_GLOBAL_CTRL bit is left set; clearing IAF_CTRL suffices.
 */
static int
iaf_stop_pmc(int cpu, int ri, struct pmc *pm)
{
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	PMCDBG2(MDP,STA,1,"iaf-stop cpu=%d ri=%d", cpu, ri);

	cc = core_pcpu[cpu];

	cc->pc_iafctrl &= ~(IAF_MASK << (ri * 4));
	wrmsr(IAF_CTRL, cc->pc_iafctrl);

	/* Don't need to write IA_GLOBAL_CTRL, one disable is enough. */

	PMCDBG4(MDP,STO,1,"iafctrl=%x(%x) globalctrl=%jx(%jx)",
	    cc->pc_iafctrl, (uint32_t) rdmsr(IAF_CTRL),
	    cc->pc_globalctrl, rdmsr(IA_GLOBAL_CTRL));

	return (0);
}
467 
/*
 * Write a new value into fixed-function counter 'ri'.  Sampling-mode
 * values arrive as reload counts and are converted first.  The counter
 * is disabled in IAF_CTRL around the write, then re-enabled.
 */
static int
iaf_write_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t v)
{
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	cc = core_pcpu[cpu];

	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		v = iaf_reload_count_to_perfctr_value(v);

	/* Turn off the fixed counter */
	wrmsr(IAF_CTRL, cc->pc_iafctrl & ~(IAF_MASK << (ri * 4)));

	wrmsr(IAF_CTR0 + ri, v & ((1ULL << core_iaf_width) - 1));

	/* Turn on fixed counters */
	wrmsr(IAF_CTRL, cc->pc_iafctrl);

	PMCDBG6(MDP,WRI,1, "iaf-write cpu=%d ri=%d msr=0x%x v=%jx iafctrl=%jx "
	    "pmc=%jx", cpu, ri, IAF_RI_TO_MSR(ri), v,
	    (uintmax_t) rdmsr(IAF_CTRL),
	    (uintmax_t) rdpmc(IAF_RI_TO_MSR(ri)));

	return (0);
}
498 
499 
/*
 * Register the fixed-function (IAF) counter class with the machine
 * dependent layer: 'npmc' counters of 'pmcwidth' bits, assigned row
 * indices starting at the current md->pmd_npmc.  Per-CPU init/fini are
 * no-ops here; the IAP class hooks handle both classes.
 */
static void
iaf_initialize(struct pmc_mdep *md, int maxcpu, int npmc, int pmcwidth)
{
	struct pmc_classdep *pcd;

	KASSERT(md != NULL, ("[iaf,%d] md is NULL", __LINE__));

	PMCDBG0(MDP,INI,1, "iaf-initialize");

	pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF];

	pcd->pcd_caps	= IAF_PMC_CAPS;
	pcd->pcd_class	= PMC_CLASS_IAF;
	pcd->pcd_num	= npmc;
	pcd->pcd_ri	= md->pmd_npmc;
	pcd->pcd_width	= pmcwidth;

	pcd->pcd_allocate_pmc	= iaf_allocate_pmc;
	pcd->pcd_config_pmc	= iaf_config_pmc;
	pcd->pcd_describe	= iaf_describe;
	pcd->pcd_get_config	= iaf_get_config;
	pcd->pcd_get_msr	= iaf_get_msr;
	pcd->pcd_pcpu_fini	= core_pcpu_noop;
	pcd->pcd_pcpu_init	= core_pcpu_noop;
	pcd->pcd_read_pmc	= iaf_read_pmc;
	pcd->pcd_release_pmc	= iaf_release_pmc;
	pcd->pcd_start_pmc	= iaf_start_pmc;
	pcd->pcd_stop_pmc	= iaf_stop_pmc;
	pcd->pcd_write_pmc	= iaf_write_pmc;

	md->pmd_npmc	       += npmc;
}
532 
533 /*
534  * Intel programmable PMCs.
535  */
536 
537 /* Sub fields of UMASK that this event supports. */
538 #define	IAP_M_CORE		(1 << 0) /* Core specificity */
539 #define	IAP_M_AGENT		(1 << 1) /* Agent specificity */
540 #define	IAP_M_PREFETCH		(1 << 2) /* Prefetch */
541 #define	IAP_M_MESI		(1 << 3) /* MESI */
542 #define	IAP_M_SNOOPRESPONSE	(1 << 4) /* Snoop response */
543 #define	IAP_M_SNOOPTYPE		(1 << 5) /* Snoop type */
544 #define	IAP_M_TRANSITION	(1 << 6) /* Transition */
545 
546 #define	IAP_F_CORE		(0x3 << 14) /* Core specificity */
547 #define	IAP_F_AGENT		(0x1 << 13) /* Agent specificity */
548 #define	IAP_F_PREFETCH		(0x3 << 12) /* Prefetch */
549 #define	IAP_F_MESI		(0xF <<  8) /* MESI */
550 #define	IAP_F_SNOOPRESPONSE	(0xB <<  8) /* Snoop response */
551 #define	IAP_F_SNOOPTYPE		(0x3 <<  8) /* Snoop type */
552 #define	IAP_F_TRANSITION	(0x1 << 12) /* Transition */
553 
554 #define	IAP_PREFETCH_RESERVED	(0x2 << 12)
555 #define	IAP_CORE_THIS		(0x1 << 14)
556 #define	IAP_CORE_ALL		(0x3 << 14)
557 #define	IAP_F_CMASK		0xFF000000
558 
559 static pmc_value_t
560 iap_perfctr_value_to_reload_count(pmc_value_t v)
561 {
562 
563 	/* If the PMC has overflowed, return a reload count of zero. */
564 	if ((v & (1ULL << (core_iap_width - 1))) == 0)
565 		return (0);
566 	v &= (1ULL << core_iap_width) - 1;
567 	return (1ULL << core_iap_width) - v;
568 }
569 
570 static pmc_value_t
571 iap_reload_count_to_perfctr_value(pmc_value_t rlc)
572 {
573 	return (1ULL << core_iap_width) - rlc;
574 }
575 
/*
 * Return non-zero if programmable counter 'ri' has overflowed.
 * Must run on the CPU owning the counter (uses RDPMC).
 */
static int
iap_pmc_has_overflowed(int ri)
{
	uint64_t v;

	/*
	 * We treat a Core (i.e., Intel architecture v1) PMC as has
	 * having overflowed if its MSB is zero.
	 */
	v = rdpmc(ri);
	return ((v & (1ULL << (core_iap_width - 1))) == 0);
}
588 
/*
 * Return non-zero if event 'evsel' may be programmed on counter 'ri'
 * on Core i7 / Nehalem-EX class CPUs.
 */
static int
iap_event_corei7_ok_on_counter(uint8_t evsel, int ri)
{
	/* Events restricted to counters 0 and 1 on this family. */
	static const uint8_t ctr01_only[] = {
		0x40, 0x41, 0x42, 0x43, 0x4C, 0x4E, 0x51, 0x52, 0x53, 0x63
	};
	uint32_t mask;
	unsigned int i;

	mask = ~0;	/* Default: any counter. */
	for (i = 0; i < sizeof(ctr01_only) / sizeof(ctr01_only[0]); i++) {
		if (evsel == ctr01_only[i]) {
			mask = 0x3;
			break;
		}
	}

	return (mask & (1 << ri));
}
615 
/*
 * Return non-zero if event 'evsel' may be programmed on counter 'ri'
 * on Westmere class CPUs.
 */
static int
iap_event_westmere_ok_on_counter(uint8_t evsel, int ri)
{
	/* Per-event valid-counter bitmasks; absent events allow any counter. */
	static const struct {
		uint8_t		ev;
		uint32_t	ctrs;
	} restricted[] = {
		{ 0x60, 0x1 },	/* counter 0 only */
		{ 0xB3, 0x1 },	/* counter 0 only */
		{ 0x4C, 0x3 },	/* counters 0-1 */
		{ 0x4E, 0x3 },
		{ 0x51, 0x3 },
		{ 0x52, 0x3 },
		{ 0x63, 0x3 },
	};
	uint32_t mask;
	unsigned int i;

	mask = ~0;
	for (i = 0; i < sizeof(restricted) / sizeof(restricted[0]); i++) {
		if (evsel == restricted[i].ev) {
			mask = restricted[i].ctrs;
			break;
		}
	}

	return (mask & (1 << ri));
}
643 
/*
 * Return non-zero if event 'evsel' may be programmed on counter 'ri'
 * on Sandy Bridge / Ivy Bridge / Haswell / Broadwell class CPUs.
 */
static int
iap_event_sb_sbx_ib_ibx_ok_on_counter(uint8_t evsel, int ri)
{
	/* Per-event valid-counter bitmasks; absent events allow any counter. */
	static const struct {
		uint8_t		ev;
		uint32_t	ctrs;
	} restricted[] = {
		{ 0xB7, 0x1 },	/* counter 0 only */
		{ 0xC0, 0x2 },	/* counter 1 only */
		{ 0x48, 0x4 },	/* counter 2 only */
		{ 0xA2, 0x4 },
		{ 0xA3, 0x4 },
		{ 0xBB, 0x8 },	/* counter 3 only */
		{ 0xCD, 0x8 },
	};
	uint32_t mask;
	unsigned int i;

	mask = ~0;
	for (i = 0; i < sizeof(restricted) / sizeof(restricted[0]); i++) {
		if (evsel == restricted[i].ev) {
			mask = restricted[i].ctrs;
			break;
		}
	}

	return (mask & (1 << ri));
}
676 
/*
 * Return non-zero if event 'evsel' may be programmed on counter 'ri'
 * on Core / Core2 class CPUs.
 */
static int
iap_event_core_ok_on_counter(uint8_t evsel, int ri)
{
	/* Per-event valid-counter bitmasks; absent events allow any counter. */
	static const struct {
		uint8_t		ev;
		uint32_t	ctrs;
	} restricted[] = {
		{ 0x10, 0x1 },	/* counter 0 only */
		{ 0x14, 0x1 },
		{ 0x18, 0x1 },
		{ 0xB3, 0x1 },
		{ 0xC1, 0x1 },
		{ 0xCB, 0x1 },
		{ 0x11, 0x2 },	/* counter 1 only */
		{ 0x12, 0x2 },
		{ 0x13, 0x2 },
	};
	uint32_t mask;
	unsigned int i;

	mask = ~0;
	for (i = 0; i < sizeof(restricted) / sizeof(restricted[0]); i++) {
		if (evsel == restricted[i].ev) {
			mask = restricted[i].ctrs;
			break;
		}
	}

	return (mask & (1 << ri));
}
710 
711 static int
712 iap_allocate_pmc(int cpu, int ri, struct pmc *pm,
713     const struct pmc_op_pmcallocate *a)
714 {
715 	uint8_t ev;
716 	const struct pmc_md_iap_op_pmcallocate *iap;
717 
718 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
719 	    ("[core,%d] illegal CPU %d", __LINE__, cpu));
720 	KASSERT(ri >= 0 && ri < core_iap_npmc,
721 	    ("[core,%d] illegal row-index value %d", __LINE__, ri));
722 
723 	if (a->pm_class != PMC_CLASS_IAP)
724 		return (EINVAL);
725 
726 	iap = &a->pm_md.pm_iap;
727 	ev = IAP_EVSEL_GET(iap->pm_iap_config);
728 
729 	switch (core_cputype) {
730 	case PMC_CPU_INTEL_CORE:
731 	case PMC_CPU_INTEL_CORE2:
732 	case PMC_CPU_INTEL_CORE2EXTREME:
733 		if (iap_event_core_ok_on_counter(ev, ri) == 0)
734 			return (EINVAL);
735 	case PMC_CPU_INTEL_COREI7:
736 	case PMC_CPU_INTEL_NEHALEM_EX:
737 		if (iap_event_corei7_ok_on_counter(ev, ri) == 0)
738 			return (EINVAL);
739 		break;
740 	case PMC_CPU_INTEL_WESTMERE:
741 	case PMC_CPU_INTEL_WESTMERE_EX:
742 		if (iap_event_westmere_ok_on_counter(ev, ri) == 0)
743 			return (EINVAL);
744 		break;
745 	case PMC_CPU_INTEL_SANDYBRIDGE:
746 	case PMC_CPU_INTEL_SANDYBRIDGE_XEON:
747 	case PMC_CPU_INTEL_IVYBRIDGE:
748 	case PMC_CPU_INTEL_IVYBRIDGE_XEON:
749 	case PMC_CPU_INTEL_HASWELL:
750 	case PMC_CPU_INTEL_HASWELL_XEON:
751 	case PMC_CPU_INTEL_BROADWELL:
752 	case PMC_CPU_INTEL_BROADWELL_XEON:
753 		if (iap_event_sb_sbx_ib_ibx_ok_on_counter(ev, ri) == 0)
754 			return (EINVAL);
755 		break;
756 	case PMC_CPU_INTEL_ATOM:
757 	case PMC_CPU_INTEL_ATOM_SILVERMONT:
758 	case PMC_CPU_INTEL_ATOM_GOLDMONT:
759 	case PMC_CPU_INTEL_ATOM_GOLDMONT_P:
760 	case PMC_CPU_INTEL_ATOM_TREMONT:
761 	case PMC_CPU_INTEL_SKYLAKE:
762 	case PMC_CPU_INTEL_SKYLAKE_XEON:
763 	case PMC_CPU_INTEL_ICELAKE:
764 	case PMC_CPU_INTEL_ICELAKE_XEON:
765 	case PMC_CPU_INTEL_ALDERLAKE:
766 	default:
767 		break;
768 	}
769 
770 	pm->pm_md.pm_iap.pm_iap_evsel = iap->pm_iap_config;
771 	return (0);
772 }
773 
774 static int
775 iap_config_pmc(int cpu, int ri, struct pmc *pm)
776 {
777 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
778 	    ("[core,%d] illegal CPU %d", __LINE__, cpu));
779 
780 	KASSERT(ri >= 0 && ri < core_iap_npmc,
781 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
782 
783 	PMCDBG3(MDP,CFG,1, "iap-config cpu=%d ri=%d pm=%p", cpu, ri, pm);
784 
785 	KASSERT(core_pcpu[cpu] != NULL, ("[core,%d] null per-cpu %d", __LINE__,
786 	    cpu));
787 
788 	core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc = pm;
789 
790 	return (0);
791 }
792 
793 static int
794 iap_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
795 {
796 	struct pmc_hw *phw;
797 
798 	phw = &core_pcpu[cpu]->pc_corepmcs[ri];
799 
800 	snprintf(pi->pm_name, sizeof(pi->pm_name), "IAP-%d", ri);
801 	pi->pm_class = PMC_CLASS_IAP;
802 
803 	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
804 		pi->pm_enabled = TRUE;
805 		*ppmc          = phw->phw_pmc;
806 	} else {
807 		pi->pm_enabled = FALSE;
808 		*ppmc          = NULL;
809 	}
810 
811 	return (0);
812 }
813 
814 static int
815 iap_get_config(int cpu, int ri, struct pmc **ppm)
816 {
817 	*ppm = core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc;
818 
819 	return (0);
820 }
821 
/*
 * Return the user-visible RDPMC selector for programmable counter
 * row 'ri' (programmable counters use their row index directly).
 */
static int
iap_get_msr(int ri, uint32_t *msr)
{
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[iap,%d] ri %d out of range", __LINE__, ri));

	*msr = ri;

	return (0);
}
832 
/*
 * Read programmable counter 'ri'.  Sampling-mode values are converted
 * back into reload counts; counting-mode values are masked to the
 * counter's width.  Must run on the target CPU (uses RDPMC).
 */
static int
iap_read_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t *v)
{
	pmc_value_t tmp;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	tmp = rdpmc(ri);
	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		*v = iap_perfctr_value_to_reload_count(tmp);
	else
		*v = tmp & ((1ULL << core_iap_width) - 1);

	PMCDBG4(MDP,REA,1, "iap-read cpu=%d ri=%d msr=0x%x -> v=%jx", cpu, ri,
	    IAP_PMC0 + ri, *v);

	return (0);
}
854 
855 static int
856 iap_release_pmc(int cpu, int ri, struct pmc *pm)
857 {
858 	(void) pm;
859 
860 	PMCDBG3(MDP,REL,1, "iap-release cpu=%d ri=%d pm=%p", cpu, ri,
861 	    pm);
862 
863 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
864 	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
865 	KASSERT(ri >= 0 && ri < core_iap_npmc,
866 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
867 
868 	KASSERT(core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc
869 	    == NULL, ("[core,%d] PHW pmc non-NULL", __LINE__));
870 
871 	return (0);
872 }
873 
/*
 * Start programmable counter 'ri': perform any event-specific MSR
 * setup (off-core response events 0xB7/0xBB), write the event-select
 * MSR with the enable bit set, and on version >= 2 PMUs also set the
 * counter's bit in the cached IA_GLOBAL_CTRL shadow.
 */
static int
iap_start_pmc(int cpu, int ri, struct pmc *pm)
{
	uint64_t evsel;
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	cc = core_pcpu[cpu];

	PMCDBG2(MDP,STA,1, "iap-start cpu=%d ri=%d", cpu, ri);

	evsel = pm->pm_md.pm_iap.pm_iap_evsel;

	PMCDBG4(MDP,STA,2, "iap-start/2 cpu=%d ri=%d evselmsr=0x%x evsel=0x%x",
	    cpu, ri, IAP_EVSEL0 + ri, evsel);

	/* Event specific configuration. */

	switch (IAP_EVSEL_GET(evsel)) {
	case 0xB7:
		wrmsr(IA_OFFCORE_RSP0, pm->pm_md.pm_iap.pm_iap_rsp);
		break;
	case 0xBB:
		wrmsr(IA_OFFCORE_RSP1, pm->pm_md.pm_iap.pm_iap_rsp);
		break;
	default:
		break;
	}

	wrmsr(IAP_EVSEL0 + ri, evsel | IAP_EN);

	if (core_version >= 2) {
		cc->pc_globalctrl |= (1ULL << ri);
		wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);
	}

	return (0);
}
916 
917 static int
918 iap_stop_pmc(int cpu, int ri, struct pmc *pm __unused)
919 {
920 
921 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
922 	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
923 	KASSERT(ri >= 0 && ri < core_iap_npmc,
924 	    ("[core,%d] illegal row index %d", __LINE__, ri));
925 
926 	PMCDBG2(MDP,STO,1, "iap-stop cpu=%d ri=%d", cpu, ri);
927 
928 	wrmsr(IAP_EVSEL0 + ri, 0);
929 
930 	/* Don't need to write IA_GLOBAL_CTRL, one disable is enough. */
931 
932 	return (0);
933 }
934 
/*
 * Write a new value into programmable counter 'ri'.  Sampling-mode
 * values arrive as reload counts and are converted first; the value is
 * truncated to the counter width.  When full-width writes are
 * supported, core_iap_wroffset redirects the write to the full-width
 * alias MSR (IAP_A_PMC0).
 */
static int
iap_write_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t v)
{

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row index %d", __LINE__, ri));

	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		v = iap_reload_count_to_perfctr_value(v);

	v &= (1ULL << core_iap_width) - 1;

	PMCDBG4(MDP,WRI,1, "iap-write cpu=%d ri=%d msr=0x%x v=%jx", cpu, ri,
	    IAP_PMC0 + ri, v);

	/*
	 * Write the new value to the counter (or it's alias).  The
	 * counter will be in a stopped state when the pcd_write()
	 * entry point is called.
	 */
	wrmsr(core_iap_wroffset + IAP_PMC0 + ri, v);
	return (0);
}
960 
961 
/*
 * Register the programmable (IAP) counter class with the machine
 * dependent layer: 'npmc' counters of 'pmcwidth' bits.  'flags' is the
 * CPUID.0AH:EBX architectural-event availability mask (a set bit means
 * the event is NOT available); its complement is recorded.  The per-CPU
 * init/fini hooks for both classes are registered here.
 */
static void
iap_initialize(struct pmc_mdep *md, int maxcpu, int npmc, int pmcwidth,
    int flags)
{
	struct pmc_classdep *pcd;

	KASSERT(md != NULL, ("[iap,%d] md is NULL", __LINE__));

	PMCDBG0(MDP,INI,1, "iap-initialize");

	/* Remember the set of architectural events supported. */
	core_architectural_events = ~flags;

	pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP];

	pcd->pcd_caps	= IAP_PMC_CAPS;
	pcd->pcd_class	= PMC_CLASS_IAP;
	pcd->pcd_num	= npmc;
	pcd->pcd_ri	= md->pmd_npmc;
	pcd->pcd_width	= pmcwidth;

	pcd->pcd_allocate_pmc	= iap_allocate_pmc;
	pcd->pcd_config_pmc	= iap_config_pmc;
	pcd->pcd_describe	= iap_describe;
	pcd->pcd_get_config	= iap_get_config;
	pcd->pcd_get_msr	= iap_get_msr;
	pcd->pcd_pcpu_fini	= core_pcpu_fini;
	pcd->pcd_pcpu_init	= core_pcpu_init;
	pcd->pcd_read_pmc	= iap_read_pmc;
	pcd->pcd_release_pmc	= iap_release_pmc;
	pcd->pcd_start_pmc	= iap_start_pmc;
	pcd->pcd_stop_pmc	= iap_stop_pmc;
	pcd->pcd_write_pmc	= iap_write_pmc;

	md->pmd_npmc	       += npmc;
}
998 
/*
 * PMI handler for version-1 PMUs (original Core).  There is no global
 * status register, so every programmable counter is polled for
 * overflow.  Returns non-zero if at least one overflow was found.
 */
static int
core_intr(struct trapframe *tf)
{
	pmc_value_t v;
	struct pmc *pm;
	struct core_cpu *cc;
	int error, found_interrupt, ri;

	PMCDBG3(MDP,INT, 1, "cpu=%d tf=%p um=%d", curcpu, (void *) tf,
	    TRAPF_USERMODE(tf));

	found_interrupt = 0;
	cc = core_pcpu[curcpu];

	for (ri = 0; ri < core_iap_npmc; ri++) {

		/* Only running, sampling-mode PMCs can raise a PMI. */
		if ((pm = cc->pc_corepmcs[ri].phw_pmc) == NULL ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
			continue;

		if (!iap_pmc_has_overflowed(ri))
			continue;

		found_interrupt = 1;

		if (pm->pm_state != PMC_STATE_RUNNING)
			continue;

		error = pmc_process_interrupt(PMC_HR, pm, tf);

		v = pm->pm_sc.pm_reloadcount;
		v = iap_reload_count_to_perfctr_value(v);

		/*
		 * Stop the counter, reload it but only restart it if
		 * the PMC is not stalled.
		 */
		wrmsr(IAP_EVSEL0 + ri, pm->pm_md.pm_iap.pm_iap_evsel);
		wrmsr(core_iap_wroffset + IAP_PMC0 + ri, v);

		if (__predict_false(error))
			continue;

		wrmsr(IAP_EVSEL0 + ri, pm->pm_md.pm_iap.pm_iap_evsel | IAP_EN);
	}

	if (found_interrupt)
		counter_u64_add(pmc_stats.pm_intr_processed, 1);
	else
		counter_u64_add(pmc_stats.pm_intr_ignored, 1);

	/* Re-arm the LAPIC PMI only if we actually handled an overflow. */
	if (found_interrupt)
		lapic_reenable_pmc();

	return (found_interrupt);
}
1055 
/*
 * PMI handler for version >= 2 PMUs.  Uses IA_GLOBAL_STATUS to find
 * the overflowed counters (fixed and programmable), reloads their
 * sampling counts, and re-enables all counters except those whose PMC
 * is stalled (pmc_process_interrupt() returned an error).  Returns
 * non-zero if at least one overflow was found.
 */
static int
core2_intr(struct trapframe *tf)
{
	int error, found_interrupt = 0, n, cpu;
	uint64_t flag, intrstatus, intrdisable = 0;
	struct pmc *pm;
	struct core_cpu *cc;
	pmc_value_t v;

	cpu = curcpu;
	PMCDBG3(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
	    TRAPF_USERMODE(tf));

	/*
	 * The IA_GLOBAL_STATUS (MSR 0x38E) register indicates which
	 * PMCs have a pending PMI interrupt.  We take a 'snapshot' of
	 * the current set of interrupting PMCs and process these
	 * after stopping them.
	 */
	intrstatus = rdmsr(IA_GLOBAL_STATUS);
	PMCDBG2(MDP,INT, 1, "cpu=%d intrstatus=%jx", cpu,
	    (uintmax_t) intrstatus);

	/*
	 * Stop PMCs unless hardware already done it.
	 */
	if ((intrstatus & IA_GLOBAL_STATUS_FLAG_CTR_FRZ) == 0)
		wrmsr(IA_GLOBAL_CTRL, 0);

	cc = core_pcpu[cpu];
	KASSERT(cc != NULL, ("[core,%d] null pcpu", __LINE__));

	/*
	 * Look for interrupts from fixed function PMCs.
	 */
	for (n = 0, flag = (1ULL << IAF_OFFSET); n < core_iaf_npmc;
	     n++, flag <<= 1) {

		if ((intrstatus & flag) == 0)
			continue;

		found_interrupt = 1;

		pm = cc->pc_corepmcs[n + core_iaf_ri].phw_pmc;
		if (pm == NULL || pm->pm_state != PMC_STATE_RUNNING ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
			continue;

		error = pmc_process_interrupt(PMC_HR, pm, tf);
		if (__predict_false(error))
			intrdisable |= flag;

		v = iaf_reload_count_to_perfctr_value(pm->pm_sc.pm_reloadcount);

		/* Reload sampling count. */
		wrmsr(IAF_CTR0 + n, v);

		PMCDBG4(MDP,INT, 1, "iaf-intr cpu=%d error=%d v=%jx(%jx)", curcpu,
		    error, (uintmax_t) v, (uintmax_t) rdpmc(IAF_RI_TO_MSR(n)));
	}

	/*
	 * Process interrupts from the programmable counters.
	 */
	for (n = 0, flag = 1; n < core_iap_npmc; n++, flag <<= 1) {
		if ((intrstatus & flag) == 0)
			continue;

		found_interrupt = 1;

		pm = cc->pc_corepmcs[n].phw_pmc;
		if (pm == NULL || pm->pm_state != PMC_STATE_RUNNING ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
			continue;

		error = pmc_process_interrupt(PMC_HR, pm, tf);
		if (__predict_false(error))
			intrdisable |= flag;

		v = iap_reload_count_to_perfctr_value(pm->pm_sc.pm_reloadcount);

		PMCDBG3(MDP,INT, 1, "iap-intr cpu=%d error=%d v=%jx", cpu, error,
		    (uintmax_t) v);

		/* Reload sampling count. */
		wrmsr(core_iap_wroffset + IAP_PMC0 + n, v);
	}

	if (found_interrupt)
		counter_u64_add(pmc_stats.pm_intr_processed, 1);
	else
		counter_u64_add(pmc_stats.pm_intr_ignored, 1);

	/* Re-arm the LAPIC PMI only if we actually handled an overflow. */
	if (found_interrupt)
		lapic_reenable_pmc();

	/*
	 * Reenable all non-stalled PMCs.
	 */
	if ((intrstatus & IA_GLOBAL_STATUS_FLAG_CTR_FRZ) == 0) {
		/* No freeze-on-PMI: ack status, then restore control. */
		wrmsr(IA_GLOBAL_OVF_CTRL, intrstatus);
		cc->pc_globalctrl &= ~intrdisable;
		wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);
	} else {
		/* Freeze-on-PMI: restore control first, then ack to unfreeze. */
		if (__predict_false(intrdisable)) {
			cc->pc_globalctrl &= ~intrdisable;
			wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);
		}
		wrmsr(IA_GLOBAL_OVF_CTRL, intrstatus);
	}

	PMCDBG4(MDP, INT, 1, "cpu=%d fixedctrl=%jx globalctrl=%jx status=%jx",
	    cpu, (uintmax_t) rdmsr(IAF_CTRL),
	    (uintmax_t) rdmsr(IA_GLOBAL_CTRL),
	    (uintmax_t) rdmsr(IA_GLOBAL_STATUS));

	return (found_interrupt);
}
1174 
/*
 * Driver initialization: decode CPUID.0AH to size the programmable and
 * fixed counter classes, detect full-width counter writes, register
 * both classes with the MD layer and select the interrupt handler.
 * 'version_override' (> 0) forces a PMU architecture version.
 * Returns 0 on success or EPROGMISMATCH for unsupported PMUs.
 */
int
pmc_core_initialize(struct pmc_mdep *md, int maxcpu, int version_override)
{
	int cpuid[CORE_CPUID_REQUEST_SIZE];
	int flags, nflags;

	do_cpuid(CORE_CPUID_REQUEST, cpuid);

	core_cputype = md->pmd_cputype;
	core_version = (version_override > 0) ? version_override :
	    cpuid[CORE_CPUID_EAX] & 0xFF;

	PMCDBG3(MDP,INI,1,"core-init cputype=%d ncpu=%d version=%d",
	    core_cputype, maxcpu, core_version);

	if (core_version < 1 || core_version > 5 ||
	    (core_cputype != PMC_CPU_INTEL_CORE && core_version == 1)) {
		/* Unknown PMC architecture. */
		printf("hwpmc_core: unknown PMC architecture: %d\n",
		    core_version);
		return (EPROGMISMATCH);
	}

	/* Detect full-width counter writes (IA32_PERF_CAPABILITIES). */
	core_iap_wroffset = 0;
	if (cpu_feature2 & CPUID2_PDCM) {
		if (rdmsr(IA32_PERF_CAPABILITIES) & PERFCAP_FW_WRITE) {
			PMCDBG0(MDP, INI, 1,
			    "core-init full-width write supported");
			core_iap_wroffset = IAP_A_PMC0 - IAP_PMC0;
		} else
			PMCDBG0(MDP, INI, 1,
			    "core-init full-width write NOT supported");
	} else
		PMCDBG0(MDP, INI, 1, "core-init pdcm not supported");

	core_pmcmask = 0;

	/*
	 * Initialize programmable counters.
	 */
	core_iap_npmc = (cpuid[CORE_CPUID_EAX] >> 8) & 0xFF;
	core_iap_width = (cpuid[CORE_CPUID_EAX] >> 16) & 0xFF;

	core_pmcmask |= ((1ULL << core_iap_npmc) - 1);

	/* EBX: bitmask of UNavailable architectural events. */
	nflags = (cpuid[CORE_CPUID_EAX] >> 24) & 0xFF;
	flags = cpuid[CORE_CPUID_EBX] & ((1 << nflags) - 1);

	iap_initialize(md, maxcpu, core_iap_npmc, core_iap_width, flags);

	/*
	 * Initialize fixed function counters, if present.
	 */
	if (core_version >= 2) {
		core_iaf_ri = core_iap_npmc;
		core_iaf_npmc = cpuid[CORE_CPUID_EDX] & 0x1F;
		core_iaf_width = (cpuid[CORE_CPUID_EDX] >> 5) & 0xFF;

		iaf_initialize(md, maxcpu, core_iaf_npmc, core_iaf_width);
		core_pmcmask |= ((1ULL << core_iaf_npmc) - 1) << IAF_OFFSET;
	}

	PMCDBG2(MDP,INI,1,"core-init pmcmask=0x%jx iafri=%d", core_pmcmask,
	    core_iaf_ri);

	core_pcpu = malloc(sizeof(*core_pcpu) * maxcpu, M_PMC,
	    M_ZERO | M_WAITOK);

	/*
	 * Choose the appropriate interrupt handler.
	 */
	if (core_version >= 2)
		md->pmd_intr = core2_intr;
	else
		md->pmd_intr = core_intr;

	return (0);
}
1253 
1254 void
1255 pmc_core_finalize(struct pmc_mdep *md)
1256 {
1257 	PMCDBG0(MDP,INI,1, "core-finalize");
1258 
1259 	free(core_pcpu, M_PMC);
1260 	core_pcpu = NULL;
1261 }
1262