xref: /freebsd/sys/dev/hwpmc/hwpmc_core.c (revision 35eb9b10c265a27ce1f80a6eb74887240c7f4305)
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2008 Joseph Koshy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Intel Core PMCs.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/pmc.h>
#include <sys/pmckern.h>
#include <sys/smp.h>
#include <sys/systm.h>

#include <machine/intr_machdep.h>
#include <x86/apicvar.h>
#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>

#define	CORE_CPUID_REQUEST		0xA
#define	CORE_CPUID_REQUEST_SIZE		0x4
#define	CORE_CPUID_EAX			0x0
#define	CORE_CPUID_EBX			0x1
#define	CORE_CPUID_ECX			0x2
#define	CORE_CPUID_EDX			0x3

#define	IAF_PMC_CAPS			\
	(PMC_CAP_READ | PMC_CAP_WRITE | PMC_CAP_INTERRUPT | \
	 PMC_CAP_USER | PMC_CAP_SYSTEM)
#define	IAF_RI_TO_MSR(RI)		((RI) + (1 << 30))
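/*
 * Note: setting bit 30 in an RDPMC counter index selects the
 * fixed-function counter file, so IAF_RI_TO_MSR() also yields the
 * correct 'rdpmc' encoding for fixed counter RI.
 */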

#define	IAP_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | PMC_CAP_SYSTEM | \
    PMC_CAP_EDGE | PMC_CAP_THRESHOLD | PMC_CAP_READ | PMC_CAP_WRITE |	 \
    PMC_CAP_INVERT | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE)

#define	EV_IS_NOTARCH		0
#define	EV_IS_ARCH_SUPP		1
#define	EV_IS_ARCH_NOTSUPP	-1

/*
 * "Architectural" events defined by Intel.  The values of these
 * symbols correspond to positions in the bitmask returned by
 * the CPUID.0AH instruction.
 */
enum core_arch_events {
	CORE_AE_BRANCH_INSTRUCTION_RETIRED	= 5,
	CORE_AE_BRANCH_MISSES_RETIRED		= 6,
	CORE_AE_INSTRUCTION_RETIRED		= 1,
	CORE_AE_LLC_MISSES			= 4,
	CORE_AE_LLC_REFERENCE			= 3,
	CORE_AE_UNHALTED_REFERENCE_CYCLES	= 2,
	CORE_AE_UNHALTED_CORE_CYCLES		= 0
};
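
/*
 * CPUID.0AH reports these events in EBX with inverted sense: a set bit
 * means the event is NOT available.  For example, the test
 *
 *	(regs[CORE_CPUID_EBX] & (1 << CORE_AE_INSTRUCTION_RETIRED)) == 0
 *
 * is true when INSTRUCTION_RETIRED is supported ('regs' having been
 * filled by do_cpuid(CORE_CPUID_REQUEST, regs)).
 */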

static enum pmc_cputype	core_cputype;
static int core_version;

struct core_cpu {
	volatile uint32_t	pc_iafctrl;	/* Fixed function control. */
	volatile uint64_t	pc_globalctrl;	/* Global control register. */
	struct pmc_hw		pc_corepmcs[];
};

static struct core_cpu **core_pcpu;

static uint32_t core_architectural_events;
static uint64_t core_pmcmask;

static int core_iaf_ri;		/* relative index of fixed counters */
static int core_iaf_width;
static int core_iaf_npmc;

static int core_iap_width;
static int core_iap_npmc;
static int core_iap_wroffset;

static u_int pmc_alloc_refs;
static bool pmc_tsx_force_abort_set;

static int
core_pcpu_noop(struct pmc_mdep *md, int cpu)
{
	(void) md;
	(void) cpu;
	return (0);
}

static int
core_pcpu_init(struct pmc_mdep *md, int cpu)
{
	struct pmc_cpu *pc;
	struct core_cpu *cc;
	struct pmc_hw *phw;
	int core_ri, n, npmc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] insane cpu number %d", __LINE__, cpu));

	PMCDBG1(MDP,INI,1,"core-init cpu=%d", cpu);

	core_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_ri;
	npmc = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_num;

	if (core_version >= 2)
		npmc += md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF].pcd_num;

	cc = malloc(sizeof(struct core_cpu) + npmc * sizeof(struct pmc_hw),
	    M_PMC, M_WAITOK | M_ZERO);

	core_pcpu[cpu] = cc;
	pc = pmc_pcpu[cpu];

	KASSERT(pc != NULL && cc != NULL,
	    ("[core,%d] NULL per-cpu structures cpu=%d", __LINE__, cpu));

	for (n = 0, phw = cc->pc_corepmcs; n < npmc; n++, phw++) {
		phw->phw_state 	  = PMC_PHW_FLAG_IS_ENABLED |
		    PMC_PHW_CPU_TO_STATE(cpu) |
		    PMC_PHW_INDEX_TO_STATE(n + core_ri);
		phw->phw_pmc	  = NULL;
		pc->pc_hwpmcs[n + core_ri]  = phw;
	}

	if (core_version >= 2) {
		/* Enable Freezing PMCs on PMI. */
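		/* (IA32_DEBUGCTL bit 12, Freeze_PerfMon_On_PMI.) */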
		wrmsr(MSR_DEBUGCTLMSR, rdmsr(MSR_DEBUGCTLMSR) | 0x1000);
	}

	return (0);
}

static int
core_pcpu_fini(struct pmc_mdep *md, int cpu)
{
	int core_ri, n, npmc;
	struct pmc_cpu *pc;
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] insane cpu number (%d)", __LINE__, cpu));

	PMCDBG1(MDP,INI,1,"core-pcpu-fini cpu=%d", cpu);

	if ((cc = core_pcpu[cpu]) == NULL)
		return (0);

	core_pcpu[cpu] = NULL;

	pc = pmc_pcpu[cpu];

	KASSERT(pc != NULL, ("[core,%d] NULL per-cpu %d state", __LINE__,
		cpu));

	npmc = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_num;
	core_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_ri;

	for (n = 0; n < npmc; n++)
		wrmsr(IAP_EVSEL0 + n, 0);

	if (core_version >= 2) {
		wrmsr(IAF_CTRL, 0);
		npmc += md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF].pcd_num;
	}

	for (n = 0; n < npmc; n++)
		pc->pc_hwpmcs[n + core_ri] = NULL;

	free(cc, M_PMC);

	return (0);
}

/*
 * Fixed function counters.
 */

static pmc_value_t
iaf_perfctr_value_to_reload_count(pmc_value_t v)
{

	/* If the PMC has overflowed, return a reload count of zero. */
	if ((v & (1ULL << (core_iaf_width - 1))) == 0)
		return (0);
	v &= (1ULL << core_iaf_width) - 1;
	return (1ULL << core_iaf_width) - v;
}

static pmc_value_t
iaf_reload_count_to_perfctr_value(pmc_value_t rlc)
{
	return (1ULL << core_iaf_width) - rlc;
}
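
/*
 * Illustrative example: with a 48-bit fixed counter, a reload count of
 * 1000 is programmed as 2^48 - 1000, so the counter overflows and
 * raises a PMI after exactly 1000 events; until then its MSB stays
 * set, which is what the overflow test above relies on.
 */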

static int
iaf_allocate_pmc(int cpu, int ri, struct pmc *pm,
    const struct pmc_op_pmcallocate *a)
{
	uint8_t ev, umask;
	uint32_t caps, flags, config;
	const struct pmc_md_iap_op_pmcallocate *iap;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU %d", __LINE__, cpu));

	PMCDBG2(MDP,ALL,1, "iaf-allocate ri=%d reqcaps=0x%x", ri, pm->pm_caps);

	if (ri < 0 || ri >= core_iaf_npmc)
		return (EINVAL);

	if (a->pm_class != PMC_CLASS_IAF)
		return (EINVAL);

	iap = &a->pm_md.pm_iap;
	config = iap->pm_iap_config;
	ev = IAP_EVSEL_GET(config);
	umask = IAP_UMASK_GET(config);

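	/*
	 * An event number of 0x0 is a pseudo-encoding that selects
	 * fixed counter 'umask - 1' directly; any other (event, umask)
	 * pair must match the architectural event that the fixed
	 * counter at row 'ri' implements.
	 */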
	if (ev == 0x0) {
		if (umask != ri + 1)
			return (EINVAL);
	} else {
		switch (ri) {
		case 0:	/* INST_RETIRED.ANY */
			if (ev != 0xC0 || umask != 0x00)
				return (EINVAL);
			break;
		case 1:	/* CPU_CLK_UNHALTED.THREAD */
			if (ev != 0x3C || umask != 0x00)
				return (EINVAL);
			break;
		case 2:	/* CPU_CLK_UNHALTED.REF */
			if (ev != 0x3C || umask != 0x01)
				return (EINVAL);
			break;
		case 3:	/* TOPDOWN.SLOTS */
			if (ev != 0xA4 || umask != 0x01)
				return (EINVAL);
			break;
		default:
			return (EINVAL);
		}
	}

	pmc_alloc_refs++;
	if ((cpu_stdext_feature3 & CPUID_STDEXT3_TSXFA) != 0 &&
	    !pmc_tsx_force_abort_set) {
		pmc_tsx_force_abort_set = true;
		x86_msr_op(MSR_TSX_FORCE_ABORT, MSR_OP_RENDEZVOUS_ALL |
		    MSR_OP_WRITE, 1, NULL);
	}

	flags = 0;
	if (config & IAP_OS)
		flags |= IAF_OS;
	if (config & IAP_USR)
		flags |= IAF_USR;
	if (config & IAP_ANY)
		flags |= IAF_ANY;
	if (config & IAP_INT)
		flags |= IAF_PMI;

	caps = a->pm_caps;
	if (caps & PMC_CAP_INTERRUPT)
		flags |= IAF_PMI;
	if (caps & PMC_CAP_SYSTEM)
		flags |= IAF_OS;
	if (caps & PMC_CAP_USER)
		flags |= IAF_USR;
	if ((caps & (PMC_CAP_USER | PMC_CAP_SYSTEM)) == 0)
		flags |= (IAF_OS | IAF_USR);

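	/* Each fixed counter has a 4-bit field in IA32_FIXED_CTR_CTRL. */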
	pm->pm_md.pm_iaf.pm_iaf_ctrl = (flags << (ri * 4));

	PMCDBG1(MDP,ALL,2, "iaf-allocate config=0x%jx",
	    (uintmax_t) pm->pm_md.pm_iaf.pm_iaf_ctrl);

	return (0);
}

static int
iaf_config_pmc(int cpu, int ri, struct pmc *pm)
{
	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU %d", __LINE__, cpu));

	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	PMCDBG3(MDP,CFG,1, "iaf-config cpu=%d ri=%d pm=%p", cpu, ri, pm);

	KASSERT(core_pcpu[cpu] != NULL, ("[core,%d] null per-cpu %d", __LINE__,
	    cpu));

	core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc = pm;

	return (0);
}

static int
iaf_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
{
	int error;
	struct pmc_hw *phw;
	char iaf_name[PMC_NAME_MAX];

	phw = &core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri];

	(void) snprintf(iaf_name, sizeof(iaf_name), "IAF-%d", ri);
	if ((error = copystr(iaf_name, pi->pm_name, PMC_NAME_MAX,
	    NULL)) != 0)
		return (error);

	pi->pm_class = PMC_CLASS_IAF;

	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
		pi->pm_enabled = TRUE;
		*ppmc          = phw->phw_pmc;
	} else {
		pi->pm_enabled = FALSE;
		*ppmc          = NULL;
	}

	return (0);
}

static int
iaf_get_config(int cpu, int ri, struct pmc **ppm)
{
	*ppm = core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc;

	return (0);
}

static int
iaf_get_msr(int ri, uint32_t *msr)
{
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[iaf,%d] ri %d out of range", __LINE__, ri));

	*msr = IAF_RI_TO_MSR(ri);

	return (0);
}

static int
iaf_read_pmc(int cpu, int ri, pmc_value_t *v)
{
	struct pmc *pm;
	pmc_value_t tmp;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	pm = core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc;

	KASSERT(pm,
	    ("[core,%d] cpu %d ri %d(%d) pmc not configured", __LINE__, cpu,
		ri, ri + core_iaf_ri));

	tmp = rdpmc(IAF_RI_TO_MSR(ri));

	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		*v = iaf_perfctr_value_to_reload_count(tmp);
	else
		*v = tmp & ((1ULL << core_iaf_width) - 1);

	PMCDBG4(MDP,REA,1, "iaf-read cpu=%d ri=%d msr=0x%x -> v=%jx", cpu, ri,
	    IAF_RI_TO_MSR(ri), *v);

	return (0);
}

static int
iaf_release_pmc(int cpu, int ri, struct pmc *pmc)
{
	PMCDBG3(MDP,REL,1, "iaf-release cpu=%d ri=%d pm=%p", cpu, ri, pmc);

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	KASSERT(core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc == NULL,
	    ("[core,%d] PHW pmc non-NULL", __LINE__));

	MPASS(pmc_alloc_refs > 0);
	if (pmc_alloc_refs-- == 1 && pmc_tsx_force_abort_set) {
		pmc_tsx_force_abort_set = false;
		x86_msr_op(MSR_TSX_FORCE_ABORT, MSR_OP_RENDEZVOUS_ALL |
		    MSR_OP_WRITE, 0, NULL);
	}

	return (0);
}

static int
iaf_start_pmc(int cpu, int ri)
{
	struct pmc *pm;
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	PMCDBG2(MDP,STA,1,"iaf-start cpu=%d ri=%d", cpu, ri);

	cc = core_pcpu[cpu];
	pm = cc->pc_corepmcs[ri + core_iaf_ri].phw_pmc;

	cc->pc_iafctrl |= pm->pm_md.pm_iaf.pm_iaf_ctrl;
	wrmsr(IAF_CTRL, cc->pc_iafctrl);

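	/*
	 * Fixed-counter enable bits occupy bits IAF_OFFSET and above
	 * in IA_GLOBAL_CTRL.
	 */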
	cc->pc_globalctrl |= (1ULL << (ri + IAF_OFFSET));
	wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);

	PMCDBG4(MDP,STA,1,"iafctrl=%x(%x) globalctrl=%jx(%jx)",
	    cc->pc_iafctrl, (uint32_t) rdmsr(IAF_CTRL),
	    cc->pc_globalctrl, rdmsr(IA_GLOBAL_CTRL));

	return (0);
}

static int
iaf_stop_pmc(int cpu, int ri)
{
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	PMCDBG2(MDP,STA,1,"iaf-stop cpu=%d ri=%d", cpu, ri);

	cc = core_pcpu[cpu];

	cc->pc_iafctrl &= ~(IAF_MASK << (ri * 4));
	wrmsr(IAF_CTRL, cc->pc_iafctrl);

	cc->pc_globalctrl &= ~(1ULL << (ri + IAF_OFFSET));
	wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);

	PMCDBG4(MDP,STO,1,"iafctrl=%x(%x) globalctrl=%jx(%jx)",
	    cc->pc_iafctrl, (uint32_t) rdmsr(IAF_CTRL),
	    cc->pc_globalctrl, rdmsr(IA_GLOBAL_CTRL));

	return (0);
}

static int
iaf_write_pmc(int cpu, int ri, pmc_value_t v)
{
	struct core_cpu *cc;
	struct pmc *pm;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	cc = core_pcpu[cpu];
	pm = cc->pc_corepmcs[ri + core_iaf_ri].phw_pmc;

	KASSERT(pm,
	    ("[core,%d] cpu %d ri %d pmc not configured", __LINE__, cpu, ri));

	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		v = iaf_reload_count_to_perfctr_value(v);

	/* Turn off the fixed counter */
	wrmsr(IAF_CTRL, cc->pc_iafctrl & ~(IAF_MASK << (ri * 4)));

	wrmsr(IAF_CTR0 + ri, v & ((1ULL << core_iaf_width) - 1));

	/* Turn on fixed counters */
	wrmsr(IAF_CTRL, cc->pc_iafctrl);

	PMCDBG6(MDP,WRI,1, "iaf-write cpu=%d ri=%d msr=0x%x v=%jx iafctrl=%jx "
	    "pmc=%jx", cpu, ri, IAF_RI_TO_MSR(ri), v,
	    (uintmax_t) rdmsr(IAF_CTRL),
	    (uintmax_t) rdpmc(IAF_RI_TO_MSR(ri)));

	return (0);
}

static void
iaf_initialize(struct pmc_mdep *md, int maxcpu, int npmc, int pmcwidth)
{
	struct pmc_classdep *pcd;

	KASSERT(md != NULL, ("[iaf,%d] md is NULL", __LINE__));

	PMCDBG0(MDP,INI,1, "iaf-initialize");

	pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF];

	pcd->pcd_caps	= IAF_PMC_CAPS;
	pcd->pcd_class	= PMC_CLASS_IAF;
	pcd->pcd_num	= npmc;
	pcd->pcd_ri	= md->pmd_npmc;
	pcd->pcd_width	= pmcwidth;

	pcd->pcd_allocate_pmc	= iaf_allocate_pmc;
	pcd->pcd_config_pmc	= iaf_config_pmc;
	pcd->pcd_describe	= iaf_describe;
	pcd->pcd_get_config	= iaf_get_config;
	pcd->pcd_get_msr	= iaf_get_msr;
	pcd->pcd_pcpu_fini	= core_pcpu_noop;
	pcd->pcd_pcpu_init	= core_pcpu_noop;
	pcd->pcd_read_pmc	= iaf_read_pmc;
	pcd->pcd_release_pmc	= iaf_release_pmc;
	pcd->pcd_start_pmc	= iaf_start_pmc;
	pcd->pcd_stop_pmc	= iaf_stop_pmc;
	pcd->pcd_write_pmc	= iaf_write_pmc;

	md->pmd_npmc	       += npmc;
}

/*
 * Intel programmable PMCs.
 */

/* Sub fields of UMASK that this event supports. */
#define	IAP_M_CORE		(1 << 0) /* Core specificity */
#define	IAP_M_AGENT		(1 << 1) /* Agent specificity */
#define	IAP_M_PREFETCH		(1 << 2) /* Prefetch */
#define	IAP_M_MESI		(1 << 3) /* MESI */
#define	IAP_M_SNOOPRESPONSE	(1 << 4) /* Snoop response */
#define	IAP_M_SNOOPTYPE		(1 << 5) /* Snoop type */
#define	IAP_M_TRANSITION	(1 << 6) /* Transition */

#define	IAP_F_CORE		(0x3 << 14) /* Core specificity */
#define	IAP_F_AGENT		(0x1 << 13) /* Agent specificity */
#define	IAP_F_PREFETCH		(0x3 << 12) /* Prefetch */
#define	IAP_F_MESI		(0xF <<  8) /* MESI */
#define	IAP_F_SNOOPRESPONSE	(0xB <<  8) /* Snoop response */
#define	IAP_F_SNOOPTYPE		(0x3 <<  8) /* Snoop type */
#define	IAP_F_TRANSITION	(0x1 << 12) /* Transition */

#define	IAP_PREFETCH_RESERVED	(0x2 << 12)
#define	IAP_CORE_THIS		(0x1 << 14)
#define	IAP_CORE_ALL		(0x3 << 14)
#define	IAP_F_CMASK		0xFF000000

static pmc_value_t
iap_perfctr_value_to_reload_count(pmc_value_t v)
{

	/* If the PMC has overflowed, return a reload count of zero. */
	if ((v & (1ULL << (core_iap_width - 1))) == 0)
		return (0);
	v &= (1ULL << core_iap_width) - 1;
	return (1ULL << core_iap_width) - v;
}

static pmc_value_t
iap_reload_count_to_perfctr_value(pmc_value_t rlc)
{
	return (1ULL << core_iap_width) - rlc;
}

static int
iap_pmc_has_overflowed(int ri)
{
	uint64_t v;

	/*
	 * We treat a Core (i.e., Intel architecture v1) PMC as having
	 * overflowed if its MSB is zero.  (Sampling PMCs are loaded
	 * with 2^width - reload_count, so the MSB remains set until
	 * the counter wraps.)
	 */
	v = rdpmc(ri);
	return ((v & (1ULL << (core_iap_width - 1))) == 0);
}

static int
iap_event_corei7_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t mask;

	switch (evsel) {
	/* Events valid only on counter 0, 1. */
	case 0x40:
	case 0x41:
	case 0x42:
	case 0x43:
	case 0x4C:
	case 0x4E:
	case 0x51:
	case 0x52:
	case 0x53:
	case 0x63:
		mask = 0x3;
		break;
	/* Any row index is ok. */
	default:
		mask = ~0;
	}

	return (mask & (1 << ri));
}

static int
iap_event_westmere_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t mask;

	switch (evsel) {
	/* Events valid only on counter 0. */
	case 0x60:
	case 0xB3:
		mask = 0x1;
		break;

	/* Events valid only on counter 0, 1. */
	case 0x4C:
	case 0x4E:
	case 0x51:
	case 0x52:
	case 0x63:
		mask = 0x3;
		break;
	/* Any row index is ok. */
	default:
		mask = ~0;
	}

	return (mask & (1 << ri));
}

static int
iap_event_sb_sbx_ib_ibx_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t mask;

	switch (evsel) {
	/* Events valid only on counter 0. */
	case 0xB7:
		mask = 0x1;
		break;
	/* Events valid only on counter 1. */
	case 0xC0:
		mask = 0x2;
		break;
	/* Events valid only on counter 2. */
	case 0x48:
	case 0xA2:
	case 0xA3:
		mask = 0x4;
		break;
	/* Events valid only on counter 3. */
	case 0xBB:
	case 0xCD:
		mask = 0x8;
		break;
	/* Any row index is ok. */
	default:
		mask = ~0;
	}

	return (mask & (1 << ri));
}

static int
iap_event_core_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t mask;

	switch (evsel) {
		/*
		 * Events valid only on counter 0.
		 */
	case 0x10:
	case 0x14:
	case 0x18:
	case 0xB3:
	case 0xC1:
	case 0xCB:
		mask = (1 << 0);
		break;

		/*
		 * Events valid only on counter 1.
		 */
	case 0x11:
	case 0x12:
	case 0x13:
		mask = (1 << 1);
		break;

	default:
		mask = ~0;	/* Any row index is ok. */
	}

	return (mask & (1 << ri));
}

static int
iap_allocate_pmc(int cpu, int ri, struct pmc *pm,
    const struct pmc_op_pmcallocate *a)
{
	uint8_t ev;
	const struct pmc_md_iap_op_pmcallocate *iap;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index value %d", __LINE__, ri));

	if (a->pm_class != PMC_CLASS_IAP)
		return (EINVAL);

	iap = &a->pm_md.pm_iap;
	ev = IAP_EVSEL_GET(iap->pm_iap_config);

	switch (core_cputype) {
	case PMC_CPU_INTEL_CORE:
	case PMC_CPU_INTEL_CORE2:
	case PMC_CPU_INTEL_CORE2EXTREME:
		if (iap_event_core_ok_on_counter(ev, ri) == 0)
			return (EINVAL);
		break;
	case PMC_CPU_INTEL_COREI7:
	case PMC_CPU_INTEL_NEHALEM_EX:
		if (iap_event_corei7_ok_on_counter(ev, ri) == 0)
			return (EINVAL);
		break;
	case PMC_CPU_INTEL_WESTMERE:
	case PMC_CPU_INTEL_WESTMERE_EX:
		if (iap_event_westmere_ok_on_counter(ev, ri) == 0)
			return (EINVAL);
		break;
	case PMC_CPU_INTEL_SANDYBRIDGE:
	case PMC_CPU_INTEL_SANDYBRIDGE_XEON:
	case PMC_CPU_INTEL_IVYBRIDGE:
	case PMC_CPU_INTEL_IVYBRIDGE_XEON:
	case PMC_CPU_INTEL_HASWELL:
	case PMC_CPU_INTEL_HASWELL_XEON:
	case PMC_CPU_INTEL_BROADWELL:
	case PMC_CPU_INTEL_BROADWELL_XEON:
		if (iap_event_sb_sbx_ib_ibx_ok_on_counter(ev, ri) == 0)
			return (EINVAL);
		break;
	case PMC_CPU_INTEL_ATOM:
	case PMC_CPU_INTEL_ATOM_SILVERMONT:
	case PMC_CPU_INTEL_ATOM_GOLDMONT:
	case PMC_CPU_INTEL_SKYLAKE:
	case PMC_CPU_INTEL_SKYLAKE_XEON:
	case PMC_CPU_INTEL_ICELAKE:
	case PMC_CPU_INTEL_ICELAKE_XEON:
	case PMC_CPU_INTEL_ALDERLAKE:
	default:
		break;
	}

	pm->pm_md.pm_iap.pm_iap_evsel = iap->pm_iap_config;
	return (0);
}

static int
iap_config_pmc(int cpu, int ri, struct pmc *pm)
{
	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU %d", __LINE__, cpu));

	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	PMCDBG3(MDP,CFG,1, "iap-config cpu=%d ri=%d pm=%p", cpu, ri, pm);

	KASSERT(core_pcpu[cpu] != NULL, ("[core,%d] null per-cpu %d", __LINE__,
	    cpu));

	core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc = pm;

	return (0);
}

static int
iap_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
{
	int error;
	struct pmc_hw *phw;
	char iap_name[PMC_NAME_MAX];

	phw = &core_pcpu[cpu]->pc_corepmcs[ri];

	(void) snprintf(iap_name, sizeof(iap_name), "IAP-%d", ri);
	if ((error = copystr(iap_name, pi->pm_name, PMC_NAME_MAX,
	    NULL)) != 0)
		return (error);

	pi->pm_class = PMC_CLASS_IAP;

	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
		pi->pm_enabled = TRUE;
		*ppmc          = phw->phw_pmc;
	} else {
		pi->pm_enabled = FALSE;
		*ppmc          = NULL;
	}

	return (0);
}

static int
iap_get_config(int cpu, int ri, struct pmc **ppm)
{
	*ppm = core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc;

	return (0);
}

static int
iap_get_msr(int ri, uint32_t *msr)
{
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[iap,%d] ri %d out of range", __LINE__, ri));

	*msr = ri;

	return (0);
}

static int
iap_read_pmc(int cpu, int ri, pmc_value_t *v)
{
	struct pmc *pm;
	pmc_value_t tmp;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	pm = core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc;

	KASSERT(pm,
	    ("[core,%d] cpu %d ri %d pmc not configured", __LINE__, cpu,
		ri));

	tmp = rdpmc(ri);
	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		*v = iap_perfctr_value_to_reload_count(tmp);
	else
		*v = tmp & ((1ULL << core_iap_width) - 1);

	PMCDBG4(MDP,REA,1, "iap-read cpu=%d ri=%d msr=0x%x -> v=%jx", cpu, ri,
	    IAP_PMC0 + ri, *v);

	return (0);
}

static int
iap_release_pmc(int cpu, int ri, struct pmc *pm)
{
	(void) pm;

	PMCDBG3(MDP,REL,1, "iap-release cpu=%d ri=%d pm=%p", cpu, ri,
	    pm);

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	KASSERT(core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc
	    == NULL, ("[core,%d] PHW pmc non-NULL", __LINE__));

	return (0);
}

static int
iap_start_pmc(int cpu, int ri)
{
	struct pmc *pm;
	uint32_t evsel;
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	cc = core_pcpu[cpu];
	pm = cc->pc_corepmcs[ri].phw_pmc;

	KASSERT(pm,
	    ("[core,%d] starting cpu%d,ri%d with no pmc configured",
		__LINE__, cpu, ri));

	PMCDBG2(MDP,STA,1, "iap-start cpu=%d ri=%d", cpu, ri);

	evsel = pm->pm_md.pm_iap.pm_iap_evsel;

	PMCDBG4(MDP,STA,2, "iap-start/2 cpu=%d ri=%d evselmsr=0x%x evsel=0x%x",
	    cpu, ri, IAP_EVSEL0 + ri, evsel);

	/* Event specific configuration. */

	switch (IAP_EVSEL_GET(evsel)) {
	case 0xB7:
		wrmsr(IA_OFFCORE_RSP0, pm->pm_md.pm_iap.pm_iap_rsp);
		break;
	case 0xBB:
		wrmsr(IA_OFFCORE_RSP1, pm->pm_md.pm_iap.pm_iap_rsp);
		break;
	default:
		break;
	}

	wrmsr(IAP_EVSEL0 + ri, evsel | IAP_EN);

	if (core_version >= 2) {
		cc->pc_globalctrl |= (1ULL << ri);
		wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);
	}

	return (0);
}

static int
iap_stop_pmc(int cpu, int ri)
{
	struct pmc *pm __diagused;
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row index %d", __LINE__, ri));

	cc = core_pcpu[cpu];
	pm = cc->pc_corepmcs[ri].phw_pmc;

	KASSERT(pm,
	    ("[core,%d] cpu%d ri%d no configured PMC to stop", __LINE__,
		cpu, ri));

	PMCDBG2(MDP,STO,1, "iap-stop cpu=%d ri=%d", cpu, ri);

	wrmsr(IAP_EVSEL0 + ri, 0);

	if (core_version >= 2) {
		cc->pc_globalctrl &= ~(1ULL << ri);
		wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);
	}

	return (0);
}

static int
iap_write_pmc(int cpu, int ri, pmc_value_t v)
{
	struct pmc *pm;
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row index %d", __LINE__, ri));

	cc = core_pcpu[cpu];
	pm = cc->pc_corepmcs[ri].phw_pmc;

	KASSERT(pm,
	    ("[core,%d] cpu%d ri%d no configured PMC to write", __LINE__,
		cpu, ri));

	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		v = iap_reload_count_to_perfctr_value(v);

	v &= (1ULL << core_iap_width) - 1;

	PMCDBG4(MDP,WRI,1, "iap-write cpu=%d ri=%d msr=0x%x v=%jx", cpu, ri,
	    IAP_PMC0 + ri, v);

	/*
	 * Write the new value to the counter (or its full-width alias,
	 * if core_iap_wroffset is non-zero).  The counter will be in a
	 * stopped state when the pcd_write() entry point is called.
	 */
	wrmsr(core_iap_wroffset + IAP_PMC0 + ri, v);
	return (0);
}

static void
iap_initialize(struct pmc_mdep *md, int maxcpu, int npmc, int pmcwidth,
    int flags)
{
	struct pmc_classdep *pcd;

	KASSERT(md != NULL, ("[iap,%d] md is NULL", __LINE__));

	PMCDBG0(MDP,INI,1, "iap-initialize");

	/* Remember the set of architectural events supported. */
	core_architectural_events = ~flags;

	pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP];

	pcd->pcd_caps	= IAP_PMC_CAPS;
	pcd->pcd_class	= PMC_CLASS_IAP;
	pcd->pcd_num	= npmc;
	pcd->pcd_ri	= md->pmd_npmc;
	pcd->pcd_width	= pmcwidth;

	pcd->pcd_allocate_pmc	= iap_allocate_pmc;
	pcd->pcd_config_pmc	= iap_config_pmc;
	pcd->pcd_describe	= iap_describe;
	pcd->pcd_get_config	= iap_get_config;
	pcd->pcd_get_msr	= iap_get_msr;
	pcd->pcd_pcpu_fini	= core_pcpu_fini;
	pcd->pcd_pcpu_init	= core_pcpu_init;
	pcd->pcd_read_pmc	= iap_read_pmc;
	pcd->pcd_release_pmc	= iap_release_pmc;
	pcd->pcd_start_pmc	= iap_start_pmc;
	pcd->pcd_stop_pmc	= iap_stop_pmc;
	pcd->pcd_write_pmc	= iap_write_pmc;

	md->pmd_npmc	       += npmc;
}

static int
core_intr(struct trapframe *tf)
{
	pmc_value_t v;
	struct pmc *pm;
	struct core_cpu *cc;
	int error, found_interrupt, ri;

	PMCDBG3(MDP,INT, 1, "cpu=%d tf=%p um=%d", curcpu, (void *) tf,
	    TRAPF_USERMODE(tf));

	found_interrupt = 0;
	cc = core_pcpu[curcpu];

	for (ri = 0; ri < core_iap_npmc; ri++) {

		if ((pm = cc->pc_corepmcs[ri].phw_pmc) == NULL ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
			continue;

		if (!iap_pmc_has_overflowed(ri))
			continue;

		found_interrupt = 1;

		if (pm->pm_state != PMC_STATE_RUNNING)
			continue;

		error = pmc_process_interrupt(PMC_HR, pm, tf);

		v = pm->pm_sc.pm_reloadcount;
		v = iap_reload_count_to_perfctr_value(v);

		/*
		 * Stop the counter and reload it, but restart it only
		 * if the PMC is not stalled.
		 */
		wrmsr(IAP_EVSEL0 + ri, pm->pm_md.pm_iap.pm_iap_evsel);
		wrmsr(core_iap_wroffset + IAP_PMC0 + ri, v);

		if (__predict_false(error))
			continue;

		wrmsr(IAP_EVSEL0 + ri, pm->pm_md.pm_iap.pm_iap_evsel | IAP_EN);
	}

	if (found_interrupt)
		counter_u64_add(pmc_stats.pm_intr_processed, 1);
	else
		counter_u64_add(pmc_stats.pm_intr_ignored, 1);

	if (found_interrupt)
		lapic_reenable_pmc();

	return (found_interrupt);
}

static int
core2_intr(struct trapframe *tf)
{
	int error, found_interrupt = 0, n, cpu;
	uint64_t flag, intrstatus, intrdisable = 0;
	struct pmc *pm;
	struct core_cpu *cc;
	pmc_value_t v;

	cpu = curcpu;
	PMCDBG3(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
	    TRAPF_USERMODE(tf));

	/*
	 * The IA_GLOBAL_STATUS (MSR 0x38E) register indicates which
	 * PMCs have a pending PMI interrupt.  We take a 'snapshot' of
	 * the current set of interrupting PMCs and process these
	 * after stopping them.
	 */
	intrstatus = rdmsr(IA_GLOBAL_STATUS);
	PMCDBG2(MDP,INT, 1, "cpu=%d intrstatus=%jx", cpu,
	    (uintmax_t) intrstatus);

	/*
	 * Stop the PMCs unless the hardware has already done so.
	 */
	if ((intrstatus & IA_GLOBAL_STATUS_FLAG_CTR_FRZ) == 0)
		wrmsr(IA_GLOBAL_CTRL, 0);

	cc = core_pcpu[cpu];
	KASSERT(cc != NULL, ("[core,%d] null pcpu", __LINE__));

	/*
	 * Look for interrupts from fixed function PMCs.
	 */
	for (n = 0, flag = (1ULL << IAF_OFFSET); n < core_iaf_npmc;
	     n++, flag <<= 1) {

		if ((intrstatus & flag) == 0)
			continue;

		found_interrupt = 1;

		pm = cc->pc_corepmcs[n + core_iaf_ri].phw_pmc;
		if (pm == NULL || pm->pm_state != PMC_STATE_RUNNING ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
			continue;

		error = pmc_process_interrupt(PMC_HR, pm, tf);
		if (__predict_false(error))
			intrdisable |= flag;

		v = iaf_reload_count_to_perfctr_value(pm->pm_sc.pm_reloadcount);

		/* Reload sampling count. */
		wrmsr(IAF_CTR0 + n, v);

		PMCDBG4(MDP,INT, 1, "iaf-intr cpu=%d error=%d v=%jx(%jx)", curcpu,
		    error, (uintmax_t) v, (uintmax_t) rdpmc(IAF_RI_TO_MSR(n)));
	}

	/*
	 * Process interrupts from the programmable counters.
	 */
	for (n = 0, flag = 1; n < core_iap_npmc; n++, flag <<= 1) {
		if ((intrstatus & flag) == 0)
			continue;

		found_interrupt = 1;

		pm = cc->pc_corepmcs[n].phw_pmc;
		if (pm == NULL || pm->pm_state != PMC_STATE_RUNNING ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
			continue;

		error = pmc_process_interrupt(PMC_HR, pm, tf);
		if (__predict_false(error))
			intrdisable |= flag;

		v = iap_reload_count_to_perfctr_value(pm->pm_sc.pm_reloadcount);

		PMCDBG3(MDP,INT, 1, "iap-intr cpu=%d error=%d v=%jx", cpu, error,
		    (uintmax_t) v);

		/* Reload sampling count. */
		wrmsr(core_iap_wroffset + IAP_PMC0 + n, v);
	}

	if (found_interrupt)
		counter_u64_add(pmc_stats.pm_intr_processed, 1);
	else
		counter_u64_add(pmc_stats.pm_intr_ignored, 1);

	/*
	 * Reenable all non-stalled PMCs.
	 */
	if ((intrstatus & IA_GLOBAL_STATUS_FLAG_CTR_FRZ) == 0) {
		wrmsr(IA_GLOBAL_OVF_CTRL, intrstatus);
		cc->pc_globalctrl &= ~intrdisable;
		wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);
	} else {
		if (__predict_false(intrdisable)) {
			cc->pc_globalctrl &= ~intrdisable;
			wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);
		}
		wrmsr(IA_GLOBAL_OVF_CTRL, intrstatus);
	}

	PMCDBG4(MDP, INT, 1, "cpu=%d fixedctrl=%jx globalctrl=%jx status=%jx",
	    cpu, (uintmax_t) rdmsr(IAF_CTRL),
	    (uintmax_t) rdmsr(IA_GLOBAL_CTRL),
	    (uintmax_t) rdmsr(IA_GLOBAL_STATUS));

	if (found_interrupt)
		lapic_reenable_pmc();

	return (found_interrupt);
}

int
pmc_core_initialize(struct pmc_mdep *md, int maxcpu, int version_override)
{
	int cpuid[CORE_CPUID_REQUEST_SIZE];
	int flags, nflags;

	do_cpuid(CORE_CPUID_REQUEST, cpuid);

	core_cputype = md->pmd_cputype;
	core_version = (version_override > 0) ? version_override :
	    cpuid[CORE_CPUID_EAX] & 0xFF;

	PMCDBG3(MDP,INI,1,"core-init cputype=%d ncpu=%d version=%d",
	    core_cputype, maxcpu, core_version);

	if (core_version < 1 || core_version > 5 ||
	    (core_cputype != PMC_CPU_INTEL_CORE && core_version == 1)) {
		/* Unknown PMC architecture. */
		printf("hwpmc_core: unknown PMC architecture: %d\n",
		    core_version);
		return (EPROGMISMATCH);
	}

	core_iap_wroffset = 0;
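	/*
	 * PERFCAP_FW_WRITE (IA32_PERF_CAPABILITIES bit 13) indicates
	 * that the full-width IAP_A_PMCx counter aliases are present.
	 */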
	if (cpu_feature2 & CPUID2_PDCM) {
		if (rdmsr(IA32_PERF_CAPABILITIES) & PERFCAP_FW_WRITE) {
			PMCDBG0(MDP, INI, 1,
			    "core-init full-width write supported");
			core_iap_wroffset = IAP_A_PMC0 - IAP_PMC0;
		} else
			PMCDBG0(MDP, INI, 1,
			    "core-init full-width write NOT supported");
	} else
		PMCDBG0(MDP, INI, 1, "core-init pdcm not supported");

	core_pmcmask = 0;

	/*
	 * Initialize programmable counters.
	 */
	core_iap_npmc = (cpuid[CORE_CPUID_EAX] >> 8) & 0xFF;
	core_iap_width = (cpuid[CORE_CPUID_EAX] >> 16) & 0xFF;

	core_pmcmask |= ((1ULL << core_iap_npmc) - 1);

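	/*
	 * CPUID.0AH EAX[31:24] is the length of the EBX bit vector; a
	 * set EBX bit means the architectural event is unavailable.
	 */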
	nflags = (cpuid[CORE_CPUID_EAX] >> 24) & 0xFF;
	flags = cpuid[CORE_CPUID_EBX] & ((1 << nflags) - 1);

	iap_initialize(md, maxcpu, core_iap_npmc, core_iap_width, flags);

	/*
	 * Initialize fixed function counters, if present.
	 */
	if (core_version >= 2) {
		core_iaf_ri = core_iap_npmc;
		core_iaf_npmc = cpuid[CORE_CPUID_EDX] & 0x1F;
		core_iaf_width = (cpuid[CORE_CPUID_EDX] >> 5) & 0xFF;

		iaf_initialize(md, maxcpu, core_iaf_npmc, core_iaf_width);
		core_pmcmask |= ((1ULL << core_iaf_npmc) - 1) << IAF_OFFSET;
	}

	PMCDBG2(MDP,INI,1,"core-init pmcmask=0x%jx iafri=%d", core_pmcmask,
	    core_iaf_ri);

	core_pcpu = malloc(sizeof(*core_pcpu) * maxcpu, M_PMC,
	    M_ZERO | M_WAITOK);

	/*
	 * Choose the appropriate interrupt handler.
	 */
	if (core_version >= 2)
		md->pmd_intr = core2_intr;
	else
		md->pmd_intr = core_intr;

	md->pmd_pcpu_fini = NULL;
	md->pmd_pcpu_init = NULL;

	return (0);
}

void
pmc_core_finalize(struct pmc_mdep *md)
{
	PMCDBG0(MDP,INI,1, "core-finalize");

	free(core_pcpu, M_PMC);
	core_pcpu = NULL;
}