xref: /freebsd/sys/dev/hwpmc/hwpmc_core.c (revision 4f8f43b06ed07e96a250855488cc531799d5b78f)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2008 Joseph Koshy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Intel Core PMCs.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/pmc.h>
#include <sys/pmckern.h>
#include <sys/smp.h>
#include <sys/systm.h>

#include <machine/intr_machdep.h>
#include <x86/apicvar.h>
#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>

#define	CORE_CPUID_REQUEST		0xA
#define	CORE_CPUID_REQUEST_SIZE		0x4
#define	CORE_CPUID_EAX			0x0
#define	CORE_CPUID_EBX			0x1
#define	CORE_CPUID_ECX			0x2
#define	CORE_CPUID_EDX			0x3

#define	IAF_PMC_CAPS			\
	(PMC_CAP_READ | PMC_CAP_WRITE | PMC_CAP_INTERRUPT | \
	 PMC_CAP_USER | PMC_CAP_SYSTEM)
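/*
 * Fixed-function counters are read with RDPMC by setting bit 30 of the
 * counter index, so a fixed counter's row index maps to (ri | (1 << 30)).
 */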
#define	IAF_RI_TO_MSR(RI)		((RI) + (1 << 30))

#define	IAP_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | PMC_CAP_SYSTEM | \
    PMC_CAP_EDGE | PMC_CAP_THRESHOLD | PMC_CAP_READ | PMC_CAP_WRITE |	 \
    PMC_CAP_INVERT | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE)

#define	EV_IS_NOTARCH		0
#define	EV_IS_ARCH_SUPP		1
#define	EV_IS_ARCH_NOTSUPP	-1

/*
 * "Architectural" events defined by Intel.  The values of these
 * symbols correspond to positions in the bitmask returned by
 * the CPUID.0AH instruction.
 */
enum core_arch_events {
	CORE_AE_BRANCH_INSTRUCTION_RETIRED	= 5,
	CORE_AE_BRANCH_MISSES_RETIRED		= 6,
	CORE_AE_INSTRUCTION_RETIRED		= 1,
	CORE_AE_LLC_MISSES			= 4,
	CORE_AE_LLC_REFERENCE			= 3,
	CORE_AE_UNHALTED_REFERENCE_CYCLES	= 2,
	CORE_AE_UNHALTED_CORE_CYCLES		= 0
};

static enum pmc_cputype	core_cputype;
static int core_version;

struct core_cpu {
	volatile uint32_t	pc_iafctrl;	/* Fixed function control. */
	volatile uint64_t	pc_globalctrl;	/* Global control register. */
	struct pmc_hw		pc_corepmcs[];
};

static struct core_cpu **core_pcpu;

static uint32_t core_architectural_events;
static uint64_t core_pmcmask;

static int core_iaf_ri;		/* relative index of fixed counters */
static int core_iaf_width;
static int core_iaf_npmc;

static int core_iap_width;
static int core_iap_npmc;
static int core_iap_wroffset;

static u_int pmc_alloc_refs;
static bool pmc_tsx_force_abort_set;

static int
core_pcpu_noop(struct pmc_mdep *md, int cpu)
{
	(void) md;
	(void) cpu;
	return (0);
}

static int
core_pcpu_init(struct pmc_mdep *md, int cpu)
{
	struct pmc_cpu *pc;
	struct core_cpu *cc;
	struct pmc_hw *phw;
	int core_ri, n, npmc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[iaf,%d] insane cpu number %d", __LINE__, cpu));

	PMCDBG1(MDP,INI,1,"core-init cpu=%d", cpu);

	core_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_ri;
	npmc = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_num;

	if (core_version >= 2)
		npmc += md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF].pcd_num;

	cc = malloc(sizeof(struct core_cpu) + npmc * sizeof(struct pmc_hw),
	    M_PMC, M_WAITOK | M_ZERO);

	core_pcpu[cpu] = cc;
	pc = pmc_pcpu[cpu];

	KASSERT(pc != NULL && cc != NULL,
	    ("[core,%d] NULL per-cpu structures cpu=%d", __LINE__, cpu));

	for (n = 0, phw = cc->pc_corepmcs; n < npmc; n++, phw++) {
		phw->phw_state 	  = PMC_PHW_FLAG_IS_ENABLED |
		    PMC_PHW_CPU_TO_STATE(cpu) |
		    PMC_PHW_INDEX_TO_STATE(n + core_ri);
		phw->phw_pmc	  = NULL;
		pc->pc_hwpmcs[n + core_ri]  = phw;
	}

	if (core_version >= 2 && vm_guest == VM_GUEST_NO) {
		/* Enable freezing of PMCs on PMI (DEBUGCTL bit 12). */
		wrmsr(MSR_DEBUGCTLMSR, rdmsr(MSR_DEBUGCTLMSR) | 0x1000);
	}

	return (0);
}

static int
core_pcpu_fini(struct pmc_mdep *md, int cpu)
{
	int core_ri, n, npmc;
	struct pmc_cpu *pc;
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] insane cpu number (%d)", __LINE__, cpu));

	PMCDBG1(MDP,INI,1,"core-pcpu-fini cpu=%d", cpu);

	if ((cc = core_pcpu[cpu]) == NULL)
		return (0);

	core_pcpu[cpu] = NULL;

	pc = pmc_pcpu[cpu];

	KASSERT(pc != NULL, ("[core,%d] NULL per-cpu %d state", __LINE__,
		cpu));

	npmc = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_num;
	core_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_ri;

	for (n = 0; n < npmc; n++)
		wrmsr(IAP_EVSEL0 + n, 0);

	if (core_version >= 2) {
		wrmsr(IAF_CTRL, 0);
		npmc += md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF].pcd_num;
	}

	for (n = 0; n < npmc; n++)
		pc->pc_hwpmcs[n + core_ri] = NULL;

	free(cc, M_PMC);

	return (0);
}

/*
 * Fixed function counters.
 */

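/*
 * A sampling PMC is programmed with the two's complement of its reload
 * count so that the counter overflows, and raises a PMI, after the
 * requested number of events.  These helpers convert between the two
 * representations.
 */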
static pmc_value_t
iaf_perfctr_value_to_reload_count(pmc_value_t v)
{

	/* If the PMC has overflowed, return a reload count of zero. */
	if ((v & (1ULL << (core_iaf_width - 1))) == 0)
		return (0);
	v &= (1ULL << core_iaf_width) - 1;
	return (1ULL << core_iaf_width) - v;
}

static pmc_value_t
iaf_reload_count_to_perfctr_value(pmc_value_t rlc)
{
	return (1ULL << core_iaf_width) - rlc;
}

static int
iaf_allocate_pmc(int cpu, int ri, struct pmc *pm,
    const struct pmc_op_pmcallocate *a)
{
	uint8_t ev, umask;
	uint32_t caps;
	uint64_t config, flags;
	const struct pmc_md_iap_op_pmcallocate *iap;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU %d", __LINE__, cpu));

	PMCDBG2(MDP,ALL,1, "iaf-allocate ri=%d reqcaps=0x%x", ri, pm->pm_caps);

	if (ri < 0 || ri >= core_iaf_npmc)
		return (EINVAL);

	if (a->pm_class != PMC_CLASS_IAF)
		return (EINVAL);

	if ((a->pm_flags & PMC_F_EV_PMU) == 0)
		return (EINVAL);

	iap = &a->pm_md.pm_iap;
	config = iap->pm_iap_config;
	ev = IAP_EVSEL_GET(config);
	umask = IAP_UMASK_GET(config);

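	/*
	 * An event code of zero selects fixed counter 'ri' directly via
	 * a unit mask of (ri + 1); otherwise the encoding must match the
	 * architectural event that this fixed counter mirrors.
	 */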
	if (ev == 0x0) {
		if (umask != ri + 1)
			return (EINVAL);
	} else {
		switch (ri) {
		case 0:	/* INST_RETIRED.ANY */
			if (ev != 0xC0 || umask != 0x00)
				return (EINVAL);
			break;
		case 1:	/* CPU_CLK_UNHALTED.THREAD */
			if (ev != 0x3C || umask != 0x00)
				return (EINVAL);
			break;
		case 2:	/* CPU_CLK_UNHALTED.REF */
			if (ev != 0x3C || umask != 0x01)
				return (EINVAL);
			break;
		case 3:	/* TOPDOWN.SLOTS */
			if (ev != 0xA4 || umask != 0x01)
				return (EINVAL);
			break;
		default:
			return (EINVAL);
		}
	}

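	/*
	 * On CPUs with the TSX Force Abort erratum, the hardware may
	 * repurpose performance counter 3 while TSX is in use.  Writing 1
	 * to MSR_TSX_FORCE_ABORT forces RTM transactions to abort, keeping
	 * the full set of counters available for profiling.
	 */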
	pmc_alloc_refs++;
	if ((cpu_stdext_feature3 & CPUID_STDEXT3_TSXFA) != 0 &&
	    !pmc_tsx_force_abort_set) {
		pmc_tsx_force_abort_set = true;
		x86_msr_op(MSR_TSX_FORCE_ABORT, MSR_OP_RENDEZVOUS_ALL |
		    MSR_OP_WRITE, 1, NULL);
	}

	flags = 0;
	if (config & IAP_OS)
		flags |= IAF_OS;
	if (config & IAP_USR)
		flags |= IAF_USR;
	if (config & IAP_ANY)
		flags |= IAF_ANY;
	if (config & IAP_INT)
		flags |= IAF_PMI;

	caps = a->pm_caps;
	if (caps & PMC_CAP_INTERRUPT)
		flags |= IAF_PMI;
	if (caps & PMC_CAP_SYSTEM)
		flags |= IAF_OS;
	if (caps & PMC_CAP_USER)
		flags |= IAF_USR;
	if ((caps & (PMC_CAP_USER | PMC_CAP_SYSTEM)) == 0)
		flags |= (IAF_OS | IAF_USR);

	pm->pm_md.pm_iaf.pm_iaf_ctrl = (flags << (ri * 4));

	PMCDBG1(MDP,ALL,2, "iaf-allocate config=0x%jx",
	    (uintmax_t) pm->pm_md.pm_iaf.pm_iaf_ctrl);

	return (0);
}

static int
iaf_config_pmc(int cpu, int ri, struct pmc *pm)
{
	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU %d", __LINE__, cpu));

	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	PMCDBG3(MDP,CFG,1, "iaf-config cpu=%d ri=%d pm=%p", cpu, ri, pm);

	KASSERT(core_pcpu[cpu] != NULL, ("[core,%d] null per-cpu %d", __LINE__,
	    cpu));

	core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc = pm;

	return (0);
}

static int
iaf_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
{
	struct pmc_hw *phw;

	phw = &core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri];

	snprintf(pi->pm_name, sizeof(pi->pm_name), "IAF-%d", ri);
	pi->pm_class = PMC_CLASS_IAF;

	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
		pi->pm_enabled = TRUE;
		*ppmc          = phw->phw_pmc;
	} else {
		pi->pm_enabled = FALSE;
		*ppmc          = NULL;
	}

	return (0);
}

static int
iaf_get_config(int cpu, int ri, struct pmc **ppm)
{
	*ppm = core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc;

	return (0);
}

static int
iaf_get_msr(int ri, uint32_t *msr)
{
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[iaf,%d] ri %d out of range", __LINE__, ri));

	*msr = IAF_RI_TO_MSR(ri);

	return (0);
}

static int
iaf_read_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t *v)
{
	pmc_value_t tmp;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	tmp = rdpmc(IAF_RI_TO_MSR(ri));

	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		*v = iaf_perfctr_value_to_reload_count(tmp);
	else
		*v = tmp & ((1ULL << core_iaf_width) - 1);

	PMCDBG4(MDP,REA,1, "iaf-read cpu=%d ri=%d msr=0x%x -> v=%jx", cpu, ri,
	    IAF_RI_TO_MSR(ri), *v);

	return (0);
}

static int
iaf_release_pmc(int cpu, int ri, struct pmc *pmc)
{
	PMCDBG3(MDP,REL,1, "iaf-release cpu=%d ri=%d pm=%p", cpu, ri, pmc);

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	KASSERT(core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc == NULL,
	    ("[core,%d] PHW pmc non-NULL", __LINE__));

	MPASS(pmc_alloc_refs > 0);
	if (pmc_alloc_refs-- == 1 && pmc_tsx_force_abort_set) {
		pmc_tsx_force_abort_set = false;
		x86_msr_op(MSR_TSX_FORCE_ABORT, MSR_OP_RENDEZVOUS_ALL |
		    MSR_OP_WRITE, 0, NULL);
	}

	return (0);
}

static int
iaf_start_pmc(int cpu, int ri, struct pmc *pm)
{
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	PMCDBG2(MDP,STA,1,"iaf-start cpu=%d ri=%d", cpu, ri);

	cc = core_pcpu[cpu];
	cc->pc_iafctrl |= pm->pm_md.pm_iaf.pm_iaf_ctrl;
	wrmsr(IAF_CTRL, cc->pc_iafctrl);

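	/* The fixed counters' enable bits start at bit IAF_OFFSET. */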
	cc->pc_globalctrl |= (1ULL << (ri + IAF_OFFSET));
	wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);

	PMCDBG4(MDP,STA,1,"iafctrl=%x(%x) globalctrl=%jx(%jx)",
	    cc->pc_iafctrl, (uint32_t) rdmsr(IAF_CTRL),
	    cc->pc_globalctrl, rdmsr(IA_GLOBAL_CTRL));

	return (0);
}

static int
iaf_stop_pmc(int cpu, int ri, struct pmc *pm)
{
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	PMCDBG2(MDP,STA,1,"iaf-stop cpu=%d ri=%d", cpu, ri);

	cc = core_pcpu[cpu];

	cc->pc_iafctrl &= ~(IAF_MASK << (ri * 4));
	wrmsr(IAF_CTRL, cc->pc_iafctrl);

	/* Don't need to write IA_GLOBAL_CTRL, one disable is enough. */

	PMCDBG4(MDP,STO,1,"iafctrl=%x(%x) globalctrl=%jx(%jx)",
	    cc->pc_iafctrl, (uint32_t) rdmsr(IAF_CTRL),
	    cc->pc_globalctrl, rdmsr(IA_GLOBAL_CTRL));

	return (0);
}

static int
iaf_write_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t v)
{
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	cc = core_pcpu[cpu];

	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		v = iaf_reload_count_to_perfctr_value(v);

	/* Turn off the fixed counter. */
	wrmsr(IAF_CTRL, cc->pc_iafctrl & ~(IAF_MASK << (ri * 4)));

	wrmsr(IAF_CTR0 + ri, v & ((1ULL << core_iaf_width) - 1));

	/* Turn the fixed counters back on. */
	wrmsr(IAF_CTRL, cc->pc_iafctrl);

	PMCDBG6(MDP,WRI,1, "iaf-write cpu=%d ri=%d msr=0x%x v=%jx iafctrl=%jx "
	    "pmc=%jx", cpu, ri, IAF_RI_TO_MSR(ri), v,
	    (uintmax_t) rdmsr(IAF_CTRL),
	    (uintmax_t) rdpmc(IAF_RI_TO_MSR(ri)));

	return (0);
}

static void
iaf_initialize(struct pmc_mdep *md, int maxcpu, int npmc, int pmcwidth)
{
	struct pmc_classdep *pcd;

	KASSERT(md != NULL, ("[iaf,%d] md is NULL", __LINE__));

	PMCDBG0(MDP,INI,1, "iaf-initialize");

	pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF];

	pcd->pcd_caps	= IAF_PMC_CAPS;
	pcd->pcd_class	= PMC_CLASS_IAF;
	pcd->pcd_num	= npmc;
	pcd->pcd_ri	= md->pmd_npmc;
	pcd->pcd_width	= pmcwidth;

	pcd->pcd_allocate_pmc	= iaf_allocate_pmc;
	pcd->pcd_config_pmc	= iaf_config_pmc;
	pcd->pcd_describe	= iaf_describe;
	pcd->pcd_get_config	= iaf_get_config;
	pcd->pcd_get_msr	= iaf_get_msr;
	pcd->pcd_pcpu_fini	= core_pcpu_noop;
	pcd->pcd_pcpu_init	= core_pcpu_noop;
	pcd->pcd_read_pmc	= iaf_read_pmc;
	pcd->pcd_release_pmc	= iaf_release_pmc;
	pcd->pcd_start_pmc	= iaf_start_pmc;
	pcd->pcd_stop_pmc	= iaf_stop_pmc;
	pcd->pcd_write_pmc	= iaf_write_pmc;

	md->pmd_npmc	       += npmc;
}

/*
 * Intel programmable PMCs.
 */

/* Sub fields of UMASK that this event supports. */
#define	IAP_M_CORE		(1 << 0) /* Core specificity */
#define	IAP_M_AGENT		(1 << 1) /* Agent specificity */
#define	IAP_M_PREFETCH		(1 << 2) /* Prefetch */
#define	IAP_M_MESI		(1 << 3) /* MESI */
#define	IAP_M_SNOOPRESPONSE	(1 << 4) /* Snoop response */
#define	IAP_M_SNOOPTYPE		(1 << 5) /* Snoop type */
#define	IAP_M_TRANSITION	(1 << 6) /* Transition */

#define	IAP_F_CORE		(0x3 << 14) /* Core specificity */
#define	IAP_F_AGENT		(0x1 << 13) /* Agent specificity */
#define	IAP_F_PREFETCH		(0x3 << 12) /* Prefetch */
#define	IAP_F_MESI		(0xF <<  8) /* MESI */
#define	IAP_F_SNOOPRESPONSE	(0xB <<  8) /* Snoop response */
#define	IAP_F_SNOOPTYPE		(0x3 <<  8) /* Snoop type */
#define	IAP_F_TRANSITION	(0x1 << 12) /* Transition */

#define	IAP_PREFETCH_RESERVED	(0x2 << 12)
#define	IAP_CORE_THIS		(0x1 << 14)
#define	IAP_CORE_ALL		(0x3 << 14)
#define	IAP_F_CMASK		0xFF000000

static pmc_value_t
iap_perfctr_value_to_reload_count(pmc_value_t v)
{

	/* If the PMC has overflowed, return a reload count of zero. */
	if ((v & (1ULL << (core_iap_width - 1))) == 0)
		return (0);
	v &= (1ULL << core_iap_width) - 1;
	return (1ULL << core_iap_width) - v;
}

static pmc_value_t
iap_reload_count_to_perfctr_value(pmc_value_t rlc)
{
	return (1ULL << core_iap_width) - rlc;
}

static int
iap_pmc_has_overflowed(int ri)
{
	uint64_t v;

	/*
	 * We treat a Core (i.e., Intel architecture v1) PMC as having
	 * overflowed if its MSB is zero.
	 */
	v = rdpmc(ri);
	return ((v & (1ULL << (core_iap_width - 1))) == 0);
}

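/*
 * Several events are restricted to a subset of the programmable
 * counters on a given microarchitecture.  Each helper below returns
 * non-zero if event 'evsel' may be scheduled on counter 'ri'.
 */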
static int
iap_event_corei7_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t mask;

	switch (evsel) {
	/* Events valid only on counter 0, 1. */
	case 0x40:
	case 0x41:
	case 0x42:
	case 0x43:
	case 0x4C:
	case 0x4E:
	case 0x51:
	case 0x52:
	case 0x53:
	case 0x63:
		mask = 0x3;
		break;
	/* Any row index is ok. */
	default:
		mask = ~0;
	}

	return (mask & (1 << ri));
}

static int
iap_event_westmere_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t mask;

	switch (evsel) {
	/* Events valid only on counter 0. */
	case 0x60:
	case 0xB3:
		mask = 0x1;
		break;

	/* Events valid only on counter 0, 1. */
	case 0x4C:
	case 0x4E:
	case 0x51:
	case 0x52:
	case 0x63:
		mask = 0x3;
		break;
	/* Any row index is ok. */
	default:
		mask = ~0;
	}

	return (mask & (1 << ri));
}

static int
iap_event_sb_sbx_ib_ibx_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t mask;

	switch (evsel) {
	/* Events valid only on counter 0. */
	case 0xB7:
		mask = 0x1;
		break;
	/* Events valid only on counter 1. */
	case 0xC0:
		mask = 0x2;
		break;
	/* Events valid only on counter 2. */
	case 0x48:
	case 0xA2:
	case 0xA3:
		mask = 0x4;
		break;
	/* Events valid only on counter 3. */
	case 0xBB:
	case 0xCD:
		mask = 0x8;
		break;
	/* Any row index is ok. */
	default:
		mask = ~0;
	}

	return (mask & (1 << ri));
}

static int
iap_event_core_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t mask;

	switch (evsel) {
		/*
		 * Events valid only on counter 0.
		 */
	case 0x10:
	case 0x14:
	case 0x18:
	case 0xB3:
	case 0xC1:
	case 0xCB:
		mask = (1 << 0);
		break;

		/*
		 * Events valid only on counter 1.
		 */
	case 0x11:
	case 0x12:
	case 0x13:
		mask = (1 << 1);
		break;

	default:
		mask = ~0;	/* Any row index is ok. */
	}

	return (mask & (1 << ri));
}

static int
iap_allocate_pmc(int cpu, int ri, struct pmc *pm,
    const struct pmc_op_pmcallocate *a)
{
	uint8_t ev;
	const struct pmc_md_iap_op_pmcallocate *iap;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index value %d", __LINE__, ri));

	if (a->pm_class != PMC_CLASS_IAP)
		return (EINVAL);

	if ((a->pm_flags & PMC_F_EV_PMU) == 0)
		return (EINVAL);

	iap = &a->pm_md.pm_iap;
	ev = IAP_EVSEL_GET(iap->pm_iap_config);

	switch (core_cputype) {
	case PMC_CPU_INTEL_CORE:
	case PMC_CPU_INTEL_CORE2:
	case PMC_CPU_INTEL_CORE2EXTREME:
		if (iap_event_core_ok_on_counter(ev, ri) == 0)
			return (EINVAL);
		break;
	case PMC_CPU_INTEL_COREI7:
	case PMC_CPU_INTEL_NEHALEM_EX:
		if (iap_event_corei7_ok_on_counter(ev, ri) == 0)
			return (EINVAL);
		break;
	case PMC_CPU_INTEL_WESTMERE:
	case PMC_CPU_INTEL_WESTMERE_EX:
		if (iap_event_westmere_ok_on_counter(ev, ri) == 0)
			return (EINVAL);
		break;
	case PMC_CPU_INTEL_SANDYBRIDGE:
	case PMC_CPU_INTEL_SANDYBRIDGE_XEON:
	case PMC_CPU_INTEL_IVYBRIDGE:
	case PMC_CPU_INTEL_IVYBRIDGE_XEON:
	case PMC_CPU_INTEL_HASWELL:
	case PMC_CPU_INTEL_HASWELL_XEON:
	case PMC_CPU_INTEL_BROADWELL:
	case PMC_CPU_INTEL_BROADWELL_XEON:
		if (iap_event_sb_sbx_ib_ibx_ok_on_counter(ev, ri) == 0)
			return (EINVAL);
		break;
	case PMC_CPU_INTEL_ATOM:
	case PMC_CPU_INTEL_ATOM_SILVERMONT:
	case PMC_CPU_INTEL_ATOM_GOLDMONT:
	case PMC_CPU_INTEL_ATOM_GOLDMONT_P:
	case PMC_CPU_INTEL_ATOM_TREMONT:
	case PMC_CPU_INTEL_SKYLAKE:
	case PMC_CPU_INTEL_SKYLAKE_XEON:
	case PMC_CPU_INTEL_ICELAKE:
	case PMC_CPU_INTEL_ICELAKE_XEON:
	case PMC_CPU_INTEL_ALDERLAKE:
	default:
		break;
	}

	pm->pm_md.pm_iap.pm_iap_evsel = iap->pm_iap_config;
	return (0);
}

static int
iap_config_pmc(int cpu, int ri, struct pmc *pm)
{
	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU %d", __LINE__, cpu));

	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	PMCDBG3(MDP,CFG,1, "iap-config cpu=%d ri=%d pm=%p", cpu, ri, pm);

	KASSERT(core_pcpu[cpu] != NULL, ("[core,%d] null per-cpu %d", __LINE__,
	    cpu));

	core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc = pm;

	return (0);
}

static int
iap_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
{
	struct pmc_hw *phw;

	phw = &core_pcpu[cpu]->pc_corepmcs[ri];

	snprintf(pi->pm_name, sizeof(pi->pm_name), "IAP-%d", ri);
	pi->pm_class = PMC_CLASS_IAP;

	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
		pi->pm_enabled = TRUE;
		*ppmc          = phw->phw_pmc;
	} else {
		pi->pm_enabled = FALSE;
		*ppmc          = NULL;
	}

	return (0);
}

static int
iap_get_config(int cpu, int ri, struct pmc **ppm)
{
	*ppm = core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc;

	return (0);
}

static int
iap_get_msr(int ri, uint32_t *msr)
{
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[iap,%d] ri %d out of range", __LINE__, ri));

	*msr = ri;

	return (0);
}

static int
iap_read_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t *v)
{
	pmc_value_t tmp;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	tmp = rdpmc(ri);
	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		*v = iap_perfctr_value_to_reload_count(tmp);
	else
		*v = tmp & ((1ULL << core_iap_width) - 1);

	PMCDBG4(MDP,REA,1, "iap-read cpu=%d ri=%d msr=0x%x -> v=%jx", cpu, ri,
	    IAP_PMC0 + ri, *v);

	return (0);
}

static int
iap_release_pmc(int cpu, int ri, struct pmc *pm)
{
	(void) pm;

	PMCDBG3(MDP,REL,1, "iap-release cpu=%d ri=%d pm=%p", cpu, ri,
	    pm);

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	KASSERT(core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc
	    == NULL, ("[core,%d] PHW pmc non-NULL", __LINE__));

	return (0);
}

static int
iap_start_pmc(int cpu, int ri, struct pmc *pm)
{
	uint64_t evsel;
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	cc = core_pcpu[cpu];

	PMCDBG2(MDP,STA,1, "iap-start cpu=%d ri=%d", cpu, ri);

	evsel = pm->pm_md.pm_iap.pm_iap_evsel;

	PMCDBG4(MDP,STA,2, "iap-start/2 cpu=%d ri=%d evselmsr=0x%x evsel=0x%x",
	    cpu, ri, IAP_EVSEL0 + ri, evsel);

	/* Event specific configuration. */

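	/*
	 * The OFFCORE_RESPONSE events (0xB7 and 0xBB) take their request
	 * mask from a dedicated MSR rather than from the event select
	 * register itself.
	 */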
	switch (IAP_EVSEL_GET(evsel)) {
	case 0xB7:
		wrmsr(IA_OFFCORE_RSP0, pm->pm_md.pm_iap.pm_iap_rsp);
		break;
	case 0xBB:
		wrmsr(IA_OFFCORE_RSP1, pm->pm_md.pm_iap.pm_iap_rsp);
		break;
	default:
		break;
	}

	wrmsr(IAP_EVSEL0 + ri, evsel | IAP_EN);

	if (core_version >= 2) {
		cc->pc_globalctrl |= (1ULL << ri);
		wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);
	}

	return (0);
}

static int
iap_stop_pmc(int cpu, int ri, struct pmc *pm __unused)
{

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row index %d", __LINE__, ri));

	PMCDBG2(MDP,STO,1, "iap-stop cpu=%d ri=%d", cpu, ri);

	wrmsr(IAP_EVSEL0 + ri, 0);

	/* Don't need to write IA_GLOBAL_CTRL, one disable is enough. */

	return (0);
}

static int
iap_write_pmc(int cpu, int ri, struct pmc *pm, pmc_value_t v)
{

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row index %d", __LINE__, ri));

	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		v = iap_reload_count_to_perfctr_value(v);

	v &= (1ULL << core_iap_width) - 1;

	PMCDBG4(MDP,WRI,1, "iap-write cpu=%d ri=%d msr=0x%x v=%jx", cpu, ri,
	    IAP_PMC0 + ri, v);

	/*
	 * Write the new value to the counter (or its alias).  The
	 * counter will be in a stopped state when the pcd_write()
	 * entry point is called.
	 */
	wrmsr(core_iap_wroffset + IAP_PMC0 + ri, v);
	return (0);
}

static void
iap_initialize(struct pmc_mdep *md, int maxcpu, int npmc, int pmcwidth,
    int flags)
{
	struct pmc_classdep *pcd;

	KASSERT(md != NULL, ("[iap,%d] md is NULL", __LINE__));

	PMCDBG0(MDP,INI,1, "iap-initialize");

	/* Remember the set of architectural events supported. */
	core_architectural_events = ~flags;

	pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP];

	pcd->pcd_caps	= IAP_PMC_CAPS;
	pcd->pcd_class	= PMC_CLASS_IAP;
	pcd->pcd_num	= npmc;
	pcd->pcd_ri	= md->pmd_npmc;
	pcd->pcd_width	= pmcwidth;

	pcd->pcd_allocate_pmc	= iap_allocate_pmc;
	pcd->pcd_config_pmc	= iap_config_pmc;
	pcd->pcd_describe	= iap_describe;
	pcd->pcd_get_config	= iap_get_config;
	pcd->pcd_get_msr	= iap_get_msr;
	pcd->pcd_pcpu_fini	= core_pcpu_fini;
	pcd->pcd_pcpu_init	= core_pcpu_init;
	pcd->pcd_read_pmc	= iap_read_pmc;
	pcd->pcd_release_pmc	= iap_release_pmc;
	pcd->pcd_start_pmc	= iap_start_pmc;
	pcd->pcd_stop_pmc	= iap_stop_pmc;
	pcd->pcd_write_pmc	= iap_write_pmc;

	md->pmd_npmc	       += npmc;
}

static int
core_intr(struct trapframe *tf)
{
	pmc_value_t v;
	struct pmc *pm;
	struct core_cpu *cc;
	int error, found_interrupt, ri;

	PMCDBG3(MDP,INT, 1, "cpu=%d tf=%p um=%d", curcpu, (void *) tf,
	    TRAPF_USERMODE(tf));

	found_interrupt = 0;
	cc = core_pcpu[curcpu];

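	/*
	 * Version 1 PMUs have no global status register, so scan every
	 * programmable counter and infer an overflow from a clear MSB.
	 */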
	for (ri = 0; ri < core_iap_npmc; ri++) {
		if ((pm = cc->pc_corepmcs[ri].phw_pmc) == NULL ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
			continue;

		if (!iap_pmc_has_overflowed(ri))
			continue;

		found_interrupt = 1;

		if (pm->pm_state != PMC_STATE_RUNNING)
			continue;

		error = pmc_process_interrupt(PMC_HR, pm, tf);

		v = pm->pm_sc.pm_reloadcount;
		v = iap_reload_count_to_perfctr_value(v);

		/*
		 * Stop the counter and reload it, but only restart it if
		 * the PMC is not stalled.
		 */
		wrmsr(IAP_EVSEL0 + ri, pm->pm_md.pm_iap.pm_iap_evsel);
		wrmsr(core_iap_wroffset + IAP_PMC0 + ri, v);

		if (__predict_false(error))
			continue;

		wrmsr(IAP_EVSEL0 + ri, pm->pm_md.pm_iap.pm_iap_evsel | IAP_EN);
	}

	if (found_interrupt)
		counter_u64_add(pmc_stats.pm_intr_processed, 1);
	else
		counter_u64_add(pmc_stats.pm_intr_ignored, 1);

	if (found_interrupt)
		lapic_reenable_pmc();

	return (found_interrupt);
}

static int
core2_intr(struct trapframe *tf)
{
	int error, found_interrupt = 0, n, cpu;
	uint64_t flag, intrstatus, intrdisable = 0;
	struct pmc *pm;
	struct core_cpu *cc;
	pmc_value_t v;

	cpu = curcpu;
	PMCDBG3(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
	    TRAPF_USERMODE(tf));

	/*
	 * The IA_GLOBAL_STATUS (MSR 0x38E) register indicates which
	 * PMCs have a pending PMI interrupt.  We take a 'snapshot' of
	 * the current set of interrupting PMCs and process these
	 * after stopping them.
	 */
	intrstatus = rdmsr(IA_GLOBAL_STATUS);
	PMCDBG2(MDP,INT, 1, "cpu=%d intrstatus=%jx", cpu,
	    (uintmax_t) intrstatus);

	/*
	 * Stop the PMCs unless the hardware has already done it.
	 */
	if ((intrstatus & IA_GLOBAL_STATUS_FLAG_CTR_FRZ) == 0)
		wrmsr(IA_GLOBAL_CTRL, 0);

	cc = core_pcpu[cpu];
	KASSERT(cc != NULL, ("[core,%d] null pcpu", __LINE__));

	/*
	 * Look for interrupts from fixed function PMCs.
	 */
	for (n = 0, flag = (1ULL << IAF_OFFSET); n < core_iaf_npmc;
	     n++, flag <<= 1) {
		if ((intrstatus & flag) == 0)
			continue;

		found_interrupt = 1;

		pm = cc->pc_corepmcs[n + core_iaf_ri].phw_pmc;
		if (pm == NULL || pm->pm_state != PMC_STATE_RUNNING ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
			continue;

		error = pmc_process_interrupt(PMC_HR, pm, tf);
		if (__predict_false(error))
			intrdisable |= flag;

		v = iaf_reload_count_to_perfctr_value(pm->pm_sc.pm_reloadcount);

		/* Reload sampling count. */
		wrmsr(IAF_CTR0 + n, v);

		PMCDBG4(MDP,INT, 1, "iaf-intr cpu=%d error=%d v=%jx(%jx)", curcpu,
		    error, (uintmax_t) v, (uintmax_t) rdpmc(IAF_RI_TO_MSR(n)));
	}

	/*
	 * Process interrupts from the programmable counters.
	 */
	for (n = 0, flag = 1; n < core_iap_npmc; n++, flag <<= 1) {
		if ((intrstatus & flag) == 0)
			continue;

		found_interrupt = 1;

		pm = cc->pc_corepmcs[n].phw_pmc;
		if (pm == NULL || pm->pm_state != PMC_STATE_RUNNING ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
			continue;

		error = pmc_process_interrupt(PMC_HR, pm, tf);
		if (__predict_false(error))
			intrdisable |= flag;

		v = iap_reload_count_to_perfctr_value(pm->pm_sc.pm_reloadcount);

		PMCDBG3(MDP,INT, 1, "iap-intr cpu=%d error=%d v=%jx", cpu, error,
		    (uintmax_t) v);

		/* Reload sampling count. */
		wrmsr(core_iap_wroffset + IAP_PMC0 + n, v);
	}

	if (found_interrupt)
		counter_u64_add(pmc_stats.pm_intr_processed, 1);
	else
		counter_u64_add(pmc_stats.pm_intr_ignored, 1);

	if (found_interrupt)
		lapic_reenable_pmc();

	/*
	 * Reenable all non-stalled PMCs.
	 */
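	/*
	 * A write to IA_GLOBAL_OVF_CTRL clears the overflow and freeze
	 * bits.  When the counters froze on PMI, fix up the global
	 * control first so that stalled PMCs stay disabled once
	 * counting resumes.
	 */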
	if ((intrstatus & IA_GLOBAL_STATUS_FLAG_CTR_FRZ) == 0) {
		wrmsr(IA_GLOBAL_OVF_CTRL, intrstatus);
		cc->pc_globalctrl &= ~intrdisable;
		wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);
	} else {
		if (__predict_false(intrdisable)) {
			cc->pc_globalctrl &= ~intrdisable;
			wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);
		}
		wrmsr(IA_GLOBAL_OVF_CTRL, intrstatus);
	}

	PMCDBG4(MDP, INT, 1, "cpu=%d fixedctrl=%jx globalctrl=%jx status=%jx",
	    cpu, (uintmax_t) rdmsr(IAF_CTRL),
	    (uintmax_t) rdmsr(IA_GLOBAL_CTRL),
	    (uintmax_t) rdmsr(IA_GLOBAL_STATUS));

	return (found_interrupt);
}

int
pmc_core_initialize(struct pmc_mdep *md, int maxcpu, int version_override)
{
	int cpuid[CORE_CPUID_REQUEST_SIZE];
	int flags, nflags;

	do_cpuid(CORE_CPUID_REQUEST, cpuid);

	core_cputype = md->pmd_cputype;
	core_version = (version_override > 0) ? version_override :
	    cpuid[CORE_CPUID_EAX] & 0xFF;

	PMCDBG3(MDP,INI,1,"core-init cputype=%d ncpu=%d version=%d",
	    core_cputype, maxcpu, core_version);

	if (core_version < 1 || core_version > 5 ||
	    (core_cputype != PMC_CPU_INTEL_CORE && core_version == 1)) {
		/* Unknown PMC architecture. */
		printf("hwpmc_core: unknown PMC architecture: %d\n",
		    core_version);
		return (EPROGMISMATCH);
	}

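	/*
	 * When the PMU advertises full-width counter writes, the alias
	 * MSRs (IAP_A_PMC0 and up) accept writes of the counter's full
	 * width; plain writes to IAP_PMC0 sign-extend from bit 31.
	 */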
	core_iap_wroffset = 0;
	if (cpu_feature2 & CPUID2_PDCM) {
		if (rdmsr(IA32_PERF_CAPABILITIES) & PERFCAP_FW_WRITE) {
			PMCDBG0(MDP, INI, 1,
			    "core-init full-width write supported");
			core_iap_wroffset = IAP_A_PMC0 - IAP_PMC0;
		} else
			PMCDBG0(MDP, INI, 1,
			    "core-init full-width write NOT supported");
	} else
		PMCDBG0(MDP, INI, 1, "core-init pdcm not supported");

	core_pmcmask = 0;

	/*
	 * Initialize programmable counters.
	 */
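	/*
	 * CPUID.0AH:EAX reports the number of programmable counters in
	 * bits 15:8 and their width in bits 23:16; bits 31:24 give the
	 * length of the EBX bit vector of unavailable architectural
	 * events.
	 */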
	core_iap_npmc = (cpuid[CORE_CPUID_EAX] >> 8) & 0xFF;
	core_iap_width = (cpuid[CORE_CPUID_EAX] >> 16) & 0xFF;

	core_pmcmask |= ((1ULL << core_iap_npmc) - 1);

	nflags = (cpuid[CORE_CPUID_EAX] >> 24) & 0xFF;
	flags = cpuid[CORE_CPUID_EBX] & ((1 << nflags) - 1);

	iap_initialize(md, maxcpu, core_iap_npmc, core_iap_width, flags);

	/*
	 * Initialize fixed function counters, if present.
	 */
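	/* CPUID.0AH:EDX gives their count in bits 4:0 and width in 12:5. */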
	if (core_version >= 2) {
		core_iaf_ri = core_iap_npmc;
		core_iaf_npmc = cpuid[CORE_CPUID_EDX] & 0x1F;
		core_iaf_width = (cpuid[CORE_CPUID_EDX] >> 5) & 0xFF;

		iaf_initialize(md, maxcpu, core_iaf_npmc, core_iaf_width);
		core_pmcmask |= ((1ULL << core_iaf_npmc) - 1) << IAF_OFFSET;
	}

	PMCDBG2(MDP,INI,1,"core-init pmcmask=0x%jx iafri=%d", core_pmcmask,
	    core_iaf_ri);

	core_pcpu = malloc(sizeof(*core_pcpu) * maxcpu, M_PMC,
	    M_ZERO | M_WAITOK);

	/*
	 * Choose the appropriate interrupt handler.
	 */
	if (core_version >= 2)
		md->pmd_intr = core2_intr;
	else
		md->pmd_intr = core_intr;

	return (0);
}

void
pmc_core_finalize(struct pmc_mdep *md)
{
	PMCDBG0(MDP,INI,1, "core-finalize");

	free(core_pcpu, M_PMC);
	core_pcpu = NULL;
}