xref: /freebsd/sys/dev/hwpmc/hwpmc_core.c (revision 8df8b2d3e51d1b816201d8a1fe8bc29fe192e562)
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2008 Joseph Koshy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Intel Core PMCs.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/pmc.h>
#include <sys/pmckern.h>
#include <sys/systm.h>

#include <machine/intr_machdep.h>
#if (__FreeBSD_version >= 1100000)
#include <x86/apicvar.h>
#else
#include <machine/apicvar.h>
#endif
#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>

#define	CORE_CPUID_REQUEST		0xA
#define	CORE_CPUID_REQUEST_SIZE		0x4
#define	CORE_CPUID_EAX			0x0
#define	CORE_CPUID_EBX			0x1
#define	CORE_CPUID_ECX			0x2
#define	CORE_CPUID_EDX			0x3

#define	IAF_PMC_CAPS			\
	(PMC_CAP_READ | PMC_CAP_WRITE | PMC_CAP_INTERRUPT | \
	 PMC_CAP_USER | PMC_CAP_SYSTEM)
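/*
 * Fixed-function counters are read with the RDPMC instruction by
 * setting bit 30 of the counter index.
 */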
#define	IAF_RI_TO_MSR(RI)		((RI) + (1 << 30))

#define	IAP_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | PMC_CAP_SYSTEM | \
    PMC_CAP_EDGE | PMC_CAP_THRESHOLD | PMC_CAP_READ | PMC_CAP_WRITE |	 \
    PMC_CAP_INVERT | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE)

#define	EV_IS_NOTARCH		0
#define	EV_IS_ARCH_SUPP		1
#define	EV_IS_ARCH_NOTSUPP	-1

/*
 * "Architectural" events defined by Intel.  The values of these
 * symbols correspond to positions in the bitmask returned by
 * the CPUID.0AH instruction.
 */
enum core_arch_events {
	CORE_AE_BRANCH_INSTRUCTION_RETIRED	= 5,
	CORE_AE_BRANCH_MISSES_RETIRED		= 6,
	CORE_AE_INSTRUCTION_RETIRED		= 1,
	CORE_AE_LLC_MISSES			= 4,
	CORE_AE_LLC_REFERENCE			= 3,
	CORE_AE_UNHALTED_REFERENCE_CYCLES	= 2,
	CORE_AE_UNHALTED_CORE_CYCLES		= 0
};

static enum pmc_cputype	core_cputype;

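/*
 * Per-CPU state.  pc_iafctrl and pc_globalctrl cache the IAF_CTRL and
 * IA_GLOBAL_CTRL MSRs.  pc_resync is set by the interrupt handler to
 * note that the cached values and the hardware may disagree; the
 * start/stop paths then retry their MSR updates until the two agree.
 */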
struct core_cpu {
	volatile uint32_t	pc_resync;
	volatile uint32_t	pc_iafctrl;	/* Fixed function control. */
	volatile uint64_t	pc_globalctrl;	/* Global control register. */
	struct pmc_hw		pc_corepmcs[];
};

static struct core_cpu **core_pcpu;

static uint32_t core_architectural_events;
static uint64_t core_pmcmask;

static int core_iaf_ri;		/* relative index of fixed counters */
static int core_iaf_width;
static int core_iaf_npmc;

static int core_iap_width;
static int core_iap_npmc;
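/* Offset to the full-width counter alias MSRs, or 0 if unsupported. */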
static int core_iap_wroffset;

static int
core_pcpu_noop(struct pmc_mdep *md, int cpu)
{
	(void) md;
	(void) cpu;
	return (0);
}

static int
core_pcpu_init(struct pmc_mdep *md, int cpu)
{
	struct pmc_cpu *pc;
	struct core_cpu *cc;
	struct pmc_hw *phw;
	int core_ri, n, npmc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] insane cpu number %d", __LINE__, cpu));

	PMCDBG1(MDP,INI,1,"core-pcpu-init cpu=%d", cpu);

	core_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_ri;
	npmc = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_num;

	if (core_cputype != PMC_CPU_INTEL_CORE)
		npmc += md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF].pcd_num;

	cc = malloc(sizeof(struct core_cpu) + npmc * sizeof(struct pmc_hw),
	    M_PMC, M_WAITOK | M_ZERO);

	core_pcpu[cpu] = cc;
	pc = pmc_pcpu[cpu];

	KASSERT(pc != NULL && cc != NULL,
	    ("[core,%d] NULL per-cpu structures cpu=%d", __LINE__, cpu));

	for (n = 0, phw = cc->pc_corepmcs; n < npmc; n++, phw++) {
		phw->phw_state	  = PMC_PHW_FLAG_IS_ENABLED |
		    PMC_PHW_CPU_TO_STATE(cpu) |
		    PMC_PHW_INDEX_TO_STATE(n + core_ri);
		phw->phw_pmc	  = NULL;
		pc->pc_hwpmcs[n + core_ri]  = phw;
	}

	return (0);
}

static int
core_pcpu_fini(struct pmc_mdep *md, int cpu)
{
	int core_ri, n, npmc;
	struct pmc_cpu *pc;
	struct core_cpu *cc;
	uint64_t msr = 0;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] insane cpu number (%d)", __LINE__, cpu));

	PMCDBG1(MDP,INI,1,"core-pcpu-fini cpu=%d", cpu);

	if ((cc = core_pcpu[cpu]) == NULL)
		return (0);

	core_pcpu[cpu] = NULL;

	pc = pmc_pcpu[cpu];

	KASSERT(pc != NULL, ("[core,%d] NULL per-cpu %d state", __LINE__,
		cpu));

	npmc = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_num;
	core_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_ri;

	for (n = 0; n < npmc; n++) {
		msr = rdmsr(IAP_EVSEL0 + n) & ~IAP_EVSEL_MASK;
		wrmsr(IAP_EVSEL0 + n, msr);
	}

	if (core_cputype != PMC_CPU_INTEL_CORE) {
		msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK;
		wrmsr(IAF_CTRL, msr);
		npmc += md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF].pcd_num;
	}

	for (n = 0; n < npmc; n++)
		pc->pc_hwpmcs[n + core_ri] = NULL;

	free(cc, M_PMC);

	return (0);
}

/*
 * Fixed function counters.
 */

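/*
 * A sampling PMC counts upward from (2^width - reload_count) so that it
 * overflows, and raises a PMI, after exactly reload_count events.  For
 * example, with a 48-bit counter a reload count of 1000 is programmed
 * as 2^48 - 1000.  Conversely, a counter value whose MSB is clear has
 * already overflowed and corresponds to a remaining count of zero.
 */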
static pmc_value_t
iaf_perfctr_value_to_reload_count(pmc_value_t v)
{

	/* If the PMC has overflowed, return a reload count of zero. */
	if ((v & (1ULL << (core_iaf_width - 1))) == 0)
		return (0);
	v &= (1ULL << core_iaf_width) - 1;
	return (1ULL << core_iaf_width) - v;
}

static pmc_value_t
iaf_reload_count_to_perfctr_value(pmc_value_t rlc)
{
	return (1ULL << core_iaf_width) - rlc;
}

static int
iaf_allocate_pmc(int cpu, int ri, struct pmc *pm,
    const struct pmc_op_pmcallocate *a)
{
	uint8_t ev, umask;
	uint32_t caps, flags, config;
	const struct pmc_md_iap_op_pmcallocate *iap;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU %d", __LINE__, cpu));

	PMCDBG2(MDP,ALL,1, "iaf-allocate ri=%d reqcaps=0x%x", ri, pm->pm_caps);

	if (ri < 0 || ri >= core_iaf_npmc)
		return (EINVAL);

	caps = a->pm_caps;

	if (a->pm_class != PMC_CLASS_IAF ||
	    (caps & IAF_PMC_CAPS) != caps)
		return (EINVAL);

	iap = &a->pm_md.pm_iap;
	config = iap->pm_iap_config;
	ev = IAP_EVSEL_GET(config);
	umask = IAP_UMASK_GET(config);

	/* INST_RETIRED.ANY */
	if (ev == 0xC0 && ri != 0)
		return (EINVAL);
	/* CPU_CLK_UNHALTED.THREAD */
	if (ev == 0x3C && ri != 1)
		return (EINVAL);
	/* CPU_CLK_UNHALTED.REF */
	if (ev == 0x0 && umask == 0x3 && ri != 2)
		return (EINVAL);

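	/*
	 * Each fixed counter owns a 4-bit control field in IAF_CTRL;
	 * translate the IAP-style configuration bits into the
	 * corresponding IAF flags before shifting them into place.
	 */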
	flags = 0;
	if (config & IAP_OS)
		flags |= IAF_OS;
	if (config & IAP_USR)
		flags |= IAF_USR;
	if (config & IAP_ANY)
		flags |= IAF_ANY;
	if (config & IAP_INT)
		flags |= IAF_PMI;

	if (caps & PMC_CAP_INTERRUPT)
		flags |= IAF_PMI;
	if (caps & PMC_CAP_SYSTEM)
		flags |= IAF_OS;
	if (caps & PMC_CAP_USER)
		flags |= IAF_USR;
	if ((caps & (PMC_CAP_USER | PMC_CAP_SYSTEM)) == 0)
		flags |= (IAF_OS | IAF_USR);

	pm->pm_md.pm_iaf.pm_iaf_ctrl = (flags << (ri * 4));

	PMCDBG1(MDP,ALL,2, "iaf-allocate config=0x%jx",
	    (uintmax_t) pm->pm_md.pm_iaf.pm_iaf_ctrl);

	return (0);
}

static int
iaf_config_pmc(int cpu, int ri, struct pmc *pm)
{
	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU %d", __LINE__, cpu));

	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	PMCDBG3(MDP,CFG,1, "iaf-config cpu=%d ri=%d pm=%p", cpu, ri, pm);

	KASSERT(core_pcpu[cpu] != NULL, ("[core,%d] null per-cpu %d", __LINE__,
	    cpu));

	core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc = pm;

	return (0);
}

static int
iaf_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
{
	int error;
	struct pmc_hw *phw;
	char iaf_name[PMC_NAME_MAX];

	phw = &core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri];

	(void) snprintf(iaf_name, sizeof(iaf_name), "IAF-%d", ri);
	if ((error = copystr(iaf_name, pi->pm_name, PMC_NAME_MAX,
	    NULL)) != 0)
		return (error);

	pi->pm_class = PMC_CLASS_IAF;

	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
		pi->pm_enabled = TRUE;
		*ppmc          = phw->phw_pmc;
	} else {
		pi->pm_enabled = FALSE;
		*ppmc          = NULL;
	}

	return (0);
}

static int
iaf_get_config(int cpu, int ri, struct pmc **ppm)
{
	*ppm = core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc;

	return (0);
}

static int
iaf_get_msr(int ri, uint32_t *msr)
{
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[iaf,%d] ri %d out of range", __LINE__, ri));

	*msr = IAF_RI_TO_MSR(ri);

	return (0);
}

static int
iaf_read_pmc(int cpu, int ri, pmc_value_t *v)
{
	struct pmc *pm;
	pmc_value_t tmp;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	pm = core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc;

	KASSERT(pm,
	    ("[core,%d] cpu %d ri %d(%d) pmc not configured", __LINE__, cpu,
		ri, ri + core_iaf_ri));

	tmp = rdpmc(IAF_RI_TO_MSR(ri));

	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		*v = iaf_perfctr_value_to_reload_count(tmp);
	else
		*v = tmp & ((1ULL << core_iaf_width) - 1);

	PMCDBG4(MDP,REA,1, "iaf-read cpu=%d ri=%d msr=0x%x -> v=%jx", cpu, ri,
	    IAF_RI_TO_MSR(ri), *v);

	return (0);
}

static int
iaf_release_pmc(int cpu, int ri, struct pmc *pmc)
{
	PMCDBG3(MDP,REL,1, "iaf-release cpu=%d ri=%d pm=%p", cpu, ri, pmc);

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	KASSERT(core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc == NULL,
	    ("[core,%d] PHW pmc non-NULL", __LINE__));

	return (0);
}

static int
iaf_start_pmc(int cpu, int ri)
{
	struct pmc *pm;
	struct core_cpu *iafc;
	uint64_t msr = 0;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	PMCDBG2(MDP,STA,1,"iaf-start cpu=%d ri=%d", cpu, ri);

	iafc = core_pcpu[cpu];
	pm = iafc->pc_corepmcs[ri + core_iaf_ri].phw_pmc;

	iafc->pc_iafctrl |= pm->pm_md.pm_iaf.pm_iaf_ctrl;

	msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK;
	wrmsr(IAF_CTRL, msr | (iafc->pc_iafctrl & IAF_CTRL_MASK));

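	/*
	 * An intervening PMI may leave the cached global control value
	 * out of sync with the MSR; retry the update until no resync
	 * was requested while we were writing it.
	 */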
	do {
		iafc->pc_resync = 0;
		iafc->pc_globalctrl |= (1ULL << (ri + IAF_OFFSET));
		msr = rdmsr(IA_GLOBAL_CTRL) & ~IAF_GLOBAL_CTRL_MASK;
		wrmsr(IA_GLOBAL_CTRL, msr | (iafc->pc_globalctrl &
					     IAF_GLOBAL_CTRL_MASK));
	} while (iafc->pc_resync != 0);

	PMCDBG4(MDP,STA,1,"iafctrl=%x(%x) globalctrl=%jx(%jx)",
	    iafc->pc_iafctrl, (uint32_t) rdmsr(IAF_CTRL),
	    iafc->pc_globalctrl, rdmsr(IA_GLOBAL_CTRL));

	return (0);
}

static int
iaf_stop_pmc(int cpu, int ri)
{
	uint32_t fc;
	struct core_cpu *iafc;
	uint64_t msr = 0;

	PMCDBG2(MDP,STO,1,"iaf-stop cpu=%d ri=%d", cpu, ri);

	iafc = core_pcpu[cpu];

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	fc = (IAF_MASK << (ri * 4));

	iafc->pc_iafctrl &= ~fc;

	PMCDBG1(MDP,STO,1,"iaf-stop iafctrl=%x", iafc->pc_iafctrl);
	msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK;
	wrmsr(IAF_CTRL, msr | (iafc->pc_iafctrl & IAF_CTRL_MASK));

	do {
		iafc->pc_resync = 0;
		iafc->pc_globalctrl &= ~(1ULL << (ri + IAF_OFFSET));
		msr = rdmsr(IA_GLOBAL_CTRL) & ~IAF_GLOBAL_CTRL_MASK;
		wrmsr(IA_GLOBAL_CTRL, msr | (iafc->pc_globalctrl &
					     IAF_GLOBAL_CTRL_MASK));
	} while (iafc->pc_resync != 0);

	PMCDBG4(MDP,STO,1,"iafctrl=%x(%x) globalctrl=%jx(%jx)",
	    iafc->pc_iafctrl, (uint32_t) rdmsr(IAF_CTRL),
	    iafc->pc_globalctrl, rdmsr(IA_GLOBAL_CTRL));

	return (0);
}

static int
iaf_write_pmc(int cpu, int ri, pmc_value_t v)
{
	struct core_cpu *cc;
	struct pmc *pm;
	uint64_t msr;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	cc = core_pcpu[cpu];
	pm = cc->pc_corepmcs[ri + core_iaf_ri].phw_pmc;

	KASSERT(pm,
	    ("[core,%d] cpu %d ri %d pmc not configured", __LINE__, cpu, ri));

	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		v = iaf_reload_count_to_perfctr_value(v);

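	/*
	 * The fixed-function bank is presumably disabled around the
	 * update so the counter cannot advance while its value is
	 * being replaced.
	 */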
	/* Turn off fixed counters */
	msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK;
	wrmsr(IAF_CTRL, msr);

	wrmsr(IAF_CTR0 + ri, v & ((1ULL << core_iaf_width) - 1));

	/* Turn on fixed counters */
	msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK;
	wrmsr(IAF_CTRL, msr | (cc->pc_iafctrl & IAF_CTRL_MASK));

	PMCDBG6(MDP,WRI,1, "iaf-write cpu=%d ri=%d msr=0x%x v=%jx iafctrl=%jx "
	    "pmc=%jx", cpu, ri, IAF_RI_TO_MSR(ri), v,
	    (uintmax_t) rdmsr(IAF_CTRL),
	    (uintmax_t) rdpmc(IAF_RI_TO_MSR(ri)));

	return (0);
}

static void
iaf_initialize(struct pmc_mdep *md, int maxcpu, int npmc, int pmcwidth)
{
	struct pmc_classdep *pcd;

	KASSERT(md != NULL, ("[iaf,%d] md is NULL", __LINE__));

	PMCDBG0(MDP,INI,1, "iaf-initialize");

	pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF];

	pcd->pcd_caps	= IAF_PMC_CAPS;
	pcd->pcd_class	= PMC_CLASS_IAF;
	pcd->pcd_num	= npmc;
	pcd->pcd_ri	= md->pmd_npmc;
	pcd->pcd_width	= pmcwidth;

	pcd->pcd_allocate_pmc	= iaf_allocate_pmc;
	pcd->pcd_config_pmc	= iaf_config_pmc;
	pcd->pcd_describe	= iaf_describe;
	pcd->pcd_get_config	= iaf_get_config;
	pcd->pcd_get_msr	= iaf_get_msr;
	pcd->pcd_pcpu_fini	= core_pcpu_noop;
	pcd->pcd_pcpu_init	= core_pcpu_noop;
	pcd->pcd_read_pmc	= iaf_read_pmc;
	pcd->pcd_release_pmc	= iaf_release_pmc;
	pcd->pcd_start_pmc	= iaf_start_pmc;
	pcd->pcd_stop_pmc	= iaf_stop_pmc;
	pcd->pcd_write_pmc	= iaf_write_pmc;

	md->pmd_npmc	       += npmc;
}

/*
 * Intel programmable PMCs.
 */

/* Sub fields of UMASK that this event supports. */
#define	IAP_M_CORE		(1 << 0) /* Core specificity */
#define	IAP_M_AGENT		(1 << 1) /* Agent specificity */
#define	IAP_M_PREFETCH		(1 << 2) /* Prefetch */
#define	IAP_M_MESI		(1 << 3) /* MESI */
#define	IAP_M_SNOOPRESPONSE	(1 << 4) /* Snoop response */
#define	IAP_M_SNOOPTYPE		(1 << 5) /* Snoop type */
#define	IAP_M_TRANSITION	(1 << 6) /* Transition */

#define	IAP_F_CORE		(0x3 << 14) /* Core specificity */
#define	IAP_F_AGENT		(0x1 << 13) /* Agent specificity */
#define	IAP_F_PREFETCH		(0x3 << 12) /* Prefetch */
#define	IAP_F_MESI		(0xF <<  8) /* MESI */
#define	IAP_F_SNOOPRESPONSE	(0xB <<  8) /* Snoop response */
#define	IAP_F_SNOOPTYPE		(0x3 <<  8) /* Snoop type */
#define	IAP_F_TRANSITION	(0x1 << 12) /* Transition */

#define	IAP_PREFETCH_RESERVED	(0x2 << 12)
#define	IAP_CORE_THIS		(0x1 << 14)
#define	IAP_CORE_ALL		(0x3 << 14)
#define	IAP_F_CMASK		0xFF000000

static pmc_value_t
iap_perfctr_value_to_reload_count(pmc_value_t v)
{

	/* If the PMC has overflowed, return a reload count of zero. */
	if ((v & (1ULL << (core_iap_width - 1))) == 0)
		return (0);
	v &= (1ULL << core_iap_width) - 1;
	return (1ULL << core_iap_width) - v;
}

static pmc_value_t
iap_reload_count_to_perfctr_value(pmc_value_t rlc)
{
	return (1ULL << core_iap_width) - rlc;
}

static int
iap_pmc_has_overflowed(int ri)
{
	uint64_t v;

	/*
	 * We treat a Core (i.e., Intel architecture v1) PMC as having
	 * overflowed if its MSB is zero.
	 */
	v = rdpmc(ri);
	return ((v & (1ULL << (core_iap_width - 1))) == 0);
}

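/*
 * Model-specific scheduling constraints: some events may only be
 * counted on a subset of the programmable counters.  Each helper below
 * returns non-zero if event 'evsel' may be scheduled on row index 'ri'.
 */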
static int
iap_event_corei7_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t mask;

	switch (evsel) {
	/*
	 * Events valid only on counter 0, 1.
	 */
	case 0x40:
	case 0x41:
	case 0x42:
	case 0x43:
	case 0x51:
	case 0x63:
		mask = 0x3;
		break;

	default:
		mask = ~0;	/* Any row index is ok. */
	}

	return (mask & (1 << ri));
}

static int
iap_event_westmere_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t mask;

	switch (evsel) {
	/*
	 * Events valid only on counter 0.
	 */
	case 0x60:
	case 0xB3:
		mask = 0x1;
		break;

	/*
	 * Events valid only on counter 0, 1.
	 */
	case 0x4C:
	case 0x4E:
	case 0x51:
	case 0x63:
		mask = 0x3;
		break;

	default:
		mask = ~0;	/* Any row index is ok. */
	}

	return (mask & (1 << ri));
}

static int
iap_event_sb_sbx_ib_ibx_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t mask;

	switch (evsel) {
	/* Events valid only on counter 0. */
	case 0xB7:
		mask = 0x1;
		break;
	/* Events valid only on counter 1. */
	case 0xC0:
		mask = 0x2;
		break;
	/* Events valid only on counter 2. */
	case 0x48:
	case 0xA2:
	case 0xA3:
		mask = 0x4;
		break;
	/* Events valid only on counter 3. */
	case 0xBB:
	case 0xCD:
		mask = 0x8;
		break;
	default:
		mask = ~0;	/* Any row index is ok. */
	}

	return (mask & (1 << ri));
}

static int
iap_event_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t mask;

	switch (evsel) {
	/*
	 * Events valid only on counter 0.
	 */
	case 0x10:
	case 0x14:
	case 0x18:
	case 0xB3:
	case 0xC1:
	case 0xCB:
		mask = (1 << 0);
		break;

	/*
	 * Events valid only on counter 1.
	 */
	case 0x11:
	case 0x12:
	case 0x13:
		mask = (1 << 1);
		break;

	default:
		mask = ~0;	/* Any row index is ok. */
	}

	return (mask & (1 << ri));
}

static int
iap_allocate_pmc(int cpu, int ri, struct pmc *pm,
    const struct pmc_op_pmcallocate *a)
{
	enum pmc_event map;
	uint8_t ev;
	uint32_t caps;
	const struct pmc_md_iap_op_pmcallocate *iap;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index value %d", __LINE__, ri));

	/* check requested capabilities */
	caps = a->pm_caps;
	if ((IAP_PMC_CAPS & caps) != caps)
		return (EPERM);
	map = 0;	/* XXX: silence a GCC warning */
	iap = &a->pm_md.pm_iap;
	ev = IAP_EVSEL_GET(iap->pm_iap_config);

	switch (core_cputype) {
	case PMC_CPU_INTEL_COREI7:
	case PMC_CPU_INTEL_NEHALEM_EX:
		if (iap_event_corei7_ok_on_counter(ev, ri) == 0)
			return (EINVAL);
		break;
	case PMC_CPU_INTEL_SKYLAKE:
	case PMC_CPU_INTEL_SKYLAKE_XEON:
	case PMC_CPU_INTEL_BROADWELL:
	case PMC_CPU_INTEL_BROADWELL_XEON:
	case PMC_CPU_INTEL_SANDYBRIDGE:
	case PMC_CPU_INTEL_SANDYBRIDGE_XEON:
	case PMC_CPU_INTEL_IVYBRIDGE:
	case PMC_CPU_INTEL_IVYBRIDGE_XEON:
	case PMC_CPU_INTEL_HASWELL:
	case PMC_CPU_INTEL_HASWELL_XEON:
		if (iap_event_sb_sbx_ib_ibx_ok_on_counter(ev, ri) == 0)
			return (EINVAL);
		break;
	case PMC_CPU_INTEL_WESTMERE:
	case PMC_CPU_INTEL_WESTMERE_EX:
		if (iap_event_westmere_ok_on_counter(ev, ri) == 0)
			return (EINVAL);
		break;
	default:
		if (iap_event_ok_on_counter(ev, ri) == 0)
			return (EINVAL);
	}

	pm->pm_md.pm_iap.pm_iap_evsel = iap->pm_iap_config;
	return (0);
}

static int
iap_config_pmc(int cpu, int ri, struct pmc *pm)
{
	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU %d", __LINE__, cpu));

	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	PMCDBG3(MDP,CFG,1, "iap-config cpu=%d ri=%d pm=%p", cpu, ri, pm);

	KASSERT(core_pcpu[cpu] != NULL, ("[core,%d] null per-cpu %d", __LINE__,
	    cpu));

	core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc = pm;

	return (0);
}

static int
iap_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
{
	int error;
	struct pmc_hw *phw;
	char iap_name[PMC_NAME_MAX];

	phw = &core_pcpu[cpu]->pc_corepmcs[ri];

	(void) snprintf(iap_name, sizeof(iap_name), "IAP-%d", ri);
	if ((error = copystr(iap_name, pi->pm_name, PMC_NAME_MAX,
	    NULL)) != 0)
		return (error);

	pi->pm_class = PMC_CLASS_IAP;

	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
		pi->pm_enabled = TRUE;
		*ppmc          = phw->phw_pmc;
	} else {
		pi->pm_enabled = FALSE;
		*ppmc          = NULL;
	}

	return (0);
}

static int
iap_get_config(int cpu, int ri, struct pmc **ppm)
{
	*ppm = core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc;

	return (0);
}

static int
iap_get_msr(int ri, uint32_t *msr)
{
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[iap,%d] ri %d out of range", __LINE__, ri));

	*msr = ri;

	return (0);
}

static int
iap_read_pmc(int cpu, int ri, pmc_value_t *v)
{
	struct pmc *pm;
	pmc_value_t tmp;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	pm = core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc;

	KASSERT(pm,
	    ("[core,%d] cpu %d ri %d pmc not configured", __LINE__, cpu,
		ri));

	tmp = rdpmc(ri);
	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		*v = iap_perfctr_value_to_reload_count(tmp);
	else
		*v = tmp & ((1ULL << core_iap_width) - 1);

	PMCDBG4(MDP,REA,1, "iap-read cpu=%d ri=%d msr=0x%x -> v=%jx", cpu, ri,
	    IAP_PMC0 + ri, *v);

	return (0);
}

static int
iap_release_pmc(int cpu, int ri, struct pmc *pm)
{
	(void) pm;

	PMCDBG3(MDP,REL,1, "iap-release cpu=%d ri=%d pm=%p", cpu, ri,
	    pm);

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	KASSERT(core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc
	    == NULL, ("[core,%d] PHW pmc non-NULL", __LINE__));

	return (0);
}

static int
iap_start_pmc(int cpu, int ri)
{
	struct pmc *pm;
	uint32_t evsel;
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	cc = core_pcpu[cpu];
	pm = cc->pc_corepmcs[ri].phw_pmc;

	KASSERT(pm,
	    ("[core,%d] starting cpu%d,ri%d with no pmc configured",
		__LINE__, cpu, ri));

	PMCDBG2(MDP,STA,1, "iap-start cpu=%d ri=%d", cpu, ri);

	evsel = pm->pm_md.pm_iap.pm_iap_evsel;

	PMCDBG4(MDP,STA,2, "iap-start/2 cpu=%d ri=%d evselmsr=0x%x evsel=0x%x",
	    cpu, ri, IAP_EVSEL0 + ri, evsel);

	/* Event specific configuration. */

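	/*
	 * The off-core response events (0xB7 and 0xBB) take an
	 * additional qualifier that is written to a dedicated MSR.
	 */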
	switch (IAP_EVSEL_GET(evsel)) {
	case 0xB7:
		wrmsr(IA_OFFCORE_RSP0, pm->pm_md.pm_iap.pm_iap_rsp);
		break;
	case 0xBB:
		wrmsr(IA_OFFCORE_RSP1, pm->pm_md.pm_iap.pm_iap_rsp);
		break;
	default:
		break;
	}

	wrmsr(IAP_EVSEL0 + ri, evsel | IAP_EN);

	if (core_cputype == PMC_CPU_INTEL_CORE)
		return (0);

	do {
		cc->pc_resync = 0;
		cc->pc_globalctrl |= (1ULL << ri);
		wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);
	} while (cc->pc_resync != 0);

	return (0);
}

static int
iap_stop_pmc(int cpu, int ri)
{
	struct pmc *pm;
	struct core_cpu *cc;
	uint64_t msr;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row index %d", __LINE__, ri));

	cc = core_pcpu[cpu];
	pm = cc->pc_corepmcs[ri].phw_pmc;

	KASSERT(pm,
	    ("[core,%d] cpu%d ri%d no configured PMC to stop", __LINE__,
		cpu, ri));

	PMCDBG2(MDP,STO,1, "iap-stop cpu=%d ri=%d", cpu, ri);

	msr = rdmsr(IAP_EVSEL0 + ri) & ~IAP_EVSEL_MASK;
	wrmsr(IAP_EVSEL0 + ri, msr);	/* stop hw */

	if (core_cputype == PMC_CPU_INTEL_CORE)
		return (0);

	do {
		cc->pc_resync = 0;
		cc->pc_globalctrl &= ~(1ULL << ri);
		wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);
	} while (cc->pc_resync != 0);

	return (0);
}

static int
iap_write_pmc(int cpu, int ri, pmc_value_t v)
{
	struct pmc *pm;
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row index %d", __LINE__, ri));

	cc = core_pcpu[cpu];
	pm = cc->pc_corepmcs[ri].phw_pmc;

	KASSERT(pm,
	    ("[core,%d] cpu%d ri%d no configured PMC to write", __LINE__,
		cpu, ri));

	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		v = iap_reload_count_to_perfctr_value(v);

	v &= (1ULL << core_iap_width) - 1;

	PMCDBG4(MDP,WRI,1, "iap-write cpu=%d ri=%d msr=0x%x v=%jx", cpu, ri,
	    IAP_PMC0 + ri, v);

	/*
	 * Write the new value to the counter (or its alias).  The
	 * counter will be in a stopped state when the pcd_write()
	 * entry point is called.
	 */
	wrmsr(core_iap_wroffset + IAP_PMC0 + ri, v);
	return (0);
}

static void
iap_initialize(struct pmc_mdep *md, int maxcpu, int npmc, int pmcwidth,
    int flags)
{
	struct pmc_classdep *pcd;

	KASSERT(md != NULL, ("[iap,%d] md is NULL", __LINE__));

	PMCDBG0(MDP,INI,1, "iap-initialize");

	/*
	 * Remember the set of architectural events supported.  CPUID
	 * leaf 0AH reports a bit per event that is *not* available,
	 * so the incoming mask is inverted.
	 */
	core_architectural_events = ~flags;

	pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP];

	pcd->pcd_caps	= IAP_PMC_CAPS;
	pcd->pcd_class	= PMC_CLASS_IAP;
	pcd->pcd_num	= npmc;
	pcd->pcd_ri	= md->pmd_npmc;
	pcd->pcd_width	= pmcwidth;

	pcd->pcd_allocate_pmc	= iap_allocate_pmc;
	pcd->pcd_config_pmc	= iap_config_pmc;
	pcd->pcd_describe	= iap_describe;
	pcd->pcd_get_config	= iap_get_config;
	pcd->pcd_get_msr	= iap_get_msr;
	pcd->pcd_pcpu_fini	= core_pcpu_fini;
	pcd->pcd_pcpu_init	= core_pcpu_init;
	pcd->pcd_read_pmc	= iap_read_pmc;
	pcd->pcd_release_pmc	= iap_release_pmc;
	pcd->pcd_start_pmc	= iap_start_pmc;
	pcd->pcd_stop_pmc	= iap_stop_pmc;
	pcd->pcd_write_pmc	= iap_write_pmc;

	md->pmd_npmc	       += npmc;
}

static int
core_intr(struct trapframe *tf)
{
	pmc_value_t v;
	struct pmc *pm;
	struct core_cpu *cc;
	int error, found_interrupt, ri;
	uint64_t msr;

	PMCDBG3(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", curcpu, (void *) tf,
	    TRAPF_USERMODE(tf));

	found_interrupt = 0;
	cc = core_pcpu[curcpu];

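	/*
	 * A version 1 PMU has no global status register, so poll each
	 * programmable counter and infer overflow from a cleared MSB.
	 */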
	for (ri = 0; ri < core_iap_npmc; ri++) {

		if ((pm = cc->pc_corepmcs[ri].phw_pmc) == NULL ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
			continue;

		if (!iap_pmc_has_overflowed(ri))
			continue;

		found_interrupt = 1;

		if (pm->pm_state != PMC_STATE_RUNNING)
			continue;

		error = pmc_process_interrupt(PMC_HR, pm, tf);

		v = pm->pm_sc.pm_reloadcount;
		v = iap_reload_count_to_perfctr_value(v);

		/*
		 * Stop the counter, reload it but only restart it if
		 * the PMC is not stalled.
		 */
		msr = rdmsr(IAP_EVSEL0 + ri) & ~IAP_EVSEL_MASK;
		wrmsr(IAP_EVSEL0 + ri, msr);
		wrmsr(core_iap_wroffset + IAP_PMC0 + ri, v);

		if (error)
			continue;

		wrmsr(IAP_EVSEL0 + ri, msr | (pm->pm_md.pm_iap.pm_iap_evsel |
					      IAP_EN));
	}

	if (found_interrupt)
		lapic_reenable_pmc();

	if (found_interrupt)
		counter_u64_add(pmc_stats.pm_intr_processed, 1);
	else
		counter_u64_add(pmc_stats.pm_intr_ignored, 1);

	return (found_interrupt);
}

static int
core2_intr(struct trapframe *tf)
{
	int error, found_interrupt, n, cpu;
	uint64_t flag, intrstatus, intrenable, msr;
	struct pmc *pm;
	struct core_cpu *cc;
	pmc_value_t v;

	cpu = curcpu;
	PMCDBG3(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
	    TRAPF_USERMODE(tf));

	/*
	 * The IA_GLOBAL_STATUS (MSR 0x38E) register indicates which
	 * PMCs have a pending PMI interrupt.  We take a 'snapshot' of
	 * the current set of interrupting PMCs and process these
	 * after stopping them.
	 */
	intrstatus = rdmsr(IA_GLOBAL_STATUS);
	intrenable = intrstatus & core_pmcmask;

	PMCDBG2(MDP,INT, 1, "cpu=%d intrstatus=%jx", cpu,
	    (uintmax_t) intrstatus);

	found_interrupt = 0;
	cc = core_pcpu[cpu];

	KASSERT(cc != NULL, ("[core,%d] null pcpu", __LINE__));

	cc->pc_globalctrl &= ~intrenable;
	cc->pc_resync = 1;	/* MSRs now potentially out of sync. */

	/*
	 * Stop PMCs and clear overflow status bits.
	 */
	msr = rdmsr(IA_GLOBAL_CTRL) & ~IA_GLOBAL_CTRL_MASK;
	wrmsr(IA_GLOBAL_CTRL, msr);
	wrmsr(IA_GLOBAL_OVF_CTRL, intrenable |
	    IA_GLOBAL_STATUS_FLAG_OVFBUF |
	    IA_GLOBAL_STATUS_FLAG_CONDCHG);

	/*
	 * Look for interrupts from fixed function PMCs.  These occupy
	 * bits [IAF_OFFSET + n] of the status register.
	 */
	for (n = 0, flag = (1ULL << IAF_OFFSET); n < core_iaf_npmc;
	     n++, flag <<= 1) {

		if ((intrstatus & flag) == 0)
			continue;

		found_interrupt = 1;

		pm = cc->pc_corepmcs[n + core_iaf_ri].phw_pmc;
		if (pm == NULL || pm->pm_state != PMC_STATE_RUNNING ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
			continue;

		error = pmc_process_interrupt(PMC_HR, pm, tf);

		if (error)
			intrenable &= ~flag;

		v = iaf_reload_count_to_perfctr_value(pm->pm_sc.pm_reloadcount);

		/* Reload sampling count. */
		wrmsr(IAF_CTR0 + n, v);

		PMCDBG4(MDP,INT, 1, "iaf-intr cpu=%d error=%d v=%jx(%jx)", curcpu,
		    error, (uintmax_t) v, (uintmax_t) rdpmc(IAF_RI_TO_MSR(n)));
	}

	/*
	 * Process interrupts from the programmable counters.
	 */
	for (n = 0, flag = 1; n < core_iap_npmc; n++, flag <<= 1) {
		if ((intrstatus & flag) == 0)
			continue;

		found_interrupt = 1;

		pm = cc->pc_corepmcs[n].phw_pmc;
		if (pm == NULL || pm->pm_state != PMC_STATE_RUNNING ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
			continue;

		error = pmc_process_interrupt(PMC_HR, pm, tf);
		if (error)
			intrenable &= ~flag;

		v = iap_reload_count_to_perfctr_value(pm->pm_sc.pm_reloadcount);

		PMCDBG3(MDP,INT, 1, "iap-intr cpu=%d error=%d v=%jx", cpu, error,
		    (uintmax_t) v);

		/* Reload sampling count. */
		wrmsr(core_iap_wroffset + IAP_PMC0 + n, v);
	}

	/*
	 * Reenable all non-stalled PMCs.
	 */
	PMCDBG2(MDP,INT, 1, "cpu=%d intrenable=%jx", cpu,
	    (uintmax_t) intrenable);

	cc->pc_globalctrl |= intrenable;

	wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl & IA_GLOBAL_CTRL_MASK);

	PMCDBG5(MDP,INT, 1, "cpu=%d fixedctrl=%jx globalctrl=%jx status=%jx "
	    "ovf=%jx", cpu, (uintmax_t) rdmsr(IAF_CTRL),
	    (uintmax_t) rdmsr(IA_GLOBAL_CTRL),
	    (uintmax_t) rdmsr(IA_GLOBAL_STATUS),
	    (uintmax_t) rdmsr(IA_GLOBAL_OVF_CTRL));

	if (found_interrupt)
		lapic_reenable_pmc();

	if (found_interrupt)
		counter_u64_add(pmc_stats.pm_intr_processed, 1);
	else
		counter_u64_add(pmc_stats.pm_intr_ignored, 1);

	return (found_interrupt);
}

int
pmc_core_initialize(struct pmc_mdep *md, int maxcpu, int version_override)
{
	int cpuid[CORE_CPUID_REQUEST_SIZE];
	int ipa_version, flags, nflags;

	do_cpuid(CORE_CPUID_REQUEST, cpuid);

	ipa_version = (version_override > 0) ? version_override :
	    cpuid[CORE_CPUID_EAX] & 0xFF;
	core_cputype = md->pmd_cputype;

	PMCDBG3(MDP,INI,1,"core-init cputype=%d ncpu=%d ipa-version=%d",
	    core_cputype, maxcpu, ipa_version);

	if (ipa_version < 1 || ipa_version > 4 ||
	    (core_cputype != PMC_CPU_INTEL_CORE && ipa_version == 1)) {
		/* Unknown PMC architecture. */
		printf("hwpmc_core: unknown PMC architecture: %d\n",
		    ipa_version);
		return (EPROGMISMATCH);
	}

	core_iap_wroffset = 0;
	if (cpu_feature2 & CPUID2_PDCM) {
		if (rdmsr(IA32_PERF_CAPABILITIES) & PERFCAP_FW_WRITE) {
			PMCDBG0(MDP, INI, 1,
			    "core-init full-width write supported");
			core_iap_wroffset = IAP_A_PMC0 - IAP_PMC0;
		} else
			PMCDBG0(MDP, INI, 1,
			    "core-init full-width write NOT supported");
	} else
		PMCDBG0(MDP, INI, 1, "core-init pdcm not supported");

	core_pmcmask = 0;

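	/*
	 * CPUID leaf 0AH layout: EAX[7:0] is the PMU version, EAX[15:8]
	 * the number of programmable counters, EAX[23:16] their width,
	 * and EAX[31:24] the length of the EBX event-availability bit
	 * vector.  EDX[4:0] and EDX[12:5] give the number and width of
	 * the fixed-function counters.
	 */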
1282 	 * Initialize programmable counters.
1283 	 */
1284 	core_iap_npmc = (cpuid[CORE_CPUID_EAX] >> 8) & 0xFF;
1285 	core_iap_width = (cpuid[CORE_CPUID_EAX] >> 16) & 0xFF;
1286 
1287 	core_pmcmask |= ((1ULL << core_iap_npmc) - 1);
1288 
1289 	nflags = (cpuid[CORE_CPUID_EAX] >> 24) & 0xFF;
1290 	flags = cpuid[CORE_CPUID_EBX] & ((1 << nflags) - 1);
1291 
1292 	iap_initialize(md, maxcpu, core_iap_npmc, core_iap_width, flags);
1293 
1294 	/*
1295 	 * Initialize fixed function counters, if present.
1296 	 */
1297 	if (core_cputype != PMC_CPU_INTEL_CORE) {
1298 		core_iaf_ri = core_iap_npmc;
1299 		core_iaf_npmc = cpuid[CORE_CPUID_EDX] & 0x1F;
1300 		core_iaf_width = (cpuid[CORE_CPUID_EDX] >> 5) & 0xFF;
1301 
1302 		iaf_initialize(md, maxcpu, core_iaf_npmc, core_iaf_width);
1303 		core_pmcmask |= ((1ULL << core_iaf_npmc) - 1) << IAF_OFFSET;
1304 	}
1305 
1306 	PMCDBG2(MDP,INI,1,"core-init pmcmask=0x%jx iafri=%d", core_pmcmask,
1307 	    core_iaf_ri);
1308 
1309 	core_pcpu = malloc(sizeof(*core_pcpu) * maxcpu, M_PMC,
1310 	    M_ZERO | M_WAITOK);
1311 
1312 	/*
1313 	 * Choose the appropriate interrupt handler.
1314 	 */
1315 	if (ipa_version == 1)
1316 		md->pmd_intr = core_intr;
1317 	else
1318 		md->pmd_intr = core2_intr;
1319 
1320 	md->pmd_pcpu_fini = NULL;
1321 	md->pmd_pcpu_init = NULL;
1322 
1323 	return (0);
1324 }
1325 
1326 void
1327 pmc_core_finalize(struct pmc_mdep *md)
1328 {
1329 	PMCDBG0(MDP,INI,1, "core-finalize");
1330 
1331 	free(core_pcpu, M_PMC);
1332 	core_pcpu = NULL;
1333 }
1334