/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2008 Joseph Koshy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Intel Core PMCs.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/pmc.h>
#include <sys/pmckern.h>
#include <sys/systm.h>

#include <machine/intr_machdep.h>
#if (__FreeBSD_version >= 1100000)
#include <x86/apicvar.h>
#else
#include <machine/apicvar.h>
#endif
#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>

#define	CORE_CPUID_REQUEST		0xA
#define	CORE_CPUID_REQUEST_SIZE		0x4
#define	CORE_CPUID_EAX			0x0
#define	CORE_CPUID_EBX			0x1
#define	CORE_CPUID_ECX			0x2
#define	CORE_CPUID_EDX			0x3

#define	IAF_PMC_CAPS			\
	(PMC_CAP_READ | PMC_CAP_WRITE | PMC_CAP_INTERRUPT | \
	 PMC_CAP_USER | PMC_CAP_SYSTEM)
#define	IAF_RI_TO_MSR(RI)		((RI) + (1 << 30))
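
/*
 * RDPMC selects the fixed-function counters when bit 30 of its index
 * argument is set, so IAF_RI_TO_MSR() maps a fixed counter's relative
 * index to the value expected by rdpmc(); e.g. fixed counter 2 is read
 * with rdpmc(0x40000002).
 */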

#define	IAP_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | PMC_CAP_SYSTEM | \
    PMC_CAP_EDGE | PMC_CAP_THRESHOLD | PMC_CAP_READ | PMC_CAP_WRITE |	 \
    PMC_CAP_INVERT | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE)

#define	EV_IS_NOTARCH		0
#define	EV_IS_ARCH_SUPP		1
#define	EV_IS_ARCH_NOTSUPP	-1

/*
 * "Architectural" events defined by Intel.  The values of these
 * symbols correspond to positions in the bitmask returned by
 * the CPUID.0AH instruction.
 */
enum core_arch_events {
	CORE_AE_BRANCH_INSTRUCTION_RETIRED	= 5,
	CORE_AE_BRANCH_MISSES_RETIRED		= 6,
	CORE_AE_INSTRUCTION_RETIRED		= 1,
	CORE_AE_LLC_MISSES			= 4,
	CORE_AE_LLC_REFERENCE			= 3,
	CORE_AE_UNHALTED_REFERENCE_CYCLES	= 2,
	CORE_AE_UNHALTED_CORE_CYCLES		= 0
};
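
/*
 * In the CPUID.0AH EBX bitmask, a set bit means the corresponding
 * architectural event is *not* available.  iap_initialize() stores the
 * complement in core_architectural_events, so a set bit there indicates
 * a supported event.  An illustrative (not driver-internal) test:
 *
 *	if (core_architectural_events & (1 << CORE_AE_LLC_MISSES))
 *		...;	(LLC misses can be counted on this CPU)
 */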

static enum pmc_cputype	core_cputype;

struct core_cpu {
	volatile uint32_t	pc_resync;
	volatile uint32_t	pc_iafctrl;	/* Fixed function control. */
	volatile uint64_t	pc_globalctrl;	/* Global control register. */
	struct pmc_hw		pc_corepmcs[];
};
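
/*
 * The pc_resync flag implements a simple handshake with the NMI
 * handler: core2_intr() sets it after changing pc_globalctrl, and the
 * start/stop routines below keep rewriting IA_GLOBAL_CTRL until they
 * complete a pass with pc_resync still zero, so the MSR is known to
 * match the cached value.
 */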

static struct core_cpu **core_pcpu;

static uint32_t core_architectural_events;
static uint64_t core_pmcmask;

static int core_iaf_ri;		/* relative index of fixed counters */
static int core_iaf_width;
static int core_iaf_npmc;

static int core_iap_width;
static int core_iap_npmc;
static int core_iap_wroffset;

static int
core_pcpu_noop(struct pmc_mdep *md, int cpu)
{
	(void) md;
	(void) cpu;
	return (0);
}

static int
core_pcpu_init(struct pmc_mdep *md, int cpu)
{
	struct pmc_cpu *pc;
	struct core_cpu *cc;
	struct pmc_hw *phw;
	int core_ri, n, npmc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[iaf,%d] insane cpu number %d", __LINE__, cpu));

	PMCDBG1(MDP,INI,1,"core-init cpu=%d", cpu);

	core_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_ri;
	npmc = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_num;

	if (core_cputype != PMC_CPU_INTEL_CORE)
		npmc += md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF].pcd_num;

	cc = malloc(sizeof(struct core_cpu) + npmc * sizeof(struct pmc_hw),
	    M_PMC, M_WAITOK | M_ZERO);

	core_pcpu[cpu] = cc;
	pc = pmc_pcpu[cpu];

	KASSERT(pc != NULL && cc != NULL,
	    ("[core,%d] NULL per-cpu structures cpu=%d", __LINE__, cpu));

	for (n = 0, phw = cc->pc_corepmcs; n < npmc; n++, phw++) {
		phw->phw_state	  = PMC_PHW_FLAG_IS_ENABLED |
		    PMC_PHW_CPU_TO_STATE(cpu) |
		    PMC_PHW_INDEX_TO_STATE(n + core_ri);
		phw->phw_pmc	  = NULL;
		pc->pc_hwpmcs[n + core_ri]  = phw;
	}

	return (0);
}

static int
core_pcpu_fini(struct pmc_mdep *md, int cpu)
{
	int core_ri, n, npmc;
	struct pmc_cpu *pc;
	struct core_cpu *cc;
	uint64_t msr = 0;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] insane cpu number (%d)", __LINE__, cpu));

	PMCDBG1(MDP,INI,1,"core-pcpu-fini cpu=%d", cpu);

	if ((cc = core_pcpu[cpu]) == NULL)
		return (0);

	core_pcpu[cpu] = NULL;

	pc = pmc_pcpu[cpu];

	KASSERT(pc != NULL, ("[core,%d] NULL per-cpu %d state", __LINE__,
		cpu));

	npmc = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_num;
	core_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_ri;

	for (n = 0; n < npmc; n++) {
		msr = rdmsr(IAP_EVSEL0 + n) & ~IAP_EVSEL_MASK;
		wrmsr(IAP_EVSEL0 + n, msr);
	}

	if (core_cputype != PMC_CPU_INTEL_CORE) {
		msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK;
		wrmsr(IAF_CTRL, msr);
		npmc += md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF].pcd_num;
	}

	for (n = 0; n < npmc; n++)
		pc->pc_hwpmcs[n + core_ri] = NULL;

	free(cc, M_PMC);

	return (0);
}

/*
 * Fixed function counters.
 */

static pmc_value_t
iaf_perfctr_value_to_reload_count(pmc_value_t v)
{

	/* If the PMC has overflowed, return a reload count of zero. */
	if ((v & (1ULL << (core_iaf_width - 1))) == 0)
		return (0);
	v &= (1ULL << core_iaf_width) - 1;
	return (1ULL << core_iaf_width) - v;
}

static pmc_value_t
iaf_reload_count_to_perfctr_value(pmc_value_t rlc)
{
	return (1ULL << core_iaf_width) - rlc;
}
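
/*
 * Worked example of the reload arithmetic above, assuming 48-bit fixed
 * counters (core_iaf_width == 48): to take a sample every R events the
 * counter is loaded with 2^48 - R, counts up, and raises a PMI on the
 * transition to 2^48, i.e. after exactly R increments.  Reading it back
 * mid-run, an MSB that is still set means no overflow has occurred yet
 * and 2^48 - v events remain before the next interrupt.
 */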

static int
iaf_allocate_pmc(int cpu, int ri, struct pmc *pm,
    const struct pmc_op_pmcallocate *a)
{
	enum pmc_event ev;
	uint32_t caps, flags, validflags;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU %d", __LINE__, cpu));

	PMCDBG2(MDP,ALL,1, "iaf-allocate ri=%d reqcaps=0x%x", ri, pm->pm_caps);

	if (ri < 0 || ri >= core_iaf_npmc)
		return (EINVAL);

	caps = a->pm_caps;

	if (a->pm_class != PMC_CLASS_IAF ||
	    (caps & IAF_PMC_CAPS) != caps)
		return (EINVAL);

	ev = pm->pm_event;
	if (ev < PMC_EV_IAF_FIRST || ev > PMC_EV_IAF_LAST)
		return (EINVAL);

	if (ev == PMC_EV_IAF_INSTR_RETIRED_ANY && ri != 0)
		return (EINVAL);
	if (ev == PMC_EV_IAF_CPU_CLK_UNHALTED_CORE && ri != 1)
		return (EINVAL);
	if (ev == PMC_EV_IAF_CPU_CLK_UNHALTED_REF && ri != 2)
		return (EINVAL);

	flags = a->pm_md.pm_iaf.pm_iaf_flags;

	validflags = IAF_MASK;

	if ((flags & ~validflags) != 0)
		return (EINVAL);

	if (caps & PMC_CAP_INTERRUPT)
		flags |= IAF_PMI;
	if (caps & PMC_CAP_SYSTEM)
		flags |= IAF_OS;
	if (caps & PMC_CAP_USER)
		flags |= IAF_USR;
	if ((caps & (PMC_CAP_USER | PMC_CAP_SYSTEM)) == 0)
		flags |= (IAF_OS | IAF_USR);

	pm->pm_md.pm_iaf.pm_iaf_ctrl = (flags << (ri * 4));

	PMCDBG1(MDP,ALL,2, "iaf-allocate config=0x%jx",
	    (uintmax_t) pm->pm_md.pm_iaf.pm_iaf_ctrl);

	return (0);
}

static int
iaf_config_pmc(int cpu, int ri, struct pmc *pm)
{
	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU %d", __LINE__, cpu));

	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	PMCDBG3(MDP,CFG,1, "iaf-config cpu=%d ri=%d pm=%p", cpu, ri, pm);

	KASSERT(core_pcpu[cpu] != NULL, ("[core,%d] null per-cpu %d", __LINE__,
	    cpu));

	core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc = pm;

	return (0);
}

static int
iaf_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
{
	int error;
	struct pmc_hw *phw;
	char iaf_name[PMC_NAME_MAX];

	phw = &core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri];

	(void) snprintf(iaf_name, sizeof(iaf_name), "IAF-%d", ri);
	if ((error = copystr(iaf_name, pi->pm_name, PMC_NAME_MAX,
	    NULL)) != 0)
		return (error);

	pi->pm_class = PMC_CLASS_IAF;

	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
		pi->pm_enabled = TRUE;
		*ppmc          = phw->phw_pmc;
	} else {
		pi->pm_enabled = FALSE;
		*ppmc          = NULL;
	}

	return (0);
}

static int
iaf_get_config(int cpu, int ri, struct pmc **ppm)
{
	*ppm = core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc;

	return (0);
}

static int
iaf_get_msr(int ri, uint32_t *msr)
{
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[iaf,%d] ri %d out of range", __LINE__, ri));

	*msr = IAF_RI_TO_MSR(ri);

	return (0);
}

static int
iaf_read_pmc(int cpu, int ri, pmc_value_t *v)
{
	struct pmc *pm;
	pmc_value_t tmp;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	pm = core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc;

	KASSERT(pm,
	    ("[core,%d] cpu %d ri %d(%d) pmc not configured", __LINE__, cpu,
		ri, ri + core_iaf_ri));

	tmp = rdpmc(IAF_RI_TO_MSR(ri));

	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		*v = iaf_perfctr_value_to_reload_count(tmp);
	else
		*v = tmp & ((1ULL << core_iaf_width) - 1);

	PMCDBG4(MDP,REA,1, "iaf-read cpu=%d ri=%d msr=0x%x -> v=%jx", cpu, ri,
	    IAF_RI_TO_MSR(ri), *v);

	return (0);
}

static int
iaf_release_pmc(int cpu, int ri, struct pmc *pmc)
{
	PMCDBG3(MDP,REL,1, "iaf-release cpu=%d ri=%d pm=%p", cpu, ri, pmc);

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	KASSERT(core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc == NULL,
	    ("[core,%d] PHW pmc non-NULL", __LINE__));

	return (0);
}

static int
iaf_start_pmc(int cpu, int ri)
{
	struct pmc *pm;
	struct core_cpu *iafc;
	uint64_t msr = 0;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	PMCDBG2(MDP,STA,1,"iaf-start cpu=%d ri=%d", cpu, ri);

	iafc = core_pcpu[cpu];
	pm = iafc->pc_corepmcs[ri + core_iaf_ri].phw_pmc;

	iafc->pc_iafctrl |= pm->pm_md.pm_iaf.pm_iaf_ctrl;

	msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK;
	wrmsr(IAF_CTRL, msr | (iafc->pc_iafctrl & IAF_CTRL_MASK));

	do {
		iafc->pc_resync = 0;
		iafc->pc_globalctrl |= (1ULL << (ri + IAF_OFFSET));
		msr = rdmsr(IA_GLOBAL_CTRL) & ~IAF_GLOBAL_CTRL_MASK;
		wrmsr(IA_GLOBAL_CTRL, msr | (iafc->pc_globalctrl &
		    IAF_GLOBAL_CTRL_MASK));
	} while (iafc->pc_resync != 0);

	PMCDBG4(MDP,STA,1,"iafctrl=%x(%x) globalctrl=%jx(%jx)",
	    iafc->pc_iafctrl, (uint32_t) rdmsr(IAF_CTRL),
	    iafc->pc_globalctrl, rdmsr(IA_GLOBAL_CTRL));

	return (0);
}

static int
iaf_stop_pmc(int cpu, int ri)
{
	uint32_t fc;
	struct core_cpu *iafc;
	uint64_t msr = 0;

	PMCDBG2(MDP,STO,1,"iaf-stop cpu=%d ri=%d", cpu, ri);

	iafc = core_pcpu[cpu];

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	fc = (IAF_MASK << (ri * 4));

	iafc->pc_iafctrl &= ~fc;

	PMCDBG1(MDP,STO,1,"iaf-stop iafctrl=%x", iafc->pc_iafctrl);
	msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK;
	wrmsr(IAF_CTRL, msr | (iafc->pc_iafctrl & IAF_CTRL_MASK));

	do {
		iafc->pc_resync = 0;
		iafc->pc_globalctrl &= ~(1ULL << (ri + IAF_OFFSET));
		msr = rdmsr(IA_GLOBAL_CTRL) & ~IAF_GLOBAL_CTRL_MASK;
		wrmsr(IA_GLOBAL_CTRL, msr | (iafc->pc_globalctrl &
		    IAF_GLOBAL_CTRL_MASK));
	} while (iafc->pc_resync != 0);

	PMCDBG4(MDP,STO,1,"iafctrl=%x(%x) globalctrl=%jx(%jx)",
	    iafc->pc_iafctrl, (uint32_t) rdmsr(IAF_CTRL),
	    iafc->pc_globalctrl, rdmsr(IA_GLOBAL_CTRL));

	return (0);
}

static int
iaf_write_pmc(int cpu, int ri, pmc_value_t v)
{
	struct core_cpu *cc;
	struct pmc *pm;
	uint64_t msr;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iaf_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	cc = core_pcpu[cpu];
	pm = cc->pc_corepmcs[ri + core_iaf_ri].phw_pmc;

	KASSERT(pm,
	    ("[core,%d] cpu %d ri %d pmc not configured", __LINE__, cpu, ri));

	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		v = iaf_reload_count_to_perfctr_value(v);

	/* Turn off fixed counters */
	msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK;
	wrmsr(IAF_CTRL, msr);

	wrmsr(IAF_CTR0 + ri, v & ((1ULL << core_iaf_width) - 1));

	/* Turn on fixed counters */
	msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK;
	wrmsr(IAF_CTRL, msr | (cc->pc_iafctrl & IAF_CTRL_MASK));

	PMCDBG6(MDP,WRI,1, "iaf-write cpu=%d ri=%d msr=0x%x v=%jx iafctrl=%jx "
	    "pmc=%jx", cpu, ri, IAF_RI_TO_MSR(ri), v,
	    (uintmax_t) rdmsr(IAF_CTRL),
	    (uintmax_t) rdpmc(IAF_RI_TO_MSR(ri)));

	return (0);
}


static void
iaf_initialize(struct pmc_mdep *md, int maxcpu, int npmc, int pmcwidth)
{
	struct pmc_classdep *pcd;

	KASSERT(md != NULL, ("[iaf,%d] md is NULL", __LINE__));

	PMCDBG0(MDP,INI,1, "iaf-initialize");

	pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF];

	pcd->pcd_caps	= IAF_PMC_CAPS;
	pcd->pcd_class	= PMC_CLASS_IAF;
	pcd->pcd_num	= npmc;
	pcd->pcd_ri	= md->pmd_npmc;
	pcd->pcd_width	= pmcwidth;

	pcd->pcd_allocate_pmc	= iaf_allocate_pmc;
	pcd->pcd_config_pmc	= iaf_config_pmc;
	pcd->pcd_describe	= iaf_describe;
	pcd->pcd_get_config	= iaf_get_config;
	pcd->pcd_get_msr	= iaf_get_msr;
	pcd->pcd_pcpu_fini	= core_pcpu_noop;
	pcd->pcd_pcpu_init	= core_pcpu_noop;
	pcd->pcd_read_pmc	= iaf_read_pmc;
	pcd->pcd_release_pmc	= iaf_release_pmc;
	pcd->pcd_start_pmc	= iaf_start_pmc;
	pcd->pcd_stop_pmc	= iaf_stop_pmc;
	pcd->pcd_write_pmc	= iaf_write_pmc;

	md->pmd_npmc	       += npmc;
}

/*
 * Intel programmable PMCs.
 */

/* Sub fields of UMASK that this event supports. */
#define	IAP_M_CORE		(1 << 0) /* Core specificity */
#define	IAP_M_AGENT		(1 << 1) /* Agent specificity */
#define	IAP_M_PREFETCH		(1 << 2) /* Prefetch */
#define	IAP_M_MESI		(1 << 3) /* MESI */
#define	IAP_M_SNOOPRESPONSE	(1 << 4) /* Snoop response */
#define	IAP_M_SNOOPTYPE		(1 << 5) /* Snoop type */
#define	IAP_M_TRANSITION	(1 << 6) /* Transition */

#define	IAP_F_CORE		(0x3 << 14) /* Core specificity */
#define	IAP_F_AGENT		(0x1 << 13) /* Agent specificity */
#define	IAP_F_PREFETCH		(0x3 << 12) /* Prefetch */
#define	IAP_F_MESI		(0xF <<  8) /* MESI */
#define	IAP_F_SNOOPRESPONSE	(0xB <<  8) /* Snoop response */
#define	IAP_F_SNOOPTYPE		(0x3 <<  8) /* Snoop type */
#define	IAP_F_TRANSITION	(0x1 << 12) /* Transition */

#define	IAP_PREFETCH_RESERVED	(0x2 << 12)
#define	IAP_CORE_THIS		(0x1 << 14)
#define	IAP_CORE_ALL		(0x3 << 14)
#define	IAP_F_CMASK		0xFF000000
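
/*
 * For reference, these masks overlay the PerfEvtSel register layout
 * described in the Intel SDM: bits 7:0 event select, 15:8 unit mask,
 * 16 USR, 17 OS, 18 edge detect, 19 pin control, 20 APIC interrupt
 * enable, 22 enable, 23 invert, and 31:24 counter mask (hence
 * IAP_F_CMASK == 0xFF000000).
 */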

static pmc_value_t
iap_perfctr_value_to_reload_count(pmc_value_t v)
{

	/* If the PMC has overflowed, return a reload count of zero. */
	if ((v & (1ULL << (core_iap_width - 1))) == 0)
		return (0);
	v &= (1ULL << core_iap_width) - 1;
	return (1ULL << core_iap_width) - v;
}

static pmc_value_t
iap_reload_count_to_perfctr_value(pmc_value_t rlc)
{
	return (1ULL << core_iap_width) - rlc;
}

static int
iap_pmc_has_overflowed(int ri)
{
	uint64_t v;

	/*
	 * We treat a Core (i.e., Intel architecture v1) PMC as having
	 * overflowed if its MSB is zero.
	 */
	v = rdpmc(ri);
	return ((v & (1ULL << (core_iap_width - 1))) == 0);
}

static int
iap_event_corei7_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t mask;

	switch (evsel) {
	/*
	 * Events valid only on counters 0 and 1.
	 */
	case 0x40:
	case 0x41:
	case 0x42:
	case 0x43:
	case 0x51:
	case 0x63:
		mask = 0x3;
		break;
	default:
		mask = ~0;	/* Any row index is ok. */
	}

	return (mask & (1 << ri));
}

static int
iap_event_westmere_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t mask;

	switch (evsel) {
	/*
	 * Events valid only on counter 0.
	 */
	case 0x60:
	case 0xB3:
		mask = 0x1;
		break;

	/*
	 * Events valid only on counters 0 and 1.
	 */
	case 0x4C:
	case 0x4E:
	case 0x51:
	case 0x63:
		mask = 0x3;
		break;

	default:
		mask = ~0;	/* Any row index is ok. */
	}

	return (mask & (1 << ri));
}

static int
iap_event_sb_sbx_ib_ibx_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t mask;

	switch (evsel) {
	/* Events valid only on counter 0. */
	case 0xB7:
		mask = 0x1;
		break;
	/* Events valid only on counter 1. */
	case 0xC0:
		mask = 0x2;
		break;
	/* Events valid only on counter 2. */
	case 0x48:
	case 0xA2:
	case 0xA3:
		mask = 0x4;
		break;
	/* Events valid only on counter 3. */
	case 0xBB:
	case 0xCD:
		mask = 0x8;
		break;
	default:
		mask = ~0;	/* Any row index is ok. */
	}

	return (mask & (1 << ri));
}

static int
iap_event_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t mask;

	switch (evsel) {
	/*
	 * Events valid only on counter 0.
	 */
	case 0x10:
	case 0x14:
	case 0x18:
	case 0xB3:
	case 0xC1:
	case 0xCB:
		mask = (1 << 0);
		break;

	/*
	 * Events valid only on counter 1.
	 */
	case 0x11:
	case 0x12:
	case 0x13:
		mask = (1 << 1);
		break;

	default:
		mask = ~0;	/* Any row index is ok. */
	}

	return (mask & (1 << ri));
}

static int
iap_allocate_pmc(int cpu, int ri, struct pmc *pm,
    const struct pmc_op_pmcallocate *a)
{
	enum pmc_event map;
	uint8_t ev;
	uint32_t caps;
	const struct pmc_md_iap_op_pmcallocate *iap;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index value %d", __LINE__, ri));

	/* check requested capabilities */
	caps = a->pm_caps;
	if ((IAP_PMC_CAPS & caps) != caps)
		return (EPERM);
	map = 0;	/* XXX: silence a GCC warning */
	iap = &a->pm_md.pm_iap;
	ev = IAP_EVSEL_GET(iap->pm_iap_config);

	switch (core_cputype) {
	case PMC_CPU_INTEL_COREI7:
	case PMC_CPU_INTEL_NEHALEM_EX:
		if (iap_event_corei7_ok_on_counter(ev, ri) == 0)
			return (EINVAL);
		break;
	case PMC_CPU_INTEL_SKYLAKE:
	case PMC_CPU_INTEL_SKYLAKE_XEON:
	case PMC_CPU_INTEL_BROADWELL:
	case PMC_CPU_INTEL_BROADWELL_XEON:
	case PMC_CPU_INTEL_SANDYBRIDGE:
	case PMC_CPU_INTEL_SANDYBRIDGE_XEON:
	case PMC_CPU_INTEL_IVYBRIDGE:
	case PMC_CPU_INTEL_IVYBRIDGE_XEON:
	case PMC_CPU_INTEL_HASWELL:
	case PMC_CPU_INTEL_HASWELL_XEON:
		if (iap_event_sb_sbx_ib_ibx_ok_on_counter(ev, ri) == 0)
			return (EINVAL);
		break;
	case PMC_CPU_INTEL_WESTMERE:
	case PMC_CPU_INTEL_WESTMERE_EX:
		if (iap_event_westmere_ok_on_counter(ev, ri) == 0)
			return (EINVAL);
		break;
	default:
		if (iap_event_ok_on_counter(ev, ri) == 0)
			return (EINVAL);
	}

	pm->pm_md.pm_iap.pm_iap_evsel = iap->pm_iap_config;
	return (0);
}

static int
iap_config_pmc(int cpu, int ri, struct pmc *pm)
{
	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU %d", __LINE__, cpu));

	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	PMCDBG3(MDP,CFG,1, "iap-config cpu=%d ri=%d pm=%p", cpu, ri, pm);

	KASSERT(core_pcpu[cpu] != NULL, ("[core,%d] null per-cpu %d", __LINE__,
	    cpu));

	core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc = pm;

	return (0);
}

static int
iap_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
{
	int error;
	struct pmc_hw *phw;
	char iap_name[PMC_NAME_MAX];

	phw = &core_pcpu[cpu]->pc_corepmcs[ri];

	(void) snprintf(iap_name, sizeof(iap_name), "IAP-%d", ri);
	if ((error = copystr(iap_name, pi->pm_name, PMC_NAME_MAX,
	    NULL)) != 0)
		return (error);

	pi->pm_class = PMC_CLASS_IAP;

	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
		pi->pm_enabled = TRUE;
		*ppmc          = phw->phw_pmc;
	} else {
		pi->pm_enabled = FALSE;
		*ppmc          = NULL;
	}

	return (0);
}

static int
iap_get_config(int cpu, int ri, struct pmc **ppm)
{
	*ppm = core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc;

	return (0);
}

static int
iap_get_msr(int ri, uint32_t *msr)
{
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[iap,%d] ri %d out of range", __LINE__, ri));

	*msr = ri;

	return (0);
}

static int
iap_read_pmc(int cpu, int ri, pmc_value_t *v)
{
	struct pmc *pm;
	pmc_value_t tmp;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	pm = core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc;

	KASSERT(pm,
	    ("[core,%d] cpu %d ri %d pmc not configured", __LINE__, cpu,
		ri));

	tmp = rdpmc(ri);
	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		*v = iap_perfctr_value_to_reload_count(tmp);
	else
		*v = tmp & ((1ULL << core_iap_width) - 1);

	PMCDBG4(MDP,REA,1, "iap-read cpu=%d ri=%d msr=0x%x -> v=%jx", cpu, ri,
	    IAP_PMC0 + ri, *v);

	return (0);
}

static int
iap_release_pmc(int cpu, int ri, struct pmc *pm)
{
	(void) pm;

	PMCDBG3(MDP,REL,1, "iap-release cpu=%d ri=%d pm=%p", cpu, ri,
	    pm);

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	KASSERT(core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc
	    == NULL, ("[core,%d] PHW pmc non-NULL", __LINE__));

	return (0);
}

static int
iap_start_pmc(int cpu, int ri)
{
	struct pmc *pm;
	uint32_t evsel;
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row-index %d", __LINE__, ri));

	cc = core_pcpu[cpu];
	pm = cc->pc_corepmcs[ri].phw_pmc;

	KASSERT(pm,
	    ("[core,%d] starting cpu%d,ri%d with no pmc configured",
		__LINE__, cpu, ri));

	PMCDBG2(MDP,STA,1, "iap-start cpu=%d ri=%d", cpu, ri);

	evsel = pm->pm_md.pm_iap.pm_iap_evsel;

	PMCDBG4(MDP,STA,2, "iap-start/2 cpu=%d ri=%d evselmsr=0x%x evsel=0x%x",
	    cpu, ri, IAP_EVSEL0 + ri, evsel);

	/* Event specific configuration. */

	switch (IAP_EVSEL_GET(evsel)) {
	case 0xB7:
		wrmsr(IA_OFFCORE_RSP0, pm->pm_md.pm_iap.pm_iap_rsp);
		break;
	case 0xBB:
		wrmsr(IA_OFFCORE_RSP1, pm->pm_md.pm_iap.pm_iap_rsp);
		break;
	default:
		break;
	}

	wrmsr(IAP_EVSEL0 + ri, evsel | IAP_EN);

	if (core_cputype == PMC_CPU_INTEL_CORE)
		return (0);

	do {
		cc->pc_resync = 0;
		cc->pc_globalctrl |= (1ULL << ri);
		wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);
	} while (cc->pc_resync != 0);

	return (0);
}

static int
iap_stop_pmc(int cpu, int ri)
{
	struct pmc *pm;
	struct core_cpu *cc;
	uint64_t msr;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row index %d", __LINE__, ri));

	cc = core_pcpu[cpu];
	pm = cc->pc_corepmcs[ri].phw_pmc;

	KASSERT(pm,
	    ("[core,%d] cpu%d ri%d no configured PMC to stop", __LINE__,
		cpu, ri));

	PMCDBG2(MDP,STO,1, "iap-stop cpu=%d ri=%d", cpu, ri);

	msr = rdmsr(IAP_EVSEL0 + ri) & ~IAP_EVSEL_MASK;
	wrmsr(IAP_EVSEL0 + ri, msr);	/* stop hw */

	if (core_cputype == PMC_CPU_INTEL_CORE)
		return (0);

	do {
		cc->pc_resync = 0;
		cc->pc_globalctrl &= ~(1ULL << ri);
		wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);
	} while (cc->pc_resync != 0);

	return (0);
}

static int
iap_write_pmc(int cpu, int ri, pmc_value_t v)
{
	struct pmc *pm;
	struct core_cpu *cc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < core_iap_npmc,
	    ("[core,%d] illegal row index %d", __LINE__, ri));

	cc = core_pcpu[cpu];
	pm = cc->pc_corepmcs[ri].phw_pmc;

	KASSERT(pm,
	    ("[core,%d] cpu%d ri%d no configured PMC to write", __LINE__,
		cpu, ri));

	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
		v = iap_reload_count_to_perfctr_value(v);

	v &= (1ULL << core_iap_width) - 1;

	PMCDBG4(MDP,WRI,1, "iap-write cpu=%d ri=%d msr=0x%x v=%jx", cpu, ri,
	    IAP_PMC0 + ri, v);

	/*
	 * Write the new value to the counter (or its alias).  The
	 * counter will be in a stopped state when the pcd_write()
	 * entry point is called.
	 */
	wrmsr(core_iap_wroffset + IAP_PMC0 + ri, v);
	return (0);
}


static void
iap_initialize(struct pmc_mdep *md, int maxcpu, int npmc, int pmcwidth,
    int flags)
{
	struct pmc_classdep *pcd;

	KASSERT(md != NULL, ("[iap,%d] md is NULL", __LINE__));

	PMCDBG0(MDP,INI,1, "iap-initialize");

	/* Remember the set of architectural events supported. */
	core_architectural_events = ~flags;

	pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP];

	pcd->pcd_caps	= IAP_PMC_CAPS;
	pcd->pcd_class	= PMC_CLASS_IAP;
	pcd->pcd_num	= npmc;
	pcd->pcd_ri	= md->pmd_npmc;
	pcd->pcd_width	= pmcwidth;

	pcd->pcd_allocate_pmc	= iap_allocate_pmc;
	pcd->pcd_config_pmc	= iap_config_pmc;
	pcd->pcd_describe	= iap_describe;
	pcd->pcd_get_config	= iap_get_config;
	pcd->pcd_get_msr	= iap_get_msr;
	pcd->pcd_pcpu_fini	= core_pcpu_fini;
	pcd->pcd_pcpu_init	= core_pcpu_init;
	pcd->pcd_read_pmc	= iap_read_pmc;
	pcd->pcd_release_pmc	= iap_release_pmc;
	pcd->pcd_start_pmc	= iap_start_pmc;
	pcd->pcd_stop_pmc	= iap_stop_pmc;
	pcd->pcd_write_pmc	= iap_write_pmc;

	md->pmd_npmc	       += npmc;
}

static int
core_intr(int cpu, struct trapframe *tf)
{
	pmc_value_t v;
	struct pmc *pm;
	struct core_cpu *cc;
	int error, found_interrupt, ri;
	uint64_t msr;

	PMCDBG3(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
	    TRAPF_USERMODE(tf));

	found_interrupt = 0;
	cc = core_pcpu[cpu];

	for (ri = 0; ri < core_iap_npmc; ri++) {
		if ((pm = cc->pc_corepmcs[ri].phw_pmc) == NULL ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
			continue;

		if (!iap_pmc_has_overflowed(ri))
			continue;

		found_interrupt = 1;

		if (pm->pm_state != PMC_STATE_RUNNING)
			continue;

		error = pmc_process_interrupt(cpu, PMC_HR, pm, tf,
		    TRAPF_USERMODE(tf));

		v = pm->pm_sc.pm_reloadcount;
		v = iap_reload_count_to_perfctr_value(v);

		/*
		 * Stop the counter and reload it, but only restart it
		 * if the PMC is not stalled.
		 */
		msr = rdmsr(IAP_EVSEL0 + ri) & ~IAP_EVSEL_MASK;
		wrmsr(IAP_EVSEL0 + ri, msr);
		wrmsr(core_iap_wroffset + IAP_PMC0 + ri, v);

		if (error)
			continue;

		wrmsr(IAP_EVSEL0 + ri, msr | (pm->pm_md.pm_iap.pm_iap_evsel |
		    IAP_EN));
	}

	if (found_interrupt) {
		lapic_reenable_pmc();
		counter_u64_add(pmc_stats.pm_intr_processed, 1);
	} else
		counter_u64_add(pmc_stats.pm_intr_ignored, 1);

	return (found_interrupt);
}

static int
core2_intr(int cpu, struct trapframe *tf)
{
	int error, found_interrupt, n;
	uint64_t flag, intrstatus, intrenable, msr;
	struct pmc *pm;
	struct core_cpu *cc;
	pmc_value_t v;

	PMCDBG3(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
	    TRAPF_USERMODE(tf));

	/*
	 * The IA_GLOBAL_STATUS (MSR 0x38E) register indicates which
	 * PMCs have a pending PMI interrupt.  We take a 'snapshot' of
	 * the current set of interrupting PMCs and process these
	 * after stopping them.
	 */
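	/*
	 * Status bits, as consumed below: bit n (n < core_iap_npmc)
	 * flags an overflow of programmable counter n, bit IAF_OFFSET+n
	 * an overflow of fixed counter n, and the two topmost bits are
	 * the buffer-overflow and condition-changed indicators cleared
	 * via IA_GLOBAL_STATUS_FLAG_OVFBUF and
	 * IA_GLOBAL_STATUS_FLAG_CONDCHG.
	 */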
	intrstatus = rdmsr(IA_GLOBAL_STATUS);
	intrenable = intrstatus & core_pmcmask;

	PMCDBG2(MDP,INT, 1, "cpu=%d intrstatus=%jx", cpu,
	    (uintmax_t) intrstatus);

	found_interrupt = 0;
	cc = core_pcpu[cpu];

	KASSERT(cc != NULL, ("[core,%d] null pcpu", __LINE__));

	cc->pc_globalctrl &= ~intrenable;
	cc->pc_resync = 1;	/* MSRs now potentially out of sync. */

	/*
	 * Stop PMCs and clear overflow status bits.
	 */
	msr = rdmsr(IA_GLOBAL_CTRL) & ~IA_GLOBAL_CTRL_MASK;
	wrmsr(IA_GLOBAL_CTRL, msr);
	wrmsr(IA_GLOBAL_OVF_CTRL, intrenable |
	    IA_GLOBAL_STATUS_FLAG_OVFBUF |
	    IA_GLOBAL_STATUS_FLAG_CONDCHG);

	/*
	 * Look for interrupts from fixed function PMCs.
	 */
	for (n = 0, flag = (1ULL << IAF_OFFSET); n < core_iaf_npmc;
	     n++, flag <<= 1) {
		if ((intrstatus & flag) == 0)
			continue;

		found_interrupt = 1;

		pm = cc->pc_corepmcs[n + core_iaf_ri].phw_pmc;
		if (pm == NULL || pm->pm_state != PMC_STATE_RUNNING ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
			continue;

		error = pmc_process_interrupt(cpu, PMC_HR, pm, tf,
		    TRAPF_USERMODE(tf));

		if (error)
			intrenable &= ~flag;

		v = iaf_reload_count_to_perfctr_value(pm->pm_sc.pm_reloadcount);

		/* Reload sampling count. */
		wrmsr(IAF_CTR0 + n, v);

		PMCDBG4(MDP,INT, 1, "iaf-intr cpu=%d error=%d v=%jx(%jx)", cpu,
		    error, (uintmax_t) v, (uintmax_t) rdpmc(IAF_RI_TO_MSR(n)));
	}

	/*
	 * Process interrupts from the programmable counters.
	 */
	for (n = 0, flag = 1; n < core_iap_npmc; n++, flag <<= 1) {
		if ((intrstatus & flag) == 0)
			continue;

		found_interrupt = 1;

		pm = cc->pc_corepmcs[n].phw_pmc;
		if (pm == NULL || pm->pm_state != PMC_STATE_RUNNING ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
			continue;

		error = pmc_process_interrupt(cpu, PMC_HR, pm, tf,
		    TRAPF_USERMODE(tf));
		if (error)
			intrenable &= ~flag;

		v = iap_reload_count_to_perfctr_value(pm->pm_sc.pm_reloadcount);

		PMCDBG3(MDP,INT, 1, "iap-intr cpu=%d error=%d v=%jx", cpu, error,
		    (uintmax_t) v);

		/* Reload sampling count. */
		wrmsr(core_iap_wroffset + IAP_PMC0 + n, v);
	}

	/*
	 * Reenable all non-stalled PMCs.
	 */
	PMCDBG2(MDP,INT, 1, "cpu=%d intrenable=%jx", cpu,
	    (uintmax_t) intrenable);

	cc->pc_globalctrl |= intrenable;

	wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl & IA_GLOBAL_CTRL_MASK);

	PMCDBG5(MDP,INT, 1, "cpu=%d fixedctrl=%jx globalctrl=%jx status=%jx "
	    "ovf=%jx", cpu, (uintmax_t) rdmsr(IAF_CTRL),
	    (uintmax_t) rdmsr(IA_GLOBAL_CTRL),
	    (uintmax_t) rdmsr(IA_GLOBAL_STATUS),
	    (uintmax_t) rdmsr(IA_GLOBAL_OVF_CTRL));

	if (found_interrupt) {
		lapic_reenable_pmc();
		counter_u64_add(pmc_stats.pm_intr_processed, 1);
	} else
		counter_u64_add(pmc_stats.pm_intr_ignored, 1);

	return (found_interrupt);
}

int
pmc_core_initialize(struct pmc_mdep *md, int maxcpu, int version_override)
{
	int cpuid[CORE_CPUID_REQUEST_SIZE];
	int ipa_version, flags, nflags;

	do_cpuid(CORE_CPUID_REQUEST, cpuid);

	ipa_version = (version_override > 0) ? version_override :
	    cpuid[CORE_CPUID_EAX] & 0xFF;
	core_cputype = md->pmd_cputype;

	PMCDBG3(MDP,INI,1,"core-init cputype=%d ncpu=%d ipa-version=%d",
	    core_cputype, maxcpu, ipa_version);

	if (ipa_version < 1 || ipa_version > 4 ||
	    (core_cputype != PMC_CPU_INTEL_CORE && ipa_version == 1)) {
		/* Unknown PMC architecture. */
		printf("hwpmc_core: unknown PMC architecture: %d\n",
		    ipa_version);
		return (EPROGMISMATCH);
	}

	core_iap_wroffset = 0;
	if (cpu_feature2 & CPUID2_PDCM) {
		if (rdmsr(IA32_PERF_CAPABILITIES) & PERFCAP_FW_WRITE) {
			PMCDBG0(MDP, INI, 1,
			    "core-init full-width write supported");
			core_iap_wroffset = IAP_A_PMC0 - IAP_PMC0;
		} else
			PMCDBG0(MDP, INI, 1,
			    "core-init full-width write NOT supported");
	} else
		PMCDBG0(MDP, INI, 1, "core-init pdcm not supported");

	core_pmcmask = 0;

	/*
	 * Initialize programmable counters.
	 */
	core_iap_npmc = (cpuid[CORE_CPUID_EAX] >> 8) & 0xFF;
	core_iap_width = (cpuid[CORE_CPUID_EAX] >> 16) & 0xFF;

	core_pmcmask |= ((1ULL << core_iap_npmc) - 1);

	nflags = (cpuid[CORE_CPUID_EAX] >> 24) & 0xFF;
	flags = cpuid[CORE_CPUID_EBX] & ((1 << nflags) - 1);
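
	/*
	 * An illustrative decode (example values only): EAX = 0x07300804
	 * would mean PMC architecture version 4, 8 programmable counters
	 * of 48 bits each, and 7 architectural-event flag bits to take
	 * from EBX.
	 */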

	iap_initialize(md, maxcpu, core_iap_npmc, core_iap_width, flags);

	/*
	 * Initialize fixed function counters, if present.
	 */
	if (core_cputype != PMC_CPU_INTEL_CORE) {
		core_iaf_ri = core_iap_npmc;
		core_iaf_npmc = cpuid[CORE_CPUID_EDX] & 0x1F;
		core_iaf_width = (cpuid[CORE_CPUID_EDX] >> 5) & 0xFF;

		iaf_initialize(md, maxcpu, core_iaf_npmc, core_iaf_width);
		core_pmcmask |= ((1ULL << core_iaf_npmc) - 1) << IAF_OFFSET;
	}
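
	/*
	 * For example, with 4 programmable and 3 fixed counters the mask
	 * is 0xF | (0x7ULL << 32) == 0x70000000F: the union of every
	 * overflow bit this driver can see in IA_GLOBAL_STATUS.
	 */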

	PMCDBG2(MDP,INI,1,"core-init pmcmask=0x%jx iafri=%d", core_pmcmask,
	    core_iaf_ri);

	core_pcpu = malloc(sizeof(*core_pcpu) * maxcpu, M_PMC,
	    M_ZERO | M_WAITOK);

	/*
	 * Choose the appropriate interrupt handler.
	 */
	if (ipa_version == 1)
		md->pmd_intr = core_intr;
	else
		md->pmd_intr = core2_intr;

	md->pmd_pcpu_fini = NULL;
	md->pmd_pcpu_init = NULL;

	return (0);
}

void
pmc_core_finalize(struct pmc_mdep *md)
{
	PMCDBG0(MDP,INI,1, "core-finalize");

	free(core_pcpu, M_PMC);
	core_pcpu = NULL;
}