xref: /freebsd/sys/dev/hwpmc/hwpmc_core.c (revision 68d75eff68281c1b445e3010bb975eae07aac225)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2008 Joseph Koshy
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * Intel Core PMCs.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 
36 #include <sys/param.h>
37 #include <sys/bus.h>
38 #include <sys/pmc.h>
39 #include <sys/pmckern.h>
40 #include <sys/smp.h>
41 #include <sys/systm.h>
42 
43 #include <machine/intr_machdep.h>
44 #include <x86/apicvar.h>
45 #include <machine/cpu.h>
46 #include <machine/cpufunc.h>
47 #include <machine/md_var.h>
48 #include <machine/specialreg.h>
49 
/*
 * CPUID leaf queried for performance-monitoring information (0AH), the
 * number of result words, and a name for each result word.
 */
#define	CORE_CPUID_REQUEST		0xA
#define	CORE_CPUID_REQUEST_SIZE		0x4
#define	CORE_CPUID_EAX			0x0
#define	CORE_CPUID_EBX			0x1
#define	CORE_CPUID_ECX			0x2
#define	CORE_CPUID_EDX			0x3

/* Capabilities advertised for the fixed-function (IAF) class. */
#define	IAF_PMC_CAPS							\
	(PMC_CAP_READ | PMC_CAP_WRITE | PMC_CAP_INTERRUPT |		\
	 PMC_CAP_USER | PMC_CAP_SYSTEM)

/*
 * Counter selector handed to rdpmc() for fixed-function counter RI:
 * the row index with bit 30 set.
 */
#define	IAF_RI_TO_MSR(RI)		((RI) + (1 << 30))

/* Capabilities advertised for the programmable (IAP) class. */
#define	IAP_PMC_CAPS							\
	(PMC_CAP_INTERRUPT | PMC_CAP_USER | PMC_CAP_SYSTEM |		\
	 PMC_CAP_EDGE | PMC_CAP_THRESHOLD | PMC_CAP_READ |		\
	 PMC_CAP_WRITE | PMC_CAP_INVERT | PMC_CAP_QUALIFIER |		\
	 PMC_CAP_PRECISE)

/* Classification of an event with respect to the architectural set. */
#define	EV_IS_NOTARCH		0
#define	EV_IS_ARCH_SUPP		1
#define	EV_IS_ARCH_NOTSUPP	-1
69 
/*
 * Intel's "architectural" events.  Each enumerator's value is the bit
 * position of that event in the bitmask returned by the CPUID.0AH
 * instruction; the list is kept in bit order.
 */
enum core_arch_events {
	CORE_AE_UNHALTED_CORE_CYCLES		= 0,
	CORE_AE_INSTRUCTION_RETIRED		= 1,
	CORE_AE_UNHALTED_REFERENCE_CYCLES	= 2,
	CORE_AE_LLC_REFERENCE			= 3,
	CORE_AE_LLC_MISSES			= 4,
	CORE_AE_BRANCH_INSTRUCTION_RETIRED	= 5,
	CORE_AE_BRANCH_MISSES_RETIRED		= 6
};
84 
85 static enum pmc_cputype	core_cputype;
86 
87 struct core_cpu {
88 	volatile uint32_t	pc_resync;
89 	volatile uint32_t	pc_iafctrl;	/* Fixed function control. */
90 	volatile uint64_t	pc_globalctrl;	/* Global control register. */
91 	struct pmc_hw		pc_corepmcs[];
92 };
93 
94 static struct core_cpu **core_pcpu;
95 
96 static uint32_t core_architectural_events;
97 static uint64_t core_pmcmask;
98 
99 static int core_iaf_ri;		/* relative index of fixed counters */
100 static int core_iaf_width;
101 static int core_iaf_npmc;
102 
103 static int core_iap_width;
104 static int core_iap_npmc;
105 static int core_iap_wroffset;
106 
107 static u_int pmc_alloc_refs;
108 static bool pmc_tsx_force_abort_set;
109 
/*
 * Per-CPU init/fini hook that does nothing and always succeeds.  Both
 * arguments are ignored.
 */
static int
core_pcpu_noop(struct pmc_mdep *md, int cpu)
{

	(void)cpu;
	(void)md;

	return (0);
}
117 
118 static int
119 core_pcpu_init(struct pmc_mdep *md, int cpu)
120 {
121 	struct pmc_cpu *pc;
122 	struct core_cpu *cc;
123 	struct pmc_hw *phw;
124 	int core_ri, n, npmc;
125 
126 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
127 	    ("[iaf,%d] insane cpu number %d", __LINE__, cpu));
128 
129 	PMCDBG1(MDP,INI,1,"core-init cpu=%d", cpu);
130 
131 	core_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_ri;
132 	npmc = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_num;
133 
134 	if (core_cputype != PMC_CPU_INTEL_CORE)
135 		npmc += md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF].pcd_num;
136 
137 	cc = malloc(sizeof(struct core_cpu) + npmc * sizeof(struct pmc_hw),
138 	    M_PMC, M_WAITOK | M_ZERO);
139 
140 	core_pcpu[cpu] = cc;
141 	pc = pmc_pcpu[cpu];
142 
143 	KASSERT(pc != NULL && cc != NULL,
144 	    ("[core,%d] NULL per-cpu structures cpu=%d", __LINE__, cpu));
145 
146 	for (n = 0, phw = cc->pc_corepmcs; n < npmc; n++, phw++) {
147 		phw->phw_state 	  = PMC_PHW_FLAG_IS_ENABLED |
148 		    PMC_PHW_CPU_TO_STATE(cpu) |
149 		    PMC_PHW_INDEX_TO_STATE(n + core_ri);
150 		phw->phw_pmc	  = NULL;
151 		pc->pc_hwpmcs[n + core_ri]  = phw;
152 	}
153 
154 	return (0);
155 }
156 
157 static int
158 core_pcpu_fini(struct pmc_mdep *md, int cpu)
159 {
160 	int core_ri, n, npmc;
161 	struct pmc_cpu *pc;
162 	struct core_cpu *cc;
163 	uint64_t msr = 0;
164 
165 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
166 	    ("[core,%d] insane cpu number (%d)", __LINE__, cpu));
167 
168 	PMCDBG1(MDP,INI,1,"core-pcpu-fini cpu=%d", cpu);
169 
170 	if ((cc = core_pcpu[cpu]) == NULL)
171 		return (0);
172 
173 	core_pcpu[cpu] = NULL;
174 
175 	pc = pmc_pcpu[cpu];
176 
177 	KASSERT(pc != NULL, ("[core,%d] NULL per-cpu %d state", __LINE__,
178 		cpu));
179 
180 	npmc = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_num;
181 	core_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_ri;
182 
183 	for (n = 0; n < npmc; n++) {
184 		msr = rdmsr(IAP_EVSEL0 + n) & ~IAP_EVSEL_MASK;
185 		wrmsr(IAP_EVSEL0 + n, msr);
186 	}
187 
188 	if (core_cputype != PMC_CPU_INTEL_CORE) {
189 		msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK;
190 		wrmsr(IAF_CTRL, msr);
191 		npmc += md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF].pcd_num;
192 	}
193 
194 	for (n = 0; n < npmc; n++)
195 		pc->pc_hwpmcs[n + core_ri] = NULL;
196 
197 	free(cc, M_PMC);
198 
199 	return (0);
200 }
201 
202 /*
203  * Fixed function counters.
204  */
205 
206 static pmc_value_t
207 iaf_perfctr_value_to_reload_count(pmc_value_t v)
208 {
209 
210 	/* If the PMC has overflowed, return a reload count of zero. */
211 	if ((v & (1ULL << (core_iaf_width - 1))) == 0)
212 		return (0);
213 	v &= (1ULL << core_iaf_width) - 1;
214 	return (1ULL << core_iaf_width) - v;
215 }
216 
217 static pmc_value_t
218 iaf_reload_count_to_perfctr_value(pmc_value_t rlc)
219 {
220 	return (1ULL << core_iaf_width) - rlc;
221 }
222 
223 static int
224 iaf_allocate_pmc(int cpu, int ri, struct pmc *pm,
225     const struct pmc_op_pmcallocate *a)
226 {
227 	uint8_t ev, umask;
228 	uint32_t caps, flags, config;
229 	const struct pmc_md_iap_op_pmcallocate *iap;
230 
231 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
232 	    ("[core,%d] illegal CPU %d", __LINE__, cpu));
233 
234 	PMCDBG2(MDP,ALL,1, "iaf-allocate ri=%d reqcaps=0x%x", ri, pm->pm_caps);
235 
236 	if (ri < 0 || ri > core_iaf_npmc)
237 		return (EINVAL);
238 
239 	caps = a->pm_caps;
240 
241 	if (a->pm_class != PMC_CLASS_IAF ||
242 	    (caps & IAF_PMC_CAPS) != caps)
243 		return (EINVAL);
244 
245 	iap = &a->pm_md.pm_iap;
246 	config = iap->pm_iap_config;
247 	ev = IAP_EVSEL_GET(config);
248 	umask = IAP_UMASK_GET(config);
249 
250 	/* INST_RETIRED.ANY */
251 	if (ev == 0xC0 && ri != 0)
252 		return (EINVAL);
253 	/* CPU_CLK_UNHALTED.THREAD */
254 	if (ev == 0x3C && ri != 1)
255 		return (EINVAL);
256 	/* CPU_CLK_UNHALTED.REF */
257 	if (ev == 0x0 && umask == 0x3 && ri != 2)
258 		return (EINVAL);
259 
260 	pmc_alloc_refs++;
261 	if ((cpu_stdext_feature3 & CPUID_STDEXT3_TSXFA) != 0 &&
262 	    !pmc_tsx_force_abort_set) {
263 		pmc_tsx_force_abort_set = true;
264 		x86_msr_op(MSR_TSX_FORCE_ABORT, MSR_OP_RENDEZVOUS |
265 		    MSR_OP_WRITE, 1);
266 	}
267 
268 	flags = 0;
269 	if (config & IAP_OS)
270 		flags |= IAF_OS;
271 	if (config & IAP_USR)
272 		flags |= IAF_USR;
273 	if (config & IAP_ANY)
274 		flags |= IAF_ANY;
275 	if (config & IAP_INT)
276 		flags |= IAF_PMI;
277 
278 	if (caps & PMC_CAP_INTERRUPT)
279 		flags |= IAF_PMI;
280 	if (caps & PMC_CAP_SYSTEM)
281 		flags |= IAF_OS;
282 	if (caps & PMC_CAP_USER)
283 		flags |= IAF_USR;
284 	if ((caps & (PMC_CAP_USER | PMC_CAP_SYSTEM)) == 0)
285 		flags |= (IAF_OS | IAF_USR);
286 
287 	pm->pm_md.pm_iaf.pm_iaf_ctrl = (flags << (ri * 4));
288 
289 	PMCDBG1(MDP,ALL,2, "iaf-allocate config=0x%jx",
290 	    (uintmax_t) pm->pm_md.pm_iaf.pm_iaf_ctrl);
291 
292 	return (0);
293 }
294 
295 static int
296 iaf_config_pmc(int cpu, int ri, struct pmc *pm)
297 {
298 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
299 	    ("[core,%d] illegal CPU %d", __LINE__, cpu));
300 
301 	KASSERT(ri >= 0 && ri < core_iaf_npmc,
302 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
303 
304 	PMCDBG3(MDP,CFG,1, "iaf-config cpu=%d ri=%d pm=%p", cpu, ri, pm);
305 
306 	KASSERT(core_pcpu[cpu] != NULL, ("[core,%d] null per-cpu %d", __LINE__,
307 	    cpu));
308 
309 	core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc = pm;
310 
311 	return (0);
312 }
313 
314 static int
315 iaf_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
316 {
317 	int error;
318 	struct pmc_hw *phw;
319 	char iaf_name[PMC_NAME_MAX];
320 
321 	phw = &core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri];
322 
323 	(void) snprintf(iaf_name, sizeof(iaf_name), "IAF-%d", ri);
324 	if ((error = copystr(iaf_name, pi->pm_name, PMC_NAME_MAX,
325 	    NULL)) != 0)
326 		return (error);
327 
328 	pi->pm_class = PMC_CLASS_IAF;
329 
330 	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
331 		pi->pm_enabled = TRUE;
332 		*ppmc          = phw->phw_pmc;
333 	} else {
334 		pi->pm_enabled = FALSE;
335 		*ppmc          = NULL;
336 	}
337 
338 	return (0);
339 }
340 
341 static int
342 iaf_get_config(int cpu, int ri, struct pmc **ppm)
343 {
344 	*ppm = core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc;
345 
346 	return (0);
347 }
348 
349 static int
350 iaf_get_msr(int ri, uint32_t *msr)
351 {
352 	KASSERT(ri >= 0 && ri < core_iaf_npmc,
353 	    ("[iaf,%d] ri %d out of range", __LINE__, ri));
354 
355 	*msr = IAF_RI_TO_MSR(ri);
356 
357 	return (0);
358 }
359 
360 static int
361 iaf_read_pmc(int cpu, int ri, pmc_value_t *v)
362 {
363 	struct pmc *pm;
364 	pmc_value_t tmp;
365 
366 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
367 	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
368 	KASSERT(ri >= 0 && ri < core_iaf_npmc,
369 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
370 
371 	pm = core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc;
372 
373 	KASSERT(pm,
374 	    ("[core,%d] cpu %d ri %d(%d) pmc not configured", __LINE__, cpu,
375 		ri, ri + core_iaf_ri));
376 
377 	tmp = rdpmc(IAF_RI_TO_MSR(ri));
378 
379 	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
380 		*v = iaf_perfctr_value_to_reload_count(tmp);
381 	else
382 		*v = tmp & ((1ULL << core_iaf_width) - 1);
383 
384 	PMCDBG4(MDP,REA,1, "iaf-read cpu=%d ri=%d msr=0x%x -> v=%jx", cpu, ri,
385 	    IAF_RI_TO_MSR(ri), *v);
386 
387 	return (0);
388 }
389 
390 static int
391 iaf_release_pmc(int cpu, int ri, struct pmc *pmc)
392 {
393 	PMCDBG3(MDP,REL,1, "iaf-release cpu=%d ri=%d pm=%p", cpu, ri, pmc);
394 
395 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
396 	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
397 	KASSERT(ri >= 0 && ri < core_iaf_npmc,
398 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
399 
400 	KASSERT(core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc == NULL,
401 	    ("[core,%d] PHW pmc non-NULL", __LINE__));
402 
403 	MPASS(pmc_alloc_refs > 0);
404 	if (pmc_alloc_refs-- == 1 && pmc_tsx_force_abort_set) {
405 		pmc_tsx_force_abort_set = false;
406 		x86_msr_op(MSR_TSX_FORCE_ABORT, MSR_OP_RENDEZVOUS |
407 		    MSR_OP_WRITE, 0);
408 	}
409 
410 	return (0);
411 }
412 
413 static int
414 iaf_start_pmc(int cpu, int ri)
415 {
416 	struct pmc *pm;
417 	struct core_cpu *iafc;
418 	uint64_t msr = 0;
419 
420 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
421 	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
422 	KASSERT(ri >= 0 && ri < core_iaf_npmc,
423 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
424 
425 	PMCDBG2(MDP,STA,1,"iaf-start cpu=%d ri=%d", cpu, ri);
426 
427 	iafc = core_pcpu[cpu];
428 	pm = iafc->pc_corepmcs[ri + core_iaf_ri].phw_pmc;
429 
430 	iafc->pc_iafctrl |= pm->pm_md.pm_iaf.pm_iaf_ctrl;
431 
432  	msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK;
433  	wrmsr(IAF_CTRL, msr | (iafc->pc_iafctrl & IAF_CTRL_MASK));
434 
435 	do {
436 		iafc->pc_resync = 0;
437 		iafc->pc_globalctrl |= (1ULL << (ri + IAF_OFFSET));
438  		msr = rdmsr(IA_GLOBAL_CTRL) & ~IAF_GLOBAL_CTRL_MASK;
439  		wrmsr(IA_GLOBAL_CTRL, msr | (iafc->pc_globalctrl &
440  					     IAF_GLOBAL_CTRL_MASK));
441 	} while (iafc->pc_resync != 0);
442 
443 	PMCDBG4(MDP,STA,1,"iafctrl=%x(%x) globalctrl=%jx(%jx)",
444 	    iafc->pc_iafctrl, (uint32_t) rdmsr(IAF_CTRL),
445 	    iafc->pc_globalctrl, rdmsr(IA_GLOBAL_CTRL));
446 
447 	return (0);
448 }
449 
450 static int
451 iaf_stop_pmc(int cpu, int ri)
452 {
453 	uint32_t fc;
454 	struct core_cpu *iafc;
455 	uint64_t msr = 0;
456 
457 	PMCDBG2(MDP,STO,1,"iaf-stop cpu=%d ri=%d", cpu, ri);
458 
459 	iafc = core_pcpu[cpu];
460 
461 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
462 	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
463 	KASSERT(ri >= 0 && ri < core_iaf_npmc,
464 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
465 
466 	fc = (IAF_MASK << (ri * 4));
467 
468 	iafc->pc_iafctrl &= ~fc;
469 
470 	PMCDBG1(MDP,STO,1,"iaf-stop iafctrl=%x", iafc->pc_iafctrl);
471  	msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK;
472  	wrmsr(IAF_CTRL, msr | (iafc->pc_iafctrl & IAF_CTRL_MASK));
473 
474 	do {
475 		iafc->pc_resync = 0;
476 		iafc->pc_globalctrl &= ~(1ULL << (ri + IAF_OFFSET));
477  		msr = rdmsr(IA_GLOBAL_CTRL) & ~IAF_GLOBAL_CTRL_MASK;
478  		wrmsr(IA_GLOBAL_CTRL, msr | (iafc->pc_globalctrl &
479  					     IAF_GLOBAL_CTRL_MASK));
480 	} while (iafc->pc_resync != 0);
481 
482 	PMCDBG4(MDP,STO,1,"iafctrl=%x(%x) globalctrl=%jx(%jx)",
483 	    iafc->pc_iafctrl, (uint32_t) rdmsr(IAF_CTRL),
484 	    iafc->pc_globalctrl, rdmsr(IA_GLOBAL_CTRL));
485 
486 	return (0);
487 }
488 
489 static int
490 iaf_write_pmc(int cpu, int ri, pmc_value_t v)
491 {
492 	struct core_cpu *cc;
493 	struct pmc *pm;
494 	uint64_t msr;
495 
496 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
497 	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
498 	KASSERT(ri >= 0 && ri < core_iaf_npmc,
499 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
500 
501 	cc = core_pcpu[cpu];
502 	pm = cc->pc_corepmcs[ri + core_iaf_ri].phw_pmc;
503 
504 	KASSERT(pm,
505 	    ("[core,%d] cpu %d ri %d pmc not configured", __LINE__, cpu, ri));
506 
507 	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
508 		v = iaf_reload_count_to_perfctr_value(v);
509 
510 	/* Turn off fixed counters */
511 	msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK;
512 	wrmsr(IAF_CTRL, msr);
513 
514 	wrmsr(IAF_CTR0 + ri, v & ((1ULL << core_iaf_width) - 1));
515 
516 	/* Turn on fixed counters */
517 	msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK;
518 	wrmsr(IAF_CTRL, msr | (cc->pc_iafctrl & IAF_CTRL_MASK));
519 
520 	PMCDBG6(MDP,WRI,1, "iaf-write cpu=%d ri=%d msr=0x%x v=%jx iafctrl=%jx "
521 	    "pmc=%jx", cpu, ri, IAF_RI_TO_MSR(ri), v,
522 	    (uintmax_t) rdmsr(IAF_CTRL),
523 	    (uintmax_t) rdpmc(IAF_RI_TO_MSR(ri)));
524 
525 	return (0);
526 }
527 
528 
529 static void
530 iaf_initialize(struct pmc_mdep *md, int maxcpu, int npmc, int pmcwidth)
531 {
532 	struct pmc_classdep *pcd;
533 
534 	KASSERT(md != NULL, ("[iaf,%d] md is NULL", __LINE__));
535 
536 	PMCDBG0(MDP,INI,1, "iaf-initialize");
537 
538 	pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF];
539 
540 	pcd->pcd_caps	= IAF_PMC_CAPS;
541 	pcd->pcd_class	= PMC_CLASS_IAF;
542 	pcd->pcd_num	= npmc;
543 	pcd->pcd_ri	= md->pmd_npmc;
544 	pcd->pcd_width	= pmcwidth;
545 
546 	pcd->pcd_allocate_pmc	= iaf_allocate_pmc;
547 	pcd->pcd_config_pmc	= iaf_config_pmc;
548 	pcd->pcd_describe	= iaf_describe;
549 	pcd->pcd_get_config	= iaf_get_config;
550 	pcd->pcd_get_msr	= iaf_get_msr;
551 	pcd->pcd_pcpu_fini	= core_pcpu_noop;
552 	pcd->pcd_pcpu_init	= core_pcpu_noop;
553 	pcd->pcd_read_pmc	= iaf_read_pmc;
554 	pcd->pcd_release_pmc	= iaf_release_pmc;
555 	pcd->pcd_start_pmc	= iaf_start_pmc;
556 	pcd->pcd_stop_pmc	= iaf_stop_pmc;
557 	pcd->pcd_write_pmc	= iaf_write_pmc;
558 
559 	md->pmd_npmc	       += npmc;
560 }
561 
562 /*
563  * Intel programmable PMCs.
564  */
565 
/*
 * Sub fields of UMASK that this event supports.  One flag bit per
 * qualifier.
 */
#define	IAP_M_CORE		(1 << 0) /* Core specificity */
#define	IAP_M_AGENT		(1 << 1) /* Agent specificity */
#define	IAP_M_PREFETCH		(1 << 2) /* Prefetch */
#define	IAP_M_MESI		(1 << 3) /* MESI */
#define	IAP_M_SNOOPRESPONSE	(1 << 4) /* Snoop response */
#define	IAP_M_SNOOPTYPE		(1 << 5) /* Snoop type */
#define	IAP_M_TRANSITION	(1 << 6) /* Transition */

/* Bit fields matching the qualifiers above. */
#define	IAP_F_CORE		(0x3 << 14) /* Core specificity */
#define	IAP_F_AGENT		(0x1 << 13) /* Agent specificity */
#define	IAP_F_PREFETCH		(0x3 << 12) /* Prefetch */
#define	IAP_F_MESI		(0xF <<  8) /* MESI */
#define	IAP_F_SNOOPRESPONSE	(0xB <<  8) /* Snoop response */
#define	IAP_F_SNOOPTYPE		(0x3 <<  8) /* Snoop type */
#define	IAP_F_TRANSITION	(0x1 << 12) /* Transition */

/* Specific field values, and the CMASK field (top byte). */
#define	IAP_PREFETCH_RESERVED	(0x2 << 12)
#define	IAP_CORE_THIS		(0x1 << 14)
#define	IAP_CORE_ALL		(0x3 << 14)
#define	IAP_F_CMASK		0xFF000000
587 
588 static pmc_value_t
589 iap_perfctr_value_to_reload_count(pmc_value_t v)
590 {
591 
592 	/* If the PMC has overflowed, return a reload count of zero. */
593 	if ((v & (1ULL << (core_iap_width - 1))) == 0)
594 		return (0);
595 	v &= (1ULL << core_iap_width) - 1;
596 	return (1ULL << core_iap_width) - v;
597 }
598 
599 static pmc_value_t
600 iap_reload_count_to_perfctr_value(pmc_value_t rlc)
601 {
602 	return (1ULL << core_iap_width) - rlc;
603 }
604 
605 static int
606 iap_pmc_has_overflowed(int ri)
607 {
608 	uint64_t v;
609 
610 	/*
611 	 * We treat a Core (i.e., Intel architecture v1) PMC as has
612 	 * having overflowed if its MSB is zero.
613 	 */
614 	v = rdpmc(ri);
615 	return ((v & (1ULL << (core_iap_width - 1))) == 0);
616 }
617 
/*
 * Core i7 / Nehalem-EX: check whether event 'evsel' may be counted on
 * programmable counter 'ri'.  Returns non-zero (the counter's bit)
 * when allowed and 0 when not.
 */
static int
iap_event_corei7_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t allowed;

	/* Any row index is ok unless the event says otherwise. */
	allowed = ~0;

	/* Events valid only on counter 0, 1. */
	if (evsel == 0x40 || evsel == 0x41 || evsel == 0x42 ||
	    evsel == 0x43 || evsel == 0x51 || evsel == 0x63)
		allowed = 0x3;

	return (allowed & (1 << ri));
}
642 
/*
 * Westmere: check whether event 'evsel' may be counted on programmable
 * counter 'ri'.  Returns non-zero (the counter's bit) when allowed and
 * 0 when not.
 */
static int
iap_event_westmere_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t allowed;

	if (evsel == 0x60 || evsel == 0xB3) {
		/* Events valid only on counter 0. */
		allowed = 0x1;
	} else if (evsel == 0x4C || evsel == 0x4E || evsel == 0x51 ||
	    evsel == 0x63) {
		/* Events valid only on counter 0, 1. */
		allowed = 0x3;
	} else {
		/* Any row index is ok. */
		allowed = ~0;
	}

	return (allowed & (1 << ri));
}
673 
/*
 * Sandy Bridge / Ivy Bridge family: check whether event 'evsel' may be
 * counted on programmable counter 'ri'.  Returns non-zero (the
 * counter's bit) when allowed and 0 when not.
 */
static int
iap_event_sb_sbx_ib_ibx_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t allowed;

	if (evsel == 0xB7) {
		/* Events valid only on counter 0. */
		allowed = 0x1;
	} else if (evsel == 0xC0) {
		/* Events valid only on counter 1. */
		allowed = 0x2;
	} else if (evsel == 0x48 || evsel == 0xA2 || evsel == 0xA3) {
		/* Events valid only on counter 2. */
		allowed = 0x4;
	} else if (evsel == 0xBB || evsel == 0xCD) {
		/* Events valid only on counter 3. */
		allowed = 0x8;
	} else {
		/* Any row index is ok. */
		allowed = ~0;
	}

	return (allowed & (1 << ri));
}
705 
/*
 * Default check of whether event 'evsel' may be counted on
 * programmable counter 'ri', used for CPU types without a
 * model-specific table.  Returns non-zero (the counter's bit) when
 * allowed and 0 when not.
 */
static int
iap_event_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t allowed;

	if (evsel == 0x10 || evsel == 0x14 || evsel == 0x18 ||
	    evsel == 0xB3 || evsel == 0xC1 || evsel == 0xCB) {
		/* Events valid only on counter 0. */
		allowed = (1 << 0);
	} else if (evsel == 0x11 || evsel == 0x12 || evsel == 0x13) {
		/* Events valid only on counter 1. */
		allowed = (1 << 1);
	} else {
		/* Any row index is ok. */
		allowed = ~0;
	}

	return (allowed & (1 << ri));
}
739 
740 static int
741 iap_allocate_pmc(int cpu, int ri, struct pmc *pm,
742     const struct pmc_op_pmcallocate *a)
743 {
744 	enum pmc_event map;
745 	uint8_t ev;
746 	uint32_t caps;
747 	const struct pmc_md_iap_op_pmcallocate *iap;
748 
749 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
750 	    ("[core,%d] illegal CPU %d", __LINE__, cpu));
751 	KASSERT(ri >= 0 && ri < core_iap_npmc,
752 	    ("[core,%d] illegal row-index value %d", __LINE__, ri));
753 
754 	/* check requested capabilities */
755 	caps = a->pm_caps;
756 	if ((IAP_PMC_CAPS & caps) != caps)
757 		return (EPERM);
758 	map = 0;	/* XXX: silent GCC warning */
759 	iap = &a->pm_md.pm_iap;
760 	ev = IAP_EVSEL_GET(iap->pm_iap_config);
761 
762 	switch (core_cputype) {
763 	case PMC_CPU_INTEL_COREI7:
764 	case PMC_CPU_INTEL_NEHALEM_EX:
765 		if (iap_event_corei7_ok_on_counter(ev, ri) == 0)
766 			return (EINVAL);
767 		break;
768 	case PMC_CPU_INTEL_SKYLAKE:
769 	case PMC_CPU_INTEL_SKYLAKE_XEON:
770 	case PMC_CPU_INTEL_BROADWELL:
771 	case PMC_CPU_INTEL_BROADWELL_XEON:
772 	case PMC_CPU_INTEL_SANDYBRIDGE:
773 	case PMC_CPU_INTEL_SANDYBRIDGE_XEON:
774 	case PMC_CPU_INTEL_IVYBRIDGE:
775 	case PMC_CPU_INTEL_IVYBRIDGE_XEON:
776 	case PMC_CPU_INTEL_HASWELL:
777 	case PMC_CPU_INTEL_HASWELL_XEON:
778 		if (iap_event_sb_sbx_ib_ibx_ok_on_counter(ev, ri) == 0)
779 			return (EINVAL);
780 		break;
781 	case PMC_CPU_INTEL_WESTMERE:
782 	case PMC_CPU_INTEL_WESTMERE_EX:
783 		if (iap_event_westmere_ok_on_counter(ev, ri) == 0)
784 			return (EINVAL);
785 		break;
786 	default:
787 		if (iap_event_ok_on_counter(ev, ri) == 0)
788 			return (EINVAL);
789 	}
790 
791 	pm->pm_md.pm_iap.pm_iap_evsel = iap->pm_iap_config;
792 	return (0);
793 }
794 
795 static int
796 iap_config_pmc(int cpu, int ri, struct pmc *pm)
797 {
798 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
799 	    ("[core,%d] illegal CPU %d", __LINE__, cpu));
800 
801 	KASSERT(ri >= 0 && ri < core_iap_npmc,
802 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
803 
804 	PMCDBG3(MDP,CFG,1, "iap-config cpu=%d ri=%d pm=%p", cpu, ri, pm);
805 
806 	KASSERT(core_pcpu[cpu] != NULL, ("[core,%d] null per-cpu %d", __LINE__,
807 	    cpu));
808 
809 	core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc = pm;
810 
811 	return (0);
812 }
813 
814 static int
815 iap_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
816 {
817 	int error;
818 	struct pmc_hw *phw;
819 	char iap_name[PMC_NAME_MAX];
820 
821 	phw = &core_pcpu[cpu]->pc_corepmcs[ri];
822 
823 	(void) snprintf(iap_name, sizeof(iap_name), "IAP-%d", ri);
824 	if ((error = copystr(iap_name, pi->pm_name, PMC_NAME_MAX,
825 	    NULL)) != 0)
826 		return (error);
827 
828 	pi->pm_class = PMC_CLASS_IAP;
829 
830 	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
831 		pi->pm_enabled = TRUE;
832 		*ppmc          = phw->phw_pmc;
833 	} else {
834 		pi->pm_enabled = FALSE;
835 		*ppmc          = NULL;
836 	}
837 
838 	return (0);
839 }
840 
841 static int
842 iap_get_config(int cpu, int ri, struct pmc **ppm)
843 {
844 	*ppm = core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc;
845 
846 	return (0);
847 }
848 
849 static int
850 iap_get_msr(int ri, uint32_t *msr)
851 {
852 	KASSERT(ri >= 0 && ri < core_iap_npmc,
853 	    ("[iap,%d] ri %d out of range", __LINE__, ri));
854 
855 	*msr = ri;
856 
857 	return (0);
858 }
859 
860 static int
861 iap_read_pmc(int cpu, int ri, pmc_value_t *v)
862 {
863 	struct pmc *pm;
864 	pmc_value_t tmp;
865 
866 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
867 	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
868 	KASSERT(ri >= 0 && ri < core_iap_npmc,
869 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
870 
871 	pm = core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc;
872 
873 	KASSERT(pm,
874 	    ("[core,%d] cpu %d ri %d pmc not configured", __LINE__, cpu,
875 		ri));
876 
877 	tmp = rdpmc(ri);
878 	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
879 		*v = iap_perfctr_value_to_reload_count(tmp);
880 	else
881 		*v = tmp & ((1ULL << core_iap_width) - 1);
882 
883 	PMCDBG4(MDP,REA,1, "iap-read cpu=%d ri=%d msr=0x%x -> v=%jx", cpu, ri,
884 	    IAP_PMC0 + ri, *v);
885 
886 	return (0);
887 }
888 
889 static int
890 iap_release_pmc(int cpu, int ri, struct pmc *pm)
891 {
892 	(void) pm;
893 
894 	PMCDBG3(MDP,REL,1, "iap-release cpu=%d ri=%d pm=%p", cpu, ri,
895 	    pm);
896 
897 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
898 	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
899 	KASSERT(ri >= 0 && ri < core_iap_npmc,
900 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
901 
902 	KASSERT(core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc
903 	    == NULL, ("[core,%d] PHW pmc non-NULL", __LINE__));
904 
905 	return (0);
906 }
907 
908 static int
909 iap_start_pmc(int cpu, int ri)
910 {
911 	struct pmc *pm;
912 	uint32_t evsel;
913 	struct core_cpu *cc;
914 
915 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
916 	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
917 	KASSERT(ri >= 0 && ri < core_iap_npmc,
918 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
919 
920 	cc = core_pcpu[cpu];
921 	pm = cc->pc_corepmcs[ri].phw_pmc;
922 
923 	KASSERT(pm,
924 	    ("[core,%d] starting cpu%d,ri%d with no pmc configured",
925 		__LINE__, cpu, ri));
926 
927 	PMCDBG2(MDP,STA,1, "iap-start cpu=%d ri=%d", cpu, ri);
928 
929 	evsel = pm->pm_md.pm_iap.pm_iap_evsel;
930 
931 	PMCDBG4(MDP,STA,2, "iap-start/2 cpu=%d ri=%d evselmsr=0x%x evsel=0x%x",
932 	    cpu, ri, IAP_EVSEL0 + ri, evsel);
933 
934 	/* Event specific configuration. */
935 
936 	switch (IAP_EVSEL_GET(evsel)) {
937 	case 0xB7:
938 		wrmsr(IA_OFFCORE_RSP0, pm->pm_md.pm_iap.pm_iap_rsp);
939 		break;
940 	case 0xBB:
941 		wrmsr(IA_OFFCORE_RSP1, pm->pm_md.pm_iap.pm_iap_rsp);
942 		break;
943 	default:
944 		break;
945 	}
946 
947 	wrmsr(IAP_EVSEL0 + ri, evsel | IAP_EN);
948 
949 	if (core_cputype == PMC_CPU_INTEL_CORE)
950 		return (0);
951 
952 	do {
953 		cc->pc_resync = 0;
954 		cc->pc_globalctrl |= (1ULL << ri);
955 		wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);
956 	} while (cc->pc_resync != 0);
957 
958 	return (0);
959 }
960 
961 static int
962 iap_stop_pmc(int cpu, int ri)
963 {
964 	struct pmc *pm;
965 	struct core_cpu *cc;
966 	uint64_t msr;
967 
968 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
969 	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
970 	KASSERT(ri >= 0 && ri < core_iap_npmc,
971 	    ("[core,%d] illegal row index %d", __LINE__, ri));
972 
973 	cc = core_pcpu[cpu];
974 	pm = cc->pc_corepmcs[ri].phw_pmc;
975 
976 	KASSERT(pm,
977 	    ("[core,%d] cpu%d ri%d no configured PMC to stop", __LINE__,
978 		cpu, ri));
979 
980 	PMCDBG2(MDP,STO,1, "iap-stop cpu=%d ri=%d", cpu, ri);
981 
982 	msr = rdmsr(IAP_EVSEL0 + ri) & ~IAP_EVSEL_MASK;
983 	wrmsr(IAP_EVSEL0 + ri, msr);	/* stop hw */
984 
985 	if (core_cputype == PMC_CPU_INTEL_CORE)
986 		return (0);
987 
988 	msr = 0;
989 	do {
990 		cc->pc_resync = 0;
991 		cc->pc_globalctrl &= ~(1ULL << ri);
992 		msr = rdmsr(IA_GLOBAL_CTRL) & ~IA_GLOBAL_CTRL_MASK;
993 		wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);
994 	} while (cc->pc_resync != 0);
995 
996 	return (0);
997 }
998 
999 static int
1000 iap_write_pmc(int cpu, int ri, pmc_value_t v)
1001 {
1002 	struct pmc *pm;
1003 	struct core_cpu *cc;
1004 
1005 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
1006 	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
1007 	KASSERT(ri >= 0 && ri < core_iap_npmc,
1008 	    ("[core,%d] illegal row index %d", __LINE__, ri));
1009 
1010 	cc = core_pcpu[cpu];
1011 	pm = cc->pc_corepmcs[ri].phw_pmc;
1012 
1013 	KASSERT(pm,
1014 	    ("[core,%d] cpu%d ri%d no configured PMC to stop", __LINE__,
1015 		cpu, ri));
1016 
1017 	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
1018 		v = iap_reload_count_to_perfctr_value(v);
1019 
1020 	v &= (1ULL << core_iap_width) - 1;
1021 
1022 	PMCDBG4(MDP,WRI,1, "iap-write cpu=%d ri=%d msr=0x%x v=%jx", cpu, ri,
1023 	    IAP_PMC0 + ri, v);
1024 
1025 	/*
1026 	 * Write the new value to the counter (or it's alias).  The
1027 	 * counter will be in a stopped state when the pcd_write()
1028 	 * entry point is called.
1029 	 */
1030 	wrmsr(core_iap_wroffset + IAP_PMC0 + ri, v);
1031 	return (0);
1032 }
1033 
1034 
1035 static void
1036 iap_initialize(struct pmc_mdep *md, int maxcpu, int npmc, int pmcwidth,
1037     int flags)
1038 {
1039 	struct pmc_classdep *pcd;
1040 
1041 	KASSERT(md != NULL, ("[iap,%d] md is NULL", __LINE__));
1042 
1043 	PMCDBG0(MDP,INI,1, "iap-initialize");
1044 
1045 	/* Remember the set of architectural events supported. */
1046 	core_architectural_events = ~flags;
1047 
1048 	pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP];
1049 
1050 	pcd->pcd_caps	= IAP_PMC_CAPS;
1051 	pcd->pcd_class	= PMC_CLASS_IAP;
1052 	pcd->pcd_num	= npmc;
1053 	pcd->pcd_ri	= md->pmd_npmc;
1054 	pcd->pcd_width	= pmcwidth;
1055 
1056 	pcd->pcd_allocate_pmc	= iap_allocate_pmc;
1057 	pcd->pcd_config_pmc	= iap_config_pmc;
1058 	pcd->pcd_describe	= iap_describe;
1059 	pcd->pcd_get_config	= iap_get_config;
1060 	pcd->pcd_get_msr	= iap_get_msr;
1061 	pcd->pcd_pcpu_fini	= core_pcpu_fini;
1062 	pcd->pcd_pcpu_init	= core_pcpu_init;
1063 	pcd->pcd_read_pmc	= iap_read_pmc;
1064 	pcd->pcd_release_pmc	= iap_release_pmc;
1065 	pcd->pcd_start_pmc	= iap_start_pmc;
1066 	pcd->pcd_stop_pmc	= iap_stop_pmc;
1067 	pcd->pcd_write_pmc	= iap_write_pmc;
1068 
1069 	md->pmd_npmc	       += npmc;
1070 }
1071 
1072 static int
1073 core_intr(struct trapframe *tf)
1074 {
1075 	pmc_value_t v;
1076 	struct pmc *pm;
1077 	struct core_cpu *cc;
1078 	int error, found_interrupt, ri;
1079 	uint64_t msr;
1080 
1081 	PMCDBG3(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", curcpu, (void *) tf,
1082 	    TRAPF_USERMODE(tf));
1083 
1084 	found_interrupt = 0;
1085 	cc = core_pcpu[curcpu];
1086 
1087 	for (ri = 0; ri < core_iap_npmc; ri++) {
1088 
1089 		if ((pm = cc->pc_corepmcs[ri].phw_pmc) == NULL ||
1090 		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
1091 			continue;
1092 
1093 		if (!iap_pmc_has_overflowed(ri))
1094 			continue;
1095 
1096 		found_interrupt = 1;
1097 
1098 		if (pm->pm_state != PMC_STATE_RUNNING)
1099 			continue;
1100 
1101 		error = pmc_process_interrupt(PMC_HR, pm, tf);
1102 
1103 		v = pm->pm_sc.pm_reloadcount;
1104 		v = iap_reload_count_to_perfctr_value(v);
1105 
1106 		/*
1107 		 * Stop the counter, reload it but only restart it if
1108 		 * the PMC is not stalled.
1109 		 */
1110 		msr = rdmsr(IAP_EVSEL0 + ri) & ~IAP_EVSEL_MASK;
1111 		wrmsr(IAP_EVSEL0 + ri, msr);
1112 		wrmsr(core_iap_wroffset + IAP_PMC0 + ri, v);
1113 
1114 		if (error)
1115 			continue;
1116 
1117 		wrmsr(IAP_EVSEL0 + ri, msr | (pm->pm_md.pm_iap.pm_iap_evsel |
1118 					      IAP_EN));
1119 	}
1120 
1121 	if (found_interrupt)
1122 		lapic_reenable_pmc();
1123 
1124 	if (found_interrupt)
1125 		counter_u64_add(pmc_stats.pm_intr_processed, 1);
1126 	else
1127 		counter_u64_add(pmc_stats.pm_intr_ignored, 1);
1128 
1129 	return (found_interrupt);
1130 }
1131 
1132 static int
1133 core2_intr(struct trapframe *tf)
1134 {
1135 	int error, found_interrupt, n, cpu;
1136 	uint64_t flag, intrstatus, intrenable, msr;
1137 	struct pmc *pm;
1138 	struct core_cpu *cc;
1139 	pmc_value_t v;
1140 
1141 	cpu = curcpu;
1142 	PMCDBG3(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
1143 	    TRAPF_USERMODE(tf));
1144 
1145 	/*
1146 	 * The IA_GLOBAL_STATUS (MSR 0x38E) register indicates which
1147 	 * PMCs have a pending PMI interrupt.  We take a 'snapshot' of
1148 	 * the current set of interrupting PMCs and process these
1149 	 * after stopping them.
1150 	 */
1151 	intrstatus = rdmsr(IA_GLOBAL_STATUS);
1152 	intrenable = intrstatus & core_pmcmask;
1153 
1154 	PMCDBG2(MDP,INT, 1, "cpu=%d intrstatus=%jx", cpu,
1155 	    (uintmax_t) intrstatus);
1156 
1157 	found_interrupt = 0;
1158 	cc = core_pcpu[cpu];
1159 
1160 	KASSERT(cc != NULL, ("[core,%d] null pcpu", __LINE__));
1161 
1162 	cc->pc_globalctrl &= ~intrenable;
1163 	cc->pc_resync = 1;	/* MSRs now potentially out of sync. */
1164 
1165 	/*
1166 	 * Stop PMCs and clear overflow status bits.
1167 	 */
1168 	msr = rdmsr(IA_GLOBAL_CTRL) & ~IA_GLOBAL_CTRL_MASK;
1169 	wrmsr(IA_GLOBAL_CTRL, msr);
1170 	wrmsr(IA_GLOBAL_OVF_CTRL, intrenable |
1171 	    IA_GLOBAL_STATUS_FLAG_OVFBUF |
1172 	    IA_GLOBAL_STATUS_FLAG_CONDCHG);
1173 
1174 	/*
1175 	 * Look for interrupts from fixed function PMCs.
1176 	 */
1177 	for (n = 0, flag = (1ULL << IAF_OFFSET); n < core_iaf_npmc;
1178 	     n++, flag <<= 1) {
1179 
1180 		if ((intrstatus & flag) == 0)
1181 			continue;
1182 
1183 		found_interrupt = 1;
1184 
1185 		pm = cc->pc_corepmcs[n + core_iaf_ri].phw_pmc;
1186 		if (pm == NULL || pm->pm_state != PMC_STATE_RUNNING ||
1187 		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
1188 			continue;
1189 
1190 		error = pmc_process_interrupt(PMC_HR, pm, tf);
1191 
1192 		if (error)
1193 			intrenable &= ~flag;
1194 
1195 		v = iaf_reload_count_to_perfctr_value(pm->pm_sc.pm_reloadcount);
1196 
1197 		/* Reload sampling count. */
1198 		wrmsr(IAF_CTR0 + n, v);
1199 
1200 		PMCDBG4(MDP,INT, 1, "iaf-intr cpu=%d error=%d v=%jx(%jx)", curcpu,
1201 		    error, (uintmax_t) v, (uintmax_t) rdpmc(IAF_RI_TO_MSR(n)));
1202 	}
1203 
1204 	/*
1205 	 * Process interrupts from the programmable counters.
1206 	 */
1207 	for (n = 0, flag = 1; n < core_iap_npmc; n++, flag <<= 1) {
1208 		if ((intrstatus & flag) == 0)
1209 			continue;
1210 
1211 		found_interrupt = 1;
1212 
1213 		pm = cc->pc_corepmcs[n].phw_pmc;
1214 		if (pm == NULL || pm->pm_state != PMC_STATE_RUNNING ||
1215 		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
1216 			continue;
1217 
1218 		error = pmc_process_interrupt(PMC_HR, pm, tf);
1219 		if (error)
1220 			intrenable &= ~flag;
1221 
1222 		v = iap_reload_count_to_perfctr_value(pm->pm_sc.pm_reloadcount);
1223 
1224 		PMCDBG3(MDP,INT, 1, "iap-intr cpu=%d error=%d v=%jx", cpu, error,
1225 		    (uintmax_t) v);
1226 
1227 		/* Reload sampling count. */
1228 		wrmsr(core_iap_wroffset + IAP_PMC0 + n, v);
1229 	}
1230 
1231 	/*
1232 	 * Reenable all non-stalled PMCs.
1233 	 */
1234 	PMCDBG2(MDP,INT, 1, "cpu=%d intrenable=%jx", cpu,
1235 	    (uintmax_t) intrenable);
1236 
1237 	cc->pc_globalctrl |= intrenable;
1238 
1239 	wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl & IA_GLOBAL_CTRL_MASK);
1240 
1241 	PMCDBG5(MDP,INT, 1, "cpu=%d fixedctrl=%jx globalctrl=%jx status=%jx "
1242 	    "ovf=%jx", cpu, (uintmax_t) rdmsr(IAF_CTRL),
1243 	    (uintmax_t) rdmsr(IA_GLOBAL_CTRL),
1244 	    (uintmax_t) rdmsr(IA_GLOBAL_STATUS),
1245 	    (uintmax_t) rdmsr(IA_GLOBAL_OVF_CTRL));
1246 
1247 	if (found_interrupt)
1248 		lapic_reenable_pmc();
1249 
1250 	if (found_interrupt)
1251 		counter_u64_add(pmc_stats.pm_intr_processed, 1);
1252 	else
1253 		counter_u64_add(pmc_stats.pm_intr_ignored, 1);
1254 
1255 	return (found_interrupt);
1256 }
1257 
1258 int
1259 pmc_core_initialize(struct pmc_mdep *md, int maxcpu, int version_override)
1260 {
1261 	int cpuid[CORE_CPUID_REQUEST_SIZE];
1262 	int ipa_version, flags, nflags;
1263 
1264 	do_cpuid(CORE_CPUID_REQUEST, cpuid);
1265 
1266 	ipa_version = (version_override > 0) ? version_override :
1267 	    cpuid[CORE_CPUID_EAX] & 0xFF;
1268 	core_cputype = md->pmd_cputype;
1269 
1270 	PMCDBG3(MDP,INI,1,"core-init cputype=%d ncpu=%d ipa-version=%d",
1271 	    core_cputype, maxcpu, ipa_version);
1272 
1273 	if (ipa_version < 1 || ipa_version > 4 ||
1274 	    (core_cputype != PMC_CPU_INTEL_CORE && ipa_version == 1)) {
1275 		/* Unknown PMC architecture. */
1276 		printf("hwpc_core: unknown PMC architecture: %d\n",
1277 		    ipa_version);
1278 		return (EPROGMISMATCH);
1279 	}
1280 
1281 	core_iap_wroffset = 0;
1282 	if (cpu_feature2 & CPUID2_PDCM) {
1283 		if (rdmsr(IA32_PERF_CAPABILITIES) & PERFCAP_FW_WRITE) {
1284 			PMCDBG0(MDP, INI, 1,
1285 			    "core-init full-width write supported");
1286 			core_iap_wroffset = IAP_A_PMC0 - IAP_PMC0;
1287 		} else
1288 			PMCDBG0(MDP, INI, 1,
1289 			    "core-init full-width write NOT supported");
1290 	} else
1291 		PMCDBG0(MDP, INI, 1, "core-init pdcm not supported");
1292 
1293 	core_pmcmask = 0;
1294 
1295 	/*
1296 	 * Initialize programmable counters.
1297 	 */
1298 	core_iap_npmc = (cpuid[CORE_CPUID_EAX] >> 8) & 0xFF;
1299 	core_iap_width = (cpuid[CORE_CPUID_EAX] >> 16) & 0xFF;
1300 
1301 	core_pmcmask |= ((1ULL << core_iap_npmc) - 1);
1302 
1303 	nflags = (cpuid[CORE_CPUID_EAX] >> 24) & 0xFF;
1304 	flags = cpuid[CORE_CPUID_EBX] & ((1 << nflags) - 1);
1305 
1306 	iap_initialize(md, maxcpu, core_iap_npmc, core_iap_width, flags);
1307 
1308 	/*
1309 	 * Initialize fixed function counters, if present.
1310 	 */
1311 	if (core_cputype != PMC_CPU_INTEL_CORE) {
1312 		core_iaf_ri = core_iap_npmc;
1313 		core_iaf_npmc = cpuid[CORE_CPUID_EDX] & 0x1F;
1314 		core_iaf_width = (cpuid[CORE_CPUID_EDX] >> 5) & 0xFF;
1315 
1316 		iaf_initialize(md, maxcpu, core_iaf_npmc, core_iaf_width);
1317 		core_pmcmask |= ((1ULL << core_iaf_npmc) - 1) << IAF_OFFSET;
1318 	}
1319 
1320 	PMCDBG2(MDP,INI,1,"core-init pmcmask=0x%jx iafri=%d", core_pmcmask,
1321 	    core_iaf_ri);
1322 
1323 	core_pcpu = malloc(sizeof(*core_pcpu) * maxcpu, M_PMC,
1324 	    M_ZERO | M_WAITOK);
1325 
1326 	/*
1327 	 * Choose the appropriate interrupt handler.
1328 	 */
1329 	if (ipa_version == 1)
1330 		md->pmd_intr = core_intr;
1331 	else
1332 		md->pmd_intr = core2_intr;
1333 
1334 	md->pmd_pcpu_fini = NULL;
1335 	md->pmd_pcpu_init = NULL;
1336 
1337 	return (0);
1338 }
1339 
1340 void
1341 pmc_core_finalize(struct pmc_mdep *md)
1342 {
1343 	PMCDBG0(MDP,INI,1, "core-finalize");
1344 
1345 	free(core_pcpu, M_PMC);
1346 	core_pcpu = NULL;
1347 }
1348