xref: /freebsd/sys/dev/hwpmc/hwpmc_core.c (revision 9a41926bfb664dd77659d1615ba55d75c2c530a8)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2008 Joseph Koshy
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * Intel Core PMCs.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 
36 #include <sys/param.h>
37 #include <sys/bus.h>
38 #include <sys/pmc.h>
39 #include <sys/pmckern.h>
40 #include <sys/smp.h>
41 #include <sys/systm.h>
42 
43 #include <machine/intr_machdep.h>
44 #include <x86/apicvar.h>
45 #include <machine/cpu.h>
46 #include <machine/cpufunc.h>
47 #include <machine/md_var.h>
48 #include <machine/specialreg.h>
49 
/*
 * CPUID leaf 0AH.  The remaining constants are presumably the size of,
 * and the register indices into, the array that receives the result --
 * TODO confirm against the caller.
 */
#define	CORE_CPUID_REQUEST		0xA
#define	CORE_CPUID_REQUEST_SIZE		4
#define	CORE_CPUID_EAX			0
#define	CORE_CPUID_EBX			1
#define	CORE_CPUID_ECX			2
#define	CORE_CPUID_EDX			3
56 
/* Capabilities accepted for the fixed-function (IAF) counter class. */
#define	IAF_PMC_CAPS							\
	(PMC_CAP_USER | PMC_CAP_SYSTEM | PMC_CAP_INTERRUPT |		\
	 PMC_CAP_READ | PMC_CAP_WRITE)

/* Selector passed to rdpmc() for fixed counter RI: bit 30 plus the index. */
#define	IAF_RI_TO_MSR(RI)		((1 << 30) + (RI))

/* Capabilities accepted for the programmable (IAP) counter class. */
#define	IAP_PMC_CAPS							\
	(PMC_CAP_USER | PMC_CAP_SYSTEM | PMC_CAP_INTERRUPT |		\
	 PMC_CAP_READ | PMC_CAP_WRITE | PMC_CAP_EDGE |			\
	 PMC_CAP_THRESHOLD | PMC_CAP_INVERT | PMC_CAP_QUALIFIER |	\
	 PMC_CAP_PRECISE)
65 
/*
 * Classification codes for an event: not architectural, architectural
 * and supported, architectural but not supported.
 */
#define	EV_IS_NOTARCH		0
#define	EV_IS_ARCH_SUPP		1
#define	EV_IS_ARCH_NOTSUPP	(-1)
69 
/*
 * Intel's "architectural" events.  Each enumerator's value is the bit
 * position of that event in the bitmask returned by CPUID.0AH.
 */
enum core_arch_events {
	CORE_AE_UNHALTED_CORE_CYCLES		= 0,
	CORE_AE_INSTRUCTION_RETIRED		= 1,
	CORE_AE_UNHALTED_REFERENCE_CYCLES	= 2,
	CORE_AE_LLC_REFERENCE			= 3,
	CORE_AE_LLC_MISSES			= 4,
	CORE_AE_BRANCH_INSTRUCTION_RETIRED	= 5,
	CORE_AE_BRANCH_MISSES_RETIRED		= 6
};
84 
85 static enum pmc_cputype	core_cputype;
86 
87 struct core_cpu {
88 	volatile uint32_t	pc_resync;
89 	volatile uint32_t	pc_iafctrl;	/* Fixed function control. */
90 	volatile uint64_t	pc_globalctrl;	/* Global control register. */
91 	struct pmc_hw		pc_corepmcs[];
92 };
93 
94 static struct core_cpu **core_pcpu;
95 
96 static uint32_t core_architectural_events;
97 static uint64_t core_pmcmask;
98 
99 static int core_iaf_ri;		/* relative index of fixed counters */
100 static int core_iaf_width;
101 static int core_iaf_npmc;
102 
103 static int core_iap_width;
104 static int core_iap_npmc;
105 static int core_iap_wroffset;
106 
107 static u_int pmc_alloc_refs;
108 static bool pmc_tsx_force_abort_set;
109 
/*
 * Per-CPU init/fini hook that does nothing.  Installed for the IAF
 * class; both arguments are ignored and 0 is always returned.
 */
static int
core_pcpu_noop(struct pmc_mdep *md, int cpu)
{
	(void)cpu;
	(void)md;

	return (0);
}
117 
118 static int
119 core_pcpu_init(struct pmc_mdep *md, int cpu)
120 {
121 	struct pmc_cpu *pc;
122 	struct core_cpu *cc;
123 	struct pmc_hw *phw;
124 	int core_ri, n, npmc;
125 
126 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
127 	    ("[iaf,%d] insane cpu number %d", __LINE__, cpu));
128 
129 	PMCDBG1(MDP,INI,1,"core-init cpu=%d", cpu);
130 
131 	core_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_ri;
132 	npmc = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_num;
133 
134 	if (core_cputype != PMC_CPU_INTEL_CORE)
135 		npmc += md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF].pcd_num;
136 
137 	cc = malloc(sizeof(struct core_cpu) + npmc * sizeof(struct pmc_hw),
138 	    M_PMC, M_WAITOK | M_ZERO);
139 
140 	core_pcpu[cpu] = cc;
141 	pc = pmc_pcpu[cpu];
142 
143 	KASSERT(pc != NULL && cc != NULL,
144 	    ("[core,%d] NULL per-cpu structures cpu=%d", __LINE__, cpu));
145 
146 	for (n = 0, phw = cc->pc_corepmcs; n < npmc; n++, phw++) {
147 		phw->phw_state 	  = PMC_PHW_FLAG_IS_ENABLED |
148 		    PMC_PHW_CPU_TO_STATE(cpu) |
149 		    PMC_PHW_INDEX_TO_STATE(n + core_ri);
150 		phw->phw_pmc	  = NULL;
151 		pc->pc_hwpmcs[n + core_ri]  = phw;
152 	}
153 
154 	return (0);
155 }
156 
157 static int
158 core_pcpu_fini(struct pmc_mdep *md, int cpu)
159 {
160 	int core_ri, n, npmc;
161 	struct pmc_cpu *pc;
162 	struct core_cpu *cc;
163 	uint64_t msr = 0;
164 
165 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
166 	    ("[core,%d] insane cpu number (%d)", __LINE__, cpu));
167 
168 	PMCDBG1(MDP,INI,1,"core-pcpu-fini cpu=%d", cpu);
169 
170 	if ((cc = core_pcpu[cpu]) == NULL)
171 		return (0);
172 
173 	core_pcpu[cpu] = NULL;
174 
175 	pc = pmc_pcpu[cpu];
176 
177 	KASSERT(pc != NULL, ("[core,%d] NULL per-cpu %d state", __LINE__,
178 		cpu));
179 
180 	npmc = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_num;
181 	core_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP].pcd_ri;
182 
183 	for (n = 0; n < npmc; n++) {
184 		msr = rdmsr(IAP_EVSEL0 + n) & ~IAP_EVSEL_MASK;
185 		wrmsr(IAP_EVSEL0 + n, msr);
186 	}
187 
188 	if (core_cputype != PMC_CPU_INTEL_CORE) {
189 		msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK;
190 		wrmsr(IAF_CTRL, msr);
191 		npmc += md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF].pcd_num;
192 	}
193 
194 	for (n = 0; n < npmc; n++)
195 		pc->pc_hwpmcs[n + core_ri] = NULL;
196 
197 	free(cc, M_PMC);
198 
199 	return (0);
200 }
201 
202 /*
203  * Fixed function counters.
204  */
205 
206 static pmc_value_t
207 iaf_perfctr_value_to_reload_count(pmc_value_t v)
208 {
209 
210 	/* If the PMC has overflowed, return a reload count of zero. */
211 	if ((v & (1ULL << (core_iaf_width - 1))) == 0)
212 		return (0);
213 	v &= (1ULL << core_iaf_width) - 1;
214 	return (1ULL << core_iaf_width) - v;
215 }
216 
217 static pmc_value_t
218 iaf_reload_count_to_perfctr_value(pmc_value_t rlc)
219 {
220 	return (1ULL << core_iaf_width) - rlc;
221 }
222 
223 static void
224 tweak_tsx_force_abort(void *arg)
225 {
226 	u_int val;
227 
228 	val = (uintptr_t)arg;
229 	wrmsr(MSR_TSX_FORCE_ABORT, val);
230 }
231 
232 static int
233 iaf_allocate_pmc(int cpu, int ri, struct pmc *pm,
234     const struct pmc_op_pmcallocate *a)
235 {
236 	uint8_t ev, umask;
237 	uint32_t caps, flags, config;
238 	const struct pmc_md_iap_op_pmcallocate *iap;
239 
240 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
241 	    ("[core,%d] illegal CPU %d", __LINE__, cpu));
242 
243 	PMCDBG2(MDP,ALL,1, "iaf-allocate ri=%d reqcaps=0x%x", ri, pm->pm_caps);
244 
245 	if (ri < 0 || ri > core_iaf_npmc)
246 		return (EINVAL);
247 
248 	caps = a->pm_caps;
249 
250 	if (a->pm_class != PMC_CLASS_IAF ||
251 	    (caps & IAF_PMC_CAPS) != caps)
252 		return (EINVAL);
253 
254 	iap = &a->pm_md.pm_iap;
255 	config = iap->pm_iap_config;
256 	ev = IAP_EVSEL_GET(config);
257 	umask = IAP_UMASK_GET(config);
258 
259 	/* INST_RETIRED.ANY */
260 	if (ev == 0xC0 && ri != 0)
261 		return (EINVAL);
262 	/* CPU_CLK_UNHALTED.THREAD */
263 	if (ev == 0x3C && ri != 1)
264 		return (EINVAL);
265 	/* CPU_CLK_UNHALTED.REF */
266 	if (ev == 0x0 && umask == 0x3 && ri != 2)
267 		return (EINVAL);
268 
269 	pmc_alloc_refs++;
270 	if ((cpu_stdext_feature3 & CPUID_STDEXT3_TSXFA) != 0 &&
271 	    !pmc_tsx_force_abort_set) {
272 		pmc_tsx_force_abort_set = true;
273 		smp_rendezvous(NULL, tweak_tsx_force_abort, NULL, (void *)1);
274 	}
275 
276 	flags = 0;
277 	if (config & IAP_OS)
278 		flags |= IAF_OS;
279 	if (config & IAP_USR)
280 		flags |= IAF_USR;
281 	if (config & IAP_ANY)
282 		flags |= IAF_ANY;
283 	if (config & IAP_INT)
284 		flags |= IAF_PMI;
285 
286 	if (caps & PMC_CAP_INTERRUPT)
287 		flags |= IAF_PMI;
288 	if (caps & PMC_CAP_SYSTEM)
289 		flags |= IAF_OS;
290 	if (caps & PMC_CAP_USER)
291 		flags |= IAF_USR;
292 	if ((caps & (PMC_CAP_USER | PMC_CAP_SYSTEM)) == 0)
293 		flags |= (IAF_OS | IAF_USR);
294 
295 	pm->pm_md.pm_iaf.pm_iaf_ctrl = (flags << (ri * 4));
296 
297 	PMCDBG1(MDP,ALL,2, "iaf-allocate config=0x%jx",
298 	    (uintmax_t) pm->pm_md.pm_iaf.pm_iaf_ctrl);
299 
300 	return (0);
301 }
302 
303 static int
304 iaf_config_pmc(int cpu, int ri, struct pmc *pm)
305 {
306 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
307 	    ("[core,%d] illegal CPU %d", __LINE__, cpu));
308 
309 	KASSERT(ri >= 0 && ri < core_iaf_npmc,
310 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
311 
312 	PMCDBG3(MDP,CFG,1, "iaf-config cpu=%d ri=%d pm=%p", cpu, ri, pm);
313 
314 	KASSERT(core_pcpu[cpu] != NULL, ("[core,%d] null per-cpu %d", __LINE__,
315 	    cpu));
316 
317 	core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc = pm;
318 
319 	return (0);
320 }
321 
322 static int
323 iaf_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
324 {
325 	int error;
326 	struct pmc_hw *phw;
327 	char iaf_name[PMC_NAME_MAX];
328 
329 	phw = &core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri];
330 
331 	(void) snprintf(iaf_name, sizeof(iaf_name), "IAF-%d", ri);
332 	if ((error = copystr(iaf_name, pi->pm_name, PMC_NAME_MAX,
333 	    NULL)) != 0)
334 		return (error);
335 
336 	pi->pm_class = PMC_CLASS_IAF;
337 
338 	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
339 		pi->pm_enabled = TRUE;
340 		*ppmc          = phw->phw_pmc;
341 	} else {
342 		pi->pm_enabled = FALSE;
343 		*ppmc          = NULL;
344 	}
345 
346 	return (0);
347 }
348 
349 static int
350 iaf_get_config(int cpu, int ri, struct pmc **ppm)
351 {
352 	*ppm = core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc;
353 
354 	return (0);
355 }
356 
357 static int
358 iaf_get_msr(int ri, uint32_t *msr)
359 {
360 	KASSERT(ri >= 0 && ri < core_iaf_npmc,
361 	    ("[iaf,%d] ri %d out of range", __LINE__, ri));
362 
363 	*msr = IAF_RI_TO_MSR(ri);
364 
365 	return (0);
366 }
367 
368 static int
369 iaf_read_pmc(int cpu, int ri, pmc_value_t *v)
370 {
371 	struct pmc *pm;
372 	pmc_value_t tmp;
373 
374 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
375 	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
376 	KASSERT(ri >= 0 && ri < core_iaf_npmc,
377 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
378 
379 	pm = core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc;
380 
381 	KASSERT(pm,
382 	    ("[core,%d] cpu %d ri %d(%d) pmc not configured", __LINE__, cpu,
383 		ri, ri + core_iaf_ri));
384 
385 	tmp = rdpmc(IAF_RI_TO_MSR(ri));
386 
387 	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
388 		*v = iaf_perfctr_value_to_reload_count(tmp);
389 	else
390 		*v = tmp & ((1ULL << core_iaf_width) - 1);
391 
392 	PMCDBG4(MDP,REA,1, "iaf-read cpu=%d ri=%d msr=0x%x -> v=%jx", cpu, ri,
393 	    IAF_RI_TO_MSR(ri), *v);
394 
395 	return (0);
396 }
397 
398 static int
399 iaf_release_pmc(int cpu, int ri, struct pmc *pmc)
400 {
401 	PMCDBG3(MDP,REL,1, "iaf-release cpu=%d ri=%d pm=%p", cpu, ri, pmc);
402 
403 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
404 	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
405 	KASSERT(ri >= 0 && ri < core_iaf_npmc,
406 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
407 
408 	KASSERT(core_pcpu[cpu]->pc_corepmcs[ri + core_iaf_ri].phw_pmc == NULL,
409 	    ("[core,%d] PHW pmc non-NULL", __LINE__));
410 
411 	MPASS(pmc_alloc_refs > 0);
412 	if (pmc_alloc_refs-- == 1 && pmc_tsx_force_abort_set) {
413 		pmc_tsx_force_abort_set = false;
414 		smp_rendezvous(NULL, tweak_tsx_force_abort, NULL, (void *)0);
415 	}
416 
417 	return (0);
418 }
419 
420 static int
421 iaf_start_pmc(int cpu, int ri)
422 {
423 	struct pmc *pm;
424 	struct core_cpu *iafc;
425 	uint64_t msr = 0;
426 
427 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
428 	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
429 	KASSERT(ri >= 0 && ri < core_iaf_npmc,
430 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
431 
432 	PMCDBG2(MDP,STA,1,"iaf-start cpu=%d ri=%d", cpu, ri);
433 
434 	iafc = core_pcpu[cpu];
435 	pm = iafc->pc_corepmcs[ri + core_iaf_ri].phw_pmc;
436 
437 	iafc->pc_iafctrl |= pm->pm_md.pm_iaf.pm_iaf_ctrl;
438 
439  	msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK;
440  	wrmsr(IAF_CTRL, msr | (iafc->pc_iafctrl & IAF_CTRL_MASK));
441 
442 	do {
443 		iafc->pc_resync = 0;
444 		iafc->pc_globalctrl |= (1ULL << (ri + IAF_OFFSET));
445  		msr = rdmsr(IA_GLOBAL_CTRL) & ~IAF_GLOBAL_CTRL_MASK;
446  		wrmsr(IA_GLOBAL_CTRL, msr | (iafc->pc_globalctrl &
447  					     IAF_GLOBAL_CTRL_MASK));
448 	} while (iafc->pc_resync != 0);
449 
450 	PMCDBG4(MDP,STA,1,"iafctrl=%x(%x) globalctrl=%jx(%jx)",
451 	    iafc->pc_iafctrl, (uint32_t) rdmsr(IAF_CTRL),
452 	    iafc->pc_globalctrl, rdmsr(IA_GLOBAL_CTRL));
453 
454 	return (0);
455 }
456 
457 static int
458 iaf_stop_pmc(int cpu, int ri)
459 {
460 	uint32_t fc;
461 	struct core_cpu *iafc;
462 	uint64_t msr = 0;
463 
464 	PMCDBG2(MDP,STO,1,"iaf-stop cpu=%d ri=%d", cpu, ri);
465 
466 	iafc = core_pcpu[cpu];
467 
468 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
469 	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
470 	KASSERT(ri >= 0 && ri < core_iaf_npmc,
471 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
472 
473 	fc = (IAF_MASK << (ri * 4));
474 
475 	iafc->pc_iafctrl &= ~fc;
476 
477 	PMCDBG1(MDP,STO,1,"iaf-stop iafctrl=%x", iafc->pc_iafctrl);
478  	msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK;
479  	wrmsr(IAF_CTRL, msr | (iafc->pc_iafctrl & IAF_CTRL_MASK));
480 
481 	do {
482 		iafc->pc_resync = 0;
483 		iafc->pc_globalctrl &= ~(1ULL << (ri + IAF_OFFSET));
484  		msr = rdmsr(IA_GLOBAL_CTRL) & ~IAF_GLOBAL_CTRL_MASK;
485  		wrmsr(IA_GLOBAL_CTRL, msr | (iafc->pc_globalctrl &
486  					     IAF_GLOBAL_CTRL_MASK));
487 	} while (iafc->pc_resync != 0);
488 
489 	PMCDBG4(MDP,STO,1,"iafctrl=%x(%x) globalctrl=%jx(%jx)",
490 	    iafc->pc_iafctrl, (uint32_t) rdmsr(IAF_CTRL),
491 	    iafc->pc_globalctrl, rdmsr(IA_GLOBAL_CTRL));
492 
493 	return (0);
494 }
495 
496 static int
497 iaf_write_pmc(int cpu, int ri, pmc_value_t v)
498 {
499 	struct core_cpu *cc;
500 	struct pmc *pm;
501 	uint64_t msr;
502 
503 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
504 	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
505 	KASSERT(ri >= 0 && ri < core_iaf_npmc,
506 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
507 
508 	cc = core_pcpu[cpu];
509 	pm = cc->pc_corepmcs[ri + core_iaf_ri].phw_pmc;
510 
511 	KASSERT(pm,
512 	    ("[core,%d] cpu %d ri %d pmc not configured", __LINE__, cpu, ri));
513 
514 	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
515 		v = iaf_reload_count_to_perfctr_value(v);
516 
517 	/* Turn off fixed counters */
518 	msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK;
519 	wrmsr(IAF_CTRL, msr);
520 
521 	wrmsr(IAF_CTR0 + ri, v & ((1ULL << core_iaf_width) - 1));
522 
523 	/* Turn on fixed counters */
524 	msr = rdmsr(IAF_CTRL) & ~IAF_CTRL_MASK;
525 	wrmsr(IAF_CTRL, msr | (cc->pc_iafctrl & IAF_CTRL_MASK));
526 
527 	PMCDBG6(MDP,WRI,1, "iaf-write cpu=%d ri=%d msr=0x%x v=%jx iafctrl=%jx "
528 	    "pmc=%jx", cpu, ri, IAF_RI_TO_MSR(ri), v,
529 	    (uintmax_t) rdmsr(IAF_CTRL),
530 	    (uintmax_t) rdpmc(IAF_RI_TO_MSR(ri)));
531 
532 	return (0);
533 }
534 
535 
536 static void
537 iaf_initialize(struct pmc_mdep *md, int maxcpu, int npmc, int pmcwidth)
538 {
539 	struct pmc_classdep *pcd;
540 
541 	KASSERT(md != NULL, ("[iaf,%d] md is NULL", __LINE__));
542 
543 	PMCDBG0(MDP,INI,1, "iaf-initialize");
544 
545 	pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAF];
546 
547 	pcd->pcd_caps	= IAF_PMC_CAPS;
548 	pcd->pcd_class	= PMC_CLASS_IAF;
549 	pcd->pcd_num	= npmc;
550 	pcd->pcd_ri	= md->pmd_npmc;
551 	pcd->pcd_width	= pmcwidth;
552 
553 	pcd->pcd_allocate_pmc	= iaf_allocate_pmc;
554 	pcd->pcd_config_pmc	= iaf_config_pmc;
555 	pcd->pcd_describe	= iaf_describe;
556 	pcd->pcd_get_config	= iaf_get_config;
557 	pcd->pcd_get_msr	= iaf_get_msr;
558 	pcd->pcd_pcpu_fini	= core_pcpu_noop;
559 	pcd->pcd_pcpu_init	= core_pcpu_noop;
560 	pcd->pcd_read_pmc	= iaf_read_pmc;
561 	pcd->pcd_release_pmc	= iaf_release_pmc;
562 	pcd->pcd_start_pmc	= iaf_start_pmc;
563 	pcd->pcd_stop_pmc	= iaf_stop_pmc;
564 	pcd->pcd_write_pmc	= iaf_write_pmc;
565 
566 	md->pmd_npmc	       += npmc;
567 }
568 
569 /*
570  * Intel programmable PMCs.
571  */
572 
/* UMASK sub-fields an event can support, one flag bit each. */
#define	IAP_M_CORE		0x01	/* Core specificity */
#define	IAP_M_AGENT		0x02	/* Agent specificity */
#define	IAP_M_PREFETCH		0x04	/* Prefetch */
#define	IAP_M_MESI		0x08	/* MESI */
#define	IAP_M_SNOOPRESPONSE	0x10	/* Snoop response */
#define	IAP_M_SNOOPTYPE		0x20	/* Snoop type */
#define	IAP_M_TRANSITION	0x40	/* Transition */

/* Bit masks of the corresponding fields. */
#define	IAP_F_CORE		0xC000	/* Core specificity, bits 15:14 */
#define	IAP_F_AGENT		0x2000	/* Agent specificity, bit 13 */
#define	IAP_F_PREFETCH		0x3000	/* Prefetch, bits 13:12 */
#define	IAP_F_MESI		0x0F00	/* MESI, bits 11:8 */
#define	IAP_F_SNOOPRESPONSE	0x0B00	/* Snoop response, bits 11, 9, 8 */
#define	IAP_F_SNOOPTYPE		0x0300	/* Snoop type, bits 9:8 */
#define	IAP_F_TRANSITION	0x1000	/* Transition, bit 12 */

/* Specific field values. */
#define	IAP_PREFETCH_RESERVED	0x2000
#define	IAP_CORE_THIS		0x4000
#define	IAP_CORE_ALL		0xC000
#define	IAP_F_CMASK		0xFF000000
594 
595 static pmc_value_t
596 iap_perfctr_value_to_reload_count(pmc_value_t v)
597 {
598 
599 	/* If the PMC has overflowed, return a reload count of zero. */
600 	if ((v & (1ULL << (core_iap_width - 1))) == 0)
601 		return (0);
602 	v &= (1ULL << core_iap_width) - 1;
603 	return (1ULL << core_iap_width) - v;
604 }
605 
606 static pmc_value_t
607 iap_reload_count_to_perfctr_value(pmc_value_t rlc)
608 {
609 	return (1ULL << core_iap_width) - rlc;
610 }
611 
612 static int
613 iap_pmc_has_overflowed(int ri)
614 {
615 	uint64_t v;
616 
617 	/*
618 	 * We treat a Core (i.e., Intel architecture v1) PMC as has
619 	 * having overflowed if its MSB is zero.
620 	 */
621 	v = rdpmc(ri);
622 	return ((v & (1ULL << (core_iap_width - 1))) == 0);
623 }
624 
/*
 * Check whether event 'evsel' may be counted on programmable counter
 * 'ri' (Core i7 rules).  Returns non-zero if allowed, 0 if not; the
 * non-zero value is the counter's bit, 1 << ri.
 */
static int
iap_event_corei7_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t allowed;

	/* Events 0x40-0x43, 0x51 and 0x63 are valid only on counters 0, 1. */
	if ((evsel >= 0x40 && evsel <= 0x43) || evsel == 0x51 ||
	    evsel == 0x63)
		allowed = 0x3;
	else
		allowed = ~(uint32_t)0;	/* Any row index is ok. */

	return (allowed & (1 << ri));
}
649 
/*
 * Check whether event 'evsel' may be counted on programmable counter
 * 'ri' (Westmere rules).  Returns non-zero if allowed, 0 if not; the
 * non-zero value is the counter's bit, 1 << ri.
 */
static int
iap_event_westmere_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t allowed = ~(uint32_t)0;	/* Any row index is ok. */

	if (evsel == 0x60 || evsel == 0xB3) {
		/* Valid only on counter 0. */
		allowed = 0x1;
	} else if (evsel == 0x4C || evsel == 0x4E || evsel == 0x51 ||
	    evsel == 0x63) {
		/* Valid only on counters 0 and 1. */
		allowed = 0x3;
	}

	return (allowed & (1 << ri));
}
680 
/*
 * Check whether event 'evsel' may be counted on programmable counter
 * 'ri' (rules for the Sandy Bridge / Ivy Bridge group of CPU types).
 * Returns non-zero if allowed, 0 if not; the non-zero value is the
 * counter's bit, 1 << ri.
 */
static int
iap_event_sb_sbx_ib_ibx_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t allowed;

	if (evsel == 0xB7)
		allowed = 0x1;		/* Counter 0 only. */
	else if (evsel == 0xC0)
		allowed = 0x2;		/* Counter 1 only. */
	else if (evsel == 0x48 || evsel == 0xA2 || evsel == 0xA3)
		allowed = 0x4;		/* Counter 2 only. */
	else if (evsel == 0xBB || evsel == 0xCD)
		allowed = 0x8;		/* Counter 3 only. */
	else
		allowed = ~(uint32_t)0;	/* Any row index is ok. */

	return (allowed & (1 << ri));
}
712 
/*
 * Check whether event 'evsel' may be counted on programmable counter
 * 'ri' (default rules, used when no model-specific table applies).
 * Returns non-zero if allowed, 0 if not; the non-zero value is the
 * counter's bit, 1 << ri.
 */
static int
iap_event_ok_on_counter(uint8_t evsel, int ri)
{
	uint32_t allowed = ~(uint32_t)0;	/* Any row index is ok. */

	switch (evsel) {
	case 0x10: case 0x14: case 0x18:
	case 0xB3: case 0xC1: case 0xCB:
		/* Valid only on counter 0. */
		allowed = 0x1;
		break;
	case 0x11: case 0x12: case 0x13:
		/* Valid only on counter 1. */
		allowed = 0x2;
		break;
	default:
		break;
	}

	return (allowed & (1 << ri));
}
746 
747 static int
748 iap_allocate_pmc(int cpu, int ri, struct pmc *pm,
749     const struct pmc_op_pmcallocate *a)
750 {
751 	enum pmc_event map;
752 	uint8_t ev;
753 	uint32_t caps;
754 	const struct pmc_md_iap_op_pmcallocate *iap;
755 
756 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
757 	    ("[core,%d] illegal CPU %d", __LINE__, cpu));
758 	KASSERT(ri >= 0 && ri < core_iap_npmc,
759 	    ("[core,%d] illegal row-index value %d", __LINE__, ri));
760 
761 	/* check requested capabilities */
762 	caps = a->pm_caps;
763 	if ((IAP_PMC_CAPS & caps) != caps)
764 		return (EPERM);
765 	map = 0;	/* XXX: silent GCC warning */
766 	iap = &a->pm_md.pm_iap;
767 	ev = IAP_EVSEL_GET(iap->pm_iap_config);
768 
769 	switch (core_cputype) {
770 	case PMC_CPU_INTEL_COREI7:
771 	case PMC_CPU_INTEL_NEHALEM_EX:
772 		if (iap_event_corei7_ok_on_counter(ev, ri) == 0)
773 			return (EINVAL);
774 		break;
775 	case PMC_CPU_INTEL_SKYLAKE:
776 	case PMC_CPU_INTEL_SKYLAKE_XEON:
777 	case PMC_CPU_INTEL_BROADWELL:
778 	case PMC_CPU_INTEL_BROADWELL_XEON:
779 	case PMC_CPU_INTEL_SANDYBRIDGE:
780 	case PMC_CPU_INTEL_SANDYBRIDGE_XEON:
781 	case PMC_CPU_INTEL_IVYBRIDGE:
782 	case PMC_CPU_INTEL_IVYBRIDGE_XEON:
783 	case PMC_CPU_INTEL_HASWELL:
784 	case PMC_CPU_INTEL_HASWELL_XEON:
785 		if (iap_event_sb_sbx_ib_ibx_ok_on_counter(ev, ri) == 0)
786 			return (EINVAL);
787 		break;
788 	case PMC_CPU_INTEL_WESTMERE:
789 	case PMC_CPU_INTEL_WESTMERE_EX:
790 		if (iap_event_westmere_ok_on_counter(ev, ri) == 0)
791 			return (EINVAL);
792 		break;
793 	default:
794 		if (iap_event_ok_on_counter(ev, ri) == 0)
795 			return (EINVAL);
796 	}
797 
798 	pm->pm_md.pm_iap.pm_iap_evsel = iap->pm_iap_config;
799 	return (0);
800 }
801 
802 static int
803 iap_config_pmc(int cpu, int ri, struct pmc *pm)
804 {
805 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
806 	    ("[core,%d] illegal CPU %d", __LINE__, cpu));
807 
808 	KASSERT(ri >= 0 && ri < core_iap_npmc,
809 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
810 
811 	PMCDBG3(MDP,CFG,1, "iap-config cpu=%d ri=%d pm=%p", cpu, ri, pm);
812 
813 	KASSERT(core_pcpu[cpu] != NULL, ("[core,%d] null per-cpu %d", __LINE__,
814 	    cpu));
815 
816 	core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc = pm;
817 
818 	return (0);
819 }
820 
821 static int
822 iap_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
823 {
824 	int error;
825 	struct pmc_hw *phw;
826 	char iap_name[PMC_NAME_MAX];
827 
828 	phw = &core_pcpu[cpu]->pc_corepmcs[ri];
829 
830 	(void) snprintf(iap_name, sizeof(iap_name), "IAP-%d", ri);
831 	if ((error = copystr(iap_name, pi->pm_name, PMC_NAME_MAX,
832 	    NULL)) != 0)
833 		return (error);
834 
835 	pi->pm_class = PMC_CLASS_IAP;
836 
837 	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
838 		pi->pm_enabled = TRUE;
839 		*ppmc          = phw->phw_pmc;
840 	} else {
841 		pi->pm_enabled = FALSE;
842 		*ppmc          = NULL;
843 	}
844 
845 	return (0);
846 }
847 
848 static int
849 iap_get_config(int cpu, int ri, struct pmc **ppm)
850 {
851 	*ppm = core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc;
852 
853 	return (0);
854 }
855 
856 static int
857 iap_get_msr(int ri, uint32_t *msr)
858 {
859 	KASSERT(ri >= 0 && ri < core_iap_npmc,
860 	    ("[iap,%d] ri %d out of range", __LINE__, ri));
861 
862 	*msr = ri;
863 
864 	return (0);
865 }
866 
867 static int
868 iap_read_pmc(int cpu, int ri, pmc_value_t *v)
869 {
870 	struct pmc *pm;
871 	pmc_value_t tmp;
872 
873 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
874 	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
875 	KASSERT(ri >= 0 && ri < core_iap_npmc,
876 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
877 
878 	pm = core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc;
879 
880 	KASSERT(pm,
881 	    ("[core,%d] cpu %d ri %d pmc not configured", __LINE__, cpu,
882 		ri));
883 
884 	tmp = rdpmc(ri);
885 	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
886 		*v = iap_perfctr_value_to_reload_count(tmp);
887 	else
888 		*v = tmp & ((1ULL << core_iap_width) - 1);
889 
890 	PMCDBG4(MDP,REA,1, "iap-read cpu=%d ri=%d msr=0x%x -> v=%jx", cpu, ri,
891 	    IAP_PMC0 + ri, *v);
892 
893 	return (0);
894 }
895 
896 static int
897 iap_release_pmc(int cpu, int ri, struct pmc *pm)
898 {
899 	(void) pm;
900 
901 	PMCDBG3(MDP,REL,1, "iap-release cpu=%d ri=%d pm=%p", cpu, ri,
902 	    pm);
903 
904 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
905 	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
906 	KASSERT(ri >= 0 && ri < core_iap_npmc,
907 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
908 
909 	KASSERT(core_pcpu[cpu]->pc_corepmcs[ri].phw_pmc
910 	    == NULL, ("[core,%d] PHW pmc non-NULL", __LINE__));
911 
912 	return (0);
913 }
914 
915 static int
916 iap_start_pmc(int cpu, int ri)
917 {
918 	struct pmc *pm;
919 	uint32_t evsel;
920 	struct core_cpu *cc;
921 
922 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
923 	    ("[core,%d] illegal CPU value %d", __LINE__, cpu));
924 	KASSERT(ri >= 0 && ri < core_iap_npmc,
925 	    ("[core,%d] illegal row-index %d", __LINE__, ri));
926 
927 	cc = core_pcpu[cpu];
928 	pm = cc->pc_corepmcs[ri].phw_pmc;
929 
930 	KASSERT(pm,
931 	    ("[core,%d] starting cpu%d,ri%d with no pmc configured",
932 		__LINE__, cpu, ri));
933 
934 	PMCDBG2(MDP,STA,1, "iap-start cpu=%d ri=%d", cpu, ri);
935 
936 	evsel = pm->pm_md.pm_iap.pm_iap_evsel;
937 
938 	PMCDBG4(MDP,STA,2, "iap-start/2 cpu=%d ri=%d evselmsr=0x%x evsel=0x%x",
939 	    cpu, ri, IAP_EVSEL0 + ri, evsel);
940 
941 	/* Event specific configuration. */
942 
943 	switch (IAP_EVSEL_GET(evsel)) {
944 	case 0xB7:
945 		wrmsr(IA_OFFCORE_RSP0, pm->pm_md.pm_iap.pm_iap_rsp);
946 		break;
947 	case 0xBB:
948 		wrmsr(IA_OFFCORE_RSP1, pm->pm_md.pm_iap.pm_iap_rsp);
949 		break;
950 	default:
951 		break;
952 	}
953 
954 	wrmsr(IAP_EVSEL0 + ri, evsel | IAP_EN);
955 
956 	if (core_cputype == PMC_CPU_INTEL_CORE)
957 		return (0);
958 
959 	do {
960 		cc->pc_resync = 0;
961 		cc->pc_globalctrl |= (1ULL << ri);
962 		wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);
963 	} while (cc->pc_resync != 0);
964 
965 	return (0);
966 }
967 
968 static int
969 iap_stop_pmc(int cpu, int ri)
970 {
971 	struct pmc *pm;
972 	struct core_cpu *cc;
973 	uint64_t msr;
974 
975 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
976 	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
977 	KASSERT(ri >= 0 && ri < core_iap_npmc,
978 	    ("[core,%d] illegal row index %d", __LINE__, ri));
979 
980 	cc = core_pcpu[cpu];
981 	pm = cc->pc_corepmcs[ri].phw_pmc;
982 
983 	KASSERT(pm,
984 	    ("[core,%d] cpu%d ri%d no configured PMC to stop", __LINE__,
985 		cpu, ri));
986 
987 	PMCDBG2(MDP,STO,1, "iap-stop cpu=%d ri=%d", cpu, ri);
988 
989 	msr = rdmsr(IAP_EVSEL0 + ri) & ~IAP_EVSEL_MASK;
990 	wrmsr(IAP_EVSEL0 + ri, msr);	/* stop hw */
991 
992 	if (core_cputype == PMC_CPU_INTEL_CORE)
993 		return (0);
994 
995 	msr = 0;
996 	do {
997 		cc->pc_resync = 0;
998 		cc->pc_globalctrl &= ~(1ULL << ri);
999 		msr = rdmsr(IA_GLOBAL_CTRL) & ~IA_GLOBAL_CTRL_MASK;
1000 		wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl);
1001 	} while (cc->pc_resync != 0);
1002 
1003 	return (0);
1004 }
1005 
1006 static int
1007 iap_write_pmc(int cpu, int ri, pmc_value_t v)
1008 {
1009 	struct pmc *pm;
1010 	struct core_cpu *cc;
1011 
1012 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
1013 	    ("[core,%d] illegal cpu value %d", __LINE__, cpu));
1014 	KASSERT(ri >= 0 && ri < core_iap_npmc,
1015 	    ("[core,%d] illegal row index %d", __LINE__, ri));
1016 
1017 	cc = core_pcpu[cpu];
1018 	pm = cc->pc_corepmcs[ri].phw_pmc;
1019 
1020 	KASSERT(pm,
1021 	    ("[core,%d] cpu%d ri%d no configured PMC to stop", __LINE__,
1022 		cpu, ri));
1023 
1024 	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
1025 		v = iap_reload_count_to_perfctr_value(v);
1026 
1027 	v &= (1ULL << core_iap_width) - 1;
1028 
1029 	PMCDBG4(MDP,WRI,1, "iap-write cpu=%d ri=%d msr=0x%x v=%jx", cpu, ri,
1030 	    IAP_PMC0 + ri, v);
1031 
1032 	/*
1033 	 * Write the new value to the counter (or it's alias).  The
1034 	 * counter will be in a stopped state when the pcd_write()
1035 	 * entry point is called.
1036 	 */
1037 	wrmsr(core_iap_wroffset + IAP_PMC0 + ri, v);
1038 	return (0);
1039 }
1040 
1041 
1042 static void
1043 iap_initialize(struct pmc_mdep *md, int maxcpu, int npmc, int pmcwidth,
1044     int flags)
1045 {
1046 	struct pmc_classdep *pcd;
1047 
1048 	KASSERT(md != NULL, ("[iap,%d] md is NULL", __LINE__));
1049 
1050 	PMCDBG0(MDP,INI,1, "iap-initialize");
1051 
1052 	/* Remember the set of architectural events supported. */
1053 	core_architectural_events = ~flags;
1054 
1055 	pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_IAP];
1056 
1057 	pcd->pcd_caps	= IAP_PMC_CAPS;
1058 	pcd->pcd_class	= PMC_CLASS_IAP;
1059 	pcd->pcd_num	= npmc;
1060 	pcd->pcd_ri	= md->pmd_npmc;
1061 	pcd->pcd_width	= pmcwidth;
1062 
1063 	pcd->pcd_allocate_pmc	= iap_allocate_pmc;
1064 	pcd->pcd_config_pmc	= iap_config_pmc;
1065 	pcd->pcd_describe	= iap_describe;
1066 	pcd->pcd_get_config	= iap_get_config;
1067 	pcd->pcd_get_msr	= iap_get_msr;
1068 	pcd->pcd_pcpu_fini	= core_pcpu_fini;
1069 	pcd->pcd_pcpu_init	= core_pcpu_init;
1070 	pcd->pcd_read_pmc	= iap_read_pmc;
1071 	pcd->pcd_release_pmc	= iap_release_pmc;
1072 	pcd->pcd_start_pmc	= iap_start_pmc;
1073 	pcd->pcd_stop_pmc	= iap_stop_pmc;
1074 	pcd->pcd_write_pmc	= iap_write_pmc;
1075 
1076 	md->pmd_npmc	       += npmc;
1077 }
1078 
1079 static int
1080 core_intr(struct trapframe *tf)
1081 {
1082 	pmc_value_t v;
1083 	struct pmc *pm;
1084 	struct core_cpu *cc;
1085 	int error, found_interrupt, ri;
1086 	uint64_t msr;
1087 
1088 	PMCDBG3(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", curcpu, (void *) tf,
1089 	    TRAPF_USERMODE(tf));
1090 
1091 	found_interrupt = 0;
1092 	cc = core_pcpu[curcpu];
1093 
1094 	for (ri = 0; ri < core_iap_npmc; ri++) {
1095 
1096 		if ((pm = cc->pc_corepmcs[ri].phw_pmc) == NULL ||
1097 		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
1098 			continue;
1099 
1100 		if (!iap_pmc_has_overflowed(ri))
1101 			continue;
1102 
1103 		found_interrupt = 1;
1104 
1105 		if (pm->pm_state != PMC_STATE_RUNNING)
1106 			continue;
1107 
1108 		error = pmc_process_interrupt(PMC_HR, pm, tf);
1109 
1110 		v = pm->pm_sc.pm_reloadcount;
1111 		v = iap_reload_count_to_perfctr_value(v);
1112 
1113 		/*
1114 		 * Stop the counter, reload it but only restart it if
1115 		 * the PMC is not stalled.
1116 		 */
1117 		msr = rdmsr(IAP_EVSEL0 + ri) & ~IAP_EVSEL_MASK;
1118 		wrmsr(IAP_EVSEL0 + ri, msr);
1119 		wrmsr(core_iap_wroffset + IAP_PMC0 + ri, v);
1120 
1121 		if (error)
1122 			continue;
1123 
1124 		wrmsr(IAP_EVSEL0 + ri, msr | (pm->pm_md.pm_iap.pm_iap_evsel |
1125 					      IAP_EN));
1126 	}
1127 
1128 	if (found_interrupt)
1129 		lapic_reenable_pmc();
1130 
1131 	if (found_interrupt)
1132 		counter_u64_add(pmc_stats.pm_intr_processed, 1);
1133 	else
1134 		counter_u64_add(pmc_stats.pm_intr_ignored, 1);
1135 
1136 	return (found_interrupt);
1137 }
1138 
1139 static int
1140 core2_intr(struct trapframe *tf)
1141 {
1142 	int error, found_interrupt, n, cpu;
1143 	uint64_t flag, intrstatus, intrenable, msr;
1144 	struct pmc *pm;
1145 	struct core_cpu *cc;
1146 	pmc_value_t v;
1147 
1148 	cpu = curcpu;
1149 	PMCDBG3(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
1150 	    TRAPF_USERMODE(tf));
1151 
1152 	/*
1153 	 * The IA_GLOBAL_STATUS (MSR 0x38E) register indicates which
1154 	 * PMCs have a pending PMI interrupt.  We take a 'snapshot' of
1155 	 * the current set of interrupting PMCs and process these
1156 	 * after stopping them.
1157 	 */
1158 	intrstatus = rdmsr(IA_GLOBAL_STATUS);
1159 	intrenable = intrstatus & core_pmcmask;
1160 
1161 	PMCDBG2(MDP,INT, 1, "cpu=%d intrstatus=%jx", cpu,
1162 	    (uintmax_t) intrstatus);
1163 
1164 	found_interrupt = 0;
1165 	cc = core_pcpu[cpu];
1166 
1167 	KASSERT(cc != NULL, ("[core,%d] null pcpu", __LINE__));
1168 
1169 	cc->pc_globalctrl &= ~intrenable;
1170 	cc->pc_resync = 1;	/* MSRs now potentially out of sync. */
1171 
1172 	/*
1173 	 * Stop PMCs and clear overflow status bits.
1174 	 */
1175 	msr = rdmsr(IA_GLOBAL_CTRL) & ~IA_GLOBAL_CTRL_MASK;
1176 	wrmsr(IA_GLOBAL_CTRL, msr);
1177 	wrmsr(IA_GLOBAL_OVF_CTRL, intrenable |
1178 	    IA_GLOBAL_STATUS_FLAG_OVFBUF |
1179 	    IA_GLOBAL_STATUS_FLAG_CONDCHG);
1180 
1181 	/*
1182 	 * Look for interrupts from fixed function PMCs.
1183 	 */
1184 	for (n = 0, flag = (1ULL << IAF_OFFSET); n < core_iaf_npmc;
1185 	     n++, flag <<= 1) {
1186 
1187 		if ((intrstatus & flag) == 0)
1188 			continue;
1189 
1190 		found_interrupt = 1;
1191 
1192 		pm = cc->pc_corepmcs[n + core_iaf_ri].phw_pmc;
1193 		if (pm == NULL || pm->pm_state != PMC_STATE_RUNNING ||
1194 		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
1195 			continue;
1196 
1197 		error = pmc_process_interrupt(PMC_HR, pm, tf);
1198 
1199 		if (error)
1200 			intrenable &= ~flag;
1201 
1202 		v = iaf_reload_count_to_perfctr_value(pm->pm_sc.pm_reloadcount);
1203 
1204 		/* Reload sampling count. */
1205 		wrmsr(IAF_CTR0 + n, v);
1206 
1207 		PMCDBG4(MDP,INT, 1, "iaf-intr cpu=%d error=%d v=%jx(%jx)", curcpu,
1208 		    error, (uintmax_t) v, (uintmax_t) rdpmc(IAF_RI_TO_MSR(n)));
1209 	}
1210 
1211 	/*
1212 	 * Process interrupts from the programmable counters.
1213 	 */
1214 	for (n = 0, flag = 1; n < core_iap_npmc; n++, flag <<= 1) {
1215 		if ((intrstatus & flag) == 0)
1216 			continue;
1217 
1218 		found_interrupt = 1;
1219 
1220 		pm = cc->pc_corepmcs[n].phw_pmc;
1221 		if (pm == NULL || pm->pm_state != PMC_STATE_RUNNING ||
1222 		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
1223 			continue;
1224 
1225 		error = pmc_process_interrupt(PMC_HR, pm, tf);
1226 		if (error)
1227 			intrenable &= ~flag;
1228 
1229 		v = iap_reload_count_to_perfctr_value(pm->pm_sc.pm_reloadcount);
1230 
1231 		PMCDBG3(MDP,INT, 1, "iap-intr cpu=%d error=%d v=%jx", cpu, error,
1232 		    (uintmax_t) v);
1233 
1234 		/* Reload sampling count. */
1235 		wrmsr(core_iap_wroffset + IAP_PMC0 + n, v);
1236 	}
1237 
1238 	/*
1239 	 * Reenable all non-stalled PMCs.
1240 	 */
1241 	PMCDBG2(MDP,INT, 1, "cpu=%d intrenable=%jx", cpu,
1242 	    (uintmax_t) intrenable);
1243 
1244 	cc->pc_globalctrl |= intrenable;
1245 
1246 	wrmsr(IA_GLOBAL_CTRL, cc->pc_globalctrl & IA_GLOBAL_CTRL_MASK);
1247 
1248 	PMCDBG5(MDP,INT, 1, "cpu=%d fixedctrl=%jx globalctrl=%jx status=%jx "
1249 	    "ovf=%jx", cpu, (uintmax_t) rdmsr(IAF_CTRL),
1250 	    (uintmax_t) rdmsr(IA_GLOBAL_CTRL),
1251 	    (uintmax_t) rdmsr(IA_GLOBAL_STATUS),
1252 	    (uintmax_t) rdmsr(IA_GLOBAL_OVF_CTRL));
1253 
1254 	if (found_interrupt)
1255 		lapic_reenable_pmc();
1256 
1257 	if (found_interrupt)
1258 		counter_u64_add(pmc_stats.pm_intr_processed, 1);
1259 	else
1260 		counter_u64_add(pmc_stats.pm_intr_ignored, 1);
1261 
1262 	return (found_interrupt);
1263 }
1264 
1265 int
1266 pmc_core_initialize(struct pmc_mdep *md, int maxcpu, int version_override)
1267 {
1268 	int cpuid[CORE_CPUID_REQUEST_SIZE];
1269 	int ipa_version, flags, nflags;
1270 
1271 	do_cpuid(CORE_CPUID_REQUEST, cpuid);
1272 
1273 	ipa_version = (version_override > 0) ? version_override :
1274 	    cpuid[CORE_CPUID_EAX] & 0xFF;
1275 	core_cputype = md->pmd_cputype;
1276 
1277 	PMCDBG3(MDP,INI,1,"core-init cputype=%d ncpu=%d ipa-version=%d",
1278 	    core_cputype, maxcpu, ipa_version);
1279 
1280 	if (ipa_version < 1 || ipa_version > 4 ||
1281 	    (core_cputype != PMC_CPU_INTEL_CORE && ipa_version == 1)) {
1282 		/* Unknown PMC architecture. */
1283 		printf("hwpc_core: unknown PMC architecture: %d\n",
1284 		    ipa_version);
1285 		return (EPROGMISMATCH);
1286 	}
1287 
1288 	core_iap_wroffset = 0;
1289 	if (cpu_feature2 & CPUID2_PDCM) {
1290 		if (rdmsr(IA32_PERF_CAPABILITIES) & PERFCAP_FW_WRITE) {
1291 			PMCDBG0(MDP, INI, 1,
1292 			    "core-init full-width write supported");
1293 			core_iap_wroffset = IAP_A_PMC0 - IAP_PMC0;
1294 		} else
1295 			PMCDBG0(MDP, INI, 1,
1296 			    "core-init full-width write NOT supported");
1297 	} else
1298 		PMCDBG0(MDP, INI, 1, "core-init pdcm not supported");
1299 
1300 	core_pmcmask = 0;
1301 
1302 	/*
1303 	 * Initialize programmable counters.
1304 	 */
1305 	core_iap_npmc = (cpuid[CORE_CPUID_EAX] >> 8) & 0xFF;
1306 	core_iap_width = (cpuid[CORE_CPUID_EAX] >> 16) & 0xFF;
1307 
1308 	core_pmcmask |= ((1ULL << core_iap_npmc) - 1);
1309 
1310 	nflags = (cpuid[CORE_CPUID_EAX] >> 24) & 0xFF;
1311 	flags = cpuid[CORE_CPUID_EBX] & ((1 << nflags) - 1);
1312 
1313 	iap_initialize(md, maxcpu, core_iap_npmc, core_iap_width, flags);
1314 
1315 	/*
1316 	 * Initialize fixed function counters, if present.
1317 	 */
1318 	if (core_cputype != PMC_CPU_INTEL_CORE) {
1319 		core_iaf_ri = core_iap_npmc;
1320 		core_iaf_npmc = cpuid[CORE_CPUID_EDX] & 0x1F;
1321 		core_iaf_width = (cpuid[CORE_CPUID_EDX] >> 5) & 0xFF;
1322 
1323 		iaf_initialize(md, maxcpu, core_iaf_npmc, core_iaf_width);
1324 		core_pmcmask |= ((1ULL << core_iaf_npmc) - 1) << IAF_OFFSET;
1325 	}
1326 
1327 	PMCDBG2(MDP,INI,1,"core-init pmcmask=0x%jx iafri=%d", core_pmcmask,
1328 	    core_iaf_ri);
1329 
1330 	core_pcpu = malloc(sizeof(*core_pcpu) * maxcpu, M_PMC,
1331 	    M_ZERO | M_WAITOK);
1332 
1333 	/*
1334 	 * Choose the appropriate interrupt handler.
1335 	 */
1336 	if (ipa_version == 1)
1337 		md->pmd_intr = core_intr;
1338 	else
1339 		md->pmd_intr = core2_intr;
1340 
1341 	md->pmd_pcpu_fini = NULL;
1342 	md->pmd_pcpu_init = NULL;
1343 
1344 	return (0);
1345 }
1346 
1347 void
1348 pmc_core_finalize(struct pmc_mdep *md)
1349 {
1350 	PMCDBG0(MDP,INI,1, "core-finalize");
1351 
1352 	free(core_pcpu, M_PMC);
1353 	core_pcpu = NULL;
1354 }
1355