xref: /illumos-gate/usr/src/uts/intel/pcbe/core_pcbe.c (revision 4764d912222e53f8386bae7bf491f5780fd102ec)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Performance Counter Back-End for Intel Family 6 Models 15 and 23
30  */
31 
32 #include <sys/cpuvar.h>
33 #include <sys/param.h>
34 #include <sys/cpc_impl.h>
35 #include <sys/cpc_pcbe.h>
36 #include <sys/modctl.h>
37 #include <sys/inttypes.h>
38 #include <sys/systm.h>
39 #include <sys/cmn_err.h>
40 #include <sys/x86_archext.h>
41 #include <sys/sdt.h>
42 #include <sys/archsystm.h>
43 #include <sys/privregs.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/cred.h>
47 #include <sys/policy.h>
48 
/*
 * Forward declarations.  core_pcbe_init() is called directly from _init();
 * the remaining functions are the entry points installed in core_pcbe_ops.
 */
static int core_pcbe_init(void);
static uint_t core_pcbe_ncounters(void);
static const char *core_pcbe_impl_name(void);
static const char *core_pcbe_cpuref(void);
static char *core_pcbe_list_events(uint_t picnum);
static char *core_pcbe_list_attrs(void);
static uint64_t core_pcbe_event_coverage(char *event);
static uint64_t core_pcbe_overflow_bitmap(void);
static int core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
    uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
    void *token);
static void core_pcbe_program(void *token);
static void core_pcbe_allstop(void);
static void core_pcbe_sample(void *token);
static void core_pcbe_free(void *config);
64 
65 #define	FALSE	0
66 #define	TRUE	1
67 
68 /* Architectural Performance Counter versioning */
69 #define	APC_V1	1
70 #define	APC_V2	2
71 
72 /* Counter Type */
73 #define	CORE_GPC	0	/* General-Purpose Counter (GPC) */
74 #define	CORE_FFC	1	/* Fixed-Function Counter (FFC) */
75 
76 /* MSR Addresses */
77 #define	GPC_BASE_PMC		0x00c1	/* First GPC */
78 #define	GPC_BASE_PES		0x0186	/* First GPC Event Select register */
79 #define	FFC_BASE_PMC		0x0309	/* First FFC */
80 #define	PERF_FIXED_CTR_CTRL	0x038d	/* Used to enable/disable FFCs */
81 #define	PERF_GLOBAL_STATUS	0x038e	/* Overflow status register */
82 #define	PERF_GLOBAL_CTRL	0x038f	/* Used to enable/disable counting */
83 #define	PERF_GLOBAL_OVF_CTRL	0x0390	/* Used to clear overflow status */
84 
85 /*
86  * Processor Event Select register fields
87  */
88 #define	CORE_USR	(1ULL << 16)	/* Count while not in ring 0 */
89 #define	CORE_OS		(1ULL << 17)	/* Count while in ring 0 */
90 #define	CORE_EDGE	(1ULL << 18)	/* Enable edge detection */
91 #define	CORE_PC		(1ULL << 19)	/* Enable pin control */
92 #define	CORE_INT	(1ULL << 20)	/* Enable interrupt on overflow */
93 #define	CORE_EN		(1ULL << 22)	/* Enable counting */
94 #define	CORE_INV	(1ULL << 23)	/* Invert the CMASK */
95 
96 #define	CORE_UMASK_SHIFT	8
97 #define	CORE_UMASK_MASK		0xffu
98 #define	CORE_CMASK_SHIFT	24
99 #define	CORE_CMASK_MASK		0xffu
100 
101 /*
102  * Fixed-function counter attributes
103  */
#define	CORE_FFC_OS_EN	(1ULL << 0)	/* Count while in ring 0 */
#define	CORE_FFC_USR_EN	(1ULL << 1)	/* Count while not in ring 0 */
#define	CORE_FFC_PMI	(1ULL << 3)	/* Enable interrupt on overflow */
107 
108 /*
109  * Number of bits for specifying each FFC's attributes in the control register
110  */
111 #define	CORE_FFC_ATTR_SIZE	4
112 
113 /*
114  * CondChgd and OvfBuffer fields of global status and overflow control registers
115  */
116 #define	CONDCHGD	(1ULL << 63)
117 #define	OVFBUFFER	(1ULL << 62)
118 #define	MASK_CONDCHGD_OVFBUFFER	(CONDCHGD | OVFBUFFER)
119 
120 #define	ALL_STOPPED	0ULL
121 
/*
 * Mask with the low x bits set.  x must be less than 64: shifting a 64-bit
 * value by 64 or more is undefined in C.
 */
#define	BITMASK_XBITS(x)	((1ull << (x)) - 1ull)

/*
 * Only the lower 32-bits can be written to in the general-purpose
 * counters.  The higher bits are extended from bit 31; all ones if
 * bit 31 is one and all zeros otherwise.
 *
 * The fixed-function counters do not have this restriction.
 *
 * BITS_EXTENDED_FROM_31 selects bit 31 through bit (width_gpc - 1) of a
 * counter value, i.e. bit 31 together with every bit that mirrors it.
 */
#define	BITS_EXTENDED_FROM_31	(BITMASK_XBITS(width_gpc) & ~BITMASK_XBITS(31))
132 
/*
 * Write `value' to model-specific register `msr' and fire a DTrace probe
 * that records the register and the value written.
 *
 * Wrapped in do { } while (0) so that the access and its probe behave as a
 * single statement, including under an unbraced if/else.  Both arguments are
 * evaluated twice; do not pass expressions with side effects.
 */
#define	WRMSR(msr, value)						\
	do {								\
		wrmsr((msr), (value));					\
		DTRACE_PROBE2(wrmsr, uint64_t, (msr), uint64_t, (value)); \
	} while (0)

/*
 * Read model-specific register `msr' into the lvalue `value' and fire a
 * DTrace probe that records the register and the value read.  The same
 * single-statement and double-evaluation notes as for WRMSR() apply.
 */
#define	RDMSR(msr, value)						\
	do {								\
		(value) = rdmsr((msr));					\
		DTRACE_PROBE2(rdmsr, uint64_t, (msr), uint64_t, (value)); \
	} while (0)
140 
/*
 * Per-counter configuration.  Allocated by configure_gpc()/configure_ffc(),
 * read by core_pcbe_program() and core_pcbe_sample(), and released by
 * core_pcbe_free().
 */
typedef struct core_pcbe_config {
	/*
	 * Counter value: the preset masked to the counter width when
	 * configured, later refreshed by core_pcbe_program() and
	 * core_pcbe_sample().
	 */
	uint64_t	core_rawpic;
	/* Event Select bits (GPC) or fixed-counter attribute bits (FFC) */
	uint64_t	core_ctl;
	uint64_t	core_pmc;	/* Counter register address */
	/* Event Select register address; PERF_FIXED_CTR_CTRL for an FFC */
	uint64_t	core_pes;
	uint_t		core_picno;	/* Counter number (picnum) */
	uint8_t		core_pictype;	/* CORE_GPC or CORE_FFC */
} core_pcbe_config_t;
149 
/*
 * Operations vector for this back-end.  It is referenced from core_modlpcbe,
 * which _init() registers through mod_install().
 */
pcbe_ops_t core_pcbe_ops = {
	PCBE_VER_1,			/* pcbe_ver */
	CPC_CAP_OVERFLOW_INTERRUPT | CPC_CAP_OVERFLOW_PRECISE,	/* pcbe_caps */
	core_pcbe_ncounters,		/* pcbe_ncounters */
	core_pcbe_impl_name,		/* pcbe_impl_name */
	core_pcbe_cpuref,		/* pcbe_cpuref */
	core_pcbe_list_events,		/* pcbe_list_events */
	core_pcbe_list_attrs,		/* pcbe_list_attrs */
	core_pcbe_event_coverage,	/* pcbe_event_coverage */
	core_pcbe_overflow_bitmap,	/* pcbe_overflow_bitmap */
	core_pcbe_configure,		/* pcbe_configure */
	core_pcbe_program,		/* pcbe_program */
	core_pcbe_allstop,		/* pcbe_allstop */
	core_pcbe_sample,		/* pcbe_sample */
	core_pcbe_free			/* pcbe_free */
};
166 
/*
 * One entry of an event-name table.  Every table ends with an entry whose
 * event_num is NT_END.
 */
struct nametable {
	/* Event name, matched with strcmp() by find_gpcevent() */
	const char	*name;
	/* Event Select bits that need secpolicy_cpc_cpu() when set */
	uint64_t	restricted_bits;
	/* Event Select code placed in core_ctl, or NT_END */
	uint8_t		event_num;
};

/* event_num value that terminates a nametable */
#define	NT_END	0xFF
174 
175 /*
176  * Counting an event for all cores or all bus agents requires cpc_cpu privileges
177  */
178 #define	ALL_CORES	(1ULL << 15)
179 #define	ALL_AGENTS	(1ULL << 13)
180 
/*
 * Events that every general-purpose counter can count.
 *
 * Entries MUST stay in ascending strcmp() order of name: find_gpcevent()
 * stops searching at the first entry that does not sort before the name it
 * is looking for.  The final entry, with event_num NT_END, terminates the
 * table.  restricted_bits lists the Event Select bits (ALL_CORES and/or
 * ALL_AGENTS) that check_cpc_securitypolicy() only allows with privilege.
 */
static const struct nametable common_gpc_events[] = {
	/* Alphabetical order of event name */

	{ "baclears",			0x0,	0xe6 },
	{ "bogus_br",			0x0,	0xe4 },
	{ "br_bac_missp_exec",		0x0,	0x8a },

	{ "br_call_exec",		0x0,	0x92 },
	{ "br_call_missp_exec",		0x0,	0x93 },
	{ "br_cnd_exec",		0x0,	0x8b },

	{ "br_cnd_missp_exec",		0x0,	0x8c },
	{ "br_ind_call_exec",		0x0,	0x94 },
	{ "br_ind_exec",		0x0,	0x8d },

	{ "br_ind_missp_exec",		0x0,	0x8e },
	{ "br_inst_decoded",		0x0,	0xe0 },
	{ "br_inst_exec",		0x0,	0x88 },

	{ "br_inst_retired",		0x0,	0xc4 },
	{ "br_inst_retired_mispred",	0x0,	0xc5 },
	{ "br_missp_exec",		0x0,	0x89 },

	{ "br_ret_bac_missp_exec",	0x0,	0x91 },
	{ "br_ret_exec",		0x0,	0x8f },
	{ "br_ret_missp_exec",		0x0,	0x90 },

	{ "br_tkn_bubble_1",		0x0,	0x97 },
	{ "br_tkn_bubble_2",		0x0,	0x98 },
	{ "bus_bnr_drv",		ALL_AGENTS,	0x61 },

	{ "bus_data_rcv",		ALL_CORES,	0x64 },
	{ "bus_drdy_clocks",		ALL_AGENTS,	0x62 },
	{ "bus_hit_drv",		ALL_AGENTS,	0x7a },

	{ "bus_hitm_drv",		ALL_AGENTS,	0x7b },
	{ "bus_io_wait",		ALL_CORES,	0x7f },
	{ "bus_lock_clocks",		ALL_CORES | ALL_AGENTS,	0x63 },

	{ "bus_request_outstanding",	ALL_CORES | ALL_AGENTS,	0x60 },
	{ "bus_trans_any",		ALL_CORES | ALL_AGENTS,	0x70 },
	{ "bus_trans_brd",		ALL_CORES | ALL_AGENTS,	0x65 },

	{ "bus_trans_burst",		ALL_CORES | ALL_AGENTS,	0x6e },
	{ "bus_trans_def",		ALL_CORES | ALL_AGENTS,	0x6d },
	{ "bus_trans_ifetch",		ALL_CORES | ALL_AGENTS,	0x68 },

	{ "bus_trans_inval",		ALL_CORES | ALL_AGENTS,	0x69 },
	{ "bus_trans_io",		ALL_CORES | ALL_AGENTS,	0x6c },
	{ "bus_trans_mem",		ALL_CORES | ALL_AGENTS,	0x6f },

	{ "bus_trans_p",		ALL_CORES | ALL_AGENTS,	0x6b },
	{ "bus_trans_pwr",		ALL_CORES | ALL_AGENTS,	0x6a },
	{ "bus_trans_rfo",		ALL_CORES | ALL_AGENTS,	0x66 },

	{ "bus_trans_wb",		ALL_CORES | ALL_AGENTS,	0x67 },
	{ "busq_empty",			ALL_CORES,	0x7d },
	{ "cmp_snoop",			ALL_CORES,	0x78 },

	{ "cpu_clk_unhalted",		0x0,	0x3c },
	{ "cycles_int",			0x0,	0xc6 },
	{ "cycles_l1i_mem_stalled",	0x0,	0x86 },

	{ "dtlb_misses",		0x0,	0x08 },
	{ "eist_trans",			0x0,	0x3a },
	{ "esp",			0x0,	0xab },

	{ "ext_snoop",			ALL_AGENTS,	0x77 },
	{ "fp_mmx_trans",		0x0,	0xcc },
	{ "hw_int_rcv",			0x0,	0xc8 },

	{ "ild_stall",			0x0,	0x87 },
	{ "inst_queue",			0x0,	0x83 },
	{ "inst_retired",		0x0,	0xc0 },

	{ "itlb",			0x0,	0x82 },
	{ "itlb_miss_retired",		0x0,	0xc9 },
	{ "l1d_all_ref",		0x0,	0x43 },

	{ "l1d_cache_ld",		0x0,	0x40 },
	{ "l1d_cache_lock",		0x0,	0x42 },
	{ "l1d_cache_st",		0x0,	0x41 },

	{ "l1d_m_evict",		0x0,	0x47 },
	{ "l1d_m_repl",			0x0,	0x46 },
	{ "l1d_pend_miss",		0x0,	0x48 },

	{ "l1d_prefetch",		0x0,	0x4e },
	{ "l1d_repl",			0x0,	0x45 },
	{ "l1d_split",			0x0,	0x49 },

	{ "l1i_misses",			0x0,	0x81 },
	{ "l1i_reads",			0x0,	0x80 },
	{ "l2_ads",			ALL_CORES,	0x21 },

	{ "l2_dbus_busy_rd",		ALL_CORES,	0x23 },
	{ "l2_ifetch",			ALL_CORES,	0x28 },
	{ "l2_ld",			ALL_CORES,	0x29 },

	{ "l2_lines_in",		ALL_CORES,	0x24 },
	{ "l2_lines_out",		ALL_CORES,	0x26 },
	{ "l2_lock",			ALL_CORES,	0x2b },

	{ "l2_m_lines_in",		ALL_CORES,	0x25 },
	{ "l2_m_lines_out",		ALL_CORES,	0x27 },
	{ "l2_no_req",			ALL_CORES,	0x32 },

	{ "l2_reject_busq",		ALL_CORES,	0x30 },
	{ "l2_rqsts",			ALL_CORES,	0x2e },
	{ "l2_st",			ALL_CORES,	0x2a },

	{ "load_block",			0x0,	0x03 },
	{ "load_hit_pre",		0x0,	0x4c },
	{ "machine_nukes",		0x0,	0xc3 },

	{ "macro_insts",		0x0,	0xaa },
	{ "memory_disambiguation",	0x0,	0x09 },
	{ "page_walks",			0x0,	0x0c },

	{ "pref_rqsts_dn",		0x0,	0xf8 },
	{ "pref_rqsts_up",		0x0,	0xf0 },
	{ "rat_stalls",			0x0,	0xd2 },

	{ "resource_stalls",		0x0,	0xdc },
	{ "rs_uops_dispatched",		0x0,	0xa0 },
	{ "seg_reg_renames",		0x0,	0xd5 },

	{ "seg_rename_stalls",		0x0,	0xd4 },
	{ "segment_reg_loads",		0x0,	0x06 },
	{ "simd_assist",		0x0,	0xcd },

	{ "simd_comp_inst_retired",	0x0,	0xca },
	{ "simd_inst_retired",		0x0,	0xc7 },
	{ "simd_instr_retired",		0x0,	0xce },

	{ "simd_sat_instr_retired",	0x0,	0xcf },
	{ "simd_sat_uop_exec",		0x0,	0xb1 },
	{ "simd_uop_type_exec",		0x0,	0xb3 },

	{ "simd_uops_exec",		0x0,	0xb0 },
	{ "snoop_stall_drv",		ALL_CORES | ALL_AGENTS,	0x7e },
	{ "sse_pre_exec",		0x0,	0x07 },

	{ "sse_pre_miss",		0x0,	0x4b },
	{ "store_block",		0x0,	0x04 },
	{ "thermal_trip",		0x0,	0x3b },

	{ "uops_retired",		0x0,	0xc2 },
	{ "x87_ops_retired",		0x0,	0xc1 },
	{ "",				0x0,	NT_END }
};
332 
333 /*
334  * If any of the pic specific events require privileges, make sure to add a
335  * check in configure_gpc() to find whether an event hard-coded as a number by
336  * the user has any privilege requirements
337  */
/*
 * Events that only general-purpose counter 0 can count.  Like
 * common_gpc_events, the entries must stay sorted by name for
 * find_gpcevent(), and the NT_END entry terminates the table.
 */
static const struct nametable pic0_events[] = {
	/* Alphabetical order of event name */

	{ "cycles_div_busy",		0x0,	0x14 },
	{ "fp_comp_ops_exe",		0x0,	0x10 },
	{ "idle_during_div",		0x0,	0x18 },

	{ "mem_load_retired",		0x0,	0xcb },
	{ "rs_uops_dispatched_port",	0x0,	0xa1 },
	{ "",				0x0,	NT_END }
};
349 
/*
 * Events that only general-purpose counter 1 can count.  The entries must
 * stay sorted by name for find_gpcevent(), and the NT_END entry terminates
 * the table.
 */
static const struct nametable pic1_events[] = {
	/* Alphabetical order of event name */

	{ "delayed_bypass",	0x0,	0x19 },
	{ "div",		0x0,	0x13 },
	{ "fp_assist",		0x0,	0x11 },

	{ "mul",		0x0,	0x12 },
	{ "",			0x0,	NT_END }
};
360 
/*
 * gpc_names[i] is the comma-separated list of event names that
 * general-purpose counter i can count.  Built once by core_pcbe_init().
 */
static char **gpc_names;

/*
 * Event name for each fixed-function counter, indexed by counter number
 * relative to the first FFC.  core_pcbe_init() limits num_ffc to the number
 * of names here, not counting the terminating NULL.
 */
char *ffc_names[] = {
	"instr_retired.any",
	"cpu_clk_unhalted.core",
	"cpu_clk_unhalted.ref",
	NULL
};

/* Counter geometry, all set by core_pcbe_init() */
static uint64_t	num_gpc;	/* Number of GPCs (CPUID 0xa, EAX[15:8]) */
static uint64_t	width_gpc;	/* GPC width in bits (CPUID 0xa, EAX[23:16]) */
static uint64_t	mask_gpc;	/* BITMASK_XBITS(width_gpc) */
static uint64_t	num_ffc;	/* Number of FFCs */
static uint64_t	width_ffc;	/* FFC width in bits */
static uint64_t	mask_ffc;	/* BITMASK_XBITS(width_ffc) */
static uint_t	total_pmc;	/* num_gpc + num_ffc */
static uint64_t	control_ffc;	/* One bit per FFC, starting at bit 0 */
static uint64_t	control_gpc;	/* One bit per GPC, starting at bit 0 */
/* control_gpc in the low bits, control_ffc starting at bit 32 */
static uint64_t	control_mask;
380 
/* Implementation name handed out by core_pcbe_impl_name() */
static const char *core_impl_name = "Core Microarchitecture";

/* Documentation reference handed out by core_pcbe_cpuref() */
static const char *core_cpuref =
	"See Appendix A of the \"Intel 64 and IA-32 Architectures Software"
	" Developer's Manual Volume 3B: System Programming Guide, Part 2\""
	" Order Number: 253669-026US, February 2008";
387 
388 static int
389 core_pcbe_init(void)
390 {
391 	struct cpuid_regs	cp;
392 	uint32_t		versionid;
393 	const struct nametable	*n;
394 	size_t			size;
395 	size_t			common_size;
396 	uint64_t		i;
397 	const struct nametable	*picspecific_events;
398 
399 	if ((cpuid_getvendor(CPU) != X86_VENDOR_Intel) ||
400 	    (cpuid_getfamily(CPU) != 6) ||
401 	    (cpuid_getmodel(CPU) != 15 && cpuid_getmodel(CPU) != 23))
402 		return (-1);
403 
404 	/* Obtain the Architectural Performance Monitoring Leaf */
405 	cp.cp_eax = 0xa;
406 	(void) __cpuid_insn(&cp);
407 
408 	versionid = cp.cp_eax & 0xFF;
409 
410 	/*
411 	 * All Family 6 Model 15 and Model 23 processors have fixed-function
412 	 * counters.  These counters were made Architectural with
413 	 * Family 6 Model 9 Stepping 9.
414 	 */
415 	switch (versionid) {
416 
417 		case 0:
418 			return (-1);
419 
420 		case APC_V2:
421 			num_ffc = cp.cp_edx & 0x1F;
422 			width_ffc = (cp.cp_edx >> 5) & 0xFF;
423 
424 			if (num_ffc == 0) {
425 				/*
426 				 * Some processors have an errata (AW34) where
427 				 * versionid is reported as 2 when actually 1.
428 				 * In this case, fixed-function counters are
429 				 * model-specific as in Version 1.
430 				 */
431 				num_ffc = 3;
432 				width_ffc = 40;
433 				versionid = APC_V1;
434 			}
435 			break;
436 
437 		default:
438 			/*
439 			 * For higher versions currently unsupported,
440 			 * default to Version 1
441 			 */
442 			num_ffc = 3;
443 			width_ffc = 40;
444 			break;
445 	}
446 
447 	if (num_ffc >= 64)
448 		return (-1);
449 
450 	if (num_ffc >= sizeof (ffc_names) / sizeof (char *)) {
451 		/*
452 		 * The system seems to have more fixed-function counters than
453 		 * what this PCBE is able to handle correctly.  Default to the
454 		 * maximum number of fixed-function counters that this driver
455 		 * is aware of.
456 		 */
457 		num_ffc = sizeof (ffc_names) / sizeof (char *) - 1;
458 	}
459 
460 	mask_ffc = BITMASK_XBITS(width_ffc);
461 
462 	num_gpc = (cp.cp_eax >> 8) & 0xFF;
463 	width_gpc = (cp.cp_eax >> 16) & 0xFF;
464 
465 	if (num_gpc >= 64)
466 		return (-1);
467 
468 	mask_gpc = BITMASK_XBITS(width_gpc);
469 
470 	total_pmc = num_gpc + num_ffc;
471 
472 	control_gpc = BITMASK_XBITS(num_gpc);
473 	control_ffc = BITMASK_XBITS(num_ffc);
474 
475 	control_mask = (control_ffc << 32) | control_gpc;
476 
477 	if (total_pmc > 64) {
478 		/* Too wide for the overflow bitmap */
479 		return (-1);
480 	}
481 
482 	/* General-purpose Counters (GPC) */
483 	gpc_names = NULL;
484 
485 	if (num_gpc > 0) {
486 		gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);
487 
488 		/* Calculate space needed to save all the common event names */
489 		common_size = 0;
490 		for (n = common_gpc_events; n->event_num != NT_END; n++) {
491 			common_size += strlen(n->name) + 1;
492 		}
493 
494 		for (i = 0; i < num_gpc; i++) {
495 			size = 0;
496 			switch (i) {
497 				case 0:
498 					picspecific_events = pic0_events;
499 					break;
500 				case 1:
501 					picspecific_events = pic1_events;
502 					break;
503 				default:
504 					picspecific_events = NULL;
505 					break;
506 			}
507 			if (picspecific_events != NULL) {
508 				for (n = picspecific_events;
509 				    n->event_num != NT_END;
510 				    n++) {
511 					size += strlen(n->name) + 1;
512 				}
513 			}
514 
515 			gpc_names[i] =
516 			    kmem_alloc(size + common_size + 1, KM_SLEEP);
517 
518 			gpc_names[i][0] = '\0';
519 			if (picspecific_events != NULL) {
520 				for (n = picspecific_events;
521 				    n->event_num != NT_END;
522 				    n++) {
523 					(void) strcat(gpc_names[i], n->name);
524 					(void) strcat(gpc_names[i], ",");
525 				}
526 			}
527 			for (n = common_gpc_events; n->event_num != NT_END;
528 			    n++) {
529 				(void) strcat(gpc_names[i], n->name);
530 				(void) strcat(gpc_names[i], ",");
531 			}
532 			/*
533 			 * Remove trailing comma.
534 			 */
535 			gpc_names[i][common_size + size - 1] = '\0';
536 		}
537 	}
538 
539 	/*
540 	 * Fixed-function Counters (FFC) are already listed individually in
541 	 * ffc_names[]
542 	 */
543 	return (0);
544 }
545 
546 static uint_t core_pcbe_ncounters()
547 {
548 	return (total_pmc);
549 }
550 
551 static const char *core_pcbe_impl_name(void)
552 {
553 	return (core_impl_name);
554 }
555 
556 static const char *core_pcbe_cpuref(void)
557 {
558 	return (core_cpuref);
559 }
560 
561 static char *core_pcbe_list_events(uint_t picnum)
562 {
563 	ASSERT(picnum < cpc_ncounters);
564 
565 	if (picnum < num_gpc) {
566 		return (gpc_names[picnum]);
567 	} else {
568 		return (ffc_names[picnum - num_gpc]);
569 	}
570 }
571 
/*
 * pcbe_list_attrs entry point: comma-separated names of the attributes that
 * configure_gpc() accepts.
 */
static char *
core_pcbe_list_attrs(void)
{
	return ("edge,pc,inv,umask,cmask");
}
576 
577 static const struct nametable *
578 find_gpcevent(char *name, const struct nametable *nametable)
579 {
580 	const struct nametable *n;
581 	int compare_result;
582 
583 	compare_result = -1;
584 	for (n = nametable; n->event_num != NT_END; n++) {
585 		compare_result = strcmp(name, n->name);
586 		if (compare_result <= 0) {
587 			break;
588 		}
589 	}
590 
591 	if (compare_result == 0) {
592 		return (n);
593 	}
594 
595 	return (NULL);
596 }
597 
598 static uint64_t
599 core_pcbe_event_coverage(char *event)
600 {
601 	uint64_t bitmap;
602 	uint64_t bitmask;
603 	int i;
604 
605 	bitmap = 0;
606 
607 	/* Is it an event that a GPC can track? */
608 	if (find_gpcevent(event, common_gpc_events) != NULL) {
609 		bitmap |= BITMASK_XBITS(num_gpc);
610 	} else if (find_gpcevent(event, pic0_events) != NULL) {
611 		bitmap |= 1ULL;
612 	} else if (find_gpcevent(event, pic1_events) != NULL) {
613 		bitmap |= 1ULL << 1;
614 	}
615 
616 	/* Check if the event can be counted in the fixed-function counters */
617 	if (num_ffc > 0) {
618 		bitmask = 1ULL << num_gpc;
619 		for (i = 0; i < num_ffc; i++) {
620 			if (strcmp(event, ffc_names[i]) == 0) {
621 				bitmap |= bitmask;
622 			}
623 			bitmask = bitmask << 1;
624 		}
625 	}
626 
627 	return (bitmap);
628 }
629 
630 static uint64_t
631 core_pcbe_overflow_bitmap(void)
632 {
633 	uint64_t interrupt_status;
634 	uint64_t intrbits_ffc;
635 	uint64_t intrbits_gpc;
636 	extern int kcpc_hw_overflow_intr_installed;
637 	uint64_t overflow_bitmap;
638 
639 	RDMSR(PERF_GLOBAL_STATUS, interrupt_status);
640 	WRMSR(PERF_GLOBAL_OVF_CTRL, interrupt_status);
641 
642 	interrupt_status = interrupt_status & control_mask;
643 	intrbits_ffc = (interrupt_status >> 32) & control_ffc;
644 	intrbits_gpc = interrupt_status & control_gpc;
645 	overflow_bitmap = (intrbits_ffc << num_gpc) | intrbits_gpc;
646 
647 	ASSERT(kcpc_hw_overflow_intr_installed);
648 	(*kcpc_hw_enable_cpc_intr)();
649 
650 	return (overflow_bitmap);
651 }
652 
653 static int
654 check_cpc_securitypolicy(core_pcbe_config_t *conf, const struct nametable *n)
655 {
656 	if (conf->core_ctl & n->restricted_bits) {
657 		if (secpolicy_cpc_cpu(crgetcred()) != 0) {
658 			return (CPC_ATTR_REQUIRES_PRIVILEGE);
659 		}
660 	}
661 	return (0);
662 }
663 
664 static int
665 configure_gpc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
666     uint_t nattrs, kcpc_attr_t *attrs, void **data)
667 {
668 	core_pcbe_config_t	conf;
669 	const struct nametable	*n;
670 	const struct nametable	*m;
671 	const struct nametable	*picspecific_events;
672 	struct nametable	nt_raw = { "", 0x0, 0x0 };
673 	uint_t			i;
674 	long			event_num;
675 
676 	if (((preset & BITS_EXTENDED_FROM_31) != 0) &&
677 	    ((preset & BITS_EXTENDED_FROM_31) !=
678 	    BITS_EXTENDED_FROM_31)) {
679 
680 		/*
681 		 * Bits beyond bit-31 in the general-purpose counters can only
682 		 * be written to by extension of bit 31.  We cannot preset
683 		 * these bits to any value other than all 1s or all 0s.
684 		 */
685 		return (CPC_ATTRIBUTE_OUT_OF_RANGE);
686 	}
687 
688 	n = find_gpcevent(event, common_gpc_events);
689 	if (n == NULL) {
690 		switch (picnum) {
691 			case 0:
692 				picspecific_events = pic0_events;
693 				break;
694 			case 1:
695 				picspecific_events = pic1_events;
696 				break;
697 			default:
698 				picspecific_events = NULL;
699 				break;
700 		}
701 		if (picspecific_events != NULL) {
702 			n = find_gpcevent(event, picspecific_events);
703 			if (n == NULL) {
704 				/*
705 				 * Check if this is a case where the event was
706 				 * specified directly by its event number
707 				 * instead of its name string.
708 				 */
709 				if (ddi_strtol(event, NULL, 0, &event_num) !=
710 				    0) {
711 					return (CPC_INVALID_EVENT);
712 				}
713 
714 				event_num = event_num & 0xFF;
715 
716 				/*
717 				 * Search the event table to find out if the
718 				 * event specified has an privilege
719 				 * requirements.  Currently none of the
720 				 * pic-specific counters have any privilege
721 				 * requirements.  Hence only the
722 				 * common_gpc_events table is searched.
723 				 */
724 				for (m = common_gpc_events;
725 				    m->event_num != NT_END;
726 				    m++) {
727 					if (event_num == m->event_num) {
728 						break;
729 					}
730 				}
731 				if (m->event_num == NT_END) {
732 					nt_raw.event_num = (uint8_t)event_num;
733 					n = &nt_raw;
734 				} else {
735 					n = m;
736 				}
737 			}
738 		}
739 	}
740 
741 	conf.core_picno = picnum;
742 	conf.core_pictype = CORE_GPC;
743 	conf.core_rawpic = preset & mask_gpc;
744 
745 	conf.core_pes = GPC_BASE_PES + picnum;
746 	conf.core_pmc = GPC_BASE_PMC + picnum;
747 
748 	conf.core_ctl = n->event_num; /* Event Select */
749 	for (i = 0; i < nattrs; i++) {
750 		if (strncmp(attrs[i].ka_name, "umask", 6) == 0) {
751 			if ((attrs[i].ka_val | CORE_UMASK_MASK) !=
752 			    CORE_UMASK_MASK) {
753 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
754 			}
755 			conf.core_ctl |= attrs[i].ka_val <<
756 			    CORE_UMASK_SHIFT;
757 		} else if (strncmp(attrs[i].ka_name, "edge", 6) == 0) {
758 			if (attrs[i].ka_val != 0)
759 				conf.core_ctl |= CORE_EDGE;
760 		} else if (strncmp(attrs[i].ka_name, "pc", 3) == 0) {
761 			if (attrs[i].ka_val != 0)
762 				conf.core_ctl |= CORE_PC;
763 		} else if (strncmp(attrs[i].ka_name, "inv", 4) == 0) {
764 			if (attrs[i].ka_val != 0)
765 				conf.core_ctl |= CORE_INV;
766 		} else if (strncmp(attrs[i].ka_name, "cmask", 6) == 0) {
767 			if ((attrs[i].ka_val | CORE_CMASK_MASK) !=
768 			    CORE_CMASK_MASK) {
769 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
770 			}
771 			conf.core_ctl |= attrs[i].ka_val << CORE_CMASK_SHIFT;
772 		} else {
773 			return (CPC_INVALID_ATTRIBUTE);
774 		}
775 	}
776 
777 	if (flags & CPC_COUNT_USER)
778 		conf.core_ctl |= CORE_USR;
779 	if (flags & CPC_COUNT_SYSTEM)
780 		conf.core_ctl |= CORE_OS;
781 	if (flags & CPC_OVF_NOTIFY_EMT)
782 		conf.core_ctl |= CORE_INT;
783 	conf.core_ctl |= CORE_EN;
784 
785 	if (check_cpc_securitypolicy(&conf, n) != 0) {
786 		return (CPC_ATTR_REQUIRES_PRIVILEGE);
787 	}
788 
789 	*data = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
790 	*((core_pcbe_config_t *)*data) = conf;
791 
792 	return (0);
793 }
794 
795 static int
796 configure_ffc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
797     uint_t nattrs, void **data)
798 {
799 	core_pcbe_config_t	*conf;
800 
801 	if (picnum - num_gpc >= num_ffc) {
802 		return (CPC_INVALID_PICNUM);
803 	}
804 	if (strcmp(ffc_names[picnum-num_gpc], event) != 0) {
805 		return (CPC_INVALID_EVENT);
806 	}
807 
808 	if (nattrs != 0) {
809 		return (CPC_INVALID_ATTRIBUTE);
810 	}
811 
812 	conf = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
813 
814 	conf->core_picno = picnum;
815 	conf->core_pictype = CORE_FFC;
816 	conf->core_rawpic = preset & mask_ffc;
817 	conf->core_pmc = FFC_BASE_PMC + (picnum - num_gpc);
818 
819 	/* All fixed-function counters have the same control register */
820 	conf->core_pes = PERF_FIXED_CTR_CTRL;
821 
822 	conf->core_ctl = 0;
823 	if (flags & CPC_COUNT_USER)
824 		conf->core_ctl |= CORE_FFC_USR_EN;
825 	if (flags & CPC_COUNT_SYSTEM)
826 		conf->core_ctl |= CORE_FFC_OS_EN;
827 	if (flags & CPC_OVF_NOTIFY_EMT)
828 		conf->core_ctl |= CORE_FFC_PMI;
829 
830 	*data = conf;
831 	return (0);
832 }
833 
834 /*ARGSUSED*/
835 static int
836 core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
837     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
838     void *token)
839 {
840 	int			ret;
841 	core_pcbe_config_t	*conf;
842 
843 	/*
844 	 * If we've been handed an existing configuration, we need only preset
845 	 * the counter value.
846 	 */
847 	if (*data != NULL) {
848 		conf = *data;
849 		ASSERT(conf->core_pictype == CORE_GPC ||
850 		    conf->core_pictype == CORE_FFC);
851 		if (conf->core_pictype == CORE_GPC)
852 			conf->core_rawpic = preset & mask_gpc;
853 		else /* CORE_FFC */
854 			conf->core_rawpic = preset & mask_ffc;
855 		return (0);
856 	}
857 
858 	if (picnum >= total_pmc) {
859 		return (CPC_INVALID_PICNUM);
860 	}
861 
862 	if (picnum < num_gpc) {
863 		ret = configure_gpc(picnum, event, preset, flags,
864 		    nattrs, attrs, data);
865 	} else {
866 		ret = configure_ffc(picnum, event, preset, flags,
867 		    nattrs, data);
868 	}
869 	return (ret);
870 }
871 
872 static void
873 core_pcbe_program(void *token)
874 {
875 	core_pcbe_config_t	*cfg;
876 	uint64_t		perf_global_ctrl;
877 	uint64_t		perf_fixed_ctr_ctrl;
878 	uint64_t		curcr4;
879 
880 	core_pcbe_allstop();
881 
882 	curcr4 = getcr4();
883 	if (kcpc_allow_nonpriv(token))
884 		/* Allow RDPMC at any ring level */
885 		setcr4(curcr4 | CR4_PCE);
886 	else
887 		/* Allow RDPMC only at ring 0 */
888 		setcr4(curcr4 & ~CR4_PCE);
889 
890 	/* Clear any overflow indicators before programming the counters */
891 	WRMSR(PERF_GLOBAL_OVF_CTRL, MASK_CONDCHGD_OVFBUFFER | control_mask);
892 
893 	cfg = NULL;
894 	perf_global_ctrl = 0;
895 	perf_fixed_ctr_ctrl = 0;
896 	cfg = (core_pcbe_config_t *)kcpc_next_config(token, cfg, NULL);
897 	while (cfg != NULL) {
898 		ASSERT(cfg->core_pictype == CORE_GPC ||
899 		    cfg->core_pictype == CORE_FFC);
900 
901 		if (cfg->core_pictype == CORE_GPC) {
902 			/*
903 			 * General-purpose counter registers have write
904 			 * restrictions where only the lower 32-bits can be
905 			 * written to.  The rest of the relevant bits are
906 			 * written to by extension from bit 31 (all ZEROS if
907 			 * bit-31 is ZERO and all ONE if bit-31 is ONE).  This
908 			 * makes it possible to write to the counter register
909 			 * only values that have all ONEs or all ZEROs in the
910 			 * higher bits.
911 			 */
912 			if (((cfg->core_rawpic & BITS_EXTENDED_FROM_31) == 0) ||
913 			    ((cfg->core_rawpic & BITS_EXTENDED_FROM_31) ==
914 			    BITS_EXTENDED_FROM_31)) {
915 				/*
916 				 * Straighforward case where the higher bits
917 				 * are all ZEROs or all ONEs.
918 				 */
919 				WRMSR(cfg->core_pmc,
920 				    (cfg->core_rawpic & mask_gpc));
921 			} else {
922 				/*
923 				 * The high order bits are not all the same.
924 				 * We save what is currently in the registers
925 				 * and do not write to it.  When we want to do
926 				 * a read from this register later (in
927 				 * core_pcbe_sample()), we subtract the value
928 				 * we save here to get the actual event count.
929 				 *
930 				 * NOTE: As a result, we will not get overflow
931 				 * interrupts as expected.
932 				 */
933 				RDMSR(cfg->core_pmc, cfg->core_rawpic);
934 				cfg->core_rawpic = cfg->core_rawpic & mask_gpc;
935 			}
936 			WRMSR(cfg->core_pes, cfg->core_ctl);
937 			perf_global_ctrl |= 1ull << cfg->core_picno;
938 		} else {
939 			/*
940 			 * Unlike the general-purpose counters, all relevant
941 			 * bits of fixed-function counters can be written to.
942 			 */
943 			WRMSR(cfg->core_pmc, cfg->core_rawpic & mask_ffc);
944 
945 			/*
946 			 * Collect the control bits for all the
947 			 * fixed-function counters and write it at one shot
948 			 * later in this function
949 			 */
950 			perf_fixed_ctr_ctrl |= cfg->core_ctl <<
951 			    ((cfg->core_picno - num_gpc) * CORE_FFC_ATTR_SIZE);
952 			perf_global_ctrl |=
953 			    1ull << (cfg->core_picno - num_gpc + 32);
954 		}
955 
956 		cfg = (core_pcbe_config_t *)
957 		    kcpc_next_config(token, cfg, NULL);
958 	}
959 
960 	/* Enable all the counters */
961 	WRMSR(PERF_FIXED_CTR_CTRL, perf_fixed_ctr_ctrl);
962 	WRMSR(PERF_GLOBAL_CTRL, perf_global_ctrl);
963 }
964 
/*
 * pcbe_allstop entry point: stop every counter with a single write of
 * ALL_STOPPED to PERF_GLOBAL_CTRL, then clear CR4_PCE.  Per the comments in
 * core_pcbe_program(), a clear CR4_PCE restricts RDPMC to ring 0.
 */
static void
core_pcbe_allstop(void)
{
	/* Disable all the counters together */
	WRMSR(PERF_GLOBAL_CTRL, ALL_STOPPED);

	setcr4(getcr4() & ~CR4_PCE);
}
973 
974 static void
975 core_pcbe_sample(void *token)
976 {
977 	uint64_t		*daddr;
978 	uint64_t		curpic;
979 	core_pcbe_config_t	*cfg;
980 	uint64_t			counter_mask;
981 
982 	cfg = (core_pcbe_config_t *)kcpc_next_config(token, NULL, &daddr);
983 	while (cfg != NULL) {
984 		ASSERT(cfg->core_pictype == CORE_GPC ||
985 		    cfg->core_pictype == CORE_FFC);
986 
987 		curpic = rdmsr(cfg->core_pmc);
988 
989 		DTRACE_PROBE4(core__pcbe__sample,
990 		    uint64_t, cfg->core_pmc,
991 		    uint64_t, curpic,
992 		    uint64_t, cfg->core_rawpic,
993 		    uint64_t, *daddr);
994 
995 		if (cfg->core_pictype == CORE_GPC) {
996 			counter_mask = mask_gpc;
997 		} else {
998 			counter_mask = mask_ffc;
999 		}
1000 		curpic = curpic & counter_mask;
1001 		if (curpic >= cfg->core_rawpic) {
1002 			*daddr += curpic - cfg->core_rawpic;
1003 		} else {
1004 			/* Counter overflowed since our last sample */
1005 			*daddr += counter_mask - (cfg->core_rawpic - curpic) +
1006 			    1;
1007 		}
1008 		cfg->core_rawpic = *daddr & counter_mask;
1009 
1010 		cfg =
1011 		    (core_pcbe_config_t *)kcpc_next_config(token, cfg, &daddr);
1012 	}
1013 }
1014 
/*
 * pcbe_free entry point: release a configuration.  The size passed to
 * kmem_free() matches the allocations made in configure_gpc() and
 * configure_ffc().
 */
static void
core_pcbe_free(void *config)
{
	kmem_free(config, sizeof (core_pcbe_config_t));
}
1020 
/* Module linkage element that ties core_pcbe_ops to mod_pcbeops */
static struct modlpcbe core_modlpcbe = {
	&mod_pcbeops,
	"Core Performance Counters",
	&core_pcbe_ops
};

/* Module linkage passed to mod_install(), mod_remove() and mod_info() */
static struct modlinkage core_modl = {
	MODREV_1,
	&core_modlpcbe,
};
1031 
1032 int
1033 _init(void)
1034 {
1035 	if (core_pcbe_init() != 0) {
1036 		return (ENOTSUP);
1037 	}
1038 	return (mod_install(&core_modl));
1039 }
1040 
/* Module unload entry point: returns the result of mod_remove(). */
int
_fini(void)
{
	return (mod_remove(&core_modl));
}
1046 
/* Module information entry point: returns the result of mod_info(). */
int
_info(struct modinfo *mi)
{
	return (mod_info(&core_modl, mi));
}
1052