xref: /illumos-gate/usr/src/uts/intel/pcbe/core_pcbe.c (revision 2eef1f2b3c0d57d3f401f917b9f38f01456fd554)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
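 * Performance Counter Back-End for Intel Family 6 processors: event tables
 * for Models 15 and 23, for Models 26 and 37, and for the architectural
 * events reported through CPUID leaf 0xa.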
28  */
29 
30 #include <sys/cpuvar.h>
31 #include <sys/param.h>
32 #include <sys/cpc_impl.h>
33 #include <sys/cpc_pcbe.h>
34 #include <sys/modctl.h>
35 #include <sys/inttypes.h>
36 #include <sys/systm.h>
37 #include <sys/cmn_err.h>
38 #include <sys/x86_archext.h>
39 #include <sys/sdt.h>
40 #include <sys/archsystm.h>
41 #include <sys/privregs.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/cred.h>
45 #include <sys/policy.h>
46 
47 static int core_pcbe_init(void);
48 static uint_t core_pcbe_ncounters(void);
49 static const char *core_pcbe_impl_name(void);
50 static const char *core_pcbe_cpuref(void);
51 static char *core_pcbe_list_events(uint_t picnum);
52 static char *core_pcbe_list_attrs(void);
53 static uint64_t core_pcbe_event_coverage(char *event);
54 static uint64_t core_pcbe_overflow_bitmap(void);
55 static int core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
56     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
57     void *token);
58 static void core_pcbe_program(void *token);
59 static void core_pcbe_allstop(void);
60 static void core_pcbe_sample(void *token);
61 static void core_pcbe_free(void *config);
62 
63 #define	FALSE	0
64 #define	TRUE	1
65 
66 /* Counter Type */
67 #define	CORE_GPC	0	/* General-Purpose Counter (GPC) */
68 #define	CORE_FFC	1	/* Fixed-Function Counter (FFC) */
69 
70 /* MSR Addresses */
71 #define	GPC_BASE_PMC		0x00c1	/* First GPC */
72 #define	GPC_BASE_PES		0x0186	/* First GPC Event Select register */
73 #define	FFC_BASE_PMC		0x0309	/* First FFC */
74 #define	PERF_FIXED_CTR_CTRL	0x038d	/* Used to enable/disable FFCs */
75 #define	PERF_GLOBAL_STATUS	0x038e	/* Overflow status register */
76 #define	PERF_GLOBAL_CTRL	0x038f	/* Used to enable/disable counting */
77 #define	PERF_GLOBAL_OVF_CTRL	0x0390	/* Used to clear overflow status */
78 
79 /*
80  * Processor Event Select register fields
81  */
82 #define	CORE_USR	(1ULL << 16)	/* Count while not in ring 0 */
83 #define	CORE_OS		(1ULL << 17)	/* Count while in ring 0 */
84 #define	CORE_EDGE	(1ULL << 18)	/* Enable edge detection */
85 #define	CORE_PC		(1ULL << 19)	/* Enable pin control */
86 #define	CORE_INT	(1ULL << 20)	/* Enable interrupt on overflow */
87 #define	CORE_EN		(1ULL << 22)	/* Enable counting */
88 #define	CORE_INV	(1ULL << 23)	/* Invert the CMASK */
89 #define	CORE_ANYTHR	(1ULL << 21)	/* Count event for any thread on core */
90 
91 #define	CORE_UMASK_SHIFT	8
92 #define	CORE_UMASK_MASK		0xffu
93 #define	CORE_CMASK_SHIFT	24
94 #define	CORE_CMASK_MASK		0xffu
95 
96 /*
97  * Fixed-function counter attributes
98  */
#define	CORE_FFC_OS_EN	(1ULL << 0)	/* Count while in ring 0 */
#define	CORE_FFC_USR_EN	(1ULL << 1)	/* Count while not in ring 0 */
#define	CORE_FFC_ANYTHR	(1ULL << 2)	/* Count event for any thread on core */
#define	CORE_FFC_PMI	(1ULL << 3)	/* Enable interrupt on overflow */
103 
104 /*
105  * Number of bits for specifying each FFC's attributes in the control register
106  */
107 #define	CORE_FFC_ATTR_SIZE	4
108 
109 /*
110  * CondChgd and OvfBuffer fields of global status and overflow control registers
111  */
112 #define	CONDCHGD	(1ULL << 63)
113 #define	OVFBUFFER	(1ULL << 62)
114 #define	MASK_CONDCHGD_OVFBUFFER	(CONDCHGD | OVFBUFFER)
115 
116 #define	ALL_STOPPED	0ULL
117 
/*
 * Mask with the low 'x' bits set.  A width of 64 or more yields all ones
 * rather than shifting a 64-bit value by its full width, which C leaves
 * undefined.  Note that 'x' is evaluated more than once.
 */
#define	BITMASK_XBITS(x)	\
	(((x) >= 64) ? ~0ull : ((1ull << (x)) - 1ull))
119 
120 /*
121  * Only the lower 32-bits can be written to in the general-purpose
122  * counters.  The higher bits are extended from bit 31; all ones if
123  * bit 31 is one and all zeros otherwise.
124  *
125  * The fixed-function counters do not have this restriction.
126  */
127 #define	BITS_EXTENDED_FROM_31	(BITMASK_XBITS(width_gpc) & ~BITMASK_XBITS(31))
128 
/*
 * Write 'value' to the model-specific register 'msr' and fire the "wrmsr"
 * DTrace probe with both values.  The body is wrapped in do { } while (0) so
 * that the macro expands to exactly one statement and is safe as the body of
 * an unbraced if/else; callers supply the terminating semicolon.  Both
 * arguments are evaluated twice, so they must be free of side effects.
 */
#define	WRMSR(msr, value)						\
	do {								\
		wrmsr((msr), (value));					\
		DTRACE_PROBE2(wrmsr, uint64_t, (msr), uint64_t, (value)); \
	} while (0)
132 
/*
 * Read the model-specific register 'msr' into the lvalue 'value' and fire the
 * "rdmsr" DTrace probe with the register address and the value read.  The
 * body is wrapped in do { } while (0) so that the macro expands to exactly one
 * statement and is safe as the body of an unbraced if/else; callers supply
 * the terminating semicolon.  'msr' is evaluated twice.
 */
#define	RDMSR(msr, value)						\
	do {								\
		(value) = rdmsr((msr));					\
		DTRACE_PROBE2(rdmsr, uint64_t, (msr), uint64_t, (value)); \
	} while (0)
136 
/*
 * Configuration record for one counter.
 *
 * NOTE(review): the code that fills in and consumes this structure is not
 * visible in this part of the file.  The notes on core_rawpic and core_picno
 * below are inferred from the field names only -- confirm against
 * core_pcbe_configure() and core_pcbe_program().
 */
typedef struct core_pcbe_config {
	/* presumably the raw counter value to load/sample -- TODO confirm */
	uint64_t	core_rawpic;
	uint64_t	core_ctl;	/* Event Select bits */
	uint64_t	core_pmc;	/* Counter register address */
	uint64_t	core_pes;	/* Event Select register address */
	/* presumably the counter (pic) number -- TODO confirm */
	uint_t		core_picno;
	uint8_t		core_pictype;	/* CORE_GPC or CORE_FFC */
} core_pcbe_config_t;
145 
/*
 * Operations vector for this back-end.  The initializer is positional: the
 * trailing comment on each line names the pcbe_ops_t member being filled in,
 * and each function slot is filled with the core_pcbe_*() routine of the
 * matching name declared at the top of the file.  The capability word
 * advertises CPC_CAP_OVERFLOW_INTERRUPT and CPC_CAP_OVERFLOW_PRECISE.
 */
pcbe_ops_t core_pcbe_ops = {
	PCBE_VER_1,			/* pcbe_ver */
	CPC_CAP_OVERFLOW_INTERRUPT | CPC_CAP_OVERFLOW_PRECISE,	/* pcbe_caps */
	core_pcbe_ncounters,		/* pcbe_ncounters */
	core_pcbe_impl_name,		/* pcbe_impl_name */
	core_pcbe_cpuref,		/* pcbe_cpuref */
	core_pcbe_list_events,		/* pcbe_list_events */
	core_pcbe_list_attrs,		/* pcbe_list_attrs */
	core_pcbe_event_coverage,	/* pcbe_event_coverage */
	core_pcbe_overflow_bitmap,	/* pcbe_overflow_bitmap */
	core_pcbe_configure,		/* pcbe_configure */
	core_pcbe_program,		/* pcbe_program */
	core_pcbe_allstop,		/* pcbe_allstop */
	core_pcbe_sample,		/* pcbe_sample */
	core_pcbe_free			/* pcbe_free */
};
162 
/*
 * One entry of an event-name table for the general-purpose counters of
 * Family 6 Models 15 and 23.  Each table ends with an entry whose event_num
 * is NT_END.
 */
struct nametable_fam6mod15_23 {
	const char	*name;		/* event name */
	/* ALL_CORES and/or ALL_AGENTS if the event can use them, else 0 */
	uint64_t	restricted_bits;
	uint8_t		event_num;	/* event number; NT_END ends a table */
};
168 
169 #define	NT_END	0xFF
170 
171 /*
172  * Counting an event for all cores or all bus agents requires cpc_cpu privileges
173  */
174 #define	ALL_CORES	(1ULL << 15)
175 #define	ALL_AGENTS	(1ULL << 13)
176 
177 /*
178  * The events listed in the following table can be counted on all
179  * general-purpose counters on processors that are of Family 6 Models 15 or 23
180  */
181 static const struct nametable_fam6mod15_23 cmn_gpc_events_f6m15_23[] = {
182 	/* Alphabetical order of event name */
183 
184 	{ "baclears",			0x0,	0xe6 },
185 	{ "bogus_br",			0x0,	0xe4 },
186 	{ "br_bac_missp_exec",		0x0,	0x8a },
187 
188 	{ "br_call_exec",		0x0,	0x92 },
189 	{ "br_call_missp_exec",		0x0,	0x93 },
190 	{ "br_cnd_exec",		0x0,	0x8b },
191 
192 	{ "br_cnd_missp_exec",		0x0,	0x8c },
193 	{ "br_ind_call_exec",		0x0,	0x94 },
194 	{ "br_ind_exec",		0x0,	0x8d },
195 
196 	{ "br_ind_missp_exec",		0x0,	0x8e },
197 	{ "br_inst_decoded",		0x0,	0xe0 },
198 	{ "br_inst_exec",		0x0,	0x88 },
199 
200 	{ "br_inst_retired",		0x0,	0xc4 },
201 	{ "br_inst_retired_mispred",	0x0,	0xc5 },
202 	{ "br_missp_exec",		0x0,	0x89 },
203 
204 	{ "br_ret_bac_missp_exec",	0x0,	0x91 },
205 	{ "br_ret_exec",		0x0,	0x8f },
206 	{ "br_ret_missp_exec",		0x0,	0x90 },
207 
208 	{ "br_tkn_bubble_1",		0x0,	0x97 },
209 	{ "br_tkn_bubble_2",		0x0,	0x98 },
210 	{ "bus_bnr_drv",		ALL_AGENTS,	0x61 },
211 
212 	{ "bus_data_rcv",		ALL_CORES,	0x64 },
213 	{ "bus_drdy_clocks",		ALL_AGENTS,	0x62 },
214 	{ "bus_hit_drv",		ALL_AGENTS,	0x7a },
215 
216 	{ "bus_hitm_drv",		ALL_AGENTS,	0x7b },
217 	{ "bus_io_wait",		ALL_CORES,	0x7f },
218 	{ "bus_lock_clocks",		ALL_CORES | ALL_AGENTS,	0x63 },
219 
220 	{ "bus_request_outstanding",	ALL_CORES | ALL_AGENTS,	0x60 },
221 	{ "bus_trans_any",		ALL_CORES | ALL_AGENTS,	0x70 },
222 	{ "bus_trans_brd",		ALL_CORES | ALL_AGENTS,	0x65 },
223 
224 	{ "bus_trans_burst",		ALL_CORES | ALL_AGENTS,	0x6e },
225 	{ "bus_trans_def",		ALL_CORES | ALL_AGENTS,	0x6d },
226 	{ "bus_trans_ifetch",		ALL_CORES | ALL_AGENTS,	0x68 },
227 
228 	{ "bus_trans_inval",		ALL_CORES | ALL_AGENTS,	0x69 },
229 	{ "bus_trans_io",		ALL_CORES | ALL_AGENTS,	0x6c },
230 	{ "bus_trans_mem",		ALL_CORES | ALL_AGENTS,	0x6f },
231 
232 	{ "bus_trans_p",		ALL_CORES | ALL_AGENTS,	0x6b },
233 	{ "bus_trans_pwr",		ALL_CORES | ALL_AGENTS,	0x6a },
234 	{ "bus_trans_rfo",		ALL_CORES | ALL_AGENTS,	0x66 },
235 
236 	{ "bus_trans_wb",		ALL_CORES | ALL_AGENTS,	0x67 },
237 	{ "busq_empty",			ALL_CORES,	0x7d },
238 	{ "cmp_snoop",			ALL_CORES,	0x78 },
239 
240 	{ "cpu_clk_unhalted",		0x0,	0x3c },
241 	{ "cycles_int",			0x0,	0xc6 },
242 	{ "cycles_l1i_mem_stalled",	0x0,	0x86 },
243 
244 	{ "dtlb_misses",		0x0,	0x08 },
245 	{ "eist_trans",			0x0,	0x3a },
246 	{ "esp",			0x0,	0xab },
247 
248 	{ "ext_snoop",			ALL_AGENTS,	0x77 },
249 	{ "fp_mmx_trans",		0x0,	0xcc },
250 	{ "hw_int_rcv",			0x0,	0xc8 },
251 
252 	{ "ild_stall",			0x0,	0x87 },
253 	{ "inst_queue",			0x0,	0x83 },
254 	{ "inst_retired",		0x0,	0xc0 },
255 
256 	{ "itlb",			0x0,	0x82 },
257 	{ "itlb_miss_retired",		0x0,	0xc9 },
258 	{ "l1d_all_ref",		0x0,	0x43 },
259 
260 	{ "l1d_cache_ld",		0x0,	0x40 },
261 	{ "l1d_cache_lock",		0x0,	0x42 },
262 	{ "l1d_cache_st",		0x0,	0x41 },
263 
264 	{ "l1d_m_evict",		0x0,	0x47 },
265 	{ "l1d_m_repl",			0x0,	0x46 },
266 	{ "l1d_pend_miss",		0x0,	0x48 },
267 
268 	{ "l1d_prefetch",		0x0,	0x4e },
269 	{ "l1d_repl",			0x0,	0x45 },
270 	{ "l1d_split",			0x0,	0x49 },
271 
272 	{ "l1i_misses",			0x0,	0x81 },
273 	{ "l1i_reads",			0x0,	0x80 },
274 	{ "l2_ads",			ALL_CORES,	0x21 },
275 
276 	{ "l2_dbus_busy_rd",		ALL_CORES,	0x23 },
277 	{ "l2_ifetch",			ALL_CORES,	0x28 },
278 	{ "l2_ld",			ALL_CORES,	0x29 },
279 
280 	{ "l2_lines_in",		ALL_CORES,	0x24 },
281 	{ "l2_lines_out",		ALL_CORES,	0x26 },
282 	{ "l2_lock",			ALL_CORES,	0x2b },
283 
284 	{ "l2_m_lines_in",		ALL_CORES,	0x25 },
285 	{ "l2_m_lines_out",		ALL_CORES,	0x27 },
286 	{ "l2_no_req",			ALL_CORES,	0x32 },
287 
288 	{ "l2_reject_busq",		ALL_CORES,	0x30 },
289 	{ "l2_rqsts",			ALL_CORES,	0x2e },
290 	{ "l2_st",			ALL_CORES,	0x2a },
291 
292 	{ "load_block",			0x0,	0x03 },
293 	{ "load_hit_pre",		0x0,	0x4c },
294 	{ "machine_nukes",		0x0,	0xc3 },
295 
296 	{ "macro_insts",		0x0,	0xaa },
297 	{ "memory_disambiguation",	0x0,	0x09 },
298 	{ "misalign_mem_ref",		0x0,	0x05 },
299 	{ "page_walks",			0x0,	0x0c },
300 
301 	{ "pref_rqsts_dn",		0x0,	0xf8 },
302 	{ "pref_rqsts_up",		0x0,	0xf0 },
303 	{ "rat_stalls",			0x0,	0xd2 },
304 
305 	{ "resource_stalls",		0x0,	0xdc },
306 	{ "rs_uops_dispatched",		0x0,	0xa0 },
307 	{ "seg_reg_renames",		0x0,	0xd5 },
308 
309 	{ "seg_rename_stalls",		0x0,	0xd4 },
310 	{ "segment_reg_loads",		0x0,	0x06 },
311 	{ "simd_assist",		0x0,	0xcd },
312 
313 	{ "simd_comp_inst_retired",	0x0,	0xca },
314 	{ "simd_inst_retired",		0x0,	0xc7 },
315 	{ "simd_instr_retired",		0x0,	0xce },
316 
317 	{ "simd_sat_instr_retired",	0x0,	0xcf },
318 	{ "simd_sat_uop_exec",		0x0,	0xb1 },
319 	{ "simd_uop_type_exec",		0x0,	0xb3 },
320 
321 	{ "simd_uops_exec",		0x0,	0xb0 },
322 	{ "snoop_stall_drv",		ALL_CORES | ALL_AGENTS,	0x7e },
323 	{ "sse_pre_exec",		0x0,	0x07 },
324 
325 	{ "sse_pre_miss",		0x0,	0x4b },
326 	{ "store_block",		0x0,	0x04 },
327 	{ "thermal_trip",		0x0,	0x3b },
328 
329 	{ "uops_retired",		0x0,	0xc2 },
330 	{ "x87_ops_retired",		0x0,	0xc1 },
331 	{ "",				0x0,	NT_END }
332 };
333 
334 /*
335  * If any of the pic specific events require privileges, make sure to add a
336  * check in configure_gpc() to find whether an event hard-coded as a number by
337  * the user has any privilege requirements
338  */
/*
 * Events offered only on general-purpose counter 0:
 * pcbe_init_fam6_model15_23() places these names in gpc_names[0] ahead of the
 * common events.  None of them carries restricted bits.
 */
static const struct nametable_fam6mod15_23 pic0_events[] = {
	/* Alphabetical order of event name */

	{ "cycles_div_busy",		0x0,	0x14 },
	{ "fp_comp_ops_exe",		0x0,	0x10 },
	{ "idle_during_div",		0x0,	0x18 },

	{ "mem_load_retired",		0x0,	0xcb },
	{ "rs_uops_dispatched_port",	0x0,	0xa1 },
	{ "",				0x0,	NT_END }
};
350 
/*
 * Events offered only on general-purpose counter 1:
 * pcbe_init_fam6_model15_23() places these names in gpc_names[1] ahead of the
 * common events.  None of them carries restricted bits.
 */
static const struct nametable_fam6mod15_23 pic1_events[] = {
	/* Alphabetical order of event name */

	{ "delayed_bypass",	0x0,	0x19 },
	{ "div",		0x0,	0x13 },
	{ "fp_assist",		0x0,	0x11 },

	{ "mul",		0x0,	0x12 },
	{ "",			0x0,	NT_END }
};
361 
/*
 * Names of the fixed-function counter events.  FFC entries must be in order.
 * Each array is NULL-terminated.  The two variants differ only in the second
 * entry ("cpu_clk_unhalted.core" versus "cpu_clk_unhalted.thread").
 */
char *ffc_names_non_htt[] = {
	"instr_retired.any",
	"cpu_clk_unhalted.core",
	"cpu_clk_unhalted.ref",
	NULL
};

char *ffc_names_htt[] = {
	"instr_retired.any",
	"cpu_clk_unhalted.thread",
	"cpu_clk_unhalted.ref",
	NULL
};

/*
 * NOTE(review): starts out NULL; presumably pointed at one of the two arrays
 * above by core_pcbe_init() when it sets the HTT-specific names -- the
 * assignment is outside the visible part of the file, confirm there.
 */
char **ffc_names = NULL;
378 
/*
 * Module-wide counter state.
 *
 * gpc_names	Array of num_gpc strings allocated by
 *		pcbe_init_fam6_model15_23(); entry i is the comma-separated
 *		list of event names for general-purpose counter i.
 * versionid	Low 8 bits of EAX from CPUID leaf 0xa as read by
 *		core_pcbe_init(), forced to 1 when the fixed-function counters
 *		are treated as model-specific.
 * num_ffc,	Taken by core_pcbe_init() from EDX of CPUID leaf 0xa (low 5
 * width_ffc	bits, and the 8 bits starting at bit 5), or set to 3 and 40 in
 *		the version 1 case.
 * width_gpc	Used by BITS_EXTENDED_FROM_31 as the general-purpose counter
 *		width in bits.
 *
 * NOTE(review): the remaining variables are assigned outside the visible
 * part of the file; see core_pcbe_init() for their meaning.
 */
static char	**gpc_names;
static uint32_t	versionid;
static uint64_t	num_gpc;
static uint64_t	width_gpc;
static uint64_t	mask_gpc;
static uint64_t	num_ffc;
static uint64_t	width_ffc;
static uint64_t	mask_ffc;
static uint_t	total_pmc;
static uint64_t	control_ffc;
static uint64_t	control_gpc;
static uint64_t	control_mask;
static uint32_t	arch_events_vector;
392 
393 #define	IMPL_NAME_LEN 100
394 static char core_impl_name[IMPL_NAME_LEN];
395 
/*
 * Pointer to the vendor documentation that describes the events.  Adjacent
 * string literals are concatenated by the compiler, so no line-continuation
 * characters are needed between them.
 */
static const char *core_cpuref =
	"See Appendix A of the \"Intel 64 and IA-32 Architectures Software"
	" Developer's Manual Volume 3B: System Programming Guide, Part 2\""
	" Order Number: 253669-026US, February 2008";
400 
/*
 * One entry of an event table keyed by event select code and unit mask.
 * The model-specific tables (events_fam6_mod26[], events_fam6_mod37[]) end
 * with an entry whose eventselect is NT_END.
 */
struct events_table_t {
	uint8_t		eventselect;	/* event select code */
	uint8_t		unitmask;	/* unit mask for the event */
	/* bitmask of counters that can count the event: C0..C3 bits or C_ALL */
	uint64_t	supported_counters;
	const char	*name;		/* event name */
};
407 
/*
 * Used to describe which counters support an event.  C(x) is the bit for
 * counter 'x' in the 64-bit supported_counters mask of struct events_table_t.
 * The shift is done on an unsigned 64-bit one, so the result has the same
 * width as the mask it is stored in and compared with, and is well defined
 * for every counter number from 0 to 63.
 */
#define	C(x) (1ULL << (x))
#define	C0 C(0)
#define	C1 C(1)
#define	C2 C(2)
#define	C3 C(3)
#define	C_ALL 0xFFFFFFFFFFFFFFFFULL
415 
/*
 * Architectural events
 *
 * Initializers for struct events_table_t entries, in the order
 * { eventselect, unitmask, supported_counters, name }, shared by both
 * architectural event tables.  All are marked as countable on every counter
 * (C_ALL).  The last entry has no trailing comma so the macro can be used as
 * the tail of an initializer list.
 */
#define	ARCH_EVENTS_COMMON					\
	{ 0xc0, 0x00, C_ALL, "inst_retired.any_p" },		\
	{ 0x3c, 0x01, C_ALL, "cpu_clk_unhalted.ref_p" },	\
	{ 0x2e, 0x4f, C_ALL, "longest_lat_cache.reference" },	\
	{ 0x2e, 0x41, C_ALL, "longest_lat_cache.miss" },	\
	{ 0xc4, 0x00, C_ALL, "br_inst_retired.all_branches" },	\
	{ 0xc5, 0x00, C_ALL, "br_misp_retired.all_branches" }
424 
/*
 * Architectural event table, non-HTT naming: the first entry (event 0x3c,
 * unit mask 0x00) is named "cpu_clk_unhalted.core", followed by the common
 * architectural events.  This table has no NT_END terminator.
 */
const struct events_table_t arch_events_table_non_htt[] = {
	{ 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.core" },
	ARCH_EVENTS_COMMON
};
429 
/*
 * Architectural event table, HTT naming: the first entry (event 0x3c, unit
 * mask 0x00) is named "cpu_clk_unhalted.thread_p", followed by the common
 * architectural events.  This table has no NT_END terminator.
 */
const struct events_table_t arch_events_table_htt[] = {
	{ 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" },
	ARCH_EVENTS_COMMON
};
434 
/*
 * NOTE(review): arch_events_table starts out NULL and is presumably pointed
 * at one of the two architectural tables above during initialization;
 * known_arch_events and known_ffc_num are presumably entry counts.  None of
 * the assignments are visible in this part of the file -- confirm in
 * core_pcbe_init().
 */
const struct events_table_t *arch_events_table = NULL;
static uint64_t known_arch_events;
static uint64_t known_ffc_num;
438 
439 #define	EVENTS_FAM6_MOD26						\
440 									\
441 { 0x80, 0x04, C0|C1|C2|C3, "l1i.cycles_stalled" },			\
442 { 0x80, 0x01, C0|C1|C2|C3, "l1i.hits" },				\
443 { 0x80, 0x02, C0|C1|C2|C3, "l1i.misses" },				\
444 									\
445 { 0x80, 0x03, C0|C1|C2|C3, "l1i.reads" },				\
446 { 0x82, 0x01, C0|C1|C2|C3, "large_itlb.hit" },				\
447 { 0x87, 0x0F, C0|C1|C2|C3, "ild_stall.any" },				\
448 									\
449 { 0x87, 0x04, C0|C1|C2|C3, "ild_stall.iq_full" },			\
450 { 0x87, 0x01, C0|C1|C2|C3, "ild_stall.lcp" },				\
451 { 0x87, 0x02, C0|C1|C2|C3, "ild_stall.mru" },				\
452 									\
453 { 0x87, 0x08, C0|C1|C2|C3, "ild_stall.regen" },				\
454 { 0xE6, 0x02, C0|C1|C2|C3, "baclear.bad_target" },			\
455 { 0xE6, 0x01, C0|C1|C2|C3, "baclear.clear" },				\
456 									\
457 { 0xE8, 0x01, C0|C1|C2|C3, "bpu_clears.early" },			\
458 { 0xE8, 0x02, C0|C1|C2|C3, "bpu_clears.late" },				\
459 { 0xE5, 0x01, C0|C1|C2|C3, "bpu_missed_call_ret" },			\
460 									\
461 { 0xE0, 0x01, C0|C1|C2|C3, "br_inst_decoded" },				\
462 { 0x88, 0x7F, C0|C1|C2|C3, "br_inst_exec.any" },			\
463 { 0x88, 0x01, C0|C1|C2|C3, "br_inst_exec.cond" },			\
464 									\
465 { 0x88, 0x02, C0|C1|C2|C3, "br_inst_exec.direct" },			\
466 { 0x88, 0x10, C0|C1|C2|C3, "br_inst_exec.direct_near_call" },		\
467 { 0x88, 0x20, C0|C1|C2|C3, "br_inst_exec.indirect_near_call" },		\
468 									\
469 { 0x88, 0x04, C0|C1|C2|C3, "br_inst_exec.indirect_non_call" },		\
470 { 0x88, 0x30, C0|C1|C2|C3, "br_inst_exec.near_calls" },			\
471 { 0x88, 0x07, C0|C1|C2|C3, "br_inst_exec.non_calls" },			\
472 									\
473 { 0x88, 0x08, C0|C1|C2|C3, "br_inst_exec.return_near" },		\
474 { 0x88, 0x40, C0|C1|C2|C3, "br_inst_exec.taken" },			\
475 { 0x89, 0x7F, C0|C1|C2|C3, "br_misp_exec.any" },			\
476 									\
477 { 0x89, 0x01, C0|C1|C2|C3, "br_misp_exec.cond" },			\
478 { 0x89, 0x02, C0|C1|C2|C3, "br_misp_exec.direct" },			\
479 { 0x89, 0x10, C0|C1|C2|C3, "br_misp_exec.direct_near_call" },		\
480 									\
481 { 0x89, 0x20, C0|C1|C2|C3, "br_misp_exec.indirect_near_call" },		\
482 { 0x89, 0x04, C0|C1|C2|C3, "br_misp_exec.indirect_non_call" },		\
483 { 0x89, 0x30, C0|C1|C2|C3, "br_misp_exec.near_calls" },			\
484 									\
485 { 0x89, 0x07, C0|C1|C2|C3, "br_misp_exec.non_calls" },			\
486 { 0x89, 0x08, C0|C1|C2|C3, "br_misp_exec.return_near" },		\
487 { 0x89, 0x40, C0|C1|C2|C3, "br_misp_exec.taken" },			\
488 									\
489 { 0x17, 0x01, C0|C1|C2|C3, "inst_queue_writes" },			\
490 { 0x1E, 0x01, C0|C1|C2|C3, "inst_queue_write_cycles" },			\
491 { 0xA7, 0x01, C0|C1|C2|C3, "baclear_force_iq" },			\
492 									\
493 { 0xD0, 0x01, C0|C1|C2|C3, "macro_insts.decoded" },			\
494 { 0xA6, 0x01, C0|C1|C2|C3, "macro_insts.fusions_decoded" },		\
495 { 0x19, 0x01, C0|C1|C2|C3, "two_uop_insts_decoded" },			\
496 									\
497 { 0x18, 0x01, C0|C1|C2|C3, "inst_decoded.dec0" },			\
498 { 0xD1, 0x04, C0|C1|C2|C3, "uops_decoded.esp_folding" },		\
499 { 0xD1, 0x08, C0|C1|C2|C3, "uops_decoded.esp_sync" },			\
500 									\
501 { 0xD1, 0x02, C0|C1|C2|C3, "uops_decoded.ms" },				\
502 { 0x20, 0x01, C0|C1|C2|C3, "lsd_overflow" },				\
503 { 0x0E, 0x01, C0|C1|C2|C3, "uops_issued.any" },				\
504 									\
505 { 0x0E, 0x02, C0|C1|C2|C3, "uops_issued.fused" },			\
506 { 0xA2, 0x20, C0|C1|C2|C3, "resource_stalls.fpcw" },			\
507 { 0xA2, 0x02, C0|C1|C2|C3, "resource_stalls.load" },			\
508 									\
509 { 0xA2, 0x40, C0|C1|C2|C3, "resource_stalls.mxcsr" },			\
510 { 0xA2, 0x04, C0|C1|C2|C3, "resource_stalls.rs_full" },			\
511 { 0xA2, 0x08, C0|C1|C2|C3, "resource_stalls.store" },			\
512 									\
513 { 0xA2, 0x01, C0|C1|C2|C3, "resource_stalls.any" },			\
514 { 0xD2, 0x01, C0|C1|C2|C3, "rat_stalls.flags" },			\
515 { 0xD2, 0x02, C0|C1|C2|C3, "rat_stalls.registers" },			\
516 									\
517 { 0xD2, 0x04, C0|C1|C2|C3, "rat_stalls.rob_read_port" },		\
518 { 0xD2, 0x08, C0|C1|C2|C3, "rat_stalls.scoreboard" },			\
519 { 0xD2, 0x0F, C0|C1|C2|C3, "rat_stalls.any" },				\
520 									\
521 { 0xD4, 0x01, C0|C1|C2|C3, "seg_rename_stalls" },			\
522 { 0xD5, 0x01, C0|C1|C2|C3, "es_reg_renames" },				\
523 { 0x10, 0x02, C0|C1|C2|C3, "fp_comp_ops_exe.mmx" },			\
524 									\
525 { 0x10, 0x80, C0|C1|C2|C3, "fp_comp_ops_exe.sse_double_precision" },	\
526 { 0x10, 0x04, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp" },			\
527 { 0x10, 0x10, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_packed" },		\
528 									\
529 { 0x10, 0x20, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_scalar" },		\
530 { 0x10, 0x40, C0|C1|C2|C3, "fp_comp_ops_exe.sse_single_precision" },	\
531 { 0x10, 0x08, C0|C1|C2|C3, "fp_comp_ops_exe.sse2_integer" },		\
532 									\
533 { 0x10, 0x01, C0|C1|C2|C3, "fp_comp_ops_exe.x87" },			\
534 { 0x14, 0x01, C0|C1|C2|C3, "arith.cycles_div_busy" },			\
535 { 0x14, 0x02, C0|C1|C2|C3, "arith.mul" },				\
536 									\
537 { 0x12, 0x04, C0|C1|C2|C3, "simd_int_128.pack" },			\
538 { 0x12, 0x20, C0|C1|C2|C3, "simd_int_128.packed_arith" },		\
539 { 0x12, 0x10, C0|C1|C2|C3, "simd_int_128.packed_logical" },		\
540 									\
541 { 0x12, 0x01, C0|C1|C2|C3, "simd_int_128.packed_mpy" },			\
542 { 0x12, 0x02, C0|C1|C2|C3, "simd_int_128.packed_shift" },		\
543 { 0x12, 0x40, C0|C1|C2|C3, "simd_int_128.shuffle_move" },		\
544 									\
545 { 0x12, 0x08, C0|C1|C2|C3, "simd_int_128.unpack" },			\
546 { 0xFD, 0x04, C0|C1|C2|C3, "simd_int_64.pack" },			\
547 { 0xFD, 0x20, C0|C1|C2|C3, "simd_int_64.packed_arith" },		\
548 									\
549 { 0xFD, 0x10, C0|C1|C2|C3, "simd_int_64.packed_logical" },		\
550 { 0xFD, 0x01, C0|C1|C2|C3, "simd_int_64.packed_mpy" },			\
551 { 0xFD, 0x02, C0|C1|C2|C3, "simd_int_64.packed_shift" },		\
552 									\
553 { 0xFD, 0x40, C0|C1|C2|C3, "simd_int_64.shuffle_move" },		\
554 { 0xFD, 0x08, C0|C1|C2|C3, "simd_int_64.unpack" },			\
555 { 0xB1, 0x01, C0|C1|C2|C3, "uops_executed.port0" },			\
556 									\
557 { 0xB1, 0x02, C0|C1|C2|C3, "uops_executed.port1" },			\
558 { 0x40, 0x04, C0|C1, "l1d_cache_ld.e_state" },				\
559 { 0x40, 0x01, C0|C1, "l1d_cache_ld.i_state" },				\
560 									\
561 { 0x40, 0x08, C0|C1, "l1d_cache_ld.m_state" },				\
562 { 0x40, 0x0F, C0|C1, "l1d_cache_ld.mesi" },				\
563 { 0x40, 0x02, C0|C1, "l1d_cache_ld.s_state" },				\
564 									\
565 { 0x41, 0x04, C0|C1, "l1d_cache_st.e_state" },				\
566 { 0x41, 0x08, C0|C1, "l1d_cache_st.m_state" },				\
567 { 0x41, 0x0F, C0|C1, "l1d_cache_st.mesi" },				\
568 									\
569 { 0x41, 0x02, C0|C1, "l1d_cache_st.s_state" },				\
570 { 0x42, 0x04, C0|C1, "l1d_cache_lock.e_state" },			\
571 { 0x42, 0x01, C0|C1, "l1d_cache_lock.hit" },				\
572 									\
573 { 0x42, 0x08, C0|C1, "l1d_cache_lock.m_state" },			\
574 { 0x42, 0x02, C0|C1, "l1d_cache_lock.s_state" },			\
575 { 0x43, 0x01, C0|C1, "l1d_all_ref.any" },				\
576 									\
577 { 0x43, 0x02, C0|C1, "l1d_all_ref.cacheable" },				\
578 { 0x4B, 0x01, C0|C1, "mmx2_mem_exec.nta" },				\
579 { 0x4C, 0x01, C0|C1, "load_hit_pre" },					\
580 									\
581 { 0x4E, 0x02, C0|C1, "l1d_prefetch.miss" },				\
582 { 0x4E, 0x01, C0|C1, "l1d_prefetch.requests" },				\
583 { 0x4E, 0x04, C0|C1, "l1d_prefetch.triggers" },				\
584 									\
585 { 0x51, 0x04, C0|C1, "l1d.m_evict" },					\
586 { 0x51, 0x02, C0|C1, "l1d.m_repl" },					\
587 { 0x51, 0x08, C0|C1, "l1d.m_snoop_evict" },				\
588 									\
589 { 0x51, 0x01, C0|C1, "l1d.repl" },					\
590 { 0x52, 0x01, C0|C1, "l1d_cache_prefetch_lock_fb_hit" },		\
591 { 0x53, 0x01, C0|C1, "l1d_cache_lock_fb_hit" },				\
592 									\
593 { 0x63, 0x02, C0|C1, "cache_lock_cycles.l1d" },				\
594 { 0x63, 0x01, C0|C1, "cache_lock_cycles.l1d_l2" },			\
595 { 0x06, 0x04, C0|C1|C2|C3, "store_blocks.at_ret" },			\
596 									\
597 { 0x06, 0x08, C0|C1|C2|C3, "store_blocks.l1d_block" },			\
598 { 0x06, 0x01, C0|C1|C2|C3, "store_blocks.not_sta" },			\
599 { 0x06, 0x02, C0|C1|C2|C3, "store_blocks.sta" },			\
600 									\
601 { 0x13, 0x07, C0|C1|C2|C3, "load_dispatch.any" },			\
602 { 0x13, 0x04, C0|C1|C2|C3, "load_dispatch.mob" },			\
603 { 0x13, 0x01, C0|C1|C2|C3, "load_dispatch.rs" },			\
604 									\
605 { 0x13, 0x02, C0|C1|C2|C3, "load_dispatch.rs_delayed" },		\
606 { 0x08, 0x01, C0|C1|C2|C3, "dtlb_load_misses.any" },			\
607 { 0x08, 0x20, C0|C1|C2|C3, "dtlb_load_misses.pde_miss" },		\
608 									\
609 { 0x08, 0x02, C0|C1|C2|C3, "dtlb_load_misses.walk_completed" },		\
610 { 0x49, 0x01, C0|C1|C2|C3, "dtlb_misses.any" },				\
611 { 0x49, 0x10, C0|C1|C2|C3, "dtlb_misses.stlb_hit" },			\
612 									\
613 { 0x49, 0x02, C0|C1|C2|C3, "dtlb_misses.walk_completed" },		\
614 { 0x4F, 0x02, C0|C1|C2|C3, "ept.epde_miss" },				\
615 { 0x4F, 0x08, C0|C1|C2|C3, "ept.epdpe_miss" },				\
616 									\
617 { 0x85, 0x01, C0|C1|C2|C3, "itlb_misses.any" },				\
618 { 0x85, 0x02, C0|C1|C2|C3, "itlb_misses.walk_completed" },		\
619 { 0x24, 0xAA, C0|C1|C2|C3, "l2_rqsts.miss" },				\
620 									\
621 { 0x24, 0xFF, C0|C1|C2|C3, "l2_rqsts.references" },			\
622 { 0x24, 0x10, C0|C1|C2|C3, "l2_rqsts.ifetch_hit" },			\
623 { 0x24, 0x20, C0|C1|C2|C3, "l2_rqsts.ifetch_miss" },			\
624 									\
625 { 0x24, 0x30, C0|C1|C2|C3, "l2_rqsts.ifetches" },			\
626 { 0x24, 0x01, C0|C1|C2|C3, "l2_rqsts.ld_hit" },				\
627 { 0x24, 0x02, C0|C1|C2|C3, "l2_rqsts.ld_miss" },			\
628 									\
629 { 0x24, 0x03, C0|C1|C2|C3, "l2_rqsts.loads" },				\
630 { 0x24, 0x40, C0|C1|C2|C3, "l2_rqsts.prefetch_hit" },			\
631 { 0x24, 0x80, C0|C1|C2|C3, "l2_rqsts.prefetch_miss" },			\
632 									\
633 { 0x24, 0xC0, C0|C1|C2|C3, "l2_rqsts.prefetches" },			\
634 { 0x24, 0x04, C0|C1|C2|C3, "l2_rqsts.rfo_hit" },			\
635 { 0x24, 0x08, C0|C1|C2|C3, "l2_rqsts.rfo_miss" },			\
636 									\
637 { 0x24, 0x0C, C0|C1|C2|C3, "l2_rqsts.rfos" },				\
638 { 0x26, 0xFF, C0|C1|C2|C3, "l2_data_rqsts.any" },			\
639 { 0x26, 0x04, C0|C1|C2|C3, "l2_data_rqsts.demand.e_state" },		\
640 									\
641 { 0x26, 0x01, C0|C1|C2|C3, "l2_data_rqsts.demand.i_state" },		\
642 { 0x26, 0x08, C0|C1|C2|C3, "l2_data_rqsts.demand.m_state" },		\
643 { 0x26, 0x0F, C0|C1|C2|C3, "l2_data_rqsts.demand.mesi" },		\
644 									\
645 { 0x26, 0x02, C0|C1|C2|C3, "l2_data_rqsts.demand.s_state" },		\
646 { 0x26, 0x40, C0|C1|C2|C3, "l2_data_rqsts.prefetch.e_state" },		\
647 { 0x26, 0x10, C0|C1|C2|C3, "l2_data_rqsts.prefetch.i_state" },		\
648 									\
649 { 0x26, 0x80, C0|C1|C2|C3, "l2_data_rqsts.prefetch.m_state" },		\
650 { 0x26, 0xF0, C0|C1|C2|C3, "l2_data_rqsts.prefetch.mesi" },		\
651 { 0x26, 0x20, C0|C1|C2|C3, "l2_data_rqsts.prefetch.s_state" },		\
652 									\
653 { 0x27, 0x40, C0|C1|C2|C3, "l2_write.lock.e_state" },			\
654 { 0x27, 0x10, C0|C1|C2|C3, "l2_write.lock.i_state" },			\
655 { 0x27, 0x20, C0|C1|C2|C3, "l2_write.lock.s_state" },			\
656 									\
657 { 0x27, 0x0E, C0|C1|C2|C3, "l2_write.rfo.hit" },			\
658 { 0x27, 0x01, C0|C1|C2|C3, "l2_write.rfo.i_state" },			\
659 { 0x27, 0x08, C0|C1|C2|C3, "l2_write.rfo.m_state" },			\
660 									\
661 { 0x27, 0x0F, C0|C1|C2|C3, "l2_write.rfo.mesi" },			\
662 { 0x27, 0x02, C0|C1|C2|C3, "l2_write.rfo.s_state" },			\
663 { 0x28, 0x04, C0|C1|C2|C3, "l1d_wb_l2.e_state" },			\
664 									\
665 { 0x28, 0x01, C0|C1|C2|C3, "l1d_wb_l2.i_state" },			\
666 { 0x28, 0x08, C0|C1|C2|C3, "l1d_wb_l2.m_state" },			\
667 { 0xF0, 0x80, C0|C1|C2|C3, "l2_transactions.any" },			\
668 									\
669 { 0xF0, 0x20, C0|C1|C2|C3, "l2_transactions.fill" },			\
670 { 0xF0, 0x04, C0|C1|C2|C3, "l2_transactions.ifetch" },			\
671 { 0xF0, 0x10, C0|C1|C2|C3, "l2_transactions.l1d_wb" },			\
672 									\
673 { 0xF0, 0x01, C0|C1|C2|C3, "l2_transactions.load" },			\
674 { 0xF0, 0x08, C0|C1|C2|C3, "l2_transactions.prefetch" },		\
675 { 0xF0, 0x02, C0|C1|C2|C3, "l2_transactions.rfo" },			\
676 									\
677 { 0xF0, 0x40, C0|C1|C2|C3, "l2_transactions.wb" },			\
678 { 0xF1, 0x07, C0|C1|C2|C3, "l2_lines_in.any" },				\
679 { 0xF1, 0x04, C0|C1|C2|C3, "l2_lines_in.e_state" },			\
680 									\
681 { 0xF1, 0x02, C0|C1|C2|C3, "l2_lines_in.s_state" },			\
682 { 0xF2, 0x0F, C0|C1|C2|C3, "l2_lines_out.any" },			\
683 { 0xF2, 0x01, C0|C1|C2|C3, "l2_lines_out.demand_clean" },		\
684 									\
685 { 0xF2, 0x02, C0|C1|C2|C3, "l2_lines_out.demand_dirty" },		\
686 { 0xF2, 0x04, C0|C1|C2|C3, "l2_lines_out.prefetch_clean" },		\
687 { 0x6C, 0x01, C0|C1|C2|C3, "io_transactions" },				\
688 									\
689 { 0xB0, 0x80, C0|C1|C2|C3, "offcore_requests.any" },			\
690 { 0xB0, 0x10, C0|C1|C2|C3, "offcore_requests.any.rfo" },		\
691 { 0xB0, 0x40, C0|C1|C2|C3, "offcore_requests.l1d_writeback" },		\
692 									\
693 { 0xB8, 0x01, C0|C1|C2|C3, "snoop_response.hit" },			\
694 { 0xB8, 0x02, C0|C1|C2|C3, "snoop_response.hite" },			\
695 { 0xB8, 0x04, C0|C1|C2|C3, "snoop_response.hitm" },			\
696 									\
697 { 0xF4, 0x10, C0|C1|C2|C3, "sq_misc.split_lock" },			\
698 { 0x0B, 0x01, C0|C1|C2|C3, "mem_inst_retired.loads" },			\
699 { 0x0B, 0x02, C0|C1|C2|C3, "mem_inst_retired.stores" },			\
700 									\
701 { 0xC0, 0x04, C0|C1|C2|C3, "inst_retired.mmx" },			\
702 { 0xC0, 0x02, C0|C1|C2|C3, "inst_retired.x87" },			\
703 { 0xC7, 0x04, C0|C1|C2|C3, "ssex_uops_retired.packed_double" },		\
704 									\
705 { 0xC7, 0x01, C0|C1|C2|C3, "ssex_uops_retired.packed_single" },		\
706 { 0xC7, 0x08, C0|C1|C2|C3, "ssex_uops_retired.scalar_double" },		\
707 { 0xC7, 0x02, C0|C1|C2|C3, "ssex_uops_retired.scalar_single" },		\
708 									\
709 { 0xC7, 0x10, C0|C1|C2|C3, "ssex_uops_retired.vector_integer" },	\
710 { 0xC2, 0x01, C0|C1|C2|C3, "uops_retired.any" },			\
711 { 0xC2, 0x04, C0|C1|C2|C3, "uops_retired.macro_fused" },		\
712 									\
713 { 0xC8, 0x20, C0|C1|C2|C3, "itlb_miss_retired" },			\
714 { 0xCB, 0x80, C0|C1|C2|C3, "mem_load_retired.dtlb_miss" },		\
715 { 0xCB, 0x40, C0|C1|C2|C3, "mem_load_retired.hit_lfb" },		\
716 									\
717 { 0xCB, 0x01, C0|C1|C2|C3, "mem_load_retired.l1d_hit" },		\
718 { 0xCB, 0x02, C0|C1|C2|C3, "mem_load_retired.l2_hit" },			\
719 { 0xCB, 0x10, C0|C1|C2|C3, "mem_load_retired.llc_miss" },		\
720 									\
721 { 0xCB, 0x04, C0|C1|C2|C3, "mem_load_retired.llc_unshared_hit" },	\
722 { 0xCB, 0x08, C0|C1|C2|C3, "mem_load_retired.other_core_l2_hit_hitm" },	\
723 { 0x0F, 0x02, C0|C1|C2|C3, "mem_uncore_retired.other_core_l2_hitm" },	\
724 									\
725 { 0x0F, 0x08, C0|C1|C2|C3, "mem_uncore_retired.remote_cache_local_home_hit" },\
726 { 0x0F, 0x10, C0|C1|C2|C3, "mem_uncore_retired.remote_dram" },		\
727 { 0x0F, 0x20, C0|C1|C2|C3, "mem_uncore_retired.local_dram" },		\
728 									\
729 { 0x0C, 0x01, C0|C1|C2|C3, "mem_store_retired.dtlb_miss" },		\
730 { 0xC4, 0x01, C0|C1|C2|C3, "br_inst_retired.conditional" },		\
731 { 0xC4, 0x02, C0|C1|C2|C3, "br_inst_retired.near_call" },		\
732 									\
733 { 0xC5, 0x02, C0|C1|C2|C3, "br_misp_retired.near_call" },		\
734 { 0xDB, 0x01, C0|C1|C2|C3, "uop_unfusion" },				\
735 { 0xF7, 0x01, C0|C1|C2|C3, "fp_assist.all" },				\
736 									\
737 { 0xF7, 0x04, C0|C1|C2|C3, "fp_assist.input" },				\
738 { 0xF7, 0x02, C0|C1|C2|C3, "fp_assist.output" },			\
739 { 0xCC, 0x03, C0|C1|C2|C3, "fp_mmx_trans.any" },			\
740 									\
741 { 0xCC, 0x01, C0|C1|C2|C3, "fp_mmx_trans.to_fp" },			\
742 { 0xCC, 0x02, C0|C1|C2|C3, "fp_mmx_trans.to_mmx" },			\
743 { 0xC3, 0x04, C0|C1|C2|C3, "machine_clears.smc" }
744 
745 
/*
 * Additional struct events_table_t initializers used only by
 * events_fam6_mod37[], where they follow EVENTS_FAM6_MOD26.  The last entry
 * has no trailing comma so the macro can be followed by "," in an
 * initializer list.
 */
#define	EVENTS_FAM6_MOD37						\
{ 0xB0, 0x08, C0|C1|C2|C3, "offcore_requests.any.read" },		\
{ 0xB0, 0x01, C0|C1|C2|C3, "offcore_requests.demand.read_data" },	\
{ 0xB0, 0x04, C0|C1|C2|C3, "offcore_requests.demand.rfo" }
750 
/*
 * Model-specific event table in use.
 * NOTE(review): starts out NULL; presumably set during initialization to one
 * of the tables below -- the assignment is not visible in this part of the
 * file.
 */
static const struct events_table_t *events_table = NULL;

/* EVENTS_FAM6_MOD26 entries; the NT_END eventselect entry ends the table */
const struct events_table_t events_fam6_mod26[] = {
	EVENTS_FAM6_MOD26,
	{ NT_END, 0, 0, "" }
};

/*
 * EVENTS_FAM6_MOD26 entries followed by the EVENTS_FAM6_MOD37 additions; the
 * NT_END eventselect entry ends the table.
 */
const struct events_table_t events_fam6_mod37[] = {
	EVENTS_FAM6_MOD26,
	EVENTS_FAM6_MOD37,
	{ NT_END, 0, 0, "" }
};
763 
764 /*
765  * Initialize string containing list of supported general-purpose counter
766  * events for processors of Family 6 Models 15 and 23
767  */
768 static void
769 pcbe_init_fam6_model15_23()
770 {
771 	const struct nametable_fam6mod15_23	*n;
772 	const struct nametable_fam6mod15_23	*picspecific_events;
773 	size_t			common_size;
774 	size_t			size;
775 	uint64_t		i;
776 
777 	gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);
778 
779 	/* Calculate space needed to save all the common event names */
780 	common_size = 0;
781 	for (n = cmn_gpc_events_f6m15_23; n->event_num != NT_END; n++) {
782 		common_size += strlen(n->name) + 1;
783 	}
784 
785 	for (i = 0; i < num_gpc; i++) {
786 		size = 0;
787 		switch (i) {
788 			case 0:
789 				picspecific_events = pic0_events;
790 				break;
791 			case 1:
792 				picspecific_events = pic1_events;
793 				break;
794 			default:
795 				picspecific_events = NULL;
796 				break;
797 		}
798 		if (picspecific_events != NULL) {
799 			for (n = picspecific_events;
800 			    n->event_num != NT_END;
801 			    n++) {
802 				size += strlen(n->name) + 1;
803 			}
804 		}
805 
806 		gpc_names[i] =
807 		    kmem_alloc(size + common_size + 1, KM_SLEEP);
808 
809 		gpc_names[i][0] = '\0';
810 		if (picspecific_events != NULL) {
811 			for (n = picspecific_events;
812 			    n->event_num != NT_END;
813 			    n++) {
814 				(void) strcat(gpc_names[i], n->name);
815 				(void) strcat(gpc_names[i], ",");
816 			}
817 		}
818 		for (n = cmn_gpc_events_f6m15_23; n->event_num != NT_END;
819 		    n++) {
820 			(void) strcat(gpc_names[i], n->name);
821 			(void) strcat(gpc_names[i], ",");
822 		}
823 		/*
824 		 * Remove trailing comma.
825 		 */
826 		gpc_names[i][common_size + size - 1] = '\0';
827 	}
828 }
829 
830 static int
831 core_pcbe_init(void)
832 {
833 	struct cpuid_regs	cp;
834 	size_t			size;
835 	uint64_t		i;
836 	uint64_t		j;
837 	uint64_t		arch_events_vector_length;
838 	size_t			arch_events_string_length;
839 
840 	if (cpuid_getvendor(CPU) != X86_VENDOR_Intel)
841 		return (-1);
842 
843 	/* Obtain Basic CPUID information */
844 	cp.cp_eax = 0x0;
845 	(void) __cpuid_insn(&cp);
846 
847 	/* No Architectural Performance Monitoring Leaf returned by CPUID */
848 	if (cp.cp_eax < 0xa) {
849 		return (-1);
850 	}
851 
852 	/* Obtain the Architectural Performance Monitoring Leaf */
853 	cp.cp_eax = 0xa;
854 	(void) __cpuid_insn(&cp);
855 
856 	versionid = cp.cp_eax & 0xFF;
857 
858 	/*
859 	 * All Family 6 Model 15 and Model 23 processors have fixed-function
860 	 * counters.  These counters were made Architectural with
861 	 * Family 6 Model 15 Stepping 9.
862 	 */
863 	switch (versionid) {
864 
865 		case 0:
866 			return (-1);
867 
868 		case 2:
869 			num_ffc = cp.cp_edx & 0x1F;
870 			width_ffc = (cp.cp_edx >> 5) & 0xFF;
871 
872 			/*
873 			 * Some processors have an errata (AW34) where
874 			 * versionid is reported as 2 when actually 1.
875 			 * In this case, fixed-function counters are
876 			 * model-specific as in Version 1.
877 			 */
878 			if (num_ffc != 0) {
879 				break;
880 			}
881 			/* FALLTHROUGH */
882 		case 1:
883 			num_ffc = 3;
884 			width_ffc = 40;
885 			versionid = 1;
886 			break;
887 
888 		default:
889 			num_ffc = cp.cp_edx & 0x1F;
890 			width_ffc = (cp.cp_edx >> 5) & 0xFF;
891 			break;
892 	}
893 
894 
895 	if (num_ffc >= 64)
896 		return (-1);
897 
898 	/* Set HTT-specific names of architectural & FFC events */
899 	if (x86_feature & X86_HTT) {
900 		ffc_names = ffc_names_htt;
901 		arch_events_table = arch_events_table_htt;
902 		known_arch_events =
903 		    sizeof (arch_events_table_htt) /
904 		    sizeof (struct events_table_t);
905 		known_ffc_num =
906 		    sizeof (ffc_names_htt) / sizeof (char *);
907 	} else {
908 		ffc_names = ffc_names_non_htt;
909 		arch_events_table = arch_events_table_non_htt;
910 		known_arch_events =
911 		    sizeof (arch_events_table_non_htt) /
912 		    sizeof (struct events_table_t);
913 		known_ffc_num =
914 		    sizeof (ffc_names_non_htt) / sizeof (char *);
915 	}
916 
917 	if (num_ffc >= known_ffc_num) {
918 		/*
919 		 * The system seems to have more fixed-function counters than
920 		 * what this PCBE is able to handle correctly.  Default to the
921 		 * maximum number of fixed-function counters that this driver
922 		 * is aware of.
923 		 */
924 		num_ffc = known_ffc_num - 1;
925 	}
926 
927 	mask_ffc = BITMASK_XBITS(width_ffc);
928 
929 	num_gpc = (cp.cp_eax >> 8) & 0xFF;
930 	width_gpc = (cp.cp_eax >> 16) & 0xFF;
931 
932 	if (num_gpc >= 64)
933 		return (-1);
934 
935 	mask_gpc = BITMASK_XBITS(width_gpc);
936 
937 	total_pmc = num_gpc + num_ffc;
938 
939 	control_gpc = BITMASK_XBITS(num_gpc);
940 	control_ffc = BITMASK_XBITS(num_ffc);
941 
942 	control_mask = (control_ffc << 32) | control_gpc;
943 
944 	if (total_pmc > 64) {
945 		/* Too wide for the overflow bitmap */
946 		return (-1);
947 	}
948 
949 	if ((cpuid_getfamily(CPU) == 6) &&
950 	    ((cpuid_getmodel(CPU) == 15) || (cpuid_getmodel(CPU) == 23))) {
951 		(void) snprintf(core_impl_name, IMPL_NAME_LEN,
952 		    "Core Microarchitecture");
953 		pcbe_init_fam6_model15_23();
954 		return (0);
955 	}
956 
957 	(void) snprintf(core_impl_name, IMPL_NAME_LEN,
958 	    "Intel Arch PerfMon v%d on Family %d Model %d",
959 	    versionid, cpuid_getfamily(CPU), cpuid_getmodel(CPU));
960 
961 	arch_events_vector_length = (cp.cp_eax >> 24) & 0xFF;
962 
963 	ASSERT(known_arch_events == arch_events_vector_length);
964 
965 	/*
966 	 * To handle the case where a new performance monitoring setup is run
967 	 * on a non-debug kernel
968 	 */
969 	if (known_arch_events > arch_events_vector_length) {
970 		known_arch_events = arch_events_vector_length;
971 	} else {
972 		arch_events_vector_length = known_arch_events;
973 	}
974 
975 	arch_events_vector = cp.cp_ebx &
976 	    BITMASK_XBITS(arch_events_vector_length);
977 
978 	/* General-purpose Counters (GPC) */
979 	gpc_names = NULL;
980 
981 	if (num_gpc > 0) {
982 
983 		gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);
984 
985 		/* Calculate space required for the architectural gpc events */
986 		arch_events_string_length = 0;
987 		for (i = 0; i < known_arch_events; i++) {
988 			if (((1U << i) & arch_events_vector) == 0) {
989 				arch_events_string_length +=
990 				    strlen(arch_events_table[i].name) + 1;
991 			}
992 		}
993 
994 		if (cpuid_getmodel(CPU) == 26) {
995 			events_table = events_fam6_mod26;
996 		} else if (cpuid_getmodel(CPU) == 37) {
997 			events_table = events_fam6_mod37;
998 		}
999 
1000 		for (i = 0; i < num_gpc; i++) {
1001 
1002 			/* Determine length of supported event names */
1003 			size = arch_events_string_length;
1004 			for (j = 0; events_table != NULL &&
1005 			    events_table[j].eventselect != NT_END;
1006 			    j++) {
1007 				if (C(i) & events_table[j].supported_counters) {
1008 					size += strlen(events_table[j].name) +
1009 					    1;
1010 				}
1011 			}
1012 
1013 			/* Allocate memory for this pics list */
1014 			gpc_names[i] = kmem_alloc(size + 1, KM_SLEEP);
1015 			gpc_names[i][0] = '\0';
1016 			if (size == 0) {
1017 				continue;
1018 			}
1019 
1020 			/* Create the list */
1021 			for (j = 0; j < known_arch_events; j++) {
1022 				if (((1U << j) & arch_events_vector) == 0) {
1023 					(void) strcat(gpc_names[i],
1024 					    arch_events_table[j].name);
1025 					(void) strcat(gpc_names[i], ",");
1026 				}
1027 			}
1028 
1029 			for (j = 0; events_table != NULL &&
1030 			    events_table[j].eventselect != NT_END;
1031 			    j++) {
1032 				if (C(i) & events_table[j].supported_counters) {
1033 					(void) strcat(gpc_names[i],
1034 					    events_table[j].name);
1035 					(void) strcat(gpc_names[i], ",");
1036 				}
1037 			}
1038 			/*
1039 			 * Remove trailing comma.
1040 			 */
1041 			gpc_names[i][size - 1] = '\0';
1042 		}
1043 	}
1044 	/*
1045 	 * Fixed-function Counters (FFC) are already listed individually in
1046 	 * ffc_names[]
1047 	 */
1048 	return (0);
1049 }
1050 
1051 static uint_t core_pcbe_ncounters()
1052 {
1053 	return (total_pmc);
1054 }
1055 
1056 static const char *core_pcbe_impl_name(void)
1057 {
1058 	return (core_impl_name);
1059 }
1060 
1061 static const char *core_pcbe_cpuref(void)
1062 {
1063 	return (core_cpuref);
1064 }
1065 
1066 static char *core_pcbe_list_events(uint_t picnum)
1067 {
1068 	ASSERT(picnum < cpc_ncounters);
1069 
1070 	if (picnum < num_gpc) {
1071 		return (gpc_names[picnum]);
1072 	} else {
1073 		return (ffc_names[picnum - num_gpc]);
1074 	}
1075 }
1076 
1077 static char *core_pcbe_list_attrs(void)
1078 {
1079 	if (versionid >= 3) {
1080 		return ("edge,inv,umask,cmask,anythr");
1081 	} else {
1082 		return ("edge,pc,inv,umask,cmask");
1083 	}
1084 }
1085 
1086 static const struct nametable_fam6mod15_23 *
1087 find_gpcevent_f6m15_23(char *name,
1088     const struct nametable_fam6mod15_23 *nametable)
1089 {
1090 	const struct nametable_fam6mod15_23 *n;
1091 	int compare_result;
1092 
1093 	compare_result = -1;
1094 	for (n = nametable; n->event_num != NT_END; n++) {
1095 		compare_result = strcmp(name, n->name);
1096 		if (compare_result <= 0) {
1097 			break;
1098 		}
1099 	}
1100 
1101 	if (compare_result == 0) {
1102 		return (n);
1103 	}
1104 
1105 	return (NULL);
1106 }
1107 
1108 static const struct events_table_t *
1109 find_gpcevent(char *name)
1110 {
1111 	int i;
1112 
1113 	for (i = 0; i < known_arch_events; i++) {
1114 		if (strcmp(name, arch_events_table[i].name) == 0) {
1115 			if (((1U << i) & arch_events_vector) == 0) {
1116 				return (&arch_events_table[i]);
1117 			}
1118 		}
1119 	}
1120 
1121 	if (events_table == NULL) {
1122 		return (NULL);
1123 	}
1124 
1125 	for (i = 0; events_table[i].eventselect != NT_END; i++) {
1126 		if (strcmp(name, events_table[i].name) == 0) {
1127 			return (&events_table[i]);
1128 		}
1129 	}
1130 
1131 	return (NULL);
1132 }
1133 static uint64_t
1134 core_pcbe_event_coverage(char *event)
1135 {
1136 	uint64_t bitmap;
1137 	uint64_t bitmask;
1138 	const struct events_table_t *n;
1139 	int i;
1140 
1141 	bitmap = 0;
1142 
1143 	/* Is it an event that a GPC can track? */
1144 	if (versionid >= 3) {
1145 		n = find_gpcevent(event);
1146 		if (n != NULL) {
1147 			bitmap |= (n->supported_counters &
1148 			    BITMASK_XBITS(num_gpc));
1149 		}
1150 	} else {
1151 		if (find_gpcevent_f6m15_23(event, cmn_gpc_events_f6m15_23) !=
1152 		    NULL) {
1153 			bitmap |= BITMASK_XBITS(num_gpc);
1154 		} else if (find_gpcevent_f6m15_23(event, pic0_events) != NULL) {
1155 			bitmap |= 1ULL;
1156 		} else if (find_gpcevent_f6m15_23(event, pic1_events) != NULL) {
1157 			bitmap |= 1ULL << 1;
1158 		}
1159 	}
1160 
1161 	/* Check if the event can be counted in the fixed-function counters */
1162 	if (num_ffc > 0) {
1163 		bitmask = 1ULL << num_gpc;
1164 		for (i = 0; i < num_ffc; i++) {
1165 			if (strcmp(event, ffc_names[i]) == 0) {
1166 				bitmap |= bitmask;
1167 			}
1168 			bitmask = bitmask << 1;
1169 		}
1170 	}
1171 
1172 	return (bitmap);
1173 }
1174 
1175 static uint64_t
1176 core_pcbe_overflow_bitmap(void)
1177 {
1178 	uint64_t interrupt_status;
1179 	uint64_t intrbits_ffc;
1180 	uint64_t intrbits_gpc;
1181 	extern int kcpc_hw_overflow_intr_installed;
1182 	uint64_t overflow_bitmap;
1183 
1184 	RDMSR(PERF_GLOBAL_STATUS, interrupt_status);
1185 	WRMSR(PERF_GLOBAL_OVF_CTRL, interrupt_status);
1186 
1187 	interrupt_status = interrupt_status & control_mask;
1188 	intrbits_ffc = (interrupt_status >> 32) & control_ffc;
1189 	intrbits_gpc = interrupt_status & control_gpc;
1190 	overflow_bitmap = (intrbits_ffc << num_gpc) | intrbits_gpc;
1191 
1192 	ASSERT(kcpc_hw_overflow_intr_installed);
1193 	(*kcpc_hw_enable_cpc_intr)();
1194 
1195 	return (overflow_bitmap);
1196 }
1197 
1198 static int
1199 check_cpc_securitypolicy(core_pcbe_config_t *conf,
1200     const struct nametable_fam6mod15_23 *n)
1201 {
1202 	if (conf->core_ctl & n->restricted_bits) {
1203 		if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1204 			return (CPC_ATTR_REQUIRES_PRIVILEGE);
1205 		}
1206 	}
1207 	return (0);
1208 }
1209 
1210 static int
1211 configure_gpc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
1212     uint_t nattrs, kcpc_attr_t *attrs, void **data)
1213 {
1214 	core_pcbe_config_t	conf;
1215 	const struct nametable_fam6mod15_23	*n;
1216 	const struct nametable_fam6mod15_23	*m;
1217 	const struct nametable_fam6mod15_23	*picspecific_events;
1218 	struct nametable_fam6mod15_23	nt_raw = { "", 0x0, 0x0 };
1219 	uint_t			i;
1220 	long			event_num;
1221 	const struct events_table_t *eventcode;
1222 	int			umask_known;
1223 
1224 	if (((preset & BITS_EXTENDED_FROM_31) != 0) &&
1225 	    ((preset & BITS_EXTENDED_FROM_31) !=
1226 	    BITS_EXTENDED_FROM_31)) {
1227 
1228 		/*
1229 		 * Bits beyond bit-31 in the general-purpose counters can only
1230 		 * be written to by extension of bit 31.  We cannot preset
1231 		 * these bits to any value other than all 1s or all 0s.
1232 		 */
1233 		return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1234 	}
1235 
1236 	if (versionid >= 3) {
1237 		eventcode = find_gpcevent(event);
1238 		if (eventcode != NULL) {
1239 			if ((C(picnum) & eventcode->supported_counters) == 0) {
1240 				return (CPC_PIC_NOT_CAPABLE);
1241 			}
1242 			conf.core_ctl = eventcode->eventselect;
1243 			conf.core_ctl |= eventcode->unitmask <<
1244 			    CORE_UMASK_SHIFT;
1245 			umask_known = 1;
1246 		} else {
1247 			/* Event specified as raw event code */
1248 			if (ddi_strtol(event, NULL, 0, &event_num) != 0) {
1249 				return (CPC_INVALID_EVENT);
1250 			}
1251 			conf.core_ctl = event_num & 0xFF;
1252 			umask_known = 0;
1253 		}
1254 	} else {
1255 		umask_known = 0;
1256 		n = find_gpcevent_f6m15_23(event, cmn_gpc_events_f6m15_23);
1257 		if (n == NULL) {
1258 			switch (picnum) {
1259 				case 0:
1260 					picspecific_events = pic0_events;
1261 					break;
1262 				case 1:
1263 					picspecific_events = pic1_events;
1264 					break;
1265 				default:
1266 					picspecific_events = NULL;
1267 					break;
1268 			}
1269 			if (picspecific_events != NULL) {
1270 				n = find_gpcevent_f6m15_23(event,
1271 				    picspecific_events);
1272 			}
1273 		}
1274 		if (n == NULL) {
1275 			/*
1276 			 * Check if this is a case where the event was
1277 			 * specified directly by its event number instead of
1278 			 * its name string.
1279 			 */
1280 			if (ddi_strtol(event, NULL, 0, &event_num) != 0) {
1281 				return (CPC_INVALID_EVENT);
1282 			}
1283 
1284 			event_num = event_num & 0xFF;
1285 
1286 			/*
1287 			 * Search the event table to find out if the event
1288 			 * specified has an privilege requirements.  Currently
1289 			 * none of the pic-specific counters have any privilege
1290 			 * requirements.  Hence only the table
1291 			 * cmn_gpc_events_f6m15_23 is searched.
1292 			 */
1293 			for (m = cmn_gpc_events_f6m15_23;
1294 			    m->event_num != NT_END;
1295 			    m++) {
1296 				if (event_num == m->event_num) {
1297 					break;
1298 				}
1299 			}
1300 			if (m->event_num == NT_END) {
1301 				nt_raw.event_num = (uint8_t)event_num;
1302 				n = &nt_raw;
1303 			} else {
1304 				n = m;
1305 			}
1306 		}
1307 		conf.core_ctl = n->event_num; /* Event Select */
1308 	}
1309 
1310 
1311 	conf.core_picno = picnum;
1312 	conf.core_pictype = CORE_GPC;
1313 	conf.core_rawpic = preset & mask_gpc;
1314 
1315 	conf.core_pes = GPC_BASE_PES + picnum;
1316 	conf.core_pmc = GPC_BASE_PMC + picnum;
1317 
1318 	for (i = 0; i < nattrs; i++) {
1319 		if (strncmp(attrs[i].ka_name, "umask", 6) == 0) {
1320 			if (umask_known == 1) {
1321 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1322 			}
1323 			if ((attrs[i].ka_val | CORE_UMASK_MASK) !=
1324 			    CORE_UMASK_MASK) {
1325 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1326 			}
1327 			conf.core_ctl |= attrs[i].ka_val <<
1328 			    CORE_UMASK_SHIFT;
1329 		} else  if (strncmp(attrs[i].ka_name, "edge", 6) == 0) {
1330 			if (attrs[i].ka_val != 0)
1331 				conf.core_ctl |= CORE_EDGE;
1332 		} else if (strncmp(attrs[i].ka_name, "inv", 4) == 0) {
1333 			if (attrs[i].ka_val != 0)
1334 				conf.core_ctl |= CORE_INV;
1335 		} else if (strncmp(attrs[i].ka_name, "cmask", 6) == 0) {
1336 			if ((attrs[i].ka_val | CORE_CMASK_MASK) !=
1337 			    CORE_CMASK_MASK) {
1338 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1339 			}
1340 			conf.core_ctl |= attrs[i].ka_val <<
1341 			    CORE_CMASK_SHIFT;
1342 		} else if (strncmp(attrs[i].ka_name, "anythr", 7) ==
1343 		    0) {
1344 			if (versionid < 3)
1345 				return (CPC_INVALID_ATTRIBUTE);
1346 			if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1347 				return (CPC_ATTR_REQUIRES_PRIVILEGE);
1348 			}
1349 			if (attrs[i].ka_val != 0)
1350 				conf.core_ctl |= CORE_ANYTHR;
1351 		} else {
1352 			return (CPC_INVALID_ATTRIBUTE);
1353 		}
1354 	}
1355 
1356 	if (flags & CPC_COUNT_USER)
1357 		conf.core_ctl |= CORE_USR;
1358 	if (flags & CPC_COUNT_SYSTEM)
1359 		conf.core_ctl |= CORE_OS;
1360 	if (flags & CPC_OVF_NOTIFY_EMT)
1361 		conf.core_ctl |= CORE_INT;
1362 	conf.core_ctl |= CORE_EN;
1363 
1364 	if (versionid < 3) {
1365 		if (check_cpc_securitypolicy(&conf, n) != 0) {
1366 			return (CPC_ATTR_REQUIRES_PRIVILEGE);
1367 		}
1368 	}
1369 
1370 	*data = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
1371 	*((core_pcbe_config_t *)*data) = conf;
1372 
1373 	return (0);
1374 }
1375 
1376 static int
1377 configure_ffc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
1378     uint_t nattrs, kcpc_attr_t *attrs, void **data)
1379 {
1380 	core_pcbe_config_t	*conf;
1381 	uint_t			i;
1382 
1383 	if (picnum - num_gpc >= num_ffc) {
1384 		return (CPC_INVALID_PICNUM);
1385 	}
1386 
1387 	if (strcmp(ffc_names[picnum-num_gpc], event) != 0) {
1388 		return (CPC_INVALID_EVENT);
1389 	}
1390 
1391 	if ((versionid < 3) && (nattrs != 0)) {
1392 		return (CPC_INVALID_ATTRIBUTE);
1393 	}
1394 
1395 	conf = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
1396 	conf->core_ctl = 0;
1397 
1398 	for (i = 0; i < nattrs; i++) {
1399 		if (strncmp(attrs[i].ka_name, "anythr", 7) == 0) {
1400 			if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1401 				return (CPC_ATTR_REQUIRES_PRIVILEGE);
1402 			}
1403 			if (attrs[i].ka_val != 0) {
1404 				conf->core_ctl |= CORE_FFC_ANYTHR;
1405 			}
1406 		} else {
1407 			kmem_free(conf, sizeof (core_pcbe_config_t));
1408 			return (CPC_INVALID_ATTRIBUTE);
1409 		}
1410 	}
1411 
1412 	conf->core_picno = picnum;
1413 	conf->core_pictype = CORE_FFC;
1414 	conf->core_rawpic = preset & mask_ffc;
1415 	conf->core_pmc = FFC_BASE_PMC + (picnum - num_gpc);
1416 
1417 	/* All fixed-function counters have the same control register */
1418 	conf->core_pes = PERF_FIXED_CTR_CTRL;
1419 
1420 	if (flags & CPC_COUNT_USER)
1421 		conf->core_ctl |= CORE_FFC_USR_EN;
1422 	if (flags & CPC_COUNT_SYSTEM)
1423 		conf->core_ctl |= CORE_FFC_OS_EN;
1424 	if (flags & CPC_OVF_NOTIFY_EMT)
1425 		conf->core_ctl |= CORE_FFC_PMI;
1426 
1427 	*data = conf;
1428 	return (0);
1429 }
1430 
1431 /*ARGSUSED*/
1432 static int
1433 core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
1434     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
1435     void *token)
1436 {
1437 	int			ret;
1438 	core_pcbe_config_t	*conf;
1439 
1440 	/*
1441 	 * If we've been handed an existing configuration, we need only preset
1442 	 * the counter value.
1443 	 */
1444 	if (*data != NULL) {
1445 		conf = *data;
1446 		ASSERT(conf->core_pictype == CORE_GPC ||
1447 		    conf->core_pictype == CORE_FFC);
1448 		if (conf->core_pictype == CORE_GPC)
1449 			conf->core_rawpic = preset & mask_gpc;
1450 		else /* CORE_FFC */
1451 			conf->core_rawpic = preset & mask_ffc;
1452 		return (0);
1453 	}
1454 
1455 	if (picnum >= total_pmc) {
1456 		return (CPC_INVALID_PICNUM);
1457 	}
1458 
1459 	if (picnum < num_gpc) {
1460 		ret = configure_gpc(picnum, event, preset, flags,
1461 		    nattrs, attrs, data);
1462 	} else {
1463 		ret = configure_ffc(picnum, event, preset, flags,
1464 		    nattrs, attrs, data);
1465 	}
1466 	return (ret);
1467 }
1468 
1469 static void
1470 core_pcbe_program(void *token)
1471 {
1472 	core_pcbe_config_t	*cfg;
1473 	uint64_t		perf_global_ctrl;
1474 	uint64_t		perf_fixed_ctr_ctrl;
1475 	uint64_t		curcr4;
1476 
1477 	core_pcbe_allstop();
1478 
1479 	curcr4 = getcr4();
1480 	if (kcpc_allow_nonpriv(token))
1481 		/* Allow RDPMC at any ring level */
1482 		setcr4(curcr4 | CR4_PCE);
1483 	else
1484 		/* Allow RDPMC only at ring 0 */
1485 		setcr4(curcr4 & ~CR4_PCE);
1486 
1487 	/* Clear any overflow indicators before programming the counters */
1488 	WRMSR(PERF_GLOBAL_OVF_CTRL, MASK_CONDCHGD_OVFBUFFER | control_mask);
1489 
1490 	cfg = NULL;
1491 	perf_global_ctrl = 0;
1492 	perf_fixed_ctr_ctrl = 0;
1493 	cfg = (core_pcbe_config_t *)kcpc_next_config(token, cfg, NULL);
1494 	while (cfg != NULL) {
1495 		ASSERT(cfg->core_pictype == CORE_GPC ||
1496 		    cfg->core_pictype == CORE_FFC);
1497 
1498 		if (cfg->core_pictype == CORE_GPC) {
1499 			/*
1500 			 * General-purpose counter registers have write
1501 			 * restrictions where only the lower 32-bits can be
1502 			 * written to.  The rest of the relevant bits are
1503 			 * written to by extension from bit 31 (all ZEROS if
1504 			 * bit-31 is ZERO and all ONE if bit-31 is ONE).  This
1505 			 * makes it possible to write to the counter register
1506 			 * only values that have all ONEs or all ZEROs in the
1507 			 * higher bits.
1508 			 */
1509 			if (((cfg->core_rawpic & BITS_EXTENDED_FROM_31) == 0) ||
1510 			    ((cfg->core_rawpic & BITS_EXTENDED_FROM_31) ==
1511 			    BITS_EXTENDED_FROM_31)) {
1512 				/*
1513 				 * Straighforward case where the higher bits
1514 				 * are all ZEROs or all ONEs.
1515 				 */
1516 				WRMSR(cfg->core_pmc,
1517 				    (cfg->core_rawpic & mask_gpc));
1518 			} else {
1519 				/*
1520 				 * The high order bits are not all the same.
1521 				 * We save what is currently in the registers
1522 				 * and do not write to it.  When we want to do
1523 				 * a read from this register later (in
1524 				 * core_pcbe_sample()), we subtract the value
1525 				 * we save here to get the actual event count.
1526 				 *
1527 				 * NOTE: As a result, we will not get overflow
1528 				 * interrupts as expected.
1529 				 */
1530 				RDMSR(cfg->core_pmc, cfg->core_rawpic);
1531 				cfg->core_rawpic = cfg->core_rawpic & mask_gpc;
1532 			}
1533 			WRMSR(cfg->core_pes, cfg->core_ctl);
1534 			perf_global_ctrl |= 1ull << cfg->core_picno;
1535 		} else {
1536 			/*
1537 			 * Unlike the general-purpose counters, all relevant
1538 			 * bits of fixed-function counters can be written to.
1539 			 */
1540 			WRMSR(cfg->core_pmc, cfg->core_rawpic & mask_ffc);
1541 
1542 			/*
1543 			 * Collect the control bits for all the
1544 			 * fixed-function counters and write it at one shot
1545 			 * later in this function
1546 			 */
1547 			perf_fixed_ctr_ctrl |= cfg->core_ctl <<
1548 			    ((cfg->core_picno - num_gpc) * CORE_FFC_ATTR_SIZE);
1549 			perf_global_ctrl |=
1550 			    1ull << (cfg->core_picno - num_gpc + 32);
1551 		}
1552 
1553 		cfg = (core_pcbe_config_t *)
1554 		    kcpc_next_config(token, cfg, NULL);
1555 	}
1556 
1557 	/* Enable all the counters */
1558 	WRMSR(PERF_FIXED_CTR_CTRL, perf_fixed_ctr_ctrl);
1559 	WRMSR(PERF_GLOBAL_CTRL, perf_global_ctrl);
1560 }
1561 
1562 static void
1563 core_pcbe_allstop(void)
1564 {
1565 	/* Disable all the counters together */
1566 	WRMSR(PERF_GLOBAL_CTRL, ALL_STOPPED);
1567 
1568 	setcr4(getcr4() & ~CR4_PCE);
1569 }
1570 
1571 static void
1572 core_pcbe_sample(void *token)
1573 {
1574 	uint64_t		*daddr;
1575 	uint64_t		curpic;
1576 	core_pcbe_config_t	*cfg;
1577 	uint64_t			counter_mask;
1578 
1579 	cfg = (core_pcbe_config_t *)kcpc_next_config(token, NULL, &daddr);
1580 	while (cfg != NULL) {
1581 		ASSERT(cfg->core_pictype == CORE_GPC ||
1582 		    cfg->core_pictype == CORE_FFC);
1583 
1584 		curpic = rdmsr(cfg->core_pmc);
1585 
1586 		DTRACE_PROBE4(core__pcbe__sample,
1587 		    uint64_t, cfg->core_pmc,
1588 		    uint64_t, curpic,
1589 		    uint64_t, cfg->core_rawpic,
1590 		    uint64_t, *daddr);
1591 
1592 		if (cfg->core_pictype == CORE_GPC) {
1593 			counter_mask = mask_gpc;
1594 		} else {
1595 			counter_mask = mask_ffc;
1596 		}
1597 		curpic = curpic & counter_mask;
1598 		if (curpic >= cfg->core_rawpic) {
1599 			*daddr += curpic - cfg->core_rawpic;
1600 		} else {
1601 			/* Counter overflowed since our last sample */
1602 			*daddr += counter_mask - (cfg->core_rawpic - curpic) +
1603 			    1;
1604 		}
1605 		cfg->core_rawpic = *daddr & counter_mask;
1606 
1607 		cfg =
1608 		    (core_pcbe_config_t *)kcpc_next_config(token, cfg, &daddr);
1609 	}
1610 }
1611 
1612 static void
1613 core_pcbe_free(void *config)
1614 {
1615 	kmem_free(config, sizeof (core_pcbe_config_t));
1616 }
1617 
/*
 * Module linkage information: ties core_pcbe_ops to the PCBE module
 * operations (mod_pcbeops) under the name "Core Performance Counters".
 */
static struct modlpcbe core_modlpcbe = {
	&mod_pcbeops,
	"Core Performance Counters",
	&core_pcbe_ops
};
1623 
/* Linkage structure passed to mod_install(), mod_remove() and mod_info() */
static struct modlinkage core_modl = {
	MODREV_1,
	&core_modlpcbe,
};
1628 
1629 int
1630 _init(void)
1631 {
1632 	if (core_pcbe_init() != 0) {
1633 		return (ENOTSUP);
1634 	}
1635 	return (mod_install(&core_modl));
1636 }
1637 
1638 int
1639 _fini(void)
1640 {
1641 	return (mod_remove(&core_modl));
1642 }
1643 
1644 int
1645 _info(struct modinfo *mi)
1646 {
1647 	return (mod_info(&core_modl, mi));
1648 }
1649