1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2019 Joyent, Inc.
24 */
25
26 /*
27 * This file contains preset event names from the Performance Application
28 * Programming Interface v3.5 which included the following notice:
29 *
30 * Copyright (c) 2005,6
31 * Innovative Computing Labs
32 * Computer Science Department,
33 * University of Tennessee,
34 * Knoxville, TN.
35 * All Rights Reserved.
36 *
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions are met:
40 *
41 * * Redistributions of source code must retain the above copyright notice,
42 * this list of conditions and the following disclaimer.
43 * * Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * * Neither the name of the University of Tennessee nor the names of its
47 * contributors may be used to endorse or promote products derived from
48 * this software without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
51 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
54 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
55 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
56 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
57 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
58 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
59 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
60 * POSSIBILITY OF SUCH DAMAGE.
61 *
62 *
63 * This open source software license conforms to the BSD License template.
64 */
65
66
67 /*
68 * Performance Counter Back-End for Intel processors supporting Architectural
69 * Performance Monitoring.
70 */
71
72 #include <sys/cpuvar.h>
73 #include <sys/param.h>
74 #include <sys/cpc_impl.h>
75 #include <sys/cpc_pcbe.h>
76 #include <sys/modctl.h>
77 #include <sys/inttypes.h>
78 #include <sys/systm.h>
79 #include <sys/cmn_err.h>
80 #include <sys/x86_archext.h>
81 #include <sys/sdt.h>
82 #include <sys/archsystm.h>
83 #include <sys/privregs.h>
84 #include <sys/ddi.h>
85 #include <sys/sunddi.h>
86 #include <sys/cred.h>
87 #include <sys/policy.h>
88
89 #include "core_pcbe_table.h"
90 #include <core_pcbe_cpcgen.h>
91
/*
 * Forward declarations of the PCBE entry points; these implement the
 * pcbe_ops_t vector (core_pcbe_ops) registered with the kernel CPC framework.
 */
static int core_pcbe_init(void);
static uint_t core_pcbe_ncounters(void);
static const char *core_pcbe_impl_name(void);
static const char *core_pcbe_cpuref(void);
static char *core_pcbe_list_events(uint_t picnum);
static char *core_pcbe_list_attrs(void);
static uint64_t core_pcbe_event_coverage(char *event);
static uint64_t core_pcbe_overflow_bitmap(void);
static int core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
    uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
    void *token);
static void core_pcbe_program(void *token);
static void core_pcbe_allstop(void);
static void core_pcbe_sample(void *token);
static void core_pcbe_free(void *config);
107
#define	FALSE	0
#define	TRUE	1

/* Counter Type */
#define	CORE_GPC	0	/* General-Purpose Counter (GPC) */
#define	CORE_FFC	1	/* Fixed-Function Counter (FFC) */

/* MSR Addresses */
#define	GPC_BASE_PMC		0x00c1	/* First GPC */
#define	GPC_BASE_PES		0x0186	/* First GPC Event Select register */
#define	FFC_BASE_PMC		0x0309	/* First FFC */
#define	PERF_FIXED_CTR_CTRL	0x038d	/* Used to enable/disable FFCs */
#define	PERF_GLOBAL_STATUS	0x038e	/* Overflow status register */
#define	PERF_GLOBAL_CTRL	0x038f	/* Used to enable/disable counting */
#define	PERF_GLOBAL_OVF_CTRL	0x0390	/* Used to clear overflow status */

/*
 * Processor Event Select register fields
 */
#define	CORE_USR	(1ULL << 16)	/* Count while not in ring 0 */
#define	CORE_OS		(1ULL << 17)	/* Count while in ring 0 */
#define	CORE_EDGE	(1ULL << 18)	/* Enable edge detection */
#define	CORE_PC		(1ULL << 19)	/* Enable pin control */
#define	CORE_INT	(1ULL << 20)	/* Enable interrupt on overflow */
#define	CORE_EN		(1ULL << 22)	/* Enable counting */
#define	CORE_INV	(1ULL << 23)	/* Invert the CMASK */
#define	CORE_ANYTHR	(1ULL << 21)	/* Count event for any thread on core */

#define	CORE_UMASK_SHIFT	8
#define	CORE_UMASK_MASK		0xffu
#define	CORE_CMASK_SHIFT	24
#define	CORE_CMASK_MASK		0xffu

/*
 * Fixed-function counter attributes (per-FFC nibble in
 * PERF_FIXED_CTR_CTRL).  Note the polarity matches CORE_OS/CORE_USR above:
 * OS_EN counts ring 0, USR_EN counts outside ring 0.
 */
#define	CORE_FFC_OS_EN	(1ULL << 0)	/* Count while in ring 0 */
#define	CORE_FFC_USR_EN	(1ULL << 1)	/* Count while not in ring 0 */
#define	CORE_FFC_ANYTHR	(1ULL << 2)	/* Count event for any thread on core */
#define	CORE_FFC_PMI	(1ULL << 3)	/* Enable interrupt on overflow */

/*
 * Number of bits for specifying each FFC's attributes in the control register
 */
#define	CORE_FFC_ATTR_SIZE	4

/*
 * CondChgd and OvfBuffer fields of global status and overflow control registers
 */
#define	CONDCHGD			(1ULL << 63)
#define	OVFBUFFER			(1ULL << 62)
#define	MASK_CONDCHGD_OVFBUFFER		(CONDCHGD | OVFBUFFER)

#define	ALL_STOPPED	0ULL

/* Mask with the low x bits set; x must be < 64 (checked by callers) */
#define	BITMASK_XBITS(x)	((1ull << (x)) - 1ull)

/*
 * Only the lower 32-bits can be written to in the general-purpose
 * counters.  The higher bits are extended from bit 31; all ones if
 * bit 31 is one and all zeros otherwise.
 *
 * The fixed-function counters do not have this restriction.
 */
#define	BITS_EXTENDED_FROM_31	(BITMASK_XBITS(width_gpc) & ~BITMASK_XBITS(31))
173
/*
 * MSR access wrappers that also fire a DTrace probe with the MSR address
 * and value.  Wrapped in do { } while (0) so each expands to a single
 * statement; the previous multi-statement form would misbehave inside an
 * unbraced if/else (only the first statement would be conditional).
 */
#define	WRMSR(msr, value)						\
	do {								\
		wrmsr((msr), (value));					\
		DTRACE_PROBE2(wrmsr, uint64_t, (msr), uint64_t,		\
		    (value));						\
	} while (0)

#define	RDMSR(msr, value)						\
	do {								\
		(value) = rdmsr((msr));					\
		DTRACE_PROBE2(rdmsr, uint64_t, (msr), uint64_t,		\
		    (value));						\
	} while (0)
181
/*
 * Per-request configuration for one hardware counter, created by
 * core_pcbe_configure() and handed back to program/sample/free.
 */
typedef struct core_pcbe_config {
	uint64_t core_rawpic;	/* Counter value (raw, full width) */
	uint64_t core_ctl;	/* Event Select bits */
	uint64_t core_pmc;	/* Counter register address */
	uint64_t core_pes;	/* Event Select register address */
	uint_t core_picno;	/* Virtual pic number; FFCs follow GPCs */
	uint8_t core_pictype;	/* CORE_GPC or CORE_FFC */
} core_pcbe_config_t;
190
/*
 * Operations vector exported to the kernel CPC framework.
 */
pcbe_ops_t core_pcbe_ops = {
	PCBE_VER_1,			/* pcbe_ver */
	CPC_CAP_OVERFLOW_INTERRUPT | CPC_CAP_OVERFLOW_PRECISE,	/* pcbe_caps */
	core_pcbe_ncounters,		/* pcbe_ncounters */
	core_pcbe_impl_name,		/* pcbe_impl_name */
	core_pcbe_cpuref,		/* pcbe_cpuref */
	core_pcbe_list_events,		/* pcbe_list_events */
	core_pcbe_list_attrs,		/* pcbe_list_attrs */
	core_pcbe_event_coverage,	/* pcbe_event_coverage */
	core_pcbe_overflow_bitmap,	/* pcbe_overflow_bitmap */
	core_pcbe_configure,		/* pcbe_configure */
	core_pcbe_program,		/* pcbe_program */
	core_pcbe_allstop,		/* pcbe_allstop */
	core_pcbe_sample,		/* pcbe_sample */
	core_pcbe_free			/* pcbe_free */
};
207
/*
 * One named GPC event for the Core microarchitecture tables below.
 */
struct nametable_core_uarch {
	const char *name;		/* event name as seen by cpc */
	uint64_t restricted_bits;	/* bits needing cpc_cpu privilege */
	uint8_t event_num;		/* event select code */
};

/*
 * Counting an event for all cores or all bus agents requires cpc_cpu privileges
 */
#define	ALL_CORES	(1ULL << 15)
#define	ALL_AGENTS	(1ULL << 13)

/*
 * Mapping of a PAPI-style generic event name to event select code + umask.
 */
struct generic_events {
	const char *name;
	uint8_t event_num;
	uint8_t umask;
};
225
/*
 * Generic (PAPI) events countable on any general-purpose counter.
 * Terminated by an entry with event_num == NT_END.
 */
static const struct generic_events cmn_generic_events[] = {
	{ "PAPI_tot_cyc", 0x3c, 0x00 }, /* cpu_clk_unhalted.thread_p/core */
	{ "PAPI_tot_ins", 0xc0, 0x00 }, /* inst_retired.any_p */
	{ "PAPI_br_ins", 0xc4, 0x0c }, /* br_inst_retired.taken */
	{ "PAPI_br_msp", 0xc5, 0x00 }, /* br_inst_retired.mispred */
	{ "PAPI_br_ntk", 0xc4, 0x03 },
	    /* br_inst_retired.pred_not_taken|mispred_not_taken */
	{ "PAPI_br_prc", 0xc4, 0x05 },
	    /* br_inst_retired.pred_not_taken|pred_taken */
	{ "PAPI_hw_int", 0xc8, 0x00 }, /* hw_int_rvc */
	{ "PAPI_tot_iis", 0xaa, 0x01 }, /* macro_insts.decoded */
	{ "PAPI_l1_dca", 0x43, 0x01 }, /* l1d_all_ref */
	{ "PAPI_l1_icm", 0x81, 0x00 }, /* l1i_misses */
	{ "PAPI_l1_icr", 0x80, 0x00 }, /* l1i_reads */
	{ "PAPI_l1_tcw", 0x41, 0x0f }, /* l1d_cache_st.mesi */
	{ "PAPI_l2_stm", 0x2a, 0x41 }, /* l2_st.self.i_state */
	{ "PAPI_l2_tca", 0x2e, 0x4f }, /* l2_rqsts.self.demand.mesi */
	{ "PAPI_l2_tch", 0x2e, 0x4e }, /* l2_rqsts.mes */
	{ "PAPI_l2_tcm", 0x2e, 0x41 }, /* l2_rqsts.self.demand.i_state */
	{ "PAPI_l2_tcw", 0x2a, 0x4f }, /* l2_st.self.mesi */
	{ "PAPI_ld_ins", 0xc0, 0x01 }, /* inst_retired.loads */
	{ "PAPI_lst_ins", 0xc0, 0x03 }, /* inst_retired.loads|stores */
	{ "PAPI_sr_ins", 0xc0, 0x02 }, /* inst_retired.stores */
	{ "PAPI_tlb_dm", 0x08, 0x01 }, /* dtlb_misses.any */
	{ "PAPI_tlb_im", 0x82, 0x12 }, /* itlb.small_miss|large_miss */
	{ "PAPI_tlb_tl", 0x0c, 0x03 }, /* page_walks */
	{ "", NT_END, 0 }
};

/* Generic events countable only on GPC 0 */
static const struct generic_events generic_events_pic0[] = {
	{ "PAPI_l1_dcm", 0xcb, 0x01 }, /* mem_load_retired.l1d_miss */
	{ "", NT_END, 0 }
};
259
260 /*
261 * The events listed in the following table can be counted on all
262 * general-purpose counters on processors that are of Penryn and Merom Family
263 */
static const struct nametable_core_uarch cmn_gpc_events_core_uarch[] = {
	/*
	 * Alphabetical order of event name.  This ordering is REQUIRED:
	 * find_gpcevent_core_uarch() stops searching as soon as strcmp()
	 * goes non-positive, so an out-of-order entry would be unfindable.
	 */

	{ "baclears", 0x0, 0xe6 },
	{ "bogus_br", 0x0, 0xe4 },
	{ "br_bac_missp_exec", 0x0, 0x8a },

	{ "br_call_exec", 0x0, 0x92 },
	{ "br_call_missp_exec", 0x0, 0x93 },
	{ "br_cnd_exec", 0x0, 0x8b },

	{ "br_cnd_missp_exec", 0x0, 0x8c },
	{ "br_ind_call_exec", 0x0, 0x94 },
	{ "br_ind_exec", 0x0, 0x8d },

	{ "br_ind_missp_exec", 0x0, 0x8e },
	{ "br_inst_decoded", 0x0, 0xe0 },
	{ "br_inst_exec", 0x0, 0x88 },

	{ "br_inst_retired", 0x0, 0xc4 },
	{ "br_inst_retired_mispred", 0x0, 0xc5 },
	{ "br_missp_exec", 0x0, 0x89 },

	{ "br_ret_bac_missp_exec", 0x0, 0x91 },
	{ "br_ret_exec", 0x0, 0x8f },
	{ "br_ret_missp_exec", 0x0, 0x90 },

	{ "br_tkn_bubble_1", 0x0, 0x97 },
	{ "br_tkn_bubble_2", 0x0, 0x98 },
	{ "bus_bnr_drv", ALL_AGENTS, 0x61 },

	{ "bus_data_rcv", ALL_CORES, 0x64 },
	{ "bus_drdy_clocks", ALL_AGENTS, 0x62 },
	{ "bus_hit_drv", ALL_AGENTS, 0x7a },

	{ "bus_hitm_drv", ALL_AGENTS, 0x7b },
	{ "bus_io_wait", ALL_CORES, 0x7f },
	{ "bus_lock_clocks", ALL_CORES | ALL_AGENTS, 0x63 },

	{ "bus_request_outstanding", ALL_CORES | ALL_AGENTS, 0x60 },
	{ "bus_trans_any", ALL_CORES | ALL_AGENTS, 0x70 },
	{ "bus_trans_brd", ALL_CORES | ALL_AGENTS, 0x65 },

	{ "bus_trans_burst", ALL_CORES | ALL_AGENTS, 0x6e },
	{ "bus_trans_def", ALL_CORES | ALL_AGENTS, 0x6d },
	{ "bus_trans_ifetch", ALL_CORES | ALL_AGENTS, 0x68 },

	{ "bus_trans_inval", ALL_CORES | ALL_AGENTS, 0x69 },
	{ "bus_trans_io", ALL_CORES | ALL_AGENTS, 0x6c },
	{ "bus_trans_mem", ALL_CORES | ALL_AGENTS, 0x6f },

	{ "bus_trans_p", ALL_CORES | ALL_AGENTS, 0x6b },
	{ "bus_trans_pwr", ALL_CORES | ALL_AGENTS, 0x6a },
	{ "bus_trans_rfo", ALL_CORES | ALL_AGENTS, 0x66 },

	{ "bus_trans_wb", ALL_CORES | ALL_AGENTS, 0x67 },
	{ "busq_empty", ALL_CORES, 0x7d },
	{ "cmp_snoop", ALL_CORES, 0x78 },

	{ "cpu_clk_unhalted", 0x0, 0x3c },
	{ "cycles_int", 0x0, 0xc6 },
	{ "cycles_l1i_mem_stalled", 0x0, 0x86 },

	{ "dtlb_misses", 0x0, 0x08 },
	{ "eist_trans", 0x0, 0x3a },
	{ "esp", 0x0, 0xab },

	{ "ext_snoop", ALL_AGENTS, 0x77 },
	{ "fp_mmx_trans", 0x0, 0xcc },
	{ "hw_int_rcv", 0x0, 0xc8 },

	{ "ild_stall", 0x0, 0x87 },
	{ "inst_queue", 0x0, 0x83 },
	{ "inst_retired", 0x0, 0xc0 },

	{ "itlb", 0x0, 0x82 },
	{ "itlb_miss_retired", 0x0, 0xc9 },
	{ "l1d_all_ref", 0x0, 0x43 },

	{ "l1d_cache_ld", 0x0, 0x40 },
	{ "l1d_cache_lock", 0x0, 0x42 },
	{ "l1d_cache_st", 0x0, 0x41 },

	{ "l1d_m_evict", 0x0, 0x47 },
	{ "l1d_m_repl", 0x0, 0x46 },
	{ "l1d_pend_miss", 0x0, 0x48 },

	{ "l1d_prefetch", 0x0, 0x4e },
	{ "l1d_repl", 0x0, 0x45 },
	{ "l1d_split", 0x0, 0x49 },

	{ "l1i_misses", 0x0, 0x81 },
	{ "l1i_reads", 0x0, 0x80 },
	{ "l2_ads", ALL_CORES, 0x21 },

	{ "l2_dbus_busy_rd", ALL_CORES, 0x23 },
	{ "l2_ifetch", ALL_CORES, 0x28 },
	{ "l2_ld", ALL_CORES, 0x29 },

	{ "l2_lines_in", ALL_CORES, 0x24 },
	{ "l2_lines_out", ALL_CORES, 0x26 },
	{ "l2_lock", ALL_CORES, 0x2b },

	{ "l2_m_lines_in", ALL_CORES, 0x25 },
	{ "l2_m_lines_out", ALL_CORES, 0x27 },
	{ "l2_no_req", ALL_CORES, 0x32 },

	{ "l2_reject_busq", ALL_CORES, 0x30 },
	{ "l2_rqsts", ALL_CORES, 0x2e },
	{ "l2_st", ALL_CORES, 0x2a },

	{ "load_block", 0x0, 0x03 },
	{ "load_hit_pre", 0x0, 0x4c },
	{ "machine_nukes", 0x0, 0xc3 },

	{ "macro_insts", 0x0, 0xaa },
	{ "memory_disambiguation", 0x0, 0x09 },
	{ "misalign_mem_ref", 0x0, 0x05 },
	{ "page_walks", 0x0, 0x0c },

	{ "pref_rqsts_dn", 0x0, 0xf8 },
	{ "pref_rqsts_up", 0x0, 0xf0 },
	{ "rat_stalls", 0x0, 0xd2 },

	{ "resource_stalls", 0x0, 0xdc },
	{ "rs_uops_dispatched", 0x0, 0xa0 },
	{ "seg_reg_renames", 0x0, 0xd5 },

	{ "seg_rename_stalls", 0x0, 0xd4 },
	{ "segment_reg_loads", 0x0, 0x06 },
	{ "simd_assist", 0x0, 0xcd },

	{ "simd_comp_inst_retired", 0x0, 0xca },
	{ "simd_inst_retired", 0x0, 0xc7 },
	{ "simd_instr_retired", 0x0, 0xce },

	{ "simd_sat_instr_retired", 0x0, 0xcf },
	{ "simd_sat_uop_exec", 0x0, 0xb1 },
	{ "simd_uop_type_exec", 0x0, 0xb3 },

	{ "simd_uops_exec", 0x0, 0xb0 },
	{ "snoop_stall_drv", ALL_CORES | ALL_AGENTS, 0x7e },
	{ "sse_pre_exec", 0x0, 0x07 },

	{ "sse_pre_miss", 0x0, 0x4b },
	{ "store_block", 0x0, 0x04 },
	{ "thermal_trip", 0x0, 0x3b },

	{ "uops_retired", 0x0, 0xc2 },
	{ "x87_ops_retired", 0x0, 0xc1 },
	{ "", 0x0, NT_END }
};
416
417 /*
418 * If any of the pic specific events require privileges, make sure to add a
419 * check in configure_gpc() to find whether an event hard-coded as a number by
420 * the user has any privilege requirements
421 */
static const struct nametable_core_uarch pic0_events[] = {
	/* Alphabetical order of event name (required by lookup; see above) */

	{ "cycles_div_busy", 0x0, 0x14 },
	{ "fp_comp_ops_exe", 0x0, 0x10 },
	{ "idle_during_div", 0x0, 0x18 },

	{ "mem_load_retired", 0x0, 0xcb },
	{ "rs_uops_dispatched_port", 0x0, 0xa1 },
	{ "", 0x0, NT_END }
};

/* Events countable only on GPC 1 */
static const struct nametable_core_uarch pic1_events[] = {
	/* Alphabetical order of event name (required by lookup; see above) */

	{ "delayed_bypass", 0x0, 0x19 },
	{ "div", 0x0, 0x13 },
	{ "fp_assist", 0x0, 0x11 },

	{ "mul", 0x0, 0x12 },
	{ "", 0x0, NT_END }
};
444
/*
 * FFC entries must be in order: index i names fixed-function counter i.
 * Two variants exist because FFC 1 counts per-thread cycles when
 * HyperThreading is enabled.  NULL-terminated.
 */
static char *ffc_names_non_htt[] = {
	"instr_retired.any",
	"cpu_clk_unhalted.core",
	"cpu_clk_unhalted.ref",
	NULL
};

static char *ffc_names_htt[] = {
	"instr_retired.any",
	"cpu_clk_unhalted.thread",
	"cpu_clk_unhalted.ref",
	NULL
};

/* Generic (PAPI) aliases for each FFC; "" means no alias for that counter */
static char *ffc_genericnames[] = {
	"PAPI_tot_ins",
	"PAPI_tot_cyc",
	"",
	NULL
};
466
/*
 * File-scope state initialized once in core_pcbe_init() from CPUID leaf 0xA.
 */
static char **ffc_names = NULL;		/* per-FFC event names (HTT variant) */
static char **ffc_allnames = NULL;	/* per-FFC "name[,generic]" strings */
static char **gpc_names = NULL;		/* per-GPC comma-separated lists */
static uint32_t versionid;		/* Architectural PerfMon version */
static uint64_t num_gpc;		/* number of general-purpose counters */
static uint64_t width_gpc;		/* GPC width in bits */
static uint64_t mask_gpc;		/* mask of valid GPC bits */
static uint64_t num_ffc;		/* number of fixed-function counters */
static uint64_t width_ffc;		/* FFC width in bits */
static uint64_t mask_ffc;		/* mask of valid FFC bits */
static uint_t total_pmc;		/* num_gpc + num_ffc */
static uint64_t control_ffc;		/* FFC bits of PERF_GLOBAL_CTRL */
static uint64_t control_gpc;		/* GPC bits of PERF_GLOBAL_CTRL */
static uint64_t control_mask;		/* all valid PERF_GLOBAL_CTRL bits */
static uint32_t arch_events_vector;	/* CPUID.0xA:EBX; set bit = event
					 * NOT available */

#define	IMPL_NAME_LEN 100
static char core_impl_name[IMPL_NAME_LEN];
485
/*
 * Reference string returned by core_pcbe_cpuref(); points users at the
 * Intel SDM chapters covering performance monitoring.  (Adjacent string
 * literals concatenate; no line-continuation backslashes are needed.)
 */
static const char *core_cpuref =
	"See https://download.01.org/perfmon/index/ or Chapters 18 and 19 "
	"of the \"Intel 64 and IA-32 Architectures Software Developer's "
	"Manual Volume 3: System Programming Guide\" Order Number: "
	"325384-062US, March 2017.";
491
492
/*
 * Architectural events (Intel SDM Vol. 3, Architectural Performance
 * Monitoring).  Entry order must match the bit positions reported in
 * CPUID.0xA:EBX and the rows of arch_genevents_table below.
 */
#define	ARCH_EVENTS_COMMON					\
	{ 0xc0, 0x00, C_ALL, "inst_retired.any_p" },		\
	{ 0x3c, 0x01, C_ALL, "cpu_clk_unhalted.ref_p" },	\
	{ 0x2e, 0x4f, C_ALL, "longest_lat_cache.reference" },	\
	{ 0x2e, 0x41, C_ALL, "longest_lat_cache.miss" },	\
	{ 0xc4, 0x00, C_ALL, "br_inst_retired.all_branches" },	\
	{ 0xc5, 0x00, C_ALL, "br_misp_retired.all_branches" }

static const struct events_table_t arch_events_table_non_htt[] = {
	{ 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.core" },
	ARCH_EVENTS_COMMON
};

static const struct events_table_t arch_events_table_htt[] = {
	{ 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" },
	ARCH_EVENTS_COMMON
};

/* Generic (PAPI) alias for each architectural event; "" means no alias */
static char *arch_genevents_table[] = {
	"PAPI_tot_cyc", /* cpu_clk_unhalted.thread_p/core */
	"PAPI_tot_ins", /* inst_retired.any_p */
	"", /* cpu_clk_unhalted.ref_p */
	"", /* longest_lat_cache.reference */
	"", /* longest_lat_cache.miss */
	"", /* br_inst_retired.all_branches */
	"", /* br_misp_retired.all_branches */
};

/* Selected in core_pcbe_init() based on HTT */
static const struct events_table_t *arch_events_table = NULL;
static uint64_t known_arch_events;	/* entries in arch_events_table */
static uint64_t known_ffc_num;		/* entries in ffc_names (incl. NULL) */
/* Model-specific event table from core_cpcgen_table(), or NULL */
static const struct events_table_t *events_table = NULL;
526
527 /*
528 * Initialize string containing list of supported general-purpose counter
529 * events for processors of Penryn and Merom Family
530 */
static void
pcbe_init_core_uarch()
{
	const struct nametable_core_uarch *n;
	const struct generic_events *k;
	const struct nametable_core_uarch *picspecific_events;
	const struct generic_events *picspecific_genericevents;
	size_t common_size;
	size_t size;
	uint64_t i;

	gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);

	/*
	 * Calculate space needed to save all the common event names.
	 * Each name is followed by a comma (hence the +1 per name); the
	 * final comma is later overwritten with the NUL terminator.
	 */
	common_size = 0;
	for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END; n++) {
		common_size += strlen(n->name) + 1;
	}

	for (k = cmn_generic_events; k->event_num != NT_END; k++) {
		common_size += strlen(k->name) + 1;
	}

	/* Build one comma-separated event list per GPC */
	for (i = 0; i < num_gpc; i++) {
		size = 0;
		picspecific_genericevents = NULL;

		/* Only pics 0 and 1 have counter-specific events */
		switch (i) {
		case 0:
			picspecific_events = pic0_events;
			picspecific_genericevents = generic_events_pic0;
			break;
		case 1:
			picspecific_events = pic1_events;
			break;
		default:
			picspecific_events = NULL;
			break;
		}
		if (picspecific_events != NULL) {
			for (n = picspecific_events;
			    n->event_num != NT_END;
			    n++) {
				size += strlen(n->name) + 1;
			}
		}
		if (picspecific_genericevents != NULL) {
			for (k = picspecific_genericevents;
			    k->event_num != NT_END; k++) {
				size += strlen(k->name) + 1;
			}
		}

		gpc_names[i] =
		    kmem_alloc(size + common_size + 1, KM_SLEEP);

		/*
		 * Concatenate all applicable names, each followed by a
		 * comma, in the same order used for the size computation.
		 */
		gpc_names[i][0] = '\0';
		if (picspecific_events != NULL) {
			for (n = picspecific_events;
			    n->event_num != NT_END; n++) {
				(void) strcat(gpc_names[i], n->name);
				(void) strcat(gpc_names[i], ",");
			}
		}
		if (picspecific_genericevents != NULL) {
			for (k = picspecific_genericevents;
			    k->event_num != NT_END; k++) {
				(void) strcat(gpc_names[i], k->name);
				(void) strcat(gpc_names[i], ",");
			}
		}
		for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END;
		    n++) {
			(void) strcat(gpc_names[i], n->name);
			(void) strcat(gpc_names[i], ",");
		}
		for (k = cmn_generic_events; k->event_num != NT_END; k++) {
			(void) strcat(gpc_names[i], k->name);
			(void) strcat(gpc_names[i], ",");
		}

		/*
		 * Remove trailing comma.
		 */
		gpc_names[i][common_size + size - 1] = '\0';
	}
}
618
/*
 * Discover this CPU's performance-monitoring capabilities via CPUID leaf
 * 0xA and build the per-counter event-name lists.  Returns 0 on success,
 * -1 if the processor is unsupported.
 */
static int
core_pcbe_init(void)
{
	struct cpuid_regs cp;
	size_t size;
	uint64_t i;
	uint64_t j;
	uint64_t arch_events_vector_length;
	size_t arch_events_string_length;
	uint_t model, stepping;

	if (cpuid_getvendor(CPU) != X86_VENDOR_Intel)
		return (-1);

	/* Obtain Basic CPUID information */
	cp.cp_eax = 0x0;
	(void) __cpuid_insn(&cp);

	/* No Architectural Performance Monitoring Leaf returned by CPUID */
	if (cp.cp_eax < 0xa) {
		return (-1);
	}

	/* Obtain the Architectural Performance Monitoring Leaf */
	cp.cp_eax = 0xa;
	(void) __cpuid_insn(&cp);

	versionid = cp.cp_eax & 0xFF;

	/*
	 * Fixed-Function Counters (FFC)
	 *
	 * All Family 6 Model 15 and Model 23 processors have fixed-function
	 * counters.  These counters were made Architectural with
	 * Family 6 Model 15 Stepping 9.
	 */
	switch (versionid) {

	case 0:
		return (-1);

	case 2:
		num_ffc = cp.cp_edx & 0x1F;
		width_ffc = (cp.cp_edx >> 5) & 0xFF;

		/*
		 * Some processors have an errata (AW34) where
		 * versionid is reported as 2 when actually 1.
		 * In this case, fixed-function counters are
		 * model-specific as in Version 1.
		 */
		if (num_ffc != 0) {
			break;
		}
		/* FALLTHROUGH */
	case 1:
		/* Version 1 FFCs are model-specific; use known values */
		num_ffc = 3;
		width_ffc = 40;
		versionid = 1;
		break;

	default:
		num_ffc = cp.cp_edx & 0x1F;
		width_ffc = (cp.cp_edx >> 5) & 0xFF;
		break;
	}


	/* control_ffc below shifts a 1 by num_ffc; must stay under 64 */
	if (num_ffc >= 64)
		return (-1);

	/* Set HTT-specific names of architectural & FFC events */
	if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
		ffc_names = ffc_names_htt;
		arch_events_table = arch_events_table_htt;
		known_arch_events =
		    sizeof (arch_events_table_htt) /
		    sizeof (struct events_table_t);
		known_ffc_num =
		    sizeof (ffc_names_htt) / sizeof (char *);
	} else {
		ffc_names = ffc_names_non_htt;
		arch_events_table = arch_events_table_non_htt;
		known_arch_events =
		    sizeof (arch_events_table_non_htt) /
		    sizeof (struct events_table_t);
		known_ffc_num =
		    sizeof (ffc_names_non_htt) / sizeof (char *);
	}

	if (num_ffc >= known_ffc_num) {
		/*
		 * The system seems to have more fixed-function counters than
		 * what this PCBE is able to handle correctly.  Default to the
		 * maximum number of fixed-function counters that this driver
		 * is aware of.
		 */
		num_ffc = known_ffc_num - 1;
	}

	mask_ffc = BITMASK_XBITS(width_ffc);
	control_ffc = BITMASK_XBITS(num_ffc);

	/*
	 * General Purpose Counters (GPC)
	 */
	num_gpc = (cp.cp_eax >> 8) & 0xFF;
	width_gpc = (cp.cp_eax >> 16) & 0xFF;

	if (num_gpc >= 64)
		return (-1);

	mask_gpc = BITMASK_XBITS(width_gpc);

	control_gpc = BITMASK_XBITS(num_gpc);

	/* FFC enable bits live at bit 32+ of PERF_GLOBAL_CTRL/STATUS */
	control_mask = (control_ffc << 32) | control_gpc;

	total_pmc = num_gpc + num_ffc;
	if (total_pmc > 64) {
		/* Too wide for the overflow bitmap */
		return (-1);
	}

	/* FFC names: "name" or "name,generic-alias" per counter */
	ffc_allnames = kmem_alloc(num_ffc * sizeof (char *), KM_SLEEP);
	for (i = 0; i < num_ffc; i++) {
		ffc_allnames[i] = kmem_alloc(
		    strlen(ffc_names[i]) + strlen(ffc_genericnames[i]) + 2,
		    KM_SLEEP);

		ffc_allnames[i][0] = '\0';
		(void) strcat(ffc_allnames[i], ffc_names[i]);

		/* Check if this ffc has a generic name */
		if (strcmp(ffc_genericnames[i], "") != 0) {
			(void) strcat(ffc_allnames[i], ",");
			(void) strcat(ffc_allnames[i], ffc_genericnames[i]);
		}
	}

	/* GPC events for Family 6 Models 15, 23 and 29 only */
	if ((cpuid_getfamily(CPU) == 6) &&
	    ((cpuid_getmodel(CPU) == 15) || (cpuid_getmodel(CPU) == 23) ||
	    (cpuid_getmodel(CPU) == 29))) {
		(void) snprintf(core_impl_name, IMPL_NAME_LEN,
		    "Core Microarchitecture");
		pcbe_init_core_uarch();
		return (0);
	}

	(void) snprintf(core_impl_name, IMPL_NAME_LEN,
	    "Intel Arch PerfMon v%d on Family %d Model %d",
	    versionid, cpuid_getfamily(CPU), cpuid_getmodel(CPU));

	/*
	 * Architectural events
	 */
	arch_events_vector_length = (cp.cp_eax >> 24) & 0xFF;

	ASSERT(known_arch_events == arch_events_vector_length);

	/*
	 * To handle the case where a new performance monitoring setup is run
	 * on a non-debug kernel
	 */
	if (known_arch_events > arch_events_vector_length) {
		known_arch_events = arch_events_vector_length;
	} else {
		arch_events_vector_length = known_arch_events;
	}

	/* A set bit means the corresponding event is NOT available */
	arch_events_vector = cp.cp_ebx &
	    BITMASK_XBITS(arch_events_vector_length);

	/*
	 * Process architectural and non-architectural events using GPC
	 */
	if (num_gpc > 0) {

		gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);

		/* Calculate space required for the architectural gpc events */
		arch_events_string_length = 0;
		for (i = 0; i < known_arch_events; i++) {
			if (((1U << i) & arch_events_vector) == 0) {
				arch_events_string_length +=
				    strlen(arch_events_table[i].name) + 1;
				if (strcmp(arch_genevents_table[i], "") != 0) {
					arch_events_string_length +=
					    strlen(arch_genevents_table[i]) + 1;
				}
			}
		}

		/* Non-architectural events list */
		model = cpuid_getmodel(CPU);
		stepping = cpuid_getstep(CPU);
		events_table = core_cpcgen_table(model, stepping);

		for (i = 0; i < num_gpc; i++) {

			/*
			 * Determine length of all supported event names
			 * (architectural + non-architectural)
			 */
			size = arch_events_string_length;
			for (j = 0; events_table != NULL &&
			    events_table[j].eventselect != NT_END;
			    j++) {
				if (C(i) & events_table[j].supported_counters) {
					size += strlen(events_table[j].name) +
					    1;
				}
			}

			/* Allocate memory for this pics list */
			gpc_names[i] = kmem_alloc(size + 1, KM_SLEEP);
			gpc_names[i][0] = '\0';
			if (size == 0) {
				continue;
			}

			/*
			 * Create the list of all supported events
			 * (architectural + non-architectural)
			 */
			for (j = 0; j < known_arch_events; j++) {
				if (((1U << j) & arch_events_vector) == 0) {
					(void) strcat(gpc_names[i],
					    arch_events_table[j].name);
					(void) strcat(gpc_names[i], ",");
					if (strcmp(
					    arch_genevents_table[j], "")
					    != 0) {
						(void) strcat(gpc_names[i],
						    arch_genevents_table[j]);
						(void) strcat(gpc_names[i],
						    ",");
					}
				}
			}

			for (j = 0; events_table != NULL &&
			    events_table[j].eventselect != NT_END;
			    j++) {
				if (C(i) & events_table[j].supported_counters) {
					(void) strcat(gpc_names[i],
					    events_table[j].name);
					(void) strcat(gpc_names[i], ",");
				}
			}

			/* Remove trailing comma */
			gpc_names[i][size - 1] = '\0';
		}
	}

	return (0);
}
879
core_pcbe_ncounters()880 static uint_t core_pcbe_ncounters()
881 {
882 return (total_pmc);
883 }
884
core_pcbe_impl_name(void)885 static const char *core_pcbe_impl_name(void)
886 {
887 return (core_impl_name);
888 }
889
core_pcbe_cpuref(void)890 static const char *core_pcbe_cpuref(void)
891 {
892 return (core_cpuref);
893 }
894
/*
 * Return the comma-separated event list for virtual pic 'picnum'.
 * GPCs occupy picnums [0, num_gpc); FFCs follow at [num_gpc, total_pmc).
 */
static char *
core_pcbe_list_events(uint_t picnum)
{
	ASSERT(picnum < cpc_ncounters);

	return ((picnum < num_gpc) ?
	    gpc_names[picnum] : ffc_allnames[picnum - num_gpc]);
}
905
core_pcbe_list_attrs(void)906 static char *core_pcbe_list_attrs(void)
907 {
908 if (versionid >= 3) {
909 return ("edge,inv,umask,cmask,anythr");
910 } else {
911 return ("edge,pc,inv,umask,cmask");
912 }
913 }
914
915 static const struct nametable_core_uarch *
find_gpcevent_core_uarch(char * name,const struct nametable_core_uarch * nametable)916 find_gpcevent_core_uarch(char *name,
917 const struct nametable_core_uarch *nametable)
918 {
919 const struct nametable_core_uarch *n;
920 int compare_result = -1;
921
922 for (n = nametable; n->event_num != NT_END; n++) {
923 compare_result = strcmp(name, n->name);
924 if (compare_result <= 0) {
925 break;
926 }
927 }
928
929 if (compare_result == 0) {
930 return (n);
931 }
932
933 return (NULL);
934 }
935
936 static const struct generic_events *
find_generic_events(char * name,const struct generic_events * table)937 find_generic_events(char *name, const struct generic_events *table)
938 {
939 const struct generic_events *n;
940
941 for (n = table; n->event_num != NT_END; n++) {
942 if (strcmp(name, n->name) == 0) {
943 return (n);
944 };
945 }
946
947 return (NULL);
948 }
949
950 static const struct events_table_t *
find_gpcevent(char * name)951 find_gpcevent(char *name)
952 {
953 int i;
954
955 /* Search architectural events */
956 for (i = 0; i < known_arch_events; i++) {
957 if (strcmp(name, arch_events_table[i].name) == 0 ||
958 strcmp(name, arch_genevents_table[i]) == 0) {
959 if (((1U << i) & arch_events_vector) == 0) {
960 return (&arch_events_table[i]);
961 }
962 }
963 }
964
965 /* Search non-architectural events */
966 if (events_table != NULL) {
967 for (i = 0; events_table[i].eventselect != NT_END; i++) {
968 if (strcmp(name, events_table[i].name) == 0) {
969 return (&events_table[i]);
970 }
971 }
972 }
973
974 return (NULL);
975 }
976
/*
 * Return a bitmap of the counters (bit i == virtual pic i) that can count
 * 'event'.  GPCs occupy the low num_gpc bits; FFCs follow.
 */
static uint64_t
core_pcbe_event_coverage(char *event)
{
	uint64_t bitmap;
	uint64_t bitmask;
	const struct events_table_t *n;
	int i;

	bitmap = 0;

	/* Is it an event that a GPC can track? */
	if (versionid >= 3) {
		/* v3+: per-event supported-counter mask from the tables */
		n = find_gpcevent(event);
		if (n != NULL) {
			bitmap |= (n->supported_counters &
			    BITMASK_XBITS(num_gpc));
		}
	} else {
		/* Older versions: Core-uarch tables; pic0/pic1 restricted */
		if (find_generic_events(event, cmn_generic_events) != NULL) {
			bitmap |= BITMASK_XBITS(num_gpc);
		} else if (find_generic_events(event,
		    generic_events_pic0) != NULL) {
			bitmap |= 1ULL;
		} else if (find_gpcevent_core_uarch(event,
		    cmn_gpc_events_core_uarch) != NULL) {
			bitmap |= BITMASK_XBITS(num_gpc);
		} else if (find_gpcevent_core_uarch(event, pic0_events) !=
		    NULL) {
			bitmap |= 1ULL;
		} else if (find_gpcevent_core_uarch(event, pic1_events) !=
		    NULL) {
			bitmap |= 1ULL << 1;
		}
	}

	/* Check if the event can be counted in the fixed-function counters */
	if (num_ffc > 0) {
		bitmask = 1ULL << num_gpc;
		for (i = 0; i < num_ffc; i++) {
			if (strcmp(event, ffc_names[i]) == 0) {
				bitmap |= bitmask;
			} else if (strcmp(event, ffc_genericnames[i]) == 0) {
				bitmap |= bitmask;
			}
			bitmask = bitmask << 1;
		}
	}

	return (bitmap);
}
1027
/*
 * Called from the overflow interrupt path: read which counters overflowed,
 * acknowledge them in hardware, and return a bitmap in virtual-pic order
 * (GPCs in the low bits, FFCs shifted down to follow them).
 */
static uint64_t
core_pcbe_overflow_bitmap(void)
{
	uint64_t interrupt_status;
	uint64_t intrbits_ffc;
	uint64_t intrbits_gpc;
	extern int kcpc_hw_overflow_intr_installed;
	uint64_t overflow_bitmap;

	/* Read overflow status, then write it back to clear those bits */
	RDMSR(PERF_GLOBAL_STATUS, interrupt_status);
	WRMSR(PERF_GLOBAL_OVF_CTRL, interrupt_status);

	/* Keep only bits for counters this PCBE manages */
	interrupt_status = interrupt_status & control_mask;
	intrbits_ffc = (interrupt_status >> 32) & control_ffc;
	intrbits_gpc = interrupt_status & control_gpc;
	/* Repack: FFC overflow bits directly above the GPC bits */
	overflow_bitmap = (intrbits_ffc << num_gpc) | intrbits_gpc;

	ASSERT(kcpc_hw_overflow_intr_installed);
	/* Re-arm the local APIC performance-counter interrupt */
	(*kcpc_hw_enable_cpc_intr)();

	return (overflow_bitmap);
}
1050
1051 static int
check_cpc_securitypolicy(core_pcbe_config_t * conf,const struct nametable_core_uarch * n)1052 check_cpc_securitypolicy(core_pcbe_config_t *conf,
1053 const struct nametable_core_uarch *n)
1054 {
1055 if (conf->core_ctl & n->restricted_bits) {
1056 if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1057 return (CPC_ATTR_REQUIRES_PRIVILEGE);
1058 }
1059 }
1060 return (0);
1061 }
1062
/*
 * Build a general-purpose counter (GPC) configuration for 'event' on pic
 * 'picnum', folding in any caller-supplied attributes.  The event may be
 * a named event, a generic (PAPI) event, or a raw event code in string
 * form.  On success a heap-allocated core_pcbe_config_t is returned
 * through 'data' (released later via core_pcbe_free()); the return value
 * is 0 or a CPC_* error code.
 */
static int
configure_gpc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
    uint_t nattrs, kcpc_attr_t *attrs, void **data)
{
	core_pcbe_config_t conf;
	const struct nametable_core_uarch *n;
	const struct generic_events *k = NULL;
	const struct nametable_core_uarch *m;
	const struct nametable_core_uarch *picspecific_events;
	struct nametable_core_uarch nt_raw = { "", 0x0, 0x0 };
	uint_t i;
	long event_num;
	const struct events_table_t *eventcode;

	if (((preset & BITS_EXTENDED_FROM_31) != 0) &&
	    ((preset & BITS_EXTENDED_FROM_31) !=
	    BITS_EXTENDED_FROM_31)) {

		/*
		 * Bits beyond bit-31 in the general-purpose counters can only
		 * be written to by extension of bit 31. We cannot preset
		 * these bits to any value other than all 1s or all 0s.
		 */
		return (CPC_ATTRIBUTE_OUT_OF_RANGE);
	}

	if (versionid >= 3) {
		/* Architectural perfmon v3+: one combined event table */
		eventcode = find_gpcevent(event);
		if (eventcode != NULL) {
			if ((C(picnum) & eventcode->supported_counters) == 0) {
				return (CPC_PIC_NOT_CAPABLE);
			}
			/* Generic (PAPI_*) events accept no attributes */
			if (nattrs > 0 &&
			    (strncmp("PAPI_", event, 5) == 0)) {
				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
			}
			conf.core_ctl = eventcode->eventselect;
			conf.core_ctl |= eventcode->unitmask <<
			    CORE_UMASK_SHIFT;
		} else {
			/* Event specified as raw event code */
			if (ddi_strtol(event, NULL, 0, &event_num) != 0) {
				return (CPC_INVALID_EVENT);
			}
			conf.core_ctl = event_num & 0xFF;
		}
	} else {
		if ((k = find_generic_events(event, cmn_generic_events)) !=
		    NULL ||
		    (picnum == 0 &&
		    (k = find_generic_events(event, generic_events_pic0)) !=
		    NULL)) {
			/* Generic events accept no attributes */
			if (nattrs > 0) {
				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
			}
			conf.core_ctl = k->event_num;
			conf.core_ctl |= k->umask << CORE_UMASK_SHIFT;
		} else {
			/* Not a generic event */

			n = find_gpcevent_core_uarch(event,
			    cmn_gpc_events_core_uarch);
			if (n == NULL) {
				/* Try the pic-specific event tables */
				switch (picnum) {
					case 0:
						picspecific_events =
						    pic0_events;
						break;
					case 1:
						picspecific_events =
						    pic1_events;
						break;
					default:
						picspecific_events = NULL;
						break;
				}
				if (picspecific_events != NULL) {
					n = find_gpcevent_core_uarch(event,
					    picspecific_events);
				}
			}
			if (n == NULL) {

				/*
				 * Check if this is a case where the event was
				 * specified directly by its event number
				 * instead of its name string.
				 */
				if (ddi_strtol(event, NULL, 0, &event_num) !=
				    0) {
					return (CPC_INVALID_EVENT);
				}

				event_num = event_num & 0xFF;

				/*
				 * Search the event table to find out if the
				 * event specified has an privilege
				 * requirements. Currently none of the
				 * pic-specific counters have any privilege
				 * requirements. Hence only the table
				 * cmn_gpc_events_core_uarch is searched.
				 */
				for (m = cmn_gpc_events_core_uarch;
				    m->event_num != NT_END;
				    m++) {
					if (event_num == m->event_num) {
						break;
					}
				}
				if (m->event_num == NT_END) {
					nt_raw.event_num = (uint8_t)event_num;
					n = &nt_raw;
				} else {
					n = m;
				}
			}
			conf.core_ctl = n->event_num; /* Event Select */
		}
	}


	conf.core_picno = picnum;
	conf.core_pictype = CORE_GPC;
	conf.core_rawpic = preset & mask_gpc;

	/* MSR addresses of this pic's event-select and counter registers */
	conf.core_pes = GPC_BASE_PES + picnum;
	conf.core_pmc = GPC_BASE_PMC + picnum;

	/*
	 * Fold the caller-supplied attributes into the event-select
	 * register value.
	 */
	for (i = 0; i < nattrs; i++) {
		if (strncmp(attrs[i].ka_name, "umask", 6) == 0) {
			if ((attrs[i].ka_val | CORE_UMASK_MASK) !=
			    CORE_UMASK_MASK) {
				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
			}
			/* Clear out the default umask */
			conf.core_ctl &= ~ (CORE_UMASK_MASK <<
			    CORE_UMASK_SHIFT);
			/* Use the user provided umask */
			conf.core_ctl |= attrs[i].ka_val <<
			    CORE_UMASK_SHIFT;
		} else if (strncmp(attrs[i].ka_name, "edge", 6) == 0) {
			if (attrs[i].ka_val != 0)
				conf.core_ctl |= CORE_EDGE;
		} else if (strncmp(attrs[i].ka_name, "inv", 4) == 0) {
			if (attrs[i].ka_val != 0)
				conf.core_ctl |= CORE_INV;
		} else if (strncmp(attrs[i].ka_name, "cmask", 6) == 0) {
			if ((attrs[i].ka_val | CORE_CMASK_MASK) !=
			    CORE_CMASK_MASK) {
				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
			}
			conf.core_ctl |= attrs[i].ka_val <<
			    CORE_CMASK_SHIFT;
		} else if (strncmp(attrs[i].ka_name, "anythr", 7) ==
		    0) {
			/* "anythr" requires perfmon v3+ and privilege */
			if (versionid < 3)
				return (CPC_INVALID_ATTRIBUTE);
			if (secpolicy_cpc_cpu(crgetcred()) != 0) {
				return (CPC_ATTR_REQUIRES_PRIVILEGE);
			}
			if (attrs[i].ka_val != 0)
				conf.core_ctl |= CORE_ANYTHR;
		} else {
			return (CPC_INVALID_ATTRIBUTE);
		}
	}

	if (flags & CPC_COUNT_USER)
		conf.core_ctl |= CORE_USR;
	if (flags & CPC_COUNT_SYSTEM)
		conf.core_ctl |= CORE_OS;
	if (flags & CPC_OVF_NOTIFY_EMT)
		conf.core_ctl |= CORE_INT;
	conf.core_ctl |= CORE_EN;

	/* Pre-v3 named events may carry privilege restrictions of their own */
	if (versionid < 3 && k == NULL) {
		if (check_cpc_securitypolicy(&conf, n) != 0) {
			return (CPC_ATTR_REQUIRES_PRIVILEGE);
		}
	}

	*data = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
	*((core_pcbe_config_t *)*data) = conf;

	return (0);
}
1250
/*
 * Build a fixed-function counter (FFC) configuration.  FFC pics are
 * numbered after the GPCs, and each FFC counts exactly one hard-wired
 * event, so 'event' must match that counter's name or its generic alias.
 * On success a heap-allocated core_pcbe_config_t is returned through
 * 'data' (released later via core_pcbe_free()); the return value is 0 or
 * a CPC_* error code.
 */
static int
configure_ffc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
    uint_t nattrs, kcpc_attr_t *attrs, void **data)
{
	core_pcbe_config_t *conf;
	uint_t i;

	/* Caller (core_pcbe_configure) guarantees picnum >= num_gpc */
	if (picnum - num_gpc >= num_ffc) {
		return (CPC_INVALID_PICNUM);
	}

	/* An FFC can only count the single event it is dedicated to */
	if ((strcmp(ffc_names[picnum-num_gpc], event) != 0) &&
	    (strcmp(ffc_genericnames[picnum-num_gpc], event) != 0)) {
		return (CPC_INVALID_EVENT);
	}

	/* FFCs accept no attributes before perfmon version 3 */
	if ((versionid < 3) && (nattrs != 0)) {
		return (CPC_INVALID_ATTRIBUTE);
	}

	conf = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
	conf->core_ctl = 0;

	/* "anythr" is the only attribute accepted, and it is privileged */
	for (i = 0; i < nattrs; i++) {
		if (strncmp(attrs[i].ka_name, "anythr", 7) == 0) {
			if (secpolicy_cpc_cpu(crgetcred()) != 0) {
				kmem_free(conf, sizeof (core_pcbe_config_t));
				return (CPC_ATTR_REQUIRES_PRIVILEGE);
			}
			if (attrs[i].ka_val != 0) {
				conf->core_ctl |= CORE_FFC_ANYTHR;
			}
		} else {
			kmem_free(conf, sizeof (core_pcbe_config_t));
			return (CPC_INVALID_ATTRIBUTE);
		}
	}

	conf->core_picno = picnum;
	conf->core_pictype = CORE_FFC;
	conf->core_rawpic = preset & mask_ffc;
	conf->core_pmc = FFC_BASE_PMC + (picnum - num_gpc);

	/* All fixed-function counters have the same control register */
	conf->core_pes = PERF_FIXED_CTR_CTRL;

	if (flags & CPC_COUNT_USER)
		conf->core_ctl |= CORE_FFC_USR_EN;
	if (flags & CPC_COUNT_SYSTEM)
		conf->core_ctl |= CORE_FFC_OS_EN;
	if (flags & CPC_OVF_NOTIFY_EMT)
		conf->core_ctl |= CORE_FFC_PMI;

	*data = conf;
	return (0);
}
1307
1308 /*ARGSUSED*/
1309 static int
core_pcbe_configure(uint_t picnum,char * event,uint64_t preset,uint32_t flags,uint_t nattrs,kcpc_attr_t * attrs,void ** data,void * token)1310 core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
1311 uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
1312 void *token)
1313 {
1314 int ret;
1315 core_pcbe_config_t *conf;
1316
1317 /*
1318 * If we've been handed an existing configuration, we need only preset
1319 * the counter value.
1320 */
1321 if (*data != NULL) {
1322 conf = *data;
1323 ASSERT(conf->core_pictype == CORE_GPC ||
1324 conf->core_pictype == CORE_FFC);
1325 if (conf->core_pictype == CORE_GPC)
1326 conf->core_rawpic = preset & mask_gpc;
1327 else /* CORE_FFC */
1328 conf->core_rawpic = preset & mask_ffc;
1329 return (0);
1330 }
1331
1332 if (picnum >= total_pmc) {
1333 return (CPC_INVALID_PICNUM);
1334 }
1335
1336 if (picnum < num_gpc) {
1337 ret = configure_gpc(picnum, event, preset, flags,
1338 nattrs, attrs, data);
1339 } else {
1340 ret = configure_ffc(picnum, event, preset, flags,
1341 nattrs, attrs, data);
1342 }
1343 return (ret);
1344 }
1345
/*
 * Program every counter configuration bound to 'token' and start them.
 * All counters are stopped first, each counter/control register pair is
 * loaded, and the whole set is then enabled with a single write to
 * PERF_GLOBAL_CTRL.
 */
static void
core_pcbe_program(void *token)
{
	core_pcbe_config_t *cfg;
	uint64_t perf_global_ctrl;
	uint64_t perf_fixed_ctr_ctrl;
	uint64_t curcr4;

	core_pcbe_allstop();

	/* CR4.PCE gates use of RDPMC outside ring 0 */
	curcr4 = getcr4();
	if (kcpc_allow_nonpriv(token))
		/* Allow RDPMC at any ring level */
		setcr4(curcr4 | CR4_PCE);
	else
		/* Allow RDPMC only at ring 0 */
		setcr4(curcr4 & ~CR4_PCE);

	/* Clear any overflow indicators before programming the counters */
	WRMSR(PERF_GLOBAL_OVF_CTRL, MASK_CONDCHGD_OVFBUFFER | control_mask);

	cfg = NULL;
	perf_global_ctrl = 0;
	perf_fixed_ctr_ctrl = 0;
	cfg = (core_pcbe_config_t *)kcpc_next_config(token, cfg, NULL);
	while (cfg != NULL) {
		ASSERT(cfg->core_pictype == CORE_GPC ||
		    cfg->core_pictype == CORE_FFC);

		if (cfg->core_pictype == CORE_GPC) {
			/*
			 * General-purpose counter registers have write
			 * restrictions where only the lower 32-bits can be
			 * written to. The rest of the relevant bits are
			 * written to by extension from bit 31 (all ZEROS if
			 * bit-31 is ZERO and all ONE if bit-31 is ONE). This
			 * makes it possible to write to the counter register
			 * only values that have all ONEs or all ZEROs in the
			 * higher bits.
			 */
			if (((cfg->core_rawpic & BITS_EXTENDED_FROM_31) == 0) ||
			    ((cfg->core_rawpic & BITS_EXTENDED_FROM_31) ==
			    BITS_EXTENDED_FROM_31)) {
				/*
				 * Straighforward case where the higher bits
				 * are all ZEROs or all ONEs.
				 */
				WRMSR(cfg->core_pmc,
				    (cfg->core_rawpic & mask_gpc));
			} else {
				/*
				 * The high order bits are not all the same.
				 * We save what is currently in the registers
				 * and do not write to it. When we want to do
				 * a read from this register later (in
				 * core_pcbe_sample()), we subtract the value
				 * we save here to get the actual event count.
				 *
				 * NOTE: As a result, we will not get overflow
				 * interrupts as expected.
				 */
				RDMSR(cfg->core_pmc, cfg->core_rawpic);
				cfg->core_rawpic = cfg->core_rawpic & mask_gpc;
			}
			/* Program the event-select register for this GPC */
			WRMSR(cfg->core_pes, cfg->core_ctl);
			perf_global_ctrl |= 1ull << cfg->core_picno;
		} else {
			/*
			 * Unlike the general-purpose counters, all relevant
			 * bits of fixed-function counters can be written to.
			 */
			WRMSR(cfg->core_pmc, cfg->core_rawpic & mask_ffc);

			/*
			 * Collect the control bits for all the
			 * fixed-function counters and write it at one shot
			 * later in this function
			 */
			perf_fixed_ctr_ctrl |= cfg->core_ctl <<
			    ((cfg->core_picno - num_gpc) * CORE_FFC_ATTR_SIZE);
			/* FFC enable bits start at bit 32 of GLOBAL_CTRL */
			perf_global_ctrl |=
			    1ull << (cfg->core_picno - num_gpc + 32);
		}

		cfg = (core_pcbe_config_t *)
		    kcpc_next_config(token, cfg, NULL);
	}

	/* Enable all the counters */
	WRMSR(PERF_FIXED_CTR_CTRL, perf_fixed_ctr_ctrl);
	WRMSR(PERF_GLOBAL_CTRL, perf_global_ctrl);
}
1438
/*
 * Stop all counters and revoke user-mode RDPMC access.
 */
static void
core_pcbe_allstop(void)
{
	/* Disable all the counters together */
	WRMSR(PERF_GLOBAL_CTRL, ALL_STOPPED);

	/* Clear CR4.PCE so RDPMC is again restricted to ring 0 */
	setcr4(getcr4() & ~CR4_PCE);
}
1447
/*
 * For each configuration bound to 'token', accumulate the events counted
 * since the previous sample into the caller's request data (*daddr).
 * core_rawpic holds the (masked) counter reading from the last sample.
 */
static void
core_pcbe_sample(void *token)
{
	uint64_t *daddr;
	uint64_t curpic;
	core_pcbe_config_t *cfg;
	uint64_t counter_mask;

	cfg = (core_pcbe_config_t *)kcpc_next_config(token, NULL, &daddr);
	while (cfg != NULL) {
		ASSERT(cfg->core_pictype == CORE_GPC ||
		    cfg->core_pictype == CORE_FFC);

		curpic = rdmsr(cfg->core_pmc);

		DTRACE_PROBE4(core__pcbe__sample,
		    uint64_t, cfg->core_pmc,
		    uint64_t, curpic,
		    uint64_t, cfg->core_rawpic,
		    uint64_t, *daddr);

		/* GPCs and FFCs may have different usable widths */
		if (cfg->core_pictype == CORE_GPC) {
			counter_mask = mask_gpc;
		} else {
			counter_mask = mask_ffc;
		}
		curpic = curpic & counter_mask;
		if (curpic >= cfg->core_rawpic) {
			*daddr += curpic - cfg->core_rawpic;
		} else {
			/* Counter overflowed since our last sample */
			*daddr += counter_mask - (cfg->core_rawpic - curpic) +
			    1;
		}
		cfg->core_rawpic = *daddr & counter_mask;

		cfg =
		    (core_pcbe_config_t *)kcpc_next_config(token, cfg, &daddr);
	}
}
1488
/*
 * Free a per-counter configuration allocated by configure_gpc()/
 * configure_ffc().
 */
static void
core_pcbe_free(void *config)
{
	kmem_free(config, sizeof (core_pcbe_config_t));
}
1494
/* Module linkage: register this PCBE with the kernel module framework */
static struct modlpcbe core_modlpcbe = {
	&mod_pcbeops,
	"Core Performance Counters",
	&core_pcbe_ops
};

static struct modlinkage core_modl = {
	MODREV_1,
	&core_modlpcbe,
};
1505
1506 int
_init(void)1507 _init(void)
1508 {
1509 if (core_pcbe_init() != 0) {
1510 return (ENOTSUP);
1511 }
1512 return (mod_install(&core_modl));
1513 }
1514
1515 int
_fini(void)1516 _fini(void)
1517 {
1518 return (mod_remove(&core_modl));
1519 }
1520
1521 int
_info(struct modinfo * mi)1522 _info(struct modinfo *mi)
1523 {
1524 return (mod_info(&core_modl, mi));
1525 }
1526