1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Performance Counter Back-End for Intel processors supporting Architectural 28 * Performance Monitoring. 
29 */ 30 31 #include <sys/cpuvar.h> 32 #include <sys/param.h> 33 #include <sys/cpc_impl.h> 34 #include <sys/cpc_pcbe.h> 35 #include <sys/modctl.h> 36 #include <sys/inttypes.h> 37 #include <sys/systm.h> 38 #include <sys/cmn_err.h> 39 #include <sys/x86_archext.h> 40 #include <sys/sdt.h> 41 #include <sys/archsystm.h> 42 #include <sys/privregs.h> 43 #include <sys/ddi.h> 44 #include <sys/sunddi.h> 45 #include <sys/cred.h> 46 #include <sys/policy.h> 47 48 static int core_pcbe_init(void); 49 static uint_t core_pcbe_ncounters(void); 50 static const char *core_pcbe_impl_name(void); 51 static const char *core_pcbe_cpuref(void); 52 static char *core_pcbe_list_events(uint_t picnum); 53 static char *core_pcbe_list_attrs(void); 54 static uint64_t core_pcbe_event_coverage(char *event); 55 static uint64_t core_pcbe_overflow_bitmap(void); 56 static int core_pcbe_configure(uint_t picnum, char *event, uint64_t preset, 57 uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data, 58 void *token); 59 static void core_pcbe_program(void *token); 60 static void core_pcbe_allstop(void); 61 static void core_pcbe_sample(void *token); 62 static void core_pcbe_free(void *config); 63 64 #define FALSE 0 65 #define TRUE 1 66 67 /* Counter Type */ 68 #define CORE_GPC 0 /* General-Purpose Counter (GPC) */ 69 #define CORE_FFC 1 /* Fixed-Function Counter (FFC) */ 70 71 /* MSR Addresses */ 72 #define GPC_BASE_PMC 0x00c1 /* First GPC */ 73 #define GPC_BASE_PES 0x0186 /* First GPC Event Select register */ 74 #define FFC_BASE_PMC 0x0309 /* First FFC */ 75 #define PERF_FIXED_CTR_CTRL 0x038d /* Used to enable/disable FFCs */ 76 #define PERF_GLOBAL_STATUS 0x038e /* Overflow status register */ 77 #define PERF_GLOBAL_CTRL 0x038f /* Used to enable/disable counting */ 78 #define PERF_GLOBAL_OVF_CTRL 0x0390 /* Used to clear overflow status */ 79 80 /* 81 * Processor Event Select register fields 82 */ 83 #define CORE_USR (1ULL << 16) /* Count while not in ring 0 */ 84 #define CORE_OS (1ULL << 17) /* Count 
while in ring 0 */ 85 #define CORE_EDGE (1ULL << 18) /* Enable edge detection */ 86 #define CORE_PC (1ULL << 19) /* Enable pin control */ 87 #define CORE_INT (1ULL << 20) /* Enable interrupt on overflow */ 88 #define CORE_EN (1ULL << 22) /* Enable counting */ 89 #define CORE_INV (1ULL << 23) /* Invert the CMASK */ 90 #define CORE_ANYTHR (1ULL << 21) /* Count event for any thread on core */ 91 92 #define CORE_UMASK_SHIFT 8 93 #define CORE_UMASK_MASK 0xffu 94 #define CORE_CMASK_SHIFT 24 95 #define CORE_CMASK_MASK 0xffu 96 97 /* 98 * Fixed-function counter attributes 99 */ 100 #define CORE_FFC_OS_EN (1ULL << 0) /* Count while not in ring 0 */ 101 #define CORE_FFC_USR_EN (1ULL << 1) /* Count while in ring 1 */ 102 #define CORE_FFC_ANYTHR (1ULL << 2) /* Count event for any thread on core */ 103 #define CORE_FFC_PMI (1ULL << 3) /* Enable interrupt on overflow */ 104 105 /* 106 * Number of bits for specifying each FFC's attributes in the control register 107 */ 108 #define CORE_FFC_ATTR_SIZE 4 109 110 /* 111 * CondChgd and OvfBuffer fields of global status and overflow control registers 112 */ 113 #define CONDCHGD (1ULL << 63) 114 #define OVFBUFFER (1ULL << 62) 115 #define MASK_CONDCHGD_OVFBUFFER (CONDCHGD | OVFBUFFER) 116 117 #define ALL_STOPPED 0ULL 118 119 #define BITMASK_XBITS(x) ((1ull << (x)) - 1ull) 120 121 /* 122 * Only the lower 32-bits can be written to in the general-purpose 123 * counters. The higher bits are extended from bit 31; all ones if 124 * bit 31 is one and all zeros otherwise. 125 * 126 * The fixed-function counters do not have this restriction. 
127 */ 128 #define BITS_EXTENDED_FROM_31 (BITMASK_XBITS(width_gpc) & ~BITMASK_XBITS(31)) 129 130 #define WRMSR(msr, value) \ 131 wrmsr((msr), (value)); \ 132 DTRACE_PROBE2(wrmsr, uint64_t, (msr), uint64_t, (value)); 133 134 #define RDMSR(msr, value) \ 135 (value) = rdmsr((msr)); \ 136 DTRACE_PROBE2(rdmsr, uint64_t, (msr), uint64_t, (value)); 137 138 typedef struct core_pcbe_config { 139 uint64_t core_rawpic; 140 uint64_t core_ctl; /* Event Select bits */ 141 uint64_t core_pmc; /* Counter register address */ 142 uint64_t core_pes; /* Event Select register address */ 143 uint_t core_picno; 144 uint8_t core_pictype; /* CORE_GPC or CORE_FFC */ 145 } core_pcbe_config_t; 146 147 pcbe_ops_t core_pcbe_ops = { 148 PCBE_VER_1, /* pcbe_ver */ 149 CPC_CAP_OVERFLOW_INTERRUPT | CPC_CAP_OVERFLOW_PRECISE, /* pcbe_caps */ 150 core_pcbe_ncounters, /* pcbe_ncounters */ 151 core_pcbe_impl_name, /* pcbe_impl_name */ 152 core_pcbe_cpuref, /* pcbe_cpuref */ 153 core_pcbe_list_events, /* pcbe_list_events */ 154 core_pcbe_list_attrs, /* pcbe_list_attrs */ 155 core_pcbe_event_coverage, /* pcbe_event_coverage */ 156 core_pcbe_overflow_bitmap, /* pcbe_overflow_bitmap */ 157 core_pcbe_configure, /* pcbe_configure */ 158 core_pcbe_program, /* pcbe_program */ 159 core_pcbe_allstop, /* pcbe_allstop */ 160 core_pcbe_sample, /* pcbe_sample */ 161 core_pcbe_free /* pcbe_free */ 162 }; 163 164 struct nametable_core_uarch { 165 const char *name; 166 uint64_t restricted_bits; 167 uint8_t event_num; 168 }; 169 170 #define NT_END 0xFF 171 172 /* 173 * Counting an event for all cores or all bus agents requires cpc_cpu privileges 174 */ 175 #define ALL_CORES (1ULL << 15) 176 #define ALL_AGENTS (1ULL << 13) 177 178 /* 179 * The events listed in the following table can be counted on all 180 * general-purpose counters on processors that are of Penryn and Merom Family 181 */ 182 static const struct nametable_core_uarch cmn_gpc_events_core_uarch[] = { 183 /* Alphabetical order of event name */ 184 185 { 
"baclears", 0x0, 0xe6 }, 186 { "bogus_br", 0x0, 0xe4 }, 187 { "br_bac_missp_exec", 0x0, 0x8a }, 188 189 { "br_call_exec", 0x0, 0x92 }, 190 { "br_call_missp_exec", 0x0, 0x93 }, 191 { "br_cnd_exec", 0x0, 0x8b }, 192 193 { "br_cnd_missp_exec", 0x0, 0x8c }, 194 { "br_ind_call_exec", 0x0, 0x94 }, 195 { "br_ind_exec", 0x0, 0x8d }, 196 197 { "br_ind_missp_exec", 0x0, 0x8e }, 198 { "br_inst_decoded", 0x0, 0xe0 }, 199 { "br_inst_exec", 0x0, 0x88 }, 200 201 { "br_inst_retired", 0x0, 0xc4 }, 202 { "br_inst_retired_mispred", 0x0, 0xc5 }, 203 { "br_missp_exec", 0x0, 0x89 }, 204 205 { "br_ret_bac_missp_exec", 0x0, 0x91 }, 206 { "br_ret_exec", 0x0, 0x8f }, 207 { "br_ret_missp_exec", 0x0, 0x90 }, 208 209 { "br_tkn_bubble_1", 0x0, 0x97 }, 210 { "br_tkn_bubble_2", 0x0, 0x98 }, 211 { "bus_bnr_drv", ALL_AGENTS, 0x61 }, 212 213 { "bus_data_rcv", ALL_CORES, 0x64 }, 214 { "bus_drdy_clocks", ALL_AGENTS, 0x62 }, 215 { "bus_hit_drv", ALL_AGENTS, 0x7a }, 216 217 { "bus_hitm_drv", ALL_AGENTS, 0x7b }, 218 { "bus_io_wait", ALL_CORES, 0x7f }, 219 { "bus_lock_clocks", ALL_CORES | ALL_AGENTS, 0x63 }, 220 221 { "bus_request_outstanding", ALL_CORES | ALL_AGENTS, 0x60 }, 222 { "bus_trans_any", ALL_CORES | ALL_AGENTS, 0x70 }, 223 { "bus_trans_brd", ALL_CORES | ALL_AGENTS, 0x65 }, 224 225 { "bus_trans_burst", ALL_CORES | ALL_AGENTS, 0x6e }, 226 { "bus_trans_def", ALL_CORES | ALL_AGENTS, 0x6d }, 227 { "bus_trans_ifetch", ALL_CORES | ALL_AGENTS, 0x68 }, 228 229 { "bus_trans_inval", ALL_CORES | ALL_AGENTS, 0x69 }, 230 { "bus_trans_io", ALL_CORES | ALL_AGENTS, 0x6c }, 231 { "bus_trans_mem", ALL_CORES | ALL_AGENTS, 0x6f }, 232 233 { "bus_trans_p", ALL_CORES | ALL_AGENTS, 0x6b }, 234 { "bus_trans_pwr", ALL_CORES | ALL_AGENTS, 0x6a }, 235 { "bus_trans_rfo", ALL_CORES | ALL_AGENTS, 0x66 }, 236 237 { "bus_trans_wb", ALL_CORES | ALL_AGENTS, 0x67 }, 238 { "busq_empty", ALL_CORES, 0x7d }, 239 { "cmp_snoop", ALL_CORES, 0x78 }, 240 241 { "cpu_clk_unhalted", 0x0, 0x3c }, 242 { "cycles_int", 0x0, 0xc6 }, 243 { 
"cycles_l1i_mem_stalled", 0x0, 0x86 }, 244 245 { "dtlb_misses", 0x0, 0x08 }, 246 { "eist_trans", 0x0, 0x3a }, 247 { "esp", 0x0, 0xab }, 248 249 { "ext_snoop", ALL_AGENTS, 0x77 }, 250 { "fp_mmx_trans", 0x0, 0xcc }, 251 { "hw_int_rcv", 0x0, 0xc8 }, 252 253 { "ild_stall", 0x0, 0x87 }, 254 { "inst_queue", 0x0, 0x83 }, 255 { "inst_retired", 0x0, 0xc0 }, 256 257 { "itlb", 0x0, 0x82 }, 258 { "itlb_miss_retired", 0x0, 0xc9 }, 259 { "l1d_all_ref", 0x0, 0x43 }, 260 261 { "l1d_cache_ld", 0x0, 0x40 }, 262 { "l1d_cache_lock", 0x0, 0x42 }, 263 { "l1d_cache_st", 0x0, 0x41 }, 264 265 { "l1d_m_evict", 0x0, 0x47 }, 266 { "l1d_m_repl", 0x0, 0x46 }, 267 { "l1d_pend_miss", 0x0, 0x48 }, 268 269 { "l1d_prefetch", 0x0, 0x4e }, 270 { "l1d_repl", 0x0, 0x45 }, 271 { "l1d_split", 0x0, 0x49 }, 272 273 { "l1i_misses", 0x0, 0x81 }, 274 { "l1i_reads", 0x0, 0x80 }, 275 { "l2_ads", ALL_CORES, 0x21 }, 276 277 { "l2_dbus_busy_rd", ALL_CORES, 0x23 }, 278 { "l2_ifetch", ALL_CORES, 0x28 }, 279 { "l2_ld", ALL_CORES, 0x29 }, 280 281 { "l2_lines_in", ALL_CORES, 0x24 }, 282 { "l2_lines_out", ALL_CORES, 0x26 }, 283 { "l2_lock", ALL_CORES, 0x2b }, 284 285 { "l2_m_lines_in", ALL_CORES, 0x25 }, 286 { "l2_m_lines_out", ALL_CORES, 0x27 }, 287 { "l2_no_req", ALL_CORES, 0x32 }, 288 289 { "l2_reject_busq", ALL_CORES, 0x30 }, 290 { "l2_rqsts", ALL_CORES, 0x2e }, 291 { "l2_st", ALL_CORES, 0x2a }, 292 293 { "load_block", 0x0, 0x03 }, 294 { "load_hit_pre", 0x0, 0x4c }, 295 { "machine_nukes", 0x0, 0xc3 }, 296 297 { "macro_insts", 0x0, 0xaa }, 298 { "memory_disambiguation", 0x0, 0x09 }, 299 { "misalign_mem_ref", 0x0, 0x05 }, 300 { "page_walks", 0x0, 0x0c }, 301 302 { "pref_rqsts_dn", 0x0, 0xf8 }, 303 { "pref_rqsts_up", 0x0, 0xf0 }, 304 { "rat_stalls", 0x0, 0xd2 }, 305 306 { "resource_stalls", 0x0, 0xdc }, 307 { "rs_uops_dispatched", 0x0, 0xa0 }, 308 { "seg_reg_renames", 0x0, 0xd5 }, 309 310 { "seg_rename_stalls", 0x0, 0xd4 }, 311 { "segment_reg_loads", 0x0, 0x06 }, 312 { "simd_assist", 0x0, 0xcd }, 313 314 { 
"simd_comp_inst_retired", 0x0, 0xca }, 315 { "simd_inst_retired", 0x0, 0xc7 }, 316 { "simd_instr_retired", 0x0, 0xce }, 317 318 { "simd_sat_instr_retired", 0x0, 0xcf }, 319 { "simd_sat_uop_exec", 0x0, 0xb1 }, 320 { "simd_uop_type_exec", 0x0, 0xb3 }, 321 322 { "simd_uops_exec", 0x0, 0xb0 }, 323 { "snoop_stall_drv", ALL_CORES | ALL_AGENTS, 0x7e }, 324 { "sse_pre_exec", 0x0, 0x07 }, 325 326 { "sse_pre_miss", 0x0, 0x4b }, 327 { "store_block", 0x0, 0x04 }, 328 { "thermal_trip", 0x0, 0x3b }, 329 330 { "uops_retired", 0x0, 0xc2 }, 331 { "x87_ops_retired", 0x0, 0xc1 }, 332 { "", 0x0, NT_END } 333 }; 334 335 /* 336 * If any of the pic specific events require privileges, make sure to add a 337 * check in configure_gpc() to find whether an event hard-coded as a number by 338 * the user has any privilege requirements 339 */ 340 static const struct nametable_core_uarch pic0_events[] = { 341 /* Alphabetical order of event name */ 342 343 { "cycles_div_busy", 0x0, 0x14 }, 344 { "fp_comp_ops_exe", 0x0, 0x10 }, 345 { "idle_during_div", 0x0, 0x18 }, 346 347 { "mem_load_retired", 0x0, 0xcb }, 348 { "rs_uops_dispatched_port", 0x0, 0xa1 }, 349 { "", 0x0, NT_END } 350 }; 351 352 static const struct nametable_core_uarch pic1_events[] = { 353 /* Alphabetical order of event name */ 354 355 { "delayed_bypass", 0x0, 0x19 }, 356 { "div", 0x0, 0x13 }, 357 { "fp_assist", 0x0, 0x11 }, 358 359 { "mul", 0x0, 0x12 }, 360 { "", 0x0, NT_END } 361 }; 362 363 /* FFC entries must be in order */ 364 char *ffc_names_non_htt[] = { 365 "instr_retired.any", 366 "cpu_clk_unhalted.core", 367 "cpu_clk_unhalted.ref", 368 NULL 369 }; 370 371 char *ffc_names_htt[] = { 372 "instr_retired.any", 373 "cpu_clk_unhalted.thread", 374 "cpu_clk_unhalted.ref", 375 NULL 376 }; 377 378 char **ffc_names = NULL; 379 380 static char **gpc_names = NULL; 381 static uint32_t versionid; 382 static uint64_t num_gpc; 383 static uint64_t width_gpc; 384 static uint64_t mask_gpc; 385 static uint64_t num_ffc; 386 static uint64_t 
width_ffc; 387 static uint64_t mask_ffc; 388 static uint_t total_pmc; 389 static uint64_t control_ffc; 390 static uint64_t control_gpc; 391 static uint64_t control_mask; 392 static uint32_t arch_events_vector; 393 394 #define IMPL_NAME_LEN 100 395 static char core_impl_name[IMPL_NAME_LEN]; 396 397 static const char *core_cpuref = 398 "See Appendix A of the \"Intel 64 and IA-32 Architectures Software" \ 399 " Developer's Manual Volume 3B: System Programming Guide, Part 2\"" \ 400 " Order Number: 253669-026US, Februrary 2008"; 401 402 struct events_table_t { 403 uint8_t eventselect; 404 uint8_t unitmask; 405 uint64_t supported_counters; 406 const char *name; 407 }; 408 409 /* Used to describe which counters support an event */ 410 #define C(x) (1 << (x)) 411 #define C0 C(0) 412 #define C1 C(1) 413 #define C2 C(2) 414 #define C3 C(3) 415 #define C_ALL 0xFFFFFFFFFFFFFFFF 416 417 /* Architectural events */ 418 #define ARCH_EVENTS_COMMON \ 419 { 0xc0, 0x00, C_ALL, "inst_retired.any_p" }, \ 420 { 0x3c, 0x01, C_ALL, "cpu_clk_unhalted.ref_p" }, \ 421 { 0x2e, 0x4f, C_ALL, "longest_lat_cache.reference" }, \ 422 { 0x2e, 0x41, C_ALL, "longest_lat_cache.miss" }, \ 423 { 0xc4, 0x00, C_ALL, "br_inst_retired.all_branches" }, \ 424 { 0xc5, 0x00, C_ALL, "br_misp_retired.all_branches" } 425 426 const struct events_table_t arch_events_table_non_htt[] = { 427 { 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.core" }, 428 ARCH_EVENTS_COMMON 429 }; 430 431 const struct events_table_t arch_events_table_htt[] = { 432 { 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" }, 433 ARCH_EVENTS_COMMON 434 }; 435 436 const struct events_table_t *arch_events_table = NULL; 437 static uint64_t known_arch_events; 438 static uint64_t known_ffc_num; 439 440 #define EVENTS_FAM6_MOD26 \ 441 \ 442 { 0x80, 0x04, C0|C1|C2|C3, "l1i.cycles_stalled" }, \ 443 { 0x80, 0x01, C0|C1|C2|C3, "l1i.hits" }, \ 444 { 0x80, 0x02, C0|C1|C2|C3, "l1i.misses" }, \ 445 \ 446 { 0x80, 0x03, C0|C1|C2|C3, "l1i.reads" }, \ 447 { 0x82, 0x01, 
C0|C1|C2|C3, "large_itlb.hit" }, \ 448 { 0x87, 0x0F, C0|C1|C2|C3, "ild_stall.any" }, \ 449 \ 450 { 0x87, 0x04, C0|C1|C2|C3, "ild_stall.iq_full" }, \ 451 { 0x87, 0x01, C0|C1|C2|C3, "ild_stall.lcp" }, \ 452 { 0x87, 0x02, C0|C1|C2|C3, "ild_stall.mru" }, \ 453 \ 454 { 0x87, 0x08, C0|C1|C2|C3, "ild_stall.regen" }, \ 455 { 0xE6, 0x02, C0|C1|C2|C3, "baclear.bad_target" }, \ 456 { 0xE6, 0x01, C0|C1|C2|C3, "baclear.clear" }, \ 457 \ 458 { 0xE8, 0x01, C0|C1|C2|C3, "bpu_clears.early" }, \ 459 { 0xE8, 0x02, C0|C1|C2|C3, "bpu_clears.late" }, \ 460 { 0xE5, 0x01, C0|C1|C2|C3, "bpu_missed_call_ret" }, \ 461 \ 462 { 0xE0, 0x01, C0|C1|C2|C3, "br_inst_decoded" }, \ 463 { 0x88, 0x7F, C0|C1|C2|C3, "br_inst_exec.any" }, \ 464 { 0x88, 0x01, C0|C1|C2|C3, "br_inst_exec.cond" }, \ 465 \ 466 { 0x88, 0x02, C0|C1|C2|C3, "br_inst_exec.direct" }, \ 467 { 0x88, 0x10, C0|C1|C2|C3, "br_inst_exec.direct_near_call" }, \ 468 { 0x88, 0x20, C0|C1|C2|C3, "br_inst_exec.indirect_near_call" }, \ 469 \ 470 { 0x88, 0x04, C0|C1|C2|C3, "br_inst_exec.indirect_non_call" }, \ 471 { 0x88, 0x30, C0|C1|C2|C3, "br_inst_exec.near_calls" }, \ 472 { 0x88, 0x07, C0|C1|C2|C3, "br_inst_exec.non_calls" }, \ 473 \ 474 { 0x88, 0x08, C0|C1|C2|C3, "br_inst_exec.return_near" }, \ 475 { 0x88, 0x40, C0|C1|C2|C3, "br_inst_exec.taken" }, \ 476 { 0x89, 0x7F, C0|C1|C2|C3, "br_misp_exec.any" }, \ 477 \ 478 { 0x89, 0x01, C0|C1|C2|C3, "br_misp_exec.cond" }, \ 479 { 0x89, 0x02, C0|C1|C2|C3, "br_misp_exec.direct" }, \ 480 { 0x89, 0x10, C0|C1|C2|C3, "br_misp_exec.direct_near_call" }, \ 481 \ 482 { 0x89, 0x20, C0|C1|C2|C3, "br_misp_exec.indirect_near_call" }, \ 483 { 0x89, 0x04, C0|C1|C2|C3, "br_misp_exec.indirect_non_call" }, \ 484 { 0x89, 0x30, C0|C1|C2|C3, "br_misp_exec.near_calls" }, \ 485 \ 486 { 0x89, 0x07, C0|C1|C2|C3, "br_misp_exec.non_calls" }, \ 487 { 0x89, 0x08, C0|C1|C2|C3, "br_misp_exec.return_near" }, \ 488 { 0x89, 0x40, C0|C1|C2|C3, "br_misp_exec.taken" }, \ 489 \ 490 { 0x17, 0x01, C0|C1|C2|C3, "inst_queue_writes" }, \ 491 { 
0x1E, 0x01, C0|C1|C2|C3, "inst_queue_write_cycles" }, \ 492 { 0xA7, 0x01, C0|C1|C2|C3, "baclear_force_iq" }, \ 493 \ 494 { 0xD0, 0x01, C0|C1|C2|C3, "macro_insts.decoded" }, \ 495 { 0xA6, 0x01, C0|C1|C2|C3, "macro_insts.fusions_decoded" }, \ 496 { 0x19, 0x01, C0|C1|C2|C3, "two_uop_insts_decoded" }, \ 497 \ 498 { 0x18, 0x01, C0|C1|C2|C3, "inst_decoded.dec0" }, \ 499 { 0xD1, 0x04, C0|C1|C2|C3, "uops_decoded.esp_folding" }, \ 500 { 0xD1, 0x08, C0|C1|C2|C3, "uops_decoded.esp_sync" }, \ 501 \ 502 { 0xD1, 0x02, C0|C1|C2|C3, "uops_decoded.ms" }, \ 503 { 0x20, 0x01, C0|C1|C2|C3, "lsd_overflow" }, \ 504 { 0x0E, 0x01, C0|C1|C2|C3, "uops_issued.any" }, \ 505 \ 506 { 0x0E, 0x02, C0|C1|C2|C3, "uops_issued.fused" }, \ 507 { 0xA2, 0x20, C0|C1|C2|C3, "resource_stalls.fpcw" }, \ 508 { 0xA2, 0x02, C0|C1|C2|C3, "resource_stalls.load" }, \ 509 \ 510 { 0xA2, 0x40, C0|C1|C2|C3, "resource_stalls.mxcsr" }, \ 511 { 0xA2, 0x04, C0|C1|C2|C3, "resource_stalls.rs_full" }, \ 512 { 0xA2, 0x08, C0|C1|C2|C3, "resource_stalls.store" }, \ 513 \ 514 { 0xA2, 0x01, C0|C1|C2|C3, "resource_stalls.any" }, \ 515 { 0xD2, 0x01, C0|C1|C2|C3, "rat_stalls.flags" }, \ 516 { 0xD2, 0x02, C0|C1|C2|C3, "rat_stalls.registers" }, \ 517 \ 518 { 0xD2, 0x04, C0|C1|C2|C3, "rat_stalls.rob_read_port" }, \ 519 { 0xD2, 0x08, C0|C1|C2|C3, "rat_stalls.scoreboard" }, \ 520 { 0xD2, 0x0F, C0|C1|C2|C3, "rat_stalls.any" }, \ 521 \ 522 { 0xD4, 0x01, C0|C1|C2|C3, "seg_rename_stalls" }, \ 523 { 0xD5, 0x01, C0|C1|C2|C3, "es_reg_renames" }, \ 524 { 0x10, 0x02, C0|C1|C2|C3, "fp_comp_ops_exe.mmx" }, \ 525 \ 526 { 0x10, 0x80, C0|C1|C2|C3, "fp_comp_ops_exe.sse_double_precision" }, \ 527 { 0x10, 0x04, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp" }, \ 528 { 0x10, 0x10, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_packed" }, \ 529 \ 530 { 0x10, 0x20, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_scalar" }, \ 531 { 0x10, 0x40, C0|C1|C2|C3, "fp_comp_ops_exe.sse_single_precision" }, \ 532 { 0x10, 0x08, C0|C1|C2|C3, "fp_comp_ops_exe.sse2_integer" }, \ 533 \ 534 { 0x10, 
0x01, C0|C1|C2|C3, "fp_comp_ops_exe.x87" }, \ 535 { 0x14, 0x01, C0|C1|C2|C3, "arith.cycles_div_busy" }, \ 536 { 0x14, 0x02, C0|C1|C2|C3, "arith.mul" }, \ 537 \ 538 { 0x12, 0x04, C0|C1|C2|C3, "simd_int_128.pack" }, \ 539 { 0x12, 0x20, C0|C1|C2|C3, "simd_int_128.packed_arith" }, \ 540 { 0x12, 0x10, C0|C1|C2|C3, "simd_int_128.packed_logical" }, \ 541 \ 542 { 0x12, 0x01, C0|C1|C2|C3, "simd_int_128.packed_mpy" }, \ 543 { 0x12, 0x02, C0|C1|C2|C3, "simd_int_128.packed_shift" }, \ 544 { 0x12, 0x40, C0|C1|C2|C3, "simd_int_128.shuffle_move" }, \ 545 \ 546 { 0x12, 0x08, C0|C1|C2|C3, "simd_int_128.unpack" }, \ 547 { 0xFD, 0x04, C0|C1|C2|C3, "simd_int_64.pack" }, \ 548 { 0xFD, 0x20, C0|C1|C2|C3, "simd_int_64.packed_arith" }, \ 549 \ 550 { 0xFD, 0x10, C0|C1|C2|C3, "simd_int_64.packed_logical" }, \ 551 { 0xFD, 0x01, C0|C1|C2|C3, "simd_int_64.packed_mpy" }, \ 552 { 0xFD, 0x02, C0|C1|C2|C3, "simd_int_64.packed_shift" }, \ 553 \ 554 { 0xFD, 0x40, C0|C1|C2|C3, "simd_int_64.shuffle_move" }, \ 555 { 0xFD, 0x08, C0|C1|C2|C3, "simd_int_64.unpack" }, \ 556 { 0xB1, 0x01, C0|C1|C2|C3, "uops_executed.port0" }, \ 557 \ 558 { 0xB1, 0x02, C0|C1|C2|C3, "uops_executed.port1" }, \ 559 { 0x40, 0x04, C0|C1, "l1d_cache_ld.e_state" }, \ 560 { 0x40, 0x01, C0|C1, "l1d_cache_ld.i_state" }, \ 561 \ 562 { 0x40, 0x08, C0|C1, "l1d_cache_ld.m_state" }, \ 563 { 0x40, 0x0F, C0|C1, "l1d_cache_ld.mesi" }, \ 564 { 0x40, 0x02, C0|C1, "l1d_cache_ld.s_state" }, \ 565 \ 566 { 0x41, 0x04, C0|C1, "l1d_cache_st.e_state" }, \ 567 { 0x41, 0x08, C0|C1, "l1d_cache_st.m_state" }, \ 568 { 0x41, 0x0F, C0|C1, "l1d_cache_st.mesi" }, \ 569 \ 570 { 0x41, 0x02, C0|C1, "l1d_cache_st.s_state" }, \ 571 { 0x42, 0x04, C0|C1, "l1d_cache_lock.e_state" }, \ 572 { 0x42, 0x01, C0|C1, "l1d_cache_lock.hit" }, \ 573 \ 574 { 0x42, 0x08, C0|C1, "l1d_cache_lock.m_state" }, \ 575 { 0x42, 0x02, C0|C1, "l1d_cache_lock.s_state" }, \ 576 { 0x43, 0x01, C0|C1, "l1d_all_ref.any" }, \ 577 \ 578 { 0x43, 0x02, C0|C1, "l1d_all_ref.cacheable" }, \ 579 { 0x4B, 
0x01, C0|C1, "mmx2_mem_exec.nta" }, \ 580 { 0x4C, 0x01, C0|C1, "load_hit_pre" }, \ 581 \ 582 { 0x4E, 0x02, C0|C1, "l1d_prefetch.miss" }, \ 583 { 0x4E, 0x01, C0|C1, "l1d_prefetch.requests" }, \ 584 { 0x4E, 0x04, C0|C1, "l1d_prefetch.triggers" }, \ 585 \ 586 { 0x51, 0x04, C0|C1, "l1d.m_evict" }, \ 587 { 0x51, 0x02, C0|C1, "l1d.m_repl" }, \ 588 { 0x51, 0x08, C0|C1, "l1d.m_snoop_evict" }, \ 589 \ 590 { 0x51, 0x01, C0|C1, "l1d.repl" }, \ 591 { 0x52, 0x01, C0|C1, "l1d_cache_prefetch_lock_fb_hit" }, \ 592 { 0x53, 0x01, C0|C1, "l1d_cache_lock_fb_hit" }, \ 593 \ 594 { 0x63, 0x02, C0|C1, "cache_lock_cycles.l1d" }, \ 595 { 0x63, 0x01, C0|C1, "cache_lock_cycles.l1d_l2" }, \ 596 { 0x06, 0x04, C0|C1|C2|C3, "store_blocks.at_ret" }, \ 597 \ 598 { 0x06, 0x08, C0|C1|C2|C3, "store_blocks.l1d_block" }, \ 599 { 0x06, 0x01, C0|C1|C2|C3, "store_blocks.not_sta" }, \ 600 { 0x06, 0x02, C0|C1|C2|C3, "store_blocks.sta" }, \ 601 \ 602 { 0x13, 0x07, C0|C1|C2|C3, "load_dispatch.any" }, \ 603 { 0x13, 0x04, C0|C1|C2|C3, "load_dispatch.mob" }, \ 604 { 0x13, 0x01, C0|C1|C2|C3, "load_dispatch.rs" }, \ 605 \ 606 { 0x13, 0x02, C0|C1|C2|C3, "load_dispatch.rs_delayed" }, \ 607 { 0x08, 0x01, C0|C1|C2|C3, "dtlb_load_misses.any" }, \ 608 { 0x08, 0x20, C0|C1|C2|C3, "dtlb_load_misses.pde_miss" }, \ 609 \ 610 { 0x08, 0x02, C0|C1|C2|C3, "dtlb_load_misses.walk_completed" }, \ 611 { 0x49, 0x01, C0|C1|C2|C3, "dtlb_misses.any" }, \ 612 { 0x49, 0x10, C0|C1|C2|C3, "dtlb_misses.stlb_hit" }, \ 613 \ 614 { 0x49, 0x02, C0|C1|C2|C3, "dtlb_misses.walk_completed" }, \ 615 { 0x4F, 0x02, C0|C1|C2|C3, "ept.epde_miss" }, \ 616 { 0x4F, 0x08, C0|C1|C2|C3, "ept.epdpe_miss" }, \ 617 \ 618 { 0x85, 0x01, C0|C1|C2|C3, "itlb_misses.any" }, \ 619 { 0x85, 0x02, C0|C1|C2|C3, "itlb_misses.walk_completed" }, \ 620 { 0x24, 0xAA, C0|C1|C2|C3, "l2_rqsts.miss" }, \ 621 \ 622 { 0x24, 0xFF, C0|C1|C2|C3, "l2_rqsts.references" }, \ 623 { 0x24, 0x10, C0|C1|C2|C3, "l2_rqsts.ifetch_hit" }, \ 624 { 0x24, 0x20, C0|C1|C2|C3, "l2_rqsts.ifetch_miss" }, \ 
625 \ 626 { 0x24, 0x30, C0|C1|C2|C3, "l2_rqsts.ifetches" }, \ 627 { 0x24, 0x01, C0|C1|C2|C3, "l2_rqsts.ld_hit" }, \ 628 { 0x24, 0x02, C0|C1|C2|C3, "l2_rqsts.ld_miss" }, \ 629 \ 630 { 0x24, 0x03, C0|C1|C2|C3, "l2_rqsts.loads" }, \ 631 { 0x24, 0x40, C0|C1|C2|C3, "l2_rqsts.prefetch_hit" }, \ 632 { 0x24, 0x80, C0|C1|C2|C3, "l2_rqsts.prefetch_miss" }, \ 633 \ 634 { 0x24, 0xC0, C0|C1|C2|C3, "l2_rqsts.prefetches" }, \ 635 { 0x24, 0x04, C0|C1|C2|C3, "l2_rqsts.rfo_hit" }, \ 636 { 0x24, 0x08, C0|C1|C2|C3, "l2_rqsts.rfo_miss" }, \ 637 \ 638 { 0x24, 0x0C, C0|C1|C2|C3, "l2_rqsts.rfos" }, \ 639 { 0x26, 0xFF, C0|C1|C2|C3, "l2_data_rqsts.any" }, \ 640 { 0x26, 0x04, C0|C1|C2|C3, "l2_data_rqsts.demand.e_state" }, \ 641 \ 642 { 0x26, 0x01, C0|C1|C2|C3, "l2_data_rqsts.demand.i_state" }, \ 643 { 0x26, 0x08, C0|C1|C2|C3, "l2_data_rqsts.demand.m_state" }, \ 644 { 0x26, 0x0F, C0|C1|C2|C3, "l2_data_rqsts.demand.mesi" }, \ 645 \ 646 { 0x26, 0x02, C0|C1|C2|C3, "l2_data_rqsts.demand.s_state" }, \ 647 { 0x26, 0x40, C0|C1|C2|C3, "l2_data_rqsts.prefetch.e_state" }, \ 648 { 0x26, 0x10, C0|C1|C2|C3, "l2_data_rqsts.prefetch.i_state" }, \ 649 \ 650 { 0x26, 0x80, C0|C1|C2|C3, "l2_data_rqsts.prefetch.m_state" }, \ 651 { 0x26, 0xF0, C0|C1|C2|C3, "l2_data_rqsts.prefetch.mesi" }, \ 652 { 0x26, 0x20, C0|C1|C2|C3, "l2_data_rqsts.prefetch.s_state" }, \ 653 \ 654 { 0x27, 0x40, C0|C1|C2|C3, "l2_write.lock.e_state" }, \ 655 { 0x27, 0x10, C0|C1|C2|C3, "l2_write.lock.i_state" }, \ 656 { 0x27, 0x20, C0|C1|C2|C3, "l2_write.lock.s_state" }, \ 657 \ 658 { 0x27, 0x0E, C0|C1|C2|C3, "l2_write.rfo.hit" }, \ 659 { 0x27, 0x01, C0|C1|C2|C3, "l2_write.rfo.i_state" }, \ 660 { 0x27, 0x08, C0|C1|C2|C3, "l2_write.rfo.m_state" }, \ 661 \ 662 { 0x27, 0x0F, C0|C1|C2|C3, "l2_write.rfo.mesi" }, \ 663 { 0x27, 0x02, C0|C1|C2|C3, "l2_write.rfo.s_state" }, \ 664 { 0x28, 0x04, C0|C1|C2|C3, "l1d_wb_l2.e_state" }, \ 665 \ 666 { 0x28, 0x01, C0|C1|C2|C3, "l1d_wb_l2.i_state" }, \ 667 { 0x28, 0x08, C0|C1|C2|C3, "l1d_wb_l2.m_state" }, \ 668 { 
0xF0, 0x80, C0|C1|C2|C3, "l2_transactions.any" }, \ 669 \ 670 { 0xF0, 0x20, C0|C1|C2|C3, "l2_transactions.fill" }, \ 671 { 0xF0, 0x04, C0|C1|C2|C3, "l2_transactions.ifetch" }, \ 672 { 0xF0, 0x10, C0|C1|C2|C3, "l2_transactions.l1d_wb" }, \ 673 \ 674 { 0xF0, 0x01, C0|C1|C2|C3, "l2_transactions.load" }, \ 675 { 0xF0, 0x08, C0|C1|C2|C3, "l2_transactions.prefetch" }, \ 676 { 0xF0, 0x02, C0|C1|C2|C3, "l2_transactions.rfo" }, \ 677 \ 678 { 0xF0, 0x40, C0|C1|C2|C3, "l2_transactions.wb" }, \ 679 { 0xF1, 0x07, C0|C1|C2|C3, "l2_lines_in.any" }, \ 680 { 0xF1, 0x04, C0|C1|C2|C3, "l2_lines_in.e_state" }, \ 681 \ 682 { 0xF1, 0x02, C0|C1|C2|C3, "l2_lines_in.s_state" }, \ 683 { 0xF2, 0x0F, C0|C1|C2|C3, "l2_lines_out.any" }, \ 684 { 0xF2, 0x01, C0|C1|C2|C3, "l2_lines_out.demand_clean" }, \ 685 \ 686 { 0xF2, 0x02, C0|C1|C2|C3, "l2_lines_out.demand_dirty" }, \ 687 { 0xF2, 0x04, C0|C1|C2|C3, "l2_lines_out.prefetch_clean" }, \ 688 { 0x6C, 0x01, C0|C1|C2|C3, "io_transactions" }, \ 689 \ 690 { 0xB0, 0x80, C0|C1|C2|C3, "offcore_requests.any" }, \ 691 { 0xB0, 0x10, C0|C1|C2|C3, "offcore_requests.any.rfo" }, \ 692 { 0xB0, 0x40, C0|C1|C2|C3, "offcore_requests.l1d_writeback" }, \ 693 \ 694 { 0xB8, 0x01, C0|C1|C2|C3, "snoop_response.hit" }, \ 695 { 0xB8, 0x02, C0|C1|C2|C3, "snoop_response.hite" }, \ 696 { 0xB8, 0x04, C0|C1|C2|C3, "snoop_response.hitm" }, \ 697 \ 698 { 0xF4, 0x10, C0|C1|C2|C3, "sq_misc.split_lock" }, \ 699 { 0x0B, 0x01, C0|C1|C2|C3, "mem_inst_retired.loads" }, \ 700 { 0x0B, 0x02, C0|C1|C2|C3, "mem_inst_retired.stores" }, \ 701 \ 702 { 0xC0, 0x04, C0|C1|C2|C3, "inst_retired.mmx" }, \ 703 { 0xC0, 0x02, C0|C1|C2|C3, "inst_retired.x87" }, \ 704 { 0xC7, 0x04, C0|C1|C2|C3, "ssex_uops_retired.packed_double" }, \ 705 \ 706 { 0xC7, 0x01, C0|C1|C2|C3, "ssex_uops_retired.packed_single" }, \ 707 { 0xC7, 0x08, C0|C1|C2|C3, "ssex_uops_retired.scalar_double" }, \ 708 { 0xC7, 0x02, C0|C1|C2|C3, "ssex_uops_retired.scalar_single" }, \ 709 \ 710 { 0xC7, 0x10, C0|C1|C2|C3, 
"ssex_uops_retired.vector_integer" }, \ 711 { 0xC2, 0x01, C0|C1|C2|C3, "uops_retired.any" }, \ 712 { 0xC2, 0x04, C0|C1|C2|C3, "uops_retired.macro_fused" }, \ 713 \ 714 { 0xC8, 0x20, C0|C1|C2|C3, "itlb_miss_retired" }, \ 715 { 0xCB, 0x80, C0|C1|C2|C3, "mem_load_retired.dtlb_miss" }, \ 716 { 0xCB, 0x40, C0|C1|C2|C3, "mem_load_retired.hit_lfb" }, \ 717 \ 718 { 0xCB, 0x01, C0|C1|C2|C3, "mem_load_retired.l1d_hit" }, \ 719 { 0xCB, 0x02, C0|C1|C2|C3, "mem_load_retired.l2_hit" }, \ 720 { 0xCB, 0x10, C0|C1|C2|C3, "mem_load_retired.llc_miss" }, \ 721 \ 722 { 0xCB, 0x04, C0|C1|C2|C3, "mem_load_retired.llc_unshared_hit" }, \ 723 { 0xCB, 0x08, C0|C1|C2|C3, "mem_load_retired.other_core_l2_hit_hitm" }, \ 724 { 0x0F, 0x02, C0|C1|C2|C3, "mem_uncore_retired.other_core_l2_hitm" }, \ 725 \ 726 { 0x0F, 0x08, C0|C1|C2|C3, "mem_uncore_retired.remote_cache_local_home_hit" },\ 727 { 0x0F, 0x10, C0|C1|C2|C3, "mem_uncore_retired.remote_dram" }, \ 728 { 0x0F, 0x20, C0|C1|C2|C3, "mem_uncore_retired.local_dram" }, \ 729 \ 730 { 0x0C, 0x01, C0|C1|C2|C3, "mem_store_retired.dtlb_miss" }, \ 731 { 0xC4, 0x01, C0|C1|C2|C3, "br_inst_retired.conditional" }, \ 732 { 0xC4, 0x02, C0|C1|C2|C3, "br_inst_retired.near_call" }, \ 733 \ 734 { 0xC5, 0x02, C0|C1|C2|C3, "br_misp_retired.near_call" }, \ 735 { 0xDB, 0x01, C0|C1|C2|C3, "uop_unfusion" }, \ 736 { 0xF7, 0x01, C0|C1|C2|C3, "fp_assist.all" }, \ 737 \ 738 { 0xF7, 0x04, C0|C1|C2|C3, "fp_assist.input" }, \ 739 { 0xF7, 0x02, C0|C1|C2|C3, "fp_assist.output" }, \ 740 { 0xCC, 0x03, C0|C1|C2|C3, "fp_mmx_trans.any" }, \ 741 \ 742 { 0xCC, 0x01, C0|C1|C2|C3, "fp_mmx_trans.to_fp" }, \ 743 { 0xCC, 0x02, C0|C1|C2|C3, "fp_mmx_trans.to_mmx" }, \ 744 { 0xC3, 0x04, C0|C1|C2|C3, "machine_clears.smc" } 745 746 #define EVENTS_FAM6_MOD28 \ 747 { 0x2, 0x81, C0|C1, "store_forwards.good" }, \ 748 { 0x6, 0x0, C0|C1, "segment_reg_loads.any" }, \ 749 { 0x7, 0x1, C0|C1, "prefetch.prefetcht0" }, \ 750 { 0x7, 0x6, C0|C1, "prefetch.sw_l2" }, \ 751 { 0x7, 0x8, C0|C1, 
"prefetch.prefetchnta" }, \ 752 { 0x8, 0x7, C0|C1, "data_tlb_misses.dtlb_miss" }, \ 753 { 0x8, 0x5, C0|C1, "data_tlb_misses.dtlb_miss_ld" }, \ 754 { 0x8, 0x9, C0|C1, "data_tlb_misses.l0_dtlb_miss_ld" }, \ 755 { 0x8, 0x6, C0|C1, "data_tlb_misses.dtlb_miss_st" }, \ 756 { 0xC, 0x3, C0|C1, "page_walks.cycles" }, \ 757 { 0x10, 0x1, C0|C1, "x87_comp_ops_exe.any.s" }, \ 758 { 0x10, 0x81, C0|C1, "x87_comp_ops_exe.any.ar" }, \ 759 { 0x11, 0x1, C0|C1, "fp_assist" }, \ 760 { 0x11, 0x81, C0|C1, "fp_assist.ar" }, \ 761 { 0x12, 0x1, C0|C1, "mul.s" }, \ 762 { 0x12, 0x81, C0|C1, "mul.ar" }, \ 763 { 0x13, 0x1, C0|C1, "div.s" }, \ 764 { 0x13, 0x81, C0|C1, "div.ar" }, \ 765 { 0x14, 0x1, C0|C1, "cycles_div_busy" }, \ 766 { 0x21, 0x0, C0|C1, "l2_ads" }, \ 767 { 0x22, 0x0, C0|C1, "l2_dbus_busy" }, \ 768 { 0x24, 0x0, C0|C1, "l2_lines_in" }, \ 769 { 0x25, 0x0, C0|C1, "l2_m_lines_in" }, \ 770 { 0x26, 0x0, C0|C1, "l2_lines_out" }, \ 771 { 0x27, 0x0, C0|C1, "l2_m_lines_out" }, \ 772 { 0x28, 0x0, C0|C1, "l2_ifetch" }, \ 773 { 0x29, 0x0, C0|C1, "l2_ld" }, \ 774 { 0x2A, 0x0, C0|C1, "l2_st" }, \ 775 { 0x2B, 0x0, C0|C1, "l2_lock" }, \ 776 { 0x2E, 0x0, C0|C1, "l2_rqsts" }, \ 777 { 0x2E, 0x41, C0|C1, "l2_rqsts.self.demand.i_state" }, \ 778 { 0x2E, 0x4F, C0|C1, "l2_rqsts.self.demand.mesi" }, \ 779 { 0x30, 0x0, C0|C1, "l2_reject_bus_q" }, \ 780 { 0x32, 0x0, C0|C1, "l2_no_req" }, \ 781 { 0x3A, 0x0, C0|C1, "eist_trans" }, \ 782 { 0x3B, 0xC0, C0|C1, "thermal_trip" }, \ 783 { 0x3C, 0x0, C0|C1, "cpu_clk_unhalted.core_p" }, \ 784 { 0x3C, 0x1, C0|C1, "cpu_clk_unhalted.bus" }, \ 785 { 0x3C, 0x2, C0|C1, "cpu_clk_unhalted.no_other" }, \ 786 { 0x40, 0x21, C0|C1, "l1d_cache.ld" }, \ 787 { 0x40, 0x22, C0|C1, "l1d_cache.st" }, \ 788 { 0x60, 0x0, C0|C1, "bus_request_outstanding" }, \ 789 { 0x61, 0x0, C0|C1, "bus_bnr_drv" }, \ 790 { 0x62, 0x0, C0|C1, "bus_drdy_clocks" }, \ 791 { 0x63, 0x0, C0|C1, "bus_lock_clocks" }, \ 792 { 0x64, 0x0, C0|C1, "bus_data_rcv" }, \ 793 { 0x65, 0x0, C0|C1, "bus_trans_brd" }, \ 794 { 
0x66, 0x0, C0|C1, "bus_trans_rfo" }, \ 795 { 0x67, 0x0, C0|C1, "bus_trans_wb" }, \ 796 { 0x68, 0x0, C0|C1, "bus_trans_ifetch" }, \ 797 { 0x69, 0x0, C0|C1, "bus_trans_inval" }, \ 798 { 0x6A, 0x0, C0|C1, "bus_trans_pwr" }, \ 799 { 0x6B, 0x0, C0|C1, "bus_trans_p" }, \ 800 { 0x6C, 0x0, C0|C1, "bus_trans_io" }, \ 801 { 0x6D, 0x0, C0|C1, "bus_trans_def" }, \ 802 { 0x6E, 0x0, C0|C1, "bus_trans_burst" }, \ 803 { 0x6F, 0x0, C0|C1, "bus_trans_mem" }, \ 804 { 0x70, 0x0, C0|C1, "bus_trans_any" }, \ 805 { 0x77, 0x0, C0|C1, "ext_snoop" }, \ 806 { 0x7A, 0x0, C0|C1, "bus_hit_drv" }, \ 807 { 0x7B, 0x0, C0|C1, "bus_hitm_drv" }, \ 808 { 0x7D, 0x0, C0|C1, "busq_empty" }, \ 809 { 0x7E, 0x0, C0|C1, "snoop_stall_drv" }, \ 810 { 0x7F, 0x0, C0|C1, "bus_io_wait" }, \ 811 { 0x80, 0x3, C0|C1, "icache.accesses" }, \ 812 { 0x80, 0x2, C0|C1, "icache.misses" }, \ 813 { 0x82, 0x4, C0|C1, "itlb.flush" }, \ 814 { 0x82, 0x2, C0|C1, "itlb.misses" }, \ 815 { 0xAA, 0x2, C0|C1, "macro_insts.cisc_decoded" }, \ 816 { 0xAA, 0x3, C0|C1, "macro_insts.all_decoded" }, \ 817 { 0xB0, 0x0, C0|C1, "simd_uops_exec.s" }, \ 818 { 0xB0, 0x80, C0|C1, "simd_uops_exec.ar" }, \ 819 { 0xB1, 0x0, C0|C1, "simd_sat_uop_exec.s" }, \ 820 { 0xB1, 0x80, C0|C1, "simd_sat_uop_exec.ar" }, \ 821 { 0xB3, 0x1, C0|C1, "simd_uop_type_exec.mul.s" }, \ 822 { 0xB3, 0x81, C0|C1, "simd_uop_type_exec.mul.ar" }, \ 823 { 0xB3, 0x02, C0|C1, "simd_uop_type_exec.shift.s" }, \ 824 { 0xB3, 0x82, C0|C1, "simd_uop_type_exec.shift.ar" }, \ 825 { 0xB3, 0x04, C0|C1, "simd_uop_type_exec.pack.s" }, \ 826 { 0xB3, 0x84, C0|C1, "simd_uop_type_exec.pack.ar" }, \ 827 { 0xB3, 0x08, C0|C1, "simd_uop_type_exec.unpack.s" }, \ 828 { 0xB3, 0x88, C0|C1, "simd_uop_type_exec.unpack.ar" }, \ 829 { 0xB3, 0x10, C0|C1, "simd_uop_type_exec.logical.s" }, \ 830 { 0xB3, 0x90, C0|C1, "simd_uop_type_exec.logical.ar" }, \ 831 { 0xB3, 0x20, C0|C1, "simd_uop_type_exec.arithmetic.s" }, \ 832 { 0xB3, 0xA0, C0|C1, "simd_uop_type_exec.arithmetic.ar" }, \ 833 { 0xC2, 0x10, C0|C1, 
"uops_retired.any" }, \ 834 { 0xC3, 0x1, C0|C1, "machine_clears.smc" }, \ 835 { 0xC4, 0x0, C0|C1, "br_inst_retired.any" }, \ 836 { 0xC4, 0x1, C0|C1, "br_inst_retired.pred_not_taken" }, \ 837 { 0xC4, 0x2, C0|C1, "br_inst_retired.mispred_not_taken" }, \ 838 { 0xC4, 0x4, C0|C1, "br_inst_retired.pred_taken" }, \ 839 { 0xC4, 0x8, C0|C1, "br_inst_retired.mispred_taken" }, \ 840 { 0xC4, 0xA, C0|C1, "br_inst_retired.mispred" }, \ 841 { 0xC4, 0xC, C0|C1, "br_inst_retired.taken" }, \ 842 { 0xC4, 0xF, C0|C1, "br_inst_retired.any1" }, \ 843 { 0xC6, 0x1, C0|C1, "cycles_int_masked.cycles_int_masked" }, \ 844 { 0xC6, 0x2, C0|C1, \ 845 "cycles_int_masked.cycles_int_pending_and_masked" }, \ 846 { 0xC7, 0x1, C0|C1, "simd_inst_retired.packed_single" }, \ 847 { 0xC7, 0x2, C0|C1, "simd_inst_retired.scalar_single" }, \ 848 { 0xC7, 0x4, C0|C1, "simd_inst_retired.packed_double" }, \ 849 { 0xC7, 0x8, C0|C1, "simd_inst_retired.scalar_double" }, \ 850 { 0xC7, 0x10, C0|C1, "simd_inst_retired.vector" }, \ 851 { 0xC7, 0x1F, C0|C1, "simd_inst_retired.any" }, \ 852 { 0xC8, 0x00, C0|C1, "hw_int_rcv" }, \ 853 { 0xCA, 0x1, C0|C1, "simd_comp_inst_retired.packed_single" }, \ 854 { 0xCA, 0x2, C0|C1, "simd_comp_inst_retired.scalar_single" }, \ 855 { 0xCA, 0x4, C0|C1, "simd_comp_inst_retired.packed_double" }, \ 856 { 0xCA, 0x8, C0|C1, "simd_comp_inst_retired.scalar_double" }, \ 857 { 0xCB, 0x1, C0|C1, "mem_load_retired.l2_hit" }, \ 858 { 0xCB, 0x2, C0|C1, "mem_load_retired.l2_miss" }, \ 859 { 0xCB, 0x4, C0|C1, "mem_load_retired.dtlb_miss" }, \ 860 { 0xCD, 0x0, C0|C1, "simd_assist" }, \ 861 { 0xCE, 0x0, C0|C1, "simd_instr_retired" }, \ 862 { 0xCF, 0x0, C0|C1, "simd_sat_instr_retired" }, \ 863 { 0xE0, 0x1, C0|C1, "br_inst_decoded" }, \ 864 { 0xE4, 0x1, C0|C1, "bogus_br" }, \ 865 { 0xE6, 0x1, C0|C1, "baclears.any" } 866 867 static const struct events_table_t *events_table = NULL; 868 869 const struct events_table_t events_fam6_mod26[] = { 870 EVENTS_FAM6_MOD26, 871 { NT_END, 0, 0, "" } 872 }; 873 874 const 
struct events_table_t events_fam6_mod28[] = {
	EVENTS_FAM6_MOD28,
	{ NT_END, 0, 0, "" }		/* sentinel terminates table walks */
};

/*
 * Initialize string containing list of supported general-purpose counter
 * events for processors of Penryn and Merom Family
 *
 * Builds gpc_names[i] for every GPC: a comma-separated concatenation of
 * that pic's specific events (pic0_events/pic1_events for pics 0 and 1)
 * followed by the events common to all GPCs.  Buffers are sized exactly
 * from a first pass over the tables, so the strcat() loops below cannot
 * overflow.
 */
static void
pcbe_init_core_uarch()
{
	const struct nametable_core_uarch *n;
	const struct nametable_core_uarch *picspecific_events;
	size_t common_size;
	size_t size;
	uint64_t i;

	gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);

	/* Calculate space needed to save all the common event names */
	common_size = 0;
	for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END; n++) {
		common_size += strlen(n->name) + 1;	/* +1 for the comma */
	}

	for (i = 0; i < num_gpc; i++) {
		size = 0;
		/* Only pics 0 and 1 have pic-specific event tables */
		switch (i) {
		case 0:
			picspecific_events = pic0_events;
			break;
		case 1:
			picspecific_events = pic1_events;
			break;
		default:
			picspecific_events = NULL;
			break;
		}
		if (picspecific_events != NULL) {
			for (n = picspecific_events;
			    n->event_num != NT_END;
			    n++) {
				size += strlen(n->name) + 1;
			}
		}

		/* +1 for the terminating NUL */
		gpc_names[i] =
		    kmem_alloc(size + common_size + 1, KM_SLEEP);

		gpc_names[i][0] = '\0';
		if (picspecific_events != NULL) {
			for (n = picspecific_events;
			    n->event_num != NT_END;
			    n++) {
				(void) strcat(gpc_names[i], n->name);
				(void) strcat(gpc_names[i], ",");
			}
		}
		for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END;
		    n++) {
			(void) strcat(gpc_names[i], n->name);
			(void) strcat(gpc_names[i], ",");
		}
		/*
		 * Remove trailing comma.
		 */
		gpc_names[i][common_size + size - 1] = '\0';
	}
}

/*
 * Probe the processor via CPUID leaf 0xA and set up all of the
 * module-global counter parameters (versionid, num_gpc/num_ffc, counter
 * widths and masks, event-name lists).  Returns 0 on success, -1 if the
 * CPU does not support Architectural Performance Monitoring or reports
 * an unusable configuration.
 */
static int
core_pcbe_init(void)
{
	struct cpuid_regs cp;
	size_t size;
	uint64_t i;
	uint64_t j;
	uint64_t arch_events_vector_length;
	size_t arch_events_string_length;

	if (cpuid_getvendor(CPU) != X86_VENDOR_Intel)
		return (-1);

	/* Obtain Basic CPUID information */
	cp.cp_eax = 0x0;
	(void) __cpuid_insn(&cp);

	/* No Architectural Performance Monitoring Leaf returned by CPUID */
	if (cp.cp_eax < 0xa) {
		return (-1);
	}

	/* Obtain the Architectural Performance Monitoring Leaf */
	cp.cp_eax = 0xa;
	(void) __cpuid_insn(&cp);

	versionid = cp.cp_eax & 0xFF;

	/*
	 * Fixed-Function Counters (FFC)
	 *
	 * All Family 6 Model 15 and Model 23 processors have fixed-function
	 * counters. These counters were made Architectural with
	 * Family 6 Model 15 Stepping 9.
	 */
	switch (versionid) {

	case 0:
		return (-1);

	case 2:
		/* Version 2: FFC count/width are reported in EDX */
		num_ffc = cp.cp_edx & 0x1F;
		width_ffc = (cp.cp_edx >> 5) & 0xFF;

		/*
		 * Some processors have an errata (AW34) where
		 * versionid is reported as 2 when actually 1.
		 * In this case, fixed-function counters are
		 * model-specific as in Version 1.
		 */
		if (num_ffc != 0) {
			break;
		}
		/* FALLTHROUGH */
	case 1:
		/* Version 1 FFC parameters are fixed, not CPUID-reported */
		num_ffc = 3;
		width_ffc = 40;
		versionid = 1;
		break;

	default:
		num_ffc = cp.cp_edx & 0x1F;
		width_ffc = (cp.cp_edx >> 5) & 0xFF;
		break;
	}


	/* control_ffc below is a bitmask; >= 64 FFCs cannot be represented */
	if (num_ffc >= 64)
		return (-1);

	/* Set HTT-specific names of architectural & FFC events */
	if (x86_feature & X86_HTT) {
		ffc_names = ffc_names_htt;
		arch_events_table = arch_events_table_htt;
		known_arch_events =
		    sizeof (arch_events_table_htt) /
		    sizeof (struct events_table_t);
		known_ffc_num =
		    sizeof (ffc_names_htt) / sizeof (char *);
	} else {
		ffc_names = ffc_names_non_htt;
		arch_events_table = arch_events_table_non_htt;
		known_arch_events =
		    sizeof (arch_events_table_non_htt) /
		    sizeof (struct events_table_t);
		known_ffc_num =
		    sizeof (ffc_names_non_htt) / sizeof (char *);
	}

	if (num_ffc >= known_ffc_num) {
		/*
		 * The system seems to have more fixed-function counters than
		 * what this PCBE is able to handle correctly. Default to the
		 * maximum number of fixed-function counters that this driver
		 * is aware of.
		 */
		num_ffc = known_ffc_num - 1;
	}

	mask_ffc = BITMASK_XBITS(width_ffc);
	control_ffc = BITMASK_XBITS(num_ffc);

	/*
	 * General Purpose Counters (GPC)
	 */
	num_gpc = (cp.cp_eax >> 8) & 0xFF;
	width_gpc = (cp.cp_eax >> 16) & 0xFF;

	if (num_gpc >= 64)
		return (-1);

	mask_gpc = BITMASK_XBITS(width_gpc);

	control_gpc = BITMASK_XBITS(num_gpc);

	/* GPC enable bits occupy 0..num_gpc-1; FFC bits start at bit 32 */
	control_mask = (control_ffc << 32) | control_gpc;

	total_pmc = num_gpc + num_ffc;
	if (total_pmc > 64) {
		/* Too wide for the overflow bitmap */
		return (-1);
	}

	/* GPC events for Family 6 Models 15 & 23 only */
	if ((cpuid_getfamily(CPU) == 6) &&
	    ((cpuid_getmodel(CPU) == 15) || (cpuid_getmodel(CPU) == 23) ||
	    (cpuid_getmodel(CPU) == 29))) {
		(void) snprintf(core_impl_name, IMPL_NAME_LEN,
		    "Core Microarchitecture");
		pcbe_init_core_uarch();
		return (0);
	}

	(void) snprintf(core_impl_name, IMPL_NAME_LEN,
	    "Intel Arch PerfMon v%d on Family %d Model %d",
	    versionid, cpuid_getfamily(CPU), cpuid_getmodel(CPU));

	/*
	 * Architectural events
	 */
	arch_events_vector_length = (cp.cp_eax >> 24) & 0xFF;

	ASSERT(known_arch_events == arch_events_vector_length);

	/*
	 * To handle the case where a new performance monitoring setup is run
	 * on a non-debug kernel
	 */
	if (known_arch_events > arch_events_vector_length) {
		known_arch_events = arch_events_vector_length;
	} else {
		arch_events_vector_length = known_arch_events;
	}

	/* In EBX a SET bit means the corresponding event is NOT available */
	arch_events_vector = cp.cp_ebx &
	    BITMASK_XBITS(arch_events_vector_length);

	/*
	 * Process architectural and non-architectural events using GPC
	 */
	if (num_gpc > 0) {

		gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);

		/* Calculate space required for the architectural gpc events */
		arch_events_string_length = 0;
		for (i = 0; i < known_arch_events; i++) {
			if (((1U << i) & arch_events_vector) == 0) {
				arch_events_string_length +=
				    strlen(arch_events_table[i].name) + 1;
			}
		}

		/* Non-architectural events list */
		if (cpuid_getmodel(CPU) == 26) {
			events_table = events_fam6_mod26;
		} else if (cpuid_getmodel(CPU) == 28) {
			events_table = events_fam6_mod28;
		}

		for (i = 0; i < num_gpc; i++) {

			/*
			 * Determine length of all supported event names
			 * (architectural + non-architectural)
			 */
			size = arch_events_string_length;
			for (j = 0; events_table != NULL &&
			    events_table[j].eventselect != NT_END;
			    j++) {
				if (C(i) & events_table[j].supported_counters) {
					size += strlen(events_table[j].name) +
					    1;
				}
			}

			/* Allocate memory for this pics list */
			gpc_names[i] = kmem_alloc(size + 1, KM_SLEEP);
			gpc_names[i][0] = '\0';
			if (size == 0) {
				continue;
			}

			/*
			 * Create the list of all supported events
			 * (architectural + non-architectural)
			 */
			for (j = 0; j < known_arch_events; j++) {
				if (((1U << j) & arch_events_vector) == 0) {
					(void) strcat(gpc_names[i],
					    arch_events_table[j].name);
					(void) strcat(gpc_names[i], ",");
				}
			}

			for (j = 0; events_table != NULL &&
			    events_table[j].eventselect != NT_END;
			    j++) {
				if (C(i) & events_table[j].supported_counters) {
					(void) strcat(gpc_names[i],
					    events_table[j].name);
					(void) strcat(gpc_names[i], ",");
				}
			}

			/* Remove trailing comma */
			gpc_names[i][size - 1] = '\0';
		}
	}
	/*
	 * Fixed-function Counters (FFC) are already listed individually in
	 * ffc_names[]
	 */
	return (0);
}

/* PCBE op: total number of counters (GPCs + FFCs) on this CPU */
static uint_t core_pcbe_ncounters()
{
	return (total_pmc);
}

/* PCBE op: human-readable implementation name built in core_pcbe_init() */
static const char *core_pcbe_impl_name(void)
{
	return (core_impl_name);
}

1191 static const char *core_pcbe_cpuref(void) 1192 { 1193 return (core_cpuref); 1194 } 1195 1196 static char *core_pcbe_list_events(uint_t picnum) 1197 { 1198 ASSERT(picnum < cpc_ncounters); 1199 1200 if (picnum < num_gpc) { 1201 return (gpc_names[picnum]); 1202 } else { 1203 return (ffc_names[picnum - num_gpc]); 1204 } 1205 } 1206 1207 static char *core_pcbe_list_attrs(void) 1208 { 1209 if (versionid >= 3) { 1210 return ("edge,inv,umask,cmask,anythr"); 1211 } else { 1212 return ("edge,pc,inv,umask,cmask"); 1213 } 1214 } 1215 1216 static const struct nametable_core_uarch * 1217 find_gpcevent_core_uarch(char *name, 1218 const struct nametable_core_uarch *nametable) 1219 { 1220 const struct nametable_core_uarch *n; 1221 int compare_result = -1; 1222 1223 for (n = nametable; n->event_num != NT_END; n++) { 1224 compare_result = strcmp(name, n->name); 1225 if (compare_result <= 0) { 1226 break; 1227 } 1228 } 1229 1230 if (compare_result == 0) { 1231 return (n); 1232 } 1233 1234 return (NULL); 1235 } 1236 1237 static const struct events_table_t * 1238 find_gpcevent(char *name) 1239 { 1240 int i; 1241 1242 /* Search architectural events */ 1243 for (i = 0; i < known_arch_events; i++) { 1244 if (strcmp(name, arch_events_table[i].name) == 0) { 1245 if (((1U << i) & arch_events_vector) == 0) { 1246 return (&arch_events_table[i]); 1247 } 1248 } 1249 } 1250 1251 /* Search non-architectural events */ 1252 if (events_table != NULL) { 1253 for (i = 0; events_table[i].eventselect != NT_END; i++) { 1254 if (strcmp(name, events_table[i].name) == 0) { 1255 return (&events_table[i]); 1256 } 1257 } 1258 } 1259 1260 return (NULL); 1261 } 1262 static uint64_t 1263 core_pcbe_event_coverage(char *event) 1264 { 1265 uint64_t bitmap; 1266 uint64_t bitmask; 1267 const struct events_table_t *n; 1268 int i; 1269 1270 bitmap = 0; 1271 1272 /* Is it an event that a GPC can track? 
*/ 1273 if (versionid >= 3) { 1274 n = find_gpcevent(event); 1275 if (n != NULL) { 1276 bitmap |= (n->supported_counters & 1277 BITMASK_XBITS(num_gpc)); 1278 } 1279 } else { 1280 if (find_gpcevent_core_uarch(event, cmn_gpc_events_core_uarch) 1281 != NULL) { 1282 bitmap |= BITMASK_XBITS(num_gpc); 1283 } else if (find_gpcevent_core_uarch(event, pic0_events) != 1284 NULL) { 1285 bitmap |= 1ULL; 1286 } else if (find_gpcevent_core_uarch(event, pic1_events) != 1287 NULL) { 1288 bitmap |= 1ULL << 1; 1289 } 1290 } 1291 1292 /* Check if the event can be counted in the fixed-function counters */ 1293 if (num_ffc > 0) { 1294 bitmask = 1ULL << num_gpc; 1295 for (i = 0; i < num_ffc; i++) { 1296 if (strcmp(event, ffc_names[i]) == 0) { 1297 bitmap |= bitmask; 1298 } 1299 bitmask = bitmask << 1; 1300 } 1301 } 1302 1303 return (bitmap); 1304 } 1305 1306 static uint64_t 1307 core_pcbe_overflow_bitmap(void) 1308 { 1309 uint64_t interrupt_status; 1310 uint64_t intrbits_ffc; 1311 uint64_t intrbits_gpc; 1312 extern int kcpc_hw_overflow_intr_installed; 1313 uint64_t overflow_bitmap; 1314 1315 RDMSR(PERF_GLOBAL_STATUS, interrupt_status); 1316 WRMSR(PERF_GLOBAL_OVF_CTRL, interrupt_status); 1317 1318 interrupt_status = interrupt_status & control_mask; 1319 intrbits_ffc = (interrupt_status >> 32) & control_ffc; 1320 intrbits_gpc = interrupt_status & control_gpc; 1321 overflow_bitmap = (intrbits_ffc << num_gpc) | intrbits_gpc; 1322 1323 ASSERT(kcpc_hw_overflow_intr_installed); 1324 (*kcpc_hw_enable_cpc_intr)(); 1325 1326 return (overflow_bitmap); 1327 } 1328 1329 static int 1330 check_cpc_securitypolicy(core_pcbe_config_t *conf, 1331 const struct nametable_core_uarch *n) 1332 { 1333 if (conf->core_ctl & n->restricted_bits) { 1334 if (secpolicy_cpc_cpu(crgetcred()) != 0) { 1335 return (CPC_ATTR_REQUIRES_PRIVILEGE); 1336 } 1337 } 1338 return (0); 1339 } 1340 1341 static int 1342 configure_gpc(uint_t picnum, char *event, uint64_t preset, uint32_t flags, 1343 uint_t nattrs, kcpc_attr_t *attrs, 
void **data) 1344 { 1345 core_pcbe_config_t conf; 1346 const struct nametable_core_uarch *n; 1347 const struct nametable_core_uarch *m; 1348 const struct nametable_core_uarch *picspecific_events; 1349 struct nametable_core_uarch nt_raw = { "", 0x0, 0x0 }; 1350 uint_t i; 1351 long event_num; 1352 const struct events_table_t *eventcode; 1353 1354 if (((preset & BITS_EXTENDED_FROM_31) != 0) && 1355 ((preset & BITS_EXTENDED_FROM_31) != 1356 BITS_EXTENDED_FROM_31)) { 1357 1358 /* 1359 * Bits beyond bit-31 in the general-purpose counters can only 1360 * be written to by extension of bit 31. We cannot preset 1361 * these bits to any value other than all 1s or all 0s. 1362 */ 1363 return (CPC_ATTRIBUTE_OUT_OF_RANGE); 1364 } 1365 1366 if (versionid >= 3) { 1367 eventcode = find_gpcevent(event); 1368 if (eventcode != NULL) { 1369 if ((C(picnum) & eventcode->supported_counters) == 0) { 1370 return (CPC_PIC_NOT_CAPABLE); 1371 } 1372 conf.core_ctl = eventcode->eventselect; 1373 conf.core_ctl |= eventcode->unitmask << 1374 CORE_UMASK_SHIFT; 1375 } else { 1376 /* Event specified as raw event code */ 1377 if (ddi_strtol(event, NULL, 0, &event_num) != 0) { 1378 return (CPC_INVALID_EVENT); 1379 } 1380 conf.core_ctl = event_num & 0xFF; 1381 } 1382 } else { 1383 n = find_gpcevent_core_uarch(event, cmn_gpc_events_core_uarch); 1384 if (n == NULL) { 1385 switch (picnum) { 1386 case 0: 1387 picspecific_events = pic0_events; 1388 break; 1389 case 1: 1390 picspecific_events = pic1_events; 1391 break; 1392 default: 1393 picspecific_events = NULL; 1394 break; 1395 } 1396 if (picspecific_events != NULL) { 1397 n = find_gpcevent_core_uarch(event, 1398 picspecific_events); 1399 } 1400 } 1401 if (n == NULL) { 1402 /* 1403 * Check if this is a case where the event was 1404 * specified directly by its event number instead of 1405 * its name string. 
1406 */ 1407 if (ddi_strtol(event, NULL, 0, &event_num) != 0) { 1408 return (CPC_INVALID_EVENT); 1409 } 1410 1411 event_num = event_num & 0xFF; 1412 1413 /* 1414 * Search the event table to find out if the event 1415 * specified has an privilege requirements. Currently 1416 * none of the pic-specific counters have any privilege 1417 * requirements. Hence only the table 1418 * cmn_gpc_events_core_uarch is searched. 1419 */ 1420 for (m = cmn_gpc_events_core_uarch; 1421 m->event_num != NT_END; 1422 m++) { 1423 if (event_num == m->event_num) { 1424 break; 1425 } 1426 } 1427 if (m->event_num == NT_END) { 1428 nt_raw.event_num = (uint8_t)event_num; 1429 n = &nt_raw; 1430 } else { 1431 n = m; 1432 } 1433 } 1434 conf.core_ctl = n->event_num; /* Event Select */ 1435 } 1436 1437 1438 conf.core_picno = picnum; 1439 conf.core_pictype = CORE_GPC; 1440 conf.core_rawpic = preset & mask_gpc; 1441 1442 conf.core_pes = GPC_BASE_PES + picnum; 1443 conf.core_pmc = GPC_BASE_PMC + picnum; 1444 1445 for (i = 0; i < nattrs; i++) { 1446 if (strncmp(attrs[i].ka_name, "umask", 6) == 0) { 1447 if ((attrs[i].ka_val | CORE_UMASK_MASK) != 1448 CORE_UMASK_MASK) { 1449 return (CPC_ATTRIBUTE_OUT_OF_RANGE); 1450 } 1451 /* Clear out the default umask */ 1452 conf.core_ctl &= ~ (CORE_UMASK_MASK << 1453 CORE_UMASK_SHIFT); 1454 /* Use the user provided umask */ 1455 conf.core_ctl |= attrs[i].ka_val << 1456 CORE_UMASK_SHIFT; 1457 } else if (strncmp(attrs[i].ka_name, "edge", 6) == 0) { 1458 if (attrs[i].ka_val != 0) 1459 conf.core_ctl |= CORE_EDGE; 1460 } else if (strncmp(attrs[i].ka_name, "inv", 4) == 0) { 1461 if (attrs[i].ka_val != 0) 1462 conf.core_ctl |= CORE_INV; 1463 } else if (strncmp(attrs[i].ka_name, "cmask", 6) == 0) { 1464 if ((attrs[i].ka_val | CORE_CMASK_MASK) != 1465 CORE_CMASK_MASK) { 1466 return (CPC_ATTRIBUTE_OUT_OF_RANGE); 1467 } 1468 conf.core_ctl |= attrs[i].ka_val << 1469 CORE_CMASK_SHIFT; 1470 } else if (strncmp(attrs[i].ka_name, "anythr", 7) == 1471 0) { 1472 if (versionid < 3) 
1473 return (CPC_INVALID_ATTRIBUTE); 1474 if (secpolicy_cpc_cpu(crgetcred()) != 0) { 1475 return (CPC_ATTR_REQUIRES_PRIVILEGE); 1476 } 1477 if (attrs[i].ka_val != 0) 1478 conf.core_ctl |= CORE_ANYTHR; 1479 } else { 1480 return (CPC_INVALID_ATTRIBUTE); 1481 } 1482 } 1483 1484 if (flags & CPC_COUNT_USER) 1485 conf.core_ctl |= CORE_USR; 1486 if (flags & CPC_COUNT_SYSTEM) 1487 conf.core_ctl |= CORE_OS; 1488 if (flags & CPC_OVF_NOTIFY_EMT) 1489 conf.core_ctl |= CORE_INT; 1490 conf.core_ctl |= CORE_EN; 1491 1492 if (versionid < 3) { 1493 if (check_cpc_securitypolicy(&conf, n) != 0) { 1494 return (CPC_ATTR_REQUIRES_PRIVILEGE); 1495 } 1496 } 1497 1498 *data = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP); 1499 *((core_pcbe_config_t *)*data) = conf; 1500 1501 return (0); 1502 } 1503 1504 static int 1505 configure_ffc(uint_t picnum, char *event, uint64_t preset, uint32_t flags, 1506 uint_t nattrs, kcpc_attr_t *attrs, void **data) 1507 { 1508 core_pcbe_config_t *conf; 1509 uint_t i; 1510 1511 if (picnum - num_gpc >= num_ffc) { 1512 return (CPC_INVALID_PICNUM); 1513 } 1514 1515 if (strcmp(ffc_names[picnum-num_gpc], event) != 0) { 1516 return (CPC_INVALID_EVENT); 1517 } 1518 1519 if ((versionid < 3) && (nattrs != 0)) { 1520 return (CPC_INVALID_ATTRIBUTE); 1521 } 1522 1523 conf = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP); 1524 conf->core_ctl = 0; 1525 1526 for (i = 0; i < nattrs; i++) { 1527 if (strncmp(attrs[i].ka_name, "anythr", 7) == 0) { 1528 if (secpolicy_cpc_cpu(crgetcred()) != 0) { 1529 return (CPC_ATTR_REQUIRES_PRIVILEGE); 1530 } 1531 if (attrs[i].ka_val != 0) { 1532 conf->core_ctl |= CORE_FFC_ANYTHR; 1533 } 1534 } else { 1535 kmem_free(conf, sizeof (core_pcbe_config_t)); 1536 return (CPC_INVALID_ATTRIBUTE); 1537 } 1538 } 1539 1540 conf->core_picno = picnum; 1541 conf->core_pictype = CORE_FFC; 1542 conf->core_rawpic = preset & mask_ffc; 1543 conf->core_pmc = FFC_BASE_PMC + (picnum - num_gpc); 1544 1545 /* All fixed-function counters have the same control 
register */ 1546 conf->core_pes = PERF_FIXED_CTR_CTRL; 1547 1548 if (flags & CPC_COUNT_USER) 1549 conf->core_ctl |= CORE_FFC_USR_EN; 1550 if (flags & CPC_COUNT_SYSTEM) 1551 conf->core_ctl |= CORE_FFC_OS_EN; 1552 if (flags & CPC_OVF_NOTIFY_EMT) 1553 conf->core_ctl |= CORE_FFC_PMI; 1554 1555 *data = conf; 1556 return (0); 1557 } 1558 1559 /*ARGSUSED*/ 1560 static int 1561 core_pcbe_configure(uint_t picnum, char *event, uint64_t preset, 1562 uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data, 1563 void *token) 1564 { 1565 int ret; 1566 core_pcbe_config_t *conf; 1567 1568 /* 1569 * If we've been handed an existing configuration, we need only preset 1570 * the counter value. 1571 */ 1572 if (*data != NULL) { 1573 conf = *data; 1574 ASSERT(conf->core_pictype == CORE_GPC || 1575 conf->core_pictype == CORE_FFC); 1576 if (conf->core_pictype == CORE_GPC) 1577 conf->core_rawpic = preset & mask_gpc; 1578 else /* CORE_FFC */ 1579 conf->core_rawpic = preset & mask_ffc; 1580 return (0); 1581 } 1582 1583 if (picnum >= total_pmc) { 1584 return (CPC_INVALID_PICNUM); 1585 } 1586 1587 if (picnum < num_gpc) { 1588 ret = configure_gpc(picnum, event, preset, flags, 1589 nattrs, attrs, data); 1590 } else { 1591 ret = configure_ffc(picnum, event, preset, flags, 1592 nattrs, attrs, data); 1593 } 1594 return (ret); 1595 } 1596 1597 static void 1598 core_pcbe_program(void *token) 1599 { 1600 core_pcbe_config_t *cfg; 1601 uint64_t perf_global_ctrl; 1602 uint64_t perf_fixed_ctr_ctrl; 1603 uint64_t curcr4; 1604 1605 core_pcbe_allstop(); 1606 1607 curcr4 = getcr4(); 1608 if (kcpc_allow_nonpriv(token)) 1609 /* Allow RDPMC at any ring level */ 1610 setcr4(curcr4 | CR4_PCE); 1611 else 1612 /* Allow RDPMC only at ring 0 */ 1613 setcr4(curcr4 & ~CR4_PCE); 1614 1615 /* Clear any overflow indicators before programming the counters */ 1616 WRMSR(PERF_GLOBAL_OVF_CTRL, MASK_CONDCHGD_OVFBUFFER | control_mask); 1617 1618 cfg = NULL; 1619 perf_global_ctrl = 0; 1620 perf_fixed_ctr_ctrl = 0; 1621 
cfg = (core_pcbe_config_t *)kcpc_next_config(token, cfg, NULL); 1622 while (cfg != NULL) { 1623 ASSERT(cfg->core_pictype == CORE_GPC || 1624 cfg->core_pictype == CORE_FFC); 1625 1626 if (cfg->core_pictype == CORE_GPC) { 1627 /* 1628 * General-purpose counter registers have write 1629 * restrictions where only the lower 32-bits can be 1630 * written to. The rest of the relevant bits are 1631 * written to by extension from bit 31 (all ZEROS if 1632 * bit-31 is ZERO and all ONE if bit-31 is ONE). This 1633 * makes it possible to write to the counter register 1634 * only values that have all ONEs or all ZEROs in the 1635 * higher bits. 1636 */ 1637 if (((cfg->core_rawpic & BITS_EXTENDED_FROM_31) == 0) || 1638 ((cfg->core_rawpic & BITS_EXTENDED_FROM_31) == 1639 BITS_EXTENDED_FROM_31)) { 1640 /* 1641 * Straighforward case where the higher bits 1642 * are all ZEROs or all ONEs. 1643 */ 1644 WRMSR(cfg->core_pmc, 1645 (cfg->core_rawpic & mask_gpc)); 1646 } else { 1647 /* 1648 * The high order bits are not all the same. 1649 * We save what is currently in the registers 1650 * and do not write to it. When we want to do 1651 * a read from this register later (in 1652 * core_pcbe_sample()), we subtract the value 1653 * we save here to get the actual event count. 1654 * 1655 * NOTE: As a result, we will not get overflow 1656 * interrupts as expected. 1657 */ 1658 RDMSR(cfg->core_pmc, cfg->core_rawpic); 1659 cfg->core_rawpic = cfg->core_rawpic & mask_gpc; 1660 } 1661 WRMSR(cfg->core_pes, cfg->core_ctl); 1662 perf_global_ctrl |= 1ull << cfg->core_picno; 1663 } else { 1664 /* 1665 * Unlike the general-purpose counters, all relevant 1666 * bits of fixed-function counters can be written to. 
1667 */ 1668 WRMSR(cfg->core_pmc, cfg->core_rawpic & mask_ffc); 1669 1670 /* 1671 * Collect the control bits for all the 1672 * fixed-function counters and write it at one shot 1673 * later in this function 1674 */ 1675 perf_fixed_ctr_ctrl |= cfg->core_ctl << 1676 ((cfg->core_picno - num_gpc) * CORE_FFC_ATTR_SIZE); 1677 perf_global_ctrl |= 1678 1ull << (cfg->core_picno - num_gpc + 32); 1679 } 1680 1681 cfg = (core_pcbe_config_t *) 1682 kcpc_next_config(token, cfg, NULL); 1683 } 1684 1685 /* Enable all the counters */ 1686 WRMSR(PERF_FIXED_CTR_CTRL, perf_fixed_ctr_ctrl); 1687 WRMSR(PERF_GLOBAL_CTRL, perf_global_ctrl); 1688 } 1689 1690 static void 1691 core_pcbe_allstop(void) 1692 { 1693 /* Disable all the counters together */ 1694 WRMSR(PERF_GLOBAL_CTRL, ALL_STOPPED); 1695 1696 setcr4(getcr4() & ~CR4_PCE); 1697 } 1698 1699 static void 1700 core_pcbe_sample(void *token) 1701 { 1702 uint64_t *daddr; 1703 uint64_t curpic; 1704 core_pcbe_config_t *cfg; 1705 uint64_t counter_mask; 1706 1707 cfg = (core_pcbe_config_t *)kcpc_next_config(token, NULL, &daddr); 1708 while (cfg != NULL) { 1709 ASSERT(cfg->core_pictype == CORE_GPC || 1710 cfg->core_pictype == CORE_FFC); 1711 1712 curpic = rdmsr(cfg->core_pmc); 1713 1714 DTRACE_PROBE4(core__pcbe__sample, 1715 uint64_t, cfg->core_pmc, 1716 uint64_t, curpic, 1717 uint64_t, cfg->core_rawpic, 1718 uint64_t, *daddr); 1719 1720 if (cfg->core_pictype == CORE_GPC) { 1721 counter_mask = mask_gpc; 1722 } else { 1723 counter_mask = mask_ffc; 1724 } 1725 curpic = curpic & counter_mask; 1726 if (curpic >= cfg->core_rawpic) { 1727 *daddr += curpic - cfg->core_rawpic; 1728 } else { 1729 /* Counter overflowed since our last sample */ 1730 *daddr += counter_mask - (cfg->core_rawpic - curpic) + 1731 1; 1732 } 1733 cfg->core_rawpic = *daddr & counter_mask; 1734 1735 cfg = 1736 (core_pcbe_config_t *)kcpc_next_config(token, cfg, &daddr); 1737 } 1738 } 1739 1740 static void 1741 core_pcbe_free(void *config) 1742 { 1743 kmem_free(config, sizeof 
(core_pcbe_config_t)); 1744 } 1745 1746 static struct modlpcbe core_modlpcbe = { 1747 &mod_pcbeops, 1748 "Core Performance Counters", 1749 &core_pcbe_ops 1750 }; 1751 1752 static struct modlinkage core_modl = { 1753 MODREV_1, 1754 &core_modlpcbe, 1755 }; 1756 1757 int 1758 _init(void) 1759 { 1760 if (core_pcbe_init() != 0) { 1761 return (ENOTSUP); 1762 } 1763 return (mod_install(&core_modl)); 1764 } 1765 1766 int 1767 _fini(void) 1768 { 1769 return (mod_remove(&core_modl)); 1770 } 1771 1772 int 1773 _info(struct modinfo *mi) 1774 { 1775 return (mod_info(&core_modl, mi)); 1776 } 1777