/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * Performance Counter Back-End for Intel Family 6 Models 15 and 23
 */

#include <sys/cpuvar.h>
#include <sys/param.h>
#include <sys/cpc_impl.h>
#include <sys/cpc_pcbe.h>
#include <sys/modctl.h>
#include <sys/inttypes.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/x86_archext.h>
#include <sys/sdt.h>
#include <sys/archsystm.h>
#include <sys/privregs.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cred.h>
#include <sys/policy.h>

/*
 * Forward declarations of the file-local PCBE routines defined further
 * down in this file.
 */
static int core_pcbe_init(void);
static uint_t core_pcbe_ncounters(void);
static const char *core_pcbe_impl_name(void);
static const char *core_pcbe_cpuref(void);
static char *core_pcbe_list_events(uint_t picnum);
static char *core_pcbe_list_attrs(void);
static uint64_t core_pcbe_event_coverage(char *event);
static uint64_t core_pcbe_overflow_bitmap(void);
static int core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
    uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
    void *token);
static void core_pcbe_program(void *token);
static void core_pcbe_allstop(void);
static void core_pcbe_sample(void *token);
static void core_pcbe_free(void *config);

#define	FALSE	0
#define	TRUE	1

/* Architectural Performance Counter versioning */
#define	APC_V1	1
#define	APC_V2	2

/* Counter Type */
#define	CORE_GPC	0	/* General-Purpose Counter (GPC) */
#define	CORE_FFC	1	/* Fixed-Function Counter (FFC) */

/* MSR Addresses */
#define	GPC_BASE_PMC		0x00c1	/* First GPC */
#define	GPC_BASE_PES		0x0186	/* First GPC Event Select register */
#define	FFC_BASE_PMC		0x0309	/* First FFC */
#define	PERF_FIXED_CTR_CTRL	0x038d	/* Used to enable/disable FFCs */
#define	PERF_GLOBAL_STATUS	0x038e	/* Overflow status register */
#define	PERF_GLOBAL_CTRL	0x038f	/* Used to enable/disable counting */
#define	PERF_GLOBAL_OVF_CTRL	0x0390	/* Used to clear overflow status */

/*
 * Processor Event Select register fields
 */
#define	CORE_USR	(1ULL << 16)	/* Count while not in ring 0 */
#define	CORE_OS		(1ULL << 17)	/* Count while in ring 0 */
#define	CORE_EDGE	(1ULL << 18)	/* Enable edge detection */
#define	CORE_PC		(1ULL << 19)	/* Enable pin control */
#define	CORE_INT	(1ULL << 20)	/* Enable interrupt on overflow */
#define	CORE_EN		(1ULL << 22)	/* Enable counting */
#define	CORE_INV	(1ULL << 23)	/* Invert the CMASK */

#define	CORE_UMASK_SHIFT	8
#define	CORE_UMASK_MASK		0xffu
#define	CORE_CMASK_SHIFT	24
#define	CORE_CMASK_MASK		0xffu

/*
 * Fixed-function counter attributes.  These are the per-counter bits of
 * the fixed-function control register; configure_ffc() sets OS_EN for
 * CPC_COUNT_SYSTEM and USR_EN for CPC_COUNT_USER.
 */
#define	CORE_FFC_OS_EN	(1ULL << 0)	/* Count while in ring 0 */
#define	CORE_FFC_USR_EN	(1ULL << 1)	/* Count while not in ring 0 */
#define	CORE_FFC_PMI	(1ULL << 3)	/* Enable interrupt on overflow */

/*
 * Number of bits for specifying each FFC's attributes in the control register
 */
#define CORE_FFC_ATTR_SIZE 4 112 113 /* 114 * CondChgd and OvfBuffer fields of global status and overflow control registers 115 */ 116 #define CONDCHGD (1ULL << 63) 117 #define OVFBUFFER (1ULL << 62) 118 #define MASK_CONDCHGD_OVFBUFFER (CONDCHGD | OVFBUFFER) 119 120 #define ALL_STOPPED 0ULL 121 122 #define BITMASK_XBITS(x) ((1ull << (x)) - 1ull) 123 124 /* 125 * Only the lower 32-bits can be written to in the general-purpose 126 * counters. The higher bits are extended from bit 31; all ones if 127 * bit 31 is one and all zeros otherwise. 128 * 129 * The fixed-function counters do not have this restriction. 130 */ 131 #define BITS_EXTENDED_FROM_31 (BITMASK_XBITS(width_gpc) & ~BITMASK_XBITS(31)) 132 133 #define WRMSR(msr, value) \ 134 wrmsr((msr), (value)); \ 135 DTRACE_PROBE2(wrmsr, uint64_t, (msr), uint64_t, (value)); 136 137 #define RDMSR(msr, value) \ 138 (value) = rdmsr((msr)); \ 139 DTRACE_PROBE2(rdmsr, uint64_t, (msr), uint64_t, (value)); 140 141 typedef struct core_pcbe_config { 142 uint64_t core_rawpic; 143 uint64_t core_ctl; /* Event Select bits */ 144 uint64_t core_pmc; /* Counter register address */ 145 uint64_t core_pes; /* Event Select register address */ 146 uint_t core_picno; 147 uint8_t core_pictype; /* CORE_GPC or CORE_FFC */ 148 } core_pcbe_config_t; 149 150 pcbe_ops_t core_pcbe_ops = { 151 PCBE_VER_1, /* pcbe_ver */ 152 CPC_CAP_OVERFLOW_INTERRUPT | CPC_CAP_OVERFLOW_PRECISE, /* pcbe_caps */ 153 core_pcbe_ncounters, /* pcbe_ncounters */ 154 core_pcbe_impl_name, /* pcbe_impl_name */ 155 core_pcbe_cpuref, /* pcbe_cpuref */ 156 core_pcbe_list_events, /* pcbe_list_events */ 157 core_pcbe_list_attrs, /* pcbe_list_attrs */ 158 core_pcbe_event_coverage, /* pcbe_event_coverage */ 159 core_pcbe_overflow_bitmap, /* pcbe_overflow_bitmap */ 160 core_pcbe_configure, /* pcbe_configure */ 161 core_pcbe_program, /* pcbe_program */ 162 core_pcbe_allstop, /* pcbe_allstop */ 163 core_pcbe_sample, /* pcbe_sample */ 164 core_pcbe_free /* pcbe_free */ 165 }; 166 167 
struct nametable { 168 const char *name; 169 uint64_t restricted_bits; 170 uint8_t event_num; 171 }; 172 173 #define NT_END 0xFF 174 175 /* 176 * Counting an event for all cores or all bus agents requires cpc_cpu privileges 177 */ 178 #define ALL_CORES (1ULL << 15) 179 #define ALL_AGENTS (1ULL << 13) 180 181 static const struct nametable common_gpc_events[] = { 182 /* Alphabetical order of event name */ 183 184 { "baclears", 0x0, 0xe6 }, 185 { "bogus_br", 0x0, 0xe4 }, 186 { "br_bac_missp_exec", 0x0, 0x8a }, 187 188 { "br_call_exec", 0x0, 0x92 }, 189 { "br_call_missp_exec", 0x0, 0x93 }, 190 { "br_cnd_exec", 0x0, 0x8b }, 191 192 { "br_cnd_missp_exec", 0x0, 0x8c }, 193 { "br_ind_call_exec", 0x0, 0x94 }, 194 { "br_ind_exec", 0x0, 0x8d }, 195 196 { "br_ind_missp_exec", 0x0, 0x8e }, 197 { "br_inst_decoded", 0x0, 0xe0 }, 198 { "br_inst_exec", 0x0, 0x88 }, 199 200 { "br_inst_retired", 0x0, 0xc4 }, 201 { "br_inst_retired_mispred", 0x0, 0xc5 }, 202 { "br_missp_exec", 0x0, 0x89 }, 203 204 { "br_ret_bac_missp_exec", 0x0, 0x91 }, 205 { "br_ret_exec", 0x0, 0x8f }, 206 { "br_ret_missp_exec", 0x0, 0x90 }, 207 208 { "br_tkn_bubble_1", 0x0, 0x97 }, 209 { "br_tkn_bubble_2", 0x0, 0x98 }, 210 { "bus_bnr_drv", ALL_AGENTS, 0x61 }, 211 212 { "bus_data_rcv", ALL_CORES, 0x64 }, 213 { "bus_drdy_clocks", ALL_AGENTS, 0x62 }, 214 { "bus_hit_drv", ALL_AGENTS, 0x7a }, 215 216 { "bus_hitm_drv", ALL_AGENTS, 0x7b }, 217 { "bus_io_wait", ALL_CORES, 0x7f }, 218 { "bus_lock_clocks", ALL_CORES | ALL_AGENTS, 0x63 }, 219 220 { "bus_request_outstanding", ALL_CORES | ALL_AGENTS, 0x60 }, 221 { "bus_trans_any", ALL_CORES | ALL_AGENTS, 0x70 }, 222 { "bus_trans_brd", ALL_CORES | ALL_AGENTS, 0x65 }, 223 224 { "bus_trans_burst", ALL_CORES | ALL_AGENTS, 0x6e }, 225 { "bus_trans_def", ALL_CORES | ALL_AGENTS, 0x6d }, 226 { "bus_trans_ifetch", ALL_CORES | ALL_AGENTS, 0x68 }, 227 228 { "bus_trans_inval", ALL_CORES | ALL_AGENTS, 0x69 }, 229 { "bus_trans_io", ALL_CORES | ALL_AGENTS, 0x6c }, 230 { "bus_trans_mem", 
ALL_CORES | ALL_AGENTS, 0x6f }, 231 232 { "bus_trans_p", ALL_CORES | ALL_AGENTS, 0x6b }, 233 { "bus_trans_pwr", ALL_CORES | ALL_AGENTS, 0x6a }, 234 { "bus_trans_rfo", ALL_CORES | ALL_AGENTS, 0x66 }, 235 236 { "bus_trans_wb", ALL_CORES | ALL_AGENTS, 0x67 }, 237 { "busq_empty", ALL_CORES, 0x7d }, 238 { "cmp_snoop", ALL_CORES, 0x78 }, 239 240 { "cpu_clk_unhalted", 0x0, 0x3c }, 241 { "cycles_int", 0x0, 0xc6 }, 242 { "cycles_l1i_mem_stalled", 0x0, 0x86 }, 243 244 { "dtlb_misses", 0x0, 0x08 }, 245 { "eist_trans", 0x0, 0x3a }, 246 { "esp", 0x0, 0xab }, 247 248 { "ext_snoop", ALL_AGENTS, 0x77 }, 249 { "fp_mmx_trans", 0x0, 0xcc }, 250 { "hw_int_rcv", 0x0, 0xc8 }, 251 252 { "ild_stall", 0x0, 0x87 }, 253 { "inst_queue", 0x0, 0x83 }, 254 { "inst_retired", 0x0, 0xc0 }, 255 256 { "itlb", 0x0, 0x82 }, 257 { "itlb_miss_retired", 0x0, 0xc9 }, 258 { "l1d_all_ref", 0x0, 0x43 }, 259 260 { "l1d_cache_ld", 0x0, 0x40 }, 261 { "l1d_cache_lock", 0x0, 0x42 }, 262 { "l1d_cache_st", 0x0, 0x41 }, 263 264 { "l1d_m_evict", 0x0, 0x47 }, 265 { "l1d_m_repl", 0x0, 0x46 }, 266 { "l1d_pend_miss", 0x0, 0x48 }, 267 268 { "l1d_prefetch", 0x0, 0x4e }, 269 { "l1d_repl", 0x0, 0x45 }, 270 { "l1d_split", 0x0, 0x49 }, 271 272 { "l1i_misses", 0x0, 0x81 }, 273 { "l1i_reads", 0x0, 0x80 }, 274 { "l2_ads", ALL_CORES, 0x21 }, 275 276 { "l2_dbus_busy_rd", ALL_CORES, 0x23 }, 277 { "l2_ifetch", ALL_CORES, 0x28 }, 278 { "l2_ld", ALL_CORES, 0x29 }, 279 280 { "l2_lines_in", ALL_CORES, 0x24 }, 281 { "l2_lines_out", ALL_CORES, 0x26 }, 282 { "l2_lock", ALL_CORES, 0x2b }, 283 284 { "l2_m_lines_in", ALL_CORES, 0x25 }, 285 { "l2_m_lines_out", ALL_CORES, 0x27 }, 286 { "l2_no_req", ALL_CORES, 0x32 }, 287 288 { "l2_reject_busq", ALL_CORES, 0x30 }, 289 { "l2_rqsts", ALL_CORES, 0x2e }, 290 { "l2_st", ALL_CORES, 0x2a }, 291 292 { "load_block", 0x0, 0x03 }, 293 { "load_hit_pre", 0x0, 0x4c }, 294 { "machine_nukes", 0x0, 0xc3 }, 295 296 { "macro_insts", 0x0, 0xaa }, 297 { "memory_disambiguation", 0x0, 0x09 }, 298 { "page_walks", 0x0, 
0x0c }, 299 300 { "pref_rqsts_dn", 0x0, 0xf8 }, 301 { "pref_rqsts_up", 0x0, 0xf0 }, 302 { "rat_stalls", 0x0, 0xd2 }, 303 304 { "resource_stalls", 0x0, 0xdc }, 305 { "rs_uops_dispatched", 0x0, 0xa0 }, 306 { "seg_reg_renames", 0x0, 0xd5 }, 307 308 { "seg_rename_stalls", 0x0, 0xd4 }, 309 { "segment_reg_loads", 0x0, 0x06 }, 310 { "simd_assist", 0x0, 0xcd }, 311 312 { "simd_comp_inst_retired", 0x0, 0xca }, 313 { "simd_inst_retired", 0x0, 0xc7 }, 314 { "simd_instr_retired", 0x0, 0xce }, 315 316 { "simd_sat_instr_retired", 0x0, 0xcf }, 317 { "simd_sat_uop_exec", 0x0, 0xb1 }, 318 { "simd_uop_type_exec", 0x0, 0xb3 }, 319 320 { "simd_uops_exec", 0x0, 0xb0 }, 321 { "snoop_stall_drv", ALL_CORES | ALL_AGENTS, 0x7e }, 322 { "sse_pre_exec", 0x0, 0x07 }, 323 324 { "sse_pre_miss", 0x0, 0x4b }, 325 { "store_block", 0x0, 0x04 }, 326 { "thermal_trip", 0x0, 0x3b }, 327 328 { "uops_retired", 0x0, 0xc2 }, 329 { "x87_ops_retired", 0x0, 0xc1 }, 330 { "", 0x0, NT_END } 331 }; 332 333 /* 334 * If any of the pic specific events require privileges, make sure to add a 335 * check in configure_gpc() to find whether an event hard-coded as a number by 336 * the user has any privilege requirements 337 */ 338 static const struct nametable pic0_events[] = { 339 /* Alphabetical order of event name */ 340 341 { "cycles_div_busy", 0x0, 0x14 }, 342 { "fp_comp_ops_exe", 0x0, 0x10 }, 343 { "idle_during_div", 0x0, 0x18 }, 344 345 { "mem_load_retired", 0x0, 0xcb }, 346 { "rs_uops_dispatched_port", 0x0, 0xa1 }, 347 { "", 0x0, NT_END } 348 }; 349 350 static const struct nametable pic1_events[] = { 351 /* Alphabetical order of event name */ 352 353 { "delayed_bypass", 0x0, 0x19 }, 354 { "div", 0x0, 0x13 }, 355 { "fp_assist", 0x0, 0x11 }, 356 357 { "mul", 0x0, 0x12 }, 358 { "", 0x0, NT_END } 359 }; 360 361 static char **gpc_names; 362 363 char *ffc_names[] = { 364 "instr_retired.any", 365 "cpu_clk_unhalted.core", 366 "cpu_clk_unhalted.ref", 367 NULL 368 }; 369 370 static uint64_t num_gpc; 371 static uint64_t 
width_gpc; 372 static uint64_t mask_gpc; 373 static uint64_t num_ffc; 374 static uint64_t width_ffc; 375 static uint64_t mask_ffc; 376 static uint_t total_pmc; 377 static uint64_t control_ffc; 378 static uint64_t control_gpc; 379 static uint64_t control_mask; 380 381 static const char *core_impl_name = "Core Microarchitecture"; 382 383 static const char *core_cpuref = 384 "See Appendix A of the \"Intel 64 and IA-32 Architectures Software" \ 385 " Developer's Manual Volume 3B: System Programming Guide, Part 2\"" \ 386 " Order Number: 253669-026US, Februrary 2008"; 387 388 static int 389 core_pcbe_init(void) 390 { 391 struct cpuid_regs cp; 392 uint32_t versionid; 393 const struct nametable *n; 394 size_t size; 395 size_t common_size; 396 uint64_t i; 397 const struct nametable *picspecific_events; 398 399 if ((cpuid_getvendor(CPU) != X86_VENDOR_Intel) || 400 (cpuid_getfamily(CPU) != 6) || 401 (cpuid_getmodel(CPU) != 15 && cpuid_getmodel(CPU) != 23)) 402 return (-1); 403 404 /* Obtain the Architectural Performance Monitoring Leaf */ 405 cp.cp_eax = 0xa; 406 (void) __cpuid_insn(&cp); 407 408 versionid = cp.cp_eax & 0xFF; 409 410 /* 411 * All Family 6 Model 15 and Model 23 processors have fixed-function 412 * counters. These counters were made Architectural with 413 * Family 6 Model 9 Stepping 9. 414 */ 415 switch (versionid) { 416 417 case 0: 418 return (-1); 419 420 case APC_V2: 421 num_ffc = cp.cp_edx & 0x1F; 422 width_ffc = (cp.cp_edx >> 5) & 0xFF; 423 424 if (num_ffc == 0) { 425 /* 426 * Some processors have an errata (AW34) where 427 * versionid is reported as 2 when actually 1. 428 * In this case, fixed-function counters are 429 * model-specific as in Version 1. 
430 */ 431 num_ffc = 3; 432 width_ffc = 40; 433 versionid = APC_V1; 434 } 435 break; 436 437 default: 438 /* 439 * For higher versions currently unsupported, 440 * default to Version 1 441 */ 442 num_ffc = 3; 443 width_ffc = 40; 444 break; 445 } 446 447 if (num_ffc >= 64) 448 return (-1); 449 450 if (num_ffc >= sizeof (ffc_names) / sizeof (char *)) { 451 /* 452 * The system seems to have more fixed-function counters than 453 * what this PCBE is able to handle correctly. Default to the 454 * maximum number of fixed-function counters that this driver 455 * is aware of. 456 */ 457 num_ffc = sizeof (ffc_names) / sizeof (char *) - 1; 458 } 459 460 mask_ffc = BITMASK_XBITS(width_ffc); 461 462 num_gpc = (cp.cp_eax >> 8) & 0xFF; 463 width_gpc = (cp.cp_eax >> 16) & 0xFF; 464 465 if (num_gpc >= 64) 466 return (-1); 467 468 mask_gpc = BITMASK_XBITS(width_gpc); 469 470 total_pmc = num_gpc + num_ffc; 471 472 control_gpc = BITMASK_XBITS(num_gpc); 473 control_ffc = BITMASK_XBITS(num_ffc); 474 475 control_mask = (control_ffc << 32) | control_gpc; 476 477 if (total_pmc > 64) { 478 /* Too wide for the overflow bitmap */ 479 return (-1); 480 } 481 482 /* General-purpose Counters (GPC) */ 483 gpc_names = NULL; 484 485 if (num_gpc > 0) { 486 gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP); 487 488 /* Calculate space needed to save all the common event names */ 489 common_size = 0; 490 for (n = common_gpc_events; n->event_num != NT_END; n++) { 491 common_size += strlen(n->name) + 1; 492 } 493 494 for (i = 0; i < num_gpc; i++) { 495 size = 0; 496 switch (i) { 497 case 0: 498 picspecific_events = pic0_events; 499 break; 500 case 1: 501 picspecific_events = pic1_events; 502 break; 503 default: 504 picspecific_events = NULL; 505 break; 506 } 507 if (picspecific_events != NULL) { 508 for (n = picspecific_events; 509 n->event_num != NT_END; 510 n++) { 511 size += strlen(n->name) + 1; 512 } 513 } 514 515 gpc_names[i] = 516 kmem_alloc(size + common_size + 1, KM_SLEEP); 517 518 
gpc_names[i][0] = '\0'; 519 if (picspecific_events != NULL) { 520 for (n = picspecific_events; 521 n->event_num != NT_END; 522 n++) { 523 (void) strcat(gpc_names[i], n->name); 524 (void) strcat(gpc_names[i], ","); 525 } 526 } 527 for (n = common_gpc_events; n->event_num != NT_END; 528 n++) { 529 (void) strcat(gpc_names[i], n->name); 530 (void) strcat(gpc_names[i], ","); 531 } 532 /* 533 * Remove trailing comma. 534 */ 535 gpc_names[i][common_size + size - 1] = '\0'; 536 } 537 } 538 539 /* 540 * Fixed-function Counters (FFC) are already listed individually in 541 * ffc_names[] 542 */ 543 return (0); 544 } 545 546 static uint_t core_pcbe_ncounters() 547 { 548 return (total_pmc); 549 } 550 551 static const char *core_pcbe_impl_name(void) 552 { 553 return (core_impl_name); 554 } 555 556 static const char *core_pcbe_cpuref(void) 557 { 558 return (core_cpuref); 559 } 560 561 static char *core_pcbe_list_events(uint_t picnum) 562 { 563 ASSERT(picnum < cpc_ncounters); 564 565 if (picnum < num_gpc) { 566 return (gpc_names[picnum]); 567 } else { 568 return (ffc_names[picnum - num_gpc]); 569 } 570 } 571 572 static char *core_pcbe_list_attrs(void) 573 { 574 return ("edge,pc,inv,umask,cmask"); 575 } 576 577 static const struct nametable * 578 find_gpcevent(char *name, const struct nametable *nametable) 579 { 580 const struct nametable *n; 581 int compare_result; 582 583 compare_result = -1; 584 for (n = nametable; n->event_num != NT_END; n++) { 585 compare_result = strcmp(name, n->name); 586 if (compare_result <= 0) { 587 break; 588 } 589 } 590 591 if (compare_result == 0) { 592 return (n); 593 } 594 595 return (NULL); 596 } 597 598 static uint64_t 599 core_pcbe_event_coverage(char *event) 600 { 601 uint64_t bitmap; 602 uint64_t bitmask; 603 int i; 604 605 bitmap = 0; 606 607 /* Is it an event that a GPC can track? 
*/ 608 if (find_gpcevent(event, common_gpc_events) != NULL) { 609 bitmap |= BITMASK_XBITS(num_gpc); 610 } else if (find_gpcevent(event, pic0_events) != NULL) { 611 bitmap |= 1ULL; 612 } else if (find_gpcevent(event, pic1_events) != NULL) { 613 bitmap |= 1ULL << 1; 614 } 615 616 /* Check if the event can be counted in the fixed-function counters */ 617 if (num_ffc > 0) { 618 bitmask = 1ULL << num_gpc; 619 for (i = 0; i < num_ffc; i++) { 620 if (strcmp(event, ffc_names[i]) == 0) { 621 bitmap |= bitmask; 622 } 623 bitmask = bitmask << 1; 624 } 625 } 626 627 return (bitmap); 628 } 629 630 static uint64_t 631 core_pcbe_overflow_bitmap(void) 632 { 633 uint64_t interrupt_status; 634 uint64_t intrbits_ffc; 635 uint64_t intrbits_gpc; 636 extern int kcpc_hw_overflow_intr_installed; 637 uint64_t overflow_bitmap; 638 639 RDMSR(PERF_GLOBAL_STATUS, interrupt_status); 640 WRMSR(PERF_GLOBAL_OVF_CTRL, interrupt_status); 641 642 interrupt_status = interrupt_status & control_mask; 643 intrbits_ffc = (interrupt_status >> 32) & control_ffc; 644 intrbits_gpc = interrupt_status & control_gpc; 645 overflow_bitmap = (intrbits_ffc << num_gpc) | intrbits_gpc; 646 647 ASSERT(kcpc_hw_overflow_intr_installed); 648 (*kcpc_hw_enable_cpc_intr)(); 649 650 return (overflow_bitmap); 651 } 652 653 static int 654 check_cpc_securitypolicy(core_pcbe_config_t *conf, const struct nametable *n) 655 { 656 if (conf->core_ctl & n->restricted_bits) { 657 if (secpolicy_cpc_cpu(crgetcred()) != 0) { 658 return (CPC_ATTR_REQUIRES_PRIVILEGE); 659 } 660 } 661 return (0); 662 } 663 664 static int 665 configure_gpc(uint_t picnum, char *event, uint64_t preset, uint32_t flags, 666 uint_t nattrs, kcpc_attr_t *attrs, void **data) 667 { 668 core_pcbe_config_t conf; 669 const struct nametable *n; 670 const struct nametable *m; 671 const struct nametable *picspecific_events; 672 struct nametable nt_raw = { "", 0x0, 0x0 }; 673 uint_t i; 674 long event_num; 675 676 if (((preset & BITS_EXTENDED_FROM_31) != 0) && 677 ((preset & 
BITS_EXTENDED_FROM_31) != 678 BITS_EXTENDED_FROM_31)) { 679 680 /* 681 * Bits beyond bit-31 in the general-purpose counters can only 682 * be written to by extension of bit 31. We cannot preset 683 * these bits to any value other than all 1s or all 0s. 684 */ 685 return (CPC_ATTRIBUTE_OUT_OF_RANGE); 686 } 687 688 n = find_gpcevent(event, common_gpc_events); 689 if (n == NULL) { 690 switch (picnum) { 691 case 0: 692 picspecific_events = pic0_events; 693 break; 694 case 1: 695 picspecific_events = pic1_events; 696 break; 697 default: 698 picspecific_events = NULL; 699 break; 700 } 701 if (picspecific_events != NULL) { 702 n = find_gpcevent(event, picspecific_events); 703 if (n == NULL) { 704 /* 705 * Check if this is a case where the event was 706 * specified directly by its event number 707 * instead of its name string. 708 */ 709 if (ddi_strtol(event, NULL, 0, &event_num) != 710 0) { 711 return (CPC_INVALID_EVENT); 712 } 713 714 event_num = event_num & 0xFF; 715 716 /* 717 * Search the event table to find out if the 718 * event specified has an privilege 719 * requirements. Currently none of the 720 * pic-specific counters have any privilege 721 * requirements. Hence only the 722 * common_gpc_events table is searched. 
723 */ 724 for (m = common_gpc_events; 725 m->event_num != NT_END; 726 m++) { 727 if (event_num == m->event_num) { 728 break; 729 } 730 } 731 if (m->event_num == NT_END) { 732 nt_raw.event_num = (uint8_t)event_num; 733 n = &nt_raw; 734 } else { 735 n = m; 736 } 737 } 738 } 739 } 740 741 conf.core_picno = picnum; 742 conf.core_pictype = CORE_GPC; 743 conf.core_rawpic = preset & mask_gpc; 744 745 conf.core_pes = GPC_BASE_PES + picnum; 746 conf.core_pmc = GPC_BASE_PMC + picnum; 747 748 conf.core_ctl = n->event_num; /* Event Select */ 749 for (i = 0; i < nattrs; i++) { 750 if (strncmp(attrs[i].ka_name, "umask", 6) == 0) { 751 if ((attrs[i].ka_val | CORE_UMASK_MASK) != 752 CORE_UMASK_MASK) { 753 return (CPC_ATTRIBUTE_OUT_OF_RANGE); 754 } 755 conf.core_ctl |= attrs[i].ka_val << 756 CORE_UMASK_SHIFT; 757 } else if (strncmp(attrs[i].ka_name, "edge", 6) == 0) { 758 if (attrs[i].ka_val != 0) 759 conf.core_ctl |= CORE_EDGE; 760 } else if (strncmp(attrs[i].ka_name, "pc", 3) == 0) { 761 if (attrs[i].ka_val != 0) 762 conf.core_ctl |= CORE_PC; 763 } else if (strncmp(attrs[i].ka_name, "inv", 4) == 0) { 764 if (attrs[i].ka_val != 0) 765 conf.core_ctl |= CORE_INV; 766 } else if (strncmp(attrs[i].ka_name, "cmask", 6) == 0) { 767 if ((attrs[i].ka_val | CORE_CMASK_MASK) != 768 CORE_CMASK_MASK) { 769 return (CPC_ATTRIBUTE_OUT_OF_RANGE); 770 } 771 conf.core_ctl |= attrs[i].ka_val << CORE_CMASK_SHIFT; 772 } else { 773 return (CPC_INVALID_ATTRIBUTE); 774 } 775 } 776 777 if (flags & CPC_COUNT_USER) 778 conf.core_ctl |= CORE_USR; 779 if (flags & CPC_COUNT_SYSTEM) 780 conf.core_ctl |= CORE_OS; 781 if (flags & CPC_OVF_NOTIFY_EMT) 782 conf.core_ctl |= CORE_INT; 783 conf.core_ctl |= CORE_EN; 784 785 if (check_cpc_securitypolicy(&conf, n) != 0) { 786 return (CPC_ATTR_REQUIRES_PRIVILEGE); 787 } 788 789 *data = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP); 790 *((core_pcbe_config_t *)*data) = conf; 791 792 return (0); 793 } 794 795 static int 796 configure_ffc(uint_t picnum, char *event, 
uint64_t preset, uint32_t flags, 797 uint_t nattrs, void **data) 798 { 799 core_pcbe_config_t *conf; 800 801 if (picnum - num_gpc >= num_ffc) { 802 return (CPC_INVALID_PICNUM); 803 } 804 if (strcmp(ffc_names[picnum-num_gpc], event) != 0) { 805 return (CPC_INVALID_EVENT); 806 } 807 808 if (nattrs != 0) { 809 return (CPC_INVALID_ATTRIBUTE); 810 } 811 812 conf = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP); 813 814 conf->core_picno = picnum; 815 conf->core_pictype = CORE_FFC; 816 conf->core_rawpic = preset & mask_ffc; 817 conf->core_pmc = FFC_BASE_PMC + (picnum - num_gpc); 818 819 /* All fixed-function counters have the same control register */ 820 conf->core_pes = PERF_FIXED_CTR_CTRL; 821 822 conf->core_ctl = 0; 823 if (flags & CPC_COUNT_USER) 824 conf->core_ctl |= CORE_FFC_USR_EN; 825 if (flags & CPC_COUNT_SYSTEM) 826 conf->core_ctl |= CORE_FFC_OS_EN; 827 if (flags & CPC_OVF_NOTIFY_EMT) 828 conf->core_ctl |= CORE_FFC_PMI; 829 830 *data = conf; 831 return (0); 832 } 833 834 /*ARGSUSED*/ 835 static int 836 core_pcbe_configure(uint_t picnum, char *event, uint64_t preset, 837 uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data, 838 void *token) 839 { 840 int ret; 841 core_pcbe_config_t *conf; 842 843 /* 844 * If we've been handed an existing configuration, we need only preset 845 * the counter value. 
846 */ 847 if (*data != NULL) { 848 conf = *data; 849 ASSERT(conf->core_pictype == CORE_GPC || 850 conf->core_pictype == CORE_FFC); 851 if (conf->core_pictype == CORE_GPC) 852 conf->core_rawpic = preset & mask_gpc; 853 else /* CORE_FFC */ 854 conf->core_rawpic = preset & mask_ffc; 855 return (0); 856 } 857 858 if (picnum >= total_pmc) { 859 return (CPC_INVALID_PICNUM); 860 } 861 862 if (picnum < num_gpc) { 863 ret = configure_gpc(picnum, event, preset, flags, 864 nattrs, attrs, data); 865 } else { 866 ret = configure_ffc(picnum, event, preset, flags, 867 nattrs, data); 868 } 869 return (ret); 870 } 871 872 static void 873 core_pcbe_program(void *token) 874 { 875 core_pcbe_config_t *cfg; 876 uint64_t perf_global_ctrl; 877 uint64_t perf_fixed_ctr_ctrl; 878 uint64_t curcr4; 879 880 core_pcbe_allstop(); 881 882 curcr4 = getcr4(); 883 if (kcpc_allow_nonpriv(token)) 884 /* Allow RDPMC at any ring level */ 885 setcr4(curcr4 | CR4_PCE); 886 else 887 /* Allow RDPMC only at ring 0 */ 888 setcr4(curcr4 & ~CR4_PCE); 889 890 /* Clear any overflow indicators before programming the counters */ 891 WRMSR(PERF_GLOBAL_OVF_CTRL, MASK_CONDCHGD_OVFBUFFER | control_mask); 892 893 cfg = NULL; 894 perf_global_ctrl = 0; 895 perf_fixed_ctr_ctrl = 0; 896 cfg = (core_pcbe_config_t *)kcpc_next_config(token, cfg, NULL); 897 while (cfg != NULL) { 898 ASSERT(cfg->core_pictype == CORE_GPC || 899 cfg->core_pictype == CORE_FFC); 900 901 if (cfg->core_pictype == CORE_GPC) { 902 /* 903 * General-purpose counter registers have write 904 * restrictions where only the lower 32-bits can be 905 * written to. The rest of the relevant bits are 906 * written to by extension from bit 31 (all ZEROS if 907 * bit-31 is ZERO and all ONE if bit-31 is ONE). This 908 * makes it possible to write to the counter register 909 * only values that have all ONEs or all ZEROs in the 910 * higher bits. 
911 */ 912 if (((cfg->core_rawpic & BITS_EXTENDED_FROM_31) == 0) || 913 ((cfg->core_rawpic & BITS_EXTENDED_FROM_31) == 914 BITS_EXTENDED_FROM_31)) { 915 /* 916 * Straighforward case where the higher bits 917 * are all ZEROs or all ONEs. 918 */ 919 WRMSR(cfg->core_pmc, 920 (cfg->core_rawpic & mask_gpc)); 921 } else { 922 /* 923 * The high order bits are not all the same. 924 * We save what is currently in the registers 925 * and do not write to it. When we want to do 926 * a read from this register later (in 927 * core_pcbe_sample()), we subtract the value 928 * we save here to get the actual event count. 929 * 930 * NOTE: As a result, we will not get overflow 931 * interrupts as expected. 932 */ 933 RDMSR(cfg->core_pmc, cfg->core_rawpic); 934 cfg->core_rawpic = cfg->core_rawpic & mask_gpc; 935 } 936 WRMSR(cfg->core_pes, cfg->core_ctl); 937 perf_global_ctrl |= 1ull << cfg->core_picno; 938 } else { 939 /* 940 * Unlike the general-purpose counters, all relevant 941 * bits of fixed-function counters can be written to. 
942 */ 943 WRMSR(cfg->core_pmc, cfg->core_rawpic & mask_ffc); 944 945 /* 946 * Collect the control bits for all the 947 * fixed-function counters and write it at one shot 948 * later in this function 949 */ 950 perf_fixed_ctr_ctrl |= cfg->core_ctl << 951 ((cfg->core_picno - num_gpc) * CORE_FFC_ATTR_SIZE); 952 perf_global_ctrl |= 953 1ull << (cfg->core_picno - num_gpc + 32); 954 } 955 956 cfg = (core_pcbe_config_t *) 957 kcpc_next_config(token, cfg, NULL); 958 } 959 960 /* Enable all the counters */ 961 WRMSR(PERF_FIXED_CTR_CTRL, perf_fixed_ctr_ctrl); 962 WRMSR(PERF_GLOBAL_CTRL, perf_global_ctrl); 963 } 964 965 static void 966 core_pcbe_allstop(void) 967 { 968 /* Disable all the counters together */ 969 WRMSR(PERF_GLOBAL_CTRL, ALL_STOPPED); 970 971 setcr4(getcr4() & ~CR4_PCE); 972 } 973 974 static void 975 core_pcbe_sample(void *token) 976 { 977 uint64_t *daddr; 978 uint64_t curpic; 979 core_pcbe_config_t *cfg; 980 uint64_t counter_mask; 981 982 cfg = (core_pcbe_config_t *)kcpc_next_config(token, NULL, &daddr); 983 while (cfg != NULL) { 984 ASSERT(cfg->core_pictype == CORE_GPC || 985 cfg->core_pictype == CORE_FFC); 986 987 curpic = rdmsr(cfg->core_pmc); 988 989 DTRACE_PROBE4(core__pcbe__sample, 990 uint64_t, cfg->core_pmc, 991 uint64_t, curpic, 992 uint64_t, cfg->core_rawpic, 993 uint64_t, *daddr); 994 995 if (cfg->core_pictype == CORE_GPC) { 996 counter_mask = mask_gpc; 997 } else { 998 counter_mask = mask_ffc; 999 } 1000 curpic = curpic & counter_mask; 1001 if (curpic >= cfg->core_rawpic) { 1002 *daddr += curpic - cfg->core_rawpic; 1003 } else { 1004 /* Counter overflowed since our last sample */ 1005 *daddr += counter_mask - (cfg->core_rawpic - curpic) + 1006 1; 1007 } 1008 cfg->core_rawpic = *daddr & counter_mask; 1009 1010 cfg = 1011 (core_pcbe_config_t *)kcpc_next_config(token, cfg, &daddr); 1012 } 1013 } 1014 1015 static void 1016 core_pcbe_free(void *config) 1017 { 1018 kmem_free(config, sizeof (core_pcbe_config_t)); 1019 } 1020 1021 static struct modlpcbe 
core_modlpcbe = { 1022 &mod_pcbeops, 1023 "Core Performance Counters", 1024 &core_pcbe_ops 1025 }; 1026 1027 static struct modlinkage core_modl = { 1028 MODREV_1, 1029 &core_modlpcbe, 1030 }; 1031 1032 int 1033 _init(void) 1034 { 1035 if (core_pcbe_init() != 0) { 1036 return (ENOTSUP); 1037 } 1038 return (mod_install(&core_modl)); 1039 } 1040 1041 int 1042 _fini(void) 1043 { 1044 return (mod_remove(&core_modl)); 1045 } 1046 1047 int 1048 _info(struct modinfo *mi) 1049 { 1050 return (mod_info(&core_modl, mi)); 1051 } 1052