1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <string.h> 29 #include <alloca.h> 30 #include <stdlib.h> 31 #include <stdio.h> 32 #include <libintl.h> 33 34 #include "libcpc.h" 35 #include "libcpc_impl.h" 36 37 /* 38 * Configuration data for Pentium Pro performance counters. 39 * 40 * Definitions taken from [3]. See the reference to 41 * understand what any of these settings actually means. 42 * 43 * [3] "Pentium Pro Family Developer's Manual, Volume 3: 44 * Operating Systems Writer's Manual," January 1996 45 */ 46 47 #define V_P5 (1u << 0) /* specific to Pentium cpus */ 48 #define V_P5mmx (1u << 1) /* " MMX instructions */ 49 #define V_P6 (1u << 2) /* specific to Pentium II cpus */ 50 #define V_P6mmx (1u << 3) /* " MMX instructions */ 51 #define V_END 0 52 53 /* 54 * map from "cpu version" to flag bits 55 */ 56 static const uint_t cpuvermap[] = { 57 V_P5, /* CPC_PENTIUM */ 58 V_P5 | V_P5mmx, /* CPC_PENTIUM_MMX */ 59 V_P6, /* CPC_PENTIUM_PRO */ 60 V_P6 | V_P6mmx, /* CPC_PENTIUM_PRO_MMX */ 61 }; 62 63 struct nametable { 64 const uint_t ver; 65 const uint8_t bits; 66 const char *name; 67 }; 68 69 /* 70 * Basic Pentium events 71 */ 72 #define P5_EVENTS(v) \ 73 {v, 0x0, "data_read"}, \ 74 {v, 0x1, "data_write"}, \ 75 {v, 0x2, "data_tlb_miss"}, \ 76 {v, 0x3, "data_read_miss"}, \ 77 {v, 0x4, "data_write_miss"}, \ 78 {v, 0x5, "write_hit_to_M_or_E"}, \ 79 {v, 0x6, "dcache_lines_wrback"}, \ 80 {v, 0x7, "external_snoops"}, \ 81 {v, 0x8, "external_dcache_snoop_hits"}, \ 82 {v, 0x9, "memory_access_in_both_pipes"}, \ 83 {v, 0xa, "bank_conflicts"}, \ 84 {v, 0xb, "misaligned_ref"}, \ 85 {v, 0xc, "code_read"}, \ 86 {v, 0xd, "code_tlb_miss"}, \ 87 {v, 0xe, "code_cache_miss"}, \ 88 {v, 0xf, "any_segreg_loaded"}, \ 89 {v, 0x12, "branches"}, \ 90 {v, 0x13, "btb_hits"}, \ 91 {v, 0x14, "taken_or_btb_hit"}, \ 92 {v, 0x15, "pipeline_flushes"}, \ 93 {v, 0x16, "instr_exec"}, \ 94 {v, 0x17, "instr_exec_V_pipe"}, \ 95 {v, 0x18, "clks_bus_cycle"}, \ 96 {v, 0x19, "clks_full_wbufs"}, \ 97 {v, 0x1a, "pipe_stall_read"}, \ 98 {v, 0x1b, "stall_on_write_ME"}, \ 99 {v, 0x1c, "locked_bus_cycle"}, \ 100 {v, 0x1d, "io_rw_cycles"}, \ 101 {v, 0x1e, "reads_noncache_mem"}, \ 102 {v, 0x1f, "pipeline_agi_stalls"}, \ 103 {v, 0x22, "flops"}, \ 104 {v, 0x23, "bp_match_dr0"}, \ 105 {v, 0x24, "bp_match_dr1"}, \ 106 {v, 0x25, "bp_match_dr2"}, \ 107 {v, 0x26, "bp_match_dr3"}, \ 108 {v, 0x27, "hw_intrs"}, \ 109 {v, 0x28, "data_rw"}, \ 110 {v, 0x29, "data_rw_miss"} 111 112 static const struct nametable P5mmx_names0[] = { 113 P5_EVENTS(V_P5), 114 {V_P5mmx, 0x2a, "bus_ownership_latency"}, 115 {V_P5mmx, 0x2b, "mmx_instr_upipe"}, 116 {V_P5mmx, 0x2c, "cache_M_line_sharing"}, 117 {V_P5mmx, 0x2d, "emms_instr"}, 118 {V_P5mmx, 0x2e, "bus_util_processor"}, 119 {V_P5mmx, 0x2f, "sat_mmx_instr"}, 120 {V_P5mmx, 0x30, "clks_not_HLT"}, 121 {V_P5mmx, 0x31, "mmx_data_read"}, 122 {V_P5mmx, 0x32, "clks_fp_stall"}, 123 {V_P5mmx, 0x33, "d1_starv_fifo_0"}, 124 {V_P5mmx, 0x34, "mmx_data_write"}, 125 {V_P5mmx, 0x35, "pipe_flush_wbp"}, 126 {V_P5mmx, 0x36, "mmx_misalign_data_refs"}, 127 {V_P5mmx, 0x37, "rets_pred_incorrect"}, 128 {V_P5mmx, 0x38, "mmx_multiply_unit_interlock"}, 129 {V_P5mmx, 0x39, "rets"}, 130 {V_P5mmx, 0x3a, "btb_false_entries"}, 131 {V_P5mmx, 0x3b, "clocks_stall_full_wb"}, 132 {V_END} 133 }; 134 135 static const struct nametable P5mmx_names1[] = { 136 P5_EVENTS(V_P5), 137 {V_P5mmx, 0x2a, "bus_ownership_transfers"}, 138 {V_P5mmx, 0x2b, "mmx_instr_vpipe"}, 139 {V_P5mmx, 0x2c, "cache_lint_sharing"}, 140 {V_P5mmx, 0x2d, "mmx_fp_transitions"}, 141 {V_P5mmx, 0x2e, "writes_noncache_mem"}, 142 {V_P5mmx, 0x2f, "sats_performed"}, 143 {V_P5mmx, 0x30, "clks_dcache_tlb_miss"}, 144 {V_P5mmx, 0x31, "mmx_data_read_miss"}, 145 {V_P5mmx, 0x32, "taken_br"}, 146 {V_P5mmx, 0x33, "d1_starv_fifo_1"}, 147 {V_P5mmx, 0x34, "mmx_data_write_miss"}, 148 {V_P5mmx, 0x35, "pipe_flush_wbp_wb"}, 149 {V_P5mmx, 0x36, "mmx_pipe_stall_data_read"}, 150 {V_P5mmx, 0x37, "rets_pred"}, 151 {V_P5mmx, 0x38, "movd_movq_stall"}, 152 {V_P5mmx, 0x39, "rsb_overflow"}, 153 {V_P5mmx, 0x3a, "btb_mispred_nt"}, 154 {V_P5mmx, 0x3b, "mmx_stall_write_ME"}, 155 {V_END} 156 }; 157 158 static const struct nametable *P5mmx_names[2] = { 159 P5mmx_names0, 160 P5mmx_names1 161 }; 162 163 /* 164 * Pentium Pro and Pentium II events 165 */ 166 static const struct nametable P6_names[] = { 167 /* 168 * Data cache unit 169 */ 170 {V_P6, 0x43, "data_mem_refs"}, 171 {V_P6, 0x45, "dcu_lines_in"}, 172 {V_P6, 0x46, "dcu_m_lines_in"}, 173 {V_P6, 0x47, "dcu_m_lines_out"}, 174 {V_P6, 0x48, "dcu_miss_outstanding"}, 175 176 /* 177 * Instruction fetch unit 178 */ 179 {V_P6, 0x80, "ifu_ifetch"}, 180 {V_P6, 0x81, "ifu_ifetch_miss"}, 181 {V_P6, 0x85, "itlb_miss"}, 182 {V_P6, 0x86, "ifu_mem_stall"}, 183 {V_P6, 0x87, "ild_stall"}, 184 185 /* 186 * L2 cache 187 */ 188 {V_P6, 0x28, "l2_ifetch"}, 189 {V_P6, 0x29, "l2_ld"}, 190 {V_P6, 0x2a, "l2_st"}, 191 {V_P6, 0x24, "l2_lines_in"}, 192 {V_P6, 0x26, "l2_lines_out"}, 193 {V_P6, 0x25, "l2_m_lines_inm"}, 194 {V_P6, 0x27, "l2_m_lines_outm"}, 195 {V_P6, 0x2e, "l2_rqsts"}, 196 {V_P6, 0x21, "l2_ads"}, 197 {V_P6, 0x22, "l2_dbus_busy"}, 198 {V_P6, 0x23, "l2_dbus_busy_rd"}, 199 200 /* 201 * External bus logic 202 */ 203 {V_P6, 0x62, "bus_drdy_clocks"}, 204 {V_P6, 0x63, "bus_lock_clocks"}, 205 {V_P6, 0x60, "bus_req_outstanding"}, 206 {V_P6, 0x65, "bus_tran_brd"}, 207 {V_P6, 0x66, "bus_tran_rfo"}, 208 {V_P6, 0x67, "bus_trans_wb"}, 209 {V_P6, 0x68, "bus_tran_ifetch"}, 210 {V_P6, 0x69, "bus_tran_inval"}, 211 {V_P6, 0x6a, "bus_tran_pwr"}, 212 {V_P6, 0x6b, "bus_trans_p"}, 213 {V_P6, 0x6c, "bus_trans_io"}, 214 {V_P6, 0x6d, "bus_tran_def"}, 215 {V_P6, 0x6e, "bus_tran_burst"}, 216 {V_P6, 0x70, "bus_tran_any"}, 217 {V_P6, 0x6f, "bus_tran_mem"}, 218 {V_P6, 0x64, "bus_data_rcv"}, 219 {V_P6, 0x61, "bus_bnr_drv"}, 220 {V_P6, 0x7a, "bus_hit_drv"}, 221 {V_P6, 0x7b, "bus_hitm_drv"}, 222 {V_P6, 0x7e, "bus_snoop_stall"}, 223 224 /* 225 * Floating point unit 226 */ 227 {V_P6, 0xc1, "flops"}, /* 0 only */ 228 {V_P6, 0x10, "fp_comp_ops_exe"}, /* 0 only */ 229 {V_P6, 0x11, "fp_assist"}, /* 1 only */ 230 {V_P6, 0x12, "mul"}, /* 1 only */ 231 {V_P6, 0x13, "div"}, /* 1 only */ 232 {V_P6, 0x14, "cycles_div_busy"}, /* 0 only */ 233 234 /* 235 * Memory ordering 236 */ 237 {V_P6, 0x3, "ld_blocks"}, 238 {V_P6, 0x4, "sb_drains"}, 239 {V_P6, 0x5, "misalign_mem_ref"}, 240 241 /* 242 * Instruction decoding and retirement 243 */ 244 {V_P6, 0xc0, "inst_retired"}, 245 {V_P6, 0xc2, "uops_retired"}, 246 {V_P6, 0xd0, "inst_decoder"}, 247 248 /* 249 * Interrupts 250 */ 251 {V_P6, 0xc8, "hw_int_rx"}, 252 {V_P6, 0xc6, "cycles_int_masked"}, 253 {V_P6, 0xc7, "cycles_int_pending_and_masked"}, 254 255 /* 256 * Branches 257 */ 258 {V_P6, 0xc4, "br_inst_retired"}, 259 {V_P6, 0xc5, "br_miss_pred_retired"}, 260 {V_P6, 0xc9, "br_taken_retired"}, 261 {V_P6, 0xca, "br_miss_pred_taken_ret"}, 262 {V_P6, 0xe0, "br_inst_decoded"}, 263 {V_P6, 0xe2, "btb_misses"}, 264 {V_P6, 0xe4, "br_bogus"}, 265 {V_P6, 0xe6, "baclears"}, 266 267 /* 268 * Stalls 269 */ 270 {V_P6, 0xa2, "resource_stalls"}, 271 {V_P6, 0xd2, "partial_rat_stalls"}, 272 273 /* 274 * Segment register loads 275 */ 276 {V_P6, 0x6, "segment_reg_loads"}, 277 278 /* 279 * Clocks 280 */ 281 {V_P6, 0x79, "cpu_clk_unhalted"}, 282 283 /* 284 * MMX 285 */ 286 {V_P6mmx, 0xb0, "mmx_instr_exec"}, 287 {V_P6mmx, 0xb1, "mmx_sat_instr_exec"}, 288 {V_P6mmx, 0xb2, "mmx_uops_exec"}, 289 {V_P6mmx, 0xb3, "mmx_instr_type_exec"}, 290 {V_P6mmx, 0xcc, "fp_mmx_trans"}, 291 {V_P6mmx, 0xcd, "mmx_assists"}, 292 {V_P6mmx, 0xce, "mmx_instr_ret"}, 293 {V_P6mmx, 0xd4, "seg_rename_stalls"}, 294 {V_P6mmx, 0xd5, "seg_reg_renames"}, 295 {V_P6mmx, 0xd6, "ret_seg_renames"}, 296 297 {V_END} 298 }; 299 300 #define MAPCPUVER(cpuver) (cpuvermap[(cpuver) - CPC_PENTIUM]) 301 302 static int 303 validargs(int cpuver, int regno) 304 { 305 if (regno < 0 || regno > 1) 306 return (0); 307 cpuver -= CPC_PENTIUM; 308 if (cpuver < 0 || 309 cpuver >= sizeof (cpuvermap) / sizeof (cpuvermap[0])) 310 return (0); 311 return (1); 312 } 313 314 /*ARGSUSED*/ 315 static int 316 versionmatch(int cpuver, int regno, const struct nametable *n) 317 { 318 if (!validargs(cpuver, regno) || (n->ver & MAPCPUVER(cpuver)) == 0) 319 return (0); 320 321 switch (MAPCPUVER(cpuver)) { 322 case V_P5: 323 case V_P5 | V_P5mmx: 324 break; 325 case V_P6: 326 case V_P6 | V_P6mmx: 327 switch (n->bits) { 328 case 0xc1: /* flops */ 329 case 0x10: /* fp_comp_ops_exe */ 330 case 0x14: /* cycles_div_busy */ 331 /* only reg0 counts these */ 332 if (regno == 1) 333 return (0); 334 break; 335 case 0x11: /* fp_assist */ 336 case 0x12: /* mul */ 337 case 0x13: /* div */ 338 /* only 1 can count these */ 339 if (regno == 0) 340 return (0); 341 break; 342 default: 343 break; 344 } 345 break; 346 default: 347 return (0); 348 } 349 350 return (1); 351 } 352 353 static const struct nametable * 354 getnametable(int cpuver, int regno) 355 { 356 const struct nametable *n; 357 358 if (!validargs(cpuver, regno)) 359 return (NULL); 360 361 switch (MAPCPUVER(cpuver)) { 362 case V_P5: 363 case V_P5 | V_P5mmx: 364 n = P5mmx_names[regno]; 365 break; 366 case V_P6: 367 case V_P6 | V_P6mmx: 368 n = P6_names; 369 break; 370 default: 371 n = NULL; 372 break; 373 } 374 375 return (n); 376 } 377 378 void 379 cpc_walk_names(int cpuver, int regno, void *arg, 380 void (*action)(void *, int, const char *, uint8_t)) 381 { 382 const struct nametable *n; 383 384 if ((n = getnametable(cpuver, regno)) == NULL) 385 return; 386 for (; n->ver != V_END; n++) 387 if (versionmatch(cpuver, regno, n)) 388 action(arg, regno, n->name, n->bits); 389 } 390 391 const char * 392 __cpc_reg_to_name(int cpuver, int regno, uint8_t bits) 393 { 394 const struct nametable *n; 395 396 if ((n = getnametable(cpuver, regno)) == NULL) 397 return (NULL); 398 for (; n->ver != V_END; n++) 399 if (bits == n->bits && versionmatch(cpuver, regno, n)) 400 return (n->name); 401 return (NULL); 402 } 403 404 /* 405 * Register names can be specified as strings or even as numbers 406 */ 407 int 408 __cpc_name_to_reg(int cpuver, int regno, const char *name, uint8_t *bits) 409 { 410 const struct nametable *n; 411 char *eptr = NULL; 412 long value; 413 414 if ((n = getnametable(cpuver, regno)) == NULL || name == NULL) 415 return (-1); 416 for (; n->ver != V_END; n++) 417 if (strcmp(name, n->name) == 0 && 418 versionmatch(cpuver, regno, n)) { 419 *bits = n->bits; 420 return (0); 421 } 422 423 value = strtol(name, &eptr, 0); 424 if (name != eptr && value >= 0 && value <= UINT8_MAX) { 425 *bits = (uint8_t)value; 426 return (0); 427 } 428 429 return (-1); 430 } 431 432 const char * 433 cpc_getcciname(int cpuver) 434 { 435 if (validargs(cpuver, 0)) 436 switch (MAPCPUVER(cpuver)) { 437 case V_P5: 438 return ("Pentium"); 439 case V_P5 | V_P5mmx: 440 return ("Pentium with MMX"); 441 case V_P6: 442 return ("Pentium Pro, Pentium II"); 443 case V_P6 | V_P6mmx: 444 return ("Pentium Pro with MMX, Pentium II"); 445 default: 446 break; 447 } 448 return (NULL); 449 } 450 451 const char * 452 cpc_getcpuref(int cpuver) 453 { 454 if (validargs(cpuver, 0)) 455 switch (MAPCPUVER(cpuver)) { 456 case V_P5: 457 case V_P5 | V_P5mmx: 458 return (gettext( 459 "See Appendix A.2 of the \"Intel Architecture " 460 "Software Developer's Manual,\" 243192, 1997")); 461 case V_P6: 462 case V_P6 | V_P6mmx: 463 return (gettext( 464 "See Appendix A.1 of the \"Intel Architecture " 465 "Software Developer's Manual,\" 243192, 1997")); 466 default: 467 break; 468 } 469 return (NULL); 470 } 471 472 /* 473 * This is a functional interface to allow CPUs with fewer %pic registers 474 * to share the same data structure as those with more %pic registers 475 * within the same instruction set family. 476 */ 477 uint_t 478 cpc_getnpic(int cpuver) 479 { 480 switch (cpuver) { 481 case CPC_PENTIUM: 482 case CPC_PENTIUM_MMX: 483 case CPC_PENTIUM_PRO: 484 case CPC_PENTIUM_PRO_MMX: 485 #define EVENT ((cpc_event_t *)0) 486 return (sizeof (EVENT->ce_pic) / sizeof (EVENT->ce_pic[0])); 487 #undef EVENT 488 default: 489 return (0); 490 } 491 } 492 493 #define BITS(v, u, l) \ 494 (((v) >> (l)) & ((1 << (1 + (u) - (l))) - 1)) 495 496 #include "getcpuid.h" 497 498 /* 499 * Return the version of the current processor. 500 * 501 * Version -1 is defined as 'not performance counter capable' 502 */ 503 int 504 cpc_getcpuver(void) 505 { 506 static int ver = -1; 507 uint32_t maxeax; 508 uint32_t vbuf[4]; 509 510 if (ver != -1) 511 return (ver); 512 513 maxeax = cpc_getcpuid(0, &vbuf[0], &vbuf[2], &vbuf[1]); 514 { 515 char *vendor = (char *)vbuf; 516 vendor[12] = '\0'; 517 518 if (strcmp(vendor, "GenuineIntel") != 0) 519 return (ver); 520 } 521 522 if (maxeax >= 1) { 523 int family, model; 524 uint32_t eax, ebx, ecx, edx; 525 526 eax = cpc_getcpuid(1, &ebx, &ecx, &edx); 527 528 if ((family = BITS(eax, 11, 8)) == 0xf) 529 family = BITS(eax, 27, 20); 530 if ((model = BITS(eax, 7, 4)) == 0xf) 531 model = BITS(eax, 19, 16); 532 533 /* 534 * map family and model into the performance 535 * counter architectures we currently understand. 536 * 537 * See application note AP485 (from developer.intel.com) 538 * for further explanation. 539 */ 540 switch (family) { 541 case 5: /* Pentium and Pentium with MMX */ 542 ver = model < 4 ? 543 CPC_PENTIUM : CPC_PENTIUM_MMX; 544 break; 545 case 6: /* Pentium Pro and Pentium II and III */ 546 ver = BITS(edx, 23, 23) ? /* mmx check */ 547 CPC_PENTIUM_PRO_MMX : CPC_PENTIUM_PRO; 548 break; 549 default: 550 case 0xf: /* Pentium IV */ 551 break; 552 } 553 } 554 555 return (ver); 556 } 557