1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <string.h> 31 #include <alloca.h> 32 #include <stdlib.h> 33 #include <stdio.h> 34 #include <libintl.h> 35 36 #include "libcpc.h" 37 #include "libcpc_impl.h" 38 39 /* 40 * Configuration data for Pentium Pro performance counters. 41 * 42 * Definitions taken from [3]. See the reference to 43 * understand what any of these settings actually means. 44 * 45 * [3] "Pentium Pro Family Developer's Manual, Volume 3: 46 * Operating Systems Writer's Manual," January 1996 47 */ 48 49 #define V_P5 (1u << 0) /* specific to Pentium cpus */ 50 #define V_P5mmx (1u << 1) /* " MMX instructions */ 51 #define V_P6 (1u << 2) /* specific to Pentium II cpus */ 52 #define V_P6mmx (1u << 3) /* " MMX instructions */ 53 #define V_END 0 54 55 /* 56 * map from "cpu version" to flag bits 57 */ 58 static const uint_t cpuvermap[] = { 59 V_P5, /* CPC_PENTIUM */ 60 V_P5 | V_P5mmx, /* CPC_PENTIUM_MMX */ 61 V_P6, /* CPC_PENTIUM_PRO */ 62 V_P6 | V_P6mmx, /* CPC_PENTIUM_PRO_MMX */ 63 }; 64 65 struct nametable { 66 const uint_t ver; 67 const uint8_t bits; 68 const char *name; 69 }; 70 71 /* 72 * Basic Pentium events 73 */ 74 #define P5_EVENTS(v) \ 75 {v, 0x0, "data_read"}, \ 76 {v, 0x1, "data_write"}, \ 77 {v, 0x2, "data_tlb_miss"}, \ 78 {v, 0x3, "data_read_miss"}, \ 79 {v, 0x4, "data_write_miss"}, \ 80 {v, 0x5, "write_hit_to_M_or_E"}, \ 81 {v, 0x6, "dcache_lines_wrback"}, \ 82 {v, 0x7, "external_snoops"}, \ 83 {v, 0x8, "external_dcache_snoop_hits"}, \ 84 {v, 0x9, "memory_access_in_both_pipes"}, \ 85 {v, 0xa, "bank_conflicts"}, \ 86 {v, 0xb, "misaligned_ref"}, \ 87 {v, 0xc, "code_read"}, \ 88 {v, 0xd, "code_tlb_miss"}, \ 89 {v, 0xe, "code_cache_miss"}, \ 90 {v, 0xf, "any_segreg_loaded"}, \ 91 {v, 0x12, "branches"}, \ 92 {v, 0x13, "btb_hits"}, \ 93 {v, 0x14, "taken_or_btb_hit"}, \ 94 {v, 0x15, "pipeline_flushes"}, \ 95 {v, 0x16, "instr_exec"}, \ 96 {v, 0x17, "instr_exec_V_pipe"}, \ 97 {v, 0x18, "clks_bus_cycle"}, \ 98 {v, 0x19, "clks_full_wbufs"}, \ 99 {v, 0x1a, "pipe_stall_read"}, \ 100 {v, 0x1b, "stall_on_write_ME"}, \ 101 {v, 0x1c, "locked_bus_cycle"}, \ 102 {v, 0x1d, "io_rw_cycles"}, \ 103 {v, 0x1e, "reads_noncache_mem"}, \ 104 {v, 0x1f, "pipeline_agi_stalls"}, \ 105 {v, 0x22, "flops"}, \ 106 {v, 0x23, "bp_match_dr0"}, \ 107 {v, 0x24, "bp_match_dr1"}, \ 108 {v, 0x25, "bp_match_dr2"}, \ 109 {v, 0x26, "bp_match_dr3"}, \ 110 {v, 0x27, "hw_intrs"}, \ 111 {v, 0x28, "data_rw"}, \ 112 {v, 0x29, "data_rw_miss"} 113 114 static const struct nametable P5mmx_names0[] = { 115 P5_EVENTS(V_P5), 116 {V_P5mmx, 0x2a, "bus_ownership_latency"}, 117 {V_P5mmx, 0x2b, "mmx_instr_upipe"}, 118 {V_P5mmx, 0x2c, "cache_M_line_sharing"}, 119 {V_P5mmx, 0x2d, "emms_instr"}, 120 {V_P5mmx, 0x2e, "bus_util_processor"}, 121 {V_P5mmx, 0x2f, "sat_mmx_instr"}, 122 {V_P5mmx, 0x30, "clks_not_HLT"}, 123 {V_P5mmx, 0x31, "mmx_data_read"}, 124 {V_P5mmx, 0x32, "clks_fp_stall"}, 125 {V_P5mmx, 0x33, "d1_starv_fifo_0"}, 126 {V_P5mmx, 0x34, "mmx_data_write"}, 127 {V_P5mmx, 0x35, "pipe_flush_wbp"}, 128 {V_P5mmx, 0x36, "mmx_misalign_data_refs"}, 129 {V_P5mmx, 0x37, "rets_pred_incorrect"}, 130 {V_P5mmx, 0x38, "mmx_multiply_unit_interlock"}, 131 {V_P5mmx, 0x39, "rets"}, 132 {V_P5mmx, 0x3a, "btb_false_entries"}, 133 {V_P5mmx, 0x3b, "clocks_stall_full_wb"}, 134 {V_END} 135 }; 136 137 static const struct nametable P5mmx_names1[] = { 138 P5_EVENTS(V_P5), 139 {V_P5mmx, 0x2a, "bus_ownership_transfers"}, 140 {V_P5mmx, 0x2b, "mmx_instr_vpipe"}, 141 {V_P5mmx, 0x2c, "cache_lint_sharing"}, 142 {V_P5mmx, 0x2d, "mmx_fp_transitions"}, 143 {V_P5mmx, 0x2e, "writes_noncache_mem"}, 144 {V_P5mmx, 0x2f, "sats_performed"}, 145 {V_P5mmx, 0x30, "clks_dcache_tlb_miss"}, 146 {V_P5mmx, 0x31, "mmx_data_read_miss"}, 147 {V_P5mmx, 0x32, "taken_br"}, 148 {V_P5mmx, 0x33, "d1_starv_fifo_1"}, 149 {V_P5mmx, 0x34, "mmx_data_write_miss"}, 150 {V_P5mmx, 0x35, "pipe_flush_wbp_wb"}, 151 {V_P5mmx, 0x36, "mmx_pipe_stall_data_read"}, 152 {V_P5mmx, 0x37, "rets_pred"}, 153 {V_P5mmx, 0x38, "movd_movq_stall"}, 154 {V_P5mmx, 0x39, "rsb_overflow"}, 155 {V_P5mmx, 0x3a, "btb_mispred_nt"}, 156 {V_P5mmx, 0x3b, "mmx_stall_write_ME"}, 157 {V_END} 158 }; 159 160 static const struct nametable *P5mmx_names[2] = { 161 P5mmx_names0, 162 P5mmx_names1 163 }; 164 165 /* 166 * Pentium Pro and Pentium II events 167 */ 168 static const struct nametable P6_names[] = { 169 /* 170 * Data cache unit 171 */ 172 {V_P6, 0x43, "data_mem_refs"}, 173 {V_P6, 0x45, "dcu_lines_in"}, 174 {V_P6, 0x46, "dcu_m_lines_in"}, 175 {V_P6, 0x47, "dcu_m_lines_out"}, 176 {V_P6, 0x48, "dcu_miss_outstanding"}, 177 178 /* 179 * Instruction fetch unit 180 */ 181 {V_P6, 0x80, "ifu_ifetch"}, 182 {V_P6, 0x81, "ifu_ifetch_miss"}, 183 {V_P6, 0x85, "itlb_miss"}, 184 {V_P6, 0x86, "ifu_mem_stall"}, 185 {V_P6, 0x87, "ild_stall"}, 186 187 /* 188 * L2 cache 189 */ 190 {V_P6, 0x28, "l2_ifetch"}, 191 {V_P6, 0x29, "l2_ld"}, 192 {V_P6, 0x2a, "l2_st"}, 193 {V_P6, 0x24, "l2_lines_in"}, 194 {V_P6, 0x26, "l2_lines_out"}, 195 {V_P6, 0x25, "l2_m_lines_inm"}, 196 {V_P6, 0x27, "l2_m_lines_outm"}, 197 {V_P6, 0x2e, "l2_rqsts"}, 198 {V_P6, 0x21, "l2_ads"}, 199 {V_P6, 0x22, "l2_dbus_busy"}, 200 {V_P6, 0x23, "l2_dbus_busy_rd"}, 201 202 /* 203 * External bus logic 204 */ 205 {V_P6, 0x62, "bus_drdy_clocks"}, 206 {V_P6, 0x63, "bus_lock_clocks"}, 207 {V_P6, 0x60, "bus_req_outstanding"}, 208 {V_P6, 0x65, "bus_tran_brd"}, 209 {V_P6, 0x66, "bus_tran_rfo"}, 210 {V_P6, 0x67, "bus_trans_wb"}, 211 {V_P6, 0x68, "bus_tran_ifetch"}, 212 {V_P6, 0x69, "bus_tran_inval"}, 213 {V_P6, 0x6a, "bus_tran_pwr"}, 214 {V_P6, 0x6b, "bus_trans_p"}, 215 {V_P6, 0x6c, "bus_trans_io"}, 216 {V_P6, 0x6d, "bus_tran_def"}, 217 {V_P6, 0x6e, "bus_tran_burst"}, 218 {V_P6, 0x70, "bus_tran_any"}, 219 {V_P6, 0x6f, "bus_tran_mem"}, 220 {V_P6, 0x64, "bus_data_rcv"}, 221 {V_P6, 0x61, "bus_bnr_drv"}, 222 {V_P6, 0x7a, "bus_hit_drv"}, 223 {V_P6, 0x7b, "bus_hitm_drv"}, 224 {V_P6, 0x7e, "bus_snoop_stall"}, 225 226 /* 227 * Floating point unit 228 */ 229 {V_P6, 0xc1, "flops"}, /* 0 only */ 230 {V_P6, 0x10, "fp_comp_ops_exe"}, /* 0 only */ 231 {V_P6, 0x11, "fp_assist"}, /* 1 only */ 232 {V_P6, 0x12, "mul"}, /* 1 only */ 233 {V_P6, 0x13, "div"}, /* 1 only */ 234 {V_P6, 0x14, "cycles_div_busy"}, /* 0 only */ 235 236 /* 237 * Memory ordering 238 */ 239 {V_P6, 0x3, "ld_blocks"}, 240 {V_P6, 0x4, "sb_drains"}, 241 {V_P6, 0x5, "misalign_mem_ref"}, 242 243 /* 244 * Instruction decoding and retirement 245 */ 246 {V_P6, 0xc0, "inst_retired"}, 247 {V_P6, 0xc2, "uops_retired"}, 248 {V_P6, 0xd0, "inst_decoder"}, 249 250 /* 251 * Interrupts 252 */ 253 {V_P6, 0xc8, "hw_int_rx"}, 254 {V_P6, 0xc6, "cycles_int_masked"}, 255 {V_P6, 0xc7, "cycles_int_pending_and_masked"}, 256 257 /* 258 * Branches 259 */ 260 {V_P6, 0xc4, "br_inst_retired"}, 261 {V_P6, 0xc5, "br_miss_pred_retired"}, 262 {V_P6, 0xc9, "br_taken_retired"}, 263 {V_P6, 0xca, "br_miss_pred_taken_ret"}, 264 {V_P6, 0xe0, "br_inst_decoded"}, 265 {V_P6, 0xe2, "btb_misses"}, 266 {V_P6, 0xe4, "br_bogus"}, 267 {V_P6, 0xe6, "baclears"}, 268 269 /* 270 * Stalls 271 */ 272 {V_P6, 0xa2, "resource_stalls"}, 273 {V_P6, 0xd2, "partial_rat_stalls"}, 274 275 /* 276 * Segment register loads 277 */ 278 {V_P6, 0x6, "segment_reg_loads"}, 279 280 /* 281 * Clocks 282 */ 283 {V_P6, 0x79, "cpu_clk_unhalted"}, 284 285 /* 286 * MMX 287 */ 288 {V_P6mmx, 0xb0, "mmx_instr_exec"}, 289 {V_P6mmx, 0xb1, "mmx_sat_instr_exec"}, 290 {V_P6mmx, 0xb2, "mmx_uops_exec"}, 291 {V_P6mmx, 0xb3, "mmx_instr_type_exec"}, 292 {V_P6mmx, 0xcc, "fp_mmx_trans"}, 293 {V_P6mmx, 0xcd, "mmx_assists"}, 294 {V_P6mmx, 0xce, "mmx_instr_ret"}, 295 {V_P6mmx, 0xd4, "seg_rename_stalls"}, 296 {V_P6mmx, 0xd5, "seg_reg_renames"}, 297 {V_P6mmx, 0xd6, "ret_seg_renames"}, 298 299 {V_END} 300 }; 301 302 #define MAPCPUVER(cpuver) (cpuvermap[(cpuver) - CPC_PENTIUM]) 303 304 static int 305 validargs(int cpuver, int regno) 306 { 307 if (regno < 0 || regno > 1) 308 return (0); 309 cpuver -= CPC_PENTIUM; 310 if (cpuver < 0 || 311 cpuver >= sizeof (cpuvermap) / sizeof (cpuvermap[0])) 312 return (0); 313 return (1); 314 } 315 316 /*ARGSUSED*/ 317 static int 318 versionmatch(int cpuver, int regno, const struct nametable *n) 319 { 320 if (!validargs(cpuver, regno) || (n->ver & MAPCPUVER(cpuver)) == 0) 321 return (0); 322 323 switch (MAPCPUVER(cpuver)) { 324 case V_P5: 325 case V_P5 | V_P5mmx: 326 break; 327 case V_P6: 328 case V_P6 | V_P6mmx: 329 switch (n->bits) { 330 case 0xc1: /* flops */ 331 case 0x10: /* fp_comp_ops_exe */ 332 case 0x14: /* cycles_div_busy */ 333 /* only reg0 counts these */ 334 if (regno == 1) 335 return (0); 336 break; 337 case 0x11: /* fp_assist */ 338 case 0x12: /* mul */ 339 case 0x13: /* div */ 340 /* only 1 can count these */ 341 if (regno == 0) 342 return (0); 343 break; 344 default: 345 break; 346 } 347 break; 348 default: 349 return (0); 350 } 351 352 return (1); 353 } 354 355 static const struct nametable * 356 getnametable(int cpuver, int regno) 357 { 358 const struct nametable *n; 359 360 if (!validargs(cpuver, regno)) 361 return (NULL); 362 363 switch (MAPCPUVER(cpuver)) { 364 case V_P5: 365 case V_P5 | V_P5mmx: 366 n = P5mmx_names[regno]; 367 break; 368 case V_P6: 369 case V_P6 | V_P6mmx: 370 n = P6_names; 371 break; 372 default: 373 n = NULL; 374 break; 375 } 376 377 return (n); 378 } 379 380 void 381 cpc_walk_names(int cpuver, int regno, void *arg, 382 void (*action)(void *, int, const char *, uint8_t)) 383 { 384 const struct nametable *n; 385 386 if ((n = getnametable(cpuver, regno)) == NULL) 387 return; 388 for (; n->ver != V_END; n++) 389 if (versionmatch(cpuver, regno, n)) 390 action(arg, regno, n->name, n->bits); 391 } 392 393 const char * 394 __cpc_reg_to_name(int cpuver, int regno, uint8_t bits) 395 { 396 const struct nametable *n; 397 398 if ((n = getnametable(cpuver, regno)) == NULL) 399 return (NULL); 400 for (; n->ver != V_END; n++) 401 if (bits == n->bits && versionmatch(cpuver, regno, n)) 402 return (n->name); 403 return (NULL); 404 } 405 406 /* 407 * Register names can be specified as strings or even as numbers 408 */ 409 int 410 __cpc_name_to_reg(int cpuver, int regno, const char *name, uint8_t *bits) 411 { 412 const struct nametable *n; 413 char *eptr = NULL; 414 long value; 415 416 if ((n = getnametable(cpuver, regno)) == NULL || name == NULL) 417 return (-1); 418 for (; n->ver != V_END; n++) 419 if (strcmp(name, n->name) == 0 && 420 versionmatch(cpuver, regno, n)) { 421 *bits = n->bits; 422 return (0); 423 } 424 425 value = strtol(name, &eptr, 0); 426 if (name != eptr && value >= 0 && value <= UINT8_MAX) { 427 *bits = (uint8_t)value; 428 return (0); 429 } 430 431 return (-1); 432 } 433 434 const char * 435 cpc_getcciname(int cpuver) 436 { 437 if (validargs(cpuver, 0)) 438 switch (MAPCPUVER(cpuver)) { 439 case V_P5: 440 return ("Pentium"); 441 case V_P5 | V_P5mmx: 442 return ("Pentium with MMX"); 443 case V_P6: 444 return ("Pentium Pro, Pentium II"); 445 case V_P6 | V_P6mmx: 446 return ("Pentium Pro with MMX, Pentium II"); 447 default: 448 break; 449 } 450 return (NULL); 451 } 452 453 const char * 454 cpc_getcpuref(int cpuver) 455 { 456 if (validargs(cpuver, 0)) 457 switch (MAPCPUVER(cpuver)) { 458 case V_P5: 459 case V_P5 | V_P5mmx: 460 return (gettext( 461 "See Appendix A.2 of the \"Intel Architecture " 462 "Software Developer's Manual,\" 243192, 1997")); 463 case V_P6: 464 case V_P6 | V_P6mmx: 465 return (gettext( 466 "See Appendix A.1 of the \"Intel Architecture " 467 "Software Developer's Manual,\" 243192, 1997")); 468 default: 469 break; 470 } 471 return (NULL); 472 } 473 474 /* 475 * This is a functional interface to allow CPUs with fewer %pic registers 476 * to share the same data structure as those with more %pic registers 477 * within the same instruction set family. 478 */ 479 uint_t 480 cpc_getnpic(int cpuver) 481 { 482 switch (cpuver) { 483 case CPC_PENTIUM: 484 case CPC_PENTIUM_MMX: 485 case CPC_PENTIUM_PRO: 486 case CPC_PENTIUM_PRO_MMX: 487 #define EVENT ((cpc_event_t *)0) 488 return (sizeof (EVENT->ce_pic) / sizeof (EVENT->ce_pic[0])); 489 #undef EVENT 490 default: 491 return (0); 492 } 493 } 494 495 #define BITS(v, u, l) \ 496 (((v) >> (l)) & ((1 << (1 + (u) - (l))) - 1)) 497 498 #include "getcpuid.h" 499 500 /* 501 * Return the version of the current processor. 502 * 503 * Version -1 is defined as 'not performance counter capable' 504 */ 505 int 506 cpc_getcpuver(void) 507 { 508 static int ver = -1; 509 uint32_t maxeax; 510 uint32_t vbuf[4]; 511 512 if (ver != -1) 513 return (ver); 514 515 maxeax = cpc_getcpuid(0, &vbuf[0], &vbuf[2], &vbuf[1]); 516 { 517 char *vendor = (char *)vbuf; 518 vendor[12] = '\0'; 519 520 if (strcmp(vendor, "GenuineIntel") != 0) 521 return (ver); 522 } 523 524 if (maxeax >= 1) { 525 int family, model; 526 uint32_t eax, ebx, ecx, edx; 527 528 eax = cpc_getcpuid(1, &ebx, &ecx, &edx); 529 530 if ((family = BITS(eax, 11, 8)) == 0xf) 531 family = BITS(eax, 27, 20); 532 if ((model = BITS(eax, 7, 4)) == 0xf) 533 model = BITS(eax, 19, 16); 534 535 /* 536 * map family and model into the performance 537 * counter architectures we currently understand. 538 * 539 * See application note AP485 (from developer.intel.com) 540 * for further explanation. 541 */ 542 switch (family) { 543 case 5: /* Pentium and Pentium with MMX */ 544 ver = model < 4 ? 545 CPC_PENTIUM : CPC_PENTIUM_MMX; 546 break; 547 case 6: /* Pentium Pro and Pentium II and III */ 548 ver = BITS(edx, 23, 23) ? /* mmx check */ 549 CPC_PENTIUM_PRO_MMX : CPC_PENTIUM_PRO; 550 break; 551 default: 552 case 0xf: /* Pentium IV */ 553 break; 554 } 555 } 556 557 return (ver); 558 } 559