1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #pragma ident "%Z%%M% %I% %E% SMI"
28
29 #include <sys/types.h>
30 #include <string.h>
31 #include <alloca.h>
32 #include <stdlib.h>
33 #include <stdio.h>
34 #include <libintl.h>
35
36 #include "libcpc.h"
37 #include "libcpc_impl.h"
38
/*
 * Configuration data for Pentium and Pentium Pro family performance
 * counters.
 *
 * Definitions taken from [3].  See the reference to understand what
 * each of these settings actually means.
 *
 * [3] "Pentium Pro Family Developer's Manual, Volume 3:
 *     Operating Systems Writer's Manual," January 1996
 */
48
/*
 * Flag bits describing which processor variants an event applies to.
 * Each table entry carries one of these in its "ver" field; V_END (no
 * bits set) marks the end of a table.
 */
#define	V_P5	0x01u	/* events present on Pentium cpus */
#define	V_P5mmx	0x02u	/* additional events on Pentium with MMX */
#define	V_P6	0x04u	/* events present on Pentium Pro / Pentium II cpus */
#define	V_P6mmx	0x08u	/* additional events on P6 cpus with MMX */
#define	V_END	0
54
55 /*
56 * map from "cpu version" to flag bits
57 */
58 static const uint_t cpuvermap[] = {
59 V_P5, /* CPC_PENTIUM */
60 V_P5 | V_P5mmx, /* CPC_PENTIUM_MMX */
61 V_P6, /* CPC_PENTIUM_PRO */
62 V_P6 | V_P6mmx, /* CPC_PENTIUM_PRO_MMX */
63 };
64
65 struct nametable {
66 const uint_t ver;
67 const uint8_t bits;
68 const char *name;
69 };
70
/*
 * Basic Pentium events.
 *
 * Expands to a comma-separated list of struct nametable initializers
 * (without a trailing comma), each tagged with the version flag passed
 * as the argument.
 */
#define	P5_EVENTS(vflag)					\
	{(vflag), 0x00, "data_read"},				\
	{(vflag), 0x01, "data_write"},				\
	{(vflag), 0x02, "data_tlb_miss"},			\
	{(vflag), 0x03, "data_read_miss"},			\
	{(vflag), 0x04, "data_write_miss"},			\
	{(vflag), 0x05, "write_hit_to_M_or_E"},			\
	{(vflag), 0x06, "dcache_lines_wrback"},			\
	{(vflag), 0x07, "external_snoops"},			\
	{(vflag), 0x08, "external_dcache_snoop_hits"},		\
	{(vflag), 0x09, "memory_access_in_both_pipes"},		\
	{(vflag), 0x0a, "bank_conflicts"},			\
	{(vflag), 0x0b, "misaligned_ref"},			\
	{(vflag), 0x0c, "code_read"},				\
	{(vflag), 0x0d, "code_tlb_miss"},			\
	{(vflag), 0x0e, "code_cache_miss"},			\
	{(vflag), 0x0f, "any_segreg_loaded"},			\
	{(vflag), 0x12, "branches"},				\
	{(vflag), 0x13, "btb_hits"},				\
	{(vflag), 0x14, "taken_or_btb_hit"},			\
	{(vflag), 0x15, "pipeline_flushes"},			\
	{(vflag), 0x16, "instr_exec"},				\
	{(vflag), 0x17, "instr_exec_V_pipe"},			\
	{(vflag), 0x18, "clks_bus_cycle"},			\
	{(vflag), 0x19, "clks_full_wbufs"},			\
	{(vflag), 0x1a, "pipe_stall_read"},			\
	{(vflag), 0x1b, "stall_on_write_ME"},			\
	{(vflag), 0x1c, "locked_bus_cycle"},			\
	{(vflag), 0x1d, "io_rw_cycles"},			\
	{(vflag), 0x1e, "reads_noncache_mem"},			\
	{(vflag), 0x1f, "pipeline_agi_stalls"},			\
	{(vflag), 0x22, "flops"},				\
	{(vflag), 0x23, "bp_match_dr0"},			\
	{(vflag), 0x24, "bp_match_dr1"},			\
	{(vflag), 0x25, "bp_match_dr2"},			\
	{(vflag), 0x26, "bp_match_dr3"},			\
	{(vflag), 0x27, "hw_intrs"},				\
	{(vflag), 0x28, "data_rw"},				\
	{(vflag), 0x29, "data_rw_miss"}
113
114 static const struct nametable P5mmx_names0[] = {
115 P5_EVENTS(V_P5),
116 {V_P5mmx, 0x2a, "bus_ownership_latency"},
117 {V_P5mmx, 0x2b, "mmx_instr_upipe"},
118 {V_P5mmx, 0x2c, "cache_M_line_sharing"},
119 {V_P5mmx, 0x2d, "emms_instr"},
120 {V_P5mmx, 0x2e, "bus_util_processor"},
121 {V_P5mmx, 0x2f, "sat_mmx_instr"},
122 {V_P5mmx, 0x30, "clks_not_HLT"},
123 {V_P5mmx, 0x31, "mmx_data_read"},
124 {V_P5mmx, 0x32, "clks_fp_stall"},
125 {V_P5mmx, 0x33, "d1_starv_fifo_0"},
126 {V_P5mmx, 0x34, "mmx_data_write"},
127 {V_P5mmx, 0x35, "pipe_flush_wbp"},
128 {V_P5mmx, 0x36, "mmx_misalign_data_refs"},
129 {V_P5mmx, 0x37, "rets_pred_incorrect"},
130 {V_P5mmx, 0x38, "mmx_multiply_unit_interlock"},
131 {V_P5mmx, 0x39, "rets"},
132 {V_P5mmx, 0x3a, "btb_false_entries"},
133 {V_P5mmx, 0x3b, "clocks_stall_full_wb"},
134 {V_END}
135 };
136
137 static const struct nametable P5mmx_names1[] = {
138 P5_EVENTS(V_P5),
139 {V_P5mmx, 0x2a, "bus_ownership_transfers"},
140 {V_P5mmx, 0x2b, "mmx_instr_vpipe"},
141 {V_P5mmx, 0x2c, "cache_lint_sharing"},
142 {V_P5mmx, 0x2d, "mmx_fp_transitions"},
143 {V_P5mmx, 0x2e, "writes_noncache_mem"},
144 {V_P5mmx, 0x2f, "sats_performed"},
145 {V_P5mmx, 0x30, "clks_dcache_tlb_miss"},
146 {V_P5mmx, 0x31, "mmx_data_read_miss"},
147 {V_P5mmx, 0x32, "taken_br"},
148 {V_P5mmx, 0x33, "d1_starv_fifo_1"},
149 {V_P5mmx, 0x34, "mmx_data_write_miss"},
150 {V_P5mmx, 0x35, "pipe_flush_wbp_wb"},
151 {V_P5mmx, 0x36, "mmx_pipe_stall_data_read"},
152 {V_P5mmx, 0x37, "rets_pred"},
153 {V_P5mmx, 0x38, "movd_movq_stall"},
154 {V_P5mmx, 0x39, "rsb_overflow"},
155 {V_P5mmx, 0x3a, "btb_mispred_nt"},
156 {V_P5mmx, 0x3b, "mmx_stall_write_ME"},
157 {V_END}
158 };
159
160 static const struct nametable *P5mmx_names[2] = {
161 P5mmx_names0,
162 P5mmx_names1
163 };
164
165 /*
166 * Pentium Pro and Pentium II events
167 */
168 static const struct nametable P6_names[] = {
169 /*
170 * Data cache unit
171 */
172 {V_P6, 0x43, "data_mem_refs"},
173 {V_P6, 0x45, "dcu_lines_in"},
174 {V_P6, 0x46, "dcu_m_lines_in"},
175 {V_P6, 0x47, "dcu_m_lines_out"},
176 {V_P6, 0x48, "dcu_miss_outstanding"},
177
178 /*
179 * Instruction fetch unit
180 */
181 {V_P6, 0x80, "ifu_ifetch"},
182 {V_P6, 0x81, "ifu_ifetch_miss"},
183 {V_P6, 0x85, "itlb_miss"},
184 {V_P6, 0x86, "ifu_mem_stall"},
185 {V_P6, 0x87, "ild_stall"},
186
187 /*
188 * L2 cache
189 */
190 {V_P6, 0x28, "l2_ifetch"},
191 {V_P6, 0x29, "l2_ld"},
192 {V_P6, 0x2a, "l2_st"},
193 {V_P6, 0x24, "l2_lines_in"},
194 {V_P6, 0x26, "l2_lines_out"},
195 {V_P6, 0x25, "l2_m_lines_inm"},
196 {V_P6, 0x27, "l2_m_lines_outm"},
197 {V_P6, 0x2e, "l2_rqsts"},
198 {V_P6, 0x21, "l2_ads"},
199 {V_P6, 0x22, "l2_dbus_busy"},
200 {V_P6, 0x23, "l2_dbus_busy_rd"},
201
202 /*
203 * External bus logic
204 */
205 {V_P6, 0x62, "bus_drdy_clocks"},
206 {V_P6, 0x63, "bus_lock_clocks"},
207 {V_P6, 0x60, "bus_req_outstanding"},
208 {V_P6, 0x65, "bus_tran_brd"},
209 {V_P6, 0x66, "bus_tran_rfo"},
210 {V_P6, 0x67, "bus_trans_wb"},
211 {V_P6, 0x68, "bus_tran_ifetch"},
212 {V_P6, 0x69, "bus_tran_inval"},
213 {V_P6, 0x6a, "bus_tran_pwr"},
214 {V_P6, 0x6b, "bus_trans_p"},
215 {V_P6, 0x6c, "bus_trans_io"},
216 {V_P6, 0x6d, "bus_tran_def"},
217 {V_P6, 0x6e, "bus_tran_burst"},
218 {V_P6, 0x70, "bus_tran_any"},
219 {V_P6, 0x6f, "bus_tran_mem"},
220 {V_P6, 0x64, "bus_data_rcv"},
221 {V_P6, 0x61, "bus_bnr_drv"},
222 {V_P6, 0x7a, "bus_hit_drv"},
223 {V_P6, 0x7b, "bus_hitm_drv"},
224 {V_P6, 0x7e, "bus_snoop_stall"},
225
226 /*
227 * Floating point unit
228 */
229 {V_P6, 0xc1, "flops"}, /* 0 only */
230 {V_P6, 0x10, "fp_comp_ops_exe"}, /* 0 only */
231 {V_P6, 0x11, "fp_assist"}, /* 1 only */
232 {V_P6, 0x12, "mul"}, /* 1 only */
233 {V_P6, 0x13, "div"}, /* 1 only */
234 {V_P6, 0x14, "cycles_div_busy"}, /* 0 only */
235
236 /*
237 * Memory ordering
238 */
239 {V_P6, 0x3, "ld_blocks"},
240 {V_P6, 0x4, "sb_drains"},
241 {V_P6, 0x5, "misalign_mem_ref"},
242
243 /*
244 * Instruction decoding and retirement
245 */
246 {V_P6, 0xc0, "inst_retired"},
247 {V_P6, 0xc2, "uops_retired"},
248 {V_P6, 0xd0, "inst_decoder"},
249
250 /*
251 * Interrupts
252 */
253 {V_P6, 0xc8, "hw_int_rx"},
254 {V_P6, 0xc6, "cycles_int_masked"},
255 {V_P6, 0xc7, "cycles_int_pending_and_masked"},
256
257 /*
258 * Branches
259 */
260 {V_P6, 0xc4, "br_inst_retired"},
261 {V_P6, 0xc5, "br_miss_pred_retired"},
262 {V_P6, 0xc9, "br_taken_retired"},
263 {V_P6, 0xca, "br_miss_pred_taken_ret"},
264 {V_P6, 0xe0, "br_inst_decoded"},
265 {V_P6, 0xe2, "btb_misses"},
266 {V_P6, 0xe4, "br_bogus"},
267 {V_P6, 0xe6, "baclears"},
268
269 /*
270 * Stalls
271 */
272 {V_P6, 0xa2, "resource_stalls"},
273 {V_P6, 0xd2, "partial_rat_stalls"},
274
275 /*
276 * Segment register loads
277 */
278 {V_P6, 0x6, "segment_reg_loads"},
279
280 /*
281 * Clocks
282 */
283 {V_P6, 0x79, "cpu_clk_unhalted"},
284
285 /*
286 * MMX
287 */
288 {V_P6mmx, 0xb0, "mmx_instr_exec"},
289 {V_P6mmx, 0xb1, "mmx_sat_instr_exec"},
290 {V_P6mmx, 0xb2, "mmx_uops_exec"},
291 {V_P6mmx, 0xb3, "mmx_instr_type_exec"},
292 {V_P6mmx, 0xcc, "fp_mmx_trans"},
293 {V_P6mmx, 0xcd, "mmx_assists"},
294 {V_P6mmx, 0xce, "mmx_instr_ret"},
295 {V_P6mmx, 0xd4, "seg_rename_stalls"},
296 {V_P6mmx, 0xd5, "seg_reg_renames"},
297 {V_P6mmx, 0xd6, "ret_seg_renames"},
298
299 {V_END}
300 };
301
/*
 * Look up the V_* flag bits for a cpu version.  The macro performs no
 * bounds checking: the version must already have been accepted by
 * validargs().
 */
#define	MAPCPUVER(ver)	(cpuvermap[(ver) - CPC_PENTIUM])
303
304 static int
validargs(int cpuver,int regno)305 validargs(int cpuver, int regno)
306 {
307 if (regno < 0 || regno > 1)
308 return (0);
309 cpuver -= CPC_PENTIUM;
310 if (cpuver < 0 ||
311 cpuver >= sizeof (cpuvermap) / sizeof (cpuvermap[0]))
312 return (0);
313 return (1);
314 }
315
316 /*ARGSUSED*/
317 static int
versionmatch(int cpuver,int regno,const struct nametable * n)318 versionmatch(int cpuver, int regno, const struct nametable *n)
319 {
320 if (!validargs(cpuver, regno) || (n->ver & MAPCPUVER(cpuver)) == 0)
321 return (0);
322
323 switch (MAPCPUVER(cpuver)) {
324 case V_P5:
325 case V_P5 | V_P5mmx:
326 break;
327 case V_P6:
328 case V_P6 | V_P6mmx:
329 switch (n->bits) {
330 case 0xc1: /* flops */
331 case 0x10: /* fp_comp_ops_exe */
332 case 0x14: /* cycles_div_busy */
333 /* only reg0 counts these */
334 if (regno == 1)
335 return (0);
336 break;
337 case 0x11: /* fp_assist */
338 case 0x12: /* mul */
339 case 0x13: /* div */
340 /* only 1 can count these */
341 if (regno == 0)
342 return (0);
343 break;
344 default:
345 break;
346 }
347 break;
348 default:
349 return (0);
350 }
351
352 return (1);
353 }
354
355 static const struct nametable *
getnametable(int cpuver,int regno)356 getnametable(int cpuver, int regno)
357 {
358 const struct nametable *n;
359
360 if (!validargs(cpuver, regno))
361 return (NULL);
362
363 switch (MAPCPUVER(cpuver)) {
364 case V_P5:
365 case V_P5 | V_P5mmx:
366 n = P5mmx_names[regno];
367 break;
368 case V_P6:
369 case V_P6 | V_P6mmx:
370 n = P6_names;
371 break;
372 default:
373 n = NULL;
374 break;
375 }
376
377 return (n);
378 }
379
380 void
cpc_walk_names(int cpuver,int regno,void * arg,void (* action)(void *,int,const char *,uint8_t))381 cpc_walk_names(int cpuver, int regno, void *arg,
382 void (*action)(void *, int, const char *, uint8_t))
383 {
384 const struct nametable *n;
385
386 if ((n = getnametable(cpuver, regno)) == NULL)
387 return;
388 for (; n->ver != V_END; n++)
389 if (versionmatch(cpuver, regno, n))
390 action(arg, regno, n->name, n->bits);
391 }
392
393 const char *
__cpc_reg_to_name(int cpuver,int regno,uint8_t bits)394 __cpc_reg_to_name(int cpuver, int regno, uint8_t bits)
395 {
396 const struct nametable *n;
397
398 if ((n = getnametable(cpuver, regno)) == NULL)
399 return (NULL);
400 for (; n->ver != V_END; n++)
401 if (bits == n->bits && versionmatch(cpuver, regno, n))
402 return (n->name);
403 return (NULL);
404 }
405
406 /*
407 * Register names can be specified as strings or even as numbers
408 */
409 int
__cpc_name_to_reg(int cpuver,int regno,const char * name,uint8_t * bits)410 __cpc_name_to_reg(int cpuver, int regno, const char *name, uint8_t *bits)
411 {
412 const struct nametable *n;
413 char *eptr = NULL;
414 long value;
415
416 if ((n = getnametable(cpuver, regno)) == NULL || name == NULL)
417 return (-1);
418 for (; n->ver != V_END; n++)
419 if (strcmp(name, n->name) == 0 &&
420 versionmatch(cpuver, regno, n)) {
421 *bits = n->bits;
422 return (0);
423 }
424
425 value = strtol(name, &eptr, 0);
426 if (name != eptr && value >= 0 && value <= UINT8_MAX) {
427 *bits = (uint8_t)value;
428 return (0);
429 }
430
431 return (-1);
432 }
433
434 const char *
cpc_getcciname(int cpuver)435 cpc_getcciname(int cpuver)
436 {
437 if (validargs(cpuver, 0))
438 switch (MAPCPUVER(cpuver)) {
439 case V_P5:
440 return ("Pentium");
441 case V_P5 | V_P5mmx:
442 return ("Pentium with MMX");
443 case V_P6:
444 return ("Pentium Pro, Pentium II");
445 case V_P6 | V_P6mmx:
446 return ("Pentium Pro with MMX, Pentium II");
447 default:
448 break;
449 }
450 return (NULL);
451 }
452
453 const char *
cpc_getcpuref(int cpuver)454 cpc_getcpuref(int cpuver)
455 {
456 if (validargs(cpuver, 0))
457 switch (MAPCPUVER(cpuver)) {
458 case V_P5:
459 case V_P5 | V_P5mmx:
460 return (gettext(
461 "See Appendix A.2 of the \"Intel Architecture "
462 "Software Developer's Manual,\" 243192, 1997"));
463 case V_P6:
464 case V_P6 | V_P6mmx:
465 return (gettext(
466 "See Appendix A.1 of the \"Intel Architecture "
467 "Software Developer's Manual,\" 243192, 1997"));
468 default:
469 break;
470 }
471 return (NULL);
472 }
473
474 /*
475 * This is a functional interface to allow CPUs with fewer %pic registers
476 * to share the same data structure as those with more %pic registers
477 * within the same instruction set family.
478 */
479 uint_t
cpc_getnpic(int cpuver)480 cpc_getnpic(int cpuver)
481 {
482 switch (cpuver) {
483 case CPC_PENTIUM:
484 case CPC_PENTIUM_MMX:
485 case CPC_PENTIUM_PRO:
486 case CPC_PENTIUM_PRO_MMX:
487 #define EVENT ((cpc_event_t *)0)
488 return (sizeof (EVENT->ce_pic) / sizeof (EVENT->ce_pic[0]));
489 #undef EVENT
490 default:
491 return (0);
492 }
493 }
494
/*
 * BITS(v, u, l): extract the bit field of v whose most significant bit
 * is u and least significant bit is l (both inclusive, u >= l), shifted
 * down so that bit l lands in bit 0.
 *
 * The mask is built from an unsigned constant as (2u << (u - l)) - 1 so
 * that fields up to the full width of unsigned int are well defined; a
 * signed (1 << width) mask would be undefined for 31- and 32-bit
 * fields.
 */
#define	BITS(v, u, l)	\
	(((v) >> (l)) & ((2u << ((u) - (l))) - 1u))
497
498 #include "getcpuid.h"
499
500 /*
501 * Return the version of the current processor.
502 *
503 * Version -1 is defined as 'not performance counter capable'
504 */
505 int
cpc_getcpuver(void)506 cpc_getcpuver(void)
507 {
508 static int ver = -1;
509 uint32_t maxeax;
510 uint32_t vbuf[4];
511
512 if (ver != -1)
513 return (ver);
514
515 maxeax = cpc_getcpuid(0, &vbuf[0], &vbuf[2], &vbuf[1]);
516 {
517 char *vendor = (char *)vbuf;
518 vendor[12] = '\0';
519
520 if (strcmp(vendor, "GenuineIntel") != 0)
521 return (ver);
522 }
523
524 if (maxeax >= 1) {
525 int family, model;
526 uint32_t eax, ebx, ecx, edx;
527
528 eax = cpc_getcpuid(1, &ebx, &ecx, &edx);
529
530 if ((family = BITS(eax, 11, 8)) == 0xf)
531 family = BITS(eax, 27, 20);
532 if ((model = BITS(eax, 7, 4)) == 0xf)
533 model = BITS(eax, 19, 16);
534
535 /*
536 * map family and model into the performance
537 * counter architectures we currently understand.
538 *
539 * See application note AP485 (from developer.intel.com)
540 * for further explanation.
541 */
542 switch (family) {
543 case 5: /* Pentium and Pentium with MMX */
544 ver = model < 4 ?
545 CPC_PENTIUM : CPC_PENTIUM_MMX;
546 break;
547 case 6: /* Pentium Pro and Pentium II and III */
548 ver = BITS(edx, 23, 23) ? /* mmx check */
549 CPC_PENTIUM_PRO_MMX : CPC_PENTIUM_PRO;
550 break;
551 default:
552 case 0xf: /* Pentium IV */
553 break;
554 }
555 }
556
557 return (ver);
558 }
559