xref: /titanic_52/usr/src/lib/libcpc/i386/conf_pentium.c (revision 7aec1d6e253b21f9e9b7ef68b4d81ab9859b51fe)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <string.h>
31 #include <alloca.h>
32 #include <stdlib.h>
33 #include <stdio.h>
34 #include <libintl.h>
35 
36 #include "libcpc.h"
37 #include "libcpc_impl.h"
38 
/*
 * Configuration data for Pentium and Pentium Pro family performance
 * counters.
 *
 * Definitions taken from [3].  See the reference to
 * understand what any of these settings actually mean.
 *
 * [3] "Pentium Pro Family Developer's Manual, Volume 3:
 *     Operating Systems Writer's Manual," January 1996
 */
48 
/*
 * Version flag bits.  Every name table entry carries the flags of the
 * processors it applies to, and cpuvermap[] holds the flags of each
 * supported cpu version; an entry applies when the two sets intersect.
 */
#define	V_P5	0x1u	/* events of the Pentium (P5) family */
#define	V_P5mmx	0x2u	/* events added by Pentium with MMX */
#define	V_P6	0x4u	/* events of the Pentium Pro / Pentium II (P6) family */
#define	V_P6mmx	0x8u	/* events added by P6 processors with MMX */
#define	V_END	0	/* no flags: marks the last entry of a name table */
54 
55 /*
56  * map from "cpu version" to flag bits
57  */
58 static const uint_t cpuvermap[] = {
59 	V_P5,		/* CPC_PENTIUM */
60 	V_P5 | V_P5mmx,	/* CPC_PENTIUM_MMX */
61 	V_P6,		/* CPC_PENTIUM_PRO */
62 	V_P6 | V_P6mmx,	/* CPC_PENTIUM_PRO_MMX */
63 };
64 
65 struct nametable {
66 	const uint_t	ver;
67 	const uint8_t	bits;
68 	const char	*name;
69 };
70 
/*
 * Basic Pentium events.
 *
 * Expands to a comma-separated list of struct nametable initializers
 * (no trailing comma), every one of them tagged with the version flags v.
 */
#define	P5_EVENTS(v)						\
	{ (v),	0x00,	"data_read" },				\
	{ (v),	0x01,	"data_write" },				\
	{ (v),	0x02,	"data_tlb_miss" },			\
	{ (v),	0x03,	"data_read_miss" },			\
	{ (v),	0x04,	"data_write_miss" },			\
	{ (v),	0x05,	"write_hit_to_M_or_E" },		\
	{ (v),	0x06,	"dcache_lines_wrback" },		\
	{ (v),	0x07,	"external_snoops" },			\
	{ (v),	0x08,	"external_dcache_snoop_hits" },		\
	{ (v),	0x09,	"memory_access_in_both_pipes" },	\
	{ (v),	0x0a,	"bank_conflicts" },			\
	{ (v),	0x0b,	"misaligned_ref" },			\
	{ (v),	0x0c,	"code_read" },				\
	{ (v),	0x0d,	"code_tlb_miss" },			\
	{ (v),	0x0e,	"code_cache_miss" },			\
	{ (v),	0x0f,	"any_segreg_loaded" },			\
	{ (v),	0x12,	"branches" },				\
	{ (v),	0x13,	"btb_hits" },				\
	{ (v),	0x14,	"taken_or_btb_hit" },			\
	{ (v),	0x15,	"pipeline_flushes" },			\
	{ (v),	0x16,	"instr_exec" },				\
	{ (v),	0x17,	"instr_exec_V_pipe" },			\
	{ (v),	0x18,	"clks_bus_cycle" },			\
	{ (v),	0x19,	"clks_full_wbufs" },			\
	{ (v),	0x1a,	"pipe_stall_read" },			\
	{ (v),	0x1b,	"stall_on_write_ME" },			\
	{ (v),	0x1c,	"locked_bus_cycle" },			\
	{ (v),	0x1d,	"io_rw_cycles" },			\
	{ (v),	0x1e,	"reads_noncache_mem" },			\
	{ (v),	0x1f,	"pipeline_agi_stalls" },		\
	{ (v),	0x22,	"flops" },				\
	{ (v),	0x23,	"bp_match_dr0" },			\
	{ (v),	0x24,	"bp_match_dr1" },			\
	{ (v),	0x25,	"bp_match_dr2" },			\
	{ (v),	0x26,	"bp_match_dr3" },			\
	{ (v),	0x27,	"hw_intrs" },				\
	{ (v),	0x28,	"data_rw" },				\
	{ (v),	0x29,	"data_rw_miss" }
113 
114 static const struct nametable P5mmx_names0[] = {
115 	P5_EVENTS(V_P5),
116 	{V_P5mmx,	0x2a,	"bus_ownership_latency"},
117 	{V_P5mmx,	0x2b,	"mmx_instr_upipe"},
118 	{V_P5mmx,	0x2c,	"cache_M_line_sharing"},
119 	{V_P5mmx,	0x2d,	"emms_instr"},
120 	{V_P5mmx,	0x2e,	"bus_util_processor"},
121 	{V_P5mmx,	0x2f,	"sat_mmx_instr"},
122 	{V_P5mmx,	0x30,	"clks_not_HLT"},
123 	{V_P5mmx,	0x31,	"mmx_data_read"},
124 	{V_P5mmx,	0x32,	"clks_fp_stall"},
125 	{V_P5mmx,	0x33,	"d1_starv_fifo_0"},
126 	{V_P5mmx,	0x34,	"mmx_data_write"},
127 	{V_P5mmx,	0x35,	"pipe_flush_wbp"},
128 	{V_P5mmx,	0x36,	"mmx_misalign_data_refs"},
129 	{V_P5mmx,	0x37,	"rets_pred_incorrect"},
130 	{V_P5mmx,	0x38,	"mmx_multiply_unit_interlock"},
131 	{V_P5mmx,	0x39,	"rets"},
132 	{V_P5mmx,	0x3a,	"btb_false_entries"},
133 	{V_P5mmx,	0x3b,	"clocks_stall_full_wb"},
134 	{V_END}
135 };
136 
137 static const struct nametable P5mmx_names1[] = {
138 	P5_EVENTS(V_P5),
139 	{V_P5mmx,	0x2a,	"bus_ownership_transfers"},
140 	{V_P5mmx,	0x2b,	"mmx_instr_vpipe"},
141 	{V_P5mmx,	0x2c,	"cache_lint_sharing"},
142 	{V_P5mmx,	0x2d,	"mmx_fp_transitions"},
143 	{V_P5mmx,	0x2e,	"writes_noncache_mem"},
144 	{V_P5mmx,	0x2f,	"sats_performed"},
145 	{V_P5mmx,	0x30,	"clks_dcache_tlb_miss"},
146 	{V_P5mmx,	0x31,	"mmx_data_read_miss"},
147 	{V_P5mmx,	0x32,	"taken_br"},
148 	{V_P5mmx,	0x33,	"d1_starv_fifo_1"},
149 	{V_P5mmx,	0x34,	"mmx_data_write_miss"},
150 	{V_P5mmx,	0x35,	"pipe_flush_wbp_wb"},
151 	{V_P5mmx,	0x36,	"mmx_pipe_stall_data_read"},
152 	{V_P5mmx,	0x37,	"rets_pred"},
153 	{V_P5mmx,	0x38,	"movd_movq_stall"},
154 	{V_P5mmx,	0x39,	"rsb_overflow"},
155 	{V_P5mmx,	0x3a,	"btb_mispred_nt"},
156 	{V_P5mmx,	0x3b,	"mmx_stall_write_ME"},
157 	{V_END}
158 };
159 
160 static const struct nametable *P5mmx_names[2] = {
161 	P5mmx_names0,
162 	P5mmx_names1
163 };
164 
165 /*
166  * Pentium Pro and Pentium II events
167  */
168 static const struct nametable P6_names[] = {
169 	/*
170 	 * Data cache unit
171 	 */
172 	{V_P6,		0x43,	"data_mem_refs"},
173 	{V_P6,		0x45,	"dcu_lines_in"},
174 	{V_P6,		0x46,	"dcu_m_lines_in"},
175 	{V_P6,		0x47,	"dcu_m_lines_out"},
176 	{V_P6,		0x48,	"dcu_miss_outstanding"},
177 
178 	/*
179 	 * Instruction fetch unit
180 	 */
181 	{V_P6,		0x80,	"ifu_ifetch"},
182 	{V_P6,		0x81,	"ifu_ifetch_miss"},
183 	{V_P6,		0x85,	"itlb_miss"},
184 	{V_P6,		0x86,	"ifu_mem_stall"},
185 	{V_P6,		0x87,	"ild_stall"},
186 
187 	/*
188 	 * L2 cache
189 	 */
190 	{V_P6,		0x28,	"l2_ifetch"},
191 	{V_P6,		0x29,	"l2_ld"},
192 	{V_P6,		0x2a,	"l2_st"},
193 	{V_P6,		0x24,	"l2_lines_in"},
194 	{V_P6,		0x26,	"l2_lines_out"},
195 	{V_P6,		0x25,	"l2_m_lines_inm"},
196 	{V_P6,		0x27,	"l2_m_lines_outm"},
197 	{V_P6,		0x2e,	"l2_rqsts"},
198 	{V_P6,		0x21,	"l2_ads"},
199 	{V_P6,		0x22,	"l2_dbus_busy"},
200 	{V_P6,		0x23,	"l2_dbus_busy_rd"},
201 
202 	/*
203 	 * External bus logic
204 	 */
205 	{V_P6,		0x62,	"bus_drdy_clocks"},
206 	{V_P6,		0x63,	"bus_lock_clocks"},
207 	{V_P6,		0x60,	"bus_req_outstanding"},
208 	{V_P6,		0x65,	"bus_tran_brd"},
209 	{V_P6,		0x66,	"bus_tran_rfo"},
210 	{V_P6,		0x67,	"bus_trans_wb"},
211 	{V_P6,		0x68,	"bus_tran_ifetch"},
212 	{V_P6,		0x69,	"bus_tran_inval"},
213 	{V_P6,		0x6a,	"bus_tran_pwr"},
214 	{V_P6,		0x6b,	"bus_trans_p"},
215 	{V_P6,		0x6c,	"bus_trans_io"},
216 	{V_P6,		0x6d,	"bus_tran_def"},
217 	{V_P6,		0x6e,	"bus_tran_burst"},
218 	{V_P6,		0x70,	"bus_tran_any"},
219 	{V_P6,		0x6f,	"bus_tran_mem"},
220 	{V_P6,		0x64,	"bus_data_rcv"},
221 	{V_P6,		0x61,	"bus_bnr_drv"},
222 	{V_P6,		0x7a,	"bus_hit_drv"},
223 	{V_P6,		0x7b,	"bus_hitm_drv"},
224 	{V_P6,		0x7e,	"bus_snoop_stall"},
225 
226 	/*
227 	 * Floating point unit
228 	 */
229 	{V_P6,		0xc1,	"flops"},		/* 0 only */
230 	{V_P6,		0x10,	"fp_comp_ops_exe"},	/* 0 only */
231 	{V_P6,		0x11,	"fp_assist"},		/* 1 only */
232 	{V_P6,		0x12,	"mul"},			/* 1 only */
233 	{V_P6,		0x13,	"div"},			/* 1 only */
234 	{V_P6,		0x14,	"cycles_div_busy"},	/* 0 only */
235 
236 	/*
237 	 * Memory ordering
238 	 */
239 	{V_P6,		0x3,	"ld_blocks"},
240 	{V_P6,		0x4,	"sb_drains"},
241 	{V_P6,		0x5,	"misalign_mem_ref"},
242 
243 	/*
244 	 * Instruction decoding and retirement
245 	 */
246 	{V_P6,		0xc0,	"inst_retired"},
247 	{V_P6,		0xc2,	"uops_retired"},
248 	{V_P6,		0xd0,	"inst_decoder"},
249 
250 	/*
251 	 * Interrupts
252 	 */
253 	{V_P6,		0xc8,	"hw_int_rx"},
254 	{V_P6,		0xc6,	"cycles_int_masked"},
255 	{V_P6,		0xc7,	"cycles_int_pending_and_masked"},
256 
257 	/*
258 	 * Branches
259 	 */
260 	{V_P6,		0xc4,	"br_inst_retired"},
261 	{V_P6,		0xc5,	"br_miss_pred_retired"},
262 	{V_P6,		0xc9,	"br_taken_retired"},
263 	{V_P6,		0xca,	"br_miss_pred_taken_ret"},
264 	{V_P6,		0xe0,	"br_inst_decoded"},
265 	{V_P6,		0xe2,	"btb_misses"},
266 	{V_P6,		0xe4,	"br_bogus"},
267 	{V_P6,		0xe6,	"baclears"},
268 
269 	/*
270 	 * Stalls
271 	 */
272 	{V_P6,		0xa2,	"resource_stalls"},
273 	{V_P6,		0xd2,	"partial_rat_stalls"},
274 
275 	/*
276 	 * Segment register loads
277 	 */
278 	{V_P6,		0x6,	"segment_reg_loads"},
279 
280 	/*
281 	 * Clocks
282 	 */
283 	{V_P6,		0x79,	"cpu_clk_unhalted"},
284 
285 	/*
286 	 * MMX
287 	 */
288 	{V_P6mmx,	0xb0,	"mmx_instr_exec"},
289 	{V_P6mmx,	0xb1,	"mmx_sat_instr_exec"},
290 	{V_P6mmx,	0xb2,	"mmx_uops_exec"},
291 	{V_P6mmx,	0xb3,	"mmx_instr_type_exec"},
292 	{V_P6mmx,	0xcc,	"fp_mmx_trans"},
293 	{V_P6mmx,	0xcd,	"mmx_assists"},
294 	{V_P6mmx,	0xce,	"mmx_instr_ret"},
295 	{V_P6mmx,	0xd4,	"seg_rename_stalls"},
296 	{V_P6mmx,	0xd5,	"seg_reg_renames"},
297 	{V_P6mmx,	0xd6,	"ret_seg_renames"},
298 
299 	{V_END}
300 };
301 
302 #define	MAPCPUVER(cpuver)	(cpuvermap[(cpuver) - CPC_PENTIUM])
303 
304 static int
305 validargs(int cpuver, int regno)
306 {
307 	if (regno < 0 || regno > 1)
308 		return (0);
309 	cpuver -= CPC_PENTIUM;
310 	if (cpuver < 0 ||
311 	    cpuver >= sizeof (cpuvermap) / sizeof (cpuvermap[0]))
312 		return (0);
313 	return (1);
314 }
315 
316 /*ARGSUSED*/
317 static int
318 versionmatch(int cpuver, int regno, const struct nametable *n)
319 {
320 	if (!validargs(cpuver, regno) || (n->ver & MAPCPUVER(cpuver)) == 0)
321 		return (0);
322 
323 	switch (MAPCPUVER(cpuver)) {
324 	case V_P5:
325 	case V_P5 | V_P5mmx:
326 		break;
327 	case V_P6:
328 	case V_P6 | V_P6mmx:
329 		switch (n->bits) {
330 		case 0xc1:	/* flops */
331 		case 0x10:	/* fp_comp_ops_exe */
332 		case 0x14:	/* cycles_div_busy */
333 			/* only reg0 counts these */
334 			if (regno == 1)
335 				return (0);
336 			break;
337 		case 0x11:	/* fp_assist */
338 		case 0x12:	/* mul */
339 		case 0x13:	/* div */
340 			/* only 1 can count these */
341 			if (regno == 0)
342 				return (0);
343 			break;
344 		default:
345 			break;
346 		}
347 		break;
348 	default:
349 		return (0);
350 	}
351 
352 	return (1);
353 }
354 
355 static const struct nametable *
356 getnametable(int cpuver, int regno)
357 {
358 	const struct nametable *n;
359 
360 	if (!validargs(cpuver, regno))
361 		return (NULL);
362 
363 	switch (MAPCPUVER(cpuver)) {
364 	case V_P5:
365 	case V_P5 | V_P5mmx:
366 		n = P5mmx_names[regno];
367 		break;
368 	case V_P6:
369 	case V_P6 | V_P6mmx:
370 		n = P6_names;
371 		break;
372 	default:
373 		n = NULL;
374 		break;
375 	}
376 
377 	return (n);
378 }
379 
380 void
381 cpc_walk_names(int cpuver, int regno, void *arg,
382     void (*action)(void *, int, const char *, uint8_t))
383 {
384 	const struct nametable *n;
385 
386 	if ((n = getnametable(cpuver, regno)) == NULL)
387 		return;
388 	for (; n->ver != V_END; n++)
389 		if (versionmatch(cpuver, regno, n))
390 			action(arg, regno, n->name, n->bits);
391 }
392 
393 const char *
394 __cpc_reg_to_name(int cpuver, int regno, uint8_t bits)
395 {
396 	const struct nametable *n;
397 
398 	if ((n = getnametable(cpuver, regno)) == NULL)
399 		return (NULL);
400 	for (; n->ver != V_END; n++)
401 		if (bits == n->bits && versionmatch(cpuver, regno, n))
402 			return (n->name);
403 	return (NULL);
404 }
405 
406 /*
407  * Register names can be specified as strings or even as numbers
408  */
409 int
410 __cpc_name_to_reg(int cpuver, int regno, const char *name, uint8_t *bits)
411 {
412 	const struct nametable *n;
413 	char *eptr = NULL;
414 	long value;
415 
416 	if ((n = getnametable(cpuver, regno)) == NULL || name == NULL)
417 		return (-1);
418 	for (; n->ver != V_END; n++)
419 		if (strcmp(name, n->name) == 0 &&
420 		    versionmatch(cpuver, regno, n)) {
421 			*bits = n->bits;
422 			return (0);
423 		}
424 
425 	value = strtol(name, &eptr, 0);
426 	if (name != eptr && value >= 0 && value <= UINT8_MAX) {
427 		*bits = (uint8_t)value;
428 		return (0);
429 	}
430 
431 	return (-1);
432 }
433 
434 const char *
435 cpc_getcciname(int cpuver)
436 {
437 	if (validargs(cpuver, 0))
438 		switch (MAPCPUVER(cpuver)) {
439 		case V_P5:
440 			return ("Pentium");
441 		case V_P5 | V_P5mmx:
442 			return ("Pentium with MMX");
443 		case V_P6:
444 			return ("Pentium Pro, Pentium II");
445 		case V_P6 | V_P6mmx:
446 			return ("Pentium Pro with MMX, Pentium II");
447 		default:
448 			break;
449 		}
450 	return (NULL);
451 }
452 
453 const char *
454 cpc_getcpuref(int cpuver)
455 {
456 	if (validargs(cpuver, 0))
457 		switch (MAPCPUVER(cpuver)) {
458 		case V_P5:
459 		case V_P5 | V_P5mmx:
460 			return (gettext(
461 			    "See Appendix A.2 of the \"Intel Architecture "
462 			    "Software Developer's Manual,\" 243192, 1997"));
463 		case V_P6:
464 		case V_P6 | V_P6mmx:
465 			return (gettext(
466 			    "See Appendix A.1 of the \"Intel Architecture "
467 			    "Software Developer's Manual,\" 243192, 1997"));
468 		default:
469 			break;
470 		}
471 	return (NULL);
472 }
473 
474 /*
475  * This is a functional interface to allow CPUs with fewer %pic registers
476  * to share the same data structure as those with more %pic registers
477  * within the same instruction set family.
478  */
479 uint_t
480 cpc_getnpic(int cpuver)
481 {
482 	switch (cpuver) {
483 	case CPC_PENTIUM:
484 	case CPC_PENTIUM_MMX:
485 	case CPC_PENTIUM_PRO:
486 	case CPC_PENTIUM_PRO_MMX:
487 #define	EVENT	((cpc_event_t *)0)
488 		return (sizeof (EVENT->ce_pic) / sizeof	(EVENT->ce_pic[0]));
489 #undef	EVENT
490 	default:
491 		return (0);
492 	}
493 }
494 
/*
 * Extract bits u..l (inclusive, u >= l) of v, shifted down to bit 0.
 * The mask is built from an unsigned constant so that fields up to 31
 * bits wide do not shift into the sign bit of an int.  `l' is evaluated
 * twice, so the arguments must be free of side effects.
 */
#define	BITS(v, u, l)	\
	(((v) >> (l)) & ((1u << (1 + (u) - (l))) - 1u))
497 
498 #include "getcpuid.h"
499 
500 /*
501  * Return the version of the current processor.
502  *
503  * Version -1 is defined as 'not performance counter capable'
504  */
505 int
506 cpc_getcpuver(void)
507 {
508 	static int ver = -1;
509 	uint32_t maxeax;
510 	uint32_t vbuf[4];
511 
512 	if (ver != -1)
513 		return (ver);
514 
515 	maxeax = cpc_getcpuid(0, &vbuf[0], &vbuf[2], &vbuf[1]);
516 	{
517 		char *vendor = (char *)vbuf;
518 		vendor[12] = '\0';
519 
520 		if (strcmp(vendor, "GenuineIntel") != 0)
521 			return (ver);
522 	}
523 
524 	if (maxeax >= 1) {
525 		int family, model;
526 		uint32_t eax, ebx, ecx, edx;
527 
528 		eax = cpc_getcpuid(1, &ebx, &ecx, &edx);
529 
530 		if ((family = BITS(eax, 11, 8)) == 0xf)
531 			family = BITS(eax, 27, 20);
532 		if ((model = BITS(eax, 7, 4)) == 0xf)
533 			model = BITS(eax, 19, 16);
534 
535 		/*
536 		 * map family and model into the performance
537 		 * counter architectures we currently understand.
538 		 *
539 		 * See application note AP485 (from developer.intel.com)
540 		 * for further explanation.
541 		 */
542 		switch (family) {
543 		case 5:		/* Pentium and Pentium with MMX */
544 			ver = model < 4 ?
545 				CPC_PENTIUM : CPC_PENTIUM_MMX;
546 			break;
547 		case 6:		/* Pentium Pro and Pentium II and III */
548 			ver = BITS(edx, 23, 23) ?	   /* mmx check */
549 				CPC_PENTIUM_PRO_MMX : CPC_PENTIUM_PRO;
550 			break;
551 		default:
552 		case 0xf:	/* Pentium IV */
553 			break;
554 		}
555 	}
556 
557 	return (ver);
558 }
559