xref: /titanic_41/usr/src/uts/intel/pcbe/p123_pcbe.c (revision 9e86db79b7d1bbc5f2f04e99954cbd5eae0e22bb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * This file contains preset event names from the Performance Application
28  * Programming Interface v3.5 which included the following notice:
29  *
30  *                             Copyright (c) 2005,6
31  *                           Innovative Computing Labs
32  *                         Computer Science Department,
33  *                            University of Tennessee,
34  *                                 Knoxville, TN.
35  *                              All Rights Reserved.
36  *
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions are met:
40  *
41  *    * Redistributions of source code must retain the above copyright notice,
42  *      this list of conditions and the following disclaimer.
43  *    * Redistributions in binary form must reproduce the above copyright
44  *      notice, this list of conditions and the following disclaimer in the
45  *      documentation and/or other materials provided with the distribution.
46  *    * Neither the name of the University of Tennessee nor the names of its
47  *      contributors may be used to endorse or promote products derived from
48  *      this software without specific prior written permission.
49  *
50  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
51  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
54  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
55  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
56  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
57  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
58  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
59  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
60  * POSSIBILITY OF SUCH DAMAGE.
61  *
62  *
63  * This open source software license conforms to the BSD License template.
64  */
65 
66 /*
67  * Performance Counter Back-End for Pentiums I, II, and III.
68  */
69 
70 #include <sys/cpuvar.h>
71 #include <sys/param.h>
72 #include <sys/cpc_impl.h>
73 #include <sys/cpc_pcbe.h>
74 #include <sys/modctl.h>
75 #include <sys/inttypes.h>
76 #include <sys/systm.h>
77 #include <sys/cmn_err.h>
78 #include <sys/x86_archext.h>
79 #include <sys/sdt.h>
80 #include <sys/archsystm.h>
81 #include <sys/privregs.h>
82 #include <sys/ddi.h>
83 #include <sys/sunddi.h>
84 
/*
 * Helpers that map between the hardware counter value and the 64-bit
 * software counter; both are defined near the end of this file.
 */
static int64_t diff3931(uint64_t sample, uint64_t old);
static uint64_t trunc3931(uint64_t value);

/*
 * Forward declarations.  ptm_pcbe_init() is called from _init(); the
 * remaining ptm_pcbe_*() functions are referenced from the ptm_pcbe_ops
 * vector defined below.
 */
static int ptm_pcbe_init(void);
static uint_t ptm_pcbe_ncounters(void);
static const char *ptm_pcbe_impl_name(void);
static const char *ptm_pcbe_cpuref(void);
static char *ptm_pcbe_list_events(uint_t picnum);
static char *ptm_pcbe_list_attrs(void);
static uint64_t ptm_pcbe_event_coverage(char *event);
static int ptm_pcbe_pic_index(char *picname);
static uint64_t	ptm_pcbe_overflow_bitmap(void);
static int ptm_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
    uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
    void *token);
static void ptm_pcbe_program(void *token);
static void ptm_pcbe_allstop(void);
static void ptm_pcbe_sample(void *token);
static void ptm_pcbe_free(void *config);
104 
/*
 * Operations vector registered with the CPC framework through the
 * modlpcbe linkage at the end of this file.  The initializer is
 * positional.  ptm_pcbe_init() stores CPC_CAP_OVERFLOW_INTERRUPT into the
 * pcbe_caps member when it finds a family 6 processor.
 */
pcbe_ops_t ptm_pcbe_ops = {
	PCBE_VER_1,
	0,	/* presumably the pcbe_caps slot; confirm against pcbe_ops_t */
	ptm_pcbe_ncounters,
	ptm_pcbe_impl_name,
	ptm_pcbe_cpuref,
	ptm_pcbe_list_events,
	ptm_pcbe_list_attrs,
	ptm_pcbe_event_coverage,
	ptm_pcbe_overflow_bitmap,
	ptm_pcbe_configure,
	ptm_pcbe_program,
	ptm_pcbe_allstop,
	ptm_pcbe_sample,
	ptm_pcbe_free
};
121 
/*
 * Counter architecture in use, chosen by ptm_pcbe_init() from the CPU
 * family: PTM_VER_P5 for family 5, PTM_VER_P6 for family 6.
 */
typedef enum _ptm_ver {
	PTM_VER_P5,
	PTM_VER_P6
} ptm_ver_t;

/*
 * Module state, all filled in once by ptm_pcbe_init():
 *   ptm_ver       - counter architecture of this processor
 *   ptm_impl_name - string returned by ptm_pcbe_impl_name()
 *   ptm_cpuref    - string returned by ptm_pcbe_cpuref()
 *   pic_events[]  - comma-separated event name list for each counter
 */
static ptm_ver_t ptm_ver;
static const char *ptm_impl_name;
static const char *ptm_cpuref;
static char *pic_events[2] = { NULL, NULL };

/*
 * Indicates whether the "rdpmc" instruction is available on this processor.
 * ptm_pcbe_init() sets this when the X86_MMX feature bit is present.
 */
static int ptm_rdpmc_avail = 0;

/* Value written to the control MSRs to stop counting. */
#define	ALL_STOPPED	0ULL
138 
/*
 * Per-counter configuration allocated by ptm_pcbe_configure() and released
 * by ptm_pcbe_free().
 */
typedef struct _ptm_pcbe_config {
	uint8_t		ptm_picno;	/* 0 for pic0 or 1 for pic1 */
	uint32_t	ptm_ctl;    /* P6: PerfEventSelect; P5: cesr, shifted */
	uint64_t	ptm_rawpic;	/* counter value, see trunc3931() */
} ptm_pcbe_config_t;

/*
 * One hardware event: the event-select code and its name.  Tables of
 * these are terminated by an entry whose bits field is NT_END.
 */
struct nametable {
	uint8_t		bits;
	const char	*name;
};

/*
 * One generic (PAPI preset) event: the generic name, the name of the
 * hardware event it maps to, and the unit mask applied on P6.  Tables are
 * terminated by an entry whose name is NULL (CPC_GEN_END).
 */
typedef struct _ptm_generic_events {
	char *name;
	char *event;
	uint8_t umask;
} ptm_generic_event_t;

/* Table terminators; CPC_GEN_END leaves umask implicitly zero. */
#define	NT_END 0xFF
#define	CPC_GEN_END { NULL, NULL }
158 
/*
 * Basic Pentium events
 *
 * Initializer fragment (event code, event name) shared by the tables for
 * both counters, P5mmx_names0 and P5mmx_names1.  The order of entries is
 * the order in which names appear in the event list built by
 * ptm_pcbe_init().
 */
#define	P5_EVENTS				\
	{0x0,	"data_read"},			\
	{0x1,	"data_write"},			\
	{0x2,	"data_tlb_miss"},		\
	{0x3,	"data_read_miss"},		\
	{0x4,	"data_write_miss"},		\
	{0x5,	"write_hit_to_M_or_E"},		\
	{0x6,	"dcache_lines_wrback"},		\
	{0x7,	"external_snoops"},		\
	{0x8,	"external_dcache_snoop_hits"},	\
	{0x9,	"memory_access_in_both_pipes"},	\
	{0xa,	"bank_conflicts"},		\
	{0xb,	"misaligned_ref"},		\
	{0xc,	"code_read"},			\
	{0xd,	"code_tlb_miss"},		\
	{0xe,	"code_cache_miss"},		\
	{0xf,	"any_segreg_loaded"},		\
	{0x12,	"branches"},			\
	{0x13,	"btb_hits"},			\
	{0x14,	"taken_or_btb_hit"},		\
	{0x15,	"pipeline_flushes"},		\
	{0x16,	"instr_exec"},			\
	{0x17,	"instr_exec_V_pipe"},		\
	{0x18,	"clks_bus_cycle"},		\
	{0x19,	"clks_full_wbufs"},		\
	{0x1a,	"pipe_stall_read"},		\
	{0x1b,	"stall_on_write_ME"},		\
	{0x1c,	"locked_bus_cycle"},		\
	{0x1d,	"io_rw_cycles"},		\
	{0x1e,	"reads_noncache_mem"},		\
	{0x1f,	"pipeline_agi_stalls"},		\
	{0x22,	"flops"},			\
	{0x23,	"bp_match_dr0"},		\
	{0x24,	"bp_match_dr1"},		\
	{0x25,	"bp_match_dr2"},		\
	{0x26,	"bp_match_dr3"},		\
	{0x27,	"hw_intrs"},			\
	{0x28,	"data_rw"},			\
	{0x29,	"data_rw_miss"}
201 
/*
 * Family 5 events countable on counter 0: the shared P5_EVENTS followed by
 * the codes 0x2a-0x3b, whose meaning differs between the two counters.
 */
static const struct nametable P5mmx_names0[] = {
	P5_EVENTS,
	{0x2a,	"bus_ownership_latency"},
	{0x2b,	"mmx_instr_upipe"},
	{0x2c,	"cache_M_line_sharing"},
	{0x2d,	"emms_instr"},
	{0x2e,	"bus_util_processor"},
	{0x2f,	"sat_mmx_instr"},
	{0x30,	"clks_not_HLT"},
	{0x31,	"mmx_data_read"},
	{0x32,	"clks_fp_stall"},
	{0x33,	"d1_starv_fifo_0"},
	{0x34,	"mmx_data_write"},
	{0x35,	"pipe_flush_wbp"},
	{0x36,	"mmx_misalign_data_refs"},
	{0x37,	"rets_pred_incorrect"},
	{0x38,	"mmx_multiply_unit_interlock"},
	{0x39,	"rets"},
	{0x3a,	"btb_false_entries"},
	{0x3b,	"clocks_stall_full_wb"},
	{NT_END, ""}
};

/*
 * Family 5 events countable on counter 1.
 *
 * NOTE(review): "cache_lint_sharing" may be a misspelling of
 * "cache_line_sharing"; it is a user-visible event name, so confirm before
 * changing it.
 */
static const struct nametable P5mmx_names1[] = {
	P5_EVENTS,
	{0x2a,	"bus_ownership_transfers"},
	{0x2b,	"mmx_instr_vpipe"},
	{0x2c,	"cache_lint_sharing"},
	{0x2d,	"mmx_fp_transitions"},
	{0x2e,	"writes_noncache_mem"},
	{0x2f,	"sats_performed"},
	{0x30,	"clks_dcache_tlb_miss"},
	{0x31,	"mmx_data_read_miss"},
	{0x32,	"taken_br"},
	{0x33,	"d1_starv_fifo_1"},
	{0x34,	"mmx_data_write_miss"},
	{0x35,	"pipe_flush_wbp_wb"},
	{0x36,	"mmx_pipe_stall_data_read"},
	{0x37,	"rets_pred"},
	{0x38,	"movd_movq_stall"},
	{0x39,	"rsb_overflow"},
	{0x3a,	"btb_mispred_nt"},
	{0x3b,	"mmx_stall_write_ME"},
	{NT_END, ""}
};

/* Family 5 event tables indexed by counter number. */
static const struct nametable *P5mmx_names[2] = {
	P5mmx_names0,
	P5mmx_names1
};
252 
/*
 * Pentium Pro and Pentium II events
 *
 * A single table serves both counters (see P6_names below), so the
 * "0 only" / "1 only" remarks on individual entries are informational:
 * find_event() will return these entries for either counter.
 */
static const struct nametable _P6_names[] = {
	/*
	 * Data cache unit
	 */
	{0x43,	"data_mem_refs"},
	{0x45,	"dcu_lines_in"},
	{0x46,	"dcu_m_lines_in"},
	{0x47,	"dcu_m_lines_out"},
	{0x48,	"dcu_miss_outstanding"},

	/*
	 * Instruction fetch unit
	 */
	{0x80,	"ifu_ifetch"},
	{0x81,	"ifu_ifetch_miss"},
	{0x85,	"itlb_miss"},
	{0x86,	"ifu_mem_stall"},
	{0x87,	"ild_stall"},

	/*
	 * L2 cache
	 */
	{0x28,	"l2_ifetch"},
	{0x29,	"l2_ld"},
	{0x2a,	"l2_st"},
	{0x24,	"l2_lines_in"},
	{0x26,	"l2_lines_out"},
	{0x25,	"l2_m_lines_inm"},
	{0x27,	"l2_m_lines_outm"},
	{0x2e,	"l2_rqsts"},
	{0x21,	"l2_ads"},
	{0x22,	"l2_dbus_busy"},
	{0x23,	"l2_dbus_busy_rd"},

	/*
	 * External bus logic
	 */
	{0x62,	"bus_drdy_clocks"},
	{0x63,	"bus_lock_clocks"},
	{0x60,	"bus_req_outstanding"},
	{0x65,	"bus_tran_brd"},
	{0x66,	"bus_tran_rfo"},
	{0x67,	"bus_trans_wb"},
	{0x68,	"bus_tran_ifetch"},
	{0x69,	"bus_tran_inval"},
	{0x6a,	"bus_tran_pwr"},
	{0x6b,	"bus_trans_p"},
	{0x6c,	"bus_trans_io"},
	{0x6d,	"bus_tran_def"},
	{0x6e,	"bus_tran_burst"},
	{0x70,	"bus_tran_any"},
	{0x6f,	"bus_tran_mem"},
	{0x64,	"bus_data_rcv"},
	{0x61,	"bus_bnr_drv"},
	{0x7a,	"bus_hit_drv"},
	{0x7b,	"bus_hitm_drv"},
	{0x7e,	"bus_snoop_stall"},

	/*
	 * Floating point unit
	 */
	{0xc1,	"flops"},		/* 0 only */
	{0x10,	"fp_comp_ops_exe"},	/* 0 only */
	{0x11,	"fp_assist"},		/* 1 only */
	{0x12,	"mul"},			/* 1 only */
	{0x13,	"div"},			/* 1 only */
	{0x14,	"cycles_div_busy"},	/* 0 only */

	/*
	 * Memory ordering
	 */
	{0x3,	"ld_blocks"},
	{0x4,	"sb_drains"},
	{0x5,	"misalign_mem_ref"},

	/*
	 * Instruction decoding and retirement
	 */
	{0xc0,	"inst_retired"},
	{0xc2,	"uops_retired"},
	{0xd0,	"inst_decoder"},

	/*
	 * Interrupts
	 */
	{0xc8,	"hw_int_rx"},
	{0xc6,	"cycles_int_masked"},
	{0xc7,	"cycles_int_pending_and_masked"},

	/*
	 * Branches
	 */
	{0xc4,	"br_inst_retired"},
	{0xc5,	"br_miss_pred_retired"},
	{0xc9,	"br_taken_retired"},
	{0xca,	"br_miss_pred_taken_ret"},
	{0xe0,	"br_inst_decoded"},
	{0xe2,	"btb_misses"},
	{0xe4,	"br_bogus"},
	{0xe6,	"baclears"},

	/*
	 * Stalls
	 */
	{0xa2,	"resource_stalls"},
	{0xd2,	"partial_rat_stalls"},

	/*
	 * Segment register loads
	 */
	{0x6,	"segment_reg_loads"},

	/*
	 * Clocks
	 */
	{0x79,	"cpu_clk_unhalted"},

	/*
	 * MMX
	 */
	{0xb0,	"mmx_instr_exec"},
	{0xb1,	"mmx_sat_instr_exec"},
	{0xb2,	"mmx_uops_exec"},
	{0xb3,	"mmx_instr_type_exec"},
	{0xcc,	"fp_mmx_trans"},
	{0xcd,	"mmx_assists"},
	{0xce,	"mmx_instr_ret"},
	{0xd4,	"seg_rename_stalls"},
	{0xd5,	"seg_reg_renames"},
	{0xd6,	"ret_seg_renames"},

	{NT_END, ""}
};

/* Family 6 event tables indexed by counter number; both use _P6_names. */
static const struct nametable *P6_names[2] = {
	_P6_names,
	_P6_names
};
394 
/*
 * Generic (PAPI preset) events available on both family 5 counters:
 * { generic name, hardware event name, umask }.  The "PAPI_fp_ops" entry
 * omits the umask, which is therefore implicitly zero.  The umask is only
 * applied by ptm_pcbe_configure() on P6.
 */
#define	P5_GENERIC_EVENTS					\
	{ "PAPI_tot_ins",	"instr_exec",	 0x0 },		\
	{ "PAPI_tlb_dm",	"data_tlb_miss", 0x0 },		\
	{ "PAPI_tlb_im",	"code_tlb_miss", 0x0 },		\
	{ "PAPI_fp_ops",	"flops" }

/* Generic events for family 5 counter 0. */
static const ptm_generic_event_t P5mmx_generic_names0[] = {
	P5_GENERIC_EVENTS,
	{ "PAPI_tot_cyc",	"clks_not_HLT", 0x0 },
	CPC_GEN_END
};

/* Generic events for family 5 counter 1. */
static const ptm_generic_event_t P5mmx_generic_names1[] = {
	P5_GENERIC_EVENTS,
	{ "PAPI_br_ins",	"taken_br",	0x0 },
	CPC_GEN_END
};

/* Family 5 generic event tables indexed by counter number. */
static const ptm_generic_event_t *P5mmx_generic_names[2] = {
	P5mmx_generic_names0,
	P5mmx_generic_names1
};
417 
418 static const ptm_generic_event_t _P6_generic_names[] = {
419 	{ "PAPI_ca_shr",	"l2_ifetch",		0xf },
420 	{ "PAPI_ca_cln",	"bus_tran_rfo",		0x0 },
421 	{ "PAPI_ca_itv",	"bus_tran_inval",	0x0 },
422 	{ "PAPI_tlb_im",	"itlb_miss",		0x0 },
423 	{ "PAPI_btac_m",	"btb_misses",		0x0 },
424 	{ "PAPI_hw_int",	"hw_int_rx",		0x0 },
425 	{ "PAPI_br_cn",		"br_inst_retired",	0x0 },
426 	{ "PAPI_br_tkn",	"br_taken_retired",	0x0 },
427 	{ "PAPI_br_msp",	"br_miss_pred_taken_ret", 0x0 },
428 	{ "PAPI_br_ins",	"br_inst_retired",	0x0 },
429 	{ "PAPI_res_stl",	"resource_stalls",	0x0 },
430 	{ "PAPI_tot_iis",	"inst_decoder",		0x0 },
431 	{ "PAPI_tot_ins",	"inst_retired",		0x0 },
432 	{ "PAPI_tot_cyc",	"cpu_clk_unhalted",	0x0 },
433 	{ "PAPI_l1_dcm",	"dcu_lines_in",		0x0 },
434 	{ "PAPI_l1_icm",	"l2_ifetch",		0xf },
435 	{ "PAPI_l1_tcm",	"l2_rqsts",		0xf },
436 	{ "PAPI_l1_dca",	"data_mem_refs",	0x0 },
437 	{ "PAPI_l1_stm",	"l2_st",		0xf },
438 	{ "PAPI_l2_icm",	"bus_tran_ifetch",	0x0 },
439 	{ "PAPI_l2_dcr",	"l2_ld",		0xf },
440 	{ "PAPI_l2_dcw",	"l2_st",		0xf },
441 	{ "PAPI_l2_tcm",	"l2_lines_in",		0x0 },
442 	{ "PAPI_l2_tca",	"l2_rqsts",		0xf },
443 	{ "PAPI_l2_tcw",	"l2_st",		0xf },
444 	{ "PAPI_l2_stm",	"l2_m_lines_inm",	0x0 },
445 	{ "PAPI_fp_ins",	"flops",		0x0 },
446 	{ "PAPI_fp_ops",	"flops",		0x0 },
447 	{ "PAPI_fml_ins",	"mul",			0x0 },
448 	{ "PAPI_fdv_ins",	"div",			0x0 }
449 };
450 
451 static const ptm_generic_event_t *P6_generic_names[2] = {
452 	_P6_generic_names,
453 	_P6_generic_names
454 };
455 
/*
 * Event tables for the running processor, indexed by counter number.
 * Both pointers are set by ptm_pcbe_init().
 */
static const struct nametable **events;
static const ptm_generic_event_t **generic_events;

/*
 * Extract bits u..l (inclusive) of v.
 * NOTE(review): the mask is built with an int-sized "1 <<", so a field
 * width of 31 or more bits would overflow.  No use of this macro is
 * visible in this file.
 */
#define	BITS(v, u, l)	\
	(((v) >> (l)) & ((1 << (1 + (u) - (l))) - 1))
461 
/*
 * "Well known" bit fields in the Pentium CES register
 * The interfaces in libcpc should make these #defines uninteresting.
 *
 * The *_SHIFT / *_MASK pairs describe the event-select fields; the other
 * values are bit positions.  The counter 1 positions are the counter 0
 * positions plus 16.
 */
#define	CPC_P5_CESR_ES0_SHIFT	0
#define	CPC_P5_CESR_ES0_MASK	0x3f
#define	CPC_P5_CESR_ES1_SHIFT	16
#define	CPC_P5_CESR_ES1_MASK	0x3f

#define	CPC_P5_CESR_OS0		6
#define	CPC_P5_CESR_USR0	7
#define	CPC_P5_CESR_CLK0	8
#define	CPC_P5_CESR_PC0		9
#define	CPC_P5_CESR_OS1		(CPC_P5_CESR_OS0 + 16)
#define	CPC_P5_CESR_USR1	(CPC_P5_CESR_USR0 + 16)
#define	CPC_P5_CESR_CLK1	(CPC_P5_CESR_CLK0 + 16)
#define	CPC_P5_CESR_PC1		(CPC_P5_CESR_PC0 + 16)

/*
 * "Well known" bit fields in the Pentium Pro PerfEvtSel registers
 * The interfaces in libcpc should make these #defines uninteresting.
 *
 * The first group are bit positions; the *_SHIFT / *_MASK pairs describe
 * multi-bit fields (the mask is applied before shifting).
 */
#define	CPC_P6_PES_INV		23
#define	CPC_P6_PES_EN		22
#define	CPC_P6_PES_INT		20
#define	CPC_P6_PES_PC		19
#define	CPC_P6_PES_E		18
#define	CPC_P6_PES_OS		17
#define	CPC_P6_PES_USR		16

#define	CPC_P6_PES_UMASK_SHIFT	8
#define	CPC_P6_PES_UMASK_MASK	(0xffu)

#define	CPC_P6_PES_CMASK_SHIFT	24
#define	CPC_P6_PES_CMASK_MASK	(0xffu)

#define	CPC_P6_PES_PIC0_MASK	(0xffu)
#define	CPC_P6_PES_PIC1_MASK	(0xffu)

/* Single-bit masks derived from the bit positions above. */
#define	P6_PES_EN	(UINT32_C(1) << CPC_P6_PES_EN)
#define	P6_PES_INT	(UINT32_C(1) << CPC_P6_PES_INT)
#define	P6_PES_OS	(UINT32_C(1) << CPC_P6_PES_OS)

/*
 * Pentium 5 attributes
 * (internal flag bits collected by ptm_pcbe_configure())
 */
#define	P5_NOEDGE	0x1	/* "noedge"	- no edge detection */
#define	P5_PC		0x2	/* "pc"		- pin control */

/*
 * Pentium 6 attributes
 * (internal flag bits collected by ptm_pcbe_configure())
 */
#define	P6_NOEDGE	0x1
#define	P6_PC		0x2
#define	P6_INV		0x4	/* "inv" - count inverted transitions */
#define	P6_INT		0x8	/* "int" - interrupt on overflow */

/*
 * CPU reference strings
 * (returned to the framework by ptm_pcbe_cpuref())
 */

#define	P5_CPUREF	"See Appendix A.4 of the \"IA-32 Intel Architecture "  \
			"Software Developer's Manual Volume 3: System "	       \
			"Programming Guide,\" Order # 245472-012, 2003"

#define	P6_CPUREF	"See Appendix A.3 of the \"IA-32 Intel Architecture "  \
			"Software Developer's Manual Volume 3: System "	       \
			"Programming Guide,\" Order # 245472-012, 2003"
530 
531 static int
532 ptm_pcbe_init(void)
533 {
534 	const struct nametable		*n;
535 	const ptm_generic_event_t	*gevp;
536 	int				i;
537 	size_t				size;
538 
539 	if (x86_feature & X86_MMX)
540 		ptm_rdpmc_avail = 1;
541 
542 	/*
543 	 * Discover type of CPU and set events pointer appropriately.
544 	 *
545 	 * Map family and model into the performance
546 	 * counter architectures we currently understand.
547 	 *
548 	 * See application note AP485 (from developer.intel.com)
549 	 * for further explanation.
550 	 */
551 	if (cpuid_getvendor(CPU) != X86_VENDOR_Intel)
552 		return (-1);
553 	switch (cpuid_getfamily(CPU)) {
554 	case 5:		/* Pentium and Pentium with MMX */
555 		events = P5mmx_names;
556 		generic_events = P5mmx_generic_names;
557 		ptm_ver = PTM_VER_P5;
558 		ptm_cpuref = P5_CPUREF;
559 		if (cpuid_getmodel(CPU) < 4)
560 			ptm_impl_name = "Pentium";
561 		else
562 			ptm_impl_name = "Pentium with MMX";
563 		break;
564 	case 6:		/* Pentium Pro and Pentium II and III */
565 		events = P6_names;
566 		generic_events = P6_generic_names;
567 		ptm_ver = PTM_VER_P6;
568 		ptm_cpuref = P6_CPUREF;
569 		ptm_pcbe_ops.pcbe_caps = CPC_CAP_OVERFLOW_INTERRUPT;
570 		if (x86_feature & X86_MMX)
571 			ptm_impl_name = "Pentium Pro with MMX, Pentium II";
572 		else
573 			ptm_impl_name = "Pentium Pro, Pentium II";
574 		break;
575 	default:
576 		return (-1);
577 	}
578 
579 	/*
580 	 * Initialize the list of events for each PIC.
581 	 * Do two passes: one to compute the size necessary and another
582 	 * to copy the strings. Need room for event, comma, and NULL terminator.
583 	 */
584 	for (i = 0; i < 2; i++) {
585 		size = 0;
586 		for (n = events[i]; n->bits != NT_END; n++)
587 			size += strlen(n->name) + 1;
588 		for (gevp = generic_events[i]; gevp->name != NULL; gevp++)
589 			size += strlen(gevp->name) + 1;
590 		pic_events[i] = kmem_alloc(size + 1, KM_SLEEP);
591 		*pic_events[i] = '\0';
592 		for (n = events[i]; n->bits != NT_END; n++) {
593 			(void) strcat(pic_events[i], n->name);
594 			(void) strcat(pic_events[i], ",");
595 		}
596 		for (gevp = generic_events[i]; gevp->name != NULL; gevp++) {
597 			(void) strcat(pic_events[i], gevp->name);
598 			(void) strcat(pic_events[i], ",");
599 		}
600 
601 		/*
602 		 * Remove trailing comma.
603 		 */
604 		pic_events[i][size - 1] = '\0';
605 	}
606 
607 	return (0);
608 }
609 
/*
 * Report the number of counters handled by this back-end: always two.
 */
static uint_t
ptm_pcbe_ncounters(void)
{
	return (2);
}
615 
/*
 * Return the implementation name string chosen by ptm_pcbe_init().
 */
static const char *
ptm_pcbe_impl_name(void)
{
	return (ptm_impl_name);
}
621 
/*
 * Return the documentation reference string (P5_CPUREF or P6_CPUREF)
 * chosen by ptm_pcbe_init().
 */
static const char *
ptm_pcbe_cpuref(void)
{
	return (ptm_cpuref);
}
627 
628 static char *
629 ptm_pcbe_list_events(uint_t picnum)
630 {
631 	ASSERT(picnum >= 0 && picnum < cpc_ncounters);
632 
633 	if (pic_events[0] == NULL) {
634 		ASSERT(pic_events[1] == NULL);
635 	}
636 
637 	return (pic_events[picnum]);
638 }
639 
640 static char *
641 ptm_pcbe_list_attrs(void)
642 {
643 	if (ptm_ver == PTM_VER_P5)
644 		return ("noedge,pc");
645 	else
646 		return ("noedge,pc,inv,int,umask,cmask");
647 }
648 
649 static const ptm_generic_event_t *
650 find_generic_event(int regno, char *name)
651 {
652 	const ptm_generic_event_t	*gevp;
653 
654 	for (gevp = generic_events[regno]; gevp->name != NULL; gevp++)
655 		if (strcmp(name, gevp->name) == 0)
656 			return (gevp);
657 
658 	return (NULL);
659 }
660 
661 static const struct nametable *
662 find_event(int regno, char *name)
663 {
664 	const struct nametable *n;
665 
666 	n = events[regno];
667 
668 	for (; n->bits != NT_END; n++)
669 		if (strcmp(name, n->name) == 0)
670 			return (n);
671 
672 	return (NULL);
673 }
674 
/*
 * Return a bitmap of the counters able to count 'event': bit 0 for
 * counter 0, bit 1 for counter 1.  Both hardware and generic event names
 * are recognized; an unknown name yields 0.
 */
static uint64_t
ptm_pcbe_event_coverage(char *event)
{
	uint64_t	mask = 0;
	int		pic;

	for (pic = 0; pic < 2; pic++) {
		if (find_event(pic, event) != NULL ||
		    find_generic_event(pic, event) != NULL)
			mask |= (UINT64_C(1) << pic);
	}

	return (mask);
}
689 
690 static uint64_t
691 ptm_pcbe_overflow_bitmap(void)
692 {
693 	uint64_t	ret = 0;
694 	uint64_t	pes[2];
695 
696 	/*
697 	 * P5 is not capable of generating interrupts.
698 	 */
699 	ASSERT(ptm_ver == PTM_VER_P6);
700 
701 	/*
702 	 * CPC could have caused an interrupt provided that
703 	 *
704 	 * 1) Counters are enabled
705 	 * 2) Either counter has requested an interrupt
706 	 */
707 
708 	pes[0] = rdmsr(REG_PERFEVNT0);
709 	if (((uint32_t)pes[0] & P6_PES_EN) != P6_PES_EN)
710 		return (0);
711 
712 	/*
713 	 * If a particular counter requested an interrupt, assume it caused
714 	 * this interrupt. There is no way to determine which counter overflowed
715 	 * on this hardware other than by using unreliable heuristics.
716 	 */
717 
718 	pes[1] = rdmsr(REG_PERFEVNT1);
719 	if ((uint32_t)pes[0] & P6_PES_INT)
720 		ret |= 0x1;
721 	if ((uint32_t)pes[1] & P6_PES_INT)
722 		ret |= 0x2;
723 
724 	return (ret);
725 }
726 
727 /*ARGSUSED*/
728 static int
729 ptm_pcbe_configure(uint_t picnum, char *eventname, uint64_t preset,
730     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
731     void *token)
732 {
733 	ptm_pcbe_config_t		*conf;
734 	const struct nametable		*n;
735 	const ptm_generic_event_t	*gevp;
736 	struct nametable		nt_raw = { 0, "raw" };
737 	int				i;
738 	int				ptm_flags = 0;
739 
740 	/*
741 	 * If we've been handed an existing configuration, we need only preset
742 	 * the counter value.
743 	 */
744 	if (*data != NULL) {
745 		conf = *data;
746 		conf->ptm_rawpic = trunc3931(preset);
747 		return (0);
748 	}
749 
750 	if (picnum != 0 && picnum != 1)
751 		return (CPC_INVALID_PICNUM);
752 
753 	conf = kmem_alloc(sizeof (ptm_pcbe_config_t), KM_SLEEP);
754 
755 	conf->ptm_picno = picnum;
756 	conf->ptm_rawpic = trunc3931(preset);
757 	conf->ptm_ctl = 0;
758 
759 	if ((n = find_event(picnum, eventname)) == NULL) {
760 		if ((gevp = find_generic_event(picnum, eventname)) != NULL) {
761 			n = find_event(picnum, gevp->event);
762 			ASSERT(n != NULL);
763 
764 			if (nattrs > 0) {
765 				kmem_free(conf, sizeof (ptm_pcbe_config_t));
766 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
767 			}
768 
769 			if (ptm_ver == PTM_VER_P6)
770 				conf->ptm_ctl |= gevp->umask <<
771 				    CPC_P6_PES_UMASK_SHIFT;
772 		} else {
773 			long tmp;
774 
775 			/*
776 			 * If ddi_strtol() likes this event, use it as a raw
777 			 * event code.
778 			 */
779 			if (ddi_strtol(eventname, NULL, 0, &tmp) != 0) {
780 				kmem_free(conf, sizeof (ptm_pcbe_config_t));
781 				return (CPC_INVALID_EVENT);
782 			}
783 
784 			nt_raw.bits = tmp;
785 
786 			if (ptm_ver == PTM_VER_P5)
787 				nt_raw.bits &= CPC_P5_CESR_ES0_MASK;
788 			else
789 				nt_raw.bits &= CPC_P6_PES_PIC0_MASK;
790 
791 			n = &nt_raw;
792 		}
793 	}
794 
795 	if (ptm_ver == PTM_VER_P5) {
796 		int picshift;
797 		picshift = (picnum == 0) ? 0 : 16;
798 
799 		for (i = 0; i < nattrs; i++) {
800 			/*
801 			 * Value of these attributes is ignored; their presence
802 			 * alone tells us to set the corresponding flag.
803 			 */
804 			if (strncmp(attrs[i].ka_name, "noedge", 7) == 0) {
805 				if (attrs[i].ka_val != 0)
806 					ptm_flags |= P5_NOEDGE;
807 			} else if (strncmp(attrs[i].ka_name, "pc", 3) == 0) {
808 				if (attrs[i].ka_val != 0)
809 					ptm_flags |= P5_PC;
810 			} else {
811 				kmem_free(conf, sizeof (ptm_pcbe_config_t));
812 				return (CPC_INVALID_ATTRIBUTE);
813 			}
814 		}
815 
816 		if (flags & CPC_COUNT_USER)
817 			conf->ptm_ctl |= (1 << (CPC_P5_CESR_USR0 + picshift));
818 		if (flags & CPC_COUNT_SYSTEM)
819 			conf->ptm_ctl |= (1 << (CPC_P5_CESR_OS0 + picshift));
820 		if (ptm_flags & P5_NOEDGE)
821 			conf->ptm_ctl |= (1 << (CPC_P5_CESR_CLK0 + picshift));
822 		if (ptm_flags & P5_PC)
823 			conf->ptm_ctl |= (1 << (CPC_P5_CESR_PC0 + picshift));
824 
825 		ASSERT((n->bits | CPC_P5_CESR_ES0_MASK) ==
826 		    CPC_P5_CESR_ES0_MASK);
827 
828 		conf->ptm_ctl |= (n->bits << picshift);
829 	} else {
830 		for (i = 0; i < nattrs; i++) {
831 			if (strncmp(attrs[i].ka_name, "noedge", 6) == 0) {
832 				if (attrs[i].ka_val != 0)
833 					ptm_flags |= P6_NOEDGE;
834 			} else if (strncmp(attrs[i].ka_name, "pc", 2) == 0) {
835 				if (attrs[i].ka_val != 0)
836 					ptm_flags |= P6_PC;
837 			} else if (strncmp(attrs[i].ka_name, "inv", 3) == 0) {
838 				if (attrs[i].ka_val != 0)
839 					ptm_flags |= P6_INV;
840 			} else if (strncmp(attrs[i].ka_name, "umask", 5) == 0) {
841 				if ((attrs[i].ka_val | CPC_P6_PES_UMASK_MASK) !=
842 				    CPC_P6_PES_UMASK_MASK) {
843 					kmem_free(conf,
844 					    sizeof (ptm_pcbe_config_t));
845 					return (CPC_ATTRIBUTE_OUT_OF_RANGE);
846 				}
847 				conf->ptm_ctl |= (uint8_t)attrs[i].ka_val <<
848 				    CPC_P6_PES_UMASK_SHIFT;
849 			} else if (strncmp(attrs[i].ka_name, "cmask", 5) == 0) {
850 				if ((attrs[i].ka_val | CPC_P6_PES_CMASK_MASK) !=
851 				    CPC_P6_PES_CMASK_MASK) {
852 					kmem_free(conf,
853 					    sizeof (ptm_pcbe_config_t));
854 					return (CPC_ATTRIBUTE_OUT_OF_RANGE);
855 				}
856 				conf->ptm_ctl |= (uint8_t)attrs[i].ka_val <<
857 				    CPC_P6_PES_CMASK_SHIFT;
858 			} else if (strncmp(attrs[i].ka_name, "int", 3) == 0) {
859 				if (attrs[i].ka_val != 0)
860 					ptm_flags |= P6_INT;
861 			} else {
862 				kmem_free(conf, sizeof (ptm_pcbe_config_t));
863 				return (CPC_INVALID_ATTRIBUTE);
864 			}
865 		}
866 
867 		if (flags & CPC_OVF_NOTIFY_EMT)
868 			/*
869 			 * If the user has requested notification of overflows,
870 			 * we automatically program the hardware to generate
871 			 * overflow interrupts.
872 			 */
873 			ptm_flags |= P6_INT;
874 		if (flags & CPC_COUNT_USER)
875 			conf->ptm_ctl |= (1 << CPC_P6_PES_USR);
876 		if (flags & CPC_COUNT_SYSTEM)
877 			conf->ptm_ctl |= (1 << CPC_P6_PES_OS);
878 		if ((ptm_flags & P6_NOEDGE) == 0)
879 			conf->ptm_ctl |= (1 << CPC_P6_PES_E);
880 		if (ptm_flags & P6_PC)
881 			conf->ptm_ctl |= (1 << CPC_P6_PES_PC);
882 		if (ptm_flags & P6_INV)
883 			conf->ptm_ctl |= (1 << CPC_P6_PES_INV);
884 		if (ptm_flags & P6_INT)
885 			conf->ptm_ctl |= (1 << CPC_P6_PES_INT);
886 
887 		ASSERT((n->bits | CPC_P6_PES_PIC0_MASK) ==
888 		    CPC_P6_PES_PIC0_MASK);
889 
890 		conf->ptm_ctl |= n->bits;
891 	}
892 
893 	*data = conf;
894 	return (0);
895 }
896 
/*
 * Load the configurations attached to 'token' into the hardware and start
 * counting.
 *
 * Up to two configurations are fetched with kcpc_next_config().  When only
 * one exists, a zeroed local stand-in ("empty") is used for the other
 * counter.  Panics if the token has no configuration at all.
 */
static void
ptm_pcbe_program(void *token)
{
	ptm_pcbe_config_t	*pic0;
	ptm_pcbe_config_t	*pic1;
	ptm_pcbe_config_t	*tmp;
	ptm_pcbe_config_t	empty = { 1, 0, 0 }; /* assume pic1 to start */

	if ((pic0 = kcpc_next_config(token, NULL, NULL)) == NULL)
		panic("ptm_pcbe: token %p has no configs", token);

	if ((pic1 = kcpc_next_config(token, pic0, NULL)) == NULL)
		pic1 = &empty;

	/*
	 * The first configuration returned is for counter 1: swap so that
	 * pic0/pic1 match their counter numbers.  Marking the stand-in as
	 * counter 0 keeps the assertion below true when it is in use.
	 */
	if (pic0->ptm_picno != 0) {
		empty.ptm_picno = 0;
		tmp = pic1;
		pic1 = pic0;
		pic0 = tmp;
	}

	ASSERT(pic0->ptm_picno == 0 && pic1->ptm_picno == 1);

	/*
	 * Set or clear CR4_PCE according to kcpc_allow_nonpriv(), but only
	 * when the rdpmc instruction is available.
	 */
	if (ptm_rdpmc_avail) {
		ulong_t curcr4 = getcr4();
		if (kcpc_allow_nonpriv(token))
			setcr4(curcr4 | CR4_PCE);
		else
			setcr4(curcr4 & ~CR4_PCE);
	}

	if (ptm_ver == PTM_VER_P5) {
		/*
		 * Stop, load both counters, then write the combined control
		 * word for both counters.  The counters are read back into
		 * ptm_rawpic afterwards.
		 */
		wrmsr(P5_CESR, ALL_STOPPED);
		wrmsr(P5_CTR0, pic0->ptm_rawpic);
		wrmsr(P5_CTR1, pic1->ptm_rawpic);
		wrmsr(P5_CESR, pic0->ptm_ctl | pic1->ptm_ctl);
		pic0->ptm_rawpic = rdmsr(P5_CTR0);
		pic1->ptm_rawpic = rdmsr(P5_CTR1);
	} else {
		/*
		 * Stop via PerfEvtSel0, load both counters, program
		 * PerfEvtSel1, and write PerfEvtSel0 last with the enable
		 * bit set.
		 */
		uint64_t	pes;
		wrmsr(REG_PERFEVNT0, ALL_STOPPED);
		wrmsr(REG_PERFCTR0, pic0->ptm_rawpic);
		wrmsr(REG_PERFCTR1, pic1->ptm_rawpic);
		pes = pic1->ptm_ctl;
		DTRACE_PROBE1(ptm__pes1, uint64_t, pes);
		wrmsr(REG_PERFEVNT1, pes);
		pes = pic0->ptm_ctl | (1 << CPC_P6_PES_EN);
		DTRACE_PROBE1(ptm__pes0, uint64_t, pes);
		wrmsr(REG_PERFEVNT0, pes);
	}
}
948 
949 static void
950 ptm_pcbe_allstop(void)
951 {
952 	if (ptm_ver == PTM_VER_P5)
953 		wrmsr(P5_CESR, ALL_STOPPED);
954 	else {
955 		wrmsr(REG_PERFEVNT0, ALL_STOPPED);
956 		setcr4(getcr4() & ~CR4_PCE);
957 	}
958 }
959 
960 static void
961 ptm_pcbe_sample(void *token)
962 {
963 	ptm_pcbe_config_t	*pic0;
964 	ptm_pcbe_config_t	*pic1;
965 	ptm_pcbe_config_t	*swap;
966 	ptm_pcbe_config_t	empty = { 1, 0, 0 }; /* assume pic1 to start */
967 	uint64_t		tmp;
968 	uint64_t		*pic0_data;
969 	uint64_t		*pic1_data;
970 	uint64_t		*dtmp;
971 	uint64_t		curpic[2];
972 
973 	if ((pic0 = kcpc_next_config(token, NULL, &pic0_data)) == NULL)
974 		panic("ptm_pcbe: token %p has no configs", token);
975 
976 	if ((pic1 = kcpc_next_config(token, pic0, &pic1_data)) == NULL) {
977 		pic1 = &empty;
978 		pic1_data = &tmp;
979 	}
980 
981 	if (pic0->ptm_picno != 0) {
982 		empty.ptm_picno = 0;
983 		swap = pic0;
984 		pic0 = pic1;
985 		pic1 = swap;
986 		dtmp = pic0_data;
987 		pic0_data = pic1_data;
988 		pic1_data = dtmp;
989 	}
990 
991 	ASSERT(pic0->ptm_picno == 0 && pic1->ptm_picno == 1);
992 
993 	if (ptm_ver == PTM_VER_P5) {
994 		curpic[0] = rdmsr(P5_CTR0);
995 		curpic[1] = rdmsr(P5_CTR1);
996 	} else {
997 		curpic[0] = rdmsr(REG_PERFCTR0);
998 		curpic[1] = rdmsr(REG_PERFCTR1);
999 	}
1000 
1001 	DTRACE_PROBE1(ptm__curpic0, uint64_t, curpic[0]);
1002 	DTRACE_PROBE1(ptm__curpic1, uint64_t, curpic[1]);
1003 
1004 	*pic0_data += diff3931(curpic[0], pic0->ptm_rawpic);
1005 	pic0->ptm_rawpic = trunc3931(*pic0_data);
1006 
1007 	*pic1_data += diff3931(curpic[1], pic1->ptm_rawpic);
1008 	pic1->ptm_rawpic = trunc3931(*pic1_data);
1009 }
1010 
/*
 * Release a configuration allocated by ptm_pcbe_configure().
 */
static void
ptm_pcbe_free(void *config)
{
	kmem_free(config, sizeof (ptm_pcbe_config_t));
}
1016 
1017 /*
1018  * Virtualizes the 40-bit field of the %pic
1019  * register into a 64-bit software register.
1020  *
1021  * We can retrieve 40 (signed) bits from the counters,
1022  * but we can set only 32 (signed) bits into the counters.
1023  * This makes virtualizing more than 31-bits of registers
1024  * quite tricky.
1025  *
1026  * If bits 39 to 31 are set in the virtualized pic register,
1027  * then we can preset the counter to this value using the fact
1028  * that wrmsr sign extends bit 31.   Though it might look easier
1029  * to only use the bottom 31-bits of the register, we have to allow
1030  * the full 40-bits to be used to perform overflow profiling.
1031  */
1032 
#define	MASK40		UINT64_C(0xffffffffff)
#define	MASK31		UINT64_C(0x7fffffff)
#define	BITS_39_31	UINT64_C(0xff80000000)

/*
 * Return how far the counter advanced from 'old' to 'sample'.
 *
 * When bits 39 to 31 of 'old' are all set, the counter is treated as a
 * 40-bit quantity; otherwise as a 31-bit quantity.  'sample' is masked to
 * that width, and an apparent step backwards is taken to be a single
 * wrap-around of that width.
 */
static int64_t
diff3931(uint64_t sample, uint64_t old)
{
	const int	wide = ((old & BITS_39_31) == BITS_39_31);
	const uint64_t	mask = wide ? MASK40 : MASK31;
	const uint64_t	modulus = UINT64_C(1) << (wide ? 40 : 31);
	int64_t		delta;

	delta = (mask & sample) - old;
	if (delta < 0)
		delta += modulus;

	return (delta);
}

/*
 * Reduce a 64-bit virtual counter value to the form kept in ptm_rawpic:
 * the low 40 bits when bits 39 to 31 are all set, the low 31 bits
 * otherwise.
 */
static uint64_t
trunc3931(uint64_t value)
{
	uint64_t keep;

	keep = ((value & BITS_39_31) == BITS_39_31) ? MASK40 : MASK31;

	return (value & keep);
}
1061 
/*
 * Module linkage: describes this module as a PCBE named "Pentium
 * Performance Counters" that provides ptm_pcbe_ops.
 */
static struct modlpcbe modlpcbe = {
	&mod_pcbeops,
	"Pentium Performance Counters",
	&ptm_pcbe_ops
};

/* Linkage structure passed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modl = {
	MODREV_1,
	&modlpcbe,
};
1072 
1073 int
1074 _init(void)
1075 {
1076 	if (ptm_pcbe_init() != 0)
1077 		return (ENOTSUP);
1078 	return (mod_install(&modl));
1079 }
1080 
/*
 * Module unload entry point; returns the result of mod_remove().
 * NOTE(review): the pic_events[] strings allocated by ptm_pcbe_init() are
 * not freed here.
 */
int
_fini(void)
{
	return (mod_remove(&modl));
}
1086 
/*
 * Module information entry point; returns the result of mod_info().
 */
int
_info(struct modinfo *mi)
{
	return (mod_info(&modl, mi));
}
1092