xref: /titanic_41/usr/src/uts/intel/pcbe/p123_pcbe.c (revision 938d11f4dc1913fa271733c9057f13109fd80cdb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * This file contains preset event names from the Performance Application
27  * Programming Interface v3.5 which included the following notice:
28  *
29  *                             Copyright (c) 2005,6
30  *                           Innovative Computing Labs
31  *                         Computer Science Department,
32  *                            University of Tennessee,
33  *                                 Knoxville, TN.
34  *                              All Rights Reserved.
35  *
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions are met:
39  *
40  *    * Redistributions of source code must retain the above copyright notice,
41  *      this list of conditions and the following disclaimer.
42  *    * Redistributions in binary form must reproduce the above copyright
43  *      notice, this list of conditions and the following disclaimer in the
44  *      documentation and/or other materials provided with the distribution.
45  *    * Neither the name of the University of Tennessee nor the names of its
46  *      contributors may be used to endorse or promote products derived from
47  *      this software without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
50  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
53  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59  * POSSIBILITY OF SUCH DAMAGE.
60  *
61  *
62  * This open source software license conforms to the BSD License template.
63  */
64 
65 /*
66  * Performance Counter Back-End for Pentiums I, II, and III.
67  */
68 
69 #include <sys/cpuvar.h>
70 #include <sys/param.h>
71 #include <sys/cpc_impl.h>
72 #include <sys/cpc_pcbe.h>
73 #include <sys/modctl.h>
74 #include <sys/inttypes.h>
75 #include <sys/systm.h>
76 #include <sys/cmn_err.h>
77 #include <sys/x86_archext.h>
78 #include <sys/sdt.h>
79 #include <sys/archsystm.h>
80 #include <sys/privregs.h>
81 #include <sys/ddi.h>
82 #include <sys/sunddi.h>
83 
/*
 * Forward declarations.  diff3931() and trunc3931() are the counter
 * virtualization helpers defined near the end of this file.  The ptm_pcbe_*
 * routines are the back-end's initialization routine plus the entry points
 * collected in ptm_pcbe_ops.
 *
 * NOTE(review): ptm_pcbe_pic_index() is declared here, but no definition or
 * use of it is visible in this file -- confirm whether the prototype is still
 * needed.
 */
84 static int64_t diff3931(uint64_t sample, uint64_t old);
85 static uint64_t trunc3931(uint64_t value);
86 
87 static int ptm_pcbe_init(void);
88 static uint_t ptm_pcbe_ncounters(void);
89 static const char *ptm_pcbe_impl_name(void);
90 static const char *ptm_pcbe_cpuref(void);
91 static char *ptm_pcbe_list_events(uint_t picnum);
92 static char *ptm_pcbe_list_attrs(void);
93 static uint64_t ptm_pcbe_event_coverage(char *event);
94 static int ptm_pcbe_pic_index(char *picname);
95 static uint64_t	ptm_pcbe_overflow_bitmap(void);
96 static int ptm_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
97     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
98     void *token);
99 static void ptm_pcbe_program(void *token);
100 static void ptm_pcbe_allstop(void);
101 static void ptm_pcbe_sample(void *token);
102 static void ptm_pcbe_free(void *config);
103 
/*
 * Operations vector for this back-end; modlpcbe (end of file) points at it.
 * The initializer is positional, so the order of the entries must follow the
 * member order of pcbe_ops_t.
 */
104 pcbe_ops_t ptm_pcbe_ops = {
105 	PCBE_VER_1,
106 	0,	/* presumably pcbe_caps, set by ptm_pcbe_init() -- confirm */
107 	ptm_pcbe_ncounters,
108 	ptm_pcbe_impl_name,
109 	ptm_pcbe_cpuref,
110 	ptm_pcbe_list_events,
111 	ptm_pcbe_list_attrs,
112 	ptm_pcbe_event_coverage,
113 	ptm_pcbe_overflow_bitmap,
114 	ptm_pcbe_configure,
115 	ptm_pcbe_program,
116 	ptm_pcbe_allstop,
117 	ptm_pcbe_sample,
118 	ptm_pcbe_free
119 };
120 
/*
 * Counter architecture in use.  ptm_pcbe_init() selects PTM_VER_P5 for CPUID
 * family 5 and PTM_VER_P6 for CPUID family 6.
 */
121 typedef enum _ptm_ver {
122 	PTM_VER_P5,
123 	PTM_VER_P6
124 } ptm_ver_t;
125 
/*
 * Module state filled in by ptm_pcbe_init(): the counter architecture, the
 * strings returned by ptm_pcbe_impl_name() and ptm_pcbe_cpuref(), and, per
 * counter, the comma-separated event list returned by ptm_pcbe_list_events().
 */
126 static ptm_ver_t ptm_ver;
127 static const char *ptm_impl_name;
128 static const char *ptm_cpuref;
129 static char *pic_events[2] = { NULL, NULL };
130 
131 /*
132  * Indicates whether the "rdpmc" instruction is available on this processor.
133  */
134 static int ptm_rdpmc_avail = 0;
135 
/* Value written to the control MSR (P5_CESR or REG_PERFEVNT0) to stop counting. */
136 #define	ALL_STOPPED	0ULL
137 
/*
 * Per-counter configuration.  Allocated by ptm_pcbe_configure() and released
 * by ptm_pcbe_free().  ptm_rawpic is the value ptm_pcbe_program() loads into
 * the counter and the baseline ptm_pcbe_sample() subtracts from the next
 * hardware reading.
 */
138 typedef struct _ptm_pcbe_config {
139 	uint8_t		ptm_picno;	/* 0 for pic0 or 1 for pic1 */
140 	uint32_t	ptm_ctl;    /* P6: PerfEventSelect; P5: cesr, shifted */
141 	uint64_t	ptm_rawpic;
142 } ptm_pcbe_config_t;
143 
/*
 * One native event: the event-select code and the name users refer to it by.
 * Arrays of these end with an entry whose bits field is NT_END.
 */
144 struct nametable {
145 	uint8_t		bits;
146 	const char	*name;
147 };
148 
/*
 * One generic (PAPI) event: its generic name, the native event name that
 * implements it, and a unit mask.  ptm_pcbe_configure() applies the unit mask
 * only on P6.  Arrays of these end with an entry whose name is NULL.
 */
149 typedef struct _ptm_generic_events {
150 	char *name;
151 	char *event;
152 	uint8_t umask;
153 } ptm_generic_event_t;
154 
/* NT_END: bits value terminating a struct nametable array. */
/* CPC_GEN_END: initializer terminating a ptm_generic_event_t array. */
155 #define	NT_END 0xFF
156 #define	CPC_GEN_END { NULL, NULL }
157 
/*
 * Each entry is a { code, name } initializer for struct nametable.  The list
 * is expanded at the start of both P5mmx_names0 and P5mmx_names1, so these
 * events are offered on either counter.  The macro deliberately ends without
 * a trailing comma; users supply it.
 */
158 /*
159  * Basic Pentium events
160  */
161 #define	P5_EVENTS				\
162 	{0x0,	"data_read"},			\
163 	{0x1,	"data_write"},			\
164 	{0x2,	"data_tlb_miss"},		\
165 	{0x3,	"data_read_miss"},		\
166 	{0x4,	"data_write_miss"},		\
167 	{0x5,	"write_hit_to_M_or_E"},		\
168 	{0x6,	"dcache_lines_wrback"},		\
169 	{0x7,	"external_snoops"},		\
170 	{0x8,	"external_dcache_snoop_hits"},	\
171 	{0x9,	"memory_access_in_both_pipes"},	\
172 	{0xa,	"bank_conflicts"},		\
173 	{0xb,	"misaligned_ref"},		\
174 	{0xc,	"code_read"},			\
175 	{0xd,	"code_tlb_miss"},		\
176 	{0xe,	"code_cache_miss"},		\
177 	{0xf,	"any_segreg_loaded"},		\
178 	{0x12,	"branches"},			\
179 	{0x13,	"btb_hits"},			\
180 	{0x14,	"taken_or_btb_hit"},		\
181 	{0x15,	"pipeline_flushes"},		\
182 	{0x16,	"instr_exec"},			\
183 	{0x17,	"instr_exec_V_pipe"},		\
184 	{0x18,	"clks_bus_cycle"},		\
185 	{0x19,	"clks_full_wbufs"},		\
186 	{0x1a,	"pipe_stall_read"},		\
187 	{0x1b,	"stall_on_write_ME"},		\
188 	{0x1c,	"locked_bus_cycle"},		\
189 	{0x1d,	"io_rw_cycles"},		\
190 	{0x1e,	"reads_noncache_mem"},		\
191 	{0x1f,	"pipeline_agi_stalls"},		\
192 	{0x22,	"flops"},			\
193 	{0x23,	"bp_match_dr0"},		\
194 	{0x24,	"bp_match_dr1"},		\
195 	{0x25,	"bp_match_dr2"},		\
196 	{0x26,	"bp_match_dr3"},		\
197 	{0x27,	"hw_intrs"},			\
198 	{0x28,	"data_rw"},			\
199 	{0x29,	"data_rw_miss"}
200 
/*
 * Events selectable on counter 0 of a P5: the common P5_EVENTS followed by
 * codes 0x2a-0x3b under the names they carry on this counter (the same codes
 * have different names in P5mmx_names1).  Terminated by NT_END.
 */
201 static const struct nametable P5mmx_names0[] = {
202 	P5_EVENTS,
203 	{0x2a,	"bus_ownership_latency"},
204 	{0x2b,	"mmx_instr_upipe"},
205 	{0x2c,	"cache_M_line_sharing"},
206 	{0x2d,	"emms_instr"},
207 	{0x2e,	"bus_util_processor"},
208 	{0x2f,	"sat_mmx_instr"},
209 	{0x30,	"clks_not_HLT"},
210 	{0x31,	"mmx_data_read"},
211 	{0x32,	"clks_fp_stall"},
212 	{0x33,	"d1_starv_fifo_0"},
213 	{0x34,	"mmx_data_write"},
214 	{0x35,	"pipe_flush_wbp"},
215 	{0x36,	"mmx_misalign_data_refs"},
216 	{0x37,	"rets_pred_incorrect"},
217 	{0x38,	"mmx_multiply_unit_interlock"},
218 	{0x39,	"rets"},
219 	{0x3a,	"btb_false_entries"},
220 	{0x3b,	"clocks_stall_full_wb"},
221 	{NT_END, ""}
222 };
223 
/*
 * Events selectable on counter 1 of a P5: the common P5_EVENTS followed by
 * codes 0x2a-0x3b under the names they carry on this counter (the same codes
 * have different names in P5mmx_names0).  Terminated by NT_END.
 */
224 static const struct nametable P5mmx_names1[] = {
225 	P5_EVENTS,
226 	{0x2a,	"bus_ownership_transfers"},
227 	{0x2b,	"mmx_instr_vpipe"},
228 	{0x2c,	"cache_lint_sharing"},
229 	{0x2d,	"mmx_fp_transitions"},
230 	{0x2e,	"writes_noncache_mem"},
231 	{0x2f,	"sats_performed"},
232 	{0x30,	"clks_dcache_tlb_miss"},
233 	{0x31,	"mmx_data_read_miss"},
234 	{0x32,	"taken_br"},
235 	{0x33,	"d1_starv_fifo_1"},
236 	{0x34,	"mmx_data_write_miss"},
237 	{0x35,	"pipe_flush_wbp_wb"},
238 	{0x36,	"mmx_pipe_stall_data_read"},
239 	{0x37,	"rets_pred"},
240 	{0x38,	"movd_movq_stall"},
241 	{0x39,	"rsb_overflow"},
242 	{0x3a,	"btb_mispred_nt"},
243 	{0x3b,	"mmx_stall_write_ME"},
244 	{NT_END, ""}
245 };
246 
/*
 * Per-counter native event tables for P5, indexed by counter number.
 * ptm_pcbe_init() points `events' at this array on family 5.
 */
247 static const struct nametable *P5mmx_names[2] = {
248 	P5mmx_names0,
249 	P5mmx_names1
250 };
251 
/*
 * Native event table for P6.  Both counters are given this same table (see
 * P6_names), so the "0 only" / "1 only" remarks below are informational: the
 * lookups in this file do not restrict those events to one counter.
 * Terminated by NT_END.
 */
252 /*
253  * Pentium Pro and Pentium II events
254  */
255 static const struct nametable _P6_names[] = {
256 	/*
257 	 * Data cache unit
258 	 */
259 	{0x43,	"data_mem_refs"},
260 	{0x45,	"dcu_lines_in"},
261 	{0x46,	"dcu_m_lines_in"},
262 	{0x47,	"dcu_m_lines_out"},
263 	{0x48,	"dcu_miss_outstanding"},
264 
265 	/*
266 	 * Instruction fetch unit
267 	 */
268 	{0x80,	"ifu_ifetch"},
269 	{0x81,	"ifu_ifetch_miss"},
270 	{0x85,	"itlb_miss"},
271 	{0x86,	"ifu_mem_stall"},
272 	{0x87,	"ild_stall"},
273 
274 	/*
275 	 * L2 cache
276 	 */
277 	{0x28,	"l2_ifetch"},
278 	{0x29,	"l2_ld"},
279 	{0x2a,	"l2_st"},
280 	{0x24,	"l2_lines_in"},
281 	{0x26,	"l2_lines_out"},
282 	{0x25,	"l2_m_lines_inm"},
283 	{0x27,	"l2_m_lines_outm"},
284 	{0x2e,	"l2_rqsts"},
285 	{0x21,	"l2_ads"},
286 	{0x22,	"l2_dbus_busy"},
287 	{0x23,	"l2_dbus_busy_rd"},
288 
289 	/*
290 	 * External bus logic
291 	 */
292 	{0x62,	"bus_drdy_clocks"},
293 	{0x63,	"bus_lock_clocks"},
294 	{0x60,	"bus_req_outstanding"},
295 	{0x65,	"bus_tran_brd"},
296 	{0x66,	"bus_tran_rfo"},
297 	{0x67,	"bus_trans_wb"},
298 	{0x68,	"bus_tran_ifetch"},
299 	{0x69,	"bus_tran_inval"},
300 	{0x6a,	"bus_tran_pwr"},
301 	{0x6b,	"bus_trans_p"},
302 	{0x6c,	"bus_trans_io"},
303 	{0x6d,	"bus_tran_def"},
304 	{0x6e,	"bus_tran_burst"},
305 	{0x70,	"bus_tran_any"},
306 	{0x6f,	"bus_tran_mem"},
307 	{0x64,	"bus_data_rcv"},
308 	{0x61,	"bus_bnr_drv"},
309 	{0x7a,	"bus_hit_drv"},
310 	{0x7b,	"bus_hitm_drv"},
311 	{0x7e,	"bus_snoop_stall"},
312 
313 	/*
314 	 * Floating point unit
315 	 */
316 	{0xc1,	"flops"},		/* 0 only */
317 	{0x10,	"fp_comp_ops_exe"},	/* 0 only */
318 	{0x11,	"fp_assist"},		/* 1 only */
319 	{0x12,	"mul"},			/* 1 only */
320 	{0x13,	"div"},			/* 1 only */
321 	{0x14,	"cycles_div_busy"},	/* 0 only */
322 
323 	/*
324 	 * Memory ordering
325 	 */
326 	{0x3,	"ld_blocks"},
327 	{0x4,	"sb_drains"},
328 	{0x5,	"misalign_mem_ref"},
329 
330 	/*
331 	 * Instruction decoding and retirement
332 	 */
333 	{0xc0,	"inst_retired"},
334 	{0xc2,	"uops_retired"},
335 	{0xd0,	"inst_decoder"},
336 
337 	/*
338 	 * Interrupts
339 	 */
340 	{0xc8,	"hw_int_rx"},
341 	{0xc6,	"cycles_int_masked"},
342 	{0xc7,	"cycles_int_pending_and_masked"},
343 
344 	/*
345 	 * Branches
346 	 */
347 	{0xc4,	"br_inst_retired"},
348 	{0xc5,	"br_miss_pred_retired"},
349 	{0xc9,	"br_taken_retired"},
350 	{0xca,	"br_miss_pred_taken_ret"},
351 	{0xe0,	"br_inst_decoded"},
352 	{0xe2,	"btb_misses"},
353 	{0xe4,	"br_bogus"},
354 	{0xe6,	"baclears"},
355 
356 	/*
357 	 * Stalls
358 	 */
359 	{0xa2,	"resource_stalls"},
360 	{0xd2,	"partial_rat_stalls"},
361 
362 	/*
363 	 * Segment register loads
364 	 */
365 	{0x6,	"segment_reg_loads"},
366 
367 	/*
368 	 * Clocks
369 	 */
370 	{0x79,	"cpu_clk_unhalted"},
371 
372 	/*
373 	 * MMX
374 	 */
375 	{0xb0,	"mmx_instr_exec"},
376 	{0xb1,	"mmx_sat_instr_exec"},
377 	{0xb2,	"mmx_uops_exec"},
378 	{0xb3,	"mmx_instr_type_exec"},
379 	{0xcc,	"fp_mmx_trans"},
380 	{0xcd,	"mmx_assists"},
381 	{0xce,	"mmx_instr_ret"},
382 	{0xd4,	"seg_rename_stalls"},
383 	{0xd5,	"seg_reg_renames"},
384 	{0xd6,	"ret_seg_renames"},
385 
386 	{NT_END, ""}
387 };
388 
/*
 * Per-counter native event tables for P6, indexed by counter number.  Both
 * slots refer to the same table.  ptm_pcbe_init() points `events' at this
 * array on family 6.
 */
389 static const struct nametable *P6_names[2] = {
390 	_P6_names,
391 	_P6_names
392 };
393 
/*
 * Generic (PAPI) events available on both P5 counters, as
 * { generic name, native event name, umask } initializers.  The last entry
 * omits the umask, which therefore takes the default value of 0.  The macro
 * ends without a trailing comma; users supply it.
 */
394 #define	P5_GENERIC_EVENTS					\
395 	{ "PAPI_tot_ins",	"instr_exec",	 0x0 },		\
396 	{ "PAPI_tlb_dm",	"data_tlb_miss", 0x0 },		\
397 	{ "PAPI_tlb_im",	"code_tlb_miss", 0x0 },		\
398 	{ "PAPI_fp_ops",	"flops" }
399 
/*
 * Generic event tables for P5.  Each per-counter table is the shared
 * P5_GENERIC_EVENTS list plus the one generic event whose native event exists
 * only in that counter's native table, and ends with CPC_GEN_END.
 * P5mmx_generic_names is indexed by counter number; ptm_pcbe_init() points
 * `generic_events' at it on family 5.
 */
400 static const ptm_generic_event_t P5mmx_generic_names0[] = {
401 	P5_GENERIC_EVENTS,
402 	{ "PAPI_tot_cyc",	"clks_not_HLT", 0x0 },
403 	CPC_GEN_END
404 };
405 
406 static const ptm_generic_event_t P5mmx_generic_names1[] = {
407 	P5_GENERIC_EVENTS,
408 	{ "PAPI_br_ins",	"taken_br",	0x0 },
409 	CPC_GEN_END
410 };
411 
412 static const ptm_generic_event_t *P5mmx_generic_names[2] = {
413 	P5mmx_generic_names0,
414 	P5mmx_generic_names1
415 };
416 
417 static const ptm_generic_event_t _P6_generic_names[] = {
418 	{ "PAPI_ca_shr",	"l2_ifetch",		0xf },
419 	{ "PAPI_ca_cln",	"bus_tran_rfo",		0x0 },
420 	{ "PAPI_ca_itv",	"bus_tran_inval",	0x0 },
421 	{ "PAPI_tlb_im",	"itlb_miss",		0x0 },
422 	{ "PAPI_btac_m",	"btb_misses",		0x0 },
423 	{ "PAPI_hw_int",	"hw_int_rx",		0x0 },
424 	{ "PAPI_br_cn",		"br_inst_retired",	0x0 },
425 	{ "PAPI_br_tkn",	"br_taken_retired",	0x0 },
426 	{ "PAPI_br_msp",	"br_miss_pred_taken_ret", 0x0 },
427 	{ "PAPI_br_ins",	"br_inst_retired",	0x0 },
428 	{ "PAPI_res_stl",	"resource_stalls",	0x0 },
429 	{ "PAPI_tot_iis",	"inst_decoder",		0x0 },
430 	{ "PAPI_tot_ins",	"inst_retired",		0x0 },
431 	{ "PAPI_tot_cyc",	"cpu_clk_unhalted",	0x0 },
432 	{ "PAPI_l1_dcm",	"dcu_lines_in",		0x0 },
433 	{ "PAPI_l1_icm",	"l2_ifetch",		0xf },
434 	{ "PAPI_l1_tcm",	"l2_rqsts",		0xf },
435 	{ "PAPI_l1_dca",	"data_mem_refs",	0x0 },
436 	{ "PAPI_l1_stm",	"l2_st",		0xf },
437 	{ "PAPI_l2_icm",	"bus_tran_ifetch",	0x0 },
438 	{ "PAPI_l2_dcr",	"l2_ld",		0xf },
439 	{ "PAPI_l2_dcw",	"l2_st",		0xf },
440 	{ "PAPI_l2_tcm",	"l2_lines_in",		0x0 },
441 	{ "PAPI_l2_tca",	"l2_rqsts",		0xf },
442 	{ "PAPI_l2_tcw",	"l2_st",		0xf },
443 	{ "PAPI_l2_stm",	"l2_m_lines_inm",	0x0 },
444 	{ "PAPI_fp_ins",	"flops",		0x0 },
445 	{ "PAPI_fp_ops",	"flops",		0x0 },
446 	{ "PAPI_fml_ins",	"mul",			0x0 },
447 	{ "PAPI_fdv_ins",	"div",			0x0 }
448 };
449 
/*
 * Per-counter generic event tables for P6, indexed by counter number.  Both
 * slots refer to the same table.  ptm_pcbe_init() points `generic_events' at
 * this array on family 6.
 */
450 static const ptm_generic_event_t *P6_generic_names[2] = {
451 	_P6_generic_names,
452 	_P6_generic_names
453 };
454 
/*
 * Active native and generic event tables, indexed by counter number.  Set by
 * ptm_pcbe_init() to the P5 or P6 arrays and read by find_event() and
 * find_generic_event().
 */
455 static const struct nametable **events;
456 static const ptm_generic_event_t **generic_events;
457 
/*
 * Extract bits u..l (inclusive, u >= l) of v.  The mask is built by shifting
 * the int constant 1, so the field width must stay below the width of int.
 * NOTE(review): no use of this macro is visible in this file.
 */
458 #define	BITS(v, u, l)	\
459 	(((v) >> (l)) & ((1 << (1 + (u) - (l))) - 1))
460 
461 /*
462  * "Well known" bit fields in the Pentium CES register
463  * The interfaces in libcpc should make these #defines uninteresting.
464  */
/*
 * ES0/ES1 are the event-select fields (shift and mask).  The OS, USR, CLK and
 * PC values are bit positions, not masks.  Counter 1's fields sit 16 bits
 * above counter 0's, which is why ptm_pcbe_configure() shifts by 16 when
 * building the control word for counter 1.
 */
465 #define	CPC_P5_CESR_ES0_SHIFT	0
466 #define	CPC_P5_CESR_ES0_MASK	0x3f
467 #define	CPC_P5_CESR_ES1_SHIFT	16
468 #define	CPC_P5_CESR_ES1_MASK	0x3f
469 
470 #define	CPC_P5_CESR_OS0		6
471 #define	CPC_P5_CESR_USR0	7
472 #define	CPC_P5_CESR_CLK0	8
473 #define	CPC_P5_CESR_PC0		9
474 #define	CPC_P5_CESR_OS1		(CPC_P5_CESR_OS0 + 16)
475 #define	CPC_P5_CESR_USR1	(CPC_P5_CESR_USR0 + 16)
476 #define	CPC_P5_CESR_CLK1	(CPC_P5_CESR_CLK0 + 16)
477 #define	CPC_P5_CESR_PC1		(CPC_P5_CESR_PC0 + 16)
478 
479 /*
480  * "Well known" bit fields in the Pentium Pro PerfEvtSel registers
481  * The interfaces in libcpc should make these #defines uninteresting.
482  */
/*
 * The CPC_P6_PES_INV .. CPC_P6_PES_USR values are bit positions.  The UMASK
 * and CMASK fields are described by a shift and an (unshifted) 8-bit mask.
 * The P6_PES_* macros at the end are ready-made single-bit masks.
 */
483 #define	CPC_P6_PES_INV		23
484 #define	CPC_P6_PES_EN		22
485 #define	CPC_P6_PES_INT		20
486 #define	CPC_P6_PES_PC		19
487 #define	CPC_P6_PES_E		18
488 #define	CPC_P6_PES_OS		17
489 #define	CPC_P6_PES_USR		16
490 
491 #define	CPC_P6_PES_UMASK_SHIFT	8
492 #define	CPC_P6_PES_UMASK_MASK	(0xffu)
493 
494 #define	CPC_P6_PES_CMASK_SHIFT	24
495 #define	CPC_P6_PES_CMASK_MASK	(0xffu)
496 
497 #define	CPC_P6_PES_PIC0_MASK	(0xffu)
498 #define	CPC_P6_PES_PIC1_MASK	(0xffu)
499 
500 #define	P6_PES_EN	(UINT32_C(1) << CPC_P6_PES_EN)
501 #define	P6_PES_INT	(UINT32_C(1) << CPC_P6_PES_INT)
502 #define	P6_PES_OS	(UINT32_C(1) << CPC_P6_PES_OS)
503 
/*
 * The values below are software flags that ptm_pcbe_configure() accumulates
 * while parsing attributes; they are translated into control-register bits
 * afterwards and are never written to hardware themselves.
 */
504 /*
505  * Pentium 5 attributes
506  */
507 #define	P5_NOEDGE	0x1	/* "noedge"	- no edge detection */
508 #define	P5_PC		0x2	/* "pc"		- pin control */
509 
510 /*
511  * Pentium 6 attributes
512  */
513 #define	P6_NOEDGE	0x1
514 #define	P6_PC		0x2
515 #define	P6_INV		0x4	/* "inv" - count inverted transitions */
516 #define	P6_INT		0x8	/* "int" - interrupt on overflow */
517 
518 /*
519  * CPU reference strings
520  */
/*
 * ptm_pcbe_init() stores one of these in ptm_cpuref, which ptm_pcbe_cpuref()
 * returns.  They differ only in the appendix section cited.
 */
521 
522 #define	P5_CPUREF	"See Appendix A.4 of the \"IA-32 Intel Architecture "  \
523 			"Software Developer's Manual Volume 3: System "	       \
524 			"Programming Guide,\" Order # 245472-012, 2003"
525 
526 #define	P6_CPUREF	"See Appendix A.3 of the \"IA-32 Intel Architecture "  \
527 			"Software Developer's Manual Volume 3: System "	       \
528 			"Programming Guide,\" Order # 245472-012, 2003"
529 
530 static int
531 ptm_pcbe_init(void)
532 {
533 	const struct nametable		*n;
534 	const ptm_generic_event_t	*gevp;
535 	int				i;
536 	size_t				size;
537 
538 	if (is_x86_feature(x86_featureset, X86FSET_MMX))
539 		ptm_rdpmc_avail = 1;
540 
541 	/*
542 	 * Discover type of CPU and set events pointer appropriately.
543 	 *
544 	 * Map family and model into the performance
545 	 * counter architectures we currently understand.
546 	 *
547 	 * See application note AP485 (from developer.intel.com)
548 	 * for further explanation.
549 	 */
550 	if (cpuid_getvendor(CPU) != X86_VENDOR_Intel)
551 		return (-1);
552 	switch (cpuid_getfamily(CPU)) {
553 	case 5:		/* Pentium and Pentium with MMX */
554 		events = P5mmx_names;
555 		generic_events = P5mmx_generic_names;
556 		ptm_ver = PTM_VER_P5;
557 		ptm_cpuref = P5_CPUREF;
558 		if (cpuid_getmodel(CPU) < 4)
559 			ptm_impl_name = "Pentium";
560 		else
561 			ptm_impl_name = "Pentium with MMX";
562 		break;
563 	case 6:		/* Pentium Pro and Pentium II and III */
564 		events = P6_names;
565 		generic_events = P6_generic_names;
566 		ptm_ver = PTM_VER_P6;
567 		ptm_cpuref = P6_CPUREF;
568 		ptm_pcbe_ops.pcbe_caps = CPC_CAP_OVERFLOW_INTERRUPT;
569 		if (is_x86_feature(x86_featureset, X86FSET_MMX))
570 			ptm_impl_name = "Pentium Pro with MMX, Pentium II";
571 		else
572 			ptm_impl_name = "Pentium Pro, Pentium II";
573 		break;
574 	default:
575 		return (-1);
576 	}
577 
578 	/*
579 	 * Initialize the list of events for each PIC.
580 	 * Do two passes: one to compute the size necessary and another
581 	 * to copy the strings. Need room for event, comma, and NULL terminator.
582 	 */
583 	for (i = 0; i < 2; i++) {
584 		size = 0;
585 		for (n = events[i]; n->bits != NT_END; n++)
586 			size += strlen(n->name) + 1;
587 		for (gevp = generic_events[i]; gevp->name != NULL; gevp++)
588 			size += strlen(gevp->name) + 1;
589 		pic_events[i] = kmem_alloc(size + 1, KM_SLEEP);
590 		*pic_events[i] = '\0';
591 		for (n = events[i]; n->bits != NT_END; n++) {
592 			(void) strcat(pic_events[i], n->name);
593 			(void) strcat(pic_events[i], ",");
594 		}
595 		for (gevp = generic_events[i]; gevp->name != NULL; gevp++) {
596 			(void) strcat(pic_events[i], gevp->name);
597 			(void) strcat(pic_events[i], ",");
598 		}
599 
600 		/*
601 		 * Remove trailing comma.
602 		 */
603 		pic_events[i][size - 1] = '\0';
604 	}
605 
606 	return (0);
607 }
608 
609 static uint_t
610 ptm_pcbe_ncounters(void)
611 {
612 	return (2);
613 }
614 
615 static const char *
616 ptm_pcbe_impl_name(void)
617 {
618 	return (ptm_impl_name);
619 }
620 
621 static const char *
622 ptm_pcbe_cpuref(void)
623 {
624 	return (ptm_cpuref);
625 }
626 
627 static char *
628 ptm_pcbe_list_events(uint_t picnum)
629 {
630 	ASSERT(picnum >= 0 && picnum < cpc_ncounters);
631 
632 	if (pic_events[0] == NULL) {
633 		ASSERT(pic_events[1] == NULL);
634 	}
635 
636 	return (pic_events[picnum]);
637 }
638 
639 static char *
640 ptm_pcbe_list_attrs(void)
641 {
642 	if (ptm_ver == PTM_VER_P5)
643 		return ("noedge,pc");
644 	else
645 		return ("noedge,pc,inv,int,umask,cmask");
646 }
647 
648 static const ptm_generic_event_t *
649 find_generic_event(int regno, char *name)
650 {
651 	const ptm_generic_event_t	*gevp;
652 
653 	for (gevp = generic_events[regno]; gevp->name != NULL; gevp++)
654 		if (strcmp(name, gevp->name) == 0)
655 			return (gevp);
656 
657 	return (NULL);
658 }
659 
660 static const struct nametable *
661 find_event(int regno, char *name)
662 {
663 	const struct nametable *n;
664 
665 	n = events[regno];
666 
667 	for (; n->bits != NT_END; n++)
668 		if (strcmp(name, n->name) == 0)
669 			return (n);
670 
671 	return (NULL);
672 }
673 
/*
 * Returns a bitmap of the counters able to count `event': bit 0 for pic0,
 * bit 1 for pic1.  A counter qualifies if the name is in either its native
 * or its generic event table.  Returns 0 if no counter knows the event.
 */
static uint64_t
ptm_pcbe_event_coverage(char *event)
{
	uint64_t	coverage = 0;
	int		pic;

	for (pic = 0; pic < 2; pic++) {
		if (find_event(pic, event) != NULL ||
		    find_generic_event(pic, event) != NULL)
			coverage |= (UINT64_C(1) << pic);
	}

	return (coverage);
}
688 
689 static uint64_t
690 ptm_pcbe_overflow_bitmap(void)
691 {
692 	uint64_t	ret = 0;
693 	uint64_t	pes[2];
694 
695 	/*
696 	 * P5 is not capable of generating interrupts.
697 	 */
698 	ASSERT(ptm_ver == PTM_VER_P6);
699 
700 	/*
701 	 * CPC could have caused an interrupt provided that
702 	 *
703 	 * 1) Counters are enabled
704 	 * 2) Either counter has requested an interrupt
705 	 */
706 
707 	pes[0] = rdmsr(REG_PERFEVNT0);
708 	if (((uint32_t)pes[0] & P6_PES_EN) != P6_PES_EN)
709 		return (0);
710 
711 	/*
712 	 * If a particular counter requested an interrupt, assume it caused
713 	 * this interrupt. There is no way to determine which counter overflowed
714 	 * on this hardware other than by using unreliable heuristics.
715 	 */
716 
717 	pes[1] = rdmsr(REG_PERFEVNT1);
718 	if ((uint32_t)pes[0] & P6_PES_INT)
719 		ret |= 0x1;
720 	if ((uint32_t)pes[1] & P6_PES_INT)
721 		ret |= 0x2;
722 
723 	return (ret);
724 }
725 
726 /*ARGSUSED*/
727 static int
728 ptm_pcbe_configure(uint_t picnum, char *eventname, uint64_t preset,
729     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
730     void *token)
731 {
732 	ptm_pcbe_config_t		*conf;
733 	const struct nametable		*n;
734 	const ptm_generic_event_t	*gevp;
735 	struct nametable		nt_raw = { 0, "raw" };
736 	int				i;
737 	int				ptm_flags = 0;
738 
739 	/*
740 	 * If we've been handed an existing configuration, we need only preset
741 	 * the counter value.
742 	 */
743 	if (*data != NULL) {
744 		conf = *data;
745 		conf->ptm_rawpic = trunc3931(preset);
746 		return (0);
747 	}
748 
749 	if (picnum != 0 && picnum != 1)
750 		return (CPC_INVALID_PICNUM);
751 
752 	conf = kmem_alloc(sizeof (ptm_pcbe_config_t), KM_SLEEP);
753 
754 	conf->ptm_picno = picnum;
755 	conf->ptm_rawpic = trunc3931(preset);
756 	conf->ptm_ctl = 0;
757 
758 	if ((n = find_event(picnum, eventname)) == NULL) {
759 		if ((gevp = find_generic_event(picnum, eventname)) != NULL) {
760 			n = find_event(picnum, gevp->event);
761 			ASSERT(n != NULL);
762 
763 			if (nattrs > 0) {
764 				kmem_free(conf, sizeof (ptm_pcbe_config_t));
765 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
766 			}
767 
768 			if (ptm_ver == PTM_VER_P6)
769 				conf->ptm_ctl |= gevp->umask <<
770 				    CPC_P6_PES_UMASK_SHIFT;
771 		} else {
772 			long tmp;
773 
774 			/*
775 			 * If ddi_strtol() likes this event, use it as a raw
776 			 * event code.
777 			 */
778 			if (ddi_strtol(eventname, NULL, 0, &tmp) != 0) {
779 				kmem_free(conf, sizeof (ptm_pcbe_config_t));
780 				return (CPC_INVALID_EVENT);
781 			}
782 
783 			nt_raw.bits = tmp;
784 
785 			if (ptm_ver == PTM_VER_P5)
786 				nt_raw.bits &= CPC_P5_CESR_ES0_MASK;
787 			else
788 				nt_raw.bits &= CPC_P6_PES_PIC0_MASK;
789 
790 			n = &nt_raw;
791 		}
792 	}
793 
794 	if (ptm_ver == PTM_VER_P5) {
795 		int picshift;
796 		picshift = (picnum == 0) ? 0 : 16;
797 
798 		for (i = 0; i < nattrs; i++) {
799 			/*
800 			 * Value of these attributes is ignored; their presence
801 			 * alone tells us to set the corresponding flag.
802 			 */
803 			if (strncmp(attrs[i].ka_name, "noedge", 7) == 0) {
804 				if (attrs[i].ka_val != 0)
805 					ptm_flags |= P5_NOEDGE;
806 			} else if (strncmp(attrs[i].ka_name, "pc", 3) == 0) {
807 				if (attrs[i].ka_val != 0)
808 					ptm_flags |= P5_PC;
809 			} else {
810 				kmem_free(conf, sizeof (ptm_pcbe_config_t));
811 				return (CPC_INVALID_ATTRIBUTE);
812 			}
813 		}
814 
815 		if (flags & CPC_COUNT_USER)
816 			conf->ptm_ctl |= (1 << (CPC_P5_CESR_USR0 + picshift));
817 		if (flags & CPC_COUNT_SYSTEM)
818 			conf->ptm_ctl |= (1 << (CPC_P5_CESR_OS0 + picshift));
819 		if (ptm_flags & P5_NOEDGE)
820 			conf->ptm_ctl |= (1 << (CPC_P5_CESR_CLK0 + picshift));
821 		if (ptm_flags & P5_PC)
822 			conf->ptm_ctl |= (1 << (CPC_P5_CESR_PC0 + picshift));
823 
824 		ASSERT((n->bits | CPC_P5_CESR_ES0_MASK) ==
825 		    CPC_P5_CESR_ES0_MASK);
826 
827 		conf->ptm_ctl |= (n->bits << picshift);
828 	} else {
829 		for (i = 0; i < nattrs; i++) {
830 			if (strncmp(attrs[i].ka_name, "noedge", 6) == 0) {
831 				if (attrs[i].ka_val != 0)
832 					ptm_flags |= P6_NOEDGE;
833 			} else if (strncmp(attrs[i].ka_name, "pc", 2) == 0) {
834 				if (attrs[i].ka_val != 0)
835 					ptm_flags |= P6_PC;
836 			} else if (strncmp(attrs[i].ka_name, "inv", 3) == 0) {
837 				if (attrs[i].ka_val != 0)
838 					ptm_flags |= P6_INV;
839 			} else if (strncmp(attrs[i].ka_name, "umask", 5) == 0) {
840 				if ((attrs[i].ka_val | CPC_P6_PES_UMASK_MASK) !=
841 				    CPC_P6_PES_UMASK_MASK) {
842 					kmem_free(conf,
843 					    sizeof (ptm_pcbe_config_t));
844 					return (CPC_ATTRIBUTE_OUT_OF_RANGE);
845 				}
846 				conf->ptm_ctl |= (uint8_t)attrs[i].ka_val <<
847 				    CPC_P6_PES_UMASK_SHIFT;
848 			} else if (strncmp(attrs[i].ka_name, "cmask", 5) == 0) {
849 				if ((attrs[i].ka_val | CPC_P6_PES_CMASK_MASK) !=
850 				    CPC_P6_PES_CMASK_MASK) {
851 					kmem_free(conf,
852 					    sizeof (ptm_pcbe_config_t));
853 					return (CPC_ATTRIBUTE_OUT_OF_RANGE);
854 				}
855 				conf->ptm_ctl |= (uint8_t)attrs[i].ka_val <<
856 				    CPC_P6_PES_CMASK_SHIFT;
857 			} else if (strncmp(attrs[i].ka_name, "int", 3) == 0) {
858 				if (attrs[i].ka_val != 0)
859 					ptm_flags |= P6_INT;
860 			} else {
861 				kmem_free(conf, sizeof (ptm_pcbe_config_t));
862 				return (CPC_INVALID_ATTRIBUTE);
863 			}
864 		}
865 
866 		if (flags & CPC_OVF_NOTIFY_EMT)
867 			/*
868 			 * If the user has requested notification of overflows,
869 			 * we automatically program the hardware to generate
870 			 * overflow interrupts.
871 			 */
872 			ptm_flags |= P6_INT;
873 		if (flags & CPC_COUNT_USER)
874 			conf->ptm_ctl |= (1 << CPC_P6_PES_USR);
875 		if (flags & CPC_COUNT_SYSTEM)
876 			conf->ptm_ctl |= (1 << CPC_P6_PES_OS);
877 		if ((ptm_flags & P6_NOEDGE) == 0)
878 			conf->ptm_ctl |= (1 << CPC_P6_PES_E);
879 		if (ptm_flags & P6_PC)
880 			conf->ptm_ctl |= (1 << CPC_P6_PES_PC);
881 		if (ptm_flags & P6_INV)
882 			conf->ptm_ctl |= (1 << CPC_P6_PES_INV);
883 		if (ptm_flags & P6_INT)
884 			conf->ptm_ctl |= (1 << CPC_P6_PES_INT);
885 
886 		ASSERT((n->bits | CPC_P6_PES_PIC0_MASK) ==
887 		    CPC_P6_PES_PIC0_MASK);
888 
889 		conf->ptm_ctl |= n->bits;
890 	}
891 
892 	*data = conf;
893 	return (0);
894 }
895 
896 static void
897 ptm_pcbe_program(void *token)
898 {
899 	ptm_pcbe_config_t	*pic0;
900 	ptm_pcbe_config_t	*pic1;
901 	ptm_pcbe_config_t	*tmp;
902 	ptm_pcbe_config_t	empty = { 1, 0, 0 }; /* assume pic1 to start */
903 
904 	if ((pic0 = kcpc_next_config(token, NULL, NULL)) == NULL)
905 		panic("ptm_pcbe: token %p has no configs", token);
906 
907 	if ((pic1 = kcpc_next_config(token, pic0, NULL)) == NULL)
908 		pic1 = &empty;
909 
910 	if (pic0->ptm_picno != 0) {
911 		empty.ptm_picno = 0;
912 		tmp = pic1;
913 		pic1 = pic0;
914 		pic0 = tmp;
915 	}
916 
917 	ASSERT(pic0->ptm_picno == 0 && pic1->ptm_picno == 1);
918 
919 	if (ptm_rdpmc_avail) {
920 		ulong_t curcr4 = getcr4();
921 		if (kcpc_allow_nonpriv(token))
922 			setcr4(curcr4 | CR4_PCE);
923 		else
924 			setcr4(curcr4 & ~CR4_PCE);
925 	}
926 
927 	if (ptm_ver == PTM_VER_P5) {
928 		wrmsr(P5_CESR, ALL_STOPPED);
929 		wrmsr(P5_CTR0, pic0->ptm_rawpic);
930 		wrmsr(P5_CTR1, pic1->ptm_rawpic);
931 		wrmsr(P5_CESR, pic0->ptm_ctl | pic1->ptm_ctl);
932 		pic0->ptm_rawpic = rdmsr(P5_CTR0);
933 		pic1->ptm_rawpic = rdmsr(P5_CTR1);
934 	} else {
935 		uint64_t	pes;
936 		wrmsr(REG_PERFEVNT0, ALL_STOPPED);
937 		wrmsr(REG_PERFCTR0, pic0->ptm_rawpic);
938 		wrmsr(REG_PERFCTR1, pic1->ptm_rawpic);
939 		pes = pic1->ptm_ctl;
940 		DTRACE_PROBE1(ptm__pes1, uint64_t, pes);
941 		wrmsr(REG_PERFEVNT1, pes);
942 		pes = pic0->ptm_ctl | (1 << CPC_P6_PES_EN);
943 		DTRACE_PROBE1(ptm__pes0, uint64_t, pes);
944 		wrmsr(REG_PERFEVNT0, pes);
945 	}
946 }
947 
948 static void
949 ptm_pcbe_allstop(void)
950 {
951 	if (ptm_ver == PTM_VER_P5)
952 		wrmsr(P5_CESR, ALL_STOPPED);
953 	else {
954 		wrmsr(REG_PERFEVNT0, ALL_STOPPED);
955 		setcr4(getcr4() & ~CR4_PCE);
956 	}
957 }
958 
959 static void
960 ptm_pcbe_sample(void *token)
961 {
962 	ptm_pcbe_config_t	*pic0;
963 	ptm_pcbe_config_t	*pic1;
964 	ptm_pcbe_config_t	*swap;
965 	ptm_pcbe_config_t	empty = { 1, 0, 0 }; /* assume pic1 to start */
966 	uint64_t		tmp;
967 	uint64_t		*pic0_data;
968 	uint64_t		*pic1_data;
969 	uint64_t		*dtmp;
970 	uint64_t		curpic[2];
971 
972 	if ((pic0 = kcpc_next_config(token, NULL, &pic0_data)) == NULL)
973 		panic("ptm_pcbe: token %p has no configs", token);
974 
975 	if ((pic1 = kcpc_next_config(token, pic0, &pic1_data)) == NULL) {
976 		pic1 = &empty;
977 		pic1_data = &tmp;
978 	}
979 
980 	if (pic0->ptm_picno != 0) {
981 		empty.ptm_picno = 0;
982 		swap = pic0;
983 		pic0 = pic1;
984 		pic1 = swap;
985 		dtmp = pic0_data;
986 		pic0_data = pic1_data;
987 		pic1_data = dtmp;
988 	}
989 
990 	ASSERT(pic0->ptm_picno == 0 && pic1->ptm_picno == 1);
991 
992 	if (ptm_ver == PTM_VER_P5) {
993 		curpic[0] = rdmsr(P5_CTR0);
994 		curpic[1] = rdmsr(P5_CTR1);
995 	} else {
996 		curpic[0] = rdmsr(REG_PERFCTR0);
997 		curpic[1] = rdmsr(REG_PERFCTR1);
998 	}
999 
1000 	DTRACE_PROBE1(ptm__curpic0, uint64_t, curpic[0]);
1001 	DTRACE_PROBE1(ptm__curpic1, uint64_t, curpic[1]);
1002 
1003 	*pic0_data += diff3931(curpic[0], pic0->ptm_rawpic);
1004 	pic0->ptm_rawpic = trunc3931(*pic0_data);
1005 
1006 	*pic1_data += diff3931(curpic[1], pic1->ptm_rawpic);
1007 	pic1->ptm_rawpic = trunc3931(*pic1_data);
1008 }
1009 
1010 static void
1011 ptm_pcbe_free(void *config)
1012 {
1013 	kmem_free(config, sizeof (ptm_pcbe_config_t));
1014 }
1015 
1016 /*
1017  * Virtualizes the 40-bit field of the %pic
1018  * register into a 64-bit software register.
1019  *
1020  * We can retrieve 40 (signed) bits from the counters,
1021  * but we can set only 32 (signed) bits into the counters.
1022  * This makes virtualizing more than 31-bits of registers
1023  * quite tricky.
1024  *
1025  * If bits 39 to 31 are set in the virtualized pic register,
1026  * then we can preset the counter to this value using the fact
1027  * that wrmsr sign extends bit 31.   Though it might look easier
1028  * to only use the bottom 31-bits of the register, we have to allow
1029  * the full 40-bits to be used to perform overflow profiling.
1030  */
1031 
#define	MASK40		UINT64_C(0xffffffffff)
#define	MASK31		UINT64_C(0x7fffffff)
#define	BITS_39_31	UINT64_C(0xff80000000)

/*
 * Returns how far the counter advanced from `old' to `sample'.
 *
 * `old' is a value produced by trunc3931(): if bits 39..31 are all set it is
 * a 40-bit quantity, otherwise a 31-bit one.  The sample is reduced to the
 * same width, and a negative difference is taken to be a single wrap of a
 * counter of that width.
 */
static int64_t
diff3931(uint64_t sample, uint64_t old)
{
	uint64_t	mask;
	uint64_t	modulus;
	int64_t		delta;

	if ((old & BITS_39_31) == BITS_39_31) {
		mask = MASK40;
		modulus = UINT64_C(1) << 40;
	} else {
		mask = MASK31;
		modulus = UINT64_C(1) << 31;
	}

	delta = (sample & mask) - old;
	if (delta < 0)
		delta += modulus;

	return (delta);
}
1052 
1053 static uint64_t
1054 trunc3931(uint64_t value)
1055 {
1056 	if ((value & BITS_39_31) == BITS_39_31)
1057 		return (MASK40 & value);
1058 	return (MASK31 & value);
1059 }
1060 
/*
 * Module linkage: modlpcbe describes this module as a PCBE whose operations
 * are in ptm_pcbe_ops, and modl is the linkage structure that _init(),
 * _fini() and _info() pass to mod_install(), mod_remove() and mod_info().
 */
1061 static struct modlpcbe modlpcbe = {
1062 	&mod_pcbeops,
1063 	"Pentium Performance Counters",
1064 	&ptm_pcbe_ops
1065 };
1066 
1067 static struct modlinkage modl = {
1068 	MODREV_1,
1069 	&modlpcbe,
1070 };
1071 
1072 int
1073 _init(void)
1074 {
1075 	if (ptm_pcbe_init() != 0)
1076 		return (ENOTSUP);
1077 	return (mod_install(&modl));
1078 }
1079 
1080 int
1081 _fini(void)
1082 {
1083 	return (mod_remove(&modl));
1084 }
1085 
1086 int
1087 _info(struct modinfo *mi)
1088 {
1089 	return (mod_info(&modl, mi));
1090 }
1091