xref: /titanic_41/usr/src/uts/intel/pcbe/p123_pcbe.c (revision 9ea11b96f9f71eef4f1bd2062ea054451a32a216)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * This file contains preset event names from the Performance Application
27  * Programming Interface v3.5 which included the following notice:
28  *
29  *                             Copyright (c) 2005,6
30  *                           Innovative Computing Labs
31  *                         Computer Science Department,
32  *                            University of Tennessee,
33  *                                 Knoxville, TN.
34  *                              All Rights Reserved.
35  *
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions are met:
39  *
40  *    * Redistributions of source code must retain the above copyright notice,
41  *      this list of conditions and the following disclaimer.
42  *    * Redistributions in binary form must reproduce the above copyright
43  *      notice, this list of conditions and the following disclaimer in the
44  *      documentation and/or other materials provided with the distribution.
45  *    * Neither the name of the University of Tennessee nor the names of its
46  *      contributors may be used to endorse or promote products derived from
47  *      this software without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
50  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
53  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59  * POSSIBILITY OF SUCH DAMAGE.
60  *
61  *
62  * This open source software license conforms to the BSD License template.
63  */
64 
65 /*
66  * Performance Counter Back-End for Pentiums I, II, and III.
67  */
68 
69 #include <sys/cpuvar.h>
70 #include <sys/param.h>
71 #include <sys/cpc_impl.h>
72 #include <sys/cpc_pcbe.h>
73 #include <sys/modctl.h>
74 #include <sys/inttypes.h>
75 #include <sys/systm.h>
76 #include <sys/cmn_err.h>
77 #include <sys/x86_archext.h>
78 #include <sys/sdt.h>
79 #include <sys/archsystm.h>
80 #include <sys/privregs.h>
81 #include <sys/ddi.h>
82 #include <sys/sunddi.h>
83 
84 static int64_t diff3931(uint64_t sample, uint64_t old);
85 static uint64_t trunc3931(uint64_t value);
86 
87 static int ptm_pcbe_init(void);
88 static uint_t ptm_pcbe_ncounters(void);
89 static const char *ptm_pcbe_impl_name(void);
90 static const char *ptm_pcbe_cpuref(void);
91 static char *ptm_pcbe_list_events(uint_t picnum);
92 static char *ptm_pcbe_list_attrs(void);
93 static uint64_t ptm_pcbe_event_coverage(char *event);
94 static int ptm_pcbe_pic_index(char *picname);
95 static uint64_t	ptm_pcbe_overflow_bitmap(void);
96 static int ptm_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
97     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
98     void *token);
99 static void ptm_pcbe_program(void *token);
100 static void ptm_pcbe_allstop(void);
101 static void ptm_pcbe_sample(void *token);
102 static void ptm_pcbe_free(void *config);
103 
104 pcbe_ops_t ptm_pcbe_ops = {
105 	PCBE_VER_1,
106 	0,
107 	ptm_pcbe_ncounters,
108 	ptm_pcbe_impl_name,
109 	ptm_pcbe_cpuref,
110 	ptm_pcbe_list_events,
111 	ptm_pcbe_list_attrs,
112 	ptm_pcbe_event_coverage,
113 	ptm_pcbe_overflow_bitmap,
114 	ptm_pcbe_configure,
115 	ptm_pcbe_program,
116 	ptm_pcbe_allstop,
117 	ptm_pcbe_sample,
118 	ptm_pcbe_free
119 };
120 
121 typedef enum _ptm_ver {
122 	PTM_VER_P5,
123 	PTM_VER_P6
124 } ptm_ver_t;
125 
126 static ptm_ver_t ptm_ver;
127 static const char *ptm_impl_name;
128 static const char *ptm_cpuref;
129 static char *pic_events[2] = { NULL, NULL };
130 
131 /*
132  * Indicates whether the "rdpmc" instruction is available on this processor.
133  */
134 static int ptm_rdpmc_avail = 0;
135 
136 #define	ALL_STOPPED	0ULL
137 
138 typedef struct _ptm_pcbe_config {
139 	uint8_t		ptm_picno;	/* 0 for pic0 or 1 for pic1 */
140 	uint32_t	ptm_ctl;    /* P6: PerfEventSelect; P5: cesr, shifted */
141 	uint64_t	ptm_rawpic;
142 } ptm_pcbe_config_t;
143 
144 struct nametable {
145 	uint8_t		bits;
146 	const char	*name;
147 };
148 
149 typedef struct _ptm_generic_events {
150 	char *name;
151 	char *event;
152 	uint8_t umask;
153 } ptm_generic_event_t;
154 
155 #define	NT_END 0xFF
156 #define	CPC_GEN_END { NULL, NULL }
157 
158 /*
159  * Basic Pentium events
160  */
161 #define	P5_EVENTS				\
162 	{0x0,	"data_read"},			\
163 	{0x1,	"data_write"},			\
164 	{0x2,	"data_tlb_miss"},		\
165 	{0x3,	"data_read_miss"},		\
166 	{0x4,	"data_write_miss"},		\
167 	{0x5,	"write_hit_to_M_or_E"},		\
168 	{0x6,	"dcache_lines_wrback"},		\
169 	{0x7,	"external_snoops"},		\
170 	{0x8,	"external_dcache_snoop_hits"},	\
171 	{0x9,	"memory_access_in_both_pipes"},	\
172 	{0xa,	"bank_conflicts"},		\
173 	{0xb,	"misaligned_ref"},		\
174 	{0xc,	"code_read"},			\
175 	{0xd,	"code_tlb_miss"},		\
176 	{0xe,	"code_cache_miss"},		\
177 	{0xf,	"any_segreg_loaded"},		\
178 	{0x12,	"branches"},			\
179 	{0x13,	"btb_hits"},			\
180 	{0x14,	"taken_or_btb_hit"},		\
181 	{0x15,	"pipeline_flushes"},		\
182 	{0x16,	"instr_exec"},			\
183 	{0x17,	"instr_exec_V_pipe"},		\
184 	{0x18,	"clks_bus_cycle"},		\
185 	{0x19,	"clks_full_wbufs"},		\
186 	{0x1a,	"pipe_stall_read"},		\
187 	{0x1b,	"stall_on_write_ME"},		\
188 	{0x1c,	"locked_bus_cycle"},		\
189 	{0x1d,	"io_rw_cycles"},		\
190 	{0x1e,	"reads_noncache_mem"},		\
191 	{0x1f,	"pipeline_agi_stalls"},		\
192 	{0x22,	"flops"},			\
193 	{0x23,	"bp_match_dr0"},		\
194 	{0x24,	"bp_match_dr1"},		\
195 	{0x25,	"bp_match_dr2"},		\
196 	{0x26,	"bp_match_dr3"},		\
197 	{0x27,	"hw_intrs"},			\
198 	{0x28,	"data_rw"},			\
199 	{0x29,	"data_rw_miss"}
200 
201 static const struct nametable P5mmx_names0[] = {
202 	P5_EVENTS,
203 	{0x2a,	"bus_ownership_latency"},
204 	{0x2b,	"mmx_instr_upipe"},
205 	{0x2c,	"cache_M_line_sharing"},
206 	{0x2d,	"emms_instr"},
207 	{0x2e,	"bus_util_processor"},
208 	{0x2f,	"sat_mmx_instr"},
209 	{0x30,	"clks_not_HLT"},
210 	{0x31,	"mmx_data_read"},
211 	{0x32,	"clks_fp_stall"},
212 	{0x33,	"d1_starv_fifo_0"},
213 	{0x34,	"mmx_data_write"},
214 	{0x35,	"pipe_flush_wbp"},
215 	{0x36,	"mmx_misalign_data_refs"},
216 	{0x37,	"rets_pred_incorrect"},
217 	{0x38,	"mmx_multiply_unit_interlock"},
218 	{0x39,	"rets"},
219 	{0x3a,	"btb_false_entries"},
220 	{0x3b,	"clocks_stall_full_wb"},
221 	{NT_END, ""}
222 };
223 
224 static const struct nametable P5mmx_names1[] = {
225 	P5_EVENTS,
226 	{0x2a,	"bus_ownership_transfers"},
227 	{0x2b,	"mmx_instr_vpipe"},
228 	{0x2c,	"cache_lint_sharing"},
229 	{0x2d,	"mmx_fp_transitions"},
230 	{0x2e,	"writes_noncache_mem"},
231 	{0x2f,	"sats_performed"},
232 	{0x30,	"clks_dcache_tlb_miss"},
233 	{0x31,	"mmx_data_read_miss"},
234 	{0x32,	"taken_br"},
235 	{0x33,	"d1_starv_fifo_1"},
236 	{0x34,	"mmx_data_write_miss"},
237 	{0x35,	"pipe_flush_wbp_wb"},
238 	{0x36,	"mmx_pipe_stall_data_read"},
239 	{0x37,	"rets_pred"},
240 	{0x38,	"movd_movq_stall"},
241 	{0x39,	"rsb_overflow"},
242 	{0x3a,	"btb_mispred_nt"},
243 	{0x3b,	"mmx_stall_write_ME"},
244 	{NT_END, ""}
245 };
246 
247 static const struct nametable *P5mmx_names[2] = {
248 	P5mmx_names0,
249 	P5mmx_names1
250 };
251 
252 /*
253  * Pentium Pro and Pentium II events
254  */
255 static const struct nametable _P6_names[] = {
256 	/*
257 	 * Data cache unit
258 	 */
259 	{0x43,	"data_mem_refs"},
260 	{0x45,	"dcu_lines_in"},
261 	{0x46,	"dcu_m_lines_in"},
262 	{0x47,	"dcu_m_lines_out"},
263 	{0x48,	"dcu_miss_outstanding"},
264 
265 	/*
266 	 * Instruction fetch unit
267 	 */
268 	{0x80,	"ifu_ifetch"},
269 	{0x81,	"ifu_ifetch_miss"},
270 	{0x85,	"itlb_miss"},
271 	{0x86,	"ifu_mem_stall"},
272 	{0x87,	"ild_stall"},
273 
274 	/*
275 	 * L2 cache
276 	 */
277 	{0x28,	"l2_ifetch"},
278 	{0x29,	"l2_ld"},
279 	{0x2a,	"l2_st"},
280 	{0x24,	"l2_lines_in"},
281 	{0x26,	"l2_lines_out"},
282 	{0x25,	"l2_m_lines_inm"},
283 	{0x27,	"l2_m_lines_outm"},
284 	{0x2e,	"l2_rqsts"},
285 	{0x21,	"l2_ads"},
286 	{0x22,	"l2_dbus_busy"},
287 	{0x23,	"l2_dbus_busy_rd"},
288 
289 	/*
290 	 * External bus logic
291 	 */
292 	{0x62,	"bus_drdy_clocks"},
293 	{0x63,	"bus_lock_clocks"},
294 	{0x60,	"bus_req_outstanding"},
295 	{0x65,	"bus_tran_brd"},
296 	{0x66,	"bus_tran_rfo"},
297 	{0x67,	"bus_trans_wb"},
298 	{0x68,	"bus_tran_ifetch"},
299 	{0x69,	"bus_tran_inval"},
300 	{0x6a,	"bus_tran_pwr"},
301 	{0x6b,	"bus_trans_p"},
302 	{0x6c,	"bus_trans_io"},
303 	{0x6d,	"bus_tran_def"},
304 	{0x6e,	"bus_tran_burst"},
305 	{0x70,	"bus_tran_any"},
306 	{0x6f,	"bus_tran_mem"},
307 	{0x64,	"bus_data_rcv"},
308 	{0x61,	"bus_bnr_drv"},
309 	{0x7a,	"bus_hit_drv"},
310 	{0x7b,	"bus_hitm_drv"},
311 	{0x7e,	"bus_snoop_stall"},
312 
313 	/*
314 	 * Floating point unit
315 	 */
316 	{0xc1,	"flops"},		/* 0 only */
317 	{0x10,	"fp_comp_ops_exe"},	/* 0 only */
318 	{0x11,	"fp_assist"},		/* 1 only */
319 	{0x12,	"mul"},			/* 1 only */
320 	{0x13,	"div"},			/* 1 only */
321 	{0x14,	"cycles_div_busy"},	/* 0 only */
322 
323 	/*
324 	 * Memory ordering
325 	 */
326 	{0x3,	"ld_blocks"},
327 	{0x4,	"sb_drains"},
328 	{0x5,	"misalign_mem_ref"},
329 
330 	/*
331 	 * Instruction decoding and retirement
332 	 */
333 	{0xc0,	"inst_retired"},
334 	{0xc2,	"uops_retired"},
335 	{0xd0,	"inst_decoder"},
336 
337 	/*
338 	 * Interrupts
339 	 */
340 	{0xc8,	"hw_int_rx"},
341 	{0xc6,	"cycles_int_masked"},
342 	{0xc7,	"cycles_int_pending_and_masked"},
343 
344 	/*
345 	 * Branches
346 	 */
347 	{0xc4,	"br_inst_retired"},
348 	{0xc5,	"br_miss_pred_retired"},
349 	{0xc9,	"br_taken_retired"},
350 	{0xca,	"br_miss_pred_taken_ret"},
351 	{0xe0,	"br_inst_decoded"},
352 	{0xe2,	"btb_misses"},
353 	{0xe4,	"br_bogus"},
354 	{0xe6,	"baclears"},
355 
356 	/*
357 	 * Stalls
358 	 */
359 	{0xa2,	"resource_stalls"},
360 	{0xd2,	"partial_rat_stalls"},
361 
362 	/*
363 	 * Segment register loads
364 	 */
365 	{0x6,	"segment_reg_loads"},
366 
367 	/*
368 	 * Clocks
369 	 */
370 	{0x79,	"cpu_clk_unhalted"},
371 
372 	/*
373 	 * MMX
374 	 */
375 	{0xb0,	"mmx_instr_exec"},
376 	{0xb1,	"mmx_sat_instr_exec"},
377 	{0xb2,	"mmx_uops_exec"},
378 	{0xb3,	"mmx_instr_type_exec"},
379 	{0xcc,	"fp_mmx_trans"},
380 	{0xcd,	"mmx_assists"},
381 	{0xce,	"mmx_instr_ret"},
382 	{0xd4,	"seg_rename_stalls"},
383 	{0xd5,	"seg_reg_renames"},
384 	{0xd6,	"ret_seg_renames"},
385 
386 	{NT_END, ""}
387 };
388 
389 static const struct nametable *P6_names[2] = {
390 	_P6_names,
391 	_P6_names
392 };
393 
/*
 * Generic (PAPI) event aliases available on both P5 counters.  Each entry is
 * { generic name, native event name, umask }.  ptm_pcbe_configure() applies
 * the umask only for PTM_VER_P6, so it is always 0 here; it is spelled out on
 * every entry so that all initializers are complete.
 */
#define	P5_GENERIC_EVENTS					\
	{ "PAPI_tot_ins",	"instr_exec",	 0x0 },		\
	{ "PAPI_tlb_dm",	"data_tlb_miss", 0x0 },		\
	{ "PAPI_tlb_im",	"code_tlb_miss", 0x0 },		\
	{ "PAPI_fp_ops",	"flops",	 0x0 }
399 
400 static const ptm_generic_event_t P5mmx_generic_names0[] = {
401 	P5_GENERIC_EVENTS,
402 	{ "PAPI_tot_cyc",	"clks_not_HLT", 0x0 },
403 	CPC_GEN_END
404 };
405 
406 static const ptm_generic_event_t P5mmx_generic_names1[] = {
407 	P5_GENERIC_EVENTS,
408 	{ "PAPI_br_ins",	"taken_br",	0x0 },
409 	CPC_GEN_END
410 };
411 
412 static const ptm_generic_event_t *P5mmx_generic_names[2] = {
413 	P5mmx_generic_names0,
414 	P5mmx_generic_names1
415 };
416 
417 static const ptm_generic_event_t _P6_generic_names[] = {
418 	{ "PAPI_ca_shr",	"l2_ifetch",		0xf },
419 	{ "PAPI_ca_cln",	"bus_tran_rfo",		0x0 },
420 	{ "PAPI_ca_itv",	"bus_tran_inval",	0x0 },
421 	{ "PAPI_tlb_im",	"itlb_miss",		0x0 },
422 	{ "PAPI_btac_m",	"btb_misses",		0x0 },
423 	{ "PAPI_hw_int",	"hw_int_rx",		0x0 },
424 	{ "PAPI_br_cn",		"br_inst_retired",	0x0 },
425 	{ "PAPI_br_tkn",	"br_taken_retired",	0x0 },
426 	{ "PAPI_br_msp",	"br_miss_pred_taken_ret", 0x0 },
427 	{ "PAPI_br_ins",	"br_inst_retired",	0x0 },
428 	{ "PAPI_res_stl",	"resource_stalls",	0x0 },
429 	{ "PAPI_tot_iis",	"inst_decoder",		0x0 },
430 	{ "PAPI_tot_ins",	"inst_retired",		0x0 },
431 	{ "PAPI_tot_cyc",	"cpu_clk_unhalted",	0x0 },
432 	{ "PAPI_l1_dcm",	"dcu_lines_in",		0x0 },
433 	{ "PAPI_l1_icm",	"l2_ifetch",		0xf },
434 	{ "PAPI_l1_tcm",	"l2_rqsts",		0xf },
435 	{ "PAPI_l1_dca",	"data_mem_refs",	0x0 },
436 	{ "PAPI_l1_stm",	"l2_st",		0xf },
437 	{ "PAPI_l2_icm",	"bus_tran_ifetch",	0x0 },
438 	{ "PAPI_l2_dcr",	"l2_ld",		0xf },
439 	{ "PAPI_l2_dcw",	"l2_st",		0xf },
440 	{ "PAPI_l2_tcm",	"l2_lines_in",		0x0 },
441 	{ "PAPI_l2_tca",	"l2_rqsts",		0xf },
442 	{ "PAPI_l2_tcw",	"l2_st",		0xf },
443 	{ "PAPI_l2_stm",	"l2_m_lines_inm",	0x0 },
444 	{ "PAPI_fp_ins",	"flops",		0x0 },
445 	{ "PAPI_fp_ops",	"flops",		0x0 },
446 	{ "PAPI_fml_ins",	"mul",			0x0 },
447 	{ "PAPI_fdv_ins",	"div",			0x0 },
448 	CPC_GEN_END
449 };
450 
451 static const ptm_generic_event_t *P6_generic_names[2] = {
452 	_P6_generic_names,
453 	_P6_generic_names
454 };
455 
456 static const struct nametable **events;
457 static const ptm_generic_event_t **generic_events;
458 
459 #define	BITS(v, u, l)	\
460 	(((v) >> (l)) & ((1 << (1 + (u) - (l))) - 1))
461 
462 /*
463  * "Well known" bit fields in the Pentium CES register
464  * The interfaces in libcpc should make these #defines uninteresting.
465  */
466 #define	CPC_P5_CESR_ES0_SHIFT	0
467 #define	CPC_P5_CESR_ES0_MASK	0x3f
468 #define	CPC_P5_CESR_ES1_SHIFT	16
469 #define	CPC_P5_CESR_ES1_MASK	0x3f
470 
471 #define	CPC_P5_CESR_OS0		6
472 #define	CPC_P5_CESR_USR0	7
473 #define	CPC_P5_CESR_CLK0	8
474 #define	CPC_P5_CESR_PC0		9
475 #define	CPC_P5_CESR_OS1		(CPC_P5_CESR_OS0 + 16)
476 #define	CPC_P5_CESR_USR1	(CPC_P5_CESR_USR0 + 16)
477 #define	CPC_P5_CESR_CLK1	(CPC_P5_CESR_CLK0 + 16)
478 #define	CPC_P5_CESR_PC1		(CPC_P5_CESR_PC0 + 16)
479 
480 /*
481  * "Well known" bit fields in the Pentium Pro PerfEvtSel registers
482  * The interfaces in libcpc should make these #defines uninteresting.
483  */
484 #define	CPC_P6_PES_INV		23
485 #define	CPC_P6_PES_EN		22
486 #define	CPC_P6_PES_INT		20
487 #define	CPC_P6_PES_PC		19
488 #define	CPC_P6_PES_E		18
489 #define	CPC_P6_PES_OS		17
490 #define	CPC_P6_PES_USR		16
491 
492 #define	CPC_P6_PES_UMASK_SHIFT	8
493 #define	CPC_P6_PES_UMASK_MASK	(0xffu)
494 
495 #define	CPC_P6_PES_CMASK_SHIFT	24
496 #define	CPC_P6_PES_CMASK_MASK	(0xffu)
497 
498 #define	CPC_P6_PES_PIC0_MASK	(0xffu)
499 #define	CPC_P6_PES_PIC1_MASK	(0xffu)
500 
501 #define	P6_PES_EN	(UINT32_C(1) << CPC_P6_PES_EN)
502 #define	P6_PES_INT	(UINT32_C(1) << CPC_P6_PES_INT)
503 #define	P6_PES_OS	(UINT32_C(1) << CPC_P6_PES_OS)
504 
505 /*
506  * Pentium 5 attributes
507  */
508 #define	P5_NOEDGE	0x1	/* "noedge"	- no edge detection */
509 #define	P5_PC		0x2	/* "pc"		- pin control */
510 
511 /*
512  * Pentium 6 attributes
513  */
514 #define	P6_NOEDGE	0x1
515 #define	P6_PC		0x2
516 #define	P6_INV		0x4	/* "inv" - count inverted transitions */
517 #define	P6_INT		0x8	/* "int" - interrupt on overflow */
518 
519 /*
520  * CPU reference strings
521  */
522 
523 #define	P5_CPUREF	"See Appendix A.4 of the \"IA-32 Intel Architecture "  \
524 			"Software Developer's Manual Volume 3: System "	       \
525 			"Programming Guide,\" Order # 245472-012, 2003"
526 
527 #define	P6_CPUREF	"See Appendix A.3 of the \"IA-32 Intel Architecture "  \
528 			"Software Developer's Manual Volume 3: System "	       \
529 			"Programming Guide,\" Order # 245472-012, 2003"
530 
531 static int
532 ptm_pcbe_init(void)
533 {
534 	const struct nametable		*n;
535 	const ptm_generic_event_t	*gevp;
536 	int				i;
537 	size_t				size;
538 
539 	if (is_x86_feature(x86_featureset, X86FSET_MMX))
540 		ptm_rdpmc_avail = 1;
541 
542 	/*
543 	 * Discover type of CPU and set events pointer appropriately.
544 	 *
545 	 * Map family and model into the performance
546 	 * counter architectures we currently understand.
547 	 *
548 	 * See application note AP485 (from developer.intel.com)
549 	 * for further explanation.
550 	 */
551 	if (cpuid_getvendor(CPU) != X86_VENDOR_Intel)
552 		return (-1);
553 	switch (cpuid_getfamily(CPU)) {
554 	case 5:		/* Pentium and Pentium with MMX */
555 		events = P5mmx_names;
556 		generic_events = P5mmx_generic_names;
557 		ptm_ver = PTM_VER_P5;
558 		ptm_cpuref = P5_CPUREF;
559 		if (cpuid_getmodel(CPU) < 4)
560 			ptm_impl_name = "Pentium";
561 		else
562 			ptm_impl_name = "Pentium with MMX";
563 		break;
564 	case 6:		/* Pentium Pro and Pentium II and III */
565 		events = P6_names;
566 		generic_events = P6_generic_names;
567 		ptm_ver = PTM_VER_P6;
568 		ptm_cpuref = P6_CPUREF;
569 		ptm_pcbe_ops.pcbe_caps = CPC_CAP_OVERFLOW_INTERRUPT;
570 		if (is_x86_feature(x86_featureset, X86FSET_MMX))
571 			ptm_impl_name = "Pentium Pro with MMX, Pentium II";
572 		else
573 			ptm_impl_name = "Pentium Pro, Pentium II";
574 		break;
575 	default:
576 		return (-1);
577 	}
578 
579 	/*
580 	 * Initialize the list of events for each PIC.
581 	 * Do two passes: one to compute the size necessary and another
582 	 * to copy the strings. Need room for event, comma, and NULL terminator.
583 	 */
584 	for (i = 0; i < 2; i++) {
585 		size = 0;
586 		for (n = events[i]; n->bits != NT_END; n++)
587 			size += strlen(n->name) + 1;
588 		for (gevp = generic_events[i]; gevp->name != NULL; gevp++)
589 			size += strlen(gevp->name) + 1;
590 		pic_events[i] = kmem_alloc(size + 1, KM_SLEEP);
591 		*pic_events[i] = '\0';
592 		for (n = events[i]; n->bits != NT_END; n++) {
593 			(void) strcat(pic_events[i], n->name);
594 			(void) strcat(pic_events[i], ",");
595 		}
596 		for (gevp = generic_events[i]; gevp->name != NULL; gevp++) {
597 			(void) strcat(pic_events[i], gevp->name);
598 			(void) strcat(pic_events[i], ",");
599 		}
600 
601 		/*
602 		 * Remove trailing comma.
603 		 */
604 		pic_events[i][size - 1] = '\0';
605 	}
606 
607 	return (0);
608 }
609 
610 static uint_t
611 ptm_pcbe_ncounters(void)
612 {
613 	return (2);
614 }
615 
616 static const char *
617 ptm_pcbe_impl_name(void)
618 {
619 	return (ptm_impl_name);
620 }
621 
622 static const char *
623 ptm_pcbe_cpuref(void)
624 {
625 	return (ptm_cpuref);
626 }
627 
628 static char *
629 ptm_pcbe_list_events(uint_t picnum)
630 {
631 	ASSERT(picnum >= 0 && picnum < cpc_ncounters);
632 
633 	if (pic_events[0] == NULL) {
634 		ASSERT(pic_events[1] == NULL);
635 	}
636 
637 	return (pic_events[picnum]);
638 }
639 
640 static char *
641 ptm_pcbe_list_attrs(void)
642 {
643 	if (ptm_ver == PTM_VER_P5)
644 		return ("noedge,pc");
645 	else
646 		return ("noedge,pc,inv,int,umask,cmask");
647 }
648 
649 static const ptm_generic_event_t *
650 find_generic_event(int regno, char *name)
651 {
652 	const ptm_generic_event_t	*gevp;
653 
654 	for (gevp = generic_events[regno]; gevp->name != NULL; gevp++)
655 		if (strcmp(name, gevp->name) == 0)
656 			return (gevp);
657 
658 	return (NULL);
659 }
660 
661 static const struct nametable *
662 find_event(int regno, char *name)
663 {
664 	const struct nametable *n;
665 
666 	n = events[regno];
667 
668 	for (; n->bits != NT_END; n++)
669 		if (strcmp(name, n->name) == 0)
670 			return (n);
671 
672 	return (NULL);
673 }
674 
/*
 * Return a bitmap of the counters able to count 'event': bit 0 for pic0,
 * bit 1 for pic1.  Both native and generic event names are considered.
 */
static uint64_t
ptm_pcbe_event_coverage(char *event)
{
	uint64_t	mask = 0;
	int		pic;

	for (pic = 0; pic < 2; pic++) {
		if (find_event(pic, event) != NULL ||
		    find_generic_event(pic, event) != NULL)
			mask |= (UINT64_C(1) << pic);
	}

	return (mask);
}
689 
690 static uint64_t
691 ptm_pcbe_overflow_bitmap(void)
692 {
693 	uint64_t	ret = 0;
694 	uint64_t	pes[2];
695 
696 	/*
697 	 * P5 is not capable of generating interrupts.
698 	 */
699 	ASSERT(ptm_ver == PTM_VER_P6);
700 
701 	/*
702 	 * CPC could have caused an interrupt provided that
703 	 *
704 	 * 1) Counters are enabled
705 	 * 2) Either counter has requested an interrupt
706 	 */
707 
708 	pes[0] = rdmsr(REG_PERFEVNT0);
709 	if (((uint32_t)pes[0] & P6_PES_EN) != P6_PES_EN)
710 		return (0);
711 
712 	/*
713 	 * If a particular counter requested an interrupt, assume it caused
714 	 * this interrupt. There is no way to determine which counter overflowed
715 	 * on this hardware other than by using unreliable heuristics.
716 	 */
717 
718 	pes[1] = rdmsr(REG_PERFEVNT1);
719 	if ((uint32_t)pes[0] & P6_PES_INT)
720 		ret |= 0x1;
721 	if ((uint32_t)pes[1] & P6_PES_INT)
722 		ret |= 0x2;
723 
724 	return (ret);
725 }
726 
727 /*ARGSUSED*/
728 static int
729 ptm_pcbe_configure(uint_t picnum, char *eventname, uint64_t preset,
730     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
731     void *token)
732 {
733 	ptm_pcbe_config_t		*conf;
734 	const struct nametable		*n;
735 	const ptm_generic_event_t	*gevp;
736 	struct nametable		nt_raw = { 0, "raw" };
737 	int				i;
738 	int				ptm_flags = 0;
739 
740 	/*
741 	 * If we've been handed an existing configuration, we need only preset
742 	 * the counter value.
743 	 */
744 	if (*data != NULL) {
745 		conf = *data;
746 		conf->ptm_rawpic = trunc3931(preset);
747 		return (0);
748 	}
749 
750 	if (picnum != 0 && picnum != 1)
751 		return (CPC_INVALID_PICNUM);
752 
753 	conf = kmem_alloc(sizeof (ptm_pcbe_config_t), KM_SLEEP);
754 
755 	conf->ptm_picno = picnum;
756 	conf->ptm_rawpic = trunc3931(preset);
757 	conf->ptm_ctl = 0;
758 
759 	if ((n = find_event(picnum, eventname)) == NULL) {
760 		if ((gevp = find_generic_event(picnum, eventname)) != NULL) {
761 			n = find_event(picnum, gevp->event);
762 			ASSERT(n != NULL);
763 
764 			if (nattrs > 0) {
765 				kmem_free(conf, sizeof (ptm_pcbe_config_t));
766 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
767 			}
768 
769 			if (ptm_ver == PTM_VER_P6)
770 				conf->ptm_ctl |= gevp->umask <<
771 				    CPC_P6_PES_UMASK_SHIFT;
772 		} else {
773 			long tmp;
774 
775 			/*
776 			 * If ddi_strtol() likes this event, use it as a raw
777 			 * event code.
778 			 */
779 			if (ddi_strtol(eventname, NULL, 0, &tmp) != 0) {
780 				kmem_free(conf, sizeof (ptm_pcbe_config_t));
781 				return (CPC_INVALID_EVENT);
782 			}
783 
784 			nt_raw.bits = tmp;
785 
786 			if (ptm_ver == PTM_VER_P5)
787 				nt_raw.bits &= CPC_P5_CESR_ES0_MASK;
788 			else
789 				nt_raw.bits &= CPC_P6_PES_PIC0_MASK;
790 
791 			n = &nt_raw;
792 		}
793 	}
794 
795 	if (ptm_ver == PTM_VER_P5) {
796 		int picshift;
797 		picshift = (picnum == 0) ? 0 : 16;
798 
799 		for (i = 0; i < nattrs; i++) {
800 			/*
801 			 * Value of these attributes is ignored; their presence
802 			 * alone tells us to set the corresponding flag.
803 			 */
804 			if (strncmp(attrs[i].ka_name, "noedge", 7) == 0) {
805 				if (attrs[i].ka_val != 0)
806 					ptm_flags |= P5_NOEDGE;
807 			} else if (strncmp(attrs[i].ka_name, "pc", 3) == 0) {
808 				if (attrs[i].ka_val != 0)
809 					ptm_flags |= P5_PC;
810 			} else {
811 				kmem_free(conf, sizeof (ptm_pcbe_config_t));
812 				return (CPC_INVALID_ATTRIBUTE);
813 			}
814 		}
815 
816 		if (flags & CPC_COUNT_USER)
817 			conf->ptm_ctl |= (1 << (CPC_P5_CESR_USR0 + picshift));
818 		if (flags & CPC_COUNT_SYSTEM)
819 			conf->ptm_ctl |= (1 << (CPC_P5_CESR_OS0 + picshift));
820 		if (ptm_flags & P5_NOEDGE)
821 			conf->ptm_ctl |= (1 << (CPC_P5_CESR_CLK0 + picshift));
822 		if (ptm_flags & P5_PC)
823 			conf->ptm_ctl |= (1 << (CPC_P5_CESR_PC0 + picshift));
824 
825 		ASSERT((n->bits | CPC_P5_CESR_ES0_MASK) ==
826 		    CPC_P5_CESR_ES0_MASK);
827 
828 		conf->ptm_ctl |= (n->bits << picshift);
829 	} else {
830 		for (i = 0; i < nattrs; i++) {
831 			if (strncmp(attrs[i].ka_name, "noedge", 6) == 0) {
832 				if (attrs[i].ka_val != 0)
833 					ptm_flags |= P6_NOEDGE;
834 			} else if (strncmp(attrs[i].ka_name, "pc", 2) == 0) {
835 				if (attrs[i].ka_val != 0)
836 					ptm_flags |= P6_PC;
837 			} else if (strncmp(attrs[i].ka_name, "inv", 3) == 0) {
838 				if (attrs[i].ka_val != 0)
839 					ptm_flags |= P6_INV;
840 			} else if (strncmp(attrs[i].ka_name, "umask", 5) == 0) {
841 				if ((attrs[i].ka_val | CPC_P6_PES_UMASK_MASK) !=
842 				    CPC_P6_PES_UMASK_MASK) {
843 					kmem_free(conf,
844 					    sizeof (ptm_pcbe_config_t));
845 					return (CPC_ATTRIBUTE_OUT_OF_RANGE);
846 				}
847 				conf->ptm_ctl |= (uint8_t)attrs[i].ka_val <<
848 				    CPC_P6_PES_UMASK_SHIFT;
849 			} else if (strncmp(attrs[i].ka_name, "cmask", 5) == 0) {
850 				if ((attrs[i].ka_val | CPC_P6_PES_CMASK_MASK) !=
851 				    CPC_P6_PES_CMASK_MASK) {
852 					kmem_free(conf,
853 					    sizeof (ptm_pcbe_config_t));
854 					return (CPC_ATTRIBUTE_OUT_OF_RANGE);
855 				}
856 				conf->ptm_ctl |= (uint8_t)attrs[i].ka_val <<
857 				    CPC_P6_PES_CMASK_SHIFT;
858 			} else if (strncmp(attrs[i].ka_name, "int", 3) == 0) {
859 				if (attrs[i].ka_val != 0)
860 					ptm_flags |= P6_INT;
861 			} else {
862 				kmem_free(conf, sizeof (ptm_pcbe_config_t));
863 				return (CPC_INVALID_ATTRIBUTE);
864 			}
865 		}
866 
867 		if (flags & CPC_OVF_NOTIFY_EMT)
868 			/*
869 			 * If the user has requested notification of overflows,
870 			 * we automatically program the hardware to generate
871 			 * overflow interrupts.
872 			 */
873 			ptm_flags |= P6_INT;
874 		if (flags & CPC_COUNT_USER)
875 			conf->ptm_ctl |= (1 << CPC_P6_PES_USR);
876 		if (flags & CPC_COUNT_SYSTEM)
877 			conf->ptm_ctl |= (1 << CPC_P6_PES_OS);
878 		if ((ptm_flags & P6_NOEDGE) == 0)
879 			conf->ptm_ctl |= (1 << CPC_P6_PES_E);
880 		if (ptm_flags & P6_PC)
881 			conf->ptm_ctl |= (1 << CPC_P6_PES_PC);
882 		if (ptm_flags & P6_INV)
883 			conf->ptm_ctl |= (1 << CPC_P6_PES_INV);
884 		if (ptm_flags & P6_INT)
885 			conf->ptm_ctl |= (1 << CPC_P6_PES_INT);
886 
887 		ASSERT((n->bits | CPC_P6_PES_PIC0_MASK) ==
888 		    CPC_P6_PES_PIC0_MASK);
889 
890 		conf->ptm_ctl |= n->bits;
891 	}
892 
893 	*data = conf;
894 	return (0);
895 }
896 
897 static void
898 ptm_pcbe_program(void *token)
899 {
900 	ptm_pcbe_config_t	*pic0;
901 	ptm_pcbe_config_t	*pic1;
902 	ptm_pcbe_config_t	*tmp;
903 	ptm_pcbe_config_t	empty = { 1, 0, 0 }; /* assume pic1 to start */
904 
905 	if ((pic0 = kcpc_next_config(token, NULL, NULL)) == NULL)
906 		panic("ptm_pcbe: token %p has no configs", token);
907 
908 	if ((pic1 = kcpc_next_config(token, pic0, NULL)) == NULL)
909 		pic1 = &empty;
910 
911 	if (pic0->ptm_picno != 0) {
912 		empty.ptm_picno = 0;
913 		tmp = pic1;
914 		pic1 = pic0;
915 		pic0 = tmp;
916 	}
917 
918 	ASSERT(pic0->ptm_picno == 0 && pic1->ptm_picno == 1);
919 
920 	if (ptm_rdpmc_avail) {
921 		ulong_t curcr4 = getcr4();
922 		if (kcpc_allow_nonpriv(token))
923 			setcr4(curcr4 | CR4_PCE);
924 		else
925 			setcr4(curcr4 & ~CR4_PCE);
926 	}
927 
928 	if (ptm_ver == PTM_VER_P5) {
929 		wrmsr(P5_CESR, ALL_STOPPED);
930 		wrmsr(P5_CTR0, pic0->ptm_rawpic);
931 		wrmsr(P5_CTR1, pic1->ptm_rawpic);
932 		wrmsr(P5_CESR, pic0->ptm_ctl | pic1->ptm_ctl);
933 		pic0->ptm_rawpic = rdmsr(P5_CTR0);
934 		pic1->ptm_rawpic = rdmsr(P5_CTR1);
935 	} else {
936 		uint64_t	pes;
937 		wrmsr(REG_PERFEVNT0, ALL_STOPPED);
938 		wrmsr(REG_PERFCTR0, pic0->ptm_rawpic);
939 		wrmsr(REG_PERFCTR1, pic1->ptm_rawpic);
940 		pes = pic1->ptm_ctl;
941 		DTRACE_PROBE1(ptm__pes1, uint64_t, pes);
942 		wrmsr(REG_PERFEVNT1, pes);
943 		pes = pic0->ptm_ctl | (1 << CPC_P6_PES_EN);
944 		DTRACE_PROBE1(ptm__pes0, uint64_t, pes);
945 		wrmsr(REG_PERFEVNT0, pes);
946 	}
947 }
948 
949 static void
950 ptm_pcbe_allstop(void)
951 {
952 	if (ptm_ver == PTM_VER_P5)
953 		wrmsr(P5_CESR, ALL_STOPPED);
954 	else {
955 		wrmsr(REG_PERFEVNT0, ALL_STOPPED);
956 		setcr4(getcr4() & ~CR4_PCE);
957 	}
958 }
959 
960 static void
961 ptm_pcbe_sample(void *token)
962 {
963 	ptm_pcbe_config_t	*pic0;
964 	ptm_pcbe_config_t	*pic1;
965 	ptm_pcbe_config_t	*swap;
966 	ptm_pcbe_config_t	empty = { 1, 0, 0 }; /* assume pic1 to start */
967 	uint64_t		tmp;
968 	uint64_t		*pic0_data;
969 	uint64_t		*pic1_data;
970 	uint64_t		*dtmp;
971 	uint64_t		curpic[2];
972 
973 	if ((pic0 = kcpc_next_config(token, NULL, &pic0_data)) == NULL)
974 		panic("ptm_pcbe: token %p has no configs", token);
975 
976 	if ((pic1 = kcpc_next_config(token, pic0, &pic1_data)) == NULL) {
977 		pic1 = &empty;
978 		pic1_data = &tmp;
979 	}
980 
981 	if (pic0->ptm_picno != 0) {
982 		empty.ptm_picno = 0;
983 		swap = pic0;
984 		pic0 = pic1;
985 		pic1 = swap;
986 		dtmp = pic0_data;
987 		pic0_data = pic1_data;
988 		pic1_data = dtmp;
989 	}
990 
991 	ASSERT(pic0->ptm_picno == 0 && pic1->ptm_picno == 1);
992 
993 	if (ptm_ver == PTM_VER_P5) {
994 		curpic[0] = rdmsr(P5_CTR0);
995 		curpic[1] = rdmsr(P5_CTR1);
996 	} else {
997 		curpic[0] = rdmsr(REG_PERFCTR0);
998 		curpic[1] = rdmsr(REG_PERFCTR1);
999 	}
1000 
1001 	DTRACE_PROBE1(ptm__curpic0, uint64_t, curpic[0]);
1002 	DTRACE_PROBE1(ptm__curpic1, uint64_t, curpic[1]);
1003 
1004 	*pic0_data += diff3931(curpic[0], pic0->ptm_rawpic);
1005 	pic0->ptm_rawpic = trunc3931(*pic0_data);
1006 
1007 	*pic1_data += diff3931(curpic[1], pic1->ptm_rawpic);
1008 	pic1->ptm_rawpic = trunc3931(*pic1_data);
1009 }
1010 
1011 static void
1012 ptm_pcbe_free(void *config)
1013 {
1014 	kmem_free(config, sizeof (ptm_pcbe_config_t));
1015 }
1016 
1017 /*
1018  * Virtualizes the 40-bit field of the %pic
1019  * register into a 64-bit software register.
1020  *
1021  * We can retrieve 40 (signed) bits from the counters,
1022  * but we can set only 32 (signed) bits into the counters.
1023  * This makes virtualizing more than 31-bits of registers
1024  * quite tricky.
1025  *
1026  * If bits 39 to 31 are set in the virtualized pic register,
1027  * then we can preset the counter to this value using the fact
1028  * that wrmsr sign extends bit 31.   Though it might look easier
1029  * to only use the bottom 31-bits of the register, we have to allow
1030  * the full 40-bits to be used to perform overflow profiling.
1031  */
1032 
#define	MASK40		UINT64_C(0xffffffffff)
#define	MASK31		UINT64_C(0x7fffffff)
#define	BITS_39_31	UINT64_C(0xff80000000)

/*
 * Return the number of events counted between 'old' (the value the counter
 * was last known to hold, as produced by trunc3931()) and 'sample' (the
 * value just read from the hardware).
 *
 * If bits 39..31 of 'old' are all set, the counter is treated as 40 bits
 * wide; otherwise as 31 bits wide.  A sample that is numerically below
 * 'old' is taken to have wrapped once at that width.
 */
static int64_t
diff3931(uint64_t sample, uint64_t old)
{
	uint64_t	field;	/* bits of the sample that are significant */
	uint64_t	wrap;	/* counter modulus at that width */
	int64_t		delta;

	if ((old & BITS_39_31) == BITS_39_31) {
		field = MASK40;
		wrap = UINT64_C(1) << 40;
	} else {
		field = MASK31;
		wrap = UINT64_C(1) << 31;
	}

	delta = (sample & field) - old;
	if (delta < 0)
		delta += wrap;

	return (delta);
}

/*
 * Reduce a 64-bit virtualized count to the value that is kept for the
 * hardware counter: the low 40 bits when bits 39..31 are all set, the low
 * 31 bits otherwise.
 */
static uint64_t
trunc3931(uint64_t value)
{
	uint64_t keep;

	keep = ((value & BITS_39_31) == BITS_39_31) ? MASK40 : MASK31;

	return (value & keep);
}
1061 
1062 static struct modlpcbe modlpcbe = {
1063 	&mod_pcbeops,
1064 	"Pentium Performance Counters",
1065 	&ptm_pcbe_ops
1066 };
1067 
1068 static struct modlinkage modl = {
1069 	MODREV_1,
1070 	&modlpcbe,
1071 };
1072 
1073 int
1074 _init(void)
1075 {
1076 	if (ptm_pcbe_init() != 0)
1077 		return (ENOTSUP);
1078 	return (mod_install(&modl));
1079 }
1080 
1081 int
1082 _fini(void)
1083 {
1084 	return (mod_remove(&modl));
1085 }
1086 
1087 int
1088 _info(struct modinfo *mi)
1089 {
1090 	return (mod_info(&modl, mi));
1091 }
1092