xref: /titanic_41/usr/src/uts/intel/pcbe/p123_pcbe.c (revision 8461248208fabd3a8230615f8615e5bf1b4dcdcb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Performance Counter Back-End for Pentiums I, II, and III.
31  */
32 
33 #include <sys/cpuvar.h>
34 #include <sys/param.h>
35 #include <sys/cpc_impl.h>
36 #include <sys/cpc_pcbe.h>
37 #include <sys/modctl.h>
38 #include <sys/inttypes.h>
39 #include <sys/systm.h>
40 #include <sys/cmn_err.h>
41 #include <sys/x86_archext.h>
42 #include <sys/sdt.h>
43 #include <sys/archsystm.h>
44 #include <sys/privregs.h>
45 
46 static int64_t diff3931(uint64_t sample, uint64_t old);
47 static uint64_t trunc3931(uint64_t value);
48 
49 static int ptm_pcbe_init(void);
50 static uint_t ptm_pcbe_ncounters(void);
51 static const char *ptm_pcbe_impl_name(void);
52 static const char *ptm_pcbe_cpuref(void);
53 static char *ptm_pcbe_list_events(uint_t picnum);
54 static char *ptm_pcbe_list_attrs(void);
55 static uint64_t ptm_pcbe_event_coverage(char *event);
56 static int ptm_pcbe_pic_index(char *picname);
57 static uint64_t	ptm_pcbe_overflow_bitmap(void);
58 static int ptm_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
59     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
60     void *token);
61 static void ptm_pcbe_program(void *token);
62 static void ptm_pcbe_allstop(void);
63 static void ptm_pcbe_sample(void *token);
64 static void ptm_pcbe_free(void *config);
65 
/*
 * Operations vector for this back-end; it is referenced by modlpcbe so the
 * module framework can hand it to the CPC subsystem. The initializer is
 * positional. ptm_pcbe_init() later stores CPC_CAP_OVERFLOW_INTERRUPT into
 * the pcbe_caps member on family 6 processors.
 * NOTE(review): the literal 0 in the second slot is presumably pcbe_caps --
 * confirm against the pcbe_ops_t declaration.
 */
pcbe_ops_t ptm_pcbe_ops = {
	PCBE_VER_1,
	0,
	ptm_pcbe_ncounters,
	ptm_pcbe_impl_name,
	ptm_pcbe_cpuref,
	ptm_pcbe_list_events,
	ptm_pcbe_list_attrs,
	ptm_pcbe_event_coverage,
	ptm_pcbe_overflow_bitmap,
	ptm_pcbe_configure,
	ptm_pcbe_program,
	ptm_pcbe_allstop,
	ptm_pcbe_sample,
	ptm_pcbe_free
};
82 
/*
 * Counter architecture in use. ptm_pcbe_init() selects PTM_VER_P5 for CPU
 * family 5 and PTM_VER_P6 for CPU family 6.
 */
typedef enum _ptm_ver {
	PTM_VER_P5,
	PTM_VER_P6
} ptm_ver_t;
87 
/* Set once by ptm_pcbe_init(); read by most of the entry points below. */
static ptm_ver_t ptm_ver;
/* Strings handed back by ptm_pcbe_impl_name() and ptm_pcbe_cpuref(). */
static const char *ptm_impl_name;
static const char *ptm_cpuref;
/* Comma-separated event name list for each PIC, built by ptm_pcbe_init(). */
static char *pic_events[2] = { NULL, NULL };

/*
 * Indicates whether the "rdpmc" instruction is available on this processor.
 * ptm_pcbe_init() sets it when x86_feature reports X86_MMX.
 */
static int ptm_rdpmc_avail = 0;

/*
 * Zero value written to P5_CESR or REG_PERFEVNT0 by ptm_pcbe_program() and
 * ptm_pcbe_allstop().
 */
static const uint64_t allstopped = 0;
99 
/*
 * Configuration for one counter request. Allocated by ptm_pcbe_configure()
 * and released by ptm_pcbe_free().
 */
typedef struct _ptm_pcbe_config {
	uint8_t		ptm_picno;	/* 0 for pic0 or 1 for pic1 */
	uint32_t	ptm_ctl;    /* P6: PerfEventSelect; P5: cesr, shifted */
	/*
	 * Value written to the counter MSR by ptm_pcbe_program() and
	 * refreshed by ptm_pcbe_sample().
	 */
	uint64_t	ptm_rawpic;
} ptm_pcbe_config_t;
105 
/*
 * One row of an event table: the 8-bit event code and the name users give
 * for it. find_event() and ptm_pcbe_init() walk a table until they reach a
 * row whose bits field equals NT_END.
 */
struct nametable {
	const uint8_t	bits;
	const char	*name;
};
110 
111 #define	NT_END 0xFF
112 
/*
 * Basic Pentium events
 *
 * Rows common to both family 5 counters. The macro is expanded at the top
 * of P5mmx_names0[] and P5mmx_names1[], each of which then appends its own
 * rows and the NT_END terminator.
 */
#define	P5_EVENTS				\
	{0x0,	"data_read"},			\
	{0x1,	"data_write"},			\
	{0x2,	"data_tlb_miss"},		\
	{0x3,	"data_read_miss"},		\
	{0x4,	"data_write_miss"},		\
	{0x5,	"write_hit_to_M_or_E"},		\
	{0x6,	"dcache_lines_wrback"},		\
	{0x7,	"external_snoops"},		\
	{0x8,	"external_dcache_snoop_hits"},	\
	{0x9,	"memory_access_in_both_pipes"},	\
	{0xa,	"bank_conflicts"},		\
	{0xb,	"misaligned_ref"},		\
	{0xc,	"code_read"},			\
	{0xd,	"code_tlb_miss"},		\
	{0xe,	"code_cache_miss"},		\
	{0xf,	"any_segreg_loaded"},		\
	{0x12,	"branches"},			\
	{0x13,	"btb_hits"},			\
	{0x14,	"taken_or_btb_hit"},		\
	{0x15,	"pipeline_flushes"},		\
	{0x16,	"instr_exec"},			\
	{0x17,	"instr_exec_V_pipe"},		\
	{0x18,	"clks_bus_cycle"},		\
	{0x19,	"clks_full_wbufs"},		\
	{0x1a,	"pipe_stall_read"},		\
	{0x1b,	"stall_on_write_ME"},		\
	{0x1c,	"locked_bus_cycle"},		\
	{0x1d,	"io_rw_cycles"},		\
	{0x1e,	"reads_noncache_mem"},		\
	{0x1f,	"pipeline_agi_stalls"},		\
	{0x22,	"flops"},			\
	{0x23,	"bp_match_dr0"},		\
	{0x24,	"bp_match_dr1"},		\
	{0x25,	"bp_match_dr2"},		\
	{0x26,	"bp_match_dr3"},		\
	{0x27,	"hw_intrs"},			\
	{0x28,	"data_rw"},			\
	{0x29,	"data_rw_miss"}
155 
/*
 * Event table for pic0 on family 5 processors: the shared P5_EVENTS rows
 * followed by codes 0x2a-0x3b under the names they carry on this counter
 * (P5mmx_names1[] gives the same codes different names).
 * NOTE(review): ptm_pcbe_init() installs this table for every family 5
 * model, including those it labels plain "Pentium" -- confirm the extra
 * rows are meant to be offered there.
 */
static const struct nametable P5mmx_names0[] = {
	P5_EVENTS,
	{0x2a,	"bus_ownership_latency"},
	{0x2b,	"mmx_instr_upipe"},
	{0x2c,	"cache_M_line_sharing"},
	{0x2d,	"emms_instr"},
	{0x2e,	"bus_util_processor"},
	{0x2f,	"sat_mmx_instr"},
	{0x30,	"clks_not_HLT"},
	{0x31,	"mmx_data_read"},
	{0x32,	"clks_fp_stall"},
	{0x33,	"d1_starv_fifo_0"},
	{0x34,	"mmx_data_write"},
	{0x35,	"pipe_flush_wbp"},
	{0x36,	"mmx_misalign_data_refs"},
	{0x37,	"rets_pred_incorrect"},
	{0x38,	"mmx_multiply_unit_interlock"},
	{0x39,	"rets"},
	{0x3a,	"btb_false_entries"},
	{0x3b,	"clocks_stall_full_wb"},
	{NT_END, ""}
};
178 
/*
 * Event table for pic1 on family 5 processors: the shared P5_EVENTS rows
 * followed by codes 0x2a-0x3b under the names they carry on this counter
 * (P5mmx_names0[] gives the same codes different names).
 */
static const struct nametable P5mmx_names1[] = {
	P5_EVENTS,
	{0x2a,	"bus_ownership_transfers"},
	{0x2b,	"mmx_instr_vpipe"},
	{0x2c,	"cache_lint_sharing"},
	{0x2d,	"mmx_fp_transitions"},
	{0x2e,	"writes_noncache_mem"},
	{0x2f,	"sats_performed"},
	{0x30,	"clks_dcache_tlb_miss"},
	{0x31,	"mmx_data_read_miss"},
	{0x32,	"taken_br"},
	{0x33,	"d1_starv_fifo_1"},
	{0x34,	"mmx_data_write_miss"},
	{0x35,	"pipe_flush_wbp_wb"},
	{0x36,	"mmx_pipe_stall_data_read"},
	{0x37,	"rets_pred"},
	{0x38,	"movd_movq_stall"},
	{0x39,	"rsb_overflow"},
	{0x3a,	"btb_mispred_nt"},
	{0x3b,	"mmx_stall_write_ME"},
	{NT_END, ""}
};
201 
/*
 * Family 5 event tables indexed by PIC number; ptm_pcbe_init() points
 * "events" here for family 5 processors.
 */
static const struct nametable *P5mmx_names[2] = {
	P5mmx_names0,
	P5mmx_names1
};
206 
207 /*
208  * Pentium Pro and Pentium II events
209  */
210 static const struct nametable _P6_names[] = {
211 	/*
212 	 * Data cache unit
213 	 */
214 	{0x43,	"data_mem_refs"},
215 	{0x45,	"dcu_lines_in"},
216 	{0x46,	"dcu_m_lines_in"},
217 	{0x47,	"dcu_m_lines_out"},
218 	{0x48,	"dcu_miss_outstanding"},
219 
220 	/*
221 	 * Instruction fetch unit
222 	 */
223 	{0x80,	"ifu_ifetch"},
224 	{0x81,	"ifu_ifetch_miss"},
225 	{0x85,	"itlb_miss"},
226 	{0x86,	"ifu_mem_stall"},
227 	{0x87,	"ild_stall"},
228 
229 	/*
230 	 * L2 cache
231 	 */
232 	{0x28,	"l2_ifetch"},
233 	{0x29,	"l2_ld"},
234 	{0x2a,	"l2_st"},
235 	{0x24,	"l2_lines_in"},
236 	{0x26,	"l2_lines_out"},
237 	{0x25,	"l2_m_lines_inm"},
238 	{0x27,	"l2_m_lines_outm"},
239 	{0x2e,	"l2_rqsts"},
240 	{0x21,	"l2_ads"},
241 	{0x22,	"l2_dbus_busy"},
242 	{0x23,	"l2_dbus_busy_rd"},
243 
244 	/*
245 	 * External bus logic
246 	 */
247 	{0x62,	"bus_drdy_clocks"},
248 	{0x63,	"bus_lock_clocks"},
249 	{0x60,	"bus_req_outstanding"},
250 	{0x65,	"bus_tran_brd"},
251 	{0x66,	"bus_tran_rfo"},
252 	{0x67,	"bus_trans_wb"},
253 	{0x68,	"bus_tran_ifetch"},
254 	{0x69,	"bus_tran_inval"},
255 	{0x6a,	"bus_tran_pwr"},
256 	{0x6b,	"bus_trans_p"},
257 	{0x6c,	"bus_trans_io"},
258 	{0x6d,	"bus_tran_def"},
259 	{0x6e,	"bus_tran_burst"},
260 	{0x70,	"bus_tran_any"},
261 	{0x6f,	"bus_tran_mem"},
262 	{0x64,	"bus_data_rcv"},
263 	{0x61,	"bus_bnr_drv"},
264 	{0x7a,	"bus_hit_drv"},
265 	{0x7b,	"bus_hitm_drv"},
266 	{0x7e,	"bus_snoop_stall"},
267 
268 	/*
269 	 * Floating point unit
270 	 */
271 	{0xc1,	"flops"},		/* 0 only */
272 	{0x10,	"fp_comp_ops_exe"},	/* 0 only */
273 	{0x11,	"fp_assist"},		/* 1 only */
274 	{0x12,	"mul"},			/* 1 only */
275 	{0x13,	"div"},			/* 1 only */
276 	{0x14,	"cycles_div_busy"},	/* 0 only */
277 
278 	/*
279 	 * Memory ordering
280 	 */
281 	{0x3,	"ld_blocks"},
282 	{0x4,	"sb_drains"},
283 	{0x5,	"misalign_mem_ref"},
284 
285 	/*
286 	 * Instruction decoding and retirement
287 	 */
288 	{0xc0,	"inst_retired"},
289 	{0xc2,	"uops_retired"},
290 	{0xd0,	"inst_decoder"},
291 
292 	/*
293 	 * Interrupts
294 	 */
295 	{0xc8,	"hw_int_rx"},
296 	{0xc6,	"cycles_int_masked"},
297 	{0xc7,	"cycles_int_pending_and_masked"},
298 
299 	/*
300 	 * Branches
301 	 */
302 	{0xc4,	"br_inst_retired"},
303 	{0xc5,	"br_miss_pred_retired"},
304 	{0xc9,	"br_taken_retired"},
305 	{0xca,	"br_miss_pred_taken_ret"},
306 	{0xe0,	"br_inst_decoded"},
307 	{0xe2,	"btb_misses"},
308 	{0xe4,	"br_bogus"},
309 	{0xe6,	"baclears"},
310 
311 	/*
312 	 * Stalls
313 	 */
314 	{0xa2,	"resource_stalls"},
315 	{0xd2,	"partial_rat_stalls"},
316 
317 	/*
318 	 * Segment register loads
319 	 */
320 	{0x6,	"segment_reg_loads"},
321 
322 	/*
323 	 * Clocks
324 	 */
325 	{0x79,	"cpu_clk_unhalted"},
326 
327 	/*
328 	 * MMX
329 	 */
330 	{0xb0,	"mmx_instr_exec"},
331 	{0xb1,	"mmx_sat_instr_exec"},
332 	{0xb2,	"mmx_uops_exec"},
333 	{0xb3,	"mmx_instr_type_exec"},
334 	{0xcc,	"fp_mmx_trans"},
335 	{0xcd,	"mmx_assists"},
336 	{0xce,	"mmx_instr_ret"},
337 	{0xd4,	"seg_rename_stalls"},
338 	{0xd5,	"seg_reg_renames"},
339 	{0xd6,	"ret_seg_renames"},
340 
341 	{NT_END, ""}
342 };
343 
344 static const struct nametable *P6_names[2] = {
345 	_P6_names,
346 	_P6_names
347 };
348 
/*
 * Event tables for the running processor, indexed by PIC number. Set by
 * ptm_pcbe_init() to either P5mmx_names or P6_names.
 */
static const struct nametable **events;
350 
351 #define	BITS(v, u, l)	\
352 	(((v) >> (l)) & ((1 << (1 + (u) - (l))) - 1))
353 
354 /*
355  * "Well known" bit fields in the Pentium CES register
356  * The interfaces in libcpc should make these #defines uninteresting.
357  */
358 #define	CPC_P5_CESR_ES0_SHIFT	0
359 #define	CPC_P5_CESR_ES0_MASK	0x3f
360 #define	CPC_P5_CESR_ES1_SHIFT	16
361 #define	CPC_P5_CESR_ES1_MASK	0x3f
362 
363 #define	CPC_P5_CESR_OS0		6
364 #define	CPC_P5_CESR_USR0	7
365 #define	CPC_P5_CESR_CLK0	8
366 #define	CPC_P5_CESR_PC0		9
367 #define	CPC_P5_CESR_OS1		(CPC_P5_CESR_OS0 + 16)
368 #define	CPC_P5_CESR_USR1	(CPC_P5_CESR_USR0 + 16)
369 #define	CPC_P5_CESR_CLK1	(CPC_P5_CESR_CLK0 + 16)
370 #define	CPC_P5_CESR_PC1		(CPC_P5_CESR_PC0 + 16)
371 
372 /*
373  * "Well known" bit fields in the Pentium Pro PerfEvtSel registers
374  * The interfaces in libcpc should make these #defines uninteresting.
375  */
376 #define	CPC_P6_PES_INV		23
377 #define	CPC_P6_PES_EN		22
378 #define	CPC_P6_PES_INT		20
379 #define	CPC_P6_PES_PC		19
380 #define	CPC_P6_PES_E		18
381 #define	CPC_P6_PES_OS		17
382 #define	CPC_P6_PES_USR		16
383 
384 #define	CPC_P6_PES_UMASK_SHIFT	8
385 #define	CPC_P6_PES_UMASK_MASK	(0xffu)
386 
387 #define	CPC_P6_PES_CMASK_SHIFT	24
388 #define	CPC_P6_PES_CMASK_MASK	(0xffu)
389 
390 #define	CPC_P6_PES_PIC0_MASK	(0xffu)
391 #define	CPC_P6_PES_PIC1_MASK	(0xffu)
392 
393 #define	P6_PES_EN	(UINT32_C(1) << CPC_P6_PES_EN)
394 #define	P6_PES_INT	(UINT32_C(1) << CPC_P6_PES_INT)
395 #define	P6_PES_OS	(UINT32_C(1) << CPC_P6_PES_OS)
396 
/*
 * Attribute flags for the Pentium (P5) family
 */
400 #define	P5_NOEDGE	0x1	/* "noedge"	- no edge detection */
401 #define	P5_PC		0x2	/* "pc"		- pin control */
402 
/*
 * Attribute flags for the P6 family (Pentium Pro, Pentium II and III)
 */
406 #define	P6_NOEDGE	0x1
407 #define	P6_PC		0x2
408 #define	P6_INV		0x4	/* "inv" - count inverted transitions */
409 #define	P6_INT		0x8	/* "int" - interrupt on overflow */
410 
411 /*
412  * CPU reference strings
413  */
414 
415 #define	P5_CPUREF	"See Appendix A.4 of the \"IA-32 Intel Architecture "  \
416 			"Software Developer's Manual Volume 3: System "	       \
417 			"Programming Guide,\" Order # 245472-012, 2003"
418 
419 #define	P6_CPUREF	"See Appendix A.3 of the \"IA-32 Intel Architecture "  \
420 			"Software Developer's Manual Volume 3: System "	       \
421 			"Programming Guide,\" Order # 245472-012, 2003"
422 
423 static int
424 ptm_pcbe_init(void)
425 {
426 	const struct nametable	*n;
427 	int			i;
428 	size_t			size;
429 
430 	if (x86_feature & X86_MMX)
431 		ptm_rdpmc_avail = 1;
432 
433 	/*
434 	 * Discover type of CPU and set events pointer appropriately.
435 	 *
436 	 * Map family and model into the performance
437 	 * counter architectures we currently understand.
438 	 *
439 	 * See application note AP485 (from developer.intel.com)
440 	 * for further explanation.
441 	 */
442 	if (cpuid_getvendor(CPU) != X86_VENDOR_Intel)
443 		return (-1);
444 	switch (cpuid_getfamily(CPU)) {
445 	case 5:		/* Pentium and Pentium with MMX */
446 		events = P5mmx_names;
447 		ptm_ver = PTM_VER_P5;
448 		ptm_cpuref = P5_CPUREF;
449 		if (cpuid_getmodel(CPU) < 4)
450 			ptm_impl_name = "Pentium";
451 		else
452 			ptm_impl_name = "Pentium with MMX";
453 		break;
454 	case 6:		/* Pentium Pro and Pentium II and III */
455 		events = P6_names;
456 		ptm_ver = PTM_VER_P6;
457 		ptm_cpuref = P6_CPUREF;
458 		ptm_pcbe_ops.pcbe_caps = CPC_CAP_OVERFLOW_INTERRUPT;
459 		if (x86_feature & X86_MMX)
460 			ptm_impl_name = "Pentium Pro with MMX, Pentium II";
461 		else
462 			ptm_impl_name = "Pentium Pro, Pentium II";
463 		break;
464 	default:
465 		return (-1);
466 	}
467 
468 	/*
469 	 * Initialize the list of events for each PIC.
470 	 * Do two passes: one to compute the size necessary and another
471 	 * to copy the strings. Need room for event, comma, and NULL terminator.
472 	 */
473 	for (i = 0; i < 2; i++) {
474 		size = 0;
475 		for (n = events[i]; n->bits != NT_END; n++)
476 			size += strlen(n->name) + 1;
477 		pic_events[i] = kmem_alloc(size + 1, KM_SLEEP);
478 		*pic_events[i] = '\0';
479 		for (n = events[i]; n->bits != NT_END; n++) {
480 			(void) strcat(pic_events[i], n->name);
481 			(void) strcat(pic_events[i], ",");
482 		}
483 		/*
484 		 * Remove trailing comma.
485 		 */
486 		pic_events[i][size - 1] = '\0';
487 	}
488 
489 	return (0);
490 }
491 
492 static uint_t
493 ptm_pcbe_ncounters(void)
494 {
495 	return (2);
496 }
497 
498 static const char *
499 ptm_pcbe_impl_name(void)
500 {
501 	return (ptm_impl_name);
502 }
503 
504 static const char *
505 ptm_pcbe_cpuref(void)
506 {
507 	return (ptm_cpuref);
508 }
509 
510 static char *
511 ptm_pcbe_list_events(uint_t picnum)
512 {
513 	ASSERT(picnum >= 0 && picnum < cpc_ncounters);
514 
515 	if (pic_events[0] == NULL) {
516 		ASSERT(pic_events[1] == NULL);
517 	}
518 
519 	return (pic_events[picnum]);
520 }
521 
522 static char *
523 ptm_pcbe_list_attrs(void)
524 {
525 	if (ptm_ver == PTM_VER_P5)
526 		return ("noedge,pc");
527 	else
528 		return ("noedge,pc,inv,int,umask,cmask");
529 }
530 
531 static const struct nametable *
532 find_event(int regno, char *name)
533 {
534 	const struct nametable *n;
535 
536 	n = events[regno];
537 
538 	for (; n->bits != NT_END; n++)
539 		if (strcmp(name, n->name) == 0)
540 			return (n);
541 
542 	return (NULL);
543 }
544 
/*
 * Return a bitmap of the counters able to count the named event: bit 0 is
 * set when pic0's table lists it, bit 1 when pic1's table lists it.
 */
static uint64_t
ptm_pcbe_event_coverage(char *event)
{
	uint64_t	coverage = 0;
	int		pic;

	for (pic = 0; pic < 2; pic++) {
		if (find_event(pic, event) != NULL)
			coverage |= (UINT64_C(1) << pic);
	}

	return (coverage);
}
557 
558 static uint64_t
559 ptm_pcbe_overflow_bitmap(void)
560 {
561 	uint64_t	ret = 0;
562 	uint64_t	pes[2];
563 
564 	/*
565 	 * P5 is not capable of generating interrupts.
566 	 */
567 	ASSERT(ptm_ver == PTM_VER_P6);
568 
569 	/*
570 	 * CPC could have caused an interrupt provided that
571 	 *
572 	 * 1) Counters are enabled
573 	 * 2) Either counter has requested an interrupt
574 	 */
575 
576 	(void) rdmsr(REG_PERFEVNT0, &pes[0]);
577 	if (((uint32_t)pes[0] & P6_PES_EN) != P6_PES_EN)
578 		return (0);
579 
580 	/*
581 	 * If a particular counter requested an interrupt, assume it caused
582 	 * this interrupt. There is no way to determine which counter overflowed
583 	 * on this hardware other than by using unreliable heuristics.
584 	 */
585 
586 	(void) rdmsr(REG_PERFEVNT1, &pes[1]);
587 	if ((uint32_t)pes[0] & P6_PES_INT)
588 		ret |= 0x1;
589 	if ((uint32_t)pes[1] & P6_PES_INT)
590 		ret |= 0x2;
591 
592 	return (ret);
593 }
594 
595 /*ARGSUSED*/
596 static int
597 ptm_pcbe_configure(uint_t picnum, char *eventname, uint64_t preset,
598     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
599     void *token)
600 {
601 	ptm_pcbe_config_t	*conf;
602 	const struct nametable	*n;
603 	int			i;
604 	int			ptm_flags = 0;
605 
606 	/*
607 	 * If we've been handed an existing configuration, we need only preset
608 	 * the counter value.
609 	 */
610 	if (*data != NULL) {
611 		conf = *data;
612 		conf->ptm_rawpic = trunc3931(preset);
613 		return (0);
614 	}
615 
616 	if (picnum != 0 && picnum != 1)
617 		return (CPC_INVALID_PICNUM);
618 
619 	if ((n = find_event(picnum, eventname)) == NULL)
620 		return (CPC_INVALID_EVENT);
621 
622 	conf = kmem_alloc(sizeof (ptm_pcbe_config_t), KM_SLEEP);
623 
624 	conf->ptm_picno = picnum;
625 	conf->ptm_rawpic = trunc3931(preset);
626 	conf->ptm_ctl = 0;
627 
628 	if (ptm_ver == PTM_VER_P5) {
629 		int picshift;
630 		picshift = (picnum == 0) ? 0 : 16;
631 
632 		for (i = 0; i < nattrs; i++) {
633 			/*
634 			 * Value of these attributes is ignored; their presence
635 			 * alone tells us to set the corresponding flag.
636 			 */
637 			if (strncmp(attrs[i].ka_name, "noedge", 7) == 0) {
638 				if (attrs[i].ka_val != 0)
639 					ptm_flags |= P5_NOEDGE;
640 			} else if (strncmp(attrs[i].ka_name, "pc", 3) == 0) {
641 				if (attrs[i].ka_val != 0)
642 					ptm_flags |= P5_PC;
643 			} else {
644 				kmem_free(conf, sizeof (ptm_pcbe_config_t));
645 				return (CPC_INVALID_ATTRIBUTE);
646 			}
647 		}
648 
649 		if (flags & CPC_COUNT_USER)
650 			conf->ptm_ctl |= (1 << (CPC_P5_CESR_USR0 + picshift));
651 		if (flags & CPC_COUNT_SYSTEM)
652 			conf->ptm_ctl |= (1 << (CPC_P5_CESR_OS0 + picshift));
653 		if (ptm_flags & P5_NOEDGE)
654 			conf->ptm_ctl |= (1 << (CPC_P5_CESR_CLK0 + picshift));
655 		if (ptm_flags & P5_PC)
656 			conf->ptm_ctl |= (1 << (CPC_P5_CESR_PC0 + picshift));
657 
658 		ASSERT((n->bits | CPC_P5_CESR_ES0_MASK) ==
659 		    CPC_P5_CESR_ES0_MASK);
660 
661 		conf->ptm_ctl |= (n->bits << picshift);
662 	} else {
663 		for (i = 0; i < nattrs; i++) {
664 			if (strncmp(attrs[i].ka_name, "noedge", 6) == 0) {
665 				if (attrs[i].ka_val != 0)
666 					ptm_flags |= P6_NOEDGE;
667 			} else if (strncmp(attrs[i].ka_name, "pc", 2) == 0) {
668 				if (attrs[i].ka_val != 0)
669 					ptm_flags |= P6_PC;
670 			} else if (strncmp(attrs[i].ka_name, "inv", 3) == 0) {
671 				if (attrs[i].ka_val != 0)
672 					ptm_flags |= P6_INV;
673 			} else if (strncmp(attrs[i].ka_name, "umask", 5) == 0) {
674 				if ((attrs[i].ka_val | CPC_P6_PES_UMASK_MASK) !=
675 					CPC_P6_PES_UMASK_MASK) {
676 					kmem_free(conf,
677 					    sizeof (ptm_pcbe_config_t));
678 					return (CPC_ATTRIBUTE_OUT_OF_RANGE);
679 				}
680 				conf->ptm_ctl |= (uint8_t)attrs[i].ka_val <<
681 				    CPC_P6_PES_UMASK_SHIFT;
682 			} else if (strncmp(attrs[i].ka_name, "cmask", 5) == 0) {
683 				if ((attrs[i].ka_val | CPC_P6_PES_CMASK_MASK) !=
684 					CPC_P6_PES_CMASK_MASK) {
685 					kmem_free(conf,
686 					    sizeof (ptm_pcbe_config_t));
687 					return (CPC_ATTRIBUTE_OUT_OF_RANGE);
688 				}
689 				conf->ptm_ctl |= (uint8_t)attrs[i].ka_val <<
690 				    CPC_P6_PES_CMASK_SHIFT;
691 			} else if (strncmp(attrs[i].ka_name, "int", 3) == 0) {
692 				if (attrs[i].ka_val != 0)
693 					ptm_flags |= P6_INT;
694 			} else {
695 				kmem_free(conf, sizeof (ptm_pcbe_config_t));
696 				return (CPC_INVALID_ATTRIBUTE);
697 			}
698 		}
699 
700 		if (flags & CPC_OVF_NOTIFY_EMT)
701 			/*
702 			 * If the user has requested notification of overflows,
703 			 * we automatically program the hardware to generate
704 			 * overflow interrupts.
705 			 */
706 			ptm_flags |= P6_INT;
707 		if (flags & CPC_COUNT_USER)
708 			conf->ptm_ctl |= (1 << CPC_P6_PES_USR);
709 		if (flags & CPC_COUNT_SYSTEM)
710 			conf->ptm_ctl |= (1 << CPC_P6_PES_OS);
711 		if ((ptm_flags & P6_NOEDGE) == 0)
712 			conf->ptm_ctl |= (1 << CPC_P6_PES_E);
713 		if (ptm_flags & P6_PC)
714 			conf->ptm_ctl |= (1 << CPC_P6_PES_PC);
715 		if (ptm_flags & P6_INV)
716 			conf->ptm_ctl |= (1 << CPC_P6_PES_INV);
717 		if (ptm_flags & P6_INT)
718 			conf->ptm_ctl |= (1 << CPC_P6_PES_INT);
719 
720 		ASSERT((n->bits | CPC_P6_PES_PIC0_MASK) ==
721 		    CPC_P6_PES_PIC0_MASK);
722 
723 		conf->ptm_ctl |= n->bits;
724 	}
725 
726 	*data = conf;
727 	return (0);
728 }
729 
730 static void
731 ptm_pcbe_program(void *token)
732 {
733 	ptm_pcbe_config_t	*pic0;
734 	ptm_pcbe_config_t	*pic1;
735 	ptm_pcbe_config_t	*tmp;
736 	ptm_pcbe_config_t	empty = { 1, 0, 0 }; /* assume pic1 to start */
737 
738 	if ((pic0 = kcpc_next_config(token, NULL, NULL)) == NULL)
739 		panic("ptm_pcbe: token %p has no configs", token);
740 
741 	if ((pic1 = kcpc_next_config(token, pic0, NULL)) == NULL)
742 		pic1 = &empty;
743 
744 	if (pic0->ptm_picno != 0) {
745 		empty.ptm_picno = 0;
746 		tmp = pic1;
747 		pic1 = pic0;
748 		pic0 = tmp;
749 	}
750 
751 	ASSERT(pic0->ptm_picno == 0 && pic1->ptm_picno == 1);
752 
753 	if (ptm_rdpmc_avail) {
754 		uint32_t curcr4 = getcr4();
755 		if (kcpc_allow_nonpriv(token))
756 			setcr4(curcr4 | CR4_PCE);
757 		else
758 			setcr4(curcr4 & ~CR4_PCE);
759 	}
760 
761 	if (ptm_ver == PTM_VER_P5) {
762 		uint64_t	cesr = 0;
763 		wrmsr(P5_CESR, &allstopped);
764 		wrmsr(P5_CTR0, &pic0->ptm_rawpic);
765 		wrmsr(P5_CTR1, &pic1->ptm_rawpic);
766 		cesr = pic0->ptm_ctl | pic1->ptm_ctl;
767 		wrmsr(P5_CESR, &cesr);
768 		(void) rdmsr(P5_CTR0, &pic0->ptm_rawpic);
769 		(void) rdmsr(P5_CTR1, &pic1->ptm_rawpic);
770 	} else {
771 		uint64_t	pes;
772 		wrmsr(REG_PERFEVNT0, &allstopped);
773 		wrmsr(REG_PERFCTR0, &pic0->ptm_rawpic);
774 		wrmsr(REG_PERFCTR1, &pic1->ptm_rawpic);
775 		pes = pic1->ptm_ctl;
776 		DTRACE_PROBE1(ptm__pes1, uint64_t, pes);
777 		wrmsr(REG_PERFEVNT1, &pes);
778 		pes = pic0->ptm_ctl | (1 << CPC_P6_PES_EN);
779 		DTRACE_PROBE1(ptm__pes0, uint64_t, pes);
780 		wrmsr(REG_PERFEVNT0, &pes);
781 	}
782 }
783 
784 static void
785 ptm_pcbe_allstop(void)
786 {
787 	if (ptm_ver == PTM_VER_P5)
788 		wrmsr(P5_CESR, &allstopped);
789 	else {
790 		wrmsr(REG_PERFEVNT0, &allstopped);
791 		setcr4((uint32_t)getcr4() & ~CR4_PCE);
792 	}
793 }
794 
795 static void
796 ptm_pcbe_sample(void *token)
797 {
798 	ptm_pcbe_config_t	*pic0;
799 	ptm_pcbe_config_t	*pic1;
800 	ptm_pcbe_config_t	*swap;
801 	ptm_pcbe_config_t	empty = { 1, 0, 0 }; /* assume pic1 to start */
802 	uint64_t		tmp;
803 	uint64_t		*pic0_data;
804 	uint64_t		*pic1_data;
805 	uint64_t		*dtmp;
806 	uint64_t		curpic[2];
807 
808 	if ((pic0 = kcpc_next_config(token, NULL, &pic0_data)) == NULL)
809 		panic("ptm_pcbe: token %p has no configs", token);
810 
811 	if ((pic1 = kcpc_next_config(token, pic0, &pic1_data)) == NULL) {
812 		pic1 = &empty;
813 		pic1_data = &tmp;
814 	}
815 
816 	if (pic0->ptm_picno != 0) {
817 		empty.ptm_picno = 0;
818 		swap = pic0;
819 		pic0 = pic1;
820 		pic1 = swap;
821 		dtmp = pic0_data;
822 		pic0_data = pic1_data;
823 		pic1_data = dtmp;
824 	}
825 
826 	ASSERT(pic0->ptm_picno == 0 && pic1->ptm_picno == 1);
827 
828 	if (ptm_ver == PTM_VER_P5) {
829 		(void) rdmsr(P5_CTR0, &curpic[0]);
830 		(void) rdmsr(P5_CTR1, &curpic[1]);
831 	} else {
832 		(void) rdmsr(REG_PERFCTR0, &curpic[0]);
833 		(void) rdmsr(REG_PERFCTR1, &curpic[1]);
834 	}
835 
836 	DTRACE_PROBE1(ptm__curpic0, uint64_t, curpic[0]);
837 	DTRACE_PROBE1(ptm__curpic1, uint64_t, curpic[1]);
838 
839 	*pic0_data += diff3931(curpic[0], pic0->ptm_rawpic);
840 	pic0->ptm_rawpic = trunc3931(*pic0_data);
841 
842 	*pic1_data += diff3931(curpic[1], pic1->ptm_rawpic);
843 	pic1->ptm_rawpic = trunc3931(*pic1_data);
844 }
845 
846 static void
847 ptm_pcbe_free(void *config)
848 {
849 	kmem_free(config, sizeof (ptm_pcbe_config_t));
850 }
851 
/*
 * Virtualizes the 40-bit field of the %pic
 * register into a 64-bit software register.
 *
 * We can retrieve 40 (signed) bits from the counters,
 * but we can set only 32 (signed) bits into the counters.
 * This makes virtualizing more than 31 bits of registers
 * quite tricky.
 *
 * If bits 39 to 31 are all set in the virtualized pic register,
 * then we can preset the counter to this value using the fact
 * that wrmsr sign extends bit 31. Though it might look easier
 * to only use the bottom 31 bits of the register, we have to allow
 * the full 40 bits to be used to perform overflow profiling.
 *
 * Both helpers therefore work in one of two widths, chosen by whether
 * bits 39..31 of the reference value are all ones: 40 bits if so,
 * 31 bits otherwise.
 */

#define	MASK40		UINT64_C(0xffffffffff)
#define	MASK31		UINT64_C(0x7fffffff)
#define	BITS_39_31	UINT64_C(0xff80000000)

/*
 * Return how far the counter advanced from "old" to "sample". The width is
 * chosen from "old"; a negative difference is treated as a single
 * wrap-around at that width.
 */
static int64_t
diff3931(uint64_t sample, uint64_t old)
{
	const int	wide = ((old & BITS_39_31) == BITS_39_31);
	const uint64_t	mask = wide ? MASK40 : MASK31;
	const uint64_t	modulus = UINT64_C(1) << (wide ? 40 : 31);
	int64_t		delta;

	delta = (mask & sample) - old;
	if (delta < 0)
		delta += modulus;

	return (delta);
}

/*
 * Reduce a 64-bit virtual counter value to what the hardware counter can
 * be preset with: the low 40 bits when bits 39..31 are all set, otherwise
 * the low 31 bits.
 */
static uint64_t
trunc3931(uint64_t value)
{
	const uint64_t	mask =
	    ((value & BITS_39_31) == BITS_39_31) ? MASK40 : MASK31;

	return (value & mask);
}
896 
/*
 * Module linkage record tying the ptm_pcbe_ops vector to the mod_pcbeops
 * module type. The "%I%" in the description string is a source-control
 * keyword, like the ones in the #pragma ident line at the top of the file.
 */
static struct modlpcbe modlpcbe = {
	&mod_pcbeops,
	"Pentium Performance Counters v%I%",
	&ptm_pcbe_ops
};
902 
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info() by
 * the entry points below; modlpcbe is its only linkage record.
 */
static struct modlinkage modl = {
	MODREV_1,
	&modlpcbe,
};
907 
908 int
909 _init(void)
910 {
911 	if (ptm_pcbe_init() != 0)
912 		return (ENOTSUP);
913 	return (mod_install(&modl));
914 }
915 
916 int
917 _fini(void)
918 {
919 	return (mod_remove(&modl));
920 }
921 
922 int
923 _info(struct modinfo *mi)
924 {
925 	return (mod_info(&modl, mi));
926 }
927