xref: /titanic_50/usr/src/uts/intel/pcbe/p4_pcbe.c (revision 726fad2a65f16c200a03969c29cb5c86c2d427db)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * This file contains preset event names from the Performance Application
28  * Programming Interface v3.5 which included the following notice:
29  *
30  *                             Copyright (c) 2005,6
31  *                           Innovative Computing Labs
32  *                         Computer Science Department,
33  *                            University of Tennessee,
34  *                                 Knoxville, TN.
35  *                              All Rights Reserved.
36  *
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions are met:
40  *
41  *    * Redistributions of source code must retain the above copyright notice,
42  *      this list of conditions and the following disclaimer.
43  *    * Redistributions in binary form must reproduce the above copyright
44  *      notice, this list of conditions and the following disclaimer in the
45  *      documentation and/or other materials provided with the distribution.
46  *    * Neither the name of the University of Tennessee nor the names of its
47  *      contributors may be used to endorse or promote products derived from
48  *      this software without specific prior written permission.
49  *
50  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
51  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
54  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
55  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
56  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
57  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
58  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
59  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
60  * POSSIBILITY OF SUCH DAMAGE.
61  *
62  *
63  * This open source software license conforms to the BSD License template.
64  */
65 
66 /*
67  * Performance Counter Back-End for Pentium 4.
68  */
69 
70 #include <sys/cpuvar.h>
71 #include <sys/param.h>
72 #include <sys/cpc_impl.h>
73 #include <sys/cpc_pcbe.h>
74 #include <sys/inttypes.h>
75 #include <sys/errno.h>
76 #include <sys/systm.h>
77 #include <sys/archsystm.h>
78 #include <sys/x86_archext.h>
79 #include <sys/modctl.h>
80 #include <sys/sdt.h>
81 #include <sys/cred.h>
82 #include <sys/policy.h>
83 #include <sys/privregs.h>
84 
/*
 * Forward declarations.  p4_pcbe_init() is called from _init(); the remaining
 * functions are the entry points stored in p4_pcbe_ops below.
 */
static int p4_pcbe_init(void);
static uint_t p4_pcbe_ncounters(void);
static const char *p4_pcbe_impl_name(void);
static const char *p4_pcbe_cpuref(void);
static char *p4_pcbe_list_events(uint_t picnum);
static char *p4_pcbe_list_attrs(void);
static uint64_t p4_pcbe_event_coverage(char *event);
static uint64_t p4_pcbe_overflow_bitmap(void);
static int p4_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
    uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
    void *token);
static void p4_pcbe_program(void *token);
static void p4_pcbe_allstop(void);
static void p4_pcbe_sample(void *token);
static void p4_pcbe_free(void *config);

/*
 * Defined outside this file; p4_pcbe_program() uses it to obtain the logical
 * ID of the current CPU on HyperThreaded parts.
 */
extern int cpuid_get_clogid(cpu_t *);
102 
/*
 * Operations vector for this back-end; a pointer to it is placed in modlpcbe
 * at the bottom of the file.  The initializer is positional, so the entries
 * must stay in the member order of pcbe_ops_t (declared outside this file).
 */
static pcbe_ops_t p4_pcbe_ops = {
	PCBE_VER_1,
	CPC_CAP_OVERFLOW_INTERRUPT | CPC_CAP_OVERFLOW_PRECISE,
	p4_pcbe_ncounters,
	p4_pcbe_impl_name,
	p4_pcbe_cpuref,
	p4_pcbe_list_events,
	p4_pcbe_list_attrs,
	p4_pcbe_event_coverage,
	p4_pcbe_overflow_bitmap,
	p4_pcbe_configure,
	p4_pcbe_program,
	p4_pcbe_allstop,
	p4_pcbe_sample,
	p4_pcbe_free
};
119 
120 /*
121  * P4 Configuration Flags.
122  */
#define	P4_THIS_USR	0x1 /* HTT: Measure usr events on this logical CPU */
#define	P4_THIS_SYS	0x2 /* HTT: Measure os events on this logical CPU */
#define	P4_SIBLING_USR	0x4 /* HTT: Measure usr events on other logical CPU */
#define	P4_SIBLING_SYS	0x8 /* HTT: Measure os events on other logical CPU */
#define	P4_PMI		0x10 /* HTT: Set PMI bit for local logical CPU */
128 
/*
 * Per-counter configuration built by p4_pcbe_configure() and released by
 * p4_pcbe_free().
 */
typedef struct _p4_pcbe_config {
	uint8_t		p4_flags;	/* P4_* flags above; set only on HTT */
	uint8_t		p4_picno;	/* From 0 to 17 */
	uint8_t		p4_escr_ndx;	/* Which ESCR to use */
	uint32_t	p4_escr;	/* Value to program in selected ESCR */
	uint32_t	p4_cccr;	/* Value to program in counter's CCCR */
	uint64_t	p4_rawpic;	/* Counter value, masked with MASK40 */
} p4_pcbe_config_t;
137 
/* Not referenced by the code visible in this file. */
typedef uint32_t cntr_map_t;
139 
/*
 * Description of one ESCR; see the p4_escrs[] table.
 */
typedef struct _p4_escr {
	int		pe_num;	/* value placed in the CCCR's ESCR select */
	uint32_t	pe_addr; /* MSR address of this ESCR */
	uint32_t	pe_map; /* bitmap of counters; bit 0 means ctr 0 */
} p4_escr_t;
145 
/* Low 40 bits; applied to every value stored in p4_rawpic. */
#define	MASK40			UINT64_C(0xffffffffff)
147 
/*
 * CCCR field definitions.
 *
 * Note that the Intel Developer's Manual states that the reserved field at
 * bit location 16 and 17 must be set to 11. (??)
 * p4_pcbe_configure() accordingly defaults the active_thread field, which
 * sits at CCCR_ACTV_THR_SHIFT, to 0x3.
 *
 * CCCR_OVF_PMI and CCCR_OVF_PMI_T0 name the same bit (26): the former is used
 * by the non-HyperThreaded path, the T0/T1 pair by the HyperThreaded path in
 * p4_pcbe_program().
 */
#define	CCCR_ENABLE_SHIFT	12
#define	CCCR_ESCR_SEL_SHIFT	13
#define	CCCR_ACTV_THR_SHIFT	16
#define	CCCR_COMPARE_SHIFT	18
#define	CCCR_COMPLEMENT_SHIFT	19
#define	CCCR_THRESHOLD_SHIFT	20
#define	CCCR_EDGE_SHIFT		24
#define	CCCR_OVF_PMI_SHIFT	26
#define	CCCR_OVF_PMI_T0_SHIFT	26
#define	CCCR_OVF_PMI_T1_SHIFT	27
#define	CCCR_OVF_SHIFT		31
#define	CCCR_ACTV_THR_MASK	0x3	/* valid "active_thread" values */
#define	CCCR_THRESHOLD_MAX	0xF	/* largest "threshold" accepted */
#define	CCCR_ENABLE		(1U << CCCR_ENABLE_SHIFT)
#define	CCCR_COMPARE		(1U << CCCR_COMPARE_SHIFT)
#define	CCCR_COMPLEMENT		(1U << CCCR_COMPLEMENT_SHIFT)
#define	CCCR_EDGE		(1U << CCCR_EDGE_SHIFT)
#define	CCCR_OVF_PMI		(1U << CCCR_OVF_PMI_SHIFT)
#define	CCCR_OVF_PMI_T0		(1U << CCCR_OVF_PMI_T0_SHIFT)
#define	CCCR_OVF_PMI_T1		(1U << CCCR_OVF_PMI_T1_SHIFT)
#define	CCCR_INIT		CCCR_ENABLE	/* base of every CCCR value */
#define	CCCR_OVF		(1U << CCCR_OVF_SHIFT)
176 
/*
 * ESCR field definitions, as used by p4_pcbe_configure() to assemble the
 * value stored in p4_escr.
 */
#define	ESCR_EVSEL_SHIFT	25
#define	ESCR_EVMASK_SHIFT	9
#define	ESCR_TAG_VALUE_SHIFT	5
#define	ESCR_TAG_VALUE_MAX	0xF	/* largest "tag" accepted */
#define	ESCR_TAG_ENABLE_SHIFT	4
#define	ESCR_USR_SHIFT		2
#define	ESCR_OS_SHIFT		3
#define	ESCR_USR		(1U << ESCR_USR_SHIFT)
#define	ESCR_OS			(1U << ESCR_OS_SHIFT)
#define	ESCR_TAG_ENABLE		(1U << ESCR_TAG_ENABLE_SHIFT)

/*
 * HyperThreaded ESCR fields.
 *
 * The T0 bits occupy the same positions as ESCR_USR and ESCR_OS above; the
 * T1 bits use the two positions below them.
 */
#define	ESCR_T0_OS_SHIFT	3
#define	ESCR_T0_USR_SHIFT	2
#define	ESCR_T1_OS_SHIFT	1
#define	ESCR_T1_USR_SHIFT	0
#define	ESCR_T0_OS		(1U << ESCR_T0_OS_SHIFT)
#define	ESCR_T0_USR		(1U << ESCR_T0_USR_SHIFT)
#define	ESCR_T1_OS		(1U << ESCR_T1_OS_SHIFT)
#define	ESCR_T1_USR		(1U << ESCR_T1_USR_SHIFT)
199 
/*
 * ESCRs are grouped by counter; each group of ESCRs is associated with a
 * distinct group of counters. Use these macros to fill in the table below.
 *
 * In each map, bit n set means counter n belongs to the group.
 */
#define	BPU0_map	(0x1 | 0x2)		/* Counters 0 and 1 */
#define	BPU2_map	(0x4 | 0x8)		/* Counters 2 and 3 */
#define	MS0_map		(0x10 | 0x20)		/* Counters 4 and 5 */
#define	MS2_map		(0x40 | 0x80)		/* Counters 6 and 7 */
#define	FLAME0_map	(0x100 | 0x200)		/* Counters 8 and 9 */
#define	FLAME2_map	(0x400 | 0x800)		/* Counters 10 and 11 */
#define	IQ0_map		(0x1000 | 0x2000 | 0x10000) /* Counters 12, 13, 16 */
#define	IQ2_map		(0x4000 | 0x8000 | 0x20000) /* Counters 14, 15, 17 */
212 
/*
 * Table describing the 45 Event Selection and Control Registers (ESCRs).
 *
 * Each entry gives, in order: pe_num (the value p4_pcbe_configure() shifts
 * into the CCCR's ESCR select field), pe_addr (the ESCR's MSR address) and
 * pe_map (the counters the ESCR can be paired with).  The trailing comment on
 * each entry is its index in the table.
 *
 * The #define preceding each entry names that ESCR as a single bit whose
 * position equals the entry's index.  Those bits are what make up
 * p4_ctr_t.pc_map and p4_event_t.pe_escr_map.
 */
const p4_escr_t p4_escrs[] = {
#define	BPU0 (1)
	{ 0, 0x3B2, BPU0_map },		/* 0 */
#define	IS0 (1ULL << 1)
	{ 1, 0x3B4, BPU0_map },		/* 1 */
#define	MOB0 (1ULL << 2)
	{ 2, 0x3AA, BPU0_map },		/* 2 */
#define	ITLB0 (1ULL << 3)
	{ 3, 0x3B6, BPU0_map },		/* 3 */
#define	PMH0 (1ULL << 4)
	{ 4, 0x3AC, BPU0_map },		/* 4 */
#define	IX0 (1ULL << 5)
	{ 5, 0x3C8, BPU0_map },		/* 5 */
#define	FSB0 (1ULL << 6)
	{ 6, 0x3A2, BPU0_map },		/* 6 */
#define	BSU0 (1ULL << 7)
	{ 7, 0x3A0, BPU0_map },		/* 7 */
#define	BPU1 (1ULL << 8)
	{ 0, 0x3B3, BPU2_map },		/* 8 */
#define	IS1 (1ULL << 9)
	{ 1, 0x3B5, BPU2_map },		/* 9 */
#define	MOB1 (1ULL << 10)
	{ 2, 0x3AB, BPU2_map },		/* 10 */
#define	ITLB1 (1ULL << 11)
	{ 3, 0x3B7, BPU2_map },		/* 11 */
#define	PMH1 (1ULL << 12)
	{ 4, 0x3AD, BPU2_map },		/* 12 */
#define	IX1 (1ULL << 13)
	{ 5, 0x3C9, BPU2_map },		/* 13 */
#define	FSB1 (1ULL << 14)
	{ 6, 0x3A3, BPU2_map },		/* 14 */
#define	BSU1 (1ULL << 15)
	{ 7, 0x3A1, BPU2_map },		/* 15 */
#define	MS0 (1ULL << 16)
	{ 0, 0x3C0, MS0_map },		/* 16 */
#define	TC0 (1ULL << 17)
	{ 1, 0x3C4, MS0_map },		/* 17 */
#define	TBPU0 (1ULL << 18)
	{ 2, 0x3C2, MS0_map },		/* 18 */
#define	MS1 (1ULL << 19)
	{ 0, 0x3C1, MS2_map },		/* 19 */
#define	TC1 (1ULL << 20)
	{ 1, 0x3C5, MS2_map },		/* 20 */
#define	TBPU1 (1ULL << 21)
	{ 2, 0x3C3, MS2_map },		/* 21 */
#define	FLAME0 (1ULL << 22)
	{ 0, 0x3A6, FLAME0_map },	/* 22 */
#define	FIRM0 (1ULL << 23)
	{ 1, 0x3A4, FLAME0_map },	/* 23 */
#define	SAAT0 (1ULL << 24)
	{ 2, 0x3AE, FLAME0_map },	/* 24 */
#define	U2L0 (1ULL << 25)
	{ 3, 0x3B0, FLAME0_map },	/* 25 */
#define	DAC0 (1ULL << 26)
	{ 5, 0x3A8, FLAME0_map },	/* 26 */
#define	FLAME1 (1ULL << 27)
	{ 0, 0x3A7, FLAME2_map },	/* 27 */
#define	FIRM1 (1ULL << 28)
	{ 1, 0x3A5, FLAME2_map },	/* 28 */
#define	SAAT1 (1ULL << 29)
	{ 2, 0x3AF, FLAME2_map },	/* 29 */
#define	U2L1 (1ULL << 30)
	{ 3, 0x3B1, FLAME2_map },	/* 30 */
#define	DAC1 (1ULL << 31)
	{ 5, 0x3A9, FLAME2_map },	/* 31 */
#define	IQ0 (1ULL << 32)
	{ 0, 0x3BA, IQ0_map },		/* 32 */
#define	ALF0 (1ULL << 33)
	{ 1, 0x3CA, IQ0_map },		/* 33 */
#define	RAT0 (1ULL << 34)
	{ 2, 0x3BC, IQ0_map },		/* 34 */
#define	SSU0 (1ULL << 35)
	{ 3, 0x3BE, IQ0_map },		/* 35 */
#define	CRU0 (1ULL << 36)
	{ 4, 0x3B8, IQ0_map },		/* 36 */
#define	CRU2 (1ULL << 37)
	{ 5, 0x3CC, IQ0_map },		/* 37 */
#define	CRU4 (1ULL << 38)
	{ 6, 0x3E0, IQ0_map },		/* 38 */
#define	IQ1 (1ULL << 39)
	{ 0, 0x3BB, IQ2_map },		/* 39 */
#define	ALF1 (1ULL << 40)
	{ 1, 0x3CB, IQ2_map },		/* 40 */
#define	RAT1 (1ULL << 41)
	{ 2, 0x3BD, IQ2_map },		/* 41 */
#define	CRU1 (1ULL << 42)
	{ 4, 0x3B9, IQ2_map },		/* 42 */
#define	CRU3 (1ULL << 43)
	{ 5, 0x3CD, IQ2_map },		/* 43 */
#define	CRU5 (1ULL << 44)
	{ 6, 0x3E1, IQ2_map }		/* 44 */
};
308 
/* Index of the last entry in p4_escrs[] (entries are numbered 0 to 44). */
#define	ESCR_MAX_INDEX 44
310 
/*
 * Description of one performance counter; see the p4_ctrs[] table.
 */
typedef struct _p4_ctr {
	uint32_t	pc_caddr;	/* counter MSR address */
	uint32_t	pc_ctladdr;	/* counter's CCCR MSR address */
	uint64_t	pc_map;		/* bitmap of ESCRs controlling ctr */
} p4_ctr_t;
316 
/*
 * The 18 counters, indexed by counter (pic) number.  Each entry lists the
 * counter's MSR address, its CCCR's MSR address, and the ESCRs (as the bit
 * names defined alongside p4_escrs[]) that can feed it.
 */
const p4_ctr_t p4_ctrs[18] = {
{ /* BPU_COUNTER0 */ 0x300, 0x360, BSU0|FSB0|MOB0|PMH0|BPU0|IS0|ITLB0|IX0},
{ /* BPU_COUNTER1 */ 0x301, 0x361, BSU0|FSB0|MOB0|PMH0|BPU0|IS0|ITLB0|IX0},
{ /* BPU_COUNTER2 */ 0x302, 0x362, BSU1|FSB1|MOB1|PMH1|BPU1|IS1|ITLB1|IX1},
{ /* BPU_COUNTER3 */ 0x303, 0x363, BSU1|FSB1|MOB1|PMH1|BPU1|IS1|ITLB1|IX1},
{ /* MS_COUNTER0 */  0x304, 0x364, MS0|TBPU0|TC0 },
{ /* MS_COUNTER1 */  0x305, 0x365, MS0|TBPU0|TC0 },
{ /* MS_COUNTER2 */  0x306, 0x366, MS1|TBPU1|TC1 },
{ /* MS_COUNTER3 */  0x307, 0x367, MS1|TBPU1|TC1 },
{ /* FLAME_COUNTER0 */ 0x308, 0x368, FIRM0|FLAME0|DAC0|SAAT0|U2L0 },
{ /* FLAME_COUNTER1 */ 0x309, 0x369, FIRM0|FLAME0|DAC0|SAAT0|U2L0 },
{ /* FLAME_COUNTER2 */ 0x30A, 0x36A, FIRM1|FLAME1|DAC1|SAAT1|U2L1 },
{ /* FLAME_COUNTER3 */ 0x30B, 0x36B, FIRM1|FLAME1|DAC1|SAAT1|U2L1 },
{ /* IQ_COUNTER0 */  0x30C, 0x36C, CRU0|CRU2|CRU4|IQ0|RAT0|SSU0|ALF0 },
{ /* IQ_COUNTER1 */  0x30D, 0x36D, CRU0|CRU2|CRU4|IQ0|RAT0|SSU0|ALF0 },
{ /* IQ_COUNTER2 */  0x30E, 0x36E, CRU1|CRU3|CRU5|IQ1|RAT1|ALF1 },
{ /* IQ_COUNTER3 */  0x30F, 0x36F, CRU1|CRU3|CRU5|IQ1|RAT1|ALF1 },
{ /* IQ_COUNTER4 */  0x310, 0x370, CRU0|CRU2|CRU4|IQ0|RAT0|SSU0|ALF0 },
{ /* IQ_COUNTER5 */  0x311, 0x371, CRU1|CRU3|CRU5|IQ1|RAT1|ALF1 }
};
337 
/*
 * Description of one native event; see the p4_events[] table.
 */
typedef struct _p4_event {
	char		*pe_name;	/* Name of event according to docs */
	uint64_t	pe_escr_map;	/* Bitmap of ESCRs capable of event */
	uint32_t	pe_escr_mask;	/* permissible ESCR event mask */
	uint8_t		pe_ev;		/* ESCR event select value */
	uint16_t	pe_cccr;	/* CCCR select value */
	uint32_t	pe_ctr_mask;	/* Bitmap of capable counters */
} p4_event_t;
346 
/*
 * Description of one generic (PAPI preset) event; see p4_generic_events[].
 */
typedef struct _p4_generic_event {
	char		*name;		/* generic event name */
	char		*event;		/* name of the p4_events[] entry used */
	uint16_t	emask;		/* event mask applied for this name */
	uint32_t	ctr_mask;	/* Bitmap of capable counters */
} p4_generic_event_t;
353 
/*
 * C(n) is a mask with only bit n set; it is used to build and test the
 * per-counter bitmaps (counter n <-> bit n).  The argument is parenthesized
 * so that an expression argument is shifted as a whole.
 *
 * GEN_EVT_END is the all-zero entry that terminates p4_generic_events[].
 */
#define	C(n) (1 << (n))
#define	GEN_EVT_END { NULL, NULL, 0x0, 0x0 }
356 
/*
 * Native events.  Each entry gives, in order: the event name, the ESCRs able
 * to count it, the bits permitted in the "emask" attribute, the ESCR event
 * select value, the CCCR select value, and the counters able to count it.
 * The list is terminated by an entry whose name is NULL.
 */
p4_event_t p4_events[] = {
{ "branch_retired", CRU2|CRU3, 0xF, 0x6, 0x5, C(12)|C(13)|C(14)|C(15)|C(16) },
{ "mispred_branch_retired", CRU0|CRU1, 0x1, 0x3, 0x4,
	C(12)|C(13)|C(14)|C(15)|C(16) },
{ "TC_deliver_mode", TC0|TC1, 0xFF, 0x1, 0x1, C(4)|C(5)|C(6)|C(7) },
{ "BPU_fetch_request", BPU0|BPU1, 0x1, 0x3, 0x0, C(0)|C(1)|C(2)|C(3) },
{ "ITLB_reference", ITLB0|ITLB1, 0x7, 0x18, 0x3, C(0)|C(1)|C(2)|C(3) },
{ "memory_cancel", DAC0|DAC1, 0x6, 0x2, 0x5, C(8)|C(9)|C(10)|C(11) },
{ "memory_complete", SAAT0|SAAT1, 0x3, 0x8, 0x2, C(8)|C(9)|C(10)|C(11) },
{ "load_port_replay", SAAT0|SAAT1, 0x1, 0x4, 0x2, C(8)|C(9)|C(10)|C(11) },
{ "store_port_replay", SAAT0|SAAT1, 0x1, 0x5, 0x2, C(8)|C(9)|C(10)|C(11) },
{ "MOB_load_replay", MOB0|MOB1, 0x35, 0x3, 0x2, C(0)|C(1)|C(2)|C(3) },
{ "page_walk_type", PMH0|PMH1, 0x3, 0x1, 0x4, C(0)|C(1)|C(2)|C(3) },
{ "BSQ_cache_reference", BSU0|BSU1, 0x73F, 0xC, 0x7, C(0)|C(1)|C(2)|C(3) },
{ "IOQ_allocation", FSB0, 0xEFFF, 0x3, 0x6, C(0)|C(1) },
{ "IOQ_active_entries", FSB1, 0xEFFF, 0x1A, 0x6, C(2)|C(3) },
{ "FSB_data_activity", FSB0|FSB1, 0x3F, 0x17, 0x6, C(0)|C(1)|C(2)|C(3) },
{ "BSQ_allocation", BSU0, 0x3FEF, 0x5, 0x7, C(0)|C(1) },
{ "bsq_active_entries", BSU1, 0x3FEF, 0x6, 0x7, C(2)|C(3) },
{ "x87_assist", CRU2|CRU3, 0x1F, 0x3, 0x5, C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
{ "SSE_input_assist", FIRM0|FIRM1, 0x8000, 0x34, 0x1, C(8)|C(9)|C(10)|C(11) },
{ "packed_SP_uop", FIRM0|FIRM1, 0x8000, 0x8, 0x1, C(8)|C(9)|C(10)|C(11) },
{ "packed_DP_uop", FIRM0|FIRM1, 0x8000, 0xC, 0x1, C(8)|C(9)|C(10)|C(11) },
{ "scalar_SP_uop", FIRM0|FIRM1, 0x8000, 0xA, 0x1, C(8)|C(9)|C(10)|C(11) },
{ "scalar_DP_uop", FIRM0|FIRM1, 0x8000, 0xE, 0x1, C(8)|C(9)|C(10)|C(11) },
{ "64bit_MMX_uop", FIRM0|FIRM1, 0x8000, 0x2, 0x1, C(8)|C(9)|C(10)|C(11) },
{ "128bit_MMX_uop", FIRM0|FIRM1, 0x8000, 0x1A, 0x1, C(8)|C(9)|C(10)|C(11) },
{ "x87_FP_uop", FIRM0|FIRM1, 0x8000, 0x4, 0x1, C(8)|C(9)|C(10)|C(11) },
{ "x87_SIMD_moves_uop", FIRM0|FIRM1, 0x18, 0x2E, 0x1, C(8)|C(9)|C(10)|C(11) },
{ "machine_clear", CRU2|CRU3, 0xD, 0x2, 0x5,
	C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
{ "global_power_events", FSB0|FSB1, 0x1, 0x13, 0x6, C(0)|C(1)|C(2)|C(3) },
{ "tc_ms_xfer", MS0|MS1, 0x1, 0x5, 0x0, C(4)|C(5)|C(6)|C(7) },
{ "uop_queue_writes", MS0|MS1, 0x7, 0x9, 0x0, C(4)|C(5)|C(6)|C(7) },
{ "front_end_event", CRU2|CRU3, 0x3, 0x8, 0x5,
	C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
{ "execution_event", CRU2|CRU3, 0xFF, 0xC, 0x5,
	C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
{ "replay_event", CRU2|CRU3, 0x3, 0x9, 0x5,
	C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
{ "instr_retired", CRU0|CRU1, 0xF, 0x2, 0x4,
	C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
{ "uops_retired", CRU0|CRU1, 0x3, 0x1, 0x4,
	C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
{ "uop_type", RAT0|RAT1, 0x3, 0x2, 0x2, C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
{ "retired_mispred_branch_type", TBPU0|TBPU1, 0x1F, 0x5, 0x2,
	C(4)|C(5)|C(6)|C(7)},
{ "retired_branch_type", TBPU0|TBPU1, 0x1F, 0x4, 0x2, C(4)|C(5)|C(6)|C(7) },
{ NULL, 0, 0, 0, 0 }
};
407 
/*
 * Generic (PAPI preset) events.  Each entry maps a generic name onto a native
 * event from p4_events[] together with the event mask to use for it and the
 * counters able to count it.  The list is terminated by GEN_EVT_END.
 */
static p4_generic_event_t p4_generic_events[] = {
{ "PAPI_br_msp", "branch_retired", 0xa, C(12)|C(13)|C(14)|C(15)|C(16) },
{ "PAPI_br_ins", "branch_retired", 0xf, C(12)|C(13)|C(14)|C(15)|C(16) },
{ "PAPI_br_tkn", "branch_retired", 0xc, C(12)|C(13)|C(14)|C(15)|C(16) },
{ "PAPI_br_ntk", "branch_retired", 0x3, C(12)|C(13)|C(14)|C(15)|C(16) },
{ "PAPI_br_prc", "branch_retired", 0x5, C(12)|C(13)|C(14)|C(15)|C(16) },
{ "PAPI_tot_ins", "instr_retired", 0x3, C(12)|C(13)|C(14)|C(15)|C(16)|C(17) },
{ "PAPI_tot_cyc", "global_power_events", 0x1, C(0)|C(1)|C(2)|C(3) },
{ "PAPI_tlb_dm", "page_walk_type", 0x1, C(0)|C(1)|C(2)|C(3) },
{ "PAPI_tlb_im", "page_walk_type", 0x2, C(0)|C(1)|C(2)|C(3) },
{ "PAPI_tlb_tm", "page_walk_type", 0x3, C(0)|C(1)|C(2)|C(3) },
{ "PAPI_l1_icm", "BPU_fetch_request", 0x1, C(0)|C(1)|C(2)|C(3) },
{ "PAPI_l2_ldm", "BSQ_cache_reference", 0x100, C(0)|C(1)|C(2)|C(3) },
{ "PAPI_l2_stm", "BSQ_cache_reference", 0x400, C(0)|C(1)|C(2)|C(3) },
{ "PAPI_l2_tcm", "BSQ_cache_reference", 0x500, C(0)|C(1)|C(2)|C(3) },
GEN_EVT_END
};
425 
/*
 * Indicates whether the "rdpmc" instruction is available on this processor.
 * Set by p4_pcbe_init(); when set, p4_pcbe_program() updates CR4_PCE.
 */
static int p4_rdpmc_avail = 0;

/*
 * All-zero CCCR value.  Not referenced by the code visible in this file;
 * p4_pcbe_allstop() writes a literal 0 instead.
 */
static const uint64_t p4_cccrstop = 0;

/*
 * Per-counter, comma-separated lists of event names, built by p4_pcbe_init()
 * and returned by p4_pcbe_list_events().
 */
static char *p4_eventlist[18];

/*
 * If set, this processor has HyperThreading.
 */
static int p4_htt = 0;

/* CPU family value p4_pcbe_init() requires before it will load. */
#define	P4_FAMILY	0xF
441 
442 static int
443 p4_pcbe_init(void)
444 {
445 	int			i;
446 	size_t			size;
447 	p4_event_t		*ev;
448 	p4_generic_event_t	*gevp;
449 
450 	/*
451 	 * If we're not running on a P4, refuse to load.
452 	 */
453 	if (cpuid_getvendor(CPU) != X86_VENDOR_Intel ||
454 	    cpuid_getfamily(CPU) != P4_FAMILY)
455 		return (-1);
456 
457 	/*
458 	 * Set up the event lists for each counter.
459 	 *
460 	 * First pass calculates the size of the event list, and the second
461 	 * pass copies each event name into the event list.
462 	 */
463 	for (i = 0; i < 18; i++) {
464 		size = 0;
465 
466 		for (ev = p4_events; ev->pe_name != NULL; ev++) {
467 			if (ev->pe_ctr_mask & C(i))
468 				size += strlen(ev->pe_name) + 1;
469 		}
470 
471 		for (gevp = p4_generic_events; gevp->name != NULL; gevp++) {
472 			if (gevp->ctr_mask & C(i))
473 				size += strlen(gevp->name) + 1;
474 		}
475 
476 		/*
477 		 * We use 'size + 1' here to ensure room for the final
478 		 * strcat when it terminates the string.
479 		 */
480 		p4_eventlist[i] = (char *)kmem_alloc(size + 1, KM_SLEEP);
481 		*p4_eventlist[i] = '\0';
482 
483 		for (ev = p4_events; ev->pe_name != NULL; ev++) {
484 			if (ev->pe_ctr_mask & C(i)) {
485 				(void) strcat(p4_eventlist[i], ev->pe_name);
486 				(void) strcat(p4_eventlist[i], ",");
487 			}
488 		}
489 
490 		for (gevp = p4_generic_events; gevp->name != NULL; gevp++) {
491 			if (gevp->ctr_mask & C(i)) {
492 				(void) strcat(p4_eventlist[i], gevp->name);
493 				(void) strcat(p4_eventlist[i], ",");
494 			}
495 		}
496 
497 		/*
498 		 * Remove trailing ','
499 		 */
500 		p4_eventlist[i][size - 1] = '\0';
501 	}
502 
503 	if (x86_feature & X86_MMX)
504 		p4_rdpmc_avail = 1;
505 	/*
506 	 * The X86_HTT flag may disappear soon, so we'll isolate the impact of
507 	 * its demise to the following if().
508 	 */
509 	if (x86_feature & X86_HTT)
510 		p4_htt = 1;
511 
512 	return (0);
513 }
514 
515 static uint_t
516 p4_pcbe_ncounters(void)
517 {
518 	return (18);
519 }
520 
521 static const char *
522 p4_pcbe_impl_name(void)
523 {
524 	if (p4_htt)
525 		return (PCBE_IMPL_NAME_P4HT);
526 	return ("Pentium 4");
527 }
528 
/*
 * Return a pointer to the documentation reference for this processor's
 * performance counter events.
 */
static const char *
p4_pcbe_cpuref(void)
{
	static const char ref[] =
	    "See Appendix A.1 of the \"IA-32 Intel Architecture Software "
	    "Developer's Manual Volume 3: System Programming Guide,\" "
	    "Order # 245472-012, 2003";

	return (ref);
}
536 
537 static char *
538 p4_pcbe_list_events(uint_t picnum)
539 {
540 	ASSERT(picnum >= 0 && picnum < 18);
541 
542 	return (p4_eventlist[picnum]);
543 }
544 
545 #define	P4_ATTRS "emask,tag,compare,complement,threshold,edge"
546 
547 static char *
548 p4_pcbe_list_attrs(void)
549 {
550 	if (p4_htt)
551 		return (P4_ATTRS ",active_thread,count_sibling_usr,"
552 		    "count_sibling_sys");
553 	return (P4_ATTRS);
554 }
555 
556 static p4_generic_event_t *
557 find_generic_event(char *name)
558 {
559 	p4_generic_event_t	*gevp;
560 
561 	for (gevp = p4_generic_events; gevp->name != NULL; gevp++)
562 		if (strcmp(name, gevp->name) == 0)
563 			return (gevp);
564 
565 	return (NULL);
566 }
567 
568 static p4_event_t *
569 find_event(char *name)
570 {
571 	p4_event_t		*evp;
572 
573 	for (evp = p4_events; evp->pe_name != NULL; evp++)
574 		if (strcmp(name, evp->pe_name) == 0)
575 			return (evp);
576 
577 	return (NULL);
578 }
579 
580 static uint64_t
581 p4_pcbe_event_coverage(char *event)
582 {
583 	p4_event_t		*ev;
584 	p4_generic_event_t	*gevp;
585 
586 	if ((ev = find_event(event)) == NULL) {
587 		if ((gevp = find_generic_event(event)) != NULL)
588 			return (gevp->ctr_mask);
589 		else
590 			return (0);
591 	}
592 
593 	return (ev->pe_ctr_mask);
594 }
595 
596 static uint64_t
597 p4_pcbe_overflow_bitmap(void)
598 {
599 	extern int	kcpc_hw_overflow_intr_installed;
600 	uint64_t	ret = 0;
601 	int		i;
602 
603 	/*
604 	 * The CCCR's OVF bit indicates that the corresponding counter has
605 	 * overflowed. It must be explicitly cleared by software, so it is
606 	 * safe to read the CCCR values here.
607 	 */
608 	for (i = 0; i < 18; i++) {
609 		if (rdmsr(p4_ctrs[i].pc_ctladdr) & CCCR_OVF)
610 			ret |= (1 << i);
611 	}
612 
613 	/*
614 	 * Pentium 4 and Xeon turn off the CPC interrupt mask bit in the LVT at
615 	 * every overflow. Turn it back on here.
616 	 */
617 	ASSERT(kcpc_hw_overflow_intr_installed);
618 	(*kcpc_hw_enable_cpc_intr)();
619 
620 	return (ret);
621 }
622 
623 static int
624 p4_escr_inuse(p4_pcbe_config_t **cfgs, int escr_ndx)
625 {
626 	int i;
627 
628 	for (i = 0; i < 18; i++) {
629 		if (cfgs[i] == NULL)
630 			continue;
631 		if (cfgs[i]->p4_escr_ndx == escr_ndx)
632 			return (1);
633 	}
634 
635 	return (0);
636 }
637 
638 static void
639 build_cfgs(p4_pcbe_config_t *cfgs[18], uint64_t *data[18], void *token)
640 {
641 	p4_pcbe_config_t	*cfg = NULL;
642 	uint64_t		*daddr;
643 
644 	bzero(cfgs, 18 * sizeof (p4_pcbe_config_t *));
645 
646 	do {
647 		cfg = (p4_pcbe_config_t *)kcpc_next_config(token, cfg, &daddr);
648 
649 		if (cfg != NULL) {
650 			ASSERT(cfg->p4_picno < 18);
651 			cfgs[cfg->p4_picno] = cfg;
652 			if (data != NULL) {
653 				ASSERT(daddr != NULL);
654 				data[cfg->p4_picno] = daddr;
655 			}
656 		}
657 	} while (cfg != NULL);
658 }
659 
660 /*
661  * Programming a counter:
662  *
663  * Select event.
664  * Choose an ESCR capable of counting that event.
665  * Set up the ESCR with the desired parameters (usr, sys, tag).
666  * Set up the CCCR to point to the selected ESCR.
667  * Set the CCCR parameters (overflow, cascade, edge, etc).
668  */
669 static int
670 p4_pcbe_configure(uint_t picnum, char *eventname, uint64_t preset,
671     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
672     void *token)
673 {
674 	p4_pcbe_config_t	*cfgs[18];
675 	p4_pcbe_config_t	*cfg;
676 	p4_event_t		*ev;
677 	p4_generic_event_t	*gevp;
678 	int			escr_ndx;
679 	int			i;
680 	uint16_t		emask = 0;
681 	uint8_t			tag;
682 	int			use_tag = 0;
683 	int			active_thread = 0x3; /* default is "any" */
684 	int			compare = 0;
685 	int			complement = 0;
686 	int			threshold = 0;
687 	int			edge = 0;
688 	int			sibling_usr = 0; /* count usr on other cpu */
689 	int			sibling_sys = 0; /* count sys on other cpu */
690 	int			invalid_attr = 0;
691 
692 	/*
693 	 * If we've been handed an existing configuration, we need only preset
694 	 * the counter value.
695 	 */
696 	if (*data != NULL) {
697 		cfg = *data;
698 		cfg->p4_rawpic = preset & MASK40;
699 		return (0);
700 	}
701 
702 	if (picnum < 0 || picnum >= 18)
703 		return (CPC_INVALID_PICNUM);
704 
705 	if ((ev	= find_event(eventname)) == NULL) {
706 		if ((gevp = find_generic_event(eventname)) != NULL) {
707 			ev = find_event(gevp->event);
708 			ASSERT(ev != NULL);
709 
710 			/*
711 			 * For generic events a HTT processor is only allowed
712 			 * to specify the 'active_thread', 'count_sibling_usr'
713 			 * and 'count_sibling_sys' attributes.
714 			 */
715 			if (p4_htt)
716 				for (i = 0; i < nattrs; i++)
717 					if (strstr(P4_ATTRS,
718 					    attrs[i].ka_name) != NULL)
719 						invalid_attr = 1;
720 
721 			if ((p4_htt && invalid_attr) ||
722 			    (!p4_htt && nattrs > 0))
723 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
724 
725 			emask = gevp->emask;
726 		} else {
727 			return (CPC_INVALID_EVENT);
728 		}
729 	}
730 
731 	build_cfgs(cfgs, NULL, token);
732 
733 	/*
734 	 * Find an ESCR capable of counting this event.
735 	 */
736 	for (escr_ndx = 0; escr_ndx < ESCR_MAX_INDEX; escr_ndx++) {
737 		if ((ev->pe_escr_map & (1ULL << escr_ndx)) &&
738 		    p4_escr_inuse(cfgs, escr_ndx) == 0)
739 			break;
740 	}
741 
742 	/*
743 	 * All ESCRs capable of counting this event are already being
744 	 * used.
745 	 */
746 	if (escr_ndx == ESCR_MAX_INDEX)
747 		return (CPC_RESOURCE_UNAVAIL);
748 
749 	/*
750 	 * At this point, ev points to the desired event and escr is the index
751 	 * of a capable and available ESCR.
752 	 *
753 	 * Now process and verify the attributes.
754 	 */
755 	for (i = 0; i < nattrs; i++) {
756 		if (strcmp("emask", attrs[i].ka_name) == 0) {
757 			if ((attrs[i].ka_val | ev->pe_escr_mask)
758 			    != ev->pe_escr_mask)
759 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
760 			emask = attrs[i].ka_val;
761 			continue;
762 		} else if (strcmp("tag", attrs[i].ka_name) == 0) {
763 			if (attrs[i].ka_val > ESCR_TAG_VALUE_MAX)
764 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
765 			tag = attrs[i].ka_val;
766 			use_tag = 1;
767 			continue;
768 		} else if (strcmp("compare", attrs[i].ka_name) == 0) {
769 			if (attrs[i].ka_val != 0)
770 				compare = 1;
771 			continue;
772 		} else if (strcmp("complement", attrs[i].ka_name) == 0) {
773 			if (attrs[i].ka_val != 0)
774 				complement = 1;
775 			continue;
776 		} else if (strcmp("threshold", attrs[i].ka_name) == 0) {
777 			if (attrs[i].ka_val > CCCR_THRESHOLD_MAX)
778 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
779 			threshold = attrs[i].ka_val;
780 			continue;
781 		} else if (strcmp("edge", attrs[i].ka_name) == 0) {
782 			if (attrs[i].ka_val != 0)
783 				edge = 1;
784 			continue;
785 		}
786 
787 		/*
788 		 * The remaining attributes are valid only on HyperThreaded P4s
789 		 * for processes with the "cpc_cpu" privilege.
790 		 */
791 		if (p4_htt == 0)
792 			return (CPC_INVALID_ATTRIBUTE);
793 
794 		if (secpolicy_cpc_cpu(crgetcred()) != 0)
795 			return (CPC_ATTR_REQUIRES_PRIVILEGE);
796 
797 		if (strcmp("active_thread", attrs[i].ka_name) == 0) {
798 			if ((attrs[i].ka_val | CCCR_ACTV_THR_MASK) !=
799 			    CCCR_ACTV_THR_MASK)
800 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
801 			active_thread = (int)attrs[i].ka_val;
802 		} else if (strcmp("count_sibling_usr", attrs[i].ka_name) == 0) {
803 			if (attrs[i].ka_val != 0)
804 				sibling_usr = 1;
805 		} else if (strcmp("count_sibling_sys", attrs[i].ka_name) == 0) {
806 			if (attrs[i].ka_val != 0)
807 				sibling_sys = 1;
808 		} else
809 			return (CPC_INVALID_ATTRIBUTE);
810 	}
811 
812 	/*
813 	 * Make sure the counter can count this event
814 	 */
815 	if ((ev->pe_ctr_mask & C(picnum)) == 0)
816 		return (CPC_PIC_NOT_CAPABLE);
817 
818 	/*
819 	 * Find an ESCR that lines up with the event _and_ the counter.
820 	 */
821 	for (escr_ndx = 0; escr_ndx < ESCR_MAX_INDEX; escr_ndx++) {
822 		if ((ev->pe_escr_map & (1ULL << escr_ndx)) &&
823 		    (p4_escrs[escr_ndx].pe_map & (1 << picnum)) &&
824 		    p4_escr_inuse(cfgs, escr_ndx) == 0)
825 			break;
826 	}
827 	if (escr_ndx == ESCR_MAX_INDEX)
828 		return (CPC_RESOURCE_UNAVAIL);
829 
830 	cfg = (p4_pcbe_config_t *)kmem_alloc(sizeof (p4_pcbe_config_t),
831 	    KM_SLEEP);
832 
833 	cfg->p4_flags = 0;
834 	cfg->p4_picno = picnum;
835 	cfg->p4_escr_ndx = escr_ndx;
836 	cfg->p4_escr = (ev->pe_ev << ESCR_EVSEL_SHIFT) |
837 	    (emask << ESCR_EVMASK_SHIFT);
838 
839 	if (use_tag == 1) {
840 		cfg->p4_escr |= tag << ESCR_TAG_VALUE_SHIFT;
841 		cfg->p4_escr |= ESCR_TAG_ENABLE;
842 	}
843 
844 	if (p4_htt) {
845 		/*
846 		 * This is a HyperThreaded P4.  Since we don't know which
847 		 * logical CPU this configuration will eventually be programmed
848 		 * on, we can't yet decide which fields of the ESCR to select.
849 		 *
850 		 * Record the necessary information in the flags for later.
851 		 */
852 		if (flags & CPC_COUNT_USER)
853 			cfg->p4_flags |= P4_THIS_USR;
854 		if (flags & CPC_COUNT_SYSTEM)
855 			cfg->p4_flags |= P4_THIS_SYS;
856 		if (p4_htt && sibling_usr)
857 			cfg->p4_flags |= P4_SIBLING_USR;
858 		if (p4_htt && sibling_sys)
859 			cfg->p4_flags |= P4_SIBLING_SYS;
860 	} else {
861 		/*
862 		 * This is not HyperThreaded, so we can determine the exact
863 		 * ESCR value necessary now.
864 		 */
865 		if (flags & CPC_COUNT_USER)
866 			cfg->p4_escr |= ESCR_USR;
867 		if (flags & CPC_COUNT_SYSTEM)
868 			cfg->p4_escr |= ESCR_OS;
869 	}
870 
871 	cfg->p4_rawpic = preset & MASK40;
872 
873 	/*
874 	 * Even on non-HT P4s, Intel states the active_thread field (marked as
875 	 * "reserved" for the non-HT chips) must be set to all 1s.
876 	 */
877 	cfg->p4_cccr = CCCR_INIT | (active_thread << CCCR_ACTV_THR_SHIFT);
878 	if (compare)
879 		cfg->p4_cccr |= CCCR_COMPARE;
880 	if (complement)
881 		cfg->p4_cccr |= CCCR_COMPLEMENT;
882 	cfg->p4_cccr |= threshold << CCCR_THRESHOLD_SHIFT;
883 	if (edge)
884 		cfg->p4_cccr |= CCCR_EDGE;
885 	cfg->p4_cccr |= p4_escrs[cfg->p4_escr_ndx].pe_num
886 	    << CCCR_ESCR_SEL_SHIFT;
887 	if (flags & CPC_OVF_NOTIFY_EMT) {
888 		if (p4_htt)
889 			cfg->p4_flags |= P4_PMI;
890 		else {
891 			/*
892 			 * If the user has asked for notification of overflows,
893 			 * we automatically program the hardware to generate an
894 			 * interrupt on overflow.
895 			 *
896 			 * This can only be programmed now if this P4 doesn't
897 			 * have HyperThreading. If it does, we must wait until
898 			 * we know which logical CPU we'll be programming.
899 			 */
900 			cfg->p4_cccr |= CCCR_OVF_PMI;
901 		}
902 	}
903 
904 	*data = cfg;
905 
906 	return (0);
907 }
908 
909 static void
910 p4_pcbe_program(void *token)
911 {
912 	int			i;
913 	uint64_t		cccr;
914 	p4_pcbe_config_t	*cfgs[18];
915 
916 	p4_pcbe_allstop();
917 
918 	build_cfgs(cfgs, NULL, token);
919 
920 	if (p4_rdpmc_avail) {
921 		ulong_t curcr4 = getcr4();
922 		if (kcpc_allow_nonpriv(token))
923 			setcr4(curcr4 | CR4_PCE);
924 		else
925 			setcr4(curcr4 & ~CR4_PCE);
926 	}
927 
928 	/*
929 	 * Ideally we would start all counters with a single operation, but in
930 	 * P4 each counter is enabled individually via its CCCR. To minimize the
931 	 * probe effect of enabling the counters, we do it in two passes: the
932 	 * first programs the counter and ESCR, and the second programs the
933 	 * CCCR (and thus enables the counter).
934 	 */
935 	if (p4_htt) {
936 		int	lid = cpuid_get_clogid(CPU); /* Logical ID of CPU */
937 
938 		for (i = 0; i < 18; i++) {
939 			uint64_t escr;
940 
941 			if (cfgs[i] == NULL)
942 				continue;
943 			escr = (uint64_t)cfgs[i]->p4_escr;
944 
945 			if (cfgs[i]->p4_flags & P4_THIS_USR)
946 				escr |= (lid == 0) ? ESCR_T0_USR : ESCR_T1_USR;
947 			if (cfgs[i]->p4_flags & P4_THIS_SYS)
948 				escr |= (lid == 0) ? ESCR_T0_OS : ESCR_T1_OS;
949 			if (cfgs[i]->p4_flags & P4_SIBLING_USR)
950 				escr |= (lid == 0) ? ESCR_T1_USR : ESCR_T0_USR;
951 			if (cfgs[i]->p4_flags & P4_SIBLING_SYS)
952 				escr |= (lid == 0) ? ESCR_T1_OS : ESCR_T0_OS;
953 
954 			wrmsr(p4_ctrs[i].pc_caddr, cfgs[i]->p4_rawpic);
955 			wrmsr(p4_escrs[cfgs[i]->p4_escr_ndx].pe_addr, escr);
956 		}
957 
958 		for (i = 0; i < 18; i++) {
959 			if (cfgs[i] == NULL)
960 				continue;
961 			cccr = (uint64_t)cfgs[i]->p4_cccr;
962 			/*
963 			 * We always target the overflow interrupt at the
964 			 * logical CPU which is doing the counting.
965 			 */
966 			if (cfgs[i]->p4_flags & P4_PMI)
967 				cccr |= (lid == 0) ?
968 				    CCCR_OVF_PMI_T0 : CCCR_OVF_PMI_T1;
969 			wrmsr(p4_ctrs[i].pc_ctladdr, cccr);
970 		}
971 	} else {
972 		for (i = 0; i < 18; i++) {
973 			if (cfgs[i] == NULL)
974 				continue;
975 			wrmsr(p4_ctrs[i].pc_caddr, cfgs[i]->p4_rawpic);
976 			wrmsr(p4_escrs[cfgs[i]->p4_escr_ndx].pe_addr,
977 			    (uint64_t)cfgs[i]->p4_escr);
978 		}
979 
980 		for (i = 0; i < 18; i++) {
981 			if (cfgs[i] == NULL)
982 				continue;
983 			wrmsr(p4_ctrs[i].pc_ctladdr,
984 			    (uint64_t)cfgs[i]->p4_cccr);
985 		}
986 	}
987 }
988 
989 static void
990 p4_pcbe_allstop(void)
991 {
992 	int		i;
993 
994 	for (i = 0; i < 18; i++)
995 		wrmsr(p4_ctrs[i].pc_ctladdr, 0ULL);
996 
997 	setcr4(getcr4() & ~CR4_PCE);
998 }
999 
1000 
1001 static void
1002 p4_pcbe_sample(void *token)
1003 {
1004 	p4_pcbe_config_t	*cfgs[18];
1005 	uint64_t		*addrs[18];
1006 	uint64_t		curpic[18];
1007 	int64_t			diff;
1008 	int			i;
1009 
1010 	for (i = 0; i < 18; i++)
1011 		curpic[i] = rdmsr(p4_ctrs[i].pc_caddr);
1012 
1013 	build_cfgs(cfgs, addrs, token);
1014 
1015 	for (i = 0; i < 18; i++) {
1016 		if (cfgs[i] == NULL)
1017 			continue;
1018 		diff = curpic[i] - cfgs[i]->p4_rawpic;
1019 		if (diff < 0)
1020 			diff += (1ll << 40);
1021 		*addrs[i] += diff;
1022 		DTRACE_PROBE4(p4__pcbe__sample, int, i, uint64_t, *addrs[i],
1023 		    uint64_t, curpic[i], uint64_t, cfgs[i]->p4_rawpic);
1024 		cfgs[i]->p4_rawpic = *addrs[i] & MASK40;
1025 	}
1026 }
1027 
1028 static void
1029 p4_pcbe_free(void *config)
1030 {
1031 	kmem_free(config, sizeof (p4_pcbe_config_t));
1032 }
1033 
/*
 * Module linkage element for this back-end: the PCBE module ops, a
 * descriptive name, and a pointer to the operations vector defined above.
 */
static struct modlpcbe modlpcbe = {
	&mod_pcbeops,
	"Pentium 4 Performance Counters",
	&p4_pcbe_ops
};
1039 
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info() by
 * _init(), _fini() and _info() below.
 */
static struct modlinkage modl = {
	MODREV_1,
	&modlpcbe,
};
1044 
1045 int
1046 _init(void)
1047 {
1048 	if (p4_pcbe_init() != 0)
1049 		return (ENOTSUP);
1050 	return (mod_install(&modl));
1051 }
1052 
1053 int
1054 _fini(void)
1055 {
1056 	return (mod_remove(&modl));
1057 }
1058 
1059 int
1060 _info(struct modinfo *mi)
1061 {
1062 	return (mod_info(&modl, mi));
1063 }
1064