1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Performance Counter Back-End for Pentium 4. 31 */ 32 33 #include <sys/cpuvar.h> 34 #include <sys/param.h> 35 #include <sys/cpc_impl.h> 36 #include <sys/cpc_pcbe.h> 37 #include <sys/inttypes.h> 38 #include <sys/errno.h> 39 #include <sys/systm.h> 40 #include <sys/archsystm.h> 41 #include <sys/x86_archext.h> 42 #include <sys/modctl.h> 43 #include <sys/sdt.h> 44 #include <sys/cred.h> 45 #include <sys/policy.h> 46 #include <sys/privregs.h> 47 48 static int p4_pcbe_init(void); 49 static uint_t p4_pcbe_ncounters(void); 50 static const char *p4_pcbe_impl_name(void); 51 static const char *p4_pcbe_cpuref(void); 52 static char *p4_pcbe_list_events(uint_t picnum); 53 static char *p4_pcbe_list_attrs(void); 54 static uint64_t p4_pcbe_event_coverage(char *event); 55 static uint64_t p4_pcbe_overflow_bitmap(void); 56 static int p4_pcbe_configure(uint_t picnum, char *event, uint64_t preset, 57 uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data, 58 void *token); 59 static void p4_pcbe_program(void *token); 60 static void p4_pcbe_allstop(void); 61 static void p4_pcbe_sample(void *token); 62 static void p4_pcbe_free(void *config); 63 64 extern int chip_plat_get_clogid(cpu_t *); 65 66 static pcbe_ops_t p4_pcbe_ops = { 67 PCBE_VER_1, 68 CPC_CAP_OVERFLOW_INTERRUPT | CPC_CAP_OVERFLOW_PRECISE, 69 p4_pcbe_ncounters, 70 p4_pcbe_impl_name, 71 p4_pcbe_cpuref, 72 p4_pcbe_list_events, 73 p4_pcbe_list_attrs, 74 p4_pcbe_event_coverage, 75 p4_pcbe_overflow_bitmap, 76 p4_pcbe_configure, 77 p4_pcbe_program, 78 p4_pcbe_allstop, 79 p4_pcbe_sample, 80 p4_pcbe_free 81 }; 82 83 /* 84 * P4 Configuration Flags. 85 */ 86 #define P4_THIS_USR 0x1 /* HTT: Measure usr events on this logical CPU */ 87 #define P4_THIS_SYS 0x2 /* HTT: Measure os events on this logical CPU */ 88 #define P4_SIBLING_USR 0x4 /* HTT: Measure os events on other logical CPU */ 89 #define P4_SIBLING_SYS 0x8 /* HTT: Measure usr events on other logical CPU */ 90 #define P4_PMI 0x10 /* HTT: Set PMI bit for local logical CPU */ 91 92 typedef struct _p4_pcbe_config { 93 uint8_t p4_flags; 94 uint8_t p4_picno; /* From 0 to 18 */ 95 uint8_t p4_escr_ndx; /* Which ESCR to use */ 96 uint32_t p4_escr; /* Value to program in selected ESCR */ 97 uint32_t p4_cccr; /* Value to program in counter's CCCR */ 98 uint64_t p4_rawpic; 99 } p4_pcbe_config_t; 100 101 typedef uint32_t cntr_map_t; 102 103 typedef struct _p4_escr { 104 int pe_num; 105 uint32_t pe_addr; 106 uint32_t pe_map; /* bitmap of counters; bit 1 means ctr 0 */ 107 } p4_escr_t; 108 109 #define MASK40 UINT64_C(0xffffffffff) 110 111 /* 112 * CCCR field definitions. 113 * 114 * Note that the Intel Developer's Manual states that the reserved field at 115 * bit location 16 and 17 must be set to 11. (??) 116 */ 117 #define CCCR_ENABLE_SHIFT 12 118 #define CCCR_ESCR_SEL_SHIFT 13 119 #define CCCR_ACTV_THR_SHIFT 16 120 #define CCCR_COMPARE_SHIFT 18 121 #define CCCR_COMPLEMENT_SHIFT 19 122 #define CCCR_THRESHOLD_SHIFT 20 123 #define CCCR_EDGE_SHIFT 24 124 #define CCCR_OVF_PMI_SHIFT 26 125 #define CCCR_OVF_PMI_T0_SHIFT 26 126 #define CCCR_OVF_PMI_T1_SHIFT 27 127 #define CCCR_OVF_SHIFT 31 128 #define CCCR_ACTV_THR_MASK 0x3 129 #define CCCR_THRESHOLD_MAX 0xF 130 #define CCCR_ENABLE (1U << CCCR_ENABLE_SHIFT) 131 #define CCCR_COMPARE (1U << CCCR_COMPARE_SHIFT) 132 #define CCCR_COMPLEMENT (1U << CCCR_COMPLEMENT_SHIFT) 133 #define CCCR_EDGE (1U << CCCR_EDGE_SHIFT) 134 #define CCCR_OVF_PMI (1U << CCCR_OVF_PMI_SHIFT) 135 #define CCCR_OVF_PMI_T0 (1U << CCCR_OVF_PMI_T0_SHIFT) 136 #define CCCR_OVF_PMI_T1 (1U << CCCR_OVF_PMI_T1_SHIFT) 137 #define CCCR_INIT CCCR_ENABLE 138 #define CCCR_OVF (1U << CCCR_OVF_SHIFT) 139 140 #define ESCR_EVSEL_SHIFT 25 141 #define ESCR_EVMASK_SHIFT 9 142 #define ESCR_TAG_VALUE_SHIFT 5 143 #define ESCR_TAG_VALUE_MAX 0xF 144 #define ESCR_TAG_ENABLE_SHIFT 4 145 #define ESCR_USR_SHIFT 2 146 #define ESCR_OS_SHIFT 3 147 #define ESCR_USR (1U << ESCR_USR_SHIFT) 148 #define ESCR_OS (1U << ESCR_OS_SHIFT) 149 #define ESCR_TAG_ENABLE (1U << ESCR_TAG_ENABLE_SHIFT) 150 151 /* 152 * HyperThreaded ESCR fields. 153 */ 154 #define ESCR_T0_OS_SHIFT 3 155 #define ESCR_T0_USR_SHIFT 2 156 #define ESCR_T1_OS_SHIFT 1 157 #define ESCR_T1_USR_SHIFT 0 158 #define ESCR_T0_OS (1U << ESCR_T0_OS_SHIFT) 159 #define ESCR_T0_USR (1U << ESCR_T0_USR_SHIFT) 160 #define ESCR_T1_OS (1U << ESCR_T1_OS_SHIFT) 161 #define ESCR_T1_USR (1U << ESCR_T1_USR_SHIFT) 162 163 /* 164 * ESCRs are grouped by counter; each group of ESCRs is associated with a 165 * distinct group of counters. Use these macros to fill in the table below. 166 */ 167 #define BPU0_map (0x1 | 0x2) /* Counters 0 and 1 */ 168 #define BPU2_map (0x4 | 0x8) /* Counters 2 and 3 */ 169 #define MS0_map (0x10 | 0x20) /* Counters 4 and 5 */ 170 #define MS2_map (0x40 | 0x80) /* Counters 6 and 7 */ 171 #define FLAME0_map (0x100 | 0x200) /* Counters 8 and 9 */ 172 #define FLAME2_map (0x400 | 0x800) /* Counters 10 and 11 */ 173 #define IQ0_map (0x1000 | 0x2000 | 0x10000) /* Counters 12, 13, 16 */ 174 #define IQ2_map (0x4000 | 0x8000 | 0x20000) /* Counters 14, 15, 17 */ 175 176 /* 177 * Table describing the 45 Event Selection and Control Registers (ESCRs). 178 */ 179 const p4_escr_t p4_escrs[] = { 180 #define BPU0 (1) 181 { 0, 0x3B2, BPU0_map }, /* 0 */ 182 #define IS0 (1ULL << 1) 183 { 1, 0x3B4, BPU0_map }, /* 1 */ 184 #define MOB0 (1ULL << 2) 185 { 2, 0x3AA, BPU0_map }, /* 2 */ 186 #define ITLB0 (1ULL << 3) 187 { 3, 0x3B6, BPU0_map }, /* 3 */ 188 #define PMH0 (1ULL << 4) 189 { 4, 0x3AC, BPU0_map }, /* 4 */ 190 #define IX0 (1ULL << 5) 191 { 5, 0x3C8, BPU0_map }, /* 5 */ 192 #define FSB0 (1ULL << 6) 193 { 6, 0x3A2, BPU0_map }, /* 6 */ 194 #define BSU0 (1ULL << 7) 195 { 7, 0x3A0, BPU0_map }, /* 7 */ 196 #define BPU1 (1ULL << 8) 197 { 0, 0x3B3, BPU2_map }, /* 8 */ 198 #define IS1 (1ULL << 9) 199 { 1, 0x3B5, BPU2_map }, /* 9 */ 200 #define MOB1 (1ULL << 10) 201 { 2, 0x3AB, BPU2_map }, /* 10 */ 202 #define ITLB1 (1ULL << 11) 203 { 3, 0x3B7, BPU2_map }, /* 11 */ 204 #define PMH1 (1ULL << 12) 205 { 4, 0x3AD, BPU2_map }, /* 12 */ 206 #define IX1 (1ULL << 13) 207 { 5, 0x3C9, BPU2_map }, /* 13 */ 208 #define FSB1 (1ULL << 14) 209 { 6, 0x3A3, BPU2_map }, /* 14 */ 210 #define BSU1 (1ULL << 15) 211 { 7, 0x3A1, BPU2_map }, /* 15 */ 212 #define MS0 (1ULL << 16) 213 { 0, 0x3C0, MS0_map }, /* 16 */ 214 #define TC0 (1ULL << 17) 215 { 1, 0x3C4, MS0_map }, /* 17 */ 216 #define TBPU0 (1ULL << 18) 217 { 2, 0x3C2, MS0_map }, /* 18 */ 218 #define MS1 (1ULL << 19) 219 { 0, 0x3C1, MS2_map }, /* 19 */ 220 #define TC1 (1ULL << 20) 221 { 1, 0x3C5, MS2_map }, /* 20 */ 222 #define TBPU1 (1ULL << 21) 223 { 2, 0x3C3, MS2_map }, /* 21 */ 224 #define FLAME0 (1ULL << 22) 225 { 0, 0x3A6, FLAME0_map }, /* 22 */ 226 #define FIRM0 (1ULL << 23) 227 { 1, 0x3A4, FLAME0_map }, /* 23 */ 228 #define SAAT0 (1ULL << 24) 229 { 2, 0x3AE, FLAME0_map }, /* 24 */ 230 #define U2L0 (1ULL << 25) 231 { 3, 0x3B0, FLAME0_map }, /* 25 */ 232 #define DAC0 (1ULL << 26) 233 { 5, 0x3A8, FLAME0_map }, /* 26 */ 234 #define FLAME1 (1ULL << 27) 235 { 0, 0x3A7, FLAME2_map }, /* 27 */ 236 #define FIRM1 (1ULL << 28) 237 { 1, 0x3A5, FLAME2_map }, /* 28 */ 238 #define SAAT1 (1ULL << 29) 239 { 2, 0x3AF, FLAME2_map }, /* 29 */ 240 #define U2L1 (1ULL << 30) 241 { 3, 0x3B1, FLAME2_map }, /* 30 */ 242 #define DAC1 (1ULL << 31) 243 { 5, 0x3A9, FLAME2_map }, /* 31 */ 244 #define IQ0 (1ULL << 32) 245 { 0, 0x3BA, IQ0_map }, /* 32 */ 246 #define ALF0 (1ULL << 33) 247 { 1, 0x3CA, IQ0_map }, /* 33 */ 248 #define RAT0 (1ULL << 34) 249 { 2, 0x3BC, IQ0_map }, /* 34 */ 250 #define SSU0 (1ULL << 35) 251 { 3, 0x3BE, IQ0_map }, /* 35 */ 252 #define CRU0 (1ULL << 36) 253 { 4, 0x3B8, IQ0_map }, /* 36 */ 254 #define CRU2 (1ULL << 37) 255 { 5, 0x3CC, IQ0_map }, /* 37 */ 256 #define CRU4 (1ULL << 38) 257 { 6, 0x3E0, IQ0_map }, /* 38 */ 258 #define IQ1 (1ULL << 39) 259 { 0, 0x3BB, IQ2_map }, /* 39 */ 260 #define ALF1 (1ULL << 40) 261 { 1, 0x3CB, IQ2_map }, /* 40 */ 262 #define RAT1 (1ULL << 41) 263 { 2, 0x3BD, IQ2_map }, /* 41 */ 264 #define CRU1 (1ULL << 42) 265 { 4, 0x3B9, IQ2_map }, /* 42 */ 266 #define CRU3 (1ULL << 43) 267 { 5, 0x3CD, IQ2_map }, /* 43 */ 268 #define CRU5 (1ULL << 44) 269 { 6, 0x3E1, IQ2_map } /* 44 */ 270 }; 271 272 #define ESCR_MAX_INDEX 44 273 274 typedef struct _p4_ctr { 275 uint32_t pc_caddr; /* counter MSR address */ 276 uint32_t pc_ctladdr; /* counter's CCCR MSR address */ 277 uint64_t pc_map; /* bitmap of ESCRs controlling ctr */ 278 } p4_ctr_t; 279 280 const p4_ctr_t p4_ctrs[18] = { 281 { /* BPU_COUNTER0 */ 0x300, 0x360, BSU0|FSB0|MOB0|PMH0|BPU0|IS0|ITLB0|IX0}, 282 { /* BPU_COUNTER1 */ 0x301, 0x361, BSU0|FSB0|MOB0|PMH0|BPU0|IS0|ITLB0|IX0}, 283 { /* BPU_COUNTER2 */ 0x302, 0x362, BSU1|FSB1|MOB1|PMH1|BPU1|IS1|ITLB1|IX1}, 284 { /* BPU_COUNTER3 */ 0x303, 0x363, BSU1|FSB1|MOB1|PMH1|BPU1|IS1|ITLB1|IX1}, 285 { /* MS_COUNTER0 */ 0x304, 0x364, MS0|TBPU0|TC0 }, 286 { /* MS_COUNTER1 */ 0x305, 0x365, MS0|TBPU0|TC0 }, 287 { /* MS_COUNTER2 */ 0x306, 0x366, MS1|TBPU1|TC1 }, 288 { /* MS_COUNTER3 */ 0x307, 0x367, MS1|TBPU1|TC1 }, 289 { /* FLAME_COUNTER0 */ 0x308, 0x368, FIRM0|FLAME0|DAC0|SAAT0|U2L0 }, 290 { /* FLAME_COUNTER1 */ 0x309, 0x369, FIRM0|FLAME0|DAC0|SAAT0|U2L0 }, 291 { /* FLAME_COUNTER2 */ 0x30A, 0x36A, FIRM1|FLAME1|DAC1|SAAT1|U2L1 }, 292 { /* FLAME_COUNTER3 */ 0x30B, 0x36B, FIRM1|FLAME1|DAC1|SAAT1|U2L1 }, 293 { /* IQ_COUNTER0 */ 0x30C, 0x36C, CRU0|CRU2|CRU4|IQ0|RAT0|SSU0|ALF0 }, 294 { /* IQ_COUNTER1 */ 0x30D, 0x36D, CRU0|CRU2|CRU4|IQ0|RAT0|SSU0|ALF0 }, 295 { /* IQ_COUNTER2 */ 0x30E, 0x36E, CRU1|CRU3|CRU5|IQ1|RAT1|ALF1 }, 296 { /* IQ_COUNTER3 */ 0x30F, 0x36F, CRU1|CRU3|CRU5|IQ1|RAT1|ALF1 }, 297 { /* IQ_COUNTER4 */ 0x310, 0x370, CRU0|CRU2|CRU4|IQ0|RAT0|SSU0|ALF0 }, 298 { /* IQ_COUNTER5 */ 0x311, 0x371, CRU1|CRU3|CRU5|IQ1|RAT1|ALF1 } 299 }; 300 301 typedef struct _p4_event { 302 char *pe_name; /* Name of event according to docs */ 303 uint64_t pe_escr_map; /* Bitmap of ESCRs capable of event */ 304 uint32_t pe_escr_mask; /* permissible ESCR event mask */ 305 uint8_t pe_ev; /* ESCR event select value */ 306 uint16_t pe_cccr; /* CCCR select value */ 307 uint32_t pe_ctr_mask; /* Bitmap of capable counters */ 308 } p4_event_t; 309 310 #define C(n) (1 << n) 311 312 p4_event_t p4_events[] = { 313 { "branch_retired", CRU2|CRU3, 0xF, 0x6, 0x5, C(12)|C(13)|C(14)|C(15)|C(16) }, 314 { "mispred_branch_retired", CRU0|CRU1, 0x1, 0x3, 0x4, 315 C(12)|C(13)|C(14)|C(15)|C(16) }, 316 { "TC_deliver_mode", TC0|TC1, 0xFF, 0x1, 0x1, C(4)|C(5)|C(6)|C(7) }, 317 { "BPU_fetch_request", BPU0|BPU1, 0x1, 0x3, 0x0, C(0)|C(1)|C(2)|C(3) }, 318 { "ITLB_reference", ITLB0|ITLB1, 0x7, 0x18, 0x3, C(0)|C(1)|C(2)|C(3) }, 319 { "memory_cancel", DAC0|DAC1, 0x6, 0x2, 0x5, C(8)|C(9)|C(10)|C(11) }, 320 { "memory_complete", SAAT0|SAAT1, 0x3, 0x8, 0x2, C(8)|C(9)|C(10)|C(11) }, 321 { "load_port_replay", SAAT0|SAAT1, 0x1, 0x4, 0x2, C(8)|C(9)|C(10)|C(11) }, 322 { "store_port_replay", SAAT0|SAAT1, 0x1, 0x5, 0x2, C(8)|C(9)|C(10)|C(11) }, 323 { "MOB_load_replay", MOB0|MOB1, 0x35, 0x3, 0x2, C(0)|C(1)|C(2)|C(3) }, 324 { "page_walk_type", PMH0|PMH1, 0x3, 0x1, 0x4, C(0)|C(1)|C(2)|C(3) }, 325 { "BSQ_cache_reference", BSU0|BSU1, 0x73F, 0xC, 0x7, C(0)|C(1)|C(2)|C(3) }, 326 { "IOQ_allocation", FSB0, 0xEFFF, 0x3, 0x6, C(0)|C(1) }, 327 { "IOQ_active_entries", FSB1, 0xEFFF, 0x1A, 0x6, C(2)|C(3) }, 328 { "FSB_data_activity", FSB0|FSB1, 0x3F, 0x17, 0x6, C(0)|C(1)|C(2)|C(3) }, 329 { "BSQ_allocation", BSU0, 0x3FEF, 0x5, 0x7, C(0)|C(1) }, 330 { "bsq_active_entries", BSU1, 0x3FEF, 0x6, 0x7, C(2)|C(3) }, 331 { "x87_assist", CRU2|CRU3, 0x1F, 0x3, 0x5, C(12)|C(13)|C(14)|C(15)|C(16)|C(17)}, 332 { "SSE_input_assist", FIRM0|FIRM1, 0x8000, 0x34, 0x1, C(8)|C(9)|C(10)|C(11) }, 333 { "packed_SP_uop", FIRM0|FIRM1, 0x8000, 0x8, 0x1, C(8)|C(9)|C(10)|C(11) }, 334 { "packed_DP_uop", FIRM0|FIRM1, 0x8000, 0xC, 0x1, C(8)|C(9)|C(10)|C(11) }, 335 { "scalar_SP_uop", FIRM0|FIRM1, 0x8000, 0xA, 0x1, C(8)|C(9)|C(10)|C(11) }, 336 { "scalar_DP_uop", FIRM0|FIRM1, 0x8000, 0xE, 0x1, C(8)|C(9)|C(10)|C(11) }, 337 { "64bit_MMX_uop", FIRM0|FIRM1, 0x8000, 0x2, 0x1, C(8)|C(9)|C(10)|C(11) }, 338 { "128bit_MMX_uop", FIRM0|FIRM1, 0x8000, 0x1A, 0x1, C(8)|C(9)|C(10)|C(11) }, 339 { "x87_FP_uop", FIRM0|FIRM1, 0x8000, 0x4, 0x1, C(8)|C(9)|C(10)|C(11) }, 340 { "x87_SIMD_moves_uop", FIRM0|FIRM1, 0x18, 0x2E, 0x1, C(8)|C(9)|C(10)|C(11) }, 341 { "machine_clear", CRU2|CRU3, 0xD, 0x2, 0x5, 342 C(12)|C(13)|C(14)|C(15)|C(16)|C(17)}, 343 { "global_power_events", FSB0|FSB1, 0x1, 0x5, 0x6, C(0)|C(1)|C(2)|C(3) }, 344 { "tc_ms_xfer", MS0|MS1, 0x1, 0x5, 0x0, C(4)|C(5)|C(6)|C(7) }, 345 { "uop_queue_writes", MS0|MS1, 0x7, 0x9, 0x0, C(4)|C(5)|C(6)|C(7) }, 346 { "front_end_event", CRU2|CRU3, 0x3, 0x8, 0x5, 347 C(12)|C(13)|C(14)|C(15)|C(16)|C(17)}, 348 { "execution_event", CRU2|CRU3, 0xFF, 0xC, 0x5, 349 C(12)|C(13)|C(14)|C(15)|C(16)|C(17)}, 350 { "replay_event", CRU2|CRU3, 0x3, 0x9, 0x5, 351 C(12)|C(13)|C(14)|C(15)|C(16)|C(17)}, 352 { "instr_retired", CRU0|CRU1, 0xF, 0x2, 0x4, 353 C(12)|C(13)|C(14)|C(15)|C(16)|C(17)}, 354 { "uops_retired", CRU0|CRU1, 0x3, 0x1, 0x4, 355 C(12)|C(13)|C(14)|C(15)|C(16)|C(17)}, 356 { "uop_type", RAT0|RAT1, 0x3, 0x2, 0x2, C(12)|C(13)|C(14)|C(15)|C(16)|C(17)}, 357 { "retired_mispred_branch_type", TBPU0|TBPU1, 0x1F, 0x5, 0x2, 358 C(4)|C(5)|C(6)|C(7)}, 359 { "retired_branch_type", TBPU0|TBPU1, 0x1F, 0x4, 0x2, C(4)|C(5)|C(6)|C(7) }, 360 { NULL, 0, 0, 0, 0 } 361 }; 362 363 /* 364 * Indicates whether the "rdpmc" instruction is available on this processor. 365 */ 366 static int p4_rdpmc_avail = 0; 367 368 static const uint64_t p4_cccrstop = 0; 369 370 static char *p4_eventlist[18]; 371 372 /* 373 * If set, this processor has HyperThreading. 374 */ 375 static int p4_htt = 0; 376 377 #define P4_FAMILY 0xF 378 379 static int 380 p4_pcbe_init(void) 381 { 382 int i; 383 size_t size; 384 p4_event_t *ev; 385 386 /* 387 * If we're not running on a P4, refuse to load. 388 */ 389 if (cpuid_getvendor(CPU) != X86_VENDOR_Intel || 390 cpuid_getfamily(CPU) != P4_FAMILY) 391 return (-1); 392 393 /* 394 * Set up the event lists for each counter. 395 * 396 * First pass calculates the size of the event list, and the second 397 * pass copies each event name into the event list. 398 */ 399 for (i = 0; i < 18; i++) { 400 size = 0; 401 for (ev = p4_events; ev->pe_name != NULL; ev++) { 402 if (ev->pe_ctr_mask & C(i)) 403 size += strlen(ev->pe_name) + 1; 404 } 405 406 /* 407 * We use 'size + 1' here to ensure room for the final 408 * strcat when it terminates the string. 409 */ 410 p4_eventlist[i] = (char *)kmem_alloc(size + 1, KM_SLEEP); 411 *p4_eventlist[i] = '\0'; 412 413 for (ev = p4_events; ev->pe_name != NULL; ev++) { 414 if (ev->pe_ctr_mask & C(i)) { 415 (void) strcat(p4_eventlist[i], ev->pe_name); 416 (void) strcat(p4_eventlist[i], ","); 417 } 418 } 419 /* 420 * Remove trailing ',' 421 */ 422 p4_eventlist[i][size - 1] = '\0'; 423 } 424 425 if (x86_feature & X86_MMX) 426 p4_rdpmc_avail = 1; 427 /* 428 * The X86_HTT flag may disappear soon, so we'll isolate the impact of 429 * its demise to the following if(). 430 */ 431 if (x86_feature & X86_HTT) 432 p4_htt = 1; 433 434 return (0); 435 } 436 437 static uint_t 438 p4_pcbe_ncounters(void) 439 { 440 return (18); 441 } 442 443 static const char * 444 p4_pcbe_impl_name(void) 445 { 446 if (p4_htt) 447 return ("Pentium 4 with HyperThreading"); 448 return ("Pentium 4"); 449 } 450 451 static const char * 452 p4_pcbe_cpuref(void) 453 { 454 return ("See Appendix A.1 of the \"IA-32 Intel Architecture Software " \ 455 "Developer's Manual Volume 3: System Programming Guide,\" " \ 456 "Order # 245472-012, 2003"); 457 } 458 459 static char * 460 p4_pcbe_list_events(uint_t picnum) 461 { 462 ASSERT(picnum >= 0 && picnum < 18); 463 464 return (p4_eventlist[picnum]); 465 } 466 467 #define P4_ATTRS "emask,tag,compare,complement,threshold,edge" 468 469 static char * 470 p4_pcbe_list_attrs(void) 471 { 472 if (p4_htt) 473 return (P4_ATTRS ",active_thread,count_sibling_usr," 474 "count_sibling_sys"); 475 return (P4_ATTRS); 476 } 477 478 static uint64_t 479 p4_pcbe_event_coverage(char *event) 480 { 481 p4_event_t *ev; 482 483 for (ev = p4_events; ev->pe_name != NULL; ev++) { 484 if (strcmp(event, ev->pe_name) == 0) 485 break; 486 } 487 488 return (ev->pe_ctr_mask); 489 } 490 491 static uint64_t 492 p4_pcbe_overflow_bitmap(void) 493 { 494 extern int kcpc_hw_overflow_intr_installed; 495 uint64_t ret = 0; 496 uint64_t tmp; 497 int i; 498 499 /* 500 * The CCCR's OVF bit indicates that the corresponding counter has 501 * overflowed. It must be explicitly cleared by software, so it is 502 * safe to read the CCCR values here. 503 */ 504 for (i = 0; i < 18; i++) { 505 (void) rdmsr(p4_ctrs[i].pc_ctladdr, &tmp); 506 if (tmp & CCCR_OVF) 507 ret |= (1 << i); 508 } 509 510 /* 511 * Pentium 4 and Xeon turn off the CPC interrupt mask bit in the LVT at 512 * every overflow. Turn it back on here. 513 */ 514 ASSERT(kcpc_hw_overflow_intr_installed); 515 (*kcpc_hw_enable_cpc_intr)(); 516 517 return (ret); 518 } 519 520 static int 521 p4_escr_inuse(p4_pcbe_config_t **cfgs, int escr_ndx) 522 { 523 int i; 524 525 for (i = 0; i < 18; i++) { 526 if (cfgs[i] == NULL) 527 continue; 528 if (cfgs[i]->p4_escr_ndx == escr_ndx) 529 return (1); 530 } 531 532 return (0); 533 } 534 535 static void 536 build_cfgs(p4_pcbe_config_t *cfgs[18], uint64_t *data[18], void *token) 537 { 538 p4_pcbe_config_t *cfg = NULL; 539 uint64_t *daddr; 540 541 bzero(cfgs, 18 * sizeof (p4_pcbe_config_t *)); 542 543 do { 544 cfg = (p4_pcbe_config_t *)kcpc_next_config(token, cfg, &daddr); 545 546 if (cfg != NULL) { 547 ASSERT(cfg->p4_picno < 18); 548 cfgs[cfg->p4_picno] = cfg; 549 if (data != NULL) { 550 ASSERT(daddr != NULL); 551 data[cfg->p4_picno] = daddr; 552 } 553 } 554 } while (cfg != NULL); 555 } 556 557 /* 558 * Programming a counter: 559 * 560 * Select event. 561 * Choose an ESCR capable of counting that event. 562 * Set up the ESCR with the desired parameters (usr, sys, tag). 563 * Set up the CCCR to point to the selected ESCR. 564 * Set the CCCR parameters (overflow, cascade, edge, etc). 565 */ 566 static int 567 p4_pcbe_configure(uint_t picnum, char *eventname, uint64_t preset, 568 uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data, 569 void *token) 570 { 571 p4_pcbe_config_t *cfgs[18]; 572 p4_pcbe_config_t *cfg; 573 p4_event_t *ev; 574 int escr_ndx; 575 int i; 576 uint16_t emask = 0; 577 uint8_t tag; 578 int use_tag = 0; 579 int active_thread = 0x3; /* default is "any" */ 580 int compare = 0; 581 int complement = 0; 582 int threshold = 0; 583 int edge = 0; 584 int sibling_usr = 0; /* count usr on other cpu */ 585 int sibling_sys = 0; /* count sys on other cpu */ 586 587 /* 588 * If we've been handed an existing configuration, we need only preset 589 * the counter value. 590 */ 591 if (*data != NULL) { 592 cfg = *data; 593 cfg->p4_rawpic = preset & MASK40; 594 return (0); 595 } 596 597 if (picnum < 0 || picnum >= 18) 598 return (CPC_INVALID_PICNUM); 599 600 for (ev = p4_events; ev->pe_name != NULL; ev++) { 601 if (strcmp(eventname, ev->pe_name) == 0) 602 break; 603 } 604 if (ev->pe_name == NULL) 605 return (CPC_INVALID_EVENT); 606 607 build_cfgs(cfgs, NULL, token); 608 609 /* 610 * Find an ESCR capable of counting this event. 611 */ 612 for (escr_ndx = 0; escr_ndx < ESCR_MAX_INDEX; escr_ndx++) { 613 if ((ev->pe_escr_map & (1ULL << escr_ndx)) && 614 p4_escr_inuse(cfgs, escr_ndx) == 0) 615 break; 616 } 617 618 /* 619 * All ESCRs capable of counting this event are already being 620 * used. 621 */ 622 if (escr_ndx == ESCR_MAX_INDEX) 623 return (CPC_RESOURCE_UNAVAIL); 624 625 /* 626 * At this point, ev points to the desired event and escr is the index 627 * of a capable and available ESCR. 628 * 629 * Now process and verify the attributes. 630 */ 631 for (i = 0; i < nattrs; i++) { 632 if (strcmp("emask", attrs[i].ka_name) == 0) { 633 if ((attrs[i].ka_val | ev->pe_escr_mask) 634 != ev->pe_escr_mask) 635 return (CPC_ATTRIBUTE_OUT_OF_RANGE); 636 emask = attrs[i].ka_val; 637 continue; 638 } else if (strcmp("tag", attrs[i].ka_name) == 0) { 639 if (attrs[i].ka_val > ESCR_TAG_VALUE_MAX) 640 return (CPC_ATTRIBUTE_OUT_OF_RANGE); 641 tag = attrs[i].ka_val; 642 use_tag = 1; 643 continue; 644 } else if (strcmp("compare", attrs[i].ka_name) == 0) { 645 if (attrs[i].ka_val != 0) 646 compare = 1; 647 continue; 648 } else if (strcmp("complement", attrs[i].ka_name) == 0) { 649 if (attrs[i].ka_val != 0) 650 complement = 1; 651 continue; 652 } else if (strcmp("threshold", attrs[i].ka_name) == 0) { 653 if (attrs[i].ka_val > CCCR_THRESHOLD_MAX) 654 return (CPC_ATTRIBUTE_OUT_OF_RANGE); 655 threshold = attrs[i].ka_val; 656 continue; 657 } else if (strcmp("edge", attrs[i].ka_name) == 0) { 658 if (attrs[i].ka_val != 0) 659 edge = 1; 660 continue; 661 } 662 663 /* 664 * The remaining attributes are valid only on HyperThreaded P4s 665 * for processes with the "cpc_cpu" privilege. 666 */ 667 if (p4_htt == 0) 668 return (CPC_INVALID_ATTRIBUTE); 669 670 if (secpolicy_cpc_cpu(crgetcred()) != 0) 671 return (CPC_ATTR_REQUIRES_PRIVILEGE); 672 673 if (strcmp("active_thread", attrs[i].ka_name) == 0) { 674 if ((attrs[i].ka_val | CCCR_ACTV_THR_MASK) != 675 CCCR_ACTV_THR_MASK) 676 return (CPC_ATTRIBUTE_OUT_OF_RANGE); 677 active_thread = (int)attrs[i].ka_val; 678 } else if (strcmp("count_sibling_usr", attrs[i].ka_name) == 0) { 679 if (attrs[i].ka_val != 0) 680 sibling_usr = 1; 681 } else if (strcmp("count_sibling_sys", attrs[i].ka_name) == 0) { 682 if (attrs[i].ka_val != 0) 683 sibling_sys = 1; 684 } else 685 return (CPC_INVALID_ATTRIBUTE); 686 } 687 688 /* 689 * Make sure the counter can count this event 690 */ 691 if ((ev->pe_ctr_mask & C(picnum)) == 0) 692 return (CPC_PIC_NOT_CAPABLE); 693 694 /* 695 * Find an ESCR that lines up with the event _and_ the counter. 696 */ 697 for (escr_ndx = 0; escr_ndx < ESCR_MAX_INDEX; escr_ndx++) { 698 if ((ev->pe_escr_map & (1ULL << escr_ndx)) && 699 (p4_escrs[escr_ndx].pe_map & (1 << picnum)) && 700 p4_escr_inuse(cfgs, escr_ndx) == 0) 701 break; 702 } 703 if (escr_ndx == ESCR_MAX_INDEX) 704 return (CPC_RESOURCE_UNAVAIL); 705 706 cfg = (p4_pcbe_config_t *)kmem_alloc(sizeof (p4_pcbe_config_t), 707 KM_SLEEP); 708 709 cfg->p4_flags = 0; 710 cfg->p4_picno = picnum; 711 cfg->p4_escr_ndx = escr_ndx; 712 cfg->p4_escr = (ev->pe_ev << ESCR_EVSEL_SHIFT) | 713 (emask << ESCR_EVMASK_SHIFT); 714 715 if (use_tag == 1) { 716 cfg->p4_escr |= tag << ESCR_TAG_VALUE_SHIFT; 717 cfg->p4_escr |= ESCR_TAG_ENABLE; 718 } 719 720 if (p4_htt) { 721 /* 722 * This is a HyperThreaded P4. Since we don't know which 723 * logical CPU this configuration will eventually be programmed 724 * on, we can't yet decide which fields of the ESCR to select. 725 * 726 * Record the necessary information in the flags for later. 727 */ 728 if (flags & CPC_COUNT_USER) 729 cfg->p4_flags |= P4_THIS_USR; 730 if (flags & CPC_COUNT_SYSTEM) 731 cfg->p4_flags |= P4_THIS_SYS; 732 if (p4_htt && sibling_usr) 733 cfg->p4_flags |= P4_SIBLING_USR; 734 if (p4_htt && sibling_sys) 735 cfg->p4_flags |= P4_SIBLING_SYS; 736 } else { 737 /* 738 * This is not HyperThreaded, so we can determine the exact 739 * ESCR value necessary now. 740 */ 741 if (flags & CPC_COUNT_USER) 742 cfg->p4_escr |= ESCR_USR; 743 if (flags & CPC_COUNT_SYSTEM) 744 cfg->p4_escr |= ESCR_OS; 745 } 746 747 cfg->p4_rawpic = preset & MASK40; 748 749 /* 750 * Even on non-HT P4s, Intel states the active_thread field (marked as 751 * "reserved" for the non-HT chips) must be set to all 1s. 752 */ 753 cfg->p4_cccr = CCCR_INIT | (active_thread << CCCR_ACTV_THR_SHIFT); 754 if (compare) 755 cfg->p4_cccr |= CCCR_COMPARE; 756 if (complement) 757 cfg->p4_cccr |= CCCR_COMPLEMENT; 758 cfg->p4_cccr |= threshold << CCCR_THRESHOLD_SHIFT; 759 if (edge) 760 cfg->p4_cccr |= CCCR_EDGE; 761 cfg->p4_cccr |= p4_escrs[cfg->p4_escr_ndx].pe_num 762 << CCCR_ESCR_SEL_SHIFT; 763 if (flags & CPC_OVF_NOTIFY_EMT) { 764 if (p4_htt) 765 cfg->p4_flags |= P4_PMI; 766 else { 767 /* 768 * If the user has asked for notification of overflows, 769 * we automatically program the hardware to generate an 770 * interrupt on overflow. 771 * 772 * This can only be programmed now if this P4 doesn't 773 * have HyperThreading. If it does, we must wait until 774 * we know which logical CPU we'll be programming. 775 */ 776 cfg->p4_cccr |= CCCR_OVF_PMI; 777 } 778 } 779 780 *data = cfg; 781 782 return (0); 783 } 784 785 static void 786 p4_pcbe_program(void *token) 787 { 788 int i; 789 uint64_t escr; 790 uint64_t cccr; 791 p4_pcbe_config_t *cfgs[18]; 792 793 p4_pcbe_allstop(); 794 795 build_cfgs(cfgs, NULL, token); 796 797 if (p4_rdpmc_avail) { 798 uint32_t curcr4 = getcr4(); 799 if (kcpc_allow_nonpriv(token)) 800 setcr4(curcr4 | CR4_PCE); 801 else 802 setcr4(curcr4 & ~CR4_PCE); 803 } 804 805 /* 806 * Ideally we would start all counters with a single operation, but in 807 * P4 each counter is enabled individually via its CCCR. To minimize the 808 * probe effect of enabling the counters, we do it in two passes: the 809 * first programs the counter and ESCR, and the second programs the 810 * CCCR (and thus enables the counter). 811 */ 812 if (p4_htt) { 813 int lid = chip_plat_get_clogid(CPU); /* Logical ID of CPU */ 814 815 for (i = 0; i < 18; i++) { 816 if (cfgs[i] == NULL) 817 continue; 818 escr = (uint64_t)cfgs[i]->p4_escr; 819 820 if (cfgs[i]->p4_flags & P4_THIS_USR) 821 escr |= (lid == 0) ? ESCR_T0_USR : ESCR_T1_USR; 822 if (cfgs[i]->p4_flags & P4_THIS_SYS) 823 escr |= (lid == 0) ? ESCR_T0_OS : ESCR_T1_OS; 824 if (cfgs[i]->p4_flags & P4_SIBLING_USR) 825 escr |= (lid == 0) ? ESCR_T1_USR : ESCR_T0_USR; 826 if (cfgs[i]->p4_flags & P4_SIBLING_SYS) 827 escr |= (lid == 0) ? ESCR_T1_OS : ESCR_T0_OS; 828 829 wrmsr(p4_ctrs[i].pc_caddr, &cfgs[i]->p4_rawpic); 830 wrmsr(p4_escrs[cfgs[i]->p4_escr_ndx].pe_addr, &escr); 831 } 832 833 for (i = 0; i < 18; i++) { 834 if (cfgs[i] == NULL) 835 continue; 836 cccr = (uint64_t)cfgs[i]->p4_cccr; 837 /* 838 * We always target the overflow interrupt at the 839 * logical CPU which is doing the counting. 840 */ 841 if (cfgs[i]->p4_flags & P4_PMI) 842 cccr |= (lid == 0) ? 843 CCCR_OVF_PMI_T0 : CCCR_OVF_PMI_T1; 844 wrmsr(p4_ctrs[i].pc_ctladdr, &cccr); 845 } 846 } else { 847 for (i = 0; i < 18; i++) { 848 if (cfgs[i] == NULL) 849 continue; 850 escr = (uint64_t)cfgs[i]->p4_escr; 851 wrmsr(p4_ctrs[i].pc_caddr, &cfgs[i]->p4_rawpic); 852 wrmsr(p4_escrs[cfgs[i]->p4_escr_ndx].pe_addr, &escr); 853 } 854 855 for (i = 0; i < 18; i++) { 856 if (cfgs[i] == NULL) 857 continue; 858 cccr = (uint64_t)cfgs[i]->p4_cccr; 859 wrmsr(p4_ctrs[i].pc_ctladdr, &cccr); 860 } 861 } 862 } 863 864 static void 865 p4_pcbe_allstop(void) 866 { 867 int i; 868 uint64_t tmp = 0; 869 870 for (i = 0; i < 18; i++) 871 wrmsr(p4_ctrs[i].pc_ctladdr, &tmp); 872 873 setcr4(getcr4() & ~CR4_PCE); 874 } 875 876 877 static void 878 p4_pcbe_sample(void *token) 879 { 880 p4_pcbe_config_t *cfgs[18]; 881 uint64_t *addrs[18]; 882 uint64_t curpic[18]; 883 int64_t diff; 884 int i; 885 886 for (i = 0; i < 18; i++) 887 (void) rdmsr(p4_ctrs[i].pc_caddr, &curpic[i]); 888 889 build_cfgs(cfgs, addrs, token); 890 891 for (i = 0; i < 18; i++) { 892 if (cfgs[i] == NULL) 893 continue; 894 diff = curpic[i] - cfgs[i]->p4_rawpic; 895 if (diff < 0) 896 diff += (1ll << 40); 897 *addrs[i] += diff; 898 DTRACE_PROBE4(p4__pcbe__sample, int, i, uint64_t, *addrs[i], 899 uint64_t, curpic[i], uint64_t, cfgs[i]->p4_rawpic); 900 cfgs[i]->p4_rawpic = *addrs[i] & MASK40; 901 } 902 } 903 904 static void 905 p4_pcbe_free(void *config) 906 { 907 kmem_free(config, sizeof (p4_pcbe_config_t)); 908 } 909 910 static struct modlpcbe modlpcbe = { 911 &mod_pcbeops, 912 "Pentium 4 Performance Counters v%I%", 913 &p4_pcbe_ops 914 }; 915 916 static struct modlinkage modl = { 917 MODREV_1, 918 &modlpcbe, 919 }; 920 921 int 922 _init(void) 923 { 924 if (p4_pcbe_init() != 0) 925 return (ENOTSUP); 926 return (mod_install(&modl)); 927 } 928 929 int 930 _fini(void) 931 { 932 return (mod_remove(&modl)); 933 } 934 935 int 936 _info(struct modinfo *mi) 937 { 938 return (mod_info(&modl, mi)); 939 } 940