// SPDX-License-Identifier: GPL-2.0-only
/*
 * turbostat -- show CPU frequency and C-state residency
 * on modern Intel and AMD processors.
 *
 * Copyright (c) 2025 Intel Corporation.
 * Len Brown <len.brown@intel.com>
 */

#define _GNU_SOURCE
#include MSRHEADER

// copied from arch/x86/include/asm/cpu_device_id.h
#define VFM_MODEL_BIT	0
#define VFM_FAMILY_BIT	8
#define VFM_VENDOR_BIT	16
#define VFM_RSVD_BIT	24

#define VFM_MODEL_MASK	GENMASK(VFM_FAMILY_BIT - 1, VFM_MODEL_BIT)
#define VFM_FAMILY_MASK	GENMASK(VFM_VENDOR_BIT - 1, VFM_FAMILY_BIT)
#define VFM_VENDOR_MASK	GENMASK(VFM_RSVD_BIT - 1, VFM_VENDOR_BIT)

#define VFM_MODEL(vfm)	(((vfm) & VFM_MODEL_MASK) >> VFM_MODEL_BIT)
#define VFM_FAMILY(vfm)	(((vfm) & VFM_FAMILY_MASK) >> VFM_FAMILY_BIT)
#define VFM_VENDOR(vfm)	(((vfm) & VFM_VENDOR_MASK) >> VFM_VENDOR_BIT)

#define VFM_MAKE(_vendor, _family, _model) (	\
	((_model) << VFM_MODEL_BIT) |		\
	((_family) << VFM_FAMILY_BIT) |		\
	((_vendor) << VFM_VENDOR_BIT)		\
)
// end copied section
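/*
 * Example: VFM_MAKE(X86_VENDOR_INTEL, 6, 0x8F) packs vendor 0,
 * family 6, model 0x8F into 0x068f; VFM_FAMILY()/VFM_MODEL()
 * recover the fields when matching entries in turbostat_pdata[]
 * below.
 */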
#define CPUID_LEAF_MODEL_ID	0x1A
#define CPUID_LEAF_MODEL_ID_CORE_TYPE_SHIFT	24

#define X86_VENDOR_INTEL	0

#include INTEL_FAMILY_HEADER
#include BUILD_BUG_HEADER
#include <stdarg.h>
#include <stdio.h>
#include <err.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/select.h>
#include <sys/resource.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <signal.h>
#include <sys/time.h>
#include <stdlib.h>
#include <getopt.h>
#include <dirent.h>
#include <string.h>
#include <ctype.h>
#include <sched.h>
#include <time.h>
#include <cpuid.h>
#include <sys/capability.h>
#include <errno.h>
#include <math.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <stdbool.h>
#include <assert.h>
#include <linux/kernel.h>
#include <limits.h>

#define UNUSED(x) (void)(x)

/*
 * This list matches the column headers, except
 * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
 * 2. Core and CPU are moved to the end; we can't have strings that contain them
 *    matching on them for --show and --hide.
 */

/*
 * buffer size used by sscanf() for added column names
 * Usually truncated to 7 characters, but also handles 18-character columns for raw 64-bit counters
 */
#define NAME_BYTES 20
#define PATH_BYTES 128
#define PERF_NAME_BYTES 128

#define MAX_NOFILE 0x8000

#define COUNTER_KIND_PERF_PREFIX "perf/"
#define COUNTER_KIND_PERF_PREFIX_LEN strlen(COUNTER_KIND_PERF_PREFIX)
#define PERF_DEV_NAME_BYTES 32
#define PERF_EVT_NAME_BYTES 32

#define INTEL_ECORE_TYPE	0x20
#define INTEL_PCORE_TYPE	0x40

#define ROUND_UP_TO_PAGE_SIZE(n) (((n) + 0x1000UL-1UL) & ~(0x1000UL-1UL))

enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M };
enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE };
enum counter_source { COUNTER_SOURCE_NONE, COUNTER_SOURCE_PERF, COUNTER_SOURCE_MSR };

struct perf_counter_info {
	struct perf_counter_info *next;

	/* How to open the counter / What counter it is. */
	char device[PERF_DEV_NAME_BYTES];
	char event[PERF_EVT_NAME_BYTES];

	/* How to show/format the counter. */
	char name[PERF_NAME_BYTES];
	unsigned int width;
	enum counter_scope scope;
	enum counter_type type;
	enum counter_format format;
	double scale;

	/* For reading the counter. */
	int *fd_perf_per_domain;
	size_t num_domains;
};

struct sysfs_path {
	char path[PATH_BYTES];
	int id;
	struct sysfs_path *next;
};

struct msr_counter {
	unsigned int msr_num;
	char name[NAME_BYTES];
	struct sysfs_path *sp;
	unsigned int width;
	enum counter_type type;
	enum counter_format format;
	struct msr_counter *next;
	unsigned int flags;
#define	FLAGS_HIDE	(1 << 0)
#define	FLAGS_SHOW	(1 << 1)
#define	SYSFS_PERCPU	(1 << 1)
};
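/*
 * Note that SYSFS_PERCPU shares bit (1 << 1) with FLAGS_SHOW;
 * presumably safe because the show/hide flags and the sysfs
 * per-cpu marker are consulted in different contexts.
 */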
struct msr_counter bic[] = {
	{ 0x0, "usec", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Time_Of_Day_Seconds", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Package", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Node", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Avg_MHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Busy%", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Bzy_MHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "TSC_MHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "IRQ", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SMI", NULL, 32, 0, FORMAT_DELTA, NULL, 0 },
	{ 0x0, "cpuidle", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c1", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c3", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c7", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "ThreadC", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CoreTmp", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CoreCnt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "PkgTmp", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFX%rc6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFXMHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc2", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc3", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc7", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc8", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc9", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pk%pc10", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%LPI", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SYS%LPI", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "PkgWatt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CorWatt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFXWatt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "PkgCnt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "RAMWatt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "PKG_%", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "RAM_%", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg_J", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Cor_J", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFX_J", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "RAM_J", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Mod%c6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Totl%C0", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Any%C0", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFX%C0", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPUGFX%", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Core", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "APIC", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "X2APIC", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Die", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "L3", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFXAMHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "IPC", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CoreThr", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "UncMHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SAM%mc6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SAMMHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SAMAMHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Die%c6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SysWatt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 },
};

/* n.b. bic_names must match the order in bic[], above */
enum bic_names {
	BIC_USEC,
	BIC_TOD,
	BIC_Package,
	BIC_Node,
	BIC_Avg_MHz,
	BIC_Busy,
	BIC_Bzy_MHz,
	BIC_TSC_MHz,
	BIC_IRQ,
	BIC_SMI,
	BIC_cpuidle,
	BIC_CPU_c1,
	BIC_CPU_c3,
	BIC_CPU_c6,
	BIC_CPU_c7,
	BIC_ThreadC,
	BIC_CoreTmp,
	BIC_CoreCnt,
	BIC_PkgTmp,
	BIC_GFX_rc6,
	BIC_GFXMHz,
	BIC_Pkgpc2,
	BIC_Pkgpc3,
	BIC_Pkgpc6,
	BIC_Pkgpc7,
	BIC_Pkgpc8,
	BIC_Pkgpc9,
	BIC_Pkgpc10,
	BIC_CPU_LPI,
	BIC_SYS_LPI,
	BIC_PkgWatt,
	BIC_CorWatt,
	BIC_GFXWatt,
	BIC_PkgCnt,
	BIC_RAMWatt,
	BIC_PKG__,
	BIC_RAM__,
	BIC_Pkg_J,
	BIC_Cor_J,
	BIC_GFX_J,
	BIC_RAM_J,
	BIC_Mod_c6,
	BIC_Totl_c0,
	BIC_Any_c0,
	BIC_GFX_c0,
	BIC_CPUGFX,
	BIC_Core,
	BIC_CPU,
	BIC_APIC,
	BIC_X2APIC,
	BIC_Die,
	BIC_L3,
	BIC_GFXACTMHz,
	BIC_IPC,
	BIC_CORE_THROT_CNT,
	BIC_UNCORE_MHZ,
	BIC_SAM_mc6,
	BIC_SAMMHz,
	BIC_SAMACTMHz,
	BIC_Diec6,
	BIC_SysWatt,
	BIC_Sys_J,
	BIC_NMI,
	BIC_CPU_c1e,
	BIC_pct_idle,
	MAX_BIC
};

void print_bic_set(char *s, cpu_set_t *set)
{
	int i;

	assert(MAX_BIC < CPU_SETSIZE);

	printf("%s:", s);

	for (i = 0; i <= MAX_BIC; ++i) {

		if (CPU_ISSET(i, set)) {
			assert(i < MAX_BIC);
			printf(" %s", bic[i].name);
		}
	}
	putchar('\n');
}

static cpu_set_t bic_group_topology;
static cpu_set_t bic_group_thermal_pwr;
static cpu_set_t bic_group_frequency;
static cpu_set_t bic_group_hw_idle;
static cpu_set_t bic_group_sw_idle;
static cpu_set_t bic_group_idle;
static cpu_set_t bic_group_other;
static cpu_set_t bic_group_disabled_by_default;
static cpu_set_t bic_enabled;
static cpu_set_t bic_present;

/* modify */
#define BIC_INIT(set)	CPU_ZERO(set)

#define SET_BIC(COUNTER_NUMBER, set)	CPU_SET(COUNTER_NUMBER, set)
#define CLR_BIC(COUNTER_NUMBER, set)	CPU_CLR(COUNTER_NUMBER, set)

#define BIC_PRESENT(COUNTER_NUMBER)	SET_BIC(COUNTER_NUMBER, &bic_present)
#define BIC_NOT_PRESENT(COUNTER_NUMBER)	CPU_CLR(COUNTER_NUMBER, &bic_present)

/* test */
#define BIC_IS_ENABLED(COUNTER_NUMBER)	CPU_ISSET(COUNTER_NUMBER, &bic_enabled)
#define DO_BIC_READ(COUNTER_NUMBER)	CPU_ISSET(COUNTER_NUMBER, &bic_present)
#define DO_BIC(COUNTER_NUMBER)	(CPU_ISSET(COUNTER_NUMBER, &bic_enabled) && CPU_ISSET(COUNTER_NUMBER, &bic_present))
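/*
 * BIC sets reuse cpu_set_t as a generic bitset, one bit per
 * built-in counter.  A column is printed only when its bit is in
 * both sets, e.g. DO_BIC(BIC_IRQ) is true only if the IRQ column
 * is enabled (not hidden) and present (supported on this system).
 */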
static void bic_set_all(cpu_set_t *set)
{
	int i;

	assert(MAX_BIC < CPU_SETSIZE);

	for (i = 0; i < MAX_BIC; ++i)
		SET_BIC(i, set);
}

/*
 * bic_clear_bits()
 * clear all the bits from "clr" in "dst"
 */
static void bic_clear_bits(cpu_set_t *dst, cpu_set_t *clr)
{
	int i;

	assert(MAX_BIC < CPU_SETSIZE);

	for (i = 0; i < MAX_BIC; ++i)
		if (CPU_ISSET(i, clr))
			CLR_BIC(i, dst);
}
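/*
 * Named groups of counters; these back the group keywords
 * (e.g. "topology", "frequency", "idle") that --show and --hide
 * accept in addition to individual column names.
 */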
static void bic_groups_init(void)
{
	BIC_INIT(&bic_group_topology);
	SET_BIC(BIC_Package, &bic_group_topology);
	SET_BIC(BIC_Node, &bic_group_topology);
	SET_BIC(BIC_CoreCnt, &bic_group_topology);
	SET_BIC(BIC_PkgCnt, &bic_group_topology);
	SET_BIC(BIC_Core, &bic_group_topology);
	SET_BIC(BIC_CPU, &bic_group_topology);
	SET_BIC(BIC_Die, &bic_group_topology);
	SET_BIC(BIC_L3, &bic_group_topology);

	BIC_INIT(&bic_group_thermal_pwr);
	SET_BIC(BIC_CoreTmp, &bic_group_thermal_pwr);
	SET_BIC(BIC_PkgTmp, &bic_group_thermal_pwr);
	SET_BIC(BIC_PkgWatt, &bic_group_thermal_pwr);
	SET_BIC(BIC_CorWatt, &bic_group_thermal_pwr);
	SET_BIC(BIC_GFXWatt, &bic_group_thermal_pwr);
	SET_BIC(BIC_RAMWatt, &bic_group_thermal_pwr);
	SET_BIC(BIC_PKG__, &bic_group_thermal_pwr);
	SET_BIC(BIC_RAM__, &bic_group_thermal_pwr);
	SET_BIC(BIC_SysWatt, &bic_group_thermal_pwr);

	BIC_INIT(&bic_group_frequency);
	SET_BIC(BIC_Avg_MHz, &bic_group_frequency);
	SET_BIC(BIC_Busy, &bic_group_frequency);
	SET_BIC(BIC_Bzy_MHz, &bic_group_frequency);
	SET_BIC(BIC_TSC_MHz, &bic_group_frequency);
	SET_BIC(BIC_GFXMHz, &bic_group_frequency);
	SET_BIC(BIC_GFXACTMHz, &bic_group_frequency);
	SET_BIC(BIC_SAMMHz, &bic_group_frequency);
	SET_BIC(BIC_SAMACTMHz, &bic_group_frequency);
	SET_BIC(BIC_UNCORE_MHZ, &bic_group_frequency);

	BIC_INIT(&bic_group_hw_idle);
	SET_BIC(BIC_Busy, &bic_group_hw_idle);
	SET_BIC(BIC_CPU_c1, &bic_group_hw_idle);
	SET_BIC(BIC_CPU_c3, &bic_group_hw_idle);
	SET_BIC(BIC_CPU_c6, &bic_group_hw_idle);
	SET_BIC(BIC_CPU_c7, &bic_group_hw_idle);
	SET_BIC(BIC_GFX_rc6, &bic_group_hw_idle);
	SET_BIC(BIC_Pkgpc2, &bic_group_hw_idle);
	SET_BIC(BIC_Pkgpc3, &bic_group_hw_idle);
	SET_BIC(BIC_Pkgpc6, &bic_group_hw_idle);
	SET_BIC(BIC_Pkgpc7, &bic_group_hw_idle);
	SET_BIC(BIC_Pkgpc8, &bic_group_hw_idle);
	SET_BIC(BIC_Pkgpc9, &bic_group_hw_idle);
	SET_BIC(BIC_Pkgpc10, &bic_group_hw_idle);
	SET_BIC(BIC_CPU_LPI, &bic_group_hw_idle);
	SET_BIC(BIC_SYS_LPI, &bic_group_hw_idle);
	SET_BIC(BIC_Mod_c6, &bic_group_hw_idle);
	SET_BIC(BIC_Totl_c0, &bic_group_hw_idle);
	SET_BIC(BIC_Any_c0, &bic_group_hw_idle);
	SET_BIC(BIC_GFX_c0, &bic_group_hw_idle);
	SET_BIC(BIC_CPUGFX, &bic_group_hw_idle);
	SET_BIC(BIC_SAM_mc6, &bic_group_hw_idle);
	SET_BIC(BIC_Diec6, &bic_group_hw_idle);

	BIC_INIT(&bic_group_sw_idle);
	SET_BIC(BIC_Busy, &bic_group_sw_idle);
	SET_BIC(BIC_cpuidle, &bic_group_sw_idle);
	SET_BIC(BIC_pct_idle, &bic_group_sw_idle);

	BIC_INIT(&bic_group_idle);
	CPU_OR(&bic_group_idle, &bic_group_idle, &bic_group_hw_idle);
	SET_BIC(BIC_pct_idle, &bic_group_idle);

	BIC_INIT(&bic_group_other);
	SET_BIC(BIC_IRQ, &bic_group_other);
	SET_BIC(BIC_NMI, &bic_group_other);
	SET_BIC(BIC_SMI, &bic_group_other);
	SET_BIC(BIC_ThreadC, &bic_group_other);
	SET_BIC(BIC_CoreTmp, &bic_group_other);
	SET_BIC(BIC_IPC, &bic_group_other);

	BIC_INIT(&bic_group_disabled_by_default);
	SET_BIC(BIC_USEC, &bic_group_disabled_by_default);
	SET_BIC(BIC_TOD, &bic_group_disabled_by_default);
	SET_BIC(BIC_cpuidle, &bic_group_disabled_by_default);
	SET_BIC(BIC_APIC, &bic_group_disabled_by_default);
	SET_BIC(BIC_X2APIC, &bic_group_disabled_by_default);

	BIC_INIT(&bic_enabled);
	bic_set_all(&bic_enabled);
	bic_clear_bits(&bic_enabled, &bic_group_disabled_by_default);

	BIC_INIT(&bic_present);
	SET_BIC(BIC_USEC, &bic_present);
	SET_BIC(BIC_TOD, &bic_present);
	SET_BIC(BIC_cpuidle, &bic_present);
	SET_BIC(BIC_APIC, &bic_present);
	SET_BIC(BIC_X2APIC, &bic_present);
	SET_BIC(BIC_pct_idle, &bic_present);
}

/*
 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
 * If you change the values, note they are used both in comparisons
 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
 */
#define PCLUKN 0		/* Unknown */
#define PCLRSV 1		/* Reserved */
#define PCL__0 2		/* PC0 */
#define PCL__1 3		/* PC1 */
#define PCL__2 4		/* PC2 */
#define PCL__3 5		/* PC3 */
#define PCL__4 6		/* PC4 */
#define PCL__6 7		/* PC6 */
#define PCL_6N 8		/* PC6 No Retention */
#define PCL_6R 9		/* PC6 Retention */
#define PCL__7 10		/* PC7 */
#define PCL_7S 11		/* PC7 Shrink */
#define PCL__8 12		/* PC8 */
#define PCL__9 13		/* PC9 */
#define PCL_10 14		/* PC10 */
#define PCLUNL 15		/* Unlimited */

struct amperf_group_fd;

char *proc_stat = "/proc/stat";
FILE *outf;
int *fd_percpu;
int *fd_instr_count_percpu;
struct timeval interval_tv = { 5, 0 };
struct timespec interval_ts = { 5, 0 };

unsigned int num_iterations;
unsigned int header_iterations;
unsigned int debug;
unsigned int quiet;
unsigned int shown;
unsigned int sums_need_wide_columns;
unsigned int rapl_joules;
unsigned int summary_only;
unsigned int list_header_only;
unsigned int dump_only;
unsigned int force_load;
unsigned int has_aperf;
unsigned int has_aperf_access;
unsigned int has_epb;
unsigned int has_turbo;
unsigned int is_hybrid;
unsigned int units = 1000000;	/* MHz etc */
unsigned int genuine_intel;
unsigned int authentic_amd;
unsigned int hygon_genuine;
unsigned int max_level, max_extended_level;
unsigned int has_invariant_tsc;
unsigned int aperf_mperf_multiplier = 1;
double bclk;
double base_hz;
unsigned int has_base_hz;
double tsc_tweak = 1.0;
unsigned int show_pkg_only;
unsigned int show_core_only;
char *output_buffer, *outp;
unsigned int do_dts;
unsigned int do_ptm;
unsigned int do_ipc;
unsigned long long cpuidle_cur_cpu_lpi_us;
unsigned long long cpuidle_cur_sys_lpi_us;
unsigned int tj_max;
unsigned int tj_max_override;
double rapl_power_units, rapl_time_units;
double rapl_dram_energy_units, rapl_energy_units, rapl_psys_energy_units;
double rapl_joule_counter_range;
unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu;
unsigned int has_hwp;		/* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
				/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
unsigned int has_hwp_notify;	/* IA32_HWP_INTERRUPT */
unsigned int has_hwp_activity_window;	/* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp;	/* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg;	/* IA32_HWP_REQUEST_PKG */
unsigned int first_counter_read = 1;

static struct timeval procsysfs_tv_begin;

int ignore_stdin;
bool no_msr;
bool no_perf;

enum gfx_sysfs_idx {
	GFX_rc6,
	GFX_MHz,
	GFX_ACTMHz,
	SAM_mc6,
	SAM_MHz,
	SAM_ACTMHz,
	GFX_MAX
};

struct gfx_sysfs_info {
	FILE *fp;
	unsigned int val;
	unsigned long long val_ull;
};

static struct gfx_sysfs_info gfx_info[GFX_MAX];

int get_msr(int cpu, off_t offset, unsigned long long *msr);
int add_counter(unsigned int msr_num, char *path, char *name,
		unsigned int width, enum counter_scope scope,
		enum counter_type type, enum counter_format format, int flags, int package_num);

/* Model specific support Start */
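/*
 * Per-platform support is table driven: each supported CPU is
 * mapped (by vendor/family/model) to a struct platform_features
 * describing which MSRs, C-states and RAPL domains it offers.
 * probe_platform_features() selects the entry at startup.
 */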
/* List of features that may diverge among different platforms */
struct platform_features {
	bool has_msr_misc_feature_control;	/* MSR_MISC_FEATURE_CONTROL */
	bool has_msr_misc_pwr_mgmt;	/* MSR_MISC_PWR_MGMT */
	bool has_nhm_msrs;	/* MSR_PLATFORM_INFO, MSR_IA32_TEMPERATURE_TARGET, MSR_SMI_COUNT, MSR_PKG_CST_CONFIG_CONTROL, MSR_IA32_POWER_CTL, TRL MSRs */
	bool has_config_tdp;	/* MSR_CONFIG_TDP_NOMINAL/LEVEL_1/LEVEL_2/CONTROL, MSR_TURBO_ACTIVATION_RATIO */
	int bclk_freq;		/* CPU base clock */
	int crystal_freq;	/* Crystal clock to use when not available from CPUID.15 */
	int supported_cstates;	/* Core cstates and Package cstates supported */
	int cst_limit;		/* MSR_PKG_CST_CONFIG_CONTROL */
	bool has_cst_auto_convension;	/* AUTOMATIC_CSTATE_CONVERSION bit in MSR_PKG_CST_CONFIG_CONTROL */
	bool has_irtl_msrs;	/* MSR_PKGC3/PKGC6/PKGC7/PKGC8/PKGC9/PKGC10_IRTL */
	bool has_msr_core_c1_res;	/* MSR_CORE_C1_RES */
	bool has_msr_module_c6_res_ms;	/* MSR_MODULE_C6_RES_MS */
	bool has_msr_c6_demotion_policy_config;	/* MSR_CC6_DEMOTION_POLICY_CONFIG/MSR_MC6_DEMOTION_POLICY_CONFIG */
	bool has_msr_atom_pkg_c6_residency;	/* MSR_ATOM_PKG_C6_RESIDENCY */
	bool has_msr_knl_core_c6_residency;	/* MSR_KNL_CORE_C6_RESIDENCY */
	bool has_ext_cst_msrs;	/* MSR_PKG_WEIGHTED_CORE_C0_RES/MSR_PKG_ANY_CORE_C0_RES/MSR_PKG_ANY_GFXE_C0_RES/MSR_PKG_BOTH_CORE_GFXE_C0_RES */
	bool has_cst_prewake_bit;	/* Cstate prewake bit in MSR_IA32_POWER_CTL */
	int trl_msrs;		/* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */
	int plr_msrs;		/* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */
	int rapl_msrs;		/* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */
	bool has_per_core_rapl;	/* Indicates cores energy collection is per-core, not per-package. AMD specific for now */
	bool has_rapl_divisor;	/* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */
	bool has_fixed_rapl_unit;	/* Fixed Energy Unit used for DRAM RAPL Domain */
	bool has_fixed_rapl_psys_unit;	/* Fixed Energy Unit used for PSYS RAPL Domain */
	int rapl_quirk_tdp;	/* Hardcoded TDP value when cannot be retrieved from hardware */
	int tcc_offset_bits;	/* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */
	bool enable_tsc_tweak;	/* Use CPU Base freq instead of TSC freq for aperf/mperf counter */
	bool need_perf_multiplier;	/* mperf/aperf multiplier */
};

struct platform_data {
	unsigned int vfm;
	const struct platform_features *features;
};

/* For BCLK */
enum bclk_freq {
	BCLK_100MHZ = 1,
	BCLK_133MHZ,
	BCLK_SLV,
};

#define SLM_BCLK_FREQS 5
double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };

double slm_bclk(void)
{
	unsigned long long msr = 3;
	unsigned int i;
	double freq;

	if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
		fprintf(outf, "SLM BCLK: unknown\n");

	i = msr & 0xf;
	if (i >= SLM_BCLK_FREQS) {
		fprintf(outf, "SLM BCLK[%d] invalid\n", i);
		i = 3;
	}
	freq = slm_freq_table[i];

	if (!quiet)
		fprintf(outf, "SLM BCLK: %.1f MHz\n", freq);

	return freq;
}
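/*
 * Example decode: MSR_FSB_FREQ[3:0] == 1 reports a 100.0 MHz
 * BCLK; an out-of-range index falls back to entry 3 (116.7 MHz).
 */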
/* For Package cstate limit */
enum package_cstate_limit {
	CST_LIMIT_NHM = 1,
	CST_LIMIT_SNB,
	CST_LIMIT_HSW,
	CST_LIMIT_SKX,
	CST_LIMIT_ICX,
	CST_LIMIT_SLV,
	CST_LIMIT_AMT,
	CST_LIMIT_KNL,
	CST_LIMIT_GMT,
};

/* For Turbo Ratio Limit MSRs */
enum turbo_ratio_limit_msrs {
	TRL_BASE = BIT(0),
	TRL_LIMIT1 = BIT(1),
	TRL_LIMIT2 = BIT(2),
	TRL_ATOM = BIT(3),
	TRL_KNL = BIT(4),
	TRL_CORECOUNT = BIT(5),
};

/* For Perf Limit Reason MSRs */
enum perf_limit_reason_msrs {
	PLR_CORE = BIT(0),
	PLR_GFX = BIT(1),
	PLR_RING = BIT(2),
};

/* For RAPL MSRs */
enum rapl_msrs {
	RAPL_PKG_POWER_LIMIT = BIT(0),	/* 0x610 MSR_PKG_POWER_LIMIT */
	RAPL_PKG_ENERGY_STATUS = BIT(1),	/* 0x611 MSR_PKG_ENERGY_STATUS */
	RAPL_PKG_PERF_STATUS = BIT(2),	/* 0x613 MSR_PKG_PERF_STATUS */
	RAPL_PKG_POWER_INFO = BIT(3),	/* 0x614 MSR_PKG_POWER_INFO */
	RAPL_DRAM_POWER_LIMIT = BIT(4),	/* 0x618 MSR_DRAM_POWER_LIMIT */
	RAPL_DRAM_ENERGY_STATUS = BIT(5),	/* 0x619 MSR_DRAM_ENERGY_STATUS */
	RAPL_DRAM_PERF_STATUS = BIT(6),	/* 0x61b MSR_DRAM_PERF_STATUS */
	RAPL_DRAM_POWER_INFO = BIT(7),	/* 0x61c MSR_DRAM_POWER_INFO */
	RAPL_CORE_POWER_LIMIT = BIT(8),	/* 0x638 MSR_PP0_POWER_LIMIT */
	RAPL_CORE_ENERGY_STATUS = BIT(9),	/* 0x639 MSR_PP0_ENERGY_STATUS */
	RAPL_CORE_POLICY = BIT(10),	/* 0x63a MSR_PP0_POLICY */
	RAPL_GFX_POWER_LIMIT = BIT(11),	/* 0x640 MSR_PP1_POWER_LIMIT */
	RAPL_GFX_ENERGY_STATUS = BIT(12),	/* 0x641 MSR_PP1_ENERGY_STATUS */
	RAPL_GFX_POLICY = BIT(13),	/* 0x642 MSR_PP1_POLICY */
	RAPL_AMD_PWR_UNIT = BIT(14),	/* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */
	RAPL_AMD_CORE_ENERGY_STAT = BIT(15),	/* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */
	RAPL_AMD_PKG_ENERGY_STAT = BIT(16),	/* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */
	RAPL_PLATFORM_ENERGY_LIMIT = BIT(17),	/* 0x64c MSR_PLATFORM_ENERGY_LIMIT */
	RAPL_PLATFORM_ENERGY_STATUS = BIT(18),	/* 0x64d MSR_PLATFORM_ENERGY_STATUS */
};

#define RAPL_PKG	(RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT)
#define RAPL_DRAM	(RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT)
#define RAPL_CORE	(RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT)
#define RAPL_GFX	(RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS)
#define RAPL_PSYS	(RAPL_PLATFORM_ENERGY_STATUS | RAPL_PLATFORM_ENERGY_LIMIT)

#define RAPL_PKG_ALL	(RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO)
#define RAPL_DRAM_ALL	(RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO)
#define RAPL_CORE_ALL	(RAPL_CORE | RAPL_CORE_POLICY)
#define RAPL_GFX_ALL	(RAPL_GFX | RAPL_GFX_POLICY)

#define RAPL_AMD_F17H	(RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | RAPL_AMD_PKG_ENERGY_STAT)
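/*
 * The composite masks above are what the platform tables OR into
 * .rapl_msrs; probe code tests individual bits against them, e.g.
 * (rapl_msrs & RAPL_DRAM_ENERGY_STATUS) implies MSR 0x619 exists.
 */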
/* For Cstates */
enum cstates {
	CC1 = BIT(0),
	CC3 = BIT(1),
	CC6 = BIT(2),
	CC7 = BIT(3),
	PC2 = BIT(4),
	PC3 = BIT(5),
	PC6 = BIT(6),
	PC7 = BIT(7),
	PC8 = BIT(8),
	PC9 = BIT(9),
	PC10 = BIT(10),
};

static const struct platform_features nhm_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_133MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_NHM,
	.trl_msrs = TRL_BASE,
};

static const struct platform_features nhx_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_133MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_NHM,
};

static const struct platform_features snb_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_SNB,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

static const struct platform_features snx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_SNB,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
};

static const struct platform_features ivb_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_SNB,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

static const struct platform_features ivx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_SNB,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE | TRL_LIMIT1,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
};

static const struct platform_features hsw_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
static const struct platform_features hsx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2,
	.plr_msrs = PLR_CORE | PLR_RING,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
	.has_fixed_rapl_unit = 1,
};

static const struct platform_features hswl_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

static const struct platform_features hswg_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

static const struct platform_features bdw_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

static const struct platform_features bdwg_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

static const struct platform_features bdx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | PC2 | PC3 | PC6,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.has_cst_auto_convension = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
	.has_fixed_rapl_unit = 1,
};

static const struct platform_features skl_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.crystal_freq = 24000000,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.has_ext_cst_msrs = 1,
	.trl_msrs = TRL_BASE,
	.tcc_offset_bits = 6,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS,
	.enable_tsc_tweak = 1,
};

static const struct platform_features cnl_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.has_msr_core_c1_res = 1,
	.has_ext_cst_msrs = 1,
	.trl_msrs = TRL_BASE,
	.tcc_offset_bits = 6,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS,
	.enable_tsc_tweak = 1,
};
/* Copied from cnl_features, with PC7/PC9 removed */
static const struct platform_features adl_features = {
	.has_msr_misc_feature_control = cnl_features.has_msr_misc_feature_control,
	.has_msr_misc_pwr_mgmt = cnl_features.has_msr_misc_pwr_mgmt,
	.has_nhm_msrs = cnl_features.has_nhm_msrs,
	.has_config_tdp = cnl_features.has_config_tdp,
	.bclk_freq = cnl_features.bclk_freq,
	.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10,
	.cst_limit = cnl_features.cst_limit,
	.has_irtl_msrs = cnl_features.has_irtl_msrs,
	.has_msr_core_c1_res = cnl_features.has_msr_core_c1_res,
	.has_ext_cst_msrs = cnl_features.has_ext_cst_msrs,
	.trl_msrs = cnl_features.trl_msrs,
	.tcc_offset_bits = cnl_features.tcc_offset_bits,
	.rapl_msrs = cnl_features.rapl_msrs,
	.enable_tsc_tweak = cnl_features.enable_tsc_tweak,
};

/* Copied from adl_features, with PC3/PC8 removed */
static const struct platform_features lnl_features = {
	.has_msr_misc_feature_control = adl_features.has_msr_misc_feature_control,
	.has_msr_misc_pwr_mgmt = adl_features.has_msr_misc_pwr_mgmt,
	.has_nhm_msrs = adl_features.has_nhm_msrs,
	.has_config_tdp = adl_features.has_config_tdp,
	.bclk_freq = adl_features.bclk_freq,
	.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC6 | PC10,
	.cst_limit = adl_features.cst_limit,
	.has_irtl_msrs = adl_features.has_irtl_msrs,
	.has_msr_core_c1_res = adl_features.has_msr_core_c1_res,
	.has_ext_cst_msrs = adl_features.has_ext_cst_msrs,
	.trl_msrs = adl_features.trl_msrs,
	.tcc_offset_bits = adl_features.tcc_offset_bits,
	.rapl_msrs = adl_features.rapl_msrs,
	.enable_tsc_tweak = adl_features.enable_tsc_tweak,
};

static const struct platform_features skx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_SKX,
	.has_irtl_msrs = 1,
	.has_cst_auto_convension = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
	.has_fixed_rapl_unit = 1,
};

static const struct platform_features icx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_ICX,
	.has_msr_core_c1_res = 1,
	.has_irtl_msrs = 1,
	.has_cst_prewake_bit = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
	.has_fixed_rapl_unit = 1,
};

static const struct platform_features spr_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_SKX,
	.has_msr_core_c1_res = 1,
	.has_irtl_msrs = 1,
	.has_cst_prewake_bit = 1,
	.has_fixed_rapl_psys_unit = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
};

static const struct platform_features dmr_features = {
	.has_msr_misc_feature_control = spr_features.has_msr_misc_feature_control,
	.has_msr_misc_pwr_mgmt = spr_features.has_msr_misc_pwr_mgmt,
	.has_nhm_msrs = spr_features.has_nhm_msrs,
	.bclk_freq = spr_features.bclk_freq,
	.supported_cstates = spr_features.supported_cstates,
	.cst_limit = spr_features.cst_limit,
	.has_msr_core_c1_res = spr_features.has_msr_core_c1_res,
	.has_cst_prewake_bit = spr_features.has_cst_prewake_bit,
	.has_fixed_rapl_psys_unit = spr_features.has_fixed_rapl_psys_unit,
	.trl_msrs = spr_features.trl_msrs,
	.has_msr_module_c6_res_ms = 1,	/* DMR has Dual-Core-Module and MC6 MSR */
	.rapl_msrs = 0,			/* DMR does not have RAPL MSRs */
	.plr_msrs = 0,			/* DMR does not have PLR MSRs */
	.has_irtl_msrs = 0,		/* DMR does not have IRTL MSRs */
	.has_config_tdp = 0,		/* DMR does not have CTDP MSRs */
};
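/*
 * The explicit zero initializers in dmr_features above are
 * already the C default; they are spelled out purely to document
 * which features DMR lacks relative to spr_features.
 */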
static const struct platform_features srf_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_SKX,
	.has_msr_core_c1_res = 1,
	.has_msr_module_c6_res_ms = 1,
	.has_irtl_msrs = 1,
	.has_cst_prewake_bit = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
};

static const struct platform_features grr_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6,
	.cst_limit = CST_LIMIT_SKX,
	.has_msr_core_c1_res = 1,
	.has_msr_module_c6_res_ms = 1,
	.has_irtl_msrs = 1,
	.has_cst_prewake_bit = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
};

static const struct platform_features slv_features = {
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_SLV,
	.supported_cstates = CC1 | CC6 | PC6,
	.cst_limit = CST_LIMIT_SLV,
	.has_msr_core_c1_res = 1,
	.has_msr_module_c6_res_ms = 1,
	.has_msr_c6_demotion_policy_config = 1,
	.has_msr_atom_pkg_c6_residency = 1,
	.trl_msrs = TRL_ATOM,
	.rapl_msrs = RAPL_PKG | RAPL_CORE,
	.has_rapl_divisor = 1,
	.rapl_quirk_tdp = 30,
};

static const struct platform_features slvd_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_SLV,
	.supported_cstates = CC1 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_SLV,
	.has_msr_atom_pkg_c6_residency = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE,
	.rapl_quirk_tdp = 30,
};

static const struct platform_features amt_features = {
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_133MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_AMT,
	.trl_msrs = TRL_BASE,
};

static const struct platform_features gmt_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.crystal_freq = 19200000,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
};

static const struct platform_features gmtd_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.crystal_freq = 25000000,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.has_msr_core_c1_res = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS,
};

static const struct platform_features gmtp_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.crystal_freq = 19200000,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
};

static const struct platform_features tmt_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
	.enable_tsc_tweak = 1,
};

static const struct platform_features tmtd_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL,
};

static const struct platform_features knl_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_KNL,
	.has_msr_knl_core_c6_residency = 1,
	.trl_msrs = TRL_KNL,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
	.has_fixed_rapl_unit = 1,
	.need_perf_multiplier = 1,
};

static const struct platform_features default_features = {
};

static const struct platform_features amd_features_with_rapl = {
	.rapl_msrs = RAPL_AMD_F17H,
	.has_per_core_rapl = 1,
	.rapl_quirk_tdp = 280,	/* This is the max stock TDP of HEDT/Server Fam17h+ chips */
};
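/*
 * VFM -> features dispatch table.  First match wins in
 * probe_platform_features(); the { 0, NULL } entry terminates
 * the scan.
 */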
static const struct platform_data turbostat_pdata[] = {
	{ INTEL_NEHALEM, &nhm_features },
	{ INTEL_NEHALEM_G, &nhm_features },
	{ INTEL_NEHALEM_EP, &nhm_features },
	{ INTEL_NEHALEM_EX, &nhx_features },
	{ INTEL_WESTMERE, &nhm_features },
	{ INTEL_WESTMERE_EP, &nhm_features },
	{ INTEL_WESTMERE_EX, &nhx_features },
	{ INTEL_SANDYBRIDGE, &snb_features },
	{ INTEL_SANDYBRIDGE_X, &snx_features },
	{ INTEL_IVYBRIDGE, &ivb_features },
	{ INTEL_IVYBRIDGE_X, &ivx_features },
	{ INTEL_HASWELL, &hsw_features },
	{ INTEL_HASWELL_X, &hsx_features },
	{ INTEL_HASWELL_L, &hswl_features },
	{ INTEL_HASWELL_G, &hswg_features },
	{ INTEL_BROADWELL, &bdw_features },
	{ INTEL_BROADWELL_G, &bdwg_features },
	{ INTEL_BROADWELL_X, &bdx_features },
	{ INTEL_BROADWELL_D, &bdx_features },
	{ INTEL_SKYLAKE_L, &skl_features },
	{ INTEL_SKYLAKE, &skl_features },
	{ INTEL_SKYLAKE_X, &skx_features },
	{ INTEL_KABYLAKE_L, &skl_features },
	{ INTEL_KABYLAKE, &skl_features },
	{ INTEL_COMETLAKE, &skl_features },
	{ INTEL_COMETLAKE_L, &skl_features },
	{ INTEL_CANNONLAKE_L, &cnl_features },
	{ INTEL_ICELAKE_X, &icx_features },
	{ INTEL_ICELAKE_D, &icx_features },
	{ INTEL_ICELAKE_L, &cnl_features },
	{ INTEL_ICELAKE_NNPI, &cnl_features },
	{ INTEL_ROCKETLAKE, &cnl_features },
	{ INTEL_TIGERLAKE_L, &cnl_features },
	{ INTEL_TIGERLAKE, &cnl_features },
	{ INTEL_SAPPHIRERAPIDS_X, &spr_features },
	{ INTEL_EMERALDRAPIDS_X, &spr_features },
	{ INTEL_GRANITERAPIDS_X, &spr_features },
	{ INTEL_GRANITERAPIDS_D, &spr_features },
	{ INTEL_PANTHERCOVE_X, &dmr_features },
	{ INTEL_LAKEFIELD, &cnl_features },
	{ INTEL_ALDERLAKE, &adl_features },
	{ INTEL_ALDERLAKE_L, &adl_features },
	{ INTEL_RAPTORLAKE, &adl_features },
	{ INTEL_RAPTORLAKE_P, &adl_features },
	{ INTEL_RAPTORLAKE_S, &adl_features },
	{ INTEL_BARTLETTLAKE, &adl_features },
	{ INTEL_METEORLAKE, &adl_features },
	{ INTEL_METEORLAKE_L, &adl_features },
	{ INTEL_ARROWLAKE_H, &adl_features },
	{ INTEL_ARROWLAKE_U, &adl_features },
	{ INTEL_ARROWLAKE, &adl_features },
	{ INTEL_LUNARLAKE_M, &lnl_features },
	{ INTEL_PANTHERLAKE_L, &lnl_features },
	{ INTEL_ATOM_SILVERMONT, &slv_features },
	{ INTEL_ATOM_SILVERMONT_D, &slvd_features },
	{ INTEL_ATOM_AIRMONT, &amt_features },
	{ INTEL_ATOM_GOLDMONT, &gmt_features },
	{ INTEL_ATOM_GOLDMONT_D, &gmtd_features },
	{ INTEL_ATOM_GOLDMONT_PLUS, &gmtp_features },
	{ INTEL_ATOM_TREMONT_D, &tmtd_features },
	{ INTEL_ATOM_TREMONT, &tmt_features },
	{ INTEL_ATOM_TREMONT_L, &tmt_features },
	{ INTEL_ATOM_GRACEMONT, &adl_features },
	{ INTEL_ATOM_CRESTMONT_X, &srf_features },
	{ INTEL_ATOM_CRESTMONT, &grr_features },
	{ INTEL_ATOM_DARKMONT_X, &srf_features },
	{ INTEL_XEON_PHI_KNL, &knl_features },
	{ INTEL_XEON_PHI_KNM, &knl_features },
	/*
	 * Missing support for
	 * INTEL_ICELAKE
	 * INTEL_ATOM_SILVERMONT_MID
	 * INTEL_ATOM_SILVERMONT_MID2
	 * INTEL_ATOM_AIRMONT_NP
	 */
	{ 0, NULL },
};

static const struct platform_features *platform;
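/*
 * Select the feature set for this CPU: Intel parts are looked up
 * in turbostat_pdata[]; AMD/Hygon get RAPL defaults when CPUID
 * advertises it; anything else runs only with --force, using the
 * empty default_features.
 */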
void probe_platform_features(unsigned int family, unsigned int model)
{
	int i;

	if (authentic_amd || hygon_genuine) {
		/* fallback to default features on unsupported models */
		force_load++;
		if (max_extended_level >= 0x80000007) {
			unsigned int eax, ebx, ecx, edx;

			__cpuid(0x80000007, eax, ebx, ecx, edx);
			/* RAPL (Fam 17h+) */
			if ((edx & (1 << 14)) && family >= 0x17)
				platform = &amd_features_with_rapl;
		}
		goto end;
	}

	if (!genuine_intel)
		goto end;

	for (i = 0; turbostat_pdata[i].features; i++) {
		if (VFM_FAMILY(turbostat_pdata[i].vfm) == family && VFM_MODEL(turbostat_pdata[i].vfm) == model) {
			platform = turbostat_pdata[i].features;
			return;
		}
	}

end:
	if (force_load && !platform) {
		fprintf(outf, "Forced to run on unsupported platform!\n");
		platform = &default_features;
	}

	if (platform)
		return;

	fprintf(stderr, "Unsupported platform detected.\n\tSee RUN THE LATEST VERSION on turbostat(8)\n");
	exit(1);
}

/* Model specific support End */

#define TJMAX_DEFAULT	100

/* MSRs that are not yet in the kernel-provided header. */
#define MSR_RAPL_PWR_UNIT	0xc0010299
#define MSR_CORE_ENERGY_STAT	0xc001029a
#define MSR_PKG_ENERGY_STAT	0xc001029b

#define MAX(a, b) ((a) > (b) ? (a) : (b))

int backwards_count;
char *progname;

#define CPU_SUBSET_MAXCPUS	8192	/* need to use before probe... */
cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize,
    cpu_subset_size;
#define MAX_ADDED_THREAD_COUNTERS 24
#define MAX_ADDED_CORE_COUNTERS 8
#define MAX_ADDED_PACKAGE_COUNTERS 16
#define PMT_MAX_ADDED_THREAD_COUNTERS 24
#define PMT_MAX_ADDED_CORE_COUNTERS 8
#define PMT_MAX_ADDED_PACKAGE_COUNTERS 16
#define BITMASK_SIZE 32

#define ZERO_ARRAY(arr) (memset(arr, 0, sizeof(arr)) + __must_be_array(arr))
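/*
 * __must_be_array() evaluates to 0 for a true array and breaks
 * the build for a pointer, so the addition above only guards
 * against passing a pointer to ZERO_ARRAY().
 */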
/* Indexes used to map data read from perf and MSRs into global variables */
enum rapl_rci_index {
	RAPL_RCI_INDEX_ENERGY_PKG = 0,
	RAPL_RCI_INDEX_ENERGY_CORES = 1,
	RAPL_RCI_INDEX_DRAM = 2,
	RAPL_RCI_INDEX_GFX = 3,
	RAPL_RCI_INDEX_PKG_PERF_STATUS = 4,
	RAPL_RCI_INDEX_DRAM_PERF_STATUS = 5,
	RAPL_RCI_INDEX_CORE_ENERGY = 6,
	RAPL_RCI_INDEX_ENERGY_PLATFORM = 7,
	NUM_RAPL_COUNTERS,
};

enum rapl_unit {
	RAPL_UNIT_INVALID,
	RAPL_UNIT_JOULES,
	RAPL_UNIT_WATTS,
};

struct rapl_counter_info_t {
	unsigned long long data[NUM_RAPL_COUNTERS];
	enum counter_source source[NUM_RAPL_COUNTERS];
	unsigned long long flags[NUM_RAPL_COUNTERS];
	double scale[NUM_RAPL_COUNTERS];
	enum rapl_unit unit[NUM_RAPL_COUNTERS];
	unsigned long long msr[NUM_RAPL_COUNTERS];
	unsigned long long msr_mask[NUM_RAPL_COUNTERS];
	int msr_shift[NUM_RAPL_COUNTERS];

	int fd_perf;
};

/* struct rapl_counter_info_t for each RAPL domain */
struct rapl_counter_info_t *rapl_counter_info_perdomain;
unsigned int rapl_counter_info_perdomain_size;

#define RAPL_COUNTER_FLAG_PLATFORM_COUNTER	(1u << 0)
#define RAPL_COUNTER_FLAG_USE_MSR_SUM		(1u << 1)

struct rapl_counter_arch_info {
	int feature_mask;	/* Mask for testing if the counter is supported on host */
	const char *perf_subsys;
	const char *perf_name;
	unsigned long long msr;
	unsigned long long msr_mask;
	int msr_shift;		/* Positive means shift right, negative means shift left */
	double *platform_rapl_msr_scale;	/* Scale applied to values read by MSR (platform dependent, filled at runtime) */
	unsigned int rci_index;	/* Maps data from perf counters to global variables */
	unsigned int bic_number;
	double compat_scale;	/* Some counters require constant scaling to be in the same range as other, similar ones */
	unsigned long long flags;
};
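/*
 * Each RAPL domain typically appears twice below, once for the
 * Watts column and once for the Joules column; the pair shares an
 * rci_index and thus the same underlying reading.
 */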
static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = {
	{
		.feature_mask = RAPL_PKG,
		.perf_subsys = "power",
		.perf_name = "energy-pkg",
		.msr = MSR_PKG_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
		.bic_number = BIC_PkgWatt,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_PKG,
		.perf_subsys = "power",
		.perf_name = "energy-pkg",
		.msr = MSR_PKG_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
		.bic_number = BIC_Pkg_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_AMD_F17H,
		.perf_subsys = "power",
		.perf_name = "energy-pkg",
		.msr = MSR_PKG_ENERGY_STAT,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
		.bic_number = BIC_PkgWatt,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_AMD_F17H,
		.perf_subsys = "power",
		.perf_name = "energy-pkg",
		.msr = MSR_PKG_ENERGY_STAT,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
		.bic_number = BIC_Pkg_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_CORE_ENERGY_STATUS,
		.perf_subsys = "power",
		.perf_name = "energy-cores",
		.msr = MSR_PP0_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_CORES,
		.bic_number = BIC_CorWatt,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_CORE_ENERGY_STATUS,
		.perf_subsys = "power",
		.perf_name = "energy-cores",
		.msr = MSR_PP0_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_CORES,
		.bic_number = BIC_Cor_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_DRAM,
		.perf_subsys = "power",
		.perf_name = "energy-ram",
		.msr = MSR_DRAM_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_dram_energy_units,
		.rci_index = RAPL_RCI_INDEX_DRAM,
		.bic_number = BIC_RAMWatt,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_DRAM,
		.perf_subsys = "power",
		.perf_name = "energy-ram",
		.msr = MSR_DRAM_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_dram_energy_units,
		.rci_index = RAPL_RCI_INDEX_DRAM,
		.bic_number = BIC_RAM_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_GFX,
		.perf_subsys = "power",
		.perf_name = "energy-gpu",
		.msr = MSR_PP1_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_GFX,
		.bic_number = BIC_GFXWatt,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_GFX,
		.perf_subsys = "power",
		.perf_name = "energy-gpu",
		.msr = MSR_PP1_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_GFX,
		.bic_number = BIC_GFX_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_PKG_PERF_STATUS,
		.perf_subsys = NULL,
		.perf_name = NULL,
		.msr = MSR_PKG_PERF_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_time_units,
		.rci_index = RAPL_RCI_INDEX_PKG_PERF_STATUS,
		.bic_number = BIC_PKG__,
		.compat_scale = 100.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_DRAM_PERF_STATUS,
		.perf_subsys = NULL,
		.perf_name = NULL,
		.msr = MSR_DRAM_PERF_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_time_units,
		.rci_index = RAPL_RCI_INDEX_DRAM_PERF_STATUS,
		.bic_number = BIC_RAM__,
		.compat_scale = 100.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
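	/*
	 * Entries with a NULL perf_subsys/perf_name (the perf-status
	 * counters above and the AMD core-energy counters below) have
	 * no perf interface and can only be read via MSR.
	 */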
	{
		.feature_mask = RAPL_AMD_F17H,
		.perf_subsys = NULL,
		.perf_name = NULL,
		.msr = MSR_CORE_ENERGY_STAT,
		.msr_mask = 0xFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_CORE_ENERGY,
		.bic_number = BIC_CorWatt,
		.compat_scale = 1.0,
		.flags = 0,
	},
	{
		.feature_mask = RAPL_AMD_F17H,
		.perf_subsys = NULL,
		.perf_name = NULL,
		.msr = MSR_CORE_ENERGY_STAT,
		.msr_mask = 0xFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_CORE_ENERGY,
		.bic_number = BIC_Cor_J,
		.compat_scale = 1.0,
		.flags = 0,
	},
	{
		.feature_mask = RAPL_PSYS,
		.perf_subsys = "power",
		.perf_name = "energy-psys",
		.msr = MSR_PLATFORM_ENERGY_STATUS,
		.msr_mask = 0x00000000FFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_psys_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM,
		.bic_number = BIC_SysWatt,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_PLATFORM_COUNTER | RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_PSYS,
		.perf_subsys = "power",
		.perf_name = "energy-psys",
		.msr = MSR_PLATFORM_ENERGY_STATUS,
		.msr_mask = 0x00000000FFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_psys_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM,
		.bic_number = BIC_Sys_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_PLATFORM_COUNTER | RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
};

struct rapl_counter {
	unsigned long long raw_value;
	enum rapl_unit unit;
	double scale;
};

/* Indexes used to map data read from perf and MSRs into global variables */
enum ccstate_rci_index {
	CCSTATE_RCI_INDEX_C1_RESIDENCY = 0,
	CCSTATE_RCI_INDEX_C3_RESIDENCY = 1,
	CCSTATE_RCI_INDEX_C6_RESIDENCY = 2,
	CCSTATE_RCI_INDEX_C7_RESIDENCY = 3,
	PCSTATE_RCI_INDEX_C2_RESIDENCY = 4,
	PCSTATE_RCI_INDEX_C3_RESIDENCY = 5,
	PCSTATE_RCI_INDEX_C6_RESIDENCY = 6,
	PCSTATE_RCI_INDEX_C7_RESIDENCY = 7,
	PCSTATE_RCI_INDEX_C8_RESIDENCY = 8,
	PCSTATE_RCI_INDEX_C9_RESIDENCY = 9,
	PCSTATE_RCI_INDEX_C10_RESIDENCY = 10,
	NUM_CSTATE_COUNTERS,
};

struct cstate_counter_info_t {
	unsigned long long data[NUM_CSTATE_COUNTERS];
	enum counter_source source[NUM_CSTATE_COUNTERS];
	unsigned long long msr[NUM_CSTATE_COUNTERS];
	int fd_perf_core;
	int fd_perf_pkg;
};

struct cstate_counter_info_t *ccstate_counter_info;
unsigned int ccstate_counter_info_size;

#define CSTATE_COUNTER_FLAG_COLLECT_PER_CORE	(1u << 0)
#define CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD	((1u << 1) | CSTATE_COUNTER_FLAG_COLLECT_PER_CORE)
#define CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY	(1u << 2)
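/*
 * Note that COLLECT_PER_THREAD includes the PER_CORE bit, so a
 * per-thread counter is implicitly also collected per-core.
 * SOFT_C1_DEPENDENCY marks residencies consulted when deriving
 * the CPU%c1 value on platforms without a dedicated C1 MSR.
 */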
struct cstate_counter_arch_info {
	int feature_mask;	/* Mask for testing if the counter is supported on host */
	const char *perf_subsys;
	const char *perf_name;
	unsigned long long msr;
	unsigned int rci_index;	/* Maps data from perf counters to global variables */
	unsigned int bic_number;
	unsigned long long flags;
	int pkg_cstate_limit;
};

static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = {
	{
		.feature_mask = CC1,
		.perf_subsys = "cstate_core",
		.perf_name = "c1-residency",
		.msr = MSR_CORE_C1_RES,
		.rci_index = CCSTATE_RCI_INDEX_C1_RESIDENCY,
		.bic_number = BIC_CPU_c1,
		.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD,
		.pkg_cstate_limit = 0,
	},
	{
		.feature_mask = CC3,
		.perf_subsys = "cstate_core",
		.perf_name = "c3-residency",
		.msr = MSR_CORE_C3_RESIDENCY,
		.rci_index = CCSTATE_RCI_INDEX_C3_RESIDENCY,
		.bic_number = BIC_CPU_c3,
		.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY,
		.pkg_cstate_limit = 0,
	},
	{
		.feature_mask = CC6,
		.perf_subsys = "cstate_core",
		.perf_name = "c6-residency",
		.msr = MSR_CORE_C6_RESIDENCY,
		.rci_index = CCSTATE_RCI_INDEX_C6_RESIDENCY,
		.bic_number = BIC_CPU_c6,
		.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY,
		.pkg_cstate_limit = 0,
	},
	{
		.feature_mask = CC7,
		.perf_subsys = "cstate_core",
		.perf_name = "c7-residency",
		.msr = MSR_CORE_C7_RESIDENCY,
		.rci_index = CCSTATE_RCI_INDEX_C7_RESIDENCY,
		.bic_number = BIC_CPU_c7,
		.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY,
		.pkg_cstate_limit = 0,
	},
	{
		.feature_mask = PC2,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c2-residency",
		.msr = MSR_PKG_C2_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C2_RESIDENCY,
		.bic_number = BIC_Pkgpc2,
		.flags = 0,
		.pkg_cstate_limit = PCL__2,
	},
	{
		.feature_mask = PC3,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c3-residency",
		.msr = MSR_PKG_C3_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C3_RESIDENCY,
		.bic_number = BIC_Pkgpc3,
		.flags = 0,
		.pkg_cstate_limit = PCL__3,
	},
	{
		.feature_mask = PC6,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c6-residency",
		.msr = MSR_PKG_C6_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C6_RESIDENCY,
		.bic_number = BIC_Pkgpc6,
		.flags = 0,
		.pkg_cstate_limit = PCL__6,
	},
	{
		.feature_mask = PC7,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c7-residency",
		.msr = MSR_PKG_C7_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C7_RESIDENCY,
		.bic_number = BIC_Pkgpc7,
		.flags = 0,
		.pkg_cstate_limit = PCL__7,
	},
	{
		.feature_mask = PC8,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c8-residency",
		.msr = MSR_PKG_C8_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C8_RESIDENCY,
		.bic_number = BIC_Pkgpc8,
		.flags = 0,
		.pkg_cstate_limit = PCL__8,
	},
	{
		.feature_mask = PC9,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c9-residency",
		.msr = MSR_PKG_C9_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C9_RESIDENCY,
		.bic_number = BIC_Pkgpc9,
		.flags = 0,
		.pkg_cstate_limit = PCL__9,
	},
	{
		.feature_mask = PC10,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c10-residency",
		.msr = MSR_PKG_C10_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C10_RESIDENCY,
		.bic_number = BIC_Pkgpc10,
		.flags = 0,
		.pkg_cstate_limit = PCL_10,
	},
};
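/*
 * For the package C-state entries above, .pkg_cstate_limit names
 * the MSR_PKG_CST_CONFIG_CONTROL decoding (see PCL_* above) for
 * that state, so package counters deeper than the configured
 * limit can be recognized as unreachable.
 */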
.pkg_cstate_limit = PCL_10, 1731 }, 1732 }; 1733 1734 /* Indexes used to map data read from perf and MSRs into global variables */ 1735 enum msr_rci_index { 1736 MSR_RCI_INDEX_APERF = 0, 1737 MSR_RCI_INDEX_MPERF = 1, 1738 MSR_RCI_INDEX_SMI = 2, 1739 NUM_MSR_COUNTERS, 1740 }; 1741 1742 struct msr_counter_info_t { 1743 unsigned long long data[NUM_MSR_COUNTERS]; 1744 enum counter_source source[NUM_MSR_COUNTERS]; 1745 unsigned long long msr[NUM_MSR_COUNTERS]; 1746 unsigned long long msr_mask[NUM_MSR_COUNTERS]; 1747 int fd_perf; 1748 }; 1749 1750 struct msr_counter_info_t *msr_counter_info; 1751 unsigned int msr_counter_info_size; 1752 1753 struct msr_counter_arch_info { 1754 const char *perf_subsys; 1755 const char *perf_name; 1756 unsigned long long msr; 1757 unsigned long long msr_mask; 1758 unsigned int rci_index; /* Maps data from perf counters to global variables */ 1759 bool needed; 1760 bool present; 1761 }; 1762 1763 enum msr_arch_info_index { 1764 MSR_ARCH_INFO_APERF_INDEX = 0, 1765 MSR_ARCH_INFO_MPERF_INDEX = 1, 1766 MSR_ARCH_INFO_SMI_INDEX = 2, 1767 }; 1768 1769 static struct msr_counter_arch_info msr_counter_arch_infos[] = { 1770 [MSR_ARCH_INFO_APERF_INDEX] = { 1771 .perf_subsys = "msr", 1772 .perf_name = "aperf", 1773 .msr = MSR_IA32_APERF, 1774 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1775 .rci_index = MSR_RCI_INDEX_APERF, 1776 }, 1777 1778 [MSR_ARCH_INFO_MPERF_INDEX] = { 1779 .perf_subsys = "msr", 1780 .perf_name = "mperf", 1781 .msr = MSR_IA32_MPERF, 1782 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1783 .rci_index = MSR_RCI_INDEX_MPERF, 1784 }, 1785 1786 [MSR_ARCH_INFO_SMI_INDEX] = { 1787 .perf_subsys = "msr", 1788 .perf_name = "smi", 1789 .msr = MSR_SMI_COUNT, 1790 .msr_mask = 0xFFFFFFFF, 1791 .rci_index = MSR_RCI_INDEX_SMI, 1792 }, 1793 }; 1794 1795 /* Can be redefined when compiling, useful for testing. */ 1796 #ifndef SYSFS_TELEM_PATH 1797 #define SYSFS_TELEM_PATH "/sys/class/intel_pmt" 1798 #endif 1799 1800 #define PMT_COUNTER_MTL_DC6_OFFSET 120 1801 #define PMT_COUNTER_MTL_DC6_LSB 0 1802 #define PMT_COUNTER_MTL_DC6_MSB 63 1803 #define PMT_MTL_DC6_GUID 0x1a067102 1804 #define PMT_MTL_DC6_SEQ 0 1805 1806 #define PMT_COUNTER_CWF_MC1E_OFFSET_BASE 20936 1807 #define PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT 24 1808 #define PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE 12 1809 #define PMT_COUNTER_CWF_CPUS_PER_MODULE 4 1810 #define PMT_COUNTER_CWF_MC1E_LSB 0 1811 #define PMT_COUNTER_CWF_MC1E_MSB 63 1812 #define PMT_CWF_MC1E_GUID 0x14421519 1813 1814 unsigned long long tcore_clock_freq_hz = 800000000; 1815 1816 #define PMT_COUNTER_NAME_SIZE_BYTES 16 1817 #define PMT_COUNTER_TYPE_NAME_SIZE_BYTES 32 1818 1819 struct pmt_mmio { 1820 struct pmt_mmio *next; 1821 1822 unsigned int guid; 1823 unsigned int size; 1824 1825 /* Base pointer to the mmaped memory. */ 1826 void *mmio_base; 1827 1828 /* 1829 * Offset to be applied to the mmio_base 1830 * to get the beginning of the PMT counters for given GUID. 1831 */ 1832 unsigned long pmt_offset; 1833 } *pmt_mmios; 1834 1835 enum pmt_datatype { 1836 PMT_TYPE_RAW, 1837 PMT_TYPE_XTAL_TIME, 1838 PMT_TYPE_TCORE_CLOCK, 1839 }; 1840 1841 struct pmt_domain_info { 1842 /* 1843 * Pointer to the MMIO obtained by applying a counter offset 1844 * to the mmio_base of the mmaped region for the given GUID. 1845 * 1846 * This is where to read the raw value of the counter from. 
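 *
 * Roughly (a sketch; pmio is the struct pmt_mmio matching the GUID, and
 * offset_in_region is an illustrative name for the counter's offset
 * within that telemetry region):
 *
 *	pcounter = (unsigned long *)((char *)pmio->mmio_base +
 *				     pmio->pmt_offset + offset_in_region);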
1847 */ 1848 unsigned long *pcounter; 1849 }; 1850 1851 struct pmt_counter { 1852 struct pmt_counter *next; 1853 1854 /* PMT metadata */ 1855 char name[PMT_COUNTER_NAME_SIZE_BYTES]; 1856 enum pmt_datatype type; 1857 enum counter_scope scope; 1858 unsigned int lsb; 1859 unsigned int msb; 1860 1861 /* BIC-like metadata */ 1862 enum counter_format format; 1863 1864 unsigned int num_domains; 1865 struct pmt_domain_info *domains; 1866 }; 1867 1868 /* 1869 * PMT telemetry directory iterator. 1870 * Used to iterate telemetry files in sysfs in correct order. 1871 */ 1872 struct pmt_diriter_t { 1873 DIR *dir; 1874 struct dirent **namelist; 1875 unsigned int num_names; 1876 unsigned int current_name_idx; 1877 }; 1878 1879 int pmt_telemdir_filter(const struct dirent *e) 1880 { 1881 unsigned int dummy; 1882 1883 return sscanf(e->d_name, "telem%u", &dummy); 1884 } 1885 1886 int pmt_telemdir_sort(const struct dirent **a, const struct dirent **b) 1887 { 1888 unsigned int aidx = 0, bidx = 0; 1889 1890 sscanf((*a)->d_name, "telem%u", &aidx); 1891 sscanf((*b)->d_name, "telem%u", &bidx); 1892 1893 return aidx >= bidx; 1894 } 1895 1896 const struct dirent *pmt_diriter_next(struct pmt_diriter_t *iter) 1897 { 1898 const struct dirent *ret = NULL; 1899 1900 if (!iter->dir) 1901 return NULL; 1902 1903 if (iter->current_name_idx >= iter->num_names) 1904 return NULL; 1905 1906 ret = iter->namelist[iter->current_name_idx]; 1907 ++iter->current_name_idx; 1908 1909 return ret; 1910 } 1911 1912 const struct dirent *pmt_diriter_begin(struct pmt_diriter_t *iter, const char *pmt_root_path) 1913 { 1914 int num_names = iter->num_names; 1915 1916 if (!iter->dir) { 1917 iter->dir = opendir(pmt_root_path); 1918 if (iter->dir == NULL) 1919 return NULL; 1920 1921 num_names = scandir(pmt_root_path, &iter->namelist, pmt_telemdir_filter, pmt_telemdir_sort); 1922 if (num_names == -1) 1923 return NULL; 1924 } 1925 1926 iter->current_name_idx = 0; 1927 iter->num_names = num_names; 1928 1929 return pmt_diriter_next(iter); 1930 } 1931 1932 void pmt_diriter_init(struct pmt_diriter_t *iter) 1933 { 1934 memset(iter, 0, sizeof(*iter)); 1935 } 1936 1937 void pmt_diriter_remove(struct pmt_diriter_t *iter) 1938 { 1939 if (iter->namelist) { 1940 for (unsigned int i = 0; i < iter->num_names; i++) { 1941 free(iter->namelist[i]); 1942 iter->namelist[i] = NULL; 1943 } 1944 } 1945 1946 free(iter->namelist); 1947 iter->namelist = NULL; 1948 iter->num_names = 0; 1949 iter->current_name_idx = 0; 1950 1951 closedir(iter->dir); 1952 iter->dir = NULL; 1953 } 1954 1955 unsigned int pmt_counter_get_width(const struct pmt_counter *p) 1956 { 1957 return (p->msb - p->lsb) + 1; 1958 } 1959 1960 void pmt_counter_resize_(struct pmt_counter *pcounter, unsigned int new_size) 1961 { 1962 struct pmt_domain_info *new_mem; 1963 1964 new_mem = (struct pmt_domain_info *)reallocarray(pcounter->domains, new_size, sizeof(*pcounter->domains)); 1965 if (!new_mem) { 1966 fprintf(stderr, "%s: failed to allocate memory for PMT counters\n", __func__); 1967 exit(1); 1968 } 1969 1970 /* Zero initialize just allocated memory. */ 1971 const size_t num_new_domains = new_size - pcounter->num_domains; 1972 1973 memset(&new_mem[pcounter->num_domains], 0, num_new_domains * sizeof(*pcounter->domains)); 1974 1975 pcounter->num_domains = new_size; 1976 pcounter->domains = new_mem; 1977 } 1978 1979 void pmt_counter_resize(struct pmt_counter *pcounter, unsigned int new_size) 1980 { 1981 /* 1982 * Allocate more memory ahead of time. 
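 * (Worked example: with num_domains == 5, a request for new_size == 6
 * is first raised to 8, then to MAX(8, 5 * 2) == 10 below.)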
1983 * 1984 * Always allocate space for at least 8 elements 1985 * and double the size when growing. 1986 */ 1987 if (new_size < 8) 1988 new_size = 8; 1989 new_size = MAX(new_size, pcounter->num_domains * 2); 1990 1991 pmt_counter_resize_(pcounter, new_size); 1992 } 1993 1994 struct thread_data { 1995 struct timeval tv_begin; 1996 struct timeval tv_end; 1997 struct timeval tv_delta; 1998 unsigned long long tsc; 1999 unsigned long long aperf; 2000 unsigned long long mperf; 2001 unsigned long long c1; 2002 unsigned long long instr_count; 2003 unsigned long long irq_count; 2004 unsigned long long nmi_count; 2005 unsigned int smi_count; 2006 unsigned int cpu_id; 2007 unsigned int apic_id; 2008 unsigned int x2apic_id; 2009 unsigned int flags; 2010 bool is_atom; 2011 unsigned long long counter[MAX_ADDED_THREAD_COUNTERS]; 2012 unsigned long long perf_counter[MAX_ADDED_THREAD_COUNTERS]; 2013 unsigned long long pmt_counter[PMT_MAX_ADDED_THREAD_COUNTERS]; 2014 } *thread_even, *thread_odd; 2015 2016 struct core_data { 2017 int base_cpu; 2018 unsigned long long c3; 2019 unsigned long long c6; 2020 unsigned long long c7; 2021 unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */ 2022 unsigned int core_temp_c; 2023 struct rapl_counter core_energy; /* MSR_CORE_ENERGY_STAT */ 2024 unsigned int core_id; 2025 unsigned long long core_throt_cnt; 2026 unsigned long long counter[MAX_ADDED_CORE_COUNTERS]; 2027 unsigned long long perf_counter[MAX_ADDED_CORE_COUNTERS]; 2028 unsigned long long pmt_counter[PMT_MAX_ADDED_CORE_COUNTERS]; 2029 } *core_even, *core_odd; 2030 2031 struct pkg_data { 2032 int base_cpu; 2033 unsigned long long pc2; 2034 unsigned long long pc3; 2035 unsigned long long pc6; 2036 unsigned long long pc7; 2037 unsigned long long pc8; 2038 unsigned long long pc9; 2039 unsigned long long pc10; 2040 long long cpu_lpi; 2041 long long sys_lpi; 2042 unsigned long long pkg_wtd_core_c0; 2043 unsigned long long pkg_any_core_c0; 2044 unsigned long long pkg_any_gfxe_c0; 2045 unsigned long long pkg_both_core_gfxe_c0; 2046 long long gfx_rc6_ms; 2047 unsigned int gfx_mhz; 2048 unsigned int gfx_act_mhz; 2049 long long sam_mc6_ms; 2050 unsigned int sam_mhz; 2051 unsigned int sam_act_mhz; 2052 unsigned int package_id; 2053 struct rapl_counter energy_pkg; /* MSR_PKG_ENERGY_STATUS */ 2054 struct rapl_counter energy_dram; /* MSR_DRAM_ENERGY_STATUS */ 2055 struct rapl_counter energy_cores; /* MSR_PP0_ENERGY_STATUS */ 2056 struct rapl_counter energy_gfx; /* MSR_PP1_ENERGY_STATUS */ 2057 struct rapl_counter rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ 2058 struct rapl_counter rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ 2059 unsigned int pkg_temp_c; 2060 unsigned int uncore_mhz; 2061 unsigned long long die_c6; 2062 unsigned long long counter[MAX_ADDED_PACKAGE_COUNTERS]; 2063 unsigned long long perf_counter[MAX_ADDED_PACKAGE_COUNTERS]; 2064 unsigned long long pmt_counter[PMT_MAX_ADDED_PACKAGE_COUNTERS]; 2065 } *package_even, *package_odd; 2066 2067 #define ODD_COUNTERS thread_odd, core_odd, package_odd 2068 #define EVEN_COUNTERS thread_even, core_even, package_even 2069 2070 #define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no) \ 2071 ((thread_base) + \ 2072 ((pkg_no) * \ 2073 topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \ 2074 ((node_no) * topo.cores_per_node * topo.threads_per_core) + \ 2075 ((core_no) * topo.threads_per_core) + \ 2076 (thread_no)) 2077 2078 #define GET_CORE(core_base, core_no, node_no, pkg_no) \ 2079 ((core_base) + \ 2080 
((pkg_no) * topo.nodes_per_pkg * topo.cores_per_node) + \ 2081 ((node_no) * topo.cores_per_node) + \ 2082 (core_no)) 2083 2084 /* 2085 * The accumulated sum of MSR is defined as a monotonic 2086 * increasing MSR, it will be accumulated periodically, 2087 * despite its register's bit width. 2088 */ 2089 enum { 2090 IDX_PKG_ENERGY, 2091 IDX_DRAM_ENERGY, 2092 IDX_PP0_ENERGY, 2093 IDX_PP1_ENERGY, 2094 IDX_PKG_PERF, 2095 IDX_DRAM_PERF, 2096 IDX_PSYS_ENERGY, 2097 IDX_COUNT, 2098 }; 2099 2100 int get_msr_sum(int cpu, off_t offset, unsigned long long *msr); 2101 2102 struct msr_sum_array { 2103 /* get_msr_sum() = sum + (get_msr() - last) */ 2104 struct { 2105 /*The accumulated MSR value is updated by the timer */ 2106 unsigned long long sum; 2107 /*The MSR footprint recorded in last timer */ 2108 unsigned long long last; 2109 } entries[IDX_COUNT]; 2110 }; 2111 2112 /* The percpu MSR sum array.*/ 2113 struct msr_sum_array *per_cpu_msr_sum; 2114 2115 off_t idx_to_offset(int idx) 2116 { 2117 off_t offset; 2118 2119 switch (idx) { 2120 case IDX_PKG_ENERGY: 2121 if (platform->rapl_msrs & RAPL_AMD_F17H) 2122 offset = MSR_PKG_ENERGY_STAT; 2123 else 2124 offset = MSR_PKG_ENERGY_STATUS; 2125 break; 2126 case IDX_DRAM_ENERGY: 2127 offset = MSR_DRAM_ENERGY_STATUS; 2128 break; 2129 case IDX_PP0_ENERGY: 2130 offset = MSR_PP0_ENERGY_STATUS; 2131 break; 2132 case IDX_PP1_ENERGY: 2133 offset = MSR_PP1_ENERGY_STATUS; 2134 break; 2135 case IDX_PKG_PERF: 2136 offset = MSR_PKG_PERF_STATUS; 2137 break; 2138 case IDX_DRAM_PERF: 2139 offset = MSR_DRAM_PERF_STATUS; 2140 break; 2141 case IDX_PSYS_ENERGY: 2142 offset = MSR_PLATFORM_ENERGY_STATUS; 2143 break; 2144 default: 2145 offset = -1; 2146 } 2147 return offset; 2148 } 2149 2150 int offset_to_idx(off_t offset) 2151 { 2152 int idx; 2153 2154 switch (offset) { 2155 case MSR_PKG_ENERGY_STATUS: 2156 case MSR_PKG_ENERGY_STAT: 2157 idx = IDX_PKG_ENERGY; 2158 break; 2159 case MSR_DRAM_ENERGY_STATUS: 2160 idx = IDX_DRAM_ENERGY; 2161 break; 2162 case MSR_PP0_ENERGY_STATUS: 2163 idx = IDX_PP0_ENERGY; 2164 break; 2165 case MSR_PP1_ENERGY_STATUS: 2166 idx = IDX_PP1_ENERGY; 2167 break; 2168 case MSR_PKG_PERF_STATUS: 2169 idx = IDX_PKG_PERF; 2170 break; 2171 case MSR_DRAM_PERF_STATUS: 2172 idx = IDX_DRAM_PERF; 2173 break; 2174 case MSR_PLATFORM_ENERGY_STATUS: 2175 idx = IDX_PSYS_ENERGY; 2176 break; 2177 default: 2178 idx = -1; 2179 } 2180 return idx; 2181 } 2182 2183 int idx_valid(int idx) 2184 { 2185 switch (idx) { 2186 case IDX_PKG_ENERGY: 2187 return platform->rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H); 2188 case IDX_DRAM_ENERGY: 2189 return platform->rapl_msrs & RAPL_DRAM; 2190 case IDX_PP0_ENERGY: 2191 return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS; 2192 case IDX_PP1_ENERGY: 2193 return platform->rapl_msrs & RAPL_GFX; 2194 case IDX_PKG_PERF: 2195 return platform->rapl_msrs & RAPL_PKG_PERF_STATUS; 2196 case IDX_DRAM_PERF: 2197 return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS; 2198 case IDX_PSYS_ENERGY: 2199 return platform->rapl_msrs & RAPL_PSYS; 2200 default: 2201 return 0; 2202 } 2203 } 2204 2205 struct sys_counters { 2206 /* MSR added counters */ 2207 unsigned int added_thread_counters; 2208 unsigned int added_core_counters; 2209 unsigned int added_package_counters; 2210 struct msr_counter *tp; 2211 struct msr_counter *cp; 2212 struct msr_counter *pp; 2213 2214 /* perf added counters */ 2215 unsigned int added_thread_perf_counters; 2216 unsigned int added_core_perf_counters; 2217 unsigned int added_package_perf_counters; 2218 struct perf_counter_info *perf_tp; 2219 struct 
perf_counter_info *perf_cp; 2220 struct perf_counter_info *perf_pp; 2221 2222 struct pmt_counter *pmt_tp; 2223 struct pmt_counter *pmt_cp; 2224 struct pmt_counter *pmt_pp; 2225 } sys; 2226 2227 static size_t free_msr_counters_(struct msr_counter **pp) 2228 { 2229 struct msr_counter *p = NULL; 2230 size_t num_freed = 0; 2231 2232 while (*pp) { 2233 p = *pp; 2234 2235 if (p->msr_num != 0) { 2236 *pp = p->next; 2237 2238 free(p); 2239 ++num_freed; 2240 2241 continue; 2242 } 2243 2244 pp = &p->next; 2245 } 2246 2247 return num_freed; 2248 } 2249 2250 /* 2251 * Free all added counters accessed via msr. 2252 */ 2253 static void free_sys_msr_counters(void) 2254 { 2255 /* Thread counters */ 2256 sys.added_thread_counters -= free_msr_counters_(&sys.tp); 2257 2258 /* Core counters */ 2259 sys.added_core_counters -= free_msr_counters_(&sys.cp); 2260 2261 /* Package counters */ 2262 sys.added_package_counters -= free_msr_counters_(&sys.pp); 2263 } 2264 2265 struct system_summary { 2266 struct thread_data threads; 2267 struct core_data cores; 2268 struct pkg_data packages; 2269 } average; 2270 2271 struct platform_counters { 2272 struct rapl_counter energy_psys; /* MSR_PLATFORM_ENERGY_STATUS */ 2273 } platform_counters_odd, platform_counters_even; 2274 2275 struct cpu_topology { 2276 int physical_package_id; 2277 int die_id; 2278 int l3_id; 2279 int logical_cpu_id; 2280 int physical_node_id; 2281 int logical_node_id; /* 0-based count within the package */ 2282 int physical_core_id; 2283 int thread_id; 2284 int type; 2285 cpu_set_t *put_ids; /* Processing Unit/Thread IDs */ 2286 } *cpus; 2287 2288 struct topo_params { 2289 int num_packages; 2290 int num_die; 2291 int num_cpus; 2292 int num_cores; 2293 int allowed_packages; 2294 int allowed_cpus; 2295 int allowed_cores; 2296 int max_cpu_num; 2297 int max_core_id; 2298 int max_package_id; 2299 int max_die_id; 2300 int max_l3_id; 2301 int max_node_num; 2302 int nodes_per_pkg; 2303 int cores_per_node; 2304 int threads_per_core; 2305 } topo; 2306 2307 struct timeval tv_even, tv_odd, tv_delta; 2308 2309 int *irq_column_2_cpu; /* /proc/interrupts column numbers */ 2310 int *irqs_per_cpu; /* indexed by cpu_num */ 2311 int *nmi_per_cpu; /* indexed by cpu_num */ 2312 2313 void setup_all_buffers(bool startup); 2314 2315 char *sys_lpi_file; 2316 char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us"; 2317 char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec"; 2318 2319 int cpu_is_not_present(int cpu) 2320 { 2321 return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set); 2322 } 2323 2324 int cpu_is_not_allowed(int cpu) 2325 { 2326 return !CPU_ISSET_S(cpu, cpu_allowed_setsize, cpu_allowed_set); 2327 } 2328 2329 /* 2330 * run func(thread, core, package) in topology order 2331 * skip non-present cpus 2332 */ 2333 2334 #define PER_THREAD_PARAMS struct thread_data *t, struct core_data *c, struct pkg_data *p 2335 2336 int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *), 2337 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) 2338 { 2339 int retval, pkg_no, core_no, thread_no, node_no; 2340 2341 retval = 0; 2342 2343 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 2344 for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) { 2345 for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { 2346 for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { 2347 struct thread_data *t; 2348 struct core_data 
*c; 2349 2350 t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); 2351 2352 if (cpu_is_not_allowed(t->cpu_id)) 2353 continue; 2354 2355 c = GET_CORE(core_base, core_no, node_no, pkg_no); 2356 2357 retval |= func(t, c, &pkg_base[pkg_no]); 2358 } 2359 } 2360 } 2361 } 2362 return retval; 2363 } 2364 2365 int is_cpu_first_thread_in_core(PER_THREAD_PARAMS) 2366 { 2367 UNUSED(p); 2368 2369 return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0); 2370 } 2371 2372 int is_cpu_first_core_in_package(PER_THREAD_PARAMS) 2373 { 2374 UNUSED(c); 2375 2376 return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0); 2377 } 2378 2379 int is_cpu_first_thread_in_package(PER_THREAD_PARAMS) 2380 { 2381 return is_cpu_first_thread_in_core(t, c, p) && is_cpu_first_core_in_package(t, c, p); 2382 } 2383 2384 int cpu_migrate(int cpu) 2385 { 2386 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 2387 CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); 2388 if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) 2389 return -1; 2390 else 2391 return 0; 2392 } 2393 2394 int get_msr_fd(int cpu) 2395 { 2396 char pathname[32]; 2397 int fd; 2398 2399 fd = fd_percpu[cpu]; 2400 2401 if (fd) 2402 return fd; 2403 #if defined(ANDROID) 2404 sprintf(pathname, "/dev/msr%d", cpu); 2405 #else 2406 sprintf(pathname, "/dev/cpu/%d/msr", cpu); 2407 #endif 2408 fd = open(pathname, O_RDONLY); 2409 if (fd < 0) 2410 #if defined(ANDROID) 2411 err(-1, "%s open failed, try chown or chmod +r /dev/msr*, " 2412 "or run with --no-msr, or run as root", pathname); 2413 #else 2414 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, " 2415 "or run with --no-msr, or run as root", pathname); 2416 #endif 2417 fd_percpu[cpu] = fd; 2418 2419 return fd; 2420 } 2421 2422 static void bic_disable_msr_access(void) 2423 { 2424 CLR_BIC(BIC_Mod_c6, &bic_enabled); 2425 CLR_BIC(BIC_CoreTmp, &bic_enabled); 2426 CLR_BIC(BIC_Totl_c0, &bic_enabled); 2427 CLR_BIC(BIC_Any_c0, &bic_enabled); 2428 CLR_BIC(BIC_GFX_c0, &bic_enabled); 2429 CLR_BIC(BIC_CPUGFX, &bic_enabled); 2430 CLR_BIC(BIC_PkgTmp, &bic_enabled); 2431 2432 free_sys_msr_counters(); 2433 } 2434 2435 static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) 2436 { 2437 assert(!no_perf); 2438 2439 return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); 2440 } 2441 2442 static long open_perf_counter(int cpu, unsigned int type, unsigned int config, int group_fd, __u64 read_format) 2443 { 2444 struct perf_event_attr attr; 2445 const pid_t pid = -1; 2446 const unsigned long flags = 0; 2447 2448 assert(!no_perf); 2449 2450 memset(&attr, 0, sizeof(struct perf_event_attr)); 2451 2452 attr.type = type; 2453 attr.size = sizeof(struct perf_event_attr); 2454 attr.config = config; 2455 attr.disabled = 0; 2456 attr.sample_type = PERF_SAMPLE_IDENTIFIER; 2457 attr.read_format = read_format; 2458 2459 const int fd = perf_event_open(&attr, pid, cpu, group_fd, flags); 2460 2461 return fd; 2462 } 2463 2464 int get_instr_count_fd(int cpu) 2465 { 2466 if (fd_instr_count_percpu[cpu]) 2467 return fd_instr_count_percpu[cpu]; 2468 2469 fd_instr_count_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0); 2470 2471 return fd_instr_count_percpu[cpu]; 2472 } 2473 2474 int get_msr(int cpu, off_t offset, unsigned long long *msr) 2475 { 2476 ssize_t retval; 2477 2478 assert(!no_msr); 2479 2480 retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset); 2481 2482 if (retval != sizeof 
*msr) 2483 err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset); 2484 2485 return 0; 2486 } 2487 2488 int add_msr_counter(int cpu, off_t offset) 2489 { 2490 ssize_t retval; 2491 unsigned long long value; 2492 2493 if (no_msr) 2494 return -1; 2495 2496 if (!offset) 2497 return -1; 2498 2499 retval = pread(get_msr_fd(cpu), &value, sizeof(value), offset); 2500 2501 /* if the read failed, the probe fails */ 2502 if (retval != sizeof(value)) 2503 return -1; 2504 2505 if (value == 0) 2506 return 0; 2507 2508 return 1; 2509 } 2510 2511 int add_rapl_msr_counter(int cpu, const struct rapl_counter_arch_info *cai) 2512 { 2513 int ret; 2514 2515 if (!(platform->rapl_msrs & cai->feature_mask)) 2516 return -1; 2517 2518 ret = add_msr_counter(cpu, cai->msr); 2519 if (ret < 0) 2520 return -1; 2521 2522 switch (cai->rci_index) { 2523 case RAPL_RCI_INDEX_ENERGY_PKG: 2524 case RAPL_RCI_INDEX_ENERGY_CORES: 2525 case RAPL_RCI_INDEX_DRAM: 2526 case RAPL_RCI_INDEX_GFX: 2527 case RAPL_RCI_INDEX_ENERGY_PLATFORM: 2528 if (ret == 0) 2529 return 1; 2530 } 2531 2532 /* PKG,DRAM_PERF_STATUS MSRs, can return any value */ 2533 return 1; 2534 } 2535 2536 /* Convert CPU ID to domain ID for given added perf counter. */ 2537 unsigned int cpu_to_domain(const struct perf_counter_info *pc, int cpu) 2538 { 2539 switch (pc->scope) { 2540 case SCOPE_CPU: 2541 return cpu; 2542 2543 case SCOPE_CORE: 2544 return cpus[cpu].physical_core_id; 2545 2546 case SCOPE_PACKAGE: 2547 return cpus[cpu].physical_package_id; 2548 } 2549 2550 __builtin_unreachable(); 2551 } 2552 2553 #define MAX_DEFERRED 16 2554 char *deferred_add_names[MAX_DEFERRED]; 2555 char *deferred_skip_names[MAX_DEFERRED]; 2556 int deferred_add_index; 2557 int deferred_skip_index; 2558 unsigned int deferred_add_consumed; 2559 unsigned int deferred_skip_consumed; 2560 2561 /* 2562 * HIDE_LIST - hide this list of counters, show the rest [default] 2563 * SHOW_LIST - show this list of counters, hide the rest 2564 */ 2565 enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST; 2566 2567 void help(void) 2568 { 2569 fprintf(outf, 2570 "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" 2571 "\n" 2572 "Turbostat forks the specified COMMAND and prints statistics\n" 2573 "when COMMAND completes.\n" 2574 "If no COMMAND is specified, turbostat wakes every 5-seconds\n" 2575 "to print statistics, until interrupted.\n" 2576 " -a, --add counter\n" 2577 " add a counter\n" 2578 " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" 2579 " eg. --add perf/cstate_pkg/c2-residency,package,delta,percent,perfPC2\n" 2580 " eg. 
--add pmt,name=XTAL,type=raw,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102\n" 2581 " -c, --cpu cpu-set\n" 2582 " limit output to summary plus cpu-set:\n" 2583 " {core | package | j,k,l..m,n-p }\n" 2584 " -d, --debug\n" 2585 " displays usec, Time_Of_Day_Seconds and more debugging\n" 2586 " debug messages are printed to stderr\n" 2587 " -D, --Dump\n" 2588 " displays the raw counter values\n" 2589 " -e, --enable [all | column]\n" 2590 " shows all or the specified disabled column\n" 2591 " -f, --force\n" 2592 " force load turbostat with minimum default features on unsupported platforms.\n" 2593 " -H, --hide [column | column,column,...]\n" 2594 " hide the specified column(s)\n" 2595 " -i, --interval sec.subsec\n" 2596 " override default 5-second measurement interval\n" 2597 " -J, --Joules\n" 2598 " displays energy in Joules instead of Watts\n" 2599 " -l, --list\n" 2600 " list column headers only\n" 2601 " -M, --no-msr\n" 2602 " disable all uses of the MSR driver\n" 2603 " -P, --no-perf\n" 2604 " disable all uses of the perf API\n" 2605 " -n, --num_iterations num\n" 2606 " number of measurement iterations\n" 2607 " -N, --header_iterations num\n" 2608 " print header every num iterations\n" 2609 " -o, --out file\n" 2610 " create or truncate \"file\" for all output\n" 2611 " -q, --quiet\n" 2612 " skip decoding system configuration header\n" 2613 " -s, --show [column | column,column,...]\n" 2614 " show only the specified column(s)\n" 2615 " -S, --Summary\n" 2616 " limits output to 1-line system summary per interval\n" 2617 " -T, --TCC temperature\n" 2618 " sets the Thermal Control Circuit temperature in\n" 2619 " degrees Celsius\n" 2620 " -h, --help\n" 2621 " print this help message\n" 2622 " -v, --version\n\t\tprint version information\n\nFor more help, run \"man turbostat\"\n"); 2623 } 2624 2625 /* 2626 * bic_lookup 2627 * for all the strings in comma-separated name_list, 2628 * set the appropriate bit in return value. 2629
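 *
 * e.g. bic_lookup(&bic_enabled, "Busy%,Bzy_MHz", SHOW_LIST) sets the
 * bits for those two built-in columns (an illustrative call, not a
 * verbatim call site); a name matching no built-in column is parked in
 * deferred_add_names[] / deferred_skip_names[] so that a later sysfs,
 * perf, or PMT probe can claim it.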
2629 */ 2630 void bic_lookup(cpu_set_t *ret_set, char *name_list, enum show_hide_mode mode) 2631 { 2632 unsigned int i; 2633 2634 while (name_list) { 2635 char *comma; 2636 2637 comma = strchr(name_list, ','); 2638 2639 if (comma) 2640 *comma = '\0'; 2641 2642 for (i = 0; i < MAX_BIC; ++i) { 2643 if (!strcmp(name_list, bic[i].name)) { 2644 SET_BIC(i, ret_set); 2645 break; 2646 } 2647 if (!strcmp(name_list, "all")) { 2648 bic_set_all(ret_set); 2649 break; 2650 } else if (!strcmp(name_list, "topology")) { 2651 CPU_OR(ret_set, ret_set, &bic_group_topology); 2652 break; 2653 } else if (!strcmp(name_list, "power")) { 2654 CPU_OR(ret_set, ret_set, &bic_group_thermal_pwr); 2655 break; 2656 } else if (!strcmp(name_list, "idle")) { 2657 CPU_OR(ret_set, ret_set, &bic_group_idle); 2658 break; 2659 } else if (!strcmp(name_list, "swidle")) { 2660 CPU_OR(ret_set, ret_set, &bic_group_sw_idle); 2661 break; 2662 } else if (!strcmp(name_list, "sysfs")) { /* legacy compatibility */ 2663 CPU_OR(ret_set, ret_set, &bic_group_sw_idle); 2664 break; 2665 } else if (!strcmp(name_list, "hwidle")) { 2666 CPU_OR(ret_set, ret_set, &bic_group_hw_idle); 2667 break; 2668 } else if (!strcmp(name_list, "frequency")) { 2669 CPU_OR(ret_set, ret_set, &bic_group_frequency); 2670 break; 2671 } else if (!strcmp(name_list, "other")) { 2672 CPU_OR(ret_set, ret_set, &bic_group_other); 2673 break; 2674 } 2675 } 2676 if (i == MAX_BIC) { 2677 if (mode == SHOW_LIST) { 2678 deferred_add_names[deferred_add_index++] = name_list; 2679 if (deferred_add_index >= MAX_DEFERRED) { 2680 fprintf(stderr, "More than max %d un-recognized --add options '%s'\n", 2681 MAX_DEFERRED, name_list); 2682 help(); 2683 exit(1); 2684 } 2685 } else { 2686 deferred_skip_names[deferred_skip_index++] = name_list; 2687 if (debug) 2688 fprintf(stderr, "deferred \"%s\"\n", name_list); 2689 if (deferred_skip_index >= MAX_DEFERRED) { 2690 fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", 2691 MAX_DEFERRED, name_list); 2692 help(); 2693 exit(1); 2694 } 2695 } 2696 } 2697 2698 name_list = comma; 2699 if (name_list) 2700 name_list++; 2701 2702 } 2703 } 2704 2705 void print_header(char *delim) 2706 { 2707 struct msr_counter *mp; 2708 struct perf_counter_info *pp; 2709 struct pmt_counter *ppmt; 2710 int printed = 0; 2711 2712 if (DO_BIC(BIC_USEC)) 2713 outp += sprintf(outp, "%susec", (printed++ ? delim : "")); 2714 if (DO_BIC(BIC_TOD)) 2715 outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : "")); 2716 if (DO_BIC(BIC_Package)) 2717 outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); 2718 if (DO_BIC(BIC_Die)) 2719 outp += sprintf(outp, "%sDie", (printed++ ? delim : "")); 2720 if (DO_BIC(BIC_L3)) 2721 outp += sprintf(outp, "%sL3", (printed++ ? delim : "")); 2722 if (DO_BIC(BIC_Node)) 2723 outp += sprintf(outp, "%sNode", (printed++ ? delim : "")); 2724 if (DO_BIC(BIC_Core)) 2725 outp += sprintf(outp, "%sCore", (printed++ ? delim : "")); 2726 if (DO_BIC(BIC_CPU)) 2727 outp += sprintf(outp, "%sCPU", (printed++ ? delim : "")); 2728 if (DO_BIC(BIC_APIC)) 2729 outp += sprintf(outp, "%sAPIC", (printed++ ? delim : "")); 2730 if (DO_BIC(BIC_X2APIC)) 2731 outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : "")); 2732 if (DO_BIC(BIC_Avg_MHz)) 2733 outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : "")); 2734 if (DO_BIC(BIC_Busy)) 2735 outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : "")); 2736 if (DO_BIC(BIC_Bzy_MHz)) 2737 outp += sprintf(outp, "%sBzy_MHz", (printed++ ? 
delim : "")); 2738 if (DO_BIC(BIC_TSC_MHz)) 2739 outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : "")); 2740 2741 if (DO_BIC(BIC_IPC)) 2742 outp += sprintf(outp, "%sIPC", (printed++ ? delim : "")); 2743 2744 if (DO_BIC(BIC_IRQ)) { 2745 if (sums_need_wide_columns) 2746 outp += sprintf(outp, "%s IRQ", (printed++ ? delim : "")); 2747 else 2748 outp += sprintf(outp, "%sIRQ", (printed++ ? delim : "")); 2749 } 2750 if (DO_BIC(BIC_NMI)) { 2751 if (sums_need_wide_columns) 2752 outp += sprintf(outp, "%s NMI", (printed++ ? delim : "")); 2753 else 2754 outp += sprintf(outp, "%sNMI", (printed++ ? delim : "")); 2755 } 2756 2757 if (DO_BIC(BIC_SMI)) 2758 outp += sprintf(outp, "%sSMI", (printed++ ? delim : "")); 2759 2760 for (mp = sys.tp; mp; mp = mp->next) { 2761 2762 if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { 2763 if (mp->width == 64) 2764 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name); 2765 else 2766 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name); 2767 } else { 2768 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2769 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name); 2770 else 2771 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name); 2772 } 2773 } 2774 2775 for (pp = sys.perf_tp; pp; pp = pp->next) { 2776 2777 if (pp->format == FORMAT_RAW) { 2778 if (pp->width == 64) 2779 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); 2780 else 2781 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); 2782 } else { 2783 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2784 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); 2785 else 2786 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); 2787 } 2788 } 2789 2790 ppmt = sys.pmt_tp; 2791 while (ppmt) { 2792 switch (ppmt->type) { 2793 case PMT_TYPE_RAW: 2794 if (pmt_counter_get_width(ppmt) <= 32) 2795 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); 2796 else 2797 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); 2798 2799 break; 2800 2801 case PMT_TYPE_XTAL_TIME: 2802 case PMT_TYPE_TCORE_CLOCK: 2803 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); 2804 break; 2805 } 2806 2807 ppmt = ppmt->next; 2808 } 2809 2810 if (DO_BIC(BIC_CPU_c1)) 2811 outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); 2812 if (DO_BIC(BIC_CPU_c3)) 2813 outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : "")); 2814 if (DO_BIC(BIC_CPU_c6)) 2815 outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : "")); 2816 if (DO_BIC(BIC_CPU_c7)) 2817 outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : "")); 2818 2819 if (DO_BIC(BIC_Mod_c6)) 2820 outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : "")); 2821 2822 if (DO_BIC(BIC_CoreTmp)) 2823 outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : "")); 2824 2825 if (DO_BIC(BIC_CORE_THROT_CNT)) 2826 outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : "")); 2827 2828 if (platform->rapl_msrs && !rapl_joules) { 2829 if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) 2830 outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); 2831 } else if (platform->rapl_msrs && rapl_joules) { 2832 if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) 2833 outp += sprintf(outp, "%sCor_J", (printed++ ? 
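	/*
	 * Note: the added-counter header loops just below pass delim
	 * unconditionally instead of using the printed++ idiom, so those
	 * column headers always carry a leading separator.
	 */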
delim : "")); 2834 } 2835 2836 for (mp = sys.cp; mp; mp = mp->next) { 2837 if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { 2838 if (mp->width == 64) 2839 outp += sprintf(outp, "%s%18.18s", delim, mp->name); 2840 else 2841 outp += sprintf(outp, "%s%10.10s", delim, mp->name); 2842 } else { 2843 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2844 outp += sprintf(outp, "%s%8s", delim, mp->name); 2845 else 2846 outp += sprintf(outp, "%s%s", delim, mp->name); 2847 } 2848 } 2849 2850 for (pp = sys.perf_cp; pp; pp = pp->next) { 2851 2852 if (pp->format == FORMAT_RAW) { 2853 if (pp->width == 64) 2854 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); 2855 else 2856 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); 2857 } else { 2858 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2859 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); 2860 else 2861 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); 2862 } 2863 } 2864 2865 ppmt = sys.pmt_cp; 2866 while (ppmt) { 2867 switch (ppmt->type) { 2868 case PMT_TYPE_RAW: 2869 if (pmt_counter_get_width(ppmt) <= 32) 2870 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); 2871 else 2872 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); 2873 2874 break; 2875 2876 case PMT_TYPE_XTAL_TIME: 2877 case PMT_TYPE_TCORE_CLOCK: 2878 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); 2879 break; 2880 } 2881 2882 ppmt = ppmt->next; 2883 } 2884 2885 if (DO_BIC(BIC_PkgTmp)) 2886 outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : "")); 2887 2888 if (DO_BIC(BIC_GFX_rc6)) 2889 outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : "")); 2890 2891 if (DO_BIC(BIC_GFXMHz)) 2892 outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : "")); 2893 2894 if (DO_BIC(BIC_GFXACTMHz)) 2895 outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : "")); 2896 2897 if (DO_BIC(BIC_SAM_mc6)) 2898 outp += sprintf(outp, "%sSAM%%mc6", (printed++ ? delim : "")); 2899 2900 if (DO_BIC(BIC_SAMMHz)) 2901 outp += sprintf(outp, "%sSAMMHz", (printed++ ? delim : "")); 2902 2903 if (DO_BIC(BIC_SAMACTMHz)) 2904 outp += sprintf(outp, "%sSAMAMHz", (printed++ ? delim : "")); 2905 2906 if (DO_BIC(BIC_Totl_c0)) 2907 outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : "")); 2908 if (DO_BIC(BIC_Any_c0)) 2909 outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : "")); 2910 if (DO_BIC(BIC_GFX_c0)) 2911 outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : "")); 2912 if (DO_BIC(BIC_CPUGFX)) 2913 outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : "")); 2914 2915 if (DO_BIC(BIC_Pkgpc2)) 2916 outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : "")); 2917 if (DO_BIC(BIC_Pkgpc3)) 2918 outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : "")); 2919 if (DO_BIC(BIC_Pkgpc6)) 2920 outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : "")); 2921 if (DO_BIC(BIC_Pkgpc7)) 2922 outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : "")); 2923 if (DO_BIC(BIC_Pkgpc8)) 2924 outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : "")); 2925 if (DO_BIC(BIC_Pkgpc9)) 2926 outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : "")); 2927 if (DO_BIC(BIC_Pkgpc10)) 2928 outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : "")); 2929 if (DO_BIC(BIC_Diec6)) 2930 outp += sprintf(outp, "%sDie%%c6", (printed++ ? delim : "")); 2931 if (DO_BIC(BIC_CPU_LPI)) 2932 outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? 
delim : "")); 2933 if (DO_BIC(BIC_SYS_LPI)) 2934 outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : "")); 2935 2936 if (!rapl_joules) { 2937 if (DO_BIC(BIC_PkgWatt)) 2938 outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : "")); 2939 if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) 2940 outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); 2941 if (DO_BIC(BIC_GFXWatt)) 2942 outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : "")); 2943 if (DO_BIC(BIC_RAMWatt)) 2944 outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : "")); 2945 if (DO_BIC(BIC_PKG__)) 2946 outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); 2947 if (DO_BIC(BIC_RAM__)) 2948 outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : "")); 2949 } else { 2950 if (DO_BIC(BIC_Pkg_J)) 2951 outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : "")); 2952 if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) 2953 outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); 2954 if (DO_BIC(BIC_GFX_J)) 2955 outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : "")); 2956 if (DO_BIC(BIC_RAM_J)) 2957 outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : "")); 2958 if (DO_BIC(BIC_PKG__)) 2959 outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); 2960 if (DO_BIC(BIC_RAM__)) 2961 outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : "")); 2962 } 2963 if (DO_BIC(BIC_UNCORE_MHZ)) 2964 outp += sprintf(outp, "%sUncMHz", (printed++ ? delim : "")); 2965 2966 for (mp = sys.pp; mp; mp = mp->next) { 2967 if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { 2968 if (mp->width == 64) 2969 outp += sprintf(outp, "%s%18.18s", delim, mp->name); 2970 else if (mp->width == 32) 2971 outp += sprintf(outp, "%s%10.10s", delim, mp->name); 2972 else 2973 outp += sprintf(outp, "%s%7.7s", delim, mp->name); 2974 } else { 2975 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2976 outp += sprintf(outp, "%s%8s", delim, mp->name); 2977 else 2978 outp += sprintf(outp, "%s%7.7s", delim, mp->name); 2979 } 2980 } 2981 2982 for (pp = sys.perf_pp; pp; pp = pp->next) { 2983 2984 if (pp->format == FORMAT_RAW) { 2985 if (pp->width == 64) 2986 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); 2987 else 2988 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); 2989 } else { 2990 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2991 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); 2992 else 2993 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); 2994 } 2995 } 2996 2997 ppmt = sys.pmt_pp; 2998 while (ppmt) { 2999 switch (ppmt->type) { 3000 case PMT_TYPE_RAW: 3001 if (pmt_counter_get_width(ppmt) <= 32) 3002 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); 3003 else 3004 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); 3005 3006 break; 3007 3008 case PMT_TYPE_XTAL_TIME: 3009 case PMT_TYPE_TCORE_CLOCK: 3010 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); 3011 break; 3012 } 3013 3014 ppmt = ppmt->next; 3015 } 3016 3017 if (DO_BIC(BIC_SysWatt)) 3018 outp += sprintf(outp, "%sSysWatt", (printed++ ? delim : "")); 3019 if (DO_BIC(BIC_Sys_J)) 3020 outp += sprintf(outp, "%sSys_J", (printed++ ? delim : "")); 3021 3022 outp += sprintf(outp, "\n"); 3023 } 3024 3025 int dump_counters(PER_THREAD_PARAMS) 3026 { 3027 int i; 3028 struct msr_counter *mp; 3029 struct platform_counters *pplat_cnt = p == package_odd ? 
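	/*
	 * Snapshots are double-buffered even/odd (cf. ODD_COUNTERS /
	 * EVEN_COUNTERS); platform-scope counters follow the same scheme,
	 * so pick the platform_counters instance matching the buffer that
	 * p points into.
	 */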
&platform_counters_odd : &platform_counters_even; 3030 3031 outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p); 3032 3033 if (t) { 3034 outp += sprintf(outp, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags); 3035 outp += sprintf(outp, "TSC: %016llX\n", t->tsc); 3036 outp += sprintf(outp, "aperf: %016llX\n", t->aperf); 3037 outp += sprintf(outp, "mperf: %016llX\n", t->mperf); 3038 outp += sprintf(outp, "c1: %016llX\n", t->c1); 3039 3040 if (DO_BIC(BIC_IPC)) 3041 outp += sprintf(outp, "IPC: %lld\n", t->instr_count); 3042 3043 if (DO_BIC(BIC_IRQ)) 3044 outp += sprintf(outp, "IRQ: %lld\n", t->irq_count); 3045 if (DO_BIC(BIC_NMI)) 3046 outp += sprintf(outp, "IRQ: %lld\n", t->nmi_count); 3047 if (DO_BIC(BIC_SMI)) 3048 outp += sprintf(outp, "SMI: %d\n", t->smi_count); 3049 3050 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 3051 outp += 3052 sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, 3053 t->counter[i], mp->sp->path); 3054 } 3055 } 3056 3057 if (c && is_cpu_first_thread_in_core(t, c, p)) { 3058 outp += sprintf(outp, "core: %d\n", c->core_id); 3059 outp += sprintf(outp, "c3: %016llX\n", c->c3); 3060 outp += sprintf(outp, "c6: %016llX\n", c->c6); 3061 outp += sprintf(outp, "c7: %016llX\n", c->c7); 3062 outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c); 3063 outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt); 3064 3065 const unsigned long long energy_value = c->core_energy.raw_value * c->core_energy.scale; 3066 const double energy_scale = c->core_energy.scale; 3067 3068 if (c->core_energy.unit == RAPL_UNIT_JOULES) 3069 outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale); 3070 3071 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 3072 outp += 3073 sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, 3074 c->counter[i], mp->sp->path); 3075 } 3076 outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us); 3077 } 3078 3079 if (p && is_cpu_first_core_in_package(t, c, p)) { 3080 outp += sprintf(outp, "package: %d\n", p->package_id); 3081 3082 outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0); 3083 outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0); 3084 outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0); 3085 outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0); 3086 3087 outp += sprintf(outp, "pc2: %016llX\n", p->pc2); 3088 if (DO_BIC(BIC_Pkgpc3)) 3089 outp += sprintf(outp, "pc3: %016llX\n", p->pc3); 3090 if (DO_BIC(BIC_Pkgpc6)) 3091 outp += sprintf(outp, "pc6: %016llX\n", p->pc6); 3092 if (DO_BIC(BIC_Pkgpc7)) 3093 outp += sprintf(outp, "pc7: %016llX\n", p->pc7); 3094 outp += sprintf(outp, "pc8: %016llX\n", p->pc8); 3095 outp += sprintf(outp, "pc9: %016llX\n", p->pc9); 3096 outp += sprintf(outp, "pc10: %016llX\n", p->pc10); 3097 outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi); 3098 outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi); 3099 outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg.raw_value); 3100 outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores.raw_value); 3101 outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx.raw_value); 3102 outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram.raw_value); 3103 outp += sprintf(outp, "Joules PSYS: %0llX\n", pplat_cnt->energy_psys.raw_value); 3104 outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status.raw_value); 3105 outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status.raw_value); 3106 outp += 
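	    /* PTM: package temperature in degrees Celsius, cf. the PkgTmp column */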
sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); 3107 3108 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 3109 outp += 3110 sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, 3111 p->counter[i], mp->sp->path); 3112 } 3113 } 3114 3115 outp += sprintf(outp, "\n"); 3116 3117 return 0; 3118 } 3119 3120 double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desired_unit, double interval) 3121 { 3122 assert(desired_unit != RAPL_UNIT_INVALID); 3123 3124 /* 3125 * For now we don't expect anything other than joules, 3126 * so just simplify the logic. 3127 */ 3128 assert(c->unit == RAPL_UNIT_JOULES); 3129 3130 const double scaled = c->raw_value * c->scale; 3131 3132 if (desired_unit == RAPL_UNIT_WATTS) 3133 return scaled / interval; 3134 return scaled; 3135 } 3136 3137 /* 3138 * column formatting convention & formats 3139 */ 3140 int format_counters(PER_THREAD_PARAMS) 3141 { 3142 static int count; 3143 3144 struct platform_counters *pplat_cnt = NULL; 3145 double interval_float, tsc; 3146 char *fmt8; 3147 int i; 3148 struct msr_counter *mp; 3149 struct perf_counter_info *pp; 3150 struct pmt_counter *ppmt; 3151 char *delim = "\t"; 3152 int printed = 0; 3153 3154 if (t == &average.threads) { 3155 pplat_cnt = count & 1 ? &platform_counters_odd : &platform_counters_even; 3156 ++count; 3157 } 3158 3159 /* if showing only 1st thread in core and this isn't one, bail out */ 3160 if (show_core_only && !is_cpu_first_thread_in_core(t, c, p)) 3161 return 0; 3162 3163 /* if showing only 1st thread in pkg and this isn't one, bail out */ 3164 if (show_pkg_only && !is_cpu_first_core_in_package(t, c, p)) 3165 return 0; 3166 3167 /*if not summary line and --cpu is used */ 3168 if ((t != &average.threads) && (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset))) 3169 return 0; 3170 3171 if (DO_BIC(BIC_USEC)) { 3172 /* on each row, print how many usec each timestamp took to gather */ 3173 struct timeval tv; 3174 3175 timersub(&t->tv_end, &t->tv_begin, &tv); 3176 outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec); 3177 } 3178 3179 /* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */ 3180 if (DO_BIC(BIC_TOD)) 3181 outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec); 3182 3183 interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec / 1000000.0; 3184 3185 tsc = t->tsc * tsc_tweak; 3186 3187 /* topo columns, print blanks on 1st (average) line */ 3188 if (t == &average.threads) { 3189 if (DO_BIC(BIC_Package)) 3190 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 3191 if (DO_BIC(BIC_Die)) 3192 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 3193 if (DO_BIC(BIC_L3)) 3194 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 3195 if (DO_BIC(BIC_Node)) 3196 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 3197 if (DO_BIC(BIC_Core)) 3198 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 3199 if (DO_BIC(BIC_CPU)) 3200 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 3201 if (DO_BIC(BIC_APIC)) 3202 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 3203 if (DO_BIC(BIC_X2APIC)) 3204 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 3205 } else { 3206 if (DO_BIC(BIC_Package)) { 3207 if (p) 3208 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id); 3209 else 3210 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 3211 } 3212 if (DO_BIC(BIC_Die)) { 3213 if (c) 3214 outp += sprintf(outp, "%s%d", (printed++ ? 
delim : ""), cpus[t->cpu_id].die_id); 3215 else 3216 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 3217 } 3218 if (DO_BIC(BIC_L3)) { 3219 if (c) 3220 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].l3_id); 3221 else 3222 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 3223 } 3224 if (DO_BIC(BIC_Node)) { 3225 if (t) 3226 outp += sprintf(outp, "%s%d", 3227 (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id); 3228 else 3229 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 3230 } 3231 if (DO_BIC(BIC_Core)) { 3232 if (c) 3233 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id); 3234 else 3235 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 3236 } 3237 if (DO_BIC(BIC_CPU)) 3238 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id); 3239 if (DO_BIC(BIC_APIC)) 3240 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id); 3241 if (DO_BIC(BIC_X2APIC)) 3242 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id); 3243 } 3244 3245 if (DO_BIC(BIC_Avg_MHz)) 3246 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float); 3247 3248 if (DO_BIC(BIC_Busy)) 3249 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc); 3250 3251 if (DO_BIC(BIC_Bzy_MHz)) { 3252 if (has_base_hz) 3253 outp += 3254 sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf); 3255 else 3256 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 3257 tsc / units * t->aperf / t->mperf / interval_float); 3258 } 3259 3260 if (DO_BIC(BIC_TSC_MHz)) 3261 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc / units / interval_float); 3262 3263 if (DO_BIC(BIC_IPC)) 3264 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf); 3265 3266 /* IRQ */ 3267 if (DO_BIC(BIC_IRQ)) { 3268 if (sums_need_wide_columns) 3269 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count); 3270 else 3271 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count); 3272 } 3273 3274 /* NMI */ 3275 if (DO_BIC(BIC_NMI)) { 3276 if (sums_need_wide_columns) 3277 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->nmi_count); 3278 else 3279 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->nmi_count); 3280 } 3281 3282 /* SMI */ 3283 if (DO_BIC(BIC_SMI)) 3284 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count); 3285 3286 /* Added counters */ 3287 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 3288 if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { 3289 if (mp->width == 32) 3290 outp += 3291 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]); 3292 else 3293 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]); 3294 } else if (mp->format == FORMAT_DELTA) { 3295 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 3296 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]); 3297 else 3298 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]); 3299 } else if (mp->format == FORMAT_PERCENT) { 3300 if (mp->type == COUNTER_USEC) 3301 outp += 3302 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 3303 t->counter[i] / interval_float / 10000); 3304 else 3305 outp += sprintf(outp, "%s%.2f", (printed++ ? 
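			/*
			 * Non-USEC percentage counters are cycle counts scaled
			 * against the TSC; the COUNTER_USEC branch above folds
			 * the x100 and the usec-to-seconds conversion into one
			 * divisor: 100 * (usec / 1e6) / interval == usec / interval / 10000.
			 */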
delim : ""), 100.0 * t->counter[i] / tsc); 3306 } 3307 } 3308 3309 /* Added perf counters */ 3310 for (i = 0, pp = sys.perf_tp; pp; ++i, pp = pp->next) { 3311 if (pp->format == FORMAT_RAW) { 3312 if (pp->width == 32) 3313 outp += 3314 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 3315 (unsigned int)t->perf_counter[i]); 3316 else 3317 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->perf_counter[i]); 3318 } else if (pp->format == FORMAT_DELTA) { 3319 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 3320 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->perf_counter[i]); 3321 else 3322 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->perf_counter[i]); 3323 } else if (pp->format == FORMAT_PERCENT) { 3324 if (pp->type == COUNTER_USEC) 3325 outp += 3326 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 3327 t->perf_counter[i] / interval_float / 10000); 3328 else 3329 outp += 3330 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->perf_counter[i] / tsc); 3331 } 3332 } 3333 3334 for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { 3335 const unsigned long value_raw = t->pmt_counter[i]; 3336 double value_converted; 3337 switch (ppmt->type) { 3338 case PMT_TYPE_RAW: 3339 if (pmt_counter_get_width(ppmt) <= 32) 3340 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 3341 (unsigned int)t->pmt_counter[i]); 3342 else 3343 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->pmt_counter[i]); 3344 3345 break; 3346 3347 case PMT_TYPE_XTAL_TIME: 3348 value_converted = 100.0 * value_raw / crystal_hz / interval_float; 3349 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); 3350 break; 3351 3352 case PMT_TYPE_TCORE_CLOCK: 3353 value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; 3354 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); 3355 } 3356 } 3357 3358 /* C1 */ 3359 if (DO_BIC(BIC_CPU_c1)) 3360 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc); 3361 3362 /* print per-core data only for 1st thread in core */ 3363 if (!is_cpu_first_thread_in_core(t, c, p)) 3364 goto done; 3365 3366 if (DO_BIC(BIC_CPU_c3)) 3367 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc); 3368 if (DO_BIC(BIC_CPU_c6)) 3369 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc); 3370 if (DO_BIC(BIC_CPU_c7)) 3371 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc); 3372 3373 /* Mod%c6 */ 3374 if (DO_BIC(BIC_Mod_c6)) 3375 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc); 3376 3377 if (DO_BIC(BIC_CoreTmp)) 3378 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c); 3379 3380 /* Core throttle count */ 3381 if (DO_BIC(BIC_CORE_THROT_CNT)) 3382 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt); 3383 3384 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 3385 if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { 3386 if (mp->width == 32) 3387 outp += 3388 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]); 3389 else 3390 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]); 3391 } else if (mp->format == FORMAT_DELTA) { 3392 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 3393 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]); 3394 else 3395 outp += sprintf(outp, "%s%lld", (printed++ ? 
delim : ""), c->counter[i]); 3396 } else if (mp->format == FORMAT_PERCENT) { 3397 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc); 3398 } 3399 } 3400 3401 for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { 3402 if (pp->format == FORMAT_RAW) { 3403 if (pp->width == 32) 3404 outp += 3405 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 3406 (unsigned int)c->perf_counter[i]); 3407 else 3408 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->perf_counter[i]); 3409 } else if (pp->format == FORMAT_DELTA) { 3410 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 3411 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->perf_counter[i]); 3412 else 3413 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->perf_counter[i]); 3414 } else if (pp->format == FORMAT_PERCENT) { 3415 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->perf_counter[i] / tsc); 3416 } 3417 } 3418 3419 for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { 3420 const unsigned long value_raw = c->pmt_counter[i]; 3421 double value_converted; 3422 switch (ppmt->type) { 3423 case PMT_TYPE_RAW: 3424 if (pmt_counter_get_width(ppmt) <= 32) 3425 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 3426 (unsigned int)c->pmt_counter[i]); 3427 else 3428 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->pmt_counter[i]); 3429 3430 break; 3431 3432 case PMT_TYPE_XTAL_TIME: 3433 value_converted = 100.0 * value_raw / crystal_hz / interval_float; 3434 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); 3435 break; 3436 3437 case PMT_TYPE_TCORE_CLOCK: 3438 value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; 3439 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); 3440 } 3441 } 3442 3443 fmt8 = "%s%.2f"; 3444 3445 if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) 3446 outp += 3447 sprintf(outp, fmt8, (printed++ ? delim : ""), 3448 rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float)); 3449 if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) 3450 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3451 rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float)); 3452 3453 /* print per-package data only for 1st core in package */ 3454 if (!is_cpu_first_core_in_package(t, c, p)) 3455 goto done; 3456 3457 /* PkgTmp */ 3458 if (DO_BIC(BIC_PkgTmp)) 3459 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c); 3460 3461 /* GFXrc6 */ 3462 if (DO_BIC(BIC_GFX_rc6)) { 3463 if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */ 3464 outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); 3465 } else { 3466 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 3467 p->gfx_rc6_ms / 10.0 / interval_float); 3468 } 3469 } 3470 3471 /* GFXMHz */ 3472 if (DO_BIC(BIC_GFXMHz)) 3473 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz); 3474 3475 /* GFXACTMHz */ 3476 if (DO_BIC(BIC_GFXACTMHz)) 3477 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz); 3478 3479 /* SAMmc6 */ 3480 if (DO_BIC(BIC_SAM_mc6)) { 3481 if (p->sam_mc6_ms == -1) { /* detect GFX counter reset */ 3482 outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); 3483 } else { 3484 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 3485 p->sam_mc6_ms / 10.0 / interval_float); 3486 } 3487 } 3488 3489 /* SAMMHz */ 3490 if (DO_BIC(BIC_SAMMHz)) 3491 outp += sprintf(outp, "%s%d", (printed++ ? 
delim : ""), p->sam_mhz); 3492 3493 /* SAMACTMHz */ 3494 if (DO_BIC(BIC_SAMACTMHz)) 3495 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_act_mhz); 3496 3497 /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ 3498 if (DO_BIC(BIC_Totl_c0)) 3499 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc); 3500 if (DO_BIC(BIC_Any_c0)) 3501 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc); 3502 if (DO_BIC(BIC_GFX_c0)) 3503 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc); 3504 if (DO_BIC(BIC_CPUGFX)) 3505 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc); 3506 3507 if (DO_BIC(BIC_Pkgpc2)) 3508 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc); 3509 if (DO_BIC(BIC_Pkgpc3)) 3510 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc); 3511 if (DO_BIC(BIC_Pkgpc6)) 3512 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc); 3513 if (DO_BIC(BIC_Pkgpc7)) 3514 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc); 3515 if (DO_BIC(BIC_Pkgpc8)) 3516 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc); 3517 if (DO_BIC(BIC_Pkgpc9)) 3518 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc); 3519 if (DO_BIC(BIC_Pkgpc10)) 3520 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc); 3521 3522 if (DO_BIC(BIC_Diec6)) 3523 outp += 3524 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->die_c6 / crystal_hz / interval_float); 3525 3526 if (DO_BIC(BIC_CPU_LPI)) { 3527 if (p->cpu_lpi >= 0) 3528 outp += 3529 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 3530 100.0 * p->cpu_lpi / 1000000.0 / interval_float); 3531 else 3532 outp += sprintf(outp, "%s(neg)", (printed++ ? delim : "")); 3533 } 3534 if (DO_BIC(BIC_SYS_LPI)) { 3535 if (p->sys_lpi >= 0) 3536 outp += 3537 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 3538 100.0 * p->sys_lpi / 1000000.0 / interval_float); 3539 else 3540 outp += sprintf(outp, "%s(neg)", (printed++ ? delim : "")); 3541 } 3542 3543 if (DO_BIC(BIC_PkgWatt)) 3544 outp += 3545 sprintf(outp, fmt8, (printed++ ? delim : ""), 3546 rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float)); 3547 if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) 3548 outp += 3549 sprintf(outp, fmt8, (printed++ ? delim : ""), 3550 rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float)); 3551 if (DO_BIC(BIC_GFXWatt)) 3552 outp += 3553 sprintf(outp, fmt8, (printed++ ? delim : ""), 3554 rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float)); 3555 if (DO_BIC(BIC_RAMWatt)) 3556 outp += 3557 sprintf(outp, fmt8, (printed++ ? delim : ""), 3558 rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float)); 3559 if (DO_BIC(BIC_Pkg_J)) 3560 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3561 rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float)); 3562 if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) 3563 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3564 rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float)); 3565 if (DO_BIC(BIC_GFX_J)) 3566 outp += sprintf(outp, fmt8, (printed++ ? 
delim : ""), 3567 rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float)); 3568 if (DO_BIC(BIC_RAM_J)) 3569 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3570 rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float)); 3571 if (DO_BIC(BIC_PKG__)) 3572 outp += 3573 sprintf(outp, fmt8, (printed++ ? delim : ""), 3574 rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float)); 3575 if (DO_BIC(BIC_RAM__)) 3576 outp += 3577 sprintf(outp, fmt8, (printed++ ? delim : ""), 3578 rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float)); 3579 /* UncMHz */ 3580 if (DO_BIC(BIC_UNCORE_MHZ)) 3581 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz); 3582 3583 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 3584 if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { 3585 if (mp->width == 32) 3586 outp += 3587 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]); 3588 else 3589 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]); 3590 } else if (mp->format == FORMAT_DELTA) { 3591 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 3592 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]); 3593 else 3594 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]); 3595 } else if (mp->format == FORMAT_PERCENT) { 3596 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc); 3597 } else if (mp->type == COUNTER_K2M) 3598 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->counter[i] / 1000); 3599 } 3600 3601 for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { 3602 if (pp->format == FORMAT_RAW) { 3603 if (pp->width == 32) 3604 outp += 3605 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 3606 (unsigned int)p->perf_counter[i]); 3607 else 3608 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->perf_counter[i]); 3609 } else if (pp->format == FORMAT_DELTA) { 3610 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 3611 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->perf_counter[i]); 3612 else 3613 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->perf_counter[i]); 3614 } else if (pp->format == FORMAT_PERCENT) { 3615 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->perf_counter[i] / tsc); 3616 } else if (pp->type == COUNTER_K2M) { 3617 outp += 3618 sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->perf_counter[i] / 1000); 3619 } 3620 } 3621 3622 for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { 3623 const unsigned long value_raw = p->pmt_counter[i]; 3624 double value_converted; 3625 switch (ppmt->type) { 3626 case PMT_TYPE_RAW: 3627 if (pmt_counter_get_width(ppmt) <= 32) 3628 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 3629 (unsigned int)p->pmt_counter[i]); 3630 else 3631 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->pmt_counter[i]); 3632 3633 break; 3634 3635 case PMT_TYPE_XTAL_TIME: 3636 value_converted = 100.0 * value_raw / crystal_hz / interval_float; 3637 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); 3638 break; 3639 3640 case PMT_TYPE_TCORE_CLOCK: 3641 value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; 3642 outp += sprintf(outp, "%s%.2f", (printed++ ? 
delim : ""), value_converted); 3643 } 3644 } 3645 3646 if (DO_BIC(BIC_SysWatt) && (t == &average.threads)) 3647 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3648 rapl_counter_get_value(&pplat_cnt->energy_psys, RAPL_UNIT_WATTS, interval_float)); 3649 if (DO_BIC(BIC_Sys_J) && (t == &average.threads)) 3650 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3651 rapl_counter_get_value(&pplat_cnt->energy_psys, RAPL_UNIT_JOULES, interval_float)); 3652 3653 done: 3654 if (*(outp - 1) != '\n') 3655 outp += sprintf(outp, "\n"); 3656 3657 return 0; 3658 } 3659 3660 void flush_output_stdout(void) 3661 { 3662 FILE *filep; 3663 3664 if (outf == stderr) 3665 filep = stdout; 3666 else 3667 filep = outf; 3668 3669 fputs(output_buffer, filep); 3670 fflush(filep); 3671 3672 outp = output_buffer; 3673 } 3674 3675 void flush_output_stderr(void) 3676 { 3677 fputs(output_buffer, outf); 3678 fflush(outf); 3679 outp = output_buffer; 3680 } 3681 3682 void format_all_counters(PER_THREAD_PARAMS) 3683 { 3684 static int count; 3685 3686 if ((!count || (header_iterations && !(count % header_iterations))) || !summary_only) 3687 print_header("\t"); 3688 3689 format_counters(&average.threads, &average.cores, &average.packages); 3690 3691 count++; 3692 3693 if (summary_only) 3694 return; 3695 3696 for_all_cpus(format_counters, t, c, p); 3697 } 3698 3699 #define DELTA_WRAP32(new, old) \ 3700 old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32); 3701 3702 int delta_package(struct pkg_data *new, struct pkg_data *old) 3703 { 3704 int i; 3705 struct msr_counter *mp; 3706 struct perf_counter_info *pp; 3707 struct pmt_counter *ppmt; 3708 3709 if (DO_BIC(BIC_Totl_c0)) 3710 old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; 3711 if (DO_BIC(BIC_Any_c0)) 3712 old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0; 3713 if (DO_BIC(BIC_GFX_c0)) 3714 old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0; 3715 if (DO_BIC(BIC_CPUGFX)) 3716 old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0; 3717 3718 old->pc2 = new->pc2 - old->pc2; 3719 if (DO_BIC(BIC_Pkgpc3)) 3720 old->pc3 = new->pc3 - old->pc3; 3721 if (DO_BIC(BIC_Pkgpc6)) 3722 old->pc6 = new->pc6 - old->pc6; 3723 if (DO_BIC(BIC_Pkgpc7)) 3724 old->pc7 = new->pc7 - old->pc7; 3725 old->pc8 = new->pc8 - old->pc8; 3726 old->pc9 = new->pc9 - old->pc9; 3727 old->pc10 = new->pc10 - old->pc10; 3728 old->die_c6 = new->die_c6 - old->die_c6; 3729 old->cpu_lpi = new->cpu_lpi - old->cpu_lpi; 3730 old->sys_lpi = new->sys_lpi - old->sys_lpi; 3731 old->pkg_temp_c = new->pkg_temp_c; 3732 3733 /* flag an error when rc6 counter resets/wraps */ 3734 if (old->gfx_rc6_ms > new->gfx_rc6_ms) 3735 old->gfx_rc6_ms = -1; 3736 else 3737 old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms; 3738 3739 old->uncore_mhz = new->uncore_mhz; 3740 old->gfx_mhz = new->gfx_mhz; 3741 old->gfx_act_mhz = new->gfx_act_mhz; 3742 3743 /* flag an error when mc6 counter resets/wraps */ 3744 if (old->sam_mc6_ms > new->sam_mc6_ms) 3745 old->sam_mc6_ms = -1; 3746 else 3747 old->sam_mc6_ms = new->sam_mc6_ms - old->sam_mc6_ms; 3748 3749 old->sam_mhz = new->sam_mhz; 3750 old->sam_act_mhz = new->sam_act_mhz; 3751 3752 old->energy_pkg.raw_value = new->energy_pkg.raw_value - old->energy_pkg.raw_value; 3753 old->energy_cores.raw_value = new->energy_cores.raw_value - old->energy_cores.raw_value; 3754 old->energy_gfx.raw_value = new->energy_gfx.raw_value - old->energy_gfx.raw_value; 3755 old->energy_dram.raw_value = 
new->energy_dram.raw_value - old->energy_dram.raw_value;
3756 old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value;
3757 old->rapl_dram_perf_status.raw_value =
3758 new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value;
3759 
3760 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
3761 if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE)
3762 old->counter[i] = new->counter[i];
3765 else
3766 old->counter[i] = new->counter[i] - old->counter[i];
3767 }
3768 
3769 for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) {
3770 if (pp->format == FORMAT_RAW)
3771 old->perf_counter[i] = new->perf_counter[i];
3772 else if (pp->format == FORMAT_AVERAGE)
3773 old->perf_counter[i] = new->perf_counter[i];
3774 else
3775 old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i];
3776 }
3777 
3778 for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) {
3779 if (ppmt->format == FORMAT_RAW)
3780 old->pmt_counter[i] = new->pmt_counter[i];
3781 else
3782 old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i];
3783 }
3784 
3785 return 0;
3786 }
3787 
3788 void delta_core(struct core_data *new, struct core_data *old)
3789 {
3790 int i;
3791 struct msr_counter *mp;
3792 struct perf_counter_info *pp;
3793 struct pmt_counter *ppmt;
3794 
3795 old->c3 = new->c3 - old->c3;
3796 old->c6 = new->c6 - old->c6;
3797 old->c7 = new->c7 - old->c7;
3798 old->core_temp_c = new->core_temp_c;
3799 old->core_throt_cnt = new->core_throt_cnt - old->core_throt_cnt;
3800 old->mc6_us = new->mc6_us - old->mc6_us;
3801 
3802 DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value);	/* 32-bit RAPL counter: take the delta modulo 2^32 */
3803 
3804 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
3805 if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE)
3806 old->counter[i] = new->counter[i];
3807 else
3808 old->counter[i] = new->counter[i] - old->counter[i];
3809 }
3810 
3811 for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) {
3812 if (pp->format == FORMAT_RAW)
3813 old->perf_counter[i] = new->perf_counter[i];
3814 else
3815 old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i];
3816 }
3817 
3818 for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) {
3819 if (ppmt->format == FORMAT_RAW)
3820 old->pmt_counter[i] = new->pmt_counter[i];
3821 else
3822 old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i];
3823 }
3824 }
3825 
3826 int soft_c1_residency_display(int bic)
3827 {
3828 if (!DO_BIC(BIC_CPU_c1) || platform->has_msr_core_c1_res)
3829 return 0;
3830 
3831 return DO_BIC_READ(bic);
3832 }
3833 
3834 /*
3835  * old = new - old
3836  */
3837 int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta)
3838 {
3839 int i;
3840 struct msr_counter *mp;
3841 struct perf_counter_info *pp;
3842 struct pmt_counter *ppmt;
3843 
3844 /* we run cpuid just the 1st time, copy the results */
3845 if (DO_BIC(BIC_APIC))
3846 new->apic_id = old->apic_id;
3847 if (DO_BIC(BIC_X2APIC))
3848 new->x2apic_id = old->x2apic_id;
3849 
3850 /*
3851  * the timestamps from the start of the measurement interval are in "old";
3852  * the timestamps from the end of the measurement interval are in "new".
3853  * Over-write old w/ new so we can print end-of-interval values.
3854  */
3855 
3856 timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta);
3857 old->tv_begin = new->tv_begin;
3858 old->tv_end = new->tv_end;
3859 
3860 old->tsc = new->tsc - old->tsc;
3861 
3862 /* check
for TSC < 1 Mcycles over interval */ 3863 if (old->tsc < (1000 * 1000)) 3864 errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n" 3865 "You can disable all c-states by booting with \"idle=poll\"\n" 3866 "or just the deep ones with \"processor.max_cstate=1\""); 3867 3868 old->c1 = new->c1 - old->c1; 3869 3870 if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC) 3871 || soft_c1_residency_display(BIC_Avg_MHz)) { 3872 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { 3873 old->aperf = new->aperf - old->aperf; 3874 old->mperf = new->mperf - old->mperf; 3875 } else { 3876 return -1; 3877 } 3878 } 3879 3880 if (platform->has_msr_core_c1_res) { 3881 /* 3882 * Some models have a dedicated C1 residency MSR, 3883 * which should be more accurate than the derivation below. 3884 */ 3885 } else { 3886 /* 3887 * As counter collection is not atomic, 3888 * it is possible for mperf's non-halted cycles + idle states 3889 * to exceed TSC's all cycles: show c1 = 0% in that case. 3890 */ 3891 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak)) 3892 old->c1 = 0; 3893 else { 3894 /* normal case, derive c1 */ 3895 old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3 3896 - core_delta->c6 - core_delta->c7; 3897 } 3898 } 3899 3900 if (old->mperf == 0) { 3901 if (debug > 1) 3902 fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id); 3903 old->mperf = 1; /* divide by 0 protection */ 3904 } 3905 3906 if (DO_BIC(BIC_IPC)) 3907 old->instr_count = new->instr_count - old->instr_count; 3908 3909 if (DO_BIC(BIC_IRQ)) 3910 old->irq_count = new->irq_count - old->irq_count; 3911 3912 if (DO_BIC(BIC_NMI)) 3913 old->nmi_count = new->nmi_count - old->nmi_count; 3914 3915 if (DO_BIC(BIC_SMI)) 3916 old->smi_count = new->smi_count - old->smi_count; 3917 3918 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 3919 if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) 3920 old->counter[i] = new->counter[i]; 3921 else 3922 old->counter[i] = new->counter[i] - old->counter[i]; 3923 } 3924 3925 for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { 3926 if (pp->format == FORMAT_RAW) 3927 old->perf_counter[i] = new->perf_counter[i]; 3928 else 3929 old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; 3930 } 3931 3932 for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { 3933 if (ppmt->format == FORMAT_RAW) 3934 old->pmt_counter[i] = new->pmt_counter[i]; 3935 else 3936 old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; 3937 } 3938 3939 return 0; 3940 } 3941 3942 int delta_cpu(struct thread_data *t, struct core_data *c, 3943 struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2) 3944 { 3945 int retval = 0; 3946 3947 /* calculate core delta only for 1st thread in core */ 3948 if (is_cpu_first_thread_in_core(t, c, p)) 3949 delta_core(c, c2); 3950 3951 /* always calculate thread delta */ 3952 retval = delta_thread(t, t2, c2); /* c2 is core delta */ 3953 3954 /* calculate package delta only for 1st core in package */ 3955 if (is_cpu_first_core_in_package(t, c, p)) 3956 retval |= delta_package(p, p2); 3957 3958 return retval; 3959 } 3960 3961 void delta_platform(struct platform_counters *new, struct platform_counters *old) 3962 { 3963 old->energy_psys.raw_value = new->energy_psys.raw_value - old->energy_psys.raw_value; 3964 } 3965 3966 void rapl_counter_clear(struct rapl_counter *c) 3967 { 3968 c->raw_value = 0; 3969 c->scale = 0.0; 3970 c->unit = RAPL_UNIT_INVALID; 3971 } 3972 
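/*
 * Worked example (illustrative numbers, not from any particular system)
 * of how the thread deltas computed above feed the printed columns.
 * Over a 1.0-second interval on a CPU with a 2.0 GHz TSC that ran
 * 25% of the time at 3.0 GHz, the deltas would be roughly:
 *
 *   tsc   = 2.0e9     TSC cycles in the interval
 *   mperf = 0.5e9     TSC-rate cycles while not halted (25%)
 *   aperf = 0.75e9    actual cycles while not halted (at 3.0 GHz)
 *
 *   Busy%   = 100 * mperf / tsc                        = 25.00
 *   Bzy_MHz = (tsc / 1e6) * (aperf / mperf) / interval = 3000
 *   Avg_MHz = (aperf / 1e6) / interval                 = 750
 *
 * (tsc_tweak, which corrects Busy% on parts where the TSC and MPERF
 * tick at different rates, is taken to be 1.0 here.)
 */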
3973 void clear_counters(PER_THREAD_PARAMS)
3974 {
3975 int i;
3976 struct msr_counter *mp;
3977 
3978 t->tv_begin.tv_sec = 0;
3979 t->tv_begin.tv_usec = 0;
3980 t->tv_end.tv_sec = 0;
3981 t->tv_end.tv_usec = 0;
3982 t->tv_delta.tv_sec = 0;
3983 t->tv_delta.tv_usec = 0;
3984 
3985 t->tsc = 0;
3986 t->aperf = 0;
3987 t->mperf = 0;
3988 t->c1 = 0;
3989 
3990 t->instr_count = 0;
3991 
3992 t->irq_count = 0;
3993 t->nmi_count = 0;
3994 t->smi_count = 0;
3995 
3996 c->c3 = 0;
3997 c->c6 = 0;
3998 c->c7 = 0;
3999 c->mc6_us = 0;
4000 c->core_temp_c = 0;
4001 rapl_counter_clear(&c->core_energy);
4002 c->core_throt_cnt = 0;
4003 
4004 p->pkg_wtd_core_c0 = 0;
4005 p->pkg_any_core_c0 = 0;
4006 p->pkg_any_gfxe_c0 = 0;
4007 p->pkg_both_core_gfxe_c0 = 0;
4008 
4009 p->pc2 = 0;
4010 if (DO_BIC(BIC_Pkgpc3))
4011 p->pc3 = 0;
4012 if (DO_BIC(BIC_Pkgpc6))
4013 p->pc6 = 0;
4014 if (DO_BIC(BIC_Pkgpc7))
4015 p->pc7 = 0;
4016 p->pc8 = 0;
4017 p->pc9 = 0;
4018 p->pc10 = 0;
4019 p->die_c6 = 0;
4020 p->cpu_lpi = 0;
4021 p->sys_lpi = 0;
4022 
4023 rapl_counter_clear(&p->energy_pkg);
4024 rapl_counter_clear(&p->energy_dram);
4025 rapl_counter_clear(&p->energy_cores);
4026 rapl_counter_clear(&p->energy_gfx);
4027 rapl_counter_clear(&p->rapl_pkg_perf_status);
4028 rapl_counter_clear(&p->rapl_dram_perf_status);
4029 p->pkg_temp_c = 0;
4030 
4031 p->gfx_rc6_ms = 0;
4032 p->uncore_mhz = 0;
4033 p->gfx_mhz = 0;
4034 p->gfx_act_mhz = 0;
4035 p->sam_mc6_ms = 0;
4036 p->sam_mhz = 0;
4037 p->sam_act_mhz = 0;
4038 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
4039 t->counter[i] = 0;
4040 
4041 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
4042 c->counter[i] = 0;
4043 
4044 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
4045 p->counter[i] = 0;
4046 
4047 memset(&t->perf_counter[0], 0, sizeof(t->perf_counter));
4048 memset(&c->perf_counter[0], 0, sizeof(c->perf_counter));
4049 memset(&p->perf_counter[0], 0, sizeof(p->perf_counter));
4050 
/* memset() takes a byte count, so use sizeof(), not ARRAY_SIZE(), or only part of each array is cleared */
4051 memset(&t->pmt_counter[0], 0, sizeof(t->pmt_counter));
4052 memset(&c->pmt_counter[0], 0, sizeof(c->pmt_counter));
4053 memset(&p->pmt_counter[0], 0, sizeof(p->pmt_counter));
4054 }
4055 
4056 void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src)
4057 {
4058 /* Copy unit and scale from src if dst is not initialized */
4059 if (dst->unit == RAPL_UNIT_INVALID) {
4060 dst->unit = src->unit;
4061 dst->scale = src->scale;
4062 }
4063 
4064 assert(dst->unit == src->unit);
4065 assert(dst->scale == src->scale);
4066 
4067 dst->raw_value += src->raw_value;
4068 }
4069 
4070 int sum_counters(PER_THREAD_PARAMS)
4071 {
4072 int i;
4073 struct msr_counter *mp;
4074 struct perf_counter_info *pp;
4075 struct pmt_counter *ppmt;
4076 
4077 /* copy unchanging apic_ids */
4078 if (DO_BIC(BIC_APIC))
4079 average.threads.apic_id = t->apic_id;
4080 if (DO_BIC(BIC_X2APIC))
4081 average.threads.x2apic_id = t->x2apic_id;
4082 
4083 /* remember first tv_begin */
4084 if (average.threads.tv_begin.tv_sec == 0)
4085 average.threads.tv_begin = procsysfs_tv_begin;
4086 
4087 /* remember last tv_end */
4088 average.threads.tv_end = t->tv_end;
4089 
4090 average.threads.tsc += t->tsc;
4091 average.threads.aperf += t->aperf;
4092 average.threads.mperf += t->mperf;
4093 average.threads.c1 += t->c1;
4094 
4095 average.threads.instr_count += t->instr_count;
4096 
4097 average.threads.irq_count += t->irq_count;
4098 average.threads.nmi_count += t->nmi_count;
4099 average.threads.smi_count += t->smi_count;
4100 
4101 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
4102 if (mp->format ==
FORMAT_RAW) 4103 continue; 4104 average.threads.counter[i] += t->counter[i]; 4105 } 4106 4107 for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { 4108 if (pp->format == FORMAT_RAW) 4109 continue; 4110 average.threads.perf_counter[i] += t->perf_counter[i]; 4111 } 4112 4113 for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { 4114 average.threads.pmt_counter[i] += t->pmt_counter[i]; 4115 } 4116 4117 /* sum per-core values only for 1st thread in core */ 4118 if (!is_cpu_first_thread_in_core(t, c, p)) 4119 return 0; 4120 4121 average.cores.c3 += c->c3; 4122 average.cores.c6 += c->c6; 4123 average.cores.c7 += c->c7; 4124 average.cores.mc6_us += c->mc6_us; 4125 4126 average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); 4127 average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt); 4128 4129 rapl_counter_accumulate(&average.cores.core_energy, &c->core_energy); 4130 4131 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 4132 if (mp->format == FORMAT_RAW) 4133 continue; 4134 average.cores.counter[i] += c->counter[i]; 4135 } 4136 4137 for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { 4138 if (pp->format == FORMAT_RAW) 4139 continue; 4140 average.cores.perf_counter[i] += c->perf_counter[i]; 4141 } 4142 4143 for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { 4144 average.cores.pmt_counter[i] += c->pmt_counter[i]; 4145 } 4146 4147 /* sum per-pkg values only for 1st core in pkg */ 4148 if (!is_cpu_first_core_in_package(t, c, p)) 4149 return 0; 4150 4151 if (DO_BIC(BIC_Totl_c0)) 4152 average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0; 4153 if (DO_BIC(BIC_Any_c0)) 4154 average.packages.pkg_any_core_c0 += p->pkg_any_core_c0; 4155 if (DO_BIC(BIC_GFX_c0)) 4156 average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0; 4157 if (DO_BIC(BIC_CPUGFX)) 4158 average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0; 4159 4160 average.packages.pc2 += p->pc2; 4161 if (DO_BIC(BIC_Pkgpc3)) 4162 average.packages.pc3 += p->pc3; 4163 if (DO_BIC(BIC_Pkgpc6)) 4164 average.packages.pc6 += p->pc6; 4165 if (DO_BIC(BIC_Pkgpc7)) 4166 average.packages.pc7 += p->pc7; 4167 average.packages.pc8 += p->pc8; 4168 average.packages.pc9 += p->pc9; 4169 average.packages.pc10 += p->pc10; 4170 average.packages.die_c6 += p->die_c6; 4171 4172 average.packages.cpu_lpi = p->cpu_lpi; 4173 average.packages.sys_lpi = p->sys_lpi; 4174 4175 rapl_counter_accumulate(&average.packages.energy_pkg, &p->energy_pkg); 4176 rapl_counter_accumulate(&average.packages.energy_dram, &p->energy_dram); 4177 rapl_counter_accumulate(&average.packages.energy_cores, &p->energy_cores); 4178 rapl_counter_accumulate(&average.packages.energy_gfx, &p->energy_gfx); 4179 4180 average.packages.gfx_rc6_ms = p->gfx_rc6_ms; 4181 average.packages.uncore_mhz = p->uncore_mhz; 4182 average.packages.gfx_mhz = p->gfx_mhz; 4183 average.packages.gfx_act_mhz = p->gfx_act_mhz; 4184 average.packages.sam_mc6_ms = p->sam_mc6_ms; 4185 average.packages.sam_mhz = p->sam_mhz; 4186 average.packages.sam_act_mhz = p->sam_act_mhz; 4187 4188 average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); 4189 4190 rapl_counter_accumulate(&average.packages.rapl_pkg_perf_status, &p->rapl_pkg_perf_status); 4191 rapl_counter_accumulate(&average.packages.rapl_dram_perf_status, &p->rapl_dram_perf_status); 4192 4193 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 4194 if ((mp->format == FORMAT_RAW) && (topo.num_packages == 0)) 4195 average.packages.counter[i] = p->counter[i]; 4196 else 4197 
average.packages.counter[i] += p->counter[i]; 4198 } 4199 4200 for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { 4201 if ((pp->format == FORMAT_RAW) && (topo.num_packages == 0)) 4202 average.packages.perf_counter[i] = p->perf_counter[i]; 4203 else 4204 average.packages.perf_counter[i] += p->perf_counter[i]; 4205 } 4206 4207 for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { 4208 average.packages.pmt_counter[i] += p->pmt_counter[i]; 4209 } 4210 4211 return 0; 4212 } 4213 4214 /* 4215 * sum the counters for all cpus in the system 4216 * compute the weighted average 4217 */ 4218 void compute_average(PER_THREAD_PARAMS) 4219 { 4220 int i; 4221 struct msr_counter *mp; 4222 struct perf_counter_info *pp; 4223 struct pmt_counter *ppmt; 4224 4225 clear_counters(&average.threads, &average.cores, &average.packages); 4226 4227 for_all_cpus(sum_counters, t, c, p); 4228 4229 /* Use the global time delta for the average. */ 4230 average.threads.tv_delta = tv_delta; 4231 4232 average.threads.tsc /= topo.allowed_cpus; 4233 average.threads.aperf /= topo.allowed_cpus; 4234 average.threads.mperf /= topo.allowed_cpus; 4235 average.threads.instr_count /= topo.allowed_cpus; 4236 average.threads.c1 /= topo.allowed_cpus; 4237 4238 if (average.threads.irq_count > 9999999) 4239 sums_need_wide_columns = 1; 4240 if (average.threads.nmi_count > 9999999) 4241 sums_need_wide_columns = 1; 4242 4243 average.cores.c3 /= topo.allowed_cores; 4244 average.cores.c6 /= topo.allowed_cores; 4245 average.cores.c7 /= topo.allowed_cores; 4246 average.cores.mc6_us /= topo.allowed_cores; 4247 4248 if (DO_BIC(BIC_Totl_c0)) 4249 average.packages.pkg_wtd_core_c0 /= topo.allowed_packages; 4250 if (DO_BIC(BIC_Any_c0)) 4251 average.packages.pkg_any_core_c0 /= topo.allowed_packages; 4252 if (DO_BIC(BIC_GFX_c0)) 4253 average.packages.pkg_any_gfxe_c0 /= topo.allowed_packages; 4254 if (DO_BIC(BIC_CPUGFX)) 4255 average.packages.pkg_both_core_gfxe_c0 /= topo.allowed_packages; 4256 4257 average.packages.pc2 /= topo.allowed_packages; 4258 if (DO_BIC(BIC_Pkgpc3)) 4259 average.packages.pc3 /= topo.allowed_packages; 4260 if (DO_BIC(BIC_Pkgpc6)) 4261 average.packages.pc6 /= topo.allowed_packages; 4262 if (DO_BIC(BIC_Pkgpc7)) 4263 average.packages.pc7 /= topo.allowed_packages; 4264 4265 average.packages.pc8 /= topo.allowed_packages; 4266 average.packages.pc9 /= topo.allowed_packages; 4267 average.packages.pc10 /= topo.allowed_packages; 4268 average.packages.die_c6 /= topo.allowed_packages; 4269 4270 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 4271 if (mp->format == FORMAT_RAW) 4272 continue; 4273 if (mp->type == COUNTER_ITEMS) { 4274 if (average.threads.counter[i] > 9999999) 4275 sums_need_wide_columns = 1; 4276 continue; 4277 } 4278 average.threads.counter[i] /= topo.allowed_cpus; 4279 } 4280 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 4281 if (mp->format == FORMAT_RAW) 4282 continue; 4283 if (mp->type == COUNTER_ITEMS) { 4284 if (average.cores.counter[i] > 9999999) 4285 sums_need_wide_columns = 1; 4286 } 4287 average.cores.counter[i] /= topo.allowed_cores; 4288 } 4289 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 4290 if (mp->format == FORMAT_RAW) 4291 continue; 4292 if (mp->type == COUNTER_ITEMS) { 4293 if (average.packages.counter[i] > 9999999) 4294 sums_need_wide_columns = 1; 4295 } 4296 average.packages.counter[i] /= topo.allowed_packages; 4297 } 4298 4299 for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { 4300 if (pp->format == FORMAT_RAW) 4301 continue; 4302 if (pp->type == COUNTER_ITEMS) { 4303 if 
(average.threads.perf_counter[i] > 9999999) 4304 sums_need_wide_columns = 1; 4305 continue; 4306 } 4307 average.threads.perf_counter[i] /= topo.allowed_cpus; 4308 } 4309 for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { 4310 if (pp->format == FORMAT_RAW) 4311 continue; 4312 if (pp->type == COUNTER_ITEMS) { 4313 if (average.cores.perf_counter[i] > 9999999) 4314 sums_need_wide_columns = 1; 4315 } 4316 average.cores.perf_counter[i] /= topo.allowed_cores; 4317 } 4318 for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { 4319 if (pp->format == FORMAT_RAW) 4320 continue; 4321 if (pp->type == COUNTER_ITEMS) { 4322 if (average.packages.perf_counter[i] > 9999999) 4323 sums_need_wide_columns = 1; 4324 } 4325 average.packages.perf_counter[i] /= topo.allowed_packages; 4326 } 4327 4328 for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { 4329 average.threads.pmt_counter[i] /= topo.allowed_cpus; 4330 } 4331 for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { 4332 average.cores.pmt_counter[i] /= topo.allowed_cores; 4333 } 4334 for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { 4335 average.packages.pmt_counter[i] /= topo.allowed_packages; 4336 } 4337 } 4338 4339 static unsigned long long rdtsc(void) 4340 { 4341 unsigned int low, high; 4342 4343 asm volatile ("rdtsc":"=a" (low), "=d"(high)); 4344 4345 return low | ((unsigned long long)high) << 32; 4346 } 4347 4348 /* 4349 * Open a file, and exit on failure 4350 */ 4351 FILE *fopen_or_die(const char *path, const char *mode) 4352 { 4353 FILE *filep = fopen(path, mode); 4354 4355 if (!filep) 4356 err(1, "%s: open failed", path); 4357 return filep; 4358 } 4359 4360 /* 4361 * snapshot_sysfs_counter() 4362 * 4363 * return snapshot of given counter 4364 */ 4365 unsigned long long snapshot_sysfs_counter(char *path) 4366 { 4367 FILE *fp; 4368 int retval; 4369 unsigned long long counter; 4370 4371 fp = fopen_or_die(path, "r"); 4372 4373 retval = fscanf(fp, "%lld", &counter); 4374 if (retval != 1) 4375 err(1, "snapshot_sysfs_counter(%s)", path); 4376 4377 fclose(fp); 4378 4379 return counter; 4380 } 4381 4382 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp, char *counter_path) 4383 { 4384 if (mp->msr_num != 0) { 4385 assert(!no_msr); 4386 if (get_msr(cpu, mp->msr_num, counterp)) 4387 return -1; 4388 } else { 4389 char path[128 + PATH_BYTES]; 4390 4391 if (mp->flags & SYSFS_PERCPU) { 4392 sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", cpu, mp->sp->path); 4393 4394 *counterp = snapshot_sysfs_counter(path); 4395 } else { 4396 *counterp = snapshot_sysfs_counter(counter_path); 4397 } 4398 } 4399 4400 return 0; 4401 } 4402 4403 unsigned long long get_legacy_uncore_mhz(int package) 4404 { 4405 char path[128]; 4406 int die; 4407 static int warn_once; 4408 4409 /* 4410 * for this package, use the first die_id that exists 4411 */ 4412 for (die = 0; die <= topo.max_die_id; ++die) { 4413 4414 sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", 4415 package, die); 4416 4417 if (access(path, R_OK) == 0) 4418 return (snapshot_sysfs_counter(path) / 1000); 4419 } 4420 if (!warn_once) { 4421 warnx("BUG: %s: No %s", __func__, path); 4422 warn_once = 1; 4423 } 4424 4425 return 0; 4426 } 4427 4428 int get_epb(int cpu) 4429 { 4430 char path[128 + PATH_BYTES]; 4431 unsigned long long msr; 4432 int ret, epb = -1; 4433 FILE *fp; 4434 4435 sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu); 4436 4437 fp = fopen(path, "r"); 4438 if (!fp) 4439 goto 
msr_fallback;
4440 
4441 ret = fscanf(fp, "%d", &epb);
4442 if (ret != 1)
4443 err(1, "%s(%s)", __func__, path);
4444 
4445 fclose(fp);
4446 
4447 return epb;
4448 
4449 msr_fallback:
4450 if (no_msr)
4451 return -1;
4452 
4453 get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
4454 
4455 return msr & 0xf;
4456 }
4457 
4458 void get_apic_id(struct thread_data *t)
4459 {
4460 unsigned int eax, ebx, ecx, edx;
4461 
4462 if (DO_BIC(BIC_APIC)) {
4463 eax = ebx = ecx = edx = 0;
4464 __cpuid(1, eax, ebx, ecx, edx);
4465 
4466 t->apic_id = (ebx >> 24) & 0xff;
4467 }
4468 
4469 if (!DO_BIC(BIC_X2APIC))
4470 return;
4471 
4472 if (authentic_amd || hygon_genuine) {
4473 unsigned int topology_extensions;
4474 
4475 if (max_extended_level < 0x8000001e)
4476 return;
4477 
4478 eax = ebx = ecx = edx = 0;
4479 __cpuid(0x80000001, eax, ebx, ecx, edx);
4480 topology_extensions = ecx & (1 << 22);
4481 
4482 if (topology_extensions == 0)
4483 return;
4484 
4485 eax = ebx = ecx = edx = 0;
4486 __cpuid(0x8000001e, eax, ebx, ecx, edx);
4487 
4488 t->x2apic_id = eax;
4489 return;
4490 }
4491 
4492 if (!genuine_intel)
4493 return;
4494 
4495 if (max_level < 0xb)
4496 return;
4497 
4498 ecx = 0;
4499 __cpuid(0xb, eax, ebx, ecx, edx);
4500 t->x2apic_id = edx;
4501 
4502 if (debug && (t->apic_id != (t->x2apic_id & 0xff)))
4503 fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
4504 }
4505 
4506 int get_core_throt_cnt(int cpu, unsigned long long *cnt)
4507 {
4508 char path[128 + PATH_BYTES];
4509 unsigned long long tmp;
4510 FILE *fp;
4511 int ret;
4512 
4513 sprintf(path, "/sys/devices/system/cpu/cpu%d/thermal_throttle/core_throttle_count", cpu);
4514 fp = fopen(path, "r");
4515 if (!fp)
4516 return -1;
4517 ret = fscanf(fp, "%llu", &tmp);
4518 fclose(fp);
4519 if (ret != 1)
4520 return -1;
4521 *cnt = tmp;
4522 
4523 return 0;
4524 }
4525 
4526 struct amperf_group_fd {
4527 int aperf; /* Also the group descriptor */
4528 int mperf;
4529 };
4530 
4531 static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr)
4532 {
4533 int fdmt;
4534 int bytes_read;
4535 char buf[64];
4536 int ret = -1;
4537 
4538 fdmt = open(path, O_RDONLY, 0);
4539 if (fdmt == -1) {
4540 if (debug)
4541 fprintf(stderr, "Failed to open perf counter info %s\n", path);
4542 ret = -1;
4543 goto cleanup_and_exit;
4544 }
4545 
4546 bytes_read = read(fdmt, buf, sizeof(buf) - 1);
4547 if (bytes_read <= 0 || bytes_read >= (int)sizeof(buf)) {
4548 if (debug)
4549 fprintf(stderr, "Failed to read perf counter info %s\n", path);
4550 ret = -1;
4551 goto cleanup_and_exit;
4552 }
4553 
4554 buf[bytes_read] = '\0';
4555 
4556 if (sscanf(buf, parse_format, value_ptr) != 1) {
4557 if (debug)
4558 fprintf(stderr, "Failed to parse perf counter info %s\n", path);
4559 ret = -1;
4560 goto cleanup_and_exit;
4561 }
4562 
4563 ret = 0;
4564 
4565 cleanup_and_exit:
4566 close(fdmt);
4567 return ret;
4568 }
4569 
4570 static unsigned int read_perf_counter_info_n(const char *const path, const char *const parse_format)
4571 {
4572 unsigned int v;
4573 int status;
4574 
4575 status = read_perf_counter_info(path, parse_format, &v);
4576 if (status)
4577 v = -1;
4578 
4579 return v;
4580 }
4581 
4582 static unsigned int read_perf_type(const char *subsys)
4583 {
4584 const char *const path_format = "/sys/bus/event_source/devices/%s/type";
4585 const char *const format = "%u";
4586 char path[128];
4587 
4588 snprintf(path, sizeof(path), path_format, subsys);
4589 
4590 return read_perf_counter_info_n(path, format);
4591 }
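/*
 * Illustrative sketch of the sysfs layout parsed by read_perf_type()
 * above and read_perf_config() below. The paths follow the kernel's
 * perf event_source conventions; the values shown are made-up examples
 * and vary by kernel and platform:
 *
 *   /sys/bus/event_source/devices/<subsys>/type
 *       "10"                    -> read_perf_type(<subsys>) == 10
 *   /sys/bus/event_source/devices/<subsys>/events/<event>
 *       "event=0x02"            -> read_perf_config() == 0x02
 *       "event=0x01,umask=0x80" -> read_perf_config() == 0x8001
 *
 * read_perf_config() returns (umask << 8) | event, which is the
 * encoding this tool passes in perf_event_attr.config.
 */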
4592 4593 static unsigned int read_perf_config(const char *subsys, const char *event_name) 4594 { 4595 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s"; 4596 FILE *fconfig = NULL; 4597 char path[128]; 4598 char config_str[64]; 4599 unsigned int config; 4600 unsigned int umask; 4601 bool has_config = false; 4602 bool has_umask = false; 4603 unsigned int ret = -1; 4604 4605 snprintf(path, sizeof(path), path_format, subsys, event_name); 4606 4607 fconfig = fopen(path, "r"); 4608 if (!fconfig) 4609 return -1; 4610 4611 if (fgets(config_str, ARRAY_SIZE(config_str), fconfig) != config_str) 4612 goto cleanup_and_exit; 4613 4614 for (char *pconfig_str = &config_str[0]; pconfig_str;) { 4615 if (sscanf(pconfig_str, "event=%x", &config) == 1) { 4616 has_config = true; 4617 goto next; 4618 } 4619 4620 if (sscanf(pconfig_str, "umask=%x", &umask) == 1) { 4621 has_umask = true; 4622 goto next; 4623 } 4624 4625 next: 4626 pconfig_str = strchr(pconfig_str, ','); 4627 if (pconfig_str) { 4628 *pconfig_str = '\0'; 4629 ++pconfig_str; 4630 } 4631 } 4632 4633 if (!has_umask) 4634 umask = 0; 4635 4636 if (has_config) 4637 ret = (umask << 8) | config; 4638 4639 cleanup_and_exit: 4640 fclose(fconfig); 4641 return ret; 4642 } 4643 4644 static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_name) 4645 { 4646 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.unit"; 4647 const char *const format = "%s"; 4648 char path[128]; 4649 char unit_buffer[16]; 4650 4651 snprintf(path, sizeof(path), path_format, subsys, event_name); 4652 4653 read_perf_counter_info(path, format, &unit_buffer); 4654 if (strcmp("Joules", unit_buffer) == 0) 4655 return RAPL_UNIT_JOULES; 4656 4657 return RAPL_UNIT_INVALID; 4658 } 4659 4660 static double read_perf_scale(const char *subsys, const char *event_name) 4661 { 4662 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.scale"; 4663 const char *const format = "%lf"; 4664 char path[128]; 4665 double scale; 4666 4667 snprintf(path, sizeof(path), path_format, subsys, event_name); 4668 4669 if (read_perf_counter_info(path, format, &scale)) 4670 return 0.0; 4671 4672 return scale; 4673 } 4674 4675 size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci) 4676 { 4677 size_t ret = 0; 4678 4679 for (int i = 0; i < NUM_RAPL_COUNTERS; ++i) 4680 if (rci->source[i] == COUNTER_SOURCE_PERF) 4681 ++ret; 4682 4683 return ret; 4684 } 4685 4686 static size_t cstate_counter_info_count_perf(const struct cstate_counter_info_t *cci) 4687 { 4688 size_t ret = 0; 4689 4690 for (int i = 0; i < NUM_CSTATE_COUNTERS; ++i) 4691 if (cci->source[i] == COUNTER_SOURCE_PERF) 4692 ++ret; 4693 4694 return ret; 4695 } 4696 4697 void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci, unsigned int idx) 4698 { 4699 if (rci->source[idx] == COUNTER_SOURCE_NONE) 4700 return; 4701 4702 rc->raw_value = rci->data[idx]; 4703 rc->unit = rci->unit[idx]; 4704 rc->scale = rci->scale[idx]; 4705 } 4706 4707 int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct pkg_data *p) 4708 { 4709 struct platform_counters *pplat_cnt = p == package_odd ? 
&platform_counters_odd : &platform_counters_even; 4710 unsigned long long perf_data[NUM_RAPL_COUNTERS + 1]; 4711 struct rapl_counter_info_t *rci; 4712 4713 if (debug >= 2) 4714 fprintf(stderr, "%s: cpu%d domain%d\n", __func__, cpu, domain); 4715 4716 assert(rapl_counter_info_perdomain); 4717 assert(domain < rapl_counter_info_perdomain_size); 4718 4719 rci = &rapl_counter_info_perdomain[domain]; 4720 4721 /* 4722 * If we have any perf counters to read, read them all now, in bulk 4723 */ 4724 if (rci->fd_perf != -1) { 4725 size_t num_perf_counters = rapl_counter_info_count_perf(rci); 4726 const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long); 4727 const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data)); 4728 4729 if (actual_read_size != expected_read_size) 4730 err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, 4731 actual_read_size); 4732 } 4733 4734 for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) { 4735 switch (rci->source[i]) { 4736 case COUNTER_SOURCE_NONE: 4737 rci->data[i] = 0; 4738 break; 4739 4740 case COUNTER_SOURCE_PERF: 4741 assert(pi < ARRAY_SIZE(perf_data)); 4742 assert(rci->fd_perf != -1); 4743 4744 if (debug >= 2) 4745 fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n", 4746 i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]); 4747 4748 rci->data[i] = perf_data[pi]; 4749 4750 ++pi; 4751 break; 4752 4753 case COUNTER_SOURCE_MSR: 4754 if (debug >= 2) 4755 fprintf(stderr, "Reading rapl counter via msr at %u\n", i); 4756 4757 assert(!no_msr); 4758 if (rci->flags[i] & RAPL_COUNTER_FLAG_USE_MSR_SUM) { 4759 if (get_msr_sum(cpu, rci->msr[i], &rci->data[i])) 4760 return -13 - i; 4761 } else { 4762 if (get_msr(cpu, rci->msr[i], &rci->data[i])) 4763 return -13 - i; 4764 } 4765 4766 rci->data[i] &= rci->msr_mask[i]; 4767 if (rci->msr_shift[i] >= 0) 4768 rci->data[i] >>= abs(rci->msr_shift[i]); 4769 else 4770 rci->data[i] <<= abs(rci->msr_shift[i]); 4771 4772 break; 4773 } 4774 } 4775 4776 BUILD_BUG_ON(NUM_RAPL_COUNTERS != 8); 4777 write_rapl_counter(&p->energy_pkg, rci, RAPL_RCI_INDEX_ENERGY_PKG); 4778 write_rapl_counter(&p->energy_cores, rci, RAPL_RCI_INDEX_ENERGY_CORES); 4779 write_rapl_counter(&p->energy_dram, rci, RAPL_RCI_INDEX_DRAM); 4780 write_rapl_counter(&p->energy_gfx, rci, RAPL_RCI_INDEX_GFX); 4781 write_rapl_counter(&p->rapl_pkg_perf_status, rci, RAPL_RCI_INDEX_PKG_PERF_STATUS); 4782 write_rapl_counter(&p->rapl_dram_perf_status, rci, RAPL_RCI_INDEX_DRAM_PERF_STATUS); 4783 write_rapl_counter(&c->core_energy, rci, RAPL_RCI_INDEX_CORE_ENERGY); 4784 write_rapl_counter(&pplat_cnt->energy_psys, rci, RAPL_RCI_INDEX_ENERGY_PLATFORM); 4785 4786 return 0; 4787 } 4788 4789 char *find_sysfs_path_by_id(struct sysfs_path *sp, int id) 4790 { 4791 while (sp) { 4792 if (sp->id == id) 4793 return (sp->path); 4794 sp = sp->next; 4795 } 4796 if (debug) 4797 warnx("%s: id%d not found", __func__, id); 4798 return NULL; 4799 } 4800 4801 int get_cstate_counters(unsigned int cpu, PER_THREAD_PARAMS) 4802 { 4803 /* 4804 * Overcommit memory a little bit here, 4805 * but skip calculating exact sizes for the buffers. 
4806 */ 4807 unsigned long long perf_data[NUM_CSTATE_COUNTERS]; 4808 unsigned long long perf_data_core[NUM_CSTATE_COUNTERS + 1]; 4809 unsigned long long perf_data_pkg[NUM_CSTATE_COUNTERS + 1]; 4810 4811 struct cstate_counter_info_t *cci; 4812 4813 if (debug >= 2) 4814 fprintf(stderr, "%s: cpu%d\n", __func__, cpu); 4815 4816 assert(ccstate_counter_info); 4817 assert(cpu <= ccstate_counter_info_size); 4818 4819 ZERO_ARRAY(perf_data); 4820 ZERO_ARRAY(perf_data_core); 4821 ZERO_ARRAY(perf_data_pkg); 4822 4823 cci = &ccstate_counter_info[cpu]; 4824 4825 /* 4826 * If we have any perf counters to read, read them all now, in bulk 4827 */ 4828 const size_t num_perf_counters = cstate_counter_info_count_perf(cci); 4829 ssize_t expected_read_size = num_perf_counters * sizeof(unsigned long long); 4830 ssize_t actual_read_size_core = 0, actual_read_size_pkg = 0; 4831 4832 if (cci->fd_perf_core != -1) { 4833 /* Each descriptor read begins with number of counters read. */ 4834 expected_read_size += sizeof(unsigned long long); 4835 4836 actual_read_size_core = read(cci->fd_perf_core, &perf_data_core[0], sizeof(perf_data_core)); 4837 4838 if (actual_read_size_core <= 0) 4839 err(-1, "%s: read perf %s: %ld", __func__, "core", actual_read_size_core); 4840 } 4841 4842 if (cci->fd_perf_pkg != -1) { 4843 /* Each descriptor read begins with number of counters read. */ 4844 expected_read_size += sizeof(unsigned long long); 4845 4846 actual_read_size_pkg = read(cci->fd_perf_pkg, &perf_data_pkg[0], sizeof(perf_data_pkg)); 4847 4848 if (actual_read_size_pkg <= 0) 4849 err(-1, "%s: read perf %s: %ld", __func__, "pkg", actual_read_size_pkg); 4850 } 4851 4852 const ssize_t actual_read_size_total = actual_read_size_core + actual_read_size_pkg; 4853 4854 if (actual_read_size_total != expected_read_size) 4855 err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, actual_read_size_total); 4856 4857 /* 4858 * Copy ccstate and pcstate data into unified buffer. 4859 * 4860 * Skip first element from core and pkg buffers. 4861 * Kernel puts there how many counters were read. 4862 */ 4863 const size_t num_core_counters = perf_data_core[0]; 4864 const size_t num_pkg_counters = perf_data_pkg[0]; 4865 4866 assert(num_perf_counters == num_core_counters + num_pkg_counters); 4867 4868 /* Copy ccstate perf data */ 4869 memcpy(&perf_data[0], &perf_data_core[1], num_core_counters * sizeof(unsigned long long)); 4870 4871 /* Copy pcstate perf data */ 4872 memcpy(&perf_data[num_core_counters], &perf_data_pkg[1], num_pkg_counters * sizeof(unsigned long long)); 4873 4874 for (unsigned int i = 0, pi = 0; i < NUM_CSTATE_COUNTERS; ++i) { 4875 switch (cci->source[i]) { 4876 case COUNTER_SOURCE_NONE: 4877 break; 4878 4879 case COUNTER_SOURCE_PERF: 4880 assert(pi < ARRAY_SIZE(perf_data)); 4881 assert(cci->fd_perf_core != -1 || cci->fd_perf_pkg != -1); 4882 4883 if (debug >= 2) 4884 fprintf(stderr, "cstate via %s %u: %llu\n", "perf", i, perf_data[pi]); 4885 4886 cci->data[i] = perf_data[pi]; 4887 4888 ++pi; 4889 break; 4890 4891 case COUNTER_SOURCE_MSR: 4892 assert(!no_msr); 4893 if (get_msr(cpu, cci->msr[i], &cci->data[i])) 4894 return -13 - i; 4895 4896 if (debug >= 2) 4897 fprintf(stderr, "cstate via %s0x%llx %u: %llu\n", "msr", cci->msr[i], i, cci->data[i]); 4898 4899 break; 4900 } 4901 } 4902 4903 /* 4904 * Helper to write the data only if the source of 4905 * the counter for the current cpu is not none. 
4906 * 4907 * Otherwise we would overwrite core data with 0 (default value), 4908 * when invoked for the thread sibling. 4909 */ 4910 #define PERF_COUNTER_WRITE_DATA(out_counter, index) do { \ 4911 if (cci->source[index] != COUNTER_SOURCE_NONE) \ 4912 out_counter = cci->data[index]; \ 4913 } while (0) 4914 4915 BUILD_BUG_ON(NUM_CSTATE_COUNTERS != 11); 4916 4917 PERF_COUNTER_WRITE_DATA(t->c1, CCSTATE_RCI_INDEX_C1_RESIDENCY); 4918 PERF_COUNTER_WRITE_DATA(c->c3, CCSTATE_RCI_INDEX_C3_RESIDENCY); 4919 PERF_COUNTER_WRITE_DATA(c->c6, CCSTATE_RCI_INDEX_C6_RESIDENCY); 4920 PERF_COUNTER_WRITE_DATA(c->c7, CCSTATE_RCI_INDEX_C7_RESIDENCY); 4921 4922 PERF_COUNTER_WRITE_DATA(p->pc2, PCSTATE_RCI_INDEX_C2_RESIDENCY); 4923 PERF_COUNTER_WRITE_DATA(p->pc3, PCSTATE_RCI_INDEX_C3_RESIDENCY); 4924 PERF_COUNTER_WRITE_DATA(p->pc6, PCSTATE_RCI_INDEX_C6_RESIDENCY); 4925 PERF_COUNTER_WRITE_DATA(p->pc7, PCSTATE_RCI_INDEX_C7_RESIDENCY); 4926 PERF_COUNTER_WRITE_DATA(p->pc8, PCSTATE_RCI_INDEX_C8_RESIDENCY); 4927 PERF_COUNTER_WRITE_DATA(p->pc9, PCSTATE_RCI_INDEX_C9_RESIDENCY); 4928 PERF_COUNTER_WRITE_DATA(p->pc10, PCSTATE_RCI_INDEX_C10_RESIDENCY); 4929 4930 #undef PERF_COUNTER_WRITE_DATA 4931 4932 return 0; 4933 } 4934 4935 size_t msr_counter_info_count_perf(const struct msr_counter_info_t *mci) 4936 { 4937 size_t ret = 0; 4938 4939 for (int i = 0; i < NUM_MSR_COUNTERS; ++i) 4940 if (mci->source[i] == COUNTER_SOURCE_PERF) 4941 ++ret; 4942 4943 return ret; 4944 } 4945 4946 int get_smi_aperf_mperf(unsigned int cpu, struct thread_data *t) 4947 { 4948 unsigned long long perf_data[NUM_MSR_COUNTERS + 1]; 4949 4950 struct msr_counter_info_t *mci; 4951 4952 if (debug >= 2) 4953 fprintf(stderr, "%s: cpu%d\n", __func__, cpu); 4954 4955 assert(msr_counter_info); 4956 assert(cpu <= msr_counter_info_size); 4957 4958 mci = &msr_counter_info[cpu]; 4959 4960 ZERO_ARRAY(perf_data); 4961 ZERO_ARRAY(mci->data); 4962 4963 if (mci->fd_perf != -1) { 4964 const size_t num_perf_counters = msr_counter_info_count_perf(mci); 4965 const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long); 4966 const ssize_t actual_read_size = read(mci->fd_perf, &perf_data[0], sizeof(perf_data)); 4967 4968 if (actual_read_size != expected_read_size) 4969 err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, 4970 actual_read_size); 4971 } 4972 4973 for (unsigned int i = 0, pi = 1; i < NUM_MSR_COUNTERS; ++i) { 4974 switch (mci->source[i]) { 4975 case COUNTER_SOURCE_NONE: 4976 break; 4977 4978 case COUNTER_SOURCE_PERF: 4979 assert(pi < ARRAY_SIZE(perf_data)); 4980 assert(mci->fd_perf != -1); 4981 4982 if (debug >= 2) 4983 fprintf(stderr, "Reading msr counter via perf at %u: %llu\n", i, perf_data[pi]); 4984 4985 mci->data[i] = perf_data[pi]; 4986 4987 ++pi; 4988 break; 4989 4990 case COUNTER_SOURCE_MSR: 4991 assert(!no_msr); 4992 4993 if (get_msr(cpu, mci->msr[i], &mci->data[i])) 4994 return -2 - i; 4995 4996 mci->data[i] &= mci->msr_mask[i]; 4997 4998 if (debug >= 2) 4999 fprintf(stderr, "Reading msr counter via msr at %u: %llu\n", i, mci->data[i]); 5000 5001 break; 5002 } 5003 } 5004 5005 BUILD_BUG_ON(NUM_MSR_COUNTERS != 3); 5006 t->aperf = mci->data[MSR_RCI_INDEX_APERF]; 5007 t->mperf = mci->data[MSR_RCI_INDEX_MPERF]; 5008 t->smi_count = mci->data[MSR_RCI_INDEX_SMI]; 5009 5010 return 0; 5011 } 5012 5013 int perf_counter_info_read_values(struct perf_counter_info *pp, int cpu, unsigned long long *out, size_t out_size) 5014 { 5015 unsigned int domain; 5016 unsigned long long value; 5017 int fd_counter; 5018 5019 for 
(size_t i = 0; pp; ++i, pp = pp->next) {
5020 domain = cpu_to_domain(pp, cpu);
5021 assert(domain < pp->num_domains);
5022 
5023 fd_counter = pp->fd_perf_per_domain[domain];
5024 
5025 if (fd_counter == -1)
5026 continue;
5027 
5028 if (read(fd_counter, &value, sizeof(value)) != sizeof(value))
5029 return 1;
5030 
5031 assert(i < out_size);
5032 out[i] = value * pp->scale;
5033 }
5034 
5035 return 0;
5036 }
5037 
5038 unsigned long pmt_gen_value_mask(unsigned int lsb, unsigned int msb)
5039 {
5040 unsigned long mask;
5041 
5042 if (msb == 63)
5043 mask = 0xffffffffffffffffUL;
5044 else
5045 mask = ((1UL << (msb + 1)) - 1);	/* 1UL: a plain int "1" would overflow for msb >= 31 */
5046 
5047 mask -= (1UL << lsb) - 1;
5048 
5049 return mask;
5050 }
5051 
5052 unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id)
5053 {
5054 if (domain_id >= ppmt->num_domains)
5055 return 0;
5056 
5057 const unsigned long *pmmio = ppmt->domains[domain_id].pcounter;
5058 const unsigned long value = pmmio ? *pmmio : 0;
5059 const unsigned long value_mask = pmt_gen_value_mask(ppmt->lsb, ppmt->msb);
5060 const unsigned long value_shift = ppmt->lsb;
5061 
5062 return (value & value_mask) >> value_shift;
5063 }
5064 
5065 /* RAPL domain enumeration helpers */
5066 static inline int get_rapl_num_domains(void)
5067 {
5068 int num_packages = topo.max_package_id + 1;
5069 int num_cores_per_package;
5070 int num_cores;
5071 
5072 if (!platform->has_per_core_rapl)
5073 return num_packages;
5074 
5075 num_cores_per_package = topo.max_core_id + 1;
5076 num_cores = num_cores_per_package * num_packages;
5077 
5078 return num_cores;
5079 }
5080 
5081 static inline int get_rapl_domain_id(int cpu)
5082 {
5083 int nr_cores_per_package = topo.max_core_id + 1;
5084 int rapl_core_id;
5085 
5086 if (!platform->has_per_core_rapl)
5087 return cpus[cpu].physical_package_id;
5088 
5089 /* Compute the system-wide unique core-id for @cpu */
5090 rapl_core_id = cpus[cpu].physical_core_id;
5091 rapl_core_id += cpus[cpu].physical_package_id * nr_cores_per_package;
5092 
5093 return rapl_core_id;
5094 }
5095 
5096 /*
5097  * get_counters(...)
5098  * migrate to cpu
5099  * acquire and record local counters for that cpu
5100  */
5101 int get_counters(PER_THREAD_PARAMS)
5102 {
5103 int cpu = t->cpu_id;
5104 unsigned long long msr;
5105 struct msr_counter *mp;
5106 struct pmt_counter *pp;
5107 int i;
5108 int status;
5109 
5110 if (cpu_migrate(cpu)) {
5111 fprintf(outf, "%s: Could not migrate to CPU %d\n", __func__, cpu);
5112 return -1;
5113 }
5114 
5115 gettimeofday(&t->tv_begin, (struct timezone *)NULL);
5116 
5117 if (first_counter_read)
5118 get_apic_id(t);
5119 
5120 t->tsc = rdtsc(); /* we are running on local CPU of interest */
5121 
5122 get_smi_aperf_mperf(cpu, t);
5123 
5124 if (DO_BIC(BIC_IPC))
5125 if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long))
5126 return -4;
5127 
5128 if (DO_BIC(BIC_IRQ))
5129 t->irq_count = irqs_per_cpu[cpu];
5130 if (DO_BIC(BIC_NMI))
5131 t->nmi_count = nmi_per_cpu[cpu];
5132 
5133 get_cstate_counters(cpu, t, c, p);
5134 
5135 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
5136 if (get_mp(cpu, mp, &t->counter[i], mp->sp->path))
5137 return -10;
5138 }
5139 
5140 if (perf_counter_info_read_values(sys.perf_tp, cpu, t->perf_counter, MAX_ADDED_THREAD_COUNTERS))
5141 return -10;
5142 
5143 for (i = 0, pp = sys.pmt_tp; pp; i++, pp = pp->next)
5144 t->pmt_counter[i] = pmt_read_counter(pp, t->cpu_id);
5145 
5146 /* collect core counters only for 1st thread in core */
5147 if (!is_cpu_first_thread_in_core(t, c, p))
5148 goto done;
5149 
5150 if (platform->has_per_core_rapl) {
5151 status = get_rapl_counters(cpu, get_rapl_domain_id(cpu), c, p);
5152 if (status != 0)
5153 return status;
5154 }
5155 
5156 if (DO_BIC(BIC_CPU_c7) && t->is_atom) {
5157 /*
5158  * For Atom CPUs that have a core cstate deeper than c6,
5159  * MSR_CORE_C6_RESIDENCY returns the residency of cc6 and deeper.
5160  * Subtract CC7 (and deeper cstates) residency to get
5161  * accurate cc6 residency.
5162 */ 5163 c->c6 -= c->c7; 5164 } 5165 5166 if (DO_BIC(BIC_Mod_c6)) 5167 if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us)) 5168 return -8; 5169 5170 if (DO_BIC(BIC_CoreTmp)) { 5171 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) 5172 return -9; 5173 c->core_temp_c = tj_max - ((msr >> 16) & 0x7F); 5174 } 5175 5176 if (DO_BIC(BIC_CORE_THROT_CNT)) 5177 get_core_throt_cnt(cpu, &c->core_throt_cnt); 5178 5179 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 5180 if (get_mp(cpu, mp, &c->counter[i], mp->sp->path)) 5181 return -10; 5182 } 5183 5184 if (perf_counter_info_read_values(sys.perf_cp, cpu, c->perf_counter, MAX_ADDED_CORE_COUNTERS)) 5185 return -10; 5186 5187 for (i = 0, pp = sys.pmt_cp; pp; i++, pp = pp->next) 5188 c->pmt_counter[i] = pmt_read_counter(pp, c->core_id); 5189 5190 /* collect package counters only for 1st core in package */ 5191 if (!is_cpu_first_core_in_package(t, c, p)) 5192 goto done; 5193 5194 if (DO_BIC(BIC_Totl_c0)) { 5195 if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0)) 5196 return -10; 5197 } 5198 if (DO_BIC(BIC_Any_c0)) { 5199 if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0)) 5200 return -11; 5201 } 5202 if (DO_BIC(BIC_GFX_c0)) { 5203 if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0)) 5204 return -12; 5205 } 5206 if (DO_BIC(BIC_CPUGFX)) { 5207 if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0)) 5208 return -13; 5209 } 5210 5211 if (DO_BIC(BIC_CPU_LPI)) 5212 p->cpu_lpi = cpuidle_cur_cpu_lpi_us; 5213 if (DO_BIC(BIC_SYS_LPI)) 5214 p->sys_lpi = cpuidle_cur_sys_lpi_us; 5215 5216 if (!platform->has_per_core_rapl) { 5217 status = get_rapl_counters(cpu, get_rapl_domain_id(cpu), c, p); 5218 if (status != 0) 5219 return status; 5220 } 5221 5222 if (DO_BIC(BIC_PkgTmp)) { 5223 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) 5224 return -17; 5225 p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F); 5226 } 5227 5228 if (DO_BIC(BIC_UNCORE_MHZ)) 5229 p->uncore_mhz = get_legacy_uncore_mhz(p->package_id); 5230 5231 if (DO_BIC(BIC_GFX_rc6)) 5232 p->gfx_rc6_ms = gfx_info[GFX_rc6].val_ull; 5233 5234 if (DO_BIC(BIC_GFXMHz)) 5235 p->gfx_mhz = gfx_info[GFX_MHz].val; 5236 5237 if (DO_BIC(BIC_GFXACTMHz)) 5238 p->gfx_act_mhz = gfx_info[GFX_ACTMHz].val; 5239 5240 if (DO_BIC(BIC_SAM_mc6)) 5241 p->sam_mc6_ms = gfx_info[SAM_mc6].val_ull; 5242 5243 if (DO_BIC(BIC_SAMMHz)) 5244 p->sam_mhz = gfx_info[SAM_MHz].val; 5245 5246 if (DO_BIC(BIC_SAMACTMHz)) 5247 p->sam_act_mhz = gfx_info[SAM_ACTMHz].val; 5248 5249 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 5250 char *path = NULL; 5251 5252 if (mp->msr_num == 0) { 5253 path = find_sysfs_path_by_id(mp->sp, p->package_id); 5254 if (path == NULL) { 5255 warnx("%s: package_id %d not found", __func__, p->package_id); 5256 return -10; 5257 } 5258 } 5259 if (get_mp(cpu, mp, &p->counter[i], path)) 5260 return -10; 5261 } 5262 5263 if (perf_counter_info_read_values(sys.perf_pp, cpu, p->perf_counter, MAX_ADDED_PACKAGE_COUNTERS)) 5264 return -10; 5265 5266 for (i = 0, pp = sys.pmt_pp; pp; i++, pp = pp->next) 5267 p->pmt_counter[i] = pmt_read_counter(pp, p->package_id); 5268 5269 done: 5270 gettimeofday(&t->tv_end, (struct timezone *)NULL); 5271 5272 return 0; 5273 } 5274 5275 int pkg_cstate_limit = PCLUKN; 5276 char *pkg_cstate_limit_strings[] = { "unknown", "reserved", "pc0", "pc1", "pc2", 5277 "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited" 5278 }; 5279 5280 int nhm_pkg_cstate_limits[16] = 5281 { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, 
PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 5282 PCLRSV, PCLRSV 5283 }; 5284 5285 int snb_pkg_cstate_limits[16] = 5286 { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 5287 PCLRSV, PCLRSV 5288 }; 5289 5290 int hsw_pkg_cstate_limits[16] = 5291 { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 5292 PCLRSV, PCLRSV 5293 }; 5294 5295 int slv_pkg_cstate_limits[16] = 5296 { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 5297 PCL__6, PCL__7 5298 }; 5299 5300 int amt_pkg_cstate_limits[16] = 5301 { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 5302 PCLRSV, PCLRSV 5303 }; 5304 5305 int phi_pkg_cstate_limits[16] = 5306 { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 5307 PCLRSV, PCLRSV 5308 }; 5309 5310 int glm_pkg_cstate_limits[16] = 5311 { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 5312 PCLRSV, PCLRSV 5313 }; 5314 5315 int skx_pkg_cstate_limits[16] = 5316 { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 5317 PCLRSV, PCLRSV 5318 }; 5319 5320 int icx_pkg_cstate_limits[16] = 5321 { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 5322 PCLRSV, PCLRSV 5323 }; 5324 5325 void probe_cst_limit(void) 5326 { 5327 unsigned long long msr; 5328 int *pkg_cstate_limits; 5329 5330 if (!platform->has_nhm_msrs || no_msr) 5331 return; 5332 5333 switch (platform->cst_limit) { 5334 case CST_LIMIT_NHM: 5335 pkg_cstate_limits = nhm_pkg_cstate_limits; 5336 break; 5337 case CST_LIMIT_SNB: 5338 pkg_cstate_limits = snb_pkg_cstate_limits; 5339 break; 5340 case CST_LIMIT_HSW: 5341 pkg_cstate_limits = hsw_pkg_cstate_limits; 5342 break; 5343 case CST_LIMIT_SKX: 5344 pkg_cstate_limits = skx_pkg_cstate_limits; 5345 break; 5346 case CST_LIMIT_ICX: 5347 pkg_cstate_limits = icx_pkg_cstate_limits; 5348 break; 5349 case CST_LIMIT_SLV: 5350 pkg_cstate_limits = slv_pkg_cstate_limits; 5351 break; 5352 case CST_LIMIT_AMT: 5353 pkg_cstate_limits = amt_pkg_cstate_limits; 5354 break; 5355 case CST_LIMIT_KNL: 5356 pkg_cstate_limits = phi_pkg_cstate_limits; 5357 break; 5358 case CST_LIMIT_GMT: 5359 pkg_cstate_limits = glm_pkg_cstate_limits; 5360 break; 5361 default: 5362 return; 5363 } 5364 5365 get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); 5366 pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; 5367 } 5368 5369 static void dump_platform_info(void) 5370 { 5371 unsigned long long msr; 5372 unsigned int ratio; 5373 5374 if (!platform->has_nhm_msrs || no_msr) 5375 return; 5376 5377 get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); 5378 5379 fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); 5380 5381 ratio = (msr >> 40) & 0xFF; 5382 fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk); 5383 5384 ratio = (msr >> 8) & 0xFF; 5385 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); 5386 } 5387 5388 static void dump_power_ctl(void) 5389 { 5390 unsigned long long msr; 5391 5392 if (!platform->has_nhm_msrs || no_msr) 5393 return; 5394 5395 get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); 5396 fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 
0x%08llx (C1E auto-promotion: %sabled)\n", 5397 base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); 5398 5399 /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */ 5400 if (platform->has_cst_prewake_bit) 5401 fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN"); 5402 5403 return; 5404 } 5405 5406 static void dump_turbo_ratio_limit2(void) 5407 { 5408 unsigned long long msr; 5409 unsigned int ratio; 5410 5411 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); 5412 5413 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr); 5414 5415 ratio = (msr >> 8) & 0xFF; 5416 if (ratio) 5417 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk); 5418 5419 ratio = (msr >> 0) & 0xFF; 5420 if (ratio) 5421 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk); 5422 return; 5423 } 5424 5425 static void dump_turbo_ratio_limit1(void) 5426 { 5427 unsigned long long msr; 5428 unsigned int ratio; 5429 5430 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); 5431 5432 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr); 5433 5434 ratio = (msr >> 56) & 0xFF; 5435 if (ratio) 5436 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk); 5437 5438 ratio = (msr >> 48) & 0xFF; 5439 if (ratio) 5440 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk); 5441 5442 ratio = (msr >> 40) & 0xFF; 5443 if (ratio) 5444 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk); 5445 5446 ratio = (msr >> 32) & 0xFF; 5447 if (ratio) 5448 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk); 5449 5450 ratio = (msr >> 24) & 0xFF; 5451 if (ratio) 5452 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk); 5453 5454 ratio = (msr >> 16) & 0xFF; 5455 if (ratio) 5456 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk); 5457 5458 ratio = (msr >> 8) & 0xFF; 5459 if (ratio) 5460 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk); 5461 5462 ratio = (msr >> 0) & 0xFF; 5463 if (ratio) 5464 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk); 5465 return; 5466 } 5467 5468 static void dump_turbo_ratio_limits(int trl_msr_offset) 5469 { 5470 unsigned long long msr, core_counts; 5471 int shift; 5472 5473 get_msr(base_cpu, trl_msr_offset, &msr); 5474 fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n", 5475 base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? 
"SECONDARY_" : "", msr); 5476 5477 if (platform->trl_msrs & TRL_CORECOUNT) { 5478 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts); 5479 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts); 5480 } else { 5481 core_counts = 0x0807060504030201; 5482 } 5483 5484 for (shift = 56; shift >= 0; shift -= 8) { 5485 unsigned int ratio, group_size; 5486 5487 ratio = (msr >> shift) & 0xFF; 5488 group_size = (core_counts >> shift) & 0xFF; 5489 if (ratio) 5490 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", 5491 ratio, bclk, ratio * bclk, group_size); 5492 } 5493 5494 return; 5495 } 5496 5497 static void dump_atom_turbo_ratio_limits(void) 5498 { 5499 unsigned long long msr; 5500 unsigned int ratio; 5501 5502 get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr); 5503 fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF); 5504 5505 ratio = (msr >> 0) & 0x3F; 5506 if (ratio) 5507 fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", ratio, bclk, ratio * bclk); 5508 5509 ratio = (msr >> 8) & 0x3F; 5510 if (ratio) 5511 fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", ratio, bclk, ratio * bclk); 5512 5513 ratio = (msr >> 16) & 0x3F; 5514 if (ratio) 5515 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); 5516 5517 get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr); 5518 fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF); 5519 5520 ratio = (msr >> 24) & 0x3F; 5521 if (ratio) 5522 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk); 5523 5524 ratio = (msr >> 16) & 0x3F; 5525 if (ratio) 5526 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk); 5527 5528 ratio = (msr >> 8) & 0x3F; 5529 if (ratio) 5530 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk); 5531 5532 ratio = (msr >> 0) & 0x3F; 5533 if (ratio) 5534 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", ratio, bclk, ratio * bclk); 5535 } 5536 5537 static void dump_knl_turbo_ratio_limits(void) 5538 { 5539 const unsigned int buckets_no = 7; 5540 5541 unsigned long long msr; 5542 int delta_cores, delta_ratio; 5543 int i, b_nr; 5544 unsigned int cores[buckets_no]; 5545 unsigned int ratio[buckets_no]; 5546 5547 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); 5548 5549 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); 5550 5551 /* 5552 * Turbo encoding in KNL is as follows: 5553 * [0] -- Reserved 5554 * [7:1] -- Base value of number of active cores of bucket 1. 5555 * [15:8] -- Base value of freq ratio of bucket 1. 5556 * [20:16] -- +ve delta of number of active cores of bucket 2. 5557 * i.e. active cores of bucket 2 = 5558 * active cores of bucket 1 + delta 5559 * [23:21] -- Negative delta of freq ratio of bucket 2. 5560 * i.e. freq ratio of bucket 2 = 5561 * freq ratio of bucket 1 - delta 5562 * [28:24]-- +ve delta of number of active cores of bucket 3. 5563 * [31:29]-- -ve delta of freq ratio of bucket 3. 5564 * [36:32]-- +ve delta of number of active cores of bucket 4. 5565 * [39:37]-- -ve delta of freq ratio of bucket 4. 5566 * [44:40]-- +ve delta of number of active cores of bucket 5. 5567 * [47:45]-- -ve delta of freq ratio of bucket 5. 5568 * [52:48]-- +ve delta of number of active cores of bucket 6. 5569 * [55:53]-- -ve delta of freq ratio of bucket 6. 5570 * [60:56]-- +ve delta of number of active cores of bucket 7. 
5571 * [63:61]-- -ve delta of freq ratio of bucket 7. 5572 */ 5573 5574 b_nr = 0; 5575 cores[b_nr] = (msr & 0xFF) >> 1; 5576 ratio[b_nr] = (msr >> 8) & 0xFF; 5577 5578 for (i = 16; i < 64; i += 8) { 5579 delta_cores = (msr >> i) & 0x1F; 5580 delta_ratio = (msr >> (i + 5)) & 0x7; 5581 5582 cores[b_nr + 1] = cores[b_nr] + delta_cores; 5583 ratio[b_nr + 1] = ratio[b_nr] - delta_ratio; 5584 b_nr++; 5585 } 5586 5587 for (i = buckets_no - 1; i >= 0; i--) 5588 if (i > 0 ? ratio[i] != ratio[i - 1] : 1) 5589 fprintf(outf, 5590 "%d * %.1f = %.1f MHz max turbo %d active cores\n", 5591 ratio[i], bclk, ratio[i] * bclk, cores[i]); 5592 } 5593 5594 static void dump_cst_cfg(void) 5595 { 5596 unsigned long long msr; 5597 5598 if (!platform->has_nhm_msrs || no_msr) 5599 return; 5600 5601 get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); 5602 5603 fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr); 5604 5605 fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)", 5606 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", 5607 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", 5608 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", 5609 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", 5610 (msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]); 5611 5612 #define AUTOMATIC_CSTATE_CONVERSION (1UL << 16) 5613 if (platform->has_cst_auto_convension) { 5614 fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off"); 5615 } 5616 5617 fprintf(outf, ")\n"); 5618 5619 return; 5620 } 5621 5622 static void dump_config_tdp(void) 5623 { 5624 unsigned long long msr; 5625 5626 get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr); 5627 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr); 5628 fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF); 5629 5630 get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr); 5631 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr); 5632 if (msr) { 5633 fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF); 5634 fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF); 5635 fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); 5636 fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF); 5637 } 5638 fprintf(outf, ")\n"); 5639 5640 get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr); 5641 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr); 5642 if (msr) { 5643 fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF); 5644 fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF); 5645 fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); 5646 fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF); 5647 } 5648 fprintf(outf, ")\n"); 5649 5650 get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr); 5651 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr); 5652 if ((msr) & 0x3) 5653 fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3); 5654 fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); 5655 fprintf(outf, ")\n"); 5656 5657 get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr); 5658 fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); 5659 fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF); 5660 fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); 5661 fprintf(outf, ")\n"); 5662 } 5663 5664 unsigned int irtl_time_units[] = { 1, 32, 
1024, 32768, 1048576, 33554432, 0, 0 };

void print_irtl(void)
{
	unsigned long long msr;

	if (!platform->has_irtl_msrs || no_msr)
		return;

	if (platform->supported_cstates & PC3) {
		get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);
	}

	if (platform->supported_cstates & PC6) {
		get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);
	}

	if (platform->supported_cstates & PC7) {
		get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);
	}

	if (platform->supported_cstates & PC8) {
		get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);
	}

	if (platform->supported_cstates & PC9) {
		get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);
	}

	if (platform->supported_cstates & PC10) {
		get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);
	}
}
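/*
 * Example IRTL decoding, per print_irtl() above: for msr = 0x8C42,
 * bit 15 is set (valid), the time-unit field (msr >> 10) & 0x7 = 3
 * selects 32768 ns, and the value field msr & 0x3FF = 0x42 = 66,
 * so the reported interrupt response time is 66 * 32768 = 2162688 ns.
 * (0x8C42 is an illustrative value, not from any particular part.)
 */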
"" : "NOT", 5712 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 5713 } 5714 } 5715 5716 void free_fd_percpu(void) 5717 { 5718 int i; 5719 5720 if (!fd_percpu) 5721 return; 5722 5723 for (i = 0; i < topo.max_cpu_num + 1; ++i) { 5724 if (fd_percpu[i] != 0) 5725 close(fd_percpu[i]); 5726 } 5727 5728 free(fd_percpu); 5729 fd_percpu = NULL; 5730 } 5731 5732 void free_fd_instr_count_percpu(void) 5733 { 5734 if (!fd_instr_count_percpu) 5735 return; 5736 5737 for (int i = 0; i < topo.max_cpu_num + 1; ++i) { 5738 if (fd_instr_count_percpu[i] != 0) 5739 close(fd_instr_count_percpu[i]); 5740 } 5741 5742 free(fd_instr_count_percpu); 5743 fd_instr_count_percpu = NULL; 5744 } 5745 5746 void free_fd_cstate(void) 5747 { 5748 if (!ccstate_counter_info) 5749 return; 5750 5751 const int counter_info_num = ccstate_counter_info_size; 5752 5753 for (int counter_id = 0; counter_id < counter_info_num; ++counter_id) { 5754 if (ccstate_counter_info[counter_id].fd_perf_core != -1) 5755 close(ccstate_counter_info[counter_id].fd_perf_core); 5756 5757 if (ccstate_counter_info[counter_id].fd_perf_pkg != -1) 5758 close(ccstate_counter_info[counter_id].fd_perf_pkg); 5759 } 5760 5761 free(ccstate_counter_info); 5762 ccstate_counter_info = NULL; 5763 ccstate_counter_info_size = 0; 5764 } 5765 5766 void free_fd_msr(void) 5767 { 5768 if (!msr_counter_info) 5769 return; 5770 5771 for (int cpu = 0; cpu < topo.max_cpu_num; ++cpu) { 5772 if (msr_counter_info[cpu].fd_perf != -1) 5773 close(msr_counter_info[cpu].fd_perf); 5774 } 5775 5776 free(msr_counter_info); 5777 msr_counter_info = NULL; 5778 msr_counter_info_size = 0; 5779 } 5780 5781 void free_fd_rapl_percpu(void) 5782 { 5783 if (!rapl_counter_info_perdomain) 5784 return; 5785 5786 const int num_domains = rapl_counter_info_perdomain_size; 5787 5788 for (int domain_id = 0; domain_id < num_domains; ++domain_id) { 5789 if (rapl_counter_info_perdomain[domain_id].fd_perf != -1) 5790 close(rapl_counter_info_perdomain[domain_id].fd_perf); 5791 } 5792 5793 free(rapl_counter_info_perdomain); 5794 rapl_counter_info_perdomain = NULL; 5795 rapl_counter_info_perdomain_size = 0; 5796 } 5797 5798 void free_fd_added_perf_counters_(struct perf_counter_info *pp) 5799 { 5800 if (!pp) 5801 return; 5802 5803 if (!pp->fd_perf_per_domain) 5804 return; 5805 5806 while (pp) { 5807 for (size_t domain = 0; domain < pp->num_domains; ++domain) { 5808 if (pp->fd_perf_per_domain[domain] != -1) { 5809 close(pp->fd_perf_per_domain[domain]); 5810 pp->fd_perf_per_domain[domain] = -1; 5811 } 5812 } 5813 5814 free(pp->fd_perf_per_domain); 5815 pp->fd_perf_per_domain = NULL; 5816 5817 pp = pp->next; 5818 } 5819 } 5820 5821 void free_fd_added_perf_counters(void) 5822 { 5823 free_fd_added_perf_counters_(sys.perf_tp); 5824 free_fd_added_perf_counters_(sys.perf_cp); 5825 free_fd_added_perf_counters_(sys.perf_pp); 5826 } 5827 5828 void free_all_buffers(void) 5829 { 5830 int i; 5831 5832 CPU_FREE(cpu_present_set); 5833 cpu_present_set = NULL; 5834 cpu_present_setsize = 0; 5835 5836 CPU_FREE(cpu_effective_set); 5837 cpu_effective_set = NULL; 5838 cpu_effective_setsize = 0; 5839 5840 CPU_FREE(cpu_allowed_set); 5841 cpu_allowed_set = NULL; 5842 cpu_allowed_setsize = 0; 5843 5844 CPU_FREE(cpu_affinity_set); 5845 cpu_affinity_set = NULL; 5846 cpu_affinity_setsize = 0; 5847 5848 free(thread_even); 5849 free(core_even); 5850 free(package_even); 5851 5852 thread_even = NULL; 5853 core_even = NULL; 5854 package_even = NULL; 5855 5856 free(thread_odd); 5857 free(core_odd); 5858 free(package_odd); 5859 5860 thread_odd = 
NULL; 5861 core_odd = NULL; 5862 package_odd = NULL; 5863 5864 free(output_buffer); 5865 output_buffer = NULL; 5866 outp = NULL; 5867 5868 free_fd_percpu(); 5869 free_fd_instr_count_percpu(); 5870 free_fd_msr(); 5871 free_fd_rapl_percpu(); 5872 free_fd_cstate(); 5873 free_fd_added_perf_counters(); 5874 5875 free(irq_column_2_cpu); 5876 free(irqs_per_cpu); 5877 free(nmi_per_cpu); 5878 5879 for (i = 0; i <= topo.max_cpu_num; ++i) { 5880 if (cpus[i].put_ids) 5881 CPU_FREE(cpus[i].put_ids); 5882 } 5883 free(cpus); 5884 } 5885 5886 /* 5887 * Parse a file containing a single int. 5888 * Return 0 if file can not be opened 5889 * Exit if file can be opened, but can not be parsed 5890 */ 5891 int parse_int_file(const char *fmt, ...) 5892 { 5893 va_list args; 5894 char path[PATH_MAX]; 5895 FILE *filep; 5896 int value; 5897 5898 va_start(args, fmt); 5899 vsnprintf(path, sizeof(path), fmt, args); 5900 va_end(args); 5901 filep = fopen(path, "r"); 5902 if (!filep) 5903 return 0; 5904 if (fscanf(filep, "%d", &value) != 1) 5905 err(1, "%s: failed to parse number from file", path); 5906 fclose(filep); 5907 return value; 5908 } 5909 5910 /* 5911 * cpu_is_first_core_in_package(cpu) 5912 * return 1 if given CPU is 1st core in package 5913 */ 5914 int cpu_is_first_core_in_package(int cpu) 5915 { 5916 return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu); 5917 } 5918 5919 int get_physical_package_id(int cpu) 5920 { 5921 return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu); 5922 } 5923 5924 int get_die_id(int cpu) 5925 { 5926 return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu); 5927 } 5928 5929 int get_l3_id(int cpu) 5930 { 5931 return parse_int_file("/sys/devices/system/cpu/cpu%d/cache/index3/id", cpu); 5932 } 5933 5934 int get_core_id(int cpu) 5935 { 5936 return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); 5937 } 5938 5939 void set_node_data(void) 5940 { 5941 int pkg, node, lnode, cpu, cpux; 5942 int cpu_count; 5943 5944 /* initialize logical_node_id */ 5945 for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) 5946 cpus[cpu].logical_node_id = -1; 5947 5948 cpu_count = 0; 5949 for (pkg = 0; pkg < topo.num_packages; pkg++) { 5950 lnode = 0; 5951 for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) { 5952 if (cpus[cpu].physical_package_id != pkg) 5953 continue; 5954 /* find a cpu with an unset logical_node_id */ 5955 if (cpus[cpu].logical_node_id != -1) 5956 continue; 5957 cpus[cpu].logical_node_id = lnode; 5958 node = cpus[cpu].physical_node_id; 5959 cpu_count++; 5960 /* 5961 * find all matching cpus on this pkg and set 5962 * the logical_node_id 5963 */ 5964 for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) { 5965 if ((cpus[cpux].physical_package_id == pkg) && (cpus[cpux].physical_node_id == node)) { 5966 cpus[cpux].logical_node_id = lnode; 5967 cpu_count++; 5968 } 5969 } 5970 lnode++; 5971 if (lnode > topo.nodes_per_pkg) 5972 topo.nodes_per_pkg = lnode; 5973 } 5974 if (cpu_count >= topo.max_cpu_num) 5975 break; 5976 } 5977 } 5978 5979 int get_physical_node_id(struct cpu_topology *thiscpu) 5980 { 5981 char path[80]; 5982 FILE *filep; 5983 int i; 5984 int cpu = thiscpu->logical_cpu_id; 5985 5986 for (i = 0; i <= topo.max_cpu_num; i++) { 5987 sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", cpu, i); 5988 filep = fopen(path, "r"); 5989 if (!filep) 5990 continue; 5991 fclose(filep); 5992 return i; 5993 } 5994 return -1; 5995 } 5996 5997 static int parse_cpu_str(char *cpu_str, cpu_set_t 
*cpu_set, int cpu_set_size) 5998 { 5999 unsigned int start, end; 6000 char *next = cpu_str; 6001 6002 while (next && *next) { 6003 6004 if (*next == '-') /* no negative cpu numbers */ 6005 return 1; 6006 6007 if (*next == '\0' || *next == '\n') 6008 break; 6009 6010 start = strtoul(next, &next, 10); 6011 6012 if (start >= CPU_SUBSET_MAXCPUS) 6013 return 1; 6014 CPU_SET_S(start, cpu_set_size, cpu_set); 6015 6016 if (*next == '\0' || *next == '\n') 6017 break; 6018 6019 if (*next == ',') { 6020 next += 1; 6021 continue; 6022 } 6023 6024 if (*next == '-') { 6025 next += 1; /* start range */ 6026 } else if (*next == '.') { 6027 next += 1; 6028 if (*next == '.') 6029 next += 1; /* start range */ 6030 else 6031 return 1; 6032 } 6033 6034 end = strtoul(next, &next, 10); 6035 if (end <= start) 6036 return 1; 6037 6038 while (++start <= end) { 6039 if (start >= CPU_SUBSET_MAXCPUS) 6040 return 1; 6041 CPU_SET_S(start, cpu_set_size, cpu_set); 6042 } 6043 6044 if (*next == ',') 6045 next += 1; 6046 else if (*next != '\0' && *next != '\n') 6047 return 1; 6048 } 6049 6050 return 0; 6051 } 6052 6053 int get_thread_siblings(struct cpu_topology *thiscpu) 6054 { 6055 char path[80], character; 6056 FILE *filep; 6057 unsigned long map; 6058 int so, shift, sib_core; 6059 int cpu = thiscpu->logical_cpu_id; 6060 int offset = topo.max_cpu_num + 1; 6061 size_t size; 6062 int thread_id = 0; 6063 6064 thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1)); 6065 if (thiscpu->thread_id < 0) 6066 thiscpu->thread_id = thread_id++; 6067 if (!thiscpu->put_ids) 6068 return -1; 6069 6070 size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 6071 CPU_ZERO_S(size, thiscpu->put_ids); 6072 6073 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu); 6074 filep = fopen(path, "r"); 6075 6076 if (!filep) { 6077 warnx("%s: open failed", path); 6078 return -1; 6079 } 6080 do { 6081 offset -= BITMASK_SIZE; 6082 if (fscanf(filep, "%lx%c", &map, &character) != 2) 6083 err(1, "%s: failed to parse file", path); 6084 for (shift = 0; shift < BITMASK_SIZE; shift++) { 6085 if ((map >> shift) & 0x1) { 6086 so = shift + offset; 6087 sib_core = get_core_id(so); 6088 if (sib_core == thiscpu->physical_core_id) { 6089 CPU_SET_S(so, size, thiscpu->put_ids); 6090 if ((so != cpu) && (cpus[so].thread_id < 0)) 6091 cpus[so].thread_id = thread_id++; 6092 } 6093 } 6094 } 6095 } while (character == ','); 6096 fclose(filep); 6097 6098 return CPU_COUNT_S(size, thiscpu->put_ids); 6099 } 6100 6101 /* 6102 * run func(thread, core, package) in topology order 6103 * skip non-present cpus 6104 */ 6105 6106 int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *, 6107 struct pkg_data *, struct thread_data *, struct core_data *, 6108 struct pkg_data *), struct thread_data *thread_base, 6109 struct core_data *core_base, struct pkg_data *pkg_base, 6110 struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2) 6111 { 6112 int retval, pkg_no, node_no, core_no, thread_no; 6113 6114 retval = 0; 6115 6116 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 6117 for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) { 6118 for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { 6119 for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { 6120 struct thread_data *t, *t2; 6121 struct core_data *c, *c2; 6122 6123 t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); 6124 6125 if (cpu_is_not_allowed(t->cpu_id)) 6126 continue; 6127 6128 t2 = GET_THREAD(thread_base2, 
thread_no, core_no, node_no, pkg_no); 6129 6130 c = GET_CORE(core_base, core_no, node_no, pkg_no); 6131 c2 = GET_CORE(core_base2, core_no, node_no, pkg_no); 6132 6133 retval |= func(t, c, &pkg_base[pkg_no], t2, c2, &pkg_base2[pkg_no]); 6134 } 6135 } 6136 } 6137 } 6138 return retval; 6139 } 6140 6141 /* 6142 * run func(cpu) on every cpu in /proc/stat 6143 * return max_cpu number 6144 */ 6145 int for_all_proc_cpus(int (func) (int)) 6146 { 6147 FILE *fp; 6148 int cpu_num; 6149 int retval; 6150 6151 fp = fopen_or_die(proc_stat, "r"); 6152 6153 retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n"); 6154 if (retval != 0) 6155 err(1, "%s: failed to parse format", proc_stat); 6156 6157 while (1) { 6158 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num); 6159 if (retval != 1) 6160 break; 6161 6162 retval = func(cpu_num); 6163 if (retval) { 6164 fclose(fp); 6165 return (retval); 6166 } 6167 } 6168 fclose(fp); 6169 return 0; 6170 } 6171 6172 #define PATH_EFFECTIVE_CPUS "/sys/fs/cgroup/cpuset.cpus.effective" 6173 6174 static char cpu_effective_str[1024]; 6175 6176 static int update_effective_str(bool startup) 6177 { 6178 FILE *fp; 6179 char *pos; 6180 char buf[1024]; 6181 int ret; 6182 6183 if (cpu_effective_str[0] == '\0' && !startup) 6184 return 0; 6185 6186 fp = fopen(PATH_EFFECTIVE_CPUS, "r"); 6187 if (!fp) 6188 return 0; 6189 6190 pos = fgets(buf, 1024, fp); 6191 if (!pos) 6192 err(1, "%s: file read failed\n", PATH_EFFECTIVE_CPUS); 6193 6194 fclose(fp); 6195 6196 ret = strncmp(cpu_effective_str, buf, 1024); 6197 if (!ret) 6198 return 0; 6199 6200 strncpy(cpu_effective_str, buf, 1024); 6201 return 1; 6202 } 6203 6204 static void update_effective_set(bool startup) 6205 { 6206 update_effective_str(startup); 6207 6208 if (parse_cpu_str(cpu_effective_str, cpu_effective_set, cpu_effective_setsize)) 6209 err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str); 6210 } 6211 6212 void linux_perf_init(void); 6213 void msr_perf_init(void); 6214 void rapl_perf_init(void); 6215 void cstate_perf_init(void); 6216 void added_perf_counters_init(void); 6217 void pmt_init(void); 6218 6219 void re_initialize(void) 6220 { 6221 free_all_buffers(); 6222 setup_all_buffers(false); 6223 linux_perf_init(); 6224 msr_perf_init(); 6225 rapl_perf_init(); 6226 cstate_perf_init(); 6227 added_perf_counters_init(); 6228 pmt_init(); 6229 fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, 6230 topo.allowed_cpus); 6231 } 6232 6233 void set_max_cpu_num(void) 6234 { 6235 FILE *filep; 6236 int base_cpu; 6237 unsigned long dummy; 6238 char pathname[64]; 6239 6240 base_cpu = sched_getcpu(); 6241 if (base_cpu < 0) 6242 err(1, "cannot find calling cpu ID"); 6243 sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", base_cpu); 6244 6245 filep = fopen_or_die(pathname, "r"); 6246 topo.max_cpu_num = 0; 6247 while (fscanf(filep, "%lx,", &dummy) == 1) 6248 topo.max_cpu_num += BITMASK_SIZE; 6249 fclose(filep); 6250 topo.max_cpu_num--; /* 0 based */ 6251 } 6252 6253 /* 6254 * count_cpus() 6255 * remember the last one seen, it will be the max 6256 */ 6257 int count_cpus(int cpu) 6258 { 6259 UNUSED(cpu); 6260 6261 topo.num_cpus++; 6262 return 0; 6263 } 6264 6265 int mark_cpu_present(int cpu) 6266 { 6267 CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); 6268 return 0; 6269 } 6270 6271 int init_thread_id(int cpu) 6272 { 6273 cpus[cpu].thread_id = -1; 6274 return 0; 6275 } 6276 6277 int set_my_cpu_type(void) 6278 { 6279 
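	/*
	 * CPUID leaf 0x1A (CPUID_LEAF_MODEL_ID) reports the hybrid core
	 * type in EAX[31:24]: 0x20 (INTEL_ECORE_TYPE) for an E-core,
	 * 0x40 (INTEL_PCORE_TYPE) for a P-core.  For example, EAX =
	 * 0x40000001 yields (0x40000001 >> 24) = 0x40, a P-core;
	 * non-hybrid parts report 0 in this field.
	 */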
unsigned int eax, ebx, ecx, edx; 6280 unsigned int max_level; 6281 6282 __cpuid(0, max_level, ebx, ecx, edx); 6283 6284 if (max_level < CPUID_LEAF_MODEL_ID) 6285 return 0; 6286 6287 __cpuid(CPUID_LEAF_MODEL_ID, eax, ebx, ecx, edx); 6288 6289 return (eax >> CPUID_LEAF_MODEL_ID_CORE_TYPE_SHIFT); 6290 } 6291 6292 int set_cpu_hybrid_type(int cpu) 6293 { 6294 if (cpu_migrate(cpu)) 6295 return -1; 6296 6297 int type = set_my_cpu_type(); 6298 6299 cpus[cpu].type = type; 6300 return 0; 6301 } 6302 6303 /* 6304 * snapshot_proc_interrupts() 6305 * 6306 * read and record summary of /proc/interrupts 6307 * 6308 * return 1 if config change requires a restart, else return 0 6309 */ 6310 int snapshot_proc_interrupts(void) 6311 { 6312 static FILE *fp; 6313 int column, retval; 6314 6315 if (fp == NULL) 6316 fp = fopen_or_die("/proc/interrupts", "r"); 6317 else 6318 rewind(fp); 6319 6320 /* read 1st line of /proc/interrupts to get cpu* name for each column */ 6321 for (column = 0; column < topo.num_cpus; ++column) { 6322 int cpu_number; 6323 6324 retval = fscanf(fp, " CPU%d", &cpu_number); 6325 if (retval != 1) 6326 break; 6327 6328 if (cpu_number > topo.max_cpu_num) { 6329 warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num); 6330 return 1; 6331 } 6332 6333 irq_column_2_cpu[column] = cpu_number; 6334 irqs_per_cpu[cpu_number] = 0; 6335 nmi_per_cpu[cpu_number] = 0; 6336 } 6337 6338 /* read /proc/interrupt count lines and sum up irqs per cpu */ 6339 while (1) { 6340 int column; 6341 char buf[64]; 6342 int this_row_is_nmi = 0; 6343 6344 retval = fscanf(fp, " %s:", buf); /* irq# "N:" */ 6345 if (retval != 1) 6346 break; 6347 6348 if (strncmp(buf, "NMI", strlen("NMI")) == 0) 6349 this_row_is_nmi = 1; 6350 6351 /* read the count per cpu */ 6352 for (column = 0; column < topo.num_cpus; ++column) { 6353 6354 int cpu_number, irq_count; 6355 6356 retval = fscanf(fp, " %d", &irq_count); 6357 6358 if (retval != 1) 6359 break; 6360 6361 cpu_number = irq_column_2_cpu[column]; 6362 irqs_per_cpu[cpu_number] += irq_count; 6363 if (this_row_is_nmi) 6364 nmi_per_cpu[cpu_number] += irq_count; 6365 } 6366 while (getc(fp) != '\n') ; /* flush interrupt description */ 6367 6368 } 6369 return 0; 6370 } 6371 6372 /* 6373 * snapshot_graphics() 6374 * 6375 * record snapshot of specified graphics sysfs knob 6376 * 6377 * return 1 if config change requires a restart, else return 0 6378 */ 6379 int snapshot_graphics(int idx) 6380 { 6381 int retval; 6382 6383 rewind(gfx_info[idx].fp); 6384 fflush(gfx_info[idx].fp); 6385 6386 switch (idx) { 6387 case GFX_rc6: 6388 case SAM_mc6: 6389 retval = fscanf(gfx_info[idx].fp, "%lld", &gfx_info[idx].val_ull); 6390 if (retval != 1) 6391 err(1, "rc6"); 6392 return 0; 6393 case GFX_MHz: 6394 case GFX_ACTMHz: 6395 case SAM_MHz: 6396 case SAM_ACTMHz: 6397 retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val); 6398 if (retval != 1) 6399 err(1, "MHz"); 6400 return 0; 6401 default: 6402 return -EINVAL; 6403 } 6404 } 6405 6406 /* 6407 * snapshot_cpu_lpi() 6408 * 6409 * record snapshot of 6410 * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us 6411 */ 6412 int snapshot_cpu_lpi_us(void) 6413 { 6414 FILE *fp; 6415 int retval; 6416 6417 fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r"); 6418 6419 retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us); 6420 if (retval != 1) { 6421 fprintf(stderr, "Disabling Low Power Idle CPU output\n"); 6422 BIC_NOT_PRESENT(BIC_CPU_LPI); 6423 fclose(fp); 6424 return -1; 6425 } 6426 6427 fclose(fp); 6428 
6429 return 0; 6430 } 6431 6432 /* 6433 * snapshot_sys_lpi() 6434 * 6435 * record snapshot of sys_lpi_file 6436 */ 6437 int snapshot_sys_lpi_us(void) 6438 { 6439 FILE *fp; 6440 int retval; 6441 6442 fp = fopen_or_die(sys_lpi_file, "r"); 6443 6444 retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us); 6445 if (retval != 1) { 6446 fprintf(stderr, "Disabling Low Power Idle System output\n"); 6447 BIC_NOT_PRESENT(BIC_SYS_LPI); 6448 fclose(fp); 6449 return -1; 6450 } 6451 fclose(fp); 6452 6453 return 0; 6454 } 6455 6456 /* 6457 * snapshot /proc and /sys files 6458 * 6459 * return 1 if configuration restart needed, else return 0 6460 */ 6461 int snapshot_proc_sysfs_files(void) 6462 { 6463 gettimeofday(&procsysfs_tv_begin, (struct timezone *)NULL); 6464 6465 if (DO_BIC(BIC_IRQ) || DO_BIC(BIC_NMI)) 6466 if (snapshot_proc_interrupts()) 6467 return 1; 6468 6469 if (DO_BIC(BIC_GFX_rc6)) 6470 snapshot_graphics(GFX_rc6); 6471 6472 if (DO_BIC(BIC_GFXMHz)) 6473 snapshot_graphics(GFX_MHz); 6474 6475 if (DO_BIC(BIC_GFXACTMHz)) 6476 snapshot_graphics(GFX_ACTMHz); 6477 6478 if (DO_BIC(BIC_SAM_mc6)) 6479 snapshot_graphics(SAM_mc6); 6480 6481 if (DO_BIC(BIC_SAMMHz)) 6482 snapshot_graphics(SAM_MHz); 6483 6484 if (DO_BIC(BIC_SAMACTMHz)) 6485 snapshot_graphics(SAM_ACTMHz); 6486 6487 if (DO_BIC(BIC_CPU_LPI)) 6488 snapshot_cpu_lpi_us(); 6489 6490 if (DO_BIC(BIC_SYS_LPI)) 6491 snapshot_sys_lpi_us(); 6492 6493 return 0; 6494 } 6495 6496 int exit_requested; 6497 6498 static void signal_handler(int signal) 6499 { 6500 switch (signal) { 6501 case SIGINT: 6502 exit_requested = 1; 6503 if (debug) 6504 fprintf(stderr, " SIGINT\n"); 6505 break; 6506 case SIGUSR1: 6507 if (debug > 1) 6508 fprintf(stderr, "SIGUSR1\n"); 6509 break; 6510 } 6511 } 6512 6513 void setup_signal_handler(void) 6514 { 6515 struct sigaction sa; 6516 6517 memset(&sa, 0, sizeof(sa)); 6518 6519 sa.sa_handler = &signal_handler; 6520 6521 if (sigaction(SIGINT, &sa, NULL) < 0) 6522 err(1, "sigaction SIGINT"); 6523 if (sigaction(SIGUSR1, &sa, NULL) < 0) 6524 err(1, "sigaction SIGUSR1"); 6525 } 6526 6527 void do_sleep(void) 6528 { 6529 struct timeval tout; 6530 struct timespec rest; 6531 fd_set readfds; 6532 int retval; 6533 6534 FD_ZERO(&readfds); 6535 FD_SET(0, &readfds); 6536 6537 if (ignore_stdin) { 6538 nanosleep(&interval_ts, NULL); 6539 return; 6540 } 6541 6542 tout = interval_tv; 6543 retval = select(1, &readfds, NULL, NULL, &tout); 6544 6545 if (retval == 1) { 6546 switch (getc(stdin)) { 6547 case 'q': 6548 exit_requested = 1; 6549 break; 6550 case EOF: 6551 /* 6552 * 'stdin' is a pipe closed on the other end. There 6553 * won't be any further input. 
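			 * On Linux, select() updates 'tout' to the time it
			 * did not sleep, so the nanosleep() below covers
			 * only the remainder of the current interval.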
6554 */ 6555 ignore_stdin = 1; 6556 /* Sleep the rest of the time */ 6557 rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000); 6558 rest.tv_nsec = (tout.tv_usec % 1000000) * 1000; 6559 nanosleep(&rest, NULL); 6560 } 6561 } 6562 } 6563 6564 int get_msr_sum(int cpu, off_t offset, unsigned long long *msr) 6565 { 6566 int ret, idx; 6567 unsigned long long msr_cur, msr_last; 6568 6569 assert(!no_msr); 6570 6571 if (!per_cpu_msr_sum) 6572 return 1; 6573 6574 idx = offset_to_idx(offset); 6575 if (idx < 0) 6576 return idx; 6577 /* get_msr_sum() = sum + (get_msr() - last) */ 6578 ret = get_msr(cpu, offset, &msr_cur); 6579 if (ret) 6580 return ret; 6581 msr_last = per_cpu_msr_sum[cpu].entries[idx].last; 6582 DELTA_WRAP32(msr_cur, msr_last); 6583 *msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum; 6584 6585 return 0; 6586 } 6587 6588 timer_t timerid; 6589 6590 /* Timer callback, update the sum of MSRs periodically. */ 6591 static int update_msr_sum(PER_THREAD_PARAMS) 6592 { 6593 int i, ret; 6594 int cpu = t->cpu_id; 6595 6596 UNUSED(c); 6597 UNUSED(p); 6598 6599 assert(!no_msr); 6600 6601 for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) { 6602 unsigned long long msr_cur, msr_last; 6603 off_t offset; 6604 6605 if (!idx_valid(i)) 6606 continue; 6607 offset = idx_to_offset(i); 6608 if (offset < 0) 6609 continue; 6610 ret = get_msr(cpu, offset, &msr_cur); 6611 if (ret) { 6612 fprintf(outf, "Can not update msr(0x%llx)\n", (unsigned long long)offset); 6613 continue; 6614 } 6615 6616 msr_last = per_cpu_msr_sum[cpu].entries[i].last; 6617 per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff; 6618 6619 DELTA_WRAP32(msr_cur, msr_last); 6620 per_cpu_msr_sum[cpu].entries[i].sum += msr_last; 6621 } 6622 return 0; 6623 } 6624 6625 static void msr_record_handler(union sigval v) 6626 { 6627 UNUSED(v); 6628 6629 for_all_cpus(update_msr_sum, EVEN_COUNTERS); 6630 } 6631 6632 void msr_sum_record(void) 6633 { 6634 struct itimerspec its; 6635 struct sigevent sev; 6636 6637 per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array)); 6638 if (!per_cpu_msr_sum) { 6639 fprintf(outf, "Can not allocate memory for long time MSR.\n"); 6640 return; 6641 } 6642 /* 6643 * Signal handler might be restricted, so use thread notifier instead. 6644 */ 6645 memset(&sev, 0, sizeof(struct sigevent)); 6646 sev.sigev_notify = SIGEV_THREAD; 6647 sev.sigev_notify_function = msr_record_handler; 6648 6649 sev.sigev_value.sival_ptr = &timerid; 6650 if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) { 6651 fprintf(outf, "Can not create timer.\n"); 6652 goto release_msr; 6653 } 6654 6655 its.it_value.tv_sec = 0; 6656 its.it_value.tv_nsec = 1; 6657 /* 6658 * A wraparound time has been calculated early. 6659 * Some sources state that the peak power for a 6660 * microprocessor is usually 1.5 times the TDP rating, 6661 * use 2 * TDP for safety. 
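	 *
	 * Worked example: with the common 2^-16 J energy unit (~15.3 uJ)
	 * the 32-bit counter spans about 65536 Joules; at a 100 W TDP
	 * that is roughly 655 seconds, so sampling every
	 * rapl_joule_counter_range / 2 (~327 s) still catches every
	 * wraparound even at twice the rated power draw.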
6662 */ 6663 its.it_interval.tv_sec = rapl_joule_counter_range / 2; 6664 its.it_interval.tv_nsec = 0; 6665 6666 if (timer_settime(timerid, 0, &its, NULL) == -1) { 6667 fprintf(outf, "Can not set timer.\n"); 6668 goto release_timer; 6669 } 6670 return; 6671 6672 release_timer: 6673 timer_delete(timerid); 6674 release_msr: 6675 free(per_cpu_msr_sum); 6676 } 6677 6678 /* 6679 * set_my_sched_priority(pri) 6680 * return previous priority on success 6681 * return value < -20 on failure 6682 */ 6683 int set_my_sched_priority(int priority) 6684 { 6685 int retval; 6686 int original_priority; 6687 6688 errno = 0; 6689 original_priority = getpriority(PRIO_PROCESS, 0); 6690 if (errno && (original_priority == -1)) 6691 return -21; 6692 6693 retval = setpriority(PRIO_PROCESS, 0, priority); 6694 if (retval) 6695 return -21; 6696 6697 errno = 0; 6698 retval = getpriority(PRIO_PROCESS, 0); 6699 if (retval != priority) 6700 return -21; 6701 6702 return original_priority; 6703 } 6704 6705 void turbostat_loop() 6706 { 6707 int retval; 6708 int restarted = 0; 6709 unsigned int done_iters = 0; 6710 6711 setup_signal_handler(); 6712 6713 /* 6714 * elevate own priority for interval mode 6715 * 6716 * ignore on error - we probably don't have permission to set it, but 6717 * it's not a big deal 6718 */ 6719 set_my_sched_priority(-20); 6720 6721 restart: 6722 restarted++; 6723 6724 snapshot_proc_sysfs_files(); 6725 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 6726 first_counter_read = 0; 6727 if (retval < -1) { 6728 exit(retval); 6729 } else if (retval == -1) { 6730 if (restarted > 10) { 6731 exit(retval); 6732 } 6733 re_initialize(); 6734 goto restart; 6735 } 6736 restarted = 0; 6737 done_iters = 0; 6738 gettimeofday(&tv_even, (struct timezone *)NULL); 6739 6740 while (1) { 6741 if (for_all_proc_cpus(cpu_is_not_present)) { 6742 re_initialize(); 6743 goto restart; 6744 } 6745 if (update_effective_str(false)) { 6746 re_initialize(); 6747 goto restart; 6748 } 6749 do_sleep(); 6750 if (snapshot_proc_sysfs_files()) 6751 goto restart; 6752 retval = for_all_cpus(get_counters, ODD_COUNTERS); 6753 if (retval < -1) { 6754 exit(retval); 6755 } else if (retval == -1) { 6756 re_initialize(); 6757 goto restart; 6758 } 6759 gettimeofday(&tv_odd, (struct timezone *)NULL); 6760 timersub(&tv_odd, &tv_even, &tv_delta); 6761 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) { 6762 re_initialize(); 6763 goto restart; 6764 } 6765 delta_platform(&platform_counters_odd, &platform_counters_even); 6766 compute_average(EVEN_COUNTERS); 6767 format_all_counters(EVEN_COUNTERS); 6768 flush_output_stdout(); 6769 if (exit_requested) 6770 break; 6771 if (num_iterations && ++done_iters >= num_iterations) 6772 break; 6773 do_sleep(); 6774 if (snapshot_proc_sysfs_files()) 6775 goto restart; 6776 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 6777 if (retval < -1) { 6778 exit(retval); 6779 } else if (retval == -1) { 6780 re_initialize(); 6781 goto restart; 6782 } 6783 gettimeofday(&tv_even, (struct timezone *)NULL); 6784 timersub(&tv_even, &tv_odd, &tv_delta); 6785 if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) { 6786 re_initialize(); 6787 goto restart; 6788 } 6789 delta_platform(&platform_counters_even, &platform_counters_odd); 6790 compute_average(ODD_COUNTERS); 6791 format_all_counters(ODD_COUNTERS); 6792 flush_output_stdout(); 6793 if (exit_requested) 6794 break; 6795 if (num_iterations && ++done_iters >= num_iterations) 6796 break; 6797 } 6798 } 6799 6800 void check_dev_msr() 6801 { 6802 struct stat sb; 6803 char 
pathname[32]; 6804 6805 if (no_msr) 6806 return; 6807 #if defined(ANDROID) 6808 sprintf(pathname, "/dev/msr%d", base_cpu); 6809 #else 6810 sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); 6811 #endif 6812 if (stat(pathname, &sb)) 6813 if (system("/sbin/modprobe msr > /dev/null 2>&1")) 6814 no_msr = 1; 6815 } 6816 6817 /* 6818 * check for CAP_SYS_RAWIO 6819 * return 0 on success 6820 * return 1 on fail 6821 */ 6822 int check_for_cap_sys_rawio(void) 6823 { 6824 cap_t caps; 6825 cap_flag_value_t cap_flag_value; 6826 int ret = 0; 6827 6828 caps = cap_get_proc(); 6829 if (caps == NULL) { 6830 /* 6831 * CONFIG_MULTIUSER=n kernels have no cap_get_proc() 6832 * Allow them to continue and attempt to access MSRs 6833 */ 6834 if (errno == ENOSYS) 6835 return 0; 6836 6837 return 1; 6838 } 6839 6840 if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) { 6841 ret = 1; 6842 goto free_and_exit; 6843 } 6844 6845 if (cap_flag_value != CAP_SET) { 6846 ret = 1; 6847 goto free_and_exit; 6848 } 6849 6850 free_and_exit: 6851 if (cap_free(caps) == -1) 6852 err(-6, "cap_free\n"); 6853 6854 return ret; 6855 } 6856 6857 void check_msr_permission(void) 6858 { 6859 int failed = 0; 6860 char pathname[32]; 6861 6862 if (no_msr) 6863 return; 6864 6865 /* check for CAP_SYS_RAWIO */ 6866 failed += check_for_cap_sys_rawio(); 6867 6868 /* test file permissions */ 6869 #if defined(ANDROID) 6870 sprintf(pathname, "/dev/msr%d", base_cpu); 6871 #else 6872 sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); 6873 #endif 6874 if (euidaccess(pathname, R_OK)) { 6875 failed++; 6876 } 6877 6878 if (failed) { 6879 warnx("Failed to access %s. Some of the counters may not be available\n" 6880 "\tRun as root to enable them or use %s to disable the access explicitly", pathname, "--no-msr"); 6881 no_msr = 1; 6882 } 6883 } 6884 6885 void probe_bclk(void) 6886 { 6887 unsigned long long msr; 6888 unsigned int base_ratio; 6889 6890 if (!platform->has_nhm_msrs || no_msr) 6891 return; 6892 6893 if (platform->bclk_freq == BCLK_100MHZ) 6894 bclk = 100.00; 6895 else if (platform->bclk_freq == BCLK_133MHZ) 6896 bclk = 133.33; 6897 else if (platform->bclk_freq == BCLK_SLV) 6898 bclk = slm_bclk(); 6899 else 6900 return; 6901 6902 get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); 6903 base_ratio = (msr >> 8) & 0xFF; 6904 6905 base_hz = base_ratio * bclk * 1000000; 6906 has_base_hz = 1; 6907 6908 if (platform->enable_tsc_tweak) 6909 tsc_tweak = base_hz / tsc_hz; 6910 } 6911 6912 static void remove_underbar(char *s) 6913 { 6914 char *to = s; 6915 6916 while (*s) { 6917 if (*s != '_') 6918 *to++ = *s; 6919 s++; 6920 } 6921 6922 *to = 0; 6923 } 6924 6925 static void dump_turbo_ratio_info(void) 6926 { 6927 if (!has_turbo) 6928 return; 6929 6930 if (!platform->has_nhm_msrs || no_msr) 6931 return; 6932 6933 if (platform->trl_msrs & TRL_LIMIT2) 6934 dump_turbo_ratio_limit2(); 6935 6936 if (platform->trl_msrs & TRL_LIMIT1) 6937 dump_turbo_ratio_limit1(); 6938 6939 if (platform->trl_msrs & TRL_BASE) { 6940 dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT); 6941 6942 if (is_hybrid) 6943 dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT); 6944 } 6945 6946 if (platform->trl_msrs & TRL_ATOM) 6947 dump_atom_turbo_ratio_limits(); 6948 6949 if (platform->trl_msrs & TRL_KNL) 6950 dump_knl_turbo_ratio_limits(); 6951 6952 if (platform->has_config_tdp) 6953 dump_config_tdp(); 6954 } 6955 6956 static int read_sysfs_int(char *path) 6957 { 6958 FILE *input; 6959 int retval = -1; 6960 6961 input = fopen(path, "r"); 6962 if (input == NULL) { 6963 if (debug) 6964 
fprintf(outf, "NSFOD %s\n", path); 6965 return (-1); 6966 } 6967 if (fscanf(input, "%d", &retval) != 1) 6968 err(1, "%s: failed to read int from file", path); 6969 fclose(input); 6970 6971 return (retval); 6972 } 6973 6974 static void dump_sysfs_file(char *path) 6975 { 6976 FILE *input; 6977 char cpuidle_buf[64]; 6978 6979 input = fopen(path, "r"); 6980 if (input == NULL) { 6981 if (debug) 6982 fprintf(outf, "NSFOD %s\n", path); 6983 return; 6984 } 6985 if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input)) 6986 err(1, "%s: failed to read file", path); 6987 fclose(input); 6988 6989 fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf); 6990 } 6991 6992 static void probe_intel_uncore_frequency_legacy(void) 6993 { 6994 int i, j; 6995 char path[256]; 6996 6997 for (i = 0; i < topo.num_packages; ++i) { 6998 for (j = 0; j <= topo.max_die_id; ++j) { 6999 int k, l; 7000 char path_base[128]; 7001 7002 sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i, 7003 j); 7004 7005 sprintf(path, "%s/current_freq_khz", path_base); 7006 if (access(path, R_OK)) 7007 continue; 7008 7009 BIC_PRESENT(BIC_UNCORE_MHZ); 7010 7011 if (quiet) 7012 return; 7013 7014 sprintf(path, "%s/min_freq_khz", path_base); 7015 k = read_sysfs_int(path); 7016 sprintf(path, "%s/max_freq_khz", path_base); 7017 l = read_sysfs_int(path); 7018 fprintf(outf, "Uncore Frequency package%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000); 7019 7020 sprintf(path, "%s/initial_min_freq_khz", path_base); 7021 k = read_sysfs_int(path); 7022 sprintf(path, "%s/initial_max_freq_khz", path_base); 7023 l = read_sysfs_int(path); 7024 fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000); 7025 7026 sprintf(path, "%s/current_freq_khz", path_base); 7027 k = read_sysfs_int(path); 7028 fprintf(outf, " %d MHz\n", k / 1000); 7029 } 7030 } 7031 } 7032 7033 static void probe_intel_uncore_frequency_cluster(void) 7034 { 7035 int i, uncore_max_id; 7036 char path[256]; 7037 char path_base[128]; 7038 7039 if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK)) 7040 return; 7041 7042 for (uncore_max_id = 0;; ++uncore_max_id) { 7043 7044 sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", uncore_max_id); 7045 7046 /* uncore## start at 00 and skips no numbers, so stop upon first missing */ 7047 if (access(path_base, R_OK)) { 7048 uncore_max_id -= 1; 7049 break; 7050 } 7051 } 7052 for (i = uncore_max_id; i >= 0; --i) { 7053 int k, l; 7054 int package_id, domain_id, cluster_id; 7055 char name_buf[16]; 7056 7057 sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", i); 7058 7059 if (access(path_base, R_OK)) 7060 err(1, "%s: %s\n", __func__, path_base); 7061 7062 sprintf(path, "%s/package_id", path_base); 7063 package_id = read_sysfs_int(path); 7064 7065 sprintf(path, "%s/domain_id", path_base); 7066 domain_id = read_sysfs_int(path); 7067 7068 sprintf(path, "%s/fabric_cluster_id", path_base); 7069 cluster_id = read_sysfs_int(path); 7070 7071 sprintf(path, "%s/current_freq_khz", path_base); 7072 sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id); 7073 7074 /* 7075 * Once add_couter() is called, that counter is always read 7076 * and reported -- So it is effectively (enabled & present). 7077 * Only call add_counter() here if legacy BIC_UNCORE_MHZ (UncMHz) 7078 * is (enabled). Since we are in this routine, we 7079 * know we will not probe and set (present) the legacy counter. 
7080 * 7081 * This allows "--show/--hide UncMHz" to be effective for 7082 * the clustered MHz counters, as a group. 7083 */ 7084 if BIC_IS_ENABLED 7085 (BIC_UNCORE_MHZ) 7086 add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, 7087 package_id); 7088 7089 if (quiet) 7090 continue; 7091 7092 sprintf(path, "%s/min_freq_khz", path_base); 7093 k = read_sysfs_int(path); 7094 sprintf(path, "%s/max_freq_khz", path_base); 7095 l = read_sysfs_int(path); 7096 fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id, 7097 cluster_id, k / 1000, l / 1000); 7098 7099 sprintf(path, "%s/initial_min_freq_khz", path_base); 7100 k = read_sysfs_int(path); 7101 sprintf(path, "%s/initial_max_freq_khz", path_base); 7102 l = read_sysfs_int(path); 7103 fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000); 7104 7105 sprintf(path, "%s/current_freq_khz", path_base); 7106 k = read_sysfs_int(path); 7107 fprintf(outf, " %d MHz\n", k / 1000); 7108 } 7109 } 7110 7111 static void probe_intel_uncore_frequency(void) 7112 { 7113 if (!genuine_intel) 7114 return; 7115 7116 if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK) == 0) 7117 probe_intel_uncore_frequency_cluster(); 7118 else 7119 probe_intel_uncore_frequency_legacy(); 7120 } 7121 7122 static void set_graphics_fp(char *path, int idx) 7123 { 7124 if (!access(path, R_OK)) 7125 gfx_info[idx].fp = fopen_or_die(path, "r"); 7126 } 7127 7128 /* Enlarge this if there are /sys/class/drm/card2 ... */ 7129 #define GFX_MAX_CARDS 2 7130 7131 static void probe_graphics(void) 7132 { 7133 char path[PATH_MAX]; 7134 int i; 7135 7136 /* Xe graphics sysfs knobs */ 7137 if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) { 7138 FILE *fp; 7139 char buf[8]; 7140 bool gt0_is_gt; 7141 7142 fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r"); 7143 if (!fp) 7144 goto next; 7145 7146 if (!fread(buf, sizeof(char), 7, fp)) { 7147 fclose(fp); 7148 goto next; 7149 } 7150 fclose(fp); 7151 7152 if (!strncmp(buf, "gt0-rc", strlen("gt0-rc"))) 7153 gt0_is_gt = true; 7154 else if (!strncmp(buf, "gt0-mc", strlen("gt0-mc"))) 7155 gt0_is_gt = false; 7156 else 7157 goto next; 7158 7159 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", 7160 gt0_is_gt ? GFX_rc6 : SAM_mc6); 7161 7162 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", gt0_is_gt ? GFX_MHz : SAM_MHz); 7163 7164 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", 7165 gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz); 7166 7167 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", 7168 gt0_is_gt ? SAM_mc6 : GFX_rc6); 7169 7170 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", gt0_is_gt ? SAM_MHz : GFX_MHz); 7171 7172 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", 7173 gt0_is_gt ? 
SAM_ACTMHz : GFX_ACTMHz); 7174 7175 goto end; 7176 } 7177 7178 next: 7179 /* New i915 graphics sysfs knobs */ 7180 for (i = 0; i < GFX_MAX_CARDS; i++) { 7181 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rc6_residency_ms", i); 7182 if (!access(path, R_OK)) 7183 break; 7184 } 7185 7186 if (i == GFX_MAX_CARDS) 7187 goto legacy_i915; 7188 7189 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rc6_residency_ms", i); 7190 set_graphics_fp(path, GFX_rc6); 7191 7192 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rps_cur_freq_mhz", i); 7193 set_graphics_fp(path, GFX_MHz); 7194 7195 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rps_act_freq_mhz", i); 7196 set_graphics_fp(path, GFX_ACTMHz); 7197 7198 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rc6_residency_ms", i); 7199 set_graphics_fp(path, SAM_mc6); 7200 7201 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rps_cur_freq_mhz", i); 7202 set_graphics_fp(path, SAM_MHz); 7203 7204 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rps_act_freq_mhz", i); 7205 set_graphics_fp(path, SAM_ACTMHz); 7206 7207 goto end; 7208 7209 legacy_i915: 7210 /* Fall back to traditional i915 graphics sysfs knobs */ 7211 set_graphics_fp("/sys/class/drm/card0/power/rc6_residency_ms", GFX_rc6); 7212 7213 set_graphics_fp("/sys/class/drm/card0/gt_cur_freq_mhz", GFX_MHz); 7214 if (!gfx_info[GFX_MHz].fp) 7215 set_graphics_fp("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", GFX_MHz); 7216 7217 set_graphics_fp("/sys/class/drm/card0/gt_act_freq_mhz", GFX_ACTMHz); 7218 if (!gfx_info[GFX_ACTMHz].fp) 7219 set_graphics_fp("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", GFX_ACTMHz); 7220 7221 end: 7222 if (gfx_info[GFX_rc6].fp) 7223 BIC_PRESENT(BIC_GFX_rc6); 7224 if (gfx_info[GFX_MHz].fp) 7225 BIC_PRESENT(BIC_GFXMHz); 7226 if (gfx_info[GFX_ACTMHz].fp) 7227 BIC_PRESENT(BIC_GFXACTMHz); 7228 if (gfx_info[SAM_mc6].fp) 7229 BIC_PRESENT(BIC_SAM_mc6); 7230 if (gfx_info[SAM_MHz].fp) 7231 BIC_PRESENT(BIC_SAMMHz); 7232 if (gfx_info[SAM_ACTMHz].fp) 7233 BIC_PRESENT(BIC_SAMACTMHz); 7234 } 7235 7236 static void dump_sysfs_cstate_config(void) 7237 { 7238 char path[64]; 7239 char name_buf[16]; 7240 char desc[64]; 7241 FILE *input; 7242 int state; 7243 char *sp; 7244 7245 if (access("/sys/devices/system/cpu/cpuidle", R_OK)) { 7246 fprintf(outf, "cpuidle not loaded\n"); 7247 return; 7248 } 7249 7250 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver"); 7251 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor"); 7252 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro"); 7253 7254 for (state = 0; state < 10; ++state) { 7255 7256 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); 7257 input = fopen(path, "r"); 7258 if (input == NULL) 7259 continue; 7260 if (!fgets(name_buf, sizeof(name_buf), input)) 7261 err(1, "%s: failed to read file", path); 7262 7263 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ 7264 sp = strchr(name_buf, '-'); 7265 if (!sp) 7266 sp = strchrnul(name_buf, '\n'); 7267 *sp = '\0'; 7268 fclose(input); 7269 7270 remove_underbar(name_buf); 7271 7272 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state); 7273 input = fopen(path, "r"); 7274 if (input == NULL) 7275 continue; 7276 if (!fgets(desc, sizeof(desc), input)) 7277 err(1, "%s: failed to read file", path); 7278 7279 fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc); 7280 fclose(input); 7281 } 7282 } 7283 7284 static void 
dump_sysfs_pstate_config(void) 7285 { 7286 char path[64]; 7287 char driver_buf[64]; 7288 char governor_buf[64]; 7289 FILE *input; 7290 int turbo; 7291 7292 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", base_cpu); 7293 input = fopen(path, "r"); 7294 if (input == NULL) { 7295 fprintf(outf, "NSFOD %s\n", path); 7296 return; 7297 } 7298 if (!fgets(driver_buf, sizeof(driver_buf), input)) 7299 err(1, "%s: failed to read file", path); 7300 fclose(input); 7301 7302 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", base_cpu); 7303 input = fopen(path, "r"); 7304 if (input == NULL) { 7305 fprintf(outf, "NSFOD %s\n", path); 7306 return; 7307 } 7308 if (!fgets(governor_buf, sizeof(governor_buf), input)) 7309 err(1, "%s: failed to read file", path); 7310 fclose(input); 7311 7312 fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf); 7313 fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf); 7314 7315 sprintf(path, "/sys/devices/system/cpu/cpufreq/boost"); 7316 input = fopen(path, "r"); 7317 if (input != NULL) { 7318 if (fscanf(input, "%d", &turbo) != 1) 7319 err(1, "%s: failed to parse number from file", path); 7320 fprintf(outf, "cpufreq boost: %d\n", turbo); 7321 fclose(input); 7322 } 7323 7324 sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo"); 7325 input = fopen(path, "r"); 7326 if (input != NULL) { 7327 if (fscanf(input, "%d", &turbo) != 1) 7328 err(1, "%s: failed to parse number from file", path); 7329 fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo); 7330 fclose(input); 7331 } 7332 } 7333 7334 /* 7335 * print_epb() 7336 * Decode the ENERGY_PERF_BIAS MSR 7337 */ 7338 int print_epb(PER_THREAD_PARAMS) 7339 { 7340 char *epb_string; 7341 int cpu, epb; 7342 7343 UNUSED(c); 7344 UNUSED(p); 7345 7346 if (!has_epb) 7347 return 0; 7348 7349 cpu = t->cpu_id; 7350 7351 /* EPB is per-package */ 7352 if (!is_cpu_first_thread_in_package(t, c, p)) 7353 return 0; 7354 7355 if (cpu_migrate(cpu)) { 7356 fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu); 7357 return -1; 7358 } 7359 7360 epb = get_epb(cpu); 7361 if (epb < 0) 7362 return 0; 7363 7364 switch (epb) { 7365 case ENERGY_PERF_BIAS_PERFORMANCE: 7366 epb_string = "performance"; 7367 break; 7368 case ENERGY_PERF_BIAS_NORMAL: 7369 epb_string = "balanced"; 7370 break; 7371 case ENERGY_PERF_BIAS_POWERSAVE: 7372 epb_string = "powersave"; 7373 break; 7374 default: 7375 epb_string = "custom"; 7376 break; 7377 } 7378 fprintf(outf, "cpu%d: EPB: %d (%s)\n", cpu, epb, epb_string); 7379 7380 return 0; 7381 } 7382 7383 /* 7384 * print_hwp() 7385 * Decode the MSR_HWP_CAPABILITIES 7386 */ 7387 int print_hwp(PER_THREAD_PARAMS) 7388 { 7389 unsigned long long msr; 7390 int cpu; 7391 7392 UNUSED(c); 7393 UNUSED(p); 7394 7395 if (no_msr) 7396 return 0; 7397 7398 if (!has_hwp) 7399 return 0; 7400 7401 cpu = t->cpu_id; 7402 7403 /* MSR_HWP_CAPABILITIES is per-package */ 7404 if (!is_cpu_first_thread_in_package(t, c, p)) 7405 return 0; 7406 7407 if (cpu_migrate(cpu)) { 7408 fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu); 7409 return -1; 7410 } 7411 7412 if (get_msr(cpu, MSR_PM_ENABLE, &msr)) 7413 return 0; 7414 7415 fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", cpu, msr, (msr & (1 << 0)) ? 
"" : "No-"); 7416 7417 /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */ 7418 if ((msr & (1 << 0)) == 0) 7419 return 0; 7420 7421 if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr)) 7422 return 0; 7423 7424 fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx " 7425 "(high %d guar %d eff %d low %d)\n", 7426 cpu, msr, 7427 (unsigned int)HWP_HIGHEST_PERF(msr), 7428 (unsigned int)HWP_GUARANTEED_PERF(msr), 7429 (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr)); 7430 7431 if (get_msr(cpu, MSR_HWP_REQUEST, &msr)) 7432 return 0; 7433 7434 fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx " 7435 "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n", 7436 cpu, msr, 7437 (unsigned int)(((msr) >> 0) & 0xff), 7438 (unsigned int)(((msr) >> 8) & 0xff), 7439 (unsigned int)(((msr) >> 16) & 0xff), 7440 (unsigned int)(((msr) >> 24) & 0xff), 7441 (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1)); 7442 7443 if (has_hwp_pkg) { 7444 if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr)) 7445 return 0; 7446 7447 fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx " 7448 "(min %d max %d des %d epp 0x%x window 0x%x)\n", 7449 cpu, msr, 7450 (unsigned int)(((msr) >> 0) & 0xff), 7451 (unsigned int)(((msr) >> 8) & 0xff), 7452 (unsigned int)(((msr) >> 16) & 0xff), 7453 (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3)); 7454 } 7455 if (has_hwp_notify) { 7456 if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr)) 7457 return 0; 7458 7459 fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx " 7460 "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n", 7461 cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis"); 7462 } 7463 if (get_msr(cpu, MSR_HWP_STATUS, &msr)) 7464 return 0; 7465 7466 fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx " 7467 "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", 7468 cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-"); 7469 7470 return 0; 7471 } 7472 7473 /* 7474 * print_perf_limit() 7475 */ 7476 int print_perf_limit(PER_THREAD_PARAMS) 7477 { 7478 unsigned long long msr; 7479 int cpu; 7480 7481 UNUSED(c); 7482 UNUSED(p); 7483 7484 if (no_msr) 7485 return 0; 7486 7487 cpu = t->cpu_id; 7488 7489 /* per-package */ 7490 if (!is_cpu_first_thread_in_package(t, c, p)) 7491 return 0; 7492 7493 if (cpu_migrate(cpu)) { 7494 fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu); 7495 return -1; 7496 } 7497 7498 if (platform->plr_msrs & PLR_CORE) { 7499 get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); 7500 fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 7501 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", 7502 (msr & 1 << 15) ? "bit15, " : "", 7503 (msr & 1 << 14) ? "bit14, " : "", 7504 (msr & 1 << 13) ? "Transitions, " : "", 7505 (msr & 1 << 12) ? "MultiCoreTurbo, " : "", 7506 (msr & 1 << 11) ? "PkgPwrL2, " : "", 7507 (msr & 1 << 10) ? "PkgPwrL1, " : "", 7508 (msr & 1 << 9) ? "CorePwr, " : "", 7509 (msr & 1 << 8) ? "Amps, " : "", 7510 (msr & 1 << 6) ? "VR-Therm, " : "", 7511 (msr & 1 << 5) ? "Auto-HWP, " : "", 7512 (msr & 1 << 4) ? "Graphics, " : "", 7513 (msr & 1 << 2) ? "bit2, " : "", 7514 (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : ""); 7515 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", 7516 (msr & 1 << 31) ? "bit31, " : "", 7517 (msr & 1 << 30) ? "bit30, " : "", 7518 (msr & 1 << 29) ? "Transitions, " : "", 7519 (msr & 1 << 28) ? "MultiCoreTurbo, " : "", 7520 (msr & 1 << 27) ? "PkgPwrL2, " : "", 7521 (msr & 1 << 26) ? 
"PkgPwrL1, " : "", 7522 (msr & 1 << 25) ? "CorePwr, " : "", 7523 (msr & 1 << 24) ? "Amps, " : "", 7524 (msr & 1 << 22) ? "VR-Therm, " : "", 7525 (msr & 1 << 21) ? "Auto-HWP, " : "", 7526 (msr & 1 << 20) ? "Graphics, " : "", 7527 (msr & 1 << 18) ? "bit18, " : "", 7528 (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : ""); 7529 7530 } 7531 if (platform->plr_msrs & PLR_GFX) { 7532 get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); 7533 fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 7534 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)", 7535 (msr & 1 << 0) ? "PROCHOT, " : "", 7536 (msr & 1 << 1) ? "ThermStatus, " : "", 7537 (msr & 1 << 4) ? "Graphics, " : "", 7538 (msr & 1 << 6) ? "VR-Therm, " : "", 7539 (msr & 1 << 8) ? "Amps, " : "", 7540 (msr & 1 << 9) ? "GFXPwr, " : "", 7541 (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); 7542 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n", 7543 (msr & 1 << 16) ? "PROCHOT, " : "", 7544 (msr & 1 << 17) ? "ThermStatus, " : "", 7545 (msr & 1 << 20) ? "Graphics, " : "", 7546 (msr & 1 << 22) ? "VR-Therm, " : "", 7547 (msr & 1 << 24) ? "Amps, " : "", 7548 (msr & 1 << 25) ? "GFXPwr, " : "", 7549 (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); 7550 } 7551 if (platform->plr_msrs & PLR_RING) { 7552 get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); 7553 fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 7554 fprintf(outf, " (Active: %s%s%s%s%s%s)", 7555 (msr & 1 << 0) ? "PROCHOT, " : "", 7556 (msr & 1 << 1) ? "ThermStatus, " : "", 7557 (msr & 1 << 6) ? "VR-Therm, " : "", 7558 (msr & 1 << 8) ? "Amps, " : "", 7559 (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); 7560 fprintf(outf, " (Logged: %s%s%s%s%s%s)\n", 7561 (msr & 1 << 16) ? "PROCHOT, " : "", 7562 (msr & 1 << 17) ? "ThermStatus, " : "", 7563 (msr & 1 << 22) ? "VR-Therm, " : "", 7564 (msr & 1 << 24) ? "Amps, " : "", 7565 (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? 
"PkgPwrL2, " : ""); 7566 } 7567 return 0; 7568 } 7569 7570 #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ 7571 #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ 7572 7573 double get_quirk_tdp(void) 7574 { 7575 if (platform->rapl_quirk_tdp) 7576 return platform->rapl_quirk_tdp; 7577 7578 return 135.0; 7579 } 7580 7581 double get_tdp_intel(void) 7582 { 7583 unsigned long long msr; 7584 7585 if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) 7586 if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr)) 7587 return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; 7588 return get_quirk_tdp(); 7589 } 7590 7591 double get_tdp_amd(void) 7592 { 7593 return get_quirk_tdp(); 7594 } 7595 7596 void rapl_probe_intel(void) 7597 { 7598 unsigned long long msr; 7599 unsigned int time_unit; 7600 double tdp; 7601 7602 if (rapl_joules) { 7603 CLR_BIC(BIC_SysWatt, &bic_enabled); 7604 CLR_BIC(BIC_PkgWatt, &bic_enabled); 7605 CLR_BIC(BIC_CorWatt, &bic_enabled); 7606 CLR_BIC(BIC_RAMWatt, &bic_enabled); 7607 CLR_BIC(BIC_GFXWatt, &bic_enabled); 7608 } else { 7609 CLR_BIC(BIC_Sys_J, &bic_enabled); 7610 CLR_BIC(BIC_Pkg_J, &bic_enabled); 7611 CLR_BIC(BIC_Cor_J, &bic_enabled); 7612 CLR_BIC(BIC_RAM_J, &bic_enabled); 7613 CLR_BIC(BIC_GFX_J, &bic_enabled); 7614 } 7615 7616 if (!platform->rapl_msrs || no_msr) 7617 return; 7618 7619 if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS)) 7620 CLR_BIC(BIC_PKG__, &bic_enabled); 7621 if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)) 7622 CLR_BIC(BIC_RAM__, &bic_enabled); 7623 7624 /* units on package 0, verify later other packages match */ 7625 if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr)) 7626 return; 7627 7628 rapl_power_units = 1.0 / (1 << (msr & 0xF)); 7629 if (platform->has_rapl_divisor) 7630 rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000; 7631 else 7632 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); 7633 7634 if (platform->has_fixed_rapl_unit) 7635 rapl_dram_energy_units = (15.3 / 1000000); 7636 else 7637 rapl_dram_energy_units = rapl_energy_units; 7638 7639 if (platform->has_fixed_rapl_psys_unit) 7640 rapl_psys_energy_units = 1.0; 7641 else 7642 rapl_psys_energy_units = rapl_energy_units; 7643 7644 time_unit = msr >> 16 & 0xF; 7645 if (time_unit == 0) 7646 time_unit = 0xA; 7647 7648 rapl_time_units = 1.0 / (1 << (time_unit)); 7649 7650 tdp = get_tdp_intel(); 7651 7652 rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; 7653 if (!quiet) 7654 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); 7655 } 7656 7657 void rapl_probe_amd(void) 7658 { 7659 unsigned long long msr; 7660 double tdp; 7661 7662 if (rapl_joules) { 7663 CLR_BIC(BIC_SysWatt, &bic_enabled); 7664 CLR_BIC(BIC_CorWatt, &bic_enabled); 7665 } else { 7666 CLR_BIC(BIC_Pkg_J, &bic_enabled); 7667 CLR_BIC(BIC_Cor_J, &bic_enabled); 7668 } 7669 7670 if (!platform->rapl_msrs || no_msr) 7671 return; 7672 7673 if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr)) 7674 return; 7675 7676 rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf)); 7677 rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f)); 7678 rapl_power_units = ldexp(1.0, -(msr & 0xf)); 7679 7680 tdp = get_tdp_amd(); 7681 7682 rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; 7683 if (!quiet) 7684 fprintf(outf, "RAPL: %.0f sec. 
7686 7687 void print_power_limit_msr(int cpu, unsigned long long msr, char *label) 7688 { 7689 fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n", 7690 cpu, label, 7691 ((msr >> 15) & 1) ? "EN" : "DIS", 7692 ((msr >> 0) & 0x7FFF) * rapl_power_units, 7693 (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, 7694 (((msr >> 16) & 1) ? "EN" : "DIS")); 7695 7696 return; 7697 } 7698 7699 static int fread_int(char *path, int *val) 7700 { 7701 FILE *filep; 7702 int ret; 7703 7704 filep = fopen(path, "r"); 7705 if (!filep) 7706 return -1; 7707 7708 ret = fscanf(filep, "%d", val); 7709 fclose(filep); 7710 return ret; 7711 } 7712 7713 static int fread_ull(char *path, unsigned long long *val) 7714 { 7715 FILE *filep; 7716 int ret; 7717 7718 filep = fopen(path, "r"); 7719 if (!filep) 7720 return -1; 7721 7722 ret = fscanf(filep, "%llu", val); 7723 fclose(filep); 7724 return ret; 7725 } 7726 7727 static int fread_str(char *path, char *buf, int size) 7728 { 7729 FILE *filep; 7730 int ret; 7731 char *cp; 7732 7733 filep = fopen(path, "r"); 7734 if (!filep) 7735 return -1; 7736 7737 ret = fread(buf, 1, size - 1, filep); 7738 fclose(filep); 7739 buf[ret] = '\0'; /* fread() does not NUL-terminate */ 7740 /* replace '\n' with '\0' */ 7741 cp = strchr(buf, '\n'); 7742 if (cp != NULL) 7743 *cp = '\0'; 7744 7745 return ret; 7746 } 7747 7748 #define PATH_RAPL_SYSFS "/sys/class/powercap" 7749 7750 static int dump_one_domain(char *domain_path) 7751 { 7752 char path[PATH_MAX]; 7753 char str[PATH_MAX]; 7754 unsigned long long val; 7755 int constraint; 7756 int enable; 7757 int ret; 7758 7759 snprintf(path, PATH_MAX, "%s/name", domain_path); 7760 ret = fread_str(path, str, PATH_MAX); 7761 if (ret <= 0) 7762 return -1; 7763 7764 fprintf(outf, "%s: %s", domain_path + strlen(PATH_RAPL_SYSFS) + 1, str); 7765 7766 snprintf(path, PATH_MAX, "%s/enabled", domain_path); 7767 ret = fread_int(path, &enable); 7768 if (ret <= 0) 7769 return -1; 7770 7771 if (!enable) { 7772 fputs(" disabled\n", outf); 7773 return 0; 7774 } 7775 7776 for (constraint = 0;; constraint++) { 7777 snprintf(path, PATH_MAX, "%s/constraint_%d_time_window_us", domain_path, constraint); 7778 ret = fread_ull(path, &val); 7779 if (ret <= 0) 7780 break; 7781 7782 if (val > 1000000) 7783 fprintf(outf, " %0.1fs", (double)val / 1000000); 7784 else if (val > 1000) 7785 fprintf(outf, " %0.1fms", (double)val / 1000); 7786 else 7787 fprintf(outf, " %0.1fus", (double)val); 7788 7789 snprintf(path, PATH_MAX, "%s/constraint_%d_power_limit_uw", domain_path, constraint); 7790 ret = fread_ull(path, &val); 7791 if (ret > 0 && val) 7792 fprintf(outf, ":%lluW", val / 1000000); 7793 7794 snprintf(path, PATH_MAX, "%s/constraint_%d_max_power_uw", domain_path, constraint); 7795 ret = fread_ull(path, &val); 7796 if (ret > 0 && val) 7797 fprintf(outf, ",max:%lluW", val / 1000000); 7798 } 7799 fputc('\n', outf); 7800 7801 return 0; 7802 } 7803 7804 static int print_rapl_sysfs(void) 7805 { 7806 DIR *dir, *cdir; 7807 struct dirent *entry, *centry; 7808 char path[PATH_MAX]; 7809 char str[PATH_MAX]; 7810 7811 if ((dir = opendir(PATH_RAPL_SYSFS)) == NULL) { 7812 warn("open %s failed", PATH_RAPL_SYSFS); 7813 return 1; 7814 } 7815 7816 while ((entry = readdir(dir)) != NULL) { 7817 if (strlen(entry->d_name) > 100) 7818 continue; 7819 7820 if (strncmp(entry->d_name, "intel-rapl", strlen("intel-rapl"))) 7821 continue; 7822 7823 snprintf(path, PATH_MAX, "%s/%s/name", PATH_RAPL_SYSFS, entry->d_name); 7824 7825 /* Parse 
top level domains first, including package and psys */ 7826 fread_str(path, str, PATH_MAX); 7827 if (strncmp(str, "package", strlen("package")) && strncmp(str, "psys", strlen("psys"))) 7828 continue; 7829 7830 snprintf(path, PATH_MAX, "%s/%s", PATH_RAPL_SYSFS, entry->d_name); 7831 if ((cdir = opendir(path)) == NULL) { 7832 perror("opendir() error"); 7833 return 1; 7834 } 7835 7836 dump_one_domain(path); 7837 7838 while ((centry = readdir(cdir)) != NULL) { 7839 if (strncmp(centry->d_name, "intel-rapl", strlen("intel-rapl"))) 7840 continue; 7841 snprintf(path, PATH_MAX, "%s/%s/%s", PATH_RAPL_SYSFS, entry->d_name, centry->d_name); 7842 dump_one_domain(path); 7843 } 7844 closedir(cdir); 7845 } 7846 7847 closedir(dir); 7848 return 0; 7849 } 7850 7851 int print_rapl(PER_THREAD_PARAMS) 7852 { 7853 unsigned long long msr; 7854 const char *msr_name; 7855 int cpu; 7856 7857 UNUSED(c); 7858 UNUSED(p); 7859 7860 if (!platform->rapl_msrs) 7861 return 0; 7862 7863 /* RAPL counters are per package, so print only for 1st thread/package */ 7864 if (!is_cpu_first_thread_in_package(t, c, p)) 7865 return 0; 7866 7867 cpu = t->cpu_id; 7868 if (cpu_migrate(cpu)) { 7869 fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu); 7870 return -1; 7871 } 7872 7873 if (platform->rapl_msrs & RAPL_AMD_F17H) { 7874 msr_name = "MSR_RAPL_PWR_UNIT"; 7875 if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr)) 7876 return -1; 7877 } else { 7878 msr_name = "MSR_RAPL_POWER_UNIT"; 7879 if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) 7880 return -1; 7881 } 7882 7883 fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr, 7884 rapl_power_units, rapl_energy_units, rapl_time_units); 7885 7886 if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) { 7887 7888 if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) 7889 return -5; 7890 7891 fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 7892 cpu, msr, 7893 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7894 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7895 ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7896 ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); 7897 7898 } 7899 if (platform->rapl_msrs & RAPL_PKG) { 7900 7901 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) 7902 return -9; 7903 7904 fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", 7905 cpu, msr, (msr >> 63) & 1 ? "" : "UN"); 7906 7907 print_power_limit_msr(cpu, msr, "PKG Limit #1"); 7908 fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n", 7909 cpu, 7910 ((msr >> 47) & 1) ? "EN" : "DIS", 7911 ((msr >> 32) & 0x7FFF) * rapl_power_units, 7912 (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, 7913 ((msr >> 48) & 1) ? "EN" : "DIS"); 7914 7915 if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr)) 7916 return -9; 7917 7918 fprintf(outf, "cpu%d: MSR_VR_CURRENT_CONFIG: 0x%08llx\n", cpu, msr); 7919 fprintf(outf, "cpu%d: PKG Limit #4: %f Watts (%slocked)\n", 7920 cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? 
"" : "UN"); 7921 } 7922 7923 if (platform->rapl_msrs & RAPL_DRAM_POWER_INFO) { 7924 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) 7925 return -6; 7926 7927 fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 7928 cpu, msr, 7929 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7930 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7931 ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7932 ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); 7933 } 7934 if (platform->rapl_msrs & RAPL_DRAM) { 7935 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) 7936 return -9; 7937 fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", 7938 cpu, msr, (msr >> 31) & 1 ? "" : "UN"); 7939 7940 print_power_limit_msr(cpu, msr, "DRAM Limit"); 7941 } 7942 if (platform->rapl_msrs & RAPL_CORE_POLICY) { 7943 if (get_msr(cpu, MSR_PP0_POLICY, &msr)) 7944 return -7; 7945 7946 fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); 7947 } 7948 if (platform->rapl_msrs & RAPL_CORE_POWER_LIMIT) { 7949 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) 7950 return -9; 7951 fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", 7952 cpu, msr, (msr >> 31) & 1 ? "" : "UN"); 7953 print_power_limit_msr(cpu, msr, "Cores Limit"); 7954 } 7955 if (platform->rapl_msrs & RAPL_GFX) { 7956 if (get_msr(cpu, MSR_PP1_POLICY, &msr)) 7957 return -8; 7958 7959 fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); 7960 7961 if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) 7962 return -9; 7963 fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", 7964 cpu, msr, (msr >> 31) & 1 ? "" : "UN"); 7965 print_power_limit_msr(cpu, msr, "GFX Limit"); 7966 } 7967 return 0; 7968 } 7969 7970 /* 7971 * probe_rapl() 7972 * 7973 * sets rapl_power_units, rapl_energy_units, rapl_time_units 7974 */ 7975 void probe_rapl(void) 7976 { 7977 if (genuine_intel) 7978 rapl_probe_intel(); 7979 if (authentic_amd || hygon_genuine) 7980 rapl_probe_amd(); 7981 7982 if (quiet) 7983 return; 7984 7985 print_rapl_sysfs(); 7986 7987 if (!platform->rapl_msrs || no_msr) 7988 return; 7989 7990 for_all_cpus(print_rapl, ODD_COUNTERS); 7991 } 7992 7993 /* 7994 * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where 7995 * the Thermal Control Circuit (TCC) activates. 7996 * This is usually equal to tjMax. 7997 * 7998 * Older processors do not have this MSR, so there we guess, 7999 * but also allow cmdline over-ride with -T. 8000 * 8001 * Several MSR temperature values are in units of degrees-C 8002 * below this value, including the Digital Thermal Sensor (DTS), 8003 * Package Thermal Management Sensor (PTM), and thermal event thresholds. 
8004 */ 8005 int set_temperature_target(PER_THREAD_PARAMS) 8006 { 8007 unsigned long long msr; 8008 unsigned int tcc_default, tcc_offset; 8009 int cpu; 8010 8011 UNUSED(c); 8012 UNUSED(p); 8013 8014 /* tj_max is used only for dts or ptm */ 8015 if (!(do_dts || do_ptm)) 8016 return 0; 8017 8018 /* this is a per-package concept */ 8019 if (!is_cpu_first_thread_in_package(t, c, p)) 8020 return 0; 8021 8022 cpu = t->cpu_id; 8023 if (cpu_migrate(cpu)) { 8024 fprintf(outf, "Could not migrate to CPU %d\n", cpu); 8025 return -1; 8026 } 8027 8028 if (tj_max_override != 0) { 8029 tj_max = tj_max_override; 8030 fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", cpu, tj_max); 8031 return 0; 8032 } 8033 8034 /* Temperature Target MSR is Nehalem and newer only */ 8035 if (!platform->has_nhm_msrs || no_msr) 8036 goto guess; 8037 8038 if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) 8039 goto guess; 8040 8041 tcc_default = (msr >> 16) & 0xFF; 8042 8043 if (!quiet) { 8044 int bits = platform->tcc_offset_bits; 8045 unsigned long long enabled = 0; 8046 8047 if (bits && !get_msr(base_cpu, MSR_PLATFORM_INFO, &enabled)) 8048 enabled = (enabled >> 30) & 1; 8049 8050 if (bits && enabled) { 8051 tcc_offset = (msr >> 24) & GENMASK(bits - 1, 0); 8052 fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", 8053 cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset); 8054 } else { 8055 fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default); 8056 } 8057 } 8058 8059 if (!tcc_default) 8060 goto guess; 8061 8062 tj_max = tcc_default; 8063 8064 return 0; 8065 8066 guess: 8067 tj_max = TJMAX_DEFAULT; 8068 fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max); 8069 8070 return 0; 8071 } 8072 8073 int print_thermal(PER_THREAD_PARAMS) 8074 { 8075 unsigned long long msr; 8076 unsigned int dts, dts2; 8077 int cpu; 8078 8079 UNUSED(c); 8080 UNUSED(p); 8081 8082 if (no_msr) 8083 return 0; 8084 8085 if (!(do_dts || do_ptm)) 8086 return 0; 8087 8088 cpu = t->cpu_id; 8089 8090 /* DTS is per-core, no need to print for each thread */ 8091 if (!is_cpu_first_thread_in_core(t, c, p)) 8092 return 0; 8093 8094 if (cpu_migrate(cpu)) { 8095 fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu); 8096 return -1; 8097 } 8098 8099 if (do_ptm && is_cpu_first_core_in_package(t, c, p)) { 8100 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) 8101 return 0; 8102 8103 dts = (msr >> 16) & 0x7F; 8104 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts); 8105 8106 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr)) 8107 return 0; 8108 8109 dts = (msr >> 16) & 0x7F; 8110 dts2 = (msr >> 8) & 0x7F; 8111 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", 8112 cpu, msr, tj_max - dts, tj_max - dts2); 8113 } 8114 8115 if (do_dts && debug) { 8116 unsigned int resolution; 8117 8118 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) 8119 return 0; 8120 8121 dts = (msr >> 16) & 0x7F; 8122 resolution = (msr >> 27) & 0xF; 8123 fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", 8124 cpu, msr, tj_max - dts, resolution); 8125 8126 if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr)) 8127 return 0; 8128 8129 dts = (msr >> 16) & 0x7F; 8130 dts2 = (msr >> 8) & 0x7F; 8131 fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", 8132 cpu, msr, tj_max - dts, tj_max - dts2); 8133 } 8134 8135 return 0; 8136 } 8137 8138 
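
/*
 * Illustration only: a worked example of the thermal decoding above,
 * with assumed values. If tj_max is 100 C and the MSR_IA32_THERM_STATUS
 * digital readout (bits 22:16) is 0x24 (36), print_thermal() reports
 * 100 - 36 = 64 C. The package readout in MSR_IA32_PACKAGE_THERM_STATUS
 * follows the same "degrees below tj_max" convention.
 */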
void probe_thermal(void) 8139 { 8140 if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK)) 8141 BIC_PRESENT(BIC_CORE_THROT_CNT); 8142 else 8143 BIC_NOT_PRESENT(BIC_CORE_THROT_CNT); 8144 8145 for_all_cpus(set_temperature_target, ODD_COUNTERS); 8146 8147 if (quiet) 8148 return; 8149 8150 for_all_cpus(print_thermal, ODD_COUNTERS); 8151 } 8152 8153 int get_cpu_type(PER_THREAD_PARAMS) 8154 { 8155 unsigned int eax, ebx, ecx, edx; 8156 8157 UNUSED(c); 8158 UNUSED(p); 8159 8160 if (!genuine_intel) 8161 return 0; 8162 8163 if (cpu_migrate(t->cpu_id)) { 8164 fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id); 8165 return -1; 8166 } 8167 8168 if (max_level < 0x1a) 8169 return 0; 8170 8171 __cpuid(0x1a, eax, ebx, ecx, edx); 8172 eax = (eax >> 24) & 0xFF; 8173 if (eax == 0x20) 8174 t->is_atom = true; 8175 return 0; 8176 } 8177 8178 void decode_feature_control_msr(void) 8179 { 8180 unsigned long long msr; 8181 8182 if (no_msr) 8183 return; 8184 8185 if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr)) 8186 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n", 8187 base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : ""); 8188 } 8189 8190 void decode_misc_enable_msr(void) 8191 { 8192 unsigned long long msr; 8193 8194 if (no_msr) 8195 return; 8196 8197 if (!genuine_intel) 8198 return; 8199 8200 if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr)) 8201 fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n", 8202 base_cpu, msr, 8203 msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-", 8204 msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-", 8205 msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-", 8206 msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", 8207 msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); 8208 } 8209 8210 void decode_misc_feature_control(void) 8211 { 8212 unsigned long long msr; 8213 8214 if (no_msr) 8215 return; 8216 8217 if (!platform->has_msr_misc_feature_control) 8218 return; 8219 8220 if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr)) 8221 fprintf(outf, 8222 "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n", 8223 base_cpu, msr, msr & (1 << 0) ? "No-" : "", msr & (1 << 1) ? "No-" : "", 8224 msr & (1 << 2) ? "No-" : "", msr & (1 << 3) ? "No-" : ""); 8225 } 8226 8227 /* 8228 * Decode MSR_MISC_PWR_MGMT 8229 * 8230 * Decode the bits according to the Nehalem documentation 8231 * bit[0] seems to continue to have same meaning going forward 8232 * bit[1] less so... 8233 */ 8234 void decode_misc_pwr_mgmt_msr(void) 8235 { 8236 unsigned long long msr; 8237 8238 if (no_msr) 8239 return; 8240 8241 if (!platform->has_msr_misc_pwr_mgmt) 8242 return; 8243 8244 if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) 8245 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n", 8246 base_cpu, msr, 8247 msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS"); 8248 } 8249 8250 /* 8251 * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG 8252 * 8253 * These MSRs are present on Silvermont processors, 8254 * Intel Atom processor E3000 series (Baytrail), and friends. 
8255 */ 8256 void decode_c6_demotion_policy_msr(void) 8257 { 8258 unsigned long long msr; 8259 8260 if (no_msr) 8261 return; 8262 8263 if (!platform->has_msr_c6_demotion_policy_config) 8264 return; 8265 8266 if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr)) 8267 fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n", 8268 base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS"); 8269 8270 if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr)) 8271 fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n", 8272 base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS"); 8273 } 8274 8275 void print_dev_latency(void) 8276 { 8277 char *path = "/dev/cpu_dma_latency"; 8278 int fd; 8279 int value; 8280 int retval; 8281 8282 fd = open(path, O_RDONLY); 8283 if (fd < 0) { 8284 if (debug) 8285 warnx("Read %s failed", path); 8286 return; 8287 } 8288 8289 retval = read(fd, (void *)&value, sizeof(int)); 8290 if (retval != sizeof(int)) { 8291 warn("read failed %s", path); 8292 close(fd); 8293 return; 8294 } 8295 fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", value, value == 2000000000 ? "default" : "constrained"); 8296 8297 close(fd); 8298 } 8299 8300 static int has_instr_count_access(void) 8301 { 8302 int fd; 8303 int has_access; 8304 8305 if (no_perf) 8306 return 0; 8307 8308 fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0); 8309 has_access = fd != -1; 8310 8311 if (fd != -1) 8312 close(fd); 8313 8314 if (!has_access) 8315 warnx("Failed to access %s. Some of the counters may not be available\n" 8316 "\tRun as root to enable them or use %s to disable the access explicitly", 8317 "instructions retired perf counter", "--no-perf"); 8318 8319 return has_access; 8320 } 8321 8322 int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, 8323 double *scale_, enum rapl_unit *unit_) 8324 { 8325 int ret = -1; 8326 8327 if (no_perf) 8328 return -1; 8329 8330 if (!cai->perf_name) 8331 return -1; 8332 8333 const double scale = read_perf_scale(cai->perf_subsys, cai->perf_name); 8334 8335 if (scale == 0.0) 8336 goto end; 8337 8338 const enum rapl_unit unit = read_perf_rapl_unit(cai->perf_subsys, cai->perf_name); 8339 8340 if (unit == RAPL_UNIT_INVALID) 8341 goto end; 8342 8343 const unsigned int rapl_type = read_perf_type(cai->perf_subsys); 8344 const unsigned int rapl_energy_pkg_config = read_perf_config(cai->perf_subsys, cai->perf_name); 8345 8346 ret = open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP); 8347 if (ret == -1) 8348 goto end; 8349 8350 /* If it's the first counter opened, make it a group descriptor */ 8351 if (rci->fd_perf == -1) 8352 rci->fd_perf = ret; 8353 8354 *scale_ = scale; 8355 *unit_ = unit; 8356 8357 end: 8358 if (debug >= 2) 8359 fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu); 8360 8361 return ret; 8362 } 8363 8364 /* 8365 * Linux-perf manages the HW instructions-retired counter 8366 * by enabling when requested, and hiding rollover 8367 */ 8368 void linux_perf_init(void) 8369 { 8370 if (access("/proc/sys/kernel/perf_event_paranoid", F_OK)) 8371 return; 8372 8373 if (BIC_IS_ENABLED(BIC_IPC) && has_aperf) { 8374 fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 8375 if (fd_instr_count_percpu == NULL) 8376 err(-1, "calloc fd_instr_count_percpu"); 8377 } 8378 } 8379 8380 void rapl_perf_init(void) 8381 { 8382 const unsigned int num_domains = get_rapl_num_domains(); 8383 bool 
*domain_visited = calloc(num_domains, sizeof(bool)); 8384 if (domain_visited == NULL) err(-1, "calloc domain_visited"); 8385 rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain)); 8386 if (rapl_counter_info_perdomain == NULL) 8387 err(-1, "calloc rapl_counter_info_perdomain"); 8388 rapl_counter_info_perdomain_size = num_domains; 8389 8390 /* 8391 * Initialize rapl_counter_info_perdomain 8392 */ 8393 for (unsigned int domain_id = 0; domain_id < num_domains; ++domain_id) { 8394 struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain_id]; 8395 8396 rci->fd_perf = -1; 8397 for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) { 8398 rci->data[i] = 0; 8399 rci->source[i] = COUNTER_SOURCE_NONE; 8400 } 8401 } 8402 8403 /* 8404 * Open/probe the counters 8405 * If we can't get a counter via perf, fall back to MSR 8406 */ 8407 for (size_t i = 0; i < ARRAY_SIZE(rapl_counter_arch_infos); ++i) { 8408 8409 const struct rapl_counter_arch_info *const cai = &rapl_counter_arch_infos[i]; 8410 bool has_counter = 0; 8411 double scale; 8412 enum rapl_unit unit; 8413 unsigned int next_domain; 8414 8415 if (!BIC_IS_ENABLED(cai->bic_number)) 8416 continue; 8417 8418 memset(domain_visited, 0, num_domains * sizeof(*domain_visited)); 8419 8420 for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) { 8421 8422 if (cpu_is_not_allowed(cpu)) 8423 continue; 8424 8425 /* Skip already seen and handled RAPL domains */ 8426 next_domain = get_rapl_domain_id(cpu); 8427 8428 assert(next_domain < num_domains); 8429 8430 if (domain_visited[next_domain]) 8431 continue; 8432 8433 domain_visited[next_domain] = 1; 8434 8435 if ((cai->flags & RAPL_COUNTER_FLAG_PLATFORM_COUNTER) && (cpu != base_cpu)) 8436 continue; 8437 8438 struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain]; 8439 8440 /* 8441 * rapl_counter_arch_infos[] can have multiple entries describing the same 8442 * counter, due to differences between platforms/vendors. 8443 * E.g. rapl_counter_arch_infos[0] and rapl_counter_arch_infos[1] share the 8444 * same perf_subsys and perf_name, but with different MSR addresses. 8445 * rapl_counter_arch_infos[0] is for Intel and rapl_counter_arch_infos[1] 8446 * is for AMD. 8447 * In this case, multiple rapl_counter_arch_infos[] entries could be 8448 * probed just because their perf/MSR sources are duplicated and valid. 8449 * 8450 * Thus we need a check to avoid re-probing the same counter. 
8451 */ 8452 if (rci->source[cai->rci_index] != COUNTER_SOURCE_NONE) 8453 break; 8454 8455 /* Use perf API for this counter */ 8456 if (add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) { 8457 rci->source[cai->rci_index] = COUNTER_SOURCE_PERF; 8458 rci->scale[cai->rci_index] = scale * cai->compat_scale; 8459 rci->unit[cai->rci_index] = unit; 8460 rci->flags[cai->rci_index] = cai->flags; 8461 8462 /* Use MSR for this counter */ 8463 } else if (add_rapl_msr_counter(cpu, cai) >= 0) { 8464 rci->source[cai->rci_index] = COUNTER_SOURCE_MSR; 8465 rci->msr[cai->rci_index] = cai->msr; 8466 rci->msr_mask[cai->rci_index] = cai->msr_mask; 8467 rci->msr_shift[cai->rci_index] = cai->msr_shift; 8468 rci->unit[cai->rci_index] = RAPL_UNIT_JOULES; 8469 rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale; 8470 rci->flags[cai->rci_index] = cai->flags; 8471 } 8472 8473 if (rci->source[cai->rci_index] != COUNTER_SOURCE_NONE) 8474 has_counter = 1; 8475 } 8476 8477 /* If any CPU has access to the counter, make it present */ 8478 if (has_counter) 8479 BIC_PRESENT(cai->bic_number); 8480 } 8481 8482 free(domain_visited); 8483 } 8484 8485 /* Assumes msr_counter_info is populated */ 8486 static int has_amperf_access(void) 8487 { 8488 return msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].present && 8489 msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].present; 8490 } 8491 8492 int *get_cstate_perf_group_fd(struct cstate_counter_info_t *cci, const char *group_name) 8493 { 8494 if (strcmp(group_name, "cstate_core") == 0) 8495 return &cci->fd_perf_core; 8496 8497 if (strcmp(group_name, "cstate_pkg") == 0) 8498 return &cci->fd_perf_pkg; 8499 8500 return NULL; 8501 } 8502 8503 int add_cstate_perf_counter(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai) 8504 { 8505 int ret = -1; 8506 8507 if (no_perf) 8508 return -1; 8509 8510 if (!cai->perf_name) 8511 return -1; 8512 8513 int *pfd_group = get_cstate_perf_group_fd(cci, cai->perf_subsys); 8514 8515 if (pfd_group == NULL) 8516 goto end; 8517 8518 const unsigned int type = read_perf_type(cai->perf_subsys); 8519 const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name); 8520 8521 ret = open_perf_counter(cpu, type, config, *pfd_group, PERF_FORMAT_GROUP); 8522 8523 if (ret == -1) 8524 goto end; 8525 8526 /* If it's the first counter opened, make it a group descriptor */ 8527 if (*pfd_group == -1) 8528 *pfd_group = ret; 8529 8530 end: 8531 if (debug >= 2) 8532 fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu); 8533 8534 return ret; 8535 } 8536 8537 int add_msr_perf_counter(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai) 8538 { 8539 int ret = -1; 8540 8541 if (no_perf) 8542 return -1; 8543 8544 if (!cai->perf_name) 8545 return -1; 8546 8547 const unsigned int type = read_perf_type(cai->perf_subsys); 8548 const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name); 8549 8550 ret = open_perf_counter(cpu, type, config, cci->fd_perf, PERF_FORMAT_GROUP); 8551 8552 if (ret == -1) 8553 goto end; 8554 8555 /* If it's the first counter opened, make it a group descriptor */ 8556 if (cci->fd_perf == -1) 8557 cci->fd_perf = ret; 8558 8559 end: 8560 if (debug) 8561 fprintf(stderr, "%s: %s/%s: %d (cpu: %d)\n", __func__, cai->perf_subsys, cai->perf_name, ret, cpu); 8562 8563 return ret; 8564 } 8565 8566 void msr_perf_init_(void) 8567 { 8568 const int mci_num = topo.max_cpu_num + 1; 8569 8570 msr_counter_info = calloc(mci_num, 
sizeof(*msr_counter_info)); 8571 if (!msr_counter_info) 8572 err(1, "calloc msr_counter_info"); 8573 msr_counter_info_size = mci_num; 8574 8575 for (int cpu = 0; cpu < mci_num; ++cpu) 8576 msr_counter_info[cpu].fd_perf = -1; 8577 8578 for (int cidx = 0; cidx < NUM_MSR_COUNTERS; ++cidx) { 8579 8580 struct msr_counter_arch_info *cai = &msr_counter_arch_infos[cidx]; 8581 8582 cai->present = false; 8583 8584 for (int cpu = 0; cpu < mci_num; ++cpu) { 8585 8586 struct msr_counter_info_t *const cci = &msr_counter_info[cpu]; 8587 8588 if (cpu_is_not_allowed(cpu)) 8589 continue; 8590 8591 if (cai->needed) { 8592 /* Use perf API for this counter */ 8593 if (add_msr_perf_counter(cpu, cci, cai) != -1) { 8594 cci->source[cai->rci_index] = COUNTER_SOURCE_PERF; 8595 cai->present = true; 8596 8597 /* Use MSR for this counter */ 8598 } else if (add_msr_counter(cpu, cai->msr) >= 0) { 8599 cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; 8600 cci->msr[cai->rci_index] = cai->msr; 8601 cci->msr_mask[cai->rci_index] = cai->msr_mask; 8602 cai->present = true; 8603 } 8604 } 8605 } 8606 } 8607 } 8608 8609 /* Initialize data for reading perf counters from the MSR group. */ 8610 void msr_perf_init(void) 8611 { 8612 bool need_amperf = false, need_smi = false; 8613 const bool need_soft_c1 = (!platform->has_msr_core_c1_res) && (platform->supported_cstates & CC1); 8614 8615 need_amperf = BIC_IS_ENABLED(BIC_Avg_MHz) || BIC_IS_ENABLED(BIC_Busy) || BIC_IS_ENABLED(BIC_Bzy_MHz) 8616 || BIC_IS_ENABLED(BIC_IPC) || need_soft_c1; 8617 8618 if (BIC_IS_ENABLED(BIC_SMI)) 8619 need_smi = true; 8620 8621 /* Enable needed counters */ 8622 msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].needed = need_amperf; 8623 msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].needed = need_amperf; 8624 msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].needed = need_smi; 8625 8626 msr_perf_init_(); 8627 8628 const bool has_amperf = has_amperf_access(); 8629 const bool has_smi = msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].present; 8630 8631 has_aperf_access = has_amperf; 8632 8633 if (has_amperf) { 8634 BIC_PRESENT(BIC_Avg_MHz); 8635 BIC_PRESENT(BIC_Busy); 8636 BIC_PRESENT(BIC_Bzy_MHz); 8637 BIC_PRESENT(BIC_SMI); 8638 } 8639 8640 if (has_smi) 8641 BIC_PRESENT(BIC_SMI); 8642 } 8643 8644 void cstate_perf_init_(bool soft_c1) 8645 { 8646 bool has_counter; 8647 bool *cores_visited = NULL, *pkg_visited = NULL; 8648 const int cores_visited_elems = topo.max_core_id + 1; 8649 const int pkg_visited_elems = topo.max_package_id + 1; 8650 const int cci_num = topo.max_cpu_num + 1; 8651 8652 ccstate_counter_info = calloc(cci_num, sizeof(*ccstate_counter_info)); 8653 if (!ccstate_counter_info) 8654 err(1, "calloc ccstate_counter_info"); 8655 ccstate_counter_info_size = cci_num; 8656 8657 cores_visited = calloc(cores_visited_elems, sizeof(*cores_visited)); 8658 if (!cores_visited) 8659 err(1, "calloc cores_visited"); 8660 8661 pkg_visited = calloc(pkg_visited_elems, sizeof(*pkg_visited)); 8662 if (!pkg_visited) 8663 err(1, "calloc pkg_visited"); 8664 8665 /* Initialize cstate_counter_info_percpu */ 8666 for (int cpu = 0; cpu < cci_num; ++cpu) { 8667 ccstate_counter_info[cpu].fd_perf_core = -1; 8668 ccstate_counter_info[cpu].fd_perf_pkg = -1; 8669 } 8670 8671 for (int cidx = 0; cidx < NUM_CSTATE_COUNTERS; ++cidx) { 8672 has_counter = false; 8673 memset(cores_visited, 0, cores_visited_elems * sizeof(*cores_visited)); 8674 memset(pkg_visited, 0, pkg_visited_elems * sizeof(*pkg_visited)); 8675 8676 
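/*
 * cores_visited[] and pkg_visited[] were cleared above so that each
 * counter is probed on at most one CPU per domain below: e.g. a
 * package-scope residency counter is opened on one CPU per package,
 * rather than on every CPU in that package.
 */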
const struct cstate_counter_arch_info *cai = &ccstate_counter_arch_infos[cidx]; 8677 8678 for (int cpu = 0; cpu < cci_num; ++cpu) { 8679 8680 struct cstate_counter_info_t *const cci = &ccstate_counter_info[cpu]; 8681 8682 if (cpu_is_not_allowed(cpu)) 8683 continue; 8684 8685 const int core_id = cpus[cpu].physical_core_id; 8686 const int pkg_id = cpus[cpu].physical_package_id; 8687 8688 assert(core_id < cores_visited_elems); 8689 assert(pkg_id < pkg_visited_elems); 8690 8691 const bool per_thread = cai->flags & CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD; 8692 const bool per_core = cai->flags & CSTATE_COUNTER_FLAG_COLLECT_PER_CORE; 8693 8694 if (!per_thread && cores_visited[core_id]) 8695 continue; 8696 8697 if (!per_core && pkg_visited[pkg_id]) 8698 continue; 8699 8700 const bool counter_needed = BIC_IS_ENABLED(cai->bic_number) || 8701 (soft_c1 && (cai->flags & CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY)); 8702 const bool counter_supported = (platform->supported_cstates & cai->feature_mask); 8703 8704 if (counter_needed && counter_supported) { 8705 /* Use perf API for this counter */ 8706 if (add_cstate_perf_counter(cpu, cci, cai) != -1) { 8707 8708 cci->source[cai->rci_index] = COUNTER_SOURCE_PERF; 8709 8710 /* Use MSR for this counter */ 8711 } else if (pkg_cstate_limit >= cai->pkg_cstate_limit 8712 && add_msr_counter(cpu, cai->msr) >= 0) { 8713 cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; 8714 cci->msr[cai->rci_index] = cai->msr; 8715 } 8716 } 8717 8718 if (cci->source[cai->rci_index] != COUNTER_SOURCE_NONE) { 8719 has_counter = true; 8720 cores_visited[core_id] = true; 8721 pkg_visited[pkg_id] = true; 8722 } 8723 } 8724 8725 /* If any CPU has access to the counter, make it present */ 8726 if (has_counter) 8727 BIC_PRESENT(cai->bic_number); 8728 } 8729 8730 free(cores_visited); 8731 free(pkg_visited); 8732 } 8733 8734 void cstate_perf_init(void) 8735 { 8736 /* 8737 * If we don't have a C1 residency MSR, we calculate it "in software", 8738 * but we need APERF, MPERF too. 
8739 */ 8740 const bool soft_c1 = !platform->has_msr_core_c1_res && has_amperf_access() 8741 && platform->supported_cstates & CC1; 8742 8743 if (soft_c1) 8744 BIC_PRESENT(BIC_CPU_c1); 8745 8746 cstate_perf_init_(soft_c1); 8747 } 8748 8749 void probe_cstates(void) 8750 { 8751 probe_cst_limit(); 8752 8753 if (platform->has_msr_module_c6_res_ms) 8754 BIC_PRESENT(BIC_Mod_c6); 8755 8756 if (platform->has_ext_cst_msrs && !no_msr) { 8757 BIC_PRESENT(BIC_Totl_c0); 8758 BIC_PRESENT(BIC_Any_c0); 8759 BIC_PRESENT(BIC_GFX_c0); 8760 BIC_PRESENT(BIC_CPUGFX); 8761 } 8762 8763 if (quiet) 8764 return; 8765 8766 dump_power_ctl(); 8767 dump_cst_cfg(); 8768 decode_c6_demotion_policy_msr(); 8769 print_dev_latency(); 8770 dump_sysfs_cstate_config(); 8771 print_irtl(); 8772 } 8773 8774 void probe_lpi(void) 8775 { 8776 if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK)) 8777 BIC_PRESENT(BIC_CPU_LPI); 8778 else 8779 BIC_NOT_PRESENT(BIC_CPU_LPI); 8780 8781 if (!access(sys_lpi_file_sysfs, R_OK)) { 8782 sys_lpi_file = sys_lpi_file_sysfs; 8783 BIC_PRESENT(BIC_SYS_LPI); 8784 } else if (!access(sys_lpi_file_debugfs, R_OK)) { 8785 sys_lpi_file = sys_lpi_file_debugfs; 8786 BIC_PRESENT(BIC_SYS_LPI); 8787 } else { 8788 sys_lpi_file_sysfs = NULL; 8789 BIC_NOT_PRESENT(BIC_SYS_LPI); 8790 } 8791 8792 } 8793 8794 void probe_pstates(void) 8795 { 8796 probe_bclk(); 8797 8798 if (quiet) 8799 return; 8800 8801 dump_platform_info(); 8802 dump_turbo_ratio_info(); 8803 dump_sysfs_pstate_config(); 8804 decode_misc_pwr_mgmt_msr(); 8805 8806 for_all_cpus(print_hwp, ODD_COUNTERS); 8807 for_all_cpus(print_epb, ODD_COUNTERS); 8808 for_all_cpus(print_perf_limit, ODD_COUNTERS); 8809 } 8810 8811 void process_cpuid() 8812 { 8813 unsigned int eax, ebx, ecx, edx; 8814 unsigned int fms, family, model, stepping, ecx_flags, edx_flags; 8815 unsigned long long ucode_patch = 0; 8816 bool ucode_patch_valid = false; 8817 8818 eax = ebx = ecx = edx = 0; 8819 8820 __cpuid(0, max_level, ebx, ecx, edx); 8821 8822 if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) 8823 genuine_intel = 1; 8824 else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) 8825 authentic_amd = 1; 8826 else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e) 8827 hygon_genuine = 1; 8828 8829 if (!quiet) 8830 fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n", 8831 (char *)&ebx, (char *)&edx, (char *)&ecx, max_level); 8832 8833 __cpuid(1, fms, ebx, ecx, edx); 8834 family = (fms >> 8) & 0xf; 8835 model = (fms >> 4) & 0xf; 8836 stepping = fms & 0xf; 8837 if (family == 0xf) 8838 family += (fms >> 20) & 0xff; 8839 if (family >= 6) 8840 model += ((fms >> 16) & 0xf) << 4; 8841 ecx_flags = ecx; 8842 edx_flags = edx; 8843 8844 if (!no_msr) { 8845 if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch)) 8846 warnx("get_msr(UCODE)"); 8847 else 8848 ucode_patch_valid = true; 8849 } 8850 8851 /* 8852 * check max extended function levels of CPUID. 8853 * This is needed to check for invariant TSC. 8854 * This check is valid for both Intel and AMD. 
8855 */ 8856 ebx = ecx = edx = 0; 8857 __cpuid(0x80000000, max_extended_level, ebx, ecx, edx); 8858 8859 if (!quiet) { 8860 fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)", 8861 family, model, stepping, family, model, stepping); 8862 if (ucode_patch_valid) 8863 fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF)); 8864 fputc('\n', outf); 8865 8866 fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level); 8867 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n", 8868 ecx_flags & (1 << 0) ? "SSE3" : "-", 8869 ecx_flags & (1 << 3) ? "MONITOR" : "-", 8870 ecx_flags & (1 << 6) ? "SMX" : "-", 8871 ecx_flags & (1 << 7) ? "EIST" : "-", 8872 ecx_flags & (1 << 8) ? "TM2" : "-", 8873 edx_flags & (1 << 4) ? "TSC" : "-", 8874 edx_flags & (1 << 5) ? "MSR" : "-", 8875 edx_flags & (1 << 22) ? "ACPI-TM" : "-", 8876 edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-"); 8877 } 8878 8879 probe_platform_features(family, model); 8880 8881 if (!(edx_flags & (1 << 5))) 8882 errx(1, "CPUID: no MSR"); 8883 8884 if (max_extended_level >= 0x80000007) { 8885 8886 /* 8887 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 8888 * this check is valid for both Intel and AMD 8889 */ 8890 __cpuid(0x80000007, eax, ebx, ecx, edx); 8891 has_invariant_tsc = edx & (1 << 8); 8892 } 8893 8894 /* 8895 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 8896 * this check is valid for both Intel and AMD 8897 */ 8898 8899 __cpuid(0x6, eax, ebx, ecx, edx); 8900 has_aperf = ecx & (1 << 0); 8901 do_dts = eax & (1 << 0); 8902 if (do_dts) 8903 BIC_PRESENT(BIC_CoreTmp); 8904 has_turbo = eax & (1 << 1); 8905 do_ptm = eax & (1 << 6); 8906 if (do_ptm) 8907 BIC_PRESENT(BIC_PkgTmp); 8908 has_hwp = eax & (1 << 7); 8909 has_hwp_notify = eax & (1 << 8); 8910 has_hwp_activity_window = eax & (1 << 9); 8911 has_hwp_epp = eax & (1 << 10); 8912 has_hwp_pkg = eax & (1 << 11); 8913 has_epb = ecx & (1 << 3); 8914 8915 if (!quiet) 8916 fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, " 8917 "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", 8918 has_aperf ? "" : "No-", 8919 has_turbo ? "" : "No-", 8920 do_dts ? "" : "No-", 8921 do_ptm ? "" : "No-", 8922 has_hwp ? "" : "No-", 8923 has_hwp_notify ? "" : "No-", 8924 has_hwp_activity_window ? "" : "No-", 8925 has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-"); 8926 8927 if (!quiet) 8928 decode_misc_enable_msr(); 8929 8930 if (max_level >= 0x7 && !quiet) { 8931 int has_sgx; 8932 8933 ecx = 0; 8934 8935 __cpuid_count(0x7, 0, eax, ebx, ecx, edx); 8936 8937 has_sgx = ebx & (1 << 2); 8938 8939 is_hybrid = edx & (1 << 15); 8940 8941 fprintf(outf, "CPUID(7): %sSGX %sHybrid\n", has_sgx ? "" : "No-", is_hybrid ? 
"" : "No-"); 8942 8943 if (has_sgx) 8944 decode_feature_control_msr(); 8945 } 8946 8947 if (max_level >= 0x15) { 8948 unsigned int eax_crystal; 8949 unsigned int ebx_tsc; 8950 8951 /* 8952 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz 8953 */ 8954 eax_crystal = ebx_tsc = crystal_hz = edx = 0; 8955 __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx); 8956 8957 if (ebx_tsc != 0) { 8958 if (!quiet && (ebx != 0)) 8959 fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", 8960 eax_crystal, ebx_tsc, crystal_hz); 8961 8962 if (crystal_hz == 0) 8963 crystal_hz = platform->crystal_freq; 8964 8965 if (crystal_hz) { 8966 tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal; 8967 if (!quiet) 8968 fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", 8969 tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); 8970 } 8971 } 8972 } 8973 if (max_level >= 0x16) { 8974 unsigned int base_mhz, max_mhz, bus_mhz, edx; 8975 8976 /* 8977 * CPUID 16H Base MHz, Max MHz, Bus MHz 8978 */ 8979 base_mhz = max_mhz = bus_mhz = edx = 0; 8980 8981 __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx); 8982 8983 bclk = bus_mhz; 8984 8985 base_hz = base_mhz * 1000000; 8986 has_base_hz = 1; 8987 8988 if (platform->enable_tsc_tweak) 8989 tsc_tweak = base_hz / tsc_hz; 8990 8991 if (!quiet) 8992 fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", 8993 base_mhz, max_mhz, bus_mhz); 8994 } 8995 8996 if (has_aperf) 8997 aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1; 8998 8999 BIC_PRESENT(BIC_IRQ); 9000 BIC_PRESENT(BIC_NMI); 9001 BIC_PRESENT(BIC_TSC_MHz); 9002 } 9003 9004 static void counter_info_init(void) 9005 { 9006 for (int i = 0; i < NUM_CSTATE_COUNTERS; ++i) { 9007 struct cstate_counter_arch_info *const cai = &ccstate_counter_arch_infos[i]; 9008 9009 if (platform->has_msr_knl_core_c6_residency && cai->msr == MSR_CORE_C6_RESIDENCY) 9010 cai->msr = MSR_KNL_CORE_C6_RESIDENCY; 9011 9012 if (!platform->has_msr_core_c1_res && cai->msr == MSR_CORE_C1_RES) 9013 cai->msr = 0; 9014 9015 if (platform->has_msr_atom_pkg_c6_residency && cai->msr == MSR_PKG_C6_RESIDENCY) 9016 cai->msr = MSR_ATOM_PKG_C6_RESIDENCY; 9017 } 9018 9019 for (int i = 0; i < NUM_MSR_COUNTERS; ++i) { 9020 msr_counter_arch_infos[i].present = false; 9021 msr_counter_arch_infos[i].needed = false; 9022 } 9023 } 9024 9025 void probe_pm_features(void) 9026 { 9027 probe_pstates(); 9028 9029 probe_cstates(); 9030 9031 probe_lpi(); 9032 9033 probe_intel_uncore_frequency(); 9034 9035 probe_graphics(); 9036 9037 probe_rapl(); 9038 9039 probe_thermal(); 9040 9041 if (platform->has_nhm_msrs && !no_msr) 9042 BIC_PRESENT(BIC_SMI); 9043 9044 if (!quiet) 9045 decode_misc_feature_control(); 9046 } 9047 9048 /* 9049 * in /dev/cpu/ return success for names that are numbers 9050 * ie. filter out ".", "..", "microcode". 
9048 /* 9049 * in /dev/cpu/ return success for names that are numbers 9050 * i.e. filter out ".", "..", "microcode". 9051 */ 9052 int dir_filter(const struct dirent *dirp) 9053 { 9054 if (isdigit(dirp->d_name[0])) 9055 return 1; 9056 else 9057 return 0; 9058 } 9059 9060 char *possible_file = "/sys/devices/system/cpu/possible"; 9061 char possible_buf[1024]; 9062 9063 int initialize_cpu_possible_set(void) 9064 { 9065 FILE *fp; 9066 9067 fp = fopen(possible_file, "r"); 9068 if (!fp) { 9069 warn("open %s", possible_file); 9070 return -1; 9071 } 9072 if (fread(possible_buf, sizeof(char), 1024, fp) == 0) { 9073 warn("read %s", possible_file); 9074 goto err; 9075 } 9076 if (parse_cpu_str(possible_buf, cpu_possible_set, cpu_possible_setsize)) { 9077 warnx("%s: malformed cpu string %s\n", possible_file, possible_buf); 9078 goto err; 9079 } 9080 fclose(fp); return 0; 9081 9082 err: 9083 fclose(fp); 9084 return -1; 9085 } 9086 9087 void topology_probe(bool startup) 9088 { 9089 int i; 9090 int max_core_id = 0; 9091 int max_package_id = 0; 9092 int max_siblings = 0; 9093 9094 /* Initialize num_cpus, max_cpu_num */ 9095 set_max_cpu_num(); 9096 topo.num_cpus = 0; 9097 for_all_proc_cpus(count_cpus); 9098 if (!summary_only) 9099 BIC_PRESENT(BIC_CPU); 9100 9101 if (debug > 1) 9102 fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); 9103 9104 cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); 9105 if (cpus == NULL) 9106 err(1, "calloc cpus"); 9107 9108 /* 9109 * Allocate and initialize cpu_present_set 9110 */ 9111 cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1)); 9112 if (cpu_present_set == NULL) 9113 err(3, "CPU_ALLOC"); 9114 cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 9115 CPU_ZERO_S(cpu_present_setsize, cpu_present_set); 9116 for_all_proc_cpus(mark_cpu_present); 9117 9118 /* 9119 * Allocate and initialize cpu_possible_set 9120 */ 9121 cpu_possible_set = CPU_ALLOC((topo.max_cpu_num + 1)); 9122 if (cpu_possible_set == NULL) 9123 err(3, "CPU_ALLOC"); 9124 cpu_possible_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 9125 CPU_ZERO_S(cpu_possible_setsize, cpu_possible_set); 9126 initialize_cpu_possible_set(); 9127 9128 /* 9129 * Allocate and initialize cpu_effective_set 9130 */ 9131 cpu_effective_set = CPU_ALLOC((topo.max_cpu_num + 1)); 9132 if (cpu_effective_set == NULL) 9133 err(3, "CPU_ALLOC"); 9134 cpu_effective_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 9135 CPU_ZERO_S(cpu_effective_setsize, cpu_effective_set); 9136 update_effective_set(startup); 9137 9138 /* 9139 * Allocate and initialize cpu_allowed_set 9140 */ 9141 cpu_allowed_set = CPU_ALLOC((topo.max_cpu_num + 1)); 9142 if (cpu_allowed_set == NULL) 9143 err(3, "CPU_ALLOC"); 9144 cpu_allowed_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 9145 CPU_ZERO_S(cpu_allowed_setsize, cpu_allowed_set); 9146 9147 /* 9148 * Validate and update cpu_allowed_set. 9149 * 9150 * Make sure all cpus in cpu_subset are also in cpu_present_set during startup. 9151 * Give a warning when cpus in cpu_subset become unavailable at runtime. 9152 * Give a warning when cpus are not effective because of cgroup setting. 9153 * 9154 * cpu_allowed_set is the intersection of cpu_present_set/cpu_effective_set/cpu_subset. 
9155 */ 9156 for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) { 9157 if (cpu_subset && !CPU_ISSET_S(i, cpu_subset_size, cpu_subset)) 9158 continue; 9159 9160 if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) { 9161 if (cpu_subset) { 9162 /* cpus in cpu_subset must be in cpu_present_set during startup */ 9163 if (startup) 9164 err(1, "cpu%d not present", i); 9165 else 9166 fprintf(stderr, "cpu%d not present\n", i); 9167 } 9168 continue; 9169 } 9170 9171 if (CPU_COUNT_S(cpu_effective_setsize, cpu_effective_set)) { 9172 if (!CPU_ISSET_S(i, cpu_effective_setsize, cpu_effective_set)) { 9173 fprintf(stderr, "cpu%d not effective\n", i); 9174 continue; 9175 } 9176 } 9177 9178 CPU_SET_S(i, cpu_allowed_setsize, cpu_allowed_set); 9179 } 9180 9181 if (!CPU_COUNT_S(cpu_allowed_setsize, cpu_allowed_set)) 9182 err(-ENODEV, "No valid cpus found"); 9183 sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set); 9184 9185 /* 9186 * Allocate and initialize cpu_affinity_set 9187 */ 9188 cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1)); 9189 if (cpu_affinity_set == NULL) 9190 err(3, "CPU_ALLOC"); 9191 cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 9192 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 9193 9194 for_all_proc_cpus(init_thread_id); 9195 9196 for_all_proc_cpus(set_cpu_hybrid_type); 9197 9198 /* 9199 * For online cpus 9200 * find max_core_id, max_package_id 9201 */ 9202 for (i = 0; i <= topo.max_cpu_num; ++i) { 9203 int siblings; 9204 9205 if (cpu_is_not_present(i)) { 9206 if (debug > 1) 9207 fprintf(outf, "cpu%d NOT PRESENT\n", i); 9208 continue; 9209 } 9210 9211 cpus[i].logical_cpu_id = i; 9212 9213 /* get package information */ 9214 cpus[i].physical_package_id = get_physical_package_id(i); 9215 if (cpus[i].physical_package_id > max_package_id) 9216 max_package_id = cpus[i].physical_package_id; 9217 9218 /* get die information */ 9219 cpus[i].die_id = get_die_id(i); 9220 if (cpus[i].die_id > topo.max_die_id) 9221 topo.max_die_id = cpus[i].die_id; 9222 9223 /* get l3 information */ 9224 cpus[i].l3_id = get_l3_id(i); 9225 if (cpus[i].l3_id > topo.max_l3_id) 9226 topo.max_l3_id = cpus[i].l3_id; 9227 9228 /* get numa node information */ 9229 cpus[i].physical_node_id = get_physical_node_id(&cpus[i]); 9230 if (cpus[i].physical_node_id > topo.max_node_num) 9231 topo.max_node_num = cpus[i].physical_node_id; 9232 9233 /* get core information */ 9234 cpus[i].physical_core_id = get_core_id(i); 9235 if (cpus[i].physical_core_id > max_core_id) 9236 max_core_id = cpus[i].physical_core_id; 9237 9238 /* get thread information */ 9239 siblings = get_thread_siblings(&cpus[i]); 9240 if (siblings > max_siblings) 9241 max_siblings = siblings; 9242 if (cpus[i].thread_id == 0) 9243 topo.num_cores++; 9244 } 9245 topo.max_core_id = max_core_id; 9246 topo.max_package_id = max_package_id; 9247 9248 topo.cores_per_node = max_core_id + 1; 9249 if (debug > 1) 9250 fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node); 9251 if (!summary_only) 9252 BIC_PRESENT(BIC_Core); 9253 9254 topo.num_die = topo.max_die_id + 1; 9255 if (debug > 1) 9256 fprintf(outf, "max_die_id %d, sizing for %d die\n", topo.max_die_id, topo.num_die); 9257 if (!summary_only && topo.num_die > 1) 9258 BIC_PRESENT(BIC_Die); 9259 9260 if (!summary_only && topo.max_l3_id > 0) 9261 BIC_PRESENT(BIC_L3); 9262 9263 topo.num_packages = max_package_id + 1; 9264 if (debug > 1) 9265 fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); 9266 if 
(!summary_only && topo.num_packages > 1) 9267 BIC_PRESENT(BIC_Package); 9268 9269 set_node_data(); 9270 if (debug > 1) 9271 fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg); 9272 if (!summary_only && topo.nodes_per_pkg > 1) 9273 BIC_PRESENT(BIC_Node); 9274 9275 topo.threads_per_core = max_siblings; 9276 if (debug > 1) 9277 fprintf(outf, "max_siblings %d\n", max_siblings); 9278 9279 if (debug < 1) 9280 return; 9281 9282 for (i = 0; i <= topo.max_cpu_num; ++i) { 9283 if (cpu_is_not_present(i)) 9284 continue; 9285 fprintf(outf, 9286 "cpu %d pkg %d die %d l3 %d node %d lnode %d core %d thread %d\n", 9287 i, cpus[i].physical_package_id, cpus[i].die_id, cpus[i].l3_id, 9288 cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, cpus[i].thread_id); 9289 } 9290 9291 } 9292 9293 void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) 9294 { 9295 int i; 9296 int num_cores = topo.cores_per_node * topo.nodes_per_pkg * topo.num_packages; 9297 int num_threads = topo.threads_per_core * num_cores; 9298 9299 *t = calloc(num_threads, sizeof(struct thread_data)); 9300 if (*t == NULL) 9301 goto error; 9302 9303 for (i = 0; i < num_threads; i++) 9304 (*t)[i].cpu_id = -1; 9305 9306 *c = calloc(num_cores, sizeof(struct core_data)); 9307 if (*c == NULL) 9308 goto error; 9309 9310 for (i = 0; i < num_cores; i++) { 9311 (*c)[i].core_id = -1; 9312 (*c)[i].base_cpu = -1; 9313 } 9314 9315 *p = calloc(topo.num_packages, sizeof(struct pkg_data)); 9316 if (*p == NULL) 9317 goto error; 9318 9319 for (i = 0; i < topo.num_packages; i++) { 9320 (*p)[i].package_id = i; 9321 (*p)[i].base_cpu = -1; 9322 } 9323 9324 return; 9325 error: 9326 err(1, "calloc counters"); 9327 } 9328 9329 /* 9330 * init_counter() 9331 * 9332 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE 9333 */ 9334 void init_counter(struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base, int cpu_id) 9335 { 9336 int pkg_id = cpus[cpu_id].physical_package_id; 9337 int node_id = cpus[cpu_id].logical_node_id; 9338 int core_id = cpus[cpu_id].physical_core_id; 9339 int thread_id = cpus[cpu_id].thread_id; 9340 struct thread_data *t; 9341 struct core_data *c; 9342 9343 /* Workaround for systems where physical_node_id==-1 9344 * and logical_node_id==(-1 - topo.num_cpus) 9345 */ 9346 if (node_id < 0) 9347 node_id = 0; 9348 9349 t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id); 9350 c = GET_CORE(core_base, core_id, node_id, pkg_id); 9351 9352 t->cpu_id = cpu_id; 9353 if (!cpu_is_not_allowed(cpu_id)) { 9354 if (c->base_cpu < 0) 9355 c->base_cpu = t->cpu_id; 9356 if (pkg_base[pkg_id].base_cpu < 0) 9357 pkg_base[pkg_id].base_cpu = t->cpu_id; 9358 } 9359 9360 c->core_id = core_id; 9361 pkg_base[pkg_id].package_id = pkg_id; 9362 } 9363 9364 int initialize_counters(int cpu_id) 9365 { 9366 init_counter(EVEN_COUNTERS, cpu_id); 9367 init_counter(ODD_COUNTERS, cpu_id); 9368 return 0; 9369 } 9370 9371 void allocate_output_buffer() 9372 { 9373 output_buffer = calloc(1, (1 + topo.num_cpus) * 2048); 9374 outp = output_buffer; 9375 if (outp == NULL) 9376 err(-1, "calloc output buffer"); 9377 } 9378 9379 void allocate_fd_percpu(void) 9380 { 9381 fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 9382 if (fd_percpu == NULL) 9383 err(-1, "calloc fd_percpu"); 9384 } 9385 9386 void allocate_irq_buffers(void) 9387 { 9388 irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int)); 9389 if (irq_column_2_cpu == NULL) 9390 err(-1, "calloc %d", topo.num_cpus); 9391 9392 irqs_per_cpu = 
calloc(topo.max_cpu_num + 1, sizeof(int)); 9393 if (irqs_per_cpu == NULL) 9394 err(-1, "calloc %d IRQ", topo.max_cpu_num + 1); 9395 9396 nmi_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 9397 if (nmi_per_cpu == NULL) 9398 err(-1, "calloc %d NMI", topo.max_cpu_num + 1); 9399 } 9400 9401 int update_topo(PER_THREAD_PARAMS) 9402 { 9403 topo.allowed_cpus++; 9404 if ((int)t->cpu_id == c->base_cpu) 9405 topo.allowed_cores++; 9406 if ((int)t->cpu_id == p->base_cpu) 9407 topo.allowed_packages++; 9408 9409 return 0; 9410 } 9411 9412 void topology_update(void) 9413 { 9414 topo.allowed_cpus = 0; 9415 topo.allowed_cores = 0; 9416 topo.allowed_packages = 0; 9417 for_all_cpus(update_topo, ODD_COUNTERS); 9418 } 9419 9420 void setup_all_buffers(bool startup) 9421 { 9422 topology_probe(startup); 9423 allocate_irq_buffers(); 9424 allocate_fd_percpu(); 9425 allocate_counters(&thread_even, &core_even, &package_even); 9426 allocate_counters(&thread_odd, &core_odd, &package_odd); 9427 allocate_output_buffer(); 9428 for_all_proc_cpus(initialize_counters); 9429 topology_update(); 9430 } 9431 9432 void set_base_cpu(void) 9433 { 9434 int i; 9435 9436 for (i = 0; i < topo.max_cpu_num + 1; ++i) { 9437 if (cpu_is_not_allowed(i)) 9438 continue; 9439 base_cpu = i; 9440 if (debug > 1) 9441 fprintf(outf, "base_cpu = %d\n", base_cpu); 9442 return; 9443 } 9444 err(-ENODEV, "No valid cpus found"); 9445 } 9446 9447 bool has_added_counters(void) 9448 { 9449 /* 9450 * It only makes sense to call this after the command line is parsed, 9451 * otherwise sys structure is not populated. 9452 */ 9453 9454 return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters; 9455 } 9456 9457 void check_msr_access(void) 9458 { 9459 check_dev_msr(); 9460 check_msr_permission(); 9461 9462 if (no_msr) 9463 bic_disable_msr_access(); 9464 } 9465 9466 void check_perf_access(void) 9467 { 9468 if (no_perf || !BIC_IS_ENABLED(BIC_IPC) || !has_instr_count_access()) 9469 CLR_BIC(BIC_IPC, &bic_enabled); 9470 } 9471 9472 bool perf_has_hybrid_devices(void) 9473 { 9474 /* 9475 * 0: unknown 9476 * 1: has separate perf device for p and e core 9477 * -1: doesn't have separate perf device for p and e core 9478 */ 9479 static int cached; 9480 9481 if (cached > 0) 9482 return true; 9483 9484 if (cached < 0) 9485 return false; 9486 9487 if (access("/sys/bus/event_source/devices/cpu_core", F_OK)) { 9488 cached = -1; 9489 return false; 9490 } 9491 9492 if (access("/sys/bus/event_source/devices/cpu_atom", F_OK)) { 9493 cached = -1; 9494 return false; 9495 } 9496 9497 cached = 1; 9498 return true; 9499 } 9500 9501 int added_perf_counters_init_(struct perf_counter_info *pinfo) 9502 { 9503 size_t num_domains = 0; 9504 unsigned int next_domain; 9505 bool *domain_visited; 9506 unsigned int perf_type, perf_config; 9507 double perf_scale; 9508 int fd_perf; 9509 9510 if (!pinfo) 9511 return 0; 9512 9513 const size_t max_num_domains = MAX(topo.max_cpu_num + 1, MAX(topo.max_core_id + 1, topo.max_package_id + 1)); 9514 9515 domain_visited = calloc(max_num_domains, sizeof(*domain_visited)); 9516 9517 while (pinfo) { 9518 switch (pinfo->scope) { 9519 case SCOPE_CPU: 9520 num_domains = topo.max_cpu_num + 1; 9521 break; 9522 9523 case SCOPE_CORE: 9524 num_domains = topo.max_core_id + 1; 9525 break; 9526 9527 case SCOPE_PACKAGE: 9528 num_domains = topo.max_package_id + 1; 9529 break; 9530 } 9531 9532 /* Allocate buffer for file descriptor for each domain. 
*/
9533 pinfo->fd_perf_per_domain = calloc(num_domains, sizeof(*pinfo->fd_perf_per_domain));
9534 if (!pinfo->fd_perf_per_domain)
9535 errx(1, "%s: alloc %s", __func__, "fd_perf_per_domain");
9536
9537 for (size_t i = 0; i < num_domains; ++i)
9538 pinfo->fd_perf_per_domain[i] = -1;
9539
9540 pinfo->num_domains = num_domains;
9541 pinfo->scale = 1.0;
9542
9543 memset(domain_visited, 0, max_num_domains * sizeof(*domain_visited));
9544
9545 for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) {
9546
9547 next_domain = cpu_to_domain(pinfo, cpu);
9548
9549 assert(next_domain < num_domains);
9550
9551 if (cpu_is_not_allowed(cpu))
9552 continue;
9553
9554 if (domain_visited[next_domain])
9555 continue;
9556
9557 /*
9558 * Intel hybrid platforms expose different perf devices for P and E cores.
9559 * Instead of a single "/sys/bus/event_source/devices/cpu" device, there are
9560 * "/sys/bus/event_source/devices/{cpu_core,cpu_atom}".
9561 *
9562 * This makes things more complicated for the user, because most of the counters
9563 * are available on both devices and would otherwise have to be handled manually.
9564 *
9565 * The code below allows the user to keep using the old "cpu" name, which is translated accordingly.
9566 */
9567 const char *perf_device = pinfo->device;
9568
9569 if (strcmp(perf_device, "cpu") == 0 && perf_has_hybrid_devices()) {
9570 switch (cpus[cpu].type) {
9571 case INTEL_PCORE_TYPE:
9572 perf_device = "cpu_core";
9573 break;
9574
9575 case INTEL_ECORE_TYPE:
9576 perf_device = "cpu_atom";
9577 break;
9578
9579 default: /* Don't change it; we will probably fail and report a problem soon. */
9580 break;
9581 }
9582 }
9583
9584 perf_type = read_perf_type(perf_device);
9585 if (perf_type == (unsigned int)-1) {
9586 warnx("%s: perf/%s/%s: failed to read %s", __func__, perf_device, pinfo->event, "type");
9587 continue;
9588 }
9589
9590 perf_config = read_perf_config(perf_device, pinfo->event);
9591 if (perf_config == (unsigned int)-1) {
9592 warnx("%s: perf/%s/%s: failed to read %s",
9593 __func__, perf_device, pinfo->event, "config");
9594 continue;
9595 }
9596
9597 /* Scale is not required; some counters just don't have it.
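read_perf_scale() is assumed to return 0.0 when the event has no sysfs scale file, which is mapped to a neutral scale of 1.0 below.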
*/ 9598 perf_scale = read_perf_scale(perf_device, pinfo->event); 9599 if (perf_scale == 0.0) 9600 perf_scale = 1.0; 9601 9602 fd_perf = open_perf_counter(cpu, perf_type, perf_config, -1, 0); 9603 if (fd_perf == -1) { 9604 warnx("%s: perf/%s/%s: failed to open counter on cpu%d", 9605 __func__, perf_device, pinfo->event, cpu); 9606 continue; 9607 } 9608 9609 domain_visited[next_domain] = 1; 9610 pinfo->fd_perf_per_domain[next_domain] = fd_perf; 9611 pinfo->scale = perf_scale; 9612 9613 if (debug) 9614 fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n", 9615 perf_device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); 9616 } 9617 9618 pinfo = pinfo->next; 9619 } 9620 9621 free(domain_visited); 9622 9623 return 0; 9624 } 9625 9626 void added_perf_counters_init(void) 9627 { 9628 if (added_perf_counters_init_(sys.perf_tp)) 9629 errx(1, "%s: %s", __func__, "thread"); 9630 9631 if (added_perf_counters_init_(sys.perf_cp)) 9632 errx(1, "%s: %s", __func__, "core"); 9633 9634 if (added_perf_counters_init_(sys.perf_pp)) 9635 errx(1, "%s: %s", __func__, "package"); 9636 } 9637 9638 int parse_telem_info_file(int fd_dir, const char *info_filename, const char *format, unsigned long *output) 9639 { 9640 int fd_telem_info; 9641 FILE *file_telem_info; 9642 unsigned long value; 9643 9644 fd_telem_info = openat(fd_dir, info_filename, O_RDONLY); 9645 if (fd_telem_info == -1) 9646 return -1; 9647 9648 file_telem_info = fdopen(fd_telem_info, "r"); 9649 if (file_telem_info == NULL) { 9650 close(fd_telem_info); 9651 return -1; 9652 } 9653 9654 if (fscanf(file_telem_info, format, &value) != 1) { 9655 fclose(file_telem_info); 9656 return -1; 9657 } 9658 9659 fclose(file_telem_info); 9660 9661 *output = value; 9662 9663 return 0; 9664 } 9665 9666 struct pmt_mmio *pmt_mmio_open(unsigned int target_guid) 9667 { 9668 struct pmt_diriter_t pmt_iter; 9669 const struct dirent *entry; 9670 struct stat st; 9671 int fd_telem_dir, fd_pmt; 9672 unsigned long guid, size, offset; 9673 size_t mmap_size; 9674 void *mmio; 9675 struct pmt_mmio *head = NULL, *last = NULL; 9676 struct pmt_mmio *new_pmt = NULL; 9677 9678 if (stat(SYSFS_TELEM_PATH, &st) == -1) 9679 return NULL; 9680 9681 pmt_diriter_init(&pmt_iter); 9682 entry = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); 9683 if (!entry) { 9684 pmt_diriter_remove(&pmt_iter); 9685 return NULL; 9686 } 9687 9688 for (; entry != NULL; entry = pmt_diriter_next(&pmt_iter)) { 9689 if (fstatat(dirfd(pmt_iter.dir), entry->d_name, &st, 0) == -1) 9690 break; 9691 9692 if (!S_ISDIR(st.st_mode)) 9693 continue; 9694 9695 fd_telem_dir = openat(dirfd(pmt_iter.dir), entry->d_name, O_RDONLY); 9696 if (fd_telem_dir == -1) 9697 break; 9698 9699 if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) { 9700 close(fd_telem_dir); 9701 break; 9702 } 9703 9704 if (parse_telem_info_file(fd_telem_dir, "size", "%lu", &size)) { 9705 close(fd_telem_dir); 9706 break; 9707 } 9708 9709 if (guid != target_guid) { 9710 close(fd_telem_dir); 9711 continue; 9712 } 9713 9714 if (parse_telem_info_file(fd_telem_dir, "offset", "%lu", &offset)) { 9715 close(fd_telem_dir); 9716 break; 9717 } 9718 9719 assert(offset == 0); 9720 9721 fd_pmt = openat(fd_telem_dir, "telem", O_RDONLY); 9722 if (fd_pmt == -1) 9723 goto loop_cleanup_and_break; 9724 9725 mmap_size = ROUND_UP_TO_PAGE_SIZE(size); 9726 mmio = mmap(0, mmap_size, PROT_READ, MAP_SHARED, fd_pmt, 0); 9727 if (mmio != MAP_FAILED) { 9728 if (debug) 9729 fprintf(stderr, "%s: 0x%lx mmaped at: %p\n", __func__, guid, mmio); 9730 9731 new_pmt = calloc(1, 
sizeof(*new_pmt));
9732
9733 if (!new_pmt) {
9734 fprintf(stderr, "%s: Failed to allocate pmt_mmio\n", __func__);
9735 exit(1);
9736 }
9737
9738 /*
9739 * Create a linked list of mmaped regions,
9740 * but preserve the ordering from sysfs.
9741 * The ordering is important for the user to
9742 * use the seq=%u parameter when adding a counter.
9743 */
9744 new_pmt->guid = guid;
9745 new_pmt->mmio_base = mmio;
9746 new_pmt->pmt_offset = offset;
9747 new_pmt->size = size;
9748 new_pmt->next = pmt_mmios;
9749
9750 if (last)
9751 last->next = new_pmt;
9752 else
9753 head = new_pmt;
9754
9755 last = new_pmt;
9756 }
9757
9758 loop_cleanup_and_break:
9759 close(fd_pmt);
9760 close(fd_telem_dir);
9761 }
9762
9763 pmt_diriter_remove(&pmt_iter);
9764
9765 /*
9766 * If we found something, stick the just-created
9767 * linked list onto the front.
9768 */
9769 if (head)
9770 pmt_mmios = head;
9771
9772 return head;
9773 }
9774
9775 struct pmt_mmio *pmt_mmio_find(unsigned int guid)
9776 {
9777 struct pmt_mmio *pmmio = pmt_mmios;
9778
9779 while (pmmio) {
9780 if (pmmio->guid == guid)
9781 return pmmio;
9782
9783 pmmio = pmmio->next;
9784 }
9785
9786 return NULL;
9787 }
9788
9789 void *pmt_get_counter_pointer(struct pmt_mmio *pmmio, unsigned long counter_offset)
9790 {
9791 char *ret;
9792
9793 /* Get the base of the mmaped PMT file. */
9794 ret = (char *)pmmio->mmio_base;
9795
9796 /*
9797 * Apply the PMT MMIO offset to obtain the beginning of the mmaped telemetry data.
9798 * The mmaped memory is not guaranteed to begin with the telemetry data
9799 * - we might have to apply the offset first.
9800 */
9801 ret += pmmio->pmt_offset;
9802
9803 /* Apply the counter offset to get the address of the mmaped counter. */
9804 ret += counter_offset;
9805
9806 return ret;
9807 }
9808
9809 struct pmt_mmio *pmt_add_guid(unsigned int guid, unsigned int seq)
9810 {
9811 struct pmt_mmio *ret;
9812
9813 ret = pmt_mmio_find(guid);
9814 if (!ret)
9815 ret = pmt_mmio_open(guid);
9816
9817 while (ret && seq) {
9818 ret = ret->next;
9819 --seq;
9820 }
9821
9822 return ret;
9823 }
9824
9825 enum pmt_open_mode {
9826 PMT_OPEN_TRY, /* Open failure is not an error. */
9827 PMT_OPEN_REQUIRED, /* Open failure is a fatal error. */
9828 };
9829
9830 struct pmt_counter *pmt_find_counter(struct pmt_counter *pcounter, const char *name)
9831 {
9832 while (pcounter) {
9833 if (strcmp(pcounter->name, name) == 0)
9834 break;
9835
9836 pcounter = pcounter->next;
9837 }
9838
9839 return pcounter;
9840 }
9841
9842 struct pmt_counter **pmt_get_scope_root(enum counter_scope scope)
9843 {
9844 switch (scope) {
9845 case SCOPE_CPU:
9846 return &sys.pmt_tp;
9847 case SCOPE_CORE:
9848 return &sys.pmt_cp;
9849 case SCOPE_PACKAGE:
9850 return &sys.pmt_pp;
9851 }
9852
9853 __builtin_unreachable();
9854 }
9855
9856 void pmt_counter_add_domain(struct pmt_counter *pcounter, unsigned long *pmmio, unsigned int domain_id)
9857 {
9858 /* Make sure the new domain fits.
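pmt_counter_resize() is expected to grow the domains array to at least domain_id + 1 entries (zero-filling the new slots), which the asserts below double-check.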
*/
9859 if (domain_id >= pcounter->num_domains)
9860 pmt_counter_resize(pcounter, domain_id + 1);
9861
9862 assert(pcounter->domains);
9863 assert(domain_id < pcounter->num_domains);
9864
9865 pcounter->domains[domain_id].pcounter = pmmio;
9866 }
9867
9868 int pmt_add_counter(unsigned int guid, unsigned int seq, const char *name, enum pmt_datatype type,
9869 unsigned int lsb, unsigned int msb, unsigned int offset, enum counter_scope scope,
9870 enum counter_format format, unsigned int domain_id, enum pmt_open_mode mode)
9871 {
9872 struct pmt_mmio *mmio;
9873 struct pmt_counter *pcounter;
9874 struct pmt_counter **const pmt_root = pmt_get_scope_root(scope);
9875 bool new_counter = false;
9876 int conflict = 0;
9877
9878 if (lsb > msb) {
9879 fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "lsb <= msb", name);
9880 exit(1);
9881 }
9882
9883 if (msb >= 64) {
9884 fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "msb < 64", name);
9885 exit(1);
9886 }
9887
9888 mmio = pmt_add_guid(guid, seq);
9889 if (!mmio) {
9890 if (mode != PMT_OPEN_TRY) {
9891 fprintf(stderr, "%s: failed to map PMT MMIO for guid %x, seq %u\n", __func__, guid, seq);
9892 exit(1);
9893 }
9894
9895 return 1;
9896 }
9897
9898 if (offset >= mmio->size) {
9899 if (mode != PMT_OPEN_TRY) {
9900 fprintf(stderr, "%s: offset %u outside of PMT MMIO size %u\n", __func__, offset, mmio->size);
9901 exit(1);
9902 }
9903
9904 return 1;
9905 }
9906
9907 pcounter = pmt_find_counter(*pmt_root, name);
9908 if (!pcounter) {
9909 pcounter = calloc(1, sizeof(*pcounter)); if (!pcounter) err(-1, "calloc pcounter");
9910 new_counter = true;
9911 }
9912
9913 if (new_counter) {
9914 strncpy(pcounter->name, name, ARRAY_SIZE(pcounter->name) - 1);
9915 pcounter->type = type;
9916 pcounter->scope = scope;
9917 pcounter->lsb = lsb;
9918 pcounter->msb = msb;
9919 pcounter->format = format;
9920 } else {
9921 conflict += pcounter->type != type;
9922 conflict += pcounter->scope != scope;
9923 conflict += pcounter->lsb != lsb;
9924 conflict += pcounter->msb != msb;
9925 conflict += pcounter->format != format;
9926 }
9927
9928 if (conflict) {
9929 fprintf(stderr, "%s: conflicting parameters for the PMT counter with the same name %s\n",
9930 __func__, name);
9931 exit(1);
9932 }
9933
9934 pmt_counter_add_domain(pcounter, pmt_get_counter_pointer(mmio, offset), domain_id);
9935
9936 if (new_counter) {
9937 pcounter->next = *pmt_root;
9938 *pmt_root = pcounter;
9939 }
9940
9941 return 0;
9942 }
9943
9944 void pmt_init(void)
9945 {
9946 int cpu_num;
9947 unsigned long seq, offset, mod_num;
9948
9949 if (BIC_IS_ENABLED(BIC_Diec6)) {
9950 pmt_add_counter(PMT_MTL_DC6_GUID, PMT_MTL_DC6_SEQ, "Die%c6", PMT_TYPE_XTAL_TIME,
9951 PMT_COUNTER_MTL_DC6_LSB, PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET,
9952 SCOPE_PACKAGE, FORMAT_DELTA, 0, PMT_OPEN_TRY);
9953 }
9954
9955 if (BIC_IS_ENABLED(BIC_CPU_c1e)) {
9956 seq = 0;
9957 offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE;
9958 mod_num = 0; /* Relative module number for the current PMT file. */
9959
9960 /* Open the counter for each CPU. */
9961 for (cpu_num = 0; cpu_num < topo.max_cpu_num + 1;) {
9962
9963 if (cpu_is_not_allowed(cpu_num))
9964 goto next_loop_iter;
9965
9966 /*
9967 * Set the scope to CPU, even though CWF reports the counter per module.
9968 * CPUs inside the same module will read from the same location, instead of reporting zeros.
9969 *
9970 * CWF with newer firmware might require a PMT_TYPE_XTAL_TIME instead of PMT_TYPE_TCORE_CLOCK.
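* For example, if PMT_COUNTER_CWF_CPUS_PER_MODULE is 4, then CPUs 0-3 all
* read the counter of module 0, CPUs 4-7 that of module 1, and so on.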
*/
9972 pmt_add_counter(PMT_CWF_MC1E_GUID, seq, "CPU%c1e", PMT_TYPE_TCORE_CLOCK,
9973 PMT_COUNTER_CWF_MC1E_LSB, PMT_COUNTER_CWF_MC1E_MSB, offset, SCOPE_CPU,
9974 FORMAT_DELTA, cpu_num, PMT_OPEN_TRY);
9975
9976 /*
9977 * The logic for advancing to the next loop iteration is rather
9978 * complex, so keep it behind a label.
9979 */
9980 next_loop_iter:
9981 /*
9982 * Advance the cpu number and check if we should also advance offset to
9983 * the next counter inside the PMT file.
9984 *
9985 * On the Clearwater Forest platform, the counter is reported per module,
9986 * so open the same counter for all of the CPUs inside the module.
9987 * That way, the reported table shows the correct value for all of the CPUs inside the module,
9988 * instead of zeros.
9989 */
9990 ++cpu_num;
9991 if (cpu_num % PMT_COUNTER_CWF_CPUS_PER_MODULE == 0) {
9992 offset += PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT;
9993 ++mod_num;
9994 }
9995
9996 /*
9997 * There are PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE modules in each PMT file.
9998 *
9999 * If that number is reached, seq must be incremented to advance to the next file in the sequence,
10000 * and the offset inside that file and the module counter have to be reset.
10001 */
10002 if (mod_num == PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE) {
10003 ++seq;
10004 offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE;
10005 mod_num = 0;
10006 }
10007 }
10008 }
10009 }
10010
10011 void turbostat_init()
10012 {
10013 setup_all_buffers(true);
10014 set_base_cpu();
10015 check_msr_access();
10016 check_perf_access();
10017 process_cpuid();
10018 counter_info_init();
10019 probe_pm_features();
10020 msr_perf_init();
10021 linux_perf_init();
10022 rapl_perf_init();
10023 cstate_perf_init();
10024 added_perf_counters_init();
10025 pmt_init();
10026
10027 for_all_cpus(get_cpu_type, ODD_COUNTERS);
10028 for_all_cpus(get_cpu_type, EVEN_COUNTERS);
10029
10030 if (BIC_IS_ENABLED(BIC_IPC) && has_aperf_access && get_instr_count_fd(base_cpu) != -1)
10031 BIC_PRESENT(BIC_IPC);
10032
10033 /*
10034 * If the TSC tweak is needed but base_hz could not be determined,
10035 * disable the affected BICs, since they can't be reported accurately.
10036 */
10037 if (platform->enable_tsc_tweak && !has_base_hz) {
10038 CLR_BIC(BIC_Busy, &bic_enabled);
10039 CLR_BIC(BIC_Bzy_MHz, &bic_enabled);
10040 }
10041 }
10042
10043 void affinitize_child(void)
10044 {
10045 /* Prefer cpu_possible_set, if available. */
10046 if (sched_setaffinity(0, cpu_possible_setsize, cpu_possible_set)) {
10047 warn("sched_setaffinity cpu_possible_set");
10048
10049 /* Otherwise, allow the child to run on the same cpu set as turbostat. */
10050 if (sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set))
10051 warn("sched_setaffinity cpu_allowed_set");
10052 }
10053 }
10054
10055 int fork_it(char **argv)
10056 {
10057 pid_t child_pid;
10058 int status;
10059
10060 snapshot_proc_sysfs_files();
10061 status = for_all_cpus(get_counters, EVEN_COUNTERS);
10062 first_counter_read = 0;
10063 if (status)
10064 exit(status);
10065 gettimeofday(&tv_even, (struct timezone *)NULL);
10066
10067 child_pid = fork();
10068 if (!child_pid) {
10069 /* child */
10070 affinitize_child();
10071 execvp(argv[0], argv);
10072 err(errno, "exec %s", argv[0]);
10073 } else {
10074
10075 /* parent */
10076 if (child_pid == -1)
10077 err(1, "fork");
10078
10079 signal(SIGINT, SIG_IGN);
10080 signal(SIGQUIT, SIG_IGN);
10081 if (waitpid(child_pid, &status, 0) == -1)
10082 err(status, "waitpid");
10083
10084 if (WIFEXITED(status))
10085 status = WEXITSTATUS(status);
10086 }
10087 /*
10088 n.b.
fork_it() does not check for errors from for_all_cpus() 10089 * because re-starting is problematic when forking 10090 */ 10091 snapshot_proc_sysfs_files(); 10092 for_all_cpus(get_counters, ODD_COUNTERS); 10093 gettimeofday(&tv_odd, (struct timezone *)NULL); 10094 timersub(&tv_odd, &tv_even, &tv_delta); 10095 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) 10096 fprintf(outf, "%s: Counter reset detected\n", progname); 10097 delta_platform(&platform_counters_odd, &platform_counters_even); 10098 10099 compute_average(EVEN_COUNTERS); 10100 format_all_counters(EVEN_COUNTERS); 10101 10102 fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0); 10103 10104 flush_output_stderr(); 10105 10106 return status; 10107 } 10108 10109 int get_and_dump_counters(void) 10110 { 10111 int status; 10112 10113 snapshot_proc_sysfs_files(); 10114 status = for_all_cpus(get_counters, ODD_COUNTERS); 10115 if (status) 10116 return status; 10117 10118 status = for_all_cpus(dump_counters, ODD_COUNTERS); 10119 if (status) 10120 return status; 10121 10122 flush_output_stdout(); 10123 10124 return status; 10125 } 10126 10127 void print_version() 10128 { 10129 fprintf(outf, "turbostat version 2025.09.09 - Len Brown <lenb@kernel.org>\n"); 10130 } 10131 10132 #define COMMAND_LINE_SIZE 2048 10133 10134 void print_bootcmd(void) 10135 { 10136 char bootcmd[COMMAND_LINE_SIZE]; 10137 FILE *fp; 10138 int ret; 10139 10140 memset(bootcmd, 0, COMMAND_LINE_SIZE); 10141 fp = fopen("/proc/cmdline", "r"); 10142 if (!fp) 10143 return; 10144 10145 ret = fread(bootcmd, sizeof(char), COMMAND_LINE_SIZE - 1, fp); 10146 if (ret) { 10147 bootcmd[ret] = '\0'; 10148 /* the last character is already '\n' */ 10149 fprintf(outf, "Kernel command line: %s", bootcmd); 10150 } 10151 10152 fclose(fp); 10153 } 10154 10155 struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name) 10156 { 10157 struct msr_counter *mp; 10158 10159 for (mp = head; mp; mp = mp->next) { 10160 if (debug) 10161 fprintf(stderr, "%s: %s %s\n", __func__, name, mp->name); 10162 if (!strcmp(name, mp->name)) 10163 return mp; 10164 } 10165 return NULL; 10166 } 10167 10168 int add_counter(unsigned int msr_num, char *path, char *name, 10169 unsigned int width, enum counter_scope scope, 10170 enum counter_type type, enum counter_format format, int flags, int id) 10171 { 10172 struct msr_counter *msrp; 10173 10174 if (no_msr && msr_num) 10175 errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num); 10176 10177 if (debug) 10178 fprintf(stderr, "%s(msr%d, %s, %s, width%d, scope%d, type%d, format%d, flags%x, id%d)\n", 10179 __func__, msr_num, path, name, width, scope, type, format, flags, id); 10180 10181 switch (scope) { 10182 10183 case SCOPE_CPU: 10184 msrp = find_msrp_by_name(sys.tp, name); 10185 if (msrp) { 10186 if (debug) 10187 fprintf(stderr, "%s: %s FOUND\n", __func__, name); 10188 break; 10189 } 10190 if (sys.added_thread_counters++ >= MAX_ADDED_THREAD_COUNTERS) { 10191 warnx("ignoring thread counter %s", name); 10192 return -1; 10193 } 10194 break; 10195 case SCOPE_CORE: 10196 msrp = find_msrp_by_name(sys.cp, name); 10197 if (msrp) { 10198 if (debug) 10199 fprintf(stderr, "%s: %s FOUND\n", __func__, name); 10200 break; 10201 } 10202 if (sys.added_core_counters++ >= MAX_ADDED_CORE_COUNTERS) { 10203 warnx("ignoring core counter %s", name); 10204 return -1; 10205 } 10206 break; 10207 case SCOPE_PACKAGE: 10208 msrp = find_msrp_by_name(sys.pp, name); 10209 if (msrp) { 10210 if (debug) 10211 fprintf(stderr, "%s: %s FOUND\n", 
__func__, name); 10212 break; 10213 } 10214 if (sys.added_package_counters++ >= MAX_ADDED_PACKAGE_COUNTERS) { 10215 warnx("ignoring package counter %s", name); 10216 return -1; 10217 } 10218 break; 10219 default: 10220 warnx("ignoring counter %s with unknown scope", name); 10221 return -1; 10222 } 10223 10224 if (msrp == NULL) { 10225 msrp = calloc(1, sizeof(struct msr_counter)); 10226 if (msrp == NULL) 10227 err(-1, "calloc msr_counter"); 10228 10229 msrp->msr_num = msr_num; 10230 strncpy(msrp->name, name, NAME_BYTES - 1); 10231 msrp->width = width; 10232 msrp->type = type; 10233 msrp->format = format; 10234 msrp->flags = flags; 10235 10236 switch (scope) { 10237 case SCOPE_CPU: 10238 msrp->next = sys.tp; 10239 sys.tp = msrp; 10240 break; 10241 case SCOPE_CORE: 10242 msrp->next = sys.cp; 10243 sys.cp = msrp; 10244 break; 10245 case SCOPE_PACKAGE: 10246 msrp->next = sys.pp; 10247 sys.pp = msrp; 10248 break; 10249 } 10250 } 10251 10252 if (path) { 10253 struct sysfs_path *sp; 10254 10255 sp = calloc(1, sizeof(struct sysfs_path)); 10256 if (sp == NULL) { 10257 perror("calloc"); 10258 exit(1); 10259 } 10260 strncpy(sp->path, path, PATH_BYTES - 1); 10261 sp->id = id; 10262 sp->next = msrp->sp; 10263 msrp->sp = sp; 10264 } 10265 10266 return 0; 10267 } 10268 10269 /* 10270 * Initialize the fields used for identifying and opening the counter. 10271 * 10272 * Defer the initialization of any runtime buffers for actually reading 10273 * the counters for when we initialize all perf counters, so we can later 10274 * easily call re_initialize(). 10275 */ 10276 struct perf_counter_info *make_perf_counter_info(const char *perf_device, 10277 const char *perf_event, 10278 const char *name, 10279 unsigned int width, 10280 enum counter_scope scope, 10281 enum counter_type type, enum counter_format format) 10282 { 10283 struct perf_counter_info *pinfo; 10284 10285 pinfo = calloc(1, sizeof(*pinfo)); 10286 if (!pinfo) 10287 errx(1, "%s: Failed to allocate %s/%s\n", __func__, perf_device, perf_event); 10288 10289 strncpy(pinfo->device, perf_device, ARRAY_SIZE(pinfo->device) - 1); 10290 strncpy(pinfo->event, perf_event, ARRAY_SIZE(pinfo->event) - 1); 10291 10292 strncpy(pinfo->name, name, ARRAY_SIZE(pinfo->name) - 1); 10293 pinfo->width = width; 10294 pinfo->scope = scope; 10295 pinfo->type = type; 10296 pinfo->format = format; 10297 10298 return pinfo; 10299 } 10300 10301 int add_perf_counter(const char *perf_device, const char *perf_event, const char *name_buffer, unsigned int width, 10302 enum counter_scope scope, enum counter_type type, enum counter_format format) 10303 { 10304 struct perf_counter_info *pinfo; 10305 10306 switch (scope) { 10307 case SCOPE_CPU: 10308 if (sys.added_thread_perf_counters >= MAX_ADDED_THREAD_COUNTERS) { 10309 warnx("ignoring thread counter perf/%s/%s", perf_device, perf_event); 10310 return -1; 10311 } 10312 break; 10313 10314 case SCOPE_CORE: 10315 if (sys.added_core_perf_counters >= MAX_ADDED_CORE_COUNTERS) { 10316 warnx("ignoring core counter perf/%s/%s", perf_device, perf_event); 10317 return -1; 10318 } 10319 break; 10320 10321 case SCOPE_PACKAGE: 10322 if (sys.added_package_perf_counters >= MAX_ADDED_PACKAGE_COUNTERS) { 10323 warnx("ignoring package counter perf/%s/%s", perf_device, perf_event); 10324 return -1; 10325 } 10326 break; 10327 } 10328 10329 pinfo = make_perf_counter_info(perf_device, perf_event, name_buffer, width, scope, type, format); 10330 10331 if (!pinfo) 10332 return -1; 10333 10334 switch (scope) { 10335 case SCOPE_CPU: 10336 pinfo->next = sys.perf_tp; 
10337 sys.perf_tp = pinfo; 10338 ++sys.added_thread_perf_counters; 10339 break; 10340 10341 case SCOPE_CORE: 10342 pinfo->next = sys.perf_cp; 10343 sys.perf_cp = pinfo; 10344 ++sys.added_core_perf_counters; 10345 break; 10346 10347 case SCOPE_PACKAGE: 10348 pinfo->next = sys.perf_pp; 10349 sys.perf_pp = pinfo; 10350 ++sys.added_package_perf_counters; 10351 break; 10352 } 10353 10354 // FIXME: we might not have debug here yet 10355 if (debug) 10356 fprintf(stderr, "%s: %s/%s, name: %s, scope%d\n", 10357 __func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope); 10358 10359 return 0; 10360 } 10361 10362 void parse_add_command_msr(char *add_command) 10363 { 10364 int msr_num = 0; 10365 char *path = NULL; 10366 char perf_device[PERF_DEV_NAME_BYTES] = ""; 10367 char perf_event[PERF_EVT_NAME_BYTES] = ""; 10368 char name_buffer[PERF_NAME_BYTES] = ""; 10369 int width = 64; 10370 int fail = 0; 10371 enum counter_scope scope = SCOPE_CPU; 10372 enum counter_type type = COUNTER_CYCLES; 10373 enum counter_format format = FORMAT_DELTA; 10374 10375 while (add_command) { 10376 10377 if (sscanf(add_command, "msr0x%x", &msr_num) == 1) 10378 goto next; 10379 10380 if (sscanf(add_command, "msr%d", &msr_num) == 1) 10381 goto next; 10382 10383 BUILD_BUG_ON(ARRAY_SIZE(perf_device) <= 31); 10384 BUILD_BUG_ON(ARRAY_SIZE(perf_event) <= 31); 10385 if (sscanf(add_command, "perf/%31[^/]/%31[^,]", &perf_device[0], &perf_event[0]) == 2) 10386 goto next; 10387 10388 if (*add_command == '/') { 10389 path = add_command; 10390 goto next; 10391 } 10392 10393 if (sscanf(add_command, "u%d", &width) == 1) { 10394 if ((width == 32) || (width == 64)) 10395 goto next; 10396 width = 64; 10397 } 10398 if (!strncmp(add_command, "cpu", strlen("cpu"))) { 10399 scope = SCOPE_CPU; 10400 goto next; 10401 } 10402 if (!strncmp(add_command, "core", strlen("core"))) { 10403 scope = SCOPE_CORE; 10404 goto next; 10405 } 10406 if (!strncmp(add_command, "package", strlen("package"))) { 10407 scope = SCOPE_PACKAGE; 10408 goto next; 10409 } 10410 if (!strncmp(add_command, "cycles", strlen("cycles"))) { 10411 type = COUNTER_CYCLES; 10412 goto next; 10413 } 10414 if (!strncmp(add_command, "seconds", strlen("seconds"))) { 10415 type = COUNTER_SECONDS; 10416 goto next; 10417 } 10418 if (!strncmp(add_command, "usec", strlen("usec"))) { 10419 type = COUNTER_USEC; 10420 goto next; 10421 } 10422 if (!strncmp(add_command, "raw", strlen("raw"))) { 10423 format = FORMAT_RAW; 10424 goto next; 10425 } 10426 if (!strncmp(add_command, "average", strlen("average"))) { 10427 format = FORMAT_AVERAGE; 10428 goto next; 10429 } 10430 if (!strncmp(add_command, "delta", strlen("delta"))) { 10431 format = FORMAT_DELTA; 10432 goto next; 10433 } 10434 if (!strncmp(add_command, "percent", strlen("percent"))) { 10435 format = FORMAT_PERCENT; 10436 goto next; 10437 } 10438 10439 BUILD_BUG_ON(ARRAY_SIZE(name_buffer) <= 18); 10440 if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) { 10441 char *eos; 10442 10443 eos = strchr(name_buffer, ','); 10444 if (eos) 10445 *eos = '\0'; 10446 goto next; 10447 } 10448 10449 next: 10450 add_command = strchr(add_command, ','); 10451 if (add_command) { 10452 *add_command = '\0'; 10453 add_command++; 10454 } 10455 10456 } 10457 if ((msr_num == 0) && (path == NULL) && (perf_device[0] == '\0' || perf_event[0] == '\0')) { 10458 fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event) required\n"); 10459 fail++; 10460 } 10461 10462 /* Test for non-empty perf_device and perf_event */ 10463 const bool 
is_perf_counter = perf_device[0] && perf_event[0];
10464
10465 /* generate default column header */
10466 if (*name_buffer == '\0') {
10467 if (is_perf_counter) {
10468 snprintf(name_buffer, ARRAY_SIZE(name_buffer), "perf/%s", perf_event);
10469 } else {
10470 if (width == 32)
10471 sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
10472 else
10473 sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
10474 }
10475 }
10476
10477 if (is_perf_counter) {
10478 if (add_perf_counter(perf_device, perf_event, name_buffer, width, scope, type, format))
10479 fail++;
10480 } else {
10481 if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0, 0))
10482 fail++;
10483 }
10484
10485 if (fail) {
10486 help();
10487 exit(1);
10488 }
10489 }
10490
10491 bool starts_with(const char *str, const char *prefix)
10492 {
10493 return strncmp(prefix, str, strlen(prefix)) == 0;
10494 }
10495
10496 int pmt_parse_from_path(const char *target_path, unsigned int *out_guid, unsigned int *out_seq)
10497 {
10498 struct pmt_diriter_t pmt_iter;
10499 const struct dirent *dirname;
10500 struct stat stat, target_stat;
10501 int fd_telem_dir = -1;
10502 int fd_target_dir;
10503 unsigned int seq = 0;
10504 unsigned long guid, target_guid;
10505 int ret = -1;
10506
10507 fd_target_dir = open(target_path, O_RDONLY | O_DIRECTORY);
10508 if (fd_target_dir == -1) {
10509 return -1;
10510 }
10511
10512 if (fstat(fd_target_dir, &target_stat) == -1) {
10513 fprintf(stderr, "%s: Failed to stat the target: %s\n", __func__, strerror(errno));
10514 exit(1);
10515 }
10516
10517 if (parse_telem_info_file(fd_target_dir, "guid", "%lx", &target_guid)) {
10518 fprintf(stderr, "%s: Failed to parse the target guid file: %s\n", __func__, strerror(errno));
10519 exit(1);
10520 }
10521
10522 close(fd_target_dir);
10523
10524 pmt_diriter_init(&pmt_iter);
10525
10526 for (dirname = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); dirname != NULL;
10527 dirname = pmt_diriter_next(&pmt_iter)) {
10528
10529 fd_telem_dir = openat(dirfd(pmt_iter.dir), dirname->d_name, O_RDONLY | O_DIRECTORY);
10530 if (fd_telem_dir == -1)
10531 continue;
10532
10533 if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) {
10534 fprintf(stderr, "%s: Failed to parse the guid file: %s\n", __func__, strerror(errno));
10535 close(fd_telem_dir); fd_telem_dir = -1; continue;
10536 }
10537
10538 if (fstat(fd_telem_dir, &stat) == -1) {
10539 fprintf(stderr, "%s: Failed to stat %s directory: %s\n", __func__,
10540 dirname->d_name, strerror(errno));
10541 close(fd_telem_dir); fd_telem_dir = -1; continue;
10542 }
10543
10544 /*
10545 * If we reached the same directory as the target, exit the loop;
10546 * seq now has the correct value.
10547 */
10548 if (stat.st_dev == target_stat.st_dev && stat.st_ino == target_stat.st_ino) {
10549 ret = 0;
10550 break;
10551 }
10552
10553 /*
10554 * If we reached a directory with the same guid,
10555 * but it's not the target directory yet,
10556 * increment seq and continue the search.
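* For example, if three telemetry directories share the target guid and the
* target is the third of them in iteration order, the search ends with seq == 2.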
10557 */ 10558 if (guid == target_guid) 10559 ++seq; 10560 10561 close(fd_telem_dir); 10562 fd_telem_dir = -1; 10563 } 10564 10565 pmt_diriter_remove(&pmt_iter); 10566 10567 if (fd_telem_dir != -1) 10568 close(fd_telem_dir); 10569 10570 if (!ret) { 10571 *out_guid = target_guid; 10572 *out_seq = seq; 10573 } 10574 10575 return ret; 10576 } 10577 10578 void parse_add_command_pmt(char *add_command) 10579 { 10580 char *name = NULL; 10581 char *type_name = NULL; 10582 char *format_name = NULL; 10583 char *direct_path = NULL; 10584 static const char direct_path_prefix[] = "path="; 10585 unsigned int offset; 10586 unsigned int lsb; 10587 unsigned int msb; 10588 unsigned int guid; 10589 unsigned int seq = 0; /* By default, pick first file in a sequence with a given GUID. */ 10590 unsigned int domain_id; 10591 enum counter_scope scope = 0; 10592 enum pmt_datatype type = PMT_TYPE_RAW; 10593 enum counter_format format = FORMAT_RAW; 10594 bool has_offset = false; 10595 bool has_lsb = false; 10596 bool has_msb = false; 10597 bool has_format = true; /* Format has a default value. */ 10598 bool has_guid = false; 10599 bool has_scope = false; 10600 bool has_type = true; /* Type has a default value. */ 10601 10602 /* Consume the "pmt," prefix. */ 10603 add_command = strchr(add_command, ','); 10604 if (!add_command) { 10605 help(); 10606 exit(1); 10607 } 10608 ++add_command; 10609 10610 while (add_command) { 10611 if (starts_with(add_command, "name=")) { 10612 name = add_command + strlen("name="); 10613 goto next; 10614 } 10615 10616 if (starts_with(add_command, "type=")) { 10617 type_name = add_command + strlen("type="); 10618 goto next; 10619 } 10620 10621 if (starts_with(add_command, "domain=")) { 10622 const size_t prefix_len = strlen("domain="); 10623 10624 if (sscanf(add_command + prefix_len, "cpu%u", &domain_id) == 1) { 10625 scope = SCOPE_CPU; 10626 has_scope = true; 10627 } else if (sscanf(add_command + prefix_len, "core%u", &domain_id) == 1) { 10628 scope = SCOPE_CORE; 10629 has_scope = true; 10630 } else if (sscanf(add_command + prefix_len, "package%u", &domain_id) == 1) { 10631 scope = SCOPE_PACKAGE; 10632 has_scope = true; 10633 } 10634 10635 if (!has_scope) { 10636 printf("%s: invalid value for scope. 
Expected cpu%%u, core%%u or package%%u.\n",
10637 __func__);
10638 exit(1);
10639 }
10640
10641 goto next;
10642 }
10643
10644 if (starts_with(add_command, "format=")) {
10645 format_name = add_command + strlen("format=");
10646 goto next;
10647 }
10648
10649 if (sscanf(add_command, "offset=%u", &offset) == 1) {
10650 has_offset = true;
10651 goto next;
10652 }
10653
10654 if (sscanf(add_command, "lsb=%u", &lsb) == 1) {
10655 has_lsb = true;
10656 goto next;
10657 }
10658
10659 if (sscanf(add_command, "msb=%u", &msb) == 1) {
10660 has_msb = true;
10661 goto next;
10662 }
10663
10664 if (sscanf(add_command, "guid=%x", &guid) == 1) {
10665 has_guid = true;
10666 goto next;
10667 }
10668
10669 if (sscanf(add_command, "seq=%x", &seq) == 1)
10670 goto next;
10671
10672 if (strncmp(add_command, direct_path_prefix, strlen(direct_path_prefix)) == 0) {
10673 direct_path = add_command + strlen(direct_path_prefix);
10674 goto next;
10675 }
10676 next:
10677 add_command = strchr(add_command, ',');
10678 if (add_command) {
10679 *add_command = '\0';
10680 add_command++;
10681 }
10682 }
10683
10684 if (!name) {
10685 printf("%s: missing %s\n", __func__, "name");
10686 exit(1);
10687 }
10688
10689 if (strlen(name) >= PMT_COUNTER_NAME_SIZE_BYTES) {
10690 printf("%s: name has to be shorter than %d characters\n", __func__, PMT_COUNTER_NAME_SIZE_BYTES);
10691 exit(1);
10692 }
10693
10694 if (format_name) {
10695 has_format = false;
10696
10697 if (strcmp("raw", format_name) == 0) {
10698 format = FORMAT_RAW;
10699 has_format = true;
10700 }
10701
10702 if (strcmp("average", format_name) == 0) {
10703 format = FORMAT_AVERAGE;
10704 has_format = true;
10705 }
10706
10707 if (strcmp("delta", format_name) == 0) {
10708 format = FORMAT_DELTA;
10709 has_format = true;
10710 }
10711
10712 if (!has_format) {
10713 fprintf(stderr, "%s: Invalid format %s. Expected raw, average or delta\n",
10714 __func__, format_name);
10715 exit(1);
10716 }
10717 }
10718
10719 if (type_name) {
10720 has_type = false;
10721
10722 if (strcmp("raw", type_name) == 0) {
10723 type = PMT_TYPE_RAW;
10724 has_type = true;
10725 }
10726
10727 if (strcmp("txtal_time", type_name) == 0) {
10728 type = PMT_TYPE_XTAL_TIME;
10729 has_type = true;
10730 }
10731
10732 if (strcmp("tcore_clock", type_name) == 0) {
10733 type = PMT_TYPE_TCORE_CLOCK;
10734 has_type = true;
10735 }
10736
10737 if (!has_type) {
10738 printf("%s: invalid %s: %s\n", __func__, "type", type_name);
10739 exit(1);
10740 }
10741 }
10742
10743 if (!has_offset) {
10744 printf("%s: missing %s\n", __func__, "offset");
10745 exit(1);
10746 }
10747
10748 if (!has_lsb) {
10749 printf("%s: missing %s\n", __func__, "lsb");
10750 exit(1);
10751 }
10752
10753 if (!has_msb) {
10754 printf("%s: missing %s\n", __func__, "msb");
10755 exit(1);
10756 }
10757
10758 if (direct_path && has_guid) {
10759 printf("%s: path and guid+seq parameters are mutually exclusive\n"
10760 "notice: passed guid=0x%x and path=%s\n", __func__, guid, direct_path);
10761 exit(1);
10762 }
10763
10764 if (direct_path) {
10765 if (pmt_parse_from_path(direct_path, &guid, &seq)) {
10766 printf("%s: failed to parse PMT file from %s\n", __func__, direct_path);
10767 exit(1);
10768 }
10769
10770 /* GUID was just inferred from the direct path.
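From here on, guid and seq identify the same telemetry region that path= pointed to, so the path= case can join the common guid+seq flow below.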
*/
10771 has_guid = true;
10772 }
10773
10774 if (!has_guid) {
10775 printf("%s: missing %s\n", __func__, "guid or path");
10776 exit(1);
10777 }
10778
10779 if (!has_scope) {
10780 printf("%s: missing %s\n", __func__, "scope");
10781 exit(1);
10782 }
10783
10784 if (lsb > msb) {
10785 printf("%s: lsb > msb doesn't make sense\n", __func__);
10786 exit(1);
10787 }
10788
10789 pmt_add_counter(guid, seq, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED);
10790 }
10791
10792 void parse_add_command(char *add_command)
10793 {
10794 if (strncmp(add_command, "pmt", strlen("pmt")) == 0)
10795 return parse_add_command_pmt(add_command);
10796 return parse_add_command_msr(add_command);
10797 }
10798
10799 int is_deferred_add(char *name)
10800 {
10801 int i;
10802
10803 for (i = 0; i < deferred_add_index; ++i)
10804 if (!strcmp(name, deferred_add_names[i])) {
10805 deferred_add_consumed |= (1 << i);
10806 return 1;
10807 }
10808 return 0;
10809 }
10810
10811 int is_deferred_skip(char *name)
10812 {
10813 int i;
10814
10815 for (i = 0; i < deferred_skip_index; ++i)
10816 if (!strcmp(name, deferred_skip_names[i])) {
10817 deferred_skip_consumed |= (1 << i);
10818 return 1;
10819 }
10820 return 0;
10821 }
10822
10823 void verify_deferred_consumed(void)
10824 {
10825 int i;
10826 int fail = 0;
10827
10828 for (i = 0; i < deferred_add_index; ++i) {
10829 if (!(deferred_add_consumed & (1 << i))) {
10830 warnx("Counter '%s' cannot be added.", deferred_add_names[i]);
10831 fail++;
10832 }
10833 }
10834 for (i = 0; i < deferred_skip_index; ++i) {
10835 if (!(deferred_skip_consumed & (1 << i))) {
10836 warnx("Counter '%s' cannot be skipped.", deferred_skip_names[i]);
10837 fail++;
10838 }
10839 }
10840 if (fail)
10841 exit(-EINVAL);
10842 }
10843
10844 void probe_cpuidle_residency(void)
10845 {
10846 char path[64];
10847 char name_buf[16];
10848 FILE *input;
10849 int state;
10850 int min_state = 1024, max_state = 0;
10851 char *sp;
10852
10853 for (state = 10; state >= 0; --state) {
10854
10855 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
10856 input = fopen(path, "r");
10857 if (input == NULL)
10858 continue;
10859 if (!fgets(name_buf, sizeof(name_buf), input))
10860 err(1, "%s: failed to read file", path);
10861
10862 /* truncate "C1-HSW\n" or "C1\n" to "C1", then append '%' */
10863 sp = strchr(name_buf, '-');
10864 if (!sp)
10865 sp = strchrnul(name_buf, '\n');
10866 *sp = '%';
10867 *(sp + 1) = '\0';
10868
10869 remove_underbar(name_buf);
10870
10871 fclose(input);
10872
10873 sprintf(path, "cpuidle/state%d/time", state);
10874
10875 if (!DO_BIC(BIC_pct_idle) && !is_deferred_add(name_buf))
10876 continue;
10877
10878 if (is_deferred_skip(name_buf))
10879 continue;
10880
10881 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0);
10882
10883 if (state > max_state)
10884 max_state = state;
10885 if (state < min_state)
10886 min_state = state;
10887 }
10888 }
10889
10890 void probe_cpuidle_counts(void)
10891 {
10892 char path[64];
10893 char name_buf[16];
10894 FILE *input;
10895 int state;
10896 int min_state = 1024, max_state = 0;
10897 char *sp;
10898
10899 if (!DO_BIC(BIC_cpuidle))
10900 return;
10901
10902 for (state = 10; state >= 0; --state) {
10903
10904 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
10905 input = fopen(path, "r");
10906 if (input == NULL)
10907 continue;
10908 if (!fgets(name_buf, sizeof(name_buf), input))
10909 err(1, "%s: failed to read
file", path);
10910 fclose(input);
10911
10912 remove_underbar(name_buf);
10913
10914 if (!DO_BIC(BIC_cpuidle) && !is_deferred_add(name_buf))
10915 continue;
10916
10917 if (is_deferred_skip(name_buf))
10918 continue;
10919
10920 /* find the truncation point: "C1-HSW\n" or "C1\n" both become "C1" */
10921 sp = strchr(name_buf, '-');
10922 if (!sp)
10923 sp = strchrnul(name_buf, '\n');
10924
10925 /*
10926 * The 'below' sysfs file always contains 0 for the deepest state (largest index),
10927 * so do not add it.
10928 */
10929 if (state != max_state) {
10930 /*
10931 * Add 'C1+' for C1, and so on. (The deepest state was excluded
10932 * by the check above, since its 'below' count is always 0.)
10933 */
10934
10935 *sp = '+';
10936 *(sp + 1) = '\0';
10937 sprintf(path, "cpuidle/state%d/below", state);
10938 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
10939 }
10940
10941 *sp = '\0';
10942 sprintf(path, "cpuidle/state%d/usage", state);
10943 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
10944
10945 /*
10946 * The 'above' sysfs file always contains 0 for the shallowest state (smallest
10947 * index), so do not add it.
10948 */
10949 if (state != min_state) {
10950 *sp = '-';
10951 *(sp + 1) = '\0';
10952 sprintf(path, "cpuidle/state%d/above", state);
10953 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
10954 }
10955 }
10956 }
10957
10958 /*
10959 * parse a cpuset with the following syntax
10960 * 1,2,4..6,8-10 and set the corresponding bits in cpu_subset
10961 */
10962 void parse_cpu_command(char *optarg)
10963 {
10964 if (!strcmp(optarg, "core")) {
10965 if (cpu_subset)
10966 goto error;
10967 show_core_only++;
10968 return;
10969 }
10970 if (!strcmp(optarg, "package")) {
10971 if (cpu_subset)
10972 goto error;
10973 show_pkg_only++;
10974 return;
10975 }
10976 if (show_core_only || show_pkg_only)
10977 goto error;
10978
10979 cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
10980 if (cpu_subset == NULL)
10981 err(3, "CPU_ALLOC");
10982 cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);
10983
10984 CPU_ZERO_S(cpu_subset_size, cpu_subset);
10985
10986 if (parse_cpu_str(optarg, cpu_subset, cpu_subset_size))
10987 goto error;
10988
10989 return;
10990
10991 error:
10992 fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
10993 help();
10994 exit(-1);
10995 }
10996
10997 void cmdline(int argc, char **argv)
10998 {
10999 int opt;
11000 int option_index = 0;
11001 static struct option long_options[] = {
11002 { "add", required_argument, 0, 'a' },
11003 { "cpu", required_argument, 0, 'c' },
11004 { "Dump", no_argument, 0, 'D' },
11005 { "debug", no_argument, 0, 'd' }, /* internal, not documented */
11006 { "enable", required_argument, 0, 'e' },
11007 { "force", no_argument, 0, 'f' },
11008 { "interval", required_argument, 0, 'i' },
11009 { "IPC", no_argument, 0, 'I' },
11010 { "num_iterations", required_argument, 0, 'n' },
11011 { "header_iterations", required_argument, 0, 'N' },
11012 { "help", no_argument, 0, 'h' },
11013 { "hide", required_argument, 0, 'H' }, // meh, -h taken by --help
11014 { "Joules", no_argument, 0, 'J' },
11015 { "list", no_argument, 0, 'l' },
11016 { "out", required_argument, 0, 'o' },
11017 { "quiet", no_argument, 0, 'q' },
11018 { "no-msr", no_argument, 0, 'M' },
11019 { "no-perf", no_argument, 0, 'P' },
11020 { "show", required_argument, 0, 's' },
11021 { "Summary", no_argument, 0, 'S' },
11022 { "TCC", required_argument, 0, 'T' },
11023 { "version", no_argument, 0, 'v' },
11024 { 0,
0, 0, 0 }
11025 };
11026
11027 progname = argv[0];
11028
11029 /*
11030 * Parse some options early, because they may invalidate other options,
11031 * like adding an MSR counter with --add while at the same time using --no-msr.
11032 */
11033 while ((opt = getopt_long_only(argc, argv, "+MPn:", long_options, &option_index)) != -1) {
11034 switch (opt) {
11035 case 'M':
11036 no_msr = 1;
11037 break;
11038 case 'P':
11039 no_perf = 1;
11040 break;
11041 default:
11042 break;
11043 }
11044 }
11045 optind = 0;
11046
11047 while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qMST:v", long_options, &option_index)) != -1) {
11048 switch (opt) {
11049 case 'a':
11050 parse_add_command(optarg);
11051 break;
11052 case 'c':
11053 parse_cpu_command(optarg);
11054 break;
11055 case 'D':
11056 dump_only++;
11057 /*
11058 * Force no_perf early to prevent using perf as a counter source.
11059 * The user asks for raw values, but perf returns them relative
11060 * to the opening of the file descriptor.
11061 */
11062 no_perf = 1;
11063 break;
11064 case 'e':
11065 /* --enable the specified counter, without clearing the existing list */
11066 bic_lookup(&bic_enabled, optarg, SHOW_LIST);
11067 break;
11068 case 'f':
11069 force_load++;
11070 break;
11071 case 'd':
11072 debug++;
11073 bic_set_all(&bic_enabled);
11074 break;
11075 case 'H':
11076 /*
11077 * --hide: do not show those specified;
11078 * multiple invocations simply clear more bits in the enabled mask
11079 */
11080 {
11081 cpu_set_t bic_group_hide;
11082
11083 BIC_INIT(&bic_group_hide);
11084
11085 bic_lookup(&bic_group_hide, optarg, HIDE_LIST);
11086 bic_clear_bits(&bic_enabled, &bic_group_hide);
11087 }
11088 break;
11089 case 'h':
11090 default:
11091 help();
11092 exit(1);
11093 case 'i':
11094 {
11095 double interval = strtod(optarg, NULL);
11096
11097 if (interval < 0.001) {
11098 fprintf(outf, "interval %f seconds is too small\n", interval);
11099 exit(2);
11100 }
11101
11102 interval_tv.tv_sec = interval_ts.tv_sec = interval;
11103 interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
11104 interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
11105 }
11106 break;
11107 case 'J':
11108 rapl_joules++;
11109 break;
11110 case 'l':
11111 bic_set_all(&bic_enabled);
11112 list_header_only++;
11113 quiet++;
11114 break;
11115 case 'o':
11116 outf = fopen_or_die(optarg, "w");
11117 break;
11118 case 'q':
11119 quiet = 1;
11120 break;
11121 case 'M':
11122 case 'P':
11123 /* Parsed earlier */
11124 break;
11125 case 'n':
11126 num_iterations = strtod(optarg, NULL);
11127
11128 if (num_iterations <= 0) {
11129 fprintf(outf, "iterations %d should be a positive number\n", num_iterations);
11130 exit(2);
11131 }
11132 break;
11133 case 'N':
11134 header_iterations = strtod(optarg, NULL);
11135
11136 if (header_iterations <= 0) {
11137 fprintf(outf, "iterations %d should be a positive number\n", header_iterations);
11138 exit(2);
11139 }
11140 break;
11141 case 's':
11142 /*
11143 * --show: show only those specified.
11144 * The 1st invocation will clear and replace the enabled mask;
11145 * subsequent invocations can add to it.
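* For example, "--show Busy% --show IRQ" first replaces the enabled mask
* with Busy%, then adds IRQ, so just those two counter columns end up enabled.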
11146 */ 11147 if (shown == 0) 11148 BIC_INIT(&bic_enabled); 11149 bic_lookup(&bic_enabled, optarg, SHOW_LIST); 11150 shown = 1; 11151 break; 11152 case 'S': 11153 summary_only++; 11154 break; 11155 case 'T': 11156 tj_max_override = atoi(optarg); 11157 break; 11158 case 'v': 11159 print_version(); 11160 exit(0); 11161 break; 11162 } 11163 } 11164 } 11165 11166 void set_rlimit(void) 11167 { 11168 struct rlimit limit; 11169 11170 if (getrlimit(RLIMIT_NOFILE, &limit) < 0) 11171 err(1, "Failed to get rlimit"); 11172 11173 if (limit.rlim_max < MAX_NOFILE) 11174 limit.rlim_max = MAX_NOFILE; 11175 if (limit.rlim_cur < MAX_NOFILE) 11176 limit.rlim_cur = MAX_NOFILE; 11177 11178 if (setrlimit(RLIMIT_NOFILE, &limit) < 0) 11179 err(1, "Failed to set rlimit"); 11180 } 11181 11182 int main(int argc, char **argv) 11183 { 11184 int fd, ret; 11185 11186 bic_groups_init(); 11187 11188 fd = open("/sys/fs/cgroup/cgroup.procs", O_WRONLY); 11189 if (fd < 0) 11190 goto skip_cgroup_setting; 11191 11192 ret = write(fd, "0\n", 2); 11193 if (ret == -1) 11194 perror("Can't update cgroup\n"); 11195 11196 close(fd); 11197 11198 skip_cgroup_setting: 11199 outf = stderr; 11200 cmdline(argc, argv); 11201 11202 if (!quiet) { 11203 print_version(); 11204 print_bootcmd(); 11205 } 11206 11207 probe_cpuidle_residency(); 11208 probe_cpuidle_counts(); 11209 11210 verify_deferred_consumed(); 11211 11212 if (!getuid()) 11213 set_rlimit(); 11214 11215 turbostat_init(); 11216 11217 if (!no_msr) 11218 msr_sum_record(); 11219 11220 /* dump counters and exit */ 11221 if (dump_only) 11222 return get_and_dump_counters(); 11223 11224 /* list header and exit */ 11225 if (list_header_only) { 11226 print_header(","); 11227 flush_output_stdout(); 11228 return 0; 11229 } 11230 11231 /* 11232 * if any params left, it must be a command to fork 11233 */ 11234 if (argc - optind) 11235 return fork_it(argv + optind); 11236 else 11237 turbostat_loop(); 11238 11239 return 0; 11240 } 11241
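/*
 * Illustrative --add invocations accepted by the parsers above. This is a
 * sketch only: the MSR number, perf event, PMT guid, and offsets below are
 * hypothetical examples, not recommendations.
 *
 *   turbostat --add msr0x10,u64,cpu,raw,MyCtr
 *       reads MSR 0x10 on every CPU and shows it as the raw 64-bit column "MyCtr"
 *
 *   turbostat --add perf/cpu/cycles,core,delta,Cyc
 *       opens perf event "cpu/cycles" (translated to cpu_core/cpu_atom on hybrid
 *       parts) once per core and shows the per-interval delta as column "Cyc"
 *
 *   turbostat --add pmt,name=XYZ,guid=0x1234,seq=0,offset=8,lsb=0,msb=63,type=raw,format=raw,domain=package0
 *       mmaps the PMT region with guid 0x1234 and reports bits 63..0 of the
 *       value at byte offset 8, attributed to package 0
 */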