1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * turbostat -- show CPU frequency and C-state residency 4 * on modern Intel and AMD processors. 5 * 6 * Copyright (c) 2024 Intel Corporation. 7 * Len Brown <len.brown@intel.com> 8 */ 9 10 #define _GNU_SOURCE 11 #include MSRHEADER 12 #include INTEL_FAMILY_HEADER 13 #include <stdarg.h> 14 #include <stdio.h> 15 #include <err.h> 16 #include <unistd.h> 17 #include <sys/types.h> 18 #include <sys/wait.h> 19 #include <sys/stat.h> 20 #include <sys/select.h> 21 #include <sys/resource.h> 22 #include <fcntl.h> 23 #include <signal.h> 24 #include <sys/time.h> 25 #include <stdlib.h> 26 #include <getopt.h> 27 #include <dirent.h> 28 #include <string.h> 29 #include <ctype.h> 30 #include <sched.h> 31 #include <time.h> 32 #include <cpuid.h> 33 #include <sys/capability.h> 34 #include <errno.h> 35 #include <math.h> 36 #include <linux/perf_event.h> 37 #include <asm/unistd.h> 38 #include <stdbool.h> 39 #include <assert.h> 40 #include <linux/kernel.h> 41 42 #define UNUSED(x) (void)(x) 43 44 /* 45 * This list matches the column headers, except 46 * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time 47 * 2. Core and CPU are moved to the end, we can't have strings that contain them 48 * matching on them for --show and --hide. 
49 */ 50 51 /* 52 * buffer size used by sscanf() for added column names 53 * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters 54 */ 55 #define NAME_BYTES 20 56 #define PATH_BYTES 128 57 58 #define MAX_NOFILE 0x8000 59 60 enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; 61 enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC }; 62 enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT }; 63 enum amperf_source { AMPERF_SOURCE_PERF, AMPERF_SOURCE_MSR }; 64 enum rapl_source { RAPL_SOURCE_NONE, RAPL_SOURCE_PERF, RAPL_SOURCE_MSR }; 65 66 struct msr_counter { 67 unsigned int msr_num; 68 char name[NAME_BYTES]; 69 char path[PATH_BYTES]; 70 unsigned int width; 71 enum counter_type type; 72 enum counter_format format; 73 struct msr_counter *next; 74 unsigned int flags; 75 #define FLAGS_HIDE (1 << 0) 76 #define FLAGS_SHOW (1 << 1) 77 #define SYSFS_PERCPU (1 << 1) 78 }; 79 80 struct msr_counter bic[] = { 81 { 0x0, "usec", "", 0, 0, 0, NULL, 0 }, 82 { 0x0, "Time_Of_Day_Seconds", "", 0, 0, 0, NULL, 0 }, 83 { 0x0, "Package", "", 0, 0, 0, NULL, 0 }, 84 { 0x0, "Node", "", 0, 0, 0, NULL, 0 }, 85 { 0x0, "Avg_MHz", "", 0, 0, 0, NULL, 0 }, 86 { 0x0, "Busy%", "", 0, 0, 0, NULL, 0 }, 87 { 0x0, "Bzy_MHz", "", 0, 0, 0, NULL, 0 }, 88 { 0x0, "TSC_MHz", "", 0, 0, 0, NULL, 0 }, 89 { 0x0, "IRQ", "", 0, 0, 0, NULL, 0 }, 90 { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL, 0 }, 91 { 0x0, "sysfs", "", 0, 0, 0, NULL, 0 }, 92 { 0x0, "CPU%c1", "", 0, 0, 0, NULL, 0 }, 93 { 0x0, "CPU%c3", "", 0, 0, 0, NULL, 0 }, 94 { 0x0, "CPU%c6", "", 0, 0, 0, NULL, 0 }, 95 { 0x0, "CPU%c7", "", 0, 0, 0, NULL, 0 }, 96 { 0x0, "ThreadC", "", 0, 0, 0, NULL, 0 }, 97 { 0x0, "CoreTmp", "", 0, 0, 0, NULL, 0 }, 98 { 0x0, "CoreCnt", "", 0, 0, 0, NULL, 0 }, 99 { 0x0, "PkgTmp", "", 0, 0, 0, NULL, 0 }, 100 { 0x0, "GFX%rc6", "", 0, 0, 0, NULL, 0 }, 101 { 0x0, "GFXMHz", "", 0, 0, 0, NULL, 0 }, 102 { 0x0, "Pkg%pc2", "", 0, 0, 0, NULL, 0 }, 103 { 0x0, 
"Pkg%pc3", "", 0, 0, 0, NULL, 0 }, 104 { 0x0, "Pkg%pc6", "", 0, 0, 0, NULL, 0 }, 105 { 0x0, "Pkg%pc7", "", 0, 0, 0, NULL, 0 }, 106 { 0x0, "Pkg%pc8", "", 0, 0, 0, NULL, 0 }, 107 { 0x0, "Pkg%pc9", "", 0, 0, 0, NULL, 0 }, 108 { 0x0, "Pk%pc10", "", 0, 0, 0, NULL, 0 }, 109 { 0x0, "CPU%LPI", "", 0, 0, 0, NULL, 0 }, 110 { 0x0, "SYS%LPI", "", 0, 0, 0, NULL, 0 }, 111 { 0x0, "PkgWatt", "", 0, 0, 0, NULL, 0 }, 112 { 0x0, "CorWatt", "", 0, 0, 0, NULL, 0 }, 113 { 0x0, "GFXWatt", "", 0, 0, 0, NULL, 0 }, 114 { 0x0, "PkgCnt", "", 0, 0, 0, NULL, 0 }, 115 { 0x0, "RAMWatt", "", 0, 0, 0, NULL, 0 }, 116 { 0x0, "PKG_%", "", 0, 0, 0, NULL, 0 }, 117 { 0x0, "RAM_%", "", 0, 0, 0, NULL, 0 }, 118 { 0x0, "Pkg_J", "", 0, 0, 0, NULL, 0 }, 119 { 0x0, "Cor_J", "", 0, 0, 0, NULL, 0 }, 120 { 0x0, "GFX_J", "", 0, 0, 0, NULL, 0 }, 121 { 0x0, "RAM_J", "", 0, 0, 0, NULL, 0 }, 122 { 0x0, "Mod%c6", "", 0, 0, 0, NULL, 0 }, 123 { 0x0, "Totl%C0", "", 0, 0, 0, NULL, 0 }, 124 { 0x0, "Any%C0", "", 0, 0, 0, NULL, 0 }, 125 { 0x0, "GFX%C0", "", 0, 0, 0, NULL, 0 }, 126 { 0x0, "CPUGFX%", "", 0, 0, 0, NULL, 0 }, 127 { 0x0, "Core", "", 0, 0, 0, NULL, 0 }, 128 { 0x0, "CPU", "", 0, 0, 0, NULL, 0 }, 129 { 0x0, "APIC", "", 0, 0, 0, NULL, 0 }, 130 { 0x0, "X2APIC", "", 0, 0, 0, NULL, 0 }, 131 { 0x0, "Die", "", 0, 0, 0, NULL, 0 }, 132 { 0x0, "GFXAMHz", "", 0, 0, 0, NULL, 0 }, 133 { 0x0, "IPC", "", 0, 0, 0, NULL, 0 }, 134 { 0x0, "CoreThr", "", 0, 0, 0, NULL, 0 }, 135 { 0x0, "UncMHz", "", 0, 0, 0, NULL, 0 }, 136 { 0x0, "SAM%mc6", "", 0, 0, 0, NULL, 0 }, 137 { 0x0, "SAMMHz", "", 0, 0, 0, NULL, 0 }, 138 { 0x0, "SAMAMHz", "", 0, 0, 0, NULL, 0 }, 139 }; 140 141 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) 142 #define BIC_USEC (1ULL << 0) 143 #define BIC_TOD (1ULL << 1) 144 #define BIC_Package (1ULL << 2) 145 #define BIC_Node (1ULL << 3) 146 #define BIC_Avg_MHz (1ULL << 4) 147 #define BIC_Busy (1ULL << 5) 148 #define BIC_Bzy_MHz (1ULL << 6) 149 #define BIC_TSC_MHz (1ULL << 7) 150 #define BIC_IRQ (1ULL << 8) 151 
#define BIC_SMI (1ULL << 9) 152 #define BIC_sysfs (1ULL << 10) 153 #define BIC_CPU_c1 (1ULL << 11) 154 #define BIC_CPU_c3 (1ULL << 12) 155 #define BIC_CPU_c6 (1ULL << 13) 156 #define BIC_CPU_c7 (1ULL << 14) 157 #define BIC_ThreadC (1ULL << 15) 158 #define BIC_CoreTmp (1ULL << 16) 159 #define BIC_CoreCnt (1ULL << 17) 160 #define BIC_PkgTmp (1ULL << 18) 161 #define BIC_GFX_rc6 (1ULL << 19) 162 #define BIC_GFXMHz (1ULL << 20) 163 #define BIC_Pkgpc2 (1ULL << 21) 164 #define BIC_Pkgpc3 (1ULL << 22) 165 #define BIC_Pkgpc6 (1ULL << 23) 166 #define BIC_Pkgpc7 (1ULL << 24) 167 #define BIC_Pkgpc8 (1ULL << 25) 168 #define BIC_Pkgpc9 (1ULL << 26) 169 #define BIC_Pkgpc10 (1ULL << 27) 170 #define BIC_CPU_LPI (1ULL << 28) 171 #define BIC_SYS_LPI (1ULL << 29) 172 #define BIC_PkgWatt (1ULL << 30) 173 #define BIC_CorWatt (1ULL << 31) 174 #define BIC_GFXWatt (1ULL << 32) 175 #define BIC_PkgCnt (1ULL << 33) 176 #define BIC_RAMWatt (1ULL << 34) 177 #define BIC_PKG__ (1ULL << 35) 178 #define BIC_RAM__ (1ULL << 36) 179 #define BIC_Pkg_J (1ULL << 37) 180 #define BIC_Cor_J (1ULL << 38) 181 #define BIC_GFX_J (1ULL << 39) 182 #define BIC_RAM_J (1ULL << 40) 183 #define BIC_Mod_c6 (1ULL << 41) 184 #define BIC_Totl_c0 (1ULL << 42) 185 #define BIC_Any_c0 (1ULL << 43) 186 #define BIC_GFX_c0 (1ULL << 44) 187 #define BIC_CPUGFX (1ULL << 45) 188 #define BIC_Core (1ULL << 46) 189 #define BIC_CPU (1ULL << 47) 190 #define BIC_APIC (1ULL << 48) 191 #define BIC_X2APIC (1ULL << 49) 192 #define BIC_Die (1ULL << 50) 193 #define BIC_GFXACTMHz (1ULL << 51) 194 #define BIC_IPC (1ULL << 52) 195 #define BIC_CORE_THROT_CNT (1ULL << 53) 196 #define BIC_UNCORE_MHZ (1ULL << 54) 197 #define BIC_SAM_mc6 (1ULL << 55) 198 #define BIC_SAMMHz (1ULL << 56) 199 #define BIC_SAMACTMHz (1ULL << 57) 200 201 #define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die ) 202 #define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | 
BIC_PKG__ | BIC_RAM__) 203 #define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) 204 #define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6) 205 #define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) 206 207 #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) 208 209 unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); 210 unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; 211 212 #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) 213 #define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME) 214 #define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME) 215 #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) 216 #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) 217 #define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT) 218 219 struct amperf_group_fd; 220 221 char *proc_stat = "/proc/stat"; 222 FILE *outf; 223 int *fd_percpu; 224 int *fd_instr_count_percpu; 225 struct amperf_group_fd *fd_amperf_percpu; /* File descriptors for perf group with APERF and MPERF counters. 
*/ 226 struct timeval interval_tv = { 5, 0 }; 227 struct timespec interval_ts = { 5, 0 }; 228 229 unsigned int num_iterations; 230 unsigned int header_iterations; 231 unsigned int debug; 232 unsigned int quiet; 233 unsigned int shown; 234 unsigned int sums_need_wide_columns; 235 unsigned int rapl_joules; 236 unsigned int summary_only; 237 unsigned int list_header_only; 238 unsigned int dump_only; 239 unsigned int has_aperf; 240 unsigned int has_epb; 241 unsigned int has_turbo; 242 unsigned int is_hybrid; 243 unsigned int units = 1000000; /* MHz etc */ 244 unsigned int genuine_intel; 245 unsigned int authentic_amd; 246 unsigned int hygon_genuine; 247 unsigned int max_level, max_extended_level; 248 unsigned int has_invariant_tsc; 249 unsigned int aperf_mperf_multiplier = 1; 250 double bclk; 251 double base_hz; 252 unsigned int has_base_hz; 253 double tsc_tweak = 1.0; 254 unsigned int show_pkg_only; 255 unsigned int show_core_only; 256 char *output_buffer, *outp; 257 unsigned int do_dts; 258 unsigned int do_ptm; 259 unsigned int do_ipc; 260 unsigned long long cpuidle_cur_cpu_lpi_us; 261 unsigned long long cpuidle_cur_sys_lpi_us; 262 unsigned int tj_max; 263 unsigned int tj_max_override; 264 double rapl_power_units, rapl_time_units; 265 double rapl_dram_energy_units, rapl_energy_units; 266 double rapl_joule_counter_range; 267 unsigned int crystal_hz; 268 unsigned long long tsc_hz; 269 int base_cpu; 270 unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ 271 /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ 272 unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ 273 unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ 274 unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ 275 unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ 276 unsigned int first_counter_read = 1; 277 int ignore_stdin; 278 bool no_msr; 279 bool no_perf; 280 enum amperf_source amperf_source; 281 282 enum gfx_sysfs_idx { 283 GFX_rc6, 284 GFX_MHz, 285 GFX_ACTMHz, 286 
SAM_mc6, 287 SAM_MHz, 288 SAM_ACTMHz, 289 GFX_MAX 290 }; 291 292 struct gfx_sysfs_info { 293 const char *path; 294 FILE *fp; 295 unsigned int val; 296 unsigned long long val_ull; 297 }; 298 299 static struct gfx_sysfs_info gfx_info[GFX_MAX]; 300 301 int get_msr(int cpu, off_t offset, unsigned long long *msr); 302 303 /* Model specific support Start */ 304 305 /* List of features that may diverge among different platforms */ 306 struct platform_features { 307 bool has_msr_misc_feature_control; /* MSR_MISC_FEATURE_CONTROL */ 308 bool has_msr_misc_pwr_mgmt; /* MSR_MISC_PWR_MGMT */ 309 bool has_nhm_msrs; /* MSR_PLATFORM_INFO, MSR_IA32_TEMPERATURE_TARGET, MSR_SMI_COUNT, MSR_PKG_CST_CONFIG_CONTROL, MSR_IA32_POWER_CTL, TRL MSRs */ 310 bool has_config_tdp; /* MSR_CONFIG_TDP_NOMINAL/LEVEL_1/LEVEL_2/CONTROL, MSR_TURBO_ACTIVATION_RATIO */ 311 int bclk_freq; /* CPU base clock */ 312 int crystal_freq; /* Crystal clock to use when not available from CPUID.15 */ 313 int supported_cstates; /* Core cstates and Package cstates supported */ 314 int cst_limit; /* MSR_PKG_CST_CONFIG_CONTROL */ 315 bool has_cst_auto_convension; /* AUTOMATIC_CSTATE_CONVERSION bit in MSR_PKG_CST_CONFIG_CONTROL */ 316 bool has_irtl_msrs; /* MSR_PKGC3/PKGC6/PKGC7/PKGC8/PKGC9/PKGC10_IRTL */ 317 bool has_msr_core_c1_res; /* MSR_CORE_C1_RES */ 318 bool has_msr_module_c6_res_ms; /* MSR_MODULE_C6_RES_MS */ 319 bool has_msr_c6_demotion_policy_config; /* MSR_CC6_DEMOTION_POLICY_CONFIG/MSR_MC6_DEMOTION_POLICY_CONFIG */ 320 bool has_msr_atom_pkg_c6_residency; /* MSR_ATOM_PKG_C6_RESIDENCY */ 321 bool has_msr_knl_core_c6_residency; /* MSR_KNL_CORE_C6_RESIDENCY */ 322 bool has_ext_cst_msrs; /* MSR_PKG_WEIGHTED_CORE_C0_RES/MSR_PKG_ANY_CORE_C0_RES/MSR_PKG_ANY_GFXE_C0_RES/MSR_PKG_BOTH_CORE_GFXE_C0_RES */ 323 bool has_cst_prewake_bit; /* Cstate prewake bit in MSR_IA32_POWER_CTL */ 324 int trl_msrs; /* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */ 325 int plr_msrs; /* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS 
*/ 326 int rapl_msrs; /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */ 327 bool has_per_core_rapl; /* Indicates cores energy collection is per-core, not per-package. AMD specific for now */ 328 bool has_rapl_divisor; /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */ 329 bool has_fixed_rapl_unit; /* Fixed Energy Unit used for DRAM RAPL Domain */ 330 int rapl_quirk_tdp; /* Hardcoded TDP value when cannot be retrieved from hardware */ 331 int tcc_offset_bits; /* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */ 332 bool enable_tsc_tweak; /* Use CPU Base freq instead of TSC freq for aperf/mperf counter */ 333 bool need_perf_multiplier; /* mperf/aperf multiplier */ 334 }; 335 336 struct platform_data { 337 unsigned int model; 338 const struct platform_features *features; 339 }; 340 341 /* For BCLK */ 342 enum bclk_freq { 343 BCLK_100MHZ = 1, 344 BCLK_133MHZ, 345 BCLK_SLV, 346 }; 347 348 #define SLM_BCLK_FREQS 5 349 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 }; 350 351 double slm_bclk(void) 352 { 353 unsigned long long msr = 3; 354 unsigned int i; 355 double freq; 356 357 if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) 358 fprintf(outf, "SLM BCLK: unknown\n"); 359 360 i = msr & 0xf; 361 if (i >= SLM_BCLK_FREQS) { 362 fprintf(outf, "SLM BCLK[%d] invalid\n", i); 363 i = 3; 364 } 365 freq = slm_freq_table[i]; 366 367 if (!quiet) 368 fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); 369 370 return freq; 371 } 372 373 /* For Package cstate limit */ 374 enum package_cstate_limit { 375 CST_LIMIT_NHM = 1, 376 CST_LIMIT_SNB, 377 CST_LIMIT_HSW, 378 CST_LIMIT_SKX, 379 CST_LIMIT_ICX, 380 CST_LIMIT_SLV, 381 CST_LIMIT_AMT, 382 CST_LIMIT_KNL, 383 CST_LIMIT_GMT, 384 }; 385 386 /* For Turbo Ratio Limit MSRs */ 387 enum turbo_ratio_limit_msrs { 388 TRL_BASE = BIT(0), 389 TRL_LIMIT1 = BIT(1), 390 TRL_LIMIT2 = BIT(2), 391 TRL_ATOM = BIT(3), 392 TRL_KNL = BIT(4), 393 TRL_CORECOUNT = BIT(5), 394 }; 395 396 /* For Perf Limit Reason MSRs */ 397 enum 
perf_limit_reason_msrs { 398 PLR_CORE = BIT(0), 399 PLR_GFX = BIT(1), 400 PLR_RING = BIT(2), 401 }; 402 403 /* For RAPL MSRs */ 404 enum rapl_msrs { 405 RAPL_PKG_POWER_LIMIT = BIT(0), /* 0x610 MSR_PKG_POWER_LIMIT */ 406 RAPL_PKG_ENERGY_STATUS = BIT(1), /* 0x611 MSR_PKG_ENERGY_STATUS */ 407 RAPL_PKG_PERF_STATUS = BIT(2), /* 0x613 MSR_PKG_PERF_STATUS */ 408 RAPL_PKG_POWER_INFO = BIT(3), /* 0x614 MSR_PKG_POWER_INFO */ 409 RAPL_DRAM_POWER_LIMIT = BIT(4), /* 0x618 MSR_DRAM_POWER_LIMIT */ 410 RAPL_DRAM_ENERGY_STATUS = BIT(5), /* 0x619 MSR_DRAM_ENERGY_STATUS */ 411 RAPL_DRAM_PERF_STATUS = BIT(6), /* 0x61b MSR_DRAM_PERF_STATUS */ 412 RAPL_DRAM_POWER_INFO = BIT(7), /* 0x61c MSR_DRAM_POWER_INFO */ 413 RAPL_CORE_POWER_LIMIT = BIT(8), /* 0x638 MSR_PP0_POWER_LIMIT */ 414 RAPL_CORE_ENERGY_STATUS = BIT(9), /* 0x639 MSR_PP0_ENERGY_STATUS */ 415 RAPL_CORE_POLICY = BIT(10), /* 0x63a MSR_PP0_POLICY */ 416 RAPL_GFX_POWER_LIMIT = BIT(11), /* 0x640 MSR_PP1_POWER_LIMIT */ 417 RAPL_GFX_ENERGY_STATUS = BIT(12), /* 0x641 MSR_PP1_ENERGY_STATUS */ 418 RAPL_GFX_POLICY = BIT(13), /* 0x642 MSR_PP1_POLICY */ 419 RAPL_AMD_PWR_UNIT = BIT(14), /* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */ 420 RAPL_AMD_CORE_ENERGY_STAT = BIT(15), /* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */ 421 RAPL_AMD_PKG_ENERGY_STAT = BIT(16), /* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */ 422 }; 423 424 #define RAPL_PKG (RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT) 425 #define RAPL_DRAM (RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT) 426 #define RAPL_CORE (RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT) 427 #define RAPL_GFX (RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS) 428 429 #define RAPL_PKG_ALL (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO) 430 #define RAPL_DRAM_ALL (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO) 431 #define RAPL_CORE_ALL (RAPL_CORE | RAPL_CORE_POLICY) 432 #define RAPL_GFX_ALL (RAPL_GFX | RAPL_GFX_POLIGY) 433 434 #define RAPL_AMD_F17H (RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | 
RAPL_AMD_PKG_ENERGY_STAT) 435 436 /* For Cstates */ 437 enum cstates { 438 CC1 = BIT(0), 439 CC3 = BIT(1), 440 CC6 = BIT(2), 441 CC7 = BIT(3), 442 PC2 = BIT(4), 443 PC3 = BIT(5), 444 PC6 = BIT(6), 445 PC7 = BIT(7), 446 PC8 = BIT(8), 447 PC9 = BIT(9), 448 PC10 = BIT(10), 449 }; 450 451 static const struct platform_features nhm_features = { 452 .has_msr_misc_pwr_mgmt = 1, 453 .has_nhm_msrs = 1, 454 .bclk_freq = BCLK_133MHZ, 455 .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6, 456 .cst_limit = CST_LIMIT_NHM, 457 .trl_msrs = TRL_BASE, 458 }; 459 460 static const struct platform_features nhx_features = { 461 .has_msr_misc_pwr_mgmt = 1, 462 .has_nhm_msrs = 1, 463 .bclk_freq = BCLK_133MHZ, 464 .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6, 465 .cst_limit = CST_LIMIT_NHM, 466 }; 467 468 static const struct platform_features snb_features = { 469 .has_msr_misc_feature_control = 1, 470 .has_msr_misc_pwr_mgmt = 1, 471 .has_nhm_msrs = 1, 472 .bclk_freq = BCLK_100MHZ, 473 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 474 .cst_limit = CST_LIMIT_SNB, 475 .has_irtl_msrs = 1, 476 .trl_msrs = TRL_BASE, 477 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 478 }; 479 480 static const struct platform_features snx_features = { 481 .has_msr_misc_feature_control = 1, 482 .has_msr_misc_pwr_mgmt = 1, 483 .has_nhm_msrs = 1, 484 .bclk_freq = BCLK_100MHZ, 485 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 486 .cst_limit = CST_LIMIT_SNB, 487 .has_irtl_msrs = 1, 488 .trl_msrs = TRL_BASE, 489 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, 490 }; 491 492 static const struct platform_features ivb_features = { 493 .has_msr_misc_feature_control = 1, 494 .has_msr_misc_pwr_mgmt = 1, 495 .has_nhm_msrs = 1, 496 .has_config_tdp = 1, 497 .bclk_freq = BCLK_100MHZ, 498 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 499 .cst_limit = CST_LIMIT_SNB, 500 .has_irtl_msrs = 1, 501 .trl_msrs = TRL_BASE, 502 
.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 503 }; 504 505 static const struct platform_features ivx_features = { 506 .has_msr_misc_feature_control = 1, 507 .has_msr_misc_pwr_mgmt = 1, 508 .has_nhm_msrs = 1, 509 .bclk_freq = BCLK_100MHZ, 510 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 511 .cst_limit = CST_LIMIT_SNB, 512 .has_irtl_msrs = 1, 513 .trl_msrs = TRL_BASE | TRL_LIMIT1, 514 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, 515 }; 516 517 static const struct platform_features hsw_features = { 518 .has_msr_misc_feature_control = 1, 519 .has_msr_misc_pwr_mgmt = 1, 520 .has_nhm_msrs = 1, 521 .has_config_tdp = 1, 522 .bclk_freq = BCLK_100MHZ, 523 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 524 .cst_limit = CST_LIMIT_HSW, 525 .has_irtl_msrs = 1, 526 .trl_msrs = TRL_BASE, 527 .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, 528 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 529 }; 530 531 static const struct platform_features hsx_features = { 532 .has_msr_misc_feature_control = 1, 533 .has_msr_misc_pwr_mgmt = 1, 534 .has_nhm_msrs = 1, 535 .has_config_tdp = 1, 536 .bclk_freq = BCLK_100MHZ, 537 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 538 .cst_limit = CST_LIMIT_HSW, 539 .has_irtl_msrs = 1, 540 .trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2, 541 .plr_msrs = PLR_CORE | PLR_RING, 542 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 543 .has_fixed_rapl_unit = 1, 544 }; 545 546 static const struct platform_features hswl_features = { 547 .has_msr_misc_feature_control = 1, 548 .has_msr_misc_pwr_mgmt = 1, 549 .has_nhm_msrs = 1, 550 .has_config_tdp = 1, 551 .bclk_freq = BCLK_100MHZ, 552 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 553 .cst_limit = CST_LIMIT_HSW, 554 .has_irtl_msrs = 1, 555 .trl_msrs = TRL_BASE, 556 .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, 557 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | 
RAPL_PKG_POWER_INFO, 558 }; 559 560 static const struct platform_features hswg_features = { 561 .has_msr_misc_feature_control = 1, 562 .has_msr_misc_pwr_mgmt = 1, 563 .has_nhm_msrs = 1, 564 .has_config_tdp = 1, 565 .bclk_freq = BCLK_100MHZ, 566 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 567 .cst_limit = CST_LIMIT_HSW, 568 .has_irtl_msrs = 1, 569 .trl_msrs = TRL_BASE, 570 .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, 571 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 572 }; 573 574 static const struct platform_features bdw_features = { 575 .has_msr_misc_feature_control = 1, 576 .has_msr_misc_pwr_mgmt = 1, 577 .has_nhm_msrs = 1, 578 .has_config_tdp = 1, 579 .bclk_freq = BCLK_100MHZ, 580 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 581 .cst_limit = CST_LIMIT_HSW, 582 .has_irtl_msrs = 1, 583 .trl_msrs = TRL_BASE, 584 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 585 }; 586 587 static const struct platform_features bdwg_features = { 588 .has_msr_misc_feature_control = 1, 589 .has_msr_misc_pwr_mgmt = 1, 590 .has_nhm_msrs = 1, 591 .has_config_tdp = 1, 592 .bclk_freq = BCLK_100MHZ, 593 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 594 .cst_limit = CST_LIMIT_HSW, 595 .has_irtl_msrs = 1, 596 .trl_msrs = TRL_BASE, 597 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 598 }; 599 600 static const struct platform_features bdx_features = { 601 .has_msr_misc_feature_control = 1, 602 .has_msr_misc_pwr_mgmt = 1, 603 .has_nhm_msrs = 1, 604 .has_config_tdp = 1, 605 .bclk_freq = BCLK_100MHZ, 606 .supported_cstates = CC1 | CC3 | CC6 | PC2 | PC3 | PC6, 607 .cst_limit = CST_LIMIT_HSW, 608 .has_irtl_msrs = 1, 609 .has_cst_auto_convension = 1, 610 .trl_msrs = TRL_BASE, 611 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 612 .has_fixed_rapl_unit = 1, 613 }; 614 615 static const struct platform_features skl_features = { 616 
.has_msr_misc_feature_control = 1, 617 .has_msr_misc_pwr_mgmt = 1, 618 .has_nhm_msrs = 1, 619 .has_config_tdp = 1, 620 .bclk_freq = BCLK_100MHZ, 621 .crystal_freq = 24000000, 622 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 623 .cst_limit = CST_LIMIT_HSW, 624 .has_irtl_msrs = 1, 625 .has_ext_cst_msrs = 1, 626 .trl_msrs = TRL_BASE, 627 .tcc_offset_bits = 6, 628 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, 629 .enable_tsc_tweak = 1, 630 }; 631 632 static const struct platform_features cnl_features = { 633 .has_msr_misc_feature_control = 1, 634 .has_msr_misc_pwr_mgmt = 1, 635 .has_nhm_msrs = 1, 636 .has_config_tdp = 1, 637 .bclk_freq = BCLK_100MHZ, 638 .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 639 .cst_limit = CST_LIMIT_HSW, 640 .has_irtl_msrs = 1, 641 .has_msr_core_c1_res = 1, 642 .has_ext_cst_msrs = 1, 643 .trl_msrs = TRL_BASE, 644 .tcc_offset_bits = 6, 645 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, 646 .enable_tsc_tweak = 1, 647 }; 648 649 static const struct platform_features adl_features = { 650 .has_msr_misc_feature_control = 1, 651 .has_msr_misc_pwr_mgmt = 1, 652 .has_nhm_msrs = 1, 653 .has_config_tdp = 1, 654 .bclk_freq = BCLK_100MHZ, 655 .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10, 656 .cst_limit = CST_LIMIT_HSW, 657 .has_irtl_msrs = 1, 658 .has_msr_core_c1_res = 1, 659 .has_ext_cst_msrs = 1, 660 .trl_msrs = TRL_BASE, 661 .tcc_offset_bits = 6, 662 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, 663 .enable_tsc_tweak = 1, 664 }; 665 666 static const struct platform_features skx_features = { 667 .has_msr_misc_feature_control = 1, 668 .has_msr_misc_pwr_mgmt = 1, 669 .has_nhm_msrs = 1, 670 .has_config_tdp = 1, 671 .bclk_freq = BCLK_100MHZ, 672 .supported_cstates = CC1 | CC6 | PC2 | PC6, 673 .cst_limit = CST_LIMIT_SKX, 674 
.has_irtl_msrs = 1, 675 .has_cst_auto_convension = 1, 676 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 677 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 678 .has_fixed_rapl_unit = 1, 679 }; 680 681 static const struct platform_features icx_features = { 682 .has_msr_misc_feature_control = 1, 683 .has_msr_misc_pwr_mgmt = 1, 684 .has_nhm_msrs = 1, 685 .has_config_tdp = 1, 686 .bclk_freq = BCLK_100MHZ, 687 .supported_cstates = CC1 | CC6 | PC2 | PC6, 688 .cst_limit = CST_LIMIT_ICX, 689 .has_msr_core_c1_res = 1, 690 .has_irtl_msrs = 1, 691 .has_cst_prewake_bit = 1, 692 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 693 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 694 .has_fixed_rapl_unit = 1, 695 }; 696 697 static const struct platform_features spr_features = { 698 .has_msr_misc_feature_control = 1, 699 .has_msr_misc_pwr_mgmt = 1, 700 .has_nhm_msrs = 1, 701 .has_config_tdp = 1, 702 .bclk_freq = BCLK_100MHZ, 703 .supported_cstates = CC1 | CC6 | PC2 | PC6, 704 .cst_limit = CST_LIMIT_SKX, 705 .has_msr_core_c1_res = 1, 706 .has_irtl_msrs = 1, 707 .has_cst_prewake_bit = 1, 708 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 709 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 710 }; 711 712 static const struct platform_features srf_features = { 713 .has_msr_misc_feature_control = 1, 714 .has_msr_misc_pwr_mgmt = 1, 715 .has_nhm_msrs = 1, 716 .has_config_tdp = 1, 717 .bclk_freq = BCLK_100MHZ, 718 .supported_cstates = CC1 | CC6 | PC2 | PC6, 719 .cst_limit = CST_LIMIT_SKX, 720 .has_msr_core_c1_res = 1, 721 .has_msr_module_c6_res_ms = 1, 722 .has_irtl_msrs = 1, 723 .has_cst_prewake_bit = 1, 724 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 725 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 726 }; 727 728 static const struct platform_features grr_features = { 729 .has_msr_misc_feature_control = 1, 730 .has_msr_misc_pwr_mgmt = 1, 731 .has_nhm_msrs = 1, 732 .has_config_tdp = 1, 733 .bclk_freq = BCLK_100MHZ, 734 .supported_cstates = CC1 | CC6, 735 .cst_limit = CST_LIMIT_SKX, 736 .has_msr_core_c1_res = 1, 737 
.has_msr_module_c6_res_ms = 1, 738 .has_irtl_msrs = 1, 739 .has_cst_prewake_bit = 1, 740 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 741 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 742 }; 743 744 static const struct platform_features slv_features = { 745 .has_nhm_msrs = 1, 746 .bclk_freq = BCLK_SLV, 747 .supported_cstates = CC1 | CC6 | PC6, 748 .cst_limit = CST_LIMIT_SLV, 749 .has_msr_core_c1_res = 1, 750 .has_msr_module_c6_res_ms = 1, 751 .has_msr_c6_demotion_policy_config = 1, 752 .has_msr_atom_pkg_c6_residency = 1, 753 .trl_msrs = TRL_ATOM, 754 .rapl_msrs = RAPL_PKG | RAPL_CORE, 755 .has_rapl_divisor = 1, 756 .rapl_quirk_tdp = 30, 757 }; 758 759 static const struct platform_features slvd_features = { 760 .has_msr_misc_pwr_mgmt = 1, 761 .has_nhm_msrs = 1, 762 .bclk_freq = BCLK_SLV, 763 .supported_cstates = CC1 | CC6 | PC3 | PC6, 764 .cst_limit = CST_LIMIT_SLV, 765 .has_msr_atom_pkg_c6_residency = 1, 766 .trl_msrs = TRL_BASE, 767 .rapl_msrs = RAPL_PKG | RAPL_CORE, 768 .rapl_quirk_tdp = 30, 769 }; 770 771 static const struct platform_features amt_features = { 772 .has_nhm_msrs = 1, 773 .bclk_freq = BCLK_133MHZ, 774 .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6, 775 .cst_limit = CST_LIMIT_AMT, 776 .trl_msrs = TRL_BASE, 777 }; 778 779 static const struct platform_features gmt_features = { 780 .has_msr_misc_pwr_mgmt = 1, 781 .has_nhm_msrs = 1, 782 .bclk_freq = BCLK_100MHZ, 783 .crystal_freq = 19200000, 784 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 785 .cst_limit = CST_LIMIT_GMT, 786 .has_irtl_msrs = 1, 787 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 788 .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, 789 }; 790 791 static const struct platform_features gmtd_features = { 792 .has_msr_misc_pwr_mgmt = 1, 793 .has_nhm_msrs = 1, 794 .bclk_freq = BCLK_100MHZ, 795 .crystal_freq = 25000000, 796 .supported_cstates = CC1 | CC6 | PC2 | PC6, 797 .cst_limit = CST_LIMIT_GMT, 798 .has_irtl_msrs = 1, 799 .has_msr_core_c1_res = 1, 800 .trl_msrs = 
TRL_BASE | TRL_CORECOUNT, 801 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS, 802 }; 803 804 static const struct platform_features gmtp_features = { 805 .has_msr_misc_pwr_mgmt = 1, 806 .has_nhm_msrs = 1, 807 .bclk_freq = BCLK_100MHZ, 808 .crystal_freq = 19200000, 809 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 810 .cst_limit = CST_LIMIT_GMT, 811 .has_irtl_msrs = 1, 812 .trl_msrs = TRL_BASE, 813 .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, 814 }; 815 816 static const struct platform_features tmt_features = { 817 .has_msr_misc_pwr_mgmt = 1, 818 .has_nhm_msrs = 1, 819 .bclk_freq = BCLK_100MHZ, 820 .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 821 .cst_limit = CST_LIMIT_GMT, 822 .has_irtl_msrs = 1, 823 .trl_msrs = TRL_BASE, 824 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, 825 .enable_tsc_tweak = 1, 826 }; 827 828 static const struct platform_features tmtd_features = { 829 .has_msr_misc_pwr_mgmt = 1, 830 .has_nhm_msrs = 1, 831 .bclk_freq = BCLK_100MHZ, 832 .supported_cstates = CC1 | CC6, 833 .cst_limit = CST_LIMIT_GMT, 834 .has_irtl_msrs = 1, 835 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 836 .rapl_msrs = RAPL_PKG_ALL, 837 }; 838 839 static const struct platform_features knl_features = { 840 .has_msr_misc_pwr_mgmt = 1, 841 .has_nhm_msrs = 1, 842 .has_config_tdp = 1, 843 .bclk_freq = BCLK_100MHZ, 844 .supported_cstates = CC1 | CC6 | PC3 | PC6, 845 .cst_limit = CST_LIMIT_KNL, 846 .has_msr_knl_core_c6_residency = 1, 847 .trl_msrs = TRL_KNL, 848 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 849 .has_fixed_rapl_unit = 1, 850 .need_perf_multiplier = 1, 851 }; 852 853 static const struct platform_features default_features = { 854 }; 855 856 static const struct platform_features amd_features_with_rapl = { 857 .rapl_msrs = RAPL_AMD_F17H, 858 .has_per_core_rapl = 1, 859 .rapl_quirk_tdp = 280, /* This is the max stock TDP of HEDT/Server Fam17h+ 
chips */ 860 }; 861 862 static const struct platform_data turbostat_pdata[] = { 863 { INTEL_FAM6_NEHALEM, &nhm_features }, 864 { INTEL_FAM6_NEHALEM_G, &nhm_features }, 865 { INTEL_FAM6_NEHALEM_EP, &nhm_features }, 866 { INTEL_FAM6_NEHALEM_EX, &nhx_features }, 867 { INTEL_FAM6_WESTMERE, &nhm_features }, 868 { INTEL_FAM6_WESTMERE_EP, &nhm_features }, 869 { INTEL_FAM6_WESTMERE_EX, &nhx_features }, 870 { INTEL_FAM6_SANDYBRIDGE, &snb_features }, 871 { INTEL_FAM6_SANDYBRIDGE_X, &snx_features }, 872 { INTEL_FAM6_IVYBRIDGE, &ivb_features }, 873 { INTEL_FAM6_IVYBRIDGE_X, &ivx_features }, 874 { INTEL_FAM6_HASWELL, &hsw_features }, 875 { INTEL_FAM6_HASWELL_X, &hsx_features }, 876 { INTEL_FAM6_HASWELL_L, &hswl_features }, 877 { INTEL_FAM6_HASWELL_G, &hswg_features }, 878 { INTEL_FAM6_BROADWELL, &bdw_features }, 879 { INTEL_FAM6_BROADWELL_G, &bdwg_features }, 880 { INTEL_FAM6_BROADWELL_X, &bdx_features }, 881 { INTEL_FAM6_BROADWELL_D, &bdx_features }, 882 { INTEL_FAM6_SKYLAKE_L, &skl_features }, 883 { INTEL_FAM6_SKYLAKE, &skl_features }, 884 { INTEL_FAM6_SKYLAKE_X, &skx_features }, 885 { INTEL_FAM6_KABYLAKE_L, &skl_features }, 886 { INTEL_FAM6_KABYLAKE, &skl_features }, 887 { INTEL_FAM6_COMETLAKE, &skl_features }, 888 { INTEL_FAM6_COMETLAKE_L, &skl_features }, 889 { INTEL_FAM6_CANNONLAKE_L, &cnl_features }, 890 { INTEL_FAM6_ICELAKE_X, &icx_features }, 891 { INTEL_FAM6_ICELAKE_D, &icx_features }, 892 { INTEL_FAM6_ICELAKE_L, &cnl_features }, 893 { INTEL_FAM6_ICELAKE_NNPI, &cnl_features }, 894 { INTEL_FAM6_ROCKETLAKE, &cnl_features }, 895 { INTEL_FAM6_TIGERLAKE_L, &cnl_features }, 896 { INTEL_FAM6_TIGERLAKE, &cnl_features }, 897 { INTEL_FAM6_SAPPHIRERAPIDS_X, &spr_features }, 898 { INTEL_FAM6_EMERALDRAPIDS_X, &spr_features }, 899 { INTEL_FAM6_GRANITERAPIDS_X, &spr_features }, 900 { INTEL_FAM6_LAKEFIELD, &cnl_features }, 901 { INTEL_FAM6_ALDERLAKE, &adl_features }, 902 { INTEL_FAM6_ALDERLAKE_L, &adl_features }, 903 { INTEL_FAM6_RAPTORLAKE, &adl_features }, 904 { 
INTEL_FAM6_RAPTORLAKE_P, &adl_features }, 905 { INTEL_FAM6_RAPTORLAKE_S, &adl_features }, 906 { INTEL_FAM6_METEORLAKE, &cnl_features }, 907 { INTEL_FAM6_METEORLAKE_L, &cnl_features }, 908 { INTEL_FAM6_ARROWLAKE, &cnl_features }, 909 { INTEL_FAM6_LUNARLAKE_M, &cnl_features }, 910 { INTEL_FAM6_ATOM_SILVERMONT, &slv_features }, 911 { INTEL_FAM6_ATOM_SILVERMONT_D, &slvd_features }, 912 { INTEL_FAM6_ATOM_AIRMONT, &amt_features }, 913 { INTEL_FAM6_ATOM_GOLDMONT, &gmt_features }, 914 { INTEL_FAM6_ATOM_GOLDMONT_D, &gmtd_features }, 915 { INTEL_FAM6_ATOM_GOLDMONT_PLUS, &gmtp_features }, 916 { INTEL_FAM6_ATOM_TREMONT_D, &tmtd_features }, 917 { INTEL_FAM6_ATOM_TREMONT, &tmt_features }, 918 { INTEL_FAM6_ATOM_TREMONT_L, &tmt_features }, 919 { INTEL_FAM6_ATOM_GRACEMONT, &adl_features }, 920 { INTEL_FAM6_ATOM_CRESTMONT_X, &srf_features }, 921 { INTEL_FAM6_ATOM_CRESTMONT, &grr_features }, 922 { INTEL_FAM6_XEON_PHI_KNL, &knl_features }, 923 { INTEL_FAM6_XEON_PHI_KNM, &knl_features }, 924 /* 925 * Missing support for 926 * INTEL_FAM6_ICELAKE 927 * INTEL_FAM6_ATOM_SILVERMONT_MID 928 * INTEL_FAM6_ATOM_AIRMONT_MID 929 * INTEL_FAM6_ATOM_AIRMONT_NP 930 */ 931 { 0, NULL }, 932 }; 933 934 static const struct platform_features *platform; 935 936 void probe_platform_features(unsigned int family, unsigned int model) 937 { 938 int i; 939 940 platform = &default_features; 941 942 if (authentic_amd || hygon_genuine) { 943 if (max_extended_level >= 0x80000007) { 944 unsigned int eax, ebx, ecx, edx; 945 946 __cpuid(0x80000007, eax, ebx, ecx, edx); 947 /* RAPL (Fam 17h+) */ 948 if ((edx & (1 << 14)) && family >= 0x17) 949 platform = &amd_features_with_rapl; 950 } 951 return; 952 } 953 954 if (!genuine_intel || family != 6) 955 return; 956 957 for (i = 0; turbostat_pdata[i].features; i++) { 958 if (turbostat_pdata[i].model == model) { 959 platform = turbostat_pdata[i].features; 960 return; 961 } 962 } 963 } 964 965 /* Model specific support End */ 966 967 #define TJMAX_DEFAULT 100 968 969 /* MSRs 
/*
 * State for one set of RAPL counters.  Every array has NUM_RAPL_COUNTERS
 * slots, i.e. one slot per enum rapl_rci_index value.
 */
struct rapl_counter_info_t {
	/* NOTE(review): presumably the most recently read raw value per counter -- confirm against the reader */
	unsigned long long data[NUM_RAPL_COUNTERS];
	/* Where each counter comes from: RAPL_SOURCE_NONE, RAPL_SOURCE_PERF or RAPL_SOURCE_MSR */
	enum rapl_source source[NUM_RAPL_COUNTERS];
	/* Per-counter flag bits, e.g. RAPL_COUNTER_FLAG_USE_MSR_SUM */
	unsigned long long flags[NUM_RAPL_COUNTERS];
	/* Multiplier applied to the raw value */
	double scale[NUM_RAPL_COUNTERS];
	/* Unit of the scaled value: RAPL_UNIT_JOULES, RAPL_UNIT_WATTS, or RAPL_UNIT_INVALID */
	enum rapl_unit unit[NUM_RAPL_COUNTERS];

	union {
		/* Active when source == RAPL_SOURCE_MSR */
		struct {
			/* MSR address, mask and shift for each counter */
			unsigned long long msr[NUM_RAPL_COUNTERS];
			unsigned long long msr_mask[NUM_RAPL_COUNTERS];
			int msr_shift[NUM_RAPL_COUNTERS];
		};
	};

	/* NOTE(review): looks like the perf event file descriptor used for RAPL_SOURCE_PERF counters -- confirm */
	int fd_perf;
};
/*
 * Table of the RAPL counters known to this file, one entry per counter.
 *
 * For each entry:
 *   feature_mask             bit(s) tested against the platform's rapl_msrs
 *   perf_subsys / perf_name  perf event naming the counter, or NULL when
 *                            the entry names no perf event
 *   msr, msr_mask, msr_shift how to extract the value from the MSR
 *   platform_rapl_msr_scale  run-time scale for the MSR value
 *   rci_index                slot in struct rapl_counter_info_t arrays
 *   bic                      built-in column bits that display the counter
 *   compat_scale             constant extra scale factor
 *   flags                    RAPL_COUNTER_FLAG_* bits
 *
 * Two entries share RAPL_RCI_INDEX_ENERGY_PKG: they differ only in
 * feature_mask and MSR address (MSR_PKG_ENERGY_STATUS vs MSR_PKG_ENERGY_STAT).
 */
static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = {
	{
		/* Package energy, RAPL_PKG platforms */
		.feature_mask = RAPL_PKG,
		.perf_subsys = "power",
		.perf_name = "energy-pkg",
		.msr = MSR_PKG_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
		.bic = BIC_PkgWatt | BIC_Pkg_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		/* Package energy, RAPL_AMD_F17H platforms */
		.feature_mask = RAPL_AMD_F17H,
		.perf_subsys = "power",
		.perf_name = "energy-pkg",
		.msr = MSR_PKG_ENERGY_STAT,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
		.bic = BIC_PkgWatt | BIC_Pkg_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		/* Cores (PP0) energy */
		.feature_mask = RAPL_CORE_ENERGY_STATUS,
		.perf_subsys = "power",
		.perf_name = "energy-cores",
		.msr = MSR_PP0_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_CORES,
		.bic = BIC_CorWatt | BIC_Cor_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		/* DRAM energy; the only entry scaled by rapl_dram_energy_units */
		.feature_mask = RAPL_DRAM,
		.perf_subsys = "power",
		.perf_name = "energy-ram",
		.msr = MSR_DRAM_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_dram_energy_units,
		.rci_index = RAPL_RCI_INDEX_DRAM,
		.bic = BIC_RAMWatt | BIC_RAM_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		/* Graphics (PP1) energy */
		.feature_mask = RAPL_GFX,
		.perf_subsys = "power",
		.perf_name = "energy-gpu",
		.msr = MSR_PP1_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_GFX,
		.bic = BIC_GFXWatt | BIC_GFX_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		/* Package perf status: no perf event named, scaled by rapl_time_units and then by 100 */
		.feature_mask = RAPL_PKG_PERF_STATUS,
		.perf_subsys = NULL,
		.perf_name = NULL,
		.msr = MSR_PKG_PERF_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_time_units,
		.rci_index = RAPL_RCI_INDEX_PKG_PERF_STATUS,
		.bic = BIC_PKG__,
		.compat_scale = 100.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		/* DRAM perf status: no perf event named, scaled by rapl_time_units and then by 100 */
		.feature_mask = RAPL_DRAM_PERF_STATUS,
		.perf_subsys = NULL,
		.perf_name = NULL,
		.msr = MSR_DRAM_PERF_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_time_units,
		.rci_index = RAPL_RCI_INDEX_DRAM_PERF_STATUS,
		.bic = BIC_RAM__,
		.compat_scale = 100.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		/*
		 * Core energy via MSR_CORE_ENERGY_STAT on RAPL_AMD_F17H platforms.
		 * The only entry with a 32-bit mask and without
		 * RAPL_COUNTER_FLAG_USE_MSR_SUM; it names no perf event.
		 */
		.feature_mask = RAPL_AMD_F17H,
		.perf_subsys = NULL,
		.perf_name = NULL,
		.msr = MSR_CORE_ENERGY_STAT,
		.msr_mask = 0xFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_CORE_ENERGY,
		.bic = BIC_CorWatt | BIC_Cor_J,
		.compat_scale = 1.0,
		.flags = 0,
	},
};
/*
 * Per-core counter snapshot.  Two arrays of these exist, core_even and
 * core_odd, which are passed around together with the matching thread
 * and package arrays (see EVEN_COUNTERS / ODD_COUNTERS).
 */
struct core_data {
	/*
	 * CPU number compared against thread_data.cpu_id by
	 * is_cpu_first_thread_in_core(); a negative value makes every
	 * thread count as the first one.
	 */
	int base_cpu;
	/* c3/c6/c7 are dumped by dump_counters() as "c3", "c6", "c7" */
	unsigned long long c3;
	unsigned long long c6;
	unsigned long long c7;
	unsigned long long mc6_us;	/* duplicate as per-core for now, even though per module */
	/* dump_counters() prints this as "DTS: %dC" */
	unsigned int core_temp_c;
	struct rapl_counter core_energy;	/* MSR_CORE_ENERGY_STAT */
	unsigned int core_id;
	unsigned long long core_throt_cnt;
	/* Values of the added core counters, indexed in sys.cp list order */
	unsigned long long counter[MAX_ADDED_COUNTERS];
} *core_even, *core_odd;
/*
 * GET_PKG - address of entry number pkg_no in the array starting at pkg_base.
 *
 * Both arguments are parenthesized, as in GET_THREAD() and GET_CORE(), so
 * that an expression argument (e.g. "i & 1" or "a ? b : c") keeps its own
 * grouping instead of being re-associated with the '+'.
 */
#define GET_PKG(pkg_base, pkg_no) ((pkg_base) + (pkg_no))
1243 */ 1244 enum { 1245 IDX_PKG_ENERGY, 1246 IDX_DRAM_ENERGY, 1247 IDX_PP0_ENERGY, 1248 IDX_PP1_ENERGY, 1249 IDX_PKG_PERF, 1250 IDX_DRAM_PERF, 1251 IDX_COUNT, 1252 }; 1253 1254 int get_msr_sum(int cpu, off_t offset, unsigned long long *msr); 1255 1256 struct msr_sum_array { 1257 /* get_msr_sum() = sum + (get_msr() - last) */ 1258 struct { 1259 /*The accumulated MSR value is updated by the timer */ 1260 unsigned long long sum; 1261 /*The MSR footprint recorded in last timer */ 1262 unsigned long long last; 1263 } entries[IDX_COUNT]; 1264 }; 1265 1266 /* The percpu MSR sum array.*/ 1267 struct msr_sum_array *per_cpu_msr_sum; 1268 1269 off_t idx_to_offset(int idx) 1270 { 1271 off_t offset; 1272 1273 switch (idx) { 1274 case IDX_PKG_ENERGY: 1275 if (platform->rapl_msrs & RAPL_AMD_F17H) 1276 offset = MSR_PKG_ENERGY_STAT; 1277 else 1278 offset = MSR_PKG_ENERGY_STATUS; 1279 break; 1280 case IDX_DRAM_ENERGY: 1281 offset = MSR_DRAM_ENERGY_STATUS; 1282 break; 1283 case IDX_PP0_ENERGY: 1284 offset = MSR_PP0_ENERGY_STATUS; 1285 break; 1286 case IDX_PP1_ENERGY: 1287 offset = MSR_PP1_ENERGY_STATUS; 1288 break; 1289 case IDX_PKG_PERF: 1290 offset = MSR_PKG_PERF_STATUS; 1291 break; 1292 case IDX_DRAM_PERF: 1293 offset = MSR_DRAM_PERF_STATUS; 1294 break; 1295 default: 1296 offset = -1; 1297 } 1298 return offset; 1299 } 1300 1301 int offset_to_idx(off_t offset) 1302 { 1303 int idx; 1304 1305 switch (offset) { 1306 case MSR_PKG_ENERGY_STATUS: 1307 case MSR_PKG_ENERGY_STAT: 1308 idx = IDX_PKG_ENERGY; 1309 break; 1310 case MSR_DRAM_ENERGY_STATUS: 1311 idx = IDX_DRAM_ENERGY; 1312 break; 1313 case MSR_PP0_ENERGY_STATUS: 1314 idx = IDX_PP0_ENERGY; 1315 break; 1316 case MSR_PP1_ENERGY_STATUS: 1317 idx = IDX_PP1_ENERGY; 1318 break; 1319 case MSR_PKG_PERF_STATUS: 1320 idx = IDX_PKG_PERF; 1321 break; 1322 case MSR_DRAM_PERF_STATUS: 1323 idx = IDX_DRAM_PERF; 1324 break; 1325 default: 1326 idx = -1; 1327 } 1328 return idx; 1329 } 1330 1331 int idx_valid(int idx) 1332 { 1333 switch (idx) { 1334 
case IDX_PKG_ENERGY: 1335 return platform->rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H); 1336 case IDX_DRAM_ENERGY: 1337 return platform->rapl_msrs & RAPL_DRAM; 1338 case IDX_PP0_ENERGY: 1339 return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS; 1340 case IDX_PP1_ENERGY: 1341 return platform->rapl_msrs & RAPL_GFX; 1342 case IDX_PKG_PERF: 1343 return platform->rapl_msrs & RAPL_PKG_PERF_STATUS; 1344 case IDX_DRAM_PERF: 1345 return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS; 1346 default: 1347 return 0; 1348 } 1349 } 1350 1351 struct sys_counters { 1352 unsigned int added_thread_counters; 1353 unsigned int added_core_counters; 1354 unsigned int added_package_counters; 1355 struct msr_counter *tp; 1356 struct msr_counter *cp; 1357 struct msr_counter *pp; 1358 } sys; 1359 1360 void free_sys_counters(void) 1361 { 1362 struct msr_counter *p = sys.tp, *pnext = NULL; 1363 1364 while (p) { 1365 pnext = p->next; 1366 free(p); 1367 p = pnext; 1368 } 1369 1370 p = sys.cp, pnext = NULL; 1371 while (p) { 1372 pnext = p->next; 1373 free(p); 1374 p = pnext; 1375 } 1376 1377 p = sys.pp, pnext = NULL; 1378 while (p) { 1379 pnext = p->next; 1380 free(p); 1381 p = pnext; 1382 } 1383 1384 sys.added_thread_counters = 0; 1385 sys.added_core_counters = 0; 1386 sys.added_package_counters = 0; 1387 sys.tp = NULL; 1388 sys.cp = NULL; 1389 sys.pp = NULL; 1390 } 1391 1392 struct system_summary { 1393 struct thread_data threads; 1394 struct core_data cores; 1395 struct pkg_data packages; 1396 } average; 1397 1398 struct cpu_topology { 1399 int physical_package_id; 1400 int die_id; 1401 int logical_cpu_id; 1402 int physical_node_id; 1403 int logical_node_id; /* 0-based count within the package */ 1404 int physical_core_id; 1405 int thread_id; 1406 cpu_set_t *put_ids; /* Processing Unit/Thread IDs */ 1407 } *cpus; 1408 1409 struct topo_params { 1410 int num_packages; 1411 int num_die; 1412 int num_cpus; 1413 int num_cores; 1414 int allowed_packages; 1415 int allowed_cpus; 1416 int allowed_cores; 1417 
int max_cpu_num; 1418 int max_node_num; 1419 int nodes_per_pkg; 1420 int cores_per_node; 1421 int threads_per_core; 1422 } topo; 1423 1424 struct timeval tv_even, tv_odd, tv_delta; 1425 1426 int *irq_column_2_cpu; /* /proc/interrupts column numbers */ 1427 int *irqs_per_cpu; /* indexed by cpu_num */ 1428 1429 void setup_all_buffers(bool startup); 1430 1431 char *sys_lpi_file; 1432 char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us"; 1433 char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec"; 1434 1435 int cpu_is_not_present(int cpu) 1436 { 1437 return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set); 1438 } 1439 1440 int cpu_is_not_allowed(int cpu) 1441 { 1442 return !CPU_ISSET_S(cpu, cpu_allowed_setsize, cpu_allowed_set); 1443 } 1444 1445 /* 1446 * run func(thread, core, package) in topology order 1447 * skip non-present cpus 1448 */ 1449 1450 int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *), 1451 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) 1452 { 1453 int retval, pkg_no, core_no, thread_no, node_no; 1454 1455 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 1456 for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) { 1457 for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { 1458 for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { 1459 struct thread_data *t; 1460 struct core_data *c; 1461 struct pkg_data *p; 1462 t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); 1463 1464 if (cpu_is_not_allowed(t->cpu_id)) 1465 continue; 1466 1467 c = GET_CORE(core_base, core_no, node_no, pkg_no); 1468 p = GET_PKG(pkg_base, pkg_no); 1469 1470 retval = func(t, c, p); 1471 if (retval) 1472 return retval; 1473 } 1474 } 1475 } 1476 } 1477 return 0; 1478 } 1479 1480 int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p) 1481 { 
1482 UNUSED(p); 1483 1484 return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0); 1485 } 1486 1487 int is_cpu_first_core_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) 1488 { 1489 UNUSED(c); 1490 1491 return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0); 1492 } 1493 1494 int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) 1495 { 1496 return is_cpu_first_thread_in_core(t, c, p) && is_cpu_first_core_in_package(t, c, p); 1497 } 1498 1499 int cpu_migrate(int cpu) 1500 { 1501 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 1502 CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); 1503 if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) 1504 return -1; 1505 else 1506 return 0; 1507 } 1508 1509 int get_msr_fd(int cpu) 1510 { 1511 char pathname[32]; 1512 int fd; 1513 1514 fd = fd_percpu[cpu]; 1515 1516 if (fd) 1517 return fd; 1518 1519 sprintf(pathname, "/dev/cpu/%d/msr", cpu); 1520 fd = open(pathname, O_RDONLY); 1521 if (fd < 0) 1522 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, " 1523 "or run with --no-msr, or run as root", pathname); 1524 1525 fd_percpu[cpu] = fd; 1526 1527 return fd; 1528 } 1529 1530 static void bic_disable_msr_access(void) 1531 { 1532 const unsigned long bic_msrs = 1533 BIC_SMI | 1534 BIC_CPU_c1 | 1535 BIC_CPU_c3 | 1536 BIC_CPU_c6 | 1537 BIC_CPU_c7 | 1538 BIC_Mod_c6 | 1539 BIC_CoreTmp | 1540 BIC_Totl_c0 | 1541 BIC_Any_c0 | 1542 BIC_GFX_c0 | 1543 BIC_CPUGFX | 1544 BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_PkgTmp; 1545 1546 bic_enabled &= ~bic_msrs; 1547 1548 free_sys_counters(); 1549 } 1550 1551 static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) 1552 { 1553 assert(!no_perf); 1554 1555 return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); 1556 } 1557 1558 static long open_perf_counter(int 
cpu, unsigned int type, unsigned int config, int group_fd, __u64 read_format) 1559 { 1560 struct perf_event_attr attr; 1561 const pid_t pid = -1; 1562 const unsigned long flags = 0; 1563 1564 assert(!no_perf); 1565 1566 memset(&attr, 0, sizeof(struct perf_event_attr)); 1567 1568 attr.type = type; 1569 attr.size = sizeof(struct perf_event_attr); 1570 attr.config = config; 1571 attr.disabled = 0; 1572 attr.sample_type = PERF_SAMPLE_IDENTIFIER; 1573 attr.read_format = read_format; 1574 1575 const int fd = perf_event_open(&attr, pid, cpu, group_fd, flags); 1576 1577 return fd; 1578 } 1579 1580 int get_instr_count_fd(int cpu) 1581 { 1582 if (fd_instr_count_percpu[cpu]) 1583 return fd_instr_count_percpu[cpu]; 1584 1585 fd_instr_count_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0); 1586 1587 return fd_instr_count_percpu[cpu]; 1588 } 1589 1590 int get_msr(int cpu, off_t offset, unsigned long long *msr) 1591 { 1592 ssize_t retval; 1593 1594 assert(!no_msr); 1595 1596 retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset); 1597 1598 if (retval != sizeof *msr) 1599 err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset); 1600 1601 return 0; 1602 } 1603 1604 int probe_msr(int cpu, off_t offset) 1605 { 1606 ssize_t retval; 1607 unsigned long long dummy; 1608 1609 assert(!no_msr); 1610 1611 retval = pread(get_msr_fd(cpu), &dummy, sizeof(dummy), offset); 1612 1613 if (retval != sizeof(dummy)) 1614 return 1; 1615 1616 return 0; 1617 } 1618 1619 #define MAX_DEFERRED 16 1620 char *deferred_add_names[MAX_DEFERRED]; 1621 char *deferred_skip_names[MAX_DEFERRED]; 1622 int deferred_add_index; 1623 int deferred_skip_index; 1624 1625 /* 1626 * HIDE_LIST - hide this list of counters, show the rest [default] 1627 * SHOW_LIST - show this list of counters, hide the rest 1628 */ 1629 enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST; 1630 1631 void help(void) 1632 { 1633 fprintf(outf, 1634 
"Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" 1635 "\n" 1636 "Turbostat forks the specified COMMAND and prints statistics\n" 1637 "when COMMAND completes.\n" 1638 "If no COMMAND is specified, turbostat wakes every 5-seconds\n" 1639 "to print statistics, until interrupted.\n" 1640 " -a, --add add a counter\n" 1641 " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" 1642 " -c, --cpu cpu-set limit output to summary plus cpu-set:\n" 1643 " {core | package | j,k,l..m,n-p }\n" 1644 " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n" 1645 " -D, --Dump displays the raw counter values\n" 1646 " -e, --enable [all | column]\n" 1647 " shows all or the specified disabled column\n" 1648 " -H, --hide [column|column,column,...]\n" 1649 " hide the specified column(s)\n" 1650 " -i, --interval sec.subsec\n" 1651 " Override default 5-second measurement interval\n" 1652 " -J, --Joules displays energy in Joules instead of Watts\n" 1653 " -l, --list list column headers only\n" 1654 " -M, --no-msr Disable all uses of the MSR driver\n" 1655 " -P, --no-perf Disable all uses of the perf API\n" 1656 " -n, --num_iterations num\n" 1657 " number of the measurement iterations\n" 1658 " -N, --header_iterations num\n" 1659 " print header every num iterations\n" 1660 " -o, --out file\n" 1661 " create or truncate \"file\" for all output\n" 1662 " -q, --quiet skip decoding system configuration header\n" 1663 " -s, --show [column|column,column,...]\n" 1664 " show only the specified column(s)\n" 1665 " -S, --Summary\n" 1666 " limits output to 1-line system summary per interval\n" 1667 " -T, --TCC temperature\n" 1668 " sets the Thermal Control Circuit temperature in\n" 1669 " degrees Celsius\n" 1670 " -h, --help print this help message\n" 1671 " -v, --version print version information\n" "\n" "For more help, run \"man turbostat\"\n"); 1672 } 1673 1674 /* 1675 * bic_lookup 1676 * for all the strings in comma separate name_list, 1677 * set the approprate bit in return value. 
1678 */ 1679 unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) 1680 { 1681 unsigned int i; 1682 unsigned long long retval = 0; 1683 1684 while (name_list) { 1685 char *comma; 1686 1687 comma = strchr(name_list, ','); 1688 1689 if (comma) 1690 *comma = '\0'; 1691 1692 for (i = 0; i < MAX_BIC; ++i) { 1693 if (!strcmp(name_list, bic[i].name)) { 1694 retval |= (1ULL << i); 1695 break; 1696 } 1697 if (!strcmp(name_list, "all")) { 1698 retval |= ~0; 1699 break; 1700 } else if (!strcmp(name_list, "topology")) { 1701 retval |= BIC_TOPOLOGY; 1702 break; 1703 } else if (!strcmp(name_list, "power")) { 1704 retval |= BIC_THERMAL_PWR; 1705 break; 1706 } else if (!strcmp(name_list, "idle")) { 1707 retval |= BIC_IDLE; 1708 break; 1709 } else if (!strcmp(name_list, "frequency")) { 1710 retval |= BIC_FREQUENCY; 1711 break; 1712 } else if (!strcmp(name_list, "other")) { 1713 retval |= BIC_OTHER; 1714 break; 1715 } 1716 1717 } 1718 if (i == MAX_BIC) { 1719 if (mode == SHOW_LIST) { 1720 deferred_add_names[deferred_add_index++] = name_list; 1721 if (deferred_add_index >= MAX_DEFERRED) { 1722 fprintf(stderr, "More than max %d un-recognized --add options '%s'\n", 1723 MAX_DEFERRED, name_list); 1724 help(); 1725 exit(1); 1726 } 1727 } else { 1728 deferred_skip_names[deferred_skip_index++] = name_list; 1729 if (debug) 1730 fprintf(stderr, "deferred \"%s\"\n", name_list); 1731 if (deferred_skip_index >= MAX_DEFERRED) { 1732 fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", 1733 MAX_DEFERRED, name_list); 1734 help(); 1735 exit(1); 1736 } 1737 } 1738 } 1739 1740 name_list = comma; 1741 if (name_list) 1742 name_list++; 1743 1744 } 1745 return retval; 1746 } 1747 1748 void print_header(char *delim) 1749 { 1750 struct msr_counter *mp; 1751 int printed = 0; 1752 1753 if (DO_BIC(BIC_USEC)) 1754 outp += sprintf(outp, "%susec", (printed++ ? delim : "")); 1755 if (DO_BIC(BIC_TOD)) 1756 outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? 
delim : "")); 1757 if (DO_BIC(BIC_Package)) 1758 outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); 1759 if (DO_BIC(BIC_Die)) 1760 outp += sprintf(outp, "%sDie", (printed++ ? delim : "")); 1761 if (DO_BIC(BIC_Node)) 1762 outp += sprintf(outp, "%sNode", (printed++ ? delim : "")); 1763 if (DO_BIC(BIC_Core)) 1764 outp += sprintf(outp, "%sCore", (printed++ ? delim : "")); 1765 if (DO_BIC(BIC_CPU)) 1766 outp += sprintf(outp, "%sCPU", (printed++ ? delim : "")); 1767 if (DO_BIC(BIC_APIC)) 1768 outp += sprintf(outp, "%sAPIC", (printed++ ? delim : "")); 1769 if (DO_BIC(BIC_X2APIC)) 1770 outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : "")); 1771 if (DO_BIC(BIC_Avg_MHz)) 1772 outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : "")); 1773 if (DO_BIC(BIC_Busy)) 1774 outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : "")); 1775 if (DO_BIC(BIC_Bzy_MHz)) 1776 outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : "")); 1777 if (DO_BIC(BIC_TSC_MHz)) 1778 outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : "")); 1779 1780 if (DO_BIC(BIC_IPC)) 1781 outp += sprintf(outp, "%sIPC", (printed++ ? delim : "")); 1782 1783 if (DO_BIC(BIC_IRQ)) { 1784 if (sums_need_wide_columns) 1785 outp += sprintf(outp, "%s IRQ", (printed++ ? delim : "")); 1786 else 1787 outp += sprintf(outp, "%sIRQ", (printed++ ? delim : "")); 1788 } 1789 1790 if (DO_BIC(BIC_SMI)) 1791 outp += sprintf(outp, "%sSMI", (printed++ ? delim : "")); 1792 1793 for (mp = sys.tp; mp; mp = mp->next) { 1794 1795 if (mp->format == FORMAT_RAW) { 1796 if (mp->width == 64) 1797 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name); 1798 else 1799 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name); 1800 } else { 1801 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 1802 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name); 1803 else 1804 outp += sprintf(outp, "%s%s", (printed++ ? 
delim : ""), mp->name); 1805 } 1806 } 1807 1808 if (DO_BIC(BIC_CPU_c1)) 1809 outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); 1810 if (DO_BIC(BIC_CPU_c3)) 1811 outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : "")); 1812 if (DO_BIC(BIC_CPU_c6)) 1813 outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : "")); 1814 if (DO_BIC(BIC_CPU_c7)) 1815 outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : "")); 1816 1817 if (DO_BIC(BIC_Mod_c6)) 1818 outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : "")); 1819 1820 if (DO_BIC(BIC_CoreTmp)) 1821 outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : "")); 1822 1823 if (DO_BIC(BIC_CORE_THROT_CNT)) 1824 outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : "")); 1825 1826 if (platform->rapl_msrs && !rapl_joules) { 1827 if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) 1828 outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); 1829 } else if (platform->rapl_msrs && rapl_joules) { 1830 if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) 1831 outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); 1832 } 1833 1834 for (mp = sys.cp; mp; mp = mp->next) { 1835 if (mp->format == FORMAT_RAW) { 1836 if (mp->width == 64) 1837 outp += sprintf(outp, "%s%18.18s", delim, mp->name); 1838 else 1839 outp += sprintf(outp, "%s%10.10s", delim, mp->name); 1840 } else { 1841 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 1842 outp += sprintf(outp, "%s%8s", delim, mp->name); 1843 else 1844 outp += sprintf(outp, "%s%s", delim, mp->name); 1845 } 1846 } 1847 1848 if (DO_BIC(BIC_PkgTmp)) 1849 outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : "")); 1850 1851 if (DO_BIC(BIC_GFX_rc6)) 1852 outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : "")); 1853 1854 if (DO_BIC(BIC_GFXMHz)) 1855 outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : "")); 1856 1857 if (DO_BIC(BIC_GFXACTMHz)) 1858 outp += sprintf(outp, "%sGFXAMHz", (printed++ ? 
delim : "")); 1859 1860 if (DO_BIC(BIC_SAM_mc6)) 1861 outp += sprintf(outp, "%sSAM%%mc6", (printed++ ? delim : "")); 1862 1863 if (DO_BIC(BIC_SAMMHz)) 1864 outp += sprintf(outp, "%sSAMMHz", (printed++ ? delim : "")); 1865 1866 if (DO_BIC(BIC_SAMACTMHz)) 1867 outp += sprintf(outp, "%sSAMAMHz", (printed++ ? delim : "")); 1868 1869 if (DO_BIC(BIC_Totl_c0)) 1870 outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : "")); 1871 if (DO_BIC(BIC_Any_c0)) 1872 outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : "")); 1873 if (DO_BIC(BIC_GFX_c0)) 1874 outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : "")); 1875 if (DO_BIC(BIC_CPUGFX)) 1876 outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : "")); 1877 1878 if (DO_BIC(BIC_Pkgpc2)) 1879 outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : "")); 1880 if (DO_BIC(BIC_Pkgpc3)) 1881 outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : "")); 1882 if (DO_BIC(BIC_Pkgpc6)) 1883 outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : "")); 1884 if (DO_BIC(BIC_Pkgpc7)) 1885 outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : "")); 1886 if (DO_BIC(BIC_Pkgpc8)) 1887 outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : "")); 1888 if (DO_BIC(BIC_Pkgpc9)) 1889 outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : "")); 1890 if (DO_BIC(BIC_Pkgpc10)) 1891 outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : "")); 1892 if (DO_BIC(BIC_CPU_LPI)) 1893 outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : "")); 1894 if (DO_BIC(BIC_SYS_LPI)) 1895 outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : "")); 1896 1897 if (platform->rapl_msrs && !rapl_joules) { 1898 if (DO_BIC(BIC_PkgWatt)) 1899 outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : "")); 1900 if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) 1901 outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); 1902 if (DO_BIC(BIC_GFXWatt)) 1903 outp += sprintf(outp, "%sGFXWatt", (printed++ ? 
delim : "")); 1904 if (DO_BIC(BIC_RAMWatt)) 1905 outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : "")); 1906 if (DO_BIC(BIC_PKG__)) 1907 outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); 1908 if (DO_BIC(BIC_RAM__)) 1909 outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : "")); 1910 } else if (platform->rapl_msrs && rapl_joules) { 1911 if (DO_BIC(BIC_Pkg_J)) 1912 outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : "")); 1913 if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) 1914 outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); 1915 if (DO_BIC(BIC_GFX_J)) 1916 outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : "")); 1917 if (DO_BIC(BIC_RAM_J)) 1918 outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : "")); 1919 if (DO_BIC(BIC_PKG__)) 1920 outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); 1921 if (DO_BIC(BIC_RAM__)) 1922 outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : "")); 1923 } 1924 if (DO_BIC(BIC_UNCORE_MHZ)) 1925 outp += sprintf(outp, "%sUncMHz", (printed++ ? 
delim : "")); 1926 1927 for (mp = sys.pp; mp; mp = mp->next) { 1928 if (mp->format == FORMAT_RAW) { 1929 if (mp->width == 64) 1930 outp += sprintf(outp, "%s%18.18s", delim, mp->name); 1931 else 1932 outp += sprintf(outp, "%s%10.10s", delim, mp->name); 1933 } else { 1934 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 1935 outp += sprintf(outp, "%s%8s", delim, mp->name); 1936 else 1937 outp += sprintf(outp, "%s%s", delim, mp->name); 1938 } 1939 } 1940 1941 outp += sprintf(outp, "\n"); 1942 } 1943 1944 int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 1945 { 1946 int i; 1947 struct msr_counter *mp; 1948 1949 outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p); 1950 1951 if (t) { 1952 outp += sprintf(outp, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags); 1953 outp += sprintf(outp, "TSC: %016llX\n", t->tsc); 1954 outp += sprintf(outp, "aperf: %016llX\n", t->aperf); 1955 outp += sprintf(outp, "mperf: %016llX\n", t->mperf); 1956 outp += sprintf(outp, "c1: %016llX\n", t->c1); 1957 1958 if (DO_BIC(BIC_IPC)) 1959 outp += sprintf(outp, "IPC: %lld\n", t->instr_count); 1960 1961 if (DO_BIC(BIC_IRQ)) 1962 outp += sprintf(outp, "IRQ: %lld\n", t->irq_count); 1963 if (DO_BIC(BIC_SMI)) 1964 outp += sprintf(outp, "SMI: %d\n", t->smi_count); 1965 1966 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 1967 outp += 1968 sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, 1969 t->counter[i], mp->path); 1970 } 1971 } 1972 1973 if (c && is_cpu_first_thread_in_core(t, c, p)) { 1974 outp += sprintf(outp, "core: %d\n", c->core_id); 1975 outp += sprintf(outp, "c3: %016llX\n", c->c3); 1976 outp += sprintf(outp, "c6: %016llX\n", c->c6); 1977 outp += sprintf(outp, "c7: %016llX\n", c->c7); 1978 outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c); 1979 outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt); 1980 1981 const unsigned long long energy_value = c->core_energy.raw_value * c->core_energy.scale; 1982 
const double energy_scale = c->core_energy.scale; 1983 1984 if (c->core_energy.unit == RAPL_UNIT_JOULES) 1985 outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale); 1986 1987 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 1988 outp += 1989 sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, 1990 c->counter[i], mp->path); 1991 } 1992 outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us); 1993 } 1994 1995 if (p && is_cpu_first_core_in_package(t, c, p)) { 1996 outp += sprintf(outp, "package: %d\n", p->package_id); 1997 1998 outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0); 1999 outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0); 2000 outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0); 2001 outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0); 2002 2003 outp += sprintf(outp, "pc2: %016llX\n", p->pc2); 2004 if (DO_BIC(BIC_Pkgpc3)) 2005 outp += sprintf(outp, "pc3: %016llX\n", p->pc3); 2006 if (DO_BIC(BIC_Pkgpc6)) 2007 outp += sprintf(outp, "pc6: %016llX\n", p->pc6); 2008 if (DO_BIC(BIC_Pkgpc7)) 2009 outp += sprintf(outp, "pc7: %016llX\n", p->pc7); 2010 outp += sprintf(outp, "pc8: %016llX\n", p->pc8); 2011 outp += sprintf(outp, "pc9: %016llX\n", p->pc9); 2012 outp += sprintf(outp, "pc10: %016llX\n", p->pc10); 2013 outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi); 2014 outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi); 2015 outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg.raw_value); 2016 outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores.raw_value); 2017 outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx.raw_value); 2018 outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram.raw_value); 2019 outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status.raw_value); 2020 outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status.raw_value); 2021 outp += sprintf(outp, "PTM: %dC\n", 
p->pkg_temp_c); 2022 2023 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 2024 outp += 2025 sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, 2026 p->counter[i], mp->path); 2027 } 2028 } 2029 2030 outp += sprintf(outp, "\n"); 2031 2032 return 0; 2033 } 2034 2035 double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desired_unit, double interval) 2036 { 2037 assert(desired_unit != RAPL_UNIT_INVALID); 2038 2039 /* 2040 * For now we don't expect anything other than joules, 2041 * so just simplify the logic. 2042 */ 2043 assert(c->unit == RAPL_UNIT_JOULES); 2044 2045 const double scaled = c->raw_value * c->scale; 2046 2047 if (desired_unit == RAPL_UNIT_WATTS) 2048 return scaled / interval; 2049 return scaled; 2050 } 2051 2052 /* 2053 * column formatting convention & formats 2054 */ 2055 int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 2056 { 2057 double interval_float, tsc; 2058 char *fmt8; 2059 int i; 2060 struct msr_counter *mp; 2061 char *delim = "\t"; 2062 int printed = 0; 2063 2064 /* if showing only 1st thread in core and this isn't one, bail out */ 2065 if (show_core_only && !is_cpu_first_thread_in_core(t, c, p)) 2066 return 0; 2067 2068 /* if showing only 1st thread in pkg and this isn't one, bail out */ 2069 if (show_pkg_only && !is_cpu_first_core_in_package(t, c, p)) 2070 return 0; 2071 2072 /*if not summary line and --cpu is used */ 2073 if ((t != &average.threads) && (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset))) 2074 return 0; 2075 2076 if (DO_BIC(BIC_USEC)) { 2077 /* on each row, print how many usec each timestamp took to gather */ 2078 struct timeval tv; 2079 2080 timersub(&t->tv_end, &t->tv_begin, &tv); 2081 outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec); 2082 } 2083 2084 /* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */ 2085 if (DO_BIC(BIC_TOD)) 2086 outp += sprintf(outp, "%10ld.%06ld\t", 
t->tv_end.tv_sec, t->tv_end.tv_usec); 2087 2088 interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec / 1000000.0; 2089 2090 tsc = t->tsc * tsc_tweak; 2091 2092 /* topo columns, print blanks on 1st (average) line */ 2093 if (t == &average.threads) { 2094 if (DO_BIC(BIC_Package)) 2095 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2096 if (DO_BIC(BIC_Die)) 2097 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2098 if (DO_BIC(BIC_Node)) 2099 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2100 if (DO_BIC(BIC_Core)) 2101 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2102 if (DO_BIC(BIC_CPU)) 2103 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2104 if (DO_BIC(BIC_APIC)) 2105 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2106 if (DO_BIC(BIC_X2APIC)) 2107 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2108 } else { 2109 if (DO_BIC(BIC_Package)) { 2110 if (p) 2111 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id); 2112 else 2113 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2114 } 2115 if (DO_BIC(BIC_Die)) { 2116 if (c) 2117 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id); 2118 else 2119 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2120 } 2121 if (DO_BIC(BIC_Node)) { 2122 if (t) 2123 outp += sprintf(outp, "%s%d", 2124 (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id); 2125 else 2126 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2127 } 2128 if (DO_BIC(BIC_Core)) { 2129 if (c) 2130 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id); 2131 else 2132 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2133 } 2134 if (DO_BIC(BIC_CPU)) 2135 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id); 2136 if (DO_BIC(BIC_APIC)) 2137 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id); 2138 if (DO_BIC(BIC_X2APIC)) 2139 outp += sprintf(outp, "%s%d", (printed++ ? 
delim : ""), t->x2apic_id); 2140 } 2141 2142 if (DO_BIC(BIC_Avg_MHz)) 2143 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float); 2144 2145 if (DO_BIC(BIC_Busy)) 2146 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc); 2147 2148 if (DO_BIC(BIC_Bzy_MHz)) { 2149 if (has_base_hz) 2150 outp += 2151 sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf); 2152 else 2153 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 2154 tsc / units * t->aperf / t->mperf / interval_float); 2155 } 2156 2157 if (DO_BIC(BIC_TSC_MHz)) 2158 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc / units / interval_float); 2159 2160 if (DO_BIC(BIC_IPC)) 2161 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf); 2162 2163 /* IRQ */ 2164 if (DO_BIC(BIC_IRQ)) { 2165 if (sums_need_wide_columns) 2166 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count); 2167 else 2168 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count); 2169 } 2170 2171 /* SMI */ 2172 if (DO_BIC(BIC_SMI)) 2173 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count); 2174 2175 /* Added counters */ 2176 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 2177 if (mp->format == FORMAT_RAW) { 2178 if (mp->width == 32) 2179 outp += 2180 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]); 2181 else 2182 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]); 2183 } else if (mp->format == FORMAT_DELTA) { 2184 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2185 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]); 2186 else 2187 outp += sprintf(outp, "%s%lld", (printed++ ? 
delim : ""), t->counter[i]); 2188 } else if (mp->format == FORMAT_PERCENT) { 2189 if (mp->type == COUNTER_USEC) 2190 outp += 2191 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 2192 t->counter[i] / interval_float / 10000); 2193 else 2194 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc); 2195 } 2196 } 2197 2198 /* C1 */ 2199 if (DO_BIC(BIC_CPU_c1)) 2200 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc); 2201 2202 /* print per-core data only for 1st thread in core */ 2203 if (!is_cpu_first_thread_in_core(t, c, p)) 2204 goto done; 2205 2206 if (DO_BIC(BIC_CPU_c3)) 2207 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc); 2208 if (DO_BIC(BIC_CPU_c6)) 2209 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc); 2210 if (DO_BIC(BIC_CPU_c7)) 2211 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc); 2212 2213 /* Mod%c6 */ 2214 if (DO_BIC(BIC_Mod_c6)) 2215 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc); 2216 2217 if (DO_BIC(BIC_CoreTmp)) 2218 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c); 2219 2220 /* Core throttle count */ 2221 if (DO_BIC(BIC_CORE_THROT_CNT)) 2222 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt); 2223 2224 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 2225 if (mp->format == FORMAT_RAW) { 2226 if (mp->width == 32) 2227 outp += 2228 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]); 2229 else 2230 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]); 2231 } else if (mp->format == FORMAT_DELTA) { 2232 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2233 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]); 2234 else 2235 outp += sprintf(outp, "%s%lld", (printed++ ? 
delim : ""), c->counter[i]); 2236 } else if (mp->format == FORMAT_PERCENT) { 2237 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc); 2238 } 2239 } 2240 2241 fmt8 = "%s%.2f"; 2242 2243 if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) 2244 outp += 2245 sprintf(outp, fmt8, (printed++ ? delim : ""), 2246 rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float)); 2247 if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) 2248 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 2249 rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float)); 2250 2251 /* print per-package data only for 1st core in package */ 2252 if (!is_cpu_first_core_in_package(t, c, p)) 2253 goto done; 2254 2255 /* PkgTmp */ 2256 if (DO_BIC(BIC_PkgTmp)) 2257 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c); 2258 2259 /* GFXrc6 */ 2260 if (DO_BIC(BIC_GFX_rc6)) { 2261 if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */ 2262 outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); 2263 } else { 2264 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 2265 p->gfx_rc6_ms / 10.0 / interval_float); 2266 } 2267 } 2268 2269 /* GFXMHz */ 2270 if (DO_BIC(BIC_GFXMHz)) 2271 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz); 2272 2273 /* GFXACTMHz */ 2274 if (DO_BIC(BIC_GFXACTMHz)) 2275 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz); 2276 2277 /* SAMmc6 */ 2278 if (DO_BIC(BIC_SAM_mc6)) { 2279 if (p->sam_mc6_ms == -1) { /* detect GFX counter reset */ 2280 outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); 2281 } else { 2282 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 2283 p->sam_mc6_ms / 10.0 / interval_float); 2284 } 2285 } 2286 2287 /* SAMMHz */ 2288 if (DO_BIC(BIC_SAMMHz)) 2289 outp += sprintf(outp, "%s%d", (printed++ ? 
delim : ""), p->sam_mhz); 2290 2291 /* SAMACTMHz */ 2292 if (DO_BIC(BIC_SAMACTMHz)) 2293 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_act_mhz); 2294 2295 /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ 2296 if (DO_BIC(BIC_Totl_c0)) 2297 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc); 2298 if (DO_BIC(BIC_Any_c0)) 2299 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc); 2300 if (DO_BIC(BIC_GFX_c0)) 2301 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc); 2302 if (DO_BIC(BIC_CPUGFX)) 2303 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc); 2304 2305 if (DO_BIC(BIC_Pkgpc2)) 2306 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc); 2307 if (DO_BIC(BIC_Pkgpc3)) 2308 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc); 2309 if (DO_BIC(BIC_Pkgpc6)) 2310 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc); 2311 if (DO_BIC(BIC_Pkgpc7)) 2312 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc); 2313 if (DO_BIC(BIC_Pkgpc8)) 2314 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc); 2315 if (DO_BIC(BIC_Pkgpc9)) 2316 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc); 2317 if (DO_BIC(BIC_Pkgpc10)) 2318 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc); 2319 2320 if (DO_BIC(BIC_CPU_LPI)) { 2321 if (p->cpu_lpi >= 0) 2322 outp += 2323 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 2324 100.0 * p->cpu_lpi / 1000000.0 / interval_float); 2325 else 2326 outp += sprintf(outp, "%s(neg)", (printed++ ? delim : "")); 2327 } 2328 if (DO_BIC(BIC_SYS_LPI)) { 2329 if (p->sys_lpi >= 0) 2330 outp += 2331 sprintf(outp, "%s%.2f", (printed++ ? 
delim : ""), 2332 100.0 * p->sys_lpi / 1000000.0 / interval_float); 2333 else 2334 outp += sprintf(outp, "%s(neg)", (printed++ ? delim : "")); 2335 } 2336 2337 if (DO_BIC(BIC_PkgWatt)) 2338 outp += 2339 sprintf(outp, fmt8, (printed++ ? delim : ""), 2340 rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float)); 2341 if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) 2342 outp += 2343 sprintf(outp, fmt8, (printed++ ? delim : ""), 2344 rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float)); 2345 if (DO_BIC(BIC_GFXWatt)) 2346 outp += 2347 sprintf(outp, fmt8, (printed++ ? delim : ""), 2348 rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float)); 2349 if (DO_BIC(BIC_RAMWatt)) 2350 outp += 2351 sprintf(outp, fmt8, (printed++ ? delim : ""), 2352 rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float)); 2353 if (DO_BIC(BIC_Pkg_J)) 2354 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 2355 rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float)); 2356 if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) 2357 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 2358 rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float)); 2359 if (DO_BIC(BIC_GFX_J)) 2360 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 2361 rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float)); 2362 if (DO_BIC(BIC_RAM_J)) 2363 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 2364 rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float)); 2365 if (DO_BIC(BIC_PKG__)) 2366 outp += 2367 sprintf(outp, fmt8, (printed++ ? delim : ""), 2368 rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float)); 2369 if (DO_BIC(BIC_RAM__)) 2370 outp += 2371 sprintf(outp, fmt8, (printed++ ? 
delim : ""), 2372 rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float)); 2373 /* UncMHz */ 2374 if (DO_BIC(BIC_UNCORE_MHZ)) 2375 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz); 2376 2377 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 2378 if (mp->format == FORMAT_RAW) { 2379 if (mp->width == 32) 2380 outp += 2381 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]); 2382 else 2383 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]); 2384 } else if (mp->format == FORMAT_DELTA) { 2385 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2386 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]); 2387 else 2388 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]); 2389 } else if (mp->format == FORMAT_PERCENT) { 2390 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc); 2391 } 2392 } 2393 2394 done: 2395 if (*(outp - 1) != '\n') 2396 outp += sprintf(outp, "\n"); 2397 2398 return 0; 2399 } 2400 2401 void flush_output_stdout(void) 2402 { 2403 FILE *filep; 2404 2405 if (outf == stderr) 2406 filep = stdout; 2407 else 2408 filep = outf; 2409 2410 fputs(output_buffer, filep); 2411 fflush(filep); 2412 2413 outp = output_buffer; 2414 } 2415 2416 void flush_output_stderr(void) 2417 { 2418 fputs(output_buffer, outf); 2419 fflush(outf); 2420 outp = output_buffer; 2421 } 2422 2423 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 2424 { 2425 static int count; 2426 2427 if ((!count || (header_iterations && !(count % header_iterations))) || !summary_only) 2428 print_header("\t"); 2429 2430 format_counters(&average.threads, &average.cores, &average.packages); 2431 2432 count++; 2433 2434 if (summary_only) 2435 return; 2436 2437 for_all_cpus(format_counters, t, c, p); 2438 } 2439 2440 #define DELTA_WRAP32(new, old) \ 2441 old = ((((unsigned long long)new << 
/*
 * DELTA_WRAP32(new, old) - store in old the difference (new - old) taken
 * modulo 2^32.
 *
 * Both operands are shifted up by 32 bits before the subtraction so that
 * a borrow out of the low 32 bits is discarded; shifting the result back
 * down leaves the wrapped 32-bit delta even when new < old.
 *
 * Both arguments are parenthesized so arbitrary expressions may be passed,
 * and the expansion is a single expression without a trailing semicolon so
 * that "if (x) DELTA_WRAP32(a, b); else ..." parses as intended.
 * 'old' is evaluated twice and must be a side-effect-free lvalue.
 */
#define DELTA_WRAP32(new, old)							\
	((old) = ((((unsigned long long)(new) << 32) -				\
		   ((unsigned long long)(old) << 32)) >> 32))
new->energy_dram.raw_value - old->energy_dram.raw_value; 2494 old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value; 2495 old->rapl_dram_perf_status.raw_value = 2496 new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value; 2497 2498 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 2499 if (mp->format == FORMAT_RAW) 2500 old->counter[i] = new->counter[i]; 2501 else 2502 old->counter[i] = new->counter[i] - old->counter[i]; 2503 } 2504 2505 return 0; 2506 } 2507 2508 void delta_core(struct core_data *new, struct core_data *old) 2509 { 2510 int i; 2511 struct msr_counter *mp; 2512 2513 old->c3 = new->c3 - old->c3; 2514 old->c6 = new->c6 - old->c6; 2515 old->c7 = new->c7 - old->c7; 2516 old->core_temp_c = new->core_temp_c; 2517 old->core_throt_cnt = new->core_throt_cnt; 2518 old->mc6_us = new->mc6_us - old->mc6_us; 2519 2520 DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value); 2521 2522 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 2523 if (mp->format == FORMAT_RAW) 2524 old->counter[i] = new->counter[i]; 2525 else 2526 old->counter[i] = new->counter[i] - old->counter[i]; 2527 } 2528 } 2529 2530 int soft_c1_residency_display(int bic) 2531 { 2532 if (!DO_BIC(BIC_CPU_c1) || platform->has_msr_core_c1_res) 2533 return 0; 2534 2535 return DO_BIC_READ(bic); 2536 } 2537 2538 /* 2539 * old = new - old 2540 */ 2541 int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta) 2542 { 2543 int i; 2544 struct msr_counter *mp; 2545 2546 /* we run cpuid just the 1st time, copy the results */ 2547 if (DO_BIC(BIC_APIC)) 2548 new->apic_id = old->apic_id; 2549 if (DO_BIC(BIC_X2APIC)) 2550 new->x2apic_id = old->x2apic_id; 2551 2552 /* 2553 * the timestamps from start of measurement interval are in "old" 2554 * the timestamp from end of measurement interval are in "new" 2555 * over-write old w/ new so we can print end of interval values 2556 */ 2557 2558 
timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta); 2559 old->tv_begin = new->tv_begin; 2560 old->tv_end = new->tv_end; 2561 2562 old->tsc = new->tsc - old->tsc; 2563 2564 /* check for TSC < 1 Mcycles over interval */ 2565 if (old->tsc < (1000 * 1000)) 2566 errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n" 2567 "You can disable all c-states by booting with \"idle=poll\"\n" 2568 "or just the deep ones with \"processor.max_cstate=1\""); 2569 2570 old->c1 = new->c1 - old->c1; 2571 2572 if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC) 2573 || soft_c1_residency_display(BIC_Avg_MHz)) { 2574 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { 2575 old->aperf = new->aperf - old->aperf; 2576 old->mperf = new->mperf - old->mperf; 2577 } else { 2578 return -1; 2579 } 2580 } 2581 2582 if (platform->has_msr_core_c1_res) { 2583 /* 2584 * Some models have a dedicated C1 residency MSR, 2585 * which should be more accurate than the derivation below. 2586 */ 2587 } else { 2588 /* 2589 * As counter collection is not atomic, 2590 * it is possible for mperf's non-halted cycles + idle states 2591 * to exceed TSC's all cycles: show c1 = 0% in that case. 
2592 */ 2593 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak)) 2594 old->c1 = 0; 2595 else { 2596 /* normal case, derive c1 */ 2597 old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3 2598 - core_delta->c6 - core_delta->c7; 2599 } 2600 } 2601 2602 if (old->mperf == 0) { 2603 if (debug > 1) 2604 fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id); 2605 old->mperf = 1; /* divide by 0 protection */ 2606 } 2607 2608 if (DO_BIC(BIC_IPC)) 2609 old->instr_count = new->instr_count - old->instr_count; 2610 2611 if (DO_BIC(BIC_IRQ)) 2612 old->irq_count = new->irq_count - old->irq_count; 2613 2614 if (DO_BIC(BIC_SMI)) 2615 old->smi_count = new->smi_count - old->smi_count; 2616 2617 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 2618 if (mp->format == FORMAT_RAW) 2619 old->counter[i] = new->counter[i]; 2620 else 2621 old->counter[i] = new->counter[i] - old->counter[i]; 2622 } 2623 return 0; 2624 } 2625 2626 int delta_cpu(struct thread_data *t, struct core_data *c, 2627 struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2) 2628 { 2629 int retval = 0; 2630 2631 /* calculate core delta only for 1st thread in core */ 2632 if (is_cpu_first_thread_in_core(t, c, p)) 2633 delta_core(c, c2); 2634 2635 /* always calculate thread delta */ 2636 retval = delta_thread(t, t2, c2); /* c2 is core delta */ 2637 if (retval) 2638 return retval; 2639 2640 /* calculate package delta only for 1st core in package */ 2641 if (is_cpu_first_core_in_package(t, c, p)) 2642 retval = delta_package(p, p2); 2643 2644 return retval; 2645 } 2646 2647 void rapl_counter_clear(struct rapl_counter *c) 2648 { 2649 c->raw_value = 0; 2650 c->scale = 0.0; 2651 c->unit = RAPL_UNIT_INVALID; 2652 } 2653 2654 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 2655 { 2656 int i; 2657 struct msr_counter *mp; 2658 2659 t->tv_begin.tv_sec = 0; 2660 t->tv_begin.tv_usec = 0; 2661 t->tv_end.tv_sec = 0; 2662 
t->tv_end.tv_usec = 0; 2663 t->tv_delta.tv_sec = 0; 2664 t->tv_delta.tv_usec = 0; 2665 2666 t->tsc = 0; 2667 t->aperf = 0; 2668 t->mperf = 0; 2669 t->c1 = 0; 2670 2671 t->instr_count = 0; 2672 2673 t->irq_count = 0; 2674 t->smi_count = 0; 2675 2676 c->c3 = 0; 2677 c->c6 = 0; 2678 c->c7 = 0; 2679 c->mc6_us = 0; 2680 c->core_temp_c = 0; 2681 rapl_counter_clear(&c->core_energy); 2682 c->core_throt_cnt = 0; 2683 2684 p->pkg_wtd_core_c0 = 0; 2685 p->pkg_any_core_c0 = 0; 2686 p->pkg_any_gfxe_c0 = 0; 2687 p->pkg_both_core_gfxe_c0 = 0; 2688 2689 p->pc2 = 0; 2690 if (DO_BIC(BIC_Pkgpc3)) 2691 p->pc3 = 0; 2692 if (DO_BIC(BIC_Pkgpc6)) 2693 p->pc6 = 0; 2694 if (DO_BIC(BIC_Pkgpc7)) 2695 p->pc7 = 0; 2696 p->pc8 = 0; 2697 p->pc9 = 0; 2698 p->pc10 = 0; 2699 p->cpu_lpi = 0; 2700 p->sys_lpi = 0; 2701 2702 rapl_counter_clear(&p->energy_pkg); 2703 rapl_counter_clear(&p->energy_dram); 2704 rapl_counter_clear(&p->energy_cores); 2705 rapl_counter_clear(&p->energy_gfx); 2706 rapl_counter_clear(&p->rapl_pkg_perf_status); 2707 rapl_counter_clear(&p->rapl_dram_perf_status); 2708 p->pkg_temp_c = 0; 2709 2710 p->gfx_rc6_ms = 0; 2711 p->uncore_mhz = 0; 2712 p->gfx_mhz = 0; 2713 p->gfx_act_mhz = 0; 2714 p->sam_mc6_ms = 0; 2715 p->sam_mhz = 0; 2716 p->sam_act_mhz = 0; 2717 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) 2718 t->counter[i] = 0; 2719 2720 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) 2721 c->counter[i] = 0; 2722 2723 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) 2724 p->counter[i] = 0; 2725 } 2726 2727 void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src) 2728 { 2729 /* Copy unit and scale from src if dst is not initialized */ 2730 if (dst->unit == RAPL_UNIT_INVALID) { 2731 dst->unit = src->unit; 2732 dst->scale = src->scale; 2733 } 2734 2735 assert(dst->unit == src->unit); 2736 assert(dst->scale == src->scale); 2737 2738 dst->raw_value += src->raw_value; 2739 } 2740 2741 int sum_counters(struct thread_data *t, struct core_data *c, struct 
pkg_data *p) 2742 { 2743 int i; 2744 struct msr_counter *mp; 2745 2746 /* copy un-changing apic_id's */ 2747 if (DO_BIC(BIC_APIC)) 2748 average.threads.apic_id = t->apic_id; 2749 if (DO_BIC(BIC_X2APIC)) 2750 average.threads.x2apic_id = t->x2apic_id; 2751 2752 /* remember first tv_begin */ 2753 if (average.threads.tv_begin.tv_sec == 0) 2754 average.threads.tv_begin = t->tv_begin; 2755 2756 /* remember last tv_end */ 2757 average.threads.tv_end = t->tv_end; 2758 2759 average.threads.tsc += t->tsc; 2760 average.threads.aperf += t->aperf; 2761 average.threads.mperf += t->mperf; 2762 average.threads.c1 += t->c1; 2763 2764 average.threads.instr_count += t->instr_count; 2765 2766 average.threads.irq_count += t->irq_count; 2767 average.threads.smi_count += t->smi_count; 2768 2769 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 2770 if (mp->format == FORMAT_RAW) 2771 continue; 2772 average.threads.counter[i] += t->counter[i]; 2773 } 2774 2775 /* sum per-core values only for 1st thread in core */ 2776 if (!is_cpu_first_thread_in_core(t, c, p)) 2777 return 0; 2778 2779 average.cores.c3 += c->c3; 2780 average.cores.c6 += c->c6; 2781 average.cores.c7 += c->c7; 2782 average.cores.mc6_us += c->mc6_us; 2783 2784 average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); 2785 average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt); 2786 2787 rapl_counter_accumulate(&average.cores.core_energy, &c->core_energy); 2788 2789 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 2790 if (mp->format == FORMAT_RAW) 2791 continue; 2792 average.cores.counter[i] += c->counter[i]; 2793 } 2794 2795 /* sum per-pkg values only for 1st core in pkg */ 2796 if (!is_cpu_first_core_in_package(t, c, p)) 2797 return 0; 2798 2799 if (DO_BIC(BIC_Totl_c0)) 2800 average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0; 2801 if (DO_BIC(BIC_Any_c0)) 2802 average.packages.pkg_any_core_c0 += p->pkg_any_core_c0; 2803 if (DO_BIC(BIC_GFX_c0)) 2804 
average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0; 2805 if (DO_BIC(BIC_CPUGFX)) 2806 average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0; 2807 2808 average.packages.pc2 += p->pc2; 2809 if (DO_BIC(BIC_Pkgpc3)) 2810 average.packages.pc3 += p->pc3; 2811 if (DO_BIC(BIC_Pkgpc6)) 2812 average.packages.pc6 += p->pc6; 2813 if (DO_BIC(BIC_Pkgpc7)) 2814 average.packages.pc7 += p->pc7; 2815 average.packages.pc8 += p->pc8; 2816 average.packages.pc9 += p->pc9; 2817 average.packages.pc10 += p->pc10; 2818 2819 average.packages.cpu_lpi = p->cpu_lpi; 2820 average.packages.sys_lpi = p->sys_lpi; 2821 2822 rapl_counter_accumulate(&average.packages.energy_pkg, &p->energy_pkg); 2823 rapl_counter_accumulate(&average.packages.energy_dram, &p->energy_dram); 2824 rapl_counter_accumulate(&average.packages.energy_cores, &p->energy_cores); 2825 rapl_counter_accumulate(&average.packages.energy_gfx, &p->energy_gfx); 2826 2827 average.packages.gfx_rc6_ms = p->gfx_rc6_ms; 2828 average.packages.uncore_mhz = p->uncore_mhz; 2829 average.packages.gfx_mhz = p->gfx_mhz; 2830 average.packages.gfx_act_mhz = p->gfx_act_mhz; 2831 average.packages.sam_mc6_ms = p->sam_mc6_ms; 2832 average.packages.sam_mhz = p->sam_mhz; 2833 average.packages.sam_act_mhz = p->sam_act_mhz; 2834 2835 average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); 2836 2837 rapl_counter_accumulate(&average.packages.rapl_pkg_perf_status, &p->rapl_pkg_perf_status); 2838 rapl_counter_accumulate(&average.packages.rapl_dram_perf_status, &p->rapl_dram_perf_status); 2839 2840 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 2841 if ((mp->format == FORMAT_RAW) && (topo.num_packages == 0)) 2842 average.packages.counter[i] = p->counter[i]; 2843 else 2844 average.packages.counter[i] += p->counter[i]; 2845 } 2846 return 0; 2847 } 2848 2849 /* 2850 * sum the counters for all cpus in the system 2851 * compute the weighted average 2852 */ 2853 void compute_average(struct thread_data *t, struct core_data *c, 
struct pkg_data *p) 2854 { 2855 int i; 2856 struct msr_counter *mp; 2857 2858 clear_counters(&average.threads, &average.cores, &average.packages); 2859 2860 for_all_cpus(sum_counters, t, c, p); 2861 2862 /* Use the global time delta for the average. */ 2863 average.threads.tv_delta = tv_delta; 2864 2865 average.threads.tsc /= topo.allowed_cpus; 2866 average.threads.aperf /= topo.allowed_cpus; 2867 average.threads.mperf /= topo.allowed_cpus; 2868 average.threads.instr_count /= topo.allowed_cpus; 2869 average.threads.c1 /= topo.allowed_cpus; 2870 2871 if (average.threads.irq_count > 9999999) 2872 sums_need_wide_columns = 1; 2873 2874 average.cores.c3 /= topo.allowed_cores; 2875 average.cores.c6 /= topo.allowed_cores; 2876 average.cores.c7 /= topo.allowed_cores; 2877 average.cores.mc6_us /= topo.allowed_cores; 2878 2879 if (DO_BIC(BIC_Totl_c0)) 2880 average.packages.pkg_wtd_core_c0 /= topo.allowed_packages; 2881 if (DO_BIC(BIC_Any_c0)) 2882 average.packages.pkg_any_core_c0 /= topo.allowed_packages; 2883 if (DO_BIC(BIC_GFX_c0)) 2884 average.packages.pkg_any_gfxe_c0 /= topo.allowed_packages; 2885 if (DO_BIC(BIC_CPUGFX)) 2886 average.packages.pkg_both_core_gfxe_c0 /= topo.allowed_packages; 2887 2888 average.packages.pc2 /= topo.allowed_packages; 2889 if (DO_BIC(BIC_Pkgpc3)) 2890 average.packages.pc3 /= topo.allowed_packages; 2891 if (DO_BIC(BIC_Pkgpc6)) 2892 average.packages.pc6 /= topo.allowed_packages; 2893 if (DO_BIC(BIC_Pkgpc7)) 2894 average.packages.pc7 /= topo.allowed_packages; 2895 2896 average.packages.pc8 /= topo.allowed_packages; 2897 average.packages.pc9 /= topo.allowed_packages; 2898 average.packages.pc10 /= topo.allowed_packages; 2899 2900 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 2901 if (mp->format == FORMAT_RAW) 2902 continue; 2903 if (mp->type == COUNTER_ITEMS) { 2904 if (average.threads.counter[i] > 9999999) 2905 sums_need_wide_columns = 1; 2906 continue; 2907 } 2908 average.threads.counter[i] /= topo.allowed_cpus; 2909 } 2910 for (i = 0, mp = 
sys.cp; mp; i++, mp = mp->next) { 2911 if (mp->format == FORMAT_RAW) 2912 continue; 2913 if (mp->type == COUNTER_ITEMS) { 2914 if (average.cores.counter[i] > 9999999) 2915 sums_need_wide_columns = 1; 2916 } 2917 average.cores.counter[i] /= topo.allowed_cores; 2918 } 2919 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 2920 if (mp->format == FORMAT_RAW) 2921 continue; 2922 if (mp->type == COUNTER_ITEMS) { 2923 if (average.packages.counter[i] > 9999999) 2924 sums_need_wide_columns = 1; 2925 } 2926 average.packages.counter[i] /= topo.allowed_packages; 2927 } 2928 } 2929 2930 static unsigned long long rdtsc(void) 2931 { 2932 unsigned int low, high; 2933 2934 asm volatile ("rdtsc":"=a" (low), "=d"(high)); 2935 2936 return low | ((unsigned long long)high) << 32; 2937 } 2938 2939 /* 2940 * Open a file, and exit on failure 2941 */ 2942 FILE *fopen_or_die(const char *path, const char *mode) 2943 { 2944 FILE *filep = fopen(path, mode); 2945 2946 if (!filep) 2947 err(1, "%s: open failed", path); 2948 return filep; 2949 } 2950 2951 /* 2952 * snapshot_sysfs_counter() 2953 * 2954 * return snapshot of given counter 2955 */ 2956 unsigned long long snapshot_sysfs_counter(char *path) 2957 { 2958 FILE *fp; 2959 int retval; 2960 unsigned long long counter; 2961 2962 fp = fopen_or_die(path, "r"); 2963 2964 retval = fscanf(fp, "%lld", &counter); 2965 if (retval != 1) 2966 err(1, "snapshot_sysfs_counter(%s)", path); 2967 2968 fclose(fp); 2969 2970 return counter; 2971 } 2972 2973 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp) 2974 { 2975 if (mp->msr_num != 0) { 2976 assert(!no_msr); 2977 if (get_msr(cpu, mp->msr_num, counterp)) 2978 return -1; 2979 } else { 2980 char path[128 + PATH_BYTES]; 2981 2982 if (mp->flags & SYSFS_PERCPU) { 2983 sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", cpu, mp->path); 2984 2985 *counterp = snapshot_sysfs_counter(path); 2986 } else { 2987 *counterp = snapshot_sysfs_counter(mp->path); 2988 } 2989 } 2990 2991 return 0; 2992 } 
2993 2994 unsigned long long get_uncore_mhz(int package, int die) 2995 { 2996 char path[128]; 2997 2998 sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", package, 2999 die); 3000 3001 return (snapshot_sysfs_counter(path) / 1000); 3002 } 3003 3004 int get_epb(int cpu) 3005 { 3006 char path[128 + PATH_BYTES]; 3007 unsigned long long msr; 3008 int ret, epb = -1; 3009 FILE *fp; 3010 3011 sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu); 3012 3013 fp = fopen(path, "r"); 3014 if (!fp) 3015 goto msr_fallback; 3016 3017 ret = fscanf(fp, "%d", &epb); 3018 if (ret != 1) 3019 err(1, "%s(%s)", __func__, path); 3020 3021 fclose(fp); 3022 3023 return epb; 3024 3025 msr_fallback: 3026 if (no_msr) 3027 return -1; 3028 3029 get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr); 3030 3031 return msr & 0xf; 3032 } 3033 3034 void get_apic_id(struct thread_data *t) 3035 { 3036 unsigned int eax, ebx, ecx, edx; 3037 3038 if (DO_BIC(BIC_APIC)) { 3039 eax = ebx = ecx = edx = 0; 3040 __cpuid(1, eax, ebx, ecx, edx); 3041 3042 t->apic_id = (ebx >> 24) & 0xff; 3043 } 3044 3045 if (!DO_BIC(BIC_X2APIC)) 3046 return; 3047 3048 if (authentic_amd || hygon_genuine) { 3049 unsigned int topology_extensions; 3050 3051 if (max_extended_level < 0x8000001e) 3052 return; 3053 3054 eax = ebx = ecx = edx = 0; 3055 __cpuid(0x80000001, eax, ebx, ecx, edx); 3056 topology_extensions = ecx & (1 << 22); 3057 3058 if (topology_extensions == 0) 3059 return; 3060 3061 eax = ebx = ecx = edx = 0; 3062 __cpuid(0x8000001e, eax, ebx, ecx, edx); 3063 3064 t->x2apic_id = eax; 3065 return; 3066 } 3067 3068 if (!genuine_intel) 3069 return; 3070 3071 if (max_level < 0xb) 3072 return; 3073 3074 ecx = 0; 3075 __cpuid(0xb, eax, ebx, ecx, edx); 3076 t->x2apic_id = edx; 3077 3078 if (debug && (t->apic_id != (t->x2apic_id & 0xff))) 3079 fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id); 3080 } 3081 3082 int 
get_core_throt_cnt(int cpu, unsigned long long *cnt) 3083 { 3084 char path[128 + PATH_BYTES]; 3085 unsigned long long tmp; 3086 FILE *fp; 3087 int ret; 3088 3089 sprintf(path, "/sys/devices/system/cpu/cpu%d/thermal_throttle/core_throttle_count", cpu); 3090 fp = fopen(path, "r"); 3091 if (!fp) 3092 return -1; 3093 ret = fscanf(fp, "%lld", &tmp); 3094 fclose(fp); 3095 if (ret != 1) 3096 return -1; 3097 *cnt = tmp; 3098 3099 return 0; 3100 } 3101 3102 struct amperf_group_fd { 3103 int aperf; /* Also the group descriptor */ 3104 int mperf; 3105 }; 3106 3107 static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr) 3108 { 3109 int fdmt; 3110 int bytes_read; 3111 char buf[64]; 3112 int ret = -1; 3113 3114 fdmt = open(path, O_RDONLY, 0); 3115 if (fdmt == -1) { 3116 if (debug) 3117 fprintf(stderr, "Failed to parse perf counter info %s\n", path); 3118 ret = -1; 3119 goto cleanup_and_exit; 3120 } 3121 3122 bytes_read = read(fdmt, buf, sizeof(buf) - 1); 3123 if (bytes_read <= 0 || bytes_read >= (int)sizeof(buf)) { 3124 if (debug) 3125 fprintf(stderr, "Failed to parse perf counter info %s\n", path); 3126 ret = -1; 3127 goto cleanup_and_exit; 3128 } 3129 3130 buf[bytes_read] = '\0'; 3131 3132 if (sscanf(buf, parse_format, value_ptr) != 1) { 3133 if (debug) 3134 fprintf(stderr, "Failed to parse perf counter info %s\n", path); 3135 ret = -1; 3136 goto cleanup_and_exit; 3137 } 3138 3139 ret = 0; 3140 3141 cleanup_and_exit: 3142 close(fdmt); 3143 return ret; 3144 } 3145 3146 static unsigned int read_perf_counter_info_n(const char *const path, const char *const parse_format) 3147 { 3148 unsigned int v; 3149 int status; 3150 3151 status = read_perf_counter_info(path, parse_format, &v); 3152 if (status) 3153 v = -1; 3154 3155 return v; 3156 } 3157 3158 static unsigned int read_msr_type(void) 3159 { 3160 const char *const path = "/sys/bus/event_source/devices/msr/type"; 3161 const char *const format = "%u"; 3162 3163 return 
read_perf_counter_info_n(path, format); 3164 } 3165 3166 static unsigned int read_aperf_config(void) 3167 { 3168 const char *const path = "/sys/bus/event_source/devices/msr/events/aperf"; 3169 const char *const format = "event=%x"; 3170 3171 return read_perf_counter_info_n(path, format); 3172 } 3173 3174 static unsigned int read_mperf_config(void) 3175 { 3176 const char *const path = "/sys/bus/event_source/devices/msr/events/mperf"; 3177 const char *const format = "event=%x"; 3178 3179 return read_perf_counter_info_n(path, format); 3180 } 3181 3182 static unsigned int read_perf_type(const char *subsys) 3183 { 3184 const char *const path_format = "/sys/bus/event_source/devices/%s/type"; 3185 const char *const format = "%u"; 3186 char path[128]; 3187 3188 snprintf(path, sizeof(path), path_format, subsys); 3189 3190 return read_perf_counter_info_n(path, format); 3191 } 3192 3193 static unsigned int read_rapl_config(const char *subsys, const char *event_name) 3194 { 3195 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s"; 3196 const char *const format = "event=%x"; 3197 char path[128]; 3198 3199 snprintf(path, sizeof(path), path_format, subsys, event_name); 3200 3201 return read_perf_counter_info_n(path, format); 3202 } 3203 3204 static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_name) 3205 { 3206 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.unit"; 3207 const char *const format = "%s"; 3208 char path[128]; 3209 char unit_buffer[16]; 3210 3211 snprintf(path, sizeof(path), path_format, subsys, event_name); 3212 3213 read_perf_counter_info(path, format, &unit_buffer); 3214 if (strcmp("Joules", unit_buffer) == 0) 3215 return RAPL_UNIT_JOULES; 3216 3217 return RAPL_UNIT_INVALID; 3218 } 3219 3220 static double read_perf_rapl_scale(const char *subsys, const char *event_name) 3221 { 3222 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.scale"; 3223 const char *const 
format = "%lf"; 3224 char path[128]; 3225 double scale; 3226 3227 snprintf(path, sizeof(path), path_format, subsys, event_name); 3228 3229 if (read_perf_counter_info(path, format, &scale)) 3230 return 0.0; 3231 3232 return scale; 3233 } 3234 3235 static struct amperf_group_fd open_amperf_fd(int cpu) 3236 { 3237 const unsigned int msr_type = read_msr_type(); 3238 const unsigned int aperf_config = read_aperf_config(); 3239 const unsigned int mperf_config = read_mperf_config(); 3240 struct amperf_group_fd fds = {.aperf = -1, .mperf = -1 }; 3241 3242 fds.aperf = open_perf_counter(cpu, msr_type, aperf_config, -1, PERF_FORMAT_GROUP); 3243 fds.mperf = open_perf_counter(cpu, msr_type, mperf_config, fds.aperf, PERF_FORMAT_GROUP); 3244 3245 return fds; 3246 } 3247 3248 static int get_amperf_fd(int cpu) 3249 { 3250 assert(fd_amperf_percpu); 3251 3252 if (fd_amperf_percpu[cpu].aperf) 3253 return fd_amperf_percpu[cpu].aperf; 3254 3255 fd_amperf_percpu[cpu] = open_amperf_fd(cpu); 3256 3257 return fd_amperf_percpu[cpu].aperf; 3258 } 3259 3260 /* Read APERF, MPERF and TSC using the perf API. */ 3261 static int read_aperf_mperf_tsc_perf(struct thread_data *t, int cpu) 3262 { 3263 union { 3264 struct { 3265 unsigned long nr_entries; 3266 unsigned long aperf; 3267 unsigned long mperf; 3268 }; 3269 3270 unsigned long as_array[3]; 3271 } cnt; 3272 3273 const int fd_amperf = get_amperf_fd(cpu); 3274 3275 /* 3276 * Read the TSC with rdtsc, because we want the absolute value and not 3277 * the offset from the start of the counter. 3278 */ 3279 t->tsc = rdtsc(); 3280 3281 const int n = read(fd_amperf, &cnt.as_array[0], sizeof(cnt.as_array)); 3282 3283 if (n != sizeof(cnt.as_array)) 3284 return -2; 3285 3286 t->aperf = cnt.aperf * aperf_mperf_multiplier; 3287 t->mperf = cnt.mperf * aperf_mperf_multiplier; 3288 3289 return 0; 3290 } 3291 3292 /* Read APERF, MPERF and TSC using the MSR driver and rdtsc instruction. 
*/ 3293 static int read_aperf_mperf_tsc_msr(struct thread_data *t, int cpu) 3294 { 3295 unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time; 3296 int aperf_mperf_retry_count = 0; 3297 3298 /* 3299 * The TSC, APERF and MPERF must be read together for 3300 * APERF/MPERF and MPERF/TSC to give accurate results. 3301 * 3302 * Unfortunately, APERF and MPERF are read by 3303 * individual system call, so delays may occur 3304 * between them. If the time to read them 3305 * varies by a large amount, we re-read them. 3306 */ 3307 3308 /* 3309 * This initial dummy APERF read has been seen to 3310 * reduce jitter in the subsequent reads. 3311 */ 3312 3313 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) 3314 return -3; 3315 3316 retry: 3317 t->tsc = rdtsc(); /* re-read close to APERF */ 3318 3319 tsc_before = t->tsc; 3320 3321 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) 3322 return -3; 3323 3324 tsc_between = rdtsc(); 3325 3326 if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) 3327 return -4; 3328 3329 tsc_after = rdtsc(); 3330 3331 aperf_time = tsc_between - tsc_before; 3332 mperf_time = tsc_after - tsc_between; 3333 3334 /* 3335 * If the system call latency to read APERF and MPERF 3336 * differ by more than 2x, then try again. 
3337 */ 3338 if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) { 3339 aperf_mperf_retry_count++; 3340 if (aperf_mperf_retry_count < 5) 3341 goto retry; 3342 else 3343 warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time); 3344 } 3345 aperf_mperf_retry_count = 0; 3346 3347 t->aperf = t->aperf * aperf_mperf_multiplier; 3348 t->mperf = t->mperf * aperf_mperf_multiplier; 3349 3350 return 0; 3351 } 3352 3353 size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci) 3354 { 3355 size_t ret = 0; 3356 3357 for (int i = 0; i < NUM_RAPL_COUNTERS; ++i) 3358 if (rci->source[i] == RAPL_SOURCE_PERF) 3359 ++ret; 3360 3361 return ret; 3362 } 3363 3364 void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci, unsigned int idx) 3365 { 3366 rc->raw_value = rci->data[idx]; 3367 rc->unit = rci->unit[idx]; 3368 rc->scale = rci->scale[idx]; 3369 } 3370 3371 int get_rapl_counters(int cpu, int domain, struct core_data *c, struct pkg_data *p) 3372 { 3373 unsigned long long perf_data[NUM_RAPL_COUNTERS + 1]; 3374 struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain]; 3375 3376 if (debug) 3377 fprintf(stderr, "%s: cpu%d domain%d\n", __func__, cpu, domain); 3378 3379 assert(rapl_counter_info_perdomain); 3380 3381 /* 3382 * If we have any perf counters to read, read them all now, in bulk 3383 */ 3384 if (rci->fd_perf != -1) { 3385 size_t num_perf_counters = rapl_counter_info_count_perf(rci); 3386 const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long); 3387 const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data)); 3388 3389 if (actual_read_size != expected_read_size) 3390 err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, 3391 actual_read_size); 3392 } 3393 3394 for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) { 3395 switch (rci->source[i]) { 3396 case RAPL_SOURCE_NONE: 3397 break; 3398 3399 case 
RAPL_SOURCE_PERF: 3400 assert(pi < ARRAY_SIZE(perf_data)); 3401 assert(rci->fd_perf != -1); 3402 3403 if (debug) 3404 fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n", 3405 i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]); 3406 3407 rci->data[i] = perf_data[pi]; 3408 3409 ++pi; 3410 break; 3411 3412 case RAPL_SOURCE_MSR: 3413 if (debug) 3414 fprintf(stderr, "Reading rapl counter via msr at %u\n", i); 3415 3416 assert(!no_msr); 3417 if (rci->flags[i] & RAPL_COUNTER_FLAG_USE_MSR_SUM) { 3418 if (get_msr_sum(cpu, rci->msr[i], &rci->data[i])) 3419 return -13 - i; 3420 } else { 3421 if (get_msr(cpu, rci->msr[i], &rci->data[i])) 3422 return -13 - i; 3423 } 3424 3425 rci->data[i] &= rci->msr_mask[i]; 3426 if (rci->msr_shift[i] >= 0) 3427 rci->data[i] >>= abs(rci->msr_shift[i]); 3428 else 3429 rci->data[i] <<= abs(rci->msr_shift[i]); 3430 3431 break; 3432 } 3433 } 3434 3435 _Static_assert(NUM_RAPL_COUNTERS == 7); 3436 write_rapl_counter(&p->energy_pkg, rci, RAPL_RCI_INDEX_ENERGY_PKG); 3437 write_rapl_counter(&p->energy_cores, rci, RAPL_RCI_INDEX_ENERGY_CORES); 3438 write_rapl_counter(&p->energy_dram, rci, RAPL_RCI_INDEX_DRAM); 3439 write_rapl_counter(&p->energy_gfx, rci, RAPL_RCI_INDEX_GFX); 3440 write_rapl_counter(&p->rapl_pkg_perf_status, rci, RAPL_RCI_INDEX_PKG_PERF_STATUS); 3441 write_rapl_counter(&p->rapl_dram_perf_status, rci, RAPL_RCI_INDEX_DRAM_PERF_STATUS); 3442 write_rapl_counter(&c->core_energy, rci, RAPL_RCI_INDEX_CORE_ENERGY); 3443 3444 return 0; 3445 } 3446 3447 /* 3448 * get_counters(...) 
3449 * migrate to cpu 3450 * acquire and record local counters for that cpu 3451 */ 3452 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 3453 { 3454 int cpu = t->cpu_id; 3455 unsigned long long msr; 3456 struct msr_counter *mp; 3457 int i; 3458 int status; 3459 3460 if (cpu_migrate(cpu)) { 3461 fprintf(outf, "%s: Could not migrate to CPU %d\n", __func__, cpu); 3462 return -1; 3463 } 3464 3465 gettimeofday(&t->tv_begin, (struct timezone *)NULL); 3466 3467 if (first_counter_read) 3468 get_apic_id(t); 3469 3470 t->tsc = rdtsc(); /* we are running on local CPU of interest */ 3471 3472 if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC) 3473 || soft_c1_residency_display(BIC_Avg_MHz)) { 3474 int status = -1; 3475 3476 assert(!no_perf || !no_msr); 3477 3478 switch (amperf_source) { 3479 case AMPERF_SOURCE_PERF: 3480 status = read_aperf_mperf_tsc_perf(t, cpu); 3481 break; 3482 case AMPERF_SOURCE_MSR: 3483 status = read_aperf_mperf_tsc_msr(t, cpu); 3484 break; 3485 } 3486 3487 if (status != 0) 3488 return status; 3489 } 3490 3491 if (DO_BIC(BIC_IPC)) 3492 if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long)) 3493 return -4; 3494 3495 if (DO_BIC(BIC_IRQ)) 3496 t->irq_count = irqs_per_cpu[cpu]; 3497 if (DO_BIC(BIC_SMI)) { 3498 if (get_msr(cpu, MSR_SMI_COUNT, &msr)) 3499 return -5; 3500 t->smi_count = msr & 0xFFFFFFFF; 3501 } 3502 if (DO_BIC(BIC_CPU_c1) && platform->has_msr_core_c1_res) { 3503 if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1)) 3504 return -6; 3505 } 3506 3507 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 3508 if (get_mp(cpu, mp, &t->counter[i])) 3509 return -10; 3510 } 3511 3512 /* collect core counters only for 1st thread in core */ 3513 if (!is_cpu_first_thread_in_core(t, c, p)) 3514 goto done; 3515 3516 if (platform->has_per_core_rapl) { 3517 status = get_rapl_counters(cpu, c->core_id, c, p); 3518 if (status != 0) 3519 return status; 3520 } 3521 3522 if 
(DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) { 3523 if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) 3524 return -6; 3525 } 3526 3527 if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !platform->has_msr_knl_core_c6_residency) { 3528 if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) 3529 return -7; 3530 } else if (platform->has_msr_knl_core_c6_residency && soft_c1_residency_display(BIC_CPU_c6)) { 3531 if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6)) 3532 return -7; 3533 } 3534 3535 if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7)) { 3536 if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) 3537 return -8; 3538 else if (t->is_atom) { 3539 /* 3540 * For Atom CPUs that has core cstate deeper than c6, 3541 * MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper. 3542 * Minus CC7 (and deeper cstates) residency to get 3543 * accturate cc6 residency. 3544 */ 3545 c->c6 -= c->c7; 3546 } 3547 } 3548 3549 if (DO_BIC(BIC_Mod_c6)) 3550 if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us)) 3551 return -8; 3552 3553 if (DO_BIC(BIC_CoreTmp)) { 3554 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) 3555 return -9; 3556 c->core_temp_c = tj_max - ((msr >> 16) & 0x7F); 3557 } 3558 3559 if (DO_BIC(BIC_CORE_THROT_CNT)) 3560 get_core_throt_cnt(cpu, &c->core_throt_cnt); 3561 3562 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 3563 if (get_mp(cpu, mp, &c->counter[i])) 3564 return -10; 3565 } 3566 3567 /* collect package counters only for 1st core in package */ 3568 if (!is_cpu_first_core_in_package(t, c, p)) 3569 goto done; 3570 3571 if (DO_BIC(BIC_Totl_c0)) { 3572 if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0)) 3573 return -10; 3574 } 3575 if (DO_BIC(BIC_Any_c0)) { 3576 if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0)) 3577 return -11; 3578 } 3579 if (DO_BIC(BIC_GFX_c0)) { 3580 if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0)) 3581 return -12; 3582 } 3583 if (DO_BIC(BIC_CPUGFX)) { 3584 if 
(get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0)) 3585 return -13; 3586 } 3587 if (DO_BIC(BIC_Pkgpc3)) 3588 if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) 3589 return -9; 3590 if (DO_BIC(BIC_Pkgpc6)) { 3591 if (platform->has_msr_atom_pkg_c6_residency) { 3592 if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6)) 3593 return -10; 3594 } else { 3595 if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) 3596 return -10; 3597 } 3598 } 3599 3600 if (DO_BIC(BIC_Pkgpc2)) 3601 if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2)) 3602 return -11; 3603 if (DO_BIC(BIC_Pkgpc7)) 3604 if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) 3605 return -12; 3606 if (DO_BIC(BIC_Pkgpc8)) 3607 if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8)) 3608 return -13; 3609 if (DO_BIC(BIC_Pkgpc9)) 3610 if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9)) 3611 return -13; 3612 if (DO_BIC(BIC_Pkgpc10)) 3613 if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10)) 3614 return -13; 3615 3616 if (DO_BIC(BIC_CPU_LPI)) 3617 p->cpu_lpi = cpuidle_cur_cpu_lpi_us; 3618 if (DO_BIC(BIC_SYS_LPI)) 3619 p->sys_lpi = cpuidle_cur_sys_lpi_us; 3620 3621 if (!platform->has_per_core_rapl) { 3622 status = get_rapl_counters(cpu, p->package_id, c, p); 3623 if (status != 0) 3624 return status; 3625 } 3626 3627 if (DO_BIC(BIC_PkgTmp)) { 3628 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) 3629 return -17; 3630 p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F); 3631 } 3632 3633 /* n.b. 
assume die0 uncore frequency applies to whole package */ 3634 if (DO_BIC(BIC_UNCORE_MHZ)) 3635 p->uncore_mhz = get_uncore_mhz(p->package_id, 0); 3636 3637 if (DO_BIC(BIC_GFX_rc6)) 3638 p->gfx_rc6_ms = gfx_info[GFX_rc6].val_ull; 3639 3640 if (DO_BIC(BIC_GFXMHz)) 3641 p->gfx_mhz = gfx_info[GFX_MHz].val; 3642 3643 if (DO_BIC(BIC_GFXACTMHz)) 3644 p->gfx_act_mhz = gfx_info[GFX_ACTMHz].val; 3645 3646 if (DO_BIC(BIC_SAM_mc6)) 3647 p->sam_mc6_ms = gfx_info[SAM_mc6].val_ull; 3648 3649 if (DO_BIC(BIC_SAMMHz)) 3650 p->sam_mhz = gfx_info[SAM_MHz].val; 3651 3652 if (DO_BIC(BIC_SAMACTMHz)) 3653 p->sam_act_mhz = gfx_info[SAM_ACTMHz].val; 3654 3655 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 3656 if (get_mp(cpu, mp, &p->counter[i])) 3657 return -10; 3658 } 3659 done: 3660 gettimeofday(&t->tv_end, (struct timezone *)NULL); 3661 3662 return 0; 3663 } 3664 3665 /* 3666 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit: 3667 * If you change the values, note they are used both in comparisons 3668 * (>= PCL__7) and to index pkg_cstate_limit_strings[]. 
3669 */ 3670 3671 #define PCLUKN 0 /* Unknown */ 3672 #define PCLRSV 1 /* Reserved */ 3673 #define PCL__0 2 /* PC0 */ 3674 #define PCL__1 3 /* PC1 */ 3675 #define PCL__2 4 /* PC2 */ 3676 #define PCL__3 5 /* PC3 */ 3677 #define PCL__4 6 /* PC4 */ 3678 #define PCL__6 7 /* PC6 */ 3679 #define PCL_6N 8 /* PC6 No Retention */ 3680 #define PCL_6R 9 /* PC6 Retention */ 3681 #define PCL__7 10 /* PC7 */ 3682 #define PCL_7S 11 /* PC7 Shrink */ 3683 #define PCL__8 12 /* PC8 */ 3684 #define PCL__9 13 /* PC9 */ 3685 #define PCL_10 14 /* PC10 */ 3686 #define PCLUNL 15 /* Unlimited */ 3687 3688 int pkg_cstate_limit = PCLUKN; 3689 char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2", 3690 "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited" 3691 }; 3692 3693 int nhm_pkg_cstate_limits[16] = 3694 { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 3695 PCLRSV, PCLRSV 3696 }; 3697 3698 int snb_pkg_cstate_limits[16] = 3699 { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 3700 PCLRSV, PCLRSV 3701 }; 3702 3703 int hsw_pkg_cstate_limits[16] = 3704 { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 3705 PCLRSV, PCLRSV 3706 }; 3707 3708 int slv_pkg_cstate_limits[16] = 3709 { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 3710 PCL__6, PCL__7 3711 }; 3712 3713 int amt_pkg_cstate_limits[16] = 3714 { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 3715 PCLRSV, PCLRSV 3716 }; 3717 3718 int phi_pkg_cstate_limits[16] = 3719 { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 3720 PCLRSV, PCLRSV 3721 }; 3722 3723 int glm_pkg_cstate_limits[16] = 3724 { PCLUNL, 
PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 3725 PCLRSV, PCLRSV 3726 }; 3727 3728 int skx_pkg_cstate_limits[16] = 3729 { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 3730 PCLRSV, PCLRSV 3731 }; 3732 3733 int icx_pkg_cstate_limits[16] = 3734 { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 3735 PCLRSV, PCLRSV 3736 }; 3737 3738 void probe_cst_limit(void) 3739 { 3740 unsigned long long msr; 3741 int *pkg_cstate_limits; 3742 3743 if (!platform->has_nhm_msrs || no_msr) 3744 return; 3745 3746 switch (platform->cst_limit) { 3747 case CST_LIMIT_NHM: 3748 pkg_cstate_limits = nhm_pkg_cstate_limits; 3749 break; 3750 case CST_LIMIT_SNB: 3751 pkg_cstate_limits = snb_pkg_cstate_limits; 3752 break; 3753 case CST_LIMIT_HSW: 3754 pkg_cstate_limits = hsw_pkg_cstate_limits; 3755 break; 3756 case CST_LIMIT_SKX: 3757 pkg_cstate_limits = skx_pkg_cstate_limits; 3758 break; 3759 case CST_LIMIT_ICX: 3760 pkg_cstate_limits = icx_pkg_cstate_limits; 3761 break; 3762 case CST_LIMIT_SLV: 3763 pkg_cstate_limits = slv_pkg_cstate_limits; 3764 break; 3765 case CST_LIMIT_AMT: 3766 pkg_cstate_limits = amt_pkg_cstate_limits; 3767 break; 3768 case CST_LIMIT_KNL: 3769 pkg_cstate_limits = phi_pkg_cstate_limits; 3770 break; 3771 case CST_LIMIT_GMT: 3772 pkg_cstate_limits = glm_pkg_cstate_limits; 3773 break; 3774 default: 3775 return; 3776 } 3777 3778 get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); 3779 pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; 3780 } 3781 3782 static void dump_platform_info(void) 3783 { 3784 unsigned long long msr; 3785 unsigned int ratio; 3786 3787 if (!platform->has_nhm_msrs || no_msr) 3788 return; 3789 3790 get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); 3791 3792 fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); 3793 3794 ratio = (msr >> 40) & 0xFF; 3795 fprintf(outf, 
"%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk); 3796 3797 ratio = (msr >> 8) & 0xFF; 3798 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); 3799 } 3800 3801 static void dump_power_ctl(void) 3802 { 3803 unsigned long long msr; 3804 3805 if (!platform->has_nhm_msrs || no_msr) 3806 return; 3807 3808 get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); 3809 fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", 3810 base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); 3811 3812 /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */ 3813 if (platform->has_cst_prewake_bit) 3814 fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN"); 3815 3816 return; 3817 } 3818 3819 static void dump_turbo_ratio_limit2(void) 3820 { 3821 unsigned long long msr; 3822 unsigned int ratio; 3823 3824 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); 3825 3826 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr); 3827 3828 ratio = (msr >> 8) & 0xFF; 3829 if (ratio) 3830 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk); 3831 3832 ratio = (msr >> 0) & 0xFF; 3833 if (ratio) 3834 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk); 3835 return; 3836 } 3837 3838 static void dump_turbo_ratio_limit1(void) 3839 { 3840 unsigned long long msr; 3841 unsigned int ratio; 3842 3843 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); 3844 3845 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr); 3846 3847 ratio = (msr >> 56) & 0xFF; 3848 if (ratio) 3849 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk); 3850 3851 ratio = (msr >> 48) & 0xFF; 3852 if (ratio) 3853 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk); 3854 3855 ratio = (msr >> 40) & 0xFF; 3856 if (ratio) 3857 fprintf(outf, "%d * %.1f = %.1f MHz 
max turbo 14 active cores\n", ratio, bclk, ratio * bclk); 3858 3859 ratio = (msr >> 32) & 0xFF; 3860 if (ratio) 3861 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk); 3862 3863 ratio = (msr >> 24) & 0xFF; 3864 if (ratio) 3865 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk); 3866 3867 ratio = (msr >> 16) & 0xFF; 3868 if (ratio) 3869 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk); 3870 3871 ratio = (msr >> 8) & 0xFF; 3872 if (ratio) 3873 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk); 3874 3875 ratio = (msr >> 0) & 0xFF; 3876 if (ratio) 3877 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk); 3878 return; 3879 } 3880 3881 static void dump_turbo_ratio_limits(int trl_msr_offset) 3882 { 3883 unsigned long long msr, core_counts; 3884 int shift; 3885 3886 get_msr(base_cpu, trl_msr_offset, &msr); 3887 fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n", 3888 base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? 
"SECONDARY_" : "", msr); 3889 3890 if (platform->trl_msrs & TRL_CORECOUNT) { 3891 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts); 3892 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts); 3893 } else { 3894 core_counts = 0x0807060504030201; 3895 } 3896 3897 for (shift = 56; shift >= 0; shift -= 8) { 3898 unsigned int ratio, group_size; 3899 3900 ratio = (msr >> shift) & 0xFF; 3901 group_size = (core_counts >> shift) & 0xFF; 3902 if (ratio) 3903 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", 3904 ratio, bclk, ratio * bclk, group_size); 3905 } 3906 3907 return; 3908 } 3909 3910 static void dump_atom_turbo_ratio_limits(void) 3911 { 3912 unsigned long long msr; 3913 unsigned int ratio; 3914 3915 get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr); 3916 fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF); 3917 3918 ratio = (msr >> 0) & 0x3F; 3919 if (ratio) 3920 fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", ratio, bclk, ratio * bclk); 3921 3922 ratio = (msr >> 8) & 0x3F; 3923 if (ratio) 3924 fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", ratio, bclk, ratio * bclk); 3925 3926 ratio = (msr >> 16) & 0x3F; 3927 if (ratio) 3928 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); 3929 3930 get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr); 3931 fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF); 3932 3933 ratio = (msr >> 24) & 0x3F; 3934 if (ratio) 3935 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk); 3936 3937 ratio = (msr >> 16) & 0x3F; 3938 if (ratio) 3939 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk); 3940 3941 ratio = (msr >> 8) & 0x3F; 3942 if (ratio) 3943 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk); 3944 3945 ratio = (msr >> 0) & 0x3F; 
3946 if (ratio) 3947 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", ratio, bclk, ratio * bclk); 3948 } 3949 3950 static void dump_knl_turbo_ratio_limits(void) 3951 { 3952 const unsigned int buckets_no = 7; 3953 3954 unsigned long long msr; 3955 int delta_cores, delta_ratio; 3956 int i, b_nr; 3957 unsigned int cores[buckets_no]; 3958 unsigned int ratio[buckets_no]; 3959 3960 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); 3961 3962 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); 3963 3964 /* 3965 * Turbo encoding in KNL is as follows: 3966 * [0] -- Reserved 3967 * [7:1] -- Base value of number of active cores of bucket 1. 3968 * [15:8] -- Base value of freq ratio of bucket 1. 3969 * [20:16] -- +ve delta of number of active cores of bucket 2. 3970 * i.e. active cores of bucket 2 = 3971 * active cores of bucket 1 + delta 3972 * [23:21] -- Negative delta of freq ratio of bucket 2. 3973 * i.e. freq ratio of bucket 2 = 3974 * freq ratio of bucket 1 - delta 3975 * [28:24]-- +ve delta of number of active cores of bucket 3. 3976 * [31:29]-- -ve delta of freq ratio of bucket 3. 3977 * [36:32]-- +ve delta of number of active cores of bucket 4. 3978 * [39:37]-- -ve delta of freq ratio of bucket 4. 3979 * [44:40]-- +ve delta of number of active cores of bucket 5. 3980 * [47:45]-- -ve delta of freq ratio of bucket 5. 3981 * [52:48]-- +ve delta of number of active cores of bucket 6. 3982 * [55:53]-- -ve delta of freq ratio of bucket 6. 3983 * [60:56]-- +ve delta of number of active cores of bucket 7. 3984 * [63:61]-- -ve delta of freq ratio of bucket 7. 
3985 */ 3986 3987 b_nr = 0; 3988 cores[b_nr] = (msr & 0xFF) >> 1; 3989 ratio[b_nr] = (msr >> 8) & 0xFF; 3990 3991 for (i = 16; i < 64; i += 8) { 3992 delta_cores = (msr >> i) & 0x1F; 3993 delta_ratio = (msr >> (i + 5)) & 0x7; 3994 3995 cores[b_nr + 1] = cores[b_nr] + delta_cores; 3996 ratio[b_nr + 1] = ratio[b_nr] - delta_ratio; 3997 b_nr++; 3998 } 3999 4000 for (i = buckets_no - 1; i >= 0; i--) 4001 if (i > 0 ? ratio[i] != ratio[i - 1] : 1) 4002 fprintf(outf, 4003 "%d * %.1f = %.1f MHz max turbo %d active cores\n", 4004 ratio[i], bclk, ratio[i] * bclk, cores[i]); 4005 } 4006 4007 static void dump_cst_cfg(void) 4008 { 4009 unsigned long long msr; 4010 4011 if (!platform->has_nhm_msrs || no_msr) 4012 return; 4013 4014 get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); 4015 4016 fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr); 4017 4018 fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)", 4019 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", 4020 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", 4021 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", 4022 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", 4023 (msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]); 4024 4025 #define AUTOMATIC_CSTATE_CONVERSION (1UL << 16) 4026 if (platform->has_cst_auto_convension) { 4027 fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? 
"on" : "off"); 4028 } 4029 4030 fprintf(outf, ")\n"); 4031 4032 return; 4033 } 4034 4035 static void dump_config_tdp(void) 4036 { 4037 unsigned long long msr; 4038 4039 get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr); 4040 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr); 4041 fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF); 4042 4043 get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr); 4044 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr); 4045 if (msr) { 4046 fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF); 4047 fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF); 4048 fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); 4049 fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF); 4050 } 4051 fprintf(outf, ")\n"); 4052 4053 get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr); 4054 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr); 4055 if (msr) { 4056 fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF); 4057 fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF); 4058 fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); 4059 fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF); 4060 } 4061 fprintf(outf, ")\n"); 4062 4063 get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr); 4064 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr); 4065 if ((msr) & 0x3) 4066 fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3); 4067 fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); 4068 fprintf(outf, ")\n"); 4069 4070 get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr); 4071 fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); 4072 fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF); 4073 fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); 4074 fprintf(outf, ")\n"); 4075 } 4076 4077 unsigned int 
irtl_time_units[] = { 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 }; 4078 4079 void print_irtl(void) 4080 { 4081 unsigned long long msr; 4082 4083 if (!platform->has_irtl_msrs || no_msr) 4084 return; 4085 4086 if (platform->supported_cstates & PC3) { 4087 get_msr(base_cpu, MSR_PKGC3_IRTL, &msr); 4088 fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr); 4089 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", 4090 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 4091 } 4092 4093 if (platform->supported_cstates & PC6) { 4094 get_msr(base_cpu, MSR_PKGC6_IRTL, &msr); 4095 fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr); 4096 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", 4097 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 4098 } 4099 4100 if (platform->supported_cstates & PC7) { 4101 get_msr(base_cpu, MSR_PKGC7_IRTL, &msr); 4102 fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr); 4103 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", 4104 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 4105 } 4106 4107 if (platform->supported_cstates & PC8) { 4108 get_msr(base_cpu, MSR_PKGC8_IRTL, &msr); 4109 fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr); 4110 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", 4111 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 4112 } 4113 4114 if (platform->supported_cstates & PC9) { 4115 get_msr(base_cpu, MSR_PKGC9_IRTL, &msr); 4116 fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr); 4117 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", 4118 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 4119 } 4120 4121 if (platform->supported_cstates & PC10) { 4122 get_msr(base_cpu, MSR_PKGC10_IRTL, &msr); 4123 fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr); 4124 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? 
"" : "NOT", 4125 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 4126 } 4127 } 4128 4129 void free_fd_percpu(void) 4130 { 4131 int i; 4132 4133 if (!fd_percpu) 4134 return; 4135 4136 for (i = 0; i < topo.max_cpu_num + 1; ++i) { 4137 if (fd_percpu[i] != 0) 4138 close(fd_percpu[i]); 4139 } 4140 4141 free(fd_percpu); 4142 fd_percpu = NULL; 4143 } 4144 4145 void free_fd_amperf_percpu(void) 4146 { 4147 int i; 4148 4149 if (!fd_amperf_percpu) 4150 return; 4151 4152 for (i = 0; i < topo.max_cpu_num + 1; ++i) { 4153 if (fd_amperf_percpu[i].mperf != 0) 4154 close(fd_amperf_percpu[i].mperf); 4155 4156 if (fd_amperf_percpu[i].aperf != 0) 4157 close(fd_amperf_percpu[i].aperf); 4158 } 4159 4160 free(fd_amperf_percpu); 4161 fd_amperf_percpu = NULL; 4162 } 4163 4164 void free_fd_instr_count_percpu(void) 4165 { 4166 if (!fd_instr_count_percpu) 4167 return; 4168 4169 for (int i = 0; i < topo.max_cpu_num + 1; ++i) { 4170 if (fd_instr_count_percpu[i] != 0) 4171 close(fd_instr_count_percpu[i]); 4172 } 4173 4174 free(fd_instr_count_percpu); 4175 fd_instr_count_percpu = NULL; 4176 } 4177 4178 void free_fd_rapl_percpu(void) 4179 { 4180 if (!rapl_counter_info_perdomain) 4181 return; 4182 4183 const int num_domains = platform->has_per_core_rapl ? 
topo.num_cores : topo.num_packages; 4184 4185 for (int domain_id = 0; domain_id < num_domains; ++domain_id) { 4186 if (rapl_counter_info_perdomain[domain_id].fd_perf != -1) 4187 close(rapl_counter_info_perdomain[domain_id].fd_perf); 4188 } 4189 4190 free(rapl_counter_info_perdomain); 4191 } 4192 4193 void free_all_buffers(void) 4194 { 4195 int i; 4196 4197 CPU_FREE(cpu_present_set); 4198 cpu_present_set = NULL; 4199 cpu_present_setsize = 0; 4200 4201 CPU_FREE(cpu_effective_set); 4202 cpu_effective_set = NULL; 4203 cpu_effective_setsize = 0; 4204 4205 CPU_FREE(cpu_allowed_set); 4206 cpu_allowed_set = NULL; 4207 cpu_allowed_setsize = 0; 4208 4209 CPU_FREE(cpu_affinity_set); 4210 cpu_affinity_set = NULL; 4211 cpu_affinity_setsize = 0; 4212 4213 free(thread_even); 4214 free(core_even); 4215 free(package_even); 4216 4217 thread_even = NULL; 4218 core_even = NULL; 4219 package_even = NULL; 4220 4221 free(thread_odd); 4222 free(core_odd); 4223 free(package_odd); 4224 4225 thread_odd = NULL; 4226 core_odd = NULL; 4227 package_odd = NULL; 4228 4229 free(output_buffer); 4230 output_buffer = NULL; 4231 outp = NULL; 4232 4233 free_fd_percpu(); 4234 free_fd_instr_count_percpu(); 4235 free_fd_amperf_percpu(); 4236 free_fd_rapl_percpu(); 4237 4238 free(irq_column_2_cpu); 4239 free(irqs_per_cpu); 4240 4241 for (i = 0; i <= topo.max_cpu_num; ++i) { 4242 if (cpus[i].put_ids) 4243 CPU_FREE(cpus[i].put_ids); 4244 } 4245 free(cpus); 4246 } 4247 4248 /* 4249 * Parse a file containing a single int. 4250 * Return 0 if file can not be opened 4251 * Exit if file can be opened, but can not be parsed 4252 */ 4253 int parse_int_file(const char *fmt, ...) 
4254 { 4255 va_list args; 4256 char path[PATH_MAX]; 4257 FILE *filep; 4258 int value; 4259 4260 va_start(args, fmt); 4261 vsnprintf(path, sizeof(path), fmt, args); 4262 va_end(args); 4263 filep = fopen(path, "r"); 4264 if (!filep) 4265 return 0; 4266 if (fscanf(filep, "%d", &value) != 1) 4267 err(1, "%s: failed to parse number from file", path); 4268 fclose(filep); 4269 return value; 4270 } 4271 4272 /* 4273 * cpu_is_first_core_in_package(cpu) 4274 * return 1 if given CPU is 1st core in package 4275 */ 4276 int cpu_is_first_core_in_package(int cpu) 4277 { 4278 return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu); 4279 } 4280 4281 int get_physical_package_id(int cpu) 4282 { 4283 return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu); 4284 } 4285 4286 int get_die_id(int cpu) 4287 { 4288 return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu); 4289 } 4290 4291 int get_core_id(int cpu) 4292 { 4293 return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); 4294 } 4295 4296 void set_node_data(void) 4297 { 4298 int pkg, node, lnode, cpu, cpux; 4299 int cpu_count; 4300 4301 /* initialize logical_node_id */ 4302 for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) 4303 cpus[cpu].logical_node_id = -1; 4304 4305 cpu_count = 0; 4306 for (pkg = 0; pkg < topo.num_packages; pkg++) { 4307 lnode = 0; 4308 for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) { 4309 if (cpus[cpu].physical_package_id != pkg) 4310 continue; 4311 /* find a cpu with an unset logical_node_id */ 4312 if (cpus[cpu].logical_node_id != -1) 4313 continue; 4314 cpus[cpu].logical_node_id = lnode; 4315 node = cpus[cpu].physical_node_id; 4316 cpu_count++; 4317 /* 4318 * find all matching cpus on this pkg and set 4319 * the logical_node_id 4320 */ 4321 for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) { 4322 if ((cpus[cpux].physical_package_id == pkg) && (cpus[cpux].physical_node_id == node)) { 4323 
cpus[cpux].logical_node_id = lnode; 4324 cpu_count++; 4325 } 4326 } 4327 lnode++; 4328 if (lnode > topo.nodes_per_pkg) 4329 topo.nodes_per_pkg = lnode; 4330 } 4331 if (cpu_count >= topo.max_cpu_num) 4332 break; 4333 } 4334 } 4335 4336 int get_physical_node_id(struct cpu_topology *thiscpu) 4337 { 4338 char path[80]; 4339 FILE *filep; 4340 int i; 4341 int cpu = thiscpu->logical_cpu_id; 4342 4343 for (i = 0; i <= topo.max_cpu_num; i++) { 4344 sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", cpu, i); 4345 filep = fopen(path, "r"); 4346 if (!filep) 4347 continue; 4348 fclose(filep); 4349 return i; 4350 } 4351 return -1; 4352 } 4353 4354 static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size) 4355 { 4356 unsigned int start, end; 4357 char *next = cpu_str; 4358 4359 while (next && *next) { 4360 4361 if (*next == '-') /* no negative cpu numbers */ 4362 return 1; 4363 4364 start = strtoul(next, &next, 10); 4365 4366 if (start >= CPU_SUBSET_MAXCPUS) 4367 return 1; 4368 CPU_SET_S(start, cpu_set_size, cpu_set); 4369 4370 if (*next == '\0' || *next == '\n') 4371 break; 4372 4373 if (*next == ',') { 4374 next += 1; 4375 continue; 4376 } 4377 4378 if (*next == '-') { 4379 next += 1; /* start range */ 4380 } else if (*next == '.') { 4381 next += 1; 4382 if (*next == '.') 4383 next += 1; /* start range */ 4384 else 4385 return 1; 4386 } 4387 4388 end = strtoul(next, &next, 10); 4389 if (end <= start) 4390 return 1; 4391 4392 while (++start <= end) { 4393 if (start >= CPU_SUBSET_MAXCPUS) 4394 return 1; 4395 CPU_SET_S(start, cpu_set_size, cpu_set); 4396 } 4397 4398 if (*next == ',') 4399 next += 1; 4400 else if (*next != '\0' && *next != '\n') 4401 return 1; 4402 } 4403 4404 return 0; 4405 } 4406 4407 int get_thread_siblings(struct cpu_topology *thiscpu) 4408 { 4409 char path[80], character; 4410 FILE *filep; 4411 unsigned long map; 4412 int so, shift, sib_core; 4413 int cpu = thiscpu->logical_cpu_id; 4414 int offset = topo.max_cpu_num + 1; 4415 
size_t size; 4416 int thread_id = 0; 4417 4418 thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1)); 4419 if (thiscpu->thread_id < 0) 4420 thiscpu->thread_id = thread_id++; 4421 if (!thiscpu->put_ids) 4422 return -1; 4423 4424 size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 4425 CPU_ZERO_S(size, thiscpu->put_ids); 4426 4427 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu); 4428 filep = fopen(path, "r"); 4429 4430 if (!filep) { 4431 warnx("%s: open failed", path); 4432 return -1; 4433 } 4434 do { 4435 offset -= BITMASK_SIZE; 4436 if (fscanf(filep, "%lx%c", &map, &character) != 2) 4437 err(1, "%s: failed to parse file", path); 4438 for (shift = 0; shift < BITMASK_SIZE; shift++) { 4439 if ((map >> shift) & 0x1) { 4440 so = shift + offset; 4441 sib_core = get_core_id(so); 4442 if (sib_core == thiscpu->physical_core_id) { 4443 CPU_SET_S(so, size, thiscpu->put_ids); 4444 if ((so != cpu) && (cpus[so].thread_id < 0)) 4445 cpus[so].thread_id = thread_id++; 4446 } 4447 } 4448 } 4449 } while (character == ','); 4450 fclose(filep); 4451 4452 return CPU_COUNT_S(size, thiscpu->put_ids); 4453 } 4454 4455 /* 4456 * run func(thread, core, package) in topology order 4457 * skip non-present cpus 4458 */ 4459 4460 int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *, 4461 struct pkg_data *, struct thread_data *, struct core_data *, 4462 struct pkg_data *), struct thread_data *thread_base, 4463 struct core_data *core_base, struct pkg_data *pkg_base, 4464 struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2) 4465 { 4466 int retval, pkg_no, node_no, core_no, thread_no; 4467 4468 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 4469 for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) { 4470 for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { 4471 for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { 4472 struct thread_data *t, *t2; 4473 struct core_data *c, *c2; 
4474 struct pkg_data *p, *p2; 4475 4476 t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); 4477 4478 if (cpu_is_not_allowed(t->cpu_id)) 4479 continue; 4480 4481 t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no); 4482 4483 c = GET_CORE(core_base, core_no, node_no, pkg_no); 4484 c2 = GET_CORE(core_base2, core_no, node_no, pkg_no); 4485 4486 p = GET_PKG(pkg_base, pkg_no); 4487 p2 = GET_PKG(pkg_base2, pkg_no); 4488 4489 retval = func(t, c, p, t2, c2, p2); 4490 if (retval) 4491 return retval; 4492 } 4493 } 4494 } 4495 } 4496 return 0; 4497 } 4498 4499 /* 4500 * run func(cpu) on every cpu in /proc/stat 4501 * return max_cpu number 4502 */ 4503 int for_all_proc_cpus(int (func) (int)) 4504 { 4505 FILE *fp; 4506 int cpu_num; 4507 int retval; 4508 4509 fp = fopen_or_die(proc_stat, "r"); 4510 4511 retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n"); 4512 if (retval != 0) 4513 err(1, "%s: failed to parse format", proc_stat); 4514 4515 while (1) { 4516 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num); 4517 if (retval != 1) 4518 break; 4519 4520 retval = func(cpu_num); 4521 if (retval) { 4522 fclose(fp); 4523 return (retval); 4524 } 4525 } 4526 fclose(fp); 4527 return 0; 4528 } 4529 4530 #define PATH_EFFECTIVE_CPUS "/sys/fs/cgroup/cpuset.cpus.effective" 4531 4532 static char cpu_effective_str[1024]; 4533 4534 static int update_effective_str(bool startup) 4535 { 4536 FILE *fp; 4537 char *pos; 4538 char buf[1024]; 4539 int ret; 4540 4541 if (cpu_effective_str[0] == '\0' && !startup) 4542 return 0; 4543 4544 fp = fopen(PATH_EFFECTIVE_CPUS, "r"); 4545 if (!fp) 4546 return 0; 4547 4548 pos = fgets(buf, 1024, fp); 4549 if (!pos) 4550 err(1, "%s: file read failed\n", PATH_EFFECTIVE_CPUS); 4551 4552 fclose(fp); 4553 4554 ret = strncmp(cpu_effective_str, buf, 1024); 4555 if (!ret) 4556 return 0; 4557 4558 strncpy(cpu_effective_str, buf, 1024); 4559 return 1; 4560 } 4561 4562 static void 
update_effective_set(bool startup) 4563 { 4564 update_effective_str(startup); 4565 4566 if (parse_cpu_str(cpu_effective_str, cpu_effective_set, cpu_effective_setsize)) 4567 err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str); 4568 } 4569 4570 void linux_perf_init(void); 4571 void rapl_perf_init(void); 4572 4573 void re_initialize(void) 4574 { 4575 free_all_buffers(); 4576 setup_all_buffers(false); 4577 linux_perf_init(); 4578 rapl_perf_init(); 4579 fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, 4580 topo.allowed_cpus); 4581 } 4582 4583 void set_max_cpu_num(void) 4584 { 4585 FILE *filep; 4586 int base_cpu; 4587 unsigned long dummy; 4588 char pathname[64]; 4589 4590 base_cpu = sched_getcpu(); 4591 if (base_cpu < 0) 4592 err(1, "cannot find calling cpu ID"); 4593 sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", base_cpu); 4594 4595 filep = fopen_or_die(pathname, "r"); 4596 topo.max_cpu_num = 0; 4597 while (fscanf(filep, "%lx,", &dummy) == 1) 4598 topo.max_cpu_num += BITMASK_SIZE; 4599 fclose(filep); 4600 topo.max_cpu_num--; /* 0 based */ 4601 } 4602 4603 /* 4604 * count_cpus() 4605 * remember the last one seen, it will be the max 4606 */ 4607 int count_cpus(int cpu) 4608 { 4609 UNUSED(cpu); 4610 4611 topo.num_cpus++; 4612 return 0; 4613 } 4614 4615 int mark_cpu_present(int cpu) 4616 { 4617 CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); 4618 return 0; 4619 } 4620 4621 int init_thread_id(int cpu) 4622 { 4623 cpus[cpu].thread_id = -1; 4624 return 0; 4625 } 4626 4627 /* 4628 * snapshot_proc_interrupts() 4629 * 4630 * read and record summary of /proc/interrupts 4631 * 4632 * return 1 if config change requires a restart, else return 0 4633 */ 4634 int snapshot_proc_interrupts(void) 4635 { 4636 static FILE *fp; 4637 int column, retval; 4638 4639 if (fp == NULL) 4640 fp = fopen_or_die("/proc/interrupts", "r"); 4641 else 4642 rewind(fp); 4643 4644 /* read 1st line of 
/proc/interrupts to get cpu* name for each column */ 4645 for (column = 0; column < topo.num_cpus; ++column) { 4646 int cpu_number; 4647 4648 retval = fscanf(fp, " CPU%d", &cpu_number); 4649 if (retval != 1) 4650 break; 4651 4652 if (cpu_number > topo.max_cpu_num) { 4653 warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num); 4654 return 1; 4655 } 4656 4657 irq_column_2_cpu[column] = cpu_number; 4658 irqs_per_cpu[cpu_number] = 0; 4659 } 4660 4661 /* read /proc/interrupt count lines and sum up irqs per cpu */ 4662 while (1) { 4663 int column; 4664 char buf[64]; 4665 4666 retval = fscanf(fp, " %s:", buf); /* flush irq# "N:" */ 4667 if (retval != 1) 4668 break; 4669 4670 /* read the count per cpu */ 4671 for (column = 0; column < topo.num_cpus; ++column) { 4672 4673 int cpu_number, irq_count; 4674 4675 retval = fscanf(fp, " %d", &irq_count); 4676 if (retval != 1) 4677 break; 4678 4679 cpu_number = irq_column_2_cpu[column]; 4680 irqs_per_cpu[cpu_number] += irq_count; 4681 4682 } 4683 4684 while (getc(fp) != '\n') ; /* flush interrupt description */ 4685 4686 } 4687 return 0; 4688 } 4689 4690 /* 4691 * snapshot_graphics() 4692 * 4693 * record snapshot of specified graphics sysfs knob 4694 * 4695 * return 1 if config change requires a restart, else return 0 4696 */ 4697 int snapshot_graphics(int idx) 4698 { 4699 FILE *fp; 4700 int retval; 4701 4702 switch (idx) { 4703 case GFX_rc6: 4704 case SAM_mc6: 4705 fp = fopen_or_die(gfx_info[idx].path, "r"); 4706 retval = fscanf(fp, "%lld", &gfx_info[idx].val_ull); 4707 if (retval != 1) 4708 err(1, "rc6"); 4709 fclose(fp); 4710 return 0; 4711 case GFX_MHz: 4712 case GFX_ACTMHz: 4713 case SAM_MHz: 4714 case SAM_ACTMHz: 4715 if (gfx_info[idx].fp == NULL) { 4716 gfx_info[idx].fp = fopen_or_die(gfx_info[idx].path, "r"); 4717 } else { 4718 rewind(gfx_info[idx].fp); 4719 fflush(gfx_info[idx].fp); 4720 } 4721 retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val); 4722 if (retval != 1) 4723 err(1, "MHz"); 4724 return 0; 
4725 default: 4726 return -EINVAL; 4727 } 4728 } 4729 4730 /* 4731 * snapshot_cpu_lpi() 4732 * 4733 * record snapshot of 4734 * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us 4735 */ 4736 int snapshot_cpu_lpi_us(void) 4737 { 4738 FILE *fp; 4739 int retval; 4740 4741 fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r"); 4742 4743 retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us); 4744 if (retval != 1) { 4745 fprintf(stderr, "Disabling Low Power Idle CPU output\n"); 4746 BIC_NOT_PRESENT(BIC_CPU_LPI); 4747 fclose(fp); 4748 return -1; 4749 } 4750 4751 fclose(fp); 4752 4753 return 0; 4754 } 4755 4756 /* 4757 * snapshot_sys_lpi() 4758 * 4759 * record snapshot of sys_lpi_file 4760 */ 4761 int snapshot_sys_lpi_us(void) 4762 { 4763 FILE *fp; 4764 int retval; 4765 4766 fp = fopen_or_die(sys_lpi_file, "r"); 4767 4768 retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us); 4769 if (retval != 1) { 4770 fprintf(stderr, "Disabling Low Power Idle System output\n"); 4771 BIC_NOT_PRESENT(BIC_SYS_LPI); 4772 fclose(fp); 4773 return -1; 4774 } 4775 fclose(fp); 4776 4777 return 0; 4778 } 4779 4780 /* 4781 * snapshot /proc and /sys files 4782 * 4783 * return 1 if configuration restart needed, else return 0 4784 */ 4785 int snapshot_proc_sysfs_files(void) 4786 { 4787 if (DO_BIC(BIC_IRQ)) 4788 if (snapshot_proc_interrupts()) 4789 return 1; 4790 4791 if (DO_BIC(BIC_GFX_rc6)) 4792 snapshot_graphics(GFX_rc6); 4793 4794 if (DO_BIC(BIC_GFXMHz)) 4795 snapshot_graphics(GFX_MHz); 4796 4797 if (DO_BIC(BIC_GFXACTMHz)) 4798 snapshot_graphics(GFX_ACTMHz); 4799 4800 if (DO_BIC(BIC_SAM_mc6)) 4801 snapshot_graphics(SAM_mc6); 4802 4803 if (DO_BIC(BIC_SAMMHz)) 4804 snapshot_graphics(SAM_MHz); 4805 4806 if (DO_BIC(BIC_SAMACTMHz)) 4807 snapshot_graphics(SAM_ACTMHz); 4808 4809 if (DO_BIC(BIC_CPU_LPI)) 4810 snapshot_cpu_lpi_us(); 4811 4812 if (DO_BIC(BIC_SYS_LPI)) 4813 snapshot_sys_lpi_us(); 4814 4815 return 0; 4816 } 4817 4818 int exit_requested; 4819 
4820 static void signal_handler(int signal) 4821 { 4822 switch (signal) { 4823 case SIGINT: 4824 exit_requested = 1; 4825 if (debug) 4826 fprintf(stderr, " SIGINT\n"); 4827 break; 4828 case SIGUSR1: 4829 if (debug > 1) 4830 fprintf(stderr, "SIGUSR1\n"); 4831 break; 4832 } 4833 } 4834 4835 void setup_signal_handler(void) 4836 { 4837 struct sigaction sa; 4838 4839 memset(&sa, 0, sizeof(sa)); 4840 4841 sa.sa_handler = &signal_handler; 4842 4843 if (sigaction(SIGINT, &sa, NULL) < 0) 4844 err(1, "sigaction SIGINT"); 4845 if (sigaction(SIGUSR1, &sa, NULL) < 0) 4846 err(1, "sigaction SIGUSR1"); 4847 } 4848 4849 void do_sleep(void) 4850 { 4851 struct timeval tout; 4852 struct timespec rest; 4853 fd_set readfds; 4854 int retval; 4855 4856 FD_ZERO(&readfds); 4857 FD_SET(0, &readfds); 4858 4859 if (ignore_stdin) { 4860 nanosleep(&interval_ts, NULL); 4861 return; 4862 } 4863 4864 tout = interval_tv; 4865 retval = select(1, &readfds, NULL, NULL, &tout); 4866 4867 if (retval == 1) { 4868 switch (getc(stdin)) { 4869 case 'q': 4870 exit_requested = 1; 4871 break; 4872 case EOF: 4873 /* 4874 * 'stdin' is a pipe closed on the other end. There 4875 * won't be any further input. 
4876 */ 4877 ignore_stdin = 1; 4878 /* Sleep the rest of the time */ 4879 rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000); 4880 rest.tv_nsec = (tout.tv_usec % 1000000) * 1000; 4881 nanosleep(&rest, NULL); 4882 } 4883 } 4884 } 4885 4886 int get_msr_sum(int cpu, off_t offset, unsigned long long *msr) 4887 { 4888 int ret, idx; 4889 unsigned long long msr_cur, msr_last; 4890 4891 assert(!no_msr); 4892 4893 if (!per_cpu_msr_sum) 4894 return 1; 4895 4896 idx = offset_to_idx(offset); 4897 if (idx < 0) 4898 return idx; 4899 /* get_msr_sum() = sum + (get_msr() - last) */ 4900 ret = get_msr(cpu, offset, &msr_cur); 4901 if (ret) 4902 return ret; 4903 msr_last = per_cpu_msr_sum[cpu].entries[idx].last; 4904 DELTA_WRAP32(msr_cur, msr_last); 4905 *msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum; 4906 4907 return 0; 4908 } 4909 4910 timer_t timerid; 4911 4912 /* Timer callback, update the sum of MSRs periodically. */ 4913 static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p) 4914 { 4915 int i, ret; 4916 int cpu = t->cpu_id; 4917 4918 UNUSED(c); 4919 UNUSED(p); 4920 4921 assert(!no_msr); 4922 4923 for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) { 4924 unsigned long long msr_cur, msr_last; 4925 off_t offset; 4926 4927 if (!idx_valid(i)) 4928 continue; 4929 offset = idx_to_offset(i); 4930 if (offset < 0) 4931 continue; 4932 ret = get_msr(cpu, offset, &msr_cur); 4933 if (ret) { 4934 fprintf(outf, "Can not update msr(0x%llx)\n", (unsigned long long)offset); 4935 continue; 4936 } 4937 4938 msr_last = per_cpu_msr_sum[cpu].entries[i].last; 4939 per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff; 4940 4941 DELTA_WRAP32(msr_cur, msr_last); 4942 per_cpu_msr_sum[cpu].entries[i].sum += msr_last; 4943 } 4944 return 0; 4945 } 4946 4947 static void msr_record_handler(union sigval v) 4948 { 4949 UNUSED(v); 4950 4951 for_all_cpus(update_msr_sum, EVEN_COUNTERS); 4952 } 4953 4954 void msr_sum_record(void) 4955 { 4956 struct itimerspec its; 4957 
struct sigevent sev; 4958 4959 per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array)); 4960 if (!per_cpu_msr_sum) { 4961 fprintf(outf, "Can not allocate memory for long time MSR.\n"); 4962 return; 4963 } 4964 /* 4965 * Signal handler might be restricted, so use thread notifier instead. 4966 */ 4967 memset(&sev, 0, sizeof(struct sigevent)); 4968 sev.sigev_notify = SIGEV_THREAD; 4969 sev.sigev_notify_function = msr_record_handler; 4970 4971 sev.sigev_value.sival_ptr = &timerid; 4972 if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) { 4973 fprintf(outf, "Can not create timer.\n"); 4974 goto release_msr; 4975 } 4976 4977 its.it_value.tv_sec = 0; 4978 its.it_value.tv_nsec = 1; 4979 /* 4980 * A wraparound time has been calculated early. 4981 * Some sources state that the peak power for a 4982 * microprocessor is usually 1.5 times the TDP rating, 4983 * use 2 * TDP for safety. 4984 */ 4985 its.it_interval.tv_sec = rapl_joule_counter_range / 2; 4986 its.it_interval.tv_nsec = 0; 4987 4988 if (timer_settime(timerid, 0, &its, NULL) == -1) { 4989 fprintf(outf, "Can not set timer.\n"); 4990 goto release_timer; 4991 } 4992 return; 4993 4994 release_timer: 4995 timer_delete(timerid); 4996 release_msr: 4997 free(per_cpu_msr_sum); 4998 } 4999 5000 /* 5001 * set_my_sched_priority(pri) 5002 * return previous priority on success 5003 * return value < -20 on failure 5004 */ 5005 int set_my_sched_priority(int priority) 5006 { 5007 int retval; 5008 int original_priority; 5009 5010 errno = 0; 5011 original_priority = getpriority(PRIO_PROCESS, 0); 5012 if (errno && (original_priority == -1)) 5013 return -21; 5014 5015 retval = setpriority(PRIO_PROCESS, 0, priority); 5016 if (retval) 5017 return -21; 5018 5019 errno = 0; 5020 retval = getpriority(PRIO_PROCESS, 0); 5021 if (retval != priority) 5022 return -21; 5023 5024 return original_priority; 5025 } 5026 5027 void turbostat_loop() 5028 { 5029 int retval; 5030 int restarted = 0; 5031 unsigned int done_iters = 0; 
5032 5033 setup_signal_handler(); 5034 5035 /* 5036 * elevate own priority for interval mode 5037 * 5038 * ignore on error - we probably don't have permission to set it, but 5039 * it's not a big deal 5040 */ 5041 set_my_sched_priority(-20); 5042 5043 restart: 5044 restarted++; 5045 5046 snapshot_proc_sysfs_files(); 5047 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 5048 first_counter_read = 0; 5049 if (retval < -1) { 5050 exit(retval); 5051 } else if (retval == -1) { 5052 if (restarted > 10) { 5053 exit(retval); 5054 } 5055 re_initialize(); 5056 goto restart; 5057 } 5058 restarted = 0; 5059 done_iters = 0; 5060 gettimeofday(&tv_even, (struct timezone *)NULL); 5061 5062 while (1) { 5063 if (for_all_proc_cpus(cpu_is_not_present)) { 5064 re_initialize(); 5065 goto restart; 5066 } 5067 if (update_effective_str(false)) { 5068 re_initialize(); 5069 goto restart; 5070 } 5071 do_sleep(); 5072 if (snapshot_proc_sysfs_files()) 5073 goto restart; 5074 retval = for_all_cpus(get_counters, ODD_COUNTERS); 5075 if (retval < -1) { 5076 exit(retval); 5077 } else if (retval == -1) { 5078 re_initialize(); 5079 goto restart; 5080 } 5081 gettimeofday(&tv_odd, (struct timezone *)NULL); 5082 timersub(&tv_odd, &tv_even, &tv_delta); 5083 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) { 5084 re_initialize(); 5085 goto restart; 5086 } 5087 compute_average(EVEN_COUNTERS); 5088 format_all_counters(EVEN_COUNTERS); 5089 flush_output_stdout(); 5090 if (exit_requested) 5091 break; 5092 if (num_iterations && ++done_iters >= num_iterations) 5093 break; 5094 do_sleep(); 5095 if (snapshot_proc_sysfs_files()) 5096 goto restart; 5097 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 5098 if (retval < -1) { 5099 exit(retval); 5100 } else if (retval == -1) { 5101 re_initialize(); 5102 goto restart; 5103 } 5104 gettimeofday(&tv_even, (struct timezone *)NULL); 5105 timersub(&tv_even, &tv_odd, &tv_delta); 5106 if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) { 5107 
re_initialize(); 5108 goto restart; 5109 } 5110 compute_average(ODD_COUNTERS); 5111 format_all_counters(ODD_COUNTERS); 5112 flush_output_stdout(); 5113 if (exit_requested) 5114 break; 5115 if (num_iterations && ++done_iters >= num_iterations) 5116 break; 5117 } 5118 } 5119 5120 void check_dev_msr() 5121 { 5122 struct stat sb; 5123 char pathname[32]; 5124 5125 if (no_msr) 5126 return; 5127 5128 sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); 5129 if (stat(pathname, &sb)) 5130 if (system("/sbin/modprobe msr > /dev/null 2>&1")) 5131 no_msr = 1; 5132 } 5133 5134 /* 5135 * check for CAP_SYS_RAWIO 5136 * return 0 on success 5137 * return 1 on fail 5138 */ 5139 int check_for_cap_sys_rawio(void) 5140 { 5141 cap_t caps; 5142 cap_flag_value_t cap_flag_value; 5143 int ret = 0; 5144 5145 caps = cap_get_proc(); 5146 if (caps == NULL) 5147 return 1; 5148 5149 if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) { 5150 ret = 1; 5151 goto free_and_exit; 5152 } 5153 5154 if (cap_flag_value != CAP_SET) { 5155 ret = 1; 5156 goto free_and_exit; 5157 } 5158 5159 free_and_exit: 5160 if (cap_free(caps) == -1) 5161 err(-6, "cap_free\n"); 5162 5163 return ret; 5164 } 5165 5166 void check_msr_permission(void) 5167 { 5168 int failed = 0; 5169 char pathname[32]; 5170 5171 if (no_msr) 5172 return; 5173 5174 /* check for CAP_SYS_RAWIO */ 5175 failed += check_for_cap_sys_rawio(); 5176 5177 /* test file permissions */ 5178 sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); 5179 if (euidaccess(pathname, R_OK)) { 5180 failed++; 5181 } 5182 5183 if (failed) { 5184 warnx("Failed to access %s. 
Some of the counters may not be available\n" 5185 "\tRun as root to enable them or use %s to disable the access explicitly", pathname, "--no-msr"); 5186 no_msr = 1; 5187 } 5188 } 5189 5190 void probe_bclk(void) 5191 { 5192 unsigned long long msr; 5193 unsigned int base_ratio; 5194 5195 if (!platform->has_nhm_msrs || no_msr) 5196 return; 5197 5198 if (platform->bclk_freq == BCLK_100MHZ) 5199 bclk = 100.00; 5200 else if (platform->bclk_freq == BCLK_133MHZ) 5201 bclk = 133.33; 5202 else if (platform->bclk_freq == BCLK_SLV) 5203 bclk = slm_bclk(); 5204 else 5205 return; 5206 5207 get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); 5208 base_ratio = (msr >> 8) & 0xFF; 5209 5210 base_hz = base_ratio * bclk * 1000000; 5211 has_base_hz = 1; 5212 5213 if (platform->enable_tsc_tweak) 5214 tsc_tweak = base_hz / tsc_hz; 5215 } 5216 5217 static void remove_underbar(char *s) 5218 { 5219 char *to = s; 5220 5221 while (*s) { 5222 if (*s != '_') 5223 *to++ = *s; 5224 s++; 5225 } 5226 5227 *to = 0; 5228 } 5229 5230 static void dump_turbo_ratio_info(void) 5231 { 5232 if (!has_turbo) 5233 return; 5234 5235 if (!platform->has_nhm_msrs || no_msr) 5236 return; 5237 5238 if (platform->trl_msrs & TRL_LIMIT2) 5239 dump_turbo_ratio_limit2(); 5240 5241 if (platform->trl_msrs & TRL_LIMIT1) 5242 dump_turbo_ratio_limit1(); 5243 5244 if (platform->trl_msrs & TRL_BASE) { 5245 dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT); 5246 5247 if (is_hybrid) 5248 dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT); 5249 } 5250 5251 if (platform->trl_msrs & TRL_ATOM) 5252 dump_atom_turbo_ratio_limits(); 5253 5254 if (platform->trl_msrs & TRL_KNL) 5255 dump_knl_turbo_ratio_limits(); 5256 5257 if (platform->has_config_tdp) 5258 dump_config_tdp(); 5259 } 5260 5261 static int read_sysfs_int(char *path) 5262 { 5263 FILE *input; 5264 int retval = -1; 5265 5266 input = fopen(path, "r"); 5267 if (input == NULL) { 5268 if (debug) 5269 fprintf(outf, "NSFOD %s\n", path); 5270 return (-1); 5271 } 5272 if (fscanf(input, 
"%d", &retval) != 1) 5273 err(1, "%s: failed to read int from file", path); 5274 fclose(input); 5275 5276 return (retval); 5277 } 5278 5279 static void dump_sysfs_file(char *path) 5280 { 5281 FILE *input; 5282 char cpuidle_buf[64]; 5283 5284 input = fopen(path, "r"); 5285 if (input == NULL) { 5286 if (debug) 5287 fprintf(outf, "NSFOD %s\n", path); 5288 return; 5289 } 5290 if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input)) 5291 err(1, "%s: failed to read file", path); 5292 fclose(input); 5293 5294 fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf); 5295 } 5296 5297 static void probe_intel_uncore_frequency(void) 5298 { 5299 int i, j; 5300 char path[256]; 5301 5302 if (!genuine_intel) 5303 return; 5304 5305 if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK)) 5306 goto probe_cluster; 5307 5308 BIC_PRESENT(BIC_UNCORE_MHZ); 5309 5310 if (quiet) 5311 return; 5312 5313 for (i = 0; i < topo.num_packages; ++i) { 5314 for (j = 0; j < topo.num_die; ++j) { 5315 int k, l; 5316 char path_base[128]; 5317 5318 sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i, 5319 j); 5320 5321 sprintf(path, "%s/min_freq_khz", path_base); 5322 k = read_sysfs_int(path); 5323 sprintf(path, "%s/max_freq_khz", path_base); 5324 l = read_sysfs_int(path); 5325 fprintf(outf, "Uncore Frequency package%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000); 5326 5327 sprintf(path, "%s/initial_min_freq_khz", path_base); 5328 k = read_sysfs_int(path); 5329 sprintf(path, "%s/initial_max_freq_khz", path_base); 5330 l = read_sysfs_int(path); 5331 fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000); 5332 5333 sprintf(path, "%s/current_freq_khz", path_base); 5334 k = read_sysfs_int(path); 5335 fprintf(outf, " %d MHz\n", k / 1000); 5336 } 5337 } 5338 return; 5339 5340 probe_cluster: 5341 if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK)) 5342 return; 5343 5344 if (quiet) 
5345 return; 5346 5347 for (i = 0;; ++i) { 5348 int k, l; 5349 char path_base[128]; 5350 int package_id, domain_id, cluster_id; 5351 5352 sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", i); 5353 5354 if (access(path_base, R_OK)) 5355 break; 5356 5357 sprintf(path, "%s/package_id", path_base); 5358 package_id = read_sysfs_int(path); 5359 5360 sprintf(path, "%s/domain_id", path_base); 5361 domain_id = read_sysfs_int(path); 5362 5363 sprintf(path, "%s/fabric_cluster_id", path_base); 5364 cluster_id = read_sysfs_int(path); 5365 5366 sprintf(path, "%s/min_freq_khz", path_base); 5367 k = read_sysfs_int(path); 5368 sprintf(path, "%s/max_freq_khz", path_base); 5369 l = read_sysfs_int(path); 5370 fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id, 5371 cluster_id, k / 1000, l / 1000); 5372 5373 sprintf(path, "%s/initial_min_freq_khz", path_base); 5374 k = read_sysfs_int(path); 5375 sprintf(path, "%s/initial_max_freq_khz", path_base); 5376 l = read_sysfs_int(path); 5377 fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000); 5378 5379 sprintf(path, "%s/current_freq_khz", path_base); 5380 k = read_sysfs_int(path); 5381 fprintf(outf, " %d MHz\n", k / 1000); 5382 } 5383 } 5384 5385 static void probe_graphics(void) 5386 { 5387 /* Xe graphics sysfs knobs */ 5388 if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) { 5389 FILE *fp; 5390 char buf[8]; 5391 bool gt0_is_gt; 5392 int idx; 5393 5394 fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r"); 5395 if (!fp) 5396 goto next; 5397 5398 if (!fread(buf, sizeof(char), 7, fp)) { 5399 fclose(fp); 5400 goto next; 5401 } 5402 fclose(fp); 5403 5404 if (!strncmp(buf, "gt0-rc", strlen("gt0-rc"))) 5405 gt0_is_gt = true; 5406 else if (!strncmp(buf, "gt0-mc", strlen("gt0-mc"))) 5407 gt0_is_gt = false; 5408 else 5409 goto next; 5410 5411 idx = gt0_is_gt ? 
GFX_rc6 : SAM_mc6; 5412 gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms"; 5413 5414 idx = gt0_is_gt ? GFX_MHz : SAM_MHz; 5415 if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", R_OK)) 5416 gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq"; 5417 5418 idx = gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz; 5419 if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", R_OK)) 5420 gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq"; 5421 5422 idx = gt0_is_gt ? SAM_mc6 : GFX_rc6; 5423 if (!access("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", R_OK)) 5424 gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms"; 5425 5426 idx = gt0_is_gt ? SAM_MHz : GFX_MHz; 5427 if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", R_OK)) 5428 gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq"; 5429 5430 idx = gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz; 5431 if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", R_OK)) 5432 gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq"; 5433 5434 goto end; 5435 } 5436 5437 next: 5438 /* New i915 graphics sysfs knobs */ 5439 if (!access("/sys/class/drm/card0/gt/gt0/rc6_residency_ms", R_OK)) { 5440 gfx_info[GFX_rc6].path = "/sys/class/drm/card0/gt/gt0/rc6_residency_ms"; 5441 5442 if (!access("/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz", R_OK)) 5443 gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz"; 5444 5445 if (!access("/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz", R_OK)) 5446 gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz"; 5447 5448 if (!access("/sys/class/drm/card0/gt/gt1/rc6_residency_ms", R_OK)) 5449 gfx_info[SAM_mc6].path = "/sys/class/drm/card0/gt/gt1/rc6_residency_ms"; 5450 5451 if (!access("/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz", R_OK)) 5452 gfx_info[SAM_MHz].path = 
"/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz"; 5453 5454 if (!access("/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz", R_OK)) 5455 gfx_info[SAM_ACTMHz].path = "/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz"; 5456 5457 goto end; 5458 } 5459 5460 /* Fall back to traditional i915 graphics sysfs knobs */ 5461 if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK)) 5462 gfx_info[GFX_rc6].path = "/sys/class/drm/card0/power/rc6_residency_ms"; 5463 5464 if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK)) 5465 gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt_cur_freq_mhz"; 5466 else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) 5467 gfx_info[GFX_MHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz"; 5468 5469 5470 if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK)) 5471 gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt_act_freq_mhz"; 5472 else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK)) 5473 gfx_info[GFX_ACTMHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz"; 5474 5475 end: 5476 if (gfx_info[GFX_rc6].path) 5477 BIC_PRESENT(BIC_GFX_rc6); 5478 if (gfx_info[GFX_MHz].path) 5479 BIC_PRESENT(BIC_GFXMHz); 5480 if (gfx_info[GFX_ACTMHz].path) 5481 BIC_PRESENT(BIC_GFXACTMHz); 5482 if (gfx_info[SAM_mc6].path) 5483 BIC_PRESENT(BIC_SAM_mc6); 5484 if (gfx_info[SAM_MHz].path) 5485 BIC_PRESENT(BIC_SAMMHz); 5486 if (gfx_info[SAM_ACTMHz].path) 5487 BIC_PRESENT(BIC_SAMACTMHz); 5488 } 5489 5490 static void dump_sysfs_cstate_config(void) 5491 { 5492 char path[64]; 5493 char name_buf[16]; 5494 char desc[64]; 5495 FILE *input; 5496 int state; 5497 char *sp; 5498 5499 if (access("/sys/devices/system/cpu/cpuidle", R_OK)) { 5500 fprintf(outf, "cpuidle not loaded\n"); 5501 return; 5502 } 5503 5504 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver"); 5505 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor"); 5506 
dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro"); 5507 5508 for (state = 0; state < 10; ++state) { 5509 5510 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); 5511 input = fopen(path, "r"); 5512 if (input == NULL) 5513 continue; 5514 if (!fgets(name_buf, sizeof(name_buf), input)) 5515 err(1, "%s: failed to read file", path); 5516 5517 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ 5518 sp = strchr(name_buf, '-'); 5519 if (!sp) 5520 sp = strchrnul(name_buf, '\n'); 5521 *sp = '\0'; 5522 fclose(input); 5523 5524 remove_underbar(name_buf); 5525 5526 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state); 5527 input = fopen(path, "r"); 5528 if (input == NULL) 5529 continue; 5530 if (!fgets(desc, sizeof(desc), input)) 5531 err(1, "%s: failed to read file", path); 5532 5533 fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc); 5534 fclose(input); 5535 } 5536 } 5537 5538 static void dump_sysfs_pstate_config(void) 5539 { 5540 char path[64]; 5541 char driver_buf[64]; 5542 char governor_buf[64]; 5543 FILE *input; 5544 int turbo; 5545 5546 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", base_cpu); 5547 input = fopen(path, "r"); 5548 if (input == NULL) { 5549 fprintf(outf, "NSFOD %s\n", path); 5550 return; 5551 } 5552 if (!fgets(driver_buf, sizeof(driver_buf), input)) 5553 err(1, "%s: failed to read file", path); 5554 fclose(input); 5555 5556 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", base_cpu); 5557 input = fopen(path, "r"); 5558 if (input == NULL) { 5559 fprintf(outf, "NSFOD %s\n", path); 5560 return; 5561 } 5562 if (!fgets(governor_buf, sizeof(governor_buf), input)) 5563 err(1, "%s: failed to read file", path); 5564 fclose(input); 5565 5566 fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf); 5567 fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf); 5568 5569 sprintf(path, 
"/sys/devices/system/cpu/cpufreq/boost"); 5570 input = fopen(path, "r"); 5571 if (input != NULL) { 5572 if (fscanf(input, "%d", &turbo) != 1) 5573 err(1, "%s: failed to parse number from file", path); 5574 fprintf(outf, "cpufreq boost: %d\n", turbo); 5575 fclose(input); 5576 } 5577 5578 sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo"); 5579 input = fopen(path, "r"); 5580 if (input != NULL) { 5581 if (fscanf(input, "%d", &turbo) != 1) 5582 err(1, "%s: failed to parse number from file", path); 5583 fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo); 5584 fclose(input); 5585 } 5586 } 5587 5588 /* 5589 * print_epb() 5590 * Decode the ENERGY_PERF_BIAS MSR 5591 */ 5592 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) 5593 { 5594 char *epb_string; 5595 int cpu, epb; 5596 5597 UNUSED(c); 5598 UNUSED(p); 5599 5600 if (!has_epb) 5601 return 0; 5602 5603 cpu = t->cpu_id; 5604 5605 /* EPB is per-package */ 5606 if (!is_cpu_first_thread_in_package(t, c, p)) 5607 return 0; 5608 5609 if (cpu_migrate(cpu)) { 5610 fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu); 5611 return -1; 5612 } 5613 5614 epb = get_epb(cpu); 5615 if (epb < 0) 5616 return 0; 5617 5618 switch (epb) { 5619 case ENERGY_PERF_BIAS_PERFORMANCE: 5620 epb_string = "performance"; 5621 break; 5622 case ENERGY_PERF_BIAS_NORMAL: 5623 epb_string = "balanced"; 5624 break; 5625 case ENERGY_PERF_BIAS_POWERSAVE: 5626 epb_string = "powersave"; 5627 break; 5628 default: 5629 epb_string = "custom"; 5630 break; 5631 } 5632 fprintf(outf, "cpu%d: EPB: %d (%s)\n", cpu, epb, epb_string); 5633 5634 return 0; 5635 } 5636 5637 /* 5638 * print_hwp() 5639 * Decode the MSR_HWP_CAPABILITIES 5640 */ 5641 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) 5642 { 5643 unsigned long long msr; 5644 int cpu; 5645 5646 UNUSED(c); 5647 UNUSED(p); 5648 5649 if (no_msr) 5650 return 0; 5651 5652 if (!has_hwp) 5653 return 0; 5654 5655 cpu = t->cpu_id; 5656 5657 
/* MSR_HWP_CAPABILITIES is per-package */ 5658 if (!is_cpu_first_thread_in_package(t, c, p)) 5659 return 0; 5660 5661 if (cpu_migrate(cpu)) { 5662 fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu); 5663 return -1; 5664 } 5665 5666 if (get_msr(cpu, MSR_PM_ENABLE, &msr)) 5667 return 0; 5668 5669 fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", cpu, msr, (msr & (1 << 0)) ? "" : "No-"); 5670 5671 /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */ 5672 if ((msr & (1 << 0)) == 0) 5673 return 0; 5674 5675 if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr)) 5676 return 0; 5677 5678 fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx " 5679 "(high %d guar %d eff %d low %d)\n", 5680 cpu, msr, 5681 (unsigned int)HWP_HIGHEST_PERF(msr), 5682 (unsigned int)HWP_GUARANTEED_PERF(msr), 5683 (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr)); 5684 5685 if (get_msr(cpu, MSR_HWP_REQUEST, &msr)) 5686 return 0; 5687 5688 fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx " 5689 "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n", 5690 cpu, msr, 5691 (unsigned int)(((msr) >> 0) & 0xff), 5692 (unsigned int)(((msr) >> 8) & 0xff), 5693 (unsigned int)(((msr) >> 16) & 0xff), 5694 (unsigned int)(((msr) >> 24) & 0xff), 5695 (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1)); 5696 5697 if (has_hwp_pkg) { 5698 if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr)) 5699 return 0; 5700 5701 fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx " 5702 "(min %d max %d des %d epp 0x%x window 0x%x)\n", 5703 cpu, msr, 5704 (unsigned int)(((msr) >> 0) & 0xff), 5705 (unsigned int)(((msr) >> 8) & 0xff), 5706 (unsigned int)(((msr) >> 16) & 0xff), 5707 (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3)); 5708 } 5709 if (has_hwp_notify) { 5710 if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr)) 5711 return 0; 5712 5713 fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx " 5714 "(%s_Guaranteed_Perf_Change, 
%s_Excursion_Min)\n", 5715 cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis"); 5716 } 5717 if (get_msr(cpu, MSR_HWP_STATUS, &msr)) 5718 return 0; 5719 5720 fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx " 5721 "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", 5722 cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-"); 5723 5724 return 0; 5725 } 5726 5727 /* 5728 * print_perf_limit() 5729 */ 5730 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p) 5731 { 5732 unsigned long long msr; 5733 int cpu; 5734 5735 UNUSED(c); 5736 UNUSED(p); 5737 5738 if (no_msr) 5739 return 0; 5740 5741 cpu = t->cpu_id; 5742 5743 /* per-package */ 5744 if (!is_cpu_first_thread_in_package(t, c, p)) 5745 return 0; 5746 5747 if (cpu_migrate(cpu)) { 5748 fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu); 5749 return -1; 5750 } 5751 5752 if (platform->plr_msrs & PLR_CORE) { 5753 get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); 5754 fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 5755 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", 5756 (msr & 1 << 15) ? "bit15, " : "", 5757 (msr & 1 << 14) ? "bit14, " : "", 5758 (msr & 1 << 13) ? "Transitions, " : "", 5759 (msr & 1 << 12) ? "MultiCoreTurbo, " : "", 5760 (msr & 1 << 11) ? "PkgPwrL2, " : "", 5761 (msr & 1 << 10) ? "PkgPwrL1, " : "", 5762 (msr & 1 << 9) ? "CorePwr, " : "", 5763 (msr & 1 << 8) ? "Amps, " : "", 5764 (msr & 1 << 6) ? "VR-Therm, " : "", 5765 (msr & 1 << 5) ? "Auto-HWP, " : "", 5766 (msr & 1 << 4) ? "Graphics, " : "", 5767 (msr & 1 << 2) ? "bit2, " : "", 5768 (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : ""); 5769 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", 5770 (msr & 1 << 31) ? "bit31, " : "", 5771 (msr & 1 << 30) ? "bit30, " : "", 5772 (msr & 1 << 29) ? "Transitions, " : "", 5773 (msr & 1 << 28) ? "MultiCoreTurbo, " : "", 5774 (msr & 1 << 27) ? 
"PkgPwrL2, " : "", 5775 (msr & 1 << 26) ? "PkgPwrL1, " : "", 5776 (msr & 1 << 25) ? "CorePwr, " : "", 5777 (msr & 1 << 24) ? "Amps, " : "", 5778 (msr & 1 << 22) ? "VR-Therm, " : "", 5779 (msr & 1 << 21) ? "Auto-HWP, " : "", 5780 (msr & 1 << 20) ? "Graphics, " : "", 5781 (msr & 1 << 18) ? "bit18, " : "", 5782 (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : ""); 5783 5784 } 5785 if (platform->plr_msrs & PLR_GFX) { 5786 get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); 5787 fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 5788 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)", 5789 (msr & 1 << 0) ? "PROCHOT, " : "", 5790 (msr & 1 << 1) ? "ThermStatus, " : "", 5791 (msr & 1 << 4) ? "Graphics, " : "", 5792 (msr & 1 << 6) ? "VR-Therm, " : "", 5793 (msr & 1 << 8) ? "Amps, " : "", 5794 (msr & 1 << 9) ? "GFXPwr, " : "", 5795 (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); 5796 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n", 5797 (msr & 1 << 16) ? "PROCHOT, " : "", 5798 (msr & 1 << 17) ? "ThermStatus, " : "", 5799 (msr & 1 << 20) ? "Graphics, " : "", 5800 (msr & 1 << 22) ? "VR-Therm, " : "", 5801 (msr & 1 << 24) ? "Amps, " : "", 5802 (msr & 1 << 25) ? "GFXPwr, " : "", 5803 (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); 5804 } 5805 if (platform->plr_msrs & PLR_RING) { 5806 get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); 5807 fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 5808 fprintf(outf, " (Active: %s%s%s%s%s%s)", 5809 (msr & 1 << 0) ? "PROCHOT, " : "", 5810 (msr & 1 << 1) ? "ThermStatus, " : "", 5811 (msr & 1 << 6) ? "VR-Therm, " : "", 5812 (msr & 1 << 8) ? "Amps, " : "", 5813 (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); 5814 fprintf(outf, " (Logged: %s%s%s%s%s%s)\n", 5815 (msr & 1 << 16) ? "PROCHOT, " : "", 5816 (msr & 1 << 17) ? "ThermStatus, " : "", 5817 (msr & 1 << 22) ? "VR-Therm, " : "", 5818 (msr & 1 << 24) ? 
"Amps, " : "", 5819 (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); 5820 } 5821 return 0; 5822 } 5823 5824 #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ 5825 #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ 5826 5827 double get_quirk_tdp(void) 5828 { 5829 if (platform->rapl_quirk_tdp) 5830 return platform->rapl_quirk_tdp; 5831 5832 return 135.0; 5833 } 5834 5835 double get_tdp_intel(void) 5836 { 5837 unsigned long long msr; 5838 5839 if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) 5840 if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr)) 5841 return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; 5842 return get_quirk_tdp(); 5843 } 5844 5845 double get_tdp_amd(void) 5846 { 5847 return get_quirk_tdp(); 5848 } 5849 5850 void rapl_probe_intel(void) 5851 { 5852 unsigned long long msr; 5853 unsigned int time_unit; 5854 double tdp; 5855 const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt; 5856 const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J; 5857 5858 if (rapl_joules) 5859 bic_enabled &= ~bic_watt_bits; 5860 else 5861 bic_enabled &= ~bic_joules_bits; 5862 5863 if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS)) 5864 bic_enabled &= ~BIC_PKG__; 5865 if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)) 5866 bic_enabled &= ~BIC_RAM__; 5867 5868 /* units on package 0, verify later other packages match */ 5869 if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr)) 5870 return; 5871 5872 rapl_power_units = 1.0 / (1 << (msr & 0xF)); 5873 if (platform->has_rapl_divisor) 5874 rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000; 5875 else 5876 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); 5877 5878 if (platform->has_fixed_rapl_unit) 5879 rapl_dram_energy_units = (15.3 / 1000000); 5880 else 5881 rapl_dram_energy_units = rapl_energy_units; 5882 5883 time_unit = msr >> 16 & 0xF; 5884 if (time_unit == 0) 5885 time_unit = 0xA; 
5886 5887 rapl_time_units = 1.0 / (1 << (time_unit)); 5888 5889 tdp = get_tdp_intel(); 5890 5891 rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; 5892 if (!quiet) 5893 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); 5894 } 5895 5896 void rapl_probe_amd(void) 5897 { 5898 unsigned long long msr; 5899 double tdp; 5900 const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt; 5901 const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J; 5902 5903 if (rapl_joules) 5904 bic_enabled &= ~bic_watt_bits; 5905 else 5906 bic_enabled &= ~bic_joules_bits; 5907 5908 if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr)) 5909 return; 5910 5911 rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf)); 5912 rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f)); 5913 rapl_power_units = ldexp(1.0, -(msr & 0xf)); 5914 5915 tdp = get_tdp_amd(); 5916 5917 rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; 5918 if (!quiet) 5919 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); 5920 } 5921 5922 void print_power_limit_msr(int cpu, unsigned long long msr, char *label) 5923 { 5924 fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n", 5925 cpu, label, 5926 ((msr >> 15) & 1) ? "EN" : "DIS", 5927 ((msr >> 0) & 0x7FFF) * rapl_power_units, 5928 (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, 5929 (((msr >> 16) & 1) ? 
"EN" : "DIS")); 5930 5931 return; 5932 } 5933 5934 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) 5935 { 5936 unsigned long long msr; 5937 const char *msr_name; 5938 int cpu; 5939 5940 UNUSED(c); 5941 UNUSED(p); 5942 5943 if (!platform->rapl_msrs) 5944 return 0; 5945 5946 /* RAPL counters are per package, so print only for 1st thread/package */ 5947 if (!is_cpu_first_thread_in_package(t, c, p)) 5948 return 0; 5949 5950 cpu = t->cpu_id; 5951 if (cpu_migrate(cpu)) { 5952 fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu); 5953 return -1; 5954 } 5955 5956 if (platform->rapl_msrs & RAPL_AMD_F17H) { 5957 msr_name = "MSR_RAPL_PWR_UNIT"; 5958 if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr)) 5959 return -1; 5960 } else { 5961 msr_name = "MSR_RAPL_POWER_UNIT"; 5962 if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) 5963 return -1; 5964 } 5965 5966 fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr, 5967 rapl_power_units, rapl_energy_units, rapl_time_units); 5968 5969 if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) { 5970 5971 if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) 5972 return -5; 5973 5974 fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 5975 cpu, msr, 5976 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 5977 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, 5978 ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, 5979 ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); 5980 5981 } 5982 if (platform->rapl_msrs & RAPL_PKG) { 5983 5984 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) 5985 return -9; 5986 5987 fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", 5988 cpu, msr, (msr >> 63) & 1 ? "" : "UN"); 5989 5990 print_power_limit_msr(cpu, msr, "PKG Limit #1"); 5991 fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n", 5992 cpu, 5993 ((msr >> 47) & 1) ? 
"EN" : "DIS", 5994 ((msr >> 32) & 0x7FFF) * rapl_power_units, 5995 (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, 5996 ((msr >> 48) & 1) ? "EN" : "DIS"); 5997 5998 if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr)) 5999 return -9; 6000 6001 fprintf(outf, "cpu%d: MSR_VR_CURRENT_CONFIG: 0x%08llx\n", cpu, msr); 6002 fprintf(outf, "cpu%d: PKG Limit #4: %f Watts (%slocked)\n", 6003 cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN"); 6004 } 6005 6006 if (platform->rapl_msrs & RAPL_DRAM_POWER_INFO) { 6007 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) 6008 return -6; 6009 6010 fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 6011 cpu, msr, 6012 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 6013 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, 6014 ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, 6015 ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); 6016 } 6017 if (platform->rapl_msrs & RAPL_DRAM) { 6018 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) 6019 return -9; 6020 fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", 6021 cpu, msr, (msr >> 31) & 1 ? "" : "UN"); 6022 6023 print_power_limit_msr(cpu, msr, "DRAM Limit"); 6024 } 6025 if (platform->rapl_msrs & RAPL_CORE_POLICY) { 6026 if (get_msr(cpu, MSR_PP0_POLICY, &msr)) 6027 return -7; 6028 6029 fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); 6030 } 6031 if (platform->rapl_msrs & RAPL_CORE_POWER_LIMIT) { 6032 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) 6033 return -9; 6034 fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", 6035 cpu, msr, (msr >> 31) & 1 ? 
"" : "UN"); 6036 print_power_limit_msr(cpu, msr, "Cores Limit"); 6037 } 6038 if (platform->rapl_msrs & RAPL_GFX) { 6039 if (get_msr(cpu, MSR_PP1_POLICY, &msr)) 6040 return -8; 6041 6042 fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); 6043 6044 if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) 6045 return -9; 6046 fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", 6047 cpu, msr, (msr >> 31) & 1 ? "" : "UN"); 6048 print_power_limit_msr(cpu, msr, "GFX Limit"); 6049 } 6050 return 0; 6051 } 6052 6053 /* 6054 * probe_rapl() 6055 * 6056 * sets rapl_power_units, rapl_energy_units, rapl_time_units 6057 */ 6058 void probe_rapl(void) 6059 { 6060 if (!platform->rapl_msrs || no_msr) 6061 return; 6062 6063 if (genuine_intel) 6064 rapl_probe_intel(); 6065 if (authentic_amd || hygon_genuine) 6066 rapl_probe_amd(); 6067 6068 if (quiet) 6069 return; 6070 6071 for_all_cpus(print_rapl, ODD_COUNTERS); 6072 } 6073 6074 /* 6075 * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where 6076 * the Thermal Control Circuit (TCC) activates. 6077 * This is usually equal to tjMax. 6078 * 6079 * Older processors do not have this MSR, so there we guess, 6080 * but also allow cmdline over-ride with -T. 6081 * 6082 * Several MSR temperature values are in units of degrees-C 6083 * below this value, including the Digital Thermal Sensor (DTS), 6084 * Package Thermal Management Sensor (PTM), and thermal event thresholds. 
6085 */ 6086 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) 6087 { 6088 unsigned long long msr; 6089 unsigned int tcc_default, tcc_offset; 6090 int cpu; 6091 6092 UNUSED(c); 6093 UNUSED(p); 6094 6095 /* tj_max is used only for dts or ptm */ 6096 if (!(do_dts || do_ptm)) 6097 return 0; 6098 6099 /* this is a per-package concept */ 6100 if (!is_cpu_first_thread_in_package(t, c, p)) 6101 return 0; 6102 6103 cpu = t->cpu_id; 6104 if (cpu_migrate(cpu)) { 6105 fprintf(outf, "Could not migrate to CPU %d\n", cpu); 6106 return -1; 6107 } 6108 6109 if (tj_max_override != 0) { 6110 tj_max = tj_max_override; 6111 fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", cpu, tj_max); 6112 return 0; 6113 } 6114 6115 /* Temperature Target MSR is Nehalem and newer only */ 6116 if (!platform->has_nhm_msrs || no_msr) 6117 goto guess; 6118 6119 if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) 6120 goto guess; 6121 6122 tcc_default = (msr >> 16) & 0xFF; 6123 6124 if (!quiet) { 6125 int bits = platform->tcc_offset_bits; 6126 unsigned long long enabled = 0; 6127 6128 if (bits && !get_msr(base_cpu, MSR_PLATFORM_INFO, &enabled)) 6129 enabled = (enabled >> 30) & 1; 6130 6131 if (bits && enabled) { 6132 tcc_offset = (msr >> 24) & GENMASK(bits - 1, 0); 6133 fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", 6134 cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset); 6135 } else { 6136 fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default); 6137 } 6138 } 6139 6140 if (!tcc_default) 6141 goto guess; 6142 6143 tj_max = tcc_default; 6144 6145 return 0; 6146 6147 guess: 6148 tj_max = TJMAX_DEFAULT; 6149 fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max); 6150 6151 return 0; 6152 } 6153 6154 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) 6155 { 6156 unsigned long long msr; 6157 unsigned 
int dts, dts2; 6158 int cpu; 6159 6160 UNUSED(c); 6161 UNUSED(p); 6162 6163 if (no_msr) 6164 return 0; 6165 6166 if (!(do_dts || do_ptm)) 6167 return 0; 6168 6169 cpu = t->cpu_id; 6170 6171 /* DTS is per-core, no need to print for each thread */ 6172 if (!is_cpu_first_thread_in_core(t, c, p)) 6173 return 0; 6174 6175 if (cpu_migrate(cpu)) { 6176 fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu); 6177 return -1; 6178 } 6179 6180 if (do_ptm && is_cpu_first_core_in_package(t, c, p)) { 6181 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) 6182 return 0; 6183 6184 dts = (msr >> 16) & 0x7F; 6185 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts); 6186 6187 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr)) 6188 return 0; 6189 6190 dts = (msr >> 16) & 0x7F; 6191 dts2 = (msr >> 8) & 0x7F; 6192 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", 6193 cpu, msr, tj_max - dts, tj_max - dts2); 6194 } 6195 6196 if (do_dts && debug) { 6197 unsigned int resolution; 6198 6199 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) 6200 return 0; 6201 6202 dts = (msr >> 16) & 0x7F; 6203 resolution = (msr >> 27) & 0xF; 6204 fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", 6205 cpu, msr, tj_max - dts, resolution); 6206 6207 if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr)) 6208 return 0; 6209 6210 dts = (msr >> 16) & 0x7F; 6211 dts2 = (msr >> 8) & 0x7F; 6212 fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", 6213 cpu, msr, tj_max - dts, tj_max - dts2); 6214 } 6215 6216 return 0; 6217 } 6218 6219 void probe_thermal(void) 6220 { 6221 if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK)) 6222 BIC_PRESENT(BIC_CORE_THROT_CNT); 6223 else 6224 BIC_NOT_PRESENT(BIC_CORE_THROT_CNT); 6225 6226 for_all_cpus(set_temperature_target, ODD_COUNTERS); 6227 6228 if (quiet) 6229 return; 6230 6231 for_all_cpus(print_thermal, 
ODD_COUNTERS); 6232 } 6233 6234 int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p) 6235 { 6236 unsigned int eax, ebx, ecx, edx; 6237 6238 UNUSED(c); 6239 UNUSED(p); 6240 6241 if (!genuine_intel) 6242 return 0; 6243 6244 if (cpu_migrate(t->cpu_id)) { 6245 fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id); 6246 return -1; 6247 } 6248 6249 if (max_level < 0x1a) 6250 return 0; 6251 6252 __cpuid(0x1a, eax, ebx, ecx, edx); 6253 eax = (eax >> 24) & 0xFF; 6254 if (eax == 0x20) 6255 t->is_atom = true; 6256 return 0; 6257 } 6258 6259 void decode_feature_control_msr(void) 6260 { 6261 unsigned long long msr; 6262 6263 if (no_msr) 6264 return; 6265 6266 if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr)) 6267 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n", 6268 base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : ""); 6269 } 6270 6271 void decode_misc_enable_msr(void) 6272 { 6273 unsigned long long msr; 6274 6275 if (no_msr) 6276 return; 6277 6278 if (!genuine_intel) 6279 return; 6280 6281 if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr)) 6282 fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n", 6283 base_cpu, msr, 6284 msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-", 6285 msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-", 6286 msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-", 6287 msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", 6288 msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); 6289 } 6290 6291 void decode_misc_feature_control(void) 6292 { 6293 unsigned long long msr; 6294 6295 if (no_msr) 6296 return; 6297 6298 if (!platform->has_msr_misc_feature_control) 6299 return; 6300 6301 if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr)) 6302 fprintf(outf, 6303 "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n", 6304 base_cpu, msr, msr & (0 << 0) ? 
"No-" : "", msr & (1 << 0) ? "No-" : "", 6305 msr & (2 << 0) ? "No-" : "", msr & (3 << 0) ? "No-" : ""); 6306 } 6307 6308 /* 6309 * Decode MSR_MISC_PWR_MGMT 6310 * 6311 * Decode the bits according to the Nehalem documentation 6312 * bit[0] seems to continue to have same meaning going forward 6313 * bit[1] less so... 6314 */ 6315 void decode_misc_pwr_mgmt_msr(void) 6316 { 6317 unsigned long long msr; 6318 6319 if (no_msr) 6320 return; 6321 6322 if (!platform->has_msr_misc_pwr_mgmt) 6323 return; 6324 6325 if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) 6326 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n", 6327 base_cpu, msr, 6328 msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS"); 6329 } 6330 6331 /* 6332 * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG 6333 * 6334 * This MSRs are present on Silvermont processors, 6335 * Intel Atom processor E3000 series (Baytrail), and friends. 6336 */ 6337 void decode_c6_demotion_policy_msr(void) 6338 { 6339 unsigned long long msr; 6340 6341 if (no_msr) 6342 return; 6343 6344 if (!platform->has_msr_c6_demotion_policy_config) 6345 return; 6346 6347 if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr)) 6348 fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n", 6349 base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS"); 6350 6351 if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr)) 6352 fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n", 6353 base_cpu, msr, msr & (1 << 0) ? 
"EN" : "DIS"); 6354 } 6355 6356 void print_dev_latency(void) 6357 { 6358 char *path = "/dev/cpu_dma_latency"; 6359 int fd; 6360 int value; 6361 int retval; 6362 6363 fd = open(path, O_RDONLY); 6364 if (fd < 0) { 6365 if (debug) 6366 warnx("Read %s failed", path); 6367 return; 6368 } 6369 6370 retval = read(fd, (void *)&value, sizeof(int)); 6371 if (retval != sizeof(int)) { 6372 warn("read failed %s", path); 6373 close(fd); 6374 return; 6375 } 6376 fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", value, value == 2000000000 ? "default" : "constrained"); 6377 6378 close(fd); 6379 } 6380 6381 static int has_instr_count_access(void) 6382 { 6383 int fd; 6384 int has_access; 6385 6386 if (no_perf) 6387 return 0; 6388 6389 fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0); 6390 has_access = fd != -1; 6391 6392 if (fd != -1) 6393 close(fd); 6394 6395 if (!has_access) 6396 warnx("Failed to access %s. Some of the counters may not be available\n" 6397 "\tRun as root to enable them or use %s to disable the access explicitly", 6398 "instructions retired perf counter", "--no-perf"); 6399 6400 return has_access; 6401 } 6402 6403 bool is_aperf_access_required(void) 6404 { 6405 return BIC_IS_ENABLED(BIC_Avg_MHz) 6406 || BIC_IS_ENABLED(BIC_Busy) 6407 || BIC_IS_ENABLED(BIC_Bzy_MHz) 6408 || BIC_IS_ENABLED(BIC_IPC); 6409 } 6410 6411 int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, 6412 double *scale_, enum rapl_unit *unit_) 6413 { 6414 if (no_perf) 6415 return -1; 6416 6417 const double scale = read_perf_rapl_scale(cai->perf_subsys, cai->perf_name); 6418 6419 if (scale == 0.0) 6420 return -1; 6421 6422 const enum rapl_unit unit = read_perf_rapl_unit(cai->perf_subsys, cai->perf_name); 6423 6424 if (unit == RAPL_UNIT_INVALID) 6425 return -1; 6426 6427 const unsigned int rapl_type = read_perf_type(cai->perf_subsys); 6428 const unsigned int rapl_energy_pkg_config = 
read_rapl_config(cai->perf_subsys, cai->perf_name); 6429 6430 const int fd_counter = 6431 open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP); 6432 if (fd_counter == -1) 6433 return -1; 6434 6435 /* If it's the first counter opened, make it a group descriptor */ 6436 if (rci->fd_perf == -1) 6437 rci->fd_perf = fd_counter; 6438 6439 *scale_ = scale; 6440 *unit_ = unit; 6441 return fd_counter; 6442 } 6443 6444 int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, 6445 double *scale, enum rapl_unit *unit) 6446 { 6447 int ret = add_rapl_perf_counter_(cpu, rci, cai, scale, unit); 6448 6449 if (debug) 6450 fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu); 6451 6452 return ret; 6453 } 6454 6455 /* 6456 * Linux-perf manages the HW instructions-retired counter 6457 * by enabling when requested, and hiding rollover 6458 */ 6459 void linux_perf_init(void) 6460 { 6461 if (access("/proc/sys/kernel/perf_event_paranoid", F_OK)) 6462 return; 6463 6464 if (BIC_IS_ENABLED(BIC_IPC) && has_aperf) { 6465 fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 6466 if (fd_instr_count_percpu == NULL) 6467 err(-1, "calloc fd_instr_count_percpu"); 6468 } 6469 6470 const bool aperf_required = is_aperf_access_required(); 6471 6472 if (aperf_required && has_aperf && amperf_source == AMPERF_SOURCE_PERF) { 6473 fd_amperf_percpu = calloc(topo.max_cpu_num + 1, sizeof(*fd_amperf_percpu)); 6474 if (fd_amperf_percpu == NULL) 6475 err(-1, "calloc fd_amperf_percpu"); 6476 } 6477 } 6478 6479 void rapl_perf_init(void) 6480 { 6481 const int num_domains = platform->has_per_core_rapl ? 
topo.num_cores : topo.num_packages; 6482 bool *domain_visited = calloc(num_domains, sizeof(bool)); 6483 6484 rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain)); 6485 if (rapl_counter_info_perdomain == NULL) 6486 err(-1, "calloc rapl_counter_info_percpu"); 6487 6488 /* 6489 * Initialize rapl_counter_info_percpu 6490 */ 6491 for (int domain_id = 0; domain_id < num_domains; ++domain_id) { 6492 struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain_id]; 6493 6494 rci->fd_perf = -1; 6495 for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) { 6496 rci->data[i] = 0; 6497 rci->source[i] = RAPL_SOURCE_NONE; 6498 } 6499 } 6500 6501 /* 6502 * Open/probe the counters 6503 * If can't get it via perf, fallback to MSR 6504 */ 6505 for (size_t i = 0; i < ARRAY_SIZE(rapl_counter_arch_infos); ++i) { 6506 6507 const struct rapl_counter_arch_info *const cai = &rapl_counter_arch_infos[i]; 6508 bool has_counter = 0; 6509 double scale; 6510 enum rapl_unit unit; 6511 int next_domain; 6512 6513 memset(domain_visited, 0, num_domains * sizeof(*domain_visited)); 6514 6515 for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) { 6516 6517 if (cpu_is_not_allowed(cpu)) 6518 continue; 6519 6520 /* Skip already seen and handled RAPL domains */ 6521 next_domain = 6522 platform->has_per_core_rapl ? 
cpus[cpu].physical_core_id : cpus[cpu].physical_package_id; 6523 6524 if (domain_visited[next_domain]) 6525 continue; 6526 6527 domain_visited[next_domain] = 1; 6528 6529 struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain]; 6530 6531 /* Check if the counter is enabled and accessible */ 6532 if (BIC_IS_ENABLED(cai->bic) && (platform->rapl_msrs & cai->feature_mask)) { 6533 6534 /* Use perf API for this counter */ 6535 if (!no_perf && cai->perf_name 6536 && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) { 6537 rci->source[cai->rci_index] = RAPL_SOURCE_PERF; 6538 rci->scale[cai->rci_index] = scale * cai->compat_scale; 6539 rci->unit[cai->rci_index] = unit; 6540 rci->flags[cai->rci_index] = cai->flags; 6541 6542 /* Use MSR for this counter */ 6543 } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) { 6544 rci->source[cai->rci_index] = RAPL_SOURCE_MSR; 6545 rci->msr[cai->rci_index] = cai->msr; 6546 rci->msr_mask[cai->rci_index] = cai->msr_mask; 6547 rci->msr_shift[cai->rci_index] = cai->msr_shift; 6548 rci->unit[cai->rci_index] = RAPL_UNIT_JOULES; 6549 rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale; 6550 rci->flags[cai->rci_index] = cai->flags; 6551 } 6552 } 6553 6554 if (rci->source[cai->rci_index] != RAPL_SOURCE_NONE) 6555 has_counter = 1; 6556 } 6557 6558 /* If any CPU has access to the counter, make it present */ 6559 if (has_counter) 6560 BIC_PRESENT(cai->bic); 6561 } 6562 6563 free(domain_visited); 6564 } 6565 6566 static int has_amperf_access_via_msr(void) 6567 { 6568 if (no_msr) 6569 return 0; 6570 6571 if (probe_msr(base_cpu, MSR_IA32_APERF)) 6572 return 0; 6573 6574 if (probe_msr(base_cpu, MSR_IA32_MPERF)) 6575 return 0; 6576 6577 return 1; 6578 } 6579 6580 static int has_amperf_access_via_perf(void) 6581 { 6582 struct amperf_group_fd fds; 6583 6584 /* 6585 * Cache the last result, so we don't warn the user multiple times 6586 * 6587 * Negative means cached, no access 6588 * Zero 
means not cached 6589 * Positive means cached, has access 6590 */ 6591 static int has_access_cached; 6592 6593 if (no_perf) 6594 return 0; 6595 6596 if (has_access_cached != 0) 6597 return has_access_cached > 0; 6598 6599 fds = open_amperf_fd(base_cpu); 6600 has_access_cached = (fds.aperf != -1) && (fds.mperf != -1); 6601 6602 if (fds.aperf == -1) 6603 warnx("Failed to access %s. Some of the counters may not be available\n" 6604 "\tRun as root to enable them or use %s to disable the access explicitly", 6605 "APERF perf counter", "--no-perf"); 6606 else 6607 close(fds.aperf); 6608 6609 if (fds.mperf == -1) 6610 warnx("Failed to access %s. Some of the counters may not be available\n" 6611 "\tRun as root to enable them or use %s to disable the access explicitly", 6612 "MPERF perf counter", "--no-perf"); 6613 else 6614 close(fds.mperf); 6615 6616 if (has_access_cached == 0) 6617 has_access_cached = -1; 6618 6619 return has_access_cached > 0; 6620 } 6621 6622 /* Check if we can access APERF and MPERF */ 6623 static int has_amperf_access(void) 6624 { 6625 if (!is_aperf_access_required()) 6626 return 0; 6627 6628 if (!no_msr && has_amperf_access_via_msr()) 6629 return 1; 6630 6631 if (!no_perf && has_amperf_access_via_perf()) 6632 return 1; 6633 6634 return 0; 6635 } 6636 6637 void probe_cstates(void) 6638 { 6639 probe_cst_limit(); 6640 6641 if (platform->supported_cstates & CC1) 6642 BIC_PRESENT(BIC_CPU_c1); 6643 6644 if (platform->supported_cstates & CC3) 6645 BIC_PRESENT(BIC_CPU_c3); 6646 6647 if (platform->supported_cstates & CC6) 6648 BIC_PRESENT(BIC_CPU_c6); 6649 6650 if (platform->supported_cstates & CC7) 6651 BIC_PRESENT(BIC_CPU_c7); 6652 6653 if (platform->supported_cstates & PC2 && (pkg_cstate_limit >= PCL__2)) 6654 BIC_PRESENT(BIC_Pkgpc2); 6655 6656 if (platform->supported_cstates & PC3 && (pkg_cstate_limit >= PCL__3)) 6657 BIC_PRESENT(BIC_Pkgpc3); 6658 6659 if (platform->supported_cstates & PC6 && (pkg_cstate_limit >= PCL__6)) 6660 BIC_PRESENT(BIC_Pkgpc6); 
6661 6662 if (platform->supported_cstates & PC7 && (pkg_cstate_limit >= PCL__7)) 6663 BIC_PRESENT(BIC_Pkgpc7); 6664 6665 if (platform->supported_cstates & PC8 && (pkg_cstate_limit >= PCL__8)) 6666 BIC_PRESENT(BIC_Pkgpc8); 6667 6668 if (platform->supported_cstates & PC9 && (pkg_cstate_limit >= PCL__9)) 6669 BIC_PRESENT(BIC_Pkgpc9); 6670 6671 if (platform->supported_cstates & PC10 && (pkg_cstate_limit >= PCL_10)) 6672 BIC_PRESENT(BIC_Pkgpc10); 6673 6674 if (platform->has_msr_module_c6_res_ms) 6675 BIC_PRESENT(BIC_Mod_c6); 6676 6677 if (platform->has_ext_cst_msrs && !no_msr) { 6678 BIC_PRESENT(BIC_Totl_c0); 6679 BIC_PRESENT(BIC_Any_c0); 6680 BIC_PRESENT(BIC_GFX_c0); 6681 BIC_PRESENT(BIC_CPUGFX); 6682 } 6683 6684 if (quiet) 6685 return; 6686 6687 dump_power_ctl(); 6688 dump_cst_cfg(); 6689 decode_c6_demotion_policy_msr(); 6690 print_dev_latency(); 6691 dump_sysfs_cstate_config(); 6692 print_irtl(); 6693 } 6694 6695 void probe_lpi(void) 6696 { 6697 if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK)) 6698 BIC_PRESENT(BIC_CPU_LPI); 6699 else 6700 BIC_NOT_PRESENT(BIC_CPU_LPI); 6701 6702 if (!access(sys_lpi_file_sysfs, R_OK)) { 6703 sys_lpi_file = sys_lpi_file_sysfs; 6704 BIC_PRESENT(BIC_SYS_LPI); 6705 } else if (!access(sys_lpi_file_debugfs, R_OK)) { 6706 sys_lpi_file = sys_lpi_file_debugfs; 6707 BIC_PRESENT(BIC_SYS_LPI); 6708 } else { 6709 sys_lpi_file_sysfs = NULL; 6710 BIC_NOT_PRESENT(BIC_SYS_LPI); 6711 } 6712 6713 } 6714 6715 void probe_pstates(void) 6716 { 6717 probe_bclk(); 6718 6719 if (quiet) 6720 return; 6721 6722 dump_platform_info(); 6723 dump_turbo_ratio_info(); 6724 dump_sysfs_pstate_config(); 6725 decode_misc_pwr_mgmt_msr(); 6726 6727 for_all_cpus(print_hwp, ODD_COUNTERS); 6728 for_all_cpus(print_epb, ODD_COUNTERS); 6729 for_all_cpus(print_perf_limit, ODD_COUNTERS); 6730 } 6731 6732 void process_cpuid() 6733 { 6734 unsigned int eax, ebx, ecx, edx; 6735 unsigned int fms, family, model, stepping, ecx_flags, edx_flags; 6736 
unsigned long long ucode_patch = 0; 6737 bool ucode_patch_valid = false; 6738 6739 eax = ebx = ecx = edx = 0; 6740 6741 __cpuid(0, max_level, ebx, ecx, edx); 6742 6743 if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) 6744 genuine_intel = 1; 6745 else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) 6746 authentic_amd = 1; 6747 else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e) 6748 hygon_genuine = 1; 6749 6750 if (!quiet) 6751 fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n", 6752 (char *)&ebx, (char *)&edx, (char *)&ecx, max_level); 6753 6754 __cpuid(1, fms, ebx, ecx, edx); 6755 family = (fms >> 8) & 0xf; 6756 model = (fms >> 4) & 0xf; 6757 stepping = fms & 0xf; 6758 if (family == 0xf) 6759 family += (fms >> 20) & 0xff; 6760 if (family >= 6) 6761 model += ((fms >> 16) & 0xf) << 4; 6762 ecx_flags = ecx; 6763 edx_flags = edx; 6764 6765 if (!no_msr) { 6766 if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch)) 6767 warnx("get_msr(UCODE)"); 6768 else 6769 ucode_patch_valid = true; 6770 } 6771 6772 /* 6773 * check max extended function levels of CPUID. 6774 * This is needed to check for invariant TSC. 6775 * This check is valid for both Intel and AMD. 6776 */ 6777 ebx = ecx = edx = 0; 6778 __cpuid(0x80000000, max_extended_level, ebx, ecx, edx); 6779 6780 if (!quiet) { 6781 fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)", 6782 family, model, stepping, family, model, stepping); 6783 if (ucode_patch_valid) 6784 fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF)); 6785 fputc('\n', outf); 6786 6787 fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level); 6788 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n", 6789 ecx_flags & (1 << 0) ? "SSE3" : "-", 6790 ecx_flags & (1 << 3) ? "MONITOR" : "-", 6791 ecx_flags & (1 << 6) ? "SMX" : "-", 6792 ecx_flags & (1 << 7) ? "EIST" : "-", 6793 ecx_flags & (1 << 8) ? 
"TM2" : "-", 6794 edx_flags & (1 << 4) ? "TSC" : "-", 6795 edx_flags & (1 << 5) ? "MSR" : "-", 6796 edx_flags & (1 << 22) ? "ACPI-TM" : "-", 6797 edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-"); 6798 } 6799 6800 probe_platform_features(family, model); 6801 6802 if (!(edx_flags & (1 << 5))) 6803 errx(1, "CPUID: no MSR"); 6804 6805 if (max_extended_level >= 0x80000007) { 6806 6807 /* 6808 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 6809 * this check is valid for both Intel and AMD 6810 */ 6811 __cpuid(0x80000007, eax, ebx, ecx, edx); 6812 has_invariant_tsc = edx & (1 << 8); 6813 } 6814 6815 /* 6816 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 6817 * this check is valid for both Intel and AMD 6818 */ 6819 6820 __cpuid(0x6, eax, ebx, ecx, edx); 6821 has_aperf = ecx & (1 << 0); 6822 if (has_aperf && has_amperf_access()) { 6823 BIC_PRESENT(BIC_Avg_MHz); 6824 BIC_PRESENT(BIC_Busy); 6825 BIC_PRESENT(BIC_Bzy_MHz); 6826 BIC_PRESENT(BIC_IPC); 6827 } 6828 do_dts = eax & (1 << 0); 6829 if (do_dts) 6830 BIC_PRESENT(BIC_CoreTmp); 6831 has_turbo = eax & (1 << 1); 6832 do_ptm = eax & (1 << 6); 6833 if (do_ptm) 6834 BIC_PRESENT(BIC_PkgTmp); 6835 has_hwp = eax & (1 << 7); 6836 has_hwp_notify = eax & (1 << 8); 6837 has_hwp_activity_window = eax & (1 << 9); 6838 has_hwp_epp = eax & (1 << 10); 6839 has_hwp_pkg = eax & (1 << 11); 6840 has_epb = ecx & (1 << 3); 6841 6842 if (!quiet) 6843 fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, " 6844 "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", 6845 has_aperf ? "" : "No-", 6846 has_turbo ? "" : "No-", 6847 do_dts ? "" : "No-", 6848 do_ptm ? "" : "No-", 6849 has_hwp ? "" : "No-", 6850 has_hwp_notify ? "" : "No-", 6851 has_hwp_activity_window ? "" : "No-", 6852 has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? 
"" : "No-"); 6853 6854 if (!quiet) 6855 decode_misc_enable_msr(); 6856 6857 if (max_level >= 0x7 && !quiet) { 6858 int has_sgx; 6859 6860 ecx = 0; 6861 6862 __cpuid_count(0x7, 0, eax, ebx, ecx, edx); 6863 6864 has_sgx = ebx & (1 << 2); 6865 6866 is_hybrid = edx & (1 << 15); 6867 6868 fprintf(outf, "CPUID(7): %sSGX %sHybrid\n", has_sgx ? "" : "No-", is_hybrid ? "" : "No-"); 6869 6870 if (has_sgx) 6871 decode_feature_control_msr(); 6872 } 6873 6874 if (max_level >= 0x15) { 6875 unsigned int eax_crystal; 6876 unsigned int ebx_tsc; 6877 6878 /* 6879 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz 6880 */ 6881 eax_crystal = ebx_tsc = crystal_hz = edx = 0; 6882 __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx); 6883 6884 if (ebx_tsc != 0) { 6885 if (!quiet && (ebx != 0)) 6886 fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", 6887 eax_crystal, ebx_tsc, crystal_hz); 6888 6889 if (crystal_hz == 0) 6890 crystal_hz = platform->crystal_freq; 6891 6892 if (crystal_hz) { 6893 tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal; 6894 if (!quiet) 6895 fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", 6896 tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); 6897 } 6898 } 6899 } 6900 if (max_level >= 0x16) { 6901 unsigned int base_mhz, max_mhz, bus_mhz, edx; 6902 6903 /* 6904 * CPUID 16H Base MHz, Max MHz, Bus MHz 6905 */ 6906 base_mhz = max_mhz = bus_mhz = edx = 0; 6907 6908 __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx); 6909 6910 bclk = bus_mhz; 6911 6912 base_hz = base_mhz * 1000000; 6913 has_base_hz = 1; 6914 6915 if (platform->enable_tsc_tweak) 6916 tsc_tweak = base_hz / tsc_hz; 6917 6918 if (!quiet) 6919 fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", 6920 base_mhz, max_mhz, bus_mhz); 6921 } 6922 6923 if (has_aperf) 6924 aperf_mperf_multiplier = platform->need_perf_multiplier ? 
1024 : 1; 6925 6926 BIC_PRESENT(BIC_IRQ); 6927 BIC_PRESENT(BIC_TSC_MHz); 6928 } 6929 6930 void probe_pm_features(void) 6931 { 6932 probe_pstates(); 6933 6934 probe_cstates(); 6935 6936 probe_lpi(); 6937 6938 probe_intel_uncore_frequency(); 6939 6940 probe_graphics(); 6941 6942 probe_rapl(); 6943 6944 probe_thermal(); 6945 6946 if (platform->has_nhm_msrs && !no_msr) 6947 BIC_PRESENT(BIC_SMI); 6948 6949 if (!quiet) 6950 decode_misc_feature_control(); 6951 } 6952 6953 /* 6954 * in /dev/cpu/ return success for names that are numbers 6955 * ie. filter out ".", "..", "microcode". 6956 */ 6957 int dir_filter(const struct dirent *dirp) 6958 { 6959 if (isdigit(dirp->d_name[0])) 6960 return 1; 6961 else 6962 return 0; 6963 } 6964 6965 void topology_probe(bool startup) 6966 { 6967 int i; 6968 int max_core_id = 0; 6969 int max_package_id = 0; 6970 int max_die_id = 0; 6971 int max_siblings = 0; 6972 6973 /* Initialize num_cpus, max_cpu_num */ 6974 set_max_cpu_num(); 6975 topo.num_cpus = 0; 6976 for_all_proc_cpus(count_cpus); 6977 if (!summary_only && topo.num_cpus > 1) 6978 BIC_PRESENT(BIC_CPU); 6979 6980 if (debug > 1) 6981 fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); 6982 6983 cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); 6984 if (cpus == NULL) 6985 err(1, "calloc cpus"); 6986 6987 /* 6988 * Allocate and initialize cpu_present_set 6989 */ 6990 cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1)); 6991 if (cpu_present_set == NULL) 6992 err(3, "CPU_ALLOC"); 6993 cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 6994 CPU_ZERO_S(cpu_present_setsize, cpu_present_set); 6995 for_all_proc_cpus(mark_cpu_present); 6996 6997 /* 6998 * Allocate and initialize cpu_effective_set 6999 */ 7000 cpu_effective_set = CPU_ALLOC((topo.max_cpu_num + 1)); 7001 if (cpu_effective_set == NULL) 7002 err(3, "CPU_ALLOC"); 7003 cpu_effective_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 7004 CPU_ZERO_S(cpu_effective_setsize, 
cpu_effective_set); 7005 update_effective_set(startup); 7006 7007 /* 7008 * Allocate and initialize cpu_allowed_set 7009 */ 7010 cpu_allowed_set = CPU_ALLOC((topo.max_cpu_num + 1)); 7011 if (cpu_allowed_set == NULL) 7012 err(3, "CPU_ALLOC"); 7013 cpu_allowed_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 7014 CPU_ZERO_S(cpu_allowed_setsize, cpu_allowed_set); 7015 7016 /* 7017 * Validate and update cpu_allowed_set. 7018 * 7019 * Make sure all cpus in cpu_subset are also in cpu_present_set during startup. 7020 * Give a warning when cpus in cpu_subset become unavailable at runtime. 7021 * Give a warning when cpus are not effective because of cgroup setting. 7022 * 7023 * cpu_allowed_set is the intersection of cpu_present_set/cpu_effective_set/cpu_subset. 7024 */ 7025 for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) { 7026 if (cpu_subset && !CPU_ISSET_S(i, cpu_subset_size, cpu_subset)) 7027 continue; 7028 7029 if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) { 7030 if (cpu_subset) { 7031 /* cpus in cpu_subset must be in cpu_present_set during startup */ 7032 if (startup) 7033 err(1, "cpu%d not present", i); 7034 else 7035 fprintf(stderr, "cpu%d not present\n", i); 7036 } 7037 continue; 7038 } 7039 7040 if (CPU_COUNT_S(cpu_effective_setsize, cpu_effective_set)) { 7041 if (!CPU_ISSET_S(i, cpu_effective_setsize, cpu_effective_set)) { 7042 fprintf(stderr, "cpu%d not effective\n", i); 7043 continue; 7044 } 7045 } 7046 7047 CPU_SET_S(i, cpu_allowed_setsize, cpu_allowed_set); 7048 } 7049 7050 if (!CPU_COUNT_S(cpu_allowed_setsize, cpu_allowed_set)) 7051 err(-ENODEV, "No valid cpus found"); 7052 sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set); 7053 7054 /* 7055 * Allocate and initialize cpu_affinity_set 7056 */ 7057 cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1)); 7058 if (cpu_affinity_set == NULL) 7059 err(3, "CPU_ALLOC"); 7060 cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 7061 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 7062 7063 
for_all_proc_cpus(init_thread_id); 7064 7065 /* 7066 * For online cpus 7067 * find max_core_id, max_package_id 7068 */ 7069 for (i = 0; i <= topo.max_cpu_num; ++i) { 7070 int siblings; 7071 7072 if (cpu_is_not_present(i)) { 7073 if (debug > 1) 7074 fprintf(outf, "cpu%d NOT PRESENT\n", i); 7075 continue; 7076 } 7077 7078 cpus[i].logical_cpu_id = i; 7079 7080 /* get package information */ 7081 cpus[i].physical_package_id = get_physical_package_id(i); 7082 if (cpus[i].physical_package_id > max_package_id) 7083 max_package_id = cpus[i].physical_package_id; 7084 7085 /* get die information */ 7086 cpus[i].die_id = get_die_id(i); 7087 if (cpus[i].die_id > max_die_id) 7088 max_die_id = cpus[i].die_id; 7089 7090 /* get numa node information */ 7091 cpus[i].physical_node_id = get_physical_node_id(&cpus[i]); 7092 if (cpus[i].physical_node_id > topo.max_node_num) 7093 topo.max_node_num = cpus[i].physical_node_id; 7094 7095 /* get core information */ 7096 cpus[i].physical_core_id = get_core_id(i); 7097 if (cpus[i].physical_core_id > max_core_id) 7098 max_core_id = cpus[i].physical_core_id; 7099 7100 /* get thread information */ 7101 siblings = get_thread_siblings(&cpus[i]); 7102 if (siblings > max_siblings) 7103 max_siblings = siblings; 7104 if (cpus[i].thread_id == 0) 7105 topo.num_cores++; 7106 } 7107 7108 topo.cores_per_node = max_core_id + 1; 7109 if (debug > 1) 7110 fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node); 7111 if (!summary_only && topo.cores_per_node > 1) 7112 BIC_PRESENT(BIC_Core); 7113 7114 topo.num_die = max_die_id + 1; 7115 if (debug > 1) 7116 fprintf(outf, "max_die_id %d, sizing for %d die\n", max_die_id, topo.num_die); 7117 if (!summary_only && topo.num_die > 1) 7118 BIC_PRESENT(BIC_Die); 7119 7120 topo.num_packages = max_package_id + 1; 7121 if (debug > 1) 7122 fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); 7123 if (!summary_only && topo.num_packages > 1) 
7124 BIC_PRESENT(BIC_Package); 7125 7126 set_node_data(); 7127 if (debug > 1) 7128 fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg); 7129 if (!summary_only && topo.nodes_per_pkg > 1) 7130 BIC_PRESENT(BIC_Node); 7131 7132 topo.threads_per_core = max_siblings; 7133 if (debug > 1) 7134 fprintf(outf, "max_siblings %d\n", max_siblings); 7135 7136 if (debug < 1) 7137 return; 7138 7139 for (i = 0; i <= topo.max_cpu_num; ++i) { 7140 if (cpu_is_not_present(i)) 7141 continue; 7142 fprintf(outf, 7143 "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n", 7144 i, cpus[i].physical_package_id, cpus[i].die_id, 7145 cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, cpus[i].thread_id); 7146 } 7147 7148 } 7149 7150 void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) 7151 { 7152 int i; 7153 int num_cores = topo.cores_per_node * topo.nodes_per_pkg * topo.num_packages; 7154 int num_threads = topo.threads_per_core * num_cores; 7155 7156 *t = calloc(num_threads, sizeof(struct thread_data)); 7157 if (*t == NULL) 7158 goto error; 7159 7160 for (i = 0; i < num_threads; i++) 7161 (*t)[i].cpu_id = -1; 7162 7163 *c = calloc(num_cores, sizeof(struct core_data)); 7164 if (*c == NULL) 7165 goto error; 7166 7167 for (i = 0; i < num_cores; i++) { 7168 (*c)[i].core_id = -1; 7169 (*c)[i].base_cpu = -1; 7170 } 7171 7172 *p = calloc(topo.num_packages, sizeof(struct pkg_data)); 7173 if (*p == NULL) 7174 goto error; 7175 7176 for (i = 0; i < topo.num_packages; i++) { 7177 (*p)[i].package_id = i; 7178 (*p)[i].base_cpu = -1; 7179 } 7180 7181 return; 7182 error: 7183 err(1, "calloc counters"); 7184 } 7185 7186 /* 7187 * init_counter() 7188 * 7189 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE 7190 */ 7191 void init_counter(struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base, int cpu_id) 7192 { 7193 int pkg_id = cpus[cpu_id].physical_package_id; 7194 int node_id = 
cpus[cpu_id].logical_node_id; 7195 int core_id = cpus[cpu_id].physical_core_id; 7196 int thread_id = cpus[cpu_id].thread_id; 7197 struct thread_data *t; 7198 struct core_data *c; 7199 struct pkg_data *p; 7200 7201 /* Workaround for systems where physical_node_id==-1 7202 * and logical_node_id==(-1 - topo.num_cpus) 7203 */ 7204 if (node_id < 0) 7205 node_id = 0; 7206 7207 t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id); 7208 c = GET_CORE(core_base, core_id, node_id, pkg_id); 7209 p = GET_PKG(pkg_base, pkg_id); 7210 7211 t->cpu_id = cpu_id; 7212 if (!cpu_is_not_allowed(cpu_id)) { 7213 if (c->base_cpu < 0) 7214 c->base_cpu = t->cpu_id; 7215 if (p->base_cpu < 0) 7216 p->base_cpu = t->cpu_id; 7217 } 7218 7219 c->core_id = core_id; 7220 p->package_id = pkg_id; 7221 } 7222 7223 int initialize_counters(int cpu_id) 7224 { 7225 init_counter(EVEN_COUNTERS, cpu_id); 7226 init_counter(ODD_COUNTERS, cpu_id); 7227 return 0; 7228 } 7229 7230 void allocate_output_buffer() 7231 { 7232 output_buffer = calloc(1, (1 + topo.num_cpus) * 2048); 7233 outp = output_buffer; 7234 if (outp == NULL) 7235 err(-1, "calloc output buffer"); 7236 } 7237 7238 void allocate_fd_percpu(void) 7239 { 7240 fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 7241 if (fd_percpu == NULL) 7242 err(-1, "calloc fd_percpu"); 7243 } 7244 7245 void allocate_irq_buffers(void) 7246 { 7247 irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int)); 7248 if (irq_column_2_cpu == NULL) 7249 err(-1, "calloc %d", topo.num_cpus); 7250 7251 irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 7252 if (irqs_per_cpu == NULL) 7253 err(-1, "calloc %d", topo.max_cpu_num + 1); 7254 } 7255 7256 int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p) 7257 { 7258 topo.allowed_cpus++; 7259 if ((int)t->cpu_id == c->base_cpu) 7260 topo.allowed_cores++; 7261 if ((int)t->cpu_id == p->base_cpu) 7262 topo.allowed_packages++; 7263 7264 return 0; 7265 } 7266 7267 void topology_update(void) 7268 { 
7269 topo.allowed_cpus = 0; 7270 topo.allowed_cores = 0; 7271 topo.allowed_packages = 0; 7272 for_all_cpus(update_topo, ODD_COUNTERS); 7273 } 7274 7275 void setup_all_buffers(bool startup) 7276 { 7277 topology_probe(startup); 7278 allocate_irq_buffers(); 7279 allocate_fd_percpu(); 7280 allocate_counters(&thread_even, &core_even, &package_even); 7281 allocate_counters(&thread_odd, &core_odd, &package_odd); 7282 allocate_output_buffer(); 7283 for_all_proc_cpus(initialize_counters); 7284 topology_update(); 7285 } 7286 7287 void set_base_cpu(void) 7288 { 7289 int i; 7290 7291 for (i = 0; i < topo.max_cpu_num + 1; ++i) { 7292 if (cpu_is_not_allowed(i)) 7293 continue; 7294 base_cpu = i; 7295 if (debug > 1) 7296 fprintf(outf, "base_cpu = %d\n", base_cpu); 7297 return; 7298 } 7299 err(-ENODEV, "No valid cpus found"); 7300 } 7301 7302 static void set_amperf_source(void) 7303 { 7304 amperf_source = AMPERF_SOURCE_PERF; 7305 7306 const bool aperf_required = is_aperf_access_required(); 7307 7308 if (no_perf || !aperf_required || !has_amperf_access_via_perf()) 7309 amperf_source = AMPERF_SOURCE_MSR; 7310 7311 if (quiet || !debug) 7312 return; 7313 7314 fprintf(outf, "aperf/mperf source preference: %s\n", amperf_source == AMPERF_SOURCE_MSR ? "msr" : "perf"); 7315 } 7316 7317 bool has_added_counters(void) 7318 { 7319 /* 7320 * It only makes sense to call this after the command line is parsed, 7321 * otherwise sys structure is not populated. 
7322 */ 7323 7324 return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters; 7325 } 7326 7327 bool is_msr_access_required(void) 7328 { 7329 if (no_msr) 7330 return false; 7331 7332 if (has_added_counters()) 7333 return true; 7334 7335 return BIC_IS_ENABLED(BIC_SMI) 7336 || BIC_IS_ENABLED(BIC_CPU_c1) 7337 || BIC_IS_ENABLED(BIC_CPU_c3) 7338 || BIC_IS_ENABLED(BIC_CPU_c6) 7339 || BIC_IS_ENABLED(BIC_CPU_c7) 7340 || BIC_IS_ENABLED(BIC_Mod_c6) 7341 || BIC_IS_ENABLED(BIC_CoreTmp) 7342 || BIC_IS_ENABLED(BIC_Totl_c0) 7343 || BIC_IS_ENABLED(BIC_Any_c0) 7344 || BIC_IS_ENABLED(BIC_GFX_c0) 7345 || BIC_IS_ENABLED(BIC_CPUGFX) 7346 || BIC_IS_ENABLED(BIC_Pkgpc3) 7347 || BIC_IS_ENABLED(BIC_Pkgpc6) 7348 || BIC_IS_ENABLED(BIC_Pkgpc2) 7349 || BIC_IS_ENABLED(BIC_Pkgpc7) 7350 || BIC_IS_ENABLED(BIC_Pkgpc8) 7351 || BIC_IS_ENABLED(BIC_Pkgpc9) 7352 || BIC_IS_ENABLED(BIC_Pkgpc10) 7353 /* TODO: Multiplex access with perf */ 7354 || BIC_IS_ENABLED(BIC_CorWatt) 7355 || BIC_IS_ENABLED(BIC_Cor_J) 7356 || BIC_IS_ENABLED(BIC_PkgWatt) 7357 || BIC_IS_ENABLED(BIC_CorWatt) 7358 || BIC_IS_ENABLED(BIC_GFXWatt) 7359 || BIC_IS_ENABLED(BIC_RAMWatt) 7360 || BIC_IS_ENABLED(BIC_Pkg_J) 7361 || BIC_IS_ENABLED(BIC_Cor_J) 7362 || BIC_IS_ENABLED(BIC_GFX_J) 7363 || BIC_IS_ENABLED(BIC_RAM_J) 7364 || BIC_IS_ENABLED(BIC_PKG__) 7365 || BIC_IS_ENABLED(BIC_RAM__) 7366 || BIC_IS_ENABLED(BIC_PkgTmp) 7367 || (is_aperf_access_required() && !has_amperf_access_via_perf()); 7368 } 7369 7370 void check_msr_access(void) 7371 { 7372 if (!is_msr_access_required()) 7373 no_msr = 1; 7374 7375 check_dev_msr(); 7376 check_msr_permission(); 7377 7378 if (no_msr) 7379 bic_disable_msr_access(); 7380 } 7381 7382 void check_perf_access(void) 7383 { 7384 const bool intrcount_required = BIC_IS_ENABLED(BIC_IPC); 7385 7386 if (no_perf || !intrcount_required || !has_instr_count_access()) 7387 bic_enabled &= ~BIC_IPC; 7388 7389 const bool aperf_required = is_aperf_access_required(); 7390 7391 if (!aperf_required || 
!has_amperf_access()) { 7392 bic_enabled &= ~BIC_Avg_MHz; 7393 bic_enabled &= ~BIC_Busy; 7394 bic_enabled &= ~BIC_Bzy_MHz; 7395 bic_enabled &= ~BIC_IPC; 7396 } 7397 } 7398 7399 void turbostat_init() 7400 { 7401 setup_all_buffers(true); 7402 set_base_cpu(); 7403 check_msr_access(); 7404 check_perf_access(); 7405 process_cpuid(); 7406 probe_pm_features(); 7407 set_amperf_source(); 7408 linux_perf_init(); 7409 rapl_perf_init(); 7410 7411 for_all_cpus(get_cpu_type, ODD_COUNTERS); 7412 for_all_cpus(get_cpu_type, EVEN_COUNTERS); 7413 7414 if (DO_BIC(BIC_IPC)) 7415 (void)get_instr_count_fd(base_cpu); 7416 7417 /* 7418 * If TSC tweak is needed, but couldn't get it, 7419 * disable more BICs, since it can't be reported accurately. 7420 */ 7421 if (platform->enable_tsc_tweak && !has_base_hz) { 7422 bic_enabled &= ~BIC_Busy; 7423 bic_enabled &= ~BIC_Bzy_MHz; 7424 } 7425 } 7426 7427 int fork_it(char **argv) 7428 { 7429 pid_t child_pid; 7430 int status; 7431 7432 snapshot_proc_sysfs_files(); 7433 status = for_all_cpus(get_counters, EVEN_COUNTERS); 7434 first_counter_read = 0; 7435 if (status) 7436 exit(status); 7437 gettimeofday(&tv_even, (struct timezone *)NULL); 7438 7439 child_pid = fork(); 7440 if (!child_pid) { 7441 /* child */ 7442 execvp(argv[0], argv); 7443 err(errno, "exec %s", argv[0]); 7444 } else { 7445 7446 /* parent */ 7447 if (child_pid == -1) 7448 err(1, "fork"); 7449 7450 signal(SIGINT, SIG_IGN); 7451 signal(SIGQUIT, SIG_IGN); 7452 if (waitpid(child_pid, &status, 0) == -1) 7453 err(status, "waitpid"); 7454 7455 if (WIFEXITED(status)) 7456 status = WEXITSTATUS(status); 7457 } 7458 /* 7459 * n.b. 
fork_it() does not check for errors from for_all_cpus() 7460 * because re-starting is problematic when forking 7461 */ 7462 snapshot_proc_sysfs_files(); 7463 for_all_cpus(get_counters, ODD_COUNTERS); 7464 gettimeofday(&tv_odd, (struct timezone *)NULL); 7465 timersub(&tv_odd, &tv_even, &tv_delta); 7466 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) 7467 fprintf(outf, "%s: Counter reset detected\n", progname); 7468 else { 7469 compute_average(EVEN_COUNTERS); 7470 format_all_counters(EVEN_COUNTERS); 7471 } 7472 7473 fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0); 7474 7475 flush_output_stderr(); 7476 7477 return status; 7478 } 7479 7480 int get_and_dump_counters(void) 7481 { 7482 int status; 7483 7484 snapshot_proc_sysfs_files(); 7485 status = for_all_cpus(get_counters, ODD_COUNTERS); 7486 if (status) 7487 return status; 7488 7489 status = for_all_cpus(dump_counters, ODD_COUNTERS); 7490 if (status) 7491 return status; 7492 7493 flush_output_stdout(); 7494 7495 return status; 7496 } 7497 7498 void print_version() 7499 { 7500 fprintf(outf, "turbostat version 2024.04.08 - Len Brown <lenb@kernel.org>\n"); 7501 } 7502 7503 #define COMMAND_LINE_SIZE 2048 7504 7505 void print_bootcmd(void) 7506 { 7507 char bootcmd[COMMAND_LINE_SIZE]; 7508 FILE *fp; 7509 int ret; 7510 7511 memset(bootcmd, 0, COMMAND_LINE_SIZE); 7512 fp = fopen("/proc/cmdline", "r"); 7513 if (!fp) 7514 return; 7515 7516 ret = fread(bootcmd, sizeof(char), COMMAND_LINE_SIZE - 1, fp); 7517 if (ret) { 7518 bootcmd[ret] = '\0'; 7519 /* the last character is already '\n' */ 7520 fprintf(outf, "Kernel command line: %s", bootcmd); 7521 } 7522 7523 fclose(fp); 7524 } 7525 7526 int add_counter(unsigned int msr_num, char *path, char *name, 7527 unsigned int width, enum counter_scope scope, 7528 enum counter_type type, enum counter_format format, int flags) 7529 { 7530 struct msr_counter *msrp; 7531 7532 if (no_msr && msr_num) 7533 errx(1, "Requested MSR counter 0x%x, but in 
--no-msr mode", msr_num); 7534 7535 msrp = calloc(1, sizeof(struct msr_counter)); 7536 if (msrp == NULL) { 7537 perror("calloc"); 7538 exit(1); 7539 } 7540 7541 msrp->msr_num = msr_num; 7542 strncpy(msrp->name, name, NAME_BYTES - 1); 7543 if (path) 7544 strncpy(msrp->path, path, PATH_BYTES - 1); 7545 msrp->width = width; 7546 msrp->type = type; 7547 msrp->format = format; 7548 msrp->flags = flags; 7549 7550 switch (scope) { 7551 7552 case SCOPE_CPU: 7553 msrp->next = sys.tp; 7554 sys.tp = msrp; 7555 sys.added_thread_counters++; 7556 if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) { 7557 fprintf(stderr, "exceeded max %d added thread counters\n", MAX_ADDED_COUNTERS); 7558 exit(-1); 7559 } 7560 break; 7561 7562 case SCOPE_CORE: 7563 msrp->next = sys.cp; 7564 sys.cp = msrp; 7565 sys.added_core_counters++; 7566 if (sys.added_core_counters > MAX_ADDED_COUNTERS) { 7567 fprintf(stderr, "exceeded max %d added core counters\n", MAX_ADDED_COUNTERS); 7568 exit(-1); 7569 } 7570 break; 7571 7572 case SCOPE_PACKAGE: 7573 msrp->next = sys.pp; 7574 sys.pp = msrp; 7575 sys.added_package_counters++; 7576 if (sys.added_package_counters > MAX_ADDED_COUNTERS) { 7577 fprintf(stderr, "exceeded max %d added package counters\n", MAX_ADDED_COUNTERS); 7578 exit(-1); 7579 } 7580 break; 7581 } 7582 7583 return 0; 7584 } 7585 7586 void parse_add_command(char *add_command) 7587 { 7588 int msr_num = 0; 7589 char *path = NULL; 7590 char name_buffer[NAME_BYTES] = ""; 7591 int width = 64; 7592 int fail = 0; 7593 enum counter_scope scope = SCOPE_CPU; 7594 enum counter_type type = COUNTER_CYCLES; 7595 enum counter_format format = FORMAT_DELTA; 7596 7597 while (add_command) { 7598 7599 if (sscanf(add_command, "msr0x%x", &msr_num) == 1) 7600 goto next; 7601 7602 if (sscanf(add_command, "msr%d", &msr_num) == 1) 7603 goto next; 7604 7605 if (*add_command == '/') { 7606 path = add_command; 7607 goto next; 7608 } 7609 7610 if (sscanf(add_command, "u%d", &width) == 1) { 7611 if ((width == 32) || 
(width == 64)) 7612 goto next; 7613 width = 64; 7614 } 7615 if (!strncmp(add_command, "cpu", strlen("cpu"))) { 7616 scope = SCOPE_CPU; 7617 goto next; 7618 } 7619 if (!strncmp(add_command, "core", strlen("core"))) { 7620 scope = SCOPE_CORE; 7621 goto next; 7622 } 7623 if (!strncmp(add_command, "package", strlen("package"))) { 7624 scope = SCOPE_PACKAGE; 7625 goto next; 7626 } 7627 if (!strncmp(add_command, "cycles", strlen("cycles"))) { 7628 type = COUNTER_CYCLES; 7629 goto next; 7630 } 7631 if (!strncmp(add_command, "seconds", strlen("seconds"))) { 7632 type = COUNTER_SECONDS; 7633 goto next; 7634 } 7635 if (!strncmp(add_command, "usec", strlen("usec"))) { 7636 type = COUNTER_USEC; 7637 goto next; 7638 } 7639 if (!strncmp(add_command, "raw", strlen("raw"))) { 7640 format = FORMAT_RAW; 7641 goto next; 7642 } 7643 if (!strncmp(add_command, "delta", strlen("delta"))) { 7644 format = FORMAT_DELTA; 7645 goto next; 7646 } 7647 if (!strncmp(add_command, "percent", strlen("percent"))) { 7648 format = FORMAT_PERCENT; 7649 goto next; 7650 } 7651 7652 if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) { /* 18 < NAME_BYTES */ 7653 char *eos; 7654 7655 eos = strchr(name_buffer, ','); 7656 if (eos) 7657 *eos = '\0'; 7658 goto next; 7659 } 7660 7661 next: 7662 add_command = strchr(add_command, ','); 7663 if (add_command) { 7664 *add_command = '\0'; 7665 add_command++; 7666 } 7667 7668 } 7669 if ((msr_num == 0) && (path == NULL)) { 7670 fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n"); 7671 fail++; 7672 } 7673 7674 /* generate default column header */ 7675 if (*name_buffer == '\0') { 7676 if (width == 32) 7677 sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); 7678 else 7679 sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? 
"%" : ""); 7680 } 7681 7682 if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0)) 7683 fail++; 7684 7685 if (fail) { 7686 help(); 7687 exit(1); 7688 } 7689 } 7690 7691 int is_deferred_add(char *name) 7692 { 7693 int i; 7694 7695 for (i = 0; i < deferred_add_index; ++i) 7696 if (!strcmp(name, deferred_add_names[i])) 7697 return 1; 7698 return 0; 7699 } 7700 7701 int is_deferred_skip(char *name) 7702 { 7703 int i; 7704 7705 for (i = 0; i < deferred_skip_index; ++i) 7706 if (!strcmp(name, deferred_skip_names[i])) 7707 return 1; 7708 return 0; 7709 } 7710 7711 void probe_sysfs(void) 7712 { 7713 char path[64]; 7714 char name_buf[16]; 7715 FILE *input; 7716 int state; 7717 char *sp; 7718 7719 for (state = 10; state >= 0; --state) { 7720 7721 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); 7722 input = fopen(path, "r"); 7723 if (input == NULL) 7724 continue; 7725 if (!fgets(name_buf, sizeof(name_buf), input)) 7726 err(1, "%s: failed to read file", path); 7727 7728 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ 7729 sp = strchr(name_buf, '-'); 7730 if (!sp) 7731 sp = strchrnul(name_buf, '\n'); 7732 *sp = '%'; 7733 *(sp + 1) = '\0'; 7734 7735 remove_underbar(name_buf); 7736 7737 fclose(input); 7738 7739 sprintf(path, "cpuidle/state%d/time", state); 7740 7741 if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) 7742 continue; 7743 7744 if (is_deferred_skip(name_buf)) 7745 continue; 7746 7747 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU); 7748 } 7749 7750 for (state = 10; state >= 0; --state) { 7751 7752 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); 7753 input = fopen(path, "r"); 7754 if (input == NULL) 7755 continue; 7756 if (!fgets(name_buf, sizeof(name_buf), input)) 7757 err(1, "%s: failed to read file", path); 7758 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ 7759 sp = strchr(name_buf, '-'); 7760 
if (!sp) 7761 sp = strchrnul(name_buf, '\n'); 7762 *sp = '\0'; 7763 fclose(input); 7764 7765 remove_underbar(name_buf); 7766 7767 sprintf(path, "cpuidle/state%d/usage", state); 7768 7769 if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) 7770 continue; 7771 7772 if (is_deferred_skip(name_buf)) 7773 continue; 7774 7775 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU); 7776 } 7777 7778 } 7779 7780 /* 7781 * parse cpuset with following syntax 7782 * 1,2,4..6,8-10 and set bits in cpu_subset 7783 */ 7784 void parse_cpu_command(char *optarg) 7785 { 7786 if (!strcmp(optarg, "core")) { 7787 if (cpu_subset) 7788 goto error; 7789 show_core_only++; 7790 return; 7791 } 7792 if (!strcmp(optarg, "package")) { 7793 if (cpu_subset) 7794 goto error; 7795 show_pkg_only++; 7796 return; 7797 } 7798 if (show_core_only || show_pkg_only) 7799 goto error; 7800 7801 cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS); 7802 if (cpu_subset == NULL) 7803 err(3, "CPU_ALLOC"); 7804 cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS); 7805 7806 CPU_ZERO_S(cpu_subset_size, cpu_subset); 7807 7808 if (parse_cpu_str(optarg, cpu_subset, cpu_subset_size)) 7809 goto error; 7810 7811 return; 7812 7813 error: 7814 fprintf(stderr, "\"--cpu %s\" malformed\n", optarg); 7815 help(); 7816 exit(-1); 7817 } 7818 7819 void cmdline(int argc, char **argv) 7820 { 7821 int opt; 7822 int option_index = 0; 7823 static struct option long_options[] = { 7824 { "add", required_argument, 0, 'a' }, 7825 { "cpu", required_argument, 0, 'c' }, 7826 { "Dump", no_argument, 0, 'D' }, 7827 { "debug", no_argument, 0, 'd' }, /* internal, not documented */ 7828 { "enable", required_argument, 0, 'e' }, 7829 { "interval", required_argument, 0, 'i' }, 7830 { "IPC", no_argument, 0, 'I' }, 7831 { "num_iterations", required_argument, 0, 'n' }, 7832 { "header_iterations", required_argument, 0, 'N' }, 7833 { "help", no_argument, 0, 'h' }, 7834 { "hide", required_argument, 0, 'H' }, // meh, -h taken by 
--help 7835 { "Joules", no_argument, 0, 'J' }, 7836 { "list", no_argument, 0, 'l' }, 7837 { "out", required_argument, 0, 'o' }, 7838 { "quiet", no_argument, 0, 'q' }, 7839 { "no-msr", no_argument, 0, 'M' }, 7840 { "no-perf", no_argument, 0, 'P' }, 7841 { "show", required_argument, 0, 's' }, 7842 { "Summary", no_argument, 0, 'S' }, 7843 { "TCC", required_argument, 0, 'T' }, 7844 { "version", no_argument, 0, 'v' }, 7845 { 0, 0, 0, 0 } 7846 }; 7847 7848 progname = argv[0]; 7849 7850 /* 7851 * Parse some options early, because they may make other options invalid, 7852 * like adding the MSR counter with --add and at the same time using --no-msr. 7853 */ 7854 while ((opt = getopt_long_only(argc, argv, "MP", long_options, &option_index)) != -1) { 7855 switch (opt) { 7856 case 'M': 7857 no_msr = 1; 7858 break; 7859 case 'P': 7860 no_perf = 1; 7861 break; 7862 default: 7863 break; 7864 } 7865 } 7866 optind = 0; 7867 7868 while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qMST:v", long_options, &option_index)) != -1) { 7869 switch (opt) { 7870 case 'a': 7871 parse_add_command(optarg); 7872 break; 7873 case 'c': 7874 parse_cpu_command(optarg); 7875 break; 7876 case 'D': 7877 dump_only++; 7878 break; 7879 case 'e': 7880 /* --enable specified counter */ 7881 bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST); 7882 break; 7883 case 'd': 7884 debug++; 7885 ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); 7886 break; 7887 case 'H': 7888 /* 7889 * --hide: do not show those specified 7890 * multiple invocations simply clear more bits in enabled mask 7891 */ 7892 bic_enabled &= ~bic_lookup(optarg, HIDE_LIST); 7893 break; 7894 case 'h': 7895 default: 7896 help(); 7897 exit(1); 7898 case 'i': 7899 { 7900 double interval = strtod(optarg, NULL); 7901 7902 if (interval < 0.001) { 7903 fprintf(outf, "interval %f seconds is too small\n", interval); 7904 exit(2); 7905 } 7906 7907 interval_tv.tv_sec = interval_ts.tv_sec = interval; 7908 interval_tv.tv_usec = (interval - 
interval_tv.tv_sec) * 1000000; 7909 interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000; 7910 } 7911 break; 7912 case 'J': 7913 rapl_joules++; 7914 break; 7915 case 'l': 7916 ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); 7917 list_header_only++; 7918 quiet++; 7919 break; 7920 case 'o': 7921 outf = fopen_or_die(optarg, "w"); 7922 break; 7923 case 'q': 7924 quiet = 1; 7925 break; 7926 case 'M': 7927 case 'P': 7928 /* Parsed earlier */ 7929 break; 7930 case 'n': 7931 num_iterations = strtod(optarg, NULL); 7932 7933 if (num_iterations <= 0) { 7934 fprintf(outf, "iterations %d should be positive number\n", num_iterations); 7935 exit(2); 7936 } 7937 break; 7938 case 'N': 7939 header_iterations = strtod(optarg, NULL); 7940 7941 if (header_iterations <= 0) { 7942 fprintf(outf, "iterations %d should be positive number\n", header_iterations); 7943 exit(2); 7944 } 7945 break; 7946 case 's': 7947 /* 7948 * --show: show only those specified 7949 * The 1st invocation will clear and replace the enabled mask 7950 * subsequent invocations can add to it. 
7951 */ 7952 if (shown == 0) 7953 bic_enabled = bic_lookup(optarg, SHOW_LIST); 7954 else 7955 bic_enabled |= bic_lookup(optarg, SHOW_LIST); 7956 shown = 1; 7957 break; 7958 case 'S': 7959 summary_only++; 7960 break; 7961 case 'T': 7962 tj_max_override = atoi(optarg); 7963 break; 7964 case 'v': 7965 print_version(); 7966 exit(0); 7967 break; 7968 } 7969 } 7970 } 7971 7972 void set_rlimit(void) 7973 { 7974 struct rlimit limit; 7975 7976 if (getrlimit(RLIMIT_NOFILE, &limit) < 0) 7977 err(1, "Failed to get rlimit"); 7978 7979 if (limit.rlim_max < MAX_NOFILE) 7980 limit.rlim_max = MAX_NOFILE; 7981 if (limit.rlim_cur < MAX_NOFILE) 7982 limit.rlim_cur = MAX_NOFILE; 7983 7984 if (setrlimit(RLIMIT_NOFILE, &limit) < 0) 7985 err(1, "Failed to set rlimit"); 7986 } 7987 7988 int main(int argc, char **argv) 7989 { 7990 int fd, ret; 7991 7992 fd = open("/sys/fs/cgroup/cgroup.procs", O_WRONLY); 7993 if (fd < 0) 7994 goto skip_cgroup_setting; 7995 7996 ret = write(fd, "0\n", 2); 7997 if (ret == -1) 7998 perror("Can't update cgroup\n"); 7999 8000 close(fd); 8001 8002 skip_cgroup_setting: 8003 outf = stderr; 8004 cmdline(argc, argv); 8005 8006 if (!quiet) { 8007 print_version(); 8008 print_bootcmd(); 8009 } 8010 8011 probe_sysfs(); 8012 8013 if (!getuid()) 8014 set_rlimit(); 8015 8016 turbostat_init(); 8017 8018 if (!no_msr) 8019 msr_sum_record(); 8020 8021 /* dump counters and exit */ 8022 if (dump_only) 8023 return get_and_dump_counters(); 8024 8025 /* list header and exit */ 8026 if (list_header_only) { 8027 print_header(","); 8028 flush_output_stdout(); 8029 return 0; 8030 } 8031 8032 /* 8033 * if any params left, it must be a command to fork 8034 */ 8035 if (argc - optind) 8036 return fork_it(argv + optind); 8037 else 8038 turbostat_loop(); 8039 8040 return 0; 8041 } 8042