1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * turbostat -- show CPU frequency and C-state residency 4 * on modern Intel and AMD processors. 5 * 6 * Copyright (c) 2024 Intel Corporation. 7 * Len Brown <len.brown@intel.com> 8 */ 9 10 #define _GNU_SOURCE 11 #include MSRHEADER 12 #include INTEL_FAMILY_HEADER 13 #include <stdarg.h> 14 #include <stdio.h> 15 #include <err.h> 16 #include <unistd.h> 17 #include <sys/types.h> 18 #include <sys/wait.h> 19 #include <sys/stat.h> 20 #include <sys/select.h> 21 #include <sys/resource.h> 22 #include <fcntl.h> 23 #include <signal.h> 24 #include <sys/time.h> 25 #include <stdlib.h> 26 #include <getopt.h> 27 #include <dirent.h> 28 #include <string.h> 29 #include <ctype.h> 30 #include <sched.h> 31 #include <time.h> 32 #include <cpuid.h> 33 #include <sys/capability.h> 34 #include <errno.h> 35 #include <math.h> 36 #include <linux/perf_event.h> 37 #include <asm/unistd.h> 38 #include <stdbool.h> 39 #include <assert.h> 40 #include <linux/kernel.h> 41 #include <linux/build_bug.h> 42 43 #define UNUSED(x) (void)(x) 44 45 /* 46 * This list matches the column headers, except 47 * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time 48 * 2. Core and CPU are moved to the end, we can't have strings that contain them 49 * matching on them for --show and --hide. 
50 */ 51 52 /* 53 * buffer size used by sscanf() for added column names 54 * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters 55 */ 56 #define NAME_BYTES 20 57 #define PATH_BYTES 128 58 59 #define MAX_NOFILE 0x8000 60 61 enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; 62 enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M }; 63 enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE }; 64 enum amperf_source { AMPERF_SOURCE_PERF, AMPERF_SOURCE_MSR }; 65 enum rapl_source { RAPL_SOURCE_NONE, RAPL_SOURCE_PERF, RAPL_SOURCE_MSR }; 66 enum cstate_source { CSTATE_SOURCE_NONE, CSTATE_SOURCE_PERF, CSTATE_SOURCE_MSR }; 67 68 struct sysfs_path { 69 char path[PATH_BYTES]; 70 int id; 71 struct sysfs_path *next; 72 }; 73 74 struct msr_counter { 75 unsigned int msr_num; 76 char name[NAME_BYTES]; 77 struct sysfs_path *sp; 78 unsigned int width; 79 enum counter_type type; 80 enum counter_format format; 81 struct msr_counter *next; 82 unsigned int flags; 83 #define FLAGS_HIDE (1 << 0) 84 #define FLAGS_SHOW (1 << 1) 85 #define SYSFS_PERCPU (1 << 1) 86 }; 87 88 struct msr_counter bic[] = { 89 { 0x0, "usec", NULL, 0, 0, 0, NULL, 0 }, 90 { 0x0, "Time_Of_Day_Seconds", NULL, 0, 0, 0, NULL, 0 }, 91 { 0x0, "Package", NULL, 0, 0, 0, NULL, 0 }, 92 { 0x0, "Node", NULL, 0, 0, 0, NULL, 0 }, 93 { 0x0, "Avg_MHz", NULL, 0, 0, 0, NULL, 0 }, 94 { 0x0, "Busy%", NULL, 0, 0, 0, NULL, 0 }, 95 { 0x0, "Bzy_MHz", NULL, 0, 0, 0, NULL, 0 }, 96 { 0x0, "TSC_MHz", NULL, 0, 0, 0, NULL, 0 }, 97 { 0x0, "IRQ", NULL, 0, 0, 0, NULL, 0 }, 98 { 0x0, "SMI", NULL, 32, 0, FORMAT_DELTA, NULL, 0 }, 99 { 0x0, "sysfs", NULL, 0, 0, 0, NULL, 0 }, 100 { 0x0, "CPU%c1", NULL, 0, 0, 0, NULL, 0 }, 101 { 0x0, "CPU%c3", NULL, 0, 0, 0, NULL, 0 }, 102 { 0x0, "CPU%c6", NULL, 0, 0, 0, NULL, 0 }, 103 { 0x0, "CPU%c7", NULL, 0, 0, 0, NULL, 0 }, 104 { 0x0, "ThreadC", NULL, 0, 0, 0, NULL, 0 }, 105 { 0x0, "CoreTmp", NULL, 0, 0, 0, 
NULL, 0 }, 106 { 0x0, "CoreCnt", NULL, 0, 0, 0, NULL, 0 }, 107 { 0x0, "PkgTmp", NULL, 0, 0, 0, NULL, 0 }, 108 { 0x0, "GFX%rc6", NULL, 0, 0, 0, NULL, 0 }, 109 { 0x0, "GFXMHz", NULL, 0, 0, 0, NULL, 0 }, 110 { 0x0, "Pkg%pc2", NULL, 0, 0, 0, NULL, 0 }, 111 { 0x0, "Pkg%pc3", NULL, 0, 0, 0, NULL, 0 }, 112 { 0x0, "Pkg%pc6", NULL, 0, 0, 0, NULL, 0 }, 113 { 0x0, "Pkg%pc7", NULL, 0, 0, 0, NULL, 0 }, 114 { 0x0, "Pkg%pc8", NULL, 0, 0, 0, NULL, 0 }, 115 { 0x0, "Pkg%pc9", NULL, 0, 0, 0, NULL, 0 }, 116 { 0x0, "Pk%pc10", NULL, 0, 0, 0, NULL, 0 }, 117 { 0x0, "CPU%LPI", NULL, 0, 0, 0, NULL, 0 }, 118 { 0x0, "SYS%LPI", NULL, 0, 0, 0, NULL, 0 }, 119 { 0x0, "PkgWatt", NULL, 0, 0, 0, NULL, 0 }, 120 { 0x0, "CorWatt", NULL, 0, 0, 0, NULL, 0 }, 121 { 0x0, "GFXWatt", NULL, 0, 0, 0, NULL, 0 }, 122 { 0x0, "PkgCnt", NULL, 0, 0, 0, NULL, 0 }, 123 { 0x0, "RAMWatt", NULL, 0, 0, 0, NULL, 0 }, 124 { 0x0, "PKG_%", NULL, 0, 0, 0, NULL, 0 }, 125 { 0x0, "RAM_%", NULL, 0, 0, 0, NULL, 0 }, 126 { 0x0, "Pkg_J", NULL, 0, 0, 0, NULL, 0 }, 127 { 0x0, "Cor_J", NULL, 0, 0, 0, NULL, 0 }, 128 { 0x0, "GFX_J", NULL, 0, 0, 0, NULL, 0 }, 129 { 0x0, "RAM_J", NULL, 0, 0, 0, NULL, 0 }, 130 { 0x0, "Mod%c6", NULL, 0, 0, 0, NULL, 0 }, 131 { 0x0, "Totl%C0", NULL, 0, 0, 0, NULL, 0 }, 132 { 0x0, "Any%C0", NULL, 0, 0, 0, NULL, 0 }, 133 { 0x0, "GFX%C0", NULL, 0, 0, 0, NULL, 0 }, 134 { 0x0, "CPUGFX%", NULL, 0, 0, 0, NULL, 0 }, 135 { 0x0, "Core", NULL, 0, 0, 0, NULL, 0 }, 136 { 0x0, "CPU", NULL, 0, 0, 0, NULL, 0 }, 137 { 0x0, "APIC", NULL, 0, 0, 0, NULL, 0 }, 138 { 0x0, "X2APIC", NULL, 0, 0, 0, NULL, 0 }, 139 { 0x0, "Die", NULL, 0, 0, 0, NULL, 0 }, 140 { 0x0, "GFXAMHz", NULL, 0, 0, 0, NULL, 0 }, 141 { 0x0, "IPC", NULL, 0, 0, 0, NULL, 0 }, 142 { 0x0, "CoreThr", NULL, 0, 0, 0, NULL, 0 }, 143 { 0x0, "UncMHz", NULL, 0, 0, 0, NULL, 0 }, 144 { 0x0, "SAM%mc6", NULL, 0, 0, 0, NULL, 0 }, 145 { 0x0, "SAMMHz", NULL, 0, 0, 0, NULL, 0 }, 146 { 0x0, "SAMAMHz", NULL, 0, 0, 0, NULL, 0 }, 147 }; 148 149 #define MAX_BIC (sizeof(bic) / sizeof(struct 
msr_counter)) 150 #define BIC_USEC (1ULL << 0) 151 #define BIC_TOD (1ULL << 1) 152 #define BIC_Package (1ULL << 2) 153 #define BIC_Node (1ULL << 3) 154 #define BIC_Avg_MHz (1ULL << 4) 155 #define BIC_Busy (1ULL << 5) 156 #define BIC_Bzy_MHz (1ULL << 6) 157 #define BIC_TSC_MHz (1ULL << 7) 158 #define BIC_IRQ (1ULL << 8) 159 #define BIC_SMI (1ULL << 9) 160 #define BIC_sysfs (1ULL << 10) 161 #define BIC_CPU_c1 (1ULL << 11) 162 #define BIC_CPU_c3 (1ULL << 12) 163 #define BIC_CPU_c6 (1ULL << 13) 164 #define BIC_CPU_c7 (1ULL << 14) 165 #define BIC_ThreadC (1ULL << 15) 166 #define BIC_CoreTmp (1ULL << 16) 167 #define BIC_CoreCnt (1ULL << 17) 168 #define BIC_PkgTmp (1ULL << 18) 169 #define BIC_GFX_rc6 (1ULL << 19) 170 #define BIC_GFXMHz (1ULL << 20) 171 #define BIC_Pkgpc2 (1ULL << 21) 172 #define BIC_Pkgpc3 (1ULL << 22) 173 #define BIC_Pkgpc6 (1ULL << 23) 174 #define BIC_Pkgpc7 (1ULL << 24) 175 #define BIC_Pkgpc8 (1ULL << 25) 176 #define BIC_Pkgpc9 (1ULL << 26) 177 #define BIC_Pkgpc10 (1ULL << 27) 178 #define BIC_CPU_LPI (1ULL << 28) 179 #define BIC_SYS_LPI (1ULL << 29) 180 #define BIC_PkgWatt (1ULL << 30) 181 #define BIC_CorWatt (1ULL << 31) 182 #define BIC_GFXWatt (1ULL << 32) 183 #define BIC_PkgCnt (1ULL << 33) 184 #define BIC_RAMWatt (1ULL << 34) 185 #define BIC_PKG__ (1ULL << 35) 186 #define BIC_RAM__ (1ULL << 36) 187 #define BIC_Pkg_J (1ULL << 37) 188 #define BIC_Cor_J (1ULL << 38) 189 #define BIC_GFX_J (1ULL << 39) 190 #define BIC_RAM_J (1ULL << 40) 191 #define BIC_Mod_c6 (1ULL << 41) 192 #define BIC_Totl_c0 (1ULL << 42) 193 #define BIC_Any_c0 (1ULL << 43) 194 #define BIC_GFX_c0 (1ULL << 44) 195 #define BIC_CPUGFX (1ULL << 45) 196 #define BIC_Core (1ULL << 46) 197 #define BIC_CPU (1ULL << 47) 198 #define BIC_APIC (1ULL << 48) 199 #define BIC_X2APIC (1ULL << 49) 200 #define BIC_Die (1ULL << 50) 201 #define BIC_GFXACTMHz (1ULL << 51) 202 #define BIC_IPC (1ULL << 52) 203 #define BIC_CORE_THROT_CNT (1ULL << 53) 204 #define BIC_UNCORE_MHZ (1ULL << 54) 205 #define 
BIC_SAM_mc6 (1ULL << 55) 206 #define BIC_SAMMHz (1ULL << 56) 207 #define BIC_SAMACTMHz (1ULL << 57) 208 209 #define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die ) 210 #define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__) 211 #define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) 212 #define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6) 213 #define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) 214 215 #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) 216 217 unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); 218 unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; 219 220 #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) 221 #define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME) 222 #define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME) 223 #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) 224 #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) 225 #define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT) 226 227 /* 228 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit: 229 * If you change the values, note they are used both in comparisons 230 * (>= PCL__7) and to index pkg_cstate_limit_strings[]. 
231 */ 232 #define PCLUKN 0 /* Unknown */ 233 #define PCLRSV 1 /* Reserved */ 234 #define PCL__0 2 /* PC0 */ 235 #define PCL__1 3 /* PC1 */ 236 #define PCL__2 4 /* PC2 */ 237 #define PCL__3 5 /* PC3 */ 238 #define PCL__4 6 /* PC4 */ 239 #define PCL__6 7 /* PC6 */ 240 #define PCL_6N 8 /* PC6 No Retention */ 241 #define PCL_6R 9 /* PC6 Retention */ 242 #define PCL__7 10 /* PC7 */ 243 #define PCL_7S 11 /* PC7 Shrink */ 244 #define PCL__8 12 /* PC8 */ 245 #define PCL__9 13 /* PC9 */ 246 #define PCL_10 14 /* PC10 */ 247 #define PCLUNL 15 /* Unlimited */ 248 249 struct amperf_group_fd; 250 251 char *proc_stat = "/proc/stat"; 252 FILE *outf; 253 int *fd_percpu; 254 int *fd_instr_count_percpu; 255 struct amperf_group_fd *fd_amperf_percpu; /* File descriptors for perf group with APERF and MPERF counters. */ 256 struct timeval interval_tv = { 5, 0 }; 257 struct timespec interval_ts = { 5, 0 }; 258 259 unsigned int num_iterations; 260 unsigned int header_iterations; 261 unsigned int debug; 262 unsigned int quiet; 263 unsigned int shown; 264 unsigned int sums_need_wide_columns; 265 unsigned int rapl_joules; 266 unsigned int summary_only; 267 unsigned int list_header_only; 268 unsigned int dump_only; 269 unsigned int has_aperf; 270 unsigned int has_epb; 271 unsigned int has_turbo; 272 unsigned int is_hybrid; 273 unsigned int units = 1000000; /* MHz etc */ 274 unsigned int genuine_intel; 275 unsigned int authentic_amd; 276 unsigned int hygon_genuine; 277 unsigned int max_level, max_extended_level; 278 unsigned int has_invariant_tsc; 279 unsigned int aperf_mperf_multiplier = 1; 280 double bclk; 281 double base_hz; 282 unsigned int has_base_hz; 283 double tsc_tweak = 1.0; 284 unsigned int show_pkg_only; 285 unsigned int show_core_only; 286 char *output_buffer, *outp; 287 unsigned int do_dts; 288 unsigned int do_ptm; 289 unsigned int do_ipc; 290 unsigned long long cpuidle_cur_cpu_lpi_us; 291 unsigned long long cpuidle_cur_sys_lpi_us; 292 unsigned int tj_max; 293 unsigned int 
tj_max_override; 294 double rapl_power_units, rapl_time_units; 295 double rapl_dram_energy_units, rapl_energy_units; 296 double rapl_joule_counter_range; 297 unsigned int crystal_hz; 298 unsigned long long tsc_hz; 299 int base_cpu; 300 unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ 301 /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ 302 unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ 303 unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ 304 unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ 305 unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ 306 unsigned int first_counter_read = 1; 307 int ignore_stdin; 308 bool no_msr; 309 bool no_perf; 310 enum amperf_source amperf_source; 311 312 enum gfx_sysfs_idx { 313 GFX_rc6, 314 GFX_MHz, 315 GFX_ACTMHz, 316 SAM_mc6, 317 SAM_MHz, 318 SAM_ACTMHz, 319 GFX_MAX 320 }; 321 322 struct gfx_sysfs_info { 323 const char *path; 324 FILE *fp; 325 unsigned int val; 326 unsigned long long val_ull; 327 }; 328 329 static struct gfx_sysfs_info gfx_info[GFX_MAX]; 330 331 int get_msr(int cpu, off_t offset, unsigned long long *msr); 332 int add_counter(unsigned int msr_num, char *path, char *name, 333 unsigned int width, enum counter_scope scope, 334 enum counter_type type, enum counter_format format, int flags, int package_num); 335 336 /* Model specific support Start */ 337 338 /* List of features that may diverge among different platforms */ 339 struct platform_features { 340 bool has_msr_misc_feature_control; /* MSR_MISC_FEATURE_CONTROL */ 341 bool has_msr_misc_pwr_mgmt; /* MSR_MISC_PWR_MGMT */ 342 bool has_nhm_msrs; /* MSR_PLATFORM_INFO, MSR_IA32_TEMPERATURE_TARGET, MSR_SMI_COUNT, MSR_PKG_CST_CONFIG_CONTROL, MSR_IA32_POWER_CTL, TRL MSRs */ 343 bool has_config_tdp; /* MSR_CONFIG_TDP_NOMINAL/LEVEL_1/LEVEL_2/CONTROL, MSR_TURBO_ACTIVATION_RATIO */ 344 int bclk_freq; /* CPU base clock */ 345 int crystal_freq; /* Crystal clock to use when not available from CPUID.15 */ 346 int 
supported_cstates; /* Core cstates and Package cstates supported */ 347 int cst_limit; /* MSR_PKG_CST_CONFIG_CONTROL */ 348 bool has_cst_auto_convension; /* AUTOMATIC_CSTATE_CONVERSION bit in MSR_PKG_CST_CONFIG_CONTROL */ 349 bool has_irtl_msrs; /* MSR_PKGC3/PKGC6/PKGC7/PKGC8/PKGC9/PKGC10_IRTL */ 350 bool has_msr_core_c1_res; /* MSR_CORE_C1_RES */ 351 bool has_msr_module_c6_res_ms; /* MSR_MODULE_C6_RES_MS */ 352 bool has_msr_c6_demotion_policy_config; /* MSR_CC6_DEMOTION_POLICY_CONFIG/MSR_MC6_DEMOTION_POLICY_CONFIG */ 353 bool has_msr_atom_pkg_c6_residency; /* MSR_ATOM_PKG_C6_RESIDENCY */ 354 bool has_msr_knl_core_c6_residency; /* MSR_KNL_CORE_C6_RESIDENCY */ 355 bool has_ext_cst_msrs; /* MSR_PKG_WEIGHTED_CORE_C0_RES/MSR_PKG_ANY_CORE_C0_RES/MSR_PKG_ANY_GFXE_C0_RES/MSR_PKG_BOTH_CORE_GFXE_C0_RES */ 356 bool has_cst_prewake_bit; /* Cstate prewake bit in MSR_IA32_POWER_CTL */ 357 int trl_msrs; /* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */ 358 int plr_msrs; /* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */ 359 int rapl_msrs; /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */ 360 bool has_per_core_rapl; /* Indicates cores energy collection is per-core, not per-package. 
AMD specific for now */ 361 bool has_rapl_divisor; /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */ 362 bool has_fixed_rapl_unit; /* Fixed Energy Unit used for DRAM RAPL Domain */ 363 int rapl_quirk_tdp; /* Hardcoded TDP value when cannot be retrieved from hardware */ 364 int tcc_offset_bits; /* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */ 365 bool enable_tsc_tweak; /* Use CPU Base freq instead of TSC freq for aperf/mperf counter */ 366 bool need_perf_multiplier; /* mperf/aperf multiplier */ 367 }; 368 369 struct platform_data { 370 unsigned int model; 371 const struct platform_features *features; 372 }; 373 374 /* For BCLK */ 375 enum bclk_freq { 376 BCLK_100MHZ = 1, 377 BCLK_133MHZ, 378 BCLK_SLV, 379 }; 380 381 #define SLM_BCLK_FREQS 5 382 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 }; 383 384 double slm_bclk(void) 385 { 386 unsigned long long msr = 3; 387 unsigned int i; 388 double freq; 389 390 if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) 391 fprintf(outf, "SLM BCLK: unknown\n"); 392 393 i = msr & 0xf; 394 if (i >= SLM_BCLK_FREQS) { 395 fprintf(outf, "SLM BCLK[%d] invalid\n", i); 396 i = 3; 397 } 398 freq = slm_freq_table[i]; 399 400 if (!quiet) 401 fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); 402 403 return freq; 404 } 405 406 /* For Package cstate limit */ 407 enum package_cstate_limit { 408 CST_LIMIT_NHM = 1, 409 CST_LIMIT_SNB, 410 CST_LIMIT_HSW, 411 CST_LIMIT_SKX, 412 CST_LIMIT_ICX, 413 CST_LIMIT_SLV, 414 CST_LIMIT_AMT, 415 CST_LIMIT_KNL, 416 CST_LIMIT_GMT, 417 }; 418 419 /* For Turbo Ratio Limit MSRs */ 420 enum turbo_ratio_limit_msrs { 421 TRL_BASE = BIT(0), 422 TRL_LIMIT1 = BIT(1), 423 TRL_LIMIT2 = BIT(2), 424 TRL_ATOM = BIT(3), 425 TRL_KNL = BIT(4), 426 TRL_CORECOUNT = BIT(5), 427 }; 428 429 /* For Perf Limit Reason MSRs */ 430 enum perf_limit_reason_msrs { 431 PLR_CORE = BIT(0), 432 PLR_GFX = BIT(1), 433 PLR_RING = BIT(2), 434 }; 435 436 /* For RAPL MSRs */ 437 enum rapl_msrs { 438 RAPL_PKG_POWER_LIMIT = 
BIT(0), /* 0x610 MSR_PKG_POWER_LIMIT */ 439 RAPL_PKG_ENERGY_STATUS = BIT(1), /* 0x611 MSR_PKG_ENERGY_STATUS */ 440 RAPL_PKG_PERF_STATUS = BIT(2), /* 0x613 MSR_PKG_PERF_STATUS */ 441 RAPL_PKG_POWER_INFO = BIT(3), /* 0x614 MSR_PKG_POWER_INFO */ 442 RAPL_DRAM_POWER_LIMIT = BIT(4), /* 0x618 MSR_DRAM_POWER_LIMIT */ 443 RAPL_DRAM_ENERGY_STATUS = BIT(5), /* 0x619 MSR_DRAM_ENERGY_STATUS */ 444 RAPL_DRAM_PERF_STATUS = BIT(6), /* 0x61b MSR_DRAM_PERF_STATUS */ 445 RAPL_DRAM_POWER_INFO = BIT(7), /* 0x61c MSR_DRAM_POWER_INFO */ 446 RAPL_CORE_POWER_LIMIT = BIT(8), /* 0x638 MSR_PP0_POWER_LIMIT */ 447 RAPL_CORE_ENERGY_STATUS = BIT(9), /* 0x639 MSR_PP0_ENERGY_STATUS */ 448 RAPL_CORE_POLICY = BIT(10), /* 0x63a MSR_PP0_POLICY */ 449 RAPL_GFX_POWER_LIMIT = BIT(11), /* 0x640 MSR_PP1_POWER_LIMIT */ 450 RAPL_GFX_ENERGY_STATUS = BIT(12), /* 0x641 MSR_PP1_ENERGY_STATUS */ 451 RAPL_GFX_POLICY = BIT(13), /* 0x642 MSR_PP1_POLICY */ 452 RAPL_AMD_PWR_UNIT = BIT(14), /* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */ 453 RAPL_AMD_CORE_ENERGY_STAT = BIT(15), /* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */ 454 RAPL_AMD_PKG_ENERGY_STAT = BIT(16), /* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */ 455 }; 456 457 #define RAPL_PKG (RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT) 458 #define RAPL_DRAM (RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT) 459 #define RAPL_CORE (RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT) 460 #define RAPL_GFX (RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS) 461 462 #define RAPL_PKG_ALL (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO) 463 #define RAPL_DRAM_ALL (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO) 464 #define RAPL_CORE_ALL (RAPL_CORE | RAPL_CORE_POLICY) 465 #define RAPL_GFX_ALL (RAPL_GFX | RAPL_GFX_POLIGY) 466 467 #define RAPL_AMD_F17H (RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | RAPL_AMD_PKG_ENERGY_STAT) 468 469 /* For Cstates */ 470 enum cstates { 471 CC1 = BIT(0), 472 CC3 = BIT(1), 473 CC6 = BIT(2), 474 CC7 = BIT(3), 475 PC2 = BIT(4), 476 PC3 = BIT(5), 477 PC6 
= BIT(6), 478 PC7 = BIT(7), 479 PC8 = BIT(8), 480 PC9 = BIT(9), 481 PC10 = BIT(10), 482 }; 483 484 static const struct platform_features nhm_features = { 485 .has_msr_misc_pwr_mgmt = 1, 486 .has_nhm_msrs = 1, 487 .bclk_freq = BCLK_133MHZ, 488 .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6, 489 .cst_limit = CST_LIMIT_NHM, 490 .trl_msrs = TRL_BASE, 491 }; 492 493 static const struct platform_features nhx_features = { 494 .has_msr_misc_pwr_mgmt = 1, 495 .has_nhm_msrs = 1, 496 .bclk_freq = BCLK_133MHZ, 497 .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6, 498 .cst_limit = CST_LIMIT_NHM, 499 }; 500 501 static const struct platform_features snb_features = { 502 .has_msr_misc_feature_control = 1, 503 .has_msr_misc_pwr_mgmt = 1, 504 .has_nhm_msrs = 1, 505 .bclk_freq = BCLK_100MHZ, 506 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 507 .cst_limit = CST_LIMIT_SNB, 508 .has_irtl_msrs = 1, 509 .trl_msrs = TRL_BASE, 510 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 511 }; 512 513 static const struct platform_features snx_features = { 514 .has_msr_misc_feature_control = 1, 515 .has_msr_misc_pwr_mgmt = 1, 516 .has_nhm_msrs = 1, 517 .bclk_freq = BCLK_100MHZ, 518 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 519 .cst_limit = CST_LIMIT_SNB, 520 .has_irtl_msrs = 1, 521 .trl_msrs = TRL_BASE, 522 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, 523 }; 524 525 static const struct platform_features ivb_features = { 526 .has_msr_misc_feature_control = 1, 527 .has_msr_misc_pwr_mgmt = 1, 528 .has_nhm_msrs = 1, 529 .has_config_tdp = 1, 530 .bclk_freq = BCLK_100MHZ, 531 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 532 .cst_limit = CST_LIMIT_SNB, 533 .has_irtl_msrs = 1, 534 .trl_msrs = TRL_BASE, 535 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 536 }; 537 538 static const struct platform_features ivx_features = { 539 .has_msr_misc_feature_control = 1, 540 
.has_msr_misc_pwr_mgmt = 1, 541 .has_nhm_msrs = 1, 542 .bclk_freq = BCLK_100MHZ, 543 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 544 .cst_limit = CST_LIMIT_SNB, 545 .has_irtl_msrs = 1, 546 .trl_msrs = TRL_BASE | TRL_LIMIT1, 547 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, 548 }; 549 550 static const struct platform_features hsw_features = { 551 .has_msr_misc_feature_control = 1, 552 .has_msr_misc_pwr_mgmt = 1, 553 .has_nhm_msrs = 1, 554 .has_config_tdp = 1, 555 .bclk_freq = BCLK_100MHZ, 556 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 557 .cst_limit = CST_LIMIT_HSW, 558 .has_irtl_msrs = 1, 559 .trl_msrs = TRL_BASE, 560 .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, 561 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 562 }; 563 564 static const struct platform_features hsx_features = { 565 .has_msr_misc_feature_control = 1, 566 .has_msr_misc_pwr_mgmt = 1, 567 .has_nhm_msrs = 1, 568 .has_config_tdp = 1, 569 .bclk_freq = BCLK_100MHZ, 570 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 571 .cst_limit = CST_LIMIT_HSW, 572 .has_irtl_msrs = 1, 573 .trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2, 574 .plr_msrs = PLR_CORE | PLR_RING, 575 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 576 .has_fixed_rapl_unit = 1, 577 }; 578 579 static const struct platform_features hswl_features = { 580 .has_msr_misc_feature_control = 1, 581 .has_msr_misc_pwr_mgmt = 1, 582 .has_nhm_msrs = 1, 583 .has_config_tdp = 1, 584 .bclk_freq = BCLK_100MHZ, 585 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 586 .cst_limit = CST_LIMIT_HSW, 587 .has_irtl_msrs = 1, 588 .trl_msrs = TRL_BASE, 589 .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, 590 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 591 }; 592 593 static const struct platform_features hswg_features = { 594 .has_msr_misc_feature_control = 1, 595 .has_msr_misc_pwr_mgmt = 1, 596 .has_nhm_msrs = 
1, 597 .has_config_tdp = 1, 598 .bclk_freq = BCLK_100MHZ, 599 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 600 .cst_limit = CST_LIMIT_HSW, 601 .has_irtl_msrs = 1, 602 .trl_msrs = TRL_BASE, 603 .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, 604 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 605 }; 606 607 static const struct platform_features bdw_features = { 608 .has_msr_misc_feature_control = 1, 609 .has_msr_misc_pwr_mgmt = 1, 610 .has_nhm_msrs = 1, 611 .has_config_tdp = 1, 612 .bclk_freq = BCLK_100MHZ, 613 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 614 .cst_limit = CST_LIMIT_HSW, 615 .has_irtl_msrs = 1, 616 .trl_msrs = TRL_BASE, 617 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 618 }; 619 620 static const struct platform_features bdwg_features = { 621 .has_msr_misc_feature_control = 1, 622 .has_msr_misc_pwr_mgmt = 1, 623 .has_nhm_msrs = 1, 624 .has_config_tdp = 1, 625 .bclk_freq = BCLK_100MHZ, 626 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 627 .cst_limit = CST_LIMIT_HSW, 628 .has_irtl_msrs = 1, 629 .trl_msrs = TRL_BASE, 630 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 631 }; 632 633 static const struct platform_features bdx_features = { 634 .has_msr_misc_feature_control = 1, 635 .has_msr_misc_pwr_mgmt = 1, 636 .has_nhm_msrs = 1, 637 .has_config_tdp = 1, 638 .bclk_freq = BCLK_100MHZ, 639 .supported_cstates = CC1 | CC3 | CC6 | PC2 | PC3 | PC6, 640 .cst_limit = CST_LIMIT_HSW, 641 .has_irtl_msrs = 1, 642 .has_cst_auto_convension = 1, 643 .trl_msrs = TRL_BASE, 644 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 645 .has_fixed_rapl_unit = 1, 646 }; 647 648 static const struct platform_features skl_features = { 649 .has_msr_misc_feature_control = 1, 650 .has_msr_misc_pwr_mgmt = 1, 651 .has_nhm_msrs = 1, 652 .has_config_tdp = 1, 653 .bclk_freq = BCLK_100MHZ, 654 .crystal_freq = 24000000, 655 .supported_cstates = 
CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 656 .cst_limit = CST_LIMIT_HSW, 657 .has_irtl_msrs = 1, 658 .has_ext_cst_msrs = 1, 659 .trl_msrs = TRL_BASE, 660 .tcc_offset_bits = 6, 661 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, 662 .enable_tsc_tweak = 1, 663 }; 664 665 static const struct platform_features cnl_features = { 666 .has_msr_misc_feature_control = 1, 667 .has_msr_misc_pwr_mgmt = 1, 668 .has_nhm_msrs = 1, 669 .has_config_tdp = 1, 670 .bclk_freq = BCLK_100MHZ, 671 .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 672 .cst_limit = CST_LIMIT_HSW, 673 .has_irtl_msrs = 1, 674 .has_msr_core_c1_res = 1, 675 .has_ext_cst_msrs = 1, 676 .trl_msrs = TRL_BASE, 677 .tcc_offset_bits = 6, 678 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, 679 .enable_tsc_tweak = 1, 680 }; 681 682 static const struct platform_features adl_features = { 683 .has_msr_misc_feature_control = 1, 684 .has_msr_misc_pwr_mgmt = 1, 685 .has_nhm_msrs = 1, 686 .has_config_tdp = 1, 687 .bclk_freq = BCLK_100MHZ, 688 .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10, 689 .cst_limit = CST_LIMIT_HSW, 690 .has_irtl_msrs = 1, 691 .has_msr_core_c1_res = 1, 692 .has_ext_cst_msrs = 1, 693 .trl_msrs = TRL_BASE, 694 .tcc_offset_bits = 6, 695 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, 696 .enable_tsc_tweak = 1, 697 }; 698 699 static const struct platform_features arl_features = { 700 .has_msr_misc_feature_control = 1, 701 .has_msr_misc_pwr_mgmt = 1, 702 .has_nhm_msrs = 1, 703 .has_config_tdp = 1, 704 .bclk_freq = BCLK_100MHZ, 705 .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC10, 706 .cst_limit = CST_LIMIT_HSW, 707 .has_irtl_msrs = 1, 708 .has_msr_core_c1_res = 1, 709 .has_ext_cst_msrs = 1, 710 .trl_msrs = TRL_BASE, 711 .tcc_offset_bits = 6, 712 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM 
| RAPL_DRAM_PERF_STATUS | RAPL_GFX, 713 .enable_tsc_tweak = 1, 714 }; 715 716 static const struct platform_features skx_features = { 717 .has_msr_misc_feature_control = 1, 718 .has_msr_misc_pwr_mgmt = 1, 719 .has_nhm_msrs = 1, 720 .has_config_tdp = 1, 721 .bclk_freq = BCLK_100MHZ, 722 .supported_cstates = CC1 | CC6 | PC2 | PC6, 723 .cst_limit = CST_LIMIT_SKX, 724 .has_irtl_msrs = 1, 725 .has_cst_auto_convension = 1, 726 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 727 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 728 .has_fixed_rapl_unit = 1, 729 }; 730 731 static const struct platform_features icx_features = { 732 .has_msr_misc_feature_control = 1, 733 .has_msr_misc_pwr_mgmt = 1, 734 .has_nhm_msrs = 1, 735 .has_config_tdp = 1, 736 .bclk_freq = BCLK_100MHZ, 737 .supported_cstates = CC1 | CC6 | PC2 | PC6, 738 .cst_limit = CST_LIMIT_ICX, 739 .has_msr_core_c1_res = 1, 740 .has_irtl_msrs = 1, 741 .has_cst_prewake_bit = 1, 742 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 743 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 744 .has_fixed_rapl_unit = 1, 745 }; 746 747 static const struct platform_features spr_features = { 748 .has_msr_misc_feature_control = 1, 749 .has_msr_misc_pwr_mgmt = 1, 750 .has_nhm_msrs = 1, 751 .has_config_tdp = 1, 752 .bclk_freq = BCLK_100MHZ, 753 .supported_cstates = CC1 | CC6 | PC2 | PC6, 754 .cst_limit = CST_LIMIT_SKX, 755 .has_msr_core_c1_res = 1, 756 .has_irtl_msrs = 1, 757 .has_cst_prewake_bit = 1, 758 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 759 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 760 }; 761 762 static const struct platform_features srf_features = { 763 .has_msr_misc_feature_control = 1, 764 .has_msr_misc_pwr_mgmt = 1, 765 .has_nhm_msrs = 1, 766 .has_config_tdp = 1, 767 .bclk_freq = BCLK_100MHZ, 768 .supported_cstates = CC1 | CC6 | PC2 | PC6, 769 .cst_limit = CST_LIMIT_SKX, 770 .has_msr_core_c1_res = 1, 771 .has_msr_module_c6_res_ms = 1, 772 .has_irtl_msrs = 1, 773 .has_cst_prewake_bit = 1, 774 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 775 .rapl_msrs = 
RAPL_PKG_ALL | RAPL_DRAM_ALL, 776 }; 777 778 static const struct platform_features grr_features = { 779 .has_msr_misc_feature_control = 1, 780 .has_msr_misc_pwr_mgmt = 1, 781 .has_nhm_msrs = 1, 782 .has_config_tdp = 1, 783 .bclk_freq = BCLK_100MHZ, 784 .supported_cstates = CC1 | CC6, 785 .cst_limit = CST_LIMIT_SKX, 786 .has_msr_core_c1_res = 1, 787 .has_msr_module_c6_res_ms = 1, 788 .has_irtl_msrs = 1, 789 .has_cst_prewake_bit = 1, 790 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 791 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 792 }; 793 794 static const struct platform_features slv_features = { 795 .has_nhm_msrs = 1, 796 .bclk_freq = BCLK_SLV, 797 .supported_cstates = CC1 | CC6 | PC6, 798 .cst_limit = CST_LIMIT_SLV, 799 .has_msr_core_c1_res = 1, 800 .has_msr_module_c6_res_ms = 1, 801 .has_msr_c6_demotion_policy_config = 1, 802 .has_msr_atom_pkg_c6_residency = 1, 803 .trl_msrs = TRL_ATOM, 804 .rapl_msrs = RAPL_PKG | RAPL_CORE, 805 .has_rapl_divisor = 1, 806 .rapl_quirk_tdp = 30, 807 }; 808 809 static const struct platform_features slvd_features = { 810 .has_msr_misc_pwr_mgmt = 1, 811 .has_nhm_msrs = 1, 812 .bclk_freq = BCLK_SLV, 813 .supported_cstates = CC1 | CC6 | PC3 | PC6, 814 .cst_limit = CST_LIMIT_SLV, 815 .has_msr_atom_pkg_c6_residency = 1, 816 .trl_msrs = TRL_BASE, 817 .rapl_msrs = RAPL_PKG | RAPL_CORE, 818 .rapl_quirk_tdp = 30, 819 }; 820 821 static const struct platform_features amt_features = { 822 .has_nhm_msrs = 1, 823 .bclk_freq = BCLK_133MHZ, 824 .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6, 825 .cst_limit = CST_LIMIT_AMT, 826 .trl_msrs = TRL_BASE, 827 }; 828 829 static const struct platform_features gmt_features = { 830 .has_msr_misc_pwr_mgmt = 1, 831 .has_nhm_msrs = 1, 832 .bclk_freq = BCLK_100MHZ, 833 .crystal_freq = 19200000, 834 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 835 .cst_limit = CST_LIMIT_GMT, 836 .has_irtl_msrs = 1, 837 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 838 .rapl_msrs = RAPL_PKG | 
RAPL_PKG_POWER_INFO,
};

/* Goldmont-D (Denverton): server Atom, 25 MHz crystal, per-core C1 residency MSR */
static const struct platform_features gmtd_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.crystal_freq = 25000000,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.has_msr_core_c1_res = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS,
};

/* Goldmont Plus (Gemini Lake): client Atom, 19.2 MHz crystal, deep package C-states */
static const struct platform_features gmtp_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.crystal_freq = 19200000,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
};

/* Tremont (client) */
static const struct platform_features tmt_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
	.enable_tsc_tweak = 1,
};

/* Tremont-D (Snow Ridge): server variant, package RAPL only */
static const struct platform_features tmtd_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL,
};

/* Knights Landing / Knights Mill (Xeon Phi) */
static const struct platform_features knl_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_KNL,
	.has_msr_knl_core_c6_residency = 1,
	.trl_msrs = TRL_KNL,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
	.has_fixed_rapl_unit = 1,
	.need_perf_multiplier = 1,
};

/* Fallback for unrecognized CPUs: no optional features enabled */
static const struct platform_features default_features = {
};

static const struct platform_features amd_features_with_rapl = {
	.rapl_msrs = RAPL_AMD_F17H,
	.has_per_core_rapl = 1,
	.rapl_quirk_tdp = 280,	/* This is the max stock TDP of HEDT/Server Fam17h+ chips */
};

/*
 * Family-6 model number -> feature table.
 * Scanned linearly by probe_platform_features(); terminated by { 0, NULL }.
 */
static const struct platform_data turbostat_pdata[] = {
	{ INTEL_FAM6_NEHALEM, &nhm_features },
	{ INTEL_FAM6_NEHALEM_G, &nhm_features },
	{ INTEL_FAM6_NEHALEM_EP, &nhm_features },
	{ INTEL_FAM6_NEHALEM_EX, &nhx_features },
	{ INTEL_FAM6_WESTMERE, &nhm_features },
	{ INTEL_FAM6_WESTMERE_EP, &nhm_features },
	{ INTEL_FAM6_WESTMERE_EX, &nhx_features },
	{ INTEL_FAM6_SANDYBRIDGE, &snb_features },
	{ INTEL_FAM6_SANDYBRIDGE_X, &snx_features },
	{ INTEL_FAM6_IVYBRIDGE, &ivb_features },
	{ INTEL_FAM6_IVYBRIDGE_X, &ivx_features },
	{ INTEL_FAM6_HASWELL, &hsw_features },
	{ INTEL_FAM6_HASWELL_X, &hsx_features },
	{ INTEL_FAM6_HASWELL_L, &hswl_features },
	{ INTEL_FAM6_HASWELL_G, &hswg_features },
	{ INTEL_FAM6_BROADWELL, &bdw_features },
	{ INTEL_FAM6_BROADWELL_G, &bdwg_features },
	{ INTEL_FAM6_BROADWELL_X, &bdx_features },
	{ INTEL_FAM6_BROADWELL_D, &bdx_features },
	{ INTEL_FAM6_SKYLAKE_L, &skl_features },
	{ INTEL_FAM6_SKYLAKE, &skl_features },
	{ INTEL_FAM6_SKYLAKE_X, &skx_features },
	{ INTEL_FAM6_KABYLAKE_L, &skl_features },
	{ INTEL_FAM6_KABYLAKE, &skl_features },
	{ INTEL_FAM6_COMETLAKE, &skl_features },
	{ INTEL_FAM6_COMETLAKE_L, &skl_features },
	{ INTEL_FAM6_CANNONLAKE_L, &cnl_features },
	{ INTEL_FAM6_ICELAKE_X, &icx_features },
	{ INTEL_FAM6_ICELAKE_D, &icx_features },
	{ INTEL_FAM6_ICELAKE_L, &cnl_features },
	{ INTEL_FAM6_ICELAKE_NNPI, &cnl_features },
	{ INTEL_FAM6_ROCKETLAKE, &cnl_features },
	{ INTEL_FAM6_TIGERLAKE_L, &cnl_features },
	{ INTEL_FAM6_TIGERLAKE, &cnl_features },
	{ INTEL_FAM6_SAPPHIRERAPIDS_X, &spr_features },
	{ INTEL_FAM6_EMERALDRAPIDS_X, &spr_features },
	{ INTEL_FAM6_GRANITERAPIDS_X, &spr_features },
	{ INTEL_FAM6_LAKEFIELD, &cnl_features },
	{ INTEL_FAM6_ALDERLAKE, &adl_features },
	{ INTEL_FAM6_ALDERLAKE_L, &adl_features },
	{ INTEL_FAM6_RAPTORLAKE, &adl_features },
	{ INTEL_FAM6_RAPTORLAKE_P, &adl_features },
	{ INTEL_FAM6_RAPTORLAKE_S, &adl_features },
	{ INTEL_FAM6_METEORLAKE, &cnl_features },
	{ INTEL_FAM6_METEORLAKE_L, &cnl_features },
	{ INTEL_FAM6_ARROWLAKE_H, &arl_features },
	{ INTEL_FAM6_ARROWLAKE_U, &arl_features },
	{ INTEL_FAM6_ARROWLAKE, &arl_features },
	{ INTEL_FAM6_LUNARLAKE_M, &arl_features },
	{ INTEL_FAM6_ATOM_SILVERMONT, &slv_features },
	{ INTEL_FAM6_ATOM_SILVERMONT_D, &slvd_features },
	{ INTEL_FAM6_ATOM_AIRMONT, &amt_features },
	{ INTEL_FAM6_ATOM_GOLDMONT, &gmt_features },
	{ INTEL_FAM6_ATOM_GOLDMONT_D, &gmtd_features },
	{ INTEL_FAM6_ATOM_GOLDMONT_PLUS, &gmtp_features },
	{ INTEL_FAM6_ATOM_TREMONT_D, &tmtd_features },
	{ INTEL_FAM6_ATOM_TREMONT, &tmt_features },
	{ INTEL_FAM6_ATOM_TREMONT_L, &tmt_features },
	{ INTEL_FAM6_ATOM_GRACEMONT, &adl_features },
	{ INTEL_FAM6_ATOM_CRESTMONT_X, &srf_features },
	{ INTEL_FAM6_ATOM_CRESTMONT, &grr_features },
	{ INTEL_FAM6_XEON_PHI_KNL, &knl_features },
	{ INTEL_FAM6_XEON_PHI_KNM, &knl_features },
	/*
	 * Missing support for
	 * INTEL_FAM6_ICELAKE
	 * INTEL_FAM6_ATOM_SILVERMONT_MID
	 * INTEL_FAM6_ATOM_AIRMONT_MID
	 * INTEL_FAM6_ATOM_AIRMONT_NP
	 */
	{ 0, NULL },
};

/* Feature table selected for the running CPU; never NULL after probe */
static const struct platform_features *platform;

/*
 * probe_platform_features()
 * Point 'platform' at the feature table matching the running CPU.
 * Starts from 'default_features' (all optional features off), then:
 *  - AMD/Hygon: enables RAPL features iff CPUID.80000007H:EDX bit 14
 *    is set and family >= 0x17 (Zen and later);
 *  - Intel family 6: linear scan of turbostat_pdata[] by model number.
 * CPUs not matched keep the empty default table.
 */
void probe_platform_features(unsigned int family, unsigned int model)
{
	int i;

	platform = &default_features;

	if (authentic_amd || hygon_genuine) {
		if (max_extended_level >= 0x80000007) {
			unsigned int eax, ebx, ecx, edx;

			__cpuid(0x80000007, eax, ebx, ecx, edx);
			/* RAPL (Fam 17h+) */
			if ((edx & (1 << 14)) && family >= 0x17)
				platform = &amd_features_with_rapl;
		}
		return;
	}

	if (!genuine_intel || family != 6)
		return;

	for (i = 0; turbostat_pdata[i].features; i++) {
		if (turbostat_pdata[i].model == model) {
			platform = turbostat_pdata[i].features;
			return;
		}
	}
}

/* Model specific support End */

#define	TJMAX_DEFAULT	100

/* MSRs that are not yet in the kernel-provided header. */
#define MSR_RAPL_PWR_UNIT	0xc0010299
#define MSR_CORE_ENERGY_STAT	0xc001029a
#define MSR_PKG_ENERGY_STAT	0xc001029b

#define MAX(a, b) ((a) > (b) ? (a) : (b))

int backwards_count;
char *progname;

#define CPU_SUBSET_MAXCPUS	1024	/* need to use before probe... */
cpu_set_t *cpu_present_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
#define MAX_ADDED_THREAD_COUNTERS 24
#define MAX_ADDED_CORE_COUNTERS 8
#define MAX_ADDED_PACKAGE_COUNTERS 16
#define BITMASK_SIZE 32

/* Indexes used to map data read from perf and MSRs into global variables */
enum rapl_rci_index {
	RAPL_RCI_INDEX_ENERGY_PKG = 0,
	RAPL_RCI_INDEX_ENERGY_CORES = 1,
	RAPL_RCI_INDEX_DRAM = 2,
	RAPL_RCI_INDEX_GFX = 3,
	RAPL_RCI_INDEX_PKG_PERF_STATUS = 4,
	RAPL_RCI_INDEX_DRAM_PERF_STATUS = 5,
	RAPL_RCI_INDEX_CORE_ENERGY = 6,
	NUM_RAPL_COUNTERS,
};

enum rapl_unit {
	RAPL_UNIT_INVALID,
	RAPL_UNIT_JOULES,
	RAPL_UNIT_WATTS,
};

/* Per-domain RAPL readout state, indexed by enum rapl_rci_index */
struct rapl_counter_info_t {
	unsigned long long data[NUM_RAPL_COUNTERS];
	enum rapl_source source[NUM_RAPL_COUNTERS];
	unsigned long long flags[NUM_RAPL_COUNTERS];
	double scale[NUM_RAPL_COUNTERS];
	enum rapl_unit unit[NUM_RAPL_COUNTERS];

	union {
		/* Active when
source == RAPL_SOURCE_MSR */
		struct {
			unsigned long long msr[NUM_RAPL_COUNTERS];
			unsigned long long msr_mask[NUM_RAPL_COUNTERS];
			int msr_shift[NUM_RAPL_COUNTERS];
		};
	};

	/* perf group leader fd when source == RAPL_SOURCE_PERF */
	int fd_perf;
};

/* struct rapl_counter_info_t for each RAPL domain */
struct rapl_counter_info_t *rapl_counter_info_perdomain;
unsigned int rapl_counter_info_perdomain_size;

#define RAPL_COUNTER_FLAG_USE_MSR_SUM (1u << 1)

/* Static description of one RAPL counter: how to read it and where it lands */
struct rapl_counter_arch_info {
	int feature_mask;	/* Mask for testing if the counter is supported on host */
	const char *perf_subsys;
	const char *perf_name;
	unsigned long long msr;
	unsigned long long msr_mask;
	int msr_shift;		/* Positive mean shift right, negative mean shift left */
	double *platform_rapl_msr_scale;	/* Scale applied to values read by MSR (platform dependent, filled at runtime) */
	unsigned int rci_index;	/* Maps data from perf counters to global variables */
	unsigned long long bic;
	double compat_scale;	/* Some counters require constant scaling to be in the same range as other, similar ones */
	unsigned long long flags;
};

static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = {
	{
		.feature_mask = RAPL_PKG,
		.perf_subsys = "power",
		.perf_name = "energy-pkg",
		.msr = MSR_PKG_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
		.bic = BIC_PkgWatt | BIC_Pkg_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		/* AMD Fam17h+ package energy, same perf event but different MSR */
		.feature_mask = RAPL_AMD_F17H,
		.perf_subsys = "power",
		.perf_name = "energy-pkg",
		.msr = MSR_PKG_ENERGY_STAT,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
		.bic = BIC_PkgWatt | BIC_Pkg_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_CORE_ENERGY_STATUS,
		.perf_subsys = "power",
		.perf_name = "energy-cores",
		.msr = MSR_PP0_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_CORES,
		.bic = BIC_CorWatt | BIC_Cor_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_DRAM,
		.perf_subsys = "power",
		.perf_name = "energy-ram",
		.msr = MSR_DRAM_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_dram_energy_units,
		.rci_index = RAPL_RCI_INDEX_DRAM,
		.bic = BIC_RAMWatt | BIC_RAM_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_GFX,
		.perf_subsys = "power",
		.perf_name = "energy-gpu",
		.msr = MSR_PP1_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_GFX,
		.bic = BIC_GFXWatt | BIC_GFX_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		/* MSR-only: no perf event exposes pkg throttle time */
		.feature_mask = RAPL_PKG_PERF_STATUS,
		.perf_subsys = NULL,
		.perf_name = NULL,
		.msr = MSR_PKG_PERF_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_time_units,
		.rci_index = RAPL_RCI_INDEX_PKG_PERF_STATUS,
		.bic = BIC_PKG__,
		.compat_scale = 100.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_DRAM_PERF_STATUS,
		.perf_subsys = NULL,
		.perf_name = NULL,
		.msr = MSR_DRAM_PERF_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_time_units,
		.rci_index = RAPL_RCI_INDEX_DRAM_PERF_STATUS,
		.bic = BIC_RAM__,
		.compat_scale = 100.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		/* AMD per-core energy: 32-bit register, hence the narrower mask */
		.feature_mask = RAPL_AMD_F17H,
		.perf_subsys = NULL,
		.perf_name = NULL,
		.msr = MSR_CORE_ENERGY_STAT,
		.msr_mask = 0xFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_CORE_ENERGY,
		.bic = BIC_CorWatt | BIC_Cor_J,
		.compat_scale = 1.0,
		.flags = 0,
	},
};

/* One sampled RAPL value plus the unit/scale needed to present it */
struct rapl_counter {
	unsigned long long raw_value;
	enum rapl_unit unit;
	double scale;
};

/* Indexes used to map data read from perf and MSRs into global variables */
enum ccstate_rci_index {
	CCSTATE_RCI_INDEX_C1_RESIDENCY = 0,
	CCSTATE_RCI_INDEX_C3_RESIDENCY = 1,
	CCSTATE_RCI_INDEX_C6_RESIDENCY = 2,
	CCSTATE_RCI_INDEX_C7_RESIDENCY = 3,
	PCSTATE_RCI_INDEX_C2_RESIDENCY = 4,
	PCSTATE_RCI_INDEX_C3_RESIDENCY = 5,
	PCSTATE_RCI_INDEX_C6_RESIDENCY = 6,
	PCSTATE_RCI_INDEX_C7_RESIDENCY = 7,
	PCSTATE_RCI_INDEX_C8_RESIDENCY = 8,
	PCSTATE_RCI_INDEX_C9_RESIDENCY = 9,
	PCSTATE_RCI_INDEX_C10_RESIDENCY = 10,
	NUM_CSTATE_COUNTERS,
};

/* Per-CPU c-state readout state; separate perf fds for core vs package events */
struct cstate_counter_info_t {
	unsigned long long data[NUM_CSTATE_COUNTERS];
	enum cstate_source source[NUM_CSTATE_COUNTERS];
	unsigned long long msr[NUM_CSTATE_COUNTERS];
	int fd_perf_core;
	int fd_perf_pkg;
};

struct cstate_counter_info_t *ccstate_counter_info;
unsigned int ccstate_counter_info_size;

#define CSTATE_COUNTER_FLAG_COLLECT_PER_CORE   (1u << 0)
#define CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD ((1u << 1) | CSTATE_COUNTER_FLAG_COLLECT_PER_CORE)
#define CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY (1u << 2)

/* Static description of one c-state residency counter */
struct cstate_counter_arch_info {
	int feature_mask;	/* Mask for testing if the counter is supported on host */
	const char *perf_subsys;
	const char *perf_name;
	unsigned long long msr;
	unsigned int rci_index;	/* Maps data from perf counters to global variables */
	unsigned long long bic;
	unsigned long long flags;
	int pkg_cstate_limit;
};

/* Core c-states first (cstate_core), then package c-states (cstate_pkg) */
static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = {
	{
		.feature_mask = CC1,
		.perf_subsys = "cstate_core",
		.perf_name = "c1-residency",
		.msr = MSR_CORE_C1_RES,
		.rci_index = CCSTATE_RCI_INDEX_C1_RESIDENCY,
		.bic = BIC_CPU_c1,
		.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD,
		.pkg_cstate_limit = 0,
	},
	{
		.feature_mask = CC3,
		.perf_subsys = "cstate_core",
		.perf_name = "c3-residency",
		.msr = MSR_CORE_C3_RESIDENCY,
		.rci_index = CCSTATE_RCI_INDEX_C3_RESIDENCY,
		.bic = BIC_CPU_c3,
		.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY,
		.pkg_cstate_limit = 0,
	},
	{
		.feature_mask = CC6,
		.perf_subsys = "cstate_core",
		.perf_name = "c6-residency",
		.msr = MSR_CORE_C6_RESIDENCY,
		.rci_index = CCSTATE_RCI_INDEX_C6_RESIDENCY,
		.bic = BIC_CPU_c6,
		.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY,
		.pkg_cstate_limit = 0,
	},
	{
		.feature_mask = CC7,
		.perf_subsys = "cstate_core",
		.perf_name = "c7-residency",
		.msr = MSR_CORE_C7_RESIDENCY,
		.rci_index = CCSTATE_RCI_INDEX_C7_RESIDENCY,
		.bic = BIC_CPU_c7,
		.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY,
		.pkg_cstate_limit = 0,
	},
	{
		.feature_mask = PC2,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c2-residency",
		.msr = MSR_PKG_C2_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C2_RESIDENCY,
		.bic = BIC_Pkgpc2,
		.flags = 0,
		.pkg_cstate_limit = PCL__2,
	},
	{
		.feature_mask = PC3,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c3-residency",
		.msr = MSR_PKG_C3_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C3_RESIDENCY,
		.bic = BIC_Pkgpc3,
		.flags = 0,
		.pkg_cstate_limit = PCL__3,
	},
	{
		.feature_mask = PC6,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c6-residency",
		.msr = MSR_PKG_C6_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C6_RESIDENCY,
		.bic = BIC_Pkgpc6,
		.flags = 0,
		.pkg_cstate_limit = PCL__6,
	},
	{
		.feature_mask = PC7,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c7-residency",
		.msr = MSR_PKG_C7_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C7_RESIDENCY,
		.bic = BIC_Pkgpc7,
		.flags = 0,
		.pkg_cstate_limit = PCL__7,
	},
	{
		.feature_mask = PC8,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c8-residency",
		.msr = MSR_PKG_C8_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C8_RESIDENCY,
		.bic = BIC_Pkgpc8,
		.flags = 0,
		.pkg_cstate_limit = PCL__8,
	},
	{
		.feature_mask = PC9,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c9-residency",
		.msr = MSR_PKG_C9_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C9_RESIDENCY,
		.bic = BIC_Pkgpc9,
		.flags = 0,
		.pkg_cstate_limit = PCL__9,
	},
	{
		.feature_mask = PC10,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c10-residency",
		.msr = MSR_PKG_C10_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C10_RESIDENCY,
		.bic = BIC_Pkgpc10,
		.flags = 0,
		.pkg_cstate_limit = PCL_10,
	},
};

/* Per logical-CPU sample: timestamps plus thread-scope counters */
struct thread_data {
	struct timeval tv_begin;
	struct timeval tv_end;
	struct timeval tv_delta;
	unsigned long long tsc;
	unsigned long long aperf;
	unsigned long long mperf;
	unsigned long long c1;
	unsigned long long instr_count;
	unsigned long long irq_count;
	unsigned int smi_count;
	unsigned int cpu_id;
	unsigned int apic_id;
	unsigned int x2apic_id;
	unsigned int flags;
	bool is_atom;
	unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
}
*thread_even, *thread_odd;

/* Per physical-core sample: core-scope c-states, temperature and energy */
struct core_data {
	int base_cpu;
	unsigned long long c3;
	unsigned long long c6;
	unsigned long long c7;
	unsigned long long mc6_us;	/* duplicate as per-core for now, even though per module */
	unsigned int core_temp_c;
	struct rapl_counter core_energy;	/* MSR_CORE_ENERGY_STAT */
	unsigned int core_id;
	unsigned long long core_throt_cnt;
	unsigned long long counter[MAX_ADDED_CORE_COUNTERS];
} *core_even, *core_odd;

/* Per-package sample: package c-states, LPI, graphics, RAPL domains */
struct pkg_data {
	int base_cpu;
	unsigned long long pc2;
	unsigned long long pc3;
	unsigned long long pc6;
	unsigned long long pc7;
	unsigned long long pc8;
	unsigned long long pc9;
	unsigned long long pc10;
	long long cpu_lpi;
	long long sys_lpi;
	unsigned long long pkg_wtd_core_c0;
	unsigned long long pkg_any_core_c0;
	unsigned long long pkg_any_gfxe_c0;
	unsigned long long pkg_both_core_gfxe_c0;
	long long gfx_rc6_ms;
	unsigned int gfx_mhz;
	unsigned int gfx_act_mhz;
	long long sam_mc6_ms;
	unsigned int sam_mhz;
	unsigned int sam_act_mhz;
	unsigned int package_id;
	struct rapl_counter energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
	struct rapl_counter energy_dram;	/* MSR_DRAM_ENERGY_STATUS */
	struct rapl_counter energy_cores;	/* MSR_PP0_ENERGY_STATUS */
	struct rapl_counter energy_gfx;	/* MSR_PP1_ENERGY_STATUS */
	struct rapl_counter rapl_pkg_perf_status;	/* MSR_PKG_PERF_STATUS */
	struct rapl_counter rapl_dram_perf_status;	/* MSR_DRAM_PERF_STATUS */
	unsigned int pkg_temp_c;
	unsigned int uncore_mhz;
	unsigned long long counter[MAX_ADDED_PACKAGE_COUNTERS];
} *package_even, *package_odd;

/* Samples alternate between the "even" and "odd" buffer sets */
#define ODD_COUNTERS thread_odd, core_odd, package_odd
#define EVEN_COUNTERS thread_even, core_even, package_even

/* Flat-array addressing of the topology: pkg > node > core > thread */
#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no)	\
	((thread_base) +						\
	 ((pkg_no) * topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
	 ((node_no) * topo.cores_per_node * topo.threads_per_core) +	\
	 ((core_no) * topo.threads_per_core) +				\
	 (thread_no))

#define GET_CORE(core_base, core_no, node_no, pkg_no)		\
	((core_base) +						\
	 ((pkg_no) * topo.nodes_per_pkg * topo.cores_per_node) + \
	 ((node_no) * topo.cores_per_node) +			\
	 (core_no))

#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)

/*
 * The accumulated sum of MSR is defined as a monotonic
 * increasing MSR, it will be accumulated periodically,
 * despite its register's bit width.
 */
enum {
	IDX_PKG_ENERGY,
	IDX_DRAM_ENERGY,
	IDX_PP0_ENERGY,
	IDX_PP1_ENERGY,
	IDX_PKG_PERF,
	IDX_DRAM_PERF,
	IDX_COUNT,
};

int get_msr_sum(int cpu, off_t offset, unsigned long long *msr);

struct msr_sum_array {
	/* get_msr_sum() = sum + (get_msr() - last) */
	struct {
		/*The accumulated MSR value is updated by the timer */
		unsigned long long sum;
		/*The MSR footprint recorded in last timer */
		unsigned long long last;
	} entries[IDX_COUNT];
};

/* The percpu MSR sum array.*/
struct msr_sum_array *per_cpu_msr_sum;

/*
 * idx_to_offset()
 * Map an accumulation index (enum above) to its MSR address.
 * IDX_PKG_ENERGY is platform-dependent: AMD Fam17h uses a different MSR.
 * Returns -1 for an unknown index.
 */
off_t idx_to_offset(int idx)
{
	off_t offset;

	switch (idx) {
	case IDX_PKG_ENERGY:
		if (platform->rapl_msrs & RAPL_AMD_F17H)
			offset = MSR_PKG_ENERGY_STAT;
		else
			offset = MSR_PKG_ENERGY_STATUS;
		break;
	case IDX_DRAM_ENERGY:
		offset = MSR_DRAM_ENERGY_STATUS;
		break;
	case IDX_PP0_ENERGY:
		offset = MSR_PP0_ENERGY_STATUS;
		break;
	case IDX_PP1_ENERGY:
		offset = MSR_PP1_ENERGY_STATUS;
		break;
	case IDX_PKG_PERF:
		offset = MSR_PKG_PERF_STATUS;
		break;
	case IDX_DRAM_PERF:
		offset = MSR_DRAM_PERF_STATUS;
		break;
	default:
		offset = -1;
	}
	return offset;
}

/*
 * offset_to_idx()
 * Inverse of idx_to_offset(): map an MSR address back to its
 * accumulation index, or -1 if the MSR is not accumulated.
 */
int
offset_to_idx(off_t offset)
{
	int idx;

	switch (offset) {
	case MSR_PKG_ENERGY_STATUS:
	case MSR_PKG_ENERGY_STAT:
		idx = IDX_PKG_ENERGY;
		break;
	case MSR_DRAM_ENERGY_STATUS:
		idx = IDX_DRAM_ENERGY;
		break;
	case MSR_PP0_ENERGY_STATUS:
		idx = IDX_PP0_ENERGY;
		break;
	case MSR_PP1_ENERGY_STATUS:
		idx = IDX_PP1_ENERGY;
		break;
	case MSR_PKG_PERF_STATUS:
		idx = IDX_PKG_PERF;
		break;
	case MSR_DRAM_PERF_STATUS:
		idx = IDX_DRAM_PERF;
		break;
	default:
		idx = -1;
	}
	return idx;
}

/*
 * idx_valid()
 * Non-zero iff the current platform actually supports the RAPL
 * feature behind the given accumulation index.
 */
int idx_valid(int idx)
{
	switch (idx) {
	case IDX_PKG_ENERGY:
		return platform->rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H);
	case IDX_DRAM_ENERGY:
		return platform->rapl_msrs & RAPL_DRAM;
	case IDX_PP0_ENERGY:
		return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS;
	case IDX_PP1_ENERGY:
		return platform->rapl_msrs & RAPL_GFX;
	case IDX_PKG_PERF:
		return platform->rapl_msrs & RAPL_PKG_PERF_STATUS;
	case IDX_DRAM_PERF:
		return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS;
	default:
		return 0;
	}
}

/* User-added counters, per scope, as singly-linked lists with counts */
struct sys_counters {
	unsigned int added_thread_counters;
	unsigned int added_core_counters;
	unsigned int added_package_counters;
	struct msr_counter *tp;
	struct msr_counter *cp;
	struct msr_counter *pp;
} sys;

/*
 * free_msr_counters_()
 * Unlink and free every MSR-based counter (msr_num != 0) from the
 * list rooted at *pp, keeping non-MSR (e.g. sysfs) counters intact.
 * Returns the number of nodes freed so the caller can fix its count.
 */
static size_t free_msr_counters_(struct msr_counter **pp)
{
	struct msr_counter *p = NULL;
	size_t num_freed = 0;

	while (*pp) {
		p = *pp;

		if (p->msr_num != 0) {
			/* splice this node out; *pp now points at its successor */
			*pp = p->next;

			free(p);
			++num_freed;

			continue;
		}

		pp = &p->next;
	}

	return num_freed;
}

/*
 * Free all added counters accessed via msr.
 */
static void free_sys_msr_counters(void)
{
	/* Thread counters */
	sys.added_thread_counters -= free_msr_counters_(&sys.tp);

	/* Core counters */
	sys.added_core_counters -= free_msr_counters_(&sys.cp);

	/* Package counters */
	sys.added_package_counters -= free_msr_counters_(&sys.pp);
}

/* System-wide averages printed on the summary line */
struct system_summary {
	struct thread_data threads;
	struct core_data cores;
	struct pkg_data packages;
} average;

struct cpu_topology {
	int physical_package_id;
	int die_id;
	int logical_cpu_id;
	int physical_node_id;
	int logical_node_id;	/* 0-based count within the package */
	int physical_core_id;
	int thread_id;
	cpu_set_t *put_ids;	/* Processing Unit/Thread IDs */
} *cpus;

struct topo_params {
	int num_packages;
	int num_die;
	int num_cpus;
	int num_cores;
	int allowed_packages;
	int allowed_cpus;
	int allowed_cores;
	int max_cpu_num;
	int max_core_id;
	int max_package_id;
	int max_die_id;
	int max_node_num;
	int nodes_per_pkg;
	int cores_per_node;
	int threads_per_core;
} topo;

struct timeval tv_even, tv_odd, tv_delta;

int *irq_column_2_cpu;		/* /proc/interrupts column numbers */
int *irqs_per_cpu;		/* indexed by cpu_num */

void setup_all_buffers(bool startup);

/* Low-power-idle residency source: sysfs preferred, debugfs fallback */
char *sys_lpi_file;
char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec";

int cpu_is_not_present(int cpu)
{
	return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
}

int cpu_is_not_allowed(int cpu)
{
	return !CPU_ISSET_S(cpu, cpu_allowed_setsize, cpu_allowed_set);
}

/*
 * run func(thread, core, package) in topology order
 * skip non-present cpus
 */

int
for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *), 1669 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) 1670 { 1671 int retval, pkg_no, core_no, thread_no, node_no; 1672 1673 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 1674 for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) { 1675 for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { 1676 for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { 1677 struct thread_data *t; 1678 struct core_data *c; 1679 struct pkg_data *p; 1680 t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); 1681 1682 if (cpu_is_not_allowed(t->cpu_id)) 1683 continue; 1684 1685 c = GET_CORE(core_base, core_no, node_no, pkg_no); 1686 p = GET_PKG(pkg_base, pkg_no); 1687 1688 retval = func(t, c, p); 1689 if (retval) 1690 return retval; 1691 } 1692 } 1693 } 1694 } 1695 return 0; 1696 } 1697 1698 int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p) 1699 { 1700 UNUSED(p); 1701 1702 return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0); 1703 } 1704 1705 int is_cpu_first_core_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) 1706 { 1707 UNUSED(c); 1708 1709 return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0); 1710 } 1711 1712 int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) 1713 { 1714 return is_cpu_first_thread_in_core(t, c, p) && is_cpu_first_core_in_package(t, c, p); 1715 } 1716 1717 int cpu_migrate(int cpu) 1718 { 1719 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 1720 CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); 1721 if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) 1722 return -1; 1723 else 1724 return 0; 1725 } 1726 1727 int get_msr_fd(int cpu) 1728 { 1729 char pathname[32]; 1730 int fd; 1731 1732 fd = fd_percpu[cpu]; 1733 1734 if (fd) 1735 return fd; 
1736 1737 sprintf(pathname, "/dev/cpu/%d/msr", cpu); 1738 fd = open(pathname, O_RDONLY); 1739 if (fd < 0) 1740 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, " 1741 "or run with --no-msr, or run as root", pathname); 1742 1743 fd_percpu[cpu] = fd; 1744 1745 return fd; 1746 } 1747 1748 static void bic_disable_msr_access(void) 1749 { 1750 const unsigned long bic_msrs = BIC_SMI | BIC_Mod_c6 | BIC_CoreTmp | 1751 BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_PkgTmp; 1752 1753 bic_enabled &= ~bic_msrs; 1754 1755 free_sys_msr_counters(); 1756 } 1757 1758 static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) 1759 { 1760 assert(!no_perf); 1761 1762 return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); 1763 } 1764 1765 static long open_perf_counter(int cpu, unsigned int type, unsigned int config, int group_fd, __u64 read_format) 1766 { 1767 struct perf_event_attr attr; 1768 const pid_t pid = -1; 1769 const unsigned long flags = 0; 1770 1771 assert(!no_perf); 1772 1773 memset(&attr, 0, sizeof(struct perf_event_attr)); 1774 1775 attr.type = type; 1776 attr.size = sizeof(struct perf_event_attr); 1777 attr.config = config; 1778 attr.disabled = 0; 1779 attr.sample_type = PERF_SAMPLE_IDENTIFIER; 1780 attr.read_format = read_format; 1781 1782 const int fd = perf_event_open(&attr, pid, cpu, group_fd, flags); 1783 1784 return fd; 1785 } 1786 1787 int get_instr_count_fd(int cpu) 1788 { 1789 if (fd_instr_count_percpu[cpu]) 1790 return fd_instr_count_percpu[cpu]; 1791 1792 fd_instr_count_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0); 1793 1794 return fd_instr_count_percpu[cpu]; 1795 } 1796 1797 int get_msr(int cpu, off_t offset, unsigned long long *msr) 1798 { 1799 ssize_t retval; 1800 1801 assert(!no_msr); 1802 1803 retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset); 1804 1805 if (retval != sizeof *msr) 1806 err(-1, "cpu%d: 
msr offset 0x%llx read failed", cpu, (unsigned long long)offset);

	return 0;
}

/*
 * probe_msr()
 * Test whether the MSR at 'offset' is readable on 'cpu'.
 * Unlike get_msr(), a failed read is not fatal: returns 0 if
 * readable, 1 otherwise.
 */
int probe_msr(int cpu, off_t offset)
{
	ssize_t retval;
	unsigned long long dummy;

	assert(!no_msr);

	retval = pread(get_msr_fd(cpu), &dummy, sizeof(dummy), offset);

	if (retval != sizeof(dummy))
		return 1;

	return 0;
}

/* Column names seen on the command line before their counters exist;
 * resolved later once sysfs/added counters are known. */
#define MAX_DEFERRED 16
char *deferred_add_names[MAX_DEFERRED];
char *deferred_skip_names[MAX_DEFERRED];
int deferred_add_index;
int deferred_skip_index;

/*
 * HIDE_LIST - hide this list of counters, show the rest [default]
 * SHOW_LIST - show this list of counters, hide the rest
 */
enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;

/* Print usage to the current output stream (outf) */
void help(void)
{
	fprintf(outf,
		"Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
		"\n"
		"Turbostat forks the specified COMMAND and prints statistics\n"
		"when COMMAND completes.\n"
		"If no COMMAND is specified, turbostat wakes every 5-seconds\n"
		"to print statistics, until interrupted.\n"
		"  -a, --add	add a counter\n"
		"		  eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
		"  -c, --cpu	cpu-set	limit output to summary plus cpu-set:\n"
		"		  {core | package | j,k,l..m,n-p }\n"
		"  -d, --debug	displays usec, Time_Of_Day_Seconds and more debugging\n"
		"  -D, --Dump	displays the raw counter values\n"
		"  -e, --enable	[all | column]\n"
		"		shows all or the specified disabled column\n"
		"  -H, --hide [column|column,column,...]\n"
		"		hide the specified column(s)\n"
		"  -i, --interval sec.subsec\n"
		"		Override default 5-second measurement interval\n"
		"  -J, --Joules	displays energy in Joules instead of Watts\n"
		"  -l, --list	list column headers only\n"
		"  -M, --no-msr Disable all uses of the MSR driver\n"
		"  -P, --no-perf Disable all uses of the perf API\n"
		"  -n, --num_iterations num\n"
		"		number of the measurement iterations\n"
		"  -N, --header_iterations num\n"
		"		print header every num iterations\n"
		"  -o, --out file\n"
		"		create or truncate \"file\" for all output\n"
		"  -q, --quiet	skip decoding system configuration header\n"
		"  -s, --show [column|column,column,...]\n"
		"		show only the specified column(s)\n"
		"  -S, --Summary\n"
		"		limits output to 1-line system summary per interval\n"
		"  -T, --TCC temperature\n"
		"		sets the Thermal Control Circuit temperature in\n"
		"		  degrees Celsius\n"
		"  -h, --help	print this help message\n"
		"  -v, --version	print version information\n" "\n" "For more help, run \"man turbostat\"\n");
}

/*
 * bic_lookup
 * for all the strings in comma-separated name_list,
 * set the appropriate bit in return value.
 */
unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
{
	unsigned int i;
	unsigned long long retval = 0;

	/* NOTE: tokenizes name_list in place by NUL-ing out each comma */
	while (name_list) {
		char *comma;

		comma = strchr(name_list, ',');

		if (comma)
			*comma = '\0';

		for (i = 0; i < MAX_BIC; ++i) {
			if (!strcmp(name_list, bic[i].name)) {
				retval |= (1ULL << i);
				break;
			}
			/* group aliases expand to several columns at once */
			if (!strcmp(name_list, "all")) {
				retval |= ~0;
				break;
			} else if (!strcmp(name_list, "topology")) {
				retval |= BIC_TOPOLOGY;
				break;
			} else if (!strcmp(name_list, "power")) {
				retval |= BIC_THERMAL_PWR;
				break;
			} else if (!strcmp(name_list, "idle")) {
				retval |= BIC_IDLE;
				break;
			} else if (!strcmp(name_list, "frequency")) {
				retval |= BIC_FREQUENCY;
				break;
			} else if (!strcmp(name_list, "other")) {
				retval |= BIC_OTHER;
				break;
			}

		}
		/* not a built-in name: defer it, hoping a run-time counter matches later */
		if (i == MAX_BIC) {
			if (mode == SHOW_LIST) {
				deferred_add_names[deferred_add_index++] = name_list;
				if (deferred_add_index >= MAX_DEFERRED) {
					fprintf(stderr, "More than max %d un-recognized --add options '%s'\n",
						MAX_DEFERRED, name_list);
					help();
					exit(1);
				}
			} else {
				deferred_skip_names[deferred_skip_index++] = name_list;
				if (debug)
					fprintf(stderr, "deferred \"%s\"\n", name_list);
				if (deferred_skip_index >= MAX_DEFERRED) {
					fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
						MAX_DEFERRED, name_list);
					help();
					exit(1);
				}
			}
		}

		name_list = comma;
		if (name_list)
			name_list++;

	}
	return retval;
}

/* Emit one header row of enabled column names, separated by 'delim' */
void print_header(char *delim)
{
	struct msr_counter *mp;
	int printed = 0;

	if (DO_BIC(BIC_USEC))
		outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
	if (DO_BIC(BIC_TOD))
		outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ?
delim : "")); 1964 if (DO_BIC(BIC_Package)) 1965 outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); 1966 if (DO_BIC(BIC_Die)) 1967 outp += sprintf(outp, "%sDie", (printed++ ? delim : "")); 1968 if (DO_BIC(BIC_Node)) 1969 outp += sprintf(outp, "%sNode", (printed++ ? delim : "")); 1970 if (DO_BIC(BIC_Core)) 1971 outp += sprintf(outp, "%sCore", (printed++ ? delim : "")); 1972 if (DO_BIC(BIC_CPU)) 1973 outp += sprintf(outp, "%sCPU", (printed++ ? delim : "")); 1974 if (DO_BIC(BIC_APIC)) 1975 outp += sprintf(outp, "%sAPIC", (printed++ ? delim : "")); 1976 if (DO_BIC(BIC_X2APIC)) 1977 outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : "")); 1978 if (DO_BIC(BIC_Avg_MHz)) 1979 outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : "")); 1980 if (DO_BIC(BIC_Busy)) 1981 outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : "")); 1982 if (DO_BIC(BIC_Bzy_MHz)) 1983 outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : "")); 1984 if (DO_BIC(BIC_TSC_MHz)) 1985 outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : "")); 1986 1987 if (DO_BIC(BIC_IPC)) 1988 outp += sprintf(outp, "%sIPC", (printed++ ? delim : "")); 1989 1990 if (DO_BIC(BIC_IRQ)) { 1991 if (sums_need_wide_columns) 1992 outp += sprintf(outp, "%s IRQ", (printed++ ? delim : "")); 1993 else 1994 outp += sprintf(outp, "%sIRQ", (printed++ ? delim : "")); 1995 } 1996 1997 if (DO_BIC(BIC_SMI)) 1998 outp += sprintf(outp, "%sSMI", (printed++ ? delim : "")); 1999 2000 for (mp = sys.tp; mp; mp = mp->next) { 2001 2002 if (mp->format == FORMAT_RAW) { 2003 if (mp->width == 64) 2004 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name); 2005 else 2006 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name); 2007 } else { 2008 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2009 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name); 2010 else 2011 outp += sprintf(outp, "%s%s", (printed++ ? 
delim : ""), mp->name); 2012 } 2013 } 2014 2015 if (DO_BIC(BIC_CPU_c1)) 2016 outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); 2017 if (DO_BIC(BIC_CPU_c3)) 2018 outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : "")); 2019 if (DO_BIC(BIC_CPU_c6)) 2020 outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : "")); 2021 if (DO_BIC(BIC_CPU_c7)) 2022 outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : "")); 2023 2024 if (DO_BIC(BIC_Mod_c6)) 2025 outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : "")); 2026 2027 if (DO_BIC(BIC_CoreTmp)) 2028 outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : "")); 2029 2030 if (DO_BIC(BIC_CORE_THROT_CNT)) 2031 outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : "")); 2032 2033 if (platform->rapl_msrs && !rapl_joules) { 2034 if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) 2035 outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); 2036 } else if (platform->rapl_msrs && rapl_joules) { 2037 if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) 2038 outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); 2039 } 2040 2041 for (mp = sys.cp; mp; mp = mp->next) { 2042 if (mp->format == FORMAT_RAW) { 2043 if (mp->width == 64) 2044 outp += sprintf(outp, "%s%18.18s", delim, mp->name); 2045 else 2046 outp += sprintf(outp, "%s%10.10s", delim, mp->name); 2047 } else { 2048 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2049 outp += sprintf(outp, "%s%8s", delim, mp->name); 2050 else 2051 outp += sprintf(outp, "%s%s", delim, mp->name); 2052 } 2053 } 2054 2055 if (DO_BIC(BIC_PkgTmp)) 2056 outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : "")); 2057 2058 if (DO_BIC(BIC_GFX_rc6)) 2059 outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : "")); 2060 2061 if (DO_BIC(BIC_GFXMHz)) 2062 outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : "")); 2063 2064 if (DO_BIC(BIC_GFXACTMHz)) 2065 outp += sprintf(outp, "%sGFXAMHz", (printed++ ? 
delim : "")); 2066 2067 if (DO_BIC(BIC_SAM_mc6)) 2068 outp += sprintf(outp, "%sSAM%%mc6", (printed++ ? delim : "")); 2069 2070 if (DO_BIC(BIC_SAMMHz)) 2071 outp += sprintf(outp, "%sSAMMHz", (printed++ ? delim : "")); 2072 2073 if (DO_BIC(BIC_SAMACTMHz)) 2074 outp += sprintf(outp, "%sSAMAMHz", (printed++ ? delim : "")); 2075 2076 if (DO_BIC(BIC_Totl_c0)) 2077 outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : "")); 2078 if (DO_BIC(BIC_Any_c0)) 2079 outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : "")); 2080 if (DO_BIC(BIC_GFX_c0)) 2081 outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : "")); 2082 if (DO_BIC(BIC_CPUGFX)) 2083 outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : "")); 2084 2085 if (DO_BIC(BIC_Pkgpc2)) 2086 outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : "")); 2087 if (DO_BIC(BIC_Pkgpc3)) 2088 outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : "")); 2089 if (DO_BIC(BIC_Pkgpc6)) 2090 outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : "")); 2091 if (DO_BIC(BIC_Pkgpc7)) 2092 outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : "")); 2093 if (DO_BIC(BIC_Pkgpc8)) 2094 outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : "")); 2095 if (DO_BIC(BIC_Pkgpc9)) 2096 outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : "")); 2097 if (DO_BIC(BIC_Pkgpc10)) 2098 outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : "")); 2099 if (DO_BIC(BIC_CPU_LPI)) 2100 outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : "")); 2101 if (DO_BIC(BIC_SYS_LPI)) 2102 outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : "")); 2103 2104 if (platform->rapl_msrs && !rapl_joules) { 2105 if (DO_BIC(BIC_PkgWatt)) 2106 outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : "")); 2107 if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) 2108 outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); 2109 if (DO_BIC(BIC_GFXWatt)) 2110 outp += sprintf(outp, "%sGFXWatt", (printed++ ? 
delim : "")); 2111 if (DO_BIC(BIC_RAMWatt)) 2112 outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : "")); 2113 if (DO_BIC(BIC_PKG__)) 2114 outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); 2115 if (DO_BIC(BIC_RAM__)) 2116 outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : "")); 2117 } else if (platform->rapl_msrs && rapl_joules) { 2118 if (DO_BIC(BIC_Pkg_J)) 2119 outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : "")); 2120 if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) 2121 outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); 2122 if (DO_BIC(BIC_GFX_J)) 2123 outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : "")); 2124 if (DO_BIC(BIC_RAM_J)) 2125 outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : "")); 2126 if (DO_BIC(BIC_PKG__)) 2127 outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); 2128 if (DO_BIC(BIC_RAM__)) 2129 outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : "")); 2130 } 2131 if (DO_BIC(BIC_UNCORE_MHZ)) 2132 outp += sprintf(outp, "%sUncMHz", (printed++ ? 
delim : "")); 2133 2134 for (mp = sys.pp; mp; mp = mp->next) { 2135 if (mp->format == FORMAT_RAW) { 2136 if (mp->width == 64) 2137 outp += sprintf(outp, "%s%18.18s", delim, mp->name); 2138 else if (mp->width == 32) 2139 outp += sprintf(outp, "%s%10.10s", delim, mp->name); 2140 else 2141 outp += sprintf(outp, "%s%7.7s", delim, mp->name); 2142 } else { 2143 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2144 outp += sprintf(outp, "%s%8s", delim, mp->name); 2145 else 2146 outp += sprintf(outp, "%s%7.7s", delim, mp->name); 2147 } 2148 } 2149 2150 outp += sprintf(outp, "\n"); 2151 } 2152 2153 int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 2154 { 2155 int i; 2156 struct msr_counter *mp; 2157 2158 outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p); 2159 2160 if (t) { 2161 outp += sprintf(outp, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags); 2162 outp += sprintf(outp, "TSC: %016llX\n", t->tsc); 2163 outp += sprintf(outp, "aperf: %016llX\n", t->aperf); 2164 outp += sprintf(outp, "mperf: %016llX\n", t->mperf); 2165 outp += sprintf(outp, "c1: %016llX\n", t->c1); 2166 2167 if (DO_BIC(BIC_IPC)) 2168 outp += sprintf(outp, "IPC: %lld\n", t->instr_count); 2169 2170 if (DO_BIC(BIC_IRQ)) 2171 outp += sprintf(outp, "IRQ: %lld\n", t->irq_count); 2172 if (DO_BIC(BIC_SMI)) 2173 outp += sprintf(outp, "SMI: %d\n", t->smi_count); 2174 2175 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 2176 outp += 2177 sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, 2178 t->counter[i], mp->sp->path); 2179 } 2180 } 2181 2182 if (c && is_cpu_first_thread_in_core(t, c, p)) { 2183 outp += sprintf(outp, "core: %d\n", c->core_id); 2184 outp += sprintf(outp, "c3: %016llX\n", c->c3); 2185 outp += sprintf(outp, "c6: %016llX\n", c->c6); 2186 outp += sprintf(outp, "c7: %016llX\n", c->c7); 2187 outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c); 2188 outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt); 2189 2190 
const unsigned long long energy_value = c->core_energy.raw_value * c->core_energy.scale; 2191 const double energy_scale = c->core_energy.scale; 2192 2193 if (c->core_energy.unit == RAPL_UNIT_JOULES) 2194 outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale); 2195 2196 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 2197 outp += 2198 sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, 2199 c->counter[i], mp->sp->path); 2200 } 2201 outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us); 2202 } 2203 2204 if (p && is_cpu_first_core_in_package(t, c, p)) { 2205 outp += sprintf(outp, "package: %d\n", p->package_id); 2206 2207 outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0); 2208 outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0); 2209 outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0); 2210 outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0); 2211 2212 outp += sprintf(outp, "pc2: %016llX\n", p->pc2); 2213 if (DO_BIC(BIC_Pkgpc3)) 2214 outp += sprintf(outp, "pc3: %016llX\n", p->pc3); 2215 if (DO_BIC(BIC_Pkgpc6)) 2216 outp += sprintf(outp, "pc6: %016llX\n", p->pc6); 2217 if (DO_BIC(BIC_Pkgpc7)) 2218 outp += sprintf(outp, "pc7: %016llX\n", p->pc7); 2219 outp += sprintf(outp, "pc8: %016llX\n", p->pc8); 2220 outp += sprintf(outp, "pc9: %016llX\n", p->pc9); 2221 outp += sprintf(outp, "pc10: %016llX\n", p->pc10); 2222 outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi); 2223 outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi); 2224 outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg.raw_value); 2225 outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores.raw_value); 2226 outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx.raw_value); 2227 outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram.raw_value); 2228 outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status.raw_value); 2229 outp += sprintf(outp, 
"Throttle RAM: %0llX\n", p->rapl_dram_perf_status.raw_value); 2230 outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); 2231 2232 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 2233 outp += 2234 sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, 2235 p->counter[i], mp->sp->path); 2236 } 2237 } 2238 2239 outp += sprintf(outp, "\n"); 2240 2241 return 0; 2242 } 2243 2244 double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desired_unit, double interval) 2245 { 2246 assert(desired_unit != RAPL_UNIT_INVALID); 2247 2248 /* 2249 * For now we don't expect anything other than joules, 2250 * so just simplify the logic. 2251 */ 2252 assert(c->unit == RAPL_UNIT_JOULES); 2253 2254 const double scaled = c->raw_value * c->scale; 2255 2256 if (desired_unit == RAPL_UNIT_WATTS) 2257 return scaled / interval; 2258 return scaled; 2259 } 2260 2261 /* 2262 * column formatting convention & formats 2263 */ 2264 int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 2265 { 2266 double interval_float, tsc; 2267 char *fmt8; 2268 int i; 2269 struct msr_counter *mp; 2270 char *delim = "\t"; 2271 int printed = 0; 2272 2273 /* if showing only 1st thread in core and this isn't one, bail out */ 2274 if (show_core_only && !is_cpu_first_thread_in_core(t, c, p)) 2275 return 0; 2276 2277 /* if showing only 1st thread in pkg and this isn't one, bail out */ 2278 if (show_pkg_only && !is_cpu_first_core_in_package(t, c, p)) 2279 return 0; 2280 2281 /*if not summary line and --cpu is used */ 2282 if ((t != &average.threads) && (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset))) 2283 return 0; 2284 2285 if (DO_BIC(BIC_USEC)) { 2286 /* on each row, print how many usec each timestamp took to gather */ 2287 struct timeval tv; 2288 2289 timersub(&t->tv_end, &t->tv_begin, &tv); 2290 outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec); 2291 } 2292 2293 /* Time_Of_Day_Seconds: on each row, print 
sec.usec last timestamp taken */ 2294 if (DO_BIC(BIC_TOD)) 2295 outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec); 2296 2297 interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec / 1000000.0; 2298 2299 tsc = t->tsc * tsc_tweak; 2300 2301 /* topo columns, print blanks on 1st (average) line */ 2302 if (t == &average.threads) { 2303 if (DO_BIC(BIC_Package)) 2304 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2305 if (DO_BIC(BIC_Die)) 2306 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2307 if (DO_BIC(BIC_Node)) 2308 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2309 if (DO_BIC(BIC_Core)) 2310 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2311 if (DO_BIC(BIC_CPU)) 2312 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2313 if (DO_BIC(BIC_APIC)) 2314 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2315 if (DO_BIC(BIC_X2APIC)) 2316 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2317 } else { 2318 if (DO_BIC(BIC_Package)) { 2319 if (p) 2320 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id); 2321 else 2322 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2323 } 2324 if (DO_BIC(BIC_Die)) { 2325 if (c) 2326 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id); 2327 else 2328 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2329 } 2330 if (DO_BIC(BIC_Node)) { 2331 if (t) 2332 outp += sprintf(outp, "%s%d", 2333 (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id); 2334 else 2335 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2336 } 2337 if (DO_BIC(BIC_Core)) { 2338 if (c) 2339 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id); 2340 else 2341 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2342 } 2343 if (DO_BIC(BIC_CPU)) 2344 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id); 2345 if (DO_BIC(BIC_APIC)) 2346 outp += sprintf(outp, "%s%d", (printed++ ? 
delim : ""), t->apic_id); 2347 if (DO_BIC(BIC_X2APIC)) 2348 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id); 2349 } 2350 2351 if (DO_BIC(BIC_Avg_MHz)) 2352 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float); 2353 2354 if (DO_BIC(BIC_Busy)) 2355 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc); 2356 2357 if (DO_BIC(BIC_Bzy_MHz)) { 2358 if (has_base_hz) 2359 outp += 2360 sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf); 2361 else 2362 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 2363 tsc / units * t->aperf / t->mperf / interval_float); 2364 } 2365 2366 if (DO_BIC(BIC_TSC_MHz)) 2367 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc / units / interval_float); 2368 2369 if (DO_BIC(BIC_IPC)) 2370 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf); 2371 2372 /* IRQ */ 2373 if (DO_BIC(BIC_IRQ)) { 2374 if (sums_need_wide_columns) 2375 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count); 2376 else 2377 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count); 2378 } 2379 2380 /* SMI */ 2381 if (DO_BIC(BIC_SMI)) 2382 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count); 2383 2384 /* Added counters */ 2385 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 2386 if (mp->format == FORMAT_RAW) { 2387 if (mp->width == 32) 2388 outp += 2389 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]); 2390 else 2391 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]); 2392 } else if (mp->format == FORMAT_DELTA) { 2393 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2394 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]); 2395 else 2396 outp += sprintf(outp, "%s%lld", (printed++ ? 
delim : ""), t->counter[i]); 2397 } else if (mp->format == FORMAT_PERCENT) { 2398 if (mp->type == COUNTER_USEC) 2399 outp += 2400 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 2401 t->counter[i] / interval_float / 10000); 2402 else 2403 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc); 2404 } 2405 } 2406 2407 /* C1 */ 2408 if (DO_BIC(BIC_CPU_c1)) 2409 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc); 2410 2411 /* print per-core data only for 1st thread in core */ 2412 if (!is_cpu_first_thread_in_core(t, c, p)) 2413 goto done; 2414 2415 if (DO_BIC(BIC_CPU_c3)) 2416 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc); 2417 if (DO_BIC(BIC_CPU_c6)) 2418 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc); 2419 if (DO_BIC(BIC_CPU_c7)) 2420 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc); 2421 2422 /* Mod%c6 */ 2423 if (DO_BIC(BIC_Mod_c6)) 2424 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc); 2425 2426 if (DO_BIC(BIC_CoreTmp)) 2427 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c); 2428 2429 /* Core throttle count */ 2430 if (DO_BIC(BIC_CORE_THROT_CNT)) 2431 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt); 2432 2433 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 2434 if (mp->format == FORMAT_RAW) { 2435 if (mp->width == 32) 2436 outp += 2437 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]); 2438 else 2439 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]); 2440 } else if (mp->format == FORMAT_DELTA) { 2441 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2442 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]); 2443 else 2444 outp += sprintf(outp, "%s%lld", (printed++ ? 
delim : ""), c->counter[i]); 2445 } else if (mp->format == FORMAT_PERCENT) { 2446 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc); 2447 } 2448 } 2449 2450 fmt8 = "%s%.2f"; 2451 2452 if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) 2453 outp += 2454 sprintf(outp, fmt8, (printed++ ? delim : ""), 2455 rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float)); 2456 if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) 2457 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 2458 rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float)); 2459 2460 /* print per-package data only for 1st core in package */ 2461 if (!is_cpu_first_core_in_package(t, c, p)) 2462 goto done; 2463 2464 /* PkgTmp */ 2465 if (DO_BIC(BIC_PkgTmp)) 2466 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c); 2467 2468 /* GFXrc6 */ 2469 if (DO_BIC(BIC_GFX_rc6)) { 2470 if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */ 2471 outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); 2472 } else { 2473 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 2474 p->gfx_rc6_ms / 10.0 / interval_float); 2475 } 2476 } 2477 2478 /* GFXMHz */ 2479 if (DO_BIC(BIC_GFXMHz)) 2480 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz); 2481 2482 /* GFXACTMHz */ 2483 if (DO_BIC(BIC_GFXACTMHz)) 2484 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz); 2485 2486 /* SAMmc6 */ 2487 if (DO_BIC(BIC_SAM_mc6)) { 2488 if (p->sam_mc6_ms == -1) { /* detect GFX counter reset */ 2489 outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); 2490 } else { 2491 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 2492 p->sam_mc6_ms / 10.0 / interval_float); 2493 } 2494 } 2495 2496 /* SAMMHz */ 2497 if (DO_BIC(BIC_SAMMHz)) 2498 outp += sprintf(outp, "%s%d", (printed++ ? 
delim : ""), p->sam_mhz); 2499 2500 /* SAMACTMHz */ 2501 if (DO_BIC(BIC_SAMACTMHz)) 2502 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_act_mhz); 2503 2504 /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ 2505 if (DO_BIC(BIC_Totl_c0)) 2506 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc); 2507 if (DO_BIC(BIC_Any_c0)) 2508 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc); 2509 if (DO_BIC(BIC_GFX_c0)) 2510 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc); 2511 if (DO_BIC(BIC_CPUGFX)) 2512 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc); 2513 2514 if (DO_BIC(BIC_Pkgpc2)) 2515 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc); 2516 if (DO_BIC(BIC_Pkgpc3)) 2517 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc); 2518 if (DO_BIC(BIC_Pkgpc6)) 2519 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc); 2520 if (DO_BIC(BIC_Pkgpc7)) 2521 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc); 2522 if (DO_BIC(BIC_Pkgpc8)) 2523 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc); 2524 if (DO_BIC(BIC_Pkgpc9)) 2525 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc); 2526 if (DO_BIC(BIC_Pkgpc10)) 2527 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc); 2528 2529 if (DO_BIC(BIC_CPU_LPI)) { 2530 if (p->cpu_lpi >= 0) 2531 outp += 2532 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 2533 100.0 * p->cpu_lpi / 1000000.0 / interval_float); 2534 else 2535 outp += sprintf(outp, "%s(neg)", (printed++ ? delim : "")); 2536 } 2537 if (DO_BIC(BIC_SYS_LPI)) { 2538 if (p->sys_lpi >= 0) 2539 outp += 2540 sprintf(outp, "%s%.2f", (printed++ ? 
delim : ""), 2541 100.0 * p->sys_lpi / 1000000.0 / interval_float); 2542 else 2543 outp += sprintf(outp, "%s(neg)", (printed++ ? delim : "")); 2544 } 2545 2546 if (DO_BIC(BIC_PkgWatt)) 2547 outp += 2548 sprintf(outp, fmt8, (printed++ ? delim : ""), 2549 rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float)); 2550 if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) 2551 outp += 2552 sprintf(outp, fmt8, (printed++ ? delim : ""), 2553 rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float)); 2554 if (DO_BIC(BIC_GFXWatt)) 2555 outp += 2556 sprintf(outp, fmt8, (printed++ ? delim : ""), 2557 rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float)); 2558 if (DO_BIC(BIC_RAMWatt)) 2559 outp += 2560 sprintf(outp, fmt8, (printed++ ? delim : ""), 2561 rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float)); 2562 if (DO_BIC(BIC_Pkg_J)) 2563 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 2564 rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float)); 2565 if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) 2566 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 2567 rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float)); 2568 if (DO_BIC(BIC_GFX_J)) 2569 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 2570 rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float)); 2571 if (DO_BIC(BIC_RAM_J)) 2572 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 2573 rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float)); 2574 if (DO_BIC(BIC_PKG__)) 2575 outp += 2576 sprintf(outp, fmt8, (printed++ ? delim : ""), 2577 rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float)); 2578 if (DO_BIC(BIC_RAM__)) 2579 outp += 2580 sprintf(outp, fmt8, (printed++ ? 
delim : ""), 2581 rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float)); 2582 /* UncMHz */ 2583 if (DO_BIC(BIC_UNCORE_MHZ)) 2584 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz); 2585 2586 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 2587 if (mp->format == FORMAT_RAW) { 2588 if (mp->width == 32) 2589 outp += 2590 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]); 2591 else 2592 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]); 2593 } else if (mp->format == FORMAT_DELTA) { 2594 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2595 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]); 2596 else 2597 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]); 2598 } else if (mp->format == FORMAT_PERCENT) { 2599 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc); 2600 } else if (mp->type == COUNTER_K2M) 2601 outp += sprintf(outp, "%s%d", (printed++ ? 
delim : ""), (unsigned int)p->counter[i] / 1000); 2602 } 2603 2604 done: 2605 if (*(outp - 1) != '\n') 2606 outp += sprintf(outp, "\n"); 2607 2608 return 0; 2609 } 2610 2611 void flush_output_stdout(void) 2612 { 2613 FILE *filep; 2614 2615 if (outf == stderr) 2616 filep = stdout; 2617 else 2618 filep = outf; 2619 2620 fputs(output_buffer, filep); 2621 fflush(filep); 2622 2623 outp = output_buffer; 2624 } 2625 2626 void flush_output_stderr(void) 2627 { 2628 fputs(output_buffer, outf); 2629 fflush(outf); 2630 outp = output_buffer; 2631 } 2632 2633 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 2634 { 2635 static int count; 2636 2637 if ((!count || (header_iterations && !(count % header_iterations))) || !summary_only) 2638 print_header("\t"); 2639 2640 format_counters(&average.threads, &average.cores, &average.packages); 2641 2642 count++; 2643 2644 if (summary_only) 2645 return; 2646 2647 for_all_cpus(format_counters, t, c, p); 2648 } 2649 2650 #define DELTA_WRAP32(new, old) \ 2651 old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32); 2652 2653 int delta_package(struct pkg_data *new, struct pkg_data *old) 2654 { 2655 int i; 2656 struct msr_counter *mp; 2657 2658 if (DO_BIC(BIC_Totl_c0)) 2659 old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; 2660 if (DO_BIC(BIC_Any_c0)) 2661 old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0; 2662 if (DO_BIC(BIC_GFX_c0)) 2663 old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0; 2664 if (DO_BIC(BIC_CPUGFX)) 2665 old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0; 2666 2667 old->pc2 = new->pc2 - old->pc2; 2668 if (DO_BIC(BIC_Pkgpc3)) 2669 old->pc3 = new->pc3 - old->pc3; 2670 if (DO_BIC(BIC_Pkgpc6)) 2671 old->pc6 = new->pc6 - old->pc6; 2672 if (DO_BIC(BIC_Pkgpc7)) 2673 old->pc7 = new->pc7 - old->pc7; 2674 old->pc8 = new->pc8 - old->pc8; 2675 old->pc9 = new->pc9 - old->pc9; 2676 
old->pc10 = new->pc10 - old->pc10; 2677 old->cpu_lpi = new->cpu_lpi - old->cpu_lpi; 2678 old->sys_lpi = new->sys_lpi - old->sys_lpi; 2679 old->pkg_temp_c = new->pkg_temp_c; 2680 2681 /* flag an error when rc6 counter resets/wraps */ 2682 if (old->gfx_rc6_ms > new->gfx_rc6_ms) 2683 old->gfx_rc6_ms = -1; 2684 else 2685 old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms; 2686 2687 old->uncore_mhz = new->uncore_mhz; 2688 old->gfx_mhz = new->gfx_mhz; 2689 old->gfx_act_mhz = new->gfx_act_mhz; 2690 2691 /* flag an error when mc6 counter resets/wraps */ 2692 if (old->sam_mc6_ms > new->sam_mc6_ms) 2693 old->sam_mc6_ms = -1; 2694 else 2695 old->sam_mc6_ms = new->sam_mc6_ms - old->sam_mc6_ms; 2696 2697 old->sam_mhz = new->sam_mhz; 2698 old->sam_act_mhz = new->sam_act_mhz; 2699 2700 old->energy_pkg.raw_value = new->energy_pkg.raw_value - old->energy_pkg.raw_value; 2701 old->energy_cores.raw_value = new->energy_cores.raw_value - old->energy_cores.raw_value; 2702 old->energy_gfx.raw_value = new->energy_gfx.raw_value - old->energy_gfx.raw_value; 2703 old->energy_dram.raw_value = new->energy_dram.raw_value - old->energy_dram.raw_value; 2704 old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value; 2705 old->rapl_dram_perf_status.raw_value = 2706 new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value; 2707 2708 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 2709 if (mp->format == FORMAT_RAW) 2710 old->counter[i] = new->counter[i]; 2711 else if (mp->format == FORMAT_AVERAGE) 2712 old->counter[i] = new->counter[i]; 2713 else 2714 old->counter[i] = new->counter[i] - old->counter[i]; 2715 } 2716 2717 return 0; 2718 } 2719 2720 void delta_core(struct core_data *new, struct core_data *old) 2721 { 2722 int i; 2723 struct msr_counter *mp; 2724 2725 old->c3 = new->c3 - old->c3; 2726 old->c6 = new->c6 - old->c6; 2727 old->c7 = new->c7 - old->c7; 2728 old->core_temp_c = new->core_temp_c; 2729 old->core_throt_cnt 
= new->core_throt_cnt; 2730 old->mc6_us = new->mc6_us - old->mc6_us; 2731 2732 DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value); 2733 2734 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 2735 if (mp->format == FORMAT_RAW) 2736 old->counter[i] = new->counter[i]; 2737 else 2738 old->counter[i] = new->counter[i] - old->counter[i]; 2739 } 2740 } 2741 2742 int soft_c1_residency_display(int bic) 2743 { 2744 if (!DO_BIC(BIC_CPU_c1) || platform->has_msr_core_c1_res) 2745 return 0; 2746 2747 return DO_BIC_READ(bic); 2748 } 2749 2750 /* 2751 * old = new - old 2752 */ 2753 int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta) 2754 { 2755 int i; 2756 struct msr_counter *mp; 2757 2758 /* we run cpuid just the 1st time, copy the results */ 2759 if (DO_BIC(BIC_APIC)) 2760 new->apic_id = old->apic_id; 2761 if (DO_BIC(BIC_X2APIC)) 2762 new->x2apic_id = old->x2apic_id; 2763 2764 /* 2765 * the timestamps from start of measurement interval are in "old" 2766 * the timestamp from end of measurement interval are in "new" 2767 * over-write old w/ new so we can print end of interval values 2768 */ 2769 2770 timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta); 2771 old->tv_begin = new->tv_begin; 2772 old->tv_end = new->tv_end; 2773 2774 old->tsc = new->tsc - old->tsc; 2775 2776 /* check for TSC < 1 Mcycles over interval */ 2777 if (old->tsc < (1000 * 1000)) 2778 errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n" 2779 "You can disable all c-states by booting with \"idle=poll\"\n" 2780 "or just the deep ones with \"processor.max_cstate=1\""); 2781 2782 old->c1 = new->c1 - old->c1; 2783 2784 if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC) 2785 || soft_c1_residency_display(BIC_Avg_MHz)) { 2786 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { 2787 old->aperf = new->aperf - old->aperf; 2788 old->mperf = new->mperf - old->mperf; 2789 } else { 2790 return -1; 2791 } 2792 } 
2793 2794 if (platform->has_msr_core_c1_res) { 2795 /* 2796 * Some models have a dedicated C1 residency MSR, 2797 * which should be more accurate than the derivation below. 2798 */ 2799 } else { 2800 /* 2801 * As counter collection is not atomic, 2802 * it is possible for mperf's non-halted cycles + idle states 2803 * to exceed TSC's all cycles: show c1 = 0% in that case. 2804 */ 2805 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak)) 2806 old->c1 = 0; 2807 else { 2808 /* normal case, derive c1 */ 2809 old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3 2810 - core_delta->c6 - core_delta->c7; 2811 } 2812 } 2813 2814 if (old->mperf == 0) { 2815 if (debug > 1) 2816 fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id); 2817 old->mperf = 1; /* divide by 0 protection */ 2818 } 2819 2820 if (DO_BIC(BIC_IPC)) 2821 old->instr_count = new->instr_count - old->instr_count; 2822 2823 if (DO_BIC(BIC_IRQ)) 2824 old->irq_count = new->irq_count - old->irq_count; 2825 2826 if (DO_BIC(BIC_SMI)) 2827 old->smi_count = new->smi_count - old->smi_count; 2828 2829 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 2830 if (mp->format == FORMAT_RAW) 2831 old->counter[i] = new->counter[i]; 2832 else 2833 old->counter[i] = new->counter[i] - old->counter[i]; 2834 } 2835 return 0; 2836 } 2837 2838 int delta_cpu(struct thread_data *t, struct core_data *c, 2839 struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2) 2840 { 2841 int retval = 0; 2842 2843 /* calculate core delta only for 1st thread in core */ 2844 if (is_cpu_first_thread_in_core(t, c, p)) 2845 delta_core(c, c2); 2846 2847 /* always calculate thread delta */ 2848 retval = delta_thread(t, t2, c2); /* c2 is core delta */ 2849 if (retval) 2850 return retval; 2851 2852 /* calculate package delta only for 1st core in package */ 2853 if (is_cpu_first_core_in_package(t, c, p)) 2854 retval = delta_package(p, p2); 2855 2856 return retval; 2857 } 2858 2859 void 
rapl_counter_clear(struct rapl_counter *c)
{
	/* Reset a RAPL counter to "uninitialized": accumulate will re-learn unit/scale. */
	c->raw_value = 0;
	c->scale = 0.0;
	c->unit = RAPL_UNIT_INVALID;
}

/*
 * Zero every field of one thread/core/package counter triple,
 * including the dynamically-added sysfs/MSR counter arrays.
 * Used to reset the "average" accumulators before summing.
 */
void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int i;
	struct msr_counter *mp;

	t->tv_begin.tv_sec = 0;
	t->tv_begin.tv_usec = 0;
	t->tv_end.tv_sec = 0;
	t->tv_end.tv_usec = 0;
	t->tv_delta.tv_sec = 0;
	t->tv_delta.tv_usec = 0;

	t->tsc = 0;
	t->aperf = 0;
	t->mperf = 0;
	t->c1 = 0;

	t->instr_count = 0;

	t->irq_count = 0;
	t->smi_count = 0;

	c->c3 = 0;
	c->c6 = 0;
	c->c7 = 0;
	c->mc6_us = 0;
	c->core_temp_c = 0;
	rapl_counter_clear(&c->core_energy);
	c->core_throt_cnt = 0;

	p->pkg_wtd_core_c0 = 0;
	p->pkg_any_core_c0 = 0;
	p->pkg_any_gfxe_c0 = 0;
	p->pkg_both_core_gfxe_c0 = 0;

	/* pc3/pc6/pc7 are cleared only when displayed, matching how they are summed */
	p->pc2 = 0;
	if (DO_BIC(BIC_Pkgpc3))
		p->pc3 = 0;
	if (DO_BIC(BIC_Pkgpc6))
		p->pc6 = 0;
	if (DO_BIC(BIC_Pkgpc7))
		p->pc7 = 0;
	p->pc8 = 0;
	p->pc9 = 0;
	p->pc10 = 0;
	p->cpu_lpi = 0;
	p->sys_lpi = 0;

	rapl_counter_clear(&p->energy_pkg);
	rapl_counter_clear(&p->energy_dram);
	rapl_counter_clear(&p->energy_cores);
	rapl_counter_clear(&p->energy_gfx);
	rapl_counter_clear(&p->rapl_pkg_perf_status);
	rapl_counter_clear(&p->rapl_dram_perf_status);
	p->pkg_temp_c = 0;

	p->gfx_rc6_ms = 0;
	p->uncore_mhz = 0;
	p->gfx_mhz = 0;
	p->gfx_act_mhz = 0;
	p->sam_mc6_ms = 0;
	p->sam_mhz = 0;
	p->sam_act_mhz = 0;
	/* dynamically-added counters, one slot per list entry */
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
		t->counter[i] = 0;

	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
		c->counter[i] = 0;

	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
		p->counter[i] = 0;
}

/*
 * Accumulate src into dst, adopting src's unit/scale on first use.
 * The asserts require all accumulated samples to share unit and scale.
 */
void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src)
{
	/* Copy
unit and scale from src if dst is not initialized */
	if (dst->unit == RAPL_UNIT_INVALID) {
		dst->unit = src->unit;
		dst->scale = src->scale;
	}

	assert(dst->unit == src->unit);
	assert(dst->scale == src->scale);

	dst->raw_value += src->raw_value;
}

/*
 * Accumulate one CPU's deltas into the global "average" triple.
 * Thread values are always summed; core values only for the first
 * thread in a core; package values only for the first core in a
 * package.  Callback for for_all_cpus(); always returns 0.
 */
int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int i;
	struct msr_counter *mp;

	/* copy un-changing apic_id's */
	if (DO_BIC(BIC_APIC))
		average.threads.apic_id = t->apic_id;
	if (DO_BIC(BIC_X2APIC))
		average.threads.x2apic_id = t->x2apic_id;

	/* remember first tv_begin */
	if (average.threads.tv_begin.tv_sec == 0)
		average.threads.tv_begin = t->tv_begin;

	/* remember last tv_end */
	average.threads.tv_end = t->tv_end;

	average.threads.tsc += t->tsc;
	average.threads.aperf += t->aperf;
	average.threads.mperf += t->mperf;
	average.threads.c1 += t->c1;

	average.threads.instr_count += t->instr_count;

	average.threads.irq_count += t->irq_count;
	average.threads.smi_count += t->smi_count;

	/* RAW columns are snapshots, not additive -- skip them */
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		average.threads.counter[i] += t->counter[i];
	}

	/* sum per-core values only for 1st thread in core */
	if (!is_cpu_first_thread_in_core(t, c, p))
		return 0;

	average.cores.c3 += c->c3;
	average.cores.c6 += c->c6;
	average.cores.c7 += c->c7;
	average.cores.mc6_us += c->mc6_us;

	/* temperatures and throttle counts report the max, not the sum */
	average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
	average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt);

	rapl_counter_accumulate(&average.cores.core_energy, &c->core_energy);

	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		average.cores.counter[i] += c->counter[i];
	}

	/* sum per-pkg values only for 1st core in pkg */
	if (!is_cpu_first_core_in_package(t, c, p))
		return 0;

	if (DO_BIC(BIC_Totl_c0))
		average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
	if (DO_BIC(BIC_Any_c0))
		average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
	if (DO_BIC(BIC_GFX_c0))
		average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
	if (DO_BIC(BIC_CPUGFX))
		average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;

	average.packages.pc2 += p->pc2;
	if (DO_BIC(BIC_Pkgpc3))
		average.packages.pc3 += p->pc3;
	if (DO_BIC(BIC_Pkgpc6))
		average.packages.pc6 += p->pc6;
	if (DO_BIC(BIC_Pkgpc7))
		average.packages.pc7 += p->pc7;
	average.packages.pc8 += p->pc8;
	average.packages.pc9 += p->pc9;
	average.packages.pc10 += p->pc10;

	/* system-wide LPI counters: last writer wins (same value for every package) */
	average.packages.cpu_lpi = p->cpu_lpi;
	average.packages.sys_lpi = p->sys_lpi;

	rapl_counter_accumulate(&average.packages.energy_pkg, &p->energy_pkg);
	rapl_counter_accumulate(&average.packages.energy_dram, &p->energy_dram);
	rapl_counter_accumulate(&average.packages.energy_cores, &p->energy_cores);
	rapl_counter_accumulate(&average.packages.energy_gfx, &p->energy_gfx);

	average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
	average.packages.uncore_mhz = p->uncore_mhz;
	average.packages.gfx_mhz = p->gfx_mhz;
	average.packages.gfx_act_mhz = p->gfx_act_mhz;
	average.packages.sam_mc6_ms = p->sam_mc6_ms;
	average.packages.sam_mhz = p->sam_mhz;
	average.packages.sam_act_mhz = p->sam_act_mhz;

	average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);

	rapl_counter_accumulate(&average.packages.rapl_pkg_perf_status, &p->rapl_pkg_perf_status);
	rapl_counter_accumulate(&average.packages.rapl_dram_perf_status, &p->rapl_dram_perf_status);

	/*
	 * NOTE(review): unlike the thread/core loops above, RAW package
	 * counters are only snapshot-copied when topo.num_packages == 0,
	 * otherwise they are summed -- looks inconsistent; confirm intent.
	 */
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if ((mp->format == FORMAT_RAW) && (topo.num_packages == 0))
			average.packages.counter[i] = p->counter[i];
		else
			average.packages.counter[i] += p->counter[i];
	}
	return 0;
}

/*
 * sum the counters for all cpus in the system
 * compute the weighted average
 */
void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int i;
	struct msr_counter *mp;

	clear_counters(&average.threads, &average.cores, &average.packages);

	for_all_cpus(sum_counters, t, c, p);

	/* Use the global time delta for the average. */
	average.threads.tv_delta = tv_delta;

	average.threads.tsc /= topo.allowed_cpus;
	average.threads.aperf /= topo.allowed_cpus;
	average.threads.mperf /= topo.allowed_cpus;
	average.threads.instr_count /= topo.allowed_cpus;
	average.threads.c1 /= topo.allowed_cpus;

	if (average.threads.irq_count > 9999999)
		sums_need_wide_columns = 1;

	average.cores.c3 /= topo.allowed_cores;
	average.cores.c6 /= topo.allowed_cores;
	average.cores.c7 /= topo.allowed_cores;
	average.cores.mc6_us /= topo.allowed_cores;

	if (DO_BIC(BIC_Totl_c0))
		average.packages.pkg_wtd_core_c0 /= topo.allowed_packages;
	if (DO_BIC(BIC_Any_c0))
		average.packages.pkg_any_core_c0 /= topo.allowed_packages;
	if (DO_BIC(BIC_GFX_c0))
		average.packages.pkg_any_gfxe_c0 /= topo.allowed_packages;
	if (DO_BIC(BIC_CPUGFX))
		average.packages.pkg_both_core_gfxe_c0 /= topo.allowed_packages;

	average.packages.pc2 /= topo.allowed_packages;
	if (DO_BIC(BIC_Pkgpc3))
		average.packages.pc3 /= topo.allowed_packages;
	if (DO_BIC(BIC_Pkgpc6))
		average.packages.pc6 /= topo.allowed_packages;
	if (DO_BIC(BIC_Pkgpc7))
		average.packages.pc7 /= topo.allowed_packages;

	average.packages.pc8 /= topo.allowed_packages;
	average.packages.pc9 /= topo.allowed_packages;
	average.packages.pc10 /=
topo.allowed_packages; 3111 3112 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 3113 if (mp->format == FORMAT_RAW) 3114 continue; 3115 if (mp->type == COUNTER_ITEMS) { 3116 if (average.threads.counter[i] > 9999999) 3117 sums_need_wide_columns = 1; 3118 continue; 3119 } 3120 average.threads.counter[i] /= topo.allowed_cpus; 3121 } 3122 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 3123 if (mp->format == FORMAT_RAW) 3124 continue; 3125 if (mp->type == COUNTER_ITEMS) { 3126 if (average.cores.counter[i] > 9999999) 3127 sums_need_wide_columns = 1; 3128 } 3129 average.cores.counter[i] /= topo.allowed_cores; 3130 } 3131 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 3132 if (mp->format == FORMAT_RAW) 3133 continue; 3134 if (mp->type == COUNTER_ITEMS) { 3135 if (average.packages.counter[i] > 9999999) 3136 sums_need_wide_columns = 1; 3137 } 3138 average.packages.counter[i] /= topo.allowed_packages; 3139 } 3140 } 3141 3142 static unsigned long long rdtsc(void) 3143 { 3144 unsigned int low, high; 3145 3146 asm volatile ("rdtsc":"=a" (low), "=d"(high)); 3147 3148 return low | ((unsigned long long)high) << 32; 3149 } 3150 3151 /* 3152 * Open a file, and exit on failure 3153 */ 3154 FILE *fopen_or_die(const char *path, const char *mode) 3155 { 3156 FILE *filep = fopen(path, mode); 3157 3158 if (!filep) 3159 err(1, "%s: open failed", path); 3160 return filep; 3161 } 3162 3163 /* 3164 * snapshot_sysfs_counter() 3165 * 3166 * return snapshot of given counter 3167 */ 3168 unsigned long long snapshot_sysfs_counter(char *path) 3169 { 3170 FILE *fp; 3171 int retval; 3172 unsigned long long counter; 3173 3174 fp = fopen_or_die(path, "r"); 3175 3176 retval = fscanf(fp, "%lld", &counter); 3177 if (retval != 1) 3178 err(1, "snapshot_sysfs_counter(%s)", path); 3179 3180 fclose(fp); 3181 3182 return counter; 3183 } 3184 3185 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp, char *counter_path) 3186 { 3187 if (mp->msr_num != 0) { 3188 assert(!no_msr); 3189 
if (get_msr(cpu, mp->msr_num, counterp))
			return -1;
	} else {
		char path[128 + PATH_BYTES];

		if (mp->flags & SYSFS_PERCPU) {
			/* per-cpu counters live under each cpuN directory */
			sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", cpu, mp->sp->path);

			*counterp = snapshot_sysfs_counter(path);
		} else {
			*counterp = snapshot_sysfs_counter(counter_path);
		}
	}

	return 0;
}

/*
 * Return the current uncore frequency (MHz) for "package" via the
 * legacy intel_uncore_frequency sysfs interface, or 0 (with a
 * one-time warning) when no die directory exists.
 */
unsigned long long get_legacy_uncore_mhz(int package)
{
	char path[128];
	int die;
	static int warn_once;

	/*
	 * for this package, use the first die_id that exists
	 */
	for (die = 0; die <= topo.max_die_id; ++die) {

		sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz",
			package, die);

		/* sysfs reports kHz; convert to MHz */
		if (access(path, R_OK) == 0)
			return (snapshot_sysfs_counter(path) / 1000);
	}
	if (!warn_once) {
		warnx("BUG: %s: No %s", __func__, path);
		warn_once = 1;
	}

	return 0;
}

/*
 * Return the Energy Performance Bias for "cpu": preferably from sysfs,
 * falling back to MSR_IA32_ENERGY_PERF_BIAS; -1 when neither is available.
 * Exits if the sysfs file exists but cannot be parsed.
 */
int get_epb(int cpu)
{
	char path[128 + PATH_BYTES];
	unsigned long long msr;
	int ret, epb = -1;
	FILE *fp;

	sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu);

	fp = fopen(path, "r");
	if (!fp)
		goto msr_fallback;

	ret = fscanf(fp, "%d", &epb);
	if (ret != 1)
		err(1, "%s(%s)", __func__, path);

	fclose(fp);

	return epb;

msr_fallback:
	if (no_msr)
		return -1;

	/* NOTE(review): get_msr() return value unchecked here -- msr may be stale on failure */
	get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);

	return msr & 0xf;
}

/*
 * Record this thread's (initial) APIC and x2APIC ids via cpuid,
 * using the vendor-appropriate leaves (AMD/Hygon 0x8000001e, Intel 0xb).
 */
void get_apic_id(struct thread_data *t)
{
	unsigned int eax, ebx, ecx, edx;

	if (DO_BIC(BIC_APIC)) {
		eax = ebx = ecx = edx = 0;
		__cpuid(1, eax, ebx, ecx, edx);

		t->apic_id = (ebx >> 24) & 0xff;
	}

	if (!DO_BIC(BIC_X2APIC))
		return;

	if (authentic_amd || hygon_genuine) {
		unsigned int topology_extensions;

		if
(max_extended_level < 0x8000001e) 3279 return; 3280 3281 eax = ebx = ecx = edx = 0; 3282 __cpuid(0x80000001, eax, ebx, ecx, edx); 3283 topology_extensions = ecx & (1 << 22); 3284 3285 if (topology_extensions == 0) 3286 return; 3287 3288 eax = ebx = ecx = edx = 0; 3289 __cpuid(0x8000001e, eax, ebx, ecx, edx); 3290 3291 t->x2apic_id = eax; 3292 return; 3293 } 3294 3295 if (!genuine_intel) 3296 return; 3297 3298 if (max_level < 0xb) 3299 return; 3300 3301 ecx = 0; 3302 __cpuid(0xb, eax, ebx, ecx, edx); 3303 t->x2apic_id = edx; 3304 3305 if (debug && (t->apic_id != (t->x2apic_id & 0xff))) 3306 fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id); 3307 } 3308 3309 int get_core_throt_cnt(int cpu, unsigned long long *cnt) 3310 { 3311 char path[128 + PATH_BYTES]; 3312 unsigned long long tmp; 3313 FILE *fp; 3314 int ret; 3315 3316 sprintf(path, "/sys/devices/system/cpu/cpu%d/thermal_throttle/core_throttle_count", cpu); 3317 fp = fopen(path, "r"); 3318 if (!fp) 3319 return -1; 3320 ret = fscanf(fp, "%lld", &tmp); 3321 fclose(fp); 3322 if (ret != 1) 3323 return -1; 3324 *cnt = tmp; 3325 3326 return 0; 3327 } 3328 3329 struct amperf_group_fd { 3330 int aperf; /* Also the group descriptor */ 3331 int mperf; 3332 }; 3333 3334 static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr) 3335 { 3336 int fdmt; 3337 int bytes_read; 3338 char buf[64]; 3339 int ret = -1; 3340 3341 fdmt = open(path, O_RDONLY, 0); 3342 if (fdmt == -1) { 3343 if (debug) 3344 fprintf(stderr, "Failed to parse perf counter info %s\n", path); 3345 ret = -1; 3346 goto cleanup_and_exit; 3347 } 3348 3349 bytes_read = read(fdmt, buf, sizeof(buf) - 1); 3350 if (bytes_read <= 0 || bytes_read >= (int)sizeof(buf)) { 3351 if (debug) 3352 fprintf(stderr, "Failed to parse perf counter info %s\n", path); 3353 ret = -1; 3354 goto cleanup_and_exit; 3355 } 3356 3357 buf[bytes_read] = '\0'; 3358 3359 if (sscanf(buf, parse_format, 
value_ptr) != 1) { 3360 if (debug) 3361 fprintf(stderr, "Failed to parse perf counter info %s\n", path); 3362 ret = -1; 3363 goto cleanup_and_exit; 3364 } 3365 3366 ret = 0; 3367 3368 cleanup_and_exit: 3369 close(fdmt); 3370 return ret; 3371 } 3372 3373 static unsigned int read_perf_counter_info_n(const char *const path, const char *const parse_format) 3374 { 3375 unsigned int v; 3376 int status; 3377 3378 status = read_perf_counter_info(path, parse_format, &v); 3379 if (status) 3380 v = -1; 3381 3382 return v; 3383 } 3384 3385 static unsigned int read_msr_type(void) 3386 { 3387 const char *const path = "/sys/bus/event_source/devices/msr/type"; 3388 const char *const format = "%u"; 3389 3390 return read_perf_counter_info_n(path, format); 3391 } 3392 3393 static unsigned int read_aperf_config(void) 3394 { 3395 const char *const path = "/sys/bus/event_source/devices/msr/events/aperf"; 3396 const char *const format = "event=%x"; 3397 3398 return read_perf_counter_info_n(path, format); 3399 } 3400 3401 static unsigned int read_mperf_config(void) 3402 { 3403 const char *const path = "/sys/bus/event_source/devices/msr/events/mperf"; 3404 const char *const format = "event=%x"; 3405 3406 return read_perf_counter_info_n(path, format); 3407 } 3408 3409 static unsigned int read_perf_type(const char *subsys) 3410 { 3411 const char *const path_format = "/sys/bus/event_source/devices/%s/type"; 3412 const char *const format = "%u"; 3413 char path[128]; 3414 3415 snprintf(path, sizeof(path), path_format, subsys); 3416 3417 return read_perf_counter_info_n(path, format); 3418 } 3419 3420 static unsigned int read_rapl_config(const char *subsys, const char *event_name) 3421 { 3422 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s"; 3423 const char *const format = "event=%x"; 3424 char path[128]; 3425 3426 snprintf(path, sizeof(path), path_format, subsys, event_name); 3427 3428 return read_perf_counter_info_n(path, format); 3429 } 3430 3431 static unsigned int 
read_perf_rapl_unit(const char *subsys, const char *event_name) 3432 { 3433 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.unit"; 3434 const char *const format = "%s"; 3435 char path[128]; 3436 char unit_buffer[16]; 3437 3438 snprintf(path, sizeof(path), path_format, subsys, event_name); 3439 3440 read_perf_counter_info(path, format, &unit_buffer); 3441 if (strcmp("Joules", unit_buffer) == 0) 3442 return RAPL_UNIT_JOULES; 3443 3444 return RAPL_UNIT_INVALID; 3445 } 3446 3447 static double read_perf_rapl_scale(const char *subsys, const char *event_name) 3448 { 3449 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.scale"; 3450 const char *const format = "%lf"; 3451 char path[128]; 3452 double scale; 3453 3454 snprintf(path, sizeof(path), path_format, subsys, event_name); 3455 3456 if (read_perf_counter_info(path, format, &scale)) 3457 return 0.0; 3458 3459 return scale; 3460 } 3461 3462 static struct amperf_group_fd open_amperf_fd(int cpu) 3463 { 3464 const unsigned int msr_type = read_msr_type(); 3465 const unsigned int aperf_config = read_aperf_config(); 3466 const unsigned int mperf_config = read_mperf_config(); 3467 struct amperf_group_fd fds = {.aperf = -1, .mperf = -1 }; 3468 3469 fds.aperf = open_perf_counter(cpu, msr_type, aperf_config, -1, PERF_FORMAT_GROUP); 3470 fds.mperf = open_perf_counter(cpu, msr_type, mperf_config, fds.aperf, PERF_FORMAT_GROUP); 3471 3472 return fds; 3473 } 3474 3475 static int get_amperf_fd(int cpu) 3476 { 3477 assert(fd_amperf_percpu); 3478 3479 if (fd_amperf_percpu[cpu].aperf) 3480 return fd_amperf_percpu[cpu].aperf; 3481 3482 fd_amperf_percpu[cpu] = open_amperf_fd(cpu); 3483 3484 return fd_amperf_percpu[cpu].aperf; 3485 } 3486 3487 /* Read APERF, MPERF and TSC using the perf API. 
*/
static int read_aperf_mperf_tsc_perf(struct thread_data *t, int cpu)
{
	/* PERF_FORMAT_GROUP read layout: nr_entries, then one value per group member */
	union {
		struct {
			unsigned long nr_entries;
			unsigned long aperf;
			unsigned long mperf;
		};

		unsigned long as_array[3];
	} cnt;

	const int fd_amperf = get_amperf_fd(cpu);

	/*
	 * Read the TSC with rdtsc, because we want the absolute value and not
	 * the offset from the start of the counter.
	 */
	t->tsc = rdtsc();

	const int n = read(fd_amperf, &cnt.as_array[0], sizeof(cnt.as_array));

	/* short read: group not fully available -- reject this sample */
	if (n != sizeof(cnt.as_array))
		return -2;

	t->aperf = cnt.aperf * aperf_mperf_multiplier;
	t->mperf = cnt.mperf * aperf_mperf_multiplier;

	return 0;
}

/* Read APERF, MPERF and TSC using the MSR driver and rdtsc instruction. */
static int read_aperf_mperf_tsc_msr(struct thread_data *t, int cpu)
{
	unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
	int aperf_mperf_retry_count = 0;

	/*
	 * The TSC, APERF and MPERF must be read together for
	 * APERF/MPERF and MPERF/TSC to give accurate results.
	 *
	 * Unfortunately, APERF and MPERF are read by
	 * individual system call, so delays may occur
	 * between them.  If the time to read them
	 * varies by a large amount, we re-read them.
	 */

	/*
	 * This initial dummy APERF read has been seen to
	 * reduce jitter in the subsequent reads.
3538 */ 3539 3540 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) 3541 return -3; 3542 3543 retry: 3544 t->tsc = rdtsc(); /* re-read close to APERF */ 3545 3546 tsc_before = t->tsc; 3547 3548 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) 3549 return -3; 3550 3551 tsc_between = rdtsc(); 3552 3553 if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) 3554 return -4; 3555 3556 tsc_after = rdtsc(); 3557 3558 aperf_time = tsc_between - tsc_before; 3559 mperf_time = tsc_after - tsc_between; 3560 3561 /* 3562 * If the system call latency to read APERF and MPERF 3563 * differ by more than 2x, then try again. 3564 */ 3565 if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) { 3566 aperf_mperf_retry_count++; 3567 if (aperf_mperf_retry_count < 5) 3568 goto retry; 3569 else 3570 warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time); 3571 } 3572 aperf_mperf_retry_count = 0; 3573 3574 t->aperf = t->aperf * aperf_mperf_multiplier; 3575 t->mperf = t->mperf * aperf_mperf_multiplier; 3576 3577 return 0; 3578 } 3579 3580 size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci) 3581 { 3582 size_t ret = 0; 3583 3584 for (int i = 0; i < NUM_RAPL_COUNTERS; ++i) 3585 if (rci->source[i] == RAPL_SOURCE_PERF) 3586 ++ret; 3587 3588 return ret; 3589 } 3590 3591 static size_t cstate_counter_info_count_perf(const struct cstate_counter_info_t *cci) 3592 { 3593 size_t ret = 0; 3594 3595 for (int i = 0; i < NUM_CSTATE_COUNTERS; ++i) 3596 if (cci->source[i] == CSTATE_SOURCE_PERF) 3597 ++ret; 3598 3599 return ret; 3600 } 3601 3602 void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci, unsigned int idx) 3603 { 3604 rc->raw_value = rci->data[idx]; 3605 rc->unit = rci->unit[idx]; 3606 rc->scale = rci->scale[idx]; 3607 } 3608 3609 int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct pkg_data *p) 3610 { 3611 unsigned long long perf_data[NUM_RAPL_COUNTERS + 1]; 3612 struct rapl_counter_info_t *rci; 3613 3614 if (debug) 
3615 fprintf(stderr, "%s: cpu%d domain%d\n", __func__, cpu, domain); 3616 3617 assert(rapl_counter_info_perdomain); 3618 assert(domain < rapl_counter_info_perdomain_size); 3619 3620 rci = &rapl_counter_info_perdomain[domain]; 3621 3622 /* 3623 * If we have any perf counters to read, read them all now, in bulk 3624 */ 3625 if (rci->fd_perf != -1) { 3626 size_t num_perf_counters = rapl_counter_info_count_perf(rci); 3627 const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long); 3628 const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data)); 3629 3630 if (actual_read_size != expected_read_size) 3631 err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, 3632 actual_read_size); 3633 } 3634 3635 for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) { 3636 switch (rci->source[i]) { 3637 case RAPL_SOURCE_NONE: 3638 break; 3639 3640 case RAPL_SOURCE_PERF: 3641 assert(pi < ARRAY_SIZE(perf_data)); 3642 assert(rci->fd_perf != -1); 3643 3644 if (debug) 3645 fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n", 3646 i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]); 3647 3648 rci->data[i] = perf_data[pi]; 3649 3650 ++pi; 3651 break; 3652 3653 case RAPL_SOURCE_MSR: 3654 if (debug) 3655 fprintf(stderr, "Reading rapl counter via msr at %u\n", i); 3656 3657 assert(!no_msr); 3658 if (rci->flags[i] & RAPL_COUNTER_FLAG_USE_MSR_SUM) { 3659 if (get_msr_sum(cpu, rci->msr[i], &rci->data[i])) 3660 return -13 - i; 3661 } else { 3662 if (get_msr(cpu, rci->msr[i], &rci->data[i])) 3663 return -13 - i; 3664 } 3665 3666 rci->data[i] &= rci->msr_mask[i]; 3667 if (rci->msr_shift[i] >= 0) 3668 rci->data[i] >>= abs(rci->msr_shift[i]); 3669 else 3670 rci->data[i] <<= abs(rci->msr_shift[i]); 3671 3672 break; 3673 } 3674 } 3675 3676 BUILD_BUG_ON(NUM_RAPL_COUNTERS != 7); 3677 write_rapl_counter(&p->energy_pkg, rci, RAPL_RCI_INDEX_ENERGY_PKG); 3678 write_rapl_counter(&p->energy_cores, 
rci, RAPL_RCI_INDEX_ENERGY_CORES); 3679 write_rapl_counter(&p->energy_dram, rci, RAPL_RCI_INDEX_DRAM); 3680 write_rapl_counter(&p->energy_gfx, rci, RAPL_RCI_INDEX_GFX); 3681 write_rapl_counter(&p->rapl_pkg_perf_status, rci, RAPL_RCI_INDEX_PKG_PERF_STATUS); 3682 write_rapl_counter(&p->rapl_dram_perf_status, rci, RAPL_RCI_INDEX_DRAM_PERF_STATUS); 3683 write_rapl_counter(&c->core_energy, rci, RAPL_RCI_INDEX_CORE_ENERGY); 3684 3685 return 0; 3686 } 3687 3688 char *find_sysfs_path_by_id(struct sysfs_path *sp, int id) 3689 { 3690 while (sp) { 3691 if (sp->id == id) 3692 return (sp->path); 3693 sp = sp->next; 3694 } 3695 if (debug) 3696 warnx("%s: id%d not found", __func__, id); 3697 return NULL; 3698 } 3699 3700 int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_data *c, struct pkg_data *p) 3701 { 3702 /* 3703 * Overcommit memory a little bit here, 3704 * but skip calculating exact sizes for the buffers. 3705 */ 3706 unsigned long long perf_data[NUM_CSTATE_COUNTERS]; 3707 unsigned long long perf_data_core[NUM_CSTATE_COUNTERS + 1]; 3708 unsigned long long perf_data_pkg[NUM_CSTATE_COUNTERS + 1]; 3709 3710 struct cstate_counter_info_t *cci; 3711 3712 if (debug) 3713 fprintf(stderr, "%s: cpu%d\n", __func__, cpu); 3714 3715 assert(ccstate_counter_info); 3716 assert(cpu <= ccstate_counter_info_size); 3717 3718 memset(perf_data, 0, sizeof(perf_data)); 3719 memset(perf_data_core, 0, sizeof(perf_data_core)); 3720 memset(perf_data_pkg, 0, sizeof(perf_data_pkg)); 3721 3722 cci = &ccstate_counter_info[cpu]; 3723 3724 /* 3725 * If we have any perf counters to read, read them all now, in bulk 3726 */ 3727 const size_t num_perf_counters = cstate_counter_info_count_perf(cci); 3728 ssize_t expected_read_size = num_perf_counters * sizeof(unsigned long long); 3729 ssize_t actual_read_size_core = 0, actual_read_size_pkg = 0; 3730 3731 if (cci->fd_perf_core != -1) { 3732 /* Each descriptor read begins with number of counters read. 
*/ 3733 expected_read_size += sizeof(unsigned long long); 3734 3735 actual_read_size_core = read(cci->fd_perf_core, &perf_data_core[0], sizeof(perf_data_core)); 3736 3737 if (actual_read_size_core <= 0) 3738 err(-1, "%s: read perf %s: %ld", __func__, "core", actual_read_size_core); 3739 } 3740 3741 if (cci->fd_perf_pkg != -1) { 3742 /* Each descriptor read begins with number of counters read. */ 3743 expected_read_size += sizeof(unsigned long long); 3744 3745 actual_read_size_pkg = read(cci->fd_perf_pkg, &perf_data_pkg[0], sizeof(perf_data_pkg)); 3746 3747 if (actual_read_size_pkg <= 0) 3748 err(-1, "%s: read perf %s: %ld", __func__, "pkg", actual_read_size_pkg); 3749 } 3750 3751 const ssize_t actual_read_size_total = actual_read_size_core + actual_read_size_pkg; 3752 3753 if (actual_read_size_total != expected_read_size) 3754 err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, actual_read_size_total); 3755 3756 /* 3757 * Copy ccstate and pcstate data into unified buffer. 3758 * 3759 * Skip first element from core and pkg buffers. 3760 * Kernel puts there how many counters were read. 
3761 */ 3762 const size_t num_core_counters = perf_data_core[0]; 3763 const size_t num_pkg_counters = perf_data_pkg[0]; 3764 3765 assert(num_perf_counters == num_core_counters + num_pkg_counters); 3766 3767 /* Copy ccstate perf data */ 3768 memcpy(&perf_data[0], &perf_data_core[1], num_core_counters * sizeof(unsigned long long)); 3769 3770 /* Copy pcstate perf data */ 3771 memcpy(&perf_data[num_core_counters], &perf_data_pkg[1], num_pkg_counters * sizeof(unsigned long long)); 3772 3773 for (unsigned int i = 0, pi = 0; i < NUM_CSTATE_COUNTERS; ++i) { 3774 switch (cci->source[i]) { 3775 case CSTATE_SOURCE_NONE: 3776 break; 3777 3778 case CSTATE_SOURCE_PERF: 3779 assert(pi < ARRAY_SIZE(perf_data)); 3780 assert(cci->fd_perf_core != -1 || cci->fd_perf_pkg != -1); 3781 3782 if (debug) { 3783 fprintf(stderr, "cstate via %s %u: %llu\n", "perf", i, perf_data[pi]); 3784 } 3785 3786 cci->data[i] = perf_data[pi]; 3787 3788 ++pi; 3789 break; 3790 3791 case CSTATE_SOURCE_MSR: 3792 assert(!no_msr); 3793 if (get_msr(cpu, cci->msr[i], &cci->data[i])) 3794 return -13 - i; 3795 3796 if (debug) { 3797 fprintf(stderr, "cstate via %s0x%llx %u: %llu\n", "msr", cci->msr[i], i, cci->data[i]); 3798 } 3799 3800 break; 3801 } 3802 } 3803 3804 /* 3805 * Helper to write the data only if the source of 3806 * the counter for the current cpu is not none. 3807 * 3808 * Otherwise we would overwrite core data with 0 (default value), 3809 * when invoked for the thread sibling. 
3810 */ 3811 #define PERF_COUNTER_WRITE_DATA(out_counter, index) do { \ 3812 if (cci->source[index] != CSTATE_SOURCE_NONE) \ 3813 out_counter = cci->data[index]; \ 3814 } while (0) 3815 3816 BUILD_BUG_ON(NUM_CSTATE_COUNTERS != 11); 3817 3818 PERF_COUNTER_WRITE_DATA(t->c1, CCSTATE_RCI_INDEX_C1_RESIDENCY); 3819 PERF_COUNTER_WRITE_DATA(c->c3, CCSTATE_RCI_INDEX_C3_RESIDENCY); 3820 PERF_COUNTER_WRITE_DATA(c->c6, CCSTATE_RCI_INDEX_C6_RESIDENCY); 3821 PERF_COUNTER_WRITE_DATA(c->c7, CCSTATE_RCI_INDEX_C7_RESIDENCY); 3822 3823 PERF_COUNTER_WRITE_DATA(p->pc2, PCSTATE_RCI_INDEX_C2_RESIDENCY); 3824 PERF_COUNTER_WRITE_DATA(p->pc3, PCSTATE_RCI_INDEX_C3_RESIDENCY); 3825 PERF_COUNTER_WRITE_DATA(p->pc6, PCSTATE_RCI_INDEX_C6_RESIDENCY); 3826 PERF_COUNTER_WRITE_DATA(p->pc7, PCSTATE_RCI_INDEX_C7_RESIDENCY); 3827 PERF_COUNTER_WRITE_DATA(p->pc8, PCSTATE_RCI_INDEX_C8_RESIDENCY); 3828 PERF_COUNTER_WRITE_DATA(p->pc9, PCSTATE_RCI_INDEX_C9_RESIDENCY); 3829 PERF_COUNTER_WRITE_DATA(p->pc10, PCSTATE_RCI_INDEX_C10_RESIDENCY); 3830 3831 #undef PERF_COUNTER_WRITE_DATA 3832 3833 return 0; 3834 } 3835 3836 /* 3837 * get_counters(...) 
 * migrate to cpu
 * acquire and record local counters for that cpu
 */
int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int cpu = t->cpu_id;
	unsigned long long msr;
	struct msr_counter *mp;
	int i;
	int status;

	if (cpu_migrate(cpu)) {
		fprintf(outf, "%s: Could not migrate to CPU %d\n", __func__, cpu);
		return -1;
	}

	gettimeofday(&t->tv_begin, (struct timezone *)NULL);

	if (first_counter_read)
		get_apic_id(t);

	t->tsc = rdtsc();	/* we are running on local CPU of interest */

	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
	    || soft_c1_residency_display(BIC_Avg_MHz)) {
		/* NOTE(review): this inner "status" shadows the outer local of the same name */
		int status = -1;

		assert(!no_perf || !no_msr);

		switch (amperf_source) {
		case AMPERF_SOURCE_PERF:
			status = read_aperf_mperf_tsc_perf(t, cpu);
			break;
		case AMPERF_SOURCE_MSR:
			status = read_aperf_mperf_tsc_msr(t, cpu);
			break;
		}

		if (status != 0)
			return status;
	}

	if (DO_BIC(BIC_IPC))
		if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long))
			return -4;

	if (DO_BIC(BIC_IRQ))
		t->irq_count = irqs_per_cpu[cpu];
	if (DO_BIC(BIC_SMI)) {
		if (get_msr(cpu, MSR_SMI_COUNT, &msr))
			return -5;
		t->smi_count = msr & 0xFFFFFFFF;
	}

	get_cstate_counters(cpu, t, c, p);

	/* added per-thread counters */
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (get_mp(cpu, mp, &t->counter[i], mp->sp->path))
			return -10;
	}

	/* collect core counters only for 1st thread in core */
	if (!is_cpu_first_thread_in_core(t, c, p))
		goto done;

	if (platform->has_per_core_rapl) {
		status = get_rapl_counters(cpu, c->core_id, c, p);
		if (status != 0)
			return status;
	}

	if (DO_BIC(BIC_CPU_c7) && t->is_atom) {
		/*
		 * For Atom CPUs that has core cstate deeper than c6,
		 * MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper.
		 * Minus CC7 (and deeper cstates) residency to get
		 * accturate cc6 residency.
		 */
		c->c6 -= c->c7;
	}

	if (DO_BIC(BIC_Mod_c6))
		if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
			return -8;

	if (DO_BIC(BIC_CoreTmp)) {
		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
			return -9;
		/* digital readout (bits 22:16) is degrees below TjMax */
		c->core_temp_c = tj_max - ((msr >> 16) & 0x7F);
	}

	/* best-effort: failure leaves the previous value in place */
	if (DO_BIC(BIC_CORE_THROT_CNT))
		get_core_throt_cnt(cpu, &c->core_throt_cnt);

	/* added per-core counters */
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (get_mp(cpu, mp, &c->counter[i], mp->sp->path))
			return -10;
	}

	/* collect package counters only for 1st core in package */
	if (!is_cpu_first_core_in_package(t, c, p))
		goto done;

	if (DO_BIC(BIC_Totl_c0)) {
		if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
			return -10;
	}
	if (DO_BIC(BIC_Any_c0)) {
		if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
			return -11;
	}
	if (DO_BIC(BIC_GFX_c0)) {
		if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
			return -12;
	}
	if (DO_BIC(BIC_CPUGFX)) {
		if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
			return -13;
	}

	/* LPI counters are snapshotted elsewhere; just copy the current values */
	if (DO_BIC(BIC_CPU_LPI))
		p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
	if (DO_BIC(BIC_SYS_LPI))
		p->sys_lpi = cpuidle_cur_sys_lpi_us;

	if (!platform->has_per_core_rapl) {
		status = get_rapl_counters(cpu, p->package_id, c, p);
		if (status != 0)
			return status;
	}

	if (DO_BIC(BIC_PkgTmp)) {
		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
			return -17;
		p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F);
	}

	if (DO_BIC(BIC_UNCORE_MHZ))
		p->uncore_mhz = get_legacy_uncore_mhz(p->package_id);

	/* graphics / SA media counters come from the gfx_info snapshot cache */
	if (DO_BIC(BIC_GFX_rc6))
		p->gfx_rc6_ms = gfx_info[GFX_rc6].val_ull;

	if (DO_BIC(BIC_GFXMHz))
		p->gfx_mhz = gfx_info[GFX_MHz].val;

	if (DO_BIC(BIC_GFXACTMHz))
		p->gfx_act_mhz = gfx_info[GFX_ACTMHz].val;

	if (DO_BIC(BIC_SAM_mc6))
		p->sam_mc6_ms = gfx_info[SAM_mc6].val_ull;

	if (DO_BIC(BIC_SAMMHz))
		p->sam_mhz = gfx_info[SAM_MHz].val;

	if (DO_BIC(BIC_SAMACTMHz))
		p->sam_act_mhz = gfx_info[SAM_ACTMHz].val;

	/* added per-package counters; sysfs ones resolve their path by package id */
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		char *path = NULL;

		if (mp->msr_num == 0) {
			path = find_sysfs_path_by_id(mp->sp, p->package_id);
			if (path == NULL) {
				warnx("%s: package_id %d not found", __func__, p->package_id);
				return -10;
			}
		}
		if (get_mp(cpu, mp, &p->counter[i], path))
			return -10;
	}
done:
	gettimeofday(&t->tv_end, (struct timezone *)NULL);

	return 0;
}

/* Decoded MSR_PKG_CST_CONFIG_CONTROL limit; index into pkg_cstate_limit_strings[]. */
int pkg_cstate_limit = PCLUKN;
char *pkg_cstate_limit_strings[] = { "unknown", "reserved", "pc0", "pc1", "pc2",
	"pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited"
};

/* Per-model decode tables: raw 4-bit MSR field -> PCL* code */
int nhm_pkg_cstate_limits[16] =
    { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

int snb_pkg_cstate_limits[16] =
    { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

int hsw_pkg_cstate_limits[16] =
    { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

int slv_pkg_cstate_limits[16] =
    { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCL__6, PCL__7
};

int amt_pkg_cstate_limits[16] =
    { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV,
PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

/* Knights-family (PHI) decode of MSR_PKG_CST_CONFIG_CONTROL[3:0] */
int phi_pkg_cstate_limits[16] =
    { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

/* Goldmont decode of MSR_PKG_CST_CONFIG_CONTROL[3:0] */
int glm_pkg_cstate_limits[16] =
    { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

/* Skylake-X decode of MSR_PKG_CST_CONFIG_CONTROL[3:0] */
int skx_pkg_cstate_limits[16] =
    { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

/* Icelake-X decode of MSR_PKG_CST_CONFIG_CONTROL[3:0] */
int icx_pkg_cstate_limits[16] =
    { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

/*
 * probe_cst_limit()
 * Select the per-family decode table for this platform and translate
 * MSR_PKG_CST_CONFIG_CONTROL[3:0] into the global pkg_cstate_limit.
 * Leaves pkg_cstate_limit untouched (PCLUKN) when NHM-style MSRs are
 * absent, MSR access is disabled, or the family is unrecognized.
 */
void probe_cst_limit(void)
{
	unsigned long long msr;
	int *pkg_cstate_limits;

	if (!platform->has_nhm_msrs || no_msr)
		return;

	switch (platform->cst_limit) {
	case CST_LIMIT_NHM:
		pkg_cstate_limits = nhm_pkg_cstate_limits;
		break;
	case CST_LIMIT_SNB:
		pkg_cstate_limits = snb_pkg_cstate_limits;
		break;
	case CST_LIMIT_HSW:
		pkg_cstate_limits = hsw_pkg_cstate_limits;
		break;
	case CST_LIMIT_SKX:
		pkg_cstate_limits = skx_pkg_cstate_limits;
		break;
	case CST_LIMIT_ICX:
		pkg_cstate_limits = icx_pkg_cstate_limits;
		break;
	case CST_LIMIT_SLV:
		pkg_cstate_limits = slv_pkg_cstate_limits;
		break;
	case CST_LIMIT_AMT:
		pkg_cstate_limits = amt_pkg_cstate_limits;
		break;
	case CST_LIMIT_KNL:
		pkg_cstate_limits = phi_pkg_cstate_limits;
		break;
	case CST_LIMIT_GMT:
		pkg_cstate_limits = glm_pkg_cstate_limits;
		break;
	default:
		return;
	}

	get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
	pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
}

/*
 * dump_platform_info()
 * Print MSR_PLATFORM_INFO: the max-efficiency ratio (bits 47:40) and
 * the base (non-turbo) ratio (bits 15:8), each scaled by bclk to MHz.
 */
static void dump_platform_info(void)
{
	unsigned long long msr;
	unsigned int ratio;

	if (!platform->has_nhm_msrs || no_msr)
		return;

	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);

	fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);

	ratio = (msr >> 40) & 0xFF;	/* bits 47:40: max efficiency ratio */
	fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk);

	ratio = (msr >> 8) & 0xFF;	/* bits 15:8: base (guaranteed) ratio */
	fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
}

/*
 * dump_power_ctl()
 * Print MSR_IA32_POWER_CTL: C1E auto-promotion enable (bit 1) and,
 * where the platform defines it, the C-state pre-wake disable bit 30.
 */
static void dump_power_ctl(void)
{
	unsigned long long msr;

	if (!platform->has_nhm_msrs || no_msr)
		return;

	get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
	fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
		base_cpu, msr, msr & 0x2 ? "EN" : "DIS");

	/* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */
	if (platform->has_cst_prewake_bit)
		fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN");

	return;
}

/*
 * dump_turbo_ratio_limit2()
 * Print MSR_TURBO_RATIO_LIMIT2: ratios for 17 and 18 active cores,
 * one byte each; a zero byte means "not defined" and is skipped.
 */
static void dump_turbo_ratio_limit2(void)
{
	unsigned long long msr;
	unsigned int ratio;

	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);

	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);

	ratio = (msr >> 8) & 0xFF;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk);

	ratio = (msr >> 0) & 0xFF;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk);
	return;
}

/*
 * dump_turbo_ratio_limit1()
 * Print MSR_TURBO_RATIO_LIMIT1: ratios for 9..16 active cores, one
 * byte per group, highest byte = 16 cores. Zero bytes are skipped.
 */
static void dump_turbo_ratio_limit1(void)
{
	unsigned long long msr;
	unsigned int ratio;

	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);

	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);

	ratio = (msr >> 56) & 0xFF;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk);

	ratio = (msr >> 48) & 0xFF;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk);

	ratio = (msr >> 40) & 0xFF;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk);

	ratio = (msr >> 32) & 0xFF;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk);

	ratio = (msr >> 24) & 0xFF;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk);

	ratio = (msr >> 16) & 0xFF;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk);

	ratio = (msr >> 8) & 0xFF;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk);

	ratio = (msr >> 0) & 0xFF;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk);
	return;
}

/*
 * dump_turbo_ratio_limits()
 * Print the (primary or secondary) TURBO_RATIO_LIMIT MSR. When the
 * platform publishes per-group core counts (TRL_CORECOUNT), the group
 * sizes come from MSR_TURBO_RATIO_LIMIT1; otherwise the legacy fixed
 * mapping 1..8 cores (one byte per group) is assumed.
 */
static void dump_turbo_ratio_limits(int trl_msr_offset)
{
	unsigned long long msr, core_counts;
	int shift;

	get_msr(base_cpu, trl_msr_offset, &msr);
	fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n",
		base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY_" : "", msr);

	if (platform->trl_msrs & TRL_CORECOUNT) {
		get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
		fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
	} else {
		/* legacy layout: byte N holds the ratio for N+1 active cores */
		core_counts = 0x0807060504030201;
	}

	/* walk the 8 ratio/group-size byte pairs from highest to lowest */
	for (shift = 56; shift >= 0; shift -= 8) {
		unsigned int ratio, group_size;

		ratio = (msr >> shift) & 0xFF;
		group_size = (core_counts >> shift) & 0xFF;
		if (ratio)
			fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
				ratio, bclk, ratio * bclk, group_size);
	}

	return;
}

/*
 * dump_atom_turbo_ratio_limits()
 * Print Atom core ratio MSRs: min/LFM/base ratios from
 * MSR_ATOM_CORE_RATIOS, then per-active-core-count turbo ratios from
 * MSR_ATOM_CORE_TURBO_RATIOS. Atom ratio fields are 6 bits wide.
 */
static void dump_atom_turbo_ratio_limits(void)
{
	unsigned long long msr;
	unsigned int ratio;

	get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
	fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);

	ratio = (msr >> 0) & 0x3F;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", ratio, bclk, ratio * bclk);

	ratio = (msr >> 8) & 0x3F;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", ratio, bclk, ratio * bclk);

	ratio = (msr >> 16) & 0x3F;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);

	get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
	fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);

	ratio = (msr >> 24) & 0x3F;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk);

	ratio = (msr >> 16) & 0x3F;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk);

	ratio = (msr >> 8) & 0x3F;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk);

	ratio = (msr >> 0) & 0x3F;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", ratio, bclk, ratio * bclk);
}

/*
 * dump_knl_turbo_ratio_limits()
 * Decode and print the KNL bucketed turbo encoding of
 * MSR_TURBO_RATIO_LIMIT: bucket 1 holds absolute core-count/ratio base
 * values; buckets 2..7 are stored as deltas against the previous bucket.
 * Buckets with a ratio equal to the next-lower bucket are not printed.
 */
static void dump_knl_turbo_ratio_limits(void)
{
	const unsigned int buckets_no = 7;

	unsigned long long msr;
	int delta_cores, delta_ratio;
	int i, b_nr;
	unsigned int cores[buckets_no];
	unsigned int ratio[buckets_no];

	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);

	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);

	/*
	 * Turbo encoding in KNL is as follows:
	 * [0] -- Reserved
	 * [7:1] -- Base value of number of active cores of bucket 1.
	 * [15:8] -- Base value of freq ratio of bucket 1.
	 * [20:16] -- +ve delta of number of active cores of bucket 2.
	 * i.e. active cores of bucket 2 =
	 * active cores of bucket 1 + delta
	 * [23:21] -- Negative delta of freq ratio of bucket 2.
	 * i.e. freq ratio of bucket 2 =
	 * freq ratio of bucket 1 - delta
	 * [28:24]-- +ve delta of number of active cores of bucket 3.
	 * [31:29]-- -ve delta of freq ratio of bucket 3.
	 * [36:32]-- +ve delta of number of active cores of bucket 4.
	 * [39:37]-- -ve delta of freq ratio of bucket 4.
	 * [44:40]-- +ve delta of number of active cores of bucket 5.
	 * [47:45]-- -ve delta of freq ratio of bucket 5.
	 * [52:48]-- +ve delta of number of active cores of bucket 6.
	 * [55:53]-- -ve delta of freq ratio of bucket 6.
	 * [60:56]-- +ve delta of number of active cores of bucket 7.
	 * [63:61]-- -ve delta of freq ratio of bucket 7.
	 */

	b_nr = 0;
	cores[b_nr] = (msr & 0xFF) >> 1;	/* bits 7:1: bucket-1 core count */
	ratio[b_nr] = (msr >> 8) & 0xFF;	/* bits 15:8: bucket-1 ratio */

	/* expand the 5-bit core-count / 3-bit ratio deltas of buckets 2..7 */
	for (i = 16; i < 64; i += 8) {
		delta_cores = (msr >> i) & 0x1F;
		delta_ratio = (msr >> (i + 5)) & 0x7;

		cores[b_nr + 1] = cores[b_nr] + delta_cores;
		ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
		b_nr++;
	}

	/* print from largest bucket down, skipping duplicate ratios */
	for (i = buckets_no - 1; i >= 0; i--)
		if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
			fprintf(outf,
				"%d * %.1f = %.1f MHz max turbo %d active cores\n",
				ratio[i], bclk, ratio[i] * bclk, cores[i]);
}

/*
 * dump_cst_cfg()
 * Print MSR_PKG_CST_CONFIG_CONTROL: demote/undemote enables, the lock
 * bit (bit 15), the raw pkg-cstate-limit field with its decoded name
 * (see probe_cst_limit()), and, where defined, the auto c-state
 * conversion bit.
 */
static void dump_cst_cfg(void)
{
	unsigned long long msr;

	if (!platform->has_nhm_msrs || no_msr)
		return;

	get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);

	fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);

	fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
		(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
		(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
		(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
		(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
		(msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]);

#define AUTOMATIC_CSTATE_CONVERSION		(1UL << 16)
	if (platform->has_cst_auto_convension) {
		fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
	}

	fprintf(outf, ")\n");

	return;
}

/*
 * dump_config_tdp()
 * Print the configurable-TDP MSR set: the nominal base ratio, the two
 * alternate TDP levels (power limits and ratio), the active TDP level
 * selection/lock, and the turbo activation ratio/lock.
 */
static void dump_config_tdp(void)
{
	unsigned long long msr;

	get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
	fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);

	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
	if (msr) {
		fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
		fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
		fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
		fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
	}
	fprintf(outf, ")\n");

	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
	if (msr) {
		fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
		fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
		fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
		fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
	}
	fprintf(outf, ")\n");

	get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
	if ((msr) & 0x3)
		fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
	fprintf(outf, ")\n");

	get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
	fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
	fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
	fprintf(outf, ")\n");
}

/* ns per IRTL time-unit encoding; indexed by the IRTL MSR's unit field */
unsigned int
irtl_time_units[] = { 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };

/*
 * print_irtl()
 * Print the package C-state Interrupt Response Time Limit MSRs for
 * every PC-state this platform supports: the valid bit (bit 15) and
 * the 10-bit time value scaled by the encoded time unit into ns.
 */
void print_irtl(void)
{
	unsigned long long msr;

	if (!platform->has_irtl_msrs || no_msr)
		return;

	if (platform->supported_cstates & PC3) {
		get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
	}

	if (platform->supported_cstates & PC6) {
		get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
	}

	if (platform->supported_cstates & PC7) {
		get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
	}

	if (platform->supported_cstates & PC8) {
		get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
	}

	if (platform->supported_cstates & PC9) {
		get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
	}

	if (platform->supported_cstates & PC10) {
		get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
	}
}

/* close every fd recorded in fd_percpu (0 means never opened) and free the array */
void free_fd_percpu(void)
{
	int i;

	if (!fd_percpu)
		return;

	for (i = 0; i < topo.max_cpu_num + 1; ++i) {
		if (fd_percpu[i] != 0)
			close(fd_percpu[i]);
	}

	free(fd_percpu);
	fd_percpu = NULL;
}

/* close the per-cpu aperf/mperf perf fds (0 means never opened) and free the array */
void free_fd_amperf_percpu(void)
{
	int i;

	if (!fd_amperf_percpu)
		return;

	for (i = 0; i < topo.max_cpu_num + 1; ++i) {
		if (fd_amperf_percpu[i].mperf != 0)
			close(fd_amperf_percpu[i].mperf);

		if (fd_amperf_percpu[i].aperf != 0)
			close(fd_amperf_percpu[i].aperf);
	}

	free(fd_amperf_percpu);
	fd_amperf_percpu = NULL;
}

/* close the per-cpu instruction-count perf fds and free the array */
void free_fd_instr_count_percpu(void)
{
	if (!fd_instr_count_percpu)
		return;

	for (int i = 0; i < topo.max_cpu_num + 1; ++i) {
		if (fd_instr_count_percpu[i] != 0)
			close(fd_instr_count_percpu[i]);
	}

	free(fd_instr_count_percpu);
	fd_instr_count_percpu = NULL;
}

/* close the core/pkg cstate perf fds (-1 means never opened) and free the table */
void free_fd_cstate(void)
{
	if (!ccstate_counter_info)
		return;

	const int counter_info_num = ccstate_counter_info_size;

	for (int counter_id = 0; counter_id < counter_info_num; ++counter_id) {
		if (ccstate_counter_info[counter_id].fd_perf_core != -1)
			close(ccstate_counter_info[counter_id].fd_perf_core);

		if (ccstate_counter_info[counter_id].fd_perf_pkg != -1)
			close(ccstate_counter_info[counter_id].fd_perf_pkg);
	}

	free(ccstate_counter_info);
	ccstate_counter_info = NULL;
	ccstate_counter_info_size = 0;
}

/* close the per-RAPL-domain perf fds (-1 means never opened) and free the table */
void free_fd_rapl_percpu(void)
{
	if (!rapl_counter_info_perdomain)
		return;

	const int num_domains = rapl_counter_info_perdomain_size;

	for (int domain_id = 0; domain_id < num_domains; ++domain_id) {
		if (rapl_counter_info_perdomain[domain_id].fd_perf != -1)
			close(rapl_counter_info_perdomain[domain_id].fd_perf);
	}

	free(rapl_counter_info_perdomain);
	rapl_counter_info_perdomain = NULL;
	rapl_counter_info_perdomain_size = 0;
}

/*
 * free_all_buffers()
 * Release every dynamically allocated data structure and close all
 * cached fds, resetting the globals to NULL/0 so the tool can be
 * re-initialized from scratch (see re_initialize()).
 */
void free_all_buffers(void)
{
	int i;

	CPU_FREE(cpu_present_set);
	cpu_present_set = NULL;
	cpu_present_setsize = 0;

	CPU_FREE(cpu_effective_set);
	cpu_effective_set = NULL;
	cpu_effective_setsize = 0;

	CPU_FREE(cpu_allowed_set);
	cpu_allowed_set = NULL;
	cpu_allowed_setsize = 0;

	CPU_FREE(cpu_affinity_set);
	cpu_affinity_set = NULL;
	cpu_affinity_setsize = 0;

	free(thread_even);
	free(core_even);
	free(package_even);

	thread_even = NULL;
	core_even = NULL;
	package_even = NULL;

	free(thread_odd);
	free(core_odd);
	free(package_odd);

	thread_odd = NULL;
	core_odd = NULL;
	package_odd = NULL;

	free(output_buffer);
	output_buffer = NULL;
	outp = NULL;

	free_fd_percpu();
	free_fd_instr_count_percpu();
	free_fd_amperf_percpu();
	free_fd_rapl_percpu();
	free_fd_cstate();

	free(irq_column_2_cpu);
	free(irqs_per_cpu);

	for (i = 0; i <= topo.max_cpu_num; ++i) {
		if (cpus[i].put_ids)
			CPU_FREE(cpus[i].put_ids);
	}
	free(cpus);
}

/*
 * Parse a file containing a single int.
 * Return 0 if file can not be opened
 * Exit if file can be opened, but can not be parsed
 */
int parse_int_file(const char *fmt, ...)
{
	va_list args;
	char path[PATH_MAX];
	FILE *filep;
	int value;

	/* build the sysfs path from the printf-style arguments */
	va_start(args, fmt);
	vsnprintf(path, sizeof(path), fmt, args);
	va_end(args);
	filep = fopen(path, "r");
	if (!filep)
		return 0;
	if (fscanf(filep, "%d", &value) != 1)
		err(1, "%s: failed to parse number from file", path);
	fclose(filep);
	return value;
}

/*
 * cpu_is_first_core_in_package(cpu)
 * return 1 if given CPU is 1st core in package
 * (sysfs core_siblings_list starts with the lowest-numbered sibling)
 */
int cpu_is_first_core_in_package(int cpu)
{
	return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);
}

/* read this cpu's package id from sysfs topology */
int get_physical_package_id(int cpu)
{
	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
}

/* read this cpu's die id from sysfs topology */
int get_die_id(int cpu)
{
	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu);
}

/* read this cpu's core id from sysfs topology */
int get_core_id(int cpu)
{
	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
}

/*
 * set_node_data()
 * Assign a per-package logical node id to every cpu: cpus sharing a
 * physical_node_id within a package get the same logical id, numbered
 * from 0 per package. Also tracks the max in topo.nodes_per_pkg.
 */
void set_node_data(void)
{
	int pkg, node, lnode, cpu, cpux;
	int cpu_count;

	/* initialize logical_node_id */
	for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu)
		cpus[cpu].logical_node_id = -1;

	cpu_count = 0;
	for (pkg = 0; pkg < topo.num_packages; pkg++) {
		lnode = 0;
		for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
			if (cpus[cpu].physical_package_id != pkg)
				continue;
			/* find a cpu with an unset logical_node_id */
			if (cpus[cpu].logical_node_id != -1)
				continue;
			cpus[cpu].logical_node_id = lnode;
			node = cpus[cpu].physical_node_id;
			cpu_count++;
			/*
			 * find all matching cpus on this pkg and set
			 * the logical_node_id
			 */
			for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
				if ((cpus[cpux].physical_package_id == pkg) && (cpus[cpux].physical_node_id == node)) {
					cpus[cpux].logical_node_id = lnode;
					cpu_count++;
				}
			}
			lnode++;
			if (lnode > topo.nodes_per_pkg)
				topo.nodes_per_pkg = lnode;
		}
		/* all cpus assigned -- no need to scan remaining packages */
		if (cpu_count >= topo.max_cpu_num)
			break;
	}
}

/*
 * get_physical_node_id()
 * Probe /sys/devices/system/cpu/cpuN/nodeX/cpulist for each candidate
 * node id; the first X whose directory exists is this cpu's NUMA node.
 * Returns -1 if no node directory is found.
 */
int get_physical_node_id(struct cpu_topology *thiscpu)
{
	char path[80];
	FILE *filep;
	int i;
	int cpu = thiscpu->logical_cpu_id;

	for (i = 0; i <= topo.max_cpu_num; i++) {
		sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", cpu, i);
		filep = fopen(path, "r");
		if (!filep)
			continue;
		fclose(filep);
		return i;
	}
	return -1;
}

/*
 * parse_cpu_str()
 * Parse a cpu-list string ("0,5,7-9" -- also "7..9" range syntax) into
 * cpu_set. Returns 0 on success, 1 on malformed input or a cpu number
 * at or above CPU_SUBSET_MAXCPUS.
 */
static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size)
{
	unsigned int start, end;
	char *next = cpu_str;

	while (next && *next) {

		if (*next == '-')	/* no negative cpu numbers */
			return 1;

		start = strtoul(next, &next, 10);

		if (start >= CPU_SUBSET_MAXCPUS)
			return 1;
		CPU_SET_S(start, cpu_set_size, cpu_set);

		if (*next == '\0' || *next == '\n')
			break;

		if (*next == ',') {
			next += 1;
			continue;
		}

		if (*next == '-') {
			next += 1;	/* start range */
		} else if (*next == '.') {
			next += 1;
			if (*next == '.')
				next += 1;	/* start range */
			else
				return 1;
		}

		end = strtoul(next, &next, 10);
		if (end <= start)
			return 1;

		while (++start <= end) {
			if (start >= CPU_SUBSET_MAXCPUS)
				return 1;
			CPU_SET_S(start, cpu_set_size, cpu_set);
		}

		if (*next == ',')
			next += 1;
		else if (*next != '\0' && *next != '\n')
			return 1;
	}

	return 0;
}

/*
 * get_thread_siblings()
 * Parse this cpu's sysfs thread_siblings bitmask (comma-separated hex
 * words, most-significant word first), record the siblings that share
 * this cpu's core in thiscpu->put_ids, and assign thread ids to
 * siblings that do not yet have one. Returns the sibling count, or -1
 * on allocation/open failure.
 */
int get_thread_siblings(struct cpu_topology *thiscpu)
{
	char path[80], character;
	FILE *filep;
	unsigned long map;
	int so, shift, sib_core;
	int cpu = thiscpu->logical_cpu_id;
	int offset = topo.max_cpu_num + 1;
	size_t size;
	int thread_id = 0;

	thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
	if (thiscpu->thread_id < 0)
		thiscpu->thread_id = thread_id++;
	if (!thiscpu->put_ids)
		return -1;

	size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(size, thiscpu->put_ids);

	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
	filep = fopen(path, "r");

	if (!filep) {
		warnx("%s: open failed", path);
		return -1;
	}
	do {
		/* words are printed high-to-low; walk offset down accordingly */
		offset -= BITMASK_SIZE;
		if (fscanf(filep, "%lx%c", &map, &character) != 2)
			err(1, "%s: failed to parse file", path);
		for (shift = 0; shift < BITMASK_SIZE; shift++) {
			if ((map >> shift) & 0x1) {
				so = shift + offset;
				sib_core = get_core_id(so);
				if (sib_core == thiscpu->physical_core_id) {
					CPU_SET_S(so, size, thiscpu->put_ids);
					if ((so != cpu) && (cpus[so].thread_id < 0))
						cpus[so].thread_id = thread_id++;
				}
			}
		}
	} while (character == ',');
	fclose(filep);

	return CPU_COUNT_S(size, thiscpu->put_ids);
}

/*
 * run func(thread, core, package) in topology order
 * skip non-present cpus
 */
int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
			       struct pkg_data *, struct thread_data *, struct core_data *,
			       struct pkg_data *), struct thread_data *thread_base,
		   struct core_data *core_base, struct pkg_data *pkg_base,
		   struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2)
{
	int retval, pkg_no, node_no, core_no, thread_no;

	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
		for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
			for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
				for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
					struct thread_data *t, *t2;
					struct core_data *c, *c2;
					struct pkg_data *p, *p2;

					t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);

					if (cpu_is_not_allowed(t->cpu_id))
						continue;

					t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no);

					c = GET_CORE(core_base, core_no, node_no, pkg_no);
					c2 = GET_CORE(core_base2, core_no, node_no, pkg_no);

					p = GET_PKG(pkg_base, pkg_no);
					p2 = GET_PKG(pkg_base2, pkg_no);

					/* first non-zero return aborts the walk */
					retval = func(t, c, p, t2, c2, p2);
					if (retval)
						return retval;
				}
			}
		}
	}
	return 0;
}

/*
 * run func(cpu) on every cpu in /proc/stat
 * return max_cpu number
 */
int for_all_proc_cpus(int (func) (int))
{
	FILE *fp;
	int cpu_num;
	int retval;

	fp = fopen_or_die(proc_stat, "r");

	/* skip the aggregate "cpu " line */
	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
	if (retval != 0)
		err(1, "%s: failed to parse format", proc_stat);

	while (1) {
		retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
		if (retval != 1)
			break;

		retval = func(cpu_num);
		if (retval) {
			fclose(fp);
			return (retval);
		}
	}
	fclose(fp);
	return 0;
}

#define PATH_EFFECTIVE_CPUS	"/sys/fs/cgroup/cpuset.cpus.effective"

/* last-seen contents of cpuset.cpus.effective; "" until first read */
static char cpu_effective_str[1024];

/*
 * update_effective_str()
 * Re-read cpuset.cpus.effective into cpu_effective_str.
 * Returns 1 if the contents changed since the previous read, else 0.
 * On startup the read happens even though the cached string is empty.
 */
static int update_effective_str(bool startup)
{
	FILE *fp;
	char *pos;
	char buf[1024];
	int ret;

	if (cpu_effective_str[0] == '\0' && !startup)
		return 0;

	fp = fopen(PATH_EFFECTIVE_CPUS, "r");
	if (!fp)
		return 0;

	pos = fgets(buf, 1024, fp);
	if (!pos)
		err(1, "%s: file read failed\n", PATH_EFFECTIVE_CPUS);

	fclose(fp);

	/* fgets NUL-terminates buf, so the 1024-bounded compare/copy is safe */
	ret = strncmp(cpu_effective_str, buf, 1024);
	if (!ret)
		return 0;

	strncpy(cpu_effective_str, buf, 1024);
	return 1;
}

static void
update_effective_set(bool startup)
{
	update_effective_str(startup);

	if (parse_cpu_str(cpu_effective_str, cpu_effective_set, cpu_effective_setsize))
		err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str);
}

void linux_perf_init(void);
void rapl_perf_init(void);
void cstate_perf_init(void);

/*
 * re_initialize()
 * Tear down and rebuild all buffers and perf/RAPL/cstate state after a
 * topology change (e.g. cpu hotplug detected during a measurement).
 */
void re_initialize(void)
{
	free_all_buffers();
	setup_all_buffers(false);
	linux_perf_init();
	rapl_perf_init();
	cstate_perf_init();
	fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus,
		topo.allowed_cpus);
}

/*
 * set_max_cpu_num()
 * Derive topo.max_cpu_num from the width of the calling cpu's
 * thread_siblings bitmask: BITMASK_SIZE bits per comma-separated word.
 */
void set_max_cpu_num(void)
{
	FILE *filep;
	int base_cpu;
	unsigned long dummy;
	char pathname[64];

	base_cpu = sched_getcpu();
	if (base_cpu < 0)
		err(1, "cannot find calling cpu ID");
	sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", base_cpu);

	filep = fopen_or_die(pathname, "r");
	topo.max_cpu_num = 0;
	while (fscanf(filep, "%lx,", &dummy) == 1)
		topo.max_cpu_num += BITMASK_SIZE;
	fclose(filep);
	topo.max_cpu_num--;	/* 0 based */
}

/*
 * count_cpus()
 * remember the last one seen, it will be the max
 */
int count_cpus(int cpu)
{
	UNUSED(cpu);

	topo.num_cpus++;
	return 0;
}

/* mark the given cpu in the global cpu_present_set */
int mark_cpu_present(int cpu)
{
	CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
	return 0;
}

/* reset the given cpu's thread_id so topology probing can assign it */
int init_thread_id(int cpu)
{
	cpus[cpu].thread_id = -1;
	return 0;
}

/*
 * snapshot_proc_interrupts()
 *
 * read and record summary of /proc/interrupts
 *
 * return 1 if config change requires a restart, else return 0
 */
int snapshot_proc_interrupts(void)
{
	static FILE *fp;
	int column, retval;

	if (fp == NULL)
		fp = fopen_or_die("/proc/interrupts", "r");
	else
		rewind(fp);

	/* read 1st line of /proc/interrupts to get cpu* name for each column */
	for (column = 0; column < topo.num_cpus; ++column) {
		int cpu_number;

		retval = fscanf(fp, " CPU%d", &cpu_number);
		if (retval != 1)
			break;

		if (cpu_number > topo.max_cpu_num) {
			warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
			return 1;
		}

		irq_column_2_cpu[column] = cpu_number;
		irqs_per_cpu[cpu_number] = 0;
	}

	/* read /proc/interrupt count lines and sum up irqs per cpu */
	while (1) {
		int column;
		char buf[64];

		/*
		 * NOTE(review): %s consumes the whole "N:" token including
		 * the colon, so the literal ':' in the format never matches;
		 * harmless here since only retval==1 is checked.
		 */
		retval = fscanf(fp, " %s:", buf);	/* flush irq# "N:" */
		if (retval != 1)
			break;

		/* read the count per cpu */
		for (column = 0; column < topo.num_cpus; ++column) {

			int cpu_number, irq_count;

			retval = fscanf(fp, " %d", &irq_count);
			if (retval != 1)
				break;

			cpu_number = irq_column_2_cpu[column];
			irqs_per_cpu[cpu_number] += irq_count;

		}

		while (getc(fp) != '\n') ;	/* flush interrupt description */

	}
	return 0;
}

/*
 * snapshot_graphics()
 *
 * record snapshot of specified graphics sysfs knob
 *
 * return 1 if config change requires a restart, else return 0
 */
int snapshot_graphics(int idx)
{
	FILE *fp;
	int retval;

	switch (idx) {
	case GFX_rc6:
	case SAM_mc6:
		/* residency counters: open/close each time */
		fp = fopen_or_die(gfx_info[idx].path, "r");
		retval = fscanf(fp, "%lld", &gfx_info[idx].val_ull);
		if (retval != 1)
			err(1, "rc6");
		fclose(fp);
		return 0;
	case GFX_MHz:
	case GFX_ACTMHz:
	case SAM_MHz:
	case SAM_ACTMHz:
		/* frequency knobs: keep the FILE open and rewind per sample */
		if (gfx_info[idx].fp == NULL) {
			gfx_info[idx].fp = fopen_or_die(gfx_info[idx].path, "r");
		} else {
			rewind(gfx_info[idx].fp);
			fflush(gfx_info[idx].fp);
		}
		retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val);
		if (retval != 1)
			err(1, "MHz");
		return 0;
	default:
		return -EINVAL;
	}
}

/*
 * snapshot_cpu_lpi()
 *
 * record snapshot of
 * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
 * Disables the CPU%LPI column and returns -1 on parse failure.
 */
int snapshot_cpu_lpi_us(void)
{
	FILE *fp;
	int retval;

	fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");

	retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
	if (retval != 1) {
		fprintf(stderr, "Disabling Low Power Idle CPU output\n");
		BIC_NOT_PRESENT(BIC_CPU_LPI);
		fclose(fp);
		return -1;
	}

	fclose(fp);

	return 0;
}

/*
 * snapshot_sys_lpi()
 *
 * record snapshot of sys_lpi_file
 * Disables the SYS%LPI column and returns -1 on parse failure.
 */
int snapshot_sys_lpi_us(void)
{
	FILE *fp;
	int retval;

	fp = fopen_or_die(sys_lpi_file, "r");

	retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
	if (retval != 1) {
		fprintf(stderr, "Disabling Low Power Idle System output\n");
		BIC_NOT_PRESENT(BIC_SYS_LPI);
		fclose(fp);
		return -1;
	}
	fclose(fp);

	return 0;
}

/*
 * snapshot /proc and /sys files
 *
 * return 1 if configuration restart needed, else return 0
 */
int snapshot_proc_sysfs_files(void)
{
	if (DO_BIC(BIC_IRQ))
		if (snapshot_proc_interrupts())
			return 1;

	if (DO_BIC(BIC_GFX_rc6))
		snapshot_graphics(GFX_rc6);

	if (DO_BIC(BIC_GFXMHz))
		snapshot_graphics(GFX_MHz);

	if (DO_BIC(BIC_GFXACTMHz))
		snapshot_graphics(GFX_ACTMHz);

	if (DO_BIC(BIC_SAM_mc6))
		snapshot_graphics(SAM_mc6);

	if (DO_BIC(BIC_SAMMHz))
		snapshot_graphics(SAM_MHz);

	if (DO_BIC(BIC_SAMACTMHz))
		snapshot_graphics(SAM_ACTMHz);

	if (DO_BIC(BIC_CPU_LPI))
		snapshot_cpu_lpi_us();

	if (DO_BIC(BIC_SYS_LPI))
		snapshot_sys_lpi_us();

	return 0;
}

/* set by the signal handler to request a clean exit */
int exit_requested;

/* SIGINT requests exit; SIGUSR1 just wakes the main loop */
static void signal_handler(int signal)
{
	switch (signal) {
	case SIGINT:
		exit_requested = 1;
		if (debug)
			fprintf(stderr, " SIGINT\n");
		break;
	case SIGUSR1:
		if (debug > 1)
			fprintf(stderr, "SIGUSR1\n");
		break;
	}
}

/* install signal_handler() for SIGINT and SIGUSR1 */
void setup_signal_handler(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));

	sa.sa_handler = &signal_handler;

	if (sigaction(SIGINT, &sa, NULL) < 0)
		err(1, "sigaction SIGINT");
	if (sigaction(SIGUSR1, &sa, NULL) < 0)
		err(1, "sigaction SIGUSR1");
}

/*
 * do_sleep()
 * Sleep for the measurement interval; unless --ignore-stdin, also
 * watch stdin via select() so 'q' (or a closed pipe) can end the run.
 */
void do_sleep(void)
{
	struct timeval tout;
	struct timespec rest;
	fd_set readfds;
	int retval;

	FD_ZERO(&readfds);
	FD_SET(0, &readfds);

	if (ignore_stdin) {
		nanosleep(&interval_ts, NULL);
		return;
	}

	tout = interval_tv;
	retval = select(1, &readfds, NULL, NULL, &tout);

	if (retval == 1) {
		switch (getc(stdin)) {
		case 'q':
			exit_requested = 1;
			break;
		case EOF:
			/*
			 * 'stdin' is a pipe closed on the other end. There
			 * won't be any further input.
5228 */ 5229 ignore_stdin = 1; 5230 /* Sleep the rest of the time */ 5231 rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000); 5232 rest.tv_nsec = (tout.tv_usec % 1000000) * 1000; 5233 nanosleep(&rest, NULL); 5234 } 5235 } 5236 } 5237 5238 int get_msr_sum(int cpu, off_t offset, unsigned long long *msr) 5239 { 5240 int ret, idx; 5241 unsigned long long msr_cur, msr_last; 5242 5243 assert(!no_msr); 5244 5245 if (!per_cpu_msr_sum) 5246 return 1; 5247 5248 idx = offset_to_idx(offset); 5249 if (idx < 0) 5250 return idx; 5251 /* get_msr_sum() = sum + (get_msr() - last) */ 5252 ret = get_msr(cpu, offset, &msr_cur); 5253 if (ret) 5254 return ret; 5255 msr_last = per_cpu_msr_sum[cpu].entries[idx].last; 5256 DELTA_WRAP32(msr_cur, msr_last); 5257 *msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum; 5258 5259 return 0; 5260 } 5261 5262 timer_t timerid; 5263 5264 /* Timer callback, update the sum of MSRs periodically. */ 5265 static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p) 5266 { 5267 int i, ret; 5268 int cpu = t->cpu_id; 5269 5270 UNUSED(c); 5271 UNUSED(p); 5272 5273 assert(!no_msr); 5274 5275 for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) { 5276 unsigned long long msr_cur, msr_last; 5277 off_t offset; 5278 5279 if (!idx_valid(i)) 5280 continue; 5281 offset = idx_to_offset(i); 5282 if (offset < 0) 5283 continue; 5284 ret = get_msr(cpu, offset, &msr_cur); 5285 if (ret) { 5286 fprintf(outf, "Can not update msr(0x%llx)\n", (unsigned long long)offset); 5287 continue; 5288 } 5289 5290 msr_last = per_cpu_msr_sum[cpu].entries[i].last; 5291 per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff; 5292 5293 DELTA_WRAP32(msr_cur, msr_last); 5294 per_cpu_msr_sum[cpu].entries[i].sum += msr_last; 5295 } 5296 return 0; 5297 } 5298 5299 static void msr_record_handler(union sigval v) 5300 { 5301 UNUSED(v); 5302 5303 for_all_cpus(update_msr_sum, EVEN_COUNTERS); 5304 } 5305 5306 void msr_sum_record(void) 5307 { 5308 struct itimerspec its; 5309 
struct sigevent sev; 5310 5311 per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array)); 5312 if (!per_cpu_msr_sum) { 5313 fprintf(outf, "Can not allocate memory for long time MSR.\n"); 5314 return; 5315 } 5316 /* 5317 * Signal handler might be restricted, so use thread notifier instead. 5318 */ 5319 memset(&sev, 0, sizeof(struct sigevent)); 5320 sev.sigev_notify = SIGEV_THREAD; 5321 sev.sigev_notify_function = msr_record_handler; 5322 5323 sev.sigev_value.sival_ptr = &timerid; 5324 if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) { 5325 fprintf(outf, "Can not create timer.\n"); 5326 goto release_msr; 5327 } 5328 5329 its.it_value.tv_sec = 0; 5330 its.it_value.tv_nsec = 1; 5331 /* 5332 * A wraparound time has been calculated early. 5333 * Some sources state that the peak power for a 5334 * microprocessor is usually 1.5 times the TDP rating, 5335 * use 2 * TDP for safety. 5336 */ 5337 its.it_interval.tv_sec = rapl_joule_counter_range / 2; 5338 its.it_interval.tv_nsec = 0; 5339 5340 if (timer_settime(timerid, 0, &its, NULL) == -1) { 5341 fprintf(outf, "Can not set timer.\n"); 5342 goto release_timer; 5343 } 5344 return; 5345 5346 release_timer: 5347 timer_delete(timerid); 5348 release_msr: 5349 free(per_cpu_msr_sum); 5350 } 5351 5352 /* 5353 * set_my_sched_priority(pri) 5354 * return previous priority on success 5355 * return value < -20 on failure 5356 */ 5357 int set_my_sched_priority(int priority) 5358 { 5359 int retval; 5360 int original_priority; 5361 5362 errno = 0; 5363 original_priority = getpriority(PRIO_PROCESS, 0); 5364 if (errno && (original_priority == -1)) 5365 return -21; 5366 5367 retval = setpriority(PRIO_PROCESS, 0, priority); 5368 if (retval) 5369 return -21; 5370 5371 errno = 0; 5372 retval = getpriority(PRIO_PROCESS, 0); 5373 if (retval != priority) 5374 return -21; 5375 5376 return original_priority; 5377 } 5378 5379 void turbostat_loop() 5380 { 5381 int retval; 5382 int restarted = 0; 5383 unsigned int done_iters = 0; 

	setup_signal_handler();

	/*
	 * elevate own priority for interval mode
	 *
	 * ignore on error - we probably don't have permission to set it, but
	 * it's not a big deal
	 */
	set_my_sched_priority(-20);

restart:
	restarted++;

	snapshot_proc_sysfs_files();
	retval = for_all_cpus(get_counters, EVEN_COUNTERS);
	first_counter_read = 0;
	if (retval < -1) {
		exit(retval);
	} else if (retval == -1) {
		/* transient failure (e.g. CPU went offline): re-probe, but give up after 10 tries */
		if (restarted > 10) {
			exit(retval);
		}
		re_initialize();
		goto restart;
	}
	restarted = 0;
	done_iters = 0;
	gettimeofday(&tv_even, (struct timezone *)NULL);

	while (1) {
		/* restart if the set of present CPUs changed */
		if (for_all_proc_cpus(cpu_is_not_present)) {
			re_initialize();
			goto restart;
		}
		/* restart if cgroup-effective CPU set changed */
		if (update_effective_str(false)) {
			re_initialize();
			goto restart;
		}
		do_sleep();
		if (snapshot_proc_sysfs_files())
			goto restart;
		retval = for_all_cpus(get_counters, ODD_COUNTERS);
		if (retval < -1) {
			exit(retval);
		} else if (retval == -1) {
			re_initialize();
			goto restart;
		}
		gettimeofday(&tv_odd, (struct timezone *)NULL);
		timersub(&tv_odd, &tv_even, &tv_delta);
		/* delta = ODD - EVEN, stored into the EVEN set for printing */
		if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
			re_initialize();
			goto restart;
		}
		compute_average(EVEN_COUNTERS);
		format_all_counters(EVEN_COUNTERS);
		flush_output_stdout();
		if (exit_requested)
			break;
		if (num_iterations && ++done_iters >= num_iterations)
			break;
		do_sleep();
		if (snapshot_proc_sysfs_files())
			goto restart;
		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
		if (retval < -1) {
			exit(retval);
		} else if (retval == -1) {
			re_initialize();
			goto restart;
		}
		gettimeofday(&tv_even, (struct timezone *)NULL);
		timersub(&tv_even, &tv_odd, &tv_delta);
		/* delta = EVEN - ODD, stored into the ODD set for printing */
		if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
			re_initialize();
			goto restart;
		}
		compute_average(ODD_COUNTERS);
		format_all_counters(ODD_COUNTERS);
		flush_output_stdout();
		if (exit_requested)
			break;
		if (num_iterations && ++done_iters >= num_iterations)
			break;
	}
}

/*
 * verify /dev/cpu/*/msr exists; try loading the msr module once,
 * and fall back to no-msr mode if that fails
 */
void check_dev_msr()
{
	struct stat sb;
	char pathname[32];

	if (no_msr)
		return;

	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
	if (stat(pathname, &sb))
		if (system("/sbin/modprobe msr > /dev/null 2>&1"))
			no_msr = 1;
}

/*
 * check for CAP_SYS_RAWIO
 * return 0 on success
 * return 1 on fail
 */
int check_for_cap_sys_rawio(void)
{
	cap_t caps;
	cap_flag_value_t cap_flag_value;
	int ret = 0;

	caps = cap_get_proc();
	if (caps == NULL)
		return 1;

	if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) {
		ret = 1;
		goto free_and_exit;
	}

	if (cap_flag_value != CAP_SET) {
		ret = 1;
		goto free_and_exit;
	}

 free_and_exit:
	if (cap_free(caps) == -1)
		err(-6, "cap_free\n");

	return ret;
}

/*
 * verify we have both the capability and the file permission to read
 * MSRs; otherwise warn once and continue in no-msr mode
 */
void check_msr_permission(void)
{
	int failed = 0;
	char pathname[32];

	if (no_msr)
		return;

	/* check for CAP_SYS_RAWIO */
	failed += check_for_cap_sys_rawio();

	/* test file permissions */
	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
	if (euidaccess(pathname, R_OK)) {
		failed++;
	}

	if (failed) {
		warnx("Failed to access %s. 
Some of the counters may not be available\n"
		      "\tRun as root to enable them or use %s to disable the access explicitly", pathname, "--no-msr");
		no_msr = 1;
	}
}

/*
 * derive base_hz from the platform bus clock and MSR_PLATFORM_INFO
 * base ratio; also set tsc_tweak on platforms that need it
 */
void probe_bclk(void)
{
	unsigned long long msr;
	unsigned int base_ratio;

	if (!platform->has_nhm_msrs || no_msr)
		return;

	if (platform->bclk_freq == BCLK_100MHZ)
		bclk = 100.00;
	else if (platform->bclk_freq == BCLK_133MHZ)
		bclk = 133.33;
	else if (platform->bclk_freq == BCLK_SLV)
		bclk = slm_bclk();
	else
		return;

	/* MSR_PLATFORM_INFO[15:8] is the non-turbo base ratio */
	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
	base_ratio = (msr >> 8) & 0xFF;

	base_hz = base_ratio * bclk * 1000000;
	has_base_hz = 1;

	if (platform->enable_tsc_tweak)
		tsc_tweak = base_hz / tsc_hz;
}

/* strip '_' characters from s, in place */
static void remove_underbar(char *s)
{
	char *to = s;

	while (*s) {
		if (*s != '_')
			*to++ = *s;
		s++;
	}

	*to = 0;
}

/* dump every turbo-ratio-limit MSR this platform advertises */
static void dump_turbo_ratio_info(void)
{
	if (!has_turbo)
		return;

	if (!platform->has_nhm_msrs || no_msr)
		return;

	if (platform->trl_msrs & TRL_LIMIT2)
		dump_turbo_ratio_limit2();

	if (platform->trl_msrs & TRL_LIMIT1)
		dump_turbo_ratio_limit1();

	if (platform->trl_msrs & TRL_BASE) {
		dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT);

		if (is_hybrid)
			dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT);
	}

	if (platform->trl_msrs & TRL_ATOM)
		dump_atom_turbo_ratio_limits();

	if (platform->trl_msrs & TRL_KNL)
		dump_knl_turbo_ratio_limits();

	if (platform->has_config_tdp)
		dump_config_tdp();
}

/*
 * read a single integer from a sysfs file
 * return the value, or -1 if the file cannot be opened
 */
static int read_sysfs_int(char *path)
{
	FILE *input;
	int retval = -1;

	input = fopen(path, "r");
	if (input == NULL) {
		if (debug)
			fprintf(outf, "NSFOD %s\n", path);
		return (-1);
	}
	if (fscanf(input, "%d", &retval) != 1)
		err(1, "%s: failed to read int from file", path);
	fclose(input);

	return (retval);
}

/* print "<basename>: <first line of file>" for a sysfs file, if readable */
static void dump_sysfs_file(char *path)
{
	FILE *input;
	char cpuidle_buf[64];

	input = fopen(path, "r");
	if (input == NULL) {
		if (debug)
			fprintf(outf, "NSFOD %s\n", path);
		return;
	}
	if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
		err(1, "%s: failed to read file", path);
	fclose(input);

	fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
}

/*
 * probe the legacy package_NN_die_NN intel_uncore_frequency sysfs
 * layout; enables the UncMHz column and (unless quiet) dumps limits
 */
static void probe_intel_uncore_frequency_legacy(void)
{
	int i, j;
	char path[256];

	for (i = 0; i < topo.num_packages; ++i) {
		for (j = 0; j <= topo.max_die_id; ++j) {
			int k, l;
			char path_base[128];

			sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i,
				j);

			if (access(path_base, R_OK))
				continue;

			BIC_PRESENT(BIC_UNCORE_MHZ);

			if (quiet)
				return;

			sprintf(path, "%s/min_freq_khz", path_base);
			k = read_sysfs_int(path);
			sprintf(path, "%s/max_freq_khz", path_base);
			l = read_sysfs_int(path);
			fprintf(outf, "Uncore Frequency package%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);

			sprintf(path, "%s/initial_min_freq_khz", path_base);
			k = read_sysfs_int(path);
			sprintf(path, "%s/initial_max_freq_khz", path_base);
			l = read_sysfs_int(path);
			fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);

			sprintf(path, "%s/current_freq_khz", path_base);
			k = read_sysfs_int(path);
			fprintf(outf, " %d MHz\n", k / 1000);
		}
	}
}

/*
 * probe the newer uncoreNN (per-cluster) intel_uncore_frequency sysfs
 * layout; dumps limits and registers a UMHz counter per cluster
 */
static void probe_intel_uncore_frequency_cluster(void)
{
	int i, uncore_max_id;
	char path[256];
	char path_base[128];

	if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK))
		return;

	/*
	 * NOTE(review): returning here when quiet also skips the
	 * add_counter() calls below, so quiet mode never registers the
	 * UMHz columns -- confirm this is intentional.
	 */
	if (quiet)
		return;

	for (uncore_max_id = 0;; ++uncore_max_id) {

		sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", uncore_max_id);

		/* uncore## start at 00 and skips no numbers, so stop upon first missing */
		if (access(path_base, R_OK)) {
			uncore_max_id -= 1;
			break;
		}
	}
	/* walk backwards so the highest-numbered counter is added first */
	for (i = uncore_max_id; i >= 0; --i) {
		int k, l;
		int package_id, domain_id, cluster_id;
		char name_buf[16];

		sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", i);

		if (access(path_base, R_OK))
			err(1, "%s: %s\n", __func__, path_base);

		sprintf(path, "%s/package_id", path_base);
		package_id = read_sysfs_int(path);

		sprintf(path, "%s/domain_id", path_base);
		domain_id = read_sysfs_int(path);

		sprintf(path, "%s/fabric_cluster_id", path_base);
		cluster_id = read_sysfs_int(path);

		sprintf(path, "%s/min_freq_khz", path_base);
		k = read_sysfs_int(path);
		sprintf(path, "%s/max_freq_khz", path_base);
		l = read_sysfs_int(path);
		fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id,
			cluster_id, k / 1000, l / 1000);

		sprintf(path, "%s/initial_min_freq_khz", path_base);
		k = read_sysfs_int(path);
		sprintf(path, "%s/initial_max_freq_khz", path_base);
		l = read_sysfs_int(path);
		fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);

		sprintf(path, "%s/current_freq_khz", path_base);
		k = read_sysfs_int(path);
		fprintf(outf, " %d MHz\n", k / 1000);

		sprintf(path, "%s/current_freq_khz", path_base);
		sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id);

		add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id);
	}
}

/* pick whichever intel_uncore_frequency sysfs layout this kernel exposes */
static void probe_intel_uncore_frequency(void)
{
	if (!genuine_intel)
		return;

	if
 (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK) == 0)
		probe_intel_uncore_frequency_cluster();
	else
		probe_intel_uncore_frequency_legacy();
}

/*
 * discover which graphics sysfs knobs exist (Xe, new i915, or legacy
 * i915 layouts, tried in that order) and record their paths in
 * gfx_info[]; mark the corresponding columns present
 */
static void probe_graphics(void)
{
	/* Xe graphics sysfs knobs */
	if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) {
		FILE *fp;
		char buf[8];
		bool gt0_is_gt;
		int idx;

		/* gt0 may be the render GT or the media GT; its name tells us which */
		fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r");
		if (!fp)
			goto next;

		if (!fread(buf, sizeof(char), 7, fp)) {
			fclose(fp);
			goto next;
		}
		fclose(fp);

		if (!strncmp(buf, "gt0-rc", strlen("gt0-rc")))
			gt0_is_gt = true;
		else if (!strncmp(buf, "gt0-mc", strlen("gt0-mc")))
			gt0_is_gt = false;
		else
			goto next;

		idx = gt0_is_gt ? GFX_rc6 : SAM_mc6;
		gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms";

		idx = gt0_is_gt ? GFX_MHz : SAM_MHz;
		if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", R_OK))
			gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq";

		idx = gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz;
		if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", R_OK))
			gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq";

		/* gt1 is whichever GT gt0 was not */
		idx = gt0_is_gt ? SAM_mc6 : GFX_rc6;
		if (!access("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", R_OK))
			gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms";

		idx = gt0_is_gt ? SAM_MHz : GFX_MHz;
		if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", R_OK))
			gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq";

		idx = gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz;
		if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", R_OK))
			gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq";

		goto end;
	}

next:
	/* New i915 graphics sysfs knobs */
	if (!access("/sys/class/drm/card0/gt/gt0/rc6_residency_ms", R_OK)) {
		gfx_info[GFX_rc6].path = "/sys/class/drm/card0/gt/gt0/rc6_residency_ms";

		if (!access("/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz", R_OK))
			gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz";

		if (!access("/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz", R_OK))
			gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz";

		if (!access("/sys/class/drm/card0/gt/gt1/rc6_residency_ms", R_OK))
			gfx_info[SAM_mc6].path = "/sys/class/drm/card0/gt/gt1/rc6_residency_ms";

		if (!access("/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz", R_OK))
			gfx_info[SAM_MHz].path = "/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz";

		if (!access("/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz", R_OK))
			gfx_info[SAM_ACTMHz].path = "/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz";

		goto end;
	}

	/* Fall back to traditional i915 graphics sysfs knobs */
	if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
		gfx_info[GFX_rc6].path = "/sys/class/drm/card0/power/rc6_residency_ms";

	if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK))
		gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt_cur_freq_mhz";
	else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
		gfx_info[GFX_MHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz";

	if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK))
		gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt_act_freq_mhz";
	else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
		gfx_info[GFX_ACTMHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz";

end:
	/* a column is present iff we found a path for it */
	if (gfx_info[GFX_rc6].path)
		BIC_PRESENT(BIC_GFX_rc6);
	if (gfx_info[GFX_MHz].path)
		BIC_PRESENT(BIC_GFXMHz);
	if (gfx_info[GFX_ACTMHz].path)
		BIC_PRESENT(BIC_GFXACTMHz);
	if (gfx_info[SAM_mc6].path)
		BIC_PRESENT(BIC_SAM_mc6);
	if (gfx_info[SAM_MHz].path)
		BIC_PRESENT(BIC_SAMMHz);
	if (gfx_info[SAM_ACTMHz].path)
		BIC_PRESENT(BIC_SAMACTMHz);
}

/* print name and description of each cpuidle state on base_cpu */
static void dump_sysfs_cstate_config(void)
{
	char path[64];
	char name_buf[16];
	char desc[64];
	FILE *input;
	int state;
	char *sp;

	if (access("/sys/devices/system/cpu/cpuidle", R_OK)) {
		fprintf(outf, "cpuidle not loaded\n");
		return;
	}

	dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver");
	dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor");
	dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro");

	for (state = 0; state < 10; ++state) {

		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
		input = fopen(path, "r");
		if (input == NULL)
			continue;
		if (!fgets(name_buf, sizeof(name_buf), input))
			err(1, "%s: failed to read file", path);

		/* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
		sp = strchr(name_buf, '-');
		if (!sp)
			sp = strchrnul(name_buf, '\n');
		*sp = '\0';
		fclose(input);

		remove_underbar(name_buf);

		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state);
		input = fopen(path, "r");
		if (input == NULL)
			continue;
		if (!fgets(desc, sizeof(desc), input))
			err(1, "%s: failed to read file", path);

		fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
		fclose(input);
	}
}

/* print cpufreq driver/governor and the boost / no_turbo knobs */
static void dump_sysfs_pstate_config(void)
{
	char path[64];
	char driver_buf[64];
	char governor_buf[64];
	FILE *input;
	int turbo;

	sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", base_cpu);
	input = fopen(path, "r");
	if (input == NULL) {
		fprintf(outf, "NSFOD %s\n", path);
		return;
	}
	if (!fgets(driver_buf, sizeof(driver_buf), input))
		err(1, "%s: failed to read file", path);
	fclose(input);

	sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", base_cpu);
	input = fopen(path, "r");
	if (input == NULL) {
		fprintf(outf, "NSFOD %s\n", path);
		return;
	}
	if (!fgets(governor_buf, sizeof(governor_buf), input))
		err(1, "%s: failed to read file", path);
	fclose(input);

	fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
	fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);

	sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
	input = fopen(path, "r");
	if (input != NULL) {
		if (fscanf(input, "%d", &turbo) != 1)
			err(1, "%s: failed to parse number from file", path);
		fprintf(outf, "cpufreq boost: %d\n", turbo);
		fclose(input);
	}

	sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
	input = fopen(path, "r");
	if (input != NULL) {
		if (fscanf(input, "%d", &turbo) != 1)
			err(1, "%s: failed to parse number from file", path);
		fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
		fclose(input);
	}
}

/*
 * print_epb()
 * Decode the ENERGY_PERF_BIAS MSR
 */
int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	char *epb_string;
	int cpu, epb;

	UNUSED(c);
	UNUSED(p);

	if (!has_epb)
		return 0;

	cpu = t->cpu_id;

	/* EPB is per-package */
	if (!is_cpu_first_thread_in_package(t, c, p))
		return 0;

	if (cpu_migrate(cpu)) {
		fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	epb = get_epb(cpu);
	if (epb < 0)
		return 0;

	switch (epb) {
	case ENERGY_PERF_BIAS_PERFORMANCE:
		epb_string = "performance";
		break;
	case ENERGY_PERF_BIAS_NORMAL:
		epb_string = "balanced";
		break;
	case ENERGY_PERF_BIAS_POWERSAVE:
		epb_string = "powersave";
		break;
	default:
		epb_string = "custom";
		break;
	}
	fprintf(outf, "cpu%d: EPB: %d (%s)\n", cpu, epb, epb_string);

	return 0;
}

/*
 * print_hwp()
 * Decode the MSR_HWP_CAPABILITIES
 */
int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	int cpu;

	UNUSED(c);
	UNUSED(p);

	if (no_msr)
		return 0;

	if (!has_hwp)
		return 0;

	cpu = t->cpu_id;

	/* MSR_HWP_CAPABILITIES is per-package */
	if (!is_cpu_first_thread_in_package(t, c, p))
		return 0;

	if (cpu_migrate(cpu)) {
		fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	if (get_msr(cpu, MSR_PM_ENABLE, &msr))
		return 0;

	fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", cpu, msr, (msr & (1 << 0)) ? "" : "No-");

	/* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
	if ((msr & (1 << 0)) == 0)
		return 0;

	if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
		return 0;

	fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
		"(high %d guar %d eff %d low %d)\n",
		cpu, msr,
		(unsigned int)HWP_HIGHEST_PERF(msr),
		(unsigned int)HWP_GUARANTEED_PERF(msr),
		(unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr));

	if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
		return 0;

	/*
	 * NOTE(review): the window mask 0xff3 below looks like a typo for
	 * 0x3ff (a 10-bit field) -- verify against the SDM before changing.
	 */
	fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
		"(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
		cpu, msr,
		(unsigned int)(((msr) >> 0) & 0xff),
		(unsigned int)(((msr) >> 8) & 0xff),
		(unsigned int)(((msr) >> 16) & 0xff),
		(unsigned int)(((msr) >> 24) & 0xff),
		(unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1));

	if (has_hwp_pkg) {
		if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
			return 0;

		fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
			"(min %d max %d des %d epp 0x%x window 0x%x)\n",
			cpu, msr,
			(unsigned int)(((msr) >> 0) & 0xff),
			(unsigned int)(((msr) >> 8) & 0xff),
			(unsigned int)(((msr) >> 16) & 0xff),
			(unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3));
	}
	if (has_hwp_notify) {
		if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
			return 0;

		fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
			"(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
			cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis");
	}
	if (get_msr(cpu, MSR_HWP_STATUS, &msr))
		return 0;

	fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
		"(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
		cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-");

	return 0;
}

/*
 * print_perf_limit()
 */
int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	int cpu;

	UNUSED(c);
	UNUSED(p);

	if (no_msr)
		return 0;

	cpu = t->cpu_id;

	/* per-package */
	if (!is_cpu_first_thread_in_package(t, c, p))
		return 0;

	if (cpu_migrate(cpu)) {
		fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	if (platform->plr_msrs & PLR_CORE) {
		get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
		fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
		/* low 16 bits: currently-active limit reasons */
		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
			(msr & 1 << 15) ? "bit15, " : "",
			(msr & 1 << 14) ? "bit14, " : "",
			(msr & 1 << 13) ? "Transitions, " : "",
			(msr & 1 << 12) ? "MultiCoreTurbo, " : "",
			(msr & 1 << 11) ? "PkgPwrL2, " : "",
			(msr & 1 << 10) ? "PkgPwrL1, " : "",
			(msr & 1 << 9) ? "CorePwr, " : "",
			(msr & 1 << 8) ? "Amps, " : "",
			(msr & 1 << 6) ? "VR-Therm, " : "",
			(msr & 1 << 5) ? "Auto-HWP, " : "",
			(msr & 1 << 4) ? "Graphics, " : "",
			(msr & 1 << 2) ? "bit2, " : "",
			(msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : "");
		/* high 16 bits: sticky log of reasons seen since last clear */
		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
			(msr & 1 << 31) ? "bit31, " : "",
			(msr & 1 << 30) ? "bit30, " : "",
			(msr & 1 << 29) ? "Transitions, " : "",
			(msr & 1 << 28) ? "MultiCoreTurbo, " : "",
			(msr & 1 << 27) ? "PkgPwrL2, " : "",
			(msr & 1 << 26) ? "PkgPwrL1, " : "",
			(msr & 1 << 25) ? "CorePwr, " : "",
			(msr & 1 << 24) ? "Amps, " : "",
			(msr & 1 << 22) ? "VR-Therm, " : "",
			(msr & 1 << 21) ? "Auto-HWP, " : "",
			(msr & 1 << 20) ? "Graphics, " : "",
			(msr & 1 << 18) ? "bit18, " : "",
			(msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ?
"PROCHOT, " : ""); 6162 6163 } 6164 if (platform->plr_msrs & PLR_GFX) { 6165 get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); 6166 fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 6167 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)", 6168 (msr & 1 << 0) ? "PROCHOT, " : "", 6169 (msr & 1 << 1) ? "ThermStatus, " : "", 6170 (msr & 1 << 4) ? "Graphics, " : "", 6171 (msr & 1 << 6) ? "VR-Therm, " : "", 6172 (msr & 1 << 8) ? "Amps, " : "", 6173 (msr & 1 << 9) ? "GFXPwr, " : "", 6174 (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); 6175 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n", 6176 (msr & 1 << 16) ? "PROCHOT, " : "", 6177 (msr & 1 << 17) ? "ThermStatus, " : "", 6178 (msr & 1 << 20) ? "Graphics, " : "", 6179 (msr & 1 << 22) ? "VR-Therm, " : "", 6180 (msr & 1 << 24) ? "Amps, " : "", 6181 (msr & 1 << 25) ? "GFXPwr, " : "", 6182 (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); 6183 } 6184 if (platform->plr_msrs & PLR_RING) { 6185 get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); 6186 fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 6187 fprintf(outf, " (Active: %s%s%s%s%s%s)", 6188 (msr & 1 << 0) ? "PROCHOT, " : "", 6189 (msr & 1 << 1) ? "ThermStatus, " : "", 6190 (msr & 1 << 6) ? "VR-Therm, " : "", 6191 (msr & 1 << 8) ? "Amps, " : "", 6192 (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); 6193 fprintf(outf, " (Logged: %s%s%s%s%s%s)\n", 6194 (msr & 1 << 16) ? "PROCHOT, " : "", 6195 (msr & 1 << 17) ? "ThermStatus, " : "", 6196 (msr & 1 << 22) ? "VR-Therm, " : "", 6197 (msr & 1 << 24) ? "Amps, " : "", 6198 (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? 
"PkgPwrL2, " : ""); 6199 } 6200 return 0; 6201 } 6202 6203 #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ 6204 #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ 6205 6206 double get_quirk_tdp(void) 6207 { 6208 if (platform->rapl_quirk_tdp) 6209 return platform->rapl_quirk_tdp; 6210 6211 return 135.0; 6212 } 6213 6214 double get_tdp_intel(void) 6215 { 6216 unsigned long long msr; 6217 6218 if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) 6219 if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr)) 6220 return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; 6221 return get_quirk_tdp(); 6222 } 6223 6224 double get_tdp_amd(void) 6225 { 6226 return get_quirk_tdp(); 6227 } 6228 6229 void rapl_probe_intel(void) 6230 { 6231 unsigned long long msr; 6232 unsigned int time_unit; 6233 double tdp; 6234 const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt; 6235 const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J; 6236 6237 if (rapl_joules) 6238 bic_enabled &= ~bic_watt_bits; 6239 else 6240 bic_enabled &= ~bic_joules_bits; 6241 6242 if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS)) 6243 bic_enabled &= ~BIC_PKG__; 6244 if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)) 6245 bic_enabled &= ~BIC_RAM__; 6246 6247 /* units on package 0, verify later other packages match */ 6248 if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr)) 6249 return; 6250 6251 rapl_power_units = 1.0 / (1 << (msr & 0xF)); 6252 if (platform->has_rapl_divisor) 6253 rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000; 6254 else 6255 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); 6256 6257 if (platform->has_fixed_rapl_unit) 6258 rapl_dram_energy_units = (15.3 / 1000000); 6259 else 6260 rapl_dram_energy_units = rapl_energy_units; 6261 6262 time_unit = msr >> 16 & 0xF; 6263 if (time_unit == 0) 6264 time_unit = 0xA; 6265 6266 rapl_time_units = 1.0 / (1 << (time_unit)); 6267 6268 tdp = 
get_tdp_intel();

	/* seconds until the 32-bit Joules counter wraps when running at TDP */
	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
	if (!quiet)
		fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
}

/*
 * rapl_probe_amd()
 *
 * Read MSR_RAPL_PWR_UNIT on the base CPU and derive the RAPL
 * power/energy/time unit scale factors for AMD/Hygon parts.
 */
void rapl_probe_amd(void)
{
	unsigned long long msr;
	double tdp;
	const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt;
	const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J;

	/* Watts and Joules columns are mutually exclusive */
	if (rapl_joules)
		bic_enabled &= ~bic_watt_bits;
	else
		bic_enabled &= ~bic_joules_bits;

	if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
		return;

	/* units are 2^-N, encoded in 4/5/4-bit fields */
	rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf));
	rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
	rapl_power_units = ldexp(1.0, -(msr & 0xf));

	tdp = get_tdp_amd();

	/* seconds until the 32-bit Joules counter wraps when running at TDP */
	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
	if (!quiet)
		fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
}

/*
 * Decode and print one RAPL power-limit register:
 * enable bit (15), power limit (bits 0-14, in rapl_power_units),
 * time window (bits 17-23, in rapl_time_units) and clamp bit (16).
 */
void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
{
	fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n",
		cpu, label,
		((msr >> 15) & 1) ? "EN" : "DIS",
		((msr >> 0) & 0x7FFF) * rapl_power_units,
		(1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
		(((msr >> 16) & 1) ?
"EN" : "DIS"));

	return;
}

/*
 * print_rapl()
 *
 * Dump the raw RAPL configuration MSRs (unit register, package/DRAM/core/GFX
 * power-info, power-limit and policy registers), once per package.
 * Returns 0 on success, negative on MSR-read or migration failure.
 */
int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	const char *msr_name;
	int cpu;

	UNUSED(c);
	UNUSED(p);

	if (!platform->rapl_msrs)
		return 0;

	/* RAPL counters are per package, so print only for 1st thread/package */
	if (!is_cpu_first_thread_in_package(t, c, p))
		return 0;

	cpu = t->cpu_id;
	if (cpu_migrate(cpu)) {
		fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	/* AMD F17H and Intel expose the RAPL unit register at different MSRs */
	if (platform->rapl_msrs & RAPL_AMD_F17H) {
		msr_name = "MSR_RAPL_PWR_UNIT";
		if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr))
			return -1;
	} else {
		msr_name = "MSR_RAPL_POWER_UNIT";
		if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
			return -1;
	}

	fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr,
		rapl_power_units, rapl_energy_units, rapl_time_units);

	if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) {

		if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
			return -5;

		/* fields: TDP, min power, max power, max time window */
		fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
			cpu, msr,
			((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);

	}
	if (platform->rapl_msrs & RAPL_PKG) {

		if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
			return -9;

		fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
			cpu, msr, (msr >> 63) & 1 ? "" : "UN");

		/* PKG Limit #2 lives in the upper 32 bits of the same MSR */
		print_power_limit_msr(cpu, msr, "PKG Limit #1");
		fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n",
			cpu,
			((msr >> 47) & 1) ? "EN" : "DIS",
			((msr >> 32) & 0x7FFF) * rapl_power_units,
			(1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
			((msr >> 48) & 1) ? "EN" : "DIS");

		if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr))
			return -9;

		fprintf(outf, "cpu%d: MSR_VR_CURRENT_CONFIG: 0x%08llx\n", cpu, msr);
		fprintf(outf, "cpu%d: PKG Limit #4: %f Watts (%slocked)\n",
			cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN");
	}

	if (platform->rapl_msrs & RAPL_DRAM_POWER_INFO) {
		if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
			return -6;

		fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
			cpu, msr,
			((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
	}
	if (platform->rapl_msrs & RAPL_DRAM) {
		if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
			return -9;
		fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
			cpu, msr, (msr >> 31) & 1 ? "" : "UN");

		print_power_limit_msr(cpu, msr, "DRAM Limit");
	}
	if (platform->rapl_msrs & RAPL_CORE_POLICY) {
		if (get_msr(cpu, MSR_PP0_POLICY, &msr))
			return -7;

		fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
	}
	if (platform->rapl_msrs & RAPL_CORE_POWER_LIMIT) {
		if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
			return -9;
		fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
			cpu, msr, (msr >> 31) & 1 ?
"" : "UN");
		print_power_limit_msr(cpu, msr, "Cores Limit");
	}
	if (platform->rapl_msrs & RAPL_GFX) {
		if (get_msr(cpu, MSR_PP1_POLICY, &msr))
			return -8;

		fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);

		if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
			return -9;
		fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
			cpu, msr, (msr >> 31) & 1 ? "" : "UN");
		print_power_limit_msr(cpu, msr, "GFX Limit");
	}
	return 0;
}

/*
 * probe_rapl()
 *
 * sets rapl_power_units, rapl_energy_units, rapl_time_units
 * (via the vendor-specific probe), then dumps per-package RAPL
 * MSRs unless --quiet.
 */
void probe_rapl(void)
{
	if (!platform->rapl_msrs || no_msr)
		return;

	if (genuine_intel)
		rapl_probe_intel();
	if (authentic_amd || hygon_genuine)
		rapl_probe_amd();

	if (quiet)
		return;

	for_all_cpus(print_rapl, ODD_COUNTERS);
}

/*
 * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
 * the Thermal Control Circuit (TCC) activates.
 * This is usually equal to tjMax.
 *
 * Older processors do not have this MSR, so there we guess,
 * but also allow cmdline over-ride with -T.
 *
 * Several MSR temperature values are in units of degrees-C
 * below this value, including the Digital Thermal Sensor (DTS),
 * Package Thermal Management Sensor (PTM), and thermal event thresholds.
 */
int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	unsigned int tcc_default, tcc_offset;
	int cpu;

	UNUSED(c);
	UNUSED(p);

	/* tj_max is used only for dts or ptm */
	if (!(do_dts || do_ptm))
		return 0;

	/* this is a per-package concept */
	if (!is_cpu_first_thread_in_package(t, c, p))
		return 0;

	cpu = t->cpu_id;
	if (cpu_migrate(cpu)) {
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	/* cmdline -T override always wins */
	if (tj_max_override != 0) {
		tj_max = tj_max_override;
		fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", cpu, tj_max);
		return 0;
	}

	/* Temperature Target MSR is Nehalem and newer only */
	if (!platform->has_nhm_msrs || no_msr)
		goto guess;

	if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
		goto guess;

	/* TCC activation temperature, degrees C */
	tcc_default = (msr >> 16) & 0xFF;

	if (!quiet) {
		int bits = platform->tcc_offset_bits;
		unsigned long long enabled = 0;

		/* the TCC offset is meaningful only if enabled in MSR_PLATFORM_INFO bit 30 */
		if (bits && !get_msr(base_cpu, MSR_PLATFORM_INFO, &enabled))
			enabled = (enabled >> 30) & 1;

		if (bits && enabled) {
			tcc_offset = (msr >> 24) & GENMASK(bits - 1, 0);
			fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
				cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
		} else {
			fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default);
		}
	}

	if (!tcc_default)
		goto guess;

	tj_max = tcc_default;

	return 0;

guess:
	tj_max = TJMAX_DEFAULT;
	fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max);

	return 0;
}

/*
 * Dump per-core DTS and per-package PTM thermal status/interrupt MSRs.
 * All temperature fields are degrees-C below tj_max.
 */
int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	unsigned
int dts, dts2;
	int cpu;

	UNUSED(c);
	UNUSED(p);

	if (no_msr)
		return 0;

	if (!(do_dts || do_ptm))
		return 0;

	cpu = t->cpu_id;

	/* DTS is per-core, no need to print for each thread */
	if (!is_cpu_first_thread_in_core(t, c, p))
		return 0;

	if (cpu_migrate(cpu)) {
		fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	/* package thermal status: print once per package */
	if (do_ptm && is_cpu_first_core_in_package(t, c, p)) {
		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
			return 0;

		/* reading is degrees-C below tj_max */
		dts = (msr >> 16) & 0x7F;
		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts);

		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
			return 0;

		/* two interrupt thresholds, also relative to tj_max */
		dts = (msr >> 16) & 0x7F;
		dts2 = (msr >> 8) & 0x7F;
		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
			cpu, msr, tj_max - dts, tj_max - dts2);
	}

	/* per-core DTS detail only in --debug mode */
	if (do_dts && debug) {
		unsigned int resolution;

		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
			return 0;

		dts = (msr >> 16) & 0x7F;
		resolution = (msr >> 27) & 0xF;
		fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
			cpu, msr, tj_max - dts, resolution);

		if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
			return 0;

		dts = (msr >> 16) & 0x7F;
		dts2 = (msr >> 8) & 0x7F;
		fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
			cpu, msr, tj_max - dts, tj_max - dts2);
	}

	return 0;
}

/*
 * Probe thermal features: sysfs core-throttle counter presence,
 * per-package tjMax, and (unless --quiet) a thermal MSR dump.
 */
void probe_thermal(void)
{
	if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK))
		BIC_PRESENT(BIC_CORE_THROT_CNT);
	else
		BIC_NOT_PRESENT(BIC_CORE_THROT_CNT);

	for_all_cpus(set_temperature_target, ODD_COUNTERS);

	if (quiet)
		return;

	for_all_cpus(print_thermal,
ODD_COUNTERS); 6611 } 6612 6613 int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p) 6614 { 6615 unsigned int eax, ebx, ecx, edx; 6616 6617 UNUSED(c); 6618 UNUSED(p); 6619 6620 if (!genuine_intel) 6621 return 0; 6622 6623 if (cpu_migrate(t->cpu_id)) { 6624 fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id); 6625 return -1; 6626 } 6627 6628 if (max_level < 0x1a) 6629 return 0; 6630 6631 __cpuid(0x1a, eax, ebx, ecx, edx); 6632 eax = (eax >> 24) & 0xFF; 6633 if (eax == 0x20) 6634 t->is_atom = true; 6635 return 0; 6636 } 6637 6638 void decode_feature_control_msr(void) 6639 { 6640 unsigned long long msr; 6641 6642 if (no_msr) 6643 return; 6644 6645 if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr)) 6646 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n", 6647 base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : ""); 6648 } 6649 6650 void decode_misc_enable_msr(void) 6651 { 6652 unsigned long long msr; 6653 6654 if (no_msr) 6655 return; 6656 6657 if (!genuine_intel) 6658 return; 6659 6660 if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr)) 6661 fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n", 6662 base_cpu, msr, 6663 msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-", 6664 msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-", 6665 msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-", 6666 msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", 6667 msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); 6668 } 6669 6670 void decode_misc_feature_control(void) 6671 { 6672 unsigned long long msr; 6673 6674 if (no_msr) 6675 return; 6676 6677 if (!platform->has_msr_misc_feature_control) 6678 return; 6679 6680 if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr)) 6681 fprintf(outf, 6682 "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n", 6683 base_cpu, msr, msr & (0 << 0) ? 
"No-" : "", msr & (1 << 0) ? "No-" : "", 6684 msr & (2 << 0) ? "No-" : "", msr & (3 << 0) ? "No-" : ""); 6685 } 6686 6687 /* 6688 * Decode MSR_MISC_PWR_MGMT 6689 * 6690 * Decode the bits according to the Nehalem documentation 6691 * bit[0] seems to continue to have same meaning going forward 6692 * bit[1] less so... 6693 */ 6694 void decode_misc_pwr_mgmt_msr(void) 6695 { 6696 unsigned long long msr; 6697 6698 if (no_msr) 6699 return; 6700 6701 if (!platform->has_msr_misc_pwr_mgmt) 6702 return; 6703 6704 if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) 6705 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n", 6706 base_cpu, msr, 6707 msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS"); 6708 } 6709 6710 /* 6711 * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG 6712 * 6713 * This MSRs are present on Silvermont processors, 6714 * Intel Atom processor E3000 series (Baytrail), and friends. 6715 */ 6716 void decode_c6_demotion_policy_msr(void) 6717 { 6718 unsigned long long msr; 6719 6720 if (no_msr) 6721 return; 6722 6723 if (!platform->has_msr_c6_demotion_policy_config) 6724 return; 6725 6726 if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr)) 6727 fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n", 6728 base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS"); 6729 6730 if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr)) 6731 fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n", 6732 base_cpu, msr, msr & (1 << 0) ? 
"EN" : "DIS");
}

/*
 * Report the current /dev/cpu_dma_latency PM-QoS constraint.
 * A value of 2000000000 usec is reported as the unconstrained default.
 */
void print_dev_latency(void)
{
	char *path = "/dev/cpu_dma_latency";
	int fd;
	int value;
	int retval;

	fd = open(path, O_RDONLY);
	if (fd < 0) {
		if (debug)
			warnx("Read %s failed", path);
		return;
	}

	retval = read(fd, (void *)&value, sizeof(int));
	if (retval != sizeof(int)) {
		warn("read failed %s", path);
		close(fd);
		return;
	}
	fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", value, value == 2000000000 ? "default" : "constrained");

	close(fd);
}

/*
 * Probe whether the instructions-retired perf counter can be opened;
 * warn once if not (typically requires root or --no-perf).
 */
static int has_instr_count_access(void)
{
	int fd;
	int has_access;

	if (no_perf)
		return 0;

	fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
	has_access = fd != -1;

	if (fd != -1)
		close(fd);

	if (!has_access)
		warnx("Failed to access %s. Some of the counters may not be available\n"
		      "\tRun as root to enable them or use %s to disable the access explicitly",
		      "instructions retired perf counter", "--no-perf");

	return has_access;
}

/* APERF/MPERF are needed for any of the frequency/busy/IPC/c1 columns. */
bool is_aperf_access_required(void)
{
	return BIC_IS_ENABLED(BIC_Avg_MHz)
	    || BIC_IS_ENABLED(BIC_Busy)
	    || BIC_IS_ENABLED(BIC_Bzy_MHz)
	    || BIC_IS_ENABLED(BIC_IPC)
	    || BIC_IS_ENABLED(BIC_CPU_c1);
}

/*
 * Open one RAPL energy counter via the perf API.
 * Returns the counter fd, or -1 on failure; on success *scale_ and
 * *unit_ receive the perf-reported scale and unit for the counter.
 */
int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
			   double *scale_, enum rapl_unit *unit_)
{
	if (no_perf)
		return -1;

	const double scale = read_perf_rapl_scale(cai->perf_subsys, cai->perf_name);

	if (scale == 0.0)
		return -1;

	const enum rapl_unit unit = read_perf_rapl_unit(cai->perf_subsys, cai->perf_name);

	if (unit == RAPL_UNIT_INVALID)
		return -1;

	const unsigned int rapl_type = read_perf_type(cai->perf_subsys);
	const unsigned int
rapl_energy_pkg_config = read_rapl_config(cai->perf_subsys, cai->perf_name);

	const int fd_counter =
	    open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP);
	if (fd_counter == -1)
		return -1;

	/* If it's the first counter opened, make it a group descriptor */
	if (rci->fd_perf == -1)
		rci->fd_perf = fd_counter;

	*scale_ = scale;
	*unit_ = unit;
	return fd_counter;
}

/* Debug-logging wrapper around add_rapl_perf_counter_(). */
int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
			  double *scale, enum rapl_unit *unit)
{
	int ret = add_rapl_perf_counter_(cpu, rci, cai, scale, unit);

	if (debug)
		fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu);

	return ret;
}

/*
 * Linux-perf manages the HW instructions-retired counter
 * by enabling when requested, and hiding rollover
 */
void linux_perf_init(void)
{
	/* no perf support in this kernel: nothing to allocate */
	if (access("/proc/sys/kernel/perf_event_paranoid", F_OK))
		return;

	if (BIC_IS_ENABLED(BIC_IPC) && has_aperf) {
		fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
		if (fd_instr_count_percpu == NULL)
			err(-1, "calloc fd_instr_count_percpu");
	}

	const bool aperf_required = is_aperf_access_required();

	if (aperf_required && has_aperf && amperf_source == AMPERF_SOURCE_PERF) {
		fd_amperf_percpu = calloc(topo.max_cpu_num + 1, sizeof(*fd_amperf_percpu));
		if (fd_amperf_percpu == NULL)
			err(-1, "calloc fd_amperf_percpu");
	}
}

/*
 * rapl_perf_init()
 *
 * Probe every RAPL counter for every RAPL domain (per-core or
 * per-package, depending on the platform), preferring the perf API
 * and falling back to raw MSR access.
 */
void rapl_perf_init(void)
{
	const unsigned int num_domains = (platform->has_per_core_rapl ? topo.max_core_id : topo.max_package_id) + 1;
	bool *domain_visited = calloc(num_domains, sizeof(bool));

	rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain));
	if (rapl_counter_info_perdomain == NULL)
		err(-1, "calloc rapl_counter_info_percpu");
	rapl_counter_info_perdomain_size = num_domains;

	/*
	 * Initialize rapl_counter_info_percpu
	 */
	for (unsigned int domain_id = 0; domain_id < num_domains; ++domain_id) {
		struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain_id];

		rci->fd_perf = -1;
		for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) {
			rci->data[i] = 0;
			rci->source[i] = RAPL_SOURCE_NONE;
		}
	}

	/*
	 * Open/probe the counters
	 * If can't get it via perf, fallback to MSR
	 */
	for (size_t i = 0; i < ARRAY_SIZE(rapl_counter_arch_infos); ++i) {

		const struct rapl_counter_arch_info *const cai = &rapl_counter_arch_infos[i];
		bool has_counter = 0;
		double scale;
		enum rapl_unit unit;
		unsigned int next_domain;

		memset(domain_visited, 0, num_domains * sizeof(*domain_visited));

		for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) {

			if (cpu_is_not_allowed(cpu))
				continue;

			/* Skip already seen and handled RAPL domains */
			next_domain =
			    platform->has_per_core_rapl ? cpus[cpu].physical_core_id : cpus[cpu].physical_package_id;

			assert(next_domain < num_domains);

			if (domain_visited[next_domain])
				continue;

			domain_visited[next_domain] = 1;

			struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain];

			/* Check if the counter is enabled and accessible */
			if (BIC_IS_ENABLED(cai->bic) && (platform->rapl_msrs & cai->feature_mask)) {

				/* Use perf API for this counter */
				if (!no_perf && cai->perf_name
				    && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) {
					rci->source[cai->rci_index] = RAPL_SOURCE_PERF;
					rci->scale[cai->rci_index] = scale * cai->compat_scale;
					rci->unit[cai->rci_index] = unit;
					rci->flags[cai->rci_index] = cai->flags;

					/* Use MSR for this counter */
				} else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) {
					rci->source[cai->rci_index] = RAPL_SOURCE_MSR;
					rci->msr[cai->rci_index] = cai->msr;
					rci->msr_mask[cai->rci_index] = cai->msr_mask;
					rci->msr_shift[cai->rci_index] = cai->msr_shift;
					rci->unit[cai->rci_index] = RAPL_UNIT_JOULES;
					rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale;
					rci->flags[cai->rci_index] = cai->flags;
				}
			}

			if (rci->source[cai->rci_index] != RAPL_SOURCE_NONE)
				has_counter = 1;
		}

		/* If any CPU has access to the counter, make it present */
		if (has_counter)
			BIC_PRESENT(cai->bic);
	}

	free(domain_visited);
}

/* Can APERF/MPERF be read directly as MSRs? */
static int has_amperf_access_via_msr(void)
{
	if (no_msr)
		return 0;

	if (probe_msr(base_cpu, MSR_IA32_APERF))
		return 0;

	if (probe_msr(base_cpu, MSR_IA32_MPERF))
		return 0;

	return 1;
}

/* Can APERF/MPERF be read via the perf API? */
static int has_amperf_access_via_perf(void)
{
	struct amperf_group_fd fds;

	/*
	 * Cache the last result, so we don't warn the user multiple times
	 *
Negative means cached, no access
	 * Zero means not cached
	 * Positive means cached, has access
	 */
	static int has_access_cached;

	if (no_perf)
		return 0;

	if (has_access_cached != 0)
		return has_access_cached > 0;

	fds = open_amperf_fd(base_cpu);
	has_access_cached = (fds.aperf != -1) && (fds.mperf != -1);

	if (fds.aperf == -1)
		warnx("Failed to access %s. Some of the counters may not be available\n"
		      "\tRun as root to enable them or use %s to disable the access explicitly",
		      "APERF perf counter", "--no-perf");
	else
		close(fds.aperf);

	if (fds.mperf == -1)
		warnx("Failed to access %s. Some of the counters may not be available\n"
		      "\tRun as root to enable them or use %s to disable the access explicitly",
		      "MPERF perf counter", "--no-perf");
	else
		close(fds.mperf);

	/* remember a negative probe so we don't warn again */
	if (has_access_cached == 0)
		has_access_cached = -1;

	return has_access_cached > 0;
}

/* Check if we can access APERF and MPERF */
static int has_amperf_access(void)
{
	if (!is_aperf_access_required())
		return 0;

	if (!no_msr && has_amperf_access_via_msr())
		return 1;

	if (!no_perf && has_amperf_access_via_perf())
		return 1;

	return 0;
}

/*
 * Map a perf subsystem name to the matching per-CPU group-leader fd slot,
 * or NULL if the subsystem is not one we group.
 */
int *get_cstate_perf_group_fd(struct cstate_counter_info_t *cci, const char *group_name)
{
	if (strcmp(group_name, "cstate_core") == 0)
		return &cci->fd_perf_core;

	if (strcmp(group_name, "cstate_pkg") == 0)
		return &cci->fd_perf_pkg;

	return NULL;
}

/* Open one C-state residency counter via the perf API; return fd or -1. */
int add_cstate_perf_counter_(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai)
{
	if (no_perf)
		return -1;

	int *pfd_group = get_cstate_perf_group_fd(cci, cai->perf_subsys);

	if (pfd_group == NULL)
		return -1;

	const unsigned int type = read_perf_type(cai->perf_subsys);
	const unsigned int config = read_rapl_config(cai->perf_subsys, cai->perf_name);

	const int fd_counter = open_perf_counter(cpu, type, config, *pfd_group, PERF_FORMAT_GROUP);

	if (fd_counter == -1)
		return -1;

	/* If it's the first counter opened, make it a group descriptor */
	if (*pfd_group == -1)
		*pfd_group = fd_counter;

	return fd_counter;
}

/* Debug-logging wrapper around add_cstate_perf_counter_(). */
int add_cstate_perf_counter(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai)
{
	int ret = add_cstate_perf_counter_(cpu, cci, cai);

	if (debug)
		fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu);

	return ret;
}

/*
 * cstate_perf_init_()
 *
 * Probe each supported C-state residency counter on every allowed CPU,
 * preferring the perf API and falling back to MSRs, while honoring each
 * counter's per-thread/per-core/per-package collection scope.
 */
void cstate_perf_init_(bool soft_c1)
{
	bool has_counter;
	bool *cores_visited = NULL, *pkg_visited = NULL;
	const int cores_visited_elems = topo.max_core_id + 1;
	const int pkg_visited_elems = topo.max_package_id + 1;
	const int cci_num = topo.max_cpu_num + 1;

	ccstate_counter_info = calloc(cci_num, sizeof(*ccstate_counter_info));
	if (!ccstate_counter_info)
		err(1, "calloc ccstate_counter_arch_info");
	ccstate_counter_info_size = cci_num;

	cores_visited = calloc(cores_visited_elems, sizeof(*cores_visited));
	if (!cores_visited)
		err(1, "calloc cores_visited");

	pkg_visited = calloc(pkg_visited_elems, sizeof(*pkg_visited));
	if (!pkg_visited)
		err(1, "calloc pkg_visited");

	/* Initialize cstate_counter_info_percpu */
	for (int cpu = 0; cpu < cci_num; ++cpu) {
		ccstate_counter_info[cpu].fd_perf_core = -1;
		ccstate_counter_info[cpu].fd_perf_pkg = -1;
	}

	for (int cidx = 0; cidx < NUM_CSTATE_COUNTERS; ++cidx) {
		has_counter = false;
		memset(cores_visited, 0, cores_visited_elems * sizeof(*cores_visited));
		memset(pkg_visited, 0, pkg_visited_elems * sizeof(*pkg_visited));

		const struct cstate_counter_arch_info *cai =
&ccstate_counter_arch_infos[cidx];

		for (int cpu = 0; cpu < cci_num; ++cpu) {

			struct cstate_counter_info_t *const cci = &ccstate_counter_info[cpu];

			if (cpu_is_not_allowed(cpu))
				continue;

			const int core_id = cpus[cpu].physical_core_id;
			const int pkg_id = cpus[cpu].physical_package_id;

			assert(core_id < cores_visited_elems);
			assert(pkg_id < pkg_visited_elems);

			const bool per_thread = cai->flags & CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD;
			const bool per_core = cai->flags & CSTATE_COUNTER_FLAG_COLLECT_PER_CORE;

			/* open each counter only once per its collection scope */
			if (!per_thread && cores_visited[core_id])
				continue;

			if (!per_core && pkg_visited[pkg_id])
				continue;

			const bool counter_needed = BIC_IS_ENABLED(cai->bic) ||
			    (soft_c1 && (cai->flags & CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY));
			const bool counter_supported = (platform->supported_cstates & cai->feature_mask);

			if (counter_needed && counter_supported) {
				/* Use perf API for this counter */
				if (!no_perf && cai->perf_name && add_cstate_perf_counter(cpu, cci, cai) != -1) {

					cci->source[cai->rci_index] = CSTATE_SOURCE_PERF;

					/* User MSR for this counter */
				} else if (!no_msr && cai->msr && pkg_cstate_limit >= cai->pkg_cstate_limit
					   && probe_msr(cpu, cai->msr) == 0) {
					cci->source[cai->rci_index] = CSTATE_SOURCE_MSR;
					cci->msr[cai->rci_index] = cai->msr;
				}
			}

			if (cci->source[cai->rci_index] != CSTATE_SOURCE_NONE) {
				has_counter = true;
				cores_visited[core_id] = true;
				pkg_visited[pkg_id] = true;
			}
		}

		/* If any CPU has access to the counter, make it present */
		if (has_counter)
			BIC_PRESENT(cai->bic);
	}

	free(cores_visited);
	free(pkg_visited);
}

void cstate_perf_init(void)
{
	/*
	 * If we don't have a C1 residency MSR, we calculate it "in software",
	 * but we need APERF, MPERF too.
	 */
	const bool soft_c1 = !platform->has_msr_core_c1_res && has_amperf_access()
	    && platform->supported_cstates & CC1;

	if (soft_c1)
		BIC_PRESENT(BIC_CPU_c1);

	cstate_perf_init_(soft_c1);
}

/* Probe C-state related features and (unless --quiet) dump their config. */
void probe_cstates(void)
{
	probe_cst_limit();

	if (platform->has_msr_module_c6_res_ms)
		BIC_PRESENT(BIC_Mod_c6);

	if (platform->has_ext_cst_msrs && !no_msr) {
		BIC_PRESENT(BIC_Totl_c0);
		BIC_PRESENT(BIC_Any_c0);
		BIC_PRESENT(BIC_GFX_c0);
		BIC_PRESENT(BIC_CPUGFX);
	}

	if (quiet)
		return;

	dump_power_ctl();
	dump_cst_cfg();
	decode_c6_demotion_policy_msr();
	print_dev_latency();
	dump_sysfs_cstate_config();
	print_irtl();
}

/* Detect Low Power Idle residency counters exported via sysfs/debugfs. */
void probe_lpi(void)
{
	if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
		BIC_PRESENT(BIC_CPU_LPI);
	else
		BIC_NOT_PRESENT(BIC_CPU_LPI);

	/* prefer the sysfs system-LPI file; fall back to debugfs */
	if (!access(sys_lpi_file_sysfs, R_OK)) {
		sys_lpi_file = sys_lpi_file_sysfs;
		BIC_PRESENT(BIC_SYS_LPI);
	} else if (!access(sys_lpi_file_debugfs, R_OK)) {
		sys_lpi_file = sys_lpi_file_debugfs;
		BIC_PRESENT(BIC_SYS_LPI);
	} else {
		sys_lpi_file_sysfs = NULL;
		BIC_NOT_PRESENT(BIC_SYS_LPI);
	}

}

/* Probe P-state features and (unless --quiet) dump frequency config. */
void probe_pstates(void)
{
	probe_bclk();

	if (quiet)
		return;

	dump_platform_info();
	dump_turbo_ratio_info();
	dump_sysfs_pstate_config();
	decode_misc_pwr_mgmt_msr();

	for_all_cpus(print_hwp, ODD_COUNTERS);
	for_all_cpus(print_epb, ODD_COUNTERS);
	for_all_cpus(print_perf_limit, ODD_COUNTERS);
}

/*
 * Identify the CPU via CPUID: vendor, family/model/stepping, and the
 * feature flags that gate which columns and probes are enabled.
 */
void process_cpuid()
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
	unsigned long long ucode_patch = 0;
	bool ucode_patch_valid = false;

	eax = ebx = ecx = edx = 0;

	__cpuid(0, max_level, ebx, ecx,
edx);

	/* vendor string in EBX/EDX/ECX: "GenuineIntel", "AuthenticAMD", "HygonGenuine" */
	if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
		genuine_intel = 1;
	else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
		authentic_amd = 1;
	else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e)
		hygon_genuine = 1;

	if (!quiet)
		fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n",
			(char *)&ebx, (char *)&edx, (char *)&ecx, max_level);

	__cpuid(1, fms, ebx, ecx, edx);
	family = (fms >> 8) & 0xf;
	model = (fms >> 4) & 0xf;
	stepping = fms & 0xf;
	if (family == 0xf)
		family += (fms >> 20) & 0xff;	/* extended family */
	if (family >= 6)
		model += ((fms >> 16) & 0xf) << 4;	/* extended model */
	ecx_flags = ecx;
	edx_flags = edx;

	if (!no_msr) {
		if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
			warnx("get_msr(UCODE)");
		else
			ucode_patch_valid = true;
	}

	/*
	 * check max extended function levels of CPUID.
	 * This is needed to check for invariant TSC.
	 * This check is valid for both Intel and AMD.
	 */
	ebx = ecx = edx = 0;
	__cpuid(0x80000000, max_extended_level, ebx, ecx, edx);

	if (!quiet) {
		fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)",
			family, model, stepping, family, model, stepping);
		if (ucode_patch_valid)
			fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
		fputc('\n', outf);

		fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level);
		fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
			ecx_flags & (1 << 0) ? "SSE3" : "-",
			ecx_flags & (1 << 3) ? "MONITOR" : "-",
			ecx_flags & (1 << 6) ? "SMX" : "-",
			ecx_flags & (1 << 7) ? "EIST" : "-",
			ecx_flags & (1 << 8) ? "TM2" : "-",
			edx_flags & (1 << 4) ? "TSC" : "-",
			edx_flags & (1 << 5) ? "MSR" : "-",
			edx_flags & (1 << 22) ? "ACPI-TM" : "-",
			edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-");
	}

	probe_platform_features(family, model);

	/* turbostat fundamentally requires MSR support */
	if (!(edx_flags & (1 << 5)))
		errx(1, "CPUID: no MSR");

	if (max_extended_level >= 0x80000007) {

		/*
		 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
		 * this check is valid for both Intel and AMD
		 */
		__cpuid(0x80000007, eax, ebx, ecx, edx);
		has_invariant_tsc = edx & (1 << 8);
	}

	/*
	 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
	 * this check is valid for both Intel and AMD
	 */

	__cpuid(0x6, eax, ebx, ecx, edx);
	has_aperf = ecx & (1 << 0);
	if (has_aperf && has_amperf_access()) {
		BIC_PRESENT(BIC_Avg_MHz);
		BIC_PRESENT(BIC_Busy);
		BIC_PRESENT(BIC_Bzy_MHz);
		BIC_PRESENT(BIC_IPC);
	}
	do_dts = eax & (1 << 0);
	if (do_dts)
		BIC_PRESENT(BIC_CoreTmp);
	has_turbo = eax & (1 << 1);
	do_ptm = eax & (1 << 6);
	if (do_ptm)
		BIC_PRESENT(BIC_PkgTmp);
	has_hwp = eax & (1 << 7);
	has_hwp_notify = eax & (1 << 8);
	has_hwp_activity_window = eax & (1 << 9);
	has_hwp_epp = eax & (1 << 10);
	has_hwp_pkg = eax & (1 << 11);
	has_epb = ecx & (1 << 3);

	if (!quiet)
		fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
			"%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
			has_aperf ? "" : "No-",
			has_turbo ? "" : "No-",
			do_dts ? "" : "No-",
			do_ptm ? "" : "No-",
			has_hwp ? "" : "No-",
			has_hwp_notify ? "" : "No-",
			has_hwp_activity_window ? "" : "No-",
			has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-");

	if (!quiet)
		decode_misc_enable_msr();

	if (max_level >= 0x7 && !quiet) {
		int has_sgx;

		ecx = 0;

		__cpuid_count(0x7, 0, eax, ebx, ecx, edx);

		has_sgx = ebx & (1 << 2);

		is_hybrid = edx & (1 << 15);

		fprintf(outf, "CPUID(7): %sSGX %sHybrid\n", has_sgx ? "" : "No-", is_hybrid ? "" : "No-");

		if (has_sgx)
			decode_feature_control_msr();
	}

	if (max_level >= 0x15) {
		unsigned int eax_crystal;
		unsigned int ebx_tsc;

		/*
		 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
		 */
		eax_crystal = ebx_tsc = crystal_hz = edx = 0;
		__cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);

		if (ebx_tsc != 0) {
			if (!quiet && (ebx != 0))
				fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
					eax_crystal, ebx_tsc, crystal_hz);

			/* CPUID may report a zero crystal; use the platform table instead */
			if (crystal_hz == 0)
				crystal_hz = platform->crystal_freq;

			if (crystal_hz) {
				tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal;
				if (!quiet)
					fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
						tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal);
			}
		}
	}
	if (max_level >= 0x16) {
		unsigned int base_mhz, max_mhz, bus_mhz, edx;

		/*
		 * CPUID 16H Base MHz, Max MHz, Bus MHz
		 */
		base_mhz = max_mhz = bus_mhz = edx = 0;

		__cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);

		bclk = bus_mhz;

		base_hz = base_mhz * 1000000;
		has_base_hz = 1;

		if (platform->enable_tsc_tweak)
			tsc_tweak = base_hz / tsc_hz;

		if (!quiet)
			fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
				base_mhz, max_mhz, bus_mhz);
	}

	if (has_aperf)
		aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1;

	BIC_PRESENT(BIC_IRQ);
	BIC_PRESENT(BIC_TSC_MHz);
}

/* Adjust C-state counter MSR addresses for platform-specific variants. */
static void counter_info_init(void)
{
	for (int i = 0; i < NUM_CSTATE_COUNTERS; ++i) {
		struct cstate_counter_arch_info *const cai = &ccstate_counter_arch_infos[i];

		if (platform->has_msr_knl_core_c6_residency && cai->msr == MSR_CORE_C6_RESIDENCY)
			cai->msr = MSR_KNL_CORE_C6_RESIDENCY;

		if (!platform->has_msr_core_c1_res && cai->msr == MSR_CORE_C1_RES)
			cai->msr = 0;

		if (platform->has_msr_atom_pkg_c6_residency && cai->msr == MSR_PKG_C6_RESIDENCY)
			cai->msr = MSR_ATOM_PKG_C6_RESIDENCY;
	}
}

/* Top-level power-management feature probe, run once at startup. */
void probe_pm_features(void)
{
	probe_pstates();

	probe_cstates();

	probe_lpi();

	probe_intel_uncore_frequency();

	probe_graphics();

	probe_rapl();

	probe_thermal();

	if (platform->has_nhm_msrs && !no_msr)
		BIC_PRESENT(BIC_SMI);

	if (!quiet)
		decode_misc_feature_control();
}

/*
 * in /dev/cpu/ return success for names that are numbers
 * ie. filter out ".", "..", "microcode".
7473 */ 7474 int dir_filter(const struct dirent *dirp) 7475 { 7476 if (isdigit(dirp->d_name[0])) 7477 return 1; 7478 else 7479 return 0; 7480 } 7481 7482 void topology_probe(bool startup) 7483 { 7484 int i; 7485 int max_core_id = 0; 7486 int max_package_id = 0; 7487 int max_siblings = 0; 7488 7489 /* Initialize num_cpus, max_cpu_num */ 7490 set_max_cpu_num(); 7491 topo.num_cpus = 0; 7492 for_all_proc_cpus(count_cpus); 7493 if (!summary_only && topo.num_cpus > 1) 7494 BIC_PRESENT(BIC_CPU); 7495 7496 if (debug > 1) 7497 fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); 7498 7499 cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); 7500 if (cpus == NULL) 7501 err(1, "calloc cpus"); 7502 7503 /* 7504 * Allocate and initialize cpu_present_set 7505 */ 7506 cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1)); 7507 if (cpu_present_set == NULL) 7508 err(3, "CPU_ALLOC"); 7509 cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 7510 CPU_ZERO_S(cpu_present_setsize, cpu_present_set); 7511 for_all_proc_cpus(mark_cpu_present); 7512 7513 /* 7514 * Allocate and initialize cpu_effective_set 7515 */ 7516 cpu_effective_set = CPU_ALLOC((topo.max_cpu_num + 1)); 7517 if (cpu_effective_set == NULL) 7518 err(3, "CPU_ALLOC"); 7519 cpu_effective_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 7520 CPU_ZERO_S(cpu_effective_setsize, cpu_effective_set); 7521 update_effective_set(startup); 7522 7523 /* 7524 * Allocate and initialize cpu_allowed_set 7525 */ 7526 cpu_allowed_set = CPU_ALLOC((topo.max_cpu_num + 1)); 7527 if (cpu_allowed_set == NULL) 7528 err(3, "CPU_ALLOC"); 7529 cpu_allowed_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 7530 CPU_ZERO_S(cpu_allowed_setsize, cpu_allowed_set); 7531 7532 /* 7533 * Validate and update cpu_allowed_set. 7534 * 7535 * Make sure all cpus in cpu_subset are also in cpu_present_set during startup. 7536 * Give a warning when cpus in cpu_subset become unavailable at runtime. 
7537 * Give a warning when cpus are not effective because of cgroup setting. 7538 * 7539 * cpu_allowed_set is the intersection of cpu_present_set/cpu_effective_set/cpu_subset. 7540 */ 7541 for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) { 7542 if (cpu_subset && !CPU_ISSET_S(i, cpu_subset_size, cpu_subset)) 7543 continue; 7544 7545 if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) { 7546 if (cpu_subset) { 7547 /* cpus in cpu_subset must be in cpu_present_set during startup */ 7548 if (startup) 7549 err(1, "cpu%d not present", i); 7550 else 7551 fprintf(stderr, "cpu%d not present\n", i); 7552 } 7553 continue; 7554 } 7555 7556 if (CPU_COUNT_S(cpu_effective_setsize, cpu_effective_set)) { 7557 if (!CPU_ISSET_S(i, cpu_effective_setsize, cpu_effective_set)) { 7558 fprintf(stderr, "cpu%d not effective\n", i); 7559 continue; 7560 } 7561 } 7562 7563 CPU_SET_S(i, cpu_allowed_setsize, cpu_allowed_set); 7564 } 7565 7566 if (!CPU_COUNT_S(cpu_allowed_setsize, cpu_allowed_set)) 7567 err(-ENODEV, "No valid cpus found"); 7568 sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set); 7569 7570 /* 7571 * Allocate and initialize cpu_affinity_set 7572 */ 7573 cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1)); 7574 if (cpu_affinity_set == NULL) 7575 err(3, "CPU_ALLOC"); 7576 cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 7577 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 7578 7579 for_all_proc_cpus(init_thread_id); 7580 7581 /* 7582 * For online cpus 7583 * find max_core_id, max_package_id 7584 */ 7585 for (i = 0; i <= topo.max_cpu_num; ++i) { 7586 int siblings; 7587 7588 if (cpu_is_not_present(i)) { 7589 if (debug > 1) 7590 fprintf(outf, "cpu%d NOT PRESENT\n", i); 7591 continue; 7592 } 7593 7594 cpus[i].logical_cpu_id = i; 7595 7596 /* get package information */ 7597 cpus[i].physical_package_id = get_physical_package_id(i); 7598 if (cpus[i].physical_package_id > max_package_id) 7599 max_package_id = cpus[i].physical_package_id; 7600 7601 /* get die 
information */ 7602 cpus[i].die_id = get_die_id(i); 7603 if (cpus[i].die_id > topo.max_die_id) 7604 topo.max_die_id = cpus[i].die_id; 7605 7606 /* get numa node information */ 7607 cpus[i].physical_node_id = get_physical_node_id(&cpus[i]); 7608 if (cpus[i].physical_node_id > topo.max_node_num) 7609 topo.max_node_num = cpus[i].physical_node_id; 7610 7611 /* get core information */ 7612 cpus[i].physical_core_id = get_core_id(i); 7613 if (cpus[i].physical_core_id > max_core_id) 7614 max_core_id = cpus[i].physical_core_id; 7615 7616 /* get thread information */ 7617 siblings = get_thread_siblings(&cpus[i]); 7618 if (siblings > max_siblings) 7619 max_siblings = siblings; 7620 if (cpus[i].thread_id == 0) 7621 topo.num_cores++; 7622 } 7623 topo.max_core_id = max_core_id; 7624 topo.max_package_id = max_package_id; 7625 7626 topo.cores_per_node = max_core_id + 1; 7627 if (debug > 1) 7628 fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node); 7629 if (!summary_only && topo.cores_per_node > 1) 7630 BIC_PRESENT(BIC_Core); 7631 7632 topo.num_die = topo.max_die_id + 1; 7633 if (debug > 1) 7634 fprintf(outf, "max_die_id %d, sizing for %d die\n", topo.max_die_id, topo.num_die); 7635 if (!summary_only && topo.num_die > 1) 7636 BIC_PRESENT(BIC_Die); 7637 7638 topo.num_packages = max_package_id + 1; 7639 if (debug > 1) 7640 fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); 7641 if (!summary_only && topo.num_packages > 1) 7642 BIC_PRESENT(BIC_Package); 7643 7644 set_node_data(); 7645 if (debug > 1) 7646 fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg); 7647 if (!summary_only && topo.nodes_per_pkg > 1) 7648 BIC_PRESENT(BIC_Node); 7649 7650 topo.threads_per_core = max_siblings; 7651 if (debug > 1) 7652 fprintf(outf, "max_siblings %d\n", max_siblings); 7653 7654 if (debug < 1) 7655 return; 7656 7657 for (i = 0; i <= topo.max_cpu_num; ++i) { 7658 if (cpu_is_not_present(i)) 7659 continue; 
7660 fprintf(outf, 7661 "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n", 7662 i, cpus[i].physical_package_id, cpus[i].die_id, 7663 cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, cpus[i].thread_id); 7664 } 7665 7666 } 7667 7668 void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) 7669 { 7670 int i; 7671 int num_cores = topo.cores_per_node * topo.nodes_per_pkg * topo.num_packages; 7672 int num_threads = topo.threads_per_core * num_cores; 7673 7674 *t = calloc(num_threads, sizeof(struct thread_data)); 7675 if (*t == NULL) 7676 goto error; 7677 7678 for (i = 0; i < num_threads; i++) 7679 (*t)[i].cpu_id = -1; 7680 7681 *c = calloc(num_cores, sizeof(struct core_data)); 7682 if (*c == NULL) 7683 goto error; 7684 7685 for (i = 0; i < num_cores; i++) { 7686 (*c)[i].core_id = -1; 7687 (*c)[i].base_cpu = -1; 7688 } 7689 7690 *p = calloc(topo.num_packages, sizeof(struct pkg_data)); 7691 if (*p == NULL) 7692 goto error; 7693 7694 for (i = 0; i < topo.num_packages; i++) { 7695 (*p)[i].package_id = i; 7696 (*p)[i].base_cpu = -1; 7697 } 7698 7699 return; 7700 error: 7701 err(1, "calloc counters"); 7702 } 7703 7704 /* 7705 * init_counter() 7706 * 7707 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE 7708 */ 7709 void init_counter(struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base, int cpu_id) 7710 { 7711 int pkg_id = cpus[cpu_id].physical_package_id; 7712 int node_id = cpus[cpu_id].logical_node_id; 7713 int core_id = cpus[cpu_id].physical_core_id; 7714 int thread_id = cpus[cpu_id].thread_id; 7715 struct thread_data *t; 7716 struct core_data *c; 7717 struct pkg_data *p; 7718 7719 /* Workaround for systems where physical_node_id==-1 7720 * and logical_node_id==(-1 - topo.num_cpus) 7721 */ 7722 if (node_id < 0) 7723 node_id = 0; 7724 7725 t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id); 7726 c = GET_CORE(core_base, core_id, node_id, pkg_id); 7727 p = 
GET_PKG(pkg_base, pkg_id); 7728 7729 t->cpu_id = cpu_id; 7730 if (!cpu_is_not_allowed(cpu_id)) { 7731 if (c->base_cpu < 0) 7732 c->base_cpu = t->cpu_id; 7733 if (p->base_cpu < 0) 7734 p->base_cpu = t->cpu_id; 7735 } 7736 7737 c->core_id = core_id; 7738 p->package_id = pkg_id; 7739 } 7740 7741 int initialize_counters(int cpu_id) 7742 { 7743 init_counter(EVEN_COUNTERS, cpu_id); 7744 init_counter(ODD_COUNTERS, cpu_id); 7745 return 0; 7746 } 7747 7748 void allocate_output_buffer() 7749 { 7750 output_buffer = calloc(1, (1 + topo.num_cpus) * 2048); 7751 outp = output_buffer; 7752 if (outp == NULL) 7753 err(-1, "calloc output buffer"); 7754 } 7755 7756 void allocate_fd_percpu(void) 7757 { 7758 fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 7759 if (fd_percpu == NULL) 7760 err(-1, "calloc fd_percpu"); 7761 } 7762 7763 void allocate_irq_buffers(void) 7764 { 7765 irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int)); 7766 if (irq_column_2_cpu == NULL) 7767 err(-1, "calloc %d", topo.num_cpus); 7768 7769 irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 7770 if (irqs_per_cpu == NULL) 7771 err(-1, "calloc %d", topo.max_cpu_num + 1); 7772 } 7773 7774 int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p) 7775 { 7776 topo.allowed_cpus++; 7777 if ((int)t->cpu_id == c->base_cpu) 7778 topo.allowed_cores++; 7779 if ((int)t->cpu_id == p->base_cpu) 7780 topo.allowed_packages++; 7781 7782 return 0; 7783 } 7784 7785 void topology_update(void) 7786 { 7787 topo.allowed_cpus = 0; 7788 topo.allowed_cores = 0; 7789 topo.allowed_packages = 0; 7790 for_all_cpus(update_topo, ODD_COUNTERS); 7791 } 7792 7793 void setup_all_buffers(bool startup) 7794 { 7795 topology_probe(startup); 7796 allocate_irq_buffers(); 7797 allocate_fd_percpu(); 7798 allocate_counters(&thread_even, &core_even, &package_even); 7799 allocate_counters(&thread_odd, &core_odd, &package_odd); 7800 allocate_output_buffer(); 7801 for_all_proc_cpus(initialize_counters); 7802 
topology_update(); 7803 } 7804 7805 void set_base_cpu(void) 7806 { 7807 int i; 7808 7809 for (i = 0; i < topo.max_cpu_num + 1; ++i) { 7810 if (cpu_is_not_allowed(i)) 7811 continue; 7812 base_cpu = i; 7813 if (debug > 1) 7814 fprintf(outf, "base_cpu = %d\n", base_cpu); 7815 return; 7816 } 7817 err(-ENODEV, "No valid cpus found"); 7818 } 7819 7820 static void set_amperf_source(void) 7821 { 7822 amperf_source = AMPERF_SOURCE_PERF; 7823 7824 const bool aperf_required = is_aperf_access_required(); 7825 7826 if (no_perf || !aperf_required || !has_amperf_access_via_perf()) 7827 amperf_source = AMPERF_SOURCE_MSR; 7828 7829 if (quiet || !debug) 7830 return; 7831 7832 fprintf(outf, "aperf/mperf source preference: %s\n", amperf_source == AMPERF_SOURCE_MSR ? "msr" : "perf"); 7833 } 7834 7835 bool has_added_counters(void) 7836 { 7837 /* 7838 * It only makes sense to call this after the command line is parsed, 7839 * otherwise sys structure is not populated. 7840 */ 7841 7842 return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters; 7843 } 7844 7845 bool is_msr_access_required(void) 7846 { 7847 if (no_msr) 7848 return false; 7849 7850 if (has_added_counters()) 7851 return true; 7852 7853 return BIC_IS_ENABLED(BIC_SMI) 7854 || BIC_IS_ENABLED(BIC_CPU_c1) 7855 || BIC_IS_ENABLED(BIC_CPU_c3) 7856 || BIC_IS_ENABLED(BIC_CPU_c6) 7857 || BIC_IS_ENABLED(BIC_CPU_c7) 7858 || BIC_IS_ENABLED(BIC_Mod_c6) 7859 || BIC_IS_ENABLED(BIC_CoreTmp) 7860 || BIC_IS_ENABLED(BIC_Totl_c0) 7861 || BIC_IS_ENABLED(BIC_Any_c0) 7862 || BIC_IS_ENABLED(BIC_GFX_c0) 7863 || BIC_IS_ENABLED(BIC_CPUGFX) 7864 || BIC_IS_ENABLED(BIC_Pkgpc3) 7865 || BIC_IS_ENABLED(BIC_Pkgpc6) 7866 || BIC_IS_ENABLED(BIC_Pkgpc2) 7867 || BIC_IS_ENABLED(BIC_Pkgpc7) 7868 || BIC_IS_ENABLED(BIC_Pkgpc8) 7869 || BIC_IS_ENABLED(BIC_Pkgpc9) 7870 || BIC_IS_ENABLED(BIC_Pkgpc10) 7871 /* TODO: Multiplex access with perf */ 7872 || BIC_IS_ENABLED(BIC_CorWatt) 7873 || BIC_IS_ENABLED(BIC_Cor_J) 7874 || 
BIC_IS_ENABLED(BIC_PkgWatt) 7875 || BIC_IS_ENABLED(BIC_CorWatt) 7876 || BIC_IS_ENABLED(BIC_GFXWatt) 7877 || BIC_IS_ENABLED(BIC_RAMWatt) 7878 || BIC_IS_ENABLED(BIC_Pkg_J) 7879 || BIC_IS_ENABLED(BIC_Cor_J) 7880 || BIC_IS_ENABLED(BIC_GFX_J) 7881 || BIC_IS_ENABLED(BIC_RAM_J) 7882 || BIC_IS_ENABLED(BIC_PKG__) 7883 || BIC_IS_ENABLED(BIC_RAM__) 7884 || BIC_IS_ENABLED(BIC_PkgTmp) 7885 || (is_aperf_access_required() && !has_amperf_access_via_perf()); 7886 } 7887 7888 void check_msr_access(void) 7889 { 7890 if (!is_msr_access_required()) 7891 no_msr = 1; 7892 7893 check_dev_msr(); 7894 check_msr_permission(); 7895 7896 if (no_msr) 7897 bic_disable_msr_access(); 7898 } 7899 7900 void check_perf_access(void) 7901 { 7902 const bool intrcount_required = BIC_IS_ENABLED(BIC_IPC); 7903 7904 if (no_perf || !intrcount_required || !has_instr_count_access()) 7905 bic_enabled &= ~BIC_IPC; 7906 7907 const bool aperf_required = is_aperf_access_required(); 7908 7909 if (!aperf_required || !has_amperf_access()) { 7910 bic_enabled &= ~BIC_Avg_MHz; 7911 bic_enabled &= ~BIC_Busy; 7912 bic_enabled &= ~BIC_Bzy_MHz; 7913 bic_enabled &= ~BIC_IPC; 7914 } 7915 } 7916 7917 void turbostat_init() 7918 { 7919 setup_all_buffers(true); 7920 set_base_cpu(); 7921 check_msr_access(); 7922 check_perf_access(); 7923 process_cpuid(); 7924 counter_info_init(); 7925 probe_pm_features(); 7926 set_amperf_source(); 7927 linux_perf_init(); 7928 rapl_perf_init(); 7929 cstate_perf_init(); 7930 7931 for_all_cpus(get_cpu_type, ODD_COUNTERS); 7932 for_all_cpus(get_cpu_type, EVEN_COUNTERS); 7933 7934 if (DO_BIC(BIC_IPC)) 7935 (void)get_instr_count_fd(base_cpu); 7936 7937 /* 7938 * If TSC tweak is needed, but couldn't get it, 7939 * disable more BICs, since it can't be reported accurately. 
7940 */ 7941 if (platform->enable_tsc_tweak && !has_base_hz) { 7942 bic_enabled &= ~BIC_Busy; 7943 bic_enabled &= ~BIC_Bzy_MHz; 7944 } 7945 } 7946 7947 int fork_it(char **argv) 7948 { 7949 pid_t child_pid; 7950 int status; 7951 7952 snapshot_proc_sysfs_files(); 7953 status = for_all_cpus(get_counters, EVEN_COUNTERS); 7954 first_counter_read = 0; 7955 if (status) 7956 exit(status); 7957 gettimeofday(&tv_even, (struct timezone *)NULL); 7958 7959 child_pid = fork(); 7960 if (!child_pid) { 7961 /* child */ 7962 execvp(argv[0], argv); 7963 err(errno, "exec %s", argv[0]); 7964 } else { 7965 7966 /* parent */ 7967 if (child_pid == -1) 7968 err(1, "fork"); 7969 7970 signal(SIGINT, SIG_IGN); 7971 signal(SIGQUIT, SIG_IGN); 7972 if (waitpid(child_pid, &status, 0) == -1) 7973 err(status, "waitpid"); 7974 7975 if (WIFEXITED(status)) 7976 status = WEXITSTATUS(status); 7977 } 7978 /* 7979 * n.b. fork_it() does not check for errors from for_all_cpus() 7980 * because re-starting is problematic when forking 7981 */ 7982 snapshot_proc_sysfs_files(); 7983 for_all_cpus(get_counters, ODD_COUNTERS); 7984 gettimeofday(&tv_odd, (struct timezone *)NULL); 7985 timersub(&tv_odd, &tv_even, &tv_delta); 7986 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) 7987 fprintf(outf, "%s: Counter reset detected\n", progname); 7988 else { 7989 compute_average(EVEN_COUNTERS); 7990 format_all_counters(EVEN_COUNTERS); 7991 } 7992 7993 fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0); 7994 7995 flush_output_stderr(); 7996 7997 return status; 7998 } 7999 8000 int get_and_dump_counters(void) 8001 { 8002 int status; 8003 8004 snapshot_proc_sysfs_files(); 8005 status = for_all_cpus(get_counters, ODD_COUNTERS); 8006 if (status) 8007 return status; 8008 8009 status = for_all_cpus(dump_counters, ODD_COUNTERS); 8010 if (status) 8011 return status; 8012 8013 flush_output_stdout(); 8014 8015 return status; 8016 } 8017 8018 void print_version() 8019 { 8020 fprintf(outf, "turbostat 
version 2024.05.10 - Len Brown <lenb@kernel.org>\n"); 8021 } 8022 8023 #define COMMAND_LINE_SIZE 2048 8024 8025 void print_bootcmd(void) 8026 { 8027 char bootcmd[COMMAND_LINE_SIZE]; 8028 FILE *fp; 8029 int ret; 8030 8031 memset(bootcmd, 0, COMMAND_LINE_SIZE); 8032 fp = fopen("/proc/cmdline", "r"); 8033 if (!fp) 8034 return; 8035 8036 ret = fread(bootcmd, sizeof(char), COMMAND_LINE_SIZE - 1, fp); 8037 if (ret) { 8038 bootcmd[ret] = '\0'; 8039 /* the last character is already '\n' */ 8040 fprintf(outf, "Kernel command line: %s", bootcmd); 8041 } 8042 8043 fclose(fp); 8044 } 8045 8046 struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name) 8047 { 8048 struct msr_counter *mp; 8049 8050 for (mp = head; mp; mp = mp->next) { 8051 if (debug) 8052 printf("%s: %s %s\n", __func__, name, mp->name); 8053 if (!strncmp(name, mp->name, strlen(mp->name))) 8054 return mp; 8055 } 8056 return NULL; 8057 } 8058 8059 int add_counter(unsigned int msr_num, char *path, char *name, 8060 unsigned int width, enum counter_scope scope, 8061 enum counter_type type, enum counter_format format, int flags, int id) 8062 { 8063 struct msr_counter *msrp; 8064 8065 if (no_msr && msr_num) 8066 errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num); 8067 8068 if (debug) 8069 printf("%s(msr%d, %s, %s, width%d, scope%d, type%d, format%d, flags%x, id%d)\n", __func__, msr_num, 8070 path, name, width, scope, type, format, flags, id); 8071 8072 switch (scope) { 8073 8074 case SCOPE_CPU: 8075 msrp = find_msrp_by_name(sys.tp, name); 8076 if (msrp) { 8077 if (debug) 8078 printf("%s: %s FOUND\n", __func__, name); 8079 break; 8080 } 8081 if (sys.added_thread_counters++ >= MAX_ADDED_THREAD_COUNTERS) { 8082 warnx("ignoring thread counter %s", name); 8083 return -1; 8084 } 8085 break; 8086 case SCOPE_CORE: 8087 msrp = find_msrp_by_name(sys.cp, name); 8088 if (msrp) { 8089 if (debug) 8090 printf("%s: %s FOUND\n", __func__, name); 8091 break; 8092 } 8093 if (sys.added_core_counters++ >= 
MAX_ADDED_CORE_COUNTERS) { 8094 warnx("ignoring core counter %s", name); 8095 return -1; 8096 } 8097 break; 8098 case SCOPE_PACKAGE: 8099 msrp = find_msrp_by_name(sys.pp, name); 8100 if (msrp) { 8101 if (debug) 8102 printf("%s: %s FOUND\n", __func__, name); 8103 break; 8104 } 8105 if (sys.added_package_counters++ >= MAX_ADDED_PACKAGE_COUNTERS) { 8106 warnx("ignoring package counter %s", name); 8107 return -1; 8108 } 8109 break; 8110 default: 8111 warnx("ignoring counter %s with unknown scope", name); 8112 return -1; 8113 } 8114 8115 if (msrp == NULL) { 8116 msrp = calloc(1, sizeof(struct msr_counter)); 8117 if (msrp == NULL) 8118 err(-1, "calloc msr_counter"); 8119 msrp->msr_num = msr_num; 8120 strncpy(msrp->name, name, NAME_BYTES - 1); 8121 msrp->width = width; 8122 msrp->type = type; 8123 msrp->format = format; 8124 msrp->flags = flags; 8125 8126 switch (scope) { 8127 case SCOPE_CPU: 8128 msrp->next = sys.tp; 8129 sys.tp = msrp; 8130 break; 8131 case SCOPE_CORE: 8132 msrp->next = sys.cp; 8133 sys.cp = msrp; 8134 break; 8135 case SCOPE_PACKAGE: 8136 msrp->next = sys.pp; 8137 sys.pp = msrp; 8138 break; 8139 } 8140 } 8141 8142 if (path) { 8143 struct sysfs_path *sp; 8144 8145 sp = calloc(1, sizeof(struct sysfs_path)); 8146 if (sp == NULL) { 8147 perror("calloc"); 8148 exit(1); 8149 } 8150 strncpy(sp->path, path, PATH_BYTES - 1); 8151 sp->id = id; 8152 sp->next = msrp->sp; 8153 msrp->sp = sp; 8154 } 8155 8156 return 0; 8157 } 8158 8159 void parse_add_command(char *add_command) 8160 { 8161 int msr_num = 0; 8162 char *path = NULL; 8163 char name_buffer[NAME_BYTES] = ""; 8164 int width = 64; 8165 int fail = 0; 8166 enum counter_scope scope = SCOPE_CPU; 8167 enum counter_type type = COUNTER_CYCLES; 8168 enum counter_format format = FORMAT_DELTA; 8169 8170 while (add_command) { 8171 8172 if (sscanf(add_command, "msr0x%x", &msr_num) == 1) 8173 goto next; 8174 8175 if (sscanf(add_command, "msr%d", &msr_num) == 1) 8176 goto next; 8177 8178 if (*add_command == '/') { 8179 
path = add_command; 8180 goto next; 8181 } 8182 8183 if (sscanf(add_command, "u%d", &width) == 1) { 8184 if ((width == 32) || (width == 64)) 8185 goto next; 8186 width = 64; 8187 } 8188 if (!strncmp(add_command, "cpu", strlen("cpu"))) { 8189 scope = SCOPE_CPU; 8190 goto next; 8191 } 8192 if (!strncmp(add_command, "core", strlen("core"))) { 8193 scope = SCOPE_CORE; 8194 goto next; 8195 } 8196 if (!strncmp(add_command, "package", strlen("package"))) { 8197 scope = SCOPE_PACKAGE; 8198 goto next; 8199 } 8200 if (!strncmp(add_command, "cycles", strlen("cycles"))) { 8201 type = COUNTER_CYCLES; 8202 goto next; 8203 } 8204 if (!strncmp(add_command, "seconds", strlen("seconds"))) { 8205 type = COUNTER_SECONDS; 8206 goto next; 8207 } 8208 if (!strncmp(add_command, "usec", strlen("usec"))) { 8209 type = COUNTER_USEC; 8210 goto next; 8211 } 8212 if (!strncmp(add_command, "raw", strlen("raw"))) { 8213 format = FORMAT_RAW; 8214 goto next; 8215 } 8216 if (!strncmp(add_command, "delta", strlen("delta"))) { 8217 format = FORMAT_DELTA; 8218 goto next; 8219 } 8220 if (!strncmp(add_command, "percent", strlen("percent"))) { 8221 format = FORMAT_PERCENT; 8222 goto next; 8223 } 8224 8225 if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) { /* 18 < NAME_BYTES */ 8226 char *eos; 8227 8228 eos = strchr(name_buffer, ','); 8229 if (eos) 8230 *eos = '\0'; 8231 goto next; 8232 } 8233 8234 next: 8235 add_command = strchr(add_command, ','); 8236 if (add_command) { 8237 *add_command = '\0'; 8238 add_command++; 8239 } 8240 8241 } 8242 if ((msr_num == 0) && (path == NULL)) { 8243 fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n"); 8244 fail++; 8245 } 8246 8247 /* generate default column header */ 8248 if (*name_buffer == '\0') { 8249 if (width == 32) 8250 sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); 8251 else 8252 sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? 
"%" : ""); 8253 } 8254 8255 if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0, 0)) 8256 fail++; 8257 8258 if (fail) { 8259 help(); 8260 exit(1); 8261 } 8262 } 8263 8264 int is_deferred_add(char *name) 8265 { 8266 int i; 8267 8268 for (i = 0; i < deferred_add_index; ++i) 8269 if (!strcmp(name, deferred_add_names[i])) 8270 return 1; 8271 return 0; 8272 } 8273 8274 int is_deferred_skip(char *name) 8275 { 8276 int i; 8277 8278 for (i = 0; i < deferred_skip_index; ++i) 8279 if (!strcmp(name, deferred_skip_names[i])) 8280 return 1; 8281 return 0; 8282 } 8283 8284 void probe_sysfs(void) 8285 { 8286 char path[64]; 8287 char name_buf[16]; 8288 FILE *input; 8289 int state; 8290 char *sp; 8291 8292 for (state = 10; state >= 0; --state) { 8293 8294 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); 8295 input = fopen(path, "r"); 8296 if (input == NULL) 8297 continue; 8298 if (!fgets(name_buf, sizeof(name_buf), input)) 8299 err(1, "%s: failed to read file", path); 8300 8301 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ 8302 sp = strchr(name_buf, '-'); 8303 if (!sp) 8304 sp = strchrnul(name_buf, '\n'); 8305 *sp = '%'; 8306 *(sp + 1) = '\0'; 8307 8308 remove_underbar(name_buf); 8309 8310 fclose(input); 8311 8312 sprintf(path, "cpuidle/state%d/time", state); 8313 8314 if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) 8315 continue; 8316 8317 if (is_deferred_skip(name_buf)) 8318 continue; 8319 8320 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0); 8321 } 8322 8323 for (state = 10; state >= 0; --state) { 8324 8325 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); 8326 input = fopen(path, "r"); 8327 if (input == NULL) 8328 continue; 8329 if (!fgets(name_buf, sizeof(name_buf), input)) 8330 err(1, "%s: failed to read file", path); 8331 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ 8332 sp = strchr(name_buf, '-'); 
8333 if (!sp) 8334 sp = strchrnul(name_buf, '\n'); 8335 *sp = '\0'; 8336 fclose(input); 8337 8338 remove_underbar(name_buf); 8339 8340 sprintf(path, "cpuidle/state%d/usage", state); 8341 8342 if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) 8343 continue; 8344 8345 if (is_deferred_skip(name_buf)) 8346 continue; 8347 8348 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0); 8349 } 8350 8351 } 8352 8353 /* 8354 * parse cpuset with following syntax 8355 * 1,2,4..6,8-10 and set bits in cpu_subset 8356 */ 8357 void parse_cpu_command(char *optarg) 8358 { 8359 if (!strcmp(optarg, "core")) { 8360 if (cpu_subset) 8361 goto error; 8362 show_core_only++; 8363 return; 8364 } 8365 if (!strcmp(optarg, "package")) { 8366 if (cpu_subset) 8367 goto error; 8368 show_pkg_only++; 8369 return; 8370 } 8371 if (show_core_only || show_pkg_only) 8372 goto error; 8373 8374 cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS); 8375 if (cpu_subset == NULL) 8376 err(3, "CPU_ALLOC"); 8377 cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS); 8378 8379 CPU_ZERO_S(cpu_subset_size, cpu_subset); 8380 8381 if (parse_cpu_str(optarg, cpu_subset, cpu_subset_size)) 8382 goto error; 8383 8384 return; 8385 8386 error: 8387 fprintf(stderr, "\"--cpu %s\" malformed\n", optarg); 8388 help(); 8389 exit(-1); 8390 } 8391 8392 void cmdline(int argc, char **argv) 8393 { 8394 int opt; 8395 int option_index = 0; 8396 static struct option long_options[] = { 8397 { "add", required_argument, 0, 'a' }, 8398 { "cpu", required_argument, 0, 'c' }, 8399 { "Dump", no_argument, 0, 'D' }, 8400 { "debug", no_argument, 0, 'd' }, /* internal, not documented */ 8401 { "enable", required_argument, 0, 'e' }, 8402 { "interval", required_argument, 0, 'i' }, 8403 { "IPC", no_argument, 0, 'I' }, 8404 { "num_iterations", required_argument, 0, 'n' }, 8405 { "header_iterations", required_argument, 0, 'N' }, 8406 { "help", no_argument, 0, 'h' }, 8407 { "hide", required_argument, 0, 'H' }, // meh, -h taken 
by --help 8408 { "Joules", no_argument, 0, 'J' }, 8409 { "list", no_argument, 0, 'l' }, 8410 { "out", required_argument, 0, 'o' }, 8411 { "quiet", no_argument, 0, 'q' }, 8412 { "no-msr", no_argument, 0, 'M' }, 8413 { "no-perf", no_argument, 0, 'P' }, 8414 { "show", required_argument, 0, 's' }, 8415 { "Summary", no_argument, 0, 'S' }, 8416 { "TCC", required_argument, 0, 'T' }, 8417 { "version", no_argument, 0, 'v' }, 8418 { 0, 0, 0, 0 } 8419 }; 8420 8421 progname = argv[0]; 8422 8423 /* 8424 * Parse some options early, because they may make other options invalid, 8425 * like adding the MSR counter with --add and at the same time using --no-msr. 8426 */ 8427 while ((opt = getopt_long_only(argc, argv, "MP", long_options, &option_index)) != -1) { 8428 switch (opt) { 8429 case 'M': 8430 no_msr = 1; 8431 break; 8432 case 'P': 8433 no_perf = 1; 8434 break; 8435 default: 8436 break; 8437 } 8438 } 8439 optind = 0; 8440 8441 while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qMST:v", long_options, &option_index)) != -1) { 8442 switch (opt) { 8443 case 'a': 8444 parse_add_command(optarg); 8445 break; 8446 case 'c': 8447 parse_cpu_command(optarg); 8448 break; 8449 case 'D': 8450 dump_only++; 8451 break; 8452 case 'e': 8453 /* --enable specified counter */ 8454 bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST); 8455 break; 8456 case 'd': 8457 debug++; 8458 ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); 8459 break; 8460 case 'H': 8461 /* 8462 * --hide: do not show those specified 8463 * multiple invocations simply clear more bits in enabled mask 8464 */ 8465 bic_enabled &= ~bic_lookup(optarg, HIDE_LIST); 8466 break; 8467 case 'h': 8468 default: 8469 help(); 8470 exit(1); 8471 case 'i': 8472 { 8473 double interval = strtod(optarg, NULL); 8474 8475 if (interval < 0.001) { 8476 fprintf(outf, "interval %f seconds is too small\n", interval); 8477 exit(2); 8478 } 8479 8480 interval_tv.tv_sec = interval_ts.tv_sec = interval; 8481 interval_tv.tv_usec = (interval - 
interval_tv.tv_sec) * 1000000; 8482 interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000; 8483 } 8484 break; 8485 case 'J': 8486 rapl_joules++; 8487 break; 8488 case 'l': 8489 ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); 8490 list_header_only++; 8491 quiet++; 8492 break; 8493 case 'o': 8494 outf = fopen_or_die(optarg, "w"); 8495 break; 8496 case 'q': 8497 quiet = 1; 8498 break; 8499 case 'M': 8500 case 'P': 8501 /* Parsed earlier */ 8502 break; 8503 case 'n': 8504 num_iterations = strtod(optarg, NULL); 8505 8506 if (num_iterations <= 0) { 8507 fprintf(outf, "iterations %d should be positive number\n", num_iterations); 8508 exit(2); 8509 } 8510 break; 8511 case 'N': 8512 header_iterations = strtod(optarg, NULL); 8513 8514 if (header_iterations <= 0) { 8515 fprintf(outf, "iterations %d should be positive number\n", header_iterations); 8516 exit(2); 8517 } 8518 break; 8519 case 's': 8520 /* 8521 * --show: show only those specified 8522 * The 1st invocation will clear and replace the enabled mask 8523 * subsequent invocations can add to it. 
8524 */ 8525 if (shown == 0) 8526 bic_enabled = bic_lookup(optarg, SHOW_LIST); 8527 else 8528 bic_enabled |= bic_lookup(optarg, SHOW_LIST); 8529 shown = 1; 8530 break; 8531 case 'S': 8532 summary_only++; 8533 break; 8534 case 'T': 8535 tj_max_override = atoi(optarg); 8536 break; 8537 case 'v': 8538 print_version(); 8539 exit(0); 8540 break; 8541 } 8542 } 8543 } 8544 8545 void set_rlimit(void) 8546 { 8547 struct rlimit limit; 8548 8549 if (getrlimit(RLIMIT_NOFILE, &limit) < 0) 8550 err(1, "Failed to get rlimit"); 8551 8552 if (limit.rlim_max < MAX_NOFILE) 8553 limit.rlim_max = MAX_NOFILE; 8554 if (limit.rlim_cur < MAX_NOFILE) 8555 limit.rlim_cur = MAX_NOFILE; 8556 8557 if (setrlimit(RLIMIT_NOFILE, &limit) < 0) 8558 err(1, "Failed to set rlimit"); 8559 } 8560 8561 int main(int argc, char **argv) 8562 { 8563 int fd, ret; 8564 8565 fd = open("/sys/fs/cgroup/cgroup.procs", O_WRONLY); 8566 if (fd < 0) 8567 goto skip_cgroup_setting; 8568 8569 ret = write(fd, "0\n", 2); 8570 if (ret == -1) 8571 perror("Can't update cgroup\n"); 8572 8573 close(fd); 8574 8575 skip_cgroup_setting: 8576 outf = stderr; 8577 cmdline(argc, argv); 8578 8579 if (!quiet) { 8580 print_version(); 8581 print_bootcmd(); 8582 } 8583 8584 probe_sysfs(); 8585 8586 if (!getuid()) 8587 set_rlimit(); 8588 8589 turbostat_init(); 8590 8591 if (!no_msr) 8592 msr_sum_record(); 8593 8594 /* dump counters and exit */ 8595 if (dump_only) 8596 return get_and_dump_counters(); 8597 8598 /* list header and exit */ 8599 if (list_header_only) { 8600 print_header(","); 8601 flush_output_stdout(); 8602 return 0; 8603 } 8604 8605 /* 8606 * if any params left, it must be a command to fork 8607 */ 8608 if (argc - optind) 8609 return fork_it(argv + optind); 8610 else 8611 turbostat_loop(); 8612 8613 return 0; 8614 } 8615