1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * turbostat -- show CPU frequency and C-state residency 4 * on modern Intel and AMD processors. 5 * 6 * Copyright (c) 2024 Intel Corporation. 7 * Len Brown <len.brown@intel.com> 8 */ 9 10 #define _GNU_SOURCE 11 #include MSRHEADER 12 13 // copied from arch/x86/include/asm/cpu_device_id.h 14 #define VFM_MODEL_BIT 0 15 #define VFM_FAMILY_BIT 8 16 #define VFM_VENDOR_BIT 16 17 #define VFM_RSVD_BIT 24 18 19 #define VFM_MODEL_MASK GENMASK(VFM_FAMILY_BIT - 1, VFM_MODEL_BIT) 20 #define VFM_FAMILY_MASK GENMASK(VFM_VENDOR_BIT - 1, VFM_FAMILY_BIT) 21 #define VFM_VENDOR_MASK GENMASK(VFM_RSVD_BIT - 1, VFM_VENDOR_BIT) 22 23 #define VFM_MODEL(vfm) (((vfm) & VFM_MODEL_MASK) >> VFM_MODEL_BIT) 24 #define VFM_FAMILY(vfm) (((vfm) & VFM_FAMILY_MASK) >> VFM_FAMILY_BIT) 25 #define VFM_VENDOR(vfm) (((vfm) & VFM_VENDOR_MASK) >> VFM_VENDOR_BIT) 26 27 #define VFM_MAKE(_vendor, _family, _model) ( \ 28 ((_model) << VFM_MODEL_BIT) | \ 29 ((_family) << VFM_FAMILY_BIT) | \ 30 ((_vendor) << VFM_VENDOR_BIT) \ 31 ) 32 // end copied section 33 34 #define X86_VENDOR_INTEL 0 35 36 #include INTEL_FAMILY_HEADER 37 #include BUILD_BUG_HEADER 38 #include <stdarg.h> 39 #include <stdio.h> 40 #include <err.h> 41 #include <unistd.h> 42 #include <sys/types.h> 43 #include <sys/wait.h> 44 #include <sys/stat.h> 45 #include <sys/select.h> 46 #include <sys/resource.h> 47 #include <sys/mman.h> 48 #include <fcntl.h> 49 #include <signal.h> 50 #include <sys/time.h> 51 #include <stdlib.h> 52 #include <getopt.h> 53 #include <dirent.h> 54 #include <string.h> 55 #include <ctype.h> 56 #include <sched.h> 57 #include <time.h> 58 #include <cpuid.h> 59 #include <sys/capability.h> 60 #include <errno.h> 61 #include <math.h> 62 #include <linux/perf_event.h> 63 #include <asm/unistd.h> 64 #include <stdbool.h> 65 #include <assert.h> 66 #include <linux/kernel.h> 67 68 #define UNUSED(x) (void)(x) 69 70 /* 71 * This list matches the column headers, except 72 * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time 73 * 2. Core and CPU are moved to the end, we can't have strings that contain them 74 * matching on them for --show and --hide. 75 */ 76 77 /* 78 * buffer size used by sscanf() for added column names 79 * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters 80 */ 81 #define NAME_BYTES 20 82 #define PATH_BYTES 128 83 #define PERF_NAME_BYTES 128 84 85 #define MAX_NOFILE 0x8000 86 87 #define COUNTER_KIND_PERF_PREFIX "perf/" 88 #define COUNTER_KIND_PERF_PREFIX_LEN strlen(COUNTER_KIND_PERF_PREFIX) 89 #define PERF_DEV_NAME_BYTES 32 90 #define PERF_EVT_NAME_BYTES 32 91 92 enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; 93 enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M }; 94 enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE }; 95 enum counter_source { COUNTER_SOURCE_NONE, COUNTER_SOURCE_PERF, COUNTER_SOURCE_MSR }; 96 97 struct perf_counter_info { 98 struct perf_counter_info *next; 99 100 /* How to open the counter / What counter it is. */ 101 char device[PERF_DEV_NAME_BYTES]; 102 char event[PERF_EVT_NAME_BYTES]; 103 104 /* How to show/format the counter. */ 105 char name[PERF_NAME_BYTES]; 106 unsigned int width; 107 enum counter_scope scope; 108 enum counter_type type; 109 enum counter_format format; 110 double scale; 111 112 /* For reading the counter. */ 113 int *fd_perf_per_domain; 114 size_t num_domains; 115 }; 116 117 struct sysfs_path { 118 char path[PATH_BYTES]; 119 int id; 120 struct sysfs_path *next; 121 }; 122 123 struct msr_counter { 124 unsigned int msr_num; 125 char name[NAME_BYTES]; 126 struct sysfs_path *sp; 127 unsigned int width; 128 enum counter_type type; 129 enum counter_format format; 130 struct msr_counter *next; 131 unsigned int flags; 132 #define FLAGS_HIDE (1 << 0) 133 #define FLAGS_SHOW (1 << 1) 134 #define SYSFS_PERCPU (1 << 1) 135 }; 136 137 struct msr_counter bic[] = { 138 { 0x0, "usec", NULL, 0, 0, 0, NULL, 0 }, 139 { 0x0, "Time_Of_Day_Seconds", NULL, 0, 0, 0, NULL, 0 }, 140 { 0x0, "Package", NULL, 0, 0, 0, NULL, 0 }, 141 { 0x0, "Node", NULL, 0, 0, 0, NULL, 0 }, 142 { 0x0, "Avg_MHz", NULL, 0, 0, 0, NULL, 0 }, 143 { 0x0, "Busy%", NULL, 0, 0, 0, NULL, 0 }, 144 { 0x0, "Bzy_MHz", NULL, 0, 0, 0, NULL, 0 }, 145 { 0x0, "TSC_MHz", NULL, 0, 0, 0, NULL, 0 }, 146 { 0x0, "IRQ", NULL, 0, 0, 0, NULL, 0 }, 147 { 0x0, "SMI", NULL, 32, 0, FORMAT_DELTA, NULL, 0 }, 148 { 0x0, "sysfs", NULL, 0, 0, 0, NULL, 0 }, 149 { 0x0, "CPU%c1", NULL, 0, 0, 0, NULL, 0 }, 150 { 0x0, "CPU%c3", NULL, 0, 0, 0, NULL, 0 }, 151 { 0x0, "CPU%c6", NULL, 0, 0, 0, NULL, 0 }, 152 { 0x0, "CPU%c7", NULL, 0, 0, 0, NULL, 0 }, 153 { 0x0, "ThreadC", NULL, 0, 0, 0, NULL, 0 }, 154 { 0x0, "CoreTmp", NULL, 0, 0, 0, NULL, 0 }, 155 { 0x0, "CoreCnt", NULL, 0, 0, 0, NULL, 0 }, 156 { 0x0, "PkgTmp", NULL, 0, 0, 0, NULL, 0 }, 157 { 0x0, "GFX%rc6", NULL, 0, 0, 0, NULL, 0 }, 158 { 0x0, "GFXMHz", NULL, 0, 0, 0, NULL, 0 }, 159 { 0x0, "Pkg%pc2", NULL, 0, 0, 0, NULL, 0 }, 160 { 0x0, "Pkg%pc3", NULL, 0, 0, 0, NULL, 0 }, 161 { 0x0, "Pkg%pc6", NULL, 0, 0, 0, NULL, 0 }, 162 { 0x0, "Pkg%pc7", NULL, 0, 0, 0, NULL, 0 }, 163 { 0x0, "Pkg%pc8", NULL, 0, 0, 0, NULL, 0 }, 164 { 0x0, "Pkg%pc9", NULL, 0, 0, 0, NULL, 0 }, 165 { 0x0, "Pk%pc10", NULL, 0, 0, 0, NULL, 0 }, 166 { 0x0, "CPU%LPI", NULL, 0, 0, 0, NULL, 0 }, 167 { 0x0, "SYS%LPI", NULL, 0, 0, 0, NULL, 0 }, 168 { 0x0, "PkgWatt", NULL, 0, 0, 0, NULL, 0 }, 169 { 0x0, "CorWatt", NULL, 0, 0, 0, NULL, 0 }, 170 { 0x0, "GFXWatt", NULL, 0, 0, 0, NULL, 0 }, 171 { 0x0, "PkgCnt", NULL, 0, 0, 0, NULL, 0 }, 172 { 0x0, "RAMWatt", NULL, 0, 0, 0, NULL, 0 }, 173 { 0x0, "PKG_%", NULL, 0, 0, 0, NULL, 0 }, 174 { 0x0, "RAM_%", NULL, 0, 0, 0, NULL, 0 }, 175 { 0x0, "Pkg_J", NULL, 0, 0, 0, NULL, 0 }, 176 { 0x0, "Cor_J", NULL, 0, 0, 0, NULL, 0 }, 177 { 0x0, "GFX_J", NULL, 0, 0, 0, NULL, 0 }, 178 { 0x0, "RAM_J", NULL, 0, 0, 0, NULL, 0 }, 179 { 0x0, "Mod%c6", NULL, 0, 0, 0, NULL, 0 }, 180 { 0x0, "Totl%C0", NULL, 0, 0, 0, NULL, 0 }, 181 { 0x0, "Any%C0", NULL, 0, 0, 0, NULL, 0 }, 182 { 0x0, "GFX%C0", NULL, 0, 0, 0, NULL, 0 }, 183 { 0x0, "CPUGFX%", NULL, 0, 0, 0, NULL, 0 }, 184 { 0x0, "Core", NULL, 0, 0, 0, NULL, 0 }, 185 { 0x0, "CPU", NULL, 0, 0, 0, NULL, 0 }, 186 { 0x0, "APIC", NULL, 0, 0, 0, NULL, 0 }, 187 { 0x0, "X2APIC", NULL, 0, 0, 0, NULL, 0 }, 188 { 0x0, "Die", NULL, 0, 0, 0, NULL, 0 }, 189 { 0x0, "GFXAMHz", NULL, 0, 0, 0, NULL, 0 }, 190 { 0x0, "IPC", NULL, 0, 0, 0, NULL, 0 }, 191 { 0x0, "CoreThr", NULL, 0, 0, 0, NULL, 0 }, 192 { 0x0, "UncMHz", NULL, 0, 0, 0, NULL, 0 }, 193 { 0x0, "SAM%mc6", NULL, 0, 0, 0, NULL, 0 }, 194 { 0x0, "SAMMHz", NULL, 0, 0, 0, NULL, 0 }, 195 { 0x0, "SAMAMHz", NULL, 0, 0, 0, NULL, 0 }, 196 { 0x0, "Die%c6", NULL, 0, 0, 0, NULL, 0 }, 197 }; 198 199 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) 200 #define BIC_USEC (1ULL << 0) 201 #define BIC_TOD (1ULL << 1) 202 #define BIC_Package (1ULL << 2) 203 #define BIC_Node (1ULL << 3) 204 #define BIC_Avg_MHz (1ULL << 4) 205 #define BIC_Busy (1ULL << 5) 206 #define BIC_Bzy_MHz (1ULL << 6) 207 #define BIC_TSC_MHz (1ULL << 7) 208 #define BIC_IRQ (1ULL << 8) 209 #define BIC_SMI (1ULL << 9) 210 #define BIC_sysfs (1ULL << 10) 211 #define BIC_CPU_c1 (1ULL << 11) 212 #define BIC_CPU_c3 (1ULL << 12) 213 #define BIC_CPU_c6 (1ULL << 13) 214 #define BIC_CPU_c7 (1ULL << 14) 215 #define BIC_ThreadC (1ULL << 15) 216 #define BIC_CoreTmp (1ULL << 16) 217 #define BIC_CoreCnt (1ULL << 17) 218 #define BIC_PkgTmp (1ULL << 18) 219 #define BIC_GFX_rc6 (1ULL << 19) 220 #define BIC_GFXMHz (1ULL << 20) 221 #define BIC_Pkgpc2 (1ULL << 21) 222 #define BIC_Pkgpc3 (1ULL << 22) 223 #define BIC_Pkgpc6 (1ULL << 23) 224 #define BIC_Pkgpc7 (1ULL << 24) 225 #define BIC_Pkgpc8 (1ULL << 25) 226 #define BIC_Pkgpc9 (1ULL << 26) 227 #define BIC_Pkgpc10 (1ULL << 27) 228 #define BIC_CPU_LPI (1ULL << 28) 229 #define BIC_SYS_LPI (1ULL << 29) 230 #define BIC_PkgWatt (1ULL << 30) 231 #define BIC_CorWatt (1ULL << 31) 232 #define BIC_GFXWatt (1ULL << 32) 233 #define BIC_PkgCnt (1ULL << 33) 234 #define BIC_RAMWatt (1ULL << 34) 235 #define BIC_PKG__ (1ULL << 35) 236 #define BIC_RAM__ (1ULL << 36) 237 #define BIC_Pkg_J (1ULL << 37) 238 #define BIC_Cor_J (1ULL << 38) 239 #define BIC_GFX_J (1ULL << 39) 240 #define BIC_RAM_J (1ULL << 40) 241 #define BIC_Mod_c6 (1ULL << 41) 242 #define BIC_Totl_c0 (1ULL << 42) 243 #define BIC_Any_c0 (1ULL << 43) 244 #define BIC_GFX_c0 (1ULL << 44) 245 #define BIC_CPUGFX (1ULL << 45) 246 #define BIC_Core (1ULL << 46) 247 #define BIC_CPU (1ULL << 47) 248 #define BIC_APIC (1ULL << 48) 249 #define BIC_X2APIC (1ULL << 49) 250 #define BIC_Die (1ULL << 50) 251 #define BIC_GFXACTMHz (1ULL << 51) 252 #define BIC_IPC (1ULL << 52) 253 #define BIC_CORE_THROT_CNT (1ULL << 53) 254 #define BIC_UNCORE_MHZ (1ULL << 54) 255 #define BIC_SAM_mc6 (1ULL << 55) 256 #define BIC_SAMMHz (1ULL << 56) 257 #define BIC_SAMACTMHz (1ULL << 57) 258 #define BIC_Diec6 (1ULL << 58) 259 260 #define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die ) 261 #define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__) 262 #define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) 263 #define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) 264 #define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) 265 266 #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) 267 268 unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); 269 unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; 270 271 #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) 272 #define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME) 273 #define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME) 274 #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) 275 #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) 276 #define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT) 277 278 /* 279 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit: 280 * If you change the values, note they are used both in comparisons 281 * (>= PCL__7) and to index pkg_cstate_limit_strings[]. 282 */ 283 #define PCLUKN 0 /* Unknown */ 284 #define PCLRSV 1 /* Reserved */ 285 #define PCL__0 2 /* PC0 */ 286 #define PCL__1 3 /* PC1 */ 287 #define PCL__2 4 /* PC2 */ 288 #define PCL__3 5 /* PC3 */ 289 #define PCL__4 6 /* PC4 */ 290 #define PCL__6 7 /* PC6 */ 291 #define PCL_6N 8 /* PC6 No Retention */ 292 #define PCL_6R 9 /* PC6 Retention */ 293 #define PCL__7 10 /* PC7 */ 294 #define PCL_7S 11 /* PC7 Shrink */ 295 #define PCL__8 12 /* PC8 */ 296 #define PCL__9 13 /* PC9 */ 297 #define PCL_10 14 /* PC10 */ 298 #define PCLUNL 15 /* Unlimited */ 299 300 struct amperf_group_fd; 301 302 char *proc_stat = "/proc/stat"; 303 FILE *outf; 304 int *fd_percpu; 305 int *fd_instr_count_percpu; 306 struct timeval interval_tv = { 5, 0 }; 307 struct timespec interval_ts = { 5, 0 }; 308 309 unsigned int num_iterations; 310 unsigned int header_iterations; 311 unsigned int debug; 312 unsigned int quiet; 313 unsigned int shown; 314 unsigned int sums_need_wide_columns; 315 unsigned int rapl_joules; 316 unsigned int summary_only; 317 unsigned int list_header_only; 318 unsigned int dump_only; 319 unsigned int has_aperf; 320 unsigned int has_aperf_access; 321 unsigned int has_epb; 322 unsigned int has_turbo; 323 unsigned int is_hybrid; 324 unsigned int units = 1000000; /* MHz etc */ 325 unsigned int genuine_intel; 326 unsigned int authentic_amd; 327 unsigned int hygon_genuine; 328 unsigned int max_level, max_extended_level; 329 unsigned int has_invariant_tsc; 330 unsigned int aperf_mperf_multiplier = 1; 331 double bclk; 332 double base_hz; 333 unsigned int has_base_hz; 334 double tsc_tweak = 1.0; 335 unsigned int show_pkg_only; 336 unsigned int show_core_only; 337 char *output_buffer, *outp; 338 unsigned int do_dts; 339 unsigned int do_ptm; 340 unsigned int do_ipc; 341 unsigned long long cpuidle_cur_cpu_lpi_us; 342 unsigned long long cpuidle_cur_sys_lpi_us; 343 unsigned int tj_max; 344 unsigned int tj_max_override; 345 double rapl_power_units, rapl_time_units; 346 double rapl_dram_energy_units, rapl_energy_units; 347 double rapl_joule_counter_range; 348 unsigned int crystal_hz; 349 unsigned long long tsc_hz; 350 int base_cpu; 351 unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ 352 /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ 353 unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ 354 unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ 355 unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ 356 unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ 357 unsigned int first_counter_read = 1; 358 int ignore_stdin; 359 bool no_msr; 360 bool no_perf; 361 362 enum gfx_sysfs_idx { 363 GFX_rc6, 364 GFX_MHz, 365 GFX_ACTMHz, 366 SAM_mc6, 367 SAM_MHz, 368 SAM_ACTMHz, 369 GFX_MAX 370 }; 371 372 struct gfx_sysfs_info { 373 const char *path; 374 FILE *fp; 375 unsigned int val; 376 unsigned long long val_ull; 377 }; 378 379 static struct gfx_sysfs_info gfx_info[GFX_MAX]; 380 381 int get_msr(int cpu, off_t offset, unsigned long long *msr); 382 int add_counter(unsigned int msr_num, char *path, char *name, 383 unsigned int width, enum counter_scope scope, 384 enum counter_type type, enum counter_format format, int flags, int package_num); 385 386 /* Model specific support Start */ 387 388 /* List of features that may diverge among different platforms */ 389 struct platform_features { 390 bool has_msr_misc_feature_control; /* MSR_MISC_FEATURE_CONTROL */ 391 bool has_msr_misc_pwr_mgmt; /* MSR_MISC_PWR_MGMT */ 392 bool has_nhm_msrs; /* MSR_PLATFORM_INFO, MSR_IA32_TEMPERATURE_TARGET, MSR_SMI_COUNT, MSR_PKG_CST_CONFIG_CONTROL, MSR_IA32_POWER_CTL, TRL MSRs */ 393 bool has_config_tdp; /* MSR_CONFIG_TDP_NOMINAL/LEVEL_1/LEVEL_2/CONTROL, MSR_TURBO_ACTIVATION_RATIO */ 394 int bclk_freq; /* CPU base clock */ 395 int crystal_freq; /* Crystal clock to use when not available from CPUID.15 */ 396 int supported_cstates; /* Core cstates and Package cstates supported */ 397 int cst_limit; /* MSR_PKG_CST_CONFIG_CONTROL */ 398 bool has_cst_auto_convension; /* AUTOMATIC_CSTATE_CONVERSION bit in MSR_PKG_CST_CONFIG_CONTROL */ 399 bool has_irtl_msrs; /* MSR_PKGC3/PKGC6/PKGC7/PKGC8/PKGC9/PKGC10_IRTL */ 400 bool has_msr_core_c1_res; /* MSR_CORE_C1_RES */ 401 bool has_msr_module_c6_res_ms; /* MSR_MODULE_C6_RES_MS */ 402 bool has_msr_c6_demotion_policy_config; /* MSR_CC6_DEMOTION_POLICY_CONFIG/MSR_MC6_DEMOTION_POLICY_CONFIG */ 403 bool has_msr_atom_pkg_c6_residency; /* MSR_ATOM_PKG_C6_RESIDENCY */ 404 bool has_msr_knl_core_c6_residency; /* MSR_KNL_CORE_C6_RESIDENCY */ 405 bool has_ext_cst_msrs; /* MSR_PKG_WEIGHTED_CORE_C0_RES/MSR_PKG_ANY_CORE_C0_RES/MSR_PKG_ANY_GFXE_C0_RES/MSR_PKG_BOTH_CORE_GFXE_C0_RES */ 406 bool has_cst_prewake_bit; /* Cstate prewake bit in MSR_IA32_POWER_CTL */ 407 int trl_msrs; /* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */ 408 int plr_msrs; /* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */ 409 int rapl_msrs; /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */ 410 bool has_per_core_rapl; /* Indicates cores energy collection is per-core, not per-package. AMD specific for now */ 411 bool has_rapl_divisor; /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */ 412 bool has_fixed_rapl_unit; /* Fixed Energy Unit used for DRAM RAPL Domain */ 413 int rapl_quirk_tdp; /* Hardcoded TDP value when cannot be retrieved from hardware */ 414 int tcc_offset_bits; /* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */ 415 bool enable_tsc_tweak; /* Use CPU Base freq instead of TSC freq for aperf/mperf counter */ 416 bool need_perf_multiplier; /* mperf/aperf multiplier */ 417 }; 418 419 struct platform_data { 420 unsigned int vfm; 421 const struct platform_features *features; 422 }; 423 424 /* For BCLK */ 425 enum bclk_freq { 426 BCLK_100MHZ = 1, 427 BCLK_133MHZ, 428 BCLK_SLV, 429 }; 430 431 #define SLM_BCLK_FREQS 5 432 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 }; 433 434 double slm_bclk(void) 435 { 436 unsigned long long msr = 3; 437 unsigned int i; 438 double freq; 439 440 if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) 441 fprintf(outf, "SLM BCLK: unknown\n"); 442 443 i = msr & 0xf; 444 if (i >= SLM_BCLK_FREQS) { 445 fprintf(outf, "SLM BCLK[%d] invalid\n", i); 446 i = 3; 447 } 448 freq = slm_freq_table[i]; 449 450 if (!quiet) 451 fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); 452 453 return freq; 454 } 455 456 /* For Package cstate limit */ 457 enum package_cstate_limit { 458 CST_LIMIT_NHM = 1, 459 CST_LIMIT_SNB, 460 CST_LIMIT_HSW, 461 CST_LIMIT_SKX, 462 CST_LIMIT_ICX, 463 CST_LIMIT_SLV, 464 CST_LIMIT_AMT, 465 CST_LIMIT_KNL, 466 CST_LIMIT_GMT, 467 }; 468 469 /* For Turbo Ratio Limit MSRs */ 470 enum turbo_ratio_limit_msrs { 471 TRL_BASE = BIT(0), 472 TRL_LIMIT1 = BIT(1), 473 TRL_LIMIT2 = BIT(2), 474 TRL_ATOM = BIT(3), 475 TRL_KNL = BIT(4), 476 TRL_CORECOUNT = BIT(5), 477 }; 478 479 /* For Perf Limit Reason MSRs */ 480 enum perf_limit_reason_msrs { 481 PLR_CORE = BIT(0), 482 PLR_GFX = BIT(1), 483 PLR_RING = BIT(2), 484 }; 485 486 /* For RAPL MSRs */ 487 enum rapl_msrs { 488 RAPL_PKG_POWER_LIMIT = BIT(0), /* 0x610 MSR_PKG_POWER_LIMIT */ 489 RAPL_PKG_ENERGY_STATUS = BIT(1), /* 0x611 MSR_PKG_ENERGY_STATUS */ 490 RAPL_PKG_PERF_STATUS = BIT(2), /* 0x613 MSR_PKG_PERF_STATUS */ 491 RAPL_PKG_POWER_INFO = BIT(3), /* 0x614 MSR_PKG_POWER_INFO */ 492 RAPL_DRAM_POWER_LIMIT = BIT(4), /* 0x618 MSR_DRAM_POWER_LIMIT */ 493 RAPL_DRAM_ENERGY_STATUS = BIT(5), /* 0x619 MSR_DRAM_ENERGY_STATUS */ 494 RAPL_DRAM_PERF_STATUS = BIT(6), /* 0x61b MSR_DRAM_PERF_STATUS */ 495 RAPL_DRAM_POWER_INFO = BIT(7), /* 0x61c MSR_DRAM_POWER_INFO */ 496 RAPL_CORE_POWER_LIMIT = BIT(8), /* 0x638 MSR_PP0_POWER_LIMIT */ 497 RAPL_CORE_ENERGY_STATUS = BIT(9), /* 0x639 MSR_PP0_ENERGY_STATUS */ 498 RAPL_CORE_POLICY = BIT(10), /* 0x63a MSR_PP0_POLICY */ 499 RAPL_GFX_POWER_LIMIT = BIT(11), /* 0x640 MSR_PP1_POWER_LIMIT */ 500 RAPL_GFX_ENERGY_STATUS = BIT(12), /* 0x641 MSR_PP1_ENERGY_STATUS */ 501 RAPL_GFX_POLICY = BIT(13), /* 0x642 MSR_PP1_POLICY */ 502 RAPL_AMD_PWR_UNIT = BIT(14), /* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */ 503 RAPL_AMD_CORE_ENERGY_STAT = BIT(15), /* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */ 504 RAPL_AMD_PKG_ENERGY_STAT = BIT(16), /* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */ 505 }; 506 507 #define RAPL_PKG (RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT) 508 #define RAPL_DRAM (RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT) 509 #define RAPL_CORE (RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT) 510 #define RAPL_GFX (RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS) 511 512 #define RAPL_PKG_ALL (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO) 513 #define RAPL_DRAM_ALL (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO) 514 #define RAPL_CORE_ALL (RAPL_CORE | RAPL_CORE_POLICY) 515 #define RAPL_GFX_ALL (RAPL_GFX | RAPL_GFX_POLIGY) 516 517 #define RAPL_AMD_F17H (RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | RAPL_AMD_PKG_ENERGY_STAT) 518 519 /* For Cstates */ 520 enum cstates { 521 CC1 = BIT(0), 522 CC3 = BIT(1), 523 CC6 = BIT(2), 524 CC7 = BIT(3), 525 PC2 = BIT(4), 526 PC3 = BIT(5), 527 PC6 = BIT(6), 528 PC7 = BIT(7), 529 PC8 = BIT(8), 530 PC9 = BIT(9), 531 PC10 = BIT(10), 532 }; 533 534 static const struct platform_features nhm_features = { 535 .has_msr_misc_pwr_mgmt = 1, 536 .has_nhm_msrs = 1, 537 .bclk_freq = BCLK_133MHZ, 538 .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6, 539 .cst_limit = CST_LIMIT_NHM, 540 .trl_msrs = TRL_BASE, 541 }; 542 543 static const struct platform_features nhx_features = { 544 .has_msr_misc_pwr_mgmt = 1, 545 .has_nhm_msrs = 1, 546 .bclk_freq = BCLK_133MHZ, 547 .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6, 548 .cst_limit = CST_LIMIT_NHM, 549 }; 550 551 static const struct platform_features snb_features = { 552 .has_msr_misc_feature_control = 1, 553 .has_msr_misc_pwr_mgmt = 1, 554 .has_nhm_msrs = 1, 555 .bclk_freq = BCLK_100MHZ, 556 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 557 .cst_limit = CST_LIMIT_SNB, 558 .has_irtl_msrs = 1, 559 .trl_msrs = TRL_BASE, 560 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 561 }; 562 563 static const struct platform_features snx_features = { 564 .has_msr_misc_feature_control = 1, 565 .has_msr_misc_pwr_mgmt = 1, 566 .has_nhm_msrs = 1, 567 .bclk_freq = BCLK_100MHZ, 568 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 569 .cst_limit = CST_LIMIT_SNB, 570 .has_irtl_msrs = 1, 571 .trl_msrs = TRL_BASE, 572 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, 573 }; 574 575 static const struct platform_features ivb_features = { 576 .has_msr_misc_feature_control = 1, 577 .has_msr_misc_pwr_mgmt = 1, 578 .has_nhm_msrs = 1, 579 .has_config_tdp = 1, 580 .bclk_freq = BCLK_100MHZ, 581 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 582 .cst_limit = CST_LIMIT_SNB, 583 .has_irtl_msrs = 1, 584 .trl_msrs = TRL_BASE, 585 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 586 }; 587 588 static const struct platform_features ivx_features = { 589 .has_msr_misc_feature_control = 1, 590 .has_msr_misc_pwr_mgmt = 1, 591 .has_nhm_msrs = 1, 592 .bclk_freq = BCLK_100MHZ, 593 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 594 .cst_limit = CST_LIMIT_SNB, 595 .has_irtl_msrs = 1, 596 .trl_msrs = TRL_BASE | TRL_LIMIT1, 597 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, 598 }; 599 600 static const struct platform_features hsw_features = { 601 .has_msr_misc_feature_control = 1, 602 .has_msr_misc_pwr_mgmt = 1, 603 .has_nhm_msrs = 1, 604 .has_config_tdp = 1, 605 .bclk_freq = BCLK_100MHZ, 606 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 607 .cst_limit = CST_LIMIT_HSW, 608 .has_irtl_msrs = 1, 609 .trl_msrs = TRL_BASE, 610 .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, 611 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 612 }; 613 614 static const struct platform_features hsx_features = { 615 .has_msr_misc_feature_control = 1, 616 .has_msr_misc_pwr_mgmt = 1, 617 .has_nhm_msrs = 1, 618 .has_config_tdp = 1, 619 .bclk_freq = BCLK_100MHZ, 620 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 621 .cst_limit = CST_LIMIT_HSW, 622 .has_irtl_msrs = 1, 623 .trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2, 624 .plr_msrs = PLR_CORE | PLR_RING, 625 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 626 .has_fixed_rapl_unit = 1, 627 }; 628 629 static const struct platform_features hswl_features = { 630 .has_msr_misc_feature_control = 1, 631 .has_msr_misc_pwr_mgmt = 1, 632 .has_nhm_msrs = 1, 633 .has_config_tdp = 1, 634 .bclk_freq = BCLK_100MHZ, 635 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 636 .cst_limit = CST_LIMIT_HSW, 637 .has_irtl_msrs = 1, 638 .trl_msrs = TRL_BASE, 639 .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, 640 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 641 }; 642 643 static const struct platform_features hswg_features = { 644 .has_msr_misc_feature_control = 1, 645 .has_msr_misc_pwr_mgmt = 1, 646 .has_nhm_msrs = 1, 647 .has_config_tdp = 1, 648 .bclk_freq = BCLK_100MHZ, 649 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 650 .cst_limit = CST_LIMIT_HSW, 651 .has_irtl_msrs = 1, 652 .trl_msrs = TRL_BASE, 653 .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, 654 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 655 }; 656 657 static const struct platform_features bdw_features = { 658 .has_msr_misc_feature_control = 1, 659 .has_msr_misc_pwr_mgmt = 1, 660 .has_nhm_msrs = 1, 661 .has_config_tdp = 1, 662 .bclk_freq = BCLK_100MHZ, 663 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 664 .cst_limit = CST_LIMIT_HSW, 665 .has_irtl_msrs = 1, 666 .trl_msrs = TRL_BASE, 667 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 668 }; 669 670 static const struct platform_features bdwg_features = { 671 .has_msr_misc_feature_control = 1, 672 .has_msr_misc_pwr_mgmt = 1, 673 .has_nhm_msrs = 1, 674 .has_config_tdp = 1, 675 .bclk_freq = BCLK_100MHZ, 676 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 677 .cst_limit = CST_LIMIT_HSW, 678 .has_irtl_msrs = 1, 679 .trl_msrs = TRL_BASE, 680 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 681 }; 682 683 static const struct platform_features bdx_features = { 684 .has_msr_misc_feature_control = 1, 685 .has_msr_misc_pwr_mgmt = 1, 686 .has_nhm_msrs = 1, 687 .has_config_tdp = 1, 688 .bclk_freq = BCLK_100MHZ, 689 .supported_cstates = CC1 | CC3 | CC6 | PC2 | PC3 | PC6, 690 .cst_limit = CST_LIMIT_HSW, 691 .has_irtl_msrs = 1, 692 .has_cst_auto_convension = 1, 693 .trl_msrs = TRL_BASE, 694 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 695 .has_fixed_rapl_unit = 1, 696 }; 697 698 static const struct platform_features skl_features = { 699 .has_msr_misc_feature_control = 1, 700 .has_msr_misc_pwr_mgmt = 1, 701 .has_nhm_msrs = 1, 702 .has_config_tdp = 1, 703 .bclk_freq = BCLK_100MHZ, 704 .crystal_freq = 24000000, 705 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 706 .cst_limit = CST_LIMIT_HSW, 707 .has_irtl_msrs = 1, 708 .has_ext_cst_msrs = 1, 709 .trl_msrs = TRL_BASE, 710 .tcc_offset_bits = 6, 711 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, 712 .enable_tsc_tweak = 1, 713 }; 714 715 static const struct platform_features cnl_features = { 716 .has_msr_misc_feature_control = 1, 717 .has_msr_misc_pwr_mgmt = 1, 718 .has_nhm_msrs = 1, 719 .has_config_tdp = 1, 720 .bclk_freq = BCLK_100MHZ, 721 .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 722 .cst_limit = CST_LIMIT_HSW, 723 .has_irtl_msrs = 1, 724 .has_msr_core_c1_res = 1, 725 .has_ext_cst_msrs = 1, 726 .trl_msrs = TRL_BASE, 727 .tcc_offset_bits = 6, 728 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, 729 .enable_tsc_tweak = 1, 730 }; 731 732 static const struct platform_features adl_features = { 733 .has_msr_misc_feature_control = 1, 734 .has_msr_misc_pwr_mgmt = 1, 735 .has_nhm_msrs = 1, 736 .has_config_tdp = 1, 737 .bclk_freq = BCLK_100MHZ, 738 .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10, 739 .cst_limit = CST_LIMIT_HSW, 740 .has_irtl_msrs = 1, 741 .has_msr_core_c1_res = 1, 742 .has_ext_cst_msrs = 1, 743 .trl_msrs = TRL_BASE, 744 .tcc_offset_bits = 6, 745 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, 746 .enable_tsc_tweak = 1, 747 }; 748 749 static const struct platform_features arl_features = { 750 .has_msr_misc_feature_control = 1, 751 .has_msr_misc_pwr_mgmt = 1, 752 .has_nhm_msrs = 1, 753 .has_config_tdp = 1, 754 .bclk_freq = BCLK_100MHZ, 755 .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC10, 756 .cst_limit = CST_LIMIT_HSW, 757 .has_irtl_msrs = 1, 758 .has_msr_core_c1_res = 1, 759 .has_ext_cst_msrs = 1, 760 .trl_msrs = TRL_BASE, 761 .tcc_offset_bits = 6, 762 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, 763 .enable_tsc_tweak = 1, 764 }; 765 766 static const struct platform_features skx_features = { 767 .has_msr_misc_feature_control = 1, 768 .has_msr_misc_pwr_mgmt = 1, 769 .has_nhm_msrs = 1, 770 .has_config_tdp = 1, 771 .bclk_freq = BCLK_100MHZ, 772 .supported_cstates = CC1 | CC6 | PC2 | PC6, 773 .cst_limit = CST_LIMIT_SKX, 774 .has_irtl_msrs = 1, 775 .has_cst_auto_convension = 1, 776 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 777 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 778 .has_fixed_rapl_unit = 1, 779 }; 780 781 static const struct platform_features icx_features = { 782 .has_msr_misc_feature_control = 1, 783 .has_msr_misc_pwr_mgmt = 1, 784 .has_nhm_msrs = 1, 785 .has_config_tdp = 1, 786 .bclk_freq = BCLK_100MHZ, 787 .supported_cstates = CC1 | CC6 | PC2 | PC6, 788 .cst_limit = CST_LIMIT_ICX, 789 .has_msr_core_c1_res = 1, 790 .has_irtl_msrs = 1, 791 .has_cst_prewake_bit = 1, 792 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 793 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 794 .has_fixed_rapl_unit = 1, 795 }; 796 797 static const struct platform_features spr_features = { 798 .has_msr_misc_feature_control = 1, 799 .has_msr_misc_pwr_mgmt = 1, 800 .has_nhm_msrs = 1, 801 .has_config_tdp = 1, 802 .bclk_freq = BCLK_100MHZ, 803 .supported_cstates = CC1 | CC6 | PC2 | PC6, 804 .cst_limit = CST_LIMIT_SKX, 805 .has_msr_core_c1_res = 1, 806 .has_irtl_msrs = 1, 807 .has_cst_prewake_bit = 1, 808 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 809 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 810 }; 811 812 static const struct platform_features srf_features = { 813 .has_msr_misc_feature_control = 1, 814 .has_msr_misc_pwr_mgmt = 1, 815 .has_nhm_msrs = 1, 816 .has_config_tdp = 1, 817 .bclk_freq = BCLK_100MHZ, 818 .supported_cstates = CC1 | CC6 | PC2 | PC6, 819 .cst_limit = CST_LIMIT_SKX, 820 .has_msr_core_c1_res = 1, 821 .has_msr_module_c6_res_ms = 1, 822 .has_irtl_msrs = 1, 823 .has_cst_prewake_bit = 1, 824 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 825 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 826 }; 827 828 static const struct platform_features grr_features = { 829 .has_msr_misc_feature_control = 1, 830 .has_msr_misc_pwr_mgmt = 1, 831 .has_nhm_msrs = 1, 832 .has_config_tdp = 1, 833 .bclk_freq = BCLK_100MHZ, 834 .supported_cstates = CC1 | CC6, 835 .cst_limit = CST_LIMIT_SKX, 836 .has_msr_core_c1_res = 1, 837 .has_msr_module_c6_res_ms = 1, 838 .has_irtl_msrs = 1, 839 .has_cst_prewake_bit = 1, 840 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 841 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 842 }; 843 844 static const struct platform_features slv_features = { 845 .has_nhm_msrs = 1, 846 .bclk_freq = BCLK_SLV, 847 .supported_cstates = CC1 | CC6 | PC6, 848 .cst_limit = CST_LIMIT_SLV, 849 .has_msr_core_c1_res = 1, 850 .has_msr_module_c6_res_ms = 1, 851 .has_msr_c6_demotion_policy_config = 1, 852 .has_msr_atom_pkg_c6_residency = 1, 853 .trl_msrs = TRL_ATOM, 854 .rapl_msrs = RAPL_PKG | RAPL_CORE, 855 .has_rapl_divisor = 1, 856 .rapl_quirk_tdp = 30, 857 }; 858 859 static const struct platform_features slvd_features = { 860 .has_msr_misc_pwr_mgmt = 1, 861 .has_nhm_msrs = 1, 862 .bclk_freq = BCLK_SLV, 863 .supported_cstates = CC1 | CC6 | PC3 | PC6, 864 .cst_limit = CST_LIMIT_SLV, 865 .has_msr_atom_pkg_c6_residency = 1, 866 .trl_msrs = TRL_BASE, 867 .rapl_msrs = RAPL_PKG | RAPL_CORE, 868 .rapl_quirk_tdp = 30, 869 }; 870 871 static const struct platform_features amt_features = { 872 .has_nhm_msrs = 1, 873 .bclk_freq = BCLK_133MHZ, 874 .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6, 875 .cst_limit = CST_LIMIT_AMT, 876 .trl_msrs = TRL_BASE, 877 }; 878 879 static const struct platform_features gmt_features = { 880 .has_msr_misc_pwr_mgmt = 1, 881 .has_nhm_msrs = 1, 882 .bclk_freq = BCLK_100MHZ, 883 .crystal_freq = 19200000, 884 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 885 .cst_limit = CST_LIMIT_GMT, 886 .has_irtl_msrs = 1, 887 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 888 .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, 889 }; 890 891 static const struct platform_features gmtd_features = { 892 .has_msr_misc_pwr_mgmt = 1, 893 .has_nhm_msrs = 1, 894 .bclk_freq = BCLK_100MHZ, 895 .crystal_freq = 25000000, 896 .supported_cstates = CC1 | CC6 | PC2 | PC6, 897 .cst_limit = CST_LIMIT_GMT, 898 .has_irtl_msrs = 1, 899 .has_msr_core_c1_res = 1, 900 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 901 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS, 902 }; 903 904 static const struct platform_features gmtp_features = { 905 .has_msr_misc_pwr_mgmt = 1, 906 .has_nhm_msrs = 1, 907 .bclk_freq = BCLK_100MHZ, 908 .crystal_freq = 19200000, 909 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 910 .cst_limit = CST_LIMIT_GMT, 911 .has_irtl_msrs = 1, 912 .trl_msrs = TRL_BASE, 913 .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, 914 }; 915 916 static const struct platform_features tmt_features = { 917 .has_msr_misc_pwr_mgmt = 1, 918 .has_nhm_msrs = 1, 919 .bclk_freq = BCLK_100MHZ, 920 .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 921 .cst_limit = CST_LIMIT_GMT, 922 .has_irtl_msrs = 1, 923 .trl_msrs = TRL_BASE, 924 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, 925 .enable_tsc_tweak = 1, 926 }; 927 928 static const struct platform_features tmtd_features = { 929 .has_msr_misc_pwr_mgmt = 1, 930 .has_nhm_msrs = 1, 931 .bclk_freq = BCLK_100MHZ, 932 .supported_cstates = CC1 | CC6, 933 .cst_limit = CST_LIMIT_GMT, 934 .has_irtl_msrs = 1, 935 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 936 .rapl_msrs = RAPL_PKG_ALL, 937 }; 938 939 static const struct platform_features knl_features = { 940 .has_msr_misc_pwr_mgmt = 1, 941 .has_nhm_msrs = 1, 942 .has_config_tdp = 1, 943 .bclk_freq = BCLK_100MHZ, 944 .supported_cstates = CC1 | CC6 | PC3 | PC6, 945 .cst_limit = CST_LIMIT_KNL, 946 .has_msr_knl_core_c6_residency = 1, 947 .trl_msrs = TRL_KNL, 948 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 949 .has_fixed_rapl_unit = 1, 950 .need_perf_multiplier = 1, 951 }; 952 953 static const struct platform_features default_features = { 954 }; 955 956 static const struct platform_features amd_features_with_rapl = { 957 .rapl_msrs = RAPL_AMD_F17H, 958 .has_per_core_rapl = 1, 959 .rapl_quirk_tdp = 280, /* This is the max stock TDP of HEDT/Server Fam17h+ chips */ 960 }; 961 962 static const struct platform_data turbostat_pdata[] = { 963 { INTEL_NEHALEM, &nhm_features }, 964 { INTEL_NEHALEM_G, &nhm_features }, 965 { INTEL_NEHALEM_EP, &nhm_features }, 966 { INTEL_NEHALEM_EX, &nhx_features }, 967 { INTEL_WESTMERE, &nhm_features }, 968 { INTEL_WESTMERE_EP, &nhm_features }, 969 { INTEL_WESTMERE_EX, &nhx_features }, 970 { INTEL_SANDYBRIDGE, &snb_features }, 971 { INTEL_SANDYBRIDGE_X, &snx_features }, 972 { INTEL_IVYBRIDGE, &ivb_features }, 973 { INTEL_IVYBRIDGE_X, &ivx_features }, 974 { INTEL_HASWELL, &hsw_features }, 975 { INTEL_HASWELL_X, &hsx_features }, 976 { INTEL_HASWELL_L, &hswl_features }, 977 { INTEL_HASWELL_G, &hswg_features }, 978 { INTEL_BROADWELL, &bdw_features }, 979 { INTEL_BROADWELL_G, &bdwg_features }, 980 { INTEL_BROADWELL_X, &bdx_features }, 981 { INTEL_BROADWELL_D, &bdx_features }, 982 { INTEL_SKYLAKE_L, &skl_features }, 983 { INTEL_SKYLAKE, &skl_features }, 984 { INTEL_SKYLAKE_X, &skx_features }, 985 { INTEL_KABYLAKE_L, &skl_features }, 986 { INTEL_KABYLAKE, &skl_features }, 987 { INTEL_COMETLAKE, &skl_features }, 988 { INTEL_COMETLAKE_L, &skl_features }, 989 { INTEL_CANNONLAKE_L, &cnl_features }, 990 { INTEL_ICELAKE_X, &icx_features }, 991 { INTEL_ICELAKE_D, &icx_features }, 992 { INTEL_ICELAKE_L, &cnl_features }, 993 { INTEL_ICELAKE_NNPI, &cnl_features }, 994 { INTEL_ROCKETLAKE, &cnl_features }, 995 { INTEL_TIGERLAKE_L, &cnl_features }, 996 { INTEL_TIGERLAKE, &cnl_features }, 997 { INTEL_SAPPHIRERAPIDS_X, &spr_features }, 998 { INTEL_EMERALDRAPIDS_X, &spr_features }, 999 { INTEL_GRANITERAPIDS_X, &spr_features }, 1000 { INTEL_LAKEFIELD, &cnl_features }, 1001 { INTEL_ALDERLAKE, &adl_features }, 1002 { INTEL_ALDERLAKE_L, &adl_features }, 1003 { INTEL_RAPTORLAKE, &adl_features }, 1004 { INTEL_RAPTORLAKE_P, &adl_features }, 1005 { INTEL_RAPTORLAKE_S, &adl_features }, 1006 { INTEL_METEORLAKE, &cnl_features }, 1007 { INTEL_METEORLAKE_L, &cnl_features }, 1008 { INTEL_ARROWLAKE_H, &arl_features }, 1009 { INTEL_ARROWLAKE_U, &arl_features }, 1010 { INTEL_ARROWLAKE, &arl_features }, 1011 { INTEL_LUNARLAKE_M, &arl_features }, 1012 { INTEL_ATOM_SILVERMONT, &slv_features }, 1013 { INTEL_ATOM_SILVERMONT_D, &slvd_features }, 1014 { INTEL_ATOM_AIRMONT, &amt_features }, 1015 { INTEL_ATOM_GOLDMONT, &gmt_features }, 1016 { INTEL_ATOM_GOLDMONT_D, &gmtd_features }, 1017 { INTEL_ATOM_GOLDMONT_PLUS, &gmtp_features }, 1018 { INTEL_ATOM_TREMONT_D, &tmtd_features }, 1019 { INTEL_ATOM_TREMONT, &tmt_features }, 1020 { INTEL_ATOM_TREMONT_L, &tmt_features }, 1021 { INTEL_ATOM_GRACEMONT, &adl_features }, 1022 { INTEL_ATOM_CRESTMONT_X, &srf_features }, 1023 { INTEL_ATOM_CRESTMONT, &grr_features }, 1024 { INTEL_XEON_PHI_KNL, &knl_features }, 1025 { INTEL_XEON_PHI_KNM, &knl_features }, 1026 /* 1027 * Missing support for 1028 * INTEL_ICELAKE 1029 * INTEL_ATOM_SILVERMONT_MID 1030 * INTEL_ATOM_AIRMONT_MID 1031 * INTEL_ATOM_AIRMONT_NP 1032 */ 1033 { 0, NULL }, 1034 }; 1035 1036 static const struct platform_features *platform; 1037 1038 void probe_platform_features(unsigned int family, unsigned int model) 1039 { 1040 int i; 1041 1042 platform = &default_features; 1043 1044 if (authentic_amd || hygon_genuine) { 1045 if (max_extended_level >= 0x80000007) { 1046 unsigned int eax, ebx, ecx, edx; 1047 1048 __cpuid(0x80000007, eax, ebx, ecx, edx); 1049 /* RAPL (Fam 17h+) */ 1050 if ((edx & (1 << 14)) && family >= 0x17) 1051 platform = &amd_features_with_rapl; 1052 } 1053 return; 1054 } 1055 1056 if (!genuine_intel) 1057 return; 1058 1059 for (i = 0; turbostat_pdata[i].features; i++) { 1060 if (VFM_FAMILY(turbostat_pdata[i].vfm) == family && VFM_MODEL(turbostat_pdata[i].vfm) == model) { 1061 platform = turbostat_pdata[i].features; 1062 return; 1063 } 1064 } 1065 } 1066 1067 /* Model specific support End */ 1068 1069 #define TJMAX_DEFAULT 100 1070 1071 /* MSRs that are not yet in the kernel-provided header. */ 1072 #define MSR_RAPL_PWR_UNIT 0xc0010299 1073 #define MSR_CORE_ENERGY_STAT 0xc001029a 1074 #define MSR_PKG_ENERGY_STAT 0xc001029b 1075 1076 #define MAX(a, b) ((a) > (b) ? (a) : (b)) 1077 1078 int backwards_count; 1079 char *progname; 1080 1081 #define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */ 1082 cpu_set_t *cpu_present_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset; 1083 size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size; 1084 #define MAX_ADDED_THREAD_COUNTERS 24 1085 #define MAX_ADDED_CORE_COUNTERS 8 1086 #define MAX_ADDED_PACKAGE_COUNTERS 16 1087 #define PMT_MAX_ADDED_THREAD_COUNTERS 24 1088 #define PMT_MAX_ADDED_CORE_COUNTERS 8 1089 #define PMT_MAX_ADDED_PACKAGE_COUNTERS 16 1090 #define BITMASK_SIZE 32 1091 1092 #define ZERO_ARRAY(arr) (memset(arr, 0, sizeof(arr)) + __must_be_array(arr)) 1093 1094 /* Indexes used to map data read from perf and MSRs into global variables */ 1095 enum rapl_rci_index { 1096 RAPL_RCI_INDEX_ENERGY_PKG = 0, 1097 RAPL_RCI_INDEX_ENERGY_CORES = 1, 1098 RAPL_RCI_INDEX_DRAM = 2, 1099 RAPL_RCI_INDEX_GFX = 3, 1100 RAPL_RCI_INDEX_PKG_PERF_STATUS = 4, 1101 RAPL_RCI_INDEX_DRAM_PERF_STATUS = 5, 1102 RAPL_RCI_INDEX_CORE_ENERGY = 6, 1103 NUM_RAPL_COUNTERS, 1104 }; 1105 1106 enum rapl_unit { 1107 RAPL_UNIT_INVALID, 1108 RAPL_UNIT_JOULES, 1109 RAPL_UNIT_WATTS, 1110 }; 1111 1112 struct rapl_counter_info_t { 1113 unsigned long long data[NUM_RAPL_COUNTERS]; 1114 enum counter_source source[NUM_RAPL_COUNTERS]; 1115 unsigned long long flags[NUM_RAPL_COUNTERS]; 1116 double scale[NUM_RAPL_COUNTERS]; 1117 enum rapl_unit unit[NUM_RAPL_COUNTERS]; 1118 unsigned long long msr[NUM_RAPL_COUNTERS]; 1119 unsigned long long msr_mask[NUM_RAPL_COUNTERS]; 1120 int msr_shift[NUM_RAPL_COUNTERS]; 1121 1122 int fd_perf; 1123 }; 1124 1125 /* struct rapl_counter_info_t for each RAPL domain */ 1126 struct rapl_counter_info_t *rapl_counter_info_perdomain; 1127 unsigned int rapl_counter_info_perdomain_size; 1128 1129 #define RAPL_COUNTER_FLAG_USE_MSR_SUM (1u << 1) 1130 1131 struct rapl_counter_arch_info { 1132 int feature_mask; /* Mask for testing if the counter is supported on host */ 1133 const char *perf_subsys; 1134 const char *perf_name; 1135 unsigned long long msr; 1136 unsigned long long msr_mask; 1137 int msr_shift; /* Positive mean shift right, negative mean shift left */ 1138 double *platform_rapl_msr_scale; /* Scale applied to values read by MSR (platform dependent, filled at runtime) */ 1139 unsigned int rci_index; /* Maps data from perf counters to global variables */ 1140 unsigned long long bic; 1141 double compat_scale; /* Some counters require constant scaling to be in the same range as other, similar ones */ 1142 unsigned long long flags; 1143 }; 1144 1145 static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { 1146 { 1147 .feature_mask = RAPL_PKG, 1148 .perf_subsys = "power", 1149 .perf_name = "energy-pkg", 1150 .msr = MSR_PKG_ENERGY_STATUS, 1151 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1152 .msr_shift = 0, 1153 .platform_rapl_msr_scale = &rapl_energy_units, 1154 .rci_index = RAPL_RCI_INDEX_ENERGY_PKG, 1155 .bic = BIC_PkgWatt | BIC_Pkg_J, 1156 .compat_scale = 1.0, 1157 .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, 1158 }, 1159 { 1160 .feature_mask = RAPL_AMD_F17H, 1161 .perf_subsys = "power", 1162 .perf_name = "energy-pkg", 1163 .msr = MSR_PKG_ENERGY_STAT, 1164 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1165 .msr_shift = 0, 1166 .platform_rapl_msr_scale = &rapl_energy_units, 1167 .rci_index = RAPL_RCI_INDEX_ENERGY_PKG, 1168 .bic = BIC_PkgWatt | BIC_Pkg_J, 1169 .compat_scale = 1.0, 1170 .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, 1171 }, 1172 { 1173 .feature_mask = RAPL_CORE_ENERGY_STATUS, 1174 .perf_subsys = "power", 1175 .perf_name = "energy-cores", 1176 .msr = MSR_PP0_ENERGY_STATUS, 1177 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1178 .msr_shift = 0, 1179 .platform_rapl_msr_scale = &rapl_energy_units, 1180 .rci_index = RAPL_RCI_INDEX_ENERGY_CORES, 1181 .bic = BIC_CorWatt | BIC_Cor_J, 1182 .compat_scale = 1.0, 1183 .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, 1184 }, 1185 { 1186 .feature_mask = RAPL_DRAM, 1187 .perf_subsys = "power", 1188 .perf_name = "energy-ram", 1189 .msr = MSR_DRAM_ENERGY_STATUS, 1190 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1191 .msr_shift = 0, 1192 .platform_rapl_msr_scale = &rapl_dram_energy_units, 1193 .rci_index = RAPL_RCI_INDEX_DRAM, 1194 .bic = BIC_RAMWatt | BIC_RAM_J, 1195 .compat_scale = 1.0, 1196 .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, 1197 }, 1198 { 1199 .feature_mask = RAPL_GFX, 1200 .perf_subsys = "power", 1201 .perf_name = "energy-gpu", 1202 .msr = MSR_PP1_ENERGY_STATUS, 1203 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1204 .msr_shift = 0, 1205 .platform_rapl_msr_scale = &rapl_energy_units, 1206 .rci_index = RAPL_RCI_INDEX_GFX, 1207 .bic = BIC_GFXWatt | BIC_GFX_J, 1208 .compat_scale = 1.0, 1209 .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, 1210 }, 1211 { 1212 .feature_mask = RAPL_PKG_PERF_STATUS, 1213 .perf_subsys = NULL, 1214 .perf_name = NULL, 1215 .msr = MSR_PKG_PERF_STATUS, 1216 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1217 .msr_shift = 0, 1218 .platform_rapl_msr_scale = &rapl_time_units, 1219 .rci_index = RAPL_RCI_INDEX_PKG_PERF_STATUS, 1220 .bic = BIC_PKG__, 1221 .compat_scale = 100.0, 1222 .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, 1223 }, 1224 { 1225 .feature_mask = RAPL_DRAM_PERF_STATUS, 1226 .perf_subsys = NULL, 1227 .perf_name = NULL, 1228 .msr = MSR_DRAM_PERF_STATUS, 1229 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1230 .msr_shift = 0, 1231 .platform_rapl_msr_scale = &rapl_time_units, 1232 .rci_index = RAPL_RCI_INDEX_DRAM_PERF_STATUS, 1233 .bic = BIC_RAM__, 1234 .compat_scale = 100.0, 1235 .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, 1236 }, 1237 { 1238 .feature_mask = RAPL_AMD_F17H, 1239 .perf_subsys = NULL, 1240 .perf_name = NULL, 1241 .msr = MSR_CORE_ENERGY_STAT, 1242 .msr_mask = 0xFFFFFFFF, 1243 .msr_shift = 0, 1244 .platform_rapl_msr_scale = &rapl_energy_units, 1245 .rci_index = RAPL_RCI_INDEX_CORE_ENERGY, 1246 .bic = BIC_CorWatt | BIC_Cor_J, 1247 .compat_scale = 1.0, 1248 .flags = 0, 1249 }, 1250 }; 1251 1252 struct rapl_counter { 1253 unsigned long long raw_value; 1254 enum rapl_unit unit; 1255 double scale; 1256 }; 1257 1258 /* Indexes used to map data read from perf and MSRs into global variables */ 1259 enum ccstate_rci_index { 1260 CCSTATE_RCI_INDEX_C1_RESIDENCY = 0, 1261 CCSTATE_RCI_INDEX_C3_RESIDENCY = 1, 1262 CCSTATE_RCI_INDEX_C6_RESIDENCY = 2, 1263 CCSTATE_RCI_INDEX_C7_RESIDENCY = 3, 1264 PCSTATE_RCI_INDEX_C2_RESIDENCY = 4, 1265 PCSTATE_RCI_INDEX_C3_RESIDENCY = 5, 1266 PCSTATE_RCI_INDEX_C6_RESIDENCY = 6, 1267 PCSTATE_RCI_INDEX_C7_RESIDENCY = 7, 1268 PCSTATE_RCI_INDEX_C8_RESIDENCY = 8, 1269 PCSTATE_RCI_INDEX_C9_RESIDENCY = 9, 1270 PCSTATE_RCI_INDEX_C10_RESIDENCY = 10, 1271 NUM_CSTATE_COUNTERS, 1272 }; 1273 1274 struct cstate_counter_info_t { 1275 unsigned long long data[NUM_CSTATE_COUNTERS]; 1276 enum counter_source source[NUM_CSTATE_COUNTERS]; 1277 unsigned long long msr[NUM_CSTATE_COUNTERS]; 1278 int fd_perf_core; 1279 int fd_perf_pkg; 1280 }; 1281 1282 struct cstate_counter_info_t *ccstate_counter_info; 1283 unsigned int ccstate_counter_info_size; 1284 1285 #define CSTATE_COUNTER_FLAG_COLLECT_PER_CORE (1u << 0) 1286 #define CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD ((1u << 1) | CSTATE_COUNTER_FLAG_COLLECT_PER_CORE) 1287 #define CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY (1u << 2) 1288 1289 struct cstate_counter_arch_info { 1290 int feature_mask; /* Mask for testing if the counter is supported on host */ 1291 const char *perf_subsys; 1292 const char *perf_name; 1293 unsigned long long msr; 1294 unsigned int rci_index; /* Maps data from perf counters to global variables */ 1295 unsigned long long bic; 1296 unsigned long long flags; 1297 int pkg_cstate_limit; 1298 }; 1299 1300 static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { 1301 { 1302 .feature_mask = CC1, 1303 .perf_subsys = "cstate_core", 1304 .perf_name = "c1-residency", 1305 .msr = MSR_CORE_C1_RES, 1306 .rci_index = CCSTATE_RCI_INDEX_C1_RESIDENCY, 1307 .bic = BIC_CPU_c1, 1308 .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD, 1309 .pkg_cstate_limit = 0, 1310 }, 1311 { 1312 .feature_mask = CC3, 1313 .perf_subsys = "cstate_core", 1314 .perf_name = "c3-residency", 1315 .msr = MSR_CORE_C3_RESIDENCY, 1316 .rci_index = CCSTATE_RCI_INDEX_C3_RESIDENCY, 1317 .bic = BIC_CPU_c3, 1318 .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY, 1319 .pkg_cstate_limit = 0, 1320 }, 1321 { 1322 .feature_mask = CC6, 1323 .perf_subsys = "cstate_core", 1324 .perf_name = "c6-residency", 1325 .msr = MSR_CORE_C6_RESIDENCY, 1326 .rci_index = CCSTATE_RCI_INDEX_C6_RESIDENCY, 1327 .bic = BIC_CPU_c6, 1328 .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY, 1329 .pkg_cstate_limit = 0, 1330 }, 1331 { 1332 .feature_mask = CC7, 1333 .perf_subsys = "cstate_core", 1334 .perf_name = "c7-residency", 1335 .msr = MSR_CORE_C7_RESIDENCY, 1336 .rci_index = CCSTATE_RCI_INDEX_C7_RESIDENCY, 1337 .bic = BIC_CPU_c7, 1338 .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY, 1339 .pkg_cstate_limit = 0, 1340 }, 1341 { 1342 .feature_mask = PC2, 1343 .perf_subsys = "cstate_pkg", 1344 .perf_name = "c2-residency", 1345 .msr = MSR_PKG_C2_RESIDENCY, 1346 .rci_index = PCSTATE_RCI_INDEX_C2_RESIDENCY, 1347 .bic = BIC_Pkgpc2, 1348 .flags = 0, 1349 .pkg_cstate_limit = PCL__2, 1350 }, 1351 { 1352 .feature_mask = PC3, 1353 .perf_subsys = "cstate_pkg", 1354 .perf_name = "c3-residency", 1355 .msr = MSR_PKG_C3_RESIDENCY, 1356 .rci_index = PCSTATE_RCI_INDEX_C3_RESIDENCY, 1357 .bic = BIC_Pkgpc3, 1358 .flags = 0, 1359 .pkg_cstate_limit = PCL__3, 1360 }, 1361 { 1362 .feature_mask = PC6, 1363 .perf_subsys = "cstate_pkg", 1364 .perf_name = "c6-residency", 1365 .msr = MSR_PKG_C6_RESIDENCY, 1366 .rci_index = PCSTATE_RCI_INDEX_C6_RESIDENCY, 1367 .bic = BIC_Pkgpc6, 1368 .flags = 0, 1369 .pkg_cstate_limit = PCL__6, 1370 }, 1371 { 1372 .feature_mask = PC7, 1373 .perf_subsys = "cstate_pkg", 1374 .perf_name = "c7-residency", 1375 .msr = MSR_PKG_C7_RESIDENCY, 1376 .rci_index = PCSTATE_RCI_INDEX_C7_RESIDENCY, 1377 .bic = BIC_Pkgpc7, 1378 .flags = 0, 1379 .pkg_cstate_limit = PCL__7, 1380 }, 1381 { 1382 .feature_mask = PC8, 1383 .perf_subsys = "cstate_pkg", 1384 .perf_name = "c8-residency", 1385 .msr = MSR_PKG_C8_RESIDENCY, 1386 .rci_index = PCSTATE_RCI_INDEX_C8_RESIDENCY, 1387 .bic = BIC_Pkgpc8, 1388 .flags = 0, 1389 .pkg_cstate_limit = PCL__8, 1390 }, 1391 { 1392 .feature_mask = PC9, 1393 .perf_subsys = "cstate_pkg", 1394 .perf_name = "c9-residency", 1395 .msr = MSR_PKG_C9_RESIDENCY, 1396 .rci_index = PCSTATE_RCI_INDEX_C9_RESIDENCY, 1397 .bic = BIC_Pkgpc9, 1398 .flags = 0, 1399 .pkg_cstate_limit = PCL__9, 1400 }, 1401 { 1402 .feature_mask = PC10, 1403 .perf_subsys = "cstate_pkg", 1404 .perf_name = "c10-residency", 1405 .msr = MSR_PKG_C10_RESIDENCY, 1406 .rci_index = PCSTATE_RCI_INDEX_C10_RESIDENCY, 1407 .bic = BIC_Pkgpc10, 1408 .flags = 0, 1409 .pkg_cstate_limit = PCL_10, 1410 }, 1411 }; 1412 1413 /* Indexes used to map data read from perf and MSRs into global variables */ 1414 enum msr_rci_index { 1415 MSR_RCI_INDEX_APERF = 0, 1416 MSR_RCI_INDEX_MPERF = 1, 1417 MSR_RCI_INDEX_SMI = 2, 1418 NUM_MSR_COUNTERS, 1419 }; 1420 1421 struct msr_counter_info_t { 1422 unsigned long long data[NUM_MSR_COUNTERS]; 1423 enum counter_source source[NUM_MSR_COUNTERS]; 1424 unsigned long long msr[NUM_MSR_COUNTERS]; 1425 unsigned long long msr_mask[NUM_MSR_COUNTERS]; 1426 int fd_perf; 1427 }; 1428 1429 struct msr_counter_info_t *msr_counter_info; 1430 unsigned int msr_counter_info_size; 1431 1432 struct msr_counter_arch_info { 1433 const char *perf_subsys; 1434 const char *perf_name; 1435 unsigned long long msr; 1436 unsigned long long msr_mask; 1437 unsigned int rci_index; /* Maps data from perf counters to global variables */ 1438 bool needed; 1439 bool present; 1440 }; 1441 1442 enum msr_arch_info_index { 1443 MSR_ARCH_INFO_APERF_INDEX = 0, 1444 MSR_ARCH_INFO_MPERF_INDEX = 1, 1445 MSR_ARCH_INFO_SMI_INDEX = 2, 1446 }; 1447 1448 static struct msr_counter_arch_info msr_counter_arch_infos[] = { 1449 [MSR_ARCH_INFO_APERF_INDEX] = { 1450 .perf_subsys = "msr", 1451 .perf_name = "aperf", 1452 .msr = MSR_IA32_APERF, 1453 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1454 .rci_index = MSR_RCI_INDEX_APERF, 1455 }, 1456 1457 [MSR_ARCH_INFO_MPERF_INDEX] = { 1458 .perf_subsys = "msr", 1459 .perf_name = "mperf", 1460 .msr = MSR_IA32_MPERF, 1461 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1462 .rci_index = MSR_RCI_INDEX_MPERF, 1463 }, 1464 1465 [MSR_ARCH_INFO_SMI_INDEX] = { 1466 .perf_subsys = "msr", 1467 .perf_name = "smi", 1468 .msr = MSR_SMI_COUNT, 1469 .msr_mask = 0xFFFFFFFF, 1470 .rci_index = MSR_RCI_INDEX_SMI, 1471 }, 1472 }; 1473 1474 /* Can be redefined when compiling, useful for testing. */ 1475 #ifndef SYSFS_TELEM_PATH 1476 #define SYSFS_TELEM_PATH "/sys/class/intel_pmt" 1477 #endif 1478 1479 #define PMT_COUNTER_MTL_DC6_OFFSET 120 1480 #define PMT_COUNTER_MTL_DC6_LSB 0 1481 #define PMT_COUNTER_MTL_DC6_MSB 63 1482 #define PMT_MTL_DC6_GUID 0x1a067102 1483 1484 #define PMT_COUNTER_NAME_SIZE_BYTES 16 1485 #define PMT_COUNTER_TYPE_NAME_SIZE_BYTES 32 1486 1487 struct pmt_mmio { 1488 struct pmt_mmio *next; 1489 1490 unsigned int guid; 1491 unsigned int size; 1492 1493 /* Base pointer to the mmaped memory. */ 1494 void *mmio_base; 1495 1496 /* 1497 * Offset to be applied to the mmio_base 1498 * to get the beginning of the PMT counters for given GUID. 1499 */ 1500 unsigned long pmt_offset; 1501 } *pmt_mmios; 1502 1503 enum pmt_datatype { 1504 PMT_TYPE_RAW, 1505 PMT_TYPE_XTAL_TIME, 1506 }; 1507 1508 struct pmt_domain_info { 1509 /* 1510 * Pointer to the MMIO obtained by applying a counter offset 1511 * to the mmio_base of the mmaped region for the given GUID. 1512 * 1513 * This is where to read the raw value of the counter from. 1514 */ 1515 unsigned long *pcounter; 1516 }; 1517 1518 struct pmt_counter { 1519 struct pmt_counter *next; 1520 1521 /* PMT metadata */ 1522 char name[PMT_COUNTER_NAME_SIZE_BYTES]; 1523 enum pmt_datatype type; 1524 enum counter_scope scope; 1525 unsigned int lsb; 1526 unsigned int msb; 1527 1528 /* BIC-like metadata */ 1529 enum counter_format format; 1530 1531 unsigned int num_domains; 1532 struct pmt_domain_info *domains; 1533 }; 1534 1535 unsigned int pmt_counter_get_width(const struct pmt_counter *p) 1536 { 1537 return (p->msb - p->lsb) + 1; 1538 } 1539 1540 void pmt_counter_resize_(struct pmt_counter *pcounter, unsigned int new_size) 1541 { 1542 struct pmt_domain_info *new_mem; 1543 1544 new_mem = (struct pmt_domain_info *)reallocarray(pcounter->domains, new_size, sizeof(*pcounter->domains)); 1545 if (!new_mem) { 1546 fprintf(stderr, "%s: failed to allocate memory for PMT counters\n", __func__); 1547 exit(1); 1548 } 1549 1550 /* Zero initialize just allocated memory. */ 1551 const size_t num_new_domains = new_size - pcounter->num_domains; 1552 1553 memset(&new_mem[pcounter->num_domains], 0, num_new_domains * sizeof(*pcounter->domains)); 1554 1555 pcounter->num_domains = new_size; 1556 pcounter->domains = new_mem; 1557 } 1558 1559 void pmt_counter_resize(struct pmt_counter *pcounter, unsigned int new_size) 1560 { 1561 /* 1562 * Allocate more memory ahead of time. 1563 * 1564 * Always allocate space for at least 8 elements 1565 * and double the size when growing. 1566 */ 1567 if (new_size < 8) 1568 new_size = 8; 1569 new_size = MAX(new_size, pcounter->num_domains * 2); 1570 1571 pmt_counter_resize_(pcounter, new_size); 1572 } 1573 1574 struct thread_data { 1575 struct timeval tv_begin; 1576 struct timeval tv_end; 1577 struct timeval tv_delta; 1578 unsigned long long tsc; 1579 unsigned long long aperf; 1580 unsigned long long mperf; 1581 unsigned long long c1; 1582 unsigned long long instr_count; 1583 unsigned long long irq_count; 1584 unsigned int smi_count; 1585 unsigned int cpu_id; 1586 unsigned int apic_id; 1587 unsigned int x2apic_id; 1588 unsigned int flags; 1589 bool is_atom; 1590 unsigned long long counter[MAX_ADDED_THREAD_COUNTERS]; 1591 unsigned long long perf_counter[MAX_ADDED_THREAD_COUNTERS]; 1592 unsigned long long pmt_counter[PMT_MAX_ADDED_THREAD_COUNTERS]; 1593 } *thread_even, *thread_odd; 1594 1595 struct core_data { 1596 int base_cpu; 1597 unsigned long long c3; 1598 unsigned long long c6; 1599 unsigned long long c7; 1600 unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */ 1601 unsigned int core_temp_c; 1602 struct rapl_counter core_energy; /* MSR_CORE_ENERGY_STAT */ 1603 unsigned int core_id; 1604 unsigned long long core_throt_cnt; 1605 unsigned long long counter[MAX_ADDED_CORE_COUNTERS]; 1606 unsigned long long perf_counter[MAX_ADDED_CORE_COUNTERS]; 1607 unsigned long long pmt_counter[PMT_MAX_ADDED_CORE_COUNTERS]; 1608 } *core_even, *core_odd; 1609 1610 struct pkg_data { 1611 int base_cpu; 1612 unsigned long long pc2; 1613 unsigned long long pc3; 1614 unsigned long long pc6; 1615 unsigned long long pc7; 1616 unsigned long long pc8; 1617 unsigned long long pc9; 1618 unsigned long long pc10; 1619 long long cpu_lpi; 1620 long long sys_lpi; 1621 unsigned long long pkg_wtd_core_c0; 1622 unsigned long long pkg_any_core_c0; 1623 unsigned long long pkg_any_gfxe_c0; 1624 unsigned long long pkg_both_core_gfxe_c0; 1625 long long gfx_rc6_ms; 1626 unsigned int gfx_mhz; 1627 unsigned int gfx_act_mhz; 1628 long long sam_mc6_ms; 1629 unsigned int sam_mhz; 1630 unsigned int sam_act_mhz; 1631 unsigned int package_id; 1632 struct rapl_counter energy_pkg; /* MSR_PKG_ENERGY_STATUS */ 1633 struct rapl_counter energy_dram; /* MSR_DRAM_ENERGY_STATUS */ 1634 struct rapl_counter energy_cores; /* MSR_PP0_ENERGY_STATUS */ 1635 struct rapl_counter energy_gfx; /* MSR_PP1_ENERGY_STATUS */ 1636 struct rapl_counter rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ 1637 struct rapl_counter rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ 1638 unsigned int pkg_temp_c; 1639 unsigned int uncore_mhz; 1640 unsigned long long die_c6; 1641 unsigned long long counter[MAX_ADDED_PACKAGE_COUNTERS]; 1642 unsigned long long perf_counter[MAX_ADDED_PACKAGE_COUNTERS]; 1643 unsigned long long pmt_counter[PMT_MAX_ADDED_PACKAGE_COUNTERS]; 1644 } *package_even, *package_odd; 1645 1646 #define ODD_COUNTERS thread_odd, core_odd, package_odd 1647 #define EVEN_COUNTERS thread_even, core_even, package_even 1648 1649 #define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no) \ 1650 ((thread_base) + \ 1651 ((pkg_no) * \ 1652 topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \ 1653 ((node_no) * topo.cores_per_node * topo.threads_per_core) + \ 1654 ((core_no) * topo.threads_per_core) + \ 1655 (thread_no)) 1656 1657 #define GET_CORE(core_base, core_no, node_no, pkg_no) \ 1658 ((core_base) + \ 1659 ((pkg_no) * topo.nodes_per_pkg * topo.cores_per_node) + \ 1660 ((node_no) * topo.cores_per_node) + \ 1661 (core_no)) 1662 1663 #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) 1664 1665 /* 1666 * The accumulated sum of MSR is defined as a monotonic 1667 * increasing MSR, it will be accumulated periodically, 1668 * despite its register's bit width. 1669 */ 1670 enum { 1671 IDX_PKG_ENERGY, 1672 IDX_DRAM_ENERGY, 1673 IDX_PP0_ENERGY, 1674 IDX_PP1_ENERGY, 1675 IDX_PKG_PERF, 1676 IDX_DRAM_PERF, 1677 IDX_COUNT, 1678 }; 1679 1680 int get_msr_sum(int cpu, off_t offset, unsigned long long *msr); 1681 1682 struct msr_sum_array { 1683 /* get_msr_sum() = sum + (get_msr() - last) */ 1684 struct { 1685 /*The accumulated MSR value is updated by the timer */ 1686 unsigned long long sum; 1687 /*The MSR footprint recorded in last timer */ 1688 unsigned long long last; 1689 } entries[IDX_COUNT]; 1690 }; 1691 1692 /* The percpu MSR sum array.*/ 1693 struct msr_sum_array *per_cpu_msr_sum; 1694 1695 off_t idx_to_offset(int idx) 1696 { 1697 off_t offset; 1698 1699 switch (idx) { 1700 case IDX_PKG_ENERGY: 1701 if (platform->rapl_msrs & RAPL_AMD_F17H) 1702 offset = MSR_PKG_ENERGY_STAT; 1703 else 1704 offset = MSR_PKG_ENERGY_STATUS; 1705 break; 1706 case IDX_DRAM_ENERGY: 1707 offset = MSR_DRAM_ENERGY_STATUS; 1708 break; 1709 case IDX_PP0_ENERGY: 1710 offset = MSR_PP0_ENERGY_STATUS; 1711 break; 1712 case IDX_PP1_ENERGY: 1713 offset = MSR_PP1_ENERGY_STATUS; 1714 break; 1715 case IDX_PKG_PERF: 1716 offset = MSR_PKG_PERF_STATUS; 1717 break; 1718 case IDX_DRAM_PERF: 1719 offset = MSR_DRAM_PERF_STATUS; 1720 break; 1721 default: 1722 offset = -1; 1723 } 1724 return offset; 1725 } 1726 1727 int offset_to_idx(off_t offset) 1728 { 1729 int idx; 1730 1731 switch (offset) { 1732 case MSR_PKG_ENERGY_STATUS: 1733 case MSR_PKG_ENERGY_STAT: 1734 idx = IDX_PKG_ENERGY; 1735 break; 1736 case MSR_DRAM_ENERGY_STATUS: 1737 idx = IDX_DRAM_ENERGY; 1738 break; 1739 case MSR_PP0_ENERGY_STATUS: 1740 idx = IDX_PP0_ENERGY; 1741 break; 1742 case MSR_PP1_ENERGY_STATUS: 1743 idx = IDX_PP1_ENERGY; 1744 break; 1745 case MSR_PKG_PERF_STATUS: 1746 idx = IDX_PKG_PERF; 1747 break; 1748 case MSR_DRAM_PERF_STATUS: 1749 idx = IDX_DRAM_PERF; 1750 break; 1751 default: 1752 idx = -1; 1753 } 1754 return idx; 1755 } 1756 1757 int idx_valid(int idx) 1758 { 1759 switch (idx) { 1760 case IDX_PKG_ENERGY: 1761 return platform->rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H); 1762 case IDX_DRAM_ENERGY: 1763 return platform->rapl_msrs & RAPL_DRAM; 1764 case IDX_PP0_ENERGY: 1765 return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS; 1766 case IDX_PP1_ENERGY: 1767 return platform->rapl_msrs & RAPL_GFX; 1768 case IDX_PKG_PERF: 1769 return platform->rapl_msrs & RAPL_PKG_PERF_STATUS; 1770 case IDX_DRAM_PERF: 1771 return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS; 1772 default: 1773 return 0; 1774 } 1775 } 1776 1777 struct sys_counters { 1778 /* MSR added counters */ 1779 unsigned int added_thread_counters; 1780 unsigned int added_core_counters; 1781 unsigned int added_package_counters; 1782 struct msr_counter *tp; 1783 struct msr_counter *cp; 1784 struct msr_counter *pp; 1785 1786 /* perf added counters */ 1787 unsigned int added_thread_perf_counters; 1788 unsigned int added_core_perf_counters; 1789 unsigned int added_package_perf_counters; 1790 struct perf_counter_info *perf_tp; 1791 struct perf_counter_info *perf_cp; 1792 struct perf_counter_info *perf_pp; 1793 1794 struct pmt_counter *pmt_tp; 1795 struct pmt_counter *pmt_cp; 1796 struct pmt_counter *pmt_pp; 1797 } sys; 1798 1799 static size_t free_msr_counters_(struct msr_counter **pp) 1800 { 1801 struct msr_counter *p = NULL; 1802 size_t num_freed = 0; 1803 1804 while (*pp) { 1805 p = *pp; 1806 1807 if (p->msr_num != 0) { 1808 *pp = p->next; 1809 1810 free(p); 1811 ++num_freed; 1812 1813 continue; 1814 } 1815 1816 pp = &p->next; 1817 } 1818 1819 return num_freed; 1820 } 1821 1822 /* 1823 * Free all added counters accessed via msr. 1824 */ 1825 static void free_sys_msr_counters(void) 1826 { 1827 /* Thread counters */ 1828 sys.added_thread_counters -= free_msr_counters_(&sys.tp); 1829 1830 /* Core counters */ 1831 sys.added_core_counters -= free_msr_counters_(&sys.cp); 1832 1833 /* Package counters */ 1834 sys.added_package_counters -= free_msr_counters_(&sys.pp); 1835 } 1836 1837 struct system_summary { 1838 struct thread_data threads; 1839 struct core_data cores; 1840 struct pkg_data packages; 1841 } average; 1842 1843 struct cpu_topology { 1844 int physical_package_id; 1845 int die_id; 1846 int logical_cpu_id; 1847 int physical_node_id; 1848 int logical_node_id; /* 0-based count within the package */ 1849 int physical_core_id; 1850 int thread_id; 1851 cpu_set_t *put_ids; /* Processing Unit/Thread IDs */ 1852 } *cpus; 1853 1854 struct topo_params { 1855 int num_packages; 1856 int num_die; 1857 int num_cpus; 1858 int num_cores; 1859 int allowed_packages; 1860 int allowed_cpus; 1861 int allowed_cores; 1862 int max_cpu_num; 1863 int max_core_id; 1864 int max_package_id; 1865 int max_die_id; 1866 int max_node_num; 1867 int nodes_per_pkg; 1868 int cores_per_node; 1869 int threads_per_core; 1870 } topo; 1871 1872 struct timeval tv_even, tv_odd, tv_delta; 1873 1874 int *irq_column_2_cpu; /* /proc/interrupts column numbers */ 1875 int *irqs_per_cpu; /* indexed by cpu_num */ 1876 1877 void setup_all_buffers(bool startup); 1878 1879 char *sys_lpi_file; 1880 char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us"; 1881 char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec"; 1882 1883 int cpu_is_not_present(int cpu) 1884 { 1885 return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set); 1886 } 1887 1888 int cpu_is_not_allowed(int cpu) 1889 { 1890 return !CPU_ISSET_S(cpu, cpu_allowed_setsize, cpu_allowed_set); 1891 } 1892 1893 /* 1894 * run func(thread, core, package) in topology order 1895 * skip non-present cpus 1896 */ 1897 1898 int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *), 1899 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) 1900 { 1901 int retval, pkg_no, core_no, thread_no, node_no; 1902 1903 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 1904 for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) { 1905 for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { 1906 for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { 1907 struct thread_data *t; 1908 struct core_data *c; 1909 struct pkg_data *p; 1910 t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); 1911 1912 if (cpu_is_not_allowed(t->cpu_id)) 1913 continue; 1914 1915 c = GET_CORE(core_base, core_no, node_no, pkg_no); 1916 p = GET_PKG(pkg_base, pkg_no); 1917 1918 retval = func(t, c, p); 1919 if (retval) 1920 return retval; 1921 } 1922 } 1923 } 1924 } 1925 return 0; 1926 } 1927 1928 int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p) 1929 { 1930 UNUSED(p); 1931 1932 return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0); 1933 } 1934 1935 int is_cpu_first_core_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) 1936 { 1937 UNUSED(c); 1938 1939 return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0); 1940 } 1941 1942 int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) 1943 { 1944 return is_cpu_first_thread_in_core(t, c, p) && is_cpu_first_core_in_package(t, c, p); 1945 } 1946 1947 int cpu_migrate(int cpu) 1948 { 1949 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 1950 CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); 1951 if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) 1952 return -1; 1953 else 1954 return 0; 1955 } 1956 1957 int get_msr_fd(int cpu) 1958 { 1959 char pathname[32]; 1960 int fd; 1961 1962 fd = fd_percpu[cpu]; 1963 1964 if (fd) 1965 return fd; 1966 1967 sprintf(pathname, "/dev/cpu/%d/msr", cpu); 1968 fd = open(pathname, O_RDONLY); 1969 if (fd < 0) 1970 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, " 1971 "or run with --no-msr, or run as root", pathname); 1972 1973 fd_percpu[cpu] = fd; 1974 1975 return fd; 1976 } 1977 1978 static void bic_disable_msr_access(void) 1979 { 1980 const unsigned long bic_msrs = BIC_Mod_c6 | BIC_CoreTmp | 1981 BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_PkgTmp; 1982 1983 bic_enabled &= ~bic_msrs; 1984 1985 free_sys_msr_counters(); 1986 } 1987 1988 static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) 1989 { 1990 assert(!no_perf); 1991 1992 return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); 1993 } 1994 1995 static long open_perf_counter(int cpu, unsigned int type, unsigned int config, int group_fd, __u64 read_format) 1996 { 1997 struct perf_event_attr attr; 1998 const pid_t pid = -1; 1999 const unsigned long flags = 0; 2000 2001 assert(!no_perf); 2002 2003 memset(&attr, 0, sizeof(struct perf_event_attr)); 2004 2005 attr.type = type; 2006 attr.size = sizeof(struct perf_event_attr); 2007 attr.config = config; 2008 attr.disabled = 0; 2009 attr.sample_type = PERF_SAMPLE_IDENTIFIER; 2010 attr.read_format = read_format; 2011 2012 const int fd = perf_event_open(&attr, pid, cpu, group_fd, flags); 2013 2014 return fd; 2015 } 2016 2017 int get_instr_count_fd(int cpu) 2018 { 2019 if (fd_instr_count_percpu[cpu]) 2020 return fd_instr_count_percpu[cpu]; 2021 2022 fd_instr_count_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0); 2023 2024 return fd_instr_count_percpu[cpu]; 2025 } 2026 2027 int get_msr(int cpu, off_t offset, unsigned long long *msr) 2028 { 2029 ssize_t retval; 2030 2031 assert(!no_msr); 2032 2033 retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset); 2034 2035 if (retval != sizeof *msr) 2036 err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset); 2037 2038 return 0; 2039 } 2040 2041 int probe_msr(int cpu, off_t offset) 2042 { 2043 ssize_t retval; 2044 unsigned long long dummy; 2045 2046 assert(!no_msr); 2047 2048 retval = pread(get_msr_fd(cpu), &dummy, sizeof(dummy), offset); 2049 2050 if (retval != sizeof(dummy)) 2051 return 1; 2052 2053 return 0; 2054 } 2055 2056 /* Convert CPU ID to domain ID for given added perf counter. */ 2057 unsigned int cpu_to_domain(const struct perf_counter_info *pc, int cpu) 2058 { 2059 switch (pc->scope) { 2060 case SCOPE_CPU: 2061 return cpu; 2062 2063 case SCOPE_CORE: 2064 return cpus[cpu].physical_core_id; 2065 2066 case SCOPE_PACKAGE: 2067 return cpus[cpu].physical_package_id; 2068 } 2069 2070 __builtin_unreachable(); 2071 } 2072 2073 #define MAX_DEFERRED 16 2074 char *deferred_add_names[MAX_DEFERRED]; 2075 char *deferred_skip_names[MAX_DEFERRED]; 2076 int deferred_add_index; 2077 int deferred_skip_index; 2078 2079 /* 2080 * HIDE_LIST - hide this list of counters, show the rest [default] 2081 * SHOW_LIST - show this list of counters, hide the rest 2082 */ 2083 enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST; 2084 2085 void help(void) 2086 { 2087 fprintf(outf, 2088 "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" 2089 "\n" 2090 "Turbostat forks the specified COMMAND and prints statistics\n" 2091 "when COMMAND completes.\n" 2092 "If no COMMAND is specified, turbostat wakes every 5-seconds\n" 2093 "to print statistics, until interrupted.\n" 2094 " -a, --add add a counter\n" 2095 " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" 2096 " eg. --add perf/cstate_pkg/c2-residency,package,delta,percent,perfPC2\n" 2097 " eg. --add pmt,name=XTAL,type=raw,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102\n" 2098 " -c, --cpu cpu-set limit output to summary plus cpu-set:\n" 2099 " {core | package | j,k,l..m,n-p }\n" 2100 " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n" 2101 " debug messages are printed to stderr\n" 2102 " -D, --Dump displays the raw counter values\n" 2103 " -e, --enable [all | column]\n" 2104 " shows all or the specified disabled column\n" 2105 " -H, --hide [column|column,column,...]\n" 2106 " hide the specified column(s)\n" 2107 " -i, --interval sec.subsec\n" 2108 " Override default 5-second measurement interval\n" 2109 " -J, --Joules displays energy in Joules instead of Watts\n" 2110 " -l, --list list column headers only\n" 2111 " -M, --no-msr Disable all uses of the MSR driver\n" 2112 " -P, --no-perf Disable all uses of the perf API\n" 2113 " -n, --num_iterations num\n" 2114 " number of the measurement iterations\n" 2115 " -N, --header_iterations num\n" 2116 " print header every num iterations\n" 2117 " -o, --out file\n" 2118 " create or truncate \"file\" for all output\n" 2119 " -q, --quiet skip decoding system configuration header\n" 2120 " -s, --show [column|column,column,...]\n" 2121 " show only the specified column(s)\n" 2122 " -S, --Summary\n" 2123 " limits output to 1-line system summary per interval\n" 2124 " -T, --TCC temperature\n" 2125 " sets the Thermal Control Circuit temperature in\n" 2126 " degrees Celsius\n" 2127 " -h, --help print this help message\n" 2128 " -v, --version print version information\n" "\n" "For more help, run \"man turbostat\"\n"); 2129 } 2130 2131 /* 2132 * bic_lookup 2133 * for all the strings in comma separate name_list, 2134 * set the approprate bit in return value. 2135 */ 2136 unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) 2137 { 2138 unsigned int i; 2139 unsigned long long retval = 0; 2140 2141 while (name_list) { 2142 char *comma; 2143 2144 comma = strchr(name_list, ','); 2145 2146 if (comma) 2147 *comma = '\0'; 2148 2149 for (i = 0; i < MAX_BIC; ++i) { 2150 if (!strcmp(name_list, bic[i].name)) { 2151 retval |= (1ULL << i); 2152 break; 2153 } 2154 if (!strcmp(name_list, "all")) { 2155 retval |= ~0; 2156 break; 2157 } else if (!strcmp(name_list, "topology")) { 2158 retval |= BIC_TOPOLOGY; 2159 break; 2160 } else if (!strcmp(name_list, "power")) { 2161 retval |= BIC_THERMAL_PWR; 2162 break; 2163 } else if (!strcmp(name_list, "idle")) { 2164 retval |= BIC_IDLE; 2165 break; 2166 } else if (!strcmp(name_list, "frequency")) { 2167 retval |= BIC_FREQUENCY; 2168 break; 2169 } else if (!strcmp(name_list, "other")) { 2170 retval |= BIC_OTHER; 2171 break; 2172 } 2173 2174 } 2175 if (i == MAX_BIC) { 2176 if (mode == SHOW_LIST) { 2177 deferred_add_names[deferred_add_index++] = name_list; 2178 if (deferred_add_index >= MAX_DEFERRED) { 2179 fprintf(stderr, "More than max %d un-recognized --add options '%s'\n", 2180 MAX_DEFERRED, name_list); 2181 help(); 2182 exit(1); 2183 } 2184 } else { 2185 deferred_skip_names[deferred_skip_index++] = name_list; 2186 if (debug) 2187 fprintf(stderr, "deferred \"%s\"\n", name_list); 2188 if (deferred_skip_index >= MAX_DEFERRED) { 2189 fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", 2190 MAX_DEFERRED, name_list); 2191 help(); 2192 exit(1); 2193 } 2194 } 2195 } 2196 2197 name_list = comma; 2198 if (name_list) 2199 name_list++; 2200 2201 } 2202 return retval; 2203 } 2204 2205 void print_header(char *delim) 2206 { 2207 struct msr_counter *mp; 2208 struct perf_counter_info *pp; 2209 struct pmt_counter *ppmt; 2210 int printed = 0; 2211 2212 if (DO_BIC(BIC_USEC)) 2213 outp += sprintf(outp, "%susec", (printed++ ? delim : "")); 2214 if (DO_BIC(BIC_TOD)) 2215 outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : "")); 2216 if (DO_BIC(BIC_Package)) 2217 outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); 2218 if (DO_BIC(BIC_Die)) 2219 outp += sprintf(outp, "%sDie", (printed++ ? delim : "")); 2220 if (DO_BIC(BIC_Node)) 2221 outp += sprintf(outp, "%sNode", (printed++ ? delim : "")); 2222 if (DO_BIC(BIC_Core)) 2223 outp += sprintf(outp, "%sCore", (printed++ ? delim : "")); 2224 if (DO_BIC(BIC_CPU)) 2225 outp += sprintf(outp, "%sCPU", (printed++ ? delim : "")); 2226 if (DO_BIC(BIC_APIC)) 2227 outp += sprintf(outp, "%sAPIC", (printed++ ? delim : "")); 2228 if (DO_BIC(BIC_X2APIC)) 2229 outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : "")); 2230 if (DO_BIC(BIC_Avg_MHz)) 2231 outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : "")); 2232 if (DO_BIC(BIC_Busy)) 2233 outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : "")); 2234 if (DO_BIC(BIC_Bzy_MHz)) 2235 outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : "")); 2236 if (DO_BIC(BIC_TSC_MHz)) 2237 outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : "")); 2238 2239 if (DO_BIC(BIC_IPC)) 2240 outp += sprintf(outp, "%sIPC", (printed++ ? delim : "")); 2241 2242 if (DO_BIC(BIC_IRQ)) { 2243 if (sums_need_wide_columns) 2244 outp += sprintf(outp, "%s IRQ", (printed++ ? delim : "")); 2245 else 2246 outp += sprintf(outp, "%sIRQ", (printed++ ? delim : "")); 2247 } 2248 2249 if (DO_BIC(BIC_SMI)) 2250 outp += sprintf(outp, "%sSMI", (printed++ ? delim : "")); 2251 2252 for (mp = sys.tp; mp; mp = mp->next) { 2253 2254 if (mp->format == FORMAT_RAW) { 2255 if (mp->width == 64) 2256 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name); 2257 else 2258 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name); 2259 } else { 2260 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2261 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name); 2262 else 2263 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name); 2264 } 2265 } 2266 2267 for (pp = sys.perf_tp; pp; pp = pp->next) { 2268 2269 if (pp->format == FORMAT_RAW) { 2270 if (pp->width == 64) 2271 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); 2272 else 2273 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); 2274 } else { 2275 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2276 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); 2277 else 2278 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); 2279 } 2280 } 2281 2282 ppmt = sys.pmt_tp; 2283 while (ppmt) { 2284 switch (ppmt->type) { 2285 case PMT_TYPE_RAW: 2286 if (pmt_counter_get_width(ppmt) <= 32) 2287 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); 2288 else 2289 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); 2290 2291 break; 2292 2293 case PMT_TYPE_XTAL_TIME: 2294 outp += sprintf(outp, "%s%s", delim, ppmt->name); 2295 break; 2296 } 2297 2298 ppmt = ppmt->next; 2299 } 2300 2301 if (DO_BIC(BIC_CPU_c1)) 2302 outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); 2303 if (DO_BIC(BIC_CPU_c3)) 2304 outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : "")); 2305 if (DO_BIC(BIC_CPU_c6)) 2306 outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : "")); 2307 if (DO_BIC(BIC_CPU_c7)) 2308 outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : "")); 2309 2310 if (DO_BIC(BIC_Mod_c6)) 2311 outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : "")); 2312 2313 if (DO_BIC(BIC_CoreTmp)) 2314 outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : "")); 2315 2316 if (DO_BIC(BIC_CORE_THROT_CNT)) 2317 outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : "")); 2318 2319 if (platform->rapl_msrs && !rapl_joules) { 2320 if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) 2321 outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); 2322 } else if (platform->rapl_msrs && rapl_joules) { 2323 if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) 2324 outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); 2325 } 2326 2327 for (mp = sys.cp; mp; mp = mp->next) { 2328 if (mp->format == FORMAT_RAW) { 2329 if (mp->width == 64) 2330 outp += sprintf(outp, "%s%18.18s", delim, mp->name); 2331 else 2332 outp += sprintf(outp, "%s%10.10s", delim, mp->name); 2333 } else { 2334 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2335 outp += sprintf(outp, "%s%8s", delim, mp->name); 2336 else 2337 outp += sprintf(outp, "%s%s", delim, mp->name); 2338 } 2339 } 2340 2341 for (pp = sys.perf_cp; pp; pp = pp->next) { 2342 2343 if (pp->format == FORMAT_RAW) { 2344 if (pp->width == 64) 2345 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); 2346 else 2347 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); 2348 } else { 2349 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2350 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); 2351 else 2352 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); 2353 } 2354 } 2355 2356 ppmt = sys.pmt_cp; 2357 while (ppmt) { 2358 switch (ppmt->type) { 2359 case PMT_TYPE_RAW: 2360 if (pmt_counter_get_width(ppmt) <= 32) 2361 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); 2362 else 2363 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); 2364 2365 break; 2366 2367 case PMT_TYPE_XTAL_TIME: 2368 outp += sprintf(outp, "%s%s", delim, ppmt->name); 2369 break; 2370 } 2371 2372 ppmt = ppmt->next; 2373 } 2374 2375 if (DO_BIC(BIC_PkgTmp)) 2376 outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : "")); 2377 2378 if (DO_BIC(BIC_GFX_rc6)) 2379 outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : "")); 2380 2381 if (DO_BIC(BIC_GFXMHz)) 2382 outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : "")); 2383 2384 if (DO_BIC(BIC_GFXACTMHz)) 2385 outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : "")); 2386 2387 if (DO_BIC(BIC_SAM_mc6)) 2388 outp += sprintf(outp, "%sSAM%%mc6", (printed++ ? delim : "")); 2389 2390 if (DO_BIC(BIC_SAMMHz)) 2391 outp += sprintf(outp, "%sSAMMHz", (printed++ ? delim : "")); 2392 2393 if (DO_BIC(BIC_SAMACTMHz)) 2394 outp += sprintf(outp, "%sSAMAMHz", (printed++ ? delim : "")); 2395 2396 if (DO_BIC(BIC_Totl_c0)) 2397 outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : "")); 2398 if (DO_BIC(BIC_Any_c0)) 2399 outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : "")); 2400 if (DO_BIC(BIC_GFX_c0)) 2401 outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : "")); 2402 if (DO_BIC(BIC_CPUGFX)) 2403 outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : "")); 2404 2405 if (DO_BIC(BIC_Pkgpc2)) 2406 outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : "")); 2407 if (DO_BIC(BIC_Pkgpc3)) 2408 outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : "")); 2409 if (DO_BIC(BIC_Pkgpc6)) 2410 outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : "")); 2411 if (DO_BIC(BIC_Pkgpc7)) 2412 outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : "")); 2413 if (DO_BIC(BIC_Pkgpc8)) 2414 outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : "")); 2415 if (DO_BIC(BIC_Pkgpc9)) 2416 outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : "")); 2417 if (DO_BIC(BIC_Pkgpc10)) 2418 outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : "")); 2419 if (DO_BIC(BIC_Diec6)) 2420 outp += sprintf(outp, "%sDie%%c6", (printed++ ? delim : "")); 2421 if (DO_BIC(BIC_CPU_LPI)) 2422 outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : "")); 2423 if (DO_BIC(BIC_SYS_LPI)) 2424 outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : "")); 2425 2426 if (platform->rapl_msrs && !rapl_joules) { 2427 if (DO_BIC(BIC_PkgWatt)) 2428 outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : "")); 2429 if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) 2430 outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); 2431 if (DO_BIC(BIC_GFXWatt)) 2432 outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : "")); 2433 if (DO_BIC(BIC_RAMWatt)) 2434 outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : "")); 2435 if (DO_BIC(BIC_PKG__)) 2436 outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); 2437 if (DO_BIC(BIC_RAM__)) 2438 outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : "")); 2439 } else if (platform->rapl_msrs && rapl_joules) { 2440 if (DO_BIC(BIC_Pkg_J)) 2441 outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : "")); 2442 if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) 2443 outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); 2444 if (DO_BIC(BIC_GFX_J)) 2445 outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : "")); 2446 if (DO_BIC(BIC_RAM_J)) 2447 outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : "")); 2448 if (DO_BIC(BIC_PKG__)) 2449 outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); 2450 if (DO_BIC(BIC_RAM__)) 2451 outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : "")); 2452 } 2453 if (DO_BIC(BIC_UNCORE_MHZ)) 2454 outp += sprintf(outp, "%sUncMHz", (printed++ ? delim : "")); 2455 2456 for (mp = sys.pp; mp; mp = mp->next) { 2457 if (mp->format == FORMAT_RAW) { 2458 if (mp->width == 64) 2459 outp += sprintf(outp, "%s%18.18s", delim, mp->name); 2460 else if (mp->width == 32) 2461 outp += sprintf(outp, "%s%10.10s", delim, mp->name); 2462 else 2463 outp += sprintf(outp, "%s%7.7s", delim, mp->name); 2464 } else { 2465 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2466 outp += sprintf(outp, "%s%8s", delim, mp->name); 2467 else 2468 outp += sprintf(outp, "%s%7.7s", delim, mp->name); 2469 } 2470 } 2471 2472 for (pp = sys.perf_pp; pp; pp = pp->next) { 2473 2474 if (pp->format == FORMAT_RAW) { 2475 if (pp->width == 64) 2476 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); 2477 else 2478 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); 2479 } else { 2480 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2481 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); 2482 else 2483 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); 2484 } 2485 } 2486 2487 ppmt = sys.pmt_pp; 2488 while (ppmt) { 2489 switch (ppmt->type) { 2490 case PMT_TYPE_RAW: 2491 if (pmt_counter_get_width(ppmt) <= 32) 2492 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); 2493 else 2494 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); 2495 2496 break; 2497 2498 case PMT_TYPE_XTAL_TIME: 2499 outp += sprintf(outp, "%s%s", delim, ppmt->name); 2500 break; 2501 } 2502 2503 ppmt = ppmt->next; 2504 } 2505 2506 outp += sprintf(outp, "\n"); 2507 } 2508 2509 int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 2510 { 2511 int i; 2512 struct msr_counter *mp; 2513 2514 outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p); 2515 2516 if (t) { 2517 outp += sprintf(outp, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags); 2518 outp += sprintf(outp, "TSC: %016llX\n", t->tsc); 2519 outp += sprintf(outp, "aperf: %016llX\n", t->aperf); 2520 outp += sprintf(outp, "mperf: %016llX\n", t->mperf); 2521 outp += sprintf(outp, "c1: %016llX\n", t->c1); 2522 2523 if (DO_BIC(BIC_IPC)) 2524 outp += sprintf(outp, "IPC: %lld\n", t->instr_count); 2525 2526 if (DO_BIC(BIC_IRQ)) 2527 outp += sprintf(outp, "IRQ: %lld\n", t->irq_count); 2528 if (DO_BIC(BIC_SMI)) 2529 outp += sprintf(outp, "SMI: %d\n", t->smi_count); 2530 2531 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 2532 outp += 2533 sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, 2534 t->counter[i], mp->sp->path); 2535 } 2536 } 2537 2538 if (c && is_cpu_first_thread_in_core(t, c, p)) { 2539 outp += sprintf(outp, "core: %d\n", c->core_id); 2540 outp += sprintf(outp, "c3: %016llX\n", c->c3); 2541 outp += sprintf(outp, "c6: %016llX\n", c->c6); 2542 outp += sprintf(outp, "c7: %016llX\n", c->c7); 2543 outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c); 2544 outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt); 2545 2546 const unsigned long long energy_value = c->core_energy.raw_value * c->core_energy.scale; 2547 const double energy_scale = c->core_energy.scale; 2548 2549 if (c->core_energy.unit == RAPL_UNIT_JOULES) 2550 outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale); 2551 2552 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 2553 outp += 2554 sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, 2555 c->counter[i], mp->sp->path); 2556 } 2557 outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us); 2558 } 2559 2560 if (p && is_cpu_first_core_in_package(t, c, p)) { 2561 outp += sprintf(outp, "package: %d\n", p->package_id); 2562 2563 outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0); 2564 outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0); 2565 outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0); 2566 outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0); 2567 2568 outp += sprintf(outp, "pc2: %016llX\n", p->pc2); 2569 if (DO_BIC(BIC_Pkgpc3)) 2570 outp += sprintf(outp, "pc3: %016llX\n", p->pc3); 2571 if (DO_BIC(BIC_Pkgpc6)) 2572 outp += sprintf(outp, "pc6: %016llX\n", p->pc6); 2573 if (DO_BIC(BIC_Pkgpc7)) 2574 outp += sprintf(outp, "pc7: %016llX\n", p->pc7); 2575 outp += sprintf(outp, "pc8: %016llX\n", p->pc8); 2576 outp += sprintf(outp, "pc9: %016llX\n", p->pc9); 2577 outp += sprintf(outp, "pc10: %016llX\n", p->pc10); 2578 outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi); 2579 outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi); 2580 outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg.raw_value); 2581 outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores.raw_value); 2582 outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx.raw_value); 2583 outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram.raw_value); 2584 outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status.raw_value); 2585 outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status.raw_value); 2586 outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); 2587 2588 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 2589 outp += 2590 sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, 2591 p->counter[i], mp->sp->path); 2592 } 2593 } 2594 2595 outp += sprintf(outp, "\n"); 2596 2597 return 0; 2598 } 2599 2600 double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desired_unit, double interval) 2601 { 2602 assert(desired_unit != RAPL_UNIT_INVALID); 2603 2604 /* 2605 * For now we don't expect anything other than joules, 2606 * so just simplify the logic. 2607 */ 2608 assert(c->unit == RAPL_UNIT_JOULES); 2609 2610 const double scaled = c->raw_value * c->scale; 2611 2612 if (desired_unit == RAPL_UNIT_WATTS) 2613 return scaled / interval; 2614 return scaled; 2615 } 2616 2617 /* 2618 * column formatting convention & formats 2619 */ 2620 int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 2621 { 2622 double interval_float, tsc; 2623 char *fmt8; 2624 int i; 2625 struct msr_counter *mp; 2626 struct perf_counter_info *pp; 2627 struct pmt_counter *ppmt; 2628 char *delim = "\t"; 2629 int printed = 0; 2630 2631 /* if showing only 1st thread in core and this isn't one, bail out */ 2632 if (show_core_only && !is_cpu_first_thread_in_core(t, c, p)) 2633 return 0; 2634 2635 /* if showing only 1st thread in pkg and this isn't one, bail out */ 2636 if (show_pkg_only && !is_cpu_first_core_in_package(t, c, p)) 2637 return 0; 2638 2639 /*if not summary line and --cpu is used */ 2640 if ((t != &average.threads) && (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset))) 2641 return 0; 2642 2643 if (DO_BIC(BIC_USEC)) { 2644 /* on each row, print how many usec each timestamp took to gather */ 2645 struct timeval tv; 2646 2647 timersub(&t->tv_end, &t->tv_begin, &tv); 2648 outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec); 2649 } 2650 2651 /* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */ 2652 if (DO_BIC(BIC_TOD)) 2653 outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec); 2654 2655 interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec / 1000000.0; 2656 2657 tsc = t->tsc * tsc_tweak; 2658 2659 /* topo columns, print blanks on 1st (average) line */ 2660 if (t == &average.threads) { 2661 if (DO_BIC(BIC_Package)) 2662 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2663 if (DO_BIC(BIC_Die)) 2664 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2665 if (DO_BIC(BIC_Node)) 2666 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2667 if (DO_BIC(BIC_Core)) 2668 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2669 if (DO_BIC(BIC_CPU)) 2670 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2671 if (DO_BIC(BIC_APIC)) 2672 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2673 if (DO_BIC(BIC_X2APIC)) 2674 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2675 } else { 2676 if (DO_BIC(BIC_Package)) { 2677 if (p) 2678 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id); 2679 else 2680 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2681 } 2682 if (DO_BIC(BIC_Die)) { 2683 if (c) 2684 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id); 2685 else 2686 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2687 } 2688 if (DO_BIC(BIC_Node)) { 2689 if (t) 2690 outp += sprintf(outp, "%s%d", 2691 (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id); 2692 else 2693 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2694 } 2695 if (DO_BIC(BIC_Core)) { 2696 if (c) 2697 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id); 2698 else 2699 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2700 } 2701 if (DO_BIC(BIC_CPU)) 2702 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id); 2703 if (DO_BIC(BIC_APIC)) 2704 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id); 2705 if (DO_BIC(BIC_X2APIC)) 2706 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id); 2707 } 2708 2709 if (DO_BIC(BIC_Avg_MHz)) 2710 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float); 2711 2712 if (DO_BIC(BIC_Busy)) 2713 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc); 2714 2715 if (DO_BIC(BIC_Bzy_MHz)) { 2716 if (has_base_hz) 2717 outp += 2718 sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf); 2719 else 2720 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 2721 tsc / units * t->aperf / t->mperf / interval_float); 2722 } 2723 2724 if (DO_BIC(BIC_TSC_MHz)) 2725 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc / units / interval_float); 2726 2727 if (DO_BIC(BIC_IPC)) 2728 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf); 2729 2730 /* IRQ */ 2731 if (DO_BIC(BIC_IRQ)) { 2732 if (sums_need_wide_columns) 2733 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count); 2734 else 2735 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count); 2736 } 2737 2738 /* SMI */ 2739 if (DO_BIC(BIC_SMI)) 2740 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count); 2741 2742 /* Added counters */ 2743 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 2744 if (mp->format == FORMAT_RAW) { 2745 if (mp->width == 32) 2746 outp += 2747 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]); 2748 else 2749 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]); 2750 } else if (mp->format == FORMAT_DELTA) { 2751 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2752 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]); 2753 else 2754 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]); 2755 } else if (mp->format == FORMAT_PERCENT) { 2756 if (mp->type == COUNTER_USEC) 2757 outp += 2758 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 2759 t->counter[i] / interval_float / 10000); 2760 else 2761 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc); 2762 } 2763 } 2764 2765 /* Added perf counters */ 2766 for (i = 0, pp = sys.perf_tp; pp; ++i, pp = pp->next) { 2767 if (pp->format == FORMAT_RAW) { 2768 if (pp->width == 32) 2769 outp += 2770 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 2771 (unsigned int)t->perf_counter[i]); 2772 else 2773 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->perf_counter[i]); 2774 } else if (pp->format == FORMAT_DELTA) { 2775 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2776 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->perf_counter[i]); 2777 else 2778 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->perf_counter[i]); 2779 } else if (pp->format == FORMAT_PERCENT) { 2780 if (pp->type == COUNTER_USEC) 2781 outp += 2782 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 2783 t->perf_counter[i] / interval_float / 10000); 2784 else 2785 outp += 2786 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->perf_counter[i] / tsc); 2787 } 2788 } 2789 2790 for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { 2791 switch (ppmt->type) { 2792 case PMT_TYPE_RAW: 2793 if (pmt_counter_get_width(ppmt) <= 32) 2794 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 2795 (unsigned int)t->pmt_counter[i]); 2796 else 2797 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->pmt_counter[i]); 2798 2799 break; 2800 2801 case PMT_TYPE_XTAL_TIME: 2802 const unsigned long value_raw = t->pmt_counter[i]; 2803 const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; 2804 2805 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); 2806 break; 2807 } 2808 } 2809 2810 /* C1 */ 2811 if (DO_BIC(BIC_CPU_c1)) 2812 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc); 2813 2814 /* print per-core data only for 1st thread in core */ 2815 if (!is_cpu_first_thread_in_core(t, c, p)) 2816 goto done; 2817 2818 if (DO_BIC(BIC_CPU_c3)) 2819 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc); 2820 if (DO_BIC(BIC_CPU_c6)) 2821 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc); 2822 if (DO_BIC(BIC_CPU_c7)) 2823 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc); 2824 2825 /* Mod%c6 */ 2826 if (DO_BIC(BIC_Mod_c6)) 2827 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc); 2828 2829 if (DO_BIC(BIC_CoreTmp)) 2830 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c); 2831 2832 /* Core throttle count */ 2833 if (DO_BIC(BIC_CORE_THROT_CNT)) 2834 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt); 2835 2836 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 2837 if (mp->format == FORMAT_RAW) { 2838 if (mp->width == 32) 2839 outp += 2840 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]); 2841 else 2842 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]); 2843 } else if (mp->format == FORMAT_DELTA) { 2844 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2845 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]); 2846 else 2847 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]); 2848 } else if (mp->format == FORMAT_PERCENT) { 2849 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc); 2850 } 2851 } 2852 2853 for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { 2854 if (pp->format == FORMAT_RAW) { 2855 if (pp->width == 32) 2856 outp += 2857 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 2858 (unsigned int)c->perf_counter[i]); 2859 else 2860 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->perf_counter[i]); 2861 } else if (pp->format == FORMAT_DELTA) { 2862 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2863 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->perf_counter[i]); 2864 else 2865 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->perf_counter[i]); 2866 } else if (pp->format == FORMAT_PERCENT) { 2867 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->perf_counter[i] / tsc); 2868 } 2869 } 2870 2871 for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { 2872 switch (ppmt->type) { 2873 case PMT_TYPE_RAW: 2874 if (pmt_counter_get_width(ppmt) <= 32) 2875 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 2876 (unsigned int)c->pmt_counter[i]); 2877 else 2878 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->pmt_counter[i]); 2879 2880 break; 2881 2882 case PMT_TYPE_XTAL_TIME: 2883 const unsigned long value_raw = c->pmt_counter[i]; 2884 const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; 2885 2886 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); 2887 break; 2888 } 2889 } 2890 2891 fmt8 = "%s%.2f"; 2892 2893 if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) 2894 outp += 2895 sprintf(outp, fmt8, (printed++ ? delim : ""), 2896 rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float)); 2897 if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) 2898 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 2899 rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float)); 2900 2901 /* print per-package data only for 1st core in package */ 2902 if (!is_cpu_first_core_in_package(t, c, p)) 2903 goto done; 2904 2905 /* PkgTmp */ 2906 if (DO_BIC(BIC_PkgTmp)) 2907 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c); 2908 2909 /* GFXrc6 */ 2910 if (DO_BIC(BIC_GFX_rc6)) { 2911 if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */ 2912 outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); 2913 } else { 2914 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 2915 p->gfx_rc6_ms / 10.0 / interval_float); 2916 } 2917 } 2918 2919 /* GFXMHz */ 2920 if (DO_BIC(BIC_GFXMHz)) 2921 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz); 2922 2923 /* GFXACTMHz */ 2924 if (DO_BIC(BIC_GFXACTMHz)) 2925 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz); 2926 2927 /* SAMmc6 */ 2928 if (DO_BIC(BIC_SAM_mc6)) { 2929 if (p->sam_mc6_ms == -1) { /* detect GFX counter reset */ 2930 outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); 2931 } else { 2932 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 2933 p->sam_mc6_ms / 10.0 / interval_float); 2934 } 2935 } 2936 2937 /* SAMMHz */ 2938 if (DO_BIC(BIC_SAMMHz)) 2939 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_mhz); 2940 2941 /* SAMACTMHz */ 2942 if (DO_BIC(BIC_SAMACTMHz)) 2943 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_act_mhz); 2944 2945 /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ 2946 if (DO_BIC(BIC_Totl_c0)) 2947 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc); 2948 if (DO_BIC(BIC_Any_c0)) 2949 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc); 2950 if (DO_BIC(BIC_GFX_c0)) 2951 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc); 2952 if (DO_BIC(BIC_CPUGFX)) 2953 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc); 2954 2955 if (DO_BIC(BIC_Pkgpc2)) 2956 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc); 2957 if (DO_BIC(BIC_Pkgpc3)) 2958 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc); 2959 if (DO_BIC(BIC_Pkgpc6)) 2960 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc); 2961 if (DO_BIC(BIC_Pkgpc7)) 2962 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc); 2963 if (DO_BIC(BIC_Pkgpc8)) 2964 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc); 2965 if (DO_BIC(BIC_Pkgpc9)) 2966 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc); 2967 if (DO_BIC(BIC_Pkgpc10)) 2968 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc); 2969 2970 if (DO_BIC(BIC_Diec6)) 2971 outp += 2972 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->die_c6 / crystal_hz / interval_float); 2973 2974 if (DO_BIC(BIC_CPU_LPI)) { 2975 if (p->cpu_lpi >= 0) 2976 outp += 2977 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 2978 100.0 * p->cpu_lpi / 1000000.0 / interval_float); 2979 else 2980 outp += sprintf(outp, "%s(neg)", (printed++ ? delim : "")); 2981 } 2982 if (DO_BIC(BIC_SYS_LPI)) { 2983 if (p->sys_lpi >= 0) 2984 outp += 2985 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 2986 100.0 * p->sys_lpi / 1000000.0 / interval_float); 2987 else 2988 outp += sprintf(outp, "%s(neg)", (printed++ ? delim : "")); 2989 } 2990 2991 if (DO_BIC(BIC_PkgWatt)) 2992 outp += 2993 sprintf(outp, fmt8, (printed++ ? delim : ""), 2994 rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float)); 2995 if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) 2996 outp += 2997 sprintf(outp, fmt8, (printed++ ? delim : ""), 2998 rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float)); 2999 if (DO_BIC(BIC_GFXWatt)) 3000 outp += 3001 sprintf(outp, fmt8, (printed++ ? delim : ""), 3002 rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float)); 3003 if (DO_BIC(BIC_RAMWatt)) 3004 outp += 3005 sprintf(outp, fmt8, (printed++ ? delim : ""), 3006 rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float)); 3007 if (DO_BIC(BIC_Pkg_J)) 3008 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3009 rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float)); 3010 if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) 3011 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3012 rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float)); 3013 if (DO_BIC(BIC_GFX_J)) 3014 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3015 rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float)); 3016 if (DO_BIC(BIC_RAM_J)) 3017 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3018 rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float)); 3019 if (DO_BIC(BIC_PKG__)) 3020 outp += 3021 sprintf(outp, fmt8, (printed++ ? delim : ""), 3022 rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float)); 3023 if (DO_BIC(BIC_RAM__)) 3024 outp += 3025 sprintf(outp, fmt8, (printed++ ? delim : ""), 3026 rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float)); 3027 /* UncMHz */ 3028 if (DO_BIC(BIC_UNCORE_MHZ)) 3029 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz); 3030 3031 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 3032 if (mp->format == FORMAT_RAW) { 3033 if (mp->width == 32) 3034 outp += 3035 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]); 3036 else 3037 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]); 3038 } else if (mp->format == FORMAT_DELTA) { 3039 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 3040 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]); 3041 else 3042 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]); 3043 } else if (mp->format == FORMAT_PERCENT) { 3044 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc); 3045 } else if (mp->type == COUNTER_K2M) 3046 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->counter[i] / 1000); 3047 } 3048 3049 for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { 3050 if (pp->format == FORMAT_RAW) { 3051 if (pp->width == 32) 3052 outp += 3053 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 3054 (unsigned int)p->perf_counter[i]); 3055 else 3056 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->perf_counter[i]); 3057 } else if (pp->format == FORMAT_DELTA) { 3058 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 3059 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->perf_counter[i]); 3060 else 3061 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->perf_counter[i]); 3062 } else if (pp->format == FORMAT_PERCENT) { 3063 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->perf_counter[i] / tsc); 3064 } else if (pp->type == COUNTER_K2M) { 3065 outp += 3066 sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->perf_counter[i] / 1000); 3067 } 3068 } 3069 3070 for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { 3071 switch (ppmt->type) { 3072 case PMT_TYPE_RAW: 3073 if (pmt_counter_get_width(ppmt) <= 32) 3074 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 3075 (unsigned int)p->pmt_counter[i]); 3076 else 3077 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->pmt_counter[i]); 3078 3079 break; 3080 3081 case PMT_TYPE_XTAL_TIME: 3082 const unsigned long value_raw = p->pmt_counter[i]; 3083 const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; 3084 3085 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); 3086 break; 3087 } 3088 } 3089 3090 done: 3091 if (*(outp - 1) != '\n') 3092 outp += sprintf(outp, "\n"); 3093 3094 return 0; 3095 } 3096 3097 void flush_output_stdout(void) 3098 { 3099 FILE *filep; 3100 3101 if (outf == stderr) 3102 filep = stdout; 3103 else 3104 filep = outf; 3105 3106 fputs(output_buffer, filep); 3107 fflush(filep); 3108 3109 outp = output_buffer; 3110 } 3111 3112 void flush_output_stderr(void) 3113 { 3114 fputs(output_buffer, outf); 3115 fflush(outf); 3116 outp = output_buffer; 3117 } 3118 3119 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 3120 { 3121 static int count; 3122 3123 if ((!count || (header_iterations && !(count % header_iterations))) || !summary_only) 3124 print_header("\t"); 3125 3126 format_counters(&average.threads, &average.cores, &average.packages); 3127 3128 count++; 3129 3130 if (summary_only) 3131 return; 3132 3133 for_all_cpus(format_counters, t, c, p); 3134 } 3135 3136 #define DELTA_WRAP32(new, old) \ 3137 old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32); 3138 3139 int delta_package(struct pkg_data *new, struct pkg_data *old) 3140 { 3141 int i; 3142 struct msr_counter *mp; 3143 struct perf_counter_info *pp; 3144 struct pmt_counter *ppmt; 3145 3146 if (DO_BIC(BIC_Totl_c0)) 3147 old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; 3148 if (DO_BIC(BIC_Any_c0)) 3149 old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0; 3150 if (DO_BIC(BIC_GFX_c0)) 3151 old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0; 3152 if (DO_BIC(BIC_CPUGFX)) 3153 old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0; 3154 3155 old->pc2 = new->pc2 - old->pc2; 3156 if (DO_BIC(BIC_Pkgpc3)) 3157 old->pc3 = new->pc3 - old->pc3; 3158 if (DO_BIC(BIC_Pkgpc6)) 3159 old->pc6 = new->pc6 - old->pc6; 3160 if (DO_BIC(BIC_Pkgpc7)) 3161 old->pc7 = new->pc7 - old->pc7; 3162 old->pc8 = new->pc8 - old->pc8; 3163 old->pc9 = new->pc9 - old->pc9; 3164 old->pc10 = new->pc10 - old->pc10; 3165 old->die_c6 = new->die_c6 - old->die_c6; 3166 old->cpu_lpi = new->cpu_lpi - old->cpu_lpi; 3167 old->sys_lpi = new->sys_lpi - old->sys_lpi; 3168 old->pkg_temp_c = new->pkg_temp_c; 3169 3170 /* flag an error when rc6 counter resets/wraps */ 3171 if (old->gfx_rc6_ms > new->gfx_rc6_ms) 3172 old->gfx_rc6_ms = -1; 3173 else 3174 old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms; 3175 3176 old->uncore_mhz = new->uncore_mhz; 3177 old->gfx_mhz = new->gfx_mhz; 3178 old->gfx_act_mhz = new->gfx_act_mhz; 3179 3180 /* flag an error when mc6 counter resets/wraps */ 3181 if (old->sam_mc6_ms > new->sam_mc6_ms) 3182 old->sam_mc6_ms = -1; 3183 else 3184 old->sam_mc6_ms = new->sam_mc6_ms - old->sam_mc6_ms; 3185 3186 old->sam_mhz = new->sam_mhz; 3187 old->sam_act_mhz = new->sam_act_mhz; 3188 3189 old->energy_pkg.raw_value = new->energy_pkg.raw_value - old->energy_pkg.raw_value; 3190 old->energy_cores.raw_value = new->energy_cores.raw_value - old->energy_cores.raw_value; 3191 old->energy_gfx.raw_value = new->energy_gfx.raw_value - old->energy_gfx.raw_value; 3192 old->energy_dram.raw_value = new->energy_dram.raw_value - old->energy_dram.raw_value; 3193 old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value; 3194 old->rapl_dram_perf_status.raw_value = 3195 new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value; 3196 3197 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 3198 if (mp->format == FORMAT_RAW) 3199 old->counter[i] = new->counter[i]; 3200 else if (mp->format == FORMAT_AVERAGE) 3201 old->counter[i] = new->counter[i]; 3202 else 3203 old->counter[i] = new->counter[i] - old->counter[i]; 3204 } 3205 3206 for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { 3207 if (pp->format == FORMAT_RAW) 3208 old->perf_counter[i] = new->perf_counter[i]; 3209 else if (pp->format == FORMAT_AVERAGE) 3210 old->perf_counter[i] = new->perf_counter[i]; 3211 else 3212 old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; 3213 } 3214 3215 for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { 3216 if (ppmt->format == FORMAT_RAW) 3217 old->pmt_counter[i] = new->pmt_counter[i]; 3218 else 3219 old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; 3220 } 3221 3222 return 0; 3223 } 3224 3225 void delta_core(struct core_data *new, struct core_data *old) 3226 { 3227 int i; 3228 struct msr_counter *mp; 3229 struct perf_counter_info *pp; 3230 struct pmt_counter *ppmt; 3231 3232 old->c3 = new->c3 - old->c3; 3233 old->c6 = new->c6 - old->c6; 3234 old->c7 = new->c7 - old->c7; 3235 old->core_temp_c = new->core_temp_c; 3236 old->core_throt_cnt = new->core_throt_cnt; 3237 old->mc6_us = new->mc6_us - old->mc6_us; 3238 3239 DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value); 3240 3241 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 3242 if (mp->format == FORMAT_RAW) 3243 old->counter[i] = new->counter[i]; 3244 else 3245 old->counter[i] = new->counter[i] - old->counter[i]; 3246 } 3247 3248 for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { 3249 if (pp->format == FORMAT_RAW) 3250 old->perf_counter[i] = new->perf_counter[i]; 3251 else 3252 old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; 3253 } 3254 3255 for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { 3256 if (ppmt->format == FORMAT_RAW) 3257 old->pmt_counter[i] = new->pmt_counter[i]; 3258 else 3259 old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; 3260 } 3261 } 3262 3263 int soft_c1_residency_display(int bic) 3264 { 3265 if (!DO_BIC(BIC_CPU_c1) || platform->has_msr_core_c1_res) 3266 return 0; 3267 3268 return DO_BIC_READ(bic); 3269 } 3270 3271 /* 3272 * old = new - old 3273 */ 3274 int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta) 3275 { 3276 int i; 3277 struct msr_counter *mp; 3278 struct perf_counter_info *pp; 3279 struct pmt_counter *ppmt; 3280 3281 /* we run cpuid just the 1st time, copy the results */ 3282 if (DO_BIC(BIC_APIC)) 3283 new->apic_id = old->apic_id; 3284 if (DO_BIC(BIC_X2APIC)) 3285 new->x2apic_id = old->x2apic_id; 3286 3287 /* 3288 * the timestamps from start of measurement interval are in "old" 3289 * the timestamp from end of measurement interval are in "new" 3290 * over-write old w/ new so we can print end of interval values 3291 */ 3292 3293 timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta); 3294 old->tv_begin = new->tv_begin; 3295 old->tv_end = new->tv_end; 3296 3297 old->tsc = new->tsc - old->tsc; 3298 3299 /* check for TSC < 1 Mcycles over interval */ 3300 if (old->tsc < (1000 * 1000)) 3301 errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n" 3302 "You can disable all c-states by booting with \"idle=poll\"\n" 3303 "or just the deep ones with \"processor.max_cstate=1\""); 3304 3305 old->c1 = new->c1 - old->c1; 3306 3307 if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC) 3308 || soft_c1_residency_display(BIC_Avg_MHz)) { 3309 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { 3310 old->aperf = new->aperf - old->aperf; 3311 old->mperf = new->mperf - old->mperf; 3312 } else { 3313 return -1; 3314 } 3315 } 3316 3317 if (platform->has_msr_core_c1_res) { 3318 /* 3319 * Some models have a dedicated C1 residency MSR, 3320 * which should be more accurate than the derivation below. 3321 */ 3322 } else { 3323 /* 3324 * As counter collection is not atomic, 3325 * it is possible for mperf's non-halted cycles + idle states 3326 * to exceed TSC's all cycles: show c1 = 0% in that case. 3327 */ 3328 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak)) 3329 old->c1 = 0; 3330 else { 3331 /* normal case, derive c1 */ 3332 old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3 3333 - core_delta->c6 - core_delta->c7; 3334 } 3335 } 3336 3337 if (old->mperf == 0) { 3338 if (debug > 1) 3339 fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id); 3340 old->mperf = 1; /* divide by 0 protection */ 3341 } 3342 3343 if (DO_BIC(BIC_IPC)) 3344 old->instr_count = new->instr_count - old->instr_count; 3345 3346 if (DO_BIC(BIC_IRQ)) 3347 old->irq_count = new->irq_count - old->irq_count; 3348 3349 if (DO_BIC(BIC_SMI)) 3350 old->smi_count = new->smi_count - old->smi_count; 3351 3352 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 3353 if (mp->format == FORMAT_RAW) 3354 old->counter[i] = new->counter[i]; 3355 else 3356 old->counter[i] = new->counter[i] - old->counter[i]; 3357 } 3358 3359 for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { 3360 if (pp->format == FORMAT_RAW) 3361 old->perf_counter[i] = new->perf_counter[i]; 3362 else 3363 old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; 3364 } 3365 3366 for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { 3367 if (ppmt->format == FORMAT_RAW) 3368 old->pmt_counter[i] = new->pmt_counter[i]; 3369 else 3370 old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; 3371 } 3372 3373 return 0; 3374 } 3375 3376 int delta_cpu(struct thread_data *t, struct core_data *c, 3377 struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2) 3378 { 3379 int retval = 0; 3380 3381 /* calculate core delta only for 1st thread in core */ 3382 if (is_cpu_first_thread_in_core(t, c, p)) 3383 delta_core(c, c2); 3384 3385 /* always calculate thread delta */ 3386 retval = delta_thread(t, t2, c2); /* c2 is core delta */ 3387 if (retval) 3388 return retval; 3389 3390 /* calculate package delta only for 1st core in package */ 3391 if (is_cpu_first_core_in_package(t, c, p)) 3392 retval = delta_package(p, p2); 3393 3394 return retval; 3395 } 3396 3397 void rapl_counter_clear(struct rapl_counter *c) 3398 { 3399 c->raw_value = 0; 3400 c->scale = 0.0; 3401 c->unit = RAPL_UNIT_INVALID; 3402 } 3403 3404 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 3405 { 3406 int i; 3407 struct msr_counter *mp; 3408 3409 t->tv_begin.tv_sec = 0; 3410 t->tv_begin.tv_usec = 0; 3411 t->tv_end.tv_sec = 0; 3412 t->tv_end.tv_usec = 0; 3413 t->tv_delta.tv_sec = 0; 3414 t->tv_delta.tv_usec = 0; 3415 3416 t->tsc = 0; 3417 t->aperf = 0; 3418 t->mperf = 0; 3419 t->c1 = 0; 3420 3421 t->instr_count = 0; 3422 3423 t->irq_count = 0; 3424 t->smi_count = 0; 3425 3426 c->c3 = 0; 3427 c->c6 = 0; 3428 c->c7 = 0; 3429 c->mc6_us = 0; 3430 c->core_temp_c = 0; 3431 rapl_counter_clear(&c->core_energy); 3432 c->core_throt_cnt = 0; 3433 3434 p->pkg_wtd_core_c0 = 0; 3435 p->pkg_any_core_c0 = 0; 3436 p->pkg_any_gfxe_c0 = 0; 3437 p->pkg_both_core_gfxe_c0 = 0; 3438 3439 p->pc2 = 0; 3440 if (DO_BIC(BIC_Pkgpc3)) 3441 p->pc3 = 0; 3442 if (DO_BIC(BIC_Pkgpc6)) 3443 p->pc6 = 0; 3444 if (DO_BIC(BIC_Pkgpc7)) 3445 p->pc7 = 0; 3446 p->pc8 = 0; 3447 p->pc9 = 0; 3448 p->pc10 = 0; 3449 p->die_c6 = 0; 3450 p->cpu_lpi = 0; 3451 p->sys_lpi = 0; 3452 3453 rapl_counter_clear(&p->energy_pkg); 3454 rapl_counter_clear(&p->energy_dram); 3455 rapl_counter_clear(&p->energy_cores); 3456 rapl_counter_clear(&p->energy_gfx); 3457 rapl_counter_clear(&p->rapl_pkg_perf_status); 3458 rapl_counter_clear(&p->rapl_dram_perf_status); 3459 p->pkg_temp_c = 0; 3460 3461 p->gfx_rc6_ms = 0; 3462 p->uncore_mhz = 0; 3463 p->gfx_mhz = 0; 3464 p->gfx_act_mhz = 0; 3465 p->sam_mc6_ms = 0; 3466 p->sam_mhz = 0; 3467 p->sam_act_mhz = 0; 3468 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) 3469 t->counter[i] = 0; 3470 3471 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) 3472 c->counter[i] = 0; 3473 3474 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) 3475 p->counter[i] = 0; 3476 3477 memset(&t->perf_counter[0], 0, sizeof(t->perf_counter)); 3478 memset(&c->perf_counter[0], 0, sizeof(c->perf_counter)); 3479 memset(&p->perf_counter[0], 0, sizeof(p->perf_counter)); 3480 3481 memset(&t->pmt_counter[0], 0, ARRAY_SIZE(t->pmt_counter)); 3482 memset(&c->pmt_counter[0], 0, ARRAY_SIZE(c->pmt_counter)); 3483 memset(&p->pmt_counter[0], 0, ARRAY_SIZE(p->pmt_counter)); 3484 } 3485 3486 void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src) 3487 { 3488 /* Copy unit and scale from src if dst is not initialized */ 3489 if (dst->unit == RAPL_UNIT_INVALID) { 3490 dst->unit = src->unit; 3491 dst->scale = src->scale; 3492 } 3493 3494 assert(dst->unit == src->unit); 3495 assert(dst->scale == src->scale); 3496 3497 dst->raw_value += src->raw_value; 3498 } 3499 3500 int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 3501 { 3502 int i; 3503 struct msr_counter *mp; 3504 struct perf_counter_info *pp; 3505 struct pmt_counter *ppmt; 3506 3507 /* copy un-changing apic_id's */ 3508 if (DO_BIC(BIC_APIC)) 3509 average.threads.apic_id = t->apic_id; 3510 if (DO_BIC(BIC_X2APIC)) 3511 average.threads.x2apic_id = t->x2apic_id; 3512 3513 /* remember first tv_begin */ 3514 if (average.threads.tv_begin.tv_sec == 0) 3515 average.threads.tv_begin = t->tv_begin; 3516 3517 /* remember last tv_end */ 3518 average.threads.tv_end = t->tv_end; 3519 3520 average.threads.tsc += t->tsc; 3521 average.threads.aperf += t->aperf; 3522 average.threads.mperf += t->mperf; 3523 average.threads.c1 += t->c1; 3524 3525 average.threads.instr_count += t->instr_count; 3526 3527 average.threads.irq_count += t->irq_count; 3528 average.threads.smi_count += t->smi_count; 3529 3530 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 3531 if (mp->format == FORMAT_RAW) 3532 continue; 3533 average.threads.counter[i] += t->counter[i]; 3534 } 3535 3536 for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { 3537 if (pp->format == FORMAT_RAW) 3538 continue; 3539 average.threads.perf_counter[i] += t->perf_counter[i]; 3540 } 3541 3542 for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { 3543 average.threads.pmt_counter[i] += t->pmt_counter[i]; 3544 } 3545 3546 /* sum per-core values only for 1st thread in core */ 3547 if (!is_cpu_first_thread_in_core(t, c, p)) 3548 return 0; 3549 3550 average.cores.c3 += c->c3; 3551 average.cores.c6 += c->c6; 3552 average.cores.c7 += c->c7; 3553 average.cores.mc6_us += c->mc6_us; 3554 3555 average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); 3556 average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt); 3557 3558 rapl_counter_accumulate(&average.cores.core_energy, &c->core_energy); 3559 3560 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 3561 if (mp->format == FORMAT_RAW) 3562 continue; 3563 average.cores.counter[i] += c->counter[i]; 3564 } 3565 3566 for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { 3567 if (pp->format == FORMAT_RAW) 3568 continue; 3569 average.cores.perf_counter[i] += c->perf_counter[i]; 3570 } 3571 3572 for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { 3573 average.cores.pmt_counter[i] += c->pmt_counter[i]; 3574 } 3575 3576 /* sum per-pkg values only for 1st core in pkg */ 3577 if (!is_cpu_first_core_in_package(t, c, p)) 3578 return 0; 3579 3580 if (DO_BIC(BIC_Totl_c0)) 3581 average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0; 3582 if (DO_BIC(BIC_Any_c0)) 3583 average.packages.pkg_any_core_c0 += p->pkg_any_core_c0; 3584 if (DO_BIC(BIC_GFX_c0)) 3585 average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0; 3586 if (DO_BIC(BIC_CPUGFX)) 3587 average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0; 3588 3589 average.packages.pc2 += p->pc2; 3590 if (DO_BIC(BIC_Pkgpc3)) 3591 average.packages.pc3 += p->pc3; 3592 if (DO_BIC(BIC_Pkgpc6)) 3593 average.packages.pc6 += p->pc6; 3594 if (DO_BIC(BIC_Pkgpc7)) 3595 average.packages.pc7 += p->pc7; 3596 average.packages.pc8 += p->pc8; 3597 average.packages.pc9 += p->pc9; 3598 average.packages.pc10 += p->pc10; 3599 average.packages.die_c6 += p->die_c6; 3600 3601 average.packages.cpu_lpi = p->cpu_lpi; 3602 average.packages.sys_lpi = p->sys_lpi; 3603 3604 rapl_counter_accumulate(&average.packages.energy_pkg, &p->energy_pkg); 3605 rapl_counter_accumulate(&average.packages.energy_dram, &p->energy_dram); 3606 rapl_counter_accumulate(&average.packages.energy_cores, &p->energy_cores); 3607 rapl_counter_accumulate(&average.packages.energy_gfx, &p->energy_gfx); 3608 3609 average.packages.gfx_rc6_ms = p->gfx_rc6_ms; 3610 average.packages.uncore_mhz = p->uncore_mhz; 3611 average.packages.gfx_mhz = p->gfx_mhz; 3612 average.packages.gfx_act_mhz = p->gfx_act_mhz; 3613 average.packages.sam_mc6_ms = p->sam_mc6_ms; 3614 average.packages.sam_mhz = p->sam_mhz; 3615 average.packages.sam_act_mhz = p->sam_act_mhz; 3616 3617 average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); 3618 3619 rapl_counter_accumulate(&average.packages.rapl_pkg_perf_status, &p->rapl_pkg_perf_status); 3620 rapl_counter_accumulate(&average.packages.rapl_dram_perf_status, &p->rapl_dram_perf_status); 3621 3622 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 3623 if ((mp->format == FORMAT_RAW) && (topo.num_packages == 0)) 3624 average.packages.counter[i] = p->counter[i]; 3625 else 3626 average.packages.counter[i] += p->counter[i]; 3627 } 3628 3629 for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { 3630 if ((pp->format == FORMAT_RAW) && (topo.num_packages == 0)) 3631 average.packages.perf_counter[i] = p->perf_counter[i]; 3632 else 3633 average.packages.perf_counter[i] += p->perf_counter[i]; 3634 } 3635 3636 for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { 3637 average.packages.pmt_counter[i] += p->pmt_counter[i]; 3638 } 3639 3640 return 0; 3641 } 3642 3643 /* 3644 * sum the counters for all cpus in the system 3645 * compute the weighted average 3646 */ 3647 void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data *p) 3648 { 3649 int i; 3650 struct msr_counter *mp; 3651 struct perf_counter_info *pp; 3652 struct pmt_counter *ppmt; 3653 3654 clear_counters(&average.threads, &average.cores, &average.packages); 3655 3656 for_all_cpus(sum_counters, t, c, p); 3657 3658 /* Use the global time delta for the average. */ 3659 average.threads.tv_delta = tv_delta; 3660 3661 average.threads.tsc /= topo.allowed_cpus; 3662 average.threads.aperf /= topo.allowed_cpus; 3663 average.threads.mperf /= topo.allowed_cpus; 3664 average.threads.instr_count /= topo.allowed_cpus; 3665 average.threads.c1 /= topo.allowed_cpus; 3666 3667 if (average.threads.irq_count > 9999999) 3668 sums_need_wide_columns = 1; 3669 3670 average.cores.c3 /= topo.allowed_cores; 3671 average.cores.c6 /= topo.allowed_cores; 3672 average.cores.c7 /= topo.allowed_cores; 3673 average.cores.mc6_us /= topo.allowed_cores; 3674 3675 if (DO_BIC(BIC_Totl_c0)) 3676 average.packages.pkg_wtd_core_c0 /= topo.allowed_packages; 3677 if (DO_BIC(BIC_Any_c0)) 3678 average.packages.pkg_any_core_c0 /= topo.allowed_packages; 3679 if (DO_BIC(BIC_GFX_c0)) 3680 average.packages.pkg_any_gfxe_c0 /= topo.allowed_packages; 3681 if (DO_BIC(BIC_CPUGFX)) 3682 average.packages.pkg_both_core_gfxe_c0 /= topo.allowed_packages; 3683 3684 average.packages.pc2 /= topo.allowed_packages; 3685 if (DO_BIC(BIC_Pkgpc3)) 3686 average.packages.pc3 /= topo.allowed_packages; 3687 if (DO_BIC(BIC_Pkgpc6)) 3688 average.packages.pc6 /= topo.allowed_packages; 3689 if (DO_BIC(BIC_Pkgpc7)) 3690 average.packages.pc7 /= topo.allowed_packages; 3691 3692 average.packages.pc8 /= topo.allowed_packages; 3693 average.packages.pc9 /= topo.allowed_packages; 3694 average.packages.pc10 /= topo.allowed_packages; 3695 average.packages.die_c6 /= topo.allowed_packages; 3696 3697 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 3698 if (mp->format == FORMAT_RAW) 3699 continue; 3700 if (mp->type == COUNTER_ITEMS) { 3701 if (average.threads.counter[i] > 9999999) 3702 sums_need_wide_columns = 1; 3703 continue; 3704 } 3705 average.threads.counter[i] /= topo.allowed_cpus; 3706 } 3707 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 3708 if (mp->format == FORMAT_RAW) 3709 continue; 3710 if (mp->type == COUNTER_ITEMS) { 3711 if (average.cores.counter[i] > 9999999) 3712 sums_need_wide_columns = 1; 3713 } 3714 average.cores.counter[i] /= topo.allowed_cores; 3715 } 3716 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 3717 if (mp->format == FORMAT_RAW) 3718 continue; 3719 if (mp->type == COUNTER_ITEMS) { 3720 if (average.packages.counter[i] > 9999999) 3721 sums_need_wide_columns = 1; 3722 } 3723 average.packages.counter[i] /= topo.allowed_packages; 3724 } 3725 3726 for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { 3727 if (pp->format == FORMAT_RAW) 3728 continue; 3729 if (pp->type == COUNTER_ITEMS) { 3730 if (average.threads.perf_counter[i] > 9999999) 3731 sums_need_wide_columns = 1; 3732 continue; 3733 } 3734 average.threads.perf_counter[i] /= topo.allowed_cpus; 3735 } 3736 for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { 3737 if (pp->format == FORMAT_RAW) 3738 continue; 3739 if (pp->type == COUNTER_ITEMS) { 3740 if (average.cores.perf_counter[i] > 9999999) 3741 sums_need_wide_columns = 1; 3742 } 3743 average.cores.perf_counter[i] /= topo.allowed_cores; 3744 } 3745 for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { 3746 if (pp->format == FORMAT_RAW) 3747 continue; 3748 if (pp->type == COUNTER_ITEMS) { 3749 if (average.packages.perf_counter[i] > 9999999) 3750 sums_need_wide_columns = 1; 3751 } 3752 average.packages.perf_counter[i] /= topo.allowed_packages; 3753 } 3754 3755 for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { 3756 average.threads.pmt_counter[i] /= topo.allowed_cpus; 3757 } 3758 for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { 3759 average.cores.pmt_counter[i] /= topo.allowed_cores; 3760 } 3761 for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { 3762 average.packages.pmt_counter[i] /= topo.allowed_packages; 3763 } 3764 } 3765 3766 static unsigned long long rdtsc(void) 3767 { 3768 unsigned int low, high; 3769 3770 asm volatile ("rdtsc":"=a" (low), "=d"(high)); 3771 3772 return low | ((unsigned long long)high) << 32; 3773 } 3774 3775 /* 3776 * Open a file, and exit on failure 3777 */ 3778 FILE *fopen_or_die(const char *path, const char *mode) 3779 { 3780 FILE *filep = fopen(path, mode); 3781 3782 if (!filep) 3783 err(1, "%s: open failed", path); 3784 return filep; 3785 } 3786 3787 /* 3788 * snapshot_sysfs_counter() 3789 * 3790 * return snapshot of given counter 3791 */ 3792 unsigned long long snapshot_sysfs_counter(char *path) 3793 { 3794 FILE *fp; 3795 int retval; 3796 unsigned long long counter; 3797 3798 fp = fopen_or_die(path, "r"); 3799 3800 retval = fscanf(fp, "%lld", &counter); 3801 if (retval != 1) 3802 err(1, "snapshot_sysfs_counter(%s)", path); 3803 3804 fclose(fp); 3805 3806 return counter; 3807 } 3808 3809 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp, char *counter_path) 3810 { 3811 if (mp->msr_num != 0) { 3812 assert(!no_msr); 3813 if (get_msr(cpu, mp->msr_num, counterp)) 3814 return -1; 3815 } else { 3816 char path[128 + PATH_BYTES]; 3817 3818 if (mp->flags & SYSFS_PERCPU) { 3819 sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", cpu, mp->sp->path); 3820 3821 *counterp = snapshot_sysfs_counter(path); 3822 } else { 3823 *counterp = snapshot_sysfs_counter(counter_path); 3824 } 3825 } 3826 3827 return 0; 3828 } 3829 3830 unsigned long long get_legacy_uncore_mhz(int package) 3831 { 3832 char path[128]; 3833 int die; 3834 static int warn_once; 3835 3836 /* 3837 * for this package, use the first die_id that exists 3838 */ 3839 for (die = 0; die <= topo.max_die_id; ++die) { 3840 3841 sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", 3842 package, die); 3843 3844 if (access(path, R_OK) == 0) 3845 return (snapshot_sysfs_counter(path) / 1000); 3846 } 3847 if (!warn_once) { 3848 warnx("BUG: %s: No %s", __func__, path); 3849 warn_once = 1; 3850 } 3851 3852 return 0; 3853 } 3854 3855 int get_epb(int cpu) 3856 { 3857 char path[128 + PATH_BYTES]; 3858 unsigned long long msr; 3859 int ret, epb = -1; 3860 FILE *fp; 3861 3862 sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu); 3863 3864 fp = fopen(path, "r"); 3865 if (!fp) 3866 goto msr_fallback; 3867 3868 ret = fscanf(fp, "%d", &epb); 3869 if (ret != 1) 3870 err(1, "%s(%s)", __func__, path); 3871 3872 fclose(fp); 3873 3874 return epb; 3875 3876 msr_fallback: 3877 if (no_msr) 3878 return -1; 3879 3880 get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr); 3881 3882 return msr & 0xf; 3883 } 3884 3885 void get_apic_id(struct thread_data *t) 3886 { 3887 unsigned int eax, ebx, ecx, edx; 3888 3889 if (DO_BIC(BIC_APIC)) { 3890 eax = ebx = ecx = edx = 0; 3891 __cpuid(1, eax, ebx, ecx, edx); 3892 3893 t->apic_id = (ebx >> 24) & 0xff; 3894 } 3895 3896 if (!DO_BIC(BIC_X2APIC)) 3897 return; 3898 3899 if (authentic_amd || hygon_genuine) { 3900 unsigned int topology_extensions; 3901 3902 if (max_extended_level < 0x8000001e) 3903 return; 3904 3905 eax = ebx = ecx = edx = 0; 3906 __cpuid(0x80000001, eax, ebx, ecx, edx); 3907 topology_extensions = ecx & (1 << 22); 3908 3909 if (topology_extensions == 0) 3910 return; 3911 3912 eax = ebx = ecx = edx = 0; 3913 __cpuid(0x8000001e, eax, ebx, ecx, edx); 3914 3915 t->x2apic_id = eax; 3916 return; 3917 } 3918 3919 if (!genuine_intel) 3920 return; 3921 3922 if (max_level < 0xb) 3923 return; 3924 3925 ecx = 0; 3926 __cpuid(0xb, eax, ebx, ecx, edx); 3927 t->x2apic_id = edx; 3928 3929 if (debug && (t->apic_id != (t->x2apic_id & 0xff))) 3930 fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id); 3931 } 3932 3933 int get_core_throt_cnt(int cpu, unsigned long long *cnt) 3934 { 3935 char path[128 + PATH_BYTES]; 3936 unsigned long long tmp; 3937 FILE *fp; 3938 int ret; 3939 3940 sprintf(path, "/sys/devices/system/cpu/cpu%d/thermal_throttle/core_throttle_count", cpu); 3941 fp = fopen(path, "r"); 3942 if (!fp) 3943 return -1; 3944 ret = fscanf(fp, "%lld", &tmp); 3945 fclose(fp); 3946 if (ret != 1) 3947 return -1; 3948 *cnt = tmp; 3949 3950 return 0; 3951 } 3952 3953 struct amperf_group_fd { 3954 int aperf; /* Also the group descriptor */ 3955 int mperf; 3956 }; 3957 3958 static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr) 3959 { 3960 int fdmt; 3961 int bytes_read; 3962 char buf[64]; 3963 int ret = -1; 3964 3965 fdmt = open(path, O_RDONLY, 0); 3966 if (fdmt == -1) { 3967 if (debug) 3968 fprintf(stderr, "Failed to parse perf counter info %s\n", path); 3969 ret = -1; 3970 goto cleanup_and_exit; 3971 } 3972 3973 bytes_read = read(fdmt, buf, sizeof(buf) - 1); 3974 if (bytes_read <= 0 || bytes_read >= (int)sizeof(buf)) { 3975 if (debug) 3976 fprintf(stderr, "Failed to parse perf counter info %s\n", path); 3977 ret = -1; 3978 goto cleanup_and_exit; 3979 } 3980 3981 buf[bytes_read] = '\0'; 3982 3983 if (sscanf(buf, parse_format, value_ptr) != 1) { 3984 if (debug) 3985 fprintf(stderr, "Failed to parse perf counter info %s\n", path); 3986 ret = -1; 3987 goto cleanup_and_exit; 3988 } 3989 3990 ret = 0; 3991 3992 cleanup_and_exit: 3993 close(fdmt); 3994 return ret; 3995 } 3996 3997 static unsigned int read_perf_counter_info_n(const char *const path, const char *const parse_format) 3998 { 3999 unsigned int v; 4000 int status; 4001 4002 status = read_perf_counter_info(path, parse_format, &v); 4003 if (status) 4004 v = -1; 4005 4006 return v; 4007 } 4008 4009 static unsigned int read_perf_type(const char *subsys) 4010 { 4011 const char *const path_format = "/sys/bus/event_source/devices/%s/type"; 4012 const char *const format = "%u"; 4013 char path[128]; 4014 4015 snprintf(path, sizeof(path), path_format, subsys); 4016 4017 return read_perf_counter_info_n(path, format); 4018 } 4019 4020 static unsigned int read_perf_config(const char *subsys, const char *event_name) 4021 { 4022 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s"; 4023 FILE *fconfig = NULL; 4024 char path[128]; 4025 char config_str[64]; 4026 unsigned int config; 4027 unsigned int umask; 4028 bool has_config = false; 4029 bool has_umask = false; 4030 unsigned int ret = -1; 4031 4032 snprintf(path, sizeof(path), path_format, subsys, event_name); 4033 4034 fconfig = fopen(path, "r"); 4035 if (!fconfig) 4036 return -1; 4037 4038 if (fgets(config_str, ARRAY_SIZE(config_str), fconfig) != config_str) 4039 goto cleanup_and_exit; 4040 4041 for (char *pconfig_str = &config_str[0]; pconfig_str;) { 4042 if (sscanf(pconfig_str, "event=%x", &config) == 1) { 4043 has_config = true; 4044 goto next; 4045 } 4046 4047 if (sscanf(pconfig_str, "umask=%x", &umask) == 1) { 4048 has_umask = true; 4049 goto next; 4050 } 4051 4052 next: 4053 pconfig_str = strchr(pconfig_str, ','); 4054 if (pconfig_str) { 4055 *pconfig_str = '\0'; 4056 ++pconfig_str; 4057 } 4058 } 4059 4060 if (!has_umask) 4061 umask = 0; 4062 4063 if (has_config) 4064 ret = (umask << 8) | config; 4065 4066 cleanup_and_exit: 4067 fclose(fconfig); 4068 return ret; 4069 } 4070 4071 static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_name) 4072 { 4073 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.unit"; 4074 const char *const format = "%s"; 4075 char path[128]; 4076 char unit_buffer[16]; 4077 4078 snprintf(path, sizeof(path), path_format, subsys, event_name); 4079 4080 read_perf_counter_info(path, format, &unit_buffer); 4081 if (strcmp("Joules", unit_buffer) == 0) 4082 return RAPL_UNIT_JOULES; 4083 4084 return RAPL_UNIT_INVALID; 4085 } 4086 4087 static double read_perf_scale(const char *subsys, const char *event_name) 4088 { 4089 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.scale"; 4090 const char *const format = "%lf"; 4091 char path[128]; 4092 double scale; 4093 4094 snprintf(path, sizeof(path), path_format, subsys, event_name); 4095 4096 if (read_perf_counter_info(path, format, &scale)) 4097 return 0.0; 4098 4099 return scale; 4100 } 4101 4102 size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci) 4103 { 4104 size_t ret = 0; 4105 4106 for (int i = 0; i < NUM_RAPL_COUNTERS; ++i) 4107 if (rci->source[i] == COUNTER_SOURCE_PERF) 4108 ++ret; 4109 4110 return ret; 4111 } 4112 4113 static size_t cstate_counter_info_count_perf(const struct cstate_counter_info_t *cci) 4114 { 4115 size_t ret = 0; 4116 4117 for (int i = 0; i < NUM_CSTATE_COUNTERS; ++i) 4118 if (cci->source[i] == COUNTER_SOURCE_PERF) 4119 ++ret; 4120 4121 return ret; 4122 } 4123 4124 void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci, unsigned int idx) 4125 { 4126 rc->raw_value = rci->data[idx]; 4127 rc->unit = rci->unit[idx]; 4128 rc->scale = rci->scale[idx]; 4129 } 4130 4131 int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct pkg_data *p) 4132 { 4133 unsigned long long perf_data[NUM_RAPL_COUNTERS + 1]; 4134 struct rapl_counter_info_t *rci; 4135 4136 if (debug >= 2) 4137 fprintf(stderr, "%s: cpu%d domain%d\n", __func__, cpu, domain); 4138 4139 assert(rapl_counter_info_perdomain); 4140 assert(domain < rapl_counter_info_perdomain_size); 4141 4142 rci = &rapl_counter_info_perdomain[domain]; 4143 4144 /* 4145 * If we have any perf counters to read, read them all now, in bulk 4146 */ 4147 if (rci->fd_perf != -1) { 4148 size_t num_perf_counters = rapl_counter_info_count_perf(rci); 4149 const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long); 4150 const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data)); 4151 4152 if (actual_read_size != expected_read_size) 4153 err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, 4154 actual_read_size); 4155 } 4156 4157 for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) { 4158 switch (rci->source[i]) { 4159 case COUNTER_SOURCE_NONE: 4160 break; 4161 4162 case COUNTER_SOURCE_PERF: 4163 assert(pi < ARRAY_SIZE(perf_data)); 4164 assert(rci->fd_perf != -1); 4165 4166 if (debug >= 2) 4167 fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n", 4168 i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]); 4169 4170 rci->data[i] = perf_data[pi]; 4171 4172 ++pi; 4173 break; 4174 4175 case COUNTER_SOURCE_MSR: 4176 if (debug >= 2) 4177 fprintf(stderr, "Reading rapl counter via msr at %u\n", i); 4178 4179 assert(!no_msr); 4180 if (rci->flags[i] & RAPL_COUNTER_FLAG_USE_MSR_SUM) { 4181 if (get_msr_sum(cpu, rci->msr[i], &rci->data[i])) 4182 return -13 - i; 4183 } else { 4184 if (get_msr(cpu, rci->msr[i], &rci->data[i])) 4185 return -13 - i; 4186 } 4187 4188 rci->data[i] &= rci->msr_mask[i]; 4189 if (rci->msr_shift[i] >= 0) 4190 rci->data[i] >>= abs(rci->msr_shift[i]); 4191 else 4192 rci->data[i] <<= abs(rci->msr_shift[i]); 4193 4194 break; 4195 } 4196 } 4197 4198 BUILD_BUG_ON(NUM_RAPL_COUNTERS != 7); 4199 write_rapl_counter(&p->energy_pkg, rci, RAPL_RCI_INDEX_ENERGY_PKG); 4200 write_rapl_counter(&p->energy_cores, rci, RAPL_RCI_INDEX_ENERGY_CORES); 4201 write_rapl_counter(&p->energy_dram, rci, RAPL_RCI_INDEX_DRAM); 4202 write_rapl_counter(&p->energy_gfx, rci, RAPL_RCI_INDEX_GFX); 4203 write_rapl_counter(&p->rapl_pkg_perf_status, rci, RAPL_RCI_INDEX_PKG_PERF_STATUS); 4204 write_rapl_counter(&p->rapl_dram_perf_status, rci, RAPL_RCI_INDEX_DRAM_PERF_STATUS); 4205 write_rapl_counter(&c->core_energy, rci, RAPL_RCI_INDEX_CORE_ENERGY); 4206 4207 return 0; 4208 } 4209 4210 char *find_sysfs_path_by_id(struct sysfs_path *sp, int id) 4211 { 4212 while (sp) { 4213 if (sp->id == id) 4214 return (sp->path); 4215 sp = sp->next; 4216 } 4217 if (debug) 4218 warnx("%s: id%d not found", __func__, id); 4219 return NULL; 4220 } 4221 4222 int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_data *c, struct pkg_data *p) 4223 { 4224 /* 4225 * Overcommit memory a little bit here, 4226 * but skip calculating exact sizes for the buffers. 4227 */ 4228 unsigned long long perf_data[NUM_CSTATE_COUNTERS]; 4229 unsigned long long perf_data_core[NUM_CSTATE_COUNTERS + 1]; 4230 unsigned long long perf_data_pkg[NUM_CSTATE_COUNTERS + 1]; 4231 4232 struct cstate_counter_info_t *cci; 4233 4234 if (debug >= 2) 4235 fprintf(stderr, "%s: cpu%d\n", __func__, cpu); 4236 4237 assert(ccstate_counter_info); 4238 assert(cpu <= ccstate_counter_info_size); 4239 4240 ZERO_ARRAY(perf_data); 4241 ZERO_ARRAY(perf_data_core); 4242 ZERO_ARRAY(perf_data_pkg); 4243 4244 cci = &ccstate_counter_info[cpu]; 4245 4246 /* 4247 * If we have any perf counters to read, read them all now, in bulk 4248 */ 4249 const size_t num_perf_counters = cstate_counter_info_count_perf(cci); 4250 ssize_t expected_read_size = num_perf_counters * sizeof(unsigned long long); 4251 ssize_t actual_read_size_core = 0, actual_read_size_pkg = 0; 4252 4253 if (cci->fd_perf_core != -1) { 4254 /* Each descriptor read begins with number of counters read. */ 4255 expected_read_size += sizeof(unsigned long long); 4256 4257 actual_read_size_core = read(cci->fd_perf_core, &perf_data_core[0], sizeof(perf_data_core)); 4258 4259 if (actual_read_size_core <= 0) 4260 err(-1, "%s: read perf %s: %ld", __func__, "core", actual_read_size_core); 4261 } 4262 4263 if (cci->fd_perf_pkg != -1) { 4264 /* Each descriptor read begins with number of counters read. */ 4265 expected_read_size += sizeof(unsigned long long); 4266 4267 actual_read_size_pkg = read(cci->fd_perf_pkg, &perf_data_pkg[0], sizeof(perf_data_pkg)); 4268 4269 if (actual_read_size_pkg <= 0) 4270 err(-1, "%s: read perf %s: %ld", __func__, "pkg", actual_read_size_pkg); 4271 } 4272 4273 const ssize_t actual_read_size_total = actual_read_size_core + actual_read_size_pkg; 4274 4275 if (actual_read_size_total != expected_read_size) 4276 err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, actual_read_size_total); 4277 4278 /* 4279 * Copy ccstate and pcstate data into unified buffer. 4280 * 4281 * Skip first element from core and pkg buffers. 4282 * Kernel puts there how many counters were read. 4283 */ 4284 const size_t num_core_counters = perf_data_core[0]; 4285 const size_t num_pkg_counters = perf_data_pkg[0]; 4286 4287 assert(num_perf_counters == num_core_counters + num_pkg_counters); 4288 4289 /* Copy ccstate perf data */ 4290 memcpy(&perf_data[0], &perf_data_core[1], num_core_counters * sizeof(unsigned long long)); 4291 4292 /* Copy pcstate perf data */ 4293 memcpy(&perf_data[num_core_counters], &perf_data_pkg[1], num_pkg_counters * sizeof(unsigned long long)); 4294 4295 for (unsigned int i = 0, pi = 0; i < NUM_CSTATE_COUNTERS; ++i) { 4296 switch (cci->source[i]) { 4297 case COUNTER_SOURCE_NONE: 4298 break; 4299 4300 case COUNTER_SOURCE_PERF: 4301 assert(pi < ARRAY_SIZE(perf_data)); 4302 assert(cci->fd_perf_core != -1 || cci->fd_perf_pkg != -1); 4303 4304 if (debug >= 2) 4305 fprintf(stderr, "cstate via %s %u: %llu\n", "perf", i, perf_data[pi]); 4306 4307 cci->data[i] = perf_data[pi]; 4308 4309 ++pi; 4310 break; 4311 4312 case COUNTER_SOURCE_MSR: 4313 assert(!no_msr); 4314 if (get_msr(cpu, cci->msr[i], &cci->data[i])) 4315 return -13 - i; 4316 4317 if (debug >= 2) 4318 fprintf(stderr, "cstate via %s0x%llx %u: %llu\n", "msr", cci->msr[i], i, cci->data[i]); 4319 4320 break; 4321 } 4322 } 4323 4324 /* 4325 * Helper to write the data only if the source of 4326 * the counter for the current cpu is not none. 4327 * 4328 * Otherwise we would overwrite core data with 0 (default value), 4329 * when invoked for the thread sibling. 4330 */ 4331 #define PERF_COUNTER_WRITE_DATA(out_counter, index) do { \ 4332 if (cci->source[index] != COUNTER_SOURCE_NONE) \ 4333 out_counter = cci->data[index]; \ 4334 } while (0) 4335 4336 BUILD_BUG_ON(NUM_CSTATE_COUNTERS != 11); 4337 4338 PERF_COUNTER_WRITE_DATA(t->c1, CCSTATE_RCI_INDEX_C1_RESIDENCY); 4339 PERF_COUNTER_WRITE_DATA(c->c3, CCSTATE_RCI_INDEX_C3_RESIDENCY); 4340 PERF_COUNTER_WRITE_DATA(c->c6, CCSTATE_RCI_INDEX_C6_RESIDENCY); 4341 PERF_COUNTER_WRITE_DATA(c->c7, CCSTATE_RCI_INDEX_C7_RESIDENCY); 4342 4343 PERF_COUNTER_WRITE_DATA(p->pc2, PCSTATE_RCI_INDEX_C2_RESIDENCY); 4344 PERF_COUNTER_WRITE_DATA(p->pc3, PCSTATE_RCI_INDEX_C3_RESIDENCY); 4345 PERF_COUNTER_WRITE_DATA(p->pc6, PCSTATE_RCI_INDEX_C6_RESIDENCY); 4346 PERF_COUNTER_WRITE_DATA(p->pc7, PCSTATE_RCI_INDEX_C7_RESIDENCY); 4347 PERF_COUNTER_WRITE_DATA(p->pc8, PCSTATE_RCI_INDEX_C8_RESIDENCY); 4348 PERF_COUNTER_WRITE_DATA(p->pc9, PCSTATE_RCI_INDEX_C9_RESIDENCY); 4349 PERF_COUNTER_WRITE_DATA(p->pc10, PCSTATE_RCI_INDEX_C10_RESIDENCY); 4350 4351 #undef PERF_COUNTER_WRITE_DATA 4352 4353 return 0; 4354 } 4355 4356 size_t msr_counter_info_count_perf(const struct msr_counter_info_t *mci) 4357 { 4358 size_t ret = 0; 4359 4360 for (int i = 0; i < NUM_MSR_COUNTERS; ++i) 4361 if (mci->source[i] == COUNTER_SOURCE_PERF) 4362 ++ret; 4363 4364 return ret; 4365 } 4366 4367 int get_smi_aperf_mperf(unsigned int cpu, struct thread_data *t) 4368 { 4369 unsigned long long perf_data[NUM_MSR_COUNTERS + 1]; 4370 4371 struct msr_counter_info_t *mci; 4372 4373 if (debug >= 2) 4374 fprintf(stderr, "%s: cpu%d\n", __func__, cpu); 4375 4376 assert(msr_counter_info); 4377 assert(cpu <= msr_counter_info_size); 4378 4379 mci = &msr_counter_info[cpu]; 4380 4381 ZERO_ARRAY(perf_data); 4382 ZERO_ARRAY(mci->data); 4383 4384 if (mci->fd_perf != -1) { 4385 const size_t num_perf_counters = msr_counter_info_count_perf(mci); 4386 const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long); 4387 const ssize_t actual_read_size = read(mci->fd_perf, &perf_data[0], sizeof(perf_data)); 4388 4389 if (actual_read_size != expected_read_size) 4390 err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, 4391 actual_read_size); 4392 } 4393 4394 for (unsigned int i = 0, pi = 1; i < NUM_MSR_COUNTERS; ++i) { 4395 switch (mci->source[i]) { 4396 case COUNTER_SOURCE_NONE: 4397 break; 4398 4399 case COUNTER_SOURCE_PERF: 4400 assert(pi < ARRAY_SIZE(perf_data)); 4401 assert(mci->fd_perf != -1); 4402 4403 if (debug >= 2) 4404 fprintf(stderr, "Reading msr counter via perf at %u: %llu\n", i, perf_data[pi]); 4405 4406 mci->data[i] = perf_data[pi]; 4407 4408 ++pi; 4409 break; 4410 4411 case COUNTER_SOURCE_MSR: 4412 assert(!no_msr); 4413 4414 if (get_msr(cpu, mci->msr[i], &mci->data[i])) 4415 return -2 - i; 4416 4417 mci->data[i] &= mci->msr_mask[i]; 4418 4419 if (debug >= 2) 4420 fprintf(stderr, "Reading msr counter via msr at %u: %llu\n", i, mci->data[i]); 4421 4422 break; 4423 } 4424 } 4425 4426 BUILD_BUG_ON(NUM_MSR_COUNTERS != 3); 4427 t->aperf = mci->data[MSR_RCI_INDEX_APERF]; 4428 t->mperf = mci->data[MSR_RCI_INDEX_MPERF]; 4429 t->smi_count = mci->data[MSR_RCI_INDEX_SMI]; 4430 4431 return 0; 4432 } 4433 4434 int perf_counter_info_read_values(struct perf_counter_info *pp, int cpu, unsigned long long *out, size_t out_size) 4435 { 4436 unsigned int domain; 4437 unsigned long long value; 4438 int fd_counter; 4439 4440 for (size_t i = 0; pp; ++i, pp = pp->next) { 4441 domain = cpu_to_domain(pp, cpu); 4442 assert(domain < pp->num_domains); 4443 4444 fd_counter = pp->fd_perf_per_domain[domain]; 4445 4446 if (fd_counter == -1) 4447 continue; 4448 4449 if (read(fd_counter, &value, sizeof(value)) != sizeof(value)) 4450 return 1; 4451 4452 assert(i < out_size); 4453 out[i] = value * pp->scale; 4454 } 4455 4456 return 0; 4457 } 4458 4459 unsigned long pmt_gen_value_mask(unsigned int lsb, unsigned int msb) 4460 { 4461 unsigned long mask; 4462 4463 if (msb == 63) 4464 mask = 0xffffffffffffffff; 4465 else 4466 mask = ((1 << (msb + 1)) - 1); 4467 4468 mask -= (1 << lsb) - 1; 4469 4470 return mask; 4471 } 4472 4473 unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id) 4474 { 4475 assert(domain_id < ppmt->num_domains); 4476 4477 const unsigned long *pmmio = ppmt->domains[domain_id].pcounter; 4478 const unsigned long value = pmmio ? *pmmio : 0; 4479 const unsigned long value_mask = pmt_gen_value_mask(ppmt->lsb, ppmt->msb); 4480 const unsigned long value_shift = ppmt->lsb; 4481 4482 return (value & value_mask) >> value_shift; 4483 } 4484 4485 /* 4486 * get_counters(...) 4487 * migrate to cpu 4488 * acquire and record local counters for that cpu 4489 */ 4490 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 4491 { 4492 int cpu = t->cpu_id; 4493 unsigned long long msr; 4494 struct msr_counter *mp; 4495 struct pmt_counter *pp; 4496 int i; 4497 int status; 4498 4499 if (cpu_migrate(cpu)) { 4500 fprintf(outf, "%s: Could not migrate to CPU %d\n", __func__, cpu); 4501 return -1; 4502 } 4503 4504 gettimeofday(&t->tv_begin, (struct timezone *)NULL); 4505 4506 if (first_counter_read) 4507 get_apic_id(t); 4508 4509 t->tsc = rdtsc(); /* we are running on local CPU of interest */ 4510 4511 get_smi_aperf_mperf(cpu, t); 4512 4513 if (DO_BIC(BIC_IPC)) 4514 if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long)) 4515 return -4; 4516 4517 if (DO_BIC(BIC_IRQ)) 4518 t->irq_count = irqs_per_cpu[cpu]; 4519 4520 get_cstate_counters(cpu, t, c, p); 4521 4522 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 4523 if (get_mp(cpu, mp, &t->counter[i], mp->sp->path)) 4524 return -10; 4525 } 4526 4527 if (perf_counter_info_read_values(sys.perf_tp, cpu, t->perf_counter, MAX_ADDED_THREAD_COUNTERS)) 4528 return -10; 4529 4530 for (i = 0, pp = sys.pmt_tp; pp; i++, pp = pp->next) 4531 t->pmt_counter[i] = pmt_read_counter(pp, t->cpu_id); 4532 4533 /* collect core counters only for 1st thread in core */ 4534 if (!is_cpu_first_thread_in_core(t, c, p)) 4535 goto done; 4536 4537 if (platform->has_per_core_rapl) { 4538 status = get_rapl_counters(cpu, c->core_id, c, p); 4539 if (status != 0) 4540 return status; 4541 } 4542 4543 if (DO_BIC(BIC_CPU_c7) && t->is_atom) { 4544 /* 4545 * For Atom CPUs that has core cstate deeper than c6, 4546 * MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper. 4547 * Minus CC7 (and deeper cstates) residency to get 4548 * accturate cc6 residency. 4549 */ 4550 c->c6 -= c->c7; 4551 } 4552 4553 if (DO_BIC(BIC_Mod_c6)) 4554 if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us)) 4555 return -8; 4556 4557 if (DO_BIC(BIC_CoreTmp)) { 4558 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) 4559 return -9; 4560 c->core_temp_c = tj_max - ((msr >> 16) & 0x7F); 4561 } 4562 4563 if (DO_BIC(BIC_CORE_THROT_CNT)) 4564 get_core_throt_cnt(cpu, &c->core_throt_cnt); 4565 4566 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 4567 if (get_mp(cpu, mp, &c->counter[i], mp->sp->path)) 4568 return -10; 4569 } 4570 4571 if (perf_counter_info_read_values(sys.perf_cp, cpu, c->perf_counter, MAX_ADDED_CORE_COUNTERS)) 4572 return -10; 4573 4574 for (i = 0, pp = sys.pmt_cp; pp; i++, pp = pp->next) 4575 c->pmt_counter[i] = pmt_read_counter(pp, c->core_id); 4576 4577 /* collect package counters only for 1st core in package */ 4578 if (!is_cpu_first_core_in_package(t, c, p)) 4579 goto done; 4580 4581 if (DO_BIC(BIC_Totl_c0)) { 4582 if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0)) 4583 return -10; 4584 } 4585 if (DO_BIC(BIC_Any_c0)) { 4586 if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0)) 4587 return -11; 4588 } 4589 if (DO_BIC(BIC_GFX_c0)) { 4590 if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0)) 4591 return -12; 4592 } 4593 if (DO_BIC(BIC_CPUGFX)) { 4594 if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0)) 4595 return -13; 4596 } 4597 4598 if (DO_BIC(BIC_CPU_LPI)) 4599 p->cpu_lpi = cpuidle_cur_cpu_lpi_us; 4600 if (DO_BIC(BIC_SYS_LPI)) 4601 p->sys_lpi = cpuidle_cur_sys_lpi_us; 4602 4603 if (!platform->has_per_core_rapl) { 4604 status = get_rapl_counters(cpu, p->package_id, c, p); 4605 if (status != 0) 4606 return status; 4607 } 4608 4609 if (DO_BIC(BIC_PkgTmp)) { 4610 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) 4611 return -17; 4612 p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F); 4613 } 4614 4615 if (DO_BIC(BIC_UNCORE_MHZ)) 4616 p->uncore_mhz = get_legacy_uncore_mhz(p->package_id); 4617 4618 if (DO_BIC(BIC_GFX_rc6)) 4619 p->gfx_rc6_ms = gfx_info[GFX_rc6].val_ull; 4620 4621 if (DO_BIC(BIC_GFXMHz)) 4622 p->gfx_mhz = gfx_info[GFX_MHz].val; 4623 4624 if (DO_BIC(BIC_GFXACTMHz)) 4625 p->gfx_act_mhz = gfx_info[GFX_ACTMHz].val; 4626 4627 if (DO_BIC(BIC_SAM_mc6)) 4628 p->sam_mc6_ms = gfx_info[SAM_mc6].val_ull; 4629 4630 if (DO_BIC(BIC_SAMMHz)) 4631 p->sam_mhz = gfx_info[SAM_MHz].val; 4632 4633 if (DO_BIC(BIC_SAMACTMHz)) 4634 p->sam_act_mhz = gfx_info[SAM_ACTMHz].val; 4635 4636 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 4637 char *path = NULL; 4638 4639 if (mp->msr_num == 0) { 4640 path = find_sysfs_path_by_id(mp->sp, p->package_id); 4641 if (path == NULL) { 4642 warnx("%s: package_id %d not found", __func__, p->package_id); 4643 return -10; 4644 } 4645 } 4646 if (get_mp(cpu, mp, &p->counter[i], path)) 4647 return -10; 4648 } 4649 4650 if (perf_counter_info_read_values(sys.perf_pp, cpu, p->perf_counter, MAX_ADDED_PACKAGE_COUNTERS)) 4651 return -10; 4652 4653 for (i = 0, pp = sys.pmt_pp; pp; i++, pp = pp->next) 4654 p->pmt_counter[i] = pmt_read_counter(pp, p->package_id); 4655 4656 done: 4657 gettimeofday(&t->tv_end, (struct timezone *)NULL); 4658 4659 return 0; 4660 } 4661 4662 int pkg_cstate_limit = PCLUKN; 4663 char *pkg_cstate_limit_strings[] = { "unknown", "reserved", "pc0", "pc1", "pc2", 4664 "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited" 4665 }; 4666 4667 int nhm_pkg_cstate_limits[16] = 4668 { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4669 PCLRSV, PCLRSV 4670 }; 4671 4672 int snb_pkg_cstate_limits[16] = 4673 { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4674 PCLRSV, PCLRSV 4675 }; 4676 4677 int hsw_pkg_cstate_limits[16] = 4678 { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4679 PCLRSV, PCLRSV 4680 }; 4681 4682 int slv_pkg_cstate_limits[16] = 4683 { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4684 PCL__6, PCL__7 4685 }; 4686 4687 int amt_pkg_cstate_limits[16] = 4688 { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4689 PCLRSV, PCLRSV 4690 }; 4691 4692 int phi_pkg_cstate_limits[16] = 4693 { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4694 PCLRSV, PCLRSV 4695 }; 4696 4697 int glm_pkg_cstate_limits[16] = 4698 { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4699 PCLRSV, PCLRSV 4700 }; 4701 4702 int skx_pkg_cstate_limits[16] = 4703 { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4704 PCLRSV, PCLRSV 4705 }; 4706 4707 int icx_pkg_cstate_limits[16] = 4708 { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4709 PCLRSV, PCLRSV 4710 }; 4711 4712 void probe_cst_limit(void) 4713 { 4714 unsigned long long msr; 4715 int *pkg_cstate_limits; 4716 4717 if (!platform->has_nhm_msrs || no_msr) 4718 return; 4719 4720 switch (platform->cst_limit) { 4721 case CST_LIMIT_NHM: 4722 pkg_cstate_limits = nhm_pkg_cstate_limits; 4723 break; 4724 case CST_LIMIT_SNB: 4725 pkg_cstate_limits = snb_pkg_cstate_limits; 4726 break; 4727 case CST_LIMIT_HSW: 4728 pkg_cstate_limits = hsw_pkg_cstate_limits; 4729 break; 4730 case CST_LIMIT_SKX: 4731 pkg_cstate_limits = skx_pkg_cstate_limits; 4732 break; 4733 case CST_LIMIT_ICX: 4734 pkg_cstate_limits = icx_pkg_cstate_limits; 4735 break; 4736 case CST_LIMIT_SLV: 4737 pkg_cstate_limits = slv_pkg_cstate_limits; 4738 break; 4739 case CST_LIMIT_AMT: 4740 pkg_cstate_limits = amt_pkg_cstate_limits; 4741 break; 4742 case CST_LIMIT_KNL: 4743 pkg_cstate_limits = phi_pkg_cstate_limits; 4744 break; 4745 case CST_LIMIT_GMT: 4746 pkg_cstate_limits = glm_pkg_cstate_limits; 4747 break; 4748 default: 4749 return; 4750 } 4751 4752 get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); 4753 pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; 4754 } 4755 4756 static void dump_platform_info(void) 4757 { 4758 unsigned long long msr; 4759 unsigned int ratio; 4760 4761 if (!platform->has_nhm_msrs || no_msr) 4762 return; 4763 4764 get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); 4765 4766 fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); 4767 4768 ratio = (msr >> 40) & 0xFF; 4769 fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk); 4770 4771 ratio = (msr >> 8) & 0xFF; 4772 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); 4773 } 4774 4775 static void dump_power_ctl(void) 4776 { 4777 unsigned long long msr; 4778 4779 if (!platform->has_nhm_msrs || no_msr) 4780 return; 4781 4782 get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); 4783 fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", 4784 base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); 4785 4786 /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */ 4787 if (platform->has_cst_prewake_bit) 4788 fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN"); 4789 4790 return; 4791 } 4792 4793 static void dump_turbo_ratio_limit2(void) 4794 { 4795 unsigned long long msr; 4796 unsigned int ratio; 4797 4798 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); 4799 4800 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr); 4801 4802 ratio = (msr >> 8) & 0xFF; 4803 if (ratio) 4804 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk); 4805 4806 ratio = (msr >> 0) & 0xFF; 4807 if (ratio) 4808 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk); 4809 return; 4810 } 4811 4812 static void dump_turbo_ratio_limit1(void) 4813 { 4814 unsigned long long msr; 4815 unsigned int ratio; 4816 4817 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); 4818 4819 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr); 4820 4821 ratio = (msr >> 56) & 0xFF; 4822 if (ratio) 4823 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk); 4824 4825 ratio = (msr >> 48) & 0xFF; 4826 if (ratio) 4827 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk); 4828 4829 ratio = (msr >> 40) & 0xFF; 4830 if (ratio) 4831 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk); 4832 4833 ratio = (msr >> 32) & 0xFF; 4834 if (ratio) 4835 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk); 4836 4837 ratio = (msr >> 24) & 0xFF; 4838 if (ratio) 4839 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk); 4840 4841 ratio = (msr >> 16) & 0xFF; 4842 if (ratio) 4843 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk); 4844 4845 ratio = (msr >> 8) & 0xFF; 4846 if (ratio) 4847 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk); 4848 4849 ratio = (msr >> 0) & 0xFF; 4850 if (ratio) 4851 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk); 4852 return; 4853 } 4854 4855 static void dump_turbo_ratio_limits(int trl_msr_offset) 4856 { 4857 unsigned long long msr, core_counts; 4858 int shift; 4859 4860 get_msr(base_cpu, trl_msr_offset, &msr); 4861 fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n", 4862 base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY_" : "", msr); 4863 4864 if (platform->trl_msrs & TRL_CORECOUNT) { 4865 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts); 4866 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts); 4867 } else { 4868 core_counts = 0x0807060504030201; 4869 } 4870 4871 for (shift = 56; shift >= 0; shift -= 8) { 4872 unsigned int ratio, group_size; 4873 4874 ratio = (msr >> shift) & 0xFF; 4875 group_size = (core_counts >> shift) & 0xFF; 4876 if (ratio) 4877 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", 4878 ratio, bclk, ratio * bclk, group_size); 4879 } 4880 4881 return; 4882 } 4883 4884 static void dump_atom_turbo_ratio_limits(void) 4885 { 4886 unsigned long long msr; 4887 unsigned int ratio; 4888 4889 get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr); 4890 fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF); 4891 4892 ratio = (msr >> 0) & 0x3F; 4893 if (ratio) 4894 fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", ratio, bclk, ratio * bclk); 4895 4896 ratio = (msr >> 8) & 0x3F; 4897 if (ratio) 4898 fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", ratio, bclk, ratio * bclk); 4899 4900 ratio = (msr >> 16) & 0x3F; 4901 if (ratio) 4902 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); 4903 4904 get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr); 4905 fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF); 4906 4907 ratio = (msr >> 24) & 0x3F; 4908 if (ratio) 4909 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk); 4910 4911 ratio = (msr >> 16) & 0x3F; 4912 if (ratio) 4913 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk); 4914 4915 ratio = (msr >> 8) & 0x3F; 4916 if (ratio) 4917 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk); 4918 4919 ratio = (msr >> 0) & 0x3F; 4920 if (ratio) 4921 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", ratio, bclk, ratio * bclk); 4922 } 4923 4924 static void dump_knl_turbo_ratio_limits(void) 4925 { 4926 const unsigned int buckets_no = 7; 4927 4928 unsigned long long msr; 4929 int delta_cores, delta_ratio; 4930 int i, b_nr; 4931 unsigned int cores[buckets_no]; 4932 unsigned int ratio[buckets_no]; 4933 4934 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); 4935 4936 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); 4937 4938 /* 4939 * Turbo encoding in KNL is as follows: 4940 * [0] -- Reserved 4941 * [7:1] -- Base value of number of active cores of bucket 1. 4942 * [15:8] -- Base value of freq ratio of bucket 1. 4943 * [20:16] -- +ve delta of number of active cores of bucket 2. 4944 * i.e. active cores of bucket 2 = 4945 * active cores of bucket 1 + delta 4946 * [23:21] -- Negative delta of freq ratio of bucket 2. 4947 * i.e. freq ratio of bucket 2 = 4948 * freq ratio of bucket 1 - delta 4949 * [28:24]-- +ve delta of number of active cores of bucket 3. 4950 * [31:29]-- -ve delta of freq ratio of bucket 3. 4951 * [36:32]-- +ve delta of number of active cores of bucket 4. 4952 * [39:37]-- -ve delta of freq ratio of bucket 4. 4953 * [44:40]-- +ve delta of number of active cores of bucket 5. 4954 * [47:45]-- -ve delta of freq ratio of bucket 5. 4955 * [52:48]-- +ve delta of number of active cores of bucket 6. 4956 * [55:53]-- -ve delta of freq ratio of bucket 6. 4957 * [60:56]-- +ve delta of number of active cores of bucket 7. 4958 * [63:61]-- -ve delta of freq ratio of bucket 7. 4959 */ 4960 4961 b_nr = 0; 4962 cores[b_nr] = (msr & 0xFF) >> 1; 4963 ratio[b_nr] = (msr >> 8) & 0xFF; 4964 4965 for (i = 16; i < 64; i += 8) { 4966 delta_cores = (msr >> i) & 0x1F; 4967 delta_ratio = (msr >> (i + 5)) & 0x7; 4968 4969 cores[b_nr + 1] = cores[b_nr] + delta_cores; 4970 ratio[b_nr + 1] = ratio[b_nr] - delta_ratio; 4971 b_nr++; 4972 } 4973 4974 for (i = buckets_no - 1; i >= 0; i--) 4975 if (i > 0 ? ratio[i] != ratio[i - 1] : 1) 4976 fprintf(outf, 4977 "%d * %.1f = %.1f MHz max turbo %d active cores\n", 4978 ratio[i], bclk, ratio[i] * bclk, cores[i]); 4979 } 4980 4981 static void dump_cst_cfg(void) 4982 { 4983 unsigned long long msr; 4984 4985 if (!platform->has_nhm_msrs || no_msr) 4986 return; 4987 4988 get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); 4989 4990 fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr); 4991 4992 fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)", 4993 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", 4994 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", 4995 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", 4996 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", 4997 (msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]); 4998 4999 #define AUTOMATIC_CSTATE_CONVERSION (1UL << 16) 5000 if (platform->has_cst_auto_convension) { 5001 fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off"); 5002 } 5003 5004 fprintf(outf, ")\n"); 5005 5006 return; 5007 } 5008 5009 static void dump_config_tdp(void) 5010 { 5011 unsigned long long msr; 5012 5013 get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr); 5014 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr); 5015 fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF); 5016 5017 get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr); 5018 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr); 5019 if (msr) { 5020 fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF); 5021 fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF); 5022 fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); 5023 fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF); 5024 } 5025 fprintf(outf, ")\n"); 5026 5027 get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr); 5028 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr); 5029 if (msr) { 5030 fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF); 5031 fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF); 5032 fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); 5033 fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF); 5034 } 5035 fprintf(outf, ")\n"); 5036 5037 get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr); 5038 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr); 5039 if ((msr) & 0x3) 5040 fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3); 5041 fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); 5042 fprintf(outf, ")\n"); 5043 5044 get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr); 5045 fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); 5046 fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF); 5047 fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); 5048 fprintf(outf, ")\n"); 5049 } 5050 5051 unsigned int irtl_time_units[] = { 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 }; 5052 5053 void print_irtl(void) 5054 { 5055 unsigned long long msr; 5056 5057 if (!platform->has_irtl_msrs || no_msr) 5058 return; 5059 5060 if (platform->supported_cstates & PC3) { 5061 get_msr(base_cpu, MSR_PKGC3_IRTL, &msr); 5062 fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr); 5063 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", 5064 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 5065 } 5066 5067 if (platform->supported_cstates & PC6) { 5068 get_msr(base_cpu, MSR_PKGC6_IRTL, &msr); 5069 fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr); 5070 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", 5071 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 5072 } 5073 5074 if (platform->supported_cstates & PC7) { 5075 get_msr(base_cpu, MSR_PKGC7_IRTL, &msr); 5076 fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr); 5077 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", 5078 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 5079 } 5080 5081 if (platform->supported_cstates & PC8) { 5082 get_msr(base_cpu, MSR_PKGC8_IRTL, &msr); 5083 fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr); 5084 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", 5085 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 5086 } 5087 5088 if (platform->supported_cstates & PC9) { 5089 get_msr(base_cpu, MSR_PKGC9_IRTL, &msr); 5090 fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr); 5091 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", 5092 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 5093 } 5094 5095 if (platform->supported_cstates & PC10) { 5096 get_msr(base_cpu, MSR_PKGC10_IRTL, &msr); 5097 fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr); 5098 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", 5099 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 5100 } 5101 } 5102 5103 void free_fd_percpu(void) 5104 { 5105 int i; 5106 5107 if (!fd_percpu) 5108 return; 5109 5110 for (i = 0; i < topo.max_cpu_num + 1; ++i) { 5111 if (fd_percpu[i] != 0) 5112 close(fd_percpu[i]); 5113 } 5114 5115 free(fd_percpu); 5116 fd_percpu = NULL; 5117 } 5118 5119 void free_fd_instr_count_percpu(void) 5120 { 5121 if (!fd_instr_count_percpu) 5122 return; 5123 5124 for (int i = 0; i < topo.max_cpu_num + 1; ++i) { 5125 if (fd_instr_count_percpu[i] != 0) 5126 close(fd_instr_count_percpu[i]); 5127 } 5128 5129 free(fd_instr_count_percpu); 5130 fd_instr_count_percpu = NULL; 5131 } 5132 5133 void free_fd_cstate(void) 5134 { 5135 if (!ccstate_counter_info) 5136 return; 5137 5138 const int counter_info_num = ccstate_counter_info_size; 5139 5140 for (int counter_id = 0; counter_id < counter_info_num; ++counter_id) { 5141 if (ccstate_counter_info[counter_id].fd_perf_core != -1) 5142 close(ccstate_counter_info[counter_id].fd_perf_core); 5143 5144 if (ccstate_counter_info[counter_id].fd_perf_pkg != -1) 5145 close(ccstate_counter_info[counter_id].fd_perf_pkg); 5146 } 5147 5148 free(ccstate_counter_info); 5149 ccstate_counter_info = NULL; 5150 ccstate_counter_info_size = 0; 5151 } 5152 5153 void free_fd_msr(void) 5154 { 5155 if (!msr_counter_info) 5156 return; 5157 5158 for (int cpu = 0; cpu < topo.max_cpu_num; ++cpu) { 5159 if (msr_counter_info[cpu].fd_perf != -1) 5160 close(msr_counter_info[cpu].fd_perf); 5161 } 5162 5163 free(msr_counter_info); 5164 msr_counter_info = NULL; 5165 msr_counter_info_size = 0; 5166 } 5167 5168 void free_fd_rapl_percpu(void) 5169 { 5170 if (!rapl_counter_info_perdomain) 5171 return; 5172 5173 const int num_domains = rapl_counter_info_perdomain_size; 5174 5175 for (int domain_id = 0; domain_id < num_domains; ++domain_id) { 5176 if (rapl_counter_info_perdomain[domain_id].fd_perf != -1) 5177 close(rapl_counter_info_perdomain[domain_id].fd_perf); 5178 } 5179 5180 free(rapl_counter_info_perdomain); 5181 rapl_counter_info_perdomain = NULL; 5182 rapl_counter_info_perdomain_size = 0; 5183 } 5184 5185 void free_fd_added_perf_counters_(struct perf_counter_info *pp) 5186 { 5187 if (!pp) 5188 return; 5189 5190 if (!pp->fd_perf_per_domain) 5191 return; 5192 5193 while (pp) { 5194 for (size_t domain = 0; domain < pp->num_domains; ++domain) { 5195 if (pp->fd_perf_per_domain[domain] != -1) { 5196 close(pp->fd_perf_per_domain[domain]); 5197 pp->fd_perf_per_domain[domain] = -1; 5198 } 5199 } 5200 5201 free(pp->fd_perf_per_domain); 5202 pp->fd_perf_per_domain = NULL; 5203 5204 pp = pp->next; 5205 } 5206 } 5207 5208 void free_fd_added_perf_counters(void) 5209 { 5210 free_fd_added_perf_counters_(sys.perf_tp); 5211 free_fd_added_perf_counters_(sys.perf_cp); 5212 free_fd_added_perf_counters_(sys.perf_pp); 5213 } 5214 5215 void free_all_buffers(void) 5216 { 5217 int i; 5218 5219 CPU_FREE(cpu_present_set); 5220 cpu_present_set = NULL; 5221 cpu_present_setsize = 0; 5222 5223 CPU_FREE(cpu_effective_set); 5224 cpu_effective_set = NULL; 5225 cpu_effective_setsize = 0; 5226 5227 CPU_FREE(cpu_allowed_set); 5228 cpu_allowed_set = NULL; 5229 cpu_allowed_setsize = 0; 5230 5231 CPU_FREE(cpu_affinity_set); 5232 cpu_affinity_set = NULL; 5233 cpu_affinity_setsize = 0; 5234 5235 free(thread_even); 5236 free(core_even); 5237 free(package_even); 5238 5239 thread_even = NULL; 5240 core_even = NULL; 5241 package_even = NULL; 5242 5243 free(thread_odd); 5244 free(core_odd); 5245 free(package_odd); 5246 5247 thread_odd = NULL; 5248 core_odd = NULL; 5249 package_odd = NULL; 5250 5251 free(output_buffer); 5252 output_buffer = NULL; 5253 outp = NULL; 5254 5255 free_fd_percpu(); 5256 free_fd_instr_count_percpu(); 5257 free_fd_msr(); 5258 free_fd_rapl_percpu(); 5259 free_fd_cstate(); 5260 free_fd_added_perf_counters(); 5261 5262 free(irq_column_2_cpu); 5263 free(irqs_per_cpu); 5264 5265 for (i = 0; i <= topo.max_cpu_num; ++i) { 5266 if (cpus[i].put_ids) 5267 CPU_FREE(cpus[i].put_ids); 5268 } 5269 free(cpus); 5270 } 5271 5272 /* 5273 * Parse a file containing a single int. 5274 * Return 0 if file can not be opened 5275 * Exit if file can be opened, but can not be parsed 5276 */ 5277 int parse_int_file(const char *fmt, ...) 5278 { 5279 va_list args; 5280 char path[PATH_MAX]; 5281 FILE *filep; 5282 int value; 5283 5284 va_start(args, fmt); 5285 vsnprintf(path, sizeof(path), fmt, args); 5286 va_end(args); 5287 filep = fopen(path, "r"); 5288 if (!filep) 5289 return 0; 5290 if (fscanf(filep, "%d", &value) != 1) 5291 err(1, "%s: failed to parse number from file", path); 5292 fclose(filep); 5293 return value; 5294 } 5295 5296 /* 5297 * cpu_is_first_core_in_package(cpu) 5298 * return 1 if given CPU is 1st core in package 5299 */ 5300 int cpu_is_first_core_in_package(int cpu) 5301 { 5302 return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu); 5303 } 5304 5305 int get_physical_package_id(int cpu) 5306 { 5307 return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu); 5308 } 5309 5310 int get_die_id(int cpu) 5311 { 5312 return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu); 5313 } 5314 5315 int get_core_id(int cpu) 5316 { 5317 return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); 5318 } 5319 5320 void set_node_data(void) 5321 { 5322 int pkg, node, lnode, cpu, cpux; 5323 int cpu_count; 5324 5325 /* initialize logical_node_id */ 5326 for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) 5327 cpus[cpu].logical_node_id = -1; 5328 5329 cpu_count = 0; 5330 for (pkg = 0; pkg < topo.num_packages; pkg++) { 5331 lnode = 0; 5332 for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) { 5333 if (cpus[cpu].physical_package_id != pkg) 5334 continue; 5335 /* find a cpu with an unset logical_node_id */ 5336 if (cpus[cpu].logical_node_id != -1) 5337 continue; 5338 cpus[cpu].logical_node_id = lnode; 5339 node = cpus[cpu].physical_node_id; 5340 cpu_count++; 5341 /* 5342 * find all matching cpus on this pkg and set 5343 * the logical_node_id 5344 */ 5345 for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) { 5346 if ((cpus[cpux].physical_package_id == pkg) && (cpus[cpux].physical_node_id == node)) { 5347 cpus[cpux].logical_node_id = lnode; 5348 cpu_count++; 5349 } 5350 } 5351 lnode++; 5352 if (lnode > topo.nodes_per_pkg) 5353 topo.nodes_per_pkg = lnode; 5354 } 5355 if (cpu_count >= topo.max_cpu_num) 5356 break; 5357 } 5358 } 5359 5360 int get_physical_node_id(struct cpu_topology *thiscpu) 5361 { 5362 char path[80]; 5363 FILE *filep; 5364 int i; 5365 int cpu = thiscpu->logical_cpu_id; 5366 5367 for (i = 0; i <= topo.max_cpu_num; i++) { 5368 sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", cpu, i); 5369 filep = fopen(path, "r"); 5370 if (!filep) 5371 continue; 5372 fclose(filep); 5373 return i; 5374 } 5375 return -1; 5376 } 5377 5378 static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size) 5379 { 5380 unsigned int start, end; 5381 char *next = cpu_str; 5382 5383 while (next && *next) { 5384 5385 if (*next == '-') /* no negative cpu numbers */ 5386 return 1; 5387 5388 start = strtoul(next, &next, 10); 5389 5390 if (start >= CPU_SUBSET_MAXCPUS) 5391 return 1; 5392 CPU_SET_S(start, cpu_set_size, cpu_set); 5393 5394 if (*next == '\0' || *next == '\n') 5395 break; 5396 5397 if (*next == ',') { 5398 next += 1; 5399 continue; 5400 } 5401 5402 if (*next == '-') { 5403 next += 1; /* start range */ 5404 } else if (*next == '.') { 5405 next += 1; 5406 if (*next == '.') 5407 next += 1; /* start range */ 5408 else 5409 return 1; 5410 } 5411 5412 end = strtoul(next, &next, 10); 5413 if (end <= start) 5414 return 1; 5415 5416 while (++start <= end) { 5417 if (start >= CPU_SUBSET_MAXCPUS) 5418 return 1; 5419 CPU_SET_S(start, cpu_set_size, cpu_set); 5420 } 5421 5422 if (*next == ',') 5423 next += 1; 5424 else if (*next != '\0' && *next != '\n') 5425 return 1; 5426 } 5427 5428 return 0; 5429 } 5430 5431 int get_thread_siblings(struct cpu_topology *thiscpu) 5432 { 5433 char path[80], character; 5434 FILE *filep; 5435 unsigned long map; 5436 int so, shift, sib_core; 5437 int cpu = thiscpu->logical_cpu_id; 5438 int offset = topo.max_cpu_num + 1; 5439 size_t size; 5440 int thread_id = 0; 5441 5442 thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1)); 5443 if (thiscpu->thread_id < 0) 5444 thiscpu->thread_id = thread_id++; 5445 if (!thiscpu->put_ids) 5446 return -1; 5447 5448 size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 5449 CPU_ZERO_S(size, thiscpu->put_ids); 5450 5451 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu); 5452 filep = fopen(path, "r"); 5453 5454 if (!filep) { 5455 warnx("%s: open failed", path); 5456 return -1; 5457 } 5458 do { 5459 offset -= BITMASK_SIZE; 5460 if (fscanf(filep, "%lx%c", &map, &character) != 2) 5461 err(1, "%s: failed to parse file", path); 5462 for (shift = 0; shift < BITMASK_SIZE; shift++) { 5463 if ((map >> shift) & 0x1) { 5464 so = shift + offset; 5465 sib_core = get_core_id(so); 5466 if (sib_core == thiscpu->physical_core_id) { 5467 CPU_SET_S(so, size, thiscpu->put_ids); 5468 if ((so != cpu) && (cpus[so].thread_id < 0)) 5469 cpus[so].thread_id = thread_id++; 5470 } 5471 } 5472 } 5473 } while (character == ','); 5474 fclose(filep); 5475 5476 return CPU_COUNT_S(size, thiscpu->put_ids); 5477 } 5478 5479 /* 5480 * run func(thread, core, package) in topology order 5481 * skip non-present cpus 5482 */ 5483 5484 int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *, 5485 struct pkg_data *, struct thread_data *, struct core_data *, 5486 struct pkg_data *), struct thread_data *thread_base, 5487 struct core_data *core_base, struct pkg_data *pkg_base, 5488 struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2) 5489 { 5490 int retval, pkg_no, node_no, core_no, thread_no; 5491 5492 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 5493 for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) { 5494 for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { 5495 for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { 5496 struct thread_data *t, *t2; 5497 struct core_data *c, *c2; 5498 struct pkg_data *p, *p2; 5499 5500 t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); 5501 5502 if (cpu_is_not_allowed(t->cpu_id)) 5503 continue; 5504 5505 t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no); 5506 5507 c = GET_CORE(core_base, core_no, node_no, pkg_no); 5508 c2 = GET_CORE(core_base2, core_no, node_no, pkg_no); 5509 5510 p = GET_PKG(pkg_base, pkg_no); 5511 p2 = GET_PKG(pkg_base2, pkg_no); 5512 5513 retval = func(t, c, p, t2, c2, p2); 5514 if (retval) 5515 return retval; 5516 } 5517 } 5518 } 5519 } 5520 return 0; 5521 } 5522 5523 /* 5524 * run func(cpu) on every cpu in /proc/stat 5525 * return max_cpu number 5526 */ 5527 int for_all_proc_cpus(int (func) (int)) 5528 { 5529 FILE *fp; 5530 int cpu_num; 5531 int retval; 5532 5533 fp = fopen_or_die(proc_stat, "r"); 5534 5535 retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n"); 5536 if (retval != 0) 5537 err(1, "%s: failed to parse format", proc_stat); 5538 5539 while (1) { 5540 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num); 5541 if (retval != 1) 5542 break; 5543 5544 retval = func(cpu_num); 5545 if (retval) { 5546 fclose(fp); 5547 return (retval); 5548 } 5549 } 5550 fclose(fp); 5551 return 0; 5552 } 5553 5554 #define PATH_EFFECTIVE_CPUS "/sys/fs/cgroup/cpuset.cpus.effective" 5555 5556 static char cpu_effective_str[1024]; 5557 5558 static int update_effective_str(bool startup) 5559 { 5560 FILE *fp; 5561 char *pos; 5562 char buf[1024]; 5563 int ret; 5564 5565 if (cpu_effective_str[0] == '\0' && !startup) 5566 return 0; 5567 5568 fp = fopen(PATH_EFFECTIVE_CPUS, "r"); 5569 if (!fp) 5570 return 0; 5571 5572 pos = fgets(buf, 1024, fp); 5573 if (!pos) 5574 err(1, "%s: file read failed\n", PATH_EFFECTIVE_CPUS); 5575 5576 fclose(fp); 5577 5578 ret = strncmp(cpu_effective_str, buf, 1024); 5579 if (!ret) 5580 return 0; 5581 5582 strncpy(cpu_effective_str, buf, 1024); 5583 return 1; 5584 } 5585 5586 static void update_effective_set(bool startup) 5587 { 5588 update_effective_str(startup); 5589 5590 if (parse_cpu_str(cpu_effective_str, cpu_effective_set, cpu_effective_setsize)) 5591 err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str); 5592 } 5593 5594 void linux_perf_init(void); 5595 void msr_perf_init(void); 5596 void rapl_perf_init(void); 5597 void cstate_perf_init(void); 5598 void added_perf_counters_init(void); 5599 void pmt_init(void); 5600 5601 void re_initialize(void) 5602 { 5603 free_all_buffers(); 5604 setup_all_buffers(false); 5605 linux_perf_init(); 5606 msr_perf_init(); 5607 rapl_perf_init(); 5608 cstate_perf_init(); 5609 added_perf_counters_init(); 5610 pmt_init(); 5611 fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, 5612 topo.allowed_cpus); 5613 } 5614 5615 void set_max_cpu_num(void) 5616 { 5617 FILE *filep; 5618 int base_cpu; 5619 unsigned long dummy; 5620 char pathname[64]; 5621 5622 base_cpu = sched_getcpu(); 5623 if (base_cpu < 0) 5624 err(1, "cannot find calling cpu ID"); 5625 sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", base_cpu); 5626 5627 filep = fopen_or_die(pathname, "r"); 5628 topo.max_cpu_num = 0; 5629 while (fscanf(filep, "%lx,", &dummy) == 1) 5630 topo.max_cpu_num += BITMASK_SIZE; 5631 fclose(filep); 5632 topo.max_cpu_num--; /* 0 based */ 5633 } 5634 5635 /* 5636 * count_cpus() 5637 * remember the last one seen, it will be the max 5638 */ 5639 int count_cpus(int cpu) 5640 { 5641 UNUSED(cpu); 5642 5643 topo.num_cpus++; 5644 return 0; 5645 } 5646 5647 int mark_cpu_present(int cpu) 5648 { 5649 CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); 5650 return 0; 5651 } 5652 5653 int init_thread_id(int cpu) 5654 { 5655 cpus[cpu].thread_id = -1; 5656 return 0; 5657 } 5658 5659 /* 5660 * snapshot_proc_interrupts() 5661 * 5662 * read and record summary of /proc/interrupts 5663 * 5664 * return 1 if config change requires a restart, else return 0 5665 */ 5666 int snapshot_proc_interrupts(void) 5667 { 5668 static FILE *fp; 5669 int column, retval; 5670 5671 if (fp == NULL) 5672 fp = fopen_or_die("/proc/interrupts", "r"); 5673 else 5674 rewind(fp); 5675 5676 /* read 1st line of /proc/interrupts to get cpu* name for each column */ 5677 for (column = 0; column < topo.num_cpus; ++column) { 5678 int cpu_number; 5679 5680 retval = fscanf(fp, " CPU%d", &cpu_number); 5681 if (retval != 1) 5682 break; 5683 5684 if (cpu_number > topo.max_cpu_num) { 5685 warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num); 5686 return 1; 5687 } 5688 5689 irq_column_2_cpu[column] = cpu_number; 5690 irqs_per_cpu[cpu_number] = 0; 5691 } 5692 5693 /* read /proc/interrupt count lines and sum up irqs per cpu */ 5694 while (1) { 5695 int column; 5696 char buf[64]; 5697 5698 retval = fscanf(fp, " %s:", buf); /* flush irq# "N:" */ 5699 if (retval != 1) 5700 break; 5701 5702 /* read the count per cpu */ 5703 for (column = 0; column < topo.num_cpus; ++column) { 5704 5705 int cpu_number, irq_count; 5706 5707 retval = fscanf(fp, " %d", &irq_count); 5708 if (retval != 1) 5709 break; 5710 5711 cpu_number = irq_column_2_cpu[column]; 5712 irqs_per_cpu[cpu_number] += irq_count; 5713 5714 } 5715 5716 while (getc(fp) != '\n') ; /* flush interrupt description */ 5717 5718 } 5719 return 0; 5720 } 5721 5722 /* 5723 * snapshot_graphics() 5724 * 5725 * record snapshot of specified graphics sysfs knob 5726 * 5727 * return 1 if config change requires a restart, else return 0 5728 */ 5729 int snapshot_graphics(int idx) 5730 { 5731 FILE *fp; 5732 int retval; 5733 5734 switch (idx) { 5735 case GFX_rc6: 5736 case SAM_mc6: 5737 fp = fopen_or_die(gfx_info[idx].path, "r"); 5738 retval = fscanf(fp, "%lld", &gfx_info[idx].val_ull); 5739 if (retval != 1) 5740 err(1, "rc6"); 5741 fclose(fp); 5742 return 0; 5743 case GFX_MHz: 5744 case GFX_ACTMHz: 5745 case SAM_MHz: 5746 case SAM_ACTMHz: 5747 if (gfx_info[idx].fp == NULL) { 5748 gfx_info[idx].fp = fopen_or_die(gfx_info[idx].path, "r"); 5749 } else { 5750 rewind(gfx_info[idx].fp); 5751 fflush(gfx_info[idx].fp); 5752 } 5753 retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val); 5754 if (retval != 1) 5755 err(1, "MHz"); 5756 return 0; 5757 default: 5758 return -EINVAL; 5759 } 5760 } 5761 5762 /* 5763 * snapshot_cpu_lpi() 5764 * 5765 * record snapshot of 5766 * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us 5767 */ 5768 int snapshot_cpu_lpi_us(void) 5769 { 5770 FILE *fp; 5771 int retval; 5772 5773 fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r"); 5774 5775 retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us); 5776 if (retval != 1) { 5777 fprintf(stderr, "Disabling Low Power Idle CPU output\n"); 5778 BIC_NOT_PRESENT(BIC_CPU_LPI); 5779 fclose(fp); 5780 return -1; 5781 } 5782 5783 fclose(fp); 5784 5785 return 0; 5786 } 5787 5788 /* 5789 * snapshot_sys_lpi() 5790 * 5791 * record snapshot of sys_lpi_file 5792 */ 5793 int snapshot_sys_lpi_us(void) 5794 { 5795 FILE *fp; 5796 int retval; 5797 5798 fp = fopen_or_die(sys_lpi_file, "r"); 5799 5800 retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us); 5801 if (retval != 1) { 5802 fprintf(stderr, "Disabling Low Power Idle System output\n"); 5803 BIC_NOT_PRESENT(BIC_SYS_LPI); 5804 fclose(fp); 5805 return -1; 5806 } 5807 fclose(fp); 5808 5809 return 0; 5810 } 5811 5812 /* 5813 * snapshot /proc and /sys files 5814 * 5815 * return 1 if configuration restart needed, else return 0 5816 */ 5817 int snapshot_proc_sysfs_files(void) 5818 { 5819 if (DO_BIC(BIC_IRQ)) 5820 if (snapshot_proc_interrupts()) 5821 return 1; 5822 5823 if (DO_BIC(BIC_GFX_rc6)) 5824 snapshot_graphics(GFX_rc6); 5825 5826 if (DO_BIC(BIC_GFXMHz)) 5827 snapshot_graphics(GFX_MHz); 5828 5829 if (DO_BIC(BIC_GFXACTMHz)) 5830 snapshot_graphics(GFX_ACTMHz); 5831 5832 if (DO_BIC(BIC_SAM_mc6)) 5833 snapshot_graphics(SAM_mc6); 5834 5835 if (DO_BIC(BIC_SAMMHz)) 5836 snapshot_graphics(SAM_MHz); 5837 5838 if (DO_BIC(BIC_SAMACTMHz)) 5839 snapshot_graphics(SAM_ACTMHz); 5840 5841 if (DO_BIC(BIC_CPU_LPI)) 5842 snapshot_cpu_lpi_us(); 5843 5844 if (DO_BIC(BIC_SYS_LPI)) 5845 snapshot_sys_lpi_us(); 5846 5847 return 0; 5848 } 5849 5850 int exit_requested; 5851 5852 static void signal_handler(int signal) 5853 { 5854 switch (signal) { 5855 case SIGINT: 5856 exit_requested = 1; 5857 if (debug) 5858 fprintf(stderr, " SIGINT\n"); 5859 break; 5860 case SIGUSR1: 5861 if (debug > 1) 5862 fprintf(stderr, "SIGUSR1\n"); 5863 break; 5864 } 5865 } 5866 5867 void setup_signal_handler(void) 5868 { 5869 struct sigaction sa; 5870 5871 memset(&sa, 0, sizeof(sa)); 5872 5873 sa.sa_handler = &signal_handler; 5874 5875 if (sigaction(SIGINT, &sa, NULL) < 0) 5876 err(1, "sigaction SIGINT"); 5877 if (sigaction(SIGUSR1, &sa, NULL) < 0) 5878 err(1, "sigaction SIGUSR1"); 5879 } 5880 5881 void do_sleep(void) 5882 { 5883 struct timeval tout; 5884 struct timespec rest; 5885 fd_set readfds; 5886 int retval; 5887 5888 FD_ZERO(&readfds); 5889 FD_SET(0, &readfds); 5890 5891 if (ignore_stdin) { 5892 nanosleep(&interval_ts, NULL); 5893 return; 5894 } 5895 5896 tout = interval_tv; 5897 retval = select(1, &readfds, NULL, NULL, &tout); 5898 5899 if (retval == 1) { 5900 switch (getc(stdin)) { 5901 case 'q': 5902 exit_requested = 1; 5903 break; 5904 case EOF: 5905 /* 5906 * 'stdin' is a pipe closed on the other end. There 5907 * won't be any further input. 5908 */ 5909 ignore_stdin = 1; 5910 /* Sleep the rest of the time */ 5911 rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000); 5912 rest.tv_nsec = (tout.tv_usec % 1000000) * 1000; 5913 nanosleep(&rest, NULL); 5914 } 5915 } 5916 } 5917 5918 int get_msr_sum(int cpu, off_t offset, unsigned long long *msr) 5919 { 5920 int ret, idx; 5921 unsigned long long msr_cur, msr_last; 5922 5923 assert(!no_msr); 5924 5925 if (!per_cpu_msr_sum) 5926 return 1; 5927 5928 idx = offset_to_idx(offset); 5929 if (idx < 0) 5930 return idx; 5931 /* get_msr_sum() = sum + (get_msr() - last) */ 5932 ret = get_msr(cpu, offset, &msr_cur); 5933 if (ret) 5934 return ret; 5935 msr_last = per_cpu_msr_sum[cpu].entries[idx].last; 5936 DELTA_WRAP32(msr_cur, msr_last); 5937 *msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum; 5938 5939 return 0; 5940 } 5941 5942 timer_t timerid; 5943 5944 /* Timer callback, update the sum of MSRs periodically. */ 5945 static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p) 5946 { 5947 int i, ret; 5948 int cpu = t->cpu_id; 5949 5950 UNUSED(c); 5951 UNUSED(p); 5952 5953 assert(!no_msr); 5954 5955 for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) { 5956 unsigned long long msr_cur, msr_last; 5957 off_t offset; 5958 5959 if (!idx_valid(i)) 5960 continue; 5961 offset = idx_to_offset(i); 5962 if (offset < 0) 5963 continue; 5964 ret = get_msr(cpu, offset, &msr_cur); 5965 if (ret) { 5966 fprintf(outf, "Can not update msr(0x%llx)\n", (unsigned long long)offset); 5967 continue; 5968 } 5969 5970 msr_last = per_cpu_msr_sum[cpu].entries[i].last; 5971 per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff; 5972 5973 DELTA_WRAP32(msr_cur, msr_last); 5974 per_cpu_msr_sum[cpu].entries[i].sum += msr_last; 5975 } 5976 return 0; 5977 } 5978 5979 static void msr_record_handler(union sigval v) 5980 { 5981 UNUSED(v); 5982 5983 for_all_cpus(update_msr_sum, EVEN_COUNTERS); 5984 } 5985 5986 void msr_sum_record(void) 5987 { 5988 struct itimerspec its; 5989 struct sigevent sev; 5990 5991 per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array)); 5992 if (!per_cpu_msr_sum) { 5993 fprintf(outf, "Can not allocate memory for long time MSR.\n"); 5994 return; 5995 } 5996 /* 5997 * Signal handler might be restricted, so use thread notifier instead. 5998 */ 5999 memset(&sev, 0, sizeof(struct sigevent)); 6000 sev.sigev_notify = SIGEV_THREAD; 6001 sev.sigev_notify_function = msr_record_handler; 6002 6003 sev.sigev_value.sival_ptr = &timerid; 6004 if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) { 6005 fprintf(outf, "Can not create timer.\n"); 6006 goto release_msr; 6007 } 6008 6009 its.it_value.tv_sec = 0; 6010 its.it_value.tv_nsec = 1; 6011 /* 6012 * A wraparound time has been calculated early. 6013 * Some sources state that the peak power for a 6014 * microprocessor is usually 1.5 times the TDP rating, 6015 * use 2 * TDP for safety. 6016 */ 6017 its.it_interval.tv_sec = rapl_joule_counter_range / 2; 6018 its.it_interval.tv_nsec = 0; 6019 6020 if (timer_settime(timerid, 0, &its, NULL) == -1) { 6021 fprintf(outf, "Can not set timer.\n"); 6022 goto release_timer; 6023 } 6024 return; 6025 6026 release_timer: 6027 timer_delete(timerid); 6028 release_msr: 6029 free(per_cpu_msr_sum); 6030 } 6031 6032 /* 6033 * set_my_sched_priority(pri) 6034 * return previous priority on success 6035 * return value < -20 on failure 6036 */ 6037 int set_my_sched_priority(int priority) 6038 { 6039 int retval; 6040 int original_priority; 6041 6042 errno = 0; 6043 original_priority = getpriority(PRIO_PROCESS, 0); 6044 if (errno && (original_priority == -1)) 6045 return -21; 6046 6047 retval = setpriority(PRIO_PROCESS, 0, priority); 6048 if (retval) 6049 return -21; 6050 6051 errno = 0; 6052 retval = getpriority(PRIO_PROCESS, 0); 6053 if (retval != priority) 6054 return -21; 6055 6056 return original_priority; 6057 } 6058 6059 void turbostat_loop() 6060 { 6061 int retval; 6062 int restarted = 0; 6063 unsigned int done_iters = 0; 6064 6065 setup_signal_handler(); 6066 6067 /* 6068 * elevate own priority for interval mode 6069 * 6070 * ignore on error - we probably don't have permission to set it, but 6071 * it's not a big deal 6072 */ 6073 set_my_sched_priority(-20); 6074 6075 restart: 6076 restarted++; 6077 6078 snapshot_proc_sysfs_files(); 6079 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 6080 first_counter_read = 0; 6081 if (retval < -1) { 6082 exit(retval); 6083 } else if (retval == -1) { 6084 if (restarted > 10) { 6085 exit(retval); 6086 } 6087 re_initialize(); 6088 goto restart; 6089 } 6090 restarted = 0; 6091 done_iters = 0; 6092 gettimeofday(&tv_even, (struct timezone *)NULL); 6093 6094 while (1) { 6095 if (for_all_proc_cpus(cpu_is_not_present)) { 6096 re_initialize(); 6097 goto restart; 6098 } 6099 if (update_effective_str(false)) { 6100 re_initialize(); 6101 goto restart; 6102 } 6103 do_sleep(); 6104 if (snapshot_proc_sysfs_files()) 6105 goto restart; 6106 retval = for_all_cpus(get_counters, ODD_COUNTERS); 6107 if (retval < -1) { 6108 exit(retval); 6109 } else if (retval == -1) { 6110 re_initialize(); 6111 goto restart; 6112 } 6113 gettimeofday(&tv_odd, (struct timezone *)NULL); 6114 timersub(&tv_odd, &tv_even, &tv_delta); 6115 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) { 6116 re_initialize(); 6117 goto restart; 6118 } 6119 compute_average(EVEN_COUNTERS); 6120 format_all_counters(EVEN_COUNTERS); 6121 flush_output_stdout(); 6122 if (exit_requested) 6123 break; 6124 if (num_iterations && ++done_iters >= num_iterations) 6125 break; 6126 do_sleep(); 6127 if (snapshot_proc_sysfs_files()) 6128 goto restart; 6129 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 6130 if (retval < -1) { 6131 exit(retval); 6132 } else if (retval == -1) { 6133 re_initialize(); 6134 goto restart; 6135 } 6136 gettimeofday(&tv_even, (struct timezone *)NULL); 6137 timersub(&tv_even, &tv_odd, &tv_delta); 6138 if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) { 6139 re_initialize(); 6140 goto restart; 6141 } 6142 compute_average(ODD_COUNTERS); 6143 format_all_counters(ODD_COUNTERS); 6144 flush_output_stdout(); 6145 if (exit_requested) 6146 break; 6147 if (num_iterations && ++done_iters >= num_iterations) 6148 break; 6149 } 6150 } 6151 6152 void check_dev_msr() 6153 { 6154 struct stat sb; 6155 char pathname[32]; 6156 6157 if (no_msr) 6158 return; 6159 6160 sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); 6161 if (stat(pathname, &sb)) 6162 if (system("/sbin/modprobe msr > /dev/null 2>&1")) 6163 no_msr = 1; 6164 } 6165 6166 /* 6167 * check for CAP_SYS_RAWIO 6168 * return 0 on success 6169 * return 1 on fail 6170 */ 6171 int check_for_cap_sys_rawio(void) 6172 { 6173 cap_t caps; 6174 cap_flag_value_t cap_flag_value; 6175 int ret = 0; 6176 6177 caps = cap_get_proc(); 6178 if (caps == NULL) 6179 return 1; 6180 6181 if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) { 6182 ret = 1; 6183 goto free_and_exit; 6184 } 6185 6186 if (cap_flag_value != CAP_SET) { 6187 ret = 1; 6188 goto free_and_exit; 6189 } 6190 6191 free_and_exit: 6192 if (cap_free(caps) == -1) 6193 err(-6, "cap_free\n"); 6194 6195 return ret; 6196 } 6197 6198 void check_msr_permission(void) 6199 { 6200 int failed = 0; 6201 char pathname[32]; 6202 6203 if (no_msr) 6204 return; 6205 6206 /* check for CAP_SYS_RAWIO */ 6207 failed += check_for_cap_sys_rawio(); 6208 6209 /* test file permissions */ 6210 sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); 6211 if (euidaccess(pathname, R_OK)) { 6212 failed++; 6213 } 6214 6215 if (failed) { 6216 warnx("Failed to access %s. Some of the counters may not be available\n" 6217 "\tRun as root to enable them or use %s to disable the access explicitly", pathname, "--no-msr"); 6218 no_msr = 1; 6219 } 6220 } 6221 6222 void probe_bclk(void) 6223 { 6224 unsigned long long msr; 6225 unsigned int base_ratio; 6226 6227 if (!platform->has_nhm_msrs || no_msr) 6228 return; 6229 6230 if (platform->bclk_freq == BCLK_100MHZ) 6231 bclk = 100.00; 6232 else if (platform->bclk_freq == BCLK_133MHZ) 6233 bclk = 133.33; 6234 else if (platform->bclk_freq == BCLK_SLV) 6235 bclk = slm_bclk(); 6236 else 6237 return; 6238 6239 get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); 6240 base_ratio = (msr >> 8) & 0xFF; 6241 6242 base_hz = base_ratio * bclk * 1000000; 6243 has_base_hz = 1; 6244 6245 if (platform->enable_tsc_tweak) 6246 tsc_tweak = base_hz / tsc_hz; 6247 } 6248 6249 static void remove_underbar(char *s) 6250 { 6251 char *to = s; 6252 6253 while (*s) { 6254 if (*s != '_') 6255 *to++ = *s; 6256 s++; 6257 } 6258 6259 *to = 0; 6260 } 6261 6262 static void dump_turbo_ratio_info(void) 6263 { 6264 if (!has_turbo) 6265 return; 6266 6267 if (!platform->has_nhm_msrs || no_msr) 6268 return; 6269 6270 if (platform->trl_msrs & TRL_LIMIT2) 6271 dump_turbo_ratio_limit2(); 6272 6273 if (platform->trl_msrs & TRL_LIMIT1) 6274 dump_turbo_ratio_limit1(); 6275 6276 if (platform->trl_msrs & TRL_BASE) { 6277 dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT); 6278 6279 if (is_hybrid) 6280 dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT); 6281 } 6282 6283 if (platform->trl_msrs & TRL_ATOM) 6284 dump_atom_turbo_ratio_limits(); 6285 6286 if (platform->trl_msrs & TRL_KNL) 6287 dump_knl_turbo_ratio_limits(); 6288 6289 if (platform->has_config_tdp) 6290 dump_config_tdp(); 6291 } 6292 6293 static int read_sysfs_int(char *path) 6294 { 6295 FILE *input; 6296 int retval = -1; 6297 6298 input = fopen(path, "r"); 6299 if (input == NULL) { 6300 if (debug) 6301 fprintf(outf, "NSFOD %s\n", path); 6302 return (-1); 6303 } 6304 if (fscanf(input, "%d", &retval) != 1) 6305 err(1, "%s: failed to read int from file", path); 6306 fclose(input); 6307 6308 return (retval); 6309 } 6310 6311 static void dump_sysfs_file(char *path) 6312 { 6313 FILE *input; 6314 char cpuidle_buf[64]; 6315 6316 input = fopen(path, "r"); 6317 if (input == NULL) { 6318 if (debug) 6319 fprintf(outf, "NSFOD %s\n", path); 6320 return; 6321 } 6322 if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input)) 6323 err(1, "%s: failed to read file", path); 6324 fclose(input); 6325 6326 fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf); 6327 } 6328 6329 static void probe_intel_uncore_frequency_legacy(void) 6330 { 6331 int i, j; 6332 char path[256]; 6333 6334 for (i = 0; i < topo.num_packages; ++i) { 6335 for (j = 0; j <= topo.max_die_id; ++j) { 6336 int k, l; 6337 char path_base[128]; 6338 6339 sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i, 6340 j); 6341 6342 if (access(path_base, R_OK)) 6343 continue; 6344 6345 BIC_PRESENT(BIC_UNCORE_MHZ); 6346 6347 if (quiet) 6348 return; 6349 6350 sprintf(path, "%s/min_freq_khz", path_base); 6351 k = read_sysfs_int(path); 6352 sprintf(path, "%s/max_freq_khz", path_base); 6353 l = read_sysfs_int(path); 6354 fprintf(outf, "Uncore Frequency package%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000); 6355 6356 sprintf(path, "%s/initial_min_freq_khz", path_base); 6357 k = read_sysfs_int(path); 6358 sprintf(path, "%s/initial_max_freq_khz", path_base); 6359 l = read_sysfs_int(path); 6360 fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000); 6361 6362 sprintf(path, "%s/current_freq_khz", path_base); 6363 k = read_sysfs_int(path); 6364 fprintf(outf, " %d MHz\n", k / 1000); 6365 } 6366 } 6367 } 6368 6369 static void probe_intel_uncore_frequency_cluster(void) 6370 { 6371 int i, uncore_max_id; 6372 char path[256]; 6373 char path_base[128]; 6374 6375 if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK)) 6376 return; 6377 6378 for (uncore_max_id = 0;; ++uncore_max_id) { 6379 6380 sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", uncore_max_id); 6381 6382 /* uncore## start at 00 and skips no numbers, so stop upon first missing */ 6383 if (access(path_base, R_OK)) { 6384 uncore_max_id -= 1; 6385 break; 6386 } 6387 } 6388 for (i = uncore_max_id; i >= 0; --i) { 6389 int k, l; 6390 int package_id, domain_id, cluster_id; 6391 char name_buf[16]; 6392 6393 sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", i); 6394 6395 if (access(path_base, R_OK)) 6396 err(1, "%s: %s\n", __func__, path_base); 6397 6398 sprintf(path, "%s/package_id", path_base); 6399 package_id = read_sysfs_int(path); 6400 6401 sprintf(path, "%s/domain_id", path_base); 6402 domain_id = read_sysfs_int(path); 6403 6404 sprintf(path, "%s/fabric_cluster_id", path_base); 6405 cluster_id = read_sysfs_int(path); 6406 6407 sprintf(path, "%s/current_freq_khz", path_base); 6408 sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id); 6409 6410 add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); 6411 6412 if (quiet) 6413 continue; 6414 6415 sprintf(path, "%s/min_freq_khz", path_base); 6416 k = read_sysfs_int(path); 6417 sprintf(path, "%s/max_freq_khz", path_base); 6418 l = read_sysfs_int(path); 6419 fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id, 6420 cluster_id, k / 1000, l / 1000); 6421 6422 sprintf(path, "%s/initial_min_freq_khz", path_base); 6423 k = read_sysfs_int(path); 6424 sprintf(path, "%s/initial_max_freq_khz", path_base); 6425 l = read_sysfs_int(path); 6426 fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000); 6427 6428 sprintf(path, "%s/current_freq_khz", path_base); 6429 k = read_sysfs_int(path); 6430 fprintf(outf, " %d MHz\n", k / 1000); 6431 } 6432 } 6433 6434 static void probe_intel_uncore_frequency(void) 6435 { 6436 if (!genuine_intel) 6437 return; 6438 6439 if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK) == 0) 6440 probe_intel_uncore_frequency_cluster(); 6441 else 6442 probe_intel_uncore_frequency_legacy(); 6443 } 6444 6445 static void probe_graphics(void) 6446 { 6447 /* Xe graphics sysfs knobs */ 6448 if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) { 6449 FILE *fp; 6450 char buf[8]; 6451 bool gt0_is_gt; 6452 int idx; 6453 6454 fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r"); 6455 if (!fp) 6456 goto next; 6457 6458 if (!fread(buf, sizeof(char), 7, fp)) { 6459 fclose(fp); 6460 goto next; 6461 } 6462 fclose(fp); 6463 6464 if (!strncmp(buf, "gt0-rc", strlen("gt0-rc"))) 6465 gt0_is_gt = true; 6466 else if (!strncmp(buf, "gt0-mc", strlen("gt0-mc"))) 6467 gt0_is_gt = false; 6468 else 6469 goto next; 6470 6471 idx = gt0_is_gt ? GFX_rc6 : SAM_mc6; 6472 gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms"; 6473 6474 idx = gt0_is_gt ? GFX_MHz : SAM_MHz; 6475 if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", R_OK)) 6476 gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq"; 6477 6478 idx = gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz; 6479 if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", R_OK)) 6480 gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq"; 6481 6482 idx = gt0_is_gt ? SAM_mc6 : GFX_rc6; 6483 if (!access("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", R_OK)) 6484 gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms"; 6485 6486 idx = gt0_is_gt ? SAM_MHz : GFX_MHz; 6487 if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", R_OK)) 6488 gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq"; 6489 6490 idx = gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz; 6491 if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", R_OK)) 6492 gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq"; 6493 6494 goto end; 6495 } 6496 6497 next: 6498 /* New i915 graphics sysfs knobs */ 6499 if (!access("/sys/class/drm/card0/gt/gt0/rc6_residency_ms", R_OK)) { 6500 gfx_info[GFX_rc6].path = "/sys/class/drm/card0/gt/gt0/rc6_residency_ms"; 6501 6502 if (!access("/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz", R_OK)) 6503 gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz"; 6504 6505 if (!access("/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz", R_OK)) 6506 gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz"; 6507 6508 if (!access("/sys/class/drm/card0/gt/gt1/rc6_residency_ms", R_OK)) 6509 gfx_info[SAM_mc6].path = "/sys/class/drm/card0/gt/gt1/rc6_residency_ms"; 6510 6511 if (!access("/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz", R_OK)) 6512 gfx_info[SAM_MHz].path = "/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz"; 6513 6514 if (!access("/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz", R_OK)) 6515 gfx_info[SAM_ACTMHz].path = "/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz"; 6516 6517 goto end; 6518 } 6519 6520 /* Fall back to traditional i915 graphics sysfs knobs */ 6521 if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK)) 6522 gfx_info[GFX_rc6].path = "/sys/class/drm/card0/power/rc6_residency_ms"; 6523 6524 if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK)) 6525 gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt_cur_freq_mhz"; 6526 else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) 6527 gfx_info[GFX_MHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz"; 6528 6529 if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK)) 6530 gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt_act_freq_mhz"; 6531 else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK)) 6532 gfx_info[GFX_ACTMHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz"; 6533 6534 end: 6535 if (gfx_info[GFX_rc6].path) 6536 BIC_PRESENT(BIC_GFX_rc6); 6537 if (gfx_info[GFX_MHz].path) 6538 BIC_PRESENT(BIC_GFXMHz); 6539 if (gfx_info[GFX_ACTMHz].path) 6540 BIC_PRESENT(BIC_GFXACTMHz); 6541 if (gfx_info[SAM_mc6].path) 6542 BIC_PRESENT(BIC_SAM_mc6); 6543 if (gfx_info[SAM_MHz].path) 6544 BIC_PRESENT(BIC_SAMMHz); 6545 if (gfx_info[SAM_ACTMHz].path) 6546 BIC_PRESENT(BIC_SAMACTMHz); 6547 } 6548 6549 static void dump_sysfs_cstate_config(void) 6550 { 6551 char path[64]; 6552 char name_buf[16]; 6553 char desc[64]; 6554 FILE *input; 6555 int state; 6556 char *sp; 6557 6558 if (access("/sys/devices/system/cpu/cpuidle", R_OK)) { 6559 fprintf(outf, "cpuidle not loaded\n"); 6560 return; 6561 } 6562 6563 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver"); 6564 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor"); 6565 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro"); 6566 6567 for (state = 0; state < 10; ++state) { 6568 6569 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); 6570 input = fopen(path, "r"); 6571 if (input == NULL) 6572 continue; 6573 if (!fgets(name_buf, sizeof(name_buf), input)) 6574 err(1, "%s: failed to read file", path); 6575 6576 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ 6577 sp = strchr(name_buf, '-'); 6578 if (!sp) 6579 sp = strchrnul(name_buf, '\n'); 6580 *sp = '\0'; 6581 fclose(input); 6582 6583 remove_underbar(name_buf); 6584 6585 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state); 6586 input = fopen(path, "r"); 6587 if (input == NULL) 6588 continue; 6589 if (!fgets(desc, sizeof(desc), input)) 6590 err(1, "%s: failed to read file", path); 6591 6592 fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc); 6593 fclose(input); 6594 } 6595 } 6596 6597 static void dump_sysfs_pstate_config(void) 6598 { 6599 char path[64]; 6600 char driver_buf[64]; 6601 char governor_buf[64]; 6602 FILE *input; 6603 int turbo; 6604 6605 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", base_cpu); 6606 input = fopen(path, "r"); 6607 if (input == NULL) { 6608 fprintf(outf, "NSFOD %s\n", path); 6609 return; 6610 } 6611 if (!fgets(driver_buf, sizeof(driver_buf), input)) 6612 err(1, "%s: failed to read file", path); 6613 fclose(input); 6614 6615 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", base_cpu); 6616 input = fopen(path, "r"); 6617 if (input == NULL) { 6618 fprintf(outf, "NSFOD %s\n", path); 6619 return; 6620 } 6621 if (!fgets(governor_buf, sizeof(governor_buf), input)) 6622 err(1, "%s: failed to read file", path); 6623 fclose(input); 6624 6625 fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf); 6626 fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf); 6627 6628 sprintf(path, "/sys/devices/system/cpu/cpufreq/boost"); 6629 input = fopen(path, "r"); 6630 if (input != NULL) { 6631 if (fscanf(input, "%d", &turbo) != 1) 6632 err(1, "%s: failed to parse number from file", path); 6633 fprintf(outf, "cpufreq boost: %d\n", turbo); 6634 fclose(input); 6635 } 6636 6637 sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo"); 6638 input = fopen(path, "r"); 6639 if (input != NULL) { 6640 if (fscanf(input, "%d", &turbo) != 1) 6641 err(1, "%s: failed to parse number from file", path); 6642 fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo); 6643 fclose(input); 6644 } 6645 } 6646 6647 /* 6648 * print_epb() 6649 * Decode the ENERGY_PERF_BIAS MSR 6650 */ 6651 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) 6652 { 6653 char *epb_string; 6654 int cpu, epb; 6655 6656 UNUSED(c); 6657 UNUSED(p); 6658 6659 if (!has_epb) 6660 return 0; 6661 6662 cpu = t->cpu_id; 6663 6664 /* EPB is per-package */ 6665 if (!is_cpu_first_thread_in_package(t, c, p)) 6666 return 0; 6667 6668 if (cpu_migrate(cpu)) { 6669 fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu); 6670 return -1; 6671 } 6672 6673 epb = get_epb(cpu); 6674 if (epb < 0) 6675 return 0; 6676 6677 switch (epb) { 6678 case ENERGY_PERF_BIAS_PERFORMANCE: 6679 epb_string = "performance"; 6680 break; 6681 case ENERGY_PERF_BIAS_NORMAL: 6682 epb_string = "balanced"; 6683 break; 6684 case ENERGY_PERF_BIAS_POWERSAVE: 6685 epb_string = "powersave"; 6686 break; 6687 default: 6688 epb_string = "custom"; 6689 break; 6690 } 6691 fprintf(outf, "cpu%d: EPB: %d (%s)\n", cpu, epb, epb_string); 6692 6693 return 0; 6694 } 6695 6696 /* 6697 * print_hwp() 6698 * Decode the MSR_HWP_CAPABILITIES 6699 */ 6700 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) 6701 { 6702 unsigned long long msr; 6703 int cpu; 6704 6705 UNUSED(c); 6706 UNUSED(p); 6707 6708 if (no_msr) 6709 return 0; 6710 6711 if (!has_hwp) 6712 return 0; 6713 6714 cpu = t->cpu_id; 6715 6716 /* MSR_HWP_CAPABILITIES is per-package */ 6717 if (!is_cpu_first_thread_in_package(t, c, p)) 6718 return 0; 6719 6720 if (cpu_migrate(cpu)) { 6721 fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu); 6722 return -1; 6723 } 6724 6725 if (get_msr(cpu, MSR_PM_ENABLE, &msr)) 6726 return 0; 6727 6728 fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", cpu, msr, (msr & (1 << 0)) ? "" : "No-"); 6729 6730 /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */ 6731 if ((msr & (1 << 0)) == 0) 6732 return 0; 6733 6734 if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr)) 6735 return 0; 6736 6737 fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx " 6738 "(high %d guar %d eff %d low %d)\n", 6739 cpu, msr, 6740 (unsigned int)HWP_HIGHEST_PERF(msr), 6741 (unsigned int)HWP_GUARANTEED_PERF(msr), 6742 (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr)); 6743 6744 if (get_msr(cpu, MSR_HWP_REQUEST, &msr)) 6745 return 0; 6746 6747 fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx " 6748 "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n", 6749 cpu, msr, 6750 (unsigned int)(((msr) >> 0) & 0xff), 6751 (unsigned int)(((msr) >> 8) & 0xff), 6752 (unsigned int)(((msr) >> 16) & 0xff), 6753 (unsigned int)(((msr) >> 24) & 0xff), 6754 (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1)); 6755 6756 if (has_hwp_pkg) { 6757 if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr)) 6758 return 0; 6759 6760 fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx " 6761 "(min %d max %d des %d epp 0x%x window 0x%x)\n", 6762 cpu, msr, 6763 (unsigned int)(((msr) >> 0) & 0xff), 6764 (unsigned int)(((msr) >> 8) & 0xff), 6765 (unsigned int)(((msr) >> 16) & 0xff), 6766 (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3)); 6767 } 6768 if (has_hwp_notify) { 6769 if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr)) 6770 return 0; 6771 6772 fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx " 6773 "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n", 6774 cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis"); 6775 } 6776 if (get_msr(cpu, MSR_HWP_STATUS, &msr)) 6777 return 0; 6778 6779 fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx " 6780 "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", 6781 cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-"); 6782 6783 return 0; 6784 } 6785 6786 /* 6787 * print_perf_limit() 6788 */ 6789 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p) 6790 { 6791 unsigned long long msr; 6792 int cpu; 6793 6794 UNUSED(c); 6795 UNUSED(p); 6796 6797 if (no_msr) 6798 return 0; 6799 6800 cpu = t->cpu_id; 6801 6802 /* per-package */ 6803 if (!is_cpu_first_thread_in_package(t, c, p)) 6804 return 0; 6805 6806 if (cpu_migrate(cpu)) { 6807 fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu); 6808 return -1; 6809 } 6810 6811 if (platform->plr_msrs & PLR_CORE) { 6812 get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); 6813 fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 6814 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", 6815 (msr & 1 << 15) ? "bit15, " : "", 6816 (msr & 1 << 14) ? "bit14, " : "", 6817 (msr & 1 << 13) ? "Transitions, " : "", 6818 (msr & 1 << 12) ? "MultiCoreTurbo, " : "", 6819 (msr & 1 << 11) ? "PkgPwrL2, " : "", 6820 (msr & 1 << 10) ? "PkgPwrL1, " : "", 6821 (msr & 1 << 9) ? "CorePwr, " : "", 6822 (msr & 1 << 8) ? "Amps, " : "", 6823 (msr & 1 << 6) ? "VR-Therm, " : "", 6824 (msr & 1 << 5) ? "Auto-HWP, " : "", 6825 (msr & 1 << 4) ? "Graphics, " : "", 6826 (msr & 1 << 2) ? "bit2, " : "", 6827 (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : ""); 6828 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", 6829 (msr & 1 << 31) ? "bit31, " : "", 6830 (msr & 1 << 30) ? "bit30, " : "", 6831 (msr & 1 << 29) ? "Transitions, " : "", 6832 (msr & 1 << 28) ? "MultiCoreTurbo, " : "", 6833 (msr & 1 << 27) ? "PkgPwrL2, " : "", 6834 (msr & 1 << 26) ? "PkgPwrL1, " : "", 6835 (msr & 1 << 25) ? "CorePwr, " : "", 6836 (msr & 1 << 24) ? "Amps, " : "", 6837 (msr & 1 << 22) ? "VR-Therm, " : "", 6838 (msr & 1 << 21) ? "Auto-HWP, " : "", 6839 (msr & 1 << 20) ? "Graphics, " : "", 6840 (msr & 1 << 18) ? "bit18, " : "", 6841 (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : ""); 6842 6843 } 6844 if (platform->plr_msrs & PLR_GFX) { 6845 get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); 6846 fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 6847 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)", 6848 (msr & 1 << 0) ? "PROCHOT, " : "", 6849 (msr & 1 << 1) ? "ThermStatus, " : "", 6850 (msr & 1 << 4) ? "Graphics, " : "", 6851 (msr & 1 << 6) ? "VR-Therm, " : "", 6852 (msr & 1 << 8) ? "Amps, " : "", 6853 (msr & 1 << 9) ? "GFXPwr, " : "", 6854 (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); 6855 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n", 6856 (msr & 1 << 16) ? "PROCHOT, " : "", 6857 (msr & 1 << 17) ? "ThermStatus, " : "", 6858 (msr & 1 << 20) ? "Graphics, " : "", 6859 (msr & 1 << 22) ? "VR-Therm, " : "", 6860 (msr & 1 << 24) ? "Amps, " : "", 6861 (msr & 1 << 25) ? "GFXPwr, " : "", 6862 (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); 6863 } 6864 if (platform->plr_msrs & PLR_RING) { 6865 get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); 6866 fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 6867 fprintf(outf, " (Active: %s%s%s%s%s%s)", 6868 (msr & 1 << 0) ? "PROCHOT, " : "", 6869 (msr & 1 << 1) ? "ThermStatus, " : "", 6870 (msr & 1 << 6) ? "VR-Therm, " : "", 6871 (msr & 1 << 8) ? "Amps, " : "", 6872 (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); 6873 fprintf(outf, " (Logged: %s%s%s%s%s%s)\n", 6874 (msr & 1 << 16) ? "PROCHOT, " : "", 6875 (msr & 1 << 17) ? "ThermStatus, " : "", 6876 (msr & 1 << 22) ? "VR-Therm, " : "", 6877 (msr & 1 << 24) ? "Amps, " : "", 6878 (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); 6879 } 6880 return 0; 6881 } 6882 6883 #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ 6884 #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ 6885 6886 double get_quirk_tdp(void) 6887 { 6888 if (platform->rapl_quirk_tdp) 6889 return platform->rapl_quirk_tdp; 6890 6891 return 135.0; 6892 } 6893 6894 double get_tdp_intel(void) 6895 { 6896 unsigned long long msr; 6897 6898 if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) 6899 if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr)) 6900 return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; 6901 return get_quirk_tdp(); 6902 } 6903 6904 double get_tdp_amd(void) 6905 { 6906 return get_quirk_tdp(); 6907 } 6908 6909 void rapl_probe_intel(void) 6910 { 6911 unsigned long long msr; 6912 unsigned int time_unit; 6913 double tdp; 6914 const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt; 6915 const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J; 6916 6917 if (rapl_joules) 6918 bic_enabled &= ~bic_watt_bits; 6919 else 6920 bic_enabled &= ~bic_joules_bits; 6921 6922 if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS)) 6923 bic_enabled &= ~BIC_PKG__; 6924 if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)) 6925 bic_enabled &= ~BIC_RAM__; 6926 6927 /* units on package 0, verify later other packages match */ 6928 if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr)) 6929 return; 6930 6931 rapl_power_units = 1.0 / (1 << (msr & 0xF)); 6932 if (platform->has_rapl_divisor) 6933 rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000; 6934 else 6935 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); 6936 6937 if (platform->has_fixed_rapl_unit) 6938 rapl_dram_energy_units = (15.3 / 1000000); 6939 else 6940 rapl_dram_energy_units = rapl_energy_units; 6941 6942 time_unit = msr >> 16 & 0xF; 6943 if (time_unit == 0) 6944 time_unit = 0xA; 6945 6946 rapl_time_units = 1.0 / (1 << (time_unit)); 6947 6948 tdp = get_tdp_intel(); 6949 6950 rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; 6951 if (!quiet) 6952 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); 6953 } 6954 6955 void rapl_probe_amd(void) 6956 { 6957 unsigned long long msr; 6958 double tdp; 6959 const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt; 6960 const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J; 6961 6962 if (rapl_joules) 6963 bic_enabled &= ~bic_watt_bits; 6964 else 6965 bic_enabled &= ~bic_joules_bits; 6966 6967 if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr)) 6968 return; 6969 6970 rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf)); 6971 rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f)); 6972 rapl_power_units = ldexp(1.0, -(msr & 0xf)); 6973 6974 tdp = get_tdp_amd(); 6975 6976 rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; 6977 if (!quiet) 6978 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); 6979 } 6980 6981 void print_power_limit_msr(int cpu, unsigned long long msr, char *label) 6982 { 6983 fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n", 6984 cpu, label, 6985 ((msr >> 15) & 1) ? "EN" : "DIS", 6986 ((msr >> 0) & 0x7FFF) * rapl_power_units, 6987 (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, 6988 (((msr >> 16) & 1) ? "EN" : "DIS")); 6989 6990 return; 6991 } 6992 6993 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) 6994 { 6995 unsigned long long msr; 6996 const char *msr_name; 6997 int cpu; 6998 6999 UNUSED(c); 7000 UNUSED(p); 7001 7002 if (!platform->rapl_msrs) 7003 return 0; 7004 7005 /* RAPL counters are per package, so print only for 1st thread/package */ 7006 if (!is_cpu_first_thread_in_package(t, c, p)) 7007 return 0; 7008 7009 cpu = t->cpu_id; 7010 if (cpu_migrate(cpu)) { 7011 fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu); 7012 return -1; 7013 } 7014 7015 if (platform->rapl_msrs & RAPL_AMD_F17H) { 7016 msr_name = "MSR_RAPL_PWR_UNIT"; 7017 if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr)) 7018 return -1; 7019 } else { 7020 msr_name = "MSR_RAPL_POWER_UNIT"; 7021 if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) 7022 return -1; 7023 } 7024 7025 fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr, 7026 rapl_power_units, rapl_energy_units, rapl_time_units); 7027 7028 if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) { 7029 7030 if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) 7031 return -5; 7032 7033 fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 7034 cpu, msr, 7035 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7036 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7037 ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7038 ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); 7039 7040 } 7041 if (platform->rapl_msrs & RAPL_PKG) { 7042 7043 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) 7044 return -9; 7045 7046 fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", 7047 cpu, msr, (msr >> 63) & 1 ? "" : "UN"); 7048 7049 print_power_limit_msr(cpu, msr, "PKG Limit #1"); 7050 fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n", 7051 cpu, 7052 ((msr >> 47) & 1) ? "EN" : "DIS", 7053 ((msr >> 32) & 0x7FFF) * rapl_power_units, 7054 (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, 7055 ((msr >> 48) & 1) ? "EN" : "DIS"); 7056 7057 if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr)) 7058 return -9; 7059 7060 fprintf(outf, "cpu%d: MSR_VR_CURRENT_CONFIG: 0x%08llx\n", cpu, msr); 7061 fprintf(outf, "cpu%d: PKG Limit #4: %f Watts (%slocked)\n", 7062 cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN"); 7063 } 7064 7065 if (platform->rapl_msrs & RAPL_DRAM_POWER_INFO) { 7066 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) 7067 return -6; 7068 7069 fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 7070 cpu, msr, 7071 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7072 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7073 ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7074 ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); 7075 } 7076 if (platform->rapl_msrs & RAPL_DRAM) { 7077 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) 7078 return -9; 7079 fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", 7080 cpu, msr, (msr >> 31) & 1 ? "" : "UN"); 7081 7082 print_power_limit_msr(cpu, msr, "DRAM Limit"); 7083 } 7084 if (platform->rapl_msrs & RAPL_CORE_POLICY) { 7085 if (get_msr(cpu, MSR_PP0_POLICY, &msr)) 7086 return -7; 7087 7088 fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); 7089 } 7090 if (platform->rapl_msrs & RAPL_CORE_POWER_LIMIT) { 7091 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) 7092 return -9; 7093 fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", 7094 cpu, msr, (msr >> 31) & 1 ? "" : "UN"); 7095 print_power_limit_msr(cpu, msr, "Cores Limit"); 7096 } 7097 if (platform->rapl_msrs & RAPL_GFX) { 7098 if (get_msr(cpu, MSR_PP1_POLICY, &msr)) 7099 return -8; 7100 7101 fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); 7102 7103 if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) 7104 return -9; 7105 fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", 7106 cpu, msr, (msr >> 31) & 1 ? "" : "UN"); 7107 print_power_limit_msr(cpu, msr, "GFX Limit"); 7108 } 7109 return 0; 7110 } 7111 7112 /* 7113 * probe_rapl() 7114 * 7115 * sets rapl_power_units, rapl_energy_units, rapl_time_units 7116 */ 7117 void probe_rapl(void) 7118 { 7119 if (!platform->rapl_msrs || no_msr) 7120 return; 7121 7122 if (genuine_intel) 7123 rapl_probe_intel(); 7124 if (authentic_amd || hygon_genuine) 7125 rapl_probe_amd(); 7126 7127 if (quiet) 7128 return; 7129 7130 for_all_cpus(print_rapl, ODD_COUNTERS); 7131 } 7132 7133 /* 7134 * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where 7135 * the Thermal Control Circuit (TCC) activates. 7136 * This is usually equal to tjMax. 7137 * 7138 * Older processors do not have this MSR, so there we guess, 7139 * but also allow cmdline over-ride with -T. 7140 * 7141 * Several MSR temperature values are in units of degrees-C 7142 * below this value, including the Digital Thermal Sensor (DTS), 7143 * Package Thermal Management Sensor (PTM), and thermal event thresholds. 7144 */ 7145 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) 7146 { 7147 unsigned long long msr; 7148 unsigned int tcc_default, tcc_offset; 7149 int cpu; 7150 7151 UNUSED(c); 7152 UNUSED(p); 7153 7154 /* tj_max is used only for dts or ptm */ 7155 if (!(do_dts || do_ptm)) 7156 return 0; 7157 7158 /* this is a per-package concept */ 7159 if (!is_cpu_first_thread_in_package(t, c, p)) 7160 return 0; 7161 7162 cpu = t->cpu_id; 7163 if (cpu_migrate(cpu)) { 7164 fprintf(outf, "Could not migrate to CPU %d\n", cpu); 7165 return -1; 7166 } 7167 7168 if (tj_max_override != 0) { 7169 tj_max = tj_max_override; 7170 fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", cpu, tj_max); 7171 return 0; 7172 } 7173 7174 /* Temperature Target MSR is Nehalem and newer only */ 7175 if (!platform->has_nhm_msrs || no_msr) 7176 goto guess; 7177 7178 if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) 7179 goto guess; 7180 7181 tcc_default = (msr >> 16) & 0xFF; 7182 7183 if (!quiet) { 7184 int bits = platform->tcc_offset_bits; 7185 unsigned long long enabled = 0; 7186 7187 if (bits && !get_msr(base_cpu, MSR_PLATFORM_INFO, &enabled)) 7188 enabled = (enabled >> 30) & 1; 7189 7190 if (bits && enabled) { 7191 tcc_offset = (msr >> 24) & GENMASK(bits - 1, 0); 7192 fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", 7193 cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset); 7194 } else { 7195 fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default); 7196 } 7197 } 7198 7199 if (!tcc_default) 7200 goto guess; 7201 7202 tj_max = tcc_default; 7203 7204 return 0; 7205 7206 guess: 7207 tj_max = TJMAX_DEFAULT; 7208 fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max); 7209 7210 return 0; 7211 } 7212 7213 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) 7214 { 7215 unsigned long long msr; 7216 unsigned int dts, dts2; 7217 int cpu; 7218 7219 UNUSED(c); 7220 UNUSED(p); 7221 7222 if (no_msr) 7223 return 0; 7224 7225 if (!(do_dts || do_ptm)) 7226 return 0; 7227 7228 cpu = t->cpu_id; 7229 7230 /* DTS is per-core, no need to print for each thread */ 7231 if (!is_cpu_first_thread_in_core(t, c, p)) 7232 return 0; 7233 7234 if (cpu_migrate(cpu)) { 7235 fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu); 7236 return -1; 7237 } 7238 7239 if (do_ptm && is_cpu_first_core_in_package(t, c, p)) { 7240 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) 7241 return 0; 7242 7243 dts = (msr >> 16) & 0x7F; 7244 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts); 7245 7246 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr)) 7247 return 0; 7248 7249 dts = (msr >> 16) & 0x7F; 7250 dts2 = (msr >> 8) & 0x7F; 7251 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", 7252 cpu, msr, tj_max - dts, tj_max - dts2); 7253 } 7254 7255 if (do_dts && debug) { 7256 unsigned int resolution; 7257 7258 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) 7259 return 0; 7260 7261 dts = (msr >> 16) & 0x7F; 7262 resolution = (msr >> 27) & 0xF; 7263 fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", 7264 cpu, msr, tj_max - dts, resolution); 7265 7266 if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr)) 7267 return 0; 7268 7269 dts = (msr >> 16) & 0x7F; 7270 dts2 = (msr >> 8) & 0x7F; 7271 fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", 7272 cpu, msr, tj_max - dts, tj_max - dts2); 7273 } 7274 7275 return 0; 7276 } 7277 7278 void probe_thermal(void) 7279 { 7280 if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK)) 7281 BIC_PRESENT(BIC_CORE_THROT_CNT); 7282 else 7283 BIC_NOT_PRESENT(BIC_CORE_THROT_CNT); 7284 7285 for_all_cpus(set_temperature_target, ODD_COUNTERS); 7286 7287 if (quiet) 7288 return; 7289 7290 for_all_cpus(print_thermal, ODD_COUNTERS); 7291 } 7292 7293 int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p) 7294 { 7295 unsigned int eax, ebx, ecx, edx; 7296 7297 UNUSED(c); 7298 UNUSED(p); 7299 7300 if (!genuine_intel) 7301 return 0; 7302 7303 if (cpu_migrate(t->cpu_id)) { 7304 fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id); 7305 return -1; 7306 } 7307 7308 if (max_level < 0x1a) 7309 return 0; 7310 7311 __cpuid(0x1a, eax, ebx, ecx, edx); 7312 eax = (eax >> 24) & 0xFF; 7313 if (eax == 0x20) 7314 t->is_atom = true; 7315 return 0; 7316 } 7317 7318 void decode_feature_control_msr(void) 7319 { 7320 unsigned long long msr; 7321 7322 if (no_msr) 7323 return; 7324 7325 if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr)) 7326 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n", 7327 base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : ""); 7328 } 7329 7330 void decode_misc_enable_msr(void) 7331 { 7332 unsigned long long msr; 7333 7334 if (no_msr) 7335 return; 7336 7337 if (!genuine_intel) 7338 return; 7339 7340 if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr)) 7341 fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n", 7342 base_cpu, msr, 7343 msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-", 7344 msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-", 7345 msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-", 7346 msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", 7347 msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); 7348 } 7349 7350 void decode_misc_feature_control(void) 7351 { 7352 unsigned long long msr; 7353 7354 if (no_msr) 7355 return; 7356 7357 if (!platform->has_msr_misc_feature_control) 7358 return; 7359 7360 if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr)) 7361 fprintf(outf, 7362 "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n", 7363 base_cpu, msr, msr & (0 << 0) ? "No-" : "", msr & (1 << 0) ? "No-" : "", 7364 msr & (2 << 0) ? "No-" : "", msr & (3 << 0) ? "No-" : ""); 7365 } 7366 7367 /* 7368 * Decode MSR_MISC_PWR_MGMT 7369 * 7370 * Decode the bits according to the Nehalem documentation 7371 * bit[0] seems to continue to have same meaning going forward 7372 * bit[1] less so... 7373 */ 7374 void decode_misc_pwr_mgmt_msr(void) 7375 { 7376 unsigned long long msr; 7377 7378 if (no_msr) 7379 return; 7380 7381 if (!platform->has_msr_misc_pwr_mgmt) 7382 return; 7383 7384 if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) 7385 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n", 7386 base_cpu, msr, 7387 msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS"); 7388 } 7389 7390 /* 7391 * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG 7392 * 7393 * This MSRs are present on Silvermont processors, 7394 * Intel Atom processor E3000 series (Baytrail), and friends. 7395 */ 7396 void decode_c6_demotion_policy_msr(void) 7397 { 7398 unsigned long long msr; 7399 7400 if (no_msr) 7401 return; 7402 7403 if (!platform->has_msr_c6_demotion_policy_config) 7404 return; 7405 7406 if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr)) 7407 fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n", 7408 base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS"); 7409 7410 if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr)) 7411 fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n", 7412 base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS"); 7413 } 7414 7415 void print_dev_latency(void) 7416 { 7417 char *path = "/dev/cpu_dma_latency"; 7418 int fd; 7419 int value; 7420 int retval; 7421 7422 fd = open(path, O_RDONLY); 7423 if (fd < 0) { 7424 if (debug) 7425 warnx("Read %s failed", path); 7426 return; 7427 } 7428 7429 retval = read(fd, (void *)&value, sizeof(int)); 7430 if (retval != sizeof(int)) { 7431 warn("read failed %s", path); 7432 close(fd); 7433 return; 7434 } 7435 fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", value, value == 2000000000 ? "default" : "constrained"); 7436 7437 close(fd); 7438 } 7439 7440 static int has_instr_count_access(void) 7441 { 7442 int fd; 7443 int has_access; 7444 7445 if (no_perf) 7446 return 0; 7447 7448 fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0); 7449 has_access = fd != -1; 7450 7451 if (fd != -1) 7452 close(fd); 7453 7454 if (!has_access) 7455 warnx("Failed to access %s. Some of the counters may not be available\n" 7456 "\tRun as root to enable them or use %s to disable the access explicitly", 7457 "instructions retired perf counter", "--no-perf"); 7458 7459 return has_access; 7460 } 7461 7462 int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, 7463 double *scale_, enum rapl_unit *unit_) 7464 { 7465 if (no_perf) 7466 return -1; 7467 7468 const double scale = read_perf_scale(cai->perf_subsys, cai->perf_name); 7469 7470 if (scale == 0.0) 7471 return -1; 7472 7473 const enum rapl_unit unit = read_perf_rapl_unit(cai->perf_subsys, cai->perf_name); 7474 7475 if (unit == RAPL_UNIT_INVALID) 7476 return -1; 7477 7478 const unsigned int rapl_type = read_perf_type(cai->perf_subsys); 7479 const unsigned int rapl_energy_pkg_config = read_perf_config(cai->perf_subsys, cai->perf_name); 7480 7481 const int fd_counter = 7482 open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP); 7483 if (fd_counter == -1) 7484 return -1; 7485 7486 /* If it's the first counter opened, make it a group descriptor */ 7487 if (rci->fd_perf == -1) 7488 rci->fd_perf = fd_counter; 7489 7490 *scale_ = scale; 7491 *unit_ = unit; 7492 return fd_counter; 7493 } 7494 7495 int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, 7496 double *scale, enum rapl_unit *unit) 7497 { 7498 int ret = add_rapl_perf_counter_(cpu, rci, cai, scale, unit); 7499 7500 if (debug >= 2) 7501 fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu); 7502 7503 return ret; 7504 } 7505 7506 /* 7507 * Linux-perf manages the HW instructions-retired counter 7508 * by enabling when requested, and hiding rollover 7509 */ 7510 void linux_perf_init(void) 7511 { 7512 if (access("/proc/sys/kernel/perf_event_paranoid", F_OK)) 7513 return; 7514 7515 if (BIC_IS_ENABLED(BIC_IPC) && has_aperf) { 7516 fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 7517 if (fd_instr_count_percpu == NULL) 7518 err(-1, "calloc fd_instr_count_percpu"); 7519 } 7520 } 7521 7522 void rapl_perf_init(void) 7523 { 7524 const unsigned int num_domains = (platform->has_per_core_rapl ? topo.max_core_id : topo.max_package_id) + 1; 7525 bool *domain_visited = calloc(num_domains, sizeof(bool)); 7526 7527 rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain)); 7528 if (rapl_counter_info_perdomain == NULL) 7529 err(-1, "calloc rapl_counter_info_percpu"); 7530 rapl_counter_info_perdomain_size = num_domains; 7531 7532 /* 7533 * Initialize rapl_counter_info_percpu 7534 */ 7535 for (unsigned int domain_id = 0; domain_id < num_domains; ++domain_id) { 7536 struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain_id]; 7537 7538 rci->fd_perf = -1; 7539 for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) { 7540 rci->data[i] = 0; 7541 rci->source[i] = COUNTER_SOURCE_NONE; 7542 } 7543 } 7544 7545 /* 7546 * Open/probe the counters 7547 * If can't get it via perf, fallback to MSR 7548 */ 7549 for (size_t i = 0; i < ARRAY_SIZE(rapl_counter_arch_infos); ++i) { 7550 7551 const struct rapl_counter_arch_info *const cai = &rapl_counter_arch_infos[i]; 7552 bool has_counter = 0; 7553 double scale; 7554 enum rapl_unit unit; 7555 unsigned int next_domain; 7556 7557 memset(domain_visited, 0, num_domains * sizeof(*domain_visited)); 7558 7559 for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) { 7560 7561 if (cpu_is_not_allowed(cpu)) 7562 continue; 7563 7564 /* Skip already seen and handled RAPL domains */ 7565 next_domain = 7566 platform->has_per_core_rapl ? cpus[cpu].physical_core_id : cpus[cpu].physical_package_id; 7567 7568 assert(next_domain < num_domains); 7569 7570 if (domain_visited[next_domain]) 7571 continue; 7572 7573 domain_visited[next_domain] = 1; 7574 7575 struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain]; 7576 7577 /* Check if the counter is enabled and accessible */ 7578 if (BIC_IS_ENABLED(cai->bic) && (platform->rapl_msrs & cai->feature_mask)) { 7579 7580 /* Use perf API for this counter */ 7581 if (!no_perf && cai->perf_name 7582 && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) { 7583 rci->source[cai->rci_index] = COUNTER_SOURCE_PERF; 7584 rci->scale[cai->rci_index] = scale * cai->compat_scale; 7585 rci->unit[cai->rci_index] = unit; 7586 rci->flags[cai->rci_index] = cai->flags; 7587 7588 /* Use MSR for this counter */ 7589 } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) { 7590 rci->source[cai->rci_index] = COUNTER_SOURCE_MSR; 7591 rci->msr[cai->rci_index] = cai->msr; 7592 rci->msr_mask[cai->rci_index] = cai->msr_mask; 7593 rci->msr_shift[cai->rci_index] = cai->msr_shift; 7594 rci->unit[cai->rci_index] = RAPL_UNIT_JOULES; 7595 rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale; 7596 rci->flags[cai->rci_index] = cai->flags; 7597 } 7598 } 7599 7600 if (rci->source[cai->rci_index] != COUNTER_SOURCE_NONE) 7601 has_counter = 1; 7602 } 7603 7604 /* If any CPU has access to the counter, make it present */ 7605 if (has_counter) 7606 BIC_PRESENT(cai->bic); 7607 } 7608 7609 free(domain_visited); 7610 } 7611 7612 /* Assumes msr_counter_info is populated */ 7613 static int has_amperf_access(void) 7614 { 7615 return msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].present && 7616 msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].present; 7617 } 7618 7619 int *get_cstate_perf_group_fd(struct cstate_counter_info_t *cci, const char *group_name) 7620 { 7621 if (strcmp(group_name, "cstate_core") == 0) 7622 return &cci->fd_perf_core; 7623 7624 if (strcmp(group_name, "cstate_pkg") == 0) 7625 return &cci->fd_perf_pkg; 7626 7627 return NULL; 7628 } 7629 7630 int add_cstate_perf_counter_(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai) 7631 { 7632 if (no_perf) 7633 return -1; 7634 7635 int *pfd_group = get_cstate_perf_group_fd(cci, cai->perf_subsys); 7636 7637 if (pfd_group == NULL) 7638 return -1; 7639 7640 const unsigned int type = read_perf_type(cai->perf_subsys); 7641 const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name); 7642 7643 const int fd_counter = open_perf_counter(cpu, type, config, *pfd_group, PERF_FORMAT_GROUP); 7644 7645 if (fd_counter == -1) 7646 return -1; 7647 7648 /* If it's the first counter opened, make it a group descriptor */ 7649 if (*pfd_group == -1) 7650 *pfd_group = fd_counter; 7651 7652 return fd_counter; 7653 } 7654 7655 int add_cstate_perf_counter(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai) 7656 { 7657 int ret = add_cstate_perf_counter_(cpu, cci, cai); 7658 7659 if (debug >= 2) 7660 fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu); 7661 7662 return ret; 7663 } 7664 7665 int add_msr_perf_counter_(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai) 7666 { 7667 if (no_perf) 7668 return -1; 7669 7670 const unsigned int type = read_perf_type(cai->perf_subsys); 7671 const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name); 7672 7673 const int fd_counter = open_perf_counter(cpu, type, config, cci->fd_perf, PERF_FORMAT_GROUP); 7674 7675 if (fd_counter == -1) 7676 return -1; 7677 7678 /* If it's the first counter opened, make it a group descriptor */ 7679 if (cci->fd_perf == -1) 7680 cci->fd_perf = fd_counter; 7681 7682 return fd_counter; 7683 } 7684 7685 int add_msr_perf_counter(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai) 7686 { 7687 int ret = add_msr_perf_counter_(cpu, cci, cai); 7688 7689 if (debug) 7690 fprintf(stderr, "%s: %s/%s: %d (cpu: %d)\n", __func__, cai->perf_subsys, cai->perf_name, ret, cpu); 7691 7692 return ret; 7693 } 7694 7695 void msr_perf_init_(void) 7696 { 7697 const int mci_num = topo.max_cpu_num + 1; 7698 7699 msr_counter_info = calloc(mci_num, sizeof(*msr_counter_info)); 7700 if (!msr_counter_info) 7701 err(1, "calloc msr_counter_info"); 7702 msr_counter_info_size = mci_num; 7703 7704 for (int cpu = 0; cpu < mci_num; ++cpu) 7705 msr_counter_info[cpu].fd_perf = -1; 7706 7707 for (int cidx = 0; cidx < NUM_MSR_COUNTERS; ++cidx) { 7708 7709 struct msr_counter_arch_info *cai = &msr_counter_arch_infos[cidx]; 7710 7711 cai->present = false; 7712 7713 for (int cpu = 0; cpu < mci_num; ++cpu) { 7714 7715 struct msr_counter_info_t *const cci = &msr_counter_info[cpu]; 7716 7717 if (cpu_is_not_allowed(cpu)) 7718 continue; 7719 7720 if (cai->needed) { 7721 /* Use perf API for this counter */ 7722 if (!no_perf && cai->perf_name && add_msr_perf_counter(cpu, cci, cai) != -1) { 7723 cci->source[cai->rci_index] = COUNTER_SOURCE_PERF; 7724 cai->present = true; 7725 7726 /* User MSR for this counter */ 7727 } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) { 7728 cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; 7729 cci->msr[cai->rci_index] = cai->msr; 7730 cci->msr_mask[cai->rci_index] = cai->msr_mask; 7731 cai->present = true; 7732 } 7733 } 7734 } 7735 } 7736 } 7737 7738 /* Initialize data for reading perf counters from the MSR group. */ 7739 void msr_perf_init(void) 7740 { 7741 bool need_amperf = false, need_smi = false; 7742 const bool need_soft_c1 = (!platform->has_msr_core_c1_res) && (platform->supported_cstates & CC1); 7743 7744 need_amperf = BIC_IS_ENABLED(BIC_Avg_MHz) || BIC_IS_ENABLED(BIC_Busy) || BIC_IS_ENABLED(BIC_Bzy_MHz) 7745 || BIC_IS_ENABLED(BIC_IPC) || need_soft_c1; 7746 7747 if (BIC_IS_ENABLED(BIC_SMI)) 7748 need_smi = true; 7749 7750 /* Enable needed counters */ 7751 msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].needed = need_amperf; 7752 msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].needed = need_amperf; 7753 msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].needed = need_smi; 7754 7755 msr_perf_init_(); 7756 7757 const bool has_amperf = has_amperf_access(); 7758 const bool has_smi = msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].present; 7759 7760 has_aperf_access = has_amperf; 7761 7762 if (has_amperf) { 7763 BIC_PRESENT(BIC_Avg_MHz); 7764 BIC_PRESENT(BIC_Busy); 7765 BIC_PRESENT(BIC_Bzy_MHz); 7766 BIC_PRESENT(BIC_SMI); 7767 } 7768 7769 if (has_smi) 7770 BIC_PRESENT(BIC_SMI); 7771 } 7772 7773 void cstate_perf_init_(bool soft_c1) 7774 { 7775 bool has_counter; 7776 bool *cores_visited = NULL, *pkg_visited = NULL; 7777 const int cores_visited_elems = topo.max_core_id + 1; 7778 const int pkg_visited_elems = topo.max_package_id + 1; 7779 const int cci_num = topo.max_cpu_num + 1; 7780 7781 ccstate_counter_info = calloc(cci_num, sizeof(*ccstate_counter_info)); 7782 if (!ccstate_counter_info) 7783 err(1, "calloc ccstate_counter_arch_info"); 7784 ccstate_counter_info_size = cci_num; 7785 7786 cores_visited = calloc(cores_visited_elems, sizeof(*cores_visited)); 7787 if (!cores_visited) 7788 err(1, "calloc cores_visited"); 7789 7790 pkg_visited = calloc(pkg_visited_elems, sizeof(*pkg_visited)); 7791 if (!pkg_visited) 7792 err(1, "calloc pkg_visited"); 7793 7794 /* Initialize cstate_counter_info_percpu */ 7795 for (int cpu = 0; cpu < cci_num; ++cpu) { 7796 ccstate_counter_info[cpu].fd_perf_core = -1; 7797 ccstate_counter_info[cpu].fd_perf_pkg = -1; 7798 } 7799 7800 for (int cidx = 0; cidx < NUM_CSTATE_COUNTERS; ++cidx) { 7801 has_counter = false; 7802 memset(cores_visited, 0, cores_visited_elems * sizeof(*cores_visited)); 7803 memset(pkg_visited, 0, pkg_visited_elems * sizeof(*pkg_visited)); 7804 7805 const struct cstate_counter_arch_info *cai = &ccstate_counter_arch_infos[cidx]; 7806 7807 for (int cpu = 0; cpu < cci_num; ++cpu) { 7808 7809 struct cstate_counter_info_t *const cci = &ccstate_counter_info[cpu]; 7810 7811 if (cpu_is_not_allowed(cpu)) 7812 continue; 7813 7814 const int core_id = cpus[cpu].physical_core_id; 7815 const int pkg_id = cpus[cpu].physical_package_id; 7816 7817 assert(core_id < cores_visited_elems); 7818 assert(pkg_id < pkg_visited_elems); 7819 7820 const bool per_thread = cai->flags & CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD; 7821 const bool per_core = cai->flags & CSTATE_COUNTER_FLAG_COLLECT_PER_CORE; 7822 7823 if (!per_thread && cores_visited[core_id]) 7824 continue; 7825 7826 if (!per_core && pkg_visited[pkg_id]) 7827 continue; 7828 7829 const bool counter_needed = BIC_IS_ENABLED(cai->bic) || 7830 (soft_c1 && (cai->flags & CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY)); 7831 const bool counter_supported = (platform->supported_cstates & cai->feature_mask); 7832 7833 if (counter_needed && counter_supported) { 7834 /* Use perf API for this counter */ 7835 if (!no_perf && cai->perf_name && add_cstate_perf_counter(cpu, cci, cai) != -1) { 7836 7837 cci->source[cai->rci_index] = COUNTER_SOURCE_PERF; 7838 7839 /* User MSR for this counter */ 7840 } else if (!no_msr && cai->msr && pkg_cstate_limit >= cai->pkg_cstate_limit 7841 && probe_msr(cpu, cai->msr) == 0) { 7842 cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; 7843 cci->msr[cai->rci_index] = cai->msr; 7844 } 7845 } 7846 7847 if (cci->source[cai->rci_index] != COUNTER_SOURCE_NONE) { 7848 has_counter = true; 7849 cores_visited[core_id] = true; 7850 pkg_visited[pkg_id] = true; 7851 } 7852 } 7853 7854 /* If any CPU has access to the counter, make it present */ 7855 if (has_counter) 7856 BIC_PRESENT(cai->bic); 7857 } 7858 7859 free(cores_visited); 7860 free(pkg_visited); 7861 } 7862 7863 void cstate_perf_init(void) 7864 { 7865 /* 7866 * If we don't have a C1 residency MSR, we calculate it "in software", 7867 * but we need APERF, MPERF too. 7868 */ 7869 const bool soft_c1 = !platform->has_msr_core_c1_res && has_amperf_access() 7870 && platform->supported_cstates & CC1; 7871 7872 if (soft_c1) 7873 BIC_PRESENT(BIC_CPU_c1); 7874 7875 cstate_perf_init_(soft_c1); 7876 } 7877 7878 void probe_cstates(void) 7879 { 7880 probe_cst_limit(); 7881 7882 if (platform->has_msr_module_c6_res_ms) 7883 BIC_PRESENT(BIC_Mod_c6); 7884 7885 if (platform->has_ext_cst_msrs && !no_msr) { 7886 BIC_PRESENT(BIC_Totl_c0); 7887 BIC_PRESENT(BIC_Any_c0); 7888 BIC_PRESENT(BIC_GFX_c0); 7889 BIC_PRESENT(BIC_CPUGFX); 7890 } 7891 7892 if (quiet) 7893 return; 7894 7895 dump_power_ctl(); 7896 dump_cst_cfg(); 7897 decode_c6_demotion_policy_msr(); 7898 print_dev_latency(); 7899 dump_sysfs_cstate_config(); 7900 print_irtl(); 7901 } 7902 7903 void probe_lpi(void) 7904 { 7905 if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK)) 7906 BIC_PRESENT(BIC_CPU_LPI); 7907 else 7908 BIC_NOT_PRESENT(BIC_CPU_LPI); 7909 7910 if (!access(sys_lpi_file_sysfs, R_OK)) { 7911 sys_lpi_file = sys_lpi_file_sysfs; 7912 BIC_PRESENT(BIC_SYS_LPI); 7913 } else if (!access(sys_lpi_file_debugfs, R_OK)) { 7914 sys_lpi_file = sys_lpi_file_debugfs; 7915 BIC_PRESENT(BIC_SYS_LPI); 7916 } else { 7917 sys_lpi_file_sysfs = NULL; 7918 BIC_NOT_PRESENT(BIC_SYS_LPI); 7919 } 7920 7921 } 7922 7923 void probe_pstates(void) 7924 { 7925 probe_bclk(); 7926 7927 if (quiet) 7928 return; 7929 7930 dump_platform_info(); 7931 dump_turbo_ratio_info(); 7932 dump_sysfs_pstate_config(); 7933 decode_misc_pwr_mgmt_msr(); 7934 7935 for_all_cpus(print_hwp, ODD_COUNTERS); 7936 for_all_cpus(print_epb, ODD_COUNTERS); 7937 for_all_cpus(print_perf_limit, ODD_COUNTERS); 7938 } 7939 7940 void process_cpuid() 7941 { 7942 unsigned int eax, ebx, ecx, edx; 7943 unsigned int fms, family, model, stepping, ecx_flags, edx_flags; 7944 unsigned long long ucode_patch = 0; 7945 bool ucode_patch_valid = false; 7946 7947 eax = ebx = ecx = edx = 0; 7948 7949 __cpuid(0, max_level, ebx, ecx, edx); 7950 7951 if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) 7952 genuine_intel = 1; 7953 else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) 7954 authentic_amd = 1; 7955 else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e) 7956 hygon_genuine = 1; 7957 7958 if (!quiet) 7959 fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n", 7960 (char *)&ebx, (char *)&edx, (char *)&ecx, max_level); 7961 7962 __cpuid(1, fms, ebx, ecx, edx); 7963 family = (fms >> 8) & 0xf; 7964 model = (fms >> 4) & 0xf; 7965 stepping = fms & 0xf; 7966 if (family == 0xf) 7967 family += (fms >> 20) & 0xff; 7968 if (family >= 6) 7969 model += ((fms >> 16) & 0xf) << 4; 7970 ecx_flags = ecx; 7971 edx_flags = edx; 7972 7973 if (!no_msr) { 7974 if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch)) 7975 warnx("get_msr(UCODE)"); 7976 else 7977 ucode_patch_valid = true; 7978 } 7979 7980 /* 7981 * check max extended function levels of CPUID. 7982 * This is needed to check for invariant TSC. 7983 * This check is valid for both Intel and AMD. 7984 */ 7985 ebx = ecx = edx = 0; 7986 __cpuid(0x80000000, max_extended_level, ebx, ecx, edx); 7987 7988 if (!quiet) { 7989 fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)", 7990 family, model, stepping, family, model, stepping); 7991 if (ucode_patch_valid) 7992 fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF)); 7993 fputc('\n', outf); 7994 7995 fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level); 7996 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n", 7997 ecx_flags & (1 << 0) ? "SSE3" : "-", 7998 ecx_flags & (1 << 3) ? "MONITOR" : "-", 7999 ecx_flags & (1 << 6) ? "SMX" : "-", 8000 ecx_flags & (1 << 7) ? "EIST" : "-", 8001 ecx_flags & (1 << 8) ? "TM2" : "-", 8002 edx_flags & (1 << 4) ? "TSC" : "-", 8003 edx_flags & (1 << 5) ? "MSR" : "-", 8004 edx_flags & (1 << 22) ? "ACPI-TM" : "-", 8005 edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-"); 8006 } 8007 8008 probe_platform_features(family, model); 8009 8010 if (!(edx_flags & (1 << 5))) 8011 errx(1, "CPUID: no MSR"); 8012 8013 if (max_extended_level >= 0x80000007) { 8014 8015 /* 8016 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 8017 * this check is valid for both Intel and AMD 8018 */ 8019 __cpuid(0x80000007, eax, ebx, ecx, edx); 8020 has_invariant_tsc = edx & (1 << 8); 8021 } 8022 8023 /* 8024 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 8025 * this check is valid for both Intel and AMD 8026 */ 8027 8028 __cpuid(0x6, eax, ebx, ecx, edx); 8029 has_aperf = ecx & (1 << 0); 8030 do_dts = eax & (1 << 0); 8031 if (do_dts) 8032 BIC_PRESENT(BIC_CoreTmp); 8033 has_turbo = eax & (1 << 1); 8034 do_ptm = eax & (1 << 6); 8035 if (do_ptm) 8036 BIC_PRESENT(BIC_PkgTmp); 8037 has_hwp = eax & (1 << 7); 8038 has_hwp_notify = eax & (1 << 8); 8039 has_hwp_activity_window = eax & (1 << 9); 8040 has_hwp_epp = eax & (1 << 10); 8041 has_hwp_pkg = eax & (1 << 11); 8042 has_epb = ecx & (1 << 3); 8043 8044 if (!quiet) 8045 fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, " 8046 "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", 8047 has_aperf ? "" : "No-", 8048 has_turbo ? "" : "No-", 8049 do_dts ? "" : "No-", 8050 do_ptm ? "" : "No-", 8051 has_hwp ? "" : "No-", 8052 has_hwp_notify ? "" : "No-", 8053 has_hwp_activity_window ? "" : "No-", 8054 has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-"); 8055 8056 if (!quiet) 8057 decode_misc_enable_msr(); 8058 8059 if (max_level >= 0x7 && !quiet) { 8060 int has_sgx; 8061 8062 ecx = 0; 8063 8064 __cpuid_count(0x7, 0, eax, ebx, ecx, edx); 8065 8066 has_sgx = ebx & (1 << 2); 8067 8068 is_hybrid = edx & (1 << 15); 8069 8070 fprintf(outf, "CPUID(7): %sSGX %sHybrid\n", has_sgx ? "" : "No-", is_hybrid ? "" : "No-"); 8071 8072 if (has_sgx) 8073 decode_feature_control_msr(); 8074 } 8075 8076 if (max_level >= 0x15) { 8077 unsigned int eax_crystal; 8078 unsigned int ebx_tsc; 8079 8080 /* 8081 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz 8082 */ 8083 eax_crystal = ebx_tsc = crystal_hz = edx = 0; 8084 __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx); 8085 8086 if (ebx_tsc != 0) { 8087 if (!quiet && (ebx != 0)) 8088 fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", 8089 eax_crystal, ebx_tsc, crystal_hz); 8090 8091 if (crystal_hz == 0) 8092 crystal_hz = platform->crystal_freq; 8093 8094 if (crystal_hz) { 8095 tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal; 8096 if (!quiet) 8097 fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", 8098 tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); 8099 } 8100 } 8101 } 8102 if (max_level >= 0x16) { 8103 unsigned int base_mhz, max_mhz, bus_mhz, edx; 8104 8105 /* 8106 * CPUID 16H Base MHz, Max MHz, Bus MHz 8107 */ 8108 base_mhz = max_mhz = bus_mhz = edx = 0; 8109 8110 __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx); 8111 8112 bclk = bus_mhz; 8113 8114 base_hz = base_mhz * 1000000; 8115 has_base_hz = 1; 8116 8117 if (platform->enable_tsc_tweak) 8118 tsc_tweak = base_hz / tsc_hz; 8119 8120 if (!quiet) 8121 fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", 8122 base_mhz, max_mhz, bus_mhz); 8123 } 8124 8125 if (has_aperf) 8126 aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1; 8127 8128 BIC_PRESENT(BIC_IRQ); 8129 BIC_PRESENT(BIC_TSC_MHz); 8130 } 8131 8132 static void counter_info_init(void) 8133 { 8134 for (int i = 0; i < NUM_CSTATE_COUNTERS; ++i) { 8135 struct cstate_counter_arch_info *const cai = &ccstate_counter_arch_infos[i]; 8136 8137 if (platform->has_msr_knl_core_c6_residency && cai->msr == MSR_CORE_C6_RESIDENCY) 8138 cai->msr = MSR_KNL_CORE_C6_RESIDENCY; 8139 8140 if (!platform->has_msr_core_c1_res && cai->msr == MSR_CORE_C1_RES) 8141 cai->msr = 0; 8142 8143 if (platform->has_msr_atom_pkg_c6_residency && cai->msr == MSR_PKG_C6_RESIDENCY) 8144 cai->msr = MSR_ATOM_PKG_C6_RESIDENCY; 8145 } 8146 8147 for (int i = 0; i < NUM_MSR_COUNTERS; ++i) { 8148 msr_counter_arch_infos[i].present = false; 8149 msr_counter_arch_infos[i].needed = false; 8150 } 8151 } 8152 8153 void probe_pm_features(void) 8154 { 8155 probe_pstates(); 8156 8157 probe_cstates(); 8158 8159 probe_lpi(); 8160 8161 probe_intel_uncore_frequency(); 8162 8163 probe_graphics(); 8164 8165 probe_rapl(); 8166 8167 probe_thermal(); 8168 8169 if (platform->has_nhm_msrs && !no_msr) 8170 BIC_PRESENT(BIC_SMI); 8171 8172 if (!quiet) 8173 decode_misc_feature_control(); 8174 } 8175 8176 /* 8177 * in /dev/cpu/ return success for names that are numbers 8178 * ie. filter out ".", "..", "microcode". 8179 */ 8180 int dir_filter(const struct dirent *dirp) 8181 { 8182 if (isdigit(dirp->d_name[0])) 8183 return 1; 8184 else 8185 return 0; 8186 } 8187 8188 void topology_probe(bool startup) 8189 { 8190 int i; 8191 int max_core_id = 0; 8192 int max_package_id = 0; 8193 int max_siblings = 0; 8194 8195 /* Initialize num_cpus, max_cpu_num */ 8196 set_max_cpu_num(); 8197 topo.num_cpus = 0; 8198 for_all_proc_cpus(count_cpus); 8199 if (!summary_only && topo.num_cpus > 1) 8200 BIC_PRESENT(BIC_CPU); 8201 8202 if (debug > 1) 8203 fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); 8204 8205 cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); 8206 if (cpus == NULL) 8207 err(1, "calloc cpus"); 8208 8209 /* 8210 * Allocate and initialize cpu_present_set 8211 */ 8212 cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1)); 8213 if (cpu_present_set == NULL) 8214 err(3, "CPU_ALLOC"); 8215 cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 8216 CPU_ZERO_S(cpu_present_setsize, cpu_present_set); 8217 for_all_proc_cpus(mark_cpu_present); 8218 8219 /* 8220 * Allocate and initialize cpu_effective_set 8221 */ 8222 cpu_effective_set = CPU_ALLOC((topo.max_cpu_num + 1)); 8223 if (cpu_effective_set == NULL) 8224 err(3, "CPU_ALLOC"); 8225 cpu_effective_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 8226 CPU_ZERO_S(cpu_effective_setsize, cpu_effective_set); 8227 update_effective_set(startup); 8228 8229 /* 8230 * Allocate and initialize cpu_allowed_set 8231 */ 8232 cpu_allowed_set = CPU_ALLOC((topo.max_cpu_num + 1)); 8233 if (cpu_allowed_set == NULL) 8234 err(3, "CPU_ALLOC"); 8235 cpu_allowed_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 8236 CPU_ZERO_S(cpu_allowed_setsize, cpu_allowed_set); 8237 8238 /* 8239 * Validate and update cpu_allowed_set. 8240 * 8241 * Make sure all cpus in cpu_subset are also in cpu_present_set during startup. 8242 * Give a warning when cpus in cpu_subset become unavailable at runtime. 8243 * Give a warning when cpus are not effective because of cgroup setting. 8244 * 8245 * cpu_allowed_set is the intersection of cpu_present_set/cpu_effective_set/cpu_subset. 8246 */ 8247 for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) { 8248 if (cpu_subset && !CPU_ISSET_S(i, cpu_subset_size, cpu_subset)) 8249 continue; 8250 8251 if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) { 8252 if (cpu_subset) { 8253 /* cpus in cpu_subset must be in cpu_present_set during startup */ 8254 if (startup) 8255 err(1, "cpu%d not present", i); 8256 else 8257 fprintf(stderr, "cpu%d not present\n", i); 8258 } 8259 continue; 8260 } 8261 8262 if (CPU_COUNT_S(cpu_effective_setsize, cpu_effective_set)) { 8263 if (!CPU_ISSET_S(i, cpu_effective_setsize, cpu_effective_set)) { 8264 fprintf(stderr, "cpu%d not effective\n", i); 8265 continue; 8266 } 8267 } 8268 8269 CPU_SET_S(i, cpu_allowed_setsize, cpu_allowed_set); 8270 } 8271 8272 if (!CPU_COUNT_S(cpu_allowed_setsize, cpu_allowed_set)) 8273 err(-ENODEV, "No valid cpus found"); 8274 sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set); 8275 8276 /* 8277 * Allocate and initialize cpu_affinity_set 8278 */ 8279 cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1)); 8280 if (cpu_affinity_set == NULL) 8281 err(3, "CPU_ALLOC"); 8282 cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 8283 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 8284 8285 for_all_proc_cpus(init_thread_id); 8286 8287 /* 8288 * For online cpus 8289 * find max_core_id, max_package_id 8290 */ 8291 for (i = 0; i <= topo.max_cpu_num; ++i) { 8292 int siblings; 8293 8294 if (cpu_is_not_present(i)) { 8295 if (debug > 1) 8296 fprintf(outf, "cpu%d NOT PRESENT\n", i); 8297 continue; 8298 } 8299 8300 cpus[i].logical_cpu_id = i; 8301 8302 /* get package information */ 8303 cpus[i].physical_package_id = get_physical_package_id(i); 8304 if (cpus[i].physical_package_id > max_package_id) 8305 max_package_id = cpus[i].physical_package_id; 8306 8307 /* get die information */ 8308 cpus[i].die_id = get_die_id(i); 8309 if (cpus[i].die_id > topo.max_die_id) 8310 topo.max_die_id = cpus[i].die_id; 8311 8312 /* get numa node information */ 8313 cpus[i].physical_node_id = get_physical_node_id(&cpus[i]); 8314 if (cpus[i].physical_node_id > topo.max_node_num) 8315 topo.max_node_num = cpus[i].physical_node_id; 8316 8317 /* get core information */ 8318 cpus[i].physical_core_id = get_core_id(i); 8319 if (cpus[i].physical_core_id > max_core_id) 8320 max_core_id = cpus[i].physical_core_id; 8321 8322 /* get thread information */ 8323 siblings = get_thread_siblings(&cpus[i]); 8324 if (siblings > max_siblings) 8325 max_siblings = siblings; 8326 if (cpus[i].thread_id == 0) 8327 topo.num_cores++; 8328 } 8329 topo.max_core_id = max_core_id; 8330 topo.max_package_id = max_package_id; 8331 8332 topo.cores_per_node = max_core_id + 1; 8333 if (debug > 1) 8334 fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node); 8335 if (!summary_only && topo.cores_per_node > 1) 8336 BIC_PRESENT(BIC_Core); 8337 8338 topo.num_die = topo.max_die_id + 1; 8339 if (debug > 1) 8340 fprintf(outf, "max_die_id %d, sizing for %d die\n", topo.max_die_id, topo.num_die); 8341 if (!summary_only && topo.num_die > 1) 8342 BIC_PRESENT(BIC_Die); 8343 8344 topo.num_packages = max_package_id + 1; 8345 if (debug > 1) 8346 fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); 8347 if (!summary_only && topo.num_packages > 1) 8348 BIC_PRESENT(BIC_Package); 8349 8350 set_node_data(); 8351 if (debug > 1) 8352 fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg); 8353 if (!summary_only && topo.nodes_per_pkg > 1) 8354 BIC_PRESENT(BIC_Node); 8355 8356 topo.threads_per_core = max_siblings; 8357 if (debug > 1) 8358 fprintf(outf, "max_siblings %d\n", max_siblings); 8359 8360 if (debug < 1) 8361 return; 8362 8363 for (i = 0; i <= topo.max_cpu_num; ++i) { 8364 if (cpu_is_not_present(i)) 8365 continue; 8366 fprintf(outf, 8367 "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n", 8368 i, cpus[i].physical_package_id, cpus[i].die_id, 8369 cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, cpus[i].thread_id); 8370 } 8371 8372 } 8373 8374 void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) 8375 { 8376 int i; 8377 int num_cores = topo.cores_per_node * topo.nodes_per_pkg * topo.num_packages; 8378 int num_threads = topo.threads_per_core * num_cores; 8379 8380 *t = calloc(num_threads, sizeof(struct thread_data)); 8381 if (*t == NULL) 8382 goto error; 8383 8384 for (i = 0; i < num_threads; i++) 8385 (*t)[i].cpu_id = -1; 8386 8387 *c = calloc(num_cores, sizeof(struct core_data)); 8388 if (*c == NULL) 8389 goto error; 8390 8391 for (i = 0; i < num_cores; i++) { 8392 (*c)[i].core_id = -1; 8393 (*c)[i].base_cpu = -1; 8394 } 8395 8396 *p = calloc(topo.num_packages, sizeof(struct pkg_data)); 8397 if (*p == NULL) 8398 goto error; 8399 8400 for (i = 0; i < topo.num_packages; i++) { 8401 (*p)[i].package_id = i; 8402 (*p)[i].base_cpu = -1; 8403 } 8404 8405 return; 8406 error: 8407 err(1, "calloc counters"); 8408 } 8409 8410 /* 8411 * init_counter() 8412 * 8413 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE 8414 */ 8415 void init_counter(struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base, int cpu_id) 8416 { 8417 int pkg_id = cpus[cpu_id].physical_package_id; 8418 int node_id = cpus[cpu_id].logical_node_id; 8419 int core_id = cpus[cpu_id].physical_core_id; 8420 int thread_id = cpus[cpu_id].thread_id; 8421 struct thread_data *t; 8422 struct core_data *c; 8423 struct pkg_data *p; 8424 8425 /* Workaround for systems where physical_node_id==-1 8426 * and logical_node_id==(-1 - topo.num_cpus) 8427 */ 8428 if (node_id < 0) 8429 node_id = 0; 8430 8431 t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id); 8432 c = GET_CORE(core_base, core_id, node_id, pkg_id); 8433 p = GET_PKG(pkg_base, pkg_id); 8434 8435 t->cpu_id = cpu_id; 8436 if (!cpu_is_not_allowed(cpu_id)) { 8437 if (c->base_cpu < 0) 8438 c->base_cpu = t->cpu_id; 8439 if (p->base_cpu < 0) 8440 p->base_cpu = t->cpu_id; 8441 } 8442 8443 c->core_id = core_id; 8444 p->package_id = pkg_id; 8445 } 8446 8447 int initialize_counters(int cpu_id) 8448 { 8449 init_counter(EVEN_COUNTERS, cpu_id); 8450 init_counter(ODD_COUNTERS, cpu_id); 8451 return 0; 8452 } 8453 8454 void allocate_output_buffer() 8455 { 8456 output_buffer = calloc(1, (1 + topo.num_cpus) * 2048); 8457 outp = output_buffer; 8458 if (outp == NULL) 8459 err(-1, "calloc output buffer"); 8460 } 8461 8462 void allocate_fd_percpu(void) 8463 { 8464 fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 8465 if (fd_percpu == NULL) 8466 err(-1, "calloc fd_percpu"); 8467 } 8468 8469 void allocate_irq_buffers(void) 8470 { 8471 irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int)); 8472 if (irq_column_2_cpu == NULL) 8473 err(-1, "calloc %d", topo.num_cpus); 8474 8475 irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 8476 if (irqs_per_cpu == NULL) 8477 err(-1, "calloc %d", topo.max_cpu_num + 1); 8478 } 8479 8480 int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p) 8481 { 8482 topo.allowed_cpus++; 8483 if ((int)t->cpu_id == c->base_cpu) 8484 topo.allowed_cores++; 8485 if ((int)t->cpu_id == p->base_cpu) 8486 topo.allowed_packages++; 8487 8488 return 0; 8489 } 8490 8491 void topology_update(void) 8492 { 8493 topo.allowed_cpus = 0; 8494 topo.allowed_cores = 0; 8495 topo.allowed_packages = 0; 8496 for_all_cpus(update_topo, ODD_COUNTERS); 8497 } 8498 8499 void setup_all_buffers(bool startup) 8500 { 8501 topology_probe(startup); 8502 allocate_irq_buffers(); 8503 allocate_fd_percpu(); 8504 allocate_counters(&thread_even, &core_even, &package_even); 8505 allocate_counters(&thread_odd, &core_odd, &package_odd); 8506 allocate_output_buffer(); 8507 for_all_proc_cpus(initialize_counters); 8508 topology_update(); 8509 } 8510 8511 void set_base_cpu(void) 8512 { 8513 int i; 8514 8515 for (i = 0; i < topo.max_cpu_num + 1; ++i) { 8516 if (cpu_is_not_allowed(i)) 8517 continue; 8518 base_cpu = i; 8519 if (debug > 1) 8520 fprintf(outf, "base_cpu = %d\n", base_cpu); 8521 return; 8522 } 8523 err(-ENODEV, "No valid cpus found"); 8524 } 8525 8526 bool has_added_counters(void) 8527 { 8528 /* 8529 * It only makes sense to call this after the command line is parsed, 8530 * otherwise sys structure is not populated. 8531 */ 8532 8533 return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters; 8534 } 8535 8536 void check_msr_access(void) 8537 { 8538 check_dev_msr(); 8539 check_msr_permission(); 8540 8541 if (no_msr) 8542 bic_disable_msr_access(); 8543 } 8544 8545 void check_perf_access(void) 8546 { 8547 if (no_perf || !BIC_IS_ENABLED(BIC_IPC) || !has_instr_count_access()) 8548 bic_enabled &= ~BIC_IPC; 8549 } 8550 8551 int added_perf_counters_init_(struct perf_counter_info *pinfo) 8552 { 8553 size_t num_domains = 0; 8554 unsigned int next_domain; 8555 bool *domain_visited; 8556 unsigned int perf_type, perf_config; 8557 double perf_scale; 8558 int fd_perf; 8559 8560 if (!pinfo) 8561 return 0; 8562 8563 const size_t max_num_domains = MAX(topo.max_cpu_num + 1, MAX(topo.max_core_id + 1, topo.max_package_id + 1)); 8564 8565 domain_visited = calloc(max_num_domains, sizeof(*domain_visited)); 8566 8567 while (pinfo) { 8568 switch (pinfo->scope) { 8569 case SCOPE_CPU: 8570 num_domains = topo.max_cpu_num + 1; 8571 break; 8572 8573 case SCOPE_CORE: 8574 num_domains = topo.max_core_id + 1; 8575 break; 8576 8577 case SCOPE_PACKAGE: 8578 num_domains = topo.max_package_id + 1; 8579 break; 8580 } 8581 8582 /* Allocate buffer for file descriptor for each domain. */ 8583 pinfo->fd_perf_per_domain = calloc(num_domains, sizeof(*pinfo->fd_perf_per_domain)); 8584 if (!pinfo->fd_perf_per_domain) 8585 errx(1, "%s: alloc %s", __func__, "fd_perf_per_domain"); 8586 8587 for (size_t i = 0; i < num_domains; ++i) 8588 pinfo->fd_perf_per_domain[i] = -1; 8589 8590 pinfo->num_domains = num_domains; 8591 pinfo->scale = 1.0; 8592 8593 memset(domain_visited, 0, max_num_domains * sizeof(*domain_visited)); 8594 8595 for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) { 8596 8597 next_domain = cpu_to_domain(pinfo, cpu); 8598 8599 assert(next_domain < num_domains); 8600 8601 if (cpu_is_not_allowed(cpu)) 8602 continue; 8603 8604 if (domain_visited[next_domain]) 8605 continue; 8606 8607 perf_type = read_perf_type(pinfo->device); 8608 if (perf_type == (unsigned int)-1) { 8609 warnx("%s: perf/%s/%s: failed to read %s", 8610 __func__, pinfo->device, pinfo->event, "type"); 8611 continue; 8612 } 8613 8614 perf_config = read_perf_config(pinfo->device, pinfo->event); 8615 if (perf_config == (unsigned int)-1) { 8616 warnx("%s: perf/%s/%s: failed to read %s", 8617 __func__, pinfo->device, pinfo->event, "config"); 8618 continue; 8619 } 8620 8621 /* Scale is not required, some counters just don't have it. */ 8622 perf_scale = read_perf_scale(pinfo->device, pinfo->event); 8623 if (perf_scale == 0.0) 8624 perf_scale = 1.0; 8625 8626 fd_perf = open_perf_counter(cpu, perf_type, perf_config, -1, 0); 8627 if (fd_perf == -1) { 8628 warnx("%s: perf/%s/%s: failed to open counter on cpu%d", 8629 __func__, pinfo->device, pinfo->event, cpu); 8630 continue; 8631 } 8632 8633 domain_visited[next_domain] = 1; 8634 pinfo->fd_perf_per_domain[next_domain] = fd_perf; 8635 pinfo->scale = perf_scale; 8636 8637 if (debug) 8638 fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n", 8639 pinfo->device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); 8640 } 8641 8642 pinfo = pinfo->next; 8643 } 8644 8645 free(domain_visited); 8646 8647 return 0; 8648 } 8649 8650 void added_perf_counters_init(void) 8651 { 8652 if (added_perf_counters_init_(sys.perf_tp)) 8653 errx(1, "%s: %s", __func__, "thread"); 8654 8655 if (added_perf_counters_init_(sys.perf_cp)) 8656 errx(1, "%s: %s", __func__, "core"); 8657 8658 if (added_perf_counters_init_(sys.perf_pp)) 8659 errx(1, "%s: %s", __func__, "package"); 8660 } 8661 8662 int parse_telem_info_file(int fd_dir, const char *info_filename, const char *format, unsigned long *output) 8663 { 8664 int fd_telem_info; 8665 FILE *file_telem_info; 8666 unsigned long value; 8667 8668 fd_telem_info = openat(fd_dir, info_filename, O_RDONLY); 8669 if (fd_telem_info == -1) 8670 return -1; 8671 8672 file_telem_info = fdopen(fd_telem_info, "r"); 8673 if (file_telem_info == NULL) { 8674 close(fd_telem_info); 8675 return -1; 8676 } 8677 8678 if (fscanf(file_telem_info, format, &value) != 1) { 8679 fclose(file_telem_info); 8680 return -1; 8681 } 8682 8683 fclose(file_telem_info); 8684 8685 *output = value; 8686 8687 return 0; 8688 } 8689 8690 struct pmt_mmio *pmt_mmio_open(unsigned int target_guid) 8691 { 8692 DIR *dirp; 8693 struct dirent *entry; 8694 struct stat st; 8695 unsigned int telem_idx; 8696 int fd_telem_dir, fd_pmt; 8697 unsigned long guid, size, offset; 8698 size_t mmap_size; 8699 void *mmio; 8700 struct pmt_mmio *ret = NULL; 8701 8702 if (stat(SYSFS_TELEM_PATH, &st) == -1) 8703 return NULL; 8704 8705 dirp = opendir(SYSFS_TELEM_PATH); 8706 if (dirp == NULL) 8707 return NULL; 8708 8709 for (;;) { 8710 entry = readdir(dirp); 8711 8712 if (entry == NULL) 8713 break; 8714 8715 if (strcmp(entry->d_name, ".") == 0) 8716 continue; 8717 8718 if (strcmp(entry->d_name, "..") == 0) 8719 continue; 8720 8721 if (sscanf(entry->d_name, "telem%u", &telem_idx) != 1) 8722 continue; 8723 8724 if (fstatat(dirfd(dirp), entry->d_name, &st, 0) == -1) { 8725 break; 8726 } 8727 8728 if (!S_ISDIR(st.st_mode)) 8729 continue; 8730 8731 fd_telem_dir = openat(dirfd(dirp), entry->d_name, O_RDONLY); 8732 if (fd_telem_dir == -1) { 8733 break; 8734 } 8735 8736 if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) { 8737 close(fd_telem_dir); 8738 break; 8739 } 8740 8741 if (parse_telem_info_file(fd_telem_dir, "size", "%lu", &size)) { 8742 close(fd_telem_dir); 8743 break; 8744 } 8745 8746 if (guid != target_guid) { 8747 close(fd_telem_dir); 8748 continue; 8749 } 8750 8751 if (parse_telem_info_file(fd_telem_dir, "offset", "%lu", &offset)) { 8752 close(fd_telem_dir); 8753 break; 8754 } 8755 8756 assert(offset == 0); 8757 8758 fd_pmt = openat(fd_telem_dir, "telem", O_RDONLY); 8759 if (fd_pmt == -1) 8760 goto loop_cleanup_and_break; 8761 8762 mmap_size = (size + 0x1000UL) & (~0x1000UL); 8763 mmio = mmap(0, mmap_size, PROT_READ, MAP_SHARED, fd_pmt, 0); 8764 if (mmio != MAP_FAILED) { 8765 8766 if (debug) 8767 fprintf(stderr, "%s: 0x%lx mmaped at: %p\n", __func__, guid, mmio); 8768 8769 ret = calloc(1, sizeof(*ret)); 8770 8771 if (!ret) { 8772 fprintf(stderr, "%s: Failed to allocate pmt_mmio\n", __func__); 8773 exit(1); 8774 } 8775 8776 ret->guid = guid; 8777 ret->mmio_base = mmio; 8778 ret->pmt_offset = offset; 8779 ret->size = size; 8780 8781 ret->next = pmt_mmios; 8782 pmt_mmios = ret; 8783 } 8784 8785 loop_cleanup_and_break: 8786 close(fd_pmt); 8787 close(fd_telem_dir); 8788 break; 8789 } 8790 8791 closedir(dirp); 8792 8793 return ret; 8794 } 8795 8796 struct pmt_mmio *pmt_mmio_find(unsigned int guid) 8797 { 8798 struct pmt_mmio *pmmio = pmt_mmios; 8799 8800 while (pmmio) { 8801 if (pmmio->guid == guid) 8802 return pmmio; 8803 8804 pmmio = pmmio->next; 8805 } 8806 8807 return NULL; 8808 } 8809 8810 void *pmt_get_counter_pointer(struct pmt_mmio *pmmio, unsigned long counter_offset) 8811 { 8812 char *ret; 8813 8814 /* Get base of mmaped PMT file. */ 8815 ret = (char *)pmmio->mmio_base; 8816 8817 /* 8818 * Apply PMT MMIO offset to obtain beginning of the mmaped telemetry data. 8819 * It's not guaranteed that the mmaped memory begins with the telemetry data 8820 * - we might have to apply the offset first. 8821 */ 8822 ret += pmmio->pmt_offset; 8823 8824 /* Apply the counter offset to get the address to the mmaped counter. */ 8825 ret += counter_offset; 8826 8827 return ret; 8828 } 8829 8830 struct pmt_mmio *pmt_add_guid(unsigned int guid) 8831 { 8832 struct pmt_mmio *ret; 8833 8834 ret = pmt_mmio_find(guid); 8835 if (!ret) 8836 ret = pmt_mmio_open(guid); 8837 8838 return ret; 8839 } 8840 8841 enum pmt_open_mode { 8842 PMT_OPEN_TRY, /* Open failure is not an error. */ 8843 PMT_OPEN_REQUIRED, /* Open failure is a fatal error. */ 8844 }; 8845 8846 struct pmt_counter *pmt_find_counter(struct pmt_counter *pcounter, const char *name) 8847 { 8848 while (pcounter) { 8849 if (strcmp(pcounter->name, name) == 0) 8850 break; 8851 8852 pcounter = pcounter->next; 8853 } 8854 8855 return pcounter; 8856 } 8857 8858 struct pmt_counter **pmt_get_scope_root(enum counter_scope scope) 8859 { 8860 switch (scope) { 8861 case SCOPE_CPU: 8862 return &sys.pmt_tp; 8863 case SCOPE_CORE: 8864 return &sys.pmt_cp; 8865 case SCOPE_PACKAGE: 8866 return &sys.pmt_pp; 8867 } 8868 8869 __builtin_unreachable(); 8870 } 8871 8872 void pmt_counter_add_domain(struct pmt_counter *pcounter, unsigned long *pmmio, unsigned int domain_id) 8873 { 8874 /* Make sure the new domain fits. */ 8875 if (domain_id >= pcounter->num_domains) 8876 pmt_counter_resize(pcounter, domain_id + 1); 8877 8878 assert(pcounter->domains); 8879 assert(domain_id < pcounter->num_domains); 8880 8881 pcounter->domains[domain_id].pcounter = pmmio; 8882 } 8883 8884 int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type, 8885 unsigned int lsb, unsigned int msb, unsigned int offset, enum counter_scope scope, 8886 enum counter_format format, unsigned int domain_id, enum pmt_open_mode mode) 8887 { 8888 struct pmt_mmio *mmio; 8889 struct pmt_counter *pcounter; 8890 struct pmt_counter **const pmt_root = pmt_get_scope_root(scope); 8891 bool new_counter = false; 8892 int conflict = 0; 8893 8894 if (lsb > msb) { 8895 fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "lsb <= msb", name); 8896 exit(1); 8897 } 8898 8899 if (msb >= 64) { 8900 fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "msb < 64", name); 8901 exit(1); 8902 } 8903 8904 mmio = pmt_add_guid(guid); 8905 if (!mmio) { 8906 if (mode != PMT_OPEN_TRY) { 8907 fprintf(stderr, "%s: failed to map PMT MMIO for guid %x\n", __func__, guid); 8908 exit(1); 8909 } 8910 8911 return 1; 8912 } 8913 8914 if (offset >= mmio->size) { 8915 if (mode != PMT_OPEN_TRY) { 8916 fprintf(stderr, "%s: offset %u outside of PMT MMIO size %u\n", __func__, offset, mmio->size); 8917 exit(1); 8918 } 8919 8920 return 1; 8921 } 8922 8923 pcounter = pmt_find_counter(*pmt_root, name); 8924 if (!pcounter) { 8925 pcounter = calloc(1, sizeof(*pcounter)); 8926 new_counter = true; 8927 } 8928 8929 if (new_counter) { 8930 strncpy(pcounter->name, name, ARRAY_SIZE(pcounter->name) - 1); 8931 pcounter->type = type; 8932 pcounter->scope = scope; 8933 pcounter->lsb = lsb; 8934 pcounter->msb = msb; 8935 pcounter->format = format; 8936 } else { 8937 conflict += pcounter->type != type; 8938 conflict += pcounter->scope != scope; 8939 conflict += pcounter->lsb != lsb; 8940 conflict += pcounter->msb != msb; 8941 conflict += pcounter->format != format; 8942 } 8943 8944 if (conflict) { 8945 fprintf(stderr, "%s: conflicting parameters for the PMT counter with the same name %s\n", 8946 __func__, name); 8947 exit(1); 8948 } 8949 8950 pmt_counter_add_domain(pcounter, pmt_get_counter_pointer(mmio, offset), domain_id); 8951 8952 if (new_counter) { 8953 pcounter->next = *pmt_root; 8954 *pmt_root = pcounter; 8955 } 8956 8957 return 0; 8958 } 8959 8960 void pmt_init(void) 8961 { 8962 if (BIC_IS_ENABLED(BIC_Diec6)) { 8963 pmt_add_counter(PMT_MTL_DC6_GUID, "Die%c6", PMT_TYPE_XTAL_TIME, PMT_COUNTER_MTL_DC6_LSB, 8964 PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET, SCOPE_PACKAGE, FORMAT_DELTA, 8965 0, PMT_OPEN_TRY); 8966 } 8967 } 8968 8969 void turbostat_init() 8970 { 8971 setup_all_buffers(true); 8972 set_base_cpu(); 8973 check_msr_access(); 8974 check_perf_access(); 8975 process_cpuid(); 8976 counter_info_init(); 8977 probe_pm_features(); 8978 msr_perf_init(); 8979 linux_perf_init(); 8980 rapl_perf_init(); 8981 cstate_perf_init(); 8982 added_perf_counters_init(); 8983 pmt_init(); 8984 8985 for_all_cpus(get_cpu_type, ODD_COUNTERS); 8986 for_all_cpus(get_cpu_type, EVEN_COUNTERS); 8987 8988 if (BIC_IS_ENABLED(BIC_IPC) && has_aperf_access && get_instr_count_fd(base_cpu) != -1) 8989 BIC_PRESENT(BIC_IPC); 8990 8991 /* 8992 * If TSC tweak is needed, but couldn't get it, 8993 * disable more BICs, since it can't be reported accurately. 8994 */ 8995 if (platform->enable_tsc_tweak && !has_base_hz) { 8996 bic_enabled &= ~BIC_Busy; 8997 bic_enabled &= ~BIC_Bzy_MHz; 8998 } 8999 } 9000 9001 int fork_it(char **argv) 9002 { 9003 pid_t child_pid; 9004 int status; 9005 9006 snapshot_proc_sysfs_files(); 9007 status = for_all_cpus(get_counters, EVEN_COUNTERS); 9008 first_counter_read = 0; 9009 if (status) 9010 exit(status); 9011 gettimeofday(&tv_even, (struct timezone *)NULL); 9012 9013 child_pid = fork(); 9014 if (!child_pid) { 9015 /* child */ 9016 execvp(argv[0], argv); 9017 err(errno, "exec %s", argv[0]); 9018 } else { 9019 9020 /* parent */ 9021 if (child_pid == -1) 9022 err(1, "fork"); 9023 9024 signal(SIGINT, SIG_IGN); 9025 signal(SIGQUIT, SIG_IGN); 9026 if (waitpid(child_pid, &status, 0) == -1) 9027 err(status, "waitpid"); 9028 9029 if (WIFEXITED(status)) 9030 status = WEXITSTATUS(status); 9031 } 9032 /* 9033 * n.b. fork_it() does not check for errors from for_all_cpus() 9034 * because re-starting is problematic when forking 9035 */ 9036 snapshot_proc_sysfs_files(); 9037 for_all_cpus(get_counters, ODD_COUNTERS); 9038 gettimeofday(&tv_odd, (struct timezone *)NULL); 9039 timersub(&tv_odd, &tv_even, &tv_delta); 9040 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) 9041 fprintf(outf, "%s: Counter reset detected\n", progname); 9042 else { 9043 compute_average(EVEN_COUNTERS); 9044 format_all_counters(EVEN_COUNTERS); 9045 } 9046 9047 fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0); 9048 9049 flush_output_stderr(); 9050 9051 return status; 9052 } 9053 9054 int get_and_dump_counters(void) 9055 { 9056 int status; 9057 9058 snapshot_proc_sysfs_files(); 9059 status = for_all_cpus(get_counters, ODD_COUNTERS); 9060 if (status) 9061 return status; 9062 9063 status = for_all_cpus(dump_counters, ODD_COUNTERS); 9064 if (status) 9065 return status; 9066 9067 flush_output_stdout(); 9068 9069 return status; 9070 } 9071 9072 void print_version() 9073 { 9074 fprintf(outf, "turbostat version 2024.07.26 - Len Brown <lenb@kernel.org>\n"); 9075 } 9076 9077 #define COMMAND_LINE_SIZE 2048 9078 9079 void print_bootcmd(void) 9080 { 9081 char bootcmd[COMMAND_LINE_SIZE]; 9082 FILE *fp; 9083 int ret; 9084 9085 memset(bootcmd, 0, COMMAND_LINE_SIZE); 9086 fp = fopen("/proc/cmdline", "r"); 9087 if (!fp) 9088 return; 9089 9090 ret = fread(bootcmd, sizeof(char), COMMAND_LINE_SIZE - 1, fp); 9091 if (ret) { 9092 bootcmd[ret] = '\0'; 9093 /* the last character is already '\n' */ 9094 fprintf(outf, "Kernel command line: %s", bootcmd); 9095 } 9096 9097 fclose(fp); 9098 } 9099 9100 struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name) 9101 { 9102 struct msr_counter *mp; 9103 9104 for (mp = head; mp; mp = mp->next) { 9105 if (debug) 9106 fprintf(stderr, "%s: %s %s\n", __func__, name, mp->name); 9107 if (!strncmp(name, mp->name, strlen(mp->name))) 9108 return mp; 9109 } 9110 return NULL; 9111 } 9112 9113 int add_counter(unsigned int msr_num, char *path, char *name, 9114 unsigned int width, enum counter_scope scope, 9115 enum counter_type type, enum counter_format format, int flags, int id) 9116 { 9117 struct msr_counter *msrp; 9118 9119 if (no_msr && msr_num) 9120 errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num); 9121 9122 if (debug) 9123 fprintf(stderr, "%s(msr%d, %s, %s, width%d, scope%d, type%d, format%d, flags%x, id%d)\n", 9124 __func__, msr_num, path, name, width, scope, type, format, flags, id); 9125 9126 switch (scope) { 9127 9128 case SCOPE_CPU: 9129 msrp = find_msrp_by_name(sys.tp, name); 9130 if (msrp) { 9131 if (debug) 9132 fprintf(stderr, "%s: %s FOUND\n", __func__, name); 9133 break; 9134 } 9135 if (sys.added_thread_counters++ >= MAX_ADDED_THREAD_COUNTERS) { 9136 warnx("ignoring thread counter %s", name); 9137 return -1; 9138 } 9139 break; 9140 case SCOPE_CORE: 9141 msrp = find_msrp_by_name(sys.cp, name); 9142 if (msrp) { 9143 if (debug) 9144 fprintf(stderr, "%s: %s FOUND\n", __func__, name); 9145 break; 9146 } 9147 if (sys.added_core_counters++ >= MAX_ADDED_CORE_COUNTERS) { 9148 warnx("ignoring core counter %s", name); 9149 return -1; 9150 } 9151 break; 9152 case SCOPE_PACKAGE: 9153 msrp = find_msrp_by_name(sys.pp, name); 9154 if (msrp) { 9155 if (debug) 9156 fprintf(stderr, "%s: %s FOUND\n", __func__, name); 9157 break; 9158 } 9159 if (sys.added_package_counters++ >= MAX_ADDED_PACKAGE_COUNTERS) { 9160 warnx("ignoring package counter %s", name); 9161 return -1; 9162 } 9163 break; 9164 default: 9165 warnx("ignoring counter %s with unknown scope", name); 9166 return -1; 9167 } 9168 9169 if (msrp == NULL) { 9170 msrp = calloc(1, sizeof(struct msr_counter)); 9171 if (msrp == NULL) 9172 err(-1, "calloc msr_counter"); 9173 9174 msrp->msr_num = msr_num; 9175 strncpy(msrp->name, name, NAME_BYTES - 1); 9176 msrp->width = width; 9177 msrp->type = type; 9178 msrp->format = format; 9179 msrp->flags = flags; 9180 9181 switch (scope) { 9182 case SCOPE_CPU: 9183 msrp->next = sys.tp; 9184 sys.tp = msrp; 9185 break; 9186 case SCOPE_CORE: 9187 msrp->next = sys.cp; 9188 sys.cp = msrp; 9189 break; 9190 case SCOPE_PACKAGE: 9191 msrp->next = sys.pp; 9192 sys.pp = msrp; 9193 break; 9194 } 9195 } 9196 9197 if (path) { 9198 struct sysfs_path *sp; 9199 9200 sp = calloc(1, sizeof(struct sysfs_path)); 9201 if (sp == NULL) { 9202 perror("calloc"); 9203 exit(1); 9204 } 9205 strncpy(sp->path, path, PATH_BYTES - 1); 9206 sp->id = id; 9207 sp->next = msrp->sp; 9208 msrp->sp = sp; 9209 } 9210 9211 return 0; 9212 } 9213 9214 /* 9215 * Initialize the fields used for identifying and opening the counter. 9216 * 9217 * Defer the initialization of any runtime buffers for actually reading 9218 * the counters for when we initialize all perf counters, so we can later 9219 * easily call re_initialize(). 9220 */ 9221 struct perf_counter_info *make_perf_counter_info(const char *perf_device, 9222 const char *perf_event, 9223 const char *name, 9224 unsigned int width, 9225 enum counter_scope scope, 9226 enum counter_type type, enum counter_format format) 9227 { 9228 struct perf_counter_info *pinfo; 9229 9230 pinfo = calloc(1, sizeof(*pinfo)); 9231 if (!pinfo) 9232 errx(1, "%s: Failed to allocate %s/%s\n", __func__, perf_device, perf_event); 9233 9234 strncpy(pinfo->device, perf_device, ARRAY_SIZE(pinfo->device) - 1); 9235 strncpy(pinfo->event, perf_event, ARRAY_SIZE(pinfo->event) - 1); 9236 9237 strncpy(pinfo->name, name, ARRAY_SIZE(pinfo->name) - 1); 9238 pinfo->width = width; 9239 pinfo->scope = scope; 9240 pinfo->type = type; 9241 pinfo->format = format; 9242 9243 return pinfo; 9244 } 9245 9246 int add_perf_counter(const char *perf_device, const char *perf_event, const char *name_buffer, unsigned int width, 9247 enum counter_scope scope, enum counter_type type, enum counter_format format) 9248 { 9249 struct perf_counter_info *pinfo; 9250 9251 switch (scope) { 9252 case SCOPE_CPU: 9253 if (sys.added_thread_perf_counters >= MAX_ADDED_THREAD_COUNTERS) { 9254 warnx("ignoring thread counter perf/%s/%s", perf_device, perf_event); 9255 return -1; 9256 } 9257 break; 9258 9259 case SCOPE_CORE: 9260 if (sys.added_core_perf_counters >= MAX_ADDED_CORE_COUNTERS) { 9261 warnx("ignoring core counter perf/%s/%s", perf_device, perf_event); 9262 return -1; 9263 } 9264 break; 9265 9266 case SCOPE_PACKAGE: 9267 if (sys.added_package_perf_counters >= MAX_ADDED_PACKAGE_COUNTERS) { 9268 warnx("ignoring package counter perf/%s/%s", perf_device, perf_event); 9269 return -1; 9270 } 9271 break; 9272 } 9273 9274 pinfo = make_perf_counter_info(perf_device, perf_event, name_buffer, width, scope, type, format); 9275 9276 if (!pinfo) 9277 return -1; 9278 9279 switch (scope) { 9280 case SCOPE_CPU: 9281 pinfo->next = sys.perf_tp; 9282 sys.perf_tp = pinfo; 9283 ++sys.added_thread_perf_counters; 9284 break; 9285 9286 case SCOPE_CORE: 9287 pinfo->next = sys.perf_cp; 9288 sys.perf_cp = pinfo; 9289 ++sys.added_core_perf_counters; 9290 break; 9291 9292 case SCOPE_PACKAGE: 9293 pinfo->next = sys.perf_pp; 9294 sys.perf_pp = pinfo; 9295 ++sys.added_package_perf_counters; 9296 break; 9297 } 9298 9299 // FIXME: we might not have debug here yet 9300 if (debug) 9301 fprintf(stderr, "%s: %s/%s, name: %s, scope%d\n", 9302 __func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope); 9303 9304 return 0; 9305 } 9306 9307 void parse_add_command_msr(char *add_command) 9308 { 9309 int msr_num = 0; 9310 char *path = NULL; 9311 char perf_device[PERF_DEV_NAME_BYTES] = ""; 9312 char perf_event[PERF_EVT_NAME_BYTES] = ""; 9313 char name_buffer[PERF_NAME_BYTES] = ""; 9314 int width = 64; 9315 int fail = 0; 9316 enum counter_scope scope = SCOPE_CPU; 9317 enum counter_type type = COUNTER_CYCLES; 9318 enum counter_format format = FORMAT_DELTA; 9319 9320 while (add_command) { 9321 9322 if (sscanf(add_command, "msr0x%x", &msr_num) == 1) 9323 goto next; 9324 9325 if (sscanf(add_command, "msr%d", &msr_num) == 1) 9326 goto next; 9327 9328 BUILD_BUG_ON(ARRAY_SIZE(perf_device) <= 31); 9329 BUILD_BUG_ON(ARRAY_SIZE(perf_event) <= 31); 9330 if (sscanf(add_command, "perf/%31[^/]/%31[^,]", &perf_device[0], &perf_event[0]) == 2) 9331 goto next; 9332 9333 if (*add_command == '/') { 9334 path = add_command; 9335 goto next; 9336 } 9337 9338 if (sscanf(add_command, "u%d", &width) == 1) { 9339 if ((width == 32) || (width == 64)) 9340 goto next; 9341 width = 64; 9342 } 9343 if (!strncmp(add_command, "cpu", strlen("cpu"))) { 9344 scope = SCOPE_CPU; 9345 goto next; 9346 } 9347 if (!strncmp(add_command, "core", strlen("core"))) { 9348 scope = SCOPE_CORE; 9349 goto next; 9350 } 9351 if (!strncmp(add_command, "package", strlen("package"))) { 9352 scope = SCOPE_PACKAGE; 9353 goto next; 9354 } 9355 if (!strncmp(add_command, "cycles", strlen("cycles"))) { 9356 type = COUNTER_CYCLES; 9357 goto next; 9358 } 9359 if (!strncmp(add_command, "seconds", strlen("seconds"))) { 9360 type = COUNTER_SECONDS; 9361 goto next; 9362 } 9363 if (!strncmp(add_command, "usec", strlen("usec"))) { 9364 type = COUNTER_USEC; 9365 goto next; 9366 } 9367 if (!strncmp(add_command, "raw", strlen("raw"))) { 9368 format = FORMAT_RAW; 9369 goto next; 9370 } 9371 if (!strncmp(add_command, "delta", strlen("delta"))) { 9372 format = FORMAT_DELTA; 9373 goto next; 9374 } 9375 if (!strncmp(add_command, "percent", strlen("percent"))) { 9376 format = FORMAT_PERCENT; 9377 goto next; 9378 } 9379 9380 BUILD_BUG_ON(ARRAY_SIZE(name_buffer) <= 18); 9381 if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) { 9382 char *eos; 9383 9384 eos = strchr(name_buffer, ','); 9385 if (eos) 9386 *eos = '\0'; 9387 goto next; 9388 } 9389 9390 next: 9391 add_command = strchr(add_command, ','); 9392 if (add_command) { 9393 *add_command = '\0'; 9394 add_command++; 9395 } 9396 9397 } 9398 if ((msr_num == 0) && (path == NULL) && (perf_device[0] == '\0' || perf_event[0] == '\0')) { 9399 fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event ) required\n"); 9400 fail++; 9401 } 9402 9403 /* Test for non-empty perf_device and perf_event */ 9404 const bool is_perf_counter = perf_device[0] && perf_event[0]; 9405 9406 /* generate default column header */ 9407 if (*name_buffer == '\0') { 9408 if (is_perf_counter) { 9409 snprintf(name_buffer, ARRAY_SIZE(name_buffer), "perf/%s", perf_event); 9410 } else { 9411 if (width == 32) 9412 sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); 9413 else 9414 sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); 9415 } 9416 } 9417 9418 if (is_perf_counter) { 9419 if (add_perf_counter(perf_device, perf_event, name_buffer, width, scope, type, format)) 9420 fail++; 9421 } else { 9422 if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0, 0)) 9423 fail++; 9424 } 9425 9426 if (fail) { 9427 help(); 9428 exit(1); 9429 } 9430 } 9431 9432 bool starts_with(const char *str, const char *prefix) 9433 { 9434 return strncmp(prefix, str, strlen(prefix)) == 0; 9435 } 9436 9437 void parse_add_command_pmt(char *add_command) 9438 { 9439 char *name = NULL; 9440 char *type_name = NULL; 9441 char *format_name = NULL; 9442 unsigned int offset; 9443 unsigned int lsb; 9444 unsigned int msb; 9445 unsigned int guid; 9446 unsigned int domain_id; 9447 enum counter_scope scope = 0; 9448 enum pmt_datatype type = PMT_TYPE_RAW; 9449 enum counter_format format = FORMAT_RAW; 9450 bool has_offset = false; 9451 bool has_lsb = false; 9452 bool has_msb = false; 9453 bool has_format = true; /* Format has a default value. */ 9454 bool has_guid = false; 9455 bool has_scope = false; 9456 bool has_type = true; /* Type has a default value. */ 9457 9458 /* Consume the "pmt," prefix. */ 9459 add_command = strchr(add_command, ','); 9460 if (!add_command) { 9461 help(); 9462 exit(1); 9463 } 9464 ++add_command; 9465 9466 while (add_command) { 9467 if (starts_with(add_command, "name=")) { 9468 name = add_command + strlen("name="); 9469 goto next; 9470 } 9471 9472 if (starts_with(add_command, "type=")) { 9473 type_name = add_command + strlen("type="); 9474 goto next; 9475 } 9476 9477 if (starts_with(add_command, "domain=")) { 9478 const size_t prefix_len = strlen("domain="); 9479 9480 if (sscanf(add_command + prefix_len, "cpu%u", &domain_id) == 1) { 9481 scope = SCOPE_CPU; 9482 has_scope = true; 9483 } else if (sscanf(add_command + prefix_len, "core%u", &domain_id) == 1) { 9484 scope = SCOPE_CORE; 9485 has_scope = true; 9486 } else if (sscanf(add_command + prefix_len, "package%u", &domain_id) == 1) { 9487 scope = SCOPE_PACKAGE; 9488 has_scope = true; 9489 } 9490 9491 if (!has_scope) { 9492 printf("%s: invalid value for scope. Expected cpu%%u, core%%u or package%%u.\n", 9493 __func__); 9494 exit(1); 9495 } 9496 9497 goto next; 9498 } 9499 9500 if (starts_with(add_command, "format=")) { 9501 format_name = add_command + strlen("format="); 9502 goto next; 9503 } 9504 9505 if (sscanf(add_command, "offset=%u", &offset) == 1) { 9506 has_offset = true; 9507 goto next; 9508 } 9509 9510 if (sscanf(add_command, "lsb=%u", &lsb) == 1) { 9511 has_lsb = true; 9512 goto next; 9513 } 9514 9515 if (sscanf(add_command, "msb=%u", &msb) == 1) { 9516 has_msb = true; 9517 goto next; 9518 } 9519 9520 if (sscanf(add_command, "guid=%x", &guid) == 1) { 9521 has_guid = true; 9522 goto next; 9523 } 9524 9525 next: 9526 add_command = strchr(add_command, ','); 9527 if (add_command) { 9528 *add_command = '\0'; 9529 add_command++; 9530 } 9531 } 9532 9533 if (!name) { 9534 printf("%s: missing %s\n", __func__, "name"); 9535 exit(1); 9536 } 9537 9538 if (strlen(name) >= PMT_COUNTER_NAME_SIZE_BYTES) { 9539 printf("%s: name has to be at most %d characters long\n", __func__, PMT_COUNTER_NAME_SIZE_BYTES); 9540 exit(1); 9541 } 9542 9543 if (format_name) { 9544 has_format = false; 9545 9546 if (strcmp("raw", format_name) == 0) { 9547 format = FORMAT_RAW; 9548 has_format = true; 9549 } 9550 9551 if (strcmp("delta", format_name) == 0) { 9552 format = FORMAT_DELTA; 9553 has_format = true; 9554 } 9555 9556 if (!has_format) { 9557 fprintf(stderr, "%s: Invalid format %s. Expected raw or delta\n", __func__, format_name); 9558 exit(1); 9559 } 9560 } 9561 9562 if (type_name) { 9563 has_type = false; 9564 9565 if (strcmp("raw", type_name) == 0) { 9566 type = PMT_TYPE_RAW; 9567 has_type = true; 9568 } 9569 9570 if (strcmp("txtal_time", type_name) == 0) { 9571 type = PMT_TYPE_XTAL_TIME; 9572 has_type = true; 9573 } 9574 9575 if (!has_type) { 9576 printf("%s: invalid %s: %s\n", __func__, "type", type_name); 9577 exit(1); 9578 } 9579 } 9580 9581 if (!has_offset) { 9582 printf("%s : missing %s\n", __func__, "offset"); 9583 exit(1); 9584 } 9585 9586 if (!has_lsb) { 9587 printf("%s: missing %s\n", __func__, "lsb"); 9588 exit(1); 9589 } 9590 9591 if (!has_msb) { 9592 printf("%s: missing %s\n", __func__, "msb"); 9593 exit(1); 9594 } 9595 9596 if (!has_guid) { 9597 printf("%s: missing %s\n", __func__, "guid"); 9598 exit(1); 9599 } 9600 9601 if (!has_scope) { 9602 printf("%s: missing %s\n", __func__, "scope"); 9603 exit(1); 9604 } 9605 9606 if (lsb > msb) { 9607 printf("%s: lsb > msb doesn't make sense\n", __func__); 9608 exit(1); 9609 } 9610 9611 pmt_add_counter(guid, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED); 9612 } 9613 9614 void parse_add_command(char *add_command) 9615 { 9616 if (strncmp(add_command, "pmt", strlen("pmt")) == 0) 9617 return parse_add_command_pmt(add_command); 9618 return parse_add_command_msr(add_command); 9619 } 9620 9621 int is_deferred_add(char *name) 9622 { 9623 int i; 9624 9625 for (i = 0; i < deferred_add_index; ++i) 9626 if (!strcmp(name, deferred_add_names[i])) 9627 return 1; 9628 return 0; 9629 } 9630 9631 int is_deferred_skip(char *name) 9632 { 9633 int i; 9634 9635 for (i = 0; i < deferred_skip_index; ++i) 9636 if (!strcmp(name, deferred_skip_names[i])) 9637 return 1; 9638 return 0; 9639 } 9640 9641 void probe_sysfs(void) 9642 { 9643 char path[64]; 9644 char name_buf[16]; 9645 FILE *input; 9646 int state; 9647 char *sp; 9648 9649 for (state = 10; state >= 0; --state) { 9650 9651 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); 9652 input = fopen(path, "r"); 9653 if (input == NULL) 9654 continue; 9655 if (!fgets(name_buf, sizeof(name_buf), input)) 9656 err(1, "%s: failed to read file", path); 9657 9658 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ 9659 sp = strchr(name_buf, '-'); 9660 if (!sp) 9661 sp = strchrnul(name_buf, '\n'); 9662 *sp = '%'; 9663 *(sp + 1) = '\0'; 9664 9665 remove_underbar(name_buf); 9666 9667 fclose(input); 9668 9669 sprintf(path, "cpuidle/state%d/time", state); 9670 9671 if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) 9672 continue; 9673 9674 if (is_deferred_skip(name_buf)) 9675 continue; 9676 9677 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0); 9678 } 9679 9680 for (state = 10; state >= 0; --state) { 9681 9682 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); 9683 input = fopen(path, "r"); 9684 if (input == NULL) 9685 continue; 9686 if (!fgets(name_buf, sizeof(name_buf), input)) 9687 err(1, "%s: failed to read file", path); 9688 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ 9689 sp = strchr(name_buf, '-'); 9690 if (!sp) 9691 sp = strchrnul(name_buf, '\n'); 9692 *sp = '\0'; 9693 fclose(input); 9694 9695 remove_underbar(name_buf); 9696 9697 sprintf(path, "cpuidle/state%d/usage", state); 9698 9699 if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) 9700 continue; 9701 9702 if (is_deferred_skip(name_buf)) 9703 continue; 9704 9705 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0); 9706 } 9707 9708 } 9709 9710 /* 9711 * parse cpuset with following syntax 9712 * 1,2,4..6,8-10 and set bits in cpu_subset 9713 */ 9714 void parse_cpu_command(char *optarg) 9715 { 9716 if (!strcmp(optarg, "core")) { 9717 if (cpu_subset) 9718 goto error; 9719 show_core_only++; 9720 return; 9721 } 9722 if (!strcmp(optarg, "package")) { 9723 if (cpu_subset) 9724 goto error; 9725 show_pkg_only++; 9726 return; 9727 } 9728 if (show_core_only || show_pkg_only) 9729 goto error; 9730 9731 cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS); 9732 if (cpu_subset == NULL) 9733 err(3, "CPU_ALLOC"); 9734 cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS); 9735 9736 CPU_ZERO_S(cpu_subset_size, cpu_subset); 9737 9738 if (parse_cpu_str(optarg, cpu_subset, cpu_subset_size)) 9739 goto error; 9740 9741 return; 9742 9743 error: 9744 fprintf(stderr, "\"--cpu %s\" malformed\n", optarg); 9745 help(); 9746 exit(-1); 9747 } 9748 9749 void cmdline(int argc, char **argv) 9750 { 9751 int opt; 9752 int option_index = 0; 9753 static struct option long_options[] = { 9754 { "add", required_argument, 0, 'a' }, 9755 { "cpu", required_argument, 0, 'c' }, 9756 { "Dump", no_argument, 0, 'D' }, 9757 { "debug", no_argument, 0, 'd' }, /* internal, not documented */ 9758 { "enable", required_argument, 0, 'e' }, 9759 { "interval", required_argument, 0, 'i' }, 9760 { "IPC", no_argument, 0, 'I' }, 9761 { "num_iterations", required_argument, 0, 'n' }, 9762 { "header_iterations", required_argument, 0, 'N' }, 9763 { "help", no_argument, 0, 'h' }, 9764 { "hide", required_argument, 0, 'H' }, // meh, -h taken by --help 9765 { "Joules", no_argument, 0, 'J' }, 9766 { "list", no_argument, 0, 'l' }, 9767 { "out", required_argument, 0, 'o' }, 9768 { "quiet", no_argument, 0, 'q' }, 9769 { "no-msr", no_argument, 0, 'M' }, 9770 { "no-perf", no_argument, 0, 'P' }, 9771 { "show", required_argument, 0, 's' }, 9772 { "Summary", no_argument, 0, 'S' }, 9773 { "TCC", required_argument, 0, 'T' }, 9774 { "version", no_argument, 0, 'v' }, 9775 { 0, 0, 0, 0 } 9776 }; 9777 9778 progname = argv[0]; 9779 9780 /* 9781 * Parse some options early, because they may make other options invalid, 9782 * like adding the MSR counter with --add and at the same time using --no-msr. 9783 */ 9784 while ((opt = getopt_long_only(argc, argv, "MPn:", long_options, &option_index)) != -1) { 9785 switch (opt) { 9786 case 'M': 9787 no_msr = 1; 9788 break; 9789 case 'P': 9790 no_perf = 1; 9791 break; 9792 default: 9793 break; 9794 } 9795 } 9796 optind = 0; 9797 9798 while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qMST:v", long_options, &option_index)) != -1) { 9799 switch (opt) { 9800 case 'a': 9801 parse_add_command(optarg); 9802 break; 9803 case 'c': 9804 parse_cpu_command(optarg); 9805 break; 9806 case 'D': 9807 dump_only++; 9808 break; 9809 case 'e': 9810 /* --enable specified counter */ 9811 bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST); 9812 break; 9813 case 'd': 9814 debug++; 9815 ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); 9816 break; 9817 case 'H': 9818 /* 9819 * --hide: do not show those specified 9820 * multiple invocations simply clear more bits in enabled mask 9821 */ 9822 bic_enabled &= ~bic_lookup(optarg, HIDE_LIST); 9823 break; 9824 case 'h': 9825 default: 9826 help(); 9827 exit(1); 9828 case 'i': 9829 { 9830 double interval = strtod(optarg, NULL); 9831 9832 if (interval < 0.001) { 9833 fprintf(outf, "interval %f seconds is too small\n", interval); 9834 exit(2); 9835 } 9836 9837 interval_tv.tv_sec = interval_ts.tv_sec = interval; 9838 interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000; 9839 interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000; 9840 } 9841 break; 9842 case 'J': 9843 rapl_joules++; 9844 break; 9845 case 'l': 9846 ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); 9847 list_header_only++; 9848 quiet++; 9849 break; 9850 case 'o': 9851 outf = fopen_or_die(optarg, "w"); 9852 break; 9853 case 'q': 9854 quiet = 1; 9855 break; 9856 case 'M': 9857 case 'P': 9858 /* Parsed earlier */ 9859 break; 9860 case 'n': 9861 num_iterations = strtod(optarg, NULL); 9862 9863 if (num_iterations <= 0) { 9864 fprintf(outf, "iterations %d should be positive number\n", num_iterations); 9865 exit(2); 9866 } 9867 break; 9868 case 'N': 9869 header_iterations = strtod(optarg, NULL); 9870 9871 if (header_iterations <= 0) { 9872 fprintf(outf, "iterations %d should be positive number\n", header_iterations); 9873 exit(2); 9874 } 9875 break; 9876 case 's': 9877 /* 9878 * --show: show only those specified 9879 * The 1st invocation will clear and replace the enabled mask 9880 * subsequent invocations can add to it. 9881 */ 9882 if (shown == 0) 9883 bic_enabled = bic_lookup(optarg, SHOW_LIST); 9884 else 9885 bic_enabled |= bic_lookup(optarg, SHOW_LIST); 9886 shown = 1; 9887 break; 9888 case 'S': 9889 summary_only++; 9890 break; 9891 case 'T': 9892 tj_max_override = atoi(optarg); 9893 break; 9894 case 'v': 9895 print_version(); 9896 exit(0); 9897 break; 9898 } 9899 } 9900 } 9901 9902 void set_rlimit(void) 9903 { 9904 struct rlimit limit; 9905 9906 if (getrlimit(RLIMIT_NOFILE, &limit) < 0) 9907 err(1, "Failed to get rlimit"); 9908 9909 if (limit.rlim_max < MAX_NOFILE) 9910 limit.rlim_max = MAX_NOFILE; 9911 if (limit.rlim_cur < MAX_NOFILE) 9912 limit.rlim_cur = MAX_NOFILE; 9913 9914 if (setrlimit(RLIMIT_NOFILE, &limit) < 0) 9915 err(1, "Failed to set rlimit"); 9916 } 9917 9918 int main(int argc, char **argv) 9919 { 9920 int fd, ret; 9921 9922 fd = open("/sys/fs/cgroup/cgroup.procs", O_WRONLY); 9923 if (fd < 0) 9924 goto skip_cgroup_setting; 9925 9926 ret = write(fd, "0\n", 2); 9927 if (ret == -1) 9928 perror("Can't update cgroup\n"); 9929 9930 close(fd); 9931 9932 skip_cgroup_setting: 9933 outf = stderr; 9934 cmdline(argc, argv); 9935 9936 if (!quiet) { 9937 print_version(); 9938 print_bootcmd(); 9939 } 9940 9941 probe_sysfs(); 9942 9943 if (!getuid()) 9944 set_rlimit(); 9945 9946 turbostat_init(); 9947 9948 if (!no_msr) 9949 msr_sum_record(); 9950 9951 /* dump counters and exit */ 9952 if (dump_only) 9953 return get_and_dump_counters(); 9954 9955 /* list header and exit */ 9956 if (list_header_only) { 9957 print_header(","); 9958 flush_output_stdout(); 9959 return 0; 9960 } 9961 9962 /* 9963 * if any params left, it must be a command to fork 9964 */ 9965 if (argc - optind) 9966 return fork_it(argv + optind); 9967 else 9968 turbostat_loop(); 9969 9970 return 0; 9971 } 9972