1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * turbostat -- show CPU frequency and C-state residency 4 * on modern Intel and AMD processors. 5 * 6 * Copyright (c) 2024 Intel Corporation. 7 * Len Brown <len.brown@intel.com> 8 */ 9 10 #define _GNU_SOURCE 11 #include MSRHEADER 12 13 // copied from arch/x86/include/asm/cpu_device_id.h 14 #define VFM_MODEL_BIT 0 15 #define VFM_FAMILY_BIT 8 16 #define VFM_VENDOR_BIT 16 17 #define VFM_RSVD_BIT 24 18 19 #define VFM_MODEL_MASK GENMASK(VFM_FAMILY_BIT - 1, VFM_MODEL_BIT) 20 #define VFM_FAMILY_MASK GENMASK(VFM_VENDOR_BIT - 1, VFM_FAMILY_BIT) 21 #define VFM_VENDOR_MASK GENMASK(VFM_RSVD_BIT - 1, VFM_VENDOR_BIT) 22 23 #define VFM_MODEL(vfm) (((vfm) & VFM_MODEL_MASK) >> VFM_MODEL_BIT) 24 #define VFM_FAMILY(vfm) (((vfm) & VFM_FAMILY_MASK) >> VFM_FAMILY_BIT) 25 #define VFM_VENDOR(vfm) (((vfm) & VFM_VENDOR_MASK) >> VFM_VENDOR_BIT) 26 27 #define VFM_MAKE(_vendor, _family, _model) ( \ 28 ((_model) << VFM_MODEL_BIT) | \ 29 ((_family) << VFM_FAMILY_BIT) | \ 30 ((_vendor) << VFM_VENDOR_BIT) \ 31 ) 32 // end copied section 33 34 #define CPUID_LEAF_MODEL_ID 0x1A 35 #define CPUID_LEAF_MODEL_ID_CORE_TYPE_SHIFT 24 36 37 #define X86_VENDOR_INTEL 0 38 39 #include INTEL_FAMILY_HEADER 40 #include BUILD_BUG_HEADER 41 #include <stdarg.h> 42 #include <stdio.h> 43 #include <err.h> 44 #include <unistd.h> 45 #include <sys/types.h> 46 #include <sys/wait.h> 47 #include <sys/stat.h> 48 #include <sys/select.h> 49 #include <sys/resource.h> 50 #include <sys/mman.h> 51 #include <fcntl.h> 52 #include <signal.h> 53 #include <sys/time.h> 54 #include <stdlib.h> 55 #include <getopt.h> 56 #include <dirent.h> 57 #include <string.h> 58 #include <ctype.h> 59 #include <sched.h> 60 #include <time.h> 61 #include <cpuid.h> 62 #include <sys/capability.h> 63 #include <errno.h> 64 #include <math.h> 65 #include <linux/perf_event.h> 66 #include <asm/unistd.h> 67 #include <stdbool.h> 68 #include <assert.h> 69 #include <linux/kernel.h> 70 71 #define UNUSED(x) (void)(x) 72 73 /* 74 * This list matches the column headers, except 75 * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time 76 * 2. Core and CPU are moved to the end, we can't have strings that contain them 77 * matching on them for --show and --hide. 78 */ 79 80 /* 81 * buffer size used by sscanf() for added column names 82 * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters 83 */ 84 #define NAME_BYTES 20 85 #define PATH_BYTES 128 86 #define PERF_NAME_BYTES 128 87 88 #define MAX_NOFILE 0x8000 89 90 #define COUNTER_KIND_PERF_PREFIX "perf/" 91 #define COUNTER_KIND_PERF_PREFIX_LEN strlen(COUNTER_KIND_PERF_PREFIX) 92 #define PERF_DEV_NAME_BYTES 32 93 #define PERF_EVT_NAME_BYTES 32 94 95 #define INTEL_ECORE_TYPE 0x20 96 #define INTEL_PCORE_TYPE 0x40 97 98 enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; 99 enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M }; 100 enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE }; 101 enum counter_source { COUNTER_SOURCE_NONE, COUNTER_SOURCE_PERF, COUNTER_SOURCE_MSR }; 102 103 struct perf_counter_info { 104 struct perf_counter_info *next; 105 106 /* How to open the counter / What counter it is. */ 107 char device[PERF_DEV_NAME_BYTES]; 108 char event[PERF_EVT_NAME_BYTES]; 109 110 /* How to show/format the counter. */ 111 char name[PERF_NAME_BYTES]; 112 unsigned int width; 113 enum counter_scope scope; 114 enum counter_type type; 115 enum counter_format format; 116 double scale; 117 118 /* For reading the counter. */ 119 int *fd_perf_per_domain; 120 size_t num_domains; 121 }; 122 123 struct sysfs_path { 124 char path[PATH_BYTES]; 125 int id; 126 struct sysfs_path *next; 127 }; 128 129 struct msr_counter { 130 unsigned int msr_num; 131 char name[NAME_BYTES]; 132 struct sysfs_path *sp; 133 unsigned int width; 134 enum counter_type type; 135 enum counter_format format; 136 struct msr_counter *next; 137 unsigned int flags; 138 #define FLAGS_HIDE (1 << 0) 139 #define FLAGS_SHOW (1 << 1) 140 #define SYSFS_PERCPU (1 << 1) 141 }; 142 143 struct msr_counter bic[] = { 144 { 0x0, "usec", NULL, 0, 0, 0, NULL, 0 }, 145 { 0x0, "Time_Of_Day_Seconds", NULL, 0, 0, 0, NULL, 0 }, 146 { 0x0, "Package", NULL, 0, 0, 0, NULL, 0 }, 147 { 0x0, "Node", NULL, 0, 0, 0, NULL, 0 }, 148 { 0x0, "Avg_MHz", NULL, 0, 0, 0, NULL, 0 }, 149 { 0x0, "Busy%", NULL, 0, 0, 0, NULL, 0 }, 150 { 0x0, "Bzy_MHz", NULL, 0, 0, 0, NULL, 0 }, 151 { 0x0, "TSC_MHz", NULL, 0, 0, 0, NULL, 0 }, 152 { 0x0, "IRQ", NULL, 0, 0, 0, NULL, 0 }, 153 { 0x0, "SMI", NULL, 32, 0, FORMAT_DELTA, NULL, 0 }, 154 { 0x0, "sysfs", NULL, 0, 0, 0, NULL, 0 }, 155 { 0x0, "CPU%c1", NULL, 0, 0, 0, NULL, 0 }, 156 { 0x0, "CPU%c3", NULL, 0, 0, 0, NULL, 0 }, 157 { 0x0, "CPU%c6", NULL, 0, 0, 0, NULL, 0 }, 158 { 0x0, "CPU%c7", NULL, 0, 0, 0, NULL, 0 }, 159 { 0x0, "ThreadC", NULL, 0, 0, 0, NULL, 0 }, 160 { 0x0, "CoreTmp", NULL, 0, 0, 0, NULL, 0 }, 161 { 0x0, "CoreCnt", NULL, 0, 0, 0, NULL, 0 }, 162 { 0x0, "PkgTmp", NULL, 0, 0, 0, NULL, 0 }, 163 { 0x0, "GFX%rc6", NULL, 0, 0, 0, NULL, 0 }, 164 { 0x0, "GFXMHz", NULL, 0, 0, 0, NULL, 0 }, 165 { 0x0, "Pkg%pc2", NULL, 0, 0, 0, NULL, 0 }, 166 { 0x0, "Pkg%pc3", NULL, 0, 0, 0, NULL, 0 }, 167 { 0x0, "Pkg%pc6", NULL, 0, 0, 0, NULL, 0 }, 168 { 0x0, "Pkg%pc7", NULL, 0, 0, 0, NULL, 0 }, 169 { 0x0, "Pkg%pc8", NULL, 0, 0, 0, NULL, 0 }, 170 { 0x0, "Pkg%pc9", NULL, 0, 0, 0, NULL, 0 }, 171 { 0x0, "Pk%pc10", NULL, 0, 0, 0, NULL, 0 }, 172 { 0x0, "CPU%LPI", NULL, 0, 0, 0, NULL, 0 }, 173 { 0x0, "SYS%LPI", NULL, 0, 0, 0, NULL, 0 }, 174 { 0x0, "PkgWatt", NULL, 0, 0, 0, NULL, 0 }, 175 { 0x0, "CorWatt", NULL, 0, 0, 0, NULL, 0 }, 176 { 0x0, "GFXWatt", NULL, 0, 0, 0, NULL, 0 }, 177 { 0x0, "PkgCnt", NULL, 0, 0, 0, NULL, 0 }, 178 { 0x0, "RAMWatt", NULL, 0, 0, 0, NULL, 0 }, 179 { 0x0, "PKG_%", NULL, 0, 0, 0, NULL, 0 }, 180 { 0x0, "RAM_%", NULL, 0, 0, 0, NULL, 0 }, 181 { 0x0, "Pkg_J", NULL, 0, 0, 0, NULL, 0 }, 182 { 0x0, "Cor_J", NULL, 0, 0, 0, NULL, 0 }, 183 { 0x0, "GFX_J", NULL, 0, 0, 0, NULL, 0 }, 184 { 0x0, "RAM_J", NULL, 0, 0, 0, NULL, 0 }, 185 { 0x0, "Mod%c6", NULL, 0, 0, 0, NULL, 0 }, 186 { 0x0, "Totl%C0", NULL, 0, 0, 0, NULL, 0 }, 187 { 0x0, "Any%C0", NULL, 0, 0, 0, NULL, 0 }, 188 { 0x0, "GFX%C0", NULL, 0, 0, 0, NULL, 0 }, 189 { 0x0, "CPUGFX%", NULL, 0, 0, 0, NULL, 0 }, 190 { 0x0, "Core", NULL, 0, 0, 0, NULL, 0 }, 191 { 0x0, "CPU", NULL, 0, 0, 0, NULL, 0 }, 192 { 0x0, "APIC", NULL, 0, 0, 0, NULL, 0 }, 193 { 0x0, "X2APIC", NULL, 0, 0, 0, NULL, 0 }, 194 { 0x0, "Die", NULL, 0, 0, 0, NULL, 0 }, 195 { 0x0, "GFXAMHz", NULL, 0, 0, 0, NULL, 0 }, 196 { 0x0, "IPC", NULL, 0, 0, 0, NULL, 0 }, 197 { 0x0, "CoreThr", NULL, 0, 0, 0, NULL, 0 }, 198 { 0x0, "UncMHz", NULL, 0, 0, 0, NULL, 0 }, 199 { 0x0, "SAM%mc6", NULL, 0, 0, 0, NULL, 0 }, 200 { 0x0, "SAMMHz", NULL, 0, 0, 0, NULL, 0 }, 201 { 0x0, "SAMAMHz", NULL, 0, 0, 0, NULL, 0 }, 202 { 0x0, "Die%c6", NULL, 0, 0, 0, NULL, 0 }, 203 { 0x0, "SysWatt", NULL, 0, 0, 0, NULL, 0 }, 204 { 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 }, 205 }; 206 207 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) 208 #define BIC_USEC (1ULL << 0) 209 #define BIC_TOD (1ULL << 1) 210 #define BIC_Package (1ULL << 2) 211 #define BIC_Node (1ULL << 3) 212 #define BIC_Avg_MHz (1ULL << 4) 213 #define BIC_Busy (1ULL << 5) 214 #define BIC_Bzy_MHz (1ULL << 6) 215 #define BIC_TSC_MHz (1ULL << 7) 216 #define BIC_IRQ (1ULL << 8) 217 #define BIC_SMI (1ULL << 9) 218 #define BIC_sysfs (1ULL << 10) 219 #define BIC_CPU_c1 (1ULL << 11) 220 #define BIC_CPU_c3 (1ULL << 12) 221 #define BIC_CPU_c6 (1ULL << 13) 222 #define BIC_CPU_c7 (1ULL << 14) 223 #define BIC_ThreadC (1ULL << 15) 224 #define BIC_CoreTmp (1ULL << 16) 225 #define BIC_CoreCnt (1ULL << 17) 226 #define BIC_PkgTmp (1ULL << 18) 227 #define BIC_GFX_rc6 (1ULL << 19) 228 #define BIC_GFXMHz (1ULL << 20) 229 #define BIC_Pkgpc2 (1ULL << 21) 230 #define BIC_Pkgpc3 (1ULL << 22) 231 #define BIC_Pkgpc6 (1ULL << 23) 232 #define BIC_Pkgpc7 (1ULL << 24) 233 #define BIC_Pkgpc8 (1ULL << 25) 234 #define BIC_Pkgpc9 (1ULL << 26) 235 #define BIC_Pkgpc10 (1ULL << 27) 236 #define BIC_CPU_LPI (1ULL << 28) 237 #define BIC_SYS_LPI (1ULL << 29) 238 #define BIC_PkgWatt (1ULL << 30) 239 #define BIC_CorWatt (1ULL << 31) 240 #define BIC_GFXWatt (1ULL << 32) 241 #define BIC_PkgCnt (1ULL << 33) 242 #define BIC_RAMWatt (1ULL << 34) 243 #define BIC_PKG__ (1ULL << 35) 244 #define BIC_RAM__ (1ULL << 36) 245 #define BIC_Pkg_J (1ULL << 37) 246 #define BIC_Cor_J (1ULL << 38) 247 #define BIC_GFX_J (1ULL << 39) 248 #define BIC_RAM_J (1ULL << 40) 249 #define BIC_Mod_c6 (1ULL << 41) 250 #define BIC_Totl_c0 (1ULL << 42) 251 #define BIC_Any_c0 (1ULL << 43) 252 #define BIC_GFX_c0 (1ULL << 44) 253 #define BIC_CPUGFX (1ULL << 45) 254 #define BIC_Core (1ULL << 46) 255 #define BIC_CPU (1ULL << 47) 256 #define BIC_APIC (1ULL << 48) 257 #define BIC_X2APIC (1ULL << 49) 258 #define BIC_Die (1ULL << 50) 259 #define BIC_GFXACTMHz (1ULL << 51) 260 #define BIC_IPC (1ULL << 52) 261 #define BIC_CORE_THROT_CNT (1ULL << 53) 262 #define BIC_UNCORE_MHZ (1ULL << 54) 263 #define BIC_SAM_mc6 (1ULL << 55) 264 #define BIC_SAMMHz (1ULL << 56) 265 #define BIC_SAMACTMHz (1ULL << 57) 266 #define BIC_Diec6 (1ULL << 58) 267 #define BIC_SysWatt (1ULL << 59) 268 #define BIC_Sys_J (1ULL << 60) 269 270 #define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die ) 271 #define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__) 272 #define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) 273 #define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) 274 #define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) 275 276 #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_SysWatt | BIC_Sys_J) 277 278 unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); 279 unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; 280 281 #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) 282 #define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME) 283 #define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME) 284 #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) 285 #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) 286 #define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT) 287 288 /* 289 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit: 290 * If you change the values, note they are used both in comparisons 291 * (>= PCL__7) and to index pkg_cstate_limit_strings[]. 292 */ 293 #define PCLUKN 0 /* Unknown */ 294 #define PCLRSV 1 /* Reserved */ 295 #define PCL__0 2 /* PC0 */ 296 #define PCL__1 3 /* PC1 */ 297 #define PCL__2 4 /* PC2 */ 298 #define PCL__3 5 /* PC3 */ 299 #define PCL__4 6 /* PC4 */ 300 #define PCL__6 7 /* PC6 */ 301 #define PCL_6N 8 /* PC6 No Retention */ 302 #define PCL_6R 9 /* PC6 Retention */ 303 #define PCL__7 10 /* PC7 */ 304 #define PCL_7S 11 /* PC7 Shrink */ 305 #define PCL__8 12 /* PC8 */ 306 #define PCL__9 13 /* PC9 */ 307 #define PCL_10 14 /* PC10 */ 308 #define PCLUNL 15 /* Unlimited */ 309 310 struct amperf_group_fd; 311 312 char *proc_stat = "/proc/stat"; 313 FILE *outf; 314 int *fd_percpu; 315 int *fd_instr_count_percpu; 316 struct timeval interval_tv = { 5, 0 }; 317 struct timespec interval_ts = { 5, 0 }; 318 319 unsigned int num_iterations; 320 unsigned int header_iterations; 321 unsigned int debug; 322 unsigned int quiet; 323 unsigned int shown; 324 unsigned int sums_need_wide_columns; 325 unsigned int rapl_joules; 326 unsigned int summary_only; 327 unsigned int list_header_only; 328 unsigned int dump_only; 329 unsigned int has_aperf; 330 unsigned int has_aperf_access; 331 unsigned int has_epb; 332 unsigned int has_turbo; 333 unsigned int is_hybrid; 334 unsigned int units = 1000000; /* MHz etc */ 335 unsigned int genuine_intel; 336 unsigned int authentic_amd; 337 unsigned int hygon_genuine; 338 unsigned int max_level, max_extended_level; 339 unsigned int has_invariant_tsc; 340 unsigned int aperf_mperf_multiplier = 1; 341 double bclk; 342 double base_hz; 343 unsigned int has_base_hz; 344 double tsc_tweak = 1.0; 345 unsigned int show_pkg_only; 346 unsigned int show_core_only; 347 char *output_buffer, *outp; 348 unsigned int do_dts; 349 unsigned int do_ptm; 350 unsigned int do_ipc; 351 unsigned long long cpuidle_cur_cpu_lpi_us; 352 unsigned long long cpuidle_cur_sys_lpi_us; 353 unsigned int tj_max; 354 unsigned int tj_max_override; 355 double rapl_power_units, rapl_time_units; 356 double rapl_dram_energy_units, rapl_energy_units; 357 double rapl_joule_counter_range; 358 unsigned int crystal_hz; 359 unsigned long long tsc_hz; 360 int base_cpu; 361 unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ 362 /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ 363 unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ 364 unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ 365 unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ 366 unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ 367 unsigned int first_counter_read = 1; 368 int ignore_stdin; 369 bool no_msr; 370 bool no_perf; 371 372 enum gfx_sysfs_idx { 373 GFX_rc6, 374 GFX_MHz, 375 GFX_ACTMHz, 376 SAM_mc6, 377 SAM_MHz, 378 SAM_ACTMHz, 379 GFX_MAX 380 }; 381 382 struct gfx_sysfs_info { 383 FILE *fp; 384 unsigned int val; 385 unsigned long long val_ull; 386 }; 387 388 static struct gfx_sysfs_info gfx_info[GFX_MAX]; 389 390 int get_msr(int cpu, off_t offset, unsigned long long *msr); 391 int add_counter(unsigned int msr_num, char *path, char *name, 392 unsigned int width, enum counter_scope scope, 393 enum counter_type type, enum counter_format format, int flags, int package_num); 394 395 /* Model specific support Start */ 396 397 /* List of features that may diverge among different platforms */ 398 struct platform_features { 399 bool has_msr_misc_feature_control; /* MSR_MISC_FEATURE_CONTROL */ 400 bool has_msr_misc_pwr_mgmt; /* MSR_MISC_PWR_MGMT */ 401 bool has_nhm_msrs; /* MSR_PLATFORM_INFO, MSR_IA32_TEMPERATURE_TARGET, MSR_SMI_COUNT, MSR_PKG_CST_CONFIG_CONTROL, MSR_IA32_POWER_CTL, TRL MSRs */ 402 bool has_config_tdp; /* MSR_CONFIG_TDP_NOMINAL/LEVEL_1/LEVEL_2/CONTROL, MSR_TURBO_ACTIVATION_RATIO */ 403 int bclk_freq; /* CPU base clock */ 404 int crystal_freq; /* Crystal clock to use when not available from CPUID.15 */ 405 int supported_cstates; /* Core cstates and Package cstates supported */ 406 int cst_limit; /* MSR_PKG_CST_CONFIG_CONTROL */ 407 bool has_cst_auto_convension; /* AUTOMATIC_CSTATE_CONVERSION bit in MSR_PKG_CST_CONFIG_CONTROL */ 408 bool has_irtl_msrs; /* MSR_PKGC3/PKGC6/PKGC7/PKGC8/PKGC9/PKGC10_IRTL */ 409 bool has_msr_core_c1_res; /* MSR_CORE_C1_RES */ 410 bool has_msr_module_c6_res_ms; /* MSR_MODULE_C6_RES_MS */ 411 bool has_msr_c6_demotion_policy_config; /* MSR_CC6_DEMOTION_POLICY_CONFIG/MSR_MC6_DEMOTION_POLICY_CONFIG */ 412 bool has_msr_atom_pkg_c6_residency; /* MSR_ATOM_PKG_C6_RESIDENCY */ 413 bool has_msr_knl_core_c6_residency; /* MSR_KNL_CORE_C6_RESIDENCY */ 414 bool has_ext_cst_msrs; /* MSR_PKG_WEIGHTED_CORE_C0_RES/MSR_PKG_ANY_CORE_C0_RES/MSR_PKG_ANY_GFXE_C0_RES/MSR_PKG_BOTH_CORE_GFXE_C0_RES */ 415 bool has_cst_prewake_bit; /* Cstate prewake bit in MSR_IA32_POWER_CTL */ 416 int trl_msrs; /* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */ 417 int plr_msrs; /* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */ 418 int rapl_msrs; /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */ 419 bool has_per_core_rapl; /* Indicates cores energy collection is per-core, not per-package. AMD specific for now */ 420 bool has_rapl_divisor; /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */ 421 bool has_fixed_rapl_unit; /* Fixed Energy Unit used for DRAM RAPL Domain */ 422 int rapl_quirk_tdp; /* Hardcoded TDP value when cannot be retrieved from hardware */ 423 int tcc_offset_bits; /* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */ 424 bool enable_tsc_tweak; /* Use CPU Base freq instead of TSC freq for aperf/mperf counter */ 425 bool need_perf_multiplier; /* mperf/aperf multiplier */ 426 }; 427 428 struct platform_data { 429 unsigned int vfm; 430 const struct platform_features *features; 431 }; 432 433 /* For BCLK */ 434 enum bclk_freq { 435 BCLK_100MHZ = 1, 436 BCLK_133MHZ, 437 BCLK_SLV, 438 }; 439 440 #define SLM_BCLK_FREQS 5 441 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 }; 442 443 double slm_bclk(void) 444 { 445 unsigned long long msr = 3; 446 unsigned int i; 447 double freq; 448 449 if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) 450 fprintf(outf, "SLM BCLK: unknown\n"); 451 452 i = msr & 0xf; 453 if (i >= SLM_BCLK_FREQS) { 454 fprintf(outf, "SLM BCLK[%d] invalid\n", i); 455 i = 3; 456 } 457 freq = slm_freq_table[i]; 458 459 if (!quiet) 460 fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); 461 462 return freq; 463 } 464 465 /* For Package cstate limit */ 466 enum package_cstate_limit { 467 CST_LIMIT_NHM = 1, 468 CST_LIMIT_SNB, 469 CST_LIMIT_HSW, 470 CST_LIMIT_SKX, 471 CST_LIMIT_ICX, 472 CST_LIMIT_SLV, 473 CST_LIMIT_AMT, 474 CST_LIMIT_KNL, 475 CST_LIMIT_GMT, 476 }; 477 478 /* For Turbo Ratio Limit MSRs */ 479 enum turbo_ratio_limit_msrs { 480 TRL_BASE = BIT(0), 481 TRL_LIMIT1 = BIT(1), 482 TRL_LIMIT2 = BIT(2), 483 TRL_ATOM = BIT(3), 484 TRL_KNL = BIT(4), 485 TRL_CORECOUNT = BIT(5), 486 }; 487 488 /* For Perf Limit Reason MSRs */ 489 enum perf_limit_reason_msrs { 490 PLR_CORE = BIT(0), 491 PLR_GFX = BIT(1), 492 PLR_RING = BIT(2), 493 }; 494 495 /* For RAPL MSRs */ 496 enum rapl_msrs { 497 RAPL_PKG_POWER_LIMIT = BIT(0), /* 0x610 MSR_PKG_POWER_LIMIT */ 498 RAPL_PKG_ENERGY_STATUS = BIT(1), /* 0x611 MSR_PKG_ENERGY_STATUS */ 499 RAPL_PKG_PERF_STATUS = BIT(2), /* 0x613 MSR_PKG_PERF_STATUS */ 500 RAPL_PKG_POWER_INFO = BIT(3), /* 0x614 MSR_PKG_POWER_INFO */ 501 RAPL_DRAM_POWER_LIMIT = BIT(4), /* 0x618 MSR_DRAM_POWER_LIMIT */ 502 RAPL_DRAM_ENERGY_STATUS = BIT(5), /* 0x619 MSR_DRAM_ENERGY_STATUS */ 503 RAPL_DRAM_PERF_STATUS = BIT(6), /* 0x61b MSR_DRAM_PERF_STATUS */ 504 RAPL_DRAM_POWER_INFO = BIT(7), /* 0x61c MSR_DRAM_POWER_INFO */ 505 RAPL_CORE_POWER_LIMIT = BIT(8), /* 0x638 MSR_PP0_POWER_LIMIT */ 506 RAPL_CORE_ENERGY_STATUS = BIT(9), /* 0x639 MSR_PP0_ENERGY_STATUS */ 507 RAPL_CORE_POLICY = BIT(10), /* 0x63a MSR_PP0_POLICY */ 508 RAPL_GFX_POWER_LIMIT = BIT(11), /* 0x640 MSR_PP1_POWER_LIMIT */ 509 RAPL_GFX_ENERGY_STATUS = BIT(12), /* 0x641 MSR_PP1_ENERGY_STATUS */ 510 RAPL_GFX_POLICY = BIT(13), /* 0x642 MSR_PP1_POLICY */ 511 RAPL_AMD_PWR_UNIT = BIT(14), /* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */ 512 RAPL_AMD_CORE_ENERGY_STAT = BIT(15), /* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */ 513 RAPL_AMD_PKG_ENERGY_STAT = BIT(16), /* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */ 514 RAPL_PLATFORM_ENERGY_LIMIT = BIT(17), /* 0x64c MSR_PLATFORM_ENERGY_LIMIT */ 515 RAPL_PLATFORM_ENERGY_STATUS = BIT(18), /* 0x64d MSR_PLATFORM_ENERGY_STATUS */ 516 }; 517 518 #define RAPL_PKG (RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT) 519 #define RAPL_DRAM (RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT) 520 #define RAPL_CORE (RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT) 521 #define RAPL_GFX (RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS) 522 #define RAPL_PSYS (RAPL_PLATFORM_ENERGY_STATUS | RAPL_PLATFORM_ENERGY_LIMIT) 523 524 #define RAPL_PKG_ALL (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO) 525 #define RAPL_DRAM_ALL (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO) 526 #define RAPL_CORE_ALL (RAPL_CORE | RAPL_CORE_POLICY) 527 #define RAPL_GFX_ALL (RAPL_GFX | RAPL_GFX_POLIGY) 528 529 #define RAPL_AMD_F17H (RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | RAPL_AMD_PKG_ENERGY_STAT) 530 531 /* For Cstates */ 532 enum cstates { 533 CC1 = BIT(0), 534 CC3 = BIT(1), 535 CC6 = BIT(2), 536 CC7 = BIT(3), 537 PC2 = BIT(4), 538 PC3 = BIT(5), 539 PC6 = BIT(6), 540 PC7 = BIT(7), 541 PC8 = BIT(8), 542 PC9 = BIT(9), 543 PC10 = BIT(10), 544 }; 545 546 static const struct platform_features nhm_features = { 547 .has_msr_misc_pwr_mgmt = 1, 548 .has_nhm_msrs = 1, 549 .bclk_freq = BCLK_133MHZ, 550 .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6, 551 .cst_limit = CST_LIMIT_NHM, 552 .trl_msrs = TRL_BASE, 553 }; 554 555 static const struct platform_features nhx_features = { 556 .has_msr_misc_pwr_mgmt = 1, 557 .has_nhm_msrs = 1, 558 .bclk_freq = BCLK_133MHZ, 559 .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6, 560 .cst_limit = CST_LIMIT_NHM, 561 }; 562 563 static const struct platform_features snb_features = { 564 .has_msr_misc_feature_control = 1, 565 .has_msr_misc_pwr_mgmt = 1, 566 .has_nhm_msrs = 1, 567 .bclk_freq = BCLK_100MHZ, 568 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 569 .cst_limit = CST_LIMIT_SNB, 570 .has_irtl_msrs = 1, 571 .trl_msrs = TRL_BASE, 572 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 573 }; 574 575 static const struct platform_features snx_features = { 576 .has_msr_misc_feature_control = 1, 577 .has_msr_misc_pwr_mgmt = 1, 578 .has_nhm_msrs = 1, 579 .bclk_freq = BCLK_100MHZ, 580 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 581 .cst_limit = CST_LIMIT_SNB, 582 .has_irtl_msrs = 1, 583 .trl_msrs = TRL_BASE, 584 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, 585 }; 586 587 static const struct platform_features ivb_features = { 588 .has_msr_misc_feature_control = 1, 589 .has_msr_misc_pwr_mgmt = 1, 590 .has_nhm_msrs = 1, 591 .has_config_tdp = 1, 592 .bclk_freq = BCLK_100MHZ, 593 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 594 .cst_limit = CST_LIMIT_SNB, 595 .has_irtl_msrs = 1, 596 .trl_msrs = TRL_BASE, 597 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 598 }; 599 600 static const struct platform_features ivx_features = { 601 .has_msr_misc_feature_control = 1, 602 .has_msr_misc_pwr_mgmt = 1, 603 .has_nhm_msrs = 1, 604 .bclk_freq = BCLK_100MHZ, 605 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 606 .cst_limit = CST_LIMIT_SNB, 607 .has_irtl_msrs = 1, 608 .trl_msrs = TRL_BASE | TRL_LIMIT1, 609 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, 610 }; 611 612 static const struct platform_features hsw_features = { 613 .has_msr_misc_feature_control = 1, 614 .has_msr_misc_pwr_mgmt = 1, 615 .has_nhm_msrs = 1, 616 .has_config_tdp = 1, 617 .bclk_freq = BCLK_100MHZ, 618 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 619 .cst_limit = CST_LIMIT_HSW, 620 .has_irtl_msrs = 1, 621 .trl_msrs = TRL_BASE, 622 .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, 623 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 624 }; 625 626 static const struct platform_features hsx_features = { 627 .has_msr_misc_feature_control = 1, 628 .has_msr_misc_pwr_mgmt = 1, 629 .has_nhm_msrs = 1, 630 .has_config_tdp = 1, 631 .bclk_freq = BCLK_100MHZ, 632 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 633 .cst_limit = CST_LIMIT_HSW, 634 .has_irtl_msrs = 1, 635 .trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2, 636 .plr_msrs = PLR_CORE | PLR_RING, 637 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 638 .has_fixed_rapl_unit = 1, 639 }; 640 641 static const struct platform_features hswl_features = { 642 .has_msr_misc_feature_control = 1, 643 .has_msr_misc_pwr_mgmt = 1, 644 .has_nhm_msrs = 1, 645 .has_config_tdp = 1, 646 .bclk_freq = BCLK_100MHZ, 647 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 648 .cst_limit = CST_LIMIT_HSW, 649 .has_irtl_msrs = 1, 650 .trl_msrs = TRL_BASE, 651 .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, 652 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 653 }; 654 655 static const struct platform_features hswg_features = { 656 .has_msr_misc_feature_control = 1, 657 .has_msr_misc_pwr_mgmt = 1, 658 .has_nhm_msrs = 1, 659 .has_config_tdp = 1, 660 .bclk_freq = BCLK_100MHZ, 661 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 662 .cst_limit = CST_LIMIT_HSW, 663 .has_irtl_msrs = 1, 664 .trl_msrs = TRL_BASE, 665 .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, 666 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 667 }; 668 669 static const struct platform_features bdw_features = { 670 .has_msr_misc_feature_control = 1, 671 .has_msr_misc_pwr_mgmt = 1, 672 .has_nhm_msrs = 1, 673 .has_config_tdp = 1, 674 .bclk_freq = BCLK_100MHZ, 675 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 676 .cst_limit = CST_LIMIT_HSW, 677 .has_irtl_msrs = 1, 678 .trl_msrs = TRL_BASE, 679 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 680 }; 681 682 static const struct platform_features bdwg_features = { 683 .has_msr_misc_feature_control = 1, 684 .has_msr_misc_pwr_mgmt = 1, 685 .has_nhm_msrs = 1, 686 .has_config_tdp = 1, 687 .bclk_freq = BCLK_100MHZ, 688 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 689 .cst_limit = CST_LIMIT_HSW, 690 .has_irtl_msrs = 1, 691 .trl_msrs = TRL_BASE, 692 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 693 }; 694 695 static const struct platform_features bdx_features = { 696 .has_msr_misc_feature_control = 1, 697 .has_msr_misc_pwr_mgmt = 1, 698 .has_nhm_msrs = 1, 699 .has_config_tdp = 1, 700 .bclk_freq = BCLK_100MHZ, 701 .supported_cstates = CC1 | CC3 | CC6 | PC2 | PC3 | PC6, 702 .cst_limit = CST_LIMIT_HSW, 703 .has_irtl_msrs = 1, 704 .has_cst_auto_convension = 1, 705 .trl_msrs = TRL_BASE, 706 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 707 .has_fixed_rapl_unit = 1, 708 }; 709 710 static const struct platform_features skl_features = { 711 .has_msr_misc_feature_control = 1, 712 .has_msr_misc_pwr_mgmt = 1, 713 .has_nhm_msrs = 1, 714 .has_config_tdp = 1, 715 .bclk_freq = BCLK_100MHZ, 716 .crystal_freq = 24000000, 717 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 718 .cst_limit = CST_LIMIT_HSW, 719 .has_irtl_msrs = 1, 720 .has_ext_cst_msrs = 1, 721 .trl_msrs = TRL_BASE, 722 .tcc_offset_bits = 6, 723 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS, 724 .enable_tsc_tweak = 1, 725 }; 726 727 static const struct platform_features cnl_features = { 728 .has_msr_misc_feature_control = 1, 729 .has_msr_misc_pwr_mgmt = 1, 730 .has_nhm_msrs = 1, 731 .has_config_tdp = 1, 732 .bclk_freq = BCLK_100MHZ, 733 .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 734 .cst_limit = CST_LIMIT_HSW, 735 .has_irtl_msrs = 1, 736 .has_msr_core_c1_res = 1, 737 .has_ext_cst_msrs = 1, 738 .trl_msrs = TRL_BASE, 739 .tcc_offset_bits = 6, 740 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS, 741 .enable_tsc_tweak = 1, 742 }; 743 744 /* Copied from cnl_features, with PC7/PC9 removed */ 745 static const struct platform_features adl_features = { 746 .has_msr_misc_feature_control = cnl_features.has_msr_misc_feature_control, 747 .has_msr_misc_pwr_mgmt = cnl_features.has_msr_misc_pwr_mgmt, 748 .has_nhm_msrs = cnl_features.has_nhm_msrs, 749 .has_config_tdp = cnl_features.has_config_tdp, 750 .bclk_freq = cnl_features.bclk_freq, 751 .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10, 752 .cst_limit = cnl_features.cst_limit, 753 .has_irtl_msrs = cnl_features.has_irtl_msrs, 754 .has_msr_core_c1_res = cnl_features.has_msr_core_c1_res, 755 .has_ext_cst_msrs = cnl_features.has_ext_cst_msrs, 756 .trl_msrs = cnl_features.trl_msrs, 757 .tcc_offset_bits = cnl_features.tcc_offset_bits, 758 .rapl_msrs = cnl_features.rapl_msrs, 759 .enable_tsc_tweak = cnl_features.enable_tsc_tweak, 760 }; 761 762 /* Copied from adl_features, with PC3/PC8 removed */ 763 static const struct platform_features lnl_features = { 764 .has_msr_misc_feature_control = adl_features.has_msr_misc_feature_control, 765 .has_msr_misc_pwr_mgmt = adl_features.has_msr_misc_pwr_mgmt, 766 .has_nhm_msrs = adl_features.has_nhm_msrs, 767 .has_config_tdp = adl_features.has_config_tdp, 768 .bclk_freq = adl_features.bclk_freq, 769 .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC6 | PC10, 770 .cst_limit = adl_features.cst_limit, 771 .has_irtl_msrs = adl_features.has_irtl_msrs, 772 .has_msr_core_c1_res = adl_features.has_msr_core_c1_res, 773 .has_ext_cst_msrs = adl_features.has_ext_cst_msrs, 774 .trl_msrs = adl_features.trl_msrs, 775 .tcc_offset_bits = adl_features.tcc_offset_bits, 776 .rapl_msrs = adl_features.rapl_msrs, 777 .enable_tsc_tweak = adl_features.enable_tsc_tweak, 778 }; 779 780 static const struct platform_features skx_features = { 781 .has_msr_misc_feature_control = 1, 782 .has_msr_misc_pwr_mgmt = 1, 783 .has_nhm_msrs = 1, 784 .has_config_tdp = 1, 785 .bclk_freq = BCLK_100MHZ, 786 .supported_cstates = CC1 | CC6 | PC2 | PC6, 787 .cst_limit = CST_LIMIT_SKX, 788 .has_irtl_msrs = 1, 789 .has_cst_auto_convension = 1, 790 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 791 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 792 .has_fixed_rapl_unit = 1, 793 }; 794 795 static const struct platform_features icx_features = { 796 .has_msr_misc_feature_control = 1, 797 .has_msr_misc_pwr_mgmt = 1, 798 .has_nhm_msrs = 1, 799 .has_config_tdp = 1, 800 .bclk_freq = BCLK_100MHZ, 801 .supported_cstates = CC1 | CC6 | PC2 | PC6, 802 .cst_limit = CST_LIMIT_ICX, 803 .has_msr_core_c1_res = 1, 804 .has_irtl_msrs = 1, 805 .has_cst_prewake_bit = 1, 806 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 807 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, 808 .has_fixed_rapl_unit = 1, 809 }; 810 811 static const struct platform_features spr_features = { 812 .has_msr_misc_feature_control = 1, 813 .has_msr_misc_pwr_mgmt = 1, 814 .has_nhm_msrs = 1, 815 .has_config_tdp = 1, 816 .bclk_freq = BCLK_100MHZ, 817 .supported_cstates = CC1 | CC6 | PC2 | PC6, 818 .cst_limit = CST_LIMIT_SKX, 819 .has_msr_core_c1_res = 1, 820 .has_irtl_msrs = 1, 821 .has_cst_prewake_bit = 1, 822 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 823 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, 824 }; 825 826 static const struct platform_features srf_features = { 827 .has_msr_misc_feature_control = 1, 828 .has_msr_misc_pwr_mgmt = 1, 829 .has_nhm_msrs = 1, 830 .has_config_tdp = 1, 831 .bclk_freq = BCLK_100MHZ, 832 .supported_cstates = CC1 | CC6 | PC2 | PC6, 833 .cst_limit = CST_LIMIT_SKX, 834 .has_msr_core_c1_res = 1, 835 .has_msr_module_c6_res_ms = 1, 836 .has_irtl_msrs = 1, 837 .has_cst_prewake_bit = 1, 838 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 839 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, 840 }; 841 842 static const struct platform_features grr_features = { 843 .has_msr_misc_feature_control = 1, 844 .has_msr_misc_pwr_mgmt = 1, 845 .has_nhm_msrs = 1, 846 .has_config_tdp = 1, 847 .bclk_freq = BCLK_100MHZ, 848 .supported_cstates = CC1 | CC6, 849 .cst_limit = CST_LIMIT_SKX, 850 .has_msr_core_c1_res = 1, 851 .has_msr_module_c6_res_ms = 1, 852 .has_irtl_msrs = 1, 853 .has_cst_prewake_bit = 1, 854 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 855 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, 856 }; 857 858 static const struct platform_features slv_features = { 859 .has_nhm_msrs = 1, 860 .bclk_freq = BCLK_SLV, 861 .supported_cstates = CC1 | CC6 | PC6, 862 .cst_limit = CST_LIMIT_SLV, 863 .has_msr_core_c1_res = 1, 864 .has_msr_module_c6_res_ms = 1, 865 .has_msr_c6_demotion_policy_config = 1, 866 .has_msr_atom_pkg_c6_residency = 1, 867 .trl_msrs = TRL_ATOM, 868 .rapl_msrs = RAPL_PKG | RAPL_CORE, 869 .has_rapl_divisor = 1, 870 .rapl_quirk_tdp = 30, 871 }; 872 873 static const struct platform_features slvd_features = { 874 .has_msr_misc_pwr_mgmt = 1, 875 .has_nhm_msrs = 1, 876 .bclk_freq = BCLK_SLV, 877 .supported_cstates = CC1 | CC6 | PC3 | PC6, 878 .cst_limit = CST_LIMIT_SLV, 879 .has_msr_atom_pkg_c6_residency = 1, 880 .trl_msrs = TRL_BASE, 881 .rapl_msrs = RAPL_PKG | RAPL_CORE, 882 .rapl_quirk_tdp = 30, 883 }; 884 885 static const struct platform_features amt_features = { 886 .has_nhm_msrs = 1, 887 .bclk_freq = BCLK_133MHZ, 888 .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6, 889 .cst_limit = CST_LIMIT_AMT, 890 .trl_msrs = TRL_BASE, 891 }; 892 893 static const struct platform_features gmt_features = { 894 .has_msr_misc_pwr_mgmt = 1, 895 .has_nhm_msrs = 1, 896 .bclk_freq = BCLK_100MHZ, 897 .crystal_freq = 19200000, 898 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 899 .cst_limit = CST_LIMIT_GMT, 900 .has_irtl_msrs = 1, 901 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 902 .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, 903 }; 904 905 static const struct platform_features gmtd_features = { 906 .has_msr_misc_pwr_mgmt = 1, 907 .has_nhm_msrs = 1, 908 .bclk_freq = BCLK_100MHZ, 909 .crystal_freq = 25000000, 910 .supported_cstates = CC1 | CC6 | PC2 | PC6, 911 .cst_limit = CST_LIMIT_GMT, 912 .has_irtl_msrs = 1, 913 .has_msr_core_c1_res = 1, 914 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 915 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS, 916 }; 917 918 static const struct platform_features gmtp_features = { 919 .has_msr_misc_pwr_mgmt = 1, 920 .has_nhm_msrs = 1, 921 .bclk_freq = BCLK_100MHZ, 922 .crystal_freq = 19200000, 923 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 924 .cst_limit = CST_LIMIT_GMT, 925 .has_irtl_msrs = 1, 926 .trl_msrs = TRL_BASE, 927 .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, 928 }; 929 930 static const struct platform_features tmt_features = { 931 .has_msr_misc_pwr_mgmt = 1, 932 .has_nhm_msrs = 1, 933 .bclk_freq = BCLK_100MHZ, 934 .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 935 .cst_limit = CST_LIMIT_GMT, 936 .has_irtl_msrs = 1, 937 .trl_msrs = TRL_BASE, 938 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, 939 .enable_tsc_tweak = 1, 940 }; 941 942 static const struct platform_features tmtd_features = { 943 .has_msr_misc_pwr_mgmt = 1, 944 .has_nhm_msrs = 1, 945 .bclk_freq = BCLK_100MHZ, 946 .supported_cstates = CC1 | CC6, 947 .cst_limit = CST_LIMIT_GMT, 948 .has_irtl_msrs = 1, 949 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 950 .rapl_msrs = RAPL_PKG_ALL, 951 }; 952 953 static const struct platform_features knl_features = { 954 .has_msr_misc_pwr_mgmt = 1, 955 .has_nhm_msrs = 1, 956 .has_config_tdp = 1, 957 .bclk_freq = BCLK_100MHZ, 958 .supported_cstates = CC1 | CC6 | PC3 | PC6, 959 .cst_limit = CST_LIMIT_KNL, 960 .has_msr_knl_core_c6_residency = 1, 961 .trl_msrs = TRL_KNL, 962 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 963 .has_fixed_rapl_unit = 1, 964 .need_perf_multiplier = 1, 965 }; 966 967 static const struct platform_features default_features = { 968 }; 969 970 static const struct platform_features amd_features_with_rapl = { 971 .rapl_msrs = RAPL_AMD_F17H, 972 .has_per_core_rapl = 1, 973 .rapl_quirk_tdp = 280, /* This is the max stock TDP of HEDT/Server Fam17h+ chips */ 974 }; 975 976 static const struct platform_data turbostat_pdata[] = { 977 { INTEL_NEHALEM, &nhm_features }, 978 { INTEL_NEHALEM_G, &nhm_features }, 979 { INTEL_NEHALEM_EP, &nhm_features }, 980 { INTEL_NEHALEM_EX, &nhx_features }, 981 { INTEL_WESTMERE, &nhm_features }, 982 { INTEL_WESTMERE_EP, &nhm_features }, 983 { INTEL_WESTMERE_EX, &nhx_features }, 984 { INTEL_SANDYBRIDGE, &snb_features }, 985 { INTEL_SANDYBRIDGE_X, &snx_features }, 986 { INTEL_IVYBRIDGE, &ivb_features }, 987 { INTEL_IVYBRIDGE_X, &ivx_features }, 988 { INTEL_HASWELL, &hsw_features }, 989 { INTEL_HASWELL_X, &hsx_features }, 990 { INTEL_HASWELL_L, &hswl_features }, 991 { INTEL_HASWELL_G, &hswg_features }, 992 { INTEL_BROADWELL, &bdw_features }, 993 { INTEL_BROADWELL_G, &bdwg_features }, 994 { INTEL_BROADWELL_X, &bdx_features }, 995 { INTEL_BROADWELL_D, &bdx_features }, 996 { INTEL_SKYLAKE_L, &skl_features }, 997 { INTEL_SKYLAKE, &skl_features }, 998 { INTEL_SKYLAKE_X, &skx_features }, 999 { INTEL_KABYLAKE_L, &skl_features }, 1000 { INTEL_KABYLAKE, &skl_features }, 1001 { INTEL_COMETLAKE, &skl_features }, 1002 { INTEL_COMETLAKE_L, &skl_features }, 1003 { INTEL_CANNONLAKE_L, &cnl_features }, 1004 { INTEL_ICELAKE_X, &icx_features }, 1005 { INTEL_ICELAKE_D, &icx_features }, 1006 { INTEL_ICELAKE_L, &cnl_features }, 1007 { INTEL_ICELAKE_NNPI, &cnl_features }, 1008 { INTEL_ROCKETLAKE, &cnl_features }, 1009 { INTEL_TIGERLAKE_L, &cnl_features }, 1010 { INTEL_TIGERLAKE, &cnl_features }, 1011 { INTEL_SAPPHIRERAPIDS_X, &spr_features }, 1012 { INTEL_EMERALDRAPIDS_X, &spr_features }, 1013 { INTEL_GRANITERAPIDS_X, &spr_features }, 1014 { INTEL_GRANITERAPIDS_D, &spr_features }, 1015 { INTEL_LAKEFIELD, &cnl_features }, 1016 { INTEL_ALDERLAKE, &adl_features }, 1017 { INTEL_ALDERLAKE_L, &adl_features }, 1018 { INTEL_RAPTORLAKE, &adl_features }, 1019 { INTEL_RAPTORLAKE_P, &adl_features }, 1020 { INTEL_RAPTORLAKE_S, &adl_features }, 1021 { INTEL_METEORLAKE, &adl_features }, 1022 { INTEL_METEORLAKE_L, &adl_features }, 1023 { INTEL_ARROWLAKE_H, &adl_features }, 1024 { INTEL_ARROWLAKE_U, &adl_features }, 1025 { INTEL_ARROWLAKE, &adl_features }, 1026 { INTEL_LUNARLAKE_M, &lnl_features }, 1027 { INTEL_ATOM_SILVERMONT, &slv_features }, 1028 { INTEL_ATOM_SILVERMONT_D, &slvd_features }, 1029 { INTEL_ATOM_AIRMONT, &amt_features }, 1030 { INTEL_ATOM_GOLDMONT, &gmt_features }, 1031 { INTEL_ATOM_GOLDMONT_D, &gmtd_features }, 1032 { INTEL_ATOM_GOLDMONT_PLUS, &gmtp_features }, 1033 { INTEL_ATOM_TREMONT_D, &tmtd_features }, 1034 { INTEL_ATOM_TREMONT, &tmt_features }, 1035 { INTEL_ATOM_TREMONT_L, &tmt_features }, 1036 { INTEL_ATOM_GRACEMONT, &adl_features }, 1037 { INTEL_ATOM_CRESTMONT_X, &srf_features }, 1038 { INTEL_ATOM_CRESTMONT, &grr_features }, 1039 { INTEL_XEON_PHI_KNL, &knl_features }, 1040 { INTEL_XEON_PHI_KNM, &knl_features }, 1041 /* 1042 * Missing support for 1043 * INTEL_ICELAKE 1044 * INTEL_ATOM_SILVERMONT_MID 1045 * INTEL_ATOM_AIRMONT_MID 1046 * INTEL_ATOM_AIRMONT_NP 1047 */ 1048 { 0, NULL }, 1049 }; 1050 1051 static const struct platform_features *platform; 1052 1053 void probe_platform_features(unsigned int family, unsigned int model) 1054 { 1055 int i; 1056 1057 platform = &default_features; 1058 1059 if (authentic_amd || hygon_genuine) { 1060 if (max_extended_level >= 0x80000007) { 1061 unsigned int eax, ebx, ecx, edx; 1062 1063 __cpuid(0x80000007, eax, ebx, ecx, edx); 1064 /* RAPL (Fam 17h+) */ 1065 if ((edx & (1 << 14)) && family >= 0x17) 1066 platform = &amd_features_with_rapl; 1067 } 1068 return; 1069 } 1070 1071 if (!genuine_intel) 1072 return; 1073 1074 for (i = 0; turbostat_pdata[i].features; i++) { 1075 if (VFM_FAMILY(turbostat_pdata[i].vfm) == family && VFM_MODEL(turbostat_pdata[i].vfm) == model) { 1076 platform = turbostat_pdata[i].features; 1077 return; 1078 } 1079 } 1080 } 1081 1082 /* Model specific support End */ 1083 1084 #define TJMAX_DEFAULT 100 1085 1086 /* MSRs that are not yet in the kernel-provided header. */ 1087 #define MSR_RAPL_PWR_UNIT 0xc0010299 1088 #define MSR_CORE_ENERGY_STAT 0xc001029a 1089 #define MSR_PKG_ENERGY_STAT 0xc001029b 1090 1091 #define MAX(a, b) ((a) > (b) ? (a) : (b)) 1092 1093 int backwards_count; 1094 char *progname; 1095 1096 #define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */ 1097 cpu_set_t *cpu_present_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset; 1098 size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size; 1099 #define MAX_ADDED_THREAD_COUNTERS 24 1100 #define MAX_ADDED_CORE_COUNTERS 8 1101 #define MAX_ADDED_PACKAGE_COUNTERS 16 1102 #define PMT_MAX_ADDED_THREAD_COUNTERS 24 1103 #define PMT_MAX_ADDED_CORE_COUNTERS 8 1104 #define PMT_MAX_ADDED_PACKAGE_COUNTERS 16 1105 #define BITMASK_SIZE 32 1106 1107 #define ZERO_ARRAY(arr) (memset(arr, 0, sizeof(arr)) + __must_be_array(arr)) 1108 1109 /* Indexes used to map data read from perf and MSRs into global variables */ 1110 enum rapl_rci_index { 1111 RAPL_RCI_INDEX_ENERGY_PKG = 0, 1112 RAPL_RCI_INDEX_ENERGY_CORES = 1, 1113 RAPL_RCI_INDEX_DRAM = 2, 1114 RAPL_RCI_INDEX_GFX = 3, 1115 RAPL_RCI_INDEX_PKG_PERF_STATUS = 4, 1116 RAPL_RCI_INDEX_DRAM_PERF_STATUS = 5, 1117 RAPL_RCI_INDEX_CORE_ENERGY = 6, 1118 RAPL_RCI_INDEX_ENERGY_PLATFORM = 7, 1119 NUM_RAPL_COUNTERS, 1120 }; 1121 1122 enum rapl_unit { 1123 RAPL_UNIT_INVALID, 1124 RAPL_UNIT_JOULES, 1125 RAPL_UNIT_WATTS, 1126 }; 1127 1128 struct rapl_counter_info_t { 1129 unsigned long long data[NUM_RAPL_COUNTERS]; 1130 enum counter_source source[NUM_RAPL_COUNTERS]; 1131 unsigned long long flags[NUM_RAPL_COUNTERS]; 1132 double scale[NUM_RAPL_COUNTERS]; 1133 enum rapl_unit unit[NUM_RAPL_COUNTERS]; 1134 unsigned long long msr[NUM_RAPL_COUNTERS]; 1135 unsigned long long msr_mask[NUM_RAPL_COUNTERS]; 1136 int msr_shift[NUM_RAPL_COUNTERS]; 1137 1138 int fd_perf; 1139 }; 1140 1141 /* struct rapl_counter_info_t for each RAPL domain */ 1142 struct rapl_counter_info_t *rapl_counter_info_perdomain; 1143 unsigned int rapl_counter_info_perdomain_size; 1144 1145 #define RAPL_COUNTER_FLAG_PLATFORM_COUNTER (1u << 0) 1146 #define RAPL_COUNTER_FLAG_USE_MSR_SUM (1u << 1) 1147 1148 struct rapl_counter_arch_info { 1149 int feature_mask; /* Mask for testing if the counter is supported on host */ 1150 const char *perf_subsys; 1151 const char *perf_name; 1152 unsigned long long msr; 1153 unsigned long long msr_mask; 1154 int msr_shift; /* Positive mean shift right, negative mean shift left */ 1155 double *platform_rapl_msr_scale; /* Scale applied to values read by MSR (platform dependent, filled at runtime) */ 1156 unsigned int rci_index; /* Maps data from perf counters to global variables */ 1157 unsigned long long bic; 1158 double compat_scale; /* Some counters require constant scaling to be in the same range as other, similar ones */ 1159 unsigned long long flags; 1160 }; 1161 1162 static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { 1163 { 1164 .feature_mask = RAPL_PKG, 1165 .perf_subsys = "power", 1166 .perf_name = "energy-pkg", 1167 .msr = MSR_PKG_ENERGY_STATUS, 1168 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1169 .msr_shift = 0, 1170 .platform_rapl_msr_scale = &rapl_energy_units, 1171 .rci_index = RAPL_RCI_INDEX_ENERGY_PKG, 1172 .bic = BIC_PkgWatt | BIC_Pkg_J, 1173 .compat_scale = 1.0, 1174 .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, 1175 }, 1176 { 1177 .feature_mask = RAPL_AMD_F17H, 1178 .perf_subsys = "power", 1179 .perf_name = "energy-pkg", 1180 .msr = MSR_PKG_ENERGY_STAT, 1181 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1182 .msr_shift = 0, 1183 .platform_rapl_msr_scale = &rapl_energy_units, 1184 .rci_index = RAPL_RCI_INDEX_ENERGY_PKG, 1185 .bic = BIC_PkgWatt | BIC_Pkg_J, 1186 .compat_scale = 1.0, 1187 .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, 1188 }, 1189 { 1190 .feature_mask = RAPL_CORE_ENERGY_STATUS, 1191 .perf_subsys = "power", 1192 .perf_name = "energy-cores", 1193 .msr = MSR_PP0_ENERGY_STATUS, 1194 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1195 .msr_shift = 0, 1196 .platform_rapl_msr_scale = &rapl_energy_units, 1197 .rci_index = RAPL_RCI_INDEX_ENERGY_CORES, 1198 .bic = BIC_CorWatt | BIC_Cor_J, 1199 .compat_scale = 1.0, 1200 .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, 1201 }, 1202 { 1203 .feature_mask = RAPL_DRAM, 1204 .perf_subsys = "power", 1205 .perf_name = "energy-ram", 1206 .msr = MSR_DRAM_ENERGY_STATUS, 1207 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1208 .msr_shift = 0, 1209 .platform_rapl_msr_scale = &rapl_dram_energy_units, 1210 .rci_index = RAPL_RCI_INDEX_DRAM, 1211 .bic = BIC_RAMWatt | BIC_RAM_J, 1212 .compat_scale = 1.0, 1213 .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, 1214 }, 1215 { 1216 .feature_mask = RAPL_GFX, 1217 .perf_subsys = "power", 1218 .perf_name = "energy-gpu", 1219 .msr = MSR_PP1_ENERGY_STATUS, 1220 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1221 .msr_shift = 0, 1222 .platform_rapl_msr_scale = &rapl_energy_units, 1223 .rci_index = RAPL_RCI_INDEX_GFX, 1224 .bic = BIC_GFXWatt | BIC_GFX_J, 1225 .compat_scale = 1.0, 1226 .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, 1227 }, 1228 { 1229 .feature_mask = RAPL_PKG_PERF_STATUS, 1230 .perf_subsys = NULL, 1231 .perf_name = NULL, 1232 .msr = MSR_PKG_PERF_STATUS, 1233 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1234 .msr_shift = 0, 1235 .platform_rapl_msr_scale = &rapl_time_units, 1236 .rci_index = RAPL_RCI_INDEX_PKG_PERF_STATUS, 1237 .bic = BIC_PKG__, 1238 .compat_scale = 100.0, 1239 .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, 1240 }, 1241 { 1242 .feature_mask = RAPL_DRAM_PERF_STATUS, 1243 .perf_subsys = NULL, 1244 .perf_name = NULL, 1245 .msr = MSR_DRAM_PERF_STATUS, 1246 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1247 .msr_shift = 0, 1248 .platform_rapl_msr_scale = &rapl_time_units, 1249 .rci_index = RAPL_RCI_INDEX_DRAM_PERF_STATUS, 1250 .bic = BIC_RAM__, 1251 .compat_scale = 100.0, 1252 .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, 1253 }, 1254 { 1255 .feature_mask = RAPL_AMD_F17H, 1256 .perf_subsys = NULL, 1257 .perf_name = NULL, 1258 .msr = MSR_CORE_ENERGY_STAT, 1259 .msr_mask = 0xFFFFFFFF, 1260 .msr_shift = 0, 1261 .platform_rapl_msr_scale = &rapl_energy_units, 1262 .rci_index = RAPL_RCI_INDEX_CORE_ENERGY, 1263 .bic = BIC_CorWatt | BIC_Cor_J, 1264 .compat_scale = 1.0, 1265 .flags = 0, 1266 }, 1267 { 1268 .feature_mask = RAPL_PSYS, 1269 .perf_subsys = "power", 1270 .perf_name = "energy-psys", 1271 .msr = MSR_PLATFORM_ENERGY_STATUS, 1272 .msr_mask = 0x00000000FFFFFFFF, 1273 .msr_shift = 0, 1274 .platform_rapl_msr_scale = &rapl_energy_units, 1275 .rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM, 1276 .bic = BIC_SysWatt | BIC_Sys_J, 1277 .compat_scale = 1.0, 1278 .flags = RAPL_COUNTER_FLAG_PLATFORM_COUNTER | RAPL_COUNTER_FLAG_USE_MSR_SUM, 1279 }, 1280 }; 1281 1282 struct rapl_counter { 1283 unsigned long long raw_value; 1284 enum rapl_unit unit; 1285 double scale; 1286 }; 1287 1288 /* Indexes used to map data read from perf and MSRs into global variables */ 1289 enum ccstate_rci_index { 1290 CCSTATE_RCI_INDEX_C1_RESIDENCY = 0, 1291 CCSTATE_RCI_INDEX_C3_RESIDENCY = 1, 1292 CCSTATE_RCI_INDEX_C6_RESIDENCY = 2, 1293 CCSTATE_RCI_INDEX_C7_RESIDENCY = 3, 1294 PCSTATE_RCI_INDEX_C2_RESIDENCY = 4, 1295 PCSTATE_RCI_INDEX_C3_RESIDENCY = 5, 1296 PCSTATE_RCI_INDEX_C6_RESIDENCY = 6, 1297 PCSTATE_RCI_INDEX_C7_RESIDENCY = 7, 1298 PCSTATE_RCI_INDEX_C8_RESIDENCY = 8, 1299 PCSTATE_RCI_INDEX_C9_RESIDENCY = 9, 1300 PCSTATE_RCI_INDEX_C10_RESIDENCY = 10, 1301 NUM_CSTATE_COUNTERS, 1302 }; 1303 1304 struct cstate_counter_info_t { 1305 unsigned long long data[NUM_CSTATE_COUNTERS]; 1306 enum counter_source source[NUM_CSTATE_COUNTERS]; 1307 unsigned long long msr[NUM_CSTATE_COUNTERS]; 1308 int fd_perf_core; 1309 int fd_perf_pkg; 1310 }; 1311 1312 struct cstate_counter_info_t *ccstate_counter_info; 1313 unsigned int ccstate_counter_info_size; 1314 1315 #define CSTATE_COUNTER_FLAG_COLLECT_PER_CORE (1u << 0) 1316 #define CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD ((1u << 1) | CSTATE_COUNTER_FLAG_COLLECT_PER_CORE) 1317 #define CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY (1u << 2) 1318 1319 struct cstate_counter_arch_info { 1320 int feature_mask; /* Mask for testing if the counter is supported on host */ 1321 const char *perf_subsys; 1322 const char *perf_name; 1323 unsigned long long msr; 1324 unsigned int rci_index; /* Maps data from perf counters to global variables */ 1325 unsigned long long bic; 1326 unsigned long long flags; 1327 int pkg_cstate_limit; 1328 }; 1329 1330 static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { 1331 { 1332 .feature_mask = CC1, 1333 .perf_subsys = "cstate_core", 1334 .perf_name = "c1-residency", 1335 .msr = MSR_CORE_C1_RES, 1336 .rci_index = CCSTATE_RCI_INDEX_C1_RESIDENCY, 1337 .bic = BIC_CPU_c1, 1338 .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD, 1339 .pkg_cstate_limit = 0, 1340 }, 1341 { 1342 .feature_mask = CC3, 1343 .perf_subsys = "cstate_core", 1344 .perf_name = "c3-residency", 1345 .msr = MSR_CORE_C3_RESIDENCY, 1346 .rci_index = CCSTATE_RCI_INDEX_C3_RESIDENCY, 1347 .bic = BIC_CPU_c3, 1348 .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY, 1349 .pkg_cstate_limit = 0, 1350 }, 1351 { 1352 .feature_mask = CC6, 1353 .perf_subsys = "cstate_core", 1354 .perf_name = "c6-residency", 1355 .msr = MSR_CORE_C6_RESIDENCY, 1356 .rci_index = CCSTATE_RCI_INDEX_C6_RESIDENCY, 1357 .bic = BIC_CPU_c6, 1358 .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY, 1359 .pkg_cstate_limit = 0, 1360 }, 1361 { 1362 .feature_mask = CC7, 1363 .perf_subsys = "cstate_core", 1364 .perf_name = "c7-residency", 1365 .msr = MSR_CORE_C7_RESIDENCY, 1366 .rci_index = CCSTATE_RCI_INDEX_C7_RESIDENCY, 1367 .bic = BIC_CPU_c7, 1368 .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY, 1369 .pkg_cstate_limit = 0, 1370 }, 1371 { 1372 .feature_mask = PC2, 1373 .perf_subsys = "cstate_pkg", 1374 .perf_name = "c2-residency", 1375 .msr = MSR_PKG_C2_RESIDENCY, 1376 .rci_index = PCSTATE_RCI_INDEX_C2_RESIDENCY, 1377 .bic = BIC_Pkgpc2, 1378 .flags = 0, 1379 .pkg_cstate_limit = PCL__2, 1380 }, 1381 { 1382 .feature_mask = PC3, 1383 .perf_subsys = "cstate_pkg", 1384 .perf_name = "c3-residency", 1385 .msr = MSR_PKG_C3_RESIDENCY, 1386 .rci_index = PCSTATE_RCI_INDEX_C3_RESIDENCY, 1387 .bic = BIC_Pkgpc3, 1388 .flags = 0, 1389 .pkg_cstate_limit = PCL__3, 1390 }, 1391 { 1392 .feature_mask = PC6, 1393 .perf_subsys = "cstate_pkg", 1394 .perf_name = "c6-residency", 1395 .msr = MSR_PKG_C6_RESIDENCY, 1396 .rci_index = PCSTATE_RCI_INDEX_C6_RESIDENCY, 1397 .bic = BIC_Pkgpc6, 1398 .flags = 0, 1399 .pkg_cstate_limit = PCL__6, 1400 }, 1401 { 1402 .feature_mask = PC7, 1403 .perf_subsys = "cstate_pkg", 1404 .perf_name = "c7-residency", 1405 .msr = MSR_PKG_C7_RESIDENCY, 1406 .rci_index = PCSTATE_RCI_INDEX_C7_RESIDENCY, 1407 .bic = BIC_Pkgpc7, 1408 .flags = 0, 1409 .pkg_cstate_limit = PCL__7, 1410 }, 1411 { 1412 .feature_mask = PC8, 1413 .perf_subsys = "cstate_pkg", 1414 .perf_name = "c8-residency", 1415 .msr = MSR_PKG_C8_RESIDENCY, 1416 .rci_index = PCSTATE_RCI_INDEX_C8_RESIDENCY, 1417 .bic = BIC_Pkgpc8, 1418 .flags = 0, 1419 .pkg_cstate_limit = PCL__8, 1420 }, 1421 { 1422 .feature_mask = PC9, 1423 .perf_subsys = "cstate_pkg", 1424 .perf_name = "c9-residency", 1425 .msr = MSR_PKG_C9_RESIDENCY, 1426 .rci_index = PCSTATE_RCI_INDEX_C9_RESIDENCY, 1427 .bic = BIC_Pkgpc9, 1428 .flags = 0, 1429 .pkg_cstate_limit = PCL__9, 1430 }, 1431 { 1432 .feature_mask = PC10, 1433 .perf_subsys = "cstate_pkg", 1434 .perf_name = "c10-residency", 1435 .msr = MSR_PKG_C10_RESIDENCY, 1436 .rci_index = PCSTATE_RCI_INDEX_C10_RESIDENCY, 1437 .bic = BIC_Pkgpc10, 1438 .flags = 0, 1439 .pkg_cstate_limit = PCL_10, 1440 }, 1441 }; 1442 1443 /* Indexes used to map data read from perf and MSRs into global variables */ 1444 enum msr_rci_index { 1445 MSR_RCI_INDEX_APERF = 0, 1446 MSR_RCI_INDEX_MPERF = 1, 1447 MSR_RCI_INDEX_SMI = 2, 1448 NUM_MSR_COUNTERS, 1449 }; 1450 1451 struct msr_counter_info_t { 1452 unsigned long long data[NUM_MSR_COUNTERS]; 1453 enum counter_source source[NUM_MSR_COUNTERS]; 1454 unsigned long long msr[NUM_MSR_COUNTERS]; 1455 unsigned long long msr_mask[NUM_MSR_COUNTERS]; 1456 int fd_perf; 1457 }; 1458 1459 struct msr_counter_info_t *msr_counter_info; 1460 unsigned int msr_counter_info_size; 1461 1462 struct msr_counter_arch_info { 1463 const char *perf_subsys; 1464 const char *perf_name; 1465 unsigned long long msr; 1466 unsigned long long msr_mask; 1467 unsigned int rci_index; /* Maps data from perf counters to global variables */ 1468 bool needed; 1469 bool present; 1470 }; 1471 1472 enum msr_arch_info_index { 1473 MSR_ARCH_INFO_APERF_INDEX = 0, 1474 MSR_ARCH_INFO_MPERF_INDEX = 1, 1475 MSR_ARCH_INFO_SMI_INDEX = 2, 1476 }; 1477 1478 static struct msr_counter_arch_info msr_counter_arch_infos[] = { 1479 [MSR_ARCH_INFO_APERF_INDEX] = { 1480 .perf_subsys = "msr", 1481 .perf_name = "aperf", 1482 .msr = MSR_IA32_APERF, 1483 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1484 .rci_index = MSR_RCI_INDEX_APERF, 1485 }, 1486 1487 [MSR_ARCH_INFO_MPERF_INDEX] = { 1488 .perf_subsys = "msr", 1489 .perf_name = "mperf", 1490 .msr = MSR_IA32_MPERF, 1491 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1492 .rci_index = MSR_RCI_INDEX_MPERF, 1493 }, 1494 1495 [MSR_ARCH_INFO_SMI_INDEX] = { 1496 .perf_subsys = "msr", 1497 .perf_name = "smi", 1498 .msr = MSR_SMI_COUNT, 1499 .msr_mask = 0xFFFFFFFF, 1500 .rci_index = MSR_RCI_INDEX_SMI, 1501 }, 1502 }; 1503 1504 /* Can be redefined when compiling, useful for testing. */ 1505 #ifndef SYSFS_TELEM_PATH 1506 #define SYSFS_TELEM_PATH "/sys/class/intel_pmt" 1507 #endif 1508 1509 #define PMT_COUNTER_MTL_DC6_OFFSET 120 1510 #define PMT_COUNTER_MTL_DC6_LSB 0 1511 #define PMT_COUNTER_MTL_DC6_MSB 63 1512 #define PMT_MTL_DC6_GUID 0x1a067102 1513 1514 #define PMT_COUNTER_NAME_SIZE_BYTES 16 1515 #define PMT_COUNTER_TYPE_NAME_SIZE_BYTES 32 1516 1517 struct pmt_mmio { 1518 struct pmt_mmio *next; 1519 1520 unsigned int guid; 1521 unsigned int size; 1522 1523 /* Base pointer to the mmaped memory. */ 1524 void *mmio_base; 1525 1526 /* 1527 * Offset to be applied to the mmio_base 1528 * to get the beginning of the PMT counters for given GUID. 1529 */ 1530 unsigned long pmt_offset; 1531 } *pmt_mmios; 1532 1533 enum pmt_datatype { 1534 PMT_TYPE_RAW, 1535 PMT_TYPE_XTAL_TIME, 1536 }; 1537 1538 struct pmt_domain_info { 1539 /* 1540 * Pointer to the MMIO obtained by applying a counter offset 1541 * to the mmio_base of the mmaped region for the given GUID. 1542 * 1543 * This is where to read the raw value of the counter from. 1544 */ 1545 unsigned long *pcounter; 1546 }; 1547 1548 struct pmt_counter { 1549 struct pmt_counter *next; 1550 1551 /* PMT metadata */ 1552 char name[PMT_COUNTER_NAME_SIZE_BYTES]; 1553 enum pmt_datatype type; 1554 enum counter_scope scope; 1555 unsigned int lsb; 1556 unsigned int msb; 1557 1558 /* BIC-like metadata */ 1559 enum counter_format format; 1560 1561 unsigned int num_domains; 1562 struct pmt_domain_info *domains; 1563 }; 1564 1565 unsigned int pmt_counter_get_width(const struct pmt_counter *p) 1566 { 1567 return (p->msb - p->lsb) + 1; 1568 } 1569 1570 void pmt_counter_resize_(struct pmt_counter *pcounter, unsigned int new_size) 1571 { 1572 struct pmt_domain_info *new_mem; 1573 1574 new_mem = (struct pmt_domain_info *)reallocarray(pcounter->domains, new_size, sizeof(*pcounter->domains)); 1575 if (!new_mem) { 1576 fprintf(stderr, "%s: failed to allocate memory for PMT counters\n", __func__); 1577 exit(1); 1578 } 1579 1580 /* Zero initialize just allocated memory. */ 1581 const size_t num_new_domains = new_size - pcounter->num_domains; 1582 1583 memset(&new_mem[pcounter->num_domains], 0, num_new_domains * sizeof(*pcounter->domains)); 1584 1585 pcounter->num_domains = new_size; 1586 pcounter->domains = new_mem; 1587 } 1588 1589 void pmt_counter_resize(struct pmt_counter *pcounter, unsigned int new_size) 1590 { 1591 /* 1592 * Allocate more memory ahead of time. 1593 * 1594 * Always allocate space for at least 8 elements 1595 * and double the size when growing. 1596 */ 1597 if (new_size < 8) 1598 new_size = 8; 1599 new_size = MAX(new_size, pcounter->num_domains * 2); 1600 1601 pmt_counter_resize_(pcounter, new_size); 1602 } 1603 1604 struct thread_data { 1605 struct timeval tv_begin; 1606 struct timeval tv_end; 1607 struct timeval tv_delta; 1608 unsigned long long tsc; 1609 unsigned long long aperf; 1610 unsigned long long mperf; 1611 unsigned long long c1; 1612 unsigned long long instr_count; 1613 unsigned long long irq_count; 1614 unsigned int smi_count; 1615 unsigned int cpu_id; 1616 unsigned int apic_id; 1617 unsigned int x2apic_id; 1618 unsigned int flags; 1619 bool is_atom; 1620 unsigned long long counter[MAX_ADDED_THREAD_COUNTERS]; 1621 unsigned long long perf_counter[MAX_ADDED_THREAD_COUNTERS]; 1622 unsigned long long pmt_counter[PMT_MAX_ADDED_THREAD_COUNTERS]; 1623 } *thread_even, *thread_odd; 1624 1625 struct core_data { 1626 int base_cpu; 1627 unsigned long long c3; 1628 unsigned long long c6; 1629 unsigned long long c7; 1630 unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */ 1631 unsigned int core_temp_c; 1632 struct rapl_counter core_energy; /* MSR_CORE_ENERGY_STAT */ 1633 unsigned int core_id; 1634 unsigned long long core_throt_cnt; 1635 unsigned long long counter[MAX_ADDED_CORE_COUNTERS]; 1636 unsigned long long perf_counter[MAX_ADDED_CORE_COUNTERS]; 1637 unsigned long long pmt_counter[PMT_MAX_ADDED_CORE_COUNTERS]; 1638 } *core_even, *core_odd; 1639 1640 struct pkg_data { 1641 int base_cpu; 1642 unsigned long long pc2; 1643 unsigned long long pc3; 1644 unsigned long long pc6; 1645 unsigned long long pc7; 1646 unsigned long long pc8; 1647 unsigned long long pc9; 1648 unsigned long long pc10; 1649 long long cpu_lpi; 1650 long long sys_lpi; 1651 unsigned long long pkg_wtd_core_c0; 1652 unsigned long long pkg_any_core_c0; 1653 unsigned long long pkg_any_gfxe_c0; 1654 unsigned long long pkg_both_core_gfxe_c0; 1655 long long gfx_rc6_ms; 1656 unsigned int gfx_mhz; 1657 unsigned int gfx_act_mhz; 1658 long long sam_mc6_ms; 1659 unsigned int sam_mhz; 1660 unsigned int sam_act_mhz; 1661 unsigned int package_id; 1662 struct rapl_counter energy_pkg; /* MSR_PKG_ENERGY_STATUS */ 1663 struct rapl_counter energy_dram; /* MSR_DRAM_ENERGY_STATUS */ 1664 struct rapl_counter energy_cores; /* MSR_PP0_ENERGY_STATUS */ 1665 struct rapl_counter energy_gfx; /* MSR_PP1_ENERGY_STATUS */ 1666 struct rapl_counter rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ 1667 struct rapl_counter rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ 1668 unsigned int pkg_temp_c; 1669 unsigned int uncore_mhz; 1670 unsigned long long die_c6; 1671 unsigned long long counter[MAX_ADDED_PACKAGE_COUNTERS]; 1672 unsigned long long perf_counter[MAX_ADDED_PACKAGE_COUNTERS]; 1673 unsigned long long pmt_counter[PMT_MAX_ADDED_PACKAGE_COUNTERS]; 1674 } *package_even, *package_odd; 1675 1676 #define ODD_COUNTERS thread_odd, core_odd, package_odd 1677 #define EVEN_COUNTERS thread_even, core_even, package_even 1678 1679 #define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no) \ 1680 ((thread_base) + \ 1681 ((pkg_no) * \ 1682 topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \ 1683 ((node_no) * topo.cores_per_node * topo.threads_per_core) + \ 1684 ((core_no) * topo.threads_per_core) + \ 1685 (thread_no)) 1686 1687 #define GET_CORE(core_base, core_no, node_no, pkg_no) \ 1688 ((core_base) + \ 1689 ((pkg_no) * topo.nodes_per_pkg * topo.cores_per_node) + \ 1690 ((node_no) * topo.cores_per_node) + \ 1691 (core_no)) 1692 1693 #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) 1694 1695 /* 1696 * The accumulated sum of MSR is defined as a monotonic 1697 * increasing MSR, it will be accumulated periodically, 1698 * despite its register's bit width. 1699 */ 1700 enum { 1701 IDX_PKG_ENERGY, 1702 IDX_DRAM_ENERGY, 1703 IDX_PP0_ENERGY, 1704 IDX_PP1_ENERGY, 1705 IDX_PKG_PERF, 1706 IDX_DRAM_PERF, 1707 IDX_PSYS_ENERGY, 1708 IDX_COUNT, 1709 }; 1710 1711 int get_msr_sum(int cpu, off_t offset, unsigned long long *msr); 1712 1713 struct msr_sum_array { 1714 /* get_msr_sum() = sum + (get_msr() - last) */ 1715 struct { 1716 /*The accumulated MSR value is updated by the timer */ 1717 unsigned long long sum; 1718 /*The MSR footprint recorded in last timer */ 1719 unsigned long long last; 1720 } entries[IDX_COUNT]; 1721 }; 1722 1723 /* The percpu MSR sum array.*/ 1724 struct msr_sum_array *per_cpu_msr_sum; 1725 1726 off_t idx_to_offset(int idx) 1727 { 1728 off_t offset; 1729 1730 switch (idx) { 1731 case IDX_PKG_ENERGY: 1732 if (platform->rapl_msrs & RAPL_AMD_F17H) 1733 offset = MSR_PKG_ENERGY_STAT; 1734 else 1735 offset = MSR_PKG_ENERGY_STATUS; 1736 break; 1737 case IDX_DRAM_ENERGY: 1738 offset = MSR_DRAM_ENERGY_STATUS; 1739 break; 1740 case IDX_PP0_ENERGY: 1741 offset = MSR_PP0_ENERGY_STATUS; 1742 break; 1743 case IDX_PP1_ENERGY: 1744 offset = MSR_PP1_ENERGY_STATUS; 1745 break; 1746 case IDX_PKG_PERF: 1747 offset = MSR_PKG_PERF_STATUS; 1748 break; 1749 case IDX_DRAM_PERF: 1750 offset = MSR_DRAM_PERF_STATUS; 1751 break; 1752 case IDX_PSYS_ENERGY: 1753 offset = MSR_PLATFORM_ENERGY_STATUS; 1754 break; 1755 default: 1756 offset = -1; 1757 } 1758 return offset; 1759 } 1760 1761 int offset_to_idx(off_t offset) 1762 { 1763 int idx; 1764 1765 switch (offset) { 1766 case MSR_PKG_ENERGY_STATUS: 1767 case MSR_PKG_ENERGY_STAT: 1768 idx = IDX_PKG_ENERGY; 1769 break; 1770 case MSR_DRAM_ENERGY_STATUS: 1771 idx = IDX_DRAM_ENERGY; 1772 break; 1773 case MSR_PP0_ENERGY_STATUS: 1774 idx = IDX_PP0_ENERGY; 1775 break; 1776 case MSR_PP1_ENERGY_STATUS: 1777 idx = IDX_PP1_ENERGY; 1778 break; 1779 case MSR_PKG_PERF_STATUS: 1780 idx = IDX_PKG_PERF; 1781 break; 1782 case MSR_DRAM_PERF_STATUS: 1783 idx = IDX_DRAM_PERF; 1784 break; 1785 case MSR_PLATFORM_ENERGY_STATUS: 1786 idx = IDX_PSYS_ENERGY; 1787 break; 1788 default: 1789 idx = -1; 1790 } 1791 return idx; 1792 } 1793 1794 int idx_valid(int idx) 1795 { 1796 switch (idx) { 1797 case IDX_PKG_ENERGY: 1798 return platform->rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H); 1799 case IDX_DRAM_ENERGY: 1800 return platform->rapl_msrs & RAPL_DRAM; 1801 case IDX_PP0_ENERGY: 1802 return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS; 1803 case IDX_PP1_ENERGY: 1804 return platform->rapl_msrs & RAPL_GFX; 1805 case IDX_PKG_PERF: 1806 return platform->rapl_msrs & RAPL_PKG_PERF_STATUS; 1807 case IDX_DRAM_PERF: 1808 return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS; 1809 case IDX_PSYS_ENERGY: 1810 return platform->rapl_msrs & RAPL_PSYS; 1811 default: 1812 return 0; 1813 } 1814 } 1815 1816 struct sys_counters { 1817 /* MSR added counters */ 1818 unsigned int added_thread_counters; 1819 unsigned int added_core_counters; 1820 unsigned int added_package_counters; 1821 struct msr_counter *tp; 1822 struct msr_counter *cp; 1823 struct msr_counter *pp; 1824 1825 /* perf added counters */ 1826 unsigned int added_thread_perf_counters; 1827 unsigned int added_core_perf_counters; 1828 unsigned int added_package_perf_counters; 1829 struct perf_counter_info *perf_tp; 1830 struct perf_counter_info *perf_cp; 1831 struct perf_counter_info *perf_pp; 1832 1833 struct pmt_counter *pmt_tp; 1834 struct pmt_counter *pmt_cp; 1835 struct pmt_counter *pmt_pp; 1836 } sys; 1837 1838 static size_t free_msr_counters_(struct msr_counter **pp) 1839 { 1840 struct msr_counter *p = NULL; 1841 size_t num_freed = 0; 1842 1843 while (*pp) { 1844 p = *pp; 1845 1846 if (p->msr_num != 0) { 1847 *pp = p->next; 1848 1849 free(p); 1850 ++num_freed; 1851 1852 continue; 1853 } 1854 1855 pp = &p->next; 1856 } 1857 1858 return num_freed; 1859 } 1860 1861 /* 1862 * Free all added counters accessed via msr. 1863 */ 1864 static void free_sys_msr_counters(void) 1865 { 1866 /* Thread counters */ 1867 sys.added_thread_counters -= free_msr_counters_(&sys.tp); 1868 1869 /* Core counters */ 1870 sys.added_core_counters -= free_msr_counters_(&sys.cp); 1871 1872 /* Package counters */ 1873 sys.added_package_counters -= free_msr_counters_(&sys.pp); 1874 } 1875 1876 struct system_summary { 1877 struct thread_data threads; 1878 struct core_data cores; 1879 struct pkg_data packages; 1880 } average; 1881 1882 struct platform_counters { 1883 struct rapl_counter energy_psys; /* MSR_PLATFORM_ENERGY_STATUS */ 1884 } platform_counters_odd, platform_counters_even; 1885 1886 struct cpu_topology { 1887 int physical_package_id; 1888 int die_id; 1889 int logical_cpu_id; 1890 int physical_node_id; 1891 int logical_node_id; /* 0-based count within the package */ 1892 int physical_core_id; 1893 int thread_id; 1894 int type; 1895 cpu_set_t *put_ids; /* Processing Unit/Thread IDs */ 1896 } *cpus; 1897 1898 struct topo_params { 1899 int num_packages; 1900 int num_die; 1901 int num_cpus; 1902 int num_cores; 1903 int allowed_packages; 1904 int allowed_cpus; 1905 int allowed_cores; 1906 int max_cpu_num; 1907 int max_core_id; 1908 int max_package_id; 1909 int max_die_id; 1910 int max_node_num; 1911 int nodes_per_pkg; 1912 int cores_per_node; 1913 int threads_per_core; 1914 } topo; 1915 1916 struct timeval tv_even, tv_odd, tv_delta; 1917 1918 int *irq_column_2_cpu; /* /proc/interrupts column numbers */ 1919 int *irqs_per_cpu; /* indexed by cpu_num */ 1920 1921 void setup_all_buffers(bool startup); 1922 1923 char *sys_lpi_file; 1924 char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us"; 1925 char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec"; 1926 1927 int cpu_is_not_present(int cpu) 1928 { 1929 return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set); 1930 } 1931 1932 int cpu_is_not_allowed(int cpu) 1933 { 1934 return !CPU_ISSET_S(cpu, cpu_allowed_setsize, cpu_allowed_set); 1935 } 1936 1937 /* 1938 * run func(thread, core, package) in topology order 1939 * skip non-present cpus 1940 */ 1941 1942 int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *), 1943 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) 1944 { 1945 int retval, pkg_no, core_no, thread_no, node_no; 1946 1947 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 1948 for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) { 1949 for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { 1950 for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { 1951 struct thread_data *t; 1952 struct core_data *c; 1953 struct pkg_data *p; 1954 t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); 1955 1956 if (cpu_is_not_allowed(t->cpu_id)) 1957 continue; 1958 1959 c = GET_CORE(core_base, core_no, node_no, pkg_no); 1960 p = GET_PKG(pkg_base, pkg_no); 1961 1962 retval = func(t, c, p); 1963 if (retval) 1964 return retval; 1965 } 1966 } 1967 } 1968 } 1969 return 0; 1970 } 1971 1972 int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p) 1973 { 1974 UNUSED(p); 1975 1976 return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0); 1977 } 1978 1979 int is_cpu_first_core_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) 1980 { 1981 UNUSED(c); 1982 1983 return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0); 1984 } 1985 1986 int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) 1987 { 1988 return is_cpu_first_thread_in_core(t, c, p) && is_cpu_first_core_in_package(t, c, p); 1989 } 1990 1991 int cpu_migrate(int cpu) 1992 { 1993 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 1994 CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); 1995 if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) 1996 return -1; 1997 else 1998 return 0; 1999 } 2000 2001 int get_msr_fd(int cpu) 2002 { 2003 char pathname[32]; 2004 int fd; 2005 2006 fd = fd_percpu[cpu]; 2007 2008 if (fd) 2009 return fd; 2010 2011 sprintf(pathname, "/dev/cpu/%d/msr", cpu); 2012 fd = open(pathname, O_RDONLY); 2013 if (fd < 0) 2014 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, " 2015 "or run with --no-msr, or run as root", pathname); 2016 2017 fd_percpu[cpu] = fd; 2018 2019 return fd; 2020 } 2021 2022 static void bic_disable_msr_access(void) 2023 { 2024 const unsigned long bic_msrs = BIC_Mod_c6 | BIC_CoreTmp | 2025 BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_PkgTmp; 2026 2027 bic_enabled &= ~bic_msrs; 2028 2029 free_sys_msr_counters(); 2030 } 2031 2032 static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) 2033 { 2034 assert(!no_perf); 2035 2036 return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); 2037 } 2038 2039 static long open_perf_counter(int cpu, unsigned int type, unsigned int config, int group_fd, __u64 read_format) 2040 { 2041 struct perf_event_attr attr; 2042 const pid_t pid = -1; 2043 const unsigned long flags = 0; 2044 2045 assert(!no_perf); 2046 2047 memset(&attr, 0, sizeof(struct perf_event_attr)); 2048 2049 attr.type = type; 2050 attr.size = sizeof(struct perf_event_attr); 2051 attr.config = config; 2052 attr.disabled = 0; 2053 attr.sample_type = PERF_SAMPLE_IDENTIFIER; 2054 attr.read_format = read_format; 2055 2056 const int fd = perf_event_open(&attr, pid, cpu, group_fd, flags); 2057 2058 return fd; 2059 } 2060 2061 int get_instr_count_fd(int cpu) 2062 { 2063 if (fd_instr_count_percpu[cpu]) 2064 return fd_instr_count_percpu[cpu]; 2065 2066 fd_instr_count_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0); 2067 2068 return fd_instr_count_percpu[cpu]; 2069 } 2070 2071 int get_msr(int cpu, off_t offset, unsigned long long *msr) 2072 { 2073 ssize_t retval; 2074 2075 assert(!no_msr); 2076 2077 retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset); 2078 2079 if (retval != sizeof *msr) 2080 err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset); 2081 2082 return 0; 2083 } 2084 2085 int probe_msr(int cpu, off_t offset) 2086 { 2087 ssize_t retval; 2088 unsigned long long dummy; 2089 2090 assert(!no_msr); 2091 2092 retval = pread(get_msr_fd(cpu), &dummy, sizeof(dummy), offset); 2093 2094 if (retval != sizeof(dummy)) 2095 return 1; 2096 2097 return 0; 2098 } 2099 2100 /* Convert CPU ID to domain ID for given added perf counter. */ 2101 unsigned int cpu_to_domain(const struct perf_counter_info *pc, int cpu) 2102 { 2103 switch (pc->scope) { 2104 case SCOPE_CPU: 2105 return cpu; 2106 2107 case SCOPE_CORE: 2108 return cpus[cpu].physical_core_id; 2109 2110 case SCOPE_PACKAGE: 2111 return cpus[cpu].physical_package_id; 2112 } 2113 2114 __builtin_unreachable(); 2115 } 2116 2117 #define MAX_DEFERRED 16 2118 char *deferred_add_names[MAX_DEFERRED]; 2119 char *deferred_skip_names[MAX_DEFERRED]; 2120 int deferred_add_index; 2121 int deferred_skip_index; 2122 2123 /* 2124 * HIDE_LIST - hide this list of counters, show the rest [default] 2125 * SHOW_LIST - show this list of counters, hide the rest 2126 */ 2127 enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST; 2128 2129 void help(void) 2130 { 2131 fprintf(outf, 2132 "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" 2133 "\n" 2134 "Turbostat forks the specified COMMAND and prints statistics\n" 2135 "when COMMAND completes.\n" 2136 "If no COMMAND is specified, turbostat wakes every 5-seconds\n" 2137 "to print statistics, until interrupted.\n" 2138 " -a, --add add a counter\n" 2139 " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" 2140 " eg. --add perf/cstate_pkg/c2-residency,package,delta,percent,perfPC2\n" 2141 " eg. --add pmt,name=XTAL,type=raw,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102\n" 2142 " -c, --cpu cpu-set limit output to summary plus cpu-set:\n" 2143 " {core | package | j,k,l..m,n-p }\n" 2144 " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n" 2145 " debug messages are printed to stderr\n" 2146 " -D, --Dump displays the raw counter values\n" 2147 " -e, --enable [all | column]\n" 2148 " shows all or the specified disabled column\n" 2149 " -H, --hide [column|column,column,...]\n" 2150 " hide the specified column(s)\n" 2151 " -i, --interval sec.subsec\n" 2152 " Override default 5-second measurement interval\n" 2153 " -J, --Joules displays energy in Joules instead of Watts\n" 2154 " -l, --list list column headers only\n" 2155 " -M, --no-msr Disable all uses of the MSR driver\n" 2156 " -P, --no-perf Disable all uses of the perf API\n" 2157 " -n, --num_iterations num\n" 2158 " number of the measurement iterations\n" 2159 " -N, --header_iterations num\n" 2160 " print header every num iterations\n" 2161 " -o, --out file\n" 2162 " create or truncate \"file\" for all output\n" 2163 " -q, --quiet skip decoding system configuration header\n" 2164 " -s, --show [column|column,column,...]\n" 2165 " show only the specified column(s)\n" 2166 " -S, --Summary\n" 2167 " limits output to 1-line system summary per interval\n" 2168 " -T, --TCC temperature\n" 2169 " sets the Thermal Control Circuit temperature in\n" 2170 " degrees Celsius\n" 2171 " -h, --help print this help message\n" 2172 " -v, --version print version information\n" "\n" "For more help, run \"man turbostat\"\n"); 2173 } 2174 2175 /* 2176 * bic_lookup 2177 * for all the strings in comma separate name_list, 2178 * set the approprate bit in return value. 2179 */ 2180 unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) 2181 { 2182 unsigned int i; 2183 unsigned long long retval = 0; 2184 2185 while (name_list) { 2186 char *comma; 2187 2188 comma = strchr(name_list, ','); 2189 2190 if (comma) 2191 *comma = '\0'; 2192 2193 for (i = 0; i < MAX_BIC; ++i) { 2194 if (!strcmp(name_list, bic[i].name)) { 2195 retval |= (1ULL << i); 2196 break; 2197 } 2198 if (!strcmp(name_list, "all")) { 2199 retval |= ~0; 2200 break; 2201 } else if (!strcmp(name_list, "topology")) { 2202 retval |= BIC_TOPOLOGY; 2203 break; 2204 } else if (!strcmp(name_list, "power")) { 2205 retval |= BIC_THERMAL_PWR; 2206 break; 2207 } else if (!strcmp(name_list, "idle")) { 2208 retval |= BIC_IDLE; 2209 break; 2210 } else if (!strcmp(name_list, "frequency")) { 2211 retval |= BIC_FREQUENCY; 2212 break; 2213 } else if (!strcmp(name_list, "other")) { 2214 retval |= BIC_OTHER; 2215 break; 2216 } 2217 2218 } 2219 if (i == MAX_BIC) { 2220 if (mode == SHOW_LIST) { 2221 deferred_add_names[deferred_add_index++] = name_list; 2222 if (deferred_add_index >= MAX_DEFERRED) { 2223 fprintf(stderr, "More than max %d un-recognized --add options '%s'\n", 2224 MAX_DEFERRED, name_list); 2225 help(); 2226 exit(1); 2227 } 2228 } else { 2229 deferred_skip_names[deferred_skip_index++] = name_list; 2230 if (debug) 2231 fprintf(stderr, "deferred \"%s\"\n", name_list); 2232 if (deferred_skip_index >= MAX_DEFERRED) { 2233 fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", 2234 MAX_DEFERRED, name_list); 2235 help(); 2236 exit(1); 2237 } 2238 } 2239 } 2240 2241 name_list = comma; 2242 if (name_list) 2243 name_list++; 2244 2245 } 2246 return retval; 2247 } 2248 2249 void print_header(char *delim) 2250 { 2251 struct msr_counter *mp; 2252 struct perf_counter_info *pp; 2253 struct pmt_counter *ppmt; 2254 int printed = 0; 2255 2256 if (DO_BIC(BIC_USEC)) 2257 outp += sprintf(outp, "%susec", (printed++ ? delim : "")); 2258 if (DO_BIC(BIC_TOD)) 2259 outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : "")); 2260 if (DO_BIC(BIC_Package)) 2261 outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); 2262 if (DO_BIC(BIC_Die)) 2263 outp += sprintf(outp, "%sDie", (printed++ ? delim : "")); 2264 if (DO_BIC(BIC_Node)) 2265 outp += sprintf(outp, "%sNode", (printed++ ? delim : "")); 2266 if (DO_BIC(BIC_Core)) 2267 outp += sprintf(outp, "%sCore", (printed++ ? delim : "")); 2268 if (DO_BIC(BIC_CPU)) 2269 outp += sprintf(outp, "%sCPU", (printed++ ? delim : "")); 2270 if (DO_BIC(BIC_APIC)) 2271 outp += sprintf(outp, "%sAPIC", (printed++ ? delim : "")); 2272 if (DO_BIC(BIC_X2APIC)) 2273 outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : "")); 2274 if (DO_BIC(BIC_Avg_MHz)) 2275 outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : "")); 2276 if (DO_BIC(BIC_Busy)) 2277 outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : "")); 2278 if (DO_BIC(BIC_Bzy_MHz)) 2279 outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : "")); 2280 if (DO_BIC(BIC_TSC_MHz)) 2281 outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : "")); 2282 2283 if (DO_BIC(BIC_IPC)) 2284 outp += sprintf(outp, "%sIPC", (printed++ ? delim : "")); 2285 2286 if (DO_BIC(BIC_IRQ)) { 2287 if (sums_need_wide_columns) 2288 outp += sprintf(outp, "%s IRQ", (printed++ ? delim : "")); 2289 else 2290 outp += sprintf(outp, "%sIRQ", (printed++ ? delim : "")); 2291 } 2292 2293 if (DO_BIC(BIC_SMI)) 2294 outp += sprintf(outp, "%sSMI", (printed++ ? delim : "")); 2295 2296 for (mp = sys.tp; mp; mp = mp->next) { 2297 2298 if (mp->format == FORMAT_RAW) { 2299 if (mp->width == 64) 2300 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name); 2301 else 2302 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name); 2303 } else { 2304 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2305 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name); 2306 else 2307 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name); 2308 } 2309 } 2310 2311 for (pp = sys.perf_tp; pp; pp = pp->next) { 2312 2313 if (pp->format == FORMAT_RAW) { 2314 if (pp->width == 64) 2315 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); 2316 else 2317 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); 2318 } else { 2319 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2320 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); 2321 else 2322 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); 2323 } 2324 } 2325 2326 ppmt = sys.pmt_tp; 2327 while (ppmt) { 2328 switch (ppmt->type) { 2329 case PMT_TYPE_RAW: 2330 if (pmt_counter_get_width(ppmt) <= 32) 2331 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); 2332 else 2333 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); 2334 2335 break; 2336 2337 case PMT_TYPE_XTAL_TIME: 2338 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); 2339 break; 2340 } 2341 2342 ppmt = ppmt->next; 2343 } 2344 2345 if (DO_BIC(BIC_CPU_c1)) 2346 outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); 2347 if (DO_BIC(BIC_CPU_c3)) 2348 outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : "")); 2349 if (DO_BIC(BIC_CPU_c6)) 2350 outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : "")); 2351 if (DO_BIC(BIC_CPU_c7)) 2352 outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : "")); 2353 2354 if (DO_BIC(BIC_Mod_c6)) 2355 outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : "")); 2356 2357 if (DO_BIC(BIC_CoreTmp)) 2358 outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : "")); 2359 2360 if (DO_BIC(BIC_CORE_THROT_CNT)) 2361 outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : "")); 2362 2363 if (platform->rapl_msrs && !rapl_joules) { 2364 if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) 2365 outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); 2366 } else if (platform->rapl_msrs && rapl_joules) { 2367 if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) 2368 outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); 2369 } 2370 2371 for (mp = sys.cp; mp; mp = mp->next) { 2372 if (mp->format == FORMAT_RAW) { 2373 if (mp->width == 64) 2374 outp += sprintf(outp, "%s%18.18s", delim, mp->name); 2375 else 2376 outp += sprintf(outp, "%s%10.10s", delim, mp->name); 2377 } else { 2378 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2379 outp += sprintf(outp, "%s%8s", delim, mp->name); 2380 else 2381 outp += sprintf(outp, "%s%s", delim, mp->name); 2382 } 2383 } 2384 2385 for (pp = sys.perf_cp; pp; pp = pp->next) { 2386 2387 if (pp->format == FORMAT_RAW) { 2388 if (pp->width == 64) 2389 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); 2390 else 2391 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); 2392 } else { 2393 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2394 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); 2395 else 2396 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); 2397 } 2398 } 2399 2400 ppmt = sys.pmt_cp; 2401 while (ppmt) { 2402 switch (ppmt->type) { 2403 case PMT_TYPE_RAW: 2404 if (pmt_counter_get_width(ppmt) <= 32) 2405 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); 2406 else 2407 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); 2408 2409 break; 2410 2411 case PMT_TYPE_XTAL_TIME: 2412 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); 2413 break; 2414 } 2415 2416 ppmt = ppmt->next; 2417 } 2418 2419 if (DO_BIC(BIC_PkgTmp)) 2420 outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : "")); 2421 2422 if (DO_BIC(BIC_GFX_rc6)) 2423 outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : "")); 2424 2425 if (DO_BIC(BIC_GFXMHz)) 2426 outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : "")); 2427 2428 if (DO_BIC(BIC_GFXACTMHz)) 2429 outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : "")); 2430 2431 if (DO_BIC(BIC_SAM_mc6)) 2432 outp += sprintf(outp, "%sSAM%%mc6", (printed++ ? delim : "")); 2433 2434 if (DO_BIC(BIC_SAMMHz)) 2435 outp += sprintf(outp, "%sSAMMHz", (printed++ ? delim : "")); 2436 2437 if (DO_BIC(BIC_SAMACTMHz)) 2438 outp += sprintf(outp, "%sSAMAMHz", (printed++ ? delim : "")); 2439 2440 if (DO_BIC(BIC_Totl_c0)) 2441 outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : "")); 2442 if (DO_BIC(BIC_Any_c0)) 2443 outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : "")); 2444 if (DO_BIC(BIC_GFX_c0)) 2445 outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : "")); 2446 if (DO_BIC(BIC_CPUGFX)) 2447 outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : "")); 2448 2449 if (DO_BIC(BIC_Pkgpc2)) 2450 outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : "")); 2451 if (DO_BIC(BIC_Pkgpc3)) 2452 outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : "")); 2453 if (DO_BIC(BIC_Pkgpc6)) 2454 outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : "")); 2455 if (DO_BIC(BIC_Pkgpc7)) 2456 outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : "")); 2457 if (DO_BIC(BIC_Pkgpc8)) 2458 outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : "")); 2459 if (DO_BIC(BIC_Pkgpc9)) 2460 outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : "")); 2461 if (DO_BIC(BIC_Pkgpc10)) 2462 outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : "")); 2463 if (DO_BIC(BIC_Diec6)) 2464 outp += sprintf(outp, "%sDie%%c6", (printed++ ? delim : "")); 2465 if (DO_BIC(BIC_CPU_LPI)) 2466 outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : "")); 2467 if (DO_BIC(BIC_SYS_LPI)) 2468 outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : "")); 2469 2470 if (platform->rapl_msrs && !rapl_joules) { 2471 if (DO_BIC(BIC_PkgWatt)) 2472 outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : "")); 2473 if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) 2474 outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); 2475 if (DO_BIC(BIC_GFXWatt)) 2476 outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : "")); 2477 if (DO_BIC(BIC_RAMWatt)) 2478 outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : "")); 2479 if (DO_BIC(BIC_PKG__)) 2480 outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); 2481 if (DO_BIC(BIC_RAM__)) 2482 outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : "")); 2483 } else if (platform->rapl_msrs && rapl_joules) { 2484 if (DO_BIC(BIC_Pkg_J)) 2485 outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : "")); 2486 if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) 2487 outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); 2488 if (DO_BIC(BIC_GFX_J)) 2489 outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : "")); 2490 if (DO_BIC(BIC_RAM_J)) 2491 outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : "")); 2492 if (DO_BIC(BIC_PKG__)) 2493 outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); 2494 if (DO_BIC(BIC_RAM__)) 2495 outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : "")); 2496 } 2497 if (DO_BIC(BIC_UNCORE_MHZ)) 2498 outp += sprintf(outp, "%sUncMHz", (printed++ ? delim : "")); 2499 2500 for (mp = sys.pp; mp; mp = mp->next) { 2501 if (mp->format == FORMAT_RAW) { 2502 if (mp->width == 64) 2503 outp += sprintf(outp, "%s%18.18s", delim, mp->name); 2504 else if (mp->width == 32) 2505 outp += sprintf(outp, "%s%10.10s", delim, mp->name); 2506 else 2507 outp += sprintf(outp, "%s%7.7s", delim, mp->name); 2508 } else { 2509 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2510 outp += sprintf(outp, "%s%8s", delim, mp->name); 2511 else 2512 outp += sprintf(outp, "%s%7.7s", delim, mp->name); 2513 } 2514 } 2515 2516 for (pp = sys.perf_pp; pp; pp = pp->next) { 2517 2518 if (pp->format == FORMAT_RAW) { 2519 if (pp->width == 64) 2520 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); 2521 else 2522 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); 2523 } else { 2524 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2525 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); 2526 else 2527 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); 2528 } 2529 } 2530 2531 ppmt = sys.pmt_pp; 2532 while (ppmt) { 2533 switch (ppmt->type) { 2534 case PMT_TYPE_RAW: 2535 if (pmt_counter_get_width(ppmt) <= 32) 2536 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); 2537 else 2538 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); 2539 2540 break; 2541 2542 case PMT_TYPE_XTAL_TIME: 2543 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); 2544 break; 2545 } 2546 2547 ppmt = ppmt->next; 2548 } 2549 2550 if (DO_BIC(BIC_SysWatt)) 2551 outp += sprintf(outp, "%sSysWatt", (printed++ ? delim : "")); 2552 if (DO_BIC(BIC_Sys_J)) 2553 outp += sprintf(outp, "%sSys_J", (printed++ ? delim : "")); 2554 2555 outp += sprintf(outp, "\n"); 2556 } 2557 2558 int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 2559 { 2560 int i; 2561 struct msr_counter *mp; 2562 struct platform_counters *pplat_cnt = p == package_odd ? &platform_counters_odd : &platform_counters_even; 2563 2564 outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p); 2565 2566 if (t) { 2567 outp += sprintf(outp, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags); 2568 outp += sprintf(outp, "TSC: %016llX\n", t->tsc); 2569 outp += sprintf(outp, "aperf: %016llX\n", t->aperf); 2570 outp += sprintf(outp, "mperf: %016llX\n", t->mperf); 2571 outp += sprintf(outp, "c1: %016llX\n", t->c1); 2572 2573 if (DO_BIC(BIC_IPC)) 2574 outp += sprintf(outp, "IPC: %lld\n", t->instr_count); 2575 2576 if (DO_BIC(BIC_IRQ)) 2577 outp += sprintf(outp, "IRQ: %lld\n", t->irq_count); 2578 if (DO_BIC(BIC_SMI)) 2579 outp += sprintf(outp, "SMI: %d\n", t->smi_count); 2580 2581 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 2582 outp += 2583 sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, 2584 t->counter[i], mp->sp->path); 2585 } 2586 } 2587 2588 if (c && is_cpu_first_thread_in_core(t, c, p)) { 2589 outp += sprintf(outp, "core: %d\n", c->core_id); 2590 outp += sprintf(outp, "c3: %016llX\n", c->c3); 2591 outp += sprintf(outp, "c6: %016llX\n", c->c6); 2592 outp += sprintf(outp, "c7: %016llX\n", c->c7); 2593 outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c); 2594 outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt); 2595 2596 const unsigned long long energy_value = c->core_energy.raw_value * c->core_energy.scale; 2597 const double energy_scale = c->core_energy.scale; 2598 2599 if (c->core_energy.unit == RAPL_UNIT_JOULES) 2600 outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale); 2601 2602 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 2603 outp += 2604 sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, 2605 c->counter[i], mp->sp->path); 2606 } 2607 outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us); 2608 } 2609 2610 if (p && is_cpu_first_core_in_package(t, c, p)) { 2611 outp += sprintf(outp, "package: %d\n", p->package_id); 2612 2613 outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0); 2614 outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0); 2615 outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0); 2616 outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0); 2617 2618 outp += sprintf(outp, "pc2: %016llX\n", p->pc2); 2619 if (DO_BIC(BIC_Pkgpc3)) 2620 outp += sprintf(outp, "pc3: %016llX\n", p->pc3); 2621 if (DO_BIC(BIC_Pkgpc6)) 2622 outp += sprintf(outp, "pc6: %016llX\n", p->pc6); 2623 if (DO_BIC(BIC_Pkgpc7)) 2624 outp += sprintf(outp, "pc7: %016llX\n", p->pc7); 2625 outp += sprintf(outp, "pc8: %016llX\n", p->pc8); 2626 outp += sprintf(outp, "pc9: %016llX\n", p->pc9); 2627 outp += sprintf(outp, "pc10: %016llX\n", p->pc10); 2628 outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi); 2629 outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi); 2630 outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg.raw_value); 2631 outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores.raw_value); 2632 outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx.raw_value); 2633 outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram.raw_value); 2634 outp += sprintf(outp, "Joules PSYS: %0llX\n", pplat_cnt->energy_psys.raw_value); 2635 outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status.raw_value); 2636 outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status.raw_value); 2637 outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); 2638 2639 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 2640 outp += 2641 sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, 2642 p->counter[i], mp->sp->path); 2643 } 2644 } 2645 2646 outp += sprintf(outp, "\n"); 2647 2648 return 0; 2649 } 2650 2651 double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desired_unit, double interval) 2652 { 2653 assert(desired_unit != RAPL_UNIT_INVALID); 2654 2655 /* 2656 * For now we don't expect anything other than joules, 2657 * so just simplify the logic. 2658 */ 2659 assert(c->unit == RAPL_UNIT_JOULES); 2660 2661 const double scaled = c->raw_value * c->scale; 2662 2663 if (desired_unit == RAPL_UNIT_WATTS) 2664 return scaled / interval; 2665 return scaled; 2666 } 2667 2668 /* 2669 * column formatting convention & formats 2670 */ 2671 int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 2672 { 2673 static int count; 2674 2675 struct platform_counters *pplat_cnt = NULL; 2676 double interval_float, tsc; 2677 char *fmt8; 2678 int i; 2679 struct msr_counter *mp; 2680 struct perf_counter_info *pp; 2681 struct pmt_counter *ppmt; 2682 char *delim = "\t"; 2683 int printed = 0; 2684 2685 if (t == &average.threads) { 2686 pplat_cnt = count & 1 ? &platform_counters_odd : &platform_counters_even; 2687 ++count; 2688 } 2689 2690 /* if showing only 1st thread in core and this isn't one, bail out */ 2691 if (show_core_only && !is_cpu_first_thread_in_core(t, c, p)) 2692 return 0; 2693 2694 /* if showing only 1st thread in pkg and this isn't one, bail out */ 2695 if (show_pkg_only && !is_cpu_first_core_in_package(t, c, p)) 2696 return 0; 2697 2698 /*if not summary line and --cpu is used */ 2699 if ((t != &average.threads) && (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset))) 2700 return 0; 2701 2702 if (DO_BIC(BIC_USEC)) { 2703 /* on each row, print how many usec each timestamp took to gather */ 2704 struct timeval tv; 2705 2706 timersub(&t->tv_end, &t->tv_begin, &tv); 2707 outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec); 2708 } 2709 2710 /* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */ 2711 if (DO_BIC(BIC_TOD)) 2712 outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec); 2713 2714 interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec / 1000000.0; 2715 2716 tsc = t->tsc * tsc_tweak; 2717 2718 /* topo columns, print blanks on 1st (average) line */ 2719 if (t == &average.threads) { 2720 if (DO_BIC(BIC_Package)) 2721 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2722 if (DO_BIC(BIC_Die)) 2723 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2724 if (DO_BIC(BIC_Node)) 2725 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2726 if (DO_BIC(BIC_Core)) 2727 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2728 if (DO_BIC(BIC_CPU)) 2729 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2730 if (DO_BIC(BIC_APIC)) 2731 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2732 if (DO_BIC(BIC_X2APIC)) 2733 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2734 } else { 2735 if (DO_BIC(BIC_Package)) { 2736 if (p) 2737 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id); 2738 else 2739 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2740 } 2741 if (DO_BIC(BIC_Die)) { 2742 if (c) 2743 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id); 2744 else 2745 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2746 } 2747 if (DO_BIC(BIC_Node)) { 2748 if (t) 2749 outp += sprintf(outp, "%s%d", 2750 (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id); 2751 else 2752 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2753 } 2754 if (DO_BIC(BIC_Core)) { 2755 if (c) 2756 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id); 2757 else 2758 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2759 } 2760 if (DO_BIC(BIC_CPU)) 2761 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id); 2762 if (DO_BIC(BIC_APIC)) 2763 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id); 2764 if (DO_BIC(BIC_X2APIC)) 2765 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id); 2766 } 2767 2768 if (DO_BIC(BIC_Avg_MHz)) 2769 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float); 2770 2771 if (DO_BIC(BIC_Busy)) 2772 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc); 2773 2774 if (DO_BIC(BIC_Bzy_MHz)) { 2775 if (has_base_hz) 2776 outp += 2777 sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf); 2778 else 2779 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 2780 tsc / units * t->aperf / t->mperf / interval_float); 2781 } 2782 2783 if (DO_BIC(BIC_TSC_MHz)) 2784 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc / units / interval_float); 2785 2786 if (DO_BIC(BIC_IPC)) 2787 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf); 2788 2789 /* IRQ */ 2790 if (DO_BIC(BIC_IRQ)) { 2791 if (sums_need_wide_columns) 2792 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count); 2793 else 2794 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count); 2795 } 2796 2797 /* SMI */ 2798 if (DO_BIC(BIC_SMI)) 2799 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count); 2800 2801 /* Added counters */ 2802 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 2803 if (mp->format == FORMAT_RAW) { 2804 if (mp->width == 32) 2805 outp += 2806 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]); 2807 else 2808 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]); 2809 } else if (mp->format == FORMAT_DELTA) { 2810 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2811 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]); 2812 else 2813 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]); 2814 } else if (mp->format == FORMAT_PERCENT) { 2815 if (mp->type == COUNTER_USEC) 2816 outp += 2817 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 2818 t->counter[i] / interval_float / 10000); 2819 else 2820 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc); 2821 } 2822 } 2823 2824 /* Added perf counters */ 2825 for (i = 0, pp = sys.perf_tp; pp; ++i, pp = pp->next) { 2826 if (pp->format == FORMAT_RAW) { 2827 if (pp->width == 32) 2828 outp += 2829 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 2830 (unsigned int)t->perf_counter[i]); 2831 else 2832 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->perf_counter[i]); 2833 } else if (pp->format == FORMAT_DELTA) { 2834 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2835 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->perf_counter[i]); 2836 else 2837 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->perf_counter[i]); 2838 } else if (pp->format == FORMAT_PERCENT) { 2839 if (pp->type == COUNTER_USEC) 2840 outp += 2841 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 2842 t->perf_counter[i] / interval_float / 10000); 2843 else 2844 outp += 2845 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->perf_counter[i] / tsc); 2846 } 2847 } 2848 2849 for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { 2850 const unsigned long value_raw = t->pmt_counter[i]; 2851 const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; 2852 switch (ppmt->type) { 2853 case PMT_TYPE_RAW: 2854 if (pmt_counter_get_width(ppmt) <= 32) 2855 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 2856 (unsigned int)t->pmt_counter[i]); 2857 else 2858 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->pmt_counter[i]); 2859 2860 break; 2861 2862 case PMT_TYPE_XTAL_TIME: 2863 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); 2864 break; 2865 } 2866 } 2867 2868 /* C1 */ 2869 if (DO_BIC(BIC_CPU_c1)) 2870 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc); 2871 2872 /* print per-core data only for 1st thread in core */ 2873 if (!is_cpu_first_thread_in_core(t, c, p)) 2874 goto done; 2875 2876 if (DO_BIC(BIC_CPU_c3)) 2877 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc); 2878 if (DO_BIC(BIC_CPU_c6)) 2879 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc); 2880 if (DO_BIC(BIC_CPU_c7)) 2881 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc); 2882 2883 /* Mod%c6 */ 2884 if (DO_BIC(BIC_Mod_c6)) 2885 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc); 2886 2887 if (DO_BIC(BIC_CoreTmp)) 2888 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c); 2889 2890 /* Core throttle count */ 2891 if (DO_BIC(BIC_CORE_THROT_CNT)) 2892 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt); 2893 2894 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 2895 if (mp->format == FORMAT_RAW) { 2896 if (mp->width == 32) 2897 outp += 2898 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]); 2899 else 2900 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]); 2901 } else if (mp->format == FORMAT_DELTA) { 2902 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2903 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]); 2904 else 2905 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]); 2906 } else if (mp->format == FORMAT_PERCENT) { 2907 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc); 2908 } 2909 } 2910 2911 for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { 2912 if (pp->format == FORMAT_RAW) { 2913 if (pp->width == 32) 2914 outp += 2915 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 2916 (unsigned int)c->perf_counter[i]); 2917 else 2918 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->perf_counter[i]); 2919 } else if (pp->format == FORMAT_DELTA) { 2920 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2921 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->perf_counter[i]); 2922 else 2923 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->perf_counter[i]); 2924 } else if (pp->format == FORMAT_PERCENT) { 2925 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->perf_counter[i] / tsc); 2926 } 2927 } 2928 2929 for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { 2930 const unsigned long value_raw = c->pmt_counter[i]; 2931 const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; 2932 switch (ppmt->type) { 2933 case PMT_TYPE_RAW: 2934 if (pmt_counter_get_width(ppmt) <= 32) 2935 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 2936 (unsigned int)c->pmt_counter[i]); 2937 else 2938 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->pmt_counter[i]); 2939 2940 break; 2941 2942 case PMT_TYPE_XTAL_TIME: 2943 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); 2944 break; 2945 } 2946 } 2947 2948 fmt8 = "%s%.2f"; 2949 2950 if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) 2951 outp += 2952 sprintf(outp, fmt8, (printed++ ? delim : ""), 2953 rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float)); 2954 if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) 2955 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 2956 rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float)); 2957 2958 /* print per-package data only for 1st core in package */ 2959 if (!is_cpu_first_core_in_package(t, c, p)) 2960 goto done; 2961 2962 /* PkgTmp */ 2963 if (DO_BIC(BIC_PkgTmp)) 2964 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c); 2965 2966 /* GFXrc6 */ 2967 if (DO_BIC(BIC_GFX_rc6)) { 2968 if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */ 2969 outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); 2970 } else { 2971 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 2972 p->gfx_rc6_ms / 10.0 / interval_float); 2973 } 2974 } 2975 2976 /* GFXMHz */ 2977 if (DO_BIC(BIC_GFXMHz)) 2978 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz); 2979 2980 /* GFXACTMHz */ 2981 if (DO_BIC(BIC_GFXACTMHz)) 2982 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz); 2983 2984 /* SAMmc6 */ 2985 if (DO_BIC(BIC_SAM_mc6)) { 2986 if (p->sam_mc6_ms == -1) { /* detect GFX counter reset */ 2987 outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); 2988 } else { 2989 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 2990 p->sam_mc6_ms / 10.0 / interval_float); 2991 } 2992 } 2993 2994 /* SAMMHz */ 2995 if (DO_BIC(BIC_SAMMHz)) 2996 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_mhz); 2997 2998 /* SAMACTMHz */ 2999 if (DO_BIC(BIC_SAMACTMHz)) 3000 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_act_mhz); 3001 3002 /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ 3003 if (DO_BIC(BIC_Totl_c0)) 3004 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc); 3005 if (DO_BIC(BIC_Any_c0)) 3006 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc); 3007 if (DO_BIC(BIC_GFX_c0)) 3008 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc); 3009 if (DO_BIC(BIC_CPUGFX)) 3010 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc); 3011 3012 if (DO_BIC(BIC_Pkgpc2)) 3013 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc); 3014 if (DO_BIC(BIC_Pkgpc3)) 3015 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc); 3016 if (DO_BIC(BIC_Pkgpc6)) 3017 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc); 3018 if (DO_BIC(BIC_Pkgpc7)) 3019 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc); 3020 if (DO_BIC(BIC_Pkgpc8)) 3021 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc); 3022 if (DO_BIC(BIC_Pkgpc9)) 3023 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc); 3024 if (DO_BIC(BIC_Pkgpc10)) 3025 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc); 3026 3027 if (DO_BIC(BIC_Diec6)) 3028 outp += 3029 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->die_c6 / crystal_hz / interval_float); 3030 3031 if (DO_BIC(BIC_CPU_LPI)) { 3032 if (p->cpu_lpi >= 0) 3033 outp += 3034 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 3035 100.0 * p->cpu_lpi / 1000000.0 / interval_float); 3036 else 3037 outp += sprintf(outp, "%s(neg)", (printed++ ? delim : "")); 3038 } 3039 if (DO_BIC(BIC_SYS_LPI)) { 3040 if (p->sys_lpi >= 0) 3041 outp += 3042 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 3043 100.0 * p->sys_lpi / 1000000.0 / interval_float); 3044 else 3045 outp += sprintf(outp, "%s(neg)", (printed++ ? delim : "")); 3046 } 3047 3048 if (DO_BIC(BIC_PkgWatt)) 3049 outp += 3050 sprintf(outp, fmt8, (printed++ ? delim : ""), 3051 rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float)); 3052 if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) 3053 outp += 3054 sprintf(outp, fmt8, (printed++ ? delim : ""), 3055 rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float)); 3056 if (DO_BIC(BIC_GFXWatt)) 3057 outp += 3058 sprintf(outp, fmt8, (printed++ ? delim : ""), 3059 rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float)); 3060 if (DO_BIC(BIC_RAMWatt)) 3061 outp += 3062 sprintf(outp, fmt8, (printed++ ? delim : ""), 3063 rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float)); 3064 if (DO_BIC(BIC_Pkg_J)) 3065 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3066 rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float)); 3067 if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) 3068 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3069 rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float)); 3070 if (DO_BIC(BIC_GFX_J)) 3071 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3072 rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float)); 3073 if (DO_BIC(BIC_RAM_J)) 3074 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3075 rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float)); 3076 if (DO_BIC(BIC_PKG__)) 3077 outp += 3078 sprintf(outp, fmt8, (printed++ ? delim : ""), 3079 rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float)); 3080 if (DO_BIC(BIC_RAM__)) 3081 outp += 3082 sprintf(outp, fmt8, (printed++ ? delim : ""), 3083 rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float)); 3084 /* UncMHz */ 3085 if (DO_BIC(BIC_UNCORE_MHZ)) 3086 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz); 3087 3088 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 3089 if (mp->format == FORMAT_RAW) { 3090 if (mp->width == 32) 3091 outp += 3092 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]); 3093 else 3094 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]); 3095 } else if (mp->format == FORMAT_DELTA) { 3096 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 3097 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]); 3098 else 3099 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]); 3100 } else if (mp->format == FORMAT_PERCENT) { 3101 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc); 3102 } else if (mp->type == COUNTER_K2M) 3103 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->counter[i] / 1000); 3104 } 3105 3106 for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { 3107 if (pp->format == FORMAT_RAW) { 3108 if (pp->width == 32) 3109 outp += 3110 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 3111 (unsigned int)p->perf_counter[i]); 3112 else 3113 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->perf_counter[i]); 3114 } else if (pp->format == FORMAT_DELTA) { 3115 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 3116 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->perf_counter[i]); 3117 else 3118 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->perf_counter[i]); 3119 } else if (pp->format == FORMAT_PERCENT) { 3120 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->perf_counter[i] / tsc); 3121 } else if (pp->type == COUNTER_K2M) { 3122 outp += 3123 sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->perf_counter[i] / 1000); 3124 } 3125 } 3126 3127 for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { 3128 const unsigned long value_raw = p->pmt_counter[i]; 3129 const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; 3130 switch (ppmt->type) { 3131 case PMT_TYPE_RAW: 3132 if (pmt_counter_get_width(ppmt) <= 32) 3133 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 3134 (unsigned int)p->pmt_counter[i]); 3135 else 3136 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->pmt_counter[i]); 3137 3138 break; 3139 3140 case PMT_TYPE_XTAL_TIME: 3141 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); 3142 break; 3143 } 3144 } 3145 3146 if (DO_BIC(BIC_SysWatt) && (t == &average.threads)) 3147 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3148 rapl_counter_get_value(&pplat_cnt->energy_psys, RAPL_UNIT_WATTS, interval_float)); 3149 if (DO_BIC(BIC_Sys_J) && (t == &average.threads)) 3150 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3151 rapl_counter_get_value(&pplat_cnt->energy_psys, RAPL_UNIT_JOULES, interval_float)); 3152 3153 done: 3154 if (*(outp - 1) != '\n') 3155 outp += sprintf(outp, "\n"); 3156 3157 return 0; 3158 } 3159 3160 void flush_output_stdout(void) 3161 { 3162 FILE *filep; 3163 3164 if (outf == stderr) 3165 filep = stdout; 3166 else 3167 filep = outf; 3168 3169 fputs(output_buffer, filep); 3170 fflush(filep); 3171 3172 outp = output_buffer; 3173 } 3174 3175 void flush_output_stderr(void) 3176 { 3177 fputs(output_buffer, outf); 3178 fflush(outf); 3179 outp = output_buffer; 3180 } 3181 3182 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 3183 { 3184 static int count; 3185 3186 if ((!count || (header_iterations && !(count % header_iterations))) || !summary_only) 3187 print_header("\t"); 3188 3189 format_counters(&average.threads, &average.cores, &average.packages); 3190 3191 count++; 3192 3193 if (summary_only) 3194 return; 3195 3196 for_all_cpus(format_counters, t, c, p); 3197 } 3198 3199 #define DELTA_WRAP32(new, old) \ 3200 old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32); 3201 3202 int delta_package(struct pkg_data *new, struct pkg_data *old) 3203 { 3204 int i; 3205 struct msr_counter *mp; 3206 struct perf_counter_info *pp; 3207 struct pmt_counter *ppmt; 3208 3209 if (DO_BIC(BIC_Totl_c0)) 3210 old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; 3211 if (DO_BIC(BIC_Any_c0)) 3212 old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0; 3213 if (DO_BIC(BIC_GFX_c0)) 3214 old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0; 3215 if (DO_BIC(BIC_CPUGFX)) 3216 old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0; 3217 3218 old->pc2 = new->pc2 - old->pc2; 3219 if (DO_BIC(BIC_Pkgpc3)) 3220 old->pc3 = new->pc3 - old->pc3; 3221 if (DO_BIC(BIC_Pkgpc6)) 3222 old->pc6 = new->pc6 - old->pc6; 3223 if (DO_BIC(BIC_Pkgpc7)) 3224 old->pc7 = new->pc7 - old->pc7; 3225 old->pc8 = new->pc8 - old->pc8; 3226 old->pc9 = new->pc9 - old->pc9; 3227 old->pc10 = new->pc10 - old->pc10; 3228 old->die_c6 = new->die_c6 - old->die_c6; 3229 old->cpu_lpi = new->cpu_lpi - old->cpu_lpi; 3230 old->sys_lpi = new->sys_lpi - old->sys_lpi; 3231 old->pkg_temp_c = new->pkg_temp_c; 3232 3233 /* flag an error when rc6 counter resets/wraps */ 3234 if (old->gfx_rc6_ms > new->gfx_rc6_ms) 3235 old->gfx_rc6_ms = -1; 3236 else 3237 old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms; 3238 3239 old->uncore_mhz = new->uncore_mhz; 3240 old->gfx_mhz = new->gfx_mhz; 3241 old->gfx_act_mhz = new->gfx_act_mhz; 3242 3243 /* flag an error when mc6 counter resets/wraps */ 3244 if (old->sam_mc6_ms > new->sam_mc6_ms) 3245 old->sam_mc6_ms = -1; 3246 else 3247 old->sam_mc6_ms = new->sam_mc6_ms - old->sam_mc6_ms; 3248 3249 old->sam_mhz = new->sam_mhz; 3250 old->sam_act_mhz = new->sam_act_mhz; 3251 3252 old->energy_pkg.raw_value = new->energy_pkg.raw_value - old->energy_pkg.raw_value; 3253 old->energy_cores.raw_value = new->energy_cores.raw_value - old->energy_cores.raw_value; 3254 old->energy_gfx.raw_value = new->energy_gfx.raw_value - old->energy_gfx.raw_value; 3255 old->energy_dram.raw_value = new->energy_dram.raw_value - old->energy_dram.raw_value; 3256 old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value; 3257 old->rapl_dram_perf_status.raw_value = 3258 new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value; 3259 3260 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 3261 if (mp->format == FORMAT_RAW) 3262 old->counter[i] = new->counter[i]; 3263 else if (mp->format == FORMAT_AVERAGE) 3264 old->counter[i] = new->counter[i]; 3265 else 3266 old->counter[i] = new->counter[i] - old->counter[i]; 3267 } 3268 3269 for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { 3270 if (pp->format == FORMAT_RAW) 3271 old->perf_counter[i] = new->perf_counter[i]; 3272 else if (pp->format == FORMAT_AVERAGE) 3273 old->perf_counter[i] = new->perf_counter[i]; 3274 else 3275 old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; 3276 } 3277 3278 for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { 3279 if (ppmt->format == FORMAT_RAW) 3280 old->pmt_counter[i] = new->pmt_counter[i]; 3281 else 3282 old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; 3283 } 3284 3285 return 0; 3286 } 3287 3288 void delta_core(struct core_data *new, struct core_data *old) 3289 { 3290 int i; 3291 struct msr_counter *mp; 3292 struct perf_counter_info *pp; 3293 struct pmt_counter *ppmt; 3294 3295 old->c3 = new->c3 - old->c3; 3296 old->c6 = new->c6 - old->c6; 3297 old->c7 = new->c7 - old->c7; 3298 old->core_temp_c = new->core_temp_c; 3299 old->core_throt_cnt = new->core_throt_cnt; 3300 old->mc6_us = new->mc6_us - old->mc6_us; 3301 3302 DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value); 3303 3304 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 3305 if (mp->format == FORMAT_RAW) 3306 old->counter[i] = new->counter[i]; 3307 else 3308 old->counter[i] = new->counter[i] - old->counter[i]; 3309 } 3310 3311 for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { 3312 if (pp->format == FORMAT_RAW) 3313 old->perf_counter[i] = new->perf_counter[i]; 3314 else 3315 old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; 3316 } 3317 3318 for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { 3319 if (ppmt->format == FORMAT_RAW) 3320 old->pmt_counter[i] = new->pmt_counter[i]; 3321 else 3322 old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; 3323 } 3324 } 3325 3326 int soft_c1_residency_display(int bic) 3327 { 3328 if (!DO_BIC(BIC_CPU_c1) || platform->has_msr_core_c1_res) 3329 return 0; 3330 3331 return DO_BIC_READ(bic); 3332 } 3333 3334 /* 3335 * old = new - old 3336 */ 3337 int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta) 3338 { 3339 int i; 3340 struct msr_counter *mp; 3341 struct perf_counter_info *pp; 3342 struct pmt_counter *ppmt; 3343 3344 /* we run cpuid just the 1st time, copy the results */ 3345 if (DO_BIC(BIC_APIC)) 3346 new->apic_id = old->apic_id; 3347 if (DO_BIC(BIC_X2APIC)) 3348 new->x2apic_id = old->x2apic_id; 3349 3350 /* 3351 * the timestamps from start of measurement interval are in "old" 3352 * the timestamp from end of measurement interval are in "new" 3353 * over-write old w/ new so we can print end of interval values 3354 */ 3355 3356 timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta); 3357 old->tv_begin = new->tv_begin; 3358 old->tv_end = new->tv_end; 3359 3360 old->tsc = new->tsc - old->tsc; 3361 3362 /* check for TSC < 1 Mcycles over interval */ 3363 if (old->tsc < (1000 * 1000)) 3364 errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n" 3365 "You can disable all c-states by booting with \"idle=poll\"\n" 3366 "or just the deep ones with \"processor.max_cstate=1\""); 3367 3368 old->c1 = new->c1 - old->c1; 3369 3370 if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC) 3371 || soft_c1_residency_display(BIC_Avg_MHz)) { 3372 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { 3373 old->aperf = new->aperf - old->aperf; 3374 old->mperf = new->mperf - old->mperf; 3375 } else { 3376 return -1; 3377 } 3378 } 3379 3380 if (platform->has_msr_core_c1_res) { 3381 /* 3382 * Some models have a dedicated C1 residency MSR, 3383 * which should be more accurate than the derivation below. 3384 */ 3385 } else { 3386 /* 3387 * As counter collection is not atomic, 3388 * it is possible for mperf's non-halted cycles + idle states 3389 * to exceed TSC's all cycles: show c1 = 0% in that case. 3390 */ 3391 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak)) 3392 old->c1 = 0; 3393 else { 3394 /* normal case, derive c1 */ 3395 old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3 3396 - core_delta->c6 - core_delta->c7; 3397 } 3398 } 3399 3400 if (old->mperf == 0) { 3401 if (debug > 1) 3402 fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id); 3403 old->mperf = 1; /* divide by 0 protection */ 3404 } 3405 3406 if (DO_BIC(BIC_IPC)) 3407 old->instr_count = new->instr_count - old->instr_count; 3408 3409 if (DO_BIC(BIC_IRQ)) 3410 old->irq_count = new->irq_count - old->irq_count; 3411 3412 if (DO_BIC(BIC_SMI)) 3413 old->smi_count = new->smi_count - old->smi_count; 3414 3415 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 3416 if (mp->format == FORMAT_RAW) 3417 old->counter[i] = new->counter[i]; 3418 else 3419 old->counter[i] = new->counter[i] - old->counter[i]; 3420 } 3421 3422 for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { 3423 if (pp->format == FORMAT_RAW) 3424 old->perf_counter[i] = new->perf_counter[i]; 3425 else 3426 old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; 3427 } 3428 3429 for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { 3430 if (ppmt->format == FORMAT_RAW) 3431 old->pmt_counter[i] = new->pmt_counter[i]; 3432 else 3433 old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; 3434 } 3435 3436 return 0; 3437 } 3438 3439 int delta_cpu(struct thread_data *t, struct core_data *c, 3440 struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2) 3441 { 3442 int retval = 0; 3443 3444 /* calculate core delta only for 1st thread in core */ 3445 if (is_cpu_first_thread_in_core(t, c, p)) 3446 delta_core(c, c2); 3447 3448 /* always calculate thread delta */ 3449 retval = delta_thread(t, t2, c2); /* c2 is core delta */ 3450 if (retval) 3451 return retval; 3452 3453 /* calculate package delta only for 1st core in package */ 3454 if (is_cpu_first_core_in_package(t, c, p)) 3455 retval = delta_package(p, p2); 3456 3457 return retval; 3458 } 3459 3460 void delta_platform(struct platform_counters *new, struct platform_counters *old) 3461 { 3462 old->energy_psys.raw_value = new->energy_psys.raw_value - old->energy_psys.raw_value; 3463 } 3464 3465 void rapl_counter_clear(struct rapl_counter *c) 3466 { 3467 c->raw_value = 0; 3468 c->scale = 0.0; 3469 c->unit = RAPL_UNIT_INVALID; 3470 } 3471 3472 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 3473 { 3474 int i; 3475 struct msr_counter *mp; 3476 3477 t->tv_begin.tv_sec = 0; 3478 t->tv_begin.tv_usec = 0; 3479 t->tv_end.tv_sec = 0; 3480 t->tv_end.tv_usec = 0; 3481 t->tv_delta.tv_sec = 0; 3482 t->tv_delta.tv_usec = 0; 3483 3484 t->tsc = 0; 3485 t->aperf = 0; 3486 t->mperf = 0; 3487 t->c1 = 0; 3488 3489 t->instr_count = 0; 3490 3491 t->irq_count = 0; 3492 t->smi_count = 0; 3493 3494 c->c3 = 0; 3495 c->c6 = 0; 3496 c->c7 = 0; 3497 c->mc6_us = 0; 3498 c->core_temp_c = 0; 3499 rapl_counter_clear(&c->core_energy); 3500 c->core_throt_cnt = 0; 3501 3502 p->pkg_wtd_core_c0 = 0; 3503 p->pkg_any_core_c0 = 0; 3504 p->pkg_any_gfxe_c0 = 0; 3505 p->pkg_both_core_gfxe_c0 = 0; 3506 3507 p->pc2 = 0; 3508 if (DO_BIC(BIC_Pkgpc3)) 3509 p->pc3 = 0; 3510 if (DO_BIC(BIC_Pkgpc6)) 3511 p->pc6 = 0; 3512 if (DO_BIC(BIC_Pkgpc7)) 3513 p->pc7 = 0; 3514 p->pc8 = 0; 3515 p->pc9 = 0; 3516 p->pc10 = 0; 3517 p->die_c6 = 0; 3518 p->cpu_lpi = 0; 3519 p->sys_lpi = 0; 3520 3521 rapl_counter_clear(&p->energy_pkg); 3522 rapl_counter_clear(&p->energy_dram); 3523 rapl_counter_clear(&p->energy_cores); 3524 rapl_counter_clear(&p->energy_gfx); 3525 rapl_counter_clear(&p->rapl_pkg_perf_status); 3526 rapl_counter_clear(&p->rapl_dram_perf_status); 3527 p->pkg_temp_c = 0; 3528 3529 p->gfx_rc6_ms = 0; 3530 p->uncore_mhz = 0; 3531 p->gfx_mhz = 0; 3532 p->gfx_act_mhz = 0; 3533 p->sam_mc6_ms = 0; 3534 p->sam_mhz = 0; 3535 p->sam_act_mhz = 0; 3536 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) 3537 t->counter[i] = 0; 3538 3539 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) 3540 c->counter[i] = 0; 3541 3542 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) 3543 p->counter[i] = 0; 3544 3545 memset(&t->perf_counter[0], 0, sizeof(t->perf_counter)); 3546 memset(&c->perf_counter[0], 0, sizeof(c->perf_counter)); 3547 memset(&p->perf_counter[0], 0, sizeof(p->perf_counter)); 3548 3549 memset(&t->pmt_counter[0], 0, ARRAY_SIZE(t->pmt_counter)); 3550 memset(&c->pmt_counter[0], 0, ARRAY_SIZE(c->pmt_counter)); 3551 memset(&p->pmt_counter[0], 0, ARRAY_SIZE(p->pmt_counter)); 3552 } 3553 3554 void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src) 3555 { 3556 /* Copy unit and scale from src if dst is not initialized */ 3557 if (dst->unit == RAPL_UNIT_INVALID) { 3558 dst->unit = src->unit; 3559 dst->scale = src->scale; 3560 } 3561 3562 assert(dst->unit == src->unit); 3563 assert(dst->scale == src->scale); 3564 3565 dst->raw_value += src->raw_value; 3566 } 3567 3568 int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 3569 { 3570 int i; 3571 struct msr_counter *mp; 3572 struct perf_counter_info *pp; 3573 struct pmt_counter *ppmt; 3574 3575 /* copy un-changing apic_id's */ 3576 if (DO_BIC(BIC_APIC)) 3577 average.threads.apic_id = t->apic_id; 3578 if (DO_BIC(BIC_X2APIC)) 3579 average.threads.x2apic_id = t->x2apic_id; 3580 3581 /* remember first tv_begin */ 3582 if (average.threads.tv_begin.tv_sec == 0) 3583 average.threads.tv_begin = t->tv_begin; 3584 3585 /* remember last tv_end */ 3586 average.threads.tv_end = t->tv_end; 3587 3588 average.threads.tsc += t->tsc; 3589 average.threads.aperf += t->aperf; 3590 average.threads.mperf += t->mperf; 3591 average.threads.c1 += t->c1; 3592 3593 average.threads.instr_count += t->instr_count; 3594 3595 average.threads.irq_count += t->irq_count; 3596 average.threads.smi_count += t->smi_count; 3597 3598 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 3599 if (mp->format == FORMAT_RAW) 3600 continue; 3601 average.threads.counter[i] += t->counter[i]; 3602 } 3603 3604 for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { 3605 if (pp->format == FORMAT_RAW) 3606 continue; 3607 average.threads.perf_counter[i] += t->perf_counter[i]; 3608 } 3609 3610 for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { 3611 average.threads.pmt_counter[i] += t->pmt_counter[i]; 3612 } 3613 3614 /* sum per-core values only for 1st thread in core */ 3615 if (!is_cpu_first_thread_in_core(t, c, p)) 3616 return 0; 3617 3618 average.cores.c3 += c->c3; 3619 average.cores.c6 += c->c6; 3620 average.cores.c7 += c->c7; 3621 average.cores.mc6_us += c->mc6_us; 3622 3623 average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); 3624 average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt); 3625 3626 rapl_counter_accumulate(&average.cores.core_energy, &c->core_energy); 3627 3628 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 3629 if (mp->format == FORMAT_RAW) 3630 continue; 3631 average.cores.counter[i] += c->counter[i]; 3632 } 3633 3634 for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { 3635 if (pp->format == FORMAT_RAW) 3636 continue; 3637 average.cores.perf_counter[i] += c->perf_counter[i]; 3638 } 3639 3640 for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { 3641 average.cores.pmt_counter[i] += c->pmt_counter[i]; 3642 } 3643 3644 /* sum per-pkg values only for 1st core in pkg */ 3645 if (!is_cpu_first_core_in_package(t, c, p)) 3646 return 0; 3647 3648 if (DO_BIC(BIC_Totl_c0)) 3649 average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0; 3650 if (DO_BIC(BIC_Any_c0)) 3651 average.packages.pkg_any_core_c0 += p->pkg_any_core_c0; 3652 if (DO_BIC(BIC_GFX_c0)) 3653 average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0; 3654 if (DO_BIC(BIC_CPUGFX)) 3655 average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0; 3656 3657 average.packages.pc2 += p->pc2; 3658 if (DO_BIC(BIC_Pkgpc3)) 3659 average.packages.pc3 += p->pc3; 3660 if (DO_BIC(BIC_Pkgpc6)) 3661 average.packages.pc6 += p->pc6; 3662 if (DO_BIC(BIC_Pkgpc7)) 3663 average.packages.pc7 += p->pc7; 3664 average.packages.pc8 += p->pc8; 3665 average.packages.pc9 += p->pc9; 3666 average.packages.pc10 += p->pc10; 3667 average.packages.die_c6 += p->die_c6; 3668 3669 average.packages.cpu_lpi = p->cpu_lpi; 3670 average.packages.sys_lpi = p->sys_lpi; 3671 3672 rapl_counter_accumulate(&average.packages.energy_pkg, &p->energy_pkg); 3673 rapl_counter_accumulate(&average.packages.energy_dram, &p->energy_dram); 3674 rapl_counter_accumulate(&average.packages.energy_cores, &p->energy_cores); 3675 rapl_counter_accumulate(&average.packages.energy_gfx, &p->energy_gfx); 3676 3677 average.packages.gfx_rc6_ms = p->gfx_rc6_ms; 3678 average.packages.uncore_mhz = p->uncore_mhz; 3679 average.packages.gfx_mhz = p->gfx_mhz; 3680 average.packages.gfx_act_mhz = p->gfx_act_mhz; 3681 average.packages.sam_mc6_ms = p->sam_mc6_ms; 3682 average.packages.sam_mhz = p->sam_mhz; 3683 average.packages.sam_act_mhz = p->sam_act_mhz; 3684 3685 average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); 3686 3687 rapl_counter_accumulate(&average.packages.rapl_pkg_perf_status, &p->rapl_pkg_perf_status); 3688 rapl_counter_accumulate(&average.packages.rapl_dram_perf_status, &p->rapl_dram_perf_status); 3689 3690 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 3691 if ((mp->format == FORMAT_RAW) && (topo.num_packages == 0)) 3692 average.packages.counter[i] = p->counter[i]; 3693 else 3694 average.packages.counter[i] += p->counter[i]; 3695 } 3696 3697 for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { 3698 if ((pp->format == FORMAT_RAW) && (topo.num_packages == 0)) 3699 average.packages.perf_counter[i] = p->perf_counter[i]; 3700 else 3701 average.packages.perf_counter[i] += p->perf_counter[i]; 3702 } 3703 3704 for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { 3705 average.packages.pmt_counter[i] += p->pmt_counter[i]; 3706 } 3707 3708 return 0; 3709 } 3710 3711 /* 3712 * sum the counters for all cpus in the system 3713 * compute the weighted average 3714 */ 3715 void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data *p) 3716 { 3717 int i; 3718 struct msr_counter *mp; 3719 struct perf_counter_info *pp; 3720 struct pmt_counter *ppmt; 3721 3722 clear_counters(&average.threads, &average.cores, &average.packages); 3723 3724 for_all_cpus(sum_counters, t, c, p); 3725 3726 /* Use the global time delta for the average. */ 3727 average.threads.tv_delta = tv_delta; 3728 3729 average.threads.tsc /= topo.allowed_cpus; 3730 average.threads.aperf /= topo.allowed_cpus; 3731 average.threads.mperf /= topo.allowed_cpus; 3732 average.threads.instr_count /= topo.allowed_cpus; 3733 average.threads.c1 /= topo.allowed_cpus; 3734 3735 if (average.threads.irq_count > 9999999) 3736 sums_need_wide_columns = 1; 3737 3738 average.cores.c3 /= topo.allowed_cores; 3739 average.cores.c6 /= topo.allowed_cores; 3740 average.cores.c7 /= topo.allowed_cores; 3741 average.cores.mc6_us /= topo.allowed_cores; 3742 3743 if (DO_BIC(BIC_Totl_c0)) 3744 average.packages.pkg_wtd_core_c0 /= topo.allowed_packages; 3745 if (DO_BIC(BIC_Any_c0)) 3746 average.packages.pkg_any_core_c0 /= topo.allowed_packages; 3747 if (DO_BIC(BIC_GFX_c0)) 3748 average.packages.pkg_any_gfxe_c0 /= topo.allowed_packages; 3749 if (DO_BIC(BIC_CPUGFX)) 3750 average.packages.pkg_both_core_gfxe_c0 /= topo.allowed_packages; 3751 3752 average.packages.pc2 /= topo.allowed_packages; 3753 if (DO_BIC(BIC_Pkgpc3)) 3754 average.packages.pc3 /= topo.allowed_packages; 3755 if (DO_BIC(BIC_Pkgpc6)) 3756 average.packages.pc6 /= topo.allowed_packages; 3757 if (DO_BIC(BIC_Pkgpc7)) 3758 average.packages.pc7 /= topo.allowed_packages; 3759 3760 average.packages.pc8 /= topo.allowed_packages; 3761 average.packages.pc9 /= topo.allowed_packages; 3762 average.packages.pc10 /= topo.allowed_packages; 3763 average.packages.die_c6 /= topo.allowed_packages; 3764 3765 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 3766 if (mp->format == FORMAT_RAW) 3767 continue; 3768 if (mp->type == COUNTER_ITEMS) { 3769 if (average.threads.counter[i] > 9999999) 3770 sums_need_wide_columns = 1; 3771 continue; 3772 } 3773 average.threads.counter[i] /= topo.allowed_cpus; 3774 } 3775 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 3776 if (mp->format == FORMAT_RAW) 3777 continue; 3778 if (mp->type == COUNTER_ITEMS) { 3779 if (average.cores.counter[i] > 9999999) 3780 sums_need_wide_columns = 1; 3781 } 3782 average.cores.counter[i] /= topo.allowed_cores; 3783 } 3784 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 3785 if (mp->format == FORMAT_RAW) 3786 continue; 3787 if (mp->type == COUNTER_ITEMS) { 3788 if (average.packages.counter[i] > 9999999) 3789 sums_need_wide_columns = 1; 3790 } 3791 average.packages.counter[i] /= topo.allowed_packages; 3792 } 3793 3794 for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { 3795 if (pp->format == FORMAT_RAW) 3796 continue; 3797 if (pp->type == COUNTER_ITEMS) { 3798 if (average.threads.perf_counter[i] > 9999999) 3799 sums_need_wide_columns = 1; 3800 continue; 3801 } 3802 average.threads.perf_counter[i] /= topo.allowed_cpus; 3803 } 3804 for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { 3805 if (pp->format == FORMAT_RAW) 3806 continue; 3807 if (pp->type == COUNTER_ITEMS) { 3808 if (average.cores.perf_counter[i] > 9999999) 3809 sums_need_wide_columns = 1; 3810 } 3811 average.cores.perf_counter[i] /= topo.allowed_cores; 3812 } 3813 for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { 3814 if (pp->format == FORMAT_RAW) 3815 continue; 3816 if (pp->type == COUNTER_ITEMS) { 3817 if (average.packages.perf_counter[i] > 9999999) 3818 sums_need_wide_columns = 1; 3819 } 3820 average.packages.perf_counter[i] /= topo.allowed_packages; 3821 } 3822 3823 for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { 3824 average.threads.pmt_counter[i] /= topo.allowed_cpus; 3825 } 3826 for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { 3827 average.cores.pmt_counter[i] /= topo.allowed_cores; 3828 } 3829 for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { 3830 average.packages.pmt_counter[i] /= topo.allowed_packages; 3831 } 3832 } 3833 3834 static unsigned long long rdtsc(void) 3835 { 3836 unsigned int low, high; 3837 3838 asm volatile ("rdtsc":"=a" (low), "=d"(high)); 3839 3840 return low | ((unsigned long long)high) << 32; 3841 } 3842 3843 /* 3844 * Open a file, and exit on failure 3845 */ 3846 FILE *fopen_or_die(const char *path, const char *mode) 3847 { 3848 FILE *filep = fopen(path, mode); 3849 3850 if (!filep) 3851 err(1, "%s: open failed", path); 3852 return filep; 3853 } 3854 3855 /* 3856 * snapshot_sysfs_counter() 3857 * 3858 * return snapshot of given counter 3859 */ 3860 unsigned long long snapshot_sysfs_counter(char *path) 3861 { 3862 FILE *fp; 3863 int retval; 3864 unsigned long long counter; 3865 3866 fp = fopen_or_die(path, "r"); 3867 3868 retval = fscanf(fp, "%lld", &counter); 3869 if (retval != 1) 3870 err(1, "snapshot_sysfs_counter(%s)", path); 3871 3872 fclose(fp); 3873 3874 return counter; 3875 } 3876 3877 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp, char *counter_path) 3878 { 3879 if (mp->msr_num != 0) { 3880 assert(!no_msr); 3881 if (get_msr(cpu, mp->msr_num, counterp)) 3882 return -1; 3883 } else { 3884 char path[128 + PATH_BYTES]; 3885 3886 if (mp->flags & SYSFS_PERCPU) { 3887 sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", cpu, mp->sp->path); 3888 3889 *counterp = snapshot_sysfs_counter(path); 3890 } else { 3891 *counterp = snapshot_sysfs_counter(counter_path); 3892 } 3893 } 3894 3895 return 0; 3896 } 3897 3898 unsigned long long get_legacy_uncore_mhz(int package) 3899 { 3900 char path[128]; 3901 int die; 3902 static int warn_once; 3903 3904 /* 3905 * for this package, use the first die_id that exists 3906 */ 3907 for (die = 0; die <= topo.max_die_id; ++die) { 3908 3909 sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", 3910 package, die); 3911 3912 if (access(path, R_OK) == 0) 3913 return (snapshot_sysfs_counter(path) / 1000); 3914 } 3915 if (!warn_once) { 3916 warnx("BUG: %s: No %s", __func__, path); 3917 warn_once = 1; 3918 } 3919 3920 return 0; 3921 } 3922 3923 int get_epb(int cpu) 3924 { 3925 char path[128 + PATH_BYTES]; 3926 unsigned long long msr; 3927 int ret, epb = -1; 3928 FILE *fp; 3929 3930 sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu); 3931 3932 fp = fopen(path, "r"); 3933 if (!fp) 3934 goto msr_fallback; 3935 3936 ret = fscanf(fp, "%d", &epb); 3937 if (ret != 1) 3938 err(1, "%s(%s)", __func__, path); 3939 3940 fclose(fp); 3941 3942 return epb; 3943 3944 msr_fallback: 3945 if (no_msr) 3946 return -1; 3947 3948 get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr); 3949 3950 return msr & 0xf; 3951 } 3952 3953 void get_apic_id(struct thread_data *t) 3954 { 3955 unsigned int eax, ebx, ecx, edx; 3956 3957 if (DO_BIC(BIC_APIC)) { 3958 eax = ebx = ecx = edx = 0; 3959 __cpuid(1, eax, ebx, ecx, edx); 3960 3961 t->apic_id = (ebx >> 24) & 0xff; 3962 } 3963 3964 if (!DO_BIC(BIC_X2APIC)) 3965 return; 3966 3967 if (authentic_amd || hygon_genuine) { 3968 unsigned int topology_extensions; 3969 3970 if (max_extended_level < 0x8000001e) 3971 return; 3972 3973 eax = ebx = ecx = edx = 0; 3974 __cpuid(0x80000001, eax, ebx, ecx, edx); 3975 topology_extensions = ecx & (1 << 22); 3976 3977 if (topology_extensions == 0) 3978 return; 3979 3980 eax = ebx = ecx = edx = 0; 3981 __cpuid(0x8000001e, eax, ebx, ecx, edx); 3982 3983 t->x2apic_id = eax; 3984 return; 3985 } 3986 3987 if (!genuine_intel) 3988 return; 3989 3990 if (max_level < 0xb) 3991 return; 3992 3993 ecx = 0; 3994 __cpuid(0xb, eax, ebx, ecx, edx); 3995 t->x2apic_id = edx; 3996 3997 if (debug && (t->apic_id != (t->x2apic_id & 0xff))) 3998 fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id); 3999 } 4000 4001 int get_core_throt_cnt(int cpu, unsigned long long *cnt) 4002 { 4003 char path[128 + PATH_BYTES]; 4004 unsigned long long tmp; 4005 FILE *fp; 4006 int ret; 4007 4008 sprintf(path, "/sys/devices/system/cpu/cpu%d/thermal_throttle/core_throttle_count", cpu); 4009 fp = fopen(path, "r"); 4010 if (!fp) 4011 return -1; 4012 ret = fscanf(fp, "%lld", &tmp); 4013 fclose(fp); 4014 if (ret != 1) 4015 return -1; 4016 *cnt = tmp; 4017 4018 return 0; 4019 } 4020 4021 struct amperf_group_fd { 4022 int aperf; /* Also the group descriptor */ 4023 int mperf; 4024 }; 4025 4026 static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr) 4027 { 4028 int fdmt; 4029 int bytes_read; 4030 char buf[64]; 4031 int ret = -1; 4032 4033 fdmt = open(path, O_RDONLY, 0); 4034 if (fdmt == -1) { 4035 if (debug) 4036 fprintf(stderr, "Failed to parse perf counter info %s\n", path); 4037 ret = -1; 4038 goto cleanup_and_exit; 4039 } 4040 4041 bytes_read = read(fdmt, buf, sizeof(buf) - 1); 4042 if (bytes_read <= 0 || bytes_read >= (int)sizeof(buf)) { 4043 if (debug) 4044 fprintf(stderr, "Failed to parse perf counter info %s\n", path); 4045 ret = -1; 4046 goto cleanup_and_exit; 4047 } 4048 4049 buf[bytes_read] = '\0'; 4050 4051 if (sscanf(buf, parse_format, value_ptr) != 1) { 4052 if (debug) 4053 fprintf(stderr, "Failed to parse perf counter info %s\n", path); 4054 ret = -1; 4055 goto cleanup_and_exit; 4056 } 4057 4058 ret = 0; 4059 4060 cleanup_and_exit: 4061 close(fdmt); 4062 return ret; 4063 } 4064 4065 static unsigned int read_perf_counter_info_n(const char *const path, const char *const parse_format) 4066 { 4067 unsigned int v; 4068 int status; 4069 4070 status = read_perf_counter_info(path, parse_format, &v); 4071 if (status) 4072 v = -1; 4073 4074 return v; 4075 } 4076 4077 static unsigned int read_perf_type(const char *subsys) 4078 { 4079 const char *const path_format = "/sys/bus/event_source/devices/%s/type"; 4080 const char *const format = "%u"; 4081 char path[128]; 4082 4083 snprintf(path, sizeof(path), path_format, subsys); 4084 4085 return read_perf_counter_info_n(path, format); 4086 } 4087 4088 static unsigned int read_perf_config(const char *subsys, const char *event_name) 4089 { 4090 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s"; 4091 FILE *fconfig = NULL; 4092 char path[128]; 4093 char config_str[64]; 4094 unsigned int config; 4095 unsigned int umask; 4096 bool has_config = false; 4097 bool has_umask = false; 4098 unsigned int ret = -1; 4099 4100 snprintf(path, sizeof(path), path_format, subsys, event_name); 4101 4102 fconfig = fopen(path, "r"); 4103 if (!fconfig) 4104 return -1; 4105 4106 if (fgets(config_str, ARRAY_SIZE(config_str), fconfig) != config_str) 4107 goto cleanup_and_exit; 4108 4109 for (char *pconfig_str = &config_str[0]; pconfig_str;) { 4110 if (sscanf(pconfig_str, "event=%x", &config) == 1) { 4111 has_config = true; 4112 goto next; 4113 } 4114 4115 if (sscanf(pconfig_str, "umask=%x", &umask) == 1) { 4116 has_umask = true; 4117 goto next; 4118 } 4119 4120 next: 4121 pconfig_str = strchr(pconfig_str, ','); 4122 if (pconfig_str) { 4123 *pconfig_str = '\0'; 4124 ++pconfig_str; 4125 } 4126 } 4127 4128 if (!has_umask) 4129 umask = 0; 4130 4131 if (has_config) 4132 ret = (umask << 8) | config; 4133 4134 cleanup_and_exit: 4135 fclose(fconfig); 4136 return ret; 4137 } 4138 4139 static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_name) 4140 { 4141 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.unit"; 4142 const char *const format = "%s"; 4143 char path[128]; 4144 char unit_buffer[16]; 4145 4146 snprintf(path, sizeof(path), path_format, subsys, event_name); 4147 4148 read_perf_counter_info(path, format, &unit_buffer); 4149 if (strcmp("Joules", unit_buffer) == 0) 4150 return RAPL_UNIT_JOULES; 4151 4152 return RAPL_UNIT_INVALID; 4153 } 4154 4155 static double read_perf_scale(const char *subsys, const char *event_name) 4156 { 4157 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.scale"; 4158 const char *const format = "%lf"; 4159 char path[128]; 4160 double scale; 4161 4162 snprintf(path, sizeof(path), path_format, subsys, event_name); 4163 4164 if (read_perf_counter_info(path, format, &scale)) 4165 return 0.0; 4166 4167 return scale; 4168 } 4169 4170 size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci) 4171 { 4172 size_t ret = 0; 4173 4174 for (int i = 0; i < NUM_RAPL_COUNTERS; ++i) 4175 if (rci->source[i] == COUNTER_SOURCE_PERF) 4176 ++ret; 4177 4178 return ret; 4179 } 4180 4181 static size_t cstate_counter_info_count_perf(const struct cstate_counter_info_t *cci) 4182 { 4183 size_t ret = 0; 4184 4185 for (int i = 0; i < NUM_CSTATE_COUNTERS; ++i) 4186 if (cci->source[i] == COUNTER_SOURCE_PERF) 4187 ++ret; 4188 4189 return ret; 4190 } 4191 4192 void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci, unsigned int idx) 4193 { 4194 if (rci->source[idx] == COUNTER_SOURCE_NONE) 4195 return; 4196 4197 rc->raw_value = rci->data[idx]; 4198 rc->unit = rci->unit[idx]; 4199 rc->scale = rci->scale[idx]; 4200 } 4201 4202 int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct pkg_data *p) 4203 { 4204 struct platform_counters *pplat_cnt = p == package_odd ? &platform_counters_odd : &platform_counters_even; 4205 unsigned long long perf_data[NUM_RAPL_COUNTERS + 1]; 4206 struct rapl_counter_info_t *rci; 4207 4208 if (debug >= 2) 4209 fprintf(stderr, "%s: cpu%d domain%d\n", __func__, cpu, domain); 4210 4211 assert(rapl_counter_info_perdomain); 4212 assert(domain < rapl_counter_info_perdomain_size); 4213 4214 rci = &rapl_counter_info_perdomain[domain]; 4215 4216 /* 4217 * If we have any perf counters to read, read them all now, in bulk 4218 */ 4219 if (rci->fd_perf != -1) { 4220 size_t num_perf_counters = rapl_counter_info_count_perf(rci); 4221 const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long); 4222 const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data)); 4223 4224 if (actual_read_size != expected_read_size) 4225 err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, 4226 actual_read_size); 4227 } 4228 4229 for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) { 4230 switch (rci->source[i]) { 4231 case COUNTER_SOURCE_NONE: 4232 rci->data[i] = 0; 4233 break; 4234 4235 case COUNTER_SOURCE_PERF: 4236 assert(pi < ARRAY_SIZE(perf_data)); 4237 assert(rci->fd_perf != -1); 4238 4239 if (debug >= 2) 4240 fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n", 4241 i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]); 4242 4243 rci->data[i] = perf_data[pi]; 4244 4245 ++pi; 4246 break; 4247 4248 case COUNTER_SOURCE_MSR: 4249 if (debug >= 2) 4250 fprintf(stderr, "Reading rapl counter via msr at %u\n", i); 4251 4252 assert(!no_msr); 4253 if (rci->flags[i] & RAPL_COUNTER_FLAG_USE_MSR_SUM) { 4254 if (get_msr_sum(cpu, rci->msr[i], &rci->data[i])) 4255 return -13 - i; 4256 } else { 4257 if (get_msr(cpu, rci->msr[i], &rci->data[i])) 4258 return -13 - i; 4259 } 4260 4261 rci->data[i] &= rci->msr_mask[i]; 4262 if (rci->msr_shift[i] >= 0) 4263 rci->data[i] >>= abs(rci->msr_shift[i]); 4264 else 4265 rci->data[i] <<= abs(rci->msr_shift[i]); 4266 4267 break; 4268 } 4269 } 4270 4271 BUILD_BUG_ON(NUM_RAPL_COUNTERS != 8); 4272 write_rapl_counter(&p->energy_pkg, rci, RAPL_RCI_INDEX_ENERGY_PKG); 4273 write_rapl_counter(&p->energy_cores, rci, RAPL_RCI_INDEX_ENERGY_CORES); 4274 write_rapl_counter(&p->energy_dram, rci, RAPL_RCI_INDEX_DRAM); 4275 write_rapl_counter(&p->energy_gfx, rci, RAPL_RCI_INDEX_GFX); 4276 write_rapl_counter(&p->rapl_pkg_perf_status, rci, RAPL_RCI_INDEX_PKG_PERF_STATUS); 4277 write_rapl_counter(&p->rapl_dram_perf_status, rci, RAPL_RCI_INDEX_DRAM_PERF_STATUS); 4278 write_rapl_counter(&c->core_energy, rci, RAPL_RCI_INDEX_CORE_ENERGY); 4279 write_rapl_counter(&pplat_cnt->energy_psys, rci, RAPL_RCI_INDEX_ENERGY_PLATFORM); 4280 4281 return 0; 4282 } 4283 4284 char *find_sysfs_path_by_id(struct sysfs_path *sp, int id) 4285 { 4286 while (sp) { 4287 if (sp->id == id) 4288 return (sp->path); 4289 sp = sp->next; 4290 } 4291 if (debug) 4292 warnx("%s: id%d not found", __func__, id); 4293 return NULL; 4294 } 4295 4296 int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_data *c, struct pkg_data *p) 4297 { 4298 /* 4299 * Overcommit memory a little bit here, 4300 * but skip calculating exact sizes for the buffers. 4301 */ 4302 unsigned long long perf_data[NUM_CSTATE_COUNTERS]; 4303 unsigned long long perf_data_core[NUM_CSTATE_COUNTERS + 1]; 4304 unsigned long long perf_data_pkg[NUM_CSTATE_COUNTERS + 1]; 4305 4306 struct cstate_counter_info_t *cci; 4307 4308 if (debug >= 2) 4309 fprintf(stderr, "%s: cpu%d\n", __func__, cpu); 4310 4311 assert(ccstate_counter_info); 4312 assert(cpu <= ccstate_counter_info_size); 4313 4314 ZERO_ARRAY(perf_data); 4315 ZERO_ARRAY(perf_data_core); 4316 ZERO_ARRAY(perf_data_pkg); 4317 4318 cci = &ccstate_counter_info[cpu]; 4319 4320 /* 4321 * If we have any perf counters to read, read them all now, in bulk 4322 */ 4323 const size_t num_perf_counters = cstate_counter_info_count_perf(cci); 4324 ssize_t expected_read_size = num_perf_counters * sizeof(unsigned long long); 4325 ssize_t actual_read_size_core = 0, actual_read_size_pkg = 0; 4326 4327 if (cci->fd_perf_core != -1) { 4328 /* Each descriptor read begins with number of counters read. */ 4329 expected_read_size += sizeof(unsigned long long); 4330 4331 actual_read_size_core = read(cci->fd_perf_core, &perf_data_core[0], sizeof(perf_data_core)); 4332 4333 if (actual_read_size_core <= 0) 4334 err(-1, "%s: read perf %s: %ld", __func__, "core", actual_read_size_core); 4335 } 4336 4337 if (cci->fd_perf_pkg != -1) { 4338 /* Each descriptor read begins with number of counters read. */ 4339 expected_read_size += sizeof(unsigned long long); 4340 4341 actual_read_size_pkg = read(cci->fd_perf_pkg, &perf_data_pkg[0], sizeof(perf_data_pkg)); 4342 4343 if (actual_read_size_pkg <= 0) 4344 err(-1, "%s: read perf %s: %ld", __func__, "pkg", actual_read_size_pkg); 4345 } 4346 4347 const ssize_t actual_read_size_total = actual_read_size_core + actual_read_size_pkg; 4348 4349 if (actual_read_size_total != expected_read_size) 4350 err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, actual_read_size_total); 4351 4352 /* 4353 * Copy ccstate and pcstate data into unified buffer. 4354 * 4355 * Skip first element from core and pkg buffers. 4356 * Kernel puts there how many counters were read. 4357 */ 4358 const size_t num_core_counters = perf_data_core[0]; 4359 const size_t num_pkg_counters = perf_data_pkg[0]; 4360 4361 assert(num_perf_counters == num_core_counters + num_pkg_counters); 4362 4363 /* Copy ccstate perf data */ 4364 memcpy(&perf_data[0], &perf_data_core[1], num_core_counters * sizeof(unsigned long long)); 4365 4366 /* Copy pcstate perf data */ 4367 memcpy(&perf_data[num_core_counters], &perf_data_pkg[1], num_pkg_counters * sizeof(unsigned long long)); 4368 4369 for (unsigned int i = 0, pi = 0; i < NUM_CSTATE_COUNTERS; ++i) { 4370 switch (cci->source[i]) { 4371 case COUNTER_SOURCE_NONE: 4372 break; 4373 4374 case COUNTER_SOURCE_PERF: 4375 assert(pi < ARRAY_SIZE(perf_data)); 4376 assert(cci->fd_perf_core != -1 || cci->fd_perf_pkg != -1); 4377 4378 if (debug >= 2) 4379 fprintf(stderr, "cstate via %s %u: %llu\n", "perf", i, perf_data[pi]); 4380 4381 cci->data[i] = perf_data[pi]; 4382 4383 ++pi; 4384 break; 4385 4386 case COUNTER_SOURCE_MSR: 4387 assert(!no_msr); 4388 if (get_msr(cpu, cci->msr[i], &cci->data[i])) 4389 return -13 - i; 4390 4391 if (debug >= 2) 4392 fprintf(stderr, "cstate via %s0x%llx %u: %llu\n", "msr", cci->msr[i], i, cci->data[i]); 4393 4394 break; 4395 } 4396 } 4397 4398 /* 4399 * Helper to write the data only if the source of 4400 * the counter for the current cpu is not none. 4401 * 4402 * Otherwise we would overwrite core data with 0 (default value), 4403 * when invoked for the thread sibling. 4404 */ 4405 #define PERF_COUNTER_WRITE_DATA(out_counter, index) do { \ 4406 if (cci->source[index] != COUNTER_SOURCE_NONE) \ 4407 out_counter = cci->data[index]; \ 4408 } while (0) 4409 4410 BUILD_BUG_ON(NUM_CSTATE_COUNTERS != 11); 4411 4412 PERF_COUNTER_WRITE_DATA(t->c1, CCSTATE_RCI_INDEX_C1_RESIDENCY); 4413 PERF_COUNTER_WRITE_DATA(c->c3, CCSTATE_RCI_INDEX_C3_RESIDENCY); 4414 PERF_COUNTER_WRITE_DATA(c->c6, CCSTATE_RCI_INDEX_C6_RESIDENCY); 4415 PERF_COUNTER_WRITE_DATA(c->c7, CCSTATE_RCI_INDEX_C7_RESIDENCY); 4416 4417 PERF_COUNTER_WRITE_DATA(p->pc2, PCSTATE_RCI_INDEX_C2_RESIDENCY); 4418 PERF_COUNTER_WRITE_DATA(p->pc3, PCSTATE_RCI_INDEX_C3_RESIDENCY); 4419 PERF_COUNTER_WRITE_DATA(p->pc6, PCSTATE_RCI_INDEX_C6_RESIDENCY); 4420 PERF_COUNTER_WRITE_DATA(p->pc7, PCSTATE_RCI_INDEX_C7_RESIDENCY); 4421 PERF_COUNTER_WRITE_DATA(p->pc8, PCSTATE_RCI_INDEX_C8_RESIDENCY); 4422 PERF_COUNTER_WRITE_DATA(p->pc9, PCSTATE_RCI_INDEX_C9_RESIDENCY); 4423 PERF_COUNTER_WRITE_DATA(p->pc10, PCSTATE_RCI_INDEX_C10_RESIDENCY); 4424 4425 #undef PERF_COUNTER_WRITE_DATA 4426 4427 return 0; 4428 } 4429 4430 size_t msr_counter_info_count_perf(const struct msr_counter_info_t *mci) 4431 { 4432 size_t ret = 0; 4433 4434 for (int i = 0; i < NUM_MSR_COUNTERS; ++i) 4435 if (mci->source[i] == COUNTER_SOURCE_PERF) 4436 ++ret; 4437 4438 return ret; 4439 } 4440 4441 int get_smi_aperf_mperf(unsigned int cpu, struct thread_data *t) 4442 { 4443 unsigned long long perf_data[NUM_MSR_COUNTERS + 1]; 4444 4445 struct msr_counter_info_t *mci; 4446 4447 if (debug >= 2) 4448 fprintf(stderr, "%s: cpu%d\n", __func__, cpu); 4449 4450 assert(msr_counter_info); 4451 assert(cpu <= msr_counter_info_size); 4452 4453 mci = &msr_counter_info[cpu]; 4454 4455 ZERO_ARRAY(perf_data); 4456 ZERO_ARRAY(mci->data); 4457 4458 if (mci->fd_perf != -1) { 4459 const size_t num_perf_counters = msr_counter_info_count_perf(mci); 4460 const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long); 4461 const ssize_t actual_read_size = read(mci->fd_perf, &perf_data[0], sizeof(perf_data)); 4462 4463 if (actual_read_size != expected_read_size) 4464 err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, 4465 actual_read_size); 4466 } 4467 4468 for (unsigned int i = 0, pi = 1; i < NUM_MSR_COUNTERS; ++i) { 4469 switch (mci->source[i]) { 4470 case COUNTER_SOURCE_NONE: 4471 break; 4472 4473 case COUNTER_SOURCE_PERF: 4474 assert(pi < ARRAY_SIZE(perf_data)); 4475 assert(mci->fd_perf != -1); 4476 4477 if (debug >= 2) 4478 fprintf(stderr, "Reading msr counter via perf at %u: %llu\n", i, perf_data[pi]); 4479 4480 mci->data[i] = perf_data[pi]; 4481 4482 ++pi; 4483 break; 4484 4485 case COUNTER_SOURCE_MSR: 4486 assert(!no_msr); 4487 4488 if (get_msr(cpu, mci->msr[i], &mci->data[i])) 4489 return -2 - i; 4490 4491 mci->data[i] &= mci->msr_mask[i]; 4492 4493 if (debug >= 2) 4494 fprintf(stderr, "Reading msr counter via msr at %u: %llu\n", i, mci->data[i]); 4495 4496 break; 4497 } 4498 } 4499 4500 BUILD_BUG_ON(NUM_MSR_COUNTERS != 3); 4501 t->aperf = mci->data[MSR_RCI_INDEX_APERF]; 4502 t->mperf = mci->data[MSR_RCI_INDEX_MPERF]; 4503 t->smi_count = mci->data[MSR_RCI_INDEX_SMI]; 4504 4505 return 0; 4506 } 4507 4508 int perf_counter_info_read_values(struct perf_counter_info *pp, int cpu, unsigned long long *out, size_t out_size) 4509 { 4510 unsigned int domain; 4511 unsigned long long value; 4512 int fd_counter; 4513 4514 for (size_t i = 0; pp; ++i, pp = pp->next) { 4515 domain = cpu_to_domain(pp, cpu); 4516 assert(domain < pp->num_domains); 4517 4518 fd_counter = pp->fd_perf_per_domain[domain]; 4519 4520 if (fd_counter == -1) 4521 continue; 4522 4523 if (read(fd_counter, &value, sizeof(value)) != sizeof(value)) 4524 return 1; 4525 4526 assert(i < out_size); 4527 out[i] = value * pp->scale; 4528 } 4529 4530 return 0; 4531 } 4532 4533 unsigned long pmt_gen_value_mask(unsigned int lsb, unsigned int msb) 4534 { 4535 unsigned long mask; 4536 4537 if (msb == 63) 4538 mask = 0xffffffffffffffff; 4539 else 4540 mask = ((1 << (msb + 1)) - 1); 4541 4542 mask -= (1 << lsb) - 1; 4543 4544 return mask; 4545 } 4546 4547 unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id) 4548 { 4549 assert(domain_id < ppmt->num_domains); 4550 4551 const unsigned long *pmmio = ppmt->domains[domain_id].pcounter; 4552 const unsigned long value = pmmio ? *pmmio : 0; 4553 const unsigned long value_mask = pmt_gen_value_mask(ppmt->lsb, ppmt->msb); 4554 const unsigned long value_shift = ppmt->lsb; 4555 4556 return (value & value_mask) >> value_shift; 4557 } 4558 4559 /* 4560 * get_counters(...) 4561 * migrate to cpu 4562 * acquire and record local counters for that cpu 4563 */ 4564 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 4565 { 4566 int cpu = t->cpu_id; 4567 unsigned long long msr; 4568 struct msr_counter *mp; 4569 struct pmt_counter *pp; 4570 int i; 4571 int status; 4572 4573 if (cpu_migrate(cpu)) { 4574 fprintf(outf, "%s: Could not migrate to CPU %d\n", __func__, cpu); 4575 return -1; 4576 } 4577 4578 gettimeofday(&t->tv_begin, (struct timezone *)NULL); 4579 4580 if (first_counter_read) 4581 get_apic_id(t); 4582 4583 t->tsc = rdtsc(); /* we are running on local CPU of interest */ 4584 4585 get_smi_aperf_mperf(cpu, t); 4586 4587 if (DO_BIC(BIC_IPC)) 4588 if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long)) 4589 return -4; 4590 4591 if (DO_BIC(BIC_IRQ)) 4592 t->irq_count = irqs_per_cpu[cpu]; 4593 4594 get_cstate_counters(cpu, t, c, p); 4595 4596 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 4597 if (get_mp(cpu, mp, &t->counter[i], mp->sp->path)) 4598 return -10; 4599 } 4600 4601 if (perf_counter_info_read_values(sys.perf_tp, cpu, t->perf_counter, MAX_ADDED_THREAD_COUNTERS)) 4602 return -10; 4603 4604 for (i = 0, pp = sys.pmt_tp; pp; i++, pp = pp->next) 4605 t->pmt_counter[i] = pmt_read_counter(pp, t->cpu_id); 4606 4607 /* collect core counters only for 1st thread in core */ 4608 if (!is_cpu_first_thread_in_core(t, c, p)) 4609 goto done; 4610 4611 if (platform->has_per_core_rapl) { 4612 status = get_rapl_counters(cpu, c->core_id, c, p); 4613 if (status != 0) 4614 return status; 4615 } 4616 4617 if (DO_BIC(BIC_CPU_c7) && t->is_atom) { 4618 /* 4619 * For Atom CPUs that has core cstate deeper than c6, 4620 * MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper. 4621 * Minus CC7 (and deeper cstates) residency to get 4622 * accturate cc6 residency. 4623 */ 4624 c->c6 -= c->c7; 4625 } 4626 4627 if (DO_BIC(BIC_Mod_c6)) 4628 if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us)) 4629 return -8; 4630 4631 if (DO_BIC(BIC_CoreTmp)) { 4632 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) 4633 return -9; 4634 c->core_temp_c = tj_max - ((msr >> 16) & 0x7F); 4635 } 4636 4637 if (DO_BIC(BIC_CORE_THROT_CNT)) 4638 get_core_throt_cnt(cpu, &c->core_throt_cnt); 4639 4640 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 4641 if (get_mp(cpu, mp, &c->counter[i], mp->sp->path)) 4642 return -10; 4643 } 4644 4645 if (perf_counter_info_read_values(sys.perf_cp, cpu, c->perf_counter, MAX_ADDED_CORE_COUNTERS)) 4646 return -10; 4647 4648 for (i = 0, pp = sys.pmt_cp; pp; i++, pp = pp->next) 4649 c->pmt_counter[i] = pmt_read_counter(pp, c->core_id); 4650 4651 /* collect package counters only for 1st core in package */ 4652 if (!is_cpu_first_core_in_package(t, c, p)) 4653 goto done; 4654 4655 if (DO_BIC(BIC_Totl_c0)) { 4656 if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0)) 4657 return -10; 4658 } 4659 if (DO_BIC(BIC_Any_c0)) { 4660 if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0)) 4661 return -11; 4662 } 4663 if (DO_BIC(BIC_GFX_c0)) { 4664 if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0)) 4665 return -12; 4666 } 4667 if (DO_BIC(BIC_CPUGFX)) { 4668 if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0)) 4669 return -13; 4670 } 4671 4672 if (DO_BIC(BIC_CPU_LPI)) 4673 p->cpu_lpi = cpuidle_cur_cpu_lpi_us; 4674 if (DO_BIC(BIC_SYS_LPI)) 4675 p->sys_lpi = cpuidle_cur_sys_lpi_us; 4676 4677 if (!platform->has_per_core_rapl) { 4678 status = get_rapl_counters(cpu, p->package_id, c, p); 4679 if (status != 0) 4680 return status; 4681 } 4682 4683 if (DO_BIC(BIC_PkgTmp)) { 4684 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) 4685 return -17; 4686 p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F); 4687 } 4688 4689 if (DO_BIC(BIC_UNCORE_MHZ)) 4690 p->uncore_mhz = get_legacy_uncore_mhz(p->package_id); 4691 4692 if (DO_BIC(BIC_GFX_rc6)) 4693 p->gfx_rc6_ms = gfx_info[GFX_rc6].val_ull; 4694 4695 if (DO_BIC(BIC_GFXMHz)) 4696 p->gfx_mhz = gfx_info[GFX_MHz].val; 4697 4698 if (DO_BIC(BIC_GFXACTMHz)) 4699 p->gfx_act_mhz = gfx_info[GFX_ACTMHz].val; 4700 4701 if (DO_BIC(BIC_SAM_mc6)) 4702 p->sam_mc6_ms = gfx_info[SAM_mc6].val_ull; 4703 4704 if (DO_BIC(BIC_SAMMHz)) 4705 p->sam_mhz = gfx_info[SAM_MHz].val; 4706 4707 if (DO_BIC(BIC_SAMACTMHz)) 4708 p->sam_act_mhz = gfx_info[SAM_ACTMHz].val; 4709 4710 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 4711 char *path = NULL; 4712 4713 if (mp->msr_num == 0) { 4714 path = find_sysfs_path_by_id(mp->sp, p->package_id); 4715 if (path == NULL) { 4716 warnx("%s: package_id %d not found", __func__, p->package_id); 4717 return -10; 4718 } 4719 } 4720 if (get_mp(cpu, mp, &p->counter[i], path)) 4721 return -10; 4722 } 4723 4724 if (perf_counter_info_read_values(sys.perf_pp, cpu, p->perf_counter, MAX_ADDED_PACKAGE_COUNTERS)) 4725 return -10; 4726 4727 for (i = 0, pp = sys.pmt_pp; pp; i++, pp = pp->next) 4728 p->pmt_counter[i] = pmt_read_counter(pp, p->package_id); 4729 4730 done: 4731 gettimeofday(&t->tv_end, (struct timezone *)NULL); 4732 4733 return 0; 4734 } 4735 4736 int pkg_cstate_limit = PCLUKN; 4737 char *pkg_cstate_limit_strings[] = { "unknown", "reserved", "pc0", "pc1", "pc2", 4738 "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited" 4739 }; 4740 4741 int nhm_pkg_cstate_limits[16] = 4742 { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4743 PCLRSV, PCLRSV 4744 }; 4745 4746 int snb_pkg_cstate_limits[16] = 4747 { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4748 PCLRSV, PCLRSV 4749 }; 4750 4751 int hsw_pkg_cstate_limits[16] = 4752 { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4753 PCLRSV, PCLRSV 4754 }; 4755 4756 int slv_pkg_cstate_limits[16] = 4757 { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4758 PCL__6, PCL__7 4759 }; 4760 4761 int amt_pkg_cstate_limits[16] = 4762 { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4763 PCLRSV, PCLRSV 4764 }; 4765 4766 int phi_pkg_cstate_limits[16] = 4767 { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4768 PCLRSV, PCLRSV 4769 }; 4770 4771 int glm_pkg_cstate_limits[16] = 4772 { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4773 PCLRSV, PCLRSV 4774 }; 4775 4776 int skx_pkg_cstate_limits[16] = 4777 { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4778 PCLRSV, PCLRSV 4779 }; 4780 4781 int icx_pkg_cstate_limits[16] = 4782 { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4783 PCLRSV, PCLRSV 4784 }; 4785 4786 void probe_cst_limit(void) 4787 { 4788 unsigned long long msr; 4789 int *pkg_cstate_limits; 4790 4791 if (!platform->has_nhm_msrs || no_msr) 4792 return; 4793 4794 switch (platform->cst_limit) { 4795 case CST_LIMIT_NHM: 4796 pkg_cstate_limits = nhm_pkg_cstate_limits; 4797 break; 4798 case CST_LIMIT_SNB: 4799 pkg_cstate_limits = snb_pkg_cstate_limits; 4800 break; 4801 case CST_LIMIT_HSW: 4802 pkg_cstate_limits = hsw_pkg_cstate_limits; 4803 break; 4804 case CST_LIMIT_SKX: 4805 pkg_cstate_limits = skx_pkg_cstate_limits; 4806 break; 4807 case CST_LIMIT_ICX: 4808 pkg_cstate_limits = icx_pkg_cstate_limits; 4809 break; 4810 case CST_LIMIT_SLV: 4811 pkg_cstate_limits = slv_pkg_cstate_limits; 4812 break; 4813 case CST_LIMIT_AMT: 4814 pkg_cstate_limits = amt_pkg_cstate_limits; 4815 break; 4816 case CST_LIMIT_KNL: 4817 pkg_cstate_limits = phi_pkg_cstate_limits; 4818 break; 4819 case CST_LIMIT_GMT: 4820 pkg_cstate_limits = glm_pkg_cstate_limits; 4821 break; 4822 default: 4823 return; 4824 } 4825 4826 get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); 4827 pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; 4828 } 4829 4830 static void dump_platform_info(void) 4831 { 4832 unsigned long long msr; 4833 unsigned int ratio; 4834 4835 if (!platform->has_nhm_msrs || no_msr) 4836 return; 4837 4838 get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); 4839 4840 fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); 4841 4842 ratio = (msr >> 40) & 0xFF; 4843 fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk); 4844 4845 ratio = (msr >> 8) & 0xFF; 4846 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); 4847 } 4848 4849 static void dump_power_ctl(void) 4850 { 4851 unsigned long long msr; 4852 4853 if (!platform->has_nhm_msrs || no_msr) 4854 return; 4855 4856 get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); 4857 fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", 4858 base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); 4859 4860 /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */ 4861 if (platform->has_cst_prewake_bit) 4862 fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN"); 4863 4864 return; 4865 } 4866 4867 static void dump_turbo_ratio_limit2(void) 4868 { 4869 unsigned long long msr; 4870 unsigned int ratio; 4871 4872 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); 4873 4874 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr); 4875 4876 ratio = (msr >> 8) & 0xFF; 4877 if (ratio) 4878 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk); 4879 4880 ratio = (msr >> 0) & 0xFF; 4881 if (ratio) 4882 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk); 4883 return; 4884 } 4885 4886 static void dump_turbo_ratio_limit1(void) 4887 { 4888 unsigned long long msr; 4889 unsigned int ratio; 4890 4891 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); 4892 4893 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr); 4894 4895 ratio = (msr >> 56) & 0xFF; 4896 if (ratio) 4897 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk); 4898 4899 ratio = (msr >> 48) & 0xFF; 4900 if (ratio) 4901 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk); 4902 4903 ratio = (msr >> 40) & 0xFF; 4904 if (ratio) 4905 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk); 4906 4907 ratio = (msr >> 32) & 0xFF; 4908 if (ratio) 4909 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk); 4910 4911 ratio = (msr >> 24) & 0xFF; 4912 if (ratio) 4913 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk); 4914 4915 ratio = (msr >> 16) & 0xFF; 4916 if (ratio) 4917 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk); 4918 4919 ratio = (msr >> 8) & 0xFF; 4920 if (ratio) 4921 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk); 4922 4923 ratio = (msr >> 0) & 0xFF; 4924 if (ratio) 4925 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk); 4926 return; 4927 } 4928 4929 static void dump_turbo_ratio_limits(int trl_msr_offset) 4930 { 4931 unsigned long long msr, core_counts; 4932 int shift; 4933 4934 get_msr(base_cpu, trl_msr_offset, &msr); 4935 fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n", 4936 base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY_" : "", msr); 4937 4938 if (platform->trl_msrs & TRL_CORECOUNT) { 4939 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts); 4940 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts); 4941 } else { 4942 core_counts = 0x0807060504030201; 4943 } 4944 4945 for (shift = 56; shift >= 0; shift -= 8) { 4946 unsigned int ratio, group_size; 4947 4948 ratio = (msr >> shift) & 0xFF; 4949 group_size = (core_counts >> shift) & 0xFF; 4950 if (ratio) 4951 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", 4952 ratio, bclk, ratio * bclk, group_size); 4953 } 4954 4955 return; 4956 } 4957 4958 static void dump_atom_turbo_ratio_limits(void) 4959 { 4960 unsigned long long msr; 4961 unsigned int ratio; 4962 4963 get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr); 4964 fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF); 4965 4966 ratio = (msr >> 0) & 0x3F; 4967 if (ratio) 4968 fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", ratio, bclk, ratio * bclk); 4969 4970 ratio = (msr >> 8) & 0x3F; 4971 if (ratio) 4972 fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", ratio, bclk, ratio * bclk); 4973 4974 ratio = (msr >> 16) & 0x3F; 4975 if (ratio) 4976 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); 4977 4978 get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr); 4979 fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF); 4980 4981 ratio = (msr >> 24) & 0x3F; 4982 if (ratio) 4983 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk); 4984 4985 ratio = (msr >> 16) & 0x3F; 4986 if (ratio) 4987 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk); 4988 4989 ratio = (msr >> 8) & 0x3F; 4990 if (ratio) 4991 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk); 4992 4993 ratio = (msr >> 0) & 0x3F; 4994 if (ratio) 4995 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", ratio, bclk, ratio * bclk); 4996 } 4997 4998 static void dump_knl_turbo_ratio_limits(void) 4999 { 5000 const unsigned int buckets_no = 7; 5001 5002 unsigned long long msr; 5003 int delta_cores, delta_ratio; 5004 int i, b_nr; 5005 unsigned int cores[buckets_no]; 5006 unsigned int ratio[buckets_no]; 5007 5008 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); 5009 5010 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); 5011 5012 /* 5013 * Turbo encoding in KNL is as follows: 5014 * [0] -- Reserved 5015 * [7:1] -- Base value of number of active cores of bucket 1. 5016 * [15:8] -- Base value of freq ratio of bucket 1. 5017 * [20:16] -- +ve delta of number of active cores of bucket 2. 5018 * i.e. active cores of bucket 2 = 5019 * active cores of bucket 1 + delta 5020 * [23:21] -- Negative delta of freq ratio of bucket 2. 5021 * i.e. freq ratio of bucket 2 = 5022 * freq ratio of bucket 1 - delta 5023 * [28:24]-- +ve delta of number of active cores of bucket 3. 5024 * [31:29]-- -ve delta of freq ratio of bucket 3. 5025 * [36:32]-- +ve delta of number of active cores of bucket 4. 5026 * [39:37]-- -ve delta of freq ratio of bucket 4. 5027 * [44:40]-- +ve delta of number of active cores of bucket 5. 5028 * [47:45]-- -ve delta of freq ratio of bucket 5. 5029 * [52:48]-- +ve delta of number of active cores of bucket 6. 5030 * [55:53]-- -ve delta of freq ratio of bucket 6. 5031 * [60:56]-- +ve delta of number of active cores of bucket 7. 5032 * [63:61]-- -ve delta of freq ratio of bucket 7. 5033 */ 5034 5035 b_nr = 0; 5036 cores[b_nr] = (msr & 0xFF) >> 1; 5037 ratio[b_nr] = (msr >> 8) & 0xFF; 5038 5039 for (i = 16; i < 64; i += 8) { 5040 delta_cores = (msr >> i) & 0x1F; 5041 delta_ratio = (msr >> (i + 5)) & 0x7; 5042 5043 cores[b_nr + 1] = cores[b_nr] + delta_cores; 5044 ratio[b_nr + 1] = ratio[b_nr] - delta_ratio; 5045 b_nr++; 5046 } 5047 5048 for (i = buckets_no - 1; i >= 0; i--) 5049 if (i > 0 ? ratio[i] != ratio[i - 1] : 1) 5050 fprintf(outf, 5051 "%d * %.1f = %.1f MHz max turbo %d active cores\n", 5052 ratio[i], bclk, ratio[i] * bclk, cores[i]); 5053 } 5054 5055 static void dump_cst_cfg(void) 5056 { 5057 unsigned long long msr; 5058 5059 if (!platform->has_nhm_msrs || no_msr) 5060 return; 5061 5062 get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); 5063 5064 fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr); 5065 5066 fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)", 5067 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", 5068 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", 5069 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", 5070 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", 5071 (msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]); 5072 5073 #define AUTOMATIC_CSTATE_CONVERSION (1UL << 16) 5074 if (platform->has_cst_auto_convension) { 5075 fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off"); 5076 } 5077 5078 fprintf(outf, ")\n"); 5079 5080 return; 5081 } 5082 5083 static void dump_config_tdp(void) 5084 { 5085 unsigned long long msr; 5086 5087 get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr); 5088 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr); 5089 fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF); 5090 5091 get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr); 5092 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr); 5093 if (msr) { 5094 fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF); 5095 fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF); 5096 fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); 5097 fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF); 5098 } 5099 fprintf(outf, ")\n"); 5100 5101 get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr); 5102 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr); 5103 if (msr) { 5104 fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF); 5105 fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF); 5106 fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); 5107 fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF); 5108 } 5109 fprintf(outf, ")\n"); 5110 5111 get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr); 5112 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr); 5113 if ((msr) & 0x3) 5114 fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3); 5115 fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); 5116 fprintf(outf, ")\n"); 5117 5118 get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr); 5119 fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); 5120 fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF); 5121 fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); 5122 fprintf(outf, ")\n"); 5123 } 5124 5125 unsigned int irtl_time_units[] = { 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 }; 5126 5127 void print_irtl(void) 5128 { 5129 unsigned long long msr; 5130 5131 if (!platform->has_irtl_msrs || no_msr) 5132 return; 5133 5134 if (platform->supported_cstates & PC3) { 5135 get_msr(base_cpu, MSR_PKGC3_IRTL, &msr); 5136 fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr); 5137 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", 5138 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 5139 } 5140 5141 if (platform->supported_cstates & PC6) { 5142 get_msr(base_cpu, MSR_PKGC6_IRTL, &msr); 5143 fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr); 5144 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", 5145 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 5146 } 5147 5148 if (platform->supported_cstates & PC7) { 5149 get_msr(base_cpu, MSR_PKGC7_IRTL, &msr); 5150 fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr); 5151 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", 5152 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 5153 } 5154 5155 if (platform->supported_cstates & PC8) { 5156 get_msr(base_cpu, MSR_PKGC8_IRTL, &msr); 5157 fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr); 5158 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", 5159 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 5160 } 5161 5162 if (platform->supported_cstates & PC9) { 5163 get_msr(base_cpu, MSR_PKGC9_IRTL, &msr); 5164 fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr); 5165 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", 5166 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 5167 } 5168 5169 if (platform->supported_cstates & PC10) { 5170 get_msr(base_cpu, MSR_PKGC10_IRTL, &msr); 5171 fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr); 5172 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", 5173 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 5174 } 5175 } 5176 5177 void free_fd_percpu(void) 5178 { 5179 int i; 5180 5181 if (!fd_percpu) 5182 return; 5183 5184 for (i = 0; i < topo.max_cpu_num + 1; ++i) { 5185 if (fd_percpu[i] != 0) 5186 close(fd_percpu[i]); 5187 } 5188 5189 free(fd_percpu); 5190 fd_percpu = NULL; 5191 } 5192 5193 void free_fd_instr_count_percpu(void) 5194 { 5195 if (!fd_instr_count_percpu) 5196 return; 5197 5198 for (int i = 0; i < topo.max_cpu_num + 1; ++i) { 5199 if (fd_instr_count_percpu[i] != 0) 5200 close(fd_instr_count_percpu[i]); 5201 } 5202 5203 free(fd_instr_count_percpu); 5204 fd_instr_count_percpu = NULL; 5205 } 5206 5207 void free_fd_cstate(void) 5208 { 5209 if (!ccstate_counter_info) 5210 return; 5211 5212 const int counter_info_num = ccstate_counter_info_size; 5213 5214 for (int counter_id = 0; counter_id < counter_info_num; ++counter_id) { 5215 if (ccstate_counter_info[counter_id].fd_perf_core != -1) 5216 close(ccstate_counter_info[counter_id].fd_perf_core); 5217 5218 if (ccstate_counter_info[counter_id].fd_perf_pkg != -1) 5219 close(ccstate_counter_info[counter_id].fd_perf_pkg); 5220 } 5221 5222 free(ccstate_counter_info); 5223 ccstate_counter_info = NULL; 5224 ccstate_counter_info_size = 0; 5225 } 5226 5227 void free_fd_msr(void) 5228 { 5229 if (!msr_counter_info) 5230 return; 5231 5232 for (int cpu = 0; cpu < topo.max_cpu_num; ++cpu) { 5233 if (msr_counter_info[cpu].fd_perf != -1) 5234 close(msr_counter_info[cpu].fd_perf); 5235 } 5236 5237 free(msr_counter_info); 5238 msr_counter_info = NULL; 5239 msr_counter_info_size = 0; 5240 } 5241 5242 void free_fd_rapl_percpu(void) 5243 { 5244 if (!rapl_counter_info_perdomain) 5245 return; 5246 5247 const int num_domains = rapl_counter_info_perdomain_size; 5248 5249 for (int domain_id = 0; domain_id < num_domains; ++domain_id) { 5250 if (rapl_counter_info_perdomain[domain_id].fd_perf != -1) 5251 close(rapl_counter_info_perdomain[domain_id].fd_perf); 5252 } 5253 5254 free(rapl_counter_info_perdomain); 5255 rapl_counter_info_perdomain = NULL; 5256 rapl_counter_info_perdomain_size = 0; 5257 } 5258 5259 void free_fd_added_perf_counters_(struct perf_counter_info *pp) 5260 { 5261 if (!pp) 5262 return; 5263 5264 if (!pp->fd_perf_per_domain) 5265 return; 5266 5267 while (pp) { 5268 for (size_t domain = 0; domain < pp->num_domains; ++domain) { 5269 if (pp->fd_perf_per_domain[domain] != -1) { 5270 close(pp->fd_perf_per_domain[domain]); 5271 pp->fd_perf_per_domain[domain] = -1; 5272 } 5273 } 5274 5275 free(pp->fd_perf_per_domain); 5276 pp->fd_perf_per_domain = NULL; 5277 5278 pp = pp->next; 5279 } 5280 } 5281 5282 void free_fd_added_perf_counters(void) 5283 { 5284 free_fd_added_perf_counters_(sys.perf_tp); 5285 free_fd_added_perf_counters_(sys.perf_cp); 5286 free_fd_added_perf_counters_(sys.perf_pp); 5287 } 5288 5289 void free_all_buffers(void) 5290 { 5291 int i; 5292 5293 CPU_FREE(cpu_present_set); 5294 cpu_present_set = NULL; 5295 cpu_present_setsize = 0; 5296 5297 CPU_FREE(cpu_effective_set); 5298 cpu_effective_set = NULL; 5299 cpu_effective_setsize = 0; 5300 5301 CPU_FREE(cpu_allowed_set); 5302 cpu_allowed_set = NULL; 5303 cpu_allowed_setsize = 0; 5304 5305 CPU_FREE(cpu_affinity_set); 5306 cpu_affinity_set = NULL; 5307 cpu_affinity_setsize = 0; 5308 5309 free(thread_even); 5310 free(core_even); 5311 free(package_even); 5312 5313 thread_even = NULL; 5314 core_even = NULL; 5315 package_even = NULL; 5316 5317 free(thread_odd); 5318 free(core_odd); 5319 free(package_odd); 5320 5321 thread_odd = NULL; 5322 core_odd = NULL; 5323 package_odd = NULL; 5324 5325 free(output_buffer); 5326 output_buffer = NULL; 5327 outp = NULL; 5328 5329 free_fd_percpu(); 5330 free_fd_instr_count_percpu(); 5331 free_fd_msr(); 5332 free_fd_rapl_percpu(); 5333 free_fd_cstate(); 5334 free_fd_added_perf_counters(); 5335 5336 free(irq_column_2_cpu); 5337 free(irqs_per_cpu); 5338 5339 for (i = 0; i <= topo.max_cpu_num; ++i) { 5340 if (cpus[i].put_ids) 5341 CPU_FREE(cpus[i].put_ids); 5342 } 5343 free(cpus); 5344 } 5345 5346 /* 5347 * Parse a file containing a single int. 5348 * Return 0 if file can not be opened 5349 * Exit if file can be opened, but can not be parsed 5350 */ 5351 int parse_int_file(const char *fmt, ...) 5352 { 5353 va_list args; 5354 char path[PATH_MAX]; 5355 FILE *filep; 5356 int value; 5357 5358 va_start(args, fmt); 5359 vsnprintf(path, sizeof(path), fmt, args); 5360 va_end(args); 5361 filep = fopen(path, "r"); 5362 if (!filep) 5363 return 0; 5364 if (fscanf(filep, "%d", &value) != 1) 5365 err(1, "%s: failed to parse number from file", path); 5366 fclose(filep); 5367 return value; 5368 } 5369 5370 /* 5371 * cpu_is_first_core_in_package(cpu) 5372 * return 1 if given CPU is 1st core in package 5373 */ 5374 int cpu_is_first_core_in_package(int cpu) 5375 { 5376 return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu); 5377 } 5378 5379 int get_physical_package_id(int cpu) 5380 { 5381 return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu); 5382 } 5383 5384 int get_die_id(int cpu) 5385 { 5386 return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu); 5387 } 5388 5389 int get_core_id(int cpu) 5390 { 5391 return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); 5392 } 5393 5394 void set_node_data(void) 5395 { 5396 int pkg, node, lnode, cpu, cpux; 5397 int cpu_count; 5398 5399 /* initialize logical_node_id */ 5400 for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) 5401 cpus[cpu].logical_node_id = -1; 5402 5403 cpu_count = 0; 5404 for (pkg = 0; pkg < topo.num_packages; pkg++) { 5405 lnode = 0; 5406 for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) { 5407 if (cpus[cpu].physical_package_id != pkg) 5408 continue; 5409 /* find a cpu with an unset logical_node_id */ 5410 if (cpus[cpu].logical_node_id != -1) 5411 continue; 5412 cpus[cpu].logical_node_id = lnode; 5413 node = cpus[cpu].physical_node_id; 5414 cpu_count++; 5415 /* 5416 * find all matching cpus on this pkg and set 5417 * the logical_node_id 5418 */ 5419 for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) { 5420 if ((cpus[cpux].physical_package_id == pkg) && (cpus[cpux].physical_node_id == node)) { 5421 cpus[cpux].logical_node_id = lnode; 5422 cpu_count++; 5423 } 5424 } 5425 lnode++; 5426 if (lnode > topo.nodes_per_pkg) 5427 topo.nodes_per_pkg = lnode; 5428 } 5429 if (cpu_count >= topo.max_cpu_num) 5430 break; 5431 } 5432 } 5433 5434 int get_physical_node_id(struct cpu_topology *thiscpu) 5435 { 5436 char path[80]; 5437 FILE *filep; 5438 int i; 5439 int cpu = thiscpu->logical_cpu_id; 5440 5441 for (i = 0; i <= topo.max_cpu_num; i++) { 5442 sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", cpu, i); 5443 filep = fopen(path, "r"); 5444 if (!filep) 5445 continue; 5446 fclose(filep); 5447 return i; 5448 } 5449 return -1; 5450 } 5451 5452 static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size) 5453 { 5454 unsigned int start, end; 5455 char *next = cpu_str; 5456 5457 while (next && *next) { 5458 5459 if (*next == '-') /* no negative cpu numbers */ 5460 return 1; 5461 5462 if (*next == '\0' || *next == '\n') 5463 break; 5464 5465 start = strtoul(next, &next, 10); 5466 5467 if (start >= CPU_SUBSET_MAXCPUS) 5468 return 1; 5469 CPU_SET_S(start, cpu_set_size, cpu_set); 5470 5471 if (*next == '\0' || *next == '\n') 5472 break; 5473 5474 if (*next == ',') { 5475 next += 1; 5476 continue; 5477 } 5478 5479 if (*next == '-') { 5480 next += 1; /* start range */ 5481 } else if (*next == '.') { 5482 next += 1; 5483 if (*next == '.') 5484 next += 1; /* start range */ 5485 else 5486 return 1; 5487 } 5488 5489 end = strtoul(next, &next, 10); 5490 if (end <= start) 5491 return 1; 5492 5493 while (++start <= end) { 5494 if (start >= CPU_SUBSET_MAXCPUS) 5495 return 1; 5496 CPU_SET_S(start, cpu_set_size, cpu_set); 5497 } 5498 5499 if (*next == ',') 5500 next += 1; 5501 else if (*next != '\0' && *next != '\n') 5502 return 1; 5503 } 5504 5505 return 0; 5506 } 5507 5508 int get_thread_siblings(struct cpu_topology *thiscpu) 5509 { 5510 char path[80], character; 5511 FILE *filep; 5512 unsigned long map; 5513 int so, shift, sib_core; 5514 int cpu = thiscpu->logical_cpu_id; 5515 int offset = topo.max_cpu_num + 1; 5516 size_t size; 5517 int thread_id = 0; 5518 5519 thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1)); 5520 if (thiscpu->thread_id < 0) 5521 thiscpu->thread_id = thread_id++; 5522 if (!thiscpu->put_ids) 5523 return -1; 5524 5525 size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 5526 CPU_ZERO_S(size, thiscpu->put_ids); 5527 5528 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu); 5529 filep = fopen(path, "r"); 5530 5531 if (!filep) { 5532 warnx("%s: open failed", path); 5533 return -1; 5534 } 5535 do { 5536 offset -= BITMASK_SIZE; 5537 if (fscanf(filep, "%lx%c", &map, &character) != 2) 5538 err(1, "%s: failed to parse file", path); 5539 for (shift = 0; shift < BITMASK_SIZE; shift++) { 5540 if ((map >> shift) & 0x1) { 5541 so = shift + offset; 5542 sib_core = get_core_id(so); 5543 if (sib_core == thiscpu->physical_core_id) { 5544 CPU_SET_S(so, size, thiscpu->put_ids); 5545 if ((so != cpu) && (cpus[so].thread_id < 0)) 5546 cpus[so].thread_id = thread_id++; 5547 } 5548 } 5549 } 5550 } while (character == ','); 5551 fclose(filep); 5552 5553 return CPU_COUNT_S(size, thiscpu->put_ids); 5554 } 5555 5556 /* 5557 * run func(thread, core, package) in topology order 5558 * skip non-present cpus 5559 */ 5560 5561 int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *, 5562 struct pkg_data *, struct thread_data *, struct core_data *, 5563 struct pkg_data *), struct thread_data *thread_base, 5564 struct core_data *core_base, struct pkg_data *pkg_base, 5565 struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2) 5566 { 5567 int retval, pkg_no, node_no, core_no, thread_no; 5568 5569 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 5570 for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) { 5571 for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { 5572 for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { 5573 struct thread_data *t, *t2; 5574 struct core_data *c, *c2; 5575 struct pkg_data *p, *p2; 5576 5577 t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); 5578 5579 if (cpu_is_not_allowed(t->cpu_id)) 5580 continue; 5581 5582 t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no); 5583 5584 c = GET_CORE(core_base, core_no, node_no, pkg_no); 5585 c2 = GET_CORE(core_base2, core_no, node_no, pkg_no); 5586 5587 p = GET_PKG(pkg_base, pkg_no); 5588 p2 = GET_PKG(pkg_base2, pkg_no); 5589 5590 retval = func(t, c, p, t2, c2, p2); 5591 if (retval) 5592 return retval; 5593 } 5594 } 5595 } 5596 } 5597 return 0; 5598 } 5599 5600 /* 5601 * run func(cpu) on every cpu in /proc/stat 5602 * return max_cpu number 5603 */ 5604 int for_all_proc_cpus(int (func) (int)) 5605 { 5606 FILE *fp; 5607 int cpu_num; 5608 int retval; 5609 5610 fp = fopen_or_die(proc_stat, "r"); 5611 5612 retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n"); 5613 if (retval != 0) 5614 err(1, "%s: failed to parse format", proc_stat); 5615 5616 while (1) { 5617 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num); 5618 if (retval != 1) 5619 break; 5620 5621 retval = func(cpu_num); 5622 if (retval) { 5623 fclose(fp); 5624 return (retval); 5625 } 5626 } 5627 fclose(fp); 5628 return 0; 5629 } 5630 5631 #define PATH_EFFECTIVE_CPUS "/sys/fs/cgroup/cpuset.cpus.effective" 5632 5633 static char cpu_effective_str[1024]; 5634 5635 static int update_effective_str(bool startup) 5636 { 5637 FILE *fp; 5638 char *pos; 5639 char buf[1024]; 5640 int ret; 5641 5642 if (cpu_effective_str[0] == '\0' && !startup) 5643 return 0; 5644 5645 fp = fopen(PATH_EFFECTIVE_CPUS, "r"); 5646 if (!fp) 5647 return 0; 5648 5649 pos = fgets(buf, 1024, fp); 5650 if (!pos) 5651 err(1, "%s: file read failed\n", PATH_EFFECTIVE_CPUS); 5652 5653 fclose(fp); 5654 5655 ret = strncmp(cpu_effective_str, buf, 1024); 5656 if (!ret) 5657 return 0; 5658 5659 strncpy(cpu_effective_str, buf, 1024); 5660 return 1; 5661 } 5662 5663 static void update_effective_set(bool startup) 5664 { 5665 update_effective_str(startup); 5666 5667 if (parse_cpu_str(cpu_effective_str, cpu_effective_set, cpu_effective_setsize)) 5668 err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str); 5669 } 5670 5671 void linux_perf_init(void); 5672 void msr_perf_init(void); 5673 void rapl_perf_init(void); 5674 void cstate_perf_init(void); 5675 void added_perf_counters_init(void); 5676 void pmt_init(void); 5677 5678 void re_initialize(void) 5679 { 5680 free_all_buffers(); 5681 setup_all_buffers(false); 5682 linux_perf_init(); 5683 msr_perf_init(); 5684 rapl_perf_init(); 5685 cstate_perf_init(); 5686 added_perf_counters_init(); 5687 pmt_init(); 5688 fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, 5689 topo.allowed_cpus); 5690 } 5691 5692 void set_max_cpu_num(void) 5693 { 5694 FILE *filep; 5695 int base_cpu; 5696 unsigned long dummy; 5697 char pathname[64]; 5698 5699 base_cpu = sched_getcpu(); 5700 if (base_cpu < 0) 5701 err(1, "cannot find calling cpu ID"); 5702 sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", base_cpu); 5703 5704 filep = fopen_or_die(pathname, "r"); 5705 topo.max_cpu_num = 0; 5706 while (fscanf(filep, "%lx,", &dummy) == 1) 5707 topo.max_cpu_num += BITMASK_SIZE; 5708 fclose(filep); 5709 topo.max_cpu_num--; /* 0 based */ 5710 } 5711 5712 /* 5713 * count_cpus() 5714 * remember the last one seen, it will be the max 5715 */ 5716 int count_cpus(int cpu) 5717 { 5718 UNUSED(cpu); 5719 5720 topo.num_cpus++; 5721 return 0; 5722 } 5723 5724 int mark_cpu_present(int cpu) 5725 { 5726 CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); 5727 return 0; 5728 } 5729 5730 int init_thread_id(int cpu) 5731 { 5732 cpus[cpu].thread_id = -1; 5733 return 0; 5734 } 5735 5736 int set_my_cpu_type(void) 5737 { 5738 unsigned int eax, ebx, ecx, edx; 5739 unsigned int max_level; 5740 5741 __cpuid(0, max_level, ebx, ecx, edx); 5742 5743 if (max_level < CPUID_LEAF_MODEL_ID) 5744 return 0; 5745 5746 __cpuid(CPUID_LEAF_MODEL_ID, eax, ebx, ecx, edx); 5747 5748 return (eax >> CPUID_LEAF_MODEL_ID_CORE_TYPE_SHIFT); 5749 } 5750 5751 int set_cpu_hybrid_type(int cpu) 5752 { 5753 if (cpu_migrate(cpu)) 5754 return -1; 5755 5756 int type = set_my_cpu_type(); 5757 5758 cpus[cpu].type = type; 5759 return 0; 5760 } 5761 5762 /* 5763 * snapshot_proc_interrupts() 5764 * 5765 * read and record summary of /proc/interrupts 5766 * 5767 * return 1 if config change requires a restart, else return 0 5768 */ 5769 int snapshot_proc_interrupts(void) 5770 { 5771 static FILE *fp; 5772 int column, retval; 5773 5774 if (fp == NULL) 5775 fp = fopen_or_die("/proc/interrupts", "r"); 5776 else 5777 rewind(fp); 5778 5779 /* read 1st line of /proc/interrupts to get cpu* name for each column */ 5780 for (column = 0; column < topo.num_cpus; ++column) { 5781 int cpu_number; 5782 5783 retval = fscanf(fp, " CPU%d", &cpu_number); 5784 if (retval != 1) 5785 break; 5786 5787 if (cpu_number > topo.max_cpu_num) { 5788 warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num); 5789 return 1; 5790 } 5791 5792 irq_column_2_cpu[column] = cpu_number; 5793 irqs_per_cpu[cpu_number] = 0; 5794 } 5795 5796 /* read /proc/interrupt count lines and sum up irqs per cpu */ 5797 while (1) { 5798 int column; 5799 char buf[64]; 5800 5801 retval = fscanf(fp, " %s:", buf); /* flush irq# "N:" */ 5802 if (retval != 1) 5803 break; 5804 5805 /* read the count per cpu */ 5806 for (column = 0; column < topo.num_cpus; ++column) { 5807 5808 int cpu_number, irq_count; 5809 5810 retval = fscanf(fp, " %d", &irq_count); 5811 if (retval != 1) 5812 break; 5813 5814 cpu_number = irq_column_2_cpu[column]; 5815 irqs_per_cpu[cpu_number] += irq_count; 5816 5817 } 5818 5819 while (getc(fp) != '\n') ; /* flush interrupt description */ 5820 5821 } 5822 return 0; 5823 } 5824 5825 /* 5826 * snapshot_graphics() 5827 * 5828 * record snapshot of specified graphics sysfs knob 5829 * 5830 * return 1 if config change requires a restart, else return 0 5831 */ 5832 int snapshot_graphics(int idx) 5833 { 5834 int retval; 5835 5836 rewind(gfx_info[idx].fp); 5837 5838 switch (idx) { 5839 case GFX_rc6: 5840 case SAM_mc6: 5841 retval = fscanf(gfx_info[idx].fp, "%lld", &gfx_info[idx].val_ull); 5842 if (retval != 1) 5843 err(1, "rc6"); 5844 return 0; 5845 case GFX_MHz: 5846 case GFX_ACTMHz: 5847 case SAM_MHz: 5848 case SAM_ACTMHz: 5849 retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val); 5850 if (retval != 1) 5851 err(1, "MHz"); 5852 return 0; 5853 default: 5854 return -EINVAL; 5855 } 5856 } 5857 5858 /* 5859 * snapshot_cpu_lpi() 5860 * 5861 * record snapshot of 5862 * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us 5863 */ 5864 int snapshot_cpu_lpi_us(void) 5865 { 5866 FILE *fp; 5867 int retval; 5868 5869 fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r"); 5870 5871 retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us); 5872 if (retval != 1) { 5873 fprintf(stderr, "Disabling Low Power Idle CPU output\n"); 5874 BIC_NOT_PRESENT(BIC_CPU_LPI); 5875 fclose(fp); 5876 return -1; 5877 } 5878 5879 fclose(fp); 5880 5881 return 0; 5882 } 5883 5884 /* 5885 * snapshot_sys_lpi() 5886 * 5887 * record snapshot of sys_lpi_file 5888 */ 5889 int snapshot_sys_lpi_us(void) 5890 { 5891 FILE *fp; 5892 int retval; 5893 5894 fp = fopen_or_die(sys_lpi_file, "r"); 5895 5896 retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us); 5897 if (retval != 1) { 5898 fprintf(stderr, "Disabling Low Power Idle System output\n"); 5899 BIC_NOT_PRESENT(BIC_SYS_LPI); 5900 fclose(fp); 5901 return -1; 5902 } 5903 fclose(fp); 5904 5905 return 0; 5906 } 5907 5908 /* 5909 * snapshot /proc and /sys files 5910 * 5911 * return 1 if configuration restart needed, else return 0 5912 */ 5913 int snapshot_proc_sysfs_files(void) 5914 { 5915 if (DO_BIC(BIC_IRQ)) 5916 if (snapshot_proc_interrupts()) 5917 return 1; 5918 5919 if (DO_BIC(BIC_GFX_rc6)) 5920 snapshot_graphics(GFX_rc6); 5921 5922 if (DO_BIC(BIC_GFXMHz)) 5923 snapshot_graphics(GFX_MHz); 5924 5925 if (DO_BIC(BIC_GFXACTMHz)) 5926 snapshot_graphics(GFX_ACTMHz); 5927 5928 if (DO_BIC(BIC_SAM_mc6)) 5929 snapshot_graphics(SAM_mc6); 5930 5931 if (DO_BIC(BIC_SAMMHz)) 5932 snapshot_graphics(SAM_MHz); 5933 5934 if (DO_BIC(BIC_SAMACTMHz)) 5935 snapshot_graphics(SAM_ACTMHz); 5936 5937 if (DO_BIC(BIC_CPU_LPI)) 5938 snapshot_cpu_lpi_us(); 5939 5940 if (DO_BIC(BIC_SYS_LPI)) 5941 snapshot_sys_lpi_us(); 5942 5943 return 0; 5944 } 5945 5946 int exit_requested; 5947 5948 static void signal_handler(int signal) 5949 { 5950 switch (signal) { 5951 case SIGINT: 5952 exit_requested = 1; 5953 if (debug) 5954 fprintf(stderr, " SIGINT\n"); 5955 break; 5956 case SIGUSR1: 5957 if (debug > 1) 5958 fprintf(stderr, "SIGUSR1\n"); 5959 break; 5960 } 5961 } 5962 5963 void setup_signal_handler(void) 5964 { 5965 struct sigaction sa; 5966 5967 memset(&sa, 0, sizeof(sa)); 5968 5969 sa.sa_handler = &signal_handler; 5970 5971 if (sigaction(SIGINT, &sa, NULL) < 0) 5972 err(1, "sigaction SIGINT"); 5973 if (sigaction(SIGUSR1, &sa, NULL) < 0) 5974 err(1, "sigaction SIGUSR1"); 5975 } 5976 5977 void do_sleep(void) 5978 { 5979 struct timeval tout; 5980 struct timespec rest; 5981 fd_set readfds; 5982 int retval; 5983 5984 FD_ZERO(&readfds); 5985 FD_SET(0, &readfds); 5986 5987 if (ignore_stdin) { 5988 nanosleep(&interval_ts, NULL); 5989 return; 5990 } 5991 5992 tout = interval_tv; 5993 retval = select(1, &readfds, NULL, NULL, &tout); 5994 5995 if (retval == 1) { 5996 switch (getc(stdin)) { 5997 case 'q': 5998 exit_requested = 1; 5999 break; 6000 case EOF: 6001 /* 6002 * 'stdin' is a pipe closed on the other end. There 6003 * won't be any further input. 6004 */ 6005 ignore_stdin = 1; 6006 /* Sleep the rest of the time */ 6007 rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000); 6008 rest.tv_nsec = (tout.tv_usec % 1000000) * 1000; 6009 nanosleep(&rest, NULL); 6010 } 6011 } 6012 } 6013 6014 int get_msr_sum(int cpu, off_t offset, unsigned long long *msr) 6015 { 6016 int ret, idx; 6017 unsigned long long msr_cur, msr_last; 6018 6019 assert(!no_msr); 6020 6021 if (!per_cpu_msr_sum) 6022 return 1; 6023 6024 idx = offset_to_idx(offset); 6025 if (idx < 0) 6026 return idx; 6027 /* get_msr_sum() = sum + (get_msr() - last) */ 6028 ret = get_msr(cpu, offset, &msr_cur); 6029 if (ret) 6030 return ret; 6031 msr_last = per_cpu_msr_sum[cpu].entries[idx].last; 6032 DELTA_WRAP32(msr_cur, msr_last); 6033 *msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum; 6034 6035 return 0; 6036 } 6037 6038 timer_t timerid; 6039 6040 /* Timer callback, update the sum of MSRs periodically. */ 6041 static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p) 6042 { 6043 int i, ret; 6044 int cpu = t->cpu_id; 6045 6046 UNUSED(c); 6047 UNUSED(p); 6048 6049 assert(!no_msr); 6050 6051 for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) { 6052 unsigned long long msr_cur, msr_last; 6053 off_t offset; 6054 6055 if (!idx_valid(i)) 6056 continue; 6057 offset = idx_to_offset(i); 6058 if (offset < 0) 6059 continue; 6060 ret = get_msr(cpu, offset, &msr_cur); 6061 if (ret) { 6062 fprintf(outf, "Can not update msr(0x%llx)\n", (unsigned long long)offset); 6063 continue; 6064 } 6065 6066 msr_last = per_cpu_msr_sum[cpu].entries[i].last; 6067 per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff; 6068 6069 DELTA_WRAP32(msr_cur, msr_last); 6070 per_cpu_msr_sum[cpu].entries[i].sum += msr_last; 6071 } 6072 return 0; 6073 } 6074 6075 static void msr_record_handler(union sigval v) 6076 { 6077 UNUSED(v); 6078 6079 for_all_cpus(update_msr_sum, EVEN_COUNTERS); 6080 } 6081 6082 void msr_sum_record(void) 6083 { 6084 struct itimerspec its; 6085 struct sigevent sev; 6086 6087 per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array)); 6088 if (!per_cpu_msr_sum) { 6089 fprintf(outf, "Can not allocate memory for long time MSR.\n"); 6090 return; 6091 } 6092 /* 6093 * Signal handler might be restricted, so use thread notifier instead. 6094 */ 6095 memset(&sev, 0, sizeof(struct sigevent)); 6096 sev.sigev_notify = SIGEV_THREAD; 6097 sev.sigev_notify_function = msr_record_handler; 6098 6099 sev.sigev_value.sival_ptr = &timerid; 6100 if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) { 6101 fprintf(outf, "Can not create timer.\n"); 6102 goto release_msr; 6103 } 6104 6105 its.it_value.tv_sec = 0; 6106 its.it_value.tv_nsec = 1; 6107 /* 6108 * A wraparound time has been calculated early. 6109 * Some sources state that the peak power for a 6110 * microprocessor is usually 1.5 times the TDP rating, 6111 * use 2 * TDP for safety. 6112 */ 6113 its.it_interval.tv_sec = rapl_joule_counter_range / 2; 6114 its.it_interval.tv_nsec = 0; 6115 6116 if (timer_settime(timerid, 0, &its, NULL) == -1) { 6117 fprintf(outf, "Can not set timer.\n"); 6118 goto release_timer; 6119 } 6120 return; 6121 6122 release_timer: 6123 timer_delete(timerid); 6124 release_msr: 6125 free(per_cpu_msr_sum); 6126 } 6127 6128 /* 6129 * set_my_sched_priority(pri) 6130 * return previous priority on success 6131 * return value < -20 on failure 6132 */ 6133 int set_my_sched_priority(int priority) 6134 { 6135 int retval; 6136 int original_priority; 6137 6138 errno = 0; 6139 original_priority = getpriority(PRIO_PROCESS, 0); 6140 if (errno && (original_priority == -1)) 6141 return -21; 6142 6143 retval = setpriority(PRIO_PROCESS, 0, priority); 6144 if (retval) 6145 return -21; 6146 6147 errno = 0; 6148 retval = getpriority(PRIO_PROCESS, 0); 6149 if (retval != priority) 6150 return -21; 6151 6152 return original_priority; 6153 } 6154 6155 void turbostat_loop() 6156 { 6157 int retval; 6158 int restarted = 0; 6159 unsigned int done_iters = 0; 6160 6161 setup_signal_handler(); 6162 6163 /* 6164 * elevate own priority for interval mode 6165 * 6166 * ignore on error - we probably don't have permission to set it, but 6167 * it's not a big deal 6168 */ 6169 set_my_sched_priority(-20); 6170 6171 restart: 6172 restarted++; 6173 6174 snapshot_proc_sysfs_files(); 6175 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 6176 first_counter_read = 0; 6177 if (retval < -1) { 6178 exit(retval); 6179 } else if (retval == -1) { 6180 if (restarted > 10) { 6181 exit(retval); 6182 } 6183 re_initialize(); 6184 goto restart; 6185 } 6186 restarted = 0; 6187 done_iters = 0; 6188 gettimeofday(&tv_even, (struct timezone *)NULL); 6189 6190 while (1) { 6191 if (for_all_proc_cpus(cpu_is_not_present)) { 6192 re_initialize(); 6193 goto restart; 6194 } 6195 if (update_effective_str(false)) { 6196 re_initialize(); 6197 goto restart; 6198 } 6199 do_sleep(); 6200 if (snapshot_proc_sysfs_files()) 6201 goto restart; 6202 retval = for_all_cpus(get_counters, ODD_COUNTERS); 6203 if (retval < -1) { 6204 exit(retval); 6205 } else if (retval == -1) { 6206 re_initialize(); 6207 goto restart; 6208 } 6209 gettimeofday(&tv_odd, (struct timezone *)NULL); 6210 timersub(&tv_odd, &tv_even, &tv_delta); 6211 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) { 6212 re_initialize(); 6213 goto restart; 6214 } 6215 delta_platform(&platform_counters_odd, &platform_counters_even); 6216 compute_average(EVEN_COUNTERS); 6217 format_all_counters(EVEN_COUNTERS); 6218 flush_output_stdout(); 6219 if (exit_requested) 6220 break; 6221 if (num_iterations && ++done_iters >= num_iterations) 6222 break; 6223 do_sleep(); 6224 if (snapshot_proc_sysfs_files()) 6225 goto restart; 6226 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 6227 if (retval < -1) { 6228 exit(retval); 6229 } else if (retval == -1) { 6230 re_initialize(); 6231 goto restart; 6232 } 6233 gettimeofday(&tv_even, (struct timezone *)NULL); 6234 timersub(&tv_even, &tv_odd, &tv_delta); 6235 if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) { 6236 re_initialize(); 6237 goto restart; 6238 } 6239 delta_platform(&platform_counters_even, &platform_counters_odd); 6240 compute_average(ODD_COUNTERS); 6241 format_all_counters(ODD_COUNTERS); 6242 flush_output_stdout(); 6243 if (exit_requested) 6244 break; 6245 if (num_iterations && ++done_iters >= num_iterations) 6246 break; 6247 } 6248 } 6249 6250 void check_dev_msr() 6251 { 6252 struct stat sb; 6253 char pathname[32]; 6254 6255 if (no_msr) 6256 return; 6257 6258 sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); 6259 if (stat(pathname, &sb)) 6260 if (system("/sbin/modprobe msr > /dev/null 2>&1")) 6261 no_msr = 1; 6262 } 6263 6264 /* 6265 * check for CAP_SYS_RAWIO 6266 * return 0 on success 6267 * return 1 on fail 6268 */ 6269 int check_for_cap_sys_rawio(void) 6270 { 6271 cap_t caps; 6272 cap_flag_value_t cap_flag_value; 6273 int ret = 0; 6274 6275 caps = cap_get_proc(); 6276 if (caps == NULL) 6277 return 1; 6278 6279 if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) { 6280 ret = 1; 6281 goto free_and_exit; 6282 } 6283 6284 if (cap_flag_value != CAP_SET) { 6285 ret = 1; 6286 goto free_and_exit; 6287 } 6288 6289 free_and_exit: 6290 if (cap_free(caps) == -1) 6291 err(-6, "cap_free\n"); 6292 6293 return ret; 6294 } 6295 6296 void check_msr_permission(void) 6297 { 6298 int failed = 0; 6299 char pathname[32]; 6300 6301 if (no_msr) 6302 return; 6303 6304 /* check for CAP_SYS_RAWIO */ 6305 failed += check_for_cap_sys_rawio(); 6306 6307 /* test file permissions */ 6308 sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); 6309 if (euidaccess(pathname, R_OK)) { 6310 failed++; 6311 } 6312 6313 if (failed) { 6314 warnx("Failed to access %s. Some of the counters may not be available\n" 6315 "\tRun as root to enable them or use %s to disable the access explicitly", pathname, "--no-msr"); 6316 no_msr = 1; 6317 } 6318 } 6319 6320 void probe_bclk(void) 6321 { 6322 unsigned long long msr; 6323 unsigned int base_ratio; 6324 6325 if (!platform->has_nhm_msrs || no_msr) 6326 return; 6327 6328 if (platform->bclk_freq == BCLK_100MHZ) 6329 bclk = 100.00; 6330 else if (platform->bclk_freq == BCLK_133MHZ) 6331 bclk = 133.33; 6332 else if (platform->bclk_freq == BCLK_SLV) 6333 bclk = slm_bclk(); 6334 else 6335 return; 6336 6337 get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); 6338 base_ratio = (msr >> 8) & 0xFF; 6339 6340 base_hz = base_ratio * bclk * 1000000; 6341 has_base_hz = 1; 6342 6343 if (platform->enable_tsc_tweak) 6344 tsc_tweak = base_hz / tsc_hz; 6345 } 6346 6347 static void remove_underbar(char *s) 6348 { 6349 char *to = s; 6350 6351 while (*s) { 6352 if (*s != '_') 6353 *to++ = *s; 6354 s++; 6355 } 6356 6357 *to = 0; 6358 } 6359 6360 static void dump_turbo_ratio_info(void) 6361 { 6362 if (!has_turbo) 6363 return; 6364 6365 if (!platform->has_nhm_msrs || no_msr) 6366 return; 6367 6368 if (platform->trl_msrs & TRL_LIMIT2) 6369 dump_turbo_ratio_limit2(); 6370 6371 if (platform->trl_msrs & TRL_LIMIT1) 6372 dump_turbo_ratio_limit1(); 6373 6374 if (platform->trl_msrs & TRL_BASE) { 6375 dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT); 6376 6377 if (is_hybrid) 6378 dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT); 6379 } 6380 6381 if (platform->trl_msrs & TRL_ATOM) 6382 dump_atom_turbo_ratio_limits(); 6383 6384 if (platform->trl_msrs & TRL_KNL) 6385 dump_knl_turbo_ratio_limits(); 6386 6387 if (platform->has_config_tdp) 6388 dump_config_tdp(); 6389 } 6390 6391 static int read_sysfs_int(char *path) 6392 { 6393 FILE *input; 6394 int retval = -1; 6395 6396 input = fopen(path, "r"); 6397 if (input == NULL) { 6398 if (debug) 6399 fprintf(outf, "NSFOD %s\n", path); 6400 return (-1); 6401 } 6402 if (fscanf(input, "%d", &retval) != 1) 6403 err(1, "%s: failed to read int from file", path); 6404 fclose(input); 6405 6406 return (retval); 6407 } 6408 6409 static void dump_sysfs_file(char *path) 6410 { 6411 FILE *input; 6412 char cpuidle_buf[64]; 6413 6414 input = fopen(path, "r"); 6415 if (input == NULL) { 6416 if (debug) 6417 fprintf(outf, "NSFOD %s\n", path); 6418 return; 6419 } 6420 if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input)) 6421 err(1, "%s: failed to read file", path); 6422 fclose(input); 6423 6424 fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf); 6425 } 6426 6427 static void probe_intel_uncore_frequency_legacy(void) 6428 { 6429 int i, j; 6430 char path[256]; 6431 6432 for (i = 0; i < topo.num_packages; ++i) { 6433 for (j = 0; j <= topo.max_die_id; ++j) { 6434 int k, l; 6435 char path_base[128]; 6436 6437 sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i, 6438 j); 6439 6440 if (access(path_base, R_OK)) 6441 continue; 6442 6443 BIC_PRESENT(BIC_UNCORE_MHZ); 6444 6445 if (quiet) 6446 return; 6447 6448 sprintf(path, "%s/min_freq_khz", path_base); 6449 k = read_sysfs_int(path); 6450 sprintf(path, "%s/max_freq_khz", path_base); 6451 l = read_sysfs_int(path); 6452 fprintf(outf, "Uncore Frequency package%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000); 6453 6454 sprintf(path, "%s/initial_min_freq_khz", path_base); 6455 k = read_sysfs_int(path); 6456 sprintf(path, "%s/initial_max_freq_khz", path_base); 6457 l = read_sysfs_int(path); 6458 fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000); 6459 6460 sprintf(path, "%s/current_freq_khz", path_base); 6461 k = read_sysfs_int(path); 6462 fprintf(outf, " %d MHz\n", k / 1000); 6463 } 6464 } 6465 } 6466 6467 static void probe_intel_uncore_frequency_cluster(void) 6468 { 6469 int i, uncore_max_id; 6470 char path[256]; 6471 char path_base[128]; 6472 6473 if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK)) 6474 return; 6475 6476 for (uncore_max_id = 0;; ++uncore_max_id) { 6477 6478 sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", uncore_max_id); 6479 6480 /* uncore## start at 00 and skips no numbers, so stop upon first missing */ 6481 if (access(path_base, R_OK)) { 6482 uncore_max_id -= 1; 6483 break; 6484 } 6485 } 6486 for (i = uncore_max_id; i >= 0; --i) { 6487 int k, l; 6488 int package_id, domain_id, cluster_id; 6489 char name_buf[16]; 6490 6491 sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", i); 6492 6493 if (access(path_base, R_OK)) 6494 err(1, "%s: %s\n", __func__, path_base); 6495 6496 sprintf(path, "%s/package_id", path_base); 6497 package_id = read_sysfs_int(path); 6498 6499 sprintf(path, "%s/domain_id", path_base); 6500 domain_id = read_sysfs_int(path); 6501 6502 sprintf(path, "%s/fabric_cluster_id", path_base); 6503 cluster_id = read_sysfs_int(path); 6504 6505 sprintf(path, "%s/current_freq_khz", path_base); 6506 sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id); 6507 6508 add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); 6509 6510 if (quiet) 6511 continue; 6512 6513 sprintf(path, "%s/min_freq_khz", path_base); 6514 k = read_sysfs_int(path); 6515 sprintf(path, "%s/max_freq_khz", path_base); 6516 l = read_sysfs_int(path); 6517 fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id, 6518 cluster_id, k / 1000, l / 1000); 6519 6520 sprintf(path, "%s/initial_min_freq_khz", path_base); 6521 k = read_sysfs_int(path); 6522 sprintf(path, "%s/initial_max_freq_khz", path_base); 6523 l = read_sysfs_int(path); 6524 fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000); 6525 6526 sprintf(path, "%s/current_freq_khz", path_base); 6527 k = read_sysfs_int(path); 6528 fprintf(outf, " %d MHz\n", k / 1000); 6529 } 6530 } 6531 6532 static void probe_intel_uncore_frequency(void) 6533 { 6534 if (!genuine_intel) 6535 return; 6536 6537 if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK) == 0) 6538 probe_intel_uncore_frequency_cluster(); 6539 else 6540 probe_intel_uncore_frequency_legacy(); 6541 } 6542 6543 static void set_graphics_fp(char *path, int idx) 6544 { 6545 if (!access(path, R_OK)) 6546 gfx_info[idx].fp = fopen_or_die(path, "r"); 6547 } 6548 6549 /* Enlarge this if there are /sys/class/drm/card2 ... */ 6550 #define GFX_MAX_CARDS 2 6551 6552 static void probe_graphics(void) 6553 { 6554 char path[PATH_MAX]; 6555 int i; 6556 6557 /* Xe graphics sysfs knobs */ 6558 if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) { 6559 FILE *fp; 6560 char buf[8]; 6561 bool gt0_is_gt; 6562 6563 fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r"); 6564 if (!fp) 6565 goto next; 6566 6567 if (!fread(buf, sizeof(char), 7, fp)) { 6568 fclose(fp); 6569 goto next; 6570 } 6571 fclose(fp); 6572 6573 if (!strncmp(buf, "gt0-rc", strlen("gt0-rc"))) 6574 gt0_is_gt = true; 6575 else if (!strncmp(buf, "gt0-mc", strlen("gt0-mc"))) 6576 gt0_is_gt = false; 6577 else 6578 goto next; 6579 6580 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", gt0_is_gt ? GFX_rc6 : SAM_mc6); 6581 6582 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", gt0_is_gt ? GFX_MHz : SAM_MHz); 6583 6584 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz); 6585 6586 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", gt0_is_gt ? SAM_mc6 : GFX_rc6); 6587 6588 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", gt0_is_gt ? SAM_MHz : GFX_MHz); 6589 6590 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz); 6591 6592 goto end; 6593 } 6594 6595 next: 6596 /* New i915 graphics sysfs knobs */ 6597 for (i = 0; i < GFX_MAX_CARDS; i++) { 6598 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rc6_residency_ms", i); 6599 if (!access(path, R_OK)) 6600 break; 6601 } 6602 6603 if (i == GFX_MAX_CARDS) 6604 goto legacy_i915; 6605 6606 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rc6_residency_ms", i); 6607 set_graphics_fp(path, GFX_rc6); 6608 6609 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rps_cur_freq_mhz", i); 6610 set_graphics_fp(path, GFX_MHz); 6611 6612 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rps_act_freq_mhz", i); 6613 set_graphics_fp(path, GFX_ACTMHz); 6614 6615 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rc6_residency_ms", i); 6616 set_graphics_fp(path, SAM_mc6); 6617 6618 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rps_cur_freq_mhz", i); 6619 set_graphics_fp(path, SAM_MHz); 6620 6621 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rps_act_freq_mhz", i); 6622 set_graphics_fp(path, SAM_ACTMHz); 6623 6624 goto end; 6625 6626 legacy_i915: 6627 /* Fall back to traditional i915 graphics sysfs knobs */ 6628 set_graphics_fp("/sys/class/drm/card0/power/rc6_residency_ms", GFX_rc6); 6629 6630 set_graphics_fp("/sys/class/drm/card0/gt_cur_freq_mhz", GFX_MHz); 6631 if (!gfx_info[GFX_MHz].fp) 6632 set_graphics_fp("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", GFX_MHz); 6633 6634 set_graphics_fp("/sys/class/drm/card0/gt_act_freq_mhz", GFX_ACTMHz); 6635 if (!gfx_info[GFX_ACTMHz].fp) 6636 set_graphics_fp("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", GFX_ACTMHz); 6637 6638 end: 6639 if (gfx_info[GFX_rc6].fp) 6640 BIC_PRESENT(BIC_GFX_rc6); 6641 if (gfx_info[GFX_MHz].fp) 6642 BIC_PRESENT(BIC_GFXMHz); 6643 if (gfx_info[GFX_ACTMHz].fp) 6644 BIC_PRESENT(BIC_GFXACTMHz); 6645 if (gfx_info[SAM_mc6].fp) 6646 BIC_PRESENT(BIC_SAM_mc6); 6647 if (gfx_info[SAM_MHz].fp) 6648 BIC_PRESENT(BIC_SAMMHz); 6649 if (gfx_info[SAM_ACTMHz].fp) 6650 BIC_PRESENT(BIC_SAMACTMHz); 6651 } 6652 6653 static void dump_sysfs_cstate_config(void) 6654 { 6655 char path[64]; 6656 char name_buf[16]; 6657 char desc[64]; 6658 FILE *input; 6659 int state; 6660 char *sp; 6661 6662 if (access("/sys/devices/system/cpu/cpuidle", R_OK)) { 6663 fprintf(outf, "cpuidle not loaded\n"); 6664 return; 6665 } 6666 6667 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver"); 6668 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor"); 6669 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro"); 6670 6671 for (state = 0; state < 10; ++state) { 6672 6673 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); 6674 input = fopen(path, "r"); 6675 if (input == NULL) 6676 continue; 6677 if (!fgets(name_buf, sizeof(name_buf), input)) 6678 err(1, "%s: failed to read file", path); 6679 6680 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ 6681 sp = strchr(name_buf, '-'); 6682 if (!sp) 6683 sp = strchrnul(name_buf, '\n'); 6684 *sp = '\0'; 6685 fclose(input); 6686 6687 remove_underbar(name_buf); 6688 6689 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state); 6690 input = fopen(path, "r"); 6691 if (input == NULL) 6692 continue; 6693 if (!fgets(desc, sizeof(desc), input)) 6694 err(1, "%s: failed to read file", path); 6695 6696 fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc); 6697 fclose(input); 6698 } 6699 } 6700 6701 static void dump_sysfs_pstate_config(void) 6702 { 6703 char path[64]; 6704 char driver_buf[64]; 6705 char governor_buf[64]; 6706 FILE *input; 6707 int turbo; 6708 6709 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", base_cpu); 6710 input = fopen(path, "r"); 6711 if (input == NULL) { 6712 fprintf(outf, "NSFOD %s\n", path); 6713 return; 6714 } 6715 if (!fgets(driver_buf, sizeof(driver_buf), input)) 6716 err(1, "%s: failed to read file", path); 6717 fclose(input); 6718 6719 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", base_cpu); 6720 input = fopen(path, "r"); 6721 if (input == NULL) { 6722 fprintf(outf, "NSFOD %s\n", path); 6723 return; 6724 } 6725 if (!fgets(governor_buf, sizeof(governor_buf), input)) 6726 err(1, "%s: failed to read file", path); 6727 fclose(input); 6728 6729 fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf); 6730 fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf); 6731 6732 sprintf(path, "/sys/devices/system/cpu/cpufreq/boost"); 6733 input = fopen(path, "r"); 6734 if (input != NULL) { 6735 if (fscanf(input, "%d", &turbo) != 1) 6736 err(1, "%s: failed to parse number from file", path); 6737 fprintf(outf, "cpufreq boost: %d\n", turbo); 6738 fclose(input); 6739 } 6740 6741 sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo"); 6742 input = fopen(path, "r"); 6743 if (input != NULL) { 6744 if (fscanf(input, "%d", &turbo) != 1) 6745 err(1, "%s: failed to parse number from file", path); 6746 fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo); 6747 fclose(input); 6748 } 6749 } 6750 6751 /* 6752 * print_epb() 6753 * Decode the ENERGY_PERF_BIAS MSR 6754 */ 6755 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) 6756 { 6757 char *epb_string; 6758 int cpu, epb; 6759 6760 UNUSED(c); 6761 UNUSED(p); 6762 6763 if (!has_epb) 6764 return 0; 6765 6766 cpu = t->cpu_id; 6767 6768 /* EPB is per-package */ 6769 if (!is_cpu_first_thread_in_package(t, c, p)) 6770 return 0; 6771 6772 if (cpu_migrate(cpu)) { 6773 fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu); 6774 return -1; 6775 } 6776 6777 epb = get_epb(cpu); 6778 if (epb < 0) 6779 return 0; 6780 6781 switch (epb) { 6782 case ENERGY_PERF_BIAS_PERFORMANCE: 6783 epb_string = "performance"; 6784 break; 6785 case ENERGY_PERF_BIAS_NORMAL: 6786 epb_string = "balanced"; 6787 break; 6788 case ENERGY_PERF_BIAS_POWERSAVE: 6789 epb_string = "powersave"; 6790 break; 6791 default: 6792 epb_string = "custom"; 6793 break; 6794 } 6795 fprintf(outf, "cpu%d: EPB: %d (%s)\n", cpu, epb, epb_string); 6796 6797 return 0; 6798 } 6799 6800 /* 6801 * print_hwp() 6802 * Decode the MSR_HWP_CAPABILITIES 6803 */ 6804 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) 6805 { 6806 unsigned long long msr; 6807 int cpu; 6808 6809 UNUSED(c); 6810 UNUSED(p); 6811 6812 if (no_msr) 6813 return 0; 6814 6815 if (!has_hwp) 6816 return 0; 6817 6818 cpu = t->cpu_id; 6819 6820 /* MSR_HWP_CAPABILITIES is per-package */ 6821 if (!is_cpu_first_thread_in_package(t, c, p)) 6822 return 0; 6823 6824 if (cpu_migrate(cpu)) { 6825 fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu); 6826 return -1; 6827 } 6828 6829 if (get_msr(cpu, MSR_PM_ENABLE, &msr)) 6830 return 0; 6831 6832 fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", cpu, msr, (msr & (1 << 0)) ? "" : "No-"); 6833 6834 /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */ 6835 if ((msr & (1 << 0)) == 0) 6836 return 0; 6837 6838 if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr)) 6839 return 0; 6840 6841 fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx " 6842 "(high %d guar %d eff %d low %d)\n", 6843 cpu, msr, 6844 (unsigned int)HWP_HIGHEST_PERF(msr), 6845 (unsigned int)HWP_GUARANTEED_PERF(msr), 6846 (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr)); 6847 6848 if (get_msr(cpu, MSR_HWP_REQUEST, &msr)) 6849 return 0; 6850 6851 fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx " 6852 "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n", 6853 cpu, msr, 6854 (unsigned int)(((msr) >> 0) & 0xff), 6855 (unsigned int)(((msr) >> 8) & 0xff), 6856 (unsigned int)(((msr) >> 16) & 0xff), 6857 (unsigned int)(((msr) >> 24) & 0xff), 6858 (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1)); 6859 6860 if (has_hwp_pkg) { 6861 if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr)) 6862 return 0; 6863 6864 fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx " 6865 "(min %d max %d des %d epp 0x%x window 0x%x)\n", 6866 cpu, msr, 6867 (unsigned int)(((msr) >> 0) & 0xff), 6868 (unsigned int)(((msr) >> 8) & 0xff), 6869 (unsigned int)(((msr) >> 16) & 0xff), 6870 (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3)); 6871 } 6872 if (has_hwp_notify) { 6873 if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr)) 6874 return 0; 6875 6876 fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx " 6877 "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n", 6878 cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis"); 6879 } 6880 if (get_msr(cpu, MSR_HWP_STATUS, &msr)) 6881 return 0; 6882 6883 fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx " 6884 "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", 6885 cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-"); 6886 6887 return 0; 6888 } 6889 6890 /* 6891 * print_perf_limit() 6892 */ 6893 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p) 6894 { 6895 unsigned long long msr; 6896 int cpu; 6897 6898 UNUSED(c); 6899 UNUSED(p); 6900 6901 if (no_msr) 6902 return 0; 6903 6904 cpu = t->cpu_id; 6905 6906 /* per-package */ 6907 if (!is_cpu_first_thread_in_package(t, c, p)) 6908 return 0; 6909 6910 if (cpu_migrate(cpu)) { 6911 fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu); 6912 return -1; 6913 } 6914 6915 if (platform->plr_msrs & PLR_CORE) { 6916 get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); 6917 fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 6918 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", 6919 (msr & 1 << 15) ? "bit15, " : "", 6920 (msr & 1 << 14) ? "bit14, " : "", 6921 (msr & 1 << 13) ? "Transitions, " : "", 6922 (msr & 1 << 12) ? "MultiCoreTurbo, " : "", 6923 (msr & 1 << 11) ? "PkgPwrL2, " : "", 6924 (msr & 1 << 10) ? "PkgPwrL1, " : "", 6925 (msr & 1 << 9) ? "CorePwr, " : "", 6926 (msr & 1 << 8) ? "Amps, " : "", 6927 (msr & 1 << 6) ? "VR-Therm, " : "", 6928 (msr & 1 << 5) ? "Auto-HWP, " : "", 6929 (msr & 1 << 4) ? "Graphics, " : "", 6930 (msr & 1 << 2) ? "bit2, " : "", 6931 (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : ""); 6932 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", 6933 (msr & 1 << 31) ? "bit31, " : "", 6934 (msr & 1 << 30) ? "bit30, " : "", 6935 (msr & 1 << 29) ? "Transitions, " : "", 6936 (msr & 1 << 28) ? "MultiCoreTurbo, " : "", 6937 (msr & 1 << 27) ? "PkgPwrL2, " : "", 6938 (msr & 1 << 26) ? "PkgPwrL1, " : "", 6939 (msr & 1 << 25) ? "CorePwr, " : "", 6940 (msr & 1 << 24) ? "Amps, " : "", 6941 (msr & 1 << 22) ? "VR-Therm, " : "", 6942 (msr & 1 << 21) ? "Auto-HWP, " : "", 6943 (msr & 1 << 20) ? "Graphics, " : "", 6944 (msr & 1 << 18) ? "bit18, " : "", 6945 (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : ""); 6946 6947 } 6948 if (platform->plr_msrs & PLR_GFX) { 6949 get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); 6950 fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 6951 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)", 6952 (msr & 1 << 0) ? "PROCHOT, " : "", 6953 (msr & 1 << 1) ? "ThermStatus, " : "", 6954 (msr & 1 << 4) ? "Graphics, " : "", 6955 (msr & 1 << 6) ? "VR-Therm, " : "", 6956 (msr & 1 << 8) ? "Amps, " : "", 6957 (msr & 1 << 9) ? "GFXPwr, " : "", 6958 (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); 6959 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n", 6960 (msr & 1 << 16) ? "PROCHOT, " : "", 6961 (msr & 1 << 17) ? "ThermStatus, " : "", 6962 (msr & 1 << 20) ? "Graphics, " : "", 6963 (msr & 1 << 22) ? "VR-Therm, " : "", 6964 (msr & 1 << 24) ? "Amps, " : "", 6965 (msr & 1 << 25) ? "GFXPwr, " : "", 6966 (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); 6967 } 6968 if (platform->plr_msrs & PLR_RING) { 6969 get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); 6970 fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 6971 fprintf(outf, " (Active: %s%s%s%s%s%s)", 6972 (msr & 1 << 0) ? "PROCHOT, " : "", 6973 (msr & 1 << 1) ? "ThermStatus, " : "", 6974 (msr & 1 << 6) ? "VR-Therm, " : "", 6975 (msr & 1 << 8) ? "Amps, " : "", 6976 (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); 6977 fprintf(outf, " (Logged: %s%s%s%s%s%s)\n", 6978 (msr & 1 << 16) ? "PROCHOT, " : "", 6979 (msr & 1 << 17) ? "ThermStatus, " : "", 6980 (msr & 1 << 22) ? "VR-Therm, " : "", 6981 (msr & 1 << 24) ? "Amps, " : "", 6982 (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); 6983 } 6984 return 0; 6985 } 6986 6987 #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ 6988 #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ 6989 6990 double get_quirk_tdp(void) 6991 { 6992 if (platform->rapl_quirk_tdp) 6993 return platform->rapl_quirk_tdp; 6994 6995 return 135.0; 6996 } 6997 6998 double get_tdp_intel(void) 6999 { 7000 unsigned long long msr; 7001 7002 if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) 7003 if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr)) 7004 return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; 7005 return get_quirk_tdp(); 7006 } 7007 7008 double get_tdp_amd(void) 7009 { 7010 return get_quirk_tdp(); 7011 } 7012 7013 void rapl_probe_intel(void) 7014 { 7015 unsigned long long msr; 7016 unsigned int time_unit; 7017 double tdp; 7018 const unsigned long long bic_watt_bits = BIC_SysWatt | BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt; 7019 const unsigned long long bic_joules_bits = BIC_Sys_J | BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J; 7020 7021 if (rapl_joules) 7022 bic_enabled &= ~bic_watt_bits; 7023 else 7024 bic_enabled &= ~bic_joules_bits; 7025 7026 if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS)) 7027 bic_enabled &= ~BIC_PKG__; 7028 if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)) 7029 bic_enabled &= ~BIC_RAM__; 7030 7031 /* units on package 0, verify later other packages match */ 7032 if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr)) 7033 return; 7034 7035 rapl_power_units = 1.0 / (1 << (msr & 0xF)); 7036 if (platform->has_rapl_divisor) 7037 rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000; 7038 else 7039 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); 7040 7041 if (platform->has_fixed_rapl_unit) 7042 rapl_dram_energy_units = (15.3 / 1000000); 7043 else 7044 rapl_dram_energy_units = rapl_energy_units; 7045 7046 time_unit = msr >> 16 & 0xF; 7047 if (time_unit == 0) 7048 time_unit = 0xA; 7049 7050 rapl_time_units = 1.0 / (1 << (time_unit)); 7051 7052 tdp = get_tdp_intel(); 7053 7054 rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; 7055 if (!quiet) 7056 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); 7057 } 7058 7059 void rapl_probe_amd(void) 7060 { 7061 unsigned long long msr; 7062 double tdp; 7063 const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt; 7064 const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J; 7065 7066 if (rapl_joules) 7067 bic_enabled &= ~bic_watt_bits; 7068 else 7069 bic_enabled &= ~bic_joules_bits; 7070 7071 if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr)) 7072 return; 7073 7074 rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf)); 7075 rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f)); 7076 rapl_power_units = ldexp(1.0, -(msr & 0xf)); 7077 7078 tdp = get_tdp_amd(); 7079 7080 rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; 7081 if (!quiet) 7082 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); 7083 } 7084 7085 void print_power_limit_msr(int cpu, unsigned long long msr, char *label) 7086 { 7087 fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n", 7088 cpu, label, 7089 ((msr >> 15) & 1) ? "EN" : "DIS", 7090 ((msr >> 0) & 0x7FFF) * rapl_power_units, 7091 (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, 7092 (((msr >> 16) & 1) ? "EN" : "DIS")); 7093 7094 return; 7095 } 7096 7097 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) 7098 { 7099 unsigned long long msr; 7100 const char *msr_name; 7101 int cpu; 7102 7103 UNUSED(c); 7104 UNUSED(p); 7105 7106 if (!platform->rapl_msrs) 7107 return 0; 7108 7109 /* RAPL counters are per package, so print only for 1st thread/package */ 7110 if (!is_cpu_first_thread_in_package(t, c, p)) 7111 return 0; 7112 7113 cpu = t->cpu_id; 7114 if (cpu_migrate(cpu)) { 7115 fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu); 7116 return -1; 7117 } 7118 7119 if (platform->rapl_msrs & RAPL_AMD_F17H) { 7120 msr_name = "MSR_RAPL_PWR_UNIT"; 7121 if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr)) 7122 return -1; 7123 } else { 7124 msr_name = "MSR_RAPL_POWER_UNIT"; 7125 if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) 7126 return -1; 7127 } 7128 7129 fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr, 7130 rapl_power_units, rapl_energy_units, rapl_time_units); 7131 7132 if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) { 7133 7134 if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) 7135 return -5; 7136 7137 fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 7138 cpu, msr, 7139 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7140 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7141 ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7142 ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); 7143 7144 } 7145 if (platform->rapl_msrs & RAPL_PKG) { 7146 7147 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) 7148 return -9; 7149 7150 fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", 7151 cpu, msr, (msr >> 63) & 1 ? "" : "UN"); 7152 7153 print_power_limit_msr(cpu, msr, "PKG Limit #1"); 7154 fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n", 7155 cpu, 7156 ((msr >> 47) & 1) ? "EN" : "DIS", 7157 ((msr >> 32) & 0x7FFF) * rapl_power_units, 7158 (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, 7159 ((msr >> 48) & 1) ? "EN" : "DIS"); 7160 7161 if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr)) 7162 return -9; 7163 7164 fprintf(outf, "cpu%d: MSR_VR_CURRENT_CONFIG: 0x%08llx\n", cpu, msr); 7165 fprintf(outf, "cpu%d: PKG Limit #4: %f Watts (%slocked)\n", 7166 cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN"); 7167 } 7168 7169 if (platform->rapl_msrs & RAPL_DRAM_POWER_INFO) { 7170 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) 7171 return -6; 7172 7173 fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 7174 cpu, msr, 7175 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7176 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7177 ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7178 ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); 7179 } 7180 if (platform->rapl_msrs & RAPL_DRAM) { 7181 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) 7182 return -9; 7183 fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", 7184 cpu, msr, (msr >> 31) & 1 ? "" : "UN"); 7185 7186 print_power_limit_msr(cpu, msr, "DRAM Limit"); 7187 } 7188 if (platform->rapl_msrs & RAPL_CORE_POLICY) { 7189 if (get_msr(cpu, MSR_PP0_POLICY, &msr)) 7190 return -7; 7191 7192 fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); 7193 } 7194 if (platform->rapl_msrs & RAPL_CORE_POWER_LIMIT) { 7195 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) 7196 return -9; 7197 fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", 7198 cpu, msr, (msr >> 31) & 1 ? "" : "UN"); 7199 print_power_limit_msr(cpu, msr, "Cores Limit"); 7200 } 7201 if (platform->rapl_msrs & RAPL_GFX) { 7202 if (get_msr(cpu, MSR_PP1_POLICY, &msr)) 7203 return -8; 7204 7205 fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); 7206 7207 if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) 7208 return -9; 7209 fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", 7210 cpu, msr, (msr >> 31) & 1 ? "" : "UN"); 7211 print_power_limit_msr(cpu, msr, "GFX Limit"); 7212 } 7213 return 0; 7214 } 7215 7216 /* 7217 * probe_rapl() 7218 * 7219 * sets rapl_power_units, rapl_energy_units, rapl_time_units 7220 */ 7221 void probe_rapl(void) 7222 { 7223 if (!platform->rapl_msrs || no_msr) 7224 return; 7225 7226 if (genuine_intel) 7227 rapl_probe_intel(); 7228 if (authentic_amd || hygon_genuine) 7229 rapl_probe_amd(); 7230 7231 if (quiet) 7232 return; 7233 7234 for_all_cpus(print_rapl, ODD_COUNTERS); 7235 } 7236 7237 /* 7238 * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where 7239 * the Thermal Control Circuit (TCC) activates. 7240 * This is usually equal to tjMax. 7241 * 7242 * Older processors do not have this MSR, so there we guess, 7243 * but also allow cmdline over-ride with -T. 7244 * 7245 * Several MSR temperature values are in units of degrees-C 7246 * below this value, including the Digital Thermal Sensor (DTS), 7247 * Package Thermal Management Sensor (PTM), and thermal event thresholds. 7248 */ 7249 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) 7250 { 7251 unsigned long long msr; 7252 unsigned int tcc_default, tcc_offset; 7253 int cpu; 7254 7255 UNUSED(c); 7256 UNUSED(p); 7257 7258 /* tj_max is used only for dts or ptm */ 7259 if (!(do_dts || do_ptm)) 7260 return 0; 7261 7262 /* this is a per-package concept */ 7263 if (!is_cpu_first_thread_in_package(t, c, p)) 7264 return 0; 7265 7266 cpu = t->cpu_id; 7267 if (cpu_migrate(cpu)) { 7268 fprintf(outf, "Could not migrate to CPU %d\n", cpu); 7269 return -1; 7270 } 7271 7272 if (tj_max_override != 0) { 7273 tj_max = tj_max_override; 7274 fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", cpu, tj_max); 7275 return 0; 7276 } 7277 7278 /* Temperature Target MSR is Nehalem and newer only */ 7279 if (!platform->has_nhm_msrs || no_msr) 7280 goto guess; 7281 7282 if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) 7283 goto guess; 7284 7285 tcc_default = (msr >> 16) & 0xFF; 7286 7287 if (!quiet) { 7288 int bits = platform->tcc_offset_bits; 7289 unsigned long long enabled = 0; 7290 7291 if (bits && !get_msr(base_cpu, MSR_PLATFORM_INFO, &enabled)) 7292 enabled = (enabled >> 30) & 1; 7293 7294 if (bits && enabled) { 7295 tcc_offset = (msr >> 24) & GENMASK(bits - 1, 0); 7296 fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", 7297 cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset); 7298 } else { 7299 fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default); 7300 } 7301 } 7302 7303 if (!tcc_default) 7304 goto guess; 7305 7306 tj_max = tcc_default; 7307 7308 return 0; 7309 7310 guess: 7311 tj_max = TJMAX_DEFAULT; 7312 fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max); 7313 7314 return 0; 7315 } 7316 7317 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) 7318 { 7319 unsigned long long msr; 7320 unsigned int dts, dts2; 7321 int cpu; 7322 7323 UNUSED(c); 7324 UNUSED(p); 7325 7326 if (no_msr) 7327 return 0; 7328 7329 if (!(do_dts || do_ptm)) 7330 return 0; 7331 7332 cpu = t->cpu_id; 7333 7334 /* DTS is per-core, no need to print for each thread */ 7335 if (!is_cpu_first_thread_in_core(t, c, p)) 7336 return 0; 7337 7338 if (cpu_migrate(cpu)) { 7339 fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu); 7340 return -1; 7341 } 7342 7343 if (do_ptm && is_cpu_first_core_in_package(t, c, p)) { 7344 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) 7345 return 0; 7346 7347 dts = (msr >> 16) & 0x7F; 7348 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts); 7349 7350 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr)) 7351 return 0; 7352 7353 dts = (msr >> 16) & 0x7F; 7354 dts2 = (msr >> 8) & 0x7F; 7355 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", 7356 cpu, msr, tj_max - dts, tj_max - dts2); 7357 } 7358 7359 if (do_dts && debug) { 7360 unsigned int resolution; 7361 7362 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) 7363 return 0; 7364 7365 dts = (msr >> 16) & 0x7F; 7366 resolution = (msr >> 27) & 0xF; 7367 fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", 7368 cpu, msr, tj_max - dts, resolution); 7369 7370 if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr)) 7371 return 0; 7372 7373 dts = (msr >> 16) & 0x7F; 7374 dts2 = (msr >> 8) & 0x7F; 7375 fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", 7376 cpu, msr, tj_max - dts, tj_max - dts2); 7377 } 7378 7379 return 0; 7380 } 7381 7382 void probe_thermal(void) 7383 { 7384 if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK)) 7385 BIC_PRESENT(BIC_CORE_THROT_CNT); 7386 else 7387 BIC_NOT_PRESENT(BIC_CORE_THROT_CNT); 7388 7389 for_all_cpus(set_temperature_target, ODD_COUNTERS); 7390 7391 if (quiet) 7392 return; 7393 7394 for_all_cpus(print_thermal, ODD_COUNTERS); 7395 } 7396 7397 int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p) 7398 { 7399 unsigned int eax, ebx, ecx, edx; 7400 7401 UNUSED(c); 7402 UNUSED(p); 7403 7404 if (!genuine_intel) 7405 return 0; 7406 7407 if (cpu_migrate(t->cpu_id)) { 7408 fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id); 7409 return -1; 7410 } 7411 7412 if (max_level < 0x1a) 7413 return 0; 7414 7415 __cpuid(0x1a, eax, ebx, ecx, edx); 7416 eax = (eax >> 24) & 0xFF; 7417 if (eax == 0x20) 7418 t->is_atom = true; 7419 return 0; 7420 } 7421 7422 void decode_feature_control_msr(void) 7423 { 7424 unsigned long long msr; 7425 7426 if (no_msr) 7427 return; 7428 7429 if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr)) 7430 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n", 7431 base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : ""); 7432 } 7433 7434 void decode_misc_enable_msr(void) 7435 { 7436 unsigned long long msr; 7437 7438 if (no_msr) 7439 return; 7440 7441 if (!genuine_intel) 7442 return; 7443 7444 if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr)) 7445 fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n", 7446 base_cpu, msr, 7447 msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-", 7448 msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-", 7449 msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-", 7450 msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", 7451 msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); 7452 } 7453 7454 void decode_misc_feature_control(void) 7455 { 7456 unsigned long long msr; 7457 7458 if (no_msr) 7459 return; 7460 7461 if (!platform->has_msr_misc_feature_control) 7462 return; 7463 7464 if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr)) 7465 fprintf(outf, 7466 "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n", 7467 base_cpu, msr, msr & (0 << 0) ? "No-" : "", msr & (1 << 0) ? "No-" : "", 7468 msr & (2 << 0) ? "No-" : "", msr & (3 << 0) ? "No-" : ""); 7469 } 7470 7471 /* 7472 * Decode MSR_MISC_PWR_MGMT 7473 * 7474 * Decode the bits according to the Nehalem documentation 7475 * bit[0] seems to continue to have same meaning going forward 7476 * bit[1] less so... 7477 */ 7478 void decode_misc_pwr_mgmt_msr(void) 7479 { 7480 unsigned long long msr; 7481 7482 if (no_msr) 7483 return; 7484 7485 if (!platform->has_msr_misc_pwr_mgmt) 7486 return; 7487 7488 if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) 7489 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n", 7490 base_cpu, msr, 7491 msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS"); 7492 } 7493 7494 /* 7495 * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG 7496 * 7497 * This MSRs are present on Silvermont processors, 7498 * Intel Atom processor E3000 series (Baytrail), and friends. 7499 */ 7500 void decode_c6_demotion_policy_msr(void) 7501 { 7502 unsigned long long msr; 7503 7504 if (no_msr) 7505 return; 7506 7507 if (!platform->has_msr_c6_demotion_policy_config) 7508 return; 7509 7510 if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr)) 7511 fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n", 7512 base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS"); 7513 7514 if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr)) 7515 fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n", 7516 base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS"); 7517 } 7518 7519 void print_dev_latency(void) 7520 { 7521 char *path = "/dev/cpu_dma_latency"; 7522 int fd; 7523 int value; 7524 int retval; 7525 7526 fd = open(path, O_RDONLY); 7527 if (fd < 0) { 7528 if (debug) 7529 warnx("Read %s failed", path); 7530 return; 7531 } 7532 7533 retval = read(fd, (void *)&value, sizeof(int)); 7534 if (retval != sizeof(int)) { 7535 warn("read failed %s", path); 7536 close(fd); 7537 return; 7538 } 7539 fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", value, value == 2000000000 ? "default" : "constrained"); 7540 7541 close(fd); 7542 } 7543 7544 static int has_instr_count_access(void) 7545 { 7546 int fd; 7547 int has_access; 7548 7549 if (no_perf) 7550 return 0; 7551 7552 fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0); 7553 has_access = fd != -1; 7554 7555 if (fd != -1) 7556 close(fd); 7557 7558 if (!has_access) 7559 warnx("Failed to access %s. Some of the counters may not be available\n" 7560 "\tRun as root to enable them or use %s to disable the access explicitly", 7561 "instructions retired perf counter", "--no-perf"); 7562 7563 return has_access; 7564 } 7565 7566 int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, 7567 double *scale_, enum rapl_unit *unit_) 7568 { 7569 if (no_perf) 7570 return -1; 7571 7572 const double scale = read_perf_scale(cai->perf_subsys, cai->perf_name); 7573 7574 if (scale == 0.0) 7575 return -1; 7576 7577 const enum rapl_unit unit = read_perf_rapl_unit(cai->perf_subsys, cai->perf_name); 7578 7579 if (unit == RAPL_UNIT_INVALID) 7580 return -1; 7581 7582 const unsigned int rapl_type = read_perf_type(cai->perf_subsys); 7583 const unsigned int rapl_energy_pkg_config = read_perf_config(cai->perf_subsys, cai->perf_name); 7584 7585 const int fd_counter = 7586 open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP); 7587 if (fd_counter == -1) 7588 return -1; 7589 7590 /* If it's the first counter opened, make it a group descriptor */ 7591 if (rci->fd_perf == -1) 7592 rci->fd_perf = fd_counter; 7593 7594 *scale_ = scale; 7595 *unit_ = unit; 7596 return fd_counter; 7597 } 7598 7599 int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, 7600 double *scale, enum rapl_unit *unit) 7601 { 7602 int ret = add_rapl_perf_counter_(cpu, rci, cai, scale, unit); 7603 7604 if (debug >= 2) 7605 fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu); 7606 7607 return ret; 7608 } 7609 7610 /* 7611 * Linux-perf manages the HW instructions-retired counter 7612 * by enabling when requested, and hiding rollover 7613 */ 7614 void linux_perf_init(void) 7615 { 7616 if (access("/proc/sys/kernel/perf_event_paranoid", F_OK)) 7617 return; 7618 7619 if (BIC_IS_ENABLED(BIC_IPC) && has_aperf) { 7620 fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 7621 if (fd_instr_count_percpu == NULL) 7622 err(-1, "calloc fd_instr_count_percpu"); 7623 } 7624 } 7625 7626 void rapl_perf_init(void) 7627 { 7628 const unsigned int num_domains = (platform->has_per_core_rapl ? topo.max_core_id : topo.max_package_id) + 1; 7629 bool *domain_visited = calloc(num_domains, sizeof(bool)); 7630 7631 rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain)); 7632 if (rapl_counter_info_perdomain == NULL) 7633 err(-1, "calloc rapl_counter_info_percpu"); 7634 rapl_counter_info_perdomain_size = num_domains; 7635 7636 /* 7637 * Initialize rapl_counter_info_percpu 7638 */ 7639 for (unsigned int domain_id = 0; domain_id < num_domains; ++domain_id) { 7640 struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain_id]; 7641 7642 rci->fd_perf = -1; 7643 for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) { 7644 rci->data[i] = 0; 7645 rci->source[i] = COUNTER_SOURCE_NONE; 7646 } 7647 } 7648 7649 /* 7650 * Open/probe the counters 7651 * If can't get it via perf, fallback to MSR 7652 */ 7653 for (size_t i = 0; i < ARRAY_SIZE(rapl_counter_arch_infos); ++i) { 7654 7655 const struct rapl_counter_arch_info *const cai = &rapl_counter_arch_infos[i]; 7656 bool has_counter = 0; 7657 double scale; 7658 enum rapl_unit unit; 7659 unsigned int next_domain; 7660 7661 memset(domain_visited, 0, num_domains * sizeof(*domain_visited)); 7662 7663 for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) { 7664 7665 if (cpu_is_not_allowed(cpu)) 7666 continue; 7667 7668 /* Skip already seen and handled RAPL domains */ 7669 next_domain = 7670 platform->has_per_core_rapl ? cpus[cpu].physical_core_id : cpus[cpu].physical_package_id; 7671 7672 assert(next_domain < num_domains); 7673 7674 if (domain_visited[next_domain]) 7675 continue; 7676 7677 domain_visited[next_domain] = 1; 7678 7679 if ((cai->flags & RAPL_COUNTER_FLAG_PLATFORM_COUNTER) && (cpu != base_cpu)) 7680 continue; 7681 7682 struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain]; 7683 7684 /* Check if the counter is enabled and accessible */ 7685 if (BIC_IS_ENABLED(cai->bic) && (platform->rapl_msrs & cai->feature_mask)) { 7686 7687 /* Use perf API for this counter */ 7688 if (!no_perf && cai->perf_name 7689 && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) { 7690 rci->source[cai->rci_index] = COUNTER_SOURCE_PERF; 7691 rci->scale[cai->rci_index] = scale * cai->compat_scale; 7692 rci->unit[cai->rci_index] = unit; 7693 rci->flags[cai->rci_index] = cai->flags; 7694 7695 /* Use MSR for this counter */ 7696 } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) { 7697 rci->source[cai->rci_index] = COUNTER_SOURCE_MSR; 7698 rci->msr[cai->rci_index] = cai->msr; 7699 rci->msr_mask[cai->rci_index] = cai->msr_mask; 7700 rci->msr_shift[cai->rci_index] = cai->msr_shift; 7701 rci->unit[cai->rci_index] = RAPL_UNIT_JOULES; 7702 rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale; 7703 rci->flags[cai->rci_index] = cai->flags; 7704 } 7705 } 7706 7707 if (rci->source[cai->rci_index] != COUNTER_SOURCE_NONE) 7708 has_counter = 1; 7709 } 7710 7711 /* If any CPU has access to the counter, make it present */ 7712 if (has_counter) 7713 BIC_PRESENT(cai->bic); 7714 } 7715 7716 free(domain_visited); 7717 } 7718 7719 /* Assumes msr_counter_info is populated */ 7720 static int has_amperf_access(void) 7721 { 7722 return msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].present && 7723 msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].present; 7724 } 7725 7726 int *get_cstate_perf_group_fd(struct cstate_counter_info_t *cci, const char *group_name) 7727 { 7728 if (strcmp(group_name, "cstate_core") == 0) 7729 return &cci->fd_perf_core; 7730 7731 if (strcmp(group_name, "cstate_pkg") == 0) 7732 return &cci->fd_perf_pkg; 7733 7734 return NULL; 7735 } 7736 7737 int add_cstate_perf_counter_(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai) 7738 { 7739 if (no_perf) 7740 return -1; 7741 7742 int *pfd_group = get_cstate_perf_group_fd(cci, cai->perf_subsys); 7743 7744 if (pfd_group == NULL) 7745 return -1; 7746 7747 const unsigned int type = read_perf_type(cai->perf_subsys); 7748 const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name); 7749 7750 const int fd_counter = open_perf_counter(cpu, type, config, *pfd_group, PERF_FORMAT_GROUP); 7751 7752 if (fd_counter == -1) 7753 return -1; 7754 7755 /* If it's the first counter opened, make it a group descriptor */ 7756 if (*pfd_group == -1) 7757 *pfd_group = fd_counter; 7758 7759 return fd_counter; 7760 } 7761 7762 int add_cstate_perf_counter(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai) 7763 { 7764 int ret = add_cstate_perf_counter_(cpu, cci, cai); 7765 7766 if (debug >= 2) 7767 fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu); 7768 7769 return ret; 7770 } 7771 7772 int add_msr_perf_counter_(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai) 7773 { 7774 if (no_perf) 7775 return -1; 7776 7777 const unsigned int type = read_perf_type(cai->perf_subsys); 7778 const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name); 7779 7780 const int fd_counter = open_perf_counter(cpu, type, config, cci->fd_perf, PERF_FORMAT_GROUP); 7781 7782 if (fd_counter == -1) 7783 return -1; 7784 7785 /* If it's the first counter opened, make it a group descriptor */ 7786 if (cci->fd_perf == -1) 7787 cci->fd_perf = fd_counter; 7788 7789 return fd_counter; 7790 } 7791 7792 int add_msr_perf_counter(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai) 7793 { 7794 int ret = add_msr_perf_counter_(cpu, cci, cai); 7795 7796 if (debug) 7797 fprintf(stderr, "%s: %s/%s: %d (cpu: %d)\n", __func__, cai->perf_subsys, cai->perf_name, ret, cpu); 7798 7799 return ret; 7800 } 7801 7802 void msr_perf_init_(void) 7803 { 7804 const int mci_num = topo.max_cpu_num + 1; 7805 7806 msr_counter_info = calloc(mci_num, sizeof(*msr_counter_info)); 7807 if (!msr_counter_info) 7808 err(1, "calloc msr_counter_info"); 7809 msr_counter_info_size = mci_num; 7810 7811 for (int cpu = 0; cpu < mci_num; ++cpu) 7812 msr_counter_info[cpu].fd_perf = -1; 7813 7814 for (int cidx = 0; cidx < NUM_MSR_COUNTERS; ++cidx) { 7815 7816 struct msr_counter_arch_info *cai = &msr_counter_arch_infos[cidx]; 7817 7818 cai->present = false; 7819 7820 for (int cpu = 0; cpu < mci_num; ++cpu) { 7821 7822 struct msr_counter_info_t *const cci = &msr_counter_info[cpu]; 7823 7824 if (cpu_is_not_allowed(cpu)) 7825 continue; 7826 7827 if (cai->needed) { 7828 /* Use perf API for this counter */ 7829 if (!no_perf && cai->perf_name && add_msr_perf_counter(cpu, cci, cai) != -1) { 7830 cci->source[cai->rci_index] = COUNTER_SOURCE_PERF; 7831 cai->present = true; 7832 7833 /* User MSR for this counter */ 7834 } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) { 7835 cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; 7836 cci->msr[cai->rci_index] = cai->msr; 7837 cci->msr_mask[cai->rci_index] = cai->msr_mask; 7838 cai->present = true; 7839 } 7840 } 7841 } 7842 } 7843 } 7844 7845 /* Initialize data for reading perf counters from the MSR group. */ 7846 void msr_perf_init(void) 7847 { 7848 bool need_amperf = false, need_smi = false; 7849 const bool need_soft_c1 = (!platform->has_msr_core_c1_res) && (platform->supported_cstates & CC1); 7850 7851 need_amperf = BIC_IS_ENABLED(BIC_Avg_MHz) || BIC_IS_ENABLED(BIC_Busy) || BIC_IS_ENABLED(BIC_Bzy_MHz) 7852 || BIC_IS_ENABLED(BIC_IPC) || need_soft_c1; 7853 7854 if (BIC_IS_ENABLED(BIC_SMI)) 7855 need_smi = true; 7856 7857 /* Enable needed counters */ 7858 msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].needed = need_amperf; 7859 msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].needed = need_amperf; 7860 msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].needed = need_smi; 7861 7862 msr_perf_init_(); 7863 7864 const bool has_amperf = has_amperf_access(); 7865 const bool has_smi = msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].present; 7866 7867 has_aperf_access = has_amperf; 7868 7869 if (has_amperf) { 7870 BIC_PRESENT(BIC_Avg_MHz); 7871 BIC_PRESENT(BIC_Busy); 7872 BIC_PRESENT(BIC_Bzy_MHz); 7873 BIC_PRESENT(BIC_SMI); 7874 } 7875 7876 if (has_smi) 7877 BIC_PRESENT(BIC_SMI); 7878 } 7879 7880 void cstate_perf_init_(bool soft_c1) 7881 { 7882 bool has_counter; 7883 bool *cores_visited = NULL, *pkg_visited = NULL; 7884 const int cores_visited_elems = topo.max_core_id + 1; 7885 const int pkg_visited_elems = topo.max_package_id + 1; 7886 const int cci_num = topo.max_cpu_num + 1; 7887 7888 ccstate_counter_info = calloc(cci_num, sizeof(*ccstate_counter_info)); 7889 if (!ccstate_counter_info) 7890 err(1, "calloc ccstate_counter_arch_info"); 7891 ccstate_counter_info_size = cci_num; 7892 7893 cores_visited = calloc(cores_visited_elems, sizeof(*cores_visited)); 7894 if (!cores_visited) 7895 err(1, "calloc cores_visited"); 7896 7897 pkg_visited = calloc(pkg_visited_elems, sizeof(*pkg_visited)); 7898 if (!pkg_visited) 7899 err(1, "calloc pkg_visited"); 7900 7901 /* Initialize cstate_counter_info_percpu */ 7902 for (int cpu = 0; cpu < cci_num; ++cpu) { 7903 ccstate_counter_info[cpu].fd_perf_core = -1; 7904 ccstate_counter_info[cpu].fd_perf_pkg = -1; 7905 } 7906 7907 for (int cidx = 0; cidx < NUM_CSTATE_COUNTERS; ++cidx) { 7908 has_counter = false; 7909 memset(cores_visited, 0, cores_visited_elems * sizeof(*cores_visited)); 7910 memset(pkg_visited, 0, pkg_visited_elems * sizeof(*pkg_visited)); 7911 7912 const struct cstate_counter_arch_info *cai = &ccstate_counter_arch_infos[cidx]; 7913 7914 for (int cpu = 0; cpu < cci_num; ++cpu) { 7915 7916 struct cstate_counter_info_t *const cci = &ccstate_counter_info[cpu]; 7917 7918 if (cpu_is_not_allowed(cpu)) 7919 continue; 7920 7921 const int core_id = cpus[cpu].physical_core_id; 7922 const int pkg_id = cpus[cpu].physical_package_id; 7923 7924 assert(core_id < cores_visited_elems); 7925 assert(pkg_id < pkg_visited_elems); 7926 7927 const bool per_thread = cai->flags & CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD; 7928 const bool per_core = cai->flags & CSTATE_COUNTER_FLAG_COLLECT_PER_CORE; 7929 7930 if (!per_thread && cores_visited[core_id]) 7931 continue; 7932 7933 if (!per_core && pkg_visited[pkg_id]) 7934 continue; 7935 7936 const bool counter_needed = BIC_IS_ENABLED(cai->bic) || 7937 (soft_c1 && (cai->flags & CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY)); 7938 const bool counter_supported = (platform->supported_cstates & cai->feature_mask); 7939 7940 if (counter_needed && counter_supported) { 7941 /* Use perf API for this counter */ 7942 if (!no_perf && cai->perf_name && add_cstate_perf_counter(cpu, cci, cai) != -1) { 7943 7944 cci->source[cai->rci_index] = COUNTER_SOURCE_PERF; 7945 7946 /* User MSR for this counter */ 7947 } else if (!no_msr && cai->msr && pkg_cstate_limit >= cai->pkg_cstate_limit 7948 && probe_msr(cpu, cai->msr) == 0) { 7949 cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; 7950 cci->msr[cai->rci_index] = cai->msr; 7951 } 7952 } 7953 7954 if (cci->source[cai->rci_index] != COUNTER_SOURCE_NONE) { 7955 has_counter = true; 7956 cores_visited[core_id] = true; 7957 pkg_visited[pkg_id] = true; 7958 } 7959 } 7960 7961 /* If any CPU has access to the counter, make it present */ 7962 if (has_counter) 7963 BIC_PRESENT(cai->bic); 7964 } 7965 7966 free(cores_visited); 7967 free(pkg_visited); 7968 } 7969 7970 void cstate_perf_init(void) 7971 { 7972 /* 7973 * If we don't have a C1 residency MSR, we calculate it "in software", 7974 * but we need APERF, MPERF too. 7975 */ 7976 const bool soft_c1 = !platform->has_msr_core_c1_res && has_amperf_access() 7977 && platform->supported_cstates & CC1; 7978 7979 if (soft_c1) 7980 BIC_PRESENT(BIC_CPU_c1); 7981 7982 cstate_perf_init_(soft_c1); 7983 } 7984 7985 void probe_cstates(void) 7986 { 7987 probe_cst_limit(); 7988 7989 if (platform->has_msr_module_c6_res_ms) 7990 BIC_PRESENT(BIC_Mod_c6); 7991 7992 if (platform->has_ext_cst_msrs && !no_msr) { 7993 BIC_PRESENT(BIC_Totl_c0); 7994 BIC_PRESENT(BIC_Any_c0); 7995 BIC_PRESENT(BIC_GFX_c0); 7996 BIC_PRESENT(BIC_CPUGFX); 7997 } 7998 7999 if (quiet) 8000 return; 8001 8002 dump_power_ctl(); 8003 dump_cst_cfg(); 8004 decode_c6_demotion_policy_msr(); 8005 print_dev_latency(); 8006 dump_sysfs_cstate_config(); 8007 print_irtl(); 8008 } 8009 8010 void probe_lpi(void) 8011 { 8012 if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK)) 8013 BIC_PRESENT(BIC_CPU_LPI); 8014 else 8015 BIC_NOT_PRESENT(BIC_CPU_LPI); 8016 8017 if (!access(sys_lpi_file_sysfs, R_OK)) { 8018 sys_lpi_file = sys_lpi_file_sysfs; 8019 BIC_PRESENT(BIC_SYS_LPI); 8020 } else if (!access(sys_lpi_file_debugfs, R_OK)) { 8021 sys_lpi_file = sys_lpi_file_debugfs; 8022 BIC_PRESENT(BIC_SYS_LPI); 8023 } else { 8024 sys_lpi_file_sysfs = NULL; 8025 BIC_NOT_PRESENT(BIC_SYS_LPI); 8026 } 8027 8028 } 8029 8030 void probe_pstates(void) 8031 { 8032 probe_bclk(); 8033 8034 if (quiet) 8035 return; 8036 8037 dump_platform_info(); 8038 dump_turbo_ratio_info(); 8039 dump_sysfs_pstate_config(); 8040 decode_misc_pwr_mgmt_msr(); 8041 8042 for_all_cpus(print_hwp, ODD_COUNTERS); 8043 for_all_cpus(print_epb, ODD_COUNTERS); 8044 for_all_cpus(print_perf_limit, ODD_COUNTERS); 8045 } 8046 8047 void process_cpuid() 8048 { 8049 unsigned int eax, ebx, ecx, edx; 8050 unsigned int fms, family, model, stepping, ecx_flags, edx_flags; 8051 unsigned long long ucode_patch = 0; 8052 bool ucode_patch_valid = false; 8053 8054 eax = ebx = ecx = edx = 0; 8055 8056 __cpuid(0, max_level, ebx, ecx, edx); 8057 8058 if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) 8059 genuine_intel = 1; 8060 else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) 8061 authentic_amd = 1; 8062 else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e) 8063 hygon_genuine = 1; 8064 8065 if (!quiet) 8066 fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n", 8067 (char *)&ebx, (char *)&edx, (char *)&ecx, max_level); 8068 8069 __cpuid(1, fms, ebx, ecx, edx); 8070 family = (fms >> 8) & 0xf; 8071 model = (fms >> 4) & 0xf; 8072 stepping = fms & 0xf; 8073 if (family == 0xf) 8074 family += (fms >> 20) & 0xff; 8075 if (family >= 6) 8076 model += ((fms >> 16) & 0xf) << 4; 8077 ecx_flags = ecx; 8078 edx_flags = edx; 8079 8080 if (!no_msr) { 8081 if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch)) 8082 warnx("get_msr(UCODE)"); 8083 else 8084 ucode_patch_valid = true; 8085 } 8086 8087 /* 8088 * check max extended function levels of CPUID. 8089 * This is needed to check for invariant TSC. 8090 * This check is valid for both Intel and AMD. 8091 */ 8092 ebx = ecx = edx = 0; 8093 __cpuid(0x80000000, max_extended_level, ebx, ecx, edx); 8094 8095 if (!quiet) { 8096 fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)", 8097 family, model, stepping, family, model, stepping); 8098 if (ucode_patch_valid) 8099 fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF)); 8100 fputc('\n', outf); 8101 8102 fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level); 8103 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n", 8104 ecx_flags & (1 << 0) ? "SSE3" : "-", 8105 ecx_flags & (1 << 3) ? "MONITOR" : "-", 8106 ecx_flags & (1 << 6) ? "SMX" : "-", 8107 ecx_flags & (1 << 7) ? "EIST" : "-", 8108 ecx_flags & (1 << 8) ? "TM2" : "-", 8109 edx_flags & (1 << 4) ? "TSC" : "-", 8110 edx_flags & (1 << 5) ? "MSR" : "-", 8111 edx_flags & (1 << 22) ? "ACPI-TM" : "-", 8112 edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-"); 8113 } 8114 8115 probe_platform_features(family, model); 8116 8117 if (!(edx_flags & (1 << 5))) 8118 errx(1, "CPUID: no MSR"); 8119 8120 if (max_extended_level >= 0x80000007) { 8121 8122 /* 8123 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 8124 * this check is valid for both Intel and AMD 8125 */ 8126 __cpuid(0x80000007, eax, ebx, ecx, edx); 8127 has_invariant_tsc = edx & (1 << 8); 8128 } 8129 8130 /* 8131 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 8132 * this check is valid for both Intel and AMD 8133 */ 8134 8135 __cpuid(0x6, eax, ebx, ecx, edx); 8136 has_aperf = ecx & (1 << 0); 8137 do_dts = eax & (1 << 0); 8138 if (do_dts) 8139 BIC_PRESENT(BIC_CoreTmp); 8140 has_turbo = eax & (1 << 1); 8141 do_ptm = eax & (1 << 6); 8142 if (do_ptm) 8143 BIC_PRESENT(BIC_PkgTmp); 8144 has_hwp = eax & (1 << 7); 8145 has_hwp_notify = eax & (1 << 8); 8146 has_hwp_activity_window = eax & (1 << 9); 8147 has_hwp_epp = eax & (1 << 10); 8148 has_hwp_pkg = eax & (1 << 11); 8149 has_epb = ecx & (1 << 3); 8150 8151 if (!quiet) 8152 fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, " 8153 "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", 8154 has_aperf ? "" : "No-", 8155 has_turbo ? "" : "No-", 8156 do_dts ? "" : "No-", 8157 do_ptm ? "" : "No-", 8158 has_hwp ? "" : "No-", 8159 has_hwp_notify ? "" : "No-", 8160 has_hwp_activity_window ? "" : "No-", 8161 has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-"); 8162 8163 if (!quiet) 8164 decode_misc_enable_msr(); 8165 8166 if (max_level >= 0x7 && !quiet) { 8167 int has_sgx; 8168 8169 ecx = 0; 8170 8171 __cpuid_count(0x7, 0, eax, ebx, ecx, edx); 8172 8173 has_sgx = ebx & (1 << 2); 8174 8175 is_hybrid = edx & (1 << 15); 8176 8177 fprintf(outf, "CPUID(7): %sSGX %sHybrid\n", has_sgx ? "" : "No-", is_hybrid ? "" : "No-"); 8178 8179 if (has_sgx) 8180 decode_feature_control_msr(); 8181 } 8182 8183 if (max_level >= 0x15) { 8184 unsigned int eax_crystal; 8185 unsigned int ebx_tsc; 8186 8187 /* 8188 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz 8189 */ 8190 eax_crystal = ebx_tsc = crystal_hz = edx = 0; 8191 __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx); 8192 8193 if (ebx_tsc != 0) { 8194 if (!quiet && (ebx != 0)) 8195 fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", 8196 eax_crystal, ebx_tsc, crystal_hz); 8197 8198 if (crystal_hz == 0) 8199 crystal_hz = platform->crystal_freq; 8200 8201 if (crystal_hz) { 8202 tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal; 8203 if (!quiet) 8204 fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", 8205 tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); 8206 } 8207 } 8208 } 8209 if (max_level >= 0x16) { 8210 unsigned int base_mhz, max_mhz, bus_mhz, edx; 8211 8212 /* 8213 * CPUID 16H Base MHz, Max MHz, Bus MHz 8214 */ 8215 base_mhz = max_mhz = bus_mhz = edx = 0; 8216 8217 __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx); 8218 8219 bclk = bus_mhz; 8220 8221 base_hz = base_mhz * 1000000; 8222 has_base_hz = 1; 8223 8224 if (platform->enable_tsc_tweak) 8225 tsc_tweak = base_hz / tsc_hz; 8226 8227 if (!quiet) 8228 fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", 8229 base_mhz, max_mhz, bus_mhz); 8230 } 8231 8232 if (has_aperf) 8233 aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1; 8234 8235 BIC_PRESENT(BIC_IRQ); 8236 BIC_PRESENT(BIC_TSC_MHz); 8237 } 8238 8239 static void counter_info_init(void) 8240 { 8241 for (int i = 0; i < NUM_CSTATE_COUNTERS; ++i) { 8242 struct cstate_counter_arch_info *const cai = &ccstate_counter_arch_infos[i]; 8243 8244 if (platform->has_msr_knl_core_c6_residency && cai->msr == MSR_CORE_C6_RESIDENCY) 8245 cai->msr = MSR_KNL_CORE_C6_RESIDENCY; 8246 8247 if (!platform->has_msr_core_c1_res && cai->msr == MSR_CORE_C1_RES) 8248 cai->msr = 0; 8249 8250 if (platform->has_msr_atom_pkg_c6_residency && cai->msr == MSR_PKG_C6_RESIDENCY) 8251 cai->msr = MSR_ATOM_PKG_C6_RESIDENCY; 8252 } 8253 8254 for (int i = 0; i < NUM_MSR_COUNTERS; ++i) { 8255 msr_counter_arch_infos[i].present = false; 8256 msr_counter_arch_infos[i].needed = false; 8257 } 8258 } 8259 8260 void probe_pm_features(void) 8261 { 8262 probe_pstates(); 8263 8264 probe_cstates(); 8265 8266 probe_lpi(); 8267 8268 probe_intel_uncore_frequency(); 8269 8270 probe_graphics(); 8271 8272 probe_rapl(); 8273 8274 probe_thermal(); 8275 8276 if (platform->has_nhm_msrs && !no_msr) 8277 BIC_PRESENT(BIC_SMI); 8278 8279 if (!quiet) 8280 decode_misc_feature_control(); 8281 } 8282 8283 /* 8284 * in /dev/cpu/ return success for names that are numbers 8285 * ie. filter out ".", "..", "microcode". 8286 */ 8287 int dir_filter(const struct dirent *dirp) 8288 { 8289 if (isdigit(dirp->d_name[0])) 8290 return 1; 8291 else 8292 return 0; 8293 } 8294 8295 void topology_probe(bool startup) 8296 { 8297 int i; 8298 int max_core_id = 0; 8299 int max_package_id = 0; 8300 int max_siblings = 0; 8301 8302 /* Initialize num_cpus, max_cpu_num */ 8303 set_max_cpu_num(); 8304 topo.num_cpus = 0; 8305 for_all_proc_cpus(count_cpus); 8306 if (!summary_only) 8307 BIC_PRESENT(BIC_CPU); 8308 8309 if (debug > 1) 8310 fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); 8311 8312 cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); 8313 if (cpus == NULL) 8314 err(1, "calloc cpus"); 8315 8316 /* 8317 * Allocate and initialize cpu_present_set 8318 */ 8319 cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1)); 8320 if (cpu_present_set == NULL) 8321 err(3, "CPU_ALLOC"); 8322 cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 8323 CPU_ZERO_S(cpu_present_setsize, cpu_present_set); 8324 for_all_proc_cpus(mark_cpu_present); 8325 8326 /* 8327 * Allocate and initialize cpu_effective_set 8328 */ 8329 cpu_effective_set = CPU_ALLOC((topo.max_cpu_num + 1)); 8330 if (cpu_effective_set == NULL) 8331 err(3, "CPU_ALLOC"); 8332 cpu_effective_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 8333 CPU_ZERO_S(cpu_effective_setsize, cpu_effective_set); 8334 update_effective_set(startup); 8335 8336 /* 8337 * Allocate and initialize cpu_allowed_set 8338 */ 8339 cpu_allowed_set = CPU_ALLOC((topo.max_cpu_num + 1)); 8340 if (cpu_allowed_set == NULL) 8341 err(3, "CPU_ALLOC"); 8342 cpu_allowed_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 8343 CPU_ZERO_S(cpu_allowed_setsize, cpu_allowed_set); 8344 8345 /* 8346 * Validate and update cpu_allowed_set. 8347 * 8348 * Make sure all cpus in cpu_subset are also in cpu_present_set during startup. 8349 * Give a warning when cpus in cpu_subset become unavailable at runtime. 8350 * Give a warning when cpus are not effective because of cgroup setting. 8351 * 8352 * cpu_allowed_set is the intersection of cpu_present_set/cpu_effective_set/cpu_subset. 8353 */ 8354 for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) { 8355 if (cpu_subset && !CPU_ISSET_S(i, cpu_subset_size, cpu_subset)) 8356 continue; 8357 8358 if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) { 8359 if (cpu_subset) { 8360 /* cpus in cpu_subset must be in cpu_present_set during startup */ 8361 if (startup) 8362 err(1, "cpu%d not present", i); 8363 else 8364 fprintf(stderr, "cpu%d not present\n", i); 8365 } 8366 continue; 8367 } 8368 8369 if (CPU_COUNT_S(cpu_effective_setsize, cpu_effective_set)) { 8370 if (!CPU_ISSET_S(i, cpu_effective_setsize, cpu_effective_set)) { 8371 fprintf(stderr, "cpu%d not effective\n", i); 8372 continue; 8373 } 8374 } 8375 8376 CPU_SET_S(i, cpu_allowed_setsize, cpu_allowed_set); 8377 } 8378 8379 if (!CPU_COUNT_S(cpu_allowed_setsize, cpu_allowed_set)) 8380 err(-ENODEV, "No valid cpus found"); 8381 sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set); 8382 8383 /* 8384 * Allocate and initialize cpu_affinity_set 8385 */ 8386 cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1)); 8387 if (cpu_affinity_set == NULL) 8388 err(3, "CPU_ALLOC"); 8389 cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 8390 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 8391 8392 for_all_proc_cpus(init_thread_id); 8393 8394 for_all_proc_cpus(set_cpu_hybrid_type); 8395 8396 /* 8397 * For online cpus 8398 * find max_core_id, max_package_id 8399 */ 8400 for (i = 0; i <= topo.max_cpu_num; ++i) { 8401 int siblings; 8402 8403 if (cpu_is_not_present(i)) { 8404 if (debug > 1) 8405 fprintf(outf, "cpu%d NOT PRESENT\n", i); 8406 continue; 8407 } 8408 8409 cpus[i].logical_cpu_id = i; 8410 8411 /* get package information */ 8412 cpus[i].physical_package_id = get_physical_package_id(i); 8413 if (cpus[i].physical_package_id > max_package_id) 8414 max_package_id = cpus[i].physical_package_id; 8415 8416 /* get die information */ 8417 cpus[i].die_id = get_die_id(i); 8418 if (cpus[i].die_id > topo.max_die_id) 8419 topo.max_die_id = cpus[i].die_id; 8420 8421 /* get numa node information */ 8422 cpus[i].physical_node_id = get_physical_node_id(&cpus[i]); 8423 if (cpus[i].physical_node_id > topo.max_node_num) 8424 topo.max_node_num = cpus[i].physical_node_id; 8425 8426 /* get core information */ 8427 cpus[i].physical_core_id = get_core_id(i); 8428 if (cpus[i].physical_core_id > max_core_id) 8429 max_core_id = cpus[i].physical_core_id; 8430 8431 /* get thread information */ 8432 siblings = get_thread_siblings(&cpus[i]); 8433 if (siblings > max_siblings) 8434 max_siblings = siblings; 8435 if (cpus[i].thread_id == 0) 8436 topo.num_cores++; 8437 } 8438 topo.max_core_id = max_core_id; 8439 topo.max_package_id = max_package_id; 8440 8441 topo.cores_per_node = max_core_id + 1; 8442 if (debug > 1) 8443 fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node); 8444 if (!summary_only) 8445 BIC_PRESENT(BIC_Core); 8446 8447 topo.num_die = topo.max_die_id + 1; 8448 if (debug > 1) 8449 fprintf(outf, "max_die_id %d, sizing for %d die\n", topo.max_die_id, topo.num_die); 8450 if (!summary_only && topo.num_die > 1) 8451 BIC_PRESENT(BIC_Die); 8452 8453 topo.num_packages = max_package_id + 1; 8454 if (debug > 1) 8455 fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); 8456 if (!summary_only && topo.num_packages > 1) 8457 BIC_PRESENT(BIC_Package); 8458 8459 set_node_data(); 8460 if (debug > 1) 8461 fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg); 8462 if (!summary_only && topo.nodes_per_pkg > 1) 8463 BIC_PRESENT(BIC_Node); 8464 8465 topo.threads_per_core = max_siblings; 8466 if (debug > 1) 8467 fprintf(outf, "max_siblings %d\n", max_siblings); 8468 8469 if (debug < 1) 8470 return; 8471 8472 for (i = 0; i <= topo.max_cpu_num; ++i) { 8473 if (cpu_is_not_present(i)) 8474 continue; 8475 fprintf(outf, 8476 "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n", 8477 i, cpus[i].physical_package_id, cpus[i].die_id, 8478 cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, cpus[i].thread_id); 8479 } 8480 8481 } 8482 8483 void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) 8484 { 8485 int i; 8486 int num_cores = topo.cores_per_node * topo.nodes_per_pkg * topo.num_packages; 8487 int num_threads = topo.threads_per_core * num_cores; 8488 8489 *t = calloc(num_threads, sizeof(struct thread_data)); 8490 if (*t == NULL) 8491 goto error; 8492 8493 for (i = 0; i < num_threads; i++) 8494 (*t)[i].cpu_id = -1; 8495 8496 *c = calloc(num_cores, sizeof(struct core_data)); 8497 if (*c == NULL) 8498 goto error; 8499 8500 for (i = 0; i < num_cores; i++) { 8501 (*c)[i].core_id = -1; 8502 (*c)[i].base_cpu = -1; 8503 } 8504 8505 *p = calloc(topo.num_packages, sizeof(struct pkg_data)); 8506 if (*p == NULL) 8507 goto error; 8508 8509 for (i = 0; i < topo.num_packages; i++) { 8510 (*p)[i].package_id = i; 8511 (*p)[i].base_cpu = -1; 8512 } 8513 8514 return; 8515 error: 8516 err(1, "calloc counters"); 8517 } 8518 8519 /* 8520 * init_counter() 8521 * 8522 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE 8523 */ 8524 void init_counter(struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base, int cpu_id) 8525 { 8526 int pkg_id = cpus[cpu_id].physical_package_id; 8527 int node_id = cpus[cpu_id].logical_node_id; 8528 int core_id = cpus[cpu_id].physical_core_id; 8529 int thread_id = cpus[cpu_id].thread_id; 8530 struct thread_data *t; 8531 struct core_data *c; 8532 struct pkg_data *p; 8533 8534 /* Workaround for systems where physical_node_id==-1 8535 * and logical_node_id==(-1 - topo.num_cpus) 8536 */ 8537 if (node_id < 0) 8538 node_id = 0; 8539 8540 t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id); 8541 c = GET_CORE(core_base, core_id, node_id, pkg_id); 8542 p = GET_PKG(pkg_base, pkg_id); 8543 8544 t->cpu_id = cpu_id; 8545 if (!cpu_is_not_allowed(cpu_id)) { 8546 if (c->base_cpu < 0) 8547 c->base_cpu = t->cpu_id; 8548 if (p->base_cpu < 0) 8549 p->base_cpu = t->cpu_id; 8550 } 8551 8552 c->core_id = core_id; 8553 p->package_id = pkg_id; 8554 } 8555 8556 int initialize_counters(int cpu_id) 8557 { 8558 init_counter(EVEN_COUNTERS, cpu_id); 8559 init_counter(ODD_COUNTERS, cpu_id); 8560 return 0; 8561 } 8562 8563 void allocate_output_buffer() 8564 { 8565 output_buffer = calloc(1, (1 + topo.num_cpus) * 2048); 8566 outp = output_buffer; 8567 if (outp == NULL) 8568 err(-1, "calloc output buffer"); 8569 } 8570 8571 void allocate_fd_percpu(void) 8572 { 8573 fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 8574 if (fd_percpu == NULL) 8575 err(-1, "calloc fd_percpu"); 8576 } 8577 8578 void allocate_irq_buffers(void) 8579 { 8580 irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int)); 8581 if (irq_column_2_cpu == NULL) 8582 err(-1, "calloc %d", topo.num_cpus); 8583 8584 irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 8585 if (irqs_per_cpu == NULL) 8586 err(-1, "calloc %d", topo.max_cpu_num + 1); 8587 } 8588 8589 int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p) 8590 { 8591 topo.allowed_cpus++; 8592 if ((int)t->cpu_id == c->base_cpu) 8593 topo.allowed_cores++; 8594 if ((int)t->cpu_id == p->base_cpu) 8595 topo.allowed_packages++; 8596 8597 return 0; 8598 } 8599 8600 void topology_update(void) 8601 { 8602 topo.allowed_cpus = 0; 8603 topo.allowed_cores = 0; 8604 topo.allowed_packages = 0; 8605 for_all_cpus(update_topo, ODD_COUNTERS); 8606 } 8607 8608 void setup_all_buffers(bool startup) 8609 { 8610 topology_probe(startup); 8611 allocate_irq_buffers(); 8612 allocate_fd_percpu(); 8613 allocate_counters(&thread_even, &core_even, &package_even); 8614 allocate_counters(&thread_odd, &core_odd, &package_odd); 8615 allocate_output_buffer(); 8616 for_all_proc_cpus(initialize_counters); 8617 topology_update(); 8618 } 8619 8620 void set_base_cpu(void) 8621 { 8622 int i; 8623 8624 for (i = 0; i < topo.max_cpu_num + 1; ++i) { 8625 if (cpu_is_not_allowed(i)) 8626 continue; 8627 base_cpu = i; 8628 if (debug > 1) 8629 fprintf(outf, "base_cpu = %d\n", base_cpu); 8630 return; 8631 } 8632 err(-ENODEV, "No valid cpus found"); 8633 } 8634 8635 bool has_added_counters(void) 8636 { 8637 /* 8638 * It only makes sense to call this after the command line is parsed, 8639 * otherwise sys structure is not populated. 8640 */ 8641 8642 return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters; 8643 } 8644 8645 void check_msr_access(void) 8646 { 8647 check_dev_msr(); 8648 check_msr_permission(); 8649 8650 if (no_msr) 8651 bic_disable_msr_access(); 8652 } 8653 8654 void check_perf_access(void) 8655 { 8656 if (no_perf || !BIC_IS_ENABLED(BIC_IPC) || !has_instr_count_access()) 8657 bic_enabled &= ~BIC_IPC; 8658 } 8659 8660 bool perf_has_hybrid_devices(void) 8661 { 8662 /* 8663 * 0: unknown 8664 * 1: has separate perf device for p and e core 8665 * -1: doesn't have separate perf device for p and e core 8666 */ 8667 static int cached; 8668 8669 if (cached > 0) 8670 return true; 8671 8672 if (cached < 0) 8673 return false; 8674 8675 if (access("/sys/bus/event_source/devices/cpu_core", F_OK)) { 8676 cached = -1; 8677 return false; 8678 } 8679 8680 if (access("/sys/bus/event_source/devices/cpu_atom", F_OK)) { 8681 cached = -1; 8682 return false; 8683 } 8684 8685 cached = 1; 8686 return true; 8687 } 8688 8689 int added_perf_counters_init_(struct perf_counter_info *pinfo) 8690 { 8691 size_t num_domains = 0; 8692 unsigned int next_domain; 8693 bool *domain_visited; 8694 unsigned int perf_type, perf_config; 8695 double perf_scale; 8696 int fd_perf; 8697 8698 if (!pinfo) 8699 return 0; 8700 8701 const size_t max_num_domains = MAX(topo.max_cpu_num + 1, MAX(topo.max_core_id + 1, topo.max_package_id + 1)); 8702 8703 domain_visited = calloc(max_num_domains, sizeof(*domain_visited)); 8704 8705 while (pinfo) { 8706 switch (pinfo->scope) { 8707 case SCOPE_CPU: 8708 num_domains = topo.max_cpu_num + 1; 8709 break; 8710 8711 case SCOPE_CORE: 8712 num_domains = topo.max_core_id + 1; 8713 break; 8714 8715 case SCOPE_PACKAGE: 8716 num_domains = topo.max_package_id + 1; 8717 break; 8718 } 8719 8720 /* Allocate buffer for file descriptor for each domain. */ 8721 pinfo->fd_perf_per_domain = calloc(num_domains, sizeof(*pinfo->fd_perf_per_domain)); 8722 if (!pinfo->fd_perf_per_domain) 8723 errx(1, "%s: alloc %s", __func__, "fd_perf_per_domain"); 8724 8725 for (size_t i = 0; i < num_domains; ++i) 8726 pinfo->fd_perf_per_domain[i] = -1; 8727 8728 pinfo->num_domains = num_domains; 8729 pinfo->scale = 1.0; 8730 8731 memset(domain_visited, 0, max_num_domains * sizeof(*domain_visited)); 8732 8733 for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) { 8734 8735 next_domain = cpu_to_domain(pinfo, cpu); 8736 8737 assert(next_domain < num_domains); 8738 8739 if (cpu_is_not_allowed(cpu)) 8740 continue; 8741 8742 if (domain_visited[next_domain]) 8743 continue; 8744 8745 /* 8746 * Intel hybrid platforms expose different perf devices for P and E cores. 8747 * Instead of one, "/sys/bus/event_source/devices/cpu" device, there are 8748 * "/sys/bus/event_source/devices/{cpu_core,cpu_atom}". 8749 * 8750 * This makes it more complicated to the user, because most of the counters 8751 * are available on both and have to be handled manually, otherwise. 8752 * 8753 * Code below, allow user to use the old "cpu" name, which is translated accordingly. 8754 */ 8755 const char *perf_device = pinfo->device; 8756 8757 if (strcmp(perf_device, "cpu") == 0 && perf_has_hybrid_devices()) { 8758 switch (cpus[cpu].type) { 8759 case INTEL_PCORE_TYPE: 8760 perf_device = "cpu_core"; 8761 break; 8762 8763 case INTEL_ECORE_TYPE: 8764 perf_device = "cpu_atom"; 8765 break; 8766 8767 default: /* Don't change, we will probably fail and report a problem soon. */ 8768 break; 8769 } 8770 } 8771 8772 perf_type = read_perf_type(perf_device); 8773 if (perf_type == (unsigned int)-1) { 8774 warnx("%s: perf/%s/%s: failed to read %s", 8775 __func__, perf_device, pinfo->event, "type"); 8776 continue; 8777 } 8778 8779 perf_config = read_perf_config(perf_device, pinfo->event); 8780 if (perf_config == (unsigned int)-1) { 8781 warnx("%s: perf/%s/%s: failed to read %s", 8782 __func__, perf_device, pinfo->event, "config"); 8783 continue; 8784 } 8785 8786 /* Scale is not required, some counters just don't have it. */ 8787 perf_scale = read_perf_scale(perf_device, pinfo->event); 8788 if (perf_scale == 0.0) 8789 perf_scale = 1.0; 8790 8791 fd_perf = open_perf_counter(cpu, perf_type, perf_config, -1, 0); 8792 if (fd_perf == -1) { 8793 warnx("%s: perf/%s/%s: failed to open counter on cpu%d", 8794 __func__, perf_device, pinfo->event, cpu); 8795 continue; 8796 } 8797 8798 domain_visited[next_domain] = 1; 8799 pinfo->fd_perf_per_domain[next_domain] = fd_perf; 8800 pinfo->scale = perf_scale; 8801 8802 if (debug) 8803 fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n", 8804 perf_device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); 8805 } 8806 8807 pinfo = pinfo->next; 8808 } 8809 8810 free(domain_visited); 8811 8812 return 0; 8813 } 8814 8815 void added_perf_counters_init(void) 8816 { 8817 if (added_perf_counters_init_(sys.perf_tp)) 8818 errx(1, "%s: %s", __func__, "thread"); 8819 8820 if (added_perf_counters_init_(sys.perf_cp)) 8821 errx(1, "%s: %s", __func__, "core"); 8822 8823 if (added_perf_counters_init_(sys.perf_pp)) 8824 errx(1, "%s: %s", __func__, "package"); 8825 } 8826 8827 int parse_telem_info_file(int fd_dir, const char *info_filename, const char *format, unsigned long *output) 8828 { 8829 int fd_telem_info; 8830 FILE *file_telem_info; 8831 unsigned long value; 8832 8833 fd_telem_info = openat(fd_dir, info_filename, O_RDONLY); 8834 if (fd_telem_info == -1) 8835 return -1; 8836 8837 file_telem_info = fdopen(fd_telem_info, "r"); 8838 if (file_telem_info == NULL) { 8839 close(fd_telem_info); 8840 return -1; 8841 } 8842 8843 if (fscanf(file_telem_info, format, &value) != 1) { 8844 fclose(file_telem_info); 8845 return -1; 8846 } 8847 8848 fclose(file_telem_info); 8849 8850 *output = value; 8851 8852 return 0; 8853 } 8854 8855 struct pmt_mmio *pmt_mmio_open(unsigned int target_guid) 8856 { 8857 DIR *dirp; 8858 struct dirent *entry; 8859 struct stat st; 8860 unsigned int telem_idx; 8861 int fd_telem_dir, fd_pmt; 8862 unsigned long guid, size, offset; 8863 size_t mmap_size; 8864 void *mmio; 8865 struct pmt_mmio *ret = NULL; 8866 8867 if (stat(SYSFS_TELEM_PATH, &st) == -1) 8868 return NULL; 8869 8870 dirp = opendir(SYSFS_TELEM_PATH); 8871 if (dirp == NULL) 8872 return NULL; 8873 8874 for (;;) { 8875 entry = readdir(dirp); 8876 8877 if (entry == NULL) 8878 break; 8879 8880 if (strcmp(entry->d_name, ".") == 0) 8881 continue; 8882 8883 if (strcmp(entry->d_name, "..") == 0) 8884 continue; 8885 8886 if (sscanf(entry->d_name, "telem%u", &telem_idx) != 1) 8887 continue; 8888 8889 if (fstatat(dirfd(dirp), entry->d_name, &st, 0) == -1) { 8890 break; 8891 } 8892 8893 if (!S_ISDIR(st.st_mode)) 8894 continue; 8895 8896 fd_telem_dir = openat(dirfd(dirp), entry->d_name, O_RDONLY); 8897 if (fd_telem_dir == -1) { 8898 break; 8899 } 8900 8901 if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) { 8902 close(fd_telem_dir); 8903 break; 8904 } 8905 8906 if (parse_telem_info_file(fd_telem_dir, "size", "%lu", &size)) { 8907 close(fd_telem_dir); 8908 break; 8909 } 8910 8911 if (guid != target_guid) { 8912 close(fd_telem_dir); 8913 continue; 8914 } 8915 8916 if (parse_telem_info_file(fd_telem_dir, "offset", "%lu", &offset)) { 8917 close(fd_telem_dir); 8918 break; 8919 } 8920 8921 assert(offset == 0); 8922 8923 fd_pmt = openat(fd_telem_dir, "telem", O_RDONLY); 8924 if (fd_pmt == -1) 8925 goto loop_cleanup_and_break; 8926 8927 mmap_size = (size + 0x1000UL) & (~0x1000UL); 8928 mmio = mmap(0, mmap_size, PROT_READ, MAP_SHARED, fd_pmt, 0); 8929 if (mmio != MAP_FAILED) { 8930 8931 if (debug) 8932 fprintf(stderr, "%s: 0x%lx mmaped at: %p\n", __func__, guid, mmio); 8933 8934 ret = calloc(1, sizeof(*ret)); 8935 8936 if (!ret) { 8937 fprintf(stderr, "%s: Failed to allocate pmt_mmio\n", __func__); 8938 exit(1); 8939 } 8940 8941 ret->guid = guid; 8942 ret->mmio_base = mmio; 8943 ret->pmt_offset = offset; 8944 ret->size = size; 8945 8946 ret->next = pmt_mmios; 8947 pmt_mmios = ret; 8948 } 8949 8950 loop_cleanup_and_break: 8951 close(fd_pmt); 8952 close(fd_telem_dir); 8953 break; 8954 } 8955 8956 closedir(dirp); 8957 8958 return ret; 8959 } 8960 8961 struct pmt_mmio *pmt_mmio_find(unsigned int guid) 8962 { 8963 struct pmt_mmio *pmmio = pmt_mmios; 8964 8965 while (pmmio) { 8966 if (pmmio->guid == guid) 8967 return pmmio; 8968 8969 pmmio = pmmio->next; 8970 } 8971 8972 return NULL; 8973 } 8974 8975 void *pmt_get_counter_pointer(struct pmt_mmio *pmmio, unsigned long counter_offset) 8976 { 8977 char *ret; 8978 8979 /* Get base of mmaped PMT file. */ 8980 ret = (char *)pmmio->mmio_base; 8981 8982 /* 8983 * Apply PMT MMIO offset to obtain beginning of the mmaped telemetry data. 8984 * It's not guaranteed that the mmaped memory begins with the telemetry data 8985 * - we might have to apply the offset first. 8986 */ 8987 ret += pmmio->pmt_offset; 8988 8989 /* Apply the counter offset to get the address to the mmaped counter. */ 8990 ret += counter_offset; 8991 8992 return ret; 8993 } 8994 8995 struct pmt_mmio *pmt_add_guid(unsigned int guid) 8996 { 8997 struct pmt_mmio *ret; 8998 8999 ret = pmt_mmio_find(guid); 9000 if (!ret) 9001 ret = pmt_mmio_open(guid); 9002 9003 return ret; 9004 } 9005 9006 enum pmt_open_mode { 9007 PMT_OPEN_TRY, /* Open failure is not an error. */ 9008 PMT_OPEN_REQUIRED, /* Open failure is a fatal error. */ 9009 }; 9010 9011 struct pmt_counter *pmt_find_counter(struct pmt_counter *pcounter, const char *name) 9012 { 9013 while (pcounter) { 9014 if (strcmp(pcounter->name, name) == 0) 9015 break; 9016 9017 pcounter = pcounter->next; 9018 } 9019 9020 return pcounter; 9021 } 9022 9023 struct pmt_counter **pmt_get_scope_root(enum counter_scope scope) 9024 { 9025 switch (scope) { 9026 case SCOPE_CPU: 9027 return &sys.pmt_tp; 9028 case SCOPE_CORE: 9029 return &sys.pmt_cp; 9030 case SCOPE_PACKAGE: 9031 return &sys.pmt_pp; 9032 } 9033 9034 __builtin_unreachable(); 9035 } 9036 9037 void pmt_counter_add_domain(struct pmt_counter *pcounter, unsigned long *pmmio, unsigned int domain_id) 9038 { 9039 /* Make sure the new domain fits. */ 9040 if (domain_id >= pcounter->num_domains) 9041 pmt_counter_resize(pcounter, domain_id + 1); 9042 9043 assert(pcounter->domains); 9044 assert(domain_id < pcounter->num_domains); 9045 9046 pcounter->domains[domain_id].pcounter = pmmio; 9047 } 9048 9049 int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type, 9050 unsigned int lsb, unsigned int msb, unsigned int offset, enum counter_scope scope, 9051 enum counter_format format, unsigned int domain_id, enum pmt_open_mode mode) 9052 { 9053 struct pmt_mmio *mmio; 9054 struct pmt_counter *pcounter; 9055 struct pmt_counter **const pmt_root = pmt_get_scope_root(scope); 9056 bool new_counter = false; 9057 int conflict = 0; 9058 9059 if (lsb > msb) { 9060 fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "lsb <= msb", name); 9061 exit(1); 9062 } 9063 9064 if (msb >= 64) { 9065 fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "msb < 64", name); 9066 exit(1); 9067 } 9068 9069 mmio = pmt_add_guid(guid); 9070 if (!mmio) { 9071 if (mode != PMT_OPEN_TRY) { 9072 fprintf(stderr, "%s: failed to map PMT MMIO for guid %x\n", __func__, guid); 9073 exit(1); 9074 } 9075 9076 return 1; 9077 } 9078 9079 if (offset >= mmio->size) { 9080 if (mode != PMT_OPEN_TRY) { 9081 fprintf(stderr, "%s: offset %u outside of PMT MMIO size %u\n", __func__, offset, mmio->size); 9082 exit(1); 9083 } 9084 9085 return 1; 9086 } 9087 9088 pcounter = pmt_find_counter(*pmt_root, name); 9089 if (!pcounter) { 9090 pcounter = calloc(1, sizeof(*pcounter)); 9091 new_counter = true; 9092 } 9093 9094 if (new_counter) { 9095 strncpy(pcounter->name, name, ARRAY_SIZE(pcounter->name) - 1); 9096 pcounter->type = type; 9097 pcounter->scope = scope; 9098 pcounter->lsb = lsb; 9099 pcounter->msb = msb; 9100 pcounter->format = format; 9101 } else { 9102 conflict += pcounter->type != type; 9103 conflict += pcounter->scope != scope; 9104 conflict += pcounter->lsb != lsb; 9105 conflict += pcounter->msb != msb; 9106 conflict += pcounter->format != format; 9107 } 9108 9109 if (conflict) { 9110 fprintf(stderr, "%s: conflicting parameters for the PMT counter with the same name %s\n", 9111 __func__, name); 9112 exit(1); 9113 } 9114 9115 pmt_counter_add_domain(pcounter, pmt_get_counter_pointer(mmio, offset), domain_id); 9116 9117 if (new_counter) { 9118 pcounter->next = *pmt_root; 9119 *pmt_root = pcounter; 9120 } 9121 9122 return 0; 9123 } 9124 9125 void pmt_init(void) 9126 { 9127 if (BIC_IS_ENABLED(BIC_Diec6)) { 9128 pmt_add_counter(PMT_MTL_DC6_GUID, "Die%c6", PMT_TYPE_XTAL_TIME, PMT_COUNTER_MTL_DC6_LSB, 9129 PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET, SCOPE_PACKAGE, FORMAT_DELTA, 9130 0, PMT_OPEN_TRY); 9131 } 9132 } 9133 9134 void turbostat_init() 9135 { 9136 setup_all_buffers(true); 9137 set_base_cpu(); 9138 check_msr_access(); 9139 check_perf_access(); 9140 process_cpuid(); 9141 counter_info_init(); 9142 probe_pm_features(); 9143 msr_perf_init(); 9144 linux_perf_init(); 9145 rapl_perf_init(); 9146 cstate_perf_init(); 9147 added_perf_counters_init(); 9148 pmt_init(); 9149 9150 for_all_cpus(get_cpu_type, ODD_COUNTERS); 9151 for_all_cpus(get_cpu_type, EVEN_COUNTERS); 9152 9153 if (BIC_IS_ENABLED(BIC_IPC) && has_aperf_access && get_instr_count_fd(base_cpu) != -1) 9154 BIC_PRESENT(BIC_IPC); 9155 9156 /* 9157 * If TSC tweak is needed, but couldn't get it, 9158 * disable more BICs, since it can't be reported accurately. 9159 */ 9160 if (platform->enable_tsc_tweak && !has_base_hz) { 9161 bic_enabled &= ~BIC_Busy; 9162 bic_enabled &= ~BIC_Bzy_MHz; 9163 } 9164 } 9165 9166 int fork_it(char **argv) 9167 { 9168 pid_t child_pid; 9169 int status; 9170 9171 snapshot_proc_sysfs_files(); 9172 status = for_all_cpus(get_counters, EVEN_COUNTERS); 9173 first_counter_read = 0; 9174 if (status) 9175 exit(status); 9176 gettimeofday(&tv_even, (struct timezone *)NULL); 9177 9178 child_pid = fork(); 9179 if (!child_pid) { 9180 /* child */ 9181 execvp(argv[0], argv); 9182 err(errno, "exec %s", argv[0]); 9183 } else { 9184 9185 /* parent */ 9186 if (child_pid == -1) 9187 err(1, "fork"); 9188 9189 signal(SIGINT, SIG_IGN); 9190 signal(SIGQUIT, SIG_IGN); 9191 if (waitpid(child_pid, &status, 0) == -1) 9192 err(status, "waitpid"); 9193 9194 if (WIFEXITED(status)) 9195 status = WEXITSTATUS(status); 9196 } 9197 /* 9198 * n.b. fork_it() does not check for errors from for_all_cpus() 9199 * because re-starting is problematic when forking 9200 */ 9201 snapshot_proc_sysfs_files(); 9202 for_all_cpus(get_counters, ODD_COUNTERS); 9203 gettimeofday(&tv_odd, (struct timezone *)NULL); 9204 timersub(&tv_odd, &tv_even, &tv_delta); 9205 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) 9206 fprintf(outf, "%s: Counter reset detected\n", progname); 9207 else { 9208 compute_average(EVEN_COUNTERS); 9209 format_all_counters(EVEN_COUNTERS); 9210 } 9211 9212 fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0); 9213 9214 flush_output_stderr(); 9215 9216 return status; 9217 } 9218 9219 int get_and_dump_counters(void) 9220 { 9221 int status; 9222 9223 snapshot_proc_sysfs_files(); 9224 status = for_all_cpus(get_counters, ODD_COUNTERS); 9225 if (status) 9226 return status; 9227 9228 status = for_all_cpus(dump_counters, ODD_COUNTERS); 9229 if (status) 9230 return status; 9231 9232 flush_output_stdout(); 9233 9234 return status; 9235 } 9236 9237 void print_version() 9238 { 9239 fprintf(outf, "turbostat version 2024.11.30 - Len Brown <lenb@kernel.org>\n"); 9240 } 9241 9242 #define COMMAND_LINE_SIZE 2048 9243 9244 void print_bootcmd(void) 9245 { 9246 char bootcmd[COMMAND_LINE_SIZE]; 9247 FILE *fp; 9248 int ret; 9249 9250 memset(bootcmd, 0, COMMAND_LINE_SIZE); 9251 fp = fopen("/proc/cmdline", "r"); 9252 if (!fp) 9253 return; 9254 9255 ret = fread(bootcmd, sizeof(char), COMMAND_LINE_SIZE - 1, fp); 9256 if (ret) { 9257 bootcmd[ret] = '\0'; 9258 /* the last character is already '\n' */ 9259 fprintf(outf, "Kernel command line: %s", bootcmd); 9260 } 9261 9262 fclose(fp); 9263 } 9264 9265 struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name) 9266 { 9267 struct msr_counter *mp; 9268 9269 for (mp = head; mp; mp = mp->next) { 9270 if (debug) 9271 fprintf(stderr, "%s: %s %s\n", __func__, name, mp->name); 9272 if (!strncmp(name, mp->name, strlen(mp->name))) 9273 return mp; 9274 } 9275 return NULL; 9276 } 9277 9278 int add_counter(unsigned int msr_num, char *path, char *name, 9279 unsigned int width, enum counter_scope scope, 9280 enum counter_type type, enum counter_format format, int flags, int id) 9281 { 9282 struct msr_counter *msrp; 9283 9284 if (no_msr && msr_num) 9285 errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num); 9286 9287 if (debug) 9288 fprintf(stderr, "%s(msr%d, %s, %s, width%d, scope%d, type%d, format%d, flags%x, id%d)\n", 9289 __func__, msr_num, path, name, width, scope, type, format, flags, id); 9290 9291 switch (scope) { 9292 9293 case SCOPE_CPU: 9294 msrp = find_msrp_by_name(sys.tp, name); 9295 if (msrp) { 9296 if (debug) 9297 fprintf(stderr, "%s: %s FOUND\n", __func__, name); 9298 break; 9299 } 9300 if (sys.added_thread_counters++ >= MAX_ADDED_THREAD_COUNTERS) { 9301 warnx("ignoring thread counter %s", name); 9302 return -1; 9303 } 9304 break; 9305 case SCOPE_CORE: 9306 msrp = find_msrp_by_name(sys.cp, name); 9307 if (msrp) { 9308 if (debug) 9309 fprintf(stderr, "%s: %s FOUND\n", __func__, name); 9310 break; 9311 } 9312 if (sys.added_core_counters++ >= MAX_ADDED_CORE_COUNTERS) { 9313 warnx("ignoring core counter %s", name); 9314 return -1; 9315 } 9316 break; 9317 case SCOPE_PACKAGE: 9318 msrp = find_msrp_by_name(sys.pp, name); 9319 if (msrp) { 9320 if (debug) 9321 fprintf(stderr, "%s: %s FOUND\n", __func__, name); 9322 break; 9323 } 9324 if (sys.added_package_counters++ >= MAX_ADDED_PACKAGE_COUNTERS) { 9325 warnx("ignoring package counter %s", name); 9326 return -1; 9327 } 9328 break; 9329 default: 9330 warnx("ignoring counter %s with unknown scope", name); 9331 return -1; 9332 } 9333 9334 if (msrp == NULL) { 9335 msrp = calloc(1, sizeof(struct msr_counter)); 9336 if (msrp == NULL) 9337 err(-1, "calloc msr_counter"); 9338 9339 msrp->msr_num = msr_num; 9340 strncpy(msrp->name, name, NAME_BYTES - 1); 9341 msrp->width = width; 9342 msrp->type = type; 9343 msrp->format = format; 9344 msrp->flags = flags; 9345 9346 switch (scope) { 9347 case SCOPE_CPU: 9348 msrp->next = sys.tp; 9349 sys.tp = msrp; 9350 break; 9351 case SCOPE_CORE: 9352 msrp->next = sys.cp; 9353 sys.cp = msrp; 9354 break; 9355 case SCOPE_PACKAGE: 9356 msrp->next = sys.pp; 9357 sys.pp = msrp; 9358 break; 9359 } 9360 } 9361 9362 if (path) { 9363 struct sysfs_path *sp; 9364 9365 sp = calloc(1, sizeof(struct sysfs_path)); 9366 if (sp == NULL) { 9367 perror("calloc"); 9368 exit(1); 9369 } 9370 strncpy(sp->path, path, PATH_BYTES - 1); 9371 sp->id = id; 9372 sp->next = msrp->sp; 9373 msrp->sp = sp; 9374 } 9375 9376 return 0; 9377 } 9378 9379 /* 9380 * Initialize the fields used for identifying and opening the counter. 9381 * 9382 * Defer the initialization of any runtime buffers for actually reading 9383 * the counters for when we initialize all perf counters, so we can later 9384 * easily call re_initialize(). 9385 */ 9386 struct perf_counter_info *make_perf_counter_info(const char *perf_device, 9387 const char *perf_event, 9388 const char *name, 9389 unsigned int width, 9390 enum counter_scope scope, 9391 enum counter_type type, enum counter_format format) 9392 { 9393 struct perf_counter_info *pinfo; 9394 9395 pinfo = calloc(1, sizeof(*pinfo)); 9396 if (!pinfo) 9397 errx(1, "%s: Failed to allocate %s/%s\n", __func__, perf_device, perf_event); 9398 9399 strncpy(pinfo->device, perf_device, ARRAY_SIZE(pinfo->device) - 1); 9400 strncpy(pinfo->event, perf_event, ARRAY_SIZE(pinfo->event) - 1); 9401 9402 strncpy(pinfo->name, name, ARRAY_SIZE(pinfo->name) - 1); 9403 pinfo->width = width; 9404 pinfo->scope = scope; 9405 pinfo->type = type; 9406 pinfo->format = format; 9407 9408 return pinfo; 9409 } 9410 9411 int add_perf_counter(const char *perf_device, const char *perf_event, const char *name_buffer, unsigned int width, 9412 enum counter_scope scope, enum counter_type type, enum counter_format format) 9413 { 9414 struct perf_counter_info *pinfo; 9415 9416 switch (scope) { 9417 case SCOPE_CPU: 9418 if (sys.added_thread_perf_counters >= MAX_ADDED_THREAD_COUNTERS) { 9419 warnx("ignoring thread counter perf/%s/%s", perf_device, perf_event); 9420 return -1; 9421 } 9422 break; 9423 9424 case SCOPE_CORE: 9425 if (sys.added_core_perf_counters >= MAX_ADDED_CORE_COUNTERS) { 9426 warnx("ignoring core counter perf/%s/%s", perf_device, perf_event); 9427 return -1; 9428 } 9429 break; 9430 9431 case SCOPE_PACKAGE: 9432 if (sys.added_package_perf_counters >= MAX_ADDED_PACKAGE_COUNTERS) { 9433 warnx("ignoring package counter perf/%s/%s", perf_device, perf_event); 9434 return -1; 9435 } 9436 break; 9437 } 9438 9439 pinfo = make_perf_counter_info(perf_device, perf_event, name_buffer, width, scope, type, format); 9440 9441 if (!pinfo) 9442 return -1; 9443 9444 switch (scope) { 9445 case SCOPE_CPU: 9446 pinfo->next = sys.perf_tp; 9447 sys.perf_tp = pinfo; 9448 ++sys.added_thread_perf_counters; 9449 break; 9450 9451 case SCOPE_CORE: 9452 pinfo->next = sys.perf_cp; 9453 sys.perf_cp = pinfo; 9454 ++sys.added_core_perf_counters; 9455 break; 9456 9457 case SCOPE_PACKAGE: 9458 pinfo->next = sys.perf_pp; 9459 sys.perf_pp = pinfo; 9460 ++sys.added_package_perf_counters; 9461 break; 9462 } 9463 9464 // FIXME: we might not have debug here yet 9465 if (debug) 9466 fprintf(stderr, "%s: %s/%s, name: %s, scope%d\n", 9467 __func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope); 9468 9469 return 0; 9470 } 9471 9472 void parse_add_command_msr(char *add_command) 9473 { 9474 int msr_num = 0; 9475 char *path = NULL; 9476 char perf_device[PERF_DEV_NAME_BYTES] = ""; 9477 char perf_event[PERF_EVT_NAME_BYTES] = ""; 9478 char name_buffer[PERF_NAME_BYTES] = ""; 9479 int width = 64; 9480 int fail = 0; 9481 enum counter_scope scope = SCOPE_CPU; 9482 enum counter_type type = COUNTER_CYCLES; 9483 enum counter_format format = FORMAT_DELTA; 9484 9485 while (add_command) { 9486 9487 if (sscanf(add_command, "msr0x%x", &msr_num) == 1) 9488 goto next; 9489 9490 if (sscanf(add_command, "msr%d", &msr_num) == 1) 9491 goto next; 9492 9493 BUILD_BUG_ON(ARRAY_SIZE(perf_device) <= 31); 9494 BUILD_BUG_ON(ARRAY_SIZE(perf_event) <= 31); 9495 if (sscanf(add_command, "perf/%31[^/]/%31[^,]", &perf_device[0], &perf_event[0]) == 2) 9496 goto next; 9497 9498 if (*add_command == '/') { 9499 path = add_command; 9500 goto next; 9501 } 9502 9503 if (sscanf(add_command, "u%d", &width) == 1) { 9504 if ((width == 32) || (width == 64)) 9505 goto next; 9506 width = 64; 9507 } 9508 if (!strncmp(add_command, "cpu", strlen("cpu"))) { 9509 scope = SCOPE_CPU; 9510 goto next; 9511 } 9512 if (!strncmp(add_command, "core", strlen("core"))) { 9513 scope = SCOPE_CORE; 9514 goto next; 9515 } 9516 if (!strncmp(add_command, "package", strlen("package"))) { 9517 scope = SCOPE_PACKAGE; 9518 goto next; 9519 } 9520 if (!strncmp(add_command, "cycles", strlen("cycles"))) { 9521 type = COUNTER_CYCLES; 9522 goto next; 9523 } 9524 if (!strncmp(add_command, "seconds", strlen("seconds"))) { 9525 type = COUNTER_SECONDS; 9526 goto next; 9527 } 9528 if (!strncmp(add_command, "usec", strlen("usec"))) { 9529 type = COUNTER_USEC; 9530 goto next; 9531 } 9532 if (!strncmp(add_command, "raw", strlen("raw"))) { 9533 format = FORMAT_RAW; 9534 goto next; 9535 } 9536 if (!strncmp(add_command, "delta", strlen("delta"))) { 9537 format = FORMAT_DELTA; 9538 goto next; 9539 } 9540 if (!strncmp(add_command, "percent", strlen("percent"))) { 9541 format = FORMAT_PERCENT; 9542 goto next; 9543 } 9544 9545 BUILD_BUG_ON(ARRAY_SIZE(name_buffer) <= 18); 9546 if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) { 9547 char *eos; 9548 9549 eos = strchr(name_buffer, ','); 9550 if (eos) 9551 *eos = '\0'; 9552 goto next; 9553 } 9554 9555 next: 9556 add_command = strchr(add_command, ','); 9557 if (add_command) { 9558 *add_command = '\0'; 9559 add_command++; 9560 } 9561 9562 } 9563 if ((msr_num == 0) && (path == NULL) && (perf_device[0] == '\0' || perf_event[0] == '\0')) { 9564 fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event ) required\n"); 9565 fail++; 9566 } 9567 9568 /* Test for non-empty perf_device and perf_event */ 9569 const bool is_perf_counter = perf_device[0] && perf_event[0]; 9570 9571 /* generate default column header */ 9572 if (*name_buffer == '\0') { 9573 if (is_perf_counter) { 9574 snprintf(name_buffer, ARRAY_SIZE(name_buffer), "perf/%s", perf_event); 9575 } else { 9576 if (width == 32) 9577 sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); 9578 else 9579 sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); 9580 } 9581 } 9582 9583 if (is_perf_counter) { 9584 if (add_perf_counter(perf_device, perf_event, name_buffer, width, scope, type, format)) 9585 fail++; 9586 } else { 9587 if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0, 0)) 9588 fail++; 9589 } 9590 9591 if (fail) { 9592 help(); 9593 exit(1); 9594 } 9595 } 9596 9597 bool starts_with(const char *str, const char *prefix) 9598 { 9599 return strncmp(prefix, str, strlen(prefix)) == 0; 9600 } 9601 9602 void parse_add_command_pmt(char *add_command) 9603 { 9604 char *name = NULL; 9605 char *type_name = NULL; 9606 char *format_name = NULL; 9607 unsigned int offset; 9608 unsigned int lsb; 9609 unsigned int msb; 9610 unsigned int guid; 9611 unsigned int domain_id; 9612 enum counter_scope scope = 0; 9613 enum pmt_datatype type = PMT_TYPE_RAW; 9614 enum counter_format format = FORMAT_RAW; 9615 bool has_offset = false; 9616 bool has_lsb = false; 9617 bool has_msb = false; 9618 bool has_format = true; /* Format has a default value. */ 9619 bool has_guid = false; 9620 bool has_scope = false; 9621 bool has_type = true; /* Type has a default value. */ 9622 9623 /* Consume the "pmt," prefix. */ 9624 add_command = strchr(add_command, ','); 9625 if (!add_command) { 9626 help(); 9627 exit(1); 9628 } 9629 ++add_command; 9630 9631 while (add_command) { 9632 if (starts_with(add_command, "name=")) { 9633 name = add_command + strlen("name="); 9634 goto next; 9635 } 9636 9637 if (starts_with(add_command, "type=")) { 9638 type_name = add_command + strlen("type="); 9639 goto next; 9640 } 9641 9642 if (starts_with(add_command, "domain=")) { 9643 const size_t prefix_len = strlen("domain="); 9644 9645 if (sscanf(add_command + prefix_len, "cpu%u", &domain_id) == 1) { 9646 scope = SCOPE_CPU; 9647 has_scope = true; 9648 } else if (sscanf(add_command + prefix_len, "core%u", &domain_id) == 1) { 9649 scope = SCOPE_CORE; 9650 has_scope = true; 9651 } else if (sscanf(add_command + prefix_len, "package%u", &domain_id) == 1) { 9652 scope = SCOPE_PACKAGE; 9653 has_scope = true; 9654 } 9655 9656 if (!has_scope) { 9657 printf("%s: invalid value for scope. Expected cpu%%u, core%%u or package%%u.\n", 9658 __func__); 9659 exit(1); 9660 } 9661 9662 goto next; 9663 } 9664 9665 if (starts_with(add_command, "format=")) { 9666 format_name = add_command + strlen("format="); 9667 goto next; 9668 } 9669 9670 if (sscanf(add_command, "offset=%u", &offset) == 1) { 9671 has_offset = true; 9672 goto next; 9673 } 9674 9675 if (sscanf(add_command, "lsb=%u", &lsb) == 1) { 9676 has_lsb = true; 9677 goto next; 9678 } 9679 9680 if (sscanf(add_command, "msb=%u", &msb) == 1) { 9681 has_msb = true; 9682 goto next; 9683 } 9684 9685 if (sscanf(add_command, "guid=%x", &guid) == 1) { 9686 has_guid = true; 9687 goto next; 9688 } 9689 9690 next: 9691 add_command = strchr(add_command, ','); 9692 if (add_command) { 9693 *add_command = '\0'; 9694 add_command++; 9695 } 9696 } 9697 9698 if (!name) { 9699 printf("%s: missing %s\n", __func__, "name"); 9700 exit(1); 9701 } 9702 9703 if (strlen(name) >= PMT_COUNTER_NAME_SIZE_BYTES) { 9704 printf("%s: name has to be at most %d characters long\n", __func__, PMT_COUNTER_NAME_SIZE_BYTES); 9705 exit(1); 9706 } 9707 9708 if (format_name) { 9709 has_format = false; 9710 9711 if (strcmp("raw", format_name) == 0) { 9712 format = FORMAT_RAW; 9713 has_format = true; 9714 } 9715 9716 if (strcmp("delta", format_name) == 0) { 9717 format = FORMAT_DELTA; 9718 has_format = true; 9719 } 9720 9721 if (!has_format) { 9722 fprintf(stderr, "%s: Invalid format %s. Expected raw or delta\n", __func__, format_name); 9723 exit(1); 9724 } 9725 } 9726 9727 if (type_name) { 9728 has_type = false; 9729 9730 if (strcmp("raw", type_name) == 0) { 9731 type = PMT_TYPE_RAW; 9732 has_type = true; 9733 } 9734 9735 if (strcmp("txtal_time", type_name) == 0) { 9736 type = PMT_TYPE_XTAL_TIME; 9737 has_type = true; 9738 } 9739 9740 if (!has_type) { 9741 printf("%s: invalid %s: %s\n", __func__, "type", type_name); 9742 exit(1); 9743 } 9744 } 9745 9746 if (!has_offset) { 9747 printf("%s : missing %s\n", __func__, "offset"); 9748 exit(1); 9749 } 9750 9751 if (!has_lsb) { 9752 printf("%s: missing %s\n", __func__, "lsb"); 9753 exit(1); 9754 } 9755 9756 if (!has_msb) { 9757 printf("%s: missing %s\n", __func__, "msb"); 9758 exit(1); 9759 } 9760 9761 if (!has_guid) { 9762 printf("%s: missing %s\n", __func__, "guid"); 9763 exit(1); 9764 } 9765 9766 if (!has_scope) { 9767 printf("%s: missing %s\n", __func__, "scope"); 9768 exit(1); 9769 } 9770 9771 if (lsb > msb) { 9772 printf("%s: lsb > msb doesn't make sense\n", __func__); 9773 exit(1); 9774 } 9775 9776 pmt_add_counter(guid, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED); 9777 } 9778 9779 void parse_add_command(char *add_command) 9780 { 9781 if (strncmp(add_command, "pmt", strlen("pmt")) == 0) 9782 return parse_add_command_pmt(add_command); 9783 return parse_add_command_msr(add_command); 9784 } 9785 9786 int is_deferred_add(char *name) 9787 { 9788 int i; 9789 9790 for (i = 0; i < deferred_add_index; ++i) 9791 if (!strcmp(name, deferred_add_names[i])) 9792 return 1; 9793 return 0; 9794 } 9795 9796 int is_deferred_skip(char *name) 9797 { 9798 int i; 9799 9800 for (i = 0; i < deferred_skip_index; ++i) 9801 if (!strcmp(name, deferred_skip_names[i])) 9802 return 1; 9803 return 0; 9804 } 9805 9806 void probe_sysfs(void) 9807 { 9808 char path[64]; 9809 char name_buf[16]; 9810 FILE *input; 9811 int state; 9812 char *sp; 9813 9814 for (state = 10; state >= 0; --state) { 9815 9816 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); 9817 input = fopen(path, "r"); 9818 if (input == NULL) 9819 continue; 9820 if (!fgets(name_buf, sizeof(name_buf), input)) 9821 err(1, "%s: failed to read file", path); 9822 9823 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ 9824 sp = strchr(name_buf, '-'); 9825 if (!sp) 9826 sp = strchrnul(name_buf, '\n'); 9827 *sp = '%'; 9828 *(sp + 1) = '\0'; 9829 9830 remove_underbar(name_buf); 9831 9832 fclose(input); 9833 9834 sprintf(path, "cpuidle/state%d/time", state); 9835 9836 if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) 9837 continue; 9838 9839 if (is_deferred_skip(name_buf)) 9840 continue; 9841 9842 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0); 9843 } 9844 9845 for (state = 10; state >= 0; --state) { 9846 9847 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); 9848 input = fopen(path, "r"); 9849 if (input == NULL) 9850 continue; 9851 if (!fgets(name_buf, sizeof(name_buf), input)) 9852 err(1, "%s: failed to read file", path); 9853 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ 9854 sp = strchr(name_buf, '-'); 9855 if (!sp) 9856 sp = strchrnul(name_buf, '\n'); 9857 *sp = '\0'; 9858 fclose(input); 9859 9860 remove_underbar(name_buf); 9861 9862 sprintf(path, "cpuidle/state%d/usage", state); 9863 9864 if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) 9865 continue; 9866 9867 if (is_deferred_skip(name_buf)) 9868 continue; 9869 9870 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0); 9871 } 9872 9873 } 9874 9875 /* 9876 * parse cpuset with following syntax 9877 * 1,2,4..6,8-10 and set bits in cpu_subset 9878 */ 9879 void parse_cpu_command(char *optarg) 9880 { 9881 if (!strcmp(optarg, "core")) { 9882 if (cpu_subset) 9883 goto error; 9884 show_core_only++; 9885 return; 9886 } 9887 if (!strcmp(optarg, "package")) { 9888 if (cpu_subset) 9889 goto error; 9890 show_pkg_only++; 9891 return; 9892 } 9893 if (show_core_only || show_pkg_only) 9894 goto error; 9895 9896 cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS); 9897 if (cpu_subset == NULL) 9898 err(3, "CPU_ALLOC"); 9899 cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS); 9900 9901 CPU_ZERO_S(cpu_subset_size, cpu_subset); 9902 9903 if (parse_cpu_str(optarg, cpu_subset, cpu_subset_size)) 9904 goto error; 9905 9906 return; 9907 9908 error: 9909 fprintf(stderr, "\"--cpu %s\" malformed\n", optarg); 9910 help(); 9911 exit(-1); 9912 } 9913 9914 void cmdline(int argc, char **argv) 9915 { 9916 int opt; 9917 int option_index = 0; 9918 static struct option long_options[] = { 9919 { "add", required_argument, 0, 'a' }, 9920 { "cpu", required_argument, 0, 'c' }, 9921 { "Dump", no_argument, 0, 'D' }, 9922 { "debug", no_argument, 0, 'd' }, /* internal, not documented */ 9923 { "enable", required_argument, 0, 'e' }, 9924 { "interval", required_argument, 0, 'i' }, 9925 { "IPC", no_argument, 0, 'I' }, 9926 { "num_iterations", required_argument, 0, 'n' }, 9927 { "header_iterations", required_argument, 0, 'N' }, 9928 { "help", no_argument, 0, 'h' }, 9929 { "hide", required_argument, 0, 'H' }, // meh, -h taken by --help 9930 { "Joules", no_argument, 0, 'J' }, 9931 { "list", no_argument, 0, 'l' }, 9932 { "out", required_argument, 0, 'o' }, 9933 { "quiet", no_argument, 0, 'q' }, 9934 { "no-msr", no_argument, 0, 'M' }, 9935 { "no-perf", no_argument, 0, 'P' }, 9936 { "show", required_argument, 0, 's' }, 9937 { "Summary", no_argument, 0, 'S' }, 9938 { "TCC", required_argument, 0, 'T' }, 9939 { "version", no_argument, 0, 'v' }, 9940 { 0, 0, 0, 0 } 9941 }; 9942 9943 progname = argv[0]; 9944 9945 /* 9946 * Parse some options early, because they may make other options invalid, 9947 * like adding the MSR counter with --add and at the same time using --no-msr. 9948 */ 9949 while ((opt = getopt_long_only(argc, argv, "+MPn:", long_options, &option_index)) != -1) { 9950 switch (opt) { 9951 case 'M': 9952 no_msr = 1; 9953 break; 9954 case 'P': 9955 no_perf = 1; 9956 break; 9957 default: 9958 break; 9959 } 9960 } 9961 optind = 0; 9962 9963 while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qMST:v", long_options, &option_index)) != -1) { 9964 switch (opt) { 9965 case 'a': 9966 parse_add_command(optarg); 9967 break; 9968 case 'c': 9969 parse_cpu_command(optarg); 9970 break; 9971 case 'D': 9972 dump_only++; 9973 /* 9974 * Force the no_perf early to prevent using it as a source. 9975 * User asks for raw values, but perf returns them relative 9976 * to the opening of the file descriptor. 9977 */ 9978 no_perf = 1; 9979 break; 9980 case 'e': 9981 /* --enable specified counter */ 9982 bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST); 9983 break; 9984 case 'd': 9985 debug++; 9986 ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); 9987 break; 9988 case 'H': 9989 /* 9990 * --hide: do not show those specified 9991 * multiple invocations simply clear more bits in enabled mask 9992 */ 9993 bic_enabled &= ~bic_lookup(optarg, HIDE_LIST); 9994 break; 9995 case 'h': 9996 default: 9997 help(); 9998 exit(1); 9999 case 'i': 10000 { 10001 double interval = strtod(optarg, NULL); 10002 10003 if (interval < 0.001) { 10004 fprintf(outf, "interval %f seconds is too small\n", interval); 10005 exit(2); 10006 } 10007 10008 interval_tv.tv_sec = interval_ts.tv_sec = interval; 10009 interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000; 10010 interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000; 10011 } 10012 break; 10013 case 'J': 10014 rapl_joules++; 10015 break; 10016 case 'l': 10017 ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); 10018 list_header_only++; 10019 quiet++; 10020 break; 10021 case 'o': 10022 outf = fopen_or_die(optarg, "w"); 10023 break; 10024 case 'q': 10025 quiet = 1; 10026 break; 10027 case 'M': 10028 case 'P': 10029 /* Parsed earlier */ 10030 break; 10031 case 'n': 10032 num_iterations = strtod(optarg, NULL); 10033 10034 if (num_iterations <= 0) { 10035 fprintf(outf, "iterations %d should be positive number\n", num_iterations); 10036 exit(2); 10037 } 10038 break; 10039 case 'N': 10040 header_iterations = strtod(optarg, NULL); 10041 10042 if (header_iterations <= 0) { 10043 fprintf(outf, "iterations %d should be positive number\n", header_iterations); 10044 exit(2); 10045 } 10046 break; 10047 case 's': 10048 /* 10049 * --show: show only those specified 10050 * The 1st invocation will clear and replace the enabled mask 10051 * subsequent invocations can add to it. 10052 */ 10053 if (shown == 0) 10054 bic_enabled = bic_lookup(optarg, SHOW_LIST); 10055 else 10056 bic_enabled |= bic_lookup(optarg, SHOW_LIST); 10057 shown = 1; 10058 break; 10059 case 'S': 10060 summary_only++; 10061 break; 10062 case 'T': 10063 tj_max_override = atoi(optarg); 10064 break; 10065 case 'v': 10066 print_version(); 10067 exit(0); 10068 break; 10069 } 10070 } 10071 } 10072 10073 void set_rlimit(void) 10074 { 10075 struct rlimit limit; 10076 10077 if (getrlimit(RLIMIT_NOFILE, &limit) < 0) 10078 err(1, "Failed to get rlimit"); 10079 10080 if (limit.rlim_max < MAX_NOFILE) 10081 limit.rlim_max = MAX_NOFILE; 10082 if (limit.rlim_cur < MAX_NOFILE) 10083 limit.rlim_cur = MAX_NOFILE; 10084 10085 if (setrlimit(RLIMIT_NOFILE, &limit) < 0) 10086 err(1, "Failed to set rlimit"); 10087 } 10088 10089 int main(int argc, char **argv) 10090 { 10091 int fd, ret; 10092 10093 fd = open("/sys/fs/cgroup/cgroup.procs", O_WRONLY); 10094 if (fd < 0) 10095 goto skip_cgroup_setting; 10096 10097 ret = write(fd, "0\n", 2); 10098 if (ret == -1) 10099 perror("Can't update cgroup\n"); 10100 10101 close(fd); 10102 10103 skip_cgroup_setting: 10104 outf = stderr; 10105 cmdline(argc, argv); 10106 10107 if (!quiet) { 10108 print_version(); 10109 print_bootcmd(); 10110 } 10111 10112 probe_sysfs(); 10113 10114 if (!getuid()) 10115 set_rlimit(); 10116 10117 turbostat_init(); 10118 10119 if (!no_msr) 10120 msr_sum_record(); 10121 10122 /* dump counters and exit */ 10123 if (dump_only) 10124 return get_and_dump_counters(); 10125 10126 /* list header and exit */ 10127 if (list_header_only) { 10128 print_header(","); 10129 flush_output_stdout(); 10130 return 0; 10131 } 10132 10133 /* 10134 * if any params left, it must be a command to fork 10135 */ 10136 if (argc - optind) 10137 return fork_it(argv + optind); 10138 else 10139 turbostat_loop(); 10140 10141 return 0; 10142 } 10143