1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * turbostat -- show CPU frequency and C-state residency 4 * on modern Intel and AMD processors. 5 * 6 * Copyright (c) 2025 Intel Corporation. 7 * Len Brown <len.brown@intel.com> 8 */ 9 10 #define _GNU_SOURCE 11 #include MSRHEADER 12 13 // copied from arch/x86/include/asm/cpu_device_id.h 14 #define VFM_MODEL_BIT 0 15 #define VFM_FAMILY_BIT 8 16 #define VFM_VENDOR_BIT 16 17 #define VFM_RSVD_BIT 24 18 19 #define VFM_MODEL_MASK GENMASK(VFM_FAMILY_BIT - 1, VFM_MODEL_BIT) 20 #define VFM_FAMILY_MASK GENMASK(VFM_VENDOR_BIT - 1, VFM_FAMILY_BIT) 21 #define VFM_VENDOR_MASK GENMASK(VFM_RSVD_BIT - 1, VFM_VENDOR_BIT) 22 23 #define VFM_MODEL(vfm) (((vfm) & VFM_MODEL_MASK) >> VFM_MODEL_BIT) 24 #define VFM_FAMILY(vfm) (((vfm) & VFM_FAMILY_MASK) >> VFM_FAMILY_BIT) 25 #define VFM_VENDOR(vfm) (((vfm) & VFM_VENDOR_MASK) >> VFM_VENDOR_BIT) 26 27 #define VFM_MAKE(_vendor, _family, _model) ( \ 28 ((_model) << VFM_MODEL_BIT) | \ 29 ((_family) << VFM_FAMILY_BIT) | \ 30 ((_vendor) << VFM_VENDOR_BIT) \ 31 ) 32 // end copied section 33 34 #define CPUID_LEAF_MODEL_ID 0x1A 35 #define CPUID_LEAF_MODEL_ID_CORE_TYPE_SHIFT 24 36 37 #define X86_VENDOR_INTEL 0 38 39 #include INTEL_FAMILY_HEADER 40 #include BUILD_BUG_HEADER 41 #include <stdarg.h> 42 #include <stdio.h> 43 #include <err.h> 44 #include <unistd.h> 45 #include <sys/types.h> 46 #include <sys/wait.h> 47 #include <sys/stat.h> 48 #include <sys/select.h> 49 #include <sys/resource.h> 50 #include <sys/mman.h> 51 #include <fcntl.h> 52 #include <signal.h> 53 #include <sys/time.h> 54 #include <stdlib.h> 55 #include <getopt.h> 56 #include <dirent.h> 57 #include <string.h> 58 #include <ctype.h> 59 #include <sched.h> 60 #include <time.h> 61 #include <cpuid.h> 62 #include <sys/capability.h> 63 #include <errno.h> 64 #include <math.h> 65 #include <linux/perf_event.h> 66 #include <asm/unistd.h> 67 #include <stdbool.h> 68 #include <assert.h> 69 #include <linux/kernel.h> 70 71 #define UNUSED(x) (void)(x) 72 73 
/* 74 * This list matches the column headers, except 75 * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time 76 * 2. Core and CPU are moved to the end, we can't have strings that contain them 77 * matching on them for --show and --hide. 78 */ 79 80 /* 81 * buffer size used by sscanf() for added column names 82 * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters 83 */ 84 #define NAME_BYTES 20 85 #define PATH_BYTES 128 86 #define PERF_NAME_BYTES 128 87 88 #define MAX_NOFILE 0x8000 89 90 #define COUNTER_KIND_PERF_PREFIX "perf/" 91 #define COUNTER_KIND_PERF_PREFIX_LEN strlen(COUNTER_KIND_PERF_PREFIX) 92 #define PERF_DEV_NAME_BYTES 32 93 #define PERF_EVT_NAME_BYTES 32 94 95 #define INTEL_ECORE_TYPE 0x20 96 #define INTEL_PCORE_TYPE 0x40 97 98 #define ROUND_UP_TO_PAGE_SIZE(n) (((n) + 0x1000UL-1UL) & ~(0x1000UL-1UL)) 99 100 enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; 101 enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M }; 102 enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE }; 103 enum counter_source { COUNTER_SOURCE_NONE, COUNTER_SOURCE_PERF, COUNTER_SOURCE_MSR }; 104 105 struct perf_counter_info { 106 struct perf_counter_info *next; 107 108 /* How to open the counter / What counter it is. */ 109 char device[PERF_DEV_NAME_BYTES]; 110 char event[PERF_EVT_NAME_BYTES]; 111 112 /* How to show/format the counter. */ 113 char name[PERF_NAME_BYTES]; 114 unsigned int width; 115 enum counter_scope scope; 116 enum counter_type type; 117 enum counter_format format; 118 double scale; 119 120 /* For reading the counter. 
*/ 121 int *fd_perf_per_domain; 122 size_t num_domains; 123 }; 124 125 struct sysfs_path { 126 char path[PATH_BYTES]; 127 int id; 128 struct sysfs_path *next; 129 }; 130 131 struct msr_counter { 132 unsigned int msr_num; 133 char name[NAME_BYTES]; 134 struct sysfs_path *sp; 135 unsigned int width; 136 enum counter_type type; 137 enum counter_format format; 138 struct msr_counter *next; 139 unsigned int flags; 140 #define FLAGS_HIDE (1 << 0) 141 #define FLAGS_SHOW (1 << 1) 142 #define SYSFS_PERCPU (1 << 1) 143 }; 144 145 struct msr_counter bic[] = { 146 { 0x0, "usec", NULL, 0, 0, 0, NULL, 0 }, 147 { 0x0, "Time_Of_Day_Seconds", NULL, 0, 0, 0, NULL, 0 }, 148 { 0x0, "Package", NULL, 0, 0, 0, NULL, 0 }, 149 { 0x0, "Node", NULL, 0, 0, 0, NULL, 0 }, 150 { 0x0, "Avg_MHz", NULL, 0, 0, 0, NULL, 0 }, 151 { 0x0, "Busy%", NULL, 0, 0, 0, NULL, 0 }, 152 { 0x0, "Bzy_MHz", NULL, 0, 0, 0, NULL, 0 }, 153 { 0x0, "TSC_MHz", NULL, 0, 0, 0, NULL, 0 }, 154 { 0x0, "IRQ", NULL, 0, 0, 0, NULL, 0 }, 155 { 0x0, "SMI", NULL, 32, 0, FORMAT_DELTA, NULL, 0 }, 156 { 0x0, "cpuidle", NULL, 0, 0, 0, NULL, 0 }, 157 { 0x0, "CPU%c1", NULL, 0, 0, 0, NULL, 0 }, 158 { 0x0, "CPU%c3", NULL, 0, 0, 0, NULL, 0 }, 159 { 0x0, "CPU%c6", NULL, 0, 0, 0, NULL, 0 }, 160 { 0x0, "CPU%c7", NULL, 0, 0, 0, NULL, 0 }, 161 { 0x0, "ThreadC", NULL, 0, 0, 0, NULL, 0 }, 162 { 0x0, "CoreTmp", NULL, 0, 0, 0, NULL, 0 }, 163 { 0x0, "CoreCnt", NULL, 0, 0, 0, NULL, 0 }, 164 { 0x0, "PkgTmp", NULL, 0, 0, 0, NULL, 0 }, 165 { 0x0, "GFX%rc6", NULL, 0, 0, 0, NULL, 0 }, 166 { 0x0, "GFXMHz", NULL, 0, 0, 0, NULL, 0 }, 167 { 0x0, "Pkg%pc2", NULL, 0, 0, 0, NULL, 0 }, 168 { 0x0, "Pkg%pc3", NULL, 0, 0, 0, NULL, 0 }, 169 { 0x0, "Pkg%pc6", NULL, 0, 0, 0, NULL, 0 }, 170 { 0x0, "Pkg%pc7", NULL, 0, 0, 0, NULL, 0 }, 171 { 0x0, "Pkg%pc8", NULL, 0, 0, 0, NULL, 0 }, 172 { 0x0, "Pkg%pc9", NULL, 0, 0, 0, NULL, 0 }, 173 { 0x0, "Pk%pc10", NULL, 0, 0, 0, NULL, 0 }, 174 { 0x0, "CPU%LPI", NULL, 0, 0, 0, NULL, 0 }, 175 { 0x0, "SYS%LPI", NULL, 0, 0, 0, NULL, 0 }, 
176 { 0x0, "PkgWatt", NULL, 0, 0, 0, NULL, 0 }, 177 { 0x0, "CorWatt", NULL, 0, 0, 0, NULL, 0 }, 178 { 0x0, "GFXWatt", NULL, 0, 0, 0, NULL, 0 }, 179 { 0x0, "PkgCnt", NULL, 0, 0, 0, NULL, 0 }, 180 { 0x0, "RAMWatt", NULL, 0, 0, 0, NULL, 0 }, 181 { 0x0, "PKG_%", NULL, 0, 0, 0, NULL, 0 }, 182 { 0x0, "RAM_%", NULL, 0, 0, 0, NULL, 0 }, 183 { 0x0, "Pkg_J", NULL, 0, 0, 0, NULL, 0 }, 184 { 0x0, "Cor_J", NULL, 0, 0, 0, NULL, 0 }, 185 { 0x0, "GFX_J", NULL, 0, 0, 0, NULL, 0 }, 186 { 0x0, "RAM_J", NULL, 0, 0, 0, NULL, 0 }, 187 { 0x0, "Mod%c6", NULL, 0, 0, 0, NULL, 0 }, 188 { 0x0, "Totl%C0", NULL, 0, 0, 0, NULL, 0 }, 189 { 0x0, "Any%C0", NULL, 0, 0, 0, NULL, 0 }, 190 { 0x0, "GFX%C0", NULL, 0, 0, 0, NULL, 0 }, 191 { 0x0, "CPUGFX%", NULL, 0, 0, 0, NULL, 0 }, 192 { 0x0, "Core", NULL, 0, 0, 0, NULL, 0 }, 193 { 0x0, "CPU", NULL, 0, 0, 0, NULL, 0 }, 194 { 0x0, "APIC", NULL, 0, 0, 0, NULL, 0 }, 195 { 0x0, "X2APIC", NULL, 0, 0, 0, NULL, 0 }, 196 { 0x0, "Die", NULL, 0, 0, 0, NULL, 0 }, 197 { 0x0, "GFXAMHz", NULL, 0, 0, 0, NULL, 0 }, 198 { 0x0, "IPC", NULL, 0, 0, 0, NULL, 0 }, 199 { 0x0, "CoreThr", NULL, 0, 0, 0, NULL, 0 }, 200 { 0x0, "UncMHz", NULL, 0, 0, 0, NULL, 0 }, 201 { 0x0, "SAM%mc6", NULL, 0, 0, 0, NULL, 0 }, 202 { 0x0, "SAMMHz", NULL, 0, 0, 0, NULL, 0 }, 203 { 0x0, "SAMAMHz", NULL, 0, 0, 0, NULL, 0 }, 204 { 0x0, "Die%c6", NULL, 0, 0, 0, NULL, 0 }, 205 { 0x0, "SysWatt", NULL, 0, 0, 0, NULL, 0 }, 206 { 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 }, 207 { 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 }, 208 { 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 }, 209 { 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 }, 210 }; 211 212 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) 213 #define BIC_USEC (1ULL << 0) 214 #define BIC_TOD (1ULL << 1) 215 #define BIC_Package (1ULL << 2) 216 #define BIC_Node (1ULL << 3) 217 #define BIC_Avg_MHz (1ULL << 4) 218 #define BIC_Busy (1ULL << 5) 219 #define BIC_Bzy_MHz (1ULL << 6) 220 #define BIC_TSC_MHz (1ULL << 7) 221 #define BIC_IRQ (1ULL << 8) 222 #define BIC_SMI (1ULL << 
9) 223 #define BIC_cpuidle (1ULL << 10) 224 #define BIC_CPU_c1 (1ULL << 11) 225 #define BIC_CPU_c3 (1ULL << 12) 226 #define BIC_CPU_c6 (1ULL << 13) 227 #define BIC_CPU_c7 (1ULL << 14) 228 #define BIC_ThreadC (1ULL << 15) 229 #define BIC_CoreTmp (1ULL << 16) 230 #define BIC_CoreCnt (1ULL << 17) 231 #define BIC_PkgTmp (1ULL << 18) 232 #define BIC_GFX_rc6 (1ULL << 19) 233 #define BIC_GFXMHz (1ULL << 20) 234 #define BIC_Pkgpc2 (1ULL << 21) 235 #define BIC_Pkgpc3 (1ULL << 22) 236 #define BIC_Pkgpc6 (1ULL << 23) 237 #define BIC_Pkgpc7 (1ULL << 24) 238 #define BIC_Pkgpc8 (1ULL << 25) 239 #define BIC_Pkgpc9 (1ULL << 26) 240 #define BIC_Pkgpc10 (1ULL << 27) 241 #define BIC_CPU_LPI (1ULL << 28) 242 #define BIC_SYS_LPI (1ULL << 29) 243 #define BIC_PkgWatt (1ULL << 30) 244 #define BIC_CorWatt (1ULL << 31) 245 #define BIC_GFXWatt (1ULL << 32) 246 #define BIC_PkgCnt (1ULL << 33) 247 #define BIC_RAMWatt (1ULL << 34) 248 #define BIC_PKG__ (1ULL << 35) 249 #define BIC_RAM__ (1ULL << 36) 250 #define BIC_Pkg_J (1ULL << 37) 251 #define BIC_Cor_J (1ULL << 38) 252 #define BIC_GFX_J (1ULL << 39) 253 #define BIC_RAM_J (1ULL << 40) 254 #define BIC_Mod_c6 (1ULL << 41) 255 #define BIC_Totl_c0 (1ULL << 42) 256 #define BIC_Any_c0 (1ULL << 43) 257 #define BIC_GFX_c0 (1ULL << 44) 258 #define BIC_CPUGFX (1ULL << 45) 259 #define BIC_Core (1ULL << 46) 260 #define BIC_CPU (1ULL << 47) 261 #define BIC_APIC (1ULL << 48) 262 #define BIC_X2APIC (1ULL << 49) 263 #define BIC_Die (1ULL << 50) 264 #define BIC_GFXACTMHz (1ULL << 51) 265 #define BIC_IPC (1ULL << 52) 266 #define BIC_CORE_THROT_CNT (1ULL << 53) 267 #define BIC_UNCORE_MHZ (1ULL << 54) 268 #define BIC_SAM_mc6 (1ULL << 55) 269 #define BIC_SAMMHz (1ULL << 56) 270 #define BIC_SAMACTMHz (1ULL << 57) 271 #define BIC_Diec6 (1ULL << 58) 272 #define BIC_SysWatt (1ULL << 59) 273 #define BIC_Sys_J (1ULL << 60) 274 #define BIC_NMI (1ULL << 61) 275 #define BIC_CPU_c1e (1ULL << 62) 276 #define BIC_pct_idle (1ULL << 63) 277 278 #define BIC_GROUP_TOPOLOGY 
(BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) 279 #define BIC_GROUP_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) 280 #define BIC_GROUP_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) 281 #define BIC_GROUP_HW_IDLE (BIC_Busy | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) 282 #define BIC_GROUP_SW_IDLE (BIC_Busy | BIC_cpuidle | BIC_pct_idle ) 283 #define BIC_GROUP_IDLE (BIC_GROUP_HW_IDLE | BIC_pct_idle) 284 #define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) 285 286 #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_cpuidle) 287 288 unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); 289 unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_cpuidle | BIC_pct_idle | BIC_APIC | BIC_X2APIC; 290 291 #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) 292 #define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME) 293 #define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME) 294 #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) 295 #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) 296 #define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT) 297 298 /* 299 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit: 300 * If you change the values, note they are used both in comparisons 301 * (>= PCL__7) and to index pkg_cstate_limit_strings[]. 
302 */ 303 #define PCLUKN 0 /* Unknown */ 304 #define PCLRSV 1 /* Reserved */ 305 #define PCL__0 2 /* PC0 */ 306 #define PCL__1 3 /* PC1 */ 307 #define PCL__2 4 /* PC2 */ 308 #define PCL__3 5 /* PC3 */ 309 #define PCL__4 6 /* PC4 */ 310 #define PCL__6 7 /* PC6 */ 311 #define PCL_6N 8 /* PC6 No Retention */ 312 #define PCL_6R 9 /* PC6 Retention */ 313 #define PCL__7 10 /* PC7 */ 314 #define PCL_7S 11 /* PC7 Shrink */ 315 #define PCL__8 12 /* PC8 */ 316 #define PCL__9 13 /* PC9 */ 317 #define PCL_10 14 /* PC10 */ 318 #define PCLUNL 15 /* Unlimited */ 319 320 struct amperf_group_fd; 321 322 char *proc_stat = "/proc/stat"; 323 FILE *outf; 324 int *fd_percpu; 325 int *fd_instr_count_percpu; 326 struct timeval interval_tv = { 5, 0 }; 327 struct timespec interval_ts = { 5, 0 }; 328 329 unsigned int num_iterations; 330 unsigned int header_iterations; 331 unsigned int debug; 332 unsigned int quiet; 333 unsigned int shown; 334 unsigned int sums_need_wide_columns; 335 unsigned int rapl_joules; 336 unsigned int summary_only; 337 unsigned int list_header_only; 338 unsigned int dump_only; 339 unsigned int force_load; 340 unsigned int has_aperf; 341 unsigned int has_aperf_access; 342 unsigned int has_epb; 343 unsigned int has_turbo; 344 unsigned int is_hybrid; 345 unsigned int units = 1000000; /* MHz etc */ 346 unsigned int genuine_intel; 347 unsigned int authentic_amd; 348 unsigned int hygon_genuine; 349 unsigned int max_level, max_extended_level; 350 unsigned int has_invariant_tsc; 351 unsigned int aperf_mperf_multiplier = 1; 352 double bclk; 353 double base_hz; 354 unsigned int has_base_hz; 355 double tsc_tweak = 1.0; 356 unsigned int show_pkg_only; 357 unsigned int show_core_only; 358 char *output_buffer, *outp; 359 unsigned int do_dts; 360 unsigned int do_ptm; 361 unsigned int do_ipc; 362 unsigned long long cpuidle_cur_cpu_lpi_us; 363 unsigned long long cpuidle_cur_sys_lpi_us; 364 unsigned int tj_max; 365 unsigned int tj_max_override; 366 double rapl_power_units, 
rapl_time_units; 367 double rapl_dram_energy_units, rapl_energy_units, rapl_psys_energy_units; 368 double rapl_joule_counter_range; 369 unsigned int crystal_hz; 370 unsigned long long tsc_hz; 371 int base_cpu; 372 unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ 373 /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ 374 unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ 375 unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ 376 unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ 377 unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ 378 unsigned int first_counter_read = 1; 379 380 static struct timeval procsysfs_tv_begin; 381 382 int ignore_stdin; 383 bool no_msr; 384 bool no_perf; 385 386 enum gfx_sysfs_idx { 387 GFX_rc6, 388 GFX_MHz, 389 GFX_ACTMHz, 390 SAM_mc6, 391 SAM_MHz, 392 SAM_ACTMHz, 393 GFX_MAX 394 }; 395 396 struct gfx_sysfs_info { 397 FILE *fp; 398 unsigned int val; 399 unsigned long long val_ull; 400 }; 401 402 static struct gfx_sysfs_info gfx_info[GFX_MAX]; 403 404 int get_msr(int cpu, off_t offset, unsigned long long *msr); 405 int add_counter(unsigned int msr_num, char *path, char *name, 406 unsigned int width, enum counter_scope scope, 407 enum counter_type type, enum counter_format format, int flags, int package_num); 408 409 /* Model specific support Start */ 410 411 /* List of features that may diverge among different platforms */ 412 struct platform_features { 413 bool has_msr_misc_feature_control; /* MSR_MISC_FEATURE_CONTROL */ 414 bool has_msr_misc_pwr_mgmt; /* MSR_MISC_PWR_MGMT */ 415 bool has_nhm_msrs; /* MSR_PLATFORM_INFO, MSR_IA32_TEMPERATURE_TARGET, MSR_SMI_COUNT, MSR_PKG_CST_CONFIG_CONTROL, MSR_IA32_POWER_CTL, TRL MSRs */ 416 bool has_config_tdp; /* MSR_CONFIG_TDP_NOMINAL/LEVEL_1/LEVEL_2/CONTROL, MSR_TURBO_ACTIVATION_RATIO */ 417 int bclk_freq; /* CPU base clock */ 418 int crystal_freq; /* Crystal clock to use when not available from CPUID.15 */ 419 int supported_cstates; /* Core cstates and 
Package cstates supported */ 420 int cst_limit; /* MSR_PKG_CST_CONFIG_CONTROL */ 421 bool has_cst_auto_convension; /* AUTOMATIC_CSTATE_CONVERSION bit in MSR_PKG_CST_CONFIG_CONTROL */ 422 bool has_irtl_msrs; /* MSR_PKGC3/PKGC6/PKGC7/PKGC8/PKGC9/PKGC10_IRTL */ 423 bool has_msr_core_c1_res; /* MSR_CORE_C1_RES */ 424 bool has_msr_module_c6_res_ms; /* MSR_MODULE_C6_RES_MS */ 425 bool has_msr_c6_demotion_policy_config; /* MSR_CC6_DEMOTION_POLICY_CONFIG/MSR_MC6_DEMOTION_POLICY_CONFIG */ 426 bool has_msr_atom_pkg_c6_residency; /* MSR_ATOM_PKG_C6_RESIDENCY */ 427 bool has_msr_knl_core_c6_residency; /* MSR_KNL_CORE_C6_RESIDENCY */ 428 bool has_ext_cst_msrs; /* MSR_PKG_WEIGHTED_CORE_C0_RES/MSR_PKG_ANY_CORE_C0_RES/MSR_PKG_ANY_GFXE_C0_RES/MSR_PKG_BOTH_CORE_GFXE_C0_RES */ 429 bool has_cst_prewake_bit; /* Cstate prewake bit in MSR_IA32_POWER_CTL */ 430 int trl_msrs; /* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */ 431 int plr_msrs; /* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */ 432 int rapl_msrs; /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */ 433 bool has_per_core_rapl; /* Indicates cores energy collection is per-core, not per-package. 
AMD specific for now */ 434 bool has_rapl_divisor; /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */ 435 bool has_fixed_rapl_unit; /* Fixed Energy Unit used for DRAM RAPL Domain */ 436 bool has_fixed_rapl_psys_unit; /* Fixed Energy Unit used for PSYS RAPL Domain */ 437 int rapl_quirk_tdp; /* Hardcoded TDP value when cannot be retrieved from hardware */ 438 int tcc_offset_bits; /* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */ 439 bool enable_tsc_tweak; /* Use CPU Base freq instead of TSC freq for aperf/mperf counter */ 440 bool need_perf_multiplier; /* mperf/aperf multiplier */ 441 }; 442 443 struct platform_data { 444 unsigned int vfm; 445 const struct platform_features *features; 446 }; 447 448 /* For BCLK */ 449 enum bclk_freq { 450 BCLK_100MHZ = 1, 451 BCLK_133MHZ, 452 BCLK_SLV, 453 }; 454 455 #define SLM_BCLK_FREQS 5 456 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 }; 457 458 double slm_bclk(void) 459 { 460 unsigned long long msr = 3; 461 unsigned int i; 462 double freq; 463 464 if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) 465 fprintf(outf, "SLM BCLK: unknown\n"); 466 467 i = msr & 0xf; 468 if (i >= SLM_BCLK_FREQS) { 469 fprintf(outf, "SLM BCLK[%d] invalid\n", i); 470 i = 3; 471 } 472 freq = slm_freq_table[i]; 473 474 if (!quiet) 475 fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); 476 477 return freq; 478 } 479 480 /* For Package cstate limit */ 481 enum package_cstate_limit { 482 CST_LIMIT_NHM = 1, 483 CST_LIMIT_SNB, 484 CST_LIMIT_HSW, 485 CST_LIMIT_SKX, 486 CST_LIMIT_ICX, 487 CST_LIMIT_SLV, 488 CST_LIMIT_AMT, 489 CST_LIMIT_KNL, 490 CST_LIMIT_GMT, 491 }; 492 493 /* For Turbo Ratio Limit MSRs */ 494 enum turbo_ratio_limit_msrs { 495 TRL_BASE = BIT(0), 496 TRL_LIMIT1 = BIT(1), 497 TRL_LIMIT2 = BIT(2), 498 TRL_ATOM = BIT(3), 499 TRL_KNL = BIT(4), 500 TRL_CORECOUNT = BIT(5), 501 }; 502 503 /* For Perf Limit Reason MSRs */ 504 enum perf_limit_reason_msrs { 505 PLR_CORE = BIT(0), 506 PLR_GFX = BIT(1), 507 PLR_RING = BIT(2), 
508 }; 509 510 /* For RAPL MSRs */ 511 enum rapl_msrs { 512 RAPL_PKG_POWER_LIMIT = BIT(0), /* 0x610 MSR_PKG_POWER_LIMIT */ 513 RAPL_PKG_ENERGY_STATUS = BIT(1), /* 0x611 MSR_PKG_ENERGY_STATUS */ 514 RAPL_PKG_PERF_STATUS = BIT(2), /* 0x613 MSR_PKG_PERF_STATUS */ 515 RAPL_PKG_POWER_INFO = BIT(3), /* 0x614 MSR_PKG_POWER_INFO */ 516 RAPL_DRAM_POWER_LIMIT = BIT(4), /* 0x618 MSR_DRAM_POWER_LIMIT */ 517 RAPL_DRAM_ENERGY_STATUS = BIT(5), /* 0x619 MSR_DRAM_ENERGY_STATUS */ 518 RAPL_DRAM_PERF_STATUS = BIT(6), /* 0x61b MSR_DRAM_PERF_STATUS */ 519 RAPL_DRAM_POWER_INFO = BIT(7), /* 0x61c MSR_DRAM_POWER_INFO */ 520 RAPL_CORE_POWER_LIMIT = BIT(8), /* 0x638 MSR_PP0_POWER_LIMIT */ 521 RAPL_CORE_ENERGY_STATUS = BIT(9), /* 0x639 MSR_PP0_ENERGY_STATUS */ 522 RAPL_CORE_POLICY = BIT(10), /* 0x63a MSR_PP0_POLICY */ 523 RAPL_GFX_POWER_LIMIT = BIT(11), /* 0x640 MSR_PP1_POWER_LIMIT */ 524 RAPL_GFX_ENERGY_STATUS = BIT(12), /* 0x641 MSR_PP1_ENERGY_STATUS */ 525 RAPL_GFX_POLICY = BIT(13), /* 0x642 MSR_PP1_POLICY */ 526 RAPL_AMD_PWR_UNIT = BIT(14), /* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */ 527 RAPL_AMD_CORE_ENERGY_STAT = BIT(15), /* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */ 528 RAPL_AMD_PKG_ENERGY_STAT = BIT(16), /* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */ 529 RAPL_PLATFORM_ENERGY_LIMIT = BIT(17), /* 0x64c MSR_PLATFORM_ENERGY_LIMIT */ 530 RAPL_PLATFORM_ENERGY_STATUS = BIT(18), /* 0x64d MSR_PLATFORM_ENERGY_STATUS */ 531 }; 532 533 #define RAPL_PKG (RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT) 534 #define RAPL_DRAM (RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT) 535 #define RAPL_CORE (RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT) 536 #define RAPL_GFX (RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS) 537 #define RAPL_PSYS (RAPL_PLATFORM_ENERGY_STATUS | RAPL_PLATFORM_ENERGY_LIMIT) 538 539 #define RAPL_PKG_ALL (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO) 540 #define RAPL_DRAM_ALL (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO) 541 #define RAPL_CORE_ALL (RAPL_CORE | 
RAPL_CORE_POLICY) 542 #define RAPL_GFX_ALL (RAPL_GFX | RAPL_GFX_POLIGY) 543 544 #define RAPL_AMD_F17H (RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | RAPL_AMD_PKG_ENERGY_STAT) 545 546 /* For Cstates */ 547 enum cstates { 548 CC1 = BIT(0), 549 CC3 = BIT(1), 550 CC6 = BIT(2), 551 CC7 = BIT(3), 552 PC2 = BIT(4), 553 PC3 = BIT(5), 554 PC6 = BIT(6), 555 PC7 = BIT(7), 556 PC8 = BIT(8), 557 PC9 = BIT(9), 558 PC10 = BIT(10), 559 }; 560 561 static const struct platform_features nhm_features = { 562 .has_msr_misc_pwr_mgmt = 1, 563 .has_nhm_msrs = 1, 564 .bclk_freq = BCLK_133MHZ, 565 .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6, 566 .cst_limit = CST_LIMIT_NHM, 567 .trl_msrs = TRL_BASE, 568 }; 569 570 static const struct platform_features nhx_features = { 571 .has_msr_misc_pwr_mgmt = 1, 572 .has_nhm_msrs = 1, 573 .bclk_freq = BCLK_133MHZ, 574 .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6, 575 .cst_limit = CST_LIMIT_NHM, 576 }; 577 578 static const struct platform_features snb_features = { 579 .has_msr_misc_feature_control = 1, 580 .has_msr_misc_pwr_mgmt = 1, 581 .has_nhm_msrs = 1, 582 .bclk_freq = BCLK_100MHZ, 583 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 584 .cst_limit = CST_LIMIT_SNB, 585 .has_irtl_msrs = 1, 586 .trl_msrs = TRL_BASE, 587 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 588 }; 589 590 static const struct platform_features snx_features = { 591 .has_msr_misc_feature_control = 1, 592 .has_msr_misc_pwr_mgmt = 1, 593 .has_nhm_msrs = 1, 594 .bclk_freq = BCLK_100MHZ, 595 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 596 .cst_limit = CST_LIMIT_SNB, 597 .has_irtl_msrs = 1, 598 .trl_msrs = TRL_BASE, 599 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, 600 }; 601 602 static const struct platform_features ivb_features = { 603 .has_msr_misc_feature_control = 1, 604 .has_msr_misc_pwr_mgmt = 1, 605 .has_nhm_msrs = 1, 606 .has_config_tdp = 1, 607 .bclk_freq = BCLK_100MHZ, 608 
.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 609 .cst_limit = CST_LIMIT_SNB, 610 .has_irtl_msrs = 1, 611 .trl_msrs = TRL_BASE, 612 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 613 }; 614 615 static const struct platform_features ivx_features = { 616 .has_msr_misc_feature_control = 1, 617 .has_msr_misc_pwr_mgmt = 1, 618 .has_nhm_msrs = 1, 619 .bclk_freq = BCLK_100MHZ, 620 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 621 .cst_limit = CST_LIMIT_SNB, 622 .has_irtl_msrs = 1, 623 .trl_msrs = TRL_BASE | TRL_LIMIT1, 624 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, 625 }; 626 627 static const struct platform_features hsw_features = { 628 .has_msr_misc_feature_control = 1, 629 .has_msr_misc_pwr_mgmt = 1, 630 .has_nhm_msrs = 1, 631 .has_config_tdp = 1, 632 .bclk_freq = BCLK_100MHZ, 633 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 634 .cst_limit = CST_LIMIT_HSW, 635 .has_irtl_msrs = 1, 636 .trl_msrs = TRL_BASE, 637 .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, 638 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 639 }; 640 641 static const struct platform_features hsx_features = { 642 .has_msr_misc_feature_control = 1, 643 .has_msr_misc_pwr_mgmt = 1, 644 .has_nhm_msrs = 1, 645 .has_config_tdp = 1, 646 .bclk_freq = BCLK_100MHZ, 647 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 648 .cst_limit = CST_LIMIT_HSW, 649 .has_irtl_msrs = 1, 650 .trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2, 651 .plr_msrs = PLR_CORE | PLR_RING, 652 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 653 .has_fixed_rapl_unit = 1, 654 }; 655 656 static const struct platform_features hswl_features = { 657 .has_msr_misc_feature_control = 1, 658 .has_msr_misc_pwr_mgmt = 1, 659 .has_nhm_msrs = 1, 660 .has_config_tdp = 1, 661 .bclk_freq = BCLK_100MHZ, 662 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 663 .cst_limit = 
CST_LIMIT_HSW, 664 .has_irtl_msrs = 1, 665 .trl_msrs = TRL_BASE, 666 .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, 667 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 668 }; 669 670 static const struct platform_features hswg_features = { 671 .has_msr_misc_feature_control = 1, 672 .has_msr_misc_pwr_mgmt = 1, 673 .has_nhm_msrs = 1, 674 .has_config_tdp = 1, 675 .bclk_freq = BCLK_100MHZ, 676 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 677 .cst_limit = CST_LIMIT_HSW, 678 .has_irtl_msrs = 1, 679 .trl_msrs = TRL_BASE, 680 .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, 681 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 682 }; 683 684 static const struct platform_features bdw_features = { 685 .has_msr_misc_feature_control = 1, 686 .has_msr_misc_pwr_mgmt = 1, 687 .has_nhm_msrs = 1, 688 .has_config_tdp = 1, 689 .bclk_freq = BCLK_100MHZ, 690 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 691 .cst_limit = CST_LIMIT_HSW, 692 .has_irtl_msrs = 1, 693 .trl_msrs = TRL_BASE, 694 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 695 }; 696 697 static const struct platform_features bdwg_features = { 698 .has_msr_misc_feature_control = 1, 699 .has_msr_misc_pwr_mgmt = 1, 700 .has_nhm_msrs = 1, 701 .has_config_tdp = 1, 702 .bclk_freq = BCLK_100MHZ, 703 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7, 704 .cst_limit = CST_LIMIT_HSW, 705 .has_irtl_msrs = 1, 706 .trl_msrs = TRL_BASE, 707 .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, 708 }; 709 710 static const struct platform_features bdx_features = { 711 .has_msr_misc_feature_control = 1, 712 .has_msr_misc_pwr_mgmt = 1, 713 .has_nhm_msrs = 1, 714 .has_config_tdp = 1, 715 .bclk_freq = BCLK_100MHZ, 716 .supported_cstates = CC1 | CC3 | CC6 | PC2 | PC3 | PC6, 717 .cst_limit = CST_LIMIT_HSW, 718 .has_irtl_msrs = 1, 719 .has_cst_auto_convension = 1, 720 .trl_msrs = TRL_BASE, 
721 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 722 .has_fixed_rapl_unit = 1, 723 }; 724 725 static const struct platform_features skl_features = { 726 .has_msr_misc_feature_control = 1, 727 .has_msr_misc_pwr_mgmt = 1, 728 .has_nhm_msrs = 1, 729 .has_config_tdp = 1, 730 .bclk_freq = BCLK_100MHZ, 731 .crystal_freq = 24000000, 732 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 733 .cst_limit = CST_LIMIT_HSW, 734 .has_irtl_msrs = 1, 735 .has_ext_cst_msrs = 1, 736 .trl_msrs = TRL_BASE, 737 .tcc_offset_bits = 6, 738 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS, 739 .enable_tsc_tweak = 1, 740 }; 741 742 static const struct platform_features cnl_features = { 743 .has_msr_misc_feature_control = 1, 744 .has_msr_misc_pwr_mgmt = 1, 745 .has_nhm_msrs = 1, 746 .has_config_tdp = 1, 747 .bclk_freq = BCLK_100MHZ, 748 .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 749 .cst_limit = CST_LIMIT_HSW, 750 .has_irtl_msrs = 1, 751 .has_msr_core_c1_res = 1, 752 .has_ext_cst_msrs = 1, 753 .trl_msrs = TRL_BASE, 754 .tcc_offset_bits = 6, 755 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS, 756 .enable_tsc_tweak = 1, 757 }; 758 759 /* Copied from cnl_features, with PC7/PC9 removed */ 760 static const struct platform_features adl_features = { 761 .has_msr_misc_feature_control = cnl_features.has_msr_misc_feature_control, 762 .has_msr_misc_pwr_mgmt = cnl_features.has_msr_misc_pwr_mgmt, 763 .has_nhm_msrs = cnl_features.has_nhm_msrs, 764 .has_config_tdp = cnl_features.has_config_tdp, 765 .bclk_freq = cnl_features.bclk_freq, 766 .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10, 767 .cst_limit = cnl_features.cst_limit, 768 .has_irtl_msrs = cnl_features.has_irtl_msrs, 769 .has_msr_core_c1_res = cnl_features.has_msr_core_c1_res, 770 .has_ext_cst_msrs = cnl_features.has_ext_cst_msrs, 771 .trl_msrs = 
cnl_features.trl_msrs, 772 .tcc_offset_bits = cnl_features.tcc_offset_bits, 773 .rapl_msrs = cnl_features.rapl_msrs, 774 .enable_tsc_tweak = cnl_features.enable_tsc_tweak, 775 }; 776 777 /* Copied from adl_features, with PC3/PC8 removed */ 778 static const struct platform_features lnl_features = { 779 .has_msr_misc_feature_control = adl_features.has_msr_misc_feature_control, 780 .has_msr_misc_pwr_mgmt = adl_features.has_msr_misc_pwr_mgmt, 781 .has_nhm_msrs = adl_features.has_nhm_msrs, 782 .has_config_tdp = adl_features.has_config_tdp, 783 .bclk_freq = adl_features.bclk_freq, 784 .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC6 | PC10, 785 .cst_limit = adl_features.cst_limit, 786 .has_irtl_msrs = adl_features.has_irtl_msrs, 787 .has_msr_core_c1_res = adl_features.has_msr_core_c1_res, 788 .has_ext_cst_msrs = adl_features.has_ext_cst_msrs, 789 .trl_msrs = adl_features.trl_msrs, 790 .tcc_offset_bits = adl_features.tcc_offset_bits, 791 .rapl_msrs = adl_features.rapl_msrs, 792 .enable_tsc_tweak = adl_features.enable_tsc_tweak, 793 }; 794 795 static const struct platform_features skx_features = { 796 .has_msr_misc_feature_control = 1, 797 .has_msr_misc_pwr_mgmt = 1, 798 .has_nhm_msrs = 1, 799 .has_config_tdp = 1, 800 .bclk_freq = BCLK_100MHZ, 801 .supported_cstates = CC1 | CC6 | PC2 | PC6, 802 .cst_limit = CST_LIMIT_SKX, 803 .has_irtl_msrs = 1, 804 .has_cst_auto_convension = 1, 805 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 806 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 807 .has_fixed_rapl_unit = 1, 808 }; 809 810 static const struct platform_features icx_features = { 811 .has_msr_misc_feature_control = 1, 812 .has_msr_misc_pwr_mgmt = 1, 813 .has_nhm_msrs = 1, 814 .has_config_tdp = 1, 815 .bclk_freq = BCLK_100MHZ, 816 .supported_cstates = CC1 | CC6 | PC2 | PC6, 817 .cst_limit = CST_LIMIT_ICX, 818 .has_msr_core_c1_res = 1, 819 .has_irtl_msrs = 1, 820 .has_cst_prewake_bit = 1, 821 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 822 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, 
823 .has_fixed_rapl_unit = 1, 824 }; 825 826 static const struct platform_features spr_features = { 827 .has_msr_misc_feature_control = 1, 828 .has_msr_misc_pwr_mgmt = 1, 829 .has_nhm_msrs = 1, 830 .has_config_tdp = 1, 831 .bclk_freq = BCLK_100MHZ, 832 .supported_cstates = CC1 | CC6 | PC2 | PC6, 833 .cst_limit = CST_LIMIT_SKX, 834 .has_msr_core_c1_res = 1, 835 .has_irtl_msrs = 1, 836 .has_cst_prewake_bit = 1, 837 .has_fixed_rapl_psys_unit = 1, 838 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 839 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, 840 }; 841 842 static const struct platform_features srf_features = { 843 .has_msr_misc_feature_control = 1, 844 .has_msr_misc_pwr_mgmt = 1, 845 .has_nhm_msrs = 1, 846 .has_config_tdp = 1, 847 .bclk_freq = BCLK_100MHZ, 848 .supported_cstates = CC1 | CC6 | PC2 | PC6, 849 .cst_limit = CST_LIMIT_SKX, 850 .has_msr_core_c1_res = 1, 851 .has_msr_module_c6_res_ms = 1, 852 .has_irtl_msrs = 1, 853 .has_cst_prewake_bit = 1, 854 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 855 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, 856 }; 857 858 static const struct platform_features grr_features = { 859 .has_msr_misc_feature_control = 1, 860 .has_msr_misc_pwr_mgmt = 1, 861 .has_nhm_msrs = 1, 862 .has_config_tdp = 1, 863 .bclk_freq = BCLK_100MHZ, 864 .supported_cstates = CC1 | CC6, 865 .cst_limit = CST_LIMIT_SKX, 866 .has_msr_core_c1_res = 1, 867 .has_msr_module_c6_res_ms = 1, 868 .has_irtl_msrs = 1, 869 .has_cst_prewake_bit = 1, 870 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 871 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, 872 }; 873 874 static const struct platform_features slv_features = { 875 .has_nhm_msrs = 1, 876 .bclk_freq = BCLK_SLV, 877 .supported_cstates = CC1 | CC6 | PC6, 878 .cst_limit = CST_LIMIT_SLV, 879 .has_msr_core_c1_res = 1, 880 .has_msr_module_c6_res_ms = 1, 881 .has_msr_c6_demotion_policy_config = 1, 882 .has_msr_atom_pkg_c6_residency = 1, 883 .trl_msrs = TRL_ATOM, 884 .rapl_msrs = RAPL_PKG | RAPL_CORE, 885 
.has_rapl_divisor = 1, 886 .rapl_quirk_tdp = 30, 887 }; 888 889 static const struct platform_features slvd_features = { 890 .has_msr_misc_pwr_mgmt = 1, 891 .has_nhm_msrs = 1, 892 .bclk_freq = BCLK_SLV, 893 .supported_cstates = CC1 | CC6 | PC3 | PC6, 894 .cst_limit = CST_LIMIT_SLV, 895 .has_msr_atom_pkg_c6_residency = 1, 896 .trl_msrs = TRL_BASE, 897 .rapl_msrs = RAPL_PKG | RAPL_CORE, 898 .rapl_quirk_tdp = 30, 899 }; 900 901 static const struct platform_features amt_features = { 902 .has_nhm_msrs = 1, 903 .bclk_freq = BCLK_133MHZ, 904 .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6, 905 .cst_limit = CST_LIMIT_AMT, 906 .trl_msrs = TRL_BASE, 907 }; 908 909 static const struct platform_features gmt_features = { 910 .has_msr_misc_pwr_mgmt = 1, 911 .has_nhm_msrs = 1, 912 .bclk_freq = BCLK_100MHZ, 913 .crystal_freq = 19200000, 914 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 915 .cst_limit = CST_LIMIT_GMT, 916 .has_irtl_msrs = 1, 917 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 918 .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, 919 }; 920 921 static const struct platform_features gmtd_features = { 922 .has_msr_misc_pwr_mgmt = 1, 923 .has_nhm_msrs = 1, 924 .bclk_freq = BCLK_100MHZ, 925 .crystal_freq = 25000000, 926 .supported_cstates = CC1 | CC6 | PC2 | PC6, 927 .cst_limit = CST_LIMIT_GMT, 928 .has_irtl_msrs = 1, 929 .has_msr_core_c1_res = 1, 930 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 931 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS, 932 }; 933 934 static const struct platform_features gmtp_features = { 935 .has_msr_misc_pwr_mgmt = 1, 936 .has_nhm_msrs = 1, 937 .bclk_freq = BCLK_100MHZ, 938 .crystal_freq = 19200000, 939 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 940 .cst_limit = CST_LIMIT_GMT, 941 .has_irtl_msrs = 1, 942 .trl_msrs = TRL_BASE, 943 .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, 944 }; 945 946 static const struct platform_features tmt_features = { 947 
.has_msr_misc_pwr_mgmt = 1, 948 .has_nhm_msrs = 1, 949 .bclk_freq = BCLK_100MHZ, 950 .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10, 951 .cst_limit = CST_LIMIT_GMT, 952 .has_irtl_msrs = 1, 953 .trl_msrs = TRL_BASE, 954 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, 955 .enable_tsc_tweak = 1, 956 }; 957 958 static const struct platform_features tmtd_features = { 959 .has_msr_misc_pwr_mgmt = 1, 960 .has_nhm_msrs = 1, 961 .bclk_freq = BCLK_100MHZ, 962 .supported_cstates = CC1 | CC6, 963 .cst_limit = CST_LIMIT_GMT, 964 .has_irtl_msrs = 1, 965 .trl_msrs = TRL_BASE | TRL_CORECOUNT, 966 .rapl_msrs = RAPL_PKG_ALL, 967 }; 968 969 static const struct platform_features knl_features = { 970 .has_msr_misc_pwr_mgmt = 1, 971 .has_nhm_msrs = 1, 972 .has_config_tdp = 1, 973 .bclk_freq = BCLK_100MHZ, 974 .supported_cstates = CC1 | CC6 | PC3 | PC6, 975 .cst_limit = CST_LIMIT_KNL, 976 .has_msr_knl_core_c6_residency = 1, 977 .trl_msrs = TRL_KNL, 978 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, 979 .has_fixed_rapl_unit = 1, 980 .need_perf_multiplier = 1, 981 }; 982 983 static const struct platform_features default_features = { 984 }; 985 986 static const struct platform_features amd_features_with_rapl = { 987 .rapl_msrs = RAPL_AMD_F17H, 988 .has_per_core_rapl = 1, 989 .rapl_quirk_tdp = 280, /* This is the max stock TDP of HEDT/Server Fam17h+ chips */ 990 }; 991 992 static const struct platform_data turbostat_pdata[] = { 993 { INTEL_NEHALEM, &nhm_features }, 994 { INTEL_NEHALEM_G, &nhm_features }, 995 { INTEL_NEHALEM_EP, &nhm_features }, 996 { INTEL_NEHALEM_EX, &nhx_features }, 997 { INTEL_WESTMERE, &nhm_features }, 998 { INTEL_WESTMERE_EP, &nhm_features }, 999 { INTEL_WESTMERE_EX, &nhx_features }, 1000 { INTEL_SANDYBRIDGE, &snb_features }, 1001 { INTEL_SANDYBRIDGE_X, &snx_features }, 1002 { INTEL_IVYBRIDGE, &ivb_features }, 1003 { INTEL_IVYBRIDGE_X, &ivx_features }, 1004 { INTEL_HASWELL, &hsw_features }, 1005 { 
INTEL_HASWELL_X, &hsx_features }, 1006 { INTEL_HASWELL_L, &hswl_features }, 1007 { INTEL_HASWELL_G, &hswg_features }, 1008 { INTEL_BROADWELL, &bdw_features }, 1009 { INTEL_BROADWELL_G, &bdwg_features }, 1010 { INTEL_BROADWELL_X, &bdx_features }, 1011 { INTEL_BROADWELL_D, &bdx_features }, 1012 { INTEL_SKYLAKE_L, &skl_features }, 1013 { INTEL_SKYLAKE, &skl_features }, 1014 { INTEL_SKYLAKE_X, &skx_features }, 1015 { INTEL_KABYLAKE_L, &skl_features }, 1016 { INTEL_KABYLAKE, &skl_features }, 1017 { INTEL_COMETLAKE, &skl_features }, 1018 { INTEL_COMETLAKE_L, &skl_features }, 1019 { INTEL_CANNONLAKE_L, &cnl_features }, 1020 { INTEL_ICELAKE_X, &icx_features }, 1021 { INTEL_ICELAKE_D, &icx_features }, 1022 { INTEL_ICELAKE_L, &cnl_features }, 1023 { INTEL_ICELAKE_NNPI, &cnl_features }, 1024 { INTEL_ROCKETLAKE, &cnl_features }, 1025 { INTEL_TIGERLAKE_L, &cnl_features }, 1026 { INTEL_TIGERLAKE, &cnl_features }, 1027 { INTEL_SAPPHIRERAPIDS_X, &spr_features }, 1028 { INTEL_EMERALDRAPIDS_X, &spr_features }, 1029 { INTEL_GRANITERAPIDS_X, &spr_features }, 1030 { INTEL_GRANITERAPIDS_D, &spr_features }, 1031 { INTEL_LAKEFIELD, &cnl_features }, 1032 { INTEL_ALDERLAKE, &adl_features }, 1033 { INTEL_ALDERLAKE_L, &adl_features }, 1034 { INTEL_RAPTORLAKE, &adl_features }, 1035 { INTEL_RAPTORLAKE_P, &adl_features }, 1036 { INTEL_RAPTORLAKE_S, &adl_features }, 1037 { INTEL_METEORLAKE, &adl_features }, 1038 { INTEL_METEORLAKE_L, &adl_features }, 1039 { INTEL_ARROWLAKE_H, &adl_features }, 1040 { INTEL_ARROWLAKE_U, &adl_features }, 1041 { INTEL_ARROWLAKE, &adl_features }, 1042 { INTEL_LUNARLAKE_M, &lnl_features }, 1043 { INTEL_PANTHERLAKE_L, &lnl_features }, 1044 { INTEL_ATOM_SILVERMONT, &slv_features }, 1045 { INTEL_ATOM_SILVERMONT_D, &slvd_features }, 1046 { INTEL_ATOM_AIRMONT, &amt_features }, 1047 { INTEL_ATOM_GOLDMONT, &gmt_features }, 1048 { INTEL_ATOM_GOLDMONT_D, &gmtd_features }, 1049 { INTEL_ATOM_GOLDMONT_PLUS, &gmtp_features }, 1050 { INTEL_ATOM_TREMONT_D, &tmtd_features }, 1051 { 
INTEL_ATOM_TREMONT, &tmt_features }, 1052 { INTEL_ATOM_TREMONT_L, &tmt_features }, 1053 { INTEL_ATOM_GRACEMONT, &adl_features }, 1054 { INTEL_ATOM_CRESTMONT_X, &srf_features }, 1055 { INTEL_ATOM_CRESTMONT, &grr_features }, 1056 { INTEL_ATOM_DARKMONT_X, &srf_features }, 1057 { INTEL_XEON_PHI_KNL, &knl_features }, 1058 { INTEL_XEON_PHI_KNM, &knl_features }, 1059 /* 1060 * Missing support for 1061 * INTEL_ICELAKE 1062 * INTEL_ATOM_SILVERMONT_MID 1063 * INTEL_ATOM_SILVERMONT_MID2 1064 * INTEL_ATOM_AIRMONT_NP 1065 */ 1066 { 0, NULL }, 1067 }; 1068 1069 static const struct platform_features *platform; 1070 1071 void probe_platform_features(unsigned int family, unsigned int model) 1072 { 1073 int i; 1074 1075 1076 if (authentic_amd || hygon_genuine) { 1077 /* fallback to default features on unsupported models */ 1078 force_load++; 1079 if (max_extended_level >= 0x80000007) { 1080 unsigned int eax, ebx, ecx, edx; 1081 1082 __cpuid(0x80000007, eax, ebx, ecx, edx); 1083 /* RAPL (Fam 17h+) */ 1084 if ((edx & (1 << 14)) && family >= 0x17) 1085 platform = &amd_features_with_rapl; 1086 } 1087 goto end; 1088 } 1089 1090 if (!genuine_intel) 1091 goto end; 1092 1093 for (i = 0; turbostat_pdata[i].features; i++) { 1094 if (VFM_FAMILY(turbostat_pdata[i].vfm) == family && VFM_MODEL(turbostat_pdata[i].vfm) == model) { 1095 platform = turbostat_pdata[i].features; 1096 return; 1097 } 1098 } 1099 1100 end: 1101 if (force_load && !platform) { 1102 fprintf(outf, "Forced to run on unsupported platform!\n"); 1103 platform = &default_features; 1104 } 1105 1106 if (platform) 1107 return; 1108 1109 fprintf(stderr, "Unsupported platform detected.\n" 1110 "\tSee RUN THE LATEST VERSION on turbostat(8)\n"); 1111 exit(1); 1112 } 1113 1114 /* Model specific support End */ 1115 1116 #define TJMAX_DEFAULT 100 1117 1118 /* MSRs that are not yet in the kernel-provided header. 
*/ 1119 #define MSR_RAPL_PWR_UNIT 0xc0010299 1120 #define MSR_CORE_ENERGY_STAT 0xc001029a 1121 #define MSR_PKG_ENERGY_STAT 0xc001029b 1122 1123 #define MAX(a, b) ((a) > (b) ? (a) : (b)) 1124 1125 int backwards_count; 1126 char *progname; 1127 1128 #define CPU_SUBSET_MAXCPUS 8192 /* need to use before probe... */ 1129 cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset; 1130 size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size; 1131 #define MAX_ADDED_THREAD_COUNTERS 24 1132 #define MAX_ADDED_CORE_COUNTERS 8 1133 #define MAX_ADDED_PACKAGE_COUNTERS 16 1134 #define PMT_MAX_ADDED_THREAD_COUNTERS 24 1135 #define PMT_MAX_ADDED_CORE_COUNTERS 8 1136 #define PMT_MAX_ADDED_PACKAGE_COUNTERS 16 1137 #define BITMASK_SIZE 32 1138 1139 #define ZERO_ARRAY(arr) (memset(arr, 0, sizeof(arr)) + __must_be_array(arr)) 1140 1141 /* Indexes used to map data read from perf and MSRs into global variables */ 1142 enum rapl_rci_index { 1143 RAPL_RCI_INDEX_ENERGY_PKG = 0, 1144 RAPL_RCI_INDEX_ENERGY_CORES = 1, 1145 RAPL_RCI_INDEX_DRAM = 2, 1146 RAPL_RCI_INDEX_GFX = 3, 1147 RAPL_RCI_INDEX_PKG_PERF_STATUS = 4, 1148 RAPL_RCI_INDEX_DRAM_PERF_STATUS = 5, 1149 RAPL_RCI_INDEX_CORE_ENERGY = 6, 1150 RAPL_RCI_INDEX_ENERGY_PLATFORM = 7, 1151 NUM_RAPL_COUNTERS, 1152 }; 1153 1154 enum rapl_unit { 1155 RAPL_UNIT_INVALID, 1156 RAPL_UNIT_JOULES, 1157 RAPL_UNIT_WATTS, 1158 }; 1159 1160 struct rapl_counter_info_t { 1161 unsigned long long data[NUM_RAPL_COUNTERS]; 1162 enum counter_source source[NUM_RAPL_COUNTERS]; 1163 unsigned long long flags[NUM_RAPL_COUNTERS]; 1164 double scale[NUM_RAPL_COUNTERS]; 1165 enum rapl_unit unit[NUM_RAPL_COUNTERS]; 1166 unsigned long long msr[NUM_RAPL_COUNTERS]; 1167 unsigned long long msr_mask[NUM_RAPL_COUNTERS]; 1168 int msr_shift[NUM_RAPL_COUNTERS]; 1169 1170 int fd_perf; 1171 }; 1172 1173 /* struct rapl_counter_info_t for each RAPL domain 
*/ 1174 struct rapl_counter_info_t *rapl_counter_info_perdomain; 1175 unsigned int rapl_counter_info_perdomain_size; 1176 1177 #define RAPL_COUNTER_FLAG_PLATFORM_COUNTER (1u << 0) 1178 #define RAPL_COUNTER_FLAG_USE_MSR_SUM (1u << 1) 1179 1180 struct rapl_counter_arch_info { 1181 int feature_mask; /* Mask for testing if the counter is supported on host */ 1182 const char *perf_subsys; 1183 const char *perf_name; 1184 unsigned long long msr; 1185 unsigned long long msr_mask; 1186 int msr_shift; /* Positive mean shift right, negative mean shift left */ 1187 double *platform_rapl_msr_scale; /* Scale applied to values read by MSR (platform dependent, filled at runtime) */ 1188 unsigned int rci_index; /* Maps data from perf counters to global variables */ 1189 unsigned long long bic; 1190 double compat_scale; /* Some counters require constant scaling to be in the same range as other, similar ones */ 1191 unsigned long long flags; 1192 }; 1193 1194 static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { 1195 { 1196 .feature_mask = RAPL_PKG, 1197 .perf_subsys = "power", 1198 .perf_name = "energy-pkg", 1199 .msr = MSR_PKG_ENERGY_STATUS, 1200 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1201 .msr_shift = 0, 1202 .platform_rapl_msr_scale = &rapl_energy_units, 1203 .rci_index = RAPL_RCI_INDEX_ENERGY_PKG, 1204 .bic = BIC_PkgWatt | BIC_Pkg_J, 1205 .compat_scale = 1.0, 1206 .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, 1207 }, 1208 { 1209 .feature_mask = RAPL_AMD_F17H, 1210 .perf_subsys = "power", 1211 .perf_name = "energy-pkg", 1212 .msr = MSR_PKG_ENERGY_STAT, 1213 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1214 .msr_shift = 0, 1215 .platform_rapl_msr_scale = &rapl_energy_units, 1216 .rci_index = RAPL_RCI_INDEX_ENERGY_PKG, 1217 .bic = BIC_PkgWatt | BIC_Pkg_J, 1218 .compat_scale = 1.0, 1219 .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, 1220 }, 1221 { 1222 .feature_mask = RAPL_CORE_ENERGY_STATUS, 1223 .perf_subsys = "power", 1224 .perf_name = "energy-cores", 1225 .msr = MSR_PP0_ENERGY_STATUS, 1226 
.msr_mask = 0xFFFFFFFFFFFFFFFF, 1227 .msr_shift = 0, 1228 .platform_rapl_msr_scale = &rapl_energy_units, 1229 .rci_index = RAPL_RCI_INDEX_ENERGY_CORES, 1230 .bic = BIC_CorWatt | BIC_Cor_J, 1231 .compat_scale = 1.0, 1232 .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, 1233 }, 1234 { 1235 .feature_mask = RAPL_DRAM, 1236 .perf_subsys = "power", 1237 .perf_name = "energy-ram", 1238 .msr = MSR_DRAM_ENERGY_STATUS, 1239 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1240 .msr_shift = 0, 1241 .platform_rapl_msr_scale = &rapl_dram_energy_units, 1242 .rci_index = RAPL_RCI_INDEX_DRAM, 1243 .bic = BIC_RAMWatt | BIC_RAM_J, 1244 .compat_scale = 1.0, 1245 .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, 1246 }, 1247 { 1248 .feature_mask = RAPL_GFX, 1249 .perf_subsys = "power", 1250 .perf_name = "energy-gpu", 1251 .msr = MSR_PP1_ENERGY_STATUS, 1252 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1253 .msr_shift = 0, 1254 .platform_rapl_msr_scale = &rapl_energy_units, 1255 .rci_index = RAPL_RCI_INDEX_GFX, 1256 .bic = BIC_GFXWatt | BIC_GFX_J, 1257 .compat_scale = 1.0, 1258 .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, 1259 }, 1260 { 1261 .feature_mask = RAPL_PKG_PERF_STATUS, 1262 .perf_subsys = NULL, 1263 .perf_name = NULL, 1264 .msr = MSR_PKG_PERF_STATUS, 1265 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1266 .msr_shift = 0, 1267 .platform_rapl_msr_scale = &rapl_time_units, 1268 .rci_index = RAPL_RCI_INDEX_PKG_PERF_STATUS, 1269 .bic = BIC_PKG__, 1270 .compat_scale = 100.0, 1271 .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, 1272 }, 1273 { 1274 .feature_mask = RAPL_DRAM_PERF_STATUS, 1275 .perf_subsys = NULL, 1276 .perf_name = NULL, 1277 .msr = MSR_DRAM_PERF_STATUS, 1278 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1279 .msr_shift = 0, 1280 .platform_rapl_msr_scale = &rapl_time_units, 1281 .rci_index = RAPL_RCI_INDEX_DRAM_PERF_STATUS, 1282 .bic = BIC_RAM__, 1283 .compat_scale = 100.0, 1284 .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, 1285 }, 1286 { 1287 .feature_mask = RAPL_AMD_F17H, 1288 .perf_subsys = NULL, 1289 .perf_name = NULL, 1290 .msr = MSR_CORE_ENERGY_STAT, 1291 
.msr_mask = 0xFFFFFFFF, 1292 .msr_shift = 0, 1293 .platform_rapl_msr_scale = &rapl_energy_units, 1294 .rci_index = RAPL_RCI_INDEX_CORE_ENERGY, 1295 .bic = BIC_CorWatt | BIC_Cor_J, 1296 .compat_scale = 1.0, 1297 .flags = 0, 1298 }, 1299 { 1300 .feature_mask = RAPL_PSYS, 1301 .perf_subsys = "power", 1302 .perf_name = "energy-psys", 1303 .msr = MSR_PLATFORM_ENERGY_STATUS, 1304 .msr_mask = 0x00000000FFFFFFFF, 1305 .msr_shift = 0, 1306 .platform_rapl_msr_scale = &rapl_psys_energy_units, 1307 .rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM, 1308 .bic = BIC_SysWatt | BIC_Sys_J, 1309 .compat_scale = 1.0, 1310 .flags = RAPL_COUNTER_FLAG_PLATFORM_COUNTER | RAPL_COUNTER_FLAG_USE_MSR_SUM, 1311 }, 1312 }; 1313 1314 struct rapl_counter { 1315 unsigned long long raw_value; 1316 enum rapl_unit unit; 1317 double scale; 1318 }; 1319 1320 /* Indexes used to map data read from perf and MSRs into global variables */ 1321 enum ccstate_rci_index { 1322 CCSTATE_RCI_INDEX_C1_RESIDENCY = 0, 1323 CCSTATE_RCI_INDEX_C3_RESIDENCY = 1, 1324 CCSTATE_RCI_INDEX_C6_RESIDENCY = 2, 1325 CCSTATE_RCI_INDEX_C7_RESIDENCY = 3, 1326 PCSTATE_RCI_INDEX_C2_RESIDENCY = 4, 1327 PCSTATE_RCI_INDEX_C3_RESIDENCY = 5, 1328 PCSTATE_RCI_INDEX_C6_RESIDENCY = 6, 1329 PCSTATE_RCI_INDEX_C7_RESIDENCY = 7, 1330 PCSTATE_RCI_INDEX_C8_RESIDENCY = 8, 1331 PCSTATE_RCI_INDEX_C9_RESIDENCY = 9, 1332 PCSTATE_RCI_INDEX_C10_RESIDENCY = 10, 1333 NUM_CSTATE_COUNTERS, 1334 }; 1335 1336 struct cstate_counter_info_t { 1337 unsigned long long data[NUM_CSTATE_COUNTERS]; 1338 enum counter_source source[NUM_CSTATE_COUNTERS]; 1339 unsigned long long msr[NUM_CSTATE_COUNTERS]; 1340 int fd_perf_core; 1341 int fd_perf_pkg; 1342 }; 1343 1344 struct cstate_counter_info_t *ccstate_counter_info; 1345 unsigned int ccstate_counter_info_size; 1346 1347 #define CSTATE_COUNTER_FLAG_COLLECT_PER_CORE (1u << 0) 1348 #define CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD ((1u << 1) | CSTATE_COUNTER_FLAG_COLLECT_PER_CORE) 1349 #define CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY 
(1u << 2) 1350 1351 struct cstate_counter_arch_info { 1352 int feature_mask; /* Mask for testing if the counter is supported on host */ 1353 const char *perf_subsys; 1354 const char *perf_name; 1355 unsigned long long msr; 1356 unsigned int rci_index; /* Maps data from perf counters to global variables */ 1357 unsigned long long bic; 1358 unsigned long long flags; 1359 int pkg_cstate_limit; 1360 }; 1361 1362 static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { 1363 { 1364 .feature_mask = CC1, 1365 .perf_subsys = "cstate_core", 1366 .perf_name = "c1-residency", 1367 .msr = MSR_CORE_C1_RES, 1368 .rci_index = CCSTATE_RCI_INDEX_C1_RESIDENCY, 1369 .bic = BIC_CPU_c1, 1370 .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD, 1371 .pkg_cstate_limit = 0, 1372 }, 1373 { 1374 .feature_mask = CC3, 1375 .perf_subsys = "cstate_core", 1376 .perf_name = "c3-residency", 1377 .msr = MSR_CORE_C3_RESIDENCY, 1378 .rci_index = CCSTATE_RCI_INDEX_C3_RESIDENCY, 1379 .bic = BIC_CPU_c3, 1380 .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY, 1381 .pkg_cstate_limit = 0, 1382 }, 1383 { 1384 .feature_mask = CC6, 1385 .perf_subsys = "cstate_core", 1386 .perf_name = "c6-residency", 1387 .msr = MSR_CORE_C6_RESIDENCY, 1388 .rci_index = CCSTATE_RCI_INDEX_C6_RESIDENCY, 1389 .bic = BIC_CPU_c6, 1390 .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY, 1391 .pkg_cstate_limit = 0, 1392 }, 1393 { 1394 .feature_mask = CC7, 1395 .perf_subsys = "cstate_core", 1396 .perf_name = "c7-residency", 1397 .msr = MSR_CORE_C7_RESIDENCY, 1398 .rci_index = CCSTATE_RCI_INDEX_C7_RESIDENCY, 1399 .bic = BIC_CPU_c7, 1400 .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY, 1401 .pkg_cstate_limit = 0, 1402 }, 1403 { 1404 .feature_mask = PC2, 1405 .perf_subsys = "cstate_pkg", 1406 .perf_name = "c2-residency", 1407 .msr = MSR_PKG_C2_RESIDENCY, 1408 .rci_index = PCSTATE_RCI_INDEX_C2_RESIDENCY, 1409 .bic = 
BIC_Pkgpc2, 1410 .flags = 0, 1411 .pkg_cstate_limit = PCL__2, 1412 }, 1413 { 1414 .feature_mask = PC3, 1415 .perf_subsys = "cstate_pkg", 1416 .perf_name = "c3-residency", 1417 .msr = MSR_PKG_C3_RESIDENCY, 1418 .rci_index = PCSTATE_RCI_INDEX_C3_RESIDENCY, 1419 .bic = BIC_Pkgpc3, 1420 .flags = 0, 1421 .pkg_cstate_limit = PCL__3, 1422 }, 1423 { 1424 .feature_mask = PC6, 1425 .perf_subsys = "cstate_pkg", 1426 .perf_name = "c6-residency", 1427 .msr = MSR_PKG_C6_RESIDENCY, 1428 .rci_index = PCSTATE_RCI_INDEX_C6_RESIDENCY, 1429 .bic = BIC_Pkgpc6, 1430 .flags = 0, 1431 .pkg_cstate_limit = PCL__6, 1432 }, 1433 { 1434 .feature_mask = PC7, 1435 .perf_subsys = "cstate_pkg", 1436 .perf_name = "c7-residency", 1437 .msr = MSR_PKG_C7_RESIDENCY, 1438 .rci_index = PCSTATE_RCI_INDEX_C7_RESIDENCY, 1439 .bic = BIC_Pkgpc7, 1440 .flags = 0, 1441 .pkg_cstate_limit = PCL__7, 1442 }, 1443 { 1444 .feature_mask = PC8, 1445 .perf_subsys = "cstate_pkg", 1446 .perf_name = "c8-residency", 1447 .msr = MSR_PKG_C8_RESIDENCY, 1448 .rci_index = PCSTATE_RCI_INDEX_C8_RESIDENCY, 1449 .bic = BIC_Pkgpc8, 1450 .flags = 0, 1451 .pkg_cstate_limit = PCL__8, 1452 }, 1453 { 1454 .feature_mask = PC9, 1455 .perf_subsys = "cstate_pkg", 1456 .perf_name = "c9-residency", 1457 .msr = MSR_PKG_C9_RESIDENCY, 1458 .rci_index = PCSTATE_RCI_INDEX_C9_RESIDENCY, 1459 .bic = BIC_Pkgpc9, 1460 .flags = 0, 1461 .pkg_cstate_limit = PCL__9, 1462 }, 1463 { 1464 .feature_mask = PC10, 1465 .perf_subsys = "cstate_pkg", 1466 .perf_name = "c10-residency", 1467 .msr = MSR_PKG_C10_RESIDENCY, 1468 .rci_index = PCSTATE_RCI_INDEX_C10_RESIDENCY, 1469 .bic = BIC_Pkgpc10, 1470 .flags = 0, 1471 .pkg_cstate_limit = PCL_10, 1472 }, 1473 }; 1474 1475 /* Indexes used to map data read from perf and MSRs into global variables */ 1476 enum msr_rci_index { 1477 MSR_RCI_INDEX_APERF = 0, 1478 MSR_RCI_INDEX_MPERF = 1, 1479 MSR_RCI_INDEX_SMI = 2, 1480 NUM_MSR_COUNTERS, 1481 }; 1482 1483 struct msr_counter_info_t { 1484 unsigned long long 
data[NUM_MSR_COUNTERS]; 1485 enum counter_source source[NUM_MSR_COUNTERS]; 1486 unsigned long long msr[NUM_MSR_COUNTERS]; 1487 unsigned long long msr_mask[NUM_MSR_COUNTERS]; 1488 int fd_perf; 1489 }; 1490 1491 struct msr_counter_info_t *msr_counter_info; 1492 unsigned int msr_counter_info_size; 1493 1494 struct msr_counter_arch_info { 1495 const char *perf_subsys; 1496 const char *perf_name; 1497 unsigned long long msr; 1498 unsigned long long msr_mask; 1499 unsigned int rci_index; /* Maps data from perf counters to global variables */ 1500 bool needed; 1501 bool present; 1502 }; 1503 1504 enum msr_arch_info_index { 1505 MSR_ARCH_INFO_APERF_INDEX = 0, 1506 MSR_ARCH_INFO_MPERF_INDEX = 1, 1507 MSR_ARCH_INFO_SMI_INDEX = 2, 1508 }; 1509 1510 static struct msr_counter_arch_info msr_counter_arch_infos[] = { 1511 [MSR_ARCH_INFO_APERF_INDEX] = { 1512 .perf_subsys = "msr", 1513 .perf_name = "aperf", 1514 .msr = MSR_IA32_APERF, 1515 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1516 .rci_index = MSR_RCI_INDEX_APERF, 1517 }, 1518 1519 [MSR_ARCH_INFO_MPERF_INDEX] = { 1520 .perf_subsys = "msr", 1521 .perf_name = "mperf", 1522 .msr = MSR_IA32_MPERF, 1523 .msr_mask = 0xFFFFFFFFFFFFFFFF, 1524 .rci_index = MSR_RCI_INDEX_MPERF, 1525 }, 1526 1527 [MSR_ARCH_INFO_SMI_INDEX] = { 1528 .perf_subsys = "msr", 1529 .perf_name = "smi", 1530 .msr = MSR_SMI_COUNT, 1531 .msr_mask = 0xFFFFFFFF, 1532 .rci_index = MSR_RCI_INDEX_SMI, 1533 }, 1534 }; 1535 1536 /* Can be redefined when compiling, useful for testing. 
*/ 1537 #ifndef SYSFS_TELEM_PATH 1538 #define SYSFS_TELEM_PATH "/sys/class/intel_pmt" 1539 #endif 1540 1541 #define PMT_COUNTER_MTL_DC6_OFFSET 120 1542 #define PMT_COUNTER_MTL_DC6_LSB 0 1543 #define PMT_COUNTER_MTL_DC6_MSB 63 1544 #define PMT_MTL_DC6_GUID 0x1a067102 1545 #define PMT_MTL_DC6_SEQ 0 1546 1547 #define PMT_COUNTER_CWF_MC1E_OFFSET_BASE 20936 1548 #define PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT 24 1549 #define PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE 12 1550 #define PMT_COUNTER_CWF_CPUS_PER_MODULE 4 1551 #define PMT_COUNTER_CWF_MC1E_LSB 0 1552 #define PMT_COUNTER_CWF_MC1E_MSB 63 1553 #define PMT_CWF_MC1E_GUID 0x14421519 1554 1555 unsigned long long tcore_clock_freq_hz = 800000000; 1556 1557 #define PMT_COUNTER_NAME_SIZE_BYTES 16 1558 #define PMT_COUNTER_TYPE_NAME_SIZE_BYTES 32 1559 1560 struct pmt_mmio { 1561 struct pmt_mmio *next; 1562 1563 unsigned int guid; 1564 unsigned int size; 1565 1566 /* Base pointer to the mmaped memory. */ 1567 void *mmio_base; 1568 1569 /* 1570 * Offset to be applied to the mmio_base 1571 * to get the beginning of the PMT counters for given GUID. 1572 */ 1573 unsigned long pmt_offset; 1574 } *pmt_mmios; 1575 1576 enum pmt_datatype { 1577 PMT_TYPE_RAW, 1578 PMT_TYPE_XTAL_TIME, 1579 PMT_TYPE_TCORE_CLOCK, 1580 }; 1581 1582 struct pmt_domain_info { 1583 /* 1584 * Pointer to the MMIO obtained by applying a counter offset 1585 * to the mmio_base of the mmaped region for the given GUID. 1586 * 1587 * This is where to read the raw value of the counter from. 
1588 */ 1589 unsigned long *pcounter; 1590 }; 1591 1592 struct pmt_counter { 1593 struct pmt_counter *next; 1594 1595 /* PMT metadata */ 1596 char name[PMT_COUNTER_NAME_SIZE_BYTES]; 1597 enum pmt_datatype type; 1598 enum counter_scope scope; 1599 unsigned int lsb; 1600 unsigned int msb; 1601 1602 /* BIC-like metadata */ 1603 enum counter_format format; 1604 1605 unsigned int num_domains; 1606 struct pmt_domain_info *domains; 1607 }; 1608 1609 /* 1610 * PMT telemetry directory iterator. 1611 * Used to iterate telemetry files in sysfs in correct order. 1612 */ 1613 struct pmt_diriter_t { 1614 DIR *dir; 1615 struct dirent **namelist; 1616 unsigned int num_names; 1617 unsigned int current_name_idx; 1618 }; 1619 1620 int pmt_telemdir_filter(const struct dirent *e) 1621 { 1622 unsigned int dummy; 1623 1624 return sscanf(e->d_name, "telem%u", &dummy); 1625 } 1626 1627 int pmt_telemdir_sort(const struct dirent **a, const struct dirent **b) 1628 { 1629 unsigned int aidx = 0, bidx = 0; 1630 1631 sscanf((*a)->d_name, "telem%u", &aidx); 1632 sscanf((*b)->d_name, "telem%u", &bidx); 1633 1634 return aidx >= bidx; 1635 } 1636 1637 const struct dirent *pmt_diriter_next(struct pmt_diriter_t *iter) 1638 { 1639 const struct dirent *ret = NULL; 1640 1641 if (!iter->dir) 1642 return NULL; 1643 1644 if (iter->current_name_idx >= iter->num_names) 1645 return NULL; 1646 1647 ret = iter->namelist[iter->current_name_idx]; 1648 ++iter->current_name_idx; 1649 1650 return ret; 1651 } 1652 1653 const struct dirent *pmt_diriter_begin(struct pmt_diriter_t *iter, const char *pmt_root_path) 1654 { 1655 int num_names = iter->num_names; 1656 1657 if (!iter->dir) { 1658 iter->dir = opendir(pmt_root_path); 1659 if (iter->dir == NULL) 1660 return NULL; 1661 1662 num_names = scandir(pmt_root_path, &iter->namelist, pmt_telemdir_filter, pmt_telemdir_sort); 1663 if (num_names == -1) 1664 return NULL; 1665 } 1666 1667 iter->current_name_idx = 0; 1668 iter->num_names = num_names; 1669 1670 return 
pmt_diriter_next(iter); 1671 } 1672 1673 void pmt_diriter_init(struct pmt_diriter_t *iter) 1674 { 1675 memset(iter, 0, sizeof(*iter)); 1676 } 1677 1678 void pmt_diriter_remove(struct pmt_diriter_t *iter) 1679 { 1680 if (iter->namelist) { 1681 for (unsigned int i = 0; i < iter->num_names; i++) { 1682 free(iter->namelist[i]); 1683 iter->namelist[i] = NULL; 1684 } 1685 } 1686 1687 free(iter->namelist); 1688 iter->namelist = NULL; 1689 iter->num_names = 0; 1690 iter->current_name_idx = 0; 1691 1692 closedir(iter->dir); 1693 iter->dir = NULL; 1694 } 1695 1696 unsigned int pmt_counter_get_width(const struct pmt_counter *p) 1697 { 1698 return (p->msb - p->lsb) + 1; 1699 } 1700 1701 void pmt_counter_resize_(struct pmt_counter *pcounter, unsigned int new_size) 1702 { 1703 struct pmt_domain_info *new_mem; 1704 1705 new_mem = (struct pmt_domain_info *)reallocarray(pcounter->domains, new_size, sizeof(*pcounter->domains)); 1706 if (!new_mem) { 1707 fprintf(stderr, "%s: failed to allocate memory for PMT counters\n", __func__); 1708 exit(1); 1709 } 1710 1711 /* Zero initialize just allocated memory. */ 1712 const size_t num_new_domains = new_size - pcounter->num_domains; 1713 1714 memset(&new_mem[pcounter->num_domains], 0, num_new_domains * sizeof(*pcounter->domains)); 1715 1716 pcounter->num_domains = new_size; 1717 pcounter->domains = new_mem; 1718 } 1719 1720 void pmt_counter_resize(struct pmt_counter *pcounter, unsigned int new_size) 1721 { 1722 /* 1723 * Allocate more memory ahead of time. 1724 * 1725 * Always allocate space for at least 8 elements 1726 * and double the size when growing. 
1727 */ 1728 if (new_size < 8) 1729 new_size = 8; 1730 new_size = MAX(new_size, pcounter->num_domains * 2); 1731 1732 pmt_counter_resize_(pcounter, new_size); 1733 } 1734 1735 struct thread_data { 1736 struct timeval tv_begin; 1737 struct timeval tv_end; 1738 struct timeval tv_delta; 1739 unsigned long long tsc; 1740 unsigned long long aperf; 1741 unsigned long long mperf; 1742 unsigned long long c1; 1743 unsigned long long instr_count; 1744 unsigned long long irq_count; 1745 unsigned long long nmi_count; 1746 unsigned int smi_count; 1747 unsigned int cpu_id; 1748 unsigned int apic_id; 1749 unsigned int x2apic_id; 1750 unsigned int flags; 1751 bool is_atom; 1752 unsigned long long counter[MAX_ADDED_THREAD_COUNTERS]; 1753 unsigned long long perf_counter[MAX_ADDED_THREAD_COUNTERS]; 1754 unsigned long long pmt_counter[PMT_MAX_ADDED_THREAD_COUNTERS]; 1755 } *thread_even, *thread_odd; 1756 1757 struct core_data { 1758 int base_cpu; 1759 unsigned long long c3; 1760 unsigned long long c6; 1761 unsigned long long c7; 1762 unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */ 1763 unsigned int core_temp_c; 1764 struct rapl_counter core_energy; /* MSR_CORE_ENERGY_STAT */ 1765 unsigned int core_id; 1766 unsigned long long core_throt_cnt; 1767 unsigned long long counter[MAX_ADDED_CORE_COUNTERS]; 1768 unsigned long long perf_counter[MAX_ADDED_CORE_COUNTERS]; 1769 unsigned long long pmt_counter[PMT_MAX_ADDED_CORE_COUNTERS]; 1770 } *core_even, *core_odd; 1771 1772 struct pkg_data { 1773 int base_cpu; 1774 unsigned long long pc2; 1775 unsigned long long pc3; 1776 unsigned long long pc6; 1777 unsigned long long pc7; 1778 unsigned long long pc8; 1779 unsigned long long pc9; 1780 unsigned long long pc10; 1781 long long cpu_lpi; 1782 long long sys_lpi; 1783 unsigned long long pkg_wtd_core_c0; 1784 unsigned long long pkg_any_core_c0; 1785 unsigned long long pkg_any_gfxe_c0; 1786 unsigned long long pkg_both_core_gfxe_c0; 1787 long long gfx_rc6_ms; 1788 
unsigned int gfx_mhz; 1789 unsigned int gfx_act_mhz; 1790 long long sam_mc6_ms; 1791 unsigned int sam_mhz; 1792 unsigned int sam_act_mhz; 1793 unsigned int package_id; 1794 struct rapl_counter energy_pkg; /* MSR_PKG_ENERGY_STATUS */ 1795 struct rapl_counter energy_dram; /* MSR_DRAM_ENERGY_STATUS */ 1796 struct rapl_counter energy_cores; /* MSR_PP0_ENERGY_STATUS */ 1797 struct rapl_counter energy_gfx; /* MSR_PP1_ENERGY_STATUS */ 1798 struct rapl_counter rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ 1799 struct rapl_counter rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ 1800 unsigned int pkg_temp_c; 1801 unsigned int uncore_mhz; 1802 unsigned long long die_c6; 1803 unsigned long long counter[MAX_ADDED_PACKAGE_COUNTERS]; 1804 unsigned long long perf_counter[MAX_ADDED_PACKAGE_COUNTERS]; 1805 unsigned long long pmt_counter[PMT_MAX_ADDED_PACKAGE_COUNTERS]; 1806 } *package_even, *package_odd; 1807 1808 #define ODD_COUNTERS thread_odd, core_odd, package_odd 1809 #define EVEN_COUNTERS thread_even, core_even, package_even 1810 1811 #define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no) \ 1812 ((thread_base) + \ 1813 ((pkg_no) * \ 1814 topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \ 1815 ((node_no) * topo.cores_per_node * topo.threads_per_core) + \ 1816 ((core_no) * topo.threads_per_core) + \ 1817 (thread_no)) 1818 1819 #define GET_CORE(core_base, core_no, node_no, pkg_no) \ 1820 ((core_base) + \ 1821 ((pkg_no) * topo.nodes_per_pkg * topo.cores_per_node) + \ 1822 ((node_no) * topo.cores_per_node) + \ 1823 (core_no)) 1824 1825 #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) 1826 1827 /* 1828 * The accumulated sum of MSR is defined as a monotonic 1829 * increasing MSR, it will be accumulated periodically, 1830 * despite its register's bit width. 
1831 */ 1832 enum { 1833 IDX_PKG_ENERGY, 1834 IDX_DRAM_ENERGY, 1835 IDX_PP0_ENERGY, 1836 IDX_PP1_ENERGY, 1837 IDX_PKG_PERF, 1838 IDX_DRAM_PERF, 1839 IDX_PSYS_ENERGY, 1840 IDX_COUNT, 1841 }; 1842 1843 int get_msr_sum(int cpu, off_t offset, unsigned long long *msr); 1844 1845 struct msr_sum_array { 1846 /* get_msr_sum() = sum + (get_msr() - last) */ 1847 struct { 1848 /*The accumulated MSR value is updated by the timer */ 1849 unsigned long long sum; 1850 /*The MSR footprint recorded in last timer */ 1851 unsigned long long last; 1852 } entries[IDX_COUNT]; 1853 }; 1854 1855 /* The percpu MSR sum array.*/ 1856 struct msr_sum_array *per_cpu_msr_sum; 1857 1858 off_t idx_to_offset(int idx) 1859 { 1860 off_t offset; 1861 1862 switch (idx) { 1863 case IDX_PKG_ENERGY: 1864 if (platform->rapl_msrs & RAPL_AMD_F17H) 1865 offset = MSR_PKG_ENERGY_STAT; 1866 else 1867 offset = MSR_PKG_ENERGY_STATUS; 1868 break; 1869 case IDX_DRAM_ENERGY: 1870 offset = MSR_DRAM_ENERGY_STATUS; 1871 break; 1872 case IDX_PP0_ENERGY: 1873 offset = MSR_PP0_ENERGY_STATUS; 1874 break; 1875 case IDX_PP1_ENERGY: 1876 offset = MSR_PP1_ENERGY_STATUS; 1877 break; 1878 case IDX_PKG_PERF: 1879 offset = MSR_PKG_PERF_STATUS; 1880 break; 1881 case IDX_DRAM_PERF: 1882 offset = MSR_DRAM_PERF_STATUS; 1883 break; 1884 case IDX_PSYS_ENERGY: 1885 offset = MSR_PLATFORM_ENERGY_STATUS; 1886 break; 1887 default: 1888 offset = -1; 1889 } 1890 return offset; 1891 } 1892 1893 int offset_to_idx(off_t offset) 1894 { 1895 int idx; 1896 1897 switch (offset) { 1898 case MSR_PKG_ENERGY_STATUS: 1899 case MSR_PKG_ENERGY_STAT: 1900 idx = IDX_PKG_ENERGY; 1901 break; 1902 case MSR_DRAM_ENERGY_STATUS: 1903 idx = IDX_DRAM_ENERGY; 1904 break; 1905 case MSR_PP0_ENERGY_STATUS: 1906 idx = IDX_PP0_ENERGY; 1907 break; 1908 case MSR_PP1_ENERGY_STATUS: 1909 idx = IDX_PP1_ENERGY; 1910 break; 1911 case MSR_PKG_PERF_STATUS: 1912 idx = IDX_PKG_PERF; 1913 break; 1914 case MSR_DRAM_PERF_STATUS: 1915 idx = IDX_DRAM_PERF; 1916 break; 1917 case 
MSR_PLATFORM_ENERGY_STATUS: 1918 idx = IDX_PSYS_ENERGY; 1919 break; 1920 default: 1921 idx = -1; 1922 } 1923 return idx; 1924 } 1925 1926 int idx_valid(int idx) 1927 { 1928 switch (idx) { 1929 case IDX_PKG_ENERGY: 1930 return platform->rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H); 1931 case IDX_DRAM_ENERGY: 1932 return platform->rapl_msrs & RAPL_DRAM; 1933 case IDX_PP0_ENERGY: 1934 return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS; 1935 case IDX_PP1_ENERGY: 1936 return platform->rapl_msrs & RAPL_GFX; 1937 case IDX_PKG_PERF: 1938 return platform->rapl_msrs & RAPL_PKG_PERF_STATUS; 1939 case IDX_DRAM_PERF: 1940 return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS; 1941 case IDX_PSYS_ENERGY: 1942 return platform->rapl_msrs & RAPL_PSYS; 1943 default: 1944 return 0; 1945 } 1946 } 1947 1948 struct sys_counters { 1949 /* MSR added counters */ 1950 unsigned int added_thread_counters; 1951 unsigned int added_core_counters; 1952 unsigned int added_package_counters; 1953 struct msr_counter *tp; 1954 struct msr_counter *cp; 1955 struct msr_counter *pp; 1956 1957 /* perf added counters */ 1958 unsigned int added_thread_perf_counters; 1959 unsigned int added_core_perf_counters; 1960 unsigned int added_package_perf_counters; 1961 struct perf_counter_info *perf_tp; 1962 struct perf_counter_info *perf_cp; 1963 struct perf_counter_info *perf_pp; 1964 1965 struct pmt_counter *pmt_tp; 1966 struct pmt_counter *pmt_cp; 1967 struct pmt_counter *pmt_pp; 1968 } sys; 1969 1970 static size_t free_msr_counters_(struct msr_counter **pp) 1971 { 1972 struct msr_counter *p = NULL; 1973 size_t num_freed = 0; 1974 1975 while (*pp) { 1976 p = *pp; 1977 1978 if (p->msr_num != 0) { 1979 *pp = p->next; 1980 1981 free(p); 1982 ++num_freed; 1983 1984 continue; 1985 } 1986 1987 pp = &p->next; 1988 } 1989 1990 return num_freed; 1991 } 1992 1993 /* 1994 * Free all added counters accessed via msr. 
1995 */ 1996 static void free_sys_msr_counters(void) 1997 { 1998 /* Thread counters */ 1999 sys.added_thread_counters -= free_msr_counters_(&sys.tp); 2000 2001 /* Core counters */ 2002 sys.added_core_counters -= free_msr_counters_(&sys.cp); 2003 2004 /* Package counters */ 2005 sys.added_package_counters -= free_msr_counters_(&sys.pp); 2006 } 2007 2008 struct system_summary { 2009 struct thread_data threads; 2010 struct core_data cores; 2011 struct pkg_data packages; 2012 } average; 2013 2014 struct platform_counters { 2015 struct rapl_counter energy_psys; /* MSR_PLATFORM_ENERGY_STATUS */ 2016 } platform_counters_odd, platform_counters_even; 2017 2018 struct cpu_topology { 2019 int physical_package_id; 2020 int die_id; 2021 int logical_cpu_id; 2022 int physical_node_id; 2023 int logical_node_id; /* 0-based count within the package */ 2024 int physical_core_id; 2025 int thread_id; 2026 int type; 2027 cpu_set_t *put_ids; /* Processing Unit/Thread IDs */ 2028 } *cpus; 2029 2030 struct topo_params { 2031 int num_packages; 2032 int num_die; 2033 int num_cpus; 2034 int num_cores; 2035 int allowed_packages; 2036 int allowed_cpus; 2037 int allowed_cores; 2038 int max_cpu_num; 2039 int max_core_id; 2040 int max_package_id; 2041 int max_die_id; 2042 int max_node_num; 2043 int nodes_per_pkg; 2044 int cores_per_node; 2045 int threads_per_core; 2046 } topo; 2047 2048 struct timeval tv_even, tv_odd, tv_delta; 2049 2050 int *irq_column_2_cpu; /* /proc/interrupts column numbers */ 2051 int *irqs_per_cpu; /* indexed by cpu_num */ 2052 int *nmi_per_cpu; /* indexed by cpu_num */ 2053 2054 void setup_all_buffers(bool startup); 2055 2056 char *sys_lpi_file; 2057 char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us"; 2058 char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec"; 2059 2060 int cpu_is_not_present(int cpu) 2061 { 2062 return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set); 2063 } 2064 2065 int 
cpu_is_not_allowed(int cpu) 2066 { 2067 return !CPU_ISSET_S(cpu, cpu_allowed_setsize, cpu_allowed_set); 2068 } 2069 2070 /* 2071 * run func(thread, core, package) in topology order 2072 * skip non-present cpus 2073 */ 2074 2075 int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *), 2076 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) 2077 { 2078 int retval, pkg_no, core_no, thread_no, node_no; 2079 2080 retval = 0; 2081 2082 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 2083 for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) { 2084 for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { 2085 for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { 2086 struct thread_data *t; 2087 struct core_data *c; 2088 struct pkg_data *p; 2089 t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); 2090 2091 if (cpu_is_not_allowed(t->cpu_id)) 2092 continue; 2093 2094 c = GET_CORE(core_base, core_no, node_no, pkg_no); 2095 p = GET_PKG(pkg_base, pkg_no); 2096 2097 retval |= func(t, c, p); 2098 } 2099 } 2100 } 2101 } 2102 return retval; 2103 } 2104 2105 int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p) 2106 { 2107 UNUSED(p); 2108 2109 return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0); 2110 } 2111 2112 int is_cpu_first_core_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) 2113 { 2114 UNUSED(c); 2115 2116 return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0); 2117 } 2118 2119 int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) 2120 { 2121 return is_cpu_first_thread_in_core(t, c, p) && is_cpu_first_core_in_package(t, c, p); 2122 } 2123 2124 int cpu_migrate(int cpu) 2125 { 2126 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 2127 CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); 2128 if (sched_setaffinity(0, 
cpu_affinity_setsize, cpu_affinity_set) == -1) 2129 return -1; 2130 else 2131 return 0; 2132 } 2133 2134 int get_msr_fd(int cpu) 2135 { 2136 char pathname[32]; 2137 int fd; 2138 2139 fd = fd_percpu[cpu]; 2140 2141 if (fd) 2142 return fd; 2143 2144 sprintf(pathname, "/dev/cpu/%d/msr", cpu); 2145 fd = open(pathname, O_RDONLY); 2146 if (fd < 0) 2147 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, " 2148 "or run with --no-msr, or run as root", pathname); 2149 2150 fd_percpu[cpu] = fd; 2151 2152 return fd; 2153 } 2154 2155 static void bic_disable_msr_access(void) 2156 { 2157 const unsigned long bic_msrs = BIC_Mod_c6 | BIC_CoreTmp | 2158 BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_PkgTmp; 2159 2160 bic_enabled &= ~bic_msrs; 2161 2162 free_sys_msr_counters(); 2163 } 2164 2165 static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) 2166 { 2167 assert(!no_perf); 2168 2169 return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); 2170 } 2171 2172 static long open_perf_counter(int cpu, unsigned int type, unsigned int config, int group_fd, __u64 read_format) 2173 { 2174 struct perf_event_attr attr; 2175 const pid_t pid = -1; 2176 const unsigned long flags = 0; 2177 2178 assert(!no_perf); 2179 2180 memset(&attr, 0, sizeof(struct perf_event_attr)); 2181 2182 attr.type = type; 2183 attr.size = sizeof(struct perf_event_attr); 2184 attr.config = config; 2185 attr.disabled = 0; 2186 attr.sample_type = PERF_SAMPLE_IDENTIFIER; 2187 attr.read_format = read_format; 2188 2189 const int fd = perf_event_open(&attr, pid, cpu, group_fd, flags); 2190 2191 return fd; 2192 } 2193 2194 int get_instr_count_fd(int cpu) 2195 { 2196 if (fd_instr_count_percpu[cpu]) 2197 return fd_instr_count_percpu[cpu]; 2198 2199 fd_instr_count_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0); 2200 2201 return fd_instr_count_percpu[cpu]; 2202 } 2203 2204 int get_msr(int 
cpu, off_t offset, unsigned long long *msr) 2205 { 2206 ssize_t retval; 2207 2208 assert(!no_msr); 2209 2210 retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset); 2211 2212 if (retval != sizeof *msr) 2213 err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset); 2214 2215 return 0; 2216 } 2217 2218 int probe_rapl_msr(int cpu, off_t offset, int index) 2219 { 2220 ssize_t retval; 2221 unsigned long long value; 2222 2223 assert(!no_msr); 2224 2225 retval = pread(get_msr_fd(cpu), &value, sizeof(value), offset); 2226 2227 /* if the read failed, the probe fails */ 2228 if (retval != sizeof(value)) 2229 return 1; 2230 2231 /* If an Energy Status Counter MSR returns 0, the probe fails */ 2232 switch (index) { 2233 case RAPL_RCI_INDEX_ENERGY_PKG: 2234 case RAPL_RCI_INDEX_ENERGY_CORES: 2235 case RAPL_RCI_INDEX_DRAM: 2236 case RAPL_RCI_INDEX_GFX: 2237 case RAPL_RCI_INDEX_ENERGY_PLATFORM: 2238 if (value == 0) 2239 return 1; 2240 } 2241 2242 /* PKG,DRAM_PERF_STATUS MSRs, can return any value */ 2243 return 0; 2244 } 2245 2246 /* Convert CPU ID to domain ID for given added perf counter. 
*/ 2247 unsigned int cpu_to_domain(const struct perf_counter_info *pc, int cpu) 2248 { 2249 switch (pc->scope) { 2250 case SCOPE_CPU: 2251 return cpu; 2252 2253 case SCOPE_CORE: 2254 return cpus[cpu].physical_core_id; 2255 2256 case SCOPE_PACKAGE: 2257 return cpus[cpu].physical_package_id; 2258 } 2259 2260 __builtin_unreachable(); 2261 } 2262 2263 #define MAX_DEFERRED 16 2264 char *deferred_add_names[MAX_DEFERRED]; 2265 char *deferred_skip_names[MAX_DEFERRED]; 2266 int deferred_add_index; 2267 int deferred_skip_index; 2268 2269 /* 2270 * HIDE_LIST - hide this list of counters, show the rest [default] 2271 * SHOW_LIST - show this list of counters, hide the rest 2272 */ 2273 enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST; 2274 2275 void help(void) 2276 { 2277 fprintf(outf, 2278 "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" 2279 "\n" 2280 "Turbostat forks the specified COMMAND and prints statistics\n" 2281 "when COMMAND completes.\n" 2282 "If no COMMAND is specified, turbostat wakes every 5-seconds\n" 2283 "to print statistics, until interrupted.\n" 2284 " -a, --add counter\n" 2285 " add a counter\n" 2286 " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" 2287 " eg. --add perf/cstate_pkg/c2-residency,package,delta,percent,perfPC2\n" 2288 " eg. 
--add pmt,name=XTAL,type=raw,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102\n" 2289 " -c, --cpu cpu-set\n" 2290 " limit output to summary plus cpu-set:\n" 2291 " {core | package | j,k,l..m,n-p }\n" 2292 " -d, --debug\n" 2293 " displays usec, Time_Of_Day_Seconds and more debugging\n" 2294 " debug messages are printed to stderr\n" 2295 " -D, --Dump\n" 2296 " displays the raw counter values\n" 2297 " -e, --enable [all | column]\n" 2298 " shows all or the specified disabled column\n" 2299 " -f, --force\n" 2300 " force load turbostat with minimum default features on unsupported platforms.\n" 2301 " -H, --hide [column | column,column,...]\n" 2302 " hide the specified column(s)\n" 2303 " -i, --interval sec.subsec\n" 2304 " override default 5-second measurement interval\n" 2305 " -J, --Joules\n" 2306 " displays energy in Joules instead of Watts\n" 2307 " -l, --list\n" 2308 " list column headers only\n" 2309 " -M, --no-msr\n" 2310 " disable all uses of the MSR driver\n" 2311 " -P, --no-perf\n" 2312 " disable all uses of the perf API\n" 2313 " -n, --num_iterations num\n" 2314 " number of the measurement iterations\n" 2315 " -N, --header_iterations num\n" 2316 " print header every num iterations\n" 2317 " -o, --out file\n" 2318 " create or truncate \"file\" for all output\n" 2319 " -q, --quiet\n" 2320 " skip decoding system configuration header\n" 2321 " -s, --show [column | column,column,...]\n" 2322 " show only the specified column(s)\n" 2323 " -S, --Summary\n" 2324 " limits output to 1-line system summary per interval\n" 2325 " -T, --TCC temperature\n" 2326 " sets the Thermal Control Circuit temperature in\n" 2327 " degrees Celsius\n" 2328 " -h, --help\n" 2329 " print this help message\n" 2330 " -v, --version\n" 2331 " print version information\n\nFor more help, run \"man turbostat\"\n"); 2332 } 2333 2334 /* 2335 * bic_lookup 2336 * for all the strings in comma separate name_list, 2337 * set the approprate bit in return value. 
2338 */ 2339 unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) 2340 { 2341 unsigned int i; 2342 unsigned long long retval = 0; 2343 2344 while (name_list) { 2345 char *comma; 2346 2347 comma = strchr(name_list, ','); 2348 2349 if (comma) 2350 *comma = '\0'; 2351 2352 for (i = 0; i < MAX_BIC; ++i) { 2353 if (!strcmp(name_list, bic[i].name)) { 2354 retval |= (1ULL << i); 2355 break; 2356 } 2357 if (!strcmp(name_list, "all")) { 2358 retval |= ~0; 2359 break; 2360 } else if (!strcmp(name_list, "topology")) { 2361 retval |= BIC_GROUP_TOPOLOGY; 2362 break; 2363 } else if (!strcmp(name_list, "power")) { 2364 retval |= BIC_GROUP_THERMAL_PWR; 2365 break; 2366 } else if (!strcmp(name_list, "idle")) { 2367 retval |= BIC_GROUP_IDLE; 2368 break; 2369 } else if (!strcmp(name_list, "swidle")) { 2370 retval |= BIC_GROUP_SW_IDLE; 2371 break; 2372 } else if (!strcmp(name_list, "sysfs")) { /* legacy compatibility */ 2373 retval |= BIC_GROUP_SW_IDLE; 2374 break; 2375 } else if (!strcmp(name_list, "hwidle")) { 2376 retval |= BIC_GROUP_HW_IDLE; 2377 break; 2378 } else if (!strcmp(name_list, "frequency")) { 2379 retval |= BIC_GROUP_FREQUENCY; 2380 break; 2381 } else if (!strcmp(name_list, "other")) { 2382 retval |= BIC_OTHER; 2383 break; 2384 } 2385 2386 } 2387 if (i == MAX_BIC) { 2388 fprintf(stderr, "deferred %s\n", name_list); 2389 if (mode == SHOW_LIST) { 2390 deferred_add_names[deferred_add_index++] = name_list; 2391 if (deferred_add_index >= MAX_DEFERRED) { 2392 fprintf(stderr, "More than max %d un-recognized --add options '%s'\n", 2393 MAX_DEFERRED, name_list); 2394 help(); 2395 exit(1); 2396 } 2397 } else { 2398 deferred_skip_names[deferred_skip_index++] = name_list; 2399 if (debug) 2400 fprintf(stderr, "deferred \"%s\"\n", name_list); 2401 if (deferred_skip_index >= MAX_DEFERRED) { 2402 fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", 2403 MAX_DEFERRED, name_list); 2404 help(); 2405 exit(1); 2406 } 2407 } 2408 } 2409 2410 name_list = 
comma; 2411 if (name_list) 2412 name_list++; 2413 2414 } 2415 return retval; 2416 } 2417 2418 void print_header(char *delim) 2419 { 2420 struct msr_counter *mp; 2421 struct perf_counter_info *pp; 2422 struct pmt_counter *ppmt; 2423 int printed = 0; 2424 2425 if (DO_BIC(BIC_USEC)) 2426 outp += sprintf(outp, "%susec", (printed++ ? delim : "")); 2427 if (DO_BIC(BIC_TOD)) 2428 outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : "")); 2429 if (DO_BIC(BIC_Package)) 2430 outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); 2431 if (DO_BIC(BIC_Die)) 2432 outp += sprintf(outp, "%sDie", (printed++ ? delim : "")); 2433 if (DO_BIC(BIC_Node)) 2434 outp += sprintf(outp, "%sNode", (printed++ ? delim : "")); 2435 if (DO_BIC(BIC_Core)) 2436 outp += sprintf(outp, "%sCore", (printed++ ? delim : "")); 2437 if (DO_BIC(BIC_CPU)) 2438 outp += sprintf(outp, "%sCPU", (printed++ ? delim : "")); 2439 if (DO_BIC(BIC_APIC)) 2440 outp += sprintf(outp, "%sAPIC", (printed++ ? delim : "")); 2441 if (DO_BIC(BIC_X2APIC)) 2442 outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : "")); 2443 if (DO_BIC(BIC_Avg_MHz)) 2444 outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : "")); 2445 if (DO_BIC(BIC_Busy)) 2446 outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : "")); 2447 if (DO_BIC(BIC_Bzy_MHz)) 2448 outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : "")); 2449 if (DO_BIC(BIC_TSC_MHz)) 2450 outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : "")); 2451 2452 if (DO_BIC(BIC_IPC)) 2453 outp += sprintf(outp, "%sIPC", (printed++ ? delim : "")); 2454 2455 if (DO_BIC(BIC_IRQ)) { 2456 if (sums_need_wide_columns) 2457 outp += sprintf(outp, "%s IRQ", (printed++ ? delim : "")); 2458 else 2459 outp += sprintf(outp, "%sIRQ", (printed++ ? delim : "")); 2460 } 2461 if (DO_BIC(BIC_NMI)) { 2462 if (sums_need_wide_columns) 2463 outp += sprintf(outp, "%s NMI", (printed++ ? delim : "")); 2464 else 2465 outp += sprintf(outp, "%sNMI", (printed++ ? 
delim : "")); 2466 } 2467 2468 if (DO_BIC(BIC_SMI)) 2469 outp += sprintf(outp, "%sSMI", (printed++ ? delim : "")); 2470 2471 for (mp = sys.tp; mp; mp = mp->next) { 2472 2473 if (mp->format == FORMAT_RAW) { 2474 if (mp->width == 64) 2475 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name); 2476 else 2477 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name); 2478 } else { 2479 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2480 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name); 2481 else 2482 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name); 2483 } 2484 } 2485 2486 for (pp = sys.perf_tp; pp; pp = pp->next) { 2487 2488 if (pp->format == FORMAT_RAW) { 2489 if (pp->width == 64) 2490 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); 2491 else 2492 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); 2493 } else { 2494 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2495 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); 2496 else 2497 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); 2498 } 2499 } 2500 2501 ppmt = sys.pmt_tp; 2502 while (ppmt) { 2503 switch (ppmt->type) { 2504 case PMT_TYPE_RAW: 2505 if (pmt_counter_get_width(ppmt) <= 32) 2506 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); 2507 else 2508 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); 2509 2510 break; 2511 2512 case PMT_TYPE_XTAL_TIME: 2513 case PMT_TYPE_TCORE_CLOCK: 2514 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); 2515 break; 2516 } 2517 2518 ppmt = ppmt->next; 2519 } 2520 2521 if (DO_BIC(BIC_CPU_c1)) 2522 outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); 2523 if (DO_BIC(BIC_CPU_c3)) 2524 outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : "")); 2525 if (DO_BIC(BIC_CPU_c6)) 2526 outp += sprintf(outp, "%sCPU%%c6", (printed++ ? 
delim : "")); 2527 if (DO_BIC(BIC_CPU_c7)) 2528 outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : "")); 2529 2530 if (DO_BIC(BIC_Mod_c6)) 2531 outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : "")); 2532 2533 if (DO_BIC(BIC_CoreTmp)) 2534 outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : "")); 2535 2536 if (DO_BIC(BIC_CORE_THROT_CNT)) 2537 outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : "")); 2538 2539 if (platform->rapl_msrs && !rapl_joules) { 2540 if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) 2541 outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); 2542 } else if (platform->rapl_msrs && rapl_joules) { 2543 if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) 2544 outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); 2545 } 2546 2547 for (mp = sys.cp; mp; mp = mp->next) { 2548 if (mp->format == FORMAT_RAW) { 2549 if (mp->width == 64) 2550 outp += sprintf(outp, "%s%18.18s", delim, mp->name); 2551 else 2552 outp += sprintf(outp, "%s%10.10s", delim, mp->name); 2553 } else { 2554 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2555 outp += sprintf(outp, "%s%8s", delim, mp->name); 2556 else 2557 outp += sprintf(outp, "%s%s", delim, mp->name); 2558 } 2559 } 2560 2561 for (pp = sys.perf_cp; pp; pp = pp->next) { 2562 2563 if (pp->format == FORMAT_RAW) { 2564 if (pp->width == 64) 2565 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); 2566 else 2567 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); 2568 } else { 2569 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2570 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); 2571 else 2572 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); 2573 } 2574 } 2575 2576 ppmt = sys.pmt_cp; 2577 while (ppmt) { 2578 switch (ppmt->type) { 2579 case PMT_TYPE_RAW: 2580 if (pmt_counter_get_width(ppmt) <= 32) 2581 outp += sprintf(outp, "%s%10.10s", (printed++ ? 
delim : ""), ppmt->name); 2582 else 2583 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); 2584 2585 break; 2586 2587 case PMT_TYPE_XTAL_TIME: 2588 case PMT_TYPE_TCORE_CLOCK: 2589 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); 2590 break; 2591 } 2592 2593 ppmt = ppmt->next; 2594 } 2595 2596 if (DO_BIC(BIC_PkgTmp)) 2597 outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : "")); 2598 2599 if (DO_BIC(BIC_GFX_rc6)) 2600 outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : "")); 2601 2602 if (DO_BIC(BIC_GFXMHz)) 2603 outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : "")); 2604 2605 if (DO_BIC(BIC_GFXACTMHz)) 2606 outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : "")); 2607 2608 if (DO_BIC(BIC_SAM_mc6)) 2609 outp += sprintf(outp, "%sSAM%%mc6", (printed++ ? delim : "")); 2610 2611 if (DO_BIC(BIC_SAMMHz)) 2612 outp += sprintf(outp, "%sSAMMHz", (printed++ ? delim : "")); 2613 2614 if (DO_BIC(BIC_SAMACTMHz)) 2615 outp += sprintf(outp, "%sSAMAMHz", (printed++ ? delim : "")); 2616 2617 if (DO_BIC(BIC_Totl_c0)) 2618 outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : "")); 2619 if (DO_BIC(BIC_Any_c0)) 2620 outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : "")); 2621 if (DO_BIC(BIC_GFX_c0)) 2622 outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : "")); 2623 if (DO_BIC(BIC_CPUGFX)) 2624 outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : "")); 2625 2626 if (DO_BIC(BIC_Pkgpc2)) 2627 outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : "")); 2628 if (DO_BIC(BIC_Pkgpc3)) 2629 outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : "")); 2630 if (DO_BIC(BIC_Pkgpc6)) 2631 outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : "")); 2632 if (DO_BIC(BIC_Pkgpc7)) 2633 outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : "")); 2634 if (DO_BIC(BIC_Pkgpc8)) 2635 outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? 
delim : "")); 2636 if (DO_BIC(BIC_Pkgpc9)) 2637 outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : "")); 2638 if (DO_BIC(BIC_Pkgpc10)) 2639 outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : "")); 2640 if (DO_BIC(BIC_Diec6)) 2641 outp += sprintf(outp, "%sDie%%c6", (printed++ ? delim : "")); 2642 if (DO_BIC(BIC_CPU_LPI)) 2643 outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : "")); 2644 if (DO_BIC(BIC_SYS_LPI)) 2645 outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : "")); 2646 2647 if (platform->rapl_msrs && !rapl_joules) { 2648 if (DO_BIC(BIC_PkgWatt)) 2649 outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : "")); 2650 if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) 2651 outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); 2652 if (DO_BIC(BIC_GFXWatt)) 2653 outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : "")); 2654 if (DO_BIC(BIC_RAMWatt)) 2655 outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : "")); 2656 if (DO_BIC(BIC_PKG__)) 2657 outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); 2658 if (DO_BIC(BIC_RAM__)) 2659 outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : "")); 2660 } else if (platform->rapl_msrs && rapl_joules) { 2661 if (DO_BIC(BIC_Pkg_J)) 2662 outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : "")); 2663 if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) 2664 outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); 2665 if (DO_BIC(BIC_GFX_J)) 2666 outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : "")); 2667 if (DO_BIC(BIC_RAM_J)) 2668 outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : "")); 2669 if (DO_BIC(BIC_PKG__)) 2670 outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); 2671 if (DO_BIC(BIC_RAM__)) 2672 outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : "")); 2673 } 2674 if (DO_BIC(BIC_UNCORE_MHZ)) 2675 outp += sprintf(outp, "%sUncMHz", (printed++ ? 
delim : "")); 2676 2677 for (mp = sys.pp; mp; mp = mp->next) { 2678 if (mp->format == FORMAT_RAW) { 2679 if (mp->width == 64) 2680 outp += sprintf(outp, "%s%18.18s", delim, mp->name); 2681 else if (mp->width == 32) 2682 outp += sprintf(outp, "%s%10.10s", delim, mp->name); 2683 else 2684 outp += sprintf(outp, "%s%7.7s", delim, mp->name); 2685 } else { 2686 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2687 outp += sprintf(outp, "%s%8s", delim, mp->name); 2688 else 2689 outp += sprintf(outp, "%s%7.7s", delim, mp->name); 2690 } 2691 } 2692 2693 for (pp = sys.perf_pp; pp; pp = pp->next) { 2694 2695 if (pp->format == FORMAT_RAW) { 2696 if (pp->width == 64) 2697 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); 2698 else 2699 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); 2700 } else { 2701 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2702 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); 2703 else 2704 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); 2705 } 2706 } 2707 2708 ppmt = sys.pmt_pp; 2709 while (ppmt) { 2710 switch (ppmt->type) { 2711 case PMT_TYPE_RAW: 2712 if (pmt_counter_get_width(ppmt) <= 32) 2713 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); 2714 else 2715 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); 2716 2717 break; 2718 2719 case PMT_TYPE_XTAL_TIME: 2720 case PMT_TYPE_TCORE_CLOCK: 2721 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); 2722 break; 2723 } 2724 2725 ppmt = ppmt->next; 2726 } 2727 2728 if (DO_BIC(BIC_SysWatt)) 2729 outp += sprintf(outp, "%sSysWatt", (printed++ ? delim : "")); 2730 if (DO_BIC(BIC_Sys_J)) 2731 outp += sprintf(outp, "%sSys_J", (printed++ ? 
delim : "")); 2732 2733 outp += sprintf(outp, "\n"); 2734 } 2735 2736 int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 2737 { 2738 int i; 2739 struct msr_counter *mp; 2740 struct platform_counters *pplat_cnt = p == package_odd ? &platform_counters_odd : &platform_counters_even; 2741 2742 outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p); 2743 2744 if (t) { 2745 outp += sprintf(outp, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags); 2746 outp += sprintf(outp, "TSC: %016llX\n", t->tsc); 2747 outp += sprintf(outp, "aperf: %016llX\n", t->aperf); 2748 outp += sprintf(outp, "mperf: %016llX\n", t->mperf); 2749 outp += sprintf(outp, "c1: %016llX\n", t->c1); 2750 2751 if (DO_BIC(BIC_IPC)) 2752 outp += sprintf(outp, "IPC: %lld\n", t->instr_count); 2753 2754 if (DO_BIC(BIC_IRQ)) 2755 outp += sprintf(outp, "IRQ: %lld\n", t->irq_count); 2756 if (DO_BIC(BIC_NMI)) 2757 outp += sprintf(outp, "IRQ: %lld\n", t->nmi_count); 2758 if (DO_BIC(BIC_SMI)) 2759 outp += sprintf(outp, "SMI: %d\n", t->smi_count); 2760 2761 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 2762 outp += 2763 sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, 2764 t->counter[i], mp->sp->path); 2765 } 2766 } 2767 2768 if (c && is_cpu_first_thread_in_core(t, c, p)) { 2769 outp += sprintf(outp, "core: %d\n", c->core_id); 2770 outp += sprintf(outp, "c3: %016llX\n", c->c3); 2771 outp += sprintf(outp, "c6: %016llX\n", c->c6); 2772 outp += sprintf(outp, "c7: %016llX\n", c->c7); 2773 outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c); 2774 outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt); 2775 2776 const unsigned long long energy_value = c->core_energy.raw_value * c->core_energy.scale; 2777 const double energy_scale = c->core_energy.scale; 2778 2779 if (c->core_energy.unit == RAPL_UNIT_JOULES) 2780 outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale); 2781 2782 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) 
{ 2783 outp += 2784 sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, 2785 c->counter[i], mp->sp->path); 2786 } 2787 outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us); 2788 } 2789 2790 if (p && is_cpu_first_core_in_package(t, c, p)) { 2791 outp += sprintf(outp, "package: %d\n", p->package_id); 2792 2793 outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0); 2794 outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0); 2795 outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0); 2796 outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0); 2797 2798 outp += sprintf(outp, "pc2: %016llX\n", p->pc2); 2799 if (DO_BIC(BIC_Pkgpc3)) 2800 outp += sprintf(outp, "pc3: %016llX\n", p->pc3); 2801 if (DO_BIC(BIC_Pkgpc6)) 2802 outp += sprintf(outp, "pc6: %016llX\n", p->pc6); 2803 if (DO_BIC(BIC_Pkgpc7)) 2804 outp += sprintf(outp, "pc7: %016llX\n", p->pc7); 2805 outp += sprintf(outp, "pc8: %016llX\n", p->pc8); 2806 outp += sprintf(outp, "pc9: %016llX\n", p->pc9); 2807 outp += sprintf(outp, "pc10: %016llX\n", p->pc10); 2808 outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi); 2809 outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi); 2810 outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg.raw_value); 2811 outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores.raw_value); 2812 outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx.raw_value); 2813 outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram.raw_value); 2814 outp += sprintf(outp, "Joules PSYS: %0llX\n", pplat_cnt->energy_psys.raw_value); 2815 outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status.raw_value); 2816 outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status.raw_value); 2817 outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); 2818 2819 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 2820 outp += 2821 sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, 
mp->msr_num, 2822 p->counter[i], mp->sp->path); 2823 } 2824 } 2825 2826 outp += sprintf(outp, "\n"); 2827 2828 return 0; 2829 } 2830 2831 double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desired_unit, double interval) 2832 { 2833 assert(desired_unit != RAPL_UNIT_INVALID); 2834 2835 /* 2836 * For now we don't expect anything other than joules, 2837 * so just simplify the logic. 2838 */ 2839 assert(c->unit == RAPL_UNIT_JOULES); 2840 2841 const double scaled = c->raw_value * c->scale; 2842 2843 if (desired_unit == RAPL_UNIT_WATTS) 2844 return scaled / interval; 2845 return scaled; 2846 } 2847 2848 /* 2849 * column formatting convention & formats 2850 */ 2851 int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 2852 { 2853 static int count; 2854 2855 struct platform_counters *pplat_cnt = NULL; 2856 double interval_float, tsc; 2857 char *fmt8; 2858 int i; 2859 struct msr_counter *mp; 2860 struct perf_counter_info *pp; 2861 struct pmt_counter *ppmt; 2862 char *delim = "\t"; 2863 int printed = 0; 2864 2865 if (t == &average.threads) { 2866 pplat_cnt = count & 1 ? 
&platform_counters_odd : &platform_counters_even; 2867 ++count; 2868 } 2869 2870 /* if showing only 1st thread in core and this isn't one, bail out */ 2871 if (show_core_only && !is_cpu_first_thread_in_core(t, c, p)) 2872 return 0; 2873 2874 /* if showing only 1st thread in pkg and this isn't one, bail out */ 2875 if (show_pkg_only && !is_cpu_first_core_in_package(t, c, p)) 2876 return 0; 2877 2878 /*if not summary line and --cpu is used */ 2879 if ((t != &average.threads) && (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset))) 2880 return 0; 2881 2882 if (DO_BIC(BIC_USEC)) { 2883 /* on each row, print how many usec each timestamp took to gather */ 2884 struct timeval tv; 2885 2886 timersub(&t->tv_end, &t->tv_begin, &tv); 2887 outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec); 2888 } 2889 2890 /* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */ 2891 if (DO_BIC(BIC_TOD)) 2892 outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec); 2893 2894 interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec / 1000000.0; 2895 2896 tsc = t->tsc * tsc_tweak; 2897 2898 /* topo columns, print blanks on 1st (average) line */ 2899 if (t == &average.threads) { 2900 if (DO_BIC(BIC_Package)) 2901 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2902 if (DO_BIC(BIC_Die)) 2903 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2904 if (DO_BIC(BIC_Node)) 2905 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2906 if (DO_BIC(BIC_Core)) 2907 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2908 if (DO_BIC(BIC_CPU)) 2909 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2910 if (DO_BIC(BIC_APIC)) 2911 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2912 if (DO_BIC(BIC_X2APIC)) 2913 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2914 } else { 2915 if (DO_BIC(BIC_Package)) { 2916 if (p) 2917 outp += sprintf(outp, "%s%d", (printed++ ? 
delim : ""), p->package_id); 2918 else 2919 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2920 } 2921 if (DO_BIC(BIC_Die)) { 2922 if (c) 2923 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id); 2924 else 2925 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2926 } 2927 if (DO_BIC(BIC_Node)) { 2928 if (t) 2929 outp += sprintf(outp, "%s%d", 2930 (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id); 2931 else 2932 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2933 } 2934 if (DO_BIC(BIC_Core)) { 2935 if (c) 2936 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id); 2937 else 2938 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2939 } 2940 if (DO_BIC(BIC_CPU)) 2941 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id); 2942 if (DO_BIC(BIC_APIC)) 2943 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id); 2944 if (DO_BIC(BIC_X2APIC)) 2945 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id); 2946 } 2947 2948 if (DO_BIC(BIC_Avg_MHz)) 2949 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float); 2950 2951 if (DO_BIC(BIC_Busy)) 2952 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc); 2953 2954 if (DO_BIC(BIC_Bzy_MHz)) { 2955 if (has_base_hz) 2956 outp += 2957 sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf); 2958 else 2959 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 2960 tsc / units * t->aperf / t->mperf / interval_float); 2961 } 2962 2963 if (DO_BIC(BIC_TSC_MHz)) 2964 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc / units / interval_float); 2965 2966 if (DO_BIC(BIC_IPC)) 2967 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf); 2968 2969 /* IRQ */ 2970 if (DO_BIC(BIC_IRQ)) { 2971 if (sums_need_wide_columns) 2972 outp += sprintf(outp, "%s%8lld", (printed++ ? 
delim : ""), t->irq_count); 2973 else 2974 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count); 2975 } 2976 2977 /* NMI */ 2978 if (DO_BIC(BIC_NMI)) { 2979 if (sums_need_wide_columns) 2980 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->nmi_count); 2981 else 2982 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->nmi_count); 2983 } 2984 2985 /* SMI */ 2986 if (DO_BIC(BIC_SMI)) 2987 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count); 2988 2989 /* Added counters */ 2990 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 2991 if (mp->format == FORMAT_RAW) { 2992 if (mp->width == 32) 2993 outp += 2994 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]); 2995 else 2996 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]); 2997 } else if (mp->format == FORMAT_DELTA) { 2998 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2999 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]); 3000 else 3001 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]); 3002 } else if (mp->format == FORMAT_PERCENT) { 3003 if (mp->type == COUNTER_USEC) 3004 outp += 3005 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 3006 t->counter[i] / interval_float / 10000); 3007 else 3008 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc); 3009 } 3010 } 3011 3012 /* Added perf counters */ 3013 for (i = 0, pp = sys.perf_tp; pp; ++i, pp = pp->next) { 3014 if (pp->format == FORMAT_RAW) { 3015 if (pp->width == 32) 3016 outp += 3017 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 3018 (unsigned int)t->perf_counter[i]); 3019 else 3020 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->perf_counter[i]); 3021 } else if (pp->format == FORMAT_DELTA) { 3022 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 3023 outp += sprintf(outp, "%s%8lld", (printed++ ? 
delim : ""), t->perf_counter[i]); 3024 else 3025 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->perf_counter[i]); 3026 } else if (pp->format == FORMAT_PERCENT) { 3027 if (pp->type == COUNTER_USEC) 3028 outp += 3029 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 3030 t->perf_counter[i] / interval_float / 10000); 3031 else 3032 outp += 3033 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->perf_counter[i] / tsc); 3034 } 3035 } 3036 3037 for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { 3038 const unsigned long value_raw = t->pmt_counter[i]; 3039 double value_converted; 3040 switch (ppmt->type) { 3041 case PMT_TYPE_RAW: 3042 if (pmt_counter_get_width(ppmt) <= 32) 3043 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 3044 (unsigned int)t->pmt_counter[i]); 3045 else 3046 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->pmt_counter[i]); 3047 3048 break; 3049 3050 case PMT_TYPE_XTAL_TIME: 3051 value_converted = 100.0 * value_raw / crystal_hz / interval_float; 3052 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); 3053 break; 3054 3055 case PMT_TYPE_TCORE_CLOCK: 3056 value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; 3057 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); 3058 } 3059 } 3060 3061 /* C1 */ 3062 if (DO_BIC(BIC_CPU_c1)) 3063 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc); 3064 3065 /* print per-core data only for 1st thread in core */ 3066 if (!is_cpu_first_thread_in_core(t, c, p)) 3067 goto done; 3068 3069 if (DO_BIC(BIC_CPU_c3)) 3070 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc); 3071 if (DO_BIC(BIC_CPU_c6)) 3072 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc); 3073 if (DO_BIC(BIC_CPU_c7)) 3074 outp += sprintf(outp, "%s%.2f", (printed++ ? 
delim : ""), 100.0 * c->c7 / tsc); 3075 3076 /* Mod%c6 */ 3077 if (DO_BIC(BIC_Mod_c6)) 3078 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc); 3079 3080 if (DO_BIC(BIC_CoreTmp)) 3081 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c); 3082 3083 /* Core throttle count */ 3084 if (DO_BIC(BIC_CORE_THROT_CNT)) 3085 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt); 3086 3087 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 3088 if (mp->format == FORMAT_RAW) { 3089 if (mp->width == 32) 3090 outp += 3091 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]); 3092 else 3093 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]); 3094 } else if (mp->format == FORMAT_DELTA) { 3095 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 3096 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]); 3097 else 3098 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]); 3099 } else if (mp->format == FORMAT_PERCENT) { 3100 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc); 3101 } 3102 } 3103 3104 for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { 3105 if (pp->format == FORMAT_RAW) { 3106 if (pp->width == 32) 3107 outp += 3108 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 3109 (unsigned int)c->perf_counter[i]); 3110 else 3111 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->perf_counter[i]); 3112 } else if (pp->format == FORMAT_DELTA) { 3113 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 3114 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->perf_counter[i]); 3115 else 3116 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->perf_counter[i]); 3117 } else if (pp->format == FORMAT_PERCENT) { 3118 outp += sprintf(outp, "%s%.2f", (printed++ ? 
delim : ""), 100.0 * c->perf_counter[i] / tsc); 3119 } 3120 } 3121 3122 for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { 3123 const unsigned long value_raw = c->pmt_counter[i]; 3124 double value_converted; 3125 switch (ppmt->type) { 3126 case PMT_TYPE_RAW: 3127 if (pmt_counter_get_width(ppmt) <= 32) 3128 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 3129 (unsigned int)c->pmt_counter[i]); 3130 else 3131 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->pmt_counter[i]); 3132 3133 break; 3134 3135 case PMT_TYPE_XTAL_TIME: 3136 value_converted = 100.0 * value_raw / crystal_hz / interval_float; 3137 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); 3138 break; 3139 3140 case PMT_TYPE_TCORE_CLOCK: 3141 value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; 3142 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); 3143 } 3144 } 3145 3146 fmt8 = "%s%.2f"; 3147 3148 if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) 3149 outp += 3150 sprintf(outp, fmt8, (printed++ ? delim : ""), 3151 rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float)); 3152 if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) 3153 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3154 rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float)); 3155 3156 /* print per-package data only for 1st core in package */ 3157 if (!is_cpu_first_core_in_package(t, c, p)) 3158 goto done; 3159 3160 /* PkgTmp */ 3161 if (DO_BIC(BIC_PkgTmp)) 3162 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c); 3163 3164 /* GFXrc6 */ 3165 if (DO_BIC(BIC_GFX_rc6)) { 3166 if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */ 3167 outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); 3168 } else { 3169 outp += sprintf(outp, "%s%.2f", (printed++ ? 
delim : ""), 3170 p->gfx_rc6_ms / 10.0 / interval_float); 3171 } 3172 } 3173 3174 /* GFXMHz */ 3175 if (DO_BIC(BIC_GFXMHz)) 3176 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz); 3177 3178 /* GFXACTMHz */ 3179 if (DO_BIC(BIC_GFXACTMHz)) 3180 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz); 3181 3182 /* SAMmc6 */ 3183 if (DO_BIC(BIC_SAM_mc6)) { 3184 if (p->sam_mc6_ms == -1) { /* detect GFX counter reset */ 3185 outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); 3186 } else { 3187 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 3188 p->sam_mc6_ms / 10.0 / interval_float); 3189 } 3190 } 3191 3192 /* SAMMHz */ 3193 if (DO_BIC(BIC_SAMMHz)) 3194 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_mhz); 3195 3196 /* SAMACTMHz */ 3197 if (DO_BIC(BIC_SAMACTMHz)) 3198 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_act_mhz); 3199 3200 /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ 3201 if (DO_BIC(BIC_Totl_c0)) 3202 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc); 3203 if (DO_BIC(BIC_Any_c0)) 3204 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc); 3205 if (DO_BIC(BIC_GFX_c0)) 3206 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc); 3207 if (DO_BIC(BIC_CPUGFX)) 3208 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc); 3209 3210 if (DO_BIC(BIC_Pkgpc2)) 3211 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc); 3212 if (DO_BIC(BIC_Pkgpc3)) 3213 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc); 3214 if (DO_BIC(BIC_Pkgpc6)) 3215 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc); 3216 if (DO_BIC(BIC_Pkgpc7)) 3217 outp += sprintf(outp, "%s%.2f", (printed++ ? 
delim : ""), 100.0 * p->pc7 / tsc); 3218 if (DO_BIC(BIC_Pkgpc8)) 3219 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc); 3220 if (DO_BIC(BIC_Pkgpc9)) 3221 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc); 3222 if (DO_BIC(BIC_Pkgpc10)) 3223 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc); 3224 3225 if (DO_BIC(BIC_Diec6)) 3226 outp += 3227 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->die_c6 / crystal_hz / interval_float); 3228 3229 if (DO_BIC(BIC_CPU_LPI)) { 3230 if (p->cpu_lpi >= 0) 3231 outp += 3232 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 3233 100.0 * p->cpu_lpi / 1000000.0 / interval_float); 3234 else 3235 outp += sprintf(outp, "%s(neg)", (printed++ ? delim : "")); 3236 } 3237 if (DO_BIC(BIC_SYS_LPI)) { 3238 if (p->sys_lpi >= 0) 3239 outp += 3240 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 3241 100.0 * p->sys_lpi / 1000000.0 / interval_float); 3242 else 3243 outp += sprintf(outp, "%s(neg)", (printed++ ? delim : "")); 3244 } 3245 3246 if (DO_BIC(BIC_PkgWatt)) 3247 outp += 3248 sprintf(outp, fmt8, (printed++ ? delim : ""), 3249 rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float)); 3250 if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) 3251 outp += 3252 sprintf(outp, fmt8, (printed++ ? delim : ""), 3253 rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float)); 3254 if (DO_BIC(BIC_GFXWatt)) 3255 outp += 3256 sprintf(outp, fmt8, (printed++ ? delim : ""), 3257 rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float)); 3258 if (DO_BIC(BIC_RAMWatt)) 3259 outp += 3260 sprintf(outp, fmt8, (printed++ ? delim : ""), 3261 rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float)); 3262 if (DO_BIC(BIC_Pkg_J)) 3263 outp += sprintf(outp, fmt8, (printed++ ? 
delim : ""), 3264 rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float)); 3265 if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) 3266 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3267 rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float)); 3268 if (DO_BIC(BIC_GFX_J)) 3269 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3270 rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float)); 3271 if (DO_BIC(BIC_RAM_J)) 3272 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3273 rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float)); 3274 if (DO_BIC(BIC_PKG__)) 3275 outp += 3276 sprintf(outp, fmt8, (printed++ ? delim : ""), 3277 rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float)); 3278 if (DO_BIC(BIC_RAM__)) 3279 outp += 3280 sprintf(outp, fmt8, (printed++ ? delim : ""), 3281 rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float)); 3282 /* UncMHz */ 3283 if (DO_BIC(BIC_UNCORE_MHZ)) 3284 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz); 3285 3286 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 3287 if (mp->format == FORMAT_RAW) { 3288 if (mp->width == 32) 3289 outp += 3290 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]); 3291 else 3292 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]); 3293 } else if (mp->format == FORMAT_DELTA) { 3294 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 3295 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]); 3296 else 3297 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]); 3298 } else if (mp->format == FORMAT_PERCENT) { 3299 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc); 3300 } else if (mp->type == COUNTER_K2M) 3301 outp += sprintf(outp, "%s%d", (printed++ ? 
delim : ""), (unsigned int)p->counter[i] / 1000); 3302 } 3303 3304 for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { 3305 if (pp->format == FORMAT_RAW) { 3306 if (pp->width == 32) 3307 outp += 3308 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 3309 (unsigned int)p->perf_counter[i]); 3310 else 3311 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->perf_counter[i]); 3312 } else if (pp->format == FORMAT_DELTA) { 3313 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 3314 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->perf_counter[i]); 3315 else 3316 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->perf_counter[i]); 3317 } else if (pp->format == FORMAT_PERCENT) { 3318 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->perf_counter[i] / tsc); 3319 } else if (pp->type == COUNTER_K2M) { 3320 outp += 3321 sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->perf_counter[i] / 1000); 3322 } 3323 } 3324 3325 for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { 3326 const unsigned long value_raw = p->pmt_counter[i]; 3327 double value_converted; 3328 switch (ppmt->type) { 3329 case PMT_TYPE_RAW: 3330 if (pmt_counter_get_width(ppmt) <= 32) 3331 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 3332 (unsigned int)p->pmt_counter[i]); 3333 else 3334 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->pmt_counter[i]); 3335 3336 break; 3337 3338 case PMT_TYPE_XTAL_TIME: 3339 value_converted = 100.0 * value_raw / crystal_hz / interval_float; 3340 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); 3341 break; 3342 3343 case PMT_TYPE_TCORE_CLOCK: 3344 value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; 3345 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); 3346 } 3347 } 3348 3349 if (DO_BIC(BIC_SysWatt) && (t == &average.threads)) 3350 outp += sprintf(outp, fmt8, (printed++ ? 
delim : ""), 3351 rapl_counter_get_value(&pplat_cnt->energy_psys, RAPL_UNIT_WATTS, interval_float)); 3352 if (DO_BIC(BIC_Sys_J) && (t == &average.threads)) 3353 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3354 rapl_counter_get_value(&pplat_cnt->energy_psys, RAPL_UNIT_JOULES, interval_float)); 3355 3356 done: 3357 if (*(outp - 1) != '\n') 3358 outp += sprintf(outp, "\n"); 3359 3360 return 0; 3361 } 3362 3363 void flush_output_stdout(void) 3364 { 3365 FILE *filep; 3366 3367 if (outf == stderr) 3368 filep = stdout; 3369 else 3370 filep = outf; 3371 3372 fputs(output_buffer, filep); 3373 fflush(filep); 3374 3375 outp = output_buffer; 3376 } 3377 3378 void flush_output_stderr(void) 3379 { 3380 fputs(output_buffer, outf); 3381 fflush(outf); 3382 outp = output_buffer; 3383 } 3384 3385 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 3386 { 3387 static int count; 3388 3389 if ((!count || (header_iterations && !(count % header_iterations))) || !summary_only) 3390 print_header("\t"); 3391 3392 format_counters(&average.threads, &average.cores, &average.packages); 3393 3394 count++; 3395 3396 if (summary_only) 3397 return; 3398 3399 for_all_cpus(format_counters, t, c, p); 3400 } 3401 3402 #define DELTA_WRAP32(new, old) \ 3403 old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32); 3404 3405 int delta_package(struct pkg_data *new, struct pkg_data *old) 3406 { 3407 int i; 3408 struct msr_counter *mp; 3409 struct perf_counter_info *pp; 3410 struct pmt_counter *ppmt; 3411 3412 if (DO_BIC(BIC_Totl_c0)) 3413 old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; 3414 if (DO_BIC(BIC_Any_c0)) 3415 old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0; 3416 if (DO_BIC(BIC_GFX_c0)) 3417 old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0; 3418 if (DO_BIC(BIC_CPUGFX)) 3419 old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0; 3420 3421 
old->pc2 = new->pc2 - old->pc2; 3422 if (DO_BIC(BIC_Pkgpc3)) 3423 old->pc3 = new->pc3 - old->pc3; 3424 if (DO_BIC(BIC_Pkgpc6)) 3425 old->pc6 = new->pc6 - old->pc6; 3426 if (DO_BIC(BIC_Pkgpc7)) 3427 old->pc7 = new->pc7 - old->pc7; 3428 old->pc8 = new->pc8 - old->pc8; 3429 old->pc9 = new->pc9 - old->pc9; 3430 old->pc10 = new->pc10 - old->pc10; 3431 old->die_c6 = new->die_c6 - old->die_c6; 3432 old->cpu_lpi = new->cpu_lpi - old->cpu_lpi; 3433 old->sys_lpi = new->sys_lpi - old->sys_lpi; 3434 old->pkg_temp_c = new->pkg_temp_c; 3435 3436 /* flag an error when rc6 counter resets/wraps */ 3437 if (old->gfx_rc6_ms > new->gfx_rc6_ms) 3438 old->gfx_rc6_ms = -1; 3439 else 3440 old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms; 3441 3442 old->uncore_mhz = new->uncore_mhz; 3443 old->gfx_mhz = new->gfx_mhz; 3444 old->gfx_act_mhz = new->gfx_act_mhz; 3445 3446 /* flag an error when mc6 counter resets/wraps */ 3447 if (old->sam_mc6_ms > new->sam_mc6_ms) 3448 old->sam_mc6_ms = -1; 3449 else 3450 old->sam_mc6_ms = new->sam_mc6_ms - old->sam_mc6_ms; 3451 3452 old->sam_mhz = new->sam_mhz; 3453 old->sam_act_mhz = new->sam_act_mhz; 3454 3455 old->energy_pkg.raw_value = new->energy_pkg.raw_value - old->energy_pkg.raw_value; 3456 old->energy_cores.raw_value = new->energy_cores.raw_value - old->energy_cores.raw_value; 3457 old->energy_gfx.raw_value = new->energy_gfx.raw_value - old->energy_gfx.raw_value; 3458 old->energy_dram.raw_value = new->energy_dram.raw_value - old->energy_dram.raw_value; 3459 old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value; 3460 old->rapl_dram_perf_status.raw_value = 3461 new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value; 3462 3463 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 3464 if (mp->format == FORMAT_RAW) 3465 old->counter[i] = new->counter[i]; 3466 else if (mp->format == FORMAT_AVERAGE) 3467 old->counter[i] = new->counter[i]; 3468 else 3469 old->counter[i] = 
new->counter[i] - old->counter[i]; 3470 } 3471 3472 for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { 3473 if (pp->format == FORMAT_RAW) 3474 old->perf_counter[i] = new->perf_counter[i]; 3475 else if (pp->format == FORMAT_AVERAGE) 3476 old->perf_counter[i] = new->perf_counter[i]; 3477 else 3478 old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; 3479 } 3480 3481 for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { 3482 if (ppmt->format == FORMAT_RAW) 3483 old->pmt_counter[i] = new->pmt_counter[i]; 3484 else 3485 old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; 3486 } 3487 3488 return 0; 3489 } 3490 3491 void delta_core(struct core_data *new, struct core_data *old) 3492 { 3493 int i; 3494 struct msr_counter *mp; 3495 struct perf_counter_info *pp; 3496 struct pmt_counter *ppmt; 3497 3498 old->c3 = new->c3 - old->c3; 3499 old->c6 = new->c6 - old->c6; 3500 old->c7 = new->c7 - old->c7; 3501 old->core_temp_c = new->core_temp_c; 3502 old->core_throt_cnt = new->core_throt_cnt - old->core_throt_cnt; 3503 old->mc6_us = new->mc6_us - old->mc6_us; 3504 3505 DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value); 3506 3507 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 3508 if (mp->format == FORMAT_RAW) 3509 old->counter[i] = new->counter[i]; 3510 else 3511 old->counter[i] = new->counter[i] - old->counter[i]; 3512 } 3513 3514 for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { 3515 if (pp->format == FORMAT_RAW) 3516 old->perf_counter[i] = new->perf_counter[i]; 3517 else 3518 old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; 3519 } 3520 3521 for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { 3522 if (ppmt->format == FORMAT_RAW) 3523 old->pmt_counter[i] = new->pmt_counter[i]; 3524 else 3525 old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; 3526 } 3527 } 3528 3529 int soft_c1_residency_display(int bic) 3530 { 3531 if (!DO_BIC(BIC_CPU_c1) || 
platform->has_msr_core_c1_res) 3532 return 0; 3533 3534 return DO_BIC_READ(bic); 3535 } 3536 3537 /* 3538 * old = new - old 3539 */ 3540 int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta) 3541 { 3542 int i; 3543 struct msr_counter *mp; 3544 struct perf_counter_info *pp; 3545 struct pmt_counter *ppmt; 3546 3547 /* we run cpuid just the 1st time, copy the results */ 3548 if (DO_BIC(BIC_APIC)) 3549 new->apic_id = old->apic_id; 3550 if (DO_BIC(BIC_X2APIC)) 3551 new->x2apic_id = old->x2apic_id; 3552 3553 /* 3554 * the timestamps from start of measurement interval are in "old" 3555 * the timestamp from end of measurement interval are in "new" 3556 * over-write old w/ new so we can print end of interval values 3557 */ 3558 3559 timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta); 3560 old->tv_begin = new->tv_begin; 3561 old->tv_end = new->tv_end; 3562 3563 old->tsc = new->tsc - old->tsc; 3564 3565 /* check for TSC < 1 Mcycles over interval */ 3566 if (old->tsc < (1000 * 1000)) 3567 errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n" 3568 "You can disable all c-states by booting with \"idle=poll\"\n" 3569 "or just the deep ones with \"processor.max_cstate=1\""); 3570 3571 old->c1 = new->c1 - old->c1; 3572 3573 if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC) 3574 || soft_c1_residency_display(BIC_Avg_MHz)) { 3575 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { 3576 old->aperf = new->aperf - old->aperf; 3577 old->mperf = new->mperf - old->mperf; 3578 } else { 3579 return -1; 3580 } 3581 } 3582 3583 if (platform->has_msr_core_c1_res) { 3584 /* 3585 * Some models have a dedicated C1 residency MSR, 3586 * which should be more accurate than the derivation below. 3587 */ 3588 } else { 3589 /* 3590 * As counter collection is not atomic, 3591 * it is possible for mperf's non-halted cycles + idle states 3592 * to exceed TSC's all cycles: show c1 = 0% in that case. 
3593 */ 3594 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak)) 3595 old->c1 = 0; 3596 else { 3597 /* normal case, derive c1 */ 3598 old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3 3599 - core_delta->c6 - core_delta->c7; 3600 } 3601 } 3602 3603 if (old->mperf == 0) { 3604 if (debug > 1) 3605 fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id); 3606 old->mperf = 1; /* divide by 0 protection */ 3607 } 3608 3609 if (DO_BIC(BIC_IPC)) 3610 old->instr_count = new->instr_count - old->instr_count; 3611 3612 if (DO_BIC(BIC_IRQ)) 3613 old->irq_count = new->irq_count - old->irq_count; 3614 3615 if (DO_BIC(BIC_NMI)) 3616 old->nmi_count = new->nmi_count - old->nmi_count; 3617 3618 if (DO_BIC(BIC_SMI)) 3619 old->smi_count = new->smi_count - old->smi_count; 3620 3621 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 3622 if (mp->format == FORMAT_RAW) 3623 old->counter[i] = new->counter[i]; 3624 else 3625 old->counter[i] = new->counter[i] - old->counter[i]; 3626 } 3627 3628 for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { 3629 if (pp->format == FORMAT_RAW) 3630 old->perf_counter[i] = new->perf_counter[i]; 3631 else 3632 old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; 3633 } 3634 3635 for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { 3636 if (ppmt->format == FORMAT_RAW) 3637 old->pmt_counter[i] = new->pmt_counter[i]; 3638 else 3639 old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; 3640 } 3641 3642 return 0; 3643 } 3644 3645 int delta_cpu(struct thread_data *t, struct core_data *c, 3646 struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2) 3647 { 3648 int retval = 0; 3649 3650 /* calculate core delta only for 1st thread in core */ 3651 if (is_cpu_first_thread_in_core(t, c, p)) 3652 delta_core(c, c2); 3653 3654 /* always calculate thread delta */ 3655 retval = delta_thread(t, t2, c2); /* c2 is core delta */ 3656 3657 /* calculate package delta 
only for 1st core in package */ 3658 if (is_cpu_first_core_in_package(t, c, p)) 3659 retval |= delta_package(p, p2); 3660 3661 return retval; 3662 } 3663 3664 void delta_platform(struct platform_counters *new, struct platform_counters *old) 3665 { 3666 old->energy_psys.raw_value = new->energy_psys.raw_value - old->energy_psys.raw_value; 3667 } 3668 3669 void rapl_counter_clear(struct rapl_counter *c) 3670 { 3671 c->raw_value = 0; 3672 c->scale = 0.0; 3673 c->unit = RAPL_UNIT_INVALID; 3674 } 3675 3676 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 3677 { 3678 int i; 3679 struct msr_counter *mp; 3680 3681 t->tv_begin.tv_sec = 0; 3682 t->tv_begin.tv_usec = 0; 3683 t->tv_end.tv_sec = 0; 3684 t->tv_end.tv_usec = 0; 3685 t->tv_delta.tv_sec = 0; 3686 t->tv_delta.tv_usec = 0; 3687 3688 t->tsc = 0; 3689 t->aperf = 0; 3690 t->mperf = 0; 3691 t->c1 = 0; 3692 3693 t->instr_count = 0; 3694 3695 t->irq_count = 0; 3696 t->nmi_count = 0; 3697 t->smi_count = 0; 3698 3699 c->c3 = 0; 3700 c->c6 = 0; 3701 c->c7 = 0; 3702 c->mc6_us = 0; 3703 c->core_temp_c = 0; 3704 rapl_counter_clear(&c->core_energy); 3705 c->core_throt_cnt = 0; 3706 3707 p->pkg_wtd_core_c0 = 0; 3708 p->pkg_any_core_c0 = 0; 3709 p->pkg_any_gfxe_c0 = 0; 3710 p->pkg_both_core_gfxe_c0 = 0; 3711 3712 p->pc2 = 0; 3713 if (DO_BIC(BIC_Pkgpc3)) 3714 p->pc3 = 0; 3715 if (DO_BIC(BIC_Pkgpc6)) 3716 p->pc6 = 0; 3717 if (DO_BIC(BIC_Pkgpc7)) 3718 p->pc7 = 0; 3719 p->pc8 = 0; 3720 p->pc9 = 0; 3721 p->pc10 = 0; 3722 p->die_c6 = 0; 3723 p->cpu_lpi = 0; 3724 p->sys_lpi = 0; 3725 3726 rapl_counter_clear(&p->energy_pkg); 3727 rapl_counter_clear(&p->energy_dram); 3728 rapl_counter_clear(&p->energy_cores); 3729 rapl_counter_clear(&p->energy_gfx); 3730 rapl_counter_clear(&p->rapl_pkg_perf_status); 3731 rapl_counter_clear(&p->rapl_dram_perf_status); 3732 p->pkg_temp_c = 0; 3733 3734 p->gfx_rc6_ms = 0; 3735 p->uncore_mhz = 0; 3736 p->gfx_mhz = 0; 3737 p->gfx_act_mhz = 0; 3738 p->sam_mc6_ms = 0; 3739 
p->sam_mhz = 0; 3740 p->sam_act_mhz = 0; 3741 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) 3742 t->counter[i] = 0; 3743 3744 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) 3745 c->counter[i] = 0; 3746 3747 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) 3748 p->counter[i] = 0; 3749 3750 memset(&t->perf_counter[0], 0, sizeof(t->perf_counter)); 3751 memset(&c->perf_counter[0], 0, sizeof(c->perf_counter)); 3752 memset(&p->perf_counter[0], 0, sizeof(p->perf_counter)); 3753 3754 memset(&t->pmt_counter[0], 0, ARRAY_SIZE(t->pmt_counter)); 3755 memset(&c->pmt_counter[0], 0, ARRAY_SIZE(c->pmt_counter)); 3756 memset(&p->pmt_counter[0], 0, ARRAY_SIZE(p->pmt_counter)); 3757 } 3758 3759 void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src) 3760 { 3761 /* Copy unit and scale from src if dst is not initialized */ 3762 if (dst->unit == RAPL_UNIT_INVALID) { 3763 dst->unit = src->unit; 3764 dst->scale = src->scale; 3765 } 3766 3767 assert(dst->unit == src->unit); 3768 assert(dst->scale == src->scale); 3769 3770 dst->raw_value += src->raw_value; 3771 } 3772 3773 int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 3774 { 3775 int i; 3776 struct msr_counter *mp; 3777 struct perf_counter_info *pp; 3778 struct pmt_counter *ppmt; 3779 3780 /* copy un-changing apic_id's */ 3781 if (DO_BIC(BIC_APIC)) 3782 average.threads.apic_id = t->apic_id; 3783 if (DO_BIC(BIC_X2APIC)) 3784 average.threads.x2apic_id = t->x2apic_id; 3785 3786 /* remember first tv_begin */ 3787 if (average.threads.tv_begin.tv_sec == 0) 3788 average.threads.tv_begin = procsysfs_tv_begin; 3789 3790 /* remember last tv_end */ 3791 average.threads.tv_end = t->tv_end; 3792 3793 average.threads.tsc += t->tsc; 3794 average.threads.aperf += t->aperf; 3795 average.threads.mperf += t->mperf; 3796 average.threads.c1 += t->c1; 3797 3798 average.threads.instr_count += t->instr_count; 3799 3800 average.threads.irq_count += t->irq_count; 3801 average.threads.nmi_count += 
t->nmi_count; 3802 average.threads.smi_count += t->smi_count; 3803 3804 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 3805 if (mp->format == FORMAT_RAW) 3806 continue; 3807 average.threads.counter[i] += t->counter[i]; 3808 } 3809 3810 for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { 3811 if (pp->format == FORMAT_RAW) 3812 continue; 3813 average.threads.perf_counter[i] += t->perf_counter[i]; 3814 } 3815 3816 for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { 3817 average.threads.pmt_counter[i] += t->pmt_counter[i]; 3818 } 3819 3820 /* sum per-core values only for 1st thread in core */ 3821 if (!is_cpu_first_thread_in_core(t, c, p)) 3822 return 0; 3823 3824 average.cores.c3 += c->c3; 3825 average.cores.c6 += c->c6; 3826 average.cores.c7 += c->c7; 3827 average.cores.mc6_us += c->mc6_us; 3828 3829 average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); 3830 average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt); 3831 3832 rapl_counter_accumulate(&average.cores.core_energy, &c->core_energy); 3833 3834 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 3835 if (mp->format == FORMAT_RAW) 3836 continue; 3837 average.cores.counter[i] += c->counter[i]; 3838 } 3839 3840 for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { 3841 if (pp->format == FORMAT_RAW) 3842 continue; 3843 average.cores.perf_counter[i] += c->perf_counter[i]; 3844 } 3845 3846 for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { 3847 average.cores.pmt_counter[i] += c->pmt_counter[i]; 3848 } 3849 3850 /* sum per-pkg values only for 1st core in pkg */ 3851 if (!is_cpu_first_core_in_package(t, c, p)) 3852 return 0; 3853 3854 if (DO_BIC(BIC_Totl_c0)) 3855 average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0; 3856 if (DO_BIC(BIC_Any_c0)) 3857 average.packages.pkg_any_core_c0 += p->pkg_any_core_c0; 3858 if (DO_BIC(BIC_GFX_c0)) 3859 average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0; 3860 if (DO_BIC(BIC_CPUGFX)) 3861 
average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0; 3862 3863 average.packages.pc2 += p->pc2; 3864 if (DO_BIC(BIC_Pkgpc3)) 3865 average.packages.pc3 += p->pc3; 3866 if (DO_BIC(BIC_Pkgpc6)) 3867 average.packages.pc6 += p->pc6; 3868 if (DO_BIC(BIC_Pkgpc7)) 3869 average.packages.pc7 += p->pc7; 3870 average.packages.pc8 += p->pc8; 3871 average.packages.pc9 += p->pc9; 3872 average.packages.pc10 += p->pc10; 3873 average.packages.die_c6 += p->die_c6; 3874 3875 average.packages.cpu_lpi = p->cpu_lpi; 3876 average.packages.sys_lpi = p->sys_lpi; 3877 3878 rapl_counter_accumulate(&average.packages.energy_pkg, &p->energy_pkg); 3879 rapl_counter_accumulate(&average.packages.energy_dram, &p->energy_dram); 3880 rapl_counter_accumulate(&average.packages.energy_cores, &p->energy_cores); 3881 rapl_counter_accumulate(&average.packages.energy_gfx, &p->energy_gfx); 3882 3883 average.packages.gfx_rc6_ms = p->gfx_rc6_ms; 3884 average.packages.uncore_mhz = p->uncore_mhz; 3885 average.packages.gfx_mhz = p->gfx_mhz; 3886 average.packages.gfx_act_mhz = p->gfx_act_mhz; 3887 average.packages.sam_mc6_ms = p->sam_mc6_ms; 3888 average.packages.sam_mhz = p->sam_mhz; 3889 average.packages.sam_act_mhz = p->sam_act_mhz; 3890 3891 average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); 3892 3893 rapl_counter_accumulate(&average.packages.rapl_pkg_perf_status, &p->rapl_pkg_perf_status); 3894 rapl_counter_accumulate(&average.packages.rapl_dram_perf_status, &p->rapl_dram_perf_status); 3895 3896 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 3897 if ((mp->format == FORMAT_RAW) && (topo.num_packages == 0)) 3898 average.packages.counter[i] = p->counter[i]; 3899 else 3900 average.packages.counter[i] += p->counter[i]; 3901 } 3902 3903 for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { 3904 if ((pp->format == FORMAT_RAW) && (topo.num_packages == 0)) 3905 average.packages.perf_counter[i] = p->perf_counter[i]; 3906 else 3907 average.packages.perf_counter[i] += 
p->perf_counter[i]; 3908 } 3909 3910 for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { 3911 average.packages.pmt_counter[i] += p->pmt_counter[i]; 3912 } 3913 3914 return 0; 3915 } 3916 3917 /* 3918 * sum the counters for all cpus in the system 3919 * compute the weighted average 3920 */ 3921 void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data *p) 3922 { 3923 int i; 3924 struct msr_counter *mp; 3925 struct perf_counter_info *pp; 3926 struct pmt_counter *ppmt; 3927 3928 clear_counters(&average.threads, &average.cores, &average.packages); 3929 3930 for_all_cpus(sum_counters, t, c, p); 3931 3932 /* Use the global time delta for the average. */ 3933 average.threads.tv_delta = tv_delta; 3934 3935 average.threads.tsc /= topo.allowed_cpus; 3936 average.threads.aperf /= topo.allowed_cpus; 3937 average.threads.mperf /= topo.allowed_cpus; 3938 average.threads.instr_count /= topo.allowed_cpus; 3939 average.threads.c1 /= topo.allowed_cpus; 3940 3941 if (average.threads.irq_count > 9999999) 3942 sums_need_wide_columns = 1; 3943 if (average.threads.nmi_count > 9999999) 3944 sums_need_wide_columns = 1; 3945 3946 3947 average.cores.c3 /= topo.allowed_cores; 3948 average.cores.c6 /= topo.allowed_cores; 3949 average.cores.c7 /= topo.allowed_cores; 3950 average.cores.mc6_us /= topo.allowed_cores; 3951 3952 if (DO_BIC(BIC_Totl_c0)) 3953 average.packages.pkg_wtd_core_c0 /= topo.allowed_packages; 3954 if (DO_BIC(BIC_Any_c0)) 3955 average.packages.pkg_any_core_c0 /= topo.allowed_packages; 3956 if (DO_BIC(BIC_GFX_c0)) 3957 average.packages.pkg_any_gfxe_c0 /= topo.allowed_packages; 3958 if (DO_BIC(BIC_CPUGFX)) 3959 average.packages.pkg_both_core_gfxe_c0 /= topo.allowed_packages; 3960 3961 average.packages.pc2 /= topo.allowed_packages; 3962 if (DO_BIC(BIC_Pkgpc3)) 3963 average.packages.pc3 /= topo.allowed_packages; 3964 if (DO_BIC(BIC_Pkgpc6)) 3965 average.packages.pc6 /= topo.allowed_packages; 3966 if (DO_BIC(BIC_Pkgpc7)) 3967 average.packages.pc7 
/= topo.allowed_packages; 3968 3969 average.packages.pc8 /= topo.allowed_packages; 3970 average.packages.pc9 /= topo.allowed_packages; 3971 average.packages.pc10 /= topo.allowed_packages; 3972 average.packages.die_c6 /= topo.allowed_packages; 3973 3974 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 3975 if (mp->format == FORMAT_RAW) 3976 continue; 3977 if (mp->type == COUNTER_ITEMS) { 3978 if (average.threads.counter[i] > 9999999) 3979 sums_need_wide_columns = 1; 3980 continue; 3981 } 3982 average.threads.counter[i] /= topo.allowed_cpus; 3983 } 3984 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 3985 if (mp->format == FORMAT_RAW) 3986 continue; 3987 if (mp->type == COUNTER_ITEMS) { 3988 if (average.cores.counter[i] > 9999999) 3989 sums_need_wide_columns = 1; 3990 } 3991 average.cores.counter[i] /= topo.allowed_cores; 3992 } 3993 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 3994 if (mp->format == FORMAT_RAW) 3995 continue; 3996 if (mp->type == COUNTER_ITEMS) { 3997 if (average.packages.counter[i] > 9999999) 3998 sums_need_wide_columns = 1; 3999 } 4000 average.packages.counter[i] /= topo.allowed_packages; 4001 } 4002 4003 for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { 4004 if (pp->format == FORMAT_RAW) 4005 continue; 4006 if (pp->type == COUNTER_ITEMS) { 4007 if (average.threads.perf_counter[i] > 9999999) 4008 sums_need_wide_columns = 1; 4009 continue; 4010 } 4011 average.threads.perf_counter[i] /= topo.allowed_cpus; 4012 } 4013 for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { 4014 if (pp->format == FORMAT_RAW) 4015 continue; 4016 if (pp->type == COUNTER_ITEMS) { 4017 if (average.cores.perf_counter[i] > 9999999) 4018 sums_need_wide_columns = 1; 4019 } 4020 average.cores.perf_counter[i] /= topo.allowed_cores; 4021 } 4022 for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { 4023 if (pp->format == FORMAT_RAW) 4024 continue; 4025 if (pp->type == COUNTER_ITEMS) { 4026 if (average.packages.perf_counter[i] > 9999999) 4027 
/*
 * Open a file, and exit on failure
 *
 * Returns the stream from fopen(); never returns NULL because a failed
 * open terminates the program through err(1, ...).
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *filep = fopen(path, mode);

	if (!filep)
		err(1, "%s: open failed", path);
	return filep;
}

/*
 * snapshot_sysfs_counter()
 *
 * return snapshot of given counter
 *
 * Opens @path, parses one decimal integer from it and returns the value.
 * Exits via err() if the file cannot be opened or no number can be parsed.
 */
unsigned long long snapshot_sysfs_counter(char *path)
{
	FILE *fp;
	int retval;
	unsigned long long counter;

	fp = fopen_or_die(path, "r");

	/*
	 * The destination is unsigned, so parse it as unsigned: "%lld"
	 * would limit the accepted range to that of a signed long long.
	 */
	retval = fscanf(fp, "%llu", &counter);
	if (retval != 1)
		err(1, "snapshot_sysfs_counter(%s)", path);

	fclose(fp);

	return counter;
}
long get_legacy_uncore_mhz(int package) 4108 { 4109 char path[128]; 4110 int die; 4111 static int warn_once; 4112 4113 /* 4114 * for this package, use the first die_id that exists 4115 */ 4116 for (die = 0; die <= topo.max_die_id; ++die) { 4117 4118 sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", 4119 package, die); 4120 4121 if (access(path, R_OK) == 0) 4122 return (snapshot_sysfs_counter(path) / 1000); 4123 } 4124 if (!warn_once) { 4125 warnx("BUG: %s: No %s", __func__, path); 4126 warn_once = 1; 4127 } 4128 4129 return 0; 4130 } 4131 4132 int get_epb(int cpu) 4133 { 4134 char path[128 + PATH_BYTES]; 4135 unsigned long long msr; 4136 int ret, epb = -1; 4137 FILE *fp; 4138 4139 sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu); 4140 4141 fp = fopen(path, "r"); 4142 if (!fp) 4143 goto msr_fallback; 4144 4145 ret = fscanf(fp, "%d", &epb); 4146 if (ret != 1) 4147 err(1, "%s(%s)", __func__, path); 4148 4149 fclose(fp); 4150 4151 return epb; 4152 4153 msr_fallback: 4154 if (no_msr) 4155 return -1; 4156 4157 get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr); 4158 4159 return msr & 0xf; 4160 } 4161 4162 void get_apic_id(struct thread_data *t) 4163 { 4164 unsigned int eax, ebx, ecx, edx; 4165 4166 if (DO_BIC(BIC_APIC)) { 4167 eax = ebx = ecx = edx = 0; 4168 __cpuid(1, eax, ebx, ecx, edx); 4169 4170 t->apic_id = (ebx >> 24) & 0xff; 4171 } 4172 4173 if (!DO_BIC(BIC_X2APIC)) 4174 return; 4175 4176 if (authentic_amd || hygon_genuine) { 4177 unsigned int topology_extensions; 4178 4179 if (max_extended_level < 0x8000001e) 4180 return; 4181 4182 eax = ebx = ecx = edx = 0; 4183 __cpuid(0x80000001, eax, ebx, ecx, edx); 4184 topology_extensions = ecx & (1 << 22); 4185 4186 if (topology_extensions == 0) 4187 return; 4188 4189 eax = ebx = ecx = edx = 0; 4190 __cpuid(0x8000001e, eax, ebx, ecx, edx); 4191 4192 t->x2apic_id = eax; 4193 return; 4194 } 4195 4196 if (!genuine_intel) 4197 return; 4198 4199 if 
(max_level < 0xb) 4200 return; 4201 4202 ecx = 0; 4203 __cpuid(0xb, eax, ebx, ecx, edx); 4204 t->x2apic_id = edx; 4205 4206 if (debug && (t->apic_id != (t->x2apic_id & 0xff))) 4207 fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id); 4208 } 4209 4210 int get_core_throt_cnt(int cpu, unsigned long long *cnt) 4211 { 4212 char path[128 + PATH_BYTES]; 4213 unsigned long long tmp; 4214 FILE *fp; 4215 int ret; 4216 4217 sprintf(path, "/sys/devices/system/cpu/cpu%d/thermal_throttle/core_throttle_count", cpu); 4218 fp = fopen(path, "r"); 4219 if (!fp) 4220 return -1; 4221 ret = fscanf(fp, "%lld", &tmp); 4222 fclose(fp); 4223 if (ret != 1) 4224 return -1; 4225 *cnt = tmp; 4226 4227 return 0; 4228 } 4229 4230 struct amperf_group_fd { 4231 int aperf; /* Also the group descriptor */ 4232 int mperf; 4233 }; 4234 4235 static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr) 4236 { 4237 int fdmt; 4238 int bytes_read; 4239 char buf[64]; 4240 int ret = -1; 4241 4242 fdmt = open(path, O_RDONLY, 0); 4243 if (fdmt == -1) { 4244 if (debug) 4245 fprintf(stderr, "Failed to parse perf counter info %s\n", path); 4246 ret = -1; 4247 goto cleanup_and_exit; 4248 } 4249 4250 bytes_read = read(fdmt, buf, sizeof(buf) - 1); 4251 if (bytes_read <= 0 || bytes_read >= (int)sizeof(buf)) { 4252 if (debug) 4253 fprintf(stderr, "Failed to parse perf counter info %s\n", path); 4254 ret = -1; 4255 goto cleanup_and_exit; 4256 } 4257 4258 buf[bytes_read] = '\0'; 4259 4260 if (sscanf(buf, parse_format, value_ptr) != 1) { 4261 if (debug) 4262 fprintf(stderr, "Failed to parse perf counter info %s\n", path); 4263 ret = -1; 4264 goto cleanup_and_exit; 4265 } 4266 4267 ret = 0; 4268 4269 cleanup_and_exit: 4270 close(fdmt); 4271 return ret; 4272 } 4273 4274 static unsigned int read_perf_counter_info_n(const char *const path, const char *const parse_format) 4275 { 4276 unsigned int v; 4277 int status; 4278 4279 status = 
read_perf_counter_info(path, parse_format, &v); 4280 if (status) 4281 v = -1; 4282 4283 return v; 4284 } 4285 4286 static unsigned int read_perf_type(const char *subsys) 4287 { 4288 const char *const path_format = "/sys/bus/event_source/devices/%s/type"; 4289 const char *const format = "%u"; 4290 char path[128]; 4291 4292 snprintf(path, sizeof(path), path_format, subsys); 4293 4294 return read_perf_counter_info_n(path, format); 4295 } 4296 4297 static unsigned int read_perf_config(const char *subsys, const char *event_name) 4298 { 4299 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s"; 4300 FILE *fconfig = NULL; 4301 char path[128]; 4302 char config_str[64]; 4303 unsigned int config; 4304 unsigned int umask; 4305 bool has_config = false; 4306 bool has_umask = false; 4307 unsigned int ret = -1; 4308 4309 snprintf(path, sizeof(path), path_format, subsys, event_name); 4310 4311 fconfig = fopen(path, "r"); 4312 if (!fconfig) 4313 return -1; 4314 4315 if (fgets(config_str, ARRAY_SIZE(config_str), fconfig) != config_str) 4316 goto cleanup_and_exit; 4317 4318 for (char *pconfig_str = &config_str[0]; pconfig_str;) { 4319 if (sscanf(pconfig_str, "event=%x", &config) == 1) { 4320 has_config = true; 4321 goto next; 4322 } 4323 4324 if (sscanf(pconfig_str, "umask=%x", &umask) == 1) { 4325 has_umask = true; 4326 goto next; 4327 } 4328 4329 next: 4330 pconfig_str = strchr(pconfig_str, ','); 4331 if (pconfig_str) { 4332 *pconfig_str = '\0'; 4333 ++pconfig_str; 4334 } 4335 } 4336 4337 if (!has_umask) 4338 umask = 0; 4339 4340 if (has_config) 4341 ret = (umask << 8) | config; 4342 4343 cleanup_and_exit: 4344 fclose(fconfig); 4345 return ret; 4346 } 4347 4348 static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_name) 4349 { 4350 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.unit"; 4351 const char *const format = "%s"; 4352 char path[128]; 4353 char unit_buffer[16]; 4354 4355 snprintf(path, 
sizeof(path), path_format, subsys, event_name); 4356 4357 read_perf_counter_info(path, format, &unit_buffer); 4358 if (strcmp("Joules", unit_buffer) == 0) 4359 return RAPL_UNIT_JOULES; 4360 4361 return RAPL_UNIT_INVALID; 4362 } 4363 4364 static double read_perf_scale(const char *subsys, const char *event_name) 4365 { 4366 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.scale"; 4367 const char *const format = "%lf"; 4368 char path[128]; 4369 double scale; 4370 4371 snprintf(path, sizeof(path), path_format, subsys, event_name); 4372 4373 if (read_perf_counter_info(path, format, &scale)) 4374 return 0.0; 4375 4376 return scale; 4377 } 4378 4379 size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci) 4380 { 4381 size_t ret = 0; 4382 4383 for (int i = 0; i < NUM_RAPL_COUNTERS; ++i) 4384 if (rci->source[i] == COUNTER_SOURCE_PERF) 4385 ++ret; 4386 4387 return ret; 4388 } 4389 4390 static size_t cstate_counter_info_count_perf(const struct cstate_counter_info_t *cci) 4391 { 4392 size_t ret = 0; 4393 4394 for (int i = 0; i < NUM_CSTATE_COUNTERS; ++i) 4395 if (cci->source[i] == COUNTER_SOURCE_PERF) 4396 ++ret; 4397 4398 return ret; 4399 } 4400 4401 void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci, unsigned int idx) 4402 { 4403 if (rci->source[idx] == COUNTER_SOURCE_NONE) 4404 return; 4405 4406 rc->raw_value = rci->data[idx]; 4407 rc->unit = rci->unit[idx]; 4408 rc->scale = rci->scale[idx]; 4409 } 4410 4411 int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct pkg_data *p) 4412 { 4413 struct platform_counters *pplat_cnt = p == package_odd ? 
&platform_counters_odd : &platform_counters_even; 4414 unsigned long long perf_data[NUM_RAPL_COUNTERS + 1]; 4415 struct rapl_counter_info_t *rci; 4416 4417 if (debug >= 2) 4418 fprintf(stderr, "%s: cpu%d domain%d\n", __func__, cpu, domain); 4419 4420 assert(rapl_counter_info_perdomain); 4421 assert(domain < rapl_counter_info_perdomain_size); 4422 4423 rci = &rapl_counter_info_perdomain[domain]; 4424 4425 /* 4426 * If we have any perf counters to read, read them all now, in bulk 4427 */ 4428 if (rci->fd_perf != -1) { 4429 size_t num_perf_counters = rapl_counter_info_count_perf(rci); 4430 const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long); 4431 const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data)); 4432 4433 if (actual_read_size != expected_read_size) 4434 err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, 4435 actual_read_size); 4436 } 4437 4438 for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) { 4439 switch (rci->source[i]) { 4440 case COUNTER_SOURCE_NONE: 4441 rci->data[i] = 0; 4442 break; 4443 4444 case COUNTER_SOURCE_PERF: 4445 assert(pi < ARRAY_SIZE(perf_data)); 4446 assert(rci->fd_perf != -1); 4447 4448 if (debug >= 2) 4449 fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n", 4450 i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]); 4451 4452 rci->data[i] = perf_data[pi]; 4453 4454 ++pi; 4455 break; 4456 4457 case COUNTER_SOURCE_MSR: 4458 if (debug >= 2) 4459 fprintf(stderr, "Reading rapl counter via msr at %u\n", i); 4460 4461 assert(!no_msr); 4462 if (rci->flags[i] & RAPL_COUNTER_FLAG_USE_MSR_SUM) { 4463 if (get_msr_sum(cpu, rci->msr[i], &rci->data[i])) 4464 return -13 - i; 4465 } else { 4466 if (get_msr(cpu, rci->msr[i], &rci->data[i])) 4467 return -13 - i; 4468 } 4469 4470 rci->data[i] &= rci->msr_mask[i]; 4471 if (rci->msr_shift[i] >= 0) 4472 rci->data[i] >>= abs(rci->msr_shift[i]); 4473 else 4474 rci->data[i] <<= 
abs(rci->msr_shift[i]); 4475 4476 break; 4477 } 4478 } 4479 4480 BUILD_BUG_ON(NUM_RAPL_COUNTERS != 8); 4481 write_rapl_counter(&p->energy_pkg, rci, RAPL_RCI_INDEX_ENERGY_PKG); 4482 write_rapl_counter(&p->energy_cores, rci, RAPL_RCI_INDEX_ENERGY_CORES); 4483 write_rapl_counter(&p->energy_dram, rci, RAPL_RCI_INDEX_DRAM); 4484 write_rapl_counter(&p->energy_gfx, rci, RAPL_RCI_INDEX_GFX); 4485 write_rapl_counter(&p->rapl_pkg_perf_status, rci, RAPL_RCI_INDEX_PKG_PERF_STATUS); 4486 write_rapl_counter(&p->rapl_dram_perf_status, rci, RAPL_RCI_INDEX_DRAM_PERF_STATUS); 4487 write_rapl_counter(&c->core_energy, rci, RAPL_RCI_INDEX_CORE_ENERGY); 4488 write_rapl_counter(&pplat_cnt->energy_psys, rci, RAPL_RCI_INDEX_ENERGY_PLATFORM); 4489 4490 return 0; 4491 } 4492 4493 char *find_sysfs_path_by_id(struct sysfs_path *sp, int id) 4494 { 4495 while (sp) { 4496 if (sp->id == id) 4497 return (sp->path); 4498 sp = sp->next; 4499 } 4500 if (debug) 4501 warnx("%s: id%d not found", __func__, id); 4502 return NULL; 4503 } 4504 4505 int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_data *c, struct pkg_data *p) 4506 { 4507 /* 4508 * Overcommit memory a little bit here, 4509 * but skip calculating exact sizes for the buffers. 
4510 */ 4511 unsigned long long perf_data[NUM_CSTATE_COUNTERS]; 4512 unsigned long long perf_data_core[NUM_CSTATE_COUNTERS + 1]; 4513 unsigned long long perf_data_pkg[NUM_CSTATE_COUNTERS + 1]; 4514 4515 struct cstate_counter_info_t *cci; 4516 4517 if (debug >= 2) 4518 fprintf(stderr, "%s: cpu%d\n", __func__, cpu); 4519 4520 assert(ccstate_counter_info); 4521 assert(cpu <= ccstate_counter_info_size); 4522 4523 ZERO_ARRAY(perf_data); 4524 ZERO_ARRAY(perf_data_core); 4525 ZERO_ARRAY(perf_data_pkg); 4526 4527 cci = &ccstate_counter_info[cpu]; 4528 4529 /* 4530 * If we have any perf counters to read, read them all now, in bulk 4531 */ 4532 const size_t num_perf_counters = cstate_counter_info_count_perf(cci); 4533 ssize_t expected_read_size = num_perf_counters * sizeof(unsigned long long); 4534 ssize_t actual_read_size_core = 0, actual_read_size_pkg = 0; 4535 4536 if (cci->fd_perf_core != -1) { 4537 /* Each descriptor read begins with number of counters read. */ 4538 expected_read_size += sizeof(unsigned long long); 4539 4540 actual_read_size_core = read(cci->fd_perf_core, &perf_data_core[0], sizeof(perf_data_core)); 4541 4542 if (actual_read_size_core <= 0) 4543 err(-1, "%s: read perf %s: %ld", __func__, "core", actual_read_size_core); 4544 } 4545 4546 if (cci->fd_perf_pkg != -1) { 4547 /* Each descriptor read begins with number of counters read. */ 4548 expected_read_size += sizeof(unsigned long long); 4549 4550 actual_read_size_pkg = read(cci->fd_perf_pkg, &perf_data_pkg[0], sizeof(perf_data_pkg)); 4551 4552 if (actual_read_size_pkg <= 0) 4553 err(-1, "%s: read perf %s: %ld", __func__, "pkg", actual_read_size_pkg); 4554 } 4555 4556 const ssize_t actual_read_size_total = actual_read_size_core + actual_read_size_pkg; 4557 4558 if (actual_read_size_total != expected_read_size) 4559 err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, actual_read_size_total); 4560 4561 /* 4562 * Copy ccstate and pcstate data into unified buffer. 
4563 * 4564 * Skip first element from core and pkg buffers. 4565 * Kernel puts there how many counters were read. 4566 */ 4567 const size_t num_core_counters = perf_data_core[0]; 4568 const size_t num_pkg_counters = perf_data_pkg[0]; 4569 4570 assert(num_perf_counters == num_core_counters + num_pkg_counters); 4571 4572 /* Copy ccstate perf data */ 4573 memcpy(&perf_data[0], &perf_data_core[1], num_core_counters * sizeof(unsigned long long)); 4574 4575 /* Copy pcstate perf data */ 4576 memcpy(&perf_data[num_core_counters], &perf_data_pkg[1], num_pkg_counters * sizeof(unsigned long long)); 4577 4578 for (unsigned int i = 0, pi = 0; i < NUM_CSTATE_COUNTERS; ++i) { 4579 switch (cci->source[i]) { 4580 case COUNTER_SOURCE_NONE: 4581 break; 4582 4583 case COUNTER_SOURCE_PERF: 4584 assert(pi < ARRAY_SIZE(perf_data)); 4585 assert(cci->fd_perf_core != -1 || cci->fd_perf_pkg != -1); 4586 4587 if (debug >= 2) 4588 fprintf(stderr, "cstate via %s %u: %llu\n", "perf", i, perf_data[pi]); 4589 4590 cci->data[i] = perf_data[pi]; 4591 4592 ++pi; 4593 break; 4594 4595 case COUNTER_SOURCE_MSR: 4596 assert(!no_msr); 4597 if (get_msr(cpu, cci->msr[i], &cci->data[i])) 4598 return -13 - i; 4599 4600 if (debug >= 2) 4601 fprintf(stderr, "cstate via %s0x%llx %u: %llu\n", "msr", cci->msr[i], i, cci->data[i]); 4602 4603 break; 4604 } 4605 } 4606 4607 /* 4608 * Helper to write the data only if the source of 4609 * the counter for the current cpu is not none. 4610 * 4611 * Otherwise we would overwrite core data with 0 (default value), 4612 * when invoked for the thread sibling. 
4613 */ 4614 #define PERF_COUNTER_WRITE_DATA(out_counter, index) do { \ 4615 if (cci->source[index] != COUNTER_SOURCE_NONE) \ 4616 out_counter = cci->data[index]; \ 4617 } while (0) 4618 4619 BUILD_BUG_ON(NUM_CSTATE_COUNTERS != 11); 4620 4621 PERF_COUNTER_WRITE_DATA(t->c1, CCSTATE_RCI_INDEX_C1_RESIDENCY); 4622 PERF_COUNTER_WRITE_DATA(c->c3, CCSTATE_RCI_INDEX_C3_RESIDENCY); 4623 PERF_COUNTER_WRITE_DATA(c->c6, CCSTATE_RCI_INDEX_C6_RESIDENCY); 4624 PERF_COUNTER_WRITE_DATA(c->c7, CCSTATE_RCI_INDEX_C7_RESIDENCY); 4625 4626 PERF_COUNTER_WRITE_DATA(p->pc2, PCSTATE_RCI_INDEX_C2_RESIDENCY); 4627 PERF_COUNTER_WRITE_DATA(p->pc3, PCSTATE_RCI_INDEX_C3_RESIDENCY); 4628 PERF_COUNTER_WRITE_DATA(p->pc6, PCSTATE_RCI_INDEX_C6_RESIDENCY); 4629 PERF_COUNTER_WRITE_DATA(p->pc7, PCSTATE_RCI_INDEX_C7_RESIDENCY); 4630 PERF_COUNTER_WRITE_DATA(p->pc8, PCSTATE_RCI_INDEX_C8_RESIDENCY); 4631 PERF_COUNTER_WRITE_DATA(p->pc9, PCSTATE_RCI_INDEX_C9_RESIDENCY); 4632 PERF_COUNTER_WRITE_DATA(p->pc10, PCSTATE_RCI_INDEX_C10_RESIDENCY); 4633 4634 #undef PERF_COUNTER_WRITE_DATA 4635 4636 return 0; 4637 } 4638 4639 size_t msr_counter_info_count_perf(const struct msr_counter_info_t *mci) 4640 { 4641 size_t ret = 0; 4642 4643 for (int i = 0; i < NUM_MSR_COUNTERS; ++i) 4644 if (mci->source[i] == COUNTER_SOURCE_PERF) 4645 ++ret; 4646 4647 return ret; 4648 } 4649 4650 int get_smi_aperf_mperf(unsigned int cpu, struct thread_data *t) 4651 { 4652 unsigned long long perf_data[NUM_MSR_COUNTERS + 1]; 4653 4654 struct msr_counter_info_t *mci; 4655 4656 if (debug >= 2) 4657 fprintf(stderr, "%s: cpu%d\n", __func__, cpu); 4658 4659 assert(msr_counter_info); 4660 assert(cpu <= msr_counter_info_size); 4661 4662 mci = &msr_counter_info[cpu]; 4663 4664 ZERO_ARRAY(perf_data); 4665 ZERO_ARRAY(mci->data); 4666 4667 if (mci->fd_perf != -1) { 4668 const size_t num_perf_counters = msr_counter_info_count_perf(mci); 4669 const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long); 4670 const ssize_t 
/*
 * pmt_gen_value_mask()
 *
 * Return a mask with bits lsb..msb (inclusive) set and all others clear.
 *
 * Callers are expected to pass lsb <= msb <= 63; bit positions above 31
 * additionally require a 64-bit unsigned long.
 */
unsigned long pmt_gen_value_mask(unsigned int lsb, unsigned int msb)
{
	unsigned long mask;

	/*
	 * Shift an unsigned long, not an int: with a plain "1" the shift is
	 * done in 32-bit signed arithmetic and breaks for bit 31 and above.
	 * A shift by the full width is undefined, hence the msb >= 63 case.
	 */
	if (msb >= 63)
		mask = ~0UL;
	else
		mask = (1UL << (msb + 1)) - 1;

	/* remove the bits below lsb */
	mask -= (1UL << lsb) - 1;

	return mask;
}
pmt_read_counter(pp, t->cpu_id); 4818 4819 /* collect core counters only for 1st thread in core */ 4820 if (!is_cpu_first_thread_in_core(t, c, p)) 4821 goto done; 4822 4823 if (platform->has_per_core_rapl) { 4824 status = get_rapl_counters(cpu, c->core_id, c, p); 4825 if (status != 0) 4826 return status; 4827 } 4828 4829 if (DO_BIC(BIC_CPU_c7) && t->is_atom) { 4830 /* 4831 * For Atom CPUs that has core cstate deeper than c6, 4832 * MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper. 4833 * Minus CC7 (and deeper cstates) residency to get 4834 * accturate cc6 residency. 4835 */ 4836 c->c6 -= c->c7; 4837 } 4838 4839 if (DO_BIC(BIC_Mod_c6)) 4840 if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us)) 4841 return -8; 4842 4843 if (DO_BIC(BIC_CoreTmp)) { 4844 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) 4845 return -9; 4846 c->core_temp_c = tj_max - ((msr >> 16) & 0x7F); 4847 } 4848 4849 if (DO_BIC(BIC_CORE_THROT_CNT)) 4850 get_core_throt_cnt(cpu, &c->core_throt_cnt); 4851 4852 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 4853 if (get_mp(cpu, mp, &c->counter[i], mp->sp->path)) 4854 return -10; 4855 } 4856 4857 if (perf_counter_info_read_values(sys.perf_cp, cpu, c->perf_counter, MAX_ADDED_CORE_COUNTERS)) 4858 return -10; 4859 4860 for (i = 0, pp = sys.pmt_cp; pp; i++, pp = pp->next) 4861 c->pmt_counter[i] = pmt_read_counter(pp, c->core_id); 4862 4863 /* collect package counters only for 1st core in package */ 4864 if (!is_cpu_first_core_in_package(t, c, p)) 4865 goto done; 4866 4867 if (DO_BIC(BIC_Totl_c0)) { 4868 if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0)) 4869 return -10; 4870 } 4871 if (DO_BIC(BIC_Any_c0)) { 4872 if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0)) 4873 return -11; 4874 } 4875 if (DO_BIC(BIC_GFX_c0)) { 4876 if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0)) 4877 return -12; 4878 } 4879 if (DO_BIC(BIC_CPUGFX)) { 4880 if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0)) 
4881 return -13; 4882 } 4883 4884 if (DO_BIC(BIC_CPU_LPI)) 4885 p->cpu_lpi = cpuidle_cur_cpu_lpi_us; 4886 if (DO_BIC(BIC_SYS_LPI)) 4887 p->sys_lpi = cpuidle_cur_sys_lpi_us; 4888 4889 if (!platform->has_per_core_rapl) { 4890 status = get_rapl_counters(cpu, p->package_id, c, p); 4891 if (status != 0) 4892 return status; 4893 } 4894 4895 if (DO_BIC(BIC_PkgTmp)) { 4896 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) 4897 return -17; 4898 p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F); 4899 } 4900 4901 if (DO_BIC(BIC_UNCORE_MHZ)) 4902 p->uncore_mhz = get_legacy_uncore_mhz(p->package_id); 4903 4904 if (DO_BIC(BIC_GFX_rc6)) 4905 p->gfx_rc6_ms = gfx_info[GFX_rc6].val_ull; 4906 4907 if (DO_BIC(BIC_GFXMHz)) 4908 p->gfx_mhz = gfx_info[GFX_MHz].val; 4909 4910 if (DO_BIC(BIC_GFXACTMHz)) 4911 p->gfx_act_mhz = gfx_info[GFX_ACTMHz].val; 4912 4913 if (DO_BIC(BIC_SAM_mc6)) 4914 p->sam_mc6_ms = gfx_info[SAM_mc6].val_ull; 4915 4916 if (DO_BIC(BIC_SAMMHz)) 4917 p->sam_mhz = gfx_info[SAM_MHz].val; 4918 4919 if (DO_BIC(BIC_SAMACTMHz)) 4920 p->sam_act_mhz = gfx_info[SAM_ACTMHz].val; 4921 4922 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 4923 char *path = NULL; 4924 4925 if (mp->msr_num == 0) { 4926 path = find_sysfs_path_by_id(mp->sp, p->package_id); 4927 if (path == NULL) { 4928 warnx("%s: package_id %d not found", __func__, p->package_id); 4929 return -10; 4930 } 4931 } 4932 if (get_mp(cpu, mp, &p->counter[i], path)) 4933 return -10; 4934 } 4935 4936 if (perf_counter_info_read_values(sys.perf_pp, cpu, p->perf_counter, MAX_ADDED_PACKAGE_COUNTERS)) 4937 return -10; 4938 4939 for (i = 0, pp = sys.pmt_pp; pp; i++, pp = pp->next) 4940 p->pmt_counter[i] = pmt_read_counter(pp, p->package_id); 4941 4942 done: 4943 gettimeofday(&t->tv_end, (struct timezone *)NULL); 4944 4945 return 0; 4946 } 4947 4948 int pkg_cstate_limit = PCLUKN; 4949 char *pkg_cstate_limit_strings[] = { "unknown", "reserved", "pc0", "pc1", "pc2", 4950 "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", 
"pc9", "pc10", "unlimited" 4951 }; 4952 4953 int nhm_pkg_cstate_limits[16] = 4954 { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4955 PCLRSV, PCLRSV 4956 }; 4957 4958 int snb_pkg_cstate_limits[16] = 4959 { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4960 PCLRSV, PCLRSV 4961 }; 4962 4963 int hsw_pkg_cstate_limits[16] = 4964 { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4965 PCLRSV, PCLRSV 4966 }; 4967 4968 int slv_pkg_cstate_limits[16] = 4969 { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4970 PCL__6, PCL__7 4971 }; 4972 4973 int amt_pkg_cstate_limits[16] = 4974 { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4975 PCLRSV, PCLRSV 4976 }; 4977 4978 int phi_pkg_cstate_limits[16] = 4979 { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4980 PCLRSV, PCLRSV 4981 }; 4982 4983 int glm_pkg_cstate_limits[16] = 4984 { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4985 PCLRSV, PCLRSV 4986 }; 4987 4988 int skx_pkg_cstate_limits[16] = 4989 { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4990 PCLRSV, PCLRSV 4991 }; 4992 4993 int icx_pkg_cstate_limits[16] = 4994 { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4995 PCLRSV, PCLRSV 4996 }; 4997 4998 void probe_cst_limit(void) 4999 { 5000 unsigned long long msr; 5001 int *pkg_cstate_limits; 5002 5003 if (!platform->has_nhm_msrs || no_msr) 5004 return; 5005 5006 switch (platform->cst_limit) { 5007 case CST_LIMIT_NHM: 5008 pkg_cstate_limits = 
nhm_pkg_cstate_limits; 5009 break; 5010 case CST_LIMIT_SNB: 5011 pkg_cstate_limits = snb_pkg_cstate_limits; 5012 break; 5013 case CST_LIMIT_HSW: 5014 pkg_cstate_limits = hsw_pkg_cstate_limits; 5015 break; 5016 case CST_LIMIT_SKX: 5017 pkg_cstate_limits = skx_pkg_cstate_limits; 5018 break; 5019 case CST_LIMIT_ICX: 5020 pkg_cstate_limits = icx_pkg_cstate_limits; 5021 break; 5022 case CST_LIMIT_SLV: 5023 pkg_cstate_limits = slv_pkg_cstate_limits; 5024 break; 5025 case CST_LIMIT_AMT: 5026 pkg_cstate_limits = amt_pkg_cstate_limits; 5027 break; 5028 case CST_LIMIT_KNL: 5029 pkg_cstate_limits = phi_pkg_cstate_limits; 5030 break; 5031 case CST_LIMIT_GMT: 5032 pkg_cstate_limits = glm_pkg_cstate_limits; 5033 break; 5034 default: 5035 return; 5036 } 5037 5038 get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); 5039 pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; 5040 } 5041 5042 static void dump_platform_info(void) 5043 { 5044 unsigned long long msr; 5045 unsigned int ratio; 5046 5047 if (!platform->has_nhm_msrs || no_msr) 5048 return; 5049 5050 get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); 5051 5052 fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); 5053 5054 ratio = (msr >> 40) & 0xFF; 5055 fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk); 5056 5057 ratio = (msr >> 8) & 0xFF; 5058 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); 5059 } 5060 5061 static void dump_power_ctl(void) 5062 { 5063 unsigned long long msr; 5064 5065 if (!platform->has_nhm_msrs || no_msr) 5066 return; 5067 5068 get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); 5069 fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", 5070 base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); 5071 5072 /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */ 5073 if (platform->has_cst_prewake_bit) 5074 fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? 
"DIS" : "EN"); 5075 5076 return; 5077 } 5078 5079 static void dump_turbo_ratio_limit2(void) 5080 { 5081 unsigned long long msr; 5082 unsigned int ratio; 5083 5084 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); 5085 5086 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr); 5087 5088 ratio = (msr >> 8) & 0xFF; 5089 if (ratio) 5090 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk); 5091 5092 ratio = (msr >> 0) & 0xFF; 5093 if (ratio) 5094 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk); 5095 return; 5096 } 5097 5098 static void dump_turbo_ratio_limit1(void) 5099 { 5100 unsigned long long msr; 5101 unsigned int ratio; 5102 5103 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); 5104 5105 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr); 5106 5107 ratio = (msr >> 56) & 0xFF; 5108 if (ratio) 5109 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk); 5110 5111 ratio = (msr >> 48) & 0xFF; 5112 if (ratio) 5113 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk); 5114 5115 ratio = (msr >> 40) & 0xFF; 5116 if (ratio) 5117 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk); 5118 5119 ratio = (msr >> 32) & 0xFF; 5120 if (ratio) 5121 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk); 5122 5123 ratio = (msr >> 24) & 0xFF; 5124 if (ratio) 5125 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk); 5126 5127 ratio = (msr >> 16) & 0xFF; 5128 if (ratio) 5129 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk); 5130 5131 ratio = (msr >> 8) & 0xFF; 5132 if (ratio) 5133 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk); 5134 5135 ratio = (msr >> 0) & 0xFF; 5136 if 
(ratio) 5137 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk); 5138 return; 5139 } 5140 5141 static void dump_turbo_ratio_limits(int trl_msr_offset) 5142 { 5143 unsigned long long msr, core_counts; 5144 int shift; 5145 5146 get_msr(base_cpu, trl_msr_offset, &msr); 5147 fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n", 5148 base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY_" : "", msr); 5149 5150 if (platform->trl_msrs & TRL_CORECOUNT) { 5151 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts); 5152 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts); 5153 } else { 5154 core_counts = 0x0807060504030201; 5155 } 5156 5157 for (shift = 56; shift >= 0; shift -= 8) { 5158 unsigned int ratio, group_size; 5159 5160 ratio = (msr >> shift) & 0xFF; 5161 group_size = (core_counts >> shift) & 0xFF; 5162 if (ratio) 5163 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", 5164 ratio, bclk, ratio * bclk, group_size); 5165 } 5166 5167 return; 5168 } 5169 5170 static void dump_atom_turbo_ratio_limits(void) 5171 { 5172 unsigned long long msr; 5173 unsigned int ratio; 5174 5175 get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr); 5176 fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF); 5177 5178 ratio = (msr >> 0) & 0x3F; 5179 if (ratio) 5180 fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", ratio, bclk, ratio * bclk); 5181 5182 ratio = (msr >> 8) & 0x3F; 5183 if (ratio) 5184 fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", ratio, bclk, ratio * bclk); 5185 5186 ratio = (msr >> 16) & 0x3F; 5187 if (ratio) 5188 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); 5189 5190 get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr); 5191 fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF); 5192 5193 ratio = (msr >> 24) & 0x3F; 5194 if (ratio) 
5195 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk); 5196 5197 ratio = (msr >> 16) & 0x3F; 5198 if (ratio) 5199 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk); 5200 5201 ratio = (msr >> 8) & 0x3F; 5202 if (ratio) 5203 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk); 5204 5205 ratio = (msr >> 0) & 0x3F; 5206 if (ratio) 5207 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", ratio, bclk, ratio * bclk); 5208 } 5209 5210 static void dump_knl_turbo_ratio_limits(void) 5211 { 5212 const unsigned int buckets_no = 7; 5213 5214 unsigned long long msr; 5215 int delta_cores, delta_ratio; 5216 int i, b_nr; 5217 unsigned int cores[buckets_no]; 5218 unsigned int ratio[buckets_no]; 5219 5220 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); 5221 5222 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); 5223 5224 /* 5225 * Turbo encoding in KNL is as follows: 5226 * [0] -- Reserved 5227 * [7:1] -- Base value of number of active cores of bucket 1. 5228 * [15:8] -- Base value of freq ratio of bucket 1. 5229 * [20:16] -- +ve delta of number of active cores of bucket 2. 5230 * i.e. active cores of bucket 2 = 5231 * active cores of bucket 1 + delta 5232 * [23:21] -- Negative delta of freq ratio of bucket 2. 5233 * i.e. freq ratio of bucket 2 = 5234 * freq ratio of bucket 1 - delta 5235 * [28:24]-- +ve delta of number of active cores of bucket 3. 5236 * [31:29]-- -ve delta of freq ratio of bucket 3. 5237 * [36:32]-- +ve delta of number of active cores of bucket 4. 5238 * [39:37]-- -ve delta of freq ratio of bucket 4. 5239 * [44:40]-- +ve delta of number of active cores of bucket 5. 5240 * [47:45]-- -ve delta of freq ratio of bucket 5. 5241 * [52:48]-- +ve delta of number of active cores of bucket 6. 5242 * [55:53]-- -ve delta of freq ratio of bucket 6. 5243 * [60:56]-- +ve delta of number of active cores of bucket 7. 
5244 * [63:61]-- -ve delta of freq ratio of bucket 7. 5245 */ 5246 5247 b_nr = 0; 5248 cores[b_nr] = (msr & 0xFF) >> 1; 5249 ratio[b_nr] = (msr >> 8) & 0xFF; 5250 5251 for (i = 16; i < 64; i += 8) { 5252 delta_cores = (msr >> i) & 0x1F; 5253 delta_ratio = (msr >> (i + 5)) & 0x7; 5254 5255 cores[b_nr + 1] = cores[b_nr] + delta_cores; 5256 ratio[b_nr + 1] = ratio[b_nr] - delta_ratio; 5257 b_nr++; 5258 } 5259 5260 for (i = buckets_no - 1; i >= 0; i--) 5261 if (i > 0 ? ratio[i] != ratio[i - 1] : 1) 5262 fprintf(outf, 5263 "%d * %.1f = %.1f MHz max turbo %d active cores\n", 5264 ratio[i], bclk, ratio[i] * bclk, cores[i]); 5265 } 5266 5267 static void dump_cst_cfg(void) 5268 { 5269 unsigned long long msr; 5270 5271 if (!platform->has_nhm_msrs || no_msr) 5272 return; 5273 5274 get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); 5275 5276 fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr); 5277 5278 fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)", 5279 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", 5280 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", 5281 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", 5282 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", 5283 (msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]); 5284 5285 #define AUTOMATIC_CSTATE_CONVERSION (1UL << 16) 5286 if (platform->has_cst_auto_convension) { 5287 fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? 
"on" : "off"); 5288 } 5289 5290 fprintf(outf, ")\n"); 5291 5292 return; 5293 } 5294 5295 static void dump_config_tdp(void) 5296 { 5297 unsigned long long msr; 5298 5299 get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr); 5300 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr); 5301 fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF); 5302 5303 get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr); 5304 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr); 5305 if (msr) { 5306 fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF); 5307 fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF); 5308 fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); 5309 fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF); 5310 } 5311 fprintf(outf, ")\n"); 5312 5313 get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr); 5314 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr); 5315 if (msr) { 5316 fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF); 5317 fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF); 5318 fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); 5319 fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF); 5320 } 5321 fprintf(outf, ")\n"); 5322 5323 get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr); 5324 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr); 5325 if ((msr) & 0x3) 5326 fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3); 5327 fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); 5328 fprintf(outf, ")\n"); 5329 5330 get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr); 5331 fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); 5332 fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF); 5333 fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); 5334 fprintf(outf, ")\n"); 5335 } 5336 5337 unsigned int 
irtl_time_units[] = { 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 }; 5338 5339 void print_irtl(void) 5340 { 5341 unsigned long long msr; 5342 5343 if (!platform->has_irtl_msrs || no_msr) 5344 return; 5345 5346 if (platform->supported_cstates & PC3) { 5347 get_msr(base_cpu, MSR_PKGC3_IRTL, &msr); 5348 fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr); 5349 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", 5350 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 5351 } 5352 5353 if (platform->supported_cstates & PC6) { 5354 get_msr(base_cpu, MSR_PKGC6_IRTL, &msr); 5355 fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr); 5356 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", 5357 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 5358 } 5359 5360 if (platform->supported_cstates & PC7) { 5361 get_msr(base_cpu, MSR_PKGC7_IRTL, &msr); 5362 fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr); 5363 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", 5364 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 5365 } 5366 5367 if (platform->supported_cstates & PC8) { 5368 get_msr(base_cpu, MSR_PKGC8_IRTL, &msr); 5369 fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr); 5370 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", 5371 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 5372 } 5373 5374 if (platform->supported_cstates & PC9) { 5375 get_msr(base_cpu, MSR_PKGC9_IRTL, &msr); 5376 fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr); 5377 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", 5378 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 5379 } 5380 5381 if (platform->supported_cstates & PC10) { 5382 get_msr(base_cpu, MSR_PKGC10_IRTL, &msr); 5383 fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr); 5384 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? 
"" : "NOT", 5385 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 5386 } 5387 } 5388 5389 void free_fd_percpu(void) 5390 { 5391 int i; 5392 5393 if (!fd_percpu) 5394 return; 5395 5396 for (i = 0; i < topo.max_cpu_num + 1; ++i) { 5397 if (fd_percpu[i] != 0) 5398 close(fd_percpu[i]); 5399 } 5400 5401 free(fd_percpu); 5402 fd_percpu = NULL; 5403 } 5404 5405 void free_fd_instr_count_percpu(void) 5406 { 5407 if (!fd_instr_count_percpu) 5408 return; 5409 5410 for (int i = 0; i < topo.max_cpu_num + 1; ++i) { 5411 if (fd_instr_count_percpu[i] != 0) 5412 close(fd_instr_count_percpu[i]); 5413 } 5414 5415 free(fd_instr_count_percpu); 5416 fd_instr_count_percpu = NULL; 5417 } 5418 5419 void free_fd_cstate(void) 5420 { 5421 if (!ccstate_counter_info) 5422 return; 5423 5424 const int counter_info_num = ccstate_counter_info_size; 5425 5426 for (int counter_id = 0; counter_id < counter_info_num; ++counter_id) { 5427 if (ccstate_counter_info[counter_id].fd_perf_core != -1) 5428 close(ccstate_counter_info[counter_id].fd_perf_core); 5429 5430 if (ccstate_counter_info[counter_id].fd_perf_pkg != -1) 5431 close(ccstate_counter_info[counter_id].fd_perf_pkg); 5432 } 5433 5434 free(ccstate_counter_info); 5435 ccstate_counter_info = NULL; 5436 ccstate_counter_info_size = 0; 5437 } 5438 5439 void free_fd_msr(void) 5440 { 5441 if (!msr_counter_info) 5442 return; 5443 5444 for (int cpu = 0; cpu < topo.max_cpu_num; ++cpu) { 5445 if (msr_counter_info[cpu].fd_perf != -1) 5446 close(msr_counter_info[cpu].fd_perf); 5447 } 5448 5449 free(msr_counter_info); 5450 msr_counter_info = NULL; 5451 msr_counter_info_size = 0; 5452 } 5453 5454 void free_fd_rapl_percpu(void) 5455 { 5456 if (!rapl_counter_info_perdomain) 5457 return; 5458 5459 const int num_domains = rapl_counter_info_perdomain_size; 5460 5461 for (int domain_id = 0; domain_id < num_domains; ++domain_id) { 5462 if (rapl_counter_info_perdomain[domain_id].fd_perf != -1) 5463 close(rapl_counter_info_perdomain[domain_id].fd_perf); 5464 } 5465 
5466 free(rapl_counter_info_perdomain); 5467 rapl_counter_info_perdomain = NULL; 5468 rapl_counter_info_perdomain_size = 0; 5469 } 5470 5471 void free_fd_added_perf_counters_(struct perf_counter_info *pp) 5472 { 5473 if (!pp) 5474 return; 5475 5476 if (!pp->fd_perf_per_domain) 5477 return; 5478 5479 while (pp) { 5480 for (size_t domain = 0; domain < pp->num_domains; ++domain) { 5481 if (pp->fd_perf_per_domain[domain] != -1) { 5482 close(pp->fd_perf_per_domain[domain]); 5483 pp->fd_perf_per_domain[domain] = -1; 5484 } 5485 } 5486 5487 free(pp->fd_perf_per_domain); 5488 pp->fd_perf_per_domain = NULL; 5489 5490 pp = pp->next; 5491 } 5492 } 5493 5494 void free_fd_added_perf_counters(void) 5495 { 5496 free_fd_added_perf_counters_(sys.perf_tp); 5497 free_fd_added_perf_counters_(sys.perf_cp); 5498 free_fd_added_perf_counters_(sys.perf_pp); 5499 } 5500 5501 void free_all_buffers(void) 5502 { 5503 int i; 5504 5505 CPU_FREE(cpu_present_set); 5506 cpu_present_set = NULL; 5507 cpu_present_setsize = 0; 5508 5509 CPU_FREE(cpu_effective_set); 5510 cpu_effective_set = NULL; 5511 cpu_effective_setsize = 0; 5512 5513 CPU_FREE(cpu_allowed_set); 5514 cpu_allowed_set = NULL; 5515 cpu_allowed_setsize = 0; 5516 5517 CPU_FREE(cpu_affinity_set); 5518 cpu_affinity_set = NULL; 5519 cpu_affinity_setsize = 0; 5520 5521 free(thread_even); 5522 free(core_even); 5523 free(package_even); 5524 5525 thread_even = NULL; 5526 core_even = NULL; 5527 package_even = NULL; 5528 5529 free(thread_odd); 5530 free(core_odd); 5531 free(package_odd); 5532 5533 thread_odd = NULL; 5534 core_odd = NULL; 5535 package_odd = NULL; 5536 5537 free(output_buffer); 5538 output_buffer = NULL; 5539 outp = NULL; 5540 5541 free_fd_percpu(); 5542 free_fd_instr_count_percpu(); 5543 free_fd_msr(); 5544 free_fd_rapl_percpu(); 5545 free_fd_cstate(); 5546 free_fd_added_perf_counters(); 5547 5548 free(irq_column_2_cpu); 5549 free(irqs_per_cpu); 5550 free(nmi_per_cpu); 5551 5552 for (i = 0; i <= topo.max_cpu_num; ++i) { 5553 if 
(cpus[i].put_ids) 5554 CPU_FREE(cpus[i].put_ids); 5555 } 5556 free(cpus); 5557 } 5558 5559 /* 5560 * Parse a file containing a single int. 5561 * Return 0 if file can not be opened 5562 * Exit if file can be opened, but can not be parsed 5563 */ 5564 int parse_int_file(const char *fmt, ...) 5565 { 5566 va_list args; 5567 char path[PATH_MAX]; 5568 FILE *filep; 5569 int value; 5570 5571 va_start(args, fmt); 5572 vsnprintf(path, sizeof(path), fmt, args); 5573 va_end(args); 5574 filep = fopen(path, "r"); 5575 if (!filep) 5576 return 0; 5577 if (fscanf(filep, "%d", &value) != 1) 5578 err(1, "%s: failed to parse number from file", path); 5579 fclose(filep); 5580 return value; 5581 } 5582 5583 /* 5584 * cpu_is_first_core_in_package(cpu) 5585 * return 1 if given CPU is 1st core in package 5586 */ 5587 int cpu_is_first_core_in_package(int cpu) 5588 { 5589 return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu); 5590 } 5591 5592 int get_physical_package_id(int cpu) 5593 { 5594 return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu); 5595 } 5596 5597 int get_die_id(int cpu) 5598 { 5599 return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu); 5600 } 5601 5602 int get_core_id(int cpu) 5603 { 5604 return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); 5605 } 5606 5607 void set_node_data(void) 5608 { 5609 int pkg, node, lnode, cpu, cpux; 5610 int cpu_count; 5611 5612 /* initialize logical_node_id */ 5613 for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) 5614 cpus[cpu].logical_node_id = -1; 5615 5616 cpu_count = 0; 5617 for (pkg = 0; pkg < topo.num_packages; pkg++) { 5618 lnode = 0; 5619 for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) { 5620 if (cpus[cpu].physical_package_id != pkg) 5621 continue; 5622 /* find a cpu with an unset logical_node_id */ 5623 if (cpus[cpu].logical_node_id != -1) 5624 continue; 5625 cpus[cpu].logical_node_id = lnode; 5626 node = 
cpus[cpu].physical_node_id; 5627 cpu_count++; 5628 /* 5629 * find all matching cpus on this pkg and set 5630 * the logical_node_id 5631 */ 5632 for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) { 5633 if ((cpus[cpux].physical_package_id == pkg) && (cpus[cpux].physical_node_id == node)) { 5634 cpus[cpux].logical_node_id = lnode; 5635 cpu_count++; 5636 } 5637 } 5638 lnode++; 5639 if (lnode > topo.nodes_per_pkg) 5640 topo.nodes_per_pkg = lnode; 5641 } 5642 if (cpu_count >= topo.max_cpu_num) 5643 break; 5644 } 5645 } 5646 5647 int get_physical_node_id(struct cpu_topology *thiscpu) 5648 { 5649 char path[80]; 5650 FILE *filep; 5651 int i; 5652 int cpu = thiscpu->logical_cpu_id; 5653 5654 for (i = 0; i <= topo.max_cpu_num; i++) { 5655 sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", cpu, i); 5656 filep = fopen(path, "r"); 5657 if (!filep) 5658 continue; 5659 fclose(filep); 5660 return i; 5661 } 5662 return -1; 5663 } 5664 5665 static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size) 5666 { 5667 unsigned int start, end; 5668 char *next = cpu_str; 5669 5670 while (next && *next) { 5671 5672 if (*next == '-') /* no negative cpu numbers */ 5673 return 1; 5674 5675 if (*next == '\0' || *next == '\n') 5676 break; 5677 5678 start = strtoul(next, &next, 10); 5679 5680 if (start >= CPU_SUBSET_MAXCPUS) 5681 return 1; 5682 CPU_SET_S(start, cpu_set_size, cpu_set); 5683 5684 if (*next == '\0' || *next == '\n') 5685 break; 5686 5687 if (*next == ',') { 5688 next += 1; 5689 continue; 5690 } 5691 5692 if (*next == '-') { 5693 next += 1; /* start range */ 5694 } else if (*next == '.') { 5695 next += 1; 5696 if (*next == '.') 5697 next += 1; /* start range */ 5698 else 5699 return 1; 5700 } 5701 5702 end = strtoul(next, &next, 10); 5703 if (end <= start) 5704 return 1; 5705 5706 while (++start <= end) { 5707 if (start >= CPU_SUBSET_MAXCPUS) 5708 return 1; 5709 CPU_SET_S(start, cpu_set_size, cpu_set); 5710 } 5711 5712 if (*next == ',') 5713 next += 1; 5714 
else if (*next != '\0' && *next != '\n') 5715 return 1; 5716 } 5717 5718 return 0; 5719 } 5720 5721 int get_thread_siblings(struct cpu_topology *thiscpu) 5722 { 5723 char path[80], character; 5724 FILE *filep; 5725 unsigned long map; 5726 int so, shift, sib_core; 5727 int cpu = thiscpu->logical_cpu_id; 5728 int offset = topo.max_cpu_num + 1; 5729 size_t size; 5730 int thread_id = 0; 5731 5732 thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1)); 5733 if (thiscpu->thread_id < 0) 5734 thiscpu->thread_id = thread_id++; 5735 if (!thiscpu->put_ids) 5736 return -1; 5737 5738 size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 5739 CPU_ZERO_S(size, thiscpu->put_ids); 5740 5741 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu); 5742 filep = fopen(path, "r"); 5743 5744 if (!filep) { 5745 warnx("%s: open failed", path); 5746 return -1; 5747 } 5748 do { 5749 offset -= BITMASK_SIZE; 5750 if (fscanf(filep, "%lx%c", &map, &character) != 2) 5751 err(1, "%s: failed to parse file", path); 5752 for (shift = 0; shift < BITMASK_SIZE; shift++) { 5753 if ((map >> shift) & 0x1) { 5754 so = shift + offset; 5755 sib_core = get_core_id(so); 5756 if (sib_core == thiscpu->physical_core_id) { 5757 CPU_SET_S(so, size, thiscpu->put_ids); 5758 if ((so != cpu) && (cpus[so].thread_id < 0)) 5759 cpus[so].thread_id = thread_id++; 5760 } 5761 } 5762 } 5763 } while (character == ','); 5764 fclose(filep); 5765 5766 return CPU_COUNT_S(size, thiscpu->put_ids); 5767 } 5768 5769 /* 5770 * run func(thread, core, package) in topology order 5771 * skip non-present cpus 5772 */ 5773 5774 int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *, 5775 struct pkg_data *, struct thread_data *, struct core_data *, 5776 struct pkg_data *), struct thread_data *thread_base, 5777 struct core_data *core_base, struct pkg_data *pkg_base, 5778 struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2) 5779 { 5780 int retval, pkg_no, node_no, core_no, 
thread_no; 5781 5782 retval = 0; 5783 5784 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 5785 for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) { 5786 for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { 5787 for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { 5788 struct thread_data *t, *t2; 5789 struct core_data *c, *c2; 5790 struct pkg_data *p, *p2; 5791 5792 t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); 5793 5794 if (cpu_is_not_allowed(t->cpu_id)) 5795 continue; 5796 5797 t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no); 5798 5799 c = GET_CORE(core_base, core_no, node_no, pkg_no); 5800 c2 = GET_CORE(core_base2, core_no, node_no, pkg_no); 5801 5802 p = GET_PKG(pkg_base, pkg_no); 5803 p2 = GET_PKG(pkg_base2, pkg_no); 5804 5805 retval |= func(t, c, p, t2, c2, p2); 5806 } 5807 } 5808 } 5809 } 5810 return retval; 5811 } 5812 5813 /* 5814 * run func(cpu) on every cpu in /proc/stat 5815 * return max_cpu number 5816 */ 5817 int for_all_proc_cpus(int (func) (int)) 5818 { 5819 FILE *fp; 5820 int cpu_num; 5821 int retval; 5822 5823 fp = fopen_or_die(proc_stat, "r"); 5824 5825 retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n"); 5826 if (retval != 0) 5827 err(1, "%s: failed to parse format", proc_stat); 5828 5829 while (1) { 5830 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num); 5831 if (retval != 1) 5832 break; 5833 5834 retval = func(cpu_num); 5835 if (retval) { 5836 fclose(fp); 5837 return (retval); 5838 } 5839 } 5840 fclose(fp); 5841 return 0; 5842 } 5843 5844 #define PATH_EFFECTIVE_CPUS "/sys/fs/cgroup/cpuset.cpus.effective" 5845 5846 static char cpu_effective_str[1024]; 5847 5848 static int update_effective_str(bool startup) 5849 { 5850 FILE *fp; 5851 char *pos; 5852 char buf[1024]; 5853 int ret; 5854 5855 if (cpu_effective_str[0] == '\0' && !startup) 5856 return 0; 5857 5858 fp = fopen(PATH_EFFECTIVE_CPUS, "r"); 5859 if (!fp) 5860 
return 0; 5861 5862 pos = fgets(buf, 1024, fp); 5863 if (!pos) 5864 err(1, "%s: file read failed\n", PATH_EFFECTIVE_CPUS); 5865 5866 fclose(fp); 5867 5868 ret = strncmp(cpu_effective_str, buf, 1024); 5869 if (!ret) 5870 return 0; 5871 5872 strncpy(cpu_effective_str, buf, 1024); 5873 return 1; 5874 } 5875 5876 static void update_effective_set(bool startup) 5877 { 5878 update_effective_str(startup); 5879 5880 if (parse_cpu_str(cpu_effective_str, cpu_effective_set, cpu_effective_setsize)) 5881 err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str); 5882 } 5883 5884 void linux_perf_init(void); 5885 void msr_perf_init(void); 5886 void rapl_perf_init(void); 5887 void cstate_perf_init(void); 5888 void added_perf_counters_init(void); 5889 void pmt_init(void); 5890 5891 void re_initialize(void) 5892 { 5893 free_all_buffers(); 5894 setup_all_buffers(false); 5895 linux_perf_init(); 5896 msr_perf_init(); 5897 rapl_perf_init(); 5898 cstate_perf_init(); 5899 added_perf_counters_init(); 5900 pmt_init(); 5901 fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, 5902 topo.allowed_cpus); 5903 } 5904 5905 void set_max_cpu_num(void) 5906 { 5907 FILE *filep; 5908 int base_cpu; 5909 unsigned long dummy; 5910 char pathname[64]; 5911 5912 base_cpu = sched_getcpu(); 5913 if (base_cpu < 0) 5914 err(1, "cannot find calling cpu ID"); 5915 sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", base_cpu); 5916 5917 filep = fopen_or_die(pathname, "r"); 5918 topo.max_cpu_num = 0; 5919 while (fscanf(filep, "%lx,", &dummy) == 1) 5920 topo.max_cpu_num += BITMASK_SIZE; 5921 fclose(filep); 5922 topo.max_cpu_num--; /* 0 based */ 5923 } 5924 5925 /* 5926 * count_cpus() 5927 * remember the last one seen, it will be the max 5928 */ 5929 int count_cpus(int cpu) 5930 { 5931 UNUSED(cpu); 5932 5933 topo.num_cpus++; 5934 return 0; 5935 } 5936 5937 int mark_cpu_present(int cpu) 5938 { 5939 CPU_SET_S(cpu, cpu_present_setsize, 
cpu_present_set); 5940 return 0; 5941 } 5942 5943 int init_thread_id(int cpu) 5944 { 5945 cpus[cpu].thread_id = -1; 5946 return 0; 5947 } 5948 5949 int set_my_cpu_type(void) 5950 { 5951 unsigned int eax, ebx, ecx, edx; 5952 unsigned int max_level; 5953 5954 __cpuid(0, max_level, ebx, ecx, edx); 5955 5956 if (max_level < CPUID_LEAF_MODEL_ID) 5957 return 0; 5958 5959 __cpuid(CPUID_LEAF_MODEL_ID, eax, ebx, ecx, edx); 5960 5961 return (eax >> CPUID_LEAF_MODEL_ID_CORE_TYPE_SHIFT); 5962 } 5963 5964 int set_cpu_hybrid_type(int cpu) 5965 { 5966 if (cpu_migrate(cpu)) 5967 return -1; 5968 5969 int type = set_my_cpu_type(); 5970 5971 cpus[cpu].type = type; 5972 return 0; 5973 } 5974 5975 /* 5976 * snapshot_proc_interrupts() 5977 * 5978 * read and record summary of /proc/interrupts 5979 * 5980 * return 1 if config change requires a restart, else return 0 5981 */ 5982 int snapshot_proc_interrupts(void) 5983 { 5984 static FILE *fp; 5985 int column, retval; 5986 5987 if (fp == NULL) 5988 fp = fopen_or_die("/proc/interrupts", "r"); 5989 else 5990 rewind(fp); 5991 5992 /* read 1st line of /proc/interrupts to get cpu* name for each column */ 5993 for (column = 0; column < topo.num_cpus; ++column) { 5994 int cpu_number; 5995 5996 retval = fscanf(fp, " CPU%d", &cpu_number); 5997 if (retval != 1) 5998 break; 5999 6000 if (cpu_number > topo.max_cpu_num) { 6001 warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num); 6002 return 1; 6003 } 6004 6005 irq_column_2_cpu[column] = cpu_number; 6006 irqs_per_cpu[cpu_number] = 0; 6007 nmi_per_cpu[cpu_number] = 0; 6008 } 6009 6010 /* read /proc/interrupt count lines and sum up irqs per cpu */ 6011 while (1) { 6012 int column; 6013 char buf[64]; 6014 int this_row_is_nmi = 0; 6015 6016 retval = fscanf(fp, " %s:", buf); /* irq# "N:" */ 6017 if (retval != 1) 6018 break; 6019 6020 if (strncmp(buf, "NMI", strlen("NMI")) == 0) 6021 this_row_is_nmi = 1; 6022 6023 /* read the count per cpu */ 6024 for (column = 0; column < topo.num_cpus; 
++column) { 6025 6026 int cpu_number, irq_count; 6027 6028 retval = fscanf(fp, " %d", &irq_count); 6029 6030 if (retval != 1) 6031 break; 6032 6033 cpu_number = irq_column_2_cpu[column]; 6034 irqs_per_cpu[cpu_number] += irq_count; 6035 if (this_row_is_nmi) 6036 nmi_per_cpu[cpu_number] += irq_count; 6037 } 6038 while (getc(fp) != '\n') ; /* flush interrupt description */ 6039 6040 } 6041 return 0; 6042 } 6043 6044 /* 6045 * snapshot_graphics() 6046 * 6047 * record snapshot of specified graphics sysfs knob 6048 * 6049 * return 1 if config change requires a restart, else return 0 6050 */ 6051 int snapshot_graphics(int idx) 6052 { 6053 int retval; 6054 6055 rewind(gfx_info[idx].fp); 6056 fflush(gfx_info[idx].fp); 6057 6058 switch (idx) { 6059 case GFX_rc6: 6060 case SAM_mc6: 6061 retval = fscanf(gfx_info[idx].fp, "%lld", &gfx_info[idx].val_ull); 6062 if (retval != 1) 6063 err(1, "rc6"); 6064 return 0; 6065 case GFX_MHz: 6066 case GFX_ACTMHz: 6067 case SAM_MHz: 6068 case SAM_ACTMHz: 6069 retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val); 6070 if (retval != 1) 6071 err(1, "MHz"); 6072 return 0; 6073 default: 6074 return -EINVAL; 6075 } 6076 } 6077 6078 /* 6079 * snapshot_cpu_lpi() 6080 * 6081 * record snapshot of 6082 * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us 6083 */ 6084 int snapshot_cpu_lpi_us(void) 6085 { 6086 FILE *fp; 6087 int retval; 6088 6089 fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r"); 6090 6091 retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us); 6092 if (retval != 1) { 6093 fprintf(stderr, "Disabling Low Power Idle CPU output\n"); 6094 BIC_NOT_PRESENT(BIC_CPU_LPI); 6095 fclose(fp); 6096 return -1; 6097 } 6098 6099 fclose(fp); 6100 6101 return 0; 6102 } 6103 6104 /* 6105 * snapshot_sys_lpi() 6106 * 6107 * record snapshot of sys_lpi_file 6108 */ 6109 int snapshot_sys_lpi_us(void) 6110 { 6111 FILE *fp; 6112 int retval; 6113 6114 fp = fopen_or_die(sys_lpi_file, "r"); 6115 6116 retval = 
fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us); 6117 if (retval != 1) { 6118 fprintf(stderr, "Disabling Low Power Idle System output\n"); 6119 BIC_NOT_PRESENT(BIC_SYS_LPI); 6120 fclose(fp); 6121 return -1; 6122 } 6123 fclose(fp); 6124 6125 return 0; 6126 } 6127 6128 /* 6129 * snapshot /proc and /sys files 6130 * 6131 * return 1 if configuration restart needed, else return 0 6132 */ 6133 int snapshot_proc_sysfs_files(void) 6134 { 6135 gettimeofday(&procsysfs_tv_begin, (struct timezone *)NULL); 6136 6137 if (DO_BIC(BIC_IRQ) || DO_BIC(BIC_NMI)) 6138 if (snapshot_proc_interrupts()) 6139 return 1; 6140 6141 if (DO_BIC(BIC_GFX_rc6)) 6142 snapshot_graphics(GFX_rc6); 6143 6144 if (DO_BIC(BIC_GFXMHz)) 6145 snapshot_graphics(GFX_MHz); 6146 6147 if (DO_BIC(BIC_GFXACTMHz)) 6148 snapshot_graphics(GFX_ACTMHz); 6149 6150 if (DO_BIC(BIC_SAM_mc6)) 6151 snapshot_graphics(SAM_mc6); 6152 6153 if (DO_BIC(BIC_SAMMHz)) 6154 snapshot_graphics(SAM_MHz); 6155 6156 if (DO_BIC(BIC_SAMACTMHz)) 6157 snapshot_graphics(SAM_ACTMHz); 6158 6159 if (DO_BIC(BIC_CPU_LPI)) 6160 snapshot_cpu_lpi_us(); 6161 6162 if (DO_BIC(BIC_SYS_LPI)) 6163 snapshot_sys_lpi_us(); 6164 6165 return 0; 6166 } 6167 6168 int exit_requested; 6169 6170 static void signal_handler(int signal) 6171 { 6172 switch (signal) { 6173 case SIGINT: 6174 exit_requested = 1; 6175 if (debug) 6176 fprintf(stderr, " SIGINT\n"); 6177 break; 6178 case SIGUSR1: 6179 if (debug > 1) 6180 fprintf(stderr, "SIGUSR1\n"); 6181 break; 6182 } 6183 } 6184 6185 void setup_signal_handler(void) 6186 { 6187 struct sigaction sa; 6188 6189 memset(&sa, 0, sizeof(sa)); 6190 6191 sa.sa_handler = &signal_handler; 6192 6193 if (sigaction(SIGINT, &sa, NULL) < 0) 6194 err(1, "sigaction SIGINT"); 6195 if (sigaction(SIGUSR1, &sa, NULL) < 0) 6196 err(1, "sigaction SIGUSR1"); 6197 } 6198 6199 void do_sleep(void) 6200 { 6201 struct timeval tout; 6202 struct timespec rest; 6203 fd_set readfds; 6204 int retval; 6205 6206 FD_ZERO(&readfds); 6207 FD_SET(0, &readfds); 6208 6209 if 
(ignore_stdin) { 6210 nanosleep(&interval_ts, NULL); 6211 return; 6212 } 6213 6214 tout = interval_tv; 6215 retval = select(1, &readfds, NULL, NULL, &tout); 6216 6217 if (retval == 1) { 6218 switch (getc(stdin)) { 6219 case 'q': 6220 exit_requested = 1; 6221 break; 6222 case EOF: 6223 /* 6224 * 'stdin' is a pipe closed on the other end. There 6225 * won't be any further input. 6226 */ 6227 ignore_stdin = 1; 6228 /* Sleep the rest of the time */ 6229 rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000); 6230 rest.tv_nsec = (tout.tv_usec % 1000000) * 1000; 6231 nanosleep(&rest, NULL); 6232 } 6233 } 6234 } 6235 6236 int get_msr_sum(int cpu, off_t offset, unsigned long long *msr) 6237 { 6238 int ret, idx; 6239 unsigned long long msr_cur, msr_last; 6240 6241 assert(!no_msr); 6242 6243 if (!per_cpu_msr_sum) 6244 return 1; 6245 6246 idx = offset_to_idx(offset); 6247 if (idx < 0) 6248 return idx; 6249 /* get_msr_sum() = sum + (get_msr() - last) */ 6250 ret = get_msr(cpu, offset, &msr_cur); 6251 if (ret) 6252 return ret; 6253 msr_last = per_cpu_msr_sum[cpu].entries[idx].last; 6254 DELTA_WRAP32(msr_cur, msr_last); 6255 *msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum; 6256 6257 return 0; 6258 } 6259 6260 timer_t timerid; 6261 6262 /* Timer callback, update the sum of MSRs periodically. 
*/ 6263 static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p) 6264 { 6265 int i, ret; 6266 int cpu = t->cpu_id; 6267 6268 UNUSED(c); 6269 UNUSED(p); 6270 6271 assert(!no_msr); 6272 6273 for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) { 6274 unsigned long long msr_cur, msr_last; 6275 off_t offset; 6276 6277 if (!idx_valid(i)) 6278 continue; 6279 offset = idx_to_offset(i); 6280 if (offset < 0) 6281 continue; 6282 ret = get_msr(cpu, offset, &msr_cur); 6283 if (ret) { 6284 fprintf(outf, "Can not update msr(0x%llx)\n", (unsigned long long)offset); 6285 continue; 6286 } 6287 6288 msr_last = per_cpu_msr_sum[cpu].entries[i].last; 6289 per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff; 6290 6291 DELTA_WRAP32(msr_cur, msr_last); 6292 per_cpu_msr_sum[cpu].entries[i].sum += msr_last; 6293 } 6294 return 0; 6295 } 6296 6297 static void msr_record_handler(union sigval v) 6298 { 6299 UNUSED(v); 6300 6301 for_all_cpus(update_msr_sum, EVEN_COUNTERS); 6302 } 6303 6304 void msr_sum_record(void) 6305 { 6306 struct itimerspec its; 6307 struct sigevent sev; 6308 6309 per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array)); 6310 if (!per_cpu_msr_sum) { 6311 fprintf(outf, "Can not allocate memory for long time MSR.\n"); 6312 return; 6313 } 6314 /* 6315 * Signal handler might be restricted, so use thread notifier instead. 6316 */ 6317 memset(&sev, 0, sizeof(struct sigevent)); 6318 sev.sigev_notify = SIGEV_THREAD; 6319 sev.sigev_notify_function = msr_record_handler; 6320 6321 sev.sigev_value.sival_ptr = &timerid; 6322 if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) { 6323 fprintf(outf, "Can not create timer.\n"); 6324 goto release_msr; 6325 } 6326 6327 its.it_value.tv_sec = 0; 6328 its.it_value.tv_nsec = 1; 6329 /* 6330 * A wraparound time has been calculated early. 6331 * Some sources state that the peak power for a 6332 * microprocessor is usually 1.5 times the TDP rating, 6333 * use 2 * TDP for safety. 
6334 */ 6335 its.it_interval.tv_sec = rapl_joule_counter_range / 2; 6336 its.it_interval.tv_nsec = 0; 6337 6338 if (timer_settime(timerid, 0, &its, NULL) == -1) { 6339 fprintf(outf, "Can not set timer.\n"); 6340 goto release_timer; 6341 } 6342 return; 6343 6344 release_timer: 6345 timer_delete(timerid); 6346 release_msr: 6347 free(per_cpu_msr_sum); 6348 } 6349 6350 /* 6351 * set_my_sched_priority(pri) 6352 * return previous priority on success 6353 * return value < -20 on failure 6354 */ 6355 int set_my_sched_priority(int priority) 6356 { 6357 int retval; 6358 int original_priority; 6359 6360 errno = 0; 6361 original_priority = getpriority(PRIO_PROCESS, 0); 6362 if (errno && (original_priority == -1)) 6363 return -21; 6364 6365 retval = setpriority(PRIO_PROCESS, 0, priority); 6366 if (retval) 6367 return -21; 6368 6369 errno = 0; 6370 retval = getpriority(PRIO_PROCESS, 0); 6371 if (retval != priority) 6372 return -21; 6373 6374 return original_priority; 6375 } 6376 6377 void turbostat_loop() 6378 { 6379 int retval; 6380 int restarted = 0; 6381 unsigned int done_iters = 0; 6382 6383 setup_signal_handler(); 6384 6385 /* 6386 * elevate own priority for interval mode 6387 * 6388 * ignore on error - we probably don't have permission to set it, but 6389 * it's not a big deal 6390 */ 6391 set_my_sched_priority(-20); 6392 6393 restart: 6394 restarted++; 6395 6396 snapshot_proc_sysfs_files(); 6397 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 6398 first_counter_read = 0; 6399 if (retval < -1) { 6400 exit(retval); 6401 } else if (retval == -1) { 6402 if (restarted > 10) { 6403 exit(retval); 6404 } 6405 re_initialize(); 6406 goto restart; 6407 } 6408 restarted = 0; 6409 done_iters = 0; 6410 gettimeofday(&tv_even, (struct timezone *)NULL); 6411 6412 while (1) { 6413 if (for_all_proc_cpus(cpu_is_not_present)) { 6414 re_initialize(); 6415 goto restart; 6416 } 6417 if (update_effective_str(false)) { 6418 re_initialize(); 6419 goto restart; 6420 } 6421 do_sleep(); 6422 if 
(snapshot_proc_sysfs_files()) 6423 goto restart; 6424 retval = for_all_cpus(get_counters, ODD_COUNTERS); 6425 if (retval < -1) { 6426 exit(retval); 6427 } else if (retval == -1) { 6428 re_initialize(); 6429 goto restart; 6430 } 6431 gettimeofday(&tv_odd, (struct timezone *)NULL); 6432 timersub(&tv_odd, &tv_even, &tv_delta); 6433 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) { 6434 re_initialize(); 6435 goto restart; 6436 } 6437 delta_platform(&platform_counters_odd, &platform_counters_even); 6438 compute_average(EVEN_COUNTERS); 6439 format_all_counters(EVEN_COUNTERS); 6440 flush_output_stdout(); 6441 if (exit_requested) 6442 break; 6443 if (num_iterations && ++done_iters >= num_iterations) 6444 break; 6445 do_sleep(); 6446 if (snapshot_proc_sysfs_files()) 6447 goto restart; 6448 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 6449 if (retval < -1) { 6450 exit(retval); 6451 } else if (retval == -1) { 6452 re_initialize(); 6453 goto restart; 6454 } 6455 gettimeofday(&tv_even, (struct timezone *)NULL); 6456 timersub(&tv_even, &tv_odd, &tv_delta); 6457 if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) { 6458 re_initialize(); 6459 goto restart; 6460 } 6461 delta_platform(&platform_counters_even, &platform_counters_odd); 6462 compute_average(ODD_COUNTERS); 6463 format_all_counters(ODD_COUNTERS); 6464 flush_output_stdout(); 6465 if (exit_requested) 6466 break; 6467 if (num_iterations && ++done_iters >= num_iterations) 6468 break; 6469 } 6470 } 6471 6472 void check_dev_msr() 6473 { 6474 struct stat sb; 6475 char pathname[32]; 6476 6477 if (no_msr) 6478 return; 6479 6480 sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); 6481 if (stat(pathname, &sb)) 6482 if (system("/sbin/modprobe msr > /dev/null 2>&1")) 6483 no_msr = 1; 6484 } 6485 6486 /* 6487 * check for CAP_SYS_RAWIO 6488 * return 0 on success 6489 * return 1 on fail 6490 */ 6491 int check_for_cap_sys_rawio(void) 6492 { 6493 cap_t caps; 6494 cap_flag_value_t cap_flag_value; 6495 int ret = 0; 
6496 6497 caps = cap_get_proc(); 6498 if (caps == NULL) 6499 return 1; 6500 6501 if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) { 6502 ret = 1; 6503 goto free_and_exit; 6504 } 6505 6506 if (cap_flag_value != CAP_SET) { 6507 ret = 1; 6508 goto free_and_exit; 6509 } 6510 6511 free_and_exit: 6512 if (cap_free(caps) == -1) 6513 err(-6, "cap_free\n"); 6514 6515 return ret; 6516 } 6517 6518 void check_msr_permission(void) 6519 { 6520 int failed = 0; 6521 char pathname[32]; 6522 6523 if (no_msr) 6524 return; 6525 6526 /* check for CAP_SYS_RAWIO */ 6527 failed += check_for_cap_sys_rawio(); 6528 6529 /* test file permissions */ 6530 sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); 6531 if (euidaccess(pathname, R_OK)) { 6532 failed++; 6533 } 6534 6535 if (failed) { 6536 warnx("Failed to access %s. Some of the counters may not be available\n" 6537 "\tRun as root to enable them or use %s to disable the access explicitly", pathname, "--no-msr"); 6538 no_msr = 1; 6539 } 6540 } 6541 6542 void probe_bclk(void) 6543 { 6544 unsigned long long msr; 6545 unsigned int base_ratio; 6546 6547 if (!platform->has_nhm_msrs || no_msr) 6548 return; 6549 6550 if (platform->bclk_freq == BCLK_100MHZ) 6551 bclk = 100.00; 6552 else if (platform->bclk_freq == BCLK_133MHZ) 6553 bclk = 133.33; 6554 else if (platform->bclk_freq == BCLK_SLV) 6555 bclk = slm_bclk(); 6556 else 6557 return; 6558 6559 get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); 6560 base_ratio = (msr >> 8) & 0xFF; 6561 6562 base_hz = base_ratio * bclk * 1000000; 6563 has_base_hz = 1; 6564 6565 if (platform->enable_tsc_tweak) 6566 tsc_tweak = base_hz / tsc_hz; 6567 } 6568 6569 static void remove_underbar(char *s) 6570 { 6571 char *to = s; 6572 6573 while (*s) { 6574 if (*s != '_') 6575 *to++ = *s; 6576 s++; 6577 } 6578 6579 *to = 0; 6580 } 6581 6582 static void dump_turbo_ratio_info(void) 6583 { 6584 if (!has_turbo) 6585 return; 6586 6587 if (!platform->has_nhm_msrs || no_msr) 6588 return; 6589 6590 if 
(platform->trl_msrs & TRL_LIMIT2) 6591 dump_turbo_ratio_limit2(); 6592 6593 if (platform->trl_msrs & TRL_LIMIT1) 6594 dump_turbo_ratio_limit1(); 6595 6596 if (platform->trl_msrs & TRL_BASE) { 6597 dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT); 6598 6599 if (is_hybrid) 6600 dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT); 6601 } 6602 6603 if (platform->trl_msrs & TRL_ATOM) 6604 dump_atom_turbo_ratio_limits(); 6605 6606 if (platform->trl_msrs & TRL_KNL) 6607 dump_knl_turbo_ratio_limits(); 6608 6609 if (platform->has_config_tdp) 6610 dump_config_tdp(); 6611 } 6612 6613 static int read_sysfs_int(char *path) 6614 { 6615 FILE *input; 6616 int retval = -1; 6617 6618 input = fopen(path, "r"); 6619 if (input == NULL) { 6620 if (debug) 6621 fprintf(outf, "NSFOD %s\n", path); 6622 return (-1); 6623 } 6624 if (fscanf(input, "%d", &retval) != 1) 6625 err(1, "%s: failed to read int from file", path); 6626 fclose(input); 6627 6628 return (retval); 6629 } 6630 6631 static void dump_sysfs_file(char *path) 6632 { 6633 FILE *input; 6634 char cpuidle_buf[64]; 6635 6636 input = fopen(path, "r"); 6637 if (input == NULL) { 6638 if (debug) 6639 fprintf(outf, "NSFOD %s\n", path); 6640 return; 6641 } 6642 if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input)) 6643 err(1, "%s: failed to read file", path); 6644 fclose(input); 6645 6646 fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf); 6647 } 6648 6649 static void probe_intel_uncore_frequency_legacy(void) 6650 { 6651 int i, j; 6652 char path[256]; 6653 6654 for (i = 0; i < topo.num_packages; ++i) { 6655 for (j = 0; j <= topo.max_die_id; ++j) { 6656 int k, l; 6657 char path_base[128]; 6658 6659 sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i, 6660 j); 6661 6662 if (access(path_base, R_OK)) 6663 continue; 6664 6665 BIC_PRESENT(BIC_UNCORE_MHZ); 6666 6667 if (quiet) 6668 return; 6669 6670 sprintf(path, "%s/min_freq_khz", path_base); 6671 k = read_sysfs_int(path); 6672 sprintf(path, 
"%s/max_freq_khz", path_base); 6673 l = read_sysfs_int(path); 6674 fprintf(outf, "Uncore Frequency package%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000); 6675 6676 sprintf(path, "%s/initial_min_freq_khz", path_base); 6677 k = read_sysfs_int(path); 6678 sprintf(path, "%s/initial_max_freq_khz", path_base); 6679 l = read_sysfs_int(path); 6680 fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000); 6681 6682 sprintf(path, "%s/current_freq_khz", path_base); 6683 k = read_sysfs_int(path); 6684 fprintf(outf, " %d MHz\n", k / 1000); 6685 } 6686 } 6687 } 6688 6689 static void probe_intel_uncore_frequency_cluster(void) 6690 { 6691 int i, uncore_max_id; 6692 char path[256]; 6693 char path_base[128]; 6694 6695 if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK)) 6696 return; 6697 6698 for (uncore_max_id = 0;; ++uncore_max_id) { 6699 6700 sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", uncore_max_id); 6701 6702 /* uncore## start at 00 and skips no numbers, so stop upon first missing */ 6703 if (access(path_base, R_OK)) { 6704 uncore_max_id -= 1; 6705 break; 6706 } 6707 } 6708 for (i = uncore_max_id; i >= 0; --i) { 6709 int k, l; 6710 int package_id, domain_id, cluster_id; 6711 char name_buf[16]; 6712 6713 sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", i); 6714 6715 if (access(path_base, R_OK)) 6716 err(1, "%s: %s\n", __func__, path_base); 6717 6718 sprintf(path, "%s/package_id", path_base); 6719 package_id = read_sysfs_int(path); 6720 6721 sprintf(path, "%s/domain_id", path_base); 6722 domain_id = read_sysfs_int(path); 6723 6724 sprintf(path, "%s/fabric_cluster_id", path_base); 6725 cluster_id = read_sysfs_int(path); 6726 6727 sprintf(path, "%s/current_freq_khz", path_base); 6728 sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id); 6729 6730 /* 6731 * Once add_couter() is called, that counter is always read 6732 * and reported -- So it is effectively (enabled & present). 
6733 * Only call add_counter() here if legacy BIC_UNCORE_MHZ (UncMHz) 6734 * is (enabled). Since we are in this routine, we 6735 * know we will not probe and set (present) the legacy counter. 6736 * 6737 * This allows "--show/--hide UncMHz" to be effective for 6738 * the clustered MHz counters, as a group. 6739 */ 6740 if BIC_IS_ENABLED(BIC_UNCORE_MHZ) 6741 add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); 6742 6743 if (quiet) 6744 continue; 6745 6746 sprintf(path, "%s/min_freq_khz", path_base); 6747 k = read_sysfs_int(path); 6748 sprintf(path, "%s/max_freq_khz", path_base); 6749 l = read_sysfs_int(path); 6750 fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id, 6751 cluster_id, k / 1000, l / 1000); 6752 6753 sprintf(path, "%s/initial_min_freq_khz", path_base); 6754 k = read_sysfs_int(path); 6755 sprintf(path, "%s/initial_max_freq_khz", path_base); 6756 l = read_sysfs_int(path); 6757 fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000); 6758 6759 sprintf(path, "%s/current_freq_khz", path_base); 6760 k = read_sysfs_int(path); 6761 fprintf(outf, " %d MHz\n", k / 1000); 6762 } 6763 } 6764 6765 static void probe_intel_uncore_frequency(void) 6766 { 6767 if (!genuine_intel) 6768 return; 6769 6770 if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK) == 0) 6771 probe_intel_uncore_frequency_cluster(); 6772 else 6773 probe_intel_uncore_frequency_legacy(); 6774 } 6775 6776 static void set_graphics_fp(char *path, int idx) 6777 { 6778 if (!access(path, R_OK)) 6779 gfx_info[idx].fp = fopen_or_die(path, "r"); 6780 } 6781 6782 /* Enlarge this if there are /sys/class/drm/card2 ... 
*/ 6783 #define GFX_MAX_CARDS 2 6784 6785 static void probe_graphics(void) 6786 { 6787 char path[PATH_MAX]; 6788 int i; 6789 6790 /* Xe graphics sysfs knobs */ 6791 if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) { 6792 FILE *fp; 6793 char buf[8]; 6794 bool gt0_is_gt; 6795 6796 fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r"); 6797 if (!fp) 6798 goto next; 6799 6800 if (!fread(buf, sizeof(char), 7, fp)) { 6801 fclose(fp); 6802 goto next; 6803 } 6804 fclose(fp); 6805 6806 if (!strncmp(buf, "gt0-rc", strlen("gt0-rc"))) 6807 gt0_is_gt = true; 6808 else if (!strncmp(buf, "gt0-mc", strlen("gt0-mc"))) 6809 gt0_is_gt = false; 6810 else 6811 goto next; 6812 6813 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", gt0_is_gt ? GFX_rc6 : SAM_mc6); 6814 6815 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", gt0_is_gt ? GFX_MHz : SAM_MHz); 6816 6817 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz); 6818 6819 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", gt0_is_gt ? SAM_mc6 : GFX_rc6); 6820 6821 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", gt0_is_gt ? SAM_MHz : GFX_MHz); 6822 6823 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", gt0_is_gt ? 
SAM_ACTMHz : GFX_ACTMHz); 6824 6825 goto end; 6826 } 6827 6828 next: 6829 /* New i915 graphics sysfs knobs */ 6830 for (i = 0; i < GFX_MAX_CARDS; i++) { 6831 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rc6_residency_ms", i); 6832 if (!access(path, R_OK)) 6833 break; 6834 } 6835 6836 if (i == GFX_MAX_CARDS) 6837 goto legacy_i915; 6838 6839 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rc6_residency_ms", i); 6840 set_graphics_fp(path, GFX_rc6); 6841 6842 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rps_cur_freq_mhz", i); 6843 set_graphics_fp(path, GFX_MHz); 6844 6845 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rps_act_freq_mhz", i); 6846 set_graphics_fp(path, GFX_ACTMHz); 6847 6848 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rc6_residency_ms", i); 6849 set_graphics_fp(path, SAM_mc6); 6850 6851 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rps_cur_freq_mhz", i); 6852 set_graphics_fp(path, SAM_MHz); 6853 6854 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rps_act_freq_mhz", i); 6855 set_graphics_fp(path, SAM_ACTMHz); 6856 6857 goto end; 6858 6859 legacy_i915: 6860 /* Fall back to traditional i915 graphics sysfs knobs */ 6861 set_graphics_fp("/sys/class/drm/card0/power/rc6_residency_ms", GFX_rc6); 6862 6863 set_graphics_fp("/sys/class/drm/card0/gt_cur_freq_mhz", GFX_MHz); 6864 if (!gfx_info[GFX_MHz].fp) 6865 set_graphics_fp("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", GFX_MHz); 6866 6867 set_graphics_fp("/sys/class/drm/card0/gt_act_freq_mhz", GFX_ACTMHz); 6868 if (!gfx_info[GFX_ACTMHz].fp) 6869 set_graphics_fp("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", GFX_ACTMHz); 6870 6871 end: 6872 if (gfx_info[GFX_rc6].fp) 6873 BIC_PRESENT(BIC_GFX_rc6); 6874 if (gfx_info[GFX_MHz].fp) 6875 BIC_PRESENT(BIC_GFXMHz); 6876 if (gfx_info[GFX_ACTMHz].fp) 6877 BIC_PRESENT(BIC_GFXACTMHz); 6878 if (gfx_info[SAM_mc6].fp) 6879 BIC_PRESENT(BIC_SAM_mc6); 6880 if (gfx_info[SAM_MHz].fp) 6881 
BIC_PRESENT(BIC_SAMMHz); 6882 if (gfx_info[SAM_ACTMHz].fp) 6883 BIC_PRESENT(BIC_SAMACTMHz); 6884 } 6885 6886 static void dump_sysfs_cstate_config(void) 6887 { 6888 char path[64]; 6889 char name_buf[16]; 6890 char desc[64]; 6891 FILE *input; 6892 int state; 6893 char *sp; 6894 6895 if (access("/sys/devices/system/cpu/cpuidle", R_OK)) { 6896 fprintf(outf, "cpuidle not loaded\n"); 6897 return; 6898 } 6899 6900 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver"); 6901 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor"); 6902 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro"); 6903 6904 for (state = 0; state < 10; ++state) { 6905 6906 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); 6907 input = fopen(path, "r"); 6908 if (input == NULL) 6909 continue; 6910 if (!fgets(name_buf, sizeof(name_buf), input)) 6911 err(1, "%s: failed to read file", path); 6912 6913 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ 6914 sp = strchr(name_buf, '-'); 6915 if (!sp) 6916 sp = strchrnul(name_buf, '\n'); 6917 *sp = '\0'; 6918 fclose(input); 6919 6920 remove_underbar(name_buf); 6921 6922 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state); 6923 input = fopen(path, "r"); 6924 if (input == NULL) 6925 continue; 6926 if (!fgets(desc, sizeof(desc), input)) 6927 err(1, "%s: failed to read file", path); 6928 6929 fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc); 6930 fclose(input); 6931 } 6932 } 6933 6934 static void dump_sysfs_pstate_config(void) 6935 { 6936 char path[64]; 6937 char driver_buf[64]; 6938 char governor_buf[64]; 6939 FILE *input; 6940 int turbo; 6941 6942 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", base_cpu); 6943 input = fopen(path, "r"); 6944 if (input == NULL) { 6945 fprintf(outf, "NSFOD %s\n", path); 6946 return; 6947 } 6948 if (!fgets(driver_buf, sizeof(driver_buf), input)) 6949 err(1, "%s: failed to read 
file", path); 6950 fclose(input); 6951 6952 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", base_cpu); 6953 input = fopen(path, "r"); 6954 if (input == NULL) { 6955 fprintf(outf, "NSFOD %s\n", path); 6956 return; 6957 } 6958 if (!fgets(governor_buf, sizeof(governor_buf), input)) 6959 err(1, "%s: failed to read file", path); 6960 fclose(input); 6961 6962 fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf); 6963 fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf); 6964 6965 sprintf(path, "/sys/devices/system/cpu/cpufreq/boost"); 6966 input = fopen(path, "r"); 6967 if (input != NULL) { 6968 if (fscanf(input, "%d", &turbo) != 1) 6969 err(1, "%s: failed to parse number from file", path); 6970 fprintf(outf, "cpufreq boost: %d\n", turbo); 6971 fclose(input); 6972 } 6973 6974 sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo"); 6975 input = fopen(path, "r"); 6976 if (input != NULL) { 6977 if (fscanf(input, "%d", &turbo) != 1) 6978 err(1, "%s: failed to parse number from file", path); 6979 fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo); 6980 fclose(input); 6981 } 6982 } 6983 6984 /* 6985 * print_epb() 6986 * Decode the ENERGY_PERF_BIAS MSR 6987 */ 6988 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) 6989 { 6990 char *epb_string; 6991 int cpu, epb; 6992 6993 UNUSED(c); 6994 UNUSED(p); 6995 6996 if (!has_epb) 6997 return 0; 6998 6999 cpu = t->cpu_id; 7000 7001 /* EPB is per-package */ 7002 if (!is_cpu_first_thread_in_package(t, c, p)) 7003 return 0; 7004 7005 if (cpu_migrate(cpu)) { 7006 fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu); 7007 return -1; 7008 } 7009 7010 epb = get_epb(cpu); 7011 if (epb < 0) 7012 return 0; 7013 7014 switch (epb) { 7015 case ENERGY_PERF_BIAS_PERFORMANCE: 7016 epb_string = "performance"; 7017 break; 7018 case ENERGY_PERF_BIAS_NORMAL: 7019 epb_string = "balanced"; 7020 break; 7021 case ENERGY_PERF_BIAS_POWERSAVE: 7022 
epb_string = "powersave"; 7023 break; 7024 default: 7025 epb_string = "custom"; 7026 break; 7027 } 7028 fprintf(outf, "cpu%d: EPB: %d (%s)\n", cpu, epb, epb_string); 7029 7030 return 0; 7031 } 7032 7033 /* 7034 * print_hwp() 7035 * Decode the MSR_HWP_CAPABILITIES 7036 */ 7037 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) 7038 { 7039 unsigned long long msr; 7040 int cpu; 7041 7042 UNUSED(c); 7043 UNUSED(p); 7044 7045 if (no_msr) 7046 return 0; 7047 7048 if (!has_hwp) 7049 return 0; 7050 7051 cpu = t->cpu_id; 7052 7053 /* MSR_HWP_CAPABILITIES is per-package */ 7054 if (!is_cpu_first_thread_in_package(t, c, p)) 7055 return 0; 7056 7057 if (cpu_migrate(cpu)) { 7058 fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu); 7059 return -1; 7060 } 7061 7062 if (get_msr(cpu, MSR_PM_ENABLE, &msr)) 7063 return 0; 7064 7065 fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", cpu, msr, (msr & (1 << 0)) ? "" : "No-"); 7066 7067 /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */ 7068 if ((msr & (1 << 0)) == 0) 7069 return 0; 7070 7071 if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr)) 7072 return 0; 7073 7074 fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx " 7075 "(high %d guar %d eff %d low %d)\n", 7076 cpu, msr, 7077 (unsigned int)HWP_HIGHEST_PERF(msr), 7078 (unsigned int)HWP_GUARANTEED_PERF(msr), 7079 (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr)); 7080 7081 if (get_msr(cpu, MSR_HWP_REQUEST, &msr)) 7082 return 0; 7083 7084 fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx " 7085 "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n", 7086 cpu, msr, 7087 (unsigned int)(((msr) >> 0) & 0xff), 7088 (unsigned int)(((msr) >> 8) & 0xff), 7089 (unsigned int)(((msr) >> 16) & 0xff), 7090 (unsigned int)(((msr) >> 24) & 0xff), 7091 (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1)); 7092 7093 if (has_hwp_pkg) { 7094 if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr)) 7095 return 0; 
7096 7097 fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx " 7098 "(min %d max %d des %d epp 0x%x window 0x%x)\n", 7099 cpu, msr, 7100 (unsigned int)(((msr) >> 0) & 0xff), 7101 (unsigned int)(((msr) >> 8) & 0xff), 7102 (unsigned int)(((msr) >> 16) & 0xff), 7103 (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3)); 7104 } 7105 if (has_hwp_notify) { 7106 if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr)) 7107 return 0; 7108 7109 fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx " 7110 "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n", 7111 cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis"); 7112 } 7113 if (get_msr(cpu, MSR_HWP_STATUS, &msr)) 7114 return 0; 7115 7116 fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx " 7117 "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", 7118 cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-"); 7119 7120 return 0; 7121 } 7122 7123 /* 7124 * print_perf_limit() 7125 */ 7126 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p) 7127 { 7128 unsigned long long msr; 7129 int cpu; 7130 7131 UNUSED(c); 7132 UNUSED(p); 7133 7134 if (no_msr) 7135 return 0; 7136 7137 cpu = t->cpu_id; 7138 7139 /* per-package */ 7140 if (!is_cpu_first_thread_in_package(t, c, p)) 7141 return 0; 7142 7143 if (cpu_migrate(cpu)) { 7144 fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu); 7145 return -1; 7146 } 7147 7148 if (platform->plr_msrs & PLR_CORE) { 7149 get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); 7150 fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 7151 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", 7152 (msr & 1 << 15) ? "bit15, " : "", 7153 (msr & 1 << 14) ? "bit14, " : "", 7154 (msr & 1 << 13) ? "Transitions, " : "", 7155 (msr & 1 << 12) ? "MultiCoreTurbo, " : "", 7156 (msr & 1 << 11) ? "PkgPwrL2, " : "", 7157 (msr & 1 << 10) ? "PkgPwrL1, " : "", 7158 (msr & 1 << 9) ? "CorePwr, " : "", 7159 (msr & 1 << 8) ? 
"Amps, " : "", 7160 (msr & 1 << 6) ? "VR-Therm, " : "", 7161 (msr & 1 << 5) ? "Auto-HWP, " : "", 7162 (msr & 1 << 4) ? "Graphics, " : "", 7163 (msr & 1 << 2) ? "bit2, " : "", 7164 (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : ""); 7165 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", 7166 (msr & 1 << 31) ? "bit31, " : "", 7167 (msr & 1 << 30) ? "bit30, " : "", 7168 (msr & 1 << 29) ? "Transitions, " : "", 7169 (msr & 1 << 28) ? "MultiCoreTurbo, " : "", 7170 (msr & 1 << 27) ? "PkgPwrL2, " : "", 7171 (msr & 1 << 26) ? "PkgPwrL1, " : "", 7172 (msr & 1 << 25) ? "CorePwr, " : "", 7173 (msr & 1 << 24) ? "Amps, " : "", 7174 (msr & 1 << 22) ? "VR-Therm, " : "", 7175 (msr & 1 << 21) ? "Auto-HWP, " : "", 7176 (msr & 1 << 20) ? "Graphics, " : "", 7177 (msr & 1 << 18) ? "bit18, " : "", 7178 (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : ""); 7179 7180 } 7181 if (platform->plr_msrs & PLR_GFX) { 7182 get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); 7183 fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 7184 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)", 7185 (msr & 1 << 0) ? "PROCHOT, " : "", 7186 (msr & 1 << 1) ? "ThermStatus, " : "", 7187 (msr & 1 << 4) ? "Graphics, " : "", 7188 (msr & 1 << 6) ? "VR-Therm, " : "", 7189 (msr & 1 << 8) ? "Amps, " : "", 7190 (msr & 1 << 9) ? "GFXPwr, " : "", 7191 (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); 7192 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n", 7193 (msr & 1 << 16) ? "PROCHOT, " : "", 7194 (msr & 1 << 17) ? "ThermStatus, " : "", 7195 (msr & 1 << 20) ? "Graphics, " : "", 7196 (msr & 1 << 22) ? "VR-Therm, " : "", 7197 (msr & 1 << 24) ? "Amps, " : "", 7198 (msr & 1 << 25) ? "GFXPwr, " : "", 7199 (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? 
"PkgPwrL2, " : ""); 7200 } 7201 if (platform->plr_msrs & PLR_RING) { 7202 get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); 7203 fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 7204 fprintf(outf, " (Active: %s%s%s%s%s%s)", 7205 (msr & 1 << 0) ? "PROCHOT, " : "", 7206 (msr & 1 << 1) ? "ThermStatus, " : "", 7207 (msr & 1 << 6) ? "VR-Therm, " : "", 7208 (msr & 1 << 8) ? "Amps, " : "", 7209 (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); 7210 fprintf(outf, " (Logged: %s%s%s%s%s%s)\n", 7211 (msr & 1 << 16) ? "PROCHOT, " : "", 7212 (msr & 1 << 17) ? "ThermStatus, " : "", 7213 (msr & 1 << 22) ? "VR-Therm, " : "", 7214 (msr & 1 << 24) ? "Amps, " : "", 7215 (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); 7216 } 7217 return 0; 7218 } 7219 7220 #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ 7221 #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ 7222 7223 double get_quirk_tdp(void) 7224 { 7225 if (platform->rapl_quirk_tdp) 7226 return platform->rapl_quirk_tdp; 7227 7228 return 135.0; 7229 } 7230 7231 double get_tdp_intel(void) 7232 { 7233 unsigned long long msr; 7234 7235 if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) 7236 if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr)) 7237 return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; 7238 return get_quirk_tdp(); 7239 } 7240 7241 double get_tdp_amd(void) 7242 { 7243 return get_quirk_tdp(); 7244 } 7245 7246 void rapl_probe_intel(void) 7247 { 7248 unsigned long long msr; 7249 unsigned int time_unit; 7250 double tdp; 7251 const unsigned long long bic_watt_bits = BIC_SysWatt | BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt; 7252 const unsigned long long bic_joules_bits = BIC_Sys_J | BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J; 7253 7254 if (rapl_joules) 7255 bic_enabled &= ~bic_watt_bits; 7256 else 7257 bic_enabled &= ~bic_joules_bits; 7258 7259 if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS)) 7260 
bic_enabled &= ~BIC_PKG__; 7261 if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)) 7262 bic_enabled &= ~BIC_RAM__; 7263 7264 /* units on package 0, verify later other packages match */ 7265 if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr)) 7266 return; 7267 7268 rapl_power_units = 1.0 / (1 << (msr & 0xF)); 7269 if (platform->has_rapl_divisor) 7270 rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000; 7271 else 7272 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); 7273 7274 if (platform->has_fixed_rapl_unit) 7275 rapl_dram_energy_units = (15.3 / 1000000); 7276 else 7277 rapl_dram_energy_units = rapl_energy_units; 7278 7279 if (platform->has_fixed_rapl_psys_unit) 7280 rapl_psys_energy_units = 1.0; 7281 else 7282 rapl_psys_energy_units = rapl_energy_units; 7283 7284 time_unit = msr >> 16 & 0xF; 7285 if (time_unit == 0) 7286 time_unit = 0xA; 7287 7288 rapl_time_units = 1.0 / (1 << (time_unit)); 7289 7290 tdp = get_tdp_intel(); 7291 7292 rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; 7293 if (!quiet) 7294 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); 7295 } 7296 7297 void rapl_probe_amd(void) 7298 { 7299 unsigned long long msr; 7300 double tdp; 7301 const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt; 7302 const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J; 7303 7304 if (rapl_joules) 7305 bic_enabled &= ~bic_watt_bits; 7306 else 7307 bic_enabled &= ~bic_joules_bits; 7308 7309 if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr)) 7310 return; 7311 7312 rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf)); 7313 rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f)); 7314 rapl_power_units = ldexp(1.0, -(msr & 0xf)); 7315 7316 tdp = get_tdp_amd(); 7317 7318 rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; 7319 if (!quiet) 7320 fprintf(outf, "RAPL: %.0f sec. 
Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); 7321 } 7322 7323 void print_power_limit_msr(int cpu, unsigned long long msr, char *label) 7324 { 7325 fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n", 7326 cpu, label, 7327 ((msr >> 15) & 1) ? "EN" : "DIS", 7328 ((msr >> 0) & 0x7FFF) * rapl_power_units, 7329 (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, 7330 (((msr >> 16) & 1) ? "EN" : "DIS")); 7331 7332 return; 7333 } 7334 7335 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) 7336 { 7337 unsigned long long msr; 7338 const char *msr_name; 7339 int cpu; 7340 7341 UNUSED(c); 7342 UNUSED(p); 7343 7344 if (!platform->rapl_msrs) 7345 return 0; 7346 7347 /* RAPL counters are per package, so print only for 1st thread/package */ 7348 if (!is_cpu_first_thread_in_package(t, c, p)) 7349 return 0; 7350 7351 cpu = t->cpu_id; 7352 if (cpu_migrate(cpu)) { 7353 fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu); 7354 return -1; 7355 } 7356 7357 if (platform->rapl_msrs & RAPL_AMD_F17H) { 7358 msr_name = "MSR_RAPL_PWR_UNIT"; 7359 if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr)) 7360 return -1; 7361 } else { 7362 msr_name = "MSR_RAPL_POWER_UNIT"; 7363 if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) 7364 return -1; 7365 } 7366 7367 fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr, 7368 rapl_power_units, rapl_energy_units, rapl_time_units); 7369 7370 if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) { 7371 7372 if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) 7373 return -5; 7374 7375 fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 7376 cpu, msr, 7377 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7378 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7379 ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7380 ((msr >> 48) & RAPL_TIME_GRANULARITY) * 
rapl_time_units); 7381 7382 } 7383 if (platform->rapl_msrs & RAPL_PKG) { 7384 7385 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) 7386 return -9; 7387 7388 fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", 7389 cpu, msr, (msr >> 63) & 1 ? "" : "UN"); 7390 7391 print_power_limit_msr(cpu, msr, "PKG Limit #1"); 7392 fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n", 7393 cpu, 7394 ((msr >> 47) & 1) ? "EN" : "DIS", 7395 ((msr >> 32) & 0x7FFF) * rapl_power_units, 7396 (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, 7397 ((msr >> 48) & 1) ? "EN" : "DIS"); 7398 7399 if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr)) 7400 return -9; 7401 7402 fprintf(outf, "cpu%d: MSR_VR_CURRENT_CONFIG: 0x%08llx\n", cpu, msr); 7403 fprintf(outf, "cpu%d: PKG Limit #4: %f Watts (%slocked)\n", 7404 cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN"); 7405 } 7406 7407 if (platform->rapl_msrs & RAPL_DRAM_POWER_INFO) { 7408 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) 7409 return -6; 7410 7411 fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 7412 cpu, msr, 7413 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7414 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7415 ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7416 ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); 7417 } 7418 if (platform->rapl_msrs & RAPL_DRAM) { 7419 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) 7420 return -9; 7421 fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", 7422 cpu, msr, (msr >> 31) & 1 ? 
"" : "UN"); 7423 7424 print_power_limit_msr(cpu, msr, "DRAM Limit"); 7425 } 7426 if (platform->rapl_msrs & RAPL_CORE_POLICY) { 7427 if (get_msr(cpu, MSR_PP0_POLICY, &msr)) 7428 return -7; 7429 7430 fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); 7431 } 7432 if (platform->rapl_msrs & RAPL_CORE_POWER_LIMIT) { 7433 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) 7434 return -9; 7435 fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", 7436 cpu, msr, (msr >> 31) & 1 ? "" : "UN"); 7437 print_power_limit_msr(cpu, msr, "Cores Limit"); 7438 } 7439 if (platform->rapl_msrs & RAPL_GFX) { 7440 if (get_msr(cpu, MSR_PP1_POLICY, &msr)) 7441 return -8; 7442 7443 fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); 7444 7445 if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) 7446 return -9; 7447 fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", 7448 cpu, msr, (msr >> 31) & 1 ? "" : "UN"); 7449 print_power_limit_msr(cpu, msr, "GFX Limit"); 7450 } 7451 return 0; 7452 } 7453 7454 /* 7455 * probe_rapl() 7456 * 7457 * sets rapl_power_units, rapl_energy_units, rapl_time_units 7458 */ 7459 void probe_rapl(void) 7460 { 7461 if (!platform->rapl_msrs || no_msr) 7462 return; 7463 7464 if (genuine_intel) 7465 rapl_probe_intel(); 7466 if (authentic_amd || hygon_genuine) 7467 rapl_probe_amd(); 7468 7469 if (quiet) 7470 return; 7471 7472 for_all_cpus(print_rapl, ODD_COUNTERS); 7473 } 7474 7475 /* 7476 * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where 7477 * the Thermal Control Circuit (TCC) activates. 7478 * This is usually equal to tjMax. 7479 * 7480 * Older processors do not have this MSR, so there we guess, 7481 * but also allow cmdline over-ride with -T. 7482 * 7483 * Several MSR temperature values are in units of degrees-C 7484 * below this value, including the Digital Thermal Sensor (DTS), 7485 * Package Thermal Management Sensor (PTM), and thermal event thresholds. 
7486 */ 7487 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) 7488 { 7489 unsigned long long msr; 7490 unsigned int tcc_default, tcc_offset; 7491 int cpu; 7492 7493 UNUSED(c); 7494 UNUSED(p); 7495 7496 /* tj_max is used only for dts or ptm */ 7497 if (!(do_dts || do_ptm)) 7498 return 0; 7499 7500 /* this is a per-package concept */ 7501 if (!is_cpu_first_thread_in_package(t, c, p)) 7502 return 0; 7503 7504 cpu = t->cpu_id; 7505 if (cpu_migrate(cpu)) { 7506 fprintf(outf, "Could not migrate to CPU %d\n", cpu); 7507 return -1; 7508 } 7509 7510 if (tj_max_override != 0) { 7511 tj_max = tj_max_override; 7512 fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", cpu, tj_max); 7513 return 0; 7514 } 7515 7516 /* Temperature Target MSR is Nehalem and newer only */ 7517 if (!platform->has_nhm_msrs || no_msr) 7518 goto guess; 7519 7520 if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) 7521 goto guess; 7522 7523 tcc_default = (msr >> 16) & 0xFF; 7524 7525 if (!quiet) { 7526 int bits = platform->tcc_offset_bits; 7527 unsigned long long enabled = 0; 7528 7529 if (bits && !get_msr(base_cpu, MSR_PLATFORM_INFO, &enabled)) 7530 enabled = (enabled >> 30) & 1; 7531 7532 if (bits && enabled) { 7533 tcc_offset = (msr >> 24) & GENMASK(bits - 1, 0); 7534 fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", 7535 cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset); 7536 } else { 7537 fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default); 7538 } 7539 } 7540 7541 if (!tcc_default) 7542 goto guess; 7543 7544 tj_max = tcc_default; 7545 7546 return 0; 7547 7548 guess: 7549 tj_max = TJMAX_DEFAULT; 7550 fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max); 7551 7552 return 0; 7553 } 7554 7555 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) 7556 { 7557 unsigned long long msr; 7558 unsigned 
int dts, dts2; 7559 int cpu; 7560 7561 UNUSED(c); 7562 UNUSED(p); 7563 7564 if (no_msr) 7565 return 0; 7566 7567 if (!(do_dts || do_ptm)) 7568 return 0; 7569 7570 cpu = t->cpu_id; 7571 7572 /* DTS is per-core, no need to print for each thread */ 7573 if (!is_cpu_first_thread_in_core(t, c, p)) 7574 return 0; 7575 7576 if (cpu_migrate(cpu)) { 7577 fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu); 7578 return -1; 7579 } 7580 7581 if (do_ptm && is_cpu_first_core_in_package(t, c, p)) { 7582 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) 7583 return 0; 7584 7585 dts = (msr >> 16) & 0x7F; 7586 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts); 7587 7588 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr)) 7589 return 0; 7590 7591 dts = (msr >> 16) & 0x7F; 7592 dts2 = (msr >> 8) & 0x7F; 7593 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", 7594 cpu, msr, tj_max - dts, tj_max - dts2); 7595 } 7596 7597 if (do_dts && debug) { 7598 unsigned int resolution; 7599 7600 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) 7601 return 0; 7602 7603 dts = (msr >> 16) & 0x7F; 7604 resolution = (msr >> 27) & 0xF; 7605 fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", 7606 cpu, msr, tj_max - dts, resolution); 7607 7608 if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr)) 7609 return 0; 7610 7611 dts = (msr >> 16) & 0x7F; 7612 dts2 = (msr >> 8) & 0x7F; 7613 fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", 7614 cpu, msr, tj_max - dts, tj_max - dts2); 7615 } 7616 7617 return 0; 7618 } 7619 7620 void probe_thermal(void) 7621 { 7622 if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK)) 7623 BIC_PRESENT(BIC_CORE_THROT_CNT); 7624 else 7625 BIC_NOT_PRESENT(BIC_CORE_THROT_CNT); 7626 7627 for_all_cpus(set_temperature_target, ODD_COUNTERS); 7628 7629 if (quiet) 7630 return; 7631 7632 for_all_cpus(print_thermal, 
ODD_COUNTERS); 7633 } 7634 7635 int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p) 7636 { 7637 unsigned int eax, ebx, ecx, edx; 7638 7639 UNUSED(c); 7640 UNUSED(p); 7641 7642 if (!genuine_intel) 7643 return 0; 7644 7645 if (cpu_migrate(t->cpu_id)) { 7646 fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id); 7647 return -1; 7648 } 7649 7650 if (max_level < 0x1a) 7651 return 0; 7652 7653 __cpuid(0x1a, eax, ebx, ecx, edx); 7654 eax = (eax >> 24) & 0xFF; 7655 if (eax == 0x20) 7656 t->is_atom = true; 7657 return 0; 7658 } 7659 7660 void decode_feature_control_msr(void) 7661 { 7662 unsigned long long msr; 7663 7664 if (no_msr) 7665 return; 7666 7667 if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr)) 7668 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n", 7669 base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : ""); 7670 } 7671 7672 void decode_misc_enable_msr(void) 7673 { 7674 unsigned long long msr; 7675 7676 if (no_msr) 7677 return; 7678 7679 if (!genuine_intel) 7680 return; 7681 7682 if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr)) 7683 fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n", 7684 base_cpu, msr, 7685 msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-", 7686 msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-", 7687 msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-", 7688 msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", 7689 msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); 7690 } 7691 7692 void decode_misc_feature_control(void) 7693 { 7694 unsigned long long msr; 7695 7696 if (no_msr) 7697 return; 7698 7699 if (!platform->has_msr_misc_feature_control) 7700 return; 7701 7702 if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr)) 7703 fprintf(outf, 7704 "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n", 7705 base_cpu, msr, msr & (0 << 0) ? 
"No-" : "", msr & (1 << 0) ? "No-" : "", 7706 msr & (2 << 0) ? "No-" : "", msr & (3 << 0) ? "No-" : ""); 7707 } 7708 7709 /* 7710 * Decode MSR_MISC_PWR_MGMT 7711 * 7712 * Decode the bits according to the Nehalem documentation 7713 * bit[0] seems to continue to have same meaning going forward 7714 * bit[1] less so... 7715 */ 7716 void decode_misc_pwr_mgmt_msr(void) 7717 { 7718 unsigned long long msr; 7719 7720 if (no_msr) 7721 return; 7722 7723 if (!platform->has_msr_misc_pwr_mgmt) 7724 return; 7725 7726 if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) 7727 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n", 7728 base_cpu, msr, 7729 msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS"); 7730 } 7731 7732 /* 7733 * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG 7734 * 7735 * This MSRs are present on Silvermont processors, 7736 * Intel Atom processor E3000 series (Baytrail), and friends. 7737 */ 7738 void decode_c6_demotion_policy_msr(void) 7739 { 7740 unsigned long long msr; 7741 7742 if (no_msr) 7743 return; 7744 7745 if (!platform->has_msr_c6_demotion_policy_config) 7746 return; 7747 7748 if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr)) 7749 fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n", 7750 base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS"); 7751 7752 if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr)) 7753 fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n", 7754 base_cpu, msr, msr & (1 << 0) ? 
"EN" : "DIS"); 7755 } 7756 7757 void print_dev_latency(void) 7758 { 7759 char *path = "/dev/cpu_dma_latency"; 7760 int fd; 7761 int value; 7762 int retval; 7763 7764 fd = open(path, O_RDONLY); 7765 if (fd < 0) { 7766 if (debug) 7767 warnx("Read %s failed", path); 7768 return; 7769 } 7770 7771 retval = read(fd, (void *)&value, sizeof(int)); 7772 if (retval != sizeof(int)) { 7773 warn("read failed %s", path); 7774 close(fd); 7775 return; 7776 } 7777 fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", value, value == 2000000000 ? "default" : "constrained"); 7778 7779 close(fd); 7780 } 7781 7782 static int has_instr_count_access(void) 7783 { 7784 int fd; 7785 int has_access; 7786 7787 if (no_perf) 7788 return 0; 7789 7790 fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0); 7791 has_access = fd != -1; 7792 7793 if (fd != -1) 7794 close(fd); 7795 7796 if (!has_access) 7797 warnx("Failed to access %s. Some of the counters may not be available\n" 7798 "\tRun as root to enable them or use %s to disable the access explicitly", 7799 "instructions retired perf counter", "--no-perf"); 7800 7801 return has_access; 7802 } 7803 7804 int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, 7805 double *scale_, enum rapl_unit *unit_) 7806 { 7807 if (no_perf) 7808 return -1; 7809 7810 const double scale = read_perf_scale(cai->perf_subsys, cai->perf_name); 7811 7812 if (scale == 0.0) 7813 return -1; 7814 7815 const enum rapl_unit unit = read_perf_rapl_unit(cai->perf_subsys, cai->perf_name); 7816 7817 if (unit == RAPL_UNIT_INVALID) 7818 return -1; 7819 7820 const unsigned int rapl_type = read_perf_type(cai->perf_subsys); 7821 const unsigned int rapl_energy_pkg_config = read_perf_config(cai->perf_subsys, cai->perf_name); 7822 7823 const int fd_counter = 7824 open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP); 7825 if (fd_counter == -1) 7826 return -1; 7827 
7828 /* If it's the first counter opened, make it a group descriptor */ 7829 if (rci->fd_perf == -1) 7830 rci->fd_perf = fd_counter; 7831 7832 *scale_ = scale; 7833 *unit_ = unit; 7834 return fd_counter; 7835 } 7836 7837 int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, 7838 double *scale, enum rapl_unit *unit) 7839 { 7840 int ret = add_rapl_perf_counter_(cpu, rci, cai, scale, unit); 7841 7842 if (debug >= 2) 7843 fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu); 7844 7845 return ret; 7846 } 7847 7848 /* 7849 * Linux-perf manages the HW instructions-retired counter 7850 * by enabling when requested, and hiding rollover 7851 */ 7852 void linux_perf_init(void) 7853 { 7854 if (access("/proc/sys/kernel/perf_event_paranoid", F_OK)) 7855 return; 7856 7857 if (BIC_IS_ENABLED(BIC_IPC) && has_aperf) { 7858 fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 7859 if (fd_instr_count_percpu == NULL) 7860 err(-1, "calloc fd_instr_count_percpu"); 7861 } 7862 } 7863 7864 void rapl_perf_init(void) 7865 { 7866 const unsigned int num_domains = (platform->has_per_core_rapl ? 
topo.max_core_id : topo.max_package_id) + 1; 7867 bool *domain_visited = calloc(num_domains, sizeof(bool)); 7868 7869 rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain)); 7870 if (rapl_counter_info_perdomain == NULL) 7871 err(-1, "calloc rapl_counter_info_percpu"); 7872 rapl_counter_info_perdomain_size = num_domains; 7873 7874 /* 7875 * Initialize rapl_counter_info_percpu 7876 */ 7877 for (unsigned int domain_id = 0; domain_id < num_domains; ++domain_id) { 7878 struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain_id]; 7879 7880 rci->fd_perf = -1; 7881 for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) { 7882 rci->data[i] = 0; 7883 rci->source[i] = COUNTER_SOURCE_NONE; 7884 } 7885 } 7886 7887 /* 7888 * Open/probe the counters 7889 * If can't get it via perf, fallback to MSR 7890 */ 7891 for (size_t i = 0; i < ARRAY_SIZE(rapl_counter_arch_infos); ++i) { 7892 7893 const struct rapl_counter_arch_info *const cai = &rapl_counter_arch_infos[i]; 7894 bool has_counter = 0; 7895 double scale; 7896 enum rapl_unit unit; 7897 unsigned int next_domain; 7898 7899 memset(domain_visited, 0, num_domains * sizeof(*domain_visited)); 7900 7901 for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) { 7902 7903 if (cpu_is_not_allowed(cpu)) 7904 continue; 7905 7906 /* Skip already seen and handled RAPL domains */ 7907 next_domain = 7908 platform->has_per_core_rapl ? 
cpus[cpu].physical_core_id : cpus[cpu].physical_package_id; 7909 7910 assert(next_domain < num_domains); 7911 7912 if (domain_visited[next_domain]) 7913 continue; 7914 7915 domain_visited[next_domain] = 1; 7916 7917 if ((cai->flags & RAPL_COUNTER_FLAG_PLATFORM_COUNTER) && (cpu != base_cpu)) 7918 continue; 7919 7920 struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain]; 7921 7922 /* Check if the counter is enabled and accessible */ 7923 if (BIC_IS_ENABLED(cai->bic) && (platform->rapl_msrs & cai->feature_mask)) { 7924 7925 /* Use perf API for this counter */ 7926 if (!no_perf && cai->perf_name 7927 && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) { 7928 rci->source[cai->rci_index] = COUNTER_SOURCE_PERF; 7929 rci->scale[cai->rci_index] = scale * cai->compat_scale; 7930 rci->unit[cai->rci_index] = unit; 7931 rci->flags[cai->rci_index] = cai->flags; 7932 7933 /* Use MSR for this counter */ 7934 } else if (!no_msr && cai->msr && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) { 7935 rci->source[cai->rci_index] = COUNTER_SOURCE_MSR; 7936 rci->msr[cai->rci_index] = cai->msr; 7937 rci->msr_mask[cai->rci_index] = cai->msr_mask; 7938 rci->msr_shift[cai->rci_index] = cai->msr_shift; 7939 rci->unit[cai->rci_index] = RAPL_UNIT_JOULES; 7940 rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale; 7941 rci->flags[cai->rci_index] = cai->flags; 7942 } 7943 } 7944 7945 if (rci->source[cai->rci_index] != COUNTER_SOURCE_NONE) 7946 has_counter = 1; 7947 } 7948 7949 /* If any CPU has access to the counter, make it present */ 7950 if (has_counter) 7951 BIC_PRESENT(cai->bic); 7952 } 7953 7954 free(domain_visited); 7955 } 7956 7957 /* Assumes msr_counter_info is populated */ 7958 static int has_amperf_access(void) 7959 { 7960 return msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].present && 7961 msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].present; 7962 } 7963 7964 int *get_cstate_perf_group_fd(struct cstate_counter_info_t 
*cci, const char *group_name) 7965 { 7966 if (strcmp(group_name, "cstate_core") == 0) 7967 return &cci->fd_perf_core; 7968 7969 if (strcmp(group_name, "cstate_pkg") == 0) 7970 return &cci->fd_perf_pkg; 7971 7972 return NULL; 7973 } 7974 7975 int add_cstate_perf_counter_(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai) 7976 { 7977 if (no_perf) 7978 return -1; 7979 7980 int *pfd_group = get_cstate_perf_group_fd(cci, cai->perf_subsys); 7981 7982 if (pfd_group == NULL) 7983 return -1; 7984 7985 const unsigned int type = read_perf_type(cai->perf_subsys); 7986 const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name); 7987 7988 const int fd_counter = open_perf_counter(cpu, type, config, *pfd_group, PERF_FORMAT_GROUP); 7989 7990 if (fd_counter == -1) 7991 return -1; 7992 7993 /* If it's the first counter opened, make it a group descriptor */ 7994 if (*pfd_group == -1) 7995 *pfd_group = fd_counter; 7996 7997 return fd_counter; 7998 } 7999 8000 int add_cstate_perf_counter(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai) 8001 { 8002 int ret = add_cstate_perf_counter_(cpu, cci, cai); 8003 8004 if (debug >= 2) 8005 fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu); 8006 8007 return ret; 8008 } 8009 8010 int add_msr_perf_counter_(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai) 8011 { 8012 if (no_perf) 8013 return -1; 8014 8015 const unsigned int type = read_perf_type(cai->perf_subsys); 8016 const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name); 8017 8018 const int fd_counter = open_perf_counter(cpu, type, config, cci->fd_perf, PERF_FORMAT_GROUP); 8019 8020 if (fd_counter == -1) 8021 return -1; 8022 8023 /* If it's the first counter opened, make it a group descriptor */ 8024 if (cci->fd_perf == -1) 8025 cci->fd_perf = fd_counter; 8026 8027 return fd_counter; 8028 } 8029 8030 int add_msr_perf_counter(int cpu, struct 
msr_counter_info_t *cci, const struct msr_counter_arch_info *cai) 8031 { 8032 int ret = add_msr_perf_counter_(cpu, cci, cai); 8033 8034 if (debug) 8035 fprintf(stderr, "%s: %s/%s: %d (cpu: %d)\n", __func__, cai->perf_subsys, cai->perf_name, ret, cpu); 8036 8037 return ret; 8038 } 8039 8040 void msr_perf_init_(void) 8041 { 8042 const int mci_num = topo.max_cpu_num + 1; 8043 8044 msr_counter_info = calloc(mci_num, sizeof(*msr_counter_info)); 8045 if (!msr_counter_info) 8046 err(1, "calloc msr_counter_info"); 8047 msr_counter_info_size = mci_num; 8048 8049 for (int cpu = 0; cpu < mci_num; ++cpu) 8050 msr_counter_info[cpu].fd_perf = -1; 8051 8052 for (int cidx = 0; cidx < NUM_MSR_COUNTERS; ++cidx) { 8053 8054 struct msr_counter_arch_info *cai = &msr_counter_arch_infos[cidx]; 8055 8056 cai->present = false; 8057 8058 for (int cpu = 0; cpu < mci_num; ++cpu) { 8059 8060 struct msr_counter_info_t *const cci = &msr_counter_info[cpu]; 8061 8062 if (cpu_is_not_allowed(cpu)) 8063 continue; 8064 8065 if (cai->needed) { 8066 /* Use perf API for this counter */ 8067 if (!no_perf && cai->perf_name && add_msr_perf_counter(cpu, cci, cai) != -1) { 8068 cci->source[cai->rci_index] = COUNTER_SOURCE_PERF; 8069 cai->present = true; 8070 8071 /* User MSR for this counter */ 8072 } else if (!no_msr && cai->msr && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) { 8073 cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; 8074 cci->msr[cai->rci_index] = cai->msr; 8075 cci->msr_mask[cai->rci_index] = cai->msr_mask; 8076 cai->present = true; 8077 } 8078 } 8079 } 8080 } 8081 } 8082 8083 /* Initialize data for reading perf counters from the MSR group. 
*/ 8084 void msr_perf_init(void) 8085 { 8086 bool need_amperf = false, need_smi = false; 8087 const bool need_soft_c1 = (!platform->has_msr_core_c1_res) && (platform->supported_cstates & CC1); 8088 8089 need_amperf = BIC_IS_ENABLED(BIC_Avg_MHz) || BIC_IS_ENABLED(BIC_Busy) || BIC_IS_ENABLED(BIC_Bzy_MHz) 8090 || BIC_IS_ENABLED(BIC_IPC) || need_soft_c1; 8091 8092 if (BIC_IS_ENABLED(BIC_SMI)) 8093 need_smi = true; 8094 8095 /* Enable needed counters */ 8096 msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].needed = need_amperf; 8097 msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].needed = need_amperf; 8098 msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].needed = need_smi; 8099 8100 msr_perf_init_(); 8101 8102 const bool has_amperf = has_amperf_access(); 8103 const bool has_smi = msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].present; 8104 8105 has_aperf_access = has_amperf; 8106 8107 if (has_amperf) { 8108 BIC_PRESENT(BIC_Avg_MHz); 8109 BIC_PRESENT(BIC_Busy); 8110 BIC_PRESENT(BIC_Bzy_MHz); 8111 BIC_PRESENT(BIC_SMI); 8112 } 8113 8114 if (has_smi) 8115 BIC_PRESENT(BIC_SMI); 8116 } 8117 8118 void cstate_perf_init_(bool soft_c1) 8119 { 8120 bool has_counter; 8121 bool *cores_visited = NULL, *pkg_visited = NULL; 8122 const int cores_visited_elems = topo.max_core_id + 1; 8123 const int pkg_visited_elems = topo.max_package_id + 1; 8124 const int cci_num = topo.max_cpu_num + 1; 8125 8126 ccstate_counter_info = calloc(cci_num, sizeof(*ccstate_counter_info)); 8127 if (!ccstate_counter_info) 8128 err(1, "calloc ccstate_counter_arch_info"); 8129 ccstate_counter_info_size = cci_num; 8130 8131 cores_visited = calloc(cores_visited_elems, sizeof(*cores_visited)); 8132 if (!cores_visited) 8133 err(1, "calloc cores_visited"); 8134 8135 pkg_visited = calloc(pkg_visited_elems, sizeof(*pkg_visited)); 8136 if (!pkg_visited) 8137 err(1, "calloc pkg_visited"); 8138 8139 /* Initialize cstate_counter_info_percpu */ 8140 for (int cpu = 0; cpu < cci_num; ++cpu) { 8141 
ccstate_counter_info[cpu].fd_perf_core = -1; 8142 ccstate_counter_info[cpu].fd_perf_pkg = -1; 8143 } 8144 8145 for (int cidx = 0; cidx < NUM_CSTATE_COUNTERS; ++cidx) { 8146 has_counter = false; 8147 memset(cores_visited, 0, cores_visited_elems * sizeof(*cores_visited)); 8148 memset(pkg_visited, 0, pkg_visited_elems * sizeof(*pkg_visited)); 8149 8150 const struct cstate_counter_arch_info *cai = &ccstate_counter_arch_infos[cidx]; 8151 8152 for (int cpu = 0; cpu < cci_num; ++cpu) { 8153 8154 struct cstate_counter_info_t *const cci = &ccstate_counter_info[cpu]; 8155 8156 if (cpu_is_not_allowed(cpu)) 8157 continue; 8158 8159 const int core_id = cpus[cpu].physical_core_id; 8160 const int pkg_id = cpus[cpu].physical_package_id; 8161 8162 assert(core_id < cores_visited_elems); 8163 assert(pkg_id < pkg_visited_elems); 8164 8165 const bool per_thread = cai->flags & CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD; 8166 const bool per_core = cai->flags & CSTATE_COUNTER_FLAG_COLLECT_PER_CORE; 8167 8168 if (!per_thread && cores_visited[core_id]) 8169 continue; 8170 8171 if (!per_core && pkg_visited[pkg_id]) 8172 continue; 8173 8174 const bool counter_needed = BIC_IS_ENABLED(cai->bic) || 8175 (soft_c1 && (cai->flags & CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY)); 8176 const bool counter_supported = (platform->supported_cstates & cai->feature_mask); 8177 8178 if (counter_needed && counter_supported) { 8179 /* Use perf API for this counter */ 8180 if (!no_perf && cai->perf_name && add_cstate_perf_counter(cpu, cci, cai) != -1) { 8181 8182 cci->source[cai->rci_index] = COUNTER_SOURCE_PERF; 8183 8184 /* User MSR for this counter */ 8185 } else if (!no_msr && cai->msr && pkg_cstate_limit >= cai->pkg_cstate_limit 8186 && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) { 8187 cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; 8188 cci->msr[cai->rci_index] = cai->msr; 8189 } 8190 } 8191 8192 if (cci->source[cai->rci_index] != COUNTER_SOURCE_NONE) { 8193 has_counter = true; 8194 
cores_visited[core_id] = true; 8195 pkg_visited[pkg_id] = true; 8196 } 8197 } 8198 8199 /* If any CPU has access to the counter, make it present */ 8200 if (has_counter) 8201 BIC_PRESENT(cai->bic); 8202 } 8203 8204 free(cores_visited); 8205 free(pkg_visited); 8206 } 8207 8208 void cstate_perf_init(void) 8209 { 8210 /* 8211 * If we don't have a C1 residency MSR, we calculate it "in software", 8212 * but we need APERF, MPERF too. 8213 */ 8214 const bool soft_c1 = !platform->has_msr_core_c1_res && has_amperf_access() 8215 && platform->supported_cstates & CC1; 8216 8217 if (soft_c1) 8218 BIC_PRESENT(BIC_CPU_c1); 8219 8220 cstate_perf_init_(soft_c1); 8221 } 8222 8223 void probe_cstates(void) 8224 { 8225 probe_cst_limit(); 8226 8227 if (platform->has_msr_module_c6_res_ms) 8228 BIC_PRESENT(BIC_Mod_c6); 8229 8230 if (platform->has_ext_cst_msrs && !no_msr) { 8231 BIC_PRESENT(BIC_Totl_c0); 8232 BIC_PRESENT(BIC_Any_c0); 8233 BIC_PRESENT(BIC_GFX_c0); 8234 BIC_PRESENT(BIC_CPUGFX); 8235 } 8236 8237 if (quiet) 8238 return; 8239 8240 dump_power_ctl(); 8241 dump_cst_cfg(); 8242 decode_c6_demotion_policy_msr(); 8243 print_dev_latency(); 8244 dump_sysfs_cstate_config(); 8245 print_irtl(); 8246 } 8247 8248 void probe_lpi(void) 8249 { 8250 if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK)) 8251 BIC_PRESENT(BIC_CPU_LPI); 8252 else 8253 BIC_NOT_PRESENT(BIC_CPU_LPI); 8254 8255 if (!access(sys_lpi_file_sysfs, R_OK)) { 8256 sys_lpi_file = sys_lpi_file_sysfs; 8257 BIC_PRESENT(BIC_SYS_LPI); 8258 } else if (!access(sys_lpi_file_debugfs, R_OK)) { 8259 sys_lpi_file = sys_lpi_file_debugfs; 8260 BIC_PRESENT(BIC_SYS_LPI); 8261 } else { 8262 sys_lpi_file_sysfs = NULL; 8263 BIC_NOT_PRESENT(BIC_SYS_LPI); 8264 } 8265 8266 } 8267 8268 void probe_pstates(void) 8269 { 8270 probe_bclk(); 8271 8272 if (quiet) 8273 return; 8274 8275 dump_platform_info(); 8276 dump_turbo_ratio_info(); 8277 dump_sysfs_pstate_config(); 8278 decode_misc_pwr_mgmt_msr(); 8279 8280 
for_all_cpus(print_hwp, ODD_COUNTERS); 8281 for_all_cpus(print_epb, ODD_COUNTERS); 8282 for_all_cpus(print_perf_limit, ODD_COUNTERS); 8283 } 8284 8285 void process_cpuid() 8286 { 8287 unsigned int eax, ebx, ecx, edx; 8288 unsigned int fms, family, model, stepping, ecx_flags, edx_flags; 8289 unsigned long long ucode_patch = 0; 8290 bool ucode_patch_valid = false; 8291 8292 eax = ebx = ecx = edx = 0; 8293 8294 __cpuid(0, max_level, ebx, ecx, edx); 8295 8296 if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) 8297 genuine_intel = 1; 8298 else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) 8299 authentic_amd = 1; 8300 else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e) 8301 hygon_genuine = 1; 8302 8303 if (!quiet) 8304 fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n", 8305 (char *)&ebx, (char *)&edx, (char *)&ecx, max_level); 8306 8307 __cpuid(1, fms, ebx, ecx, edx); 8308 family = (fms >> 8) & 0xf; 8309 model = (fms >> 4) & 0xf; 8310 stepping = fms & 0xf; 8311 if (family == 0xf) 8312 family += (fms >> 20) & 0xff; 8313 if (family >= 6) 8314 model += ((fms >> 16) & 0xf) << 4; 8315 ecx_flags = ecx; 8316 edx_flags = edx; 8317 8318 if (!no_msr) { 8319 if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch)) 8320 warnx("get_msr(UCODE)"); 8321 else 8322 ucode_patch_valid = true; 8323 } 8324 8325 /* 8326 * check max extended function levels of CPUID. 8327 * This is needed to check for invariant TSC. 8328 * This check is valid for both Intel and AMD. 
8329 */ 8330 ebx = ecx = edx = 0; 8331 __cpuid(0x80000000, max_extended_level, ebx, ecx, edx); 8332 8333 if (!quiet) { 8334 fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)", 8335 family, model, stepping, family, model, stepping); 8336 if (ucode_patch_valid) 8337 fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF)); 8338 fputc('\n', outf); 8339 8340 fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level); 8341 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n", 8342 ecx_flags & (1 << 0) ? "SSE3" : "-", 8343 ecx_flags & (1 << 3) ? "MONITOR" : "-", 8344 ecx_flags & (1 << 6) ? "SMX" : "-", 8345 ecx_flags & (1 << 7) ? "EIST" : "-", 8346 ecx_flags & (1 << 8) ? "TM2" : "-", 8347 edx_flags & (1 << 4) ? "TSC" : "-", 8348 edx_flags & (1 << 5) ? "MSR" : "-", 8349 edx_flags & (1 << 22) ? "ACPI-TM" : "-", 8350 edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-"); 8351 } 8352 8353 probe_platform_features(family, model); 8354 8355 if (!(edx_flags & (1 << 5))) 8356 errx(1, "CPUID: no MSR"); 8357 8358 if (max_extended_level >= 0x80000007) { 8359 8360 /* 8361 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 8362 * this check is valid for both Intel and AMD 8363 */ 8364 __cpuid(0x80000007, eax, ebx, ecx, edx); 8365 has_invariant_tsc = edx & (1 << 8); 8366 } 8367 8368 /* 8369 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 8370 * this check is valid for both Intel and AMD 8371 */ 8372 8373 __cpuid(0x6, eax, ebx, ecx, edx); 8374 has_aperf = ecx & (1 << 0); 8375 do_dts = eax & (1 << 0); 8376 if (do_dts) 8377 BIC_PRESENT(BIC_CoreTmp); 8378 has_turbo = eax & (1 << 1); 8379 do_ptm = eax & (1 << 6); 8380 if (do_ptm) 8381 BIC_PRESENT(BIC_PkgTmp); 8382 has_hwp = eax & (1 << 7); 8383 has_hwp_notify = eax & (1 << 8); 8384 has_hwp_activity_window = eax & (1 << 9); 8385 has_hwp_epp = eax & (1 << 10); 8386 has_hwp_pkg = eax & (1 << 11); 8387 has_epb = ecx & (1 << 3); 8388 
8389 if (!quiet) 8390 fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, " 8391 "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", 8392 has_aperf ? "" : "No-", 8393 has_turbo ? "" : "No-", 8394 do_dts ? "" : "No-", 8395 do_ptm ? "" : "No-", 8396 has_hwp ? "" : "No-", 8397 has_hwp_notify ? "" : "No-", 8398 has_hwp_activity_window ? "" : "No-", 8399 has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-"); 8400 8401 if (!quiet) 8402 decode_misc_enable_msr(); 8403 8404 if (max_level >= 0x7 && !quiet) { 8405 int has_sgx; 8406 8407 ecx = 0; 8408 8409 __cpuid_count(0x7, 0, eax, ebx, ecx, edx); 8410 8411 has_sgx = ebx & (1 << 2); 8412 8413 is_hybrid = edx & (1 << 15); 8414 8415 fprintf(outf, "CPUID(7): %sSGX %sHybrid\n", has_sgx ? "" : "No-", is_hybrid ? "" : "No-"); 8416 8417 if (has_sgx) 8418 decode_feature_control_msr(); 8419 } 8420 8421 if (max_level >= 0x15) { 8422 unsigned int eax_crystal; 8423 unsigned int ebx_tsc; 8424 8425 /* 8426 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz 8427 */ 8428 eax_crystal = ebx_tsc = crystal_hz = edx = 0; 8429 __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx); 8430 8431 if (ebx_tsc != 0) { 8432 if (!quiet && (ebx != 0)) 8433 fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", 8434 eax_crystal, ebx_tsc, crystal_hz); 8435 8436 if (crystal_hz == 0) 8437 crystal_hz = platform->crystal_freq; 8438 8439 if (crystal_hz) { 8440 tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal; 8441 if (!quiet) 8442 fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", 8443 tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); 8444 } 8445 } 8446 } 8447 if (max_level >= 0x16) { 8448 unsigned int base_mhz, max_mhz, bus_mhz, edx; 8449 8450 /* 8451 * CPUID 16H Base MHz, Max MHz, Bus MHz 8452 */ 8453 base_mhz = max_mhz = bus_mhz = edx = 0; 8454 8455 __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx); 8456 8457 bclk = bus_mhz; 8458 8459 base_hz = base_mhz * 1000000; 8460 
has_base_hz = 1; 8461 8462 if (platform->enable_tsc_tweak) 8463 tsc_tweak = base_hz / tsc_hz; 8464 8465 if (!quiet) 8466 fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", 8467 base_mhz, max_mhz, bus_mhz); 8468 } 8469 8470 if (has_aperf) 8471 aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1; 8472 8473 BIC_PRESENT(BIC_IRQ); 8474 BIC_PRESENT(BIC_NMI); 8475 BIC_PRESENT(BIC_TSC_MHz); 8476 } 8477 8478 static void counter_info_init(void) 8479 { 8480 for (int i = 0; i < NUM_CSTATE_COUNTERS; ++i) { 8481 struct cstate_counter_arch_info *const cai = &ccstate_counter_arch_infos[i]; 8482 8483 if (platform->has_msr_knl_core_c6_residency && cai->msr == MSR_CORE_C6_RESIDENCY) 8484 cai->msr = MSR_KNL_CORE_C6_RESIDENCY; 8485 8486 if (!platform->has_msr_core_c1_res && cai->msr == MSR_CORE_C1_RES) 8487 cai->msr = 0; 8488 8489 if (platform->has_msr_atom_pkg_c6_residency && cai->msr == MSR_PKG_C6_RESIDENCY) 8490 cai->msr = MSR_ATOM_PKG_C6_RESIDENCY; 8491 } 8492 8493 for (int i = 0; i < NUM_MSR_COUNTERS; ++i) { 8494 msr_counter_arch_infos[i].present = false; 8495 msr_counter_arch_infos[i].needed = false; 8496 } 8497 } 8498 8499 void probe_pm_features(void) 8500 { 8501 probe_pstates(); 8502 8503 probe_cstates(); 8504 8505 probe_lpi(); 8506 8507 probe_intel_uncore_frequency(); 8508 8509 probe_graphics(); 8510 8511 probe_rapl(); 8512 8513 probe_thermal(); 8514 8515 if (platform->has_nhm_msrs && !no_msr) 8516 BIC_PRESENT(BIC_SMI); 8517 8518 if (!quiet) 8519 decode_misc_feature_control(); 8520 } 8521 8522 /* 8523 * in /dev/cpu/ return success for names that are numbers 8524 * ie. filter out ".", "..", "microcode". 
8525 */ 8526 int dir_filter(const struct dirent *dirp) 8527 { 8528 if (isdigit(dirp->d_name[0])) 8529 return 1; 8530 else 8531 return 0; 8532 } 8533 8534 char *possible_file = "/sys/devices/system/cpu/possible"; 8535 char possible_buf[1024]; 8536 8537 int initialize_cpu_possible_set(void) 8538 { 8539 FILE *fp; 8540 8541 fp = fopen(possible_file, "r"); 8542 if (!fp) { 8543 warn("open %s", possible_file); 8544 return -1; 8545 } 8546 if (fread(possible_buf, sizeof(char), 1024, fp) == 0) { 8547 warn("read %s", possible_file); 8548 goto err; 8549 } 8550 if (parse_cpu_str(possible_buf, cpu_possible_set, cpu_possible_setsize)) { 8551 warnx("%s: cpu str malformat %s\n", possible_file, cpu_effective_str); 8552 goto err; 8553 } 8554 return 0; 8555 8556 err: 8557 fclose(fp); 8558 return -1; 8559 } 8560 8561 void topology_probe(bool startup) 8562 { 8563 int i; 8564 int max_core_id = 0; 8565 int max_package_id = 0; 8566 int max_siblings = 0; 8567 8568 /* Initialize num_cpus, max_cpu_num */ 8569 set_max_cpu_num(); 8570 topo.num_cpus = 0; 8571 for_all_proc_cpus(count_cpus); 8572 if (!summary_only) 8573 BIC_PRESENT(BIC_CPU); 8574 8575 if (debug > 1) 8576 fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); 8577 8578 cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); 8579 if (cpus == NULL) 8580 err(1, "calloc cpus"); 8581 8582 /* 8583 * Allocate and initialize cpu_present_set 8584 */ 8585 cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1)); 8586 if (cpu_present_set == NULL) 8587 err(3, "CPU_ALLOC"); 8588 cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 8589 CPU_ZERO_S(cpu_present_setsize, cpu_present_set); 8590 for_all_proc_cpus(mark_cpu_present); 8591 8592 /* 8593 * Allocate and initialize cpu_possible_set 8594 */ 8595 cpu_possible_set = CPU_ALLOC((topo.max_cpu_num + 1)); 8596 if (cpu_possible_set == NULL) 8597 err(3, "CPU_ALLOC"); 8598 cpu_possible_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 8599 
CPU_ZERO_S(cpu_possible_setsize, cpu_possible_set); 8600 initialize_cpu_possible_set(); 8601 8602 /* 8603 * Allocate and initialize cpu_effective_set 8604 */ 8605 cpu_effective_set = CPU_ALLOC((topo.max_cpu_num + 1)); 8606 if (cpu_effective_set == NULL) 8607 err(3, "CPU_ALLOC"); 8608 cpu_effective_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 8609 CPU_ZERO_S(cpu_effective_setsize, cpu_effective_set); 8610 update_effective_set(startup); 8611 8612 /* 8613 * Allocate and initialize cpu_allowed_set 8614 */ 8615 cpu_allowed_set = CPU_ALLOC((topo.max_cpu_num + 1)); 8616 if (cpu_allowed_set == NULL) 8617 err(3, "CPU_ALLOC"); 8618 cpu_allowed_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 8619 CPU_ZERO_S(cpu_allowed_setsize, cpu_allowed_set); 8620 8621 /* 8622 * Validate and update cpu_allowed_set. 8623 * 8624 * Make sure all cpus in cpu_subset are also in cpu_present_set during startup. 8625 * Give a warning when cpus in cpu_subset become unavailable at runtime. 8626 * Give a warning when cpus are not effective because of cgroup setting. 8627 * 8628 * cpu_allowed_set is the intersection of cpu_present_set/cpu_effective_set/cpu_subset. 
8629 */ 8630 for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) { 8631 if (cpu_subset && !CPU_ISSET_S(i, cpu_subset_size, cpu_subset)) 8632 continue; 8633 8634 if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) { 8635 if (cpu_subset) { 8636 /* cpus in cpu_subset must be in cpu_present_set during startup */ 8637 if (startup) 8638 err(1, "cpu%d not present", i); 8639 else 8640 fprintf(stderr, "cpu%d not present\n", i); 8641 } 8642 continue; 8643 } 8644 8645 if (CPU_COUNT_S(cpu_effective_setsize, cpu_effective_set)) { 8646 if (!CPU_ISSET_S(i, cpu_effective_setsize, cpu_effective_set)) { 8647 fprintf(stderr, "cpu%d not effective\n", i); 8648 continue; 8649 } 8650 } 8651 8652 CPU_SET_S(i, cpu_allowed_setsize, cpu_allowed_set); 8653 } 8654 8655 if (!CPU_COUNT_S(cpu_allowed_setsize, cpu_allowed_set)) 8656 err(-ENODEV, "No valid cpus found"); 8657 sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set); 8658 8659 /* 8660 * Allocate and initialize cpu_affinity_set 8661 */ 8662 cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1)); 8663 if (cpu_affinity_set == NULL) 8664 err(3, "CPU_ALLOC"); 8665 cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 8666 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 8667 8668 for_all_proc_cpus(init_thread_id); 8669 8670 for_all_proc_cpus(set_cpu_hybrid_type); 8671 8672 /* 8673 * For online cpus 8674 * find max_core_id, max_package_id 8675 */ 8676 for (i = 0; i <= topo.max_cpu_num; ++i) { 8677 int siblings; 8678 8679 if (cpu_is_not_present(i)) { 8680 if (debug > 1) 8681 fprintf(outf, "cpu%d NOT PRESENT\n", i); 8682 continue; 8683 } 8684 8685 cpus[i].logical_cpu_id = i; 8686 8687 /* get package information */ 8688 cpus[i].physical_package_id = get_physical_package_id(i); 8689 if (cpus[i].physical_package_id > max_package_id) 8690 max_package_id = cpus[i].physical_package_id; 8691 8692 /* get die information */ 8693 cpus[i].die_id = get_die_id(i); 8694 if (cpus[i].die_id > topo.max_die_id) 8695 topo.max_die_id = cpus[i].die_id; 
8696 8697 /* get numa node information */ 8698 cpus[i].physical_node_id = get_physical_node_id(&cpus[i]); 8699 if (cpus[i].physical_node_id > topo.max_node_num) 8700 topo.max_node_num = cpus[i].physical_node_id; 8701 8702 /* get core information */ 8703 cpus[i].physical_core_id = get_core_id(i); 8704 if (cpus[i].physical_core_id > max_core_id) 8705 max_core_id = cpus[i].physical_core_id; 8706 8707 /* get thread information */ 8708 siblings = get_thread_siblings(&cpus[i]); 8709 if (siblings > max_siblings) 8710 max_siblings = siblings; 8711 if (cpus[i].thread_id == 0) 8712 topo.num_cores++; 8713 } 8714 topo.max_core_id = max_core_id; 8715 topo.max_package_id = max_package_id; 8716 8717 topo.cores_per_node = max_core_id + 1; 8718 if (debug > 1) 8719 fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node); 8720 if (!summary_only) 8721 BIC_PRESENT(BIC_Core); 8722 8723 topo.num_die = topo.max_die_id + 1; 8724 if (debug > 1) 8725 fprintf(outf, "max_die_id %d, sizing for %d die\n", topo.max_die_id, topo.num_die); 8726 if (!summary_only && topo.num_die > 1) 8727 BIC_PRESENT(BIC_Die); 8728 8729 topo.num_packages = max_package_id + 1; 8730 if (debug > 1) 8731 fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); 8732 if (!summary_only && topo.num_packages > 1) 8733 BIC_PRESENT(BIC_Package); 8734 8735 set_node_data(); 8736 if (debug > 1) 8737 fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg); 8738 if (!summary_only && topo.nodes_per_pkg > 1) 8739 BIC_PRESENT(BIC_Node); 8740 8741 topo.threads_per_core = max_siblings; 8742 if (debug > 1) 8743 fprintf(outf, "max_siblings %d\n", max_siblings); 8744 8745 if (debug < 1) 8746 return; 8747 8748 for (i = 0; i <= topo.max_cpu_num; ++i) { 8749 if (cpu_is_not_present(i)) 8750 continue; 8751 fprintf(outf, 8752 "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n", 8753 i, cpus[i].physical_package_id, cpus[i].die_id, 8754 
cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, cpus[i].thread_id); 8755 } 8756 8757 } 8758 8759 void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) 8760 { 8761 int i; 8762 int num_cores = topo.cores_per_node * topo.nodes_per_pkg * topo.num_packages; 8763 int num_threads = topo.threads_per_core * num_cores; 8764 8765 *t = calloc(num_threads, sizeof(struct thread_data)); 8766 if (*t == NULL) 8767 goto error; 8768 8769 for (i = 0; i < num_threads; i++) 8770 (*t)[i].cpu_id = -1; 8771 8772 *c = calloc(num_cores, sizeof(struct core_data)); 8773 if (*c == NULL) 8774 goto error; 8775 8776 for (i = 0; i < num_cores; i++) { 8777 (*c)[i].core_id = -1; 8778 (*c)[i].base_cpu = -1; 8779 } 8780 8781 *p = calloc(topo.num_packages, sizeof(struct pkg_data)); 8782 if (*p == NULL) 8783 goto error; 8784 8785 for (i = 0; i < topo.num_packages; i++) { 8786 (*p)[i].package_id = i; 8787 (*p)[i].base_cpu = -1; 8788 } 8789 8790 return; 8791 error: 8792 err(1, "calloc counters"); 8793 } 8794 8795 /* 8796 * init_counter() 8797 * 8798 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE 8799 */ 8800 void init_counter(struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base, int cpu_id) 8801 { 8802 int pkg_id = cpus[cpu_id].physical_package_id; 8803 int node_id = cpus[cpu_id].logical_node_id; 8804 int core_id = cpus[cpu_id].physical_core_id; 8805 int thread_id = cpus[cpu_id].thread_id; 8806 struct thread_data *t; 8807 struct core_data *c; 8808 struct pkg_data *p; 8809 8810 /* Workaround for systems where physical_node_id==-1 8811 * and logical_node_id==(-1 - topo.num_cpus) 8812 */ 8813 if (node_id < 0) 8814 node_id = 0; 8815 8816 t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id); 8817 c = GET_CORE(core_base, core_id, node_id, pkg_id); 8818 p = GET_PKG(pkg_base, pkg_id); 8819 8820 t->cpu_id = cpu_id; 8821 if (!cpu_is_not_allowed(cpu_id)) { 8822 if (c->base_cpu < 0) 8823 c->base_cpu = 
t->cpu_id; 8824 if (p->base_cpu < 0) 8825 p->base_cpu = t->cpu_id; 8826 } 8827 8828 c->core_id = core_id; 8829 p->package_id = pkg_id; 8830 } 8831 8832 int initialize_counters(int cpu_id) 8833 { 8834 init_counter(EVEN_COUNTERS, cpu_id); 8835 init_counter(ODD_COUNTERS, cpu_id); 8836 return 0; 8837 } 8838 8839 void allocate_output_buffer() 8840 { 8841 output_buffer = calloc(1, (1 + topo.num_cpus) * 2048); 8842 outp = output_buffer; 8843 if (outp == NULL) 8844 err(-1, "calloc output buffer"); 8845 } 8846 8847 void allocate_fd_percpu(void) 8848 { 8849 fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 8850 if (fd_percpu == NULL) 8851 err(-1, "calloc fd_percpu"); 8852 } 8853 8854 void allocate_irq_buffers(void) 8855 { 8856 irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int)); 8857 if (irq_column_2_cpu == NULL) 8858 err(-1, "calloc %d", topo.num_cpus); 8859 8860 irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 8861 if (irqs_per_cpu == NULL) 8862 err(-1, "calloc %d IRQ", topo.max_cpu_num + 1); 8863 8864 nmi_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 8865 if (nmi_per_cpu == NULL) 8866 err(-1, "calloc %d NMI", topo.max_cpu_num + 1); 8867 } 8868 8869 int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p) 8870 { 8871 topo.allowed_cpus++; 8872 if ((int)t->cpu_id == c->base_cpu) 8873 topo.allowed_cores++; 8874 if ((int)t->cpu_id == p->base_cpu) 8875 topo.allowed_packages++; 8876 8877 return 0; 8878 } 8879 8880 void topology_update(void) 8881 { 8882 topo.allowed_cpus = 0; 8883 topo.allowed_cores = 0; 8884 topo.allowed_packages = 0; 8885 for_all_cpus(update_topo, ODD_COUNTERS); 8886 } 8887 8888 void setup_all_buffers(bool startup) 8889 { 8890 topology_probe(startup); 8891 allocate_irq_buffers(); 8892 allocate_fd_percpu(); 8893 allocate_counters(&thread_even, &core_even, &package_even); 8894 allocate_counters(&thread_odd, &core_odd, &package_odd); 8895 allocate_output_buffer(); 8896 for_all_proc_cpus(initialize_counters); 8897 
topology_update(); 8898 } 8899 8900 void set_base_cpu(void) 8901 { 8902 int i; 8903 8904 for (i = 0; i < topo.max_cpu_num + 1; ++i) { 8905 if (cpu_is_not_allowed(i)) 8906 continue; 8907 base_cpu = i; 8908 if (debug > 1) 8909 fprintf(outf, "base_cpu = %d\n", base_cpu); 8910 return; 8911 } 8912 err(-ENODEV, "No valid cpus found"); 8913 } 8914 8915 bool has_added_counters(void) 8916 { 8917 /* 8918 * It only makes sense to call this after the command line is parsed, 8919 * otherwise sys structure is not populated. 8920 */ 8921 8922 return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters; 8923 } 8924 8925 void check_msr_access(void) 8926 { 8927 check_dev_msr(); 8928 check_msr_permission(); 8929 8930 if (no_msr) 8931 bic_disable_msr_access(); 8932 } 8933 8934 void check_perf_access(void) 8935 { 8936 if (no_perf || !BIC_IS_ENABLED(BIC_IPC) || !has_instr_count_access()) 8937 bic_enabled &= ~BIC_IPC; 8938 } 8939 8940 bool perf_has_hybrid_devices(void) 8941 { 8942 /* 8943 * 0: unknown 8944 * 1: has separate perf device for p and e core 8945 * -1: doesn't have separate perf device for p and e core 8946 */ 8947 static int cached; 8948 8949 if (cached > 0) 8950 return true; 8951 8952 if (cached < 0) 8953 return false; 8954 8955 if (access("/sys/bus/event_source/devices/cpu_core", F_OK)) { 8956 cached = -1; 8957 return false; 8958 } 8959 8960 if (access("/sys/bus/event_source/devices/cpu_atom", F_OK)) { 8961 cached = -1; 8962 return false; 8963 } 8964 8965 cached = 1; 8966 return true; 8967 } 8968 8969 int added_perf_counters_init_(struct perf_counter_info *pinfo) 8970 { 8971 size_t num_domains = 0; 8972 unsigned int next_domain; 8973 bool *domain_visited; 8974 unsigned int perf_type, perf_config; 8975 double perf_scale; 8976 int fd_perf; 8977 8978 if (!pinfo) 8979 return 0; 8980 8981 const size_t max_num_domains = MAX(topo.max_cpu_num + 1, MAX(topo.max_core_id + 1, topo.max_package_id + 1)); 8982 8983 domain_visited = calloc(max_num_domains, 
sizeof(*domain_visited)); 8984 8985 while (pinfo) { 8986 switch (pinfo->scope) { 8987 case SCOPE_CPU: 8988 num_domains = topo.max_cpu_num + 1; 8989 break; 8990 8991 case SCOPE_CORE: 8992 num_domains = topo.max_core_id + 1; 8993 break; 8994 8995 case SCOPE_PACKAGE: 8996 num_domains = topo.max_package_id + 1; 8997 break; 8998 } 8999 9000 /* Allocate buffer for file descriptor for each domain. */ 9001 pinfo->fd_perf_per_domain = calloc(num_domains, sizeof(*pinfo->fd_perf_per_domain)); 9002 if (!pinfo->fd_perf_per_domain) 9003 errx(1, "%s: alloc %s", __func__, "fd_perf_per_domain"); 9004 9005 for (size_t i = 0; i < num_domains; ++i) 9006 pinfo->fd_perf_per_domain[i] = -1; 9007 9008 pinfo->num_domains = num_domains; 9009 pinfo->scale = 1.0; 9010 9011 memset(domain_visited, 0, max_num_domains * sizeof(*domain_visited)); 9012 9013 for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) { 9014 9015 next_domain = cpu_to_domain(pinfo, cpu); 9016 9017 assert(next_domain < num_domains); 9018 9019 if (cpu_is_not_allowed(cpu)) 9020 continue; 9021 9022 if (domain_visited[next_domain]) 9023 continue; 9024 9025 /* 9026 * Intel hybrid platforms expose different perf devices for P and E cores. 9027 * Instead of one, "/sys/bus/event_source/devices/cpu" device, there are 9028 * "/sys/bus/event_source/devices/{cpu_core,cpu_atom}". 9029 * 9030 * This makes it more complicated to the user, because most of the counters 9031 * are available on both and have to be handled manually, otherwise. 9032 * 9033 * Code below, allow user to use the old "cpu" name, which is translated accordingly. 9034 */ 9035 const char *perf_device = pinfo->device; 9036 9037 if (strcmp(perf_device, "cpu") == 0 && perf_has_hybrid_devices()) { 9038 switch (cpus[cpu].type) { 9039 case INTEL_PCORE_TYPE: 9040 perf_device = "cpu_core"; 9041 break; 9042 9043 case INTEL_ECORE_TYPE: 9044 perf_device = "cpu_atom"; 9045 break; 9046 9047 default: /* Don't change, we will probably fail and report a problem soon. 
*/ 9048 break; 9049 } 9050 } 9051 9052 perf_type = read_perf_type(perf_device); 9053 if (perf_type == (unsigned int)-1) { 9054 warnx("%s: perf/%s/%s: failed to read %s", 9055 __func__, perf_device, pinfo->event, "type"); 9056 continue; 9057 } 9058 9059 perf_config = read_perf_config(perf_device, pinfo->event); 9060 if (perf_config == (unsigned int)-1) { 9061 warnx("%s: perf/%s/%s: failed to read %s", 9062 __func__, perf_device, pinfo->event, "config"); 9063 continue; 9064 } 9065 9066 /* Scale is not required, some counters just don't have it. */ 9067 perf_scale = read_perf_scale(perf_device, pinfo->event); 9068 if (perf_scale == 0.0) 9069 perf_scale = 1.0; 9070 9071 fd_perf = open_perf_counter(cpu, perf_type, perf_config, -1, 0); 9072 if (fd_perf == -1) { 9073 warnx("%s: perf/%s/%s: failed to open counter on cpu%d", 9074 __func__, perf_device, pinfo->event, cpu); 9075 continue; 9076 } 9077 9078 domain_visited[next_domain] = 1; 9079 pinfo->fd_perf_per_domain[next_domain] = fd_perf; 9080 pinfo->scale = perf_scale; 9081 9082 if (debug) 9083 fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n", 9084 perf_device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); 9085 } 9086 9087 pinfo = pinfo->next; 9088 } 9089 9090 free(domain_visited); 9091 9092 return 0; 9093 } 9094 9095 void added_perf_counters_init(void) 9096 { 9097 if (added_perf_counters_init_(sys.perf_tp)) 9098 errx(1, "%s: %s", __func__, "thread"); 9099 9100 if (added_perf_counters_init_(sys.perf_cp)) 9101 errx(1, "%s: %s", __func__, "core"); 9102 9103 if (added_perf_counters_init_(sys.perf_pp)) 9104 errx(1, "%s: %s", __func__, "package"); 9105 } 9106 9107 int parse_telem_info_file(int fd_dir, const char *info_filename, const char *format, unsigned long *output) 9108 { 9109 int fd_telem_info; 9110 FILE *file_telem_info; 9111 unsigned long value; 9112 9113 fd_telem_info = openat(fd_dir, info_filename, O_RDONLY); 9114 if (fd_telem_info == -1) 9115 return -1; 9116 9117 file_telem_info = fdopen(fd_telem_info, 
"r"); 9118 if (file_telem_info == NULL) { 9119 close(fd_telem_info); 9120 return -1; 9121 } 9122 9123 if (fscanf(file_telem_info, format, &value) != 1) { 9124 fclose(file_telem_info); 9125 return -1; 9126 } 9127 9128 fclose(file_telem_info); 9129 9130 *output = value; 9131 9132 return 0; 9133 } 9134 9135 struct pmt_mmio *pmt_mmio_open(unsigned int target_guid) 9136 { 9137 struct pmt_diriter_t pmt_iter; 9138 const struct dirent *entry; 9139 struct stat st; 9140 int fd_telem_dir, fd_pmt; 9141 unsigned long guid, size, offset; 9142 size_t mmap_size; 9143 void *mmio; 9144 struct pmt_mmio *head = NULL, *last = NULL; 9145 struct pmt_mmio *new_pmt = NULL; 9146 9147 if (stat(SYSFS_TELEM_PATH, &st) == -1) 9148 return NULL; 9149 9150 pmt_diriter_init(&pmt_iter); 9151 entry = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); 9152 if (!entry) { 9153 pmt_diriter_remove(&pmt_iter); 9154 return NULL; 9155 } 9156 9157 for ( ; entry != NULL; entry = pmt_diriter_next(&pmt_iter)) { 9158 if (fstatat(dirfd(pmt_iter.dir), entry->d_name, &st, 0) == -1) 9159 break; 9160 9161 if (!S_ISDIR(st.st_mode)) 9162 continue; 9163 9164 fd_telem_dir = openat(dirfd(pmt_iter.dir), entry->d_name, O_RDONLY); 9165 if (fd_telem_dir == -1) 9166 break; 9167 9168 if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) { 9169 close(fd_telem_dir); 9170 break; 9171 } 9172 9173 if (parse_telem_info_file(fd_telem_dir, "size", "%lu", &size)) { 9174 close(fd_telem_dir); 9175 break; 9176 } 9177 9178 if (guid != target_guid) { 9179 close(fd_telem_dir); 9180 continue; 9181 } 9182 9183 if (parse_telem_info_file(fd_telem_dir, "offset", "%lu", &offset)) { 9184 close(fd_telem_dir); 9185 break; 9186 } 9187 9188 assert(offset == 0); 9189 9190 fd_pmt = openat(fd_telem_dir, "telem", O_RDONLY); 9191 if (fd_pmt == -1) 9192 goto loop_cleanup_and_break; 9193 9194 mmap_size = ROUND_UP_TO_PAGE_SIZE(size); 9195 mmio = mmap(0, mmap_size, PROT_READ, MAP_SHARED, fd_pmt, 0); 9196 if (mmio != MAP_FAILED) { 9197 if (debug) 9198 
fprintf(stderr, "%s: 0x%lx mmaped at: %p\n", __func__, guid, mmio); 9199 9200 new_pmt = calloc(1, sizeof(*new_pmt)); 9201 9202 if (!new_pmt) { 9203 fprintf(stderr, "%s: Failed to allocate pmt_mmio\n", __func__); 9204 exit(1); 9205 } 9206 9207 /* 9208 * Create linked list of mmaped regions, 9209 * but preserve the ordering from sysfs. 9210 * Ordering is important for the user to 9211 * use the seq=%u parameter when adding a counter. 9212 */ 9213 new_pmt->guid = guid; 9214 new_pmt->mmio_base = mmio; 9215 new_pmt->pmt_offset = offset; 9216 new_pmt->size = size; 9217 new_pmt->next = pmt_mmios; 9218 9219 if (last) 9220 last->next = new_pmt; 9221 else 9222 head = new_pmt; 9223 9224 last = new_pmt; 9225 } 9226 9227 loop_cleanup_and_break: 9228 close(fd_pmt); 9229 close(fd_telem_dir); 9230 } 9231 9232 pmt_diriter_remove(&pmt_iter); 9233 9234 /* 9235 * If we found something, stick just 9236 * created linked list to the front. 9237 */ 9238 if (head) 9239 pmt_mmios = head; 9240 9241 return head; 9242 } 9243 9244 struct pmt_mmio *pmt_mmio_find(unsigned int guid) 9245 { 9246 struct pmt_mmio *pmmio = pmt_mmios; 9247 9248 while (pmmio) { 9249 if (pmmio->guid == guid) 9250 return pmmio; 9251 9252 pmmio = pmmio->next; 9253 } 9254 9255 return NULL; 9256 } 9257 9258 void *pmt_get_counter_pointer(struct pmt_mmio *pmmio, unsigned long counter_offset) 9259 { 9260 char *ret; 9261 9262 /* Get base of mmaped PMT file. */ 9263 ret = (char *)pmmio->mmio_base; 9264 9265 /* 9266 * Apply PMT MMIO offset to obtain beginning of the mmaped telemetry data. 9267 * It's not guaranteed that the mmaped memory begins with the telemetry data 9268 * - we might have to apply the offset first. 9269 */ 9270 ret += pmmio->pmt_offset; 9271 9272 /* Apply the counter offset to get the address to the mmaped counter. 
*/ 9273 ret += counter_offset; 9274 9275 return ret; 9276 } 9277 9278 struct pmt_mmio *pmt_add_guid(unsigned int guid, unsigned int seq) 9279 { 9280 struct pmt_mmio *ret; 9281 9282 ret = pmt_mmio_find(guid); 9283 if (!ret) 9284 ret = pmt_mmio_open(guid); 9285 9286 while (ret && seq) { 9287 ret = ret->next; 9288 --seq; 9289 } 9290 9291 return ret; 9292 } 9293 9294 enum pmt_open_mode { 9295 PMT_OPEN_TRY, /* Open failure is not an error. */ 9296 PMT_OPEN_REQUIRED, /* Open failure is a fatal error. */ 9297 }; 9298 9299 struct pmt_counter *pmt_find_counter(struct pmt_counter *pcounter, const char *name) 9300 { 9301 while (pcounter) { 9302 if (strcmp(pcounter->name, name) == 0) 9303 break; 9304 9305 pcounter = pcounter->next; 9306 } 9307 9308 return pcounter; 9309 } 9310 9311 struct pmt_counter **pmt_get_scope_root(enum counter_scope scope) 9312 { 9313 switch (scope) { 9314 case SCOPE_CPU: 9315 return &sys.pmt_tp; 9316 case SCOPE_CORE: 9317 return &sys.pmt_cp; 9318 case SCOPE_PACKAGE: 9319 return &sys.pmt_pp; 9320 } 9321 9322 __builtin_unreachable(); 9323 } 9324 9325 void pmt_counter_add_domain(struct pmt_counter *pcounter, unsigned long *pmmio, unsigned int domain_id) 9326 { 9327 /* Make sure the new domain fits. 
*/ 9328 if (domain_id >= pcounter->num_domains) 9329 pmt_counter_resize(pcounter, domain_id + 1); 9330 9331 assert(pcounter->domains); 9332 assert(domain_id < pcounter->num_domains); 9333 9334 pcounter->domains[domain_id].pcounter = pmmio; 9335 } 9336 9337 int pmt_add_counter(unsigned int guid, unsigned int seq, const char *name, enum pmt_datatype type, 9338 unsigned int lsb, unsigned int msb, unsigned int offset, enum counter_scope scope, 9339 enum counter_format format, unsigned int domain_id, enum pmt_open_mode mode) 9340 { 9341 struct pmt_mmio *mmio; 9342 struct pmt_counter *pcounter; 9343 struct pmt_counter **const pmt_root = pmt_get_scope_root(scope); 9344 bool new_counter = false; 9345 int conflict = 0; 9346 9347 if (lsb > msb) { 9348 fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "lsb <= msb", name); 9349 exit(1); 9350 } 9351 9352 if (msb >= 64) { 9353 fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "msb < 64", name); 9354 exit(1); 9355 } 9356 9357 mmio = pmt_add_guid(guid, seq); 9358 if (!mmio) { 9359 if (mode != PMT_OPEN_TRY) { 9360 fprintf(stderr, "%s: failed to map PMT MMIO for guid %x, seq %u\n", __func__, guid, seq); 9361 exit(1); 9362 } 9363 9364 return 1; 9365 } 9366 9367 if (offset >= mmio->size) { 9368 if (mode != PMT_OPEN_TRY) { 9369 fprintf(stderr, "%s: offset %u outside of PMT MMIO size %u\n", __func__, offset, mmio->size); 9370 exit(1); 9371 } 9372 9373 return 1; 9374 } 9375 9376 pcounter = pmt_find_counter(*pmt_root, name); 9377 if (!pcounter) { 9378 pcounter = calloc(1, sizeof(*pcounter)); 9379 new_counter = true; 9380 } 9381 9382 if (new_counter) { 9383 strncpy(pcounter->name, name, ARRAY_SIZE(pcounter->name) - 1); 9384 pcounter->type = type; 9385 pcounter->scope = scope; 9386 pcounter->lsb = lsb; 9387 pcounter->msb = msb; 9388 pcounter->format = format; 9389 } else { 9390 conflict += pcounter->type != type; 9391 conflict += pcounter->scope != scope; 9392 conflict += pcounter->lsb != lsb; 9393 conflict += 
pcounter->msb != msb; 9394 conflict += pcounter->format != format; 9395 } 9396 9397 if (conflict) { 9398 fprintf(stderr, "%s: conflicting parameters for the PMT counter with the same name %s\n", 9399 __func__, name); 9400 exit(1); 9401 } 9402 9403 pmt_counter_add_domain(pcounter, pmt_get_counter_pointer(mmio, offset), domain_id); 9404 9405 if (new_counter) { 9406 pcounter->next = *pmt_root; 9407 *pmt_root = pcounter; 9408 } 9409 9410 return 0; 9411 } 9412 9413 void pmt_init(void) 9414 { 9415 int cpu_num; 9416 unsigned long seq, offset, mod_num; 9417 9418 if (BIC_IS_ENABLED(BIC_Diec6)) { 9419 pmt_add_counter(PMT_MTL_DC6_GUID, PMT_MTL_DC6_SEQ, "Die%c6", PMT_TYPE_XTAL_TIME, 9420 PMT_COUNTER_MTL_DC6_LSB, PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET, 9421 SCOPE_PACKAGE, FORMAT_DELTA, 0, PMT_OPEN_TRY); 9422 } 9423 9424 if (BIC_IS_ENABLED(BIC_CPU_c1e)) { 9425 seq = 0; 9426 offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE; 9427 mod_num = 0; /* Relative module number for current PMT file. */ 9428 9429 /* Open the counter for each CPU. */ 9430 for (cpu_num = 0; cpu_num < topo.max_cpu_num;) { 9431 9432 if (cpu_is_not_allowed(cpu_num)) 9433 goto next_loop_iter; 9434 9435 /* 9436 * Set the scope to CPU, even though CWF report the counter per module. 9437 * CPUs inside the same module will read from the same location, instead of reporting zeros. 9438 * 9439 * CWF with newer firmware might require a PMT_TYPE_XTAL_TIME intead of PMT_TYPE_TCORE_CLOCK. 9440 */ 9441 pmt_add_counter(PMT_CWF_MC1E_GUID, seq, "CPU%c1e", PMT_TYPE_TCORE_CLOCK, 9442 PMT_COUNTER_CWF_MC1E_LSB, PMT_COUNTER_CWF_MC1E_MSB, offset, SCOPE_CPU, 9443 FORMAT_DELTA, cpu_num, PMT_OPEN_TRY); 9444 9445 /* 9446 * Rather complex logic for each time we go to the next loop iteration, 9447 * so keep it as a label. 9448 */ 9449 next_loop_iter: 9450 /* 9451 * Advance the cpu number and check if we should also advance offset to 9452 * the next counter inside the PMT file. 
9453 * 9454 * On Clearwater Forest platform, the counter is reported per module, 9455 * so open the same counter for all of the CPUs inside the module. 9456 * That way, reported table show the correct value for all of the CPUs inside the module, 9457 * instead of zeros. 9458 */ 9459 ++cpu_num; 9460 if (cpu_num % PMT_COUNTER_CWF_CPUS_PER_MODULE == 0) { 9461 offset += PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT; 9462 ++mod_num; 9463 } 9464 9465 /* 9466 * There are PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE in each PMT file. 9467 * 9468 * If that number is reached, seq must be incremented to advance to the next file in a sequence. 9469 * Offset inside that file and a module counter has to be reset. 9470 */ 9471 if (mod_num == PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE) { 9472 ++seq; 9473 offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE; 9474 mod_num = 0; 9475 } 9476 } 9477 } 9478 } 9479 9480 void turbostat_init() 9481 { 9482 setup_all_buffers(true); 9483 set_base_cpu(); 9484 check_msr_access(); 9485 check_perf_access(); 9486 process_cpuid(); 9487 counter_info_init(); 9488 probe_pm_features(); 9489 msr_perf_init(); 9490 linux_perf_init(); 9491 rapl_perf_init(); 9492 cstate_perf_init(); 9493 added_perf_counters_init(); 9494 pmt_init(); 9495 9496 for_all_cpus(get_cpu_type, ODD_COUNTERS); 9497 for_all_cpus(get_cpu_type, EVEN_COUNTERS); 9498 9499 if (BIC_IS_ENABLED(BIC_IPC) && has_aperf_access && get_instr_count_fd(base_cpu) != -1) 9500 BIC_PRESENT(BIC_IPC); 9501 9502 /* 9503 * If TSC tweak is needed, but couldn't get it, 9504 * disable more BICs, since it can't be reported accurately. 
9505 */ 9506 if (platform->enable_tsc_tweak && !has_base_hz) { 9507 bic_enabled &= ~BIC_Busy; 9508 bic_enabled &= ~BIC_Bzy_MHz; 9509 } 9510 } 9511 9512 void affinitize_child(void) 9513 { 9514 /* Prefer cpu_possible_set, if available */ 9515 if (sched_setaffinity(0, cpu_possible_setsize, cpu_possible_set)) { 9516 warn("sched_setaffinity cpu_possible_set"); 9517 9518 /* Otherwise, allow child to run on same cpu set as turbostat */ 9519 if (sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set)) 9520 warn("sched_setaffinity cpu_allowed_set"); 9521 } 9522 } 9523 9524 int fork_it(char **argv) 9525 { 9526 pid_t child_pid; 9527 int status; 9528 9529 snapshot_proc_sysfs_files(); 9530 status = for_all_cpus(get_counters, EVEN_COUNTERS); 9531 first_counter_read = 0; 9532 if (status) 9533 exit(status); 9534 gettimeofday(&tv_even, (struct timezone *)NULL); 9535 9536 child_pid = fork(); 9537 if (!child_pid) { 9538 /* child */ 9539 affinitize_child(); 9540 execvp(argv[0], argv); 9541 err(errno, "exec %s", argv[0]); 9542 } else { 9543 9544 /* parent */ 9545 if (child_pid == -1) 9546 err(1, "fork"); 9547 9548 signal(SIGINT, SIG_IGN); 9549 signal(SIGQUIT, SIG_IGN); 9550 if (waitpid(child_pid, &status, 0) == -1) 9551 err(status, "waitpid"); 9552 9553 if (WIFEXITED(status)) 9554 status = WEXITSTATUS(status); 9555 } 9556 /* 9557 * n.b. 
fork_it() does not check for errors from for_all_cpus() 9558 * because re-starting is problematic when forking 9559 */ 9560 snapshot_proc_sysfs_files(); 9561 for_all_cpus(get_counters, ODD_COUNTERS); 9562 gettimeofday(&tv_odd, (struct timezone *)NULL); 9563 timersub(&tv_odd, &tv_even, &tv_delta); 9564 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) 9565 fprintf(outf, "%s: Counter reset detected\n", progname); 9566 9567 compute_average(EVEN_COUNTERS); 9568 format_all_counters(EVEN_COUNTERS); 9569 9570 fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0); 9571 9572 flush_output_stderr(); 9573 9574 return status; 9575 } 9576 9577 int get_and_dump_counters(void) 9578 { 9579 int status; 9580 9581 snapshot_proc_sysfs_files(); 9582 status = for_all_cpus(get_counters, ODD_COUNTERS); 9583 if (status) 9584 return status; 9585 9586 status = for_all_cpus(dump_counters, ODD_COUNTERS); 9587 if (status) 9588 return status; 9589 9590 flush_output_stdout(); 9591 9592 return status; 9593 } 9594 9595 void print_version() 9596 { 9597 fprintf(outf, "turbostat version 2025.04.06 - Len Brown <lenb@kernel.org>\n"); 9598 } 9599 9600 #define COMMAND_LINE_SIZE 2048 9601 9602 void print_bootcmd(void) 9603 { 9604 char bootcmd[COMMAND_LINE_SIZE]; 9605 FILE *fp; 9606 int ret; 9607 9608 memset(bootcmd, 0, COMMAND_LINE_SIZE); 9609 fp = fopen("/proc/cmdline", "r"); 9610 if (!fp) 9611 return; 9612 9613 ret = fread(bootcmd, sizeof(char), COMMAND_LINE_SIZE - 1, fp); 9614 if (ret) { 9615 bootcmd[ret] = '\0'; 9616 /* the last character is already '\n' */ 9617 fprintf(outf, "Kernel command line: %s", bootcmd); 9618 } 9619 9620 fclose(fp); 9621 } 9622 9623 struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name) 9624 { 9625 struct msr_counter *mp; 9626 9627 for (mp = head; mp; mp = mp->next) { 9628 if (debug) 9629 fprintf(stderr, "%s: %s %s\n", __func__, name, mp->name); 9630 if (!strcmp(name, mp->name)) 9631 return mp; 9632 } 9633 return NULL; 9634 } 
9635 9636 int add_counter(unsigned int msr_num, char *path, char *name, 9637 unsigned int width, enum counter_scope scope, 9638 enum counter_type type, enum counter_format format, int flags, int id) 9639 { 9640 struct msr_counter *msrp; 9641 9642 if (no_msr && msr_num) 9643 errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num); 9644 9645 if (debug) 9646 fprintf(stderr, "%s(msr%d, %s, %s, width%d, scope%d, type%d, format%d, flags%x, id%d)\n", 9647 __func__, msr_num, path, name, width, scope, type, format, flags, id); 9648 9649 switch (scope) { 9650 9651 case SCOPE_CPU: 9652 msrp = find_msrp_by_name(sys.tp, name); 9653 if (msrp) { 9654 if (debug) 9655 fprintf(stderr, "%s: %s FOUND\n", __func__, name); 9656 break; 9657 } 9658 if (sys.added_thread_counters++ >= MAX_ADDED_THREAD_COUNTERS) { 9659 warnx("ignoring thread counter %s", name); 9660 return -1; 9661 } 9662 break; 9663 case SCOPE_CORE: 9664 msrp = find_msrp_by_name(sys.cp, name); 9665 if (msrp) { 9666 if (debug) 9667 fprintf(stderr, "%s: %s FOUND\n", __func__, name); 9668 break; 9669 } 9670 if (sys.added_core_counters++ >= MAX_ADDED_CORE_COUNTERS) { 9671 warnx("ignoring core counter %s", name); 9672 return -1; 9673 } 9674 break; 9675 case SCOPE_PACKAGE: 9676 msrp = find_msrp_by_name(sys.pp, name); 9677 if (msrp) { 9678 if (debug) 9679 fprintf(stderr, "%s: %s FOUND\n", __func__, name); 9680 break; 9681 } 9682 if (sys.added_package_counters++ >= MAX_ADDED_PACKAGE_COUNTERS) { 9683 warnx("ignoring package counter %s", name); 9684 return -1; 9685 } 9686 break; 9687 default: 9688 warnx("ignoring counter %s with unknown scope", name); 9689 return -1; 9690 } 9691 9692 if (msrp == NULL) { 9693 msrp = calloc(1, sizeof(struct msr_counter)); 9694 if (msrp == NULL) 9695 err(-1, "calloc msr_counter"); 9696 9697 msrp->msr_num = msr_num; 9698 strncpy(msrp->name, name, NAME_BYTES - 1); 9699 msrp->width = width; 9700 msrp->type = type; 9701 msrp->format = format; 9702 msrp->flags = flags; 9703 9704 switch (scope) { 
9705 case SCOPE_CPU: 9706 msrp->next = sys.tp; 9707 sys.tp = msrp; 9708 break; 9709 case SCOPE_CORE: 9710 msrp->next = sys.cp; 9711 sys.cp = msrp; 9712 break; 9713 case SCOPE_PACKAGE: 9714 msrp->next = sys.pp; 9715 sys.pp = msrp; 9716 break; 9717 } 9718 } 9719 9720 if (path) { 9721 struct sysfs_path *sp; 9722 9723 sp = calloc(1, sizeof(struct sysfs_path)); 9724 if (sp == NULL) { 9725 perror("calloc"); 9726 exit(1); 9727 } 9728 strncpy(sp->path, path, PATH_BYTES - 1); 9729 sp->id = id; 9730 sp->next = msrp->sp; 9731 msrp->sp = sp; 9732 } 9733 9734 return 0; 9735 } 9736 9737 /* 9738 * Initialize the fields used for identifying and opening the counter. 9739 * 9740 * Defer the initialization of any runtime buffers for actually reading 9741 * the counters for when we initialize all perf counters, so we can later 9742 * easily call re_initialize(). 9743 */ 9744 struct perf_counter_info *make_perf_counter_info(const char *perf_device, 9745 const char *perf_event, 9746 const char *name, 9747 unsigned int width, 9748 enum counter_scope scope, 9749 enum counter_type type, enum counter_format format) 9750 { 9751 struct perf_counter_info *pinfo; 9752 9753 pinfo = calloc(1, sizeof(*pinfo)); 9754 if (!pinfo) 9755 errx(1, "%s: Failed to allocate %s/%s\n", __func__, perf_device, perf_event); 9756 9757 strncpy(pinfo->device, perf_device, ARRAY_SIZE(pinfo->device) - 1); 9758 strncpy(pinfo->event, perf_event, ARRAY_SIZE(pinfo->event) - 1); 9759 9760 strncpy(pinfo->name, name, ARRAY_SIZE(pinfo->name) - 1); 9761 pinfo->width = width; 9762 pinfo->scope = scope; 9763 pinfo->type = type; 9764 pinfo->format = format; 9765 9766 return pinfo; 9767 } 9768 9769 int add_perf_counter(const char *perf_device, const char *perf_event, const char *name_buffer, unsigned int width, 9770 enum counter_scope scope, enum counter_type type, enum counter_format format) 9771 { 9772 struct perf_counter_info *pinfo; 9773 9774 switch (scope) { 9775 case SCOPE_CPU: 9776 if (sys.added_thread_perf_counters >= 
MAX_ADDED_THREAD_COUNTERS) { 9777 warnx("ignoring thread counter perf/%s/%s", perf_device, perf_event); 9778 return -1; 9779 } 9780 break; 9781 9782 case SCOPE_CORE: 9783 if (sys.added_core_perf_counters >= MAX_ADDED_CORE_COUNTERS) { 9784 warnx("ignoring core counter perf/%s/%s", perf_device, perf_event); 9785 return -1; 9786 } 9787 break; 9788 9789 case SCOPE_PACKAGE: 9790 if (sys.added_package_perf_counters >= MAX_ADDED_PACKAGE_COUNTERS) { 9791 warnx("ignoring package counter perf/%s/%s", perf_device, perf_event); 9792 return -1; 9793 } 9794 break; 9795 } 9796 9797 pinfo = make_perf_counter_info(perf_device, perf_event, name_buffer, width, scope, type, format); 9798 9799 if (!pinfo) 9800 return -1; 9801 9802 switch (scope) { 9803 case SCOPE_CPU: 9804 pinfo->next = sys.perf_tp; 9805 sys.perf_tp = pinfo; 9806 ++sys.added_thread_perf_counters; 9807 break; 9808 9809 case SCOPE_CORE: 9810 pinfo->next = sys.perf_cp; 9811 sys.perf_cp = pinfo; 9812 ++sys.added_core_perf_counters; 9813 break; 9814 9815 case SCOPE_PACKAGE: 9816 pinfo->next = sys.perf_pp; 9817 sys.perf_pp = pinfo; 9818 ++sys.added_package_perf_counters; 9819 break; 9820 } 9821 9822 // FIXME: we might not have debug here yet 9823 if (debug) 9824 fprintf(stderr, "%s: %s/%s, name: %s, scope%d\n", 9825 __func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope); 9826 9827 return 0; 9828 } 9829 9830 void parse_add_command_msr(char *add_command) 9831 { 9832 int msr_num = 0; 9833 char *path = NULL; 9834 char perf_device[PERF_DEV_NAME_BYTES] = ""; 9835 char perf_event[PERF_EVT_NAME_BYTES] = ""; 9836 char name_buffer[PERF_NAME_BYTES] = ""; 9837 int width = 64; 9838 int fail = 0; 9839 enum counter_scope scope = SCOPE_CPU; 9840 enum counter_type type = COUNTER_CYCLES; 9841 enum counter_format format = FORMAT_DELTA; 9842 9843 while (add_command) { 9844 9845 if (sscanf(add_command, "msr0x%x", &msr_num) == 1) 9846 goto next; 9847 9848 if (sscanf(add_command, "msr%d", &msr_num) == 1) 9849 goto next; 9850 9851 
BUILD_BUG_ON(ARRAY_SIZE(perf_device) <= 31); 9852 BUILD_BUG_ON(ARRAY_SIZE(perf_event) <= 31); 9853 if (sscanf(add_command, "perf/%31[^/]/%31[^,]", &perf_device[0], &perf_event[0]) == 2) 9854 goto next; 9855 9856 if (*add_command == '/') { 9857 path = add_command; 9858 goto next; 9859 } 9860 9861 if (sscanf(add_command, "u%d", &width) == 1) { 9862 if ((width == 32) || (width == 64)) 9863 goto next; 9864 width = 64; 9865 } 9866 if (!strncmp(add_command, "cpu", strlen("cpu"))) { 9867 scope = SCOPE_CPU; 9868 goto next; 9869 } 9870 if (!strncmp(add_command, "core", strlen("core"))) { 9871 scope = SCOPE_CORE; 9872 goto next; 9873 } 9874 if (!strncmp(add_command, "package", strlen("package"))) { 9875 scope = SCOPE_PACKAGE; 9876 goto next; 9877 } 9878 if (!strncmp(add_command, "cycles", strlen("cycles"))) { 9879 type = COUNTER_CYCLES; 9880 goto next; 9881 } 9882 if (!strncmp(add_command, "seconds", strlen("seconds"))) { 9883 type = COUNTER_SECONDS; 9884 goto next; 9885 } 9886 if (!strncmp(add_command, "usec", strlen("usec"))) { 9887 type = COUNTER_USEC; 9888 goto next; 9889 } 9890 if (!strncmp(add_command, "raw", strlen("raw"))) { 9891 format = FORMAT_RAW; 9892 goto next; 9893 } 9894 if (!strncmp(add_command, "delta", strlen("delta"))) { 9895 format = FORMAT_DELTA; 9896 goto next; 9897 } 9898 if (!strncmp(add_command, "percent", strlen("percent"))) { 9899 format = FORMAT_PERCENT; 9900 goto next; 9901 } 9902 9903 BUILD_BUG_ON(ARRAY_SIZE(name_buffer) <= 18); 9904 if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) { 9905 char *eos; 9906 9907 eos = strchr(name_buffer, ','); 9908 if (eos) 9909 *eos = '\0'; 9910 goto next; 9911 } 9912 9913 next: 9914 add_command = strchr(add_command, ','); 9915 if (add_command) { 9916 *add_command = '\0'; 9917 add_command++; 9918 } 9919 9920 } 9921 if ((msr_num == 0) && (path == NULL) && (perf_device[0] == '\0' || perf_event[0] == '\0')) { 9922 fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event) required\n"); 
9923 fail++; 9924 } 9925 9926 /* Test for non-empty perf_device and perf_event */ 9927 const bool is_perf_counter = perf_device[0] && perf_event[0]; 9928 9929 /* generate default column header */ 9930 if (*name_buffer == '\0') { 9931 if (is_perf_counter) { 9932 snprintf(name_buffer, ARRAY_SIZE(name_buffer), "perf/%s", perf_event); 9933 } else { 9934 if (width == 32) 9935 sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); 9936 else 9937 sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); 9938 } 9939 } 9940 9941 if (is_perf_counter) { 9942 if (add_perf_counter(perf_device, perf_event, name_buffer, width, scope, type, format)) 9943 fail++; 9944 } else { 9945 if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0, 0)) 9946 fail++; 9947 } 9948 9949 if (fail) { 9950 help(); 9951 exit(1); 9952 } 9953 } 9954 9955 bool starts_with(const char *str, const char *prefix) 9956 { 9957 return strncmp(prefix, str, strlen(prefix)) == 0; 9958 } 9959 9960 int pmt_parse_from_path(const char *target_path, unsigned int *out_guid, unsigned int *out_seq) 9961 { 9962 struct pmt_diriter_t pmt_iter; 9963 const struct dirent *dirname; 9964 struct stat stat, target_stat; 9965 int fd_telem_dir = -1; 9966 int fd_target_dir; 9967 unsigned int seq = 0; 9968 unsigned long guid, target_guid; 9969 int ret = -1; 9970 9971 fd_target_dir = open(target_path, O_RDONLY | O_DIRECTORY); 9972 if (fd_target_dir == -1) { 9973 return -1; 9974 } 9975 9976 if (fstat(fd_target_dir, &target_stat) == -1) { 9977 fprintf(stderr, "%s: Failed to stat the target: %s", __func__, strerror(errno)); 9978 exit(1); 9979 } 9980 9981 if (parse_telem_info_file(fd_target_dir, "guid", "%lx", &target_guid)) { 9982 fprintf(stderr, "%s: Failed to parse the target guid file: %s", __func__, strerror(errno)); 9983 exit(1); 9984 } 9985 9986 close(fd_target_dir); 9987 9988 pmt_diriter_init(&pmt_iter); 9989 9990 for (dirname = pmt_diriter_begin(&pmt_iter, 
SYSFS_TELEM_PATH); dirname != NULL; 9991 dirname = pmt_diriter_next(&pmt_iter)) { 9992 9993 fd_telem_dir = openat(dirfd(pmt_iter.dir), dirname->d_name, O_RDONLY | O_DIRECTORY); 9994 if (fd_telem_dir == -1) 9995 continue; 9996 9997 if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) { 9998 fprintf(stderr, "%s: Failed to parse the guid file: %s", __func__, strerror(errno)); 9999 continue; 10000 } 10001 10002 if (fstat(fd_telem_dir, &stat) == -1) { 10003 fprintf(stderr, "%s: Failed to stat %s directory: %s", __func__, 10004 dirname->d_name, strerror(errno)); 10005 continue; 10006 } 10007 10008 /* 10009 * If reached the same directory as target, exit the loop. 10010 * Seq has the correct value now. 10011 */ 10012 if (stat.st_dev == target_stat.st_dev && stat.st_ino == target_stat.st_ino) { 10013 ret = 0; 10014 break; 10015 } 10016 10017 /* 10018 * If reached directory with the same guid, 10019 * but it's not the target directory yet, 10020 * increment seq and continue the search. 10021 */ 10022 if (guid == target_guid) 10023 ++seq; 10024 10025 close(fd_telem_dir); 10026 fd_telem_dir = -1; 10027 } 10028 10029 pmt_diriter_remove(&pmt_iter); 10030 10031 if (fd_telem_dir != -1) 10032 close(fd_telem_dir); 10033 10034 if (!ret) { 10035 *out_guid = target_guid; 10036 *out_seq = seq; 10037 } 10038 10039 return ret; 10040 } 10041 10042 void parse_add_command_pmt(char *add_command) 10043 { 10044 char *name = NULL; 10045 char *type_name = NULL; 10046 char *format_name = NULL; 10047 char *direct_path = NULL; 10048 static const char direct_path_prefix[] = "path="; 10049 unsigned int offset; 10050 unsigned int lsb; 10051 unsigned int msb; 10052 unsigned int guid; 10053 unsigned int seq = 0; /* By default, pick first file in a sequence with a given GUID. 
*/ 10054 unsigned int domain_id; 10055 enum counter_scope scope = 0; 10056 enum pmt_datatype type = PMT_TYPE_RAW; 10057 enum counter_format format = FORMAT_RAW; 10058 bool has_offset = false; 10059 bool has_lsb = false; 10060 bool has_msb = false; 10061 bool has_format = true; /* Format has a default value. */ 10062 bool has_guid = false; 10063 bool has_scope = false; 10064 bool has_type = true; /* Type has a default value. */ 10065 10066 /* Consume the "pmt," prefix. */ 10067 add_command = strchr(add_command, ','); 10068 if (!add_command) { 10069 help(); 10070 exit(1); 10071 } 10072 ++add_command; 10073 10074 while (add_command) { 10075 if (starts_with(add_command, "name=")) { 10076 name = add_command + strlen("name="); 10077 goto next; 10078 } 10079 10080 if (starts_with(add_command, "type=")) { 10081 type_name = add_command + strlen("type="); 10082 goto next; 10083 } 10084 10085 if (starts_with(add_command, "domain=")) { 10086 const size_t prefix_len = strlen("domain="); 10087 10088 if (sscanf(add_command + prefix_len, "cpu%u", &domain_id) == 1) { 10089 scope = SCOPE_CPU; 10090 has_scope = true; 10091 } else if (sscanf(add_command + prefix_len, "core%u", &domain_id) == 1) { 10092 scope = SCOPE_CORE; 10093 has_scope = true; 10094 } else if (sscanf(add_command + prefix_len, "package%u", &domain_id) == 1) { 10095 scope = SCOPE_PACKAGE; 10096 has_scope = true; 10097 } 10098 10099 if (!has_scope) { 10100 printf("%s: invalid value for scope. 
Expected cpu%%u, core%%u or package%%u.\n", 10101 __func__); 10102 exit(1); 10103 } 10104 10105 goto next; 10106 } 10107 10108 if (starts_with(add_command, "format=")) { 10109 format_name = add_command + strlen("format="); 10110 goto next; 10111 } 10112 10113 if (sscanf(add_command, "offset=%u", &offset) == 1) { 10114 has_offset = true; 10115 goto next; 10116 } 10117 10118 if (sscanf(add_command, "lsb=%u", &lsb) == 1) { 10119 has_lsb = true; 10120 goto next; 10121 } 10122 10123 if (sscanf(add_command, "msb=%u", &msb) == 1) { 10124 has_msb = true; 10125 goto next; 10126 } 10127 10128 if (sscanf(add_command, "guid=%x", &guid) == 1) { 10129 has_guid = true; 10130 goto next; 10131 } 10132 10133 if (sscanf(add_command, "seq=%x", &seq) == 1) 10134 goto next; 10135 10136 if (strncmp(add_command, direct_path_prefix, strlen(direct_path_prefix)) == 0) { 10137 direct_path = add_command + strlen(direct_path_prefix); 10138 goto next; 10139 } 10140 next: 10141 add_command = strchr(add_command, ','); 10142 if (add_command) { 10143 *add_command = '\0'; 10144 add_command++; 10145 } 10146 } 10147 10148 if (!name) { 10149 printf("%s: missing %s\n", __func__, "name"); 10150 exit(1); 10151 } 10152 10153 if (strlen(name) >= PMT_COUNTER_NAME_SIZE_BYTES) { 10154 printf("%s: name has to be at most %d characters long\n", __func__, PMT_COUNTER_NAME_SIZE_BYTES); 10155 exit(1); 10156 } 10157 10158 if (format_name) { 10159 has_format = false; 10160 10161 if (strcmp("raw", format_name) == 0) { 10162 format = FORMAT_RAW; 10163 has_format = true; 10164 } 10165 10166 if (strcmp("delta", format_name) == 0) { 10167 format = FORMAT_DELTA; 10168 has_format = true; 10169 } 10170 10171 if (!has_format) { 10172 fprintf(stderr, "%s: Invalid format %s. 
Expected raw or delta\n", __func__, format_name); 10173 exit(1); 10174 } 10175 } 10176 10177 if (type_name) { 10178 has_type = false; 10179 10180 if (strcmp("raw", type_name) == 0) { 10181 type = PMT_TYPE_RAW; 10182 has_type = true; 10183 } 10184 10185 if (strcmp("txtal_time", type_name) == 0) { 10186 type = PMT_TYPE_XTAL_TIME; 10187 has_type = true; 10188 } 10189 10190 if (strcmp("tcore_clock", type_name) == 0) { 10191 type = PMT_TYPE_TCORE_CLOCK; 10192 has_type = true; 10193 } 10194 10195 if (!has_type) { 10196 printf("%s: invalid %s: %s\n", __func__, "type", type_name); 10197 exit(1); 10198 } 10199 } 10200 10201 if (!has_offset) { 10202 printf("%s : missing %s\n", __func__, "offset"); 10203 exit(1); 10204 } 10205 10206 if (!has_lsb) { 10207 printf("%s: missing %s\n", __func__, "lsb"); 10208 exit(1); 10209 } 10210 10211 if (!has_msb) { 10212 printf("%s: missing %s\n", __func__, "msb"); 10213 exit(1); 10214 } 10215 10216 if (direct_path && has_guid) { 10217 printf("%s: path and guid+seq parameters are mutually exclusive\n" 10218 "notice: passed guid=0x%x and path=%s\n", __func__, guid, direct_path); 10219 exit(1); 10220 } 10221 10222 if (direct_path) { 10223 if (pmt_parse_from_path(direct_path, &guid, &seq)) { 10224 printf("%s: failed to parse PMT file from %s\n", __func__, direct_path); 10225 exit(1); 10226 } 10227 10228 /* GUID was just infered from the direct path. 
*/ 10229 has_guid = true; 10230 } 10231 10232 if (!has_guid) { 10233 printf("%s: missing %s\n", __func__, "guid or path"); 10234 exit(1); 10235 } 10236 10237 if (!has_scope) { 10238 printf("%s: missing %s\n", __func__, "scope"); 10239 exit(1); 10240 } 10241 10242 if (lsb > msb) { 10243 printf("%s: lsb > msb doesn't make sense\n", __func__); 10244 exit(1); 10245 } 10246 10247 pmt_add_counter(guid, seq, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED); 10248 } 10249 10250 void parse_add_command(char *add_command) 10251 { 10252 if (strncmp(add_command, "pmt", strlen("pmt")) == 0) 10253 return parse_add_command_pmt(add_command); 10254 return parse_add_command_msr(add_command); 10255 } 10256 10257 int is_deferred_add(char *name) 10258 { 10259 int i; 10260 10261 for (i = 0; i < deferred_add_index; ++i) 10262 if (!strcmp(name, deferred_add_names[i])) 10263 return 1; 10264 return 0; 10265 } 10266 10267 int is_deferred_skip(char *name) 10268 { 10269 int i; 10270 10271 for (i = 0; i < deferred_skip_index; ++i) 10272 if (!strcmp(name, deferred_skip_names[i])) 10273 return 1; 10274 return 0; 10275 } 10276 10277 void probe_cpuidle_residency(void) 10278 { 10279 char path[64]; 10280 char name_buf[16]; 10281 FILE *input; 10282 int state; 10283 int min_state = 1024, max_state = 0; 10284 char *sp; 10285 10286 if (!DO_BIC(BIC_pct_idle)) 10287 return; 10288 10289 for (state = 10; state >= 0; --state) { 10290 10291 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); 10292 input = fopen(path, "r"); 10293 if (input == NULL) 10294 continue; 10295 if (!fgets(name_buf, sizeof(name_buf), input)) 10296 err(1, "%s: failed to read file", path); 10297 10298 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ 10299 sp = strchr(name_buf, '-'); 10300 if (!sp) 10301 sp = strchrnul(name_buf, '\n'); 10302 *sp = '%'; 10303 *(sp + 1) = '\0'; 10304 10305 remove_underbar(name_buf); 10306 10307 fclose(input); 10308 10309 sprintf(path, 
"cpuidle/state%d/time", state); 10310 10311 if (!DO_BIC(BIC_pct_idle) && !is_deferred_add(name_buf)) 10312 continue; 10313 10314 if (is_deferred_skip(name_buf)) 10315 continue; 10316 10317 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0); 10318 10319 if (state > max_state) 10320 max_state = state; 10321 if (state < min_state) 10322 min_state = state; 10323 } 10324 } 10325 10326 void probe_cpuidle_counts(void) 10327 { 10328 char path[64]; 10329 char name_buf[16]; 10330 FILE *input; 10331 int state; 10332 int min_state = 1024, max_state = 0; 10333 char *sp; 10334 10335 if (!DO_BIC(BIC_cpuidle)) 10336 return; 10337 10338 for (state = 10; state >= 0; --state) { 10339 10340 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); 10341 input = fopen(path, "r"); 10342 if (input == NULL) 10343 continue; 10344 if (!fgets(name_buf, sizeof(name_buf), input)) 10345 err(1, "%s: failed to read file", path); 10346 fclose(input); 10347 10348 remove_underbar(name_buf); 10349 10350 if (!DO_BIC(BIC_cpuidle) && !is_deferred_add(name_buf)) 10351 continue; 10352 10353 if (is_deferred_skip(name_buf)) 10354 continue; 10355 10356 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ 10357 sp = strchr(name_buf, '-'); 10358 if (!sp) 10359 sp = strchrnul(name_buf, '\n'); 10360 10361 /* 10362 * The 'below' sysfs file always contains 0 for the deepest state (largest index), 10363 * do not add it. 10364 */ 10365 if (state != max_state) { 10366 /* 10367 * Add 'C1+' for C1, and so on. The 'below' sysfs file always contains 0 for 10368 * the last state, so do not add it. 
10369 */ 10370 10371 *sp = '+'; 10372 *(sp + 1) = '\0'; 10373 sprintf(path, "cpuidle/state%d/below", state); 10374 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0); 10375 } 10376 10377 *sp = '\0'; 10378 sprintf(path, "cpuidle/state%d/usage", state); 10379 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0); 10380 10381 /* 10382 * The 'above' sysfs file always contains 0 for the shallowest state (smallest 10383 * index), do not add it. 10384 */ 10385 if (state != min_state) { 10386 *sp = '-'; 10387 *(sp + 1) = '\0'; 10388 sprintf(path, "cpuidle/state%d/above", state); 10389 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0); 10390 } 10391 } 10392 } 10393 10394 /* 10395 * parse cpuset with following syntax 10396 * 1,2,4..6,8-10 and set bits in cpu_subset 10397 */ 10398 void parse_cpu_command(char *optarg) 10399 { 10400 if (!strcmp(optarg, "core")) { 10401 if (cpu_subset) 10402 goto error; 10403 show_core_only++; 10404 return; 10405 } 10406 if (!strcmp(optarg, "package")) { 10407 if (cpu_subset) 10408 goto error; 10409 show_pkg_only++; 10410 return; 10411 } 10412 if (show_core_only || show_pkg_only) 10413 goto error; 10414 10415 cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS); 10416 if (cpu_subset == NULL) 10417 err(3, "CPU_ALLOC"); 10418 cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS); 10419 10420 CPU_ZERO_S(cpu_subset_size, cpu_subset); 10421 10422 if (parse_cpu_str(optarg, cpu_subset, cpu_subset_size)) 10423 goto error; 10424 10425 return; 10426 10427 error: 10428 fprintf(stderr, "\"--cpu %s\" malformed\n", optarg); 10429 help(); 10430 exit(-1); 10431 } 10432 10433 void cmdline(int argc, char **argv) 10434 { 10435 int opt; 10436 int option_index = 0; 10437 static struct option long_options[] = { 10438 { "add", required_argument, 0, 'a' }, 10439 { "cpu", required_argument, 0, 'c' }, 10440 { "Dump", no_argument, 0, 'D' }, 10441 { "debug", 
no_argument, 0, 'd' }, /* internal, not documented */ 10442 { "enable", required_argument, 0, 'e' }, 10443 { "force", no_argument, 0, 'f' }, 10444 { "interval", required_argument, 0, 'i' }, 10445 { "IPC", no_argument, 0, 'I' }, 10446 { "num_iterations", required_argument, 0, 'n' }, 10447 { "header_iterations", required_argument, 0, 'N' }, 10448 { "help", no_argument, 0, 'h' }, 10449 { "hide", required_argument, 0, 'H' }, // meh, -h taken by --help 10450 { "Joules", no_argument, 0, 'J' }, 10451 { "list", no_argument, 0, 'l' }, 10452 { "out", required_argument, 0, 'o' }, 10453 { "quiet", no_argument, 0, 'q' }, 10454 { "no-msr", no_argument, 0, 'M' }, 10455 { "no-perf", no_argument, 0, 'P' }, 10456 { "show", required_argument, 0, 's' }, 10457 { "Summary", no_argument, 0, 'S' }, 10458 { "TCC", required_argument, 0, 'T' }, 10459 { "version", no_argument, 0, 'v' }, 10460 { 0, 0, 0, 0 } 10461 }; 10462 10463 progname = argv[0]; 10464 10465 /* 10466 * Parse some options early, because they may make other options invalid, 10467 * like adding the MSR counter with --add and at the same time using --no-msr. 10468 */ 10469 while ((opt = getopt_long_only(argc, argv, "+MPn:", long_options, &option_index)) != -1) { 10470 switch (opt) { 10471 case 'M': 10472 no_msr = 1; 10473 break; 10474 case 'P': 10475 no_perf = 1; 10476 break; 10477 default: 10478 break; 10479 } 10480 } 10481 optind = 0; 10482 10483 while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qMST:v", long_options, &option_index)) != -1) { 10484 switch (opt) { 10485 case 'a': 10486 parse_add_command(optarg); 10487 break; 10488 case 'c': 10489 parse_cpu_command(optarg); 10490 break; 10491 case 'D': 10492 dump_only++; 10493 /* 10494 * Force the no_perf early to prevent using it as a source. 10495 * User asks for raw values, but perf returns them relative 10496 * to the opening of the file descriptor. 
10497 */ 10498 no_perf = 1; 10499 break; 10500 case 'e': 10501 /* --enable specified counter */ 10502 bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST); 10503 break; 10504 case 'f': 10505 force_load++; 10506 break; 10507 case 'd': 10508 debug++; 10509 ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); 10510 break; 10511 case 'H': 10512 /* 10513 * --hide: do not show those specified 10514 * multiple invocations simply clear more bits in enabled mask 10515 */ 10516 bic_enabled &= ~bic_lookup(optarg, HIDE_LIST); 10517 break; 10518 case 'h': 10519 default: 10520 help(); 10521 exit(1); 10522 case 'i': 10523 { 10524 double interval = strtod(optarg, NULL); 10525 10526 if (interval < 0.001) { 10527 fprintf(outf, "interval %f seconds is too small\n", interval); 10528 exit(2); 10529 } 10530 10531 interval_tv.tv_sec = interval_ts.tv_sec = interval; 10532 interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000; 10533 interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000; 10534 } 10535 break; 10536 case 'J': 10537 rapl_joules++; 10538 break; 10539 case 'l': 10540 ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); 10541 list_header_only++; 10542 quiet++; 10543 break; 10544 case 'o': 10545 outf = fopen_or_die(optarg, "w"); 10546 break; 10547 case 'q': 10548 quiet = 1; 10549 break; 10550 case 'M': 10551 case 'P': 10552 /* Parsed earlier */ 10553 break; 10554 case 'n': 10555 num_iterations = strtod(optarg, NULL); 10556 10557 if (num_iterations <= 0) { 10558 fprintf(outf, "iterations %d should be positive number\n", num_iterations); 10559 exit(2); 10560 } 10561 break; 10562 case 'N': 10563 header_iterations = strtod(optarg, NULL); 10564 10565 if (header_iterations <= 0) { 10566 fprintf(outf, "iterations %d should be positive number\n", header_iterations); 10567 exit(2); 10568 } 10569 break; 10570 case 's': 10571 /* 10572 * --show: show only those specified 10573 * The 1st invocation will clear and replace the enabled mask 10574 * subsequent invocations can add to it. 
10575 */ 10576 if (shown == 0) 10577 bic_enabled = bic_lookup(optarg, SHOW_LIST); 10578 else 10579 bic_enabled |= bic_lookup(optarg, SHOW_LIST); 10580 shown = 1; 10581 break; 10582 case 'S': 10583 summary_only++; 10584 break; 10585 case 'T': 10586 tj_max_override = atoi(optarg); 10587 break; 10588 case 'v': 10589 print_version(); 10590 exit(0); 10591 break; 10592 } 10593 } 10594 } 10595 10596 void set_rlimit(void) 10597 { 10598 struct rlimit limit; 10599 10600 if (getrlimit(RLIMIT_NOFILE, &limit) < 0) 10601 err(1, "Failed to get rlimit"); 10602 10603 if (limit.rlim_max < MAX_NOFILE) 10604 limit.rlim_max = MAX_NOFILE; 10605 if (limit.rlim_cur < MAX_NOFILE) 10606 limit.rlim_cur = MAX_NOFILE; 10607 10608 if (setrlimit(RLIMIT_NOFILE, &limit) < 0) 10609 err(1, "Failed to set rlimit"); 10610 } 10611 10612 int main(int argc, char **argv) 10613 { 10614 int fd, ret; 10615 10616 fd = open("/sys/fs/cgroup/cgroup.procs", O_WRONLY); 10617 if (fd < 0) 10618 goto skip_cgroup_setting; 10619 10620 ret = write(fd, "0\n", 2); 10621 if (ret == -1) 10622 perror("Can't update cgroup\n"); 10623 10624 close(fd); 10625 10626 skip_cgroup_setting: 10627 outf = stderr; 10628 cmdline(argc, argv); 10629 10630 if (!quiet) { 10631 print_version(); 10632 print_bootcmd(); 10633 } 10634 10635 probe_cpuidle_residency(); 10636 probe_cpuidle_counts(); 10637 10638 if (!getuid()) 10639 set_rlimit(); 10640 10641 turbostat_init(); 10642 10643 if (!no_msr) 10644 msr_sum_record(); 10645 10646 /* dump counters and exit */ 10647 if (dump_only) 10648 return get_and_dump_counters(); 10649 10650 /* list header and exit */ 10651 if (list_header_only) { 10652 print_header(","); 10653 flush_output_stdout(); 10654 return 0; 10655 } 10656 10657 /* 10658 * if any params left, it must be a command to fork 10659 */ 10660 if (argc - optind) 10661 return fork_it(argv + optind); 10662 else 10663 turbostat_loop(); 10664 10665 return 0; 10666 } 10667