// SPDX-License-Identifier: GPL-2.0-only
/*
 * turbostat -- show CPU frequency and C-state residency
 * on modern Intel and AMD processors.
 *
 * Copyright (c) 2025 Intel Corporation.
 * Len Brown <len.brown@intel.com>
 */

#define _GNU_SOURCE
#include MSRHEADER

// copied from arch/x86/include/asm/cpu_device_id.h
#define VFM_MODEL_BIT	0
#define VFM_FAMILY_BIT	8
#define VFM_VENDOR_BIT	16
#define VFM_RSVD_BIT	24

#define VFM_MODEL_MASK	GENMASK(VFM_FAMILY_BIT - 1, VFM_MODEL_BIT)
#define VFM_FAMILY_MASK	GENMASK(VFM_VENDOR_BIT - 1, VFM_FAMILY_BIT)
#define VFM_VENDOR_MASK	GENMASK(VFM_RSVD_BIT - 1, VFM_VENDOR_BIT)

#define VFM_MODEL(vfm)	(((vfm) & VFM_MODEL_MASK) >> VFM_MODEL_BIT)
#define VFM_FAMILY(vfm)	(((vfm) & VFM_FAMILY_MASK) >> VFM_FAMILY_BIT)
#define VFM_VENDOR(vfm)	(((vfm) & VFM_VENDOR_MASK) >> VFM_VENDOR_BIT)

#define VFM_MAKE(_vendor, _family, _model) (	\
	((_model) << VFM_MODEL_BIT) |		\
	((_family) << VFM_FAMILY_BIT) |		\
	((_vendor) << VFM_VENDOR_BIT)		\
)
// end copied section

#define CPUID_LEAF_MODEL_ID			0x1A
#define CPUID_LEAF_MODEL_ID_CORE_TYPE_SHIFT	24

#define X86_VENDOR_INTEL	0

#include INTEL_FAMILY_HEADER
#include BUILD_BUG_HEADER
#include <stdarg.h>
#include <stdio.h>
#include <err.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/select.h>
#include <sys/resource.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <signal.h>
#include <sys/time.h>
#include <stdlib.h>
#include <getopt.h>
#include <dirent.h>
#include <string.h>
#include <ctype.h>
#include <sched.h>
#include <time.h>
#include <cpuid.h>
#include <sys/capability.h>
#include <errno.h>
#include <math.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <stdbool.h>
#include <assert.h>
#include <linux/kernel.h>

#define UNUSED(x) (void)(x)

/*
 * This list matches the column headers, except:
 * 1. built-in counters only; the sysfs counters are not here -- we learn of those at run-time
 * 2. "Core" and "CPU" are moved to the end, so that strings containing them
 *    cannot match them first for --show and --hide.
 */

/*
 * buffer size used by sscanf() for added column names.
 * Names are usually truncated to 7 characters, but the buffer also handles
 * the 18-character columns used for raw 64-bit counters.
 */
#define NAME_BYTES 20
#define PATH_BYTES 128
#define PERF_NAME_BYTES 128

#define MAX_NOFILE 0x8000

#define COUNTER_KIND_PERF_PREFIX "perf/"
#define COUNTER_KIND_PERF_PREFIX_LEN strlen(COUNTER_KIND_PERF_PREFIX)
#define PERF_DEV_NAME_BYTES 32
#define PERF_EVT_NAME_BYTES 32

#define INTEL_ECORE_TYPE	0x20
#define INTEL_PCORE_TYPE	0x40

#define ROUND_UP_TO_PAGE_SIZE(n) (((n) + 0x1000UL-1UL) & ~(0x1000UL-1UL))
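/*
 * Rounds n up to the next multiple of the (assumed 4 KiB) page size by
 * adding page_size - 1 and clearing the low 12 bits, e.g.:
 *
 *	ROUND_UP_TO_PAGE_SIZE(1)      == 0x1000
 *	ROUND_UP_TO_PAGE_SIZE(0x1000) == 0x1000
 *	ROUND_UP_TO_PAGE_SIZE(0x1001) == 0x2000
 */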
enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M };
enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE };
enum counter_source { COUNTER_SOURCE_NONE, COUNTER_SOURCE_PERF, COUNTER_SOURCE_MSR };

struct perf_counter_info {
	struct perf_counter_info *next;

	/* How to open the counter / What counter it is. */
	char device[PERF_DEV_NAME_BYTES];
	char event[PERF_EVT_NAME_BYTES];

	/* How to show/format the counter. */
	char name[PERF_NAME_BYTES];
	unsigned int width;
	enum counter_scope scope;
	enum counter_type type;
	enum counter_format format;
	double scale;

	/* For reading the counter. */
	int *fd_perf_per_domain;
	size_t num_domains;
};

struct sysfs_path {
	char path[PATH_BYTES];
	int id;
	struct sysfs_path *next;
};

struct msr_counter {
	unsigned int msr_num;
	char name[NAME_BYTES];
	struct sysfs_path *sp;
	unsigned int width;
	enum counter_type type;
	enum counter_format format;
	struct msr_counter *next;
	unsigned int flags;
#define FLAGS_HIDE	(1 << 0)
#define FLAGS_SHOW	(1 << 1)
#define SYSFS_PERCPU	(1 << 1)
};

struct msr_counter bic[] = {
	{ 0x0, "usec", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Time_Of_Day_Seconds", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Package", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Node", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Avg_MHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Busy%", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Bzy_MHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "TSC_MHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "IRQ", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SMI", NULL, 32, 0, FORMAT_DELTA, NULL, 0 },
	{ 0x0, "sysfs", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c1", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c3", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c7", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "ThreadC", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CoreTmp", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CoreCnt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "PkgTmp", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFX%rc6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFXMHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc2", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc3", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc7", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc8", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc9", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pk%pc10", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%LPI", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SYS%LPI", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "PkgWatt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CorWatt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFXWatt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "PkgCnt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "RAMWatt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "PKG_%", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "RAM_%", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg_J", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Cor_J", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFX_J", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "RAM_J", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Mod%c6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Totl%C0", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Any%C0", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFX%C0", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPUGFX%", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Core", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "APIC", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "X2APIC", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Die", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFXAMHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "IPC", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CoreThr", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "UncMHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SAM%mc6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SAMMHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SAMAMHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Die%c6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SysWatt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 },
};
205 { 0x0, "SysWatt", NULL, 0, 0, 0, NULL, 0 }, 206 { 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 }, 207 { 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 }, 208 { 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 }, 209 }; 210 211 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) 212 #define BIC_USEC (1ULL << 0) 213 #define BIC_TOD (1ULL << 1) 214 #define BIC_Package (1ULL << 2) 215 #define BIC_Node (1ULL << 3) 216 #define BIC_Avg_MHz (1ULL << 4) 217 #define BIC_Busy (1ULL << 5) 218 #define BIC_Bzy_MHz (1ULL << 6) 219 #define BIC_TSC_MHz (1ULL << 7) 220 #define BIC_IRQ (1ULL << 8) 221 #define BIC_SMI (1ULL << 9) 222 #define BIC_sysfs (1ULL << 10) 223 #define BIC_CPU_c1 (1ULL << 11) 224 #define BIC_CPU_c3 (1ULL << 12) 225 #define BIC_CPU_c6 (1ULL << 13) 226 #define BIC_CPU_c7 (1ULL << 14) 227 #define BIC_ThreadC (1ULL << 15) 228 #define BIC_CoreTmp (1ULL << 16) 229 #define BIC_CoreCnt (1ULL << 17) 230 #define BIC_PkgTmp (1ULL << 18) 231 #define BIC_GFX_rc6 (1ULL << 19) 232 #define BIC_GFXMHz (1ULL << 20) 233 #define BIC_Pkgpc2 (1ULL << 21) 234 #define BIC_Pkgpc3 (1ULL << 22) 235 #define BIC_Pkgpc6 (1ULL << 23) 236 #define BIC_Pkgpc7 (1ULL << 24) 237 #define BIC_Pkgpc8 (1ULL << 25) 238 #define BIC_Pkgpc9 (1ULL << 26) 239 #define BIC_Pkgpc10 (1ULL << 27) 240 #define BIC_CPU_LPI (1ULL << 28) 241 #define BIC_SYS_LPI (1ULL << 29) 242 #define BIC_PkgWatt (1ULL << 30) 243 #define BIC_CorWatt (1ULL << 31) 244 #define BIC_GFXWatt (1ULL << 32) 245 #define BIC_PkgCnt (1ULL << 33) 246 #define BIC_RAMWatt (1ULL << 34) 247 #define BIC_PKG__ (1ULL << 35) 248 #define BIC_RAM__ (1ULL << 36) 249 #define BIC_Pkg_J (1ULL << 37) 250 #define BIC_Cor_J (1ULL << 38) 251 #define BIC_GFX_J (1ULL << 39) 252 #define BIC_RAM_J (1ULL << 40) 253 #define BIC_Mod_c6 (1ULL << 41) 254 #define BIC_Totl_c0 (1ULL << 42) 255 #define BIC_Any_c0 (1ULL << 43) 256 #define BIC_GFX_c0 (1ULL << 44) 257 #define BIC_CPUGFX (1ULL << 45) 258 #define BIC_Core (1ULL << 46) 259 #define BIC_CPU (1ULL << 47) 260 #define BIC_APIC (1ULL << 48) 261 #define BIC_X2APIC (1ULL << 49) 262 #define BIC_Die (1ULL << 50) 263 #define BIC_GFXACTMHz (1ULL << 51) 264 #define BIC_IPC (1ULL << 52) 265 #define BIC_CORE_THROT_CNT (1ULL << 53) 266 #define BIC_UNCORE_MHZ (1ULL << 54) 267 #define BIC_SAM_mc6 (1ULL << 55) 268 #define BIC_SAMMHz (1ULL << 56) 269 #define BIC_SAMACTMHz (1ULL << 57) 270 #define BIC_Diec6 (1ULL << 58) 271 #define BIC_SysWatt (1ULL << 59) 272 #define BIC_Sys_J (1ULL << 60) 273 #define BIC_NMI (1ULL << 61) 274 #define BIC_CPU_c1e (1ULL << 62) 275 276 #define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) 277 #define BIC_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) 278 #define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) 279 #define BIC_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) 280 #define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) 281 282 #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) 283 284 unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); 285 unsigned long long bic_present 
unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;

#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME)
#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
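/*
 * A column is emitted only when it is both enabled (not hidden with
 * --hide) and present on this platform.  A typical (illustrative)
 * call site looks like:
 *
 *	if (DO_BIC(BIC_IRQ))
 *		print_irq_column();	// placeholder helper
 */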
/*
 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
 * If you change the values, note they are used both in comparisons
 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
 */
#define PCLUKN 0		/* Unknown */
#define PCLRSV 1		/* Reserved */
#define PCL__0 2		/* PC0 */
#define PCL__1 3		/* PC1 */
#define PCL__2 4		/* PC2 */
#define PCL__3 5		/* PC3 */
#define PCL__4 6		/* PC4 */
#define PCL__6 7		/* PC6 */
#define PCL_6N 8		/* PC6 No Retention */
#define PCL_6R 9		/* PC6 Retention */
#define PCL__7 10		/* PC7 */
#define PCL_7S 11		/* PC7 Shrink */
#define PCL__8 12		/* PC8 */
#define PCL__9 13		/* PC9 */
#define PCL_10 14		/* PC10 */
#define PCLUNL 15		/* Unlimited */

struct amperf_group_fd;

char *proc_stat = "/proc/stat";
FILE *outf;
int *fd_percpu;
int *fd_instr_count_percpu;
struct timeval interval_tv = { 5, 0 };
struct timespec interval_ts = { 5, 0 };

unsigned int num_iterations;
unsigned int header_iterations;
unsigned int debug;
unsigned int quiet;
unsigned int shown;
unsigned int sums_need_wide_columns;
unsigned int rapl_joules;
unsigned int summary_only;
unsigned int list_header_only;
unsigned int dump_only;
unsigned int force_load;
unsigned int has_aperf;
unsigned int has_aperf_access;
unsigned int has_epb;
unsigned int has_turbo;
unsigned int is_hybrid;
unsigned int units = 1000000;	/* MHz etc */
unsigned int genuine_intel;
unsigned int authentic_amd;
unsigned int hygon_genuine;
unsigned int max_level, max_extended_level;
unsigned int has_invariant_tsc;
unsigned int aperf_mperf_multiplier = 1;
double bclk;
double base_hz;
unsigned int has_base_hz;
double tsc_tweak = 1.0;
unsigned int show_pkg_only;
unsigned int show_core_only;
char *output_buffer, *outp;
unsigned int do_dts;
unsigned int do_ptm;
unsigned int do_ipc;
unsigned long long cpuidle_cur_cpu_lpi_us;
unsigned long long cpuidle_cur_sys_lpi_us;
unsigned int tj_max;
unsigned int tj_max_override;
double rapl_power_units, rapl_time_units;
double rapl_dram_energy_units, rapl_energy_units, rapl_psys_energy_units;
double rapl_joule_counter_range;
unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu;
unsigned int has_hwp;		/* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
				/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
unsigned int has_hwp_notify;	/* IA32_HWP_INTERRUPT */
unsigned int has_hwp_activity_window;	/* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp;	/* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg;	/* IA32_HWP_REQUEST_PKG */
unsigned int first_counter_read = 1;

static struct timeval procsysfs_tv_begin;

int ignore_stdin;
bool no_msr;
bool no_perf;

enum gfx_sysfs_idx {
	GFX_rc6,
	GFX_MHz,
	GFX_ACTMHz,
	SAM_mc6,
	SAM_MHz,
	SAM_ACTMHz,
	GFX_MAX
};

struct gfx_sysfs_info {
	FILE *fp;
	unsigned int val;
	unsigned long long val_ull;
};

static struct gfx_sysfs_info gfx_info[GFX_MAX];

int get_msr(int cpu, off_t offset, unsigned long long *msr);
int add_counter(unsigned int msr_num, char *path, char *name,
		unsigned int width, enum counter_scope scope,
		enum counter_type type, enum counter_format format, int flags, int package_num);

/* Model specific support Start */

/* List of features that may diverge among different platforms */
struct platform_features {
	bool has_msr_misc_feature_control;	/* MSR_MISC_FEATURE_CONTROL */
	bool has_msr_misc_pwr_mgmt;	/* MSR_MISC_PWR_MGMT */
	bool has_nhm_msrs;	/* MSR_PLATFORM_INFO, MSR_IA32_TEMPERATURE_TARGET, MSR_SMI_COUNT, MSR_PKG_CST_CONFIG_CONTROL, MSR_IA32_POWER_CTL, TRL MSRs */
	bool has_config_tdp;	/* MSR_CONFIG_TDP_NOMINAL/LEVEL_1/LEVEL_2/CONTROL, MSR_TURBO_ACTIVATION_RATIO */
	int bclk_freq;		/* CPU base clock */
	int crystal_freq;	/* Crystal clock to use when not available from CPUID.15 */
	int supported_cstates;	/* Core cstates and Package cstates supported */
	int cst_limit;		/* MSR_PKG_CST_CONFIG_CONTROL */
	bool has_cst_auto_convension;	/* AUTOMATIC_CSTATE_CONVERSION bit in MSR_PKG_CST_CONFIG_CONTROL */
	bool has_irtl_msrs;	/* MSR_PKGC3/PKGC6/PKGC7/PKGC8/PKGC9/PKGC10_IRTL */
	bool has_msr_core_c1_res;	/* MSR_CORE_C1_RES */
	bool has_msr_module_c6_res_ms;	/* MSR_MODULE_C6_RES_MS */
	bool has_msr_c6_demotion_policy_config;	/* MSR_CC6_DEMOTION_POLICY_CONFIG/MSR_MC6_DEMOTION_POLICY_CONFIG */
	bool has_msr_atom_pkg_c6_residency;	/* MSR_ATOM_PKG_C6_RESIDENCY */
	bool has_msr_knl_core_c6_residency;	/* MSR_KNL_CORE_C6_RESIDENCY */
	bool has_ext_cst_msrs;	/* MSR_PKG_WEIGHTED_CORE_C0_RES/MSR_PKG_ANY_CORE_C0_RES/MSR_PKG_ANY_GFXE_C0_RES/MSR_PKG_BOTH_CORE_GFXE_C0_RES */
	bool has_cst_prewake_bit;	/* Cstate prewake bit in MSR_IA32_POWER_CTL */
	int trl_msrs;		/* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */
	int plr_msrs;		/* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */
	int rapl_msrs;		/* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */
	bool has_per_core_rapl;	/* Indicates cores energy collection is per-core, not per-package.  AMD specific for now */
	bool has_rapl_divisor;	/* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */
	bool has_fixed_rapl_unit;	/* Fixed Energy Unit used for DRAM RAPL Domain */
	bool has_fixed_rapl_psys_unit;	/* Fixed Energy Unit used for PSYS RAPL Domain */
	int rapl_quirk_tdp;	/* Hardcoded TDP value when it cannot be retrieved from hardware */
	int tcc_offset_bits;	/* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */
	bool enable_tsc_tweak;	/* Use CPU Base freq instead of TSC freq for aperf/mperf counter */
	bool need_perf_multiplier;	/* mperf/aperf multiplier */
};

struct platform_data {
	unsigned int vfm;
	const struct platform_features *features;
};

/* For BCLK */
enum bclk_freq {
	BCLK_100MHZ = 1,
	BCLK_133MHZ,
	BCLK_SLV,
};

#define SLM_BCLK_FREQS 5
double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };

double slm_bclk(void)
{
	unsigned long long msr = 3;
	unsigned int i;
	double freq;

	if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
		fprintf(outf, "SLM BCLK: unknown\n");

	i = msr & 0xf;
	if (i >= SLM_BCLK_FREQS) {
		fprintf(outf, "SLM BCLK[%d] invalid\n", i);
		i = 3;
	}
	freq = slm_freq_table[i];

	if (!quiet)
		fprintf(outf, "SLM BCLK: %.1f MHz\n", freq);

	return freq;
}

/* For Package cstate limit */
enum package_cstate_limit {
	CST_LIMIT_NHM = 1,
	CST_LIMIT_SNB,
	CST_LIMIT_HSW,
	CST_LIMIT_SKX,
	CST_LIMIT_ICX,
	CST_LIMIT_SLV,
	CST_LIMIT_AMT,
	CST_LIMIT_KNL,
	CST_LIMIT_GMT,
};

/* For Turbo Ratio Limit MSRs */
enum turbo_ratio_limit_msrs {
	TRL_BASE = BIT(0),
	TRL_LIMIT1 = BIT(1),
	TRL_LIMIT2 = BIT(2),
	TRL_ATOM = BIT(3),
	TRL_KNL = BIT(4),
	TRL_CORECOUNT = BIT(5),
};

/* For Perf Limit Reason MSRs */
enum perf_limit_reason_msrs {
	PLR_CORE = BIT(0),
	PLR_GFX = BIT(1),
	PLR_RING = BIT(2),
};

/* For RAPL MSRs */
enum rapl_msrs {
	RAPL_PKG_POWER_LIMIT = BIT(0),	/* 0x610 MSR_PKG_POWER_LIMIT */
	RAPL_PKG_ENERGY_STATUS = BIT(1),	/* 0x611 MSR_PKG_ENERGY_STATUS */
	RAPL_PKG_PERF_STATUS = BIT(2),	/* 0x613 MSR_PKG_PERF_STATUS */
	RAPL_PKG_POWER_INFO = BIT(3),	/* 0x614 MSR_PKG_POWER_INFO */
	RAPL_DRAM_POWER_LIMIT = BIT(4),	/* 0x618 MSR_DRAM_POWER_LIMIT */
	RAPL_DRAM_ENERGY_STATUS = BIT(5),	/* 0x619 MSR_DRAM_ENERGY_STATUS */
	RAPL_DRAM_PERF_STATUS = BIT(6),	/* 0x61b MSR_DRAM_PERF_STATUS */
	RAPL_DRAM_POWER_INFO = BIT(7),	/* 0x61c MSR_DRAM_POWER_INFO */
	RAPL_CORE_POWER_LIMIT = BIT(8),	/* 0x638 MSR_PP0_POWER_LIMIT */
	RAPL_CORE_ENERGY_STATUS = BIT(9),	/* 0x639 MSR_PP0_ENERGY_STATUS */
	RAPL_CORE_POLICY = BIT(10),	/* 0x63a MSR_PP0_POLICY */
	RAPL_GFX_POWER_LIMIT = BIT(11),	/* 0x640 MSR_PP1_POWER_LIMIT */
	RAPL_GFX_ENERGY_STATUS = BIT(12),	/* 0x641 MSR_PP1_ENERGY_STATUS */
	RAPL_GFX_POLICY = BIT(13),	/* 0x642 MSR_PP1_POLICY */
	RAPL_AMD_PWR_UNIT = BIT(14),	/* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */
	RAPL_AMD_CORE_ENERGY_STAT = BIT(15),	/* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */
	RAPL_AMD_PKG_ENERGY_STAT = BIT(16),	/* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */
	RAPL_PLATFORM_ENERGY_LIMIT = BIT(17),	/* 0x64c MSR_PLATFORM_ENERGY_LIMIT */
	RAPL_PLATFORM_ENERGY_STATUS = BIT(18),	/* 0x64d MSR_PLATFORM_ENERGY_STATUS */
};

#define RAPL_PKG (RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT)
#define RAPL_DRAM (RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT)
#define RAPL_CORE (RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT)
#define RAPL_GFX (RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS)
#define RAPL_PSYS (RAPL_PLATFORM_ENERGY_STATUS | RAPL_PLATFORM_ENERGY_LIMIT)

#define RAPL_PKG_ALL (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO)
#define RAPL_DRAM_ALL (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO)
#define RAPL_CORE_ALL (RAPL_CORE | RAPL_CORE_POLICY)
#define RAPL_GFX_ALL (RAPL_GFX | RAPL_GFX_POLICY)

#define RAPL_AMD_F17H (RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | RAPL_AMD_PKG_ENERGY_STAT)

/* For Cstates */
enum cstates {
	CC1 = BIT(0),
	CC3 = BIT(1),
	CC6 = BIT(2),
	CC7 = BIT(3),
	PC2 = BIT(4),
	PC3 = BIT(5),
	PC6 = BIT(6),
	PC7 = BIT(7),
	PC8 = BIT(8),
	PC9 = BIT(9),
	PC10 = BIT(10),
};
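/*
 * Each platform below advertises what it supports by OR-ing these bits
 * into its feature fields, e.g.:
 *
 *	.supported_cstates = CC1 | CC6 | PC2 | PC6,
 *
 * which probe code then tests with expressions like
 * (platform->supported_cstates & CC6).
 */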
static const struct platform_features nhm_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_133MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_NHM,
	.trl_msrs = TRL_BASE,
};

static const struct platform_features nhx_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_133MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_NHM,
};

static const struct platform_features snb_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_SNB,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

static const struct platform_features snx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_SNB,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
};

static const struct platform_features ivb_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_SNB,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

static const struct platform_features ivx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_SNB,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE | TRL_LIMIT1,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
};

static const struct platform_features hsw_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

static const struct platform_features hsx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2,
	.plr_msrs = PLR_CORE | PLR_RING,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
	.has_fixed_rapl_unit = 1,
};

static const struct platform_features hswl_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

static const struct platform_features hswg_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

static const struct platform_features bdw_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

static const struct platform_features bdwg_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

static const struct platform_features bdx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | PC2 | PC3 | PC6,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.has_cst_auto_convension = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
	.has_fixed_rapl_unit = 1,
};

static const struct platform_features skl_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.crystal_freq = 24000000,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.has_ext_cst_msrs = 1,
	.trl_msrs = TRL_BASE,
	.tcc_offset_bits = 6,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS,
	.enable_tsc_tweak = 1,
};

static const struct platform_features cnl_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.has_msr_core_c1_res = 1,
	.has_ext_cst_msrs = 1,
	.trl_msrs = TRL_BASE,
	.tcc_offset_bits = 6,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS,
	.enable_tsc_tweak = 1,
};

/* Copied from cnl_features, with PC7/PC9 removed */
static const struct platform_features adl_features = {
	.has_msr_misc_feature_control = cnl_features.has_msr_misc_feature_control,
	.has_msr_misc_pwr_mgmt = cnl_features.has_msr_misc_pwr_mgmt,
	.has_nhm_msrs = cnl_features.has_nhm_msrs,
	.has_config_tdp = cnl_features.has_config_tdp,
	.bclk_freq = cnl_features.bclk_freq,
	.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10,
	.cst_limit = cnl_features.cst_limit,
	.has_irtl_msrs = cnl_features.has_irtl_msrs,
	.has_msr_core_c1_res = cnl_features.has_msr_core_c1_res,
	.has_ext_cst_msrs = cnl_features.has_ext_cst_msrs,
	.trl_msrs = cnl_features.trl_msrs,
	.tcc_offset_bits = cnl_features.tcc_offset_bits,
	.rapl_msrs = cnl_features.rapl_msrs,
	.enable_tsc_tweak = cnl_features.enable_tsc_tweak,
};

/* Copied from adl_features, with PC3/PC8 removed */
static const struct platform_features lnl_features = {
	.has_msr_misc_feature_control = adl_features.has_msr_misc_feature_control,
	.has_msr_misc_pwr_mgmt = adl_features.has_msr_misc_pwr_mgmt,
	.has_nhm_msrs = adl_features.has_nhm_msrs,
	.has_config_tdp = adl_features.has_config_tdp,
	.bclk_freq = adl_features.bclk_freq,
	.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC6 | PC10,
	.cst_limit = adl_features.cst_limit,
	.has_irtl_msrs = adl_features.has_irtl_msrs,
	.has_msr_core_c1_res = adl_features.has_msr_core_c1_res,
	.has_ext_cst_msrs = adl_features.has_ext_cst_msrs,
	.trl_msrs = adl_features.trl_msrs,
	.tcc_offset_bits = adl_features.tcc_offset_bits,
	.rapl_msrs = adl_features.rapl_msrs,
	.enable_tsc_tweak = adl_features.enable_tsc_tweak,
};

static const struct platform_features skx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_SKX,
	.has_irtl_msrs = 1,
	.has_cst_auto_convension = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
	.has_fixed_rapl_unit = 1,
};

static const struct platform_features icx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_ICX,
	.has_msr_core_c1_res = 1,
	.has_irtl_msrs = 1,
	.has_cst_prewake_bit = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
	.has_fixed_rapl_unit = 1,
};

static const struct platform_features spr_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_SKX,
	.has_msr_core_c1_res = 1,
	.has_irtl_msrs = 1,
	.has_cst_prewake_bit = 1,
	.has_fixed_rapl_psys_unit = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
};

static const struct platform_features srf_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_SKX,
	.has_msr_core_c1_res = 1,
	.has_msr_module_c6_res_ms = 1,
	.has_irtl_msrs = 1,
	.has_cst_prewake_bit = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
};

static const struct platform_features grr_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6,
	.cst_limit = CST_LIMIT_SKX,
	.has_msr_core_c1_res = 1,
	.has_msr_module_c6_res_ms = 1,
	.has_irtl_msrs = 1,
	.has_cst_prewake_bit = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
};

static const struct platform_features slv_features = {
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_SLV,
	.supported_cstates = CC1 | CC6 | PC6,
	.cst_limit = CST_LIMIT_SLV,
	.has_msr_core_c1_res = 1,
	.has_msr_module_c6_res_ms = 1,
	.has_msr_c6_demotion_policy_config = 1,
	.has_msr_atom_pkg_c6_residency = 1,
	.trl_msrs = TRL_ATOM,
	.rapl_msrs = RAPL_PKG | RAPL_CORE,
	.has_rapl_divisor = 1,
	.rapl_quirk_tdp = 30,
};

static const struct platform_features slvd_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_SLV,
	.supported_cstates = CC1 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_SLV,
	.has_msr_atom_pkg_c6_residency = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE,
	.rapl_quirk_tdp = 30,
};

static const struct platform_features amt_features = {
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_133MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_AMT,
	.trl_msrs = TRL_BASE,
};

static const struct platform_features gmt_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.crystal_freq = 19200000,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
};

static const struct platform_features gmtd_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.crystal_freq = 25000000,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.has_msr_core_c1_res = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS,
};

static const struct platform_features gmtp_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.crystal_freq = 19200000,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
};

static const struct platform_features tmt_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
	.enable_tsc_tweak = 1,
};

static const struct platform_features tmtd_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL,
};

static const struct platform_features knl_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_KNL,
	.has_msr_knl_core_c6_residency = 1,
	.trl_msrs = TRL_KNL,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
	.has_fixed_rapl_unit = 1,
	.need_perf_multiplier = 1,
};

static const struct platform_features default_features = {
};

static const struct platform_features amd_features_with_rapl = {
	.rapl_msrs = RAPL_AMD_F17H,
	.has_per_core_rapl = 1,
	.rapl_quirk_tdp = 280,	/* This is the max stock TDP of HEDT/Server Fam17h+ chips */
};

static const struct platform_data turbostat_pdata[] = {
	{ INTEL_NEHALEM, &nhm_features },
	{ INTEL_NEHALEM_G, &nhm_features },
	{ INTEL_NEHALEM_EP, &nhm_features },
	{ INTEL_NEHALEM_EX, &nhx_features },
	{ INTEL_WESTMERE, &nhm_features },
	{ INTEL_WESTMERE_EP, &nhm_features },
	{ INTEL_WESTMERE_EX, &nhx_features },
	{ INTEL_SANDYBRIDGE, &snb_features },
	{ INTEL_SANDYBRIDGE_X, &snx_features },
	{ INTEL_IVYBRIDGE, &ivb_features },
	{ INTEL_IVYBRIDGE_X, &ivx_features },
	{ INTEL_HASWELL, &hsw_features },
	{ INTEL_HASWELL_X, &hsx_features },
	{ INTEL_HASWELL_L, &hswl_features },
	{ INTEL_HASWELL_G, &hswg_features },
	{ INTEL_BROADWELL, &bdw_features },
	{ INTEL_BROADWELL_G, &bdwg_features },
	{ INTEL_BROADWELL_X, &bdx_features },
	{ INTEL_BROADWELL_D, &bdx_features },
	{ INTEL_SKYLAKE_L, &skl_features },
	{ INTEL_SKYLAKE, &skl_features },
	{ INTEL_SKYLAKE_X, &skx_features },
	{ INTEL_KABYLAKE_L, &skl_features },
	{ INTEL_KABYLAKE, &skl_features },
	{ INTEL_COMETLAKE, &skl_features },
	{ INTEL_COMETLAKE_L, &skl_features },
	{ INTEL_CANNONLAKE_L, &cnl_features },
	{ INTEL_ICELAKE_X, &icx_features },
	{ INTEL_ICELAKE_D, &icx_features },
	{ INTEL_ICELAKE_L, &cnl_features },
	{ INTEL_ICELAKE_NNPI, &cnl_features },
	{ INTEL_ROCKETLAKE, &cnl_features },
	{ INTEL_TIGERLAKE_L, &cnl_features },
	{ INTEL_TIGERLAKE, &cnl_features },
	{ INTEL_SAPPHIRERAPIDS_X, &spr_features },
	{ INTEL_EMERALDRAPIDS_X, &spr_features },
	{ INTEL_GRANITERAPIDS_X, &spr_features },
	{ INTEL_GRANITERAPIDS_D, &spr_features },
	{ INTEL_LAKEFIELD, &cnl_features },
	{ INTEL_ALDERLAKE, &adl_features },
	{ INTEL_ALDERLAKE_L, &adl_features },
	{ INTEL_RAPTORLAKE, &adl_features },
	{ INTEL_RAPTORLAKE_P, &adl_features },
	{ INTEL_RAPTORLAKE_S, &adl_features },
	{ INTEL_METEORLAKE, &adl_features },
	{ INTEL_METEORLAKE_L, &adl_features },
	{ INTEL_ARROWLAKE_H, &adl_features },
	{ INTEL_ARROWLAKE_U, &adl_features },
	{ INTEL_ARROWLAKE, &adl_features },
	{ INTEL_LUNARLAKE_M, &lnl_features },
	{ INTEL_PANTHERLAKE_L, &lnl_features },
	{ INTEL_ATOM_SILVERMONT, &slv_features },
	{ INTEL_ATOM_SILVERMONT_D, &slvd_features },
	{ INTEL_ATOM_AIRMONT, &amt_features },
	{ INTEL_ATOM_GOLDMONT, &gmt_features },
	{ INTEL_ATOM_GOLDMONT_D, &gmtd_features },
	{ INTEL_ATOM_GOLDMONT_PLUS, &gmtp_features },
	{ INTEL_ATOM_TREMONT_D, &tmtd_features },
	{ INTEL_ATOM_TREMONT, &tmt_features },
	{ INTEL_ATOM_TREMONT_L, &tmt_features },
	{ INTEL_ATOM_GRACEMONT, &adl_features },
	{ INTEL_ATOM_CRESTMONT_X, &srf_features },
	{ INTEL_ATOM_CRESTMONT, &grr_features },
	{ INTEL_ATOM_DARKMONT_X, &srf_features },
	{ INTEL_XEON_PHI_KNL, &knl_features },
	{ INTEL_XEON_PHI_KNM, &knl_features },
	/*
	 * Missing support for
	 * INTEL_ICELAKE
	 * INTEL_ATOM_SILVERMONT_MID
	 * INTEL_ATOM_AIRMONT_MID
	 * INTEL_ATOM_AIRMONT_NP
	 */
	{ 0, NULL },
};

static const struct platform_features *platform;

void probe_platform_features(unsigned int family, unsigned int model)
{
	int i;

	if (authentic_amd || hygon_genuine) {
		/* fallback to default features on unsupported models */
		force_load++;
		if (max_extended_level >= 0x80000007) {
			unsigned int eax, ebx, ecx, edx;

			__cpuid(0x80000007, eax, ebx, ecx, edx);
			/* RAPL (Fam 17h+) */
			if ((edx & (1 << 14)) && family >= 0x17)
				platform = &amd_features_with_rapl;
		}
		goto end;
	}

	if (!genuine_intel)
		goto end;

	for (i = 0; turbostat_pdata[i].features; i++) {
		if (VFM_FAMILY(turbostat_pdata[i].vfm) == family && VFM_MODEL(turbostat_pdata[i].vfm) == model) {
			platform = turbostat_pdata[i].features;
			return;
		}
	}

end:
	if (force_load && !platform) {
		fprintf(outf, "Forced to run on unsupported platform!\n");
		platform = &default_features;
	}

	if (platform)
		return;

	fprintf(stderr, "Unsupported platform detected.\n"
		"\tSee RUN THE LATEST VERSION on turbostat(8)\n");
	exit(1);
}
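/*
 * Example: a Sapphire Rapids Xeon reports family 6, model 0x8F, so the
 * scan above matches the INTEL_SAPPHIRERAPIDS_X entry and selects
 * spr_features (model numbers per the INTEL_* definitions pulled in via
 * INTEL_FAMILY_HEADER).
 */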
/* Model specific support End */

#define TJMAX_DEFAULT	100

/* MSRs that are not yet in the kernel-provided header. */
#define MSR_RAPL_PWR_UNIT	0xc0010299
#define MSR_CORE_ENERGY_STAT	0xc001029a
#define MSR_PKG_ENERGY_STAT	0xc001029b

#define MAX(a, b) ((a) > (b) ? (a) : (b))

int backwards_count;
char *progname;

#define CPU_SUBSET_MAXCPUS	1024	/* need to use before probe... */
cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
#define MAX_ADDED_THREAD_COUNTERS 24
#define MAX_ADDED_CORE_COUNTERS 8
#define MAX_ADDED_PACKAGE_COUNTERS 16
#define PMT_MAX_ADDED_THREAD_COUNTERS 24
#define PMT_MAX_ADDED_CORE_COUNTERS 8
#define PMT_MAX_ADDED_PACKAGE_COUNTERS 16
#define BITMASK_SIZE 32

#define ZERO_ARRAY(arr) (memset(arr, 0, sizeof(arr)) + __must_be_array(arr))

/* Indexes used to map data read from perf and MSRs into global variables */
enum rapl_rci_index {
	RAPL_RCI_INDEX_ENERGY_PKG = 0,
	RAPL_RCI_INDEX_ENERGY_CORES = 1,
	RAPL_RCI_INDEX_DRAM = 2,
	RAPL_RCI_INDEX_GFX = 3,
	RAPL_RCI_INDEX_PKG_PERF_STATUS = 4,
	RAPL_RCI_INDEX_DRAM_PERF_STATUS = 5,
	RAPL_RCI_INDEX_CORE_ENERGY = 6,
	RAPL_RCI_INDEX_ENERGY_PLATFORM = 7,
	NUM_RAPL_COUNTERS,
};

enum rapl_unit {
	RAPL_UNIT_INVALID,
	RAPL_UNIT_JOULES,
	RAPL_UNIT_WATTS,
};

struct rapl_counter_info_t {
	unsigned long long data[NUM_RAPL_COUNTERS];
	enum counter_source source[NUM_RAPL_COUNTERS];
	unsigned long long flags[NUM_RAPL_COUNTERS];
	double scale[NUM_RAPL_COUNTERS];
	enum rapl_unit unit[NUM_RAPL_COUNTERS];
	unsigned long long msr[NUM_RAPL_COUNTERS];
	unsigned long long msr_mask[NUM_RAPL_COUNTERS];
	int msr_shift[NUM_RAPL_COUNTERS];

	int fd_perf;
};

/* struct rapl_counter_info_t for each RAPL domain */
struct rapl_counter_info_t *rapl_counter_info_perdomain;
unsigned int rapl_counter_info_perdomain_size;

#define RAPL_COUNTER_FLAG_PLATFORM_COUNTER (1u << 0)
#define RAPL_COUNTER_FLAG_USE_MSR_SUM (1u << 1)

struct rapl_counter_arch_info {
	int feature_mask;	/* Mask for testing if the counter is supported on host */
	const char *perf_subsys;
	const char *perf_name;
	unsigned long long msr;
	unsigned long long msr_mask;
	int msr_shift;		/* Positive means shift right, negative means shift left */
	double *platform_rapl_msr_scale;	/* Scale applied to values read by MSR (platform dependent, filled at runtime) */
	unsigned int rci_index;	/* Maps data from perf counters to global variables */
	unsigned long long bic;
	double compat_scale;	/* Some counters require constant scaling to be in the same range as other, similar ones */
	unsigned long long flags;
};

static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = {
	{
		.feature_mask = RAPL_PKG,
		.perf_subsys = "power",
		.perf_name = "energy-pkg",
		.msr = MSR_PKG_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
		.bic = BIC_PkgWatt | BIC_Pkg_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_AMD_F17H,
		.perf_subsys = "power",
		.perf_name = "energy-pkg",
		.msr = MSR_PKG_ENERGY_STAT,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
		.bic = BIC_PkgWatt | BIC_Pkg_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_CORE_ENERGY_STATUS,
		.perf_subsys = "power",
		.perf_name = "energy-cores",
		.msr = MSR_PP0_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_CORES,
		.bic = BIC_CorWatt | BIC_Cor_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_DRAM,
		.perf_subsys = "power",
		.perf_name = "energy-ram",
		.msr = MSR_DRAM_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_dram_energy_units,
		.rci_index = RAPL_RCI_INDEX_DRAM,
		.bic = BIC_RAMWatt | BIC_RAM_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_GFX,
		.perf_subsys = "power",
		.perf_name = "energy-gpu",
		.msr = MSR_PP1_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_GFX,
		.bic = BIC_GFXWatt | BIC_GFX_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_PKG_PERF_STATUS,
		.perf_subsys = NULL,
		.perf_name = NULL,
		.msr = MSR_PKG_PERF_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_time_units,
		.rci_index = RAPL_RCI_INDEX_PKG_PERF_STATUS,
		.bic = BIC_PKG__,
		.compat_scale = 100.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_DRAM_PERF_STATUS,
		.perf_subsys = NULL,
		.perf_name = NULL,
		.msr = MSR_DRAM_PERF_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_time_units,
		.rci_index = RAPL_RCI_INDEX_DRAM_PERF_STATUS,
		.bic = BIC_RAM__,
		.compat_scale = 100.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_AMD_F17H,
		.perf_subsys = NULL,
		.perf_name = NULL,
		.msr = MSR_CORE_ENERGY_STAT,
		.msr_mask = 0xFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_CORE_ENERGY,
		.bic = BIC_CorWatt | BIC_Cor_J,
		.compat_scale = 1.0,
		.flags = 0,
	},
	{
		.feature_mask = RAPL_PSYS,
		.perf_subsys = "power",
		.perf_name = "energy-psys",
		.msr = MSR_PLATFORM_ENERGY_STATUS,
		.msr_mask = 0x00000000FFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_psys_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM,
		.bic = BIC_SysWatt | BIC_Sys_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_PLATFORM_COUNTER | RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
};
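/*
 * Sketch of how one table entry above is consumed when reading via MSR
 * (assuming mask-then-shift ordering; see the msr_shift comment):
 *
 *	if (get_msr(cpu, info->msr, &raw))
 *		return -1;
 *	raw = (raw & info->msr_mask) >> info->msr_shift;
 *	value = raw * *info->platform_rapl_msr_scale * info->compat_scale;
 */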
struct rapl_counter {
	unsigned long long raw_value;
	enum rapl_unit unit;
	double scale;
};

/* Indexes used to map data read from perf and MSRs into global variables */
enum ccstate_rci_index {
	CCSTATE_RCI_INDEX_C1_RESIDENCY = 0,
	CCSTATE_RCI_INDEX_C3_RESIDENCY = 1,
	CCSTATE_RCI_INDEX_C6_RESIDENCY = 2,
	CCSTATE_RCI_INDEX_C7_RESIDENCY = 3,
	PCSTATE_RCI_INDEX_C2_RESIDENCY = 4,
	PCSTATE_RCI_INDEX_C3_RESIDENCY = 5,
	PCSTATE_RCI_INDEX_C6_RESIDENCY = 6,
	PCSTATE_RCI_INDEX_C7_RESIDENCY = 7,
	PCSTATE_RCI_INDEX_C8_RESIDENCY = 8,
	PCSTATE_RCI_INDEX_C9_RESIDENCY = 9,
	PCSTATE_RCI_INDEX_C10_RESIDENCY = 10,
	NUM_CSTATE_COUNTERS,
};

struct cstate_counter_info_t {
	unsigned long long data[NUM_CSTATE_COUNTERS];
	enum counter_source source[NUM_CSTATE_COUNTERS];
	unsigned long long msr[NUM_CSTATE_COUNTERS];
	int fd_perf_core;
	int fd_perf_pkg;
};

struct cstate_counter_info_t *ccstate_counter_info;
unsigned int ccstate_counter_info_size;

#define CSTATE_COUNTER_FLAG_COLLECT_PER_CORE	(1u << 0)
#define CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD	((1u << 1) | CSTATE_COUNTER_FLAG_COLLECT_PER_CORE)
#define CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY	(1u << 2)

struct cstate_counter_arch_info {
	int feature_mask;	/* Mask for testing if the counter is supported on host */
	const char *perf_subsys;
	const char *perf_name;
	unsigned long long msr;
	unsigned int rci_index;	/* Maps data from perf counters to global variables */
	unsigned long long bic;
	unsigned long long flags;
	int pkg_cstate_limit;
};

static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = {
	{
		.feature_mask = CC1,
		.perf_subsys = "cstate_core",
		.perf_name = "c1-residency",
		.msr = MSR_CORE_C1_RES,
		.rci_index = CCSTATE_RCI_INDEX_C1_RESIDENCY,
		.bic = BIC_CPU_c1,
		.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD,
		.pkg_cstate_limit = 0,
	},
	{
		.feature_mask = CC3,
		.perf_subsys = "cstate_core",
		.perf_name = "c3-residency",
		.msr = MSR_CORE_C3_RESIDENCY,
		.rci_index = CCSTATE_RCI_INDEX_C3_RESIDENCY,
		.bic = BIC_CPU_c3,
		.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY,
		.pkg_cstate_limit = 0,
	},
	{
		.feature_mask = CC6,
		.perf_subsys = "cstate_core",
		.perf_name = "c6-residency",
		.msr = MSR_CORE_C6_RESIDENCY,
		.rci_index = CCSTATE_RCI_INDEX_C6_RESIDENCY,
		.bic = BIC_CPU_c6,
		.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY,
		.pkg_cstate_limit = 0,
	},
	{
		.feature_mask = CC7,
		.perf_subsys = "cstate_core",
		.perf_name = "c7-residency",
		.msr = MSR_CORE_C7_RESIDENCY,
		.rci_index = CCSTATE_RCI_INDEX_C7_RESIDENCY,
		.bic = BIC_CPU_c7,
		.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY,
		.pkg_cstate_limit = 0,
	},
	{
		.feature_mask = PC2,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c2-residency",
		.msr = MSR_PKG_C2_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C2_RESIDENCY,
		.bic = BIC_Pkgpc2,
		.flags = 0,
		.pkg_cstate_limit = PCL__2,
	},
	{
		.feature_mask = PC3,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c3-residency",
		.msr = MSR_PKG_C3_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C3_RESIDENCY,
		.bic = BIC_Pkgpc3,
		.flags = 0,
		.pkg_cstate_limit = PCL__3,
	},
	{
		.feature_mask = PC6,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c6-residency",
		.msr = MSR_PKG_C6_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C6_RESIDENCY,
		.bic = BIC_Pkgpc6,
		.flags = 0,
		.pkg_cstate_limit = PCL__6,
	},
	{
		.feature_mask = PC7,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c7-residency",
		.msr = MSR_PKG_C7_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C7_RESIDENCY,
		.bic = BIC_Pkgpc7,
		.flags = 0,
		.pkg_cstate_limit = PCL__7,
	},
	{
		.feature_mask = PC8,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c8-residency",
		.msr = MSR_PKG_C8_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C8_RESIDENCY,
		.bic = BIC_Pkgpc8,
		.flags = 0,
		.pkg_cstate_limit = PCL__8,
	},
	{
		.feature_mask = PC9,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c9-residency",
		.msr = MSR_PKG_C9_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C9_RESIDENCY,
		.bic = BIC_Pkgpc9,
		.flags = 0,
		.pkg_cstate_limit = PCL__9,
	},
	{
		.feature_mask = PC10,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c10-residency",
		.msr = MSR_PKG_C10_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C10_RESIDENCY,
		.bic = BIC_Pkgpc10,
		.flags = 0,
		.pkg_cstate_limit = PCL_10,
	},
};

/* Indexes used to map data read from perf and MSRs into global variables */
enum msr_rci_index {
	MSR_RCI_INDEX_APERF = 0,
	MSR_RCI_INDEX_MPERF = 1,
	MSR_RCI_INDEX_SMI = 2,
	NUM_MSR_COUNTERS,
};

struct msr_counter_info_t {
	unsigned long long data[NUM_MSR_COUNTERS];
	enum counter_source source[NUM_MSR_COUNTERS];
	unsigned long long msr[NUM_MSR_COUNTERS];
	unsigned long long msr_mask[NUM_MSR_COUNTERS];
	int fd_perf;
};

struct msr_counter_info_t *msr_counter_info;
unsigned int msr_counter_info_size;

struct msr_counter_arch_info {
	const char *perf_subsys;
	const char *perf_name;
	unsigned long long msr;
	unsigned long long msr_mask;
	unsigned int rci_index;	/* Maps data from perf counters to global variables */
	bool needed;
	bool present;
};

enum msr_arch_info_index {
	MSR_ARCH_INFO_APERF_INDEX = 0,
	MSR_ARCH_INFO_MPERF_INDEX = 1,
	MSR_ARCH_INFO_SMI_INDEX = 2,
};

static struct msr_counter_arch_info msr_counter_arch_infos[] = {
	[MSR_ARCH_INFO_APERF_INDEX] = {
		.perf_subsys = "msr",
		.perf_name = "aperf",
		.msr = MSR_IA32_APERF,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.rci_index = MSR_RCI_INDEX_APERF,
	},

	[MSR_ARCH_INFO_MPERF_INDEX] = {
		.perf_subsys = "msr",
		.perf_name = "mperf",
		.msr = MSR_IA32_MPERF,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.rci_index = MSR_RCI_INDEX_MPERF,
	},

	[MSR_ARCH_INFO_SMI_INDEX] = {
		.perf_subsys = "msr",
		.perf_name = "smi",
		.msr = MSR_SMI_COUNT,
		.msr_mask = 0xFFFFFFFF,
		.rci_index = MSR_RCI_INDEX_SMI,
	},
};
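/*
 * APERF/MPERF deltas drive the headline frequency columns; roughly:
 *
 *	Busy%   = 100.0 * delta_mperf / delta_tsc
 *	Avg_MHz = delta_aperf / interval_usec
 *	Bzy_MHz = delta_tsc / units * delta_aperf / delta_mperf / interval_sec
 *
 * (a sketch of the relationships, ignoring the tsc_tweak and
 * aperf_mperf_multiplier corrections)
 */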
/* Can be redefined when compiling, useful for testing. */
#ifndef SYSFS_TELEM_PATH
#define SYSFS_TELEM_PATH "/sys/class/intel_pmt"
#endif

#define PMT_COUNTER_MTL_DC6_OFFSET 120
#define PMT_COUNTER_MTL_DC6_LSB 0
#define PMT_COUNTER_MTL_DC6_MSB 63
#define PMT_MTL_DC6_GUID 0x1a067102
#define PMT_MTL_DC6_SEQ 0

#define PMT_COUNTER_CWF_MC1E_OFFSET_BASE 20936
#define PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT 24
#define PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE 12
#define PMT_COUNTER_CWF_CPUS_PER_MODULE 4
#define PMT_COUNTER_CWF_MC1E_LSB 0
#define PMT_COUNTER_CWF_MC1E_MSB 63
#define PMT_CWF_MC1E_GUID 0x14421519

unsigned long long tcore_clock_freq_hz = 800000000;

#define PMT_COUNTER_NAME_SIZE_BYTES 16
#define PMT_COUNTER_TYPE_NAME_SIZE_BYTES 32

struct pmt_mmio {
	struct pmt_mmio *next;

	unsigned int guid;
	unsigned int size;

	/* Base pointer to the mmaped memory. */
	void *mmio_base;

	/*
	 * Offset to be applied to the mmio_base
	 * to get the beginning of the PMT counters for given GUID.
	 */
	unsigned long pmt_offset;
} *pmt_mmios;

enum pmt_datatype {
	PMT_TYPE_RAW,
	PMT_TYPE_XTAL_TIME,
	PMT_TYPE_TCORE_CLOCK,
};

struct pmt_domain_info {
	/*
	 * Pointer to the MMIO obtained by applying a counter offset
	 * to the mmio_base of the mmaped region for the given GUID.
	 *
	 * This is where to read the raw value of the counter from.
	 */
	unsigned long *pcounter;
};

struct pmt_counter {
	struct pmt_counter *next;

	/* PMT metadata */
	char name[PMT_COUNTER_NAME_SIZE_BYTES];
	enum pmt_datatype type;
	enum counter_scope scope;
	unsigned int lsb;
	unsigned int msb;

	/* BIC-like metadata */
	enum counter_format format;

	unsigned int num_domains;
	struct pmt_domain_info *domains;
};

/*
 * PMT telemetry directory iterator.
 * Used to iterate telemetry files in sysfs in correct order.
 */
struct pmt_diriter_t {
	DIR *dir;
	struct dirent **namelist;
	unsigned int num_names;
	unsigned int current_name_idx;
};

int pmt_telemdir_filter(const struct dirent *e)
{
	unsigned int dummy;

	return sscanf(e->d_name, "telem%u", &dummy);
}

int pmt_telemdir_sort(const struct dirent **a, const struct dirent **b)
{
	unsigned int aidx = 0, bidx = 0;

	sscanf((*a)->d_name, "telem%u", &aidx);
	sscanf((*b)->d_name, "telem%u", &bidx);

	/* Return the <0 / 0 / >0 ordering that scandir() expects. */
	return (aidx > bidx) - (aidx < bidx);
}

const struct dirent *pmt_diriter_next(struct pmt_diriter_t *iter)
{
	const struct dirent *ret = NULL;

	if (!iter->dir)
		return NULL;

	if (iter->current_name_idx >= iter->num_names)
		return NULL;

	ret = iter->namelist[iter->current_name_idx];
	++iter->current_name_idx;

	return ret;
}

const struct dirent *pmt_diriter_begin(struct pmt_diriter_t *iter, const char *pmt_root_path)
{
	int num_names = iter->num_names;

	if (!iter->dir) {
		iter->dir = opendir(pmt_root_path);
		if (iter->dir == NULL)
			return NULL;

		num_names = scandir(pmt_root_path, &iter->namelist, pmt_telemdir_filter, pmt_telemdir_sort);
		if (num_names == -1)
			return NULL;
	}

	iter->current_name_idx = 0;
	iter->num_names = num_names;

	return pmt_diriter_next(iter);
}

void pmt_diriter_init(struct pmt_diriter_t *iter)
{
	memset(iter, 0, sizeof(*iter));
}

void pmt_diriter_remove(struct pmt_diriter_t *iter)
{
	if (iter->namelist) {
		for (unsigned int i = 0; i < iter->num_names; i++) {
			free(iter->namelist[i]);
			iter->namelist[i] = NULL;
		}
	}

	free(iter->namelist);
	iter->namelist = NULL;
	iter->num_names = 0;
	iter->current_name_idx = 0;

	closedir(iter->dir);
	iter->dir = NULL;
}
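/*
 * Typical use of the iterator above (illustrative; process_telem_dir()
 * is a placeholder):
 *
 *	struct pmt_diriter_t iter;
 *	const struct dirent *entry;
 *
 *	pmt_diriter_init(&iter);
 *	for (entry = pmt_diriter_begin(&iter, SYSFS_TELEM_PATH); entry;
 *	     entry = pmt_diriter_next(&iter))
 *		process_telem_dir(entry->d_name);
 *	pmt_diriter_remove(&iter);
 */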
counters\n", __func__); 1704 exit(1); 1705 } 1706 1707 /* Zero initialize just allocated memory. */ 1708 const size_t num_new_domains = new_size - pcounter->num_domains; 1709 1710 memset(&new_mem[pcounter->num_domains], 0, num_new_domains * sizeof(*pcounter->domains)); 1711 1712 pcounter->num_domains = new_size; 1713 pcounter->domains = new_mem; 1714 } 1715 1716 void pmt_counter_resize(struct pmt_counter *pcounter, unsigned int new_size) 1717 { 1718 /* 1719 * Allocate more memory ahead of time. 1720 * 1721 * Always allocate space for at least 8 elements 1722 * and double the size when growing. 1723 */ 1724 if (new_size < 8) 1725 new_size = 8; 1726 new_size = MAX(new_size, pcounter->num_domains * 2); 1727 1728 pmt_counter_resize_(pcounter, new_size); 1729 } 1730 1731 struct thread_data { 1732 struct timeval tv_begin; 1733 struct timeval tv_end; 1734 struct timeval tv_delta; 1735 unsigned long long tsc; 1736 unsigned long long aperf; 1737 unsigned long long mperf; 1738 unsigned long long c1; 1739 unsigned long long instr_count; 1740 unsigned long long irq_count; 1741 unsigned long long nmi_count; 1742 unsigned int smi_count; 1743 unsigned int cpu_id; 1744 unsigned int apic_id; 1745 unsigned int x2apic_id; 1746 unsigned int flags; 1747 bool is_atom; 1748 unsigned long long counter[MAX_ADDED_THREAD_COUNTERS]; 1749 unsigned long long perf_counter[MAX_ADDED_THREAD_COUNTERS]; 1750 unsigned long long pmt_counter[PMT_MAX_ADDED_THREAD_COUNTERS]; 1751 } *thread_even, *thread_odd; 1752 1753 struct core_data { 1754 int base_cpu; 1755 unsigned long long c3; 1756 unsigned long long c6; 1757 unsigned long long c7; 1758 unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */ 1759 unsigned int core_temp_c; 1760 struct rapl_counter core_energy; /* MSR_CORE_ENERGY_STAT */ 1761 unsigned int core_id; 1762 unsigned long long core_throt_cnt; 1763 unsigned long long counter[MAX_ADDED_CORE_COUNTERS]; 1764 unsigned long long perf_counter[MAX_ADDED_CORE_COUNTERS]; 1765 unsigned long long pmt_counter[PMT_MAX_ADDED_CORE_COUNTERS]; 1766 } *core_even, *core_odd; 1767 1768 struct pkg_data { 1769 int base_cpu; 1770 unsigned long long pc2; 1771 unsigned long long pc3; 1772 unsigned long long pc6; 1773 unsigned long long pc7; 1774 unsigned long long pc8; 1775 unsigned long long pc9; 1776 unsigned long long pc10; 1777 long long cpu_lpi; 1778 long long sys_lpi; 1779 unsigned long long pkg_wtd_core_c0; 1780 unsigned long long pkg_any_core_c0; 1781 unsigned long long pkg_any_gfxe_c0; 1782 unsigned long long pkg_both_core_gfxe_c0; 1783 long long gfx_rc6_ms; 1784 unsigned int gfx_mhz; 1785 unsigned int gfx_act_mhz; 1786 long long sam_mc6_ms; 1787 unsigned int sam_mhz; 1788 unsigned int sam_act_mhz; 1789 unsigned int package_id; 1790 struct rapl_counter energy_pkg; /* MSR_PKG_ENERGY_STATUS */ 1791 struct rapl_counter energy_dram; /* MSR_DRAM_ENERGY_STATUS */ 1792 struct rapl_counter energy_cores; /* MSR_PP0_ENERGY_STATUS */ 1793 struct rapl_counter energy_gfx; /* MSR_PP1_ENERGY_STATUS */ 1794 struct rapl_counter rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ 1795 struct rapl_counter rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ 1796 unsigned int pkg_temp_c; 1797 unsigned int uncore_mhz; 1798 unsigned long long die_c6; 1799 unsigned long long counter[MAX_ADDED_PACKAGE_COUNTERS]; 1800 unsigned long long perf_counter[MAX_ADDED_PACKAGE_COUNTERS]; 1801 unsigned long long pmt_counter[PMT_MAX_ADDED_PACKAGE_COUNTERS]; 1802 } *package_even, *package_odd; 1803 1804 #define ODD_COUNTERS thread_odd, 
core_odd, package_odd
1805 #define EVEN_COUNTERS thread_even, core_even, package_even
1806
1807 #define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no) \
1808     ((thread_base) + \
1809     ((pkg_no) * \
1810     topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
1811     ((node_no) * topo.cores_per_node * topo.threads_per_core) + \
1812     ((core_no) * topo.threads_per_core) + \
1813     (thread_no))
1814
1815 #define GET_CORE(core_base, core_no, node_no, pkg_no) \
1816     ((core_base) + \
1817     ((pkg_no) * topo.nodes_per_pkg * topo.cores_per_node) + \
1818     ((node_no) * topo.cores_per_node) + \
1819     (core_no))
1820
1821 #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
1822
1823 /*
1824  * Each MSR in this list increases monotonically, but may be narrower
1825  * than 64 bits; it is accumulated periodically into a 64-bit sum,
1826  * regardless of the register's bit width.
1827  */
1828 enum {
1829     IDX_PKG_ENERGY,
1830     IDX_DRAM_ENERGY,
1831     IDX_PP0_ENERGY,
1832     IDX_PP1_ENERGY,
1833     IDX_PKG_PERF,
1834     IDX_DRAM_PERF,
1835     IDX_PSYS_ENERGY,
1836     IDX_COUNT,
1837 };
1838
1839 int get_msr_sum(int cpu, off_t offset, unsigned long long *msr);
1840
1841 struct msr_sum_array {
1842     /* get_msr_sum() = sum + (get_msr() - last) */
1843     struct {
1844         /* The accumulated MSR value, updated by the periodic timer */
1845         unsigned long long sum;
1846         /* The raw MSR value recorded at the last timer tick */
1847         unsigned long long last;
1848     } entries[IDX_COUNT];
1849 };
1850
1851 /* The per-cpu MSR sum array. */
1852 struct msr_sum_array *per_cpu_msr_sum;
1853
1854 off_t idx_to_offset(int idx)
1855 {
1856     off_t offset;
1857
1858     switch (idx) {
1859     case IDX_PKG_ENERGY:
1860         if (platform->rapl_msrs & RAPL_AMD_F17H)
1861             offset = MSR_PKG_ENERGY_STAT;
1862         else
1863             offset = MSR_PKG_ENERGY_STATUS;
1864         break;
1865     case IDX_DRAM_ENERGY:
1866         offset = MSR_DRAM_ENERGY_STATUS;
1867         break;
1868     case IDX_PP0_ENERGY:
1869         offset = MSR_PP0_ENERGY_STATUS;
1870         break;
1871     case IDX_PP1_ENERGY:
1872         offset = MSR_PP1_ENERGY_STATUS;
1873         break;
1874     case IDX_PKG_PERF:
1875         offset = MSR_PKG_PERF_STATUS;
1876         break;
1877     case IDX_DRAM_PERF:
1878         offset = MSR_DRAM_PERF_STATUS;
1879         break;
1880     case IDX_PSYS_ENERGY:
1881         offset = MSR_PLATFORM_ENERGY_STATUS;
1882         break;
1883     default:
1884         offset = -1;
1885     }
1886     return offset;
1887 }
1888
1889 int offset_to_idx(off_t offset)
1890 {
1891     int idx;
1892
1893     switch (offset) {
1894     case MSR_PKG_ENERGY_STATUS:
1895     case MSR_PKG_ENERGY_STAT:
1896         idx = IDX_PKG_ENERGY;
1897         break;
1898     case MSR_DRAM_ENERGY_STATUS:
1899         idx = IDX_DRAM_ENERGY;
1900         break;
1901     case MSR_PP0_ENERGY_STATUS:
1902         idx = IDX_PP0_ENERGY;
1903         break;
1904     case MSR_PP1_ENERGY_STATUS:
1905         idx = IDX_PP1_ENERGY;
1906         break;
1907     case MSR_PKG_PERF_STATUS:
1908         idx = IDX_PKG_PERF;
1909         break;
1910     case MSR_DRAM_PERF_STATUS:
1911         idx = IDX_DRAM_PERF;
1912         break;
1913     case MSR_PLATFORM_ENERGY_STATUS:
1914         idx = IDX_PSYS_ENERGY;
1915         break;
1916     default:
1917         idx = -1;
1918     }
1919     return idx;
1920 }
1921
1922 int idx_valid(int idx)
1923 {
1924     switch (idx) {
1925     case IDX_PKG_ENERGY:
1926         return platform->rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H);
1927     case IDX_DRAM_ENERGY:
1928         return platform->rapl_msrs & RAPL_DRAM;
1929     case IDX_PP0_ENERGY:
1930         return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS;
1931     case IDX_PP1_ENERGY:
1932         return platform->rapl_msrs & RAPL_GFX;
1933     case IDX_PKG_PERF:
1934         return platform->rapl_msrs & RAPL_PKG_PERF_STATUS;
1935     case IDX_DRAM_PERF:
1936         return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS;
1937     case IDX_PSYS_ENERGY:
1938         return 
platform->rapl_msrs & RAPL_PSYS;
1939     default:
1940         return 0;
1941     }
1942 }
1943
1944 struct sys_counters {
1945     /* MSR added counters */
1946     unsigned int added_thread_counters;
1947     unsigned int added_core_counters;
1948     unsigned int added_package_counters;
1949     struct msr_counter *tp;
1950     struct msr_counter *cp;
1951     struct msr_counter *pp;
1952
1953     /* perf added counters */
1954     unsigned int added_thread_perf_counters;
1955     unsigned int added_core_perf_counters;
1956     unsigned int added_package_perf_counters;
1957     struct perf_counter_info *perf_tp;
1958     struct perf_counter_info *perf_cp;
1959     struct perf_counter_info *perf_pp;
1960
1961     struct pmt_counter *pmt_tp;
1962     struct pmt_counter *pmt_cp;
1963     struct pmt_counter *pmt_pp;
1964 } sys;
1965
1966 static size_t free_msr_counters_(struct msr_counter **pp)
1967 {
1968     struct msr_counter *p = NULL;
1969     size_t num_freed = 0;
1970
1971     while (*pp) {
1972         p = *pp;
1973
1974         if (p->msr_num != 0) {
1975             *pp = p->next;
1976
1977             free(p);
1978             ++num_freed;
1979
1980             continue;
1981         }
1982
1983         pp = &p->next;
1984     }
1985
1986     return num_freed;
1987 }
1988
1989 /*
1990  * Free all added counters accessed via MSR.
1991  */
1992 static void free_sys_msr_counters(void)
1993 {
1994     /* Thread counters */
1995     sys.added_thread_counters -= free_msr_counters_(&sys.tp);
1996
1997     /* Core counters */
1998     sys.added_core_counters -= free_msr_counters_(&sys.cp);
1999
2000     /* Package counters */
2001     sys.added_package_counters -= free_msr_counters_(&sys.pp);
2002 }
2003
2004 struct system_summary {
2005     struct thread_data threads;
2006     struct core_data cores;
2007     struct pkg_data packages;
2008 } average;
2009
2010 struct platform_counters {
2011     struct rapl_counter energy_psys; /* MSR_PLATFORM_ENERGY_STATUS */
2012 } platform_counters_odd, platform_counters_even;
2013
2014 struct cpu_topology {
2015     int physical_package_id;
2016     int die_id;
2017     int logical_cpu_id;
2018     int physical_node_id;
2019     int logical_node_id; /* 0-based count within the package */
2020     int physical_core_id;
2021     int thread_id;
2022     int type;
2023     cpu_set_t *put_ids; /* Processing Unit/Thread IDs */
2024 } *cpus;
2025
2026 struct topo_params {
2027     int num_packages;
2028     int num_die;
2029     int num_cpus;
2030     int num_cores;
2031     int allowed_packages;
2032     int allowed_cpus;
2033     int allowed_cores;
2034     int max_cpu_num;
2035     int max_core_id;
2036     int max_package_id;
2037     int max_die_id;
2038     int max_node_num;
2039     int nodes_per_pkg;
2040     int cores_per_node;
2041     int threads_per_core;
2042 } topo;
2043
2044 struct timeval tv_even, tv_odd, tv_delta;
2045
2046 int *irq_column_2_cpu; /* /proc/interrupts column numbers */
2047 int *irqs_per_cpu;     /* indexed by cpu_num */
2048 int *nmi_per_cpu;      /* indexed by cpu_num */
2049
2050 void setup_all_buffers(bool startup);
2051
2052 char *sys_lpi_file;
2053 char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
2054 char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec";
2055
2056 int cpu_is_not_present(int cpu)
2057 {
2058     return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
2059 }
2060
2061 int cpu_is_not_allowed(int cpu)
2062 {
2063     return !CPU_ISSET_S(cpu, cpu_allowed_setsize, cpu_allowed_set);
2064 }
2065
2066 /*
2067  * run func(thread, core, package) in topology order
2068  * skip cpus that are not in the allowed set
2069  */
2070
2071 int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *),
2072     struct thread_data *thread_base, struct core_data *core_base, 
struct pkg_data *pkg_base) 2073 { 2074 int retval, pkg_no, core_no, thread_no, node_no; 2075 2076 retval = 0; 2077 2078 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 2079 for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) { 2080 for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { 2081 for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { 2082 struct thread_data *t; 2083 struct core_data *c; 2084 struct pkg_data *p; 2085 t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); 2086 2087 if (cpu_is_not_allowed(t->cpu_id)) 2088 continue; 2089 2090 c = GET_CORE(core_base, core_no, node_no, pkg_no); 2091 p = GET_PKG(pkg_base, pkg_no); 2092 2093 retval |= func(t, c, p); 2094 } 2095 } 2096 } 2097 } 2098 return retval; 2099 } 2100 2101 int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p) 2102 { 2103 UNUSED(p); 2104 2105 return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0); 2106 } 2107 2108 int is_cpu_first_core_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) 2109 { 2110 UNUSED(c); 2111 2112 return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0); 2113 } 2114 2115 int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) 2116 { 2117 return is_cpu_first_thread_in_core(t, c, p) && is_cpu_first_core_in_package(t, c, p); 2118 } 2119 2120 int cpu_migrate(int cpu) 2121 { 2122 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 2123 CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); 2124 if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) 2125 return -1; 2126 else 2127 return 0; 2128 } 2129 2130 int get_msr_fd(int cpu) 2131 { 2132 char pathname[32]; 2133 int fd; 2134 2135 fd = fd_percpu[cpu]; 2136 2137 if (fd) 2138 return fd; 2139 2140 sprintf(pathname, "/dev/cpu/%d/msr", cpu); 2141 fd = open(pathname, O_RDONLY); 2142 if (fd < 0) 2143 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, " 2144 "or run with --no-msr, or run as root", pathname); 2145 2146 fd_percpu[cpu] = fd; 2147 2148 return fd; 2149 } 2150 2151 static void bic_disable_msr_access(void) 2152 { 2153 const unsigned long bic_msrs = BIC_Mod_c6 | BIC_CoreTmp | 2154 BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_PkgTmp; 2155 2156 bic_enabled &= ~bic_msrs; 2157 2158 free_sys_msr_counters(); 2159 } 2160 2161 static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) 2162 { 2163 assert(!no_perf); 2164 2165 return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); 2166 } 2167 2168 static long open_perf_counter(int cpu, unsigned int type, unsigned int config, int group_fd, __u64 read_format) 2169 { 2170 struct perf_event_attr attr; 2171 const pid_t pid = -1; 2172 const unsigned long flags = 0; 2173 2174 assert(!no_perf); 2175 2176 memset(&attr, 0, sizeof(struct perf_event_attr)); 2177 2178 attr.type = type; 2179 attr.size = sizeof(struct perf_event_attr); 2180 attr.config = config; 2181 attr.disabled = 0; 2182 attr.sample_type = PERF_SAMPLE_IDENTIFIER; 2183 attr.read_format = read_format; 2184 2185 const int fd = perf_event_open(&attr, pid, cpu, group_fd, flags); 2186 2187 return fd; 2188 } 2189 2190 int get_instr_count_fd(int cpu) 2191 { 2192 if (fd_instr_count_percpu[cpu]) 2193 return fd_instr_count_percpu[cpu]; 2194 2195 fd_instr_count_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0); 2196 2197 return 
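/* opened on first use above, then cached per CPU for later reads */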
fd_instr_count_percpu[cpu];
2198 }
2199
2200 int get_msr(int cpu, off_t offset, unsigned long long *msr)
2201 {
2202     ssize_t retval;
2203
2204     assert(!no_msr);
2205
2206     retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
2207
2208     if (retval != sizeof *msr)
2209         err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);
2210
2211     return 0;
2212 }
2213
2214 int probe_msr(int cpu, off_t offset)
2215 {
2216     ssize_t retval;
2217     unsigned long long value;
2218
2219     assert(!no_msr);
2220
2221     retval = pread(get_msr_fd(cpu), &value, sizeof(value), offset);
2222
2223     /*
2224      * Expect MSRs to accumulate some non-zero value since the system was powered on.
2225      * Treat zero as a read failure.
2226      */
2227     if (retval != sizeof(value) || value == 0)
2228         return 1;
2229
2230     return 0;
2231 }
2232
2233 /* Convert CPU ID to domain ID for a given added perf counter. */
2234 unsigned int cpu_to_domain(const struct perf_counter_info *pc, int cpu)
2235 {
2236     switch (pc->scope) {
2237     case SCOPE_CPU:
2238         return cpu;
2239
2240     case SCOPE_CORE:
2241         return cpus[cpu].physical_core_id;
2242
2243     case SCOPE_PACKAGE:
2244         return cpus[cpu].physical_package_id;
2245     }
2246
2247     __builtin_unreachable();
2248 }
2249
2250 #define MAX_DEFERRED 16
2251 char *deferred_add_names[MAX_DEFERRED];
2252 char *deferred_skip_names[MAX_DEFERRED];
2253 int deferred_add_index;
2254 int deferred_skip_index;
2255
2256 /*
2257  * HIDE_LIST - hide this list of counters, show the rest [default]
2258  * SHOW_LIST - show this list of counters, hide the rest
2259  */
2260 enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;
2261
2262 void help(void)
2263 {
2264     fprintf(outf,
2265         "Usage: turbostat [OPTIONS] [(--interval seconds) | COMMAND ...]\n"
2266         "\n"
2267         "Turbostat forks the specified COMMAND and prints statistics\n"
2268         "when COMMAND completes.\n"
2269         "If no COMMAND is specified, turbostat wakes every 5 seconds\n"
2270         "to print statistics, until interrupted.\n"
2271         "  -a, --add counter\n"
2272         "      add a counter\n"
2273         "      e.g. --add msr0x10,u64,cpu,delta,MY_TSC\n"
2274         "      e.g. --add perf/cstate_pkg/c2-residency,package,delta,percent,perfPC2\n"
2275         "      e.g. 
--add pmt,name=XTAL,type=raw,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102\n"
2276         "  -c, --cpu cpu-set\n"
2277         "      limit output to summary plus cpu-set:\n"
2278         "      {core | package | j,k,l..m,n-p }\n"
2279         "  -d, --debug\n"
2280         "      displays usec, Time_Of_Day_Seconds and more debugging\n"
2281         "      debug messages are printed to stderr\n"
2282         "  -D, --Dump\n"
2283         "      displays the raw counter values\n"
2284         "  -e, --enable [all | column]\n"
2285         "      shows all or the specified disabled column\n"
2286         "  -f, --force\n"
2287         "      force-load turbostat with minimum default features on unsupported platforms.\n"
2288         "  -H, --hide [column | column,column,...]\n"
2289         "      hide the specified column(s)\n"
2290         "  -i, --interval sec.subsec\n"
2291         "      override default 5-second measurement interval\n"
2292         "  -J, --Joules\n"
2293         "      displays energy in Joules instead of Watts\n"
2294         "  -l, --list\n"
2295         "      list column headers only\n"
2296         "  -M, --no-msr\n"
2297         "      disable all uses of the MSR driver\n"
2298         "  -P, --no-perf\n"
2299         "      disable all uses of the perf API\n"
2300         "  -n, --num_iterations num\n"
2301         "      number of measurement iterations\n"
2302         "  -N, --header_iterations num\n"
2303         "      print header every num iterations\n"
2304         "  -o, --out file\n"
2305         "      create or truncate \"file\" for all output\n"
2306         "  -q, --quiet\n"
2307         "      skip decoding system configuration header\n"
2308         "  -s, --show [column | column,column,...]\n"
2309         "      show only the specified column(s)\n"
2310         "  -S, --Summary\n"
2311         "      limits output to 1-line system summary per interval\n"
2312         "  -T, --TCC temperature\n"
2313         "      sets the Thermal Control Circuit temperature in\n"
2314         "      degrees Celsius\n"
2315         "  -h, --help\n"
2316         "      print this help message\n"
2317         "  -v, --version\n"
2318         "      print version information\n\nFor more help, run \"man turbostat\"\n");
2319 }
2320
2321 /*
2322  * bic_lookup
2323  * for all the strings in comma-separated name_list,
2324  * set the appropriate bit in return value.
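 *
 * e.g. bic_lookup("Busy%,SMI", SHOW_LIST) returns BIC_Busy | BIC_SMI.
 * The group names "all", "topology", "power", "idle", "frequency" and
 * "other" expand to their corresponding BIC_* group masks; names that
 * match nothing are deferred to the --add/--skip handling below.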
2325 */ 2326 unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) 2327 { 2328 unsigned int i; 2329 unsigned long long retval = 0; 2330 2331 while (name_list) { 2332 char *comma; 2333 2334 comma = strchr(name_list, ','); 2335 2336 if (comma) 2337 *comma = '\0'; 2338 2339 for (i = 0; i < MAX_BIC; ++i) { 2340 if (!strcmp(name_list, bic[i].name)) { 2341 retval |= (1ULL << i); 2342 break; 2343 } 2344 if (!strcmp(name_list, "all")) { 2345 retval |= ~0; 2346 break; 2347 } else if (!strcmp(name_list, "topology")) { 2348 retval |= BIC_TOPOLOGY; 2349 break; 2350 } else if (!strcmp(name_list, "power")) { 2351 retval |= BIC_THERMAL_PWR; 2352 break; 2353 } else if (!strcmp(name_list, "idle")) { 2354 retval |= BIC_IDLE; 2355 break; 2356 } else if (!strcmp(name_list, "frequency")) { 2357 retval |= BIC_FREQUENCY; 2358 break; 2359 } else if (!strcmp(name_list, "other")) { 2360 retval |= BIC_OTHER; 2361 break; 2362 } 2363 2364 } 2365 if (i == MAX_BIC) { 2366 if (mode == SHOW_LIST) { 2367 deferred_add_names[deferred_add_index++] = name_list; 2368 if (deferred_add_index >= MAX_DEFERRED) { 2369 fprintf(stderr, "More than max %d un-recognized --add options '%s'\n", 2370 MAX_DEFERRED, name_list); 2371 help(); 2372 exit(1); 2373 } 2374 } else { 2375 deferred_skip_names[deferred_skip_index++] = name_list; 2376 if (debug) 2377 fprintf(stderr, "deferred \"%s\"\n", name_list); 2378 if (deferred_skip_index >= MAX_DEFERRED) { 2379 fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", 2380 MAX_DEFERRED, name_list); 2381 help(); 2382 exit(1); 2383 } 2384 } 2385 } 2386 2387 name_list = comma; 2388 if (name_list) 2389 name_list++; 2390 2391 } 2392 return retval; 2393 } 2394 2395 void print_header(char *delim) 2396 { 2397 struct msr_counter *mp; 2398 struct perf_counter_info *pp; 2399 struct pmt_counter *ppmt; 2400 int printed = 0; 2401 2402 if (DO_BIC(BIC_USEC)) 2403 outp += sprintf(outp, "%susec", (printed++ ? delim : "")); 2404 if (DO_BIC(BIC_TOD)) 2405 outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : "")); 2406 if (DO_BIC(BIC_Package)) 2407 outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); 2408 if (DO_BIC(BIC_Die)) 2409 outp += sprintf(outp, "%sDie", (printed++ ? delim : "")); 2410 if (DO_BIC(BIC_Node)) 2411 outp += sprintf(outp, "%sNode", (printed++ ? delim : "")); 2412 if (DO_BIC(BIC_Core)) 2413 outp += sprintf(outp, "%sCore", (printed++ ? delim : "")); 2414 if (DO_BIC(BIC_CPU)) 2415 outp += sprintf(outp, "%sCPU", (printed++ ? delim : "")); 2416 if (DO_BIC(BIC_APIC)) 2417 outp += sprintf(outp, "%sAPIC", (printed++ ? delim : "")); 2418 if (DO_BIC(BIC_X2APIC)) 2419 outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : "")); 2420 if (DO_BIC(BIC_Avg_MHz)) 2421 outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : "")); 2422 if (DO_BIC(BIC_Busy)) 2423 outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : "")); 2424 if (DO_BIC(BIC_Bzy_MHz)) 2425 outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : "")); 2426 if (DO_BIC(BIC_TSC_MHz)) 2427 outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : "")); 2428 2429 if (DO_BIC(BIC_IPC)) 2430 outp += sprintf(outp, "%sIPC", (printed++ ? delim : "")); 2431 2432 if (DO_BIC(BIC_IRQ)) { 2433 if (sums_need_wide_columns) 2434 outp += sprintf(outp, "%s IRQ", (printed++ ? delim : "")); 2435 else 2436 outp += sprintf(outp, "%sIRQ", (printed++ ? delim : "")); 2437 } 2438 if (DO_BIC(BIC_NMI)) { 2439 if (sums_need_wide_columns) 2440 outp += sprintf(outp, "%s NMI", (printed++ ? 
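/* the (printed++ ? delim : "") idiom prints the delimiter before every column except the first */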
delim : "")); 2441 else 2442 outp += sprintf(outp, "%sNMI", (printed++ ? delim : "")); 2443 } 2444 2445 if (DO_BIC(BIC_SMI)) 2446 outp += sprintf(outp, "%sSMI", (printed++ ? delim : "")); 2447 2448 for (mp = sys.tp; mp; mp = mp->next) { 2449 2450 if (mp->format == FORMAT_RAW) { 2451 if (mp->width == 64) 2452 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name); 2453 else 2454 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name); 2455 } else { 2456 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2457 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name); 2458 else 2459 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name); 2460 } 2461 } 2462 2463 for (pp = sys.perf_tp; pp; pp = pp->next) { 2464 2465 if (pp->format == FORMAT_RAW) { 2466 if (pp->width == 64) 2467 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); 2468 else 2469 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); 2470 } else { 2471 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2472 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); 2473 else 2474 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); 2475 } 2476 } 2477 2478 ppmt = sys.pmt_tp; 2479 while (ppmt) { 2480 switch (ppmt->type) { 2481 case PMT_TYPE_RAW: 2482 if (pmt_counter_get_width(ppmt) <= 32) 2483 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); 2484 else 2485 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); 2486 2487 break; 2488 2489 case PMT_TYPE_XTAL_TIME: 2490 case PMT_TYPE_TCORE_CLOCK: 2491 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); 2492 break; 2493 } 2494 2495 ppmt = ppmt->next; 2496 } 2497 2498 if (DO_BIC(BIC_CPU_c1)) 2499 outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); 2500 if (DO_BIC(BIC_CPU_c3)) 2501 outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : "")); 2502 if (DO_BIC(BIC_CPU_c6)) 2503 outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : "")); 2504 if (DO_BIC(BIC_CPU_c7)) 2505 outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : "")); 2506 2507 if (DO_BIC(BIC_Mod_c6)) 2508 outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : "")); 2509 2510 if (DO_BIC(BIC_CoreTmp)) 2511 outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : "")); 2512 2513 if (DO_BIC(BIC_CORE_THROT_CNT)) 2514 outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : "")); 2515 2516 if (platform->rapl_msrs && !rapl_joules) { 2517 if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) 2518 outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); 2519 } else if (platform->rapl_msrs && rapl_joules) { 2520 if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) 2521 outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); 2522 } 2523 2524 for (mp = sys.cp; mp; mp = mp->next) { 2525 if (mp->format == FORMAT_RAW) { 2526 if (mp->width == 64) 2527 outp += sprintf(outp, "%s%18.18s", delim, mp->name); 2528 else 2529 outp += sprintf(outp, "%s%10.10s", delim, mp->name); 2530 } else { 2531 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2532 outp += sprintf(outp, "%s%8s", delim, mp->name); 2533 else 2534 outp += sprintf(outp, "%s%s", delim, mp->name); 2535 } 2536 } 2537 2538 for (pp = sys.perf_cp; pp; pp = pp->next) { 2539 2540 if (pp->format == FORMAT_RAW) { 2541 if (pp->width == 64) 2542 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); 2543 else 2544 outp += sprintf(outp, "%s%10.10s", (printed++ ? 
delim : ""), pp->name); 2545 } else { 2546 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2547 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); 2548 else 2549 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); 2550 } 2551 } 2552 2553 ppmt = sys.pmt_cp; 2554 while (ppmt) { 2555 switch (ppmt->type) { 2556 case PMT_TYPE_RAW: 2557 if (pmt_counter_get_width(ppmt) <= 32) 2558 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); 2559 else 2560 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); 2561 2562 break; 2563 2564 case PMT_TYPE_XTAL_TIME: 2565 case PMT_TYPE_TCORE_CLOCK: 2566 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); 2567 break; 2568 } 2569 2570 ppmt = ppmt->next; 2571 } 2572 2573 if (DO_BIC(BIC_PkgTmp)) 2574 outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : "")); 2575 2576 if (DO_BIC(BIC_GFX_rc6)) 2577 outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : "")); 2578 2579 if (DO_BIC(BIC_GFXMHz)) 2580 outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : "")); 2581 2582 if (DO_BIC(BIC_GFXACTMHz)) 2583 outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : "")); 2584 2585 if (DO_BIC(BIC_SAM_mc6)) 2586 outp += sprintf(outp, "%sSAM%%mc6", (printed++ ? delim : "")); 2587 2588 if (DO_BIC(BIC_SAMMHz)) 2589 outp += sprintf(outp, "%sSAMMHz", (printed++ ? delim : "")); 2590 2591 if (DO_BIC(BIC_SAMACTMHz)) 2592 outp += sprintf(outp, "%sSAMAMHz", (printed++ ? delim : "")); 2593 2594 if (DO_BIC(BIC_Totl_c0)) 2595 outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : "")); 2596 if (DO_BIC(BIC_Any_c0)) 2597 outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : "")); 2598 if (DO_BIC(BIC_GFX_c0)) 2599 outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : "")); 2600 if (DO_BIC(BIC_CPUGFX)) 2601 outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : "")); 2602 2603 if (DO_BIC(BIC_Pkgpc2)) 2604 outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : "")); 2605 if (DO_BIC(BIC_Pkgpc3)) 2606 outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : "")); 2607 if (DO_BIC(BIC_Pkgpc6)) 2608 outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : "")); 2609 if (DO_BIC(BIC_Pkgpc7)) 2610 outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : "")); 2611 if (DO_BIC(BIC_Pkgpc8)) 2612 outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : "")); 2613 if (DO_BIC(BIC_Pkgpc9)) 2614 outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : "")); 2615 if (DO_BIC(BIC_Pkgpc10)) 2616 outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : "")); 2617 if (DO_BIC(BIC_Diec6)) 2618 outp += sprintf(outp, "%sDie%%c6", (printed++ ? delim : "")); 2619 if (DO_BIC(BIC_CPU_LPI)) 2620 outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : "")); 2621 if (DO_BIC(BIC_SYS_LPI)) 2622 outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : "")); 2623 2624 if (platform->rapl_msrs && !rapl_joules) { 2625 if (DO_BIC(BIC_PkgWatt)) 2626 outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : "")); 2627 if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) 2628 outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); 2629 if (DO_BIC(BIC_GFXWatt)) 2630 outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : "")); 2631 if (DO_BIC(BIC_RAMWatt)) 2632 outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : "")); 2633 if (DO_BIC(BIC_PKG__)) 2634 outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); 2635 if (DO_BIC(BIC_RAM__)) 2636 outp += sprintf(outp, "%sRAM_%%", (printed++ ? 
delim : "")); 2637 } else if (platform->rapl_msrs && rapl_joules) { 2638 if (DO_BIC(BIC_Pkg_J)) 2639 outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : "")); 2640 if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) 2641 outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); 2642 if (DO_BIC(BIC_GFX_J)) 2643 outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : "")); 2644 if (DO_BIC(BIC_RAM_J)) 2645 outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : "")); 2646 if (DO_BIC(BIC_PKG__)) 2647 outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); 2648 if (DO_BIC(BIC_RAM__)) 2649 outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : "")); 2650 } 2651 if (DO_BIC(BIC_UNCORE_MHZ)) 2652 outp += sprintf(outp, "%sUncMHz", (printed++ ? delim : "")); 2653 2654 for (mp = sys.pp; mp; mp = mp->next) { 2655 if (mp->format == FORMAT_RAW) { 2656 if (mp->width == 64) 2657 outp += sprintf(outp, "%s%18.18s", delim, mp->name); 2658 else if (mp->width == 32) 2659 outp += sprintf(outp, "%s%10.10s", delim, mp->name); 2660 else 2661 outp += sprintf(outp, "%s%7.7s", delim, mp->name); 2662 } else { 2663 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2664 outp += sprintf(outp, "%s%8s", delim, mp->name); 2665 else 2666 outp += sprintf(outp, "%s%7.7s", delim, mp->name); 2667 } 2668 } 2669 2670 for (pp = sys.perf_pp; pp; pp = pp->next) { 2671 2672 if (pp->format == FORMAT_RAW) { 2673 if (pp->width == 64) 2674 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); 2675 else 2676 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); 2677 } else { 2678 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2679 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); 2680 else 2681 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); 2682 } 2683 } 2684 2685 ppmt = sys.pmt_pp; 2686 while (ppmt) { 2687 switch (ppmt->type) { 2688 case PMT_TYPE_RAW: 2689 if (pmt_counter_get_width(ppmt) <= 32) 2690 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); 2691 else 2692 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); 2693 2694 break; 2695 2696 case PMT_TYPE_XTAL_TIME: 2697 case PMT_TYPE_TCORE_CLOCK: 2698 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); 2699 break; 2700 } 2701 2702 ppmt = ppmt->next; 2703 } 2704 2705 if (DO_BIC(BIC_SysWatt)) 2706 outp += sprintf(outp, "%sSysWatt", (printed++ ? delim : "")); 2707 if (DO_BIC(BIC_Sys_J)) 2708 outp += sprintf(outp, "%sSys_J", (printed++ ? delim : "")); 2709 2710 outp += sprintf(outp, "\n"); 2711 } 2712 2713 int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 2714 { 2715 int i; 2716 struct msr_counter *mp; 2717 struct platform_counters *pplat_cnt = p == package_odd ? 
&platform_counters_odd : &platform_counters_even;
2718
2719     outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
2720
2721     if (t) {
2722         outp += sprintf(outp, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags);
2723         outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
2724         outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
2725         outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
2726         outp += sprintf(outp, "c1: %016llX\n", t->c1);
2727
2728         if (DO_BIC(BIC_IPC))
2729             outp += sprintf(outp, "IPC: %lld\n", t->instr_count);
2730
2731         if (DO_BIC(BIC_IRQ))
2732             outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
2733         if (DO_BIC(BIC_NMI))
2734             outp += sprintf(outp, "NMI: %lld\n", t->nmi_count);
2735         if (DO_BIC(BIC_SMI))
2736             outp += sprintf(outp, "SMI: %d\n", t->smi_count);
2737
2738         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
2739             outp +=
2740                 sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
2741                     t->counter[i], mp->sp->path);
2742         }
2743     }
2744
2745     if (c && is_cpu_first_thread_in_core(t, c, p)) {
2746         outp += sprintf(outp, "core: %d\n", c->core_id);
2747         outp += sprintf(outp, "c3: %016llX\n", c->c3);
2748         outp += sprintf(outp, "c6: %016llX\n", c->c6);
2749         outp += sprintf(outp, "c7: %016llX\n", c->c7);
2750         outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
2751         outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt);
2752
2753         const unsigned long long energy_value = c->core_energy.raw_value * c->core_energy.scale;
2754         const double energy_scale = c->core_energy.scale;
2755
2756         if (c->core_energy.unit == RAPL_UNIT_JOULES)
2757             outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale);
2758
2759         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
2760             outp +=
2761                 sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
2762                     c->counter[i], mp->sp->path);
2763         }
2764         outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
2765     }
2766
2767     if (p && is_cpu_first_core_in_package(t, c, p)) {
2768         outp += sprintf(outp, "package: %d\n", p->package_id);
2769
2770         outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
2771         outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
2772         outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
2773         outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
2774
2775         outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
2776         if (DO_BIC(BIC_Pkgpc3))
2777             outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
2778         if (DO_BIC(BIC_Pkgpc6))
2779             outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
2780         if (DO_BIC(BIC_Pkgpc7))
2781             outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
2782         outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
2783         outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
2784         outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
2785         outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
2786         outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
2787         outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg.raw_value);
2788         outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores.raw_value);
2789         outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx.raw_value);
2790         outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram.raw_value);
2791         outp += sprintf(outp, "Joules PSYS: %0llX\n", pplat_cnt->energy_psys.raw_value);
2792         outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status.raw_value);
2793         outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status.raw_value);
2794         outp += 
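/* package temperature, degrees Celsius */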
sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); 2795 2796 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 2797 outp += 2798 sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, 2799 p->counter[i], mp->sp->path); 2800 } 2801 } 2802 2803 outp += sprintf(outp, "\n"); 2804 2805 return 0; 2806 } 2807 2808 double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desired_unit, double interval) 2809 { 2810 assert(desired_unit != RAPL_UNIT_INVALID); 2811 2812 /* 2813 * For now we don't expect anything other than joules, 2814 * so just simplify the logic. 2815 */ 2816 assert(c->unit == RAPL_UNIT_JOULES); 2817 2818 const double scaled = c->raw_value * c->scale; 2819 2820 if (desired_unit == RAPL_UNIT_WATTS) 2821 return scaled / interval; 2822 return scaled; 2823 } 2824 2825 /* 2826 * column formatting convention & formats 2827 */ 2828 int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 2829 { 2830 static int count; 2831 2832 struct platform_counters *pplat_cnt = NULL; 2833 double interval_float, tsc; 2834 char *fmt8; 2835 int i; 2836 struct msr_counter *mp; 2837 struct perf_counter_info *pp; 2838 struct pmt_counter *ppmt; 2839 char *delim = "\t"; 2840 int printed = 0; 2841 2842 if (t == &average.threads) { 2843 pplat_cnt = count & 1 ? &platform_counters_odd : &platform_counters_even; 2844 ++count; 2845 } 2846 2847 /* if showing only 1st thread in core and this isn't one, bail out */ 2848 if (show_core_only && !is_cpu_first_thread_in_core(t, c, p)) 2849 return 0; 2850 2851 /* if showing only 1st thread in pkg and this isn't one, bail out */ 2852 if (show_pkg_only && !is_cpu_first_core_in_package(t, c, p)) 2853 return 0; 2854 2855 /*if not summary line and --cpu is used */ 2856 if ((t != &average.threads) && (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset))) 2857 return 0; 2858 2859 if (DO_BIC(BIC_USEC)) { 2860 /* on each row, print how many usec each timestamp took to gather */ 2861 struct timeval tv; 2862 2863 timersub(&t->tv_end, &t->tv_begin, &tv); 2864 outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec); 2865 } 2866 2867 /* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */ 2868 if (DO_BIC(BIC_TOD)) 2869 outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec); 2870 2871 interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec / 1000000.0; 2872 2873 tsc = t->tsc * tsc_tweak; 2874 2875 /* topo columns, print blanks on 1st (average) line */ 2876 if (t == &average.threads) { 2877 if (DO_BIC(BIC_Package)) 2878 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2879 if (DO_BIC(BIC_Die)) 2880 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2881 if (DO_BIC(BIC_Node)) 2882 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2883 if (DO_BIC(BIC_Core)) 2884 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2885 if (DO_BIC(BIC_CPU)) 2886 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2887 if (DO_BIC(BIC_APIC)) 2888 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2889 if (DO_BIC(BIC_X2APIC)) 2890 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2891 } else { 2892 if (DO_BIC(BIC_Package)) { 2893 if (p) 2894 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id); 2895 else 2896 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2897 } 2898 if (DO_BIC(BIC_Die)) { 2899 if (c) 2900 outp += sprintf(outp, "%s%d", (printed++ ? 
delim : ""), cpus[t->cpu_id].die_id); 2901 else 2902 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2903 } 2904 if (DO_BIC(BIC_Node)) { 2905 if (t) 2906 outp += sprintf(outp, "%s%d", 2907 (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id); 2908 else 2909 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2910 } 2911 if (DO_BIC(BIC_Core)) { 2912 if (c) 2913 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id); 2914 else 2915 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 2916 } 2917 if (DO_BIC(BIC_CPU)) 2918 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id); 2919 if (DO_BIC(BIC_APIC)) 2920 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id); 2921 if (DO_BIC(BIC_X2APIC)) 2922 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id); 2923 } 2924 2925 if (DO_BIC(BIC_Avg_MHz)) 2926 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float); 2927 2928 if (DO_BIC(BIC_Busy)) 2929 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc); 2930 2931 if (DO_BIC(BIC_Bzy_MHz)) { 2932 if (has_base_hz) 2933 outp += 2934 sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf); 2935 else 2936 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 2937 tsc / units * t->aperf / t->mperf / interval_float); 2938 } 2939 2940 if (DO_BIC(BIC_TSC_MHz)) 2941 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc / units / interval_float); 2942 2943 if (DO_BIC(BIC_IPC)) 2944 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf); 2945 2946 /* IRQ */ 2947 if (DO_BIC(BIC_IRQ)) { 2948 if (sums_need_wide_columns) 2949 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count); 2950 else 2951 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count); 2952 } 2953 2954 /* NMI */ 2955 if (DO_BIC(BIC_NMI)) { 2956 if (sums_need_wide_columns) 2957 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->nmi_count); 2958 else 2959 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->nmi_count); 2960 } 2961 2962 /* SMI */ 2963 if (DO_BIC(BIC_SMI)) 2964 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count); 2965 2966 /* Added counters */ 2967 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 2968 if (mp->format == FORMAT_RAW) { 2969 if (mp->width == 32) 2970 outp += 2971 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]); 2972 else 2973 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]); 2974 } else if (mp->format == FORMAT_DELTA) { 2975 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 2976 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]); 2977 else 2978 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]); 2979 } else if (mp->format == FORMAT_PERCENT) { 2980 if (mp->type == COUNTER_USEC) 2981 outp += 2982 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 2983 t->counter[i] / interval_float / 10000); 2984 else 2985 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc); 2986 } 2987 } 2988 2989 /* Added perf counters */ 2990 for (i = 0, pp = sys.perf_tp; pp; ++i, pp = pp->next) { 2991 if (pp->format == FORMAT_RAW) { 2992 if (pp->width == 32) 2993 outp += 2994 sprintf(outp, "%s0x%08x", (printed++ ? 
delim : ""), 2995 (unsigned int)t->perf_counter[i]); 2996 else 2997 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->perf_counter[i]); 2998 } else if (pp->format == FORMAT_DELTA) { 2999 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 3000 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->perf_counter[i]); 3001 else 3002 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->perf_counter[i]); 3003 } else if (pp->format == FORMAT_PERCENT) { 3004 if (pp->type == COUNTER_USEC) 3005 outp += 3006 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 3007 t->perf_counter[i] / interval_float / 10000); 3008 else 3009 outp += 3010 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->perf_counter[i] / tsc); 3011 } 3012 } 3013 3014 for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { 3015 const unsigned long value_raw = t->pmt_counter[i]; 3016 double value_converted; 3017 switch (ppmt->type) { 3018 case PMT_TYPE_RAW: 3019 if (pmt_counter_get_width(ppmt) <= 32) 3020 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 3021 (unsigned int)t->pmt_counter[i]); 3022 else 3023 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->pmt_counter[i]); 3024 3025 break; 3026 3027 case PMT_TYPE_XTAL_TIME: 3028 value_converted = 100.0 * value_raw / crystal_hz / interval_float; 3029 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); 3030 break; 3031 3032 case PMT_TYPE_TCORE_CLOCK: 3033 value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; 3034 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); 3035 } 3036 } 3037 3038 /* C1 */ 3039 if (DO_BIC(BIC_CPU_c1)) 3040 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc); 3041 3042 /* print per-core data only for 1st thread in core */ 3043 if (!is_cpu_first_thread_in_core(t, c, p)) 3044 goto done; 3045 3046 if (DO_BIC(BIC_CPU_c3)) 3047 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc); 3048 if (DO_BIC(BIC_CPU_c6)) 3049 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc); 3050 if (DO_BIC(BIC_CPU_c7)) 3051 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc); 3052 3053 /* Mod%c6 */ 3054 if (DO_BIC(BIC_Mod_c6)) 3055 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc); 3056 3057 if (DO_BIC(BIC_CoreTmp)) 3058 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c); 3059 3060 /* Core throttle count */ 3061 if (DO_BIC(BIC_CORE_THROT_CNT)) 3062 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt); 3063 3064 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 3065 if (mp->format == FORMAT_RAW) { 3066 if (mp->width == 32) 3067 outp += 3068 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]); 3069 else 3070 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]); 3071 } else if (mp->format == FORMAT_DELTA) { 3072 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 3073 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]); 3074 else 3075 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]); 3076 } else if (mp->format == FORMAT_PERCENT) { 3077 outp += sprintf(outp, "%s%.2f", (printed++ ? 
delim : ""), 100.0 * c->counter[i] / tsc); 3078 } 3079 } 3080 3081 for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { 3082 if (pp->format == FORMAT_RAW) { 3083 if (pp->width == 32) 3084 outp += 3085 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 3086 (unsigned int)c->perf_counter[i]); 3087 else 3088 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->perf_counter[i]); 3089 } else if (pp->format == FORMAT_DELTA) { 3090 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 3091 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->perf_counter[i]); 3092 else 3093 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->perf_counter[i]); 3094 } else if (pp->format == FORMAT_PERCENT) { 3095 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->perf_counter[i] / tsc); 3096 } 3097 } 3098 3099 for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { 3100 const unsigned long value_raw = c->pmt_counter[i]; 3101 double value_converted; 3102 switch (ppmt->type) { 3103 case PMT_TYPE_RAW: 3104 if (pmt_counter_get_width(ppmt) <= 32) 3105 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 3106 (unsigned int)c->pmt_counter[i]); 3107 else 3108 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->pmt_counter[i]); 3109 3110 break; 3111 3112 case PMT_TYPE_XTAL_TIME: 3113 value_converted = 100.0 * value_raw / crystal_hz / interval_float; 3114 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); 3115 break; 3116 3117 case PMT_TYPE_TCORE_CLOCK: 3118 value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; 3119 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); 3120 } 3121 } 3122 3123 fmt8 = "%s%.2f"; 3124 3125 if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) 3126 outp += 3127 sprintf(outp, fmt8, (printed++ ? delim : ""), 3128 rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float)); 3129 if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) 3130 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3131 rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float)); 3132 3133 /* print per-package data only for 1st core in package */ 3134 if (!is_cpu_first_core_in_package(t, c, p)) 3135 goto done; 3136 3137 /* PkgTmp */ 3138 if (DO_BIC(BIC_PkgTmp)) 3139 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c); 3140 3141 /* GFXrc6 */ 3142 if (DO_BIC(BIC_GFX_rc6)) { 3143 if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */ 3144 outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); 3145 } else { 3146 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 3147 p->gfx_rc6_ms / 10.0 / interval_float); 3148 } 3149 } 3150 3151 /* GFXMHz */ 3152 if (DO_BIC(BIC_GFXMHz)) 3153 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz); 3154 3155 /* GFXACTMHz */ 3156 if (DO_BIC(BIC_GFXACTMHz)) 3157 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz); 3158 3159 /* SAMmc6 */ 3160 if (DO_BIC(BIC_SAM_mc6)) { 3161 if (p->sam_mc6_ms == -1) { /* detect GFX counter reset */ 3162 outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); 3163 } else { 3164 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 3165 p->sam_mc6_ms / 10.0 / interval_float); 3166 } 3167 } 3168 3169 /* SAMMHz */ 3170 if (DO_BIC(BIC_SAMMHz)) 3171 outp += sprintf(outp, "%s%d", (printed++ ? 
delim : ""), p->sam_mhz); 3172 3173 /* SAMACTMHz */ 3174 if (DO_BIC(BIC_SAMACTMHz)) 3175 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_act_mhz); 3176 3177 /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ 3178 if (DO_BIC(BIC_Totl_c0)) 3179 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc); 3180 if (DO_BIC(BIC_Any_c0)) 3181 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc); 3182 if (DO_BIC(BIC_GFX_c0)) 3183 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc); 3184 if (DO_BIC(BIC_CPUGFX)) 3185 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc); 3186 3187 if (DO_BIC(BIC_Pkgpc2)) 3188 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc); 3189 if (DO_BIC(BIC_Pkgpc3)) 3190 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc); 3191 if (DO_BIC(BIC_Pkgpc6)) 3192 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc); 3193 if (DO_BIC(BIC_Pkgpc7)) 3194 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc); 3195 if (DO_BIC(BIC_Pkgpc8)) 3196 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc); 3197 if (DO_BIC(BIC_Pkgpc9)) 3198 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc); 3199 if (DO_BIC(BIC_Pkgpc10)) 3200 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc); 3201 3202 if (DO_BIC(BIC_Diec6)) 3203 outp += 3204 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->die_c6 / crystal_hz / interval_float); 3205 3206 if (DO_BIC(BIC_CPU_LPI)) { 3207 if (p->cpu_lpi >= 0) 3208 outp += 3209 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 3210 100.0 * p->cpu_lpi / 1000000.0 / interval_float); 3211 else 3212 outp += sprintf(outp, "%s(neg)", (printed++ ? delim : "")); 3213 } 3214 if (DO_BIC(BIC_SYS_LPI)) { 3215 if (p->sys_lpi >= 0) 3216 outp += 3217 sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 3218 100.0 * p->sys_lpi / 1000000.0 / interval_float); 3219 else 3220 outp += sprintf(outp, "%s(neg)", (printed++ ? delim : "")); 3221 } 3222 3223 if (DO_BIC(BIC_PkgWatt)) 3224 outp += 3225 sprintf(outp, fmt8, (printed++ ? delim : ""), 3226 rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float)); 3227 if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) 3228 outp += 3229 sprintf(outp, fmt8, (printed++ ? delim : ""), 3230 rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float)); 3231 if (DO_BIC(BIC_GFXWatt)) 3232 outp += 3233 sprintf(outp, fmt8, (printed++ ? delim : ""), 3234 rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float)); 3235 if (DO_BIC(BIC_RAMWatt)) 3236 outp += 3237 sprintf(outp, fmt8, (printed++ ? delim : ""), 3238 rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float)); 3239 if (DO_BIC(BIC_Pkg_J)) 3240 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3241 rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float)); 3242 if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) 3243 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3244 rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float)); 3245 if (DO_BIC(BIC_GFX_J)) 3246 outp += sprintf(outp, fmt8, (printed++ ? 
delim : ""), 3247 rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float)); 3248 if (DO_BIC(BIC_RAM_J)) 3249 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3250 rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float)); 3251 if (DO_BIC(BIC_PKG__)) 3252 outp += 3253 sprintf(outp, fmt8, (printed++ ? delim : ""), 3254 rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float)); 3255 if (DO_BIC(BIC_RAM__)) 3256 outp += 3257 sprintf(outp, fmt8, (printed++ ? delim : ""), 3258 rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float)); 3259 /* UncMHz */ 3260 if (DO_BIC(BIC_UNCORE_MHZ)) 3261 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz); 3262 3263 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 3264 if (mp->format == FORMAT_RAW) { 3265 if (mp->width == 32) 3266 outp += 3267 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]); 3268 else 3269 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]); 3270 } else if (mp->format == FORMAT_DELTA) { 3271 if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) 3272 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]); 3273 else 3274 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]); 3275 } else if (mp->format == FORMAT_PERCENT) { 3276 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc); 3277 } else if (mp->type == COUNTER_K2M) 3278 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->counter[i] / 1000); 3279 } 3280 3281 for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { 3282 if (pp->format == FORMAT_RAW) { 3283 if (pp->width == 32) 3284 outp += 3285 sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 3286 (unsigned int)p->perf_counter[i]); 3287 else 3288 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->perf_counter[i]); 3289 } else if (pp->format == FORMAT_DELTA) { 3290 if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) 3291 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->perf_counter[i]); 3292 else 3293 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->perf_counter[i]); 3294 } else if (pp->format == FORMAT_PERCENT) { 3295 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->perf_counter[i] / tsc); 3296 } else if (pp->type == COUNTER_K2M) { 3297 outp += 3298 sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->perf_counter[i] / 1000); 3299 } 3300 } 3301 3302 for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { 3303 const unsigned long value_raw = p->pmt_counter[i]; 3304 double value_converted; 3305 switch (ppmt->type) { 3306 case PMT_TYPE_RAW: 3307 if (pmt_counter_get_width(ppmt) <= 32) 3308 outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), 3309 (unsigned int)p->pmt_counter[i]); 3310 else 3311 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->pmt_counter[i]); 3312 3313 break; 3314 3315 case PMT_TYPE_XTAL_TIME: 3316 value_converted = 100.0 * value_raw / crystal_hz / interval_float; 3317 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); 3318 break; 3319 3320 case PMT_TYPE_TCORE_CLOCK: 3321 value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; 3322 outp += sprintf(outp, "%s%.2f", (printed++ ? 
delim : ""), value_converted); 3323 } 3324 } 3325 3326 if (DO_BIC(BIC_SysWatt) && (t == &average.threads)) 3327 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3328 rapl_counter_get_value(&pplat_cnt->energy_psys, RAPL_UNIT_WATTS, interval_float)); 3329 if (DO_BIC(BIC_Sys_J) && (t == &average.threads)) 3330 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 3331 rapl_counter_get_value(&pplat_cnt->energy_psys, RAPL_UNIT_JOULES, interval_float)); 3332 3333 done: 3334 if (*(outp - 1) != '\n') 3335 outp += sprintf(outp, "\n"); 3336 3337 return 0; 3338 } 3339 3340 void flush_output_stdout(void) 3341 { 3342 FILE *filep; 3343 3344 if (outf == stderr) 3345 filep = stdout; 3346 else 3347 filep = outf; 3348 3349 fputs(output_buffer, filep); 3350 fflush(filep); 3351 3352 outp = output_buffer; 3353 } 3354 3355 void flush_output_stderr(void) 3356 { 3357 fputs(output_buffer, outf); 3358 fflush(outf); 3359 outp = output_buffer; 3360 } 3361 3362 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 3363 { 3364 static int count; 3365 3366 if ((!count || (header_iterations && !(count % header_iterations))) || !summary_only) 3367 print_header("\t"); 3368 3369 format_counters(&average.threads, &average.cores, &average.packages); 3370 3371 count++; 3372 3373 if (summary_only) 3374 return; 3375 3376 for_all_cpus(format_counters, t, c, p); 3377 } 3378 3379 #define DELTA_WRAP32(new, old) \ 3380 old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32); 3381 3382 int delta_package(struct pkg_data *new, struct pkg_data *old) 3383 { 3384 int i; 3385 struct msr_counter *mp; 3386 struct perf_counter_info *pp; 3387 struct pmt_counter *ppmt; 3388 3389 if (DO_BIC(BIC_Totl_c0)) 3390 old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; 3391 if (DO_BIC(BIC_Any_c0)) 3392 old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0; 3393 if (DO_BIC(BIC_GFX_c0)) 3394 old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0; 3395 if (DO_BIC(BIC_CPUGFX)) 3396 old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0; 3397 3398 old->pc2 = new->pc2 - old->pc2; 3399 if (DO_BIC(BIC_Pkgpc3)) 3400 old->pc3 = new->pc3 - old->pc3; 3401 if (DO_BIC(BIC_Pkgpc6)) 3402 old->pc6 = new->pc6 - old->pc6; 3403 if (DO_BIC(BIC_Pkgpc7)) 3404 old->pc7 = new->pc7 - old->pc7; 3405 old->pc8 = new->pc8 - old->pc8; 3406 old->pc9 = new->pc9 - old->pc9; 3407 old->pc10 = new->pc10 - old->pc10; 3408 old->die_c6 = new->die_c6 - old->die_c6; 3409 old->cpu_lpi = new->cpu_lpi - old->cpu_lpi; 3410 old->sys_lpi = new->sys_lpi - old->sys_lpi; 3411 old->pkg_temp_c = new->pkg_temp_c; 3412 3413 /* flag an error when rc6 counter resets/wraps */ 3414 if (old->gfx_rc6_ms > new->gfx_rc6_ms) 3415 old->gfx_rc6_ms = -1; 3416 else 3417 old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms; 3418 3419 old->uncore_mhz = new->uncore_mhz; 3420 old->gfx_mhz = new->gfx_mhz; 3421 old->gfx_act_mhz = new->gfx_act_mhz; 3422 3423 /* flag an error when mc6 counter resets/wraps */ 3424 if (old->sam_mc6_ms > new->sam_mc6_ms) 3425 old->sam_mc6_ms = -1; 3426 else 3427 old->sam_mc6_ms = new->sam_mc6_ms - old->sam_mc6_ms; 3428 3429 old->sam_mhz = new->sam_mhz; 3430 old->sam_act_mhz = new->sam_act_mhz; 3431 3432 old->energy_pkg.raw_value = new->energy_pkg.raw_value - old->energy_pkg.raw_value; 3433 old->energy_cores.raw_value = new->energy_cores.raw_value - old->energy_cores.raw_value; 3434 old->energy_gfx.raw_value = new->energy_gfx.raw_value - old->energy_gfx.raw_value; 
3435 old->energy_dram.raw_value = new->energy_dram.raw_value - old->energy_dram.raw_value; 3436 old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value; 3437 old->rapl_dram_perf_status.raw_value = 3438 new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value; 3439 3440 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 3441 if (mp->format == FORMAT_RAW) 3442 old->counter[i] = new->counter[i]; 3443 else if (mp->format == FORMAT_AVERAGE) 3444 old->counter[i] = new->counter[i]; 3445 else 3446 old->counter[i] = new->counter[i] - old->counter[i]; 3447 } 3448 3449 for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { 3450 if (pp->format == FORMAT_RAW) 3451 old->perf_counter[i] = new->perf_counter[i]; 3452 else if (pp->format == FORMAT_AVERAGE) 3453 old->perf_counter[i] = new->perf_counter[i]; 3454 else 3455 old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; 3456 } 3457 3458 for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { 3459 if (ppmt->format == FORMAT_RAW) 3460 old->pmt_counter[i] = new->pmt_counter[i]; 3461 else 3462 old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; 3463 } 3464 3465 return 0; 3466 } 3467 3468 void delta_core(struct core_data *new, struct core_data *old) 3469 { 3470 int i; 3471 struct msr_counter *mp; 3472 struct perf_counter_info *pp; 3473 struct pmt_counter *ppmt; 3474 3475 old->c3 = new->c3 - old->c3; 3476 old->c6 = new->c6 - old->c6; 3477 old->c7 = new->c7 - old->c7; 3478 old->core_temp_c = new->core_temp_c; 3479 old->core_throt_cnt = new->core_throt_cnt; 3480 old->mc6_us = new->mc6_us - old->mc6_us; 3481 3482 DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value); 3483 3484 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 3485 if (mp->format == FORMAT_RAW) 3486 old->counter[i] = new->counter[i]; 3487 else 3488 old->counter[i] = new->counter[i] - old->counter[i]; 3489 } 3490 3491 for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { 3492 if (pp->format == FORMAT_RAW) 3493 old->perf_counter[i] = new->perf_counter[i]; 3494 else 3495 old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; 3496 } 3497 3498 for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { 3499 if (ppmt->format == FORMAT_RAW) 3500 old->pmt_counter[i] = new->pmt_counter[i]; 3501 else 3502 old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; 3503 } 3504 } 3505 3506 int soft_c1_residency_display(int bic) 3507 { 3508 if (!DO_BIC(BIC_CPU_c1) || platform->has_msr_core_c1_res) 3509 return 0; 3510 3511 return DO_BIC_READ(bic); 3512 } 3513 3514 /* 3515 * old = new - old 3516 */ 3517 int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta) 3518 { 3519 int i; 3520 struct msr_counter *mp; 3521 struct perf_counter_info *pp; 3522 struct pmt_counter *ppmt; 3523 3524 /* we run cpuid just the 1st time, copy the results */ 3525 if (DO_BIC(BIC_APIC)) 3526 new->apic_id = old->apic_id; 3527 if (DO_BIC(BIC_X2APIC)) 3528 new->x2apic_id = old->x2apic_id; 3529 3530 /* 3531 * the timestamps from the start of the measurement interval are in "old" 3532 * the timestamps from the end of the measurement interval are in "new" 3533 * overwrite old with new so we can print end-of-interval values 3534 */ 3535 3536 timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta); 3537 old->tv_begin = new->tv_begin; 3538 old->tv_end = new->tv_end; 3539 3540 old->tsc = new->tsc - old->tsc; 3541 3542 /* check for TSC < 1 Mcycles over interval */ 3543 if
(old->tsc < (1000 * 1000)) 3544 errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n" 3545 "You can disable all c-states by booting with \"idle=poll\"\n" 3546 "or just the deep ones with \"processor.max_cstate=1\""); 3547 3548 old->c1 = new->c1 - old->c1; 3549 3550 if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC) 3551 || soft_c1_residency_display(BIC_Avg_MHz)) { 3552 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { 3553 old->aperf = new->aperf - old->aperf; 3554 old->mperf = new->mperf - old->mperf; 3555 } else { 3556 return -1; 3557 } 3558 } 3559 3560 if (platform->has_msr_core_c1_res) { 3561 /* 3562 * Some models have a dedicated C1 residency MSR, 3563 * which should be more accurate than the derivation below. 3564 */ 3565 } else { 3566 /* 3567 * As counter collection is not atomic, 3568 * it is possible for mperf's non-halted cycles + idle states 3569 * to exceed TSC's all cycles: show c1 = 0% in that case. 3570 */ 3571 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak)) 3572 old->c1 = 0; 3573 else { 3574 /* normal case, derive c1 */ 3575 old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3 3576 - core_delta->c6 - core_delta->c7; 3577 } 3578 } 3579 3580 if (old->mperf == 0) { 3581 if (debug > 1) 3582 fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id); 3583 old->mperf = 1; /* divide by 0 protection */ 3584 } 3585 3586 if (DO_BIC(BIC_IPC)) 3587 old->instr_count = new->instr_count - old->instr_count; 3588 3589 if (DO_BIC(BIC_IRQ)) 3590 old->irq_count = new->irq_count - old->irq_count; 3591 3592 if (DO_BIC(BIC_NMI)) 3593 old->nmi_count = new->nmi_count - old->nmi_count; 3594 3595 if (DO_BIC(BIC_SMI)) 3596 old->smi_count = new->smi_count - old->smi_count; 3597 3598 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 3599 if (mp->format == FORMAT_RAW) 3600 old->counter[i] = new->counter[i]; 3601 else 3602 old->counter[i] = new->counter[i] - old->counter[i]; 3603 } 3604 3605 for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { 3606 if (pp->format == FORMAT_RAW) 3607 old->perf_counter[i] = new->perf_counter[i]; 3608 else 3609 old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; 3610 } 3611 3612 for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { 3613 if (ppmt->format == FORMAT_RAW) 3614 old->pmt_counter[i] = new->pmt_counter[i]; 3615 else 3616 old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; 3617 } 3618 3619 return 0; 3620 } 3621 3622 int delta_cpu(struct thread_data *t, struct core_data *c, 3623 struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2) 3624 { 3625 int retval = 0; 3626 3627 /* calculate core delta only for 1st thread in core */ 3628 if (is_cpu_first_thread_in_core(t, c, p)) 3629 delta_core(c, c2); 3630 3631 /* always calculate thread delta */ 3632 retval = delta_thread(t, t2, c2); /* c2 is core delta */ 3633 3634 /* calculate package delta only for 1st core in package */ 3635 if (is_cpu_first_core_in_package(t, c, p)) 3636 retval |= delta_package(p, p2); 3637 3638 return retval; 3639 } 3640 3641 void delta_platform(struct platform_counters *new, struct platform_counters *old) 3642 { 3643 old->energy_psys.raw_value = new->energy_psys.raw_value - old->energy_psys.raw_value; 3644 } 3645 3646 void rapl_counter_clear(struct rapl_counter *c) 3647 { 3648 c->raw_value = 0; 3649 c->scale = 0.0; 3650 c->unit = RAPL_UNIT_INVALID; 3651 } 3652 3653 void clear_counters(struct thread_data *t, struct core_data *c, struct 
pkg_data *p) 3654 { 3655 int i; 3656 struct msr_counter *mp; 3657 3658 t->tv_begin.tv_sec = 0; 3659 t->tv_begin.tv_usec = 0; 3660 t->tv_end.tv_sec = 0; 3661 t->tv_end.tv_usec = 0; 3662 t->tv_delta.tv_sec = 0; 3663 t->tv_delta.tv_usec = 0; 3664 3665 t->tsc = 0; 3666 t->aperf = 0; 3667 t->mperf = 0; 3668 t->c1 = 0; 3669 3670 t->instr_count = 0; 3671 3672 t->irq_count = 0; 3673 t->nmi_count = 0; 3674 t->smi_count = 0; 3675 3676 c->c3 = 0; 3677 c->c6 = 0; 3678 c->c7 = 0; 3679 c->mc6_us = 0; 3680 c->core_temp_c = 0; 3681 rapl_counter_clear(&c->core_energy); 3682 c->core_throt_cnt = 0; 3683 3684 p->pkg_wtd_core_c0 = 0; 3685 p->pkg_any_core_c0 = 0; 3686 p->pkg_any_gfxe_c0 = 0; 3687 p->pkg_both_core_gfxe_c0 = 0; 3688 3689 p->pc2 = 0; 3690 if (DO_BIC(BIC_Pkgpc3)) 3691 p->pc3 = 0; 3692 if (DO_BIC(BIC_Pkgpc6)) 3693 p->pc6 = 0; 3694 if (DO_BIC(BIC_Pkgpc7)) 3695 p->pc7 = 0; 3696 p->pc8 = 0; 3697 p->pc9 = 0; 3698 p->pc10 = 0; 3699 p->die_c6 = 0; 3700 p->cpu_lpi = 0; 3701 p->sys_lpi = 0; 3702 3703 rapl_counter_clear(&p->energy_pkg); 3704 rapl_counter_clear(&p->energy_dram); 3705 rapl_counter_clear(&p->energy_cores); 3706 rapl_counter_clear(&p->energy_gfx); 3707 rapl_counter_clear(&p->rapl_pkg_perf_status); 3708 rapl_counter_clear(&p->rapl_dram_perf_status); 3709 p->pkg_temp_c = 0; 3710 3711 p->gfx_rc6_ms = 0; 3712 p->uncore_mhz = 0; 3713 p->gfx_mhz = 0; 3714 p->gfx_act_mhz = 0; 3715 p->sam_mc6_ms = 0; 3716 p->sam_mhz = 0; 3717 p->sam_act_mhz = 0; 3718 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) 3719 t->counter[i] = 0; 3720 3721 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) 3722 c->counter[i] = 0; 3723 3724 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) 3725 p->counter[i] = 0; 3726 3727 memset(&t->perf_counter[0], 0, sizeof(t->perf_counter)); 3728 memset(&c->perf_counter[0], 0, sizeof(c->perf_counter)); 3729 memset(&p->perf_counter[0], 0, sizeof(p->perf_counter)); 3730 3731 memset(&t->pmt_counter[0], 0, sizeof(t->pmt_counter)); /* sizeof(), not ARRAY_SIZE(): memset takes a byte count, so ARRAY_SIZE() zeroed only part of the array */ 3732 memset(&c->pmt_counter[0], 0, sizeof(c->pmt_counter)); 3733 memset(&p->pmt_counter[0], 0, sizeof(p->pmt_counter)); 3734 } 3735 3736 void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src) 3737 { 3738 /* Copy unit and scale from src if dst is not initialized */ 3739 if (dst->unit == RAPL_UNIT_INVALID) { 3740 dst->unit = src->unit; 3741 dst->scale = src->scale; 3742 } 3743 3744 assert(dst->unit == src->unit); 3745 assert(dst->scale == src->scale); 3746 3747 dst->raw_value += src->raw_value; 3748 } 3749 3750 int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 3751 { 3752 int i; 3753 struct msr_counter *mp; 3754 struct perf_counter_info *pp; 3755 struct pmt_counter *ppmt; 3756 3757 /* copy unchanging apic_ids */ 3758 if (DO_BIC(BIC_APIC)) 3759 average.threads.apic_id = t->apic_id; 3760 if (DO_BIC(BIC_X2APIC)) 3761 average.threads.x2apic_id = t->x2apic_id; 3762 3763 /* remember first tv_begin */ 3764 if (average.threads.tv_begin.tv_sec == 0) 3765 average.threads.tv_begin = procsysfs_tv_begin; 3766 3767 /* remember last tv_end */ 3768 average.threads.tv_end = t->tv_end; 3769 3770 average.threads.tsc += t->tsc; 3771 average.threads.aperf += t->aperf; 3772 average.threads.mperf += t->mperf; 3773 average.threads.c1 += t->c1; 3774 3775 average.threads.instr_count += t->instr_count; 3776 3777 average.threads.irq_count += t->irq_count; 3778 average.threads.nmi_count += t->nmi_count; 3779 average.threads.smi_count += t->smi_count; 3780 3781 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 3782 if
(mp->format == FORMAT_RAW) 3783 continue; 3784 average.threads.counter[i] += t->counter[i]; 3785 } 3786 3787 for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { 3788 if (pp->format == FORMAT_RAW) 3789 continue; 3790 average.threads.perf_counter[i] += t->perf_counter[i]; 3791 } 3792 3793 for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { 3794 average.threads.pmt_counter[i] += t->pmt_counter[i]; 3795 } 3796 3797 /* sum per-core values only for 1st thread in core */ 3798 if (!is_cpu_first_thread_in_core(t, c, p)) 3799 return 0; 3800 3801 average.cores.c3 += c->c3; 3802 average.cores.c6 += c->c6; 3803 average.cores.c7 += c->c7; 3804 average.cores.mc6_us += c->mc6_us; 3805 3806 average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); 3807 average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt); 3808 3809 rapl_counter_accumulate(&average.cores.core_energy, &c->core_energy); 3810 3811 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 3812 if (mp->format == FORMAT_RAW) 3813 continue; 3814 average.cores.counter[i] += c->counter[i]; 3815 } 3816 3817 for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { 3818 if (pp->format == FORMAT_RAW) 3819 continue; 3820 average.cores.perf_counter[i] += c->perf_counter[i]; 3821 } 3822 3823 for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { 3824 average.cores.pmt_counter[i] += c->pmt_counter[i]; 3825 } 3826 3827 /* sum per-pkg values only for 1st core in pkg */ 3828 if (!is_cpu_first_core_in_package(t, c, p)) 3829 return 0; 3830 3831 if (DO_BIC(BIC_Totl_c0)) 3832 average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0; 3833 if (DO_BIC(BIC_Any_c0)) 3834 average.packages.pkg_any_core_c0 += p->pkg_any_core_c0; 3835 if (DO_BIC(BIC_GFX_c0)) 3836 average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0; 3837 if (DO_BIC(BIC_CPUGFX)) 3838 average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0; 3839 3840 average.packages.pc2 += p->pc2; 3841 if (DO_BIC(BIC_Pkgpc3)) 3842 average.packages.pc3 += p->pc3; 3843 if (DO_BIC(BIC_Pkgpc6)) 3844 average.packages.pc6 += p->pc6; 3845 if (DO_BIC(BIC_Pkgpc7)) 3846 average.packages.pc7 += p->pc7; 3847 average.packages.pc8 += p->pc8; 3848 average.packages.pc9 += p->pc9; 3849 average.packages.pc10 += p->pc10; 3850 average.packages.die_c6 += p->die_c6; 3851 3852 average.packages.cpu_lpi = p->cpu_lpi; 3853 average.packages.sys_lpi = p->sys_lpi; 3854 3855 rapl_counter_accumulate(&average.packages.energy_pkg, &p->energy_pkg); 3856 rapl_counter_accumulate(&average.packages.energy_dram, &p->energy_dram); 3857 rapl_counter_accumulate(&average.packages.energy_cores, &p->energy_cores); 3858 rapl_counter_accumulate(&average.packages.energy_gfx, &p->energy_gfx); 3859 3860 average.packages.gfx_rc6_ms = p->gfx_rc6_ms; 3861 average.packages.uncore_mhz = p->uncore_mhz; 3862 average.packages.gfx_mhz = p->gfx_mhz; 3863 average.packages.gfx_act_mhz = p->gfx_act_mhz; 3864 average.packages.sam_mc6_ms = p->sam_mc6_ms; 3865 average.packages.sam_mhz = p->sam_mhz; 3866 average.packages.sam_act_mhz = p->sam_act_mhz; 3867 3868 average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); 3869 3870 rapl_counter_accumulate(&average.packages.rapl_pkg_perf_status, &p->rapl_pkg_perf_status); 3871 rapl_counter_accumulate(&average.packages.rapl_dram_perf_status, &p->rapl_dram_perf_status); 3872 3873 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 3874 if ((mp->format == FORMAT_RAW) && (topo.num_packages == 0)) 3875 average.packages.counter[i] = p->counter[i]; 3876 else 
3877 average.packages.counter[i] += p->counter[i]; 3878 } 3879 3880 for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { 3881 if ((pp->format == FORMAT_RAW) && (topo.num_packages == 0)) 3882 average.packages.perf_counter[i] = p->perf_counter[i]; 3883 else 3884 average.packages.perf_counter[i] += p->perf_counter[i]; 3885 } 3886 3887 for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { 3888 average.packages.pmt_counter[i] += p->pmt_counter[i]; 3889 } 3890 3891 return 0; 3892 } 3893 3894 /* 3895 * sum the counters for all cpus in the system 3896 * compute the weighted average 3897 */ 3898 void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data *p) 3899 { 3900 int i; 3901 struct msr_counter *mp; 3902 struct perf_counter_info *pp; 3903 struct pmt_counter *ppmt; 3904 3905 clear_counters(&average.threads, &average.cores, &average.packages); 3906 3907 for_all_cpus(sum_counters, t, c, p); 3908 3909 /* Use the global time delta for the average. */ 3910 average.threads.tv_delta = tv_delta; 3911 3912 average.threads.tsc /= topo.allowed_cpus; 3913 average.threads.aperf /= topo.allowed_cpus; 3914 average.threads.mperf /= topo.allowed_cpus; 3915 average.threads.instr_count /= topo.allowed_cpus; 3916 average.threads.c1 /= topo.allowed_cpus; 3917 3918 if (average.threads.irq_count > 9999999) 3919 sums_need_wide_columns = 1; 3920 if (average.threads.nmi_count > 9999999) 3921 sums_need_wide_columns = 1; 3922 3923 3924 average.cores.c3 /= topo.allowed_cores; 3925 average.cores.c6 /= topo.allowed_cores; 3926 average.cores.c7 /= topo.allowed_cores; 3927 average.cores.mc6_us /= topo.allowed_cores; 3928 3929 if (DO_BIC(BIC_Totl_c0)) 3930 average.packages.pkg_wtd_core_c0 /= topo.allowed_packages; 3931 if (DO_BIC(BIC_Any_c0)) 3932 average.packages.pkg_any_core_c0 /= topo.allowed_packages; 3933 if (DO_BIC(BIC_GFX_c0)) 3934 average.packages.pkg_any_gfxe_c0 /= topo.allowed_packages; 3935 if (DO_BIC(BIC_CPUGFX)) 3936 average.packages.pkg_both_core_gfxe_c0 /= topo.allowed_packages; 3937 3938 average.packages.pc2 /= topo.allowed_packages; 3939 if (DO_BIC(BIC_Pkgpc3)) 3940 average.packages.pc3 /= topo.allowed_packages; 3941 if (DO_BIC(BIC_Pkgpc6)) 3942 average.packages.pc6 /= topo.allowed_packages; 3943 if (DO_BIC(BIC_Pkgpc7)) 3944 average.packages.pc7 /= topo.allowed_packages; 3945 3946 average.packages.pc8 /= topo.allowed_packages; 3947 average.packages.pc9 /= topo.allowed_packages; 3948 average.packages.pc10 /= topo.allowed_packages; 3949 average.packages.die_c6 /= topo.allowed_packages; 3950 3951 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 3952 if (mp->format == FORMAT_RAW) 3953 continue; 3954 if (mp->type == COUNTER_ITEMS) { 3955 if (average.threads.counter[i] > 9999999) 3956 sums_need_wide_columns = 1; 3957 continue; 3958 } 3959 average.threads.counter[i] /= topo.allowed_cpus; 3960 } 3961 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 3962 if (mp->format == FORMAT_RAW) 3963 continue; 3964 if (mp->type == COUNTER_ITEMS) { 3965 if (average.cores.counter[i] > 9999999) 3966 sums_need_wide_columns = 1; 3967 } 3968 average.cores.counter[i] /= topo.allowed_cores; 3969 } 3970 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 3971 if (mp->format == FORMAT_RAW) 3972 continue; 3973 if (mp->type == COUNTER_ITEMS) { 3974 if (average.packages.counter[i] > 9999999) 3975 sums_need_wide_columns = 1; 3976 } 3977 average.packages.counter[i] /= topo.allowed_packages; 3978 } 3979 3980 for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { 3981 if (pp->format == FORMAT_RAW) 3982 
continue; 3983 if (pp->type == COUNTER_ITEMS) { 3984 if (average.threads.perf_counter[i] > 9999999) 3985 sums_need_wide_columns = 1; 3986 continue; 3987 } 3988 average.threads.perf_counter[i] /= topo.allowed_cpus; 3989 } 3990 for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { 3991 if (pp->format == FORMAT_RAW) 3992 continue; 3993 if (pp->type == COUNTER_ITEMS) { 3994 if (average.cores.perf_counter[i] > 9999999) 3995 sums_need_wide_columns = 1; 3996 } 3997 average.cores.perf_counter[i] /= topo.allowed_cores; 3998 } 3999 for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { 4000 if (pp->format == FORMAT_RAW) 4001 continue; 4002 if (pp->type == COUNTER_ITEMS) { 4003 if (average.packages.perf_counter[i] > 9999999) 4004 sums_need_wide_columns = 1; 4005 } 4006 average.packages.perf_counter[i] /= topo.allowed_packages; 4007 } 4008 4009 for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { 4010 average.threads.pmt_counter[i] /= topo.allowed_cpus; 4011 } 4012 for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { 4013 average.cores.pmt_counter[i] /= topo.allowed_cores; 4014 } 4015 for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { 4016 average.packages.pmt_counter[i] /= topo.allowed_packages; 4017 } 4018 } 4019 4020 static unsigned long long rdtsc(void) 4021 { 4022 unsigned int low, high; 4023 4024 asm volatile ("rdtsc":"=a" (low), "=d"(high)); 4025 4026 return low | ((unsigned long long)high) << 32; 4027 } 4028 4029 /* 4030 * Open a file, and exit on failure 4031 */ 4032 FILE *fopen_or_die(const char *path, const char *mode) 4033 { 4034 FILE *filep = fopen(path, mode); 4035 4036 if (!filep) 4037 err(1, "%s: open failed", path); 4038 return filep; 4039 } 4040 4041 /* 4042 * snapshot_sysfs_counter() 4043 * 4044 * return snapshot of given counter 4045 */ 4046 unsigned long long snapshot_sysfs_counter(char *path) 4047 { 4048 FILE *fp; 4049 int retval; 4050 unsigned long long counter; 4051 4052 fp = fopen_or_die(path, "r"); 4053 4054 retval = fscanf(fp, "%llu", &counter); /* %llu: counter is unsigned */ 4055 if (retval != 1) 4056 err(1, "snapshot_sysfs_counter(%s)", path); 4057 4058 fclose(fp); 4059 4060 return counter; 4061 } 4062 4063 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp, char *counter_path) 4064 { 4065 if (mp->msr_num != 0) { 4066 assert(!no_msr); 4067 if (get_msr(cpu, mp->msr_num, counterp)) 4068 return -1; 4069 } else { 4070 char path[128 + PATH_BYTES]; 4071 4072 if (mp->flags & SYSFS_PERCPU) { 4073 sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", cpu, mp->sp->path); 4074 4075 *counterp = snapshot_sysfs_counter(path); 4076 } else { 4077 *counterp = snapshot_sysfs_counter(counter_path); 4078 } 4079 } 4080 4081 return 0; 4082 } 4083 4084 unsigned long long get_legacy_uncore_mhz(int package) 4085 { 4086 char path[128]; 4087 int die; 4088 static int warn_once; 4089 4090 /* 4091 * for this package, use the first die_id that exists 4092 */ 4093 for (die = 0; die <= topo.max_die_id; ++die) { 4094 4095 sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", 4096 package, die); 4097 4098 if (access(path, R_OK) == 0) 4099 return (snapshot_sysfs_counter(path) / 1000); 4100 } 4101 if (!warn_once) { 4102 warnx("BUG: %s: No %s", __func__, path); 4103 warn_once = 1; 4104 } 4105 4106 return 0; 4107 } 4108 4109 int get_epb(int cpu) 4110 { 4111 char path[128 + PATH_BYTES]; 4112 unsigned long long msr; 4113 int ret, epb = -1; 4114 FILE *fp; 4115 4116 sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu); 4117
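/*
 * Prefer the kernel's sysfs interface for EPB, falling back to reading
 * MSR_IA32_ENERGY_PERF_BIAS directly only when sysfs is unavailable.
 * Either way the result is the 4-bit Energy-Performance Bias hint:
 * 0 is maximum performance, 15 is maximum energy savings, and e.g. a
 * reading of 6 is the "normal" balanced setting.
 */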
4118 fp = fopen(path, "r"); 4119 if (!fp) 4120 goto msr_fallback; 4121 4122 ret = fscanf(fp, "%d", &epb); 4123 if (ret != 1) 4124 err(1, "%s(%s)", __func__, path); 4125 4126 fclose(fp); 4127 4128 return epb; 4129 4130 msr_fallback: 4131 if (no_msr) 4132 return -1; 4133 4134 get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr); 4135 4136 return msr & 0xf; 4137 } 4138 4139 void get_apic_id(struct thread_data *t) 4140 { 4141 unsigned int eax, ebx, ecx, edx; 4142 4143 if (DO_BIC(BIC_APIC)) { 4144 eax = ebx = ecx = edx = 0; 4145 __cpuid(1, eax, ebx, ecx, edx); 4146 4147 t->apic_id = (ebx >> 24) & 0xff; 4148 } 4149 4150 if (!DO_BIC(BIC_X2APIC)) 4151 return; 4152 4153 if (authentic_amd || hygon_genuine) { 4154 unsigned int topology_extensions; 4155 4156 if (max_extended_level < 0x8000001e) 4157 return; 4158 4159 eax = ebx = ecx = edx = 0; 4160 __cpuid(0x80000001, eax, ebx, ecx, edx); 4161 topology_extensions = ecx & (1 << 22); 4162 4163 if (topology_extensions == 0) 4164 return; 4165 4166 eax = ebx = ecx = edx = 0; 4167 __cpuid(0x8000001e, eax, ebx, ecx, edx); 4168 4169 t->x2apic_id = eax; 4170 return; 4171 } 4172 4173 if (!genuine_intel) 4174 return; 4175 4176 if (max_level < 0xb) 4177 return; 4178 4179 ecx = 0; 4180 __cpuid(0xb, eax, ebx, ecx, edx); 4181 t->x2apic_id = edx; 4182 4183 if (debug && (t->apic_id != (t->x2apic_id & 0xff))) 4184 fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id); 4185 } 4186 4187 int get_core_throt_cnt(int cpu, unsigned long long *cnt) 4188 { 4189 char path[128 + PATH_BYTES]; 4190 unsigned long long tmp; 4191 FILE *fp; 4192 int ret; 4193 4194 sprintf(path, "/sys/devices/system/cpu/cpu%d/thermal_throttle/core_throttle_count", cpu); 4195 fp = fopen(path, "r"); 4196 if (!fp) 4197 return -1; 4198 ret = fscanf(fp, "%llu", &tmp); 4199 fclose(fp); 4200 if (ret != 1) 4201 return -1; 4202 *cnt = tmp; 4203 4204 return 0; 4205 } 4206 4207 struct amperf_group_fd { 4208 int aperf; /* Also the group descriptor */ 4209 int mperf; 4210 }; 4211 4212 static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr) 4213 { 4214 int fdmt; 4215 int bytes_read; 4216 char buf[64]; 4217 int ret = -1; 4218 4219 fdmt = open(path, O_RDONLY, 0); 4220 if (fdmt == -1) { 4221 if (debug) 4222 fprintf(stderr, "Failed to open perf counter info %s\n", path); 4223 ret = -1; 4224 goto cleanup_and_exit; 4225 } 4226 4227 bytes_read = read(fdmt, buf, sizeof(buf) - 1); 4228 if (bytes_read <= 0 || bytes_read >= (int)sizeof(buf)) { 4229 if (debug) 4230 fprintf(stderr, "Failed to read perf counter info %s\n", path); 4231 ret = -1; 4232 goto cleanup_and_exit; 4233 } 4234 4235 buf[bytes_read] = '\0'; 4236 4237 if (sscanf(buf, parse_format, value_ptr) != 1) { 4238 if (debug) 4239 fprintf(stderr, "Failed to parse perf counter info %s\n", path); 4240 ret = -1; 4241 goto cleanup_and_exit; 4242 } 4243 4244 ret = 0; 4245 4246 cleanup_and_exit: 4247 if (fdmt != -1) close(fdmt); /* don't close(-1) on the open-failure path */ 4248 return ret; 4249 } 4250 4251 static unsigned int read_perf_counter_info_n(const char *const path, const char *const parse_format) 4252 { 4253 unsigned int v; 4254 int status; 4255 4256 status = read_perf_counter_info(path, parse_format, &v); 4257 if (status) 4258 v = -1; 4259 4260 return v; 4261 } 4262 4263 static unsigned int read_perf_type(const char *subsys) 4264 { 4265 const char *const path_format = "/sys/bus/event_source/devices/%s/type"; 4266 const char *const format = "%u"; 4267 char path[128]; 4268 4269 snprintf(path, sizeof(path), path_format, subsys); 4270 4271
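/*
 * The type file holds the dynamic PMU id that the kernel expects in
 * perf_event_attr.type.  Combined with read_perf_config() below, which
 * folds a sysfs "event=0x..,umask=0x.." string into attr.config as
 * (umask << 8) | event, that is enough to open a counter.  A minimal
 * sketch (error handling omitted; the "msr" PMU and its "aperf" event
 * are just examples):
 *
 *	struct perf_event_attr attr = {};
 *
 *	attr.size = sizeof(attr);
 *	attr.type = read_perf_type("msr");
 *	attr.config = read_perf_config("msr", "aperf");
 *	fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
 */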
return read_perf_counter_info_n(path, format); 4272 } 4273 4274 static unsigned int read_perf_config(const char *subsys, const char *event_name) 4275 { 4276 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s"; 4277 FILE *fconfig = NULL; 4278 char path[128]; 4279 char config_str[64]; 4280 unsigned int config; 4281 unsigned int umask; 4282 bool has_config = false; 4283 bool has_umask = false; 4284 unsigned int ret = -1; 4285 4286 snprintf(path, sizeof(path), path_format, subsys, event_name); 4287 4288 fconfig = fopen(path, "r"); 4289 if (!fconfig) 4290 return -1; 4291 4292 if (fgets(config_str, ARRAY_SIZE(config_str), fconfig) != config_str) 4293 goto cleanup_and_exit; 4294 4295 for (char *pconfig_str = &config_str[0]; pconfig_str;) { 4296 if (sscanf(pconfig_str, "event=%x", &config) == 1) { 4297 has_config = true; 4298 goto next; 4299 } 4300 4301 if (sscanf(pconfig_str, "umask=%x", &umask) == 1) { 4302 has_umask = true; 4303 goto next; 4304 } 4305 4306 next: 4307 pconfig_str = strchr(pconfig_str, ','); 4308 if (pconfig_str) { 4309 *pconfig_str = '\0'; 4310 ++pconfig_str; 4311 } 4312 } 4313 4314 if (!has_umask) 4315 umask = 0; 4316 4317 if (has_config) 4318 ret = (umask << 8) | config; 4319 4320 cleanup_and_exit: 4321 fclose(fconfig); 4322 return ret; 4323 } 4324 4325 static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_name) 4326 { 4327 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.unit"; 4328 const char *const format = "%15s"; /* bound to unit_buffer[16] to avoid overflow */ 4329 char path[128]; 4330 char unit_buffer[16]; 4331 4332 snprintf(path, sizeof(path), path_format, subsys, event_name); 4333 4334 read_perf_counter_info(path, format, &unit_buffer); 4335 if (strcmp("Joules", unit_buffer) == 0) 4336 return RAPL_UNIT_JOULES; 4337 4338 return RAPL_UNIT_INVALID; 4339 } 4340 4341 static double read_perf_scale(const char *subsys, const char *event_name) 4342 { 4343 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.scale"; 4344 const char *const format = "%lf"; 4345 char path[128]; 4346 double scale; 4347 4348 snprintf(path, sizeof(path), path_format, subsys, event_name); 4349 4350 if (read_perf_counter_info(path, format, &scale)) 4351 return 0.0; 4352 4353 return scale; 4354 } 4355 4356 size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci) 4357 { 4358 size_t ret = 0; 4359 4360 for (int i = 0; i < NUM_RAPL_COUNTERS; ++i) 4361 if (rci->source[i] == COUNTER_SOURCE_PERF) 4362 ++ret; 4363 4364 return ret; 4365 } 4366 4367 static size_t cstate_counter_info_count_perf(const struct cstate_counter_info_t *cci) 4368 { 4369 size_t ret = 0; 4370 4371 for (int i = 0; i < NUM_CSTATE_COUNTERS; ++i) 4372 if (cci->source[i] == COUNTER_SOURCE_PERF) 4373 ++ret; 4374 4375 return ret; 4376 } 4377 4378 void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci, unsigned int idx) 4379 { 4380 if (rci->source[idx] == COUNTER_SOURCE_NONE) 4381 return; 4382 4383 rc->raw_value = rci->data[idx]; 4384 rc->unit = rci->unit[idx]; 4385 rc->scale = rci->scale[idx]; 4386 } 4387 4388 int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct pkg_data *p) 4389 { 4390 struct platform_counters *pplat_cnt = p == package_odd ?
&platform_counters_odd : &platform_counters_even; 4391 unsigned long long perf_data[NUM_RAPL_COUNTERS + 1]; 4392 struct rapl_counter_info_t *rci; 4393 4394 if (debug >= 2) 4395 fprintf(stderr, "%s: cpu%d domain%d\n", __func__, cpu, domain); 4396 4397 assert(rapl_counter_info_perdomain); 4398 assert(domain < rapl_counter_info_perdomain_size); 4399 4400 rci = &rapl_counter_info_perdomain[domain]; 4401 4402 /* 4403 * If we have any perf counters to read, read them all now, in bulk 4404 */ 4405 if (rci->fd_perf != -1) { 4406 size_t num_perf_counters = rapl_counter_info_count_perf(rci); 4407 const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long); 4408 const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data)); 4409 4410 if (actual_read_size != expected_read_size) 4411 err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, 4412 actual_read_size); 4413 } 4414 4415 for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) { 4416 switch (rci->source[i]) { 4417 case COUNTER_SOURCE_NONE: 4418 rci->data[i] = 0; 4419 break; 4420 4421 case COUNTER_SOURCE_PERF: 4422 assert(pi < ARRAY_SIZE(perf_data)); 4423 assert(rci->fd_perf != -1); 4424 4425 if (debug >= 2) 4426 fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n", 4427 i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]); 4428 4429 rci->data[i] = perf_data[pi]; 4430 4431 ++pi; 4432 break; 4433 4434 case COUNTER_SOURCE_MSR: 4435 if (debug >= 2) 4436 fprintf(stderr, "Reading rapl counter via msr at %u\n", i); 4437 4438 assert(!no_msr); 4439 if (rci->flags[i] & RAPL_COUNTER_FLAG_USE_MSR_SUM) { 4440 if (get_msr_sum(cpu, rci->msr[i], &rci->data[i])) 4441 return -13 - i; 4442 } else { 4443 if (get_msr(cpu, rci->msr[i], &rci->data[i])) 4444 return -13 - i; 4445 } 4446 4447 rci->data[i] &= rci->msr_mask[i]; 4448 if (rci->msr_shift[i] >= 0) 4449 rci->data[i] >>= abs(rci->msr_shift[i]); 4450 else 4451 rci->data[i] <<= abs(rci->msr_shift[i]); 4452 4453 break; 4454 } 4455 } 4456 4457 BUILD_BUG_ON(NUM_RAPL_COUNTERS != 8); 4458 write_rapl_counter(&p->energy_pkg, rci, RAPL_RCI_INDEX_ENERGY_PKG); 4459 write_rapl_counter(&p->energy_cores, rci, RAPL_RCI_INDEX_ENERGY_CORES); 4460 write_rapl_counter(&p->energy_dram, rci, RAPL_RCI_INDEX_DRAM); 4461 write_rapl_counter(&p->energy_gfx, rci, RAPL_RCI_INDEX_GFX); 4462 write_rapl_counter(&p->rapl_pkg_perf_status, rci, RAPL_RCI_INDEX_PKG_PERF_STATUS); 4463 write_rapl_counter(&p->rapl_dram_perf_status, rci, RAPL_RCI_INDEX_DRAM_PERF_STATUS); 4464 write_rapl_counter(&c->core_energy, rci, RAPL_RCI_INDEX_CORE_ENERGY); 4465 write_rapl_counter(&pplat_cnt->energy_psys, rci, RAPL_RCI_INDEX_ENERGY_PLATFORM); 4466 4467 return 0; 4468 } 4469 4470 char *find_sysfs_path_by_id(struct sysfs_path *sp, int id) 4471 { 4472 while (sp) { 4473 if (sp->id == id) 4474 return (sp->path); 4475 sp = sp->next; 4476 } 4477 if (debug) 4478 warnx("%s: id%d not found", __func__, id); 4479 return NULL; 4480 } 4481 4482 int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_data *c, struct pkg_data *p) 4483 { 4484 /* 4485 * Overcommit memory a little bit here, 4486 * but skip calculating exact sizes for the buffers. 
4487 */ 4488 unsigned long long perf_data[NUM_CSTATE_COUNTERS]; 4489 unsigned long long perf_data_core[NUM_CSTATE_COUNTERS + 1]; 4490 unsigned long long perf_data_pkg[NUM_CSTATE_COUNTERS + 1]; 4491 4492 struct cstate_counter_info_t *cci; 4493 4494 if (debug >= 2) 4495 fprintf(stderr, "%s: cpu%d\n", __func__, cpu); 4496 4497 assert(ccstate_counter_info); 4498 assert(cpu <= ccstate_counter_info_size); 4499 4500 ZERO_ARRAY(perf_data); 4501 ZERO_ARRAY(perf_data_core); 4502 ZERO_ARRAY(perf_data_pkg); 4503 4504 cci = &ccstate_counter_info[cpu]; 4505 4506 /* 4507 * If we have any perf counters to read, read them all now, in bulk 4508 */ 4509 const size_t num_perf_counters = cstate_counter_info_count_perf(cci); 4510 ssize_t expected_read_size = num_perf_counters * sizeof(unsigned long long); 4511 ssize_t actual_read_size_core = 0, actual_read_size_pkg = 0; 4512 4513 if (cci->fd_perf_core != -1) { 4514 /* Each descriptor read begins with number of counters read. */ 4515 expected_read_size += sizeof(unsigned long long); 4516 4517 actual_read_size_core = read(cci->fd_perf_core, &perf_data_core[0], sizeof(perf_data_core)); 4518 4519 if (actual_read_size_core <= 0) 4520 err(-1, "%s: read perf %s: %ld", __func__, "core", actual_read_size_core); 4521 } 4522 4523 if (cci->fd_perf_pkg != -1) { 4524 /* Each descriptor read begins with number of counters read. */ 4525 expected_read_size += sizeof(unsigned long long); 4526 4527 actual_read_size_pkg = read(cci->fd_perf_pkg, &perf_data_pkg[0], sizeof(perf_data_pkg)); 4528 4529 if (actual_read_size_pkg <= 0) 4530 err(-1, "%s: read perf %s: %ld", __func__, "pkg", actual_read_size_pkg); 4531 } 4532 4533 const ssize_t actual_read_size_total = actual_read_size_core + actual_read_size_pkg; 4534 4535 if (actual_read_size_total != expected_read_size) 4536 err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, actual_read_size_total); 4537 4538 /* 4539 * Copy ccstate and pcstate data into unified buffer. 4540 * 4541 * Skip first element from core and pkg buffers. 4542 * Kernel puts there how many counters were read. 4543 */ 4544 const size_t num_core_counters = perf_data_core[0]; 4545 const size_t num_pkg_counters = perf_data_pkg[0]; 4546 4547 assert(num_perf_counters == num_core_counters + num_pkg_counters); 4548 4549 /* Copy ccstate perf data */ 4550 memcpy(&perf_data[0], &perf_data_core[1], num_core_counters * sizeof(unsigned long long)); 4551 4552 /* Copy pcstate perf data */ 4553 memcpy(&perf_data[num_core_counters], &perf_data_pkg[1], num_pkg_counters * sizeof(unsigned long long)); 4554 4555 for (unsigned int i = 0, pi = 0; i < NUM_CSTATE_COUNTERS; ++i) { 4556 switch (cci->source[i]) { 4557 case COUNTER_SOURCE_NONE: 4558 break; 4559 4560 case COUNTER_SOURCE_PERF: 4561 assert(pi < ARRAY_SIZE(perf_data)); 4562 assert(cci->fd_perf_core != -1 || cci->fd_perf_pkg != -1); 4563 4564 if (debug >= 2) 4565 fprintf(stderr, "cstate via %s %u: %llu\n", "perf", i, perf_data[pi]); 4566 4567 cci->data[i] = perf_data[pi]; 4568 4569 ++pi; 4570 break; 4571 4572 case COUNTER_SOURCE_MSR: 4573 assert(!no_msr); 4574 if (get_msr(cpu, cci->msr[i], &cci->data[i])) 4575 return -13 - i; 4576 4577 if (debug >= 2) 4578 fprintf(stderr, "cstate via %s0x%llx %u: %llu\n", "msr", cci->msr[i], i, cci->data[i]); 4579 4580 break; 4581 } 4582 } 4583 4584 /* 4585 * Helper to write the data only if the source of 4586 * the counter for the current cpu is not none. 
4587 * 4588 * Otherwise we would overwrite core data with 0 (default value), 4589 * when invoked for the thread sibling. 4590 */ 4591 #define PERF_COUNTER_WRITE_DATA(out_counter, index) do { \ 4592 if (cci->source[index] != COUNTER_SOURCE_NONE) \ 4593 out_counter = cci->data[index]; \ 4594 } while (0) 4595 4596 BUILD_BUG_ON(NUM_CSTATE_COUNTERS != 11); 4597 4598 PERF_COUNTER_WRITE_DATA(t->c1, CCSTATE_RCI_INDEX_C1_RESIDENCY); 4599 PERF_COUNTER_WRITE_DATA(c->c3, CCSTATE_RCI_INDEX_C3_RESIDENCY); 4600 PERF_COUNTER_WRITE_DATA(c->c6, CCSTATE_RCI_INDEX_C6_RESIDENCY); 4601 PERF_COUNTER_WRITE_DATA(c->c7, CCSTATE_RCI_INDEX_C7_RESIDENCY); 4602 4603 PERF_COUNTER_WRITE_DATA(p->pc2, PCSTATE_RCI_INDEX_C2_RESIDENCY); 4604 PERF_COUNTER_WRITE_DATA(p->pc3, PCSTATE_RCI_INDEX_C3_RESIDENCY); 4605 PERF_COUNTER_WRITE_DATA(p->pc6, PCSTATE_RCI_INDEX_C6_RESIDENCY); 4606 PERF_COUNTER_WRITE_DATA(p->pc7, PCSTATE_RCI_INDEX_C7_RESIDENCY); 4607 PERF_COUNTER_WRITE_DATA(p->pc8, PCSTATE_RCI_INDEX_C8_RESIDENCY); 4608 PERF_COUNTER_WRITE_DATA(p->pc9, PCSTATE_RCI_INDEX_C9_RESIDENCY); 4609 PERF_COUNTER_WRITE_DATA(p->pc10, PCSTATE_RCI_INDEX_C10_RESIDENCY); 4610 4611 #undef PERF_COUNTER_WRITE_DATA 4612 4613 return 0; 4614 } 4615 4616 size_t msr_counter_info_count_perf(const struct msr_counter_info_t *mci) 4617 { 4618 size_t ret = 0; 4619 4620 for (int i = 0; i < NUM_MSR_COUNTERS; ++i) 4621 if (mci->source[i] == COUNTER_SOURCE_PERF) 4622 ++ret; 4623 4624 return ret; 4625 } 4626 4627 int get_smi_aperf_mperf(unsigned int cpu, struct thread_data *t) 4628 { 4629 unsigned long long perf_data[NUM_MSR_COUNTERS + 1]; 4630 4631 struct msr_counter_info_t *mci; 4632 4633 if (debug >= 2) 4634 fprintf(stderr, "%s: cpu%d\n", __func__, cpu); 4635 4636 assert(msr_counter_info); 4637 assert(cpu <= msr_counter_info_size); 4638 4639 mci = &msr_counter_info[cpu]; 4640 4641 ZERO_ARRAY(perf_data); 4642 ZERO_ARRAY(mci->data); 4643 4644 if (mci->fd_perf != -1) { 4645 const size_t num_perf_counters = msr_counter_info_count_perf(mci); 4646 const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long); 4647 const ssize_t actual_read_size = read(mci->fd_perf, &perf_data[0], sizeof(perf_data)); 4648 4649 if (actual_read_size != expected_read_size) 4650 err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, 4651 actual_read_size); 4652 } 4653 4654 for (unsigned int i = 0, pi = 1; i < NUM_MSR_COUNTERS; ++i) { 4655 switch (mci->source[i]) { 4656 case COUNTER_SOURCE_NONE: 4657 break; 4658 4659 case COUNTER_SOURCE_PERF: 4660 assert(pi < ARRAY_SIZE(perf_data)); 4661 assert(mci->fd_perf != -1); 4662 4663 if (debug >= 2) 4664 fprintf(stderr, "Reading msr counter via perf at %u: %llu\n", i, perf_data[pi]); 4665 4666 mci->data[i] = perf_data[pi]; 4667 4668 ++pi; 4669 break; 4670 4671 case COUNTER_SOURCE_MSR: 4672 assert(!no_msr); 4673 4674 if (get_msr(cpu, mci->msr[i], &mci->data[i])) 4675 return -2 - i; 4676 4677 mci->data[i] &= mci->msr_mask[i]; 4678 4679 if (debug >= 2) 4680 fprintf(stderr, "Reading msr counter via msr at %u: %llu\n", i, mci->data[i]); 4681 4682 break; 4683 } 4684 } 4685 4686 BUILD_BUG_ON(NUM_MSR_COUNTERS != 3); 4687 t->aperf = mci->data[MSR_RCI_INDEX_APERF]; 4688 t->mperf = mci->data[MSR_RCI_INDEX_MPERF]; 4689 t->smi_count = mci->data[MSR_RCI_INDEX_SMI]; 4690 4691 return 0; 4692 } 4693 4694 int perf_counter_info_read_values(struct perf_counter_info *pp, int cpu, unsigned long long *out, size_t out_size) 4695 { 4696 unsigned int domain; 4697 unsigned long long value; 4698 int fd_counter; 4699 4700 for 
(size_t i = 0; pp; ++i, pp = pp->next) { 4701 domain = cpu_to_domain(pp, cpu); 4702 assert(domain < pp->num_domains); 4703 4704 fd_counter = pp->fd_perf_per_domain[domain]; 4705 4706 if (fd_counter == -1) 4707 continue; 4708 4709 if (read(fd_counter, &value, sizeof(value)) != sizeof(value)) 4710 return 1; 4711 4712 assert(i < out_size); 4713 out[i] = value * pp->scale; 4714 } 4715 4716 return 0; 4717 } 4718 4719 unsigned long pmt_gen_value_mask(unsigned int lsb, unsigned int msb) 4720 { 4721 unsigned long mask; 4722 4723 if (msb == 63) 4724 mask = 0xffffffffffffffff; 4725 else 4726 mask = ((1UL << (msb + 1)) - 1); /* 1UL: a plain 1 shifted by 32..62 would overflow int */ 4727 4728 mask -= (1UL << lsb) - 1; /* e.g. lsb=8, msb=15 -> mask 0xff00 */ 4729 4730 return mask; 4731 } 4732 4733 unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id) 4734 { 4735 if (domain_id >= ppmt->num_domains) 4736 return 0; 4737 4738 const unsigned long *pmmio = ppmt->domains[domain_id].pcounter; 4739 const unsigned long value = pmmio ? *pmmio : 0; 4740 const unsigned long value_mask = pmt_gen_value_mask(ppmt->lsb, ppmt->msb); 4741 const unsigned long value_shift = ppmt->lsb; 4742 4743 return (value & value_mask) >> value_shift; 4744 } 4745 4746 /* 4747 * get_counters(...) 4748 * migrate to cpu 4749 * acquire and record local counters for that cpu 4750 */ 4751 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 4752 { 4753 int cpu = t->cpu_id; 4754 unsigned long long msr; 4755 struct msr_counter *mp; 4756 struct pmt_counter *pp; 4757 int i; 4758 int status; 4759 4760 if (cpu_migrate(cpu)) { 4761 fprintf(outf, "%s: Could not migrate to CPU %d\n", __func__, cpu); 4762 return -1; 4763 } 4764 4765 gettimeofday(&t->tv_begin, (struct timezone *)NULL); 4766 4767 if (first_counter_read) 4768 get_apic_id(t); 4769 4770 t->tsc = rdtsc(); /* we are running on local CPU of interest */ 4771 4772 get_smi_aperf_mperf(cpu, t); 4773 4774 if (DO_BIC(BIC_IPC)) 4775 if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long)) 4776 return -4; 4777 4778 if (DO_BIC(BIC_IRQ)) 4779 t->irq_count = irqs_per_cpu[cpu]; 4780 if (DO_BIC(BIC_NMI)) 4781 t->nmi_count = nmi_per_cpu[cpu]; 4782 4783 get_cstate_counters(cpu, t, c, p); 4784 4785 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 4786 if (get_mp(cpu, mp, &t->counter[i], mp->sp->path)) 4787 return -10; 4788 } 4789 4790 if (perf_counter_info_read_values(sys.perf_tp, cpu, t->perf_counter, MAX_ADDED_THREAD_COUNTERS)) 4791 return -10; 4792 4793 for (i = 0, pp = sys.pmt_tp; pp; i++, pp = pp->next) 4794 t->pmt_counter[i] = pmt_read_counter(pp, t->cpu_id); 4795 4796 /* collect core counters only for 1st thread in core */ 4797 if (!is_cpu_first_thread_in_core(t, c, p)) 4798 goto done; 4799 4800 if (platform->has_per_core_rapl) { 4801 status = get_rapl_counters(cpu, c->core_id, c, p); 4802 if (status != 0) 4803 return status; 4804 } 4805 4806 if (DO_BIC(BIC_CPU_c7) && t->is_atom) { 4807 /* 4808 * For Atom CPUs that have a core C-state deeper than C6, 4809 * MSR_CORE_C6_RESIDENCY returns the residency of CC6 and deeper. 4810 * Subtract CC7 (and deeper C-state) residency to get 4811 * accurate CC6 residency.
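 *
 * For example (illustrative numbers): if MSR_CORE_C6_RESIDENCY
 * advanced by 1000 counts over the interval and CC7 alone accounts
 * for 300 of them, the CC6 column should reflect only the remaining
 * 700 counts, hence the subtraction below.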
4812 */ 4813 c->c6 -= c->c7; 4814 } 4815 4816 if (DO_BIC(BIC_Mod_c6)) 4817 if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us)) 4818 return -8; 4819 4820 if (DO_BIC(BIC_CoreTmp)) { 4821 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) 4822 return -9; 4823 c->core_temp_c = tj_max - ((msr >> 16) & 0x7F); 4824 } 4825 4826 if (DO_BIC(BIC_CORE_THROT_CNT)) 4827 get_core_throt_cnt(cpu, &c->core_throt_cnt); 4828 4829 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { 4830 if (get_mp(cpu, mp, &c->counter[i], mp->sp->path)) 4831 return -10; 4832 } 4833 4834 if (perf_counter_info_read_values(sys.perf_cp, cpu, c->perf_counter, MAX_ADDED_CORE_COUNTERS)) 4835 return -10; 4836 4837 for (i = 0, pp = sys.pmt_cp; pp; i++, pp = pp->next) 4838 c->pmt_counter[i] = pmt_read_counter(pp, c->core_id); 4839 4840 /* collect package counters only for 1st core in package */ 4841 if (!is_cpu_first_core_in_package(t, c, p)) 4842 goto done; 4843 4844 if (DO_BIC(BIC_Totl_c0)) { 4845 if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0)) 4846 return -10; 4847 } 4848 if (DO_BIC(BIC_Any_c0)) { 4849 if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0)) 4850 return -11; 4851 } 4852 if (DO_BIC(BIC_GFX_c0)) { 4853 if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0)) 4854 return -12; 4855 } 4856 if (DO_BIC(BIC_CPUGFX)) { 4857 if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0)) 4858 return -13; 4859 } 4860 4861 if (DO_BIC(BIC_CPU_LPI)) 4862 p->cpu_lpi = cpuidle_cur_cpu_lpi_us; 4863 if (DO_BIC(BIC_SYS_LPI)) 4864 p->sys_lpi = cpuidle_cur_sys_lpi_us; 4865 4866 if (!platform->has_per_core_rapl) { 4867 status = get_rapl_counters(cpu, p->package_id, c, p); 4868 if (status != 0) 4869 return status; 4870 } 4871 4872 if (DO_BIC(BIC_PkgTmp)) { 4873 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) 4874 return -17; 4875 p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F); 4876 } 4877 4878 if (DO_BIC(BIC_UNCORE_MHZ)) 4879 p->uncore_mhz = get_legacy_uncore_mhz(p->package_id); 4880 4881 if (DO_BIC(BIC_GFX_rc6)) 4882 p->gfx_rc6_ms = gfx_info[GFX_rc6].val_ull; 4883 4884 if (DO_BIC(BIC_GFXMHz)) 4885 p->gfx_mhz = gfx_info[GFX_MHz].val; 4886 4887 if (DO_BIC(BIC_GFXACTMHz)) 4888 p->gfx_act_mhz = gfx_info[GFX_ACTMHz].val; 4889 4890 if (DO_BIC(BIC_SAM_mc6)) 4891 p->sam_mc6_ms = gfx_info[SAM_mc6].val_ull; 4892 4893 if (DO_BIC(BIC_SAMMHz)) 4894 p->sam_mhz = gfx_info[SAM_MHz].val; 4895 4896 if (DO_BIC(BIC_SAMACTMHz)) 4897 p->sam_act_mhz = gfx_info[SAM_ACTMHz].val; 4898 4899 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { 4900 char *path = NULL; 4901 4902 if (mp->msr_num == 0) { 4903 path = find_sysfs_path_by_id(mp->sp, p->package_id); 4904 if (path == NULL) { 4905 warnx("%s: package_id %d not found", __func__, p->package_id); 4906 return -10; 4907 } 4908 } 4909 if (get_mp(cpu, mp, &p->counter[i], path)) 4910 return -10; 4911 } 4912 4913 if (perf_counter_info_read_values(sys.perf_pp, cpu, p->perf_counter, MAX_ADDED_PACKAGE_COUNTERS)) 4914 return -10; 4915 4916 for (i = 0, pp = sys.pmt_pp; pp; i++, pp = pp->next) 4917 p->pmt_counter[i] = pmt_read_counter(pp, p->package_id); 4918 4919 done: 4920 gettimeofday(&t->tv_end, (struct timezone *)NULL); 4921 4922 return 0; 4923 } 4924 4925 int pkg_cstate_limit = PCLUKN; 4926 char *pkg_cstate_limit_strings[] = { "unknown", "reserved", "pc0", "pc1", "pc2", 4927 "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited" 4928 }; 4929 4930 int nhm_pkg_cstate_limits[16] = 4931 { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, 
PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4932 PCLRSV, PCLRSV 4933 }; 4934 4935 int snb_pkg_cstate_limits[16] = 4936 { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4937 PCLRSV, PCLRSV 4938 }; 4939 4940 int hsw_pkg_cstate_limits[16] = 4941 { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4942 PCLRSV, PCLRSV 4943 }; 4944 4945 int slv_pkg_cstate_limits[16] = 4946 { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4947 PCL__6, PCL__7 4948 }; 4949 4950 int amt_pkg_cstate_limits[16] = 4951 { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4952 PCLRSV, PCLRSV 4953 }; 4954 4955 int phi_pkg_cstate_limits[16] = 4956 { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4957 PCLRSV, PCLRSV 4958 }; 4959 4960 int glm_pkg_cstate_limits[16] = 4961 { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4962 PCLRSV, PCLRSV 4963 }; 4964 4965 int skx_pkg_cstate_limits[16] = 4966 { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4967 PCLRSV, PCLRSV 4968 }; 4969 4970 int icx_pkg_cstate_limits[16] = 4971 { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, 4972 PCLRSV, PCLRSV 4973 }; 4974 4975 void probe_cst_limit(void) 4976 { 4977 unsigned long long msr; 4978 int *pkg_cstate_limits; 4979 4980 if (!platform->has_nhm_msrs || no_msr) 4981 return; 4982 4983 switch (platform->cst_limit) { 4984 case CST_LIMIT_NHM: 4985 pkg_cstate_limits = nhm_pkg_cstate_limits; 4986 break; 4987 case CST_LIMIT_SNB: 4988 pkg_cstate_limits = snb_pkg_cstate_limits; 4989 break; 4990 case CST_LIMIT_HSW: 4991 pkg_cstate_limits = hsw_pkg_cstate_limits; 4992 break; 4993 case CST_LIMIT_SKX: 4994 pkg_cstate_limits = skx_pkg_cstate_limits; 4995 break; 4996 case CST_LIMIT_ICX: 4997 pkg_cstate_limits = icx_pkg_cstate_limits; 4998 break; 4999 case CST_LIMIT_SLV: 5000 pkg_cstate_limits = slv_pkg_cstate_limits; 5001 break; 5002 case CST_LIMIT_AMT: 5003 pkg_cstate_limits = amt_pkg_cstate_limits; 5004 break; 5005 case CST_LIMIT_KNL: 5006 pkg_cstate_limits = phi_pkg_cstate_limits; 5007 break; 5008 case CST_LIMIT_GMT: 5009 pkg_cstate_limits = glm_pkg_cstate_limits; 5010 break; 5011 default: 5012 return; 5013 } 5014 5015 get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); 5016 pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; 5017 } 5018 5019 static void dump_platform_info(void) 5020 { 5021 unsigned long long msr; 5022 unsigned int ratio; 5023 5024 if (!platform->has_nhm_msrs || no_msr) 5025 return; 5026 5027 get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); 5028 5029 fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); 5030 5031 ratio = (msr >> 40) & 0xFF; 5032 fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk); 5033 5034 ratio = (msr >> 8) & 0xFF; 5035 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); 5036 } 5037 5038 static void dump_power_ctl(void) 5039 { 5040 unsigned long long msr; 5041 5042 if (!platform->has_nhm_msrs || no_msr) 5043 return; 5044 5045 get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); 5046 fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E 
auto-promotion: %sabled)\n", 5047 base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); 5048 5049 /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */ 5050 if (platform->has_cst_prewake_bit) 5051 fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN"); 5052 5053 return; 5054 } 5055 5056 static void dump_turbo_ratio_limit2(void) 5057 { 5058 unsigned long long msr; 5059 unsigned int ratio; 5060 5061 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); 5062 5063 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr); 5064 5065 ratio = (msr >> 8) & 0xFF; 5066 if (ratio) 5067 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk); 5068 5069 ratio = (msr >> 0) & 0xFF; 5070 if (ratio) 5071 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk); 5072 return; 5073 } 5074 5075 static void dump_turbo_ratio_limit1(void) 5076 { 5077 unsigned long long msr; 5078 unsigned int ratio; 5079 5080 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); 5081 5082 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr); 5083 5084 ratio = (msr >> 56) & 0xFF; 5085 if (ratio) 5086 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk); 5087 5088 ratio = (msr >> 48) & 0xFF; 5089 if (ratio) 5090 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk); 5091 5092 ratio = (msr >> 40) & 0xFF; 5093 if (ratio) 5094 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk); 5095 5096 ratio = (msr >> 32) & 0xFF; 5097 if (ratio) 5098 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk); 5099 5100 ratio = (msr >> 24) & 0xFF; 5101 if (ratio) 5102 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk); 5103 5104 ratio = (msr >> 16) & 0xFF; 5105 if (ratio) 5106 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk); 5107 5108 ratio = (msr >> 8) & 0xFF; 5109 if (ratio) 5110 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk); 5111 5112 ratio = (msr >> 0) & 0xFF; 5113 if (ratio) 5114 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk); 5115 return; 5116 } 5117 5118 static void dump_turbo_ratio_limits(int trl_msr_offset) 5119 { 5120 unsigned long long msr, core_counts; 5121 int shift; 5122 5123 get_msr(base_cpu, trl_msr_offset, &msr); 5124 fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n", 5125 base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? 
"SECONDARY_" : "", msr); 5126 5127 if (platform->trl_msrs & TRL_CORECOUNT) { 5128 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts); 5129 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts); 5130 } else { 5131 core_counts = 0x0807060504030201; 5132 } 5133 5134 for (shift = 56; shift >= 0; shift -= 8) { 5135 unsigned int ratio, group_size; 5136 5137 ratio = (msr >> shift) & 0xFF; 5138 group_size = (core_counts >> shift) & 0xFF; 5139 if (ratio) 5140 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", 5141 ratio, bclk, ratio * bclk, group_size); 5142 } 5143 5144 return; 5145 } 5146 5147 static void dump_atom_turbo_ratio_limits(void) 5148 { 5149 unsigned long long msr; 5150 unsigned int ratio; 5151 5152 get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr); 5153 fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF); 5154 5155 ratio = (msr >> 0) & 0x3F; 5156 if (ratio) 5157 fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", ratio, bclk, ratio * bclk); 5158 5159 ratio = (msr >> 8) & 0x3F; 5160 if (ratio) 5161 fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", ratio, bclk, ratio * bclk); 5162 5163 ratio = (msr >> 16) & 0x3F; 5164 if (ratio) 5165 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); 5166 5167 get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr); 5168 fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF); 5169 5170 ratio = (msr >> 24) & 0x3F; 5171 if (ratio) 5172 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk); 5173 5174 ratio = (msr >> 16) & 0x3F; 5175 if (ratio) 5176 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk); 5177 5178 ratio = (msr >> 8) & 0x3F; 5179 if (ratio) 5180 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk); 5181 5182 ratio = (msr >> 0) & 0x3F; 5183 if (ratio) 5184 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", ratio, bclk, ratio * bclk); 5185 } 5186 5187 static void dump_knl_turbo_ratio_limits(void) 5188 { 5189 const unsigned int buckets_no = 7; 5190 5191 unsigned long long msr; 5192 int delta_cores, delta_ratio; 5193 int i, b_nr; 5194 unsigned int cores[buckets_no]; 5195 unsigned int ratio[buckets_no]; 5196 5197 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); 5198 5199 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); 5200 5201 /* 5202 * Turbo encoding in KNL is as follows: 5203 * [0] -- Reserved 5204 * [7:1] -- Base value of number of active cores of bucket 1. 5205 * [15:8] -- Base value of freq ratio of bucket 1. 5206 * [20:16] -- +ve delta of number of active cores of bucket 2. 5207 * i.e. active cores of bucket 2 = 5208 * active cores of bucket 1 + delta 5209 * [23:21] -- Negative delta of freq ratio of bucket 2. 5210 * i.e. freq ratio of bucket 2 = 5211 * freq ratio of bucket 1 - delta 5212 * [28:24]-- +ve delta of number of active cores of bucket 3. 5213 * [31:29]-- -ve delta of freq ratio of bucket 3. 5214 * [36:32]-- +ve delta of number of active cores of bucket 4. 5215 * [39:37]-- -ve delta of freq ratio of bucket 4. 5216 * [44:40]-- +ve delta of number of active cores of bucket 5. 5217 * [47:45]-- -ve delta of freq ratio of bucket 5. 5218 * [52:48]-- +ve delta of number of active cores of bucket 6. 5219 * [55:53]-- -ve delta of freq ratio of bucket 6. 5220 * [60:56]-- +ve delta of number of active cores of bucket 7. 
5221 * [63:61]-- -ve delta of freq ratio of bucket 7. 5222 */ 5223 5224 b_nr = 0; 5225 cores[b_nr] = (msr & 0xFF) >> 1; 5226 ratio[b_nr] = (msr >> 8) & 0xFF; 5227 5228 for (i = 16; i < 64; i += 8) { 5229 delta_cores = (msr >> i) & 0x1F; 5230 delta_ratio = (msr >> (i + 5)) & 0x7; 5231 5232 cores[b_nr + 1] = cores[b_nr] + delta_cores; 5233 ratio[b_nr + 1] = ratio[b_nr] - delta_ratio; 5234 b_nr++; 5235 } 5236 5237 for (i = buckets_no - 1; i >= 0; i--) 5238 if (i > 0 ? ratio[i] != ratio[i - 1] : 1) 5239 fprintf(outf, 5240 "%d * %.1f = %.1f MHz max turbo %d active cores\n", 5241 ratio[i], bclk, ratio[i] * bclk, cores[i]); 5242 } 5243 5244 static void dump_cst_cfg(void) 5245 { 5246 unsigned long long msr; 5247 5248 if (!platform->has_nhm_msrs || no_msr) 5249 return; 5250 5251 get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); 5252 5253 fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr); 5254 5255 fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)", 5256 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", 5257 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", 5258 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", 5259 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", 5260 (msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]); 5261 5262 #define AUTOMATIC_CSTATE_CONVERSION (1UL << 16) 5263 if (platform->has_cst_auto_convension) { 5264 fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off"); 5265 } 5266 5267 fprintf(outf, ")\n"); 5268 5269 return; 5270 } 5271 5272 static void dump_config_tdp(void) 5273 { 5274 unsigned long long msr; 5275 5276 get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr); 5277 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr); 5278 fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF); 5279 5280 get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr); 5281 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr); 5282 if (msr) { 5283 fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF); 5284 fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF); 5285 fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); 5286 fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF); 5287 } 5288 fprintf(outf, ")\n"); 5289 5290 get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr); 5291 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr); 5292 if (msr) { 5293 fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF); 5294 fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF); 5295 fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); 5296 fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF); 5297 } 5298 fprintf(outf, ")\n"); 5299 5300 get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr); 5301 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr); 5302 if ((msr) & 0x3) 5303 fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3); 5304 fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); 5305 fprintf(outf, ")\n"); 5306 5307 get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr); 5308 fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); 5309 fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF); 5310 fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); 5311 fprintf(outf, ")\n"); 5312 } 5313 5314 unsigned int irtl_time_units[] = { 1, 32, 
1024, 32768, 1048576, 33554432, 0, 0 };

void print_irtl(void)
{
	unsigned long long msr;

	if (!platform->has_irtl_msrs || no_msr)
		return;

	/*
	 * MSR_PKGCn_IRTL layout: [9:0] interrupt response time limit,
	 * [12:10] 3-bit index into irtl_time_units[] above, [15] valid.
	 */
	if (platform->supported_cstates & PC3) {
		get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);
	}

	if (platform->supported_cstates & PC6) {
		get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);
	}

	if (platform->supported_cstates & PC7) {
		get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);
	}

	if (platform->supported_cstates & PC8) {
		get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);
	}

	if (platform->supported_cstates & PC9) {
		get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);
	}

	if (platform->supported_cstates & PC10) {
		get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
		fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
		fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
			(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);
	}
}
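/*
 * Worked IRTL decode (illustrative value, not from real hardware):
 * msr = 0x8842 has bit 15 set (valid), a time-unit field of
 * (msr >> 10) & 0x7 = 2 -> 1024 ns per unit, and a limit of
 * msr & 0x3FF = 0x42 = 66, so the line printed above would read
 * 66 * 1024 = 67584 ns.
 */
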
"" : "NOT", 5362 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); 5363 } 5364 } 5365 5366 void free_fd_percpu(void) 5367 { 5368 int i; 5369 5370 if (!fd_percpu) 5371 return; 5372 5373 for (i = 0; i < topo.max_cpu_num + 1; ++i) { 5374 if (fd_percpu[i] != 0) 5375 close(fd_percpu[i]); 5376 } 5377 5378 free(fd_percpu); 5379 fd_percpu = NULL; 5380 } 5381 5382 void free_fd_instr_count_percpu(void) 5383 { 5384 if (!fd_instr_count_percpu) 5385 return; 5386 5387 for (int i = 0; i < topo.max_cpu_num + 1; ++i) { 5388 if (fd_instr_count_percpu[i] != 0) 5389 close(fd_instr_count_percpu[i]); 5390 } 5391 5392 free(fd_instr_count_percpu); 5393 fd_instr_count_percpu = NULL; 5394 } 5395 5396 void free_fd_cstate(void) 5397 { 5398 if (!ccstate_counter_info) 5399 return; 5400 5401 const int counter_info_num = ccstate_counter_info_size; 5402 5403 for (int counter_id = 0; counter_id < counter_info_num; ++counter_id) { 5404 if (ccstate_counter_info[counter_id].fd_perf_core != -1) 5405 close(ccstate_counter_info[counter_id].fd_perf_core); 5406 5407 if (ccstate_counter_info[counter_id].fd_perf_pkg != -1) 5408 close(ccstate_counter_info[counter_id].fd_perf_pkg); 5409 } 5410 5411 free(ccstate_counter_info); 5412 ccstate_counter_info = NULL; 5413 ccstate_counter_info_size = 0; 5414 } 5415 5416 void free_fd_msr(void) 5417 { 5418 if (!msr_counter_info) 5419 return; 5420 5421 for (int cpu = 0; cpu < topo.max_cpu_num; ++cpu) { 5422 if (msr_counter_info[cpu].fd_perf != -1) 5423 close(msr_counter_info[cpu].fd_perf); 5424 } 5425 5426 free(msr_counter_info); 5427 msr_counter_info = NULL; 5428 msr_counter_info_size = 0; 5429 } 5430 5431 void free_fd_rapl_percpu(void) 5432 { 5433 if (!rapl_counter_info_perdomain) 5434 return; 5435 5436 const int num_domains = rapl_counter_info_perdomain_size; 5437 5438 for (int domain_id = 0; domain_id < num_domains; ++domain_id) { 5439 if (rapl_counter_info_perdomain[domain_id].fd_perf != -1) 5440 close(rapl_counter_info_perdomain[domain_id].fd_perf); 5441 } 5442 5443 free(rapl_counter_info_perdomain); 5444 rapl_counter_info_perdomain = NULL; 5445 rapl_counter_info_perdomain_size = 0; 5446 } 5447 5448 void free_fd_added_perf_counters_(struct perf_counter_info *pp) 5449 { 5450 if (!pp) 5451 return; 5452 5453 if (!pp->fd_perf_per_domain) 5454 return; 5455 5456 while (pp) { 5457 for (size_t domain = 0; domain < pp->num_domains; ++domain) { 5458 if (pp->fd_perf_per_domain[domain] != -1) { 5459 close(pp->fd_perf_per_domain[domain]); 5460 pp->fd_perf_per_domain[domain] = -1; 5461 } 5462 } 5463 5464 free(pp->fd_perf_per_domain); 5465 pp->fd_perf_per_domain = NULL; 5466 5467 pp = pp->next; 5468 } 5469 } 5470 5471 void free_fd_added_perf_counters(void) 5472 { 5473 free_fd_added_perf_counters_(sys.perf_tp); 5474 free_fd_added_perf_counters_(sys.perf_cp); 5475 free_fd_added_perf_counters_(sys.perf_pp); 5476 } 5477 5478 void free_all_buffers(void) 5479 { 5480 int i; 5481 5482 CPU_FREE(cpu_present_set); 5483 cpu_present_set = NULL; 5484 cpu_present_setsize = 0; 5485 5486 CPU_FREE(cpu_effective_set); 5487 cpu_effective_set = NULL; 5488 cpu_effective_setsize = 0; 5489 5490 CPU_FREE(cpu_allowed_set); 5491 cpu_allowed_set = NULL; 5492 cpu_allowed_setsize = 0; 5493 5494 CPU_FREE(cpu_affinity_set); 5495 cpu_affinity_set = NULL; 5496 cpu_affinity_setsize = 0; 5497 5498 free(thread_even); 5499 free(core_even); 5500 free(package_even); 5501 5502 thread_even = NULL; 5503 core_even = NULL; 5504 package_even = NULL; 5505 5506 free(thread_odd); 5507 free(core_odd); 5508 free(package_odd); 5509 5510 thread_odd = 
NULL; 5511 core_odd = NULL; 5512 package_odd = NULL; 5513 5514 free(output_buffer); 5515 output_buffer = NULL; 5516 outp = NULL; 5517 5518 free_fd_percpu(); 5519 free_fd_instr_count_percpu(); 5520 free_fd_msr(); 5521 free_fd_rapl_percpu(); 5522 free_fd_cstate(); 5523 free_fd_added_perf_counters(); 5524 5525 free(irq_column_2_cpu); 5526 free(irqs_per_cpu); 5527 free(nmi_per_cpu); 5528 5529 for (i = 0; i <= topo.max_cpu_num; ++i) { 5530 if (cpus[i].put_ids) 5531 CPU_FREE(cpus[i].put_ids); 5532 } 5533 free(cpus); 5534 } 5535 5536 /* 5537 * Parse a file containing a single int. 5538 * Return 0 if file can not be opened 5539 * Exit if file can be opened, but can not be parsed 5540 */ 5541 int parse_int_file(const char *fmt, ...) 5542 { 5543 va_list args; 5544 char path[PATH_MAX]; 5545 FILE *filep; 5546 int value; 5547 5548 va_start(args, fmt); 5549 vsnprintf(path, sizeof(path), fmt, args); 5550 va_end(args); 5551 filep = fopen(path, "r"); 5552 if (!filep) 5553 return 0; 5554 if (fscanf(filep, "%d", &value) != 1) 5555 err(1, "%s: failed to parse number from file", path); 5556 fclose(filep); 5557 return value; 5558 } 5559 5560 /* 5561 * cpu_is_first_core_in_package(cpu) 5562 * return 1 if given CPU is 1st core in package 5563 */ 5564 int cpu_is_first_core_in_package(int cpu) 5565 { 5566 return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu); 5567 } 5568 5569 int get_physical_package_id(int cpu) 5570 { 5571 return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu); 5572 } 5573 5574 int get_die_id(int cpu) 5575 { 5576 return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu); 5577 } 5578 5579 int get_core_id(int cpu) 5580 { 5581 return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); 5582 } 5583 5584 void set_node_data(void) 5585 { 5586 int pkg, node, lnode, cpu, cpux; 5587 int cpu_count; 5588 5589 /* initialize logical_node_id */ 5590 for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) 5591 cpus[cpu].logical_node_id = -1; 5592 5593 cpu_count = 0; 5594 for (pkg = 0; pkg < topo.num_packages; pkg++) { 5595 lnode = 0; 5596 for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) { 5597 if (cpus[cpu].physical_package_id != pkg) 5598 continue; 5599 /* find a cpu with an unset logical_node_id */ 5600 if (cpus[cpu].logical_node_id != -1) 5601 continue; 5602 cpus[cpu].logical_node_id = lnode; 5603 node = cpus[cpu].physical_node_id; 5604 cpu_count++; 5605 /* 5606 * find all matching cpus on this pkg and set 5607 * the logical_node_id 5608 */ 5609 for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) { 5610 if ((cpus[cpux].physical_package_id == pkg) && (cpus[cpux].physical_node_id == node)) { 5611 cpus[cpux].logical_node_id = lnode; 5612 cpu_count++; 5613 } 5614 } 5615 lnode++; 5616 if (lnode > topo.nodes_per_pkg) 5617 topo.nodes_per_pkg = lnode; 5618 } 5619 if (cpu_count >= topo.max_cpu_num) 5620 break; 5621 } 5622 } 5623 5624 int get_physical_node_id(struct cpu_topology *thiscpu) 5625 { 5626 char path[80]; 5627 FILE *filep; 5628 int i; 5629 int cpu = thiscpu->logical_cpu_id; 5630 5631 for (i = 0; i <= topo.max_cpu_num; i++) { 5632 sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", cpu, i); 5633 filep = fopen(path, "r"); 5634 if (!filep) 5635 continue; 5636 fclose(filep); 5637 return i; 5638 } 5639 return -1; 5640 } 5641 5642 static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size) 5643 { 5644 unsigned int start, end; 5645 char *next = cpu_str; 5646 5647 while (next && *next) { 5648 
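		/*
		 * Accepted syntax, per the checks below: a comma-separated
		 * list of decimal CPU ids and inclusive ranges, written
		 * either "a-b" or "a..b", e.g. "0,3,6-9" or "0..2,5".
		 * A leading '-' is rejected so negative ids cannot parse.
		 */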
5649 if (*next == '-') /* no negative cpu numbers */ 5650 return 1; 5651 5652 if (*next == '\0' || *next == '\n') 5653 break; 5654 5655 start = strtoul(next, &next, 10); 5656 5657 if (start >= CPU_SUBSET_MAXCPUS) 5658 return 1; 5659 CPU_SET_S(start, cpu_set_size, cpu_set); 5660 5661 if (*next == '\0' || *next == '\n') 5662 break; 5663 5664 if (*next == ',') { 5665 next += 1; 5666 continue; 5667 } 5668 5669 if (*next == '-') { 5670 next += 1; /* start range */ 5671 } else if (*next == '.') { 5672 next += 1; 5673 if (*next == '.') 5674 next += 1; /* start range */ 5675 else 5676 return 1; 5677 } 5678 5679 end = strtoul(next, &next, 10); 5680 if (end <= start) 5681 return 1; 5682 5683 while (++start <= end) { 5684 if (start >= CPU_SUBSET_MAXCPUS) 5685 return 1; 5686 CPU_SET_S(start, cpu_set_size, cpu_set); 5687 } 5688 5689 if (*next == ',') 5690 next += 1; 5691 else if (*next != '\0' && *next != '\n') 5692 return 1; 5693 } 5694 5695 return 0; 5696 } 5697 5698 int get_thread_siblings(struct cpu_topology *thiscpu) 5699 { 5700 char path[80], character; 5701 FILE *filep; 5702 unsigned long map; 5703 int so, shift, sib_core; 5704 int cpu = thiscpu->logical_cpu_id; 5705 int offset = topo.max_cpu_num + 1; 5706 size_t size; 5707 int thread_id = 0; 5708 5709 thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1)); 5710 if (thiscpu->thread_id < 0) 5711 thiscpu->thread_id = thread_id++; 5712 if (!thiscpu->put_ids) 5713 return -1; 5714 5715 size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 5716 CPU_ZERO_S(size, thiscpu->put_ids); 5717 5718 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu); 5719 filep = fopen(path, "r"); 5720 5721 if (!filep) { 5722 warnx("%s: open failed", path); 5723 return -1; 5724 } 5725 do { 5726 offset -= BITMASK_SIZE; 5727 if (fscanf(filep, "%lx%c", &map, &character) != 2) 5728 err(1, "%s: failed to parse file", path); 5729 for (shift = 0; shift < BITMASK_SIZE; shift++) { 5730 if ((map >> shift) & 0x1) { 5731 so = shift + offset; 5732 sib_core = get_core_id(so); 5733 if (sib_core == thiscpu->physical_core_id) { 5734 CPU_SET_S(so, size, thiscpu->put_ids); 5735 if ((so != cpu) && (cpus[so].thread_id < 0)) 5736 cpus[so].thread_id = thread_id++; 5737 } 5738 } 5739 } 5740 } while (character == ','); 5741 fclose(filep); 5742 5743 return CPU_COUNT_S(size, thiscpu->put_ids); 5744 } 5745 5746 /* 5747 * run func(thread, core, package) in topology order 5748 * skip non-present cpus 5749 */ 5750 5751 int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *, 5752 struct pkg_data *, struct thread_data *, struct core_data *, 5753 struct pkg_data *), struct thread_data *thread_base, 5754 struct core_data *core_base, struct pkg_data *pkg_base, 5755 struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2) 5756 { 5757 int retval, pkg_no, node_no, core_no, thread_no; 5758 5759 retval = 0; 5760 5761 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 5762 for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) { 5763 for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { 5764 for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { 5765 struct thread_data *t, *t2; 5766 struct core_data *c, *c2; 5767 struct pkg_data *p, *p2; 5768 5769 t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); 5770 5771 if (cpu_is_not_allowed(t->cpu_id)) 5772 continue; 5773 5774 t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no); 5775 5776 c = GET_CORE(core_base, core_no, node_no, pkg_no); 5777 c2 = 
GET_CORE(core_base2, core_no, node_no, pkg_no);

					p = GET_PKG(pkg_base, pkg_no);
					p2 = GET_PKG(pkg_base2, pkg_no);

					retval |= func(t, c, p, t2, c2, p2);
				}
			}
		}
	}
	return retval;
}

/*
 * run func(cpu) on every cpu in /proc/stat
 * return 0 on success; if func() returns non-zero, stop and return that value
 */
int for_all_proc_cpus(int (func) (int))
{
	FILE *fp;
	int cpu_num;
	int retval;

	fp = fopen_or_die(proc_stat, "r");

	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
	if (retval != 0)
		err(1, "%s: failed to parse format", proc_stat);

	while (1) {
		retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
		if (retval != 1)
			break;

		retval = func(cpu_num);
		if (retval) {
			fclose(fp);
			return (retval);
		}
	}
	fclose(fp);
	return 0;
}

#define PATH_EFFECTIVE_CPUS "/sys/fs/cgroup/cpuset.cpus.effective"

static char cpu_effective_str[1024];

static int update_effective_str(bool startup)
{
	FILE *fp;
	char *pos;
	char buf[1024];
	int ret;

	if (cpu_effective_str[0] == '\0' && !startup)
		return 0;

	fp = fopen(PATH_EFFECTIVE_CPUS, "r");
	if (!fp)
		return 0;

	pos = fgets(buf, 1024, fp);
	if (!pos)
		err(1, "%s: file read failed", PATH_EFFECTIVE_CPUS);

	fclose(fp);

	ret = strncmp(cpu_effective_str, buf, 1024);
	if (!ret)
		return 0;

	strncpy(cpu_effective_str, buf, 1024);
	return 1;
}

static void update_effective_set(bool startup)
{
	update_effective_str(startup);

	if (parse_cpu_str(cpu_effective_str, cpu_effective_set, cpu_effective_setsize))
		err(1, "%s: malformed cpu string %s", PATH_EFFECTIVE_CPUS, cpu_effective_str);
}

void linux_perf_init(void);
void msr_perf_init(void);
void rapl_perf_init(void);
void cstate_perf_init(void);
void added_perf_counters_init(void);
void pmt_init(void);

void re_initialize(void)
{
	free_all_buffers();
	setup_all_buffers(false);
	linux_perf_init();
	msr_perf_init();
	rapl_perf_init();
	cstate_perf_init();
	added_perf_counters_init();
	pmt_init();
	fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus,
		topo.allowed_cpus);
}

void set_max_cpu_num(void)
{
	FILE *filep;
	int base_cpu;
	unsigned long dummy;
	char pathname[64];

	base_cpu = sched_getcpu();
	if (base_cpu < 0)
		err(1, "cannot find calling cpu ID");
	sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", base_cpu);

	filep = fopen_or_die(pathname, "r");
	topo.max_cpu_num = 0;
	while (fscanf(filep, "%lx,", &dummy) == 1)
		topo.max_cpu_num += BITMASK_SIZE;
	fclose(filep);
	topo.max_cpu_num--;	/* 0 based */
}

/*
 * count_cpus()
 * increment topo.num_cpus once for each cpu found in /proc/stat
 */
int count_cpus(int cpu)
{
	UNUSED(cpu);

	topo.num_cpus++;
	return 0;
}

int mark_cpu_present(int cpu)
{
	CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
	return 0;
}

int init_thread_id(int cpu)
{
	cpus[cpu].thread_id = -1;
	return 0;
}

int set_my_cpu_type(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int max_level;
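	/*
	 * CPUID leaf 0x1A (Hybrid Information) reports the core type of
	 * the querying CPU in EAX[31:24]: 0x20 for an E-core (Atom),
	 * 0x40 for a P-core -- which is why set_cpu_hybrid_type() below
	 * migrates to each CPU before calling this.  On non-hybrid parts
	 * the leaf reads back as 0.
	 */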
5930 5931 __cpuid(0, max_level, ebx, ecx, edx); 5932 5933 if (max_level < CPUID_LEAF_MODEL_ID) 5934 return 0; 5935 5936 __cpuid(CPUID_LEAF_MODEL_ID, eax, ebx, ecx, edx); 5937 5938 return (eax >> CPUID_LEAF_MODEL_ID_CORE_TYPE_SHIFT); 5939 } 5940 5941 int set_cpu_hybrid_type(int cpu) 5942 { 5943 if (cpu_migrate(cpu)) 5944 return -1; 5945 5946 int type = set_my_cpu_type(); 5947 5948 cpus[cpu].type = type; 5949 return 0; 5950 } 5951 5952 /* 5953 * snapshot_proc_interrupts() 5954 * 5955 * read and record summary of /proc/interrupts 5956 * 5957 * return 1 if config change requires a restart, else return 0 5958 */ 5959 int snapshot_proc_interrupts(void) 5960 { 5961 static FILE *fp; 5962 int column, retval; 5963 5964 if (fp == NULL) 5965 fp = fopen_or_die("/proc/interrupts", "r"); 5966 else 5967 rewind(fp); 5968 5969 /* read 1st line of /proc/interrupts to get cpu* name for each column */ 5970 for (column = 0; column < topo.num_cpus; ++column) { 5971 int cpu_number; 5972 5973 retval = fscanf(fp, " CPU%d", &cpu_number); 5974 if (retval != 1) 5975 break; 5976 5977 if (cpu_number > topo.max_cpu_num) { 5978 warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num); 5979 return 1; 5980 } 5981 5982 irq_column_2_cpu[column] = cpu_number; 5983 irqs_per_cpu[cpu_number] = 0; 5984 nmi_per_cpu[cpu_number] = 0; 5985 } 5986 5987 /* read /proc/interrupt count lines and sum up irqs per cpu */ 5988 while (1) { 5989 int column; 5990 char buf[64]; 5991 int this_row_is_nmi = 0; 5992 5993 retval = fscanf(fp, " %s:", buf); /* irq# "N:" */ 5994 if (retval != 1) 5995 break; 5996 5997 if (strncmp(buf, "NMI", strlen("NMI")) == 0) 5998 this_row_is_nmi = 1; 5999 6000 /* read the count per cpu */ 6001 for (column = 0; column < topo.num_cpus; ++column) { 6002 6003 int cpu_number, irq_count; 6004 6005 retval = fscanf(fp, " %d", &irq_count); 6006 6007 if (retval != 1) 6008 break; 6009 6010 cpu_number = irq_column_2_cpu[column]; 6011 irqs_per_cpu[cpu_number] += irq_count; 6012 if (this_row_is_nmi) 6013 nmi_per_cpu[cpu_number] += irq_count; 6014 } 6015 while (getc(fp) != '\n') ; /* flush interrupt description */ 6016 6017 } 6018 return 0; 6019 } 6020 6021 /* 6022 * snapshot_graphics() 6023 * 6024 * record snapshot of specified graphics sysfs knob 6025 * 6026 * return 1 if config change requires a restart, else return 0 6027 */ 6028 int snapshot_graphics(int idx) 6029 { 6030 int retval; 6031 6032 rewind(gfx_info[idx].fp); 6033 6034 switch (idx) { 6035 case GFX_rc6: 6036 case SAM_mc6: 6037 retval = fscanf(gfx_info[idx].fp, "%lld", &gfx_info[idx].val_ull); 6038 if (retval != 1) 6039 err(1, "rc6"); 6040 return 0; 6041 case GFX_MHz: 6042 case GFX_ACTMHz: 6043 case SAM_MHz: 6044 case SAM_ACTMHz: 6045 retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val); 6046 if (retval != 1) 6047 err(1, "MHz"); 6048 return 0; 6049 default: 6050 return -EINVAL; 6051 } 6052 } 6053 6054 /* 6055 * snapshot_cpu_lpi() 6056 * 6057 * record snapshot of 6058 * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us 6059 */ 6060 int snapshot_cpu_lpi_us(void) 6061 { 6062 FILE *fp; 6063 int retval; 6064 6065 fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r"); 6066 6067 retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us); 6068 if (retval != 1) { 6069 fprintf(stderr, "Disabling Low Power Idle CPU output\n"); 6070 BIC_NOT_PRESENT(BIC_CPU_LPI); 6071 fclose(fp); 6072 return -1; 6073 } 6074 6075 fclose(fp); 6076 6077 return 0; 6078 } 6079 6080 /* 6081 * snapshot_sys_lpi() 6082 * 6083 * record snapshot of 
sys_lpi_file 6084 */ 6085 int snapshot_sys_lpi_us(void) 6086 { 6087 FILE *fp; 6088 int retval; 6089 6090 fp = fopen_or_die(sys_lpi_file, "r"); 6091 6092 retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us); 6093 if (retval != 1) { 6094 fprintf(stderr, "Disabling Low Power Idle System output\n"); 6095 BIC_NOT_PRESENT(BIC_SYS_LPI); 6096 fclose(fp); 6097 return -1; 6098 } 6099 fclose(fp); 6100 6101 return 0; 6102 } 6103 6104 /* 6105 * snapshot /proc and /sys files 6106 * 6107 * return 1 if configuration restart needed, else return 0 6108 */ 6109 int snapshot_proc_sysfs_files(void) 6110 { 6111 gettimeofday(&procsysfs_tv_begin, (struct timezone *)NULL); 6112 6113 if (DO_BIC(BIC_IRQ) || DO_BIC(BIC_NMI)) 6114 if (snapshot_proc_interrupts()) 6115 return 1; 6116 6117 if (DO_BIC(BIC_GFX_rc6)) 6118 snapshot_graphics(GFX_rc6); 6119 6120 if (DO_BIC(BIC_GFXMHz)) 6121 snapshot_graphics(GFX_MHz); 6122 6123 if (DO_BIC(BIC_GFXACTMHz)) 6124 snapshot_graphics(GFX_ACTMHz); 6125 6126 if (DO_BIC(BIC_SAM_mc6)) 6127 snapshot_graphics(SAM_mc6); 6128 6129 if (DO_BIC(BIC_SAMMHz)) 6130 snapshot_graphics(SAM_MHz); 6131 6132 if (DO_BIC(BIC_SAMACTMHz)) 6133 snapshot_graphics(SAM_ACTMHz); 6134 6135 if (DO_BIC(BIC_CPU_LPI)) 6136 snapshot_cpu_lpi_us(); 6137 6138 if (DO_BIC(BIC_SYS_LPI)) 6139 snapshot_sys_lpi_us(); 6140 6141 return 0; 6142 } 6143 6144 int exit_requested; 6145 6146 static void signal_handler(int signal) 6147 { 6148 switch (signal) { 6149 case SIGINT: 6150 exit_requested = 1; 6151 if (debug) 6152 fprintf(stderr, " SIGINT\n"); 6153 break; 6154 case SIGUSR1: 6155 if (debug > 1) 6156 fprintf(stderr, "SIGUSR1\n"); 6157 break; 6158 } 6159 } 6160 6161 void setup_signal_handler(void) 6162 { 6163 struct sigaction sa; 6164 6165 memset(&sa, 0, sizeof(sa)); 6166 6167 sa.sa_handler = &signal_handler; 6168 6169 if (sigaction(SIGINT, &sa, NULL) < 0) 6170 err(1, "sigaction SIGINT"); 6171 if (sigaction(SIGUSR1, &sa, NULL) < 0) 6172 err(1, "sigaction SIGUSR1"); 6173 } 6174 6175 void do_sleep(void) 6176 { 6177 struct timeval tout; 6178 struct timespec rest; 6179 fd_set readfds; 6180 int retval; 6181 6182 FD_ZERO(&readfds); 6183 FD_SET(0, &readfds); 6184 6185 if (ignore_stdin) { 6186 nanosleep(&interval_ts, NULL); 6187 return; 6188 } 6189 6190 tout = interval_tv; 6191 retval = select(1, &readfds, NULL, NULL, &tout); 6192 6193 if (retval == 1) { 6194 switch (getc(stdin)) { 6195 case 'q': 6196 exit_requested = 1; 6197 break; 6198 case EOF: 6199 /* 6200 * 'stdin' is a pipe closed on the other end. There 6201 * won't be any further input. 6202 */ 6203 ignore_stdin = 1; 6204 /* Sleep the rest of the time */ 6205 rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000); 6206 rest.tv_nsec = (tout.tv_usec % 1000000) * 1000; 6207 nanosleep(&rest, NULL); 6208 } 6209 } 6210 } 6211 6212 int get_msr_sum(int cpu, off_t offset, unsigned long long *msr) 6213 { 6214 int ret, idx; 6215 unsigned long long msr_cur, msr_last; 6216 6217 assert(!no_msr); 6218 6219 if (!per_cpu_msr_sum) 6220 return 1; 6221 6222 idx = offset_to_idx(offset); 6223 if (idx < 0) 6224 return idx; 6225 /* get_msr_sum() = sum + (get_msr() - last) */ 6226 ret = get_msr(cpu, offset, &msr_cur); 6227 if (ret) 6228 return ret; 6229 msr_last = per_cpu_msr_sum[cpu].entries[idx].last; 6230 DELTA_WRAP32(msr_cur, msr_last); 6231 *msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum; 6232 6233 return 0; 6234 } 6235 6236 timer_t timerid; 6237 6238 /* Timer callback, update the sum of MSRs periodically. 
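 * The RAPL energy-status MSRs are only 32 bits wide, so a counter can
 * wrap between two user-visible samples on a long measurement interval.
 * Each timer tick below folds the 32-bit delta into a 64-bit running
 * sum (see DELTA_WRAP32), and get_msr_sum() above reads that sum in
 * place of the raw MSR.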
*/ 6239 static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p) 6240 { 6241 int i, ret; 6242 int cpu = t->cpu_id; 6243 6244 UNUSED(c); 6245 UNUSED(p); 6246 6247 assert(!no_msr); 6248 6249 for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) { 6250 unsigned long long msr_cur, msr_last; 6251 off_t offset; 6252 6253 if (!idx_valid(i)) 6254 continue; 6255 offset = idx_to_offset(i); 6256 if (offset < 0) 6257 continue; 6258 ret = get_msr(cpu, offset, &msr_cur); 6259 if (ret) { 6260 fprintf(outf, "Can not update msr(0x%llx)\n", (unsigned long long)offset); 6261 continue; 6262 } 6263 6264 msr_last = per_cpu_msr_sum[cpu].entries[i].last; 6265 per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff; 6266 6267 DELTA_WRAP32(msr_cur, msr_last); 6268 per_cpu_msr_sum[cpu].entries[i].sum += msr_last; 6269 } 6270 return 0; 6271 } 6272 6273 static void msr_record_handler(union sigval v) 6274 { 6275 UNUSED(v); 6276 6277 for_all_cpus(update_msr_sum, EVEN_COUNTERS); 6278 } 6279 6280 void msr_sum_record(void) 6281 { 6282 struct itimerspec its; 6283 struct sigevent sev; 6284 6285 per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array)); 6286 if (!per_cpu_msr_sum) { 6287 fprintf(outf, "Can not allocate memory for long time MSR.\n"); 6288 return; 6289 } 6290 /* 6291 * Signal handler might be restricted, so use thread notifier instead. 6292 */ 6293 memset(&sev, 0, sizeof(struct sigevent)); 6294 sev.sigev_notify = SIGEV_THREAD; 6295 sev.sigev_notify_function = msr_record_handler; 6296 6297 sev.sigev_value.sival_ptr = &timerid; 6298 if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) { 6299 fprintf(outf, "Can not create timer.\n"); 6300 goto release_msr; 6301 } 6302 6303 its.it_value.tv_sec = 0; 6304 its.it_value.tv_nsec = 1; 6305 /* 6306 * A wraparound time has been calculated early. 6307 * Some sources state that the peak power for a 6308 * microprocessor is usually 1.5 times the TDP rating, 6309 * use 2 * TDP for safety. 
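 *
 * Worked numbers (illustrative, not from a real part): with the common
 * 1/2^14 J energy unit and a 100 W TDP, rapl_joule_counter_range =
 * 0xFFFFFFFF / 16384 / 100 ~= 2621 seconds, so the interval below fires
 * about every 1310 s -- at least once per wrap period even at 2 * TDP.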
6310 */ 6311 its.it_interval.tv_sec = rapl_joule_counter_range / 2; 6312 its.it_interval.tv_nsec = 0; 6313 6314 if (timer_settime(timerid, 0, &its, NULL) == -1) { 6315 fprintf(outf, "Can not set timer.\n"); 6316 goto release_timer; 6317 } 6318 return; 6319 6320 release_timer: 6321 timer_delete(timerid); 6322 release_msr: 6323 free(per_cpu_msr_sum); 6324 } 6325 6326 /* 6327 * set_my_sched_priority(pri) 6328 * return previous priority on success 6329 * return value < -20 on failure 6330 */ 6331 int set_my_sched_priority(int priority) 6332 { 6333 int retval; 6334 int original_priority; 6335 6336 errno = 0; 6337 original_priority = getpriority(PRIO_PROCESS, 0); 6338 if (errno && (original_priority == -1)) 6339 return -21; 6340 6341 retval = setpriority(PRIO_PROCESS, 0, priority); 6342 if (retval) 6343 return -21; 6344 6345 errno = 0; 6346 retval = getpriority(PRIO_PROCESS, 0); 6347 if (retval != priority) 6348 return -21; 6349 6350 return original_priority; 6351 } 6352 6353 void turbostat_loop() 6354 { 6355 int retval; 6356 int restarted = 0; 6357 unsigned int done_iters = 0; 6358 6359 setup_signal_handler(); 6360 6361 /* 6362 * elevate own priority for interval mode 6363 * 6364 * ignore on error - we probably don't have permission to set it, but 6365 * it's not a big deal 6366 */ 6367 set_my_sched_priority(-20); 6368 6369 restart: 6370 restarted++; 6371 6372 snapshot_proc_sysfs_files(); 6373 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 6374 first_counter_read = 0; 6375 if (retval < -1) { 6376 exit(retval); 6377 } else if (retval == -1) { 6378 if (restarted > 10) { 6379 exit(retval); 6380 } 6381 re_initialize(); 6382 goto restart; 6383 } 6384 restarted = 0; 6385 done_iters = 0; 6386 gettimeofday(&tv_even, (struct timezone *)NULL); 6387 6388 while (1) { 6389 if (for_all_proc_cpus(cpu_is_not_present)) { 6390 re_initialize(); 6391 goto restart; 6392 } 6393 if (update_effective_str(false)) { 6394 re_initialize(); 6395 goto restart; 6396 } 6397 do_sleep(); 6398 if (snapshot_proc_sysfs_files()) 6399 goto restart; 6400 retval = for_all_cpus(get_counters, ODD_COUNTERS); 6401 if (retval < -1) { 6402 exit(retval); 6403 } else if (retval == -1) { 6404 re_initialize(); 6405 goto restart; 6406 } 6407 gettimeofday(&tv_odd, (struct timezone *)NULL); 6408 timersub(&tv_odd, &tv_even, &tv_delta); 6409 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) { 6410 re_initialize(); 6411 goto restart; 6412 } 6413 delta_platform(&platform_counters_odd, &platform_counters_even); 6414 compute_average(EVEN_COUNTERS); 6415 format_all_counters(EVEN_COUNTERS); 6416 flush_output_stdout(); 6417 if (exit_requested) 6418 break; 6419 if (num_iterations && ++done_iters >= num_iterations) 6420 break; 6421 do_sleep(); 6422 if (snapshot_proc_sysfs_files()) 6423 goto restart; 6424 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 6425 if (retval < -1) { 6426 exit(retval); 6427 } else if (retval == -1) { 6428 re_initialize(); 6429 goto restart; 6430 } 6431 gettimeofday(&tv_even, (struct timezone *)NULL); 6432 timersub(&tv_even, &tv_odd, &tv_delta); 6433 if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) { 6434 re_initialize(); 6435 goto restart; 6436 } 6437 delta_platform(&platform_counters_even, &platform_counters_odd); 6438 compute_average(ODD_COUNTERS); 6439 format_all_counters(ODD_COUNTERS); 6440 flush_output_stdout(); 6441 if (exit_requested) 6442 break; 6443 if (num_iterations && ++done_iters >= num_iterations) 6444 break; 6445 } 6446 } 6447 6448 void check_dev_msr() 6449 { 6450 struct stat sb; 6451 char 
pathname[32]; 6452 6453 if (no_msr) 6454 return; 6455 6456 sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); 6457 if (stat(pathname, &sb)) 6458 if (system("/sbin/modprobe msr > /dev/null 2>&1")) 6459 no_msr = 1; 6460 } 6461 6462 /* 6463 * check for CAP_SYS_RAWIO 6464 * return 0 on success 6465 * return 1 on fail 6466 */ 6467 int check_for_cap_sys_rawio(void) 6468 { 6469 cap_t caps; 6470 cap_flag_value_t cap_flag_value; 6471 int ret = 0; 6472 6473 caps = cap_get_proc(); 6474 if (caps == NULL) 6475 return 1; 6476 6477 if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) { 6478 ret = 1; 6479 goto free_and_exit; 6480 } 6481 6482 if (cap_flag_value != CAP_SET) { 6483 ret = 1; 6484 goto free_and_exit; 6485 } 6486 6487 free_and_exit: 6488 if (cap_free(caps) == -1) 6489 err(-6, "cap_free\n"); 6490 6491 return ret; 6492 } 6493 6494 void check_msr_permission(void) 6495 { 6496 int failed = 0; 6497 char pathname[32]; 6498 6499 if (no_msr) 6500 return; 6501 6502 /* check for CAP_SYS_RAWIO */ 6503 failed += check_for_cap_sys_rawio(); 6504 6505 /* test file permissions */ 6506 sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); 6507 if (euidaccess(pathname, R_OK)) { 6508 failed++; 6509 } 6510 6511 if (failed) { 6512 warnx("Failed to access %s. Some of the counters may not be available\n" 6513 "\tRun as root to enable them or use %s to disable the access explicitly", pathname, "--no-msr"); 6514 no_msr = 1; 6515 } 6516 } 6517 6518 void probe_bclk(void) 6519 { 6520 unsigned long long msr; 6521 unsigned int base_ratio; 6522 6523 if (!platform->has_nhm_msrs || no_msr) 6524 return; 6525 6526 if (platform->bclk_freq == BCLK_100MHZ) 6527 bclk = 100.00; 6528 else if (platform->bclk_freq == BCLK_133MHZ) 6529 bclk = 133.33; 6530 else if (platform->bclk_freq == BCLK_SLV) 6531 bclk = slm_bclk(); 6532 else 6533 return; 6534 6535 get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); 6536 base_ratio = (msr >> 8) & 0xFF; 6537 6538 base_hz = base_ratio * bclk * 1000000; 6539 has_base_hz = 1; 6540 6541 if (platform->enable_tsc_tweak) 6542 tsc_tweak = base_hz / tsc_hz; 6543 } 6544 6545 static void remove_underbar(char *s) 6546 { 6547 char *to = s; 6548 6549 while (*s) { 6550 if (*s != '_') 6551 *to++ = *s; 6552 s++; 6553 } 6554 6555 *to = 0; 6556 } 6557 6558 static void dump_turbo_ratio_info(void) 6559 { 6560 if (!has_turbo) 6561 return; 6562 6563 if (!platform->has_nhm_msrs || no_msr) 6564 return; 6565 6566 if (platform->trl_msrs & TRL_LIMIT2) 6567 dump_turbo_ratio_limit2(); 6568 6569 if (platform->trl_msrs & TRL_LIMIT1) 6570 dump_turbo_ratio_limit1(); 6571 6572 if (platform->trl_msrs & TRL_BASE) { 6573 dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT); 6574 6575 if (is_hybrid) 6576 dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT); 6577 } 6578 6579 if (platform->trl_msrs & TRL_ATOM) 6580 dump_atom_turbo_ratio_limits(); 6581 6582 if (platform->trl_msrs & TRL_KNL) 6583 dump_knl_turbo_ratio_limits(); 6584 6585 if (platform->has_config_tdp) 6586 dump_config_tdp(); 6587 } 6588 6589 static int read_sysfs_int(char *path) 6590 { 6591 FILE *input; 6592 int retval = -1; 6593 6594 input = fopen(path, "r"); 6595 if (input == NULL) { 6596 if (debug) 6597 fprintf(outf, "NSFOD %s\n", path); 6598 return (-1); 6599 } 6600 if (fscanf(input, "%d", &retval) != 1) 6601 err(1, "%s: failed to read int from file", path); 6602 fclose(input); 6603 6604 return (retval); 6605 } 6606 6607 static void dump_sysfs_file(char *path) 6608 { 6609 FILE *input; 6610 char cpuidle_buf[64]; 6611 6612 input = fopen(path, "r"); 6613 if (input == NULL) { 6614 
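		/*
		 * Missing sysfs attributes are common and non-fatal here;
		 * note them only under --debug.  "NSFOD" is this tool's
		 * shorthand, presumably "No Such File Or Directory".
		 */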
if (debug) 6615 fprintf(outf, "NSFOD %s\n", path); 6616 return; 6617 } 6618 if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input)) 6619 err(1, "%s: failed to read file", path); 6620 fclose(input); 6621 6622 fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf); 6623 } 6624 6625 static void probe_intel_uncore_frequency_legacy(void) 6626 { 6627 int i, j; 6628 char path[256]; 6629 6630 for (i = 0; i < topo.num_packages; ++i) { 6631 for (j = 0; j <= topo.max_die_id; ++j) { 6632 int k, l; 6633 char path_base[128]; 6634 6635 sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i, 6636 j); 6637 6638 if (access(path_base, R_OK)) 6639 continue; 6640 6641 BIC_PRESENT(BIC_UNCORE_MHZ); 6642 6643 if (quiet) 6644 return; 6645 6646 sprintf(path, "%s/min_freq_khz", path_base); 6647 k = read_sysfs_int(path); 6648 sprintf(path, "%s/max_freq_khz", path_base); 6649 l = read_sysfs_int(path); 6650 fprintf(outf, "Uncore Frequency package%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000); 6651 6652 sprintf(path, "%s/initial_min_freq_khz", path_base); 6653 k = read_sysfs_int(path); 6654 sprintf(path, "%s/initial_max_freq_khz", path_base); 6655 l = read_sysfs_int(path); 6656 fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000); 6657 6658 sprintf(path, "%s/current_freq_khz", path_base); 6659 k = read_sysfs_int(path); 6660 fprintf(outf, " %d MHz\n", k / 1000); 6661 } 6662 } 6663 } 6664 6665 static void probe_intel_uncore_frequency_cluster(void) 6666 { 6667 int i, uncore_max_id; 6668 char path[256]; 6669 char path_base[128]; 6670 6671 if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK)) 6672 return; 6673 6674 for (uncore_max_id = 0;; ++uncore_max_id) { 6675 6676 sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", uncore_max_id); 6677 6678 /* uncore## start at 00 and skips no numbers, so stop upon first missing */ 6679 if (access(path_base, R_OK)) { 6680 uncore_max_id -= 1; 6681 break; 6682 } 6683 } 6684 for (i = uncore_max_id; i >= 0; --i) { 6685 int k, l; 6686 int package_id, domain_id, cluster_id; 6687 char name_buf[16]; 6688 6689 sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", i); 6690 6691 if (access(path_base, R_OK)) 6692 err(1, "%s: %s\n", __func__, path_base); 6693 6694 sprintf(path, "%s/package_id", path_base); 6695 package_id = read_sysfs_int(path); 6696 6697 sprintf(path, "%s/domain_id", path_base); 6698 domain_id = read_sysfs_int(path); 6699 6700 sprintf(path, "%s/fabric_cluster_id", path_base); 6701 cluster_id = read_sysfs_int(path); 6702 6703 sprintf(path, "%s/current_freq_khz", path_base); 6704 sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id); 6705 6706 add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); 6707 6708 if (quiet) 6709 continue; 6710 6711 sprintf(path, "%s/min_freq_khz", path_base); 6712 k = read_sysfs_int(path); 6713 sprintf(path, "%s/max_freq_khz", path_base); 6714 l = read_sysfs_int(path); 6715 fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id, 6716 cluster_id, k / 1000, l / 1000); 6717 6718 sprintf(path, "%s/initial_min_freq_khz", path_base); 6719 k = read_sysfs_int(path); 6720 sprintf(path, "%s/initial_max_freq_khz", path_base); 6721 l = read_sysfs_int(path); 6722 fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000); 6723 6724 sprintf(path, "%s/current_freq_khz", path_base); 6725 k = read_sysfs_int(path); 6726 fprintf(outf, " %d MHz\n", k / 1000); 6727 } 
6728 } 6729 6730 static void probe_intel_uncore_frequency(void) 6731 { 6732 if (!genuine_intel) 6733 return; 6734 6735 if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK) == 0) 6736 probe_intel_uncore_frequency_cluster(); 6737 else 6738 probe_intel_uncore_frequency_legacy(); 6739 } 6740 6741 static void set_graphics_fp(char *path, int idx) 6742 { 6743 if (!access(path, R_OK)) 6744 gfx_info[idx].fp = fopen_or_die(path, "r"); 6745 } 6746 6747 /* Enlarge this if there are /sys/class/drm/card2 ... */ 6748 #define GFX_MAX_CARDS 2 6749 6750 static void probe_graphics(void) 6751 { 6752 char path[PATH_MAX]; 6753 int i; 6754 6755 /* Xe graphics sysfs knobs */ 6756 if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) { 6757 FILE *fp; 6758 char buf[8]; 6759 bool gt0_is_gt; 6760 6761 fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r"); 6762 if (!fp) 6763 goto next; 6764 6765 if (!fread(buf, sizeof(char), 7, fp)) { 6766 fclose(fp); 6767 goto next; 6768 } 6769 fclose(fp); 6770 6771 if (!strncmp(buf, "gt0-rc", strlen("gt0-rc"))) 6772 gt0_is_gt = true; 6773 else if (!strncmp(buf, "gt0-mc", strlen("gt0-mc"))) 6774 gt0_is_gt = false; 6775 else 6776 goto next; 6777 6778 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", gt0_is_gt ? GFX_rc6 : SAM_mc6); 6779 6780 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", gt0_is_gt ? GFX_MHz : SAM_MHz); 6781 6782 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz); 6783 6784 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", gt0_is_gt ? SAM_mc6 : GFX_rc6); 6785 6786 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", gt0_is_gt ? SAM_MHz : GFX_MHz); 6787 6788 set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", gt0_is_gt ? 
SAM_ACTMHz : GFX_ACTMHz); 6789 6790 goto end; 6791 } 6792 6793 next: 6794 /* New i915 graphics sysfs knobs */ 6795 for (i = 0; i < GFX_MAX_CARDS; i++) { 6796 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rc6_residency_ms", i); 6797 if (!access(path, R_OK)) 6798 break; 6799 } 6800 6801 if (i == GFX_MAX_CARDS) 6802 goto legacy_i915; 6803 6804 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rc6_residency_ms", i); 6805 set_graphics_fp(path, GFX_rc6); 6806 6807 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rps_cur_freq_mhz", i); 6808 set_graphics_fp(path, GFX_MHz); 6809 6810 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rps_act_freq_mhz", i); 6811 set_graphics_fp(path, GFX_ACTMHz); 6812 6813 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rc6_residency_ms", i); 6814 set_graphics_fp(path, SAM_mc6); 6815 6816 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rps_cur_freq_mhz", i); 6817 set_graphics_fp(path, SAM_MHz); 6818 6819 snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rps_act_freq_mhz", i); 6820 set_graphics_fp(path, SAM_ACTMHz); 6821 6822 goto end; 6823 6824 legacy_i915: 6825 /* Fall back to traditional i915 graphics sysfs knobs */ 6826 set_graphics_fp("/sys/class/drm/card0/power/rc6_residency_ms", GFX_rc6); 6827 6828 set_graphics_fp("/sys/class/drm/card0/gt_cur_freq_mhz", GFX_MHz); 6829 if (!gfx_info[GFX_MHz].fp) 6830 set_graphics_fp("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", GFX_MHz); 6831 6832 set_graphics_fp("/sys/class/drm/card0/gt_act_freq_mhz", GFX_ACTMHz); 6833 if (!gfx_info[GFX_ACTMHz].fp) 6834 set_graphics_fp("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", GFX_ACTMHz); 6835 6836 end: 6837 if (gfx_info[GFX_rc6].fp) 6838 BIC_PRESENT(BIC_GFX_rc6); 6839 if (gfx_info[GFX_MHz].fp) 6840 BIC_PRESENT(BIC_GFXMHz); 6841 if (gfx_info[GFX_ACTMHz].fp) 6842 BIC_PRESENT(BIC_GFXACTMHz); 6843 if (gfx_info[SAM_mc6].fp) 6844 BIC_PRESENT(BIC_SAM_mc6); 6845 if (gfx_info[SAM_MHz].fp) 6846 BIC_PRESENT(BIC_SAMMHz); 6847 if (gfx_info[SAM_ACTMHz].fp) 6848 BIC_PRESENT(BIC_SAMACTMHz); 6849 } 6850 6851 static void dump_sysfs_cstate_config(void) 6852 { 6853 char path[64]; 6854 char name_buf[16]; 6855 char desc[64]; 6856 FILE *input; 6857 int state; 6858 char *sp; 6859 6860 if (access("/sys/devices/system/cpu/cpuidle", R_OK)) { 6861 fprintf(outf, "cpuidle not loaded\n"); 6862 return; 6863 } 6864 6865 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver"); 6866 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor"); 6867 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro"); 6868 6869 for (state = 0; state < 10; ++state) { 6870 6871 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); 6872 input = fopen(path, "r"); 6873 if (input == NULL) 6874 continue; 6875 if (!fgets(name_buf, sizeof(name_buf), input)) 6876 err(1, "%s: failed to read file", path); 6877 6878 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ 6879 sp = strchr(name_buf, '-'); 6880 if (!sp) 6881 sp = strchrnul(name_buf, '\n'); 6882 *sp = '\0'; 6883 fclose(input); 6884 6885 remove_underbar(name_buf); 6886 6887 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state); 6888 input = fopen(path, "r"); 6889 if (input == NULL) 6890 continue; 6891 if (!fgets(desc, sizeof(desc), input)) 6892 err(1, "%s: failed to read file", path); 6893 6894 fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc); 6895 fclose(input); 6896 } 6897 } 6898 6899 static void 
dump_sysfs_pstate_config(void) 6900 { 6901 char path[64]; 6902 char driver_buf[64]; 6903 char governor_buf[64]; 6904 FILE *input; 6905 int turbo; 6906 6907 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", base_cpu); 6908 input = fopen(path, "r"); 6909 if (input == NULL) { 6910 fprintf(outf, "NSFOD %s\n", path); 6911 return; 6912 } 6913 if (!fgets(driver_buf, sizeof(driver_buf), input)) 6914 err(1, "%s: failed to read file", path); 6915 fclose(input); 6916 6917 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", base_cpu); 6918 input = fopen(path, "r"); 6919 if (input == NULL) { 6920 fprintf(outf, "NSFOD %s\n", path); 6921 return; 6922 } 6923 if (!fgets(governor_buf, sizeof(governor_buf), input)) 6924 err(1, "%s: failed to read file", path); 6925 fclose(input); 6926 6927 fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf); 6928 fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf); 6929 6930 sprintf(path, "/sys/devices/system/cpu/cpufreq/boost"); 6931 input = fopen(path, "r"); 6932 if (input != NULL) { 6933 if (fscanf(input, "%d", &turbo) != 1) 6934 err(1, "%s: failed to parse number from file", path); 6935 fprintf(outf, "cpufreq boost: %d\n", turbo); 6936 fclose(input); 6937 } 6938 6939 sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo"); 6940 input = fopen(path, "r"); 6941 if (input != NULL) { 6942 if (fscanf(input, "%d", &turbo) != 1) 6943 err(1, "%s: failed to parse number from file", path); 6944 fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo); 6945 fclose(input); 6946 } 6947 } 6948 6949 /* 6950 * print_epb() 6951 * Decode the ENERGY_PERF_BIAS MSR 6952 */ 6953 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) 6954 { 6955 char *epb_string; 6956 int cpu, epb; 6957 6958 UNUSED(c); 6959 UNUSED(p); 6960 6961 if (!has_epb) 6962 return 0; 6963 6964 cpu = t->cpu_id; 6965 6966 /* EPB is per-package */ 6967 if (!is_cpu_first_thread_in_package(t, c, p)) 6968 return 0; 6969 6970 if (cpu_migrate(cpu)) { 6971 fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu); 6972 return -1; 6973 } 6974 6975 epb = get_epb(cpu); 6976 if (epb < 0) 6977 return 0; 6978 6979 switch (epb) { 6980 case ENERGY_PERF_BIAS_PERFORMANCE: 6981 epb_string = "performance"; 6982 break; 6983 case ENERGY_PERF_BIAS_NORMAL: 6984 epb_string = "balanced"; 6985 break; 6986 case ENERGY_PERF_BIAS_POWERSAVE: 6987 epb_string = "powersave"; 6988 break; 6989 default: 6990 epb_string = "custom"; 6991 break; 6992 } 6993 fprintf(outf, "cpu%d: EPB: %d (%s)\n", cpu, epb, epb_string); 6994 6995 return 0; 6996 } 6997 6998 /* 6999 * print_hwp() 7000 * Decode the MSR_HWP_CAPABILITIES 7001 */ 7002 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) 7003 { 7004 unsigned long long msr; 7005 int cpu; 7006 7007 UNUSED(c); 7008 UNUSED(p); 7009 7010 if (no_msr) 7011 return 0; 7012 7013 if (!has_hwp) 7014 return 0; 7015 7016 cpu = t->cpu_id; 7017 7018 /* MSR_HWP_CAPABILITIES is per-package */ 7019 if (!is_cpu_first_thread_in_package(t, c, p)) 7020 return 0; 7021 7022 if (cpu_migrate(cpu)) { 7023 fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu); 7024 return -1; 7025 } 7026 7027 if (get_msr(cpu, MSR_PM_ENABLE, &msr)) 7028 return 0; 7029 7030 fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", cpu, msr, (msr & (1 << 0)) ? 
"" : "No-"); 7031 7032 /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */ 7033 if ((msr & (1 << 0)) == 0) 7034 return 0; 7035 7036 if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr)) 7037 return 0; 7038 7039 fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx " 7040 "(high %d guar %d eff %d low %d)\n", 7041 cpu, msr, 7042 (unsigned int)HWP_HIGHEST_PERF(msr), 7043 (unsigned int)HWP_GUARANTEED_PERF(msr), 7044 (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr)); 7045 7046 if (get_msr(cpu, MSR_HWP_REQUEST, &msr)) 7047 return 0; 7048 7049 fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx " 7050 "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n", 7051 cpu, msr, 7052 (unsigned int)(((msr) >> 0) & 0xff), 7053 (unsigned int)(((msr) >> 8) & 0xff), 7054 (unsigned int)(((msr) >> 16) & 0xff), 7055 (unsigned int)(((msr) >> 24) & 0xff), 7056 (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1)); 7057 7058 if (has_hwp_pkg) { 7059 if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr)) 7060 return 0; 7061 7062 fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx " 7063 "(min %d max %d des %d epp 0x%x window 0x%x)\n", 7064 cpu, msr, 7065 (unsigned int)(((msr) >> 0) & 0xff), 7066 (unsigned int)(((msr) >> 8) & 0xff), 7067 (unsigned int)(((msr) >> 16) & 0xff), 7068 (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3)); 7069 } 7070 if (has_hwp_notify) { 7071 if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr)) 7072 return 0; 7073 7074 fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx " 7075 "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n", 7076 cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis"); 7077 } 7078 if (get_msr(cpu, MSR_HWP_STATUS, &msr)) 7079 return 0; 7080 7081 fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx " 7082 "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", 7083 cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-"); 7084 7085 return 0; 7086 } 7087 7088 /* 7089 * print_perf_limit() 7090 */ 7091 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p) 7092 { 7093 unsigned long long msr; 7094 int cpu; 7095 7096 UNUSED(c); 7097 UNUSED(p); 7098 7099 if (no_msr) 7100 return 0; 7101 7102 cpu = t->cpu_id; 7103 7104 /* per-package */ 7105 if (!is_cpu_first_thread_in_package(t, c, p)) 7106 return 0; 7107 7108 if (cpu_migrate(cpu)) { 7109 fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu); 7110 return -1; 7111 } 7112 7113 if (platform->plr_msrs & PLR_CORE) { 7114 get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); 7115 fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 7116 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", 7117 (msr & 1 << 15) ? "bit15, " : "", 7118 (msr & 1 << 14) ? "bit14, " : "", 7119 (msr & 1 << 13) ? "Transitions, " : "", 7120 (msr & 1 << 12) ? "MultiCoreTurbo, " : "", 7121 (msr & 1 << 11) ? "PkgPwrL2, " : "", 7122 (msr & 1 << 10) ? "PkgPwrL1, " : "", 7123 (msr & 1 << 9) ? "CorePwr, " : "", 7124 (msr & 1 << 8) ? "Amps, " : "", 7125 (msr & 1 << 6) ? "VR-Therm, " : "", 7126 (msr & 1 << 5) ? "Auto-HWP, " : "", 7127 (msr & 1 << 4) ? "Graphics, " : "", 7128 (msr & 1 << 2) ? "bit2, " : "", 7129 (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : ""); 7130 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", 7131 (msr & 1 << 31) ? "bit31, " : "", 7132 (msr & 1 << 30) ? "bit30, " : "", 7133 (msr & 1 << 29) ? "Transitions, " : "", 7134 (msr & 1 << 28) ? "MultiCoreTurbo, " : "", 7135 (msr & 1 << 27) ? 
"PkgPwrL2, " : "", 7136 (msr & 1 << 26) ? "PkgPwrL1, " : "", 7137 (msr & 1 << 25) ? "CorePwr, " : "", 7138 (msr & 1 << 24) ? "Amps, " : "", 7139 (msr & 1 << 22) ? "VR-Therm, " : "", 7140 (msr & 1 << 21) ? "Auto-HWP, " : "", 7141 (msr & 1 << 20) ? "Graphics, " : "", 7142 (msr & 1 << 18) ? "bit18, " : "", 7143 (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : ""); 7144 7145 } 7146 if (platform->plr_msrs & PLR_GFX) { 7147 get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); 7148 fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 7149 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)", 7150 (msr & 1 << 0) ? "PROCHOT, " : "", 7151 (msr & 1 << 1) ? "ThermStatus, " : "", 7152 (msr & 1 << 4) ? "Graphics, " : "", 7153 (msr & 1 << 6) ? "VR-Therm, " : "", 7154 (msr & 1 << 8) ? "Amps, " : "", 7155 (msr & 1 << 9) ? "GFXPwr, " : "", 7156 (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); 7157 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n", 7158 (msr & 1 << 16) ? "PROCHOT, " : "", 7159 (msr & 1 << 17) ? "ThermStatus, " : "", 7160 (msr & 1 << 20) ? "Graphics, " : "", 7161 (msr & 1 << 22) ? "VR-Therm, " : "", 7162 (msr & 1 << 24) ? "Amps, " : "", 7163 (msr & 1 << 25) ? "GFXPwr, " : "", 7164 (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); 7165 } 7166 if (platform->plr_msrs & PLR_RING) { 7167 get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); 7168 fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 7169 fprintf(outf, " (Active: %s%s%s%s%s%s)", 7170 (msr & 1 << 0) ? "PROCHOT, " : "", 7171 (msr & 1 << 1) ? "ThermStatus, " : "", 7172 (msr & 1 << 6) ? "VR-Therm, " : "", 7173 (msr & 1 << 8) ? "Amps, " : "", 7174 (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); 7175 fprintf(outf, " (Logged: %s%s%s%s%s%s)\n", 7176 (msr & 1 << 16) ? "PROCHOT, " : "", 7177 (msr & 1 << 17) ? "ThermStatus, " : "", 7178 (msr & 1 << 22) ? "VR-Therm, " : "", 7179 (msr & 1 << 24) ? "Amps, " : "", 7180 (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? 
"PkgPwrL2, " : ""); 7181 } 7182 return 0; 7183 } 7184 7185 #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ 7186 #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ 7187 7188 double get_quirk_tdp(void) 7189 { 7190 if (platform->rapl_quirk_tdp) 7191 return platform->rapl_quirk_tdp; 7192 7193 return 135.0; 7194 } 7195 7196 double get_tdp_intel(void) 7197 { 7198 unsigned long long msr; 7199 7200 if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) 7201 if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr)) 7202 return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; 7203 return get_quirk_tdp(); 7204 } 7205 7206 double get_tdp_amd(void) 7207 { 7208 return get_quirk_tdp(); 7209 } 7210 7211 void rapl_probe_intel(void) 7212 { 7213 unsigned long long msr; 7214 unsigned int time_unit; 7215 double tdp; 7216 const unsigned long long bic_watt_bits = BIC_SysWatt | BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt; 7217 const unsigned long long bic_joules_bits = BIC_Sys_J | BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J; 7218 7219 if (rapl_joules) 7220 bic_enabled &= ~bic_watt_bits; 7221 else 7222 bic_enabled &= ~bic_joules_bits; 7223 7224 if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS)) 7225 bic_enabled &= ~BIC_PKG__; 7226 if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)) 7227 bic_enabled &= ~BIC_RAM__; 7228 7229 /* units on package 0, verify later other packages match */ 7230 if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr)) 7231 return; 7232 7233 rapl_power_units = 1.0 / (1 << (msr & 0xF)); 7234 if (platform->has_rapl_divisor) 7235 rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000; 7236 else 7237 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); 7238 7239 if (platform->has_fixed_rapl_unit) 7240 rapl_dram_energy_units = (15.3 / 1000000); 7241 else 7242 rapl_dram_energy_units = rapl_energy_units; 7243 7244 if (platform->has_fixed_rapl_psys_unit) 7245 rapl_psys_energy_units = 1.0; 7246 else 7247 rapl_psys_energy_units = rapl_energy_units; 7248 7249 time_unit = msr >> 16 & 0xF; 7250 if (time_unit == 0) 7251 time_unit = 0xA; 7252 7253 rapl_time_units = 1.0 / (1 << (time_unit)); 7254 7255 tdp = get_tdp_intel(); 7256 7257 rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; 7258 if (!quiet) 7259 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); 7260 } 7261 7262 void rapl_probe_amd(void) 7263 { 7264 unsigned long long msr; 7265 double tdp; 7266 const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt; 7267 const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J; 7268 7269 if (rapl_joules) 7270 bic_enabled &= ~bic_watt_bits; 7271 else 7272 bic_enabled &= ~bic_joules_bits; 7273 7274 if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr)) 7275 return; 7276 7277 rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf)); 7278 rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f)); 7279 rapl_power_units = ldexp(1.0, -(msr & 0xf)); 7280 7281 tdp = get_tdp_amd(); 7282 7283 rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; 7284 if (!quiet) 7285 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); 7286 } 7287 7288 void print_power_limit_msr(int cpu, unsigned long long msr, char *label) 7289 { 7290 fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n", 7291 cpu, label, 7292 ((msr >> 15) & 1) ? 
"EN" : "DIS", 7293 ((msr >> 0) & 0x7FFF) * rapl_power_units, 7294 (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, 7295 (((msr >> 16) & 1) ? "EN" : "DIS")); 7296 7297 return; 7298 } 7299 7300 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) 7301 { 7302 unsigned long long msr; 7303 const char *msr_name; 7304 int cpu; 7305 7306 UNUSED(c); 7307 UNUSED(p); 7308 7309 if (!platform->rapl_msrs) 7310 return 0; 7311 7312 /* RAPL counters are per package, so print only for 1st thread/package */ 7313 if (!is_cpu_first_thread_in_package(t, c, p)) 7314 return 0; 7315 7316 cpu = t->cpu_id; 7317 if (cpu_migrate(cpu)) { 7318 fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu); 7319 return -1; 7320 } 7321 7322 if (platform->rapl_msrs & RAPL_AMD_F17H) { 7323 msr_name = "MSR_RAPL_PWR_UNIT"; 7324 if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr)) 7325 return -1; 7326 } else { 7327 msr_name = "MSR_RAPL_POWER_UNIT"; 7328 if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) 7329 return -1; 7330 } 7331 7332 fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr, 7333 rapl_power_units, rapl_energy_units, rapl_time_units); 7334 7335 if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) { 7336 7337 if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) 7338 return -5; 7339 7340 fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 7341 cpu, msr, 7342 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7343 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7344 ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7345 ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); 7346 7347 } 7348 if (platform->rapl_msrs & RAPL_PKG) { 7349 7350 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) 7351 return -9; 7352 7353 fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", 7354 cpu, msr, (msr >> 63) & 1 ? "" : "UN"); 7355 7356 print_power_limit_msr(cpu, msr, "PKG Limit #1"); 7357 fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n", 7358 cpu, 7359 ((msr >> 47) & 1) ? "EN" : "DIS", 7360 ((msr >> 32) & 0x7FFF) * rapl_power_units, 7361 (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, 7362 ((msr >> 48) & 1) ? "EN" : "DIS"); 7363 7364 if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr)) 7365 return -9; 7366 7367 fprintf(outf, "cpu%d: MSR_VR_CURRENT_CONFIG: 0x%08llx\n", cpu, msr); 7368 fprintf(outf, "cpu%d: PKG Limit #4: %f Watts (%slocked)\n", 7369 cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN"); 7370 } 7371 7372 if (platform->rapl_msrs & RAPL_DRAM_POWER_INFO) { 7373 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) 7374 return -6; 7375 7376 fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 7377 cpu, msr, 7378 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7379 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7380 ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, 7381 ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); 7382 } 7383 if (platform->rapl_msrs & RAPL_DRAM) { 7384 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) 7385 return -9; 7386 fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", 7387 cpu, msr, (msr >> 31) & 1 ? 
"" : "UN"); 7388 7389 print_power_limit_msr(cpu, msr, "DRAM Limit"); 7390 } 7391 if (platform->rapl_msrs & RAPL_CORE_POLICY) { 7392 if (get_msr(cpu, MSR_PP0_POLICY, &msr)) 7393 return -7; 7394 7395 fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); 7396 } 7397 if (platform->rapl_msrs & RAPL_CORE_POWER_LIMIT) { 7398 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) 7399 return -9; 7400 fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", 7401 cpu, msr, (msr >> 31) & 1 ? "" : "UN"); 7402 print_power_limit_msr(cpu, msr, "Cores Limit"); 7403 } 7404 if (platform->rapl_msrs & RAPL_GFX) { 7405 if (get_msr(cpu, MSR_PP1_POLICY, &msr)) 7406 return -8; 7407 7408 fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); 7409 7410 if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) 7411 return -9; 7412 fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", 7413 cpu, msr, (msr >> 31) & 1 ? "" : "UN"); 7414 print_power_limit_msr(cpu, msr, "GFX Limit"); 7415 } 7416 return 0; 7417 } 7418 7419 /* 7420 * probe_rapl() 7421 * 7422 * sets rapl_power_units, rapl_energy_units, rapl_time_units 7423 */ 7424 void probe_rapl(void) 7425 { 7426 if (!platform->rapl_msrs || no_msr) 7427 return; 7428 7429 if (genuine_intel) 7430 rapl_probe_intel(); 7431 if (authentic_amd || hygon_genuine) 7432 rapl_probe_amd(); 7433 7434 if (quiet) 7435 return; 7436 7437 for_all_cpus(print_rapl, ODD_COUNTERS); 7438 } 7439 7440 /* 7441 * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where 7442 * the Thermal Control Circuit (TCC) activates. 7443 * This is usually equal to tjMax. 7444 * 7445 * Older processors do not have this MSR, so there we guess, 7446 * but also allow cmdline over-ride with -T. 7447 * 7448 * Several MSR temperature values are in units of degrees-C 7449 * below this value, including the Digital Thermal Sensor (DTS), 7450 * Package Thermal Management Sensor (PTM), and thermal event thresholds. 
7451 */ 7452 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) 7453 { 7454 unsigned long long msr; 7455 unsigned int tcc_default, tcc_offset; 7456 int cpu; 7457 7458 UNUSED(c); 7459 UNUSED(p); 7460 7461 /* tj_max is used only for dts or ptm */ 7462 if (!(do_dts || do_ptm)) 7463 return 0; 7464 7465 /* this is a per-package concept */ 7466 if (!is_cpu_first_thread_in_package(t, c, p)) 7467 return 0; 7468 7469 cpu = t->cpu_id; 7470 if (cpu_migrate(cpu)) { 7471 fprintf(outf, "Could not migrate to CPU %d\n", cpu); 7472 return -1; 7473 } 7474 7475 if (tj_max_override != 0) { 7476 tj_max = tj_max_override; 7477 fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", cpu, tj_max); 7478 return 0; 7479 } 7480 7481 /* Temperature Target MSR is Nehalem and newer only */ 7482 if (!platform->has_nhm_msrs || no_msr) 7483 goto guess; 7484 7485 if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) 7486 goto guess; 7487 7488 tcc_default = (msr >> 16) & 0xFF; 7489 7490 if (!quiet) { 7491 int bits = platform->tcc_offset_bits; 7492 unsigned long long enabled = 0; 7493 7494 if (bits && !get_msr(base_cpu, MSR_PLATFORM_INFO, &enabled)) 7495 enabled = (enabled >> 30) & 1; 7496 7497 if (bits && enabled) { 7498 tcc_offset = (msr >> 24) & GENMASK(bits - 1, 0); 7499 fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", 7500 cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset); 7501 } else { 7502 fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default); 7503 } 7504 } 7505 7506 if (!tcc_default) 7507 goto guess; 7508 7509 tj_max = tcc_default; 7510 7511 return 0; 7512 7513 guess: 7514 tj_max = TJMAX_DEFAULT; 7515 fprintf(outf, "cpu%d: Guessing tjMax %d C, please use -T to specify\n", cpu, tj_max); 7516 7517 return 0; 7518 } 7519 7520 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) 7521 { 7522 unsigned long long msr; 7523 unsigned int dts, dts2; 7524 int cpu; 7525 7526 UNUSED(c); 7527 UNUSED(p); 7528 7529 if (no_msr) 7530 return 0; 7531 7532 if (!(do_dts || do_ptm)) 7533 return 0; 7534 7535 cpu = t->cpu_id; 7536 7537 /* DTS is per-core, no need to print for each thread */ 7538 if (!is_cpu_first_thread_in_core(t, c, p)) 7539 return 0; 7540 7541 if (cpu_migrate(cpu)) { 7542 fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu); 7543 return -1; 7544 } 7545 7546 if (do_ptm && is_cpu_first_core_in_package(t, c, p)) { 7547 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) 7548 return 0; 7549 7550 dts = (msr >> 16) & 0x7F; 7551 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts); 7552 7553 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr)) 7554 return 0; 7555 7556 dts = (msr >> 16) & 0x7F; 7557 dts2 = (msr >> 8) & 0x7F; 7558 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", 7559 cpu, msr, tj_max - dts, tj_max - dts2); 7560 } 7561 7562 if (do_dts && debug) { 7563 unsigned int resolution; 7564 7565 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) 7566 return 0; 7567 7568 dts = (msr >> 16) & 0x7F; 7569 resolution = (msr >> 27) & 0xF; 7570 fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", 7571 cpu, msr, tj_max - dts, resolution); 7572 7573 if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr)) 7574 return 0; 7575 7576 dts = (msr >> 16) & 0x7F; 7577 dts2 = (msr >> 8) & 0x7F; 7578 fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
7579 cpu, msr, tj_max - dts, tj_max - dts2); 7580 } 7581 7582 return 0; 7583 } 7584 7585 void probe_thermal(void) 7586 { 7587 if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK)) 7588 BIC_PRESENT(BIC_CORE_THROT_CNT); 7589 else 7590 BIC_NOT_PRESENT(BIC_CORE_THROT_CNT); 7591 7592 for_all_cpus(set_temperature_target, ODD_COUNTERS); 7593 7594 if (quiet) 7595 return; 7596 7597 for_all_cpus(print_thermal, ODD_COUNTERS); 7598 } 7599 7600 int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p) 7601 { 7602 unsigned int eax, ebx, ecx, edx; 7603 7604 UNUSED(c); 7605 UNUSED(p); 7606 7607 if (!genuine_intel) 7608 return 0; 7609 7610 if (cpu_migrate(t->cpu_id)) { 7611 fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id); 7612 return -1; 7613 } 7614 7615 if (max_level < 0x1a) 7616 return 0; 7617 7618 __cpuid(0x1a, eax, ebx, ecx, edx); 7619 eax = (eax >> 24) & 0xFF; 7620 if (eax == 0x20) 7621 t->is_atom = true; 7622 return 0; 7623 } 7624 7625 void decode_feature_control_msr(void) 7626 { 7627 unsigned long long msr; 7628 7629 if (no_msr) 7630 return; 7631 7632 if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr)) 7633 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n", 7634 base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : ""); 7635 } 7636 7637 void decode_misc_enable_msr(void) 7638 { 7639 unsigned long long msr; 7640 7641 if (no_msr) 7642 return; 7643 7644 if (!genuine_intel) 7645 return; 7646 7647 if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr)) 7648 fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n", 7649 base_cpu, msr, 7650 msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-", 7651 msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-", 7652 msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-", 7653 msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", 7654 msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); 7655 } 7656 7657 void decode_misc_feature_control(void) 7658 { 7659 unsigned long long msr; 7660 7661 if (no_msr) 7662 return; 7663 7664 if (!platform->has_msr_misc_feature_control) 7665 return; 7666 7667 if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr)) 7668 fprintf(outf, 7669 "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n", 7670 base_cpu, msr, msr & (1 << 0) ? "No-" : "", msr & (1 << 1) ? "No-" : "", 7671 msr & (1 << 2) ? "No-" : "", msr & (1 << 3) ? "No-" : ""); 7672 } 7673 7674 /* 7675 * Decode MSR_MISC_PWR_MGMT 7676 * 7677 * Decode the bits according to the Nehalem documentation; 7678 * bit[0] seems to continue to have the same meaning going forward, 7679 * bit[1] less so... 7680 */ 7681 void decode_misc_pwr_mgmt_msr(void) 7682 { 7683 unsigned long long msr; 7684 7685 if (no_msr) 7686 return; 7687 7688 if (!platform->has_msr_misc_pwr_mgmt) 7689 return; 7690 7691 if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) 7692 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n", 7693 base_cpu, msr, 7694 msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS"); 7695 } 7696 7697 /* 7698 * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG 7699 * 7700 * These MSRs are present on Silvermont processors, 7701 * Intel Atom processor E3000 series (Baytrail), and friends.
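 *
 * Both MSRs are decoded identically below: bit 0 is the enable bit,
 * so, illustratively, a raw value of 0x1 prints as "ENable-CC6-Demotion"
 * and 0x0 prints as "DISable-CC6-Demotion".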
7702 */ 7703 void decode_c6_demotion_policy_msr(void) 7704 { 7705 unsigned long long msr; 7706 7707 if (no_msr) 7708 return; 7709 7710 if (!platform->has_msr_c6_demotion_policy_config) 7711 return; 7712 7713 if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr)) 7714 fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n", 7715 base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS"); 7716 7717 if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr)) 7718 fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n", 7719 base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS"); 7720 } 7721 7722 void print_dev_latency(void) 7723 { 7724 char *path = "/dev/cpu_dma_latency"; 7725 int fd; 7726 int value; 7727 int retval; 7728 7729 fd = open(path, O_RDONLY); 7730 if (fd < 0) { 7731 if (debug) 7732 warnx("open %s failed", path); 7733 return; 7734 } 7735 7736 retval = read(fd, (void *)&value, sizeof(int)); 7737 if (retval != sizeof(int)) { 7738 warn("read failed %s", path); 7739 close(fd); 7740 return; 7741 } 7742 fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", value, value == 2000000000 ? "default" : "constrained"); 7743 7744 close(fd); 7745 } 7746 7747 static int has_instr_count_access(void) 7748 { 7749 int fd; 7750 int has_access; 7751 7752 if (no_perf) 7753 return 0; 7754 7755 fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0); 7756 has_access = fd != -1; 7757 7758 if (fd != -1) 7759 close(fd); 7760 7761 if (!has_access) 7762 warnx("Failed to access %s. Some of the counters may not be available\n" 7763 "\tRun as root to enable them or use %s to disable the access explicitly", 7764 "instructions retired perf counter", "--no-perf"); 7765 7766 return has_access; 7767 } 7768 7769 int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, 7770 double *scale_, enum rapl_unit *unit_) 7771 { 7772 if (no_perf) 7773 return -1; 7774 7775 const double scale = read_perf_scale(cai->perf_subsys, cai->perf_name); 7776 7777 if (scale == 0.0) 7778 return -1; 7779 7780 const enum rapl_unit unit = read_perf_rapl_unit(cai->perf_subsys, cai->perf_name); 7781 7782 if (unit == RAPL_UNIT_INVALID) 7783 return -1; 7784 7785 const unsigned int rapl_type = read_perf_type(cai->perf_subsys); 7786 const unsigned int rapl_energy_pkg_config = read_perf_config(cai->perf_subsys, cai->perf_name); 7787 7788 const int fd_counter = 7789 open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP); 7790 if (fd_counter == -1) 7791 return -1; 7792 7793 /* If it's the first counter opened, make it a group descriptor */ 7794 if (rci->fd_perf == -1) 7795 rci->fd_perf = fd_counter; 7796 7797 *scale_ = scale; 7798 *unit_ = unit; 7799 return fd_counter; 7800 } 7801 7802 int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, 7803 double *scale, enum rapl_unit *unit) 7804 { 7805 int ret = add_rapl_perf_counter_(cpu, rci, cai, scale, unit); 7806 7807 if (debug >= 2) 7808 fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu); 7809 7810 return ret; 7811 } 7812 7813 /* 7814 * Linux-perf manages the HW instructions-retired counter 7815 * by enabling it when requested, and hiding rollover 7816 */ 7817 void linux_perf_init(void) 7818 { 7819 if (access("/proc/sys/kernel/perf_event_paranoid", F_OK)) 7820 return; 7821 7822 if (BIC_IS_ENABLED(BIC_IPC) && has_aperf) { 7823 fd_instr_count_percpu = calloc(topo.max_cpu_num + 1,
sizeof(int)); 7824 if (fd_instr_count_percpu == NULL) 7825 err(-1, "calloc fd_instr_count_percpu"); 7826 } 7827 } 7828 7829 void rapl_perf_init(void) 7830 { 7831 const unsigned int num_domains = (platform->has_per_core_rapl ? topo.max_core_id : topo.max_package_id) + 1; 7832 bool *domain_visited = calloc(num_domains, sizeof(bool)); 7833 7834 rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain)); 7835 if (rapl_counter_info_perdomain == NULL) 7836 err(-1, "calloc rapl_counter_info_perdomain"); 7837 rapl_counter_info_perdomain_size = num_domains; 7838 7839 /* 7840 * Initialize rapl_counter_info_perdomain 7841 */ 7842 for (unsigned int domain_id = 0; domain_id < num_domains; ++domain_id) { 7843 struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain_id]; 7844 7845 rci->fd_perf = -1; 7846 for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) { 7847 rci->data[i] = 0; 7848 rci->source[i] = COUNTER_SOURCE_NONE; 7849 } 7850 } 7851 7852 /* 7853 * Open/probe the counters 7854 * If we can't get it via perf, fall back to MSR 7855 */ 7856 for (size_t i = 0; i < ARRAY_SIZE(rapl_counter_arch_infos); ++i) { 7857 7858 const struct rapl_counter_arch_info *const cai = &rapl_counter_arch_infos[i]; 7859 bool has_counter = false; 7860 double scale; 7861 enum rapl_unit unit; 7862 unsigned int next_domain; 7863 7864 memset(domain_visited, 0, num_domains * sizeof(*domain_visited)); 7865 7866 for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) { 7867 7868 if (cpu_is_not_allowed(cpu)) 7869 continue; 7870 7871 /* Skip already seen and handled RAPL domains */ 7872 next_domain = 7873 platform->has_per_core_rapl ? cpus[cpu].physical_core_id : cpus[cpu].physical_package_id; 7874 7875 assert(next_domain < num_domains); 7876 7877 if (domain_visited[next_domain]) 7878 continue; 7879 7880 domain_visited[next_domain] = 1; 7881 7882 if ((cai->flags & RAPL_COUNTER_FLAG_PLATFORM_COUNTER) && (cpu != base_cpu)) 7883 continue; 7884 7885 struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain]; 7886 7887 /* Check if the counter is enabled and accessible */ 7888 if (BIC_IS_ENABLED(cai->bic) && (platform->rapl_msrs & cai->feature_mask)) { 7889 7890 /* Use perf API for this counter */ 7891 if (!no_perf && cai->perf_name 7892 && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) { 7893 rci->source[cai->rci_index] = COUNTER_SOURCE_PERF; 7894 rci->scale[cai->rci_index] = scale * cai->compat_scale; 7895 rci->unit[cai->rci_index] = unit; 7896 rci->flags[cai->rci_index] = cai->flags; 7897 7898 /* Use MSR for this counter */ 7899 } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) { 7900 rci->source[cai->rci_index] = COUNTER_SOURCE_MSR; 7901 rci->msr[cai->rci_index] = cai->msr; 7902 rci->msr_mask[cai->rci_index] = cai->msr_mask; 7903 rci->msr_shift[cai->rci_index] = cai->msr_shift; 7904 rci->unit[cai->rci_index] = RAPL_UNIT_JOULES; 7905 rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale; 7906 rci->flags[cai->rci_index] = cai->flags; 7907 } 7908 } 7909 7910 if (rci->source[cai->rci_index] != COUNTER_SOURCE_NONE) 7911 has_counter = true; 7912 } 7913 7914 /* If any CPU has access to the counter, make it present */ 7915 if (has_counter) 7916 BIC_PRESENT(cai->bic); 7917 } 7918 7919 free(domain_visited); 7920 } 7921 7922 /* Assumes msr_counter_info is populated */ 7923 static int has_amperf_access(void) 7924 { 7925 return msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].present && 7926 msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].present; 7927 } 7928
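/*
 * Note on the add_*_perf_counter_() helpers above and below: they share one
 * pattern -- the first counter opened for a domain becomes the perf group
 * leader (it is opened with group_fd == -1), later counters join that group,
 * and PERF_FORMAT_GROUP lets a single read() of the leader return every
 * member's value. A minimal sketch of the matching read side, per struct
 * read_format in linux/perf_event.h (NUM_COUNTERS and use_counter_value()
 * are illustrative names, not ones used in this file):
 *
 *	struct {
 *		__u64 nr;
 *		__u64 values[NUM_COUNTERS];
 *	} data;
 *
 *	if (read(fd_leader, &data, sizeof(data)) > 0)
 *		for (__u64 i = 0; i < data.nr; ++i)
 *			use_counter_value(data.values[i]);
 */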
7929 int *get_cstate_perf_group_fd(struct cstate_counter_info_t *cci, const char *group_name) 7930 { 7931 if (strcmp(group_name, "cstate_core") == 0) 7932 return &cci->fd_perf_core; 7933 7934 if (strcmp(group_name, "cstate_pkg") == 0) 7935 return &cci->fd_perf_pkg; 7936 7937 return NULL; 7938 } 7939 7940 int add_cstate_perf_counter_(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai) 7941 { 7942 if (no_perf) 7943 return -1; 7944 7945 int *pfd_group = get_cstate_perf_group_fd(cci, cai->perf_subsys); 7946 7947 if (pfd_group == NULL) 7948 return -1; 7949 7950 const unsigned int type = read_perf_type(cai->perf_subsys); 7951 const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name); 7952 7953 const int fd_counter = open_perf_counter(cpu, type, config, *pfd_group, PERF_FORMAT_GROUP); 7954 7955 if (fd_counter == -1) 7956 return -1; 7957 7958 /* If it's the first counter opened, make it a group descriptor */ 7959 if (*pfd_group == -1) 7960 *pfd_group = fd_counter; 7961 7962 return fd_counter; 7963 } 7964 7965 int add_cstate_perf_counter(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai) 7966 { 7967 int ret = add_cstate_perf_counter_(cpu, cci, cai); 7968 7969 if (debug >= 2) 7970 fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu); 7971 7972 return ret; 7973 } 7974 7975 int add_msr_perf_counter_(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai) 7976 { 7977 if (no_perf) 7978 return -1; 7979 7980 const unsigned int type = read_perf_type(cai->perf_subsys); 7981 const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name); 7982 7983 const int fd_counter = open_perf_counter(cpu, type, config, cci->fd_perf, PERF_FORMAT_GROUP); 7984 7985 if (fd_counter == -1) 7986 return -1; 7987 7988 /* If it's the first counter opened, make it a group descriptor */ 7989 if (cci->fd_perf == -1) 7990 cci->fd_perf = fd_counter; 7991 7992 return fd_counter; 7993 } 7994 7995 int add_msr_perf_counter(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai) 7996 { 7997 int ret = add_msr_perf_counter_(cpu, cci, cai); 7998 7999 if (debug) 8000 fprintf(stderr, "%s: %s/%s: %d (cpu: %d)\n", __func__, cai->perf_subsys, cai->perf_name, ret, cpu); 8001 8002 return ret; 8003 } 8004 8005 void msr_perf_init_(void) 8006 { 8007 const int mci_num = topo.max_cpu_num + 1; 8008 8009 msr_counter_info = calloc(mci_num, sizeof(*msr_counter_info)); 8010 if (!msr_counter_info) 8011 err(1, "calloc msr_counter_info"); 8012 msr_counter_info_size = mci_num; 8013 8014 for (int cpu = 0; cpu < mci_num; ++cpu) 8015 msr_counter_info[cpu].fd_perf = -1; 8016 8017 for (int cidx = 0; cidx < NUM_MSR_COUNTERS; ++cidx) { 8018 8019 struct msr_counter_arch_info *cai = &msr_counter_arch_infos[cidx]; 8020 8021 cai->present = false; 8022 8023 for (int cpu = 0; cpu < mci_num; ++cpu) { 8024 8025 struct msr_counter_info_t *const cci = &msr_counter_info[cpu]; 8026 8027 if (cpu_is_not_allowed(cpu)) 8028 continue; 8029 8030 if (cai->needed) { 8031 /* Use perf API for this counter */ 8032 if (!no_perf && cai->perf_name && add_msr_perf_counter(cpu, cci, cai) != -1) { 8033 cci->source[cai->rci_index] = COUNTER_SOURCE_PERF; 8034 cai->present = true; 8035 8036 /* Use MSR for this counter */ 8037 } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) { 8038 cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; 8039 cci->msr[cai->rci_index] = cai->msr; 8040 cci->msr_mask[cai->rci_index] = cai->msr_mask;
8041 cai->present = true; 8042 } 8043 } 8044 } 8045 } 8046 8047 8048 /* Initialize data for reading perf counters from the MSR group. */ 8049 void msr_perf_init(void) 8050 { 8051 bool need_amperf = false, need_smi = false; 8052 const bool need_soft_c1 = (!platform->has_msr_core_c1_res) && (platform->supported_cstates & CC1); 8053 8054 need_amperf = BIC_IS_ENABLED(BIC_Avg_MHz) || BIC_IS_ENABLED(BIC_Busy) || BIC_IS_ENABLED(BIC_Bzy_MHz) 8055 || BIC_IS_ENABLED(BIC_IPC) || need_soft_c1; 8056 8057 if (BIC_IS_ENABLED(BIC_SMI)) 8058 need_smi = true; 8059 8060 /* Enable needed counters */ 8061 msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].needed = need_amperf; 8062 msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].needed = need_amperf; 8063 msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].needed = need_smi; 8064 8065 msr_perf_init_(); 8066 8067 const bool has_amperf = has_amperf_access(); 8068 const bool has_smi = msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].present; 8069 8070 has_aperf_access = has_amperf; 8071 8072 if (has_amperf) { 8073 BIC_PRESENT(BIC_Avg_MHz); 8074 BIC_PRESENT(BIC_Busy); 8075 BIC_PRESENT(BIC_Bzy_MHz); 8076 8077 } 8078 8079 if (has_smi) 8080 BIC_PRESENT(BIC_SMI); 8081 } 8082 8083 void cstate_perf_init_(bool soft_c1) 8084 { 8085 bool has_counter; 8086 bool *cores_visited = NULL, *pkg_visited = NULL; 8087 const int cores_visited_elems = topo.max_core_id + 1; 8088 const int pkg_visited_elems = topo.max_package_id + 1; 8089 const int cci_num = topo.max_cpu_num + 1; 8090 8091 ccstate_counter_info = calloc(cci_num, sizeof(*ccstate_counter_info)); 8092 if (!ccstate_counter_info) 8093 err(1, "calloc ccstate_counter_info"); 8094 ccstate_counter_info_size = cci_num; 8095 8096 cores_visited = calloc(cores_visited_elems, sizeof(*cores_visited)); 8097 if (!cores_visited) 8098 err(1, "calloc cores_visited"); 8099 8100 pkg_visited = calloc(pkg_visited_elems, sizeof(*pkg_visited)); 8101 if (!pkg_visited) 8102 err(1, "calloc pkg_visited"); 8103 8104 /* Initialize ccstate_counter_info */ 8105 for (int cpu = 0; cpu < cci_num; ++cpu) { 8106 ccstate_counter_info[cpu].fd_perf_core = -1; 8107 ccstate_counter_info[cpu].fd_perf_pkg = -1; 8108 } 8109 8110 for (int cidx = 0; cidx < NUM_CSTATE_COUNTERS; ++cidx) { 8111 has_counter = false; 8112 memset(cores_visited, 0, cores_visited_elems * sizeof(*cores_visited)); 8113 memset(pkg_visited, 0, pkg_visited_elems * sizeof(*pkg_visited)); 8114 8115 const struct cstate_counter_arch_info *cai = &ccstate_counter_arch_infos[cidx]; 8116 8117 for (int cpu = 0; cpu < cci_num; ++cpu) { 8118 8119 struct cstate_counter_info_t *const cci = &ccstate_counter_info[cpu]; 8120 8121 if (cpu_is_not_allowed(cpu)) 8122 continue; 8123 8124 const int core_id = cpus[cpu].physical_core_id; 8125 const int pkg_id = cpus[cpu].physical_package_id; 8126 8127 assert(core_id < cores_visited_elems); 8128 assert(pkg_id < pkg_visited_elems); 8129 8130 const bool per_thread = cai->flags & CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD; 8131 const bool per_core = cai->flags & CSTATE_COUNTER_FLAG_COLLECT_PER_CORE; 8132 8133 if (!per_thread && cores_visited[core_id]) 8134 continue; 8135 8136 if (!per_core && pkg_visited[pkg_id]) 8137 continue; 8138 8139 const bool counter_needed = BIC_IS_ENABLED(cai->bic) || 8140 (soft_c1 && (cai->flags & CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY)); 8141 const bool counter_supported = (platform->supported_cstates & cai->feature_mask); 8142 8143 if (counter_needed && counter_supported) { 8144 /* Use perf API for this counter */ 8145 if
(!no_perf && cai->perf_name && add_cstate_perf_counter(cpu, cci, cai) != -1) { 8146 8147 cci->source[cai->rci_index] = COUNTER_SOURCE_PERF; 8148 8149 /* Use MSR for this counter */ } else if (!no_msr && cai->msr && pkg_cstate_limit >= cai->pkg_cstate_limit 8151 && probe_msr(cpu, cai->msr) == 0) { 8152 cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; 8153 cci->msr[cai->rci_index] = cai->msr; 8154 } 8155 } 8156 8157 if (cci->source[cai->rci_index] != COUNTER_SOURCE_NONE) { 8158 has_counter = true; 8159 cores_visited[core_id] = true; 8160 pkg_visited[pkg_id] = true; 8161 } 8162 } 8163 8164 /* If any CPU has access to the counter, make it present */ 8165 if (has_counter) 8166 BIC_PRESENT(cai->bic); 8167 } 8168 8169 free(cores_visited); 8170 free(pkg_visited); 8171 } 8172 8173 void cstate_perf_init(void) 8174 { 8175 /* 8176 * If we don't have a C1 residency MSR, we calculate it "in software", 8177 * but then we need APERF and MPERF too. 8178 */ 8179 const bool soft_c1 = !platform->has_msr_core_c1_res && has_amperf_access() 8180 && platform->supported_cstates & CC1; 8181 8182 if (soft_c1) 8183 BIC_PRESENT(BIC_CPU_c1); 8184 8185 cstate_perf_init_(soft_c1); 8186 } 8187 8188 void probe_cstates(void) 8189 { 8190 probe_cst_limit(); 8191 8192 if (platform->has_msr_module_c6_res_ms) 8193 BIC_PRESENT(BIC_Mod_c6); 8194 8195 if (platform->has_ext_cst_msrs && !no_msr) { 8196 BIC_PRESENT(BIC_Totl_c0); 8197 BIC_PRESENT(BIC_Any_c0); 8198 BIC_PRESENT(BIC_GFX_c0); 8199 BIC_PRESENT(BIC_CPUGFX); 8200 } 8201 8202 if (quiet) 8203 return; 8204 8205 dump_power_ctl(); 8206 dump_cst_cfg(); 8207 decode_c6_demotion_policy_msr(); 8208 print_dev_latency(); 8209 dump_sysfs_cstate_config(); 8210 print_irtl(); 8211 } 8212 8213 void probe_lpi(void) 8214 { 8215 if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK)) 8216 BIC_PRESENT(BIC_CPU_LPI); 8217 else 8218 BIC_NOT_PRESENT(BIC_CPU_LPI); 8219 8220 if (!access(sys_lpi_file_sysfs, R_OK)) { 8221 sys_lpi_file = sys_lpi_file_sysfs; 8222 BIC_PRESENT(BIC_SYS_LPI); 8223 } else if (!access(sys_lpi_file_debugfs, R_OK)) { 8224 sys_lpi_file = sys_lpi_file_debugfs; 8225 BIC_PRESENT(BIC_SYS_LPI); 8226 } else { 8227 sys_lpi_file = NULL; 8228 BIC_NOT_PRESENT(BIC_SYS_LPI); 8229 } 8230 8231 } 8232 8233 void probe_pstates(void) 8234 { 8235 probe_bclk(); 8236 8237 if (quiet) 8238 return; 8239 8240 dump_platform_info(); 8241 dump_turbo_ratio_info(); 8242 dump_sysfs_pstate_config(); 8243 decode_misc_pwr_mgmt_msr(); 8244 8245 for_all_cpus(print_hwp, ODD_COUNTERS); 8246 for_all_cpus(print_epb, ODD_COUNTERS); 8247 for_all_cpus(print_perf_limit, ODD_COUNTERS); 8248 } 8249 8250 void process_cpuid() 8251 { 8252 unsigned int eax, ebx, ecx, edx; 8253 unsigned int fms, family, model, stepping, ecx_flags, edx_flags; 8254 unsigned long long ucode_patch = 0; 8255 bool ucode_patch_valid = false; 8256 8257 eax = ebx = ecx = edx = 0; 8258 8259 __cpuid(0, max_level, ebx, ecx, edx); 8260 8261 if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) 8262 genuine_intel = 1; 8263 else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) 8264 authentic_amd = 1; 8265 else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e) 8266 hygon_genuine = 1; 8267 8268 if (!quiet) 8269 fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n", 8270 (char *)&ebx, (char *)&edx, (char *)&ecx, max_level); 8271 8272 __cpuid(1, fms, ebx, ecx, edx); 8273 family = (fms >> 8) & 0xf; 8274 model = (fms >> 4) & 0xf; 8275 stepping = fms & 0xf; 8276 if (family == 0xf) 8277
family += (fms >> 20) & 0xff; 8278 if (family >= 6) 8279 model += ((fms >> 16) & 0xf) << 4; 8280 ecx_flags = ecx; 8281 edx_flags = edx; 8282 8283 if (!no_msr) { 8284 if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch)) 8285 warnx("get_msr(UCODE)"); 8286 else 8287 ucode_patch_valid = true; 8288 } 8289 8290 /* 8291 * check max extended function levels of CPUID. 8292 * This is needed to check for invariant TSC. 8293 * This check is valid for both Intel and AMD. 8294 */ 8295 ebx = ecx = edx = 0; 8296 __cpuid(0x80000000, max_extended_level, ebx, ecx, edx); 8297 8298 if (!quiet) { 8299 fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)", 8300 family, model, stepping, family, model, stepping); 8301 if (ucode_patch_valid) 8302 fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF)); 8303 fputc('\n', outf); 8304 8305 fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level); 8306 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n", 8307 ecx_flags & (1 << 0) ? "SSE3" : "-", 8308 ecx_flags & (1 << 3) ? "MONITOR" : "-", 8309 ecx_flags & (1 << 6) ? "SMX" : "-", 8310 ecx_flags & (1 << 7) ? "EIST" : "-", 8311 ecx_flags & (1 << 8) ? "TM2" : "-", 8312 edx_flags & (1 << 4) ? "TSC" : "-", 8313 edx_flags & (1 << 5) ? "MSR" : "-", 8314 edx_flags & (1 << 22) ? "ACPI-TM" : "-", 8315 edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-"); 8316 } 8317 8318 probe_platform_features(family, model); 8319 8320 if (!(edx_flags & (1 << 5))) 8321 errx(1, "CPUID: no MSR"); 8322 8323 if (max_extended_level >= 0x80000007) { 8324 8325 /* 8326 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 8327 * this check is valid for both Intel and AMD 8328 */ 8329 __cpuid(0x80000007, eax, ebx, ecx, edx); 8330 has_invariant_tsc = edx & (1 << 8); 8331 } 8332 8333 /* 8334 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 8335 * this check is valid for both Intel and AMD 8336 */ 8337 8338 __cpuid(0x6, eax, ebx, ecx, edx); 8339 has_aperf = ecx & (1 << 0); 8340 do_dts = eax & (1 << 0); 8341 if (do_dts) 8342 BIC_PRESENT(BIC_CoreTmp); 8343 has_turbo = eax & (1 << 1); 8344 do_ptm = eax & (1 << 6); 8345 if (do_ptm) 8346 BIC_PRESENT(BIC_PkgTmp); 8347 has_hwp = eax & (1 << 7); 8348 has_hwp_notify = eax & (1 << 8); 8349 has_hwp_activity_window = eax & (1 << 9); 8350 has_hwp_epp = eax & (1 << 10); 8351 has_hwp_pkg = eax & (1 << 11); 8352 has_epb = ecx & (1 << 3); 8353 8354 if (!quiet) 8355 fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, " 8356 "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", 8357 has_aperf ? "" : "No-", 8358 has_turbo ? "" : "No-", 8359 do_dts ? "" : "No-", 8360 do_ptm ? "" : "No-", 8361 has_hwp ? "" : "No-", 8362 has_hwp_notify ? "" : "No-", 8363 has_hwp_activity_window ? "" : "No-", 8364 has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-"); 8365 8366 if (!quiet) 8367 decode_misc_enable_msr(); 8368 8369 if (max_level >= 0x7 && !quiet) { 8370 int has_sgx; 8371 8372 ecx = 0; 8373 8374 __cpuid_count(0x7, 0, eax, ebx, ecx, edx); 8375 8376 has_sgx = ebx & (1 << 2); 8377 8378 is_hybrid = edx & (1 << 15); 8379 8380 fprintf(outf, "CPUID(7): %sSGX %sHybrid\n", has_sgx ? "" : "No-", is_hybrid ? 
"" : "No-"); 8381 8382 if (has_sgx) 8383 decode_feature_control_msr(); 8384 } 8385 8386 if (max_level >= 0x15) { 8387 unsigned int eax_crystal; 8388 unsigned int ebx_tsc; 8389 8390 /* 8391 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz 8392 */ 8393 eax_crystal = ebx_tsc = crystal_hz = edx = 0; 8394 __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx); 8395 8396 if (ebx_tsc != 0) { 8397 if (!quiet && (ebx != 0)) 8398 fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", 8399 eax_crystal, ebx_tsc, crystal_hz); 8400 8401 if (crystal_hz == 0) 8402 crystal_hz = platform->crystal_freq; 8403 8404 if (crystal_hz) { 8405 tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal; 8406 if (!quiet) 8407 fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", 8408 tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); 8409 } 8410 } 8411 } 8412 if (max_level >= 0x16) { 8413 unsigned int base_mhz, max_mhz, bus_mhz, edx; 8414 8415 /* 8416 * CPUID 16H Base MHz, Max MHz, Bus MHz 8417 */ 8418 base_mhz = max_mhz = bus_mhz = edx = 0; 8419 8420 __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx); 8421 8422 bclk = bus_mhz; 8423 8424 base_hz = base_mhz * 1000000; 8425 has_base_hz = 1; 8426 8427 if (platform->enable_tsc_tweak) 8428 tsc_tweak = base_hz / tsc_hz; 8429 8430 if (!quiet) 8431 fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", 8432 base_mhz, max_mhz, bus_mhz); 8433 } 8434 8435 if (has_aperf) 8436 aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1; 8437 8438 BIC_PRESENT(BIC_IRQ); 8439 BIC_PRESENT(BIC_NMI); 8440 BIC_PRESENT(BIC_TSC_MHz); 8441 } 8442 8443 static void counter_info_init(void) 8444 { 8445 for (int i = 0; i < NUM_CSTATE_COUNTERS; ++i) { 8446 struct cstate_counter_arch_info *const cai = &ccstate_counter_arch_infos[i]; 8447 8448 if (platform->has_msr_knl_core_c6_residency && cai->msr == MSR_CORE_C6_RESIDENCY) 8449 cai->msr = MSR_KNL_CORE_C6_RESIDENCY; 8450 8451 if (!platform->has_msr_core_c1_res && cai->msr == MSR_CORE_C1_RES) 8452 cai->msr = 0; 8453 8454 if (platform->has_msr_atom_pkg_c6_residency && cai->msr == MSR_PKG_C6_RESIDENCY) 8455 cai->msr = MSR_ATOM_PKG_C6_RESIDENCY; 8456 } 8457 8458 for (int i = 0; i < NUM_MSR_COUNTERS; ++i) { 8459 msr_counter_arch_infos[i].present = false; 8460 msr_counter_arch_infos[i].needed = false; 8461 } 8462 } 8463 8464 void probe_pm_features(void) 8465 { 8466 probe_pstates(); 8467 8468 probe_cstates(); 8469 8470 probe_lpi(); 8471 8472 probe_intel_uncore_frequency(); 8473 8474 probe_graphics(); 8475 8476 probe_rapl(); 8477 8478 probe_thermal(); 8479 8480 if (platform->has_nhm_msrs && !no_msr) 8481 BIC_PRESENT(BIC_SMI); 8482 8483 if (!quiet) 8484 decode_misc_feature_control(); 8485 } 8486 8487 /* 8488 * in /dev/cpu/ return success for names that are numbers 8489 * ie. filter out ".", "..", "microcode". 
8490 */ 8491 int dir_filter(const struct dirent *dirp) 8492 { 8493 if (isdigit(dirp->d_name[0])) 8494 return 1; 8495 else 8496 return 0; 8497 } 8498 8499 char *possible_file = "/sys/devices/system/cpu/possible"; 8500 char possible_buf[1024]; 8501 8502 int initialize_cpu_possible_set(void) 8503 { 8504 FILE *fp; 8505 8506 fp = fopen(possible_file, "r"); 8507 if (!fp) { 8508 warn("open %s", possible_file); 8509 return -1; 8510 } 8511 if (fread(possible_buf, sizeof(char), sizeof(possible_buf) - 1, fp) == 0) { 8512 warn("read %s", possible_file); 8513 goto err; 8514 } 8515 if (parse_cpu_str(possible_buf, cpu_possible_set, cpu_possible_setsize)) { 8516 warnx("%s: malformed cpu str %s", possible_file, possible_buf); 8517 goto err; 8518 } 8519 fclose(fp); return 0; 8520 8521 err: 8522 fclose(fp); 8523 return -1; 8524 } 8525 8526 void topology_probe(bool startup) 8527 { 8528 int i; 8529 int max_core_id = 0; 8530 int max_package_id = 0; 8531 int max_siblings = 0; 8532 8533 /* Initialize num_cpus, max_cpu_num */ 8534 set_max_cpu_num(); 8535 topo.num_cpus = 0; 8536 for_all_proc_cpus(count_cpus); 8537 if (!summary_only) 8538 BIC_PRESENT(BIC_CPU); 8539 8540 if (debug > 1) 8541 fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); 8542 8543 cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); 8544 if (cpus == NULL) 8545 err(1, "calloc cpus"); 8546 8547 /* 8548 * Allocate and initialize cpu_present_set 8549 */ 8550 cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1)); 8551 if (cpu_present_set == NULL) 8552 err(3, "CPU_ALLOC"); 8553 cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 8554 CPU_ZERO_S(cpu_present_setsize, cpu_present_set); 8555 for_all_proc_cpus(mark_cpu_present); 8556 8557 /* 8558 * Allocate and initialize cpu_possible_set 8559 */ 8560 cpu_possible_set = CPU_ALLOC((topo.max_cpu_num + 1)); 8561 if (cpu_possible_set == NULL) 8562 err(3, "CPU_ALLOC"); 8563 cpu_possible_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 8564 CPU_ZERO_S(cpu_possible_setsize, cpu_possible_set); 8565 initialize_cpu_possible_set(); 8566 8567 /* 8568 * Allocate and initialize cpu_effective_set 8569 */ 8570 cpu_effective_set = CPU_ALLOC((topo.max_cpu_num + 1)); 8571 if (cpu_effective_set == NULL) 8572 err(3, "CPU_ALLOC"); 8573 cpu_effective_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 8574 CPU_ZERO_S(cpu_effective_setsize, cpu_effective_set); 8575 update_effective_set(startup); 8576 8577 /* 8578 * Allocate and initialize cpu_allowed_set 8579 */ 8580 cpu_allowed_set = CPU_ALLOC((topo.max_cpu_num + 1)); 8581 if (cpu_allowed_set == NULL) 8582 err(3, "CPU_ALLOC"); 8583 cpu_allowed_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 8584 CPU_ZERO_S(cpu_allowed_setsize, cpu_allowed_set); 8585 8586 /* 8587 * Validate and update cpu_allowed_set. 8588 * 8589 * Make sure all cpus in cpu_subset are also in cpu_present_set during startup. 8590 * Give a warning when cpus in cpu_subset become unavailable at runtime. 8591 * Give a warning when cpus are not effective because of cgroup setting. 8592 * 8593 * cpu_allowed_set is the intersection of cpu_present_set/cpu_effective_set/cpu_subset.
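 *
 * Illustrative example: on an 8-CPU system (cpu_present_set = 0-7) with a
 * cgroup cpuset of 0-3 and "--cpu 2,3,6", the loop below produces
 * cpu_allowed_set = {2,3} and warns that cpu6 is not effective.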
8594 */ 8595 for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) { 8596 if (cpu_subset && !CPU_ISSET_S(i, cpu_subset_size, cpu_subset)) 8597 continue; 8598 8599 if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) { 8600 if (cpu_subset) { 8601 /* cpus in cpu_subset must be in cpu_present_set during startup */ 8602 if (startup) 8603 err(1, "cpu%d not present", i); 8604 else 8605 fprintf(stderr, "cpu%d not present\n", i); 8606 } 8607 continue; 8608 } 8609 8610 if (CPU_COUNT_S(cpu_effective_setsize, cpu_effective_set)) { 8611 if (!CPU_ISSET_S(i, cpu_effective_setsize, cpu_effective_set)) { 8612 fprintf(stderr, "cpu%d not effective\n", i); 8613 continue; 8614 } 8615 } 8616 8617 CPU_SET_S(i, cpu_allowed_setsize, cpu_allowed_set); 8618 } 8619 8620 if (!CPU_COUNT_S(cpu_allowed_setsize, cpu_allowed_set)) 8621 err(-ENODEV, "No valid cpus found"); 8622 sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set); 8623 8624 /* 8625 * Allocate and initialize cpu_affinity_set 8626 */ 8627 cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1)); 8628 if (cpu_affinity_set == NULL) 8629 err(3, "CPU_ALLOC"); 8630 cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 8631 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 8632 8633 for_all_proc_cpus(init_thread_id); 8634 8635 for_all_proc_cpus(set_cpu_hybrid_type); 8636 8637 /* 8638 * For online cpus 8639 * find max_core_id, max_package_id 8640 */ 8641 for (i = 0; i <= topo.max_cpu_num; ++i) { 8642 int siblings; 8643 8644 if (cpu_is_not_present(i)) { 8645 if (debug > 1) 8646 fprintf(outf, "cpu%d NOT PRESENT\n", i); 8647 continue; 8648 } 8649 8650 cpus[i].logical_cpu_id = i; 8651 8652 /* get package information */ 8653 cpus[i].physical_package_id = get_physical_package_id(i); 8654 if (cpus[i].physical_package_id > max_package_id) 8655 max_package_id = cpus[i].physical_package_id; 8656 8657 /* get die information */ 8658 cpus[i].die_id = get_die_id(i); 8659 if (cpus[i].die_id > topo.max_die_id) 8660 topo.max_die_id = cpus[i].die_id; 8661 8662 /* get numa node information */ 8663 cpus[i].physical_node_id = get_physical_node_id(&cpus[i]); 8664 if (cpus[i].physical_node_id > topo.max_node_num) 8665 topo.max_node_num = cpus[i].physical_node_id; 8666 8667 /* get core information */ 8668 cpus[i].physical_core_id = get_core_id(i); 8669 if (cpus[i].physical_core_id > max_core_id) 8670 max_core_id = cpus[i].physical_core_id; 8671 8672 /* get thread information */ 8673 siblings = get_thread_siblings(&cpus[i]); 8674 if (siblings > max_siblings) 8675 max_siblings = siblings; 8676 if (cpus[i].thread_id == 0) 8677 topo.num_cores++; 8678 } 8679 topo.max_core_id = max_core_id; 8680 topo.max_package_id = max_package_id; 8681 8682 topo.cores_per_node = max_core_id + 1; 8683 if (debug > 1) 8684 fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node); 8685 if (!summary_only) 8686 BIC_PRESENT(BIC_Core); 8687 8688 topo.num_die = topo.max_die_id + 1; 8689 if (debug > 1) 8690 fprintf(outf, "max_die_id %d, sizing for %d die\n", topo.max_die_id, topo.num_die); 8691 if (!summary_only && topo.num_die > 1) 8692 BIC_PRESENT(BIC_Die); 8693 8694 topo.num_packages = max_package_id + 1; 8695 if (debug > 1) 8696 fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); 8697 if (!summary_only && topo.num_packages > 1) 8698 BIC_PRESENT(BIC_Package); 8699 8700 set_node_data(); 8701 if (debug > 1) 8702 fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg); 8703 if (!summary_only && topo.nodes_per_pkg > 1) 8704 
BIC_PRESENT(BIC_Node); 8705 8706 topo.threads_per_core = max_siblings; 8707 if (debug > 1) 8708 fprintf(outf, "max_siblings %d\n", max_siblings); 8709 8710 if (debug < 1) 8711 return; 8712 8713 for (i = 0; i <= topo.max_cpu_num; ++i) { 8714 if (cpu_is_not_present(i)) 8715 continue; 8716 fprintf(outf, 8717 "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n", 8718 i, cpus[i].physical_package_id, cpus[i].die_id, 8719 cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, cpus[i].thread_id); 8720 } 8721 8722 } 8723 8724 void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) 8725 { 8726 int i; 8727 int num_cores = topo.cores_per_node * topo.nodes_per_pkg * topo.num_packages; 8728 int num_threads = topo.threads_per_core * num_cores; 8729 8730 *t = calloc(num_threads, sizeof(struct thread_data)); 8731 if (*t == NULL) 8732 goto error; 8733 8734 for (i = 0; i < num_threads; i++) 8735 (*t)[i].cpu_id = -1; 8736 8737 *c = calloc(num_cores, sizeof(struct core_data)); 8738 if (*c == NULL) 8739 goto error; 8740 8741 for (i = 0; i < num_cores; i++) { 8742 (*c)[i].core_id = -1; 8743 (*c)[i].base_cpu = -1; 8744 } 8745 8746 *p = calloc(topo.num_packages, sizeof(struct pkg_data)); 8747 if (*p == NULL) 8748 goto error; 8749 8750 for (i = 0; i < topo.num_packages; i++) { 8751 (*p)[i].package_id = i; 8752 (*p)[i].base_cpu = -1; 8753 } 8754 8755 return; 8756 error: 8757 err(1, "calloc counters"); 8758 } 8759 8760 /* 8761 * init_counter() 8762 * 8763 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE 8764 */ 8765 void init_counter(struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base, int cpu_id) 8766 { 8767 int pkg_id = cpus[cpu_id].physical_package_id; 8768 int node_id = cpus[cpu_id].logical_node_id; 8769 int core_id = cpus[cpu_id].physical_core_id; 8770 int thread_id = cpus[cpu_id].thread_id; 8771 struct thread_data *t; 8772 struct core_data *c; 8773 struct pkg_data *p; 8774 8775 /* Workaround for systems where physical_node_id==-1 8776 * and logical_node_id==(-1 - topo.num_cpus) 8777 */ 8778 if (node_id < 0) 8779 node_id = 0; 8780 8781 t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id); 8782 c = GET_CORE(core_base, core_id, node_id, pkg_id); 8783 p = GET_PKG(pkg_base, pkg_id); 8784 8785 t->cpu_id = cpu_id; 8786 if (!cpu_is_not_allowed(cpu_id)) { 8787 if (c->base_cpu < 0) 8788 c->base_cpu = t->cpu_id; 8789 if (p->base_cpu < 0) 8790 p->base_cpu = t->cpu_id; 8791 } 8792 8793 c->core_id = core_id; 8794 p->package_id = pkg_id; 8795 } 8796 8797 int initialize_counters(int cpu_id) 8798 { 8799 init_counter(EVEN_COUNTERS, cpu_id); 8800 init_counter(ODD_COUNTERS, cpu_id); 8801 return 0; 8802 } 8803 8804 void allocate_output_buffer() 8805 { 8806 output_buffer = calloc(1, (1 + topo.num_cpus) * 2048); 8807 outp = output_buffer; 8808 if (outp == NULL) 8809 err(-1, "calloc output buffer"); 8810 } 8811 8812 void allocate_fd_percpu(void) 8813 { 8814 fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 8815 if (fd_percpu == NULL) 8816 err(-1, "calloc fd_percpu"); 8817 } 8818 8819 void allocate_irq_buffers(void) 8820 { 8821 irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int)); 8822 if (irq_column_2_cpu == NULL) 8823 err(-1, "calloc %d", topo.num_cpus); 8824 8825 irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 8826 if (irqs_per_cpu == NULL) 8827 err(-1, "calloc %d IRQ", topo.max_cpu_num + 1); 8828 8829 nmi_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 8830 if (nmi_per_cpu == NULL) 8831 err(-1, 
"calloc %d NMI", topo.max_cpu_num + 1); 8832 } 8833 8834 int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p) 8835 { 8836 topo.allowed_cpus++; 8837 if ((int)t->cpu_id == c->base_cpu) 8838 topo.allowed_cores++; 8839 if ((int)t->cpu_id == p->base_cpu) 8840 topo.allowed_packages++; 8841 8842 return 0; 8843 } 8844 8845 void topology_update(void) 8846 { 8847 topo.allowed_cpus = 0; 8848 topo.allowed_cores = 0; 8849 topo.allowed_packages = 0; 8850 for_all_cpus(update_topo, ODD_COUNTERS); 8851 } 8852 8853 void setup_all_buffers(bool startup) 8854 { 8855 topology_probe(startup); 8856 allocate_irq_buffers(); 8857 allocate_fd_percpu(); 8858 allocate_counters(&thread_even, &core_even, &package_even); 8859 allocate_counters(&thread_odd, &core_odd, &package_odd); 8860 allocate_output_buffer(); 8861 for_all_proc_cpus(initialize_counters); 8862 topology_update(); 8863 } 8864 8865 void set_base_cpu(void) 8866 { 8867 int i; 8868 8869 for (i = 0; i < topo.max_cpu_num + 1; ++i) { 8870 if (cpu_is_not_allowed(i)) 8871 continue; 8872 base_cpu = i; 8873 if (debug > 1) 8874 fprintf(outf, "base_cpu = %d\n", base_cpu); 8875 return; 8876 } 8877 err(-ENODEV, "No valid cpus found"); 8878 } 8879 8880 bool has_added_counters(void) 8881 { 8882 /* 8883 * It only makes sense to call this after the command line is parsed, 8884 * otherwise sys structure is not populated. 8885 */ 8886 8887 return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters; 8888 } 8889 8890 void check_msr_access(void) 8891 { 8892 check_dev_msr(); 8893 check_msr_permission(); 8894 8895 if (no_msr) 8896 bic_disable_msr_access(); 8897 } 8898 8899 void check_perf_access(void) 8900 { 8901 if (no_perf || !BIC_IS_ENABLED(BIC_IPC) || !has_instr_count_access()) 8902 bic_enabled &= ~BIC_IPC; 8903 } 8904 8905 bool perf_has_hybrid_devices(void) 8906 { 8907 /* 8908 * 0: unknown 8909 * 1: has separate perf device for p and e core 8910 * -1: doesn't have separate perf device for p and e core 8911 */ 8912 static int cached; 8913 8914 if (cached > 0) 8915 return true; 8916 8917 if (cached < 0) 8918 return false; 8919 8920 if (access("/sys/bus/event_source/devices/cpu_core", F_OK)) { 8921 cached = -1; 8922 return false; 8923 } 8924 8925 if (access("/sys/bus/event_source/devices/cpu_atom", F_OK)) { 8926 cached = -1; 8927 return false; 8928 } 8929 8930 cached = 1; 8931 return true; 8932 } 8933 8934 int added_perf_counters_init_(struct perf_counter_info *pinfo) 8935 { 8936 size_t num_domains = 0; 8937 unsigned int next_domain; 8938 bool *domain_visited; 8939 unsigned int perf_type, perf_config; 8940 double perf_scale; 8941 int fd_perf; 8942 8943 if (!pinfo) 8944 return 0; 8945 8946 const size_t max_num_domains = MAX(topo.max_cpu_num + 1, MAX(topo.max_core_id + 1, topo.max_package_id + 1)); 8947 8948 domain_visited = calloc(max_num_domains, sizeof(*domain_visited)); 8949 8950 while (pinfo) { 8951 switch (pinfo->scope) { 8952 case SCOPE_CPU: 8953 num_domains = topo.max_cpu_num + 1; 8954 break; 8955 8956 case SCOPE_CORE: 8957 num_domains = topo.max_core_id + 1; 8958 break; 8959 8960 case SCOPE_PACKAGE: 8961 num_domains = topo.max_package_id + 1; 8962 break; 8963 } 8964 8965 /* Allocate buffer for file descriptor for each domain. 
*/ 8966 pinfo->fd_perf_per_domain = calloc(num_domains, sizeof(*pinfo->fd_perf_per_domain)); 8967 if (!pinfo->fd_perf_per_domain) 8968 errx(1, "%s: alloc %s", __func__, "fd_perf_per_domain"); 8969 8970 for (size_t i = 0; i < num_domains; ++i) 8971 pinfo->fd_perf_per_domain[i] = -1; 8972 8973 pinfo->num_domains = num_domains; 8974 pinfo->scale = 1.0; 8975 8976 memset(domain_visited, 0, max_num_domains * sizeof(*domain_visited)); 8977 8978 for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) { 8979 8980 next_domain = cpu_to_domain(pinfo, cpu); 8981 8982 assert(next_domain < num_domains); 8983 8984 if (cpu_is_not_allowed(cpu)) 8985 continue; 8986 8987 if (domain_visited[next_domain]) 8988 continue; 8989 8990 /* 8991 * Intel hybrid platforms expose different perf devices for P and E cores. 8992 * Instead of one "/sys/bus/event_source/devices/cpu" device, there are 8993 * "/sys/bus/event_source/devices/{cpu_core,cpu_atom}". 8994 * 8995 * This makes it more complicated for the user, because most of the counters 8996 * are available on both devices and would otherwise have to be handled manually. 8997 * 8998 * The code below allows the user to use the old "cpu" name, which is translated accordingly. 8999 */ 9000 const char *perf_device = pinfo->device; 9001 9002 if (strcmp(perf_device, "cpu") == 0 && perf_has_hybrid_devices()) { 9003 switch (cpus[cpu].type) { 9004 case INTEL_PCORE_TYPE: 9005 perf_device = "cpu_core"; 9006 break; 9007 9008 case INTEL_ECORE_TYPE: 9009 perf_device = "cpu_atom"; 9010 break; 9011 9012 default: /* Don't change, we will probably fail and report a problem soon. */ 9013 break; 9014 } 9015 } 9016 9017 perf_type = read_perf_type(perf_device); 9018 if (perf_type == (unsigned int)-1) { 9019 warnx("%s: perf/%s/%s: failed to read %s", 9020 __func__, perf_device, pinfo->event, "type"); 9021 continue; 9022 } 9023 9024 perf_config = read_perf_config(perf_device, pinfo->event); 9025 if (perf_config == (unsigned int)-1) { 9026 warnx("%s: perf/%s/%s: failed to read %s", 9027 __func__, perf_device, pinfo->event, "config"); 9028 continue; 9029 } 9030 9031 /* Scale is not required, some counters just don't have it.
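 * (read_perf_scale() presumably reads the per-event sysfs scale file, e.g.
 * /sys/bus/event_source/devices/<device>/events/<event>.scale -- the exact
 * path is an assumption here; when no scale is exposed, we fall back to
 * 1.0 just below.)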
*/ 9032 perf_scale = read_perf_scale(perf_device, pinfo->event); 9033 if (perf_scale == 0.0) 9034 perf_scale = 1.0; 9035 9036 fd_perf = open_perf_counter(cpu, perf_type, perf_config, -1, 0); 9037 if (fd_perf == -1) { 9038 warnx("%s: perf/%s/%s: failed to open counter on cpu%d", 9039 __func__, perf_device, pinfo->event, cpu); 9040 continue; 9041 } 9042 9043 domain_visited[next_domain] = 1; 9044 pinfo->fd_perf_per_domain[next_domain] = fd_perf; 9045 pinfo->scale = perf_scale; 9046 9047 if (debug) 9048 fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n", 9049 perf_device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); 9050 } 9051 9052 pinfo = pinfo->next; 9053 } 9054 9055 free(domain_visited); 9056 9057 return 0; 9058 } 9059 9060 void added_perf_counters_init(void) 9061 { 9062 if (added_perf_counters_init_(sys.perf_tp)) 9063 errx(1, "%s: %s", __func__, "thread"); 9064 9065 if (added_perf_counters_init_(sys.perf_cp)) 9066 errx(1, "%s: %s", __func__, "core"); 9067 9068 if (added_perf_counters_init_(sys.perf_pp)) 9069 errx(1, "%s: %s", __func__, "package"); 9070 } 9071 9072 int parse_telem_info_file(int fd_dir, const char *info_filename, const char *format, unsigned long *output) 9073 { 9074 int fd_telem_info; 9075 FILE *file_telem_info; 9076 unsigned long value; 9077 9078 fd_telem_info = openat(fd_dir, info_filename, O_RDONLY); 9079 if (fd_telem_info == -1) 9080 return -1; 9081 9082 file_telem_info = fdopen(fd_telem_info, "r"); 9083 if (file_telem_info == NULL) { 9084 close(fd_telem_info); 9085 return -1; 9086 } 9087 9088 if (fscanf(file_telem_info, format, &value) != 1) { 9089 fclose(file_telem_info); 9090 return -1; 9091 } 9092 9093 fclose(file_telem_info); 9094 9095 *output = value; 9096 9097 return 0; 9098 } 9099 9100 struct pmt_mmio *pmt_mmio_open(unsigned int target_guid) 9101 { 9102 struct pmt_diriter_t pmt_iter; 9103 const struct dirent *entry; 9104 struct stat st; 9105 int fd_telem_dir, fd_pmt; 9106 unsigned long guid, size, offset; 9107 size_t mmap_size; 9108 void *mmio; 9109 struct pmt_mmio *head = NULL, *last = NULL; 9110 struct pmt_mmio *new_pmt = NULL; 9111 9112 if (stat(SYSFS_TELEM_PATH, &st) == -1) 9113 return NULL; 9114 9115 pmt_diriter_init(&pmt_iter); 9116 entry = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); 9117 if (!entry) { 9118 pmt_diriter_remove(&pmt_iter); 9119 return NULL; 9120 } 9121 9122 for ( ; entry != NULL; entry = pmt_diriter_next(&pmt_iter)) { 9123 if (fstatat(dirfd(pmt_iter.dir), entry->d_name, &st, 0) == -1) 9124 break; 9125 9126 if (!S_ISDIR(st.st_mode)) 9127 continue; 9128 9129 fd_telem_dir = openat(dirfd(pmt_iter.dir), entry->d_name, O_RDONLY); 9130 if (fd_telem_dir == -1) 9131 break; 9132 9133 if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) { 9134 close(fd_telem_dir); 9135 break; 9136 } 9137 9138 if (parse_telem_info_file(fd_telem_dir, "size", "%lu", &size)) { 9139 close(fd_telem_dir); 9140 break; 9141 } 9142 9143 if (guid != target_guid) { 9144 close(fd_telem_dir); 9145 continue; 9146 } 9147 9148 if (parse_telem_info_file(fd_telem_dir, "offset", "%lu", &offset)) { 9149 close(fd_telem_dir); 9150 break; 9151 } 9152 9153 assert(offset == 0); 9154 9155 fd_pmt = openat(fd_telem_dir, "telem", O_RDONLY); 9156 if (fd_pmt == -1) 9157 goto loop_cleanup_and_break; 9158 9159 mmap_size = ROUND_UP_TO_PAGE_SIZE(size); 9160 mmio = mmap(0, mmap_size, PROT_READ, MAP_SHARED, fd_pmt, 0); 9161 if (mmio != MAP_FAILED) { 9162 if (debug) 9163 fprintf(stderr, "%s: 0x%lx mmaped at: %p\n", __func__, guid, mmio); 9164 9165 new_pmt = calloc(1, 
sizeof(*new_pmt)); 9166 9167 if (!new_pmt) { 9168 fprintf(stderr, "%s: Failed to allocate pmt_mmio\n", __func__); 9169 exit(1); 9170 } 9171 9172 /* 9173 * Create linked list of mmaped regions, 9174 * but preserve the ordering from sysfs. 9175 * Ordering is important for the user to 9176 * use the seq=%u parameter when adding a counter. 9177 */ 9178 new_pmt->guid = guid; 9179 new_pmt->mmio_base = mmio; 9180 new_pmt->pmt_offset = offset; 9181 new_pmt->size = size; 9182 new_pmt->next = pmt_mmios; 9183 9184 if (last) 9185 last->next = new_pmt; 9186 else 9187 head = new_pmt; 9188 9189 last = new_pmt; 9190 } 9191 9192 loop_cleanup_and_break: 9193 close(fd_pmt); 9194 close(fd_telem_dir); 9195 } 9196 9197 pmt_diriter_remove(&pmt_iter); 9198 9199 /* 9200 * If we found something, stick the just- 9201 * created linked list on the front. 9202 */ 9203 if (head) 9204 pmt_mmios = head; 9205 9206 return head; 9207 } 9208 9209 struct pmt_mmio *pmt_mmio_find(unsigned int guid) 9210 { 9211 struct pmt_mmio *pmmio = pmt_mmios; 9212 9213 while (pmmio) { 9214 if (pmmio->guid == guid) 9215 return pmmio; 9216 9217 pmmio = pmmio->next; 9218 } 9219 9220 return NULL; 9221 } 9222 9223 void *pmt_get_counter_pointer(struct pmt_mmio *pmmio, unsigned long counter_offset) 9224 { 9225 char *ret; 9226 9227 /* Get base of mmaped PMT file. */ 9228 ret = (char *)pmmio->mmio_base; 9229 9230 /* 9231 * Apply PMT MMIO offset to obtain beginning of the mmaped telemetry data. 9232 * It's not guaranteed that the mmaped memory begins with the telemetry data 9233 * - we might have to apply the offset first. 9234 */ 9235 ret += pmmio->pmt_offset; 9236 9237 /* Apply the counter offset to get the address to the mmaped counter. */ 9238 ret += counter_offset; 9239 9240 return ret; 9241 } 9242 9243 struct pmt_mmio *pmt_add_guid(unsigned int guid, unsigned int seq) 9244 { 9245 struct pmt_mmio *ret; 9246 9247 ret = pmt_mmio_find(guid); 9248 if (!ret) 9249 ret = pmt_mmio_open(guid); 9250 9251 while (ret && seq) { 9252 ret = ret->next; 9253 --seq; 9254 } 9255 9256 return ret; 9257 } 9258 9259 enum pmt_open_mode { 9260 PMT_OPEN_TRY, /* Open failure is not an error. */ 9261 PMT_OPEN_REQUIRED, /* Open failure is a fatal error. */ 9262 }; 9263 9264 struct pmt_counter *pmt_find_counter(struct pmt_counter *pcounter, const char *name) 9265 { 9266 while (pcounter) { 9267 if (strcmp(pcounter->name, name) == 0) 9268 break; 9269 9270 pcounter = pcounter->next; 9271 } 9272 9273 return pcounter; 9274 } 9275 9276 struct pmt_counter **pmt_get_scope_root(enum counter_scope scope) 9277 { 9278 switch (scope) { 9279 case SCOPE_CPU: 9280 return &sys.pmt_tp; 9281 case SCOPE_CORE: 9282 return &sys.pmt_cp; 9283 case SCOPE_PACKAGE: 9284 return &sys.pmt_pp; 9285 } 9286 9287 __builtin_unreachable(); 9288 } 9289 9290 void pmt_counter_add_domain(struct pmt_counter *pcounter, unsigned long *pmmio, unsigned int domain_id) 9291 { 9292 /* Make sure the new domain fits.
*/ 9293 if (domain_id >= pcounter->num_domains) 9294 pmt_counter_resize(pcounter, domain_id + 1); 9295 9296 assert(pcounter->domains); 9297 assert(domain_id < pcounter->num_domains); 9298 9299 pcounter->domains[domain_id].pcounter = pmmio; 9300 } 9301 9302 int pmt_add_counter(unsigned int guid, unsigned int seq, const char *name, enum pmt_datatype type, 9303 unsigned int lsb, unsigned int msb, unsigned int offset, enum counter_scope scope, 9304 enum counter_format format, unsigned int domain_id, enum pmt_open_mode mode) 9305 { 9306 struct pmt_mmio *mmio; 9307 struct pmt_counter *pcounter; 9308 struct pmt_counter **const pmt_root = pmt_get_scope_root(scope); 9309 bool new_counter = false; 9310 int conflict = 0; 9311 9312 if (lsb > msb) { 9313 fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "lsb <= msb", name); 9314 exit(1); 9315 } 9316 9317 if (msb >= 64) { 9318 fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "msb < 64", name); 9319 exit(1); 9320 } 9321 9322 mmio = pmt_add_guid(guid, seq); 9323 if (!mmio) { 9324 if (mode != PMT_OPEN_TRY) { 9325 fprintf(stderr, "%s: failed to map PMT MMIO for guid %x, seq %u\n", __func__, guid, seq); 9326 exit(1); 9327 } 9328 9329 return 1; 9330 } 9331 9332 if (offset >= mmio->size) { 9333 if (mode != PMT_OPEN_TRY) { 9334 fprintf(stderr, "%s: offset %u outside of PMT MMIO size %u\n", __func__, offset, mmio->size); 9335 exit(1); 9336 } 9337 9338 return 1; 9339 } 9340 9341 pcounter = pmt_find_counter(*pmt_root, name); 9342 if (!pcounter) { 9343 pcounter = calloc(1, sizeof(*pcounter)); 9344 new_counter = true; 9345 } 9346 9347 if (new_counter) { 9348 strncpy(pcounter->name, name, ARRAY_SIZE(pcounter->name) - 1); 9349 pcounter->type = type; 9350 pcounter->scope = scope; 9351 pcounter->lsb = lsb; 9352 pcounter->msb = msb; 9353 pcounter->format = format; 9354 } else { 9355 conflict += pcounter->type != type; 9356 conflict += pcounter->scope != scope; 9357 conflict += pcounter->lsb != lsb; 9358 conflict += pcounter->msb != msb; 9359 conflict += pcounter->format != format; 9360 } 9361 9362 if (conflict) { 9363 fprintf(stderr, "%s: conflicting parameters for the PMT counter with the same name %s\n", 9364 __func__, name); 9365 exit(1); 9366 } 9367 9368 pmt_counter_add_domain(pcounter, pmt_get_counter_pointer(mmio, offset), domain_id); 9369 9370 if (new_counter) { 9371 pcounter->next = *pmt_root; 9372 *pmt_root = pcounter; 9373 } 9374 9375 return 0; 9376 } 9377 9378 void pmt_init(void) 9379 { 9380 int cpu_num; 9381 unsigned long seq, offset, mod_num; 9382 9383 if (BIC_IS_ENABLED(BIC_Diec6)) { 9384 pmt_add_counter(PMT_MTL_DC6_GUID, PMT_MTL_DC6_SEQ, "Die%c6", PMT_TYPE_XTAL_TIME, 9385 PMT_COUNTER_MTL_DC6_LSB, PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET, 9386 SCOPE_PACKAGE, FORMAT_DELTA, 0, PMT_OPEN_TRY); 9387 } 9388 9389 if (BIC_IS_ENABLED(BIC_CPU_c1e)) { 9390 seq = 0; 9391 offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE; 9392 mod_num = 0; /* Relative module number for current PMT file. */ 9393 9394 /* Open the counter for each CPU. */ 9395 for (cpu_num = 0; cpu_num < topo.max_cpu_num + 1;) { 9396 9397 if (cpu_is_not_allowed(cpu_num)) 9398 goto next_loop_iter; 9399 9400 /* 9401 * Set the scope to CPU, even though CWF reports the counter per module. 9402 * CPUs inside the same module will read from the same location, instead of reporting zeros. 9403 * 9404 * CWF with newer firmware might require a PMT_TYPE_XTAL_TIME instead of PMT_TYPE_TCORE_CLOCK.
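 *
 * Worked example of the mapping below, assuming (illustratively)
 * PMT_COUNTER_CWF_CPUS_PER_MODULE == 4: cpu0..cpu3 read the counter at
 * PMT_COUNTER_CWF_MC1E_OFFSET_BASE (module 0), cpu4..cpu7 read
 * OFFSET_BASE + PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT (module 1), and so
 * on, until PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE modules have been
 * consumed; then seq advances to the next telemetry file and the offset
 * resets to OFFSET_BASE.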
void pmt_init(void)
{
	int cpu_num;
	unsigned long seq, offset, mod_num;

	if (BIC_IS_ENABLED(BIC_Diec6)) {
		pmt_add_counter(PMT_MTL_DC6_GUID, PMT_MTL_DC6_SEQ, "Die%c6", PMT_TYPE_XTAL_TIME,
				PMT_COUNTER_MTL_DC6_LSB, PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET,
				SCOPE_PACKAGE, FORMAT_DELTA, 0, PMT_OPEN_TRY);
	}

	if (BIC_IS_ENABLED(BIC_CPU_c1e)) {
		seq = 0;
		offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE;
		mod_num = 0;	/* Relative module number for the current PMT file. */

		/* Open the counter for each CPU. */
		for (cpu_num = 0; cpu_num < topo.max_cpu_num;) {

			if (cpu_is_not_allowed(cpu_num))
				goto next_loop_iter;

			/*
			 * Set the scope to CPU, even though CWF reports the counter
			 * per module. CPUs inside the same module will read from the
			 * same location, instead of reporting zeros.
			 *
			 * CWF with newer firmware might require PMT_TYPE_XTAL_TIME
			 * instead of PMT_TYPE_TCORE_CLOCK.
			 */
			pmt_add_counter(PMT_CWF_MC1E_GUID, seq, "CPU%c1e", PMT_TYPE_TCORE_CLOCK,
					PMT_COUNTER_CWF_MC1E_LSB, PMT_COUNTER_CWF_MC1E_MSB, offset, SCOPE_CPU,
					FORMAT_DELTA, cpu_num, PMT_OPEN_TRY);

			/*
			 * The logic to advance to the next loop iteration is fairly
			 * involved, so keep it behind a label.
			 */
next_loop_iter:
			/*
			 * Advance the CPU number and check whether the offset should
			 * also advance to the next counter inside the PMT file.
			 *
			 * On the Clearwater Forest platform the counter is reported
			 * per module, so open the same counter for all of the CPUs
			 * inside the module. That way, the reported table shows the
			 * correct value for all of the CPUs inside the module,
			 * instead of zeros.
			 */
			++cpu_num;
			if (cpu_num % PMT_COUNTER_CWF_CPUS_PER_MODULE == 0) {
				offset += PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT;
				++mod_num;
			}

			/*
			 * Each PMT file covers PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE
			 * modules.
			 *
			 * When that number is reached, seq must be incremented to
			 * advance to the next file in the sequence, and the offset
			 * inside that file and the module counter have to be reset.
			 */
			if (mod_num == PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE) {
				++seq;
				offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE;
				mod_num = 0;
			}
		}
	}
}
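/*
 * Worked example of the CPU%c1e mapping above, with illustrative constants
 * (the real values come from the PMT_COUNTER_CWF_* defines): assuming
 * CPUS_PER_MODULE == 4 and NUM_MODULES_PER_FILE == 2,
 *
 *	cpu 0..3  -> seq 0, offset = BASE
 *	cpu 4..7  -> seq 0, offset = BASE + INCREMENT
 *	cpu 8..11 -> seq 1, offset = BASE
 *
 * i.e. all CPUs of a module share one counter location, and after every
 * NUM_MODULES_PER_FILE modules the search advances to the next PMT file.
 */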
void turbostat_init()
{
	setup_all_buffers(true);
	set_base_cpu();
	check_msr_access();
	check_perf_access();
	process_cpuid();
	counter_info_init();
	probe_pm_features();
	msr_perf_init();
	linux_perf_init();
	rapl_perf_init();
	cstate_perf_init();
	added_perf_counters_init();
	pmt_init();

	for_all_cpus(get_cpu_type, ODD_COUNTERS);
	for_all_cpus(get_cpu_type, EVEN_COUNTERS);

	if (BIC_IS_ENABLED(BIC_IPC) && has_aperf_access && get_instr_count_fd(base_cpu) != -1)
		BIC_PRESENT(BIC_IPC);

	/*
	 * If the TSC tweak is needed but we couldn't get it,
	 * disable the affected BICs, since they can't be reported accurately.
	 */
	if (platform->enable_tsc_tweak && !has_base_hz) {
		bic_enabled &= ~BIC_Busy;
		bic_enabled &= ~BIC_Bzy_MHz;
	}
}

void affinitize_child(void)
{
	/* Prefer cpu_possible_set, if available. */
	if (sched_setaffinity(0, cpu_possible_setsize, cpu_possible_set)) {
		warn("sched_setaffinity cpu_possible_set");

		/* Otherwise, allow the child to run on the same cpu set as turbostat. */
		if (sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set))
			warn("sched_setaffinity cpu_allowed_set");
	}
}

int fork_it(char **argv)
{
	pid_t child_pid;
	int status;

	snapshot_proc_sysfs_files();
	status = for_all_cpus(get_counters, EVEN_COUNTERS);
	first_counter_read = 0;
	if (status)
		exit(status);
	gettimeofday(&tv_even, (struct timezone *)NULL);

	child_pid = fork();
	if (!child_pid) {
		/* child */
		affinitize_child();
		execvp(argv[0], argv);
		err(errno, "exec %s", argv[0]);
	} else {

		/* parent */
		if (child_pid == -1)
			err(1, "fork");

		signal(SIGINT, SIG_IGN);
		signal(SIGQUIT, SIG_IGN);
		if (waitpid(child_pid, &status, 0) == -1)
			err(status, "waitpid");

		if (WIFEXITED(status))
			status = WEXITSTATUS(status);
	}
	/*
	 * n.b. fork_it() does not check for errors from for_all_cpus()
	 * because re-starting is problematic when forking.
	 */
	snapshot_proc_sysfs_files();
	for_all_cpus(get_counters, ODD_COUNTERS);
	gettimeofday(&tv_odd, (struct timezone *)NULL);
	timersub(&tv_odd, &tv_even, &tv_delta);
	if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
		fprintf(outf, "%s: Counter reset detected\n", progname);

	compute_average(EVEN_COUNTERS);
	format_all_counters(EVEN_COUNTERS);

	fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0);

	flush_output_stderr();

	return status;
}
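/*
 * Usage sketch for the fork mode implemented above (the workload is just an
 * example): `turbostat --quiet make -j8` snapshots all counters, runs
 * `make -j8` to completion, snapshots again, and prints a single interval
 * covering the workload, followed by the elapsed time in seconds.
 */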
int get_and_dump_counters(void)
{
	int status;

	snapshot_proc_sysfs_files();
	status = for_all_cpus(get_counters, ODD_COUNTERS);
	if (status)
		return status;

	status = for_all_cpus(dump_counters, ODD_COUNTERS);
	if (status)
		return status;

	flush_output_stdout();

	return status;
}

void print_version()
{
	fprintf(outf, "turbostat version 2025.02.02 - Len Brown <lenb@kernel.org>\n");
}

#define COMMAND_LINE_SIZE 2048

void print_bootcmd(void)
{
	char bootcmd[COMMAND_LINE_SIZE];
	FILE *fp;
	int ret;

	memset(bootcmd, 0, COMMAND_LINE_SIZE);
	fp = fopen("/proc/cmdline", "r");
	if (!fp)
		return;

	ret = fread(bootcmd, sizeof(char), COMMAND_LINE_SIZE - 1, fp);
	if (ret) {
		bootcmd[ret] = '\0';
		/* The last character is already '\n'. */
		fprintf(outf, "Kernel command line: %s", bootcmd);
	}

	fclose(fp);
}

struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name)
{
	struct msr_counter *mp;

	for (mp = head; mp; mp = mp->next) {
		if (debug)
			fprintf(stderr, "%s: %s %s\n", __func__, name, mp->name);
		if (!strncmp(name, mp->name, strlen(mp->name)))
			return mp;
	}
	return NULL;
}

int add_counter(unsigned int msr_num, char *path, char *name,
		unsigned int width, enum counter_scope scope,
		enum counter_type type, enum counter_format format, int flags, int id)
{
	struct msr_counter *msrp;

	if (no_msr && msr_num)
		errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num);

	if (debug)
		fprintf(stderr, "%s(msr%d, %s, %s, width%d, scope%d, type%d, format%d, flags%x, id%d)\n",
			__func__, msr_num, path, name, width, scope, type, format, flags, id);

	switch (scope) {

	case SCOPE_CPU:
		msrp = find_msrp_by_name(sys.tp, name);
		if (msrp) {
			if (debug)
				fprintf(stderr, "%s: %s FOUND\n", __func__, name);
			break;
		}
		if (sys.added_thread_counters++ >= MAX_ADDED_THREAD_COUNTERS) {
			warnx("ignoring thread counter %s", name);
			return -1;
		}
		break;
	case SCOPE_CORE:
		msrp = find_msrp_by_name(sys.cp, name);
		if (msrp) {
			if (debug)
				fprintf(stderr, "%s: %s FOUND\n", __func__, name);
			break;
		}
		if (sys.added_core_counters++ >= MAX_ADDED_CORE_COUNTERS) {
			warnx("ignoring core counter %s", name);
			return -1;
		}
		break;
	case SCOPE_PACKAGE:
		msrp = find_msrp_by_name(sys.pp, name);
		if (msrp) {
			if (debug)
				fprintf(stderr, "%s: %s FOUND\n", __func__, name);
			break;
		}
		if (sys.added_package_counters++ >= MAX_ADDED_PACKAGE_COUNTERS) {
			warnx("ignoring package counter %s", name);
			return -1;
		}
		break;
	default:
		warnx("ignoring counter %s with unknown scope", name);
		return -1;
	}

	if (msrp == NULL) {
		msrp = calloc(1, sizeof(struct msr_counter));
		if (msrp == NULL)
			err(-1, "calloc msr_counter");

		msrp->msr_num = msr_num;
		strncpy(msrp->name, name, NAME_BYTES - 1);
		msrp->width = width;
		msrp->type = type;
		msrp->format = format;
		msrp->flags = flags;

		switch (scope) {
		case SCOPE_CPU:
			msrp->next = sys.tp;
			sys.tp = msrp;
			break;
		case SCOPE_CORE:
			msrp->next = sys.cp;
			sys.cp = msrp;
			break;
		case SCOPE_PACKAGE:
			msrp->next = sys.pp;
			sys.pp = msrp;
			break;
		}
	}

	if (path) {
		struct sysfs_path *sp;

		sp = calloc(1, sizeof(struct sysfs_path));
		if (sp == NULL) {
			perror("calloc");
			exit(1);
		}
		strncpy(sp->path, path, PATH_BYTES - 1);
		sp->id = id;
		sp->next = msrp->sp;
		msrp->sp = sp;
	}

	return 0;
}
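/*
 * Example of the sysfs-path flavor of add_counter() (this mirrors what
 * probe_sysfs() below does for cpuidle residency):
 *
 *	add_counter(0, "cpuidle/state3/time", "C3%", 64, SCOPE_CPU,
 *		    COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0);
 *
 * msr_num == 0 means "no MSR": the value comes from the per-CPU sysfs file
 * instead, and is shown as a percentage of the measurement interval.
 */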
/*
 * Initialize the fields used for identifying and opening the counter.
 *
 * Defer the initialization of any runtime buffers for actually reading
 * the counters until all perf counters are initialized, so that we can
 * later easily call re_initialize().
 */
struct perf_counter_info *make_perf_counter_info(const char *perf_device,
						 const char *perf_event,
						 const char *name,
						 unsigned int width,
						 enum counter_scope scope,
						 enum counter_type type, enum counter_format format)
{
	struct perf_counter_info *pinfo;

	pinfo = calloc(1, sizeof(*pinfo));
	if (!pinfo)
		errx(1, "%s: Failed to allocate %s/%s", __func__, perf_device, perf_event);

	strncpy(pinfo->device, perf_device, ARRAY_SIZE(pinfo->device) - 1);
	strncpy(pinfo->event, perf_event, ARRAY_SIZE(pinfo->event) - 1);

	strncpy(pinfo->name, name, ARRAY_SIZE(pinfo->name) - 1);
	pinfo->width = width;
	pinfo->scope = scope;
	pinfo->type = type;
	pinfo->format = format;

	return pinfo;
}

int add_perf_counter(const char *perf_device, const char *perf_event, const char *name_buffer, unsigned int width,
		     enum counter_scope scope, enum counter_type type, enum counter_format format)
{
	struct perf_counter_info *pinfo;

	switch (scope) {
	case SCOPE_CPU:
		if (sys.added_thread_perf_counters >= MAX_ADDED_THREAD_COUNTERS) {
			warnx("ignoring thread counter perf/%s/%s", perf_device, perf_event);
			return -1;
		}
		break;

	case SCOPE_CORE:
		if (sys.added_core_perf_counters >= MAX_ADDED_CORE_COUNTERS) {
			warnx("ignoring core counter perf/%s/%s", perf_device, perf_event);
			return -1;
		}
		break;

	case SCOPE_PACKAGE:
		if (sys.added_package_perf_counters >= MAX_ADDED_PACKAGE_COUNTERS) {
			warnx("ignoring package counter perf/%s/%s", perf_device, perf_event);
			return -1;
		}
		break;
	}

	pinfo = make_perf_counter_info(perf_device, perf_event, name_buffer, width, scope, type, format);

	if (!pinfo)
		return -1;

	switch (scope) {
	case SCOPE_CPU:
		pinfo->next = sys.perf_tp;
		sys.perf_tp = pinfo;
		++sys.added_thread_perf_counters;
		break;

	case SCOPE_CORE:
		pinfo->next = sys.perf_cp;
		sys.perf_cp = pinfo;
		++sys.added_core_perf_counters;
		break;

	case SCOPE_PACKAGE:
		pinfo->next = sys.perf_pp;
		sys.perf_pp = pinfo;
		++sys.added_package_perf_counters;
		break;
	}

	// FIXME: 'debug' may not have been parsed yet when this runs
	if (debug)
		fprintf(stderr, "%s: %s/%s, name: %s, scope%d\n",
			__func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope);

	return 0;
}

void parse_add_command_msr(char *add_command)
{
	int msr_num = 0;
	char *path = NULL;
	char perf_device[PERF_DEV_NAME_BYTES] = "";
	char perf_event[PERF_EVT_NAME_BYTES] = "";
	char name_buffer[PERF_NAME_BYTES] = "";
	int width = 64;
	int fail = 0;
	enum counter_scope scope = SCOPE_CPU;
	enum counter_type type = COUNTER_CYCLES;
	enum counter_format format = FORMAT_DELTA;

	while (add_command) {

		if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
			goto next;

		if (sscanf(add_command, "msr%d", &msr_num) == 1)
			goto next;

		BUILD_BUG_ON(ARRAY_SIZE(perf_device) <= 31);
		BUILD_BUG_ON(ARRAY_SIZE(perf_event) <= 31);
		if (sscanf(add_command, "perf/%31[^/]/%31[^,]", &perf_device[0], &perf_event[0]) == 2)
			goto next;

		if (*add_command == '/') {
			path = add_command;
			goto next;
		}

		if (sscanf(add_command, "u%d", &width) == 1) {
			if ((width == 32) || (width == 64))
				goto next;
			width = 64;
		}
		if (!strncmp(add_command, "cpu", strlen("cpu"))) {
			scope = SCOPE_CPU;
			goto next;
		}
		if (!strncmp(add_command, "core", strlen("core"))) {
			scope = SCOPE_CORE;
			goto next;
		}
		if (!strncmp(add_command, "package", strlen("package"))) {
			scope = SCOPE_PACKAGE;
			goto next;
		}
		if (!strncmp(add_command, "cycles", strlen("cycles"))) {
			type = COUNTER_CYCLES;
			goto next;
		}
		if (!strncmp(add_command, "seconds", strlen("seconds"))) {
			type = COUNTER_SECONDS;
			goto next;
		}
		if (!strncmp(add_command, "usec", strlen("usec"))) {
			type = COUNTER_USEC;
			goto next;
		}
		if (!strncmp(add_command, "raw", strlen("raw"))) {
			format = FORMAT_RAW;
			goto next;
		}
		if (!strncmp(add_command, "delta", strlen("delta"))) {
			format = FORMAT_DELTA;
			goto next;
		}
		if (!strncmp(add_command, "percent", strlen("percent"))) {
			format = FORMAT_PERCENT;
			goto next;
		}

		BUILD_BUG_ON(ARRAY_SIZE(name_buffer) <= 18);
		if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {
			char *eos;

			eos = strchr(name_buffer, ',');
			if (eos)
				*eos = '\0';
			goto next;
		}

next:
		add_command = strchr(add_command, ',');
		if (add_command) {
			*add_command = '\0';
			add_command++;
		}

	}
	if ((msr_num == 0) && (path == NULL) && (perf_device[0] == '\0' || perf_event[0] == '\0')) {
		fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event) required\n");
		fail++;
	}

	/* Test for non-empty perf_device and perf_event. */
	const bool is_perf_counter = perf_device[0] && perf_event[0];

	/* Generate a default column header. */
	if (*name_buffer == '\0') {
		if (is_perf_counter) {
			snprintf(name_buffer, ARRAY_SIZE(name_buffer), "perf/%s", perf_event);
		} else {
			if (width == 32)
				sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
			else
				sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
		}
	}

	if (is_perf_counter) {
		if (add_perf_counter(perf_device, perf_event, name_buffer, width, scope, type, format))
			fail++;
	} else {
		if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0, 0))
			fail++;
	}

	if (fail) {
		help();
		exit(1);
	}
}
bool starts_with(const char *str, const char *prefix)
{
	return strncmp(prefix, str, strlen(prefix)) == 0;
}

int pmt_parse_from_path(const char *target_path, unsigned int *out_guid, unsigned int *out_seq)
{
	struct pmt_diriter_t pmt_iter;
	const struct dirent *dirname;
	struct stat stat, target_stat;
	int fd_telem_dir = -1;
	int fd_target_dir;
	unsigned int seq = 0;
	unsigned long guid, target_guid;
	int ret = -1;

	fd_target_dir = open(target_path, O_RDONLY | O_DIRECTORY);
	if (fd_target_dir == -1)
		return -1;

	if (fstat(fd_target_dir, &target_stat) == -1) {
		fprintf(stderr, "%s: Failed to stat the target: %s\n", __func__, strerror(errno));
		exit(1);
	}

	if (parse_telem_info_file(fd_target_dir, "guid", "%lx", &target_guid)) {
		fprintf(stderr, "%s: Failed to parse the target guid file: %s\n", __func__, strerror(errno));
		exit(1);
	}

	close(fd_target_dir);

	pmt_diriter_init(&pmt_iter);

	for (dirname = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); dirname != NULL;
	     dirname = pmt_diriter_next(&pmt_iter)) {

		fd_telem_dir = openat(dirfd(pmt_iter.dir), dirname->d_name, O_RDONLY | O_DIRECTORY);
		if (fd_telem_dir == -1)
			continue;

		if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) {
			fprintf(stderr, "%s: Failed to parse the guid file: %s\n", __func__, strerror(errno));
			close(fd_telem_dir);
			fd_telem_dir = -1;
			continue;
		}

		if (fstat(fd_telem_dir, &stat) == -1) {
			fprintf(stderr, "%s: Failed to stat %s directory: %s\n", __func__,
				dirname->d_name, strerror(errno));
			close(fd_telem_dir);
			fd_telem_dir = -1;
			continue;
		}

		/*
		 * If we have reached the same directory as the target, exit
		 * the loop. Seq has the correct value now.
		 */
		if (stat.st_dev == target_stat.st_dev && stat.st_ino == target_stat.st_ino) {
			ret = 0;
			break;
		}

		/*
		 * If we have reached a directory with the same guid, but it
		 * is not the target directory yet, increment seq and
		 * continue the search.
		 */
		if (guid == target_guid)
			++seq;

		close(fd_telem_dir);
		fd_telem_dir = -1;
	}

	pmt_diriter_remove(&pmt_iter);

	if (fd_telem_dir != -1)
		close(fd_telem_dir);

	if (!ret) {
		*out_guid = target_guid;
		*out_seq = seq;
	}

	return ret;
}

void parse_add_command_pmt(char *add_command)
{
	char *name = NULL;
	char *type_name = NULL;
	char *format_name = NULL;
	char *direct_path = NULL;
	static const char direct_path_prefix[] = "path=";
	unsigned int offset;
	unsigned int lsb;
	unsigned int msb;
	unsigned int guid;
	unsigned int seq = 0;	/* By default, pick the first file in a sequence with a given GUID. */
	unsigned int domain_id;
	enum counter_scope scope = SCOPE_CPU;
	enum pmt_datatype type = PMT_TYPE_RAW;
	enum counter_format format = FORMAT_RAW;
	bool has_offset = false;
	bool has_lsb = false;
	bool has_msb = false;
	bool has_format = true;	/* Format has a default value. */
	bool has_guid = false;
	bool has_scope = false;
	bool has_type = true;	/* Type has a default value. */

	/* Consume the "pmt," prefix. */
	add_command = strchr(add_command, ',');
	if (!add_command) {
		help();
		exit(1);
	}
	++add_command;

	while (add_command) {
		if (starts_with(add_command, "name=")) {
			name = add_command + strlen("name=");
			goto next;
		}

		if (starts_with(add_command, "type=")) {
			type_name = add_command + strlen("type=");
			goto next;
		}

		if (starts_with(add_command, "domain=")) {
			const size_t prefix_len = strlen("domain=");

			if (sscanf(add_command + prefix_len, "cpu%u", &domain_id) == 1) {
				scope = SCOPE_CPU;
				has_scope = true;
			} else if (sscanf(add_command + prefix_len, "core%u", &domain_id) == 1) {
				scope = SCOPE_CORE;
				has_scope = true;
			} else if (sscanf(add_command + prefix_len, "package%u", &domain_id) == 1) {
				scope = SCOPE_PACKAGE;
				has_scope = true;
			}

			if (!has_scope) {
				fprintf(stderr, "%s: invalid value for domain. Expected cpu%%u, core%%u or package%%u.\n",
					__func__);
				exit(1);
			}

			goto next;
		}

		if (starts_with(add_command, "format=")) {
			format_name = add_command + strlen("format=");
			goto next;
		}

		if (sscanf(add_command, "offset=%u", &offset) == 1) {
			has_offset = true;
			goto next;
		}

		if (sscanf(add_command, "lsb=%u", &lsb) == 1) {
			has_lsb = true;
			goto next;
		}

		if (sscanf(add_command, "msb=%u", &msb) == 1) {
			has_msb = true;
			goto next;
		}

		if (sscanf(add_command, "guid=%x", &guid) == 1) {
			has_guid = true;
			goto next;
		}

		if (sscanf(add_command, "seq=%x", &seq) == 1)
			goto next;

		if (starts_with(add_command, direct_path_prefix)) {
			direct_path = add_command + strlen(direct_path_prefix);
			goto next;
		}
next:
		add_command = strchr(add_command, ',');
		if (add_command) {
			*add_command = '\0';
			add_command++;
		}
	}

	if (!name) {
		fprintf(stderr, "%s: missing %s\n", __func__, "name");
		exit(1);
	}

	if (strlen(name) >= PMT_COUNTER_NAME_SIZE_BYTES) {
		fprintf(stderr, "%s: name must be shorter than %d characters\n", __func__, PMT_COUNTER_NAME_SIZE_BYTES);
		exit(1);
	}

	if (format_name) {
		has_format = false;

		if (strcmp("raw", format_name) == 0) {
			format = FORMAT_RAW;
			has_format = true;
		}

		if (strcmp("delta", format_name) == 0) {
			format = FORMAT_DELTA;
			has_format = true;
		}

		if (!has_format) {
			fprintf(stderr, "%s: Invalid format %s. Expected raw or delta\n", __func__, format_name);
			exit(1);
		}
	}

	if (type_name) {
		has_type = false;

		if (strcmp("raw", type_name) == 0) {
			type = PMT_TYPE_RAW;
			has_type = true;
		}

		if (strcmp("txtal_time", type_name) == 0) {
			type = PMT_TYPE_XTAL_TIME;
			has_type = true;
		}

		if (strcmp("tcore_clock", type_name) == 0) {
			type = PMT_TYPE_TCORE_CLOCK;
			has_type = true;
		}

		if (!has_type) {
			fprintf(stderr, "%s: invalid %s: %s\n", __func__, "type", type_name);
			exit(1);
		}
	}

	if (!has_offset) {
		fprintf(stderr, "%s: missing %s\n", __func__, "offset");
		exit(1);
	}

	if (!has_lsb) {
		fprintf(stderr, "%s: missing %s\n", __func__, "lsb");
		exit(1);
	}

	if (!has_msb) {
		fprintf(stderr, "%s: missing %s\n", __func__, "msb");
		exit(1);
	}

	if (direct_path && has_guid) {
		fprintf(stderr, "%s: path and guid+seq parameters are mutually exclusive\n"
			"notice: passed guid=0x%x and path=%s\n", __func__, guid, direct_path);
		exit(1);
	}

	if (direct_path) {
		if (pmt_parse_from_path(direct_path, &guid, &seq)) {
			fprintf(stderr, "%s: failed to parse PMT file from %s\n", __func__, direct_path);
			exit(1);
		}

		/* GUID was just inferred from the direct path. */
		has_guid = true;
	}

	if (!has_guid) {
		fprintf(stderr, "%s: missing %s\n", __func__, "guid or path");
		exit(1);
	}

	if (!has_scope) {
		fprintf(stderr, "%s: missing %s\n", __func__, "scope");
		exit(1);
	}

	if (lsb > msb) {
		fprintf(stderr, "%s: lsb > msb doesn't make sense\n", __func__);
		exit(1);
	}

	pmt_add_counter(guid, seq, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED);
}
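/*
 * Example --add invocations accepted by the two parsers above (column names
 * are arbitrary; MSR numbers, perf events, guids and offsets are
 * illustrative):
 *
 *	--add msr0x10,u64,cpu,delta,MY_TSC
 *	--add msr0x3fe,core,raw,MY_CC3
 *	--add perf/cstate_pkg/c2-residency,package,percent,perfPC2
 *	--add pmt,name=XTAL,type=raw,format=raw,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102
 *
 * For the msr/perf form, any token matching none of the keywords and
 * fitting in 18 characters becomes the column name. For the pmt form,
 * path= lets pmt_parse_from_path() derive guid and seq instead: if
 * /sys/class/intel_pmt holds telem1 (guid 0xabc), telem2 (guid 0xdef) and
 * telem3 (guid 0xabc), then path=/sys/class/intel_pmt/telem3 yields
 * guid=0xabc, seq=1, because one earlier entry shares that guid.
 */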
void parse_add_command(char *add_command)
{
	if (strncmp(add_command, "pmt", strlen("pmt")) == 0)
		return parse_add_command_pmt(add_command);
	return parse_add_command_msr(add_command);
}

int is_deferred_add(char *name)
{
	int i;

	for (i = 0; i < deferred_add_index; ++i)
		if (!strcmp(name, deferred_add_names[i]))
			return 1;
	return 0;
}

int is_deferred_skip(char *name)
{
	int i;

	for (i = 0; i < deferred_skip_index; ++i)
		if (!strcmp(name, deferred_skip_names[i]))
			return 1;
	return 0;
}
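/*
 * Example of the column derivation done by probe_sysfs() below (state names
 * depend on the platform's cpuidle driver): a cpuidle state named "C1E-SKX"
 * yields two candidate columns,
 *
 *	"C1E%"	from cpuidle/stateN/time  (percent of the interval)
 *	"C1E"	from cpuidle/stateN/usage (delta of entry counts)
 *
 * each registered via add_counter() unless deferred-skipped or hidden.
 */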
void probe_sysfs(void)
{
	char path[64];
	char name_buf[16];
	FILE *input;
	int state;
	char *sp;

	for (state = 10; state >= 0; --state) {

		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
		input = fopen(path, "r");
		if (input == NULL)
			continue;
		if (!fgets(name_buf, sizeof(name_buf), input))
			err(1, "%s: failed to read file", path);

		/* truncate "C1-HSW\n" to "C1%", or truncate "C1\n" to "C1%" */
		sp = strchr(name_buf, '-');
		if (!sp)
			sp = strchrnul(name_buf, '\n');
		*sp = '%';
		*(sp + 1) = '\0';

		remove_underbar(name_buf);

		fclose(input);

		sprintf(path, "cpuidle/state%d/time", state);

		if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
			continue;

		if (is_deferred_skip(name_buf))
			continue;

		add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0);
	}

	for (state = 10; state >= 0; --state) {

		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
		input = fopen(path, "r");
		if (input == NULL)
			continue;
		if (!fgets(name_buf, sizeof(name_buf), input))
			err(1, "%s: failed to read file", path);
		/* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
		sp = strchr(name_buf, '-');
		if (!sp)
			sp = strchrnul(name_buf, '\n');
		*sp = '\0';
		fclose(input);

		remove_underbar(name_buf);

		sprintf(path, "cpuidle/state%d/usage", state);

		if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
			continue;

		if (is_deferred_skip(name_buf))
			continue;

		add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
	}
}

/*
 * Parse a cpuset with the following syntax:
 * 1,2,4..6,8-10 and set the corresponding bits in cpu_subset.
 */
void parse_cpu_command(char *optarg)
{
	if (!strcmp(optarg, "core")) {
		if (cpu_subset)
			goto error;
		show_core_only++;
		return;
	}
	if (!strcmp(optarg, "package")) {
		if (cpu_subset)
			goto error;
		show_pkg_only++;
		return;
	}
	if (show_core_only || show_pkg_only)
		goto error;

	cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
	if (cpu_subset == NULL)
		err(3, "CPU_ALLOC");
	cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);

	CPU_ZERO_S(cpu_subset_size, cpu_subset);

	if (parse_cpu_str(optarg, cpu_subset, cpu_subset_size))
		goto error;

	return;

error:
	fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
	help();
	exit(-1);
}
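/*
 * Example --cpu arguments accepted above (".." and "-" are equivalent range
 * separators):
 *
 *	--cpu 1,2,4..6,8-10	sets bits {1,2,4,5,6,8,9,10} in cpu_subset
 *	--cpu core		sets show_core_only
 *	--cpu package		sets show_pkg_only
 *
 * An explicit CPU list and the core/package keywords are mutually exclusive.
 */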
void cmdline(int argc, char **argv)
{
	int opt;
	int option_index = 0;
	static struct option long_options[] = {
		{ "add", required_argument, 0, 'a' },
		{ "cpu", required_argument, 0, 'c' },
		{ "Dump", no_argument, 0, 'D' },
		{ "debug", no_argument, 0, 'd' },	/* internal, not documented */
		{ "enable", required_argument, 0, 'e' },
		{ "force", no_argument, 0, 'f' },
		{ "interval", required_argument, 0, 'i' },
		{ "IPC", no_argument, 0, 'I' },
		{ "num_iterations", required_argument, 0, 'n' },
		{ "header_iterations", required_argument, 0, 'N' },
		{ "help", no_argument, 0, 'h' },
		{ "hide", required_argument, 0, 'H' },	// meh, -h taken by --help
		{ "Joules", no_argument, 0, 'J' },
		{ "list", no_argument, 0, 'l' },
		{ "out", required_argument, 0, 'o' },
		{ "quiet", no_argument, 0, 'q' },
		{ "no-msr", no_argument, 0, 'M' },
		{ "no-perf", no_argument, 0, 'P' },
		{ "show", required_argument, 0, 's' },
		{ "Summary", no_argument, 0, 'S' },
		{ "TCC", required_argument, 0, 'T' },
		{ "version", no_argument, 0, 'v' },
		{ 0, 0, 0, 0 }
	};

	progname = argv[0];

	/*
	 * Parse some options early, because they may make other options invalid,
	 * like adding an MSR counter with --add while also using --no-msr.
	 */
	while ((opt = getopt_long_only(argc, argv, "+MPn:", long_options, &option_index)) != -1) {
		switch (opt) {
		case 'M':
			no_msr = 1;
			break;
		case 'P':
			no_perf = 1;
			break;
		default:
			break;
		}
	}
	optind = 0;

	while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qMST:v", long_options, &option_index)) != -1) {
		switch (opt) {
		case 'a':
			parse_add_command(optarg);
			break;
		case 'c':
			parse_cpu_command(optarg);
			break;
		case 'D':
			dump_only++;
			/*
			 * Force no_perf early to prevent using perf as a source.
			 * The user asks for raw values, but perf returns them
			 * relative to the opening of the file descriptor.
			 */
			no_perf = 1;
			break;
		case 'e':
			/* --enable specified counter */
			bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
			break;
		case 'f':
			force_load++;
			break;
		case 'd':
			debug++;
			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
			break;
		case 'H':
			/*
			 * --hide: do not show those specified.
			 * Multiple invocations simply clear more bits in the enabled mask.
			 */
			bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
			break;
		case 'h':
		default:
			help();
			exit(1);
		case 'i':
			{
				double interval = strtod(optarg, NULL);

				if (interval < 0.001) {
					fprintf(outf, "interval %f seconds is too small\n", interval);
					exit(2);
				}

				interval_tv.tv_sec = interval_ts.tv_sec = interval;
				interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
				interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
			}
			break;
		case 'J':
			rapl_joules++;
			break;
		case 'l':
			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
			list_header_only++;
			quiet++;
			break;
		case 'o':
			outf = fopen_or_die(optarg, "w");
			break;
		case 'q':
			quiet = 1;
			break;
		case 'M':
		case 'P':
			/* Parsed earlier */
			break;
		case 'n':
			num_iterations = strtod(optarg, NULL);

			if (num_iterations <= 0) {
				fprintf(outf, "num_iterations %d must be a positive number\n", num_iterations);
				exit(2);
			}
			break;
		case 'N':
			header_iterations = strtod(optarg, NULL);

			if (header_iterations <= 0) {
				fprintf(outf, "header_iterations %d must be a positive number\n", header_iterations);
				exit(2);
			}
			break;
		case 's':
			/*
			 * --show: show only those specified.
			 * The 1st invocation will clear and replace the enabled mask;
			 * subsequent invocations can add to it.
			 */
			if (shown == 0)
				bic_enabled = bic_lookup(optarg, SHOW_LIST);
			else
				bic_enabled |= bic_lookup(optarg, SHOW_LIST);
			shown = 1;
			break;
		case 'S':
			summary_only++;
			break;
		case 'T':
			tj_max_override = atoi(optarg);
			break;
		case 'v':
			print_version();
			exit(0);
			break;
		}
	}
}
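/*
 * Examples of the --show/--hide semantics implemented above (the column
 * names are real built-ins):
 *
 *	turbostat --show Busy%,Bzy_MHz		only these two columns
 *	turbostat --show Busy% --show Bzy_MHz	same: later --show adds on
 *	turbostat --hide IRQ,SMI		all columns except IRQ and SMI
 *
 * --no-msr and --no-perf are handled in the first getopt pass, so that a
 * conflicting --add can be rejected regardless of argument order.
 */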
void set_rlimit(void)
{
	struct rlimit limit;

	if (getrlimit(RLIMIT_NOFILE, &limit) < 0)
		err(1, "Failed to get rlimit");

	if (limit.rlim_max < MAX_NOFILE)
		limit.rlim_max = MAX_NOFILE;
	if (limit.rlim_cur < MAX_NOFILE)
		limit.rlim_cur = MAX_NOFILE;

	if (setrlimit(RLIMIT_NOFILE, &limit) < 0)
		err(1, "Failed to set rlimit");
}

int main(int argc, char **argv)
{
	int fd, ret;

	fd = open("/sys/fs/cgroup/cgroup.procs", O_WRONLY);
	if (fd < 0)
		goto skip_cgroup_setting;

	ret = write(fd, "0\n", 2);
	if (ret == -1)
		perror("Can't update cgroup");

	close(fd);

skip_cgroup_setting:
	outf = stderr;
	cmdline(argc, argv);

	if (!quiet) {
		print_version();
		print_bootcmd();
	}

	probe_sysfs();

	if (!getuid())
		set_rlimit();

	turbostat_init();

	if (!no_msr)
		msr_sum_record();

	/* dump counters and exit */
	if (dump_only)
		return get_and_dump_counters();

	/* list header and exit */
	if (list_header_only) {
		print_header(",");
		flush_output_stdout();
		return 0;
	}

	/*
	 * If any arguments remain, they must be a command to fork.
	 */
	if (argc - optind)
		return fork_it(argv + optind);
	else
		turbostat_loop();

	return 0;
}