1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * turbostat -- show CPU frequency and C-state residency
4 * on modern Intel and AMD processors.
5 *
6 * Copyright (c) 2025 Intel Corporation.
7 * Len Brown <len.brown@intel.com>
8 */
9
10 #define _GNU_SOURCE
11 #include MSRHEADER
12
13 // copied from arch/x86/include/asm/cpu_device_id.h
14 #define VFM_MODEL_BIT 0
15 #define VFM_FAMILY_BIT 8
16 #define VFM_VENDOR_BIT 16
17 #define VFM_RSVD_BIT 24
18
19 #define VFM_MODEL_MASK GENMASK(VFM_FAMILY_BIT - 1, VFM_MODEL_BIT)
20 #define VFM_FAMILY_MASK GENMASK(VFM_VENDOR_BIT - 1, VFM_FAMILY_BIT)
21 #define VFM_VENDOR_MASK GENMASK(VFM_RSVD_BIT - 1, VFM_VENDOR_BIT)
22
23 #define VFM_MODEL(vfm) (((vfm) & VFM_MODEL_MASK) >> VFM_MODEL_BIT)
24 #define VFM_FAMILY(vfm) (((vfm) & VFM_FAMILY_MASK) >> VFM_FAMILY_BIT)
25 #define VFM_VENDOR(vfm) (((vfm) & VFM_VENDOR_MASK) >> VFM_VENDOR_BIT)
26
27 #define VFM_MAKE(_vendor, _family, _model) ( \
28 ((_model) << VFM_MODEL_BIT) | \
29 ((_family) << VFM_FAMILY_BIT) | \
30 ((_vendor) << VFM_VENDOR_BIT) \
31 )
32 // end copied section
33
34 #define CPUID_LEAF_MODEL_ID 0x1A
35 #define CPUID_LEAF_MODEL_ID_CORE_TYPE_SHIFT 24
36
37 #define X86_VENDOR_INTEL 0
38
39 #include INTEL_FAMILY_HEADER
40 #include BUILD_BUG_HEADER
41 #include <stdarg.h>
42 #include <stdio.h>
43 #include <err.h>
44 #include <unistd.h>
45 #include <sys/types.h>
46 #include <sys/wait.h>
47 #include <sys/stat.h>
48 #include <sys/select.h>
49 #include <sys/resource.h>
50 #include <sys/mman.h>
51 #include <fcntl.h>
52 #include <signal.h>
53 #include <sys/time.h>
54 #include <stdlib.h>
55 #include <getopt.h>
56 #include <dirent.h>
57 #include <string.h>
58 #include <ctype.h>
59 #include <sched.h>
60 #include <time.h>
61 #include <cpuid.h>
62 #include <sys/capability.h>
63 #include <errno.h>
64 #include <math.h>
65 #include <linux/perf_event.h>
66 #include <asm/unistd.h>
67 #include <stdbool.h>
68 #include <assert.h>
69 #include <linux/kernel.h>
70
71 #define UNUSED(x) (void)(x)
72
73 /*
74 * This list matches the column headers, except
75 * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
76 * 2. Core and CPU are moved to the end, we can't have strings that contain them
77 * matching on them for --show and --hide.
78 */
79
80 /*
81 * buffer size used by sscanf() for added column names
82 * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
83 */
84 #define NAME_BYTES 20
85 #define PATH_BYTES 128
86 #define PERF_NAME_BYTES 128
87
88 #define MAX_NOFILE 0x8000
89
90 #define COUNTER_KIND_PERF_PREFIX "perf/"
91 #define COUNTER_KIND_PERF_PREFIX_LEN strlen(COUNTER_KIND_PERF_PREFIX)
92 #define PERF_DEV_NAME_BYTES 32
93 #define PERF_EVT_NAME_BYTES 32
94
95 #define INTEL_ECORE_TYPE 0x20
96 #define INTEL_PCORE_TYPE 0x40
97
98 #define ROUND_UP_TO_PAGE_SIZE(n) (((n) + 0x1000UL-1UL) & ~(0x1000UL-1UL))
99
/* Granularity at which a counter is collected and aggregated */
enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
/* How a counter's raw value is interpreted (K2M presumably kHz shown as MHz -- usage not in this chunk) */
enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M };
/* How a counter is displayed in the output columns */
enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE };
/* Backend used to read a counter */
enum counter_source { COUNTER_SOURCE_NONE, COUNTER_SOURCE_PERF, COUNTER_SOURCE_MSR };
104
/*
 * One perf-backed added counter: how to open it, how to format it,
 * and the per-domain file descriptors used to read it.
 * Instances form a singly-linked list via @next.
 */
struct perf_counter_info {
	struct perf_counter_info *next;

	/* How to open the counter / What counter it is. */
	char device[PERF_DEV_NAME_BYTES];
	char event[PERF_EVT_NAME_BYTES];

	/* How to show/format the counter. */
	char name[PERF_NAME_BYTES];
	unsigned int width;	/* output column width, in characters */
	enum counter_scope scope;
	enum counter_type type;
	enum counter_format format;
	double scale;	/* scale factor -- presumably applied to raw readings; usage not in this chunk */

	/* For reading the counter. */
	int *fd_perf_per_domain;	/* one perf fd per domain (domain = cpu/core/package per @scope) */
	size_t num_domains;	/* number of entries in fd_perf_per_domain */
};
124
/*
 * One sysfs file backing a counter, plus an id, in a singly-linked
 * list (see msr_counter.sp).
 */
struct sysfs_path {
	char path[PATH_BYTES];
	int id;
	struct sysfs_path *next;
};
130
/*
 * One built-in or user-added counter, backed by an MSR and/or
 * sysfs path(s).  The built-in counter table bic[] below is an
 * array of these.
 */
struct msr_counter {
	unsigned int msr_num;
	char name[NAME_BYTES];
	struct sysfs_path *sp;	/* sysfs path list backing this counter, if any */
	unsigned int width;	/* output column width */
	enum counter_type type;
	enum counter_format format;
	struct msr_counter *next;
	unsigned int flags;
#define FLAGS_HIDE (1 << 0)
#define FLAGS_SHOW (1 << 1)
	/*
	 * NOTE(review): SYSFS_PERCPU shares bit 1 with FLAGS_SHOW --
	 * presumably the two flags are used in disjoint contexts
	 * (show/hide vs. sysfs counter attributes); confirm before
	 * relying on either in a counter that uses both.
	 */
#define SYSFS_PERCPU (1 << 1)
};
144
/*
 * Built-in counter (BIC) table.
 *
 * Entry order is an invariant: index N here must correspond to bit N
 * of the BIC_* masks defined below (index 0 <-> BIC_USEC ... index 63
 * <-> BIC_pct_idle).  There are exactly 64 entries, one per bit of
 * bic_enabled/bic_present.
 * Initializer field order: msr_num, name, sp, width, type, format, next, flags.
 */
struct msr_counter bic[] = {
	{ 0x0, "usec", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Time_Of_Day_Seconds", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Package", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Node", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Avg_MHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Busy%", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Bzy_MHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "TSC_MHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "IRQ", NULL, 0, 0, 0, NULL, 0 },
	/* The only entry with a non-default width/format */
	{ 0x0, "SMI", NULL, 32, 0, FORMAT_DELTA, NULL, 0 },
	{ 0x0, "cpuidle", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c1", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c3", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c7", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "ThreadC", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CoreTmp", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CoreCnt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "PkgTmp", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFX%rc6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFXMHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc2", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc3", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc7", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc8", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc9", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pk%pc10", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%LPI", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SYS%LPI", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "PkgWatt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CorWatt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFXWatt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "PkgCnt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "RAMWatt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "PKG_%", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "RAM_%", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg_J", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Cor_J", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFX_J", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "RAM_J", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Mod%c6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Totl%C0", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Any%C0", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFX%C0", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPUGFX%", NULL, 0, 0, 0, NULL, 0 },
	/* "Core" and "CPU" are deliberately last-ish (see comment above bic[]) */
	{ 0x0, "Core", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "APIC", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "X2APIC", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Die", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFXAMHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "IPC", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CoreThr", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "UncMHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SAM%mc6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SAMMHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SAMAMHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Die%c6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SysWatt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 },
};
211
212 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
213 #define BIC_USEC (1ULL << 0)
214 #define BIC_TOD (1ULL << 1)
215 #define BIC_Package (1ULL << 2)
216 #define BIC_Node (1ULL << 3)
217 #define BIC_Avg_MHz (1ULL << 4)
218 #define BIC_Busy (1ULL << 5)
219 #define BIC_Bzy_MHz (1ULL << 6)
220 #define BIC_TSC_MHz (1ULL << 7)
221 #define BIC_IRQ (1ULL << 8)
222 #define BIC_SMI (1ULL << 9)
223 #define BIC_cpuidle (1ULL << 10)
224 #define BIC_CPU_c1 (1ULL << 11)
225 #define BIC_CPU_c3 (1ULL << 12)
226 #define BIC_CPU_c6 (1ULL << 13)
227 #define BIC_CPU_c7 (1ULL << 14)
228 #define BIC_ThreadC (1ULL << 15)
229 #define BIC_CoreTmp (1ULL << 16)
230 #define BIC_CoreCnt (1ULL << 17)
231 #define BIC_PkgTmp (1ULL << 18)
232 #define BIC_GFX_rc6 (1ULL << 19)
233 #define BIC_GFXMHz (1ULL << 20)
234 #define BIC_Pkgpc2 (1ULL << 21)
235 #define BIC_Pkgpc3 (1ULL << 22)
236 #define BIC_Pkgpc6 (1ULL << 23)
237 #define BIC_Pkgpc7 (1ULL << 24)
238 #define BIC_Pkgpc8 (1ULL << 25)
239 #define BIC_Pkgpc9 (1ULL << 26)
240 #define BIC_Pkgpc10 (1ULL << 27)
241 #define BIC_CPU_LPI (1ULL << 28)
242 #define BIC_SYS_LPI (1ULL << 29)
243 #define BIC_PkgWatt (1ULL << 30)
244 #define BIC_CorWatt (1ULL << 31)
245 #define BIC_GFXWatt (1ULL << 32)
246 #define BIC_PkgCnt (1ULL << 33)
247 #define BIC_RAMWatt (1ULL << 34)
248 #define BIC_PKG__ (1ULL << 35)
249 #define BIC_RAM__ (1ULL << 36)
250 #define BIC_Pkg_J (1ULL << 37)
251 #define BIC_Cor_J (1ULL << 38)
252 #define BIC_GFX_J (1ULL << 39)
253 #define BIC_RAM_J (1ULL << 40)
254 #define BIC_Mod_c6 (1ULL << 41)
255 #define BIC_Totl_c0 (1ULL << 42)
256 #define BIC_Any_c0 (1ULL << 43)
257 #define BIC_GFX_c0 (1ULL << 44)
258 #define BIC_CPUGFX (1ULL << 45)
259 #define BIC_Core (1ULL << 46)
260 #define BIC_CPU (1ULL << 47)
261 #define BIC_APIC (1ULL << 48)
262 #define BIC_X2APIC (1ULL << 49)
263 #define BIC_Die (1ULL << 50)
264 #define BIC_GFXACTMHz (1ULL << 51)
265 #define BIC_IPC (1ULL << 52)
266 #define BIC_CORE_THROT_CNT (1ULL << 53)
267 #define BIC_UNCORE_MHZ (1ULL << 54)
268 #define BIC_SAM_mc6 (1ULL << 55)
269 #define BIC_SAMMHz (1ULL << 56)
270 #define BIC_SAMACTMHz (1ULL << 57)
271 #define BIC_Diec6 (1ULL << 58)
272 #define BIC_SysWatt (1ULL << 59)
273 #define BIC_Sys_J (1ULL << 60)
274 #define BIC_NMI (1ULL << 61)
275 #define BIC_CPU_c1e (1ULL << 62)
276 #define BIC_pct_idle (1ULL << 63)
277
278 #define BIC_GROUP_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die)
279 #define BIC_GROUP_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt)
280 #define BIC_GROUP_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
281 #define BIC_GROUP_HW_IDLE (BIC_Busy | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6)
282 #define BIC_GROUP_SW_IDLE (BIC_Busy | BIC_cpuidle | BIC_pct_idle )
283 #define BIC_GROUP_IDLE (BIC_GROUP_HW_IDLE | BIC_pct_idle)
284 #define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
285
286 #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_cpuidle)
287
288 unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
289 unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_cpuidle | BIC_pct_idle | BIC_APIC | BIC_X2APIC;
290
291 #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
292 #define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME)
293 #define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
294 #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
295 #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
296 #define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
297
298 /*
299 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
300 * If you change the values, note they are used both in comparisons
301 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
302 */
303 #define PCLUKN 0 /* Unknown */
304 #define PCLRSV 1 /* Reserved */
305 #define PCL__0 2 /* PC0 */
306 #define PCL__1 3 /* PC1 */
307 #define PCL__2 4 /* PC2 */
308 #define PCL__3 5 /* PC3 */
309 #define PCL__4 6 /* PC4 */
310 #define PCL__6 7 /* PC6 */
311 #define PCL_6N 8 /* PC6 No Retention */
312 #define PCL_6R 9 /* PC6 Retention */
313 #define PCL__7 10 /* PC7 */
314 #define PCL_7S 11 /* PC7 Shrink */
315 #define PCL__8 12 /* PC8 */
316 #define PCL__9 13 /* PC9 */
317 #define PCL_10 14 /* PC10 */
318 #define PCLUNL 15 /* Unlimited */
319
320 struct amperf_group_fd;
321
322 char *proc_stat = "/proc/stat";
323 FILE *outf;
324 int *fd_percpu;
325 int *fd_instr_count_percpu;
326 struct timeval interval_tv = { 5, 0 };
327 struct timespec interval_ts = { 5, 0 };
328
329 unsigned int num_iterations;
330 unsigned int header_iterations;
331 unsigned int debug;
332 unsigned int quiet;
333 unsigned int shown;
334 unsigned int sums_need_wide_columns;
335 unsigned int rapl_joules;
336 unsigned int summary_only;
337 unsigned int list_header_only;
338 unsigned int dump_only;
339 unsigned int force_load;
340 unsigned int has_aperf;
341 unsigned int has_aperf_access;
342 unsigned int has_epb;
343 unsigned int has_turbo;
344 unsigned int is_hybrid;
345 unsigned int units = 1000000; /* MHz etc */
346 unsigned int genuine_intel;
347 unsigned int authentic_amd;
348 unsigned int hygon_genuine;
349 unsigned int max_level, max_extended_level;
350 unsigned int has_invariant_tsc;
351 unsigned int aperf_mperf_multiplier = 1;
352 double bclk;
353 double base_hz;
354 unsigned int has_base_hz;
355 double tsc_tweak = 1.0;
356 unsigned int show_pkg_only;
357 unsigned int show_core_only;
358 char *output_buffer, *outp;
359 unsigned int do_dts;
360 unsigned int do_ptm;
361 unsigned int do_ipc;
362 unsigned long long cpuidle_cur_cpu_lpi_us;
363 unsigned long long cpuidle_cur_sys_lpi_us;
364 unsigned int tj_max;
365 unsigned int tj_max_override;
366 double rapl_power_units, rapl_time_units;
367 double rapl_dram_energy_units, rapl_energy_units, rapl_psys_energy_units;
368 double rapl_joule_counter_range;
369 unsigned int crystal_hz;
370 unsigned long long tsc_hz;
371 int base_cpu;
372 unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
373 /* IA32_HWP_REQUEST, IA32_HWP_STATUS */
374 unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */
375 unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */
376 unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
377 unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
378 unsigned int first_counter_read = 1;
379
380 static struct timeval procsysfs_tv_begin;
381
382 int ignore_stdin;
383 bool no_msr;
384 bool no_perf;
385
/* Indices into gfx_info[] for the graphics / SA-media sysfs attributes */
enum gfx_sysfs_idx {
	GFX_rc6,	/* graphics RC6 residency (GFX%rc6 column) */
	GFX_MHz,
	GFX_ACTMHz,
	SAM_mc6,	/* SA-media MC6 residency (SAM%mc6 column) */
	SAM_MHz,
	SAM_ACTMHz,
	GFX_MAX
};
395
/* Open FILE handle plus last-read value for one gfx sysfs attribute */
struct gfx_sysfs_info {
	FILE *fp;
	unsigned int val;
	unsigned long long val_ull;	/* for attributes that need more than 32 bits */
};

/* One slot per enum gfx_sysfs_idx entry */
static struct gfx_sysfs_info gfx_info[GFX_MAX];
403
404 int get_msr(int cpu, off_t offset, unsigned long long *msr);
405 int add_counter(unsigned int msr_num, char *path, char *name,
406 unsigned int width, enum counter_scope scope,
407 enum counter_type type, enum counter_format format, int flags, int package_num);
408
409 /* Model specific support Start */
410
411 /* List of features that may diverge among different platforms */
struct platform_features {
	bool has_msr_misc_feature_control;	/* MSR_MISC_FEATURE_CONTROL */
	bool has_msr_misc_pwr_mgmt;	/* MSR_MISC_PWR_MGMT */
	bool has_nhm_msrs;	/* MSR_PLATFORM_INFO, MSR_IA32_TEMPERATURE_TARGET, MSR_SMI_COUNT, MSR_PKG_CST_CONFIG_CONTROL, MSR_IA32_POWER_CTL, TRL MSRs */
	bool has_config_tdp;	/* MSR_CONFIG_TDP_NOMINAL/LEVEL_1/LEVEL_2/CONTROL, MSR_TURBO_ACTIVATION_RATIO */
	int bclk_freq;	/* CPU base clock (enum bclk_freq) */
	int crystal_freq;	/* Crystal clock to use when not available from CPUID.15 */
	int supported_cstates;	/* Core cstates and Package cstates supported (bitmask of enum cstates) */
	int cst_limit;	/* MSR_PKG_CST_CONFIG_CONTROL (enum package_cstate_limit) */
	bool has_cst_auto_convension;	/* AUTOMATIC_CSTATE_CONVERSION bit in MSR_PKG_CST_CONFIG_CONTROL ("convension" is a long-standing identifier typo for "conversion" -- keep for compatibility) */
	bool has_irtl_msrs;	/* MSR_PKGC3/PKGC6/PKGC7/PKGC8/PKGC9/PKGC10_IRTL */
	bool has_msr_core_c1_res;	/* MSR_CORE_C1_RES */
	bool has_msr_module_c6_res_ms;	/* MSR_MODULE_C6_RES_MS */
	bool has_msr_c6_demotion_policy_config;	/* MSR_CC6_DEMOTION_POLICY_CONFIG/MSR_MC6_DEMOTION_POLICY_CONFIG */
	bool has_msr_atom_pkg_c6_residency;	/* MSR_ATOM_PKG_C6_RESIDENCY */
	bool has_msr_knl_core_c6_residency;	/* MSR_KNL_CORE_C6_RESIDENCY */
	bool has_ext_cst_msrs;	/* MSR_PKG_WEIGHTED_CORE_C0_RES/MSR_PKG_ANY_CORE_C0_RES/MSR_PKG_ANY_GFXE_C0_RES/MSR_PKG_BOTH_CORE_GFXE_C0_RES */
	bool has_cst_prewake_bit;	/* Cstate prewake bit in MSR_IA32_POWER_CTL */
	int trl_msrs;	/* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs (bitmask of enum turbo_ratio_limit_msrs) */
	int plr_msrs;	/* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS (bitmask of enum perf_limit_reason_msrs) */
	int rapl_msrs;	/* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs (bitmask of enum rapl_msrs) */
	bool has_per_core_rapl;	/* Indicates cores energy collection is per-core, not per-package. AMD specific for now */
	bool has_rapl_divisor;	/* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */
	bool has_fixed_rapl_unit;	/* Fixed Energy Unit used for DRAM RAPL Domain */
	bool has_fixed_rapl_psys_unit;	/* Fixed Energy Unit used for PSYS RAPL Domain */
	int rapl_quirk_tdp;	/* Hardcoded TDP value when cannot be retrieved from hardware */
	int tcc_offset_bits;	/* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */
	bool enable_tsc_tweak;	/* Use CPU Base freq instead of TSC freq for aperf/mperf counter */
	bool need_perf_multiplier;	/* mperf/aperf multiplier */
};
442
/* Maps a CPU (vendor/family/model as packed by VFM_MAKE()) to its feature set */
struct platform_data {
	unsigned int vfm;
	const struct platform_features *features;
};
447
/* For BCLK */
enum bclk_freq {
	BCLK_100MHZ = 1,
	BCLK_133MHZ,
	BCLK_SLV,	/* Silvermont: actual BCLK read from MSR_FSB_FREQ, see slm_bclk() */
};

#define SLM_BCLK_FREQS 5
/* MSR_FSB_FREQ encoding -> BCLK in MHz, indexed by the MSR's low 4 bits */
double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };
457
slm_bclk(void)458 double slm_bclk(void)
459 {
460 unsigned long long msr = 3;
461 unsigned int i;
462 double freq;
463
464 if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
465 fprintf(outf, "SLM BCLK: unknown\n");
466
467 i = msr & 0xf;
468 if (i >= SLM_BCLK_FREQS) {
469 fprintf(outf, "SLM BCLK[%d] invalid\n", i);
470 i = 3;
471 }
472 freq = slm_freq_table[i];
473
474 if (!quiet)
475 fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
476
477 return freq;
478 }
479
/* For Package cstate limit */
/* Selects the per-platform decode scheme for MSR_PKG_CST_CONFIG_CONTROL */
enum package_cstate_limit {
	CST_LIMIT_NHM = 1,
	CST_LIMIT_SNB,
	CST_LIMIT_HSW,
	CST_LIMIT_SKX,
	CST_LIMIT_ICX,
	CST_LIMIT_SLV,
	CST_LIMIT_AMT,
	CST_LIMIT_KNL,
	CST_LIMIT_GMT,
};
492
/* For Turbo Ratio Limit MSRs */
/* Bitmask values for platform_features.trl_msrs */
enum turbo_ratio_limit_msrs {
	TRL_BASE = BIT(0),
	TRL_LIMIT1 = BIT(1),
	TRL_LIMIT2 = BIT(2),
	TRL_ATOM = BIT(3),
	TRL_KNL = BIT(4),
	TRL_CORECOUNT = BIT(5),	/* TRL is interpreted together with a core-count MSR */
};
502
/* For Perf Limit Reason MSRs */
/* Bitmask values for platform_features.plr_msrs */
enum perf_limit_reason_msrs {
	PLR_CORE = BIT(0),
	PLR_GFX = BIT(1),
	PLR_RING = BIT(2),
};
509
/* For RAPL MSRs */
/* Bitmask values for platform_features.rapl_msrs, one bit per MSR */
enum rapl_msrs {
	RAPL_PKG_POWER_LIMIT = BIT(0),	/* 0x610 MSR_PKG_POWER_LIMIT */
	RAPL_PKG_ENERGY_STATUS = BIT(1),	/* 0x611 MSR_PKG_ENERGY_STATUS */
	RAPL_PKG_PERF_STATUS = BIT(2),	/* 0x613 MSR_PKG_PERF_STATUS */
	RAPL_PKG_POWER_INFO = BIT(3),	/* 0x614 MSR_PKG_POWER_INFO */
	RAPL_DRAM_POWER_LIMIT = BIT(4),	/* 0x618 MSR_DRAM_POWER_LIMIT */
	RAPL_DRAM_ENERGY_STATUS = BIT(5),	/* 0x619 MSR_DRAM_ENERGY_STATUS */
	RAPL_DRAM_PERF_STATUS = BIT(6),	/* 0x61b MSR_DRAM_PERF_STATUS */
	RAPL_DRAM_POWER_INFO = BIT(7),	/* 0x61c MSR_DRAM_POWER_INFO */
	RAPL_CORE_POWER_LIMIT = BIT(8),	/* 0x638 MSR_PP0_POWER_LIMIT */
	RAPL_CORE_ENERGY_STATUS = BIT(9),	/* 0x639 MSR_PP0_ENERGY_STATUS */
	RAPL_CORE_POLICY = BIT(10),	/* 0x63a MSR_PP0_POLICY */
	RAPL_GFX_POWER_LIMIT = BIT(11),	/* 0x640 MSR_PP1_POWER_LIMIT */
	RAPL_GFX_ENERGY_STATUS = BIT(12),	/* 0x641 MSR_PP1_ENERGY_STATUS */
	RAPL_GFX_POLICY = BIT(13),	/* 0x642 MSR_PP1_POLICY */
	RAPL_AMD_PWR_UNIT = BIT(14),	/* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */
	RAPL_AMD_CORE_ENERGY_STAT = BIT(15),	/* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */
	RAPL_AMD_PKG_ENERGY_STAT = BIT(16),	/* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */
	RAPL_PLATFORM_ENERGY_LIMIT = BIT(17),	/* 0x64c MSR_PLATFORM_ENERGY_LIMIT */
	RAPL_PLATFORM_ENERGY_STATUS = BIT(18),	/* 0x64d MSR_PLATFORM_ENERGY_STATUS */
};

/* Convenience groupings of the per-MSR bits above, by RAPL domain */
#define RAPL_PKG (RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT)
#define RAPL_DRAM (RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT)
#define RAPL_CORE (RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT)
#define RAPL_GFX (RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS)
#define RAPL_PSYS (RAPL_PLATFORM_ENERGY_STATUS | RAPL_PLATFORM_ENERGY_LIMIT)

/* "_ALL" variants add the perf-status/power-info/policy MSRs of the domain */
#define RAPL_PKG_ALL (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO)
#define RAPL_DRAM_ALL (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO)
#define RAPL_CORE_ALL (RAPL_CORE | RAPL_CORE_POLICY)
#define RAPL_GFX_ALL (RAPL_GFX | RAPL_GFX_POLICY)

#define RAPL_AMD_F17H (RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | RAPL_AMD_PKG_ENERGY_STAT)
545
/* For Cstates */
/* Bitmask values for platform_features.supported_cstates (CCn = core, PCn = package) */
enum cstates {
	CC1 = BIT(0),
	CC3 = BIT(1),
	CC6 = BIT(2),
	CC7 = BIT(3),
	PC2 = BIT(4),
	PC3 = BIT(5),
	PC6 = BIT(6),
	PC7 = BIT(7),
	PC8 = BIT(8),
	PC9 = BIT(9),
	PC10 = BIT(10),
};
560
/* Nehalem-class: 133 MHz BCLK, NHM cstate-limit decode, base TRL MSR only */
static const struct platform_features nhm_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_133MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_NHM,
	.trl_msrs = TRL_BASE,
};

/* Same as nhm_features but without any Turbo Ratio Limit MSRs */
static const struct platform_features nhx_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_133MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_NHM,
};

/* Sandy Bridge class: 100 MHz BCLK, IRTL MSRs, client RAPL (PKG/CORE/GFX) */
static const struct platform_features snb_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_SNB,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

/* Like snb_features but with server RAPL domains (PKG_ALL/DRAM_ALL, no GFX) */
static const struct platform_features snx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_SNB,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
};

/* Like snb_features plus configurable TDP MSRs */
static const struct platform_features ivb_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_SNB,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

/* Like snx_features plus TRL_LIMIT1 (no config TDP) */
static const struct platform_features ivx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_SNB,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE | TRL_LIMIT1,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
};

/* Haswell class: HSW cstate-limit decode, adds Perf Limit Reason MSRs */
static const struct platform_features hsw_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

/* Haswell server: extra TRL MSRs, server RAPL, fixed DRAM energy unit */
static const struct platform_features hsx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2,
	.plr_msrs = PLR_CORE | PLR_RING,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
	.has_fixed_rapl_unit = 1,
};
655
/* Low-power Haswell: like hsw_features plus deep PC8/PC9/PC10 states */
static const struct platform_features hswl_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

/* Haswell variant: identical to hsw_features (kept separate for the VFM table) */
static const struct platform_features hswg_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

/* Broadwell: like hswl_features but without the Perf Limit Reason MSRs */
static const struct platform_features bdw_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

/* Broadwell variant: like bdw_features without PC8/PC9/PC10 */
static const struct platform_features bdwg_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

/* Broadwell server: auto C-state conversion, server RAPL, fixed DRAM unit */
static const struct platform_features bdx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | PC2 | PC3 | PC6,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.has_cst_auto_convension = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
	.has_fixed_rapl_unit = 1,
};

/* Skylake client: 24 MHz crystal, extended cstate MSRs, PSYS RAPL, TSC tweak */
static const struct platform_features skl_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.crystal_freq = 24000000,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.has_ext_cst_msrs = 1,
	.trl_msrs = TRL_BASE,
	.tcc_offset_bits = 6,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS,
	.enable_tsc_tweak = 1,
};

/* Cannon Lake: like skl_features minus CC3, plus MSR_CORE_C1_RES (no crystal_freq override) */
static const struct platform_features cnl_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.has_msr_core_c1_res = 1,
	.has_ext_cst_msrs = 1,
	.trl_msrs = TRL_BASE,
	.tcc_offset_bits = 6,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS,
	.enable_tsc_tweak = 1,
};
758
/* Copied from cnl_features, with PC7/PC9 removed */
static const struct platform_features adl_features = {
	.has_msr_misc_feature_control = cnl_features.has_msr_misc_feature_control,
	.has_msr_misc_pwr_mgmt = cnl_features.has_msr_misc_pwr_mgmt,
	.has_nhm_msrs = cnl_features.has_nhm_msrs,
	.has_config_tdp = cnl_features.has_config_tdp,
	.bclk_freq = cnl_features.bclk_freq,
	.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10,
	.cst_limit = cnl_features.cst_limit,
	.has_irtl_msrs = cnl_features.has_irtl_msrs,
	.has_msr_core_c1_res = cnl_features.has_msr_core_c1_res,
	.has_ext_cst_msrs = cnl_features.has_ext_cst_msrs,
	.trl_msrs = cnl_features.trl_msrs,
	.tcc_offset_bits = cnl_features.tcc_offset_bits,
	.rapl_msrs = cnl_features.rapl_msrs,
	.enable_tsc_tweak = cnl_features.enable_tsc_tweak,
};

/* Copied from adl_features, with PC3/PC8 removed */
static const struct platform_features lnl_features = {
	.has_msr_misc_feature_control = adl_features.has_msr_misc_feature_control,
	.has_msr_misc_pwr_mgmt = adl_features.has_msr_misc_pwr_mgmt,
	.has_nhm_msrs = adl_features.has_nhm_msrs,
	.has_config_tdp = adl_features.has_config_tdp,
	.bclk_freq = adl_features.bclk_freq,
	.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC6 | PC10,
	.cst_limit = adl_features.cst_limit,
	.has_irtl_msrs = adl_features.has_irtl_msrs,
	.has_msr_core_c1_res = adl_features.has_msr_core_c1_res,
	.has_ext_cst_msrs = adl_features.has_ext_cst_msrs,
	.trl_msrs = adl_features.trl_msrs,
	.tcc_offset_bits = adl_features.tcc_offset_bits,
	.rapl_msrs = adl_features.rapl_msrs,
	.enable_tsc_tweak = adl_features.enable_tsc_tweak,
};
794
/* Skylake server: SKX cstate-limit decode, core-count TRL, auto C-state conversion */
static const struct platform_features skx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_SKX,
	.has_irtl_msrs = 1,
	.has_cst_auto_convension = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
	.has_fixed_rapl_unit = 1,
};

/* Ice Lake server: adds core C1 residency, cstate prewake bit, PSYS RAPL */
static const struct platform_features icx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_ICX,
	.has_msr_core_c1_res = 1,
	.has_irtl_msrs = 1,
	.has_cst_prewake_bit = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
	.has_fixed_rapl_unit = 1,
};

/* Sapphire Rapids class: like icx_features but SKX cstate limit, fixed PSYS unit */
static const struct platform_features spr_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_SKX,
	.has_msr_core_c1_res = 1,
	.has_irtl_msrs = 1,
	.has_cst_prewake_bit = 1,
	.has_fixed_rapl_psys_unit = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
};

/* Copied from spr_features; adds module C6, drops TRL and RAPL MSRs */
static const struct platform_features dmr_features = {
	.has_msr_misc_feature_control = spr_features.has_msr_misc_feature_control,
	.has_msr_misc_pwr_mgmt = spr_features.has_msr_misc_pwr_mgmt,
	.has_nhm_msrs = spr_features.has_nhm_msrs,
	.has_config_tdp = spr_features.has_config_tdp,
	.bclk_freq = spr_features.bclk_freq,
	.supported_cstates = spr_features.supported_cstates,
	.cst_limit = spr_features.cst_limit,
	.has_msr_core_c1_res = spr_features.has_msr_core_c1_res,
	.has_msr_module_c6_res_ms = 1,	/* DMR has Dual Core Module and MC6 MSR */
	.has_irtl_msrs = spr_features.has_irtl_msrs,
	.has_cst_prewake_bit = spr_features.has_cst_prewake_bit,
	.has_fixed_rapl_psys_unit = spr_features.has_fixed_rapl_psys_unit,
	.trl_msrs = spr_features.trl_msrs,
	.rapl_msrs = 0,	/* DMR does not have RAPL MSRs */
};
858
/* Sierra Forest (ATOM_CRESTMONT_X / ATOM_DARKMONT_X) E-core Xeon feature set. */
static const struct platform_features srf_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_SKX,
	.has_msr_core_c1_res = 1,
	.has_msr_module_c6_res_ms = 1,
	.has_irtl_msrs = 1,
	.has_cst_prewake_bit = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
};

/* Grand Ridge (ATOM_CRESTMONT): like SRF but no package C-states beyond CC6. */
static const struct platform_features grr_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6,
	.cst_limit = CST_LIMIT_SKX,
	.has_msr_core_c1_res = 1,
	.has_msr_module_c6_res_ms = 1,
	.has_irtl_msrs = 1,
	.has_cst_prewake_bit = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
};

/* Silvermont Atom: non-standard bclk, RAPL divisor and a 30W TDP quirk. */
static const struct platform_features slv_features = {
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_SLV,
	.supported_cstates = CC1 | CC6 | PC6,
	.cst_limit = CST_LIMIT_SLV,
	.has_msr_core_c1_res = 1,
	.has_msr_module_c6_res_ms = 1,
	.has_msr_c6_demotion_policy_config = 1,
	.has_msr_atom_pkg_c6_residency = 1,
	.trl_msrs = TRL_ATOM,
	.rapl_msrs = RAPL_PKG | RAPL_CORE,
	.has_rapl_divisor = 1,
	.rapl_quirk_tdp = 30,
};

/* Silvermont Atom server (Avoton) variant. */
static const struct platform_features slvd_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_SLV,
	.supported_cstates = CC1 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_SLV,
	.has_msr_atom_pkg_c6_residency = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE,
	.rapl_quirk_tdp = 30,
};
917
/* Airmont Atom feature set. */
static const struct platform_features amt_features = {
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_133MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_AMT,
	.trl_msrs = TRL_BASE,
};

/* Goldmont Atom: 19.2 MHz crystal, deep package C-states. */
static const struct platform_features gmt_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.crystal_freq = 19200000,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
};

/* Goldmont Atom server (Denverton): 25 MHz crystal, DRAM RAPL. */
static const struct platform_features gmtd_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.crystal_freq = 25000000,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.has_msr_core_c1_res = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS,
};

/* Goldmont Plus Atom feature set. */
static const struct platform_features gmtp_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.crystal_freq = 19200000,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
};

/* Tremont Atom feature set (client). */
static const struct platform_features tmt_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
	.enable_tsc_tweak = 1,
};

/* Tremont Atom server (Snow Ridge) variant. */
static const struct platform_features tmtd_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6,
	.cst_limit = CST_LIMIT_GMT,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL,
};
985
/* Xeon Phi (Knights Landing / Knights Mill) feature set. */
static const struct platform_features knl_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_KNL,
	.has_msr_knl_core_c6_residency = 1,
	.trl_msrs = TRL_KNL,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
	.has_fixed_rapl_unit = 1,
	.need_perf_multiplier = 1,
};

/* All-zero fallback used with --force on otherwise unsupported CPUs. */
static const struct platform_features default_features = {
};

/* AMD/Hygon Family 17h+ parts that advertise RAPL via CPUID 0x80000007. */
static const struct platform_features amd_features_with_rapl = {
	.rapl_msrs = RAPL_AMD_F17H,
	.has_per_core_rapl = 1,
	.rapl_quirk_tdp = 280,	/* This is the max stock TDP of HEDT/Server Fam17h+ chips */
};
1008
/*
 * VFM (vendor/family/model) -> feature-table dispatch.
 * Scanned linearly by probe_platform_features(); NULL .features terminates.
 */
static const struct platform_data turbostat_pdata[] = {
	{ INTEL_NEHALEM, &nhm_features },
	{ INTEL_NEHALEM_G, &nhm_features },
	{ INTEL_NEHALEM_EP, &nhm_features },
	{ INTEL_NEHALEM_EX, &nhx_features },
	{ INTEL_WESTMERE, &nhm_features },
	{ INTEL_WESTMERE_EP, &nhm_features },
	{ INTEL_WESTMERE_EX, &nhx_features },
	{ INTEL_SANDYBRIDGE, &snb_features },
	{ INTEL_SANDYBRIDGE_X, &snx_features },
	{ INTEL_IVYBRIDGE, &ivb_features },
	{ INTEL_IVYBRIDGE_X, &ivx_features },
	{ INTEL_HASWELL, &hsw_features },
	{ INTEL_HASWELL_X, &hsx_features },
	{ INTEL_HASWELL_L, &hswl_features },
	{ INTEL_HASWELL_G, &hswg_features },
	{ INTEL_BROADWELL, &bdw_features },
	{ INTEL_BROADWELL_G, &bdwg_features },
	{ INTEL_BROADWELL_X, &bdx_features },
	{ INTEL_BROADWELL_D, &bdx_features },
	{ INTEL_SKYLAKE_L, &skl_features },
	{ INTEL_SKYLAKE, &skl_features },
	{ INTEL_SKYLAKE_X, &skx_features },
	{ INTEL_KABYLAKE_L, &skl_features },
	{ INTEL_KABYLAKE, &skl_features },
	{ INTEL_COMETLAKE, &skl_features },
	{ INTEL_COMETLAKE_L, &skl_features },
	{ INTEL_CANNONLAKE_L, &cnl_features },
	{ INTEL_ICELAKE_X, &icx_features },
	{ INTEL_ICELAKE_D, &icx_features },
	{ INTEL_ICELAKE_L, &cnl_features },
	{ INTEL_ICELAKE_NNPI, &cnl_features },
	{ INTEL_ROCKETLAKE, &cnl_features },
	{ INTEL_TIGERLAKE_L, &cnl_features },
	{ INTEL_TIGERLAKE, &cnl_features },
	{ INTEL_SAPPHIRERAPIDS_X, &spr_features },
	{ INTEL_EMERALDRAPIDS_X, &spr_features },
	{ INTEL_GRANITERAPIDS_X, &spr_features },
	{ INTEL_GRANITERAPIDS_D, &spr_features },
	{ INTEL_PANTHERCOVE_X, &dmr_features },
	{ INTEL_LAKEFIELD, &cnl_features },
	{ INTEL_ALDERLAKE, &adl_features },
	{ INTEL_ALDERLAKE_L, &adl_features },
	{ INTEL_RAPTORLAKE, &adl_features },
	{ INTEL_RAPTORLAKE_P, &adl_features },
	{ INTEL_RAPTORLAKE_S, &adl_features },
	{ INTEL_BARTLETTLAKE, &adl_features },
	{ INTEL_METEORLAKE, &adl_features },
	{ INTEL_METEORLAKE_L, &adl_features },
	{ INTEL_ARROWLAKE_H, &adl_features },
	{ INTEL_ARROWLAKE_U, &adl_features },
	{ INTEL_ARROWLAKE, &adl_features },
	{ INTEL_LUNARLAKE_M, &lnl_features },
	{ INTEL_PANTHERLAKE_L, &lnl_features },
	{ INTEL_ATOM_SILVERMONT, &slv_features },
	{ INTEL_ATOM_SILVERMONT_D, &slvd_features },
	{ INTEL_ATOM_AIRMONT, &amt_features },
	{ INTEL_ATOM_GOLDMONT, &gmt_features },
	{ INTEL_ATOM_GOLDMONT_D, &gmtd_features },
	{ INTEL_ATOM_GOLDMONT_PLUS, &gmtp_features },
	{ INTEL_ATOM_TREMONT_D, &tmtd_features },
	{ INTEL_ATOM_TREMONT, &tmt_features },
	{ INTEL_ATOM_TREMONT_L, &tmt_features },
	{ INTEL_ATOM_GRACEMONT, &adl_features },
	{ INTEL_ATOM_CRESTMONT_X, &srf_features },
	{ INTEL_ATOM_CRESTMONT, &grr_features },
	{ INTEL_ATOM_DARKMONT_X, &srf_features },
	{ INTEL_XEON_PHI_KNL, &knl_features },
	{ INTEL_XEON_PHI_KNM, &knl_features },
	/*
	 * Missing support for
	 * INTEL_ICELAKE
	 * INTEL_ATOM_SILVERMONT_MID
	 * INTEL_ATOM_SILVERMONT_MID2
	 * INTEL_ATOM_AIRMONT_NP
	 */
	{ 0, NULL },
};

/* Feature table for the CPU we are running on; set by probe_platform_features(). */
static const struct platform_features *platform;
1089
probe_platform_features(unsigned int family,unsigned int model)1090 void probe_platform_features(unsigned int family, unsigned int model)
1091 {
1092 int i;
1093
1094 if (authentic_amd || hygon_genuine) {
1095 /* fallback to default features on unsupported models */
1096 force_load++;
1097 if (max_extended_level >= 0x80000007) {
1098 unsigned int eax, ebx, ecx, edx;
1099
1100 __cpuid(0x80000007, eax, ebx, ecx, edx);
1101 /* RAPL (Fam 17h+) */
1102 if ((edx & (1 << 14)) && family >= 0x17)
1103 platform = &amd_features_with_rapl;
1104 }
1105 goto end;
1106 }
1107
1108 if (!genuine_intel)
1109 goto end;
1110
1111 for (i = 0; turbostat_pdata[i].features; i++) {
1112 if (VFM_FAMILY(turbostat_pdata[i].vfm) == family && VFM_MODEL(turbostat_pdata[i].vfm) == model) {
1113 platform = turbostat_pdata[i].features;
1114 return;
1115 }
1116 }
1117
1118 end:
1119 if (force_load && !platform) {
1120 fprintf(outf, "Forced to run on unsupported platform!\n");
1121 platform = &default_features;
1122 }
1123
1124 if (platform)
1125 return;
1126
1127 fprintf(stderr, "Unsupported platform detected.\n\tSee RUN THE LATEST VERSION on turbostat(8)\n");
1128 exit(1);
1129 }
1130
1131 /* Model specific support End */
1132
/* Default TjMax (degrees C) when MSR_TEMPERATURE_TARGET is unavailable. */
#define TJMAX_DEFAULT	100

/* MSRs that are not yet in the kernel-provided header. */
#define MSR_RAPL_PWR_UNIT	0xc0010299
#define MSR_CORE_ENERGY_STAT	0xc001029a
#define MSR_PKG_ENERGY_STAT	0xc001029b

/* NOTE: classic double-evaluation macro -- do not call with side effects. */
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Number of intervals where a counter went backwards (TSC sanity check). */
int backwards_count;
char *progname;

#define CPU_SUBSET_MAXCPUS	8192	/* need to use before probe... */
cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize,
    cpu_subset_size;
/* Upper bounds on user-added counters per topology level (MSR/perf and PMT). */
#define MAX_ADDED_THREAD_COUNTERS 24
#define MAX_ADDED_CORE_COUNTERS 8
#define MAX_ADDED_PACKAGE_COUNTERS 16
#define PMT_MAX_ADDED_THREAD_COUNTERS 24
#define PMT_MAX_ADDED_CORE_COUNTERS 8
#define PMT_MAX_ADDED_PACKAGE_COUNTERS 16
#define BITMASK_SIZE 32

/* Zero a true array; __must_be_array() makes this fail to compile on a pointer. */
#define ZERO_ARRAY(arr) (memset(arr, 0, sizeof(arr)) + __must_be_array(arr))
1158
/* Indexes used to map data read from perf and MSRs into global variables */
enum rapl_rci_index {
	RAPL_RCI_INDEX_ENERGY_PKG = 0,
	RAPL_RCI_INDEX_ENERGY_CORES = 1,
	RAPL_RCI_INDEX_DRAM = 2,
	RAPL_RCI_INDEX_GFX = 3,
	RAPL_RCI_INDEX_PKG_PERF_STATUS = 4,
	RAPL_RCI_INDEX_DRAM_PERF_STATUS = 5,
	RAPL_RCI_INDEX_CORE_ENERGY = 6,
	RAPL_RCI_INDEX_ENERGY_PLATFORM = 7,
	NUM_RAPL_COUNTERS,
};

/* Unit a RAPL counter is displayed in (joules vs. averaged watts). */
enum rapl_unit {
	RAPL_UNIT_INVALID,
	RAPL_UNIT_JOULES,
	RAPL_UNIT_WATTS,
};

/* Per-domain runtime state for RAPL counters; arrays indexed by rapl_rci_index. */
struct rapl_counter_info_t {
	unsigned long long data[NUM_RAPL_COUNTERS];
	enum counter_source source[NUM_RAPL_COUNTERS];
	unsigned long long flags[NUM_RAPL_COUNTERS];
	double scale[NUM_RAPL_COUNTERS];
	enum rapl_unit unit[NUM_RAPL_COUNTERS];
	unsigned long long msr[NUM_RAPL_COUNTERS];
	unsigned long long msr_mask[NUM_RAPL_COUNTERS];
	int msr_shift[NUM_RAPL_COUNTERS];

	/* perf group leader fd for this domain, or -1 when reading via MSR */
	int fd_perf;
};

/* struct rapl_counter_info_t for each RAPL domain */
struct rapl_counter_info_t *rapl_counter_info_perdomain;
unsigned int rapl_counter_info_perdomain_size;

#define RAPL_COUNTER_FLAG_PLATFORM_COUNTER (1u << 0)
#define RAPL_COUNTER_FLAG_USE_MSR_SUM (1u << 1)

/* Static description of one architectural RAPL counter (see table below). */
struct rapl_counter_arch_info {
	int feature_mask;	/* Mask for testing if the counter is supported on host */
	const char *perf_subsys;
	const char *perf_name;
	unsigned long long msr;
	unsigned long long msr_mask;
	int msr_shift;		/* Positive mean shift right, negative mean shift left */
	double *platform_rapl_msr_scale;	/* Scale applied to values read by MSR (platform dependent, filled at runtime) */
	unsigned int rci_index;	/* Maps data from perf counters to global variables */
	unsigned long long bic;
	double compat_scale;	/* Some counters require constant scaling to be in the same range as other, similar ones */
	unsigned long long flags;
};
1211
/*
 * Table of every RAPL counter turbostat knows how to read, with its perf
 * event name (when one exists) and fallback MSR.  Entries are enabled per
 * platform via platform->rapl_msrs & .feature_mask.
 */
static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = {
	{
		.feature_mask = RAPL_PKG,
		.perf_subsys = "power",
		.perf_name = "energy-pkg",
		.msr = MSR_PKG_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
		.bic = BIC_PkgWatt | BIC_Pkg_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_AMD_F17H,
		.perf_subsys = "power",
		.perf_name = "energy-pkg",
		.msr = MSR_PKG_ENERGY_STAT,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
		.bic = BIC_PkgWatt | BIC_Pkg_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_CORE_ENERGY_STATUS,
		.perf_subsys = "power",
		.perf_name = "energy-cores",
		.msr = MSR_PP0_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_CORES,
		.bic = BIC_CorWatt | BIC_Cor_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_DRAM,
		.perf_subsys = "power",
		.perf_name = "energy-ram",
		.msr = MSR_DRAM_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_dram_energy_units,
		.rci_index = RAPL_RCI_INDEX_DRAM,
		.bic = BIC_RAMWatt | BIC_RAM_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_GFX,
		.perf_subsys = "power",
		.perf_name = "energy-gpu",
		.msr = MSR_PP1_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_GFX,
		.bic = BIC_GFXWatt | BIC_GFX_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		/* Throttle-time counters below are MSR-only (no perf event). */
		.feature_mask = RAPL_PKG_PERF_STATUS,
		.perf_subsys = NULL,
		.perf_name = NULL,
		.msr = MSR_PKG_PERF_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_time_units,
		.rci_index = RAPL_RCI_INDEX_PKG_PERF_STATUS,
		.bic = BIC_PKG__,
		.compat_scale = 100.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_DRAM_PERF_STATUS,
		.perf_subsys = NULL,
		.perf_name = NULL,
		.msr = MSR_DRAM_PERF_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_time_units,
		.rci_index = RAPL_RCI_INDEX_DRAM_PERF_STATUS,
		.bic = BIC_RAM__,
		.compat_scale = 100.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		/* AMD per-core energy: 32-bit MSR, hence the narrower mask. */
		.feature_mask = RAPL_AMD_F17H,
		.perf_subsys = NULL,
		.perf_name = NULL,
		.msr = MSR_CORE_ENERGY_STAT,
		.msr_mask = 0xFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_CORE_ENERGY,
		.bic = BIC_CorWatt | BIC_Cor_J,
		.compat_scale = 1.0,
		.flags = 0,
	},
	{
		.feature_mask = RAPL_PSYS,
		.perf_subsys = "power",
		.perf_name = "energy-psys",
		.msr = MSR_PLATFORM_ENERGY_STATUS,
		.msr_mask = 0x00000000FFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_psys_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM,
		.bic = BIC_SysWatt | BIC_Sys_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_PLATFORM_COUNTER | RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
};
1331
/* One sampled RAPL value plus how to convert it for display. */
struct rapl_counter {
	unsigned long long raw_value;
	enum rapl_unit unit;
	double scale;
};

/* Indexes used to map data read from perf and MSRs into global variables */
enum ccstate_rci_index {
	CCSTATE_RCI_INDEX_C1_RESIDENCY = 0,
	CCSTATE_RCI_INDEX_C3_RESIDENCY = 1,
	CCSTATE_RCI_INDEX_C6_RESIDENCY = 2,
	CCSTATE_RCI_INDEX_C7_RESIDENCY = 3,
	PCSTATE_RCI_INDEX_C2_RESIDENCY = 4,
	PCSTATE_RCI_INDEX_C3_RESIDENCY = 5,
	PCSTATE_RCI_INDEX_C6_RESIDENCY = 6,
	PCSTATE_RCI_INDEX_C7_RESIDENCY = 7,
	PCSTATE_RCI_INDEX_C8_RESIDENCY = 8,
	PCSTATE_RCI_INDEX_C9_RESIDENCY = 9,
	PCSTATE_RCI_INDEX_C10_RESIDENCY = 10,
	NUM_CSTATE_COUNTERS,
};

/* Per-CPU runtime state for C-state residency counters. */
struct cstate_counter_info_t {
	unsigned long long data[NUM_CSTATE_COUNTERS];
	enum counter_source source[NUM_CSTATE_COUNTERS];
	unsigned long long msr[NUM_CSTATE_COUNTERS];
	/* Separate perf group leaders for core-scope and package-scope events. */
	int fd_perf_core;
	int fd_perf_pkg;
};

struct cstate_counter_info_t *ccstate_counter_info;
unsigned int ccstate_counter_info_size;

#define CSTATE_COUNTER_FLAG_COLLECT_PER_CORE   (1u << 0)
#define CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD ((1u << 1) | CSTATE_COUNTER_FLAG_COLLECT_PER_CORE)
#define CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY (1u << 2)

/* Static description of one architectural C-state counter (see table below). */
struct cstate_counter_arch_info {
	int feature_mask;	/* Mask for testing if the counter is supported on host */
	const char *perf_subsys;
	const char *perf_name;
	unsigned long long msr;
	unsigned int rci_index;	/* Maps data from perf counters to global variables */
	unsigned long long bic;
	unsigned long long flags;
	int pkg_cstate_limit;
};
1379
/*
 * Every core (CC*) and package (PC*) C-state residency counter turbostat
 * can read, with its perf cstate_core/cstate_pkg event and fallback MSR.
 * Enabled per platform via platform->supported_cstates & .feature_mask.
 */
static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = {
	{
		.feature_mask = CC1,
		.perf_subsys = "cstate_core",
		.perf_name = "c1-residency",
		.msr = MSR_CORE_C1_RES,
		.rci_index = CCSTATE_RCI_INDEX_C1_RESIDENCY,
		.bic = BIC_CPU_c1,
		.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD,
		.pkg_cstate_limit = 0,
	},
	{
		.feature_mask = CC3,
		.perf_subsys = "cstate_core",
		.perf_name = "c3-residency",
		.msr = MSR_CORE_C3_RESIDENCY,
		.rci_index = CCSTATE_RCI_INDEX_C3_RESIDENCY,
		.bic = BIC_CPU_c3,
		.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY,
		.pkg_cstate_limit = 0,
	},
	{
		.feature_mask = CC6,
		.perf_subsys = "cstate_core",
		.perf_name = "c6-residency",
		.msr = MSR_CORE_C6_RESIDENCY,
		.rci_index = CCSTATE_RCI_INDEX_C6_RESIDENCY,
		.bic = BIC_CPU_c6,
		.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY,
		.pkg_cstate_limit = 0,
	},
	{
		.feature_mask = CC7,
		.perf_subsys = "cstate_core",
		.perf_name = "c7-residency",
		.msr = MSR_CORE_C7_RESIDENCY,
		.rci_index = CCSTATE_RCI_INDEX_C7_RESIDENCY,
		.bic = BIC_CPU_c7,
		.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY,
		.pkg_cstate_limit = 0,
	},
	{
		.feature_mask = PC2,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c2-residency",
		.msr = MSR_PKG_C2_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C2_RESIDENCY,
		.bic = BIC_Pkgpc2,
		.flags = 0,
		.pkg_cstate_limit = PCL__2,
	},
	{
		.feature_mask = PC3,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c3-residency",
		.msr = MSR_PKG_C3_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C3_RESIDENCY,
		.bic = BIC_Pkgpc3,
		.flags = 0,
		.pkg_cstate_limit = PCL__3,
	},
	{
		.feature_mask = PC6,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c6-residency",
		.msr = MSR_PKG_C6_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C6_RESIDENCY,
		.bic = BIC_Pkgpc6,
		.flags = 0,
		.pkg_cstate_limit = PCL__6,
	},
	{
		.feature_mask = PC7,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c7-residency",
		.msr = MSR_PKG_C7_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C7_RESIDENCY,
		.bic = BIC_Pkgpc7,
		.flags = 0,
		.pkg_cstate_limit = PCL__7,
	},
	{
		.feature_mask = PC8,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c8-residency",
		.msr = MSR_PKG_C8_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C8_RESIDENCY,
		.bic = BIC_Pkgpc8,
		.flags = 0,
		.pkg_cstate_limit = PCL__8,
	},
	{
		.feature_mask = PC9,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c9-residency",
		.msr = MSR_PKG_C9_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C9_RESIDENCY,
		.bic = BIC_Pkgpc9,
		.flags = 0,
		.pkg_cstate_limit = PCL__9,
	},
	{
		.feature_mask = PC10,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c10-residency",
		.msr = MSR_PKG_C10_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C10_RESIDENCY,
		.bic = BIC_Pkgpc10,
		.flags = 0,
		.pkg_cstate_limit = PCL_10,
	},
};
1492
/* Indexes used to map data read from perf and MSRs into global variables */
enum msr_rci_index {
	MSR_RCI_INDEX_APERF = 0,
	MSR_RCI_INDEX_MPERF = 1,
	MSR_RCI_INDEX_SMI = 2,
	NUM_MSR_COUNTERS,
};

/* Per-CPU runtime state for APERF/MPERF/SMI counters. */
struct msr_counter_info_t {
	unsigned long long data[NUM_MSR_COUNTERS];
	enum counter_source source[NUM_MSR_COUNTERS];
	unsigned long long msr[NUM_MSR_COUNTERS];
	unsigned long long msr_mask[NUM_MSR_COUNTERS];
	/* perf group leader fd, or -1 when reading via /dev/cpu/.../msr */
	int fd_perf;
};

struct msr_counter_info_t *msr_counter_info;
unsigned int msr_counter_info_size;

/* Static description of one "msr" perf-subsystem counter. */
struct msr_counter_arch_info {
	const char *perf_subsys;
	const char *perf_name;
	unsigned long long msr;
	unsigned long long msr_mask;
	unsigned int rci_index;	/* Maps data from perf counters to global variables */
	bool needed;		/* requested by an enabled output column */
	bool present;		/* available on this system */
};

enum msr_arch_info_index {
	MSR_ARCH_INFO_APERF_INDEX = 0,
	MSR_ARCH_INFO_MPERF_INDEX = 1,
	MSR_ARCH_INFO_SMI_INDEX = 2,
};

static struct msr_counter_arch_info msr_counter_arch_infos[] = {
	[MSR_ARCH_INFO_APERF_INDEX] = {
		.perf_subsys = "msr",
		.perf_name = "aperf",
		.msr = MSR_IA32_APERF,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.rci_index = MSR_RCI_INDEX_APERF,
	},

	[MSR_ARCH_INFO_MPERF_INDEX] = {
		.perf_subsys = "msr",
		.perf_name = "mperf",
		.msr = MSR_IA32_MPERF,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.rci_index = MSR_RCI_INDEX_MPERF,
	},

	[MSR_ARCH_INFO_SMI_INDEX] = {
		/* SMI count is a 32-bit MSR, hence the narrower mask. */
		.perf_subsys = "msr",
		.perf_name = "smi",
		.msr = MSR_SMI_COUNT,
		.msr_mask = 0xFFFFFFFF,
		.rci_index = MSR_RCI_INDEX_SMI,
	},
};
1553
/* Can be redefined when compiling, useful for testing. */
#ifndef SYSFS_TELEM_PATH
#define SYSFS_TELEM_PATH "/sys/class/intel_pmt"
#endif

/* Meteor Lake die-C6 PMT counter: offset/bit-range within the telemetry region. */
#define PMT_COUNTER_MTL_DC6_OFFSET	120
#define PMT_COUNTER_MTL_DC6_LSB		0
#define PMT_COUNTER_MTL_DC6_MSB		63
#define PMT_MTL_DC6_GUID		0x1a067102
#define PMT_MTL_DC6_SEQ			0

/* Clearwater Forest module-C1E PMT counters: laid out per module. */
#define PMT_COUNTER_CWF_MC1E_OFFSET_BASE	20936
#define PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT	24
#define PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE	12
#define PMT_COUNTER_CWF_CPUS_PER_MODULE		4
#define PMT_COUNTER_CWF_MC1E_LSB		0
#define PMT_COUNTER_CWF_MC1E_MSB		63
#define PMT_CWF_MC1E_GUID			0x14421519

/* Clock used to convert PMT_TYPE_TCORE_CLOCK ticks to time. */
unsigned long long tcore_clock_freq_hz = 800000000;

#define PMT_COUNTER_NAME_SIZE_BYTES      16
#define PMT_COUNTER_TYPE_NAME_SIZE_BYTES 32

/* One mmaped PMT telemetry region; regions form a singly linked list. */
struct pmt_mmio {
	struct pmt_mmio *next;

	unsigned int guid;
	unsigned int size;

	/* Base pointer to the mmaped memory. */
	void *mmio_base;

	/*
	 * Offset to be applied to the mmio_base
	 * to get the beginning of the PMT counters for given GUID.
	 */
	unsigned long pmt_offset;
} *pmt_mmios;

/* How to interpret a raw PMT counter value. */
enum pmt_datatype {
	PMT_TYPE_RAW,
	PMT_TYPE_XTAL_TIME,
	PMT_TYPE_TCORE_CLOCK,
};

struct pmt_domain_info {
	/*
	 * Pointer to the MMIO obtained by applying a counter offset
	 * to the mmio_base of the mmaped region for the given GUID.
	 *
	 * This is where to read the raw value of the counter from.
	 */
	unsigned long *pcounter;
};

/* One user-visible PMT counter; counters form a singly linked list. */
struct pmt_counter {
	struct pmt_counter *next;

	/* PMT metadata */
	char name[PMT_COUNTER_NAME_SIZE_BYTES];
	enum pmt_datatype type;
	enum counter_scope scope;
	unsigned int lsb;
	unsigned int msb;

	/* BIC-like metadata */
	enum counter_format format;

	unsigned int num_domains;
	struct pmt_domain_info *domains;
};

/*
 * PMT telemetry directory iterator.
 * Used to iterate telemetry files in sysfs in correct order.
 */
struct pmt_diriter_t {
	DIR *dir;
	struct dirent **namelist;
	unsigned int num_names;
	unsigned int current_name_idx;
};
1637
/*
 * scandir() filter: accept only directory entries named "telem<N>".
 *
 * Returns 1 when the name matches the "telem%u" pattern, 0 otherwise.
 * Comparing the sscanf() result against 1 (instead of returning it raw)
 * avoids treating the EOF (-1) result for an empty name as a match.
 */
int pmt_telemdir_filter(const struct dirent *e)
{
	unsigned int dummy;

	return sscanf(e->d_name, "telem%u", &dummy) == 1;
}
1644
/*
 * scandir() comparator: order "telem<N>" entries by numeric suffix.
 *
 * Returns a proper three-way result (<0 / 0 / >0).  The previous
 * "aidx >= bidx" form returned only 0 or 1, which is not a valid
 * qsort()-style comparator (not antisymmetric: cmp(a,b) and cmp(b,a)
 * could both claim a >= b), so the resulting order was unspecified.
 */
int pmt_telemdir_sort(const struct dirent **a, const struct dirent **b)
{
	unsigned int aidx = 0, bidx = 0;

	sscanf((*a)->d_name, "telem%u", &aidx);
	sscanf((*b)->d_name, "telem%u", &bidx);

	return (aidx > bidx) - (aidx < bidx);
}
1654
pmt_diriter_next(struct pmt_diriter_t * iter)1655 const struct dirent *pmt_diriter_next(struct pmt_diriter_t *iter)
1656 {
1657 const struct dirent *ret = NULL;
1658
1659 if (!iter->dir)
1660 return NULL;
1661
1662 if (iter->current_name_idx >= iter->num_names)
1663 return NULL;
1664
1665 ret = iter->namelist[iter->current_name_idx];
1666 ++iter->current_name_idx;
1667
1668 return ret;
1669 }
1670
pmt_diriter_begin(struct pmt_diriter_t * iter,const char * pmt_root_path)1671 const struct dirent *pmt_diriter_begin(struct pmt_diriter_t *iter, const char *pmt_root_path)
1672 {
1673 int num_names = iter->num_names;
1674
1675 if (!iter->dir) {
1676 iter->dir = opendir(pmt_root_path);
1677 if (iter->dir == NULL)
1678 return NULL;
1679
1680 num_names = scandir(pmt_root_path, &iter->namelist, pmt_telemdir_filter, pmt_telemdir_sort);
1681 if (num_names == -1)
1682 return NULL;
1683 }
1684
1685 iter->current_name_idx = 0;
1686 iter->num_names = num_names;
1687
1688 return pmt_diriter_next(iter);
1689 }
1690
pmt_diriter_init(struct pmt_diriter_t * iter)1691 void pmt_diriter_init(struct pmt_diriter_t *iter)
1692 {
1693 memset(iter, 0, sizeof(*iter));
1694 }
1695
pmt_diriter_remove(struct pmt_diriter_t * iter)1696 void pmt_diriter_remove(struct pmt_diriter_t *iter)
1697 {
1698 if (iter->namelist) {
1699 for (unsigned int i = 0; i < iter->num_names; i++) {
1700 free(iter->namelist[i]);
1701 iter->namelist[i] = NULL;
1702 }
1703 }
1704
1705 free(iter->namelist);
1706 iter->namelist = NULL;
1707 iter->num_names = 0;
1708 iter->current_name_idx = 0;
1709
1710 closedir(iter->dir);
1711 iter->dir = NULL;
1712 }
1713
pmt_counter_get_width(const struct pmt_counter * p)1714 unsigned int pmt_counter_get_width(const struct pmt_counter *p)
1715 {
1716 return (p->msb - p->lsb) + 1;
1717 }
1718
pmt_counter_resize_(struct pmt_counter * pcounter,unsigned int new_size)1719 void pmt_counter_resize_(struct pmt_counter *pcounter, unsigned int new_size)
1720 {
1721 struct pmt_domain_info *new_mem;
1722
1723 new_mem = (struct pmt_domain_info *)reallocarray(pcounter->domains, new_size, sizeof(*pcounter->domains));
1724 if (!new_mem) {
1725 fprintf(stderr, "%s: failed to allocate memory for PMT counters\n", __func__);
1726 exit(1);
1727 }
1728
1729 /* Zero initialize just allocated memory. */
1730 const size_t num_new_domains = new_size - pcounter->num_domains;
1731
1732 memset(&new_mem[pcounter->num_domains], 0, num_new_domains * sizeof(*pcounter->domains));
1733
1734 pcounter->num_domains = new_size;
1735 pcounter->domains = new_mem;
1736 }
1737
pmt_counter_resize(struct pmt_counter * pcounter,unsigned int new_size)1738 void pmt_counter_resize(struct pmt_counter *pcounter, unsigned int new_size)
1739 {
1740 /*
1741 * Allocate more memory ahead of time.
1742 *
1743 * Always allocate space for at least 8 elements
1744 * and double the size when growing.
1745 */
1746 if (new_size < 8)
1747 new_size = 8;
1748 new_size = MAX(new_size, pcounter->num_domains * 2);
1749
1750 pmt_counter_resize_(pcounter, new_size);
1751 }
1752
/*
 * Per-CPU-thread sample.  Two full snapshots ("even"/"odd") are kept and
 * differenced each measurement interval.
 */
struct thread_data {
	struct timeval tv_begin;
	struct timeval tv_end;
	struct timeval tv_delta;
	unsigned long long tsc;
	unsigned long long aperf;
	unsigned long long mperf;
	unsigned long long c1;
	unsigned long long instr_count;
	unsigned long long irq_count;
	unsigned long long nmi_count;
	unsigned int smi_count;
	unsigned int cpu_id;
	unsigned int apic_id;
	unsigned int x2apic_id;
	unsigned int flags;
	bool is_atom;
	unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
	unsigned long long perf_counter[MAX_ADDED_THREAD_COUNTERS];
	unsigned long long pmt_counter[PMT_MAX_ADDED_THREAD_COUNTERS];
} *thread_even, *thread_odd;

/* Per-physical-core sample; same even/odd double-buffering as thread_data. */
struct core_data {
	int base_cpu;
	unsigned long long c3;
	unsigned long long c6;
	unsigned long long c7;
	unsigned long long mc6_us;	/* duplicate as per-core for now, even though per module */
	unsigned int core_temp_c;
	struct rapl_counter core_energy;	/* MSR_CORE_ENERGY_STAT */
	unsigned int core_id;
	unsigned long long core_throt_cnt;
	unsigned long long counter[MAX_ADDED_CORE_COUNTERS];
	unsigned long long perf_counter[MAX_ADDED_CORE_COUNTERS];
	unsigned long long pmt_counter[PMT_MAX_ADDED_CORE_COUNTERS];
} *core_even, *core_odd;

/* Per-package sample; same even/odd double-buffering as thread_data. */
struct pkg_data {
	int base_cpu;
	unsigned long long pc2;
	unsigned long long pc3;
	unsigned long long pc6;
	unsigned long long pc7;
	unsigned long long pc8;
	unsigned long long pc9;
	unsigned long long pc10;
	long long cpu_lpi;
	long long sys_lpi;
	unsigned long long pkg_wtd_core_c0;
	unsigned long long pkg_any_core_c0;
	unsigned long long pkg_any_gfxe_c0;
	unsigned long long pkg_both_core_gfxe_c0;
	long long gfx_rc6_ms;
	unsigned int gfx_mhz;
	unsigned int gfx_act_mhz;
	long long sam_mc6_ms;
	unsigned int sam_mhz;
	unsigned int sam_act_mhz;
	unsigned int package_id;
	struct rapl_counter energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
	struct rapl_counter energy_dram;	/* MSR_DRAM_ENERGY_STATUS */
	struct rapl_counter energy_cores;	/* MSR_PP0_ENERGY_STATUS */
	struct rapl_counter energy_gfx;	/* MSR_PP1_ENERGY_STATUS */
	struct rapl_counter rapl_pkg_perf_status;	/* MSR_PKG_PERF_STATUS */
	struct rapl_counter rapl_dram_perf_status;	/* MSR_DRAM_PERF_STATUS */
	unsigned int pkg_temp_c;
	unsigned int uncore_mhz;
	unsigned long long die_c6;
	unsigned long long counter[MAX_ADDED_PACKAGE_COUNTERS];
	unsigned long long perf_counter[MAX_ADDED_PACKAGE_COUNTERS];
	unsigned long long pmt_counter[PMT_MAX_ADDED_PACKAGE_COUNTERS];
} *package_even, *package_odd;
1825
/* Convenience argument packs for the double-buffered counter sets. */
#define ODD_COUNTERS thread_odd, core_odd, package_odd
#define EVEN_COUNTERS thread_even, core_even, package_even

/*
 * Counter arrays are flat, laid out as [pkg][node][core][thread];
 * these macros compute the element address from topology coordinates.
 */
#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no)	      \
	((thread_base) +						      \
	 ((pkg_no) *							      \
	  topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
	 ((node_no) * topo.cores_per_node * topo.threads_per_core) +	      \
	 ((core_no) * topo.threads_per_core) +				      \
	 (thread_no))

#define GET_CORE(core_base, core_no, node_no, pkg_no)		\
	((core_base) +						\
	 ((pkg_no) * topo.nodes_per_pkg * topo.cores_per_node) +	\
	 ((node_no) * topo.cores_per_node) +			\
	 (core_no))

#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
1844
/*
 * Each MSR below is accumulated periodically into a 64-bit software
 * sum, so the result behaves as a monotonically increasing counter
 * regardless of the hardware register's actual bit width.
 */
enum {
	IDX_PKG_ENERGY,		/* MSR_PKG_ENERGY_STATUS (or AMD MSR_PKG_ENERGY_STAT) */
	IDX_DRAM_ENERGY,
	IDX_PP0_ENERGY,
	IDX_PP1_ENERGY,
	IDX_PKG_PERF,
	IDX_DRAM_PERF,
	IDX_PSYS_ENERGY,
	IDX_COUNT,		/* number of accumulated MSRs */
};
1860
int get_msr_sum(int cpu, off_t offset, unsigned long long *msr);

struct msr_sum_array {
	/* get_msr_sum() = sum + (get_msr() - last) */
	struct {
		/* The accumulated MSR value, updated by the periodic timer */
		unsigned long long sum;
		/* The raw MSR value recorded at the last timer tick */
		unsigned long long last;
	} entries[IDX_COUNT];
};

/* The per-cpu MSR sum array, indexed by cpu number. */
struct msr_sum_array *per_cpu_msr_sum;
1875
idx_to_offset(int idx)1876 off_t idx_to_offset(int idx)
1877 {
1878 off_t offset;
1879
1880 switch (idx) {
1881 case IDX_PKG_ENERGY:
1882 if (platform->rapl_msrs & RAPL_AMD_F17H)
1883 offset = MSR_PKG_ENERGY_STAT;
1884 else
1885 offset = MSR_PKG_ENERGY_STATUS;
1886 break;
1887 case IDX_DRAM_ENERGY:
1888 offset = MSR_DRAM_ENERGY_STATUS;
1889 break;
1890 case IDX_PP0_ENERGY:
1891 offset = MSR_PP0_ENERGY_STATUS;
1892 break;
1893 case IDX_PP1_ENERGY:
1894 offset = MSR_PP1_ENERGY_STATUS;
1895 break;
1896 case IDX_PKG_PERF:
1897 offset = MSR_PKG_PERF_STATUS;
1898 break;
1899 case IDX_DRAM_PERF:
1900 offset = MSR_DRAM_PERF_STATUS;
1901 break;
1902 case IDX_PSYS_ENERGY:
1903 offset = MSR_PLATFORM_ENERGY_STATUS;
1904 break;
1905 default:
1906 offset = -1;
1907 }
1908 return offset;
1909 }
1910
offset_to_idx(off_t offset)1911 int offset_to_idx(off_t offset)
1912 {
1913 int idx;
1914
1915 switch (offset) {
1916 case MSR_PKG_ENERGY_STATUS:
1917 case MSR_PKG_ENERGY_STAT:
1918 idx = IDX_PKG_ENERGY;
1919 break;
1920 case MSR_DRAM_ENERGY_STATUS:
1921 idx = IDX_DRAM_ENERGY;
1922 break;
1923 case MSR_PP0_ENERGY_STATUS:
1924 idx = IDX_PP0_ENERGY;
1925 break;
1926 case MSR_PP1_ENERGY_STATUS:
1927 idx = IDX_PP1_ENERGY;
1928 break;
1929 case MSR_PKG_PERF_STATUS:
1930 idx = IDX_PKG_PERF;
1931 break;
1932 case MSR_DRAM_PERF_STATUS:
1933 idx = IDX_DRAM_PERF;
1934 break;
1935 case MSR_PLATFORM_ENERGY_STATUS:
1936 idx = IDX_PSYS_ENERGY;
1937 break;
1938 default:
1939 idx = -1;
1940 }
1941 return idx;
1942 }
1943
idx_valid(int idx)1944 int idx_valid(int idx)
1945 {
1946 switch (idx) {
1947 case IDX_PKG_ENERGY:
1948 return platform->rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H);
1949 case IDX_DRAM_ENERGY:
1950 return platform->rapl_msrs & RAPL_DRAM;
1951 case IDX_PP0_ENERGY:
1952 return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS;
1953 case IDX_PP1_ENERGY:
1954 return platform->rapl_msrs & RAPL_GFX;
1955 case IDX_PKG_PERF:
1956 return platform->rapl_msrs & RAPL_PKG_PERF_STATUS;
1957 case IDX_DRAM_PERF:
1958 return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS;
1959 case IDX_PSYS_ENERGY:
1960 return platform->rapl_msrs & RAPL_PSYS;
1961 default:
1962 return 0;
1963 }
1964 }
1965
/*
 * Bookkeeping for all user-added (--add) counters, grouped by
 * access method (MSR, perf, PMT) and by topology scope.
 */
struct sys_counters {
	/* MSR added counters */
	unsigned int added_thread_counters;
	unsigned int added_core_counters;
	unsigned int added_package_counters;
	struct msr_counter *tp;		/* thread-scope list head */
	struct msr_counter *cp;		/* core-scope list head */
	struct msr_counter *pp;		/* package-scope list head */

	/* perf added counters */
	unsigned int added_thread_perf_counters;
	unsigned int added_core_perf_counters;
	unsigned int added_package_perf_counters;
	struct perf_counter_info *perf_tp;
	struct perf_counter_info *perf_cp;
	struct perf_counter_info *perf_pp;

	/* PMT (Platform Monitoring Technology) added counters */
	struct pmt_counter *pmt_tp;
	struct pmt_counter *pmt_cp;
	struct pmt_counter *pmt_pp;
} sys;
1987
free_msr_counters_(struct msr_counter ** pp)1988 static size_t free_msr_counters_(struct msr_counter **pp)
1989 {
1990 struct msr_counter *p = NULL;
1991 size_t num_freed = 0;
1992
1993 while (*pp) {
1994 p = *pp;
1995
1996 if (p->msr_num != 0) {
1997 *pp = p->next;
1998
1999 free(p);
2000 ++num_freed;
2001
2002 continue;
2003 }
2004
2005 pp = &p->next;
2006 }
2007
2008 return num_freed;
2009 }
2010
2011 /*
2012 * Free all added counters accessed via msr.
2013 */
free_sys_msr_counters(void)2014 static void free_sys_msr_counters(void)
2015 {
2016 /* Thread counters */
2017 sys.added_thread_counters -= free_msr_counters_(&sys.tp);
2018
2019 /* Core counters */
2020 sys.added_core_counters -= free_msr_counters_(&sys.cp);
2021
2022 /* Package counters */
2023 sys.added_package_counters -= free_msr_counters_(&sys.pp);
2024 }
2025
/* System-wide aggregate, printed as the one-line summary row. */
struct system_summary {
	struct thread_data threads;
	struct core_data cores;
	struct pkg_data packages;
} average;

/* Platform-scope counters, double-buffered like the per-topology sets. */
struct platform_counters {
	struct rapl_counter energy_psys;	/* MSR_PLATFORM_ENERGY_STATUS */
} platform_counters_odd, platform_counters_even;

/* Static topology description of one logical CPU; 'cpus' is indexed by cpu number. */
struct cpu_topology {
	int physical_package_id;
	int die_id;
	int logical_cpu_id;
	int physical_node_id;
	int logical_node_id;	/* 0-based count within the package */
	int physical_core_id;
	int thread_id;
	int type;		/* core type, e.g. from CPUID leaf 0x1A */
	cpu_set_t *put_ids;	/* Processing Unit/Thread IDs */
} *cpus;
2047
/* System-wide topology dimensions, discovered at startup. */
struct topo_params {
	int num_packages;
	int num_die;
	int num_cpus;
	int num_cores;
	/* "allowed" counts reflect the cpuset/--cpu restriction */
	int allowed_packages;
	int allowed_cpus;
	int allowed_cores;
	int max_cpu_num;
	int max_core_id;
	int max_package_id;
	int max_die_id;
	int max_node_num;
	int nodes_per_pkg;
	int cores_per_node;
	int threads_per_core;
} topo;

/* Timestamps bracketing the even/odd sample sets, and their difference. */
struct timeval tv_even, tv_odd, tv_delta;

int *irq_column_2_cpu;		/* /proc/interrupts column numbers */
int *irqs_per_cpu;		/* indexed by cpu_num */
int *nmi_per_cpu;		/* indexed by cpu_num */

void setup_all_buffers(bool startup);

/* sys_lpi_file points at one of the two candidate residency files below. */
char *sys_lpi_file;
char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec";
2077
cpu_is_not_present(int cpu)2078 int cpu_is_not_present(int cpu)
2079 {
2080 return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
2081 }
2082
cpu_is_not_allowed(int cpu)2083 int cpu_is_not_allowed(int cpu)
2084 {
2085 return !CPU_ISSET_S(cpu, cpu_allowed_setsize, cpu_allowed_set);
2086 }
2087
2088 /*
2089 * run func(thread, core, package) in topology order
2090 * skip non-present cpus
2091 */
2092
for_all_cpus(int (func)(struct thread_data *,struct core_data *,struct pkg_data *),struct thread_data * thread_base,struct core_data * core_base,struct pkg_data * pkg_base)2093 int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *),
2094 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
2095 {
2096 int retval, pkg_no, core_no, thread_no, node_no;
2097
2098 retval = 0;
2099
2100 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
2101 for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) {
2102 for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
2103 for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
2104 struct thread_data *t;
2105 struct core_data *c;
2106 struct pkg_data *p;
2107 t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
2108
2109 if (cpu_is_not_allowed(t->cpu_id))
2110 continue;
2111
2112 c = GET_CORE(core_base, core_no, node_no, pkg_no);
2113 p = GET_PKG(pkg_base, pkg_no);
2114
2115 retval |= func(t, c, p);
2116 }
2117 }
2118 }
2119 }
2120 return retval;
2121 }
2122
is_cpu_first_thread_in_core(struct thread_data * t,struct core_data * c,struct pkg_data * p)2123 int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2124 {
2125 UNUSED(p);
2126
2127 return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0);
2128 }
2129
is_cpu_first_core_in_package(struct thread_data * t,struct core_data * c,struct pkg_data * p)2130 int is_cpu_first_core_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2131 {
2132 UNUSED(c);
2133
2134 return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0);
2135 }
2136
/* True when t leads both its core and its package. */
int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	if (!is_cpu_first_thread_in_core(t, c, p))
		return 0;

	return is_cpu_first_core_in_package(t, c, p);
}
2141
cpu_migrate(int cpu)2142 int cpu_migrate(int cpu)
2143 {
2144 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
2145 CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
2146 if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
2147 return -1;
2148 else
2149 return 0;
2150 }
2151
get_msr_fd(int cpu)2152 int get_msr_fd(int cpu)
2153 {
2154 char pathname[32];
2155 int fd;
2156
2157 fd = fd_percpu[cpu];
2158
2159 if (fd)
2160 return fd;
2161 #if defined(ANDROID)
2162 sprintf(pathname, "/dev/msr%d", cpu);
2163 #else
2164 sprintf(pathname, "/dev/cpu/%d/msr", cpu);
2165 #endif
2166 fd = open(pathname, O_RDONLY);
2167 if (fd < 0)
2168 #if defined(ANDROID)
2169 err(-1, "%s open failed, try chown or chmod +r /dev/msr*, "
2170 "or run with --no-msr, or run as root", pathname);
2171 #else
2172 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, "
2173 "or run with --no-msr, or run as root", pathname);
2174 #endif
2175 fd_percpu[cpu] = fd;
2176
2177 return fd;
2178 }
2179
bic_disable_msr_access(void)2180 static void bic_disable_msr_access(void)
2181 {
2182 const unsigned long bic_msrs = BIC_Mod_c6 | BIC_CoreTmp |
2183 BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_PkgTmp;
2184
2185 bic_enabled &= ~bic_msrs;
2186
2187 free_sys_msr_counters();
2188 }
2189
perf_event_open(struct perf_event_attr * hw_event,pid_t pid,int cpu,int group_fd,unsigned long flags)2190 static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
2191 {
2192 assert(!no_perf);
2193
2194 return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
2195 }
2196
open_perf_counter(int cpu,unsigned int type,unsigned int config,int group_fd,__u64 read_format)2197 static long open_perf_counter(int cpu, unsigned int type, unsigned int config, int group_fd, __u64 read_format)
2198 {
2199 struct perf_event_attr attr;
2200 const pid_t pid = -1;
2201 const unsigned long flags = 0;
2202
2203 assert(!no_perf);
2204
2205 memset(&attr, 0, sizeof(struct perf_event_attr));
2206
2207 attr.type = type;
2208 attr.size = sizeof(struct perf_event_attr);
2209 attr.config = config;
2210 attr.disabled = 0;
2211 attr.sample_type = PERF_SAMPLE_IDENTIFIER;
2212 attr.read_format = read_format;
2213
2214 const int fd = perf_event_open(&attr, pid, cpu, group_fd, flags);
2215
2216 return fd;
2217 }
2218
get_instr_count_fd(int cpu)2219 int get_instr_count_fd(int cpu)
2220 {
2221 if (fd_instr_count_percpu[cpu])
2222 return fd_instr_count_percpu[cpu];
2223
2224 fd_instr_count_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
2225
2226 return fd_instr_count_percpu[cpu];
2227 }
2228
get_msr(int cpu,off_t offset,unsigned long long * msr)2229 int get_msr(int cpu, off_t offset, unsigned long long *msr)
2230 {
2231 ssize_t retval;
2232
2233 assert(!no_msr);
2234
2235 retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
2236
2237 if (retval != sizeof *msr)
2238 err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);
2239
2240 return 0;
2241 }
2242
add_msr_counter(int cpu,off_t offset)2243 int add_msr_counter(int cpu, off_t offset)
2244 {
2245 ssize_t retval;
2246 unsigned long long value;
2247
2248 if (no_msr)
2249 return -1;
2250
2251 if (!offset)
2252 return -1;
2253
2254 retval = pread(get_msr_fd(cpu), &value, sizeof(value), offset);
2255
2256 /* if the read failed, the probe fails */
2257 if (retval != sizeof(value))
2258 return -1;
2259
2260 if (value == 0)
2261 return 0;
2262
2263 return 1;
2264 }
2265
/*
 * Probe the RAPL MSR described by 'cai' on 'cpu'.
 * Returns -1 when the platform lacks the feature or the MSR is
 * unreadable; 1 when the probe succeeds.
 */
int add_rapl_msr_counter(int cpu, const struct rapl_counter_arch_info *cai)
{
	int ret;

	if (!(platform->rapl_msrs & cai->feature_mask))
		return -1;

	/* ret: -1 read failed, 0 read back zero, 1 read back non-zero */
	ret = add_msr_counter(cpu, cai->msr);
	if (ret < 0)
		return -1;

	switch (cai->rci_index) {
	case RAPL_RCI_INDEX_ENERGY_PKG:
	case RAPL_RCI_INDEX_ENERGY_CORES:
	case RAPL_RCI_INDEX_DRAM:
	case RAPL_RCI_INDEX_GFX:
	case RAPL_RCI_INDEX_ENERGY_PLATFORM:
		/*
		 * NOTE(review): this arm returns 1 and the fall-through path
		 * below also returns 1, so the switch currently has no
		 * observable effect.  Confirm whether a zero energy reading
		 * (ret == 0) was intended to fail the probe instead.
		 */
		if (ret == 0)
			return 1;
	}

	/* PKG,DRAM_PERF_STATUS MSRs, can return any value */
	return 1;
}
2290
2291 /* Convert CPU ID to domain ID for given added perf counter. */
cpu_to_domain(const struct perf_counter_info * pc,int cpu)2292 unsigned int cpu_to_domain(const struct perf_counter_info *pc, int cpu)
2293 {
2294 switch (pc->scope) {
2295 case SCOPE_CPU:
2296 return cpu;
2297
2298 case SCOPE_CORE:
2299 return cpus[cpu].physical_core_id;
2300
2301 case SCOPE_PACKAGE:
2302 return cpus[cpu].physical_package_id;
2303 }
2304
2305 __builtin_unreachable();
2306 }
2307
#define MAX_DEFERRED 16
/* --show/--hide names not matching a built-in column, resolved later */
char *deferred_add_names[MAX_DEFERRED];
char *deferred_skip_names[MAX_DEFERRED];
int deferred_add_index;
int deferred_skip_index;

/*
 * HIDE_LIST - hide this list of counters, show the rest [default]
 * SHOW_LIST - show this list of counters, hide the rest
 */
enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;
2319
/* Print command-line usage to the current output stream and return. */
void help(void)
{
	fprintf(outf,
		"Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
		"\n"
		"Turbostat forks the specified COMMAND and prints statistics\n"
		"when COMMAND completes.\n"
		"If no COMMAND is specified, turbostat wakes every 5-seconds\n"
		"to print statistics, until interrupted.\n"
		"  -a, --add counter\n"
		"		add a counter\n"
		"		  eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
		"		  eg. --add perf/cstate_pkg/c2-residency,package,delta,percent,perfPC2\n"
		"		  eg. --add pmt,name=XTAL,type=raw,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102\n"
		"  -c, --cpu cpu-set\n"
		"		limit output to summary plus cpu-set:\n"
		"		  {core | package | j,k,l..m,n-p }\n"
		"  -d, --debug\n"
		"		displays usec, Time_Of_Day_Seconds and more debugging\n"
		"		debug messages are printed to stderr\n"
		"  -D, --Dump\n"
		"		displays the raw counter values\n"
		"  -e, --enable	[all | column]\n"
		"		shows all or the specified disabled column\n"
		"  -f, --force\n"
		"		force load turbostat with minimum default features on unsupported platforms.\n"
		"  -H, --hide [column | column,column,...]\n"
		"		hide the specified column(s)\n"
		"  -i, --interval sec.subsec\n"
		"		override default 5-second measurement interval\n"
		"  -J, --Joules\n"
		"		displays energy in Joules instead of Watts\n"
		"  -l, --list\n"
		"		list column headers only\n"
		"  -M, --no-msr\n"
		"		disable all uses of the MSR driver\n"
		"  -P, --no-perf\n"
		"		disable all uses of the perf API\n"
		"  -n, --num_iterations num\n"
		"		number of the measurement iterations\n"
		"  -N, --header_iterations num\n"
		"		print header every num iterations\n"
		"  -o, --out file\n"
		"		create or truncate \"file\" for all output\n"
		"  -q, --quiet\n"
		"		skip decoding system configuration header\n"
		"  -s, --show [column | column,column,...]\n"
		"		show only the specified column(s)\n"
		"  -S, --Summary\n"
		"		limits output to 1-line system summary per interval\n"
		"  -T, --TCC temperature\n"
		"		sets the Thermal Control Circuit temperature in\n"
		"		  degrees Celsius\n"
		"  -h, --help\n"
		"		print this help message\n"
		"  -v, --version\n\t\tprint version information\n\nFor more help, run \"man turbostat\"\n");
}
2377
/*
 * bic_lookup
 * for all the strings in comma separate name_list,
 * set the approprate bit in return value.
 * Unrecognized names are stashed on the deferred add/skip lists
 * (depending on 'mode') to be matched against added counters later.
 * Note: mutates name_list in place (commas become NULs).
 */
unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
{
	unsigned int i;
	unsigned long long retval = 0;

	while (name_list) {
		char *comma;

		comma = strchr(name_list, ',');

		if (comma)
			*comma = '\0';

		for (i = 0; i < MAX_BIC; ++i) {
			if (!strcmp(name_list, bic[i].name)) {
				retval |= (1ULL << i);
				break;
			}
			/* group keywords select whole sets of columns */
			if (!strcmp(name_list, "all")) {
				retval |= ~0;	/* ~0 sign-extends: sets every bit */
				break;
			} else if (!strcmp(name_list, "topology")) {
				retval |= BIC_GROUP_TOPOLOGY;
				break;
			} else if (!strcmp(name_list, "power")) {
				retval |= BIC_GROUP_THERMAL_PWR;
				break;
			} else if (!strcmp(name_list, "idle")) {
				retval |= BIC_GROUP_IDLE;
				break;
			} else if (!strcmp(name_list, "swidle")) {
				retval |= BIC_GROUP_SW_IDLE;
				break;
			} else if (!strcmp(name_list, "sysfs")) {	/* legacy compatibility */
				retval |= BIC_GROUP_SW_IDLE;
				break;
			} else if (!strcmp(name_list, "hwidle")) {
				retval |= BIC_GROUP_HW_IDLE;
				break;
			} else if (!strcmp(name_list, "frequency")) {
				retval |= BIC_GROUP_FREQUENCY;
				break;
			} else if (!strcmp(name_list, "other")) {
				retval |= BIC_OTHER;
				break;
			}

		}
		if (i == MAX_BIC) {
			/*
			 * NOTE(review): this print is not gated on 'debug',
			 * unlike the quoted duplicate below -- confirm it is
			 * meant to appear unconditionally on stderr.
			 */
			fprintf(stderr, "deferred %s\n", name_list);
			if (mode == SHOW_LIST) {
				deferred_add_names[deferred_add_index++] = name_list;
				if (deferred_add_index >= MAX_DEFERRED) {
					fprintf(stderr, "More than max %d un-recognized --add options '%s'\n",
						MAX_DEFERRED, name_list);
					help();
					exit(1);
				}
			} else {
				deferred_skip_names[deferred_skip_index++] = name_list;
				if (debug)
					fprintf(stderr, "deferred \"%s\"\n", name_list);
				if (deferred_skip_index >= MAX_DEFERRED) {
					fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
						MAX_DEFERRED, name_list);
					help();
					exit(1);
				}
			}
		}

		/* advance past the comma we NUL-terminated above */
		name_list = comma;
		if (name_list)
			name_list++;

	}
	return retval;
}
2461
/*
 * Emit one header row into the output buffer (outp), one column name
 * per enabled BIC_* flag or added counter, separated by 'delim'.
 * 'printed' suppresses the delimiter before the very first column.
 * Column order here must match the data-row emitters.
 */
void print_header(char *delim)
{
	struct msr_counter *mp;
	struct perf_counter_info *pp;
	struct pmt_counter *ppmt;
	int printed = 0;

	/* topology / identification columns */
	if (DO_BIC(BIC_USEC))
		outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
	if (DO_BIC(BIC_TOD))
		outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Package))
		outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Die))
		outp += sprintf(outp, "%sDie", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Node))
		outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Core))
		outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPU))
		outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
	if (DO_BIC(BIC_APIC))
		outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
	if (DO_BIC(BIC_X2APIC))
		outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Avg_MHz))
		outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Busy))
		outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Bzy_MHz))
		outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
	if (DO_BIC(BIC_TSC_MHz))
		outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));

	if (DO_BIC(BIC_IPC))
		outp += sprintf(outp, "%sIPC", (printed++ ? delim : ""));

	if (DO_BIC(BIC_IRQ)) {
		if (sums_need_wide_columns)
			outp += sprintf(outp, "%s     IRQ", (printed++ ? delim : ""));
		else
			outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
	}
	if (DO_BIC(BIC_NMI)) {
		if (sums_need_wide_columns)
			outp += sprintf(outp, "%s     NMI", (printed++ ? delim : ""));
		else
			outp += sprintf(outp, "%sNMI", (printed++ ? delim : ""));
	}

	if (DO_BIC(BIC_SMI))
		outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));

	/* user-added thread-scope counters: MSR, then perf, then PMT */
	for (mp = sys.tp; mp; mp = mp->next) {

		if (mp->format == FORMAT_RAW) {
			if (mp->width == 64)
				outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
			else
				outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
		} else {
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
			else
				outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
		}
	}

	for (pp = sys.perf_tp; pp; pp = pp->next) {

		if (pp->format == FORMAT_RAW) {
			if (pp->width == 64)
				outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name);
			else
				outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name);
		} else {
			if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name);
			else
				outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name);
		}
	}

	ppmt = sys.pmt_tp;
	while (ppmt) {
		switch (ppmt->type) {
		case PMT_TYPE_RAW:
			if (pmt_counter_get_width(ppmt) <= 32)
				outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name);
			else
				outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name);

			break;

		case PMT_TYPE_XTAL_TIME:
		case PMT_TYPE_TCORE_CLOCK:
			outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
			break;
		}

		ppmt = ppmt->next;
	}

	/* core-scope built-in columns */
	if (DO_BIC(BIC_CPU_c1))
		outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPU_c3))
		outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPU_c6))
		outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPU_c7))
		outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));

	if (DO_BIC(BIC_Mod_c6))
		outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));

	if (DO_BIC(BIC_CoreTmp))
		outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));

	if (DO_BIC(BIC_CORE_THROT_CNT))
		outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : ""));

	/* per-core RAPL appears here; package-level RAPL appears further down */
	if (platform->rapl_msrs && !rapl_joules) {
		if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
			outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
	} else if (platform->rapl_msrs && rapl_joules) {
		if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
			outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
	}

	/*
	 * NOTE(review): this loop (and the sys.pp loop below) passes bare
	 * 'delim' instead of (printed++ ? delim : "") as the thread-scope
	 * loops above do -- confirm whether the inconsistency is intended.
	 */
	for (mp = sys.cp; mp; mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 64)
				outp += sprintf(outp, "%s%18.18s", delim, mp->name);
			else
				outp += sprintf(outp, "%s%10.10s", delim, mp->name);
		} else {
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8s", delim, mp->name);
			else
				outp += sprintf(outp, "%s%s", delim, mp->name);
		}
	}

	for (pp = sys.perf_cp; pp; pp = pp->next) {

		if (pp->format == FORMAT_RAW) {
			if (pp->width == 64)
				outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name);
			else
				outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name);
		} else {
			if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name);
			else
				outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name);
		}
	}

	ppmt = sys.pmt_cp;
	while (ppmt) {
		switch (ppmt->type) {
		case PMT_TYPE_RAW:
			if (pmt_counter_get_width(ppmt) <= 32)
				outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name);
			else
				outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name);

			break;

		case PMT_TYPE_XTAL_TIME:
		case PMT_TYPE_TCORE_CLOCK:
			outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
			break;
		}

		ppmt = ppmt->next;
	}

	/* package-scope built-in columns */
	if (DO_BIC(BIC_PkgTmp))
		outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));

	if (DO_BIC(BIC_GFX_rc6))
		outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));

	if (DO_BIC(BIC_GFXMHz))
		outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));

	if (DO_BIC(BIC_GFXACTMHz))
		outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : ""));

	if (DO_BIC(BIC_SAM_mc6))
		outp += sprintf(outp, "%sSAM%%mc6", (printed++ ? delim : ""));

	if (DO_BIC(BIC_SAMMHz))
		outp += sprintf(outp, "%sSAMMHz", (printed++ ? delim : ""));

	if (DO_BIC(BIC_SAMACTMHz))
		outp += sprintf(outp, "%sSAMAMHz", (printed++ ? delim : ""));

	if (DO_BIC(BIC_Totl_c0))
		outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Any_c0))
		outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
	if (DO_BIC(BIC_GFX_c0))
		outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPUGFX))
		outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));

	if (DO_BIC(BIC_Pkgpc2))
		outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc3))
		outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc6))
		outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc7))
		outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc8))
		outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc9))
		outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc10))
		outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Diec6))
		outp += sprintf(outp, "%sDie%%c6", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPU_LPI))
		outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
	if (DO_BIC(BIC_SYS_LPI))
		outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));

	/* package RAPL, in Watts or Joules depending on --Joules */
	if (!rapl_joules) {
		if (DO_BIC(BIC_PkgWatt))
			outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
		if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
			outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
		if (DO_BIC(BIC_GFXWatt))
			outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
		if (DO_BIC(BIC_RAMWatt))
			outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
		if (DO_BIC(BIC_PKG__))
			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
		if (DO_BIC(BIC_RAM__))
			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
	} else {
		if (DO_BIC(BIC_Pkg_J))
			outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
		if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
			outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
		if (DO_BIC(BIC_GFX_J))
			outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
		if (DO_BIC(BIC_RAM_J))
			outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
		if (DO_BIC(BIC_PKG__))
			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
		if (DO_BIC(BIC_RAM__))
			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
	}
	if (DO_BIC(BIC_UNCORE_MHZ))
		outp += sprintf(outp, "%sUncMHz", (printed++ ? delim : ""));

	/* user-added package-scope counters: MSR, then perf, then PMT */
	for (mp = sys.pp; mp; mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 64)
				outp += sprintf(outp, "%s%18.18s", delim, mp->name);
			else if (mp->width == 32)
				outp += sprintf(outp, "%s%10.10s", delim, mp->name);
			else
				outp += sprintf(outp, "%s%7.7s", delim, mp->name);
		} else {
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8s", delim, mp->name);
			else
				outp += sprintf(outp, "%s%7.7s", delim, mp->name);
		}
	}

	for (pp = sys.perf_pp; pp; pp = pp->next) {

		if (pp->format == FORMAT_RAW) {
			if (pp->width == 64)
				outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name);
			else
				outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name);
		} else {
			if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name);
			else
				outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name);
		}
	}

	ppmt = sys.pmt_pp;
	while (ppmt) {
		switch (ppmt->type) {
		case PMT_TYPE_RAW:
			if (pmt_counter_get_width(ppmt) <= 32)
				outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name);
			else
				outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name);

			break;

		case PMT_TYPE_XTAL_TIME:
		case PMT_TYPE_TCORE_CLOCK:
			outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
			break;
		}

		ppmt = ppmt->next;
	}

	if (DO_BIC(BIC_SysWatt))
		outp += sprintf(outp, "%sSysWatt", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Sys_J))
		outp += sprintf(outp, "%sSys_J", (printed++ ? delim : ""));

	outp += sprintf(outp, "\n");
}
2779
dump_counters(struct thread_data * t,struct core_data * c,struct pkg_data * p)2780 int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2781 {
2782 int i;
2783 struct msr_counter *mp;
2784 struct platform_counters *pplat_cnt = p == package_odd ? &platform_counters_odd : &platform_counters_even;
2785
2786 outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
2787
2788 if (t) {
2789 outp += sprintf(outp, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags);
2790 outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
2791 outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
2792 outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
2793 outp += sprintf(outp, "c1: %016llX\n", t->c1);
2794
2795 if (DO_BIC(BIC_IPC))
2796 outp += sprintf(outp, "IPC: %lld\n", t->instr_count);
2797
2798 if (DO_BIC(BIC_IRQ))
2799 outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
2800 if (DO_BIC(BIC_NMI))
2801 outp += sprintf(outp, "IRQ: %lld\n", t->nmi_count);
2802 if (DO_BIC(BIC_SMI))
2803 outp += sprintf(outp, "SMI: %d\n", t->smi_count);
2804
2805 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
2806 outp +=
2807 sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
2808 t->counter[i], mp->sp->path);
2809 }
2810 }
2811
2812 if (c && is_cpu_first_thread_in_core(t, c, p)) {
2813 outp += sprintf(outp, "core: %d\n", c->core_id);
2814 outp += sprintf(outp, "c3: %016llX\n", c->c3);
2815 outp += sprintf(outp, "c6: %016llX\n", c->c6);
2816 outp += sprintf(outp, "c7: %016llX\n", c->c7);
2817 outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
2818 outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt);
2819
2820 const unsigned long long energy_value = c->core_energy.raw_value * c->core_energy.scale;
2821 const double energy_scale = c->core_energy.scale;
2822
2823 if (c->core_energy.unit == RAPL_UNIT_JOULES)
2824 outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale);
2825
2826 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
2827 outp +=
2828 sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
2829 c->counter[i], mp->sp->path);
2830 }
2831 outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
2832 }
2833
2834 if (p && is_cpu_first_core_in_package(t, c, p)) {
2835 outp += sprintf(outp, "package: %d\n", p->package_id);
2836
2837 outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
2838 outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
2839 outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
2840 outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
2841
2842 outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
2843 if (DO_BIC(BIC_Pkgpc3))
2844 outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
2845 if (DO_BIC(BIC_Pkgpc6))
2846 outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
2847 if (DO_BIC(BIC_Pkgpc7))
2848 outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
2849 outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
2850 outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
2851 outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
2852 outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
2853 outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
2854 outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg.raw_value);
2855 outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores.raw_value);
2856 outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx.raw_value);
2857 outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram.raw_value);
2858 outp += sprintf(outp, "Joules PSYS: %0llX\n", pplat_cnt->energy_psys.raw_value);
2859 outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status.raw_value);
2860 outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status.raw_value);
2861 outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
2862
2863 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
2864 outp +=
2865 sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
2866 p->counter[i], mp->sp->path);
2867 }
2868 }
2869
2870 outp += sprintf(outp, "\n");
2871
2872 return 0;
2873 }
2874
rapl_counter_get_value(const struct rapl_counter * c,enum rapl_unit desired_unit,double interval)2875 double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desired_unit, double interval)
2876 {
2877 assert(desired_unit != RAPL_UNIT_INVALID);
2878
2879 /*
2880 * For now we don't expect anything other than joules,
2881 * so just simplify the logic.
2882 */
2883 assert(c->unit == RAPL_UNIT_JOULES);
2884
2885 const double scaled = c->raw_value * c->scale;
2886
2887 if (desired_unit == RAPL_UNIT_WATTS)
2888 return scaled / interval;
2889 return scaled;
2890 }
2891
2892 /*
2893 * column formatting convention & formats
2894 */
/*
 * format_counters() - format one row of output (one CPU, or the summary
 * row when t == &average.threads) into the global output buffer.
 *
 * @t: per-thread deltas for this row
 * @c: per-core deltas; per-core columns print only for 1st thread in core
 * @p: per-package deltas; per-pkg columns print only for 1st core in pkg
 *
 * Column order here must match print_header().  Always returns 0 so it
 * can be used as a for_all_cpus() callback.
 */
int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	static int count;

	struct platform_counters *pplat_cnt = NULL;
	double interval_float, tsc;
	char *fmt8;
	int i;
	struct msr_counter *mp;
	struct perf_counter_info *pp;
	struct pmt_counter *ppmt;
	char *delim = "\t";
	int printed = 0;

	/* summary rows alternate between the even/odd platform counter sets */
	if (t == &average.threads) {
		pplat_cnt = count & 1 ? &platform_counters_odd : &platform_counters_even;
		++count;
	}

	/* if showing only 1st thread in core and this isn't one, bail out */
	if (show_core_only && !is_cpu_first_thread_in_core(t, c, p))
		return 0;

	/* if showing only 1st thread in pkg and this isn't one, bail out */
	if (show_pkg_only && !is_cpu_first_core_in_package(t, c, p))
		return 0;

	/*if not summary line and --cpu is used */
	if ((t != &average.threads) && (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
		return 0;

	if (DO_BIC(BIC_USEC)) {
		/* on each row, print how many usec each timestamp took to gather */
		struct timeval tv;

		timersub(&t->tv_end, &t->tv_begin, &tv);
		outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
	}

	/* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
	if (DO_BIC(BIC_TOD))
		outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);

	/* measurement interval length, in seconds */
	interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec / 1000000.0;

	tsc = t->tsc * tsc_tweak;

	/* topo columns, print blanks on 1st (average) line */
	if (t == &average.threads) {
		if (DO_BIC(BIC_Package))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		if (DO_BIC(BIC_Die))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		if (DO_BIC(BIC_Node))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		if (DO_BIC(BIC_Core))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		if (DO_BIC(BIC_CPU))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		if (DO_BIC(BIC_APIC))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		if (DO_BIC(BIC_X2APIC))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
	} else {
		if (DO_BIC(BIC_Package)) {
			if (p)
				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
			else
				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		}
		if (DO_BIC(BIC_Die)) {
			if (c)
				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id);
			else
				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		}
		if (DO_BIC(BIC_Node)) {
			if (t)
				outp += sprintf(outp, "%s%d",
						(printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id);
			else
				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		}
		if (DO_BIC(BIC_Core)) {
			if (c)
				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
			else
				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		}
		if (DO_BIC(BIC_CPU))
			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
		if (DO_BIC(BIC_APIC))
			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
		if (DO_BIC(BIC_X2APIC))
			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
	}

	/* Avg_MHz = aperf cycles / wall-clock interval */
	if (DO_BIC(BIC_Avg_MHz))
		outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float);

	/* Busy% = mperf (non-halted cycles) as a fraction of TSC cycles */
	if (DO_BIC(BIC_Busy))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc);

	if (DO_BIC(BIC_Bzy_MHz)) {
		if (has_base_hz)
			outp +=
			    sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
		else
			outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
					tsc / units * t->aperf / t->mperf / interval_float);
	}

	if (DO_BIC(BIC_TSC_MHz))
		outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc / units / interval_float);

	/* instructions retired per aperf cycle */
	if (DO_BIC(BIC_IPC))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf);

	/* IRQ */
	if (DO_BIC(BIC_IRQ)) {
		if (sums_need_wide_columns)
			outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
		else
			outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
	}

	/* NMI */
	if (DO_BIC(BIC_NMI)) {
		if (sums_need_wide_columns)
			outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->nmi_count);
		else
			outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->nmi_count);
	}

	/* SMI */
	if (DO_BIC(BIC_SMI))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);

	/* Added counters */
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 32)
				outp +=
				    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]);
			else
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
		} else if (mp->format == FORMAT_DELTA) {
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
			else
				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
		} else if (mp->format == FORMAT_PERCENT) {
			if (mp->type == COUNTER_USEC)
				outp +=
				    sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
					    t->counter[i] / interval_float / 10000);
			else
				outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc);
		}
	}

	/* Added perf counters */
	for (i = 0, pp = sys.perf_tp; pp; ++i, pp = pp->next) {
		if (pp->format == FORMAT_RAW) {
			if (pp->width == 32)
				outp +=
				    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""),
					    (unsigned int)t->perf_counter[i]);
			else
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->perf_counter[i]);
		} else if (pp->format == FORMAT_DELTA) {
			if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->perf_counter[i]);
			else
				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->perf_counter[i]);
		} else if (pp->format == FORMAT_PERCENT) {
			if (pp->type == COUNTER_USEC)
				outp +=
				    sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
					    t->perf_counter[i] / interval_float / 10000);
			else
				outp +=
				    sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->perf_counter[i] / tsc);
		}
	}

	/* Added PMT (telemetry) thread counters */
	for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) {
		const unsigned long value_raw = t->pmt_counter[i];
		double value_converted;
		switch (ppmt->type) {
		case PMT_TYPE_RAW:
			if (pmt_counter_get_width(ppmt) <= 32)
				outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""),
						(unsigned int)t->pmt_counter[i]);
			else
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->pmt_counter[i]);

			break;

		case PMT_TYPE_XTAL_TIME:
			value_converted = 100.0 * value_raw / crystal_hz / interval_float;
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
			break;

		case PMT_TYPE_TCORE_CLOCK:
			value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float;
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
		}
	}

	/* C1 */
	if (DO_BIC(BIC_CPU_c1))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc);

	/* print per-core data only for 1st thread in core */
	if (!is_cpu_first_thread_in_core(t, c, p))
		goto done;

	if (DO_BIC(BIC_CPU_c3))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc);
	if (DO_BIC(BIC_CPU_c6))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc);
	if (DO_BIC(BIC_CPU_c7))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc);

	/* Mod%c6 */
	if (DO_BIC(BIC_Mod_c6))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);

	if (DO_BIC(BIC_CoreTmp))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);

	/* Core throttle count */
	if (DO_BIC(BIC_CORE_THROT_CNT))
		outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt);

	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 32)
				outp +=
				    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]);
			else
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
		} else if (mp->format == FORMAT_DELTA) {
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
			else
				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
		} else if (mp->format == FORMAT_PERCENT) {
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc);
		}
	}

	for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) {
		if (pp->format == FORMAT_RAW) {
			if (pp->width == 32)
				outp +=
				    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""),
					    (unsigned int)c->perf_counter[i]);
			else
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->perf_counter[i]);
		} else if (pp->format == FORMAT_DELTA) {
			if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->perf_counter[i]);
			else
				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->perf_counter[i]);
		} else if (pp->format == FORMAT_PERCENT) {
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->perf_counter[i] / tsc);
		}
	}

	for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) {
		const unsigned long value_raw = c->pmt_counter[i];
		double value_converted;
		switch (ppmt->type) {
		case PMT_TYPE_RAW:
			if (pmt_counter_get_width(ppmt) <= 32)
				outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""),
						(unsigned int)c->pmt_counter[i]);
			else
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->pmt_counter[i]);

			break;

		case PMT_TYPE_XTAL_TIME:
			value_converted = 100.0 * value_raw / crystal_hz / interval_float;
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
			break;

		case PMT_TYPE_TCORE_CLOCK:
			value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float;
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
		}
	}

	/* shared format for all RAPL Watts/Joules columns below */
	fmt8 = "%s%.2f";

	if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
		outp +=
		    sprintf(outp, fmt8, (printed++ ? delim : ""),
			    rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float));
	if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
				rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float));

	/* print per-package data only for 1st core in package */
	if (!is_cpu_first_core_in_package(t, c, p))
		goto done;

	/* PkgTmp */
	if (DO_BIC(BIC_PkgTmp))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);

	/* GFXrc6 */
	if (DO_BIC(BIC_GFX_rc6)) {
		if (p->gfx_rc6_ms == -1) {	/* detect GFX counter reset */
			outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
		} else {
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
					p->gfx_rc6_ms / 10.0 / interval_float);
		}
	}

	/* GFXMHz */
	if (DO_BIC(BIC_GFXMHz))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);

	/* GFXACTMHz */
	if (DO_BIC(BIC_GFXACTMHz))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz);

	/* SAMmc6 */
	if (DO_BIC(BIC_SAM_mc6)) {
		if (p->sam_mc6_ms == -1) {	/* detect GFX counter reset */
			outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
		} else {
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
					p->sam_mc6_ms / 10.0 / interval_float);
		}
	}

	/* SAMMHz */
	if (DO_BIC(BIC_SAMMHz))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_mhz);

	/* SAMACTMHz */
	if (DO_BIC(BIC_SAMACTMHz))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_act_mhz);

	/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
	if (DO_BIC(BIC_Totl_c0))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc);
	if (DO_BIC(BIC_Any_c0))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc);
	if (DO_BIC(BIC_GFX_c0))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc);
	if (DO_BIC(BIC_CPUGFX))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc);

	if (DO_BIC(BIC_Pkgpc2))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc);
	if (DO_BIC(BIC_Pkgpc3))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc);
	if (DO_BIC(BIC_Pkgpc6))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc);
	if (DO_BIC(BIC_Pkgpc7))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc);
	if (DO_BIC(BIC_Pkgpc8))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc);
	if (DO_BIC(BIC_Pkgpc9))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc);
	if (DO_BIC(BIC_Pkgpc10))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc);

	/* die C6 residency is clocked by the crystal, not the TSC */
	if (DO_BIC(BIC_Diec6))
		outp +=
		    sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->die_c6 / crystal_hz / interval_float);

	if (DO_BIC(BIC_CPU_LPI)) {
		if (p->cpu_lpi >= 0)
			outp +=
			    sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
				    100.0 * p->cpu_lpi / 1000000.0 / interval_float);
		else
			outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
	}
	if (DO_BIC(BIC_SYS_LPI)) {
		if (p->sys_lpi >= 0)
			outp +=
			    sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
				    100.0 * p->sys_lpi / 1000000.0 / interval_float);
		else
			outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
	}

	if (DO_BIC(BIC_PkgWatt))
		outp +=
		    sprintf(outp, fmt8, (printed++ ? delim : ""),
			    rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float));
	if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
		outp +=
		    sprintf(outp, fmt8, (printed++ ? delim : ""),
			    rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float));
	if (DO_BIC(BIC_GFXWatt))
		outp +=
		    sprintf(outp, fmt8, (printed++ ? delim : ""),
			    rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float));
	if (DO_BIC(BIC_RAMWatt))
		outp +=
		    sprintf(outp, fmt8, (printed++ ? delim : ""),
			    rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float));
	if (DO_BIC(BIC_Pkg_J))
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
				rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float));
	if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
				rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float));
	if (DO_BIC(BIC_GFX_J))
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
				rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float));
	if (DO_BIC(BIC_RAM_J))
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
				rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float));
	if (DO_BIC(BIC_PKG__))
		outp +=
		    sprintf(outp, fmt8, (printed++ ? delim : ""),
			    rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float));
	if (DO_BIC(BIC_RAM__))
		outp +=
		    sprintf(outp, fmt8, (printed++ ? delim : ""),
			    rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float));
	/* UncMHz */
	if (DO_BIC(BIC_UNCORE_MHZ))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz);

	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 32)
				outp +=
				    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]);
			else
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
		} else if (mp->format == FORMAT_DELTA) {
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
			else
				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
		} else if (mp->format == FORMAT_PERCENT) {
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc);
		} else if (mp->type == COUNTER_K2M)
			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->counter[i] / 1000);
	}

	for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) {
		if (pp->format == FORMAT_RAW) {
			if (pp->width == 32)
				outp +=
				    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""),
					    (unsigned int)p->perf_counter[i]);
			else
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->perf_counter[i]);
		} else if (pp->format == FORMAT_DELTA) {
			if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->perf_counter[i]);
			else
				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->perf_counter[i]);
		} else if (pp->format == FORMAT_PERCENT) {
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->perf_counter[i] / tsc);
		} else if (pp->type == COUNTER_K2M) {
			outp +=
			    sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->perf_counter[i] / 1000);
		}
	}

	for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) {
		const unsigned long value_raw = p->pmt_counter[i];
		double value_converted;
		switch (ppmt->type) {
		case PMT_TYPE_RAW:
			if (pmt_counter_get_width(ppmt) <= 32)
				outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""),
						(unsigned int)p->pmt_counter[i]);
			else
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->pmt_counter[i]);

			break;

		case PMT_TYPE_XTAL_TIME:
			value_converted = 100.0 * value_raw / crystal_hz / interval_float;
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
			break;

		case PMT_TYPE_TCORE_CLOCK:
			value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float;
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
		}
	}

	/* platform (PSYS) columns only appear on the summary row */
	if (DO_BIC(BIC_SysWatt) && (t == &average.threads))
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
				rapl_counter_get_value(&pplat_cnt->energy_psys, RAPL_UNIT_WATTS, interval_float));
	if (DO_BIC(BIC_Sys_J) && (t == &average.threads))
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
				rapl_counter_get_value(&pplat_cnt->energy_psys, RAPL_UNIT_JOULES, interval_float));

done:
	/* terminate the row unless a newline is already in place */
	if (*(outp - 1) != '\n')
		outp += sprintf(outp, "\n");

	return 0;
}
3406
flush_output_stdout(void)3407 void flush_output_stdout(void)
3408 {
3409 FILE *filep;
3410
3411 if (outf == stderr)
3412 filep = stdout;
3413 else
3414 filep = outf;
3415
3416 fputs(output_buffer, filep);
3417 fflush(filep);
3418
3419 outp = output_buffer;
3420 }
3421
flush_output_stderr(void)3422 void flush_output_stderr(void)
3423 {
3424 fputs(output_buffer, outf);
3425 fflush(outf);
3426 outp = output_buffer;
3427 }
3428
format_all_counters(struct thread_data * t,struct core_data * c,struct pkg_data * p)3429 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3430 {
3431 static int count;
3432
3433 if ((!count || (header_iterations && !(count % header_iterations))) || !summary_only)
3434 print_header("\t");
3435
3436 format_counters(&average.threads, &average.cores, &average.packages);
3437
3438 count++;
3439
3440 if (summary_only)
3441 return;
3442
3443 for_all_cpus(format_counters, t, c, p);
3444 }
3445
/*
 * DELTA_WRAP32(new, old): old = (new - old) modulo 2^32, so a 32-bit
 * counter that wrapped between samples still yields the correct positive
 * delta.  The shifts discard bits above bit 31 before the subtraction's
 * result is shifted back down.  Result is stored into 'old'.
 */
#define DELTA_WRAP32(new, old) \
	old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32);
3448
/*
 * delta_package() - compute per-package deltas in place: old = new - old.
 *
 * Counter fields become interval deltas; instantaneous fields (temps,
 * frequencies) are simply copied from 'new'.  gfx_rc6_ms / sam_mc6_ms
 * going backwards is flagged with -1 so the formatter prints "**.**".
 *
 * Always returns 0.
 */
int delta_package(struct pkg_data *new, struct pkg_data *old)
{
	int i;
	struct msr_counter *mp;
	struct perf_counter_info *pp;
	struct pmt_counter *ppmt;

	if (DO_BIC(BIC_Totl_c0))
		old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
	if (DO_BIC(BIC_Any_c0))
		old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
	if (DO_BIC(BIC_GFX_c0))
		old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
	if (DO_BIC(BIC_CPUGFX))
		old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;

	old->pc2 = new->pc2 - old->pc2;
	if (DO_BIC(BIC_Pkgpc3))
		old->pc3 = new->pc3 - old->pc3;
	if (DO_BIC(BIC_Pkgpc6))
		old->pc6 = new->pc6 - old->pc6;
	if (DO_BIC(BIC_Pkgpc7))
		old->pc7 = new->pc7 - old->pc7;
	old->pc8 = new->pc8 - old->pc8;
	old->pc9 = new->pc9 - old->pc9;
	old->pc10 = new->pc10 - old->pc10;
	old->die_c6 = new->die_c6 - old->die_c6;
	old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
	old->sys_lpi = new->sys_lpi - old->sys_lpi;
	/* temperature is a point-in-time reading, not a delta */
	old->pkg_temp_c = new->pkg_temp_c;

	/* flag an error when rc6 counter resets/wraps */
	if (old->gfx_rc6_ms > new->gfx_rc6_ms)
		old->gfx_rc6_ms = -1;
	else
		old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;

	/* frequencies are point-in-time readings, not deltas */
	old->uncore_mhz = new->uncore_mhz;
	old->gfx_mhz = new->gfx_mhz;
	old->gfx_act_mhz = new->gfx_act_mhz;

	/* flag an error when mc6 counter resets/wraps */
	if (old->sam_mc6_ms > new->sam_mc6_ms)
		old->sam_mc6_ms = -1;
	else
		old->sam_mc6_ms = new->sam_mc6_ms - old->sam_mc6_ms;

	old->sam_mhz = new->sam_mhz;
	old->sam_act_mhz = new->sam_act_mhz;

	/* RAPL energy counters: raw delta; scale/unit stay as-is */
	old->energy_pkg.raw_value = new->energy_pkg.raw_value - old->energy_pkg.raw_value;
	old->energy_cores.raw_value = new->energy_cores.raw_value - old->energy_cores.raw_value;
	old->energy_gfx.raw_value = new->energy_gfx.raw_value - old->energy_gfx.raw_value;
	old->energy_dram.raw_value = new->energy_dram.raw_value - old->energy_dram.raw_value;
	old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value;
	old->rapl_dram_perf_status.raw_value =
	    new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value;

	/* user-added counters: RAW/AVERAGE copy, everything else deltas */
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			old->counter[i] = new->counter[i];
		else if (mp->format == FORMAT_AVERAGE)
			old->counter[i] = new->counter[i];
		else
			old->counter[i] = new->counter[i] - old->counter[i];
	}

	for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) {
		if (pp->format == FORMAT_RAW)
			old->perf_counter[i] = new->perf_counter[i];
		else if (pp->format == FORMAT_AVERAGE)
			old->perf_counter[i] = new->perf_counter[i];
		else
			old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i];
	}

	for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) {
		if (ppmt->format == FORMAT_RAW)
			old->pmt_counter[i] = new->pmt_counter[i];
		else
			old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i];
	}

	return 0;
}
3534
/*
 * delta_core() - compute per-core deltas in place: old = new - old.
 *
 * C-state residencies and throttle counts become interval deltas;
 * core_temp_c is a point-in-time reading and is copied.  The core RAPL
 * energy counter is 32 bits wide, so DELTA_WRAP32 handles wraparound.
 */
void delta_core(struct core_data *new, struct core_data *old)
{
	int i;
	struct msr_counter *mp;
	struct perf_counter_info *pp;
	struct pmt_counter *ppmt;

	old->c3 = new->c3 - old->c3;
	old->c6 = new->c6 - old->c6;
	old->c7 = new->c7 - old->c7;
	old->core_temp_c = new->core_temp_c;
	old->core_throt_cnt = new->core_throt_cnt - old->core_throt_cnt;
	old->mc6_us = new->mc6_us - old->mc6_us;

	DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value);

	/* user-added counters: RAW copies, everything else deltas */
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			old->counter[i] = new->counter[i];
		else
			old->counter[i] = new->counter[i] - old->counter[i];
	}

	for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) {
		if (pp->format == FORMAT_RAW)
			old->perf_counter[i] = new->perf_counter[i];
		else
			old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i];
	}

	for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) {
		if (ppmt->format == FORMAT_RAW)
			old->pmt_counter[i] = new->pmt_counter[i];
		else
			old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i];
	}
}
3572
soft_c1_residency_display(int bic)3573 int soft_c1_residency_display(int bic)
3574 {
3575 if (!DO_BIC(BIC_CPU_c1) || platform->has_msr_core_c1_res)
3576 return 0;
3577
3578 return DO_BIC_READ(bic);
3579 }
3580
3581 /*
3582 * old = new - old
3583 */
/*
 * delta_thread() - compute per-thread deltas in place: old = new - old.
 *
 * @core_delta: the already-computed core delta for this thread's core,
 *              used to derive C1 residency when no C1 MSR exists.
 *
 * Returns 0 on success, -1 when aperf/mperf did not advance (that
 * sample must be discarded by the caller).  Aborts via errx() if the
 * TSC appears to have stopped.
 */
int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta)
{
	int i;
	struct msr_counter *mp;
	struct perf_counter_info *pp;
	struct pmt_counter *ppmt;

	/* we run cpuid just the 1st time, copy the results */
	if (DO_BIC(BIC_APIC))
		new->apic_id = old->apic_id;
	if (DO_BIC(BIC_X2APIC))
		new->x2apic_id = old->x2apic_id;

	/*
	 * the timestamps from start of measurement interval are in "old"
	 * the timestamp from end of measurement interval are in "new"
	 * over-write old w/ new so we can print end of interval values
	 */

	timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta);
	old->tv_begin = new->tv_begin;
	old->tv_end = new->tv_end;

	old->tsc = new->tsc - old->tsc;

	/* check for TSC < 1 Mcycles over interval */
	if (old->tsc < (1000 * 1000))
		errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
		     "You can disable all c-states by booting with \"idle=poll\"\n"
		     "or just the deep ones with \"processor.max_cstate=1\"");

	old->c1 = new->c1 - old->c1;

	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
	    || soft_c1_residency_display(BIC_Avg_MHz)) {
		if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
			old->aperf = new->aperf - old->aperf;
			old->mperf = new->mperf - old->mperf;
		} else {
			/* aperf/mperf did not advance: caller drops this sample */
			return -1;
		}
	}

	if (platform->has_msr_core_c1_res) {
		/*
		 * Some models have a dedicated C1 residency MSR,
		 * which should be more accurate than the derivation below.
		 */
	} else {
		/*
		 * As counter collection is not atomic,
		 * it is possible for mperf's non-halted cycles + idle states
		 * to exceed TSC's all cycles: show c1 = 0% in that case.
		 */
		if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
			old->c1 = 0;
		else {
			/* normal case, derive c1 */
			old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
			    - core_delta->c6 - core_delta->c7;
		}
	}

	if (old->mperf == 0) {
		if (debug > 1)
			fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
		old->mperf = 1;	/* divide by 0 protection */
	}

	if (DO_BIC(BIC_IPC))
		old->instr_count = new->instr_count - old->instr_count;

	if (DO_BIC(BIC_IRQ))
		old->irq_count = new->irq_count - old->irq_count;

	if (DO_BIC(BIC_NMI))
		old->nmi_count = new->nmi_count - old->nmi_count;

	if (DO_BIC(BIC_SMI))
		old->smi_count = new->smi_count - old->smi_count;

	/* user-added counters: RAW copies, everything else deltas */
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			old->counter[i] = new->counter[i];
		else
			old->counter[i] = new->counter[i] - old->counter[i];
	}

	for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) {
		if (pp->format == FORMAT_RAW)
			old->perf_counter[i] = new->perf_counter[i];
		else
			old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i];
	}

	for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) {
		if (ppmt->format == FORMAT_RAW)
			old->pmt_counter[i] = new->pmt_counter[i];
		else
			old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i];
	}

	return 0;
}
3688
/*
 * Compute deltas for one CPU: thread-level always, core/package level
 * only when this CPU is the designated first thread/core of that domain.
 * Returns non-zero if any domain delta computation failed.
 */
int delta_cpu(struct thread_data *t, struct core_data *c,
	      struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2)
{
	int ret;

	/* core delta is computed once per core, by its first thread */
	if (is_cpu_first_thread_in_core(t, c, p))
		delta_core(c, c2);

	/* every thread computes its own delta; c2 now holds the core delta */
	ret = delta_thread(t, t2, c2);

	/* package delta is computed once per package, by its first core */
	if (is_cpu_first_core_in_package(t, c, p))
		ret |= delta_package(p, p2);

	return ret;
}
3707
delta_platform(struct platform_counters * new,struct platform_counters * old)3708 void delta_platform(struct platform_counters *new, struct platform_counters *old)
3709 {
3710 old->energy_psys.raw_value = new->energy_psys.raw_value - old->energy_psys.raw_value;
3711 }
3712
rapl_counter_clear(struct rapl_counter * c)3713 void rapl_counter_clear(struct rapl_counter *c)
3714 {
3715 c->raw_value = 0;
3716 c->scale = 0.0;
3717 c->unit = RAPL_UNIT_INVALID;
3718 }
3719
clear_counters(struct thread_data * t,struct core_data * c,struct pkg_data * p)3720 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3721 {
3722 int i;
3723 struct msr_counter *mp;
3724
3725 t->tv_begin.tv_sec = 0;
3726 t->tv_begin.tv_usec = 0;
3727 t->tv_end.tv_sec = 0;
3728 t->tv_end.tv_usec = 0;
3729 t->tv_delta.tv_sec = 0;
3730 t->tv_delta.tv_usec = 0;
3731
3732 t->tsc = 0;
3733 t->aperf = 0;
3734 t->mperf = 0;
3735 t->c1 = 0;
3736
3737 t->instr_count = 0;
3738
3739 t->irq_count = 0;
3740 t->nmi_count = 0;
3741 t->smi_count = 0;
3742
3743 c->c3 = 0;
3744 c->c6 = 0;
3745 c->c7 = 0;
3746 c->mc6_us = 0;
3747 c->core_temp_c = 0;
3748 rapl_counter_clear(&c->core_energy);
3749 c->core_throt_cnt = 0;
3750
3751 p->pkg_wtd_core_c0 = 0;
3752 p->pkg_any_core_c0 = 0;
3753 p->pkg_any_gfxe_c0 = 0;
3754 p->pkg_both_core_gfxe_c0 = 0;
3755
3756 p->pc2 = 0;
3757 if (DO_BIC(BIC_Pkgpc3))
3758 p->pc3 = 0;
3759 if (DO_BIC(BIC_Pkgpc6))
3760 p->pc6 = 0;
3761 if (DO_BIC(BIC_Pkgpc7))
3762 p->pc7 = 0;
3763 p->pc8 = 0;
3764 p->pc9 = 0;
3765 p->pc10 = 0;
3766 p->die_c6 = 0;
3767 p->cpu_lpi = 0;
3768 p->sys_lpi = 0;
3769
3770 rapl_counter_clear(&p->energy_pkg);
3771 rapl_counter_clear(&p->energy_dram);
3772 rapl_counter_clear(&p->energy_cores);
3773 rapl_counter_clear(&p->energy_gfx);
3774 rapl_counter_clear(&p->rapl_pkg_perf_status);
3775 rapl_counter_clear(&p->rapl_dram_perf_status);
3776 p->pkg_temp_c = 0;
3777
3778 p->gfx_rc6_ms = 0;
3779 p->uncore_mhz = 0;
3780 p->gfx_mhz = 0;
3781 p->gfx_act_mhz = 0;
3782 p->sam_mc6_ms = 0;
3783 p->sam_mhz = 0;
3784 p->sam_act_mhz = 0;
3785 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
3786 t->counter[i] = 0;
3787
3788 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
3789 c->counter[i] = 0;
3790
3791 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
3792 p->counter[i] = 0;
3793
3794 memset(&t->perf_counter[0], 0, sizeof(t->perf_counter));
3795 memset(&c->perf_counter[0], 0, sizeof(c->perf_counter));
3796 memset(&p->perf_counter[0], 0, sizeof(p->perf_counter));
3797
3798 memset(&t->pmt_counter[0], 0, ARRAY_SIZE(t->pmt_counter));
3799 memset(&c->pmt_counter[0], 0, ARRAY_SIZE(c->pmt_counter));
3800 memset(&p->pmt_counter[0], 0, ARRAY_SIZE(p->pmt_counter));
3801 }
3802
rapl_counter_accumulate(struct rapl_counter * dst,const struct rapl_counter * src)3803 void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src)
3804 {
3805 /* Copy unit and scale from src if dst is not initialized */
3806 if (dst->unit == RAPL_UNIT_INVALID) {
3807 dst->unit = src->unit;
3808 dst->scale = src->scale;
3809 }
3810
3811 assert(dst->unit == src->unit);
3812 assert(dst->scale == src->scale);
3813
3814 dst->raw_value += src->raw_value;
3815 }
3816
/*
 * sum_counters()
 *
 * for_all_cpus() callback: accumulate one CPU's counters into the
 * global "average" struct.  Thread-level values are summed for every
 * CPU; core-level values only for the first thread in each core;
 * package-level values only for the first core in each package.
 * Always returns 0.
 */
int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int i;
	struct msr_counter *mp;
	struct perf_counter_info *pp;
	struct pmt_counter *ppmt;

	/* copy un-changing apic_id's */
	if (DO_BIC(BIC_APIC))
		average.threads.apic_id = t->apic_id;
	if (DO_BIC(BIC_X2APIC))
		average.threads.x2apic_id = t->x2apic_id;

	/* remember first tv_begin */
	if (average.threads.tv_begin.tv_sec == 0)
		average.threads.tv_begin = procsysfs_tv_begin;

	/* remember last tv_end */
	average.threads.tv_end = t->tv_end;

	average.threads.tsc += t->tsc;
	average.threads.aperf += t->aperf;
	average.threads.mperf += t->mperf;
	average.threads.c1 += t->c1;

	average.threads.instr_count += t->instr_count;

	average.threads.irq_count += t->irq_count;
	average.threads.nmi_count += t->nmi_count;
	average.threads.smi_count += t->smi_count;

	/* FORMAT_RAW added counters are not summed/averaged; skip them */
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		average.threads.counter[i] += t->counter[i];
	}

	for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) {
		if (pp->format == FORMAT_RAW)
			continue;
		average.threads.perf_counter[i] += t->perf_counter[i];
	}

	for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) {
		average.threads.pmt_counter[i] += t->pmt_counter[i];
	}

	/* sum per-core values only for 1st thread in core */
	if (!is_cpu_first_thread_in_core(t, c, p))
		return 0;

	average.cores.c3 += c->c3;
	average.cores.c6 += c->c6;
	average.cores.c7 += c->c7;
	average.cores.mc6_us += c->mc6_us;

	/* temperature and throttle count report the max, not a sum */
	average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
	average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt);

	rapl_counter_accumulate(&average.cores.core_energy, &c->core_energy);

	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		average.cores.counter[i] += c->counter[i];
	}

	for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) {
		if (pp->format == FORMAT_RAW)
			continue;
		average.cores.perf_counter[i] += c->perf_counter[i];
	}

	for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) {
		average.cores.pmt_counter[i] += c->pmt_counter[i];
	}

	/* sum per-pkg values only for 1st core in pkg */
	if (!is_cpu_first_core_in_package(t, c, p))
		return 0;

	if (DO_BIC(BIC_Totl_c0))
		average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
	if (DO_BIC(BIC_Any_c0))
		average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
	if (DO_BIC(BIC_GFX_c0))
		average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
	if (DO_BIC(BIC_CPUGFX))
		average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;

	average.packages.pc2 += p->pc2;
	if (DO_BIC(BIC_Pkgpc3))
		average.packages.pc3 += p->pc3;
	if (DO_BIC(BIC_Pkgpc6))
		average.packages.pc6 += p->pc6;
	if (DO_BIC(BIC_Pkgpc7))
		average.packages.pc7 += p->pc7;
	average.packages.pc8 += p->pc8;
	average.packages.pc9 += p->pc9;
	average.packages.pc10 += p->pc10;
	average.packages.die_c6 += p->die_c6;

	/* assigned, not summed — presumably system-wide values; verify */
	average.packages.cpu_lpi = p->cpu_lpi;
	average.packages.sys_lpi = p->sys_lpi;

	rapl_counter_accumulate(&average.packages.energy_pkg, &p->energy_pkg);
	rapl_counter_accumulate(&average.packages.energy_dram, &p->energy_dram);
	rapl_counter_accumulate(&average.packages.energy_cores, &p->energy_cores);
	rapl_counter_accumulate(&average.packages.energy_gfx, &p->energy_gfx);

	/* graphics/uncore/SAM values are taken as-is (last package wins) */
	average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
	average.packages.uncore_mhz = p->uncore_mhz;
	average.packages.gfx_mhz = p->gfx_mhz;
	average.packages.gfx_act_mhz = p->gfx_act_mhz;
	average.packages.sam_mc6_ms = p->sam_mc6_ms;
	average.packages.sam_mhz = p->sam_mhz;
	average.packages.sam_act_mhz = p->sam_act_mhz;

	average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);

	rapl_counter_accumulate(&average.packages.rapl_pkg_perf_status, &p->rapl_pkg_perf_status);
	rapl_counter_accumulate(&average.packages.rapl_dram_perf_status, &p->rapl_dram_perf_status);

	/* with num_packages == 0, RAW package counters are copied, not summed */
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if ((mp->format == FORMAT_RAW) && (topo.num_packages == 0))
			average.packages.counter[i] = p->counter[i];
		else
			average.packages.counter[i] += p->counter[i];
	}

	for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) {
		if ((pp->format == FORMAT_RAW) && (topo.num_packages == 0))
			average.packages.perf_counter[i] = p->perf_counter[i];
		else
			average.packages.perf_counter[i] += p->perf_counter[i];
	}

	for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) {
		average.packages.pmt_counter[i] += p->pmt_counter[i];
	}

	return 0;
}
3960
/*
 * sum the counters for all cpus in the system
 * compute the weighted average
 *
 * Results land in the global "average" struct: sums are built by
 * sum_counters() and then divided by the allowed cpu/core/package
 * counts.  Counters of type COUNTER_ITEMS stay as sums and may set
 * sums_need_wide_columns; FORMAT_RAW counters are left untouched.
 */
void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int i;
	struct msr_counter *mp;
	struct perf_counter_info *pp;
	struct pmt_counter *ppmt;

	clear_counters(&average.threads, &average.cores, &average.packages);

	for_all_cpus(sum_counters, t, c, p);

	/* Use the global time delta for the average. */
	average.threads.tv_delta = tv_delta;

	average.threads.tsc /= topo.allowed_cpus;
	average.threads.aperf /= topo.allowed_cpus;
	average.threads.mperf /= topo.allowed_cpus;
	average.threads.instr_count /= topo.allowed_cpus;
	average.threads.c1 /= topo.allowed_cpus;

	/* widen output columns when summed IRQ/NMI counts get large */
	if (average.threads.irq_count > 9999999)
		sums_need_wide_columns = 1;
	if (average.threads.nmi_count > 9999999)
		sums_need_wide_columns = 1;

	average.cores.c3 /= topo.allowed_cores;
	average.cores.c6 /= topo.allowed_cores;
	average.cores.c7 /= topo.allowed_cores;
	average.cores.mc6_us /= topo.allowed_cores;

	if (DO_BIC(BIC_Totl_c0))
		average.packages.pkg_wtd_core_c0 /= topo.allowed_packages;
	if (DO_BIC(BIC_Any_c0))
		average.packages.pkg_any_core_c0 /= topo.allowed_packages;
	if (DO_BIC(BIC_GFX_c0))
		average.packages.pkg_any_gfxe_c0 /= topo.allowed_packages;
	if (DO_BIC(BIC_CPUGFX))
		average.packages.pkg_both_core_gfxe_c0 /= topo.allowed_packages;

	average.packages.pc2 /= topo.allowed_packages;
	if (DO_BIC(BIC_Pkgpc3))
		average.packages.pc3 /= topo.allowed_packages;
	if (DO_BIC(BIC_Pkgpc6))
		average.packages.pc6 /= topo.allowed_packages;
	if (DO_BIC(BIC_Pkgpc7))
		average.packages.pc7 /= topo.allowed_packages;

	average.packages.pc8 /= topo.allowed_packages;
	average.packages.pc9 /= topo.allowed_packages;
	average.packages.pc10 /= topo.allowed_packages;
	average.packages.die_c6 /= topo.allowed_packages;

	/* thread-domain added counters: ITEMS counters stay summed (continue) */
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		if (mp->type == COUNTER_ITEMS) {
			if (average.threads.counter[i] > 9999999)
				sums_need_wide_columns = 1;
			continue;
		}
		average.threads.counter[i] /= topo.allowed_cpus;
	}
	/*
	 * NOTE(review): unlike the thread loop above, the core and package
	 * loops below do not "continue" for COUNTER_ITEMS, so those
	 * counters also get divided — confirm this asymmetry is intended.
	 */
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		if (mp->type == COUNTER_ITEMS) {
			if (average.cores.counter[i] > 9999999)
				sums_need_wide_columns = 1;
		}
		average.cores.counter[i] /= topo.allowed_cores;
	}
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		if (mp->type == COUNTER_ITEMS) {
			if (average.packages.counter[i] > 9999999)
				sums_need_wide_columns = 1;
		}
		average.packages.counter[i] /= topo.allowed_packages;
	}

	/* same pattern for perf-sourced added counters */
	for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) {
		if (pp->format == FORMAT_RAW)
			continue;
		if (pp->type == COUNTER_ITEMS) {
			if (average.threads.perf_counter[i] > 9999999)
				sums_need_wide_columns = 1;
			continue;
		}
		average.threads.perf_counter[i] /= topo.allowed_cpus;
	}
	for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) {
		if (pp->format == FORMAT_RAW)
			continue;
		if (pp->type == COUNTER_ITEMS) {
			if (average.cores.perf_counter[i] > 9999999)
				sums_need_wide_columns = 1;
		}
		average.cores.perf_counter[i] /= topo.allowed_cores;
	}
	for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) {
		if (pp->format == FORMAT_RAW)
			continue;
		if (pp->type == COUNTER_ITEMS) {
			if (average.packages.perf_counter[i] > 9999999)
				sums_need_wide_columns = 1;
		}
		average.packages.perf_counter[i] /= topo.allowed_packages;
	}

	/* PMT counters are always plain averages */
	for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) {
		average.threads.pmt_counter[i] /= topo.allowed_cpus;
	}
	for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) {
		average.cores.pmt_counter[i] /= topo.allowed_cores;
	}
	for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) {
		average.packages.pmt_counter[i] /= topo.allowed_packages;
	}
}
4085
rdtsc(void)4086 static unsigned long long rdtsc(void)
4087 {
4088 unsigned int low, high;
4089
4090 asm volatile ("rdtsc":"=a" (low), "=d"(high));
4091
4092 return low | ((unsigned long long)high) << 32;
4093 }
4094
/*
 * Open a file, and exit on failure
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *fp;

	fp = fopen(path, mode);
	if (fp == NULL)
		err(1, "%s: open failed", path);

	return fp;
}
4106
/*
 * snapshot_sysfs_counter()
 *
 * Return a snapshot of the unsigned counter stored in the given
 * sysfs file.  Exits on open or parse failure.
 */
unsigned long long snapshot_sysfs_counter(char *path)
{
	FILE *fp;
	int retval;
	unsigned long long counter;

	fp = fopen_or_die(path, "r");

	/* %llu: counter is unsigned long long ("%lld" mismatches the type) */
	retval = fscanf(fp, "%llu", &counter);
	if (retval != 1)
		err(1, "snapshot_sysfs_counter(%s)", path);

	fclose(fp);

	return counter;
}
4128
get_mp(int cpu,struct msr_counter * mp,unsigned long long * counterp,char * counter_path)4129 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp, char *counter_path)
4130 {
4131 if (mp->msr_num != 0) {
4132 assert(!no_msr);
4133 if (get_msr(cpu, mp->msr_num, counterp))
4134 return -1;
4135 } else {
4136 char path[128 + PATH_BYTES];
4137
4138 if (mp->flags & SYSFS_PERCPU) {
4139 sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", cpu, mp->sp->path);
4140
4141 *counterp = snapshot_sysfs_counter(path);
4142 } else {
4143 *counterp = snapshot_sysfs_counter(counter_path);
4144 }
4145 }
4146
4147 return 0;
4148 }
4149
get_legacy_uncore_mhz(int package)4150 unsigned long long get_legacy_uncore_mhz(int package)
4151 {
4152 char path[128];
4153 int die;
4154 static int warn_once;
4155
4156 /*
4157 * for this package, use the first die_id that exists
4158 */
4159 for (die = 0; die <= topo.max_die_id; ++die) {
4160
4161 sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz",
4162 package, die);
4163
4164 if (access(path, R_OK) == 0)
4165 return (snapshot_sysfs_counter(path) / 1000);
4166 }
4167 if (!warn_once) {
4168 warnx("BUG: %s: No %s", __func__, path);
4169 warn_once = 1;
4170 }
4171
4172 return 0;
4173 }
4174
get_epb(int cpu)4175 int get_epb(int cpu)
4176 {
4177 char path[128 + PATH_BYTES];
4178 unsigned long long msr;
4179 int ret, epb = -1;
4180 FILE *fp;
4181
4182 sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu);
4183
4184 fp = fopen(path, "r");
4185 if (!fp)
4186 goto msr_fallback;
4187
4188 ret = fscanf(fp, "%d", &epb);
4189 if (ret != 1)
4190 err(1, "%s(%s)", __func__, path);
4191
4192 fclose(fp);
4193
4194 return epb;
4195
4196 msr_fallback:
4197 if (no_msr)
4198 return -1;
4199
4200 get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
4201
4202 return msr & 0xf;
4203 }
4204
/*
 * Record this thread's initial APIC id and/or x2APIC id via CPUID,
 * using the vendor-appropriate leaf (AMD/Hygon: 0x8000001e,
 * Intel: 0xb).  No-op for ids whose BIC columns are disabled.
 */
void get_apic_id(struct thread_data *t)
{
	unsigned int eax, ebx, ecx, edx;

	if (DO_BIC(BIC_APIC)) {
		eax = ebx = ecx = edx = 0;
		__cpuid(1, eax, ebx, ecx, edx);

		/* initial APIC id is CPUID.1 EBX bits 31:24 */
		t->apic_id = (ebx >> 24) & 0xff;
	}

	if (!DO_BIC(BIC_X2APIC))
		return;

	if (authentic_amd || hygon_genuine) {
		unsigned int topology_extensions;

		/* extended APIC id requires leaf 0x8000001e */
		if (max_extended_level < 0x8000001e)
			return;

		eax = ebx = ecx = edx = 0;
		__cpuid(0x80000001, eax, ebx, ecx, edx);
		/* ECX bit 22: topology extensions supported */
		topology_extensions = ecx & (1 << 22);

		if (topology_extensions == 0)
			return;

		eax = ebx = ecx = edx = 0;
		__cpuid(0x8000001e, eax, ebx, ecx, edx);

		t->x2apic_id = eax;
		return;
	}

	if (!genuine_intel)
		return;

	if (max_level < 0xb)
		return;

	/* Intel extended-topology leaf 0xb returns the x2APIC id in EDX */
	ecx = 0;
	__cpuid(0xb, eax, ebx, ecx, edx);
	t->x2apic_id = edx;

	/* the low byte of the x2APIC id should match the legacy APIC id */
	if (debug && (t->apic_id != (t->x2apic_id & 0xff)))
		fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
}
4252
get_core_throt_cnt(int cpu,unsigned long long * cnt)4253 int get_core_throt_cnt(int cpu, unsigned long long *cnt)
4254 {
4255 char path[128 + PATH_BYTES];
4256 unsigned long long tmp;
4257 FILE *fp;
4258 int ret;
4259
4260 sprintf(path, "/sys/devices/system/cpu/cpu%d/thermal_throttle/core_throttle_count", cpu);
4261 fp = fopen(path, "r");
4262 if (!fp)
4263 return -1;
4264 ret = fscanf(fp, "%lld", &tmp);
4265 fclose(fp);
4266 if (ret != 1)
4267 return -1;
4268 *cnt = tmp;
4269
4270 return 0;
4271 }
4272
/* File descriptors for a perf event group reading APERF and MPERF together. */
struct amperf_group_fd {
	int aperf;		/* Also the group descriptor */
	int mperf;
};
4277
read_perf_counter_info(const char * const path,const char * const parse_format,void * value_ptr)4278 static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr)
4279 {
4280 int fdmt;
4281 int bytes_read;
4282 char buf[64];
4283 int ret = -1;
4284
4285 fdmt = open(path, O_RDONLY, 0);
4286 if (fdmt == -1) {
4287 if (debug)
4288 fprintf(stderr, "Failed to parse perf counter info %s\n", path);
4289 ret = -1;
4290 goto cleanup_and_exit;
4291 }
4292
4293 bytes_read = read(fdmt, buf, sizeof(buf) - 1);
4294 if (bytes_read <= 0 || bytes_read >= (int)sizeof(buf)) {
4295 if (debug)
4296 fprintf(stderr, "Failed to parse perf counter info %s\n", path);
4297 ret = -1;
4298 goto cleanup_and_exit;
4299 }
4300
4301 buf[bytes_read] = '\0';
4302
4303 if (sscanf(buf, parse_format, value_ptr) != 1) {
4304 if (debug)
4305 fprintf(stderr, "Failed to parse perf counter info %s\n", path);
4306 ret = -1;
4307 goto cleanup_and_exit;
4308 }
4309
4310 ret = 0;
4311
4312 cleanup_and_exit:
4313 close(fdmt);
4314 return ret;
4315 }
4316
/* Like read_perf_counter_info(), but returns the value ((unsigned)-1 on failure). */
static unsigned int read_perf_counter_info_n(const char *const path, const char *const parse_format)
{
	unsigned int value = 0;

	if (read_perf_counter_info(path, parse_format, &value))
		return (unsigned int)-1;

	return value;
}
4328
/* Return the perf PMU "type" id of @subsys, e.g. from .../devices/msr/type. */
static unsigned int read_perf_type(const char *subsys)
{
	char path[128];

	snprintf(path, sizeof(path), "/sys/bus/event_source/devices/%s/type", subsys);

	return read_perf_counter_info_n(path, "%u");
}
4339
/*
 * Parse the sysfs event description of @subsys/@event_name
 * ("event=0x..,umask=0x..") and return (umask << 8) | event,
 * or (unsigned)-1 when no event= term is present or I/O fails.
 */
static unsigned int read_perf_config(const char *subsys, const char *event_name)
{
	const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s";
	char path[128];
	char config_str[64];
	char *tok;
	unsigned int config = 0;
	unsigned int umask = 0;
	bool has_config = false;
	bool has_umask = false;
	unsigned int ret = -1;
	FILE *fconfig;

	snprintf(path, sizeof(path), path_format, subsys, event_name);

	fconfig = fopen(path, "r");
	if (!fconfig)
		return -1;

	if (fgets(config_str, ARRAY_SIZE(config_str), fconfig) != config_str)
		goto cleanup_and_exit;

	/* walk the comma-separated terms, truncating at each comma */
	for (tok = &config_str[0]; tok;) {
		if (sscanf(tok, "event=%x", &config) == 1)
			has_config = true;
		else if (sscanf(tok, "umask=%x", &umask) == 1)
			has_umask = true;

		tok = strchr(tok, ',');
		if (tok) {
			*tok = '\0';
			++tok;
		}
	}

	if (!has_umask)
		umask = 0;

	if (has_config)
		ret = (umask << 8) | config;

cleanup_and_exit:
	fclose(fconfig);
	return ret;
}
4390
read_perf_rapl_unit(const char * subsys,const char * event_name)4391 static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_name)
4392 {
4393 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.unit";
4394 const char *const format = "%s";
4395 char path[128];
4396 char unit_buffer[16];
4397
4398 snprintf(path, sizeof(path), path_format, subsys, event_name);
4399
4400 read_perf_counter_info(path, format, &unit_buffer);
4401 if (strcmp("Joules", unit_buffer) == 0)
4402 return RAPL_UNIT_JOULES;
4403
4404 return RAPL_UNIT_INVALID;
4405 }
4406
/* Return the ".scale" factor of @subsys/@event_name, or 0.0 on failure. */
static double read_perf_scale(const char *subsys, const char *event_name)
{
	char path[128];
	double scale;

	snprintf(path, sizeof(path), "/sys/bus/event_source/devices/%s/events/%s.scale", subsys, event_name);

	return read_perf_counter_info(path, "%lf", &scale) ? 0.0 : scale;
}
4421
rapl_counter_info_count_perf(const struct rapl_counter_info_t * rci)4422 size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci)
4423 {
4424 size_t ret = 0;
4425
4426 for (int i = 0; i < NUM_RAPL_COUNTERS; ++i)
4427 if (rci->source[i] == COUNTER_SOURCE_PERF)
4428 ++ret;
4429
4430 return ret;
4431 }
4432
cstate_counter_info_count_perf(const struct cstate_counter_info_t * cci)4433 static size_t cstate_counter_info_count_perf(const struct cstate_counter_info_t *cci)
4434 {
4435 size_t ret = 0;
4436
4437 for (int i = 0; i < NUM_CSTATE_COUNTERS; ++i)
4438 if (cci->source[i] == COUNTER_SOURCE_PERF)
4439 ++ret;
4440
4441 return ret;
4442 }
4443
write_rapl_counter(struct rapl_counter * rc,struct rapl_counter_info_t * rci,unsigned int idx)4444 void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci, unsigned int idx)
4445 {
4446 if (rci->source[idx] == COUNTER_SOURCE_NONE)
4447 return;
4448
4449 rc->raw_value = rci->data[idx];
4450 rc->unit = rci->unit[idx];
4451 rc->scale = rci->scale[idx];
4452 }
4453
/*
 * Read all RAPL counters for one domain and publish them into the
 * core/package/platform output structs.  Perf-sourced counters are
 * read with one bulk read() of the event group; MSR-sourced ones are
 * read individually.  Returns 0 on success, or (-13 - i) identifying
 * the failing MSR counter slot.
 */
int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct pkg_data *p)
{
	/* platform (psys) energy lands in the even/odd snapshot buffer */
	struct platform_counters *pplat_cnt = p == package_odd ? &platform_counters_odd : &platform_counters_even;
	unsigned long long perf_data[NUM_RAPL_COUNTERS + 1];
	struct rapl_counter_info_t *rci;

	if (debug >= 2)
		fprintf(stderr, "%s: cpu%d domain%d\n", __func__, cpu, domain);

	assert(rapl_counter_info_perdomain);
	assert(domain < rapl_counter_info_perdomain_size);

	rci = &rapl_counter_info_perdomain[domain];

	/*
	 * If we have any perf counters to read, read them all now, in bulk
	 */
	if (rci->fd_perf != -1) {
		size_t num_perf_counters = rapl_counter_info_count_perf(rci);
		/* +1: a group read is prefixed with the number of counters */
		const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long);
		const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data));

		if (actual_read_size != expected_read_size)
			err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size,
			    actual_read_size);
	}

	/* pi starts at 1 to skip the group-read counter-count header */
	for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) {
		switch (rci->source[i]) {
		case COUNTER_SOURCE_NONE:
			rci->data[i] = 0;
			break;

		case COUNTER_SOURCE_PERF:
			assert(pi < ARRAY_SIZE(perf_data));
			assert(rci->fd_perf != -1);

			if (debug >= 2)
				fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n",
					i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]);

			rci->data[i] = perf_data[pi];

			++pi;
			break;

		case COUNTER_SOURCE_MSR:
			if (debug >= 2)
				fprintf(stderr, "Reading rapl counter via msr at %u\n", i);

			assert(!no_msr);
			if (rci->flags[i] & RAPL_COUNTER_FLAG_USE_MSR_SUM) {
				if (get_msr_sum(cpu, rci->msr[i], &rci->data[i]))
					return -13 - i;
			} else {
				if (get_msr(cpu, rci->msr[i], &rci->data[i]))
					return -13 - i;
			}

			/* extract the relevant field from the raw MSR value */
			rci->data[i] &= rci->msr_mask[i];
			if (rci->msr_shift[i] >= 0)
				rci->data[i] >>= abs(rci->msr_shift[i]);
			else
				rci->data[i] <<= abs(rci->msr_shift[i]);

			break;
		}
	}

	/* keep the explicit index list below in sync with the counter count */
	BUILD_BUG_ON(NUM_RAPL_COUNTERS != 8);
	write_rapl_counter(&p->energy_pkg, rci, RAPL_RCI_INDEX_ENERGY_PKG);
	write_rapl_counter(&p->energy_cores, rci, RAPL_RCI_INDEX_ENERGY_CORES);
	write_rapl_counter(&p->energy_dram, rci, RAPL_RCI_INDEX_DRAM);
	write_rapl_counter(&p->energy_gfx, rci, RAPL_RCI_INDEX_GFX);
	write_rapl_counter(&p->rapl_pkg_perf_status, rci, RAPL_RCI_INDEX_PKG_PERF_STATUS);
	write_rapl_counter(&p->rapl_dram_perf_status, rci, RAPL_RCI_INDEX_DRAM_PERF_STATUS);
	write_rapl_counter(&c->core_energy, rci, RAPL_RCI_INDEX_CORE_ENERGY);
	write_rapl_counter(&pplat_cnt->energy_psys, rci, RAPL_RCI_INDEX_ENERGY_PLATFORM);

	return 0;
}
4535
find_sysfs_path_by_id(struct sysfs_path * sp,int id)4536 char *find_sysfs_path_by_id(struct sysfs_path *sp, int id)
4537 {
4538 while (sp) {
4539 if (sp->id == id)
4540 return (sp->path);
4541 sp = sp->next;
4542 }
4543 if (debug)
4544 warnx("%s: id%d not found", __func__, id);
4545 return NULL;
4546 }
4547
/*
 * Read all C-state residency counters for one CPU and publish them
 * into the thread/core/package output structs.  Perf-sourced counters
 * are read in bulk from two event groups (core scope and pkg scope)
 * and merged into one buffer; MSR-sourced counters are read one by
 * one.  Returns 0 on success, or (-13 - i) identifying a failed MSR
 * read.  Exits on perf read-size mismatch.
 */
int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	/*
	 * Overcommit memory a little bit here,
	 * but skip calculating exact sizes for the buffers.
	 */
	unsigned long long perf_data[NUM_CSTATE_COUNTERS];
	unsigned long long perf_data_core[NUM_CSTATE_COUNTERS + 1];
	unsigned long long perf_data_pkg[NUM_CSTATE_COUNTERS + 1];

	struct cstate_counter_info_t *cci;

	if (debug >= 2)
		fprintf(stderr, "%s: cpu%d\n", __func__, cpu);

	assert(ccstate_counter_info);
	assert(cpu <= ccstate_counter_info_size);

	ZERO_ARRAY(perf_data);
	ZERO_ARRAY(perf_data_core);
	ZERO_ARRAY(perf_data_pkg);

	cci = &ccstate_counter_info[cpu];

	/*
	 * If we have any perf counters to read, read them all now, in bulk
	 */
	const size_t num_perf_counters = cstate_counter_info_count_perf(cci);
	ssize_t expected_read_size = num_perf_counters * sizeof(unsigned long long);
	ssize_t actual_read_size_core = 0, actual_read_size_pkg = 0;

	if (cci->fd_perf_core != -1) {
		/* Each descriptor read begins with number of counters read. */
		expected_read_size += sizeof(unsigned long long);

		actual_read_size_core = read(cci->fd_perf_core, &perf_data_core[0], sizeof(perf_data_core));

		if (actual_read_size_core <= 0)
			err(-1, "%s: read perf %s: %ld", __func__, "core", actual_read_size_core);
	}

	if (cci->fd_perf_pkg != -1) {
		/* Each descriptor read begins with number of counters read. */
		expected_read_size += sizeof(unsigned long long);

		actual_read_size_pkg = read(cci->fd_perf_pkg, &perf_data_pkg[0], sizeof(perf_data_pkg));

		if (actual_read_size_pkg <= 0)
			err(-1, "%s: read perf %s: %ld", __func__, "pkg", actual_read_size_pkg);
	}

	const ssize_t actual_read_size_total = actual_read_size_core + actual_read_size_pkg;

	if (actual_read_size_total != expected_read_size)
		err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, actual_read_size_total);

	/*
	 * Copy ccstate and pcstate data into unified buffer.
	 *
	 * Skip first element from core and pkg buffers.
	 * Kernel puts there how many counters were read.
	 */
	const size_t num_core_counters = perf_data_core[0];
	const size_t num_pkg_counters = perf_data_pkg[0];

	assert(num_perf_counters == num_core_counters + num_pkg_counters);

	/* Copy ccstate perf data */
	memcpy(&perf_data[0], &perf_data_core[1], num_core_counters * sizeof(unsigned long long));

	/* Copy pcstate perf data */
	memcpy(&perf_data[num_core_counters], &perf_data_pkg[1], num_pkg_counters * sizeof(unsigned long long));

	/* pi indexes the unified buffer; it advances only for perf-sourced slots */
	for (unsigned int i = 0, pi = 0; i < NUM_CSTATE_COUNTERS; ++i) {
		switch (cci->source[i]) {
		case COUNTER_SOURCE_NONE:
			break;

		case COUNTER_SOURCE_PERF:
			assert(pi < ARRAY_SIZE(perf_data));
			assert(cci->fd_perf_core != -1 || cci->fd_perf_pkg != -1);

			if (debug >= 2)
				fprintf(stderr, "cstate via %s %u: %llu\n", "perf", i, perf_data[pi]);

			cci->data[i] = perf_data[pi];

			++pi;
			break;

		case COUNTER_SOURCE_MSR:
			assert(!no_msr);
			if (get_msr(cpu, cci->msr[i], &cci->data[i]))
				return -13 - i;

			if (debug >= 2)
				fprintf(stderr, "cstate via %s0x%llx %u: %llu\n", "msr", cci->msr[i], i, cci->data[i]);

			break;
		}
	}

	/*
	 * Helper to write the data only if the source of
	 * the counter for the current cpu is not none.
	 *
	 * Otherwise we would overwrite core data with 0 (default value),
	 * when invoked for the thread sibling.
	 */
#define PERF_COUNTER_WRITE_DATA(out_counter, index) do {	\
	if (cci->source[index] != COUNTER_SOURCE_NONE)		\
		out_counter = cci->data[index];			\
} while (0)

	/* keep the explicit index list below in sync with the counter count */
	BUILD_BUG_ON(NUM_CSTATE_COUNTERS != 11);

	PERF_COUNTER_WRITE_DATA(t->c1, CCSTATE_RCI_INDEX_C1_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(c->c3, CCSTATE_RCI_INDEX_C3_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(c->c6, CCSTATE_RCI_INDEX_C6_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(c->c7, CCSTATE_RCI_INDEX_C7_RESIDENCY);

	PERF_COUNTER_WRITE_DATA(p->pc2, PCSTATE_RCI_INDEX_C2_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(p->pc3, PCSTATE_RCI_INDEX_C3_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(p->pc6, PCSTATE_RCI_INDEX_C6_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(p->pc7, PCSTATE_RCI_INDEX_C7_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(p->pc8, PCSTATE_RCI_INDEX_C8_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(p->pc9, PCSTATE_RCI_INDEX_C9_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(p->pc10, PCSTATE_RCI_INDEX_C10_RESIDENCY);

#undef PERF_COUNTER_WRITE_DATA

	return 0;
}
4681
msr_counter_info_count_perf(const struct msr_counter_info_t * mci)4682 size_t msr_counter_info_count_perf(const struct msr_counter_info_t *mci)
4683 {
4684 size_t ret = 0;
4685
4686 for (int i = 0; i < NUM_MSR_COUNTERS; ++i)
4687 if (mci->source[i] == COUNTER_SOURCE_PERF)
4688 ++ret;
4689
4690 return ret;
4691 }
4692
get_smi_aperf_mperf(unsigned int cpu,struct thread_data * t)4693 int get_smi_aperf_mperf(unsigned int cpu, struct thread_data *t)
4694 {
4695 unsigned long long perf_data[NUM_MSR_COUNTERS + 1];
4696
4697 struct msr_counter_info_t *mci;
4698
4699 if (debug >= 2)
4700 fprintf(stderr, "%s: cpu%d\n", __func__, cpu);
4701
4702 assert(msr_counter_info);
4703 assert(cpu <= msr_counter_info_size);
4704
4705 mci = &msr_counter_info[cpu];
4706
4707 ZERO_ARRAY(perf_data);
4708 ZERO_ARRAY(mci->data);
4709
4710 if (mci->fd_perf != -1) {
4711 const size_t num_perf_counters = msr_counter_info_count_perf(mci);
4712 const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long);
4713 const ssize_t actual_read_size = read(mci->fd_perf, &perf_data[0], sizeof(perf_data));
4714
4715 if (actual_read_size != expected_read_size)
4716 err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size,
4717 actual_read_size);
4718 }
4719
4720 for (unsigned int i = 0, pi = 1; i < NUM_MSR_COUNTERS; ++i) {
4721 switch (mci->source[i]) {
4722 case COUNTER_SOURCE_NONE:
4723 break;
4724
4725 case COUNTER_SOURCE_PERF:
4726 assert(pi < ARRAY_SIZE(perf_data));
4727 assert(mci->fd_perf != -1);
4728
4729 if (debug >= 2)
4730 fprintf(stderr, "Reading msr counter via perf at %u: %llu\n", i, perf_data[pi]);
4731
4732 mci->data[i] = perf_data[pi];
4733
4734 ++pi;
4735 break;
4736
4737 case COUNTER_SOURCE_MSR:
4738 assert(!no_msr);
4739
4740 if (get_msr(cpu, mci->msr[i], &mci->data[i]))
4741 return -2 - i;
4742
4743 mci->data[i] &= mci->msr_mask[i];
4744
4745 if (debug >= 2)
4746 fprintf(stderr, "Reading msr counter via msr at %u: %llu\n", i, mci->data[i]);
4747
4748 break;
4749 }
4750 }
4751
4752 BUILD_BUG_ON(NUM_MSR_COUNTERS != 3);
4753 t->aperf = mci->data[MSR_RCI_INDEX_APERF];
4754 t->mperf = mci->data[MSR_RCI_INDEX_MPERF];
4755 t->smi_count = mci->data[MSR_RCI_INDEX_SMI];
4756
4757 return 0;
4758 }
4759
perf_counter_info_read_values(struct perf_counter_info * pp,int cpu,unsigned long long * out,size_t out_size)4760 int perf_counter_info_read_values(struct perf_counter_info *pp, int cpu, unsigned long long *out, size_t out_size)
4761 {
4762 unsigned int domain;
4763 unsigned long long value;
4764 int fd_counter;
4765
4766 for (size_t i = 0; pp; ++i, pp = pp->next) {
4767 domain = cpu_to_domain(pp, cpu);
4768 assert(domain < pp->num_domains);
4769
4770 fd_counter = pp->fd_perf_per_domain[domain];
4771
4772 if (fd_counter == -1)
4773 continue;
4774
4775 if (read(fd_counter, &value, sizeof(value)) != sizeof(value))
4776 return 1;
4777
4778 assert(i < out_size);
4779 out[i] = value * pp->scale;
4780 }
4781
4782 return 0;
4783 }
4784
/*
 * Build a mask selecting bits [lsb, msb] (inclusive) of a 64-bit value.
 *
 * The shifts use 1UL: with a plain int constant, "1 << (msb + 1)" is
 * undefined for msb >= 31 (shift count >= width of int), so masks for
 * fields above bit 31 came out wrong.
 */
unsigned long pmt_gen_value_mask(unsigned int lsb, unsigned int msb)
{
	unsigned long mask;

	if (msb == 63)
		mask = 0xffffffffffffffff;
	else
		mask = ((1UL << (msb + 1)) - 1);

	mask -= (1UL << lsb) - 1;

	return mask;
}
4798
pmt_read_counter(struct pmt_counter * ppmt,unsigned int domain_id)4799 unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id)
4800 {
4801 if (domain_id >= ppmt->num_domains)
4802 return 0;
4803
4804 const unsigned long *pmmio = ppmt->domains[domain_id].pcounter;
4805 const unsigned long value = pmmio ? *pmmio : 0;
4806 const unsigned long value_mask = pmt_gen_value_mask(ppmt->lsb, ppmt->msb);
4807 const unsigned long value_shift = ppmt->lsb;
4808
4809 return (value & value_mask) >> value_shift;
4810 }
4811
4812 /* Rapl domain enumeration helpers */
get_rapl_num_domains(void)4813 static inline int get_rapl_num_domains(void)
4814 {
4815 int num_packages = topo.max_package_id + 1;
4816 int num_cores_per_package;
4817 int num_cores;
4818
4819 if (!platform->has_per_core_rapl)
4820 return num_packages;
4821
4822 num_cores_per_package = topo.max_core_id + 1;
4823 num_cores = num_cores_per_package * num_packages;
4824
4825 return num_cores;
4826 }
4827
get_rapl_domain_id(int cpu)4828 static inline int get_rapl_domain_id(int cpu)
4829 {
4830 int nr_cores_per_package = topo.max_core_id + 1;
4831 int rapl_core_id;
4832
4833 if (!platform->has_per_core_rapl)
4834 return cpus[cpu].physical_package_id;
4835
4836 /* Compute the system-wide unique core-id for @cpu */
4837 rapl_core_id = cpus[cpu].physical_core_id;
4838 rapl_core_id += cpus[cpu].physical_package_id * nr_cores_per_package;
4839
4840 return rapl_core_id;
4841 }
4842
/*
 * get_counters(...)
 * migrate to cpu
 * acquire and record local counters for that cpu
 *
 * Thread-scope counters are collected on every CPU; core-scope counters
 * only on the first thread of each core; package-scope counters only on
 * the first core of each package.  Returns 0 on success, a negative
 * value on failure.
 */
int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int cpu = t->cpu_id;
	unsigned long long msr;
	struct msr_counter *mp;
	struct pmt_counter *pp;
	int i;
	int status;

	/* bind to the target CPU so TSC/MSR/perf reads below are local */
	if (cpu_migrate(cpu)) {
		fprintf(outf, "%s: Could not migrate to CPU %d\n", __func__, cpu);
		return -1;
	}

	gettimeofday(&t->tv_begin, (struct timezone *)NULL);

	if (first_counter_read)
		get_apic_id(t);

	t->tsc = rdtsc();	/* we are running on local CPU of interest */

	get_smi_aperf_mperf(cpu, t);

	if (DO_BIC(BIC_IPC))
		if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long))
			return -4;

	/* interrupt counts were gathered earlier from /proc/interrupts */
	if (DO_BIC(BIC_IRQ))
		t->irq_count = irqs_per_cpu[cpu];
	if (DO_BIC(BIC_NMI))
		t->nmi_count = nmi_per_cpu[cpu];

	get_cstate_counters(cpu, t, c, p);

	/* user-added thread-scope counters: sysfs/MSR, perf, then PMT */
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (get_mp(cpu, mp, &t->counter[i], mp->sp->path))
			return -10;
	}

	if (perf_counter_info_read_values(sys.perf_tp, cpu, t->perf_counter, MAX_ADDED_THREAD_COUNTERS))
		return -10;

	for (i = 0, pp = sys.pmt_tp; pp; i++, pp = pp->next)
		t->pmt_counter[i] = pmt_read_counter(pp, t->cpu_id);

	/* collect core counters only for 1st thread in core */
	if (!is_cpu_first_thread_in_core(t, c, p))
		goto done;

	if (platform->has_per_core_rapl) {
		status = get_rapl_counters(cpu, get_rapl_domain_id(cpu), c, p);
		if (status != 0)
			return status;
	}

	if (DO_BIC(BIC_CPU_c7) && t->is_atom) {
		/*
		 * For Atom CPUs that has core cstate deeper than c6,
		 * MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper.
		 * Minus CC7 (and deeper cstates) residency to get
		 * accurate cc6 residency.
		 */
		c->c6 -= c->c7;
	}

	if (DO_BIC(BIC_Mod_c6))
		if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
			return -8;

	if (DO_BIC(BIC_CoreTmp)) {
		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
			return -9;
		/* bits 22:16 hold the Digital Readout: degrees below TjMax */
		c->core_temp_c = tj_max - ((msr >> 16) & 0x7F);
	}

	if (DO_BIC(BIC_CORE_THROT_CNT))
		get_core_throt_cnt(cpu, &c->core_throt_cnt);

	/* user-added core-scope counters: sysfs/MSR, perf, then PMT */
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (get_mp(cpu, mp, &c->counter[i], mp->sp->path))
			return -10;
	}

	if (perf_counter_info_read_values(sys.perf_cp, cpu, c->perf_counter, MAX_ADDED_CORE_COUNTERS))
		return -10;

	for (i = 0, pp = sys.pmt_cp; pp; i++, pp = pp->next)
		c->pmt_counter[i] = pmt_read_counter(pp, c->core_id);

	/* collect package counters only for 1st core in package */
	if (!is_cpu_first_core_in_package(t, c, p))
		goto done;

	if (DO_BIC(BIC_Totl_c0)) {
		if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
			return -10;
	}
	if (DO_BIC(BIC_Any_c0)) {
		if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
			return -11;
	}
	if (DO_BIC(BIC_GFX_c0)) {
		if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
			return -12;
	}
	if (DO_BIC(BIC_CPUGFX)) {
		if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
			return -13;
	}

	/* LPI residencies were snapshotted earlier from cpuidle sysfs */
	if (DO_BIC(BIC_CPU_LPI))
		p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
	if (DO_BIC(BIC_SYS_LPI))
		p->sys_lpi = cpuidle_cur_sys_lpi_us;

	if (!platform->has_per_core_rapl) {
		status = get_rapl_counters(cpu, get_rapl_domain_id(cpu), c, p);
		if (status != 0)
			return status;
	}

	if (DO_BIC(BIC_PkgTmp)) {
		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
			return -17;
		p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F);
	}

	if (DO_BIC(BIC_UNCORE_MHZ))
		p->uncore_mhz = get_legacy_uncore_mhz(p->package_id);

	/* graphics and SA-media values were snapshotted earlier into gfx_info[] */
	if (DO_BIC(BIC_GFX_rc6))
		p->gfx_rc6_ms = gfx_info[GFX_rc6].val_ull;

	if (DO_BIC(BIC_GFXMHz))
		p->gfx_mhz = gfx_info[GFX_MHz].val;

	if (DO_BIC(BIC_GFXACTMHz))
		p->gfx_act_mhz = gfx_info[GFX_ACTMHz].val;

	if (DO_BIC(BIC_SAM_mc6))
		p->sam_mc6_ms = gfx_info[SAM_mc6].val_ull;

	if (DO_BIC(BIC_SAMMHz))
		p->sam_mhz = gfx_info[SAM_MHz].val;

	if (DO_BIC(BIC_SAMACTMHz))
		p->sam_act_mhz = gfx_info[SAM_ACTMHz].val;

	/* user-added package-scope counters: sysfs/MSR, perf, then PMT */
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		char *path = NULL;

		/* sysfs-backed counters need the per-package path resolved */
		if (mp->msr_num == 0) {
			path = find_sysfs_path_by_id(mp->sp, p->package_id);
			if (path == NULL) {
				warnx("%s: package_id %d not found", __func__, p->package_id);
				return -10;
			}
		}
		if (get_mp(cpu, mp, &p->counter[i], path))
			return -10;
	}

	if (perf_counter_info_read_values(sys.perf_pp, cpu, p->perf_counter, MAX_ADDED_PACKAGE_COUNTERS))
		return -10;

	for (i = 0, pp = sys.pmt_pp; pp; i++, pp = pp->next)
		p->pmt_counter[i] = pmt_read_counter(pp, p->package_id);

done:
	gettimeofday(&t->tv_end, (struct timezone *)NULL);

	return 0;
}
5021
/* Decoded package C-state limit; indexes pkg_cstate_limit_strings[] */
int pkg_cstate_limit = PCLUKN;
/* Printable names for the PCL* limit codes */
char *pkg_cstate_limit_strings[] = { "unknown", "reserved", "pc0", "pc1", "pc2",
	"pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited"
};

/*
 * Per-platform decode tables: MSR_PKG_CST_CONFIG_CONTROL[3:0] is used as
 * an index into one of these 16-entry arrays (selected in probe_cst_limit()).
 */
int nhm_pkg_cstate_limits[16] =
    { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

int snb_pkg_cstate_limits[16] =
    { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

int hsw_pkg_cstate_limits[16] =
    { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

int slv_pkg_cstate_limits[16] =
    { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCL__6, PCL__7
};

int amt_pkg_cstate_limits[16] =
    { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

int phi_pkg_cstate_limits[16] =
    { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

int glm_pkg_cstate_limits[16] =
    { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

int skx_pkg_cstate_limits[16] =
    { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

int icx_pkg_cstate_limits[16] =
    { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};
5071
probe_cst_limit(void)5072 void probe_cst_limit(void)
5073 {
5074 unsigned long long msr;
5075 int *pkg_cstate_limits;
5076
5077 if (!platform->has_nhm_msrs || no_msr)
5078 return;
5079
5080 switch (platform->cst_limit) {
5081 case CST_LIMIT_NHM:
5082 pkg_cstate_limits = nhm_pkg_cstate_limits;
5083 break;
5084 case CST_LIMIT_SNB:
5085 pkg_cstate_limits = snb_pkg_cstate_limits;
5086 break;
5087 case CST_LIMIT_HSW:
5088 pkg_cstate_limits = hsw_pkg_cstate_limits;
5089 break;
5090 case CST_LIMIT_SKX:
5091 pkg_cstate_limits = skx_pkg_cstate_limits;
5092 break;
5093 case CST_LIMIT_ICX:
5094 pkg_cstate_limits = icx_pkg_cstate_limits;
5095 break;
5096 case CST_LIMIT_SLV:
5097 pkg_cstate_limits = slv_pkg_cstate_limits;
5098 break;
5099 case CST_LIMIT_AMT:
5100 pkg_cstate_limits = amt_pkg_cstate_limits;
5101 break;
5102 case CST_LIMIT_KNL:
5103 pkg_cstate_limits = phi_pkg_cstate_limits;
5104 break;
5105 case CST_LIMIT_GMT:
5106 pkg_cstate_limits = glm_pkg_cstate_limits;
5107 break;
5108 default:
5109 return;
5110 }
5111
5112 get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
5113 pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
5114 }
5115
dump_platform_info(void)5116 static void dump_platform_info(void)
5117 {
5118 unsigned long long msr;
5119 unsigned int ratio;
5120
5121 if (!platform->has_nhm_msrs || no_msr)
5122 return;
5123
5124 get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
5125
5126 fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
5127
5128 ratio = (msr >> 40) & 0xFF;
5129 fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk);
5130
5131 ratio = (msr >> 8) & 0xFF;
5132 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
5133 }
5134
dump_power_ctl(void)5135 static void dump_power_ctl(void)
5136 {
5137 unsigned long long msr;
5138
5139 if (!platform->has_nhm_msrs || no_msr)
5140 return;
5141
5142 get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
5143 fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
5144 base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
5145
5146 /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */
5147 if (platform->has_cst_prewake_bit)
5148 fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN");
5149
5150 return;
5151 }
5152
dump_turbo_ratio_limit2(void)5153 static void dump_turbo_ratio_limit2(void)
5154 {
5155 unsigned long long msr;
5156 unsigned int ratio;
5157
5158 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
5159
5160 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
5161
5162 ratio = (msr >> 8) & 0xFF;
5163 if (ratio)
5164 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk);
5165
5166 ratio = (msr >> 0) & 0xFF;
5167 if (ratio)
5168 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk);
5169 return;
5170 }
5171
dump_turbo_ratio_limit1(void)5172 static void dump_turbo_ratio_limit1(void)
5173 {
5174 unsigned long long msr;
5175 unsigned int ratio;
5176
5177 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
5178
5179 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
5180
5181 ratio = (msr >> 56) & 0xFF;
5182 if (ratio)
5183 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk);
5184
5185 ratio = (msr >> 48) & 0xFF;
5186 if (ratio)
5187 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk);
5188
5189 ratio = (msr >> 40) & 0xFF;
5190 if (ratio)
5191 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk);
5192
5193 ratio = (msr >> 32) & 0xFF;
5194 if (ratio)
5195 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk);
5196
5197 ratio = (msr >> 24) & 0xFF;
5198 if (ratio)
5199 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk);
5200
5201 ratio = (msr >> 16) & 0xFF;
5202 if (ratio)
5203 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk);
5204
5205 ratio = (msr >> 8) & 0xFF;
5206 if (ratio)
5207 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk);
5208
5209 ratio = (msr >> 0) & 0xFF;
5210 if (ratio)
5211 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk);
5212 return;
5213 }
5214
dump_turbo_ratio_limits(int trl_msr_offset)5215 static void dump_turbo_ratio_limits(int trl_msr_offset)
5216 {
5217 unsigned long long msr, core_counts;
5218 int shift;
5219
5220 get_msr(base_cpu, trl_msr_offset, &msr);
5221 fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n",
5222 base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY_" : "", msr);
5223
5224 if (platform->trl_msrs & TRL_CORECOUNT) {
5225 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
5226 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
5227 } else {
5228 core_counts = 0x0807060504030201;
5229 }
5230
5231 for (shift = 56; shift >= 0; shift -= 8) {
5232 unsigned int ratio, group_size;
5233
5234 ratio = (msr >> shift) & 0xFF;
5235 group_size = (core_counts >> shift) & 0xFF;
5236 if (ratio)
5237 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
5238 ratio, bclk, ratio * bclk, group_size);
5239 }
5240
5241 return;
5242 }
5243
dump_atom_turbo_ratio_limits(void)5244 static void dump_atom_turbo_ratio_limits(void)
5245 {
5246 unsigned long long msr;
5247 unsigned int ratio;
5248
5249 get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
5250 fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
5251
5252 ratio = (msr >> 0) & 0x3F;
5253 if (ratio)
5254 fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", ratio, bclk, ratio * bclk);
5255
5256 ratio = (msr >> 8) & 0x3F;
5257 if (ratio)
5258 fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", ratio, bclk, ratio * bclk);
5259
5260 ratio = (msr >> 16) & 0x3F;
5261 if (ratio)
5262 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
5263
5264 get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
5265 fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
5266
5267 ratio = (msr >> 24) & 0x3F;
5268 if (ratio)
5269 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk);
5270
5271 ratio = (msr >> 16) & 0x3F;
5272 if (ratio)
5273 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk);
5274
5275 ratio = (msr >> 8) & 0x3F;
5276 if (ratio)
5277 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk);
5278
5279 ratio = (msr >> 0) & 0x3F;
5280 if (ratio)
5281 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", ratio, bclk, ratio * bclk);
5282 }
5283
/* Decode and print KNL's bucketed MSR_TURBO_RATIO_LIMIT encoding. */
static void dump_knl_turbo_ratio_limits(void)
{
	const unsigned int buckets_no = 7;

	unsigned long long msr;
	int delta_cores, delta_ratio;
	int i, b_nr;
	unsigned int cores[buckets_no];
	unsigned int ratio[buckets_no];

	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);

	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);

	/*
	 * Turbo encoding in KNL is as follows:
	 * [0] -- Reserved
	 * [7:1] -- Base value of number of active cores of bucket 1.
	 * [15:8] -- Base value of freq ratio of bucket 1.
	 * [20:16] -- +ve delta of number of active cores of bucket 2.
	 * i.e. active cores of bucket 2 =
	 * active cores of bucket 1 + delta
	 * [23:21] -- Negative delta of freq ratio of bucket 2.
	 * i.e. freq ratio of bucket 2 =
	 * freq ratio of bucket 1 - delta
	 * [28:24]-- +ve delta of number of active cores of bucket 3.
	 * [31:29]-- -ve delta of freq ratio of bucket 3.
	 * [36:32]-- +ve delta of number of active cores of bucket 4.
	 * [39:37]-- -ve delta of freq ratio of bucket 4.
	 * [44:40]-- +ve delta of number of active cores of bucket 5.
	 * [47:45]-- -ve delta of freq ratio of bucket 5.
	 * [52:48]-- +ve delta of number of active cores of bucket 6.
	 * [55:53]-- -ve delta of freq ratio of bucket 6.
	 * [60:56]-- +ve delta of number of active cores of bucket 7.
	 * [63:61]-- -ve delta of freq ratio of bucket 7.
	 */

	/* bucket 0 is read directly from the base fields */
	b_nr = 0;
	cores[b_nr] = (msr & 0xFF) >> 1;
	ratio[b_nr] = (msr >> 8) & 0xFF;

	/* buckets 1..6 are each an 8-bit (5+3) delta on the previous bucket */
	for (i = 16; i < 64; i += 8) {
		delta_cores = (msr >> i) & 0x1F;
		delta_ratio = (msr >> (i + 5)) & 0x7;

		cores[b_nr + 1] = cores[b_nr] + delta_cores;
		ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
		b_nr++;
	}

	/* print highest bucket first, skipping buckets whose ratio repeats */
	for (i = buckets_no - 1; i >= 0; i--)
		if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
			fprintf(outf,
				"%d * %.1f = %.1f MHz max turbo %d active cores\n",
				ratio[i], bclk, ratio[i] * bclk, cores[i]);
}
5340
dump_cst_cfg(void)5341 static void dump_cst_cfg(void)
5342 {
5343 unsigned long long msr;
5344
5345 if (!platform->has_nhm_msrs || no_msr)
5346 return;
5347
5348 get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
5349
5350 fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
5351
5352 fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
5353 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
5354 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
5355 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
5356 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
5357 (msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]);
5358
5359 #define AUTOMATIC_CSTATE_CONVERSION (1UL << 16)
5360 if (platform->has_cst_auto_convension) {
5361 fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
5362 }
5363
5364 fprintf(outf, ")\n");
5365
5366 return;
5367 }
5368
/* Print the Configurable-TDP MSRs: nominal, both levels, control, activation. */
static void dump_config_tdp(void)
{
	unsigned long long msr;

	get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
	fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);

	/* level MSRs: power limits in bits 62:48/46:32, ratio 23:16, TDP 14:0 */
	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
	if (msr) {
		fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
		fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
		fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
		fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
	}
	fprintf(outf, ")\n");

	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
	if (msr) {
		fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
		fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
		fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
		fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
	}
	fprintf(outf, ")\n");

	/* control: bits 1:0 select the active TDP level, bit 31 locks it */
	get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
	if ((msr) & 0x3)
		fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
	fprintf(outf, ")\n");

	get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
	fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
	fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
	fprintf(outf, ")\n");
}
5410
5411 unsigned int irtl_time_units[] = { 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
5412
print_irtl(void)5413 void print_irtl(void)
5414 {
5415 unsigned long long msr;
5416
5417 if (!platform->has_irtl_msrs || no_msr)
5418 return;
5419
5420 if (platform->supported_cstates & PC3) {
5421 get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
5422 fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
5423 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
5424 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
5425 }
5426
5427 if (platform->supported_cstates & PC6) {
5428 get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
5429 fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
5430 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
5431 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
5432 }
5433
5434 if (platform->supported_cstates & PC7) {
5435 get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
5436 fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
5437 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
5438 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
5439 }
5440
5441 if (platform->supported_cstates & PC8) {
5442 get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
5443 fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
5444 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
5445 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
5446 }
5447
5448 if (platform->supported_cstates & PC9) {
5449 get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
5450 fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
5451 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
5452 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
5453 }
5454
5455 if (platform->supported_cstates & PC10) {
5456 get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
5457 fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
5458 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
5459 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
5460 }
5461 }
5462
free_fd_percpu(void)5463 void free_fd_percpu(void)
5464 {
5465 int i;
5466
5467 if (!fd_percpu)
5468 return;
5469
5470 for (i = 0; i < topo.max_cpu_num + 1; ++i) {
5471 if (fd_percpu[i] != 0)
5472 close(fd_percpu[i]);
5473 }
5474
5475 free(fd_percpu);
5476 fd_percpu = NULL;
5477 }
5478
free_fd_instr_count_percpu(void)5479 void free_fd_instr_count_percpu(void)
5480 {
5481 if (!fd_instr_count_percpu)
5482 return;
5483
5484 for (int i = 0; i < topo.max_cpu_num + 1; ++i) {
5485 if (fd_instr_count_percpu[i] != 0)
5486 close(fd_instr_count_percpu[i]);
5487 }
5488
5489 free(fd_instr_count_percpu);
5490 fd_instr_count_percpu = NULL;
5491 }
5492
free_fd_cstate(void)5493 void free_fd_cstate(void)
5494 {
5495 if (!ccstate_counter_info)
5496 return;
5497
5498 const int counter_info_num = ccstate_counter_info_size;
5499
5500 for (int counter_id = 0; counter_id < counter_info_num; ++counter_id) {
5501 if (ccstate_counter_info[counter_id].fd_perf_core != -1)
5502 close(ccstate_counter_info[counter_id].fd_perf_core);
5503
5504 if (ccstate_counter_info[counter_id].fd_perf_pkg != -1)
5505 close(ccstate_counter_info[counter_id].fd_perf_pkg);
5506 }
5507
5508 free(ccstate_counter_info);
5509 ccstate_counter_info = NULL;
5510 ccstate_counter_info_size = 0;
5511 }
5512
free_fd_msr(void)5513 void free_fd_msr(void)
5514 {
5515 if (!msr_counter_info)
5516 return;
5517
5518 for (int cpu = 0; cpu < topo.max_cpu_num; ++cpu) {
5519 if (msr_counter_info[cpu].fd_perf != -1)
5520 close(msr_counter_info[cpu].fd_perf);
5521 }
5522
5523 free(msr_counter_info);
5524 msr_counter_info = NULL;
5525 msr_counter_info_size = 0;
5526 }
5527
free_fd_rapl_percpu(void)5528 void free_fd_rapl_percpu(void)
5529 {
5530 if (!rapl_counter_info_perdomain)
5531 return;
5532
5533 const int num_domains = rapl_counter_info_perdomain_size;
5534
5535 for (int domain_id = 0; domain_id < num_domains; ++domain_id) {
5536 if (rapl_counter_info_perdomain[domain_id].fd_perf != -1)
5537 close(rapl_counter_info_perdomain[domain_id].fd_perf);
5538 }
5539
5540 free(rapl_counter_info_perdomain);
5541 rapl_counter_info_perdomain = NULL;
5542 rapl_counter_info_perdomain_size = 0;
5543 }
5544
free_fd_added_perf_counters_(struct perf_counter_info * pp)5545 void free_fd_added_perf_counters_(struct perf_counter_info *pp)
5546 {
5547 if (!pp)
5548 return;
5549
5550 if (!pp->fd_perf_per_domain)
5551 return;
5552
5553 while (pp) {
5554 for (size_t domain = 0; domain < pp->num_domains; ++domain) {
5555 if (pp->fd_perf_per_domain[domain] != -1) {
5556 close(pp->fd_perf_per_domain[domain]);
5557 pp->fd_perf_per_domain[domain] = -1;
5558 }
5559 }
5560
5561 free(pp->fd_perf_per_domain);
5562 pp->fd_perf_per_domain = NULL;
5563
5564 pp = pp->next;
5565 }
5566 }
5567
free_fd_added_perf_counters(void)5568 void free_fd_added_perf_counters(void)
5569 {
5570 free_fd_added_perf_counters_(sys.perf_tp);
5571 free_fd_added_perf_counters_(sys.perf_cp);
5572 free_fd_added_perf_counters_(sys.perf_pp);
5573 }
5574
/* Release every allocation made during setup: cpu sets, topology
 * structures, output buffer, and all file descriptors. */
void free_all_buffers(void)
{
	int i;

	/* cpu_set_t allocations */
	CPU_FREE(cpu_present_set);
	cpu_present_set = NULL;
	cpu_present_setsize = 0;

	CPU_FREE(cpu_effective_set);
	cpu_effective_set = NULL;
	cpu_effective_setsize = 0;

	CPU_FREE(cpu_allowed_set);
	cpu_allowed_set = NULL;
	cpu_allowed_setsize = 0;

	CPU_FREE(cpu_affinity_set);
	cpu_affinity_set = NULL;
	cpu_affinity_setsize = 0;

	/* the two ping-pong snapshot buffers (even/odd) per topology level */
	free(thread_even);
	free(core_even);
	free(package_even);

	thread_even = NULL;
	core_even = NULL;
	package_even = NULL;

	free(thread_odd);
	free(core_odd);
	free(package_odd);

	thread_odd = NULL;
	core_odd = NULL;
	package_odd = NULL;

	free(output_buffer);
	output_buffer = NULL;
	outp = NULL;

	/* close all file descriptors before freeing their bookkeeping */
	free_fd_percpu();
	free_fd_instr_count_percpu();
	free_fd_msr();
	free_fd_rapl_percpu();
	free_fd_cstate();
	free_fd_added_perf_counters();

	free(irq_column_2_cpu);
	free(irqs_per_cpu);
	free(nmi_per_cpu);

	/* per-cpu topology data, then the topology array itself */
	for (i = 0; i <= topo.max_cpu_num; ++i) {
		if (cpus[i].put_ids)
			CPU_FREE(cpus[i].put_ids);
	}
	free(cpus);
}
5632
5633 /*
5634 * Parse a file containing a single int.
5635 * Return 0 if file can not be opened
5636 * Exit if file can be opened, but can not be parsed
5637 */
parse_int_file(const char * fmt,...)5638 int parse_int_file(const char *fmt, ...)
5639 {
5640 va_list args;
5641 char path[PATH_MAX];
5642 FILE *filep;
5643 int value;
5644
5645 va_start(args, fmt);
5646 vsnprintf(path, sizeof(path), fmt, args);
5647 va_end(args);
5648 filep = fopen(path, "r");
5649 if (!filep)
5650 return 0;
5651 if (fscanf(filep, "%d", &value) != 1)
5652 err(1, "%s: failed to parse number from file", path);
5653 fclose(filep);
5654 return value;
5655 }
5656
/*
 * cpu_is_first_core_in_package(cpu)
 * return 1 if given CPU is 1st core in package
 */
int cpu_is_first_core_in_package(int cpu)
{
	int first_sibling = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);

	return cpu == first_sibling;
}
5665
/* Read this CPU's package id from sysfs. */
int get_physical_package_id(int cpu)
{
	const char *path_fmt = "/sys/devices/system/cpu/cpu%d/topology/physical_package_id";

	return parse_int_file(path_fmt, cpu);
}
5670
/* Read this CPU's die id from sysfs. */
int get_die_id(int cpu)
{
	const char *path_fmt = "/sys/devices/system/cpu/cpu%d/topology/die_id";

	return parse_int_file(path_fmt, cpu);
}
5675
/* Read this CPU's core id from sysfs. */
int get_core_id(int cpu)
{
	const char *path_fmt = "/sys/devices/system/cpu/cpu%d/topology/core_id";

	return parse_int_file(path_fmt, cpu);
}
5680
/*
 * Assign a per-package logical node id to every CPU: within each package,
 * physical NUMA node ids are renumbered 0, 1, ... in discovery order.
 * Also updates topo.nodes_per_pkg with the largest count seen.
 */
void set_node_data(void)
{
	int pkg, node, lnode, cpu, cpux;
	int cpu_count;

	/* initialize logical_node_id */
	for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu)
		cpus[cpu].logical_node_id = -1;

	cpu_count = 0;
	for (pkg = 0; pkg < topo.num_packages; pkg++) {
		lnode = 0;
		for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
			if (cpus[cpu].physical_package_id != pkg)
				continue;
			/* find a cpu with an unset logical_node_id */
			if (cpus[cpu].logical_node_id != -1)
				continue;
			cpus[cpu].logical_node_id = lnode;
			node = cpus[cpu].physical_node_id;
			cpu_count++;
			/*
			 * find all matching cpus on this pkg and set
			 * the logical_node_id
			 */
			for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
				if ((cpus[cpux].physical_package_id == pkg) && (cpus[cpux].physical_node_id == node)) {
					cpus[cpux].logical_node_id = lnode;
					cpu_count++;
				}
			}
			lnode++;
			if (lnode > topo.nodes_per_pkg)
				topo.nodes_per_pkg = lnode;
		}
		/* stop early once every CPU has been visited */
		if (cpu_count >= topo.max_cpu_num)
			break;
	}
}
5720
get_physical_node_id(struct cpu_topology * thiscpu)5721 int get_physical_node_id(struct cpu_topology *thiscpu)
5722 {
5723 char path[80];
5724 FILE *filep;
5725 int i;
5726 int cpu = thiscpu->logical_cpu_id;
5727
5728 for (i = 0; i <= topo.max_cpu_num; i++) {
5729 sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", cpu, i);
5730 filep = fopen(path, "r");
5731 if (!filep)
5732 continue;
5733 fclose(filep);
5734 return i;
5735 }
5736 return -1;
5737 }
5738
/*
 * parse_cpu_str(cpu_str, cpu_set, cpu_set_size)
 * Parse a cpu-list string ("0-3,5,7-8"; ".." is also accepted as the
 * range separator) into cpu_set.
 * Return 0 on success, 1 on malformed input or cpu >= CPU_SUBSET_MAXCPUS.
 */
static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size)
{
	unsigned int start, end;
	char *next = cpu_str;

	while (next && *next) {

		if (*next == '-')	/* no negative cpu numbers */
			return 1;

		if (*next == '\0' || *next == '\n')
			break;

		start = strtoul(next, &next, 10);

		if (start >= CPU_SUBSET_MAXCPUS)
			return 1;
		CPU_SET_S(start, cpu_set_size, cpu_set);

		if (*next == '\0' || *next == '\n')
			break;

		/* single number followed by a comma: continue with next token */
		if (*next == ',') {
			next += 1;
			continue;
		}

		if (*next == '-') {
			next += 1;	/* start range */
		} else if (*next == '.') {
			next += 1;
			if (*next == '.')
				next += 1;	/* start range */
			else
				return 1;
		}

		end = strtoul(next, &next, 10);
		if (end <= start)	/* ranges must be strictly increasing */
			return 1;

		/* set every cpu in the range (start itself is already set) */
		while (++start <= end) {
			if (start >= CPU_SUBSET_MAXCPUS)
				return 1;
			CPU_SET_S(start, cpu_set_size, cpu_set);
		}

		if (*next == ',')
			next += 1;
		else if (*next != '\0' && *next != '\n')
			return 1;
	}

	return 0;
}
5794
/*
 * get_thread_siblings(thiscpu)
 * Read this cpu's thread_siblings bitmask from sysfs, record the siblings
 * that share its physical core in thiscpu->put_ids, and hand out sequential
 * thread_ids to siblings that do not have one yet.
 * Return the number of siblings recorded, or -1 on failure.
 */
int get_thread_siblings(struct cpu_topology *thiscpu)
{
	char path[80], character;
	FILE *filep;
	unsigned long map;
	int so, shift, sib_core;
	int cpu = thiscpu->logical_cpu_id;
	int offset = topo.max_cpu_num + 1;
	size_t size;
	int thread_id = 0;

	thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
	/* this cpu is thread 0 of its core if no id was assigned before */
	if (thiscpu->thread_id < 0)
		thiscpu->thread_id = thread_id++;
	if (!thiscpu->put_ids)
		return -1;

	size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(size, thiscpu->put_ids);

	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
	filep = fopen(path, "r");

	if (!filep) {
		warnx("%s: open failed", path);
		return -1;
	}
	do {
		/* mask words arrive most-significant first, comma separated */
		offset -= BITMASK_SIZE;
		if (fscanf(filep, "%lx%c", &map, &character) != 2)
			err(1, "%s: failed to parse file", path);
		for (shift = 0; shift < BITMASK_SIZE; shift++) {
			if ((map >> shift) & 0x1) {
				so = shift + offset;
				sib_core = get_core_id(so);
				if (sib_core == thiscpu->physical_core_id) {
					CPU_SET_S(so, size, thiscpu->put_ids);
					if ((so != cpu) && (cpus[so].thread_id < 0))
						cpus[so].thread_id = thread_id++;
				}
			}
		}
	} while (character == ',');
	fclose(filep);

	return CPU_COUNT_S(size, thiscpu->put_ids);
}
5842
/*
 * run func(thread, core, package) in topology order
 * skip non-present cpus
 *
 * Walks pkg/node/core/thread in order and calls func() with matching
 * entries from two counter snapshots (e.g. ODD and EVEN); cpus outside
 * the allowed set are skipped.  Returns the OR of all func() results.
 */

int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
			       struct pkg_data *, struct thread_data *, struct core_data *,
			       struct pkg_data *), struct thread_data *thread_base,
		   struct core_data *core_base, struct pkg_data *pkg_base,
		   struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2)
{
	int retval, pkg_no, node_no, core_no, thread_no;

	retval = 0;

	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
		for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
			for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
				for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
					struct thread_data *t, *t2;
					struct core_data *c, *c2;
					struct pkg_data *p, *p2;

					t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);

					if (cpu_is_not_allowed(t->cpu_id))
						continue;

					t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no);

					c = GET_CORE(core_base, core_no, node_no, pkg_no);
					c2 = GET_CORE(core_base2, core_no, node_no, pkg_no);

					p = GET_PKG(pkg_base, pkg_no);
					p2 = GET_PKG(pkg_base2, pkg_no);

					retval |= func(t, c, p, t2, c2, p2);
				}
			}
		}
	}
	return retval;
}
5886
5887 /*
5888 * run func(cpu) on every cpu in /proc/stat
5889 * return max_cpu number
5890 */
for_all_proc_cpus(int (func)(int))5891 int for_all_proc_cpus(int (func) (int))
5892 {
5893 FILE *fp;
5894 int cpu_num;
5895 int retval;
5896
5897 fp = fopen_or_die(proc_stat, "r");
5898
5899 retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
5900 if (retval != 0)
5901 err(1, "%s: failed to parse format", proc_stat);
5902
5903 while (1) {
5904 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
5905 if (retval != 1)
5906 break;
5907
5908 retval = func(cpu_num);
5909 if (retval) {
5910 fclose(fp);
5911 return (retval);
5912 }
5913 }
5914 fclose(fp);
5915 return 0;
5916 }
5917
/* cgroup-v2 file listing the cpus effectively available to this process */
#define PATH_EFFECTIVE_CPUS "/sys/fs/cgroup/cpuset.cpus.effective"

/* cached copy of the last cpuset.cpus.effective contents read */
static char cpu_effective_str[1024];
5921
update_effective_str(bool startup)5922 static int update_effective_str(bool startup)
5923 {
5924 FILE *fp;
5925 char *pos;
5926 char buf[1024];
5927 int ret;
5928
5929 if (cpu_effective_str[0] == '\0' && !startup)
5930 return 0;
5931
5932 fp = fopen(PATH_EFFECTIVE_CPUS, "r");
5933 if (!fp)
5934 return 0;
5935
5936 pos = fgets(buf, 1024, fp);
5937 if (!pos)
5938 err(1, "%s: file read failed\n", PATH_EFFECTIVE_CPUS);
5939
5940 fclose(fp);
5941
5942 ret = strncmp(cpu_effective_str, buf, 1024);
5943 if (!ret)
5944 return 0;
5945
5946 strncpy(cpu_effective_str, buf, 1024);
5947 return 1;
5948 }
5949
/*
 * update_effective_set(startup)
 * Refresh cpu_effective_str from sysfs and re-parse it into
 * cpu_effective_set; die on a malformed cpu list.
 */
static void update_effective_set(bool startup)
{
	update_effective_str(startup);

	if (parse_cpu_str(cpu_effective_str, cpu_effective_set, cpu_effective_setsize))
		err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str);
}
5957
5958 void linux_perf_init(void);
5959 void msr_perf_init(void);
5960 void rapl_perf_init(void);
5961 void cstate_perf_init(void);
5962 void added_perf_counters_init(void);
5963 void pmt_init(void);
5964
/*
 * re_initialize()
 * Tear down and rebuild all per-cpu state after a topology change
 * (cpu hotplug or cpuset change), then re-open all perf/msr counters.
 */
void re_initialize(void)
{
	free_all_buffers();
	setup_all_buffers(false);
	linux_perf_init();
	msr_perf_init();
	rapl_perf_init();
	cstate_perf_init();
	added_perf_counters_init();
	pmt_init();
	fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus,
		topo.allowed_cpus);
}
5978
set_max_cpu_num(void)5979 void set_max_cpu_num(void)
5980 {
5981 FILE *filep;
5982 int base_cpu;
5983 unsigned long dummy;
5984 char pathname[64];
5985
5986 base_cpu = sched_getcpu();
5987 if (base_cpu < 0)
5988 err(1, "cannot find calling cpu ID");
5989 sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", base_cpu);
5990
5991 filep = fopen_or_die(pathname, "r");
5992 topo.max_cpu_num = 0;
5993 while (fscanf(filep, "%lx,", &dummy) == 1)
5994 topo.max_cpu_num += BITMASK_SIZE;
5995 fclose(filep);
5996 topo.max_cpu_num--; /* 0 based */
5997 }
5998
/*
 * count_cpus()
 * for_all_proc_cpus() callback: tally the number of cpus
 * listed in /proc/stat into topo.num_cpus.
 */
int count_cpus(int cpu)
{
	UNUSED(cpu);

	topo.num_cpus++;
	return 0;
}
6010
/* for_all_proc_cpus() callback: record the cpu in cpu_present_set. */
int mark_cpu_present(int cpu)
{
	CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
	return 0;
}
6016
/* for_all_proc_cpus() callback: mark the cpu's thread_id as unassigned. */
int init_thread_id(int cpu)
{
	cpus[cpu].thread_id = -1;
	return 0;
}
6022
/*
 * set_my_cpu_type()
 * Return the hybrid core-type of the cpu we are currently running on
 * (CPUID leaf 0x1A, EAX bits 31:24), or 0 if that leaf is unsupported.
 */
int set_my_cpu_type(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int max_level;

	/* leaf 0 reports the maximum supported CPUID leaf */
	__cpuid(0, max_level, ebx, ecx, edx);

	if (max_level < CPUID_LEAF_MODEL_ID)
		return 0;

	__cpuid(CPUID_LEAF_MODEL_ID, eax, ebx, ecx, edx);

	return (eax >> CPUID_LEAF_MODEL_ID_CORE_TYPE_SHIFT);
}
6037
/*
 * set_cpu_hybrid_type(cpu)
 * Migrate onto the given cpu (CPUID is per-cpu) and record its
 * hybrid core-type.  Return 0 on success, -1 if migration failed.
 */
int set_cpu_hybrid_type(int cpu)
{
	if (cpu_migrate(cpu))
		return -1;

	int type = set_my_cpu_type();

	cpus[cpu].type = type;
	return 0;
}
6048
6049 /*
6050 * snapshot_proc_interrupts()
6051 *
6052 * read and record summary of /proc/interrupts
6053 *
6054 * return 1 if config change requires a restart, else return 0
6055 */
snapshot_proc_interrupts(void)6056 int snapshot_proc_interrupts(void)
6057 {
6058 static FILE *fp;
6059 int column, retval;
6060
6061 if (fp == NULL)
6062 fp = fopen_or_die("/proc/interrupts", "r");
6063 else
6064 rewind(fp);
6065
6066 /* read 1st line of /proc/interrupts to get cpu* name for each column */
6067 for (column = 0; column < topo.num_cpus; ++column) {
6068 int cpu_number;
6069
6070 retval = fscanf(fp, " CPU%d", &cpu_number);
6071 if (retval != 1)
6072 break;
6073
6074 if (cpu_number > topo.max_cpu_num) {
6075 warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
6076 return 1;
6077 }
6078
6079 irq_column_2_cpu[column] = cpu_number;
6080 irqs_per_cpu[cpu_number] = 0;
6081 nmi_per_cpu[cpu_number] = 0;
6082 }
6083
6084 /* read /proc/interrupt count lines and sum up irqs per cpu */
6085 while (1) {
6086 int column;
6087 char buf[64];
6088 int this_row_is_nmi = 0;
6089
6090 retval = fscanf(fp, " %s:", buf); /* irq# "N:" */
6091 if (retval != 1)
6092 break;
6093
6094 if (strncmp(buf, "NMI", strlen("NMI")) == 0)
6095 this_row_is_nmi = 1;
6096
6097 /* read the count per cpu */
6098 for (column = 0; column < topo.num_cpus; ++column) {
6099
6100 int cpu_number, irq_count;
6101
6102 retval = fscanf(fp, " %d", &irq_count);
6103
6104 if (retval != 1)
6105 break;
6106
6107 cpu_number = irq_column_2_cpu[column];
6108 irqs_per_cpu[cpu_number] += irq_count;
6109 if (this_row_is_nmi)
6110 nmi_per_cpu[cpu_number] += irq_count;
6111 }
6112 while (getc(fp) != '\n') ; /* flush interrupt description */
6113
6114 }
6115 return 0;
6116 }
6117
/*
 * snapshot_graphics()
 *
 * record snapshot of specified graphics sysfs knob
 *
 * return 1 if config change requires a restart, else return 0
 */
int snapshot_graphics(int idx)
{
	int retval;

	/* the fp was opened at probe time; rewind+fflush re-reads the knob */
	rewind(gfx_info[idx].fp);
	fflush(gfx_info[idx].fp);

	switch (idx) {
	case GFX_rc6:
	case SAM_mc6:
		/* residency counters are 64-bit values */
		retval = fscanf(gfx_info[idx].fp, "%lld", &gfx_info[idx].val_ull);
		if (retval != 1)
			err(1, "rc6");
		return 0;
	case GFX_MHz:
	case GFX_ACTMHz:
	case SAM_MHz:
	case SAM_ACTMHz:
		/* frequency knobs are plain ints */
		retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val);
		if (retval != 1)
			err(1, "MHz");
		return 0;
	default:
		return -EINVAL;
	}
}
6151
/*
 * snapshot_cpu_lpi()
 *
 * record snapshot of
 * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
 *
 * On parse failure the BIC_CPU_LPI column is disabled and -1 returned.
 */
int snapshot_cpu_lpi_us(void)
{
	FILE *fp;
	int retval;

	fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");

	retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
	if (retval != 1) {
		fprintf(stderr, "Disabling Low Power Idle CPU output\n");
		BIC_NOT_PRESENT(BIC_CPU_LPI);
		fclose(fp);
		return -1;
	}

	fclose(fp);

	return 0;
}
6177
/*
 * snapshot_sys_lpi()
 *
 * record snapshot of sys_lpi_file
 *
 * On parse failure the BIC_SYS_LPI column is disabled and -1 returned.
 */
int snapshot_sys_lpi_us(void)
{
	FILE *fp;
	int retval;

	fp = fopen_or_die(sys_lpi_file, "r");

	retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
	if (retval != 1) {
		fprintf(stderr, "Disabling Low Power Idle System output\n");
		BIC_NOT_PRESENT(BIC_SYS_LPI);
		fclose(fp);
		return -1;
	}
	fclose(fp);

	return 0;
}
6201
/*
 * snapshot /proc and /sys files
 *
 * return 1 if configuration restart needed, else return 0
 */
int snapshot_proc_sysfs_files(void)
{
	/* timestamp the snapshot so /proc//sys read time can be accounted */
	gettimeofday(&procsysfs_tv_begin, (struct timezone *)NULL);

	/* only /proc/interrupts changes can force a restart */
	if (DO_BIC(BIC_IRQ) || DO_BIC(BIC_NMI))
		if (snapshot_proc_interrupts())
			return 1;

	if (DO_BIC(BIC_GFX_rc6))
		snapshot_graphics(GFX_rc6);

	if (DO_BIC(BIC_GFXMHz))
		snapshot_graphics(GFX_MHz);

	if (DO_BIC(BIC_GFXACTMHz))
		snapshot_graphics(GFX_ACTMHz);

	if (DO_BIC(BIC_SAM_mc6))
		snapshot_graphics(SAM_mc6);

	if (DO_BIC(BIC_SAMMHz))
		snapshot_graphics(SAM_MHz);

	if (DO_BIC(BIC_SAMACTMHz))
		snapshot_graphics(SAM_ACTMHz);

	if (DO_BIC(BIC_CPU_LPI))
		snapshot_cpu_lpi_us();

	if (DO_BIC(BIC_SYS_LPI))
		snapshot_sys_lpi_us();

	return 0;
}
6241
/* set by SIGINT or 'q' on stdin; makes the main loop exit cleanly */
int exit_requested;
6243
signal_handler(int signal)6244 static void signal_handler(int signal)
6245 {
6246 switch (signal) {
6247 case SIGINT:
6248 exit_requested = 1;
6249 if (debug)
6250 fprintf(stderr, " SIGINT\n");
6251 break;
6252 case SIGUSR1:
6253 if (debug > 1)
6254 fprintf(stderr, "SIGUSR1\n");
6255 break;
6256 }
6257 }
6258
/*
 * setup_signal_handler()
 * Install signal_handler() for SIGINT and SIGUSR1; exit on failure.
 */
void setup_signal_handler(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));

	sa.sa_handler = &signal_handler;

	if (sigaction(SIGINT, &sa, NULL) < 0)
		err(1, "sigaction SIGINT");
	if (sigaction(SIGUSR1, &sa, NULL) < 0)
		err(1, "sigaction SIGUSR1");
}
6272
/*
 * do_sleep()
 * Sleep for one measurement interval, waking early if the user types
 * 'q' on stdin (requests exit).  Once stdin hits EOF (pipe closed),
 * switch to plain nanosleep for all later intervals.
 */
void do_sleep(void)
{
	struct timeval tout;
	struct timespec rest;
	fd_set readfds;
	int retval;

	FD_ZERO(&readfds);
	FD_SET(0, &readfds);

	if (ignore_stdin) {
		nanosleep(&interval_ts, NULL);
		return;
	}

	/* select() both sleeps and watches stdin; tout returns time left */
	tout = interval_tv;
	retval = select(1, &readfds, NULL, NULL, &tout);

	if (retval == 1) {
		switch (getc(stdin)) {
		case 'q':
			exit_requested = 1;
			break;
		case EOF:
			/*
			 * 'stdin' is a pipe closed on the other end. There
			 * won't be any further input.
			 */
			ignore_stdin = 1;
			/* Sleep the rest of the time */
			rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000);
			rest.tv_nsec = (tout.tv_usec % 1000000) * 1000;
			nanosleep(&rest, NULL);
		}
	}
}
6309
/*
 * get_msr_sum(cpu, offset, msr)
 * Return via *msr the wraparound-corrected 64-bit running total for a
 * 32-bit MSR: accumulated sum + (current raw value - value at last
 * accumulation).  Return 0 on success, non-zero on failure.
 */
int get_msr_sum(int cpu, off_t offset, unsigned long long *msr)
{
	int ret, idx;
	unsigned long long msr_cur, msr_last;

	assert(!no_msr);

	if (!per_cpu_msr_sum)
		return 1;

	idx = offset_to_idx(offset);
	if (idx < 0)
		return idx;
	/* get_msr_sum() = sum + (get_msr() - last) */
	ret = get_msr(cpu, offset, &msr_cur);
	if (ret)
		return ret;
	msr_last = per_cpu_msr_sum[cpu].entries[idx].last;
	DELTA_WRAP32(msr_cur, msr_last);
	*msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum;

	return 0;
}
6333
/* periodic timer used to accumulate 32-bit MSR wraparounds */
timer_t timerid;
6335
/* Timer callback, update the sum of MSRs periodically. */
static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int i, ret;
	int cpu = t->cpu_id;

	UNUSED(c);
	UNUSED(p);

	assert(!no_msr);

	for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
		unsigned long long msr_cur, msr_last;
		off_t offset;

		if (!idx_valid(i))
			continue;
		offset = idx_to_offset(i);
		if (offset < 0)
			continue;
		ret = get_msr(cpu, offset, &msr_cur);
		if (ret) {
			fprintf(outf, "Can not update msr(0x%llx)\n", (unsigned long long)offset);
			continue;
		}

		/* fold the 32-bit delta since the last sample into the 64-bit sum */
		msr_last = per_cpu_msr_sum[cpu].entries[i].last;
		per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff;

		DELTA_WRAP32(msr_cur, msr_last);
		per_cpu_msr_sum[cpu].entries[i].sum += msr_last;
	}
	return 0;
}
6370
/* SIGEV_THREAD timer notifier: accumulate MSR sums on every cpu. */
static void msr_record_handler(union sigval v)
{
	UNUSED(v);

	for_all_cpus(update_msr_sum, EVEN_COUNTERS);
}
6377
/*
 * msr_sum_record()
 * Allocate the per-cpu MSR accumulators and arm a periodic timer that
 * folds 32-bit MSR wraparounds into 64-bit sums.  Failures are reported
 * and the feature silently disabled (goto-based cleanup).
 */
void msr_sum_record(void)
{
	struct itimerspec its;
	struct sigevent sev;

	per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array));
	if (!per_cpu_msr_sum) {
		fprintf(outf, "Can not allocate memory for long time MSR.\n");
		return;
	}
	/*
	 * Signal handler might be restricted, so use thread notifier instead.
	 */
	memset(&sev, 0, sizeof(struct sigevent));
	sev.sigev_notify = SIGEV_THREAD;
	sev.sigev_notify_function = msr_record_handler;

	sev.sigev_value.sival_ptr = &timerid;
	if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) {
		fprintf(outf, "Can not create timer.\n");
		goto release_msr;
	}

	/* first expiry almost immediately, to prime the accumulators */
	its.it_value.tv_sec = 0;
	its.it_value.tv_nsec = 1;
	/*
	 * A wraparound time has been calculated early.
	 * Some sources state that the peak power for a
	 * microprocessor is usually 1.5 times the TDP rating,
	 * use 2 * TDP for safety.
	 */
	its.it_interval.tv_sec = rapl_joule_counter_range / 2;
	its.it_interval.tv_nsec = 0;

	if (timer_settime(timerid, 0, &its, NULL) == -1) {
		fprintf(outf, "Can not set timer.\n");
		goto release_timer;
	}
	return;

release_timer:
	timer_delete(timerid);
release_msr:
	free(per_cpu_msr_sum);
}
6423
/*
 * set_my_sched_priority(pri)
 * Set this process' nice value and verify the kernel accepted it.
 * return previous priority on success
 * return value < -20 on failure
 */
int set_my_sched_priority(int priority)
{
	int previous;

	errno = 0;
	previous = getpriority(PRIO_PROCESS, 0);
	if (previous == -1 && errno)
		return -21;

	if (setpriority(PRIO_PROCESS, 0, priority) != 0)
		return -21;

	/* read back to confirm the kernel actually applied it */
	errno = 0;
	if (getpriority(PRIO_PROCESS, 0) != priority)
		return -21;

	return previous;
}
6450
/*
 * turbostat_loop()
 * Main interval-mode loop: alternate EVEN and ODD counter snapshots,
 * printing the delta between them each interval.  On counter-read
 * failure (topology change), re-initialize and restart — giving up
 * after 10 consecutive failed restarts.
 */
void turbostat_loop()
{
	int retval;
	int restarted = 0;
	unsigned int done_iters = 0;

	setup_signal_handler();

	/*
	 * elevate own priority for interval mode
	 *
	 * ignore on error - we probably don't have permission to set it, but
	 * it's not a big deal
	 */
	set_my_sched_priority(-20);

restart:
	restarted++;

	snapshot_proc_sysfs_files();
	retval = for_all_cpus(get_counters, EVEN_COUNTERS);
	first_counter_read = 0;
	if (retval < -1) {
		exit(retval);
	} else if (retval == -1) {
		/* -1 means "retry after re-initialize"; cap the retries */
		if (restarted > 10) {
			exit(retval);
		}
		re_initialize();
		goto restart;
	}
	restarted = 0;
	done_iters = 0;
	gettimeofday(&tv_even, (struct timezone *)NULL);

	while (1) {
		/* restart if cpus came/went or the effective cpuset changed */
		if (for_all_proc_cpus(cpu_is_not_present)) {
			re_initialize();
			goto restart;
		}
		if (update_effective_str(false)) {
			re_initialize();
			goto restart;
		}
		do_sleep();
		if (snapshot_proc_sysfs_files())
			goto restart;
		retval = for_all_cpus(get_counters, ODD_COUNTERS);
		if (retval < -1) {
			exit(retval);
		} else if (retval == -1) {
			re_initialize();
			goto restart;
		}
		gettimeofday(&tv_odd, (struct timezone *)NULL);
		timersub(&tv_odd, &tv_even, &tv_delta);
		/* ODD - EVEN delta is stored into EVEN and printed */
		if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
			re_initialize();
			goto restart;
		}
		delta_platform(&platform_counters_odd, &platform_counters_even);
		compute_average(EVEN_COUNTERS);
		format_all_counters(EVEN_COUNTERS);
		flush_output_stdout();
		if (exit_requested)
			break;
		if (num_iterations && ++done_iters >= num_iterations)
			break;
		do_sleep();
		if (snapshot_proc_sysfs_files())
			goto restart;
		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
		if (retval < -1) {
			exit(retval);
		} else if (retval == -1) {
			re_initialize();
			goto restart;
		}
		gettimeofday(&tv_even, (struct timezone *)NULL);
		timersub(&tv_even, &tv_odd, &tv_delta);
		/* EVEN - ODD delta is stored into ODD and printed */
		if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
			re_initialize();
			goto restart;
		}
		delta_platform(&platform_counters_even, &platform_counters_odd);
		compute_average(ODD_COUNTERS);
		format_all_counters(ODD_COUNTERS);
		flush_output_stdout();
		if (exit_requested)
			break;
		if (num_iterations && ++done_iters >= num_iterations)
			break;
	}
}
6545
/*
 * check_dev_msr()
 * Verify the msr driver device node exists for base_cpu;
 * if not, try loading the msr module once, and disable
 * MSR access entirely when that fails.
 */
void check_dev_msr()
{
	struct stat sb;
	char pathname[32];

	if (no_msr)
		return;
#if defined(ANDROID)
	sprintf(pathname, "/dev/msr%d", base_cpu);
#else
	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
#endif
	if (stat(pathname, &sb))
		if (system("/sbin/modprobe msr > /dev/null 2>&1"))
			no_msr = 1;
}
6562
/*
 * check for CAP_SYS_RAWIO
 * return 0 on success
 * return 1 on fail
 */
int check_for_cap_sys_rawio(void)
{
	cap_t caps;
	cap_flag_value_t cap_flag_value;
	int ret = 0;

	caps = cap_get_proc();
	if (caps == NULL)
		return 1;

	if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) {
		ret = 1;
		goto free_and_exit;
	}

	if (cap_flag_value != CAP_SET) {
		ret = 1;
		goto free_and_exit;
	}

	/* success path falls through to the common cleanup */
free_and_exit:
	if (cap_free(caps) == -1)
		err(-6, "cap_free\n");

	return ret;
}
6594
/*
 * check_msr_permission()
 * Verify both CAP_SYS_RAWIO and read access to the msr device node;
 * on any failure warn once and disable MSR access globally.
 */
void check_msr_permission(void)
{
	int failed = 0;
	char pathname[32];

	if (no_msr)
		return;

	/* check for CAP_SYS_RAWIO */
	failed += check_for_cap_sys_rawio();

	/* test file permissions */
#if defined(ANDROID)
	sprintf(pathname, "/dev/msr%d", base_cpu);
#else
	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
#endif
	if (euidaccess(pathname, R_OK)) {
		failed++;
	}

	if (failed) {
		warnx("Failed to access %s. Some of the counters may not be available\n"
		      "\tRun as root to enable them or use %s to disable the access explicitly", pathname, "--no-msr");
		no_msr = 1;
	}
}
6622
/*
 * probe_bclk()
 * Determine the bus-clock frequency for this platform and derive
 * base_hz from the base ratio in MSR_PLATFORM_INFO[15:8].
 * No-op without Nehalem-class MSRs or with MSR access disabled.
 */
void probe_bclk(void)
{
	unsigned long long msr;
	unsigned int base_ratio;

	if (!platform->has_nhm_msrs || no_msr)
		return;

	if (platform->bclk_freq == BCLK_100MHZ)
		bclk = 100.00;
	else if (platform->bclk_freq == BCLK_133MHZ)
		bclk = 133.33;
	else if (platform->bclk_freq == BCLK_SLV)
		bclk = slm_bclk();	/* Silvermont reads bclk from an MSR */
	else
		return;

	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
	base_ratio = (msr >> 8) & 0xFF;

	base_hz = base_ratio * bclk * 1000000;
	has_base_hz = 1;

	if (platform->enable_tsc_tweak)
		tsc_tweak = base_hz / tsc_hz;
}
6649
/*
 * remove_underbar(s)
 * Strip every '_' character from s, compacting it in place.
 */
static void remove_underbar(char *s)
{
	char *dst, *src;

	for (dst = src = s; *src; src++)
		if (*src != '_')
			*dst++ = *src;

	*dst = 0;
}
6662
/*
 * dump_turbo_ratio_info()
 * Print every turbo-ratio-limit MSR this platform advertises.
 * No-op without turbo, without Nehalem-class MSRs, or with MSR
 * access disabled.
 */
static void dump_turbo_ratio_info(void)
{
	if (!has_turbo)
		return;

	if (!platform->has_nhm_msrs || no_msr)
		return;

	if (platform->trl_msrs & TRL_LIMIT2)
		dump_turbo_ratio_limit2();

	if (platform->trl_msrs & TRL_LIMIT1)
		dump_turbo_ratio_limit1();

	if (platform->trl_msrs & TRL_BASE) {
		dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT);

		/* hybrid parts carry a second limit MSR for the other core type */
		if (is_hybrid)
			dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT);
	}

	if (platform->trl_msrs & TRL_ATOM)
		dump_atom_turbo_ratio_limits();

	if (platform->trl_msrs & TRL_KNL)
		dump_knl_turbo_ratio_limits();

	if (platform->has_config_tdp)
		dump_config_tdp();
}
6693
read_sysfs_int(char * path)6694 static int read_sysfs_int(char *path)
6695 {
6696 FILE *input;
6697 int retval = -1;
6698
6699 input = fopen(path, "r");
6700 if (input == NULL) {
6701 if (debug)
6702 fprintf(outf, "NSFOD %s\n", path);
6703 return (-1);
6704 }
6705 if (fscanf(input, "%d", &retval) != 1)
6706 err(1, "%s: failed to read int from file", path);
6707 fclose(input);
6708
6709 return (retval);
6710 }
6711
/*
 * dump_sysfs_file(path)
 * Print "<basename>: <first line>" of a sysfs file to outf.
 * Silently skips (with an optional debug note) files that cannot
 * be opened; exits if a file opens but cannot be read.
 */
static void dump_sysfs_file(char *path)
{
	FILE *input;
	char cpuidle_buf[64];

	input = fopen(path, "r");
	if (input == NULL) {
		if (debug)
			fprintf(outf, "NSFOD %s\n", path);
		return;
	}
	if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
		err(1, "%s: failed to read file", path);
	fclose(input);

	fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
}
6729
/*
 * probe_intel_uncore_frequency_legacy()
 * Probe the old package_NN_die_NN uncore-frequency sysfs layout.
 * Enables the UncMHz column when present; unless quiet, also prints
 * the current/min/max/initial uncore frequency per package/die.
 */
static void probe_intel_uncore_frequency_legacy(void)
{
	int i, j;
	char path[256];

	for (i = 0; i < topo.num_packages; ++i) {
		for (j = 0; j <= topo.max_die_id; ++j) {
			int k, l;
			char path_base[128];

			sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i,
				j);

			if (access(path_base, R_OK))
				continue;

			BIC_PRESENT(BIC_UNCORE_MHZ);

			if (quiet)
				return;

			sprintf(path, "%s/min_freq_khz", path_base);
			k = read_sysfs_int(path);
			sprintf(path, "%s/max_freq_khz", path_base);
			l = read_sysfs_int(path);
			fprintf(outf, "Uncore Frequency package%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);

			sprintf(path, "%s/initial_min_freq_khz", path_base);
			k = read_sysfs_int(path);
			sprintf(path, "%s/initial_max_freq_khz", path_base);
			l = read_sysfs_int(path);
			fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);

			sprintf(path, "%s/current_freq_khz", path_base);
			k = read_sysfs_int(path);
			fprintf(outf, " %d MHz\n", k / 1000);
		}
	}
}
6769
/*
 * probe_intel_uncore_frequency_cluster()
 * Probe the newer per-cluster uncore## sysfs layout: count the uncore
 * directories, then (highest id first) register a UMHz<domain>.<cluster>
 * counter for each and, unless quiet, print its frequency ranges.
 */
static void probe_intel_uncore_frequency_cluster(void)
{
	int i, uncore_max_id;
	char path[256];
	char path_base[128];

	if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK))
		return;

	for (uncore_max_id = 0;; ++uncore_max_id) {

		sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", uncore_max_id);

		/* uncore## start at 00 and skips no numbers, so stop upon first missing */
		if (access(path_base, R_OK)) {
			uncore_max_id -= 1;
			break;
		}
	}
	/* walk backwards so the counters are registered in ascending order */
	for (i = uncore_max_id; i >= 0; --i) {
		int k, l;
		int package_id, domain_id, cluster_id;
		char name_buf[16];

		sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", i);

		if (access(path_base, R_OK))
			err(1, "%s: %s\n", __func__, path_base);

		sprintf(path, "%s/package_id", path_base);
		package_id = read_sysfs_int(path);

		sprintf(path, "%s/domain_id", path_base);
		domain_id = read_sysfs_int(path);

		sprintf(path, "%s/fabric_cluster_id", path_base);
		cluster_id = read_sysfs_int(path);

		sprintf(path, "%s/current_freq_khz", path_base);
		sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id);

		/*
		 * Once add_couter() is called, that counter is always read
		 * and reported -- So it is effectively (enabled & present).
		 * Only call add_counter() here if legacy BIC_UNCORE_MHZ (UncMHz)
		 * is (enabled). Since we are in this routine, we
		 * know we will not probe and set (present) the legacy counter.
		 *
		 * This allows "--show/--hide UncMHz" to be effective for
		 * the clustered MHz counters, as a group.
		 */
		if BIC_IS_ENABLED(BIC_UNCORE_MHZ)
			add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id);

		if (quiet)
			continue;

		sprintf(path, "%s/min_freq_khz", path_base);
		k = read_sysfs_int(path);
		sprintf(path, "%s/max_freq_khz", path_base);
		l = read_sysfs_int(path);
		fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id,
			cluster_id, k / 1000, l / 1000);

		sprintf(path, "%s/initial_min_freq_khz", path_base);
		k = read_sysfs_int(path);
		sprintf(path, "%s/initial_max_freq_khz", path_base);
		l = read_sysfs_int(path);
		fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);

		sprintf(path, "%s/current_freq_khz", path_base);
		k = read_sysfs_int(path);
		fprintf(outf, " %d MHz\n", k / 1000);
	}
}
6847
probe_intel_uncore_frequency(void)6848 static void probe_intel_uncore_frequency(void)
6849 {
6850 if (!genuine_intel)
6851 return;
6852
6853 if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK) == 0)
6854 probe_intel_uncore_frequency_cluster();
6855 else
6856 probe_intel_uncore_frequency_legacy();
6857 }
6858
/* Open a graphics sysfs knob, if readable, and cache its FILE* in gfx_info. */
static void set_graphics_fp(char *path, int idx)
{
	if (!access(path, R_OK))
		gfx_info[idx].fp = fopen_or_die(path, "r");
}
6864
6865 /* Enlarge this if there are /sys/class/drm/card2 ... */
6866 #define GFX_MAX_CARDS 2
6867
/*
 * probe_graphics()
 *
 * Locate graphics residency/frequency sysfs files, trying (in order):
 * Xe per-tile knobs, new i915 per-gt knobs, then legacy i915 knobs.
 * For each file found, a FILE* is cached in gfx_info[] and the
 * corresponding built-in counter is marked present.
 */
static void probe_graphics(void)
{
	char path[PATH_MAX];
	int i;

	/* Xe graphics sysfs knobs */
	if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) {
		FILE *fp;
		char buf[8];
		bool gt0_is_gt;

		/* Read gt0's idle-state name to learn whether gt0 is render or media. */
		fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r");
		if (!fp)
			goto next;

		/* only 7 bytes are needed to match the "gt0-rc"/"gt0-mc" prefixes below */
		if (!fread(buf, sizeof(char), 7, fp)) {
			fclose(fp);
			goto next;
		}
		fclose(fp);

		if (!strncmp(buf, "gt0-rc", strlen("gt0-rc")))
			gt0_is_gt = true;	/* gt0 is the render/compute GT */
		else if (!strncmp(buf, "gt0-mc", strlen("gt0-mc")))
			gt0_is_gt = false;	/* gt0 is the media GT */
		else
			goto next;

		/* Map gt0/gt1 knobs onto the GFX_*/SAM_* slots according to gt0's role. */
		set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms",
				gt0_is_gt ? GFX_rc6 : SAM_mc6);

		set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", gt0_is_gt ? GFX_MHz : SAM_MHz);

		set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq",
				gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz);

		set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms",
				gt0_is_gt ? SAM_mc6 : GFX_rc6);

		set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", gt0_is_gt ? SAM_MHz : GFX_MHz);

		set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq",
				gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz);

		goto end;
	}

next:
	/* New i915 graphics sysfs knobs: find the first card exposing gt/gt0 */
	for (i = 0; i < GFX_MAX_CARDS; i++) {
		snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rc6_residency_ms", i);
		if (!access(path, R_OK))
			break;
	}

	if (i == GFX_MAX_CARDS)
		goto legacy_i915;

	/* gt0 carries the render counters, gt1 (if present) the media counters */
	snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rc6_residency_ms", i);
	set_graphics_fp(path, GFX_rc6);

	snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rps_cur_freq_mhz", i);
	set_graphics_fp(path, GFX_MHz);

	snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rps_act_freq_mhz", i);
	set_graphics_fp(path, GFX_ACTMHz);

	snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rc6_residency_ms", i);
	set_graphics_fp(path, SAM_mc6);

	snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rps_cur_freq_mhz", i);
	set_graphics_fp(path, SAM_MHz);

	snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rps_act_freq_mhz", i);
	set_graphics_fp(path, SAM_ACTMHz);

	goto end;

legacy_i915:
	/* Fall back to traditional i915 graphics sysfs knobs */
	set_graphics_fp("/sys/class/drm/card0/power/rc6_residency_ms", GFX_rc6);

	set_graphics_fp("/sys/class/drm/card0/gt_cur_freq_mhz", GFX_MHz);
	if (!gfx_info[GFX_MHz].fp)
		set_graphics_fp("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", GFX_MHz);

	set_graphics_fp("/sys/class/drm/card0/gt_act_freq_mhz", GFX_ACTMHz);
	if (!gfx_info[GFX_ACTMHz].fp)
		set_graphics_fp("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", GFX_ACTMHz);

end:
	/* Advertise the built-in counters whose sysfs files were found. */
	if (gfx_info[GFX_rc6].fp)
		BIC_PRESENT(BIC_GFX_rc6);
	if (gfx_info[GFX_MHz].fp)
		BIC_PRESENT(BIC_GFXMHz);
	if (gfx_info[GFX_ACTMHz].fp)
		BIC_PRESENT(BIC_GFXACTMHz);
	if (gfx_info[SAM_mc6].fp)
		BIC_PRESENT(BIC_SAM_mc6);
	if (gfx_info[SAM_MHz].fp)
		BIC_PRESENT(BIC_SAMMHz);
	if (gfx_info[SAM_ACTMHz].fp)
		BIC_PRESENT(BIC_SAMACTMHz);
}
6972
dump_sysfs_cstate_config(void)6973 static void dump_sysfs_cstate_config(void)
6974 {
6975 char path[64];
6976 char name_buf[16];
6977 char desc[64];
6978 FILE *input;
6979 int state;
6980 char *sp;
6981
6982 if (access("/sys/devices/system/cpu/cpuidle", R_OK)) {
6983 fprintf(outf, "cpuidle not loaded\n");
6984 return;
6985 }
6986
6987 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver");
6988 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor");
6989 dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro");
6990
6991 for (state = 0; state < 10; ++state) {
6992
6993 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
6994 input = fopen(path, "r");
6995 if (input == NULL)
6996 continue;
6997 if (!fgets(name_buf, sizeof(name_buf), input))
6998 err(1, "%s: failed to read file", path);
6999
7000 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
7001 sp = strchr(name_buf, '-');
7002 if (!sp)
7003 sp = strchrnul(name_buf, '\n');
7004 *sp = '\0';
7005 fclose(input);
7006
7007 remove_underbar(name_buf);
7008
7009 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state);
7010 input = fopen(path, "r");
7011 if (input == NULL)
7012 continue;
7013 if (!fgets(desc, sizeof(desc), input))
7014 err(1, "%s: failed to read file", path);
7015
7016 fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
7017 fclose(input);
7018 }
7019 }
7020
dump_sysfs_pstate_config(void)7021 static void dump_sysfs_pstate_config(void)
7022 {
7023 char path[64];
7024 char driver_buf[64];
7025 char governor_buf[64];
7026 FILE *input;
7027 int turbo;
7028
7029 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", base_cpu);
7030 input = fopen(path, "r");
7031 if (input == NULL) {
7032 fprintf(outf, "NSFOD %s\n", path);
7033 return;
7034 }
7035 if (!fgets(driver_buf, sizeof(driver_buf), input))
7036 err(1, "%s: failed to read file", path);
7037 fclose(input);
7038
7039 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", base_cpu);
7040 input = fopen(path, "r");
7041 if (input == NULL) {
7042 fprintf(outf, "NSFOD %s\n", path);
7043 return;
7044 }
7045 if (!fgets(governor_buf, sizeof(governor_buf), input))
7046 err(1, "%s: failed to read file", path);
7047 fclose(input);
7048
7049 fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
7050 fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);
7051
7052 sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
7053 input = fopen(path, "r");
7054 if (input != NULL) {
7055 if (fscanf(input, "%d", &turbo) != 1)
7056 err(1, "%s: failed to parse number from file", path);
7057 fprintf(outf, "cpufreq boost: %d\n", turbo);
7058 fclose(input);
7059 }
7060
7061 sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
7062 input = fopen(path, "r");
7063 if (input != NULL) {
7064 if (fscanf(input, "%d", &turbo) != 1)
7065 err(1, "%s: failed to parse number from file", path);
7066 fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
7067 fclose(input);
7068 }
7069 }
7070
7071 /*
7072 * print_epb()
7073 * Decode the ENERGY_PERF_BIAS MSR
7074 */
print_epb(struct thread_data * t,struct core_data * c,struct pkg_data * p)7075 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
7076 {
7077 char *epb_string;
7078 int cpu, epb;
7079
7080 UNUSED(c);
7081 UNUSED(p);
7082
7083 if (!has_epb)
7084 return 0;
7085
7086 cpu = t->cpu_id;
7087
7088 /* EPB is per-package */
7089 if (!is_cpu_first_thread_in_package(t, c, p))
7090 return 0;
7091
7092 if (cpu_migrate(cpu)) {
7093 fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu);
7094 return -1;
7095 }
7096
7097 epb = get_epb(cpu);
7098 if (epb < 0)
7099 return 0;
7100
7101 switch (epb) {
7102 case ENERGY_PERF_BIAS_PERFORMANCE:
7103 epb_string = "performance";
7104 break;
7105 case ENERGY_PERF_BIAS_NORMAL:
7106 epb_string = "balanced";
7107 break;
7108 case ENERGY_PERF_BIAS_POWERSAVE:
7109 epb_string = "powersave";
7110 break;
7111 default:
7112 epb_string = "custom";
7113 break;
7114 }
7115 fprintf(outf, "cpu%d: EPB: %d (%s)\n", cpu, epb, epb_string);
7116
7117 return 0;
7118 }
7119
7120 /*
7121 * print_hwp()
7122 * Decode the MSR_HWP_CAPABILITIES
7123 */
print_hwp(struct thread_data * t,struct core_data * c,struct pkg_data * p)7124 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
7125 {
7126 unsigned long long msr;
7127 int cpu;
7128
7129 UNUSED(c);
7130 UNUSED(p);
7131
7132 if (no_msr)
7133 return 0;
7134
7135 if (!has_hwp)
7136 return 0;
7137
7138 cpu = t->cpu_id;
7139
7140 /* MSR_HWP_CAPABILITIES is per-package */
7141 if (!is_cpu_first_thread_in_package(t, c, p))
7142 return 0;
7143
7144 if (cpu_migrate(cpu)) {
7145 fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu);
7146 return -1;
7147 }
7148
7149 if (get_msr(cpu, MSR_PM_ENABLE, &msr))
7150 return 0;
7151
7152 fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", cpu, msr, (msr & (1 << 0)) ? "" : "No-");
7153
7154 /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
7155 if ((msr & (1 << 0)) == 0)
7156 return 0;
7157
7158 if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
7159 return 0;
7160
7161 fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
7162 "(high %d guar %d eff %d low %d)\n",
7163 cpu, msr,
7164 (unsigned int)HWP_HIGHEST_PERF(msr),
7165 (unsigned int)HWP_GUARANTEED_PERF(msr),
7166 (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr));
7167
7168 if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
7169 return 0;
7170
7171 fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
7172 "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
7173 cpu, msr,
7174 (unsigned int)(((msr) >> 0) & 0xff),
7175 (unsigned int)(((msr) >> 8) & 0xff),
7176 (unsigned int)(((msr) >> 16) & 0xff),
7177 (unsigned int)(((msr) >> 24) & 0xff),
7178 (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1));
7179
7180 if (has_hwp_pkg) {
7181 if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
7182 return 0;
7183
7184 fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
7185 "(min %d max %d des %d epp 0x%x window 0x%x)\n",
7186 cpu, msr,
7187 (unsigned int)(((msr) >> 0) & 0xff),
7188 (unsigned int)(((msr) >> 8) & 0xff),
7189 (unsigned int)(((msr) >> 16) & 0xff),
7190 (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3));
7191 }
7192 if (has_hwp_notify) {
7193 if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
7194 return 0;
7195
7196 fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
7197 "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
7198 cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis");
7199 }
7200 if (get_msr(cpu, MSR_HWP_STATUS, &msr))
7201 return 0;
7202
7203 fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
7204 "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
7205 cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-");
7206
7207 return 0;
7208 }
7209
7210 /*
7211 * print_perf_limit()
7212 */
/*
 * print_perf_limit()
 *
 * Decode the per-package PERF_LIMIT_REASONS MSRs (core, graphics, ring).
 * Each MSR has two halves: the low 16 bits report currently-Active
 * limit reasons, the upper 16 bits the sticky Logged reasons.
 */
int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	int cpu;

	UNUSED(c);
	UNUSED(p);

	if (no_msr)
		return 0;

	cpu = t->cpu_id;

	/* per-package */
	if (!is_cpu_first_thread_in_package(t, c, p))
		return 0;

	if (cpu_migrate(cpu)) {
		fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	/* core limit reasons, when the platform declares the MSR */
	if (platform->plr_msrs & PLR_CORE) {
		get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
		fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
			(msr & 1 << 15) ? "bit15, " : "",
			(msr & 1 << 14) ? "bit14, " : "",
			(msr & 1 << 13) ? "Transitions, " : "",
			(msr & 1 << 12) ? "MultiCoreTurbo, " : "",
			(msr & 1 << 11) ? "PkgPwrL2, " : "",
			(msr & 1 << 10) ? "PkgPwrL1, " : "",
			(msr & 1 << 9) ? "CorePwr, " : "",
			(msr & 1 << 8) ? "Amps, " : "",
			(msr & 1 << 6) ? "VR-Therm, " : "",
			(msr & 1 << 5) ? "Auto-HWP, " : "",
			(msr & 1 << 4) ? "Graphics, " : "",
			(msr & 1 << 2) ? "bit2, " : "",
			(msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : "");
		/* logged (sticky) reasons are the same bits shifted up by 16 */
		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
			(msr & 1 << 31) ? "bit31, " : "",
			(msr & 1 << 30) ? "bit30, " : "",
			(msr & 1 << 29) ? "Transitions, " : "",
			(msr & 1 << 28) ? "MultiCoreTurbo, " : "",
			(msr & 1 << 27) ? "PkgPwrL2, " : "",
			(msr & 1 << 26) ? "PkgPwrL1, " : "",
			(msr & 1 << 25) ? "CorePwr, " : "",
			(msr & 1 << 24) ? "Amps, " : "",
			(msr & 1 << 22) ? "VR-Therm, " : "",
			(msr & 1 << 21) ? "Auto-HWP, " : "",
			(msr & 1 << 20) ? "Graphics, " : "",
			(msr & 1 << 18) ? "bit18, " : "",
			(msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : "");

	}
	/* graphics limit reasons */
	if (platform->plr_msrs & PLR_GFX) {
		get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
		fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
			(msr & 1 << 0) ? "PROCHOT, " : "",
			(msr & 1 << 1) ? "ThermStatus, " : "",
			(msr & 1 << 4) ? "Graphics, " : "",
			(msr & 1 << 6) ? "VR-Therm, " : "",
			(msr & 1 << 8) ? "Amps, " : "",
			(msr & 1 << 9) ? "GFXPwr, " : "",
			(msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
			(msr & 1 << 16) ? "PROCHOT, " : "",
			(msr & 1 << 17) ? "ThermStatus, " : "",
			(msr & 1 << 20) ? "Graphics, " : "",
			(msr & 1 << 22) ? "VR-Therm, " : "",
			(msr & 1 << 24) ? "Amps, " : "",
			(msr & 1 << 25) ? "GFXPwr, " : "",
			(msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
	}
	/* ring (uncore interconnect) limit reasons */
	if (platform->plr_msrs & PLR_RING) {
		get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
		fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
		fprintf(outf, " (Active: %s%s%s%s%s%s)",
			(msr & 1 << 0) ? "PROCHOT, " : "",
			(msr & 1 << 1) ? "ThermStatus, " : "",
			(msr & 1 << 6) ? "VR-Therm, " : "",
			(msr & 1 << 8) ? "Amps, " : "",
			(msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
		fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
			(msr & 1 << 16) ? "PROCHOT, " : "",
			(msr & 1 << 17) ? "ThermStatus, " : "",
			(msr & 1 << 22) ? "VR-Therm, " : "",
			(msr & 1 << 24) ? "Amps, " : "",
			(msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
	}
	return 0;
}
7306
7307 #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */
7308 #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */
7309
get_quirk_tdp(void)7310 double get_quirk_tdp(void)
7311 {
7312 if (platform->rapl_quirk_tdp)
7313 return platform->rapl_quirk_tdp;
7314
7315 return 135.0;
7316 }
7317
get_tdp_intel(void)7318 double get_tdp_intel(void)
7319 {
7320 unsigned long long msr;
7321
7322 if (platform->rapl_msrs & RAPL_PKG_POWER_INFO)
7323 if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
7324 return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
7325 return get_quirk_tdp();
7326 }
7327
/* No TDP-reporting MSR is used on AMD: return the per-platform quirk value. */
double get_tdp_amd(void)
{
	return get_quirk_tdp();
}
7332
/*
 * rapl_probe_intel()
 *
 * Select Watts vs. Joules columns, then decode MSR_RAPL_POWER_UNIT into
 * the global rapl_*_units used by all later RAPL math.
 */
void rapl_probe_intel(void)
{
	unsigned long long msr;
	unsigned int time_unit;
	double tdp;
	const unsigned long long bic_watt_bits = BIC_SysWatt | BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt;
	const unsigned long long bic_joules_bits = BIC_Sys_J | BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J;

	/* show either the Watts or the Joules columns, never both */
	if (rapl_joules)
		bic_enabled &= ~bic_watt_bits;
	else
		bic_enabled &= ~bic_joules_bits;

	if (!platform->rapl_msrs || no_msr)
		return;

	/* throttle-residency columns need the corresponding PERF_STATUS MSRs */
	if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS))
		bic_enabled &= ~BIC_PKG__;
	if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS))
		bic_enabled &= ~BIC_RAM__;

	/* units on package 0, verify later other packages match */
	if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
		return;

	/* power units (bits 3:0): Watts = 1 / 2^field */
	rapl_power_units = 1.0 / (1 << (msr & 0xF));
	/* energy units (bits 12:8); some platforms treat the field as a divisor */
	if (platform->has_rapl_divisor)
		rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
	else
		rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));

	/* some platforms use fixed 15.3 uJ units for the DRAM domain */
	if (platform->has_fixed_rapl_unit)
		rapl_dram_energy_units = (15.3 / 1000000);
	else
		rapl_dram_energy_units = rapl_energy_units;

	if (platform->has_fixed_rapl_psys_unit)
		rapl_psys_energy_units = 1.0;
	else
		rapl_psys_energy_units = rapl_energy_units;

	/* time units (bits 19:16); a field of 0 means the 2^-10 default */
	time_unit = msr >> 16 & 0xF;
	if (time_unit == 0)
		time_unit = 0xA;

	rapl_time_units = 1.0 / (1 << (time_unit));

	tdp = get_tdp_intel();

	/* seconds for the 32-bit energy counter to wrap when running at TDP */
	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
	if (!quiet)
		fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
}
7386
rapl_probe_amd(void)7387 void rapl_probe_amd(void)
7388 {
7389 unsigned long long msr;
7390 double tdp;
7391 const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt;
7392 const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J;
7393
7394 if (rapl_joules)
7395 bic_enabled &= ~bic_watt_bits;
7396 else
7397 bic_enabled &= ~bic_joules_bits;
7398
7399 if (!platform->rapl_msrs || no_msr)
7400 return;
7401
7402 if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
7403 return;
7404
7405 rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf));
7406 rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
7407 rapl_power_units = ldexp(1.0, -(msr & 0xf));
7408
7409 tdp = get_tdp_amd();
7410
7411 rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
7412 if (!quiet)
7413 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
7414 }
7415
print_power_limit_msr(int cpu,unsigned long long msr,char * label)7416 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
7417 {
7418 fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n",
7419 cpu, label,
7420 ((msr >> 15) & 1) ? "EN" : "DIS",
7421 ((msr >> 0) & 0x7FFF) * rapl_power_units,
7422 (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
7423 (((msr >> 16) & 1) ? "EN" : "DIS"));
7424
7425 return;
7426 }
7427
/* Read one decimal int from @path; returns fscanf()'s count, or -1 on open failure. */
static int fread_int(char *path, int *val)
{
	int ret;
	FILE *fp = fopen(path, "r");

	if (fp == NULL)
		return -1;

	ret = fscanf(fp, "%d", val);
	fclose(fp);

	return ret;
}
7441
/* Read one unsigned long long from @path; returns fscanf()'s count, or -1 on open failure. */
static int fread_ull(char *path, unsigned long long *val)
{
	int ret;
	FILE *fp = fopen(path, "r");

	if (fp == NULL)
		return -1;

	ret = fscanf(fp, "%llu", val);
	fclose(fp);

	return ret;
}
7455
/*
 * Read up to size-1 bytes from @path into @buf, NUL-terminate, and strip
 * the first newline.  Returns the byte count read, or -1 on failure.
 *
 * Fix: the old code fread() up to @size bytes and then ran strchr() on a
 * buffer that fread() never NUL-terminates — undefined behavior for an
 * empty file or for content with no '\n' filling the whole buffer.
 */
static int fread_str(char *path, char *buf, int size)
{
	FILE *filep;
	int ret;
	char *cp;

	if (size <= 0)
		return -1;

	filep = fopen(path, "r");
	if (!filep)
		return -1;

	/* reserve the final byte so the buffer can always be terminated */
	ret = fread(buf, 1, size - 1, filep);
	fclose(filep);

	/* fread() does not NUL-terminate; do it before any string scan */
	buf[ret] = '\0';

	/* replace '\n' with '\0' */
	cp = strchr(buf, '\n');
	if (cp != NULL)
		*cp = '\0';

	return ret;
}
7476
7477 #define PATH_RAPL_SYSFS "/sys/class/powercap"
7478
/*
 * dump_one_domain()
 *
 * Print one powercap RAPL domain: its name, enabled state, and each
 * constraint's time window and power limits.  Returns -1 if the domain's
 * name or enabled attribute can't be read, 0 otherwise.
 */
static int dump_one_domain(char *domain_path)
{
	char path[PATH_MAX];
	char str[PATH_MAX];
	unsigned long long val;
	int constraint;
	int enable;
	int ret;

	snprintf(path, PATH_MAX, "%s/name", domain_path);
	ret = fread_str(path, str, PATH_MAX);
	if (ret <= 0)
		return -1;

	/* path relative to the powercap root, then the domain name */
	fprintf(outf, "%s: %s", domain_path + strlen(PATH_RAPL_SYSFS) + 1, str);

	snprintf(path, PATH_MAX, "%s/enabled", domain_path);
	ret = fread_int(path, &enable);
	if (ret <= 0)
		return -1;

	if (!enable) {
		fputs(" disabled\n", outf);
		return 0;
	}

	/* walk constraint_0, constraint_1, ... until one is missing */
	for (constraint = 0;; constraint++) {
		snprintf(path, PATH_MAX, "%s/constraint_%d_time_window_us", domain_path, constraint);
		ret = fread_ull(path, &val);
		if (ret <= 0)
			break;

		/* pretty-print the time window with an s/ms/us suffix */
		if (val > 1000000)
			fprintf(outf, " %0.1fs", (double)val / 1000000);
		else if (val > 1000)
			fprintf(outf, " %0.1fms", (double)val / 1000);
		else
			fprintf(outf, " %0.1fus", (double)val);

		/* limits are reported in microwatts; print whole Watts */
		snprintf(path, PATH_MAX, "%s/constraint_%d_power_limit_uw", domain_path, constraint);
		ret = fread_ull(path, &val);
		if (ret > 0 && val)
			fprintf(outf, ":%lluW", val / 1000000);

		snprintf(path, PATH_MAX, "%s/constraint_%d_max_power_uw", domain_path, constraint);
		ret = fread_ull(path, &val);
		if (ret > 0 && val)
			fprintf(outf, ",max:%lluW", val / 1000000);
	}
	fputc('\n', outf);

	return 0;
}
7532
print_rapl_sysfs(void)7533 static int print_rapl_sysfs(void)
7534 {
7535 DIR *dir, *cdir;
7536 struct dirent *entry, *centry;
7537 char path[PATH_MAX];
7538 char str[PATH_MAX];
7539
7540 if ((dir = opendir(PATH_RAPL_SYSFS)) == NULL) {
7541 warn("open %s failed", PATH_RAPL_SYSFS);
7542 return 1;
7543 }
7544
7545 while ((entry = readdir(dir)) != NULL) {
7546 if (strlen(entry->d_name) > 100)
7547 continue;
7548
7549 if (strncmp(entry->d_name, "intel-rapl", strlen("intel-rapl")))
7550 continue;
7551
7552 snprintf(path, PATH_MAX, "%s/%s/name", PATH_RAPL_SYSFS, entry->d_name);
7553
7554 /* Parse top level domains first, including package and psys */
7555 fread_str(path, str, PATH_MAX);
7556 if (strncmp(str, "package", strlen("package")) && strncmp(str, "psys", strlen("psys")))
7557 continue;
7558
7559 snprintf(path, PATH_MAX, "%s/%s", PATH_RAPL_SYSFS, entry->d_name);
7560 if ((cdir = opendir(path)) == NULL) {
7561 perror("opendir() error");
7562 return 1;
7563 }
7564
7565 dump_one_domain(path);
7566
7567 while ((centry = readdir(cdir)) != NULL) {
7568 if (strncmp(centry->d_name, "intel-rapl", strlen("intel-rapl")))
7569 continue;
7570 snprintf(path, PATH_MAX, "%s/%s/%s", PATH_RAPL_SYSFS, entry->d_name, centry->d_name);
7571 dump_one_domain(path);
7572 }
7573 closedir(cdir);
7574 }
7575
7576 closedir(dir);
7577 return 0;
7578 }
7579
/*
 * print_rapl()
 *
 * Decode and print the RAPL configuration MSRs (units, package/DRAM/
 * core/graphics power info and limits) for the first thread in each
 * package.  Returns 0 on success, negative on MSR access failure.
 */
int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	const char *msr_name;
	int cpu;

	UNUSED(c);
	UNUSED(p);

	if (!platform->rapl_msrs)
		return 0;

	/* RAPL counters are per package, so print only for 1st thread/package */
	if (!is_cpu_first_thread_in_package(t, c, p))
		return 0;

	cpu = t->cpu_id;
	if (cpu_migrate(cpu)) {
		fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	/* AMD Fam 17h+ uses a different units MSR than Intel */
	if (platform->rapl_msrs & RAPL_AMD_F17H) {
		msr_name = "MSR_RAPL_PWR_UNIT";
		if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr))
			return -1;
	} else {
		msr_name = "MSR_RAPL_POWER_UNIT";
		if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
			return -1;
	}

	fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr,
		rapl_power_units, rapl_energy_units, rapl_time_units);

	if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) {

		if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
			return -5;

		/* TDP, min/max power and max time window, all in RAPL units */
		fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
			cpu, msr,
			((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);

	}
	if (platform->rapl_msrs & RAPL_PKG) {

		if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
			return -9;

		fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
			cpu, msr, (msr >> 63) & 1 ? "" : "UN");

		/* PL1 is in the low half; PL2 occupies the analogous upper-half bits */
		print_power_limit_msr(cpu, msr, "PKG Limit #1");
		fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n",
			cpu,
			((msr >> 47) & 1) ? "EN" : "DIS",
			((msr >> 32) & 0x7FFF) * rapl_power_units,
			(1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
			((msr >> 48) & 1) ? "EN" : "DIS");

		if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr))
			return -9;

		fprintf(outf, "cpu%d: MSR_VR_CURRENT_CONFIG: 0x%08llx\n", cpu, msr);
		fprintf(outf, "cpu%d: PKG Limit #4: %f Watts (%slocked)\n",
			cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN");
	}

	if (platform->rapl_msrs & RAPL_DRAM_POWER_INFO) {
		if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
			return -6;

		/* NOTE(review): the stray comma in "MSR_DRAM_POWER_INFO,:" looks like a typo in the output string */
		fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
			cpu, msr,
			((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
	}
	if (platform->rapl_msrs & RAPL_DRAM) {
		if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
			return -9;
		fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
			cpu, msr, (msr >> 31) & 1 ? "" : "UN");

		print_power_limit_msr(cpu, msr, "DRAM Limit");
	}
	if (platform->rapl_msrs & RAPL_CORE_POLICY) {
		if (get_msr(cpu, MSR_PP0_POLICY, &msr))
			return -7;

		fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
	}
	if (platform->rapl_msrs & RAPL_CORE_POWER_LIMIT) {
		if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
			return -9;
		fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
			cpu, msr, (msr >> 31) & 1 ? "" : "UN");
		print_power_limit_msr(cpu, msr, "Cores Limit");
	}
	if (platform->rapl_msrs & RAPL_GFX) {
		if (get_msr(cpu, MSR_PP1_POLICY, &msr))
			return -8;

		fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);

		if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
			return -9;
		fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
			cpu, msr, (msr >> 31) & 1 ? "" : "UN");
		print_power_limit_msr(cpu, msr, "GFX Limit");
	}
	return 0;
}
7698
7699 /*
7700 * probe_rapl()
7701 *
7702 * sets rapl_power_units, rapl_energy_units, rapl_time_units
7703 */
probe_rapl(void)7704 void probe_rapl(void)
7705 {
7706 if (genuine_intel)
7707 rapl_probe_intel();
7708 if (authentic_amd || hygon_genuine)
7709 rapl_probe_amd();
7710
7711 if (quiet)
7712 return;
7713
7714 print_rapl_sysfs();
7715
7716 if (!platform->rapl_msrs || no_msr)
7717 return;
7718
7719 for_all_cpus(print_rapl, ODD_COUNTERS);
7720 }
7721
7722 /*
7723 * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
7724 * the Thermal Control Circuit (TCC) activates.
7725 * This is usually equal to tjMax.
7726 *
7727 * Older processors do not have this MSR, so there we guess,
7728 * but also allow cmdline over-ride with -T.
7729 *
7730 * Several MSR temperature values are in units of degrees-C
7731 * below this value, including the Digital Thermal Sensor (DTS),
7732 * Package Thermal Management Sensor (PTM), and thermal event thresholds.
7733 */
/*
 * set_temperature_target()
 *
 * Establish tj_max for this package, in priority order: the -T command
 * line override, MSR_IA32_TEMPERATURE_TARGET, or the TJMAX_DEFAULT guess.
 * Runs once per package; returns 0, or -1 if CPU migration fails.
 */
int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	unsigned int tcc_default, tcc_offset;
	int cpu;

	UNUSED(c);
	UNUSED(p);

	/* tj_max is used only for dts or ptm */
	if (!(do_dts || do_ptm))
		return 0;

	/* this is a per-package concept */
	if (!is_cpu_first_thread_in_package(t, c, p))
		return 0;

	cpu = t->cpu_id;
	if (cpu_migrate(cpu)) {
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	/* cmdline -T wins over everything */
	if (tj_max_override != 0) {
		tj_max = tj_max_override;
		fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", cpu, tj_max);
		return 0;
	}

	/* Temperature Target MSR is Nehalem and newer only */
	if (!platform->has_nhm_msrs || no_msr)
		goto guess;

	/*
	 * NOTE(review): the MSRs below are read on base_cpu even though we
	 * migrated to 'cpu' — presumably TCC is uniform across packages;
	 * confirm against upstream intent.
	 */
	if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
		goto guess;

	/* TCC activation temperature lives in bits 23:16 */
	tcc_default = (msr >> 16) & 0xFF;

	if (!quiet) {
		int bits = platform->tcc_offset_bits;
		unsigned long long enabled = 0;

		/* the programmable TCC offset is honored only when enabled in MSR_PLATFORM_INFO[30] */
		if (bits && !get_msr(base_cpu, MSR_PLATFORM_INFO, &enabled))
			enabled = (enabled >> 30) & 1;

		if (bits && enabled) {
			tcc_offset = (msr >> 24) & GENMASK(bits - 1, 0);
			fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
				cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
		} else {
			fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default);
		}
	}

	/* a zero target is not credible; fall back to the default guess */
	if (!tcc_default)
		goto guess;

	tj_max = tcc_default;

	return 0;

guess:
	tj_max = TJMAX_DEFAULT;
	fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max);

	return 0;
}
7801
/*
 * print_thermal()
 *
 * Decode the package (PTM) and per-core (DTS) thermal status and
 * interrupt-threshold MSRs.  All raw readings are degrees C below
 * tj_max, so the printed value is tj_max - reading.
 */
int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	unsigned int dts, dts2;
	int cpu;

	UNUSED(c);
	UNUSED(p);

	if (no_msr)
		return 0;

	if (!(do_dts || do_ptm))
		return 0;

	cpu = t->cpu_id;

	/* DTS is per-core, no need to print for each thread */
	if (!is_cpu_first_thread_in_core(t, c, p))
		return 0;

	if (cpu_migrate(cpu)) {
		fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	/* package thermal MSRs: only once per package */
	if (do_ptm && is_cpu_first_core_in_package(t, c, p)) {
		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
			return 0;

		/* digital readout is bits 22:16 (7 bits) */
		dts = (msr >> 16) & 0x7F;
		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts);

		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
			return 0;

		/* the two programmable interrupt thresholds */
		dts = (msr >> 16) & 0x7F;
		dts2 = (msr >> 8) & 0x7F;
		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
			cpu, msr, tj_max - dts, tj_max - dts2);
	}

	/* per-core DTS detail is debug-only output */
	if (do_dts && debug) {
		unsigned int resolution;

		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
			return 0;

		dts = (msr >> 16) & 0x7F;
		resolution = (msr >> 27) & 0xF;	/* reading resolution in degrees C */
		fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
			cpu, msr, tj_max - dts, resolution);

		if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
			return 0;

		dts = (msr >> 16) & 0x7F;
		dts2 = (msr >> 8) & 0x7F;
		fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
			cpu, msr, tj_max - dts, tj_max - dts2);
	}

	return 0;
}
7866
probe_thermal(void)7867 void probe_thermal(void)
7868 {
7869 if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK))
7870 BIC_PRESENT(BIC_CORE_THROT_CNT);
7871 else
7872 BIC_NOT_PRESENT(BIC_CORE_THROT_CNT);
7873
7874 for_all_cpus(set_temperature_target, ODD_COUNTERS);
7875
7876 if (quiet)
7877 return;
7878
7879 for_all_cpus(print_thermal, ODD_COUNTERS);
7880 }
7881
get_cpu_type(struct thread_data * t,struct core_data * c,struct pkg_data * p)7882 int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
7883 {
7884 unsigned int eax, ebx, ecx, edx;
7885
7886 UNUSED(c);
7887 UNUSED(p);
7888
7889 if (!genuine_intel)
7890 return 0;
7891
7892 if (cpu_migrate(t->cpu_id)) {
7893 fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id);
7894 return -1;
7895 }
7896
7897 if (max_level < 0x1a)
7898 return 0;
7899
7900 __cpuid(0x1a, eax, ebx, ecx, edx);
7901 eax = (eax >> 24) & 0xFF;
7902 if (eax == 0x20)
7903 t->is_atom = true;
7904 return 0;
7905 }
7906
decode_feature_control_msr(void)7907 void decode_feature_control_msr(void)
7908 {
7909 unsigned long long msr;
7910
7911 if (no_msr)
7912 return;
7913
7914 if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr))
7915 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
7916 base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : "");
7917 }
7918
decode_misc_enable_msr(void)7919 void decode_misc_enable_msr(void)
7920 {
7921 unsigned long long msr;
7922
7923 if (no_msr)
7924 return;
7925
7926 if (!genuine_intel)
7927 return;
7928
7929 if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
7930 fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
7931 base_cpu, msr,
7932 msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
7933 msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
7934 msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
7935 msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
7936 msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
7937 }
7938
decode_misc_feature_control(void)7939 void decode_misc_feature_control(void)
7940 {
7941 unsigned long long msr;
7942
7943 if (no_msr)
7944 return;
7945
7946 if (!platform->has_msr_misc_feature_control)
7947 return;
7948
7949 if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
7950 fprintf(outf,
7951 "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
7952 base_cpu, msr, msr & (0 << 0) ? "No-" : "", msr & (1 << 0) ? "No-" : "",
7953 msr & (2 << 0) ? "No-" : "", msr & (3 << 0) ? "No-" : "");
7954 }
7955
7956 /*
7957 * Decode MSR_MISC_PWR_MGMT
7958 *
7959 * Decode the bits according to the Nehalem documentation
7960 * bit[0] seems to continue to have same meaning going forward
7961 * bit[1] less so...
7962 */
/* Dump MSR_MISC_PWR_MGMT: EIST coordination (bit 0), EPB (bit 1), OOB (bit 8). */
void decode_misc_pwr_mgmt_msr(void)
{
	unsigned long long msr;

	if (no_msr)
		return;

	if (!platform->has_msr_misc_pwr_mgmt)
		return;

	/* bit 0 set means HW coordination of EIST is DISabled */
	if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
		fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
			base_cpu, msr,
			msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS");
}
7978
7979 /*
7980 * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
7981 *
7982 * This MSRs are present on Silvermont processors,
7983 * Intel Atom processor E3000 series (Baytrail), and friends.
7984 */
decode_c6_demotion_policy_msr(void)7985 void decode_c6_demotion_policy_msr(void)
7986 {
7987 unsigned long long msr;
7988
7989 if (no_msr)
7990 return;
7991
7992 if (!platform->has_msr_c6_demotion_policy_config)
7993 return;
7994
7995 if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
7996 fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
7997 base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
7998
7999 if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
8000 fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
8001 base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
8002 }
8003
print_dev_latency(void)8004 void print_dev_latency(void)
8005 {
8006 char *path = "/dev/cpu_dma_latency";
8007 int fd;
8008 int value;
8009 int retval;
8010
8011 fd = open(path, O_RDONLY);
8012 if (fd < 0) {
8013 if (debug)
8014 warnx("Read %s failed", path);
8015 return;
8016 }
8017
8018 retval = read(fd, (void *)&value, sizeof(int));
8019 if (retval != sizeof(int)) {
8020 warn("read failed %s", path);
8021 close(fd);
8022 return;
8023 }
8024 fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", value, value == 2000000000 ? "default" : "constrained");
8025
8026 close(fd);
8027 }
8028
has_instr_count_access(void)8029 static int has_instr_count_access(void)
8030 {
8031 int fd;
8032 int has_access;
8033
8034 if (no_perf)
8035 return 0;
8036
8037 fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
8038 has_access = fd != -1;
8039
8040 if (fd != -1)
8041 close(fd);
8042
8043 if (!has_access)
8044 warnx("Failed to access %s. Some of the counters may not be available\n"
8045 "\tRun as root to enable them or use %s to disable the access explicitly",
8046 "instructions retired perf counter", "--no-perf");
8047
8048 return has_access;
8049 }
8050
/*
 * Open a perf event for RAPL counter @cai on @cpu, joining the group
 * led by rci->fd_perf (the first counter opened becomes the leader).
 * On success, stores the perf scale and energy unit through *scale_ and
 * *unit_ and returns the new fd; returns -1 on any failure.
 */
int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
			  double *scale_, enum rapl_unit *unit_)
{
	int ret = -1;

	/* perf access explicitly disabled on the command line */
	if (no_perf)
		return -1;

	/* counter has no perf representation */
	if (!cai->perf_name)
		return -1;

	const double scale = read_perf_scale(cai->perf_subsys, cai->perf_name);

	if (scale == 0.0)
		goto end;

	const enum rapl_unit unit = read_perf_rapl_unit(cai->perf_subsys, cai->perf_name);

	if (unit == RAPL_UNIT_INVALID)
		goto end;

	const unsigned int rapl_type = read_perf_type(cai->perf_subsys);
	const unsigned int rapl_energy_pkg_config = read_perf_config(cai->perf_subsys, cai->perf_name);

	ret = open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP);
	if (ret == -1)
		goto end;

	/* If it's the first counter opened, make it a group descriptor */
	if (rci->fd_perf == -1)
		rci->fd_perf = ret;

	/* outputs are written only on full success */
	*scale_ = scale;
	*unit_ = unit;

end:
	if (debug >= 2)
		fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu);

	return ret;
}
8092
8093 /*
8094 * Linux-perf manages the HW instructions-retired counter
8095 * by enabling when requested, and hiding rollover
8096 */
linux_perf_init(void)8097 void linux_perf_init(void)
8098 {
8099 if (access("/proc/sys/kernel/perf_event_paranoid", F_OK))
8100 return;
8101
8102 if (BIC_IS_ENABLED(BIC_IPC) && has_aperf) {
8103 fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
8104 if (fd_instr_count_percpu == NULL)
8105 err(-1, "calloc fd_instr_count_percpu");
8106 }
8107 }
8108
rapl_perf_init(void)8109 void rapl_perf_init(void)
8110 {
8111 const unsigned int num_domains = get_rapl_num_domains();
8112 bool *domain_visited = calloc(num_domains, sizeof(bool));
8113
8114 rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain));
8115 if (rapl_counter_info_perdomain == NULL)
8116 err(-1, "calloc rapl_counter_info_percpu");
8117 rapl_counter_info_perdomain_size = num_domains;
8118
8119 /*
8120 * Initialize rapl_counter_info_percpu
8121 */
8122 for (unsigned int domain_id = 0; domain_id < num_domains; ++domain_id) {
8123 struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain_id];
8124
8125 rci->fd_perf = -1;
8126 for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) {
8127 rci->data[i] = 0;
8128 rci->source[i] = COUNTER_SOURCE_NONE;
8129 }
8130 }
8131
8132 /*
8133 * Open/probe the counters
8134 * If can't get it via perf, fallback to MSR
8135 */
8136 for (size_t i = 0; i < ARRAY_SIZE(rapl_counter_arch_infos); ++i) {
8137
8138 const struct rapl_counter_arch_info *const cai = &rapl_counter_arch_infos[i];
8139 bool has_counter = 0;
8140 double scale;
8141 enum rapl_unit unit;
8142 unsigned int next_domain;
8143
8144 if (!BIC_IS_ENABLED(cai->bic))
8145 continue;
8146
8147 memset(domain_visited, 0, num_domains * sizeof(*domain_visited));
8148
8149 for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) {
8150
8151 if (cpu_is_not_allowed(cpu))
8152 continue;
8153
8154 /* Skip already seen and handled RAPL domains */
8155 next_domain = get_rapl_domain_id(cpu);
8156
8157 assert(next_domain < num_domains);
8158
8159 if (domain_visited[next_domain])
8160 continue;
8161
8162 domain_visited[next_domain] = 1;
8163
8164 if ((cai->flags & RAPL_COUNTER_FLAG_PLATFORM_COUNTER) && (cpu != base_cpu))
8165 continue;
8166
8167 struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain];
8168
8169 /*
8170 * rapl_counter_arch_infos[] can have multiple entries describing the same
8171 * counter, due to the difference from different platforms/Vendors.
8172 * E.g. rapl_counter_arch_infos[0] and rapl_counter_arch_infos[1] share the
8173 * same perf_subsys and perf_name, but with different MSR address.
8174 * rapl_counter_arch_infos[0] is for Intel and rapl_counter_arch_infos[1]
8175 * is for AMD.
8176 * In this case, it is possible that multiple rapl_counter_arch_infos[]
8177 * entries are probed just because their perf/msr is duplicate and valid.
8178 *
8179 * Thus need a check to avoid re-probe the same counters.
8180 */
8181 if (rci->source[cai->rci_index] != COUNTER_SOURCE_NONE)
8182 break;
8183
8184 /* Use perf API for this counter */
8185 if (add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) {
8186 rci->source[cai->rci_index] = COUNTER_SOURCE_PERF;
8187 rci->scale[cai->rci_index] = scale * cai->compat_scale;
8188 rci->unit[cai->rci_index] = unit;
8189 rci->flags[cai->rci_index] = cai->flags;
8190
8191 /* Use MSR for this counter */
8192 } else if (add_rapl_msr_counter(cpu, cai) >= 0) {
8193 rci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
8194 rci->msr[cai->rci_index] = cai->msr;
8195 rci->msr_mask[cai->rci_index] = cai->msr_mask;
8196 rci->msr_shift[cai->rci_index] = cai->msr_shift;
8197 rci->unit[cai->rci_index] = RAPL_UNIT_JOULES;
8198 rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale;
8199 rci->flags[cai->rci_index] = cai->flags;
8200 }
8201
8202 if (rci->source[cai->rci_index] != COUNTER_SOURCE_NONE)
8203 has_counter = 1;
8204 }
8205
8206 /* If any CPU has access to the counter, make it present */
8207 if (has_counter)
8208 BIC_PRESENT(cai->bic);
8209 }
8210
8211 free(domain_visited);
8212 }
8213
8214 /* Assumes msr_counter_info is populated */
has_amperf_access(void)8215 static int has_amperf_access(void)
8216 {
8217 return msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].present &&
8218 msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].present;
8219 }
8220
get_cstate_perf_group_fd(struct cstate_counter_info_t * cci,const char * group_name)8221 int *get_cstate_perf_group_fd(struct cstate_counter_info_t *cci, const char *group_name)
8222 {
8223 if (strcmp(group_name, "cstate_core") == 0)
8224 return &cci->fd_perf_core;
8225
8226 if (strcmp(group_name, "cstate_pkg") == 0)
8227 return &cci->fd_perf_pkg;
8228
8229 return NULL;
8230 }
8231
/*
 * Open a perf event for C-state counter @cai on @cpu, joining the perf
 * group of its subsystem ("cstate_core" or "cstate_pkg"); the first event
 * opened for a group becomes its leader.  Returns the fd, or -1 on failure.
 */
int add_cstate_perf_counter(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai)
{
	int ret = -1;

	/* perf access explicitly disabled on the command line */
	if (no_perf)
		return -1;

	/* counter has no perf representation */
	if (!cai->perf_name)
		return -1;

	int *pfd_group = get_cstate_perf_group_fd(cci, cai->perf_subsys);

	if (pfd_group == NULL)
		goto end;

	const unsigned int type = read_perf_type(cai->perf_subsys);
	const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name);

	ret = open_perf_counter(cpu, type, config, *pfd_group, PERF_FORMAT_GROUP);

	if (ret == -1)
		goto end;

	/* If it's the first counter opened, make it a group descriptor */
	if (*pfd_group == -1)
		*pfd_group = ret;

end:
	if (debug >= 2)
		fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu);

	return ret;
}
8265
/*
 * Open a perf event for MSR-backed counter @cai on @cpu, joining the group
 * led by cci->fd_perf (first event opened becomes the leader).
 * Returns the fd, or -1 on failure.
 */
int add_msr_perf_counter(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai)
{
	int ret = -1;

	/* perf access explicitly disabled on the command line */
	if (no_perf)
		return -1;

	/* counter has no perf representation */
	if (!cai->perf_name)
		return -1;

	const unsigned int type = read_perf_type(cai->perf_subsys);
	const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name);

	ret = open_perf_counter(cpu, type, config, cci->fd_perf, PERF_FORMAT_GROUP);

	if (ret == -1)
		goto end;

	/* If it's the first counter opened, make it a group descriptor */
	if (cci->fd_perf == -1)
		cci->fd_perf = ret;

end:
	/* NOTE(review): siblings gate this trace on debug >= 2; plain debug here — confirm intent */
	if (debug)
		fprintf(stderr, "%s: %s/%s: %d (cpu: %d)\n", __func__, cai->perf_subsys, cai->perf_name, ret, cpu);

	return ret;
}
8294
/*
 * Allocate per-CPU msr_counter_info[] and probe each needed MSR counter
 * on every allowed CPU, preferring perf and falling back to raw MSR.
 * Marks each arch-info entry present when at least one CPU can read it.
 */
void msr_perf_init_(void)
{
	const int mci_num = topo.max_cpu_num + 1;

	msr_counter_info = calloc(mci_num, sizeof(*msr_counter_info));
	if (!msr_counter_info)
		err(1, "calloc msr_counter_info");
	msr_counter_info_size = mci_num;

	/* -1 marks "no perf group leader yet" for each CPU */
	for (int cpu = 0; cpu < mci_num; ++cpu)
		msr_counter_info[cpu].fd_perf = -1;

	for (int cidx = 0; cidx < NUM_MSR_COUNTERS; ++cidx) {

		struct msr_counter_arch_info *cai = &msr_counter_arch_infos[cidx];

		cai->present = false;

		for (int cpu = 0; cpu < mci_num; ++cpu) {

			struct msr_counter_info_t *const cci = &msr_counter_info[cpu];

			if (cpu_is_not_allowed(cpu))
				continue;

			if (cai->needed) {
				/* Use perf API for this counter */
				if (add_msr_perf_counter(cpu, cci, cai) != -1) {
					cci->source[cai->rci_index] = COUNTER_SOURCE_PERF;
					cai->present = true;

					/* Use MSR for this counter */
				} else if (add_msr_counter(cpu, cai->msr) >= 0) {
					cci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
					cci->msr[cai->rci_index] = cai->msr;
					cci->msr_mask[cai->rci_index] = cai->msr_mask;
					cai->present = true;
				}
			}
		}
	}
}
8337
8338 /* Initialize data for reading perf counters from the MSR group. */
/* Initialize data for reading perf counters from the MSR group. */
void msr_perf_init(void)
{
	bool need_amperf = false, need_smi = false;
	/* C1 is computed "in software" from APERF/MPERF when there is no C1 residency MSR */
	const bool need_soft_c1 = (!platform->has_msr_core_c1_res) && (platform->supported_cstates & CC1);

	need_amperf = BIC_IS_ENABLED(BIC_Avg_MHz) || BIC_IS_ENABLED(BIC_Busy) || BIC_IS_ENABLED(BIC_Bzy_MHz)
	    || BIC_IS_ENABLED(BIC_IPC) || need_soft_c1;

	if (BIC_IS_ENABLED(BIC_SMI))
		need_smi = true;

	/* Enable needed counters */
	msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].needed = need_amperf;
	msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].needed = need_amperf;
	msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].needed = need_smi;

	msr_perf_init_();

	const bool has_amperf = has_amperf_access();
	const bool has_smi = msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].present;

	has_aperf_access = has_amperf;

	if (has_amperf) {
		BIC_PRESENT(BIC_Avg_MHz);
		BIC_PRESENT(BIC_Busy);
		BIC_PRESENT(BIC_Bzy_MHz);
		/* NOTE(review): BIC_SMI here looks redundant with the has_smi check below — confirm intent */
		BIC_PRESENT(BIC_SMI);
	}

	if (has_smi)
		BIC_PRESENT(BIC_SMI);
}
8372
/*
 * Probe all C-state residency counters, preferring perf and falling back
 * to MSR.  Per-core counters are opened on one thread per core and
 * per-package counters on one core per package, tracked via the
 * cores_visited/pkg_visited bitmaps.  @soft_c1 forces the counters that
 * software-computed C1 residency depends on.
 */
void cstate_perf_init_(bool soft_c1)
{
	bool has_counter;
	bool *cores_visited = NULL, *pkg_visited = NULL;
	const int cores_visited_elems = topo.max_core_id + 1;
	const int pkg_visited_elems = topo.max_package_id + 1;
	const int cci_num = topo.max_cpu_num + 1;

	ccstate_counter_info = calloc(cci_num, sizeof(*ccstate_counter_info));
	if (!ccstate_counter_info)
		err(1, "calloc ccstate_counter_arch_info");
	ccstate_counter_info_size = cci_num;

	cores_visited = calloc(cores_visited_elems, sizeof(*cores_visited));
	if (!cores_visited)
		err(1, "calloc cores_visited");

	pkg_visited = calloc(pkg_visited_elems, sizeof(*pkg_visited));
	if (!pkg_visited)
		err(1, "calloc pkg_visited");

	/* Initialize cstate_counter_info_percpu */
	for (int cpu = 0; cpu < cci_num; ++cpu) {
		ccstate_counter_info[cpu].fd_perf_core = -1;
		ccstate_counter_info[cpu].fd_perf_pkg = -1;
	}

	for (int cidx = 0; cidx < NUM_CSTATE_COUNTERS; ++cidx) {
		has_counter = false;
		/* the visited sets are per counter, not global */
		memset(cores_visited, 0, cores_visited_elems * sizeof(*cores_visited));
		memset(pkg_visited, 0, pkg_visited_elems * sizeof(*pkg_visited));

		const struct cstate_counter_arch_info *cai = &ccstate_counter_arch_infos[cidx];

		for (int cpu = 0; cpu < cci_num; ++cpu) {

			struct cstate_counter_info_t *const cci = &ccstate_counter_info[cpu];

			if (cpu_is_not_allowed(cpu))
				continue;

			const int core_id = cpus[cpu].physical_core_id;
			const int pkg_id = cpus[cpu].physical_package_id;

			assert(core_id < cores_visited_elems);
			assert(pkg_id < pkg_visited_elems);

			const bool per_thread = cai->flags & CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD;
			const bool per_core = cai->flags & CSTATE_COUNTER_FLAG_COLLECT_PER_CORE;

			/* open core-scope counters once per core, package-scope once per package */
			if (!per_thread && cores_visited[core_id])
				continue;

			if (!per_core && pkg_visited[pkg_id])
				continue;

			const bool counter_needed = BIC_IS_ENABLED(cai->bic) ||
			    (soft_c1 && (cai->flags & CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY));
			const bool counter_supported = (platform->supported_cstates & cai->feature_mask);

			if (counter_needed && counter_supported) {
				/* Use perf API for this counter */
				if (add_cstate_perf_counter(cpu, cci, cai) != -1) {

					cci->source[cai->rci_index] = COUNTER_SOURCE_PERF;

					/* Use MSR for this counter */
				} else if (pkg_cstate_limit >= cai->pkg_cstate_limit
					   && add_msr_counter(cpu, cai->msr) >= 0) {
					cci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
					cci->msr[cai->rci_index] = cai->msr;
				}
			}

			if (cci->source[cai->rci_index] != COUNTER_SOURCE_NONE) {
				has_counter = true;
				cores_visited[core_id] = true;
				pkg_visited[pkg_id] = true;
			}
		}

		/* If any CPU has access to the counter, make it present */
		if (has_counter)
			BIC_PRESENT(cai->bic);
	}

	free(cores_visited);
	free(pkg_visited);
}
8462
cstate_perf_init(void)8463 void cstate_perf_init(void)
8464 {
8465 /*
8466 * If we don't have a C1 residency MSR, we calculate it "in software",
8467 * but we need APERF, MPERF too.
8468 */
8469 const bool soft_c1 = !platform->has_msr_core_c1_res && has_amperf_access()
8470 && platform->supported_cstates & CC1;
8471
8472 if (soft_c1)
8473 BIC_PRESENT(BIC_CPU_c1);
8474
8475 cstate_perf_init_(soft_c1);
8476 }
8477
probe_cstates(void)8478 void probe_cstates(void)
8479 {
8480 probe_cst_limit();
8481
8482 if (platform->has_msr_module_c6_res_ms)
8483 BIC_PRESENT(BIC_Mod_c6);
8484
8485 if (platform->has_ext_cst_msrs && !no_msr) {
8486 BIC_PRESENT(BIC_Totl_c0);
8487 BIC_PRESENT(BIC_Any_c0);
8488 BIC_PRESENT(BIC_GFX_c0);
8489 BIC_PRESENT(BIC_CPUGFX);
8490 }
8491
8492 if (quiet)
8493 return;
8494
8495 dump_power_ctl();
8496 dump_cst_cfg();
8497 decode_c6_demotion_policy_msr();
8498 print_dev_latency();
8499 dump_sysfs_cstate_config();
8500 print_irtl();
8501 }
8502
probe_lpi(void)8503 void probe_lpi(void)
8504 {
8505 if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
8506 BIC_PRESENT(BIC_CPU_LPI);
8507 else
8508 BIC_NOT_PRESENT(BIC_CPU_LPI);
8509
8510 if (!access(sys_lpi_file_sysfs, R_OK)) {
8511 sys_lpi_file = sys_lpi_file_sysfs;
8512 BIC_PRESENT(BIC_SYS_LPI);
8513 } else if (!access(sys_lpi_file_debugfs, R_OK)) {
8514 sys_lpi_file = sys_lpi_file_debugfs;
8515 BIC_PRESENT(BIC_SYS_LPI);
8516 } else {
8517 sys_lpi_file_sysfs = NULL;
8518 BIC_NOT_PRESENT(BIC_SYS_LPI);
8519 }
8520
8521 }
8522
probe_pstates(void)8523 void probe_pstates(void)
8524 {
8525 probe_bclk();
8526
8527 if (quiet)
8528 return;
8529
8530 dump_platform_info();
8531 dump_turbo_ratio_info();
8532 dump_sysfs_pstate_config();
8533 decode_misc_pwr_mgmt_msr();
8534
8535 for_all_cpus(print_hwp, ODD_COUNTERS);
8536 for_all_cpus(print_epb, ODD_COUNTERS);
8537 for_all_cpus(print_perf_limit, ODD_COUNTERS);
8538 }
8539
/*
 * Enumerate CPU capabilities via CPUID (and the microcode-revision MSR),
 * set the global feature flags, and dump the findings unless --quiet.
 * Order matters: leaf 0 (vendor/max level) must precede everything else,
 * and probe_platform_features() needs family/model from leaf 1.
 */
void process_cpuid()
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
	unsigned long long ucode_patch = 0;
	bool ucode_patch_valid = false;

	eax = ebx = ecx = edx = 0;

	__cpuid(0, max_level, ebx, ecx, edx);

	/* vendor signature strings: "GenuineIntel", "AuthenticAMD", "HygonGenuine" */
	if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
		genuine_intel = 1;
	else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
		authentic_amd = 1;
	else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e)
		hygon_genuine = 1;

	if (!quiet)
		fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n",
			(char *)&ebx, (char *)&edx, (char *)&ecx, max_level);

	__cpuid(1, fms, ebx, ecx, edx);
	family = (fms >> 8) & 0xf;
	model = (fms >> 4) & 0xf;
	stepping = fms & 0xf;
	/* fold in extended family/model fields */
	if (family == 0xf)
		family += (fms >> 20) & 0xff;
	if (family >= 6)
		model += ((fms >> 16) & 0xf) << 4;
	ecx_flags = ecx;
	edx_flags = edx;

	if (!no_msr) {
		if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
			warnx("get_msr(UCODE)");
		else
			ucode_patch_valid = true;
	}

	/*
	 * check max extended function levels of CPUID.
	 * This is needed to check for invariant TSC.
	 * This check is valid for both Intel and AMD.
	 */
	ebx = ecx = edx = 0;
	__cpuid(0x80000000, max_extended_level, ebx, ecx, edx);

	if (!quiet) {
		fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)",
			family, model, stepping, family, model, stepping);
		if (ucode_patch_valid)
			fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
		fputc('\n', outf);

		fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level);
		fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
			ecx_flags & (1 << 0) ? "SSE3" : "-",
			ecx_flags & (1 << 3) ? "MONITOR" : "-",
			ecx_flags & (1 << 6) ? "SMX" : "-",
			ecx_flags & (1 << 7) ? "EIST" : "-",
			ecx_flags & (1 << 8) ? "TM2" : "-",
			edx_flags & (1 << 4) ? "TSC" : "-",
			edx_flags & (1 << 5) ? "MSR" : "-",
			edx_flags & (1 << 22) ? "ACPI-TM" : "-",
			edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-");
	}

	probe_platform_features(family, model);

	/* CPUID.1:EDX bit 5 == MSR support; turbostat cannot run without it */
	if (!(edx_flags & (1 << 5)))
		errx(1, "CPUID: no MSR");

	if (max_extended_level >= 0x80000007) {

		/*
		 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
		 * this check is valid for both Intel and AMD
		 */
		__cpuid(0x80000007, eax, ebx, ecx, edx);
		has_invariant_tsc = edx & (1 << 8);
	}

	/*
	 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
	 * this check is valid for both Intel and AMD
	 */

	__cpuid(0x6, eax, ebx, ecx, edx);
	has_aperf = ecx & (1 << 0);
	do_dts = eax & (1 << 0);
	if (do_dts)
		BIC_PRESENT(BIC_CoreTmp);
	has_turbo = eax & (1 << 1);
	do_ptm = eax & (1 << 6);
	if (do_ptm)
		BIC_PRESENT(BIC_PkgTmp);
	has_hwp = eax & (1 << 7);
	has_hwp_notify = eax & (1 << 8);
	has_hwp_activity_window = eax & (1 << 9);
	has_hwp_epp = eax & (1 << 10);
	has_hwp_pkg = eax & (1 << 11);
	has_epb = ecx & (1 << 3);

	if (!quiet)
		fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
			"%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
			has_aperf ? "" : "No-",
			has_turbo ? "" : "No-",
			do_dts ? "" : "No-",
			do_ptm ? "" : "No-",
			has_hwp ? "" : "No-",
			has_hwp_notify ? "" : "No-",
			has_hwp_activity_window ? "" : "No-",
			has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-");

	if (!quiet)
		decode_misc_enable_msr();

	if (max_level >= 0x7 && !quiet) {
		int has_sgx;

		ecx = 0;

		__cpuid_count(0x7, 0, eax, ebx, ecx, edx);

		has_sgx = ebx & (1 << 2);

		is_hybrid = edx & (1 << 15);

		fprintf(outf, "CPUID(7): %sSGX %sHybrid\n", has_sgx ? "" : "No-", is_hybrid ? "" : "No-");

		if (has_sgx)
			decode_feature_control_msr();
	}

	if (max_level >= 0x15) {
		unsigned int eax_crystal;
		unsigned int ebx_tsc;

		/*
		 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
		 */
		eax_crystal = ebx_tsc = crystal_hz = edx = 0;
		__cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);

		if (ebx_tsc != 0) {
			/* NOTE(review): 'ebx' here is stale from CPUID 0x80000000 above; ebx_tsc was likely intended — confirm */
			if (!quiet && (ebx != 0))
				fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
					eax_crystal, ebx_tsc, crystal_hz);

			if (crystal_hz == 0)
				crystal_hz = platform->crystal_freq;

			if (crystal_hz) {
				tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal;
				if (!quiet)
					fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
						tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal);
			}
		}
	}
	if (max_level >= 0x16) {
		unsigned int base_mhz, max_mhz, bus_mhz, edx;

		/*
		 * CPUID 16H Base MHz, Max MHz, Bus MHz
		 */
		base_mhz = max_mhz = bus_mhz = edx = 0;

		__cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);

		bclk = bus_mhz;

		base_hz = base_mhz * 1000000;
		has_base_hz = 1;

		if (platform->enable_tsc_tweak)
			tsc_tweak = base_hz / tsc_hz;

		if (!quiet)
			fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
				base_mhz, max_mhz, bus_mhz);
	}

	if (has_aperf)
		aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1;

	BIC_PRESENT(BIC_IRQ);
	BIC_PRESENT(BIC_NMI);
	BIC_PRESENT(BIC_TSC_MHz);
}
8732
counter_info_init(void)8733 static void counter_info_init(void)
8734 {
8735 for (int i = 0; i < NUM_CSTATE_COUNTERS; ++i) {
8736 struct cstate_counter_arch_info *const cai = &ccstate_counter_arch_infos[i];
8737
8738 if (platform->has_msr_knl_core_c6_residency && cai->msr == MSR_CORE_C6_RESIDENCY)
8739 cai->msr = MSR_KNL_CORE_C6_RESIDENCY;
8740
8741 if (!platform->has_msr_core_c1_res && cai->msr == MSR_CORE_C1_RES)
8742 cai->msr = 0;
8743
8744 if (platform->has_msr_atom_pkg_c6_residency && cai->msr == MSR_PKG_C6_RESIDENCY)
8745 cai->msr = MSR_ATOM_PKG_C6_RESIDENCY;
8746 }
8747
8748 for (int i = 0; i < NUM_MSR_COUNTERS; ++i) {
8749 msr_counter_arch_infos[i].present = false;
8750 msr_counter_arch_infos[i].needed = false;
8751 }
8752 }
8753
/* Top-level power-management feature probe; calls each subsystem's probe in order. */
void probe_pm_features(void)
{
	probe_pstates();

	probe_cstates();

	probe_lpi();

	probe_intel_uncore_frequency();

	probe_graphics();

	probe_rapl();

	probe_thermal();

	if (platform->has_nhm_msrs && !no_msr)
		BIC_PRESENT(BIC_SMI);

	if (!quiet)
		decode_misc_feature_control();
}
8776
8777 /*
8778 * in /dev/cpu/ return success for names that are numbers
8779 * ie. filter out ".", "..", "microcode".
8780 */
/* scandir() filter: accept entries whose name starts with a digit. */
int dir_filter(const struct dirent *dirp)
{
	/* cast to unsigned char: isdigit() on a negative plain-char value is UB */
	if (isdigit((unsigned char)dirp->d_name[0]))
		return 1;
	else
		return 0;
}
8788
/* sysfs enumeration of possible CPUs (e.g. "0-127"), read into possible_buf */
char *possible_file = "/sys/devices/system/cpu/possible";
char possible_buf[1024];
8791
initialize_cpu_possible_set(void)8792 int initialize_cpu_possible_set(void)
8793 {
8794 FILE *fp;
8795
8796 fp = fopen(possible_file, "r");
8797 if (!fp) {
8798 warn("open %s", possible_file);
8799 return -1;
8800 }
8801 if (fread(possible_buf, sizeof(char), 1024, fp) == 0) {
8802 warn("read %s", possible_file);
8803 goto err;
8804 }
8805 if (parse_cpu_str(possible_buf, cpu_possible_set, cpu_possible_setsize)) {
8806 warnx("%s: cpu str malformat %s\n", possible_file, cpu_effective_str);
8807 goto err;
8808 }
8809 return 0;
8810
8811 err:
8812 fclose(fp);
8813 return -1;
8814 }
8815
/*
 * Enumerate system topology: allocate and populate the cpu sets
 * (present/possible/effective/allowed/affinity) and the per-CPU
 * cpus[] records, then derive package/die/node/core/thread counts
 * into the global 'topo'.  @startup selects strict validation of
 * --cpu against present CPUs (fatal at startup, warning at runtime).
 */
void topology_probe(bool startup)
{
	int i;
	int max_core_id = 0;
	int max_package_id = 0;
	int max_siblings = 0;

	/* Initialize num_cpus, max_cpu_num */
	set_max_cpu_num();
	topo.num_cpus = 0;
	for_all_proc_cpus(count_cpus);
	if (!summary_only)
		BIC_PRESENT(BIC_CPU);

	if (debug > 1)
		fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);

	cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology));
	if (cpus == NULL)
		err(1, "calloc cpus");

	/*
	 * Allocate and initialize cpu_present_set
	 */
	cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
	if (cpu_present_set == NULL)
		err(3, "CPU_ALLOC");
	cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
	for_all_proc_cpus(mark_cpu_present);

	/*
	 * Allocate and initialize cpu_possible_set
	 */
	cpu_possible_set = CPU_ALLOC((topo.max_cpu_num + 1));
	if (cpu_possible_set == NULL)
		err(3, "CPU_ALLOC");
	cpu_possible_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_possible_setsize, cpu_possible_set);
	initialize_cpu_possible_set();

	/*
	 * Allocate and initialize cpu_effective_set
	 */
	cpu_effective_set = CPU_ALLOC((topo.max_cpu_num + 1));
	if (cpu_effective_set == NULL)
		err(3, "CPU_ALLOC");
	cpu_effective_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_effective_setsize, cpu_effective_set);
	update_effective_set(startup);

	/*
	 * Allocate and initialize cpu_allowed_set
	 */
	cpu_allowed_set = CPU_ALLOC((topo.max_cpu_num + 1));
	if (cpu_allowed_set == NULL)
		err(3, "CPU_ALLOC");
	cpu_allowed_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_allowed_setsize, cpu_allowed_set);

	/*
	 * Validate and update cpu_allowed_set.
	 *
	 * Make sure all cpus in cpu_subset are also in cpu_present_set during startup.
	 * Give a warning when cpus in cpu_subset become unavailable at runtime.
	 * Give a warning when cpus are not effective because of cgroup setting.
	 *
	 * cpu_allowed_set is the intersection of cpu_present_set/cpu_effective_set/cpu_subset.
	 */
	for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
		if (cpu_subset && !CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
			continue;

		if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) {
			if (cpu_subset) {
				/* cpus in cpu_subset must be in cpu_present_set during startup */
				if (startup)
					err(1, "cpu%d not present", i);
				else
					fprintf(stderr, "cpu%d not present\n", i);
			}
			continue;
		}

		if (CPU_COUNT_S(cpu_effective_setsize, cpu_effective_set)) {
			if (!CPU_ISSET_S(i, cpu_effective_setsize, cpu_effective_set)) {
				fprintf(stderr, "cpu%d not effective\n", i);
				continue;
			}
		}

		CPU_SET_S(i, cpu_allowed_setsize, cpu_allowed_set);
	}

	if (!CPU_COUNT_S(cpu_allowed_setsize, cpu_allowed_set))
		err(-ENODEV, "No valid cpus found");
	sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set);

	/*
	 * Allocate and initialize cpu_affinity_set
	 */
	cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
	if (cpu_affinity_set == NULL)
		err(3, "CPU_ALLOC");
	cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);

	for_all_proc_cpus(init_thread_id);

	for_all_proc_cpus(set_cpu_hybrid_type);

	/*
	 * For online cpus
	 * find max_core_id, max_package_id
	 */
	for (i = 0; i <= topo.max_cpu_num; ++i) {
		int siblings;

		if (cpu_is_not_present(i)) {
			if (debug > 1)
				fprintf(outf, "cpu%d NOT PRESENT\n", i);
			continue;
		}

		cpus[i].logical_cpu_id = i;

		/* get package information */
		cpus[i].physical_package_id = get_physical_package_id(i);
		if (cpus[i].physical_package_id > max_package_id)
			max_package_id = cpus[i].physical_package_id;

		/* get die information */
		cpus[i].die_id = get_die_id(i);
		if (cpus[i].die_id > topo.max_die_id)
			topo.max_die_id = cpus[i].die_id;

		/* get numa node information */
		cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
		if (cpus[i].physical_node_id > topo.max_node_num)
			topo.max_node_num = cpus[i].physical_node_id;

		/* get core information */
		cpus[i].physical_core_id = get_core_id(i);
		if (cpus[i].physical_core_id > max_core_id)
			max_core_id = cpus[i].physical_core_id;

		/* get thread information */
		siblings = get_thread_siblings(&cpus[i]);
		if (siblings > max_siblings)
			max_siblings = siblings;
		/* count each core once, via its thread 0 */
		if (cpus[i].thread_id == 0)
			topo.num_cores++;
	}
	topo.max_core_id = max_core_id;
	topo.max_package_id = max_package_id;

	topo.cores_per_node = max_core_id + 1;
	if (debug > 1)
		fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node);
	if (!summary_only)
		BIC_PRESENT(BIC_Core);

	topo.num_die = topo.max_die_id + 1;
	if (debug > 1)
		fprintf(outf, "max_die_id %d, sizing for %d die\n", topo.max_die_id, topo.num_die);
	if (!summary_only && topo.num_die > 1)
		BIC_PRESENT(BIC_Die);

	topo.num_packages = max_package_id + 1;
	if (debug > 1)
		fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages);
	if (!summary_only && topo.num_packages > 1)
		BIC_PRESENT(BIC_Package);

	set_node_data();
	if (debug > 1)
		fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg);
	if (!summary_only && topo.nodes_per_pkg > 1)
		BIC_PRESENT(BIC_Node);

	topo.threads_per_core = max_siblings;
	if (debug > 1)
		fprintf(outf, "max_siblings %d\n", max_siblings);

	if (debug < 1)
		return;

	/* per-CPU topology dump (debug only) */
	for (i = 0; i <= topo.max_cpu_num; ++i) {
		if (cpu_is_not_present(i))
			continue;
		fprintf(outf,
			"cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n",
			i, cpus[i].physical_package_id, cpus[i].die_id,
			cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, cpus[i].thread_id);
	}

}
9013
allocate_counters(struct thread_data ** t,struct core_data ** c,struct pkg_data ** p)9014 void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
9015 {
9016 int i;
9017 int num_cores = topo.cores_per_node * topo.nodes_per_pkg * topo.num_packages;
9018 int num_threads = topo.threads_per_core * num_cores;
9019
9020 *t = calloc(num_threads, sizeof(struct thread_data));
9021 if (*t == NULL)
9022 goto error;
9023
9024 for (i = 0; i < num_threads; i++)
9025 (*t)[i].cpu_id = -1;
9026
9027 *c = calloc(num_cores, sizeof(struct core_data));
9028 if (*c == NULL)
9029 goto error;
9030
9031 for (i = 0; i < num_cores; i++) {
9032 (*c)[i].core_id = -1;
9033 (*c)[i].base_cpu = -1;
9034 }
9035
9036 *p = calloc(topo.num_packages, sizeof(struct pkg_data));
9037 if (*p == NULL)
9038 goto error;
9039
9040 for (i = 0; i < topo.num_packages; i++) {
9041 (*p)[i].package_id = i;
9042 (*p)[i].base_cpu = -1;
9043 }
9044
9045 return;
9046 error:
9047 err(1, "calloc counters");
9048 }
9049
9050 /*
9051 * init_counter()
9052 *
9053 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
9054 */
init_counter(struct thread_data * thread_base,struct core_data * core_base,struct pkg_data * pkg_base,int cpu_id)9055 void init_counter(struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base, int cpu_id)
9056 {
9057 int pkg_id = cpus[cpu_id].physical_package_id;
9058 int node_id = cpus[cpu_id].logical_node_id;
9059 int core_id = cpus[cpu_id].physical_core_id;
9060 int thread_id = cpus[cpu_id].thread_id;
9061 struct thread_data *t;
9062 struct core_data *c;
9063 struct pkg_data *p;
9064
9065 /* Workaround for systems where physical_node_id==-1
9066 * and logical_node_id==(-1 - topo.num_cpus)
9067 */
9068 if (node_id < 0)
9069 node_id = 0;
9070
9071 t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
9072 c = GET_CORE(core_base, core_id, node_id, pkg_id);
9073 p = GET_PKG(pkg_base, pkg_id);
9074
9075 t->cpu_id = cpu_id;
9076 if (!cpu_is_not_allowed(cpu_id)) {
9077 if (c->base_cpu < 0)
9078 c->base_cpu = t->cpu_id;
9079 if (p->base_cpu < 0)
9080 p->base_cpu = t->cpu_id;
9081 }
9082
9083 c->core_id = core_id;
9084 p->package_id = pkg_id;
9085 }
9086
initialize_counters(int cpu_id)9087 int initialize_counters(int cpu_id)
9088 {
9089 init_counter(EVEN_COUNTERS, cpu_id);
9090 init_counter(ODD_COUNTERS, cpu_id);
9091 return 0;
9092 }
9093
allocate_output_buffer()9094 void allocate_output_buffer()
9095 {
9096 output_buffer = calloc(1, (1 + topo.num_cpus) * 2048);
9097 outp = output_buffer;
9098 if (outp == NULL)
9099 err(-1, "calloc output buffer");
9100 }
9101
allocate_fd_percpu(void)9102 void allocate_fd_percpu(void)
9103 {
9104 fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
9105 if (fd_percpu == NULL)
9106 err(-1, "calloc fd_percpu");
9107 }
9108
allocate_irq_buffers(void)9109 void allocate_irq_buffers(void)
9110 {
9111 irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
9112 if (irq_column_2_cpu == NULL)
9113 err(-1, "calloc %d", topo.num_cpus);
9114
9115 irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
9116 if (irqs_per_cpu == NULL)
9117 err(-1, "calloc %d IRQ", topo.max_cpu_num + 1);
9118
9119 nmi_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
9120 if (nmi_per_cpu == NULL)
9121 err(-1, "calloc %d NMI", topo.max_cpu_num + 1);
9122 }
9123
update_topo(struct thread_data * t,struct core_data * c,struct pkg_data * p)9124 int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p)
9125 {
9126 topo.allowed_cpus++;
9127 if ((int)t->cpu_id == c->base_cpu)
9128 topo.allowed_cores++;
9129 if ((int)t->cpu_id == p->base_cpu)
9130 topo.allowed_packages++;
9131
9132 return 0;
9133 }
9134
topology_update(void)9135 void topology_update(void)
9136 {
9137 topo.allowed_cpus = 0;
9138 topo.allowed_cores = 0;
9139 topo.allowed_packages = 0;
9140 for_all_cpus(update_topo, ODD_COUNTERS);
9141 }
9142
/*
 * setup_all_buffers()
 *
 * One-stop topology probe plus allocation of every per-cpu buffer.
 * Ordering matters: topology_probe() must run first because all of the
 * allocations below are sized from the topo globals it fills in.
 */
void setup_all_buffers(bool startup)
{
	topology_probe(startup);
	allocate_irq_buffers();
	allocate_fd_percpu();
	allocate_counters(&thread_even, &core_even, &package_even);
	allocate_counters(&thread_odd, &core_odd, &package_odd);
	allocate_output_buffer();
	for_all_proc_cpus(initialize_counters);
	topology_update();
}
9154
set_base_cpu(void)9155 void set_base_cpu(void)
9156 {
9157 int i;
9158
9159 for (i = 0; i < topo.max_cpu_num + 1; ++i) {
9160 if (cpu_is_not_allowed(i))
9161 continue;
9162 base_cpu = i;
9163 if (debug > 1)
9164 fprintf(outf, "base_cpu = %d\n", base_cpu);
9165 return;
9166 }
9167 err(-ENODEV, "No valid cpus found");
9168 }
9169
has_added_counters(void)9170 bool has_added_counters(void)
9171 {
9172 /*
9173 * It only makes sense to call this after the command line is parsed,
9174 * otherwise sys structure is not populated.
9175 */
9176
9177 return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters;
9178 }
9179
/*
 * check_msr_access()
 *
 * Probe MSR driver availability and permission.  The two probe helpers
 * presumably set no_msr on failure (TODO confirm against their
 * definitions); if MSR access is off, disable the columns that need it.
 */
void check_msr_access(void)
{
	check_dev_msr();
	check_msr_permission();

	if (no_msr)
		bic_disable_msr_access();
}
9188
/*
 * check_perf_access()
 *
 * Drop the IPC column when perf is disabled, IPC was not requested,
 * or instruction-count access is not available.
 */
void check_perf_access(void)
{
	if (no_perf || !BIC_IS_ENABLED(BIC_IPC) || !has_instr_count_access())
		bic_enabled &= ~BIC_IPC;
}
9194
/*
 * perf_has_hybrid_devices()
 *
 * Report whether perf exposes separate "cpu_core" and "cpu_atom" event
 * sources (Intel hybrid).  The sysfs probe is done once and cached:
 * 0 = not probed yet, 1 = present, -1 = absent.
 */
bool perf_has_hybrid_devices(void)
{
	static int cached;

	if (cached)
		return cached > 0;

	/* access() returns non-zero when a device node is missing. */
	if (access("/sys/bus/event_source/devices/cpu_core", F_OK)
	    || access("/sys/bus/event_source/devices/cpu_atom", F_OK)) {
		cached = -1;
		return false;
	}

	cached = 1;
	return true;
}
9223
added_perf_counters_init_(struct perf_counter_info * pinfo)9224 int added_perf_counters_init_(struct perf_counter_info *pinfo)
9225 {
9226 size_t num_domains = 0;
9227 unsigned int next_domain;
9228 bool *domain_visited;
9229 unsigned int perf_type, perf_config;
9230 double perf_scale;
9231 int fd_perf;
9232
9233 if (!pinfo)
9234 return 0;
9235
9236 const size_t max_num_domains = MAX(topo.max_cpu_num + 1, MAX(topo.max_core_id + 1, topo.max_package_id + 1));
9237
9238 domain_visited = calloc(max_num_domains, sizeof(*domain_visited));
9239
9240 while (pinfo) {
9241 switch (pinfo->scope) {
9242 case SCOPE_CPU:
9243 num_domains = topo.max_cpu_num + 1;
9244 break;
9245
9246 case SCOPE_CORE:
9247 num_domains = topo.max_core_id + 1;
9248 break;
9249
9250 case SCOPE_PACKAGE:
9251 num_domains = topo.max_package_id + 1;
9252 break;
9253 }
9254
9255 /* Allocate buffer for file descriptor for each domain. */
9256 pinfo->fd_perf_per_domain = calloc(num_domains, sizeof(*pinfo->fd_perf_per_domain));
9257 if (!pinfo->fd_perf_per_domain)
9258 errx(1, "%s: alloc %s", __func__, "fd_perf_per_domain");
9259
9260 for (size_t i = 0; i < num_domains; ++i)
9261 pinfo->fd_perf_per_domain[i] = -1;
9262
9263 pinfo->num_domains = num_domains;
9264 pinfo->scale = 1.0;
9265
9266 memset(domain_visited, 0, max_num_domains * sizeof(*domain_visited));
9267
9268 for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) {
9269
9270 next_domain = cpu_to_domain(pinfo, cpu);
9271
9272 assert(next_domain < num_domains);
9273
9274 if (cpu_is_not_allowed(cpu))
9275 continue;
9276
9277 if (domain_visited[next_domain])
9278 continue;
9279
9280 /*
9281 * Intel hybrid platforms expose different perf devices for P and E cores.
9282 * Instead of one, "/sys/bus/event_source/devices/cpu" device, there are
9283 * "/sys/bus/event_source/devices/{cpu_core,cpu_atom}".
9284 *
9285 * This makes it more complicated to the user, because most of the counters
9286 * are available on both and have to be handled manually, otherwise.
9287 *
9288 * Code below, allow user to use the old "cpu" name, which is translated accordingly.
9289 */
9290 const char *perf_device = pinfo->device;
9291
9292 if (strcmp(perf_device, "cpu") == 0 && perf_has_hybrid_devices()) {
9293 switch (cpus[cpu].type) {
9294 case INTEL_PCORE_TYPE:
9295 perf_device = "cpu_core";
9296 break;
9297
9298 case INTEL_ECORE_TYPE:
9299 perf_device = "cpu_atom";
9300 break;
9301
9302 default: /* Don't change, we will probably fail and report a problem soon. */
9303 break;
9304 }
9305 }
9306
9307 perf_type = read_perf_type(perf_device);
9308 if (perf_type == (unsigned int)-1) {
9309 warnx("%s: perf/%s/%s: failed to read %s", __func__, perf_device, pinfo->event, "type");
9310 continue;
9311 }
9312
9313 perf_config = read_perf_config(perf_device, pinfo->event);
9314 if (perf_config == (unsigned int)-1) {
9315 warnx("%s: perf/%s/%s: failed to read %s",
9316 __func__, perf_device, pinfo->event, "config");
9317 continue;
9318 }
9319
9320 /* Scale is not required, some counters just don't have it. */
9321 perf_scale = read_perf_scale(perf_device, pinfo->event);
9322 if (perf_scale == 0.0)
9323 perf_scale = 1.0;
9324
9325 fd_perf = open_perf_counter(cpu, perf_type, perf_config, -1, 0);
9326 if (fd_perf == -1) {
9327 warnx("%s: perf/%s/%s: failed to open counter on cpu%d",
9328 __func__, perf_device, pinfo->event, cpu);
9329 continue;
9330 }
9331
9332 domain_visited[next_domain] = 1;
9333 pinfo->fd_perf_per_domain[next_domain] = fd_perf;
9334 pinfo->scale = perf_scale;
9335
9336 if (debug)
9337 fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n",
9338 perf_device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]);
9339 }
9340
9341 pinfo = pinfo->next;
9342 }
9343
9344 free(domain_visited);
9345
9346 return 0;
9347 }
9348
added_perf_counters_init(void)9349 void added_perf_counters_init(void)
9350 {
9351 if (added_perf_counters_init_(sys.perf_tp))
9352 errx(1, "%s: %s", __func__, "thread");
9353
9354 if (added_perf_counters_init_(sys.perf_cp))
9355 errx(1, "%s: %s", __func__, "core");
9356
9357 if (added_perf_counters_init_(sys.perf_pp))
9358 errx(1, "%s: %s", __func__, "package");
9359 }
9360
parse_telem_info_file(int fd_dir,const char * info_filename,const char * format,unsigned long * output)9361 int parse_telem_info_file(int fd_dir, const char *info_filename, const char *format, unsigned long *output)
9362 {
9363 int fd_telem_info;
9364 FILE *file_telem_info;
9365 unsigned long value;
9366
9367 fd_telem_info = openat(fd_dir, info_filename, O_RDONLY);
9368 if (fd_telem_info == -1)
9369 return -1;
9370
9371 file_telem_info = fdopen(fd_telem_info, "r");
9372 if (file_telem_info == NULL) {
9373 close(fd_telem_info);
9374 return -1;
9375 }
9376
9377 if (fscanf(file_telem_info, format, &value) != 1) {
9378 fclose(file_telem_info);
9379 return -1;
9380 }
9381
9382 fclose(file_telem_info);
9383
9384 *output = value;
9385
9386 return 0;
9387 }
9388
/*
 * pmt_mmio_open()
 *
 * Walk the PMT telemetry sysfs directory, mmap (read-only) every entry
 * whose guid matches target_guid, and prepend the resulting chain of
 * pmt_mmio nodes to the global pmt_mmios list, preserving sysfs order.
 * Returns the head of the newly-added chain, or NULL if none matched.
 */
struct pmt_mmio *pmt_mmio_open(unsigned int target_guid)
{
	struct pmt_diriter_t pmt_iter;
	const struct dirent *entry;
	struct stat st;
	int fd_telem_dir, fd_pmt;
	unsigned long guid, size, offset;
	size_t mmap_size;
	void *mmio;
	struct pmt_mmio *head = NULL, *last = NULL;
	struct pmt_mmio *new_pmt = NULL;

	if (stat(SYSFS_TELEM_PATH, &st) == -1)
		return NULL;

	pmt_diriter_init(&pmt_iter);
	entry = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH);
	if (!entry) {
		pmt_diriter_remove(&pmt_iter);
		return NULL;
	}

	for (; entry != NULL; entry = pmt_diriter_next(&pmt_iter)) {
		/* any stat/open/parse failure below aborts the scan entirely */
		if (fstatat(dirfd(pmt_iter.dir), entry->d_name, &st, 0) == -1)
			break;

		if (!S_ISDIR(st.st_mode))
			continue;

		fd_telem_dir = openat(dirfd(pmt_iter.dir), entry->d_name, O_RDONLY);
		if (fd_telem_dir == -1)
			break;

		if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) {
			close(fd_telem_dir);
			break;
		}

		if (parse_telem_info_file(fd_telem_dir, "size", "%lu", &size)) {
			close(fd_telem_dir);
			break;
		}

		/* not the guid we're after: skip this entry, keep scanning */
		if (guid != target_guid) {
			close(fd_telem_dir);
			continue;
		}

		if (parse_telem_info_file(fd_telem_dir, "offset", "%lu", &offset)) {
			close(fd_telem_dir);
			break;
		}

		/* only offset==0 regions are supported here */
		assert(offset == 0);

		fd_pmt = openat(fd_telem_dir, "telem", O_RDONLY);
		if (fd_pmt == -1)
			goto loop_cleanup_and_break;

		mmap_size = ROUND_UP_TO_PAGE_SIZE(size);
		mmio = mmap(0, mmap_size, PROT_READ, MAP_SHARED, fd_pmt, 0);
		if (mmio != MAP_FAILED) {
			if (debug)
				fprintf(stderr, "%s: 0x%lx mmaped at: %p\n", __func__, guid, mmio);

			new_pmt = calloc(1, sizeof(*new_pmt));

			if (!new_pmt) {
				fprintf(stderr, "%s: Failed to allocate pmt_mmio\n", __func__);
				exit(1);
			}

			/*
			 * Create linked list of mmaped regions,
			 * but preserve the ordering from sysfs.
			 * Ordering is important for the user to
			 * use the seq=%u parameter when adding a counter.
			 */
			/* each node's next points at the old list head; a
			 * subsequent node overwrites it via last->next, so
			 * only the final node keeps the link to pmt_mmios. */
			new_pmt->guid = guid;
			new_pmt->mmio_base = mmio;
			new_pmt->pmt_offset = offset;
			new_pmt->size = size;
			new_pmt->next = pmt_mmios;

			if (last)
				last->next = new_pmt;
			else
				head = new_pmt;

			last = new_pmt;
		}

		/*
		 * NOTE(review): despite the label name, control falls through
		 * here on every matching iteration and the loop continues;
		 * when reached via the goto above, fd_pmt is -1 and close(-1)
		 * is a harmless EBADF.  Confirm this is intended.
		 */
 loop_cleanup_and_break:
		close(fd_pmt);
		close(fd_telem_dir);
	}

	pmt_diriter_remove(&pmt_iter);

	/*
	 * If we found something, stick just
	 * created linked list to the front.
	 */
	if (head)
		pmt_mmios = head;

	return head;
}
9497
pmt_mmio_find(unsigned int guid)9498 struct pmt_mmio *pmt_mmio_find(unsigned int guid)
9499 {
9500 struct pmt_mmio *pmmio = pmt_mmios;
9501
9502 while (pmmio) {
9503 if (pmmio->guid == guid)
9504 return pmmio;
9505
9506 pmmio = pmmio->next;
9507 }
9508
9509 return NULL;
9510 }
9511
pmt_get_counter_pointer(struct pmt_mmio * pmmio,unsigned long counter_offset)9512 void *pmt_get_counter_pointer(struct pmt_mmio *pmmio, unsigned long counter_offset)
9513 {
9514 char *ret;
9515
9516 /* Get base of mmaped PMT file. */
9517 ret = (char *)pmmio->mmio_base;
9518
9519 /*
9520 * Apply PMT MMIO offset to obtain beginning of the mmaped telemetry data.
9521 * It's not guaranteed that the mmaped memory begins with the telemetry data
9522 * - we might have to apply the offset first.
9523 */
9524 ret += pmmio->pmt_offset;
9525
9526 /* Apply the counter offset to get the address to the mmaped counter. */
9527 ret += counter_offset;
9528
9529 return ret;
9530 }
9531
pmt_add_guid(unsigned int guid,unsigned int seq)9532 struct pmt_mmio *pmt_add_guid(unsigned int guid, unsigned int seq)
9533 {
9534 struct pmt_mmio *ret;
9535
9536 ret = pmt_mmio_find(guid);
9537 if (!ret)
9538 ret = pmt_mmio_open(guid);
9539
9540 while (ret && seq) {
9541 ret = ret->next;
9542 --seq;
9543 }
9544
9545 return ret;
9546 }
9547
/* How pmt_add_counter() treats a failure to map the requested PMT region. */
enum pmt_open_mode {
	PMT_OPEN_TRY,		/* Open failure is not an error. */
	PMT_OPEN_REQUIRED,	/* Open failure is a fatal error. */
};
9552
pmt_find_counter(struct pmt_counter * pcounter,const char * name)9553 struct pmt_counter *pmt_find_counter(struct pmt_counter *pcounter, const char *name)
9554 {
9555 while (pcounter) {
9556 if (strcmp(pcounter->name, name) == 0)
9557 break;
9558
9559 pcounter = pcounter->next;
9560 }
9561
9562 return pcounter;
9563 }
9564
pmt_get_scope_root(enum counter_scope scope)9565 struct pmt_counter **pmt_get_scope_root(enum counter_scope scope)
9566 {
9567 switch (scope) {
9568 case SCOPE_CPU:
9569 return &sys.pmt_tp;
9570 case SCOPE_CORE:
9571 return &sys.pmt_cp;
9572 case SCOPE_PACKAGE:
9573 return &sys.pmt_pp;
9574 }
9575
9576 __builtin_unreachable();
9577 }
9578
pmt_counter_add_domain(struct pmt_counter * pcounter,unsigned long * pmmio,unsigned int domain_id)9579 void pmt_counter_add_domain(struct pmt_counter *pcounter, unsigned long *pmmio, unsigned int domain_id)
9580 {
9581 /* Make sure the new domain fits. */
9582 if (domain_id >= pcounter->num_domains)
9583 pmt_counter_resize(pcounter, domain_id + 1);
9584
9585 assert(pcounter->domains);
9586 assert(domain_id < pcounter->num_domains);
9587
9588 pcounter->domains[domain_id].pcounter = pmmio;
9589 }
9590
pmt_add_counter(unsigned int guid,unsigned int seq,const char * name,enum pmt_datatype type,unsigned int lsb,unsigned int msb,unsigned int offset,enum counter_scope scope,enum counter_format format,unsigned int domain_id,enum pmt_open_mode mode)9591 int pmt_add_counter(unsigned int guid, unsigned int seq, const char *name, enum pmt_datatype type,
9592 unsigned int lsb, unsigned int msb, unsigned int offset, enum counter_scope scope,
9593 enum counter_format format, unsigned int domain_id, enum pmt_open_mode mode)
9594 {
9595 struct pmt_mmio *mmio;
9596 struct pmt_counter *pcounter;
9597 struct pmt_counter **const pmt_root = pmt_get_scope_root(scope);
9598 bool new_counter = false;
9599 int conflict = 0;
9600
9601 if (lsb > msb) {
9602 fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "lsb <= msb", name);
9603 exit(1);
9604 }
9605
9606 if (msb >= 64) {
9607 fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "msb < 64", name);
9608 exit(1);
9609 }
9610
9611 mmio = pmt_add_guid(guid, seq);
9612 if (!mmio) {
9613 if (mode != PMT_OPEN_TRY) {
9614 fprintf(stderr, "%s: failed to map PMT MMIO for guid %x, seq %u\n", __func__, guid, seq);
9615 exit(1);
9616 }
9617
9618 return 1;
9619 }
9620
9621 if (offset >= mmio->size) {
9622 if (mode != PMT_OPEN_TRY) {
9623 fprintf(stderr, "%s: offset %u outside of PMT MMIO size %u\n", __func__, offset, mmio->size);
9624 exit(1);
9625 }
9626
9627 return 1;
9628 }
9629
9630 pcounter = pmt_find_counter(*pmt_root, name);
9631 if (!pcounter) {
9632 pcounter = calloc(1, sizeof(*pcounter));
9633 new_counter = true;
9634 }
9635
9636 if (new_counter) {
9637 strncpy(pcounter->name, name, ARRAY_SIZE(pcounter->name) - 1);
9638 pcounter->type = type;
9639 pcounter->scope = scope;
9640 pcounter->lsb = lsb;
9641 pcounter->msb = msb;
9642 pcounter->format = format;
9643 } else {
9644 conflict += pcounter->type != type;
9645 conflict += pcounter->scope != scope;
9646 conflict += pcounter->lsb != lsb;
9647 conflict += pcounter->msb != msb;
9648 conflict += pcounter->format != format;
9649 }
9650
9651 if (conflict) {
9652 fprintf(stderr, "%s: conflicting parameters for the PMT counter with the same name %s\n",
9653 __func__, name);
9654 exit(1);
9655 }
9656
9657 pmt_counter_add_domain(pcounter, pmt_get_counter_pointer(mmio, offset), domain_id);
9658
9659 if (new_counter) {
9660 pcounter->next = *pmt_root;
9661 *pmt_root = pcounter;
9662 }
9663
9664 return 0;
9665 }
9666
/*
 * pmt_init()
 *
 * Register the built-in PMT counters that back optional columns:
 * Die%c6 on MTL, and per-module CPU%c1e on Clearwater Forest.
 * Both use PMT_OPEN_TRY, so absence of the PMT region is not an error.
 */
void pmt_init(void)
{
	int cpu_num;
	unsigned long seq, offset, mod_num;

	if (BIC_IS_ENABLED(BIC_Diec6)) {
		pmt_add_counter(PMT_MTL_DC6_GUID, PMT_MTL_DC6_SEQ, "Die%c6", PMT_TYPE_XTAL_TIME,
				PMT_COUNTER_MTL_DC6_LSB, PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET,
				SCOPE_PACKAGE, FORMAT_DELTA, 0, PMT_OPEN_TRY);
	}

	if (BIC_IS_ENABLED(BIC_CPU_c1e)) {
		seq = 0;
		offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE;
		mod_num = 0;	/* Relative module number for current PMT file. */

		/* Open the counter for each CPU. */
		/*
		 * NOTE(review): other per-cpu loops in this file iterate with
		 * "<= topo.max_cpu_num" (max_cpu_num is the highest cpu id);
		 * "<" here appears to skip the last CPU -- confirm intent.
		 */
		for (cpu_num = 0; cpu_num < topo.max_cpu_num;) {

			if (cpu_is_not_allowed(cpu_num))
				goto next_loop_iter;

			/*
			 * Set the scope to CPU, even though CWF report the counter per module.
			 * CPUs inside the same module will read from the same location, instead of reporting zeros.
			 *
			 * CWF with newer firmware might require a PMT_TYPE_XTAL_TIME intead of PMT_TYPE_TCORE_CLOCK.
			 */
			pmt_add_counter(PMT_CWF_MC1E_GUID, seq, "CPU%c1e", PMT_TYPE_TCORE_CLOCK,
					PMT_COUNTER_CWF_MC1E_LSB, PMT_COUNTER_CWF_MC1E_MSB, offset, SCOPE_CPU,
					FORMAT_DELTA, cpu_num, PMT_OPEN_TRY);

			/*
			 * Rather complex logic for each time we go to the next loop iteration,
			 * so keep it as a label.
			 */
next_loop_iter:
			/*
			 * Advance the cpu number and check if we should also advance offset to
			 * the next counter inside the PMT file.
			 *
			 * On Clearwater Forest platform, the counter is reported per module,
			 * so open the same counter for all of the CPUs inside the module.
			 * That way, reported table show the correct value for all of the CPUs inside the module,
			 * instead of zeros.
			 */
			++cpu_num;
			if (cpu_num % PMT_COUNTER_CWF_CPUS_PER_MODULE == 0) {
				offset += PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT;
				++mod_num;
			}

			/*
			 * There are PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE in each PMT file.
			 *
			 * If that number is reached, seq must be incremented to advance to the next file in a sequence.
			 * Offset inside that file and a module counter has to be reset.
			 */
			if (mod_num == PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE) {
				++seq;
				offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE;
				mod_num = 0;
			}
		}
	}
}
9733
/*
 * turbostat_init()
 *
 * One-time startup: probe topology, allocate all buffers, verify
 * MSR/perf access, identify the CPU, and initialize every counter
 * source.  Order is significant: buffers and base_cpu must exist
 * before the access checks and probes that use them.
 */
void turbostat_init()
{
	setup_all_buffers(true);
	set_base_cpu();
	check_msr_access();
	check_perf_access();
	process_cpuid();
	counter_info_init();
	probe_pm_features();
	msr_perf_init();
	linux_perf_init();
	rapl_perf_init();
	cstate_perf_init();
	added_perf_counters_init();
	pmt_init();

	/* record each CPU's hybrid type in both snapshot arrays */
	for_all_cpus(get_cpu_type, ODD_COUNTERS);
	for_all_cpus(get_cpu_type, EVEN_COUNTERS);

	if (BIC_IS_ENABLED(BIC_IPC) && has_aperf_access && get_instr_count_fd(base_cpu) != -1)
		BIC_PRESENT(BIC_IPC);

	/*
	 * If TSC tweak is needed, but couldn't get it,
	 * disable more BICs, since it can't be reported accurately.
	 */
	if (platform->enable_tsc_tweak && !has_base_hz) {
		bic_enabled &= ~BIC_Busy;
		bic_enabled &= ~BIC_Bzy_MHz;
	}
}
9765
affinitize_child(void)9766 void affinitize_child(void)
9767 {
9768 /* Prefer cpu_possible_set, if available */
9769 if (sched_setaffinity(0, cpu_possible_setsize, cpu_possible_set)) {
9770 warn("sched_setaffinity cpu_possible_set");
9771
9772 /* Otherwise, allow child to run on same cpu set as turbostat */
9773 if (sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set))
9774 warn("sched_setaffinity cpu_allowed_set");
9775 }
9776 }
9777
/*
 * fork_it()
 *
 * "turbostat command" mode: take a counter snapshot, fork and exec the
 * given command, take a second snapshot when it exits, and print the
 * delta over the child's runtime.  Returns the child's exit status.
 */
int fork_it(char **argv)
{
	pid_t child_pid;
	int status;

	snapshot_proc_sysfs_files();
	status = for_all_cpus(get_counters, EVEN_COUNTERS);
	first_counter_read = 0;
	if (status)
		exit(status);
	gettimeofday(&tv_even, (struct timezone *)NULL);

	child_pid = fork();
	if (!child_pid) {
		/* child */
		affinitize_child();
		execvp(argv[0], argv);
		err(errno, "exec %s", argv[0]);
	} else {

		/* parent */
		if (child_pid == -1)
			err(1, "fork");

		/* let only the child react to ^C / ^\ */
		signal(SIGINT, SIG_IGN);
		signal(SIGQUIT, SIG_IGN);
		if (waitpid(child_pid, &status, 0) == -1)
			err(status, "waitpid");

		if (WIFEXITED(status))
			status = WEXITSTATUS(status);
	}
	/*
	 * n.b. fork_it() does not check for errors from for_all_cpus()
	 * because re-starting is problematic when forking
	 */
	snapshot_proc_sysfs_files();
	for_all_cpus(get_counters, ODD_COUNTERS);
	gettimeofday(&tv_odd, (struct timezone *)NULL);
	timersub(&tv_odd, &tv_even, &tv_delta);
	if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
		fprintf(outf, "%s: Counter reset detected\n", progname);

	/* deltas land in the EVEN set; format and print them */
	compute_average(EVEN_COUNTERS);
	format_all_counters(EVEN_COUNTERS);

	fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0);

	flush_output_stderr();

	return status;
}
9830
get_and_dump_counters(void)9831 int get_and_dump_counters(void)
9832 {
9833 int status;
9834
9835 snapshot_proc_sysfs_files();
9836 status = for_all_cpus(get_counters, ODD_COUNTERS);
9837 if (status)
9838 return status;
9839
9840 status = for_all_cpus(dump_counters, ODD_COUNTERS);
9841 if (status)
9842 return status;
9843
9844 flush_output_stdout();
9845
9846 return status;
9847 }
9848
/* Print the tool's version banner to the output stream. */
void print_version()
{
	fprintf(outf, "turbostat version 2025.06.08 - Len Brown <lenb@kernel.org>\n");
}
9853
9854 #define COMMAND_LINE_SIZE 2048
9855
print_bootcmd(void)9856 void print_bootcmd(void)
9857 {
9858 char bootcmd[COMMAND_LINE_SIZE];
9859 FILE *fp;
9860 int ret;
9861
9862 memset(bootcmd, 0, COMMAND_LINE_SIZE);
9863 fp = fopen("/proc/cmdline", "r");
9864 if (!fp)
9865 return;
9866
9867 ret = fread(bootcmd, sizeof(char), COMMAND_LINE_SIZE - 1, fp);
9868 if (ret) {
9869 bootcmd[ret] = '\0';
9870 /* the last character is already '\n' */
9871 fprintf(outf, "Kernel command line: %s", bootcmd);
9872 }
9873
9874 fclose(fp);
9875 }
9876
find_msrp_by_name(struct msr_counter * head,char * name)9877 struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name)
9878 {
9879 struct msr_counter *mp;
9880
9881 for (mp = head; mp; mp = mp->next) {
9882 if (debug)
9883 fprintf(stderr, "%s: %s %s\n", __func__, name, mp->name);
9884 if (!strcmp(name, mp->name))
9885 return mp;
9886 }
9887 return NULL;
9888 }
9889
add_counter(unsigned int msr_num,char * path,char * name,unsigned int width,enum counter_scope scope,enum counter_type type,enum counter_format format,int flags,int id)9890 int add_counter(unsigned int msr_num, char *path, char *name,
9891 unsigned int width, enum counter_scope scope,
9892 enum counter_type type, enum counter_format format, int flags, int id)
9893 {
9894 struct msr_counter *msrp;
9895
9896 if (no_msr && msr_num)
9897 errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num);
9898
9899 if (debug)
9900 fprintf(stderr, "%s(msr%d, %s, %s, width%d, scope%d, type%d, format%d, flags%x, id%d)\n",
9901 __func__, msr_num, path, name, width, scope, type, format, flags, id);
9902
9903 switch (scope) {
9904
9905 case SCOPE_CPU:
9906 msrp = find_msrp_by_name(sys.tp, name);
9907 if (msrp) {
9908 if (debug)
9909 fprintf(stderr, "%s: %s FOUND\n", __func__, name);
9910 break;
9911 }
9912 if (sys.added_thread_counters++ >= MAX_ADDED_THREAD_COUNTERS) {
9913 warnx("ignoring thread counter %s", name);
9914 return -1;
9915 }
9916 break;
9917 case SCOPE_CORE:
9918 msrp = find_msrp_by_name(sys.cp, name);
9919 if (msrp) {
9920 if (debug)
9921 fprintf(stderr, "%s: %s FOUND\n", __func__, name);
9922 break;
9923 }
9924 if (sys.added_core_counters++ >= MAX_ADDED_CORE_COUNTERS) {
9925 warnx("ignoring core counter %s", name);
9926 return -1;
9927 }
9928 break;
9929 case SCOPE_PACKAGE:
9930 msrp = find_msrp_by_name(sys.pp, name);
9931 if (msrp) {
9932 if (debug)
9933 fprintf(stderr, "%s: %s FOUND\n", __func__, name);
9934 break;
9935 }
9936 if (sys.added_package_counters++ >= MAX_ADDED_PACKAGE_COUNTERS) {
9937 warnx("ignoring package counter %s", name);
9938 return -1;
9939 }
9940 break;
9941 default:
9942 warnx("ignoring counter %s with unknown scope", name);
9943 return -1;
9944 }
9945
9946 if (msrp == NULL) {
9947 msrp = calloc(1, sizeof(struct msr_counter));
9948 if (msrp == NULL)
9949 err(-1, "calloc msr_counter");
9950
9951 msrp->msr_num = msr_num;
9952 strncpy(msrp->name, name, NAME_BYTES - 1);
9953 msrp->width = width;
9954 msrp->type = type;
9955 msrp->format = format;
9956 msrp->flags = flags;
9957
9958 switch (scope) {
9959 case SCOPE_CPU:
9960 msrp->next = sys.tp;
9961 sys.tp = msrp;
9962 break;
9963 case SCOPE_CORE:
9964 msrp->next = sys.cp;
9965 sys.cp = msrp;
9966 break;
9967 case SCOPE_PACKAGE:
9968 msrp->next = sys.pp;
9969 sys.pp = msrp;
9970 break;
9971 }
9972 }
9973
9974 if (path) {
9975 struct sysfs_path *sp;
9976
9977 sp = calloc(1, sizeof(struct sysfs_path));
9978 if (sp == NULL) {
9979 perror("calloc");
9980 exit(1);
9981 }
9982 strncpy(sp->path, path, PATH_BYTES - 1);
9983 sp->id = id;
9984 sp->next = msrp->sp;
9985 msrp->sp = sp;
9986 }
9987
9988 return 0;
9989 }
9990
9991 /*
9992 * Initialize the fields used for identifying and opening the counter.
9993 *
9994 * Defer the initialization of any runtime buffers for actually reading
9995 * the counters for when we initialize all perf counters, so we can later
9996 * easily call re_initialize().
9997 */
make_perf_counter_info(const char * perf_device,const char * perf_event,const char * name,unsigned int width,enum counter_scope scope,enum counter_type type,enum counter_format format)9998 struct perf_counter_info *make_perf_counter_info(const char *perf_device,
9999 const char *perf_event,
10000 const char *name,
10001 unsigned int width,
10002 enum counter_scope scope,
10003 enum counter_type type, enum counter_format format)
10004 {
10005 struct perf_counter_info *pinfo;
10006
10007 pinfo = calloc(1, sizeof(*pinfo));
10008 if (!pinfo)
10009 errx(1, "%s: Failed to allocate %s/%s\n", __func__, perf_device, perf_event);
10010
10011 strncpy(pinfo->device, perf_device, ARRAY_SIZE(pinfo->device) - 1);
10012 strncpy(pinfo->event, perf_event, ARRAY_SIZE(pinfo->event) - 1);
10013
10014 strncpy(pinfo->name, name, ARRAY_SIZE(pinfo->name) - 1);
10015 pinfo->width = width;
10016 pinfo->scope = scope;
10017 pinfo->type = type;
10018 pinfo->format = format;
10019
10020 return pinfo;
10021 }
10022
/*
 * add_perf_counter()
 *
 * Register a user-added perf counter (perf/<device>/<event>) in the
 * list for its scope, enforcing the per-scope MAX_ADDED_*_COUNTERS
 * limits.  Returns 0 on success, -1 when the counter is rejected.
 */
int add_perf_counter(const char *perf_device, const char *perf_event, const char *name_buffer, unsigned int width,
		     enum counter_scope scope, enum counter_type type, enum counter_format format)
{
	struct perf_counter_info *pinfo;

	/* reject before allocating if this scope is already full */
	switch (scope) {
	case SCOPE_CPU:
		if (sys.added_thread_perf_counters >= MAX_ADDED_THREAD_COUNTERS) {
			warnx("ignoring thread counter perf/%s/%s", perf_device, perf_event);
			return -1;
		}
		break;

	case SCOPE_CORE:
		if (sys.added_core_perf_counters >= MAX_ADDED_CORE_COUNTERS) {
			warnx("ignoring core counter perf/%s/%s", perf_device, perf_event);
			return -1;
		}
		break;

	case SCOPE_PACKAGE:
		if (sys.added_package_perf_counters >= MAX_ADDED_PACKAGE_COUNTERS) {
			warnx("ignoring package counter perf/%s/%s", perf_device, perf_event);
			return -1;
		}
		break;
	}

	pinfo = make_perf_counter_info(perf_device, perf_event, name_buffer, width, scope, type, format);

	/* make_perf_counter_info() exits on OOM, so this is belt-and-suspenders */
	if (!pinfo)
		return -1;

	/* push onto the front of the per-scope list and bump its count */
	switch (scope) {
	case SCOPE_CPU:
		pinfo->next = sys.perf_tp;
		sys.perf_tp = pinfo;
		++sys.added_thread_perf_counters;
		break;

	case SCOPE_CORE:
		pinfo->next = sys.perf_cp;
		sys.perf_cp = pinfo;
		++sys.added_core_perf_counters;
		break;

	case SCOPE_PACKAGE:
		pinfo->next = sys.perf_pp;
		sys.perf_pp = pinfo;
		++sys.added_package_perf_counters;
		break;
	}

	// FIXME: we might not have debug here yet
	if (debug)
		fprintf(stderr, "%s: %s/%s, name: %s, scope%d\n",
			__func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope);

	return 0;
}
10083
/*
 * Parse one --add argument of the MSR/sysfs/perf form, e.g.
 *   "msr0x10,u64,cpu,delta,MY_TSC"  or  "perf/msr/tsc,package,raw"
 * The string is a comma-separated token list; each token selects the
 * counter source (msrNNN / msr0xXXX / /sysfs/path / perf/device/event),
 * width (u32/u64), scope (cpu/core/package), type (cycles/seconds/usec),
 * format (raw/delta/percent), or a custom column name.  Unrecognized
 * leading tokens fall through to the name match.  On any failure this
 * prints help and exits.
 */
void parse_add_command_msr(char *add_command)
{
	int msr_num = 0;
	char *path = NULL;
	char perf_device[PERF_DEV_NAME_BYTES] = "";
	char perf_event[PERF_EVT_NAME_BYTES] = "";
	char name_buffer[PERF_NAME_BYTES] = "";
	int width = 64;		/* default counter width in bits */
	int fail = 0;
	enum counter_scope scope = SCOPE_CPU;
	enum counter_type type = COUNTER_CYCLES;
	enum counter_format format = FORMAT_DELTA;

	while (add_command) {

		/* MSR number: hex form must be tried before decimal. */
		if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
			goto next;

		if (sscanf(add_command, "msr%d", &msr_num) == 1)
			goto next;

		/* perf counter: "perf/<device>/<event>"; widths below must match %31 in the format. */
		BUILD_BUG_ON(ARRAY_SIZE(perf_device) <= 31);
		BUILD_BUG_ON(ARRAY_SIZE(perf_event) <= 31);
		if (sscanf(add_command, "perf/%31[^/]/%31[^,]", &perf_device[0], &perf_event[0]) == 2)
			goto next;

		/* Absolute path means a sysfs counter file. */
		if (*add_command == '/') {
			path = add_command;
			goto next;
		}

		/* Width: only u32/u64 accepted; anything else resets to 64. */
		if (sscanf(add_command, "u%d", &width) == 1) {
			if ((width == 32) || (width == 64))
				goto next;
			width = 64;
		}
		/* Scope keywords (prefix match, so "cpu..." also matches). */
		if (!strncmp(add_command, "cpu", strlen("cpu"))) {
			scope = SCOPE_CPU;
			goto next;
		}
		if (!strncmp(add_command, "core", strlen("core"))) {
			scope = SCOPE_CORE;
			goto next;
		}
		if (!strncmp(add_command, "package", strlen("package"))) {
			scope = SCOPE_PACKAGE;
			goto next;
		}
		/* Type keywords. */
		if (!strncmp(add_command, "cycles", strlen("cycles"))) {
			type = COUNTER_CYCLES;
			goto next;
		}
		if (!strncmp(add_command, "seconds", strlen("seconds"))) {
			type = COUNTER_SECONDS;
			goto next;
		}
		if (!strncmp(add_command, "usec", strlen("usec"))) {
			type = COUNTER_USEC;
			goto next;
		}
		/* Format keywords. */
		if (!strncmp(add_command, "raw", strlen("raw"))) {
			format = FORMAT_RAW;
			goto next;
		}
		if (!strncmp(add_command, "delta", strlen("delta"))) {
			format = FORMAT_DELTA;
			goto next;
		}
		if (!strncmp(add_command, "percent", strlen("percent"))) {
			format = FORMAT_PERCENT;
			goto next;
		}

		/* Anything else is taken as the column name (truncated at 18 chars / first comma). */
		BUILD_BUG_ON(ARRAY_SIZE(name_buffer) <= 18);
		if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {
			char *eos;

			eos = strchr(name_buffer, ',');
			if (eos)
				*eos = '\0';
			goto next;
		}

next:
		/* Advance to the next comma-separated token, NUL-terminating the current one. */
		add_command = strchr(add_command, ',');
		if (add_command) {
			*add_command = '\0';
			add_command++;
		}

	}
	/* A counter source is mandatory: MSR number, sysfs path, or a complete perf pair. */
	if ((msr_num == 0) && (path == NULL) && (perf_device[0] == '\0' || perf_event[0] == '\0')) {
		fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event) required\n");
		fail++;
	}

	/* Test for non-empty perf_device and perf_event */
	const bool is_perf_counter = perf_device[0] && perf_event[0];

	/* generate default column header */
	if (*name_buffer == '\0') {
		if (is_perf_counter) {
			snprintf(name_buffer, ARRAY_SIZE(name_buffer), "perf/%s", perf_event);
		} else {
			/* Lower-case 'x' marks a 32-bit MSR column, upper-case 'X' a 64-bit one. */
			if (width == 32)
				sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
			else
				sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
		}
	}

	if (is_perf_counter) {
		if (add_perf_counter(perf_device, perf_event, name_buffer, width, scope, type, format))
			fail++;
	} else {
		if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0, 0))
			fail++;
	}

	if (fail) {
		help();
		exit(1);
	}
}
10208
/* Return true when @str begins with @prefix (an empty prefix always matches). */
bool starts_with(const char *str, const char *prefix)
{
	while (*prefix) {
		if (*str++ != *prefix++)
			return false;
	}
	return true;
}
10213
pmt_parse_from_path(const char * target_path,unsigned int * out_guid,unsigned int * out_seq)10214 int pmt_parse_from_path(const char *target_path, unsigned int *out_guid, unsigned int *out_seq)
10215 {
10216 struct pmt_diriter_t pmt_iter;
10217 const struct dirent *dirname;
10218 struct stat stat, target_stat;
10219 int fd_telem_dir = -1;
10220 int fd_target_dir;
10221 unsigned int seq = 0;
10222 unsigned long guid, target_guid;
10223 int ret = -1;
10224
10225 fd_target_dir = open(target_path, O_RDONLY | O_DIRECTORY);
10226 if (fd_target_dir == -1) {
10227 return -1;
10228 }
10229
10230 if (fstat(fd_target_dir, &target_stat) == -1) {
10231 fprintf(stderr, "%s: Failed to stat the target: %s", __func__, strerror(errno));
10232 exit(1);
10233 }
10234
10235 if (parse_telem_info_file(fd_target_dir, "guid", "%lx", &target_guid)) {
10236 fprintf(stderr, "%s: Failed to parse the target guid file: %s", __func__, strerror(errno));
10237 exit(1);
10238 }
10239
10240 close(fd_target_dir);
10241
10242 pmt_diriter_init(&pmt_iter);
10243
10244 for (dirname = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); dirname != NULL;
10245 dirname = pmt_diriter_next(&pmt_iter)) {
10246
10247 fd_telem_dir = openat(dirfd(pmt_iter.dir), dirname->d_name, O_RDONLY | O_DIRECTORY);
10248 if (fd_telem_dir == -1)
10249 continue;
10250
10251 if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) {
10252 fprintf(stderr, "%s: Failed to parse the guid file: %s", __func__, strerror(errno));
10253 continue;
10254 }
10255
10256 if (fstat(fd_telem_dir, &stat) == -1) {
10257 fprintf(stderr, "%s: Failed to stat %s directory: %s", __func__,
10258 dirname->d_name, strerror(errno));
10259 continue;
10260 }
10261
10262 /*
10263 * If reached the same directory as target, exit the loop.
10264 * Seq has the correct value now.
10265 */
10266 if (stat.st_dev == target_stat.st_dev && stat.st_ino == target_stat.st_ino) {
10267 ret = 0;
10268 break;
10269 }
10270
10271 /*
10272 * If reached directory with the same guid,
10273 * but it's not the target directory yet,
10274 * increment seq and continue the search.
10275 */
10276 if (guid == target_guid)
10277 ++seq;
10278
10279 close(fd_telem_dir);
10280 fd_telem_dir = -1;
10281 }
10282
10283 pmt_diriter_remove(&pmt_iter);
10284
10285 if (fd_telem_dir != -1)
10286 close(fd_telem_dir);
10287
10288 if (!ret) {
10289 *out_guid = target_guid;
10290 *out_seq = seq;
10291 }
10292
10293 return ret;
10294 }
10295
/*
 * Parse one --add argument of the PMT form, e.g.
 *   "pmt,name=XT,type=txtal_time,domain=package0,offset=8,lsb=0,msb=63,guid=0x1a067102"
 * Tokens are comma-separated key=value pairs.  The counter may be located
 * either by guid= (plus optional seq=) or by path= to a telem directory;
 * the two are mutually exclusive.  Any validation failure prints a
 * message and exits.  On success the counter is registered via
 * pmt_add_counter().
 */
void parse_add_command_pmt(char *add_command)
{
	char *name = NULL;
	char *type_name = NULL;
	char *format_name = NULL;
	char *direct_path = NULL;
	static const char direct_path_prefix[] = "path=";
	unsigned int offset;
	unsigned int lsb;
	unsigned int msb;
	unsigned int guid;
	unsigned int seq = 0;	/* By default, pick first file in a sequence with a given GUID. */
	unsigned int domain_id;
	enum counter_scope scope = 0;
	enum pmt_datatype type = PMT_TYPE_RAW;
	enum counter_format format = FORMAT_RAW;
	bool has_offset = false;
	bool has_lsb = false;
	bool has_msb = false;
	bool has_format = true;	/* Format has a default value. */
	bool has_guid = false;
	bool has_scope = false;
	bool has_type = true;	/* Type has a default value. */

	/* Consume the "pmt," prefix. */
	add_command = strchr(add_command, ',');
	if (!add_command) {
		help();
		exit(1);
	}
	++add_command;

	/* Tokenize on commas; name=/type=/format=/path= keep pointers into the string. */
	while (add_command) {
		if (starts_with(add_command, "name=")) {
			name = add_command + strlen("name=");
			goto next;
		}

		if (starts_with(add_command, "type=")) {
			type_name = add_command + strlen("type=");
			goto next;
		}

		if (starts_with(add_command, "domain=")) {
			const size_t prefix_len = strlen("domain=");

			/* domain= carries both the scope and the domain instance number. */
			if (sscanf(add_command + prefix_len, "cpu%u", &domain_id) == 1) {
				scope = SCOPE_CPU;
				has_scope = true;
			} else if (sscanf(add_command + prefix_len, "core%u", &domain_id) == 1) {
				scope = SCOPE_CORE;
				has_scope = true;
			} else if (sscanf(add_command + prefix_len, "package%u", &domain_id) == 1) {
				scope = SCOPE_PACKAGE;
				has_scope = true;
			}

			if (!has_scope) {
				printf("%s: invalid value for scope. Expected cpu%%u, core%%u or package%%u.\n",
				       __func__);
				exit(1);
			}

			goto next;
		}

		if (starts_with(add_command, "format=")) {
			format_name = add_command + strlen("format=");
			goto next;
		}

		if (sscanf(add_command, "offset=%u", &offset) == 1) {
			has_offset = true;
			goto next;
		}

		if (sscanf(add_command, "lsb=%u", &lsb) == 1) {
			has_lsb = true;
			goto next;
		}

		if (sscanf(add_command, "msb=%u", &msb) == 1) {
			has_msb = true;
			goto next;
		}

		if (sscanf(add_command, "guid=%x", &guid) == 1) {
			has_guid = true;
			goto next;
		}

		if (sscanf(add_command, "seq=%x", &seq) == 1)
			goto next;

		if (strncmp(add_command, direct_path_prefix, strlen(direct_path_prefix)) == 0) {
			direct_path = add_command + strlen(direct_path_prefix);
			goto next;
		}
next:
		add_command = strchr(add_command, ',');
		if (add_command) {
			*add_command = '\0';
			add_command++;
		}
	}

	/* name= is mandatory and length-limited. */
	if (!name) {
		printf("%s: missing %s\n", __func__, "name");
		exit(1);
	}

	if (strlen(name) >= PMT_COUNTER_NAME_SIZE_BYTES) {
		printf("%s: name has to be at most %d characters long\n", __func__, PMT_COUNTER_NAME_SIZE_BYTES);
		exit(1);
	}

	/* format= accepts only "raw" or "delta". */
	if (format_name) {
		has_format = false;

		if (strcmp("raw", format_name) == 0) {
			format = FORMAT_RAW;
			has_format = true;
		}

		if (strcmp("delta", format_name) == 0) {
			format = FORMAT_DELTA;
			has_format = true;
		}

		if (!has_format) {
			fprintf(stderr, "%s: Invalid format %s. Expected raw or delta\n", __func__, format_name);
			exit(1);
		}
	}

	/* type= accepts "raw", "txtal_time" or "tcore_clock". */
	if (type_name) {
		has_type = false;

		if (strcmp("raw", type_name) == 0) {
			type = PMT_TYPE_RAW;
			has_type = true;
		}

		if (strcmp("txtal_time", type_name) == 0) {
			type = PMT_TYPE_XTAL_TIME;
			has_type = true;
		}

		if (strcmp("tcore_clock", type_name) == 0) {
			type = PMT_TYPE_TCORE_CLOCK;
			has_type = true;
		}

		if (!has_type) {
			printf("%s: invalid %s: %s\n", __func__, "type", type_name);
			exit(1);
		}
	}

	/* offset, lsb and msb are all mandatory. */
	if (!has_offset) {
		printf("%s : missing %s\n", __func__, "offset");
		exit(1);
	}

	if (!has_lsb) {
		printf("%s: missing %s\n", __func__, "lsb");
		exit(1);
	}

	if (!has_msb) {
		printf("%s: missing %s\n", __func__, "msb");
		exit(1);
	}

	/* path= and guid= are alternative ways to locate the counter. */
	if (direct_path && has_guid) {
		printf("%s: path and guid+seq parameters are mutually exclusive\n"
		       "notice: passed guid=0x%x and path=%s\n", __func__, guid, direct_path);
		exit(1);
	}

	if (direct_path) {
		if (pmt_parse_from_path(direct_path, &guid, &seq)) {
			printf("%s: failed to parse PMT file from %s\n", __func__, direct_path);
			exit(1);
		}

		/* GUID was just inferred from the direct path. */
		has_guid = true;
	}

	if (!has_guid) {
		printf("%s: missing %s\n", __func__, "guid or path");
		exit(1);
	}

	if (!has_scope) {
		printf("%s: missing %s\n", __func__, "scope");
		exit(1);
	}

	if (lsb > msb) {
		printf("%s: lsb > msb doesn't make sense\n", __func__);
		exit(1);
	}

	pmt_add_counter(guid, seq, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED);
}
10503
/* Dispatch a --add argument by its counter-family prefix. */
void parse_add_command(char *add_command)
{
	if (starts_with(add_command, "pmt"))
		parse_add_command_pmt(add_command);
	else
		parse_add_command_msr(add_command);
}
10510
is_deferred_add(char * name)10511 int is_deferred_add(char *name)
10512 {
10513 int i;
10514
10515 for (i = 0; i < deferred_add_index; ++i)
10516 if (!strcmp(name, deferred_add_names[i]))
10517 return 1;
10518 return 0;
10519 }
10520
is_deferred_skip(char * name)10521 int is_deferred_skip(char *name)
10522 {
10523 int i;
10524
10525 for (i = 0; i < deferred_skip_index; ++i)
10526 if (!strcmp(name, deferred_skip_names[i]))
10527 return 1;
10528 return 0;
10529 }
10530
/*
 * Discover cpuidle states on base_cpu via sysfs and add a per-cpu
 * percentage counter reading each state's "time" file, so %idle
 * residency columns can be shown.  No-op unless BIC_pct_idle is
 * selected.
 */
void probe_cpuidle_residency(void)
{
	char path[64];
	char name_buf[16];
	FILE *input;
	int state;
	int min_state = 1024, max_state = 0;	/* NOTE(review): tracked but never read in this function */
	char *sp;

	if (!DO_BIC(BIC_pct_idle))
		return;

	/* Walk states deepest-first; missing state files simply end the probe for that index. */
	for (state = 10; state >= 0; --state) {

		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
		input = fopen(path, "r");
		if (input == NULL)
			continue;
		if (!fgets(name_buf, sizeof(name_buf), input))
			err(1, "%s: failed to read file", path);

		/* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
		/* ...then append '%' so the column reads e.g. "C1%". */
		sp = strchr(name_buf, '-');
		if (!sp)
			sp = strchrnul(name_buf, '\n');
		*sp = '%';
		*(sp + 1) = '\0';

		remove_underbar(name_buf);

		fclose(input);

		sprintf(path, "cpuidle/state%d/time", state);

		/* NOTE(review): DO_BIC(BIC_pct_idle) is always true here (early return above), so this never skips. */
		if (!DO_BIC(BIC_pct_idle) && !is_deferred_add(name_buf))
			continue;

		if (is_deferred_skip(name_buf))
			continue;

		/* "time" is in usec; FORMAT_PERCENT converts it to % of the interval. */
		add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0);

		if (state > max_state)
			max_state = state;
		if (state < min_state)
			min_state = state;
	}
}
10579
probe_cpuidle_counts(void)10580 void probe_cpuidle_counts(void)
10581 {
10582 char path[64];
10583 char name_buf[16];
10584 FILE *input;
10585 int state;
10586 int min_state = 1024, max_state = 0;
10587 char *sp;
10588
10589 if (!DO_BIC(BIC_cpuidle))
10590 return;
10591
10592 for (state = 10; state >= 0; --state) {
10593
10594 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
10595 input = fopen(path, "r");
10596 if (input == NULL)
10597 continue;
10598 if (!fgets(name_buf, sizeof(name_buf), input))
10599 err(1, "%s: failed to read file", path);
10600 fclose(input);
10601
10602 remove_underbar(name_buf);
10603
10604 if (!DO_BIC(BIC_cpuidle) && !is_deferred_add(name_buf))
10605 continue;
10606
10607 if (is_deferred_skip(name_buf))
10608 continue;
10609
10610 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
10611 sp = strchr(name_buf, '-');
10612 if (!sp)
10613 sp = strchrnul(name_buf, '\n');
10614
10615 /*
10616 * The 'below' sysfs file always contains 0 for the deepest state (largest index),
10617 * do not add it.
10618 */
10619 if (state != max_state) {
10620 /*
10621 * Add 'C1+' for C1, and so on. The 'below' sysfs file always contains 0 for
10622 * the last state, so do not add it.
10623 */
10624
10625 *sp = '+';
10626 *(sp + 1) = '\0';
10627 sprintf(path, "cpuidle/state%d/below", state);
10628 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
10629 }
10630
10631 *sp = '\0';
10632 sprintf(path, "cpuidle/state%d/usage", state);
10633 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
10634
10635 /*
10636 * The 'above' sysfs file always contains 0 for the shallowest state (smallest
10637 * index), do not add it.
10638 */
10639 if (state != min_state) {
10640 *sp = '-';
10641 *(sp + 1) = '\0';
10642 sprintf(path, "cpuidle/state%d/above", state);
10643 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
10644 }
10645 }
10646 }
10647
10648 /*
10649 * parse cpuset with following syntax
10650 * 1,2,4..6,8-10 and set bits in cpu_subset
10651 */
parse_cpu_command(char * optarg)10652 void parse_cpu_command(char *optarg)
10653 {
10654 if (!strcmp(optarg, "core")) {
10655 if (cpu_subset)
10656 goto error;
10657 show_core_only++;
10658 return;
10659 }
10660 if (!strcmp(optarg, "package")) {
10661 if (cpu_subset)
10662 goto error;
10663 show_pkg_only++;
10664 return;
10665 }
10666 if (show_core_only || show_pkg_only)
10667 goto error;
10668
10669 cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
10670 if (cpu_subset == NULL)
10671 err(3, "CPU_ALLOC");
10672 cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);
10673
10674 CPU_ZERO_S(cpu_subset_size, cpu_subset);
10675
10676 if (parse_cpu_str(optarg, cpu_subset, cpu_subset_size))
10677 goto error;
10678
10679 return;
10680
10681 error:
10682 fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
10683 help();
10684 exit(-1);
10685 }
10686
/*
 * Parse the command line into the program's global option state.
 * Options are processed in two passes: --no-msr/--no-perf first,
 * because they can invalidate counter sources requested by other
 * options (e.g. --add msr... combined with --no-msr), then everything
 * else.  Exits on bad options via help()/exit().
 */
void cmdline(int argc, char **argv)
{
	int opt;
	int option_index = 0;
	static struct option long_options[] = {
		{ "add", required_argument, 0, 'a' },
		{ "cpu", required_argument, 0, 'c' },
		{ "Dump", no_argument, 0, 'D' },
		{ "debug", no_argument, 0, 'd' },	/* internal, not documented */
		{ "enable", required_argument, 0, 'e' },
		{ "force", no_argument, 0, 'f' },
		{ "interval", required_argument, 0, 'i' },
		{ "IPC", no_argument, 0, 'I' },
		{ "num_iterations", required_argument, 0, 'n' },
		{ "header_iterations", required_argument, 0, 'N' },
		{ "help", no_argument, 0, 'h' },
		{ "hide", required_argument, 0, 'H' },	// meh, -h taken by --help
		{ "Joules", no_argument, 0, 'J' },
		{ "list", no_argument, 0, 'l' },
		{ "out", required_argument, 0, 'o' },
		{ "quiet", no_argument, 0, 'q' },
		{ "no-msr", no_argument, 0, 'M' },
		{ "no-perf", no_argument, 0, 'P' },
		{ "show", required_argument, 0, 's' },
		{ "Summary", no_argument, 0, 'S' },
		{ "TCC", required_argument, 0, 'T' },
		{ "version", no_argument, 0, 'v' },
		{ 0, 0, 0, 0 }
	};

	progname = argv[0];

	/*
	 * Parse some options early, because they may make other options invalid,
	 * like adding the MSR counter with --add and at the same time using --no-msr.
	 */
	while ((opt = getopt_long_only(argc, argv, "+MPn:", long_options, &option_index)) != -1) {
		switch (opt) {
		case 'M':
			no_msr = 1;
			break;
		case 'P':
			no_perf = 1;
			break;
		default:
			break;
		}
	}
	/* Rewind getopt state so the main pass re-scans from the beginning. */
	optind = 0;

	while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qMST:v", long_options, &option_index)) != -1) {
		switch (opt) {
		case 'a':
			parse_add_command(optarg);
			break;
		case 'c':
			parse_cpu_command(optarg);
			break;
		case 'D':
			dump_only++;
			/*
			 * Force the no_perf early to prevent using it as a source.
			 * User asks for raw values, but perf returns them relative
			 * to the opening of the file descriptor.
			 */
			no_perf = 1;
			break;
		case 'e':
			/* --enable specified counter */
			bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
			break;
		case 'f':
			force_load++;
			break;
		case 'd':
			debug++;
			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
			break;
		case 'H':
			/*
			 * --hide: do not show those specified
			 * multiple invocations simply clear more bits in enabled mask
			 */
			bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
			break;
		case 'h':
		default:
			help();
			exit(1);
		case 'i':
			{
				/* --interval: split fractional seconds into timeval and timespec forms. */
				double interval = strtod(optarg, NULL);

				if (interval < 0.001) {
					fprintf(outf, "interval %f seconds is too small\n", interval);
					exit(2);
				}

				interval_tv.tv_sec = interval_ts.tv_sec = interval;
				interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
				interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
			}
			break;
		case 'J':
			rapl_joules++;
			break;
		case 'l':
			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
			list_header_only++;
			quiet++;
			break;
		case 'o':
			outf = fopen_or_die(optarg, "w");
			break;
		case 'q':
			quiet = 1;
			break;
		case 'M':
		case 'P':
			/* Parsed earlier */
			break;
		case 'n':
			/* strtod (not atoi) so inputs like "1e3" work; truncated to int. */
			num_iterations = strtod(optarg, NULL);

			if (num_iterations <= 0) {
				fprintf(outf, "iterations %d should be positive number\n", num_iterations);
				exit(2);
			}
			break;
		case 'N':
			header_iterations = strtod(optarg, NULL);

			if (header_iterations <= 0) {
				fprintf(outf, "iterations %d should be positive number\n", header_iterations);
				exit(2);
			}
			break;
		case 's':
			/*
			 * --show: show only those specified
			 * The 1st invocation will clear and replace the enabled mask
			 * subsequent invocations can add to it.
			 */
			if (shown == 0)
				bic_enabled = bic_lookup(optarg, SHOW_LIST);
			else
				bic_enabled |= bic_lookup(optarg, SHOW_LIST);
			shown = 1;
			break;
		case 'S':
			summary_only++;
			break;
		case 'T':
			tj_max_override = atoi(optarg);
			break;
		case 'v':
			print_version();
			exit(0);
			break;
		}
	}
}
10849
set_rlimit(void)10850 void set_rlimit(void)
10851 {
10852 struct rlimit limit;
10853
10854 if (getrlimit(RLIMIT_NOFILE, &limit) < 0)
10855 err(1, "Failed to get rlimit");
10856
10857 if (limit.rlim_max < MAX_NOFILE)
10858 limit.rlim_max = MAX_NOFILE;
10859 if (limit.rlim_cur < MAX_NOFILE)
10860 limit.rlim_cur = MAX_NOFILE;
10861
10862 if (setrlimit(RLIMIT_NOFILE, &limit) < 0)
10863 err(1, "Failed to set rlimit");
10864 }
10865
main(int argc,char ** argv)10866 int main(int argc, char **argv)
10867 {
10868 int fd, ret;
10869
10870 fd = open("/sys/fs/cgroup/cgroup.procs", O_WRONLY);
10871 if (fd < 0)
10872 goto skip_cgroup_setting;
10873
10874 ret = write(fd, "0\n", 2);
10875 if (ret == -1)
10876 perror("Can't update cgroup\n");
10877
10878 close(fd);
10879
10880 skip_cgroup_setting:
10881 outf = stderr;
10882 cmdline(argc, argv);
10883
10884 if (!quiet) {
10885 print_version();
10886 print_bootcmd();
10887 }
10888
10889 probe_cpuidle_residency();
10890 probe_cpuidle_counts();
10891
10892 if (!getuid())
10893 set_rlimit();
10894
10895 turbostat_init();
10896
10897 if (!no_msr)
10898 msr_sum_record();
10899
10900 /* dump counters and exit */
10901 if (dump_only)
10902 return get_and_dump_counters();
10903
10904 /* list header and exit */
10905 if (list_header_only) {
10906 print_header(",");
10907 flush_output_stdout();
10908 return 0;
10909 }
10910
10911 /*
10912 * if any params left, it must be a command to fork
10913 */
10914 if (argc - optind)
10915 return fork_it(argv + optind);
10916 else
10917 turbostat_loop();
10918
10919 return 0;
10920 }
10921