1 // SPDX-License-Identifier: GPL-2.0-only
2 #include "cgroup.h"
3 #include "counts.h"
4 #include "cputopo.h"
5 #include "evsel.h"
6 #include "pmu.h"
7 #include "print-events.h"
8 #include "smt.h"
9 #include "stat.h"
10 #include "time-utils.h"
11 #include "tool_pmu.h"
12 #include "tsc.h"
13 #include <api/fs/fs.h>
14 #include <api/io.h>
15 #include <internal/threadmap.h>
16 #include <perf/threadmap.h>
17 #include <fcntl.h>
18 #include <strings.h>
19
20 static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = {
21 NULL,
22 "duration_time",
23 "user_time",
24 "system_time",
25 "has_pmem",
26 "num_cores",
27 "num_cpus",
28 "num_cpus_online",
29 "num_dies",
30 "num_packages",
31 "slots",
32 "smt_on",
33 "system_tsc_freq",
34 "core_wide",
35 "target_cpu",
36 };
37
/*
 * Should the named tool event be hidden on this architecture? Some tool
 * events only make sense on one architecture.
 */
bool tool_pmu__skip_event(const char *name __maybe_unused)
{
	bool skip = false;

#if !defined(__aarch64__)
	/* The slots event should only appear on arm64. */
	skip = skip || strcasecmp(name, "slots") == 0;
#endif
#if !defined(__i386__) && !defined(__x86_64__)
	/* The system_tsc_freq event should only appear on x86. */
	skip = skip || strcasecmp(name, "system_tsc_freq") == 0;
#endif
	return skip;
}
52
/*
 * Number of tool events hidden on this architecture; mirrors the checks in
 * tool_pmu__skip_event().
 */
int tool_pmu__num_skip_events(void)
{
	int skipped = 0;

#if !defined(__aarch64__)
	skipped++;	/* slots */
#endif
#if !defined(__i386__) && !defined(__x86_64__)
	skipped++;	/* system_tsc_freq */
#endif
	return skipped;
}
65
tool_pmu__event_to_str(enum tool_pmu_event ev)66 const char *tool_pmu__event_to_str(enum tool_pmu_event ev)
67 {
68 if ((ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX) &&
69 !tool_pmu__skip_event(tool_pmu__event_names[ev]))
70 return tool_pmu__event_names[ev];
71
72 return NULL;
73 }
74
tool_pmu__str_to_event(const char * str)75 enum tool_pmu_event tool_pmu__str_to_event(const char *str)
76 {
77 int i;
78
79 if (tool_pmu__skip_event(str))
80 return TOOL_PMU__EVENT_NONE;
81
82 tool_pmu__for_each_event(i) {
83 if (!strcasecmp(str, tool_pmu__event_names[i]))
84 return i;
85 }
86 return TOOL_PMU__EVENT_NONE;
87 }
88
perf_pmu__is_tool(const struct perf_pmu * pmu)89 bool perf_pmu__is_tool(const struct perf_pmu *pmu)
90 {
91 return pmu && pmu->type == PERF_PMU_TYPE_TOOL;
92 }
93
evsel__is_tool(const struct evsel * evsel)94 bool evsel__is_tool(const struct evsel *evsel)
95 {
96 return perf_pmu__is_tool(evsel->pmu);
97 }
98
evsel__tool_event(const struct evsel * evsel)99 enum tool_pmu_event evsel__tool_event(const struct evsel *evsel)
100 {
101 if (!evsel__is_tool(evsel))
102 return TOOL_PMU__EVENT_NONE;
103
104 return (enum tool_pmu_event)evsel->core.attr.config;
105 }
106
evsel__tool_pmu_event_name(const struct evsel * evsel)107 const char *evsel__tool_pmu_event_name(const struct evsel *evsel)
108 {
109 return tool_pmu__event_to_str(evsel->core.attr.config);
110 }
111
/* Consume characters from io until 'e' is seen; false on EOF/error. */
static bool read_until_char(struct io *io, char e)
{
	for (;;) {
		int c = io__get_char(io);

		if (c == -1)
			return false;
		if (c == e)
			return true;
	}
}
123
/*
 * Read the 'field'-th numeric value on the given CPU's line of an already
 * open /proc/stat fd into *val. Returns 0 on success, -EINVAL on any parse
 * failure.
 */
static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val)
{
	static const char prefix[] = "cpu";
	char buf[256];
	struct io io;
	int line, i;

	io__init(&io, fd, buf, sizeof(buf));

	/* Skip lines until the one for the requested CPU. */
	for (line = -1; line < cpu.cpu; line++) {
		if (!read_until_char(&io, '\n'))
			return -EINVAL;
	}
	/* The line must begin with "cpu". */
	for (i = 0; prefix[i] != '\0'; i++) {
		if (io__get_char(&io) != prefix[i])
			return -EINVAL;
	}
	/* Skip the N of cpuN. */
	if (!read_until_char(&io, ' '))
		return -EINVAL;

	/* Walk the space-separated decimal fields until the wanted one. */
	for (i = 1; io__get_dec(&io, val) == ' '; i++) {
		if (field == i)
			return 0;
	}
	return -EINVAL;
}
156
/*
 * Read the 'field'-th field of an already open /proc/<pid>/stat fd into
 * *val. Only numeric fields can be returned: field 2 (comm, a string) and
 * field 3 (state, a char) yield -EINVAL. Returns 0 on success, -EINVAL on
 * parse failure or when a non-numeric field was requested.
 */
static int read_pid_stat_field(int fd, int field, __u64 *val)
{
	char buf[256];
	struct io io;
	int c, i;

	io__init(&io, fd, buf, sizeof(buf));
	/* Field 1 is the pid, a plain decimal terminated by a space. */
	if (io__get_dec(&io, val) != ' ')
		return -EINVAL;
	if (field == 1)
		return 0;

	/* Skip comm: parenthesized and possibly containing spaces. */
	if (io__get_char(&io) != '(' || !read_until_char(&io, ')'))
		return -EINVAL;
	if (field == 2)
		return -EINVAL; /* String can't be returned. */

	/* Skip the single-character state field. */
	if (io__get_char(&io) != ' ' || io__get_char(&io) == -1)
		return -EINVAL;
	if (field == 3)
		return -EINVAL; /* String can't be returned. */

	/* Loop over the remaining, numeric fields. */
	if (io__get_char(&io) != ' ')
		return -EINVAL;

	i = 4;
	while (true) {
		c = io__get_dec(&io, val);
		if (c == -1)
			return -EINVAL;
		if (c == -2) {
			/* Assume a -ve was read: re-read the digits and negate. */
			c = io__get_dec(&io, val);
			*val *= -1;
		}
		if (c != ' ')
			return -EINVAL;
		if (field == i)
			return 0;
		i++;
	}
	/* Unreachable: the loop above only exits via return. */
	return -EINVAL;
}
203
evsel__tool_pmu_prepare_open(struct evsel * evsel,struct perf_cpu_map * cpus,int nthreads)204 int evsel__tool_pmu_prepare_open(struct evsel *evsel,
205 struct perf_cpu_map *cpus,
206 int nthreads)
207 {
208 if ((evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME ||
209 evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) &&
210 !evsel->start_times) {
211 evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus),
212 nthreads,
213 sizeof(__u64));
214 if (!evsel->start_times)
215 return -ENOMEM;
216 }
217 return 0;
218 }
219
/* Per-(cpu map idx, thread) file descriptor slot stored in evsel->core.fd. */
#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
221
/*
 * "Open" a tool PMU event. No perf_event_open syscall is made; instead a
 * baseline is captured:
 *  - duration_time: record the current clock in evsel->start_time;
 *  - user_time/system_time: open /proc/<pid>/stat (or /proc/stat when there
 *    is no specific pid) per CPU/thread and record the starting value in
 *    evsel->start_times;
 *  - other tool events need no open-time state.
 *
 * Returns 0 on success or a negative errno; on failure every fd opened so
 * far is closed and threads->err_thread records the failing thread.
 */
int evsel__tool_pmu_open(struct evsel *evsel,
			 struct perf_thread_map *threads,
			 int start_cpu_map_idx, int end_cpu_map_idx)
{
	enum tool_pmu_event ev = evsel__tool_event(evsel);
	int pid = -1, idx = 0, thread = 0, nthreads, err = 0, old_errno;

	/* num_cpus is computed entirely at read time. */
	if (ev == TOOL_PMU__EVENT_NUM_CPUS)
		return 0;

	if (ev == TOOL_PMU__EVENT_DURATION_TIME) {
		if (evsel->core.attr.sample_period) /* no sampling */
			return -EINVAL;
		evsel->start_time = rdclock();
		return 0;
	}

	/* When counting a cgroup, times come from the cgroup's fd. */
	if (evsel->cgrp)
		pid = evsel->cgrp->fd;

	nthreads = perf_thread_map__nr(threads);
	for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
		for (thread = 0; thread < nthreads; thread++) {
			if (!evsel->cgrp && !evsel->core.system_wide)
				pid = perf_thread_map__pid(threads, thread);

			if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) {
				bool system = ev == TOOL_PMU__EVENT_SYSTEM_TIME;
				__u64 *start_time = NULL;
				int fd;

				if (evsel->core.attr.sample_period) {
					/* no sampling */
					err = -EINVAL;
					goto out_close;
				}
				if (pid > -1) {
					char buf[64];

					snprintf(buf, sizeof(buf), "/proc/%d/stat", pid);
					fd = open(buf, O_RDONLY);
					evsel->pid_stat = true;
				} else {
					fd = open("/proc/stat", O_RDONLY);
				}
				/* Store fd (possibly -1) before the error check so cleanup sees it. */
				FD(evsel, idx, thread) = fd;
				if (fd < 0) {
					err = -errno;
					goto out_close;
				}
				/*
				 * Record the baseline so later reads report a
				 * delta. Per-pid: field 15 (system) or 14
				 * (user); per-CPU: field 3 (system) or 1 (user).
				 */
				start_time = xyarray__entry(evsel->start_times, idx, thread);
				if (pid > -1) {
					err = read_pid_stat_field(fd, system ? 15 : 14,
								  start_time);
				} else {
					struct perf_cpu cpu;

					cpu = perf_cpu_map__cpu(evsel->core.cpus, idx);
					err = read_stat_field(fd, cpu, system ? 3 : 1,
							      start_time);
				}
				if (err)
					goto out_close;
			}

		}
	}
	return 0;
out_close:
	if (err)
		threads->err_thread = thread;

	/* Close everything opened so far, preserving the original errno. */
	old_errno = errno;
	do {
		while (--thread >= 0) {
			if (FD(evsel, idx, thread) >= 0)
				close(FD(evsel, idx, thread));
			FD(evsel, idx, thread) = -1;
		}
		/* Earlier CPU indices had all their threads opened. */
		thread = nthreads;
	} while (--idx >= 0);
	errno = old_errno;
	return err;
}
306
#if !defined(__i386__) && !defined(__x86_64__)
/* Fallback for non-x86 builds: the TSC frequency is unknown, report 0. */
u64 arch_get_tsc_freq(void)
{
	return 0;
}
#endif
313
#if !defined(__aarch64__)
/*
 * Fallback for non-arm64 builds: slots-per-cycle is unknown; returning 0
 * makes tool_pmu__read_event() report the slots event as unreadable.
 */
u64 tool_pmu__cpu_slots_per_cycle(void)
{
	return 0;
}
#endif
320
has_pmem(void)321 static bool has_pmem(void)
322 {
323 static bool has_pmem, cached;
324 const char *sysfs = sysfs__mountpoint();
325 char path[PATH_MAX];
326
327 if (!cached) {
328 snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs);
329 has_pmem = access(path, F_OK) == 0;
330 cached = true;
331 }
332 return has_pmem;
333 }
334
/*
 * Compute the value of the given tool event into *result.
 *
 * @ev: which tool event to evaluate.
 * @evsel: optional; when present its CPU maps scope the num_cpus* counts.
 * @system_wide, @user_requested_cpu_list: stat configuration forwarded to
 * the core_wide/target_cpu computations.
 *
 * Returns true with *result set on success; false for events not computed
 * here (the timing events, handled in evsel__tool_pmu_read) or when the
 * value is unavailable (e.g. slots on a CPU reporting 0).
 */
bool tool_pmu__read_event(enum tool_pmu_event ev,
			  struct evsel *evsel,
			  bool system_wide,
			  const char *user_requested_cpu_list,
			  u64 *result)
{
	const struct cpu_topology *topology;

	switch (ev) {
	case TOOL_PMU__EVENT_HAS_PMEM:
		*result = has_pmem() ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_NUM_CORES:
		topology = online_topology();
		*result = topology->core_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_NUM_CPUS:
		if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
			/* No evsel to be specific to. */
			*result = cpu__max_present_cpu().cpu;
		} else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
			/* Evsel just has specific CPUs. */
			*result = perf_cpu_map__nr(evsel->core.cpus);
		} else {
			/*
			 * "Any CPU" event that can be scheduled on any CPU in
			 * the PMU's cpumask. The PMU cpumask should be saved in
			 * pmu_cpus. If not present fall back to max.
			 */
			if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus))
				*result = perf_cpu_map__nr(evsel->core.pmu_cpus);
			else
				*result = cpu__max_present_cpu().cpu;
		}
		return true;

	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: {
		struct perf_cpu_map *online = cpu_map__online();

		if (!online)
			return false;

		if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
			/* No evsel to be specific to. */
			*result = perf_cpu_map__nr(online);
		} else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
			/* Evsel just has specific CPUs: count those online. */
			struct perf_cpu_map *tmp =
				perf_cpu_map__intersect(online, evsel->core.cpus);

			*result = perf_cpu_map__nr(tmp);
			perf_cpu_map__put(tmp);
		} else {
			/*
			 * "Any CPU" event that can be scheduled on any CPU in
			 * the PMU's cpumask. The PMU cpumask should be saved in
			 * pmu_cpus, if not present then just the online cpu
			 * mask.
			 */
			if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus)) {
				struct perf_cpu_map *tmp =
					perf_cpu_map__intersect(online, evsel->core.pmu_cpus);

				*result = perf_cpu_map__nr(tmp);
				perf_cpu_map__put(tmp);
			} else {
				*result = perf_cpu_map__nr(online);
			}
		}
		perf_cpu_map__put(online);
		return true;
	}
	case TOOL_PMU__EVENT_NUM_DIES:
		topology = online_topology();
		*result = topology->die_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_NUM_PACKAGES:
		topology = online_topology();
		*result = topology->package_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_SLOTS:
		/* 0 slots-per-cycle means unknown: report failure. */
		*result = tool_pmu__cpu_slots_per_cycle();
		return *result ? true : false;

	case TOOL_PMU__EVENT_SMT_ON:
		*result = smt_on() ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
		*result = arch_get_tsc_freq();
		return true;

	case TOOL_PMU__EVENT_CORE_WIDE:
		*result = core_wide(system_wide, user_requested_cpu_list) ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_TARGET_CPU:
		/* 1 when counting system-wide or on user-requested CPUs. */
		*result = system_wide || (user_requested_cpu_list != NULL) ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_NONE:
	case TOOL_PMU__EVENT_DURATION_TIME:
	case TOOL_PMU__EVENT_USER_TIME:
	case TOOL_PMU__EVENT_SYSTEM_TIME:
	case TOOL_PMU__EVENT_MAX:
	default:
		return false;
	}
}
448
/*
 * Fold a new reading into *count. With an old_count, 'raw' values replace
 * the previous value while non-raw values accumulate. enabled and running
 * are kept equal and non-zero so their ratio is 100% and the exact values
 * don't matter (avoids issues with evsel__count_has_error).
 */
static void perf_counts__update(struct perf_counts_values *count,
				const struct perf_counts_values *old_count,
				bool raw, u64 val)
{
	if (!old_count) {
		count->val = val;
		count->run++;
		count->ena++;
		count->lost = 0;
		return;
	}

	count->val = raw ? val : old_count->val + val;
	count->run = old_count->run + 1;
	count->ena = old_count->ena + 1;
	count->lost = old_count->lost;
}
470
evsel__tool_pmu_read(struct evsel * evsel,int cpu_map_idx,int thread)471 int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
472 {
473 __u64 *start_time, cur_time, delta_start;
474 int err = 0;
475 struct perf_counts_values *count, *old_count = NULL;
476 bool adjust = false;
477 enum tool_pmu_event ev = evsel__tool_event(evsel);
478
479 count = perf_counts(evsel->counts, cpu_map_idx, thread);
480 if (evsel->prev_raw_counts)
481 old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
482
483 switch (ev) {
484 case TOOL_PMU__EVENT_HAS_PMEM:
485 case TOOL_PMU__EVENT_NUM_CORES:
486 case TOOL_PMU__EVENT_NUM_CPUS:
487 case TOOL_PMU__EVENT_NUM_CPUS_ONLINE:
488 case TOOL_PMU__EVENT_NUM_DIES:
489 case TOOL_PMU__EVENT_NUM_PACKAGES:
490 case TOOL_PMU__EVENT_SLOTS:
491 case TOOL_PMU__EVENT_SMT_ON:
492 case TOOL_PMU__EVENT_CORE_WIDE:
493 case TOOL_PMU__EVENT_TARGET_CPU:
494 case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: {
495 u64 val = 0;
496
497 if (cpu_map_idx == 0 && thread == 0) {
498 if (!tool_pmu__read_event(ev, evsel,
499 stat_config.system_wide,
500 stat_config.user_requested_cpu_list,
501 &val)) {
502 count->lost++;
503 val = 0;
504 }
505 }
506 perf_counts__update(count, old_count, /*raw=*/false, val);
507 return 0;
508 }
509 case TOOL_PMU__EVENT_DURATION_TIME:
510 /*
511 * Pretend duration_time is only on the first CPU and thread, or
512 * else aggregation will scale duration_time by the number of
513 * CPUs/threads.
514 */
515 start_time = &evsel->start_time;
516 if (cpu_map_idx == 0 && thread == 0)
517 cur_time = rdclock();
518 else
519 cur_time = *start_time;
520 break;
521 case TOOL_PMU__EVENT_USER_TIME:
522 case TOOL_PMU__EVENT_SYSTEM_TIME: {
523 bool system = evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME;
524 int fd = FD(evsel, cpu_map_idx, thread);
525
526 start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread);
527 lseek(fd, SEEK_SET, 0);
528 if (evsel->pid_stat) {
529 /* The event exists solely on 1 CPU. */
530 if (cpu_map_idx == 0)
531 err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time);
532 else
533 cur_time = 0;
534 } else {
535 /* The event is for all threads. */
536 if (thread == 0) {
537 struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus,
538 cpu_map_idx);
539
540 err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time);
541 } else {
542 cur_time = 0;
543 }
544 }
545 adjust = true;
546 break;
547 }
548 case TOOL_PMU__EVENT_NONE:
549 case TOOL_PMU__EVENT_MAX:
550 default:
551 err = -EINVAL;
552 }
553 if (err)
554 return err;
555
556 delta_start = cur_time - *start_time;
557 if (adjust) {
558 __u64 ticks_per_sec = sysconf(_SC_CLK_TCK);
559
560 delta_start *= 1e9 / ticks_per_sec;
561 }
562 perf_counts__update(count, old_count, /*raw=*/true, delta_start);
563 return 0;
564 }
565
tool_pmu__new(void)566 struct perf_pmu *tool_pmu__new(void)
567 {
568 struct perf_pmu *tool = zalloc(sizeof(struct perf_pmu));
569
570 if (!tool)
571 return NULL;
572
573 if (perf_pmu__init(tool, PERF_PMU_TYPE_TOOL, "tool") != 0) {
574 perf_pmu__delete(tool);
575 return NULL;
576 }
577 tool->events_table = find_core_events_table("common", "common");
578 return tool;
579 }
580