// SPDX-License-Identifier: GPL-2.0-only
#include "cgroup.h"
#include "counts.h"
#include "cputopo.h"
#include "debug.h"
#include "evsel.h"
#include "pmu.h"
#include "print-events.h"
#include "smt.h"
#include "stat.h"
#include "time-utils.h"
#include "tool_pmu.h"
#include "tsc.h"
#include <api/fs/fs.h>
#include <api/io.h>
#include <internal/threadmap.h>
#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <fcntl.h>
#include <strings.h>

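/*
 * Names for the tool PMU events, indexed by enum tool_pmu_event. The first
 * entry is NULL to match TOOL_PMU__EVENT_NONE.
 */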
static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = {
	NULL,
	"duration_time",
	"user_time",
	"system_time",
	"has_pmem",
	"num_cores",
	"num_cpus",
	"num_cpus_online",
	"num_dies",
	"num_packages",
	"slots",
	"smt_on",
	"system_tsc_freq",
	"core_wide",
	"target_cpu",
};

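/*
 * Skip events that are only meaningful on particular architectures, so that
 * they aren't listed or parsed elsewhere.
 */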
bool tool_pmu__skip_event(const char *name __maybe_unused)
{
#if !defined(__aarch64__)
	/* The slots event should only appear on arm64. */
	if (strcasecmp(name, "slots") == 0)
		return true;
#endif
#if !defined(__i386__) && !defined(__x86_64__)
	/* The system_tsc_freq event should only appear on x86. */
	if (strcasecmp(name, "system_tsc_freq") == 0)
		return true;
#endif
	return false;
}

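/* Number of events tool_pmu__skip_event() will skip on this architecture. */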
int tool_pmu__num_skip_events(void)
{
	int num = 0;

#if !defined(__aarch64__)
	num++;
#endif
#if !defined(__i386__) && !defined(__x86_64__)
	num++;
#endif
	return num;
}

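/* Return the name for a tool PMU event, or NULL if it is invalid or skipped. */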
const char *tool_pmu__event_to_str(enum tool_pmu_event ev)
{
	if ((ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX) &&
	    !tool_pmu__skip_event(tool_pmu__event_names[ev]))
		return tool_pmu__event_names[ev];

	return NULL;
}

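/* Case-insensitively parse an event name to its enum tool_pmu_event value. */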
enum tool_pmu_event tool_pmu__str_to_event(const char *str)
{
	int i;

	if (tool_pmu__skip_event(str))
		return TOOL_PMU__EVENT_NONE;

	tool_pmu__for_each_event(i) {
		if (!strcasecmp(str, tool_pmu__event_names[i]))
			return i;
	}
	return TOOL_PMU__EVENT_NONE;
}

bool perf_pmu__is_tool(const struct perf_pmu *pmu)
{
	return pmu && pmu->type == PERF_PMU_TYPE_TOOL;
}

bool evsel__is_tool(const struct evsel *evsel)
{
	return perf_pmu__is_tool(evsel->pmu);
}

enum tool_pmu_event evsel__tool_event(const struct evsel *evsel)
{
	if (!evsel__is_tool(evsel))
		return TOOL_PMU__EVENT_NONE;

	return (enum tool_pmu_event)evsel->core.attr.config;
}

const char *evsel__tool_pmu_event_name(const struct evsel *evsel)
{
	return tool_pmu__event_to_str(evsel->core.attr.config);
}

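/*
 * The CPU map for a tool PMU event. user_time and system_time are read per
 * online CPU from /proc, everything else is a single value read on CPU 0.
 */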
struct perf_cpu_map *tool_pmu__cpus(struct perf_event_attr *attr)
{
	static struct perf_cpu_map *cpu0_map;
	enum tool_pmu_event event = (enum tool_pmu_event)attr->config;

	if (event <= TOOL_PMU__EVENT_NONE || event >= TOOL_PMU__EVENT_MAX) {
		pr_err("Invalid tool PMU event config %llx\n", attr->config);
		return NULL;
	}
	if (event == TOOL_PMU__EVENT_USER_TIME || event == TOOL_PMU__EVENT_SYSTEM_TIME)
		return cpu_map__online();

	if (!cpu0_map)
		cpu0_map = perf_cpu_map__new_int(0);
	return perf_cpu_map__get(cpu0_map);
}

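/* Consume characters from io until 'e' is seen, failing on EOF/error. */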
static bool read_until_char(struct io *io, char e)
{
	int c;

	do {
		c = io__get_char(io);
		if (c == -1)
			return false;
	} while (c != e);
	return true;
}

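/*
 * Read the given 1-indexed numeric field from the "cpuN" line of a
 * /proc/stat-style file open on fd.
 */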
static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val)
{
	char buf[256];
	struct io io;
	int i;

	io__init(&io, fd, buf, sizeof(buf));

	/* Skip lines to relevant CPU. */
	for (i = -1; i < cpu.cpu; i++) {
		if (!read_until_char(&io, '\n'))
			return -EINVAL;
	}
	/* Skip to "cpu". */
	if (io__get_char(&io) != 'c')
		return -EINVAL;
	if (io__get_char(&io) != 'p')
		return -EINVAL;
	if (io__get_char(&io) != 'u')
		return -EINVAL;

	/* Skip N of cpuN. */
	if (!read_until_char(&io, ' '))
		return -EINVAL;

	i = 1;
	while (true) {
		if (io__get_dec(&io, val) != ' ')
			break;
		if (field == i)
			return 0;
		i++;
	}
	return -EINVAL;
}

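/*
 * Read the given 1-indexed field from a /proc/pid/stat-style file open on
 * fd. Only numeric fields can be returned, so the comm (2) and state (3)
 * fields fail with -EINVAL.
 */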
static int read_pid_stat_field(int fd, int field, __u64 *val)
{
	char buf[256];
	struct io io;
	int c, i;

	io__init(&io, fd, buf, sizeof(buf));
	if (io__get_dec(&io, val) != ' ')
		return -EINVAL;
	if (field == 1)
		return 0;

	/* Skip comm. */
	if (io__get_char(&io) != '(' || !read_until_char(&io, ')'))
		return -EINVAL;
	if (field == 2)
		return -EINVAL; /* String can't be returned. */

	/* Skip state. */
	if (io__get_char(&io) != ' ' || io__get_char(&io) == -1)
		return -EINVAL;
	if (field == 3)
		return -EINVAL; /* String can't be returned. */

	/* Loop over numeric fields. */
	if (io__get_char(&io) != ' ')
		return -EINVAL;

	i = 4;
	while (true) {
		c = io__get_dec(&io, val);
		if (c == -1)
			return -EINVAL;
		if (c == -2) {
			/* Assume a negative ('-') was read. */
			c = io__get_dec(&io, val);
			*val *= -1;
		}
		if (c != ' ')
			return -EINVAL;
		if (field == i)
			return 0;
		i++;
	}
	return -EINVAL;
}

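/*
 * Allocate the start_times xyarray needed to compute deltas for the
 * user_time and system_time events before the evsel is opened.
 */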
int evsel__tool_pmu_prepare_open(struct evsel *evsel,
				 struct perf_cpu_map *cpus,
				 int nthreads)
{
	if ((evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME ||
	     evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) &&
	    !evsel->start_times) {
		evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus),
						  nthreads,
						  sizeof(__u64));
		if (!evsel->start_times)
			return -ENOMEM;
	}
	return 0;
}

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))

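/*
 * Open a tool PMU event. Most events need no setup; duration_time records
 * its start timestamp, while user_time and system_time open the relevant
 * /proc stat file per CPU/thread and record the starting field value.
 */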
int evsel__tool_pmu_open(struct evsel *evsel,
			 struct perf_thread_map *threads,
			 int start_cpu_map_idx, int end_cpu_map_idx)
{
	enum tool_pmu_event ev = evsel__tool_event(evsel);
	int pid = -1, idx = 0, thread = 0, nthreads, err = 0, old_errno;

	if (ev == TOOL_PMU__EVENT_NUM_CPUS)
		return 0;

	if (ev == TOOL_PMU__EVENT_DURATION_TIME) {
		if (evsel->core.attr.sample_period) /* no sampling */
			return -EINVAL;
		evsel->start_time = rdclock();
		return 0;
	}

	if (evsel->cgrp)
		pid = evsel->cgrp->fd;

	nthreads = perf_thread_map__nr(threads);
	for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
		for (thread = 0; thread < nthreads; thread++) {
			if (!evsel->cgrp && !evsel->core.system_wide)
				pid = perf_thread_map__pid(threads, thread);

			if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) {
				bool system = ev == TOOL_PMU__EVENT_SYSTEM_TIME;
				__u64 *start_time = NULL;
				int fd;

				if (evsel->core.attr.sample_period) {
					/* no sampling */
					err = -EINVAL;
					goto out_close;
				}
				if (pid > -1) {
					char buf[64];

					snprintf(buf, sizeof(buf), "/proc/%d/stat", pid);
					fd = open(buf, O_RDONLY);
					evsel->pid_stat = true;
				} else {
					fd = open("/proc/stat", O_RDONLY);
				}
				FD(evsel, idx, thread) = fd;
				if (fd < 0) {
					err = -errno;
					goto out_close;
				}
				start_time = xyarray__entry(evsel->start_times, idx, thread);
				if (pid > -1) {
					err = read_pid_stat_field(fd, system ? 15 : 14,
								  start_time);
				} else {
					struct perf_cpu cpu;

					cpu = perf_cpu_map__cpu(evsel->core.cpus, idx);
					err = read_stat_field(fd, cpu, system ? 3 : 1,
							      start_time);
				}
				if (err)
					goto out_close;
			}
		}
	}
	return 0;
out_close:
	if (err)
		threads->err_thread = thread;

	old_errno = errno;
	do {
		while (--thread >= 0) {
			if (FD(evsel, idx, thread) >= 0)
				close(FD(evsel, idx, thread));
			FD(evsel, idx, thread) = -1;
		}
		thread = nthreads;
	} while (--idx >= 0);
	errno = old_errno;
	return err;
}

#if !defined(__i386__) && !defined(__x86_64__)
/* Fallback when there is no arch-specific TSC frequency detection. */
u64 arch_get_tsc_freq(void)
{
	return 0;
}
#endif

#if !defined(__aarch64__)
/* Fallback when the number of slots per cycle isn't known. */
u64 tool_pmu__cpu_slots_per_cycle(void)
{
	return 0;
}
#endif

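/* Detect persistent memory via the ACPI NFIT table in sysfs, caching the result. */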
static bool has_pmem(void)
{
	static bool has_pmem, cached;
	const char *sysfs = sysfs__mountpoint();
	char path[PATH_MAX];

	if (!cached) {
		snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs);
		has_pmem = access(path, F_OK) == 0;
		cached = true;
	}
	return has_pmem;
}

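/*
 * Compute the value of a tool PMU event that can be read directly, writing
 * it to *result. Returns false for events that need open/read state (the
 * time events) or on failure.
 */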
bool tool_pmu__read_event(enum tool_pmu_event ev,
			  struct evsel *evsel,
			  bool system_wide,
			  const char *user_requested_cpu_list,
			  u64 *result)
{
	const struct cpu_topology *topology;

	switch (ev) {
	case TOOL_PMU__EVENT_HAS_PMEM:
		*result = has_pmem() ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_NUM_CORES:
		topology = online_topology();
		*result = topology->core_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_NUM_CPUS:
		if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
			/* No evsel to be specific to. */
			*result = cpu__max_present_cpu().cpu;
		} else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
			/* Evsel just has specific CPUs. */
			*result = perf_cpu_map__nr(evsel->core.cpus);
		} else {
			/*
			 * "Any CPU" event that can be scheduled on any CPU in
			 * the PMU's cpumask. The PMU cpumask should be saved in
			 * pmu_cpus. If not present fall back to max.
			 */
			if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus))
				*result = perf_cpu_map__nr(evsel->core.pmu_cpus);
			else
				*result = cpu__max_present_cpu().cpu;
		}
		return true;

	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: {
		struct perf_cpu_map *online = cpu_map__online();

		if (!online)
			return false;

		if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
			/* No evsel to be specific to. */
			*result = perf_cpu_map__nr(online);
		} else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
			/* Evsel just has specific CPUs. */
			struct perf_cpu_map *tmp =
				perf_cpu_map__intersect(online, evsel->core.cpus);

			*result = perf_cpu_map__nr(tmp);
			perf_cpu_map__put(tmp);
		} else {
			/*
			 * "Any CPU" event that can be scheduled on any CPU in
			 * the PMU's cpumask. The PMU cpumask should be saved in
			 * pmu_cpus, if not present then just the online cpu
			 * mask.
			 */
			if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus)) {
				struct perf_cpu_map *tmp =
					perf_cpu_map__intersect(online, evsel->core.pmu_cpus);

				*result = perf_cpu_map__nr(tmp);
				perf_cpu_map__put(tmp);
			} else {
				*result = perf_cpu_map__nr(online);
			}
		}
		perf_cpu_map__put(online);
		return true;
	}
	case TOOL_PMU__EVENT_NUM_DIES:
		topology = online_topology();
		*result = topology->die_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_NUM_PACKAGES:
		topology = online_topology();
		*result = topology->package_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_SLOTS:
		*result = tool_pmu__cpu_slots_per_cycle();
		return *result ? true : false;

	case TOOL_PMU__EVENT_SMT_ON:
		*result = smt_on() ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
		*result = arch_get_tsc_freq();
		return true;

	case TOOL_PMU__EVENT_CORE_WIDE:
		*result = core_wide(system_wide, user_requested_cpu_list) ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_TARGET_CPU:
		*result = system_wide || (user_requested_cpu_list != NULL) ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_NONE:
	case TOOL_PMU__EVENT_DURATION_TIME:
	case TOOL_PMU__EVENT_USER_TIME:
	case TOOL_PMU__EVENT_SYSTEM_TIME:
	case TOOL_PMU__EVENT_MAX:
	default:
		return false;
	}
}

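/* Update count from a newly read val, accumulating over old_count unless raw. */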
static void perf_counts__update(struct perf_counts_values *count,
				const struct perf_counts_values *old_count,
				bool raw, u64 val)
{
	/*
	 * The values of enabled and running must make a ratio of 100%. The
	 * exact values don't matter as long as they are non-zero to avoid
	 * issues with evsel__count_has_error.
	 */
	if (old_count) {
		count->val = raw ? val : old_count->val + val;
		count->run = old_count->run + 1;
		count->ena = old_count->ena + 1;
		count->lost = old_count->lost;
	} else {
		count->val = val;
		count->run++;
		count->ena++;
		count->lost = 0;
	}
}

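/* Read a tool PMU event's value into the evsel's counts. */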
int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
{
	__u64 *start_time, cur_time, delta_start;
	int err = 0;
	struct perf_counts_values *count, *old_count = NULL;
	bool adjust = false;
	enum tool_pmu_event ev = evsel__tool_event(evsel);

	count = perf_counts(evsel->counts, cpu_map_idx, thread);
	if (evsel->prev_raw_counts)
		old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);

	switch (ev) {
	case TOOL_PMU__EVENT_HAS_PMEM:
	case TOOL_PMU__EVENT_NUM_CORES:
	case TOOL_PMU__EVENT_NUM_CPUS:
	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE:
	case TOOL_PMU__EVENT_NUM_DIES:
	case TOOL_PMU__EVENT_NUM_PACKAGES:
	case TOOL_PMU__EVENT_SLOTS:
	case TOOL_PMU__EVENT_SMT_ON:
	case TOOL_PMU__EVENT_CORE_WIDE:
	case TOOL_PMU__EVENT_TARGET_CPU:
	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: {
		u64 val = 0;

		if (cpu_map_idx == 0 && thread == 0) {
			if (!tool_pmu__read_event(ev, evsel,
						  stat_config.system_wide,
						  stat_config.user_requested_cpu_list,
						  &val)) {
				count->lost++;
				val = 0;
			}
		}
		perf_counts__update(count, old_count, /*raw=*/false, val);
		return 0;
	}
	case TOOL_PMU__EVENT_DURATION_TIME:
		/*
		 * Pretend duration_time is only on the first CPU and thread, or
		 * else aggregation will scale duration_time by the number of
		 * CPUs/threads.
		 */
		start_time = &evsel->start_time;
		if (cpu_map_idx == 0 && thread == 0)
			cur_time = rdclock();
		else
			cur_time = *start_time;
		break;
	case TOOL_PMU__EVENT_USER_TIME:
	case TOOL_PMU__EVENT_SYSTEM_TIME: {
		bool system = evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME;
		int fd = FD(evsel, cpu_map_idx, thread);

		start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread);
		/* Rewind the stat file before re-reading it. */
		lseek(fd, 0, SEEK_SET);
		if (evsel->pid_stat) {
			/* The event exists solely on 1 CPU. */
			if (cpu_map_idx == 0)
				err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time);
			else
				cur_time = 0;
		} else {
			/* The event is for all threads. */
			if (thread == 0) {
				struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus,
									cpu_map_idx);

				err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time);
			} else {
				cur_time = 0;
			}
		}
		adjust = true;
		break;
	}
	case TOOL_PMU__EVENT_NONE:
	case TOOL_PMU__EVENT_MAX:
	default:
		err = -EINVAL;
	}
	if (err)
		return err;

	delta_start = cur_time - *start_time;
	if (adjust) {
		__u64 ticks_per_sec = sysconf(_SC_CLK_TCK);

		/* Convert clock ticks to nanoseconds. */
		delta_start *= 1e9 / ticks_per_sec;
	}
	perf_counts__update(count, old_count, /*raw=*/true, delta_start);
	return 0;
}

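/* Allocate and initialize the "tool" PMU. */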
struct perf_pmu *tool_pmu__new(void)
{
	struct perf_pmu *tool = zalloc(sizeof(struct perf_pmu));

	if (!tool)
		return NULL;

	if (perf_pmu__init(tool, PERF_PMU_TYPE_TOOL, "tool") != 0) {
		perf_pmu__delete(tool);
		return NULL;
	}
	tool->events_table = find_core_events_table("common", "common");
	return tool;
}