xref: /linux/tools/perf/util/tool_pmu.c (revision 9e906a9dead17d81d6c2687f65e159231d0e3286)
// SPDX-License-Identifier: GPL-2.0-only
#include "cgroup.h"
#include "counts.h"
#include "cputopo.h"
#include "debug.h"
#include "evsel.h"
#include "pmu.h"
#include "print-events.h"
#include "smt.h"
#include "stat.h"
#include "time-utils.h"
#include "tool_pmu.h"
#include "tsc.h"
#include <api/fs/fs.h>
#include <api/io.h>
#include <internal/threadmap.h>
#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <fcntl.h>
#include <strings.h>

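/* Event names indexed by enum tool_pmu_event; the NULL entry corresponds to TOOL_PMU__EVENT_NONE. */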
static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = {
	NULL,
	"duration_time",
	"user_time",
	"system_time",
	"has_pmem",
	"num_cores",
	"num_cpus",
	"num_cpus_online",
	"num_dies",
	"num_packages",
	"slots",
	"smt_on",
	"system_tsc_freq",
	"core_wide",
	"target_cpu",
};

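/* Should the given event name be hidden because it isn't supported on this architecture? */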
bool tool_pmu__skip_event(const char *name __maybe_unused)
{
#if !defined(__aarch64__)
	/* The slots event should only appear on arm64. */
	if (strcasecmp(name, "slots") == 0)
		return true;
#endif
#if !defined(__i386__) && !defined(__x86_64__)
	/* The system_tsc_freq event should only appear on x86. */
	if (strcasecmp(name, "system_tsc_freq") == 0)
		return true;
#endif
	return false;
}

int tool_pmu__num_skip_events(void)
{
	int num = 0;

#if !defined(__aarch64__)
	num++;
#endif
#if !defined(__i386__) && !defined(__x86_64__)
	num++;
#endif
	return num;
}

const char *tool_pmu__event_to_str(enum tool_pmu_event ev)
{
	if ((ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX) &&
	    !tool_pmu__skip_event(tool_pmu__event_names[ev]))
		return tool_pmu__event_names[ev];

	return NULL;
}

enum tool_pmu_event tool_pmu__str_to_event(const char *str)
{
	int i;

	if (tool_pmu__skip_event(str))
		return TOOL_PMU__EVENT_NONE;

	tool_pmu__for_each_event(i) {
		if (!strcasecmp(str, tool_pmu__event_names[i]))
			return i;
	}
	return TOOL_PMU__EVENT_NONE;
}

bool perf_pmu__is_tool(const struct perf_pmu *pmu)
{
	return pmu && pmu->type == PERF_PMU_TYPE_TOOL;
}

bool evsel__is_tool(const struct evsel *evsel)
{
	return perf_pmu__is_tool(evsel->pmu);
}

enum tool_pmu_event evsel__tool_event(const struct evsel *evsel)
{
	if (!evsel__is_tool(evsel))
		return TOOL_PMU__EVENT_NONE;

	return (enum tool_pmu_event)evsel->core.attr.config;
}

const char *evsel__tool_pmu_event_name(const struct evsel *evsel)
{
	return tool_pmu__event_to_str(evsel->core.attr.config);
}

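/*
 * Which CPUs should a tool event be opened on? user_time and system_time are
 * read on all online CPUs; the remaining events only need CPU 0.
 */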
struct perf_cpu_map *tool_pmu__cpus(struct perf_event_attr *attr)
{
	static struct perf_cpu_map *cpu0_map;
	enum tool_pmu_event event = (enum tool_pmu_event)attr->config;

	if (event <= TOOL_PMU__EVENT_NONE || event >= TOOL_PMU__EVENT_MAX) {
		pr_err("Invalid tool PMU event config %llx\n", attr->config);
		return NULL;
	}
	if (event == TOOL_PMU__EVENT_USER_TIME || event == TOOL_PMU__EVENT_SYSTEM_TIME)
		return cpu_map__online();

	if (!cpu0_map)
		cpu0_map = perf_cpu_map__new_int(0);
	return perf_cpu_map__get(cpu0_map);
}

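/* Consume characters from io until 'e' is seen; return false if EOF is hit first. */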
static bool read_until_char(struct io *io, char e)
{
	int c;

	do {
		c = io__get_char(io);
		if (c == -1)
			return false;
	} while (c != e);
	return true;
}

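/*
 * Read the 1-indexed numeric 'field' of the "cpuN" line for the given CPU from
 * a /proc/stat style fd into *val.
 */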
static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val)
{
	char buf[256];
	struct io io;
	int i;

	io__init(&io, fd, buf, sizeof(buf));

	/* Skip lines to relevant CPU. */
	for (i = -1; i < cpu.cpu; i++) {
		if (!read_until_char(&io, '\n'))
			return -EINVAL;
	}
	/* Skip to "cpu". */
	if (io__get_char(&io) != 'c')
		return -EINVAL;
	if (io__get_char(&io) != 'p')
		return -EINVAL;
	if (io__get_char(&io) != 'u')
		return -EINVAL;

	/* Skip N of cpuN. */
	if (!read_until_char(&io, ' '))
		return -EINVAL;

	i = 1;
	while (true) {
		if (io__get_dec(&io, val) != ' ')
			break;
		if (field == i)
			return 0;
		i++;
	}
	return -EINVAL;
}

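/*
 * Read the 1-indexed 'field' of a /proc/<pid>/stat style fd into *val. Only
 * numeric fields can be returned; the comm and state strings yield -EINVAL.
 */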
static int read_pid_stat_field(int fd, int field, __u64 *val)
{
	char buf[256];
	struct io io;
	int c, i;

	io__init(&io, fd, buf, sizeof(buf));
	if (io__get_dec(&io, val) != ' ')
		return -EINVAL;
	if (field == 1)
		return 0;

	/* Skip comm. */
	if (io__get_char(&io) != '(' || !read_until_char(&io, ')'))
		return -EINVAL;
	if (field == 2)
		return -EINVAL; /* String can't be returned. */

	/* Skip state. */
	if (io__get_char(&io) != ' ' || io__get_char(&io) == -1)
		return -EINVAL;
	if (field == 3)
		return -EINVAL; /* String can't be returned. */

	/* Loop over numeric fields. */
	if (io__get_char(&io) != ' ')
		return -EINVAL;

	i = 4;
	while (true) {
		c = io__get_dec(&io, val);
		if (c == -1)
			return -EINVAL;
		if (c == -2) {
			/* Assume a -ve was read. */
			c = io__get_dec(&io, val);
			*val *= -1;
		}
		if (c != ' ')
			return -EINVAL;
		if (field == i)
			return 0;
		i++;
	}
	return -EINVAL;
}

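/* Allocate per-CPU/thread storage for the initial user_time/system_time readings. */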
int evsel__tool_pmu_prepare_open(struct evsel *evsel,
				 struct perf_cpu_map *cpus,
				 int nthreads)
{
	if ((evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME ||
	     evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) &&
	    !evsel->start_times) {
		evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus),
						  nthreads,
						  sizeof(__u64));
		if (!evsel->start_times)
			return -ENOMEM;
	}
	return 0;
}

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))

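/*
 * "Open" a tool event. No kernel counter is created; instead record the
 * starting values (rdclock for duration_time, /proc stat fields for
 * user_time/system_time) that later reads will compute deltas against.
 */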
int evsel__tool_pmu_open(struct evsel *evsel,
			 struct perf_thread_map *threads,
			 int start_cpu_map_idx, int end_cpu_map_idx)
{
	enum tool_pmu_event ev = evsel__tool_event(evsel);
	int pid = -1, idx = 0, thread = 0, nthreads, err = 0, old_errno;

	if (ev == TOOL_PMU__EVENT_NUM_CPUS)
		return 0;

	if (ev == TOOL_PMU__EVENT_DURATION_TIME) {
		if (evsel->core.attr.sample_period) /* no sampling */
			return -EINVAL;
		evsel->start_time = rdclock();
		return 0;
	}

	if (evsel->cgrp)
		pid = evsel->cgrp->fd;

	nthreads = perf_thread_map__nr(threads);
	for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
		for (thread = 0; thread < nthreads; thread++) {
			if (!evsel->cgrp && !evsel->core.system_wide)
				pid = perf_thread_map__pid(threads, thread);

			if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) {
				bool system = ev == TOOL_PMU__EVENT_SYSTEM_TIME;
				__u64 *start_time = NULL;
				int fd;

				if (evsel->core.attr.sample_period) {
					/* no sampling */
					err = -EINVAL;
					goto out_close;
				}
				if (pid > -1) {
					char buf[64];

					snprintf(buf, sizeof(buf), "/proc/%d/stat", pid);
					fd = open(buf, O_RDONLY);
					evsel->pid_stat = true;
				} else {
					fd = open("/proc/stat", O_RDONLY);
				}
				FD(evsel, idx, thread) = fd;
				if (fd < 0) {
					err = -errno;
					goto out_close;
				}
				start_time = xyarray__entry(evsel->start_times, idx, thread);
				if (pid > -1) {
					err = read_pid_stat_field(fd, system ? 15 : 14,
								  start_time);
				} else {
					struct perf_cpu cpu;

					cpu = perf_cpu_map__cpu(evsel->core.cpus, idx);
					err = read_stat_field(fd, cpu, system ? 3 : 1,
							      start_time);
				}
				if (err)
					goto out_close;
			}
		}
	}
	return 0;
out_close:
	if (err)
		threads->err_thread = thread;

	old_errno = errno;
	do {
		while (--thread >= 0) {
			if (FD(evsel, idx, thread) >= 0)
				close(FD(evsel, idx, thread));
			FD(evsel, idx, thread) = -1;
		}
		thread = nthreads;
	} while (--idx >= 0);
	errno = old_errno;
	return err;
}

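/* Fallback stubs for architectures that don't implement these values. */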
#if !defined(__i386__) && !defined(__x86_64__)
u64 arch_get_tsc_freq(void)
{
	return 0;
}
#endif

#if !defined(__aarch64__)
u64 tool_pmu__cpu_slots_per_cycle(void)
{
	return 0;
}
#endif

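/* Is persistent memory present? Checks for the ACPI NFIT table in sysfs, caching the result. */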
static bool has_pmem(void)
{
	static bool has_pmem, cached;
	const char *sysfs = sysfs__mountpoint();
	char path[PATH_MAX];

	if (!cached) {
		snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs);
		has_pmem = access(path, F_OK) == 0;
		cached = true;
	}
	return has_pmem;
}

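/*
 * Compute the value of the given tool event and store it in *result. Returns
 * false for events that can't be computed here; the time based events are
 * handled in evsel__tool_pmu_read() instead.
 */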
bool tool_pmu__read_event(enum tool_pmu_event ev,
			  struct evsel *evsel,
			  bool system_wide,
			  const char *user_requested_cpu_list,
			  u64 *result)
{
	const struct cpu_topology *topology;

	switch (ev) {
	case TOOL_PMU__EVENT_HAS_PMEM:
		*result = has_pmem() ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_NUM_CORES:
		topology = online_topology();
		*result = topology->core_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_NUM_CPUS:
		if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
			/* No evsel to be specific to. */
			*result = cpu__max_present_cpu().cpu;
		} else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
			/* Evsel just has specific CPUs. */
			*result = perf_cpu_map__nr(evsel->core.cpus);
		} else {
			/*
			 * "Any CPU" event that can be scheduled on any CPU in
			 * the PMU's cpumask. The PMU cpumask should be saved in
			 * pmu_cpus. If not present fall back to max.
			 */
			if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus))
				*result = perf_cpu_map__nr(evsel->core.pmu_cpus);
			else
				*result = cpu__max_present_cpu().cpu;
		}
		return true;

	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: {
		struct perf_cpu_map *online = cpu_map__online();

		if (!online)
			return false;

		if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
			/* No evsel to be specific to. */
			*result = perf_cpu_map__nr(online);
		} else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
			/* Evsel just has specific CPUs. */
			struct perf_cpu_map *tmp =
				perf_cpu_map__intersect(online, evsel->core.cpus);

			*result = perf_cpu_map__nr(tmp);
			perf_cpu_map__put(tmp);
		} else {
			/*
			 * "Any CPU" event that can be scheduled on any CPU in
			 * the PMU's cpumask. The PMU cpumask should be saved in
			 * pmu_cpus, if not present then just the online cpu
			 * mask.
			 */
			if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus)) {
				struct perf_cpu_map *tmp =
					perf_cpu_map__intersect(online, evsel->core.pmu_cpus);

				*result = perf_cpu_map__nr(tmp);
				perf_cpu_map__put(tmp);
			} else {
				*result = perf_cpu_map__nr(online);
			}
		}
		perf_cpu_map__put(online);
		return true;
	}
	case TOOL_PMU__EVENT_NUM_DIES:
		topology = online_topology();
		*result = topology->die_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_NUM_PACKAGES:
		topology = online_topology();
		*result = topology->package_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_SLOTS:
		*result = tool_pmu__cpu_slots_per_cycle();
		return *result ? true : false;

	case TOOL_PMU__EVENT_SMT_ON:
		*result = smt_on() ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
		*result = arch_get_tsc_freq();
		return true;

	case TOOL_PMU__EVENT_CORE_WIDE:
		*result = core_wide(system_wide, user_requested_cpu_list) ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_TARGET_CPU:
		*result = system_wide || (user_requested_cpu_list != NULL) ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_NONE:
	case TOOL_PMU__EVENT_DURATION_TIME:
	case TOOL_PMU__EVENT_USER_TIME:
	case TOOL_PMU__EVENT_SYSTEM_TIME:
	case TOOL_PMU__EVENT_MAX:
	default:
		return false;
	}
}

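/* Record a new counter value, either overwriting (raw) or accumulating onto old_count. */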
static void perf_counts__update(struct perf_counts_values *count,
				const struct perf_counts_values *old_count,
				bool raw, u64 val)
{
	/*
	 * The values of enabled and running must make a ratio of 100%. The
	 * exact values don't matter as long as they are non-zero to avoid
	 * issues with evsel__count_has_error.
	 */
	if (old_count) {
		count->val = raw ? val : old_count->val + val;
		count->run = old_count->run + 1;
		count->ena = old_count->ena + 1;
		count->lost = old_count->lost;
	} else {
		count->val = val;
		count->run++;
		count->ena++;
		count->lost = 0;
	}
}

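/* Read a tool event for the given CPU map index and thread, computing deltas for the time based events. */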
int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
{
	__u64 *start_time, cur_time, delta_start;
	int err = 0;
	struct perf_counts_values *count, *old_count = NULL;
	bool adjust = false;
	enum tool_pmu_event ev = evsel__tool_event(evsel);

	count = perf_counts(evsel->counts, cpu_map_idx, thread);
	if (evsel->prev_raw_counts)
		old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);

	switch (ev) {
	case TOOL_PMU__EVENT_HAS_PMEM:
	case TOOL_PMU__EVENT_NUM_CORES:
	case TOOL_PMU__EVENT_NUM_CPUS:
	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE:
	case TOOL_PMU__EVENT_NUM_DIES:
	case TOOL_PMU__EVENT_NUM_PACKAGES:
	case TOOL_PMU__EVENT_SLOTS:
	case TOOL_PMU__EVENT_SMT_ON:
	case TOOL_PMU__EVENT_CORE_WIDE:
	case TOOL_PMU__EVENT_TARGET_CPU:
	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: {
		u64 val = 0;

		if (cpu_map_idx == 0 && thread == 0) {
			if (!tool_pmu__read_event(ev, evsel,
						  stat_config.system_wide,
						  stat_config.user_requested_cpu_list,
						  &val)) {
				count->lost++;
				val = 0;
			}
		}
		perf_counts__update(count, old_count, /*raw=*/false, val);
		return 0;
	}
	case TOOL_PMU__EVENT_DURATION_TIME:
		/*
		 * Pretend duration_time is only on the first CPU and thread, or
		 * else aggregation will scale duration_time by the number of
		 * CPUs/threads.
		 */
		start_time = &evsel->start_time;
		if (cpu_map_idx == 0 && thread == 0)
			cur_time = rdclock();
		else
			cur_time = *start_time;
		break;
	case TOOL_PMU__EVENT_USER_TIME:
	case TOOL_PMU__EVENT_SYSTEM_TIME: {
		bool system = evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME;
		int fd = FD(evsel, cpu_map_idx, thread);

		start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread);
		lseek(fd, 0, SEEK_SET);
		if (evsel->pid_stat) {
			/* The event exists solely on 1 CPU. */
			if (cpu_map_idx == 0)
				err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time);
			else
				cur_time = 0;
		} else {
			/* The event is for all threads. */
			if (thread == 0) {
				struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus,
									cpu_map_idx);

				err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time);
			} else {
				cur_time = 0;
			}
		}
		adjust = true;
		break;
	}
	case TOOL_PMU__EVENT_NONE:
	case TOOL_PMU__EVENT_MAX:
	default:
		err = -EINVAL;
	}
	if (err)
		return err;

	delta_start = cur_time - *start_time;
	if (adjust) {
		__u64 ticks_per_sec = sysconf(_SC_CLK_TCK);

		delta_start *= 1e9 / ticks_per_sec;
	}
	perf_counts__update(count, old_count, /*raw=*/true, delta_start);
	return 0;
}

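/* Allocate and initialize the "tool" software PMU. */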
struct perf_pmu *tool_pmu__new(void)
{
	struct perf_pmu *tool = zalloc(sizeof(struct perf_pmu));

	if (!tool)
		return NULL;

	if (perf_pmu__init(tool, PERF_PMU_TYPE_TOOL, "tool") != 0) {
		perf_pmu__delete(tool);
		return NULL;
	}
	tool->events_table = find_core_events_table("common", "common");
	return tool;
}