/* xref: /linux/tools/perf/util/tool_pmu.c (revision af9e8d12b139c92e748eb2956bbef03315ea7516) */
// SPDX-License-Identifier: GPL-2.0-only
#include "cgroup.h"
#include "counts.h"
#include "cputopo.h"
#include "evsel.h"
#include "pmu.h"
#include "print-events.h"
#include "smt.h"
#include "stat.h"
#include "time-utils.h"
#include "tool_pmu.h"
#include "tsc.h"
#include <api/fs/fs.h>
#include <api/io.h>
#include <internal/threadmap.h>
#include <perf/threadmap.h>
#include <fcntl.h>
#include <strings.h>

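/*
 * Names for the tool PMU events, indexed by enum tool_pmu_event.
 * TOOL_PMU__EVENT_NONE has no name, hence the leading NULL.
 */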
static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = {
	NULL,
	"duration_time",
	"user_time",
	"system_time",
	"has_pmem",
	"num_cores",
	"num_cpus",
	"num_cpus_online",
	"num_dies",
	"num_packages",
	"slots",
	"smt_on",
	"system_tsc_freq",
	"core_wide",
	"target_cpu",
};

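/*
 * Should the given tool event be hidden on this architecture? Some events
 * only make sense on one architecture, such as "slots" on arm64 and
 * "system_tsc_freq" on x86.
 */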
bool tool_pmu__skip_event(const char *name __maybe_unused)
{
#if !defined(__aarch64__)
	/* The slots event should only appear on arm64. */
	if (strcasecmp(name, "slots") == 0)
		return true;
#endif
#if !defined(__i386__) && !defined(__x86_64__)
	/* The system_tsc_freq event should only appear on x86. */
	if (strcasecmp(name, "system_tsc_freq") == 0)
		return true;
#endif
	return false;
}

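/* Number of tool events that tool_pmu__skip_event() hides for this build. */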
int tool_pmu__num_skip_events(void)
{
	int num = 0;

#if !defined(__aarch64__)
	num++;
#endif
#if !defined(__i386__) && !defined(__x86_64__)
	num++;
#endif
	return num;
}

const char *tool_pmu__event_to_str(enum tool_pmu_event ev)
{
	if ((ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX) &&
	    !tool_pmu__skip_event(tool_pmu__event_names[ev]))
		return tool_pmu__event_names[ev];

	return NULL;
}

enum tool_pmu_event tool_pmu__str_to_event(const char *str)
{
	int i;

	if (tool_pmu__skip_event(str))
		return TOOL_PMU__EVENT_NONE;

	tool_pmu__for_each_event(i) {
		if (!strcasecmp(str, tool_pmu__event_names[i]))
			return i;
	}
	return TOOL_PMU__EVENT_NONE;
}

bool perf_pmu__is_tool(const struct perf_pmu *pmu)
{
	return pmu && pmu->type == PERF_PMU_TYPE_TOOL;
}

bool evsel__is_tool(const struct evsel *evsel)
{
	return perf_pmu__is_tool(evsel->pmu);
}

enum tool_pmu_event evsel__tool_event(const struct evsel *evsel)
{
	if (!evsel__is_tool(evsel))
		return TOOL_PMU__EVENT_NONE;

	return (enum tool_pmu_event)evsel->core.attr.config;
}

const char *evsel__tool_pmu_event_name(const struct evsel *evsel)
{
	return tool_pmu__event_to_str(evsel->core.attr.config);
}

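/*
 * Consume characters from io up to and including the character e. Returns
 * false if EOF is reached first.
 */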
static bool read_until_char(struct io *io, char e)
{
	int c;

	do {
		c = io__get_char(io);
		if (c == -1)
			return false;
	} while (c != e);
	return true;
}

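/*
 * From an open /proc/stat file descriptor, skip to the line for the given
 * CPU and read its 1-based, space-separated numeric field into *val.
 */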
static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val)
{
	char buf[256];
	struct io io;
	int i;

	io__init(&io, fd, buf, sizeof(buf));

	/* Skip lines to relevant CPU. */
	for (i = -1; i < cpu.cpu; i++) {
		if (!read_until_char(&io, '\n'))
			return -EINVAL;
	}
	/* Skip to "cpu". */
	if (io__get_char(&io) != 'c')
		return -EINVAL;
	if (io__get_char(&io) != 'p')
		return -EINVAL;
	if (io__get_char(&io) != 'u')
		return -EINVAL;

	/* Skip N of cpuN. */
	if (!read_until_char(&io, ' '))
		return -EINVAL;

	i = 1;
	while (true) {
		if (io__get_dec(&io, val) != ' ')
			break;
		if (field == i)
			return 0;
		i++;
	}
	return -EINVAL;
}

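/*
 * Read the given 1-based field from an open /proc/<pid>/stat file descriptor
 * into *val. Fields 2 (comm) and 3 (state) are strings and cannot be
 * returned.
 */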
static int read_pid_stat_field(int fd, int field, __u64 *val)
{
	char buf[256];
	struct io io;
	int c, i;

	io__init(&io, fd, buf, sizeof(buf));
	if (io__get_dec(&io, val) != ' ')
		return -EINVAL;
	if (field == 1)
		return 0;

	/* Skip comm. */
	if (io__get_char(&io) != '(' || !read_until_char(&io, ')'))
		return -EINVAL;
	if (field == 2)
		return -EINVAL; /* String can't be returned. */

	/* Skip state. */
	if (io__get_char(&io) != ' ' || io__get_char(&io) == -1)
		return -EINVAL;
	if (field == 3)
		return -EINVAL; /* String can't be returned. */

	/* Loop over the remaining numeric fields. */
	if (io__get_char(&io) != ' ')
		return -EINVAL;

	i = 4;
	while (true) {
		c = io__get_dec(&io, val);
		if (c == -1)
			return -EINVAL;
		if (c == -2) {
			/* Assume a '-' was read; read the value and negate it. */
			c = io__get_dec(&io, val);
			*val *= -1;
		}
		if (c != ' ')
			return -EINVAL;
		if (field == i)
			return 0;
		i++;
	}
	return -EINVAL;
}

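/*
 * Allocate the start_times xyarray needed by the user_time and system_time
 * events before the evsel is opened.
 */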
int evsel__tool_pmu_prepare_open(struct evsel *evsel,
				 struct perf_cpu_map *cpus,
				 int nthreads)
{
	if ((evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME ||
	     evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) &&
	    !evsel->start_times) {
		evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus),
						  nthreads,
						  sizeof(__u64));
		if (!evsel->start_times)
			return -ENOMEM;
	}
	return 0;
}

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))

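/*
 * "Open" a tool event. No perf_event_open() system call is made:
 * duration_time just records a start timestamp, while user_time and
 * system_time open the appropriate /proc stat file for each CPU/thread and
 * record the starting value of the field they will later read.
 */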
int evsel__tool_pmu_open(struct evsel *evsel,
			 struct perf_thread_map *threads,
			 int start_cpu_map_idx, int end_cpu_map_idx)
{
	enum tool_pmu_event ev = evsel__tool_event(evsel);
	int pid = -1, idx = 0, thread = 0, nthreads, err = 0, old_errno;

	if (ev == TOOL_PMU__EVENT_NUM_CPUS)
		return 0;

	if (ev == TOOL_PMU__EVENT_DURATION_TIME) {
		if (evsel->core.attr.sample_period) /* no sampling */
			return -EINVAL;
		evsel->start_time = rdclock();
		return 0;
	}

	if (evsel->cgrp)
		pid = evsel->cgrp->fd;

	nthreads = perf_thread_map__nr(threads);
	for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
		for (thread = 0; thread < nthreads; thread++) {
			if (!evsel->cgrp && !evsel->core.system_wide)
				pid = perf_thread_map__pid(threads, thread);

			if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) {
				bool system = ev == TOOL_PMU__EVENT_SYSTEM_TIME;
				__u64 *start_time = NULL;
				int fd;

				if (evsel->core.attr.sample_period) {
					/* no sampling */
					err = -EINVAL;
					goto out_close;
				}
				if (pid > -1) {
					char buf[64];

					snprintf(buf, sizeof(buf), "/proc/%d/stat", pid);
					fd = open(buf, O_RDONLY);
					evsel->pid_stat = true;
				} else {
					fd = open("/proc/stat", O_RDONLY);
				}
				FD(evsel, idx, thread) = fd;
				if (fd < 0) {
					err = -errno;
					goto out_close;
				}
				start_time = xyarray__entry(evsel->start_times, idx, thread);
				if (pid > -1) {
					err = read_pid_stat_field(fd, system ? 15 : 14,
								  start_time);
				} else {
					struct perf_cpu cpu;

					cpu = perf_cpu_map__cpu(evsel->core.cpus, idx);
					err = read_stat_field(fd, cpu, system ? 3 : 1,
							      start_time);
				}
				if (err)
					goto out_close;
			}

		}
	}
	return 0;
out_close:
	if (err)
		threads->err_thread = thread;

	/* Close any file descriptors opened so far, preserving errno. */
	old_errno = errno;
	do {
		while (--thread >= 0) {
			if (FD(evsel, idx, thread) >= 0)
				close(FD(evsel, idx, thread));
			FD(evsel, idx, thread) = -1;
		}
		thread = nthreads;
	} while (--idx >= 0);
	errno = old_errno;
	return err;
}

#if !defined(__i386__) && !defined(__x86_64__)
/* Non-x86 stub: the TSC frequency is unknown. */
u64 arch_get_tsc_freq(void)
{
	return 0;
}
#endif

#if !defined(__aarch64__)
/* Non-arm64 stub: the slots event is only supported on arm64. */
u64 tool_pmu__cpu_slots_per_cycle(void)
{
	return 0;
}
#endif

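/*
 * Is persistent memory present? Determined by the existence of the ACPI NFIT
 * table in sysfs; the result is cached after the first lookup.
 */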
static bool has_pmem(void)
{
	static bool has_pmem, cached;
	const char *sysfs = sysfs__mountpoint();
	char path[PATH_MAX];

	if (!cached) {
		snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs);
		has_pmem = access(path, F_OK) == 0;
		cached = true;
	}
	return has_pmem;
}

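/*
 * Compute the value of one of the constant-style tool events such as the
 * topology counts and feature tests. The timing events are not handled here.
 * Returns false if the value cannot be determined.
 */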
bool tool_pmu__read_event(enum tool_pmu_event ev,
			  struct evsel *evsel,
			  bool system_wide,
			  const char *user_requested_cpu_list,
			  u64 *result)
{
	const struct cpu_topology *topology;

	switch (ev) {
	case TOOL_PMU__EVENT_HAS_PMEM:
		*result = has_pmem() ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_NUM_CORES:
		topology = online_topology();
		*result = topology->core_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_NUM_CPUS:
		if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
			/* No evsel to be specific to. */
			*result = cpu__max_present_cpu().cpu;
		} else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
			/* Evsel just has specific CPUs. */
			*result = perf_cpu_map__nr(evsel->core.cpus);
		} else {
			/*
			 * "Any CPU" event that can be scheduled on any CPU in
			 * the PMU's cpumask. The PMU cpumask should be saved in
			 * pmu_cpus. If not present fall back to max.
			 */
			if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus))
				*result = perf_cpu_map__nr(evsel->core.pmu_cpus);
			else
				*result = cpu__max_present_cpu().cpu;
		}
		return true;

	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: {
		struct perf_cpu_map *online = cpu_map__online();

		if (!online)
			return false;

		if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
			/* No evsel to be specific to. */
			*result = perf_cpu_map__nr(online);
		} else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
			/* Evsel just has specific CPUs. */
			struct perf_cpu_map *tmp =
				perf_cpu_map__intersect(online, evsel->core.cpus);

			*result = perf_cpu_map__nr(tmp);
			perf_cpu_map__put(tmp);
		} else {
			/*
			 * "Any CPU" event that can be scheduled on any CPU in
			 * the PMU's cpumask. The PMU cpumask should be saved in
			 * pmu_cpus, if not present then just the online cpu
			 * mask.
			 */
			if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus)) {
				struct perf_cpu_map *tmp =
					perf_cpu_map__intersect(online, evsel->core.pmu_cpus);

				*result = perf_cpu_map__nr(tmp);
				perf_cpu_map__put(tmp);
			} else {
				*result = perf_cpu_map__nr(online);
			}
		}
		perf_cpu_map__put(online);
		return true;
	}
	case TOOL_PMU__EVENT_NUM_DIES:
		topology = online_topology();
		*result = topology->die_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_NUM_PACKAGES:
		topology = online_topology();
		*result = topology->package_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_SLOTS:
		*result = tool_pmu__cpu_slots_per_cycle();
		return *result ? true : false;

	case TOOL_PMU__EVENT_SMT_ON:
		*result = smt_on() ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
		*result = arch_get_tsc_freq();
		return true;

	case TOOL_PMU__EVENT_CORE_WIDE:
		*result = core_wide(system_wide, user_requested_cpu_list) ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_TARGET_CPU:
		*result = system_wide || (user_requested_cpu_list != NULL) ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_NONE:
	case TOOL_PMU__EVENT_DURATION_TIME:
	case TOOL_PMU__EVENT_USER_TIME:
	case TOOL_PMU__EVENT_SYSTEM_TIME:
	case TOOL_PMU__EVENT_MAX:
	default:
		return false;
	}
}

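/*
 * Read a tool event for the given CPU map index and thread. Constant events
 * are only computed for the first CPU and thread; the timing events report
 * the time elapsed since evsel__tool_pmu_open().
 */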
int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
{
	__u64 *start_time, cur_time, delta_start;
	u64 val;
	int fd, err = 0;
	struct perf_counts_values *count, *old_count = NULL;
	bool adjust = false;
	enum tool_pmu_event ev = evsel__tool_event(evsel);

	count = perf_counts(evsel->counts, cpu_map_idx, thread);

	switch (ev) {
	case TOOL_PMU__EVENT_HAS_PMEM:
	case TOOL_PMU__EVENT_NUM_CORES:
	case TOOL_PMU__EVENT_NUM_CPUS:
	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE:
	case TOOL_PMU__EVENT_NUM_DIES:
	case TOOL_PMU__EVENT_NUM_PACKAGES:
	case TOOL_PMU__EVENT_SLOTS:
	case TOOL_PMU__EVENT_SMT_ON:
	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
	case TOOL_PMU__EVENT_CORE_WIDE:
	case TOOL_PMU__EVENT_TARGET_CPU:
		if (evsel->prev_raw_counts)
			old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
		val = 0;
		if (cpu_map_idx == 0 && thread == 0) {
			if (!tool_pmu__read_event(ev, evsel,
						  stat_config.system_wide,
						  stat_config.user_requested_cpu_list,
						  &val)) {
				count->lost++;
				val = 0;
			}
		}
		if (old_count) {
			count->val = old_count->val + val;
			count->run = old_count->run + 1;
			count->ena = old_count->ena + 1;
		} else {
			count->val = val;
			count->run++;
			count->ena++;
		}
		return 0;
	case TOOL_PMU__EVENT_DURATION_TIME:
		/*
		 * Pretend duration_time is only on the first CPU and thread, or
		 * else aggregation will scale duration_time by the number of
		 * CPUs/threads.
		 */
		start_time = &evsel->start_time;
		if (cpu_map_idx == 0 && thread == 0)
			cur_time = rdclock();
		else
			cur_time = *start_time;
		break;
	case TOOL_PMU__EVENT_USER_TIME:
	case TOOL_PMU__EVENT_SYSTEM_TIME: {
		bool system = evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME;

		start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread);
		fd = FD(evsel, cpu_map_idx, thread);
		lseek(fd, 0, SEEK_SET);
		if (evsel->pid_stat) {
			/* The event exists solely on 1 CPU. */
			if (cpu_map_idx == 0)
				err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time);
			else
				cur_time = 0;
		} else {
			/* The event is for all threads. */
			if (thread == 0) {
				struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus,
									cpu_map_idx);

				err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time);
			} else {
				cur_time = 0;
			}
		}
		adjust = true;
		break;
	}
	case TOOL_PMU__EVENT_NONE:
	case TOOL_PMU__EVENT_MAX:
	default:
		err = -EINVAL;
	}
	if (err)
		return err;

	delta_start = cur_time - *start_time;
	if (adjust) {
		__u64 ticks_per_sec = sysconf(_SC_CLK_TCK);

		delta_start *= 1000000000 / ticks_per_sec;
	}
	count->val    = delta_start;
	count->lost   = 0;
	/*
	 * The values of enabled and running must make a ratio of 100%. The
	 * exact values don't matter as long as they are non-zero to avoid
	 * issues with evsel__count_has_error.
	 */
	count->ena++;
	count->run++;
	return 0;
}

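/*
 * Create the "tool" software PMU and hook up the events table generated from
 * the common json, which provides descriptions for the tool events.
 */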
struct perf_pmu *tool_pmu__new(void)
{
	struct perf_pmu *tool = zalloc(sizeof(struct perf_pmu));

	if (!tool)
		return NULL;

	if (perf_pmu__init(tool, PERF_PMU_TYPE_TOOL, "tool") != 0) {
		perf_pmu__delete(tool);
		return NULL;
	}
	tool->events_table = find_core_events_table("common", "common");
	return tool;
}