xref: /linux/tools/perf/util/tool_pmu.c (revision 25768de50b1f2dbb6ea44bd5148a87fe2c9c3688)
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include "cgroup.h"
3 #include "counts.h"
4 #include "cputopo.h"
5 #include "evsel.h"
6 #include "pmu.h"
7 #include "print-events.h"
8 #include "smt.h"
9 #include "time-utils.h"
10 #include "tool_pmu.h"
11 #include "tsc.h"
12 #include <api/fs/fs.h>
13 #include <api/io.h>
14 #include <internal/threadmap.h>
15 #include <perf/threadmap.h>
16 #include <fcntl.h>
17 #include <strings.h>
18 
19 static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = {
20 	NULL,
21 	"duration_time",
22 	"user_time",
23 	"system_time",
24 	"has_pmem",
25 	"num_cores",
26 	"num_cpus",
27 	"num_cpus_online",
28 	"num_dies",
29 	"num_packages",
30 	"slots",
31 	"smt_on",
32 	"system_tsc_freq",
33 };
34 
35 bool tool_pmu__skip_event(const char *name __maybe_unused)
36 {
37 #if !defined(__aarch64__)
38 	/* The slots event should only appear on arm64. */
39 	if (strcasecmp(name, "slots") == 0)
40 		return true;
41 #endif
42 #if !defined(__i386__) && !defined(__x86_64__)
43 	/* The system_tsc_freq event should only appear on x86. */
44 	if (strcasecmp(name, "system_tsc_freq") == 0)
45 		return true;
46 #endif
47 	return false;
48 }
49 
/*
 * Number of tool events that tool_pmu__skip_event() hides on the architecture
 * perf was built for: one for "slots" when not on arm64, plus one for
 * "system_tsc_freq" when not on x86.
 */
int tool_pmu__num_skip_events(void)
{
#if defined(__aarch64__)
	const int slots_skipped = 0;
#else
	const int slots_skipped = 1;
#endif
#if defined(__i386__) || defined(__x86_64__)
	const int tsc_freq_skipped = 0;
#else
	const int tsc_freq_skipped = 1;
#endif

	return slots_skipped + tsc_freq_skipped;
}
62 
63 const char *tool_pmu__event_to_str(enum tool_pmu_event ev)
64 {
65 	if (ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX)
66 		return tool_pmu__event_names[ev];
67 
68 	return NULL;
69 }
70 
71 enum tool_pmu_event tool_pmu__str_to_event(const char *str)
72 {
73 	int i;
74 
75 	if (tool_pmu__skip_event(str))
76 		return TOOL_PMU__EVENT_NONE;
77 
78 	tool_pmu__for_each_event(i) {
79 		if (!strcasecmp(str, tool_pmu__event_names[i]))
80 			return i;
81 	}
82 	return TOOL_PMU__EVENT_NONE;
83 }
84 
85 bool perf_pmu__is_tool(const struct perf_pmu *pmu)
86 {
87 	return pmu && pmu->type == PERF_PMU_TYPE_TOOL;
88 }
89 
90 bool evsel__is_tool(const struct evsel *evsel)
91 {
92 	return perf_pmu__is_tool(evsel->pmu);
93 }
94 
95 enum tool_pmu_event evsel__tool_event(const struct evsel *evsel)
96 {
97 	if (!evsel__is_tool(evsel))
98 		return TOOL_PMU__EVENT_NONE;
99 
100 	return (enum tool_pmu_event)evsel->core.attr.config;
101 }
102 
103 const char *evsel__tool_pmu_event_name(const struct evsel *evsel)
104 {
105 	return tool_pmu__event_to_str(evsel->core.attr.config);
106 }
107 
/*
 * Consume characters from @io up to and including the first occurrence of @e.
 *
 * Returns true once @e has been consumed, false if io__get_char() reports -1
 * before @e is seen.
 */
static bool read_until_char(struct io *io, char e)
{
	for (;;) {
		int ch = io__get_char(io);

		if (ch == -1)
			return false;
		if (ch == e)
			return true;
	}
}
119 
120 static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val)
121 {
122 	char buf[256];
123 	struct io io;
124 	int i;
125 
126 	io__init(&io, fd, buf, sizeof(buf));
127 
128 	/* Skip lines to relevant CPU. */
129 	for (i = -1; i < cpu.cpu; i++) {
130 		if (!read_until_char(&io, '\n'))
131 			return -EINVAL;
132 	}
133 	/* Skip to "cpu". */
134 	if (io__get_char(&io) != 'c') return -EINVAL;
135 	if (io__get_char(&io) != 'p') return -EINVAL;
136 	if (io__get_char(&io) != 'u') return -EINVAL;
137 
138 	/* Skip N of cpuN. */
139 	if (!read_until_char(&io, ' '))
140 		return -EINVAL;
141 
142 	i = 1;
143 	while (true) {
144 		if (io__get_dec(&io, val) != ' ')
145 			break;
146 		if (field == i)
147 			return 0;
148 		i++;
149 	}
150 	return -EINVAL;
151 }
152 
153 static int read_pid_stat_field(int fd, int field, __u64 *val)
154 {
155 	char buf[256];
156 	struct io io;
157 	int c, i;
158 
159 	io__init(&io, fd, buf, sizeof(buf));
160 	if (io__get_dec(&io, val) != ' ')
161 		return -EINVAL;
162 	if (field == 1)
163 		return 0;
164 
165 	/* Skip comm. */
166 	if (io__get_char(&io) != '(' || !read_until_char(&io, ')'))
167 		return -EINVAL;
168 	if (field == 2)
169 		return -EINVAL; /* String can't be returned. */
170 
171 	/* Skip state */
172 	if (io__get_char(&io) != ' ' || io__get_char(&io) == -1)
173 		return -EINVAL;
174 	if (field == 3)
175 		return -EINVAL; /* String can't be returned. */
176 
177 	/* Loop over numeric fields*/
178 	if (io__get_char(&io) != ' ')
179 		return -EINVAL;
180 
181 	i = 4;
182 	while (true) {
183 		c = io__get_dec(&io, val);
184 		if (c == -1)
185 			return -EINVAL;
186 		if (c == -2) {
187 			/* Assume a -ve was read */
188 			c = io__get_dec(&io, val);
189 			*val *= -1;
190 		}
191 		if (c != ' ')
192 			return -EINVAL;
193 		if (field == i)
194 			return 0;
195 		i++;
196 	}
197 	return -EINVAL;
198 }
199 
200 int evsel__tool_pmu_prepare_open(struct evsel *evsel,
201 				 struct perf_cpu_map *cpus,
202 				 int nthreads)
203 {
204 	if ((evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME ||
205 	     evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) &&
206 	    !evsel->start_times) {
207 		evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus),
208 						  nthreads,
209 						  sizeof(__u64));
210 		if (!evsel->start_times)
211 			return -ENOMEM;
212 	}
213 	return 0;
214 }
215 
/*
 * Access the int file descriptor slot of evsel @e at position (@x, @y) of its
 * core.fd xyarray. @e is parenthesized so that any pointer expression can be
 * passed safely.
 */
#define FD(e, x, y) (*(int *)xyarray__entry((e)->core.fd, x, y))
217 
218 int evsel__tool_pmu_open(struct evsel *evsel,
219 			 struct perf_thread_map *threads,
220 			 int start_cpu_map_idx, int end_cpu_map_idx)
221 {
222 	enum tool_pmu_event ev = evsel__tool_event(evsel);
223 	int pid = -1, idx = 0, thread = 0, nthreads, err = 0, old_errno;
224 
225 	if (ev == TOOL_PMU__EVENT_NUM_CPUS)
226 		return 0;
227 
228 	if (ev == TOOL_PMU__EVENT_DURATION_TIME) {
229 		if (evsel->core.attr.sample_period) /* no sampling */
230 			return -EINVAL;
231 		evsel->start_time = rdclock();
232 		return 0;
233 	}
234 
235 	if (evsel->cgrp)
236 		pid = evsel->cgrp->fd;
237 
238 	nthreads = perf_thread_map__nr(threads);
239 	for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
240 		for (thread = 0; thread < nthreads; thread++) {
241 			if (thread >= nthreads)
242 				break;
243 
244 			if (!evsel->cgrp && !evsel->core.system_wide)
245 				pid = perf_thread_map__pid(threads, thread);
246 
247 			if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) {
248 				bool system = ev == TOOL_PMU__EVENT_SYSTEM_TIME;
249 				__u64 *start_time = NULL;
250 				int fd;
251 
252 				if (evsel->core.attr.sample_period) {
253 					/* no sampling */
254 					err = -EINVAL;
255 					goto out_close;
256 				}
257 				if (pid > -1) {
258 					char buf[64];
259 
260 					snprintf(buf, sizeof(buf), "/proc/%d/stat", pid);
261 					fd = open(buf, O_RDONLY);
262 					evsel->pid_stat = true;
263 				} else {
264 					fd = open("/proc/stat", O_RDONLY);
265 				}
266 				FD(evsel, idx, thread) = fd;
267 				if (fd < 0) {
268 					err = -errno;
269 					goto out_close;
270 				}
271 				start_time = xyarray__entry(evsel->start_times, idx, thread);
272 				if (pid > -1) {
273 					err = read_pid_stat_field(fd, system ? 15 : 14,
274 								  start_time);
275 				} else {
276 					struct perf_cpu cpu;
277 
278 					cpu = perf_cpu_map__cpu(evsel->core.cpus, idx);
279 					err = read_stat_field(fd, cpu, system ? 3 : 1,
280 							      start_time);
281 				}
282 				if (err)
283 					goto out_close;
284 			}
285 
286 		}
287 	}
288 	return 0;
289 out_close:
290 	if (err)
291 		threads->err_thread = thread;
292 
293 	old_errno = errno;
294 	do {
295 		while (--thread >= 0) {
296 			if (FD(evsel, idx, thread) >= 0)
297 				close(FD(evsel, idx, thread));
298 			FD(evsel, idx, thread) = -1;
299 		}
300 		thread = nthreads;
301 	} while (--idx >= 0);
302 	errno = old_errno;
303 	return err;
304 }
305 
#if !defined(__i386__) && !defined(__x86_64__)
/* Stand-in for builds that are not x86: always reports a frequency of 0. */
u64 arch_get_tsc_freq(void)
{
	const u64 no_tsc_freq = 0;

	return no_tsc_freq;
}
#endif
312 
313 #if !defined(__aarch64__)
314 u64 tool_pmu__cpu_slots_per_cycle(void)
315 {
316 	return 0;
317 }
318 #endif
319 
320 static bool has_pmem(void)
321 {
322 	static bool has_pmem, cached;
323 	const char *sysfs = sysfs__mountpoint();
324 	char path[PATH_MAX];
325 
326 	if (!cached) {
327 		snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs);
328 		has_pmem = access(path, F_OK) == 0;
329 		cached = true;
330 	}
331 	return has_pmem;
332 }
333 
334 bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result)
335 {
336 	const struct cpu_topology *topology;
337 
338 	switch (ev) {
339 	case TOOL_PMU__EVENT_HAS_PMEM:
340 		*result = has_pmem() ? 1 : 0;
341 		return true;
342 
343 	case TOOL_PMU__EVENT_NUM_CORES:
344 		topology = online_topology();
345 		*result = topology->core_cpus_lists;
346 		return true;
347 
348 	case TOOL_PMU__EVENT_NUM_CPUS:
349 		*result = cpu__max_present_cpu().cpu;
350 		return true;
351 
352 	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: {
353 		struct perf_cpu_map *online = cpu_map__online();
354 
355 		if (online) {
356 			*result = perf_cpu_map__nr(online);
357 			return true;
358 		}
359 		return false;
360 	}
361 	case TOOL_PMU__EVENT_NUM_DIES:
362 		topology = online_topology();
363 		*result = topology->die_cpus_lists;
364 		return true;
365 
366 	case TOOL_PMU__EVENT_NUM_PACKAGES:
367 		topology = online_topology();
368 		*result = topology->package_cpus_lists;
369 		return true;
370 
371 	case TOOL_PMU__EVENT_SLOTS:
372 		*result = tool_pmu__cpu_slots_per_cycle();
373 		return *result ? true : false;
374 
375 	case TOOL_PMU__EVENT_SMT_ON:
376 		*result = smt_on() ? 1 : 0;
377 		return true;
378 
379 	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
380 		*result = arch_get_tsc_freq();
381 		return true;
382 
383 	case TOOL_PMU__EVENT_NONE:
384 	case TOOL_PMU__EVENT_DURATION_TIME:
385 	case TOOL_PMU__EVENT_USER_TIME:
386 	case TOOL_PMU__EVENT_SYSTEM_TIME:
387 	case TOOL_PMU__EVENT_MAX:
388 	default:
389 		return false;
390 	}
391 }
392 
393 int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
394 {
395 	__u64 *start_time, cur_time, delta_start;
396 	u64 val;
397 	int fd, err = 0;
398 	struct perf_counts_values *count, *old_count = NULL;
399 	bool adjust = false;
400 	enum tool_pmu_event ev = evsel__tool_event(evsel);
401 
402 	count = perf_counts(evsel->counts, cpu_map_idx, thread);
403 
404 	switch (ev) {
405 	case TOOL_PMU__EVENT_HAS_PMEM:
406 	case TOOL_PMU__EVENT_NUM_CORES:
407 	case TOOL_PMU__EVENT_NUM_CPUS:
408 	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE:
409 	case TOOL_PMU__EVENT_NUM_DIES:
410 	case TOOL_PMU__EVENT_NUM_PACKAGES:
411 	case TOOL_PMU__EVENT_SLOTS:
412 	case TOOL_PMU__EVENT_SMT_ON:
413 	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
414 		if (evsel->prev_raw_counts)
415 			old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
416 		val = 0;
417 		if (cpu_map_idx == 0 && thread == 0) {
418 			if (!tool_pmu__read_event(ev, &val)) {
419 				count->lost++;
420 				val = 0;
421 			}
422 		}
423 		if (old_count) {
424 			count->val = old_count->val + val;
425 			count->run = old_count->run + 1;
426 			count->ena = old_count->ena + 1;
427 		} else {
428 			count->val = val;
429 			count->run++;
430 			count->ena++;
431 		}
432 		return 0;
433 	case TOOL_PMU__EVENT_DURATION_TIME:
434 		/*
435 		 * Pretend duration_time is only on the first CPU and thread, or
436 		 * else aggregation will scale duration_time by the number of
437 		 * CPUs/threads.
438 		 */
439 		start_time = &evsel->start_time;
440 		if (cpu_map_idx == 0 && thread == 0)
441 			cur_time = rdclock();
442 		else
443 			cur_time = *start_time;
444 		break;
445 	case TOOL_PMU__EVENT_USER_TIME:
446 	case TOOL_PMU__EVENT_SYSTEM_TIME: {
447 		bool system = evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME;
448 
449 		start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread);
450 		fd = FD(evsel, cpu_map_idx, thread);
451 		lseek(fd, SEEK_SET, 0);
452 		if (evsel->pid_stat) {
453 			/* The event exists solely on 1 CPU. */
454 			if (cpu_map_idx == 0)
455 				err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time);
456 			else
457 				cur_time = 0;
458 		} else {
459 			/* The event is for all threads. */
460 			if (thread == 0) {
461 				struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus,
462 									cpu_map_idx);
463 
464 				err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time);
465 			} else {
466 				cur_time = 0;
467 			}
468 		}
469 		adjust = true;
470 		break;
471 	}
472 	case TOOL_PMU__EVENT_NONE:
473 	case TOOL_PMU__EVENT_MAX:
474 	default:
475 		err = -EINVAL;
476 	}
477 	if (err)
478 		return err;
479 
480 	delta_start = cur_time - *start_time;
481 	if (adjust) {
482 		__u64 ticks_per_sec = sysconf(_SC_CLK_TCK);
483 
484 		delta_start *= 1000000000 / ticks_per_sec;
485 	}
486 	count->val    = delta_start;
487 	count->ena    = count->run = delta_start;
488 	count->lost   = 0;
489 	return 0;
490 }
491 
492 struct perf_pmu *perf_pmus__tool_pmu(void)
493 {
494 	static struct perf_pmu tool = {
495 		.name = "tool",
496 		.type = PERF_PMU_TYPE_TOOL,
497 		.aliases = LIST_HEAD_INIT(tool.aliases),
498 		.caps = LIST_HEAD_INIT(tool.caps),
499 		.format = LIST_HEAD_INIT(tool.format),
500 	};
501 	if (!tool.events_table)
502 		tool.events_table = find_core_events_table("common", "common");
503 
504 	return &tool;
505 }
506