xref: /linux/tools/perf/util/tool_pmu.c (revision bfb4a6c721517a11b277e8841f8a7a64b1b14b72)
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include "cgroup.h"
3 #include "counts.h"
4 #include "cputopo.h"
5 #include "evsel.h"
6 #include "pmu.h"
7 #include "print-events.h"
8 #include "smt.h"
9 #include "time-utils.h"
10 #include "tool_pmu.h"
11 #include "tsc.h"
12 #include <api/fs/fs.h>
13 #include <api/io.h>
14 #include <internal/threadmap.h>
15 #include <perf/threadmap.h>
16 #include <fcntl.h>
17 #include <strings.h>
18 
19 static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = {
20 	NULL,
21 	"duration_time",
22 	"user_time",
23 	"system_time",
24 	"has_pmem",
25 	"num_cores",
26 	"num_cpus",
27 	"num_cpus_online",
28 	"num_dies",
29 	"num_packages",
30 	"slots",
31 	"smt_on",
32 	"system_tsc_freq",
33 };
34 
35 bool tool_pmu__skip_event(const char *name __maybe_unused)
36 {
37 #if !defined(__aarch64__)
38 	/* The slots event should only appear on arm64. */
39 	if (strcasecmp(name, "slots") == 0)
40 		return true;
41 #endif
42 #if !defined(__i386__) && !defined(__x86_64__)
43 	/* The system_tsc_freq event should only appear on x86. */
44 	if (strcasecmp(name, "system_tsc_freq") == 0)
45 		return true;
46 #endif
47 	return false;
48 }
49 
/*
 * Number of tool events that tool_pmu__skip_event() hides on the
 * architecture perf was built for. Must be kept in sync with the
 * conditions in tool_pmu__skip_event().
 */
int tool_pmu__num_skip_events(void)
{
#if !defined(__aarch64__)
	const int skipped_slots = 1;		/* "slots" is arm64 only. */
#else
	const int skipped_slots = 0;
#endif
#if !defined(__i386__) && !defined(__x86_64__)
	const int skipped_tsc_freq = 1;		/* "system_tsc_freq" is x86 only. */
#else
	const int skipped_tsc_freq = 0;
#endif

	return skipped_slots + skipped_tsc_freq;
}
62 
63 const char *tool_pmu__event_to_str(enum tool_pmu_event ev)
64 {
65 	if ((ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX) &&
66 	    !tool_pmu__skip_event(tool_pmu__event_names[ev]))
67 		return tool_pmu__event_names[ev];
68 
69 	return NULL;
70 }
71 
72 enum tool_pmu_event tool_pmu__str_to_event(const char *str)
73 {
74 	int i;
75 
76 	if (tool_pmu__skip_event(str))
77 		return TOOL_PMU__EVENT_NONE;
78 
79 	tool_pmu__for_each_event(i) {
80 		if (!strcasecmp(str, tool_pmu__event_names[i]))
81 			return i;
82 	}
83 	return TOOL_PMU__EVENT_NONE;
84 }
85 
86 bool perf_pmu__is_tool(const struct perf_pmu *pmu)
87 {
88 	return pmu && pmu->type == PERF_PMU_TYPE_TOOL;
89 }
90 
91 bool evsel__is_tool(const struct evsel *evsel)
92 {
93 	return perf_pmu__is_tool(evsel->pmu);
94 }
95 
96 enum tool_pmu_event evsel__tool_event(const struct evsel *evsel)
97 {
98 	if (!evsel__is_tool(evsel))
99 		return TOOL_PMU__EVENT_NONE;
100 
101 	return (enum tool_pmu_event)evsel->core.attr.config;
102 }
103 
104 const char *evsel__tool_pmu_event_name(const struct evsel *evsel)
105 {
106 	return tool_pmu__event_to_str(evsel->core.attr.config);
107 }
108 
/*
 * Consume characters from @io up to and including the first occurrence
 * of @e.
 *
 * Returns true once @e has been consumed, false if io__get_char() returned
 * -1 before @e was seen.
 */
static bool read_until_char(struct io *io, char e)
{
	for (;;) {
		int ch = io__get_char(io);

		if (ch == -1)
			return false;
		if (ch == e)
			return true;
	}
}
120 
121 static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val)
122 {
123 	char buf[256];
124 	struct io io;
125 	int i;
126 
127 	io__init(&io, fd, buf, sizeof(buf));
128 
129 	/* Skip lines to relevant CPU. */
130 	for (i = -1; i < cpu.cpu; i++) {
131 		if (!read_until_char(&io, '\n'))
132 			return -EINVAL;
133 	}
134 	/* Skip to "cpu". */
135 	if (io__get_char(&io) != 'c') return -EINVAL;
136 	if (io__get_char(&io) != 'p') return -EINVAL;
137 	if (io__get_char(&io) != 'u') return -EINVAL;
138 
139 	/* Skip N of cpuN. */
140 	if (!read_until_char(&io, ' '))
141 		return -EINVAL;
142 
143 	i = 1;
144 	while (true) {
145 		if (io__get_dec(&io, val) != ' ')
146 			break;
147 		if (field == i)
148 			return 0;
149 		i++;
150 	}
151 	return -EINVAL;
152 }
153 
154 static int read_pid_stat_field(int fd, int field, __u64 *val)
155 {
156 	char buf[256];
157 	struct io io;
158 	int c, i;
159 
160 	io__init(&io, fd, buf, sizeof(buf));
161 	if (io__get_dec(&io, val) != ' ')
162 		return -EINVAL;
163 	if (field == 1)
164 		return 0;
165 
166 	/* Skip comm. */
167 	if (io__get_char(&io) != '(' || !read_until_char(&io, ')'))
168 		return -EINVAL;
169 	if (field == 2)
170 		return -EINVAL; /* String can't be returned. */
171 
172 	/* Skip state */
173 	if (io__get_char(&io) != ' ' || io__get_char(&io) == -1)
174 		return -EINVAL;
175 	if (field == 3)
176 		return -EINVAL; /* String can't be returned. */
177 
178 	/* Loop over numeric fields*/
179 	if (io__get_char(&io) != ' ')
180 		return -EINVAL;
181 
182 	i = 4;
183 	while (true) {
184 		c = io__get_dec(&io, val);
185 		if (c == -1)
186 			return -EINVAL;
187 		if (c == -2) {
188 			/* Assume a -ve was read */
189 			c = io__get_dec(&io, val);
190 			*val *= -1;
191 		}
192 		if (c != ' ')
193 			return -EINVAL;
194 		if (field == i)
195 			return 0;
196 		i++;
197 	}
198 	return -EINVAL;
199 }
200 
201 int evsel__tool_pmu_prepare_open(struct evsel *evsel,
202 				 struct perf_cpu_map *cpus,
203 				 int nthreads)
204 {
205 	if ((evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME ||
206 	     evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) &&
207 	    !evsel->start_times) {
208 		evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus),
209 						  nthreads,
210 						  sizeof(__u64));
211 		if (!evsel->start_times)
212 			return -ENOMEM;
213 	}
214 	return 0;
215 }
216 
217 #define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
218 
219 int evsel__tool_pmu_open(struct evsel *evsel,
220 			 struct perf_thread_map *threads,
221 			 int start_cpu_map_idx, int end_cpu_map_idx)
222 {
223 	enum tool_pmu_event ev = evsel__tool_event(evsel);
224 	int pid = -1, idx = 0, thread = 0, nthreads, err = 0, old_errno;
225 
226 	if (ev == TOOL_PMU__EVENT_NUM_CPUS)
227 		return 0;
228 
229 	if (ev == TOOL_PMU__EVENT_DURATION_TIME) {
230 		if (evsel->core.attr.sample_period) /* no sampling */
231 			return -EINVAL;
232 		evsel->start_time = rdclock();
233 		return 0;
234 	}
235 
236 	if (evsel->cgrp)
237 		pid = evsel->cgrp->fd;
238 
239 	nthreads = perf_thread_map__nr(threads);
240 	for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
241 		for (thread = 0; thread < nthreads; thread++) {
242 			if (thread >= nthreads)
243 				break;
244 
245 			if (!evsel->cgrp && !evsel->core.system_wide)
246 				pid = perf_thread_map__pid(threads, thread);
247 
248 			if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) {
249 				bool system = ev == TOOL_PMU__EVENT_SYSTEM_TIME;
250 				__u64 *start_time = NULL;
251 				int fd;
252 
253 				if (evsel->core.attr.sample_period) {
254 					/* no sampling */
255 					err = -EINVAL;
256 					goto out_close;
257 				}
258 				if (pid > -1) {
259 					char buf[64];
260 
261 					snprintf(buf, sizeof(buf), "/proc/%d/stat", pid);
262 					fd = open(buf, O_RDONLY);
263 					evsel->pid_stat = true;
264 				} else {
265 					fd = open("/proc/stat", O_RDONLY);
266 				}
267 				FD(evsel, idx, thread) = fd;
268 				if (fd < 0) {
269 					err = -errno;
270 					goto out_close;
271 				}
272 				start_time = xyarray__entry(evsel->start_times, idx, thread);
273 				if (pid > -1) {
274 					err = read_pid_stat_field(fd, system ? 15 : 14,
275 								  start_time);
276 				} else {
277 					struct perf_cpu cpu;
278 
279 					cpu = perf_cpu_map__cpu(evsel->core.cpus, idx);
280 					err = read_stat_field(fd, cpu, system ? 3 : 1,
281 							      start_time);
282 				}
283 				if (err)
284 					goto out_close;
285 			}
286 
287 		}
288 	}
289 	return 0;
290 out_close:
291 	if (err)
292 		threads->err_thread = thread;
293 
294 	old_errno = errno;
295 	do {
296 		while (--thread >= 0) {
297 			if (FD(evsel, idx, thread) >= 0)
298 				close(FD(evsel, idx, thread));
299 			FD(evsel, idx, thread) = -1;
300 		}
301 		thread = nthreads;
302 	} while (--idx >= 0);
303 	errno = old_errno;
304 	return err;
305 }
306 
#if !defined(__i386__) && !defined(__x86_64__)
/*
 * Stub used on non-x86 builds: always reports a TSC frequency of 0.
 * Presumably x86 builds supply their own definition elsewhere - confirm.
 */
u64 arch_get_tsc_freq(void)
{
	return 0;
}
#endif
313 
314 #if !defined(__aarch64__)
315 u64 tool_pmu__cpu_slots_per_cycle(void)
316 {
317 	return 0;
318 }
319 #endif
320 
321 static bool has_pmem(void)
322 {
323 	static bool has_pmem, cached;
324 	const char *sysfs = sysfs__mountpoint();
325 	char path[PATH_MAX];
326 
327 	if (!cached) {
328 		snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs);
329 		has_pmem = access(path, F_OK) == 0;
330 		cached = true;
331 	}
332 	return has_pmem;
333 }
334 
335 bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result)
336 {
337 	const struct cpu_topology *topology;
338 
339 	switch (ev) {
340 	case TOOL_PMU__EVENT_HAS_PMEM:
341 		*result = has_pmem() ? 1 : 0;
342 		return true;
343 
344 	case TOOL_PMU__EVENT_NUM_CORES:
345 		topology = online_topology();
346 		*result = topology->core_cpus_lists;
347 		return true;
348 
349 	case TOOL_PMU__EVENT_NUM_CPUS:
350 		*result = cpu__max_present_cpu().cpu;
351 		return true;
352 
353 	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: {
354 		struct perf_cpu_map *online = cpu_map__online();
355 
356 		if (online) {
357 			*result = perf_cpu_map__nr(online);
358 			perf_cpu_map__put(online);
359 			return true;
360 		}
361 		return false;
362 	}
363 	case TOOL_PMU__EVENT_NUM_DIES:
364 		topology = online_topology();
365 		*result = topology->die_cpus_lists;
366 		return true;
367 
368 	case TOOL_PMU__EVENT_NUM_PACKAGES:
369 		topology = online_topology();
370 		*result = topology->package_cpus_lists;
371 		return true;
372 
373 	case TOOL_PMU__EVENT_SLOTS:
374 		*result = tool_pmu__cpu_slots_per_cycle();
375 		return *result ? true : false;
376 
377 	case TOOL_PMU__EVENT_SMT_ON:
378 		*result = smt_on() ? 1 : 0;
379 		return true;
380 
381 	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
382 		*result = arch_get_tsc_freq();
383 		return true;
384 
385 	case TOOL_PMU__EVENT_NONE:
386 	case TOOL_PMU__EVENT_DURATION_TIME:
387 	case TOOL_PMU__EVENT_USER_TIME:
388 	case TOOL_PMU__EVENT_SYSTEM_TIME:
389 	case TOOL_PMU__EVENT_MAX:
390 	default:
391 		return false;
392 	}
393 }
394 
395 int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
396 {
397 	__u64 *start_time, cur_time, delta_start;
398 	u64 val;
399 	int fd, err = 0;
400 	struct perf_counts_values *count, *old_count = NULL;
401 	bool adjust = false;
402 	enum tool_pmu_event ev = evsel__tool_event(evsel);
403 
404 	count = perf_counts(evsel->counts, cpu_map_idx, thread);
405 
406 	switch (ev) {
407 	case TOOL_PMU__EVENT_HAS_PMEM:
408 	case TOOL_PMU__EVENT_NUM_CORES:
409 	case TOOL_PMU__EVENT_NUM_CPUS:
410 	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE:
411 	case TOOL_PMU__EVENT_NUM_DIES:
412 	case TOOL_PMU__EVENT_NUM_PACKAGES:
413 	case TOOL_PMU__EVENT_SLOTS:
414 	case TOOL_PMU__EVENT_SMT_ON:
415 	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
416 		if (evsel->prev_raw_counts)
417 			old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
418 		val = 0;
419 		if (cpu_map_idx == 0 && thread == 0) {
420 			if (!tool_pmu__read_event(ev, &val)) {
421 				count->lost++;
422 				val = 0;
423 			}
424 		}
425 		if (old_count) {
426 			count->val = old_count->val + val;
427 			count->run = old_count->run + 1;
428 			count->ena = old_count->ena + 1;
429 		} else {
430 			count->val = val;
431 			count->run++;
432 			count->ena++;
433 		}
434 		return 0;
435 	case TOOL_PMU__EVENT_DURATION_TIME:
436 		/*
437 		 * Pretend duration_time is only on the first CPU and thread, or
438 		 * else aggregation will scale duration_time by the number of
439 		 * CPUs/threads.
440 		 */
441 		start_time = &evsel->start_time;
442 		if (cpu_map_idx == 0 && thread == 0)
443 			cur_time = rdclock();
444 		else
445 			cur_time = *start_time;
446 		break;
447 	case TOOL_PMU__EVENT_USER_TIME:
448 	case TOOL_PMU__EVENT_SYSTEM_TIME: {
449 		bool system = evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME;
450 
451 		start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread);
452 		fd = FD(evsel, cpu_map_idx, thread);
453 		lseek(fd, SEEK_SET, 0);
454 		if (evsel->pid_stat) {
455 			/* The event exists solely on 1 CPU. */
456 			if (cpu_map_idx == 0)
457 				err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time);
458 			else
459 				cur_time = 0;
460 		} else {
461 			/* The event is for all threads. */
462 			if (thread == 0) {
463 				struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus,
464 									cpu_map_idx);
465 
466 				err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time);
467 			} else {
468 				cur_time = 0;
469 			}
470 		}
471 		adjust = true;
472 		break;
473 	}
474 	case TOOL_PMU__EVENT_NONE:
475 	case TOOL_PMU__EVENT_MAX:
476 	default:
477 		err = -EINVAL;
478 	}
479 	if (err)
480 		return err;
481 
482 	delta_start = cur_time - *start_time;
483 	if (adjust) {
484 		__u64 ticks_per_sec = sysconf(_SC_CLK_TCK);
485 
486 		delta_start *= 1000000000 / ticks_per_sec;
487 	}
488 	count->val    = delta_start;
489 	count->lost   = 0;
490 	/*
491 	 * The values of enabled and running must make a ratio of 100%. The
492 	 * exact values don't matter as long as they are non-zero to avoid
493 	 * issues with evsel__count_has_error.
494 	 */
495 	count->ena++;
496 	count->run++;
497 	return 0;
498 }
499 
500 struct perf_pmu *tool_pmu__new(void)
501 {
502 	struct perf_pmu *tool = zalloc(sizeof(struct perf_pmu));
503 
504 	if (!tool)
505 		return NULL;
506 
507 	if (perf_pmu__init(tool, PERF_PMU_TYPE_TOOL, "tool") != 0) {
508 		perf_pmu__delete(tool);
509 		return NULL;
510 	}
511 	tool->events_table = find_core_events_table("common", "common");
512 	return tool;
513 }
514