xref: /linux/tools/perf/util/evsel.c (revision 390d5ea26622f794c2d29cefd5a01ef116b4fe1d)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
4  *
5  * Parts came from builtin-{top,stat,record}.c, see those files for further
6  * copyright notes.
7  */
8 /*
9  * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select
10  * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu.
11  */
12 #define __SANE_USERSPACE_TYPES__
13 
14 #include "evsel.h"
15 
16 #include <errno.h>
17 #include <inttypes.h>
18 #include <stdlib.h>
19 
20 #include <dirent.h>
21 #include <linux/bitops.h>
22 #include <linux/compiler.h>
23 #include <linux/ctype.h>
24 #include <linux/err.h>
25 #include <linux/hw_breakpoint.h>
26 #include <linux/perf_event.h>
27 #include <linux/zalloc.h>
28 #include <sys/ioctl.h>
29 #include <sys/resource.h>
30 #include <sys/syscall.h>
31 #include <sys/types.h>
32 
33 #include <api/fs/fs.h>
34 #include <api/fs/tracing_path.h>
35 #include <byteswap.h>
36 #include <internal/lib.h>
37 #include <internal/threadmap.h>
38 #include <internal/xyarray.h>
39 #include <perf/cpumap.h>
40 #include <perf/evsel.h>
41 
42 #include "../perf-sys.h"
43 #include "asm/bug.h"
44 #include "bpf-filter.h"
45 #include "bpf_counter.h"
46 #include "callchain.h"
47 #include "cgroup.h"
48 #include "counts.h"
49 #include "debug.h"
50 #include "drm_pmu.h"
51 #include "dwarf-regs.h"
52 #include "env.h"
53 #include "event.h"
54 #include "evlist.h"
55 #include "evsel_config.h"
56 #include "evsel_fprintf.h"
57 #include "hashmap.h"
58 #include "hist.h"
59 #include "hwmon_pmu.h"
60 #include "intel-tpebs.h"
61 #include "memswap.h"
62 #include "off_cpu.h"
63 #include "parse-branch-options.h"
64 #include "perf_regs.h"
65 #include "pmu.h"
66 #include "pmus.h"
67 #include "record.h"
68 #include "rlimit.h"
69 #include "session.h"
70 #include "stat.h"
71 #include "string2.h"
72 #include "target.h"
73 #include "thread_map.h"
74 #include "time-utils.h"
75 #include "tool_pmu.h"
76 #include "tp_pmu.h"
77 #include "trace-event.h"
78 #include "util.h"
79 
80 #ifdef HAVE_LIBTRACEEVENT
81 #include <event-parse.h>
82 #endif
83 
84 struct perf_missing_features perf_missing_features;
85 
86 static clockid_t clockid;
87 
88 static int evsel__no_extra_init(struct evsel *evsel __maybe_unused)
89 {
90 	return 0;
91 }
92 
/*
 * Tell whether the PERF_TEST_ATTR environment variable is set.
 *
 * The environment is only consulted on the first call; the answer is kept in
 * function-local statics and returned as-is on later calls.
 */
static bool test_attr__enabled(void)
{
	static bool checked;
	static bool enabled;

	if (checked)
		return enabled;

	enabled = getenv("PERF_TEST_ATTR") != NULL;
	checked = true;

	return enabled;
}
106 
107 #define __WRITE_ASS(str, fmt, data)					\
108 do {									\
109 	if (fprintf(file, #str "=%"fmt "\n", data) < 0) {		\
110 		perror("test attr - failed to write event file");	\
111 		fclose(file);						\
112 		return -1;						\
113 	}								\
114 } while (0)
115 
116 #define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field)
117 
118 static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
119 		       int fd, int group_fd, unsigned long flags)
120 {
121 	FILE *file;
122 	char path[PATH_MAX];
123 	char *dir = getenv("PERF_TEST_ATTR");
124 
125 	snprintf(path, PATH_MAX, "%s/event-%d-%llu-%d", dir,
126 		 attr->type, attr->config, fd);
127 
128 	file = fopen(path, "w+");
129 	if (!file) {
130 		perror("test attr - failed to open event file");
131 		return -1;
132 	}
133 
134 	if (fprintf(file, "[event-%d-%llu-%d]\n",
135 		    attr->type, attr->config, fd) < 0) {
136 		perror("test attr - failed to write event file");
137 		fclose(file);
138 		return -1;
139 	}
140 
141 	/* syscall arguments */
142 	__WRITE_ASS(fd,       "d", fd);
143 	__WRITE_ASS(group_fd, "d", group_fd);
144 	__WRITE_ASS(cpu,      "d", cpu.cpu);
145 	__WRITE_ASS(pid,      "d", pid);
146 	__WRITE_ASS(flags,   "lu", flags);
147 
148 	/* struct perf_event_attr */
149 	WRITE_ASS(type,   PRIu32);
150 	WRITE_ASS(size,   PRIu32);
151 	WRITE_ASS(config,  "llu");
152 	WRITE_ASS(sample_period, "llu");
153 	WRITE_ASS(sample_type,   "llu");
154 	WRITE_ASS(read_format,   "llu");
155 	WRITE_ASS(disabled,       "d");
156 	WRITE_ASS(inherit,        "d");
157 	WRITE_ASS(pinned,         "d");
158 	WRITE_ASS(exclusive,      "d");
159 	WRITE_ASS(exclude_user,   "d");
160 	WRITE_ASS(exclude_kernel, "d");
161 	WRITE_ASS(exclude_hv,     "d");
162 	WRITE_ASS(exclude_idle,   "d");
163 	WRITE_ASS(mmap,           "d");
164 	WRITE_ASS(comm,           "d");
165 	WRITE_ASS(freq,           "d");
166 	WRITE_ASS(inherit_stat,   "d");
167 	WRITE_ASS(enable_on_exec, "d");
168 	WRITE_ASS(task,           "d");
169 	WRITE_ASS(watermark,      "d");
170 	WRITE_ASS(precise_ip,     "d");
171 	WRITE_ASS(mmap_data,      "d");
172 	WRITE_ASS(sample_id_all,  "d");
173 	WRITE_ASS(exclude_host,   "d");
174 	WRITE_ASS(exclude_guest,  "d");
175 	WRITE_ASS(exclude_callchain_kernel, "d");
176 	WRITE_ASS(exclude_callchain_user, "d");
177 	WRITE_ASS(mmap2,	  "d");
178 	WRITE_ASS(comm_exec,	  "d");
179 	WRITE_ASS(context_switch, "d");
180 	WRITE_ASS(write_backward, "d");
181 	WRITE_ASS(namespaces,	  "d");
182 	WRITE_ASS(use_clockid,    "d");
183 	WRITE_ASS(wakeup_events, PRIu32);
184 	WRITE_ASS(bp_type, PRIu32);
185 	WRITE_ASS(config1, "llu");
186 	WRITE_ASS(config2, "llu");
187 	WRITE_ASS(branch_sample_type, "llu");
188 	WRITE_ASS(sample_regs_user,   "llu");
189 	WRITE_ASS(sample_stack_user,  PRIu32);
190 
191 	fclose(file);
192 	return 0;
193 }
194 
195 #undef __WRITE_ASS
196 #undef WRITE_ASS
197 
198 static void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
199 		     int fd, int group_fd, unsigned long flags)
200 {
201 	int errno_saved = errno;
202 
203 	if ((fd != -1) && store_event(attr, pid, cpu, fd, group_fd, flags)) {
204 		pr_err("test attr FAILED");
205 		exit(128);
206 	}
207 
208 	errno = errno_saved;
209 }
210 
211 static void evsel__no_extra_fini(struct evsel *evsel __maybe_unused)
212 {
213 }
214 
/*
 * Allocation size and init/fini hooks used for evsel objects.  The defaults
 * are a bare 'struct evsel' with no-op hooks; evsel__object_config() can
 * replace them.
 */
static struct {
	/* number of bytes allocated for each evsel */
	size_t size;
	/* hook invoked from evsel__init() */
	int (*init)(struct evsel *evsel);
	/* counterpart hook to init */
	void (*fini)(struct evsel *evsel);
} perf_evsel__object = {
	.size = sizeof(struct evsel),
	.init = evsel__no_extra_init,
	.fini = evsel__no_extra_fini,
};
224 
225 int evsel__object_config(size_t object_size, int (*init)(struct evsel *evsel),
226 			 void (*fini)(struct evsel *evsel))
227 {
228 
229 	if (object_size == 0)
230 		goto set_methods;
231 
232 	if (perf_evsel__object.size > object_size)
233 		return -EINVAL;
234 
235 	perf_evsel__object.size = object_size;
236 
237 set_methods:
238 	if (init != NULL)
239 		perf_evsel__object.init = init;
240 
241 	if (fini != NULL)
242 		perf_evsel__object.fini = fini;
243 
244 	return 0;
245 }
246 
247 const char *evsel__pmu_name(const struct evsel *evsel)
248 {
249 	struct perf_pmu *pmu = evsel__find_pmu(evsel);
250 
251 	if (pmu)
252 		return pmu->name;
253 
254 	return event_type(evsel->core.attr.type);
255 }
256 
/* File descriptor slot (x, y) of evsel 'e'; 'e' is parenthesised so any pointer expression works. */
#define FD(e, x, y) (*(int *)xyarray__entry((e)->core.fd, x, y))
258 
259 int __evsel__sample_size(u64 sample_type)
260 {
261 	u64 mask = sample_type & PERF_SAMPLE_MASK;
262 	int size = 0;
263 	int i;
264 
265 	for (i = 0; i < 64; i++) {
266 		if (mask & (1ULL << i))
267 			size++;
268 	}
269 
270 	size *= sizeof(u64);
271 
272 	return size;
273 }
274 
275 /**
276  * __perf_evsel__calc_id_pos - calculate id_pos.
277  * @sample_type: sample type
278  *
279  * This function returns the position of the event id (PERF_SAMPLE_ID or
280  * PERF_SAMPLE_IDENTIFIER) in a sample event i.e. in the array of struct
281  * perf_record_sample.
282  */
283 static int __perf_evsel__calc_id_pos(u64 sample_type)
284 {
285 	int idx = 0;
286 
287 	if (sample_type & PERF_SAMPLE_IDENTIFIER)
288 		return 0;
289 
290 	if (!(sample_type & PERF_SAMPLE_ID))
291 		return -1;
292 
293 	if (sample_type & PERF_SAMPLE_IP)
294 		idx += 1;
295 
296 	if (sample_type & PERF_SAMPLE_TID)
297 		idx += 1;
298 
299 	if (sample_type & PERF_SAMPLE_TIME)
300 		idx += 1;
301 
302 	if (sample_type & PERF_SAMPLE_ADDR)
303 		idx += 1;
304 
305 	return idx;
306 }
307 
308 /**
309  * __perf_evsel__calc_is_pos - calculate is_pos.
310  * @sample_type: sample type
311  *
312  * This function returns the position (counting backwards) of the event id
313  * (PERF_SAMPLE_ID or PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if
314  * sample_id_all is used there is an id sample appended to non-sample events.
315  */
316 static int __perf_evsel__calc_is_pos(u64 sample_type)
317 {
318 	int idx = 1;
319 
320 	if (sample_type & PERF_SAMPLE_IDENTIFIER)
321 		return 1;
322 
323 	if (!(sample_type & PERF_SAMPLE_ID))
324 		return -1;
325 
326 	if (sample_type & PERF_SAMPLE_CPU)
327 		idx += 1;
328 
329 	if (sample_type & PERF_SAMPLE_STREAM_ID)
330 		idx += 1;
331 
332 	return idx;
333 }
334 
335 void evsel__calc_id_pos(struct evsel *evsel)
336 {
337 	evsel->id_pos = __perf_evsel__calc_id_pos(evsel->core.attr.sample_type);
338 	evsel->is_pos = __perf_evsel__calc_is_pos(evsel->core.attr.sample_type);
339 }
340 
341 void __evsel__set_sample_bit(struct evsel *evsel,
342 				  enum perf_event_sample_format bit)
343 {
344 	if (!(evsel->core.attr.sample_type & bit)) {
345 		evsel->core.attr.sample_type |= bit;
346 		evsel->sample_size += sizeof(u64);
347 		evsel__calc_id_pos(evsel);
348 	}
349 }
350 
351 void __evsel__reset_sample_bit(struct evsel *evsel,
352 				    enum perf_event_sample_format bit)
353 {
354 	if (evsel->core.attr.sample_type & bit) {
355 		evsel->core.attr.sample_type &= ~bit;
356 		evsel->sample_size -= sizeof(u64);
357 		evsel__calc_id_pos(evsel);
358 	}
359 }
360 
361 void evsel__set_sample_id(struct evsel *evsel,
362 			       bool can_sample_identifier)
363 {
364 	if (can_sample_identifier) {
365 		evsel__reset_sample_bit(evsel, ID);
366 		evsel__set_sample_bit(evsel, IDENTIFIER);
367 	} else {
368 		evsel__set_sample_bit(evsel, ID);
369 	}
370 	evsel->core.attr.read_format |= PERF_FORMAT_ID;
371 }
372 
373 /**
374  * evsel__is_function_event - Return whether given evsel is a function
375  * trace event
376  *
377  * @evsel - evsel selector to be tested
378  *
379  * Return %true if event is function trace event
380  */
381 bool evsel__is_function_event(struct evsel *evsel)
382 {
383 #define FUNCTION_EVENT "ftrace:function"
384 
385 	return evsel->name &&
386 	       !strncmp(FUNCTION_EVENT, evsel->name, sizeof(FUNCTION_EVENT));
387 
388 #undef FUNCTION_EVENT
389 }
390 
391 void evsel__init(struct evsel *evsel,
392 		 struct perf_event_attr *attr, int idx)
393 {
394 	perf_evsel__init(&evsel->core, attr, idx);
395 	evsel->tracking	   = !idx;
396 	evsel->unit	   = strdup("");
397 	evsel->scale	   = 1.0;
398 	evsel->max_events  = ULONG_MAX;
399 	evsel->evlist	   = NULL;
400 	evsel->bpf_obj	   = NULL;
401 	evsel->bpf_fd	   = -1;
402 	INIT_LIST_HEAD(&evsel->config_terms);
403 	INIT_LIST_HEAD(&evsel->bpf_counter_list);
404 	INIT_LIST_HEAD(&evsel->bpf_filters);
405 	perf_evsel__object.init(evsel);
406 	evsel->sample_size = __evsel__sample_size(attr->sample_type);
407 	evsel__calc_id_pos(evsel);
408 	evsel->cmdline_group_boundary = false;
409 	evsel->per_pkg_mask  = NULL;
410 	evsel->collect_stat  = false;
411 	evsel->group_pmu_name = NULL;
412 	evsel->skippable     = false;
413 	evsel->supported     = true;
414 	evsel->alternate_hw_config = PERF_COUNT_HW_MAX;
415 	evsel->script_output_type = -1; // FIXME: OUTPUT_TYPE_UNSET, see builtin-script.c
416 }
417 
418 struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx)
419 {
420 	struct evsel *evsel = zalloc(perf_evsel__object.size);
421 
422 	if (!evsel)
423 		return NULL;
424 	evsel__init(evsel, attr, idx);
425 
426 	if (evsel__is_bpf_output(evsel) && !attr->sample_type) {
427 		evsel->core.attr.sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
428 					    PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
429 		evsel->core.attr.sample_period = 1;
430 	}
431 
432 	if (evsel__is_clock(evsel)) {
433 		free((char *)evsel->unit);
434 		evsel->unit = strdup("msec");
435 		evsel->scale = 1e-6;
436 	}
437 
438 	return evsel;
439 }
440 
441 int copy_config_terms(struct list_head *dst, struct list_head *src)
442 {
443 	struct evsel_config_term *pos, *tmp;
444 
445 	list_for_each_entry(pos, src, list) {
446 		tmp = malloc(sizeof(*tmp));
447 		if (tmp == NULL)
448 			return -ENOMEM;
449 
450 		*tmp = *pos;
451 		if (tmp->free_str) {
452 			tmp->val.str = strdup(pos->val.str);
453 			if (tmp->val.str == NULL) {
454 				free(tmp);
455 				return -ENOMEM;
456 			}
457 		}
458 		list_add_tail(&tmp->list, dst);
459 	}
460 	return 0;
461 }
462 
463 static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src)
464 {
465 	return copy_config_terms(&dst->config_terms, &src->config_terms);
466 }
467 
468 /**
469  * evsel__clone - create a new evsel copied from @orig
470  * @orig: original evsel
471  *
472  * The assumption is that @orig is not configured nor opened yet.
473  * So we only care about the attributes that can be set while it's parsed.
474  */
475 struct evsel *evsel__clone(struct evsel *dest, struct evsel *orig)
476 {
477 	struct evsel *evsel;
478 
479 	BUG_ON(orig->core.fd);
480 	BUG_ON(orig->counts);
481 	BUG_ON(orig->priv);
482 	BUG_ON(orig->per_pkg_mask);
483 
484 	/* cannot handle BPF objects for now */
485 	if (orig->bpf_obj)
486 		return NULL;
487 
488 	if (dest)
489 		evsel = dest;
490 	else
491 		evsel = evsel__new(&orig->core.attr);
492 
493 	if (evsel == NULL)
494 		return NULL;
495 
496 	evsel->core.cpus = perf_cpu_map__get(orig->core.cpus);
497 	evsel->core.pmu_cpus = perf_cpu_map__get(orig->core.pmu_cpus);
498 	evsel->core.threads = perf_thread_map__get(orig->core.threads);
499 	evsel->core.nr_members = orig->core.nr_members;
500 	evsel->core.system_wide = orig->core.system_wide;
501 	evsel->core.requires_cpu = orig->core.requires_cpu;
502 	evsel->core.is_pmu_core = orig->core.is_pmu_core;
503 
504 	if (orig->name) {
505 		evsel->name = strdup(orig->name);
506 		if (evsel->name == NULL)
507 			goto out_err;
508 	}
509 	if (orig->group_name) {
510 		evsel->group_name = strdup(orig->group_name);
511 		if (evsel->group_name == NULL)
512 			goto out_err;
513 	}
514 	if (orig->group_pmu_name) {
515 		evsel->group_pmu_name = strdup(orig->group_pmu_name);
516 		if (evsel->group_pmu_name == NULL)
517 			goto out_err;
518 	}
519 	if (orig->filter) {
520 		evsel->filter = strdup(orig->filter);
521 		if (evsel->filter == NULL)
522 			goto out_err;
523 	}
524 	if (orig->metric_id) {
525 		evsel->metric_id = strdup(orig->metric_id);
526 		if (evsel->metric_id == NULL)
527 			goto out_err;
528 	}
529 	evsel->cgrp = cgroup__get(orig->cgrp);
530 #ifdef HAVE_LIBTRACEEVENT
531 	if (orig->tp_sys) {
532 		evsel->tp_sys = strdup(orig->tp_sys);
533 		if (evsel->tp_sys == NULL)
534 			goto out_err;
535 	}
536 	if (orig->tp_name) {
537 		evsel->tp_name = strdup(orig->tp_name);
538 		if (evsel->tp_name == NULL)
539 			goto out_err;
540 	}
541 	evsel->tp_format = orig->tp_format;
542 #endif
543 	evsel->handler = orig->handler;
544 	evsel->core.leader = orig->core.leader;
545 	evsel->metric_leader = orig->metric_leader;
546 
547 	evsel->max_events = orig->max_events;
548 	zfree(&evsel->unit);
549 	if (orig->unit) {
550 		evsel->unit = strdup(orig->unit);
551 		if (evsel->unit == NULL)
552 			goto out_err;
553 	}
554 	evsel->scale = orig->scale;
555 	evsel->snapshot = orig->snapshot;
556 	evsel->per_pkg = orig->per_pkg;
557 	evsel->percore = orig->percore;
558 	evsel->precise_max = orig->precise_max;
559 	evsel->is_libpfm_event = orig->is_libpfm_event;
560 
561 	evsel->exclude_GH = orig->exclude_GH;
562 	evsel->sample_read = orig->sample_read;
563 	evsel->collect_stat = orig->collect_stat;
564 	evsel->weak_group = orig->weak_group;
565 	evsel->use_config_name = orig->use_config_name;
566 	evsel->pmu = orig->pmu;
567 	evsel->first_wildcard_match = orig->first_wildcard_match;
568 
569 	if (evsel__copy_config_terms(evsel, orig) < 0)
570 		goto out_err;
571 
572 	evsel->alternate_hw_config = orig->alternate_hw_config;
573 
574 	return evsel;
575 
576 out_err:
577 	evsel__delete(evsel);
578 	return NULL;
579 }
580 
581 /*
582  * Returns pointer with encoded error via <linux/err.h> interface.
583  */
584 struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx, bool format)
585 {
586 	struct perf_event_attr attr = {
587 		.type	       = PERF_TYPE_TRACEPOINT,
588 		.sample_type   = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
589 				PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
590 	};
591 	struct evsel *evsel = zalloc(perf_evsel__object.size);
592 	int err = -ENOMEM, id = -1;
593 
594 	if (evsel == NULL)
595 		goto out_err;
596 
597 
598 	if (asprintf(&evsel->name, "%s:%s", sys, name) < 0)
599 		goto out_free;
600 
601 #ifdef HAVE_LIBTRACEEVENT
602 	evsel->tp_sys = strdup(sys);
603 	if (!evsel->tp_sys)
604 		goto out_free;
605 
606 	evsel->tp_name = strdup(name);
607 	if (!evsel->tp_name)
608 		goto out_free;
609 #endif
610 
611 	event_attr_init(&attr);
612 
613 	if (format) {
614 		id = tp_pmu__id(sys, name);
615 		if (id < 0) {
616 			err = id;
617 			goto out_free;
618 		}
619 	}
620 	attr.config = (__u64)id;
621 	attr.sample_period = 1;
622 	evsel__init(evsel, &attr, idx);
623 	return evsel;
624 
625 out_free:
626 	zfree(&evsel->name);
627 #ifdef HAVE_LIBTRACEEVENT
628 	zfree(&evsel->tp_sys);
629 	zfree(&evsel->tp_name);
630 #endif
631 	free(evsel);
632 out_err:
633 	return ERR_PTR(err);
634 }
635 
#ifdef HAVE_LIBTRACEEVENT
/*
 * Return the tracepoint format of @evsel, looking it up on first use and
 * caching it in evsel->tp_format.
 *
 * The lookup is done by id (attr.config) when the evsel has no tp_sys,
 * otherwise by tp_sys/tp_name.
 *
 * Returns NULL for non-tracepoint events.  When the lookup fails it also
 * returns NULL, after setting errno from the encoded error and logging it.
 */
struct tep_event *evsel__tp_format(struct evsel *evsel)
{
	struct tep_event *tp_format = evsel->tp_format;

	if (tp_format)
		return tp_format;

	if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
		return NULL;

	if (!evsel->tp_sys)
		tp_format = trace_event__tp_format_id(evsel->core.attr.config);
	else
		tp_format = trace_event__tp_format(evsel->tp_sys, evsel->tp_name);

	if (IS_ERR(tp_format)) {
		/*
		 * Decode the error from the pointer just returned;
		 * evsel->tp_format is still NULL here and would give errno 0.
		 */
		int err = -PTR_ERR(tp_format);

		errno = err;
		pr_err("Error getting tracepoint format '%s': %m\n",
			evsel__name(evsel));
		return NULL;
	}
	evsel->tp_format = tp_format;
	return evsel->tp_format;
}
#endif
664 
665 const char *const evsel__hw_names[PERF_COUNT_HW_MAX] = {
666 	"cycles",
667 	"instructions",
668 	"cache-references",
669 	"cache-misses",
670 	"branches",
671 	"branch-misses",
672 	"bus-cycles",
673 	"stalled-cycles-frontend",
674 	"stalled-cycles-backend",
675 	"ref-cycles",
676 };
677 
/* Comma-separated list of event names, or NULL when no list is configured. */
char *evsel__bpf_counter_events;

/*
 * Return true when @name is one complete comma-separated token of
 * evsel__bpf_counter_events, false otherwise (including when no list is set).
 *
 * Every occurrence of @name in the list is examined, so a token is still
 * found when the same text first appears inside a longer token, e.g.
 * "cycles" in "ref-cycles,cycles".
 */
bool evsel__match_bpf_counter_events(const char *name)
{
	const char *list = evsel__bpf_counter_events;
	const char *ptr;
	size_t name_len;

	if (!list)
		return false;

	name_len = strlen(name);

	for (ptr = strstr(list, name); ptr; ptr = strstr(ptr + 1, name)) {
		char end = ptr[name_len];
		bool starts_token = (ptr == list) || (ptr[-1] == ',');
		bool ends_token = (end == ',') || (end == '\0');

		if (starts_token && ends_token)
			return true;

		/*
		 * This occurrence reaches the end of the list, so no later one
		 * can exist; stopping here also keeps ptr + 1 inside the string.
		 */
		if (end == '\0')
			break;
	}

	return false;
}
699 
700 static const char *__evsel__hw_name(u64 config)
701 {
702 	if (config < PERF_COUNT_HW_MAX && evsel__hw_names[config])
703 		return evsel__hw_names[config];
704 
705 	return "unknown-hardware";
706 }
707 
708 static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size)
709 {
710 	int colon = 0, r = 0;
711 	struct perf_event_attr *attr = &evsel->core.attr;
712 
713 #define MOD_PRINT(context, mod)	do {					\
714 		if (!attr->exclude_##context) {				\
715 			if (!colon) colon = ++r;			\
716 			r += scnprintf(bf + r, size - r, "%c", mod);	\
717 		} } while(0)
718 
719 	if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv) {
720 		MOD_PRINT(kernel, 'k');
721 		MOD_PRINT(user, 'u');
722 		MOD_PRINT(hv, 'h');
723 	}
724 
725 	if (attr->precise_ip) {
726 		if (!colon)
727 			colon = ++r;
728 		r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp");
729 	}
730 
731 	if (attr->exclude_host || attr->exclude_guest) {
732 		MOD_PRINT(host, 'H');
733 		MOD_PRINT(guest, 'G');
734 	}
735 #undef MOD_PRINT
736 	if (colon)
737 		bf[colon - 1] = ':';
738 	return r;
739 }
740 
741 int __weak arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
742 {
743 	return scnprintf(bf, size, "%s", __evsel__hw_name(evsel->core.attr.config));
744 }
745 
/* Format the hardware event name followed by its modifiers; returns the combined length. */
static int evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
{
	int len = arch_evsel__hw_name(evsel, bf, size);

	len += evsel__add_modifiers(evsel, bf + len, size - len);
	return len;
}
751 
752 const char *const evsel__sw_names[PERF_COUNT_SW_MAX] = {
753 	"cpu-clock",
754 	"task-clock",
755 	"page-faults",
756 	"context-switches",
757 	"cpu-migrations",
758 	"minor-faults",
759 	"major-faults",
760 	"alignment-faults",
761 	"emulation-faults",
762 	"dummy",
763 };
764 
765 static const char *__evsel__sw_name(u64 config)
766 {
767 	if (config < PERF_COUNT_SW_MAX && evsel__sw_names[config])
768 		return evsel__sw_names[config];
769 	return "unknown-software";
770 }
771 
772 static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size)
773 {
774 	int r = scnprintf(bf, size, "%s", __evsel__sw_name(evsel->core.attr.config));
775 	return r + evsel__add_modifiers(evsel, bf + r, size - r);
776 }
777 
778 static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
779 {
780 	int r;
781 
782 	r = scnprintf(bf, size, "mem:0x%" PRIx64 ":", addr);
783 
784 	if (type & HW_BREAKPOINT_R)
785 		r += scnprintf(bf + r, size - r, "r");
786 
787 	if (type & HW_BREAKPOINT_W)
788 		r += scnprintf(bf + r, size - r, "w");
789 
790 	if (type & HW_BREAKPOINT_X)
791 		r += scnprintf(bf + r, size - r, "x");
792 
793 	return r;
794 }
795 
796 static int evsel__bp_name(struct evsel *evsel, char *bf, size_t size)
797 {
798 	struct perf_event_attr *attr = &evsel->core.attr;
799 	int r = __evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
800 	return r + evsel__add_modifiers(evsel, bf + r, size - r);
801 }
802 
803 const char *const evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = {
804  { "L1-dcache",	"l1-d",		"l1d",		"L1-data",		},
805  { "L1-icache",	"l1-i",		"l1i",		"L1-instruction",	},
806  { "LLC",	"L2",							},
807  { "dTLB",	"d-tlb",	"Data-TLB",				},
808  { "iTLB",	"i-tlb",	"Instruction-TLB",			},
809  { "branch",	"branches",	"bpu",		"btb",		"bpc",	},
810  { "node",								},
811 };
812 
813 const char *const evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES] = {
814  { "load",	"loads",	"read",					},
815  { "store",	"stores",	"write",				},
816  { "prefetch",	"prefetches",	"speculative-read", "speculative-load",	},
817 };
818 
819 const char *const evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES] = {
820  { "refs",	"Reference",	"ops",		"access",		},
821  { "misses",	"miss",							},
822 };
823 
824 #define C(x)		PERF_COUNT_HW_CACHE_##x
825 #define CACHE_READ	(1 << C(OP_READ))
826 #define CACHE_WRITE	(1 << C(OP_WRITE))
827 #define CACHE_PREFETCH	(1 << C(OP_PREFETCH))
828 #define COP(x)		(1 << x)
829 
830 /*
831  * cache operation stat
832  * L1I : Read and prefetch only
833  * ITLB and BPU : Read-only
834  */
835 static const unsigned long evsel__hw_cache_stat[C(MAX)] = {
836  [C(L1D)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
837  [C(L1I)]	= (CACHE_READ | CACHE_PREFETCH),
838  [C(LL)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
839  [C(DTLB)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
840  [C(ITLB)]	= (CACHE_READ),
841  [C(BPU)]	= (CACHE_READ),
842  [C(NODE)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
843 };
844 
845 bool evsel__is_cache_op_valid(u8 type, u8 op)
846 {
847 	if (evsel__hw_cache_stat[type] & COP(op))
848 		return true;	/* valid */
849 	else
850 		return false;	/* invalid */
851 }
852 
853 int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size)
854 {
855 	if (result) {
856 		return scnprintf(bf, size, "%s-%s-%s", evsel__hw_cache[type][0],
857 				 evsel__hw_cache_op[op][0],
858 				 evsel__hw_cache_result[result][0]);
859 	}
860 
861 	return scnprintf(bf, size, "%s-%s", evsel__hw_cache[type][0],
862 			 evsel__hw_cache_op[op][1]);
863 }
864 
865 static int __evsel__hw_cache_name(u64 config, char *bf, size_t size)
866 {
867 	u8 op, result, type = (config >>  0) & 0xff;
868 	const char *err = "unknown-ext-hardware-cache-type";
869 
870 	if (type >= PERF_COUNT_HW_CACHE_MAX)
871 		goto out_err;
872 
873 	op = (config >>  8) & 0xff;
874 	err = "unknown-ext-hardware-cache-op";
875 	if (op >= PERF_COUNT_HW_CACHE_OP_MAX)
876 		goto out_err;
877 
878 	result = (config >> 16) & 0xff;
879 	err = "unknown-ext-hardware-cache-result";
880 	if (result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
881 		goto out_err;
882 
883 	err = "invalid-cache";
884 	if (!evsel__is_cache_op_valid(type, op))
885 		goto out_err;
886 
887 	return __evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
888 out_err:
889 	return scnprintf(bf, size, "%s", err);
890 }
891 
892 static int evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size)
893 {
894 	int ret = __evsel__hw_cache_name(evsel->core.attr.config, bf, size);
895 	return ret + evsel__add_modifiers(evsel, bf + ret, size - ret);
896 }
897 
898 static int evsel__raw_name(struct evsel *evsel, char *bf, size_t size)
899 {
900 	int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->core.attr.config);
901 	return ret + evsel__add_modifiers(evsel, bf + ret, size - ret);
902 }
903 
904 const char *evsel__name(struct evsel *evsel)
905 {
906 	char bf[128];
907 
908 	if (!evsel)
909 		goto out_unknown;
910 
911 	if (evsel->name)
912 		return evsel->name;
913 
914 	switch (evsel->core.attr.type) {
915 	case PERF_TYPE_RAW:
916 		evsel__raw_name(evsel, bf, sizeof(bf));
917 		break;
918 
919 	case PERF_TYPE_HARDWARE:
920 		evsel__hw_name(evsel, bf, sizeof(bf));
921 		break;
922 
923 	case PERF_TYPE_HW_CACHE:
924 		evsel__hw_cache_name(evsel, bf, sizeof(bf));
925 		break;
926 
927 	case PERF_TYPE_SOFTWARE:
928 		evsel__sw_name(evsel, bf, sizeof(bf));
929 		break;
930 
931 	case PERF_TYPE_TRACEPOINT:
932 		scnprintf(bf, sizeof(bf), "unknown tracepoint id=%#"PRIx64,
933 			  evsel->core.attr.config);
934 		break;
935 
936 	case PERF_TYPE_BREAKPOINT:
937 		evsel__bp_name(evsel, bf, sizeof(bf));
938 		break;
939 
940 	case PERF_PMU_TYPE_TOOL:
941 		scnprintf(bf, sizeof(bf), "%s", evsel__tool_pmu_event_name(evsel));
942 		break;
943 
944 	default:
945 		scnprintf(bf, sizeof(bf), "unknown event PMU=%d config=%#"PRIx64,
946 			  evsel->core.attr.type, evsel->core.attr.config);
947 		break;
948 	}
949 
950 	evsel->name = strdup(bf);
951 
952 	if (evsel->name)
953 		return evsel->name;
954 out_unknown:
955 	return "unknown";
956 }
957 
/* Return true when evsel__name(@evsel) is exactly equal to @name. */
bool evsel__name_is(struct evsel *evsel, const char *name)
{
	const char *own_name = evsel__name(evsel);

	return strcmp(own_name, name) == 0;
}
962 
963 const char *evsel__metric_id(const struct evsel *evsel)
964 {
965 	if (evsel->metric_id)
966 		return evsel->metric_id;
967 
968 	if (evsel__is_tool(evsel))
969 		return evsel__tool_pmu_event_name(evsel);
970 
971 	return "unknown";
972 }
973 
974 const char *evsel__group_name(struct evsel *evsel)
975 {
976 	return evsel->group_name ?: "anon group";
977 }
978 
979 /*
980  * Returns the group details for the specified leader,
981  * with following rules.
982  *
983  *  For record -e '{cycles,instructions}'
984  *    'anon group { cycles:u, instructions:u }'
985  *
986  *  For record -e 'cycles,instructions' and report --group
987  *    'cycles:u, instructions:u'
988  */
989 int evsel__group_desc(struct evsel *evsel, char *buf, size_t size)
990 {
991 	int ret = 0;
992 	bool first = true;
993 	struct evsel *pos;
994 	const char *group_name = evsel__group_name(evsel);
995 
996 	if (!evsel->forced_leader)
997 		ret = scnprintf(buf, size, "%s { ", group_name);
998 
999 	for_each_group_evsel(pos, evsel) {
1000 		if (symbol_conf.skip_empty &&
1001 		    evsel__hists(pos)->stats.nr_samples == 0)
1002 			continue;
1003 
1004 		ret += scnprintf(buf + ret, size - ret, "%s%s",
1005 				 first ? "" : ", ", evsel__name(pos));
1006 		first = false;
1007 	}
1008 
1009 	if (!evsel->forced_leader)
1010 		ret += scnprintf(buf + ret, size - ret, " }");
1011 
1012 	return ret;
1013 }
1014 
/*
 * Return what perf_session__e_machine() reports for the session that
 * evsel__session() gives for @evsel; @e_flags is passed through unchanged.
 */
uint16_t evsel__e_machine(struct evsel *evsel, uint32_t *e_flags)
{
	return perf_session__e_machine(evsel__session(evsel), e_flags);
}
1021 
1022 static void __evsel__config_callchain(struct evsel *evsel, const struct record_opts *opts,
1023 				      const struct callchain_param *param)
1024 {
1025 	bool function = evsel__is_function_event(evsel);
1026 	struct perf_event_attr *attr = &evsel->core.attr;
1027 
1028 	if (EM_HOST == EM_S390 && param->record_mode == CALLCHAIN_FP) {
1029 		pr_warning_once(
1030 			"Framepointer unwinding lacks kernel support. Use '--call-graph dwarf'\n");
1031 	}
1032 
1033 	evsel__set_sample_bit(evsel, CALLCHAIN);
1034 
1035 	attr->sample_max_stack = param->max_stack;
1036 
1037 	if (opts->kernel_callchains)
1038 		attr->exclude_callchain_user = 1;
1039 	if (opts->user_callchains)
1040 		attr->exclude_callchain_kernel = 1;
1041 	if (param->record_mode == CALLCHAIN_LBR) {
1042 		if (!opts->branch_stack) {
1043 			if (attr->exclude_user) {
1044 				pr_warning("LBR callstack option is only available "
1045 					   "to get user callchain information. "
1046 					   "Falling back to framepointers.\n");
1047 			} else {
1048 				evsel__set_sample_bit(evsel, BRANCH_STACK);
1049 				attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
1050 							PERF_SAMPLE_BRANCH_CALL_STACK |
1051 							PERF_SAMPLE_BRANCH_NO_CYCLES |
1052 							PERF_SAMPLE_BRANCH_NO_FLAGS |
1053 							PERF_SAMPLE_BRANCH_HW_INDEX;
1054 			}
1055 		} else
1056 			 pr_warning("Cannot use LBR callstack with branch stack. "
1057 				    "Falling back to framepointers.\n");
1058 	}
1059 
1060 	if (param->record_mode == CALLCHAIN_DWARF) {
1061 		if (!function) {
1062 			uint16_t e_machine = evsel__e_machine(evsel, /*e_flags=*/NULL);
1063 
1064 			evsel__set_sample_bit(evsel, REGS_USER);
1065 			evsel__set_sample_bit(evsel, STACK_USER);
1066 			if (opts->sample_user_regs &&
1067 			    DWARF_MINIMAL_REGS(e_machine) != perf_user_reg_mask(EM_HOST)) {
1068 				attr->sample_regs_user |= DWARF_MINIMAL_REGS(e_machine);
1069 				pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, "
1070 					   "specifying a subset with --user-regs may render DWARF unwinding unreliable, "
1071 					   "so the minimal registers set (IP, SP) is explicitly forced.\n");
1072 			} else {
1073 				attr->sample_regs_user |= perf_user_reg_mask(EM_HOST);
1074 			}
1075 			attr->sample_stack_user = param->dump_size;
1076 			attr->exclude_callchain_user = 1;
1077 		} else {
1078 			pr_info("Cannot use DWARF unwind for function trace event,"
1079 				" falling back to framepointers.\n");
1080 		}
1081 	}
1082 
1083 	if (function) {
1084 		pr_info("Disabling user space callchains for function trace event.\n");
1085 		attr->exclude_callchain_user = 1;
1086 	}
1087 
1088 	if (param->defer && !attr->exclude_callchain_user)
1089 		attr->defer_callchain = 1;
1090 }
1091 
1092 void evsel__config_callchain(struct evsel *evsel, const struct record_opts *opts,
1093 			     const struct callchain_param *param)
1094 {
1095 	if (param->enabled)
1096 		return __evsel__config_callchain(evsel, opts, param);
1097 }
1098 
1099 static void evsel__reset_callgraph(struct evsel *evsel, const struct callchain_param *param)
1100 {
1101 	struct perf_event_attr *attr = &evsel->core.attr;
1102 
1103 	evsel__reset_sample_bit(evsel, CALLCHAIN);
1104 	if (param->record_mode == CALLCHAIN_LBR) {
1105 		evsel__reset_sample_bit(evsel, BRANCH_STACK);
1106 		attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
1107 					      PERF_SAMPLE_BRANCH_CALL_STACK |
1108 					      PERF_SAMPLE_BRANCH_HW_INDEX);
1109 	}
1110 	if (param->record_mode == CALLCHAIN_DWARF) {
1111 		evsel__reset_sample_bit(evsel, REGS_USER);
1112 		evsel__reset_sample_bit(evsel, STACK_USER);
1113 	}
1114 }
1115 
1116 static void evsel__apply_ratio_to_prev(struct evsel *evsel,
1117 				       struct perf_event_attr *attr,
1118 				       const struct record_opts *opts,
1119 				       const char *buf)
1120 {
1121 	struct perf_event_attr *prev_attr = NULL;
1122 	struct evsel *evsel_prev = NULL;
1123 	u64 type = evsel->core.attr.sample_type;
1124 	u64 prev_type = 0;
1125 	double rtp;
1126 
1127 	rtp = strtod(buf, NULL);
1128 	if (rtp <= 0) {
1129 		pr_err("Invalid ratio-to-prev value %lf\n", rtp);
1130 		return;
1131 	}
1132 	if (evsel == evsel__leader(evsel)) {
1133 		pr_err("Invalid use of ratio-to-prev term without preceding element in group\n");
1134 		return;
1135 	}
1136 	if (!evsel->pmu->is_core) {
1137 		pr_err("Event using ratio-to-prev term must have a core PMU\n");
1138 		return;
1139 	}
1140 
1141 	evsel_prev = evsel__prev(evsel);
1142 	if (!evsel_prev) {
1143 		pr_err("Previous event does not exist.\n");
1144 		return;
1145 	}
1146 
1147 	if (evsel_prev->pmu->type != evsel->pmu->type) {
1148 		pr_err("Compared events (\"%s\", \"%s\") must have same PMU\n",
1149 			evsel->name, evsel_prev->name);
1150 		return;
1151 	}
1152 
1153 	prev_attr = &evsel_prev->core.attr;
1154 	prev_type = evsel_prev->core.attr.sample_type;
1155 
1156 	if (!(prev_type & PERF_SAMPLE_PERIOD)) {
1157 		attr->sample_period = prev_attr->sample_period * rtp;
1158 		attr->freq = 0;
1159 		evsel__reset_sample_bit(evsel, PERIOD);
1160 	} else if (!(type & PERF_SAMPLE_PERIOD)) {
1161 		prev_attr->sample_period = attr->sample_period / rtp;
1162 		prev_attr->freq = 0;
1163 		evsel__reset_sample_bit(evsel_prev, PERIOD);
1164 	} else {
1165 		if (opts->user_interval != ULLONG_MAX) {
1166 			prev_attr->sample_period = opts->user_interval;
1167 			attr->sample_period = prev_attr->sample_period * rtp;
1168 			prev_attr->freq = 0;
1169 			attr->freq = 0;
1170 			evsel__reset_sample_bit(evsel_prev, PERIOD);
1171 			evsel__reset_sample_bit(evsel, PERIOD);
1172 		} else {
1173 			pr_err("Event period term or count (-c) must be set when using ratio-to-prev term.\n");
1174 			return;
1175 		}
1176 	}
1177 
1178 	arch_evsel__apply_ratio_to_prev(evsel, attr);
1179 }
1180 
1181 static void evsel__apply_config_terms(struct evsel *evsel,
1182 				      const struct record_opts *opts, bool track)
1183 {
1184 	struct evsel_config_term *term;
1185 	struct list_head *config_terms = &evsel->config_terms;
1186 	struct perf_event_attr *attr = &evsel->core.attr;
1187 	/* callgraph default */
1188 	struct callchain_param param = {
1189 		.record_mode = callchain_param.record_mode,
1190 	};
1191 	u32 dump_size = 0;
1192 	int max_stack = 0;
1193 	const char *callgraph_buf = NULL;
1194 	const char *rtp_buf = NULL;
1195 
1196 	list_for_each_entry(term, config_terms, list) {
1197 		switch (term->type) {
1198 		case EVSEL__CONFIG_TERM_PERIOD:
1199 			if (!(term->weak && opts->user_interval != ULLONG_MAX)) {
1200 				attr->sample_period = term->val.period;
1201 				attr->freq = 0;
1202 				evsel__reset_sample_bit(evsel, PERIOD);
1203 			}
1204 			break;
1205 		case EVSEL__CONFIG_TERM_FREQ:
1206 			if (!(term->weak && opts->user_freq != UINT_MAX)) {
1207 				attr->sample_freq = term->val.freq;
1208 				attr->freq = 1;
1209 				evsel__set_sample_bit(evsel, PERIOD);
1210 			}
1211 			break;
1212 		case EVSEL__CONFIG_TERM_TIME:
1213 			if (term->val.time)
1214 				evsel__set_sample_bit(evsel, TIME);
1215 			else
1216 				evsel__reset_sample_bit(evsel, TIME);
1217 			break;
1218 		case EVSEL__CONFIG_TERM_CALLGRAPH:
1219 			callgraph_buf = term->val.str;
1220 			break;
1221 		case EVSEL__CONFIG_TERM_BRANCH:
1222 			if (term->val.str && strcmp(term->val.str, "no")) {
1223 				evsel__set_sample_bit(evsel, BRANCH_STACK);
1224 				parse_branch_str(term->val.str,
1225 						 &attr->branch_sample_type);
1226 			} else
1227 				evsel__reset_sample_bit(evsel, BRANCH_STACK);
1228 			break;
1229 		case EVSEL__CONFIG_TERM_STACK_USER:
1230 			dump_size = term->val.stack_user;
1231 			break;
1232 		case EVSEL__CONFIG_TERM_MAX_STACK:
1233 			max_stack = term->val.max_stack;
1234 			break;
1235 		case EVSEL__CONFIG_TERM_MAX_EVENTS:
1236 			evsel->max_events = term->val.max_events;
1237 			break;
1238 		case EVSEL__CONFIG_TERM_INHERIT:
1239 			/*
1240 			 * attr->inherit should has already been set by
1241 			 * evsel__config. If user explicitly set
1242 			 * inherit using config terms, override global
1243 			 * opt->no_inherit setting.
1244 			 */
1245 			attr->inherit = term->val.inherit ? 1 : 0;
1246 			break;
1247 		case EVSEL__CONFIG_TERM_OVERWRITE:
1248 			attr->write_backward = term->val.overwrite ? 1 : 0;
1249 			break;
1250 		case EVSEL__CONFIG_TERM_DRV_CFG:
1251 			break;
1252 		case EVSEL__CONFIG_TERM_PERCORE:
1253 			break;
1254 		case EVSEL__CONFIG_TERM_AUX_OUTPUT:
1255 			attr->aux_output = term->val.aux_output ? 1 : 0;
1256 			break;
1257 		case EVSEL__CONFIG_TERM_AUX_ACTION:
1258 			/* Already applied by auxtrace */
1259 			break;
1260 		case EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE:
1261 			/* Already applied by auxtrace */
1262 			break;
1263 		case EVSEL__CONFIG_TERM_USR_CHG_CONFIG:
1264 		case EVSEL__CONFIG_TERM_USR_CHG_CONFIG1:
1265 		case EVSEL__CONFIG_TERM_USR_CHG_CONFIG2:
1266 		case EVSEL__CONFIG_TERM_USR_CHG_CONFIG3:
1267 		case EVSEL__CONFIG_TERM_USR_CHG_CONFIG4:
1268 			break;
1269 		case EVSEL__CONFIG_TERM_RATIO_TO_PREV:
1270 			rtp_buf = term->val.str;
1271 			break;
1272 		default:
1273 			break;
1274 		}
1275 	}
1276 
1277 	/* User explicitly set per-event callgraph, clear the old setting and reset. */
1278 	if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) {
1279 		bool sample_address = false;
1280 
1281 		if (max_stack) {
1282 			param.max_stack = max_stack;
1283 			if (callgraph_buf == NULL)
1284 				callgraph_buf = "fp";
1285 		}
1286 
1287 		/* parse callgraph parameters */
1288 		if (callgraph_buf != NULL) {
1289 			if (!strcmp(callgraph_buf, "no")) {
1290 				param.enabled = false;
1291 				param.record_mode = CALLCHAIN_NONE;
1292 			} else {
1293 				param.enabled = true;
1294 				if (parse_callchain_record(callgraph_buf, &param)) {
1295 					pr_err("per-event callgraph setting for %s failed. "
1296 					       "Apply callgraph global setting for it\n",
1297 					       evsel->name);
1298 					return;
1299 				}
1300 				if (param.record_mode == CALLCHAIN_DWARF)
1301 					sample_address = true;
1302 			}
1303 		}
1304 		if (dump_size > 0) {
1305 			dump_size = round_up(dump_size, sizeof(u64));
1306 			param.dump_size = dump_size;
1307 		}
1308 
1309 		/* If global callgraph set, clear it */
1310 		if (callchain_param.enabled)
1311 			evsel__reset_callgraph(evsel, &callchain_param);
1312 
1313 		/* set perf-event callgraph */
1314 		if (param.enabled) {
1315 			if (sample_address) {
1316 				evsel__set_sample_bit(evsel, ADDR);
1317 				evsel__set_sample_bit(evsel, DATA_SRC);
1318 				evsel->core.attr.mmap_data = track;
1319 			}
1320 			evsel__config_callchain(evsel, opts, &param);
1321 		}
1322 	}
1323 	if (rtp_buf)
1324 		evsel__apply_ratio_to_prev(evsel, attr, opts, rtp_buf);
1325 }
1326 
1327 struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type)
1328 {
1329 	struct evsel_config_term *term, *found_term = NULL;
1330 
1331 	list_for_each_entry(term, &evsel->config_terms, list) {
1332 		if (term->type == type)
1333 			found_term = term;
1334 	}
1335 
1336 	return found_term;
1337 }
1338 
1339 /*
1340  * Set @config_name to @val as long as the user hasn't already set or cleared it
1341  * by passing a config term on the command line.
1342  *
1343  * @val is the value to put into the bits specified by @config_name rather than
1344  * the bit pattern. It is shifted into position by this function, so to set
1345  * something to true, pass 1 for val rather than a pre shifted value.
1346  */
1347 void evsel__set_config_if_unset(struct evsel *evsel, const char *config_name,
1348 				u64 val)
1349 {
1350 	u64 user_bits = 0;
1351 	struct evsel_config_term *term = evsel__get_config_term(evsel,
1352 								USR_CHG_CONFIG);
1353 	struct perf_pmu_format *format = pmu_find_format(&evsel->pmu->format,
1354 							 config_name);
1355 	int fbit;
1356 	__u64 *vp;
1357 
1358 	if (!format)
1359 		return;
1360 
1361 	switch (format->value) {
1362 	case PERF_PMU_FORMAT_VALUE_CONFIG:
1363 		term = evsel__get_config_term(evsel, USR_CHG_CONFIG);
1364 		vp = &evsel->core.attr.config;
1365 		break;
1366 	case PERF_PMU_FORMAT_VALUE_CONFIG1:
1367 		term = evsel__get_config_term(evsel, USR_CHG_CONFIG1);
1368 		vp = &evsel->core.attr.config1;
1369 		break;
1370 	case PERF_PMU_FORMAT_VALUE_CONFIG2:
1371 		term = evsel__get_config_term(evsel, USR_CHG_CONFIG2);
1372 		vp = &evsel->core.attr.config2;
1373 		break;
1374 	case PERF_PMU_FORMAT_VALUE_CONFIG3:
1375 		term = evsel__get_config_term(evsel, USR_CHG_CONFIG3);
1376 		vp = &evsel->core.attr.config3;
1377 		break;
1378 	case PERF_PMU_FORMAT_VALUE_CONFIG4:
1379 		term = evsel__get_config_term(evsel, USR_CHG_CONFIG4);
1380 		vp = &evsel->core.attr.config4;
1381 		break;
1382 	default:
1383 		pr_err("Unknown format value: %d\n", format->value);
1384 		return;
1385 	}
1386 
1387 	if (!format)
1388 		return;
1389 
1390 	if (term)
1391 		user_bits = term->val.cfg_chg;
1392 
1393 	/* Do nothing if the user changed the value */
1394 	for_each_set_bit(fbit, format->bits, PERF_PMU_FORMAT_BITS)
1395 		if ((1ULL << fbit) & user_bits)
1396 			return;
1397 
1398 	/* Otherwise replace it */
1399 	perf_pmu__format_pack(format->bits, val, vp, /*zero=*/true);
1400 }
1401 
1402 
1403 int evsel__get_config_val(const struct evsel *evsel, const char *config_name,
1404 			  u64 *val)
1405 {
1406 	struct perf_pmu_format *format = pmu_find_format(&evsel->pmu->format, config_name);
1407 
1408 	if (!format || bitmap_empty(format->bits, PERF_PMU_FORMAT_BITS)) {
1409 		pr_err("Unknown/empty format name: %s\n", config_name);
1410 		*val = 0;
1411 		return -EINVAL;
1412 	}
1413 
1414 	switch (format->value) {
1415 	case PERF_PMU_FORMAT_VALUE_CONFIG:
1416 		*val = perf_pmu__format_unpack(format->bits,
1417 					       evsel->core.attr.config);
1418 		return 0;
1419 	case PERF_PMU_FORMAT_VALUE_CONFIG1:
1420 		*val = perf_pmu__format_unpack(format->bits,
1421 					       evsel->core.attr.config1);
1422 		return 0;
1423 	case PERF_PMU_FORMAT_VALUE_CONFIG2:
1424 		*val = perf_pmu__format_unpack(format->bits,
1425 					       evsel->core.attr.config2);
1426 		return 0;
1427 	case PERF_PMU_FORMAT_VALUE_CONFIG3:
1428 		*val = perf_pmu__format_unpack(format->bits,
1429 					       evsel->core.attr.config3);
1430 		return 0;
1431 	case PERF_PMU_FORMAT_VALUE_CONFIG4:
1432 		*val = perf_pmu__format_unpack(format->bits,
1433 					       evsel->core.attr.config4);
1434 		return 0;
1435 	default:
1436 		pr_err("Unknown format value: %d\n", format->value);
1437 		*val = 0;
1438 		return -EINVAL;
1439 	}
1440 }
1441 
1442 void __weak arch_evsel__set_sample_weight(struct evsel *evsel)
1443 {
1444 	evsel__set_sample_bit(evsel, WEIGHT);
1445 }
1446 
1447 void __weak arch__post_evsel_config(struct evsel *evsel __maybe_unused,
1448 				    struct perf_event_attr *attr __maybe_unused)
1449 {
1450 }
1451 
1452 void __weak arch_evsel__apply_ratio_to_prev(struct evsel *evsel __maybe_unused,
1453 					    struct perf_event_attr *attr __maybe_unused)
1454 {
1455 }
1456 
1457 static void evsel__set_default_freq_period(const struct record_opts *opts,
1458 					   struct perf_event_attr *attr)
1459 {
1460 	if (opts->freq) {
1461 		attr->freq = 1;
1462 		attr->sample_freq = opts->freq;
1463 	} else {
1464 		attr->sample_period = opts->default_interval;
1465 	}
1466 }
1467 
1468 bool evsel__is_offcpu_event(struct evsel *evsel)
1469 {
1470 	return evsel__is_bpf_output(evsel) && evsel__name_is(evsel, OFFCPU_EVENT) &&
1471 	       evsel->core.attr.sample_type & PERF_SAMPLE_RAW;
1472 }
1473 
1474 /*
1475  * The enable_on_exec/disabled value strategy:
1476  *
1477  *  1) For any type of traced program:
1478  *    - all independent events and group leaders are disabled
1479  *    - all group members are enabled
1480  *
1481  *     Group members are ruled by group leaders. They need to
1482  *     be enabled, because the group scheduling relies on that.
1483  *
1484  *  2) For traced programs executed by perf:
1485  *     - all independent events and group leaders have
1486  *       enable_on_exec set
1487  *     - we don't specifically enable or disable any event during
1488  *       the record command
1489  *
1490  *     Independent events and group leaders are initially disabled
1491  *     and get enabled by exec. Group members are ruled by group
1492  *     leaders as stated in 1).
1493  *
1494  *  3) For traced programs attached by perf (pid/tid):
1495  *     - we specifically enable or disable all events during
1496  *       the record command
1497  *
1498  *     When attaching events to already running traced we
1499  *     enable/disable events specifically, as there's no
1500  *     initial traced exec call.
1501  */
1502 void evsel__config(struct evsel *evsel, const struct record_opts *opts,
1503 		   const struct callchain_param *callchain)
1504 {
1505 	struct evsel *leader = evsel__leader(evsel);
1506 	struct perf_event_attr *attr = &evsel->core.attr;
1507 	int track = evsel->tracking;
1508 	bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread;
1509 
1510 	attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1;
1511 	attr->inherit	    = target__has_cpu(&opts->target) ? 0 : !opts->no_inherit;
1512 	attr->write_backward = opts->overwrite ? 1 : 0;
1513 	attr->read_format   = PERF_FORMAT_LOST;
1514 
1515 	evsel__set_sample_bit(evsel, IP);
1516 	evsel__set_sample_bit(evsel, TID);
1517 
1518 	if (evsel->sample_read) {
1519 		evsel__set_sample_bit(evsel, READ);
1520 
1521 		/*
1522 		 * We need ID even in case of single event, because
1523 		 * PERF_SAMPLE_READ process ID specific data.
1524 		 */
1525 		evsel__set_sample_id(evsel, false);
1526 
1527 		/*
1528 		 * Apply group format only if we belong to group
1529 		 * with more than one members.
1530 		 */
1531 		if (leader->core.nr_members > 1) {
1532 			attr->read_format |= PERF_FORMAT_GROUP;
1533 		}
1534 
1535 		/*
1536 		 * Inherit + SAMPLE_READ requires SAMPLE_TID in the read_format
1537 		 */
1538 		if (attr->inherit) {
1539 			evsel__set_sample_bit(evsel, TID);
1540 			evsel->core.attr.read_format |=
1541 				PERF_FORMAT_ID;
1542 		}
1543 	}
1544 
1545 	/*
1546 	 * We default some events to have a default interval. But keep
1547 	 * it a weak assumption overridable by the user.
1548 	 */
1549 	if ((evsel->is_libpfm_event && !attr->sample_period) ||
1550 	    (!evsel->is_libpfm_event && (!attr->sample_period ||
1551 					 opts->user_freq != UINT_MAX ||
1552 					 opts->user_interval != ULLONG_MAX)))
1553 		evsel__set_default_freq_period(opts, attr);
1554 
1555 	/*
1556 	 * If attr->freq was set (here or earlier), ask for period
1557 	 * to be sampled.
1558 	 */
1559 	if (attr->freq)
1560 		evsel__set_sample_bit(evsel, PERIOD);
1561 
1562 	if (opts->no_samples)
1563 		attr->sample_freq = 0;
1564 
1565 	if (opts->inherit_stat) {
1566 		evsel->core.attr.read_format |=
1567 			PERF_FORMAT_TOTAL_TIME_ENABLED |
1568 			PERF_FORMAT_TOTAL_TIME_RUNNING |
1569 			PERF_FORMAT_ID;
1570 		attr->inherit_stat = 1;
1571 	}
1572 
1573 	if (opts->sample_address)
1574 		evsel__set_sample_bit(evsel, ADDR);
1575 
1576 	if (opts->record_data_mmap)
1577 		attr->mmap_data = track;
1578 
1579 	/*
1580 	 * We don't allow user space callchains for  function trace
1581 	 * event, due to issues with page faults while tracing page
1582 	 * fault handler and its overall trickiness nature.
1583 	 */
1584 	if (evsel__is_function_event(evsel))
1585 		evsel->core.attr.exclude_callchain_user = 1;
1586 
1587 	if (callchain && callchain->enabled && !evsel->no_aux_samples)
1588 		evsel__config_callchain(evsel, opts, callchain);
1589 
1590 	if (opts->sample_intr_regs && !evsel->no_aux_samples &&
1591 	    !evsel__is_dummy_event(evsel)) {
1592 		attr->sample_regs_intr = opts->sample_intr_regs;
1593 		evsel__set_sample_bit(evsel, REGS_INTR);
1594 	}
1595 
1596 	if (opts->sample_user_regs && !evsel->no_aux_samples &&
1597 	    !evsel__is_dummy_event(evsel)) {
1598 		attr->sample_regs_user |= opts->sample_user_regs;
1599 		evsel__set_sample_bit(evsel, REGS_USER);
1600 	}
1601 
1602 	if (target__has_cpu(&opts->target) || opts->sample_cpu)
1603 		evsel__set_sample_bit(evsel, CPU);
1604 
1605 	/*
1606 	 * When the user explicitly disabled time don't force it here.
1607 	 */
1608 	if (opts->sample_time &&
1609 	    (!perf_missing_features.sample_id_all &&
1610 	    (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu ||
1611 	     opts->sample_time_set)))
1612 		evsel__set_sample_bit(evsel, TIME);
1613 
1614 	if (opts->raw_samples && !evsel->no_aux_samples) {
1615 		evsel__set_sample_bit(evsel, TIME);
1616 		evsel__set_sample_bit(evsel, RAW);
1617 		evsel__set_sample_bit(evsel, CPU);
1618 	}
1619 
1620 	if (opts->sample_data_src)
1621 		evsel__set_sample_bit(evsel, DATA_SRC);
1622 
1623 	if (opts->sample_phys_addr)
1624 		evsel__set_sample_bit(evsel, PHYS_ADDR);
1625 
1626 	if (opts->no_buffering) {
1627 		attr->watermark = 0;
1628 		attr->wakeup_events = 1;
1629 	}
1630 	if (opts->branch_stack && !evsel->no_aux_samples) {
1631 		evsel__set_sample_bit(evsel, BRANCH_STACK);
1632 		attr->branch_sample_type = opts->branch_stack;
1633 	}
1634 
1635 	if (opts->sample_weight || evsel->retire_lat) {
1636 		arch_evsel__set_sample_weight(evsel);
1637 		evsel->retire_lat = false;
1638 	}
1639 	attr->task     = track;
1640 	attr->mmap     = track;
1641 	attr->mmap2    = track && !perf_missing_features.mmap2;
1642 	attr->comm     = track;
1643 	attr->build_id = track && opts->build_id;
1644 	attr->defer_output = track && callchain && callchain->defer;
1645 
1646 	/*
1647 	 * ksymbol is tracked separately with text poke because it needs to be
1648 	 * system wide and enabled immediately.
1649 	 */
1650 	if (!opts->text_poke)
1651 		attr->ksymbol = track && !perf_missing_features.ksymbol;
1652 	attr->bpf_event = track && !opts->no_bpf_event && !perf_missing_features.bpf;
1653 
1654 	if (opts->record_namespaces)
1655 		attr->namespaces  = track;
1656 
1657 	if (opts->record_cgroup) {
1658 		attr->cgroup = track && !perf_missing_features.cgroup;
1659 		evsel__set_sample_bit(evsel, CGROUP);
1660 	}
1661 
1662 	if (opts->sample_data_page_size)
1663 		evsel__set_sample_bit(evsel, DATA_PAGE_SIZE);
1664 
1665 	if (opts->sample_code_page_size)
1666 		evsel__set_sample_bit(evsel, CODE_PAGE_SIZE);
1667 
1668 	if (opts->record_switch_events)
1669 		attr->context_switch = track;
1670 
1671 	if (opts->sample_transaction)
1672 		evsel__set_sample_bit(evsel, TRANSACTION);
1673 
1674 	if (opts->running_time) {
1675 		evsel->core.attr.read_format |=
1676 			PERF_FORMAT_TOTAL_TIME_ENABLED |
1677 			PERF_FORMAT_TOTAL_TIME_RUNNING;
1678 	}
1679 
1680 	/*
1681 	 * XXX see the function comment above
1682 	 *
1683 	 * Disabling only independent events or group leaders,
1684 	 * keeping group members enabled.
1685 	 */
1686 	if (evsel__is_group_leader(evsel))
1687 		attr->disabled = 1;
1688 
1689 	/*
1690 	 * Setting enable_on_exec for independent events and
1691 	 * group leaders for traced executed by perf.
1692 	 */
1693 	if (target__none(&opts->target) && evsel__is_group_leader(evsel) &&
1694 	    !opts->target.initial_delay)
1695 		attr->enable_on_exec = 1;
1696 
1697 	if (evsel->immediate) {
1698 		attr->disabled = 0;
1699 		attr->enable_on_exec = 0;
1700 	}
1701 
1702 	clockid = opts->clockid;
1703 	if (opts->use_clockid) {
1704 		attr->use_clockid = 1;
1705 		attr->clockid = opts->clockid;
1706 	}
1707 
1708 	if (evsel->precise_max)
1709 		attr->precise_ip = 3;
1710 
1711 	if (opts->all_user) {
1712 		attr->exclude_kernel = 1;
1713 		attr->exclude_user   = 0;
1714 	}
1715 
1716 	if (opts->all_kernel) {
1717 		attr->exclude_kernel = 0;
1718 		attr->exclude_user   = 1;
1719 	}
1720 
1721 	if (evsel->core.pmu_cpus || evsel->unit)
1722 		evsel->core.attr.read_format |= PERF_FORMAT_ID;
1723 
1724 	/*
1725 	 * Apply event specific term settings,
1726 	 * it overloads any global configuration.
1727 	 */
1728 	evsel__apply_config_terms(evsel, opts, track);
1729 
1730 	evsel->ignore_missing_thread = opts->ignore_missing_thread;
1731 
1732 	/* The --period option takes the precedence. */
1733 	if (opts->period_set) {
1734 		if (opts->period)
1735 			evsel__set_sample_bit(evsel, PERIOD);
1736 		else
1737 			evsel__reset_sample_bit(evsel, PERIOD);
1738 	}
1739 
1740 	/*
1741 	 * A dummy event never triggers any actual counter and therefore
1742 	 * cannot be used with branch_stack.
1743 	 *
1744 	 * For initial_delay, a dummy event is added implicitly.
1745 	 * The software event will trigger -EOPNOTSUPP error out,
1746 	 * if BRANCH_STACK bit is set.
1747 	 */
1748 	if (evsel__is_dummy_event(evsel))
1749 		evsel__reset_sample_bit(evsel, BRANCH_STACK);
1750 
1751 	if (evsel__is_offcpu_event(evsel)) {
1752 		evsel->core.attr.sample_type &= OFFCPU_SAMPLE_TYPES;
1753 		attr->inherit = 0;
1754 	}
1755 
1756 	arch__post_evsel_config(evsel, attr);
1757 }
1758 
1759 int evsel__set_filter(struct evsel *evsel, const char *filter)
1760 {
1761 	char *new_filter = strdup(filter);
1762 
1763 	if (new_filter != NULL) {
1764 		free(evsel->filter);
1765 		evsel->filter = new_filter;
1766 		return 0;
1767 	}
1768 
1769 	return -1;
1770 }
1771 
1772 static int evsel__append_filter(struct evsel *evsel, const char *fmt, const char *filter)
1773 {
1774 	char *new_filter;
1775 
1776 	if (evsel->filter == NULL)
1777 		return evsel__set_filter(evsel, filter);
1778 
1779 	if (asprintf(&new_filter, fmt, evsel->filter, filter) > 0) {
1780 		free(evsel->filter);
1781 		evsel->filter = new_filter;
1782 		return 0;
1783 	}
1784 
1785 	return -1;
1786 }
1787 
/*
 * Add @filter to @evsel's filter, joining it with the existing one as
 * "(old) && (new)". Returns 0 on success, -1 on failure.
 */
int evsel__append_tp_filter(struct evsel *evsel, const char *filter)
{
	const char *and_fmt = "(%s) && (%s)";

	return evsel__append_filter(evsel, and_fmt, filter);
}
1792 
/*
 * Add @filter to @evsel's filter, joining it with the existing one as
 * "old,new". Returns 0 on success, -1 on failure.
 */
int evsel__append_addr_filter(struct evsel *evsel, const char *filter)
{
	const char *comma_fmt = "%s,%s";

	return evsel__append_filter(evsel, comma_fmt, filter);
}
1797 
1798 /* Caller has to clear disabled after going through all CPUs. */
1799 int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx)
1800 {
1801 	int err;
1802 
1803 	if (evsel__is_tool(evsel))
1804 		err = evsel__tool_pmu_enable_cpu(evsel, cpu_map_idx);
1805 	else
1806 		err = perf_evsel__enable_cpu(&evsel->core, cpu_map_idx);
1807 
1808 	if (!err && evsel__is_group_leader(evsel)) {
1809 		struct evsel *member;
1810 
1811 		for_each_group_member(member, evsel) {
1812 			if (evsel__is_non_perf_event_open_pmu(evsel) ||
1813 			    evsel__is_non_perf_event_open_pmu(member)) {
1814 				/*
1815 				 * In a mixed PMU group, userspace PMUs are not
1816 				 * grouped in the kernel (opened with group_fd = -1)
1817 				 * and are skipped by the kernel when enabling the
1818 				 * group leader. We must manually enable them in
1819 				 * userspace.
1820 				 */
1821 				int mem_err = evsel__enable_cpu(member, cpu_map_idx);
1822 
1823 				if (mem_err)
1824 					return mem_err;
1825 			}
1826 		}
1827 	}
1828 	return err;
1829 }
1830 
1831 int evsel__enable(struct evsel *evsel)
1832 {
1833 	int err;
1834 
1835 	if (evsel__is_tool(evsel))
1836 		err = evsel__tool_pmu_enable(evsel);
1837 	else
1838 		err = perf_evsel__enable(&evsel->core);
1839 
1840 	if (!err)
1841 		evsel->disabled = false;
1842 
1843 	if (!err && evsel__is_group_leader(evsel)) {
1844 		struct evsel *member;
1845 
1846 		for_each_group_member(member, evsel) {
1847 			if (evsel__is_non_perf_event_open_pmu(evsel) ||
1848 			    evsel__is_non_perf_event_open_pmu(member)) {
1849 				/*
1850 				 * In a mixed PMU group, userspace PMUs are not
1851 				 * grouped in the kernel (opened with group_fd = -1)
1852 				 * and are skipped by the kernel when enabling the
1853 				 * group leader. We must manually enable them in
1854 				 * userspace.
1855 				 */
1856 				int mem_err = evsel__enable(member);
1857 
1858 				if (mem_err)
1859 					return mem_err;
1860 			}
1861 			member->disabled = false;
1862 		}
1863 	}
1864 
1865 	return err;
1866 }
1867 
1868 /* Caller has to set disabled after going through all CPUs. */
1869 int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx)
1870 {
1871 	int err;
1872 
1873 	if (evsel__is_tool(evsel))
1874 		err = evsel__tool_pmu_disable_cpu(evsel, cpu_map_idx);
1875 	else
1876 		err = perf_evsel__disable_cpu(&evsel->core, cpu_map_idx);
1877 
1878 	if (!err && evsel__is_group_leader(evsel)) {
1879 		struct evsel *member;
1880 
1881 		for_each_group_member(member, evsel) {
1882 			if (evsel__is_non_perf_event_open_pmu(evsel) ||
1883 			    evsel__is_non_perf_event_open_pmu(member)) {
1884 				/*
1885 				 * In a mixed PMU group, userspace PMUs are not
1886 				 * grouped in the kernel and are skipped by the
1887 				 * kernel when disabling the group leader. We must
1888 				 * manually disable them in userspace.
1889 				 */
1890 				int mem_err = evsel__disable_cpu(member, cpu_map_idx);
1891 
1892 				if (mem_err)
1893 					return mem_err;
1894 			}
1895 		}
1896 	}
1897 	return err;
1898 }
1899 
1900 int evsel__disable(struct evsel *evsel)
1901 {
1902 	int err;
1903 
1904 	if (evsel__is_tool(evsel))
1905 		err = evsel__tool_pmu_disable(evsel);
1906 	else
1907 		err = perf_evsel__disable(&evsel->core);
1908 
1909 	/*
1910 	 * We mark it disabled here so that tools that disable a event can
1911 	 * ignore events after they disable it. I.e. the ring buffer may have
1912 	 * already a few more events queued up before the kernel got the stop
1913 	 * request.
1914 	 */
1915 	if (!err)
1916 		evsel->disabled = true;
1917 
1918 	if (!err && evsel__is_group_leader(evsel)) {
1919 		struct evsel *member;
1920 
1921 		for_each_group_member(member, evsel) {
1922 			if (evsel__is_non_perf_event_open_pmu(evsel) ||
1923 			    evsel__is_non_perf_event_open_pmu(member)) {
1924 				/*
1925 				 * In a mixed PMU group, userspace PMUs are not
1926 				 * grouped in the kernel and are skipped by the
1927 				 * kernel when disabling the group leader. We must
1928 				 * manually disable them in userspace.
1929 				 */
1930 				int mem_err = evsel__disable(member);
1931 
1932 				if (mem_err)
1933 					return mem_err;
1934 			}
1935 			member->disabled = true;
1936 		}
1937 	}
1938 
1939 	return err;
1940 }
1941 
1942 void free_config_terms(struct list_head *config_terms)
1943 {
1944 	struct evsel_config_term *term, *h;
1945 
1946 	list_for_each_entry_safe(term, h, config_terms, list) {
1947 		list_del_init(&term->list);
1948 		if (term->free_str)
1949 			zfree(&term->val.str);
1950 		free(term);
1951 	}
1952 }
1953 
1954 static void evsel__free_config_terms(struct evsel *evsel)
1955 {
1956 	free_config_terms(&evsel->config_terms);
1957 }
1958 
/* Optional callback for an evsel's priv pointer; NULL until registered. */
static void (*evsel__priv_destructor)(void *priv);

/*
 * Register @destructor as the callback for an evsel's priv pointer.
 * Registering while a destructor is already set trips an assertion.
 */
void evsel__set_priv_destructor(void (*destructor)(void *priv))
{
	assert(!evsel__priv_destructor);

	evsel__priv_destructor = destructor;
}
1967 
1968 void evsel__exit(struct evsel *evsel)
1969 {
1970 	assert(list_empty(&evsel->core.node));
1971 	assert(evsel->evlist == NULL);
1972 	if (evsel__is_retire_lat(evsel))
1973 		evsel__tpebs_close(evsel);
1974 	bpf_counter__destroy(evsel);
1975 	perf_bpf_filter__destroy(evsel);
1976 	evsel__free_counts(evsel);
1977 	perf_evsel__free_fd(&evsel->core);
1978 	perf_evsel__free_id(&evsel->core);
1979 	evsel__free_config_terms(evsel);
1980 	cgroup__put(evsel->cgrp);
1981 	perf_evsel__exit(&evsel->core);
1982 	zfree(&evsel->group_name);
1983 	zfree(&evsel->name);
1984 #ifdef HAVE_LIBTRACEEVENT
1985 	zfree(&evsel->tp_sys);
1986 	zfree(&evsel->tp_name);
1987 #endif
1988 	zfree(&evsel->filter);
1989 	zfree(&evsel->group_pmu_name);
1990 	zfree(&evsel->unit);
1991 	zfree(&evsel->metric_id);
1992 	evsel__zero_per_pkg(evsel);
1993 	hashmap__free(evsel->per_pkg_mask);
1994 	evsel->per_pkg_mask = NULL;
1995 	if (evsel__priv_destructor)
1996 		evsel__priv_destructor(evsel->priv);
1997 	perf_evsel__object.fini(evsel);
1998 	if (evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME ||
1999 	    evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) {
2000 		xyarray__delete(evsel->process_time.start_times);
2001 		xyarray__delete(evsel->process_time.accumulated_times);
2002 	}
2003 }
2004 
/* Tear down and free @evsel; a NULL @evsel is accepted and ignored. */
void evsel__delete(struct evsel *evsel)
{
	if (evsel != NULL) {
		evsel__exit(evsel);
		free(evsel);
	}
}
2013 
2014 void evsel__compute_deltas(struct evsel *evsel, int cpu_map_idx, int thread,
2015 			   struct perf_counts_values *count)
2016 {
2017 	struct perf_counts_values tmp;
2018 
2019 	if (!evsel->prev_raw_counts)
2020 		return;
2021 
2022 	tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
2023 	*perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count;
2024 
2025 	count->val = count->val - tmp.val;
2026 	count->ena = count->ena - tmp.ena;
2027 	count->run = count->run - tmp.run;
2028 }
2029 
2030 static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread)
2031 {
2032 	struct perf_counts_values *count = perf_counts(evsel->counts, cpu_map_idx, thread);
2033 
2034 	return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
2035 }
2036 
2037 static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
2038 			     u64 val, u64 ena, u64 run, u64 lost)
2039 {
2040 	struct perf_counts_values *count;
2041 
2042 	count = perf_counts(counter->counts, cpu_map_idx, thread);
2043 
2044 	if (evsel__is_retire_lat(counter)) {
2045 		evsel__tpebs_read(counter, cpu_map_idx, thread);
2046 		perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
2047 		return;
2048 	}
2049 
2050 	count->val    = val;
2051 	count->ena    = ena;
2052 	count->run    = run;
2053 	count->lost   = lost;
2054 
2055 	perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
2056 }
2057 
2058 static bool evsel__group_has_tpebs(struct evsel *leader)
2059 {
2060 	struct evsel *evsel;
2061 
2062 	for_each_group_evsel(evsel, leader) {
2063 		if (evsel__is_retire_lat(evsel))
2064 			return true;
2065 	}
2066 	return false;
2067 }
2068 
2069 static u64 evsel__group_read_nr_members(struct evsel *leader)
2070 {
2071 	u64 nr = leader->core.nr_members;
2072 	struct evsel *evsel;
2073 
2074 	for_each_group_evsel(evsel, leader) {
2075 		if (evsel__is_retire_lat(evsel))
2076 			nr--;
2077 	}
2078 	return nr;
2079 }
2080 
2081 static u64 evsel__group_read_size(struct evsel *leader)
2082 {
2083 	u64 read_format = leader->core.attr.read_format;
2084 	int entry = sizeof(u64); /* value */
2085 	int size = 0;
2086 	int nr = 1;
2087 
2088 	if (!evsel__group_has_tpebs(leader))
2089 		return perf_evsel__read_size(&leader->core);
2090 
2091 	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
2092 		size += sizeof(u64);
2093 
2094 	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
2095 		size += sizeof(u64);
2096 
2097 	if (read_format & PERF_FORMAT_ID)
2098 		entry += sizeof(u64);
2099 
2100 	if (read_format & PERF_FORMAT_LOST)
2101 		entry += sizeof(u64);
2102 
2103 	if (read_format & PERF_FORMAT_GROUP) {
2104 		nr = evsel__group_read_nr_members(leader);
2105 		size += sizeof(u64);
2106 	}
2107 
2108 	size += entry * nr;
2109 	return size;
2110 }
2111 
2112 static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int thread, u64 *data)
2113 {
2114 	u64 read_format = leader->core.attr.read_format;
2115 	struct sample_read_value *v;
2116 	u64 nr, ena = 0, run = 0, lost = 0;
2117 
2118 	nr = *data++;
2119 
2120 	if (nr != evsel__group_read_nr_members(leader))
2121 		return -EINVAL;
2122 
2123 	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
2124 		ena = *data++;
2125 
2126 	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
2127 		run = *data++;
2128 
2129 	v = (void *)data;
2130 	sample_read_group__for_each(v, nr, read_format) {
2131 		struct evsel *counter;
2132 
2133 		counter = evlist__id2evsel(leader->evlist, v->id);
2134 		if (!counter)
2135 			return -EINVAL;
2136 
2137 		if (read_format & PERF_FORMAT_LOST)
2138 			lost = v->lost;
2139 
2140 		evsel__set_count(counter, cpu_map_idx, thread, v->value, ena, run, lost);
2141 	}
2142 
2143 	return 0;
2144 }
2145 
2146 static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread)
2147 {
2148 	struct perf_stat_evsel *ps = leader->stats;
2149 	u64 read_format = leader->core.attr.read_format;
2150 	int size = evsel__group_read_size(leader);
2151 	u64 *data = ps->group_data;
2152 
2153 	if (!(read_format & PERF_FORMAT_ID))
2154 		return -EINVAL;
2155 
2156 	if (!evsel__is_group_leader(leader))
2157 		return -EINVAL;
2158 
2159 	if (!data) {
2160 		data = zalloc(size);
2161 		if (!data)
2162 			return -ENOMEM;
2163 
2164 		ps->group_data = data;
2165 	}
2166 
2167 	if (FD(leader, cpu_map_idx, thread) < 0)
2168 		return -EINVAL;
2169 
2170 	if (readn(FD(leader, cpu_map_idx, thread), data, size) <= 0)
2171 		return -errno;
2172 
2173 	return evsel__process_group_data(leader, cpu_map_idx, thread, data);
2174 }
2175 
2176 bool __evsel__match(const struct evsel *evsel, u32 type, u64 config)
2177 {
2178 
2179 	u32 e_type = evsel->core.attr.type;
2180 	u64 e_config = evsel->core.attr.config;
2181 
2182 	if (e_type == type && e_config == config)
2183 		return true;
2184 	if (type != PERF_TYPE_HARDWARE && type != PERF_TYPE_HW_CACHE)
2185 		return false;
2186 	if ((e_type == PERF_TYPE_HARDWARE || e_type == PERF_TYPE_HW_CACHE) &&
2187 		perf_pmus__supports_extended_type())
2188 		e_config &= PERF_HW_EVENT_MASK;
2189 	if (e_type == type && e_config == config)
2190 		return true;
2191 	if (type == PERF_TYPE_HARDWARE && evsel->pmu && evsel->pmu->is_core &&
2192 	    evsel->alternate_hw_config == config)
2193 		return true;
2194 	return false;
2195 }
2196 
2197 int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread)
2198 {
2199 	if (evsel__is_tool(evsel))
2200 		return evsel__tool_pmu_read(evsel, cpu_map_idx, thread);
2201 
2202 	if (evsel__is_hwmon(evsel))
2203 		return evsel__hwmon_pmu_read(evsel, cpu_map_idx, thread);
2204 
2205 	if (evsel__is_drm(evsel))
2206 		return evsel__drm_pmu_read(evsel, cpu_map_idx, thread);
2207 
2208 	if (evsel__is_retire_lat(evsel))
2209 		return evsel__tpebs_read(evsel, cpu_map_idx, thread);
2210 
2211 	if (evsel->core.attr.read_format & PERF_FORMAT_GROUP)
2212 		return evsel__read_group(evsel, cpu_map_idx, thread);
2213 
2214 	return evsel__read_one(evsel, cpu_map_idx, thread);
2215 }
2216 
2217 int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale)
2218 {
2219 	struct perf_counts_values count;
2220 	size_t nv = scale ? 3 : 1;
2221 
2222 	if (FD(evsel, cpu_map_idx, thread) < 0)
2223 		return -EINVAL;
2224 
2225 	if (evsel->counts == NULL && evsel__alloc_counts(evsel) < 0)
2226 		return -ENOMEM;
2227 
2228 	if (readn(FD(evsel, cpu_map_idx, thread), &count, nv * sizeof(u64)) <= 0)
2229 		return -errno;
2230 
2231 	evsel__compute_deltas(evsel, cpu_map_idx, thread, &count);
2232 	perf_counts_values__scale(&count, scale, NULL);
2233 	*perf_counts(evsel->counts, cpu_map_idx, thread) = count;
2234 	return 0;
2235 }
2236 
2237 static int evsel__match_other_cpu(struct evsel *evsel, struct evsel *other,
2238 				  int cpu_map_idx)
2239 {
2240 	struct perf_cpu cpu;
2241 
2242 	cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx);
2243 	return perf_cpu_map__idx(other->core.cpus, cpu);
2244 }
2245 
/*
 * Return the cpu map index to use when looking up the group leader's fd for
 * @evsel.  If exactly one of @evsel and its leader is hybrid the index is
 * remapped through evsel__match_other_cpu(); otherwise @cpu_map_idx is
 * returned unchanged.
 */
static int evsel__hybrid_group_cpu_map_idx(struct evsel *evsel, int cpu_map_idx)
{
	struct evsel *leader = evsel__leader(evsel);
	bool evsel_hybrid = evsel__is_hybrid(evsel);
	bool leader_hybrid = evsel__is_hybrid(leader);

	if (evsel_hybrid == leader_hybrid)
		return cpu_map_idx;

	return evsel__match_other_cpu(evsel, leader, cpu_map_idx);
}
2257 
2258 static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread)
2259 {
2260 	struct evsel *leader = evsel__leader(evsel);
2261 	int fd;
2262 
2263 	if (!evsel->supported || evsel__is_group_leader(evsel))
2264 		return -1;
2265 
2266 	/*
2267 	 * Leader must be already processed/open,
2268 	 * if not it's a bug.
2269 	 */
2270 	BUG_ON(!leader->core.fd);
2271 
2272 	cpu_map_idx = evsel__hybrid_group_cpu_map_idx(evsel, cpu_map_idx);
2273 	if (cpu_map_idx == -1)
2274 		return -1;
2275 
2276 	fd = FD(leader, cpu_map_idx, thread);
2277 	BUG_ON(fd == -1 && leader->supported);
2278 
2279 	/*
2280 	 * When the leader has been skipped, return -2 to distinguish from no
2281 	 * group leader case.
2282 	 */
2283 	return fd == -1 ? -2 : fd;
2284 }
2285 
2286 static void evsel__remove_fd(struct evsel *pos, int nr_cpus, int nr_threads, int thread_idx)
2287 {
2288 	for (int cpu = 0; cpu < nr_cpus; cpu++)
2289 		for (int thread = thread_idx; thread < nr_threads - 1; thread++)
2290 			FD(pos, cpu, thread) = FD(pos, cpu, thread + 1);
2291 }
2292 
2293 static int update_fds(struct evsel *evsel,
2294 		      int nr_cpus, int cpu_map_idx,
2295 		      int nr_threads, int thread_idx)
2296 {
2297 	struct evsel *pos;
2298 
2299 	if (cpu_map_idx >= nr_cpus || thread_idx >= nr_threads)
2300 		return -EINVAL;
2301 
2302 	evlist__for_each_entry(evsel->evlist, pos) {
2303 		nr_cpus = pos != evsel ? nr_cpus : cpu_map_idx;
2304 
2305 		evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx);
2306 
2307 		/*
2308 		 * Since fds for next evsel has not been created,
2309 		 * there is no need to iterate whole event list.
2310 		 */
2311 		if (pos == evsel)
2312 			break;
2313 	}
2314 	return 0;
2315 }
2316 
2317 static bool evsel__ignore_missing_thread(struct evsel *evsel,
2318 					 int nr_cpus, int cpu_map_idx,
2319 					 struct perf_thread_map *threads,
2320 					 int thread, int err)
2321 {
2322 	pid_t ignore_pid = perf_thread_map__pid(threads, thread);
2323 
2324 	if (!evsel->ignore_missing_thread)
2325 		return false;
2326 
2327 	/* The system wide setup does not work with threads. */
2328 	if (evsel->core.system_wide)
2329 		return false;
2330 
2331 	/* The -ESRCH is perf event syscall errno for pid's not found. */
2332 	if (err != -ESRCH)
2333 		return false;
2334 
2335 	/* If there's only one thread, let it fail. */
2336 	if (threads->nr == 1)
2337 		return false;
2338 
2339 	/*
2340 	 * We should remove fd for missing_thread first
2341 	 * because thread_map__remove() will decrease threads->nr.
2342 	 */
2343 	if (update_fds(evsel, nr_cpus, cpu_map_idx, threads->nr, thread))
2344 		return false;
2345 
2346 	if (thread_map__remove(threads, thread))
2347 		return false;
2348 
2349 	pr_warning("WARNING: Ignored open failure for pid %d\n",
2350 		   ignore_pid);
2351 	return true;
2352 }
2353 
2354 static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,
2355 				void *priv __maybe_unused)
2356 {
2357 	return fprintf(fp, "  %-32s %s\n", name, val);
2358 }
2359 
2360 static void display_attr(struct perf_event_attr *attr)
2361 {
2362 	if (verbose >= 2 || debug_peo_args) {
2363 		fprintf(stderr, "%.60s\n", graph_dotted_line);
2364 		fprintf(stderr, "perf_event_attr:\n");
2365 		perf_event_attr__fprintf(stderr, attr, __open_attr__fprintf, NULL);
2366 		fprintf(stderr, "%.60s\n", graph_dotted_line);
2367 	}
2368 }
2369 
2370 bool evsel__precise_ip_fallback(struct evsel *evsel)
2371 {
2372 	/* Do not try less precise if not requested. */
2373 	if (!evsel->precise_max)
2374 		return false;
2375 
2376 	/*
2377 	 * We tried all the precise_ip values, and it's
2378 	 * still failing, so leave it to standard fallback.
2379 	 */
2380 	if (!evsel->core.attr.precise_ip) {
2381 		evsel->core.attr.precise_ip = evsel->precise_ip_original;
2382 		return false;
2383 	}
2384 
2385 	if (!evsel->precise_ip_original)
2386 		evsel->precise_ip_original = evsel->core.attr.precise_ip;
2387 
2388 	evsel->core.attr.precise_ip--;
2389 	pr_debug2_peo("decreasing precise_ip by one (%d)\n", evsel->core.attr.precise_ip);
2390 	display_attr(&evsel->core.attr);
2391 	return true;
2392 }
2393 
/*
 * Fallback maps used when a caller passes a NULL cpu map or thread map.
 * Both are created on first use by __evsel__prepare_open().
 */
static struct perf_cpu_map *empty_cpu_map;
static struct perf_thread_map *empty_thread_map;
2396 
2397 static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
2398 		struct perf_thread_map *threads)
2399 {
2400 	int ret = 0;
2401 	int nthreads = perf_thread_map__nr(threads);
2402 
2403 	if ((perf_missing_features.write_backward && evsel->core.attr.write_backward) ||
2404 	    (perf_missing_features.aux_output     && evsel->core.attr.aux_output))
2405 		return -EINVAL;
2406 
2407 	if (cpus == NULL) {
2408 		if (empty_cpu_map == NULL) {
2409 			empty_cpu_map = perf_cpu_map__new_any_cpu();
2410 			if (empty_cpu_map == NULL)
2411 				return -ENOMEM;
2412 		}
2413 
2414 		cpus = empty_cpu_map;
2415 	}
2416 
2417 	if (threads == NULL) {
2418 		if (empty_thread_map == NULL) {
2419 			empty_thread_map = thread_map__new_by_tid(-1);
2420 			if (empty_thread_map == NULL)
2421 				return -ENOMEM;
2422 		}
2423 
2424 		threads = empty_thread_map;
2425 	}
2426 
2427 	if (evsel->core.fd == NULL &&
2428 	    perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0)
2429 		return -ENOMEM;
2430 
2431 	if (evsel__is_tool(evsel))
2432 		ret = evsel__tool_pmu_prepare_open(evsel, cpus, nthreads);
2433 
2434 	evsel->open_flags = PERF_FLAG_FD_CLOEXEC;
2435 	if (evsel->cgrp)
2436 		evsel->open_flags |= PERF_FLAG_PID_CGROUP;
2437 
2438 	return ret;
2439 }
2440 
2441 static void evsel__disable_missing_features(struct evsel *evsel)
2442 {
2443 	if (perf_missing_features.defer_callchain && evsel->core.attr.defer_callchain)
2444 		evsel->core.attr.defer_callchain = 0;
2445 	if (perf_missing_features.defer_callchain && evsel->core.attr.defer_output)
2446 		evsel->core.attr.defer_output = 0;
2447 	if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit &&
2448 	    (evsel->core.attr.sample_type & PERF_SAMPLE_READ))
2449 		evsel->core.attr.inherit = 0;
2450 	if (perf_missing_features.branch_counters)
2451 		evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_COUNTERS;
2452 	if (perf_missing_features.read_lost)
2453 		evsel->core.attr.read_format &= ~PERF_FORMAT_LOST;
2454 	if (perf_missing_features.weight_struct) {
2455 		evsel__set_sample_bit(evsel, WEIGHT);
2456 		evsel__reset_sample_bit(evsel, WEIGHT_STRUCT);
2457 	}
2458 	if (perf_missing_features.clockid_wrong)
2459 		evsel->core.attr.clockid = CLOCK_MONOTONIC; /* should always work */
2460 	if (perf_missing_features.clockid) {
2461 		evsel->core.attr.use_clockid = 0;
2462 		evsel->core.attr.clockid = 0;
2463 	}
2464 	if (perf_missing_features.cloexec)
2465 		evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
2466 	if (perf_missing_features.mmap2)
2467 		evsel->core.attr.mmap2 = 0;
2468 	if (evsel->pmu && evsel->pmu->missing_features.exclude_guest)
2469 		evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0;
2470 	if (perf_missing_features.lbr_flags)
2471 		evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
2472 				     PERF_SAMPLE_BRANCH_NO_CYCLES);
2473 	if (perf_missing_features.group_read && evsel->core.attr.inherit)
2474 		evsel->core.attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID);
2475 	if (perf_missing_features.ksymbol)
2476 		evsel->core.attr.ksymbol = 0;
2477 	if (perf_missing_features.bpf)
2478 		evsel->core.attr.bpf_event = 0;
2479 	if (perf_missing_features.branch_hw_idx)
2480 		evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_HW_INDEX;
2481 	if (perf_missing_features.sample_id_all)
2482 		evsel->core.attr.sample_id_all = 0;
2483 }
2484 
/*
 * Public wrapper: run __evsel__prepare_open() and, if it succeeded, strip
 * the features already known to be missing.  Returns the error from
 * __evsel__prepare_open(), or 0.
 */
int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
			struct perf_thread_map *threads)
{
	int err = __evsel__prepare_open(evsel, cpus, threads);

	if (!err)
		evsel__disable_missing_features(evsel);

	return err;
}
2498 
2499 static bool __has_attr_feature(struct perf_event_attr *attr,
2500 			       struct perf_cpu cpu, unsigned long flags)
2501 {
2502 	int fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu,
2503 			 /*group_fd=*/-1, flags);
2504 	close(fd);
2505 
2506 	if (fd < 0) {
2507 		attr->exclude_kernel = 1;
2508 
2509 		fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu,
2510 			     /*group_fd=*/-1, flags);
2511 		close(fd);
2512 	}
2513 
2514 	if (fd < 0) {
2515 		attr->exclude_hv = 1;
2516 
2517 		fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu,
2518 			     /*group_fd=*/-1, flags);
2519 		close(fd);
2520 	}
2521 
2522 	if (fd < 0) {
2523 		attr->exclude_guest = 1;
2524 
2525 		fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu,
2526 			     /*group_fd=*/-1, flags);
2527 		close(fd);
2528 	}
2529 
2530 	attr->exclude_kernel = 0;
2531 	attr->exclude_guest = 0;
2532 	attr->exclude_hv = 0;
2533 
2534 	return fd >= 0;
2535 }
2536 
2537 static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags)
2538 {
2539 	struct perf_cpu cpu = {.cpu = -1};
2540 
2541 	return __has_attr_feature(attr, cpu, flags);
2542 }
2543 
2544 static void evsel__detect_missing_pmu_features(struct evsel *evsel)
2545 {
2546 	struct perf_event_attr attr = {
2547 		.type = evsel->core.attr.type,
2548 		.config = evsel->core.attr.config,
2549 		.disabled = 1,
2550 	};
2551 	struct perf_pmu *pmu = evsel->pmu;
2552 	int old_errno;
2553 
2554 	old_errno = errno;
2555 
2556 	if (pmu == NULL)
2557 		pmu = evsel->pmu = evsel__find_pmu(evsel);
2558 
2559 	if (pmu == NULL || pmu->missing_features.checked)
2560 		goto out;
2561 
2562 	/*
2563 	 * Must probe features in the order they were added to the
2564 	 * perf_event_attr interface.  These are kernel core limitation but
2565 	 * specific to PMUs with branch stack.  So we can detect with the given
2566 	 * hardware event and stop on the first one succeeded.
2567 	 */
2568 
2569 	/* Please add new feature detection here. */
2570 
2571 	attr.exclude_guest = 1;
2572 	if (has_attr_feature(&attr, /*flags=*/0))
2573 		goto found;
2574 	pmu->missing_features.exclude_guest = true;
2575 	pr_debug2("switching off exclude_guest for PMU %s\n", pmu->name);
2576 
2577 found:
2578 	pmu->missing_features.checked = true;
2579 out:
2580 	errno = old_errno;
2581 }
2582 
2583 static void evsel__detect_missing_brstack_features(struct evsel *evsel)
2584 {
2585 	static bool detection_done = false;
2586 	struct perf_event_attr attr = {
2587 		.type = evsel->core.attr.type,
2588 		.config = evsel->core.attr.config,
2589 		.disabled = 1,
2590 		.sample_type = PERF_SAMPLE_BRANCH_STACK,
2591 		.sample_period = 1000,
2592 	};
2593 	int old_errno;
2594 
2595 	if (detection_done)
2596 		return;
2597 
2598 	old_errno = errno;
2599 
2600 	/*
2601 	 * Must probe features in the order they were added to the
2602 	 * perf_event_attr interface.  These are PMU specific limitation
2603 	 * so we can detect with the given hardware event and stop on the
2604 	 * first one succeeded.
2605 	 */
2606 
2607 	/* Please add new feature detection here. */
2608 
2609 	attr.branch_sample_type = PERF_SAMPLE_BRANCH_COUNTERS;
2610 	if (has_attr_feature(&attr, /*flags=*/0))
2611 		goto found;
2612 	perf_missing_features.branch_counters = true;
2613 	pr_debug2("switching off branch counters support\n");
2614 
2615 	attr.branch_sample_type = PERF_SAMPLE_BRANCH_HW_INDEX;
2616 	if (has_attr_feature(&attr, /*flags=*/0))
2617 		goto found;
2618 	perf_missing_features.branch_hw_idx = true;
2619 	pr_debug2("switching off branch HW index support\n");
2620 
2621 	attr.branch_sample_type = PERF_SAMPLE_BRANCH_NO_CYCLES | PERF_SAMPLE_BRANCH_NO_FLAGS;
2622 	if (has_attr_feature(&attr, /*flags=*/0))
2623 		goto found;
2624 	perf_missing_features.lbr_flags = true;
2625 	pr_debug2_peo("switching off branch sample type no (cycles/flags)\n");
2626 
2627 found:
2628 	detection_done = true;
2629 	errno = old_errno;
2630 }
2631 
2632 static bool evsel__probe_aux_action(struct evsel *evsel, struct perf_cpu cpu)
2633 {
2634 	struct perf_event_attr attr = evsel->core.attr;
2635 	int old_errno = errno;
2636 
2637 	attr.disabled = 1;
2638 	attr.aux_start_paused = 1;
2639 
2640 	if (__has_attr_feature(&attr, cpu, /*flags=*/0)) {
2641 		errno = old_errno;
2642 		return true;
2643 	}
2644 
2645 	/*
2646 	 * EOPNOTSUPP means the kernel supports the feature but the PMU does
2647 	 * not, so keep that distinction if possible.
2648 	 */
2649 	if (errno != EOPNOTSUPP)
2650 		errno = old_errno;
2651 
2652 	return false;
2653 }
2654 
2655 static void evsel__detect_missing_aux_action_feature(struct evsel *evsel, struct perf_cpu cpu)
2656 {
2657 	static bool detection_done;
2658 	struct evsel *leader;
2659 
2660 	/*
2661 	 * Don't bother probing aux_action if it is not being used or has been
2662 	 * probed before.
2663 	 */
2664 	if (!evsel->core.attr.aux_action || detection_done)
2665 		return;
2666 
2667 	detection_done = true;
2668 
2669 	/*
2670 	 * The leader is an AUX area event. If it has failed, assume the feature
2671 	 * is not supported.
2672 	 */
2673 	leader = evsel__leader(evsel);
2674 	if (evsel == leader) {
2675 		perf_missing_features.aux_action = true;
2676 		return;
2677 	}
2678 
2679 	/*
2680 	 * AUX area event with aux_action must have been opened successfully
2681 	 * already, so feature is supported.
2682 	 */
2683 	if (leader->core.attr.aux_action)
2684 		return;
2685 
2686 	if (!evsel__probe_aux_action(leader, cpu))
2687 		perf_missing_features.aux_action = true;
2688 }
2689 
/*
 * Detect which perf_event_attr features are not available and report
 * whether @evsel uses any of them.
 *
 * The aux_action, per-PMU and (for branch stack events) branch stack probes
 * run first.  The generic probes below then run at most once per process,
 * using a disabled software task-clock event; each failed probe sets the
 * corresponding perf_missing_features flag, and probing stops at the first
 * feature that works.  errno is restored after the generic probes.
 *
 * Returns true if @evsel requests a feature that is flagged as missing,
 * false otherwise.  The checks after the 'check' label run on every call.
 */
static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu cpu)
{
	static bool detection_done = false;
	struct perf_event_attr attr = {
		.type = PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_TASK_CLOCK,
		.disabled = 1,
	};
	int old_errno;

	evsel__detect_missing_aux_action_feature(evsel, cpu);

	evsel__detect_missing_pmu_features(evsel);

	if (evsel__has_br_stack(evsel))
		evsel__detect_missing_brstack_features(evsel);

	/* The generic probes already ran: only evaluate this evsel. */
	if (detection_done)
		goto check;

	old_errno = errno;

	/*
	 * Must probe features in the order they were added to the
	 * perf_event_attr interface.  These are kernel core limitation
	 * not PMU-specific so we can detect with a software event and
	 * stop on the first one succeeded.
	 */

	/* Please add new feature detection here. */

	attr.defer_callchain = true;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.defer_callchain = true;
	pr_debug2("switching off deferred callchain support\n");
	attr.defer_callchain = false;

	attr.inherit = true;
	attr.sample_type = PERF_SAMPLE_READ | PERF_SAMPLE_TID;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.inherit_sample_read = true;
	pr_debug2("Using PERF_SAMPLE_READ / :S modifier is not compatible with inherit, falling back to no-inherit.\n");
	attr.inherit = false;
	attr.sample_type = 0;

	attr.read_format = PERF_FORMAT_LOST;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.read_lost = true;
	pr_debug2("switching off PERF_FORMAT_LOST support\n");
	attr.read_format = 0;

	attr.sample_type = PERF_SAMPLE_WEIGHT_STRUCT;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.weight_struct = true;
	pr_debug2("switching off weight struct support\n");
	attr.sample_type = 0;

	attr.sample_type = PERF_SAMPLE_CODE_PAGE_SIZE;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.code_page_size = true;
	pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support\n");
	attr.sample_type = 0;

	attr.sample_type = PERF_SAMPLE_DATA_PAGE_SIZE;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.data_page_size = true;
	pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support\n");
	attr.sample_type = 0;

	attr.cgroup = 1;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.cgroup = true;
	pr_debug2_peo("Kernel has no cgroup sampling support\n");
	attr.cgroup = 0;

	attr.aux_output = 1;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.aux_output = true;
	pr_debug2_peo("Kernel has no attr.aux_output support\n");
	attr.aux_output = 0;

	attr.bpf_event = 1;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.bpf = true;
	pr_debug2_peo("switching off bpf_event\n");
	attr.bpf_event = 0;

	attr.ksymbol = 1;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.ksymbol = true;
	pr_debug2_peo("switching off ksymbol\n");
	attr.ksymbol = 0;

	attr.write_backward = 1;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.write_backward = true;
	pr_debug2_peo("switching off write_backward\n");
	attr.write_backward = 0;

	attr.use_clockid = 1;
	attr.clockid = CLOCK_MONOTONIC;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.clockid = true;
	pr_debug2_peo("switching off clockid\n");
	attr.use_clockid = 0;
	attr.clockid = 0;

	/* This probe exercises an open flag rather than an attr field. */
	if (has_attr_feature(&attr, /*flags=*/PERF_FLAG_FD_CLOEXEC))
		goto found;
	perf_missing_features.cloexec = true;
	pr_debug2_peo("switching off cloexec flag\n");

	attr.mmap2 = 1;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.mmap2 = true;
	pr_debug2_peo("switching off mmap2\n");
	attr.mmap2 = 0;

	/* set this unconditionally? */
	perf_missing_features.sample_id_all = true;
	pr_debug2_peo("switching off sample_id_all\n");

	attr.inherit = 1;
	attr.read_format = PERF_FORMAT_GROUP;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.group_read = true;
	pr_debug2_peo("switching off group read\n");
	attr.inherit = 0;
	attr.read_format = 0;

found:
	detection_done = true;
	errno = old_errno;

check:
	/* Does this evsel ask for anything that was flagged as missing? */
	if ((evsel->core.attr.defer_callchain || evsel->core.attr.defer_output) &&
	    perf_missing_features.defer_callchain)
		return true;

	if (evsel->core.attr.inherit &&
	    (evsel->core.attr.sample_type & PERF_SAMPLE_READ) &&
	    perf_missing_features.inherit_sample_read)
		return true;

	if ((evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) &&
	    perf_missing_features.branch_counters)
		return true;

	if ((evsel->core.attr.read_format & PERF_FORMAT_LOST) &&
	    perf_missing_features.read_lost)
		return true;

	if ((evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT) &&
	    perf_missing_features.weight_struct)
		return true;

	/*
	 * clockid itself is supported, but the evsel asked for a clock other
	 * than CLOCK_MONOTONIC: record that as a wrong clockid.
	 */
	if (evsel->core.attr.use_clockid && evsel->core.attr.clockid != CLOCK_MONOTONIC &&
	    !perf_missing_features.clockid) {
		perf_missing_features.clockid_wrong = true;
		return true;
	}

	if (evsel->core.attr.use_clockid && perf_missing_features.clockid)
		return true;

	if ((evsel->open_flags & PERF_FLAG_FD_CLOEXEC) &&
	    perf_missing_features.cloexec)
		return true;

	if (evsel->core.attr.mmap2 && perf_missing_features.mmap2)
		return true;

	if ((evsel->core.attr.branch_sample_type & (PERF_SAMPLE_BRANCH_NO_FLAGS |
						    PERF_SAMPLE_BRANCH_NO_CYCLES)) &&
	    perf_missing_features.lbr_flags)
		return true;

	if (evsel->core.attr.inherit && (evsel->core.attr.read_format & PERF_FORMAT_GROUP) &&
	    perf_missing_features.group_read)
		return true;

	if (evsel->core.attr.ksymbol && perf_missing_features.ksymbol)
		return true;

	if (evsel->core.attr.bpf_event && perf_missing_features.bpf)
		return true;

	if ((evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) &&
	    perf_missing_features.branch_hw_idx)
		return true;

	if (evsel->core.attr.sample_id_all && perf_missing_features.sample_id_all)
		return true;

	return false;
}
2900 
/*
 * Open @evsel for every thread in @threads on the cpu map indexes
 * [@start_cpu_map_idx, @end_cpu_map_idx) of @cpus.
 *
 * Retire latency, tool, hwmon and drm events are handed to their own open
 * routines.  All other events are opened with sys_perf_event_open(), and
 * the resulting fd is stored in the evsel's fd table.  When an open fails,
 * the fallbacks under the 'try_fallback' label are tried in order before
 * giving up: dropping a missing thread, raising the open file limit,
 * disabling missing features and lowering precise_ip.
 *
 * Returns 0 on success or a negative error code.  On error evsel->supported
 * is set to false.
 */
static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
		struct perf_thread_map *threads,
		int start_cpu_map_idx, int end_cpu_map_idx)
{
	int idx, thread, nthreads;
	int pid = -1, err, old_errno;
	enum rlimit_action set_rlimit = NO_CHANGE;
	struct perf_cpu cpu;

	if (evsel__is_retire_lat(evsel)) {
		err = evsel__tpebs_open(evsel);
		goto out;
	}

	err = __evsel__prepare_open(evsel, cpus, threads);
	if (err)
		goto out;

	/* __evsel__prepare_open() created the fallback maps if needed. */
	if (cpus == NULL)
		cpus = empty_cpu_map;

	if (threads == NULL)
		threads = empty_thread_map;

	nthreads = perf_thread_map__nr(threads);

	/* For cgroup events the cgroup fd is passed in the pid argument. */
	if (evsel->cgrp)
		pid = evsel->cgrp->fd;

fallback_missing_features:
	evsel__disable_missing_features(evsel);

	pr_debug3("Opening: %s\n", evsel__name(evsel));
	display_attr(&evsel->core.attr);

	if (evsel__is_tool(evsel)) {
		err = evsel__tool_pmu_open(evsel, threads,
					   start_cpu_map_idx,
					   end_cpu_map_idx);
		goto out;
	}
	if (evsel__is_hwmon(evsel)) {
		err = evsel__hwmon_pmu_open(evsel, threads,
					    start_cpu_map_idx,
					    end_cpu_map_idx);
		goto out;
	}
	if (evsel__is_drm(evsel)) {
		err = evsel__drm_pmu_open(evsel, threads,
					  start_cpu_map_idx,
					  end_cpu_map_idx);
		goto out;
	}

	for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
		cpu = perf_cpu_map__cpu(cpus, idx);

		for (thread = 0; thread < nthreads; thread++) {
			int fd, group_fd;
retry_open:
			/*
			 * Re-checked here because a jump to retry_open can
			 * follow a decrement of nthreads.
			 */
			if (thread >= nthreads)
				break;

			if (!evsel->cgrp && !evsel->core.system_wide)
				pid = perf_thread_map__pid(threads, thread);

			group_fd = get_group_fd(evsel, idx, thread);

			if (group_fd == -2) {
				/*
				 * NOTE(review): prints evsel->name directly
				 * rather than evsel__name(evsel) as above;
				 * confirm name is always set at this point.
				 */
				pr_debug("broken group leader for %s\n", evsel->name);
				err = -EINVAL;
				goto out_close;
			}

			/* Debug message used by test scripts */
			pr_debug2_peo("sys_perf_event_open: pid %d  cpu %d  group_fd %d  flags %#lx",
				pid, cpu.cpu, group_fd, evsel->open_flags);

			fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu,
						group_fd, evsel->open_flags);

			FD(evsel, idx, thread) = fd;

			if (fd < 0) {
				err = -errno;

				pr_debug2_peo("\nsys_perf_event_open failed, error %d\n",
					  err);
				goto try_fallback;
			}

			bpf_counter__install_pe(evsel, idx, fd);

			if (unlikely(test_attr__enabled())) {
				test_attr__open(&evsel->core.attr, pid, cpu,
						fd, group_fd, evsel->open_flags);
			}

			/* Debug message used by test scripts */
			pr_debug2_peo(" = %d\n", fd);

			if (evsel->bpf_fd >= 0) {
				int evt_fd = fd;
				int bpf_fd = evsel->bpf_fd;

				err = ioctl(evt_fd,
					    PERF_EVENT_IOC_SET_BPF,
					    bpf_fd);
				/* EEXIST from the ioctl is not treated as a failure. */
				if (err && errno != EEXIST) {
					pr_err("failed to attach bpf fd %d: %m\n",
					       bpf_fd);
					err = -EINVAL;
					goto out_close;
				}
			}

			set_rlimit = NO_CHANGE;

			/*
			 * If we succeeded but had to kill clockid, fail and
			 * have evsel__open_strerror() print us a nice error.
			 */
			if (perf_missing_features.clockid ||
			    perf_missing_features.clockid_wrong) {
				err = -EINVAL;
				goto out_close;
			}
		}
	}

	err = 0;
	goto out;

try_fallback:
	if (evsel__ignore_missing_thread(evsel, perf_cpu_map__nr(cpus),
					 idx, threads, thread, err)) {
		/* We just removed 1 thread, so lower the upper nthreads limit. */
		nthreads--;

		/* ... and pretend like nothing have happened. */
		err = 0;
		goto retry_open;
	}
	/*
	 * perf stat needs between 5 and 22 fds per CPU. When we run out
	 * of them try to increase the limits.
	 */
	if (err == -EMFILE && rlimit__increase_nofile(&set_rlimit))
		goto retry_open;

	/* Restart from the top with the missing features switched off. */
	if (err == -EINVAL && evsel__detect_missing_features(evsel, cpu))
		goto fallback_missing_features;

	if (evsel__precise_ip_fallback(evsel))
		goto retry_open;

out_close:
	if (err)
		threads->err_thread = thread;

	/*
	 * Close the fds recorded so far: the threads below 'thread' for the
	 * current cpu index, then every thread of each lower index down to 0.
	 * errno is preserved across the close() calls.
	 */
	old_errno = errno;
	do {
		while (--thread >= 0) {
			if (FD(evsel, idx, thread) >= 0)
				close(FD(evsel, idx, thread));
			FD(evsel, idx, thread) = -1;
		}
		thread = nthreads;
	} while (--idx >= 0);
	errno = old_errno;
out:
	if (err)
		evsel->supported = false;
	return err;
}
3076 
/*
 * Open @evsel on every index of @cpus, for every thread in @threads.
 * Returns whatever evsel__open_cpu() returns.
 */
int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
		struct perf_thread_map *threads)
{
	const int nr_cpus = perf_cpu_map__nr(cpus);

	return evsel__open_cpu(evsel, cpus, threads, 0, nr_cpus);
}
3082 
3083 void evsel__close(struct evsel *evsel)
3084 {
3085 	if (evsel__is_retire_lat(evsel))
3086 		evsel__tpebs_close(evsel);
3087 	perf_evsel__close(&evsel->core);
3088 	perf_evsel__free_id(&evsel->core);
3089 }
3090 
/*
 * Open @evsel for @threads on a single cpu map index, or on all of @cpus
 * when @cpu_map_idx is -1.  Returns whatever evsel__open_cpu() returns.
 */
int evsel__open_per_cpu_and_thread(struct evsel *evsel,
				   struct perf_cpu_map *cpus, int cpu_map_idx,
				   struct perf_thread_map *threads)
{
	int start = 0, end;

	if (cpu_map_idx == -1) {
		end = perf_cpu_map__nr(cpus);
	} else {
		start = cpu_map_idx;
		end = cpu_map_idx + 1;
	}

	return evsel__open_cpu(evsel, cpus, threads, start, end);
}
3100 
/*
 * Open @evsel on @cpus (a single index, or all when @cpu_map_idx is -1)
 * using a temporary thread map created with thread_map__new_by_tid(-1).
 * The temporary map is released before returning.
 */
int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx)
{
	struct perf_thread_map *threads;
	int ret;

	threads = thread_map__new_by_tid(-1);
	ret = evsel__open_per_cpu_and_thread(evsel, cpus, cpu_map_idx, threads);
	perf_thread_map__put(threads);

	return ret;
}
3109 
/*
 * Open @evsel for @threads using a temporary cpu map created with
 * perf_cpu_map__new_any_cpu().  The temporary map is released before
 * returning.
 */
int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads)
{
	struct perf_cpu_map *cpus;
	int ret;

	cpus = perf_cpu_map__new_any_cpu();
	ret = evsel__open_per_cpu_and_thread(evsel, cpus, -1, threads);
	perf_cpu_map__put(cpus);

	return ret;
}
3118 
3119 static int perf_evsel__parse_id_sample(const union perf_event *event,
3120 				       struct perf_sample *sample)
3121 {
3122 	const struct evsel *evsel = sample->evsel;
3123 	u64 type = evsel->core.attr.sample_type;
3124 	const __u64 *array = event->sample.array;
3125 	bool swapped = evsel->needs_swap;
3126 	union u64_swap u;
3127 	int i = ((event->header.size - sizeof(event->header)) / sizeof(u64)) - 1;
3128 
3129 	if (type & PERF_SAMPLE_IDENTIFIER) {
3130 		if (i < 0)
3131 			return -EFAULT;
3132 
3133 		sample->id = array[i--];
3134 	}
3135 
3136 	if (type & PERF_SAMPLE_CPU) {
3137 		if (i < 0)
3138 			return -EFAULT;
3139 
3140 		u.val64 = array[i--];
3141 		if (swapped) {
3142 			/* undo swap of u64, then swap on individual u32s */
3143 			u.val64 = bswap_64(u.val64);
3144 			u.val32[0] = bswap_32(u.val32[0]);
3145 		}
3146 		sample->cpu = u.val32[0];
3147 	}
3148 
3149 	if (type & PERF_SAMPLE_STREAM_ID) {
3150 		if (i < 0)
3151 			return -EFAULT;
3152 
3153 		sample->stream_id = array[i--];
3154 	}
3155 
3156 	if (type & PERF_SAMPLE_ID) {
3157 		if (i < 0)
3158 			return -EFAULT;
3159 
3160 		sample->id = array[i--];
3161 	}
3162 
3163 	if (type & PERF_SAMPLE_TIME) {
3164 		if (i < 0)
3165 			return -EFAULT;
3166 
3167 		sample->time = array[i--];
3168 	}
3169 
3170 	if (type & PERF_SAMPLE_TID) {
3171 		if (i < 0)
3172 			return -EFAULT;
3173 
3174 		u.val64 = array[i--];
3175 		if (swapped) {
3176 			/* undo swap of u64, then swap on individual u32s */
3177 			u.val64 = bswap_64(u.val64);
3178 			u.val32[0] = bswap_32(u.val32[0]);
3179 			u.val32[1] = bswap_32(u.val32[1]);
3180 		}
3181 
3182 		sample->pid = u.val32[0];
3183 		sample->tid = u.val32[1];
3184 	}
3185 
3186 	return 0;
3187 }
3188 
3189 static inline bool overflow(const void *endp, u16 max_size, const void *offset,
3190 			    u64 size)
3191 {
3192 	return size > max_size || offset + size > endp;
3193 }
3194 
3195 #define OVERFLOW_CHECK(offset, size, max_size)				\
3196 	do {								\
3197 		if (overflow(endp, (max_size), (offset), (size)))	\
3198 			goto out_efault;				\
3199 	} while (0)
3200 
3201 #define OVERFLOW_CHECK_u64(offset) \
3202 	OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64))
3203 
3204 static int
3205 perf_event__check_size(union perf_event *event, unsigned int sample_size)
3206 {
3207 	/*
3208 	 * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes
3209 	 * up to PERF_SAMPLE_PERIOD.  After that overflow() must be used to
3210 	 * check the format does not go past the end of the event.
3211 	 */
3212 	if (sample_size + sizeof(event->header) > event->header.size)
3213 		return -EFAULT;
3214 
3215 	return 0;
3216 }
3217 
3218 static void perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type)
3219 {
3220 	union perf_sample_weight weight;
3221 
3222 	weight.full = *array;
3223 	if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
3224 		data->weight = weight.var1_dw;
3225 		data->ins_lat = weight.var2_w;
3226 		data->weight3 = weight.var3_w;
3227 	} else {
3228 		data->weight = weight.full;
3229 	}
3230 }
3231 
3232 u64 evsel__bitfield_swap_branch_flags(u64 value)
3233 {
3234 	u64 new_val = 0;
3235 
3236 	/*
3237 	 * branch_flags
3238 	 * union {
3239 	 * 	u64 values;
3240 	 * 	struct {
3241 	 * 		mispred:1	//target mispredicted
3242 	 * 		predicted:1	//target predicted
3243 	 * 		in_tx:1		//in transaction
3244 	 * 		abort:1		//transaction abort
3245 	 * 		cycles:16	//cycle count to last branch
3246 	 * 		type:4		//branch type
3247 	 * 		spec:2		//branch speculation info
3248 	 * 		new_type:4	//additional branch type
3249 	 * 		priv:3		//privilege level
3250 	 * 		reserved:31
3251 	 * 	}
3252 	 * }
3253 	 *
3254 	 * Avoid bswap64() the entire branch_flag.value,
3255 	 * as it has variable bit-field sizes. Instead the
3256 	 * macro takes the bit-field position/size,
3257 	 * swaps it based on the host endianness.
3258 	 */
3259 	if (host_is_bigendian()) {
3260 		new_val = bitfield_swap(value, 0, 1);
3261 		new_val |= bitfield_swap(value, 1, 1);
3262 		new_val |= bitfield_swap(value, 2, 1);
3263 		new_val |= bitfield_swap(value, 3, 1);
3264 		new_val |= bitfield_swap(value, 4, 16);
3265 		new_val |= bitfield_swap(value, 20, 4);
3266 		new_val |= bitfield_swap(value, 24, 2);
3267 		new_val |= bitfield_swap(value, 26, 4);
3268 		new_val |= bitfield_swap(value, 30, 3);
3269 		new_val |= bitfield_swap(value, 33, 31);
3270 	} else {
3271 		new_val = bitfield_swap(value, 63, 1);
3272 		new_val |= bitfield_swap(value, 62, 1);
3273 		new_val |= bitfield_swap(value, 61, 1);
3274 		new_val |= bitfield_swap(value, 60, 1);
3275 		new_val |= bitfield_swap(value, 44, 16);
3276 		new_val |= bitfield_swap(value, 40, 4);
3277 		new_val |= bitfield_swap(value, 38, 2);
3278 		new_val |= bitfield_swap(value, 34, 4);
3279 		new_val |= bitfield_swap(value, 31, 3);
3280 		new_val |= bitfield_swap(value, 0, 31);
3281 	}
3282 
3283 	return new_val;
3284 }
3285 
3286 static inline bool evsel__has_branch_counters(const struct evsel *evsel)
3287 {
3288 	struct evsel *leader = evsel__leader(evsel);
3289 
3290 	/* The branch counters feature only supports group */
3291 	if (!leader || !evsel->evlist)
3292 		return false;
3293 
3294 	if (evsel->evlist->nr_br_cntr < 0)
3295 		evlist__update_br_cntr(evsel->evlist);
3296 
3297 	if (leader->br_cntr_nr > 0)
3298 		return true;
3299 
3300 	return false;
3301 }
3302 
3303 static int __set_offcpu_sample(struct perf_sample *data)
3304 {
3305 	u64 *array = data->raw_data;
3306 	u32 max_size = data->raw_size, *p32;
3307 	const void *endp = (void *)array + max_size;
3308 
3309 	if (array == NULL)
3310 		return -EFAULT;
3311 
3312 	OVERFLOW_CHECK_u64(array);
3313 	p32 = (void *)array++;
3314 	data->pid = p32[0];
3315 	data->tid = p32[1];
3316 
3317 	OVERFLOW_CHECK_u64(array);
3318 	data->period = *array++;
3319 
3320 	OVERFLOW_CHECK_u64(array);
3321 	data->callchain = (struct ip_callchain *)array++;
3322 	OVERFLOW_CHECK(array, data->callchain->nr * sizeof(u64), max_size);
3323 	data->ip = data->callchain->ips[1];
3324 	array += data->callchain->nr;
3325 
3326 	OVERFLOW_CHECK_u64(array);
3327 	data->cgroup = *array;
3328 
3329 	return 0;
3330 out_efault:
3331 	return -EFAULT;
3332 }
3333 
3334 int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
3335 			struct perf_sample *data)
3336 {
3337 	u64 type = evsel->core.attr.sample_type;
3338 	bool swapped = evsel->needs_swap;
3339 	const __u64 *array;
3340 	u16 max_size = event->header.size;
3341 	const void *endp = (void *)event + max_size;
3342 	u64 sz;
3343 
3344 	/*
3345 	 * used for cross-endian analysis. See git commit 65014ab3
3346 	 * for why this goofiness is needed.
3347 	 */
3348 	union u64_swap u;
3349 
3350 	perf_sample__init(data, /*all=*/true);
3351 	data->evsel = evsel;
3352 	data->cpu = data->pid = data->tid = -1;
3353 	data->stream_id = data->id = data->time = -1ULL;
3354 	data->period = evsel->core.attr.sample_period;
3355 	data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
3356 	data->misc    = event->header.misc;
3357 	data->data_src = PERF_MEM_DATA_SRC_NONE;
3358 	data->vcpu = -1;
3359 
3360 	if (event->header.type == PERF_RECORD_CALLCHAIN_DEFERRED) {
3361 		const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
3362 
3363 		data->callchain = (struct ip_callchain *)&event->callchain_deferred.nr;
3364 		if (data->callchain->nr > max_callchain_nr)
3365 			goto out_efault;
3366 
3367 		data->deferred_cookie = event->callchain_deferred.cookie;
3368 
3369 		if (evsel->core.attr.sample_id_all) {
3370 			if (perf_evsel__parse_id_sample(event, data))
3371 				goto out_efault;
3372 		}
3373 		return 0;
3374 	}
3375 
3376 	if (event->header.type != PERF_RECORD_SAMPLE) {
3377 		if (evsel->core.attr.sample_id_all) {
3378 			if (perf_evsel__parse_id_sample(event, data))
3379 				goto out_efault;
3380 		}
3381 		return 0;
3382 	}
3383 
3384 	array = event->sample.array;
3385 
3386 	if (perf_event__check_size(event, evsel->sample_size))
3387 		goto out_efault;
3388 
3389 	if (type & PERF_SAMPLE_IDENTIFIER) {
3390 		data->id = *array;
3391 		array++;
3392 	}
3393 
3394 	if (type & PERF_SAMPLE_IP) {
3395 		data->ip = *array;
3396 		array++;
3397 	}
3398 
3399 	if (type & PERF_SAMPLE_TID) {
3400 		u.val64 = *array;
3401 		if (swapped) {
3402 			/* undo swap of u64, then swap on individual u32s */
3403 			u.val64 = bswap_64(u.val64);
3404 			u.val32[0] = bswap_32(u.val32[0]);
3405 			u.val32[1] = bswap_32(u.val32[1]);
3406 		}
3407 
3408 		data->pid = u.val32[0];
3409 		data->tid = u.val32[1];
3410 		array++;
3411 	}
3412 
3413 	if (type & PERF_SAMPLE_TIME) {
3414 		data->time = *array;
3415 		array++;
3416 	}
3417 
3418 	if (type & PERF_SAMPLE_ADDR) {
3419 		data->addr = *array;
3420 		array++;
3421 	}
3422 
3423 	if (type & PERF_SAMPLE_ID) {
3424 		data->id = *array;
3425 		array++;
3426 	}
3427 
3428 	if (type & PERF_SAMPLE_STREAM_ID) {
3429 		data->stream_id = *array;
3430 		array++;
3431 	}
3432 
3433 	if (type & PERF_SAMPLE_CPU) {
3434 
3435 		u.val64 = *array;
3436 		if (swapped) {
3437 			/* undo swap of u64, then swap on individual u32s */
3438 			u.val64 = bswap_64(u.val64);
3439 			u.val32[0] = bswap_32(u.val32[0]);
3440 		}
3441 
3442 		data->cpu = u.val32[0];
3443 		array++;
3444 	}
3445 
3446 	if (type & PERF_SAMPLE_PERIOD) {
3447 		data->period = *array;
3448 		array++;
3449 	}
3450 
3451 	if (type & PERF_SAMPLE_READ) {
3452 		u64 read_format = evsel->core.attr.read_format;
3453 
3454 		OVERFLOW_CHECK_u64(array);
3455 		if (read_format & PERF_FORMAT_GROUP)
3456 			data->read.group.nr = *array;
3457 		else
3458 			data->read.one.value = *array;
3459 
3460 		array++;
3461 
3462 		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
3463 			OVERFLOW_CHECK_u64(array);
3464 			data->read.time_enabled = *array;
3465 			array++;
3466 		}
3467 
3468 		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
3469 			OVERFLOW_CHECK_u64(array);
3470 			data->read.time_running = *array;
3471 			array++;
3472 		}
3473 
3474 		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
3475 		if (read_format & PERF_FORMAT_GROUP) {
3476 			const u64 max_group_nr = UINT64_MAX /
3477 					sizeof(struct sample_read_value);
3478 
3479 			if (data->read.group.nr > max_group_nr)
3480 				goto out_efault;
3481 
3482 			sz = data->read.group.nr * sample_read_value_size(read_format);
3483 			OVERFLOW_CHECK(array, sz, max_size);
3484 			data->read.group.values =
3485 					(struct sample_read_value *)array;
3486 			array = (void *)array + sz;
3487 		} else {
3488 			OVERFLOW_CHECK_u64(array);
3489 			data->read.one.id = *array;
3490 			array++;
3491 
3492 			if (read_format & PERF_FORMAT_LOST) {
3493 				OVERFLOW_CHECK_u64(array);
3494 				data->read.one.lost = *array;
3495 				array++;
3496 			}
3497 		}
3498 	}
3499 
3500 	if (type & PERF_SAMPLE_CALLCHAIN) {
3501 		const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
3502 		u64 callchain_nr;
3503 
3504 		OVERFLOW_CHECK_u64(array);
3505 		data->callchain = (struct ip_callchain *)array++;
3506 		callchain_nr = data->callchain->nr;
3507 		if (callchain_nr > max_callchain_nr)
3508 			goto out_efault;
3509 		sz = callchain_nr * sizeof(u64);
3510 		/*
3511 		 * Save the cookie for the deferred user callchain.  The last 2
3512 		 * entries in the callchain should be the context marker and the
3513 		 * cookie.  The cookie will be used to match PERF_RECORD_
3514 		 * CALLCHAIN_DEFERRED later.
3515 		 */
3516 		if (evsel->core.attr.defer_callchain && callchain_nr >= 2 &&
3517 		    data->callchain->ips[callchain_nr - 2] == PERF_CONTEXT_USER_DEFERRED) {
3518 			data->deferred_cookie = data->callchain->ips[callchain_nr - 1];
3519 			data->deferred_callchain = true;
3520 		}
3521 		OVERFLOW_CHECK(array, sz, max_size);
3522 		array = (void *)array + sz;
3523 	}
3524 
3525 	if (type & PERF_SAMPLE_RAW) {
3526 		OVERFLOW_CHECK_u64(array);
3527 		u.val64 = *array;
3528 
3529 		/*
3530 		 * Undo swap of u64, then swap on individual u32s,
3531 		 * get the size of the raw area and undo all of the
3532 		 * swap. The pevent interface handles endianness by
3533 		 * itself.
3534 		 */
3535 		if (swapped) {
3536 			u.val64 = bswap_64(u.val64);
3537 			u.val32[0] = bswap_32(u.val32[0]);
3538 			u.val32[1] = bswap_32(u.val32[1]);
3539 		}
3540 		data->raw_size = u.val32[0];
3541 
3542 		/*
3543 		 * The raw data is aligned on 64bits including the
3544 		 * u32 size, so it's safe to use mem_bswap_64.
3545 		 */
3546 		if (swapped)
3547 			mem_bswap_64((void *) array, data->raw_size);
3548 
3549 		array = (void *)array + sizeof(u32);
3550 
3551 		OVERFLOW_CHECK(array, data->raw_size, max_size);
3552 		data->raw_data = (void *)array;
3553 		array = (void *)array + data->raw_size;
3554 	}
3555 
3556 	if (type & PERF_SAMPLE_BRANCH_STACK) {
3557 		const u64 max_branch_nr = UINT64_MAX /
3558 					  sizeof(struct branch_entry);
3559 		struct branch_entry *e;
3560 		unsigned int i;
3561 
3562 		OVERFLOW_CHECK_u64(array);
3563 		data->branch_stack = (struct branch_stack *)array++;
3564 
3565 		if (data->branch_stack->nr > max_branch_nr)
3566 			goto out_efault;
3567 
3568 		sz = data->branch_stack->nr * sizeof(struct branch_entry);
3569 		if (evsel__has_branch_hw_idx(evsel)) {
3570 			sz += sizeof(u64);
3571 			e = &data->branch_stack->entries[0];
3572 		} else {
3573 			data->no_hw_idx = true;
3574 			/*
3575 			 * if the PERF_SAMPLE_BRANCH_HW_INDEX is not applied,
3576 			 * only nr and entries[] will be output by kernel.
3577 			 */
3578 			e = (struct branch_entry *)&data->branch_stack->hw_idx;
3579 		}
3580 
3581 		if (swapped) {
3582 			/*
3583 			 * struct branch_flag does not have endian
3584 			 * specific bit field definition. And bswap
3585 			 * will not resolve the issue, since these
3586 			 * are bit fields.
3587 			 *
3588 			 * evsel__bitfield_swap_branch_flags() uses a
3589 			 * bitfield_swap macro to swap the bit position
3590 			 * based on the host endians.
3591 			 */
3592 			for (i = 0; i < data->branch_stack->nr; i++, e++)
3593 				e->flags.value = evsel__bitfield_swap_branch_flags(e->flags.value);
3594 		}
3595 
3596 		OVERFLOW_CHECK(array, sz, max_size);
3597 		array = (void *)array + sz;
3598 
3599 		if (evsel__has_branch_counters(evsel)) {
3600 			data->branch_stack_cntr = (u64 *)array;
3601 			sz = data->branch_stack->nr * sizeof(u64);
3602 
3603 			OVERFLOW_CHECK(array, sz, max_size);
3604 			array = (void *)array + sz;
3605 		}
3606 	}
3607 
3608 	if (type & PERF_SAMPLE_REGS_USER) {
3609 		struct regs_dump *regs = perf_sample__user_regs(data);
3610 
3611 		OVERFLOW_CHECK_u64(array);
3612 		regs->abi = *array;
3613 		array++;
3614 
3615 		if (regs->abi) {
3616 			u64 mask = evsel->core.attr.sample_regs_user;
3617 
3618 			sz = hweight64(mask) * sizeof(u64);
3619 			OVERFLOW_CHECK(array, sz, max_size);
3620 			regs->mask = mask;
3621 			regs->regs = (u64 *)array;
3622 			array = (void *)array + sz;
3623 		}
3624 	}
3625 
3626 	if (type & PERF_SAMPLE_STACK_USER) {
3627 		OVERFLOW_CHECK_u64(array);
3628 		sz = *array++;
3629 
3630 		data->user_stack.offset = ((char *)(array - 1)
3631 					  - (char *) event);
3632 
3633 		if (!sz) {
3634 			data->user_stack.size = 0;
3635 		} else {
3636 			OVERFLOW_CHECK(array, sz, max_size);
3637 			data->user_stack.data = (char *)array;
3638 			array = (void *)array + sz;
3639 			OVERFLOW_CHECK_u64(array);
3640 			data->user_stack.size = *array++;
3641 			if (WARN_ONCE(data->user_stack.size > sz,
3642 				      "user stack dump failure\n"))
3643 				goto out_efault;
3644 		}
3645 	}
3646 
3647 	if (type & PERF_SAMPLE_WEIGHT_TYPE) {
3648 		OVERFLOW_CHECK_u64(array);
3649 		perf_parse_sample_weight(data, array, type);
3650 		array++;
3651 	}
3652 
3653 	if (type & PERF_SAMPLE_DATA_SRC) {
3654 		OVERFLOW_CHECK_u64(array);
3655 		data->data_src = *array;
3656 		array++;
3657 	}
3658 
3659 	if (type & PERF_SAMPLE_TRANSACTION) {
3660 		OVERFLOW_CHECK_u64(array);
3661 		data->transaction = *array;
3662 		array++;
3663 	}
3664 
3665 	if (type & PERF_SAMPLE_REGS_INTR) {
3666 		struct regs_dump *regs = perf_sample__intr_regs(data);
3667 
3668 		OVERFLOW_CHECK_u64(array);
3669 		regs->abi = *array;
3670 		array++;
3671 
3672 		if (regs->abi != PERF_SAMPLE_REGS_ABI_NONE) {
3673 			u64 mask = evsel->core.attr.sample_regs_intr;
3674 
3675 			sz = hweight64(mask) * sizeof(u64);
3676 			OVERFLOW_CHECK(array, sz, max_size);
3677 			regs->mask = mask;
3678 			regs->regs = (u64 *)array;
3679 			array = (void *)array + sz;
3680 		}
3681 	}
3682 
3683 	data->phys_addr = 0;
3684 	if (type & PERF_SAMPLE_PHYS_ADDR) {
3685 		data->phys_addr = *array;
3686 		array++;
3687 	}
3688 
3689 	data->cgroup = 0;
3690 	if (type & PERF_SAMPLE_CGROUP) {
3691 		data->cgroup = *array;
3692 		array++;
3693 	}
3694 
3695 	data->data_page_size = 0;
3696 	if (type & PERF_SAMPLE_DATA_PAGE_SIZE) {
3697 		data->data_page_size = *array;
3698 		array++;
3699 	}
3700 
3701 	data->code_page_size = 0;
3702 	if (type & PERF_SAMPLE_CODE_PAGE_SIZE) {
3703 		data->code_page_size = *array;
3704 		array++;
3705 	}
3706 
3707 	if (type & PERF_SAMPLE_AUX) {
3708 		OVERFLOW_CHECK_u64(array);
3709 		sz = *array++;
3710 
3711 		OVERFLOW_CHECK(array, sz, max_size);
3712 		/* Undo swap of data */
3713 		if (swapped)
3714 			mem_bswap_64((char *)array, sz);
3715 		data->aux_sample.size = sz;
3716 		data->aux_sample.data = (char *)array;
3717 		array = (void *)array + sz;
3718 	}
3719 
3720 	if (evsel__is_offcpu_event(evsel)) {
3721 		if (__set_offcpu_sample(data))
3722 			goto out_efault;
3723 	}
3724 
3725 	return 0;
3726 out_efault:
3727 	perf_sample__exit(data);
3728 	return -EFAULT;
3729 }
3730 
3731 int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event,
3732 				  u64 *timestamp)
3733 {
3734 	u64 type = evsel->core.attr.sample_type;
3735 	const __u64 *array;
3736 
3737 	if (!(type & PERF_SAMPLE_TIME))
3738 		return -1;
3739 
3740 	if (event->header.type != PERF_RECORD_SAMPLE) {
3741 		struct perf_sample data = {
3742 			.evsel = evsel,
3743 			.time = -1ULL,
3744 		};
3745 
3746 		if (!evsel->core.attr.sample_id_all)
3747 			return -1;
3748 		if (perf_evsel__parse_id_sample(event, &data))
3749 			return -1;
3750 
3751 		*timestamp = data.time;
3752 		return 0;
3753 	}
3754 
3755 	array = event->sample.array;
3756 
3757 	if (perf_event__check_size(event, evsel->sample_size))
3758 		return -EFAULT;
3759 
3760 	if (type & PERF_SAMPLE_IDENTIFIER)
3761 		array++;
3762 
3763 	if (type & PERF_SAMPLE_IP)
3764 		array++;
3765 
3766 	if (type & PERF_SAMPLE_TID)
3767 		array++;
3768 
3769 	if (type & PERF_SAMPLE_TIME)
3770 		*timestamp = *array;
3771 
3772 	return 0;
3773 }
3774 
3775 u16 evsel__id_hdr_size(const struct evsel *evsel)
3776 {
3777 	u64 sample_type = evsel->core.attr.sample_type;
3778 	u16 size = 0;
3779 
3780 	if (sample_type & PERF_SAMPLE_TID)
3781 		size += sizeof(u64);
3782 
3783 	if (sample_type & PERF_SAMPLE_TIME)
3784 		size += sizeof(u64);
3785 
3786 	if (sample_type & PERF_SAMPLE_ID)
3787 		size += sizeof(u64);
3788 
3789 	if (sample_type & PERF_SAMPLE_STREAM_ID)
3790 		size += sizeof(u64);
3791 
3792 	if (sample_type & PERF_SAMPLE_CPU)
3793 		size += sizeof(u64);
3794 
3795 	if (sample_type & PERF_SAMPLE_IDENTIFIER)
3796 		size += sizeof(u64);
3797 
3798 	return size;
3799 }
3800 
3801 #ifdef HAVE_LIBTRACEEVENT
/*
 * Look up the field called @name in the tracepoint format of @evsel.
 * Returns NULL when the evsel has no tracepoint format, otherwise the result
 * of tep_find_field().
 */
struct tep_format_field *evsel__field(struct evsel *evsel, const char *name)
{
	struct tep_event *tp_format = evsel__tp_format(evsel);

	if (!tp_format)
		return NULL;

	return tep_find_field(tp_format, name);
}
3808 
/*
 * Look up the common field called @name in the tracepoint format of @evsel.
 * Returns NULL when the evsel has no tracepoint format, otherwise the result
 * of tep_find_common_field().
 */
struct tep_format_field *evsel__common_field(struct evsel *evsel, const char *name)
{
	struct tep_event *tp_format = evsel__tp_format(evsel);

	if (!tp_format)
		return NULL;

	return tep_find_common_field(tp_format, name);
}
3815 
3816 static bool out_of_bounds(const struct tep_format_field *field, int offset, int size, u32 raw_size)
3817 {
3818 	if (offset < 0) {
3819 		pr_warning("Negative trace point field offset %d in %s\n",
3820 			   offset, field->name);
3821 		return true;
3822 	}
3823 	if (size < 0) {
3824 		pr_warning("Negative trace point field size %d in %s\n",
3825 			   size, field->name);
3826 		return true;
3827 	}
3828 	if ((u32)offset + (u32)size > raw_size) {
3829 		pr_warning("Out of bound tracepoint field (%s) offset %d size %d in %u\n",
3830 			   field->name, offset, size, raw_size);
3831 		return true;
3832 	}
3833 	return false;
3834 }
3835 
3836 void *perf_sample__rawptr(struct perf_sample *sample, const char *name)
3837 {
3838 	struct tep_format_field *field = evsel__field(sample->evsel, name);
3839 	int offset, size;
3840 
3841 	if (!field)
3842 		return NULL;
3843 
3844 	offset = field->offset;
3845 	size = field->size;
3846 	if (field->flags & TEP_FIELD_IS_DYNAMIC) {
3847 		int dynamic_data;
3848 
3849 		if (out_of_bounds(field, offset, 4, sample->raw_size))
3850 			return NULL;
3851 
3852 		dynamic_data = *(int *)(sample->raw_data + field->offset);
3853 
3854 		if (sample->evsel->needs_swap)
3855 			dynamic_data = bswap_32(dynamic_data);
3856 
3857 		offset = dynamic_data & 0xffff;
3858 		size = (dynamic_data >> 16) & 0xffff;
3859 
3860 		if (tep_field_is_relative(field->flags)) {
3861 			/*
3862 			 * Newer kernel feature: Relative offsets (__rel_loc).
3863 			 * If the relative flag is set, the parsed offset is not
3864 			 * absolute from the start of the record. Instead, it is
3865 			 * relative to the *end* of the dynamic field descriptor
3866 			 * itself.
3867 			 */
3868 			offset += field->offset + field->size;
3869 		}
3870 	}
3871 	if (out_of_bounds(field, offset, size, sample->raw_size))
3872 		return NULL;
3873 
3874 	return sample->raw_data + offset;
3875 }
3876 
3877 u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample,
3878 			 bool needs_swap)
3879 {
3880 	u64 value;
3881 	void *ptr = sample->raw_data + field->offset;
3882 
3883 	if (out_of_bounds(field, field->offset, field->size, sample->raw_size))
3884 		return 0;
3885 
3886 	switch (field->size) {
3887 	case 1:
3888 		return *(u8 *)ptr;
3889 	case 2:
3890 		value = *(u16 *)ptr;
3891 		break;
3892 	case 4:
3893 		value = *(u32 *)ptr;
3894 		break;
3895 	case 8:
3896 		memcpy(&value, ptr, sizeof(u64));
3897 		break;
3898 	default:
3899 		return 0;
3900 	}
3901 
3902 	if (!needs_swap)
3903 		return value;
3904 
3905 	switch (field->size) {
3906 	case 2:
3907 		return bswap_16(value);
3908 	case 4:
3909 		return bswap_32(value);
3910 	case 8:
3911 		return bswap_64(value);
3912 	default:
3913 		return 0;
3914 	}
3915 
3916 	return 0;
3917 }
3918 
3919 u64 perf_sample__intval(struct perf_sample *sample, const char *name)
3920 {
3921 	struct tep_format_field *field = evsel__field(sample->evsel, name);
3922 
3923 	return field ? format_field__intval(field, sample, sample->evsel->needs_swap) : 0;
3924 }
3925 
3926 u64 perf_sample__intval_common(struct perf_sample *sample, const char *name)
3927 {
3928 	struct tep_format_field *field = evsel__common_field(sample->evsel, name);
3929 
3930 	return field ? format_field__intval(field, sample, sample->evsel->needs_swap) : 0;
3931 }
3932 
3933 char perf_sample__taskstate(struct perf_sample *sample, const char *name)
3934 {
3935 	static struct tep_format_field *prev_state_field;
3936 	static const char *states;
3937 	struct tep_format_field *field;
3938 	unsigned long long val;
3939 	unsigned int bit;
3940 	char state = '?'; /* '?' denotes unknown task state */
3941 
3942 	field = evsel__field(sample->evsel, name);
3943 
3944 	if (!field)
3945 		return state;
3946 
3947 	if (!states || field != prev_state_field) {
3948 		states = parse_task_states(field);
3949 		if (!states)
3950 			return state;
3951 		prev_state_field = field;
3952 	}
3953 
3954 	/*
3955 	 * Note since the kernel exposes TASK_REPORT_MAX to userspace
3956 	 * to denote the 'preempted' state, we might as welll report
3957 	 * 'R' for this case, which make senses to users as well.
3958 	 *
3959 	 * We can change this if we have a good reason in the future.
3960 	 */
3961 	val = perf_sample__intval(sample, name);
3962 	bit = val ? ffs(val) : 0;
3963 	state = (!bit || bit > strlen(states)) ? 'R' : states[bit-1];
3964 	return state;
3965 }
3966 #endif
3967 
3968 bool evsel__fallback(struct evsel *evsel, struct target *target, int err,
3969 		     char *msg, size_t msgsize)
3970 {
3971 	int paranoid;
3972 
3973 	if ((err == ENODEV || err == ENOENT || err == ENXIO) &&
3974 	    evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
3975 		/*
3976 		 * If it's the legacy hardware cycles event fails then fall back
3977 		 * to hrtimer based cpu-clock sw counter, which is always
3978 		 * available even if no PMU support. PPC returned ENXIO rather
3979 		 * than ENODEV or ENOENT until 2.6.37.
3980 		 */
3981 		evsel->pmu = perf_pmus__find_by_type(PERF_TYPE_SOFTWARE);
3982 		assert(evsel->pmu); /* software is a "well-known" and can't fail PMU type. */
3983 
3984 		/* Configure the event. */
3985 		evsel->core.attr.type = PERF_TYPE_SOFTWARE;
3986 		evsel->core.attr.config = target__has_cpu(target)
3987 			? PERF_COUNT_SW_CPU_CLOCK
3988 			: PERF_COUNT_SW_TASK_CLOCK;
3989 		evsel->core.is_pmu_core = false;
3990 
3991 		/* Remove excludes for new event. */
3992 		if (evsel->fallenback_eacces) {
3993 			evsel->core.attr.exclude_kernel = 0;
3994 			evsel->core.attr.exclude_hv     = 0;
3995 			evsel->fallenback_eacces = false;
3996 		}
3997 		if (evsel->fallenback_eopnotsupp) {
3998 			evsel->core.attr.exclude_guest = 0;
3999 			evsel->fallenback_eopnotsupp = false;
4000 		}
4001 
4002 		/* Name is recomputed by evsel__name. */
4003 		zfree(&evsel->name);
4004 
4005 		/* Log message. */
4006 		scnprintf(msg, msgsize,
4007 			  "The cycles event is not supported, trying to fall back to %s",
4008 			  evsel__name(evsel));
4009 		return true;
4010 	} else if (err == EACCES && !evsel->core.attr.exclude_kernel &&
4011 		   (paranoid = perf_event_paranoid()) > 1) {
4012 		const char *name = evsel__name(evsel);
4013 		char *new_name;
4014 		const char *sep = ":";
4015 
4016 		/* If event has exclude user then don't exclude kernel. */
4017 		if (evsel->core.attr.exclude_user)
4018 			goto no_fallback;
4019 
4020 		/* Is there already the separator in the name. */
4021 		if (strchr(name, '/') ||
4022 		    (strchr(name, ':') && !evsel->is_libpfm_event))
4023 			sep = "";
4024 
4025 		if (asprintf(&new_name, "%s%su", name, sep) < 0)
4026 			goto no_fallback;
4027 
4028 		free(evsel->name);
4029 		evsel->name = new_name;
4030 		scnprintf(msg, msgsize, "kernel.perf_event_paranoid=%d, trying "
4031 			  "to fall back to excluding kernel and hypervisor "
4032 			  " samples", paranoid);
4033 		evsel->core.attr.exclude_kernel = 1;
4034 		evsel->core.attr.exclude_hv     = 1;
4035 		evsel->fallenback_eacces = true;
4036 		return true;
4037 	} else if (err == EOPNOTSUPP && !evsel->core.attr.exclude_guest &&
4038 		   !evsel->exclude_GH) {
4039 		const char *name = evsel__name(evsel);
4040 		char *new_name;
4041 		const char *sep = ":";
4042 
4043 		/* Is there already the separator in the name. */
4044 		if (strchr(name, '/') ||
4045 		    (strchr(name, ':') && !evsel->is_libpfm_event))
4046 			sep = "";
4047 
4048 		if (asprintf(&new_name, "%s%sH", name, sep) < 0)
4049 			goto no_fallback;
4050 
4051 		free(evsel->name);
4052 		evsel->name = new_name;
4053 		/* Apple M1 requires exclude_guest */
4054 		scnprintf(msg, msgsize, "Trying to fall back to excluding guest samples");
4055 		evsel->core.attr.exclude_guest = 1;
4056 		evsel->fallenback_eopnotsupp = true;
4057 		return true;
4058 	}
4059 no_fallback:
4060 	scnprintf(msg, msgsize, "No fallback found for '%s' for error %d",
4061 		  evsel__name(evsel), err);
4062 	return false;
4063 }
4064 
4065 static bool find_process(const char *name)
4066 {
4067 	size_t len = strlen(name);
4068 	DIR *dir;
4069 	struct dirent *d;
4070 	int ret = -1;
4071 
4072 	dir = opendir(procfs__mountpoint());
4073 	if (!dir)
4074 		return false;
4075 
4076 	/* Walk through the directory. */
4077 	while (ret && (d = readdir(dir)) != NULL) {
4078 		char path[PATH_MAX];
4079 		char *data;
4080 		size_t size;
4081 
4082 		if ((d->d_type != DT_DIR) ||
4083 		     !strcmp(".", d->d_name) ||
4084 		     !strcmp("..", d->d_name))
4085 			continue;
4086 
4087 		scnprintf(path, sizeof(path), "%s/%s/comm",
4088 			  procfs__mountpoint(), d->d_name);
4089 
4090 		if (filename__read_str(path, &data, &size))
4091 			continue;
4092 
4093 		ret = strncmp(name, data, len);
4094 		free(data);
4095 	}
4096 
4097 	closedir(dir);
4098 	return ret ? false : true;
4099 }
4100 
4101 static int dump_perf_event_processes(char *msg, size_t size)
4102 {
4103 	DIR *proc_dir;
4104 	struct dirent *proc_entry;
4105 	int printed = 0;
4106 
4107 	proc_dir = opendir(procfs__mountpoint());
4108 	if (!proc_dir)
4109 		return 0;
4110 
4111 	/* Walk through the /proc directory. */
4112 	while ((proc_entry = readdir(proc_dir)) != NULL) {
4113 		char buf[256];
4114 		DIR *fd_dir;
4115 		struct dirent *fd_entry;
4116 		int fd_dir_fd;
4117 
4118 		if (proc_entry->d_type != DT_DIR ||
4119 		    !isdigit(proc_entry->d_name[0]) ||
4120 		    strlen(proc_entry->d_name) > sizeof(buf) - 4)
4121 			continue;
4122 
4123 		scnprintf(buf, sizeof(buf), "%s/fd", proc_entry->d_name);
4124 		fd_dir_fd = openat(dirfd(proc_dir), buf, O_DIRECTORY);
4125 		if (fd_dir_fd == -1)
4126 			continue;
4127 		fd_dir = fdopendir(fd_dir_fd);
4128 		if (!fd_dir) {
4129 			close(fd_dir_fd);
4130 			continue;
4131 		}
4132 		while ((fd_entry = readdir(fd_dir)) != NULL) {
4133 			ssize_t link_size;
4134 
4135 			if (fd_entry->d_type != DT_LNK)
4136 				continue;
4137 			link_size = readlinkat(fd_dir_fd, fd_entry->d_name, buf, sizeof(buf));
4138 			if (link_size < 0)
4139 				continue;
4140 			/* Take care as readlink doesn't null terminate the string. */
4141 			if (!strncmp(buf, "anon_inode:[perf_event]", link_size)) {
4142 				int cmdline_fd;
4143 				ssize_t cmdline_size;
4144 
4145 				scnprintf(buf, sizeof(buf), "%s/cmdline", proc_entry->d_name);
4146 				cmdline_fd = openat(dirfd(proc_dir), buf, O_RDONLY);
4147 				if (cmdline_fd == -1)
4148 					continue;
4149 				cmdline_size = read(cmdline_fd, buf, sizeof(buf) - 1);
4150 				close(cmdline_fd);
4151 				if (cmdline_size < 0)
4152 					continue;
4153 				buf[cmdline_size] = '\0';
4154 				for (ssize_t i = 0; i < cmdline_size; i++) {
4155 					if (buf[i] == '\0')
4156 						buf[i] = ' ';
4157 				}
4158 
4159 				if (printed == 0)
4160 					printed += scnprintf(msg, size, "Possible processes:\n");
4161 
4162 				printed += scnprintf(msg + printed, size - printed,
4163 						"%s %s\n", proc_entry->d_name, buf);
4164 				break;
4165 			}
4166 		}
4167 		closedir(fd_dir);
4168 	}
4169 	closedir(proc_dir);
4170 	return printed;
4171 }
4172 
/*
 * Default (weak) implementation of the architecture hook for describing an
 * event open failure.  It ignores all of its arguments, writes nothing to
 * @msg and returns 0.
 */
int __weak arch_evsel__open_strerror(struct evsel *evsel __maybe_unused,
				     int err __maybe_unused,
				     char *msg __maybe_unused,
				     size_t size __maybe_unused)
{
	return 0;
}
4180 
4181 int evsel__open_strerror(struct evsel *evsel, struct target *target,
4182 			 int err, char *msg, size_t size)
4183 {
4184 	struct perf_pmu *pmu;
4185 	int printed = 0, enforced = 0;
4186 	int ret;
4187 
4188 	switch (err) {
4189 	case EPERM:
4190 	case EACCES:
4191 		printed += scnprintf(msg + printed, size - printed,
4192 			"Access to performance monitoring and observability operations is limited.\n");
4193 
4194 		if (!sysfs__read_int("fs/selinux/enforce", &enforced)) {
4195 			if (enforced) {
4196 				printed += scnprintf(msg + printed, size - printed,
4197 					"Enforced MAC policy settings (SELinux) can limit access to performance\n"
4198 					"monitoring and observability operations. Inspect system audit records for\n"
4199 					"more perf_event access control information and adjusting the policy.\n");
4200 			}
4201 		}
4202 
4203 		if (err == EPERM)
4204 			printed += scnprintf(msg, size,
4205 				"No permission to enable %s event.\n\n", evsel__name(evsel));
4206 
4207 		return printed + scnprintf(msg + printed, size - printed,
4208 		 "Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n"
4209 		 "access to performance monitoring and observability operations for processes\n"
4210 		 "without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.\n"
4211 		 "More information can be found at 'Perf events and tool security' document:\n"
4212 		 "https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html\n"
4213 		 "perf_event_paranoid setting is %d:\n"
4214 		 "  -1: Allow use of (almost) all events by all users\n"
4215 		 "      Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n"
4216 		 ">= 0: Disallow raw and ftrace function tracepoint access\n"
4217 		 ">= 1: Disallow CPU event access\n"
4218 		 ">= 2: Disallow kernel profiling\n"
4219 		 "To make the adjusted perf_event_paranoid setting permanent preserve it\n"
4220 		 "in /etc/sysctl.conf (e.g. kernel.perf_event_paranoid = <setting>)",
4221 		 perf_event_paranoid());
4222 	case ENOENT:
4223 		return scnprintf(msg, size, "The %s event is not supported.", evsel__name(evsel));
4224 	case EMFILE:
4225 		return scnprintf(msg, size, "%s",
4226 			 "Too many events are opened.\n"
4227 			 "Probably the maximum number of open file descriptors has been reached.\n"
4228 			 "Hint: Try again after reducing the number of events.\n"
4229 			 "Hint: Try increasing the limit with 'ulimit -n <limit>'");
4230 	case ENOMEM:
4231 		if (evsel__has_callchain(evsel) &&
4232 		    access("/proc/sys/kernel/perf_event_max_stack", F_OK) == 0)
4233 			return scnprintf(msg, size,
4234 					 "Not enough memory to setup event with callchain.\n"
4235 					 "Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n"
4236 					 "Hint: Current value: %d", sysctl__max_stack());
4237 		break;
4238 	case ENODEV:
4239 		if (target->cpu_list)
4240 			return scnprintf(msg, size, "%s",
4241 	 "No such device - did you specify an out-of-range profile CPU?");
4242 		break;
4243 	case EOPNOTSUPP:
4244 		if (evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK)
4245 			return scnprintf(msg, size,
4246 	"%s: PMU Hardware or event type doesn't support branch stack sampling.",
4247 					 evsel__name(evsel));
4248 		if (evsel->core.attr.aux_output)
4249 			return scnprintf(msg, size,
4250 	"%s: PMU Hardware doesn't support 'aux_output' feature",
4251 					 evsel__name(evsel));
4252 		if (evsel->core.attr.aux_action)
4253 			return scnprintf(msg, size,
4254 	"%s: PMU Hardware doesn't support 'aux_action' feature",
4255 					evsel__name(evsel));
4256 		if (evsel->core.attr.sample_period != 0)
4257 			return scnprintf(msg, size,
4258 	"%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'",
4259 					 evsel__name(evsel));
4260 		if (evsel->core.attr.precise_ip)
4261 			return scnprintf(msg, size, "%s",
4262 	"\'precise\' request may not be supported. Try removing 'p' modifier.");
4263 #if defined(__i386__) || defined(__x86_64__)
4264 		if (evsel->core.attr.type == PERF_TYPE_HARDWARE)
4265 			return scnprintf(msg, size, "%s",
4266 	"No hardware sampling interrupt available.\n");
4267 #endif
4268 		if (!target__has_cpu(target))
4269 			return scnprintf(msg, size,
4270 	"Unsupported event (%s) in per-thread mode, enable system wide with '-a'.",
4271 					evsel__name(evsel));
4272 		break;
4273 	case EBUSY:
4274 		if (find_process("oprofiled"))
4275 			return scnprintf(msg, size,
4276 	"The PMU counters are busy/taken by another profiler.\n"
4277 	"We found oprofile daemon running, please stop it and try again.");
4278 		printed += scnprintf(
4279 			msg, size,
4280 			"The PMU %s counters are busy and in use by another process.\n",
4281 			evsel->pmu ? evsel->pmu->name : "");
4282 		return printed + dump_perf_event_processes(msg + printed, size - printed);
4283 		break;
4284 	case EINVAL:
4285 		if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size)
4286 			return scnprintf(msg, size, "Asking for the code page size isn't supported by this kernel.");
4287 		if (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE && perf_missing_features.data_page_size)
4288 			return scnprintf(msg, size, "Asking for the data page size isn't supported by this kernel.");
4289 		if (evsel->core.attr.write_backward && perf_missing_features.write_backward)
4290 			return scnprintf(msg, size, "Reading from overwrite event is not supported by this kernel.");
4291 		if (perf_missing_features.clockid)
4292 			return scnprintf(msg, size, "clockid feature not supported.");
4293 		if (perf_missing_features.clockid_wrong)
4294 			return scnprintf(msg, size, "wrong clockid (%d).", clockid);
4295 		if (perf_missing_features.aux_action)
4296 			return scnprintf(msg, size, "The 'aux_action' feature is not supported, update the kernel.");
4297 		if (perf_missing_features.aux_output)
4298 			return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel.");
4299 		pmu = evsel__find_pmu(evsel);
4300 		if (!pmu->is_core && !target__has_cpu(target))
4301 			return scnprintf(msg, size,
4302 	"Invalid event (%s) in per-thread mode, enable system wide with '-a'.",
4303 					evsel__name(evsel));
4304 
4305 		break;
4306 	case ENODATA:
4307 		return scnprintf(msg, size, "Cannot collect data source with the load latency event alone. "
4308 				 "Please add an auxiliary event in front of the load latency event.");
4309 	default:
4310 		break;
4311 	}
4312 
4313 	ret = arch_evsel__open_strerror(evsel, err, msg, size);
4314 	if (ret)
4315 		return ret;
4316 
4317 	errno = err;
4318 	return scnprintf(msg, size,
4319 			 "The sys_perf_event_open() syscall failed for event (%s): %m\n"
4320 			 "\"dmesg | grep -i perf\" may provide additional information.\n",
4321 			 evsel__name(evsel));
4322 }
4323 
4324 struct perf_session *evsel__session(struct evsel *evsel)
4325 {
4326 	return evsel && evsel->evlist ? evsel->evlist->session : NULL;
4327 }
4328 
/*
 * Return the perf_env of the session @evsel belongs to, or NULL when
 * evsel__session() finds no session for it.
 */
struct perf_env *evsel__env(struct evsel *evsel)
{
	struct perf_session *owner = evsel__session(evsel);

	if (!owner)
		return NULL;

	return perf_session__env(owner);
}
4335 
4336 static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
4337 {
4338 	int cpu_map_idx, thread;
4339 
4340 	if (evsel__is_retire_lat(evsel))
4341 		return 0;
4342 
4343 	if (perf_pmu__kind(evsel->pmu) != PERF_PMU_KIND_PE)
4344 		return 0;
4345 
4346 	for (cpu_map_idx = 0; cpu_map_idx < xyarray__max_x(evsel->core.fd); cpu_map_idx++) {
4347 		for (thread = 0; thread < xyarray__max_y(evsel->core.fd);
4348 		     thread++) {
4349 			int fd = FD(evsel, cpu_map_idx, thread);
4350 
4351 			if (perf_evlist__id_add_fd(&evlist->core, &evsel->core,
4352 						   cpu_map_idx, thread, fd) < 0)
4353 				return -1;
4354 		}
4355 	}
4356 
4357 	return 0;
4358 }
4359 
4360 int evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
4361 {
4362 	struct perf_cpu_map *cpus = evsel->core.cpus;
4363 	struct perf_thread_map *threads = evsel->core.threads;
4364 
4365 	if (perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr))
4366 		return -ENOMEM;
4367 
4368 	return store_evsel_ids(evsel, evlist);
4369 }
4370 
4371 void evsel__zero_per_pkg(struct evsel *evsel)
4372 {
4373 	struct hashmap_entry *cur;
4374 	size_t bkt;
4375 
4376 	if (evsel->per_pkg_mask) {
4377 		hashmap__for_each_entry(evsel->per_pkg_mask, cur, bkt)
4378 			zfree(&cur->pkey);
4379 
4380 		hashmap__clear(evsel->per_pkg_mask);
4381 	}
4382 }
4383 
4384 /**
4385  * evsel__is_hybrid - does the evsel have a known PMU that is hybrid. Note, this
4386  *                    will be false on hybrid systems for hardware and legacy
4387  *                    cache events.
4388  */
4389 bool evsel__is_hybrid(const struct evsel *evsel)
4390 {
4391 	if (!evsel->core.is_pmu_core)
4392 		return false;
4393 
4394 	return perf_pmus__num_core_pmus() > 1;
4395 }
4396 
4397 struct evsel *evsel__leader(const struct evsel *evsel)
4398 {
4399 	if (evsel->core.leader == NULL)
4400 		return NULL;
4401 	return container_of(evsel->core.leader, struct evsel, core);
4402 }
4403 
4404 bool evsel__has_leader(struct evsel *evsel, struct evsel *leader)
4405 {
4406 	return evsel->core.leader == &leader->core;
4407 }
4408 
4409 bool evsel__is_leader(struct evsel *evsel)
4410 {
4411 	return evsel__has_leader(evsel, evsel);
4412 }
4413 
4414 void evsel__set_leader(struct evsel *evsel, struct evsel *leader)
4415 {
4416 	evsel->core.leader = &leader->core;
4417 }
4418 
4419 bool evsel__is_aux_event(const struct evsel *evsel)
4420 {
4421 	struct perf_pmu *pmu;
4422 
4423 	if (evsel->needs_auxtrace_mmap)
4424 		return true;
4425 
4426 	pmu = evsel__find_pmu(evsel);
4427 	return pmu && pmu->auxtrace;
4428 }
4429 
4430 int evsel__source_count(const struct evsel *evsel)
4431 {
4432 	struct evsel *pos;
4433 	int count = 0;
4434 
4435 	evlist__for_each_entry(evsel->evlist, pos) {
4436 		if (pos->metric_leader == evsel)
4437 			count++;
4438 	}
4439 	return count;
4440 }
4441 
4442 bool __weak arch_evsel__must_be_in_group(const struct evsel *evsel __maybe_unused)
4443 {
4444 	return false;
4445 }
4446 
4447 /*
4448  * Remove an event from a given group (leader).
4449  * Some events, e.g., perf metrics Topdown events,
4450  * must always be grouped. Ignore the events.
4451  */
4452 void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader)
4453 {
4454 	if (!arch_evsel__must_be_in_group(evsel) && evsel != leader) {
4455 		evsel__set_leader(evsel, evsel);
4456 		evsel->core.nr_members = 0;
4457 		leader->core.nr_members--;
4458 	}
4459 }
4460 
4461 bool evsel__set_needs_uniquify(struct evsel *counter, const struct perf_stat_config *config)
4462 {
4463 	struct evsel *evsel;
4464 
4465 	if (counter->needs_uniquify) {
4466 		/* Already set. */
4467 		return true;
4468 	}
4469 
4470 	if (counter->use_config_name || counter->is_libpfm_event) {
4471 		/* Original name will be used. */
4472 		return false;
4473 	}
4474 
4475 	if (!config->hybrid_merge && evsel__is_hybrid(counter)) {
4476 		/* Unique hybrid counters necessary. */
4477 		counter->needs_uniquify = true;
4478 		return true;
4479 	}
4480 
4481 	if  (counter->core.attr.type < PERF_TYPE_MAX && counter->core.attr.type != PERF_TYPE_RAW) {
4482 		/* Legacy event, don't uniquify. */
4483 		return false;
4484 	}
4485 
4486 	if (counter->pmu && counter->pmu->is_core &&
4487 	    counter->alternate_hw_config != PERF_COUNT_HW_MAX) {
4488 		/* A sysfs or json event replacing a legacy event, don't uniquify. */
4489 		return false;
4490 	}
4491 
4492 	if (config->aggr_mode == AGGR_NONE) {
4493 		/* Always unique with no aggregation. */
4494 		counter->needs_uniquify = true;
4495 		return true;
4496 	}
4497 
4498 	if (counter->first_wildcard_match != NULL) {
4499 		/*
4500 		 * If stats are merged then only the first_wildcard_match is
4501 		 * displayed, there is no need to uniquify this evsel as the
4502 		 * name won't be shown.
4503 		 */
4504 		return false;
4505 	}
4506 
4507 	/*
4508 	 * Do other non-merged events in the evlist have the same name? If so
4509 	 * uniquify is necessary.
4510 	 */
4511 	evlist__for_each_entry(counter->evlist, evsel) {
4512 		if (evsel == counter || evsel->first_wildcard_match || evsel->pmu == counter->pmu)
4513 			continue;
4514 
4515 		if (evsel__name_is(counter, evsel__name(evsel))) {
4516 			counter->needs_uniquify = true;
4517 			return true;
4518 		}
4519 	}
4520 	return false;
4521 }
4522 
4523 void evsel__uniquify_counter(struct evsel *counter)
4524 {
4525 	const char *name, *pmu_name, *config;
4526 	char *new_name;
4527 	int len, ret;
4528 
4529 	/* No uniquification necessary. */
4530 	if (!counter->needs_uniquify)
4531 		return;
4532 
4533 	/* The evsel was already uniquified. */
4534 	if (counter->uniquified_name)
4535 		return;
4536 
4537 	/* Avoid checking to uniquify twice. */
4538 	counter->uniquified_name = true;
4539 
4540 	name = evsel__name(counter);
4541 	config = strchr(name, '/');
4542 	pmu_name = counter->pmu->name;
4543 
4544 	/* Already prefixed by the PMU name? */
4545 	len = pmu_name_len_no_suffix(pmu_name);
4546 
4547 	if (!strncmp(name, pmu_name, len)) {
4548 		/*
4549 		 * If the PMU name is there, then there is no sense in not
4550 		 * having a slash. Do this for robustness.
4551 		 */
4552 		if (config == NULL)
4553 			config = name - 1;
4554 
4555 		ret = asprintf(&new_name, "%s/%s", pmu_name, config + 1);
4556 	} else if (config) {
4557 		len = config - name;
4558 		if (config[1] == '/') {
4559 			/* case: event// */
4560 			ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 2);
4561 		} else {
4562 			/* case: event/.../ */
4563 			ret = asprintf(&new_name, "%s/%.*s,%s", pmu_name, len, name, config + 1);
4564 		}
4565 	} else {
4566 		config = strchr(name, ':');
4567 		if (config) {
4568 			/* case: event:.. */
4569 			len = config - name;
4570 
4571 			ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 1);
4572 		} else {
4573 			/* case: event */
4574 			ret = asprintf(&new_name, "%s/%s/", pmu_name, name);
4575 		}
4576 	}
4577 	if (ret > 0) {
4578 		free(counter->name);
4579 		counter->name = new_name;
4580 	} else {
4581 		/* ENOMEM from asprintf. */
4582 		counter->uniquified_name = false;
4583 	}
4584 }
4585 
4586 void evsel__warn_user_requested_cpus(struct evsel *evsel, struct perf_cpu_map *user_requested_cpus)
4587 {
4588 	struct perf_cpu_map *intersect, *online = NULL;
4589 	const struct perf_pmu *pmu = evsel__find_pmu(evsel);
4590 
4591 	if (pmu && pmu->is_core) {
4592 		intersect = perf_cpu_map__intersect(pmu->cpus, user_requested_cpus);
4593 	} else {
4594 		online = cpu_map__online();
4595 		intersect = perf_cpu_map__intersect(online, user_requested_cpus);
4596 	}
4597 	if (!perf_cpu_map__equal(intersect, user_requested_cpus)) {
4598 		char buf1[128];
4599 		char buf2[128];
4600 
4601 		cpu_map__snprint(user_requested_cpus, buf1, sizeof(buf1));
4602 		cpu_map__snprint(online ?: pmu->cpus, buf2, sizeof(buf2));
4603 		pr_warning("WARNING: A requested CPU in '%s' is not supported by PMU '%s' (CPUs %s) for event '%s'\n",
4604 			   buf1, pmu ? pmu->name : "cpu", buf2, evsel__name(evsel));
4605 	}
4606 	perf_cpu_map__put(intersect);
4607 	perf_cpu_map__put(online);
4608 }
4609