xref: /linux/tools/perf/builtin-record.c (revision a3ef39eb975d4ba1bd1a29b7dcc4e76d6745b305)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10 
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include <internal/xyarray.h>
14 #include "util/parse-events.h"
15 #include "util/config.h"
16 
17 #include "util/callchain.h"
18 #include "util/cgroup.h"
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/mmap.h"
25 #include "util/mutex.h"
26 #include "util/target.h"
27 #include "util/session.h"
28 #include "util/tool.h"
29 #include "util/stat.h"
30 #include "util/symbol.h"
31 #include "util/record.h"
32 #include "util/cpumap.h"
33 #include "util/thread_map.h"
34 #include "util/data.h"
35 #include "util/perf_regs.h"
36 #include "util/auxtrace.h"
37 #include "util/tsc.h"
38 #include "util/parse-branch-options.h"
39 #include "util/parse-regs-options.h"
40 #include "util/perf_api_probe.h"
41 #include "util/trigger.h"
42 #include "util/perf-hooks.h"
43 #include "util/cpu-set-sched.h"
44 #include "util/synthetic-events.h"
45 #include "util/time-utils.h"
46 #include "util/units.h"
47 #include "util/bpf-event.h"
48 #include "util/util.h"
49 #include "util/pfm.h"
50 #include "util/pmu.h"
51 #include "util/pmus.h"
52 #include "util/clockid.h"
53 #include "util/off_cpu.h"
54 #include "util/bpf-filter.h"
55 #include "util/strbuf.h"
56 #include "asm/bug.h"
57 #include "perf.h"
58 #include "cputopo.h"
59 
60 #include <errno.h>
61 #include <inttypes.h>
62 #include <locale.h>
63 #include <poll.h>
64 #include <pthread.h>
65 #include <unistd.h>
66 #ifndef HAVE_GETTID
67 #include <syscall.h>
68 #endif
69 #include <sched.h>
70 #include <signal.h>
71 #ifdef HAVE_EVENTFD_SUPPORT
72 #include <sys/eventfd.h>
73 #endif
74 #include <sys/mman.h>
75 #include <sys/wait.h>
76 #include <sys/types.h>
77 #include <sys/stat.h>
78 #include <fcntl.h>
79 #include <linux/err.h>
80 #include <linux/string.h>
81 #include <linux/time64.h>
82 #include <linux/zalloc.h>
83 #include <linux/bitmap.h>
84 #include <sys/time.h>
85 
/*
 * State for the --switch-output option: rotate the perf.data output when a
 * signal arrives, a byte threshold is crossed, or a time period elapses.
 */
struct switch_output {
	bool		 enabled;	/* some rotation mode was requested */
	bool		 signal;	/* rotate on signal delivery */
	unsigned long	 size;		/* rotate once this many bytes written (0 = off) */
	unsigned long	 time;		/* rotate on a timer (0 = off) */
	const char	*str;		/* raw option argument as given by the user */
	bool		 set;		/* option was present on the command line */
	char		 **filenames;	/* ring of generated output file names */
	int		 num_files;	/* capacity of filenames[] */
	int		 cur_file;	/* index of the slot in use */
};
97 
/*
 * Pair of CPU bitmaps describing one recording thread: which mmaps (by CPU)
 * the thread services, and which CPUs it may run on (affinity).
 */
struct thread_mask {
	struct mmap_cpu_mask	maps;
	struct mmap_cpu_mask	affinity;
};
102 
/* Per-worker state used when recording with parallel streaming threads. */
struct record_thread {
	pid_t			tid;
	struct thread_mask	*mask;		/* mmap/affinity masks for this thread */
	struct {
		int		msg[2];		/* pipe: messages towards the worker */
		int		ack[2];		/* pipe: acknowledgements back */
	} pipes;
	struct fdarray		pollfd;		/* fds polled by this worker (duped from evlist) */
	int			ctlfd_pos;
	int			nr_mmaps;	/* entries in maps[] / overwrite_maps[] */
	struct mmap		**maps;
	struct mmap		**overwrite_maps;
	struct record		*rec;		/* back pointer to session-wide state */
	unsigned long long	samples;
	unsigned long		waking;
	u64			bytes_written;		/* bytes this thread wrote to its file */
	u64			bytes_transferred;	/* compression accounting (updated elsewhere) */
	u64			bytes_compressed;	/* compression accounting (updated elsewhere) */
};
122 
/* Points each worker at its own record_thread instance. */
static __thread struct record_thread *thread;

/* Messages exchanged over the record_thread msg/ack pipes. */
enum thread_msg {
	THREAD_MSG__UNDEFINED = 0,
	THREAD_MSG__READY,
	THREAD_MSG__MAX,
};

/* Human readable names, indexed by enum thread_msg. */
static const char *thread_msg_tags[THREAD_MSG__MAX] = {
	"UNDEFINED", "READY"
};
134 
/*
 * How the --threads option groups mmaps into streaming threads: one thread
 * per CPU, core, package or NUMA node, or a user supplied specification.
 */
enum thread_spec {
	THREAD_SPEC__UNDEFINED = 0,
	THREAD_SPEC__CPU,
	THREAD_SPEC__CORE,
	THREAD_SPEC__PACKAGE,
	THREAD_SPEC__NUMA,
	THREAD_SPEC__USER,
	THREAD_SPEC__MAX,
};

/* Human readable names, indexed by enum thread_spec. */
static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
	"undefined", "cpu", "core", "package", "numa", "user"
};

/*
 * Correspondence between an fd's index in the evlist-wide pollfd array and
 * its index in one worker thread's private pollfd array.
 */
struct pollfd_index_map {
	int evlist_pollfd_index;
	int thread_pollfd_index;
};
153 
/*
 * Session-wide state of one 'perf record' run.  The perf_tool member comes
 * first so tool callbacks can recover the record via container_of().
 */
struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;		/* written by the main thread */
	u64			thread_bytes_written;	/* total written by worker threads */
	struct perf_data	data;
	struct auxtrace_record	*itr;
	struct evlist	*evlist;
	struct perf_session	*session;
	struct evlist		*sb_evlist;
	pthread_t		thread_id;
	int			realtime_prio;
	bool			latency;
	bool			switch_output_event_set;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			buildid_mmap;
	bool			buildid_mmap_set;
	bool			timestamp_filename;
	bool			timestamp_boundary;
	bool			off_cpu;
	const char		*filter_action;
	const char		*uid_str;
	struct switch_output	switch_output;		/* --switch-output state */
	unsigned long long	samples;
	unsigned long		output_max_size;	/* = 0: unlimited */
	struct perf_debuginfod	debuginfod;
	/* Parallel streaming (--threads) state: */
	int			nr_threads;
	struct thread_mask	*thread_masks;
	struct record_thread	*thread_data;
	struct pollfd_index_map	*index_map;
	size_t			index_map_sz;
	size_t			index_map_cnt;
};
191 
/* Set from signal context, or when the output size limit is hit, to make
 * the main recording loop terminate. */
static volatile int done;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

/* Human readable names, indexed by enum perf_affinity. */
static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};
201 
202 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
203 				  struct perf_sample *sample, struct machine *machine);
204 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
205 				   struct perf_sample *sample, struct machine *machine);
206 static int process_timestamp_boundary(const struct perf_tool *tool,
207 				      union perf_event *event,
208 				      struct perf_sample *sample,
209 				      struct machine *machine);
210 
#ifndef HAVE_GETTID
/* Fallback for C libraries without a gettid() wrapper: raw syscall. */
static inline pid_t gettid(void)
{
	return (pid_t)syscall(__NR_gettid);
}
#endif
217 
/*
 * Non-zero when a --threads specification was parsed, i.e. recording runs
 * with parallel streaming worker threads.
 */
static int record__threads_enabled(struct record *rec)
{
	return rec->opts.threads_spec;
}
222 
223 static bool switch_output_signal(struct record *rec)
224 {
225 	return rec->switch_output.signal &&
226 	       trigger_is_ready(&switch_output_trigger);
227 }
228 
229 static bool switch_output_size(struct record *rec)
230 {
231 	return rec->switch_output.size &&
232 	       trigger_is_ready(&switch_output_trigger) &&
233 	       (rec->bytes_written >= rec->switch_output.size);
234 }
235 
236 static bool switch_output_time(struct record *rec)
237 {
238 	return rec->switch_output.time &&
239 	       trigger_is_ready(&switch_output_trigger);
240 }
241 
242 static u64 record__bytes_written(struct record *rec)
243 {
244 	return rec->bytes_written + rec->thread_bytes_written;
245 }
246 
247 static bool record__output_max_size_exceeded(struct record *rec)
248 {
249 	return rec->output_max_size &&
250 	       (record__bytes_written(rec) >= rec->output_max_size);
251 }
252 
/*
 * Write @size bytes at @bf to the output: to @map's own file when the mmap
 * carries one (threaded mode), otherwise to the session's main data file.
 * Also drives the size based stop (--max-size) and rotate (--switch-output)
 * conditions.  Returns 0 on success, -1 on write failure.
 */
static int record__write(struct record *rec, struct mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (map && map->file)
		file = map->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	/* Account bytes to the right counters for the checks below. */
	if (map && map->file) {
		thread->bytes_written += size;
		rec->thread_bytes_written += size;
	} else {
		rec->bytes_written += size;
	}

	if (record__output_max_size_exceeded(rec) && !done) {
		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
				" stopping session ]\n",
				record__bytes_written(rec) >> 10);
		done = 1;
	}

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}
285 
286 static int record__aio_enabled(struct record *rec);
287 static int record__comp_enabled(struct record *rec);
288 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
289 			    void *dst, size_t dst_size, void *src, size_t src_size);
290 
291 #ifdef HAVE_AIO_SUPPORT
/*
 * Queue one POSIX AIO write of @size bytes from @buf to @trace_fd at file
 * offset @off, retrying while aio_write() reports EAGAIN.  On a hard error
 * the control block is released (aio_fildes = -1).  Returns aio_write()'s
 * last result: 0 when queued, -1 on error.
 */
static int record__aio_write(struct aiocb *cblock, int trace_fd,
		void *buf, size_t size, off_t off)
{
	int ret;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf    = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	for (;;) {
		ret = aio_write(cblock);
		if (ret == 0)
			break;
		if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	}

	return ret;
}
316 
/*
 * Check the state of the AIO write tracked by @cblock.  Returns 0 while the
 * write is still in flight (or was restarted with the remaining bytes after
 * a short write), and 1 when the write fully completed and the control
 * block was released.
 */
static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	/* A failed write counts as zero bytes written so it is fully retried. */
	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push() so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(&md->core);
		rc = 1;
	} else {
		/*
		 * aio write request may require restart with the
		 * remainder if the kernel didn't write whole
		 * chunk at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}
362 
/*
 * Reap completed AIO writes on @md.  With @sync_all false, return the index
 * of the first free control block, sleeping in aio_suspend() until one
 * becomes available.  With @sync_all true, wait until every outstanding
 * write has completed and return -1.
 */
static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			/* Free (fildes == -1) or just completed: slot i is usable. */
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * Started aio write is not complete yet
				 * so it has to be waited before the
				 * next allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}
397 
/* Closure handed through perf_mmap__push() to record__aio_pushfn(). */
struct record_aio {
	struct record	*rec;
	void		*data;	/* aio staging buffer being filled */
	size_t		size;	/* bytes staged into data so far */
};
403 
/*
 * perf_mmap__push() callback: stage (optionally zstd compressing) one chunk
 * of the kernel ring buffer into aio->data.  Returns the number of bytes
 * staged, or a negative error from the compressor.
 */
static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer
	 * to release space in the kernel buffer as fast as possible, calling
	 * perf_mmap__consume() from perf_mmap__push() function.
	 *
	 * That lets the kernel to proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Coping can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first move
	 * part of data from map->start till the upper bound and then the remainder
	 * from the beginning of the kernel buffer till the end of the data chunk.
	 */

	if (record__comp_enabled(aio->rec)) {
		ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
						   mmap__mmap_len(map) - aio->size,
						   buf, size);
		if (compressed < 0)
			return (int)compressed;

		size = compressed;
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard map->aio.data[] buffer
		 * from premature deallocation because map object can be
		 * released earlier than aio write request started on
		 * map->aio.data[] buffer is complete.
		 *
		 * perf_mmap__put() is done at record__aio_complete()
		 * after started aio request completion or at record__aio_push()
		 * if the request failed to start.
		 */
		perf_mmap__get(&map->core);
	}

	aio->size += size;

	return size;
}
452 
/*
 * Drain @map asynchronously: wait for a free AIO control block, stage the
 * ring buffer contents, then queue a write at offset *@off, advancing *@off
 * when the queueing succeeded.  Returns 0 on success, > 0 when there was no
 * data to push, < 0 on error.
 */
static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait till map->aio.data[] buffer
	 * becomes available after previous aio write operation.
	 */

	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];
	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
		return ret;

	rec->samples++;
	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
		/*
		 * Decrement map->refcount incremented in record__aio_pushfn()
		 * back if record__aio_write() operation failed to start, otherwise
		 * map->refcount is decremented in record__aio_complete() after
		 * aio write operation finishes successfully.
		 */
		perf_mmap__put(&map->core);
	}

	return ret;
}
489 
/* Current write position of the trace output file. */
static off_t record__aio_get_pos(int trace_fd)
{
	off_t pos = lseek(trace_fd, 0, SEEK_CUR);

	return pos;
}
494 
/* Reposition the trace output file so the next write lands at @pos. */
static void record__aio_set_pos(int trace_fd, off_t pos)
{
	(void)lseek(trace_fd, pos, SEEK_SET);
}
499 
500 static void record__aio_mmap_read_sync(struct record *rec)
501 {
502 	int i;
503 	struct evlist *evlist = rec->evlist;
504 	struct mmap *maps = evlist->mmap;
505 
506 	if (!record__aio_enabled(rec))
507 		return;
508 
509 	for (i = 0; i < evlist->core.nr_mmaps; i++) {
510 		struct mmap *map = &maps[i];
511 
512 		if (map->core.base)
513 			record__aio_sync(map, true);
514 	}
515 }
516 
/* Default and upper bound for the number of --aio control blocks per mmap. */
static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;
519 
520 static int record__aio_parse(const struct option *opt,
521 			     const char *str,
522 			     int unset)
523 {
524 	struct record_opts *opts = (struct record_opts *)opt->value;
525 
526 	if (unset) {
527 		opts->nr_cblocks = 0;
528 	} else {
529 		if (str)
530 			opts->nr_cblocks = strtol(str, NULL, 0);
531 		if (!opts->nr_cblocks)
532 			opts->nr_cblocks = nr_cblocks_default;
533 	}
534 
535 	return 0;
536 }
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

/* Stubs so callers need no #ifdefs when AIO support is compiled out. */
static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
			    off_t *off __maybe_unused)
{
	return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif
559 
560 static int record__aio_enabled(struct record *rec)
561 {
562 	return rec->opts.nr_cblocks > 0;
563 }
564 
#define MMAP_FLUSH_DEFAULT 1
/*
 * --mmap-flush option parser: the minimal number of bytes that must
 * accumulate in an mmap before it is flushed to the output.  Accepts
 * B/K/M/G suffixes or a plain number, and is capped at a quarter of the
 * mmap size.
 */
static int record__mmap_flush_parse(const struct option *opt,
				    const char *str,
				    int unset)
{
	int flush_max;
	struct record_opts *opts = (struct record_opts *)opt->value;
	static struct parse_tag tags[] = {
			{ .tag  = 'B', .mult = 1       },
			{ .tag  = 'K', .mult = 1 << 10 },
			{ .tag  = 'M', .mult = 1 << 20 },
			{ .tag  = 'G', .mult = 1 << 30 },
			{ .tag  = 0 },
	};

	if (unset)
		return 0;

	if (str) {
		/* Try the suffixed form first, fall back to a bare number. */
		opts->mmap_flush = parse_tag_value(str, tags);
		if (opts->mmap_flush == (int)-1)
			opts->mmap_flush = strtol(str, NULL, 0);
	}

	if (!opts->mmap_flush)
		opts->mmap_flush = MMAP_FLUSH_DEFAULT;

	/* Never require more than a quarter of the ring buffer to flush. */
	flush_max = evlist__mmap_size(opts->mmap_pages);
	flush_max /= 4;
	if (opts->mmap_flush > flush_max)
		opts->mmap_flush = flush_max;

	return 0;
}
599 
#ifdef HAVE_ZSTD_SUPPORT
/* Compression level used when -z is given without an argument. */
static unsigned int comp_level_default = 1;

/* -z/--compression-level option parser.  Unsetting disables compression. */
static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (unset) {
		opts->comp_level = 0;
	} else {
		if (str)
			opts->comp_level = strtol(str, NULL, 0);
		if (!opts->comp_level)
			opts->comp_level = comp_level_default;
	}

	return 0;
}
#endif
/* Highest compression level accepted on the command line. */
static unsigned int comp_level_max = 22;
620 
621 static int record__comp_enabled(struct record *rec)
622 {
623 	return rec->opts.comp_level > 0;
624 }
625 
/*
 * perf_tool callback used during event synthesis: forward the synthesized
 * event straight to the output file.
 */
static int process_synthesized_event(const struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, NULL, event, event->header.size);
}
634 
/* Serializes output writes when event synthesis runs on multiple threads. */
static struct mutex synth_lock;

/* Mutex protected variant of process_synthesized_event(). */
static int process_locked_synthesized_event(const struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	int ret;

	mutex_lock(&synth_lock);
	ret = process_synthesized_event(tool, event, sample, machine);
	mutex_unlock(&synth_lock);
	return ret;
}
649 
/*
 * perf_mmap__push() callback for the serial (non-AIO) path: optionally
 * compress the chunk into map->data, pad the compressed record to 8 byte
 * alignment, and write the result out.  Returns record__write()'s result
 * (0 on success) or a negative compressor error.
 */
static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
	struct record *rec = to;

	if (record__comp_enabled(rec)) {
		struct perf_record_compressed2 *event = map->data;
		size_t padding = 0;
		u8 pad[8] = {0};
		ssize_t compressed = zstd_compress(rec->session, map, map->data,
						   mmap__mmap_len(map), bf, size);

		if (compressed < 0)
			return (int)compressed;

		bf = event;
		thread->samples++;

		/*
		 * The record from `zstd_compress` is not 8 bytes aligned, which would cause asan
		 * error. We make it aligned here.
		 */
		event->data_size = compressed - sizeof(struct perf_record_compressed2);
		event->header.size = PERF_ALIGN(compressed, sizeof(u64));
		padding = event->header.size - compressed;
		return record__write(rec, map, bf, compressed) ||
		       record__write(rec, map, &pad, padding);
	}

	thread->samples++;
	return record__write(rec, map, bf, size);
}
681 
static volatile sig_atomic_t signr = -1;	/* fatal signal to re-raise at exit */
static volatile sig_atomic_t child_finished;	/* set when SIGCHLD is seen */
#ifdef HAVE_EVENTFD_SUPPORT
static volatile sig_atomic_t done_fd = -1;	/* eventfd used to wake a sleeping poll() */
#endif
687 
/*
 * Signal handler for termination signals and SIGCHLD: remember which signal
 * fired (SIGCHLD only marks the workload as finished), set 'done' so the
 * main loop terminates, and poke the eventfd, when available, so a sleeping
 * poll() wakes up.
 */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
#ifdef HAVE_EVENTFD_SUPPORT
	if (done_fd >= 0) {
		u64 tmp = 1;
		int orig_errno = errno;	/* write() may clobber errno; restore below */

		/*
		 * It is possible for this signal handler to run after done is
		 * checked in the main loop, but before the perf counter fds are
		 * polled. If this happens, the poll() will continue to wait
		 * even though done is set, and will only break out if either
		 * another signal is received, or the counters are ready for
		 * read. To ensure the poll() doesn't sleep when done is set,
		 * use an eventfd (done_fd) to wake up the poll().
		 */
		if (write(done_fd, &tmp, sizeof(tmp)) < 0)
			pr_err("failed to signal wakeup fd, error: %m\n");

		errno = orig_errno;
	}
#endif // HAVE_EVENTFD_SUPPORT
}
717 
/* SIGSEGV handler: run perf hook recovery, then report the crash (see
 * sighandler_dump_stack). */
static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}
723 
724 static void record__sig_exit(void)
725 {
726 	if (signr == -1)
727 		return;
728 
729 	signal(signr, SIG_DFL);
730 	raise(signr);
731 }
732 
733 #ifdef HAVE_AUXTRACE_SUPPORT
734 
/*
 * Write one AUX area trace event followed by its payload, which may come in
 * two chunks (data1/data2) when it wraps around the ring buffer, plus 8 byte
 * alignment padding.  For a seekable single-file output the event's file
 * offset is recorded in the auxtrace index first.  Returns 0 on success or
 * a negative error.
 */
static int record__process_auxtrace(const struct perf_tool *tool,
				    struct mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}
772 
773 static int record__auxtrace_mmap_read(struct record *rec,
774 				      struct mmap *map)
775 {
776 	int ret;
777 
778 	ret = auxtrace_mmap__read(map, rec->itr,
779 				  perf_session__env(rec->session),
780 				  &rec->tool,
781 				  record__process_auxtrace);
782 	if (ret < 0)
783 		return ret;
784 
785 	if (ret)
786 		rec->samples++;
787 
788 	return 0;
789 }
790 
791 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
792 					       struct mmap *map)
793 {
794 	int ret;
795 
796 	ret = auxtrace_mmap__read_snapshot(map, rec->itr,
797 					   perf_session__env(rec->session),
798 					   &rec->tool,
799 					   record__process_auxtrace,
800 					   rec->opts.auxtrace_snapshot_size);
801 	if (ret < 0)
802 		return ret;
803 
804 	if (ret)
805 		rec->samples++;
806 
807 	return 0;
808 }
809 
810 static int record__auxtrace_read_snapshot_all(struct record *rec)
811 {
812 	int i;
813 	int rc = 0;
814 
815 	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
816 		struct mmap *map = &rec->evlist->mmap[i];
817 
818 		if (!map->auxtrace_mmap.base)
819 			continue;
820 
821 		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
822 			rc = -1;
823 			goto out;
824 		}
825 	}
826 out:
827 	return rc;
828 }
829 
830 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
831 {
832 	pr_debug("Recording AUX area tracing snapshot\n");
833 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
834 		trigger_error(&auxtrace_snapshot_trigger);
835 	} else {
836 		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
837 			trigger_error(&auxtrace_snapshot_trigger);
838 		else
839 			trigger_ready(&auxtrace_snapshot_trigger);
840 	}
841 }
842 
/*
 * Take one final AUX snapshot when recording ends.  Starts a snapshot if
 * one is not already in progress.  Returns 0 when the trigger was already
 * in error or the snapshot succeeded, -1 on failure.
 */
static int record__auxtrace_snapshot_exit(struct record *rec)
{
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return 0;

	if (!auxtrace_record__snapshot_started &&
	    auxtrace_record__snapshot_start(rec->itr))
		return -1;

	record__read_auxtrace_snapshot(rec, true);
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return -1;

	return 0;
}
858 
/*
 * Set up AUX area tracing: allocate the auxtrace record if needed, then
 * parse the snapshot, sample, aux-action and filter options in turn.
 * Snapshot/sample modes are incompatible with --threads.  Returns 0 or a
 * negative error from the first step that fails.
 */
static int record__auxtrace_init(struct record *rec)
{
	int err;

	if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
	    && record__threads_enabled(rec)) {
		pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
		return -EINVAL;
	}

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
					    rec->opts.auxtrace_sample_opts);
	if (err)
		return err;

	err = auxtrace_parse_aux_action(rec->evlist);
	if (err)
		return err;

	return auxtrace_parse_filters(rec->evlist);
}
891 
#else

/* Stubs so callers need no #ifdefs when AUX area tracing is compiled out. */

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct mmap *map __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
				    bool on_exit __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static inline
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif
925 
926 static int record__config_text_poke(struct evlist *evlist)
927 {
928 	struct evsel *evsel;
929 
930 	/* Nothing to do if text poke is already configured */
931 	evlist__for_each_entry(evlist, evsel) {
932 		if (evsel->core.attr.text_poke)
933 			return 0;
934 	}
935 
936 	evsel = evlist__add_dummy_on_all_cpus(evlist);
937 	if (!evsel)
938 		return -ENOMEM;
939 
940 	evsel->core.attr.text_poke = 1;
941 	evsel->core.attr.ksymbol = 1;
942 	evsel->immediate = true;
943 	evsel__set_sample_bit(evsel, TIME);
944 
945 	return 0;
946 }
947 
/* Prepare off-CPU profiling (--off-cpu) for the configured target. */
static int record__config_off_cpu(struct record *rec)
{
	return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
}
952 
953 static bool record__tracking_system_wide(struct record *rec)
954 {
955 	struct evlist *evlist = rec->evlist;
956 	struct evsel *evsel;
957 
958 	/*
959 	 * If non-dummy evsel exists, system_wide sideband is need to
960 	 * help parse sample information.
961 	 * For example, PERF_EVENT_MMAP event to help parse symbol,
962 	 * and PERF_EVENT_COMM event to help parse task executable name.
963 	 */
964 	evlist__for_each_entry(evlist, evsel) {
965 		if (!evsel__is_dummy_event(evsel))
966 			return true;
967 	}
968 
969 	return false;
970 }
971 
/*
 * Add a tracking (sideband) event when samples could otherwise miss the
 * PERF_RECORD_MMAP/COMM context: with --delay, when targeting CPUs, or on
 * systems with more than one core PMU.  Returns 0 or -ENOMEM.
 */
static int record__config_tracking_events(struct record *rec)
{
	struct record_opts *opts = &rec->opts;
	struct evlist *evlist = rec->evlist;
	bool system_wide = false;
	struct evsel *evsel;

	/*
	 * For initial_delay, system wide or a hybrid system, we need to add
	 * tracking event so that we can track PERF_RECORD_MMAP to cover the
	 * delay of waiting or event synthesis.
	 */
	if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
	    perf_pmus__num_core_pmus() > 1) {
		/*
		 * User space tasks can migrate between CPUs, so when tracing
		 * selected CPUs, sideband for all CPUs is still needed.
		 */
		if (!!opts->target.cpu_list && record__tracking_system_wide(rec))
			system_wide = true;

		evsel = evlist__findnew_tracking_event(evlist, system_wide);
		if (!evsel)
			return -ENOMEM;

		/*
		 * Enable the tracking event when the process is forked for
		 * initial_delay, immediately for system wide.
		 */
		if (opts->target.initial_delay && !evsel->immediate &&
		    !target__has_cpu(&opts->target))
			evsel->core.attr.enable_on_exec = 1;
		else
			evsel->immediate = 1;
	}

	return 0;
}
1010 
1011 static bool record__kcore_readable(struct machine *machine)
1012 {
1013 	char kcore[PATH_MAX];
1014 	int fd;
1015 
1016 	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
1017 
1018 	fd = open(kcore, O_RDONLY);
1019 	if (fd < 0)
1020 		return false;
1021 
1022 	close(fd);
1023 
1024 	return true;
1025 }
1026 
1027 static int record__kcore_copy(struct machine *machine, struct perf_data *data)
1028 {
1029 	char from_dir[PATH_MAX];
1030 	char kcore_dir[PATH_MAX];
1031 	int ret;
1032 
1033 	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
1034 
1035 	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
1036 	if (ret)
1037 		return ret;
1038 
1039 	return kcore_copy(from_dir, kcore_dir);
1040 }
1041 
1042 static void record__thread_data_init_pipes(struct record_thread *thread_data)
1043 {
1044 	thread_data->pipes.msg[0] = -1;
1045 	thread_data->pipes.msg[1] = -1;
1046 	thread_data->pipes.ack[0] = -1;
1047 	thread_data->pipes.ack[1] = -1;
1048 }
1049 
1050 static int record__thread_data_open_pipes(struct record_thread *thread_data)
1051 {
1052 	if (pipe(thread_data->pipes.msg))
1053 		return -EINVAL;
1054 
1055 	if (pipe(thread_data->pipes.ack)) {
1056 		close(thread_data->pipes.msg[0]);
1057 		thread_data->pipes.msg[0] = -1;
1058 		close(thread_data->pipes.msg[1]);
1059 		thread_data->pipes.msg[1] = -1;
1060 		return -EINVAL;
1061 	}
1062 
1063 	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
1064 		 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
1065 		 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
1066 
1067 	return 0;
1068 }
1069 
1070 static void record__thread_data_close_pipes(struct record_thread *thread_data)
1071 {
1072 	if (thread_data->pipes.msg[0] != -1) {
1073 		close(thread_data->pipes.msg[0]);
1074 		thread_data->pipes.msg[0] = -1;
1075 	}
1076 	if (thread_data->pipes.msg[1] != -1) {
1077 		close(thread_data->pipes.msg[1]);
1078 		thread_data->pipes.msg[1] = -1;
1079 	}
1080 	if (thread_data->pipes.ack[0] != -1) {
1081 		close(thread_data->pipes.ack[0]);
1082 		thread_data->pipes.ack[0] = -1;
1083 	}
1084 	if (thread_data->pipes.ack[1] != -1) {
1085 		close(thread_data->pipes.ack[1]);
1086 		thread_data->pipes.ack[1] = -1;
1087 	}
1088 }
1089 
1090 static bool evlist__per_thread(struct evlist *evlist)
1091 {
1092 	return cpu_map__is_dummy(evlist->core.user_requested_cpus);
1093 }
1094 
/*
 * Attach to @thread_data the subset of @evlist's mmaps it will service: all
 * of them in per-thread mode, otherwise exactly those whose CPU is set in
 * the thread's maps mask.  Returns 0 or -ENOMEM.
 */
static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
{
	int m, tm, nr_mmaps = evlist->core.nr_mmaps;
	struct mmap *mmap = evlist->mmap;
	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
	struct perf_cpu_map *cpus = evlist->core.all_cpus;
	bool per_thread = evlist__per_thread(evlist);

	/* Number of mmaps this thread owns = popcount of its maps mask. */
	if (per_thread)
		thread_data->nr_mmaps = nr_mmaps;
	else
		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
						      thread_data->mask->maps.nbits);
	if (mmap) {
		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->maps)
			return -ENOMEM;
	}
	if (overwrite_mmap) {
		thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->overwrite_maps) {
			zfree(&thread_data->maps);
			return -ENOMEM;
		}
	}
	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
		 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);

	/* m walks the evlist mmaps, tm the thread's slots. */
	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
		if (per_thread ||
		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
			if (thread_data->maps) {
				thread_data->maps[tm] = &mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			if (thread_data->overwrite_maps) {
				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			tm++;
		}
	}

	return 0;
}
1142 
1143 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
1144 {
1145 	int f, tm, pos;
1146 	struct mmap *map, *overwrite_map;
1147 
1148 	fdarray__init(&thread_data->pollfd, 64);
1149 
1150 	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
1151 		map = thread_data->maps ? thread_data->maps[tm] : NULL;
1152 		overwrite_map = thread_data->overwrite_maps ?
1153 				thread_data->overwrite_maps[tm] : NULL;
1154 
1155 		for (f = 0; f < evlist->core.pollfd.nr; f++) {
1156 			void *ptr = evlist->core.pollfd.priv[f].ptr;
1157 
1158 			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
1159 				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
1160 							      &evlist->core.pollfd);
1161 				if (pos < 0)
1162 					return pos;
1163 				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
1164 					 thread_data, pos, evlist->core.pollfd.entries[f].fd);
1165 			}
1166 		}
1167 	}
1168 
1169 	return 0;
1170 }
1171 
1172 static void record__free_thread_data(struct record *rec)
1173 {
1174 	int t;
1175 	struct record_thread *thread_data = rec->thread_data;
1176 
1177 	if (thread_data == NULL)
1178 		return;
1179 
1180 	for (t = 0; t < rec->nr_threads; t++) {
1181 		record__thread_data_close_pipes(&thread_data[t]);
1182 		zfree(&thread_data[t].maps);
1183 		zfree(&thread_data[t].overwrite_maps);
1184 		fdarray__exit(&thread_data[t].pollfd);
1185 	}
1186 
1187 	zfree(&rec->thread_data);
1188 }
1189 
1190 static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
1191 						    int evlist_pollfd_index,
1192 						    int thread_pollfd_index)
1193 {
1194 	size_t x = rec->index_map_cnt;
1195 
1196 	if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
1197 		return -ENOMEM;
1198 	rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
1199 	rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
1200 	rec->index_map_cnt += 1;
1201 	return 0;
1202 }
1203 
1204 static int record__update_evlist_pollfd_from_thread(struct record *rec,
1205 						    struct evlist *evlist,
1206 						    struct record_thread *thread_data)
1207 {
1208 	struct pollfd *e_entries = evlist->core.pollfd.entries;
1209 	struct pollfd *t_entries = thread_data->pollfd.entries;
1210 	int err = 0;
1211 	size_t i;
1212 
1213 	for (i = 0; i < rec->index_map_cnt; i++) {
1214 		int e_pos = rec->index_map[i].evlist_pollfd_index;
1215 		int t_pos = rec->index_map[i].thread_pollfd_index;
1216 
1217 		if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
1218 		    e_entries[e_pos].events != t_entries[t_pos].events) {
1219 			pr_err("Thread and evlist pollfd index mismatch\n");
1220 			err = -EINVAL;
1221 			continue;
1222 		}
1223 		e_entries[e_pos].revents = t_entries[t_pos].revents;
1224 	}
1225 	return err;
1226 }
1227 
1228 static int record__dup_non_perf_events(struct record *rec,
1229 				       struct evlist *evlist,
1230 				       struct record_thread *thread_data)
1231 {
1232 	struct fdarray *fda = &evlist->core.pollfd;
1233 	int i, ret;
1234 
1235 	for (i = 0; i < fda->nr; i++) {
1236 		if (!(fda->priv[i].flags & fdarray_flag__non_perf_event))
1237 			continue;
1238 		ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda);
1239 		if (ret < 0) {
1240 			pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1241 			return ret;
1242 		}
1243 		pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
1244 			  thread_data, ret, fda->entries[i].fd);
1245 		ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret);
1246 		if (ret < 0) {
1247 			pr_err("Failed to map thread and evlist pollfd indexes\n");
1248 			return ret;
1249 		}
1250 	}
1251 	return 0;
1252 }
1253 
/*
 * Allocate rec->thread_data[rec->nr_threads] and initialize each entry's
 * maps, pollfd and (for workers) control pipes. Entry 0 is the main
 * thread. On any failure everything allocated so far is freed and the
 * error is returned.
 */
static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
{
	int t, ret;
	struct record_thread *thread_data;

	rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
	if (!rec->thread_data) {
		pr_err("Failed to allocate thread data\n");
		return -ENOMEM;
	}
	thread_data = rec->thread_data;

	/* Mark all pipe fds invalid first so cleanup on error is safe. */
	for (t = 0; t < rec->nr_threads; t++)
		record__thread_data_init_pipes(&thread_data[t]);

	for (t = 0; t < rec->nr_threads; t++) {
		thread_data[t].rec = rec;
		thread_data[t].mask = &rec->thread_masks[t];
		ret = record__thread_data_init_maps(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] maps\n", t);
			goto out_free;
		}
		ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] pollfd\n", t);
			goto out_free;
		}
		if (t) {
			/* Worker thread: tid gets filled in once it starts running. */
			thread_data[t].tid = -1;
			ret = record__thread_data_open_pipes(&thread_data[t]);
			if (ret) {
				pr_err("Failed to open thread[%d] communication pipes\n", t);
				goto out_free;
			}
			/* Poll the message pipe so the worker sees control messages. */
			ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
					   POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
			if (ret < 0) {
				pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
				goto out_free;
			}
			thread_data[t].ctlfd_pos = ret;
			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
				 thread_data, thread_data[t].ctlfd_pos,
				 thread_data[t].pipes.msg[0]);
		} else {
			/* Entry 0 is the current (main) thread. */
			thread_data[t].tid = gettid();

			ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]);
			if (ret < 0)
				goto out_free;

			thread_data[t].ctlfd_pos = -1; /* Not used */
		}
	}

	return 0;

out_free:
	record__free_thread_data(rec);

	return ret;
}
1317 
/*
 * mmap the evlist's ring buffers (and AUX area buffers), initialize the
 * control fd, allocate per-thread data and, in threaded mode, create the
 * output data directory with one file per mmap. Returns 0 on success or
 * a negative error.
 */
static int record__mmap_evlist(struct record *rec,
			       struct evlist *evlist)
{
	int i, ret;
	struct record_opts *opts = &rec->opts;
	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
				  opts->auxtrace_sample_mode;
	char msg[512];

	/* CPU->node map is needed for the node/cpu affinity modes. */
	if (opts->affinity != PERF_AFFINITY_SYS)
		cpu__setup_cpunode_map();

	if (evlist__mmap_ex(evlist, opts->mmap_pages,
				 opts->auxtrace_mmap_pages,
				 auxtrace_overwrite,
				 opts->nr_cblocks, opts->affinity,
				 opts->mmap_flush, opts->comp_level) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			/* Fall back to -EINVAL when errno gives no detail. */
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}

	if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
		return -1;

	ret = record__alloc_thread_data(rec, evlist);
	if (ret)
		return ret;

	if (record__threads_enabled(rec)) {
		ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
		if (ret) {
			pr_err("Failed to create data directory: %s\n", strerror(-ret));
			return ret;
		}
		/* Point each mmap (regular and overwrite) at its own output file. */
		for (i = 0; i < evlist->core.nr_mmaps; i++) {
			if (evlist->mmap)
				evlist->mmap[i].file = &rec->data.dir.files[i];
			if (evlist->overwrite_mmap)
				evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
		}
	}

	return 0;
}
1376 
1377 static int record__mmap(struct record *rec)
1378 {
1379 	return record__mmap_evlist(rec, rec->evlist);
1380 }
1381 
/*
 * Open every event in the evlist, trying fallbacks (and weak-group resets)
 * on failure; events that still cannot be opened are removed. Then apply
 * filters and mmap the ring buffers. Returns 0 on success, -1 or a
 * negative error when nothing could be opened or filter/mmap setup failed.
 */
static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct evsel *pos;
	struct evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;
	bool skipped = false;
	bool removed_tracking = false;

	evlist__for_each_entry(evlist, pos) {
		if (removed_tracking) {
			/*
			 * Normally the head of the list has tracking enabled
			 * for sideband data like mmaps. If this event is
			 * removed, make sure to add tracking to the next
			 * processed event.
			 */
			if (!pos->tracking) {
				pos->tracking = true;
				evsel__config(pos, opts, &callchain_param);
			}
			removed_tracking = false;
		}
try_again:
		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
			bool report_error = true;

			/* Retry with a downgraded/alternative event config. */
			if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			/* Weak group member failed: break the group up and retry. */
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->core.leader != &pos->core &&
			    pos->weak_group) {
			        pos = evlist__reset_weak_group(evlist, pos, true);
				goto try_again;
			}
#if defined(__aarch64__) || defined(__arm__)
			if (strstr(evsel__name(pos), "cycles")) {
				struct evsel *pos2;
				/*
				 * Unfortunately ARM has many events named
				 * "cycles" on PMUs like the system-level (L3)
				 * cache which don't support sampling. Only
				 * display such failures to open when there is
				 * only 1 cycles event or verbose is enabled.
				 */
				evlist__for_each_entry(evlist, pos2) {
					if (pos2 == pos)
						continue;
					if (strstr(evsel__name(pos2), "cycles")) {
						report_error = false;
						break;
					}
				}
			}
#endif
			if (report_error || verbose > 0) {
				ui__error("Failure to open event '%s' on PMU '%s' which will be "
					  "removed.\n%s\n",
					  evsel__name(pos), evsel__pmu_name(pos), msg);
			}
			if (pos->tracking)
				removed_tracking = true;
			pos->skippable = true;
			skipped = true;
		}
	}

	if (skipped) {
		struct evsel *tmp;
		int idx = 0;
		bool evlist_empty = true;

		/* Remove evsels that failed to open and update indices. */
		evlist__for_each_entry_safe(evlist, tmp, pos) {
			if (pos->skippable) {
				evlist__remove(evlist, pos);
				continue;
			}

			/*
			 * Note, dummy events may be command line parsed or
			 * added by the tool. We care about supporting `perf
			 * record -e dummy` which may be used as a permission
			 * check. Dummy events that are added to the command
			 * line and opened along with other events that fail,
			 * will still fail as if the dummy events were tool
			 * added events for the sake of code simplicity.
			 */
			if (!evsel__is_dummy_event(pos))
				evlist_empty = false;
		}
		/* Renumber the surviving events. */
		evlist__for_each_entry(evlist, pos) {
			pos->core.idx = idx++;
		}
		/* If list is empty then fail. */
		if (evlist_empty) {
			ui__error("Failure to open any events for recording.\n");
			rc = -1;
			goto out;
		}
	}
	if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
	}

	if (evlist__apply_filters(evlist, &pos, &opts->target)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter ?: "BPF", evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
1516 
1517 static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1518 {
1519 	if (rec->evlist->first_sample_time == 0)
1520 		rec->evlist->first_sample_time = sample_time;
1521 
1522 	if (sample_time)
1523 		rec->evlist->last_sample_time = sample_time;
1524 }
1525 
1526 static int process_sample_event(const struct perf_tool *tool,
1527 				union perf_event *event,
1528 				struct perf_sample *sample,
1529 				struct evsel *evsel,
1530 				struct machine *machine)
1531 {
1532 	struct record *rec = container_of(tool, struct record, tool);
1533 
1534 	set_timestamp_boundary(rec, sample->time);
1535 
1536 	if (rec->buildid_all)
1537 		return 0;
1538 
1539 	rec->samples++;
1540 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1541 }
1542 
1543 static int process_buildids(struct record *rec)
1544 {
1545 	struct perf_session *session = rec->session;
1546 
1547 	if (perf_data__size(&rec->data) == 0)
1548 		return 0;
1549 
1550 	/*
1551 	 * During this process, it'll load kernel map and replace the
1552 	 * dso->long_name to a real pathname it found.  In this case
1553 	 * we prefer the vmlinux path like
1554 	 *   /lib/modules/3.16.4/build/vmlinux
1555 	 *
1556 	 * rather than build-id path (in debug directory).
1557 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
1558 	 */
1559 	symbol_conf.ignore_vmlinux_buildid = true;
1560 
1561 	/*
1562 	 * If --buildid-all is given, it marks all DSO regardless of hits,
1563 	 * so no need to process samples. But if timestamp_boundary is enabled,
1564 	 * it still needs to walk on all samples to get the timestamps of
1565 	 * first/last samples.
1566 	 */
1567 	if (rec->buildid_all && !rec->timestamp_boundary)
1568 		rec->tool.sample = process_event_sample_stub;
1569 
1570 	return perf_session__process_events(session);
1571 }
1572 
/* Synthesize module and kernel mmap events for one guest machine. */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel, when processing the record & report
	 * subcommands we arrange the module mmap prior to the guest kernel
	 * mmap and trigger a preload dso, because default guest module
	 * symbols are loaded from guest kallsyms instead of
	 * /lib/modules/XXX/XXX. This method is used to avoid missing
	 * symbols when the first address is in a module instead of in the
	 * guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}
1601 
/*
 * PERF_RECORD_FINISHED_ROUND marker, written after flushing all mmaps so
 * the report side knows events up to this point can be ordered/processed.
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
1606 
/* PERF_RECORD_FINISHED_INIT marker, written once initial synthesis is done. */
static struct perf_event_header finished_init_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_INIT,
};
1611 
/*
 * For --affinity modes other than "sys", switch the current thread's CPU
 * affinity (tracked in the per-thread TLS 'thread' state) to the mmap's
 * affinity mask before reading it, if the masks differ.
 */
static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
	    !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
			  thread->mask->affinity.nbits)) {
		/* Replace the thread's mask with the mmap's mask and apply it. */
		bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
		bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
			  map->affinity_mask.bits, thread->mask->affinity.nbits);
		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
					(cpu_set_t *)thread->mask->affinity.bits);
		if (verbose == 2) {
			pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
			mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
		}
	}
}
1628 
1629 static size_t process_comp_header(void *record, size_t increment)
1630 {
1631 	struct perf_record_compressed2 *event = record;
1632 	size_t size = sizeof(*event);
1633 
1634 	if (increment) {
1635 		event->header.size += increment;
1636 		return increment;
1637 	}
1638 
1639 	event->header.type = PERF_RECORD_COMPRESSED2;
1640 	event->header.size = size;
1641 
1642 	return size;
1643 }
1644 
/*
 * Compress src into dst as a stream of PERF_RECORD_COMPRESSED2 records.
 * In threaded mode (map writes to its own file) the per-mmap zstd stream
 * and the per-thread byte counters are used; otherwise the session-wide
 * ones are. Returns the compressed size, or a negative error.
 */
static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
			    void *dst, size_t dst_size, void *src, size_t src_size)
{
	ssize_t compressed;
	/* Each record must fit in PERF_SAMPLE_MAX_SIZE including its header. */
	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed2) - 1;
	struct zstd_data *zstd_data = &session->zstd_data;

	if (map && map->file)
		zstd_data = &map->zstd_data;

	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
						     max_record_size, process_comp_header);
	if (compressed < 0)
		return compressed;

	/* Account compression stats on the owner of the zstd stream used. */
	if (map && map->file) {
		thread->bytes_transferred += src_size;
		thread->bytes_compressed  += compressed;
	} else {
		session->bytes_transferred += src_size;
		session->bytes_compressed  += compressed;
	}

	return compressed;
}
1670 
/*
 * Flush the current thread's mmaps (TLS 'thread') -- either the regular
 * or the overwritable set -- into the output. With @synch, temporarily
 * force flush=1 so even partially filled buffers are pushed out. A
 * FINISHED_ROUND event is emitted if anything was written, except in
 * threaded (directory) mode. Returns 0 on success, -1 on push failure.
 */
static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
				    bool overwrite, bool synch)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	int nr_mmaps;
	struct mmap **maps;
	int trace_fd = rec->data.file.fd;
	off_t off = 0;

	if (!evlist)
		return 0;

	nr_mmaps = thread->nr_mmaps;
	maps = overwrite ? thread->overwrite_maps : thread->maps;

	if (!maps)
		return 0;

	/* Overwritable buffers are only read while collection is paused. */
	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	if (record__aio_enabled(rec))
		off = record__aio_get_pos(trace_fd);

	for (i = 0; i < nr_mmaps; i++) {
		u64 flush = 0;
		struct mmap *map = maps[i];

		if (map->core.base) {
			record__adjust_affinity(rec, map);
			if (synch) {
				/* Save the flush threshold; force pushing everything. */
				flush = map->core.flush;
				map->core.flush = 1;
			}
			if (!record__aio_enabled(rec)) {
				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			} else {
				if (record__aio_push(rec, map, &off) < 0) {
					/* Keep the file position consistent on failure. */
					record__aio_set_pos(trace_fd, off);
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			}
			if (synch)
				map->core.flush = flush;
		}

		/* AUX area data is drained here only in full-trace mode. */
		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    !rec->opts.auxtrace_sample_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	if (record__aio_enabled(rec))
		record__aio_set_pos(trace_fd, off);

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 *
	 * No need for round events in directory mode,
	 * because per-cpu maps and files have data
	 * sorted by kernel.
	 */
	if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}
1754 
1755 static int record__mmap_read_all(struct record *rec, bool synch)
1756 {
1757 	int err;
1758 
1759 	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1760 	if (err)
1761 		return err;
1762 
1763 	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1764 }
1765 
1766 static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1767 					   void *arg __maybe_unused)
1768 {
1769 	struct perf_mmap *map = fda->priv[fd].ptr;
1770 
1771 	if (map)
1772 		perf_mmap__put(map);
1773 }
1774 
/*
 * Body of a worker thread in threaded record mode. Repeatedly flushes
 * its mmaps, polling when idle, until the main thread closes the control
 * pipe (POLLHUP); then performs a final synchronized flush and acks
 * termination over the ack pipe.
 */
static void *record__thread(void *arg)
{
	enum thread_msg msg = THREAD_MSG__READY;
	bool terminate = false;
	struct fdarray *pollfd;
	int err, ctlfd_pos;

	thread = arg;
	thread->tid = gettid();

	/* Tell the main thread we are up and running. */
	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on start: %s\n",
			   thread->tid, strerror(errno));

	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());

	pollfd = &thread->pollfd;
	ctlfd_pos = thread->ctlfd_pos;

	for (;;) {
		unsigned long long hits = thread->samples;

		if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
			break;

		/* No new data was pushed: sleep until some fd becomes ready. */
		if (hits == thread->samples) {

			err = fdarray__poll(pollfd, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			thread->waking++;

			/* Nothing left to poll once all mmap fds are gone. */
			if (fdarray__filter(pollfd, POLLERR | POLLHUP,
					    record__thread_munmap_filtered, NULL) == 0)
				break;
		}

		/* Control pipe closed by the main thread: drain once more, exit. */
		if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
			terminate = true;
			close(thread->pipes.msg[0]);
			thread->pipes.msg[0] = -1;
			pollfd->entries[ctlfd_pos].fd = -1;
			pollfd->entries[ctlfd_pos].events = 0;
		}

		pollfd->entries[ctlfd_pos].revents = 0;
	}
	/* Final flush with synch=true pushes out partially filled buffers. */
	record__mmap_read_all(thread->rec, true);

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on termination: %s\n",
			   thread->tid, strerror(errno));

	return NULL;
}
1836 
/*
 * Start with every header feature set, then clear the ones that do not
 * apply to this recording session.
 */
static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->core.entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	/* CLOCKID needs both a user clockid and a known clock resolution. */
	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	if (!rec->opts.use_clockid)
		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);

	/* DIR_FORMAT only applies to directory (threaded) output. */
	if (!record__threads_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);

	if (!record__comp_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);

	/* perf record never writes stat data. */
	perf_header__clear_feat(&session->header, HEADER_STAT);
}
1871 
1872 static void
1873 record__finish_output(struct record *rec)
1874 {
1875 	int i;
1876 	struct perf_data *data = &rec->data;
1877 	int fd = perf_data__fd(data);
1878 
1879 	if (data->is_pipe) {
1880 		/* Just to display approx. size */
1881 		data->file.size = rec->bytes_written;
1882 		return;
1883 	}
1884 
1885 	rec->session->header.data_size += rec->bytes_written;
1886 	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1887 	if (record__threads_enabled(rec)) {
1888 		for (i = 0; i < data->dir.nr; i++)
1889 			data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
1890 	}
1891 
1892 	/* Buildid scanning disabled or build ID in kernel and synthesized map events. */
1893 	if (!rec->no_buildid) {
1894 		process_buildids(rec);
1895 
1896 		if (rec->buildid_all)
1897 			perf_session__dsos_hit_all(rec->session);
1898 	}
1899 	perf_session__write_header(rec->session, rec->evlist, fd, true);
1900 
1901 	return;
1902 }
1903 
1904 static int record__synthesize_workload(struct record *rec, bool tail)
1905 {
1906 	int err;
1907 	struct perf_thread_map *thread_map;
1908 	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1909 
1910 	if (rec->opts.tail_synthesize != tail)
1911 		return 0;
1912 
1913 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1914 	if (thread_map == NULL)
1915 		return -1;
1916 
1917 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1918 						 process_synthesized_event,
1919 						 &rec->session->machines.host,
1920 						 needs_mmap,
1921 						 rec->opts.sample_address);
1922 	perf_thread_map__put(thread_map);
1923 	return err;
1924 }
1925 
1926 static int write_finished_init(struct record *rec, bool tail)
1927 {
1928 	if (rec->opts.tail_synthesize != tail)
1929 		return 0;
1930 
1931 	return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
1932 }
1933 
1934 static int record__synthesize(struct record *rec, bool tail);
1935 
/*
 * Rotate the output: synthesize tail events, finalize the current file
 * and switch perf.data to a timestamped filename. With @at_exit the old
 * file is only finalized. Returns the perf_data__switch() result (new fd
 * on success) or -EINVAL if the timestamp could not be fetched.
 */
static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	char *new_filename = NULL;
	int fd, err;

	/* Same Size:      "2015122520103046"*/
	char timestamp[] = "InvalidTimestamp";

	record__aio_mmap_read_sync(rec);

	write_finished_init(rec, true);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		/* Start byte accounting from scratch for the new file. */
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet) {
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);
	}

	/* With --switch-output=<n>, keep only the last n files (round-robin). */
	if (rec->switch_output.num_files) {
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in evlist. Which causes newly created perf.data doesn't
		 * contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
		write_finished_init(rec, false);
	}
	return fd;
}
2009 
/*
 * Write one PERF_RECORD_LOST_SAMPLES event carrying @lost_count for the
 * (cpu_idx, thread_idx) instance of @evsel. An id sample is synthesized
 * right after the fixed part so the report side can attribute the loss;
 * @misc_flag distinguishes BPF-filter drops from other losses.
 */
static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
					struct perf_record_lost_samples *lost,
					int cpu_idx, int thread_idx, u64 lost_count,
					u16 misc_flag)
{
	struct perf_sample_id *sid;
	struct perf_sample sample;
	int id_hdr_size;

	perf_sample__init(&sample, /*all=*/true);
	lost->lost = lost_count;
	if (evsel->core.ids) {
		sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
		sample.id = sid->id;
	}

	/* Append the id sample immediately after the fixed-size payload. */
	id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1),
						       evsel->core.attr.sample_type, &sample);
	lost->header.size = sizeof(*lost) + id_hdr_size;
	lost->header.misc = misc_flag;
	record__write(rec, NULL, lost, lost->header.size);
	perf_sample__exit(&sample);
}
2033 
/*
 * At the end of the record, read the per-fd lost-sample counters (and
 * BPF filter drop counters) from every evsel and emit LOST_SAMPLES
 * events for any non-zero counts.
 */
static void record__read_lost_samples(struct record *rec)
{
	struct perf_session *session = rec->session;
	struct perf_record_lost_samples_and_ids lost;
	struct evsel *evsel;

	/* there was an error during record__open */
	if (session->evlist == NULL)
		return;

	evlist__for_each_entry(session->evlist, evsel) {
		struct xyarray *xy = evsel->core.sample_id;
		u64 lost_count;

		if (xy == NULL || evsel->core.fd == NULL)
			continue;
		/* fd and sample-id arrays must have the same geometry. */
		if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
		    xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
			pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
			continue;
		}

		/* One counter read per (cpu, thread) instance of the event. */
		for (int x = 0; x < xyarray__max_x(xy); x++) {
			for (int y = 0; y < xyarray__max_y(xy); y++) {
				struct perf_counts_values count;

				if (perf_evsel__read(&evsel->core, x, y, &count) < 0) {
					pr_debug("read LOST count failed\n");
					return;
				}

				if (count.lost) {
					memset(&lost, 0, sizeof(lost));
					lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
					__record__save_lost_samples(rec, evsel, &lost.lost,
								    x, y, count.lost, 0);
				}
			}
		}

		/* Samples dropped by a BPF filter are reported separately. */
		lost_count = perf_bpf_filter__lost_count(evsel);
		if (lost_count) {
			memset(&lost, 0, sizeof(lost));
			lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
			__record__save_lost_samples(rec, evsel, &lost.lost, 0, 0, lost_count,
						    PERF_RECORD_MISC_LOST_SAMPLES_BPF);
		}
	}
}
2083 
/* Errno delivered by workload_exec_failed_signal() when the workload's fork/exec fails. */
static volatile sig_atomic_t workload_exec_errno;
2085 
2086 /*
2087  * evlist__prepare_workload will send a SIGUSR1
2088  * if the fork fails, since we asked by setting its
2089  * want_signal to true.
2090  */
2091 static void workload_exec_failed_signal(int signo __maybe_unused,
2092 					siginfo_t *info,
2093 					void *ucontext __maybe_unused)
2094 {
2095 	workload_exec_errno = info->si_value.sival_int;
2096 	done = 1;
2097 	child_finished = 1;
2098 }
2099 
2100 static void snapshot_sig_handler(int sig);
2101 static void alarm_sig_handler(int sig);
2102 
2103 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
2104 {
2105 	if (evlist) {
2106 		if (evlist->mmap && evlist->mmap[0].core.base)
2107 			return evlist->mmap[0].core.base;
2108 		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
2109 			return evlist->overwrite_mmap[0].core.base;
2110 	}
2111 	return NULL;
2112 }
2113 
2114 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
2115 {
2116 	const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
2117 	if (pc)
2118 		return pc;
2119 	return NULL;
2120 }
2121 
2122 static int record__synthesize(struct record *rec, bool tail)
2123 {
2124 	struct perf_session *session = rec->session;
2125 	struct machine *machine = &session->machines.host;
2126 	struct perf_data *data = &rec->data;
2127 	struct record_opts *opts = &rec->opts;
2128 	struct perf_tool *tool = &rec->tool;
2129 	int err = 0;
2130 	event_op f = process_synthesized_event;
2131 
2132 	if (rec->opts.tail_synthesize != tail)
2133 		return 0;
2134 
2135 	if (data->is_pipe) {
2136 		err = perf_event__synthesize_for_pipe(tool, session, data,
2137 						      process_synthesized_event);
2138 		if (err < 0)
2139 			goto out;
2140 
2141 		rec->bytes_written += err;
2142 	}
2143 
2144 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
2145 					  process_synthesized_event, machine);
2146 	if (err)
2147 		goto out;
2148 
2149 	/* Synthesize id_index before auxtrace_info */
2150 	err = perf_event__synthesize_id_index(tool,
2151 					      process_synthesized_event,
2152 					      session->evlist, machine);
2153 	if (err)
2154 		goto out;
2155 
2156 	if (rec->opts.full_auxtrace) {
2157 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
2158 					session, process_synthesized_event);
2159 		if (err)
2160 			goto out;
2161 	}
2162 
2163 	if (!evlist__exclude_kernel(rec->evlist)) {
2164 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
2165 							 machine);
2166 		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
2167 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2168 				   "Check /proc/kallsyms permission or run as root.\n");
2169 
2170 		err = perf_event__synthesize_modules(tool, process_synthesized_event,
2171 						     machine);
2172 		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
2173 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2174 				   "Check /proc/modules permission or run as root.\n");
2175 	}
2176 
2177 	if (perf_guest) {
2178 		machines__process_guests(&session->machines,
2179 					 perf_event__synthesize_guest_os, tool);
2180 	}
2181 
2182 	err = perf_event__synthesize_extra_attr(&rec->tool,
2183 						rec->evlist,
2184 						process_synthesized_event,
2185 						data->is_pipe);
2186 	if (err)
2187 		goto out;
2188 
2189 	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
2190 						 process_synthesized_event,
2191 						NULL);
2192 	if (err < 0) {
2193 		pr_err("Couldn't synthesize thread map.\n");
2194 		return err;
2195 	}
2196 
2197 	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
2198 					     process_synthesized_event, NULL);
2199 	if (err < 0) {
2200 		pr_err("Couldn't synthesize cpu map.\n");
2201 		return err;
2202 	}
2203 
2204 	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
2205 						machine, opts);
2206 	if (err < 0) {
2207 		pr_warning("Couldn't synthesize bpf events.\n");
2208 		err = 0;
2209 	}
2210 
2211 	if (rec->opts.synth & PERF_SYNTH_CGROUP) {
2212 		err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
2213 						     machine);
2214 		if (err < 0) {
2215 			pr_warning("Couldn't synthesize cgroup events.\n");
2216 			err = 0;
2217 		}
2218 	}
2219 
2220 	if (rec->opts.nr_threads_synthesize > 1) {
2221 		mutex_init(&synth_lock);
2222 		perf_set_multithreaded();
2223 		f = process_locked_synthesized_event;
2224 	}
2225 
2226 	if (rec->opts.synth & PERF_SYNTH_TASK) {
2227 		bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
2228 
2229 		err = __machine__synthesize_threads(machine, tool, &opts->target,
2230 						    rec->evlist->core.threads,
2231 						    f, needs_mmap, opts->sample_address,
2232 						    rec->opts.nr_threads_synthesize);
2233 	}
2234 
2235 	if (rec->opts.nr_threads_synthesize > 1) {
2236 		perf_set_singlethreaded();
2237 		mutex_destroy(&synth_lock);
2238 	}
2239 
2240 out:
2241 	return err;
2242 }
2243 
/*
 * Emit the final BPF metadata events into the session output.  A no-op
 * when perf was built without libbpf support (hence __maybe_unused).
 */
static void record__synthesize_final_bpf_metadata(struct record *rec __maybe_unused)
{
#ifdef HAVE_LIBBPF_SUPPORT
	perf_event__synthesize_final_bpf_metadata(rec->session,
						  process_synthesized_event);
#endif
}
2251 
2252 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
2253 {
2254 	struct record *rec = data;
2255 	pthread_kill(rec->thread_id, SIGUSR2);
2256 	return 0;
2257 }
2258 
2259 static int record__setup_sb_evlist(struct record *rec)
2260 {
2261 	struct record_opts *opts = &rec->opts;
2262 
2263 	if (rec->sb_evlist != NULL) {
2264 		/*
2265 		 * We get here if --switch-output-event populated the
2266 		 * sb_evlist, so associate a callback that will send a SIGUSR2
2267 		 * to the main thread.
2268 		 */
2269 		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
2270 		rec->thread_id = pthread_self();
2271 	}
2272 #ifdef HAVE_LIBBPF_SUPPORT
2273 	if (!opts->no_bpf_event) {
2274 		if (rec->sb_evlist == NULL) {
2275 			rec->sb_evlist = evlist__new();
2276 
2277 			if (rec->sb_evlist == NULL) {
2278 				pr_err("Couldn't create side band evlist.\n.");
2279 				return -1;
2280 			}
2281 		}
2282 
2283 		if (evlist__add_bpf_sb_event(rec->sb_evlist, perf_session__env(rec->session))) {
2284 			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
2285 			return -1;
2286 		}
2287 	}
2288 #endif
2289 	if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
2290 		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
2291 		opts->no_bpf_event = true;
2292 	}
2293 
2294 	return 0;
2295 }
2296 
2297 static int record__init_clock(struct record *rec)
2298 {
2299 	struct perf_session *session = rec->session;
2300 	struct timespec ref_clockid;
2301 	struct timeval ref_tod;
2302 	struct perf_env *env = perf_session__env(session);
2303 	u64 ref;
2304 
2305 	if (!rec->opts.use_clockid)
2306 		return 0;
2307 
2308 	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
2309 		env->clock.clockid_res_ns = rec->opts.clockid_res_ns;
2310 
2311 	env->clock.clockid = rec->opts.clockid;
2312 
2313 	if (gettimeofday(&ref_tod, NULL) != 0) {
2314 		pr_err("gettimeofday failed, cannot set reference time.\n");
2315 		return -1;
2316 	}
2317 
2318 	if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
2319 		pr_err("clock_gettime failed, cannot set reference time.\n");
2320 		return -1;
2321 	}
2322 
2323 	ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
2324 	      (u64) ref_tod.tv_usec * NSEC_PER_USEC;
2325 
2326 	env->clock.tod_ns = ref;
2327 
2328 	ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2329 	      (u64) ref_clockid.tv_nsec;
2330 
2331 	env->clock.clockid_ns = ref;
2332 	return 0;
2333 }
2334 
/*
 * Fire an AUX area snapshot if the trigger is armed: mark the trigger hit,
 * flag that a snapshot is in flight (consumed by the main loop), then ask
 * the auxtrace layer to start it; a start failure moves the trigger into
 * the error state.
 */
static void hit_auxtrace_snapshot_trigger(struct record *rec)
{
	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}
}
2344 
2345 static int record__terminate_thread(struct record_thread *thread_data)
2346 {
2347 	int err;
2348 	enum thread_msg ack = THREAD_MSG__UNDEFINED;
2349 	pid_t tid = thread_data->tid;
2350 
2351 	close(thread_data->pipes.msg[1]);
2352 	thread_data->pipes.msg[1] = -1;
2353 	err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
2354 	if (err > 0)
2355 		pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
2356 	else
2357 		pr_warning("threads[%d]: failed to receive termination notification from %d\n",
2358 			   thread->tid, tid);
2359 
2360 	return 0;
2361 }
2362 
2363 static int record__start_threads(struct record *rec)
2364 {
2365 	int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
2366 	struct record_thread *thread_data = rec->thread_data;
2367 	sigset_t full, mask;
2368 	pthread_t handle;
2369 	pthread_attr_t attrs;
2370 
2371 	thread = &thread_data[0];
2372 
2373 	if (!record__threads_enabled(rec))
2374 		return 0;
2375 
2376 	sigfillset(&full);
2377 	if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2378 		pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
2379 		return -1;
2380 	}
2381 
2382 	pthread_attr_init(&attrs);
2383 	pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2384 
2385 	for (t = 1; t < nr_threads; t++) {
2386 		enum thread_msg msg = THREAD_MSG__UNDEFINED;
2387 
2388 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2389 		pthread_attr_setaffinity_np(&attrs,
2390 					    MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2391 					    (cpu_set_t *)(thread_data[t].mask->affinity.bits));
2392 #endif
2393 		if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2394 			for (tt = 1; tt < t; tt++)
2395 				record__terminate_thread(&thread_data[t]);
2396 			pr_err("Failed to start threads: %s\n", strerror(errno));
2397 			ret = -1;
2398 			goto out_err;
2399 		}
2400 
2401 		err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2402 		if (err > 0)
2403 			pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2404 				  thread_msg_tags[msg]);
2405 		else
2406 			pr_warning("threads[%d]: failed to receive start notification from %d\n",
2407 				   thread->tid, rec->thread_data[t].tid);
2408 	}
2409 
2410 	sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2411 			(cpu_set_t *)thread->mask->affinity.bits);
2412 
2413 	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2414 
2415 out_err:
2416 	pthread_attr_destroy(&attrs);
2417 
2418 	if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2419 		pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
2420 		ret = -1;
2421 	}
2422 
2423 	return ret;
2424 }
2425 
/*
 * Terminate the helper streaming threads (indices 1..nr_threads-1) and
 * fold every thread's per-thread counters into the record/session totals.
 * Always returns 0.
 */
static int record__stop_threads(struct record *rec)
{
	int t;
	struct record_thread *thread_data = rec->thread_data;

	/* thread_data[0] is the main thread itself: nothing to terminate. */
	for (t = 1; t < rec->nr_threads; t++)
		record__terminate_thread(&thread_data[t]);

	for (t = 0; t < rec->nr_threads; t++) {
		/* Sample counts are accumulated even in single-threaded mode. */
		rec->samples += thread_data[t].samples;
		if (!record__threads_enabled(rec))
			continue;
		rec->session->bytes_transferred += thread_data[t].bytes_transferred;
		rec->session->bytes_compressed += thread_data[t].bytes_compressed;
		/* NOTE(review): %lld/%ld assume samples/waking are (unsigned) long long/long — confirm field types. */
		pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
			 thread_data[t].samples, thread_data[t].waking);
		if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
			pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
				 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
		else
			pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
	}

	return 0;
}
2451 
2452 static unsigned long record__waking(struct record *rec)
2453 {
2454 	int t;
2455 	unsigned long waking = 0;
2456 	struct record_thread *thread_data = rec->thread_data;
2457 
2458 	for (t = 0; t < rec->nr_threads; t++)
2459 		waking += thread_data[t].waking;
2460 
2461 	return waking;
2462 }
2463 
/*
 * The core of 'perf record': set up signal handling, the session, the
 * side-band machinery and the (optional) forked workload, then run the
 * main mmap-reading loop until done/drained, and finally flush, reap the
 * child and finalize the output file.
 *
 * @argc/@argv describe the workload command to fork (forks == argc > 0).
 * Returns 0 or the workload's exit status on success, a negative error
 * code or -1 on failure.
 */
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	const bool forks = argc > 0;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;
	float ratio = 0;
	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
	struct perf_env *env;

	/* Install exit/signal handlers first so cleanup runs on any abort. */
	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_cgroup) {
#ifndef HAVE_FILE_HANDLE
		pr_err("cgroup tracking is not supported\n");
		return -1;
#endif
	}

	/* SIGUSR2 drives both auxtrace snapshots and output switching. */
	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	/* Callbacks used when processing the events we record ourselves. */
	perf_tool__init(tool, /*ordered_events=*/true);
	tool->sample		= process_sample_event;
	tool->fork		= perf_event__process_fork;
	tool->exit		= perf_event__process_exit;
	tool->comm		= perf_event__process_comm;
	tool->namespaces	= perf_event__process_namespaces;
	tool->mmap		= build_id__process_mmap;
	tool->mmap2		= build_id__process_mmap2;
	tool->itrace_start	= process_timestamp_boundary;
	tool->aux		= process_timestamp_boundary;
	tool->namespace_events	= rec->opts.record_namespaces;
	tool->cgroup_events	= rec->opts.record_cgroup;
	session = perf_session__new(data, tool);
	if (IS_ERR(session)) {
		pr_err("Perf session creation failed.\n");
		return PTR_ERR(session);
	}
	env = perf_session__env(session);
	if (record__threads_enabled(rec)) {
		if (perf_data__is_pipe(&rec->data)) {
			/* NOTE(review): returning here leaks 'session' — consider goto out_delete_session. */
			pr_err("Parallel trace streaming is not available in pipe mode.\n");
			return -1;
		}
		if (rec->opts.full_auxtrace) {
			/* NOTE(review): returning here leaks 'session' — consider goto out_delete_session. */
			pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
			return -1;
		}
	}

	fd = perf_data__fd(data);
	rec->session = session;

	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
		/* NOTE(review): returning here leaks 'session' — consider goto out_delete_session. */
		pr_err("Compression initialization failed.\n");
		return -1;
	}
#ifdef HAVE_EVENTFD_SUPPORT
	done_fd = eventfd(0, EFD_NONBLOCK);
	if (done_fd < 0) {
		pr_err("Failed to create wakeup eventfd, error: %m\n");
		status = -1;
		goto out_delete_session;
	}
	err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
	if (err < 0) {
		pr_err("Failed to add wakeup eventfd to poll list\n");
		status = err;
		goto out_delete_session;
	}
#endif // HAVE_EVENTFD_SUPPORT

	env->comp_type  = PERF_COMP_ZSTD;
	env->comp_level = rec->opts.comp_level;

	if (rec->opts.kcore &&
	    !record__kcore_readable(&session->machines.host)) {
		/* NOTE(review): returning here leaks 'session' — consider goto out_delete_session. */
		pr_err("ERROR: kcore is not readable.\n");
		return -1;
	}

	if (record__init_clock(rec))
		return -1;

	record__init_features(rec);

	if (forks) {
		err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
					       workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	/*
	 * If we have just single event and are sending data
	 * through pipe, we need to force the ids allocation,
	 * because we synthesize event name through the pipe
	 * and need the id for that.
	 */
	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
		rec->opts.sample_id = true;

	if (rec->timestamp_filename && perf_data__is_pipe(data)) {
		rec->timestamp_filename = false;
		pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n");
	}

	/*
	 * Use global stat_config that is zero meaning aggr_mode is AGGR_NONE
	 * and hybrid_merge is false.
	 */
	evlist__uniquify_evsel_names(rec->evlist, &stat_config);

	evlist__config(rec->evlist, opts, &callchain_param);

	/* Debug message used by test scripts */
	pr_debug3("perf record opening and mmapping events\n");
	if (record__open(rec) != 0) {
		err = -1;
		goto out_free_threads;
	}
	/* Debug message used by test scripts */
	pr_debug3("perf record done opening and mmapping events\n");
	env->comp_mmap_len = session->evlist->core.mmap_len;

	if (rec->opts.kcore) {
		err = record__kcore_copy(&session->machines.host, data);
		if (err) {
			pr_err("ERROR: Failed to copy kcore\n");
			goto out_free_threads;
		}
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (evlist__nr_groups(rec->evlist) == 0)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	/* Write the header (or pipe header) before any event data. */
	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_free_threads;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_free_threads;
	}

	err = -1;
	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		goto out_free_threads;
	}

	if (!evlist__needs_bpf_sb_event(rec->evlist))
		opts->no_bpf_event = true;

	err = record__setup_sb_evlist(rec);
	if (err)
		goto out_free_threads;

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_free_threads;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_free_threads;
		}
	}

	if (record__start_threads(rec))
		goto out_free_threads;

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->target.initial_delay)
		evlist__enable(rec->evlist);

	/*
	 * offcpu-time does not call execve, so enable_on_exe wouldn't work
	 * when recording a workload, do it manually
	 */
	if (rec->off_cpu)
		evlist__enable_evsel(rec->evlist, (char *)OFFCPU_EVENT);

	/*
	 * Let the child rip
	 */
	if (forks) {
		struct machine *machine = &session->machines.host;
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before COMM event
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize COMM event to prevent it.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		evlist__start_workload(rec->evlist);
	}

	if (opts->target.initial_delay) {
		pr_info(EVLIST_DISABLED_MSG);
		if (opts->target.initial_delay > 0) {
			usleep(opts->target.initial_delay * USEC_PER_MSEC);
			evlist__enable(rec->evlist);
			pr_info(EVLIST_ENABLED_MSG);
		}
	}

	err = event_enable_timer__start(rec->evlist->eet);
	if (err)
		goto out_child;

	/* Debug message used by test scripts */
	pr_debug3("perf record has started\n");
	fflush(stderr);

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();

	/*
	 * Must write FINISHED_INIT so it will be seen after all other
	 * synthesized user events, but before any regular events.
	 */
	err = write_finished_init(rec, false);
	if (err < 0)
		goto out_child;

	/* Main loop: drain ring buffers until done or all fds drained. */
	for (;;) {
		unsigned long long hits = thread->samples;

		/*
		 * rec->evlist->bkw_mmap_state is possible to be
		 * BKW_MMAP_EMPTY here: when done == true and
		 * hits != rec->samples in previous round.
		 *
		 * evlist__toggle_bkw_mmap ensure we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec, false) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec, false);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in
			 * overwritable ring buffer should have been collected,
			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 raise after or during record__mmap_read_all(),
			 * record__mmap_read_all() didn't collect data from
			 * overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					record__waking(rec));
			thread->waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		/* No new samples this round: poll, or exit if done/drained. */
		if (hits == thread->samples) {
			if (done || draining)
				break;
			err = fdarray__poll(&thread->pollfd, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			thread->waking++;

			if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
					    record__thread_munmap_filtered, NULL) == 0)
				draining = true;

			err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread);
			if (err)
				goto out_child;
		}

		/* Handle commands arriving on the --control fd, if any. */
		if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
			switch (cmd) {
			case EVLIST_CTL_CMD_SNAPSHOT:
				hit_auxtrace_snapshot_trigger(rec);
				evlist__ctlfd_ack(rec->evlist);
				break;
			case EVLIST_CTL_CMD_STOP:
				done = 1;
				break;
			case EVLIST_CTL_CMD_ACK:
			case EVLIST_CTL_CMD_UNSUPPORTED:
			case EVLIST_CTL_CMD_ENABLE:
			case EVLIST_CTL_CMD_DISABLE:
			case EVLIST_CTL_CMD_EVLIST:
			case EVLIST_CTL_CMD_PING:
			default:
				break;
			}
		}

		err = event_enable_timer__process(rec->evlist->eet);
		if (err < 0)
			goto out_child;
		if (err) {
			err = 0;
			done = 1;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			evlist__disable(rec->evlist);
			disabled = true;
		}
	}

	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	record__synthesize_final_bpf_metadata(rec);

	if (opts->auxtrace_snapshot_on_exit)
		record__auxtrace_snapshot_exit(rec);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		struct strbuf sb = STRBUF_INIT;

		evlist__format_evsels(rec->evlist, &sb, 2048);

		pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
			sb.buf, argv[0], emsg);
		strbuf_release(&sb);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
			record__waking(rec));

	write_finished_init(rec, true);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	/* Stop streaming threads and do a final flush of all ring buffers. */
	record__stop_threads(rec);
	record__mmap_read_all(rec, true);
out_free_threads:
	record__free_thread_data(rec);
	evlist__finalize_ctlfd(rec->evlist);
	record__aio_mmap_read_sync(rec);

	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
		env->comp_ratio = ratio + 0.5;
	}

	/* Reap the forked workload and derive the exit status from it. */
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	if (rec->off_cpu)
		rec->bytes_written += off_cpu_write(rec->session);

	record__read_lost_samples(rec);
	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	/* Final user-facing summary line. */
	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s",
			perf_data__size(data) / 1024.0 / 1024.0,
			data->path, postfix, samples);
		if (ratio) {
			fprintf(stderr,	", compressed (original %.3f MB, ratio is %.3f)",
					rec->session->bytes_transferred / 1024.0 / 1024.0,
					ratio);
		}
		fprintf(stderr, " ]\n");
	}

out_delete_session:
#ifdef HAVE_EVENTFD_SUPPORT
	if (done_fd >= 0) {
		fd = done_fd;
		done_fd = -1;

		close(fd);
	}
#endif
	zstd_fini(&session->zstd_data);
	if (!opts->no_bpf_event)
		evlist__stop_sb_thread(rec->sb_evlist);

	perf_session__delete(session);
	return status;
}
3017 
3018 static void callchain_debug(struct callchain_param *callchain)
3019 {
3020 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
3021 
3022 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
3023 
3024 	if (callchain->record_mode == CALLCHAIN_DWARF)
3025 		pr_debug("callchain: stack dump size %d\n",
3026 			 callchain->dump_size);
3027 }
3028 
3029 int record_opts__parse_callchain(struct record_opts *record,
3030 				 struct callchain_param *callchain,
3031 				 const char *arg, bool unset)
3032 {
3033 	int ret;
3034 	callchain->enabled = !unset;
3035 
3036 	/* --no-call-graph */
3037 	if (unset) {
3038 		callchain->record_mode = CALLCHAIN_NONE;
3039 		pr_debug("callchain: disabled\n");
3040 		return 0;
3041 	}
3042 
3043 	ret = parse_callchain_record_opt(arg, callchain);
3044 	if (!ret) {
3045 		/* Enable data address sampling for DWARF unwind. */
3046 		if (callchain->record_mode == CALLCHAIN_DWARF)
3047 			record->sample_address = true;
3048 		callchain_debug(callchain);
3049 	}
3050 
3051 	return ret;
3052 }
3053 
/* Option callback for --call-graph <mode>: parse into the global callchain_param. */
int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}
3060 
3061 int record_callchain_opt(const struct option *opt,
3062 			 const char *arg __maybe_unused,
3063 			 int unset __maybe_unused)
3064 {
3065 	struct callchain_param *callchain = opt->value;
3066 
3067 	callchain->enabled = true;
3068 
3069 	if (callchain->record_mode == CALLCHAIN_NONE)
3070 		callchain->record_mode = CALLCHAIN_FP;
3071 
3072 	callchain_debug(callchain);
3073 	return 0;
3074 }
3075 
3076 static int perf_record_config(const char *var, const char *value, void *cb)
3077 {
3078 	struct record *rec = cb;
3079 
3080 	if (!strcmp(var, "record.build-id")) {
3081 		if (!strcmp(value, "cache"))
3082 			rec->no_buildid_cache = false;
3083 		else if (!strcmp(value, "no-cache"))
3084 			rec->no_buildid_cache = true;
3085 		else if (!strcmp(value, "skip"))
3086 			rec->no_buildid = true;
3087 		else if (!strcmp(value, "mmap"))
3088 			rec->buildid_mmap = true;
3089 		else if (!strcmp(value, "no-mmap"))
3090 			rec->buildid_mmap = false;
3091 		else
3092 			return -1;
3093 		return 0;
3094 	}
3095 	if (!strcmp(var, "record.call-graph")) {
3096 		var = "call-graph.record-mode";
3097 		return perf_default_config(var, value, cb);
3098 	}
3099 #ifdef HAVE_AIO_SUPPORT
3100 	if (!strcmp(var, "record.aio")) {
3101 		rec->opts.nr_cblocks = strtol(value, NULL, 0);
3102 		if (!rec->opts.nr_cblocks)
3103 			rec->opts.nr_cblocks = nr_cblocks_default;
3104 	}
3105 #endif
3106 	if (!strcmp(var, "record.debuginfod")) {
3107 		rec->debuginfod.urls = strdup(value);
3108 		if (!rec->debuginfod.urls)
3109 			return -ENOMEM;
3110 		rec->debuginfod.set = true;
3111 	}
3112 
3113 	return 0;
3114 }
3115 
3116 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset)
3117 {
3118 	struct record *rec = (struct record *)opt->value;
3119 
3120 	return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset);
3121 }
3122 
3123 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
3124 {
3125 	struct record_opts *opts = (struct record_opts *)opt->value;
3126 
3127 	if (unset || !str)
3128 		return 0;
3129 
3130 	if (!strcasecmp(str, "node"))
3131 		opts->affinity = PERF_AFFINITY_NODE;
3132 	else if (!strcasecmp(str, "cpu"))
3133 		opts->affinity = PERF_AFFINITY_CPU;
3134 
3135 	return 0;
3136 }
3137 
3138 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
3139 {
3140 	mask->nbits = nr_bits;
3141 	mask->bits = bitmap_zalloc(mask->nbits);
3142 	if (!mask->bits)
3143 		return -ENOMEM;
3144 
3145 	return 0;
3146 }
3147 
3148 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
3149 {
3150 	bitmap_free(mask->bits);
3151 	mask->nbits = 0;
3152 }
3153 
3154 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
3155 {
3156 	int ret;
3157 
3158 	ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
3159 	if (ret) {
3160 		mask->affinity.bits = NULL;
3161 		return ret;
3162 	}
3163 
3164 	ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
3165 	if (ret) {
3166 		record__mmap_cpu_mask_free(&mask->maps);
3167 		mask->maps.bits = NULL;
3168 	}
3169 
3170 	return ret;
3171 }
3172 
3173 static void record__thread_mask_free(struct thread_mask *mask)
3174 {
3175 	record__mmap_cpu_mask_free(&mask->maps);
3176 	record__mmap_cpu_mask_free(&mask->affinity);
3177 }
3178 
3179 static int record__parse_threads(const struct option *opt, const char *str, int unset)
3180 {
3181 	int s;
3182 	struct record_opts *opts = opt->value;
3183 
3184 	if (unset || !str || !strlen(str)) {
3185 		opts->threads_spec = THREAD_SPEC__CPU;
3186 	} else {
3187 		for (s = 1; s < THREAD_SPEC__MAX; s++) {
3188 			if (s == THREAD_SPEC__USER) {
3189 				opts->threads_user_spec = strdup(str);
3190 				if (!opts->threads_user_spec)
3191 					return -ENOMEM;
3192 				opts->threads_spec = THREAD_SPEC__USER;
3193 				break;
3194 			}
3195 			if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
3196 				opts->threads_spec = s;
3197 				break;
3198 			}
3199 		}
3200 	}
3201 
3202 	if (opts->threads_spec == THREAD_SPEC__USER)
3203 		pr_debug("threads_spec: %s\n", opts->threads_user_spec);
3204 	else
3205 		pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
3206 
3207 	return 0;
3208 }
3209 
3210 static int parse_output_max_size(const struct option *opt,
3211 				 const char *str, int unset)
3212 {
3213 	unsigned long *s = (unsigned long *)opt->value;
3214 	static struct parse_tag tags_size[] = {
3215 		{ .tag  = 'B', .mult = 1       },
3216 		{ .tag  = 'K', .mult = 1 << 10 },
3217 		{ .tag  = 'M', .mult = 1 << 20 },
3218 		{ .tag  = 'G', .mult = 1 << 30 },
3219 		{ .tag  = 0 },
3220 	};
3221 	unsigned long val;
3222 
3223 	if (unset) {
3224 		*s = 0;
3225 		return 0;
3226 	}
3227 
3228 	val = parse_tag_value(str, tags_size);
3229 	if (val != (unsigned long) -1) {
3230 		*s = val;
3231 		return 0;
3232 	}
3233 
3234 	return -1;
3235 }
3236 
3237 static int record__parse_mmap_pages(const struct option *opt,
3238 				    const char *str,
3239 				    int unset __maybe_unused)
3240 {
3241 	struct record_opts *opts = opt->value;
3242 	char *s, *p;
3243 	unsigned int mmap_pages;
3244 	int ret;
3245 
3246 	if (!str)
3247 		return -EINVAL;
3248 
3249 	s = strdup(str);
3250 	if (!s)
3251 		return -ENOMEM;
3252 
3253 	p = strchr(s, ',');
3254 	if (p)
3255 		*p = '\0';
3256 
3257 	if (*s) {
3258 		ret = __evlist__parse_mmap_pages(&mmap_pages, s);
3259 		if (ret)
3260 			goto out_free;
3261 		opts->mmap_pages = mmap_pages;
3262 	}
3263 
3264 	if (!p) {
3265 		ret = 0;
3266 		goto out_free;
3267 	}
3268 
3269 	ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
3270 	if (ret)
3271 		goto out_free;
3272 
3273 	opts->auxtrace_mmap_pages = mmap_pages;
3274 
3275 out_free:
3276 	free(s);
3277 	return ret;
3278 }
3279 
3280 static int record__parse_off_cpu_thresh(const struct option *opt,
3281 					const char *str,
3282 					int unset __maybe_unused)
3283 {
3284 	struct record_opts *opts = opt->value;
3285 	char *endptr;
3286 	u64 off_cpu_thresh_ms;
3287 
3288 	if (!str)
3289 		return -EINVAL;
3290 
3291 	off_cpu_thresh_ms = strtoull(str, &endptr, 10);
3292 
3293 	/* the threshold isn't string "0", yet strtoull() returns 0, parsing failed */
3294 	if (*endptr || (off_cpu_thresh_ms == 0 && strcmp(str, "0")))
3295 		return -EINVAL;
3296 	else
3297 		opts->off_cpu_thresh_ns = off_cpu_thresh_ms * NSEC_PER_MSEC;
3298 
3299 	return 0;
3300 }
3301 
/*
 * Weak no-op default; an architecture may provide its own definition to
 * adjust record_opts for leaf frame records (no override visible here).
 */
void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
{
}
3305 
3306 static int parse_control_option(const struct option *opt,
3307 				const char *str,
3308 				int unset __maybe_unused)
3309 {
3310 	struct record_opts *opts = opt->value;
3311 
3312 	return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
3313 }
3314 
3315 static void switch_output_size_warn(struct record *rec)
3316 {
3317 	u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
3318 	struct switch_output *s = &rec->switch_output;
3319 
3320 	wakeup_size /= 2;
3321 
3322 	if (s->size < wakeup_size) {
3323 		char buf[100];
3324 
3325 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
3326 		pr_warning("WARNING: switch-output data size lower than "
3327 			   "wakeup kernel buffer size (%s) "
3328 			   "expect bigger perf.data sizes\n", buf);
3329 	}
3330 }
3331 
/*
 * Configure --switch-output from rec->switch_output.str: "signal",
 * a size[BKMG] threshold, or a time[smhd] threshold. Returns 0 when
 * switch-output is disabled or configured, -1 for an unparsable string.
 * Note the cross-jump 'goto do_signal' into the strcmp branch below.
 */
static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag  = 's', .mult = 1        },
		{ .tag  = 'm', .mult = 60       },
		{ .tag  = 'h', .mult = 60*60    },
		{ .tag  = 'd', .mult = 60*60*24 },
		{ .tag  = 0 },
	};
	unsigned long val;

	/*
	 * If we're using --switch-output-events, then we imply its
	 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
	 *  thread to its parent.
	 */
	if (rec->switch_output_event_set) {
		if (record__threads_enabled(rec)) {
			pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
			return 0;
		}
		goto do_signal;
	}

	if (!s->set)
		return 0;

	if (record__threads_enabled(rec)) {
		pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
		return 0;
	}

	if (!strcmp(s->str, "signal")) {
do_signal:
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	/* Not "signal": first try a size threshold (e.g. "10M")... */
	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	/* ...then a time threshold (e.g. "30s"). */
	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	/* Unrecognized --switch-output argument. */
	return -1;

enabled:
	/* Rotated outputs need distinct names, so timestamp them. */
	rec->timestamp_filename = true;
	s->enabled              = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}
3405 
/* Usage strings shown by 'perf record -h' and on option parse errors. */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
/* Exported for builtin-script.c, which reuses the record options. */
const char * const *record_usage = __record_usage;
3412 
3413 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
3414 				  struct perf_sample *sample, struct machine *machine)
3415 {
3416 	/*
3417 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3418 	 * no need to add them twice.
3419 	 */
3420 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3421 		return 0;
3422 	return perf_event__process_mmap(tool, event, sample, machine);
3423 }
3424 
3425 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
3426 				   struct perf_sample *sample, struct machine *machine)
3427 {
3428 	/*
3429 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3430 	 * no need to add them twice.
3431 	 */
3432 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3433 		return 0;
3434 
3435 	return perf_event__process_mmap2(tool, event, sample, machine);
3436 }
3437 
3438 static int process_timestamp_boundary(const struct perf_tool *tool,
3439 				      union perf_event *event __maybe_unused,
3440 				      struct perf_sample *sample,
3441 				      struct machine *machine __maybe_unused)
3442 {
3443 	struct record *rec = container_of(tool, struct record, tool);
3444 
3445 	set_timestamp_boundary(rec, sample->time);
3446 	return 0;
3447 }
3448 
3449 static int parse_record_synth_option(const struct option *opt,
3450 				     const char *str,
3451 				     int unset __maybe_unused)
3452 {
3453 	struct record_opts *opts = opt->value;
3454 	char *p = strdup(str);
3455 
3456 	if (p == NULL)
3457 		return -1;
3458 
3459 	opts->synth = parse_synth_opt(p);
3460 	free(p);
3461 
3462 	if (opts->synth < 0) {
3463 		pr_err("Invalid synth option: %s\n", str);
3464 		return -1;
3465 	}
3466 	return 0;
3467 }
3468 
3469 /*
3470  * XXX Ideally would be local to cmd_record() and passed to a record__new
3471  * because we need to have access to it in record__exit, that is called
3472  * after cmd_record() exits, but since record_options need to be accessible to
3473  * builtin-script, leave it here.
3474  *
3475  * At least we don't ouch it in all the other functions here directly.
3476  *
3477  * Just say no to tons of global variables, sigh.
3478  */
3479 static struct record record = {
3480 	.opts = {
3481 		.sample_time	     = true,
3482 		.mmap_pages	     = UINT_MAX,
3483 		.user_freq	     = UINT_MAX,
3484 		.user_interval	     = ULLONG_MAX,
3485 		.freq		     = 4000,
3486 		.target		     = {
3487 			.uses_mmap   = true,
3488 			.default_per_cpu = true,
3489 		},
3490 		.mmap_flush          = MMAP_FLUSH_DEFAULT,
3491 		.nr_threads_synthesize = 1,
3492 		.ctl_fd              = -1,
3493 		.ctl_fd_ack          = -1,
3494 		.synth               = PERF_SYNTH_ALL,
3495 		.off_cpu_thresh_ns   = OFFCPU_THRESH,
3496 	},
3497 	.buildid_mmap = true,
3498 };
3499 
/* Help text for --call-graph, shared with builtin-script.c. */
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

/* --dry-run: parse options, then exit without recording. */
static bool dry_run;

/* -e/--event parses into the main record evlist. */
static struct parse_events_option_args parse_events_option_args = {
	.evlistp = &record.evlist,
};

/* --switch-output-event parses into the side-band evlist. */
static struct parse_events_option_args switch_output_parse_events_option_args = {
	.evlistp = &record.sb_evlist,
};
3512 
3513 /*
3514  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
3515  * with it and switch to use the library functions in perf_evlist that came
3516  * from builtin-record.c, i.e. use record_opts,
3517  * evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
3518  * using pipes, etc.
3519  */
3520 static struct option __record_options[] = {
3521 	OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
3522 		     "event selector. use 'perf list' to list available events",
3523 		     parse_events_option),
3524 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3525 		     "event filter", parse_filter),
3526 	OPT_BOOLEAN(0, "latency", &record.latency,
3527 		    "Enable data collection for latency profiling.\n"
3528 		    "\t\t\t  Use perf report --latency for latency-centric profile."),
3529 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3530 			   NULL, "don't record events from perf itself",
3531 			   exclude_perf),
3532 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3533 		    "record events on existing process id"),
3534 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3535 		    "record events on existing thread id"),
3536 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
3537 		    "collect data with this RT SCHED_FIFO priority"),
3538 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3539 		    "collect data without buffering"),
3540 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3541 		    "collect raw sample records from all opened counters"),
3542 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3543 			    "system-wide collection from all CPUs"),
3544 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3545 		    "list of cpus to monitor"),
3546 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3547 	OPT_STRING('o', "output", &record.data.path, "file",
3548 		    "output file name"),
3549 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3550 			&record.opts.no_inherit_set,
3551 			"child tasks do not inherit counters"),
3552 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3553 		    "synthesize non-sample events at the end of output"),
3554 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3555 	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3556 	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3557 		    "Fail if the specified frequency can't be used"),
3558 	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3559 		     "profile at this frequency",
3560 		      record__parse_freq),
3561 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3562 		     "number of mmap data pages and AUX area tracing mmap pages",
3563 		     record__parse_mmap_pages),
3564 	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3565 		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
3566 		     record__mmap_flush_parse),
3567 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3568 			   NULL, "enables call-graph recording" ,
3569 			   &record_callchain_opt),
3570 	OPT_CALLBACK(0, "call-graph", &record.opts,
3571 		     "record_mode[,record_size]", record_callchain_help,
3572 		     &record_parse_callchain_opt),
3573 	OPT_INCR('v', "verbose", &verbose,
3574 		    "be more verbose (show counter open errors, etc)"),
3575 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"),
3576 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3577 		    "per thread counts"),
3578 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3579 	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3580 		    "Record the sample physical addresses"),
3581 	OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3582 		    "Record the sampled data address data page size"),
3583 	OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3584 		    "Record the sampled code address (ip) page size"),
3585 	OPT_BOOLEAN(0, "sample-mem-info", &record.opts.sample_data_src,
3586 		    "Record the data source for memory operations"),
3587 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3588 	OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier,
3589 		    "Record the sample identifier"),
3590 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3591 			&record.opts.sample_time_set,
3592 			"Record the sample timestamps"),
3593 	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3594 			"Record the sample period"),
3595 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3596 		    "don't sample"),
3597 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3598 			&record.no_buildid_cache_set,
3599 			"do not update the buildid cache"),
3600 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3601 			&record.no_buildid_set,
3602 			"do not collect buildids in perf.data"),
3603 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3604 		     "monitor event in cgroup name only",
3605 		     parse_cgroups),
3606 	OPT_CALLBACK('D', "delay", &record, "ms",
3607 		     "ms to wait before starting measurement after program start (-1: start with events disabled), "
3608 		     "or ranges of time to enable events e.g. '-D 10-20,30-40'",
3609 		     record__parse_event_enable_time),
3610 	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3611 	OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
3612 
3613 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3614 		     "branch any", "sample any taken branches",
3615 		     parse_branch_stack),
3616 
3617 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3618 		     "branch filter mask", "branch stack filter modes",
3619 		     parse_branch_stack),
3620 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3621 		    "sample by weight (on special events only)"),
3622 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3623 		    "sample transaction flags (special events only)"),
3624 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3625 		    "use per-thread mmaps"),
3626 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3627 		    "sample selected machine registers on interrupt,"
3628 		    " use '-I?' to list register names", parse_intr_regs),
3629 	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3630 		    "sample selected machine registers in user space,"
3631 		    " use '--user-regs=?' to list register names", parse_user_regs),
3632 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3633 		    "Record running/enabled time of read (:S) events"),
3634 	OPT_CALLBACK('k', "clockid", &record.opts,
3635 	"clockid", "clockid to use for events, see clock_gettime()",
3636 	parse_clockid),
3637 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3638 			  "opts", "AUX area tracing Snapshot Mode", ""),
3639 	OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3640 			  "opts", "sample AUX area", ""),
3641 	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3642 			"per thread proc mmap processing timeout in ms"),
3643 	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3644 		    "Record namespaces events"),
3645 	OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3646 		    "Record cgroup events"),
3647 	OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3648 			&record.opts.record_switch_events_set,
3649 			"Record context switch events"),
3650 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3651 			 "Configure all used events to run in kernel space.",
3652 			 PARSE_OPT_EXCLUSIVE),
3653 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3654 			 "Configure all used events to run in user space.",
3655 			 PARSE_OPT_EXCLUSIVE),
3656 	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3657 		    "collect kernel callchains"),
3658 	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3659 		    "collect user callchains"),
3660 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3661 		   "file", "vmlinux pathname"),
3662 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3663 		    "Record build-id of all DSOs regardless of hits"),
3664 	OPT_BOOLEAN_SET(0, "buildid-mmap", &record.buildid_mmap, &record.buildid_mmap_set,
3665 			"Record build-id in mmap events and skip build-id processing."),
3666 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3667 		    "append timestamp to output filename"),
3668 	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3669 		    "Record timestamp boundary (time of first/last samples)"),
3670 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3671 			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3672 			  "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3673 			  "signal"),
3674 	OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args,
3675 			 &record.switch_output_event_set, "switch output event",
3676 			 "switch output event selector. use 'perf list' to list available events",
3677 			 parse_events_option_new_evlist),
3678 	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3679 		   "Limit number of switch output generated files"),
3680 	OPT_BOOLEAN(0, "dry-run", &dry_run,
3681 		    "Parse options then exit"),
3682 #ifdef HAVE_AIO_SUPPORT
3683 	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3684 		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3685 		     record__aio_parse),
3686 #endif
3687 	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3688 		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3689 		     record__parse_affinity),
3690 #ifdef HAVE_ZSTD_SUPPORT
3691 	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3692 			    "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3693 			    record__parse_comp_level),
3694 #endif
3695 	OPT_CALLBACK(0, "max-size", &record.output_max_size,
3696 		     "size", "Limit the maximum size of the output file", parse_output_max_size),
3697 	OPT_UINTEGER(0, "num-thread-synthesize",
3698 		     &record.opts.nr_threads_synthesize,
3699 		     "number of threads to run for event synthesis"),
3700 #ifdef HAVE_LIBPFM
3701 	OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3702 		"libpfm4 event selector. use 'perf list' to list available events",
3703 		parse_libpfm_events_option),
3704 #endif
3705 	OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3706 		     "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3707 		     "\t\t\t  'snapshot': AUX area tracing snapshot).\n"
3708 		     "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3709 		     "\t\t\t  Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3710 		      parse_control_option),
3711 	OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3712 		     "Fine-tune event synthesis: default=all", parse_record_synth_option),
3713 	OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3714 			  &record.debuginfod.set, "debuginfod urls",
3715 			  "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3716 			  "system"),
3717 	OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3718 			    "write collected trace data into several data files using parallel threads",
3719 			    record__parse_threads),
3720 	OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
3721 	OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin",
3722 		   "BPF filter action"),
3723 	OPT_CALLBACK(0, "off-cpu-thresh", &record.opts, "ms",
3724 		     "Dump off-cpu samples if off-cpu time exceeds this threshold (in milliseconds). (Default: 500ms)",
3725 		     record__parse_off_cpu_thresh),
3726 	OPT_END()
3727 };
3728 
3729 struct option *record_options = __record_options;
3730 
3731 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3732 {
3733 	struct perf_cpu cpu;
3734 	int idx;
3735 
3736 	if (cpu_map__is_dummy(cpus))
3737 		return 0;
3738 
3739 	perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) {
3740 		/* Return ENODEV is input cpu is greater than max cpu */
3741 		if ((unsigned long)cpu.cpu > mask->nbits)
3742 			return -ENODEV;
3743 		__set_bit(cpu.cpu, mask->bits);
3744 	}
3745 
3746 	return 0;
3747 }
3748 
3749 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3750 {
3751 	struct perf_cpu_map *cpus;
3752 
3753 	cpus = perf_cpu_map__new(mask_spec);
3754 	if (!cpus)
3755 		return -ENOMEM;
3756 
3757 	bitmap_zero(mask->bits, mask->nbits);
3758 	if (record__mmap_cpu_mask_init(mask, cpus))
3759 		return -ENODEV;
3760 
3761 	perf_cpu_map__put(cpus);
3762 
3763 	return 0;
3764 }
3765 
3766 static void record__free_thread_masks(struct record *rec, int nr_threads)
3767 {
3768 	int t;
3769 
3770 	if (rec->thread_masks)
3771 		for (t = 0; t < nr_threads; t++)
3772 			record__thread_mask_free(&rec->thread_masks[t]);
3773 
3774 	zfree(&rec->thread_masks);
3775 }
3776 
3777 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3778 {
3779 	int t, ret;
3780 
3781 	rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3782 	if (!rec->thread_masks) {
3783 		pr_err("Failed to allocate thread masks\n");
3784 		return -ENOMEM;
3785 	}
3786 
3787 	for (t = 0; t < nr_threads; t++) {
3788 		ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3789 		if (ret) {
3790 			pr_err("Failed to allocate thread masks[%d]\n", t);
3791 			goto out_free;
3792 		}
3793 	}
3794 
3795 	return 0;
3796 
3797 out_free:
3798 	record__free_thread_masks(rec, nr_threads);
3799 
3800 	return ret;
3801 }
3802 
3803 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3804 {
3805 	int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3806 
3807 	ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3808 	if (ret)
3809 		return ret;
3810 
3811 	rec->nr_threads = nr_cpus;
3812 	pr_debug("nr_threads: %d\n", rec->nr_threads);
3813 
3814 	for (t = 0; t < rec->nr_threads; t++) {
3815 		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
3816 		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
3817 		if (verbose > 0) {
3818 			pr_debug("thread_masks[%d]: ", t);
3819 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3820 			pr_debug("thread_masks[%d]: ", t);
3821 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3822 		}
3823 	}
3824 
3825 	return 0;
3826 }
3827 
/*
 * Build rec->thread_masks from nr_spec pairs of CPU list strings: for
 * each spec, maps_spec[s] selects the mmap buffers the thread reads and
 * affinity_spec[s] the CPUs it may run on. Each mask is intersected with
 * the CPUs actually being recorded (must stay non-empty) and must not
 * overlap any previously accepted mask. Returns 0 with rec->nr_threads
 * set, or a negative errno.
 */
static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
					  const char **maps_spec, const char **affinity_spec,
					  u32 nr_spec)
{
	u32 s;
	int ret = 0, t = 0;
	struct mmap_cpu_mask cpus_mask;
	struct thread_mask thread_mask, full_mask, *thread_masks;

	/* cpus_mask: the set of CPUs being recorded; specs are clipped to it. */
	ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
	if (ret) {
		pr_err("Failed to allocate CPUs mask\n");
		return ret;
	}

	ret = record__mmap_cpu_mask_init(&cpus_mask, cpus);
	if (ret) {
		pr_err("Failed to init cpu mask\n");
		goto out_free_cpu_mask;
	}

	/* full_mask accumulates all accepted bits to detect overlaps. */
	ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
	if (ret) {
		pr_err("Failed to allocate full mask\n");
		goto out_free_cpu_mask;
	}

	ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
	if (ret) {
		pr_err("Failed to allocate thread mask\n");
		goto out_free_full_and_cpu_masks;
	}

	for (s = 0; s < nr_spec; s++) {
		ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
		if (ret) {
			pr_err("Failed to initialize maps thread mask\n");
			goto out_free;
		}
		ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
		if (ret) {
			pr_err("Failed to initialize affinity thread mask\n");
			goto out_free;
		}

		/* ignore invalid CPUs but do not allow empty masks */
		if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
				cpus_mask.bits, thread_mask.maps.nbits)) {
			pr_err("Empty maps mask: %s\n", maps_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}
		if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
				cpus_mask.bits, thread_mask.affinity.nbits)) {
			pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}

		/* do not allow intersection with other masks (full_mask) */
		if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
				      thread_mask.maps.nbits)) {
			pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}
		if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
				      thread_mask.affinity.nbits)) {
			pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}

		bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
			  thread_mask.maps.bits, full_mask.maps.nbits);
		/*
		 * NOTE(review): the affinity OR below reuses
		 * full_mask.maps.nbits rather than full_mask.affinity.nbits;
		 * both bitmaps are allocated with cpu__max_cpu().cpu above so
		 * the sizes match, but affinity.nbits looks like the intended
		 * operand — worth confirming/cleaning up.
		 */
		bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
			  thread_mask.affinity.bits, full_mask.maps.nbits);

		/* Grow the array and hand ownership of thread_mask's bitmaps over. */
		thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
		if (!thread_masks) {
			pr_err("Failed to reallocate thread masks\n");
			ret = -ENOMEM;
			goto out_free;
		}
		rec->thread_masks = thread_masks;
		rec->thread_masks[t] = thread_mask;
		if (verbose > 0) {
			pr_debug("thread_masks[%d]: ", t);
			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
			pr_debug("thread_masks[%d]: ", t);
			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
		}
		t++;
		/* Fresh scratch bitmaps for the next spec. */
		ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
		if (ret) {
			pr_err("Failed to allocate thread mask\n");
			goto out_free_full_and_cpu_masks;
		}
	}
	rec->nr_threads = t;
	pr_debug("nr_threads: %d\n", rec->nr_threads);
	if (!rec->nr_threads)
		ret = -EINVAL;

out_free:
	record__thread_mask_free(&thread_mask);
out_free_full_and_cpu_masks:
	record__thread_mask_free(&full_mask);
out_free_cpu_mask:
	record__mmap_cpu_mask_free(&cpus_mask);

	return ret;
}
3941 
3942 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
3943 {
3944 	int ret;
3945 	struct cpu_topology *topo;
3946 
3947 	topo = cpu_topology__new();
3948 	if (!topo) {
3949 		pr_err("Failed to allocate CPU topology\n");
3950 		return -ENOMEM;
3951 	}
3952 
3953 	ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
3954 					     topo->core_cpus_list, topo->core_cpus_lists);
3955 	cpu_topology__delete(topo);
3956 
3957 	return ret;
3958 }
3959 
3960 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
3961 {
3962 	int ret;
3963 	struct cpu_topology *topo;
3964 
3965 	topo = cpu_topology__new();
3966 	if (!topo) {
3967 		pr_err("Failed to allocate CPU topology\n");
3968 		return -ENOMEM;
3969 	}
3970 
3971 	ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
3972 					     topo->package_cpus_list, topo->package_cpus_lists);
3973 	cpu_topology__delete(topo);
3974 
3975 	return ret;
3976 }
3977 
3978 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
3979 {
3980 	u32 s;
3981 	int ret;
3982 	const char **spec;
3983 	struct numa_topology *topo;
3984 
3985 	topo = numa_topology__new();
3986 	if (!topo) {
3987 		pr_err("Failed to allocate NUMA topology\n");
3988 		return -ENOMEM;
3989 	}
3990 
3991 	spec = zalloc(topo->nr * sizeof(char *));
3992 	if (!spec) {
3993 		pr_err("Failed to allocate NUMA spec\n");
3994 		ret = -ENOMEM;
3995 		goto out_delete_topo;
3996 	}
3997 	for (s = 0; s < topo->nr; s++)
3998 		spec[s] = topo->nodes[s].cpus;
3999 
4000 	ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
4001 
4002 	zfree(&spec);
4003 
4004 out_delete_topo:
4005 	numa_topology__delete(topo);
4006 
4007 	return ret;
4008 }
4009 
/*
 * Parse the user-provided --threads specification of the form
 *
 *	<maps mask>/<affinity mask>:<maps mask>/<affinity mask>:...
 *
 * into two parallel arrays of mask strings (one pair per stream thread)
 * and hand them to record__init_thread_masks_spec().
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -EINVAL when a
 * token lacks the '/'-separated affinity half.
 */
static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int t, ret;
	u32 s, nr_spec = 0;
	char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
	char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;

	/* Split the spec on ':' — one token per requested thread. */
	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
		spec = strtok_r(user_spec, ":", &spec_ptr);
		if (spec == NULL)
			break;
		pr_debug2("threads_spec[%d]: %s\n", t, spec);
		/* First half of the token (before '/') is the maps mask. */
		mask = strtok_r(spec, "/", &mask_ptr);
		if (mask == NULL)
			break;
		pr_debug2("  maps mask: %s\n", mask);
		tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate maps spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		maps_spec = tmp_spec;
		/*
		 * dup_mask keeps a handle on the copy until its matching
		 * affinity entry is stored as well, so the cleanup path can
		 * free it exactly once if we bail out mid-pair (nr_spec has
		 * not been incremented yet, so the loop below misses it).
		 */
		maps_spec[nr_spec] = dup_mask = strdup(mask);
		if (!maps_spec[nr_spec]) {
			pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		/* Second half of the token is the affinity mask — mandatory. */
		mask = strtok_r(NULL, "/", &mask_ptr);
		if (mask == NULL) {
			pr_err("Invalid thread maps or affinity specs\n");
			ret = -EINVAL;
			goto out_free;
		}
		pr_debug2("  affinity mask: %s\n", mask);
		tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate affinity spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		affinity_spec = tmp_spec;
		affinity_spec[nr_spec] = strdup(mask);
		if (!affinity_spec[nr_spec]) {
			pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		/* Pair complete: ownership now fully rests with the arrays. */
		dup_mask = NULL;
		nr_spec++;
	}

	ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
					     (const char **)affinity_spec, nr_spec);

out_free:
	/* Frees an unpaired maps entry if we bailed mid-pair; NULL-safe. */
	free(dup_mask);
	for (s = 0; s < nr_spec; s++) {
		if (maps_spec)
			free(maps_spec[s]);
		if (affinity_spec)
			free(affinity_spec[s]);
	}
	free(affinity_spec);
	free(maps_spec);

	return ret;
}
4079 
4080 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
4081 {
4082 	int ret;
4083 
4084 	ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
4085 	if (ret)
4086 		return ret;
4087 
4088 	if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
4089 		return -ENODEV;
4090 
4091 	rec->nr_threads = 1;
4092 
4093 	return 0;
4094 }
4095 
4096 static int record__init_thread_masks(struct record *rec)
4097 {
4098 	int ret = 0;
4099 	struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;
4100 
4101 	if (!record__threads_enabled(rec))
4102 		return record__init_thread_default_masks(rec, cpus);
4103 
4104 	if (evlist__per_thread(rec->evlist)) {
4105 		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
4106 		return -EINVAL;
4107 	}
4108 
4109 	switch (rec->opts.threads_spec) {
4110 	case THREAD_SPEC__CPU:
4111 		ret = record__init_thread_cpu_masks(rec, cpus);
4112 		break;
4113 	case THREAD_SPEC__CORE:
4114 		ret = record__init_thread_core_masks(rec, cpus);
4115 		break;
4116 	case THREAD_SPEC__PACKAGE:
4117 		ret = record__init_thread_package_masks(rec, cpus);
4118 		break;
4119 	case THREAD_SPEC__NUMA:
4120 		ret = record__init_thread_numa_masks(rec, cpus);
4121 		break;
4122 	case THREAD_SPEC__USER:
4123 		ret = record__init_thread_user_masks(rec, cpus);
4124 		break;
4125 	default:
4126 		break;
4127 	}
4128 
4129 	return ret;
4130 }
4131 
4132 int cmd_record(int argc, const char **argv)
4133 {
4134 	int err;
4135 	struct record *rec = &record;
4136 	char errbuf[BUFSIZ];
4137 
4138 	setlocale(LC_ALL, "");
4139 
4140 #ifndef HAVE_BPF_SKEL
4141 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
4142 	set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
4143 # undef set_nobuild
4144 #endif
4145 
4146 	/* Disable eager loading of kernel symbols that adds overhead to perf record. */
4147 	symbol_conf.lazy_load_kernel_maps = true;
4148 	rec->opts.affinity = PERF_AFFINITY_SYS;
4149 
4150 	rec->evlist = evlist__new();
4151 	if (rec->evlist == NULL)
4152 		return -ENOMEM;
4153 
4154 	err = perf_config(perf_record_config, rec);
4155 	if (err)
4156 		return err;
4157 
4158 	argc = parse_options(argc, argv, record_options, record_usage,
4159 			    PARSE_OPT_STOP_AT_NON_OPTION);
4160 	if (quiet)
4161 		perf_quiet_option();
4162 
4163 	err = symbol__validate_sym_arguments();
4164 	if (err)
4165 		return err;
4166 
4167 	perf_debuginfod_setup(&record.debuginfod);
4168 
4169 	/* Make system wide (-a) the default target. */
4170 	if (!argc && target__none(&rec->opts.target))
4171 		rec->opts.target.system_wide = true;
4172 
4173 	if (nr_cgroups && !rec->opts.target.system_wide) {
4174 		usage_with_options_msg(record_usage, record_options,
4175 			"cgroup monitoring only available in system-wide mode");
4176 
4177 	}
4178 
4179 	if (record.latency) {
4180 		/*
4181 		 * There is no fundamental reason why latency profiling
4182 		 * can't work for system-wide mode, but exact semantics
4183 		 * and details are to be defined.
4184 		 * See the following thread for details:
4185 		 * https://lore.kernel.org/all/Z4XDJyvjiie3howF@google.com/
4186 		 */
4187 		if (record.opts.target.system_wide) {
4188 			pr_err("Failed: latency profiling is not supported with system-wide collection.\n");
4189 			err = -EINVAL;
4190 			goto out_opts;
4191 		}
4192 		record.opts.record_switch_events = true;
4193 	}
4194 
4195 	if (!rec->buildid_mmap) {
4196 		pr_debug("Disabling build id in synthesized mmap2 events.\n");
4197 		symbol_conf.no_buildid_mmap2 = true;
4198 	} else if (rec->buildid_mmap_set) {
4199 		/*
4200 		 * Explicitly passing --buildid-mmap disables buildid processing
4201 		 * and cache generation.
4202 		 */
4203 		rec->no_buildid = true;
4204 	}
4205 	if (rec->buildid_mmap && !perf_can_record_build_id()) {
4206 		pr_warning("Missing support for build id in kernel mmap events.\n"
4207 			   "Disable this warning with --no-buildid-mmap\n");
4208 		rec->buildid_mmap = false;
4209 	}
4210 	if (rec->buildid_mmap) {
4211 		/* Enable perf_event_attr::build_id bit. */
4212 		rec->opts.build_id = true;
4213 	}
4214 
4215 	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
4216 		pr_err("Kernel has no cgroup sampling support.\n");
4217 		err = -EINVAL;
4218 		goto out_opts;
4219 	}
4220 
4221 	if (rec->opts.kcore)
4222 		rec->opts.text_poke = true;
4223 
4224 	if (rec->opts.kcore || record__threads_enabled(rec))
4225 		rec->data.is_dir = true;
4226 
4227 	if (record__threads_enabled(rec)) {
4228 		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
4229 			pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
4230 			goto out_opts;
4231 		}
4232 		if (record__aio_enabled(rec)) {
4233 			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
4234 			goto out_opts;
4235 		}
4236 	}
4237 
4238 	if (rec->opts.comp_level != 0) {
4239 		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
4240 		rec->no_buildid = true;
4241 	}
4242 
4243 	if (rec->opts.record_switch_events &&
4244 	    !perf_can_record_switch_events()) {
4245 		ui__error("kernel does not support recording context switch events\n");
4246 		parse_options_usage(record_usage, record_options, "switch-events", 0);
4247 		err = -EINVAL;
4248 		goto out_opts;
4249 	}
4250 
4251 	if (switch_output_setup(rec)) {
4252 		parse_options_usage(record_usage, record_options, "switch-output", 0);
4253 		err = -EINVAL;
4254 		goto out_opts;
4255 	}
4256 
4257 	if (rec->switch_output.time) {
4258 		signal(SIGALRM, alarm_sig_handler);
4259 		alarm(rec->switch_output.time);
4260 	}
4261 
4262 	if (rec->switch_output.num_files) {
4263 		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
4264 						      sizeof(char *));
4265 		if (!rec->switch_output.filenames) {
4266 			err = -EINVAL;
4267 			goto out_opts;
4268 		}
4269 	}
4270 
4271 	if (rec->timestamp_filename && record__threads_enabled(rec)) {
4272 		rec->timestamp_filename = false;
4273 		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
4274 	}
4275 
4276 	if (rec->filter_action) {
4277 		if (!strcmp(rec->filter_action, "pin"))
4278 			err = perf_bpf_filter__pin();
4279 		else if (!strcmp(rec->filter_action, "unpin"))
4280 			err = perf_bpf_filter__unpin();
4281 		else {
4282 			pr_warning("Unknown BPF filter action: %s\n", rec->filter_action);
4283 			err = -EINVAL;
4284 		}
4285 		goto out_opts;
4286 	}
4287 
4288 	/* For backward compatibility, -d implies --mem-info */
4289 	if (rec->opts.sample_address)
4290 		rec->opts.sample_data_src = true;
4291 
4292 	/*
4293 	 * Allow aliases to facilitate the lookup of symbols for address
4294 	 * filters. Refer to auxtrace_parse_filters().
4295 	 */
4296 	symbol_conf.allow_aliases = true;
4297 
4298 	symbol__init(NULL);
4299 
4300 	err = record__auxtrace_init(rec);
4301 	if (err)
4302 		goto out;
4303 
4304 	if (dry_run)
4305 		goto out;
4306 
4307 	err = -ENOMEM;
4308 
4309 	if (rec->no_buildid_cache || rec->no_buildid) {
4310 		disable_buildid_cache();
4311 	} else if (rec->switch_output.enabled) {
4312 		/*
4313 		 * In 'perf record --switch-output', disable buildid
4314 		 * generation by default to reduce data file switching
4315 		 * overhead. Still generate buildid if they are required
4316 		 * explicitly using
4317 		 *
4318 		 *  perf record --switch-output --no-no-buildid \
4319 		 *              --no-no-buildid-cache
4320 		 *
4321 		 * Following code equals to:
4322 		 *
4323 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
4324 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
4325 		 *         disable_buildid_cache();
4326 		 */
4327 		bool disable = true;
4328 
4329 		if (rec->no_buildid_set && !rec->no_buildid)
4330 			disable = false;
4331 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
4332 			disable = false;
4333 		if (disable) {
4334 			rec->no_buildid = true;
4335 			rec->no_buildid_cache = true;
4336 			disable_buildid_cache();
4337 		}
4338 	}
4339 
4340 	if (record.opts.overwrite)
4341 		record.opts.tail_synthesize = true;
4342 
4343 	if (rec->evlist->core.nr_entries == 0) {
4344 		struct evlist *def_evlist = evlist__new_default();
4345 
4346 		if (!def_evlist)
4347 			goto out;
4348 
4349 		evlist__splice_list_tail(rec->evlist, &def_evlist->core.entries);
4350 		evlist__delete(def_evlist);
4351 	}
4352 
4353 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
4354 		rec->opts.no_inherit = true;
4355 
4356 	err = target__validate(&rec->opts.target);
4357 	if (err) {
4358 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
4359 		ui__warning("%s\n", errbuf);
4360 	}
4361 
4362 	if (rec->uid_str) {
4363 		uid_t uid = parse_uid(rec->uid_str);
4364 
4365 		if (uid == UINT_MAX) {
4366 			ui__error("Invalid User: %s", rec->uid_str);
4367 			err = -EINVAL;
4368 			goto out;
4369 		}
4370 		err = parse_uid_filter(rec->evlist, uid);
4371 		if (err)
4372 			goto out;
4373 
4374 		/* User ID filtering implies system wide. */
4375 		rec->opts.target.system_wide = true;
4376 	}
4377 
4378 	/* Enable ignoring missing threads when -p option is defined. */
4379 	rec->opts.ignore_missing_thread = rec->opts.target.pid;
4380 
4381 	evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);
4382 
4383 	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
4384 		arch__add_leaf_frame_record_opts(&rec->opts);
4385 
4386 	err = -ENOMEM;
4387 	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
4388 		if (rec->opts.target.pid != NULL) {
4389 			pr_err("Couldn't create thread/CPU maps: %s\n",
4390 				errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
4391 			goto out;
4392 		}
4393 		else
4394 			usage_with_options(record_usage, record_options);
4395 	}
4396 
4397 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
4398 	if (err)
4399 		goto out;
4400 
4401 	/*
4402 	 * We take all buildids when the file contains
4403 	 * AUX area tracing data because we do not decode the
4404 	 * trace because it would take too long.
4405 	 */
4406 	if (rec->opts.full_auxtrace)
4407 		rec->buildid_all = true;
4408 
4409 	if (rec->opts.text_poke) {
4410 		err = record__config_text_poke(rec->evlist);
4411 		if (err) {
4412 			pr_err("record__config_text_poke failed, error %d\n", err);
4413 			goto out;
4414 		}
4415 	}
4416 
4417 	if (rec->off_cpu) {
4418 		err = record__config_off_cpu(rec);
4419 		if (err) {
4420 			pr_err("record__config_off_cpu failed, error %d\n", err);
4421 			goto out;
4422 		}
4423 	}
4424 
4425 	if (record_opts__config(&rec->opts)) {
4426 		err = -EINVAL;
4427 		goto out;
4428 	}
4429 
4430 	err = record__config_tracking_events(rec);
4431 	if (err) {
4432 		pr_err("record__config_tracking_events failed, error %d\n", err);
4433 		goto out;
4434 	}
4435 
4436 	err = record__init_thread_masks(rec);
4437 	if (err) {
4438 		pr_err("Failed to initialize parallel data streaming masks\n");
4439 		goto out;
4440 	}
4441 
4442 	if (rec->opts.nr_cblocks > nr_cblocks_max)
4443 		rec->opts.nr_cblocks = nr_cblocks_max;
4444 	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
4445 
4446 	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
4447 	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
4448 
4449 	if (rec->opts.comp_level > comp_level_max)
4450 		rec->opts.comp_level = comp_level_max;
4451 	pr_debug("comp level: %d\n", rec->opts.comp_level);
4452 
4453 	err = __cmd_record(&record, argc, argv);
4454 out:
4455 	record__free_thread_masks(rec, rec->nr_threads);
4456 	rec->nr_threads = 0;
4457 	symbol__exit();
4458 	auxtrace_record__free(rec->itr);
4459 out_opts:
4460 	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
4461 	evlist__delete(rec->evlist);
4462 	return err;
4463 }
4464 
4465 static void snapshot_sig_handler(int sig __maybe_unused)
4466 {
4467 	struct record *rec = &record;
4468 
4469 	hit_auxtrace_snapshot_trigger(rec);
4470 
4471 	if (switch_output_signal(rec))
4472 		trigger_hit(&switch_output_trigger);
4473 }
4474 
4475 static void alarm_sig_handler(int sig __maybe_unused)
4476 {
4477 	struct record *rec = &record;
4478 
4479 	if (switch_output_time(rec))
4480 		trigger_hit(&switch_output_trigger);
4481 }
4482