xref: /linux/tools/perf/builtin-record.c (revision 2c240484cf52da3c2ca14cfb70e8cf9179197ced)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10 
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include <internal/xyarray.h>
14 #include "util/parse-events.h"
15 #include "util/config.h"
16 
17 #include "util/callchain.h"
18 #include "util/cgroup.h"
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/mmap.h"
25 #include "util/mutex.h"
26 #include "util/target.h"
27 #include "util/session.h"
28 #include "util/tool.h"
29 #include "util/stat.h"
30 #include "util/symbol.h"
31 #include "util/record.h"
32 #include "util/cpumap.h"
33 #include "util/thread_map.h"
34 #include "util/data.h"
35 #include "util/perf_regs.h"
36 #include "util/auxtrace.h"
37 #include "util/tsc.h"
38 #include "util/parse-branch-options.h"
39 #include "util/parse-regs-options.h"
40 #include "util/perf_api_probe.h"
41 #include "util/trigger.h"
42 #include "util/perf-hooks.h"
43 #include "util/cpu-set-sched.h"
44 #include "util/synthetic-events.h"
45 #include "util/time-utils.h"
46 #include "util/units.h"
47 #include "util/bpf-event.h"
48 #include "util/util.h"
49 #include "util/pfm.h"
50 #include "util/pmu.h"
51 #include "util/pmus.h"
52 #include "util/clockid.h"
53 #include "util/off_cpu.h"
54 #include "util/bpf-filter.h"
55 #include "util/strbuf.h"
56 #include "asm/bug.h"
57 #include "perf.h"
58 #include "cputopo.h"
59 
60 #include <errno.h>
61 #include <inttypes.h>
62 #include <locale.h>
63 #include <poll.h>
64 #include <pthread.h>
65 #include <unistd.h>
66 #ifndef HAVE_GETTID
67 #include <syscall.h>
68 #endif
69 #include <sched.h>
70 #include <signal.h>
71 #ifdef HAVE_EVENTFD_SUPPORT
72 #include <sys/eventfd.h>
73 #endif
74 #include <sys/mman.h>
75 #include <sys/wait.h>
76 #include <sys/types.h>
77 #include <sys/stat.h>
78 #include <fcntl.h>
79 #include <linux/err.h>
80 #include <linux/string.h>
81 #include <linux/time64.h>
82 #include <linux/zalloc.h>
83 #include <linux/bitmap.h>
84 #include <sys/time.h>
85 
/*
 * State for output rotation: start a new perf.data file when a signal
 * arrives or when a size/time threshold is crossed.
 */
struct switch_output {
	bool		 enabled;	/* any switch-output mode is active */
	bool		 signal;	/* rotate on signal */
	unsigned long	 size;		/* rotate after this many bytes (0 = off) */
	unsigned long	 time;		/* rotate on a time threshold (0 = off) */
	const char	*str;		/* raw option argument string */
	bool		 set;		/* option was given on the command line */
	char		 **filenames;	/* generated output file names */
	int		 num_files;	/* capacity of filenames[] */
	int		 cur_file;	/* index of the current filenames[] slot */
};
97 
/* Pair of CPU bitmaps describing one reader thread (see --threads). */
struct thread_mask {
	struct mmap_cpu_mask	maps;		/* CPUs whose mmaps this thread services */
	struct mmap_cpu_mask	affinity;	/* CPUs the thread runs on — presumably its scheduling affinity */
};
102 
/* Per reader-thread state used for parallel trace streaming. */
struct record_thread {
	pid_t			tid;		/* thread id of the reader */
	struct thread_mask	*mask;		/* maps/affinity CPU masks */
	struct {
		int		msg[2];		/* inter-thread message pipe */
		int		ack[2];		/* acknowledgement pipe */
	} pipes;
	struct fdarray		pollfd;		/* fds this thread polls on */
	int			ctlfd_pos;	/* position of the control fd in pollfd */
	int			nr_mmaps;	/* entries in maps/overwrite_maps */
	struct mmap		**maps;		/* regular mmaps serviced by this thread */
	struct mmap		**overwrite_maps; /* overwrite-mode mmaps serviced */
	struct record		*rec;		/* owning record session */
	unsigned long long	samples;	/* samples handled by this thread */
	unsigned long		waking;		/* wakeup counter */
	u64			bytes_written;	/* bytes written by this thread */
	u64			bytes_transferred; /* presumably pre-compression byte count */
	u64			bytes_compressed;  /* presumably post-compression byte count */
};
122 
123 static __thread struct record_thread *thread;
124 
/* Messages exchanged over the record_thread msg/ack pipes. */
enum thread_msg {
	THREAD_MSG__UNDEFINED = 0,
	THREAD_MSG__READY,
	THREAD_MSG__MAX,
};

/* Printable names for enum thread_msg values. */
static const char *thread_msg_tags[THREAD_MSG__MAX] = {
	"UNDEFINED", "READY"
};
134 
/* How the set of parallel streaming threads is specified. */
enum thread_spec {
	THREAD_SPEC__UNDEFINED = 0,
	THREAD_SPEC__CPU,
	THREAD_SPEC__CORE,
	THREAD_SPEC__PACKAGE,
	THREAD_SPEC__NUMA,
	THREAD_SPEC__USER,
	THREAD_SPEC__MAX,
};

/* Printable names for enum thread_spec values. */
static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
	"undefined", "cpu", "core", "package", "numa", "user"
};
148 
/*
 * Correspondence between an entry in the evlist's pollfd array and the
 * duplicated entry in a record_thread's private pollfd array.
 */
struct pollfd_index_map {
	int evlist_pollfd_index;
	int thread_pollfd_index;
};
153 
/* Top-level state for one 'perf record' session. */
struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;		/* bytes written by the main thread */
	u64			thread_bytes_written;	/* bytes written via per-thread files */
	struct perf_data	data;
	struct auxtrace_record	*itr;			/* AUX area tracing state, if any */
	struct evlist	*evlist;
	struct perf_session	*session;
	struct evlist		*sb_evlist;
	pthread_t		thread_id;
	int			realtime_prio;
	bool			latency;
	bool			switch_output_event_set;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			buildid_mmap;
	bool			buildid_mmap_set;
	bool			timestamp_filename;
	bool			timestamp_boundary;
	bool			off_cpu;
	const char		*filter_action;
	const char		*uid_str;
	struct switch_output	switch_output;		/* output rotation state */
	unsigned long long	samples;
	unsigned long		output_max_size;	/* = 0: unlimited */
	struct perf_debuginfod	debuginfod;
	int			nr_threads;		/* number of parallel streaming threads */
	struct thread_mask	*thread_masks;		/* per-thread CPU masks */
	struct record_thread	*thread_data;		/* per-thread reader state */
	struct pollfd_index_map	*index_map;		/* evlist<->thread pollfd index pairs */
	size_t			index_map_sz;		/* allocated entries in index_map */
	size_t			index_map_cnt;		/* used entries in index_map */
};
191 
/* Set to 1 (by signal handlers or size limits) to stop the main loop. */
static volatile int done;

/* Non-zero once an AUX area snapshot has been requested/started. */
static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

/* Printable names for the PERF_AFFINITY_* modes. */
static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};
201 
202 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
203 				  struct perf_sample *sample, struct machine *machine);
204 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
205 				   struct perf_sample *sample, struct machine *machine);
206 static int process_timestamp_boundary(const struct perf_tool *tool,
207 				      union perf_event *event,
208 				      struct perf_sample *sample,
209 				      struct machine *machine);
210 
#ifndef HAVE_GETTID
/* Fallback for C libraries that do not provide gettid(). */
static inline pid_t gettid(void)
{
	return (pid_t)syscall(__NR_gettid);
}
#endif
217 
218 static int record__threads_enabled(struct record *rec)
219 {
220 	return rec->opts.threads_spec;
221 }
222 
223 static bool switch_output_signal(struct record *rec)
224 {
225 	return rec->switch_output.signal &&
226 	       trigger_is_ready(&switch_output_trigger);
227 }
228 
229 static bool switch_output_size(struct record *rec)
230 {
231 	return rec->switch_output.size &&
232 	       trigger_is_ready(&switch_output_trigger) &&
233 	       (rec->bytes_written >= rec->switch_output.size);
234 }
235 
236 static bool switch_output_time(struct record *rec)
237 {
238 	return rec->switch_output.time &&
239 	       trigger_is_ready(&switch_output_trigger);
240 }
241 
242 static u64 record__bytes_written(struct record *rec)
243 {
244 	return rec->bytes_written + rec->thread_bytes_written;
245 }
246 
247 static bool record__output_max_size_exceeded(struct record *rec)
248 {
249 	return rec->output_max_size &&
250 	       (record__bytes_written(rec) >= rec->output_max_size);
251 }
252 
/*
 * Append @size bytes at @bf to the output. With parallel streaming a
 * mmap may carry its own output file; writes to it are accounted both
 * per-thread and in rec->thread_bytes_written, everything else counts
 * against rec->bytes_written. Returns 0 on success, -1 on write failure.
 * Side effects: sets the global 'done' flag when the output size limit
 * is exceeded, and arms the switch-output trigger on the size threshold.
 */
static int record__write(struct record *rec, struct mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	/* A per-mmap output file takes precedence over the session file. */
	if (map && map->file)
		file = map->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	/* Account the bytes against the right counter(s). */
	if (map && map->file) {
		thread->bytes_written += size;
		rec->thread_bytes_written += size;
	} else {
		rec->bytes_written += size;
	}

	if (record__output_max_size_exceeded(rec) && !done) {
		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
				" stopping session ]\n",
				record__bytes_written(rec) >> 10);
		done = 1;
	}

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}
285 
286 static int record__aio_enabled(struct record *rec);
287 static int record__comp_enabled(struct record *rec);
288 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
289 			    void *dst, size_t dst_size, void *src, size_t src_size);
290 
291 #ifdef HAVE_AIO_SUPPORT
/*
 * Queue one asynchronous write of @size bytes at offset @off on
 * @trace_fd, retrying for as long as queueing fails with EAGAIN.
 * Returns 0 when the request was queued, -1 on a hard error (the
 * control block's fd is reset to -1 so completion code skips it).
 */
static int record__aio_write(struct aiocb *cblock, int trace_fd,
		void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf    = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	for (;;) {
		rc = aio_write(cblock);
		if (rc == 0)
			break;
		if (errno == EAGAIN)
			continue;
		cblock->aio_fildes = -1;
		pr_err("failed to queue perf data, error: %m\n");
		break;
	}

	return rc;
}
316 
/*
 * Check one in-flight aio write. Returns 0 if it is still in progress
 * (or was restarted with a remainder after a short write), 1 once the
 * whole request completed: the control block is then freed for reuse
 * (aio_fildes = -1) and the mmap reference taken at submit is dropped.
 */
static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		/* Treat the failed attempt as zero bytes written and retry below. */
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push() so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(&md->core);
		rc = 1;
	} else {
		/*
		 * aio write request may require restart with the
		 * remainder if the kernel didn't write whole
		 * chunk at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}
362 
/*
 * Wait for aio writes on @md to complete. When @sync_all is true, loop
 * until every control block is free and return -1. Otherwise return the
 * index of the first free control block, suspending (1ms at a time) on
 * the still-busy requests until one becomes available.
 */
static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			/* Free already, or just completed? */
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * Started aio write is not complete yet
				 * so it has to be waited before the
				 * next allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		/* sync_all: every request done; NULLed list means nothing pending. */
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}
397 
/* Context passed to record__aio_pushfn() while it fills an aio buffer. */
struct record_aio {
	struct record	*rec;	/* owning record session */
	void		*data;	/* map->aio.data[] buffer being filled */
	size_t		size;	/* bytes accumulated in data so far */
};
403 
/*
 * perf_mmap__push() callback: copy (or zstd-compress) one chunk of the
 * kernel ring buffer into the aio staging buffer described by @to.
 * Returns the number of bytes appended, or a negative error from the
 * compressor. Takes a reference on @map the first time it appends data.
 */
static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer
	 * to release space in the kernel buffer as fast as possible, calling
	 * perf_mmap__consume() from perf_mmap__push() function.
	 *
	 * That lets the kernel to proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Coping can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first move
	 * part of data from map->start till the upper bound and then the remainder
	 * from the beginning of the kernel buffer till the end of the data chunk.
	 */

	if (record__comp_enabled(aio->rec)) {
		ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
						   mmap__mmap_len(map) - aio->size,
						   buf, size);
		if (compressed < 0)
			return (int)compressed;

		size = compressed;
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard map->aio.data[] buffer
		 * from premature deallocation because map object can be
		 * released earlier than aio write request started on
		 * map->aio.data[] buffer is complete.
		 *
		 * perf_mmap__put() is done at record__aio_complete()
		 * after started aio request completion or at record__aio_push()
		 * if the request failed to start.
		 */
		perf_mmap__get(&map->core);
	}

	aio->size += size;

	return size;
}
452 
/*
 * Drain @map by queueing an asynchronous write at file offset *@off.
 * On success *off is advanced past the queued data. Returns 0 on
 * success, > 0 when there was no data, < 0 on error.
 */
static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait till map->aio.data[] buffer
	 * becomes available after previous aio write operation.
	 */

	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];
	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
		return ret;

	rec->samples++;
	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
		/*
		 * Decrement map->refcount incremented in record__aio_pushfn()
		 * back if record__aio_write() operation failed to start, otherwise
		 * map->refcount is decremented in record__aio_complete() after
		 * aio write operation finishes successfully.
		 */
		perf_mmap__put(&map->core);
	}

	return ret;
}
489 
/* Current write position of the trace file, or -1 on lseek failure. */
static off_t record__aio_get_pos(int trace_fd)
{
	off_t pos = lseek(trace_fd, 0, SEEK_CUR);

	return pos;
}
494 
/* Reposition the trace file to @pos; any lseek error is ignored. */
static void record__aio_set_pos(int trace_fd, off_t pos)
{
	(void)lseek(trace_fd, pos, SEEK_SET);
}
499 
500 static void record__aio_mmap_read_sync(struct record *rec)
501 {
502 	int i;
503 	struct evlist *evlist = rec->evlist;
504 	struct mmap *maps = evlist->mmap;
505 
506 	if (!record__aio_enabled(rec))
507 		return;
508 
509 	for (i = 0; i < evlist->core.nr_mmaps; i++) {
510 		struct mmap *map = &maps[i];
511 
512 		if (map->core.base)
513 			record__aio_sync(map, true);
514 	}
515 }
516 
/* Default and maximum number of per-mmap aio control blocks. */
static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;
519 
520 static int record__aio_parse(const struct option *opt,
521 			     const char *str,
522 			     int unset)
523 {
524 	struct record_opts *opts = (struct record_opts *)opt->value;
525 
526 	if (unset) {
527 		opts->nr_cblocks = 0;
528 	} else {
529 		if (str)
530 			opts->nr_cblocks = strtol(str, NULL, 0);
531 		if (!opts->nr_cblocks)
532 			opts->nr_cblocks = nr_cblocks_default;
533 	}
534 
535 	return 0;
536 }
#else /* HAVE_AIO_SUPPORT */
/* aio support compiled out: no control blocks and no-op/failing stubs. */
static int nr_cblocks_max = 0;

/* No aio available: report failure to push. */
static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
			    off_t *off __maybe_unused)
{
	return -1;
}

/* No aio available: no meaningful file position. */
static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif
559 
560 static int record__aio_enabled(struct record *rec)
561 {
562 	return rec->opts.nr_cblocks > 0;
563 }
564 
565 #define MMAP_FLUSH_DEFAULT 1
566 static int record__mmap_flush_parse(const struct option *opt,
567 				    const char *str,
568 				    int unset)
569 {
570 	int flush_max;
571 	struct record_opts *opts = (struct record_opts *)opt->value;
572 	static struct parse_tag tags[] = {
573 			{ .tag  = 'B', .mult = 1       },
574 			{ .tag  = 'K', .mult = 1 << 10 },
575 			{ .tag  = 'M', .mult = 1 << 20 },
576 			{ .tag  = 'G', .mult = 1 << 30 },
577 			{ .tag  = 0 },
578 	};
579 
580 	if (unset)
581 		return 0;
582 
583 	if (str) {
584 		opts->mmap_flush = parse_tag_value(str, tags);
585 		if (opts->mmap_flush == (int)-1)
586 			opts->mmap_flush = strtol(str, NULL, 0);
587 	}
588 
589 	if (!opts->mmap_flush)
590 		opts->mmap_flush = MMAP_FLUSH_DEFAULT;
591 
592 	flush_max = evlist__mmap_size(opts->mmap_pages);
593 	flush_max /= 4;
594 	if (opts->mmap_flush > flush_max)
595 		opts->mmap_flush = flush_max;
596 
597 	return 0;
598 }
599 
600 #ifdef HAVE_ZSTD_SUPPORT
601 static unsigned int comp_level_default = 1;
602 
603 static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
604 {
605 	struct record_opts *opts = opt->value;
606 
607 	if (unset) {
608 		opts->comp_level = 0;
609 	} else {
610 		if (str)
611 			opts->comp_level = strtol(str, NULL, 0);
612 		if (!opts->comp_level)
613 			opts->comp_level = comp_level_default;
614 	}
615 
616 	return 0;
617 }
618 #endif
619 static unsigned int comp_level_max = 22;
620 
621 static int record__comp_enabled(struct record *rec)
622 {
623 	return rec->opts.comp_level > 0;
624 }
625 
626 static int process_synthesized_event(const struct perf_tool *tool,
627 				     union perf_event *event,
628 				     struct perf_sample *sample __maybe_unused,
629 				     struct machine *machine __maybe_unused)
630 {
631 	struct record *rec = container_of(tool, struct record, tool);
632 	return record__write(rec, NULL, event, event->header.size);
633 }
634 
635 static struct mutex synth_lock;
636 
637 static int process_locked_synthesized_event(const struct perf_tool *tool,
638 				     union perf_event *event,
639 				     struct perf_sample *sample __maybe_unused,
640 				     struct machine *machine __maybe_unused)
641 {
642 	int ret;
643 
644 	mutex_lock(&synth_lock);
645 	ret = process_synthesized_event(tool, event, sample, machine);
646 	mutex_unlock(&synth_lock);
647 	return ret;
648 }
649 
/*
 * perf_mmap__push() callback for synchronous output: optionally wraps
 * the chunk in a perf_record_compressed2 event (zstd) before writing.
 * Returns 0 on success, non-zero on write/compression failure.
 */
static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
	struct record *rec = to;

	if (record__comp_enabled(rec)) {
		struct perf_record_compressed2 *event = map->data;
		size_t padding = 0;
		u8 pad[8] = {0};
		ssize_t compressed = zstd_compress(rec->session, map, map->data,
						   mmap__mmap_len(map), bf, size);

		if (compressed < 0)
			return (int)compressed;

		bf = event;
		thread->samples++;

		/*
		 * The record from `zstd_compress` is not 8 bytes aligned, which would cause asan
		 * error. We make it aligned here.
		 */
		event->data_size = compressed - sizeof(struct perf_record_compressed2);
		event->header.size = PERF_ALIGN(compressed, sizeof(u64));
		padding = event->header.size - compressed;
		return record__write(rec, map, bf, compressed) ||
		       record__write(rec, map, &pad, padding);
	}

	thread->samples++;
	return record__write(rec, map, bf, size);
}
681 
/* Signal that stopped the run, re-raised by record__sig_exit() (-1 = none). */
static volatile sig_atomic_t signr = -1;
/* Set by the SIGCHLD path of sig_handler() once the workload exits. */
static volatile sig_atomic_t child_finished;
#ifdef HAVE_EVENTFD_SUPPORT
/* eventfd used by sig_handler() to wake up poll() (-1 = not in use). */
static volatile sig_atomic_t done_fd = -1;
#endif
687 
/*
 * Common handler for terminating signals and SIGCHLD: remember which
 * signal arrived (later re-raised by record__sig_exit()), note workload
 * exit on SIGCHLD, and set 'done' so the main recording loop stops.
 */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
#ifdef HAVE_EVENTFD_SUPPORT
	if (done_fd >= 0) {
		u64 tmp = 1;
		/* Preserve errno so the interrupted code sees it unchanged. */
		int orig_errno = errno;

		/*
		 * It is possible for this signal handler to run after done is
		 * checked in the main loop, but before the perf counter fds are
		 * polled. If this happens, the poll() will continue to wait
		 * even though done is set, and will only break out if either
		 * another signal is received, or the counters are ready for
		 * read. To ensure the poll() doesn't sleep when done is set,
		 * use an eventfd (done_fd) to wake up the poll().
		 */
		if (write(done_fd, &tmp, sizeof(tmp)) < 0)
			pr_err("failed to signal wakeup fd, error: %m\n");

		errno = orig_errno;
	}
#endif // HAVE_EVENTFD_SUPPORT
}
717 
/* SIGSEGV handler: let perf hooks recover, then dump a stack trace. */
static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}
723 
724 static void record__sig_exit(void)
725 {
726 	if (signr == -1)
727 		return;
728 
729 	signal(signr, SIG_DFL);
730 	raise(signr);
731 }
732 
733 #ifdef HAVE_AUXTRACE_SUPPORT
734 
/*
 * Write one AUX area trace chunk to the output: the auxtrace event
 * header, then up to two data fragments (the chunk may wrap around the
 * ring buffer), then zero padding to an 8-byte boundary. For non-pipe,
 * single-file output the event's file offset is also recorded in the
 * session's auxtrace index.
 */
static int record__process_auxtrace(const struct perf_tool *tool,
				    struct mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}
772 
773 static int record__auxtrace_mmap_read(struct record *rec,
774 				      struct mmap *map)
775 {
776 	int ret;
777 
778 	ret = auxtrace_mmap__read(map, rec->itr,
779 				  perf_session__env(rec->session),
780 				  &rec->tool,
781 				  record__process_auxtrace);
782 	if (ret < 0)
783 		return ret;
784 
785 	if (ret)
786 		rec->samples++;
787 
788 	return 0;
789 }
790 
791 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
792 					       struct mmap *map)
793 {
794 	int ret;
795 
796 	ret = auxtrace_mmap__read_snapshot(map, rec->itr,
797 					   perf_session__env(rec->session),
798 					   &rec->tool,
799 					   record__process_auxtrace,
800 					   rec->opts.auxtrace_snapshot_size);
801 	if (ret < 0)
802 		return ret;
803 
804 	if (ret)
805 		rec->samples++;
806 
807 	return 0;
808 }
809 
810 static int record__auxtrace_read_snapshot_all(struct record *rec)
811 {
812 	int i;
813 	int rc = 0;
814 
815 	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
816 		struct mmap *map = &rec->evlist->mmap[i];
817 
818 		if (!map->auxtrace_mmap.base)
819 			continue;
820 
821 		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
822 			rc = -1;
823 			goto out;
824 		}
825 	}
826 out:
827 	return rc;
828 }
829 
830 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
831 {
832 	pr_debug("Recording AUX area tracing snapshot\n");
833 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
834 		trigger_error(&auxtrace_snapshot_trigger);
835 	} else {
836 		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
837 			trigger_error(&auxtrace_snapshot_trigger);
838 		else
839 			trigger_ready(&auxtrace_snapshot_trigger);
840 	}
841 }
842 
843 static int record__auxtrace_snapshot_exit(struct record *rec)
844 {
845 	if (trigger_is_error(&auxtrace_snapshot_trigger))
846 		return 0;
847 
848 	if (!auxtrace_record__snapshot_started &&
849 	    auxtrace_record__snapshot_start(rec->itr))
850 		return -1;
851 
852 	record__read_auxtrace_snapshot(rec, true);
853 	if (trigger_is_error(&auxtrace_snapshot_trigger))
854 		return -1;
855 
856 	return 0;
857 }
858 
/*
 * Initialize AUX area tracing: allocate the auxtrace record state (if
 * not already present) and parse the snapshot, sampling, aux-action and
 * filter options. Snapshot/sample modes are incompatible with parallel
 * streaming. Returns 0 on success or a negative error.
 */
static int record__auxtrace_init(struct record *rec)
{
	int err;

	if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
	    && record__threads_enabled(rec)) {
		pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
		return -EINVAL;
	}

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
					    rec->opts.auxtrace_sample_opts);
	if (err)
		return err;

	err = auxtrace_parse_aux_action(rec->evlist);
	if (err)
		return err;

	return auxtrace_parse_filters(rec->evlist);
}
891 
#else

/* AUX area tracing support compiled out: no-op stand-ins. */
static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct mmap *map __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
				    bool on_exit __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static inline
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif
925 
926 static int record__config_text_poke(struct evlist *evlist)
927 {
928 	struct evsel *evsel;
929 
930 	/* Nothing to do if text poke is already configured */
931 	evlist__for_each_entry(evlist, evsel) {
932 		if (evsel->core.attr.text_poke)
933 			return 0;
934 	}
935 
936 	evsel = evlist__add_dummy_on_all_cpus(evlist);
937 	if (!evsel)
938 		return -ENOMEM;
939 
940 	evsel->core.attr.text_poke = 1;
941 	evsel->core.attr.ksymbol = 1;
942 	evsel->immediate = true;
943 	evsel__set_sample_bit(evsel, TIME);
944 
945 	return 0;
946 }
947 
/* Prepare off-CPU profiling for the configured recording target. */
static int record__config_off_cpu(struct record *rec)
{
	return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
}
952 
953 static bool record__tracking_system_wide(struct record *rec)
954 {
955 	struct evlist *evlist = rec->evlist;
956 	struct evsel *evsel;
957 
958 	/*
959 	 * If non-dummy evsel exists, system_wide sideband is need to
960 	 * help parse sample information.
961 	 * For example, PERF_EVENT_MMAP event to help parse symbol,
962 	 * and PERF_EVENT_COMM event to help parse task executable name.
963 	 */
964 	evlist__for_each_entry(evlist, evsel) {
965 		if (!evsel__is_dummy_event(evsel))
966 			return true;
967 	}
968 
969 	return false;
970 }
971 
/*
 * Ensure a tracking (sideband) event exists when one is needed — for
 * initial delay, CPU targets, or systems with multiple core PMUs — and
 * configure when it gets enabled. Returns 0 on success, -ENOMEM when
 * the tracking event cannot be created.
 */
static int record__config_tracking_events(struct record *rec)
{
	struct record_opts *opts = &rec->opts;
	struct evlist *evlist = rec->evlist;
	bool system_wide = false;
	struct evsel *evsel;

	/*
	 * For initial_delay, system wide or a hybrid system, we need to add
	 * tracking event so that we can track PERF_RECORD_MMAP to cover the
	 * delay of waiting or event synthesis.
	 */
	if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
	    perf_pmus__num_core_pmus() > 1) {
		/*
		 * User space tasks can migrate between CPUs, so when tracing
		 * selected CPUs, sideband for all CPUs is still needed.
		 */
		if (!!opts->target.cpu_list && record__tracking_system_wide(rec))
			system_wide = true;

		evsel = evlist__findnew_tracking_event(evlist, system_wide);
		if (!evsel)
			return -ENOMEM;

		/*
		 * Enable the tracking event when the process is forked for
		 * initial_delay, immediately for system wide.
		 */
		if (opts->target.initial_delay && !evsel->immediate &&
		    !target__has_cpu(&opts->target))
			evsel->core.attr.enable_on_exec = 1;
		else
			evsel->immediate = 1;
	}

	return 0;
}
1010 
1011 static bool record__kcore_readable(struct machine *machine)
1012 {
1013 	char kcore[PATH_MAX];
1014 	int fd;
1015 
1016 	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
1017 
1018 	fd = open(kcore, O_RDONLY);
1019 	if (fd < 0)
1020 		return false;
1021 
1022 	close(fd);
1023 
1024 	return true;
1025 }
1026 
1027 static int record__kcore_copy(struct machine *machine, struct perf_data *data)
1028 {
1029 	char from_dir[PATH_MAX];
1030 	char kcore_dir[PATH_MAX];
1031 	int ret;
1032 
1033 	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
1034 
1035 	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
1036 	if (ret)
1037 		return ret;
1038 
1039 	return kcore_copy(from_dir, kcore_dir);
1040 }
1041 
1042 static void record__thread_data_init_pipes(struct record_thread *thread_data)
1043 {
1044 	thread_data->pipes.msg[0] = -1;
1045 	thread_data->pipes.msg[1] = -1;
1046 	thread_data->pipes.ack[0] = -1;
1047 	thread_data->pipes.ack[1] = -1;
1048 }
1049 
1050 static int record__thread_data_open_pipes(struct record_thread *thread_data)
1051 {
1052 	if (pipe(thread_data->pipes.msg))
1053 		return -EINVAL;
1054 
1055 	if (pipe(thread_data->pipes.ack)) {
1056 		close(thread_data->pipes.msg[0]);
1057 		thread_data->pipes.msg[0] = -1;
1058 		close(thread_data->pipes.msg[1]);
1059 		thread_data->pipes.msg[1] = -1;
1060 		return -EINVAL;
1061 	}
1062 
1063 	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
1064 		 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
1065 		 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
1066 
1067 	return 0;
1068 }
1069 
1070 static void record__thread_data_close_pipes(struct record_thread *thread_data)
1071 {
1072 	if (thread_data->pipes.msg[0] != -1) {
1073 		close(thread_data->pipes.msg[0]);
1074 		thread_data->pipes.msg[0] = -1;
1075 	}
1076 	if (thread_data->pipes.msg[1] != -1) {
1077 		close(thread_data->pipes.msg[1]);
1078 		thread_data->pipes.msg[1] = -1;
1079 	}
1080 	if (thread_data->pipes.ack[0] != -1) {
1081 		close(thread_data->pipes.ack[0]);
1082 		thread_data->pipes.ack[0] = -1;
1083 	}
1084 	if (thread_data->pipes.ack[1] != -1) {
1085 		close(thread_data->pipes.ack[1]);
1086 		thread_data->pipes.ack[1] = -1;
1087 	}
1088 }
1089 
1090 static bool evlist__per_thread(struct evlist *evlist)
1091 {
1092 	return cpu_map__is_dummy(evlist->core.user_requested_cpus);
1093 }
1094 
/*
 * Assign to @thread_data the subset of evlist mmaps it will service:
 * all of them in per-thread mode, otherwise those whose CPU is set in
 * the thread's maps mask. Allocates the maps/overwrite_maps pointer
 * arrays. Returns 0 on success, -ENOMEM on allocation failure.
 */
static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
{
	int m, tm, nr_mmaps = evlist->core.nr_mmaps;
	struct mmap *mmap = evlist->mmap;
	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
	struct perf_cpu_map *cpus = evlist->core.all_cpus;
	bool per_thread = evlist__per_thread(evlist);

	if (per_thread)
		thread_data->nr_mmaps = nr_mmaps;
	else
		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
						      thread_data->mask->maps.nbits);
	if (mmap) {
		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->maps)
			return -ENOMEM;
	}
	if (overwrite_mmap) {
		thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->overwrite_maps) {
			zfree(&thread_data->maps);
			return -ENOMEM;
		}
	}
	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
		 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);

	/* tm walks the thread's arrays, m the full evlist mmap arrays. */
	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
		if (per_thread ||
		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
			if (thread_data->maps) {
				thread_data->maps[tm] = &mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			if (thread_data->overwrite_maps) {
				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			tm++;
		}
	}

	return 0;
}
1142 
/*
 * Build this thread's private pollfd array by duplicating, from the
 * evlist-wide pollfd set, only the entries whose backing mmap belongs to
 * this thread (matched via the entry's priv pointer).
 *
 * Returns 0 on success or a negative fdarray error code.
 */
static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
{
	int f, tm, pos;
	struct mmap *map, *overwrite_map;

	fdarray__init(&thread_data->pollfd, 64);

	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
		map = thread_data->maps ? thread_data->maps[tm] : NULL;
		overwrite_map = thread_data->overwrite_maps ?
				thread_data->overwrite_maps[tm] : NULL;

		for (f = 0; f < evlist->core.pollfd.nr; f++) {
			void *ptr = evlist->core.pollfd.priv[f].ptr;

			/* Entry belongs to one of this thread's mmaps? */
			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
							      &evlist->core.pollfd);
				if (pos < 0)
					return pos;
				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
					 thread_data, pos, evlist->core.pollfd.entries[f].fd);
			}
		}
	}

	return 0;
}
1171 
1172 static void record__free_thread_data(struct record *rec)
1173 {
1174 	int t;
1175 	struct record_thread *thread_data = rec->thread_data;
1176 
1177 	if (thread_data == NULL)
1178 		return;
1179 
1180 	for (t = 0; t < rec->nr_threads; t++) {
1181 		record__thread_data_close_pipes(&thread_data[t]);
1182 		zfree(&thread_data[t].maps);
1183 		zfree(&thread_data[t].overwrite_maps);
1184 		fdarray__exit(&thread_data[t].pollfd);
1185 	}
1186 
1187 	zfree(&rec->thread_data);
1188 }
1189 
1190 static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
1191 						    int evlist_pollfd_index,
1192 						    int thread_pollfd_index)
1193 {
1194 	size_t x = rec->index_map_cnt;
1195 
1196 	if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
1197 		return -ENOMEM;
1198 	rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
1199 	rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
1200 	rec->index_map_cnt += 1;
1201 	return 0;
1202 }
1203 
1204 static int record__update_evlist_pollfd_from_thread(struct record *rec,
1205 						    struct evlist *evlist,
1206 						    struct record_thread *thread_data)
1207 {
1208 	struct pollfd *e_entries = evlist->core.pollfd.entries;
1209 	struct pollfd *t_entries = thread_data->pollfd.entries;
1210 	int err = 0;
1211 	size_t i;
1212 
1213 	for (i = 0; i < rec->index_map_cnt; i++) {
1214 		int e_pos = rec->index_map[i].evlist_pollfd_index;
1215 		int t_pos = rec->index_map[i].thread_pollfd_index;
1216 
1217 		if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
1218 		    e_entries[e_pos].events != t_entries[t_pos].events) {
1219 			pr_err("Thread and evlist pollfd index mismatch\n");
1220 			err = -EINVAL;
1221 			continue;
1222 		}
1223 		e_entries[e_pos].revents = t_entries[t_pos].revents;
1224 	}
1225 	return err;
1226 }
1227 
1228 static int record__dup_non_perf_events(struct record *rec,
1229 				       struct evlist *evlist,
1230 				       struct record_thread *thread_data)
1231 {
1232 	struct fdarray *fda = &evlist->core.pollfd;
1233 	int i, ret;
1234 
1235 	for (i = 0; i < fda->nr; i++) {
1236 		if (!(fda->priv[i].flags & fdarray_flag__non_perf_event))
1237 			continue;
1238 		ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda);
1239 		if (ret < 0) {
1240 			pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1241 			return ret;
1242 		}
1243 		pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
1244 			  thread_data, ret, fda->entries[i].fd);
1245 		ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret);
1246 		if (ret < 0) {
1247 			pr_err("Failed to map thread and evlist pollfd indexes\n");
1248 			return ret;
1249 		}
1250 	}
1251 	return 0;
1252 }
1253 
/*
 * Allocate and initialize per-thread recording state, one record_thread
 * per mask in rec->thread_masks. Thread 0 is the main thread: it gets
 * the non-perf-event descriptors duplicated into its pollfd set. Worker
 * threads (t > 0) get communication pipes and poll their msg pipe.
 *
 * Returns 0 on success, a negative error otherwise; all thread data is
 * freed on failure.
 */
static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
{
	int t, ret;
	struct record_thread *thread_data;

	rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
	if (!rec->thread_data) {
		pr_err("Failed to allocate thread data\n");
		return -ENOMEM;
	}
	thread_data = rec->thread_data;

	/* Mark all pipe fds invalid first so cleanup on a later error is safe. */
	for (t = 0; t < rec->nr_threads; t++)
		record__thread_data_init_pipes(&thread_data[t]);

	for (t = 0; t < rec->nr_threads; t++) {
		thread_data[t].rec = rec;
		thread_data[t].mask = &rec->thread_masks[t];
		ret = record__thread_data_init_maps(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] maps\n", t);
			goto out_free;
		}
		ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] pollfd\n", t);
			goto out_free;
		}
		if (t) {
			/* Worker thread: tid is filled in when the thread starts. */
			thread_data[t].tid = -1;
			ret = record__thread_data_open_pipes(&thread_data[t]);
			if (ret) {
				pr_err("Failed to open thread[%d] communication pipes\n", t);
				goto out_free;
			}
			/* Workers also poll the msg pipe for control from the main thread. */
			ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
					   POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
			if (ret < 0) {
				pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
				goto out_free;
			}
			thread_data[t].ctlfd_pos = ret;
			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
				 thread_data, thread_data[t].ctlfd_pos,
				 thread_data[t].pipes.msg[0]);
		} else {
			thread_data[t].tid = gettid();

			ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]);
			if (ret < 0)
				goto out_free;

			thread_data[t].ctlfd_pos = -1; /* Not used */
		}
	}

	return 0;

out_free:
	record__free_thread_data(rec);

	return ret;
}
1317 
/*
 * mmap the evlist ring buffers (including AUX area buffers), set up the
 * control fd, allocate per-thread data and, in threaded mode, create
 * the perf.data directory and assign one output file per mmap.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int record__mmap_evlist(struct record *rec,
			       struct evlist *evlist)
{
	int i, ret;
	struct record_opts *opts = &rec->opts;
	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
				  opts->auxtrace_sample_mode;
	char msg[512];

	if (opts->affinity != PERF_AFFINITY_SYS)
		cpu__setup_cpunode_map();

	if (evlist__mmap_ex(evlist, opts->mmap_pages,
				 opts->auxtrace_mmap_pages,
				 auxtrace_overwrite,
				 opts->nr_cblocks, opts->affinity,
				 opts->mmap_flush, opts->comp_level) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			/* Guard against a zero errno: fall back to -EINVAL. */
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}

	if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
		return -1;

	ret = record__alloc_thread_data(rec, evlist);
	if (ret)
		return ret;

	if (record__threads_enabled(rec)) {
		/* Directory (threaded) mode: one data file per mmap. */
		ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
		if (ret) {
			pr_err("Failed to create data directory: %s\n", strerror(-ret));
			return ret;
		}
		for (i = 0; i < evlist->core.nr_mmaps; i++) {
			if (evlist->mmap)
				evlist->mmap[i].file = &rec->data.dir.files[i];
			if (evlist->overwrite_mmap)
				evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
		}
	}

	return 0;
}
1376 
1377 static int record__mmap(struct record *rec)
1378 {
1379 	return record__mmap_evlist(rec, rec->evlist);
1380 }
1381 
/*
 * Open all events in the evlist. Events that fail to open (after
 * fallback and weak-group retries) are marked skippable and removed;
 * recording proceeds as long as at least one non-dummy event opened.
 * Then applies filters, mmaps the buffers and attaches the evlist to
 * the session.
 *
 * Returns 0 on success, -1 (or a negative errno from mmap) on failure.
 */
static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct evsel *pos;
	struct evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;
	bool skipped = false;
	bool removed_tracking = false;

	evlist__for_each_entry(evlist, pos) {
		if (removed_tracking) {
			/*
			 * Normally the head of the list has tracking enabled
			 * for sideband data like mmaps. If this event is
			 * removed, make sure to add tracking to the next
			 * processed event.
			 */
			if (!pos->tracking) {
				pos->tracking = true;
				evsel__config(pos, opts, &callchain_param);
			}
			removed_tracking = false;
		}
try_again:
		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
			bool report_error = true;

			/* Retry with a more permissive fallback config if one exists. */
			if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			/* A weak group member failed: break up the group and retry. */
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->core.leader != &pos->core &&
			    pos->weak_group) {
			        pos = evlist__reset_weak_group(evlist, pos, true);
				goto try_again;
			}
#if defined(__aarch64__) || defined(__arm__)
			if (strstr(evsel__name(pos), "cycles")) {
				struct evsel *pos2;
				/*
				 * Unfortunately ARM has many events named
				 * "cycles" on PMUs like the system-level (L3)
				 * cache which don't support sampling. Only
				 * display such failures to open when there is
				 * only 1 cycles event or verbose is enabled.
				 */
				evlist__for_each_entry(evlist, pos2) {
					if (pos2 == pos)
						continue;
					if (strstr(evsel__name(pos2), "cycles")) {
						report_error = false;
						break;
					}
				}
			}
#endif
			if (report_error || verbose > 0) {
				ui__error("Failure to open event '%s' on PMU '%s' which will be "
					  "removed.\n%s\n",
					  evsel__name(pos), evsel__pmu_name(pos), msg);
			}
			/* Removing the tracking event: the next one must take over. */
			if (pos->tracking)
				removed_tracking = true;
			pos->skippable = true;
			skipped = true;
		}
	}

	if (skipped) {
		struct evsel *tmp;
		int idx = 0;
		bool evlist_empty = true;

		/* Remove evsels that failed to open and update indices. */
		evlist__for_each_entry_safe(evlist, tmp, pos) {
			if (pos->skippable) {
				evlist__remove(evlist, pos);
				continue;
			}

			/*
			 * Note, dummy events may be command line parsed or
			 * added by the tool. We care about supporting `perf
			 * record -e dummy` which may be used as a permission
			 * check. Dummy events that are added to the command
			 * line and opened along with other events that fail,
			 * will still fail as if the dummy events were tool
			 * added events for the sake of code simplicity.
			 */
			if (!evsel__is_dummy_event(pos))
				evlist_empty = false;
		}
		/* Renumber the surviving events with contiguous indices. */
		evlist__for_each_entry(evlist, pos) {
			pos->core.idx = idx++;
		}
		/* If list is empty then fail. */
		if (evlist_empty) {
			ui__error("Failure to open any events for recording.\n");
			rc = -1;
			goto out;
		}
	}
	if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
	}

	if (evlist__apply_filters(evlist, &pos, &opts->target)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter ?: "BPF", evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
1516 
1517 static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1518 {
1519 	if (rec->evlist->first_sample_time == 0)
1520 		rec->evlist->first_sample_time = sample_time;
1521 
1522 	if (sample_time)
1523 		rec->evlist->last_sample_time = sample_time;
1524 }
1525 
1526 static int process_sample_event(const struct perf_tool *tool,
1527 				union perf_event *event,
1528 				struct perf_sample *sample,
1529 				struct evsel *evsel,
1530 				struct machine *machine)
1531 {
1532 	struct record *rec = container_of(tool, struct record, tool);
1533 
1534 	set_timestamp_boundary(rec, sample->time);
1535 
1536 	if (rec->buildid_all)
1537 		return 0;
1538 
1539 	rec->samples++;
1540 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1541 }
1542 
1543 static int process_buildids(struct record *rec)
1544 {
1545 	struct perf_session *session = rec->session;
1546 
1547 	if (perf_data__size(&rec->data) == 0)
1548 		return 0;
1549 
1550 	/*
1551 	 * During this process, it'll load kernel map and replace the
1552 	 * dso->long_name to a real pathname it found.  In this case
1553 	 * we prefer the vmlinux path like
1554 	 *   /lib/modules/3.16.4/build/vmlinux
1555 	 *
1556 	 * rather than build-id path (in debug directory).
1557 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
1558 	 */
1559 	symbol_conf.ignore_vmlinux_buildid = true;
1560 
1561 	/*
1562 	 * If --buildid-all is given, it marks all DSO regardless of hits,
1563 	 * so no need to process samples. But if timestamp_boundary is enabled,
1564 	 * it still needs to walk on all samples to get the timestamps of
1565 	 * first/last samples.
1566 	 */
1567 	if (rec->buildid_all && !rec->timestamp_boundary)
1568 		rec->tool.sample = process_event_sample_stub;
1569 
1570 	return perf_session__process_events(session);
1571 }
1572 
1573 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1574 {
1575 	int err;
1576 	struct perf_tool *tool = data;
1577 	/*
1578 	 *As for guest kernel when processing subcommand record&report,
1579 	 *we arrange module mmap prior to guest kernel mmap and trigger
1580 	 *a preload dso because default guest module symbols are loaded
1581 	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
1582 	 *method is used to avoid symbol missing when the first addr is
1583 	 *in module instead of in guest kernel.
1584 	 */
1585 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
1586 					     machine);
1587 	if (err < 0)
1588 		pr_err("Couldn't record guest kernel [%d]'s reference"
1589 		       " relocation symbol.\n", machine->pid);
1590 
1591 	/*
1592 	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
1593 	 * have no _text sometimes.
1594 	 */
1595 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1596 						 machine);
1597 	if (err < 0)
1598 		pr_err("Couldn't record guest kernel [%d]'s reference"
1599 		       " relocation symbol.\n", machine->pid);
1600 }
1601 
/* Header-only PERF_RECORD_FINISHED_ROUND event written between flush rounds. */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

/* Header-only PERF_RECORD_FINISHED_INIT event written by write_finished_init(). */
static struct perf_event_header finished_init_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_INIT,
};
1611 
/*
 * In non-system affinity modes, adopt @map's affinity mask into the
 * current thread's mask and migrate the thread there before draining
 * the buffer. 'thread' is the per-thread record state pointer used
 * throughout this file.
 */
static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
	/* Only act when the masks actually differ. */
	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
	    !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
			  thread->mask->affinity.nbits)) {
		/* Replace the thread's affinity mask with the map's mask. */
		bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
		bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
			  map->affinity_mask.bits, thread->mask->affinity.nbits);
		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
					(cpu_set_t *)thread->mask->affinity.bits);
		if (verbose == 2) {
			pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
			mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
		}
	}
}
1628 
1629 static size_t process_comp_header(void *record, size_t increment)
1630 {
1631 	struct perf_record_compressed2 *event = record;
1632 	size_t size = sizeof(*event);
1633 
1634 	if (increment) {
1635 		event->header.size += increment;
1636 		return increment;
1637 	}
1638 
1639 	event->header.type = PERF_RECORD_COMPRESSED2;
1640 	event->header.size = size;
1641 
1642 	return size;
1643 }
1644 
1645 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
1646 			    void *dst, size_t dst_size, void *src, size_t src_size)
1647 {
1648 	ssize_t compressed;
1649 	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed2) - 1;
1650 	struct zstd_data *zstd_data = &session->zstd_data;
1651 
1652 	if (map && map->file)
1653 		zstd_data = &map->zstd_data;
1654 
1655 	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
1656 						     max_record_size, process_comp_header);
1657 	if (compressed < 0)
1658 		return compressed;
1659 
1660 	if (map && map->file) {
1661 		thread->bytes_transferred += src_size;
1662 		thread->bytes_compressed  += compressed;
1663 	} else {
1664 		session->bytes_transferred += src_size;
1665 		session->bytes_compressed  += compressed;
1666 	}
1667 
1668 	return compressed;
1669 }
1670 
/*
 * Drain one set of the current thread's ring buffers (regular or
 * overwrite). With @synch, each map's flush threshold is temporarily
 * forced to 1 so all pending data is pushed. Also reads AUX area
 * buffers unless snapshot/sample mode handles those separately.
 *
 * Returns 0 on success, -1 on any push/read failure.
 */
static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
				    bool overwrite, bool synch)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	int nr_mmaps;
	struct mmap **maps;
	int trace_fd = rec->data.file.fd;
	off_t off = 0;

	if (!evlist)
		return 0;

	nr_mmaps = thread->nr_mmaps;
	maps = overwrite ? thread->overwrite_maps : thread->maps;

	if (!maps)
		return 0;

	/* Overwrite buffers are only read while their data is pending. */
	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	if (record__aio_enabled(rec))
		off = record__aio_get_pos(trace_fd);

	for (i = 0; i < nr_mmaps; i++) {
		u64 flush = 0;
		struct mmap *map = maps[i];

		if (map->core.base) {
			record__adjust_affinity(rec, map);
			if (synch) {
				/* Save the flush threshold; force a full flush. */
				flush = map->core.flush;
				map->core.flush = 1;
			}
			if (!record__aio_enabled(rec)) {
				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			} else {
				if (record__aio_push(rec, map, &off) < 0) {
					record__aio_set_pos(trace_fd, off);
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			}
			if (synch)
				map->core.flush = flush;
		}

		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    !rec->opts.auxtrace_sample_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	if (record__aio_enabled(rec))
		record__aio_set_pos(trace_fd, off);

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 *
	 * No need for round events in directory mode,
	 * because per-cpu maps and files have data
	 * sorted by kernel.
	 */
	if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}
1754 
1755 static int record__mmap_read_all(struct record *rec, bool synch)
1756 {
1757 	int err;
1758 
1759 	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1760 	if (err)
1761 		return err;
1762 
1763 	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1764 }
1765 
1766 static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1767 					   void *arg __maybe_unused)
1768 {
1769 	struct perf_mmap *map = fda->priv[fd].ptr;
1770 
1771 	if (map)
1772 		perf_mmap__put(map);
1773 }
1774 
/*
 * Worker thread body: drain this thread's mmaps until told to stop.
 * Signals readiness (and later termination) to the main thread over the
 * ack pipe; POLLHUP on the msg pipe is the termination request.
 */
static void *record__thread(void *arg)
{
	enum thread_msg msg = THREAD_MSG__READY;
	bool terminate = false;
	struct fdarray *pollfd;
	int err, ctlfd_pos;

	thread = arg;
	thread->tid = gettid();

	/* Tell the main thread we are up and running. */
	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on start: %s\n",
			   thread->tid, strerror(errno));

	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());

	pollfd = &thread->pollfd;
	ctlfd_pos = thread->ctlfd_pos;

	for (;;) {
		unsigned long long hits = thread->samples;

		if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
			break;

		/* Nothing new was read: block until some fd becomes ready. */
		if (hits == thread->samples) {

			err = fdarray__poll(pollfd, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			thread->waking++;

			/* All filterable fds hung up: no more data can arrive. */
			if (fdarray__filter(pollfd, POLLERR | POLLHUP,
					    record__thread_munmap_filtered, NULL) == 0)
				break;
		}

		if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
			/* Main thread closed the msg pipe: drain once more, then exit. */
			terminate = true;
			close(thread->pipes.msg[0]);
			thread->pipes.msg[0] = -1;
			pollfd->entries[ctlfd_pos].fd = -1;
			pollfd->entries[ctlfd_pos].events = 0;
		}

		pollfd->entries[ctlfd_pos].revents = 0;
	}
	/* Final synchronous flush of all remaining buffer data. */
	record__mmap_read_all(thread->rec, true);

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on termination: %s\n",
			   thread->tid, strerror(errno));

	return NULL;
}
1836 
1837 static void record__init_features(struct record *rec)
1838 {
1839 	struct perf_session *session = rec->session;
1840 	int feat;
1841 
1842 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1843 		perf_header__set_feat(&session->header, feat);
1844 
1845 	if (rec->no_buildid)
1846 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1847 
1848 	if (!have_tracepoints(&rec->evlist->core.entries))
1849 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1850 
1851 	if (!rec->opts.branch_stack)
1852 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1853 
1854 	if (!rec->opts.full_auxtrace)
1855 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1856 
1857 	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1858 		perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1859 
1860 	if (!rec->opts.use_clockid)
1861 		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
1862 
1863 	if (!record__threads_enabled(rec))
1864 		perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1865 
1866 	if (!record__comp_enabled(rec))
1867 		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1868 
1869 	perf_header__clear_feat(&session->header, HEADER_STAT);
1870 }
1871 
/*
 * Finalize the perf.data output: fix up recorded sizes, optionally run
 * build-id processing over the recorded samples, and rewrite the file
 * header. Pipe output only gets its approximate size recorded.
 */
static void
record__finish_output(struct record *rec)
{
	int i;
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe) {
		/* Just to display approx. size */
		data->file.size = rec->bytes_written;
		return;
	}

	rec->session->header.data_size += rec->bytes_written;
	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
	if (record__threads_enabled(rec)) {
		/* Directory mode: record each per-mmap file's size too. */
		for (i = 0; i < data->dir.nr; i++)
			data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
	}

	/* Buildid scanning disabled or build ID in kernel and synthesized map events. */
	if (!rec->no_buildid || !rec->no_buildid_cache) {
		process_buildids(rec);

		if (rec->buildid_all)
			perf_session__dsos_hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);
	perf_session__cache_build_ids(rec->session);
}
1902 
1903 static int record__synthesize_workload(struct record *rec, bool tail)
1904 {
1905 	int err;
1906 	struct perf_thread_map *thread_map;
1907 	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1908 
1909 	if (rec->opts.tail_synthesize != tail)
1910 		return 0;
1911 
1912 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1913 	if (thread_map == NULL)
1914 		return -1;
1915 
1916 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1917 						 process_synthesized_event,
1918 						 &rec->session->machines.host,
1919 						 needs_mmap,
1920 						 rec->opts.sample_address);
1921 	perf_thread_map__put(thread_map);
1922 	return err;
1923 }
1924 
1925 static int write_finished_init(struct record *rec, bool tail)
1926 {
1927 	if (rec->opts.tail_synthesize != tail)
1928 		return 0;
1929 
1930 	return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
1931 }
1932 
1933 static int record__synthesize(struct record *rec, bool tail);
1934 
/*
 * Rotate the output (--switch-output): flush and finalize the current
 * perf.data, then switch to a timestamped new file. With @at_exit the
 * data is only finalized. When switch_output.num_files is set, old
 * files are kept in a bounded ring and the oldest is removed.
 *
 * Returns the fd from perf_data__switch() or a negative error.
 */
static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	char *new_filename = NULL;
	int fd, err;

	/* Same Size:      "2015122520103046"*/
	char timestamp[] = "InvalidTimestamp";

	record__aio_mmap_read_sync(rec);

	write_finished_init(rec, true);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		/* Fresh output file: reset the byte counters. */
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet) {
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);
	}

	if (rec->switch_output.num_files) {
		/* Bounded ring of old files: reuse the oldest slot. */
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in evlist. Which causes newly created perf.data doesn't
		 * contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
		write_finished_init(rec, false);
	}
	return fd;
}
2008 
/*
 * Write a synthetic PERF_RECORD_LOST_SAMPLES event for @evsel covering
 * @lost_count samples, attaching the sample id matching the given
 * CPU/thread indices so reports can attribute the loss. @misc_flag
 * distinguishes e.g. BPF-filter drops.
 */
static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
					struct perf_record_lost_samples *lost,
					int cpu_idx, int thread_idx, u64 lost_count,
					u16 misc_flag)
{
	struct perf_sample_id *sid;
	struct perf_sample sample;
	int id_hdr_size;

	perf_sample__init(&sample, /*all=*/true);
	lost->lost = lost_count;
	if (evsel->core.ids) {
		sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
		sample.id = sid->id;
	}

	/* Append the id sample directly after the fixed-size lost record. */
	id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1),
						       evsel->core.attr.sample_type, &sample);
	lost->header.size = sizeof(*lost) + id_hdr_size;
	lost->header.misc = misc_flag;
	record__write(rec, NULL, lost, lost->header.size);
	perf_sample__exit(&sample);
}
2032 
/*
 * Read each event's lost-sample counters per CPU/thread (plus any
 * BPF-filter drop counts) and synthesize PERF_RECORD_LOST_SAMPLES
 * records so reports can show data loss.
 */
static void record__read_lost_samples(struct record *rec)
{
	struct perf_session *session = rec->session;
	struct perf_record_lost_samples_and_ids lost;
	struct evsel *evsel;

	/* there was an error during record__open */
	if (session->evlist == NULL)
		return;

	evlist__for_each_entry(session->evlist, evsel) {
		struct xyarray *xy = evsel->core.sample_id;
		u64 lost_count;

		if (xy == NULL || evsel->core.fd == NULL)
			continue;
		/* FD and sample-id matrices must line up for per-CPU/thread indexing. */
		if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
		    xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
			pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
			continue;
		}

		for (int x = 0; x < xyarray__max_x(xy); x++) {
			for (int y = 0; y < xyarray__max_y(xy); y++) {
				struct perf_counts_values count;

				if (perf_evsel__read(&evsel->core, x, y, &count) < 0) {
					pr_debug("read LOST count failed\n");
					return;
				}

				/* Only synthesize a record when something was actually lost. */
				if (count.lost) {
					memset(&lost, 0, sizeof(lost));
					lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
					__record__save_lost_samples(rec, evsel, &lost.lost,
								    x, y, count.lost, 0);
				}
			}
		}

		/* Samples dropped by a BPF filter are reported with a misc flag. */
		lost_count = perf_bpf_filter__lost_count(evsel);
		if (lost_count) {
			memset(&lost, 0, sizeof(lost));
			lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
			__record__save_lost_samples(rec, evsel, &lost.lost, 0, 0, lost_count,
						    PERF_RECORD_MISC_LOST_SAMPLES_BPF);
		}
	}
}
2082 
/* errno forwarded from the child when exec of the workload fails (see below). */
static volatile sig_atomic_t workload_exec_errno;

/*
 * evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	/* Record the child's errno and stop the main record loop. */
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}
2098 
2099 static void snapshot_sig_handler(int sig);
2100 static void alarm_sig_handler(int sig);
2101 
2102 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
2103 {
2104 	if (evlist) {
2105 		if (evlist->mmap && evlist->mmap[0].core.base)
2106 			return evlist->mmap[0].core.base;
2107 		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
2108 			return evlist->overwrite_mmap[0].core.base;
2109 	}
2110 	return NULL;
2111 }
2112 
2113 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
2114 {
2115 	const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
2116 	if (pc)
2117 		return pc;
2118 	return NULL;
2119 }
2120 
2121 static int record__synthesize(struct record *rec, bool tail)
2122 {
2123 	struct perf_session *session = rec->session;
2124 	struct machine *machine = &session->machines.host;
2125 	struct perf_data *data = &rec->data;
2126 	struct record_opts *opts = &rec->opts;
2127 	struct perf_tool *tool = &rec->tool;
2128 	int err = 0;
2129 	event_op f = process_synthesized_event;
2130 
2131 	if (rec->opts.tail_synthesize != tail)
2132 		return 0;
2133 
2134 	if (data->is_pipe) {
2135 		err = perf_event__synthesize_for_pipe(tool, session, data,
2136 						      process_synthesized_event);
2137 		if (err < 0)
2138 			goto out;
2139 
2140 		rec->bytes_written += err;
2141 	}
2142 
2143 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
2144 					  process_synthesized_event, machine);
2145 	if (err)
2146 		goto out;
2147 
2148 	/* Synthesize id_index before auxtrace_info */
2149 	err = perf_event__synthesize_id_index(tool,
2150 					      process_synthesized_event,
2151 					      session->evlist, machine);
2152 	if (err)
2153 		goto out;
2154 
2155 	if (rec->opts.full_auxtrace) {
2156 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
2157 					session, process_synthesized_event);
2158 		if (err)
2159 			goto out;
2160 	}
2161 
2162 	if (!evlist__exclude_kernel(rec->evlist)) {
2163 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
2164 							 machine);
2165 		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
2166 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2167 				   "Check /proc/kallsyms permission or run as root.\n");
2168 
2169 		err = perf_event__synthesize_modules(tool, process_synthesized_event,
2170 						     machine);
2171 		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
2172 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2173 				   "Check /proc/modules permission or run as root.\n");
2174 	}
2175 
2176 	if (perf_guest) {
2177 		machines__process_guests(&session->machines,
2178 					 perf_event__synthesize_guest_os, tool);
2179 	}
2180 
2181 	err = perf_event__synthesize_extra_attr(&rec->tool,
2182 						rec->evlist,
2183 						process_synthesized_event,
2184 						data->is_pipe);
2185 	if (err)
2186 		goto out;
2187 
2188 	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
2189 						 process_synthesized_event,
2190 						NULL);
2191 	if (err < 0) {
2192 		pr_err("Couldn't synthesize thread map.\n");
2193 		return err;
2194 	}
2195 
2196 	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
2197 					     process_synthesized_event, NULL);
2198 	if (err < 0) {
2199 		pr_err("Couldn't synthesize cpu map.\n");
2200 		return err;
2201 	}
2202 
2203 	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
2204 						machine, opts);
2205 	if (err < 0) {
2206 		pr_warning("Couldn't synthesize bpf events.\n");
2207 		err = 0;
2208 	}
2209 
2210 	if (rec->opts.synth & PERF_SYNTH_CGROUP) {
2211 		err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
2212 						     machine);
2213 		if (err < 0) {
2214 			pr_warning("Couldn't synthesize cgroup events.\n");
2215 			err = 0;
2216 		}
2217 	}
2218 
2219 	if (rec->opts.nr_threads_synthesize > 1) {
2220 		mutex_init(&synth_lock);
2221 		perf_set_multithreaded();
2222 		f = process_locked_synthesized_event;
2223 	}
2224 
2225 	if (rec->opts.synth & PERF_SYNTH_TASK) {
2226 		bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
2227 
2228 		err = __machine__synthesize_threads(machine, tool, &opts->target,
2229 						    rec->evlist->core.threads,
2230 						    f, needs_mmap, opts->sample_address,
2231 						    rec->opts.nr_threads_synthesize);
2232 	}
2233 
2234 	if (rec->opts.nr_threads_synthesize > 1) {
2235 		perf_set_singlethreaded();
2236 		mutex_destroy(&synth_lock);
2237 	}
2238 
2239 out:
2240 	return err;
2241 }
2242 
/*
 * Emit the final BPF metadata events at the end of the record session.
 * No-op when perf was built without libbpf (the parameter is then unused,
 * hence __maybe_unused).
 */
static void record__synthesize_final_bpf_metadata(struct record *rec __maybe_unused)
{
#ifdef HAVE_LIBBPF_SUPPORT
	perf_event__synthesize_final_bpf_metadata(rec->session,
						  process_synthesized_event);
#endif
}
2250 
2251 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
2252 {
2253 	struct record *rec = data;
2254 	pthread_kill(rec->thread_id, SIGUSR2);
2255 	return 0;
2256 }
2257 
2258 static int record__setup_sb_evlist(struct record *rec)
2259 {
2260 	struct record_opts *opts = &rec->opts;
2261 
2262 	if (rec->sb_evlist != NULL) {
2263 		/*
2264 		 * We get here if --switch-output-event populated the
2265 		 * sb_evlist, so associate a callback that will send a SIGUSR2
2266 		 * to the main thread.
2267 		 */
2268 		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
2269 		rec->thread_id = pthread_self();
2270 	}
2271 #ifdef HAVE_LIBBPF_SUPPORT
2272 	if (!opts->no_bpf_event) {
2273 		if (rec->sb_evlist == NULL) {
2274 			rec->sb_evlist = evlist__new();
2275 
2276 			if (rec->sb_evlist == NULL) {
2277 				pr_err("Couldn't create side band evlist.\n.");
2278 				return -1;
2279 			}
2280 		}
2281 
2282 		if (evlist__add_bpf_sb_event(rec->sb_evlist, perf_session__env(rec->session))) {
2283 			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
2284 			return -1;
2285 		}
2286 	}
2287 #endif
2288 	if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
2289 		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
2290 		opts->no_bpf_event = true;
2291 	}
2292 
2293 	return 0;
2294 }
2295 
2296 static int record__init_clock(struct record *rec)
2297 {
2298 	struct perf_session *session = rec->session;
2299 	struct timespec ref_clockid;
2300 	struct timeval ref_tod;
2301 	struct perf_env *env = perf_session__env(session);
2302 	u64 ref;
2303 
2304 	if (!rec->opts.use_clockid)
2305 		return 0;
2306 
2307 	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
2308 		env->clock.clockid_res_ns = rec->opts.clockid_res_ns;
2309 
2310 	env->clock.clockid = rec->opts.clockid;
2311 
2312 	if (gettimeofday(&ref_tod, NULL) != 0) {
2313 		pr_err("gettimeofday failed, cannot set reference time.\n");
2314 		return -1;
2315 	}
2316 
2317 	if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
2318 		pr_err("clock_gettime failed, cannot set reference time.\n");
2319 		return -1;
2320 	}
2321 
2322 	ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
2323 	      (u64) ref_tod.tv_usec * NSEC_PER_USEC;
2324 
2325 	env->clock.tod_ns = ref;
2326 
2327 	ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2328 	      (u64) ref_clockid.tv_nsec;
2329 
2330 	env->clock.clockid_ns = ref;
2331 	return 0;
2332 }
2333 
/*
 * Fire the AUX area snapshot trigger if it is armed: mark the trigger hit,
 * flag that a snapshot is in progress, and kick off the snapshot itself.
 * Order matters: the flag is raised before auxtrace_record__snapshot_start()
 * so the main loop can see the in-progress state.
 */
static void hit_auxtrace_snapshot_trigger(struct record *rec)
{
	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}
}
2343 
/*
 * Ask a record worker thread to terminate and wait for its acknowledgement.
 * Closing the write end of the message pipe is the termination signal; the
 * thread answers on the ack pipe.  Always returns 0 (a missing ack is only
 * warned about).
 */
static int record__terminate_thread(struct record_thread *thread_data)
{
	int err;
	enum thread_msg ack = THREAD_MSG__UNDEFINED;
	pid_t tid = thread_data->tid;

	/* Closing msg[1] makes the worker's read() return 0 -> it exits. */
	close(thread_data->pipes.msg[1]);
	thread_data->pipes.msg[1] = -1;
	err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
	if (err > 0)
		pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
	else
		pr_warning("threads[%d]: failed to receive termination notification from %d\n",
			   thread->tid, tid);

	return 0;
}
2361 
2362 static int record__start_threads(struct record *rec)
2363 {
2364 	int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
2365 	struct record_thread *thread_data = rec->thread_data;
2366 	sigset_t full, mask;
2367 	pthread_t handle;
2368 	pthread_attr_t attrs;
2369 
2370 	thread = &thread_data[0];
2371 
2372 	if (!record__threads_enabled(rec))
2373 		return 0;
2374 
2375 	sigfillset(&full);
2376 	if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2377 		pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
2378 		return -1;
2379 	}
2380 
2381 	pthread_attr_init(&attrs);
2382 	pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2383 
2384 	for (t = 1; t < nr_threads; t++) {
2385 		enum thread_msg msg = THREAD_MSG__UNDEFINED;
2386 
2387 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2388 		pthread_attr_setaffinity_np(&attrs,
2389 					    MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2390 					    (cpu_set_t *)(thread_data[t].mask->affinity.bits));
2391 #endif
2392 		if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2393 			for (tt = 1; tt < t; tt++)
2394 				record__terminate_thread(&thread_data[t]);
2395 			pr_err("Failed to start threads: %s\n", strerror(errno));
2396 			ret = -1;
2397 			goto out_err;
2398 		}
2399 
2400 		err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2401 		if (err > 0)
2402 			pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2403 				  thread_msg_tags[msg]);
2404 		else
2405 			pr_warning("threads[%d]: failed to receive start notification from %d\n",
2406 				   thread->tid, rec->thread_data[t].tid);
2407 	}
2408 
2409 	sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2410 			(cpu_set_t *)thread->mask->affinity.bits);
2411 
2412 	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2413 
2414 out_err:
2415 	pthread_attr_destroy(&attrs);
2416 
2417 	if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2418 		pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
2419 		ret = -1;
2420 	}
2421 
2422 	return ret;
2423 }
2424 
/*
 * Stop all worker threads and fold their per-thread counters (samples,
 * transferred/compressed byte counts) into the record session totals.
 * threads[0] is the main thread, so termination starts at index 1.
 * Always returns 0.
 */
static int record__stop_threads(struct record *rec)
{
	int t;
	struct record_thread *thread_data = rec->thread_data;

	for (t = 1; t < rec->nr_threads; t++)
		record__terminate_thread(&thread_data[t]);

	for (t = 0; t < rec->nr_threads; t++) {
		rec->samples += thread_data[t].samples;
		/* Byte accounting below only applies in parallel-streaming mode. */
		if (!record__threads_enabled(rec))
			continue;
		rec->session->bytes_transferred += thread_data[t].bytes_transferred;
		rec->session->bytes_compressed += thread_data[t].bytes_compressed;
		pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
			 thread_data[t].samples, thread_data[t].waking);
		if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
			pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
				 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
		else
			pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
	}

	return 0;
}
2450 
2451 static unsigned long record__waking(struct record *rec)
2452 {
2453 	int t;
2454 	unsigned long waking = 0;
2455 	struct record_thread *thread_data = rec->thread_data;
2456 
2457 	for (t = 0; t < rec->nr_threads; t++)
2458 		waking += thread_data[t].waking;
2459 
2460 	return waking;
2461 }
2462 
/*
 * The heart of 'perf record': set up the session, open and mmap the events,
 * optionally fork the workload, then loop reading the ring buffers and
 * writing perf.data until the workload exits or the user stops recording.
 * Returns the workload's exit status (when forking) or a negative error.
 */
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	const bool forks = argc > 0;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;
	float ratio = 0;
	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
	struct perf_env *env;

	/* Install handlers so we always flush/close the output on exit. */
	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_cgroup) {
#ifndef HAVE_FILE_HANDLE
		pr_err("cgroup tracking is not supported\n");
		return -1;
#endif
	}

	/* SIGUSR2 drives both auxtrace snapshots and --switch-output. */
	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	perf_tool__init(tool, /*ordered_events=*/true);
	tool->sample		= process_sample_event;
	tool->fork		= perf_event__process_fork;
	tool->exit		= perf_event__process_exit;
	tool->comm		= perf_event__process_comm;
	tool->namespaces	= perf_event__process_namespaces;
	tool->mmap		= build_id__process_mmap;
	tool->mmap2		= build_id__process_mmap2;
	tool->itrace_start	= process_timestamp_boundary;
	tool->aux		= process_timestamp_boundary;
	tool->namespace_events	= rec->opts.record_namespaces;
	tool->cgroup_events	= rec->opts.record_cgroup;
	session = perf_session__new(data, tool);
	if (IS_ERR(session)) {
		pr_err("Perf session creation failed.\n");
		return PTR_ERR(session);
	}
	env = perf_session__env(session);
	/*
	 * NOTE(review): the early 'return -1' paths below (threads/pipe, zstd,
	 * kcore, clock init) skip the out_delete_session cleanup, leaking the
	 * session object; perf exits right after, so this looks tolerated --
	 * confirm before relying on it.
	 */
	if (record__threads_enabled(rec)) {
		if (perf_data__is_pipe(&rec->data)) {
			pr_err("Parallel trace streaming is not available in pipe mode.\n");
			return -1;
		}
		if (rec->opts.full_auxtrace) {
			pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
			return -1;
		}
	}

	fd = perf_data__fd(data);
	rec->session = session;

	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
		pr_err("Compression initialization failed.\n");
		return -1;
	}
#ifdef HAVE_EVENTFD_SUPPORT
	/* eventfd lets the signal handlers wake up the poll loop below. */
	done_fd = eventfd(0, EFD_NONBLOCK);
	if (done_fd < 0) {
		pr_err("Failed to create wakeup eventfd, error: %m\n");
		status = -1;
		goto out_delete_session;
	}
	err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
	if (err < 0) {
		pr_err("Failed to add wakeup eventfd to poll list\n");
		status = err;
		goto out_delete_session;
	}
#endif // HAVE_EVENTFD_SUPPORT

	env->comp_type  = PERF_COMP_ZSTD;
	env->comp_level = rec->opts.comp_level;

	if (rec->opts.kcore &&
	    !record__kcore_readable(&session->machines.host)) {
		pr_err("ERROR: kcore is not readable.\n");
		return -1;
	}

	if (record__init_clock(rec))
		return -1;

	record__init_features(rec);

	/* Fork (but do not yet exec) the workload so we can record its setup. */
	if (forks) {
		err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
					       workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	/*
	 * If we have just single event and are sending data
	 * through pipe, we need to force the ids allocation,
	 * because we synthesize event name through the pipe
	 * and need the id for that.
	 */
	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
		rec->opts.sample_id = true;

	if (rec->timestamp_filename && perf_data__is_pipe(data)) {
		rec->timestamp_filename = false;
		pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n");
	}

	/*
	 * Use global stat_config that is zero meaning aggr_mode is AGGR_NONE
	 * and hybrid_merge is false.
	 */
	evlist__uniquify_evsel_names(rec->evlist, &stat_config);

	evlist__config(rec->evlist, opts, &callchain_param);

	/* Debug message used by test scripts */
	pr_debug3("perf record opening and mmapping events\n");
	if (record__open(rec) != 0) {
		err = -1;
		goto out_free_threads;
	}
	/* Debug message used by test scripts */
	pr_debug3("perf record done opening and mmapping events\n");
	env->comp_mmap_len = session->evlist->core.mmap_len;

	if (rec->opts.kcore) {
		err = record__kcore_copy(&session->machines.host, data);
		if (err) {
			pr_err("ERROR: Failed to copy kcore\n");
			goto out_free_threads;
		}
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (evlist__nr_groups(rec->evlist) == 0)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	/* Write the header: pipe mode streams it, file mode seeks back later. */
	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_free_threads;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_free_threads;
	}

	err = -1;
	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		goto out_free_threads;
	}

	if (!evlist__needs_bpf_sb_event(rec->evlist))
		opts->no_bpf_event = true;

	err = record__setup_sb_evlist(rec);
	if (err)
		goto out_free_threads;

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_free_threads;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_free_threads;
		}
	}

	if (record__start_threads(rec))
		goto out_free_threads;

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->target.initial_delay)
		evlist__enable(rec->evlist);

	/*
	 * offcpu-time does not call execve, so enable_on_exe wouldn't work
	 * when recording a workload, do it manually
	 */
	if (rec->off_cpu)
		evlist__enable_evsel(rec->evlist, (char *)OFFCPU_EVENT);

	/*
	 * Let the child rip
	 */
	if (forks) {
		struct machine *machine = &session->machines.host;
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before COMM event
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize COMM event to prevent it.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		evlist__start_workload(rec->evlist);
	}

	if (opts->target.initial_delay) {
		pr_info(EVLIST_DISABLED_MSG);
		if (opts->target.initial_delay > 0) {
			usleep(opts->target.initial_delay * USEC_PER_MSEC);
			evlist__enable(rec->evlist);
			pr_info(EVLIST_ENABLED_MSG);
		}
	}

	err = event_enable_timer__start(rec->evlist->eet);
	if (err)
		goto out_child;

	/* Debug message used by test scripts */
	pr_debug3("perf record has started\n");
	fflush(stderr);

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();

	/*
	 * Must write FINISHED_INIT so it will be seen after all other
	 * synthesized user events, but before any regular events.
	 */
	err = write_finished_init(rec, false);
	if (err < 0)
		goto out_child;

	/* Main loop: drain ring buffers until done/draining and no new samples. */
	for (;;) {
		unsigned long long hits = thread->samples;

		/*
		 * rec->evlist->bkw_mmap_state is possible to be
		 * BKW_MMAP_EMPTY here: when done == true and
		 * hits != rec->samples in previous round.
		 *
		 * evlist__toggle_bkw_mmap ensure we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec, false) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec, false);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in
			 * overwritable ring buffer should have been collected,
			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 raise after or during record__mmap_read_all(),
			 * record__mmap_read_all() didn't collect data from
			 * overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					record__waking(rec));
			thread->waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		/* No new samples this round: block in poll() until woken. */
		if (hits == thread->samples) {
			if (done || draining)
				break;
			err = fdarray__poll(&thread->pollfd, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			thread->waking++;

			if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
					    record__thread_munmap_filtered, NULL) == 0)
				draining = true;

			err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread);
			if (err)
				goto out_child;
		}

		/* Service commands arriving on the --control fd, if any. */
		if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
			switch (cmd) {
			case EVLIST_CTL_CMD_SNAPSHOT:
				hit_auxtrace_snapshot_trigger(rec);
				evlist__ctlfd_ack(rec->evlist);
				break;
			case EVLIST_CTL_CMD_STOP:
				done = 1;
				break;
			case EVLIST_CTL_CMD_ACK:
			case EVLIST_CTL_CMD_UNSUPPORTED:
			case EVLIST_CTL_CMD_ENABLE:
			case EVLIST_CTL_CMD_DISABLE:
			case EVLIST_CTL_CMD_EVLIST:
			case EVLIST_CTL_CMD_PING:
			default:
				break;
			}
		}

		err = event_enable_timer__process(rec->evlist->eet);
		if (err < 0)
			goto out_child;
		if (err) {
			err = 0;
			done = 1;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			evlist__disable(rec->evlist);
			disabled = true;
		}
	}

	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	record__synthesize_final_bpf_metadata(rec);

	if (opts->auxtrace_snapshot_on_exit)
		record__auxtrace_snapshot_exit(rec);

	/* The workload's exec() failed: report which events/workload failed. */
	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		struct strbuf sb = STRBUF_INIT;

		evlist__format_evsels(rec->evlist, &sb, 2048);

		pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
			sb.buf, argv[0], emsg);
		strbuf_release(&sb);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
			record__waking(rec));

	write_finished_init(rec, true);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	/* Teardown: stop workers, take a final pass over all ring buffers. */
	record__stop_threads(rec);
	record__mmap_read_all(rec, true);
out_free_threads:
	record__free_thread_data(rec);
	evlist__finalize_ctlfd(rec->evlist);
	record__aio_mmap_read_sync(rec);

	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
		env->comp_ratio = ratio + 0.5;
	}

	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	if (rec->off_cpu)
		rec->bytes_written += off_cpu_write(rec->session);

	record__read_lost_samples(rec);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		record__synthesize(rec, true);
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s",
			perf_data__size(data) / 1024.0 / 1024.0,
			data->path, postfix, samples);
		if (ratio) {
			fprintf(stderr,	", compressed (original %.3f MB, ratio is %.3f)",
					rec->session->bytes_transferred / 1024.0 / 1024.0,
					ratio);
		}
		fprintf(stderr, " ]\n");
	}

out_delete_session:
#ifdef HAVE_EVENTFD_SUPPORT
	if (done_fd >= 0) {
		fd = done_fd;
		done_fd = -1;

		close(fd);
	}
#endif
	zstd_fini(&session->zstd_data);
	if (!opts->no_bpf_event)
		evlist__stop_sb_thread(rec->sb_evlist);

	perf_session__delete(session);
	return status;
}
3016 
3017 static void callchain_debug(struct callchain_param *callchain)
3018 {
3019 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
3020 
3021 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
3022 
3023 	if (callchain->record_mode == CALLCHAIN_DWARF)
3024 		pr_debug("callchain: stack dump size %d\n",
3025 			 callchain->dump_size);
3026 }
3027 
3028 int record_opts__parse_callchain(struct record_opts *record,
3029 				 struct callchain_param *callchain,
3030 				 const char *arg, bool unset)
3031 {
3032 	int ret;
3033 	callchain->enabled = !unset;
3034 
3035 	/* --no-call-graph */
3036 	if (unset) {
3037 		callchain->record_mode = CALLCHAIN_NONE;
3038 		pr_debug("callchain: disabled\n");
3039 		return 0;
3040 	}
3041 
3042 	ret = parse_callchain_record_opt(arg, callchain);
3043 	if (!ret) {
3044 		/* Enable data address sampling for DWARF unwind. */
3045 		if (callchain->record_mode == CALLCHAIN_DWARF)
3046 			record->sample_address = true;
3047 		callchain_debug(callchain);
3048 	}
3049 
3050 	return ret;
3051 }
3052 
3053 int record_parse_callchain_opt(const struct option *opt,
3054 			       const char *arg,
3055 			       int unset)
3056 {
3057 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
3058 }
3059 
3060 int record_callchain_opt(const struct option *opt,
3061 			 const char *arg __maybe_unused,
3062 			 int unset __maybe_unused)
3063 {
3064 	struct callchain_param *callchain = opt->value;
3065 
3066 	callchain->enabled = true;
3067 
3068 	if (callchain->record_mode == CALLCHAIN_NONE)
3069 		callchain->record_mode = CALLCHAIN_FP;
3070 
3071 	callchain_debug(callchain);
3072 	return 0;
3073 }
3074 
/*
 * perf_config() callback: apply 'record.*' settings from the config file
 * to the struct record passed via @cb.  Unknown keys fall through and
 * return 0 so other sections keep being parsed.
 *
 * NOTE(review): @value is dereferenced by strcmp()/strtol()/strdup() without
 * a NULL check; a valueless config key ("[record] build-id") would crash --
 * verify what perf_config passes in that case.
 */
static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = rec->no_buildid_cache = true;
		else if (!strcmp(value, "mmap"))
			rec->buildid_mmap = true;
		else if (!strcmp(value, "no-mmap"))
			rec->buildid_mmap = false;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph")) {
		/* Reuse the generic call-graph.record-mode handling. */
		var = "call-graph.record-mode";
		return perf_default_config(var, value, cb);
	}
#ifdef HAVE_AIO_SUPPORT
	if (!strcmp(var, "record.aio")) {
		/* 0 (or unparseable) falls back to the built-in default. */
		rec->opts.nr_cblocks = strtol(value, NULL, 0);
		if (!rec->opts.nr_cblocks)
			rec->opts.nr_cblocks = nr_cblocks_default;
	}
#endif
	if (!strcmp(var, "record.debuginfod")) {
		rec->debuginfod.urls = strdup(value);
		if (!rec->debuginfod.urls)
			return -ENOMEM;
		rec->debuginfod.set = true;
	}

	return 0;
}
3114 
3115 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset)
3116 {
3117 	struct record *rec = (struct record *)opt->value;
3118 
3119 	return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset);
3120 }
3121 
3122 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
3123 {
3124 	struct record_opts *opts = (struct record_opts *)opt->value;
3125 
3126 	if (unset || !str)
3127 		return 0;
3128 
3129 	if (!strcasecmp(str, "node"))
3130 		opts->affinity = PERF_AFFINITY_NODE;
3131 	else if (!strcasecmp(str, "cpu"))
3132 		opts->affinity = PERF_AFFINITY_CPU;
3133 
3134 	return 0;
3135 }
3136 
3137 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
3138 {
3139 	mask->nbits = nr_bits;
3140 	mask->bits = bitmap_zalloc(mask->nbits);
3141 	if (!mask->bits)
3142 		return -ENOMEM;
3143 
3144 	return 0;
3145 }
3146 
3147 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
3148 {
3149 	bitmap_free(mask->bits);
3150 	mask->nbits = 0;
3151 }
3152 
3153 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
3154 {
3155 	int ret;
3156 
3157 	ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
3158 	if (ret) {
3159 		mask->affinity.bits = NULL;
3160 		return ret;
3161 	}
3162 
3163 	ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
3164 	if (ret) {
3165 		record__mmap_cpu_mask_free(&mask->maps);
3166 		mask->maps.bits = NULL;
3167 	}
3168 
3169 	return ret;
3170 }
3171 
3172 static void record__thread_mask_free(struct thread_mask *mask)
3173 {
3174 	record__mmap_cpu_mask_free(&mask->maps);
3175 	record__mmap_cpu_mask_free(&mask->affinity);
3176 }
3177 
3178 static int record__parse_threads(const struct option *opt, const char *str, int unset)
3179 {
3180 	int s;
3181 	struct record_opts *opts = opt->value;
3182 
3183 	if (unset || !str || !strlen(str)) {
3184 		opts->threads_spec = THREAD_SPEC__CPU;
3185 	} else {
3186 		for (s = 1; s < THREAD_SPEC__MAX; s++) {
3187 			if (s == THREAD_SPEC__USER) {
3188 				opts->threads_user_spec = strdup(str);
3189 				if (!opts->threads_user_spec)
3190 					return -ENOMEM;
3191 				opts->threads_spec = THREAD_SPEC__USER;
3192 				break;
3193 			}
3194 			if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
3195 				opts->threads_spec = s;
3196 				break;
3197 			}
3198 		}
3199 	}
3200 
3201 	if (opts->threads_spec == THREAD_SPEC__USER)
3202 		pr_debug("threads_spec: %s\n", opts->threads_user_spec);
3203 	else
3204 		pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
3205 
3206 	return 0;
3207 }
3208 
3209 static int parse_output_max_size(const struct option *opt,
3210 				 const char *str, int unset)
3211 {
3212 	unsigned long *s = (unsigned long *)opt->value;
3213 	static struct parse_tag tags_size[] = {
3214 		{ .tag  = 'B', .mult = 1       },
3215 		{ .tag  = 'K', .mult = 1 << 10 },
3216 		{ .tag  = 'M', .mult = 1 << 20 },
3217 		{ .tag  = 'G', .mult = 1 << 30 },
3218 		{ .tag  = 0 },
3219 	};
3220 	unsigned long val;
3221 
3222 	if (unset) {
3223 		*s = 0;
3224 		return 0;
3225 	}
3226 
3227 	val = parse_tag_value(str, tags_size);
3228 	if (val != (unsigned long) -1) {
3229 		*s = val;
3230 		return 0;
3231 	}
3232 
3233 	return -1;
3234 }
3235 
3236 static int record__parse_mmap_pages(const struct option *opt,
3237 				    const char *str,
3238 				    int unset __maybe_unused)
3239 {
3240 	struct record_opts *opts = opt->value;
3241 	char *s, *p;
3242 	unsigned int mmap_pages;
3243 	int ret;
3244 
3245 	if (!str)
3246 		return -EINVAL;
3247 
3248 	s = strdup(str);
3249 	if (!s)
3250 		return -ENOMEM;
3251 
3252 	p = strchr(s, ',');
3253 	if (p)
3254 		*p = '\0';
3255 
3256 	if (*s) {
3257 		ret = __evlist__parse_mmap_pages(&mmap_pages, s);
3258 		if (ret)
3259 			goto out_free;
3260 		opts->mmap_pages = mmap_pages;
3261 	}
3262 
3263 	if (!p) {
3264 		ret = 0;
3265 		goto out_free;
3266 	}
3267 
3268 	ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
3269 	if (ret)
3270 		goto out_free;
3271 
3272 	opts->auxtrace_mmap_pages = mmap_pages;
3273 
3274 out_free:
3275 	free(s);
3276 	return ret;
3277 }
3278 
3279 static int record__parse_off_cpu_thresh(const struct option *opt,
3280 					const char *str,
3281 					int unset __maybe_unused)
3282 {
3283 	struct record_opts *opts = opt->value;
3284 	char *endptr;
3285 	u64 off_cpu_thresh_ms;
3286 
3287 	if (!str)
3288 		return -EINVAL;
3289 
3290 	off_cpu_thresh_ms = strtoull(str, &endptr, 10);
3291 
3292 	/* the threshold isn't string "0", yet strtoull() returns 0, parsing failed */
3293 	if (*endptr || (off_cpu_thresh_ms == 0 && strcmp(str, "0")))
3294 		return -EINVAL;
3295 	else
3296 		opts->off_cpu_thresh_ns = off_cpu_thresh_ms * NSEC_PER_MSEC;
3297 
3298 	return 0;
3299 }
3300 
/*
 * Weak no-op default: architecture code may override this to adjust the
 * record options needed to reconstruct leaf frames in call chains.
 */
void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
{
}
3304 
3305 static int parse_control_option(const struct option *opt,
3306 				const char *str,
3307 				int unset __maybe_unused)
3308 {
3309 	struct record_opts *opts = opt->value;
3310 
3311 	return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
3312 }
3313 
3314 static void switch_output_size_warn(struct record *rec)
3315 {
3316 	u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
3317 	struct switch_output *s = &rec->switch_output;
3318 
3319 	wakeup_size /= 2;
3320 
3321 	if (s->size < wakeup_size) {
3322 		char buf[100];
3323 
3324 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
3325 		pr_warning("WARNING: switch-output data size lower than "
3326 			   "wakeup kernel buffer size (%s) "
3327 			   "expect bigger perf.data sizes\n", buf);
3328 	}
3329 }
3330 
/*
 * Configure --switch-output. The argument is one of:
 *   "signal"     - rotate the output file on SIGUSR2,
 *   size[BKMG]   - rotate when the file reaches the given size,
 *   time[smhd]   - rotate on a time period.
 * --switch-output-event implies signal mode (a SIGUSR2 is sent from the
 * side-band thread). Not supported together with --threads (parallel
 * streaming) - warns and returns 0 in that case. Returns 0 on success or
 * no-op, -1 when the argument matches none of the three forms.
 */
static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag  = 's', .mult = 1        },
		{ .tag  = 'm', .mult = 60       },
		{ .tag  = 'h', .mult = 60*60    },
		{ .tag  = 'd', .mult = 60*60*24 },
		{ .tag  = 0 },
	};
	unsigned long val;

	/*
	 * If we're using --switch-output-events, then we imply its
	 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
	 *  thread to its parent.
	 */
	if (rec->switch_output_event_set) {
		if (record__threads_enabled(rec)) {
			pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
			return 0;
		}
		goto do_signal;
	}

	if (!s->set)
		return 0;

	if (record__threads_enabled(rec)) {
		pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
		return 0;
	}

	if (!strcmp(s->str, "signal")) {
do_signal:
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	/* Try a size threshold first ... */
	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	/* ... then a time threshold. */
	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	/* Neither "signal", a size, nor a time: invalid spec. */
	return -1;

enabled:
	/* Each output slice gets its own timestamped file name. */
	rec->timestamp_filename = true;
	s->enabled              = true;

	/* Warn when the size threshold is smaller than the wakeup buffer. */
	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}
3404 
/* Usage strings printed by -h/--help and on option parsing errors. */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
/* Non-static: shared with code outside this file. */
const char * const *record_usage = __record_usage;
3411 
3412 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
3413 				  struct perf_sample *sample, struct machine *machine)
3414 {
3415 	/*
3416 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3417 	 * no need to add them twice.
3418 	 */
3419 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3420 		return 0;
3421 	return perf_event__process_mmap(tool, event, sample, machine);
3422 }
3423 
3424 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
3425 				   struct perf_sample *sample, struct machine *machine)
3426 {
3427 	/*
3428 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3429 	 * no need to add them twice.
3430 	 */
3431 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3432 		return 0;
3433 
3434 	return perf_event__process_mmap2(tool, event, sample, machine);
3435 }
3436 
3437 static int process_timestamp_boundary(const struct perf_tool *tool,
3438 				      union perf_event *event __maybe_unused,
3439 				      struct perf_sample *sample,
3440 				      struct machine *machine __maybe_unused)
3441 {
3442 	struct record *rec = container_of(tool, struct record, tool);
3443 
3444 	set_timestamp_boundary(rec, sample->time);
3445 	return 0;
3446 }
3447 
3448 static int parse_record_synth_option(const struct option *opt,
3449 				     const char *str,
3450 				     int unset __maybe_unused)
3451 {
3452 	struct record_opts *opts = opt->value;
3453 	char *p = strdup(str);
3454 
3455 	if (p == NULL)
3456 		return -1;
3457 
3458 	opts->synth = parse_synth_opt(p);
3459 	free(p);
3460 
3461 	if (opts->synth < 0) {
3462 		pr_err("Invalid synth option: %s\n", str);
3463 		return -1;
3464 	}
3465 	return 0;
3466 }
3467 
3468 /*
3469  * XXX Ideally would be local to cmd_record() and passed to a record__new
3470  * because we need to have access to it in record__exit, that is called
3471  * after cmd_record() exits, but since record_options need to be accessible to
3472  * builtin-script, leave it here.
3473  *
 * At least we don't touch it in all the other functions here directly.
3475  *
3476  * Just say no to tons of global variables, sigh.
3477  */
/*
 * Baseline record options, refined by perf config and the command line.
 * NOTE(review): the UINT_MAX/ULLONG_MAX values look like "not set by the
 * user" sentinels resolved later by the record_opts consumers - confirm
 * before relying on them as real limits.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.mmap_flush          = MMAP_FLUSH_DEFAULT,
		.nr_threads_synthesize = 1,
		.ctl_fd              = -1,	/* no control fd/fifo by default */
		.ctl_fd_ack          = -1,
		.synth               = PERF_SYNTH_ALL,
		.off_cpu_thresh_ns   = OFFCPU_THRESH,
	},
	.buildid_mmap = true,	/* build-ids carried in mmap events by default */
};
3498 
/* Help text for --call-graph; non-static, shared outside this file. */
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

/* --dry-run: parse options, then exit without recording. */
static bool dry_run;

/* -e/--event parses into the main evlist ... */
static struct parse_events_option_args parse_events_option_args = {
	.evlistp = &record.evlist,
};

/* ... while --switch-output-event parses into the side-band evlist. */
static struct parse_events_option_args switch_output_parse_events_option_args = {
	.evlistp = &record.sb_evlist,
};
3511 
3512 /*
3513  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
3514  * with it and switch to use the library functions in perf_evlist that came
3515  * from builtin-record.c, i.e. use record_opts,
3516  * evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
3517  * using pipes, etc.
3518  */
/* Command line option table; parses into the 'record' instance above. */
static struct option __record_options[] = {
	/* Event selection and filtering: */
	OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN(0, "latency", &record.latency,
		    "Enable data collection for latency profiling.\n"
		    "\t\t\t  Use perf report --latency for latency-centric profile."),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	/* Target selection - which processes/CPUs to profile: */
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	/* Sampling rate / output: */
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.data.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
		    "Fail if the specified frequency can't be used"),
	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
		     "profile at this frequency",
		      record__parse_freq),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
		     record__mmap_flush_parse),
	/* Call-graph recording (see record_callchain_opt() above): */
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording" ,
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	/* Per-sample payload selection: */
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
		    "Record the sample physical addresses"),
	OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
		    "Record the sampled data address data page size"),
	OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
		    "Record the sampled code address (ip) page size"),
	OPT_BOOLEAN(0, "sample-mem-info", &record.opts.sample_data_src,
		    "Record the data source for memory operations"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier,
		    "Record the sample identifier"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
			"Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	/* Build-id collection and caching: */
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_CALLBACK('D', "delay", &record, "ms",
		     "ms to wait before starting measurement after program start (-1: start with events disabled), "
		     "or ranges of time to enable events e.g. '-D 10-20,30-40'",
		     record__parse_event_enable_time),
	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
	OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),

	/* Branch stack sampling: */
	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use '-I?' to list register names", parse_intr_regs),
	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
		    "sample selected machine registers in user space,"
		    " use '--user-regs=?' to list register names", parse_user_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
	"clockid", "clockid to use for events, see clock_gettime()",
	parse_clockid),
	/* AUX area tracing: */
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
			  "opts", "sample AUX area", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
		    "Record cgroup events"),
	OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
			&record.opts.record_switch_events_set,
			"Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
		    "collect kernel callchains"),
	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
		    "collect user callchains"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN_SET(0, "buildid-mmap", &record.buildid_mmap, &record.buildid_mmap_set,
			"Record build-id in mmap events and skip build-id processing."),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
		    "Record timestamp boundary (time of first/last samples)"),
	/* Output file switching (see switch_output_setup()): */
	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
			  "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
			  "signal"),
	OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args,
			 &record.switch_output_event_set, "switch output event",
			 "switch output event selector. use 'perf list' to list available events",
			 parse_events_option_new_evlist),
	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
		   "Limit number of switch output generated files"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
	/* Optional features, compiled in when available: */
#ifdef HAVE_AIO_SUPPORT
	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
		     record__aio_parse),
#endif
	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
		     record__parse_affinity),
#ifdef HAVE_ZSTD_SUPPORT
	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
			    "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
			    record__parse_comp_level),
#endif
	OPT_CALLBACK(0, "max-size", &record.output_max_size,
		     "size", "Limit the maximum size of the output file", parse_output_max_size),
	OPT_UINTEGER(0, "num-thread-synthesize",
		     &record.opts.nr_threads_synthesize,
		     "number of threads to run for event synthesis"),
#ifdef HAVE_LIBPFM
	OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
		"libpfm4 event selector. use 'perf list' to list available events",
		parse_libpfm_events_option),
#endif
	OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
		     "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
		     "\t\t\t  'snapshot': AUX area tracing snapshot).\n"
		     "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
		     "\t\t\t  Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
		      parse_control_option),
	OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
		     "Fine-tune event synthesis: default=all", parse_record_synth_option),
	OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
			  &record.debuginfod.set, "debuginfod urls",
			  "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
			  "system"),
	/* Parallel trace streaming (see record__parse_threads()): */
	OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
			    "write collected trace data into several data files using parallel threads",
			    record__parse_threads),
	OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
	OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin",
		   "BPF filter action"),
	OPT_CALLBACK(0, "off-cpu-thresh", &record.opts, "ms",
		     "Dump off-cpu samples if off-cpu time exceeds this threshold (in milliseconds). (Default: 500ms)",
		     record__parse_off_cpu_thresh),
	OPT_END()
};
3727 
3728 struct option *record_options = __record_options;
3729 
3730 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3731 {
3732 	struct perf_cpu cpu;
3733 	int idx;
3734 
3735 	if (cpu_map__is_dummy(cpus))
3736 		return 0;
3737 
3738 	perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) {
3739 		/* Return ENODEV is input cpu is greater than max cpu */
3740 		if ((unsigned long)cpu.cpu > mask->nbits)
3741 			return -ENODEV;
3742 		__set_bit(cpu.cpu, mask->bits);
3743 	}
3744 
3745 	return 0;
3746 }
3747 
3748 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3749 {
3750 	struct perf_cpu_map *cpus;
3751 
3752 	cpus = perf_cpu_map__new(mask_spec);
3753 	if (!cpus)
3754 		return -ENOMEM;
3755 
3756 	bitmap_zero(mask->bits, mask->nbits);
3757 	if (record__mmap_cpu_mask_init(mask, cpus))
3758 		return -ENODEV;
3759 
3760 	perf_cpu_map__put(cpus);
3761 
3762 	return 0;
3763 }
3764 
3765 static void record__free_thread_masks(struct record *rec, int nr_threads)
3766 {
3767 	int t;
3768 
3769 	if (rec->thread_masks)
3770 		for (t = 0; t < nr_threads; t++)
3771 			record__thread_mask_free(&rec->thread_masks[t]);
3772 
3773 	zfree(&rec->thread_masks);
3774 }
3775 
3776 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3777 {
3778 	int t, ret;
3779 
3780 	rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3781 	if (!rec->thread_masks) {
3782 		pr_err("Failed to allocate thread masks\n");
3783 		return -ENOMEM;
3784 	}
3785 
3786 	for (t = 0; t < nr_threads; t++) {
3787 		ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3788 		if (ret) {
3789 			pr_err("Failed to allocate thread masks[%d]\n", t);
3790 			goto out_free;
3791 		}
3792 	}
3793 
3794 	return 0;
3795 
3796 out_free:
3797 	record__free_thread_masks(rec, nr_threads);
3798 
3799 	return ret;
3800 }
3801 
3802 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3803 {
3804 	int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3805 
3806 	ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3807 	if (ret)
3808 		return ret;
3809 
3810 	rec->nr_threads = nr_cpus;
3811 	pr_debug("nr_threads: %d\n", rec->nr_threads);
3812 
3813 	for (t = 0; t < rec->nr_threads; t++) {
3814 		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
3815 		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
3816 		if (verbose > 0) {
3817 			pr_debug("thread_masks[%d]: ", t);
3818 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3819 			pr_debug("thread_masks[%d]: ", t);
3820 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3821 		}
3822 	}
3823 
3824 	return 0;
3825 }
3826 
3827 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
3828 					  const char **maps_spec, const char **affinity_spec,
3829 					  u32 nr_spec)
3830 {
3831 	u32 s;
3832 	int ret = 0, t = 0;
3833 	struct mmap_cpu_mask cpus_mask;
3834 	struct thread_mask thread_mask, full_mask, *thread_masks;
3835 
3836 	ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
3837 	if (ret) {
3838 		pr_err("Failed to allocate CPUs mask\n");
3839 		return ret;
3840 	}
3841 
3842 	ret = record__mmap_cpu_mask_init(&cpus_mask, cpus);
3843 	if (ret) {
3844 		pr_err("Failed to init cpu mask\n");
3845 		goto out_free_cpu_mask;
3846 	}
3847 
3848 	ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
3849 	if (ret) {
3850 		pr_err("Failed to allocate full mask\n");
3851 		goto out_free_cpu_mask;
3852 	}
3853 
3854 	ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3855 	if (ret) {
3856 		pr_err("Failed to allocate thread mask\n");
3857 		goto out_free_full_and_cpu_masks;
3858 	}
3859 
3860 	for (s = 0; s < nr_spec; s++) {
3861 		ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
3862 		if (ret) {
3863 			pr_err("Failed to initialize maps thread mask\n");
3864 			goto out_free;
3865 		}
3866 		ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
3867 		if (ret) {
3868 			pr_err("Failed to initialize affinity thread mask\n");
3869 			goto out_free;
3870 		}
3871 
3872 		/* ignore invalid CPUs but do not allow empty masks */
3873 		if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
3874 				cpus_mask.bits, thread_mask.maps.nbits)) {
3875 			pr_err("Empty maps mask: %s\n", maps_spec[s]);
3876 			ret = -EINVAL;
3877 			goto out_free;
3878 		}
3879 		if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
3880 				cpus_mask.bits, thread_mask.affinity.nbits)) {
3881 			pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
3882 			ret = -EINVAL;
3883 			goto out_free;
3884 		}
3885 
3886 		/* do not allow intersection with other masks (full_mask) */
3887 		if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
3888 				      thread_mask.maps.nbits)) {
3889 			pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
3890 			ret = -EINVAL;
3891 			goto out_free;
3892 		}
3893 		if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
3894 				      thread_mask.affinity.nbits)) {
3895 			pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
3896 			ret = -EINVAL;
3897 			goto out_free;
3898 		}
3899 
3900 		bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
3901 			  thread_mask.maps.bits, full_mask.maps.nbits);
3902 		bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
3903 			  thread_mask.affinity.bits, full_mask.maps.nbits);
3904 
3905 		thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
3906 		if (!thread_masks) {
3907 			pr_err("Failed to reallocate thread masks\n");
3908 			ret = -ENOMEM;
3909 			goto out_free;
3910 		}
3911 		rec->thread_masks = thread_masks;
3912 		rec->thread_masks[t] = thread_mask;
3913 		if (verbose > 0) {
3914 			pr_debug("thread_masks[%d]: ", t);
3915 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3916 			pr_debug("thread_masks[%d]: ", t);
3917 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3918 		}
3919 		t++;
3920 		ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3921 		if (ret) {
3922 			pr_err("Failed to allocate thread mask\n");
3923 			goto out_free_full_and_cpu_masks;
3924 		}
3925 	}
3926 	rec->nr_threads = t;
3927 	pr_debug("nr_threads: %d\n", rec->nr_threads);
3928 	if (!rec->nr_threads)
3929 		ret = -EINVAL;
3930 
3931 out_free:
3932 	record__thread_mask_free(&thread_mask);
3933 out_free_full_and_cpu_masks:
3934 	record__thread_mask_free(&full_mask);
3935 out_free_cpu_mask:
3936 	record__mmap_cpu_mask_free(&cpus_mask);
3937 
3938 	return ret;
3939 }
3940 
3941 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
3942 {
3943 	int ret;
3944 	struct cpu_topology *topo;
3945 
3946 	topo = cpu_topology__new();
3947 	if (!topo) {
3948 		pr_err("Failed to allocate CPU topology\n");
3949 		return -ENOMEM;
3950 	}
3951 
3952 	ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
3953 					     topo->core_cpus_list, topo->core_cpus_lists);
3954 	cpu_topology__delete(topo);
3955 
3956 	return ret;
3957 }
3958 
3959 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
3960 {
3961 	int ret;
3962 	struct cpu_topology *topo;
3963 
3964 	topo = cpu_topology__new();
3965 	if (!topo) {
3966 		pr_err("Failed to allocate CPU topology\n");
3967 		return -ENOMEM;
3968 	}
3969 
3970 	ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
3971 					     topo->package_cpus_list, topo->package_cpus_lists);
3972 	cpu_topology__delete(topo);
3973 
3974 	return ret;
3975 }
3976 
3977 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
3978 {
3979 	u32 s;
3980 	int ret;
3981 	const char **spec;
3982 	struct numa_topology *topo;
3983 
3984 	topo = numa_topology__new();
3985 	if (!topo) {
3986 		pr_err("Failed to allocate NUMA topology\n");
3987 		return -ENOMEM;
3988 	}
3989 
3990 	spec = zalloc(topo->nr * sizeof(char *));
3991 	if (!spec) {
3992 		pr_err("Failed to allocate NUMA spec\n");
3993 		ret = -ENOMEM;
3994 		goto out_delete_topo;
3995 	}
3996 	for (s = 0; s < topo->nr; s++)
3997 		spec[s] = topo->nodes[s].cpus;
3998 
3999 	ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
4000 
4001 	zfree(&spec);
4002 
4003 out_delete_topo:
4004 	numa_topology__delete(topo);
4005 
4006 	return ret;
4007 }
4008 
/*
 * Parse the user supplied --threads specification of the form
 * <maps_mask>/<affinity_mask>[:<maps_mask>/<affinity_mask>]...
 * and build one thread mask per colon-separated entry.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -EINVAL when an
 * entry lacks the '/'-separated affinity part, or whatever
 * record__init_thread_masks_spec() returns for the collected specs.
 */
static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int t, ret;
	u32 s, nr_spec = 0;
	char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
	char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;

	/* strtok_r() parses in place: threads_user_spec gets mutated here. */
	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
		spec = strtok_r(user_spec, ":", &spec_ptr);
		if (spec == NULL)
			break;
		pr_debug2("threads_spec[%d]: %s\n", t, spec);
		/*
		 * First '/'-token is the maps mask. NULL here (spec made only
		 * of '/' characters) silently ends parsing of the remaining
		 * entries — presumably intentional best-effort; note for review.
		 */
		mask = strtok_r(spec, "/", &mask_ptr);
		if (mask == NULL)
			break;
		pr_debug2("  maps mask: %s\n", mask);
		tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate maps spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		maps_spec = tmp_spec;
		/*
		 * Track the new copy in dup_mask too: nr_spec is not bumped
		 * until the matching affinity part succeeds, so on an error
		 * below the cleanup loop would miss maps_spec[nr_spec];
		 * free(dup_mask) at out_free covers that in-flight copy.
		 */
		maps_spec[nr_spec] = dup_mask = strdup(mask);
		if (!maps_spec[nr_spec]) {
			pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		/* Second '/'-token is the affinity mask and is mandatory. */
		mask = strtok_r(NULL, "/", &mask_ptr);
		if (mask == NULL) {
			pr_err("Invalid thread maps or affinity specs\n");
			ret = -EINVAL;
			goto out_free;
		}
		pr_debug2("  affinity mask: %s\n", mask);
		tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate affinity spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		affinity_spec = tmp_spec;
		affinity_spec[nr_spec] = strdup(mask);
		if (!affinity_spec[nr_spec]) {
			pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		/* Entry complete: maps_spec[nr_spec] is now owned by the array. */
		dup_mask = NULL;
		nr_spec++;
	}

	ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
					     (const char **)affinity_spec, nr_spec);

out_free:
	free(dup_mask);
	for (s = 0; s < nr_spec; s++) {
		if (maps_spec)
			free(maps_spec[s]);
		if (affinity_spec)
			free(affinity_spec[s]);
	}
	free(affinity_spec);
	free(maps_spec);

	return ret;
}
4078 
4079 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
4080 {
4081 	int ret;
4082 
4083 	ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
4084 	if (ret)
4085 		return ret;
4086 
4087 	if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
4088 		return -ENODEV;
4089 
4090 	rec->nr_threads = 1;
4091 
4092 	return 0;
4093 }
4094 
4095 static int record__init_thread_masks(struct record *rec)
4096 {
4097 	int ret = 0;
4098 	struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;
4099 
4100 	if (!record__threads_enabled(rec))
4101 		return record__init_thread_default_masks(rec, cpus);
4102 
4103 	if (evlist__per_thread(rec->evlist)) {
4104 		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
4105 		return -EINVAL;
4106 	}
4107 
4108 	switch (rec->opts.threads_spec) {
4109 	case THREAD_SPEC__CPU:
4110 		ret = record__init_thread_cpu_masks(rec, cpus);
4111 		break;
4112 	case THREAD_SPEC__CORE:
4113 		ret = record__init_thread_core_masks(rec, cpus);
4114 		break;
4115 	case THREAD_SPEC__PACKAGE:
4116 		ret = record__init_thread_package_masks(rec, cpus);
4117 		break;
4118 	case THREAD_SPEC__NUMA:
4119 		ret = record__init_thread_numa_masks(rec, cpus);
4120 		break;
4121 	case THREAD_SPEC__USER:
4122 		ret = record__init_thread_user_masks(rec, cpus);
4123 		break;
4124 	default:
4125 		break;
4126 	}
4127 
4128 	return ret;
4129 }
4130 
4131 int cmd_record(int argc, const char **argv)
4132 {
4133 	int err;
4134 	struct record *rec = &record;
4135 	char errbuf[BUFSIZ];
4136 
4137 	setlocale(LC_ALL, "");
4138 
4139 #ifndef HAVE_BPF_SKEL
4140 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
4141 	set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
4142 # undef set_nobuild
4143 #endif
4144 
4145 	/* Disable eager loading of kernel symbols that adds overhead to perf record. */
4146 	symbol_conf.lazy_load_kernel_maps = true;
4147 	rec->opts.affinity = PERF_AFFINITY_SYS;
4148 
4149 	rec->evlist = evlist__new();
4150 	if (rec->evlist == NULL)
4151 		return -ENOMEM;
4152 
4153 	err = perf_config(perf_record_config, rec);
4154 	if (err)
4155 		return err;
4156 
4157 	argc = parse_options(argc, argv, record_options, record_usage,
4158 			    PARSE_OPT_STOP_AT_NON_OPTION);
4159 	if (quiet)
4160 		perf_quiet_option();
4161 
4162 	err = symbol__validate_sym_arguments();
4163 	if (err)
4164 		return err;
4165 
4166 	perf_debuginfod_setup(&record.debuginfod);
4167 
4168 	/* Make system wide (-a) the default target. */
4169 	if (!argc && target__none(&rec->opts.target))
4170 		rec->opts.target.system_wide = true;
4171 
4172 	if (nr_cgroups && !rec->opts.target.system_wide) {
4173 		usage_with_options_msg(record_usage, record_options,
4174 			"cgroup monitoring only available in system-wide mode");
4175 
4176 	}
4177 
4178 	if (record.latency) {
4179 		/*
4180 		 * There is no fundamental reason why latency profiling
4181 		 * can't work for system-wide mode, but exact semantics
4182 		 * and details are to be defined.
4183 		 * See the following thread for details:
4184 		 * https://lore.kernel.org/all/Z4XDJyvjiie3howF@google.com/
4185 		 */
4186 		if (record.opts.target.system_wide) {
4187 			pr_err("Failed: latency profiling is not supported with system-wide collection.\n");
4188 			err = -EINVAL;
4189 			goto out_opts;
4190 		}
4191 		record.opts.record_switch_events = true;
4192 	}
4193 
4194 	if (rec->buildid_mmap && !perf_can_record_build_id()) {
4195 		pr_warning("Missing support for build id in kernel mmap events.\n"
4196 			   "Disable this warning with --no-buildid-mmap\n");
4197 		rec->buildid_mmap = false;
4198 	}
4199 
4200 	if (rec->buildid_mmap) {
4201 		/* Enable perf_event_attr::build_id bit. */
4202 		rec->opts.build_id = true;
4203 		/* Disable build-ID table in the header. */
4204 		rec->no_buildid = true;
4205 	} else {
4206 		pr_debug("Disabling build id in synthesized mmap2 events.\n");
4207 		symbol_conf.no_buildid_mmap2 = true;
4208 	}
4209 
4210 	if (rec->no_buildid_set && rec->no_buildid) {
4211 		/* -B implies -N for historic reasons. */
4212 		rec->no_buildid_cache = true;
4213 	}
4214 
4215 	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
4216 		pr_err("Kernel has no cgroup sampling support.\n");
4217 		err = -EINVAL;
4218 		goto out_opts;
4219 	}
4220 
4221 	if (rec->opts.kcore)
4222 		rec->opts.text_poke = true;
4223 
4224 	if (rec->opts.kcore || record__threads_enabled(rec))
4225 		rec->data.is_dir = true;
4226 
4227 	if (record__threads_enabled(rec)) {
4228 		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
4229 			pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
4230 			goto out_opts;
4231 		}
4232 		if (record__aio_enabled(rec)) {
4233 			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
4234 			goto out_opts;
4235 		}
4236 	}
4237 
4238 	if (rec->opts.comp_level != 0) {
4239 		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
4240 		rec->no_buildid = true;
4241 	}
4242 
4243 	if (rec->opts.record_switch_events &&
4244 	    !perf_can_record_switch_events()) {
4245 		ui__error("kernel does not support recording context switch events\n");
4246 		parse_options_usage(record_usage, record_options, "switch-events", 0);
4247 		err = -EINVAL;
4248 		goto out_opts;
4249 	}
4250 
4251 	if (switch_output_setup(rec)) {
4252 		parse_options_usage(record_usage, record_options, "switch-output", 0);
4253 		err = -EINVAL;
4254 		goto out_opts;
4255 	}
4256 
4257 	if (rec->switch_output.time) {
4258 		signal(SIGALRM, alarm_sig_handler);
4259 		alarm(rec->switch_output.time);
4260 	}
4261 
4262 	if (rec->switch_output.num_files) {
4263 		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
4264 						      sizeof(char *));
4265 		if (!rec->switch_output.filenames) {
4266 			err = -EINVAL;
4267 			goto out_opts;
4268 		}
4269 	}
4270 
4271 	if (rec->timestamp_filename && record__threads_enabled(rec)) {
4272 		rec->timestamp_filename = false;
4273 		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
4274 	}
4275 
4276 	if (rec->filter_action) {
4277 		if (!strcmp(rec->filter_action, "pin"))
4278 			err = perf_bpf_filter__pin();
4279 		else if (!strcmp(rec->filter_action, "unpin"))
4280 			err = perf_bpf_filter__unpin();
4281 		else {
4282 			pr_warning("Unknown BPF filter action: %s\n", rec->filter_action);
4283 			err = -EINVAL;
4284 		}
4285 		goto out_opts;
4286 	}
4287 
4288 	/* For backward compatibility, -d implies --mem-info */
4289 	if (rec->opts.sample_address)
4290 		rec->opts.sample_data_src = true;
4291 
4292 	/*
4293 	 * Allow aliases to facilitate the lookup of symbols for address
4294 	 * filters. Refer to auxtrace_parse_filters().
4295 	 */
4296 	symbol_conf.allow_aliases = true;
4297 
4298 	symbol__init(NULL);
4299 
4300 	err = record__auxtrace_init(rec);
4301 	if (err)
4302 		goto out;
4303 
4304 	if (dry_run)
4305 		goto out;
4306 
4307 	err = -ENOMEM;
4308 
4309 	if (rec->no_buildid_cache) {
4310 		disable_buildid_cache();
4311 	} else if (rec->switch_output.enabled) {
4312 		/*
4313 		 * In 'perf record --switch-output', disable buildid
4314 		 * generation by default to reduce data file switching
4315 		 * overhead. Still generate buildid if they are required
4316 		 * explicitly using
4317 		 *
4318 		 *  perf record --switch-output --no-no-buildid \
4319 		 *              --no-no-buildid-cache
4320 		 *
4321 		 * Following code equals to:
4322 		 *
4323 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
4324 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
4325 		 *         disable_buildid_cache();
4326 		 */
4327 		bool disable = true;
4328 
4329 		if (rec->no_buildid_set && !rec->no_buildid)
4330 			disable = false;
4331 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
4332 			disable = false;
4333 		if (disable) {
4334 			rec->no_buildid = true;
4335 			rec->no_buildid_cache = true;
4336 			disable_buildid_cache();
4337 		}
4338 	}
4339 
4340 	if (record.opts.overwrite)
4341 		record.opts.tail_synthesize = true;
4342 
4343 	if (rec->evlist->core.nr_entries == 0) {
4344 		struct evlist *def_evlist = evlist__new_default();
4345 
4346 		if (!def_evlist)
4347 			goto out;
4348 
4349 		evlist__splice_list_tail(rec->evlist, &def_evlist->core.entries);
4350 		evlist__delete(def_evlist);
4351 	}
4352 
4353 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
4354 		rec->opts.no_inherit = true;
4355 
4356 	err = target__validate(&rec->opts.target);
4357 	if (err) {
4358 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
4359 		ui__warning("%s\n", errbuf);
4360 	}
4361 
4362 	if (rec->uid_str) {
4363 		uid_t uid = parse_uid(rec->uid_str);
4364 
4365 		if (uid == UINT_MAX) {
4366 			ui__error("Invalid User: %s", rec->uid_str);
4367 			err = -EINVAL;
4368 			goto out;
4369 		}
4370 		err = parse_uid_filter(rec->evlist, uid);
4371 		if (err)
4372 			goto out;
4373 
4374 		/* User ID filtering implies system wide. */
4375 		rec->opts.target.system_wide = true;
4376 	}
4377 
4378 	/* Enable ignoring missing threads when -p option is defined. */
4379 	rec->opts.ignore_missing_thread = rec->opts.target.pid;
4380 
4381 	evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);
4382 
4383 	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
4384 		arch__add_leaf_frame_record_opts(&rec->opts);
4385 
4386 	err = -ENOMEM;
4387 	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
4388 		if (rec->opts.target.pid != NULL) {
4389 			pr_err("Couldn't create thread/CPU maps: %s\n",
4390 				errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
4391 			goto out;
4392 		}
4393 		else
4394 			usage_with_options(record_usage, record_options);
4395 	}
4396 
4397 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
4398 	if (err)
4399 		goto out;
4400 
4401 	/*
4402 	 * We take all buildids when the file contains
4403 	 * AUX area tracing data because we do not decode the
4404 	 * trace because it would take too long.
4405 	 */
4406 	if (rec->opts.full_auxtrace)
4407 		rec->buildid_all = true;
4408 
4409 	if (rec->opts.text_poke) {
4410 		err = record__config_text_poke(rec->evlist);
4411 		if (err) {
4412 			pr_err("record__config_text_poke failed, error %d\n", err);
4413 			goto out;
4414 		}
4415 	}
4416 
4417 	if (rec->off_cpu) {
4418 		err = record__config_off_cpu(rec);
4419 		if (err) {
4420 			pr_err("record__config_off_cpu failed, error %d\n", err);
4421 			goto out;
4422 		}
4423 	}
4424 
4425 	if (record_opts__config(&rec->opts)) {
4426 		err = -EINVAL;
4427 		goto out;
4428 	}
4429 
4430 	err = record__config_tracking_events(rec);
4431 	if (err) {
4432 		pr_err("record__config_tracking_events failed, error %d\n", err);
4433 		goto out;
4434 	}
4435 
4436 	err = record__init_thread_masks(rec);
4437 	if (err) {
4438 		pr_err("Failed to initialize parallel data streaming masks\n");
4439 		goto out;
4440 	}
4441 
4442 	if (rec->opts.nr_cblocks > nr_cblocks_max)
4443 		rec->opts.nr_cblocks = nr_cblocks_max;
4444 	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
4445 
4446 	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
4447 	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
4448 
4449 	if (rec->opts.comp_level > comp_level_max)
4450 		rec->opts.comp_level = comp_level_max;
4451 	pr_debug("comp level: %d\n", rec->opts.comp_level);
4452 
4453 	err = __cmd_record(&record, argc, argv);
4454 out:
4455 	record__free_thread_masks(rec, rec->nr_threads);
4456 	rec->nr_threads = 0;
4457 	symbol__exit();
4458 	auxtrace_record__free(rec->itr);
4459 out_opts:
4460 	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
4461 	evlist__delete(rec->evlist);
4462 	return err;
4463 }
4464 
4465 static void snapshot_sig_handler(int sig __maybe_unused)
4466 {
4467 	struct record *rec = &record;
4468 
4469 	hit_auxtrace_snapshot_trigger(rec);
4470 
4471 	if (switch_output_signal(rec))
4472 		trigger_hit(&switch_output_trigger);
4473 }
4474 
4475 static void alarm_sig_handler(int sig __maybe_unused)
4476 {
4477 	struct record *rec = &record;
4478 
4479 	if (switch_output_time(rec))
4480 		trigger_hit(&switch_output_trigger);
4481 }
4482