xref: /linux/tools/perf/builtin-record.c (revision 1672f3707a6ef4b386c30bb76df2f62e58a39430)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10 
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include <internal/xyarray.h>
14 #include "util/parse-events.h"
15 #include "util/config.h"
16 
17 #include "util/callchain.h"
18 #include "util/cgroup.h"
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/mmap.h"
25 #include "util/mutex.h"
26 #include "util/target.h"
27 #include "util/session.h"
28 #include "util/tool.h"
29 #include "util/stat.h"
30 #include "util/symbol.h"
31 #include "util/record.h"
32 #include "util/cpumap.h"
33 #include "util/thread_map.h"
34 #include "util/data.h"
35 #include "util/perf_regs.h"
36 #include "util/auxtrace.h"
37 #include "util/tsc.h"
38 #include "util/parse-branch-options.h"
39 #include "util/parse-regs-options.h"
40 #include "util/perf_api_probe.h"
41 #include "util/trigger.h"
42 #include "util/perf-hooks.h"
43 #include "util/cpu-set-sched.h"
44 #include "util/synthetic-events.h"
45 #include "util/time-utils.h"
46 #include "util/units.h"
47 #include "util/bpf-event.h"
48 #include "util/util.h"
49 #include "util/pfm.h"
50 #include "util/pmu.h"
51 #include "util/pmus.h"
52 #include "util/clockid.h"
53 #include "util/off_cpu.h"
54 #include "util/bpf-filter.h"
55 #include "util/strbuf.h"
56 #include "asm/bug.h"
57 #include "perf.h"
58 #include "cputopo.h"
59 
60 #include <errno.h>
61 #include <inttypes.h>
62 #include <locale.h>
63 #include <poll.h>
64 #include <pthread.h>
65 #include <unistd.h>
66 #ifndef HAVE_GETTID
67 #include <syscall.h>
68 #endif
69 #include <sched.h>
70 #include <signal.h>
71 #ifdef HAVE_EVENTFD_SUPPORT
72 #include <sys/eventfd.h>
73 #endif
74 #include <sys/mman.h>
75 #include <sys/wait.h>
76 #include <sys/types.h>
77 #include <sys/stat.h>
78 #include <fcntl.h>
79 #include <linux/err.h>
80 #include <linux/string.h>
81 #include <linux/time64.h>
82 #include <linux/zalloc.h>
83 #include <linux/bitmap.h>
84 #include <sys/time.h>
85 
/*
 * State for perf record's --switch-output mode: when and how to rotate
 * the output file(s) during a recording session.
 */
struct switch_output {
	bool		 enabled;	/* any switch-output mode requested */
	bool		 signal;	/* rotate on SIGUSR2 */
	unsigned long	 size;		/* rotate after this many bytes (0 = off) */
	unsigned long	 time;		/* rotate after this many seconds (0 = off) */
	const char	*str;		/* raw option string as given by the user */
	bool		 set;		/* option was present on the command line */
	char		 **filenames;	/* ring of previously generated file names */
	int		 num_files;	/* capacity of filenames[] ring */
	int		 cur_file;	/* index of the most recent entry in filenames[] */
};
97 
/* Per-recording-thread CPU masks: which mmaps it reads and where it runs. */
struct thread_mask {
	struct mmap_cpu_mask	maps;		/* CPUs whose mmaps this thread services */
	struct mmap_cpu_mask	affinity;	/* CPUs this thread may be scheduled on */
};
102 
/*
 * Per-thread state for parallel trace streaming (--threads). Each thread
 * owns a subset of the evlist mmaps and its own pollfd array, and talks
 * to the main thread over a msg/ack pipe pair.
 */
struct record_thread {
	pid_t			tid;		/* kernel tid of the worker */
	struct thread_mask	*mask;		/* maps/affinity masks for this thread */
	struct {
		int		msg[2];		/* main -> worker control pipe */
		int		ack[2];		/* worker -> main acknowledge pipe */
	} pipes;
	struct fdarray		pollfd;		/* fds this thread polls (dup'ed from evlist) */
	int			ctlfd_pos;	/* position of the control fd in pollfd */
	int			nr_mmaps;	/* number of entries in maps[]/overwrite_maps[] */
	struct mmap		**maps;		/* non-overwrite mmaps owned by this thread */
	struct mmap		**overwrite_maps; /* overwrite-mode mmaps owned by this thread */
	struct record		*rec;		/* back pointer to the session-wide record */
	unsigned long long	samples;	/* samples seen by this thread */
	unsigned long		waking;		/* poll() wakeup count */
	u64			bytes_written;	/* bytes written by this thread */
	u64			bytes_transferred; /* uncompressed bytes (compression stats) */
	u64			bytes_compressed;  /* compressed bytes (compression stats) */
};
122 
/* Each worker's own record_thread; the main thread uses thread_data[0]. */
static __thread struct record_thread *thread;

/* Messages exchanged over the record_thread msg/ack pipes. */
enum thread_msg {
	THREAD_MSG__UNDEFINED = 0,
	THREAD_MSG__READY,
	THREAD_MSG__MAX,
};

/* Human-readable names for enum thread_msg, used in debug output. */
static const char *thread_msg_tags[THREAD_MSG__MAX] = {
	"UNDEFINED", "READY"
};

/* How --threads distributes mmaps: per cpu/core/package/numa or a user spec. */
enum thread_spec {
	THREAD_SPEC__UNDEFINED = 0,
	THREAD_SPEC__CPU,
	THREAD_SPEC__CORE,
	THREAD_SPEC__PACKAGE,
	THREAD_SPEC__NUMA,
	THREAD_SPEC__USER,
	THREAD_SPEC__MAX,
};

/* Option-string spellings for enum thread_spec. */
static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
	"undefined", "cpu", "core", "package", "numa", "user"
};
148 
/* Maps an entry in the evlist pollfd array to its copy in a thread's pollfd. */
struct pollfd_index_map {
	int evlist_pollfd_index;
	int thread_pollfd_index;
};
153 
/*
 * Session-wide state for 'perf record': tool callbacks, parsed options,
 * output data handling, and (optionally) per-thread streaming state.
 */
struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;		/* bytes written by the main thread */
	u64			thread_bytes_written;	/* bytes written by worker threads */
	struct perf_data	data;
	struct auxtrace_record	*itr;			/* AUX area tracing state, if any */
	struct evlist	*evlist;
	struct perf_session	*session;
	struct evlist		*sb_evlist;		/* side-band event list */
	pthread_t		thread_id;
	int			realtime_prio;
	bool			latency;
	bool			switch_output_event_set;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			buildid_mmap;
	bool			buildid_mmap_set;
	bool			timestamp_filename;
	bool			timestamp_boundary;
	bool			off_cpu;
	const char		*filter_action;
	const char		*uid_str;
	struct switch_output	switch_output;
	unsigned long long	samples;
	unsigned long		output_max_size;	/* = 0: unlimited */
	struct perf_debuginfod	debuginfod;
	/* Parallel streaming (--threads) state: */
	int			nr_threads;
	struct thread_mask	*thread_masks;
	struct record_thread	*thread_data;
	struct pollfd_index_map	*index_map;		/* evlist<->thread pollfd index pairs */
	size_t			index_map_sz;		/* allocated capacity of index_map */
	size_t			index_map_cnt;		/* used entries in index_map */
};
191 
/* Set to 1 (by signal handlers or size limits) to stop the record loop. */
static volatile int done;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

/* Names for enum perf_affinity values, used in debug output. */
static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};

/* Forward declarations for tool callbacks defined later in this file. */
static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
				  struct perf_sample *sample, struct machine *machine);
static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
				   struct perf_sample *sample, struct machine *machine);
static int process_timestamp_boundary(const struct perf_tool *tool,
				      union perf_event *event,
				      struct perf_sample *sample,
				      struct machine *machine);
210 
#ifndef HAVE_GETTID
/* Fallback for libcs that do not expose gettid(2) as a wrapper. */
static inline pid_t gettid(void)
{
	return (pid_t)syscall(__NR_gettid);
}
#endif
217 
/*
 * Non-zero when parallel trace streaming (--threads) was requested.
 * Returns the raw threads_spec value, not a normalized boolean.
 */
static int record__threads_enabled(struct record *rec)
{
	return rec->opts.threads_spec;
}
222 
223 static bool switch_output_signal(struct record *rec)
224 {
225 	return rec->switch_output.signal &&
226 	       trigger_is_ready(&switch_output_trigger);
227 }
228 
229 static bool switch_output_size(struct record *rec)
230 {
231 	return rec->switch_output.size &&
232 	       trigger_is_ready(&switch_output_trigger) &&
233 	       (rec->bytes_written >= rec->switch_output.size);
234 }
235 
236 static bool switch_output_time(struct record *rec)
237 {
238 	return rec->switch_output.time &&
239 	       trigger_is_ready(&switch_output_trigger);
240 }
241 
242 static u64 record__bytes_written(struct record *rec)
243 {
244 	return rec->bytes_written + rec->thread_bytes_written;
245 }
246 
247 static bool record__output_max_size_exceeded(struct record *rec)
248 {
249 	return rec->output_max_size &&
250 	       (record__bytes_written(rec) >= rec->output_max_size);
251 }
252 
/*
 * Write @size bytes at @bf to the output. In threaded mode a map carries
 * its own per-thread file; otherwise the session-wide file is used.
 * Updates byte accounting, stops the session when --max-size is exceeded,
 * and fires the switch-output trigger when the size threshold is hit.
 * Returns 0 on success, -1 on write failure.
 */
static int record__write(struct record *rec, struct mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	/* Threaded streaming: each mmap writes to its own file. */
	if (map && map->file)
		file = map->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	if (map && map->file) {
		/* Worker-thread path: account both per-thread and globally. */
		thread->bytes_written += size;
		rec->thread_bytes_written += size;
	} else {
		rec->bytes_written += size;
	}

	if (record__output_max_size_exceeded(rec) && !done) {
		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
				" stopping session ]\n",
				record__bytes_written(rec) >> 10);
		done = 1;
	}

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}
285 
/* Forward declarations; definitions follow the AIO/zstd-conditional code. */
static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
			    void *dst, size_t dst_size, void *src, size_t src_size);
290 
#ifdef HAVE_AIO_SUPPORT
/*
 * Queue an asynchronous write of @size bytes at @buf to @trace_fd at
 * offset @off, retrying while aio_write() fails with EAGAIN (queue full).
 * On a non-retryable error the cblock fd is poisoned with -1 so that
 * record__aio_sync() treats the slot as free. Returns aio_write()'s
 * final result: 0 if queued, -1 on failure.
 */
static int record__aio_write(struct aiocb *cblock, int trace_fd,
		void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf    = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	do {
		rc = aio_write(cblock);
		if (rc == 0) {
			break;
		} else if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	} while (1);

	return rc;
}
316 
/*
 * Check one in-flight aio request. Returns 0 if the request is still in
 * progress (or was restarted for a short write) and 1 when the request
 * is fully complete, in which case the mmap reference taken in
 * record__aio_pushfn() is dropped and the cblock slot is freed (fd = -1).
 */
static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		/* Treat the failed attempt as a zero-byte write and retry. */
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push() so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(&md->core);
		rc = 1;
	} else {
		/*
		 * aio write request may require restart with the
		 * remainder if the kernel didn't write whole
		 * chunk at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}
362 
/*
 * Wait on the mmap's aio control blocks. With sync_all == false, return
 * the index of the first free (or just-completed) cblock so the caller
 * can reuse its buffer. With sync_all == true, block until every request
 * has completed and return -1. aio_suspend() is polled with a 1ms
 * timeout; EAGAIN/EINTR are expected and retried silently.
 */
static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * Started aio write is not complete yet
				 * so it has to be waited before the
				 * next allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}
397 
/* Accumulator passed through perf_mmap__push() to record__aio_pushfn(). */
struct record_aio {
	struct record	*rec;
	void		*data;	/* destination aio buffer (map->aio.data[idx]) */
	size_t		size;	/* bytes accumulated in data so far */
};
403 
/*
 * perf_mmap__push() callback: copy (or compress) one chunk of kernel
 * ring-buffer data into the aio buffer selected by record__aio_push().
 * Returns the number of bytes appended, or a negative error from the
 * compressor.
 */
static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer
	 * to release space in the kernel buffer as fast as possible, calling
	 * perf_mmap__consume() from perf_mmap__push() function.
	 *
	 * That lets the kernel proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Copying can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first move
	 * part of data from map->start till the upper bound and then the remainder
	 * from the beginning of the kernel buffer till the end of the data chunk.
	 */

	if (record__comp_enabled(aio->rec)) {
		ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
						   mmap__mmap_len(map) - aio->size,
						   buf, size);
		if (compressed < 0)
			return (int)compressed;

		size = compressed;
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard map->aio.data[] buffer
		 * from premature deallocation because map object can be
		 * released earlier than aio write request started on
		 * map->aio.data[] buffer is complete.
		 *
		 * perf_mmap__put() is done at record__aio_complete()
		 * after started aio request completion or at record__aio_push()
		 * if the request failed to start.
		 */
		perf_mmap__get(&map->core);
	}

	aio->size += size;

	return size;
}
452 
/*
 * Drain one mmap asynchronously: stage its data into a free aio buffer
 * and queue an aio write at file offset *off, which is advanced on
 * success. Returns 0 on success, > 0 when there was no data, < 0 on
 * error (from perf_mmap__push() or record__aio_write()).
 */
static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait till map->aio.data[] buffer
	 * becomes available after previous aio write operation.
	 */

	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];
	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
		return ret;

	rec->samples++;
	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
		/*
		 * Decrement map->refcount incremented in record__aio_pushfn()
		 * back if record__aio_write() operation failed to start, otherwise
		 * map->refcount is decremented in record__aio_complete() after
		 * aio write operation finishes successfully.
		 */
		perf_mmap__put(&map->core);
	}

	return ret;
}
489 
/* Current file position of the trace output fd (aio writes are positioned). */
static off_t record__aio_get_pos(int trace_fd)
{
	return lseek(trace_fd, 0, SEEK_CUR);
}

/* Reset the trace output fd position after a batch of aio writes. */
static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}
499 
500 static void record__aio_mmap_read_sync(struct record *rec)
501 {
502 	int i;
503 	struct evlist *evlist = rec->evlist;
504 	struct mmap *maps = evlist->mmap;
505 
506 	if (!record__aio_enabled(rec))
507 		return;
508 
509 	for (i = 0; i < evlist->core.nr_mmaps; i++) {
510 		struct mmap *map = &maps[i];
511 
512 		if (map->core.base)
513 			record__aio_sync(map, true);
514 	}
515 }
516 
/* Default and maximum number of aio control blocks per mmap (--aio=N). */
static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;
519 
520 static int record__aio_parse(const struct option *opt,
521 			     const char *str,
522 			     int unset)
523 {
524 	struct record_opts *opts = (struct record_opts *)opt->value;
525 
526 	if (unset) {
527 		opts->nr_cblocks = 0;
528 	} else {
529 		if (str)
530 			opts->nr_cblocks = strtol(str, NULL, 0);
531 		if (!opts->nr_cblocks)
532 			opts->nr_cblocks = nr_cblocks_default;
533 	}
534 
535 	return 0;
536 }
#else /* HAVE_AIO_SUPPORT */
/* Without aio support, stub everything out; nr_cblocks stays 0. */
static int nr_cblocks_max = 0;

static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
			    off_t *off __maybe_unused)
{
	return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif
559 
/* Non-zero when --aio was requested (at least one control block). */
static int record__aio_enabled(struct record *rec)
{
	return rec->opts.nr_cblocks > 0;
}
564 
#define MMAP_FLUSH_DEFAULT 1
/*
 * Option callback for --mmap-flush: minimum number of bytes that must be
 * in a ring buffer before it is flushed. Accepts B/K/M/G suffixes via
 * parse_tag_value(), falls back to plain strtol(), defaults to 1 byte,
 * and is capped at a quarter of the mmap size.
 */
static int record__mmap_flush_parse(const struct option *opt,
				    const char *str,
				    int unset)
{
	int flush_max;
	struct record_opts *opts = (struct record_opts *)opt->value;
	static struct parse_tag tags[] = {
			{ .tag  = 'B', .mult = 1       },
			{ .tag  = 'K', .mult = 1 << 10 },
			{ .tag  = 'M', .mult = 1 << 20 },
			{ .tag  = 'G', .mult = 1 << 30 },
			{ .tag  = 0 },
	};

	if (unset)
		return 0;

	if (str) {
		opts->mmap_flush = parse_tag_value(str, tags);
		/* parse_tag_value() returns (unsigned long)-1 when no tag matched. */
		if (opts->mmap_flush == (int)-1)
			opts->mmap_flush = strtol(str, NULL, 0);
	}

	if (!opts->mmap_flush)
		opts->mmap_flush = MMAP_FLUSH_DEFAULT;

	flush_max = evlist__mmap_size(opts->mmap_pages);
	flush_max /= 4;
	if (opts->mmap_flush > flush_max)
		opts->mmap_flush = flush_max;

	return 0;
}
599 
#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

/*
 * Option callback for -z/--compression-level[=N]: zstd level N,
 * defaulting to comp_level_default; unsetting disables compression.
 */
static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (unset) {
		opts->comp_level = 0;
		return 0;
	}

	if (str)
		opts->comp_level = strtol(str, NULL, 0);
	if (!opts->comp_level)
		opts->comp_level = comp_level_default;

	return 0;
}
#endif
/* Highest compression level accepted on the command line (zstd maximum). */
static unsigned int comp_level_max = 22;

/* Non-zero when output compression (-z) is enabled. */
static int record__comp_enabled(struct record *rec)
{
	return rec->opts.comp_level > 0;
}
625 
/*
 * Tool callback for synthesized events: write the raw event straight to
 * the session-wide output file. Returns record__write()'s 0/-1 result.
 */
static int process_synthesized_event(const struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, NULL, event, event->header.size);
}
634 
/* Serializes writes when synthesis runs on multiple threads. */
static struct mutex synth_lock;

/*
 * Mutex-protected variant of process_synthesized_event() used when
 * event synthesis is multi-threaded, so concurrent record__write()
 * calls do not interleave in the output file.
 */
static int process_locked_synthesized_event(const struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	int ret;

	mutex_lock(&synth_lock);
	ret = process_synthesized_event(tool, event, sample, machine);
	mutex_unlock(&synth_lock);
	return ret;
}
649 
/*
 * perf_mmap__push() callback for the synchronous (non-aio) path: write
 * one chunk of ring-buffer data to the output, compressing it into a
 * PERF_RECORD_COMPRESSED2 event first when -z is enabled. Returns 0 on
 * success, non-zero on write failure, or a negative compressor error.
 */
static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
	struct record *rec = to;

	if (record__comp_enabled(rec)) {
		struct perf_record_compressed2 *event = map->data;
		size_t padding = 0;
		u8 pad[8] = {0};
		ssize_t compressed = zstd_compress(rec->session, map, map->data,
						   mmap__mmap_len(map), bf, size);

		if (compressed < 0)
			return (int)compressed;

		bf = event;
		thread->samples++;

		/*
		 * The record from `zstd_compress` is not 8 bytes aligned, which would cause asan
		 * error. We make it aligned here.
		 */
		event->data_size = compressed - sizeof(struct perf_record_compressed2);
		event->header.size = PERF_ALIGN(compressed, sizeof(u64));
		padding = event->header.size - compressed;
		/* Write the compressed record, then the alignment padding. */
		return record__write(rec, map, bf, compressed) ||
		       record__write(rec, map, &pad, padding);
	}

	thread->samples++;
	return record__write(rec, map, bf, size);
}
681 
/* Signal number to re-raise on exit (-1 = none) and SIGCHLD flag. */
static volatile sig_atomic_t signr = -1;
static volatile sig_atomic_t child_finished;
#ifdef HAVE_EVENTFD_SUPPORT
/* eventfd used to wake the poll() loop from the signal handler. */
static volatile sig_atomic_t done_fd = -1;
#endif
687 
/*
 * Common signal handler: records which signal arrived (or that the
 * child workload finished), sets 'done' to stop the record loop, and
 * pokes the eventfd so a sleeping poll() wakes up. Preserves errno
 * around the write() since handlers may interrupt arbitrary code.
 */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
#ifdef HAVE_EVENTFD_SUPPORT
	if (done_fd >= 0) {
		u64 tmp = 1;
		int orig_errno = errno;

		/*
		 * It is possible for this signal handler to run after done is
		 * checked in the main loop, but before the perf counter fds are
		 * polled. If this happens, the poll() will continue to wait
		 * even though done is set, and will only break out if either
		 * another signal is received, or the counters are ready for
		 * read. To ensure the poll() doesn't sleep when done is set,
		 * use an eventfd (done_fd) to wake up the poll().
		 */
		if (write(done_fd, &tmp, sizeof(tmp)) < 0)
			pr_err("failed to signal wakeup fd, error: %m\n");

		errno = orig_errno;
	}
#endif // HAVE_EVENTFD_SUPPORT
}
717 
/* SIGSEGV handler: run perf-hook recovery, then dump a stack trace. */
static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}
723 
724 static void record__sig_exit(void)
725 {
726 	if (signr == -1)
727 		return;
728 
729 	signal(signr, SIG_DFL);
730 	raise(signr);
731 }
732 
/*
 * Write one AUX area trace event: the event header followed by up to two
 * data fragments (the AUX ring buffer may wrap) plus zero padding to an
 * 8-byte boundary. For single-file non-pipe output the current file
 * offset is also recorded in the auxtrace index. Returns 0 on success.
 */
static int record__process_auxtrace(const struct perf_tool *tool,
				    struct mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}
770 
771 static int record__auxtrace_mmap_read(struct record *rec,
772 				      struct mmap *map)
773 {
774 	int ret;
775 
776 	ret = auxtrace_mmap__read(map, rec->itr,
777 				  perf_session__env(rec->session),
778 				  &rec->tool,
779 				  record__process_auxtrace);
780 	if (ret < 0)
781 		return ret;
782 
783 	if (ret)
784 		rec->samples++;
785 
786 	return 0;
787 }
788 
789 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
790 					       struct mmap *map)
791 {
792 	int ret;
793 
794 	ret = auxtrace_mmap__read_snapshot(map, rec->itr,
795 					   perf_session__env(rec->session),
796 					   &rec->tool,
797 					   record__process_auxtrace,
798 					   rec->opts.auxtrace_snapshot_size);
799 	if (ret < 0)
800 		return ret;
801 
802 	if (ret)
803 		rec->samples++;
804 
805 	return 0;
806 }
807 
808 static int record__auxtrace_read_snapshot_all(struct record *rec)
809 {
810 	int i;
811 	int rc = 0;
812 
813 	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
814 		struct mmap *map = &rec->evlist->mmap[i];
815 
816 		if (!map->auxtrace_mmap.base)
817 			continue;
818 
819 		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
820 			rc = -1;
821 			goto out;
822 		}
823 	}
824 out:
825 	return rc;
826 }
827 
828 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
829 {
830 	pr_debug("Recording AUX area tracing snapshot\n");
831 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
832 		trigger_error(&auxtrace_snapshot_trigger);
833 	} else {
834 		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
835 			trigger_error(&auxtrace_snapshot_trigger);
836 		else
837 			trigger_ready(&auxtrace_snapshot_trigger);
838 	}
839 }
840 
841 static int record__auxtrace_snapshot_exit(struct record *rec)
842 {
843 	if (trigger_is_error(&auxtrace_snapshot_trigger))
844 		return 0;
845 
846 	if (!auxtrace_record__snapshot_started &&
847 	    auxtrace_record__snapshot_start(rec->itr))
848 		return -1;
849 
850 	record__read_auxtrace_snapshot(rec, true);
851 	if (trigger_is_error(&auxtrace_snapshot_trigger))
852 		return -1;
853 
854 	return 0;
855 }
856 
/*
 * Initialize AUX area tracing: reject snapshot/sample modes in parallel
 * streaming mode, create the auxtrace_record if needed, then parse the
 * snapshot, sample, aux-action, and filter options in turn. Returns 0
 * or the first negative error encountered.
 */
static int record__auxtrace_init(struct record *rec)
{
	int err;

	if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
	    && record__threads_enabled(rec)) {
		pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
		return -EINVAL;
	}

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
					    rec->opts.auxtrace_sample_opts);
	if (err)
		return err;

	err = auxtrace_parse_aux_action(rec->evlist);
	if (err)
		return err;

	return auxtrace_parse_filters(rec->evlist);
}
889 
890 static int record__config_text_poke(struct evlist *evlist)
891 {
892 	struct evsel *evsel;
893 
894 	/* Nothing to do if text poke is already configured */
895 	evlist__for_each_entry(evlist, evsel) {
896 		if (evsel->core.attr.text_poke)
897 			return 0;
898 	}
899 
900 	evsel = evlist__add_dummy_on_all_cpus(evlist);
901 	if (!evsel)
902 		return -ENOMEM;
903 
904 	evsel->core.attr.text_poke = 1;
905 	evsel->core.attr.ksymbol = 1;
906 	evsel->immediate = true;
907 	evsel__set_sample_bit(evsel, TIME);
908 
909 	return 0;
910 }
911 
/* Prepare off-CPU profiling (--off-cpu) for the configured target. */
static int record__config_off_cpu(struct record *rec)
{
	return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
}
916 
917 static bool record__tracking_system_wide(struct record *rec)
918 {
919 	struct evlist *evlist = rec->evlist;
920 	struct evsel *evsel;
921 
922 	/*
923 	 * If non-dummy evsel exists, system_wide sideband is need to
924 	 * help parse sample information.
925 	 * For example, PERF_EVENT_MMAP event to help parse symbol,
926 	 * and PERF_EVENT_COMM event to help parse task executable name.
927 	 */
928 	evlist__for_each_entry(evlist, evsel) {
929 		if (!evsel__is_dummy_event(evsel))
930 			return true;
931 	}
932 
933 	return false;
934 }
935 
/*
 * Add (or find) a tracking event so sideband records (MMAP/COMM/...)
 * are captured during initial delay, CPU-targeted, or hybrid-PMU runs,
 * and decide whether it starts on exec or immediately.
 * Returns 0 or -ENOMEM.
 */
static int record__config_tracking_events(struct record *rec)
{
	struct record_opts *opts = &rec->opts;
	struct evlist *evlist = rec->evlist;
	bool system_wide = false;
	struct evsel *evsel;

	/*
	 * For initial_delay, system wide or a hybrid system, we need to add
	 * tracking event so that we can track PERF_RECORD_MMAP to cover the
	 * delay of waiting or event synthesis.
	 */
	if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
	    perf_pmus__num_core_pmus() > 1) {
		/*
		 * User space tasks can migrate between CPUs, so when tracing
		 * selected CPUs, sideband for all CPUs is still needed.
		 */
		if (!!opts->target.cpu_list && record__tracking_system_wide(rec))
			system_wide = true;

		evsel = evlist__findnew_tracking_event(evlist, system_wide);
		if (!evsel)
			return -ENOMEM;

		/*
		 * Enable the tracking event when the process is forked for
		 * initial_delay, immediately for system wide.
		 */
		if (opts->target.initial_delay && !evsel->immediate &&
		    !target__has_cpu(&opts->target))
			evsel->core.attr.enable_on_exec = 1;
		else
			evsel->immediate = 1;
	}

	return 0;
}
974 
975 static bool record__kcore_readable(struct machine *machine)
976 {
977 	char kcore[PATH_MAX];
978 	int fd;
979 
980 	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
981 
982 	fd = open(kcore, O_RDONLY);
983 	if (fd < 0)
984 		return false;
985 
986 	close(fd);
987 
988 	return true;
989 }
990 
/*
 * Copy the machine's /proc/kcore (and companions) into a kcore_dir
 * inside the output data directory, for later offline annotation.
 * Returns 0 on success or a negative error.
 */
static int record__kcore_copy(struct machine *machine, struct perf_data *data)
{
	char from_dir[PATH_MAX];
	char kcore_dir[PATH_MAX];
	int ret;

	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);

	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
	if (ret)
		return ret;

	return kcore_copy(from_dir, kcore_dir);
}
1005 
1006 static void record__thread_data_init_pipes(struct record_thread *thread_data)
1007 {
1008 	thread_data->pipes.msg[0] = -1;
1009 	thread_data->pipes.msg[1] = -1;
1010 	thread_data->pipes.ack[0] = -1;
1011 	thread_data->pipes.ack[1] = -1;
1012 }
1013 
/*
 * Create the msg and ack pipe pairs for one worker thread. On failure
 * of the second pipe(), the first pair is closed again so the struct is
 * left in its initialized (-1) state. Returns 0 or -EINVAL.
 */
static int record__thread_data_open_pipes(struct record_thread *thread_data)
{
	if (pipe(thread_data->pipes.msg))
		return -EINVAL;

	if (pipe(thread_data->pipes.ack)) {
		close(thread_data->pipes.msg[0]);
		thread_data->pipes.msg[0] = -1;
		close(thread_data->pipes.msg[1]);
		thread_data->pipes.msg[1] = -1;
		return -EINVAL;
	}

	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
		 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
		 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);

	return 0;
}
1033 
1034 static void record__thread_data_close_pipes(struct record_thread *thread_data)
1035 {
1036 	if (thread_data->pipes.msg[0] != -1) {
1037 		close(thread_data->pipes.msg[0]);
1038 		thread_data->pipes.msg[0] = -1;
1039 	}
1040 	if (thread_data->pipes.msg[1] != -1) {
1041 		close(thread_data->pipes.msg[1]);
1042 		thread_data->pipes.msg[1] = -1;
1043 	}
1044 	if (thread_data->pipes.ack[0] != -1) {
1045 		close(thread_data->pipes.ack[0]);
1046 		thread_data->pipes.ack[0] = -1;
1047 	}
1048 	if (thread_data->pipes.ack[1] != -1) {
1049 		close(thread_data->pipes.ack[1]);
1050 		thread_data->pipes.ack[1] = -1;
1051 	}
1052 }
1053 
/* True when recording per thread rather than per CPU (dummy CPU map). */
static bool evlist__per_thread(struct evlist *evlist)
{
	return cpu_map__is_dummy(evlist->core.user_requested_cpus);
}
1058 
/*
 * Distribute the evlist's mmaps to one worker thread: in per-thread mode
 * the thread takes them all, otherwise it takes exactly the mmaps whose
 * CPU is set in the thread's maps mask. Allocates the thread's maps[]
 * and overwrite_maps[] pointer arrays. Returns 0 or -ENOMEM.
 */
static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
{
	int m, tm, nr_mmaps = evlist->core.nr_mmaps;
	struct mmap *mmap = evlist->mmap;
	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
	struct perf_cpu_map *cpus = evlist->core.all_cpus;
	bool per_thread = evlist__per_thread(evlist);

	if (per_thread)
		thread_data->nr_mmaps = nr_mmaps;
	else
		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
						      thread_data->mask->maps.nbits);
	if (mmap) {
		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->maps)
			return -ENOMEM;
	}
	if (overwrite_mmap) {
		thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->overwrite_maps) {
			zfree(&thread_data->maps);
			return -ENOMEM;
		}
	}
	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
		 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);

	/* Walk all evlist mmaps; tm indexes the thread-local arrays. */
	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
		if (per_thread ||
		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
			if (thread_data->maps) {
				thread_data->maps[tm] = &mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			if (thread_data->overwrite_maps) {
				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			tm++;
		}
	}

	return 0;
}
1106 
/*
 * Build the worker thread's pollfd array by duplicating, from the
 * evlist's pollfd, every entry whose priv pointer refers to one of this
 * thread's (overwrite) mmaps. Returns 0 or a negative fdarray error.
 */
static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
{
	int f, tm, pos;
	struct mmap *map, *overwrite_map;

	fdarray__init(&thread_data->pollfd, 64);

	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
		map = thread_data->maps ? thread_data->maps[tm] : NULL;
		overwrite_map = thread_data->overwrite_maps ?
				thread_data->overwrite_maps[tm] : NULL;

		for (f = 0; f < evlist->core.pollfd.nr; f++) {
			void *ptr = evlist->core.pollfd.priv[f].ptr;

			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
							      &evlist->core.pollfd);
				if (pos < 0)
					return pos;
				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
					 thread_data, pos, evlist->core.pollfd.entries[f].fd);
			}
		}
	}

	return 0;
}
1135 
1136 static void record__free_thread_data(struct record *rec)
1137 {
1138 	int t;
1139 	struct record_thread *thread_data = rec->thread_data;
1140 
1141 	if (thread_data == NULL)
1142 		return;
1143 
1144 	for (t = 0; t < rec->nr_threads; t++) {
1145 		record__thread_data_close_pipes(&thread_data[t]);
1146 		zfree(&thread_data[t].maps);
1147 		zfree(&thread_data[t].overwrite_maps);
1148 		fdarray__exit(&thread_data[t].pollfd);
1149 	}
1150 
1151 	zfree(&rec->thread_data);
1152 }
1153 
1154 static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
1155 						    int evlist_pollfd_index,
1156 						    int thread_pollfd_index)
1157 {
1158 	size_t x = rec->index_map_cnt;
1159 
1160 	if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
1161 		return -ENOMEM;
1162 	rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
1163 	rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
1164 	rec->index_map_cnt += 1;
1165 	return 0;
1166 }
1167 
1168 static int record__update_evlist_pollfd_from_thread(struct record *rec,
1169 						    struct evlist *evlist,
1170 						    struct record_thread *thread_data)
1171 {
1172 	struct pollfd *e_entries = evlist->core.pollfd.entries;
1173 	struct pollfd *t_entries = thread_data->pollfd.entries;
1174 	int err = 0;
1175 	size_t i;
1176 
1177 	for (i = 0; i < rec->index_map_cnt; i++) {
1178 		int e_pos = rec->index_map[i].evlist_pollfd_index;
1179 		int t_pos = rec->index_map[i].thread_pollfd_index;
1180 
1181 		if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
1182 		    e_entries[e_pos].events != t_entries[t_pos].events) {
1183 			pr_err("Thread and evlist pollfd index mismatch\n");
1184 			err = -EINVAL;
1185 			continue;
1186 		}
1187 		e_entries[e_pos].revents = t_entries[t_pos].revents;
1188 	}
1189 	return err;
1190 }
1191 
1192 static int record__dup_non_perf_events(struct record *rec,
1193 				       struct evlist *evlist,
1194 				       struct record_thread *thread_data)
1195 {
1196 	struct fdarray *fda = &evlist->core.pollfd;
1197 	int i, ret;
1198 
1199 	for (i = 0; i < fda->nr; i++) {
1200 		if (!(fda->priv[i].flags & fdarray_flag__non_perf_event))
1201 			continue;
1202 		ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda);
1203 		if (ret < 0) {
1204 			pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1205 			return ret;
1206 		}
1207 		pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
1208 			  thread_data, ret, fda->entries[i].fd);
1209 		ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret);
1210 		if (ret < 0) {
1211 			pr_err("Failed to map thread and evlist pollfd indexes\n");
1212 			return ret;
1213 		}
1214 	}
1215 	return 0;
1216 }
1217 
/*
 * Allocate and initialize per-thread recording state (rec->thread_data)
 * for rec->nr_threads threads: mmap pointer arrays, private pollfd sets
 * and, for every thread except thread 0, control/ack pipes.
 *
 * Returns 0 on success or a negative error, in which case all partially
 * initialized state is freed.
 */
static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
{
	int t, ret;
	struct record_thread *thread_data;

	rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
	if (!rec->thread_data) {
		pr_err("Failed to allocate thread data\n");
		return -ENOMEM;
	}
	thread_data = rec->thread_data;

	/*
	 * Initialize pipe state for all threads up front — presumably so
	 * record__free_thread_data() can safely close pipes on any error
	 * below (TODO confirm against record__thread_data_init_pipes()).
	 */
	for (t = 0; t < rec->nr_threads; t++)
		record__thread_data_init_pipes(&thread_data[t]);

	for (t = 0; t < rec->nr_threads; t++) {
		thread_data[t].rec = rec;
		thread_data[t].mask = &rec->thread_masks[t];
		ret = record__thread_data_init_maps(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] maps\n", t);
			goto out_free;
		}
		ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] pollfd\n", t);
			goto out_free;
		}
		if (t) {
			/* Worker thread: tid is filled in by record__thread(). */
			thread_data[t].tid = -1;
			ret = record__thread_data_open_pipes(&thread_data[t]);
			if (ret) {
				pr_err("Failed to open thread[%d] communication pipes\n", t);
				goto out_free;
			}
			/* Watch the message pipe read end for control traffic. */
			ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
					   POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
			if (ret < 0) {
				pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
				goto out_free;
			}
			thread_data[t].ctlfd_pos = ret;
			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
				 thread_data, thread_data[t].ctlfd_pos,
				 thread_data[t].pipes.msg[0]);
		} else {
			/* Thread 0 is the main thread itself; no pipes needed. */
			thread_data[t].tid = gettid();

			ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]);
			if (ret < 0)
				goto out_free;

			thread_data[t].ctlfd_pos = -1; /* Not used */
		}
	}

	return 0;

out_free:
	record__free_thread_data(rec);

	return ret;
}
1281 
/*
 * mmap the evlist's ring buffers, initialize the control fd, allocate
 * per-thread recording state and, in threaded mode, create the
 * perf.data directory with one file per mmap.
 */
static int record__mmap_evlist(struct record *rec,
			       struct evlist *evlist)
{
	int i, ret;
	struct record_opts *opts = &rec->opts;
	/* Snapshot and sample modes map the AUX area in overwrite mode. */
	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
				  opts->auxtrace_sample_mode;

	if (opts->affinity != PERF_AFFINITY_SYS)
		cpu__setup_cpunode_map();

	if (evlist__mmap_ex(evlist, opts->mmap_pages,
				 opts->auxtrace_mmap_pages,
				 auxtrace_overwrite,
				 opts->nr_cblocks, opts->affinity,
				 opts->mmap_flush, opts->comp_level) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap: %m\n");
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}

	if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
		return -1;

	ret = record__alloc_thread_data(rec, evlist);
	if (ret)
		return ret;

	if (record__threads_enabled(rec)) {
		/* One data file per mmap so each thread writes independently. */
		ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
		if (ret) {
			errno = -ret;
			pr_err("Failed to create data directory: %m\n");
			return ret;
		}
		for (i = 0; i < evlist->core.nr_mmaps; i++) {
			if (evlist->mmap)
				evlist->mmap[i].file = &rec->data.dir.files[i];
			if (evlist->overwrite_mmap)
				evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
		}
	}

	return 0;
}
1339 
/* Convenience wrapper: mmap the record session's own evlist. */
static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}
1344 
/*
 * Open all events in the evlist, applying fallbacks and weak-group
 * resets on failure. Events that still fail to open are removed
 * (unless that would empty the list), then filters are applied and the
 * ring buffers are mmapped.
 */
static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct evsel *pos;
	struct evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;
	bool skipped = false;
	bool removed_tracking = false;

	evlist__for_each_entry(evlist, pos) {
		if (removed_tracking) {
			/*
			 * Normally the head of the list has tracking enabled
			 * for sideband data like mmaps. If this event is
			 * removed, make sure to add tracking to the next
			 * processed event.
			 */
			if (!pos->tracking) {
				pos->tracking = true;
				evsel__config(pos, opts, &callchain_param);
			}
			removed_tracking = false;
		}
try_again:
		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
			bool report_error = true;

			/* Try a degraded config (e.g. different precision) first. */
			if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			/* A weak group member may succeed once the group is broken up. */
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->core.leader != &pos->core &&
			    pos->weak_group) {
			        pos = evlist__reset_weak_group(evlist, pos, true);
				goto try_again;
			}
#if defined(__aarch64__) || defined(__arm__)
			if (strstr(evsel__name(pos), "cycles")) {
				struct evsel *pos2;
				/*
				 * Unfortunately ARM has many events named
				 * "cycles" on PMUs like the system-level (L3)
				 * cache which don't support sampling. Only
				 * display such failures to open when there is
				 * only 1 cycles event or verbose is enabled.
				 */
				evlist__for_each_entry(evlist, pos2) {
					if (pos2 == pos)
						continue;
					if (strstr(evsel__name(pos2), "cycles")) {
						report_error = false;
						break;
					}
				}
			}
#endif
			if (report_error || verbose > 0) {
				ui__error("Failure to open event '%s' on PMU '%s' which will be "
					  "removed.\n%s\n",
					  evsel__name(pos), evsel__pmu_name(pos), msg);
			}
			if (pos->tracking)
				removed_tracking = true;
			/* Defer removal until after iteration completes. */
			pos->skippable = true;
			skipped = true;
		}
	}

	if (skipped) {
		struct evsel *tmp;
		int idx = 0;
		bool evlist_empty = true;

		/* Remove evsels that failed to open and update indices. */
		evlist__for_each_entry_safe(evlist, tmp, pos) {
			if (pos->skippable) {
				evlist__remove(evlist, pos);
				continue;
			}

			/*
			 * Note, dummy events may be command line parsed or
			 * added by the tool. We care about supporting `perf
			 * record -e dummy` which may be used as a permission
			 * check. Dummy events that are added to the command
			 * line and opened along with other events that fail,
			 * will still fail as if the dummy events were tool
			 * added events for the sake of code simplicity.
			 */
			if (!evsel__is_dummy_event(pos))
				evlist_empty = false;
		}
		/* Renumber the surviving events. */
		evlist__for_each_entry(evlist, pos) {
			pos->core.idx = idx++;
		}
		/* If list is empty then fail. */
		if (evlist_empty) {
			ui__error("Failure to open any events for recording.\n");
			rc = -1;
			goto out;
		}
	}
	if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
	}

	if (evlist__apply_filters(evlist, &pos, &opts->target)) {
		pr_err("failed to set filter \"%s\" on event %s: %m\n",
			pos->filter ?: "BPF", evsel__name(pos));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
1478 
1479 static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1480 {
1481 	if (rec->evlist->first_sample_time == 0)
1482 		rec->evlist->first_sample_time = sample_time;
1483 
1484 	if (sample_time)
1485 		rec->evlist->last_sample_time = sample_time;
1486 }
1487 
1488 static int process_sample_event(const struct perf_tool *tool,
1489 				union perf_event *event,
1490 				struct perf_sample *sample,
1491 				struct evsel *evsel,
1492 				struct machine *machine)
1493 {
1494 	struct record *rec = container_of(tool, struct record, tool);
1495 
1496 	set_timestamp_boundary(rec, sample->time);
1497 
1498 	if (rec->buildid_all)
1499 		return 0;
1500 
1501 	rec->samples++;
1502 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1503 }
1504 
/*
 * Re-read the just-written data file so DSOs touched by samples get
 * marked for build-id processing (via process_sample_event()).
 */
static int process_buildids(struct record *rec)
{
	struct perf_session *session = rec->session;

	/* Nothing was recorded, nothing to scan. */
	if (perf_data__size(&rec->data) == 0)
		return 0;

	/* A single DSO is needed and not all inline frames. */
	symbol_conf.inline_name = false;
	/*
	 * During this process, it'll load kernel map and replace the
	 * dso->long_name to a real pathname it found.  In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than build-id path (in debug directory).
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;
	/*
	 * If --buildid-all is given, it marks all DSO regardless of hits,
	 * so no need to process samples. But if timestamp_boundary is enabled,
	 * it still needs to walk on all samples to get the timestamps of
	 * first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = process_event_sample_stub;

	return perf_session__process_events(session);
}
1535 
1536 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1537 {
1538 	int err;
1539 	struct perf_tool *tool = data;
1540 	/*
1541 	 *As for guest kernel when processing subcommand record&report,
1542 	 *we arrange module mmap prior to guest kernel mmap and trigger
1543 	 *a preload dso because default guest module symbols are loaded
1544 	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
1545 	 *method is used to avoid symbol missing when the first addr is
1546 	 *in module instead of in guest kernel.
1547 	 */
1548 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
1549 					     machine);
1550 	if (err < 0)
1551 		pr_err("Couldn't record guest kernel [%d]'s reference"
1552 		       " relocation symbol.\n", machine->pid);
1553 
1554 	/*
1555 	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
1556 	 * have no _text sometimes.
1557 	 */
1558 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1559 						 machine);
1560 	if (err < 0)
1561 		pr_err("Couldn't record guest kernel [%d]'s reference"
1562 		       " relocation symbol.\n", machine->pid);
1563 }
1564 
/*
 * Header-only marker events written into the data stream:
 * FINISHED_ROUND is emitted after a flush round that wrote data (see
 * record__mmap_read_evlist()); FINISHED_INIT is written around the
 * initial synthesized events (see write_finished_init()).
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static struct perf_event_header finished_init_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_INIT,
};
1574 
/*
 * When affinity mode is not PERF_AFFINITY_SYS and @map's affinity mask
 * differs from the current thread's, adopt the map's mask and migrate
 * the calling thread accordingly before reading the ring buffer.
 * Uses the thread-local 'thread' pointer of the current record thread.
 */
static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
	    !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
			  thread->mask->affinity.nbits)) {
		/* Replace the thread's mask with the mmap's mask. */
		bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
		bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
			  map->affinity_mask.bits, thread->mask->affinity.nbits);
		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
					(cpu_set_t *)thread->mask->affinity.bits);
		if (verbose == 2) {
			pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
			mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
		}
	}
}
1591 
1592 static size_t process_comp_header(void *record, size_t increment)
1593 {
1594 	struct perf_record_compressed2 *event = record;
1595 	size_t size = sizeof(*event);
1596 
1597 	if (increment) {
1598 		event->header.size += increment;
1599 		return increment;
1600 	}
1601 
1602 	event->header.type = PERF_RECORD_COMPRESSED2;
1603 	event->header.size = size;
1604 
1605 	return size;
1606 }
1607 
1608 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
1609 			    void *dst, size_t dst_size, void *src, size_t src_size)
1610 {
1611 	ssize_t compressed;
1612 	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed2) - 1;
1613 	struct zstd_data *zstd_data = &session->zstd_data;
1614 
1615 	if (map && map->file)
1616 		zstd_data = &map->zstd_data;
1617 
1618 	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
1619 						     max_record_size, process_comp_header);
1620 	if (compressed < 0)
1621 		return compressed;
1622 
1623 	if (map && map->file) {
1624 		thread->bytes_transferred += src_size;
1625 		thread->bytes_compressed  += compressed;
1626 	} else {
1627 		session->bytes_transferred += src_size;
1628 		session->bytes_compressed  += compressed;
1629 	}
1630 
1631 	return compressed;
1632 }
1633 
/*
 * Drain the current thread's ring buffers (regular or overwrite maps,
 * per @overwrite) and push their contents to the output, via AIO when
 * enabled. With @synch, force a full flush of each map. Emits a
 * FINISHED_ROUND marker if any data was written (non-threaded mode).
 */
static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
				    bool overwrite, bool synch)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	int nr_mmaps;
	struct mmap **maps;
	int trace_fd = rec->data.file.fd;
	off_t off = 0;

	if (!evlist)
		return 0;

	/* Operate on the thread-local maps of the calling record thread. */
	nr_mmaps = thread->nr_mmaps;
	maps = overwrite ? thread->overwrite_maps : thread->maps;

	if (!maps)
		return 0;

	/* Backward (overwrite) maps are only read while data is pending. */
	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	if (record__aio_enabled(rec))
		off = record__aio_get_pos(trace_fd);

	for (i = 0; i < nr_mmaps; i++) {
		u64 flush = 0;
		struct mmap *map = maps[i];

		if (map->core.base) {
			record__adjust_affinity(rec, map);
			if (synch) {
				/* Temporarily drop the flush threshold to drain everything. */
				flush = map->core.flush;
				map->core.flush = 1;
			}
			if (!record__aio_enabled(rec)) {
				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			} else {
				if (record__aio_push(rec, map, &off) < 0) {
					record__aio_set_pos(trace_fd, off);
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			}
			if (synch)
				map->core.flush = flush;
		}

		/* AUX data is read here unless snapshot/sample mode handles it. */
		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    !rec->opts.auxtrace_sample_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	if (record__aio_enabled(rec))
		record__aio_set_pos(trace_fd, off);

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 *
	 * No need for round events in directory mode,
	 * because per-cpu maps and files have data
	 * sorted by kernel.
	 */
	if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}
1717 
1718 static int record__mmap_read_all(struct record *rec, bool synch)
1719 {
1720 	int err;
1721 
1722 	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1723 	if (err)
1724 		return err;
1725 
1726 	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1727 }
1728 
1729 static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1730 					   void *arg __maybe_unused)
1731 {
1732 	struct perf_mmap *map = fda->priv[fd].ptr;
1733 
1734 	if (map)
1735 		perf_mmap__put(map);
1736 }
1737 
/*
 * Entry point of a worker recording thread: repeatedly drain this
 * thread's mmaps, poll when idle, and terminate after a final
 * synchronous flush once the control pipe reports POLLHUP. Start and
 * termination are acknowledged to the main thread via the ack pipe.
 */
static void *record__thread(void *arg)
{
	enum thread_msg msg = THREAD_MSG__READY;
	bool terminate = false;
	struct fdarray *pollfd;
	int err, ctlfd_pos;

	/* Publish this thread's state through the thread-local pointer. */
	thread = arg;
	thread->tid = gettid();

	/* Tell the main thread we are up and running. */
	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on start: %m\n", thread->tid);

	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());

	pollfd = &thread->pollfd;
	ctlfd_pos = thread->ctlfd_pos;

	for (;;) {
		unsigned long long hits = thread->samples;

		if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
			break;

		/* No new samples since last pass: block until something happens. */
		if (hits == thread->samples) {

			err = fdarray__poll(pollfd, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			thread->waking++;

			/* All descriptors gone: nothing left to record. */
			if (fdarray__filter(pollfd, POLLERR | POLLHUP,
					    record__thread_munmap_filtered, NULL) == 0)
				break;
		}

		/* Main thread closed the control pipe: do one last pass, then stop. */
		if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
			terminate = true;
			close(thread->pipes.msg[0]);
			thread->pipes.msg[0] = -1;
			pollfd->entries[ctlfd_pos].fd = -1;
			pollfd->entries[ctlfd_pos].events = 0;
		}

		pollfd->entries[ctlfd_pos].revents = 0;
	}
	/* Final synchronous flush of everything still buffered. */
	record__mmap_read_all(thread->rec, true);

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on termination: %m\n", thread->tid);

	return NULL;
}
1797 
/*
 * Start with every header feature enabled, then clear those that do
 * not apply to this recording session.
 */
static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->core.entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	if (!rec->opts.use_clockid)
		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);

	if (!record__threads_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);

	if (!record__comp_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);

	/* STAT is never produced by 'perf record'. */
	perf_header__clear_feat(&session->header, HEADER_STAT);
}
1832 
/*
 * Finalize the output: record final file sizes, run build-id
 * processing unless disabled, and write the session header. For pipe
 * output only the approximate size is recorded.
 */
static void
record__finish_output(struct record *rec)
{
	int i;
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe) {
		/* Just to display approx. size */
		data->file.size = rec->bytes_written;
		return;
	}

	rec->session->header.data_size += rec->bytes_written;
	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
	if (record__threads_enabled(rec)) {
		/* In directory mode each per-mmap file has its own size. */
		for (i = 0; i < data->dir.nr; i++)
			data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
	}

	/* Buildid scanning disabled or build ID in kernel and synthesized map events. */
	if (!rec->no_buildid || !rec->no_buildid_cache) {
		process_buildids(rec);

		if (rec->buildid_all)
			perf_session__dsos_hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);
	perf_session__cache_build_ids(rec->session);
}
1863 
1864 static int record__synthesize_workload(struct record *rec, bool tail)
1865 {
1866 	int err;
1867 	struct perf_thread_map *thread_map;
1868 	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1869 
1870 	if (rec->opts.tail_synthesize != tail)
1871 		return 0;
1872 
1873 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1874 	if (thread_map == NULL)
1875 		return -1;
1876 
1877 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1878 						 process_synthesized_event,
1879 						 &rec->session->machines.host,
1880 						 needs_mmap,
1881 						 rec->opts.record_data_mmap);
1882 	perf_thread_map__put(thread_map);
1883 	return err;
1884 }
1885 
1886 static int write_finished_init(struct record *rec, bool tail)
1887 {
1888 	if (rec->opts.tail_synthesize != tail)
1889 		return 0;
1890 
1891 	return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
1892 }
1893 
1894 static int record__synthesize(struct record *rec, bool tail);
1895 
/*
 * Rotate the output (--switch-output): synthesize tail events, finalize
 * the current file, then switch perf.data to a new timestamped file.
 * With --switch-output=N files, old rotated files are recycled.
 * Returns the fd from perf_data__switch() or a negative error.
 */
static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	char *new_filename = NULL;
	int fd, err;

	/* Same Size:      "2015122520103046"*/
	char timestamp[] = "InvalidTimestamp";

	/* Make sure all in-flight AIO writes landed before finalizing. */
	record__aio_mmap_read_sync(rec);

	write_finished_init(rec, true);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		/* Fresh file: restart the byte accounting. */
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet) {
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);
	}

	if (rec->switch_output.num_files) {
		/* Bounded rotation: reuse slots round-robin, deleting the oldest. */
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in evlist. Which causes newly created perf.data doesn't
		 * contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
		write_finished_init(rec, false);
	}
	return fd;
}
1969 
/*
 * Write one PERF_RECORD_LOST_SAMPLES event carrying @lost_count for
 * @evsel, attaching the sample ID of the given CPU/thread index (when
 * the evsel has IDs) so tools can attribute the loss. @misc_flag
 * distinguishes e.g. BPF-filter losses.
 */
static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
					struct perf_record_lost_samples *lost,
					int cpu_idx, int thread_idx, u64 lost_count,
					u16 misc_flag)
{
	struct perf_sample_id *sid;
	struct perf_sample sample;
	int id_hdr_size;

	perf_sample__init(&sample, /*all=*/true);
	lost->lost = lost_count;
	if (evsel->core.ids) {
		sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
		sample.id = sid->id;
	}

	/* The ID sample trailer is written directly after the fixed record. */
	id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1),
						       evsel->core.attr.sample_type, &sample);
	lost->header.size = sizeof(*lost) + id_hdr_size;
	lost->header.misc = misc_flag;
	record__write(rec, NULL, lost, lost->header.size);
	perf_sample__exit(&sample);
}
1993 
/*
 * At session end, read the lost-sample counts from each event's
 * counters (per CPU/thread) and from BPF filters, emitting
 * PERF_RECORD_LOST_SAMPLES events for any non-zero count.
 */
static void record__read_lost_samples(struct record *rec)
{
	struct perf_session *session = rec->session;
	struct perf_record_lost_samples_and_ids lost;
	struct evsel *evsel;

	/* there was an error during record__open */
	if (session->evlist == NULL)
		return;

	evlist__for_each_entry(session->evlist, evsel) {
		struct xyarray *xy = evsel->core.sample_id;
		u64 lost_count;

		if (xy == NULL || evsel->core.fd == NULL)
			continue;
		/* FD and sample-ID matrices must agree for the reads below. */
		if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
		    xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
			pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
			continue;
		}

		for (int x = 0; x < xyarray__max_x(xy); x++) {
			for (int y = 0; y < xyarray__max_y(xy); y++) {
				struct perf_counts_values count;

				/* NOTE: a failed read aborts the remaining events too. */
				if (perf_evsel__read(&evsel->core, x, y, &count) < 0) {
					pr_debug("read LOST count failed\n");
					return;
				}

				if (count.lost) {
					memset(&lost, 0, sizeof(lost));
					lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
					__record__save_lost_samples(rec, evsel, &lost.lost,
								    x, y, count.lost, 0);
				}
			}
		}

		/* Samples dropped by a BPF filter are reported separately. */
		lost_count = perf_bpf_filter__lost_count(evsel);
		if (lost_count) {
			memset(&lost, 0, sizeof(lost));
			lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
			__record__save_lost_samples(rec, evsel, &lost.lost, 0, 0, lost_count,
						    PERF_RECORD_MISC_LOST_SAMPLES_BPF);
		}
	}
}
2043 
/* Set from the SIGUSR1 handler when forking/execing the workload failed. */
static volatile sig_atomic_t workload_exec_errno;

/*
 * evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	/* Record the errno and make the main record loop wind down. */
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}
2059 
2060 static void snapshot_sig_handler(int sig);
2061 static void alarm_sig_handler(int sig);
2062 
2063 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
2064 {
2065 	if (evlist) {
2066 		if (evlist->mmap && evlist->mmap[0].core.base)
2067 			return evlist->mmap[0].core.base;
2068 		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
2069 			return evlist->overwrite_mmap[0].core.base;
2070 	}
2071 	return NULL;
2072 }
2073 
2074 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
2075 {
2076 	const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
2077 	if (pc)
2078 		return pc;
2079 	return NULL;
2080 }
2081 
/*
 * Emit the synthetic (user-space generated) events that describe the
 * system state: attrs for pipe mode, time conversion, id index, auxtrace
 * info, kernel/module mmaps, thread/cpu maps, BPF and cgroup metadata,
 * and finally per-task mmap records.  Called twice: once before tracing
 * starts (tail == false) and once at the end (tail == true); the
 * tail_synthesize option selects which of the two actually runs.
 * Returns 0 on success or a negative error.
 */
static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int err = 0;
	event_op f = process_synthesized_event;

	/* Only synthesize at the configured point (start vs. end of record). */
	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		/* Pipe output carries no header, so attrs etc. go in-band. */
		err = perf_event__synthesize_for_pipe(tool, session, data,
						      process_synthesized_event);
		if (err < 0)
			goto out;

		/* On success the return value is the number of bytes written. */
		rec->bytes_written += err;
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	/* Synthesize id_index before auxtrace_info */
	err = perf_event__synthesize_id_index(tool,
					      process_synthesized_event,
					      session->evlist, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!evlist__exclude_kernel(rec->evlist)) {
		/* Kernel mmap/module records are best-effort: warn, don't fail. */
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/modules permission or run as root.\n");
	}

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
						 process_synthesized_event,
						NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	/* BPF and cgroup metadata are optional: warn and keep going. */
	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
						machine, opts);
	if (err < 0) {
		pr_warning("Couldn't synthesize bpf events.\n");
		err = 0;
	}

	if (rec->opts.synth & PERF_SYNTH_CGROUP) {
		err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
						     machine);
		if (err < 0) {
			pr_warning("Couldn't synthesize cgroup events.\n");
			err = 0;
		}
	}

	/* Multi-threaded synthesis needs a lock around event emission. */
	if (rec->opts.nr_threads_synthesize > 1) {
		mutex_init(&synth_lock);
		perf_set_multithreaded();
		f = process_locked_synthesized_event;
	}

	if (rec->opts.synth & PERF_SYNTH_TASK) {
		bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;

		err = __machine__synthesize_threads(machine, tool, &opts->target,
						    rec->evlist->core.threads,
						    f, needs_mmap, opts->record_data_mmap,
						    rec->opts.nr_threads_synthesize);
	}

	if (rec->opts.nr_threads_synthesize > 1) {
		perf_set_singlethreaded();
		mutex_destroy(&synth_lock);
	}

out:
	return err;
}
2203 
/*
 * Emit trailing BPF metadata events at the end of the record session.
 * No-op (and 'rec' unused) when perf was built without libbpf.
 */
static void record__synthesize_final_bpf_metadata(struct record *rec __maybe_unused)
{
#ifdef HAVE_LIBBPF_SUPPORT
	perf_event__synthesize_final_bpf_metadata(rec->session,
						  process_synthesized_event);
#endif
}
2211 
/*
 * Side-band evlist callback: poke the main record thread with SIGUSR2
 * (used by --switch-output-event to trigger an output switch).
 * 'data' is the struct record whose thread_id was saved at setup time.
 */
static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
{
	struct record *rec = data;
	pthread_kill(rec->thread_id, SIGUSR2);
	return 0;
}
2218 
2219 static int record__setup_sb_evlist(struct record *rec)
2220 {
2221 	struct record_opts *opts = &rec->opts;
2222 
2223 	if (rec->sb_evlist != NULL) {
2224 		/*
2225 		 * We get here if --switch-output-event populated the
2226 		 * sb_evlist, so associate a callback that will send a SIGUSR2
2227 		 * to the main thread.
2228 		 */
2229 		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
2230 		rec->thread_id = pthread_self();
2231 	}
2232 #ifdef HAVE_LIBBPF_SUPPORT
2233 	if (!opts->no_bpf_event) {
2234 		if (rec->sb_evlist == NULL) {
2235 			rec->sb_evlist = evlist__new();
2236 
2237 			if (rec->sb_evlist == NULL) {
2238 				pr_err("Couldn't create side band evlist.\n.");
2239 				return -1;
2240 			}
2241 		}
2242 
2243 		if (evlist__add_bpf_sb_event(rec->sb_evlist, perf_session__env(rec->session))) {
2244 			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
2245 			return -1;
2246 		}
2247 	}
2248 #endif
2249 	if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
2250 		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
2251 		opts->no_bpf_event = true;
2252 	}
2253 
2254 	return 0;
2255 }
2256 
2257 static int record__init_clock(struct record *rec)
2258 {
2259 	struct perf_session *session = rec->session;
2260 	struct timespec ref_clockid;
2261 	struct timeval ref_tod;
2262 	struct perf_env *env = perf_session__env(session);
2263 	u64 ref;
2264 
2265 	if (!rec->opts.use_clockid)
2266 		return 0;
2267 
2268 	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
2269 		env->clock.clockid_res_ns = rec->opts.clockid_res_ns;
2270 
2271 	env->clock.clockid = rec->opts.clockid;
2272 
2273 	if (gettimeofday(&ref_tod, NULL) != 0) {
2274 		pr_err("gettimeofday failed, cannot set reference time.\n");
2275 		return -1;
2276 	}
2277 
2278 	if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
2279 		pr_err("clock_gettime failed, cannot set reference time.\n");
2280 		return -1;
2281 	}
2282 
2283 	ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
2284 	      (u64) ref_tod.tv_usec * NSEC_PER_USEC;
2285 
2286 	env->clock.tod_ns = ref;
2287 
2288 	ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2289 	      (u64) ref_clockid.tv_nsec;
2290 
2291 	env->clock.clockid_ns = ref;
2292 	return 0;
2293 }
2294 
/*
 * Fire the AUX area snapshot trigger (if armed) and kick off a snapshot;
 * on snapshot-start failure the trigger is moved to the error state so
 * the main loop can report it.
 */
static void hit_auxtrace_snapshot_trigger(struct record *rec)
{
	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}
}
2304 
/*
 * Ask a record worker thread to terminate by closing the write end of its
 * message pipe, then wait for (and log) its acknowledgement.  'thread' here
 * is the per-thread global pointing at the main thread's slot.
 * Always returns 0; a missing ack is only warned about.
 */
static int record__terminate_thread(struct record_thread *thread_data)
{
	int err;
	enum thread_msg ack = THREAD_MSG__UNDEFINED;
	pid_t tid = thread_data->tid;

	/* Closing msg[1] makes the worker's read() see EOF and exit. */
	close(thread_data->pipes.msg[1]);
	thread_data->pipes.msg[1] = -1;
	err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
	if (err > 0)
		pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
	else
		pr_warning("threads[%d]: failed to receive termination notification from %d\n",
			   thread->tid, tid);

	return 0;
}
2322 
2323 static int record__start_threads(struct record *rec)
2324 {
2325 	int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
2326 	struct record_thread *thread_data = rec->thread_data;
2327 	sigset_t full, mask;
2328 	pthread_t handle;
2329 	pthread_attr_t attrs;
2330 
2331 	thread = &thread_data[0];
2332 
2333 	if (!record__threads_enabled(rec))
2334 		return 0;
2335 
2336 	sigfillset(&full);
2337 	if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2338 		pr_err("Failed to block signals on threads start: %m\n");
2339 		return -1;
2340 	}
2341 
2342 	pthread_attr_init(&attrs);
2343 	pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2344 
2345 	for (t = 1; t < nr_threads; t++) {
2346 		enum thread_msg msg = THREAD_MSG__UNDEFINED;
2347 
2348 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2349 		pthread_attr_setaffinity_np(&attrs,
2350 					    MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2351 					    (cpu_set_t *)(thread_data[t].mask->affinity.bits));
2352 #endif
2353 		if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2354 			for (tt = 1; tt < t; tt++)
2355 				record__terminate_thread(&thread_data[t]);
2356 			pr_err("Failed to start threads: %m\n");
2357 			ret = -1;
2358 			goto out_err;
2359 		}
2360 
2361 		err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2362 		if (err > 0)
2363 			pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2364 				  thread_msg_tags[msg]);
2365 		else
2366 			pr_warning("threads[%d]: failed to receive start notification from %d\n",
2367 				   thread->tid, rec->thread_data[t].tid);
2368 	}
2369 
2370 	sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2371 			(cpu_set_t *)thread->mask->affinity.bits);
2372 
2373 	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2374 
2375 out_err:
2376 	pthread_attr_destroy(&attrs);
2377 
2378 	if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2379 		pr_err("Failed to unblock signals on threads start: %m\n");
2380 		ret = -1;
2381 	}
2382 
2383 	return ret;
2384 }
2385 
/*
 * Stop all record worker threads and fold their per-thread counters
 * (samples, and — when threading is enabled — transfer/compression byte
 * counts) back into the record/session totals.  Always returns 0.
 */
static int record__stop_threads(struct record *rec)
{
	int t;
	struct record_thread *thread_data = rec->thread_data;

	/* Slot 0 is the main thread; only workers need terminating. */
	for (t = 1; t < rec->nr_threads; t++)
		record__terminate_thread(&thread_data[t]);

	for (t = 0; t < rec->nr_threads; t++) {
		rec->samples += thread_data[t].samples;
		if (!record__threads_enabled(rec))
			continue;
		rec->session->bytes_transferred += thread_data[t].bytes_transferred;
		rec->session->bytes_compressed += thread_data[t].bytes_compressed;
		pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
			 thread_data[t].samples, thread_data[t].waking);
		if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
			pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
				 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
		else
			pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
	}

	return 0;
}
2411 
2412 static unsigned long record__waking(struct record *rec)
2413 {
2414 	int t;
2415 	unsigned long waking = 0;
2416 	struct record_thread *thread_data = rec->thread_data;
2417 
2418 	for (t = 0; t < rec->nr_threads; t++)
2419 		waking += thread_data[t].waking;
2420 
2421 	return waking;
2422 }
2423 
/*
 * Core of "perf record": create the session, configure and open the
 * events, optionally fork the workload, then loop draining the ring
 * buffers until done/draining, and finally synthesize tail events and
 * finish (or switch) the output file.  Returns the workload exit status
 * when forking, otherwise 0 or a negative error.
 */
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	const bool forks = argc > 0;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;
	float ratio = 0;
	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
	struct perf_env *env;

	/* Exit cleanup plus handlers that set the 'done' flag or dump state. */
	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_cgroup) {
#ifndef HAVE_FILE_HANDLE
		pr_err("cgroup tracking is not supported\n");
		return -1;
#endif
	}

	/* SIGUSR2 drives both auxtrace snapshots and output switching. */
	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	perf_tool__init(tool, /*ordered_events=*/true);
	tool->sample		= process_sample_event;
	tool->fork		= perf_event__process_fork;
	tool->exit		= perf_event__process_exit;
	tool->comm		= perf_event__process_comm;
	tool->namespaces	= perf_event__process_namespaces;
	tool->mmap		= build_id__process_mmap;
	tool->mmap2		= build_id__process_mmap2;
	tool->itrace_start	= process_timestamp_boundary;
	tool->aux		= process_timestamp_boundary;
	tool->namespace_events	= rec->opts.record_namespaces;
	tool->cgroup_events	= rec->opts.record_cgroup;
	session = perf_session__new(data, tool);
	if (IS_ERR(session)) {
		pr_err("Perf session creation failed.\n");
		return PTR_ERR(session);
	}
	env = perf_session__env(session);
	/* Parallel streaming is incompatible with pipes and AUX tracing. */
	if (record__threads_enabled(rec)) {
		if (perf_data__is_pipe(&rec->data)) {
			pr_err("Parallel trace streaming is not available in pipe mode.\n");
			return -1;
		}
		if (rec->opts.full_auxtrace) {
			pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
			return -1;
		}
	}

	fd = perf_data__fd(data);
	rec->session = session;

	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
		pr_err("Compression initialization failed.\n");
		return -1;
	}
#ifdef HAVE_EVENTFD_SUPPORT
	done_fd = eventfd(0, EFD_NONBLOCK);
	if (done_fd < 0) {
		pr_err("Failed to create wakeup eventfd, error: %m\n");
		status = -1;
		goto out_delete_session;
	}
	err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
	if (err < 0) {
		pr_err("Failed to add wakeup eventfd to poll list\n");
		status = err;
		goto out_delete_session;
	}
#endif // HAVE_EVENTFD_SUPPORT

	env->comp_type  = PERF_COMP_ZSTD;
	env->comp_level = rec->opts.comp_level;

	if (rec->opts.kcore &&
	    !record__kcore_readable(&session->machines.host)) {
		pr_err("ERROR: kcore is not readable.\n");
		return -1;
	}

	if (record__init_clock(rec))
		return -1;

	record__init_features(rec);

	if (forks) {
		/* Fork the workload now; it waits until evlist__start_workload(). */
		err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
					       workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	/*
	 * If we have just single event and are sending data
	 * through pipe, we need to force the ids allocation,
	 * because we synthesize event name through the pipe
	 * and need the id for that.
	 */
	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
		rec->opts.sample_id = true;

	if (rec->timestamp_filename && perf_data__is_pipe(data)) {
		rec->timestamp_filename = false;
		pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n");
	}

	/*
	 * Use global stat_config that is zero meaning aggr_mode is AGGR_NONE
	 * and hybrid_merge is false.
	 */
	evlist__uniquify_evsel_names(rec->evlist, &stat_config);

	evlist__config(rec->evlist, opts, &callchain_param);

	/* Debug message used by test scripts */
	pr_debug3("perf record opening and mmapping events\n");
	if (record__open(rec) != 0) {
		err = -1;
		goto out_free_threads;
	}
	/* Debug message used by test scripts */
	pr_debug3("perf record done opening and mmapping events\n");
	env->comp_mmap_len = session->evlist->core.mmap_len;

	if (rec->opts.kcore) {
		err = record__kcore_copy(&session->machines.host, data);
		if (err) {
			pr_err("ERROR: Failed to copy kcore\n");
			goto out_free_threads;
		}
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (evlist__nr_groups(rec->evlist) == 0)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	/* Write the header (or its pipe-mode equivalent) before any data. */
	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_free_threads;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_free_threads;
	}

	err = -1;
	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		goto out_free_threads;
	}

	if (!evlist__needs_bpf_sb_event(rec->evlist))
		opts->no_bpf_event = true;

	err = record__setup_sb_evlist(rec);
	if (err)
		goto out_free_threads;

	/* Head-of-file synthetic events (unless --tail-synthesize). */
	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_free_threads;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_free_threads;
		}
	}

	if (record__start_threads(rec))
		goto out_free_threads;

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->target.initial_delay)
		evlist__enable(rec->evlist);

	/*
	 * offcpu-time does not call execve, so enable_on_exe wouldn't work
	 * when recording a workload, do it manually
	 */
	if (rec->off_cpu)
		evlist__enable_evsel(rec->evlist, (char *)OFFCPU_EVENT);

	/*
	 * Let the child rip
	 */
	if (forks) {
		struct machine *machine = &session->machines.host;
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before COMM event
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize COMM event to prevent it.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		evlist__start_workload(rec->evlist);
	}

	if (opts->target.initial_delay) {
		pr_info(EVLIST_DISABLED_MSG);
		if (opts->target.initial_delay > 0) {
			usleep(opts->target.initial_delay * USEC_PER_MSEC);
			evlist__enable(rec->evlist);
			pr_info(EVLIST_ENABLED_MSG);
		}
	}

	err = event_enable_timer__start(rec->evlist->eet);
	if (err)
		goto out_child;

	/* Debug message used by test scripts */
	pr_debug3("perf record has started\n");
	fflush(stderr);

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();

	/*
	 * Must write FINISHED_INIT so it will be seen after all other
	 * synthesized user events, but before any regular events.
	 */
	err = write_finished_init(rec, false);
	if (err < 0)
		goto out_child;

	/* Main drain loop: read mmaps until done/draining and buffers empty. */
	for (;;) {
		unsigned long long hits = thread->samples;

		/*
		 * rec->evlist->bkw_mmap_state is possible to be
		 * BKW_MMAP_EMPTY here: when done == true and
		 * hits != rec->samples in previous round.
		 *
		 * evlist__toggle_bkw_mmap ensure we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec, false) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec, false);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in
			 * overwritable ring buffer should have been collected,
			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 raise after or during record__mmap_read_all(),
			 * record__mmap_read_all() didn't collect data from
			 * overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					record__waking(rec));
			thread->waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		/* No new samples this round: either finish or block in poll. */
		if (hits == thread->samples) {
			if (done || draining)
				break;
			err = fdarray__poll(&thread->pollfd, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			thread->waking++;

			if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
					    record__thread_munmap_filtered, NULL) == 0)
				draining = true;

			err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread);
			if (err)
				goto out_child;
		}

		/* Handle commands arriving on the --control fd, if any. */
		if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
			switch (cmd) {
			case EVLIST_CTL_CMD_SNAPSHOT:
				hit_auxtrace_snapshot_trigger(rec);
				evlist__ctlfd_ack(rec->evlist);
				break;
			case EVLIST_CTL_CMD_STOP:
				done = 1;
				break;
			case EVLIST_CTL_CMD_ACK:
			case EVLIST_CTL_CMD_UNSUPPORTED:
			case EVLIST_CTL_CMD_ENABLE:
			case EVLIST_CTL_CMD_DISABLE:
			case EVLIST_CTL_CMD_EVLIST:
			case EVLIST_CTL_CMD_PING:
			default:
				break;
			}
		}

		err = event_enable_timer__process(rec->evlist->eet);
		if (err < 0)
			goto out_child;
		if (err) {
			err = 0;
			done = 1;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			evlist__disable(rec->evlist);
			disabled = true;
		}
	}

	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	record__synthesize_final_bpf_metadata(rec);

	if (opts->auxtrace_snapshot_on_exit)
		record__auxtrace_snapshot_exit(rec);

	/* Workload exec failed: report which events/command went wrong. */
	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		struct strbuf sb = STRBUF_INIT;

		evlist__format_evsels(rec->evlist, &sb, 2048);

		pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
			sb.buf, argv[0], emsg);
		strbuf_release(&sb);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
			record__waking(rec));

	write_finished_init(rec, true);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	record__stop_threads(rec);
	record__mmap_read_all(rec, true);
out_free_threads:
	record__free_thread_data(rec);
	evlist__finalize_ctlfd(rec->evlist);
	record__aio_mmap_read_sync(rec);

	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
		env->comp_ratio = ratio + 0.5;
	}

	/* Reap the forked workload and derive our exit status from it. */
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	if (rec->off_cpu)
		rec->bytes_written += off_cpu_write(rec->session);

	record__read_lost_samples(rec);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		/* Tail synthetic events, then finish or switch the output. */
		record__synthesize(rec, true);
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s",
			perf_data__size(data) / 1024.0 / 1024.0,
			data->path, postfix, samples);
		if (ratio) {
			fprintf(stderr,	", compressed (original %.3f MB, ratio is %.3f)",
					rec->session->bytes_transferred / 1024.0 / 1024.0,
					ratio);
		}
		fprintf(stderr, " ]\n");
	}

out_delete_session:
#ifdef HAVE_EVENTFD_SUPPORT
	if (done_fd >= 0) {
		fd = done_fd;
		done_fd = -1;

		close(fd);
	}
#endif
	zstd_fini(&session->zstd_data);
	if (!opts->no_bpf_event)
		evlist__stop_sb_thread(rec->sb_evlist);

	perf_session__delete(session);
	return status;
}
2977 
2978 static void callchain_debug(struct callchain_param *callchain)
2979 {
2980 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2981 
2982 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2983 
2984 	if (callchain->record_mode == CALLCHAIN_DWARF)
2985 		pr_debug("callchain: stack dump size %d\n",
2986 			 callchain->dump_size);
2987 }
2988 
2989 int record_opts__parse_callchain(struct record_opts *record,
2990 				 struct callchain_param *callchain,
2991 				 const char *arg, bool unset)
2992 {
2993 	int ret;
2994 	callchain->enabled = !unset;
2995 
2996 	/* --no-call-graph */
2997 	if (unset) {
2998 		callchain->record_mode = CALLCHAIN_NONE;
2999 		pr_debug("callchain: disabled\n");
3000 		return 0;
3001 	}
3002 
3003 	ret = parse_callchain_record_opt(arg, callchain);
3004 	if (!ret) {
3005 		/* Enable data address sampling for DWARF unwind. */
3006 		if (callchain->record_mode == CALLCHAIN_DWARF &&
3007 		    !record->record_data_mmap_set)
3008 			record->record_data_mmap = true;
3009 		callchain_debug(callchain);
3010 	}
3011 
3012 	return ret;
3013 }
3014 
/*
 * Option callback for --call-graph: forwards to
 * record_opts__parse_callchain() with the global callchain_param.
 */
int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}
3021 
3022 int record_callchain_opt(const struct option *opt,
3023 			 const char *arg __maybe_unused,
3024 			 int unset __maybe_unused)
3025 {
3026 	struct callchain_param *callchain = opt->value;
3027 
3028 	callchain->enabled = true;
3029 
3030 	if (callchain->record_mode == CALLCHAIN_NONE)
3031 		callchain->record_mode = CALLCHAIN_FP;
3032 
3033 	callchain_debug(callchain);
3034 	return 0;
3035 }
3036 
3037 static int perf_record_config(const char *var, const char *value, void *cb)
3038 {
3039 	struct record *rec = cb;
3040 
3041 	if (!strcmp(var, "record.build-id")) {
3042 		if (!strcmp(value, "cache"))
3043 			rec->no_buildid_cache = false;
3044 		else if (!strcmp(value, "no-cache"))
3045 			rec->no_buildid_cache = true;
3046 		else if (!strcmp(value, "skip"))
3047 			rec->no_buildid = rec->no_buildid_cache = true;
3048 		else if (!strcmp(value, "mmap"))
3049 			rec->buildid_mmap = true;
3050 		else if (!strcmp(value, "no-mmap"))
3051 			rec->buildid_mmap = false;
3052 		else
3053 			return -1;
3054 		return 0;
3055 	}
3056 	if (!strcmp(var, "record.call-graph")) {
3057 		var = "call-graph.record-mode";
3058 		return perf_default_config(var, value, cb);
3059 	}
3060 #ifdef HAVE_AIO_SUPPORT
3061 	if (!strcmp(var, "record.aio")) {
3062 		rec->opts.nr_cblocks = strtol(value, NULL, 0);
3063 		if (!rec->opts.nr_cblocks)
3064 			rec->opts.nr_cblocks = nr_cblocks_default;
3065 	}
3066 #endif
3067 	if (!strcmp(var, "record.debuginfod")) {
3068 		rec->debuginfod.urls = strdup(value);
3069 		if (!rec->debuginfod.urls)
3070 			return -ENOMEM;
3071 		rec->debuginfod.set = true;
3072 	}
3073 
3074 	return 0;
3075 }
3076 
/*
 * Option callback for --delay: forwards to the evlist parser, which
 * interprets the enable/disable time specification.
 */
static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset)
{
	struct record *rec = (struct record *)opt->value;

	return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset);
}
3083 
3084 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
3085 {
3086 	struct record_opts *opts = (struct record_opts *)opt->value;
3087 
3088 	if (unset || !str)
3089 		return 0;
3090 
3091 	if (!strcasecmp(str, "node"))
3092 		opts->affinity = PERF_AFFINITY_NODE;
3093 	else if (!strcasecmp(str, "cpu"))
3094 		opts->affinity = PERF_AFFINITY_CPU;
3095 
3096 	return 0;
3097 }
3098 
3099 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
3100 {
3101 	mask->nbits = nr_bits;
3102 	mask->bits = bitmap_zalloc(mask->nbits);
3103 	if (!mask->bits)
3104 		return -ENOMEM;
3105 
3106 	return 0;
3107 }
3108 
/* Release a CPU bitmap allocated by record__mmap_cpu_mask_alloc(). */
static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
{
	bitmap_free(mask->bits);
	mask->nbits = 0;
}
3114 
3115 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
3116 {
3117 	int ret;
3118 
3119 	ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
3120 	if (ret) {
3121 		mask->affinity.bits = NULL;
3122 		return ret;
3123 	}
3124 
3125 	ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
3126 	if (ret) {
3127 		record__mmap_cpu_mask_free(&mask->maps);
3128 		mask->maps.bits = NULL;
3129 	}
3130 
3131 	return ret;
3132 }
3133 
3134 static void record__thread_mask_free(struct thread_mask *mask)
3135 {
3136 	record__mmap_cpu_mask_free(&mask->maps);
3137 	record__mmap_cpu_mask_free(&mask->affinity);
3138 }
3139 
3140 static int record__parse_threads(const struct option *opt, const char *str, int unset)
3141 {
3142 	int s;
3143 	struct record_opts *opts = opt->value;
3144 
3145 	if (unset || !str || !strlen(str)) {
3146 		opts->threads_spec = THREAD_SPEC__CPU;
3147 	} else {
3148 		for (s = 1; s < THREAD_SPEC__MAX; s++) {
3149 			if (s == THREAD_SPEC__USER) {
3150 				opts->threads_user_spec = strdup(str);
3151 				if (!opts->threads_user_spec)
3152 					return -ENOMEM;
3153 				opts->threads_spec = THREAD_SPEC__USER;
3154 				break;
3155 			}
3156 			if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
3157 				opts->threads_spec = s;
3158 				break;
3159 			}
3160 		}
3161 	}
3162 
3163 	if (opts->threads_spec == THREAD_SPEC__USER)
3164 		pr_debug("threads_spec: %s\n", opts->threads_user_spec);
3165 	else
3166 		pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
3167 
3168 	return 0;
3169 }
3170 
3171 static int parse_output_max_size(const struct option *opt,
3172 				 const char *str, int unset)
3173 {
3174 	unsigned long *s = (unsigned long *)opt->value;
3175 	static struct parse_tag tags_size[] = {
3176 		{ .tag  = 'B', .mult = 1       },
3177 		{ .tag  = 'K', .mult = 1 << 10 },
3178 		{ .tag  = 'M', .mult = 1 << 20 },
3179 		{ .tag  = 'G', .mult = 1 << 30 },
3180 		{ .tag  = 0 },
3181 	};
3182 	unsigned long val;
3183 
3184 	if (unset) {
3185 		*s = 0;
3186 		return 0;
3187 	}
3188 
3189 	val = parse_tag_value(str, tags_size);
3190 	if (val != (unsigned long) -1) {
3191 		*s = val;
3192 		return 0;
3193 	}
3194 
3195 	return -1;
3196 }
3197 
3198 static int record__parse_mmap_pages(const struct option *opt,
3199 				    const char *str,
3200 				    int unset __maybe_unused)
3201 {
3202 	struct record_opts *opts = opt->value;
3203 	char *s, *p;
3204 	unsigned int mmap_pages;
3205 	int ret;
3206 
3207 	if (!str)
3208 		return -EINVAL;
3209 
3210 	s = strdup(str);
3211 	if (!s)
3212 		return -ENOMEM;
3213 
3214 	p = strchr(s, ',');
3215 	if (p)
3216 		*p = '\0';
3217 
3218 	if (*s) {
3219 		ret = __evlist__parse_mmap_pages(&mmap_pages, s);
3220 		if (ret)
3221 			goto out_free;
3222 		opts->mmap_pages = mmap_pages;
3223 	}
3224 
3225 	if (!p) {
3226 		ret = 0;
3227 		goto out_free;
3228 	}
3229 
3230 	ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
3231 	if (ret)
3232 		goto out_free;
3233 
3234 	opts->auxtrace_mmap_pages = mmap_pages;
3235 
3236 out_free:
3237 	free(s);
3238 	return ret;
3239 }
3240 
3241 static int record__parse_off_cpu_thresh(const struct option *opt,
3242 					const char *str,
3243 					int unset __maybe_unused)
3244 {
3245 	struct record_opts *opts = opt->value;
3246 	char *endptr;
3247 	u64 off_cpu_thresh_ms;
3248 
3249 	if (!str)
3250 		return -EINVAL;
3251 
3252 	off_cpu_thresh_ms = strtoull(str, &endptr, 10);
3253 
3254 	/* the threshold isn't string "0", yet strtoull() returns 0, parsing failed */
3255 	if (*endptr || (off_cpu_thresh_ms == 0 && strcmp(str, "0")))
3256 		return -EINVAL;
3257 	else
3258 		opts->off_cpu_thresh_ns = off_cpu_thresh_ms * NSEC_PER_MSEC;
3259 
3260 	return 0;
3261 }
3262 
/*
 * Weak no-op default; an architecture may provide a strong definition to
 * adjust the record options needed for leaf-frame callchain support.
 */
void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
{
}
3266 
3267 static int parse_control_option(const struct option *opt,
3268 				const char *str,
3269 				int unset __maybe_unused)
3270 {
3271 	struct record_opts *opts = opt->value;
3272 
3273 	return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
3274 }
3275 
3276 static void switch_output_size_warn(struct record *rec)
3277 {
3278 	u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
3279 	struct switch_output *s = &rec->switch_output;
3280 
3281 	wakeup_size /= 2;
3282 
3283 	if (s->size < wakeup_size) {
3284 		char buf[100];
3285 
3286 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
3287 		pr_warning("WARNING: switch-output data size lower than "
3288 			   "wakeup kernel buffer size (%s) "
3289 			   "expect bigger perf.data sizes\n", buf);
3290 	}
3291 }
3292 
/*
 * Validate and apply --switch-output / --switch-output-event.  Turns
 * rec->switch_output.str into one of three triggers: SIGUSR2 ("signal"),
 * a size threshold (size[BKMG]) or a time threshold (time[smhd]).
 * Returns 0 on success or when the feature stays disabled (including the
 * parallel-streaming incompatibility, which only warns), -1 when the spec
 * matches none of the accepted forms.
 */
static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag  = 's', .mult = 1        },
		{ .tag  = 'm', .mult = 60       },
		{ .tag  = 'h', .mult = 60*60    },
		{ .tag  = 'd', .mult = 60*60*24 },
		{ .tag  = 0 },
	};
	unsigned long val;

	/*
	 * If we're using --switch-output-events, then we imply its
	 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
	 *  thread to its parent.
	 */
	if (rec->switch_output_event_set) {
		if (record__threads_enabled(rec)) {
			pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
			return 0;
		}
		goto do_signal;
	}

	if (!s->set)
		return 0;

	if (record__threads_enabled(rec)) {
		pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
		return 0;
	}

	if (!strcmp(s->str, "signal")) {
do_signal:
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	/* Not "signal": try a size threshold first ... */
	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	/* ... then a time threshold. */
	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	return -1;

enabled:
	/* Rotated outputs need timestamped names so they don't clobber each other. */
	rec->timestamp_filename = true;
	s->enabled              = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}
3366 
/* Usage strings for 'perf record -h'; exported via the record_usage pointer. */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;
3373 
3374 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
3375 				  struct perf_sample *sample, struct machine *machine)
3376 {
3377 	/*
3378 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3379 	 * no need to add them twice.
3380 	 */
3381 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3382 		return 0;
3383 	return perf_event__process_mmap(tool, event, sample, machine);
3384 }
3385 
3386 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
3387 				   struct perf_sample *sample, struct machine *machine)
3388 {
3389 	/*
3390 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3391 	 * no need to add them twice.
3392 	 */
3393 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3394 		return 0;
3395 
3396 	return perf_event__process_mmap2(tool, event, sample, machine);
3397 }
3398 
3399 static int process_timestamp_boundary(const struct perf_tool *tool,
3400 				      union perf_event *event __maybe_unused,
3401 				      struct perf_sample *sample,
3402 				      struct machine *machine __maybe_unused)
3403 {
3404 	struct record *rec = container_of(tool, struct record, tool);
3405 
3406 	set_timestamp_boundary(rec, sample->time);
3407 	return 0;
3408 }
3409 
3410 static int parse_record_synth_option(const struct option *opt,
3411 				     const char *str,
3412 				     int unset __maybe_unused)
3413 {
3414 	struct record_opts *opts = opt->value;
3415 	char *p = strdup(str);
3416 
3417 	if (p == NULL)
3418 		return -1;
3419 
3420 	opts->synth = parse_synth_opt(p);
3421 	free(p);
3422 
3423 	if (opts->synth < 0) {
3424 		pr_err("Invalid synth option: %s\n", str);
3425 		return -1;
3426 	}
3427 	return 0;
3428 }
3429 
3430 /*
3431  * XXX Ideally would be local to cmd_record() and passed to a record__new
3432  * because we need to have access to it in record__exit, that is called
3433  * after cmd_record() exits, but since record_options need to be accessible to
3434  * builtin-script, leave it here.
3435  *
3436  * At least we don't ouch it in all the other functions here directly.
3437  *
3438  * Just say no to tons of global variables, sigh.
3439  */
3440 static struct record record = {
3441 	.opts = {
3442 		.sample_time	     = true,
3443 		.mmap_pages	     = UINT_MAX,
3444 		.user_freq	     = UINT_MAX,
3445 		.user_interval	     = ULLONG_MAX,
3446 		.freq		     = 4000,
3447 		.target		     = {
3448 			.uses_mmap   = true,
3449 			.default_per_cpu = true,
3450 		},
3451 		.mmap_flush          = MMAP_FLUSH_DEFAULT,
3452 		.nr_threads_synthesize = 1,
3453 		.ctl_fd              = -1,
3454 		.ctl_fd_ack          = -1,
3455 		.synth               = PERF_SYNTH_ALL,
3456 		.off_cpu_thresh_ns   = OFFCPU_THRESH,
3457 	},
3458 	.buildid_mmap = true,
3459 };
3460 
/* Help text for the --call-graph option, used in the option table below. */
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

/* Set by --dry-run: parse the options, then exit without recording. */
static bool dry_run;

/* Routes -e/--event parsing into record.evlist. */
static struct parse_events_option_args parse_events_option_args = {
	.evlistp = &record.evlist,
};

/* Routes --switch-output-event parsing into the side-band event list. */
static struct parse_events_option_args switch_output_parse_events_option_args = {
	.evlistp = &record.sb_evlist,
};
3473 
3474 /*
3475  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
3476  * with it and switch to use the library functions in perf_evlist that came
3477  * from builtin-record.c, i.e. use record_opts,
3478  * evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
3479  * using pipes, etc.
3480  */
static struct option __record_options[] = {
	/* Event selection and filtering */
	OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN(0, "latency", &record.latency,
		    "Enable data collection for latency profiling.\n"
		    "\t\t\t  Use perf report --latency for latency-centric profile."),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	/* Target selection: what to profile */
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.data.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
		    "Fail if the specified frequency can't be used"),
	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
		     "profile at this frequency",
		      record__parse_freq),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
		     record__mmap_flush_parse),
	/* Callchain recording */
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording" ,
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	/* Per-sample payload selection */
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
		    "Record the sample physical addresses"),
	OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
		    "Record the sampled data address data page size"),
	OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
		    "Record the sampled code address (ip) page size"),
	OPT_BOOLEAN(0, "sample-mem-info", &record.opts.sample_data_src,
		    "Record the data source for memory operations"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier,
		    "Record the sample identifier"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
			"Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_CALLBACK('D', "delay", &record, "ms",
		     "ms to wait before starting measurement after program start (-1: start with events disabled), "
		     "or ranges of time to enable events e.g. '-D 10-20,30-40'",
		     record__parse_event_enable_time),
	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
	OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),

	/* Branch stack sampling */
	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use '-I?' to list register names", parse_intr_regs),
	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
		    "sample selected machine registers in user space,"
		    " use '--user-regs=?' to list register names", parse_user_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
	"clockid", "clockid to use for events, see clock_gettime()",
	parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
			  "opts", "sample AUX area", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
		    "Record cgroup events"),
	OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
			&record.opts.record_switch_events_set,
			"Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
		    "collect kernel callchains"),
	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
		    "collect user callchains"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN_SET(0, "buildid-mmap", &record.buildid_mmap, &record.buildid_mmap_set,
			"Record build-id in mmap events and skip build-id processing."),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
		    "Record timestamp boundary (time of first/last samples)"),
	/* Output rotation (see switch_output_setup()) */
	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
			  "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
			  "signal"),
	OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args,
			 &record.switch_output_event_set, "switch output event",
			 "switch output event selector. use 'perf list' to list available events",
			 parse_events_option_new_evlist),
	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
		   "Limit number of switch output generated files"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
#ifdef HAVE_AIO_SUPPORT
	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
		     record__aio_parse),
#endif
	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
		     record__parse_affinity),
#ifdef HAVE_ZSTD_SUPPORT
	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
			    "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
			    record__parse_comp_level),
#endif
	OPT_CALLBACK(0, "max-size", &record.output_max_size,
		     "size", "Limit the maximum size of the output file", parse_output_max_size),
	OPT_UINTEGER(0, "num-thread-synthesize",
		     &record.opts.nr_threads_synthesize,
		     "number of threads to run for event synthesis"),
#ifdef HAVE_LIBPFM
	OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
		"libpfm4 event selector. use 'perf list' to list available events",
		parse_libpfm_events_option),
#endif
	OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
		     "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
		     "\t\t\t  'snapshot': AUX area tracing snapshot).\n"
		     "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
		     "\t\t\t  Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
		      parse_control_option),
	OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
		     "Fine-tune event synthesis: default=all", parse_record_synth_option),
	OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
			  &record.debuginfod.set, "debuginfod urls",
			  "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
			  "system"),
	/* Parallel trace streaming (see record__parse_threads()) */
	OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
			    "write collected trace data into several data files using parallel threads",
			    record__parse_threads),
	OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
	OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin",
		   "BPF filter action"),
	OPT_CALLBACK(0, "off-cpu-thresh", &record.opts, "ms",
		     "Dump off-cpu samples if off-cpu time exceeds this threshold (in milliseconds). (Default: 500ms)",
		     record__parse_off_cpu_thresh),
	OPT_BOOLEAN_SET(0, "data-mmap", &record.opts.record_data_mmap,
			&record.opts.record_data_mmap_set,
			"Record mmap events for non-executable mappings"),
	OPT_END()
};

struct option *record_options = __record_options;
3694 
3695 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3696 {
3697 	struct perf_cpu cpu;
3698 	int idx;
3699 
3700 	if (cpu_map__is_dummy(cpus))
3701 		return 0;
3702 
3703 	perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) {
3704 		/* Return ENODEV is input cpu is greater than max cpu */
3705 		if ((unsigned long)cpu.cpu > mask->nbits)
3706 			return -ENODEV;
3707 		__set_bit(cpu.cpu, mask->bits);
3708 	}
3709 
3710 	return 0;
3711 }
3712 
3713 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3714 {
3715 	struct perf_cpu_map *cpus;
3716 
3717 	cpus = perf_cpu_map__new(mask_spec);
3718 	if (!cpus)
3719 		return -ENOMEM;
3720 
3721 	bitmap_zero(mask->bits, mask->nbits);
3722 	if (record__mmap_cpu_mask_init(mask, cpus))
3723 		return -ENODEV;
3724 
3725 	perf_cpu_map__put(cpus);
3726 
3727 	return 0;
3728 }
3729 
3730 static void record__free_thread_masks(struct record *rec, int nr_threads)
3731 {
3732 	int t;
3733 
3734 	if (rec->thread_masks)
3735 		for (t = 0; t < nr_threads; t++)
3736 			record__thread_mask_free(&rec->thread_masks[t]);
3737 
3738 	zfree(&rec->thread_masks);
3739 }
3740 
3741 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3742 {
3743 	int t, ret;
3744 
3745 	rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3746 	if (!rec->thread_masks) {
3747 		pr_err("Failed to allocate thread masks\n");
3748 		return -ENOMEM;
3749 	}
3750 
3751 	for (t = 0; t < nr_threads; t++) {
3752 		ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3753 		if (ret) {
3754 			pr_err("Failed to allocate thread masks[%d]\n", t);
3755 			goto out_free;
3756 		}
3757 	}
3758 
3759 	return 0;
3760 
3761 out_free:
3762 	record__free_thread_masks(rec, nr_threads);
3763 
3764 	return ret;
3765 }
3766 
3767 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3768 {
3769 	int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3770 
3771 	ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3772 	if (ret)
3773 		return ret;
3774 
3775 	rec->nr_threads = nr_cpus;
3776 	pr_debug("nr_threads: %d\n", rec->nr_threads);
3777 
3778 	for (t = 0; t < rec->nr_threads; t++) {
3779 		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
3780 		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
3781 		if (verbose > 0) {
3782 			pr_debug("thread_masks[%d]: ", t);
3783 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3784 			pr_debug("thread_masks[%d]: ", t);
3785 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3786 		}
3787 	}
3788 
3789 	return 0;
3790 }
3791 
/*
 * Build rec->thread_masks from parallel arrays of maps/affinity CPU list
 * specs (one recording thread per spec pair).  Specs are intersected with
 * the monitored CPUs; empty or mutually overlapping results are rejected.
 * On success rec->nr_threads is set; returns 0, or a negative errno.
 *
 * Ownership note: each validated thread_mask is moved into
 * rec->thread_masks[] and a fresh scratch thread_mask is allocated for the
 * next iteration, so the out_free label always frees only the scratch one.
 */
static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
					  const char **maps_spec, const char **affinity_spec,
					  u32 nr_spec)
{
	u32 s;
	int ret = 0, t = 0;
	struct mmap_cpu_mask cpus_mask;
	struct thread_mask thread_mask, full_mask, *thread_masks;

	/* cpus_mask: the set of CPUs actually being monitored. */
	ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
	if (ret) {
		pr_err("Failed to allocate CPUs mask\n");
		return ret;
	}

	ret = record__mmap_cpu_mask_init(&cpus_mask, cpus);
	if (ret) {
		pr_err("Failed to init cpu mask\n");
		goto out_free_cpu_mask;
	}

	/* full_mask: union of all accepted specs, used to detect overlap. */
	ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
	if (ret) {
		pr_err("Failed to allocate full mask\n");
		goto out_free_cpu_mask;
	}

	ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
	if (ret) {
		pr_err("Failed to allocate thread mask\n");
		goto out_free_full_and_cpu_masks;
	}

	for (s = 0; s < nr_spec; s++) {
		ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
		if (ret) {
			pr_err("Failed to initialize maps thread mask\n");
			goto out_free;
		}
		ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
		if (ret) {
			pr_err("Failed to initialize affinity thread mask\n");
			goto out_free;
		}

		/* ignore invalid CPUs but do not allow empty masks */
		if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
				cpus_mask.bits, thread_mask.maps.nbits)) {
			pr_err("Empty maps mask: %s\n", maps_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}
		if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
				cpus_mask.bits, thread_mask.affinity.nbits)) {
			pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}

		/* do not allow intersection with other masks (full_mask) */
		if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
				      thread_mask.maps.nbits)) {
			pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}
		if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
				      thread_mask.affinity.nbits)) {
			pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}

		/* Accepted: merge this spec into the running union. */
		bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
			  thread_mask.maps.bits, full_mask.maps.nbits);
		bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
			  thread_mask.affinity.bits, full_mask.maps.nbits);

		thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
		if (!thread_masks) {
			pr_err("Failed to reallocate thread masks\n");
			ret = -ENOMEM;
			goto out_free;
		}
		rec->thread_masks = thread_masks;
		/* Move the scratch mask into the array; allocate a new scratch below. */
		rec->thread_masks[t] = thread_mask;
		if (verbose > 0) {
			pr_debug("thread_masks[%d]: ", t);
			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
			pr_debug("thread_masks[%d]: ", t);
			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
		}
		t++;
		ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
		if (ret) {
			pr_err("Failed to allocate thread mask\n");
			goto out_free_full_and_cpu_masks;
		}
	}
	rec->nr_threads = t;
	pr_debug("nr_threads: %d\n", rec->nr_threads);
	if (!rec->nr_threads)
		ret = -EINVAL;

out_free:
	record__thread_mask_free(&thread_mask);
out_free_full_and_cpu_masks:
	record__thread_mask_free(&full_mask);
out_free_cpu_mask:
	record__mmap_cpu_mask_free(&cpus_mask);

	return ret;
}
3905 
3906 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
3907 {
3908 	int ret;
3909 	struct cpu_topology *topo;
3910 
3911 	topo = cpu_topology__new();
3912 	if (!topo) {
3913 		pr_err("Failed to allocate CPU topology\n");
3914 		return -ENOMEM;
3915 	}
3916 
3917 	ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
3918 					     topo->core_cpus_list, topo->core_cpus_lists);
3919 	cpu_topology__delete(topo);
3920 
3921 	return ret;
3922 }
3923 
3924 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
3925 {
3926 	int ret;
3927 	struct cpu_topology *topo;
3928 
3929 	topo = cpu_topology__new();
3930 	if (!topo) {
3931 		pr_err("Failed to allocate CPU topology\n");
3932 		return -ENOMEM;
3933 	}
3934 
3935 	ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
3936 					     topo->package_cpus_list, topo->package_cpus_lists);
3937 	cpu_topology__delete(topo);
3938 
3939 	return ret;
3940 }
3941 
3942 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
3943 {
3944 	u32 s;
3945 	int ret;
3946 	const char **spec;
3947 	struct numa_topology *topo;
3948 
3949 	topo = numa_topology__new();
3950 	if (!topo) {
3951 		pr_err("Failed to allocate NUMA topology\n");
3952 		return -ENOMEM;
3953 	}
3954 
3955 	spec = zalloc(topo->nr * sizeof(char *));
3956 	if (!spec) {
3957 		pr_err("Failed to allocate NUMA spec\n");
3958 		ret = -ENOMEM;
3959 		goto out_delete_topo;
3960 	}
3961 	for (s = 0; s < topo->nr; s++)
3962 		spec[s] = topo->nodes[s].cpus;
3963 
3964 	ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
3965 
3966 	zfree(&spec);
3967 
3968 out_delete_topo:
3969 	numa_topology__delete(topo);
3970 
3971 	return ret;
3972 }
3973 
/*
 * --threads=<spec> user mode.  The user spec is a ':'-separated list of
 * "maps_cpus/affinity_cpus" pairs, e.g. "0-3/0:4-7/4".  Each pair is split
 * out into the maps_spec[]/affinity_spec[] string arrays, which are then
 * handed to record__init_thread_masks_spec().  Returns 0 on success or a
 * negative errno.
 *
 * dup_mask tracks a strdup()ed maps string whose matching affinity string
 * has not been stored yet, so it is freed exactly once on error paths.
 */
static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int t, ret;
	u32 s, nr_spec = 0;
	char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
	char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;

	/* strtok_r() idiom: pass the string once, then NULL to continue. */
	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
		spec = strtok_r(user_spec, ":", &spec_ptr);
		if (spec == NULL)
			break;
		pr_debug2("threads_spec[%d]: %s\n", t, spec);
		/* First '/'-delimited part: the maps CPU list. */
		mask = strtok_r(spec, "/", &mask_ptr);
		if (mask == NULL)
			break;
		pr_debug2("  maps mask: %s\n", mask);
		tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate maps spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		maps_spec = tmp_spec;
		maps_spec[nr_spec] = dup_mask = strdup(mask);
		if (!maps_spec[nr_spec]) {
			pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		/* Second part: the affinity CPU list — mandatory. */
		mask = strtok_r(NULL, "/", &mask_ptr);
		if (mask == NULL) {
			pr_err("Invalid thread maps or affinity specs\n");
			ret = -EINVAL;
			goto out_free;
		}
		pr_debug2("  affinity mask: %s\n", mask);
		tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate affinity spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		affinity_spec = tmp_spec;
		affinity_spec[nr_spec] = strdup(mask);
		if (!affinity_spec[nr_spec]) {
			pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		/* Pair complete: maps_spec[nr_spec] now owned by the array. */
		dup_mask = NULL;
		nr_spec++;
	}

	ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
					     (const char **)affinity_spec, nr_spec);

out_free:
	free(dup_mask);
	for (s = 0; s < nr_spec; s++) {
		if (maps_spec)
			free(maps_spec[s]);
		if (affinity_spec)
			free(affinity_spec[s]);
	}
	free(affinity_spec);
	free(maps_spec);

	return ret;
}
4043 
4044 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
4045 {
4046 	int ret;
4047 
4048 	ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
4049 	if (ret)
4050 		return ret;
4051 
4052 	if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
4053 		return -ENODEV;
4054 
4055 	rec->nr_threads = 1;
4056 
4057 	return 0;
4058 }
4059 
4060 static int record__init_thread_masks(struct record *rec)
4061 {
4062 	int ret = 0;
4063 	struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;
4064 
4065 	if (!record__threads_enabled(rec))
4066 		return record__init_thread_default_masks(rec, cpus);
4067 
4068 	if (evlist__per_thread(rec->evlist)) {
4069 		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
4070 		return -EINVAL;
4071 	}
4072 
4073 	switch (rec->opts.threads_spec) {
4074 	case THREAD_SPEC__CPU:
4075 		ret = record__init_thread_cpu_masks(rec, cpus);
4076 		break;
4077 	case THREAD_SPEC__CORE:
4078 		ret = record__init_thread_core_masks(rec, cpus);
4079 		break;
4080 	case THREAD_SPEC__PACKAGE:
4081 		ret = record__init_thread_package_masks(rec, cpus);
4082 		break;
4083 	case THREAD_SPEC__NUMA:
4084 		ret = record__init_thread_numa_masks(rec, cpus);
4085 		break;
4086 	case THREAD_SPEC__USER:
4087 		ret = record__init_thread_user_masks(rec, cpus);
4088 		break;
4089 	default:
4090 		break;
4091 	}
4092 
4093 	return ret;
4094 }
4095 
4096 int cmd_record(int argc, const char **argv)
4097 {
4098 	int err;
4099 	struct record *rec = &record;
4100 	char errbuf[BUFSIZ];
4101 
4102 	setlocale(LC_ALL, "");
4103 
4104 #ifndef HAVE_BPF_SKEL
4105 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
4106 	set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
4107 # undef set_nobuild
4108 #endif
4109 
4110 	/* Disable eager loading of kernel symbols that adds overhead to perf record. */
4111 	symbol_conf.lazy_load_kernel_maps = true;
4112 	rec->opts.affinity = PERF_AFFINITY_SYS;
4113 
4114 	rec->evlist = evlist__new();
4115 	if (rec->evlist == NULL)
4116 		return -ENOMEM;
4117 
4118 	err = perf_config(perf_record_config, rec);
4119 	if (err)
4120 		return err;
4121 
4122 	argc = parse_options(argc, argv, record_options, record_usage,
4123 			    PARSE_OPT_STOP_AT_NON_OPTION);
4124 	if (quiet)
4125 		perf_quiet_option();
4126 
4127 	err = symbol__validate_sym_arguments();
4128 	if (err)
4129 		return err;
4130 
4131 	perf_debuginfod_setup(&record.debuginfod);
4132 
4133 	/* Make system wide (-a) the default target. */
4134 	if (!argc && target__none(&rec->opts.target))
4135 		rec->opts.target.system_wide = true;
4136 
4137 	if (nr_cgroups && !rec->opts.target.system_wide) {
4138 		usage_with_options_msg(record_usage, record_options,
4139 			"cgroup monitoring only available in system-wide mode");
4140 
4141 	}
4142 
4143 	if (record.latency) {
4144 		/*
4145 		 * There is no fundamental reason why latency profiling
4146 		 * can't work for system-wide mode, but exact semantics
4147 		 * and details are to be defined.
4148 		 * See the following thread for details:
4149 		 * https://lore.kernel.org/all/Z4XDJyvjiie3howF@google.com/
4150 		 */
4151 		if (record.opts.target.system_wide) {
4152 			pr_err("Failed: latency profiling is not supported with system-wide collection.\n");
4153 			err = -EINVAL;
4154 			goto out_opts;
4155 		}
4156 		record.opts.record_switch_events = true;
4157 	}
4158 
4159 	if (rec->buildid_mmap && !perf_can_record_build_id()) {
4160 		pr_warning("Missing support for build id in kernel mmap events.\n"
4161 			   "Disable this warning with --no-buildid-mmap\n");
4162 		rec->buildid_mmap = false;
4163 	}
4164 
4165 	if (rec->buildid_mmap) {
4166 		/* Enable perf_event_attr::build_id bit. */
4167 		rec->opts.build_id = true;
4168 		/* Disable build-ID table in the header. */
4169 		rec->no_buildid = true;
4170 	} else {
4171 		pr_debug("Disabling build id in synthesized mmap2 events.\n");
4172 		symbol_conf.no_buildid_mmap2 = true;
4173 	}
4174 
4175 	if (rec->no_buildid_set && rec->no_buildid) {
4176 		/* -B implies -N for historic reasons. */
4177 		rec->no_buildid_cache = true;
4178 	}
4179 
4180 	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
4181 		pr_err("Kernel has no cgroup sampling support.\n");
4182 		err = -EINVAL;
4183 		goto out_opts;
4184 	}
4185 
4186 	if (rec->opts.kcore)
4187 		rec->opts.text_poke = true;
4188 
4189 	if (rec->opts.kcore || record__threads_enabled(rec))
4190 		rec->data.is_dir = true;
4191 
4192 	if (record__threads_enabled(rec)) {
4193 		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
4194 			pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
4195 			goto out_opts;
4196 		}
4197 		if (record__aio_enabled(rec)) {
4198 			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
4199 			goto out_opts;
4200 		}
4201 	}
4202 
4203 	if (rec->opts.comp_level != 0) {
4204 		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
4205 		rec->no_buildid = true;
4206 	}
4207 
4208 	if (rec->opts.record_switch_events &&
4209 	    !perf_can_record_switch_events()) {
4210 		ui__error("kernel does not support recording context switch events\n");
4211 		parse_options_usage(record_usage, record_options, "switch-events", 0);
4212 		err = -EINVAL;
4213 		goto out_opts;
4214 	}
4215 
4216 	if (switch_output_setup(rec)) {
4217 		parse_options_usage(record_usage, record_options, "switch-output", 0);
4218 		err = -EINVAL;
4219 		goto out_opts;
4220 	}
4221 
4222 	if (rec->switch_output.time) {
4223 		signal(SIGALRM, alarm_sig_handler);
4224 		alarm(rec->switch_output.time);
4225 	}
4226 
4227 	if (rec->switch_output.num_files) {
4228 		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
4229 						      sizeof(char *));
4230 		if (!rec->switch_output.filenames) {
4231 			err = -EINVAL;
4232 			goto out_opts;
4233 		}
4234 	}
4235 
4236 	if (rec->timestamp_filename && record__threads_enabled(rec)) {
4237 		rec->timestamp_filename = false;
4238 		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
4239 	}
4240 
4241 	if (rec->filter_action) {
4242 		if (!strcmp(rec->filter_action, "pin"))
4243 			err = perf_bpf_filter__pin();
4244 		else if (!strcmp(rec->filter_action, "unpin"))
4245 			err = perf_bpf_filter__unpin();
4246 		else {
4247 			pr_warning("Unknown BPF filter action: %s\n", rec->filter_action);
4248 			err = -EINVAL;
4249 		}
4250 		goto out_opts;
4251 	}
4252 
4253 	/* For backward compatibility, -d implies --mem-info and --data-mmap */
4254 	if (rec->opts.sample_address) {
4255 		rec->opts.sample_data_src = true;
4256 		if (!rec->opts.record_data_mmap_set)
4257 			rec->opts.record_data_mmap = true;
4258 	}
4259 
4260 	/*
4261 	 * Allow aliases to facilitate the lookup of symbols for address
4262 	 * filters. Refer to auxtrace_parse_filters().
4263 	 */
4264 	symbol_conf.allow_aliases = true;
4265 
4266 	symbol__init(NULL);
4267 
4268 	err = record__auxtrace_init(rec);
4269 	if (err)
4270 		goto out;
4271 
4272 	if (dry_run)
4273 		goto out;
4274 
4275 	err = -ENOMEM;
4276 
4277 	if (rec->no_buildid_cache) {
4278 		disable_buildid_cache();
4279 	} else if (rec->switch_output.enabled) {
4280 		/*
4281 		 * In 'perf record --switch-output', disable buildid
4282 		 * generation by default to reduce data file switching
4283 		 * overhead. Still generate buildid if they are required
4284 		 * explicitly using
4285 		 *
4286 		 *  perf record --switch-output --no-no-buildid \
4287 		 *              --no-no-buildid-cache
4288 		 *
4289 		 * Following code equals to:
4290 		 *
4291 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
4292 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
4293 		 *         disable_buildid_cache();
4294 		 */
4295 		bool disable = true;
4296 
4297 		if (rec->no_buildid_set && !rec->no_buildid)
4298 			disable = false;
4299 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
4300 			disable = false;
4301 		if (disable) {
4302 			rec->no_buildid = true;
4303 			rec->no_buildid_cache = true;
4304 			disable_buildid_cache();
4305 		}
4306 	}
4307 
4308 	if (record.opts.overwrite)
4309 		record.opts.tail_synthesize = true;
4310 
4311 	if (rec->evlist->core.nr_entries == 0) {
4312 		struct evlist *def_evlist = evlist__new_default();
4313 
4314 		if (!def_evlist)
4315 			goto out;
4316 
4317 		evlist__splice_list_tail(rec->evlist, &def_evlist->core.entries);
4318 		evlist__delete(def_evlist);
4319 	}
4320 
4321 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
4322 		rec->opts.no_inherit = true;
4323 
4324 	err = target__validate(&rec->opts.target);
4325 	if (err) {
4326 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
4327 		ui__warning("%s\n", errbuf);
4328 	}
4329 
4330 	if (rec->uid_str) {
4331 		uid_t uid = parse_uid(rec->uid_str);
4332 
4333 		if (uid == UINT_MAX) {
4334 			ui__error("Invalid User: %s", rec->uid_str);
4335 			err = -EINVAL;
4336 			goto out;
4337 		}
4338 		err = parse_uid_filter(rec->evlist, uid);
4339 		if (err)
4340 			goto out;
4341 
4342 		/* User ID filtering implies system wide. */
4343 		rec->opts.target.system_wide = true;
4344 	}
4345 
4346 	/* Enable ignoring missing threads when -p option is defined. */
4347 	rec->opts.ignore_missing_thread = rec->opts.target.pid;
4348 
4349 	evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);
4350 
4351 	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
4352 		arch__add_leaf_frame_record_opts(&rec->opts);
4353 
4354 	err = -ENOMEM;
4355 	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
4356 		if (rec->opts.target.pid != NULL) {
4357 			pr_err("Couldn't create thread/CPU maps: %s\n",
4358 				errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
4359 			goto out;
4360 		}
4361 		else
4362 			usage_with_options(record_usage, record_options);
4363 	}
4364 
4365 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
4366 	if (err)
4367 		goto out;
4368 
4369 	/*
4370 	 * We take all buildids when the file contains
4371 	 * AUX area tracing data because we do not decode the
4372 	 * trace because it would take too long.
4373 	 */
4374 	if (rec->opts.full_auxtrace)
4375 		rec->buildid_all = true;
4376 
4377 	if (rec->opts.text_poke) {
4378 		err = record__config_text_poke(rec->evlist);
4379 		if (err) {
4380 			pr_err("record__config_text_poke failed, error %d\n", err);
4381 			goto out;
4382 		}
4383 	}
4384 
4385 	if (rec->off_cpu) {
4386 		err = record__config_off_cpu(rec);
4387 		if (err) {
4388 			pr_err("record__config_off_cpu failed, error %d\n", err);
4389 			goto out;
4390 		}
4391 	}
4392 
4393 	if (record_opts__config(&rec->opts)) {
4394 		err = -EINVAL;
4395 		goto out;
4396 	}
4397 
4398 	err = record__config_tracking_events(rec);
4399 	if (err) {
4400 		pr_err("record__config_tracking_events failed, error %d\n", err);
4401 		goto out;
4402 	}
4403 
4404 	err = record__init_thread_masks(rec);
4405 	if (err) {
4406 		pr_err("Failed to initialize parallel data streaming masks\n");
4407 		goto out;
4408 	}
4409 
4410 	if (rec->opts.nr_cblocks > nr_cblocks_max)
4411 		rec->opts.nr_cblocks = nr_cblocks_max;
4412 	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
4413 
4414 	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
4415 	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
4416 
4417 	if (rec->opts.comp_level > comp_level_max)
4418 		rec->opts.comp_level = comp_level_max;
4419 	pr_debug("comp level: %d\n", rec->opts.comp_level);
4420 
4421 	err = __cmd_record(&record, argc, argv);
4422 out:
4423 	record__free_thread_masks(rec, rec->nr_threads);
4424 	rec->nr_threads = 0;
4425 	symbol__exit();
4426 	auxtrace_record__free(rec->itr);
4427 out_opts:
4428 	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
4429 	evlist__delete(rec->evlist);
4430 	return err;
4431 }
4432 
4433 static void snapshot_sig_handler(int sig __maybe_unused)
4434 {
4435 	struct record *rec = &record;
4436 
4437 	hit_auxtrace_snapshot_trigger(rec);
4438 
4439 	if (switch_output_signal(rec))
4440 		trigger_hit(&switch_output_trigger);
4441 }
4442 
4443 static void alarm_sig_handler(int sig __maybe_unused)
4444 {
4445 	struct record *rec = &record;
4446 
4447 	if (switch_output_time(rec))
4448 		trigger_hit(&switch_output_trigger);
4449 }
4450