xref: /linux/tools/perf/builtin-record.c (revision 2b64b2ed277ff23e785fbdb65098ee7e1252d64f)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
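
/*
 * Typical usage (illustrative examples only, using options defined below):
 *
 *	perf record -F 4000 -g -- ./workload	# sample a workload with call graphs
 *	perf record -a -- sleep 5		# system-wide collection for five seconds
 *	perf report				# analyze the resulting perf.data
 */
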
9 #include "builtin.h"
10 
11 #include "perf.h"
12 
13 #include "util/build-id.h"
14 #include "util/util.h"
15 #include <subcmd/parse-options.h>
16 #include "util/parse-events.h"
17 #include "util/config.h"
18 
19 #include "util/callchain.h"
20 #include "util/cgroup.h"
21 #include "util/header.h"
22 #include "util/event.h"
23 #include "util/evlist.h"
24 #include "util/evsel.h"
25 #include "util/debug.h"
26 #include "util/session.h"
27 #include "util/tool.h"
28 #include "util/symbol.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
34 #include "util/tsc.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/llvm-utils.h"
38 #include "util/bpf-loader.h"
39 #include "util/trigger.h"
40 #include "util/perf-hooks.h"
41 #include "util/cpu-set-sched.h"
42 #include "util/time-utils.h"
43 #include "util/units.h"
44 #include "util/bpf-event.h"
45 #include "asm/bug.h"
46 
47 #include <errno.h>
48 #include <inttypes.h>
49 #include <locale.h>
50 #include <poll.h>
51 #include <unistd.h>
52 #include <sched.h>
53 #include <signal.h>
54 #include <sys/mman.h>
55 #include <sys/wait.h>
56 #include <linux/time64.h>
57 
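/*
 * State for the --switch-output feature: rotate the perf.data output on
 * SIGUSR2, or when a size or time threshold is crossed.
 */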
58 struct switch_output {
59 	bool		 enabled;
60 	bool		 signal;
61 	unsigned long	 size;
62 	unsigned long	 time;
63 	const char	*str;
64 	bool		 set;
65 	char		 **filenames;
66 	int		 num_files;
67 	int		 cur_file;
68 };
69 
70 struct record {
71 	struct perf_tool	tool;
72 	struct record_opts	opts;
73 	u64			bytes_written;
74 	struct perf_data	data;
75 	struct auxtrace_record	*itr;
76 	struct perf_evlist	*evlist;
77 	struct perf_session	*session;
78 	int			realtime_prio;
79 	bool			no_buildid;
80 	bool			no_buildid_set;
81 	bool			no_buildid_cache;
82 	bool			no_buildid_cache_set;
83 	bool			buildid_all;
84 	bool			timestamp_filename;
85 	bool			timestamp_boundary;
86 	struct switch_output	switch_output;
87 	unsigned long long	samples;
88 	cpu_set_t		affinity_mask;
89 };
90 
91 static volatile int auxtrace_record__snapshot_started;
92 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
93 static DEFINE_TRIGGER(switch_output_trigger);
94 
95 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
96 	"SYS", "NODE", "CPU"
97 };
98 
99 static bool switch_output_signal(struct record *rec)
100 {
101 	return rec->switch_output.signal &&
102 	       trigger_is_ready(&switch_output_trigger);
103 }
104 
105 static bool switch_output_size(struct record *rec)
106 {
107 	return rec->switch_output.size &&
108 	       trigger_is_ready(&switch_output_trigger) &&
109 	       (rec->bytes_written >= rec->switch_output.size);
110 }
111 
112 static bool switch_output_time(struct record *rec)
113 {
114 	return rec->switch_output.time &&
115 	       trigger_is_ready(&switch_output_trigger);
116 }
117 
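/*
 * Write a block of data to the perf.data file and account for it; once the
 * configured --switch-output size threshold is crossed, the switch-output
 * trigger is hit.
 */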
118 static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
119 			 void *bf, size_t size)
120 {
121 	struct perf_data_file *file = &rec->session->data->file;
122 
123 	if (perf_data_file__write(file, bf, size) < 0) {
124 		pr_err("failed to write perf data, error: %m\n");
125 		return -1;
126 	}
127 
128 	rec->bytes_written += size;
129 
130 	if (switch_output_size(rec))
131 		trigger_hit(&switch_output_trigger);
132 
133 	return 0;
134 }
135 
136 #ifdef HAVE_AIO_SUPPORT
137 static int record__aio_write(struct aiocb *cblock, int trace_fd,
138 		void *buf, size_t size, off_t off)
139 {
140 	int rc;
141 
142 	cblock->aio_fildes = trace_fd;
143 	cblock->aio_buf    = buf;
144 	cblock->aio_nbytes = size;
145 	cblock->aio_offset = off;
146 	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
147 
148 	do {
149 		rc = aio_write(cblock);
150 		if (rc == 0) {
151 			break;
152 		} else if (errno != EAGAIN) {
153 			cblock->aio_fildes = -1;
154 			pr_err("failed to queue perf data, error: %m\n");
155 			break;
156 		}
157 	} while (1);
158 
159 	return rc;
160 }
161 
162 static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
163 {
164 	void *rem_buf;
165 	off_t rem_off;
166 	size_t rem_size;
167 	int rc, aio_errno;
168 	ssize_t aio_ret, written;
169 
170 	aio_errno = aio_error(cblock);
171 	if (aio_errno == EINPROGRESS)
172 		return 0;
173 
174 	written = aio_ret = aio_return(cblock);
175 	if (aio_ret < 0) {
176 		if (aio_errno != EINTR)
177 			pr_err("failed to write perf data, error: %m\n");
178 		written = 0;
179 	}
180 
181 	rem_size = cblock->aio_nbytes - written;
182 
183 	if (rem_size == 0) {
184 		cblock->aio_fildes = -1;
185 		/*
186 		 * md->refcount is incremented in perf_mmap__push() for
187 		 * every enqueued aio write request, so decrement it because
188 		 * the request is now complete.
189 		 */
190 		perf_mmap__put(md);
191 		rc = 1;
192 	} else {
193 		/*
194 		 * The aio write request may need to be restarted with the
195 		 * remainder if the kernel didn't write the whole
196 		 * chunk at once.
197 		 */
198 		rem_off = cblock->aio_offset + written;
199 		rem_buf = (void *)(cblock->aio_buf + written);
200 		record__aio_write(cblock, cblock->aio_fildes,
201 				rem_buf, rem_size, rem_off);
202 		rc = 0;
203 	}
204 
205 	return rc;
206 }
207 
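/*
 * Reap completed aio write requests.  With sync_all, block until every
 * in-flight request has completed and return -1; otherwise return the index
 * of the first free control block, waiting for one to complete if necessary.
 */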
208 static int record__aio_sync(struct perf_mmap *md, bool sync_all)
209 {
210 	struct aiocb **aiocb = md->aio.aiocb;
211 	struct aiocb *cblocks = md->aio.cblocks;
212 	struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
213 	int i, do_suspend;
214 
215 	do {
216 		do_suspend = 0;
217 		for (i = 0; i < md->aio.nr_cblocks; ++i) {
218 			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
219 				if (sync_all)
220 					aiocb[i] = NULL;
221 				else
222 					return i;
223 			} else {
224 				/*
225 				 * The started aio write is not complete yet,
226 				 * so it has to be waited for before the
227 				 * next allocation.
228 				 */
229 				aiocb[i] = &cblocks[i];
230 				do_suspend = 1;
231 			}
232 		}
233 		if (!do_suspend)
234 			return -1;
235 
236 		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
237 			if (!(errno == EAGAIN || errno == EINTR))
238 				pr_err("failed to sync perf data, error: %m\n");
239 		}
240 	} while (1);
241 }
242 
243 static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off)
244 {
245 	struct record *rec = to;
246 	int ret, trace_fd = rec->session->data->file.fd;
247 
248 	rec->samples++;
249 
250 	ret = record__aio_write(cblock, trace_fd, bf, size, off);
251 	if (!ret) {
252 		rec->bytes_written += size;
253 		if (switch_output_size(rec))
254 			trigger_hit(&switch_output_trigger);
255 	}
256 
257 	return ret;
258 }
259 
260 static off_t record__aio_get_pos(int trace_fd)
261 {
262 	return lseek(trace_fd, 0, SEEK_CUR);
263 }
264 
265 static void record__aio_set_pos(int trace_fd, off_t pos)
266 {
267 	lseek(trace_fd, pos, SEEK_SET);
268 }
269 
270 static void record__aio_mmap_read_sync(struct record *rec)
271 {
272 	int i;
273 	struct perf_evlist *evlist = rec->evlist;
274 	struct perf_mmap *maps = evlist->mmap;
275 
276 	if (!rec->opts.nr_cblocks)
277 		return;
278 
279 	for (i = 0; i < evlist->nr_mmaps; i++) {
280 		struct perf_mmap *map = &maps[i];
281 
282 		if (map->base)
283 			record__aio_sync(map, true);
284 	}
285 }
286 
287 static int nr_cblocks_default = 1;
288 static int nr_cblocks_max = 4;
289 
290 static int record__aio_parse(const struct option *opt,
291 			     const char *str,
292 			     int unset)
293 {
294 	struct record_opts *opts = (struct record_opts *)opt->value;
295 
296 	if (unset) {
297 		opts->nr_cblocks = 0;
298 	} else {
299 		if (str)
300 			opts->nr_cblocks = strtol(str, NULL, 0);
301 		if (!opts->nr_cblocks)
302 			opts->nr_cblocks = nr_cblocks_default;
303 	}
304 
305 	return 0;
306 }
307 #else /* HAVE_AIO_SUPPORT */
308 static int nr_cblocks_max = 0;
309 
310 static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused)
311 {
312 	return -1;
313 }
314 
315 static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
316 		void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused)
317 {
318 	return -1;
319 }
320 
321 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
322 {
323 	return -1;
324 }
325 
326 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
327 {
328 }
329 
330 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
331 {
332 }
333 #endif
334 
335 static int record__aio_enabled(struct record *rec)
336 {
337 	return rec->opts.nr_cblocks > 0;
338 }
339 
340 static int process_synthesized_event(struct perf_tool *tool,
341 				     union perf_event *event,
342 				     struct perf_sample *sample __maybe_unused,
343 				     struct machine *machine __maybe_unused)
344 {
345 	struct record *rec = container_of(tool, struct record, tool);
346 	return record__write(rec, NULL, event, event->header.size);
347 }
348 
349 static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
350 {
351 	struct record *rec = to;
352 
353 	rec->samples++;
354 	return record__write(rec, map, bf, size);
355 }
356 
357 static volatile int done;
358 static volatile int signr = -1;
359 static volatile int child_finished;
360 
361 static void sig_handler(int sig)
362 {
363 	if (sig == SIGCHLD)
364 		child_finished = 1;
365 	else
366 		signr = sig;
367 
368 	done = 1;
369 }
370 
371 static void sigsegv_handler(int sig)
372 {
373 	perf_hooks__recover();
374 	sighandler_dump_stack(sig);
375 }
376 
377 static void record__sig_exit(void)
378 {
379 	if (signr == -1)
380 		return;
381 
382 	signal(signr, SIG_DFL);
383 	raise(signr);
384 }
385 
386 #ifdef HAVE_AUXTRACE_SUPPORT
387 
388 static int record__process_auxtrace(struct perf_tool *tool,
389 				    struct perf_mmap *map,
390 				    union perf_event *event, void *data1,
391 				    size_t len1, void *data2, size_t len2)
392 {
393 	struct record *rec = container_of(tool, struct record, tool);
394 	struct perf_data *data = &rec->data;
395 	size_t padding;
396 	u8 pad[8] = {0};
397 
398 	if (!perf_data__is_pipe(data) && !perf_data__is_dir(data)) {
399 		off_t file_offset;
400 		int fd = perf_data__fd(data);
401 		int err;
402 
403 		file_offset = lseek(fd, 0, SEEK_CUR);
404 		if (file_offset == -1)
405 			return -1;
406 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
407 						     event, file_offset);
408 		if (err)
409 			return err;
410 	}
411 
412 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
413 	padding = (len1 + len2) & 7;
414 	if (padding)
415 		padding = 8 - padding;
416 
417 	record__write(rec, map, event, event->header.size);
418 	record__write(rec, map, data1, len1);
419 	if (len2)
420 		record__write(rec, map, data2, len2);
421 	record__write(rec, map, &pad, padding);
422 
423 	return 0;
424 }
425 
426 static int record__auxtrace_mmap_read(struct record *rec,
427 				      struct perf_mmap *map)
428 {
429 	int ret;
430 
431 	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
432 				  record__process_auxtrace);
433 	if (ret < 0)
434 		return ret;
435 
436 	if (ret)
437 		rec->samples++;
438 
439 	return 0;
440 }
441 
442 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
443 					       struct perf_mmap *map)
444 {
445 	int ret;
446 
447 	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
448 					   record__process_auxtrace,
449 					   rec->opts.auxtrace_snapshot_size);
450 	if (ret < 0)
451 		return ret;
452 
453 	if (ret)
454 		rec->samples++;
455 
456 	return 0;
457 }
458 
459 static int record__auxtrace_read_snapshot_all(struct record *rec)
460 {
461 	int i;
462 	int rc = 0;
463 
464 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
465 		struct perf_mmap *map = &rec->evlist->mmap[i];
466 
467 		if (!map->auxtrace_mmap.base)
468 			continue;
469 
470 		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
471 			rc = -1;
472 			goto out;
473 		}
474 	}
475 out:
476 	return rc;
477 }
478 
479 static void record__read_auxtrace_snapshot(struct record *rec)
480 {
481 	pr_debug("Recording AUX area tracing snapshot\n");
482 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
483 		trigger_error(&auxtrace_snapshot_trigger);
484 	} else {
485 		if (auxtrace_record__snapshot_finish(rec->itr))
486 			trigger_error(&auxtrace_snapshot_trigger);
487 		else
488 			trigger_ready(&auxtrace_snapshot_trigger);
489 	}
490 }
491 
492 static int record__auxtrace_init(struct record *rec)
493 {
494 	int err;
495 
496 	if (!rec->itr) {
497 		rec->itr = auxtrace_record__init(rec->evlist, &err);
498 		if (err)
499 			return err;
500 	}
501 
502 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
503 					      rec->opts.auxtrace_snapshot_opts);
504 	if (err)
505 		return err;
506 
507 	return auxtrace_parse_filters(rec->evlist);
508 }
509 
510 #else
511 
512 static inline
513 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
514 			       struct perf_mmap *map __maybe_unused)
515 {
516 	return 0;
517 }
518 
519 static inline
520 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
521 {
522 }
523 
524 static inline
525 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
526 {
527 	return 0;
528 }
529 
530 static int record__auxtrace_init(struct record *rec __maybe_unused)
531 {
532 	return 0;
533 }
534 
535 #endif
536 
537 static int record__mmap_evlist(struct record *rec,
538 			       struct perf_evlist *evlist)
539 {
540 	struct record_opts *opts = &rec->opts;
541 	char msg[512];
542 
543 	if (opts->affinity != PERF_AFFINITY_SYS)
544 		cpu__setup_cpunode_map();
545 
546 	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
547 				 opts->auxtrace_mmap_pages,
548 				 opts->auxtrace_snapshot_mode,
549 				 opts->nr_cblocks, opts->affinity) < 0) {
550 		if (errno == EPERM) {
551 			pr_err("Permission error mapping pages.\n"
552 			       "Consider increasing "
553 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
554 			       "or try again with a smaller value of -m/--mmap_pages.\n"
555 			       "(current value: %u,%u)\n",
556 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
557 			return -errno;
558 		} else {
559 			pr_err("failed to mmap with %d (%s)\n", errno,
560 				str_error_r(errno, msg, sizeof(msg)));
561 			if (errno)
562 				return -errno;
563 			else
564 				return -EINVAL;
565 		}
566 	}
567 	return 0;
568 }
569 
570 static int record__mmap(struct record *rec)
571 {
572 	return record__mmap_evlist(rec, rec->evlist);
573 }
574 
575 static int record__open(struct record *rec)
576 {
577 	char msg[BUFSIZ];
578 	struct perf_evsel *pos;
579 	struct perf_evlist *evlist = rec->evlist;
580 	struct perf_session *session = rec->session;
581 	struct record_opts *opts = &rec->opts;
582 	int rc = 0;
583 
584 	/*
585 	 * For initial_delay we need to add a dummy event so that we can track
586 	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
587 	 * real events, the ones asked for by the user.
588 	 */
589 	if (opts->initial_delay) {
590 		if (perf_evlist__add_dummy(evlist))
591 			return -ENOMEM;
592 
593 		pos = perf_evlist__first(evlist);
594 		pos->tracking = 0;
595 		pos = perf_evlist__last(evlist);
596 		pos->tracking = 1;
597 		pos->attr.enable_on_exec = 1;
598 	}
599 
600 	perf_evlist__config(evlist, opts, &callchain_param);
601 
602 	evlist__for_each_entry(evlist, pos) {
603 try_again:
604 		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
605 			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
606 				if (verbose > 0)
607 					ui__warning("%s\n", msg);
608 				goto try_again;
609 			}
610 			if ((errno == EINVAL || errno == EBADF) &&
611 			    pos->leader != pos &&
612 			    pos->weak_group) {
613 			        pos = perf_evlist__reset_weak_group(evlist, pos);
614 				goto try_again;
615 			}
616 			rc = -errno;
617 			perf_evsel__open_strerror(pos, &opts->target,
618 						  errno, msg, sizeof(msg));
619 			ui__error("%s\n", msg);
620 			goto out;
621 		}
622 
623 		pos->supported = true;
624 	}
625 
626 	if (perf_evlist__apply_filters(evlist, &pos)) {
627 		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
628 			pos->filter, perf_evsel__name(pos), errno,
629 			str_error_r(errno, msg, sizeof(msg)));
630 		rc = -1;
631 		goto out;
632 	}
633 
634 	rc = record__mmap(rec);
635 	if (rc)
636 		goto out;
637 
638 	session->evlist = evlist;
639 	perf_session__set_id_hdr_size(session);
640 out:
641 	return rc;
642 }
643 
644 static int process_sample_event(struct perf_tool *tool,
645 				union perf_event *event,
646 				struct perf_sample *sample,
647 				struct perf_evsel *evsel,
648 				struct machine *machine)
649 {
650 	struct record *rec = container_of(tool, struct record, tool);
651 
652 	if (rec->evlist->first_sample_time == 0)
653 		rec->evlist->first_sample_time = sample->time;
654 
655 	rec->evlist->last_sample_time = sample->time;
656 
657 	if (rec->buildid_all)
658 		return 0;
659 
660 	rec->samples++;
661 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
662 }
663 
664 static int process_buildids(struct record *rec)
665 {
666 	struct perf_session *session = rec->session;
667 
668 	if (perf_data__size(&rec->data) == 0)
669 		return 0;
670 
671 	/*
672 	 * During this process, it'll load the kernel map and replace the
673 	 * dso->long_name with a real pathname it found.  In this case
674 	 * we prefer the vmlinux path like
675 	 *   /lib/modules/3.16.4/build/vmlinux
676 	 *
677 	 * rather than the build-id path (in the debug directory):
678 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
679 	 */
680 	symbol_conf.ignore_vmlinux_buildid = true;
681 
682 	/*
683 	 * If --buildid-all is given, it marks all DSO regardless of hits,
684 	 * If --buildid-all is given, it marks all DSOs regardless of hits,
685 	 * so there is no need to process samples. But if timestamp_boundary
686 	 * is enabled, it still needs to walk all samples to get the timestamps of
687 	 */
688 	if (rec->buildid_all && !rec->timestamp_boundary)
689 		rec->tool.sample = NULL;
690 
691 	return perf_session__process_events(session);
692 }
693 
694 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
695 {
696 	int err;
697 	struct perf_tool *tool = data;
698 	/*
699 	 * As for the guest kernel, when processing the record & report
700 	 * subcommands we arrange the module mmaps prior to the guest kernel
701 	 * mmap and trigger a DSO preload, because by default guest module
702 	 * symbols are loaded from guest kallsyms instead of
703 	 * /lib/modules/XXX/XXX. This avoids missing symbols when the first
704 	 * address falls in a module instead of in the guest kernel.
705 	 */
706 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
707 					     machine);
708 	if (err < 0)
709 		pr_err("Couldn't record guest kernel [%d]'s reference"
710 		       " relocation symbol.\n", machine->pid);
711 
712 	/*
713 	 * We use _stext for the guest kernel because the guest kernel's
714 	 * /proc/kallsyms sometimes has no _text.
715 	 */
716 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
717 						 machine);
718 	if (err < 0)
719 		pr_err("Couldn't record guest kernel [%d]'s reference"
720 		       " relocation symbol.\n", machine->pid);
721 }
722 
723 static struct perf_event_header finished_round_event = {
724 	.size = sizeof(struct perf_event_header),
725 	.type = PERF_RECORD_FINISHED_ROUND,
726 };
727 
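/*
 * With --affinity=node or --affinity=cpu, migrate the recording thread to the
 * CPUs in the map's affinity mask before the map is read, if that mask differs
 * from the currently active one.
 */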
728 static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
729 {
730 	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
731 	    !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) {
732 		CPU_ZERO(&rec->affinity_mask);
733 		CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask);
734 		sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask);
735 	}
736 }
737 
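/*
 * Drain every mmap'ed ring buffer of the evlist into the output file, using
 * either synchronous perf_mmap__push() or queued aio writes, read the AUX area
 * data when not in snapshot mode, and emit a PERF_RECORD_FINISHED_ROUND event
 * if anything was written.
 */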
738 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
739 				    bool overwrite)
740 {
741 	u64 bytes_written = rec->bytes_written;
742 	int i;
743 	int rc = 0;
744 	struct perf_mmap *maps;
745 	int trace_fd = rec->data.file.fd;
746 	off_t off;
747 
748 	if (!evlist)
749 		return 0;
750 
751 	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
752 	if (!maps)
753 		return 0;
754 
755 	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
756 		return 0;
757 
758 	if (record__aio_enabled(rec))
759 		off = record__aio_get_pos(trace_fd);
760 
761 	for (i = 0; i < evlist->nr_mmaps; i++) {
762 		struct perf_mmap *map = &maps[i];
763 
764 		if (map->base) {
765 			record__adjust_affinity(rec, map);
766 			if (!record__aio_enabled(rec)) {
767 				if (perf_mmap__push(map, rec, record__pushfn) != 0) {
768 					rc = -1;
769 					goto out;
770 				}
771 			} else {
772 				int idx;
773 				/*
774 				 * Call record__aio_sync() to wait until the map->data buffer
775 				 * becomes available after the previous aio write request.
776 				 */
777 				idx = record__aio_sync(map, false);
778 				if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
779 					record__aio_set_pos(trace_fd, off);
780 					rc = -1;
781 					goto out;
782 				}
783 			}
784 		}
785 
786 		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
787 		    record__auxtrace_mmap_read(rec, map) != 0) {
788 			rc = -1;
789 			goto out;
790 		}
791 	}
792 
793 	if (record__aio_enabled(rec))
794 		record__aio_set_pos(trace_fd, off);
795 
796 	/*
797 	 * Mark the round finished in case we wrote
798 	 * at least one event.
799 	 */
800 	if (bytes_written != rec->bytes_written)
801 		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
802 
803 	if (overwrite)
804 		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
805 out:
806 	return rc;
807 }
808 
809 static int record__mmap_read_all(struct record *rec)
810 {
811 	int err;
812 
813 	err = record__mmap_read_evlist(rec, rec->evlist, false);
814 	if (err)
815 		return err;
816 
817 	return record__mmap_read_evlist(rec, rec->evlist, true);
818 }
819 
820 static void record__init_features(struct record *rec)
821 {
822 	struct perf_session *session = rec->session;
823 	int feat;
824 
825 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
826 		perf_header__set_feat(&session->header, feat);
827 
828 	if (rec->no_buildid)
829 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
830 
831 	if (!have_tracepoints(&rec->evlist->entries))
832 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
833 
834 	if (!rec->opts.branch_stack)
835 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
836 
837 	if (!rec->opts.full_auxtrace)
838 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
839 
840 	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
841 		perf_header__clear_feat(&session->header, HEADER_CLOCKID);
842 
843 	perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
844 
845 	perf_header__clear_feat(&session->header, HEADER_STAT);
846 }
847 
848 static void
849 record__finish_output(struct record *rec)
850 {
851 	struct perf_data *data = &rec->data;
852 	int fd = perf_data__fd(data);
853 
854 	if (data->is_pipe)
855 		return;
856 
857 	rec->session->header.data_size += rec->bytes_written;
858 	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
859 
860 	if (!rec->no_buildid) {
861 		process_buildids(rec);
862 
863 		if (rec->buildid_all)
864 			dsos__hit_all(rec->session);
865 	}
866 	perf_session__write_header(rec->session, rec->evlist, fd, true);
867 
868 	return;
869 }
870 
871 static int record__synthesize_workload(struct record *rec, bool tail)
872 {
873 	int err;
874 	struct thread_map *thread_map;
875 
876 	if (rec->opts.tail_synthesize != tail)
877 		return 0;
878 
879 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
880 	if (thread_map == NULL)
881 		return -1;
882 
883 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
884 						 process_synthesized_event,
885 						 &rec->session->machines.host,
886 						 rec->opts.sample_address);
887 	thread_map__put(thread_map);
888 	return err;
889 }
890 
891 static int record__synthesize(struct record *rec, bool tail);
892 
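/*
 * Finalize the current perf.data file and switch the output to a new
 * timestamped file, re-synthesizing the tracking events the new file needs.
 * Returns the new output file descriptor or a negative error code.
 */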
893 static int
894 record__switch_output(struct record *rec, bool at_exit)
895 {
896 	struct perf_data *data = &rec->data;
897 	int fd, err;
898 	char *new_filename;
899 
900 	/* Same size as a real timestamp, e.g. "2015122520103046" */
901 	char timestamp[] = "InvalidTimestamp";
902 
903 	record__aio_mmap_read_sync(rec);
904 
905 	record__synthesize(rec, true);
906 	if (target__none(&rec->opts.target))
907 		record__synthesize_workload(rec, true);
908 
909 	rec->samples = 0;
910 	record__finish_output(rec);
911 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
912 	if (err) {
913 		pr_err("Failed to get current timestamp\n");
914 		return -EINVAL;
915 	}
916 
917 	fd = perf_data__switch(data, timestamp,
918 				    rec->session->header.data_offset,
919 				    at_exit, &new_filename);
920 	if (fd >= 0 && !at_exit) {
921 		rec->bytes_written = 0;
922 		rec->session->header.data_size = 0;
923 	}
924 
925 	if (!quiet)
926 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
927 			data->path, timestamp);
928 
929 	if (rec->switch_output.num_files) {
930 		int n = rec->switch_output.cur_file + 1;
931 
932 		if (n >= rec->switch_output.num_files)
933 			n = 0;
934 		rec->switch_output.cur_file = n;
935 		if (rec->switch_output.filenames[n]) {
936 			remove(rec->switch_output.filenames[n]);
937 			free(rec->switch_output.filenames[n]);
938 		}
939 		rec->switch_output.filenames[n] = new_filename;
940 	} else {
941 		free(new_filename);
942 	}
943 
944 	/* Output tracking events */
945 	if (!at_exit) {
946 		record__synthesize(rec, false);
947 
948 		/*
949 		 * In 'perf record --switch-output' without -a,
950 		 * record__synthesize() in record__switch_output() won't
951 		 * generate tracking events because there's no thread_map
952 		 * in evlist, so the newly created perf.data wouldn't
953 		 * contain map and comm information.
954 		 * Create a fake thread_map and directly call
955 		 * perf_event__synthesize_thread_map() for those events.
956 		 */
957 		if (target__none(&rec->opts.target))
958 			record__synthesize_workload(rec, false);
959 	}
960 	return fd;
961 }
962 
963 static volatile int workload_exec_errno;
964 
965 /*
966  * perf_evlist__prepare_workload will send a SIGUSR1
967  * if the fork fails, since we asked for it by setting its
968  * want_signal to true.
969  */
970 static void workload_exec_failed_signal(int signo __maybe_unused,
971 					siginfo_t *info,
972 					void *ucontext __maybe_unused)
973 {
974 	workload_exec_errno = info->si_value.sival_int;
975 	done = 1;
976 	child_finished = 1;
977 }
978 
979 static void snapshot_sig_handler(int sig);
980 static void alarm_sig_handler(int sig);
981 
982 int __weak
983 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
984 			    struct perf_tool *tool __maybe_unused,
985 			    perf_event__handler_t process __maybe_unused,
986 			    struct machine *machine __maybe_unused)
987 {
988 	return 0;
989 }
990 
991 static const struct perf_event_mmap_page *
992 perf_evlist__pick_pc(struct perf_evlist *evlist)
993 {
994 	if (evlist) {
995 		if (evlist->mmap && evlist->mmap[0].base)
996 			return evlist->mmap[0].base;
997 		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
998 			return evlist->overwrite_mmap[0].base;
999 	}
1000 	return NULL;
1001 }
1002 
1003 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1004 {
1005 	const struct perf_event_mmap_page *pc;
1006 
1007 	pc = perf_evlist__pick_pc(rec->evlist);
1008 	if (pc)
1009 		return pc;
1010 	return NULL;
1011 }
1012 
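/*
 * Emit the synthesized (non-sample) events describing the current system
 * state: event attributes and features for pipe output, kernel and module
 * mmaps, thread and CPU maps, BPF events, and the existing threads of the
 * monitored target.
 */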
1013 static int record__synthesize(struct record *rec, bool tail)
1014 {
1015 	struct perf_session *session = rec->session;
1016 	struct machine *machine = &session->machines.host;
1017 	struct perf_data *data = &rec->data;
1018 	struct record_opts *opts = &rec->opts;
1019 	struct perf_tool *tool = &rec->tool;
1020 	int fd = perf_data__fd(data);
1021 	int err = 0;
1022 
1023 	if (rec->opts.tail_synthesize != tail)
1024 		return 0;
1025 
1026 	if (data->is_pipe) {
1027 		/*
1028 		 * We need to synthesize events first, because some
1029 		 * features work on top of them (on the report side).
1030 		 */
1031 		err = perf_event__synthesize_attrs(tool, rec->evlist,
1032 						   process_synthesized_event);
1033 		if (err < 0) {
1034 			pr_err("Couldn't synthesize attrs.\n");
1035 			goto out;
1036 		}
1037 
1038 		err = perf_event__synthesize_features(tool, session, rec->evlist,
1039 						      process_synthesized_event);
1040 		if (err < 0) {
1041 			pr_err("Couldn't synthesize features.\n");
1042 			return err;
1043 		}
1044 
1045 		if (have_tracepoints(&rec->evlist->entries)) {
1046 			/*
1047 			 * FIXME err <= 0 here actually means that
1048 			 * there were no tracepoints so it's not really
1049 			 * an error, just that we don't need to
1050 			 * synthesize anything.  We really have to
1051 			 * return this more properly and also
1052 			 * propagate errors that currently end up calling die().
1053 			 */
1054 			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
1055 								  process_synthesized_event);
1056 			if (err <= 0) {
1057 				pr_err("Couldn't record tracing data.\n");
1058 				goto out;
1059 			}
1060 			rec->bytes_written += err;
1061 		}
1062 	}
1063 
1064 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1065 					  process_synthesized_event, machine);
1066 	if (err)
1067 		goto out;
1068 
1069 	if (rec->opts.full_auxtrace) {
1070 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1071 					session, process_synthesized_event);
1072 		if (err)
1073 			goto out;
1074 	}
1075 
1076 	if (!perf_evlist__exclude_kernel(rec->evlist)) {
1077 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1078 							 machine);
1079 		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1080 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1081 				   "Check /proc/kallsyms permission or run as root.\n");
1082 
1083 		err = perf_event__synthesize_modules(tool, process_synthesized_event,
1084 						     machine);
1085 		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1086 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1087 				   "Check /proc/modules permission or run as root.\n");
1088 	}
1089 
1090 	if (perf_guest) {
1091 		machines__process_guests(&session->machines,
1092 					 perf_event__synthesize_guest_os, tool);
1093 	}
1094 
1095 	err = perf_event__synthesize_extra_attr(&rec->tool,
1096 						rec->evlist,
1097 						process_synthesized_event,
1098 						data->is_pipe);
1099 	if (err)
1100 		goto out;
1101 
1102 	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
1103 						 process_synthesized_event,
1104 						NULL);
1105 	if (err < 0) {
1106 		pr_err("Couldn't synthesize thread map.\n");
1107 		return err;
1108 	}
1109 
1110 	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
1111 					     process_synthesized_event, NULL);
1112 	if (err < 0) {
1113 		pr_err("Couldn't synthesize cpu map.\n");
1114 		return err;
1115 	}
1116 
1117 	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
1118 						machine, opts);
1119 	if (err < 0)
1120 		pr_warning("Couldn't synthesize bpf events.\n");
1121 
1122 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
1123 					    process_synthesized_event, opts->sample_address,
1124 					    1);
1125 out:
1126 	return err;
1127 }
1128 
1129 static int __cmd_record(struct record *rec, int argc, const char **argv)
1130 {
1131 	int err;
1132 	int status = 0;
1133 	unsigned long waking = 0;
1134 	const bool forks = argc > 0;
1135 	struct perf_tool *tool = &rec->tool;
1136 	struct record_opts *opts = &rec->opts;
1137 	struct perf_data *data = &rec->data;
1138 	struct perf_session *session;
1139 	bool disabled = false, draining = false;
1140 	struct perf_evlist *sb_evlist = NULL;
1141 	int fd;
1142 
1143 	atexit(record__sig_exit);
1144 	signal(SIGCHLD, sig_handler);
1145 	signal(SIGINT, sig_handler);
1146 	signal(SIGTERM, sig_handler);
1147 	signal(SIGSEGV, sigsegv_handler);
1148 
1149 	if (rec->opts.record_namespaces)
1150 		tool->namespace_events = true;
1151 
1152 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
1153 		signal(SIGUSR2, snapshot_sig_handler);
1154 		if (rec->opts.auxtrace_snapshot_mode)
1155 			trigger_on(&auxtrace_snapshot_trigger);
1156 		if (rec->switch_output.enabled)
1157 			trigger_on(&switch_output_trigger);
1158 	} else {
1159 		signal(SIGUSR2, SIG_IGN);
1160 	}
1161 
1162 	session = perf_session__new(data, false, tool);
1163 	if (session == NULL) {
1164 		pr_err("Perf session creation failed.\n");
1165 		return -1;
1166 	}
1167 
1168 	fd = perf_data__fd(data);
1169 	rec->session = session;
1170 
1171 	record__init_features(rec);
1172 
1173 	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1174 		session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
1175 
1176 	if (forks) {
1177 		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
1178 						    argv, data->is_pipe,
1179 						    workload_exec_failed_signal);
1180 		if (err < 0) {
1181 			pr_err("Couldn't run the workload!\n");
1182 			status = err;
1183 			goto out_delete_session;
1184 		}
1185 	}
1186 
1187 	/*
1188 	 * If we have just a single event and are sending data
1189 	 * through a pipe, we need to force the id allocation,
1190 	 * because we synthesize the event name through the pipe
1191 	 * and need the id for that.
1192 	 */
1193 	if (data->is_pipe && rec->evlist->nr_entries == 1)
1194 		rec->opts.sample_id = true;
1195 
1196 	if (record__open(rec) != 0) {
1197 		err = -1;
1198 		goto out_child;
1199 	}
1200 
1201 	err = bpf__apply_obj_config();
1202 	if (err) {
1203 		char errbuf[BUFSIZ];
1204 
1205 		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
1206 		pr_err("ERROR: Apply config to BPF failed: %s\n",
1207 			 errbuf);
1208 		goto out_child;
1209 	}
1210 
1211 	/*
1212 	 * Normally perf_session__new would do this, but it doesn't have the
1213 	 * evlist.
1214 	 */
1215 	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
1216 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
1217 		rec->tool.ordered_events = false;
1218 	}
1219 
1220 	if (!rec->evlist->nr_groups)
1221 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
1222 
1223 	if (data->is_pipe) {
1224 		err = perf_header__write_pipe(fd);
1225 		if (err < 0)
1226 			goto out_child;
1227 	} else {
1228 		err = perf_session__write_header(session, rec->evlist, fd, false);
1229 		if (err < 0)
1230 			goto out_child;
1231 	}
1232 
1233 	if (!rec->no_buildid
1234 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
1235 		pr_err("Couldn't generate buildids. "
1236 		       "Use --no-buildid to profile anyway.\n");
1237 		err = -1;
1238 		goto out_child;
1239 	}
1240 
1241 	if (!opts->no_bpf_event)
1242 		bpf_event__add_sb_event(&sb_evlist, &session->header.env);
1243 
1244 	if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) {
1245 		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
1246 		opts->no_bpf_event = true;
1247 	}
1248 
1249 	err = record__synthesize(rec, false);
1250 	if (err < 0)
1251 		goto out_child;
1252 
1253 	if (rec->realtime_prio) {
1254 		struct sched_param param;
1255 
1256 		param.sched_priority = rec->realtime_prio;
1257 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1258 			pr_err("Could not set realtime priority.\n");
1259 			err = -1;
1260 			goto out_child;
1261 		}
1262 	}
1263 
1264 	/*
1265 	 * When perf is starting the traced process, all the events
1266 	 * (apart from group members) have enable_on_exec=1 set,
1267 	 * so don't spoil it by prematurely enabling them.
1268 	 */
1269 	if (!target__none(&opts->target) && !opts->initial_delay)
1270 		perf_evlist__enable(rec->evlist);
1271 
1272 	/*
1273 	 * Let the child rip
1274 	 */
1275 	if (forks) {
1276 		struct machine *machine = &session->machines.host;
1277 		union perf_event *event;
1278 		pid_t tgid;
1279 
1280 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1281 		if (event == NULL) {
1282 			err = -ENOMEM;
1283 			goto out_child;
1284 		}
1285 
1286 		/*
1287 		 * Some H/W events are generated before the COMM event,
1288 		 * which is emitted during exec(), so perf script
1289 		 * cannot see a correct process name for those events.
1290 		 * Synthesize a COMM event to prevent that.
1291 		 */
1292 		tgid = perf_event__synthesize_comm(tool, event,
1293 						   rec->evlist->workload.pid,
1294 						   process_synthesized_event,
1295 						   machine);
1296 		free(event);
1297 
1298 		if (tgid == -1)
1299 			goto out_child;
1300 
1301 		event = malloc(sizeof(event->namespaces) +
1302 			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1303 			       machine->id_hdr_size);
1304 		if (event == NULL) {
1305 			err = -ENOMEM;
1306 			goto out_child;
1307 		}
1308 
1309 		/*
1310 		 * Synthesize NAMESPACES event for the command specified.
1311 		 */
1312 		perf_event__synthesize_namespaces(tool, event,
1313 						  rec->evlist->workload.pid,
1314 						  tgid, process_synthesized_event,
1315 						  machine);
1316 		free(event);
1317 
1318 		perf_evlist__start_workload(rec->evlist);
1319 	}
1320 
1321 	if (opts->initial_delay) {
1322 		usleep(opts->initial_delay * USEC_PER_MSEC);
1323 		perf_evlist__enable(rec->evlist);
1324 	}
1325 
1326 	trigger_ready(&auxtrace_snapshot_trigger);
1327 	trigger_ready(&switch_output_trigger);
1328 	perf_hooks__invoke_record_start();
1329 	for (;;) {
1330 		unsigned long long hits = rec->samples;
1331 
1332 		/*
1333 		 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
1334 		 * here: when done == true and hits != rec->samples
1335 		 * in the previous round.
1336 		 *
1337 		 * perf_evlist__toggle_bkw_mmap ensures we never
1338 		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1339 		 */
1340 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
1341 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1342 
1343 		if (record__mmap_read_all(rec) < 0) {
1344 			trigger_error(&auxtrace_snapshot_trigger);
1345 			trigger_error(&switch_output_trigger);
1346 			err = -1;
1347 			goto out_child;
1348 		}
1349 
1350 		if (auxtrace_record__snapshot_started) {
1351 			auxtrace_record__snapshot_started = 0;
1352 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
1353 				record__read_auxtrace_snapshot(rec);
1354 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1355 				pr_err("AUX area tracing snapshot failed\n");
1356 				err = -1;
1357 				goto out_child;
1358 			}
1359 		}
1360 
1361 		if (trigger_is_hit(&switch_output_trigger)) {
1362 			/*
1363 			 * If switch_output_trigger is hit, the data in the
1364 			 * overwritable ring buffer should have been collected,
1365 			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1366 			 *
1367 			 * If SIGUSR2 is raised after or during record__mmap_read_all(),
1368 			 * record__mmap_read_all() didn't collect data from the
1369 			 * overwritable ring buffer. Read again.
1370 			 */
1371 			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1372 				continue;
1373 			trigger_ready(&switch_output_trigger);
1374 
1375 			/*
1376 			 * Re-enable events in the overwrite ring buffer after
1377 			 * record__mmap_read_all(): we should have collected
1378 			 * data from it.
1379 			 */
1380 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1381 
1382 			if (!quiet)
1383 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1384 					waking);
1385 			waking = 0;
1386 			fd = record__switch_output(rec, false);
1387 			if (fd < 0) {
1388 				pr_err("Failed to switch to new file\n");
1389 				trigger_error(&switch_output_trigger);
1390 				err = fd;
1391 				goto out_child;
1392 			}
1393 
1394 			/* re-arm the alarm */
1395 			if (rec->switch_output.time)
1396 				alarm(rec->switch_output.time);
1397 		}
1398 
1399 		if (hits == rec->samples) {
1400 			if (done || draining)
1401 				break;
1402 			err = perf_evlist__poll(rec->evlist, -1);
1403 			/*
1404 			 * Propagate the error only if there is one. Ignore a positive
1405 			 * number of returned events and interrupt errors.
1406 			 */
1407 			if (err > 0 || (err < 0 && errno == EINTR))
1408 				err = 0;
1409 			waking++;
1410 
1411 			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1412 				draining = true;
1413 		}
1414 
1415 		/*
1416 		 * When perf is starting the traced process, the events die
1417 		 * with the process at the end and we wait for that, so there
1418 		 * is no need to disable the events in this case.
1419 		 */
1420 		if (done && !disabled && !target__none(&opts->target)) {
1421 			trigger_off(&auxtrace_snapshot_trigger);
1422 			perf_evlist__disable(rec->evlist);
1423 			disabled = true;
1424 		}
1425 	}
1426 	trigger_off(&auxtrace_snapshot_trigger);
1427 	trigger_off(&switch_output_trigger);
1428 
1429 	if (forks && workload_exec_errno) {
1430 		char msg[STRERR_BUFSIZE];
1431 		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1432 		pr_err("Workload failed: %s\n", emsg);
1433 		err = -1;
1434 		goto out_child;
1435 	}
1436 
1437 	if (!quiet)
1438 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1439 
1440 	if (target__none(&rec->opts.target))
1441 		record__synthesize_workload(rec, true);
1442 
1443 out_child:
1444 	record__aio_mmap_read_sync(rec);
1445 
1446 	if (forks) {
1447 		int exit_status;
1448 
1449 		if (!child_finished)
1450 			kill(rec->evlist->workload.pid, SIGTERM);
1451 
1452 		wait(&exit_status);
1453 
1454 		if (err < 0)
1455 			status = err;
1456 		else if (WIFEXITED(exit_status))
1457 			status = WEXITSTATUS(exit_status);
1458 		else if (WIFSIGNALED(exit_status))
1459 			signr = WTERMSIG(exit_status);
1460 	} else
1461 		status = err;
1462 
1463 	record__synthesize(rec, true);
1464 	/* this will be recalculated during process_buildids() */
1465 	rec->samples = 0;
1466 
1467 	if (!err) {
1468 		if (!rec->timestamp_filename) {
1469 			record__finish_output(rec);
1470 		} else {
1471 			fd = record__switch_output(rec, true);
1472 			if (fd < 0) {
1473 				status = fd;
1474 				goto out_delete_session;
1475 			}
1476 		}
1477 	}
1478 
1479 	perf_hooks__invoke_record_end();
1480 
1481 	if (!err && !quiet) {
1482 		char samples[128];
1483 		const char *postfix = rec->timestamp_filename ?
1484 					".<timestamp>" : "";
1485 
1486 		if (rec->samples && !rec->opts.full_auxtrace)
1487 			scnprintf(samples, sizeof(samples),
1488 				  " (%" PRIu64 " samples)", rec->samples);
1489 		else
1490 			samples[0] = '\0';
1491 
1492 		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1493 			perf_data__size(data) / 1024.0 / 1024.0,
1494 			data->path, postfix, samples);
1495 	}
1496 
1497 out_delete_session:
1498 	perf_session__delete(session);
1499 
1500 	if (!opts->no_bpf_event)
1501 		perf_evlist__stop_sb_thread(sb_evlist);
1502 	return status;
1503 }
1504 
1505 static void callchain_debug(struct callchain_param *callchain)
1506 {
1507 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1508 
1509 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1510 
1511 	if (callchain->record_mode == CALLCHAIN_DWARF)
1512 		pr_debug("callchain: stack dump size %d\n",
1513 			 callchain->dump_size);
1514 }
1515 
1516 int record_opts__parse_callchain(struct record_opts *record,
1517 				 struct callchain_param *callchain,
1518 				 const char *arg, bool unset)
1519 {
1520 	int ret;
1521 	callchain->enabled = !unset;
1522 
1523 	/* --no-call-graph */
1524 	if (unset) {
1525 		callchain->record_mode = CALLCHAIN_NONE;
1526 		pr_debug("callchain: disabled\n");
1527 		return 0;
1528 	}
1529 
1530 	ret = parse_callchain_record_opt(arg, callchain);
1531 	if (!ret) {
1532 		/* Enable data address sampling for DWARF unwind. */
1533 		if (callchain->record_mode == CALLCHAIN_DWARF)
1534 			record->sample_address = true;
1535 		callchain_debug(callchain);
1536 	}
1537 
1538 	return ret;
1539 }
1540 
1541 int record_parse_callchain_opt(const struct option *opt,
1542 			       const char *arg,
1543 			       int unset)
1544 {
1545 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1546 }
1547 
1548 int record_callchain_opt(const struct option *opt,
1549 			 const char *arg __maybe_unused,
1550 			 int unset __maybe_unused)
1551 {
1552 	struct callchain_param *callchain = opt->value;
1553 
1554 	callchain->enabled = true;
1555 
1556 	if (callchain->record_mode == CALLCHAIN_NONE)
1557 		callchain->record_mode = CALLCHAIN_FP;
1558 
1559 	callchain_debug(callchain);
1560 	return 0;
1561 }
1562 
1563 static int perf_record_config(const char *var, const char *value, void *cb)
1564 {
1565 	struct record *rec = cb;
1566 
1567 	if (!strcmp(var, "record.build-id")) {
1568 		if (!strcmp(value, "cache"))
1569 			rec->no_buildid_cache = false;
1570 		else if (!strcmp(value, "no-cache"))
1571 			rec->no_buildid_cache = true;
1572 		else if (!strcmp(value, "skip"))
1573 			rec->no_buildid = true;
1574 		else
1575 			return -1;
1576 		return 0;
1577 	}
1578 	if (!strcmp(var, "record.call-graph")) {
1579 		var = "call-graph.record-mode";
1580 		return perf_default_config(var, value, cb);
1581 	}
1582 #ifdef HAVE_AIO_SUPPORT
1583 	if (!strcmp(var, "record.aio")) {
1584 		rec->opts.nr_cblocks = strtol(value, NULL, 0);
1585 		if (!rec->opts.nr_cblocks)
1586 			rec->opts.nr_cblocks = nr_cblocks_default;
1587 	}
1588 #endif
1589 
1590 	return 0;
1591 }
1592 
1593 struct clockid_map {
1594 	const char *name;
1595 	int clockid;
1596 };
1597 
1598 #define CLOCKID_MAP(n, c)	\
1599 	{ .name = n, .clockid = (c), }
1600 
1601 #define CLOCKID_END	{ .name = NULL, }
1602 
1603 
1604 /*
1605  * Add the missing ones, we need to build on many distros...
1606  */
1607 #ifndef CLOCK_MONOTONIC_RAW
1608 #define CLOCK_MONOTONIC_RAW 4
1609 #endif
1610 #ifndef CLOCK_BOOTTIME
1611 #define CLOCK_BOOTTIME 7
1612 #endif
1613 #ifndef CLOCK_TAI
1614 #define CLOCK_TAI 11
1615 #endif
1616 
1617 static const struct clockid_map clockids[] = {
1618 	/* available for all events, NMI safe */
1619 	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
1620 	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
1621 
1622 	/* available for some events */
1623 	CLOCKID_MAP("realtime", CLOCK_REALTIME),
1624 	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
1625 	CLOCKID_MAP("tai", CLOCK_TAI),
1626 
1627 	/* available for the lazy */
1628 	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
1629 	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
1630 	CLOCKID_MAP("real", CLOCK_REALTIME),
1631 	CLOCKID_MAP("boot", CLOCK_BOOTTIME),
1632 
1633 	CLOCKID_END,
1634 };
1635 
1636 static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
1637 {
1638 	struct timespec res;
1639 
1640 	*res_ns = 0;
1641 	if (!clock_getres(clk_id, &res))
1642 		*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
1643 	else
1644 		pr_warning("WARNING: Failed to determine specified clock resolution.\n");
1645 
1646 	return 0;
1647 }
1648 
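/*
 * Parse the -k/--clockid argument: either a numeric clockid or one of the
 * names in clockids[], with an optional "CLOCK_" prefix.
 */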
1649 static int parse_clockid(const struct option *opt, const char *str, int unset)
1650 {
1651 	struct record_opts *opts = (struct record_opts *)opt->value;
1652 	const struct clockid_map *cm;
1653 	const char *ostr = str;
1654 
1655 	if (unset) {
1656 		opts->use_clockid = 0;
1657 		return 0;
1658 	}
1659 
1660 	/* no arg passed */
1661 	if (!str)
1662 		return 0;
1663 
1664 	/* no setting it twice */
1665 	if (opts->use_clockid)
1666 		return -1;
1667 
1668 	opts->use_clockid = true;
1669 
1670 	/* if its a number, we're done */
1671 	if (sscanf(str, "%d", &opts->clockid) == 1)
1672 		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
1673 
1674 	/* allow a "CLOCK_" prefix to the name */
1675 	if (!strncasecmp(str, "CLOCK_", 6))
1676 		str += 6;
1677 
1678 	for (cm = clockids; cm->name; cm++) {
1679 		if (!strcasecmp(str, cm->name)) {
1680 			opts->clockid = cm->clockid;
1681 			return get_clockid_res(opts->clockid,
1682 					       &opts->clockid_res_ns);
1683 		}
1684 	}
1685 
1686 	opts->use_clockid = false;
1687 	ui__warning("unknown clockid %s, check man page\n", ostr);
1688 	return -1;
1689 }
1690 
1691 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
1692 {
1693 	struct record_opts *opts = (struct record_opts *)opt->value;
1694 
1695 	if (unset || !str)
1696 		return 0;
1697 
1698 	if (!strcasecmp(str, "node"))
1699 		opts->affinity = PERF_AFFINITY_NODE;
1700 	else if (!strcasecmp(str, "cpu"))
1701 		opts->affinity = PERF_AFFINITY_CPU;
1702 
1703 	return 0;
1704 }
1705 
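/*
 * Parse -m/--mmap-pages: an optional second, comma-separated value sets the
 * AUX area tracing mmap size separately from the data mmap size.
 */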
1706 static int record__parse_mmap_pages(const struct option *opt,
1707 				    const char *str,
1708 				    int unset __maybe_unused)
1709 {
1710 	struct record_opts *opts = opt->value;
1711 	char *s, *p;
1712 	unsigned int mmap_pages;
1713 	int ret;
1714 
1715 	if (!str)
1716 		return -EINVAL;
1717 
1718 	s = strdup(str);
1719 	if (!s)
1720 		return -ENOMEM;
1721 
1722 	p = strchr(s, ',');
1723 	if (p)
1724 		*p = '\0';
1725 
1726 	if (*s) {
1727 		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1728 		if (ret)
1729 			goto out_free;
1730 		opts->mmap_pages = mmap_pages;
1731 	}
1732 
1733 	if (!p) {
1734 		ret = 0;
1735 		goto out_free;
1736 	}
1737 
1738 	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1739 	if (ret)
1740 		goto out_free;
1741 
1742 	opts->auxtrace_mmap_pages = mmap_pages;
1743 
1744 out_free:
1745 	free(s);
1746 	return ret;
1747 }
1748 
1749 static void switch_output_size_warn(struct record *rec)
1750 {
1751 	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1752 	struct switch_output *s = &rec->switch_output;
1753 
1754 	wakeup_size /= 2;
1755 
1756 	if (s->size < wakeup_size) {
1757 		char buf[100];
1758 
1759 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1760 		pr_warning("WARNING: switch-output data size lower than "
1761 			   "wakeup kernel buffer size (%s), "
1762 			   "expect bigger perf.data sizes\n", buf);
1763 	}
1764 }
1765 
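/*
 * Parse the --switch-output argument: "signal", a size ("B"/"K"/"M"/"G") or a
 * time ("s"/"m"/"h"/"d") threshold.  Enabling it implies timestamped filenames.
 */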
1766 static int switch_output_setup(struct record *rec)
1767 {
1768 	struct switch_output *s = &rec->switch_output;
1769 	static struct parse_tag tags_size[] = {
1770 		{ .tag  = 'B', .mult = 1       },
1771 		{ .tag  = 'K', .mult = 1 << 10 },
1772 		{ .tag  = 'M', .mult = 1 << 20 },
1773 		{ .tag  = 'G', .mult = 1 << 30 },
1774 		{ .tag  = 0 },
1775 	};
1776 	static struct parse_tag tags_time[] = {
1777 		{ .tag  = 's', .mult = 1        },
1778 		{ .tag  = 'm', .mult = 60       },
1779 		{ .tag  = 'h', .mult = 60*60    },
1780 		{ .tag  = 'd', .mult = 60*60*24 },
1781 		{ .tag  = 0 },
1782 	};
1783 	unsigned long val;
1784 
1785 	if (!s->set)
1786 		return 0;
1787 
1788 	if (!strcmp(s->str, "signal")) {
1789 		s->signal = true;
1790 		pr_debug("switch-output with SIGUSR2 signal\n");
1791 		goto enabled;
1792 	}
1793 
1794 	val = parse_tag_value(s->str, tags_size);
1795 	if (val != (unsigned long) -1) {
1796 		s->size = val;
1797 		pr_debug("switch-output with %s size threshold\n", s->str);
1798 		goto enabled;
1799 	}
1800 
1801 	val = parse_tag_value(s->str, tags_time);
1802 	if (val != (unsigned long) -1) {
1803 		s->time = val;
1804 		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1805 			 s->str, s->time);
1806 		goto enabled;
1807 	}
1808 
1809 	return -1;
1810 
1811 enabled:
1812 	rec->timestamp_filename = true;
1813 	s->enabled              = true;
1814 
1815 	if (s->size && !rec->opts.no_buffering)
1816 		switch_output_size_warn(rec);
1817 
1818 	return 0;
1819 }
1820 
1821 static const char * const __record_usage[] = {
1822 	"perf record [<options>] [<command>]",
1823 	"perf record [<options>] -- <command> [<options>]",
1824 	NULL
1825 };
1826 const char * const *record_usage = __record_usage;
1827 
1828 /*
1829  * XXX Ideally this would be local to cmd_record() and passed to a record__new,
1830  * because we need to have access to it in record__exit, which is called
1831  * after cmd_record() exits, but since record_options needs to be accessible to
1832  * builtin-script, leave it here.
1833  *
1834  * At least we don't touch it in all the other functions here directly.
1835  *
1836  * Just say no to tons of global variables, sigh.
1837  */
1838 static struct record record = {
1839 	.opts = {
1840 		.sample_time	     = true,
1841 		.mmap_pages	     = UINT_MAX,
1842 		.user_freq	     = UINT_MAX,
1843 		.user_interval	     = ULLONG_MAX,
1844 		.freq		     = 4000,
1845 		.target		     = {
1846 			.uses_mmap   = true,
1847 			.default_per_cpu = true,
1848 		},
1849 	},
1850 	.tool = {
1851 		.sample		= process_sample_event,
1852 		.fork		= perf_event__process_fork,
1853 		.exit		= perf_event__process_exit,
1854 		.comm		= perf_event__process_comm,
1855 		.namespaces	= perf_event__process_namespaces,
1856 		.mmap		= perf_event__process_mmap,
1857 		.mmap2		= perf_event__process_mmap2,
1858 		.ordered_events	= true,
1859 	},
1860 };
1861 
1862 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1863 	"\n\t\t\t\tDefault: fp";
1864 
1865 static bool dry_run;
1866 
1867 /*
1868  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1869  * with it and switch to using the library functions in perf_evlist that came
1870  * from builtin-record.c, i.e. use record_opts,
1871  * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record',
1872  * using pipes, etc.
1873  */
1874 static struct option __record_options[] = {
1875 	OPT_CALLBACK('e', "event", &record.evlist, "event",
1876 		     "event selector. use 'perf list' to list available events",
1877 		     parse_events_option),
1878 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1879 		     "event filter", parse_filter),
1880 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1881 			   NULL, "don't record events from perf itself",
1882 			   exclude_perf),
1883 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1884 		    "record events on existing process id"),
1885 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1886 		    "record events on existing thread id"),
1887 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
1888 		    "collect data with this RT SCHED_FIFO priority"),
1889 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1890 		    "collect data without buffering"),
1891 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1892 		    "collect raw sample records from all opened counters"),
1893 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1894 			    "system-wide collection from all CPUs"),
1895 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1896 		    "list of cpus to monitor"),
1897 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1898 	OPT_STRING('o', "output", &record.data.path, "file",
1899 		    "output file name"),
1900 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1901 			&record.opts.no_inherit_set,
1902 			"child tasks do not inherit counters"),
1903 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1904 		    "synthesize non-sample events at the end of output"),
1905 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1906 	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
1907 	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
1908 		    "Fail if the specified frequency can't be used"),
1909 	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
1910 		     "profile at this frequency",
1911 		      record__parse_freq),
1912 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1913 		     "number of mmap data pages and AUX area tracing mmap pages",
1914 		     record__parse_mmap_pages),
1915 	OPT_BOOLEAN(0, "group", &record.opts.group,
1916 		    "put the counters into a counter group"),
1917 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1918 			   NULL, "enables call-graph recording" ,
1919 			   &record_callchain_opt),
1920 	OPT_CALLBACK(0, "call-graph", &record.opts,
1921 		     "record_mode[,record_size]", record_callchain_help,
1922 		     &record_parse_callchain_opt),
1923 	OPT_INCR('v', "verbose", &verbose,
1924 		    "be more verbose (show counter open errors, etc)"),
1925 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1926 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1927 		    "per thread counts"),
1928 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1929 	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
1930 		    "Record the sample physical addresses"),
1931 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1932 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1933 			&record.opts.sample_time_set,
1934 			"Record the sample timestamps"),
1935 	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
1936 			"Record the sample period"),
1937 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1938 		    "don't sample"),
1939 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1940 			&record.no_buildid_cache_set,
1941 			"do not update the buildid cache"),
1942 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1943 			&record.no_buildid_set,
1944 			"do not collect buildids in perf.data"),
1945 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1946 		     "monitor event in cgroup name only",
1947 		     parse_cgroups),
1948 	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1949 		  "ms to wait before starting measurement after program start"),
1950 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1951 		   "user to profile"),
1952 
1953 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1954 		     "branch any", "sample any taken branches",
1955 		     parse_branch_stack),
1956 
1957 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1958 		     "branch filter mask", "branch stack filter modes",
1959 		     parse_branch_stack),
1960 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1961 		    "sample by weight (on special events only)"),
1962 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1963 		    "sample transaction flags (special events only)"),
1964 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1965 		    "use per-thread mmaps"),
1966 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1967 		    "sample selected machine registers on interrupt,"
1968 		    " use -I ? to list register names", parse_regs),
1969 	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
1970 		    "sample selected machine registers in user space,"
1971 		    " use --user-regs=? to list register names", parse_regs),
1972 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1973 		    "Record running/enabled time of read (:S) events"),
1974 	OPT_CALLBACK('k', "clockid", &record.opts,
1975 		     "clockid", "clockid to use for events, see clock_gettime()",
1976 		     parse_clockid),
1977 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1978 			  "opts", "AUX area tracing Snapshot Mode", ""),
1979 	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
1980 			"per thread proc mmap processing timeout in ms"),
1981 	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
1982 		    "Record namespaces events"),
1983 	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1984 		    "Record context switch events"),
1985 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1986 			 "Configure all used events to run in kernel space.",
1987 			 PARSE_OPT_EXCLUSIVE),
1988 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1989 			 "Configure all used events to run in user space.",
1990 			 PARSE_OPT_EXCLUSIVE),
1991 	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1992 		   "clang binary to use for compiling BPF scriptlets"),
1993 	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1994 		   "options passed to clang when compiling BPF scriptlets"),
1995 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1996 		   "file", "vmlinux pathname"),
1997 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1998 		    "Record build-id of all DSOs regardless of hits"),
1999 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
2000 		    "append timestamp to output filename"),
2001 	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
2002 		    "Record timestamp boundary (time of first/last samples)"),
2003 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
2004 			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
2005 			  "Switch output when receiving SIGUSR2 (signal) or when crossing a size or time threshold",
2006 			  "signal"),
2007 	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
2008 		   "Limit the number of files generated by switch output"),
2009 	OPT_BOOLEAN(0, "dry-run", &dry_run,
2010 		    "Parse options then exit"),
2011 #ifdef HAVE_AIO_SUPPORT
2012 	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
2013 		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
2014 		     record__aio_parse),
2015 #endif
2016 	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
2017 		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
2018 		     record__parse_affinity),
2019 	OPT_END()
2020 };
2021 
2022 struct option *record_options = __record_options;
2023 
2024 int cmd_record(int argc, const char **argv)
2025 {
2026 	int err;
2027 	struct record *rec = &record;
2028 	char errbuf[BUFSIZ];
2029 
2030 	setlocale(LC_ALL, "");
2031 
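	/* The clang/BPF scriptlet options require libbpf support; otherwise mark them as not built. */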
2032 #ifndef HAVE_LIBBPF_SUPPORT
2033 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
2034 	set_nobuild('\0', "clang-path", true);
2035 	set_nobuild('\0', "clang-opt", true);
2036 # undef set_nobuild
2037 #endif
2038 
2039 #ifndef HAVE_BPF_PROLOGUE
2040 # if !defined (HAVE_DWARF_SUPPORT)
2041 #  define REASON  "NO_DWARF=1"
2042 # elif !defined (HAVE_LIBBPF_SUPPORT)
2043 #  define REASON  "NO_LIBBPF=1"
2044 # else
2045 #  define REASON  "this architecture doesn't support BPF prologue"
2046 # endif
2047 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
2048 	set_nobuild('\0', "vmlinux", true);
2049 # undef set_nobuild
2050 # undef REASON
2051 #endif
2052 
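	/*
	 * Start with an empty CPU mask and the default system-wide ("SYS")
	 * affinity for the trace reading thread.
	 */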
2053 	CPU_ZERO(&rec->affinity_mask);
2054 	rec->opts.affinity = PERF_AFFINITY_SYS;
2055 
2056 	rec->evlist = perf_evlist__new();
2057 	if (rec->evlist == NULL)
2058 		return -ENOMEM;
2059 
2060 	err = perf_config(perf_record_config, rec);
2061 	if (err)
2062 		return err;
2063 
2064 	argc = parse_options(argc, argv, record_options, record_usage,
2065 			    PARSE_OPT_STOP_AT_NON_OPTION);
2066 	if (quiet)
2067 		perf_quiet_option();
2068 
2069 	/* Make system wide (-a) the default target. */
2070 	if (!argc && target__none(&rec->opts.target))
2071 		rec->opts.target.system_wide = true;
2072 
2073 	if (nr_cgroups && !rec->opts.target.system_wide) {
2074 		usage_with_options_msg(record_usage, record_options,
2075 			"cgroup monitoring only available in system-wide mode");
2076 
2077 	}
2078 	if (rec->opts.record_switch_events &&
2079 	    !perf_can_record_switch_events()) {
2080 		ui__error("kernel does not support recording context switch events\n");
2081 		parse_options_usage(record_usage, record_options, "switch-events", 0);
2082 		return -EINVAL;
2083 	}
2084 
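	/* Parse the --switch-output argument into its signal, size or time mode. */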
2085 	if (switch_output_setup(rec)) {
2086 		parse_options_usage(record_usage, record_options, "switch-output", 0);
2087 		return -EINVAL;
2088 	}
2089 
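	/* Time-based switching: arm a SIGALRM to fire when the interval expires. */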
2090 	if (rec->switch_output.time) {
2091 		signal(SIGALRM, alarm_sig_handler);
2092 		alarm(rec->switch_output.time);
2093 	}
2094 
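	/* --switch-max-files: pre-allocate the table that tracks the generated output filenames. */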
2095 	if (rec->switch_output.num_files) {
2096 		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
2097 						      sizeof(char *));
2098 		if (!rec->switch_output.filenames)
2099 			return -ENOMEM;
2100 	}
2101 
2102 	/*
2103 	 * Allow aliases to facilitate the lookup of symbols for address
2104 	 * filters. Refer to auxtrace_parse_filters().
2105 	 */
2106 	symbol_conf.allow_aliases = true;
2107 
2108 	symbol__init(NULL);
2109 
2110 	err = record__auxtrace_init(rec);
2111 	if (err)
2112 		goto out;
2113 
2114 	if (dry_run)
2115 		goto out;
2116 
2117 	err = bpf__setup_stdout(rec->evlist);
2118 	if (err) {
2119 		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
2120 		pr_err("ERROR: Setup BPF stdout failed: %s\n",
2121 			 errbuf);
2122 		goto out;
2123 	}
2124 
2125 	err = -ENOMEM;
2126 
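	/* Warn early if kptr_restrict will prevent kernel samples from being resolved. */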
2127 	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
2128 		pr_warning(
2129 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
2130 "check /proc/sys/kernel/kptr_restrict.\n\n"
2131 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
2132 "file is not found in the buildid cache or in the vmlinux path.\n\n"
2133 "Samples in kernel modules won't be resolved at all.\n\n"
2134 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
2135 "even with a suitable vmlinux or kallsyms file.\n\n");
2136 
2137 	if (rec->no_buildid_cache || rec->no_buildid) {
2138 		disable_buildid_cache();
2139 	} else if (rec->switch_output.enabled) {
2140 		/*
2141 		 * In 'perf record --switch-output', disable buildid
2142 		 * generation by default to reduce data file switching
2143 		 * overhead. Still generate buildids if they are explicitly
2144 		 * requested, using
2145 		 *
2146 		 *  perf record --switch-output --no-no-buildid \
2147 		 *              --no-no-buildid-cache
2148 		 *
2149 		 * The following code is equivalent to:
2150 		 *
2151 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
2152 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2153 		 *         disable_buildid_cache();
2154 		 */
2155 		bool disable = true;
2156 
2157 		if (rec->no_buildid_set && !rec->no_buildid)
2158 			disable = false;
2159 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
2160 			disable = false;
2161 		if (disable) {
2162 			rec->no_buildid = true;
2163 			rec->no_buildid_cache = true;
2164 			disable_buildid_cache();
2165 		}
2166 	}
2167 
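	/*
	 * Overwrite mode keeps only the most recent data in the ring buffer,
	 * so synthesize the non-sample events at the end of the output instead.
	 */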
2168 	if (record.opts.overwrite)
2169 		record.opts.tail_synthesize = true;
2170 
2171 	if (rec->evlist->nr_entries == 0 &&
2172 	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
2173 		pr_err("Not enough memory for event selector list\n");
2174 		goto out;
2175 	}
2176 
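	/*
	 * When attaching to an existing thread (-t), don't inherit counters
	 * to child tasks unless the user asked for it explicitly.
	 */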
2177 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
2178 		rec->opts.no_inherit = true;
2179 
2180 	err = target__validate(&rec->opts.target);
2181 	if (err) {
2182 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2183 		ui__warning("%s\n", errbuf);
2184 	}
2185 
2186 	err = target__parse_uid(&rec->opts.target);
2187 	if (err) {
2188 		int saved_errno = errno;
2189 
2190 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2191 		ui__error("%s", errbuf);
2192 
2193 		err = -saved_errno;
2194 		goto out;
2195 	}
2196 
2197 	/* Enable ignoring missing threads when the -u or -p option is given. */
2198 	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
2199 
2200 	err = -ENOMEM;
2201 	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
2202 		usage_with_options(record_usage, record_options);
2203 
2204 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
2205 	if (err)
2206 		goto out;
2207 
2208 	/*
2209 	 * We take all buildids when the file contains
2210 	 * AUX area tracing data because we do not decode the
2211 	 * trace, since decoding it would take too long.
2212 	 */
2213 	if (rec->opts.full_auxtrace)
2214 		rec->buildid_all = true;
2215 
2216 	if (record_opts__config(&rec->opts)) {
2217 		err = -EINVAL;
2218 		goto out;
2219 	}
2220 
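	/* Clamp the requested number of AIO control blocks to the supported maximum. */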
2221 	if (rec->opts.nr_cblocks > nr_cblocks_max)
2222 		rec->opts.nr_cblocks = nr_cblocks_max;
2223 	if (verbose > 0)
2224 		pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2225 
2226 	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
2227 
2228 	err = __cmd_record(&record, argc, argv);
2229 out:
2230 	perf_evlist__delete(rec->evlist);
2231 	symbol__exit();
2232 	auxtrace_record__free(rec->itr);
2233 	return err;
2234 }
2235 
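/*
 * SIGUSR2 handler: request an AUX area tracing snapshot and/or an output
 * file switch when --switch-output=signal is in effect.
 */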
2236 static void snapshot_sig_handler(int sig __maybe_unused)
2237 {
2238 	struct record *rec = &record;
2239 
2240 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2241 		trigger_hit(&auxtrace_snapshot_trigger);
2242 		auxtrace_record__snapshot_started = 1;
2243 		if (auxtrace_record__snapshot_start(record.itr))
2244 			trigger_error(&auxtrace_snapshot_trigger);
2245 	}
2246 
2247 	if (switch_output_signal(rec))
2248 		trigger_hit(&switch_output_trigger);
2249 }
2250 
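/*
 * SIGALRM handler: the --switch-output time interval has expired, request
 * an output file switch.
 */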
2251 static void alarm_sig_handler(int sig __maybe_unused)
2252 {
2253 	struct record *rec = &record;
2254 
2255 	if (switch_output_time(rec))
2256 		trigger_hit(&switch_output_trigger);
2257 }
2258