xref: /linux/tools/perf/builtin-record.c (revision b6815f354518591400ce4c3a5fd63337643710ff)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10 
11 #include "perf.h"
12 
13 #include "util/build-id.h"
14 #include "util/util.h"
15 #include <subcmd/parse-options.h>
16 #include "util/parse-events.h"
17 #include "util/config.h"
18 
19 #include "util/callchain.h"
20 #include "util/cgroup.h"
21 #include "util/header.h"
22 #include "util/event.h"
23 #include "util/evlist.h"
24 #include "util/evsel.h"
25 #include "util/debug.h"
26 #include "util/drv_configs.h"
27 #include "util/session.h"
28 #include "util/tool.h"
29 #include "util/symbol.h"
30 #include "util/cpumap.h"
31 #include "util/thread_map.h"
32 #include "util/data.h"
33 #include "util/perf_regs.h"
34 #include "util/auxtrace.h"
35 #include "util/tsc.h"
36 #include "util/parse-branch-options.h"
37 #include "util/parse-regs-options.h"
38 #include "util/llvm-utils.h"
39 #include "util/bpf-loader.h"
40 #include "util/trigger.h"
41 #include "util/perf-hooks.h"
42 #include "util/time-utils.h"
43 #include "util/units.h"
44 #include "asm/bug.h"
45 
46 #include <errno.h>
47 #include <inttypes.h>
48 #include <poll.h>
49 #include <unistd.h>
50 #include <sched.h>
51 #include <signal.h>
52 #include <sys/mman.h>
53 #include <sys/wait.h>
54 #include <linux/time64.h>
55 
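/*
 * State for the --switch-output option: whether it is enabled, whether the
 * trigger is the SIGUSR2 signal, a size threshold or a time period, plus the
 * raw option string as given on the command line.
 */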
56 struct switch_output {
57 	bool		 enabled;
58 	bool		 signal;
59 	unsigned long	 size;
60 	unsigned long	 time;
61 	const char	*str;
62 	bool		 set;
63 };
64 
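/*
 * Per-invocation state for 'perf record': the tool callbacks, the parsed
 * options, the output file, the event list and session, plus bookkeeping
 * such as the number of bytes written and samples seen so far.
 */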
65 struct record {
66 	struct perf_tool	tool;
67 	struct record_opts	opts;
68 	u64			bytes_written;
69 	struct perf_data	data;
70 	struct auxtrace_record	*itr;
71 	struct perf_evlist	*evlist;
72 	struct perf_session	*session;
73 	const char		*progname;
74 	int			realtime_prio;
75 	bool			no_buildid;
76 	bool			no_buildid_set;
77 	bool			no_buildid_cache;
78 	bool			no_buildid_cache_set;
79 	bool			buildid_all;
80 	bool			timestamp_filename;
81 	struct switch_output	switch_output;
82 	unsigned long long	samples;
83 };
84 
85 static volatile int auxtrace_record__snapshot_started;
86 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
87 static DEFINE_TRIGGER(switch_output_trigger);
88 
89 static bool switch_output_signal(struct record *rec)
90 {
91 	return rec->switch_output.signal &&
92 	       trigger_is_ready(&switch_output_trigger);
93 }
94 
95 static bool switch_output_size(struct record *rec)
96 {
97 	return rec->switch_output.size &&
98 	       trigger_is_ready(&switch_output_trigger) &&
99 	       (rec->bytes_written >= rec->switch_output.size);
100 }
101 
102 static bool switch_output_time(struct record *rec)
103 {
104 	return rec->switch_output.time &&
105 	       trigger_is_ready(&switch_output_trigger);
106 }
107 
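/*
 * Write a blob to the output file (or pipe), account for it in
 * bytes_written, and fire the switch-output trigger once the configured
 * size threshold is crossed.
 */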
108 static int record__write(struct record *rec, void *bf, size_t size)
109 {
110 	if (perf_data__write(rec->session->data, bf, size) < 0) {
111 		pr_err("failed to write perf data, error: %m\n");
112 		return -1;
113 	}
114 
115 	rec->bytes_written += size;
116 
117 	if (switch_output_size(rec))
118 		trigger_hit(&switch_output_trigger);
119 
120 	return 0;
121 }
122 
123 static int process_synthesized_event(struct perf_tool *tool,
124 				     union perf_event *event,
125 				     struct perf_sample *sample __maybe_unused,
126 				     struct machine *machine __maybe_unused)
127 {
128 	struct record *rec = container_of(tool, struct record, tool);
129 	return record__write(rec, event, event->header.size);
130 }
131 
132 static int record__pushfn(void *to, void *bf, size_t size)
133 {
134 	struct record *rec = to;
135 
136 	rec->samples++;
137 	return record__write(rec, bf, size);
138 }
139 
140 static volatile int done;
141 static volatile int signr = -1;
142 static volatile int child_finished;
143 
144 static void sig_handler(int sig)
145 {
146 	if (sig == SIGCHLD)
147 		child_finished = 1;
148 	else
149 		signr = sig;
150 
151 	done = 1;
152 }
153 
154 static void sigsegv_handler(int sig)
155 {
156 	perf_hooks__recover();
157 	sighandler_dump_stack(sig);
158 }
159 
160 static void record__sig_exit(void)
161 {
162 	if (signr == -1)
163 		return;
164 
165 	signal(signr, SIG_DFL);
166 	raise(signr);
167 }
168 
169 #ifdef HAVE_AUXTRACE_SUPPORT
170 
171 static int record__process_auxtrace(struct perf_tool *tool,
172 				    union perf_event *event, void *data1,
173 				    size_t len1, void *data2, size_t len2)
174 {
175 	struct record *rec = container_of(tool, struct record, tool);
176 	struct perf_data *data = &rec->data;
177 	size_t padding;
178 	u8 pad[8] = {0};
179 
180 	if (!perf_data__is_pipe(data)) {
181 		off_t file_offset;
182 		int fd = perf_data__fd(data);
183 		int err;
184 
185 		file_offset = lseek(fd, 0, SEEK_CUR);
186 		if (file_offset == -1)
187 			return -1;
188 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
189 						     event, file_offset);
190 		if (err)
191 			return err;
192 	}
193 
194 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
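	/* e.g. len1 + len2 == 13 -> padding == 3, padding the payload to 16 bytes */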
195 	padding = (len1 + len2) & 7;
196 	if (padding)
197 		padding = 8 - padding;
198 
199 	record__write(rec, event, event->header.size);
200 	record__write(rec, data1, len1);
201 	if (len2)
202 		record__write(rec, data2, len2);
203 	record__write(rec, &pad, padding);
204 
205 	return 0;
206 }
207 
208 static int record__auxtrace_mmap_read(struct record *rec,
209 				      struct auxtrace_mmap *mm)
210 {
211 	int ret;
212 
213 	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
214 				  record__process_auxtrace);
215 	if (ret < 0)
216 		return ret;
217 
218 	if (ret)
219 		rec->samples++;
220 
221 	return 0;
222 }
223 
224 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
225 					       struct auxtrace_mmap *mm)
226 {
227 	int ret;
228 
229 	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
230 					   record__process_auxtrace,
231 					   rec->opts.auxtrace_snapshot_size);
232 	if (ret < 0)
233 		return ret;
234 
235 	if (ret)
236 		rec->samples++;
237 
238 	return 0;
239 }
240 
241 static int record__auxtrace_read_snapshot_all(struct record *rec)
242 {
243 	int i;
244 	int rc = 0;
245 
246 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
247 		struct auxtrace_mmap *mm =
248 				&rec->evlist->mmap[i].auxtrace_mmap;
249 
250 		if (!mm->base)
251 			continue;
252 
253 		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
254 			rc = -1;
255 			goto out;
256 		}
257 	}
258 out:
259 	return rc;
260 }
261 
262 static void record__read_auxtrace_snapshot(struct record *rec)
263 {
264 	pr_debug("Recording AUX area tracing snapshot\n");
265 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
266 		trigger_error(&auxtrace_snapshot_trigger);
267 	} else {
268 		if (auxtrace_record__snapshot_finish(rec->itr))
269 			trigger_error(&auxtrace_snapshot_trigger);
270 		else
271 			trigger_ready(&auxtrace_snapshot_trigger);
272 	}
273 }
274 
275 #else
276 
277 static inline
278 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
279 			       struct auxtrace_mmap *mm __maybe_unused)
280 {
281 	return 0;
282 }
283 
284 static inline
285 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
286 {
287 }
288 
289 static inline
290 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
291 {
292 	return 0;
293 }
294 
295 #endif
296 
297 static int record__mmap_evlist(struct record *rec,
298 			       struct perf_evlist *evlist)
299 {
300 	struct record_opts *opts = &rec->opts;
301 	char msg[512];
302 
303 	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
304 				 opts->auxtrace_mmap_pages,
305 				 opts->auxtrace_snapshot_mode) < 0) {
306 		if (errno == EPERM) {
307 			pr_err("Permission error mapping pages.\n"
308 			       "Consider increasing "
309 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
310 			       "or try again with a smaller value of -m/--mmap_pages.\n"
311 			       "(current value: %u,%u)\n",
312 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
313 			return -errno;
314 		} else {
315 			pr_err("failed to mmap with %d (%s)\n", errno,
316 				str_error_r(errno, msg, sizeof(msg)));
317 			if (errno)
318 				return -errno;
319 			else
320 				return -EINVAL;
321 		}
322 	}
323 	return 0;
324 }
325 
326 static int record__mmap(struct record *rec)
327 {
328 	return record__mmap_evlist(rec, rec->evlist);
329 }
330 
331 static int record__open(struct record *rec)
332 {
333 	char msg[BUFSIZ];
334 	struct perf_evsel *pos;
335 	struct perf_evlist *evlist = rec->evlist;
336 	struct perf_session *session = rec->session;
337 	struct record_opts *opts = &rec->opts;
338 	struct perf_evsel_config_term *err_term;
339 	int rc = 0;
340 
	/*
	 * For initial_delay we need to add a dummy event so that we can track
	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
	 * real events, the ones requested by the user.
	 */
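	/*
	 * Illustrative invocation (hypothetical workload name):
	 *
	 *   perf record -D 500 ./myprog
	 *
	 * The dummy event is enabled on exec and tracks mmaps during the
	 * 500ms delay; the real events are enabled afterwards, in
	 * __cmd_record().
	 */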
346 	if (opts->initial_delay) {
347 		if (perf_evlist__add_dummy(evlist))
348 			return -ENOMEM;
349 
350 		pos = perf_evlist__first(evlist);
351 		pos->tracking = 0;
352 		pos = perf_evlist__last(evlist);
353 		pos->tracking = 1;
354 		pos->attr.enable_on_exec = 1;
355 	}
356 
357 	perf_evlist__config(evlist, opts, &callchain_param);
358 
359 	evlist__for_each_entry(evlist, pos) {
360 try_again:
361 		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
362 			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
363 				if (verbose > 0)
364 					ui__warning("%s\n", msg);
365 				goto try_again;
366 			}
367 
368 			rc = -errno;
369 			perf_evsel__open_strerror(pos, &opts->target,
370 						  errno, msg, sizeof(msg));
371 			ui__error("%s\n", msg);
372 			goto out;
373 		}
374 
375 		pos->supported = true;
376 	}
377 
378 	if (perf_evlist__apply_filters(evlist, &pos)) {
379 		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
380 			pos->filter, perf_evsel__name(pos), errno,
381 			str_error_r(errno, msg, sizeof(msg)));
382 		rc = -1;
383 		goto out;
384 	}
385 
386 	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
387 		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
388 		      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
389 		      str_error_r(errno, msg, sizeof(msg)));
390 		rc = -1;
391 		goto out;
392 	}
393 
394 	rc = record__mmap(rec);
395 	if (rc)
396 		goto out;
397 
398 	session->evlist = evlist;
399 	perf_session__set_id_hdr_size(session);
400 out:
401 	return rc;
402 }
403 
404 static int process_sample_event(struct perf_tool *tool,
405 				union perf_event *event,
406 				struct perf_sample *sample,
407 				struct perf_evsel *evsel,
408 				struct machine *machine)
409 {
410 	struct record *rec = container_of(tool, struct record, tool);
411 
412 	rec->samples++;
413 
414 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
415 }
416 
417 static int process_buildids(struct record *rec)
418 {
419 	struct perf_data *data = &rec->data;
420 	struct perf_session *session = rec->session;
421 
422 	if (data->size == 0)
423 		return 0;
424 
	/*
	 * During this process, it'll load the kernel map and replace
	 * dso->long_name with the real pathname it found.  In this case
	 * we prefer a vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory):
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
434 	symbol_conf.ignore_vmlinux_buildid = true;
435 
	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples.
	 */
440 	if (rec->buildid_all)
441 		rec->tool.sample = NULL;
442 
443 	return perf_session__process_events(session);
444 }
445 
446 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
447 {
448 	int err;
449 	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel: when processing the record & report
	 * subcommands, we arrange the module mmaps prior to the guest kernel
	 * mmap and trigger a dso preload, because by default guest module
	 * symbols are loaded from guest kallsyms instead of
	 * /lib/modules/XXX/XXX. This avoids missing symbols when the first
	 * address is in a module instead of in the guest kernel.
	 */
458 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
459 					     machine);
460 	if (err < 0)
461 		pr_err("Couldn't record guest kernel [%d]'s reference"
462 		       " relocation symbol.\n", machine->pid);
463 
464 	/*
465 	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
466 	 * have no _text sometimes.
467 	 */
468 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
469 						 machine);
470 	if (err < 0)
471 		pr_err("Couldn't record guest kernel [%d]'s reference"
472 		       " relocation symbol.\n", machine->pid);
473 }
474 
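/*
 * PERF_RECORD_FINISHED_ROUND is a synthetic event written after each full
 * pass over the mmap buffers; it tells the ordered-events code on the
 * reporting side that everything before it can safely be flushed and sorted.
 */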
475 static struct perf_event_header finished_round_event = {
476 	.size = sizeof(struct perf_event_header),
477 	.type = PERF_RECORD_FINISHED_ROUND,
478 };
479 
480 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
481 				    bool overwrite)
482 {
483 	u64 bytes_written = rec->bytes_written;
484 	int i;
485 	int rc = 0;
486 	struct perf_mmap *maps;
487 
488 	if (!evlist)
489 		return 0;
490 
491 	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
492 	if (!maps)
493 		return 0;
494 
495 	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
496 		return 0;
497 
498 	for (i = 0; i < evlist->nr_mmaps; i++) {
499 		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;
500 
501 		if (maps[i].base) {
502 			if (perf_mmap__push(&maps[i], overwrite, rec, record__pushfn) != 0) {
503 				rc = -1;
504 				goto out;
505 			}
506 		}
507 
508 		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
509 		    record__auxtrace_mmap_read(rec, mm) != 0) {
510 			rc = -1;
511 			goto out;
512 		}
513 	}
514 
515 	/*
516 	 * Mark the round finished in case we wrote
517 	 * at least one event.
518 	 */
519 	if (bytes_written != rec->bytes_written)
520 		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
521 
522 	if (overwrite)
523 		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
524 out:
525 	return rc;
526 }
527 
528 static int record__mmap_read_all(struct record *rec)
529 {
530 	int err;
531 
532 	err = record__mmap_read_evlist(rec, rec->evlist, false);
533 	if (err)
534 		return err;
535 
536 	return record__mmap_read_evlist(rec, rec->evlist, true);
537 }
538 
539 static void record__init_features(struct record *rec)
540 {
541 	struct perf_session *session = rec->session;
542 	int feat;
543 
544 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
545 		perf_header__set_feat(&session->header, feat);
546 
547 	if (rec->no_buildid)
548 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
549 
550 	if (!have_tracepoints(&rec->evlist->entries))
551 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
552 
553 	if (!rec->opts.branch_stack)
554 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
555 
556 	if (!rec->opts.full_auxtrace)
557 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
558 
559 	perf_header__clear_feat(&session->header, HEADER_STAT);
560 }
561 
562 static void
563 record__finish_output(struct record *rec)
564 {
565 	struct perf_data *data = &rec->data;
566 	int fd = perf_data__fd(data);
567 
568 	if (data->is_pipe)
569 		return;
570 
571 	rec->session->header.data_size += rec->bytes_written;
572 	data->size = lseek(perf_data__fd(data), 0, SEEK_CUR);
573 
574 	if (!rec->no_buildid) {
575 		process_buildids(rec);
576 
577 		if (rec->buildid_all)
578 			dsos__hit_all(rec->session);
579 	}
580 	perf_session__write_header(rec->session, rec->evlist, fd, true);
}
584 
585 static int record__synthesize_workload(struct record *rec, bool tail)
586 {
587 	int err;
588 	struct thread_map *thread_map;
589 
590 	if (rec->opts.tail_synthesize != tail)
591 		return 0;
592 
593 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
594 	if (thread_map == NULL)
595 		return -1;
596 
597 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
598 						 process_synthesized_event,
599 						 &rec->session->machines.host,
600 						 rec->opts.sample_address,
601 						 rec->opts.proc_map_timeout);
602 	thread_map__put(thread_map);
603 	return err;
604 }
605 
606 static int record__synthesize(struct record *rec, bool tail);
607 
608 static int
609 record__switch_output(struct record *rec, bool at_exit)
610 {
611 	struct perf_data *data = &rec->data;
612 	int fd, err;
613 
	/* Same size as "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";
616 
617 	record__synthesize(rec, true);
618 	if (target__none(&rec->opts.target))
619 		record__synthesize_workload(rec, true);
620 
621 	rec->samples = 0;
622 	record__finish_output(rec);
623 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
624 	if (err) {
625 		pr_err("Failed to get current timestamp\n");
626 		return -EINVAL;
627 	}
628 
629 	fd = perf_data__switch(data, timestamp,
630 				    rec->session->header.data_offset,
631 				    at_exit);
632 	if (fd >= 0 && !at_exit) {
633 		rec->bytes_written = 0;
634 		rec->session->header.data_size = 0;
635 	}
636 
637 	if (!quiet)
638 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
639 			data->file.path, timestamp);
640 
641 	/* Output tracking events */
642 	if (!at_exit) {
643 		record__synthesize(rec, false);
644 
		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist, which causes the newly created perf.data
		 * to lack map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
654 		if (target__none(&rec->opts.target))
655 			record__synthesize_workload(rec, false);
656 	}
657 	return fd;
658 }
659 
660 static volatile int workload_exec_errno;
661 
/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked for that by setting
 * its want_signal to true.
 */
667 static void workload_exec_failed_signal(int signo __maybe_unused,
668 					siginfo_t *info,
669 					void *ucontext __maybe_unused)
670 {
671 	workload_exec_errno = info->si_value.sival_int;
672 	done = 1;
673 	child_finished = 1;
674 }
675 
676 static void snapshot_sig_handler(int sig);
677 static void alarm_sig_handler(int sig);
678 
679 int __weak
680 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
681 			    struct perf_tool *tool __maybe_unused,
682 			    perf_event__handler_t process __maybe_unused,
683 			    struct machine *machine __maybe_unused)
684 {
685 	return 0;
686 }
687 
688 static const struct perf_event_mmap_page *
689 perf_evlist__pick_pc(struct perf_evlist *evlist)
690 {
691 	if (evlist) {
692 		if (evlist->mmap && evlist->mmap[0].base)
693 			return evlist->mmap[0].base;
694 		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
695 			return evlist->overwrite_mmap[0].base;
696 	}
697 	return NULL;
698 }
699 
700 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
701 {
702 	const struct perf_event_mmap_page *pc;
703 
704 	pc = perf_evlist__pick_pc(rec->evlist);
705 	if (pc)
706 		return pc;
707 	return NULL;
708 }
709 
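/*
 * Synthesize the non-sample events describing the system state (features,
 * attrs and tracing data for pipes, kernel and module mmaps, thread and cpu
 * maps, ...) so that the resulting perf.data can be decoded standalone.
 */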
710 static int record__synthesize(struct record *rec, bool tail)
711 {
712 	struct perf_session *session = rec->session;
713 	struct machine *machine = &session->machines.host;
714 	struct perf_data *data = &rec->data;
715 	struct record_opts *opts = &rec->opts;
716 	struct perf_tool *tool = &rec->tool;
717 	int fd = perf_data__fd(data);
718 	int err = 0;
719 
720 	if (rec->opts.tail_synthesize != tail)
721 		return 0;
722 
723 	if (data->is_pipe) {
724 		err = perf_event__synthesize_features(
725 			tool, session, rec->evlist, process_synthesized_event);
726 		if (err < 0) {
727 			pr_err("Couldn't synthesize features.\n");
728 			return err;
729 		}
730 
731 		err = perf_event__synthesize_attrs(tool, session,
732 						   process_synthesized_event);
733 		if (err < 0) {
734 			pr_err("Couldn't synthesize attrs.\n");
735 			goto out;
736 		}
737 
738 		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME: err <= 0 here actually means that
			 * there were no tracepoints, so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
748 								  process_synthesized_event);
749 			if (err <= 0) {
750 				pr_err("Couldn't record tracing data.\n");
751 				goto out;
752 			}
753 			rec->bytes_written += err;
754 		}
755 	}
756 
757 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
758 					  process_synthesized_event, machine);
759 	if (err)
760 		goto out;
761 
762 	if (rec->opts.full_auxtrace) {
763 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
764 					session, process_synthesized_event);
765 		if (err)
766 			goto out;
767 	}
768 
769 	if (!perf_evlist__exclude_kernel(rec->evlist)) {
770 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
771 							 machine);
772 		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
773 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
774 				   "Check /proc/kallsyms permission or run as root.\n");
775 
776 		err = perf_event__synthesize_modules(tool, process_synthesized_event,
777 						     machine);
778 		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
779 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
780 				   "Check /proc/modules permission or run as root.\n");
781 	}
782 
783 	if (perf_guest) {
784 		machines__process_guests(&session->machines,
785 					 perf_event__synthesize_guest_os, tool);
786 	}
787 
788 	err = perf_event__synthesize_extra_attr(&rec->tool,
789 						rec->evlist,
790 						process_synthesized_event,
791 						data->is_pipe);
792 	if (err)
793 		goto out;
794 
795 	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
796 						 process_synthesized_event,
797 						NULL);
798 	if (err < 0) {
799 		pr_err("Couldn't synthesize thread map.\n");
800 		return err;
801 	}
802 
803 	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
804 					     process_synthesized_event, NULL);
805 	if (err < 0) {
806 		pr_err("Couldn't synthesize cpu map.\n");
807 		return err;
808 	}
809 
810 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
811 					    process_synthesized_event, opts->sample_address,
812 					    opts->proc_map_timeout, 1);
813 out:
814 	return err;
815 }
816 
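/*
 * The main record loop: fork the workload if one was given, open and mmap
 * the events, synthesize the initial state, then keep draining the ring
 * buffers until the workload exits or the user interrupts us.
 */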
817 static int __cmd_record(struct record *rec, int argc, const char **argv)
818 {
819 	int err;
820 	int status = 0;
821 	unsigned long waking = 0;
822 	const bool forks = argc > 0;
823 	struct machine *machine;
824 	struct perf_tool *tool = &rec->tool;
825 	struct record_opts *opts = &rec->opts;
826 	struct perf_data *data = &rec->data;
827 	struct perf_session *session;
828 	bool disabled = false, draining = false;
829 	int fd;
830 
831 	rec->progname = argv[0];
832 
833 	atexit(record__sig_exit);
834 	signal(SIGCHLD, sig_handler);
835 	signal(SIGINT, sig_handler);
836 	signal(SIGTERM, sig_handler);
837 	signal(SIGSEGV, sigsegv_handler);
838 
839 	if (rec->opts.record_namespaces)
840 		tool->namespace_events = true;
841 
842 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
843 		signal(SIGUSR2, snapshot_sig_handler);
844 		if (rec->opts.auxtrace_snapshot_mode)
845 			trigger_on(&auxtrace_snapshot_trigger);
846 		if (rec->switch_output.enabled)
847 			trigger_on(&switch_output_trigger);
848 	} else {
849 		signal(SIGUSR2, SIG_IGN);
850 	}
851 
852 	session = perf_session__new(data, false, tool);
853 	if (session == NULL) {
854 		pr_err("Perf session creation failed.\n");
855 		return -1;
856 	}
857 
858 	fd = perf_data__fd(data);
859 	rec->session = session;
860 
861 	record__init_features(rec);
862 
863 	if (forks) {
864 		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
865 						    argv, data->is_pipe,
866 						    workload_exec_failed_signal);
867 		if (err < 0) {
868 			pr_err("Couldn't run the workload!\n");
869 			status = err;
870 			goto out_delete_session;
871 		}
872 	}
873 
874 	if (record__open(rec) != 0) {
875 		err = -1;
876 		goto out_child;
877 	}
878 
879 	err = bpf__apply_obj_config();
880 	if (err) {
881 		char errbuf[BUFSIZ];
882 
883 		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
884 		pr_err("ERROR: Apply config to BPF failed: %s\n",
885 			 errbuf);
886 		goto out_child;
887 	}
888 
889 	/*
890 	 * Normally perf_session__new would do this, but it doesn't have the
891 	 * evlist.
892 	 */
893 	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
894 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
895 		rec->tool.ordered_events = false;
896 	}
897 
898 	if (!rec->evlist->nr_groups)
899 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
900 
901 	if (data->is_pipe) {
902 		err = perf_header__write_pipe(fd);
903 		if (err < 0)
904 			goto out_child;
905 	} else {
906 		err = perf_session__write_header(session, rec->evlist, fd, false);
907 		if (err < 0)
908 			goto out_child;
909 	}
910 
911 	if (!rec->no_buildid
912 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
913 		pr_err("Couldn't generate buildids. "
914 		       "Use --no-buildid to profile anyway.\n");
915 		err = -1;
916 		goto out_child;
917 	}
918 
919 	machine = &session->machines.host;
920 
921 	err = record__synthesize(rec, false);
922 	if (err < 0)
923 		goto out_child;
924 
925 	if (rec->realtime_prio) {
926 		struct sched_param param;
927 
928 		param.sched_priority = rec->realtime_prio;
929 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
930 			pr_err("Could not set realtime priority.\n");
931 			err = -1;
932 			goto out_child;
933 		}
934 	}
935 
936 	/*
937 	 * When perf is starting the traced process, all the events
938 	 * (apart from group members) have enable_on_exec=1 set,
939 	 * so don't spoil it by prematurely enabling them.
940 	 */
941 	if (!target__none(&opts->target) && !opts->initial_delay)
942 		perf_evlist__enable(rec->evlist);
943 
944 	/*
945 	 * Let the child rip
946 	 */
947 	if (forks) {
948 		union perf_event *event;
949 		pid_t tgid;
950 
951 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
952 		if (event == NULL) {
953 			err = -ENOMEM;
954 			goto out_child;
955 		}
956 
		/*
		 * Some H/W events are generated before the COMM event,
		 * which is emitted during exec(), so perf script
		 * cannot see the correct process name for those events.
		 * Synthesize a COMM event to prevent that.
		 */
963 		tgid = perf_event__synthesize_comm(tool, event,
964 						   rec->evlist->workload.pid,
965 						   process_synthesized_event,
966 						   machine);
967 		free(event);
968 
969 		if (tgid == -1)
970 			goto out_child;
971 
972 		event = malloc(sizeof(event->namespaces) +
973 			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
974 			       machine->id_hdr_size);
975 		if (event == NULL) {
976 			err = -ENOMEM;
977 			goto out_child;
978 		}
979 
980 		/*
981 		 * Synthesize NAMESPACES event for the command specified.
982 		 */
983 		perf_event__synthesize_namespaces(tool, event,
984 						  rec->evlist->workload.pid,
985 						  tgid, process_synthesized_event,
986 						  machine);
987 		free(event);
988 
989 		perf_evlist__start_workload(rec->evlist);
990 	}
991 
992 	if (opts->initial_delay) {
993 		usleep(opts->initial_delay * USEC_PER_MSEC);
994 		perf_evlist__enable(rec->evlist);
995 	}
996 
997 	trigger_ready(&auxtrace_snapshot_trigger);
998 	trigger_ready(&switch_output_trigger);
999 	perf_hooks__invoke_record_start();
1000 	for (;;) {
1001 		unsigned long long hits = rec->samples;
1002 
		/*
		 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
		 * here: when done == true and hits != rec->samples in
		 * the previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap() ensures we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
1011 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
1012 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1013 
1014 		if (record__mmap_read_all(rec) < 0) {
1015 			trigger_error(&auxtrace_snapshot_trigger);
1016 			trigger_error(&switch_output_trigger);
1017 			err = -1;
1018 			goto out_child;
1019 		}
1020 
1021 		if (auxtrace_record__snapshot_started) {
1022 			auxtrace_record__snapshot_started = 0;
1023 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
1024 				record__read_auxtrace_snapshot(rec);
1025 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1026 				pr_err("AUX area tracing snapshot failed\n");
1027 				err = -1;
1028 				goto out_child;
1029 			}
1030 		}
1031 
1032 		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in the
			 * overwritable ring buffer should have been collected,
			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 was raised after or during
			 * record__mmap_read_all(), it didn't collect data from
			 * the overwritable ring buffer. Read again.
			 */
1042 			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1043 				continue;
1044 			trigger_ready(&switch_output_trigger);
1045 
			/*
			 * Re-enable events in the overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
1051 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1052 
1053 			if (!quiet)
1054 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1055 					waking);
1056 			waking = 0;
1057 			fd = record__switch_output(rec, false);
1058 			if (fd < 0) {
1059 				pr_err("Failed to switch to new file\n");
1060 				trigger_error(&switch_output_trigger);
1061 				err = fd;
1062 				goto out_child;
1063 			}
1064 
1065 			/* re-arm the alarm */
1066 			if (rec->switch_output.time)
1067 				alarm(rec->switch_output.time);
1068 		}
1069 
1070 		if (hits == rec->samples) {
1071 			if (done || draining)
1072 				break;
1073 			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate the error only if there is one. Ignore a
			 * positive number of returned events and EINTR.
			 */
1078 			if (err > 0 || (err < 0 && errno == EINTR))
1079 				err = 0;
1080 			waking++;
1081 
1082 			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1083 				draining = true;
1084 		}
1085 
		/*
		 * When perf is starting the traced process, the events die
		 * with the process at the end and we wait for that, so there
		 * is no need to disable the events in this case.
		 */
1091 		if (done && !disabled && !target__none(&opts->target)) {
1092 			trigger_off(&auxtrace_snapshot_trigger);
1093 			perf_evlist__disable(rec->evlist);
1094 			disabled = true;
1095 		}
1096 	}
1097 	trigger_off(&auxtrace_snapshot_trigger);
1098 	trigger_off(&switch_output_trigger);
1099 
1100 	if (forks && workload_exec_errno) {
1101 		char msg[STRERR_BUFSIZE];
1102 		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1103 		pr_err("Workload failed: %s\n", emsg);
1104 		err = -1;
1105 		goto out_child;
1106 	}
1107 
1108 	if (!quiet)
1109 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1110 
1111 	if (target__none(&rec->opts.target))
1112 		record__synthesize_workload(rec, true);
1113 
1114 out_child:
1115 	if (forks) {
1116 		int exit_status;
1117 
1118 		if (!child_finished)
1119 			kill(rec->evlist->workload.pid, SIGTERM);
1120 
1121 		wait(&exit_status);
1122 
1123 		if (err < 0)
1124 			status = err;
1125 		else if (WIFEXITED(exit_status))
1126 			status = WEXITSTATUS(exit_status);
1127 		else if (WIFSIGNALED(exit_status))
1128 			signr = WTERMSIG(exit_status);
1129 	} else
1130 		status = err;
1131 
1132 	record__synthesize(rec, true);
1133 	/* this will be recalculated during process_buildids() */
1134 	rec->samples = 0;
1135 
1136 	if (!err) {
1137 		if (!rec->timestamp_filename) {
1138 			record__finish_output(rec);
1139 		} else {
1140 			fd = record__switch_output(rec, true);
1141 			if (fd < 0) {
1142 				status = fd;
1143 				goto out_delete_session;
1144 			}
1145 		}
1146 	}
1147 
1148 	perf_hooks__invoke_record_end();
1149 
1150 	if (!err && !quiet) {
1151 		char samples[128];
1152 		const char *postfix = rec->timestamp_filename ?
1153 					".<timestamp>" : "";
1154 
1155 		if (rec->samples && !rec->opts.full_auxtrace)
1156 			scnprintf(samples, sizeof(samples),
1157 				  " (%" PRIu64 " samples)", rec->samples);
1158 		else
1159 			samples[0] = '\0';
1160 
1161 		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1162 			perf_data__size(data) / 1024.0 / 1024.0,
1163 			data->file.path, postfix, samples);
1164 	}
1165 
1166 out_delete_session:
1167 	perf_session__delete(session);
1168 	return status;
1169 }
1170 
1171 static void callchain_debug(struct callchain_param *callchain)
1172 {
1173 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1174 
1175 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1176 
1177 	if (callchain->record_mode == CALLCHAIN_DWARF)
1178 		pr_debug("callchain: stack dump size %d\n",
1179 			 callchain->dump_size);
1180 }
1181 
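/*
 * Illustrative --call-graph settings (see CALLCHAIN_RECORD_HELP for the
 * authoritative list):
 *
 *   perf record --call-graph fp ...         # frame-pointer unwinding
 *   perf record --call-graph dwarf,8192 ... # DWARF unwind, 8192-byte dumps
 *   perf record --call-graph lbr ...        # last branch record, if supported
 */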
1182 int record_opts__parse_callchain(struct record_opts *record,
1183 				 struct callchain_param *callchain,
1184 				 const char *arg, bool unset)
1185 {
1186 	int ret;
1187 	callchain->enabled = !unset;
1188 
1189 	/* --no-call-graph */
1190 	if (unset) {
1191 		callchain->record_mode = CALLCHAIN_NONE;
1192 		pr_debug("callchain: disabled\n");
1193 		return 0;
1194 	}
1195 
1196 	ret = parse_callchain_record_opt(arg, callchain);
1197 	if (!ret) {
1198 		/* Enable data address sampling for DWARF unwind. */
1199 		if (callchain->record_mode == CALLCHAIN_DWARF)
1200 			record->sample_address = true;
1201 		callchain_debug(callchain);
1202 	}
1203 
1204 	return ret;
1205 }
1206 
1207 int record_parse_callchain_opt(const struct option *opt,
1208 			       const char *arg,
1209 			       int unset)
1210 {
1211 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1212 }
1213 
1214 int record_callchain_opt(const struct option *opt,
1215 			 const char *arg __maybe_unused,
1216 			 int unset __maybe_unused)
1217 {
1218 	struct callchain_param *callchain = opt->value;
1219 
1220 	callchain->enabled = true;
1221 
1222 	if (callchain->record_mode == CALLCHAIN_NONE)
1223 		callchain->record_mode = CALLCHAIN_FP;
1224 
1225 	callchain_debug(callchain);
1226 	return 0;
1227 }
1228 
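/*
 * A minimal ~/.perfconfig sketch exercising the keys handled below (any
 * other value for record.build-id is rejected):
 *
 *   [record]
 *           build-id = cache        # or: no-cache, skip
 *           call-graph = dwarf      # forwarded as call-graph.record-mode
 */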
1229 static int perf_record_config(const char *var, const char *value, void *cb)
1230 {
1231 	struct record *rec = cb;
1232 
1233 	if (!strcmp(var, "record.build-id")) {
1234 		if (!strcmp(value, "cache"))
1235 			rec->no_buildid_cache = false;
1236 		else if (!strcmp(value, "no-cache"))
1237 			rec->no_buildid_cache = true;
1238 		else if (!strcmp(value, "skip"))
1239 			rec->no_buildid = true;
1240 		else
1241 			return -1;
1242 		return 0;
1243 	}
1244 	if (!strcmp(var, "record.call-graph"))
1245 		var = "call-graph.record-mode"; /* fall-through */
1246 
1247 	return perf_default_config(var, value, cb);
1248 }
1249 
1250 struct clockid_map {
1251 	const char *name;
1252 	int clockid;
1253 };
1254 
1255 #define CLOCKID_MAP(n, c)	\
1256 	{ .name = n, .clockid = (c), }
1257 
1258 #define CLOCKID_END	{ .name = NULL, }
1259 
1260 
/*
 * Add the missing clockid definitions; we need to build on many distros...
 */
1264 #ifndef CLOCK_MONOTONIC_RAW
1265 #define CLOCK_MONOTONIC_RAW 4
1266 #endif
1267 #ifndef CLOCK_BOOTTIME
1268 #define CLOCK_BOOTTIME 7
1269 #endif
1270 #ifndef CLOCK_TAI
1271 #define CLOCK_TAI 11
1272 #endif
1273 
1274 static const struct clockid_map clockids[] = {
1275 	/* available for all events, NMI safe */
1276 	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
1277 	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
1278 
1279 	/* available for some events */
1280 	CLOCKID_MAP("realtime", CLOCK_REALTIME),
1281 	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
1282 	CLOCKID_MAP("tai", CLOCK_TAI),
1283 
1284 	/* available for the lazy */
1285 	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
1286 	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
1287 	CLOCKID_MAP("real", CLOCK_REALTIME),
1288 	CLOCKID_MAP("boot", CLOCK_BOOTTIME),
1289 
1290 	CLOCKID_END,
1291 };
1292 
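/*
 * Equivalent spellings accepted by parse_clockid() below:
 *
 *   perf record -k monotonic ...
 *   perf record -k CLOCK_MONOTONIC ...
 *   perf record -k 1 ...            # the raw clockid number
 */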
1293 static int parse_clockid(const struct option *opt, const char *str, int unset)
1294 {
1295 	struct record_opts *opts = (struct record_opts *)opt->value;
1296 	const struct clockid_map *cm;
1297 	const char *ostr = str;
1298 
1299 	if (unset) {
1300 		opts->use_clockid = 0;
1301 		return 0;
1302 	}
1303 
1304 	/* no arg passed */
1305 	if (!str)
1306 		return 0;
1307 
1308 	/* no setting it twice */
1309 	if (opts->use_clockid)
1310 		return -1;
1311 
1312 	opts->use_clockid = true;
1313 
	/* if it's a number, we're done */
1315 	if (sscanf(str, "%d", &opts->clockid) == 1)
1316 		return 0;
1317 
1318 	/* allow a "CLOCK_" prefix to the name */
1319 	if (!strncasecmp(str, "CLOCK_", 6))
1320 		str += 6;
1321 
1322 	for (cm = clockids; cm->name; cm++) {
1323 		if (!strcasecmp(str, cm->name)) {
1324 			opts->clockid = cm->clockid;
1325 			return 0;
1326 		}
1327 	}
1328 
1329 	opts->use_clockid = false;
1330 	ui__warning("unknown clockid %s, check man page\n", ostr);
1331 	return -1;
1332 }
1333 
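/*
 * Examples for -m/--mmap-pages (the second value sizes the AUX area mmap):
 *
 *   perf record -m 512 ...        # 512 data pages
 *   perf record -m 512,128 ...    # 512 data pages, 128 AUX pages
 */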
1334 static int record__parse_mmap_pages(const struct option *opt,
1335 				    const char *str,
1336 				    int unset __maybe_unused)
1337 {
1338 	struct record_opts *opts = opt->value;
1339 	char *s, *p;
1340 	unsigned int mmap_pages;
1341 	int ret;
1342 
1343 	if (!str)
1344 		return -EINVAL;
1345 
1346 	s = strdup(str);
1347 	if (!s)
1348 		return -ENOMEM;
1349 
1350 	p = strchr(s, ',');
1351 	if (p)
1352 		*p = '\0';
1353 
1354 	if (*s) {
1355 		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1356 		if (ret)
1357 			goto out_free;
1358 		opts->mmap_pages = mmap_pages;
1359 	}
1360 
1361 	if (!p) {
1362 		ret = 0;
1363 		goto out_free;
1364 	}
1365 
1366 	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1367 	if (ret)
1368 		goto out_free;
1369 
1370 	opts->auxtrace_mmap_pages = mmap_pages;
1371 
1372 out_free:
1373 	free(s);
1374 	return ret;
1375 }
1376 
1377 static void switch_output_size_warn(struct record *rec)
1378 {
1379 	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1380 	struct switch_output *s = &rec->switch_output;
1381 
1382 	wakeup_size /= 2;
1383 
1384 	if (s->size < wakeup_size) {
1385 		char buf[100];
1386 
1387 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
		pr_warning("WARNING: switch-output data size is lower than "
			   "the wakeup kernel buffer size (%s); "
			   "expect bigger perf.data sizes\n", buf);
1391 	}
1392 }
1393 
1394 static int switch_output_setup(struct record *rec)
1395 {
1396 	struct switch_output *s = &rec->switch_output;
1397 	static struct parse_tag tags_size[] = {
1398 		{ .tag  = 'B', .mult = 1       },
1399 		{ .tag  = 'K', .mult = 1 << 10 },
1400 		{ .tag  = 'M', .mult = 1 << 20 },
1401 		{ .tag  = 'G', .mult = 1 << 30 },
1402 		{ .tag  = 0 },
1403 	};
1404 	static struct parse_tag tags_time[] = {
1405 		{ .tag  = 's', .mult = 1        },
1406 		{ .tag  = 'm', .mult = 60       },
1407 		{ .tag  = 'h', .mult = 60*60    },
1408 		{ .tag  = 'd', .mult = 60*60*24 },
1409 		{ .tag  = 0 },
1410 	};
1411 	unsigned long val;
1412 
1413 	if (!s->set)
1414 		return 0;
1415 
1416 	if (!strcmp(s->str, "signal")) {
1417 		s->signal = true;
1418 		pr_debug("switch-output with SIGUSR2 signal\n");
1419 		goto enabled;
1420 	}
1421 
1422 	val = parse_tag_value(s->str, tags_size);
1423 	if (val != (unsigned long) -1) {
1424 		s->size = val;
1425 		pr_debug("switch-output with %s size threshold\n", s->str);
1426 		goto enabled;
1427 	}
1428 
1429 	val = parse_tag_value(s->str, tags_time);
1430 	if (val != (unsigned long) -1) {
1431 		s->time = val;
1432 		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1433 			 s->str, s->time);
1434 		goto enabled;
1435 	}
1436 
1437 	return -1;
1438 
1439 enabled:
1440 	rec->timestamp_filename = true;
1441 	s->enabled              = true;
1442 
1443 	if (s->size && !rec->opts.no_buffering)
1444 		switch_output_size_warn(rec);
1445 
1446 	return 0;
1447 }
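
/*
 * Illustrative --switch-output arguments matching the tags parsed above:
 *
 *   perf record --switch-output ...          # rotate on SIGUSR2 ("signal")
 *   perf record --switch-output=10M ...      # rotate every 10 megabytes
 *   perf record --switch-output=30s ...      # rotate every 30 seconds
 */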
1448 
1449 static const char * const __record_usage[] = {
1450 	"perf record [<options>] [<command>]",
1451 	"perf record [<options>] -- <command> [<options>]",
1452 	NULL
1453 };
1454 const char * const *record_usage = __record_usage;
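
/*
 * A couple of illustrative invocations (hypothetical workload name):
 *
 *   perf record ./myprog                 # profile a workload
 *   perf record -p 1234 sleep 10         # profile an existing pid for 10s
 *   perf record -a -g -- sleep 5         # system-wide, with call graphs
 */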
1455 
/*
 * XXX Ideally this would be local to cmd_record() and passed to a record__new,
 * because we need to have access to it in record__exit, which is called
 * after cmd_record() exits, but since record_options needs to be accessible
 * to builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
1466 static struct record record = {
1467 	.opts = {
1468 		.sample_time	     = true,
1469 		.mmap_pages	     = UINT_MAX,
1470 		.user_freq	     = UINT_MAX,
1471 		.user_interval	     = ULLONG_MAX,
1472 		.freq		     = 4000,
1473 		.target		     = {
1474 			.uses_mmap   = true,
1475 			.default_per_cpu = true,
1476 		},
1477 		.proc_map_timeout     = 500,
1478 	},
1479 	.tool = {
1480 		.sample		= process_sample_event,
1481 		.fork		= perf_event__process_fork,
1482 		.exit		= perf_event__process_exit,
1483 		.comm		= perf_event__process_comm,
1484 		.namespaces	= perf_event__process_namespaces,
1485 		.mmap		= perf_event__process_mmap,
1486 		.mmap2		= perf_event__process_mmap2,
1487 		.ordered_events	= true,
1488 	},
1489 };
1490 
1491 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1492 	"\n\t\t\t\tDefault: fp";
1493 
1494 static bool dry_run;
1495 
/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to using the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record',
 * using pipes, etc.
 */
1503 static struct option __record_options[] = {
1504 	OPT_CALLBACK('e', "event", &record.evlist, "event",
1505 		     "event selector. use 'perf list' to list available events",
1506 		     parse_events_option),
1507 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1508 		     "event filter", parse_filter),
1509 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1510 			   NULL, "don't record events from perf itself",
1511 			   exclude_perf),
1512 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1513 		    "record events on existing process id"),
1514 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1515 		    "record events on existing thread id"),
1516 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
1517 		    "collect data with this RT SCHED_FIFO priority"),
1518 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1519 		    "collect data without buffering"),
1520 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1521 		    "collect raw sample records from all opened counters"),
1522 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1523 			    "system-wide collection from all CPUs"),
1524 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1525 		    "list of cpus to monitor"),
1526 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1527 	OPT_STRING('o', "output", &record.data.file.path, "file",
1528 		    "output file name"),
1529 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1530 			&record.opts.no_inherit_set,
1531 			"child tasks do not inherit counters"),
1532 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1533 		    "synthesize non-sample events at the end of output"),
1534 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1535 	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1536 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1537 		     "number of mmap data pages and AUX area tracing mmap pages",
1538 		     record__parse_mmap_pages),
1539 	OPT_BOOLEAN(0, "group", &record.opts.group,
1540 		    "put the counters into a counter group"),
1541 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1542 			   NULL, "enables call-graph recording" ,
1543 			   &record_callchain_opt),
1544 	OPT_CALLBACK(0, "call-graph", &record.opts,
1545 		     "record_mode[,record_size]", record_callchain_help,
1546 		     &record_parse_callchain_opt),
1547 	OPT_INCR('v', "verbose", &verbose,
1548 		    "be more verbose (show counter open errors, etc)"),
1549 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1550 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1551 		    "per thread counts"),
1552 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1553 	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
1554 		    "Record the sample physical addresses"),
1555 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1556 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1557 			&record.opts.sample_time_set,
1558 			"Record the sample timestamps"),
1559 	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
1560 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1561 		    "don't sample"),
1562 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1563 			&record.no_buildid_cache_set,
1564 			"do not update the buildid cache"),
1565 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1566 			&record.no_buildid_set,
1567 			"do not collect buildids in perf.data"),
1568 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1569 		     "monitor event in cgroup name only",
1570 		     parse_cgroups),
1571 	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1572 		  "ms to wait before starting measurement after program start"),
1573 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1574 		   "user to profile"),
1575 
1576 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1577 		     "branch any", "sample any taken branches",
1578 		     parse_branch_stack),
1579 
1580 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1581 		     "branch filter mask", "branch stack filter modes",
1582 		     parse_branch_stack),
1583 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1584 		    "sample by weight (on special events only)"),
1585 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1586 		    "sample transaction flags (special events only)"),
1587 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1588 		    "use per-thread mmaps"),
1589 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1590 		    "sample selected machine registers on interrupt,"
1591 		    " use -I ? to list register names", parse_regs),
	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use --user-regs=? to list register names", parse_regs),
1595 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1596 		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
		     "clockid", "clockid to use for events, see clock_gettime()",
		     parse_clockid),
1600 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1601 			  "opts", "AUX area tracing Snapshot Mode", ""),
1602 	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1603 			"per thread proc mmap processing timeout in ms"),
1604 	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespace events"),
1606 	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1607 		    "Record context switch events"),
1608 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1609 			 "Configure all used events to run in kernel space.",
1610 			 PARSE_OPT_EXCLUSIVE),
1611 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1612 			 "Configure all used events to run in user space.",
1613 			 PARSE_OPT_EXCLUSIVE),
1614 	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1615 		   "clang binary to use for compiling BPF scriptlets"),
1616 	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1617 		   "options passed to clang when compiling BPF scriptlets"),
1618 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1619 		   "file", "vmlinux pathname"),
1620 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1621 		    "Record build-id of all DSOs regardless of hits"),
1622 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1623 		    "append timestamp to output filename"),
1624 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
1625 			  &record.switch_output.set, "signal,size,time",
			  "Switch output when receiving SIGUSR2, or when crossing the size or time threshold",
1627 			  "signal"),
1628 	OPT_BOOLEAN(0, "dry-run", &dry_run,
1629 		    "Parse options then exit"),
1630 	OPT_END()
1631 };
1632 
1633 struct option *record_options = __record_options;
1634 
1635 int cmd_record(int argc, const char **argv)
1636 {
1637 	int err;
1638 	struct record *rec = &record;
1639 	char errbuf[BUFSIZ];
1640 
1641 #ifndef HAVE_LIBBPF_SUPPORT
1642 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1643 	set_nobuild('\0', "clang-path", true);
1644 	set_nobuild('\0', "clang-opt", true);
1645 # undef set_nobuild
1646 #endif
1647 
1648 #ifndef HAVE_BPF_PROLOGUE
1649 # if !defined (HAVE_DWARF_SUPPORT)
1650 #  define REASON  "NO_DWARF=1"
1651 # elif !defined (HAVE_LIBBPF_SUPPORT)
1652 #  define REASON  "NO_LIBBPF=1"
1653 # else
1654 #  define REASON  "this architecture doesn't support BPF prologue"
1655 # endif
1656 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
1657 	set_nobuild('\0', "vmlinux", true);
1658 # undef set_nobuild
1659 # undef REASON
1660 #endif
1661 
1662 	rec->evlist = perf_evlist__new();
1663 	if (rec->evlist == NULL)
1664 		return -ENOMEM;
1665 
1666 	err = perf_config(perf_record_config, rec);
1667 	if (err)
1668 		return err;
1669 
1670 	argc = parse_options(argc, argv, record_options, record_usage,
1671 			    PARSE_OPT_STOP_AT_NON_OPTION);
1672 	if (quiet)
1673 		perf_quiet_option();
1674 
1675 	/* Make system wide (-a) the default target. */
1676 	if (!argc && target__none(&rec->opts.target))
1677 		rec->opts.target.system_wide = true;
1678 
	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");
	}

1684 	if (rec->opts.record_switch_events &&
1685 	    !perf_can_record_switch_events()) {
1686 		ui__error("kernel does not support recording context switch events\n");
1687 		parse_options_usage(record_usage, record_options, "switch-events", 0);
1688 		return -EINVAL;
1689 	}
1690 
1691 	if (switch_output_setup(rec)) {
1692 		parse_options_usage(record_usage, record_options, "switch-output", 0);
1693 		return -EINVAL;
1694 	}
1695 
1696 	if (rec->switch_output.time) {
1697 		signal(SIGALRM, alarm_sig_handler);
1698 		alarm(rec->switch_output.time);
1699 	}
1700 
1701 	if (!rec->itr) {
1702 		rec->itr = auxtrace_record__init(rec->evlist, &err);
1703 		if (err)
1704 			goto out;
1705 	}
1706 
1707 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
1708 					      rec->opts.auxtrace_snapshot_opts);
1709 	if (err)
1710 		goto out;
1711 
1712 	/*
1713 	 * Allow aliases to facilitate the lookup of symbols for address
1714 	 * filters. Refer to auxtrace_parse_filters().
1715 	 */
1716 	symbol_conf.allow_aliases = true;
1717 
1718 	symbol__init(NULL);
1719 
1720 	err = auxtrace_parse_filters(rec->evlist);
1721 	if (err)
1722 		goto out;
1723 
1724 	if (dry_run)
1725 		goto out;
1726 
1727 	err = bpf__setup_stdout(rec->evlist);
1728 	if (err) {
1729 		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
1730 		pr_err("ERROR: Setup BPF stdout failed: %s\n",
1731 			 errbuf);
1732 		goto out;
1733 	}
1734 
1735 	err = -ENOMEM;
1736 
1737 	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
1738 		pr_warning(
1739 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1740 "check /proc/sys/kernel/kptr_restrict.\n\n"
1741 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1742 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1743 "Samples in kernel modules won't be resolved at all.\n\n"
1744 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1745 "even with a suitable vmlinux or kallsyms file.\n\n");
1746 
1747 	if (rec->no_buildid_cache || rec->no_buildid) {
1748 		disable_buildid_cache();
1749 	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are explicitly
		 * required, using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
1765 		bool disable = true;
1766 
1767 		if (rec->no_buildid_set && !rec->no_buildid)
1768 			disable = false;
1769 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
1770 			disable = false;
1771 		if (disable) {
1772 			rec->no_buildid = true;
1773 			rec->no_buildid_cache = true;
1774 			disable_buildid_cache();
1775 		}
1776 	}
1777 
1778 	if (record.opts.overwrite)
1779 		record.opts.tail_synthesize = true;
1780 
1781 	if (rec->evlist->nr_entries == 0 &&
1782 	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
1783 		pr_err("Not enough memory for event selector list\n");
1784 		goto out;
1785 	}
1786 
1787 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1788 		rec->opts.no_inherit = true;
1789 
1790 	err = target__validate(&rec->opts.target);
1791 	if (err) {
1792 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1793 		ui__warning("%s", errbuf);
1794 	}
1795 
1796 	err = target__parse_uid(&rec->opts.target);
1797 	if (err) {
1798 		int saved_errno = errno;
1799 
1800 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1801 		ui__error("%s", errbuf);
1802 
1803 		err = -saved_errno;
1804 		goto out;
1805 	}
1806 
1807 	/* Enable ignoring missing threads when -u/-p option is defined. */
1808 	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
1809 
1810 	err = -ENOMEM;
1811 	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1812 		usage_with_options(record_usage, record_options);
1813 
1814 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1815 	if (err)
1816 		goto out;
1817 
	/*
	 * We take all buildids when the file contains AUX area tracing data,
	 * because we do not decode the trace (that would take too long).
	 */
1823 	if (rec->opts.full_auxtrace)
1824 		rec->buildid_all = true;
1825 
1826 	if (record_opts__config(&rec->opts)) {
1827 		err = -EINVAL;
1828 		goto out;
1829 	}
1830 
1831 	err = __cmd_record(&record, argc, argv);
1832 out:
1833 	perf_evlist__delete(rec->evlist);
1834 	symbol__exit();
1835 	auxtrace_record__free(rec->itr);
1836 	return err;
1837 }
1838 
1839 static void snapshot_sig_handler(int sig __maybe_unused)
1840 {
1841 	struct record *rec = &record;
1842 
1843 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
1844 		trigger_hit(&auxtrace_snapshot_trigger);
1845 		auxtrace_record__snapshot_started = 1;
1846 		if (auxtrace_record__snapshot_start(record.itr))
1847 			trigger_error(&auxtrace_snapshot_trigger);
1848 	}
1849 
1850 	if (switch_output_signal(rec))
1851 		trigger_hit(&switch_output_trigger);
1852 }
1853 
1854 static void alarm_sig_handler(int sig __maybe_unused)
1855 {
1856 	struct record *rec = &record;
1857 
1858 	if (switch_output_time(rec))
1859 		trigger_hit(&switch_output_trigger);
1860 }
1861