xref: /linux/tools/perf/builtin-record.c (revision 0d08df6c493898e679d9c517e77ea95c063d40ec)
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #include "builtin.h"
9 
10 #include "perf.h"
11 
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include <subcmd/parse-options.h>
15 #include "util/parse-events.h"
16 
17 #include "util/callchain.h"
18 #include "util/cgroup.h"
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/symbol.h"
27 #include "util/cpumap.h"
28 #include "util/thread_map.h"
29 #include "util/data.h"
30 #include "util/perf_regs.h"
31 #include "util/auxtrace.h"
32 #include "util/tsc.h"
33 #include "util/parse-branch-options.h"
34 #include "util/parse-regs-options.h"
35 #include "util/llvm-utils.h"
36 #include "util/bpf-loader.h"
37 #include "util/trigger.h"
38 #include "asm/bug.h"
39 
40 #include <unistd.h>
41 #include <sched.h>
42 #include <sys/mman.h>
43 
44 
/*
 * Per-invocation state of 'perf record'.  The embedded perf_tool is handed
 * to the event-processing code; callbacks get back to this structure with
 * container_of(tool, struct record, tool).
 */
struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	/* Bytes of event data accounted so far (see record__write()). */
	u64			bytes_written;
	struct perf_data_file	file;
	struct auxtrace_record	*itr;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	/* When non-zero, __cmd_record() switches to SCHED_FIFO at this priority. */
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	/* At exit, finish the output through record__switch_output(). */
	bool			timestamp_filename;
	/* Arm the SIGUSR2-driven output switch trigger. */
	bool			switch_output;
	/* Bumped for each non-empty mmap read and for each processed sample. */
	unsigned long long	samples;
};
64 
65 static int record__write(struct record *rec, void *bf, size_t size)
66 {
67 	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
68 		pr_err("failed to write perf data, error: %m\n");
69 		return -1;
70 	}
71 
72 	rec->bytes_written += size;
73 	return 0;
74 }
75 
76 static int process_synthesized_event(struct perf_tool *tool,
77 				     union perf_event *event,
78 				     struct perf_sample *sample __maybe_unused,
79 				     struct machine *machine __maybe_unused)
80 {
81 	struct record *rec = container_of(tool, struct record, tool);
82 	return record__write(rec, event, event->header.size);
83 }
84 
85 static int record__mmap_read(struct record *rec, int idx)
86 {
87 	struct perf_mmap *md = &rec->evlist->mmap[idx];
88 	u64 head = perf_mmap__read_head(md);
89 	u64 old = md->prev;
90 	unsigned char *data = md->base + page_size;
91 	unsigned long size;
92 	void *buf;
93 	int rc = 0;
94 
95 	if (old == head)
96 		return 0;
97 
98 	rec->samples++;
99 
100 	size = head - old;
101 
102 	if ((old & md->mask) + size != (head & md->mask)) {
103 		buf = &data[old & md->mask];
104 		size = md->mask + 1 - (old & md->mask);
105 		old += size;
106 
107 		if (record__write(rec, buf, size) < 0) {
108 			rc = -1;
109 			goto out;
110 		}
111 	}
112 
113 	buf = &data[old & md->mask];
114 	size = head - old;
115 	old += size;
116 
117 	if (record__write(rec, buf, size) < 0) {
118 		rc = -1;
119 		goto out;
120 	}
121 
122 	md->prev = old;
123 	perf_evlist__mmap_consume(rec->evlist, idx);
124 out:
125 	return rc;
126 }
127 
/*
 * Flags written from the signal handlers in this file and read by
 * __cmd_record().
 */
/* Set by sig_handler() and workload_exec_failed_signal(); ends the record loop. */
static volatile int done;
/* Signal that record__sig_exit() re-raises at exit; -1 means none. */
static volatile int signr = -1;
/* Set once SIGCHLD (or a workload start failure) has been seen. */
static volatile int child_finished;

/* Checked and cleared by the record loop before reading an AUX snapshot. */
static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);
135 
136 static void sig_handler(int sig)
137 {
138 	if (sig == SIGCHLD)
139 		child_finished = 1;
140 	else
141 		signr = sig;
142 
143 	done = 1;
144 }
145 
146 static void record__sig_exit(void)
147 {
148 	if (signr == -1)
149 		return;
150 
151 	signal(signr, SIG_DFL);
152 	raise(signr);
153 }
154 
155 #ifdef HAVE_AUXTRACE_SUPPORT
156 
157 static int record__process_auxtrace(struct perf_tool *tool,
158 				    union perf_event *event, void *data1,
159 				    size_t len1, void *data2, size_t len2)
160 {
161 	struct record *rec = container_of(tool, struct record, tool);
162 	struct perf_data_file *file = &rec->file;
163 	size_t padding;
164 	u8 pad[8] = {0};
165 
166 	if (!perf_data_file__is_pipe(file)) {
167 		off_t file_offset;
168 		int fd = perf_data_file__fd(file);
169 		int err;
170 
171 		file_offset = lseek(fd, 0, SEEK_CUR);
172 		if (file_offset == -1)
173 			return -1;
174 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
175 						     event, file_offset);
176 		if (err)
177 			return err;
178 	}
179 
180 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
181 	padding = (len1 + len2) & 7;
182 	if (padding)
183 		padding = 8 - padding;
184 
185 	record__write(rec, event, event->header.size);
186 	record__write(rec, data1, len1);
187 	if (len2)
188 		record__write(rec, data2, len2);
189 	record__write(rec, &pad, padding);
190 
191 	return 0;
192 }
193 
194 static int record__auxtrace_mmap_read(struct record *rec,
195 				      struct auxtrace_mmap *mm)
196 {
197 	int ret;
198 
199 	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
200 				  record__process_auxtrace);
201 	if (ret < 0)
202 		return ret;
203 
204 	if (ret)
205 		rec->samples++;
206 
207 	return 0;
208 }
209 
210 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
211 					       struct auxtrace_mmap *mm)
212 {
213 	int ret;
214 
215 	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
216 					   record__process_auxtrace,
217 					   rec->opts.auxtrace_snapshot_size);
218 	if (ret < 0)
219 		return ret;
220 
221 	if (ret)
222 		rec->samples++;
223 
224 	return 0;
225 }
226 
227 static int record__auxtrace_read_snapshot_all(struct record *rec)
228 {
229 	int i;
230 	int rc = 0;
231 
232 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
233 		struct auxtrace_mmap *mm =
234 				&rec->evlist->mmap[i].auxtrace_mmap;
235 
236 		if (!mm->base)
237 			continue;
238 
239 		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
240 			rc = -1;
241 			goto out;
242 		}
243 	}
244 out:
245 	return rc;
246 }
247 
248 static void record__read_auxtrace_snapshot(struct record *rec)
249 {
250 	pr_debug("Recording AUX area tracing snapshot\n");
251 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
252 		trigger_error(&auxtrace_snapshot_trigger);
253 	} else {
254 		if (auxtrace_record__snapshot_finish(rec->itr))
255 			trigger_error(&auxtrace_snapshot_trigger);
256 		else
257 			trigger_ready(&auxtrace_snapshot_trigger);
258 	}
259 }
260 
261 #else
262 
/*
 * Stub used when HAVE_AUXTRACE_SUPPORT is not defined: there is no AUX
 * area data to read, so always report success.
 */
static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct auxtrace_mmap *mm __maybe_unused)
{
	return 0;
}
269 
/* Stub used when HAVE_AUXTRACE_SUPPORT is not defined: does nothing. */
static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}
274 
/*
 * Stub used when HAVE_AUXTRACE_SUPPORT is not defined: always returns 0.
 */
static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}
280 
281 #endif
282 
283 static int record__open(struct record *rec)
284 {
285 	char msg[512];
286 	struct perf_evsel *pos;
287 	struct perf_evlist *evlist = rec->evlist;
288 	struct perf_session *session = rec->session;
289 	struct record_opts *opts = &rec->opts;
290 	int rc = 0;
291 
292 	perf_evlist__config(evlist, opts, &callchain_param);
293 
294 	evlist__for_each(evlist, pos) {
295 try_again:
296 		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
297 			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
298 				if (verbose)
299 					ui__warning("%s\n", msg);
300 				goto try_again;
301 			}
302 
303 			rc = -errno;
304 			perf_evsel__open_strerror(pos, &opts->target,
305 						  errno, msg, sizeof(msg));
306 			ui__error("%s\n", msg);
307 			goto out;
308 		}
309 	}
310 
311 	if (perf_evlist__apply_filters(evlist, &pos)) {
312 		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
313 			pos->filter, perf_evsel__name(pos), errno,
314 			strerror_r(errno, msg, sizeof(msg)));
315 		rc = -1;
316 		goto out;
317 	}
318 
319 	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
320 				 opts->auxtrace_mmap_pages,
321 				 opts->auxtrace_snapshot_mode) < 0) {
322 		if (errno == EPERM) {
323 			pr_err("Permission error mapping pages.\n"
324 			       "Consider increasing "
325 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
326 			       "or try again with a smaller value of -m/--mmap_pages.\n"
327 			       "(current value: %u,%u)\n",
328 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
329 			rc = -errno;
330 		} else {
331 			pr_err("failed to mmap with %d (%s)\n", errno,
332 				strerror_r(errno, msg, sizeof(msg)));
333 			if (errno)
334 				rc = -errno;
335 			else
336 				rc = -EINVAL;
337 		}
338 		goto out;
339 	}
340 
341 	session->evlist = evlist;
342 	perf_session__set_id_hdr_size(session);
343 out:
344 	return rc;
345 }
346 
347 static int process_sample_event(struct perf_tool *tool,
348 				union perf_event *event,
349 				struct perf_sample *sample,
350 				struct perf_evsel *evsel,
351 				struct machine *machine)
352 {
353 	struct record *rec = container_of(tool, struct record, tool);
354 
355 	rec->samples++;
356 
357 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
358 }
359 
360 static int process_buildids(struct record *rec)
361 {
362 	struct perf_data_file *file  = &rec->file;
363 	struct perf_session *session = rec->session;
364 
365 	if (file->size == 0)
366 		return 0;
367 
368 	/*
369 	 * During this process, it'll load kernel map and replace the
370 	 * dso->long_name to a real pathname it found.  In this case
371 	 * we prefer the vmlinux path like
372 	 *   /lib/modules/3.16.4/build/vmlinux
373 	 *
374 	 * rather than build-id path (in debug directory).
375 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
376 	 */
377 	symbol_conf.ignore_vmlinux_buildid = true;
378 
379 	/*
380 	 * If --buildid-all is given, it marks all DSO regardless of hits,
381 	 * so no need to process samples.
382 	 */
383 	if (rec->buildid_all)
384 		rec->tool.sample = NULL;
385 
386 	return perf_session__process_events(session);
387 }
388 
389 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
390 {
391 	int err;
392 	struct perf_tool *tool = data;
393 	/*
394 	 *As for guest kernel when processing subcommand record&report,
395 	 *we arrange module mmap prior to guest kernel mmap and trigger
396 	 *a preload dso because default guest module symbols are loaded
397 	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
398 	 *method is used to avoid symbol missing when the first addr is
399 	 *in module instead of in guest kernel.
400 	 */
401 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
402 					     machine);
403 	if (err < 0)
404 		pr_err("Couldn't record guest kernel [%d]'s reference"
405 		       " relocation symbol.\n", machine->pid);
406 
407 	/*
408 	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
409 	 * have no _text sometimes.
410 	 */
411 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
412 						 machine);
413 	if (err < 0)
414 		pr_err("Couldn't record guest kernel [%d]'s reference"
415 		       " relocation symbol.\n", machine->pid);
416 }
417 
/*
 * Header-only PERF_RECORD_FINISHED_ROUND event that record__mmap_read_all()
 * appends after a pass over the mmaps that wrote at least one byte.
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
422 
423 static int record__mmap_read_all(struct record *rec)
424 {
425 	u64 bytes_written = rec->bytes_written;
426 	int i;
427 	int rc = 0;
428 
429 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
430 		struct auxtrace_mmap *mm = &rec->evlist->mmap[i].auxtrace_mmap;
431 
432 		if (rec->evlist->mmap[i].base) {
433 			if (record__mmap_read(rec, i) != 0) {
434 				rc = -1;
435 				goto out;
436 			}
437 		}
438 
439 		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
440 		    record__auxtrace_mmap_read(rec, mm) != 0) {
441 			rc = -1;
442 			goto out;
443 		}
444 	}
445 
446 	/*
447 	 * Mark the round finished in case we wrote
448 	 * at least one event.
449 	 */
450 	if (bytes_written != rec->bytes_written)
451 		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
452 
453 out:
454 	return rc;
455 }
456 
457 static void record__init_features(struct record *rec)
458 {
459 	struct perf_session *session = rec->session;
460 	int feat;
461 
462 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
463 		perf_header__set_feat(&session->header, feat);
464 
465 	if (rec->no_buildid)
466 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
467 
468 	if (!have_tracepoints(&rec->evlist->entries))
469 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
470 
471 	if (!rec->opts.branch_stack)
472 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
473 
474 	if (!rec->opts.full_auxtrace)
475 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
476 
477 	perf_header__clear_feat(&session->header, HEADER_STAT);
478 }
479 
480 static void
481 record__finish_output(struct record *rec)
482 {
483 	struct perf_data_file *file = &rec->file;
484 	int fd = perf_data_file__fd(file);
485 
486 	if (file->is_pipe)
487 		return;
488 
489 	rec->session->header.data_size += rec->bytes_written;
490 	file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
491 
492 	if (!rec->no_buildid) {
493 		process_buildids(rec);
494 
495 		if (rec->buildid_all)
496 			dsos__hit_all(rec->session);
497 	}
498 	perf_session__write_header(rec->session, rec->evlist, fd, true);
499 
500 	return;
501 }
502 
503 static int record__synthesize_workload(struct record *rec)
504 {
505 	struct {
506 		struct thread_map map;
507 		struct thread_map_data map_data;
508 	} thread_map;
509 
510 	thread_map.map.nr = 1;
511 	thread_map.map.map[0].pid = rec->evlist->workload.pid;
512 	thread_map.map.map[0].comm = NULL;
513 	return perf_event__synthesize_thread_map(&rec->tool, &thread_map.map,
514 						 process_synthesized_event,
515 						 &rec->session->machines.host,
516 						 rec->opts.sample_address,
517 						 rec->opts.proc_map_timeout);
518 }
519 
/* Forward declaration: record__switch_output() calls it before its definition. */
static int record__synthesize(struct record *rec);
521 
522 static int
523 record__switch_output(struct record *rec, bool at_exit)
524 {
525 	struct perf_data_file *file = &rec->file;
526 	int fd, err;
527 
528 	/* Same Size:      "2015122520103046"*/
529 	char timestamp[] = "InvalidTimestamp";
530 
531 	rec->samples = 0;
532 	record__finish_output(rec);
533 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
534 	if (err) {
535 		pr_err("Failed to get current timestamp\n");
536 		return -EINVAL;
537 	}
538 
539 	fd = perf_data_file__switch(file, timestamp,
540 				    rec->session->header.data_offset,
541 				    at_exit);
542 	if (fd >= 0 && !at_exit) {
543 		rec->bytes_written = 0;
544 		rec->session->header.data_size = 0;
545 	}
546 
547 	if (!quiet)
548 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
549 			file->path, timestamp);
550 
551 	/* Output tracking events */
552 	if (!at_exit) {
553 		record__synthesize(rec);
554 
555 		/*
556 		 * In 'perf record --switch-output' without -a,
557 		 * record__synthesize() in record__switch_output() won't
558 		 * generate tracking events because there's no thread_map
559 		 * in evlist. Which causes newly created perf.data doesn't
560 		 * contain map and comm information.
561 		 * Create a fake thread_map and directly call
562 		 * perf_event__synthesize_thread_map() for those events.
563 		 */
564 		if (target__none(&rec->opts.target))
565 			record__synthesize_workload(rec);
566 	}
567 	return fd;
568 }
569 
/*
 * Error code delivered with the workload-failure signal; reported as
 * "Workload failed: ..." by __cmd_record() when non-zero.
 */
static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 *
 * NOTE(review): the handler and variable names suggest the signal reports
 * a failed exec of the workload rather than a failed fork - confirm
 * against perf_evlist__prepare_workload().
 *
 * The handler stores the error code carried in the signal's value and
 * makes the record loop stop as if the child had already finished.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}
585 
/*
 * Forward declaration: __cmd_record() installs this as the SIGUSR2
 * handler when AUX snapshot mode or --switch-output is in use.
 */
static void snapshot_sig_handler(int sig);
587 
/*
 * Weak default that synthesizes nothing and returns 0.
 * NOTE(review): presumably overridden by an architecture-specific
 * implementation where time conversion data is available - confirm.
 */
int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}
596 
/*
 * Emit the synthesized (non-sample) events at the start of an output
 * file, in this order: for pipe output the attrs and, when tracepoints are
 * present, the tracing data; then time conversion, auxtrace info (if
 * enabled), kernel mmap, modules, guest OS events (if perf_guest) and
 * finally the existing threads.
 *
 * Failures to synthesize the kernel mmap or the modules only produce a
 * one-time warning.  Returns 0 or a positive value on success and a
 * negative value on a fatal error.
 */
static int record__synthesize(struct record *rec)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data_file *file = &rec->file;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data_file__fd(file);
	int err = 0;

	/* Only pipe output gets attrs and tracing data as in-stream events. */
	if (file->is_pipe) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			/* A positive result is added to the written-byte count. */
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(rec->evlist->mmap[0].base, tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	/* Kernel mmap and module failures are not fatal: warn once and go on. */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			   "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			   "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	/* The result of the thread synthesis is what the caller sees. */
	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    opts->proc_map_timeout);
out:
	return err;
}
669 
670 static int __cmd_record(struct record *rec, int argc, const char **argv)
671 {
672 	int err;
673 	int status = 0;
674 	unsigned long waking = 0;
675 	const bool forks = argc > 0;
676 	struct machine *machine;
677 	struct perf_tool *tool = &rec->tool;
678 	struct record_opts *opts = &rec->opts;
679 	struct perf_data_file *file = &rec->file;
680 	struct perf_session *session;
681 	bool disabled = false, draining = false;
682 	int fd;
683 
684 	rec->progname = argv[0];
685 
686 	atexit(record__sig_exit);
687 	signal(SIGCHLD, sig_handler);
688 	signal(SIGINT, sig_handler);
689 	signal(SIGTERM, sig_handler);
690 
691 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) {
692 		signal(SIGUSR2, snapshot_sig_handler);
693 		if (rec->opts.auxtrace_snapshot_mode)
694 			trigger_on(&auxtrace_snapshot_trigger);
695 		if (rec->switch_output)
696 			trigger_on(&switch_output_trigger);
697 	} else {
698 		signal(SIGUSR2, SIG_IGN);
699 	}
700 
701 	session = perf_session__new(file, false, tool);
702 	if (session == NULL) {
703 		pr_err("Perf session creation failed.\n");
704 		return -1;
705 	}
706 
707 	fd = perf_data_file__fd(file);
708 	rec->session = session;
709 
710 	record__init_features(rec);
711 
712 	if (forks) {
713 		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
714 						    argv, file->is_pipe,
715 						    workload_exec_failed_signal);
716 		if (err < 0) {
717 			pr_err("Couldn't run the workload!\n");
718 			status = err;
719 			goto out_delete_session;
720 		}
721 	}
722 
723 	if (record__open(rec) != 0) {
724 		err = -1;
725 		goto out_child;
726 	}
727 
728 	err = bpf__apply_obj_config();
729 	if (err) {
730 		char errbuf[BUFSIZ];
731 
732 		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
733 		pr_err("ERROR: Apply config to BPF failed: %s\n",
734 			 errbuf);
735 		goto out_child;
736 	}
737 
738 	/*
739 	 * Normally perf_session__new would do this, but it doesn't have the
740 	 * evlist.
741 	 */
742 	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
743 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
744 		rec->tool.ordered_events = false;
745 	}
746 
747 	if (!rec->evlist->nr_groups)
748 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
749 
750 	if (file->is_pipe) {
751 		err = perf_header__write_pipe(fd);
752 		if (err < 0)
753 			goto out_child;
754 	} else {
755 		err = perf_session__write_header(session, rec->evlist, fd, false);
756 		if (err < 0)
757 			goto out_child;
758 	}
759 
760 	if (!rec->no_buildid
761 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
762 		pr_err("Couldn't generate buildids. "
763 		       "Use --no-buildid to profile anyway.\n");
764 		err = -1;
765 		goto out_child;
766 	}
767 
768 	machine = &session->machines.host;
769 
770 	err = record__synthesize(rec);
771 	if (err < 0)
772 		goto out_child;
773 
774 	if (rec->realtime_prio) {
775 		struct sched_param param;
776 
777 		param.sched_priority = rec->realtime_prio;
778 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
779 			pr_err("Could not set realtime priority.\n");
780 			err = -1;
781 			goto out_child;
782 		}
783 	}
784 
785 	/*
786 	 * When perf is starting the traced process, all the events
787 	 * (apart from group members) have enable_on_exec=1 set,
788 	 * so don't spoil it by prematurely enabling them.
789 	 */
790 	if (!target__none(&opts->target) && !opts->initial_delay)
791 		perf_evlist__enable(rec->evlist);
792 
793 	/*
794 	 * Let the child rip
795 	 */
796 	if (forks) {
797 		union perf_event *event;
798 
799 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
800 		if (event == NULL) {
801 			err = -ENOMEM;
802 			goto out_child;
803 		}
804 
805 		/*
806 		 * Some H/W events are generated before COMM event
807 		 * which is emitted during exec(), so perf script
808 		 * cannot see a correct process name for those events.
809 		 * Synthesize COMM event to prevent it.
810 		 */
811 		perf_event__synthesize_comm(tool, event,
812 					    rec->evlist->workload.pid,
813 					    process_synthesized_event,
814 					    machine);
815 		free(event);
816 
817 		perf_evlist__start_workload(rec->evlist);
818 	}
819 
820 	if (opts->initial_delay) {
821 		usleep(opts->initial_delay * 1000);
822 		perf_evlist__enable(rec->evlist);
823 	}
824 
825 	trigger_ready(&auxtrace_snapshot_trigger);
826 	trigger_ready(&switch_output_trigger);
827 	for (;;) {
828 		unsigned long long hits = rec->samples;
829 
830 		if (record__mmap_read_all(rec) < 0) {
831 			trigger_error(&auxtrace_snapshot_trigger);
832 			trigger_error(&switch_output_trigger);
833 			err = -1;
834 			goto out_child;
835 		}
836 
837 		if (auxtrace_record__snapshot_started) {
838 			auxtrace_record__snapshot_started = 0;
839 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
840 				record__read_auxtrace_snapshot(rec);
841 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
842 				pr_err("AUX area tracing snapshot failed\n");
843 				err = -1;
844 				goto out_child;
845 			}
846 		}
847 
848 		if (trigger_is_hit(&switch_output_trigger)) {
849 			trigger_ready(&switch_output_trigger);
850 
851 			if (!quiet)
852 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
853 					waking);
854 			waking = 0;
855 			fd = record__switch_output(rec, false);
856 			if (fd < 0) {
857 				pr_err("Failed to switch to new file\n");
858 				trigger_error(&switch_output_trigger);
859 				err = fd;
860 				goto out_child;
861 			}
862 		}
863 
864 		if (hits == rec->samples) {
865 			if (done || draining)
866 				break;
867 			err = perf_evlist__poll(rec->evlist, -1);
868 			/*
869 			 * Propagate error, only if there's any. Ignore positive
870 			 * number of returned events and interrupt error.
871 			 */
872 			if (err > 0 || (err < 0 && errno == EINTR))
873 				err = 0;
874 			waking++;
875 
876 			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
877 				draining = true;
878 		}
879 
880 		/*
881 		 * When perf is starting the traced process, at the end events
882 		 * die with the process and we wait for that. Thus no need to
883 		 * disable events in this case.
884 		 */
885 		if (done && !disabled && !target__none(&opts->target)) {
886 			trigger_off(&auxtrace_snapshot_trigger);
887 			perf_evlist__disable(rec->evlist);
888 			disabled = true;
889 		}
890 	}
891 	trigger_off(&auxtrace_snapshot_trigger);
892 	trigger_off(&switch_output_trigger);
893 
894 	if (forks && workload_exec_errno) {
895 		char msg[STRERR_BUFSIZE];
896 		const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
897 		pr_err("Workload failed: %s\n", emsg);
898 		err = -1;
899 		goto out_child;
900 	}
901 
902 	if (!quiet)
903 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
904 
905 out_child:
906 	if (forks) {
907 		int exit_status;
908 
909 		if (!child_finished)
910 			kill(rec->evlist->workload.pid, SIGTERM);
911 
912 		wait(&exit_status);
913 
914 		if (err < 0)
915 			status = err;
916 		else if (WIFEXITED(exit_status))
917 			status = WEXITSTATUS(exit_status);
918 		else if (WIFSIGNALED(exit_status))
919 			signr = WTERMSIG(exit_status);
920 	} else
921 		status = err;
922 
923 	/* this will be recalculated during process_buildids() */
924 	rec->samples = 0;
925 
926 	if (!err) {
927 		if (!rec->timestamp_filename) {
928 			record__finish_output(rec);
929 		} else {
930 			fd = record__switch_output(rec, true);
931 			if (fd < 0) {
932 				status = fd;
933 				goto out_delete_session;
934 			}
935 		}
936 	}
937 
938 	if (!err && !quiet) {
939 		char samples[128];
940 		const char *postfix = rec->timestamp_filename ?
941 					".<timestamp>" : "";
942 
943 		if (rec->samples && !rec->opts.full_auxtrace)
944 			scnprintf(samples, sizeof(samples),
945 				  " (%" PRIu64 " samples)", rec->samples);
946 		else
947 			samples[0] = '\0';
948 
949 		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
950 			perf_data_file__size(file) / 1024.0 / 1024.0,
951 			file->path, postfix, samples);
952 	}
953 
954 out_delete_session:
955 	perf_session__delete(session);
956 	return status;
957 }
958 
959 static void callchain_debug(struct callchain_param *callchain)
960 {
961 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
962 
963 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
964 
965 	if (callchain->record_mode == CALLCHAIN_DWARF)
966 		pr_debug("callchain: stack dump size %d\n",
967 			 callchain->dump_size);
968 }
969 
970 int record_opts__parse_callchain(struct record_opts *record,
971 				 struct callchain_param *callchain,
972 				 const char *arg, bool unset)
973 {
974 	int ret;
975 	callchain->enabled = !unset;
976 
977 	/* --no-call-graph */
978 	if (unset) {
979 		callchain->record_mode = CALLCHAIN_NONE;
980 		pr_debug("callchain: disabled\n");
981 		return 0;
982 	}
983 
984 	ret = parse_callchain_record_opt(arg, callchain);
985 	if (!ret) {
986 		/* Enable data address sampling for DWARF unwind. */
987 		if (callchain->record_mode == CALLCHAIN_DWARF)
988 			record->sample_address = true;
989 		callchain_debug(callchain);
990 	}
991 
992 	return ret;
993 }
994 
/*
 * Option-parser callback: forwards to record_opts__parse_callchain() with
 * opt->value as the record options and the global callchain_param.
 */
int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}
1001 
1002 int record_callchain_opt(const struct option *opt,
1003 			 const char *arg __maybe_unused,
1004 			 int unset __maybe_unused)
1005 {
1006 	struct callchain_param *callchain = opt->value;
1007 
1008 	callchain->enabled = true;
1009 
1010 	if (callchain->record_mode == CALLCHAIN_NONE)
1011 		callchain->record_mode = CALLCHAIN_FP;
1012 
1013 	callchain_debug(callchain);
1014 	return 0;
1015 }
1016 
1017 static int perf_record_config(const char *var, const char *value, void *cb)
1018 {
1019 	struct record *rec = cb;
1020 
1021 	if (!strcmp(var, "record.build-id")) {
1022 		if (!strcmp(value, "cache"))
1023 			rec->no_buildid_cache = false;
1024 		else if (!strcmp(value, "no-cache"))
1025 			rec->no_buildid_cache = true;
1026 		else if (!strcmp(value, "skip"))
1027 			rec->no_buildid = true;
1028 		else
1029 			return -1;
1030 		return 0;
1031 	}
1032 	if (!strcmp(var, "record.call-graph"))
1033 		var = "call-graph.record-mode"; /* fall-through */
1034 
1035 	return perf_default_config(var, value, cb);
1036 }
1037 
/* Maps a clock name accepted by parse_clockid() to its clockid value. */
struct clockid_map {
	const char *name;
	int clockid;
};
1042 
/* Initializer for one struct clockid_map entry. */
#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

/* Table terminator: parse_clockid() stops at the first NULL name. */
#define CLOCKID_END	{ .name = NULL, }
1047 
1048 
/*
 * Add the missing ones, we need to build on many distros...
 *
 * Each fallback is only used when the system headers do not already
 * define the corresponding clock id.
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif
1061 
/*
 * Clock names understood by parse_clockid().  Lookup is case-insensitive
 * and an optional "CLOCK_" prefix is stripped before matching.  The table
 * ends with CLOCKID_END.
 */
static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};
1080 
1081 static int parse_clockid(const struct option *opt, const char *str, int unset)
1082 {
1083 	struct record_opts *opts = (struct record_opts *)opt->value;
1084 	const struct clockid_map *cm;
1085 	const char *ostr = str;
1086 
1087 	if (unset) {
1088 		opts->use_clockid = 0;
1089 		return 0;
1090 	}
1091 
1092 	/* no arg passed */
1093 	if (!str)
1094 		return 0;
1095 
1096 	/* no setting it twice */
1097 	if (opts->use_clockid)
1098 		return -1;
1099 
1100 	opts->use_clockid = true;
1101 
1102 	/* if its a number, we're done */
1103 	if (sscanf(str, "%d", &opts->clockid) == 1)
1104 		return 0;
1105 
1106 	/* allow a "CLOCK_" prefix to the name */
1107 	if (!strncasecmp(str, "CLOCK_", 6))
1108 		str += 6;
1109 
1110 	for (cm = clockids; cm->name; cm++) {
1111 		if (!strcasecmp(str, cm->name)) {
1112 			opts->clockid = cm->clockid;
1113 			return 0;
1114 		}
1115 	}
1116 
1117 	opts->use_clockid = false;
1118 	ui__warning("unknown clockid %s, check man page\n", ostr);
1119 	return -1;
1120 }
1121 
1122 static int record__parse_mmap_pages(const struct option *opt,
1123 				    const char *str,
1124 				    int unset __maybe_unused)
1125 {
1126 	struct record_opts *opts = opt->value;
1127 	char *s, *p;
1128 	unsigned int mmap_pages;
1129 	int ret;
1130 
1131 	if (!str)
1132 		return -EINVAL;
1133 
1134 	s = strdup(str);
1135 	if (!s)
1136 		return -ENOMEM;
1137 
1138 	p = strchr(s, ',');
1139 	if (p)
1140 		*p = '\0';
1141 
1142 	if (*s) {
1143 		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1144 		if (ret)
1145 			goto out_free;
1146 		opts->mmap_pages = mmap_pages;
1147 	}
1148 
1149 	if (!p) {
1150 		ret = 0;
1151 		goto out_free;
1152 	}
1153 
1154 	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1155 	if (ret)
1156 		goto out_free;
1157 
1158 	opts->auxtrace_mmap_pages = mmap_pages;
1159 
1160 out_free:
1161 	free(s);
1162 	return ret;
1163 }
1164 
/* NULL-terminated usage synopsis lines for 'perf record'. */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL,
};

/* Externally visible handle on the usage lines above. */
const char * const *record_usage = __record_usage;
1171 
1172 /*
1173  * XXX Ideally would be local to cmd_record() and passed to a record__new
1174  * because we need to have access to it in record__exit, that is called
1175  * after cmd_record() exits, but since record_options need to be accessible to
1176  * builtin-script, leave it here.
1177  *
1178  * At least we don't ouch it in all the other functions here directly.
1179  *
1180  * Just say no to tons of global variables, sigh.
1181  */
1182 static struct record record = {
1183 	.opts = {
1184 		.sample_time	     = true,
1185 		.mmap_pages	     = UINT_MAX,
1186 		.user_freq	     = UINT_MAX,
1187 		.user_interval	     = ULLONG_MAX,
1188 		.freq		     = 4000,
1189 		.target		     = {
1190 			.uses_mmap   = true,
1191 			.default_per_cpu = true,
1192 		},
1193 		.proc_map_timeout     = 500,
1194 	},
1195 	.tool = {
1196 		.sample		= process_sample_event,
1197 		.fork		= perf_event__process_fork,
1198 		.exit		= perf_event__process_exit,
1199 		.comm		= perf_event__process_comm,
1200 		.mmap		= perf_event__process_mmap,
1201 		.mmap2		= perf_event__process_mmap2,
1202 		.ordered_events	= true,
1203 	},
1204 };
1205 
1206 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1207 	"\n\t\t\t\tDefault: fp";
1208 
1209 /*
1210  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1211  * with it and switch to use the library functions in perf_evlist that came
1212  * from builtin-record.c, i.e. use record_opts,
1213  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1214  * using pipes, etc.
1215  */
1216 struct option __record_options[] = {
1217 	OPT_CALLBACK('e', "event", &record.evlist, "event",
1218 		     "event selector. use 'perf list' to list available events",
1219 		     parse_events_option),
1220 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1221 		     "event filter", parse_filter),
1222 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1223 			   NULL, "don't record events from perf itself",
1224 			   exclude_perf),
1225 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1226 		    "record events on existing process id"),
1227 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1228 		    "record events on existing thread id"),
1229 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
1230 		    "collect data with this RT SCHED_FIFO priority"),
1231 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1232 		    "collect data without buffering"),
1233 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1234 		    "collect raw sample records from all opened counters"),
1235 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1236 			    "system-wide collection from all CPUs"),
1237 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1238 		    "list of cpus to monitor"),
1239 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1240 	OPT_STRING('o', "output", &record.file.path, "file",
1241 		    "output file name"),
1242 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1243 			&record.opts.no_inherit_set,
1244 			"child tasks do not inherit counters"),
1245 	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1246 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1247 		     "number of mmap data pages and AUX area tracing mmap pages",
1248 		     record__parse_mmap_pages),
1249 	OPT_BOOLEAN(0, "group", &record.opts.group,
1250 		    "put the counters into a counter group"),
1251 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1252 			   NULL, "enables call-graph recording" ,
1253 			   &record_callchain_opt),
1254 	OPT_CALLBACK(0, "call-graph", &record.opts,
1255 		     "record_mode[,record_size]", record_callchain_help,
1256 		     &record_parse_callchain_opt),
1257 	OPT_INCR('v', "verbose", &verbose,
1258 		    "be more verbose (show counter open errors, etc)"),
1259 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1260 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1261 		    "per thread counts"),
1262 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1263 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1264 			&record.opts.sample_time_set,
1265 			"Record the sample timestamps"),
1266 	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
1267 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1268 		    "don't sample"),
1269 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1270 			&record.no_buildid_cache_set,
1271 			"do not update the buildid cache"),
1272 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1273 			&record.no_buildid_set,
1274 			"do not collect buildids in perf.data"),
1275 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1276 		     "monitor event in cgroup name only",
1277 		     parse_cgroups),
1278 	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1279 		  "ms to wait before starting measurement after program start"),
1280 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1281 		   "user to profile"),
1282 
1283 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1284 		     "branch any", "sample any taken branches",
1285 		     parse_branch_stack),
1286 
1287 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1288 		     "branch filter mask", "branch stack filter modes",
1289 		     parse_branch_stack),
1290 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1291 		    "sample by weight (on special events only)"),
1292 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1293 		    "sample transaction flags (special events only)"),
1294 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1295 		    "use per-thread mmaps"),
1296 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1297 		    "sample selected machine registers on interrupt,"
1298 		    " use -I ? to list register names", parse_regs),
1299 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1300 		    "Record running/enabled time of read (:S) events"),
1301 	OPT_CALLBACK('k', "clockid", &record.opts,
1302 	"clockid", "clockid to use for events, see clock_gettime()",
1303 	parse_clockid),
1304 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1305 			  "opts", "AUX area tracing Snapshot Mode", ""),
1306 	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1307 			"per thread proc mmap processing timeout in ms"),
1308 	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1309 		    "Record context switch events"),
1310 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1311 			 "Configure all used events to run in kernel space.",
1312 			 PARSE_OPT_EXCLUSIVE),
1313 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1314 			 "Configure all used events to run in user space.",
1315 			 PARSE_OPT_EXCLUSIVE),
1316 	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1317 		   "clang binary to use for compiling BPF scriptlets"),
1318 	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1319 		   "options passed to clang when compiling BPF scriptlets"),
1320 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1321 		   "file", "vmlinux pathname"),
1322 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1323 		    "Record build-id of all DSOs regardless of hits"),
1324 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1325 		    "append timestamp to output filename"),
1326 	OPT_BOOLEAN(0, "switch-output", &record.switch_output,
1327 		    "Switch output when receive SIGUSR2"),
1328 	OPT_END()
1329 };
1330 
1331 struct option *record_options = __record_options;
1332 
1333 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
1334 {
1335 	int err;
1336 	struct record *rec = &record;
1337 	char errbuf[BUFSIZ];
1338 
1339 #ifndef HAVE_LIBBPF_SUPPORT
1340 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1341 	set_nobuild('\0', "clang-path", true);
1342 	set_nobuild('\0', "clang-opt", true);
1343 # undef set_nobuild
1344 #endif
1345 
1346 #ifndef HAVE_BPF_PROLOGUE
1347 # if !defined (HAVE_DWARF_SUPPORT)
1348 #  define REASON  "NO_DWARF=1"
1349 # elif !defined (HAVE_LIBBPF_SUPPORT)
1350 #  define REASON  "NO_LIBBPF=1"
1351 # else
1352 #  define REASON  "this architecture doesn't support BPF prologue"
1353 # endif
1354 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
1355 	set_nobuild('\0', "vmlinux", true);
1356 # undef set_nobuild
1357 # undef REASON
1358 #endif
1359 
1360 	rec->evlist = perf_evlist__new();
1361 	if (rec->evlist == NULL)
1362 		return -ENOMEM;
1363 
1364 	perf_config(perf_record_config, rec);
1365 
1366 	argc = parse_options(argc, argv, record_options, record_usage,
1367 			    PARSE_OPT_STOP_AT_NON_OPTION);
1368 	if (!argc && target__none(&rec->opts.target))
1369 		usage_with_options(record_usage, record_options);
1370 
1371 	if (nr_cgroups && !rec->opts.target.system_wide) {
1372 		usage_with_options_msg(record_usage, record_options,
1373 			"cgroup monitoring only available in system-wide mode");
1374 
1375 	}
1376 	if (rec->opts.record_switch_events &&
1377 	    !perf_can_record_switch_events()) {
1378 		ui__error("kernel does not support recording context switch events\n");
1379 		parse_options_usage(record_usage, record_options, "switch-events", 0);
1380 		return -EINVAL;
1381 	}
1382 
1383 	if (rec->switch_output)
1384 		rec->timestamp_filename = true;
1385 
1386 	if (!rec->itr) {
1387 		rec->itr = auxtrace_record__init(rec->evlist, &err);
1388 		if (err)
1389 			return err;
1390 	}
1391 
1392 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
1393 					      rec->opts.auxtrace_snapshot_opts);
1394 	if (err)
1395 		return err;
1396 
1397 	err = bpf__setup_stdout(rec->evlist);
1398 	if (err) {
1399 		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
1400 		pr_err("ERROR: Setup BPF stdout failed: %s\n",
1401 			 errbuf);
1402 		return err;
1403 	}
1404 
1405 	err = -ENOMEM;
1406 
1407 	symbol__init(NULL);
1408 
1409 	if (symbol_conf.kptr_restrict)
1410 		pr_warning(
1411 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1412 "check /proc/sys/kernel/kptr_restrict.\n\n"
1413 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1414 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1415 "Samples in kernel modules won't be resolved at all.\n\n"
1416 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1417 "even with a suitable vmlinux or kallsyms file.\n\n");
1418 
1419 	if (rec->no_buildid_cache || rec->no_buildid) {
1420 		disable_buildid_cache();
1421 	} else if (rec->switch_output) {
1422 		/*
1423 		 * In 'perf record --switch-output', disable buildid
1424 		 * generation by default to reduce data file switching
1425 		 * overhead. Still generate buildid if they are required
1426 		 * explicitly using
1427 		 *
1428 		 *  perf record --signal-trigger --no-no-buildid \
1429 		 *              --no-no-buildid-cache
1430 		 *
1431 		 * Following code equals to:
1432 		 *
1433 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
1434 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
1435 		 *         disable_buildid_cache();
1436 		 */
1437 		bool disable = true;
1438 
1439 		if (rec->no_buildid_set && !rec->no_buildid)
1440 			disable = false;
1441 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
1442 			disable = false;
1443 		if (disable) {
1444 			rec->no_buildid = true;
1445 			rec->no_buildid_cache = true;
1446 			disable_buildid_cache();
1447 		}
1448 	}
1449 
1450 	if (rec->evlist->nr_entries == 0 &&
1451 	    perf_evlist__add_default(rec->evlist) < 0) {
1452 		pr_err("Not enough memory for event selector list\n");
1453 		goto out_symbol_exit;
1454 	}
1455 
1456 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1457 		rec->opts.no_inherit = true;
1458 
1459 	err = target__validate(&rec->opts.target);
1460 	if (err) {
1461 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1462 		ui__warning("%s", errbuf);
1463 	}
1464 
1465 	err = target__parse_uid(&rec->opts.target);
1466 	if (err) {
1467 		int saved_errno = errno;
1468 
1469 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1470 		ui__error("%s", errbuf);
1471 
1472 		err = -saved_errno;
1473 		goto out_symbol_exit;
1474 	}
1475 
1476 	err = -ENOMEM;
1477 	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1478 		usage_with_options(record_usage, record_options);
1479 
1480 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1481 	if (err)
1482 		goto out_symbol_exit;
1483 
1484 	/*
1485 	 * We take all buildids when the file contains
1486 	 * AUX area tracing data because we do not decode the
1487 	 * trace because it would take too long.
1488 	 */
1489 	if (rec->opts.full_auxtrace)
1490 		rec->buildid_all = true;
1491 
1492 	if (record_opts__config(&rec->opts)) {
1493 		err = -EINVAL;
1494 		goto out_symbol_exit;
1495 	}
1496 
1497 	err = __cmd_record(&record, argc, argv);
1498 out_symbol_exit:
1499 	perf_evlist__delete(rec->evlist);
1500 	symbol__exit();
1501 	auxtrace_record__free(rec->itr);
1502 	return err;
1503 }
1504 
1505 static void snapshot_sig_handler(int sig __maybe_unused)
1506 {
1507 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
1508 		trigger_hit(&auxtrace_snapshot_trigger);
1509 		auxtrace_record__snapshot_started = 1;
1510 		if (auxtrace_record__snapshot_start(record.itr))
1511 			trigger_error(&auxtrace_snapshot_trigger);
1512 	}
1513 
1514 	if (trigger_is_ready(&switch_output_trigger))
1515 		trigger_hit(&switch_output_trigger);
1516 }
1517