xref: /linux/tools/perf/builtin-record.c (revision 9e9f60108423f18a99c9cc93ef7f23490ecc709b)
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>


struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data_file	file;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_cache;
	long			samples;
};

static int record__write(struct record *rec, void *bf, size_t size)
{
	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;
	return 0;
}

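/*
 * Callback used when synthesizing side-band events (mmaps, comms, etc.):
 * recover our struct record from the embedded perf_tool and funnel the
 * event through the same write path as the mmap'ed ring buffer data.
 */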
static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, event, event->header.size);
}

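/*
 * Drain one mmap'ed ring buffer into the output file.  The event data
 * starts one page after md->base (the first page holds the control
 * structure).  'head' is where the kernel last wrote, md->prev is where
 * we stopped reading; if that region wraps past the end of the buffer,
 * it is copied out in two chunks.
 */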
static int record__mmap_read(struct record *rec, int idx)
{
	struct perf_mmap *md = &rec->evlist->mmap[idx];
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (old == head)
		return 0;

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (record__write(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (record__write(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	perf_evlist__mmap_consume(rec->evlist, idx);
out:
	return rc;
}

static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

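/*
 * atexit handler: if we were stopped by a fatal signal, restore the
 * default disposition and re-raise it so that our parent sees the real
 * termination status rather than a plain exit.
 */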
static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

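/*
 * Open a counter for every event on the configured CPUs/threads, apply
 * any event filters, then mmap the ring buffers.  On open failure we
 * first let perf_evsel__fallback() try a weaker but workable setup
 * (e.g. a software clock event when the hardware event is unavailable)
 * before giving up with a detailed error message.
 */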
static int record__open(struct record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts);

	evlist__for_each(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u)\n", opts->mmap_pages);
			rc = -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				strerror_r(errno, msg, sizeof(msg)));
			rc = -errno;
		}
		goto out;
	}

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

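/*
 * Post-process the recorded stream with build_id__mark_dso_hit_ops so
 * that only DSOs which actually got samples are marked; the final
 * header write then emits build-ids just for those.
 */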
static int process_buildids(struct record *rec)
{
	struct perf_data_file *file  = &rec->file;
	struct perf_session *session = rec->session;
	u64 start = session->header.data_offset;

	u64 size = lseek(file->fd, 0, SEEK_CUR);
	if (size == 0)
		return 0;

	/*
	 * During this process the kernel map is loaded and dso->long_name
	 * is replaced with the real pathname that was found.  In that case
	 * we prefer the vmlinux path, e.g.
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory):
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	return __perf_session__process_events(session, start,
					      size - start,
					      size, &build_id__mark_dso_hit_ops);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * For the guest kernel, when processing the record & report
	 * subcommands, we synthesize the module mmap events before the
	 * guest kernel mmap event and trigger a DSO preload, because by
	 * default guest module symbols are loaded from guest kallsyms
	 * instead of /lib/modules/XXX/XXX.  This avoids missing symbols
	 * when the first sampled address falls in a module rather than
	 * in the guest kernel itself.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms may have no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

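/*
 * A FINISHED_ROUND pseudo-event is appended after each full pass over
 * the mmap buffers that wrote data.  It acts as an ordering barrier for
 * the consumer: all events recorded before it can safely be sorted by
 * timestamp and flushed.
 */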
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static int record__mmap_read_all(struct record *rec)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		if (rec->evlist->mmap[i].base) {
			if (record__mmap_read(rec, i) != 0) {
				rc = -1;
				goto out;
			}
		}
	}

	/*
	 * Mark the round finished if we wrote at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

out:
	return rc;
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1 if the fork fails,
 * since we asked for that by setting want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

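/*
 * The main record flow: create the session, fork the (stopped) workload
 * if one was given, open and mmap the counters, write the file/pipe
 * header, synthesize the pre-existing state (kernel, modules, threads),
 * enable the events, then loop draining the buffers until the workload
 * exits or we are interrupted, and finally fix up the header.
 */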
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data_file *file = &rec->file;
	struct perf_session *session;
	bool disabled = false, draining = false;

	rec->progname = argv[0];

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);

	session = perf_session__new(file, false, NULL);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	rec->session = session;

	record__init_features(rec);

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, file->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (file->is_pipe) {
		err = perf_header__write_pipe(file->fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist,
						 file->fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	machine = &session->machines.host;

	if (file->is_pipe) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_child;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints, so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We should report this
			 * more properly and also propagate the errors
			 * that currently call die().
			 */
			err = perf_event__synthesize_tracing_data(tool, file->fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_child;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address);
	if (err != 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(rec->evlist);

	if (opts->initial_delay) {
		usleep(opts->initial_delay * 1000);
		perf_evlist__enable(rec->evlist);
	}

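	/*
	 * Main capture loop: drain every mmap buffer, and if nothing new
	 * showed up (rec->samples unchanged) block in poll until either
	 * more data arrives, we are told to stop, or every descriptor has
	 * hung up (draining), meaning the monitored tasks are gone.
	 */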
	for (;;) {
		int hits = rec->samples;

		if (record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_child;
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate only real errors: ignore both a
			 * positive number of returned events and an
			 * interrupted poll (EINTR).
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet) {
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

		/*
		 * Approximate RIP event size: 24 bytes.
		 */
		fprintf(stderr,
			"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
			(double)rec->bytes_written / 1024.0 / 1024.0,
			file->path,
			rec->bytes_written / 24);
	}

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	if (!err && !file->is_pipe) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   file->fd, true);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}

#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
	const char *name;
	int mode;
};

static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
	BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
	BRANCH_END
};

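/*
 * Parse a comma separated branch filter list, e.g. as given to
 * -j any_call,u on the command line.  The named modes are OR'ed into
 * *mode; if only privilege-level bits (u/k/hv) were requested, the
 * branch type defaults to "any".
 */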
static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			*mode |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((*mode & ~ONLY_PLM) == 0) {
		*mode = PERF_SAMPLE_BRANCH_ANY;
	}
error:
	free(os);
	return ret;
}

static void callchain_debug(void)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" };

	pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);

	if (callchain_param.record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain_param.dump_size);
}

int record_parse_callchain_opt(const struct option *opt __maybe_unused,
			       const char *arg,
			       int unset)
{
	int ret;

	callchain_param.enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain_param.record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg);
	if (!ret)
		callchain_debug();

	return ret;
}

int record_callchain_opt(const struct option *opt __maybe_unused,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	callchain_param.enabled = true;

	if (callchain_param.record_mode == CALLCHAIN_NONE)
		callchain_param.record_mode = CALLCHAIN_FP;

	callchain_debug();
	return 0;
}

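/*
 * perfconfig hook: "record.call-graph" is accepted as an alias for
 * "call-graph.record-mode", so e.g. in ~/.perfconfig
 *
 *	[record]
 *		call-graph = dwarf
 *
 * selects DWARF-based unwinding; everything else falls through to
 * perf_default_config().
 */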
static int perf_record_config(const char *var, const char *value, void *cb)
{
	if (!strcmp(var, "record.call-graph"))
		var = "call-graph.record-mode"; /* fall-through */

	return perf_default_config(var, value, cb);
}

static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally this would be local to cmd_record() and passed to a record__new,
 * because we need access to it in record__exit, which is called after
 * cmd_record() exits; but since record_options needs to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
	},
};

#define CALLCHAIN_HELP "setup and enable call-graph (stack chain/backtrace) recording: "

#ifdef HAVE_DWARF_UNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "fp";
#endif

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to using the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record'
 * using pipes, etc.
 */
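
/*
 * Illustrative invocations, using only the options defined below
 * (workload names are made up):
 *
 *	perf record -F 4000 -g -- ./myprog		# sample with call-graphs at 4 kHz
 *	perf record -a -e cycles -o all.data sleep 5	# system wide for 5 seconds
 *	perf record -p 1234 -j any_call,u		# user-level call branches of pid 1234
 */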
struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.file.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
		     "number of mmap data pages",
		     perf_evlist__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "mode[,dump_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		  "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs,
		    "Sample machine registers on interrupt"),
	OPT_END()
};

struct option *record_options = __record_options;

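/*
 * Entry point: wire up the config hook, parse the command line, sanity
 * check the target, create the cpu/thread maps and hand off to
 * __cmd_record().
 */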
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err = -ENOMEM;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	perf_config(perf_record_config, rec);

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init(NULL);

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	if (rec->evlist->nr_entries == 0 &&
	    perf_evlist__add_default(rec->evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_symbol_exit;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out_symbol_exit;
	}

	err = __cmd_record(&record, argc, argv);
out_symbol_exit:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	return err;
}