xref: /linux/tools/perf/builtin-record.c (revision c75c5ab575af7db707689cdbb5a5c458e9a034bb)
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#define _FILE_OFFSET_BITS 64

#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

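/*
 * on_exit() is a glibc extension that is not available everywhere (e.g.
 * Android's bionic and the BSDs lack it), so a minimal emulation is
 * provided when HAVE_ON_EXIT is not set: a fixed-size table of callbacks
 * flushed from a single atexit() hook.  atexit() handlers do not receive
 * the process exit status, so exit() is wrapped in a macro that stashes
 * the status in __exitcode before calling the real exit().
 */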
#ifndef HAVE_ON_EXIT
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
#define exit(x) (exit)(__exitcode = (x))

static int on_exit(on_exit_func_t function, void *arg)
{
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;
	else if (__on_exit_count == 0)
		atexit(__handle_on_exit_funcs);
	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count++] = arg;
	return 0;
}

static void __handle_on_exit_funcs(void)
{
	int i;
	for (i = 0; i < __on_exit_count; i++)
		__on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif

enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};

struct perf_record {
	struct perf_tool	tool;
	struct perf_record_opts	opts;
	u64			bytes_written;
	const char		*output_name;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			output;
	unsigned int		page_size;
	int			realtime_prio;
	enum write_mode_t	write_mode;
	bool			no_buildid;
	bool			no_buildid_cache;
	bool			force;
	bool			file_new;
	bool			append_file;
	long			samples;
	off_t			post_processing_offset;
};

static void advance_output(struct perf_record *rec, size_t size)
{
	rec->bytes_written += size;
}

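/*
 * write(2) may write fewer bytes than requested (short writes are normal
 * for pipes and when interrupted), so loop until the whole buffer has been
 * flushed, keeping the running byte count used for the final report.
 */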
static int write_output(struct perf_record *rec, void *buf, size_t size)
{
	while (size) {
		int ret = write(rec->output, buf, size);

		if (ret < 0) {
			pr_err("failed to write\n");
			return -1;
		}

		size -= ret;
		buf += ret;

		rec->bytes_written += ret;
	}

	return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct perf_record *rec = container_of(tool, struct perf_record, tool);
	if (write_output(rec, event, event->header.size) < 0)
		return -1;

	return 0;
}

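/*
 * Drain one kernel mmap ring buffer into the output file.  The first page
 * of each mmap region is the control page (struct perf_event_mmap_page);
 * the data area starts one page in, which is why rec->page_size is added
 * to md->base.  When the unread region wraps past the end of the buffer it
 * is written out in two pieces: the tail of the buffer first, then the
 * remainder from the start.  Only after a successful write is the tail
 * pointer advanced, telling the kernel that the space can be reused.
 */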
static int perf_record__mmap_read(struct perf_record *rec,
				   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + rec->page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (old == head)
		return 0;

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (write_output(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (write_output(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	perf_mmap__write_tail(md, old);

out:
	return rc;
}

static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

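/*
 * Keep the signal handler async-signal-safe: it only sets volatile flags.
 * The main loop in __cmd_record() notices 'done' after its next pass over
 * the mmaps, and perf_record__sig_exit() re-raises the signal at exit so
 * the process still terminates with the expected signal status.
 */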
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}

static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}

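/*
 * Append mode (-A) reuses an existing perf.data, whose header already
 * describes the recorded events, so the perf_event_attr of each evsel in
 * this session must match the existing header entry exactly; otherwise
 * the resulting file could not be parsed consistently.
 */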
static bool perf_evlist__equal(struct perf_evlist *evlist,
			       struct perf_evlist *other)
{
	struct perf_evsel *pos, *pair;

	if (evlist->nr_entries != other->nr_entries)
		return false;

	pair = perf_evlist__first(other);

	list_for_each_entry(pos, &evlist->entries, node) {
		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
			return false;
		pair = perf_evsel__next(pair);
	}

	return true;
}

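/*
 * Open all counters and map their ring buffers.  perf_evsel__fallback()
 * handles the common case where a symbolic event is unavailable (e.g.
 * falling back from a hardware cycles event to its software equivalent)
 * and the open is retried; any other failure is reported via the
 * strerror helper and aborts the record session.
 */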
static int perf_record__open(struct perf_record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts);

	list_for_each_entry(pos, &evlist->entries, node) {
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u)\n", opts->mmap_pages);
			rc = -errno;
		} else if (!is_power_of_2(opts->mmap_pages) &&
			   (opts->mmap_pages != UINT_MAX)) {
			pr_err("--mmap_pages/-m value must be a power of two.\n");
			rc = -EINVAL;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
			rc = -errno;
		}
		goto out;
	}

	if (rec->file_new)
		session->evlist = evlist;
	else {
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			rc = -1;
			goto out;
		}
	}

	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

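/*
 * Second pass over everything written so far (starting at
 * post_processing_offset, i.e. just past the header) with the
 * build_id__mark_dso_hit_ops tool: it marks the DSOs that actually got
 * samples, so that only their build-ids end up in the perf.data header.
 */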
static int process_buildids(struct perf_record *rec)
{
	u64 size = lseek(rec->output, 0, SEEK_CUR);

	if (size == 0)
		return 0;

	rec->session->fd = rec->output;
	return __perf_session__process_events(rec->session, rec->post_processing_offset,
					      size - rec->post_processing_offset,
					      size, &build_id__mark_dso_hit_ops);
}

static void perf_record__exit(int status, void *arg)
{
	struct perf_record *rec = arg;

	if (status != 0)
		return;

	if (!rec->opts.pipe_output) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   rec->output, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel, when processing the record and report
	 * subcommands we arrange the module mmaps prior to the guest kernel
	 * mmap and trigger a preload of the dso, because by default guest
	 * module symbols are loaded from the guest's kallsyms instead of
	 * /lib/modules/XXX/XXX.  This avoids missing symbols when the first
	 * address falls in a module rather than in the guest kernel proper.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

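/*
 * PERF_RECORD_FINISHED_ROUND is a synthetic, header-only event written
 * after each complete pass over all the mmaps.  It tells the reporting
 * side that every event before this point has been flushed, so buffered
 * events can be sorted by timestamp and processed without waiting for the
 * whole file.  In this version it is only emitted when tracepoints are
 * being recorded (the HEADER_TRACING_DATA feature is set).
 */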
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static int perf_record__mmap_read_all(struct perf_record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		if (rec->evlist->mmap[i].base) {
			if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
				rc = -1;
				goto out;
			}
		}
	}

	if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
		rc = write_output(rec, &finished_round_event,
				  sizeof(finished_round_event));

out:
	return rc;
}

static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err, output, feat;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	const char *output_name = rec->output_name;
	struct perf_session *session;
	bool disabled = false;

	rec->progname = argv[0];

	rec->page_size = sysconf(_SC_PAGE_SIZE);

	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			opts->pipe_output = true;
		else
			rec->output_name = output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			opts->pipe_output = true;
		else if (!stat(output_name, &st) && st.st_size) {
			if (rec->write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (rec->write_mode == WRITE_APPEND) {
			rec->write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (rec->write_mode == WRITE_APPEND)
		rec->file_new = 0;
	else
		flags |= O_TRUNC;

	if (opts->pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		return -1;
	}

	rec->output = output;

	session = perf_session__new(output_name, O_WRONLY,
				    rec->write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&evsel_list->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, opts, argv);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	if (perf_record__open(rec) != 0) {
		err = -1;
		goto out_delete_session;
	}

	if (!evsel_list->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	if (opts->pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			goto out_delete_session;
	} else if (rec->file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			goto out_delete_session;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_delete_session;
	}

	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

	machine = &session->machines.host;

	if (opts->pipe_output) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_delete_session;
		}

		err = perf_event__synthesize_event_types(tool, process_synthesized_event,
							 machine);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			goto out_delete_session;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints, so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really should
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_delete_session;
			}
			advance_output(rec, err);
		}
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	if (perf_target__has_task(&opts->target))
		err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
						  process_synthesized_event,
						  machine);
	else if (perf_target__has_cpu(&opts->target))
		err = perf_event__synthesize_threads(tool, process_synthesized_event,
					       machine);
	else /* command specified */
		err = 0;

	if (err != 0)
		goto out_delete_session;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_delete_session;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!perf_target__none(&opts->target))
		perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

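	/*
	 * Main capture loop: drain every mmap, then sleep in poll() until
	 * the kernel signals that new data is available.  If a pass found
	 * no new samples (hits == rec->samples) and a signal has set
	 * 'done', we are finished; 'waking' counts the poll() wakeups for
	 * the summary printed below.
	 */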
	for (;;) {
		int hits = rec->samples;

		if (perf_record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_delete_session;
		}

		if (hits == rec->samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !perf_target__none(&opts->target)) {
			perf_evlist__disable(evsel_list);
			disabled = true;
		}
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		output_name,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}

#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
	const char *name;
	int mode;
};

static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_END
};

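/*
 * Parse the -j/--branch-filter argument: a comma-separated list of the
 * names in branch_modes[] above, OR-ed into a single branch sample mask.
 * ONLY_PLM covers the privilege-level bits (user/kernel/hv); if the user
 * selected only privilege levels and no branch type, default the type to
 * "any" taken branch, since a privilege-level-only mask selects no branch
 * type at all.
 */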
static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			*mode |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((*mode & ~ONLY_PLM) == 0) {
		*mode = PERF_SAMPLE_BRANCH_ANY;
	}
error:
	free(os);
	return ret;
}

#ifdef LIBUNWIND_SUPPORT
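/*
 * Parse the dump size given to -g dwarf,<size>: the value is rounded up
 * to a multiple of sizeof(u64) and capped at USHRT_MAX rounded down to a
 * u64 boundary, which appears to be the largest user stack dump the
 * kernel side will accept per sample.
 */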
static int get_stack_size(char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	size = strtoul(str, &endptr, 0);

	do {
		if (*endptr)
			break;

		size = round_up(size, sizeof(u64));
		if (!size || size > max_size)
			break;

		*_size = size;
		return 0;

	} while (0);

	pr_err("callchain: Incorrect stack dump size (max %lu): %s\n",
	       max_size, str);
	return -1;
}
#endif /* LIBUNWIND_SUPPORT */

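/*
 * Parse -g/--call-graph: "fp" selects frame-pointer based unwinding done
 * by the kernel and takes no further argument; "dwarf[,<size>]" (only
 * available when built with libunwind) makes the kernel dump <size> bytes
 * of user stack per sample, to be unwound in userspace with DWARF CFI,
 * defaulting to 8192 bytes.
 */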
int record_parse_callchain_opt(const struct option *opt,
			       const char *arg, int unset)
{
	struct perf_record_opts *opts = opt->value;
	char *tok, *name, *saveptr = NULL;
	char *buf;
	int ret = -1;

	/* --no-call-graph */
	if (unset)
		return 0;

	/* A default option is supplied if none is provided. */
	BUG_ON(!arg);

	/* We need a buffer we know we can write to. */
	buf = malloc(strlen(arg) + 1);
	if (!buf)
		return -ENOMEM;

	strcpy(buf, arg);

	tok = strtok_r(buf, ",", &saveptr);
	name = tok ? : buf;

	do {
		/* Framepointer style */
		if (!strncmp(name, "fp", sizeof("fp"))) {
			if (!strtok_r(NULL, ",", &saveptr)) {
				opts->call_graph = CALLCHAIN_FP;
				ret = 0;
			} else
				pr_err("callchain: No more arguments "
				       "needed for -g fp\n");
			break;

#ifdef LIBUNWIND_SUPPORT
		/* Dwarf style */
		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
			const unsigned long default_stack_dump_size = 8192;

			ret = 0;
			opts->call_graph = CALLCHAIN_DWARF;
			opts->stack_dump_size = default_stack_dump_size;

			tok = strtok_r(NULL, ",", &saveptr);
			if (tok) {
				unsigned long size = 0;

				ret = get_stack_size(tok, &size);
				opts->stack_dump_size = size;
			}

			if (!ret)
				pr_debug("callchain: stack dump size %d\n",
					 opts->stack_dump_size);
#endif /* LIBUNWIND_SUPPORT */
		} else {
			pr_err("callchain: Unknown -g option "
			       "value: %s\n", arg);
			break;
		}

	} while (0);

	free(buf);

	if (!ret)
		pr_debug("callchain: type %d\n", opts->call_graph);

	return ret;
}


static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/*
 * XXX Ideally this would be local to cmd_record() and passed to a
 * perf_record__new(), because we need access to it in perf_record__exit(),
 * which is called after cmd_record() exits; but since record_options needs
 * to be accessible to builtin-script, leave it here.
 *
 * At least we don't touch it directly in all the other functions here.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct perf_record record = {
	.opts = {
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
		},
	},
	.write_mode = WRITE_FORCE,
	.file_new   = true,
};

#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "

#ifdef LIBUNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "[fp]";
#endif

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use perf_record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &record.append_file,
			    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &record.force,
			"overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
			     "mode[,dump_size]", record_callchain_help,
			     &record_parse_callchain_opt, "fp"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_END()
};
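
/*
 * A few illustrative invocations (not exhaustive; see the perf-record man
 * page for the full list):
 *
 *   perf record ./workload             profile a command at the default 4000 Hz
 *   perf record -a -- sleep 5          system-wide capture for five seconds
 *   perf record -p 1234 -g fp          attach to pid 1234 with frame-pointer
 *                                      call graphs
 *   perf record -e cycles -c 100000    sample every 100000th cycles event
 */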

int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;
	struct perf_evlist *evsel_list;
	struct perf_record *rec = &record;
	char errbuf[BUFSIZ];

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	rec->evlist = evsel_list;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && perf_target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (rec->force && rec->append_file) {
		ui__error("Can't overwrite and append at the same time."
			  " You need to choose between -f and -A.\n");
		usage_with_options(record_usage, record_options);
	} else if (rec->append_file) {
		rec->write_mode = WRITE_APPEND;
	} else {
		rec->write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	err = perf_target__validate(&rec->opts.target);
	if (err) {
		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = perf_target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_free_fd;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
			goto out_free_fd;
	}

	if (rec->opts.user_interval != ULLONG_MAX)
		rec->opts.default_interval = rec->opts.user_interval;
	if (rec->opts.user_freq != UINT_MAX)
		rec->opts.freq = rec->opts.user_freq;

	/*
	 * A user-specified count overrides the default frequency.
	 */
	if (rec->opts.default_interval)
		rec->opts.freq = 0;
	else if (rec->opts.freq) {
		rec->opts.default_interval = rec->opts.freq;
	} else {
		ui__error("frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(&record, argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}