/* xref: /linux/tools/perf/builtin-record.c (revision db8fd07a541fc2d5e8076f0151286e19591465b3) */
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#define _FILE_OFFSET_BITS 64

#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

#ifndef HAVE_ON_EXIT
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
#define exit(x) (exit)(__exitcode = (x))

static int on_exit(on_exit_func_t function, void *arg)
{
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;
	else if (__on_exit_count == 0)
		atexit(__handle_on_exit_funcs);
	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count++] = arg;
	return 0;
}

static void __handle_on_exit_funcs(void)
{
	int i;
	for (i = 0; i < __on_exit_count; i++)
		__on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif
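
/*
 * A minimal usage sketch of the on_exit() contract (the handler below
 * is hypothetical, not part of this file): unlike atexit(), the
 * handler also receives the process exit status.
 *
 *	static void cleanup(int status, void *arg)
 *	{
 *		struct perf_record *rec = arg;
 *		...
 *	}
 *
 *	on_exit(cleanup, rec);
 *
 * The fallback above recovers that status on systems without on_exit()
 * by redefining exit() to stash its argument in __exitcode before the
 * atexit()-registered dispatcher runs.
 */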

enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};

struct perf_record {
	struct perf_tool	tool;
	struct perf_record_opts	opts;
	u64			bytes_written;
	const char		*output_name;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			output;
	unsigned int		page_size;
	int			realtime_prio;
	enum write_mode_t	write_mode;
	bool			no_buildid;
	bool			no_buildid_cache;
	bool			force;
	bool			file_new;
	bool			append_file;
	long			samples;
	off_t			post_processing_offset;
};

static void advance_output(struct perf_record *rec, size_t size)
{
	rec->bytes_written += size;
}

static int write_output(struct perf_record *rec, void *buf, size_t size)
{
	while (size) {
		int ret = write(rec->output, buf, size);

		if (ret < 0) {
			pr_err("failed to write\n");
			return -1;
		}

		size -= ret;
		buf += ret;

		rec->bytes_written += ret;
	}

	return 0;
}
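
/*
 * write_output() loops because write(2) may drain only part of the
 * buffer.  A sketch of a stricter variant (an assumption, not what
 * this file does) would also retry writes interrupted by signals:
 *
 *	if (ret < 0) {
 *		if (errno == EINTR)
 *			continue;
 *		pr_err("failed to write\n");
 *		return -1;
 *	}
 */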

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct perf_record *rec = container_of(tool, struct perf_record, tool);
	if (write_output(rec, event, event->header.size) < 0)
		return -1;

	return 0;
}

static int perf_record__mmap_read(struct perf_record *rec,
				   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + rec->page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (old == head)
		return 0;

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (write_output(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (write_output(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	perf_mmap__write_tail(md, old);

out:
	return rc;
}
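
/*
 * Worked example of the wrap-around split above, with a deliberately
 * tiny hypothetical buffer (mask = 15, i.e. 16 bytes): if old = 12 and
 * head = 20, then (old & mask) + size = 20 but head & mask = 4, so the
 * data wraps.  The first write_output() flushes bytes 12..15
 * (mask + 1 - (old & mask) = 4 bytes), the second flushes bytes 0..3.
 * Only after both writes succeed is the tail advanced, telling the
 * kernel it may reuse that space.
 */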

static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}

static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}
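
/*
 * The signal(signr, SIG_DFL) + kill(getpid(), signr) pair above is the
 * usual re-raise idiom: once cleanup is done, the default disposition
 * is restored and the signal is delivered again, so the parent shell
 * sees perf terminated by the original signal instead of by a plain
 * exit().
 */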

static bool perf_evlist__equal(struct perf_evlist *evlist,
			       struct perf_evlist *other)
{
	struct perf_evsel *pos, *pair;

	if (evlist->nr_entries != other->nr_entries)
		return false;

	pair = perf_evlist__first(other);

	list_for_each_entry(pos, &evlist->entries, node) {
		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
			return false;
		pair = perf_evsel__next(pair);
	}

	return true;
}

static int perf_record__open(struct perf_record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts);

	list_for_each_entry(pos, &evlist->entries, node) {
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %d)\n", opts->mmap_pages);
			rc = -errno;
		} else if (!is_power_of_2(opts->mmap_pages) &&
			   (opts->mmap_pages != UINT_MAX)) {
			pr_err("--mmap_pages/-m value must be a power of two.\n");
			rc = -EINVAL;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
			rc = -errno;
		}
		goto out;
	}

	if (rec->file_new)
		session->evlist = evlist;
	else {
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			rc = -1;
			goto out;
		}
	}

	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
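
/*
 * An illustrative sketch of satisfying the power-of-two constraint
 * checked above (the values and the rounding helper are assumptions,
 * not code from this file):
 *
 *	unsigned int pages = 100;
 *	if (!is_power_of_2(pages))
 *		pages = roundup_pow_of_two(pages);	// 100 -> 128
 *
 * Passing the result as -m/--mmap_pages avoids the -EINVAL path in
 * perf_record__open().
 */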

static int process_buildids(struct perf_record *rec)
{
	u64 size = lseek(rec->output, 0, SEEK_CUR);

	if (size == 0)
		return 0;

	rec->session->fd = rec->output;
	return __perf_session__process_events(rec->session, rec->post_processing_offset,
					      size - rec->post_processing_offset,
					      size, &build_id__mark_dso_hit_ops);
}
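
/*
 * Build-id collection is deliberately a second pass: recording streams
 * raw events to the output fd, and only at exit does process_buildids()
 * re-read everything after post_processing_offset with
 * build_id__mark_dso_hit_ops, marking which DSOs actually got samples
 * so the header carries build-ids only for those.
 */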

static void perf_record__exit(int status, void *arg)
{
	struct perf_record *rec = arg;

	if (status != 0)
		return;

	if (!rec->opts.pipe_output) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   rec->output, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * When handling the record and report subcommands for a guest
	 * kernel, we synthesize the module mmap events before the guest
	 * kernel mmap event, which triggers a preload of the module DSOs.
	 * By default guest module symbols are loaded from guest kallsyms
	 * rather than from /lib/modules/XXX/XXX, so this ordering avoids
	 * missing symbols when the first sampled address falls in a
	 * module instead of in the guest kernel itself.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text symbol.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
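
/*
 * PERF_RECORD_FINISHED_ROUND is a synthetic marker, not a kernel
 * event: it records that every mmap buffer has been drained once.
 * Tools reading the file can then sort and flush all events queued
 * before the previous marker, bounding how long events must be
 * buffered to repair cross-cpu timestamp reordering.
 */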

static int perf_record__mmap_read_all(struct perf_record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		if (rec->evlist->mmap[i].base) {
			if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
				rc = -1;
				goto out;
			}
		}
	}

	if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
		rc = write_output(rec, &finished_round_event,
				  sizeof(finished_round_event));

out:
	return rc;
}

static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err, output, feat;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	const char *output_name = rec->output_name;
	struct perf_session *session;
	bool disabled = false;

	rec->progname = argv[0];

	rec->page_size = sysconf(_SC_PAGE_SIZE);

	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			opts->pipe_output = true;
		else
			rec->output_name = output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			opts->pipe_output = true;
		else if (!stat(output_name, &st) && st.st_size) {
			if (rec->write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (rec->write_mode == WRITE_APPEND) {
			rec->write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (rec->write_mode == WRITE_APPEND)
		rec->file_new = 0;
	else
		flags |= O_TRUNC;

	if (opts->pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		return -1;
	}

	rec->output = output;

	session = perf_session__new(output_name, O_WRONLY,
				    rec->write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&evsel_list->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, &opts->target,
						    argv, opts->pipe_output,
						    true);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	if (perf_record__open(rec) != 0) {
		err = -1;
		goto out_delete_session;
	}

	if (!evsel_list->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	if (opts->pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			goto out_delete_session;
	} else if (rec->file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			goto out_delete_session;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_delete_session;
	}

	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

	machine = &session->machines.host;

	if (opts->pipe_output) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_delete_session;
		}

		err = perf_event__synthesize_event_types(tool, process_synthesized_event,
							 machine);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			goto out_delete_session;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_delete_session;
			}
			advance_output(rec, err);
		}
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	if (perf_target__has_task(&opts->target))
		err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
						  process_synthesized_event,
						  machine);
	else if (perf_target__has_cpu(&opts->target))
		err = perf_event__synthesize_threads(tool, process_synthesized_event,
					       machine);
	else /* command specified */
		err = 0;

	if (err != 0)
		goto out_delete_session;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_delete_session;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!perf_target__none(&opts->target))
		perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

	for (;;) {
		int hits = rec->samples;

		if (perf_record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_delete_session;
		}

		if (hits == rec->samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !perf_target__none(&opts->target)) {
			perf_evlist__disable(evsel_list);
			disabled = true;
		}
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		output_name,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}

#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
	const char *name;
	int mode;
};

static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_END
};

static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			*mode |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((*mode & ~ONLY_PLM) == 0) {
		*mode = PERF_SAMPLE_BRANCH_ANY;
	}
error:
	free(os);
	return ret;
}
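
/*
 * Illustrative parse: "-j any_call,u" ors together
 * PERF_SAMPLE_BRANCH_ANY_CALL | PERF_SAMPLE_BRANCH_USER, while a
 * filter naming only privilege levels (the ONLY_PLM bits) carries no
 * branch type and is replaced by PERF_SAMPLE_BRANCH_ANY in the
 * default-to-any block above.
 */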

#ifdef LIBUNWIND_SUPPORT
static int get_stack_size(char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	size = strtoul(str, &endptr, 0);

	do {
		if (*endptr)
			break;

		size = round_up(size, sizeof(u64));
		if (!size || size > max_size)
			break;

		*_size = size;
		return 0;

	} while (0);

	pr_err("callchain: Incorrect stack dump size (max %lu): %s\n",
	       max_size, str);
	return -1;
}
#endif /* LIBUNWIND_SUPPORT */
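
/*
 * Illustrative only: get_stack_size() rounds the request up to a
 * multiple of sizeof(u64) and caps it at round_down(USHRT_MAX, 8),
 * so "4097" becomes 4104 bytes while "0" or "70000" is rejected.
 */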

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg, int unset)
{
	struct perf_record_opts *opts = opt->value;
	char *tok, *name, *saveptr = NULL;
	char *buf;
	int ret = -1;

	/* --no-call-graph */
	if (unset)
		return 0;

	/* We specify a default option if none is provided. */
	BUG_ON(!arg);

	/* We need a buffer that we know we can write to. */
	buf = malloc(strlen(arg) + 1);
	if (!buf)
		return -ENOMEM;

	strcpy(buf, arg);

	tok = strtok_r((char *)buf, ",", &saveptr);
	name = tok ? : (char *)buf;

	do {
		/* Framepointer style */
		if (!strncmp(name, "fp", sizeof("fp"))) {
			if (!strtok_r(NULL, ",", &saveptr)) {
				opts->call_graph = CALLCHAIN_FP;
				ret = 0;
			} else
				pr_err("callchain: No more arguments "
				       "needed for -g fp\n");
			break;

#ifdef LIBUNWIND_SUPPORT
		/* Dwarf style */
		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
			const unsigned long default_stack_dump_size = 8192;

			ret = 0;
			opts->call_graph = CALLCHAIN_DWARF;
			opts->stack_dump_size = default_stack_dump_size;

			tok = strtok_r(NULL, ",", &saveptr);
			if (tok) {
				unsigned long size = 0;

				ret = get_stack_size(tok, &size);
				opts->stack_dump_size = size;
			}

			if (!ret)
				pr_debug("callchain: stack dump size %d\n",
					 opts->stack_dump_size);
#endif /* LIBUNWIND_SUPPORT */
		} else {
			pr_err("callchain: Unknown -g option "
			       "value: %s\n", arg);
			break;
		}

	} while (0);

	free(buf);

	if (!ret)
		pr_debug("callchain: type %d\n", opts->call_graph);

	return ret;
}
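
/*
 * Illustrative usage: "-g fp" selects frame-pointer unwinding and
 * accepts no further arguments; with libunwind support built in,
 * "-g dwarf,4096" selects DWARF unwinding with a 4096-byte stack dump
 * per sample, and a bare "-g dwarf" uses the 8192-byte default.
 */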

static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/*
 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
 * because we need to have access to it in perf_record__exit(), which is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct perf_record record = {
	.opts = {
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
		},
	},
	.write_mode = WRITE_FORCE,
	.file_new   = true,
};

#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "

#ifdef LIBUNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "[fp]";
#endif

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use perf_record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &record.append_file,
			    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &record.force,
			"overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
			     "mode[,dump_size]", record_callchain_help,
			     &record_parse_callchain_opt, "fp"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_END()
};

int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;
	struct perf_evlist *evsel_list;
	struct perf_record *rec = &record;
	char errbuf[BUFSIZ];

	evsel_list = perf_evlist__new();
	if (evsel_list == NULL)
		return -ENOMEM;

	rec->evlist = evsel_list;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && perf_target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (rec->force && rec->append_file) {
		ui__error("Can't overwrite and append at the same time."
			  " You need to choose between -f and -A\n");
		usage_with_options(record_usage, record_options);
	} else if (rec->append_file) {
		rec->write_mode = WRITE_APPEND;
	} else {
		rec->write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	err = perf_target__validate(&rec->opts.target);
	if (err) {
		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = perf_target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_free_fd;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
			goto out_free_fd;
	}

	if (rec->opts.user_interval != ULLONG_MAX)
		rec->opts.default_interval = rec->opts.user_interval;
	if (rec->opts.user_freq != UINT_MAX)
		rec->opts.freq = rec->opts.user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (rec->opts.default_interval)
		rec->opts.freq = 0;
	else if (rec->opts.freq) {
		rec->opts.default_interval = rec->opts.freq;
	} else {
		ui__error("frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(&record, argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}