xref: /linux/tools/perf/builtin-record.c (revision 90ab5ee94171b3e28de6bb42ee30b527014e0be7)
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9 
10 #include "builtin.h"
11 
12 #include "perf.h"
13 
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18 
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/symbol.h"
27 #include "util/cpumap.h"
28 #include "util/thread_map.h"
29 
30 #include <unistd.h>
31 #include <sched.h>
32 #include <sys/mman.h>
33 
/* How to treat a pre-existing output file (see __cmd_record()). */
enum write_mode_t {
	WRITE_FORCE,	/* overwrite, renaming the old file to <name>.old */
	WRITE_APPEND	/* -A: append to an existing perf.data */
};
38 
/*
 * Per-invocation state of 'perf record', handed to the perf_tool
 * callbacks via container_of(tool, ...) and to the on_exit() handlers
 * via their void *arg.
 */
struct perf_record {
	struct perf_tool	tool;		/* callbacks for synthesized events */
	struct perf_record_opts	opts;
	u64			bytes_written;	/* event payload bytes written to 'output' */
	const char		*output_name;	/* -o file name; NULL means auto-pick */
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;	/* argv[0], used by psignal() reporting */
	int			output;		/* output file descriptor */
	unsigned int		page_size;	/* sysconf(_SC_PAGE_SIZE), mmap data offset */
	int			realtime_prio;	/* nonzero: switch to SCHED_FIFO at this prio */
	enum write_mode_t	write_mode;
	bool			no_buildid;	/* -B: don't collect buildids */
	bool			no_buildid_cache;	/* -N: don't update the buildid cache */
	bool			force;		/* -f (deprecated): overwrite data file */
	bool			file_new;	/* false when appending to an existing file */
	bool			append_file;	/* -A was given */
	long			samples;	/* mmap drains so far; polled in the main loop */
	off_t			post_processing_offset;	/* file offset where event data starts */
};
59 
60 static void advance_output(struct perf_record *rec, size_t size)
61 {
62 	rec->bytes_written += size;
63 }
64 
65 static void write_output(struct perf_record *rec, void *buf, size_t size)
66 {
67 	while (size) {
68 		int ret = write(rec->output, buf, size);
69 
70 		if (ret < 0)
71 			die("failed to write");
72 
73 		size -= ret;
74 		buf += ret;
75 
76 		rec->bytes_written += ret;
77 	}
78 }
79 
80 static int process_synthesized_event(struct perf_tool *tool,
81 				     union perf_event *event,
82 				     struct perf_sample *sample __used,
83 				     struct machine *machine __used)
84 {
85 	struct perf_record *rec = container_of(tool, struct perf_record, tool);
86 	write_output(rec, event, event->header.size);
87 	return 0;
88 }
89 
/*
 * Drain one mmap'ed ring buffer into the output file.  The kernel
 * advances 'head' as it produces events; we own 'prev' (our last read
 * position).  The data may wrap around the end of the buffer, in which
 * case it is flushed in two chunks.  Statement order matters: the tail
 * must only be published after the data has been written out.
 */
static void perf_record__mmap_read(struct perf_record *rec,
				   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	/* the data area starts one page past the control page */
	unsigned char *data = md->base + rec->page_size;
	unsigned long size;
	void *buf;

	if (old == head)
		return;

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		/* wrapped: first flush from 'old' to the end of the buffer */
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		write_output(rec, buf, size);
	}

	/* flush the (remaining) contiguous chunk up to 'head' */
	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	write_output(rec, buf, size);

	md->prev = old;
	/* tell the kernel this space may be reused */
	perf_mmap__write_tail(md, old);
}
123 
/* Flags set from sig_handler() and polled in __cmd_record()'s main loop. */
static volatile int done = 0;		/* stop recording */
static volatile int signr = -1;		/* which signal stopped us, -1 if none */
static volatile int child_finished = 0;	/* SIGCHLD seen: workload has exited */
127 
128 static void sig_handler(int sig)
129 {
130 	if (sig == SIGCHLD)
131 		child_finished = 1;
132 
133 	done = 1;
134 	signr = sig;
135 }
136 
137 static void perf_record__sig_exit(int exit_status __used, void *arg)
138 {
139 	struct perf_record *rec = arg;
140 	int status;
141 
142 	if (rec->evlist->workload.pid > 0) {
143 		if (!child_finished)
144 			kill(rec->evlist->workload.pid, SIGTERM);
145 
146 		wait(&status);
147 		if (WIFSIGNALED(status))
148 			psignal(WTERMSIG(status), rec->progname);
149 	}
150 
151 	if (signr == -1 || signr == SIGUSR1)
152 		return;
153 
154 	signal(signr, SIG_DFL);
155 	kill(getpid(), signr);
156 }
157 
158 static bool perf_evlist__equal(struct perf_evlist *evlist,
159 			       struct perf_evlist *other)
160 {
161 	struct perf_evsel *pos, *pair;
162 
163 	if (evlist->nr_entries != other->nr_entries)
164 		return false;
165 
166 	pair = list_entry(other->entries.next, struct perf_evsel, node);
167 
168 	list_for_each_entry(pos, &evlist->entries, node) {
169 		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
170 			return false;
171 		pair = list_entry(pair->node.next, struct perf_evsel, node);
172 	}
173 
174 	return true;
175 }
176 
/*
 * Create the kernel counters for every event in the evlist, mmap the
 * per-cpu ring buffers and attach the evlist to the session.  Any
 * unrecoverable error exits or dies; two goto-based retry paths handle
 * older kernels (no sample_id_all) and missing hardware PMU support.
 */
static void perf_record__open(struct perf_record *rec)
{
	struct perf_evsel *pos, *first;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;

	first = list_entry(evlist->entries.next, struct perf_evsel, node);

	perf_evlist__config_attrs(evlist, opts);

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		struct xyarray *group_fd = NULL;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but short term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

		/* with --group, all non-leader events hang off the leader's fds */
		if (opts->group && pos != first)
			group_fd = first->fd;
retry_sample_id:
		attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
				     opts->group, group_fd) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				ui__error_paranoid();
				exit(EXIT_FAILURE);
			} else if (err ==  ENODEV && opts->cpu_list) {
				die("No such device - did you specify"
					" an out-of-range profile CPU?\n");
			} else if (err == EINVAL && opts->sample_id_all_avail) {
				/*
				 * Old kernel, no attr->sample_id_type_all field:
				 * drop sample_id_all and retry once per event.
				 */
				opts->sample_id_all_avail = false;
				if (!opts->sample_time && !opts->raw_samples && !time_needed)
					attr->sample_type &= ~PERF_SAMPLE_TIME;

				goto retry_sample_id;
			}

			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
			if (attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}

			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(pos));
				exit(EXIT_FAILURE);
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
			      err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
				die("No hardware sampling interrupt available."
				    " No APIC? If so then you can boot the kernel"
				    " with the \"lapic\" boot parameter to"
				    " force-enable it.\n");
#endif

			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
		}
	}

	if (perf_evlist__set_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		exit(-1);
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM)
			die("Permission error mapping pages.\n"
			    "Consider increasing "
			    "/proc/sys/kernel/perf_event_mlock_kb,\n"
			    "or try again with a smaller value of -m/--mmap_pages.\n"
			    "(current value: %d)\n", opts->mmap_pages);
		else if (!is_power_of_2(opts->mmap_pages))
			die("--mmap_pages/-m value must be a power of two.");

		die("failed to mmap with %d (%s)\n", errno, strerror(errno));
	}

	if (rec->file_new)
		session->evlist = evlist;
	else {
		/* appending: the on-disk evlist must match the one we opened */
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			exit(-1);
		}
	}

	perf_session__update_sample_type(session);
}
299 
300 static int process_buildids(struct perf_record *rec)
301 {
302 	u64 size = lseek(rec->output, 0, SEEK_CUR);
303 
304 	if (size == 0)
305 		return 0;
306 
307 	rec->session->fd = rec->output;
308 	return __perf_session__process_events(rec->session, rec->post_processing_offset,
309 					      size - rec->post_processing_offset,
310 					      size, &build_id__mark_dso_hit_ops);
311 }
312 
313 static void perf_record__exit(int status __used, void *arg)
314 {
315 	struct perf_record *rec = arg;
316 
317 	if (!rec->opts.pipe_output) {
318 		rec->session->header.data_size += rec->bytes_written;
319 
320 		if (!rec->no_buildid)
321 			process_buildids(rec);
322 		perf_session__write_header(rec->session, rec->evlist,
323 					   rec->output, true);
324 		perf_session__delete(rec->session);
325 		perf_evlist__delete(rec->evlist);
326 		symbol__exit();
327 	}
328 }
329 
330 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
331 {
332 	int err;
333 	struct perf_tool *tool = data;
334 
335 	if (machine__is_host(machine))
336 		return;
337 
338 	/*
339 	 *As for guest kernel when processing subcommand record&report,
340 	 *we arrange module mmap prior to guest kernel mmap and trigger
341 	 *a preload dso because default guest module symbols are loaded
342 	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
343 	 *method is used to avoid symbol missing when the first addr is
344 	 *in module instead of in guest kernel.
345 	 */
346 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
347 					     machine);
348 	if (err < 0)
349 		pr_err("Couldn't record guest kernel [%d]'s reference"
350 		       " relocation symbol.\n", machine->pid);
351 
352 	/*
353 	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
354 	 * have no _text sometimes.
355 	 */
356 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
357 						 machine, "_text");
358 	if (err < 0)
359 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
360 							 machine, "_stext");
361 	if (err < 0)
362 		pr_err("Couldn't record guest kernel [%d]'s reference"
363 		       " relocation symbol.\n", machine->pid);
364 }
365 
/*
 * Header-only marker event written after each full pass over the mmap
 * buffers when HEADER_TRACE_INFO is set — see perf_record__mmap_read_all().
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
370 
371 static void perf_record__mmap_read_all(struct perf_record *rec)
372 {
373 	int i;
374 
375 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
376 		if (rec->evlist->mmap[i].base)
377 			perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
378 	}
379 
380 	if (perf_header__has_feat(&rec->session->header, HEADER_TRACE_INFO))
381 		write_output(rec, &finished_round_event, sizeof(finished_round_event));
382 }
383 
384 static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
385 {
386 	struct stat st;
387 	int flags;
388 	int err, output;
389 	unsigned long waking = 0;
390 	const bool forks = argc > 0;
391 	struct machine *machine;
392 	struct perf_tool *tool = &rec->tool;
393 	struct perf_record_opts *opts = &rec->opts;
394 	struct perf_evlist *evsel_list = rec->evlist;
395 	const char *output_name = rec->output_name;
396 	struct perf_session *session;
397 
398 	rec->progname = argv[0];
399 
400 	rec->page_size = sysconf(_SC_PAGE_SIZE);
401 
402 	on_exit(perf_record__sig_exit, rec);
403 	signal(SIGCHLD, sig_handler);
404 	signal(SIGINT, sig_handler);
405 	signal(SIGUSR1, sig_handler);
406 
407 	if (!output_name) {
408 		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
409 			opts->pipe_output = true;
410 		else
411 			rec->output_name = output_name = "perf.data";
412 	}
413 	if (output_name) {
414 		if (!strcmp(output_name, "-"))
415 			opts->pipe_output = true;
416 		else if (!stat(output_name, &st) && st.st_size) {
417 			if (rec->write_mode == WRITE_FORCE) {
418 				char oldname[PATH_MAX];
419 				snprintf(oldname, sizeof(oldname), "%s.old",
420 					 output_name);
421 				unlink(oldname);
422 				rename(output_name, oldname);
423 			}
424 		} else if (rec->write_mode == WRITE_APPEND) {
425 			rec->write_mode = WRITE_FORCE;
426 		}
427 	}
428 
429 	flags = O_CREAT|O_RDWR;
430 	if (rec->write_mode == WRITE_APPEND)
431 		rec->file_new = 0;
432 	else
433 		flags |= O_TRUNC;
434 
435 	if (opts->pipe_output)
436 		output = STDOUT_FILENO;
437 	else
438 		output = open(output_name, flags, S_IRUSR | S_IWUSR);
439 	if (output < 0) {
440 		perror("failed to create output file");
441 		exit(-1);
442 	}
443 
444 	rec->output = output;
445 
446 	session = perf_session__new(output_name, O_WRONLY,
447 				    rec->write_mode == WRITE_FORCE, false, NULL);
448 	if (session == NULL) {
449 		pr_err("Not enough memory for reading perf file header\n");
450 		return -1;
451 	}
452 
453 	rec->session = session;
454 
455 	if (!rec->no_buildid)
456 		perf_header__set_feat(&session->header, HEADER_BUILD_ID);
457 
458 	if (!rec->file_new) {
459 		err = perf_session__read_header(session, output);
460 		if (err < 0)
461 			goto out_delete_session;
462 	}
463 
464 	if (have_tracepoints(&evsel_list->entries))
465 		perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
466 
467 	perf_header__set_feat(&session->header, HEADER_HOSTNAME);
468 	perf_header__set_feat(&session->header, HEADER_OSRELEASE);
469 	perf_header__set_feat(&session->header, HEADER_ARCH);
470 	perf_header__set_feat(&session->header, HEADER_CPUDESC);
471 	perf_header__set_feat(&session->header, HEADER_NRCPUS);
472 	perf_header__set_feat(&session->header, HEADER_EVENT_DESC);
473 	perf_header__set_feat(&session->header, HEADER_CMDLINE);
474 	perf_header__set_feat(&session->header, HEADER_VERSION);
475 	perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
476 	perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
477 	perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
478 	perf_header__set_feat(&session->header, HEADER_CPUID);
479 
480 	if (forks) {
481 		err = perf_evlist__prepare_workload(evsel_list, opts, argv);
482 		if (err < 0) {
483 			pr_err("Couldn't run the workload!\n");
484 			goto out_delete_session;
485 		}
486 	}
487 
488 	perf_record__open(rec);
489 
490 	/*
491 	 * perf_session__delete(session) will be called at perf_record__exit()
492 	 */
493 	on_exit(perf_record__exit, rec);
494 
495 	if (opts->pipe_output) {
496 		err = perf_header__write_pipe(output);
497 		if (err < 0)
498 			return err;
499 	} else if (rec->file_new) {
500 		err = perf_session__write_header(session, evsel_list,
501 						 output, false);
502 		if (err < 0)
503 			return err;
504 	}
505 
506 	if (!!rec->no_buildid
507 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
508 		pr_err("Couldn't generating buildids. "
509 		       "Use --no-buildid to profile anyway.\n");
510 		return -1;
511 	}
512 
513 	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
514 
515 	machine = perf_session__find_host_machine(session);
516 	if (!machine) {
517 		pr_err("Couldn't find native kernel information.\n");
518 		return -1;
519 	}
520 
521 	if (opts->pipe_output) {
522 		err = perf_event__synthesize_attrs(tool, session,
523 						   process_synthesized_event);
524 		if (err < 0) {
525 			pr_err("Couldn't synthesize attrs.\n");
526 			return err;
527 		}
528 
529 		err = perf_event__synthesize_event_types(tool, process_synthesized_event,
530 							 machine);
531 		if (err < 0) {
532 			pr_err("Couldn't synthesize event_types.\n");
533 			return err;
534 		}
535 
536 		if (have_tracepoints(&evsel_list->entries)) {
537 			/*
538 			 * FIXME err <= 0 here actually means that
539 			 * there were no tracepoints so its not really
540 			 * an error, just that we don't need to
541 			 * synthesize anything.  We really have to
542 			 * return this more properly and also
543 			 * propagate errors that now are calling die()
544 			 */
545 			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
546 								  process_synthesized_event);
547 			if (err <= 0) {
548 				pr_err("Couldn't record tracing data.\n");
549 				return err;
550 			}
551 			advance_output(rec, err);
552 		}
553 	}
554 
555 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
556 						 machine, "_text");
557 	if (err < 0)
558 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
559 							 machine, "_stext");
560 	if (err < 0)
561 		pr_err("Couldn't record kernel reference relocation symbol\n"
562 		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
563 		       "Check /proc/kallsyms permission or run as root.\n");
564 
565 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
566 					     machine);
567 	if (err < 0)
568 		pr_err("Couldn't record kernel module information.\n"
569 		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
570 		       "Check /proc/modules permission or run as root.\n");
571 
572 	if (perf_guest)
573 		perf_session__process_machines(session, tool,
574 					       perf_event__synthesize_guest_os);
575 
576 	if (!opts->system_wide)
577 		perf_event__synthesize_thread_map(tool, evsel_list->threads,
578 						  process_synthesized_event,
579 						  machine);
580 	else
581 		perf_event__synthesize_threads(tool, process_synthesized_event,
582 					       machine);
583 
584 	if (rec->realtime_prio) {
585 		struct sched_param param;
586 
587 		param.sched_priority = rec->realtime_prio;
588 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
589 			pr_err("Could not set realtime priority.\n");
590 			exit(-1);
591 		}
592 	}
593 
594 	perf_evlist__enable(evsel_list);
595 
596 	/*
597 	 * Let the child rip
598 	 */
599 	if (forks)
600 		perf_evlist__start_workload(evsel_list);
601 
602 	for (;;) {
603 		int hits = rec->samples;
604 
605 		perf_record__mmap_read_all(rec);
606 
607 		if (hits == rec->samples) {
608 			if (done)
609 				break;
610 			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
611 			waking++;
612 		}
613 
614 		if (done)
615 			perf_evlist__disable(evsel_list);
616 	}
617 
618 	if (quiet || signr == SIGUSR1)
619 		return 0;
620 
621 	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
622 
623 	/*
624 	 * Approximate RIP event size: 24 bytes.
625 	 */
626 	fprintf(stderr,
627 		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
628 		(double)rec->bytes_written / 1024.0 / 1024.0,
629 		output_name,
630 		rec->bytes_written / 24);
631 
632 	return 0;
633 
634 out_delete_session:
635 	perf_session__delete(session);
636 	return err;
637 }
638 
/* Usage strings printed by parse_options()/usage_with_options(). */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
644 
645 /*
646  * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
647  * because we need to have access to it in perf_record__exit, that is called
648  * after cmd_record() exits, but since record_options need to be accessible to
649  * builtin-script, leave it here.
650  *
 * At least we don't touch it in all the other functions here directly.
652  *
653  * Just say no to tons of global variables, sigh.
654  */
static struct perf_record record = {
	.opts = {
		.target_pid	     = -1,	/* -1: no target process */
		.target_tid	     = -1,	/* -1: no target thread */
		.mmap_pages	     = UINT_MAX,	/* UINT_MAX/ULLONG_MAX are "not set by */
		.user_freq	     = UINT_MAX,	/* the user" sentinels, resolved in */
		.user_interval	     = ULLONG_MAX,	/* cmd_record() */
		.freq		     = 1000,	/* default sampling frequency */
		.sample_id_all_avail = true,	/* cleared on EINVAL from old kernels */
	},
	.write_mode = WRITE_FORCE,
	.file_new   = true,
};
668 
669 /*
670  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
671  * with it and switch to use the library functions in perf_evlist that came
672  * from builtin-record.c, i.e. use perf_record_opts,
673  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
674  * using pipes, etc.
675  */
/* 'perf record' command-line option table (non-static: shared with builtin-script.c). */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_INTEGER('p', "pid", &record.opts.target_pid,
		    "record events on existing process id"),
	OPT_INTEGER('t', "tid", &record.opts.target_tid,
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &record.append_file,
			    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &record.opts.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &record.force,
			"overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph,
		    "do call-graph (stack chain/backtrace) recording"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_END()
};
732 
/*
 * Entry point for 'perf record': parse options, validate the target
 * selection, build the evlist and cpu/thread maps, then hand off to
 * __cmd_record().  Returns 0 on success or a negative errno value.
 */
int cmd_record(int argc, const char **argv, const char *prefix __used)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;
	struct perf_evlist *evsel_list;
	struct perf_record *rec = &record;

	perf_header__set_cmdline(argc, argv);

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	rec->evlist = evsel_list;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	/* need a workload to fork or an existing target (-p/-t/-a/-C) */
	if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 &&
		!rec->opts.system_wide && !rec->opts.cpu_list)
		usage_with_options(record_usage, record_options);

	if (rec->force && rec->append_file) {
		fprintf(stderr, "Can't overwrite and append at the same time."
				" You need to choose between -f and -A");
		usage_with_options(record_usage, record_options);
	} else if (rec->append_file) {
		rec->write_mode = WRITE_APPEND;
	} else {
		rec->write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !rec->opts.system_wide) {
		fprintf(stderr, "cgroup monitoring only available in"
			" system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	/* no -e given: fall back to the default event */
	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	if (rec->opts.target_pid != -1)
		rec->opts.target_tid = rec->opts.target_pid;

	if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid,
				     rec->opts.target_tid, rec->opts.cpu_list) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_header__push_event(pos->attr.config, event_name(pos)))
			goto out_free_fd;
	}

	/* resolve the UINT_MAX/ULLONG_MAX "not set" sentinels from 'record' */
	if (rec->opts.user_interval != ULLONG_MAX)
		rec->opts.default_interval = rec->opts.user_interval;
	if (rec->opts.user_freq != UINT_MAX)
		rec->opts.freq = rec->opts.user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (rec->opts.default_interval)
		rec->opts.freq = 0;
	else if (rec->opts.freq) {
		rec->opts.default_interval = rec->opts.freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(&record, argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}
828