xref: /linux/tools/perf/builtin-record.c (revision a67ff6a54095e27093ea501fb143fefe51a536c2)
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9 
10 #include "builtin.h"
11 
12 #include "perf.h"
13 
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18 
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/symbol.h"
26 #include "util/cpumap.h"
27 #include "util/thread_map.h"
28 
29 #include <unistd.h>
30 #include <sched.h>
31 #include <sys/mman.h>
32 
/* How to open the output file: clobber it (-f) or append to it (-A). */
enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};

/*
 * Sampling period/frequency requested on the command line; the *_MAX
 * sentinels mean "not specified by the user" (reconciled in cmd_record()).
 */
static u64			user_interval			= ULLONG_MAX;
static u64			default_interval		=      0;

static unsigned int		page_size;		/* sysconf(_SC_PAGE_SIZE), set in __cmd_record() */
static unsigned int		mmap_pages			= UINT_MAX;	/* UINT_MAX: pick a default later */
static unsigned int		user_freq 			= UINT_MAX;
static int			freq				=   1000;	/* default sampling frequency (Hz) */
static int			output;			/* output file descriptor */
static int			pipe_output			=      0;	/* writing to a pipe (stdout)? */
static const char		*output_name			= NULL;	/* defaults to "perf.data" */
static bool			group				=  false;
static int			realtime_prio			=      0;
static bool			nodelay				=  false;	/* -D: wake up on every event */
static bool			raw_samples			=  false;
static bool			sample_id_all_avail		=   true;	/* cleared when the kernel rejects sample_id_all */
static bool			system_wide			=  false;
static pid_t			target_pid			=     -1;
static pid_t			target_tid			=     -1;
static pid_t			child_pid			=     -1;	/* forked workload, -1 if none */
static bool			no_inherit			=  false;
static enum write_mode_t	write_mode			= WRITE_FORCE;
static bool			call_graph			=  false;
static bool			inherit_stat			=  false;
static bool			no_samples			=  false;
static bool			sample_address			=  false;
static bool			sample_time			=  false;
static bool			no_buildid			=  false;
static bool			no_buildid_cache		=  false;
static struct perf_evlist	*evsel_list;

/* Bookkeeping for the status line printed when recording stops. */
static long			samples				=      0;
static u64			bytes_written			=      0;

static int			file_new			=      1;	/* 0 when appending to an existing file */
static off_t			post_processing_offset;	/* file offset where sample data begins */

static struct perf_session	*session;
static const char		*cpu_list;
static const char               *progname;
77 
/*
 * Account for bytes somebody else wrote directly to the output fd
 * (e.g. perf_event__synthesize_tracing_data() in __cmd_record()), so
 * the header's data_size stays correct.
 */
static void advance_output(size_t size)
{
	bytes_written += size;
}
82 
83 static void write_output(void *buf, size_t size)
84 {
85 	while (size) {
86 		int ret = write(output, buf, size);
87 
88 		if (ret < 0)
89 			die("failed to write");
90 
91 		size -= ret;
92 		buf += ret;
93 
94 		bytes_written += ret;
95 	}
96 }
97 
/*
 * Callback handed to the perf_event__synthesize_*() helpers: simply
 * append the synthesized event to the output file.  Always returns 0.
 */
static int process_synthesized_event(union perf_event *event,
				     struct perf_sample *sample __used,
				     struct perf_session *self __used)
{
	write_output(event, event->header.size);
	return 0;
}
105 
/*
 * Drain one mmap'ed ring buffer: copy everything between our last read
 * position (md->prev) and the kernel's write head to the output file,
 * then publish the new tail so the kernel can reuse the space.
 */
static void mmap_read(struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + page_size;	/* data area follows the control page */
	unsigned long size;
	void *buf;

	/* Nothing new since the last sweep. */
	if (old == head)
		return;

	samples++;

	size = head - old;

	/*
	 * If the new data wraps past the end of the ring buffer, first
	 * write the chunk that runs up to the end...
	 */
	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		write_output(buf, size);
	}

	/* ...then the (remaining) chunk starting at the buffer base. */
	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	write_output(buf, size);

	md->prev = old;
	perf_mmap__write_tail(md, old);
}
138 
/* Set from signal context by sig_handler(); polled by __cmd_record(). */
static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;
142 
143 static void sig_handler(int sig)
144 {
145 	if (sig == SIGCHLD)
146 		child_finished = 1;
147 
148 	done = 1;
149 	signr = sig;
150 }
151 
152 static void sig_atexit(void)
153 {
154 	int status;
155 
156 	if (child_pid > 0) {
157 		if (!child_finished)
158 			kill(child_pid, SIGTERM);
159 
160 		wait(&status);
161 		if (WIFSIGNALED(status))
162 			psignal(WTERMSIG(status), progname);
163 	}
164 
165 	if (signr == -1 || signr == SIGUSR1)
166 		return;
167 
168 	signal(signr, SIG_DFL);
169 	kill(getpid(), signr);
170 }
171 
/*
 * Fill in one event's perf_event_attr from the command line switches.
 * Later option checks OR extra sample_type bits on top of the defaults
 * set at the top, so the order of the blocks below matters.
 */
static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
{
	struct perf_event_attr *attr = &evsel->attr;
	int track = !evsel->idx; /* only the first counter needs these */

	attr->disabled		= 1;
	attr->inherit		= !no_inherit;
	attr->read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
				  PERF_FORMAT_TOTAL_TIME_RUNNING |
				  PERF_FORMAT_ID;

	attr->sample_type	|= PERF_SAMPLE_IP | PERF_SAMPLE_TID;

	/* With more than one event, sample IDs are needed to demux. */
	if (evlist->nr_entries > 1)
		attr->sample_type |= PERF_SAMPLE_ID;

	/*
	 * We default some events to a 1 default interval. But keep
	 * it a weak assumption overridable by the user.
	 */
	if (!attr->sample_period || (user_freq != UINT_MAX &&
				     user_interval != ULLONG_MAX)) {
		if (freq) {
			attr->sample_type	|= PERF_SAMPLE_PERIOD;
			attr->freq		= 1;
			attr->sample_freq	= freq;
		} else {
			attr->sample_period = default_interval;
		}
	}

	/* -n: count only, never sample. */
	if (no_samples)
		attr->sample_freq = 0;

	if (inherit_stat)
		attr->inherit_stat = 1;

	if (sample_address) {
		attr->sample_type	|= PERF_SAMPLE_ADDR;
		attr->mmap_data = track;
	}

	if (call_graph)
		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;

	if (system_wide)
		attr->sample_type	|= PERF_SAMPLE_CPU;

	/* Timestamps, unless the kernel was found to lack sample_id_all. */
	if (sample_id_all_avail &&
	    (sample_time || system_wide || !no_inherit || cpu_list))
		attr->sample_type	|= PERF_SAMPLE_TIME;

	if (raw_samples) {
		attr->sample_type	|= PERF_SAMPLE_TIME;
		attr->sample_type	|= PERF_SAMPLE_RAW;
		attr->sample_type	|= PERF_SAMPLE_CPU;
	}

	/* -D: no buffering, wake the tool on every single event. */
	if (nodelay) {
		attr->watermark = 0;
		attr->wakeup_events = 1;
	}

	attr->mmap		= track;
	attr->comm		= track;

	/*
	 * When profiling a forked workload, keep the counter disabled
	 * until the child exec()s, so we don't profile perf itself.
	 */
	if (target_pid == -1 && target_tid == -1 && !system_wide) {
		attr->disabled = 1;
		attr->enable_on_exec = 1;
	}
}
243 
244 static bool perf_evlist__equal(struct perf_evlist *evlist,
245 			       struct perf_evlist *other)
246 {
247 	struct perf_evsel *pos, *pair;
248 
249 	if (evlist->nr_entries != other->nr_entries)
250 		return false;
251 
252 	pair = list_entry(other->entries.next, struct perf_evsel, node);
253 
254 	list_for_each_entry(pos, &evlist->entries, node) {
255 		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
256 			return false;
257 		pair = list_entry(pair->node.next, struct perf_evsel, node);
258 	}
259 
260 	return true;
261 }
262 
/*
 * Create the kernel counters for every event in @evlist, retrying with
 * reduced features on older kernels, then mmap the buffers and attach
 * the evlist to the session.  Exits the process on unrecoverable errors.
 */
static void open_counters(struct perf_evlist *evlist)
{
	struct perf_evsel *pos, *first;

	/* A negative first CPU map entry means per-task monitoring. */
	if (evlist->cpus->map[0] < 0)
		no_inherit = true;

	first = list_entry(evlist->entries.next, struct perf_evsel, node);

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		struct xyarray *group_fd = NULL;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but short term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

		/* Grouped events anchor onto the group leader's fds. */
		if (group && pos != first)
			group_fd = first->fd;

		config_attr(pos, evlist);
retry_sample_id:
		attr->sample_id_all = sample_id_all_avail ? 1 : 0;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group,
				     group_fd) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				ui__error_paranoid();
				exit(EXIT_FAILURE);
			} else if (err ==  ENODEV && cpu_list) {
				die("No such device - did you specify"
					" an out-of-range profile CPU?\n");
			} else if (err == EINVAL && sample_id_all_avail) {
				/*
				 * Old kernel, no attr->sample_id_type_all field
				 */
				sample_id_all_avail = false;
				if (!sample_time && !raw_samples && !time_needed)
					attr->sample_type &= ~PERF_SAMPLE_TIME;

				goto retry_sample_id;
			}

			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
			if (attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}

			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(pos));
				exit(EXIT_FAILURE);
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
			      err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
				die("No hardware sampling interrupt available."
				    " No APIC? If so then you can boot the kernel"
				    " with the \"lapic\" boot parameter to"
				    " force-enable it.\n");
#endif

			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
		}
	}

	if (perf_evlist__set_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		exit(-1);
	}

	if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
		die("failed to mmap with %d (%s)\n", errno, strerror(errno));

	if (file_new)
		session->evlist = evlist;
	else {
		/* Appending: the new events must match the file's events. */
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			exit(-1);
		}
	}

	perf_session__update_sample_type(session);
}
375 
376 static int process_buildids(void)
377 {
378 	u64 size = lseek(output, 0, SEEK_CUR);
379 
380 	if (size == 0)
381 		return 0;
382 
383 	session->fd = output;
384 	return __perf_session__process_events(session, post_processing_offset,
385 					      size - post_processing_offset,
386 					      size, &build_id__mark_dso_hit_ops);
387 }
388 
/*
 * atexit() hook: finalize the on-disk header (total data size plus
 * build-ids) and tear everything down.  Skipped entirely in pipe mode,
 * where there is no seekable header to rewrite.
 */
static void atexit_header(void)
{
	if (!pipe_output) {
		session->header.data_size += bytes_written;

		if (!no_buildid)
			process_buildids();
		perf_session__write_header(session, evsel_list, output, true);
		perf_session__delete(session);
		perf_evlist__delete(evsel_list);
		symbol__exit();
	}
}
402 
/*
 * Per-machine callback (see perf_session__process_machines() in
 * __cmd_record()): synthesize module and kernel mmap events for a
 * guest so its samples can be resolved later.  The host machine is
 * handled separately and skipped here.
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_session *psession = data;

	if (machine__is_host(machine))
		return;

	/*
	 *As for guest kernel when processing subcommand record&report,
	 *we arrange module mmap prior to guest kernel mmap and trigger
	 *a preload dso because default guest module symbols are loaded
	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 *method is used to avoid symbol missing when the first addr is
	 *in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(process_synthesized_event,
					     psession, machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
						 psession, machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
							 psession, machine,
							 "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}
439 
/*
 * PERF_RECORD_FINISHED_ROUND marker written after each sweep over the
 * mmap buffers when tracing data is recorded (see mmap_read_all()).
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
444 
/*
 * One sweep over every mmap'ed buffer, draining each to the output
 * file; when tracepoint data is present, terminate the sweep with a
 * FINISHED_ROUND marker event.
 */
static void mmap_read_all(void)
{
	int i;

	for (i = 0; i < evsel_list->nr_mmaps; i++) {
		if (evsel_list->mmap[i].base)
			mmap_read(&evsel_list->mmap[i]);
	}

	if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
		write_output(&finished_round_event, sizeof(finished_round_event));
}
457 
/*
 * The guts of 'perf record': open the output file and session, fork
 * the workload (when a command was given), open/mmap the counters,
 * synthesize the initial metadata events, then stream samples to disk
 * until the workload exits or a signal arrives.  Returns 0 on success,
 * a negative error otherwise; exits directly on setup failures.
 */
static int __cmd_record(int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err;
	unsigned long waking = 0;
	int child_ready_pipe[2], go_pipe[2];
	const bool forks = argc > 0;
	char buf;
	struct machine *machine;

	progname = argv[0];

	page_size = sysconf(_SC_PAGE_SIZE);

	atexit(sig_atexit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	/* Pipes used to hand-shake startup with the forked workload. */
	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
		perror("failed to create pipes");
		exit(-1);
	}

	/* No -o given: stream to stdout if it's a fifo, else perf.data. */
	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			pipe_output = 1;
		else
			output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			pipe_output = 1;
		else if (!stat(output_name, &st) && st.st_size) {
			if (write_mode == WRITE_FORCE) {
				/* Preserve the previous data as <name>.old. */
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (write_mode == WRITE_APPEND) {
			/* Nothing to append to: create a fresh file instead. */
			write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (write_mode == WRITE_APPEND)
		file_new = 0;
	else
		flags |= O_TRUNC;

	if (pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		exit(-1);
	}

	session = perf_session__new(output_name, O_WRONLY,
				    write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	if (!no_buildid)
		perf_header__set_feat(&session->header, HEADER_BUILD_ID);

	/* When appending, read back the existing file's header first. */
	if (!file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (have_tracepoints(&evsel_list->entries))
		perf_header__set_feat(&session->header, HEADER_TRACE_INFO);

	/* Describe the capture environment in the file header. */
	perf_header__set_feat(&session->header, HEADER_HOSTNAME);
	perf_header__set_feat(&session->header, HEADER_OSRELEASE);
	perf_header__set_feat(&session->header, HEADER_ARCH);
	perf_header__set_feat(&session->header, HEADER_CPUDESC);
	perf_header__set_feat(&session->header, HEADER_NRCPUS);
	perf_header__set_feat(&session->header, HEADER_EVENT_DESC);
	perf_header__set_feat(&session->header, HEADER_CMDLINE);
	perf_header__set_feat(&session->header, HEADER_VERSION);
	perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
	perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
	perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
	perf_header__set_feat(&session->header, HEADER_CPUID);

	/* 512 kiB: default amount of unprivileged mlocked memory */
	if (mmap_pages == UINT_MAX)
		mmap_pages = (512 * 1024) / page_size;

	if (forks) {
		child_pid = fork();
		if (child_pid < 0) {
			perror("failed to fork");
			exit(-1);
		}

		if (!child_pid) {
			/* Child: wait for the go signal, then exec the workload. */
			if (pipe_output)
				dup2(2, 1);
			close(child_ready_pipe[0]);
			close(go_pipe[1]);
			fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

			/*
			 * Do a dummy execvp to get the PLT entry resolved,
			 * so we avoid the resolver overhead on the real
			 * execvp call.
			 */
			execvp("", (char **)argv);

			/*
			 * Tell the parent we're ready to go
			 */
			close(child_ready_pipe[1]);

			/*
			 * Wait until the parent tells us to go.
			 */
			if (read(go_pipe[0], &buf, 1) == -1)
				perror("unable to read pipe");

			execvp(argv[0], (char **)argv);

			/* exec failed: signal the parent via SIGUSR1. */
			perror(argv[0]);
			kill(getppid(), SIGUSR1);
			exit(-1);
		}

		if (!system_wide && target_tid == -1 && target_pid == -1)
			evsel_list->threads->map[0] = child_pid;

		close(child_ready_pipe[1]);
		close(go_pipe[0]);
		/*
		 * wait for child to settle
		 */
		if (read(child_ready_pipe[0], &buf, 1) == -1) {
			perror("unable to read pipe");
			exit(-1);
		}
		close(child_ready_pipe[0]);
	}

	open_counters(evsel_list);

	/*
	 * perf_session__delete(session) will be called at atexit_header()
	 */
	atexit(atexit_header);

	/* Write the preliminary header; it is finalized in atexit_header(). */
	if (pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			return err;
	} else if (file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			return err;
	}

	post_processing_offset = lseek(output, 0, SEEK_CUR);

	/* Pipe mode: the reader can't seek, so stream the metadata inline. */
	if (pipe_output) {
		err = perf_session__synthesize_attrs(session,
						     process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}

		err = perf_event__synthesize_event_types(process_synthesized_event,
							 session);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			return err;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(output, evsel_list,
								  process_synthesized_event,
								  session);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				return err;
			}
			advance_output(err);
		}
	}

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		return -1;
	}

	/* Synthesize kernel/module mmap events for later symbol resolution. */
	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
						 session, machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
							 session, machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(process_synthesized_event,
					     session, machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session,
					       perf_event__synthesize_guest_os);

	/* Describe the pre-existing threads we are about to monitor. */
	if (!system_wide)
		perf_event__synthesize_thread_map(evsel_list->threads,
						  process_synthesized_event,
						  session);
	else
		perf_event__synthesize_threads(process_synthesized_event,
					       session);

	if (realtime_prio) {
		struct sched_param param;

		param.sched_priority = realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		close(go_pipe[1]);

	/* Main capture loop: drain the mmap buffers until told to stop. */
	for (;;) {
		int hits = samples;

		mmap_read_all();

		if (hits == samples) {
			/* No new data this pass: stop if asked to, else sleep. */
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/*
		 * After a stop signal, disable the counters so the buffers
		 * run dry and the break above can trigger.
		 */
		if (done)
			perf_evlist__disable(evsel_list);
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)bytes_written / 1024.0 / 1024.0,
		output_name,
		bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}
754 
/* Usage strings shown by usage_with_options(). */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/* Raw -f / -A flags, reconciled into write_mode by cmd_record(). */
static bool force, append_file;
762 
/* Command line option table for 'perf record'. */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_INTEGER('p', "pid", &target_pid,
		    "record events on existing process id"),
	OPT_INTEGER('t', "tid", &target_tid,
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &nodelay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
			    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &append_file,
			    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &force,
			"overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &user_interval, "event period to sample"),
	OPT_STRING('o', "output", &output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('g', "call-graph", &call_graph,
		    "do call-graph (stack chain/backtrace) recording"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
	OPT_BOOLEAN('n', "no-samples", &no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_END()
};
817 
/*
 * Entry point for 'perf record': parse and validate the command line,
 * build the event list and target maps, resolve period/frequency
 * settings, then hand off to __cmd_record().
 */
int cmd_record(int argc, const char **argv, const char *prefix __used)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;

	perf_header__set_cmdline(argc, argv);

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	/* Need a target: a command, a pid/tid, or system/cpu-wide mode. */
	if (!argc && target_pid == -1 && target_tid == -1 &&
		!system_wide && !cpu_list)
		usage_with_options(record_usage, record_options);

	/* Reconcile the mutually exclusive -f / -A flags into write_mode. */
	if (force && append_file) {
		fprintf(stderr, "Can't overwrite and append at the same time."
				" You need to choose between -f and -A");
		usage_with_options(record_usage, record_options);
	} else if (append_file) {
		write_mode = WRITE_APPEND;
	} else {
		write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !system_wide) {
		fprintf(stderr, "cgroup monitoring only available in"
			" system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (no_buildid_cache || no_buildid)
		disable_buildid_cache();

	/* No -e given: fall back to the default event. */
	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	if (target_pid != -1)
		target_tid = target_pid;

	if (perf_evlist__create_maps(evsel_list, target_pid,
				     target_tid, cpu_list) < 0)
		usage_with_options(record_usage, record_options);

	/* Allocate the per-cpu/per-thread fd arrays for every event. */
	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
					 evsel_list->threads->nr) < 0)
			goto out_free_fd;
		if (perf_header__push_event(pos->attr.config, event_name(pos)))
			goto out_free_fd;
	}

	if (perf_evlist__alloc_pollfd(evsel_list) < 0)
		goto out_free_fd;

	if (user_interval != ULLONG_MAX)
		default_interval = user_interval;
	if (user_freq != UINT_MAX)
		freq = user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (default_interval)
		freq = 0;
	else if (freq) {
		default_interval = freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}
915