xref: /linux/tools/perf/builtin-record.c (revision d229807f669ba3dea9f64467ee965051c4366aed)
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9 
10 #include "builtin.h"
11 
12 #include "perf.h"
13 
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18 
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/symbol.h"
26 #include "util/cpumap.h"
27 #include "util/thread_map.h"
28 
29 #include <unistd.h>
30 #include <sched.h>
31 #include <sys/mman.h>
32 
/* How an existing perf.data output file is treated. */
enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};

/* -c/--count: user supplied sample period; ULLONG_MAX means "unset". */
static u64			user_interval			= ULLONG_MAX;
static u64			default_interval		=      0;

static unsigned int		page_size;
/* -m/--mmap-pages: ring buffer size; UINT_MAX means "use the 512 kiB default". */
static unsigned int		mmap_pages			= UINT_MAX;
/* -F/--freq: user supplied sampling frequency; UINT_MAX means "unset". */
static unsigned int		user_freq 			= UINT_MAX;
static int			freq				=   1000;
static int			output;			/* perf.data (or pipe) file descriptor */
static int			pipe_output			=      0;
static const char		*output_name			= NULL;
static bool			group				=  false;
static int			realtime_prio			=      0;
static bool			nodelay				=  false;
static bool			raw_samples			=  false;
/* Cleared at open time if the running kernel lacks attr.sample_id_all. */
static bool			sample_id_all_avail		=   true;
static bool			system_wide			=  false;
static pid_t			target_pid			=     -1;
static pid_t			target_tid			=     -1;
static pid_t			child_pid			=     -1;	/* forked workload, -1 if none */
static bool			no_inherit			=  false;
static enum write_mode_t	write_mode			= WRITE_FORCE;
static bool			call_graph			=  false;
static bool			inherit_stat			=  false;
static bool			no_samples			=  false;
static bool			sample_address			=  false;
static bool			sample_time			=  false;
static bool			no_buildid			=  false;
static bool			no_buildid_cache		=  false;
static struct perf_evlist	*evsel_list;

/* Counters feeding the end-of-run summary line. */
static long			samples				=      0;
static u64			bytes_written			=      0;

/* 1 when creating a fresh perf.data, 0 when appending (-A). */
static int			file_new			=      1;
static off_t			post_processing_offset;

static struct perf_session	*session;
static const char		*cpu_list;
static const char               *progname;
77 
/*
 * Account for @size bytes already written to the output fd by some
 * other path (e.g. synthesized tracing data), so that the header's
 * data_size and the final summary stay accurate.
 */
static void advance_output(size_t size)
{
	bytes_written += size;
}
82 
83 static void write_output(void *buf, size_t size)
84 {
85 	while (size) {
86 		int ret = write(output, buf, size);
87 
88 		if (ret < 0)
89 			die("failed to write");
90 
91 		size -= ret;
92 		buf += ret;
93 
94 		bytes_written += ret;
95 	}
96 }
97 
/*
 * Callback used by the perf_event__synthesize_*() helpers: forward a
 * synthesized event straight into the perf.data output stream.
 * Always returns 0 (write errors die() inside write_output()).
 */
static int process_synthesized_event(union perf_event *event,
				     struct perf_sample *sample __used,
				     struct perf_session *self __used)
{
	write_output(event, event->header.size);
	return 0;
}
105 
/*
 * Drain one mmap'ed ring buffer: copy everything between our cached
 * tail (md->prev) and the kernel's current head into the output file,
 * handling wrap-around at the end of the buffer, then publish the new
 * tail so the kernel can reuse the space.
 */
static void mmap_read(struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	/* Sample data begins one page in, after the control page. */
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;

	/* Nothing new in this ring buffer. */
	if (old == head)
		return;

	samples++;

	size = head - old;

	/*
	 * If the new data wraps past the end of the ring buffer,
	 * first write the chunk up to the end of the buffer.
	 */
	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		write_output(buf, size);
	}

	/* Write the (remaining) contiguous chunk. */
	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	write_output(buf, size);

	/* Publish the consumed position back to the kernel. */
	md->prev = old;
	perf_mmap__write_tail(md, old);
}
138 
/* Set from sig_handler(); polled by the main record loop. */
static volatile int done = 0;
/* Which signal terminated us; re-raised in sig_atexit(). */
static volatile int signr = -1;
/* SIGCHLD seen: the forked workload already exited. */
static volatile int child_finished = 0;
142 
143 static void sig_handler(int sig)
144 {
145 	if (sig == SIGCHLD)
146 		child_finished = 1;
147 
148 	done = 1;
149 	signr = sig;
150 }
151 
/*
 * atexit() hook: reap the forked workload (terminating it first if it
 * is still running) and, when we are exiting because of a fatal
 * signal, re-raise that signal with the default disposition so the
 * process exit status reflects it.
 */
static void sig_atexit(void)
{
	int status;

	if (child_pid > 0) {
		if (!child_finished)
			kill(child_pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), progname);
	}

	/* SIGUSR1 means the child failed to exec; nothing to re-raise. */
	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}
171 
/*
 * Fill in one event's perf_event_attr from the global command-line
 * configuration: sampling period/frequency, callchains, timestamps,
 * raw samples, etc.  Only the first counter (idx == 0) carries the
 * mmap/comm tracking flags so those events are not duplicated.
 */
static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
{
	struct perf_event_attr *attr = &evsel->attr;
	int track = !evsel->idx; /* only the first counter needs these */

	attr->disabled		= 1;
	attr->inherit		= !no_inherit;
	attr->read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
				  PERF_FORMAT_TOTAL_TIME_RUNNING |
				  PERF_FORMAT_ID;

	attr->sample_type	|= PERF_SAMPLE_IP | PERF_SAMPLE_TID;

	/* With more than one event, samples need an ID to demultiplex. */
	if (evlist->nr_entries > 1)
		attr->sample_type |= PERF_SAMPLE_ID;

	/*
	 * We default some events to a 1 default interval. But keep
	 * it a weak assumption overridable by the user.
	 */
	if (!attr->sample_period || (user_freq != UINT_MAX &&
				     user_interval != ULLONG_MAX)) {
		if (freq) {
			attr->sample_type	|= PERF_SAMPLE_PERIOD;
			attr->freq		= 1;
			attr->sample_freq	= freq;
		} else {
			attr->sample_period = default_interval;
		}
	}

	/* -n/--no-samples: keep the event open but never sample. */
	if (no_samples)
		attr->sample_freq = 0;

	if (inherit_stat)
		attr->inherit_stat = 1;

	if (sample_address) {
		attr->sample_type	|= PERF_SAMPLE_ADDR;
		attr->mmap_data = track;
	}

	if (call_graph)
		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;

	if (system_wide)
		attr->sample_type	|= PERF_SAMPLE_CPU;

	/* Timestamps are needed to order events from multiple contexts. */
	if (sample_id_all_avail &&
	    (sample_time || system_wide || !no_inherit || cpu_list))
		attr->sample_type	|= PERF_SAMPLE_TIME;

	if (raw_samples) {
		attr->sample_type	|= PERF_SAMPLE_TIME;
		attr->sample_type	|= PERF_SAMPLE_RAW;
		attr->sample_type	|= PERF_SAMPLE_CPU;
	}

	/* -D/--no-delay: wake the reader up on every single event. */
	if (nodelay) {
		attr->watermark = 0;
		attr->wakeup_events = 1;
	}

	attr->mmap		= track;
	attr->comm		= track;

	/*
	 * Forked-workload case: start disabled and let the kernel
	 * enable the counters when the target binary exec()s.
	 */
	if (target_pid == -1 && target_tid == -1 && !system_wide) {
		attr->disabled = 1;
		attr->enable_on_exec = 1;
	}
}
243 
244 static bool perf_evlist__equal(struct perf_evlist *evlist,
245 			       struct perf_evlist *other)
246 {
247 	struct perf_evsel *pos, *pair;
248 
249 	if (evlist->nr_entries != other->nr_entries)
250 		return false;
251 
252 	pair = list_entry(other->entries.next, struct perf_evsel, node);
253 
254 	list_for_each_entry(pos, &evlist->entries, node) {
255 		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
256 			return false;
257 		pair = list_entry(pair->node.next, struct perf_evsel, node);
258 	}
259 
260 	return true;
261 }
262 
/*
 * Configure and open every event in @evlist against the target
 * CPUs/threads.  Contains two fallback paths for older kernels:
 * retry without attr.sample_id_all on EINVAL, and fall back from the
 * hardware cycles event to the cpu-clock software event.  Afterwards
 * sets event filters, mmaps the ring buffers and, when appending,
 * verifies compatibility with the existing perf.data header.
 * Any unrecoverable failure exits the process.
 */
static void open_counters(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	/* A map entry of -1 means "any CPU": per-thread mode, no inherit. */
	if (evlist->cpus->map[0] < 0)
		no_inherit = true;

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but short term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

		config_attr(pos, evlist);
retry_sample_id:
		attr->sample_id_all = sample_id_all_avail ? 1 : 0;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				/* Insufficient privilege: explain perf_event_paranoid. */
				ui__warning_paranoid();
				exit(EXIT_FAILURE);
			} else if (err ==  ENODEV && cpu_list) {
				die("No such device - did you specify"
					" an out-of-range profile CPU?\n");
			} else if (err == EINVAL && sample_id_all_avail) {
				/*
				 * Old kernel, no attr->sample_id_type_all field
				 */
				sample_id_all_avail = false;
				if (!sample_time && !raw_samples && !time_needed)
					attr->sample_type &= ~PERF_SAMPLE_TIME;

				goto retry_sample_id;
			}

			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
			if (attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}

			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(pos));
				exit(EXIT_FAILURE);
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
			      err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
				die("No hardware sampling interrupt available."
				    " No APIC? If so then you can boot the kernel"
				    " with the \"lapic\" boot parameter to"
				    " force-enable it.\n");
#endif

			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
		}
	}

	if (perf_evlist__set_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		exit(-1);
	}

	if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
		die("failed to mmap with %d (%s)\n", errno, strerror(errno));

	if (file_new)
		session->evlist = evlist;
	else {
		/* Appending: the event setup must match the existing file. */
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			exit(-1);
		}
 	}

	perf_session__update_sample_type(session);
}
368 
369 static int process_buildids(void)
370 {
371 	u64 size = lseek(output, 0, SEEK_CUR);
372 
373 	if (size == 0)
374 		return 0;
375 
376 	session->fd = output;
377 	return __perf_session__process_events(session, post_processing_offset,
378 					      size - post_processing_offset,
379 					      size, &build_id__mark_dso_hit_ops);
380 }
381 
382 static void atexit_header(void)
383 {
384 	if (!pipe_output) {
385 		session->header.data_size += bytes_written;
386 
387 		if (!no_buildid)
388 			process_buildids();
389 		perf_session__write_header(session, evsel_list, output, true);
390 		perf_session__delete(session);
391 		perf_evlist__delete(evsel_list);
392 		symbol__exit();
393 	}
394 }
395 
/*
 * Per-machine callback (via perf_session__process_machines()):
 * synthesize module and kernel mmap events for a guest machine so
 * that guest-side samples can be resolved later.  The host machine is
 * skipped; @data is the perf_session being written.
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_session *psession = data;

	if (machine__is_host(machine))
		return;

	/*
	 * As for guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid symbol missing when the first addr is
	 * in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(process_synthesized_event,
					     psession, machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
						 psession, machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
							 psession, machine,
							 "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}
432 
/*
 * Marker written after each full pass over the ring buffers; tells
 * the consumer that all events queued before it can now be ordered
 * and flushed.
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
437 
438 static void mmap_read_all(void)
439 {
440 	int i;
441 
442 	for (i = 0; i < evsel_list->nr_mmaps; i++) {
443 		if (evsel_list->mmap[i].base)
444 			mmap_read(&evsel_list->mmap[i]);
445 	}
446 
447 	if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
448 		write_output(&finished_round_event, sizeof(finished_round_event));
449 }
450 
/*
 * The main record driver: set up the output file and perf session,
 * fork the workload (if a command was given), open the counters,
 * synthesize the initial kernel/module/thread events, then loop
 * draining the ring buffers until the workload exits or the user
 * interrupts us.  Returns 0 on success, negative on error.
 */
static int __cmd_record(int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err;
	unsigned long waking = 0;
	int child_ready_pipe[2], go_pipe[2];
	const bool forks = argc > 0;
	char buf;
	struct machine *machine;

	progname = argv[0];

	page_size = sysconf(_SC_PAGE_SIZE);

	atexit(sig_atexit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
		perror("failed to create pipes");
		exit(-1);
	}

	/* No -o given: use stdout if it is a pipe, else "perf.data". */
	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			pipe_output = 1;
		else
			output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			pipe_output = 1;
		else if (!stat(output_name, &st) && st.st_size) {
			/* Overwriting: keep the old data as <name>.old. */
			if (write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (write_mode == WRITE_APPEND) {
			/* Nothing to append to: create a fresh file instead. */
			write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (write_mode == WRITE_APPEND)
		file_new = 0;
	else
		flags |= O_TRUNC;

	if (pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		exit(-1);
	}

	session = perf_session__new(output_name, O_WRONLY,
				    write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	if (!no_buildid)
		perf_header__set_feat(&session->header, HEADER_BUILD_ID);

	/* Appending: pick up the event list from the existing header. */
	if (!file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (have_tracepoints(&evsel_list->entries))
		perf_header__set_feat(&session->header, HEADER_TRACE_INFO);

	/* Record the environment the data is being captured in. */
	perf_header__set_feat(&session->header, HEADER_HOSTNAME);
	perf_header__set_feat(&session->header, HEADER_OSRELEASE);
	perf_header__set_feat(&session->header, HEADER_ARCH);
	perf_header__set_feat(&session->header, HEADER_CPUDESC);
	perf_header__set_feat(&session->header, HEADER_NRCPUS);
	perf_header__set_feat(&session->header, HEADER_EVENT_DESC);
	perf_header__set_feat(&session->header, HEADER_CMDLINE);
	perf_header__set_feat(&session->header, HEADER_VERSION);
	perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
	perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
	perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
	perf_header__set_feat(&session->header, HEADER_CPUID);

	/* 512 kiB: default amount of unprivileged mlocked memory */
	if (mmap_pages == UINT_MAX)
		mmap_pages = (512 * 1024) / page_size;

	if (forks) {
		child_pid = fork();
		if (child_pid < 0) {
			perror("failed to fork");
			exit(-1);
		}

		if (!child_pid) {
			/* Child: wait for the go signal, then exec the workload. */
			if (pipe_output)
				dup2(2, 1);
			close(child_ready_pipe[0]);
			close(go_pipe[1]);
			fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

			/*
			 * Do a dummy execvp to get the PLT entry resolved,
			 * so we avoid the resolver overhead on the real
			 * execvp call.
			 */
			execvp("", (char **)argv);

			/*
			 * Tell the parent we're ready to go
			 */
			close(child_ready_pipe[1]);

			/*
			 * Wait until the parent tells us to go.
			 */
			if (read(go_pipe[0], &buf, 1) == -1)
				perror("unable to read pipe");

			execvp(argv[0], (char **)argv);

			/* exec failed: signal the parent with SIGUSR1. */
			perror(argv[0]);
			kill(getppid(), SIGUSR1);
			exit(-1);
		}

		/* Default target: attach the counters to the child we forked. */
		if (!system_wide && target_tid == -1 && target_pid == -1)
			evsel_list->threads->map[0] = child_pid;

		close(child_ready_pipe[1]);
		close(go_pipe[0]);
		/*
		 * wait for child to settle
		 */
		if (read(child_ready_pipe[0], &buf, 1) == -1) {
			perror("unable to read pipe");
			exit(-1);
		}
		close(child_ready_pipe[0]);
	}

	open_counters(evsel_list);

	/*
	 * perf_session__delete(session) will be called at atexit_header()
	 */
	atexit(atexit_header);

	if (pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			return err;
	} else if (file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			return err;
	}

	post_processing_offset = lseek(output, 0, SEEK_CUR);

	if (pipe_output) {
		/*
		 * A pipe consumer cannot seek back to read the header,
		 * so attributes, event types and tracing data must be
		 * synthesized as in-band events instead.
		 */
		err = perf_session__synthesize_attrs(session,
						     process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}

		err = perf_event__synthesize_event_types(process_synthesized_event,
							 session);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			return err;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(output, evsel_list,
								  process_synthesized_event,
								  session);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				return err;
			}
			/* err is the number of bytes the helper wrote itself. */
			advance_output(err);
		}
	}

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		return -1;
	}

	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
						 session, machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
							 session, machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(process_synthesized_event,
					     session, machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session,
					       perf_event__synthesize_guest_os);

	if (!system_wide)
		perf_event__synthesize_thread_map(evsel_list->threads,
						  process_synthesized_event,
						  session);
	else
		perf_event__synthesize_threads(process_synthesized_event,
					       session);

	if (realtime_prio) {
		struct sched_param param;

		param.sched_priority = realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		close(go_pipe[1]);

	for (;;) {
		int hits = samples;

		mmap_read_all();

		/* Nothing new this pass: sleep until the kernel wakes us. */
		if (hits == samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/*
		 * On interrupt, stop the counters but loop once more so
		 * the remaining ring-buffer contents get flushed.
		 */
		if (done)
			perf_evlist__disable(evsel_list);
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)bytes_written / 1024.0 / 1024.0,
		output_name,
		bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}
747 
/* Usage strings shown by usage_with_options(). */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
753 
/* -f (overwrite) and -A (append); mutually exclusive, checked in cmd_record(). */
static bool force, append_file;

/* Command-line option table for 'perf record'. */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_INTEGER('p', "pid", &target_pid,
		    "record events on existing process id"),
	OPT_INTEGER('t', "tid", &target_tid,
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &nodelay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
			    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &append_file,
			    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &force,
			"overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &user_interval, "event period to sample"),
	OPT_STRING('o', "output", &output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('g', "call-graph", &call_graph,
		    "do call-graph (stack chain/backtrace) recording"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
	OPT_BOOLEAN('n', "no-samples", &no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_END()
};
810 
/*
 * Entry point for 'perf record': parse and validate options, build
 * the event list and CPU/thread maps, resolve the sampling period or
 * frequency, then hand off to __cmd_record().  Returns 0 on success
 * or a negative errno-style value.
 */
int cmd_record(int argc, const char **argv, const char *prefix __used)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;

	perf_header__set_cmdline(argc, argv);

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	/* Need a workload, -p/-t, -a or -C to know what to record. */
	if (!argc && target_pid == -1 && target_tid == -1 &&
		!system_wide && !cpu_list)
		usage_with_options(record_usage, record_options);

	if (force && append_file) {
		fprintf(stderr, "Can't overwrite and append at the same time."
				" You need to choose between -f and -A");
		usage_with_options(record_usage, record_options);
	} else if (append_file) {
		write_mode = WRITE_APPEND;
	} else {
		write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !system_wide) {
		fprintf(stderr, "cgroup monitoring only available in"
			" system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (no_buildid_cache || no_buildid)
		disable_buildid_cache();

	/* No -e given: fall back to the default event. */
	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	if (target_pid != -1)
		target_tid = target_pid;

	if (perf_evlist__create_maps(evsel_list, target_pid,
				     target_tid, cpu_list) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
					 evsel_list->threads->nr) < 0)
			goto out_free_fd;
		if (perf_header__push_event(pos->attr.config, event_name(pos)))
			goto out_free_fd;
	}

	if (perf_evlist__alloc_pollfd(evsel_list) < 0)
		goto out_free_fd;

	if (user_interval != ULLONG_MAX)
		default_interval = user_interval;
	if (user_freq != UINT_MAX)
		freq = user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (default_interval)
		freq = 0;
	else if (freq) {
		default_interval = freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}
908