xref: /linux/tools/perf/builtin-record.c (revision e978aa7d7d57d04eb5f88a7507c4fb98577def77)
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9 
10 #include "builtin.h"
11 
12 #include "perf.h"
13 
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18 
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/symbol.h"
26 #include "util/cpumap.h"
27 #include "util/thread_map.h"
28 
29 #include <unistd.h>
30 #include <sched.h>
31 #include <sys/mman.h>
32 
/* How to treat an existing output file: clobber it, or append to it (-A). */
enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};

/* Sampling period requested with -c; ULLONG_MAX means "not given". */
static u64			user_interval			= ULLONG_MAX;
static u64			default_interval		=      0;

static unsigned int		page_size;
/* Ring-buffer size in pages (-m); UINT_MAX means "use the default". */
static unsigned int		mmap_pages			= UINT_MAX;
/* Sampling frequency requested with -F; UINT_MAX means "not given". */
static unsigned int		user_freq 			= UINT_MAX;
static int			freq				=   1000;
static int			output;		/* output file descriptor */
static int			pipe_output			=      0;
static const char		*output_name			= NULL;
static bool			group				=  false;
static int			realtime_prio			=      0;
static bool			nodelay				=  false;
static bool			raw_samples			=  false;
/* Cleared at open time when the running kernel lacks attr.sample_id_all. */
static bool			sample_id_all_avail		=   true;
static bool			system_wide			=  false;
static pid_t			target_pid			=     -1;
static pid_t			target_tid			=     -1;
static pid_t			child_pid			=     -1;	/* forked workload, if any */
static bool			no_inherit			=  false;
static enum write_mode_t	write_mode			= WRITE_FORCE;
static bool			call_graph			=  false;
static bool			inherit_stat			=  false;
static bool			no_samples			=  false;
static bool			sample_address			=  false;
static bool			sample_time			=  false;
static bool			no_buildid			=  false;
static bool			no_buildid_cache		=  false;
static struct perf_evlist	*evsel_list;

static long			samples				=      0;
static u64			bytes_written			=      0;	/* payload bytes in the data section */

static int			file_new			=      1;	/* 0 when appending to an existing file */
static off_t			post_processing_offset;		/* file offset where event data starts */

static struct perf_session	*session;
static const char		*cpu_list;
76 
77 static void advance_output(size_t size)
78 {
79 	bytes_written += size;
80 }
81 
82 static void write_output(void *buf, size_t size)
83 {
84 	while (size) {
85 		int ret = write(output, buf, size);
86 
87 		if (ret < 0)
88 			die("failed to write");
89 
90 		size -= ret;
91 		buf += ret;
92 
93 		bytes_written += ret;
94 	}
95 }
96 
97 static int process_synthesized_event(union perf_event *event,
98 				     struct perf_sample *sample __used,
99 				     struct perf_session *self __used)
100 {
101 	write_output(event, event->header.size);
102 	return 0;
103 }
104 
/*
 * Drain all freshly produced data from one mmap'ed ring buffer into
 * the output file.  The kernel advances 'head'; md->prev remembers
 * how far we consumed on the previous call.  When the unread region
 * wraps past the end of the ring it is written out in two chunks.
 * Finally the tail pointer is published back so the kernel can
 * reuse the space.
 */
static void mmap_read(struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	/* data area starts one page in; the first page is the control struct */
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;

	if (old == head)
		return;	/* nothing new since last time */

	samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		/* region wraps: first write from 'old' to the end of the ring */
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		write_output(buf, size);
	}

	/* write the (remaining) region up to 'head' */
	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	write_output(buf, size);

	md->prev = old;
	perf_mmap__write_tail(md, old);
}
137 
/* Set asynchronously by sig_handler(); polled from the record loop. */
static volatile int done = 0;
/* Signal that terminated us, or -1; sig_atexit() re-raises it on exit. */
static volatile int signr = -1;

static void sig_handler(int sig)
{
	done = 1;	/* tell the main loop to drain the buffers and stop */
	signr = sig;
}
146 
147 static void sig_atexit(void)
148 {
149 	if (child_pid > 0)
150 		kill(child_pid, SIGTERM);
151 
152 	if (signr == -1 || signr == SIGUSR1)
153 		return;
154 
155 	signal(signr, SIG_DFL);
156 	kill(getpid(), signr);
157 }
158 
/*
 * Fill in the perf_event_attr for one event according to the global
 * command-line state: sampling mode (frequency vs fixed period),
 * which PERF_SAMPLE_* fields to record, and inheritance/exec-enable
 * behavior.  Only the first event in the list gets mmap/comm
 * tracking records.
 */
static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
{
	struct perf_event_attr *attr = &evsel->attr;
	int track = !evsel->idx; /* only the first counter needs these */

	attr->disabled		= 1;
	attr->inherit		= !no_inherit;
	attr->read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
				  PERF_FORMAT_TOTAL_TIME_RUNNING |
				  PERF_FORMAT_ID;

	attr->sample_type	|= PERF_SAMPLE_IP | PERF_SAMPLE_TID;

	/* with multiple events the ID is needed to demux samples */
	if (evlist->nr_entries > 1)
		attr->sample_type |= PERF_SAMPLE_ID;

	/*
	 * We default some events to a 1 default interval. But keep
	 * it a weak assumption overridable by the user.
	 */
	if (!attr->sample_period || (user_freq != UINT_MAX &&
				     user_interval != ULLONG_MAX)) {
		if (freq) {
			/* frequency mode: period varies, so record it */
			attr->sample_type	|= PERF_SAMPLE_PERIOD;
			attr->freq		= 1;
			attr->sample_freq	= freq;
		} else {
			attr->sample_period = default_interval;
		}
	}

	/* -n: counting only, no samples */
	if (no_samples)
		attr->sample_freq = 0;

	if (inherit_stat)
		attr->inherit_stat = 1;

	if (sample_address) {
		attr->sample_type	|= PERF_SAMPLE_ADDR;
		attr->mmap_data = track;
	}

	if (call_graph)
		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;

	if (system_wide)
		attr->sample_type	|= PERF_SAMPLE_CPU;

	/* timestamps, when the kernel supports sample_id_all */
	if (sample_id_all_avail &&
	    (sample_time || system_wide || !no_inherit || cpu_list))
		attr->sample_type	|= PERF_SAMPLE_TIME;

	if (raw_samples) {
		attr->sample_type	|= PERF_SAMPLE_TIME;
		attr->sample_type	|= PERF_SAMPLE_RAW;
		attr->sample_type	|= PERF_SAMPLE_CPU;
	}

	/* -D: wake the reader on every event for minimal latency */
	if (nodelay) {
		attr->watermark = 0;
		attr->wakeup_events = 1;
	}

	attr->mmap		= track;
	attr->comm		= track;

	/*
	 * When profiling a forked workload, start the counters only
	 * when the target actually exec()s.
	 */
	if (target_pid == -1 && target_tid == -1 && !system_wide) {
		attr->disabled = 1;
		attr->enable_on_exec = 1;
	}
}
230 
231 static bool perf_evlist__equal(struct perf_evlist *evlist,
232 			       struct perf_evlist *other)
233 {
234 	struct perf_evsel *pos, *pair;
235 
236 	if (evlist->nr_entries != other->nr_entries)
237 		return false;
238 
239 	pair = list_entry(other->entries.next, struct perf_evsel, node);
240 
241 	list_for_each_entry(pos, &evlist->entries, node) {
242 		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
243 			return false;
244 		pair = list_entry(pair->node.next, struct perf_evsel, node);
245 	}
246 
247 	return true;
248 }
249 
/*
 * Configure and open every counter in the list, with fallbacks for
 * older kernels (no sample_id_all -> retry without it; no hardware
 * cycles PMU -> retry as the cpu-clock software event), then set
 * event filters and mmap the ring buffers.  Fatal problems die()
 * or exit(); on return the session's evlist is ready for use.
 */
static void open_counters(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	/* a dummy CPU map (map[0] < 0) means per-thread mode: no inherit */
	if (evlist->cpus->map[0] < 0)
		no_inherit = true;

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but short term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

		config_attr(pos, evlist);
retry_sample_id:
		attr->sample_id_all = sample_id_all_avail ? 1 : 0;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				/* perf_event_paranoid forbids this: explain and bail */
				ui__warning_paranoid();
				exit(EXIT_FAILURE);
			} else if (err ==  ENODEV && cpu_list) {
				die("No such device - did you specify"
					" an out-of-range profile CPU?\n");
			} else if (err == EINVAL && sample_id_all_avail) {
				/*
				 * Old kernel, no attr->sample_id_type_all field
				 */
				sample_id_all_avail = false;
				/* drop the timestamp bit unless something else wants it */
				if (!sample_time && !raw_samples && !time_needed)
					attr->sample_type &= ~PERF_SAMPLE_TIME;

				goto retry_sample_id;
			}

			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
			if (attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}

			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(pos));
				exit(EXIT_FAILURE);
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
			      err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
				die("No hardware sampling interrupt available."
				    " No APIC? If so then you can boot the kernel"
				    " with the \"lapic\" boot parameter to"
				    " force-enable it.\n");
#endif

			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
		}
	}

	if (perf_evlist__set_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		exit(-1);
	}

	if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
		die("failed to mmap with %d (%s)\n", errno, strerror(errno));

	if (file_new)
		session->evlist = evlist;
	else {
		/* appending: the new events must match the file's events */
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			exit(-1);
		}
	}

	perf_session__update_sample_type(session);
}
355 
356 static int process_buildids(void)
357 {
358 	u64 size = lseek(output, 0, SEEK_CUR);
359 
360 	if (size == 0)
361 		return 0;
362 
363 	session->fd = output;
364 	return __perf_session__process_events(session, post_processing_offset,
365 					      size - post_processing_offset,
366 					      size, &build_id__mark_dso_hit_ops);
367 }
368 
369 static void atexit_header(void)
370 {
371 	if (!pipe_output) {
372 		session->header.data_size += bytes_written;
373 
374 		if (!no_buildid)
375 			process_buildids();
376 		perf_session__write_header(session, evsel_list, output, true);
377 		perf_session__delete(session);
378 		perf_evlist__delete(evsel_list);
379 		symbol__exit();
380 	}
381 }
382 
/*
 * Per-machine callback: synthesize module and kernel mmap events
 * for each guest machine (the host is handled separately in
 * __cmd_record()).
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_session *psession = data;

	if (machine__is_host(machine))
		return;

	/*
	 * As for guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid symbol missing when the first addr is
	 * in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(process_synthesized_event,
					     psession, machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
						 psession, machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
							 psession, machine,
							 "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}
419 
/*
 * Marker written after each full pass over the mmap buffers when
 * tracing data is recorded: tells the report side that everything
 * buffered before this point can be flushed/reordered safely.
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
424 
425 static void mmap_read_all(void)
426 {
427 	int i;
428 
429 	for (i = 0; i < evsel_list->nr_mmaps; i++) {
430 		if (evsel_list->mmap[i].base)
431 			mmap_read(&evsel_list->mmap[i]);
432 	}
433 
434 	if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
435 		write_output(&finished_round_event, sizeof(finished_round_event));
436 }
437 
/*
 * The record engine: sets up the output file (or pipe), forks the
 * workload if a command was given, opens/mmaps the counters, writes
 * the header, synthesizes pre-existing state (kernel and module
 * mmaps, existing threads, guest machines), then loops draining the
 * ring buffers to disk until the workload exits or a signal arrives.
 * Returns 0 on success or a negative error.
 */
static int __cmd_record(int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err;
	unsigned long waking = 0;
	int child_ready_pipe[2], go_pipe[2];
	const bool forks = argc > 0;	/* non-option args => run a workload */
	char buf;
	struct machine *machine;

	page_size = sysconf(_SC_PAGE_SIZE);

	atexit(sig_atexit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);	/* child signals exec failure via SIGUSR1 */

	/* parent/child handshake pipes, only needed when forking a workload */
	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
		perror("failed to create pipes");
		exit(-1);
	}

	/* no -o: write to perf.data, or straight to stdout if it is a FIFO */
	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			pipe_output = 1;
		else
			output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			pipe_output = 1;
		else if (!stat(output_name, &st) && st.st_size) {
			/* overwriting: keep the old data around as <name>.old */
			if (write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (write_mode == WRITE_APPEND) {
			/* nothing to append to: fall back to a fresh file */
			write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (write_mode == WRITE_APPEND)
		file_new = 0;
	else
		flags |= O_TRUNC;

	if (pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		exit(-1);
	}

	session = perf_session__new(output_name, O_WRONLY,
				    write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	if (!no_buildid)
		perf_header__set_feat(&session->header, HEADER_BUILD_ID);

	/* appending: pick up the existing header first */
	if (!file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (have_tracepoints(&evsel_list->entries))
		perf_header__set_feat(&session->header, HEADER_TRACE_INFO);

	/* 512 kiB: default amount of unprivileged mlocked memory */
	if (mmap_pages == UINT_MAX)
		mmap_pages = (512 * 1024) / page_size;

	if (forks) {
		child_pid = fork();
		if (child_pid < 0) {
			perror("failed to fork");
			exit(-1);
		}

		if (!child_pid) {
			/* --- child: prepare, wait for go, then exec workload --- */
			if (pipe_output)
				dup2(2, 1);	/* keep stdout free for perf data */
			close(child_ready_pipe[0]);
			close(go_pipe[1]);
			fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

			/*
			 * Do a dummy execvp to get the PLT entry resolved,
			 * so we avoid the resolver overhead on the real
			 * execvp call.
			 */
			execvp("", (char **)argv);

			/*
			 * Tell the parent we're ready to go
			 */
			close(child_ready_pipe[1]);

			/*
			 * Wait until the parent tells us to go.
			 */
			if (read(go_pipe[0], &buf, 1) == -1)
				perror("unable to read pipe");

			execvp(argv[0], (char **)argv);

			/* exec failed: tell the parent via SIGUSR1 */
			perror(argv[0]);
			kill(getppid(), SIGUSR1);
			exit(-1);
		}

		/* profiling the forked child unless a target was given */
		if (!system_wide && target_tid == -1 && target_pid == -1)
			evsel_list->threads->map[0] = child_pid;

		close(child_ready_pipe[1]);
		close(go_pipe[0]);
		/*
		 * wait for child to settle
		 */
		if (read(child_ready_pipe[0], &buf, 1) == -1) {
			perror("unable to read pipe");
			exit(-1);
		}
		close(child_ready_pipe[0]);
	}

	open_counters(evsel_list);

	/*
	 * perf_session__delete(session) will be called at atexit_header()
	 */
	atexit(atexit_header);

	if (pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			return err;
	} else if (file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			return err;
	}

	/* remember where event data starts, for the build-id pass */
	post_processing_offset = lseek(output, 0, SEEK_CUR);

	if (pipe_output) {
		/* a pipe reader can't seek back: stream the metadata as events */
		err = perf_session__synthesize_attrs(session,
						     process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}

		err = perf_event__synthesize_event_types(process_synthesized_event,
							 session);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			return err;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(output, evsel_list,
								  process_synthesized_event,
								  session);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				return err;
			}
			advance_output(err);
		}
	}

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		return -1;
	}

	/* _text may be missing from kallsyms; fall back to _stext */
	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
						 session, machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
							 session, machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(process_synthesized_event,
					     session, machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session,
					       perf_event__synthesize_guest_os);

	/* snapshot the pre-existing threads so their samples resolve */
	if (!system_wide)
		perf_event__synthesize_thread_map(evsel_list->threads,
						  process_synthesized_event,
						  session);
	else
		perf_event__synthesize_threads(process_synthesized_event,
					       session);

	if (realtime_prio) {
		struct sched_param param;

		param.sched_priority = realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		close(go_pipe[1]);

	/* main loop: drain rings; sleep in poll() when nothing arrived */
	for (;;) {
		int hits = samples;

		mmap_read_all();

		if (hits == samples) {
			if (done)
				break;	/* signalled and buffers fully drained */
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/* stop producing new events, then keep draining until empty */
		if (done)
			perf_evlist__disable(evsel_list);
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)bytes_written / 1024.0 / 1024.0,
		output_name,
		bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}
719 
/* Usage strings shown by usage_with_options(). */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
725 
/* -f / -A flags; reconciled into write_mode in cmd_record(). */
static bool force, append_file;

/* Command-line option table for 'perf record'. */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_INTEGER('p', "pid", &target_pid,
		    "record events on existing process id"),
	OPT_INTEGER('t', "tid", &target_tid,
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &nodelay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
			    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &append_file,
			    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &force,
			"overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &user_interval, "event period to sample"),
	OPT_STRING('o', "output", &output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('g', "call-graph", &call_graph,
		    "do call-graph (stack chain/backtrace) recording"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
	OPT_BOOLEAN('n', "no-samples", &no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_END()
};
782 
/*
 * Entry point for 'perf record': parse and validate options, build
 * the event list and its cpu/thread maps, resolve the sampling mode
 * (fixed period vs frequency), then hand off to __cmd_record().
 * Returns 0 on success or a negative errno-style value.
 */
int cmd_record(int argc, const char **argv, const char *prefix __used)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	/* need something to profile: a command, pid/tid, -a, or a cpu list */
	if (!argc && target_pid == -1 && target_tid == -1 &&
		!system_wide && !cpu_list)
		usage_with_options(record_usage, record_options);

	/* -f and -A contradict each other; otherwise map them to write_mode */
	if (force && append_file) {
		fprintf(stderr, "Can't overwrite and append at the same time."
				" You need to choose between -f and -A");
		usage_with_options(record_usage, record_options);
	} else if (append_file) {
		write_mode = WRITE_APPEND;
	} else {
		write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !system_wide) {
		fprintf(stderr, "cgroup monitoring only available in"
			" system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (no_buildid_cache || no_buildid)
		disable_buildid_cache();

	/* no -e given: fall back to the default event (cycles) */
	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	if (target_pid != -1)
		target_tid = target_pid;

	if (perf_evlist__create_maps(evsel_list, target_pid,
				     target_tid, cpu_list) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
					 evsel_list->threads->nr) < 0)
			goto out_free_fd;
		if (perf_header__push_event(pos->attr.config, event_name(pos)))
			goto out_free_fd;
	}

	if (perf_evlist__alloc_pollfd(evsel_list) < 0)
		goto out_free_fd;

	if (user_interval != ULLONG_MAX)
		default_interval = user_interval;
	if (user_freq != UINT_MAX)
		freq = user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (default_interval)
		freq = 0;
	else if (freq) {
		default_interval = freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}
878