xref: /linux/tools/perf/builtin-record.c (revision 52ffe0ff02fc053a025c381d5808e9ecd3206dfe)
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #include "builtin.h"
9 
10 #include "perf.h"
11 
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include <subcmd/parse-options.h>
15 #include "util/parse-events.h"
16 
17 #include "util/callchain.h"
18 #include "util/cgroup.h"
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/symbol.h"
27 #include "util/cpumap.h"
28 #include "util/thread_map.h"
29 #include "util/data.h"
30 #include "util/perf_regs.h"
31 #include "util/auxtrace.h"
32 #include "util/tsc.h"
33 #include "util/parse-branch-options.h"
34 #include "util/parse-regs-options.h"
35 #include "util/llvm-utils.h"
36 #include "util/bpf-loader.h"
37 #include "util/trigger.h"
38 #include "asm/bug.h"
39 
40 #include <unistd.h>
41 #include <sched.h>
42 #include <sys/mman.h>
43 
44 
45 
46 struct record {
47 	struct perf_tool	tool;
48 	struct record_opts	opts;
49 	u64			bytes_written;
50 	struct perf_data_file	file;
51 	struct auxtrace_record	*itr;
52 	struct perf_evlist	*evlist;
53 	struct perf_session	*session;
54 	const char		*progname;
55 	int			realtime_prio;
56 	bool			no_buildid;
57 	bool			no_buildid_set;
58 	bool			no_buildid_cache;
59 	bool			no_buildid_cache_set;
60 	bool			buildid_all;
61 	bool			timestamp_filename;
62 	bool			switch_output;
63 	unsigned long long	samples;
64 };
65 
66 static int record__write(struct record *rec, void *bf, size_t size)
67 {
68 	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
69 		pr_err("failed to write perf data, error: %m\n");
70 		return -1;
71 	}
72 
73 	rec->bytes_written += size;
74 	return 0;
75 }
76 
77 static int process_synthesized_event(struct perf_tool *tool,
78 				     union perf_event *event,
79 				     struct perf_sample *sample __maybe_unused,
80 				     struct machine *machine __maybe_unused)
81 {
82 	struct record *rec = container_of(tool, struct record, tool);
83 	return record__write(rec, event, event->header.size);
84 }
85 
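/*
 * The kernel moves 'head' backward in an overwrite-mode ring buffer,
 * so the valid region cannot be derived from 'old' alone.  Walk the
 * record headers forward from 'head' until either a zero-sized header
 * (the buffer never filled up) or a full lap of the buffer (it
 * wrapped; step back one record to stay inside it).  The readable
 * data then lies in [*start, *end).
 */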
86 static int
87 backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
88 {
89 	struct perf_event_header *pheader;
90 	u64 evt_head = head;
91 	int size = mask + 1;
92 
93 	pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
94 	pheader = (struct perf_event_header *)(buf + (head & mask));
95 	*start = head;
96 	while (true) {
97 		if (evt_head - head >= (unsigned int)size) {
98 			pr_debug("Finished reading backward ring buffer: rewind\n");
99 			if (evt_head - head > (unsigned int)size)
100 				evt_head -= pheader->size;
101 			*end = evt_head;
102 			return 0;
103 		}
104 
105 		pheader = (struct perf_event_header *)(buf + (evt_head & mask));
106 
107 		if (pheader->size == 0) {
108 			pr_debug("Finished reading backward ring buffer: get start\n");
109 			*end = evt_head;
110 			return 0;
111 		}
112 
113 		evt_head += pheader->size;
114 		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
115 	}
116 	WARN_ONCE(1, "Shouldn't get here\n");
117 	return -1;
118 }
119 
120 static int
121 rb_find_range(struct perf_evlist *evlist,
122 	      void *data, int mask, u64 head, u64 old,
123 	      u64 *start, u64 *end)
124 {
125 	if (!evlist->backward) {
126 		*start = old;
127 		*end = head;
128 		return 0;
129 	}
130 
131 	return backward_rb_find_range(data, mask, head, start, end);
132 }
133 
134 static int record__mmap_read(struct record *rec, int idx)
135 {
136 	struct perf_mmap *md = &rec->evlist->mmap[idx];
137 	u64 head = perf_mmap__read_head(md);
138 	u64 old = md->prev;
139 	u64 end = head, start = old;
140 	unsigned char *data = md->base + page_size;
141 	unsigned long size;
142 	void *buf;
143 	int rc = 0;
144 
145 	if (rb_find_range(rec->evlist, data, md->mask, head,
146 			  old, &start, &end))
147 		return -1;
148 
149 	if (start == end)
150 		return 0;
151 
152 	rec->samples++;
153 
154 	size = end - start;
155 	if (size > (unsigned long)(md->mask) + 1) {
156 		WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
157 
158 		md->prev = head;
159 		perf_evlist__mmap_consume(rec->evlist, idx);
160 		return 0;
161 	}
162 
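	/*
	 * The region may wrap around the end of the ring buffer; e.g. with
	 * mask = 0xffff, start = 0xfff0 and end = 0x10010 the copy is split
	 * into 0x10 bytes at offset 0xfff0 and 0x10 bytes at offset 0.
	 */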
163 	if ((start & md->mask) + size != (end & md->mask)) {
164 		buf = &data[start & md->mask];
165 		size = md->mask + 1 - (start & md->mask);
166 		start += size;
167 
168 		if (record__write(rec, buf, size) < 0) {
169 			rc = -1;
170 			goto out;
171 		}
172 	}
173 
174 	buf = &data[start & md->mask];
175 	size = end - start;
176 	start += size;
177 
178 	if (record__write(rec, buf, size) < 0) {
179 		rc = -1;
180 		goto out;
181 	}
182 
183 	md->prev = head;
184 	perf_evlist__mmap_consume(rec->evlist, idx);
185 out:
186 	return rc;
187 }
188 
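/* Set from signal handlers and polled by the main loop, hence volatile. */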
189 static volatile int done;
190 static volatile int signr = -1;
191 static volatile int child_finished;
192 
193 static volatile int auxtrace_record__snapshot_started;
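/*
 * Trigger state machine (see util/trigger.h): trigger_on() arms a
 * trigger, the SIGUSR2 handler fires it with trigger_hit(), the main
 * loop processes the hit and re-arms it with trigger_ready(), and
 * trigger_error() parks it in a sticky error state.
 */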
194 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
195 static DEFINE_TRIGGER(switch_output_trigger);
196 
197 static void sig_handler(int sig)
198 {
199 	if (sig == SIGCHLD)
200 		child_finished = 1;
201 	else
202 		signr = sig;
203 
204 	done = 1;
205 }
206 
207 static void record__sig_exit(void)
208 {
209 	if (signr == -1)
210 		return;
211 
212 	signal(signr, SIG_DFL);
213 	raise(signr);
214 }
215 
216 #ifdef HAVE_AUXTRACE_SUPPORT
217 
218 static int record__process_auxtrace(struct perf_tool *tool,
219 				    union perf_event *event, void *data1,
220 				    size_t len1, void *data2, size_t len2)
221 {
222 	struct record *rec = container_of(tool, struct record, tool);
223 	struct perf_data_file *file = &rec->file;
224 	size_t padding;
225 	u8 pad[8] = {0};
226 
227 	if (!perf_data_file__is_pipe(file)) {
228 		off_t file_offset;
229 		int fd = perf_data_file__fd(file);
230 		int err;
231 
232 		file_offset = lseek(fd, 0, SEEK_CUR);
233 		if (file_offset == -1)
234 			return -1;
235 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
236 						     event, file_offset);
237 		if (err)
238 			return err;
239 	}
240 
241 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
242 	padding = (len1 + len2) & 7;
243 	if (padding)
244 		padding = 8 - padding;
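	/* e.g. len1 + len2 = 13 -> padding = 3, keeping the payload 8-byte aligned */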
245 
246 	record__write(rec, event, event->header.size);
247 	record__write(rec, data1, len1);
248 	if (len2)
249 		record__write(rec, data2, len2);
250 	record__write(rec, &pad, padding);
251 
252 	return 0;
253 }
254 
255 static int record__auxtrace_mmap_read(struct record *rec,
256 				      struct auxtrace_mmap *mm)
257 {
258 	int ret;
259 
260 	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
261 				  record__process_auxtrace);
262 	if (ret < 0)
263 		return ret;
264 
265 	if (ret)
266 		rec->samples++;
267 
268 	return 0;
269 }
270 
271 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
272 					       struct auxtrace_mmap *mm)
273 {
274 	int ret;
275 
276 	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
277 					   record__process_auxtrace,
278 					   rec->opts.auxtrace_snapshot_size);
279 	if (ret < 0)
280 		return ret;
281 
282 	if (ret)
283 		rec->samples++;
284 
285 	return 0;
286 }
287 
288 static int record__auxtrace_read_snapshot_all(struct record *rec)
289 {
290 	int i;
291 	int rc = 0;
292 
293 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
294 		struct auxtrace_mmap *mm =
295 				&rec->evlist->mmap[i].auxtrace_mmap;
296 
297 		if (!mm->base)
298 			continue;
299 
300 		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
301 			rc = -1;
302 			goto out;
303 		}
304 	}
305 out:
306 	return rc;
307 }
308 
309 static void record__read_auxtrace_snapshot(struct record *rec)
310 {
311 	pr_debug("Recording AUX area tracing snapshot\n");
312 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
313 		trigger_error(&auxtrace_snapshot_trigger);
314 	} else {
315 		if (auxtrace_record__snapshot_finish(rec->itr))
316 			trigger_error(&auxtrace_snapshot_trigger);
317 		else
318 			trigger_ready(&auxtrace_snapshot_trigger);
319 	}
320 }
321 
322 #else
323 
324 static inline
325 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
326 			       struct auxtrace_mmap *mm __maybe_unused)
327 {
328 	return 0;
329 }
330 
331 static inline
332 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
333 {
334 }
335 
336 static inline
337 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
338 {
339 	return 0;
340 }
341 
342 #endif
343 
344 static int record__open(struct record *rec)
345 {
346 	char msg[512];
347 	struct perf_evsel *pos;
348 	struct perf_evlist *evlist = rec->evlist;
349 	struct perf_session *session = rec->session;
350 	struct record_opts *opts = &rec->opts;
351 	int rc = 0;
352 
353 	perf_evlist__config(evlist, opts, &callchain_param);
354 
355 	evlist__for_each(evlist, pos) {
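		/*
		 * On open failure, perf_evsel__fallback() may rewrite the
		 * event (e.g. the hardware "cycles" event to the software
		 * "cpu-clock" when no PMU is available) and we retry.
		 */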
356 try_again:
357 		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
358 			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
359 				if (verbose)
360 					ui__warning("%s\n", msg);
361 				goto try_again;
362 			}
363 
364 			rc = -errno;
365 			perf_evsel__open_strerror(pos, &opts->target,
366 						  errno, msg, sizeof(msg));
367 			ui__error("%s\n", msg);
368 			goto out;
369 		}
370 	}
371 
372 	if (perf_evlist__apply_filters(evlist, &pos)) {
373 		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
374 			pos->filter, perf_evsel__name(pos), errno,
375 			strerror_r(errno, msg, sizeof(msg)));
376 		rc = -1;
377 		goto out;
378 	}
379 
380 	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
381 				 opts->auxtrace_mmap_pages,
382 				 opts->auxtrace_snapshot_mode) < 0) {
383 		if (errno == EPERM) {
384 			pr_err("Permission error mapping pages.\n"
385 			       "Consider increasing "
386 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
387 			       "or try again with a smaller value of -m/--mmap_pages.\n"
388 			       "(current value: %u,%u)\n",
389 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
390 			rc = -errno;
391 		} else {
392 			pr_err("failed to mmap with %d (%s)\n", errno,
393 				strerror_r(errno, msg, sizeof(msg)));
394 			if (errno)
395 				rc = -errno;
396 			else
397 				rc = -EINVAL;
398 		}
399 		goto out;
400 	}
401 
402 	session->evlist = evlist;
403 	perf_session__set_id_hdr_size(session);
404 out:
405 	return rc;
406 }
407 
408 static int process_sample_event(struct perf_tool *tool,
409 				union perf_event *event,
410 				struct perf_sample *sample,
411 				struct perf_evsel *evsel,
412 				struct machine *machine)
413 {
414 	struct record *rec = container_of(tool, struct record, tool);
415 
416 	rec->samples++;
417 
418 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
419 }
420 
421 static int process_buildids(struct record *rec)
422 {
423 	struct perf_data_file *file  = &rec->file;
424 	struct perf_session *session = rec->session;
425 
426 	if (file->size == 0)
427 		return 0;
428 
429 	/*
430 	 * During this process, it'll load the kernel map and replace
431 	 * dso->long_name with the real pathname it found.  In this case
432 	 * we prefer the vmlinux path like
433 	 *   /lib/modules/3.16.4/build/vmlinux
434 	 *
435 	 * rather than build-id path (in debug directory).
436 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
437 	 */
438 	symbol_conf.ignore_vmlinux_buildid = true;
439 
440 	/*
441 	 * If --buildid-all is given, it marks all DSO regardless of hits,
442 	 * so no need to process samples.
443 	 */
444 	if (rec->buildid_all)
445 		rec->tool.sample = NULL;
446 
447 	return perf_session__process_events(session);
448 }
449 
450 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
451 {
452 	int err;
453 	struct perf_tool *tool = data;
454 	/*
455 	 * As for the guest kernel: when processing the record & report
456 	 * subcommands, we arrange the module mmap events prior to the guest
457 	 * kernel mmap event and trigger a DSO preload, because by default
458 	 * guest module symbols are loaded from guest kallsyms instead of
459 	 * /lib/modules/XXX/XXX. This avoids missing symbols when the first
460 	 * address is in a module instead of in the guest kernel.
461 	 */
462 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
463 					     machine);
464 	if (err < 0)
465 		pr_err("Couldn't record guest kernel [%d]'s reference"
466 		       " relocation symbol.\n", machine->pid);
467 
468 	/*
469 	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
470 	 * have no _text sometimes.
471 	 */
472 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
473 						 machine);
474 	if (err < 0)
475 		pr_err("Couldn't record guest kernel [%d]'s reference"
476 		       " relocation symbol.\n", machine->pid);
477 }
478 
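/*
 * PERF_RECORD_FINISHED_ROUND is a synthetic user-space event written
 * after each pass over the mmaps that produced data; it lets 'perf
 * report' flush and time-sort everything queued before it, since all
 * events of that round are known to be complete.
 */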
479 static struct perf_event_header finished_round_event = {
480 	.size = sizeof(struct perf_event_header),
481 	.type = PERF_RECORD_FINISHED_ROUND,
482 };
483 
484 static int record__mmap_read_all(struct record *rec)
485 {
486 	u64 bytes_written = rec->bytes_written;
487 	int i;
488 	int rc = 0;
489 
490 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
491 		struct auxtrace_mmap *mm = &rec->evlist->mmap[i].auxtrace_mmap;
492 
493 		if (rec->evlist->mmap[i].base) {
494 			if (record__mmap_read(rec, i) != 0) {
495 				rc = -1;
496 				goto out;
497 			}
498 		}
499 
500 		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
501 		    record__auxtrace_mmap_read(rec, mm) != 0) {
502 			rc = -1;
503 			goto out;
504 		}
505 	}
506 
507 	/*
508 	 * Mark the round finished if we wrote
509 	 * at least one event.
510 	 */
511 	if (bytes_written != rec->bytes_written)
512 		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
513 
514 out:
515 	return rc;
516 }
517 
518 static void record__init_features(struct record *rec)
519 {
520 	struct perf_session *session = rec->session;
521 	int feat;
522 
523 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
524 		perf_header__set_feat(&session->header, feat);
525 
526 	if (rec->no_buildid)
527 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
528 
529 	if (!have_tracepoints(&rec->evlist->entries))
530 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
531 
532 	if (!rec->opts.branch_stack)
533 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
534 
535 	if (!rec->opts.full_auxtrace)
536 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
537 
538 	perf_header__clear_feat(&session->header, HEADER_STAT);
539 }
540 
541 static void
542 record__finish_output(struct record *rec)
543 {
544 	struct perf_data_file *file = &rec->file;
545 	int fd = perf_data_file__fd(file);
546 
547 	if (file->is_pipe)
548 		return;
549 
550 	rec->session->header.data_size += rec->bytes_written;
551 	file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
552 
553 	if (!rec->no_buildid) {
554 		process_buildids(rec);
555 
556 		if (rec->buildid_all)
557 			dsos__hit_all(rec->session);
558 	}
559 	perf_session__write_header(rec->session, rec->evlist, fd, true);
560 
561 	return;
562 }
563 
564 static int record__synthesize_workload(struct record *rec)
565 {
566 	struct {
567 		struct thread_map map;
568 		struct thread_map_data map_data;
569 	} thread_map;
570 
571 	thread_map.map.nr = 1;
572 	thread_map.map.map[0].pid = rec->evlist->workload.pid;
573 	thread_map.map.map[0].comm = NULL;
574 	return perf_event__synthesize_thread_map(&rec->tool, &thread_map.map,
575 						 process_synthesized_event,
576 						 &rec->session->machines.host,
577 						 rec->opts.sample_address,
578 						 rec->opts.proc_map_timeout);
579 }
580 
581 static int record__synthesize(struct record *rec);
582 
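/*
 * Rotate the output: dump the data collected so far to
 * <path>.<timestamp> and, unless called at exit, open a fresh file
 * and re-synthesize the tracking events for it.
 */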
583 static int
584 record__switch_output(struct record *rec, bool at_exit)
585 {
586 	struct perf_data_file *file = &rec->file;
587 	int fd, err;
588 
589 	/* Same size as "2015122520103046" */
590 	char timestamp[] = "InvalidTimestamp";
591 
592 	rec->samples = 0;
593 	record__finish_output(rec);
594 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
595 	if (err) {
596 		pr_err("Failed to get current timestamp\n");
597 		return -EINVAL;
598 	}
599 
600 	fd = perf_data_file__switch(file, timestamp,
601 				    rec->session->header.data_offset,
602 				    at_exit);
603 	if (fd >= 0 && !at_exit) {
604 		rec->bytes_written = 0;
605 		rec->session->header.data_size = 0;
606 	}
607 
608 	if (!quiet)
609 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
610 			file->path, timestamp);
611 
612 	/* Output tracking events */
613 	if (!at_exit) {
614 		record__synthesize(rec);
615 
616 		/*
617 		 * In 'perf record --switch-output' without -a,
618 		 * record__synthesize() in record__switch_output() won't
619 		 * generate tracking events because there is no thread_map
620 		 * in the evlist, so the newly created perf.data would lack
621 		 * map and comm information.
622 		 * Create a fake thread_map and directly call
623 		 * perf_event__synthesize_thread_map() for those events.
624 		 */
625 		if (target__none(&rec->opts.target))
626 			record__synthesize_workload(rec);
627 	}
628 	return fd;
629 }
630 
631 static volatile int workload_exec_errno;
632 
633 /*
634  * perf_evlist__prepare_workload will send a SIGUSR1
635  * if the fork fails, since we asked for it by setting its
636  * want_signal to true.
637  */
638 static void workload_exec_failed_signal(int signo __maybe_unused,
639 					siginfo_t *info,
640 					void *ucontext __maybe_unused)
641 {
642 	workload_exec_errno = info->si_value.sival_int;
643 	done = 1;
644 	child_finished = 1;
645 }
646 
647 static void snapshot_sig_handler(int sig);
648 
649 int __weak
650 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
651 			    struct perf_tool *tool __maybe_unused,
652 			    perf_event__handler_t process __maybe_unused,
653 			    struct machine *machine __maybe_unused)
654 {
655 	return 0;
656 }
657 
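/*
 * The first mmap'd page is the perf_event_mmap_page control page;
 * where the architecture supports it, perf_event__synth_time_conv()
 * reads its time-conversion fields to synthesize a
 * PERF_RECORD_TIME_CONV event.
 */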
658 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
659 {
660 	if (rec->evlist && rec->evlist->mmap && rec->evlist->mmap[0].base)
661 		return rec->evlist->mmap[0].base;
662 	return NULL;
663 }
664 
665 static int record__synthesize(struct record *rec)
666 {
667 	struct perf_session *session = rec->session;
668 	struct machine *machine = &session->machines.host;
669 	struct perf_data_file *file = &rec->file;
670 	struct record_opts *opts = &rec->opts;
671 	struct perf_tool *tool = &rec->tool;
672 	int fd = perf_data_file__fd(file);
673 	int err = 0;
674 
675 	if (file->is_pipe) {
676 		err = perf_event__synthesize_attrs(tool, session,
677 						   process_synthesized_event);
678 		if (err < 0) {
679 			pr_err("Couldn't synthesize attrs.\n");
680 			goto out;
681 		}
682 
683 		if (have_tracepoints(&rec->evlist->entries)) {
684 			/*
685 			 * FIXME err <= 0 here actually means that
686 			 * there were no tracepoints so it's not really
687 			 * an error, just that we don't need to
688 			 * synthesize anything.  We really have to
689 			 * return this more properly and also
690 			 * propagate errors that now call die()
691 			 */
692 			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
693 								  process_synthesized_event);
694 			if (err <= 0) {
695 				pr_err("Couldn't record tracing data.\n");
696 				goto out;
697 			}
698 			rec->bytes_written += err;
699 		}
700 	}
701 
702 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
703 					  process_synthesized_event, machine);
704 	if (err)
705 		goto out;
706 
707 	if (rec->opts.full_auxtrace) {
708 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
709 					session, process_synthesized_event);
710 		if (err)
711 			goto out;
712 	}
713 
714 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
715 						 machine);
716 	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
717 			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
718 			   "Check /proc/kallsyms permission or run as root.\n");
719 
720 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
721 					     machine);
722 	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
723 			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
724 			   "Check /proc/modules permission or run as root.\n");
725 
726 	if (perf_guest) {
727 		machines__process_guests(&session->machines,
728 					 perf_event__synthesize_guest_os, tool);
729 	}
730 
731 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
732 					    process_synthesized_event, opts->sample_address,
733 					    opts->proc_map_timeout);
734 out:
735 	return err;
736 }
737 
738 static int __cmd_record(struct record *rec, int argc, const char **argv)
739 {
740 	int err;
741 	int status = 0;
742 	unsigned long waking = 0;
743 	const bool forks = argc > 0;
744 	struct machine *machine;
745 	struct perf_tool *tool = &rec->tool;
746 	struct record_opts *opts = &rec->opts;
747 	struct perf_data_file *file = &rec->file;
748 	struct perf_session *session;
749 	bool disabled = false, draining = false;
750 	int fd;
751 
752 	rec->progname = argv[0];
753 
754 	atexit(record__sig_exit);
755 	signal(SIGCHLD, sig_handler);
756 	signal(SIGINT, sig_handler);
757 	signal(SIGTERM, sig_handler);
758 
759 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) {
760 		signal(SIGUSR2, snapshot_sig_handler);
761 		if (rec->opts.auxtrace_snapshot_mode)
762 			trigger_on(&auxtrace_snapshot_trigger);
763 		if (rec->switch_output)
764 			trigger_on(&switch_output_trigger);
765 	} else {
766 		signal(SIGUSR2, SIG_IGN);
767 	}
768 
769 	session = perf_session__new(file, false, tool);
770 	if (session == NULL) {
771 		pr_err("Perf session creation failed.\n");
772 		return -1;
773 	}
774 
775 	fd = perf_data_file__fd(file);
776 	rec->session = session;
777 
778 	record__init_features(rec);
779 
780 	if (forks) {
781 		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
782 						    argv, file->is_pipe,
783 						    workload_exec_failed_signal);
784 		if (err < 0) {
785 			pr_err("Couldn't run the workload!\n");
786 			status = err;
787 			goto out_delete_session;
788 		}
789 	}
790 
791 	if (record__open(rec) != 0) {
792 		err = -1;
793 		goto out_child;
794 	}
795 
796 	err = bpf__apply_obj_config();
797 	if (err) {
798 		char errbuf[BUFSIZ];
799 
800 		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
801 		pr_err("ERROR: Apply config to BPF failed: %s\n",
802 			 errbuf);
803 		goto out_child;
804 	}
805 
806 	/*
807 	 * Normally perf_session__new would do this, but it doesn't have the
808 	 * evlist.
809 	 */
810 	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
811 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
812 		rec->tool.ordered_events = false;
813 	}
814 
815 	if (!rec->evlist->nr_groups)
816 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
817 
818 	if (file->is_pipe) {
819 		err = perf_header__write_pipe(fd);
820 		if (err < 0)
821 			goto out_child;
822 	} else {
823 		err = perf_session__write_header(session, rec->evlist, fd, false);
824 		if (err < 0)
825 			goto out_child;
826 	}
827 
828 	if (!rec->no_buildid
829 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
830 		pr_err("Couldn't generate buildids. "
831 		       "Use --no-buildid to profile anyway.\n");
832 		err = -1;
833 		goto out_child;
834 	}
835 
836 	machine = &session->machines.host;
837 
838 	err = record__synthesize(rec);
839 	if (err < 0)
840 		goto out_child;
841 
842 	if (rec->realtime_prio) {
843 		struct sched_param param;
844 
845 		param.sched_priority = rec->realtime_prio;
846 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
847 			pr_err("Could not set realtime priority.\n");
848 			err = -1;
849 			goto out_child;
850 		}
851 	}
852 
853 	/*
854 	 * When perf is starting the traced process, all the events
855 	 * (apart from group members) have enable_on_exec=1 set,
856 	 * so don't spoil it by prematurely enabling them.
857 	 */
858 	if (!target__none(&opts->target) && !opts->initial_delay)
859 		perf_evlist__enable(rec->evlist);
860 
861 	/*
862 	 * Let the child rip
863 	 */
864 	if (forks) {
865 		union perf_event *event;
866 
867 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
868 		if (event == NULL) {
869 			err = -ENOMEM;
870 			goto out_child;
871 		}
872 
873 		/*
874 		 * Some H/W events are generated before the COMM event,
875 		 * which is emitted during exec(), so perf script
876 		 * cannot see a correct process name for those events.
877 		 * Synthesize a COMM event up front to prevent that.
878 		 */
879 		perf_event__synthesize_comm(tool, event,
880 					    rec->evlist->workload.pid,
881 					    process_synthesized_event,
882 					    machine);
883 		free(event);
884 
885 		perf_evlist__start_workload(rec->evlist);
886 	}
887 
888 	if (opts->initial_delay) {
889 		usleep(opts->initial_delay * 1000);
890 		perf_evlist__enable(rec->evlist);
891 	}
892 
893 	trigger_ready(&auxtrace_snapshot_trigger);
894 	trigger_ready(&switch_output_trigger);
895 	for (;;) {
896 		unsigned long long hits = rec->samples;
897 
898 		if (record__mmap_read_all(rec) < 0) {
899 			trigger_error(&auxtrace_snapshot_trigger);
900 			trigger_error(&switch_output_trigger);
901 			err = -1;
902 			goto out_child;
903 		}
904 
905 		if (auxtrace_record__snapshot_started) {
906 			auxtrace_record__snapshot_started = 0;
907 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
908 				record__read_auxtrace_snapshot(rec);
909 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
910 				pr_err("AUX area tracing snapshot failed\n");
911 				err = -1;
912 				goto out_child;
913 			}
914 		}
915 
916 		if (trigger_is_hit(&switch_output_trigger)) {
917 			trigger_ready(&switch_output_trigger);
918 
919 			if (!quiet)
920 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
921 					waking);
922 			waking = 0;
923 			fd = record__switch_output(rec, false);
924 			if (fd < 0) {
925 				pr_err("Failed to switch to new file\n");
926 				trigger_error(&switch_output_trigger);
927 				err = fd;
928 				goto out_child;
929 			}
930 		}
931 
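		/*
		 * Nothing new was read from the mmaps this round: block in
		 * poll() until the kernel wakes us up, or stop once we are
		 * done or draining.
		 */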
932 		if (hits == rec->samples) {
933 			if (done || draining)
934 				break;
935 			err = perf_evlist__poll(rec->evlist, -1);
936 			/*
937 			 * Propagate the error only if there is one: a positive
938 			 * number of returned events and EINTR are not errors.
939 			 */
940 			if (err > 0 || (err < 0 && errno == EINTR))
941 				err = 0;
942 			waking++;
943 
944 			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
945 				draining = true;
946 		}
947 
948 		/*
949 		 * When perf is starting the traced process, at the end events
950 		 * die with the process and we wait for that. Thus no need to
951 		 * disable events in this case.
952 		 */
953 		if (done && !disabled && !target__none(&opts->target)) {
954 			trigger_off(&auxtrace_snapshot_trigger);
955 			perf_evlist__disable(rec->evlist);
956 			disabled = true;
957 		}
958 	}
959 	trigger_off(&auxtrace_snapshot_trigger);
960 	trigger_off(&switch_output_trigger);
961 
962 	if (forks && workload_exec_errno) {
963 		char msg[STRERR_BUFSIZE];
964 		const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
965 		pr_err("Workload failed: %s\n", emsg);
966 		err = -1;
967 		goto out_child;
968 	}
969 
970 	if (!quiet)
971 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
972 
973 out_child:
974 	if (forks) {
975 		int exit_status;
976 
977 		if (!child_finished)
978 			kill(rec->evlist->workload.pid, SIGTERM);
979 
980 		wait(&exit_status);
981 
982 		if (err < 0)
983 			status = err;
984 		else if (WIFEXITED(exit_status))
985 			status = WEXITSTATUS(exit_status);
986 		else if (WIFSIGNALED(exit_status))
987 			signr = WTERMSIG(exit_status);
988 	} else
989 		status = err;
990 
991 	/* this will be recalculated during process_buildids() */
992 	rec->samples = 0;
993 
994 	if (!err) {
995 		if (!rec->timestamp_filename) {
996 			record__finish_output(rec);
997 		} else {
998 			fd = record__switch_output(rec, true);
999 			if (fd < 0) {
1000 				status = fd;
1001 				goto out_delete_session;
1002 			}
1003 		}
1004 	}
1005 
1006 	if (!err && !quiet) {
1007 		char samples[128];
1008 		const char *postfix = rec->timestamp_filename ?
1009 					".<timestamp>" : "";
1010 
1011 		if (rec->samples && !rec->opts.full_auxtrace)
1012 			scnprintf(samples, sizeof(samples),
1013 				  " (%" PRIu64 " samples)", rec->samples);
1014 		else
1015 			samples[0] = '\0';
1016 
1017 		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1018 			perf_data_file__size(file) / 1024.0 / 1024.0,
1019 			file->path, postfix, samples);
1020 	}
1021 
1022 out_delete_session:
1023 	perf_session__delete(session);
1024 	return status;
1025 }
1026 
1027 static void callchain_debug(struct callchain_param *callchain)
1028 {
1029 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1030 
1031 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1032 
1033 	if (callchain->record_mode == CALLCHAIN_DWARF)
1034 		pr_debug("callchain: stack dump size %d\n",
1035 			 callchain->dump_size);
1036 }
1037 
1038 int record_opts__parse_callchain(struct record_opts *record,
1039 				 struct callchain_param *callchain,
1040 				 const char *arg, bool unset)
1041 {
1042 	int ret;
1043 	callchain->enabled = !unset;
1044 
1045 	/* --no-call-graph */
1046 	if (unset) {
1047 		callchain->record_mode = CALLCHAIN_NONE;
1048 		pr_debug("callchain: disabled\n");
1049 		return 0;
1050 	}
1051 
1052 	ret = parse_callchain_record_opt(arg, callchain);
1053 	if (!ret) {
1054 		/* Enable data address sampling for DWARF unwind. */
1055 		if (callchain->record_mode == CALLCHAIN_DWARF)
1056 			record->sample_address = true;
1057 		callchain_debug(callchain);
1058 	}
1059 
1060 	return ret;
1061 }
1062 
1063 int record_parse_callchain_opt(const struct option *opt,
1064 			       const char *arg,
1065 			       int unset)
1066 {
1067 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1068 }
1069 
1070 int record_callchain_opt(const struct option *opt,
1071 			 const char *arg __maybe_unused,
1072 			 int unset __maybe_unused)
1073 {
1074 	struct callchain_param *callchain = opt->value;
1075 
1076 	callchain->enabled = true;
1077 
1078 	if (callchain->record_mode == CALLCHAIN_NONE)
1079 		callchain->record_mode = CALLCHAIN_FP;
1080 
1081 	callchain_debug(callchain);
1082 	return 0;
1083 }
1084 
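/*
 * Handle perf config keys, e.g. in ~/.perfconfig:
 *
 *   [record]
 *           build-id = cache        # or: no-cache, skip
 *           call-graph = dwarf      # remapped to call-graph.record-mode
 */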
1085 static int perf_record_config(const char *var, const char *value, void *cb)
1086 {
1087 	struct record *rec = cb;
1088 
1089 	if (!strcmp(var, "record.build-id")) {
1090 		if (!strcmp(value, "cache"))
1091 			rec->no_buildid_cache = false;
1092 		else if (!strcmp(value, "no-cache"))
1093 			rec->no_buildid_cache = true;
1094 		else if (!strcmp(value, "skip"))
1095 			rec->no_buildid = true;
1096 		else
1097 			return -1;
1098 		return 0;
1099 	}
1100 	if (!strcmp(var, "record.call-graph"))
1101 		var = "call-graph.record-mode"; /* fall-through */
1102 
1103 	return perf_default_config(var, value, cb);
1104 }
1105 
1106 struct clockid_map {
1107 	const char *name;
1108 	int clockid;
1109 };
1110 
1111 #define CLOCKID_MAP(n, c)	\
1112 	{ .name = n, .clockid = (c), }
1113 
1114 #define CLOCKID_END	{ .name = NULL, }
1115 
1116 
1117 /*
1118  * Add the missing ones; we need to build on many distros...
1119  */
1120 #ifndef CLOCK_MONOTONIC_RAW
1121 #define CLOCK_MONOTONIC_RAW 4
1122 #endif
1123 #ifndef CLOCK_BOOTTIME
1124 #define CLOCK_BOOTTIME 7
1125 #endif
1126 #ifndef CLOCK_TAI
1127 #define CLOCK_TAI 11
1128 #endif
1129 
1130 static const struct clockid_map clockids[] = {
1131 	/* available for all events, NMI safe */
1132 	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
1133 	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
1134 
1135 	/* available for some events */
1136 	CLOCKID_MAP("realtime", CLOCK_REALTIME),
1137 	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
1138 	CLOCKID_MAP("tai", CLOCK_TAI),
1139 
1140 	/* available for the lazy */
1141 	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
1142 	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
1143 	CLOCKID_MAP("real", CLOCK_REALTIME),
1144 	CLOCKID_MAP("boot", CLOCK_BOOTTIME),
1145 
1146 	CLOCKID_END,
1147 };
1148 
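/*
 * -k/--clockid accepts a raw clockid number ("-k 4"), a name from the
 * clockids[] table above ("-k mono"), or a "CLOCK_"-prefixed name
 * ("-k CLOCK_MONOTONIC_RAW").
 */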
1149 static int parse_clockid(const struct option *opt, const char *str, int unset)
1150 {
1151 	struct record_opts *opts = (struct record_opts *)opt->value;
1152 	const struct clockid_map *cm;
1153 	const char *ostr = str;
1154 
1155 	if (unset) {
1156 		opts->use_clockid = 0;
1157 		return 0;
1158 	}
1159 
1160 	/* no arg passed */
1161 	if (!str)
1162 		return 0;
1163 
1164 	/* no setting it twice */
1165 	if (opts->use_clockid)
1166 		return -1;
1167 
1168 	opts->use_clockid = true;
1169 
1170 	/* if it's a number, we're done */
1171 	if (sscanf(str, "%d", &opts->clockid) == 1)
1172 		return 0;
1173 
1174 	/* allow a "CLOCK_" prefix to the name */
1175 	if (!strncasecmp(str, "CLOCK_", 6))
1176 		str += 6;
1177 
1178 	for (cm = clockids; cm->name; cm++) {
1179 		if (!strcasecmp(str, cm->name)) {
1180 			opts->clockid = cm->clockid;
1181 			return 0;
1182 		}
1183 	}
1184 
1185 	opts->use_clockid = false;
1186 	ui__warning("unknown clockid %s, check man page\n", ostr);
1187 	return -1;
1188 }
1189 
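/*
 * -m/--mmap-pages takes "pages[,pages]": e.g. "-m 512,64" uses 512
 * pages for the event mmaps and 64 for the AUX area mmaps, while a
 * single value leaves the AUX size at its default.
 */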
1190 static int record__parse_mmap_pages(const struct option *opt,
1191 				    const char *str,
1192 				    int unset __maybe_unused)
1193 {
1194 	struct record_opts *opts = opt->value;
1195 	char *s, *p;
1196 	unsigned int mmap_pages;
1197 	int ret;
1198 
1199 	if (!str)
1200 		return -EINVAL;
1201 
1202 	s = strdup(str);
1203 	if (!s)
1204 		return -ENOMEM;
1205 
1206 	p = strchr(s, ',');
1207 	if (p)
1208 		*p = '\0';
1209 
1210 	if (*s) {
1211 		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1212 		if (ret)
1213 			goto out_free;
1214 		opts->mmap_pages = mmap_pages;
1215 	}
1216 
1217 	if (!p) {
1218 		ret = 0;
1219 		goto out_free;
1220 	}
1221 
1222 	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1223 	if (ret)
1224 		goto out_free;
1225 
1226 	opts->auxtrace_mmap_pages = mmap_pages;
1227 
1228 out_free:
1229 	free(s);
1230 	return ret;
1231 }
1232 
1233 static const char * const __record_usage[] = {
1234 	"perf record [<options>] [<command>]",
1235 	"perf record [<options>] -- <command> [<options>]",
1236 	NULL
1237 };
1238 const char * const *record_usage = __record_usage;
1239 
1240 /*
1241  * XXX Ideally this would be local to cmd_record() and passed to a record__new(),
1242  * because we need access to it in record__exit(), which is called
1243  * after cmd_record() exits, but since record_options needs to be accessible to
1244  * builtin-script, leave it here.
1245  *
1246  * At least we don't touch it in all the other functions here directly.
1247  *
1248  * Just say no to tons of global variables, sigh.
1249  */
1250 static struct record record = {
1251 	.opts = {
1252 		.sample_time	     = true,
1253 		.mmap_pages	     = UINT_MAX,
1254 		.user_freq	     = UINT_MAX,
1255 		.user_interval	     = ULLONG_MAX,
1256 		.freq		     = 4000,
1257 		.target		     = {
1258 			.uses_mmap   = true,
1259 			.default_per_cpu = true,
1260 		},
1261 		.proc_map_timeout     = 500,
1262 	},
1263 	.tool = {
1264 		.sample		= process_sample_event,
1265 		.fork		= perf_event__process_fork,
1266 		.exit		= perf_event__process_exit,
1267 		.comm		= perf_event__process_comm,
1268 		.mmap		= perf_event__process_mmap,
1269 		.mmap2		= perf_event__process_mmap2,
1270 		.ordered_events	= true,
1271 	},
1272 };
1273 
1274 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1275 	"\n\t\t\t\tDefault: fp";
1276 
1277 /*
1278  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1279  * with it and switch to use the library functions in perf_evlist that came
1280  * from builtin-record.c, i.e. use record_opts,
1281  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1282  * using pipes, etc.
1283  */
1284 struct option __record_options[] = {
1285 	OPT_CALLBACK('e', "event", &record.evlist, "event",
1286 		     "event selector. use 'perf list' to list available events",
1287 		     parse_events_option),
1288 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1289 		     "event filter", parse_filter),
1290 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1291 			   NULL, "don't record events from perf itself",
1292 			   exclude_perf),
1293 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1294 		    "record events on existing process id"),
1295 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1296 		    "record events on existing thread id"),
1297 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
1298 		    "collect data with this RT SCHED_FIFO priority"),
1299 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1300 		    "collect data without buffering"),
1301 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1302 		    "collect raw sample records from all opened counters"),
1303 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1304 			    "system-wide collection from all CPUs"),
1305 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1306 		    "list of cpus to monitor"),
1307 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1308 	OPT_STRING('o', "output", &record.file.path, "file",
1309 		    "output file name"),
1310 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1311 			&record.opts.no_inherit_set,
1312 			"child tasks do not inherit counters"),
1313 	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1314 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1315 		     "number of mmap data pages and AUX area tracing mmap pages",
1316 		     record__parse_mmap_pages),
1317 	OPT_BOOLEAN(0, "group", &record.opts.group,
1318 		    "put the counters into a counter group"),
1319 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1320 			   NULL, "enables call-graph recording" ,
1321 			   &record_callchain_opt),
1322 	OPT_CALLBACK(0, "call-graph", &record.opts,
1323 		     "record_mode[,record_size]", record_callchain_help,
1324 		     &record_parse_callchain_opt),
1325 	OPT_INCR('v', "verbose", &verbose,
1326 		    "be more verbose (show counter open errors, etc)"),
1327 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1328 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1329 		    "per thread counts"),
1330 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1331 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1332 			&record.opts.sample_time_set,
1333 			"Record the sample timestamps"),
1334 	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
1335 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1336 		    "don't sample"),
1337 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1338 			&record.no_buildid_cache_set,
1339 			"do not update the buildid cache"),
1340 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1341 			&record.no_buildid_set,
1342 			"do not collect buildids in perf.data"),
1343 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1344 		     "monitor event in cgroup name only",
1345 		     parse_cgroups),
1346 	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1347 		  "ms to wait before starting measurement after program start"),
1348 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1349 		   "user to profile"),
1350 
1351 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1352 		     "branch any", "sample any taken branches",
1353 		     parse_branch_stack),
1354 
1355 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1356 		     "branch filter mask", "branch stack filter modes",
1357 		     parse_branch_stack),
1358 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1359 		    "sample by weight (on special events only)"),
1360 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1361 		    "sample transaction flags (special events only)"),
1362 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1363 		    "use per-thread mmaps"),
1364 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1365 		    "sample selected machine registers on interrupt,"
1366 		    " use -I ? to list register names", parse_regs),
1367 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1368 		    "Record running/enabled time of read (:S) events"),
1369 	OPT_CALLBACK('k', "clockid", &record.opts,
1370 	"clockid", "clockid to use for events, see clock_gettime()",
1371 	parse_clockid),
1372 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1373 			  "opts", "AUX area tracing Snapshot Mode", ""),
1374 	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1375 			"per thread proc mmap processing timeout in ms"),
1376 	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1377 		    "Record context switch events"),
1378 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1379 			 "Configure all used events to run in kernel space.",
1380 			 PARSE_OPT_EXCLUSIVE),
1381 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1382 			 "Configure all used events to run in user space.",
1383 			 PARSE_OPT_EXCLUSIVE),
1384 	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1385 		   "clang binary to use for compiling BPF scriptlets"),
1386 	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1387 		   "options passed to clang when compiling BPF scriptlets"),
1388 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1389 		   "file", "vmlinux pathname"),
1390 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1391 		    "Record build-id of all DSOs regardless of hits"),
1392 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1393 		    "append timestamp to output filename"),
1394 	OPT_BOOLEAN(0, "switch-output", &record.switch_output,
1395 		    "Switch output when receive SIGUSR2"),
1396 	OPT_END()
1397 };
1398 
1399 struct option *record_options = __record_options;
1400 
1401 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
1402 {
1403 	int err;
1404 	struct record *rec = &record;
1405 	char errbuf[BUFSIZ];
1406 
1407 #ifndef HAVE_LIBBPF_SUPPORT
1408 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1409 	set_nobuild('\0', "clang-path", true);
1410 	set_nobuild('\0', "clang-opt", true);
1411 # undef set_nobuild
1412 #endif
1413 
1414 #ifndef HAVE_BPF_PROLOGUE
1415 # if !defined (HAVE_DWARF_SUPPORT)
1416 #  define REASON  "NO_DWARF=1"
1417 # elif !defined (HAVE_LIBBPF_SUPPORT)
1418 #  define REASON  "NO_LIBBPF=1"
1419 # else
1420 #  define REASON  "this architecture doesn't support BPF prologue"
1421 # endif
1422 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
1423 	set_nobuild('\0', "vmlinux", true);
1424 # undef set_nobuild
1425 # undef REASON
1426 #endif
1427 
1428 	rec->evlist = perf_evlist__new();
1429 	if (rec->evlist == NULL)
1430 		return -ENOMEM;
1431 
1432 	perf_config(perf_record_config, rec);
1433 
1434 	argc = parse_options(argc, argv, record_options, record_usage,
1435 			    PARSE_OPT_STOP_AT_NON_OPTION);
1436 	if (!argc && target__none(&rec->opts.target))
1437 		usage_with_options(record_usage, record_options);
1438 
1439 	if (nr_cgroups && !rec->opts.target.system_wide) {
1440 		usage_with_options_msg(record_usage, record_options,
1441 			"cgroup monitoring only available in system-wide mode");
1442 
1443 	}
1444 	if (rec->opts.record_switch_events &&
1445 	    !perf_can_record_switch_events()) {
1446 		ui__error("kernel does not support recording context switch events\n");
1447 		parse_options_usage(record_usage, record_options, "switch-events", 0);
1448 		return -EINVAL;
1449 	}
1450 
1451 	if (rec->switch_output)
1452 		rec->timestamp_filename = true;
1453 
1454 	if (!rec->itr) {
1455 		rec->itr = auxtrace_record__init(rec->evlist, &err);
1456 		if (err)
1457 			return err;
1458 	}
1459 
1460 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
1461 					      rec->opts.auxtrace_snapshot_opts);
1462 	if (err)
1463 		return err;
1464 
1465 	err = bpf__setup_stdout(rec->evlist);
1466 	if (err) {
1467 		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
1468 		pr_err("ERROR: Setup BPF stdout failed: %s\n",
1469 			 errbuf);
1470 		return err;
1471 	}
1472 
1473 	err = -ENOMEM;
1474 
1475 	symbol__init(NULL);
1476 
1477 	if (symbol_conf.kptr_restrict)
1478 		pr_warning(
1479 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1480 "check /proc/sys/kernel/kptr_restrict.\n\n"
1481 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1482 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1483 "Samples in kernel modules won't be resolved at all.\n\n"
1484 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1485 "even with a suitable vmlinux or kallsyms file.\n\n");
1486 
1487 	if (rec->no_buildid_cache || rec->no_buildid) {
1488 		disable_buildid_cache();
1489 	} else if (rec->switch_output) {
1490 		/*
1491 		 * In 'perf record --switch-output', disable buildid
1492 		 * generation by default to reduce data file switching
1493 		 * overhead. Still generate buildid if they are required
1494 		 * explicitly using
1495 		 *
1496 		 *  perf record --switch-output --no-no-buildid \
1497 		 *              --no-no-buildid-cache
1498 		 *
1499 		 * Following code equals to:
1500 		 *
1501 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
1502 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
1503 		 *         disable_buildid_cache();
1504 		 */
1505 		bool disable = true;
1506 
1507 		if (rec->no_buildid_set && !rec->no_buildid)
1508 			disable = false;
1509 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
1510 			disable = false;
1511 		if (disable) {
1512 			rec->no_buildid = true;
1513 			rec->no_buildid_cache = true;
1514 			disable_buildid_cache();
1515 		}
1516 	}
1517 
1518 	if (rec->evlist->nr_entries == 0 &&
1519 	    perf_evlist__add_default(rec->evlist) < 0) {
1520 		pr_err("Not enough memory for event selector list\n");
1521 		goto out_symbol_exit;
1522 	}
1523 
1524 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1525 		rec->opts.no_inherit = true;
1526 
1527 	err = target__validate(&rec->opts.target);
1528 	if (err) {
1529 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1530 		ui__warning("%s", errbuf);
1531 	}
1532 
1533 	err = target__parse_uid(&rec->opts.target);
1534 	if (err) {
1535 		int saved_errno = errno;
1536 
1537 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1538 		ui__error("%s", errbuf);
1539 
1540 		err = -saved_errno;
1541 		goto out_symbol_exit;
1542 	}
1543 
1544 	err = -ENOMEM;
1545 	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1546 		usage_with_options(record_usage, record_options);
1547 
1548 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1549 	if (err)
1550 		goto out_symbol_exit;
1551 
1552 	/*
1553 	 * We take all buildids when the file contains
1554 	 * AUX area tracing data, because we do not decode the
1555 	 * trace; decoding it would take too long.
1556 	 */
1557 	if (rec->opts.full_auxtrace)
1558 		rec->buildid_all = true;
1559 
1560 	if (record_opts__config(&rec->opts)) {
1561 		err = -EINVAL;
1562 		goto out_symbol_exit;
1563 	}
1564 
1565 	err = __cmd_record(&record, argc, argv);
1566 out_symbol_exit:
1567 	perf_evlist__delete(rec->evlist);
1568 	symbol__exit();
1569 	auxtrace_record__free(rec->itr);
1570 	return err;
1571 }
1572 
1573 static void snapshot_sig_handler(int sig __maybe_unused)
1574 {
1575 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
1576 		trigger_hit(&auxtrace_snapshot_trigger);
1577 		auxtrace_record__snapshot_started = 1;
1578 		if (auxtrace_record__snapshot_start(record.itr))
1579 			trigger_error(&auxtrace_snapshot_trigger);
1580 	}
1581 
1582 	if (trigger_is_ready(&switch_output_trigger))
1583 		trigger_hit(&switch_output_trigger);
1584 }
1585