xref: /linux/tools/perf/builtin-record.c (revision 5dafea097ac65bd01cc86801c399ae41dce79756)
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #include "builtin.h"
9 
10 #include "perf.h"
11 
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include <subcmd/parse-options.h>
15 #include "util/parse-events.h"
16 #include "util/config.h"
17 
18 #include "util/callchain.h"
19 #include "util/cgroup.h"
20 #include "util/header.h"
21 #include "util/event.h"
22 #include "util/evlist.h"
23 #include "util/evsel.h"
24 #include "util/debug.h"
25 #include "util/session.h"
26 #include "util/tool.h"
27 #include "util/symbol.h"
28 #include "util/cpumap.h"
29 #include "util/thread_map.h"
30 #include "util/data.h"
31 #include "util/perf_regs.h"
32 #include "util/auxtrace.h"
33 #include "util/tsc.h"
34 #include "util/parse-branch-options.h"
35 #include "util/parse-regs-options.h"
36 #include "util/llvm-utils.h"
37 #include "util/bpf-loader.h"
38 #include "util/trigger.h"
39 #include "asm/bug.h"
40 
41 #include <unistd.h>
42 #include <sched.h>
43 #include <sys/mman.h>
44 
45 
46 
47 struct record {
48 	struct perf_tool	tool;
49 	struct record_opts	opts;
50 	u64			bytes_written;
51 	struct perf_data_file	file;
52 	struct auxtrace_record	*itr;
53 	struct perf_evlist	*evlist;
54 	struct perf_session	*session;
55 	const char		*progname;
56 	int			realtime_prio;
57 	bool			no_buildid;
58 	bool			no_buildid_set;
59 	bool			no_buildid_cache;
60 	bool			no_buildid_cache_set;
61 	bool			buildid_all;
62 	bool			timestamp_filename;
63 	bool			switch_output;
64 	unsigned long long	samples;
65 };
66 
67 static int record__write(struct record *rec, void *bf, size_t size)
68 {
69 	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
70 		pr_err("failed to write perf data, error: %m\n");
71 		return -1;
72 	}
73 
74 	rec->bytes_written += size;
75 	return 0;
76 }
77 
78 static int process_synthesized_event(struct perf_tool *tool,
79 				     union perf_event *event,
80 				     struct perf_sample *sample __maybe_unused,
81 				     struct machine *machine __maybe_unused)
82 {
83 	struct record *rec = container_of(tool, struct record, tool);
84 	return record__write(rec, event, event->header.size);
85 }
86 
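/*
 * Worked example for the scan below: with a 64KiB data area, mask ==
 * 0xffff. Starting at head, each iteration advances evt_head by the
 * current record's header.size (e.g. 0x12010 -> 0x12028 for a 24-byte
 * record) until either a zero-sized header marks the end of valid data,
 * or a full buffer length has been covered, in which case the last step
 * is undone if it overshot so that [*start, *end) fits in the buffer.
 */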
87 static int
88 backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
89 {
90 	struct perf_event_header *pheader;
91 	u64 evt_head = head;
92 	int size = mask + 1;
93 
94 	pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
95 	pheader = (struct perf_event_header *)(buf + (head & mask));
96 	*start = head;
97 	while (true) {
98 		if (evt_head - head >= (unsigned int)size) {
99 			pr_debug("Finished reading backward ring buffer: rewind\n");
100 			if (evt_head - head > (unsigned int)size)
101 				evt_head -= pheader->size;
102 			*end = evt_head;
103 			return 0;
104 		}
105 
106 		pheader = (struct perf_event_header *)(buf + (evt_head & mask));
107 
108 		if (pheader->size == 0) {
109 			pr_debug("Finished reading backward ring buffer: get start\n");
110 			*end = evt_head;
111 			return 0;
112 		}
113 
114 		evt_head += pheader->size;
115 		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
116 	}
117 	WARN_ONCE(1, "Shouldn't get here\n");
118 	return -1;
119 }
120 
121 static int
122 rb_find_range(struct perf_evlist *evlist,
123 	      void *data, int mask, u64 head, u64 old,
124 	      u64 *start, u64 *end)
125 {
126 	if (!evlist->backward) {
127 		*start = old;
128 		*end = head;
129 		return 0;
130 	}
131 
132 	return backward_rb_find_range(data, mask, head, start, end);
133 }
134 
135 static int record__mmap_read(struct record *rec, int idx)
136 {
137 	struct perf_mmap *md = &rec->evlist->mmap[idx];
138 	u64 head = perf_mmap__read_head(md);
139 	u64 old = md->prev;
140 	u64 end = head, start = old;
141 	unsigned char *data = md->base + page_size;
142 	unsigned long size;
143 	void *buf;
144 	int rc = 0;
145 
146 	if (rb_find_range(rec->evlist, data, md->mask, head,
147 			  old, &start, &end))
148 		return -1;
149 
150 	if (start == end)
151 		return 0;
152 
153 	rec->samples++;
154 
155 	size = end - start;
156 	if (size > (unsigned long)(md->mask) + 1) {
157 		WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
158 
159 		md->prev = head;
160 		perf_evlist__mmap_consume(rec->evlist, idx);
161 		return 0;
162 	}
163 
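	/*
	 * If [start, end) wraps around the ring buffer, write it in two
	 * chunks. Example with mask == 0xffff (64KiB): start == 0x1fff0,
	 * end == 0x20010 writes 0x10 bytes from offset 0xfff0 first, then
	 * the remaining 0x10 bytes from offset 0.
	 */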
164 	if ((start & md->mask) + size != (end & md->mask)) {
165 		buf = &data[start & md->mask];
166 		size = md->mask + 1 - (start & md->mask);
167 		start += size;
168 
169 		if (record__write(rec, buf, size) < 0) {
170 			rc = -1;
171 			goto out;
172 		}
173 	}
174 
175 	buf = &data[start & md->mask];
176 	size = end - start;
177 	start += size;
178 
179 	if (record__write(rec, buf, size) < 0) {
180 		rc = -1;
181 		goto out;
182 	}
183 
184 	md->prev = head;
185 	perf_evlist__mmap_consume(rec->evlist, idx);
186 out:
187 	return rc;
188 }
189 
190 static volatile int done;
191 static volatile int signr = -1;
192 static volatile int child_finished;
193 
194 static volatile int auxtrace_record__snapshot_started;
195 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
196 static DEFINE_TRIGGER(switch_output_trigger);
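/*
 * Lifecycle of these triggers in this file: armed with trigger_on()
 * during setup, moved to the ready state with trigger_ready() before the
 * event loop, fired from a signal handler with trigger_hit(), observed
 * with trigger_is_hit() in the loop, and parked with trigger_error() or
 * trigger_off() on failure or shutdown (see util/trigger.h).
 */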
197 
198 static void sig_handler(int sig)
199 {
200 	if (sig == SIGCHLD)
201 		child_finished = 1;
202 	else
203 		signr = sig;
204 
205 	done = 1;
206 }
207 
208 static void record__sig_exit(void)
209 {
210 	if (signr == -1)
211 		return;
212 
213 	signal(signr, SIG_DFL);
214 	raise(signr);
215 }
216 
217 #ifdef HAVE_AUXTRACE_SUPPORT
218 
219 static int record__process_auxtrace(struct perf_tool *tool,
220 				    union perf_event *event, void *data1,
221 				    size_t len1, void *data2, size_t len2)
222 {
223 	struct record *rec = container_of(tool, struct record, tool);
224 	struct perf_data_file *file = &rec->file;
225 	size_t padding;
226 	u8 pad[8] = {0};
227 
228 	if (!perf_data_file__is_pipe(file)) {
229 		off_t file_offset;
230 		int fd = perf_data_file__fd(file);
231 		int err;
232 
233 		file_offset = lseek(fd, 0, SEEK_CUR);
234 		if (file_offset == -1)
235 			return -1;
236 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
237 						     event, file_offset);
238 		if (err)
239 			return err;
240 	}
241 
242 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
243 	padding = (len1 + len2) & 7;
244 	if (padding)
245 		padding = 8 - padding;
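	/*
	 * Example: len1 + len2 == 13 gives padding == 3, so 16 bytes of
	 * payload follow the event header and the record stays 8-byte
	 * aligned; an already aligned length gives padding == 0.
	 */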
246 
247 	record__write(rec, event, event->header.size);
248 	record__write(rec, data1, len1);
249 	if (len2)
250 		record__write(rec, data2, len2);
251 	record__write(rec, &pad, padding);
252 
253 	return 0;
254 }
255 
256 static int record__auxtrace_mmap_read(struct record *rec,
257 				      struct auxtrace_mmap *mm)
258 {
259 	int ret;
260 
261 	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
262 				  record__process_auxtrace);
263 	if (ret < 0)
264 		return ret;
265 
266 	if (ret)
267 		rec->samples++;
268 
269 	return 0;
270 }
271 
272 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
273 					       struct auxtrace_mmap *mm)
274 {
275 	int ret;
276 
277 	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
278 					   record__process_auxtrace,
279 					   rec->opts.auxtrace_snapshot_size);
280 	if (ret < 0)
281 		return ret;
282 
283 	if (ret)
284 		rec->samples++;
285 
286 	return 0;
287 }
288 
289 static int record__auxtrace_read_snapshot_all(struct record *rec)
290 {
291 	int i;
292 	int rc = 0;
293 
294 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
295 		struct auxtrace_mmap *mm =
296 				&rec->evlist->mmap[i].auxtrace_mmap;
297 
298 		if (!mm->base)
299 			continue;
300 
301 		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
302 			rc = -1;
303 			goto out;
304 		}
305 	}
306 out:
307 	return rc;
308 }
309 
310 static void record__read_auxtrace_snapshot(struct record *rec)
311 {
312 	pr_debug("Recording AUX area tracing snapshot\n");
313 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
314 		trigger_error(&auxtrace_snapshot_trigger);
315 	} else {
316 		if (auxtrace_record__snapshot_finish(rec->itr))
317 			trigger_error(&auxtrace_snapshot_trigger);
318 		else
319 			trigger_ready(&auxtrace_snapshot_trigger);
320 	}
321 }
322 
323 #else
324 
325 static inline
326 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
327 			       struct auxtrace_mmap *mm __maybe_unused)
328 {
329 	return 0;
330 }
331 
332 static inline
333 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
334 {
335 }
336 
337 static inline
338 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
339 {
340 	return 0;
341 }
342 
343 #endif
344 
345 static int record__open(struct record *rec)
346 {
347 	char msg[512];
348 	struct perf_evsel *pos;
349 	struct perf_evlist *evlist = rec->evlist;
350 	struct perf_session *session = rec->session;
351 	struct record_opts *opts = &rec->opts;
352 	int rc = 0;
353 
354 	perf_evlist__config(evlist, opts, &callchain_param);
355 
356 	evlist__for_each(evlist, pos) {
357 try_again:
358 		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
359 			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
360 				if (verbose)
361 					ui__warning("%s\n", msg);
362 				goto try_again;
363 			}
364 
365 			rc = -errno;
366 			perf_evsel__open_strerror(pos, &opts->target,
367 						  errno, msg, sizeof(msg));
368 			ui__error("%s\n", msg);
369 			goto out;
370 		}
371 	}
372 
373 	if (perf_evlist__apply_filters(evlist, &pos)) {
374 		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
375 			pos->filter, perf_evsel__name(pos), errno,
376 			strerror_r(errno, msg, sizeof(msg)));
377 		rc = -1;
378 		goto out;
379 	}
380 
381 	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
382 				 opts->auxtrace_mmap_pages,
383 				 opts->auxtrace_snapshot_mode) < 0) {
384 		if (errno == EPERM) {
385 			pr_err("Permission error mapping pages.\n"
386 			       "Consider increasing "
387 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
388 			       "or try again with a smaller value of -m/--mmap_pages.\n"
389 			       "(current value: %u,%u)\n",
390 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
391 			rc = -errno;
392 		} else {
393 			pr_err("failed to mmap with %d (%s)\n", errno,
394 				strerror_r(errno, msg, sizeof(msg)));
395 			if (errno)
396 				rc = -errno;
397 			else
398 				rc = -EINVAL;
399 		}
400 		goto out;
401 	}
402 
403 	session->evlist = evlist;
404 	perf_session__set_id_hdr_size(session);
405 out:
406 	return rc;
407 }
408 
409 static int process_sample_event(struct perf_tool *tool,
410 				union perf_event *event,
411 				struct perf_sample *sample,
412 				struct perf_evsel *evsel,
413 				struct machine *machine)
414 {
415 	struct record *rec = container_of(tool, struct record, tool);
416 
417 	rec->samples++;
418 
419 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
420 }
421 
422 static int process_buildids(struct record *rec)
423 {
424 	struct perf_data_file *file  = &rec->file;
425 	struct perf_session *session = rec->session;
426 
427 	if (file->size == 0)
428 		return 0;
429 
430 	/*
431 	 * During this process, it'll load kernel map and replace the
432 	 * dso->long_name to a real pathname it found.  In this case
433 	 * we prefer the vmlinux path like
434 	 *   /lib/modules/3.16.4/build/vmlinux
435 	 *
436 	 * rather than build-id path (in debug directory).
437 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
438 	 */
439 	symbol_conf.ignore_vmlinux_buildid = true;
440 
441 	/*
442 	 * If --buildid-all is given, it marks all DSO regardless of hits,
443 	 * so no need to process samples.
444 	 */
445 	if (rec->buildid_all)
446 		rec->tool.sample = NULL;
447 
448 	return perf_session__process_events(session);
449 }
450 
451 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
452 {
453 	int err;
454 	struct perf_tool *tool = data;
455 	/*
456 	 * For the guest kernel, when processing the record & report
457 	 * subcommands, we synthesize module mmap events before the guest
458 	 * kernel mmap and trigger a dso preload, because default guest
459 	 * module symbols are loaded from guest kallsyms instead of
460 	 * /lib/modules/XXX/XXX.  This avoids missing symbols when the
461 	 * first address falls in a module rather than in the guest kernel.
462 	 */
463 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
464 					     machine);
465 	if (err < 0)
466 		pr_err("Couldn't record guest kernel [%d]'s reference"
467 		       " relocation symbol.\n", machine->pid);
468 
469 	/*
470 	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
471 	 * have no _text sometimes.
472 	 */
473 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
474 						 machine);
475 	if (err < 0)
476 		pr_err("Couldn't record guest kernel [%d]'s reference"
477 		       " relocation symbol.\n", machine->pid);
478 }
479 
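/*
 * Header-only synthetic event written after each sweep of the mmaps in
 * record__mmap_read_all(); on the report side it serves as a point up to
 * which buffered events can safely be sorted by time and flushed.
 */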
480 static struct perf_event_header finished_round_event = {
481 	.size = sizeof(struct perf_event_header),
482 	.type = PERF_RECORD_FINISHED_ROUND,
483 };
484 
485 static int record__mmap_read_all(struct record *rec)
486 {
487 	u64 bytes_written = rec->bytes_written;
488 	int i;
489 	int rc = 0;
490 
491 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
492 		struct auxtrace_mmap *mm = &rec->evlist->mmap[i].auxtrace_mmap;
493 
494 		if (rec->evlist->mmap[i].base) {
495 			if (record__mmap_read(rec, i) != 0) {
496 				rc = -1;
497 				goto out;
498 			}
499 		}
500 
501 		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
502 		    record__auxtrace_mmap_read(rec, mm) != 0) {
503 			rc = -1;
504 			goto out;
505 		}
506 	}
507 
508 	/*
509 	 * Mark the round finished in case we wrote
510 	 * at least one event.
511 	 */
512 	if (bytes_written != rec->bytes_written)
513 		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
514 
515 out:
516 	return rc;
517 }
518 
519 static void record__init_features(struct record *rec)
520 {
521 	struct perf_session *session = rec->session;
522 	int feat;
523 
524 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
525 		perf_header__set_feat(&session->header, feat);
526 
527 	if (rec->no_buildid)
528 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
529 
530 	if (!have_tracepoints(&rec->evlist->entries))
531 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
532 
533 	if (!rec->opts.branch_stack)
534 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
535 
536 	if (!rec->opts.full_auxtrace)
537 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
538 
539 	perf_header__clear_feat(&session->header, HEADER_STAT);
540 }
541 
542 static void
543 record__finish_output(struct record *rec)
544 {
545 	struct perf_data_file *file = &rec->file;
546 	int fd = perf_data_file__fd(file);
547 
548 	if (file->is_pipe)
549 		return;
550 
551 	rec->session->header.data_size += rec->bytes_written;
552 	file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
553 
554 	if (!rec->no_buildid) {
555 		process_buildids(rec);
556 
557 		if (rec->buildid_all)
558 			dsos__hit_all(rec->session);
559 	}
560 	perf_session__write_header(rec->session, rec->evlist, fd, true);
561 
562 	return;
563 }
564 
565 static int record__synthesize_workload(struct record *rec)
566 {
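	/*
	 * struct thread_map ends in a flexible array of entries, so the
	 * anonymous struct below, which places one struct thread_map_data
	 * right after it, provides stack storage for a one-entry map
	 * (assuming the layout in util/thread_map.h).
	 */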
567 	struct {
568 		struct thread_map map;
569 		struct thread_map_data map_data;
570 	} thread_map;
571 
572 	thread_map.map.nr = 1;
573 	thread_map.map.map[0].pid = rec->evlist->workload.pid;
574 	thread_map.map.map[0].comm = NULL;
575 	return perf_event__synthesize_thread_map(&rec->tool, &thread_map.map,
576 						 process_synthesized_event,
577 						 &rec->session->machines.host,
578 						 rec->opts.sample_address,
579 						 rec->opts.proc_map_timeout);
580 }
581 
582 static int record__synthesize(struct record *rec);
583 
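/*
 * Finish the current output file and move it aside as <path>.<timestamp>
 * (e.g. perf.data.2015122520103046); unless called at exit, the switch
 * reopens a fresh <path> so that recording can continue.
 */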
584 static int
585 record__switch_output(struct record *rec, bool at_exit)
586 {
587 	struct perf_data_file *file = &rec->file;
588 	int fd, err;
589 
590 	/* Placeholder of the same size as a real timestamp: "2015122520103046" */
591 	char timestamp[] = "InvalidTimestamp";
592 
593 	rec->samples = 0;
594 	record__finish_output(rec);
595 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
596 	if (err) {
597 		pr_err("Failed to get current timestamp\n");
598 		return -EINVAL;
599 	}
600 
601 	fd = perf_data_file__switch(file, timestamp,
602 				    rec->session->header.data_offset,
603 				    at_exit);
604 	if (fd >= 0 && !at_exit) {
605 		rec->bytes_written = 0;
606 		rec->session->header.data_size = 0;
607 	}
608 
609 	if (!quiet)
610 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
611 			file->path, timestamp);
612 
613 	/* Output tracking events */
614 	if (!at_exit) {
615 		record__synthesize(rec);
616 
617 		/*
618 		 * In 'perf record --switch-output' without -a,
619 		 * record__synthesize() in record__switch_output() won't
620 		 * generate tracking events because there's no thread_map
621 		 * in the evlist, which leaves the newly created perf.data
622 		 * without map and comm information.
623 		 * Create a fake thread_map and call
624 		 * perf_event__synthesize_thread_map() directly for those events.
625 		 */
626 		if (target__none(&rec->opts.target))
627 			record__synthesize_workload(rec);
628 	}
629 	return fd;
630 }
631 
632 static volatile int workload_exec_errno;
633 
634 /*
635  * perf_evlist__prepare_workload will send a SIGUSR1
636  * if the fork fails, since we asked for that by setting its
637  * want_signal to true.
638  */
639 static void workload_exec_failed_signal(int signo __maybe_unused,
640 					siginfo_t *info,
641 					void *ucontext __maybe_unused)
642 {
643 	workload_exec_errno = info->si_value.sival_int;
644 	done = 1;
645 	child_finished = 1;
646 }
647 
648 static void snapshot_sig_handler(int sig);
649 
650 int __weak
651 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
652 			    struct perf_tool *tool __maybe_unused,
653 			    perf_event__handler_t process __maybe_unused,
654 			    struct machine *machine __maybe_unused)
655 {
656 	return 0;
657 }
658 
659 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
660 {
661 	if (rec->evlist && rec->evlist->mmap && rec->evlist->mmap[0].base)
662 		return rec->evlist->mmap[0].base;
663 	return NULL;
664 }
665 
666 static int record__synthesize(struct record *rec)
667 {
668 	struct perf_session *session = rec->session;
669 	struct machine *machine = &session->machines.host;
670 	struct perf_data_file *file = &rec->file;
671 	struct record_opts *opts = &rec->opts;
672 	struct perf_tool *tool = &rec->tool;
673 	int fd = perf_data_file__fd(file);
674 	int err = 0;
675 
676 	if (file->is_pipe) {
677 		err = perf_event__synthesize_attrs(tool, session,
678 						   process_synthesized_event);
679 		if (err < 0) {
680 			pr_err("Couldn't synthesize attrs.\n");
681 			goto out;
682 		}
683 
684 		if (have_tracepoints(&rec->evlist->entries)) {
685 			/*
686 			 * FIXME err <= 0 here actually means that
687 			 * there were no tracepoints, so it's not really
688 			 * an error, just that we don't need to
689 			 * synthesize anything.  We really should report
690 			 * this more properly and also propagate errors
691 			 * that currently end up calling die().
692 			 */
693 			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
694 								  process_synthesized_event);
695 			if (err <= 0) {
696 				pr_err("Couldn't record tracing data.\n");
697 				goto out;
698 			}
699 			rec->bytes_written += err;
700 		}
701 	}
702 
703 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
704 					  process_synthesized_event, machine);
705 	if (err)
706 		goto out;
707 
708 	if (rec->opts.full_auxtrace) {
709 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
710 					session, process_synthesized_event);
711 		if (err)
712 			goto out;
713 	}
714 
715 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
716 						 machine);
717 	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
718 			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
719 			   "Check /proc/kallsyms permission or run as root.\n");
720 
721 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
722 					     machine);
723 	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
724 			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
725 			   "Check /proc/modules permission or run as root.\n");
726 
727 	if (perf_guest) {
728 		machines__process_guests(&session->machines,
729 					 perf_event__synthesize_guest_os, tool);
730 	}
731 
732 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
733 					    process_synthesized_event, opts->sample_address,
734 					    opts->proc_map_timeout);
735 out:
736 	return err;
737 }
738 
739 static int __cmd_record(struct record *rec, int argc, const char **argv)
740 {
741 	int err;
742 	int status = 0;
743 	unsigned long waking = 0;
744 	const bool forks = argc > 0;
745 	struct machine *machine;
746 	struct perf_tool *tool = &rec->tool;
747 	struct record_opts *opts = &rec->opts;
748 	struct perf_data_file *file = &rec->file;
749 	struct perf_session *session;
750 	bool disabled = false, draining = false;
751 	int fd;
752 
753 	rec->progname = argv[0];
754 
755 	atexit(record__sig_exit);
756 	signal(SIGCHLD, sig_handler);
757 	signal(SIGINT, sig_handler);
758 	signal(SIGTERM, sig_handler);
759 
760 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) {
761 		signal(SIGUSR2, snapshot_sig_handler);
762 		if (rec->opts.auxtrace_snapshot_mode)
763 			trigger_on(&auxtrace_snapshot_trigger);
764 		if (rec->switch_output)
765 			trigger_on(&switch_output_trigger);
766 	} else {
767 		signal(SIGUSR2, SIG_IGN);
768 	}
769 
770 	session = perf_session__new(file, false, tool);
771 	if (session == NULL) {
772 		pr_err("Perf session creation failed.\n");
773 		return -1;
774 	}
775 
776 	fd = perf_data_file__fd(file);
777 	rec->session = session;
778 
779 	record__init_features(rec);
780 
781 	if (forks) {
782 		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
783 						    argv, file->is_pipe,
784 						    workload_exec_failed_signal);
785 		if (err < 0) {
786 			pr_err("Couldn't run the workload!\n");
787 			status = err;
788 			goto out_delete_session;
789 		}
790 	}
791 
792 	if (record__open(rec) != 0) {
793 		err = -1;
794 		goto out_child;
795 	}
796 
797 	err = bpf__apply_obj_config();
798 	if (err) {
799 		char errbuf[BUFSIZ];
800 
801 		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
802 		pr_err("ERROR: Apply config to BPF failed: %s\n",
803 			 errbuf);
804 		goto out_child;
805 	}
806 
807 	/*
808 	 * Normally perf_session__new would do this, but it doesn't have the
809 	 * evlist.
810 	 */
811 	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
812 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
813 		rec->tool.ordered_events = false;
814 	}
815 
816 	if (!rec->evlist->nr_groups)
817 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
818 
819 	if (file->is_pipe) {
820 		err = perf_header__write_pipe(fd);
821 		if (err < 0)
822 			goto out_child;
823 	} else {
824 		err = perf_session__write_header(session, rec->evlist, fd, false);
825 		if (err < 0)
826 			goto out_child;
827 	}
828 
829 	if (!rec->no_buildid &&
830 	    !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
831 		pr_err("Couldn't generate buildids. "
832 		       "Use --no-buildid to profile anyway.\n");
833 		err = -1;
834 		goto out_child;
835 	}
836 
837 	machine = &session->machines.host;
838 
839 	err = record__synthesize(rec);
840 	if (err < 0)
841 		goto out_child;
842 
843 	if (rec->realtime_prio) {
844 		struct sched_param param;
845 
846 		param.sched_priority = rec->realtime_prio;
847 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
848 			pr_err("Could not set realtime priority.\n");
849 			err = -1;
850 			goto out_child;
851 		}
852 	}
853 
854 	/*
855 	 * When perf is starting the traced process, all the events
856 	 * (apart from group members) have enable_on_exec=1 set,
857 	 * so don't spoil it by prematurely enabling them.
858 	 */
859 	if (!target__none(&opts->target) && !opts->initial_delay)
860 		perf_evlist__enable(rec->evlist);
861 
862 	/*
863 	 * Let the child rip
864 	 */
865 	if (forks) {
866 		union perf_event *event;
867 
868 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
869 		if (event == NULL) {
870 			err = -ENOMEM;
871 			goto out_child;
872 		}
873 
874 		/*
875 		 * Some H/W events are generated before the COMM event,
876 		 * which is emitted during exec(), so perf script
877 		 * cannot see a correct process name for those events.
878 		 * Synthesize a COMM event to prevent that.
879 		 */
880 		perf_event__synthesize_comm(tool, event,
881 					    rec->evlist->workload.pid,
882 					    process_synthesized_event,
883 					    machine);
884 		free(event);
885 
886 		perf_evlist__start_workload(rec->evlist);
887 	}
888 
889 	if (opts->initial_delay) {
890 		usleep(opts->initial_delay * 1000);
891 		perf_evlist__enable(rec->evlist);
892 	}
893 
894 	trigger_ready(&auxtrace_snapshot_trigger);
895 	trigger_ready(&switch_output_trigger);
896 	for (;;) {
897 		unsigned long long hits = rec->samples;
898 
899 		if (record__mmap_read_all(rec) < 0) {
900 			trigger_error(&auxtrace_snapshot_trigger);
901 			trigger_error(&switch_output_trigger);
902 			err = -1;
903 			goto out_child;
904 		}
905 
906 		if (auxtrace_record__snapshot_started) {
907 			auxtrace_record__snapshot_started = 0;
908 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
909 				record__read_auxtrace_snapshot(rec);
910 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
911 				pr_err("AUX area tracing snapshot failed\n");
912 				err = -1;
913 				goto out_child;
914 			}
915 		}
916 
917 		if (trigger_is_hit(&switch_output_trigger)) {
918 			trigger_ready(&switch_output_trigger);
919 
920 			if (!quiet)
921 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
922 					waking);
923 			waking = 0;
924 			fd = record__switch_output(rec, false);
925 			if (fd < 0) {
926 				pr_err("Failed to switch to new file\n");
927 				trigger_error(&switch_output_trigger);
928 				err = fd;
929 				goto out_child;
930 			}
931 		}
932 
933 		if (hits == rec->samples) {
934 			if (done || draining)
935 				break;
936 			err = perf_evlist__poll(rec->evlist, -1);
937 			/*
938 			 * Propagate error, only if there's any. Ignore positive
939 			 * number of returned events and interrupt error.
940 			 */
941 			if (err > 0 || (err < 0 && errno == EINTR))
942 				err = 0;
943 			waking++;
944 
945 			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
946 				draining = true;
947 		}
948 
949 		/*
950 		 * When perf is starting the traced process, the events
951 		 * die with the process at the end and we wait for that.
952 		 * Thus there is no need to disable events in this case.
953 		 */
954 		if (done && !disabled && !target__none(&opts->target)) {
955 			trigger_off(&auxtrace_snapshot_trigger);
956 			perf_evlist__disable(rec->evlist);
957 			disabled = true;
958 		}
959 	}
960 	trigger_off(&auxtrace_snapshot_trigger);
961 	trigger_off(&switch_output_trigger);
962 
963 	if (forks && workload_exec_errno) {
964 		char msg[STRERR_BUFSIZE];
965 		const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
966 		pr_err("Workload failed: %s\n", emsg);
967 		err = -1;
968 		goto out_child;
969 	}
970 
971 	if (!quiet)
972 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
973 
974 out_child:
975 	if (forks) {
976 		int exit_status;
977 
978 		if (!child_finished)
979 			kill(rec->evlist->workload.pid, SIGTERM);
980 
981 		wait(&exit_status);
982 
983 		if (err < 0)
984 			status = err;
985 		else if (WIFEXITED(exit_status))
986 			status = WEXITSTATUS(exit_status);
987 		else if (WIFSIGNALED(exit_status))
988 			signr = WTERMSIG(exit_status);
989 	} else
990 		status = err;
991 
992 	/* this will be recalculated during process_buildids() */
993 	rec->samples = 0;
994 
995 	if (!err) {
996 		if (!rec->timestamp_filename) {
997 			record__finish_output(rec);
998 		} else {
999 			fd = record__switch_output(rec, true);
1000 			if (fd < 0) {
1001 				status = fd;
1002 				goto out_delete_session;
1003 			}
1004 		}
1005 	}
1006 
1007 	if (!err && !quiet) {
1008 		char samples[128];
1009 		const char *postfix = rec->timestamp_filename ?
1010 					".<timestamp>" : "";
1011 
1012 		if (rec->samples && !rec->opts.full_auxtrace)
1013 			scnprintf(samples, sizeof(samples),
1014 				  " (%" PRIu64 " samples)", rec->samples);
1015 		else
1016 			samples[0] = '\0';
1017 
1018 		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1019 			perf_data_file__size(file) / 1024.0 / 1024.0,
1020 			file->path, postfix, samples);
1021 	}
1022 
1023 out_delete_session:
1024 	perf_session__delete(session);
1025 	return status;
1026 }
1027 
1028 static void callchain_debug(struct callchain_param *callchain)
1029 {
1030 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1031 
1032 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1033 
1034 	if (callchain->record_mode == CALLCHAIN_DWARF)
1035 		pr_debug("callchain: stack dump size %d\n",
1036 			 callchain->dump_size);
1037 }
1038 
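/*
 * Parse a --call-graph argument of the form record_mode[,record_size],
 * e.g. "fp", "lbr" or "dwarf,8192", where the optional size bounds the
 * stack dump used for DWARF unwinding.
 */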
1039 int record_opts__parse_callchain(struct record_opts *record,
1040 				 struct callchain_param *callchain,
1041 				 const char *arg, bool unset)
1042 {
1043 	int ret;
1044 	callchain->enabled = !unset;
1045 
1046 	/* --no-call-graph */
1047 	if (unset) {
1048 		callchain->record_mode = CALLCHAIN_NONE;
1049 		pr_debug("callchain: disabled\n");
1050 		return 0;
1051 	}
1052 
1053 	ret = parse_callchain_record_opt(arg, callchain);
1054 	if (!ret) {
1055 		/* Enable data address sampling for DWARF unwind. */
1056 		if (callchain->record_mode == CALLCHAIN_DWARF)
1057 			record->sample_address = true;
1058 		callchain_debug(callchain);
1059 	}
1060 
1061 	return ret;
1062 }
1063 
1064 int record_parse_callchain_opt(const struct option *opt,
1065 			       const char *arg,
1066 			       int unset)
1067 {
1068 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1069 }
1070 
1071 int record_callchain_opt(const struct option *opt,
1072 			 const char *arg __maybe_unused,
1073 			 int unset __maybe_unused)
1074 {
1075 	struct callchain_param *callchain = opt->value;
1076 
1077 	callchain->enabled = true;
1078 
1079 	if (callchain->record_mode == CALLCHAIN_NONE)
1080 		callchain->record_mode = CALLCHAIN_FP;
1081 
1082 	callchain_debug(callchain);
1083 	return 0;
1084 }
1085 
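/*
 * Handle config file keys from e.g. ~/.perfconfig, i.e. a section like:
 *
 *	[record]
 *		build-id = no-cache
 *		call-graph = dwarf
 *
 * "build-id" takes cache, no-cache or skip; "call-graph" is rewritten to
 * "call-graph.record-mode" and left to perf_default_config().
 */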
1086 static int perf_record_config(const char *var, const char *value, void *cb)
1087 {
1088 	struct record *rec = cb;
1089 
1090 	if (!strcmp(var, "record.build-id")) {
1091 		if (!strcmp(value, "cache"))
1092 			rec->no_buildid_cache = false;
1093 		else if (!strcmp(value, "no-cache"))
1094 			rec->no_buildid_cache = true;
1095 		else if (!strcmp(value, "skip"))
1096 			rec->no_buildid = true;
1097 		else
1098 			return -1;
1099 		return 0;
1100 	}
1101 	if (!strcmp(var, "record.call-graph"))
1102 		var = "call-graph.record-mode"; /* fall-through */
1103 
1104 	return perf_default_config(var, value, cb);
1105 }
1106 
1107 struct clockid_map {
1108 	const char *name;
1109 	int clockid;
1110 };
1111 
1112 #define CLOCKID_MAP(n, c)	\
1113 	{ .name = n, .clockid = (c), }
1114 
1115 #define CLOCKID_END	{ .name = NULL, }
1116 
1117 
1118 /*
1119  * Add the missing ones; we need to build on many distros...
1120  */
1121 #ifndef CLOCK_MONOTONIC_RAW
1122 #define CLOCK_MONOTONIC_RAW 4
1123 #endif
1124 #ifndef CLOCK_BOOTTIME
1125 #define CLOCK_BOOTTIME 7
1126 #endif
1127 #ifndef CLOCK_TAI
1128 #define CLOCK_TAI 11
1129 #endif
1130 
1131 static const struct clockid_map clockids[] = {
1132 	/* available for all events, NMI safe */
1133 	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
1134 	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
1135 
1136 	/* available for some events */
1137 	CLOCKID_MAP("realtime", CLOCK_REALTIME),
1138 	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
1139 	CLOCKID_MAP("tai", CLOCK_TAI),
1140 
1141 	/* available for the lazy */
1142 	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
1143 	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
1144 	CLOCKID_MAP("real", CLOCK_REALTIME),
1145 	CLOCKID_MAP("boot", CLOCK_BOOTTIME),
1146 
1147 	CLOCKID_END,
1148 };
1149 
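/*
 * Accept a raw clockid number ("-k 4"), a name from the table above
 * ("-k monotonic_raw"), or the same name with a "CLOCK_" prefix
 * ("-k CLOCK_MONOTONIC_RAW"); name matching is case-insensitive.
 */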
1150 static int parse_clockid(const struct option *opt, const char *str, int unset)
1151 {
1152 	struct record_opts *opts = (struct record_opts *)opt->value;
1153 	const struct clockid_map *cm;
1154 	const char *ostr = str;
1155 
1156 	if (unset) {
1157 		opts->use_clockid = 0;
1158 		return 0;
1159 	}
1160 
1161 	/* no arg passed */
1162 	if (!str)
1163 		return 0;
1164 
1165 	/* no setting it twice */
1166 	if (opts->use_clockid)
1167 		return -1;
1168 
1169 	opts->use_clockid = true;
1170 
1171 	/* if it's a number, we're done */
1172 	if (sscanf(str, "%d", &opts->clockid) == 1)
1173 		return 0;
1174 
1175 	/* allow a "CLOCK_" prefix to the name */
1176 	if (!strncasecmp(str, "CLOCK_", 6))
1177 		str += 6;
1178 
1179 	for (cm = clockids; cm->name; cm++) {
1180 		if (!strcasecmp(str, cm->name)) {
1181 			opts->clockid = cm->clockid;
1182 			return 0;
1183 		}
1184 	}
1185 
1186 	opts->use_clockid = false;
1187 	ui__warning("unknown clockid %s, check man page\n", ostr);
1188 	return -1;
1189 }
1190 
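/*
 * Parse the -m/--mmap-pages argument "pages[,pages]": "-m 512" sizes the
 * data mmap only, "-m 512,128" also sizes the AUX area mmap, and
 * "-m ,128" keeps the data default while sizing the AUX area.
 */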
1191 static int record__parse_mmap_pages(const struct option *opt,
1192 				    const char *str,
1193 				    int unset __maybe_unused)
1194 {
1195 	struct record_opts *opts = opt->value;
1196 	char *s, *p;
1197 	unsigned int mmap_pages;
1198 	int ret;
1199 
1200 	if (!str)
1201 		return -EINVAL;
1202 
1203 	s = strdup(str);
1204 	if (!s)
1205 		return -ENOMEM;
1206 
1207 	p = strchr(s, ',');
1208 	if (p)
1209 		*p = '\0';
1210 
1211 	if (*s) {
1212 		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1213 		if (ret)
1214 			goto out_free;
1215 		opts->mmap_pages = mmap_pages;
1216 	}
1217 
1218 	if (!p) {
1219 		ret = 0;
1220 		goto out_free;
1221 	}
1222 
1223 	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1224 	if (ret)
1225 		goto out_free;
1226 
1227 	opts->auxtrace_mmap_pages = mmap_pages;
1228 
1229 out_free:
1230 	free(s);
1231 	return ret;
1232 }
1233 
1234 static const char * const __record_usage[] = {
1235 	"perf record [<options>] [<command>]",
1236 	"perf record [<options>] -- <command> [<options>]",
1237 	NULL
1238 };
1239 const char * const *record_usage = __record_usage;
1240 
1241 /*
1242  * XXX Ideally this would be local to cmd_record() and passed to a record__new
1243  * because we need access to it in record__exit(), which is called
1244  * after cmd_record() exits, but since record_options needs to be accessible to
1245  * builtin-script, leave it here.
1246  *
1247  * At least we don't touch it in all the other functions here directly.
1248  *
1249  * Just say no to tons of global variables, sigh.
1250  */
1251 static struct record record = {
1252 	.opts = {
1253 		.sample_time	     = true,
1254 		.mmap_pages	     = UINT_MAX,
1255 		.user_freq	     = UINT_MAX,
1256 		.user_interval	     = ULLONG_MAX,
1257 		.freq		     = 4000,
1258 		.target		     = {
1259 			.uses_mmap   = true,
1260 			.default_per_cpu = true,
1261 		},
1262 		.proc_map_timeout     = 500,
1263 	},
1264 	.tool = {
1265 		.sample		= process_sample_event,
1266 		.fork		= perf_event__process_fork,
1267 		.exit		= perf_event__process_exit,
1268 		.comm		= perf_event__process_comm,
1269 		.mmap		= perf_event__process_mmap,
1270 		.mmap2		= perf_event__process_mmap2,
1271 		.ordered_events	= true,
1272 	},
1273 };
1274 
1275 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1276 	"\n\t\t\t\tDefault: fp";
1277 
1278 static bool dry_run;
1279 
1280 /*
1281  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1282  * with it and switch to use the library functions in perf_evlist that came
1283  * from builtin-record.c, i.e. use record_opts,
1284  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1285  * using pipes, etc.
1286  */
1287 struct option __record_options[] = {
1288 	OPT_CALLBACK('e', "event", &record.evlist, "event",
1289 		     "event selector. use 'perf list' to list available events",
1290 		     parse_events_option),
1291 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1292 		     "event filter", parse_filter),
1293 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1294 			   NULL, "don't record events from perf itself",
1295 			   exclude_perf),
1296 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1297 		    "record events on existing process id"),
1298 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1299 		    "record events on existing thread id"),
1300 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
1301 		    "collect data with this RT SCHED_FIFO priority"),
1302 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1303 		    "collect data without buffering"),
1304 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1305 		    "collect raw sample records from all opened counters"),
1306 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1307 			    "system-wide collection from all CPUs"),
1308 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1309 		    "list of cpus to monitor"),
1310 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1311 	OPT_STRING('o', "output", &record.file.path, "file",
1312 		    "output file name"),
1313 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1314 			&record.opts.no_inherit_set,
1315 			"child tasks do not inherit counters"),
1316 	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1317 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1318 		     "number of mmap data pages and AUX area tracing mmap pages",
1319 		     record__parse_mmap_pages),
1320 	OPT_BOOLEAN(0, "group", &record.opts.group,
1321 		    "put the counters into a counter group"),
1322 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1323 			   NULL, "enables call-graph recording" ,
1324 			   &record_callchain_opt),
1325 	OPT_CALLBACK(0, "call-graph", &record.opts,
1326 		     "record_mode[,record_size]", record_callchain_help,
1327 		     &record_parse_callchain_opt),
1328 	OPT_INCR('v', "verbose", &verbose,
1329 		    "be more verbose (show counter open errors, etc)"),
1330 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1331 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1332 		    "per thread counts"),
1333 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1334 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1335 			&record.opts.sample_time_set,
1336 			"Record the sample timestamps"),
1337 	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
1338 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1339 		    "don't sample"),
1340 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1341 			&record.no_buildid_cache_set,
1342 			"do not update the buildid cache"),
1343 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1344 			&record.no_buildid_set,
1345 			"do not collect buildids in perf.data"),
1346 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1347 		     "monitor event in cgroup name only",
1348 		     parse_cgroups),
1349 	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1350 		  "ms to wait before starting measurement after program start"),
1351 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1352 		   "user to profile"),
1353 
1354 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1355 		     "branch any", "sample any taken branches",
1356 		     parse_branch_stack),
1357 
1358 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1359 		     "branch filter mask", "branch stack filter modes",
1360 		     parse_branch_stack),
1361 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1362 		    "sample by weight (on special events only)"),
1363 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1364 		    "sample transaction flags (special events only)"),
1365 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1366 		    "use per-thread mmaps"),
1367 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1368 		    "sample selected machine registers on interrupt,"
1369 		    " use -I ? to list register names", parse_regs),
1370 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1371 		    "Record running/enabled time of read (:S) events"),
1372 	OPT_CALLBACK('k', "clockid", &record.opts,
1373 		     "clockid", "clockid to use for events, see clock_gettime()",
1374 		     parse_clockid),
1375 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1376 			  "opts", "AUX area tracing Snapshot Mode", ""),
1377 	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1378 			"per thread proc mmap processing timeout in ms"),
1379 	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1380 		    "Record context switch events"),
1381 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1382 			 "Configure all used events to run in kernel space.",
1383 			 PARSE_OPT_EXCLUSIVE),
1384 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1385 			 "Configure all used events to run in user space.",
1386 			 PARSE_OPT_EXCLUSIVE),
1387 	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1388 		   "clang binary to use for compiling BPF scriptlets"),
1389 	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1390 		   "options passed to clang when compiling BPF scriptlets"),
1391 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1392 		   "file", "vmlinux pathname"),
1393 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1394 		    "Record build-id of all DSOs regardless of hits"),
1395 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1396 		    "append timestamp to output filename"),
1397 	OPT_BOOLEAN(0, "switch-output", &record.switch_output,
1398 		    "Switch output when receive SIGUSR2"),
1399 	OPT_BOOLEAN(0, "dry-run", &dry_run,
1400 		    "Parse options then exit"),
1401 	OPT_END()
1402 };
1403 
1404 struct option *record_options = __record_options;
1405 
1406 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
1407 {
1408 	int err;
1409 	struct record *rec = &record;
1410 	char errbuf[BUFSIZ];
1411 
1412 #ifndef HAVE_LIBBPF_SUPPORT
1413 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1414 	set_nobuild('\0', "clang-path", true);
1415 	set_nobuild('\0', "clang-opt", true);
1416 # undef set_nobuild
1417 #endif
1418 
1419 #ifndef HAVE_BPF_PROLOGUE
1420 # if !defined (HAVE_DWARF_SUPPORT)
1421 #  define REASON  "NO_DWARF=1"
1422 # elif !defined (HAVE_LIBBPF_SUPPORT)
1423 #  define REASON  "NO_LIBBPF=1"
1424 # else
1425 #  define REASON  "this architecture doesn't support BPF prologue"
1426 # endif
1427 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
1428 	set_nobuild('\0', "vmlinux", true);
1429 # undef set_nobuild
1430 # undef REASON
1431 #endif
1432 
1433 	rec->evlist = perf_evlist__new();
1434 	if (rec->evlist == NULL)
1435 		return -ENOMEM;
1436 
1437 	perf_config(perf_record_config, rec);
1438 
1439 	argc = parse_options(argc, argv, record_options, record_usage,
1440 			    PARSE_OPT_STOP_AT_NON_OPTION);
1441 	if (!argc && target__none(&rec->opts.target))
1442 		usage_with_options(record_usage, record_options);
1443 
1444 	if (nr_cgroups && !rec->opts.target.system_wide) {
1445 		usage_with_options_msg(record_usage, record_options,
1446 			"cgroup monitoring only available in system-wide mode");
1447 
1448 	}
1449 	if (rec->opts.record_switch_events &&
1450 	    !perf_can_record_switch_events()) {
1451 		ui__error("kernel does not support recording context switch events\n");
1452 		parse_options_usage(record_usage, record_options, "switch-events", 0);
1453 		return -EINVAL;
1454 	}
1455 
1456 	if (rec->switch_output)
1457 		rec->timestamp_filename = true;
1458 
1459 	if (!rec->itr) {
1460 		rec->itr = auxtrace_record__init(rec->evlist, &err);
1461 		if (err)
1462 			return err;
1463 	}
1464 
1465 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
1466 					      rec->opts.auxtrace_snapshot_opts);
1467 	if (err)
1468 		return err;
1469 
1470 	if (dry_run)
1471 		return 0;
1472 
1473 	err = bpf__setup_stdout(rec->evlist);
1474 	if (err) {
1475 		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
1476 		pr_err("ERROR: Setup BPF stdout failed: %s\n",
1477 			 errbuf);
1478 		return err;
1479 	}
1480 
1481 	err = -ENOMEM;
1482 
1483 	symbol__init(NULL);
1484 
1485 	if (symbol_conf.kptr_restrict)
1486 		pr_warning(
1487 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1488 "check /proc/sys/kernel/kptr_restrict.\n\n"
1489 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1490 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1491 "Samples in kernel modules won't be resolved at all.\n\n"
1492 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1493 "even with a suitable vmlinux or kallsyms file.\n\n");
1494 
1495 	if (rec->no_buildid_cache || rec->no_buildid) {
1496 		disable_buildid_cache();
1497 	} else if (rec->switch_output) {
1498 		/*
1499 		 * In 'perf record --switch-output', disable buildid
1500 		 * generation by default to reduce data file switching
1501 		 * overhead. Still generate buildids if they are required
1502 		 * explicitly, using
1503 		 *
1504 		 *  perf record --switch-output --no-no-buildid \
1505 		 *              --no-no-buildid-cache
1506 		 *
1507 		 * Following code equals to:
1508 		 *
1509 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
1510 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
1511 		 *         disable_buildid_cache();
1512 		 */
1513 		bool disable = true;
1514 
1515 		if (rec->no_buildid_set && !rec->no_buildid)
1516 			disable = false;
1517 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
1518 			disable = false;
1519 		if (disable) {
1520 			rec->no_buildid = true;
1521 			rec->no_buildid_cache = true;
1522 			disable_buildid_cache();
1523 		}
1524 	}
1525 
1526 	if (rec->evlist->nr_entries == 0 &&
1527 	    perf_evlist__add_default(rec->evlist) < 0) {
1528 		pr_err("Not enough memory for event selector list\n");
1529 		goto out_symbol_exit;
1530 	}
1531 
1532 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1533 		rec->opts.no_inherit = true;
1534 
1535 	err = target__validate(&rec->opts.target);
1536 	if (err) {
1537 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1538 		ui__warning("%s", errbuf);
1539 	}
1540 
1541 	err = target__parse_uid(&rec->opts.target);
1542 	if (err) {
1543 		int saved_errno = errno;
1544 
1545 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1546 		ui__error("%s", errbuf);
1547 
1548 		err = -saved_errno;
1549 		goto out_symbol_exit;
1550 	}
1551 
1552 	err = -ENOMEM;
1553 	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1554 		usage_with_options(record_usage, record_options);
1555 
1556 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1557 	if (err)
1558 		goto out_symbol_exit;
1559 
1560 	/*
1561 	 * We take all buildids when the file contains AUX area
1562 	 * tracing data, because decoding the trace to find the
1563 	 * DSOs that were hit would take too long.
1564 	 */
1565 	if (rec->opts.full_auxtrace)
1566 		rec->buildid_all = true;
1567 
1568 	if (record_opts__config(&rec->opts)) {
1569 		err = -EINVAL;
1570 		goto out_symbol_exit;
1571 	}
1572 
1573 	err = __cmd_record(&record, argc, argv);
1574 out_symbol_exit:
1575 	perf_evlist__delete(rec->evlist);
1576 	symbol__exit();
1577 	auxtrace_record__free(rec->itr);
1578 	return err;
1579 }
1580 
1581 static void snapshot_sig_handler(int sig __maybe_unused)
1582 {
1583 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
1584 		trigger_hit(&auxtrace_snapshot_trigger);
1585 		auxtrace_record__snapshot_started = 1;
1586 		if (auxtrace_record__snapshot_start(record.itr))
1587 			trigger_error(&auxtrace_snapshot_trigger);
1588 	}
1589 
1590 	if (trigger_is_ready(&switch_output_trigger))
1591 		trigger_hit(&switch_output_trigger);
1592 }
1593