// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "asm/bug.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <linux/time64.h>

struct switch_output {
	bool		 enabled;
	bool		 signal;
	unsigned long	 size;
	unsigned long	 time;
	const char	*str;
	bool		 set;
};
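
/*
 * Settings for --switch-output[=<mode>], parsed in switch_output_setup()
 * below. The optional argument defaults to "signal", e.g.:
 *
 *   perf record --switch-output ...        rotate the output on SIGUSR2
 *   perf record --switch-output=100M ...   rotate after 100MB were written
 *   perf record --switch-output=30s ...    rotate every 30 seconds
 *
 * Sizes accept B/K/M/G suffixes, times accept s/m/h/d.
 */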

struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data	data;
	struct auxtrace_record	*itr;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			timestamp_filename;
	bool			timestamp_boundary;
	struct switch_output	switch_output;
	unsigned long long	samples;
};

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);
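
/*
 * Triggers (see util/trigger.h) are small state machines: trigger_on()
 * arms a trigger, trigger_hit() records that its event happened,
 * trigger_ready() re-arms it, and trigger_error()/trigger_off() disable
 * it. The signal handlers below only hit a trigger that is ready.
 */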

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static int record__write(struct record *rec, void *bf, size_t size)
{
	if (perf_data__write(rec->session->data, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, event, event->header.size);
}

static int record__pushfn(void *to, void *bf, size_t size)
{
	struct record *rec = to;

	rec->samples++;
	return record__write(rec, bf, size);
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;
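	/*
	 * e.g. len1 + len2 == 13: (13 & 7) == 5, so padding == 3 and the
	 * 13 payload bytes plus the pad written below total 16, a multiple
	 * of 8.
	 */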

	record__write(rec, event, event->header.size);
	record__write(rec, data1, len1);
	if (len2)
		record__write(rec, data2, len2);
	record__write(rec, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm =
				&rec->evlist->mmap[i].auxtrace_mmap;

		if (!mm->base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

static int record__auxtrace_init(struct record *rec)
{
	int err;

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	return auxtrace_parse_filters(rec->evlist);
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct auxtrace_mmap *mm __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif

static int record__mmap_evlist(struct record *rec,
			       struct perf_evlist *evlist)
{
	struct record_opts *opts = &rec->opts;
	char msg[512];

	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
				 opts->auxtrace_mmap_pages,
				 opts->auxtrace_snapshot_mode) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	struct perf_evsel_config_term *err_term;
	int rc = 0;

	/*
	 * For initial_delay we need to add a dummy event so that we can track
	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
	 * real events, the ones asked for by the user.
	 */
	if (opts->initial_delay) {
		if (perf_evlist__add_dummy(evlist))
			return -ENOMEM;

		pos = perf_evlist__first(evlist);
		pos->tracking = 0;
		pos = perf_evlist__last(evlist);
		pos->tracking = 1;
		pos->attr.enable_on_exec = 1;
	}

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
		      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
		      str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample->time;

	rec->evlist->last_sample_time = sample->time;

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}
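
/*
 * The first/last sample times tracked above are written into the perf.data
 * header so report-side tools can learn the time window of the recording;
 * --timestamp-boundary keeps this walk over all samples even when
 * --buildid-all would otherwise skip it, see process_buildids() below.
 */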

static int process_buildids(struct record *rec)
{
	struct perf_data *data = &rec->data;
	struct perf_session *session = rec->session;

	if (data->size == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace
	 * dso->long_name with the real pathname it found.  In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory).
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples. But if timestamp_boundary
	 * is enabled, it still needs to walk all samples to get the
	 * timestamps of the first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * For a guest kernel, when processing the record & report
	 * subcommands, we arrange the module mmaps prior to the guest
	 * kernel mmap and trigger a preload of the dso, because guest
	 * module symbols are loaded from the guest kallsyms by default
	 * instead of from /lib/modules/XXX/XXX. This avoids missing
	 * symbols when the first address is in a module instead of in
	 * the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
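
/*
 * PERF_RECORD_FINISHED_ROUND tells consumers like perf report that all
 * events written before this point have been flushed from the ring
 * buffers, so their ordered-events queue can safely sort and process
 * everything buffered so far.
 */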

static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
				    bool overwrite)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct perf_mmap *maps;

	if (!evlist)
		return 0;

	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
	if (!maps)
		return 0;

	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

		if (maps[i].base) {
			if (perf_mmap__push(&maps[i], rec, record__pushfn) != 0) {
				rc = -1;
				goto out;
			}
		}

		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
		    record__auxtrace_mmap_read(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}

	/*
	 * Mark the round finished if we wrote at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

static int record__mmap_read_all(struct record *rec)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	data->size = lseek(perf_data__fd(data), 0, SEEK_CUR);

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct thread_map *thread_map;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						 process_synthesized_event,
						 &rec->session->machines.host,
						 rec->opts.sample_address,
						 rec->opts.proc_map_timeout);
	thread_map__put(thread_map);
	return err;
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;

	/* Same size as a real timestamp, e.g. "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
				    rec->session->header.data_offset,
				    at_exit);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->file.path, timestamp);

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist, so the newly created perf.data would not
		 * contain map and comm information.
		 * Create a fake thread_map and call
		 * perf_event__synthesize_thread_map() directly for those
		 * events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked for that by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].base)
			return evlist->mmap[0].base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
			return evlist->overwrite_mmap[0].base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc;

	pc = perf_evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}
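
/*
 * The mmap'ed control page picked above is only used by
 * perf_event__synth_time_conv(), whose arch-specific implementations read
 * the page's time conversion fields to synthesize a PERF_RECORD_TIME_CONV
 * event; the __weak default above does nothing.
 */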

static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data__fd(data);
	int err = 0;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		err = perf_event__synthesize_features(
			tool, session, rec->evlist, process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize features.\n");
			return err;
		}

		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME: err <= 0 here actually means that
			 * there were no tracepoints, so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that currently call die().
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!perf_evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/modules permission or run as root.\n");
	}

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
						 process_synthesized_event,
						 NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    opts->proc_map_timeout, 1);
out:
	return err;
}

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(data, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data__fd(data);
	rec->session = session;

	record__init_features(rec);

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, data->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	/*
	 * If we have just a single event and are sending data
	 * through a pipe, we need to force the id allocation,
	 * because we synthesize the event name through the pipe
	 * and need the id for that.
	 */
	if (data->is_pipe && rec->evlist->nr_entries == 1)
		rec->opts.sample_id = true;

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		struct machine *machine = &session->machines.host;
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before the COMM event,
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize a COMM event to prevent that.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

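	/*
	 * -D/--delay is specified in milliseconds and converted to
	 * microseconds for usleep() below.
	 */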
	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		perf_evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();
	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
		 * here: when done == true and hits != rec->samples in
		 * the previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap() ensures we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in the
			 * overwritable ring buffer should have been collected,
			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 was raised after or during record__mmap_read_all(),
			 * record__mmap_read_all() didn't collect data from the
			 * overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate the error only if there is one; ignore a
			 * positive number of returned events and interrupt
			 * errors.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end the
		 * events die with the process and we wait for that. Thus
		 * there is no need to disable the events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}
	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
			perf_data__size(data) / 1024.0 / 1024.0,
			data->file.path, postfix, samples);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug(callchain);
	}

	return ret;
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}
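
/*
 * Examples for the two call-graph options wired up above:
 *
 *   perf record -g ...                        call graphs in the default
 *                                             mode (frame pointers)
 *   perf record --call-graph dwarf,8192 ...   DWARF unwinding with an 8kB
 *                                             user stack dump per sample
 */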

static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = true;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph"))
		var = "call-graph.record-mode"; /* fall-through */

	return perf_default_config(var, value, cb);
}

struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if it's a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return 0;

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return 0;
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}
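
/*
 * The clockid can be given by name, case-insensitively and with or
 * without a "CLOCK_" prefix, or as a raw number, e.g. all three of
 *
 *   perf record -k monotonic_raw ...
 *   perf record -k CLOCK_MONOTONIC_RAW ...
 *   perf record -k 4 ...
 *
 * select CLOCK_MONOTONIC_RAW.
 */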

static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}
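
/*
 * -m/--mmap-pages takes one or two comma-separated values: the data mmap
 * size and, optionally, the AUX area tracing mmap size. For example,
 * "-m 512,128" uses 512 data pages and 128 AUX pages, while "-m ,64"
 * keeps the default data size and uses 64 AUX pages.
 */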

static void switch_output_size_warn(struct record *rec)
{
	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
	struct switch_output *s = &rec->switch_output;

	wakeup_size /= 2;

	if (s->size < wakeup_size) {
		char buf[100];

		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
		pr_warning("WARNING: switch-output data size is lower than the "
			   "wakeup kernel buffer size (%s); "
			   "expect bigger perf.data sizes\n", buf);
	}
}

static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag  = 's', .mult = 1        },
		{ .tag  = 'm', .mult = 60       },
		{ .tag  = 'h', .mult = 60*60    },
		{ .tag  = 'd', .mult = 60*60*24 },
		{ .tag  = 0 },
	};
	unsigned long val;

	if (!s->set)
		return 0;

	if (!strcmp(s->str, "signal")) {
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	return -1;

enabled:
	rec->timestamp_filename = true;
	s->enabled              = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}

static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally this would be local to cmd_record() and passed to a record__new,
 * because we need to have access to it in record__exit, which is called
 * after cmd_record() exits, but since record_options needs to be accessible
 * to builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.proc_map_timeout     = 500,
	},
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.namespaces	= perf_event__process_namespaces,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
		.ordered_events	= true,
	},
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
static struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.data.file.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
		    "Fail if the specified frequency can't be used"),
	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
		     "profile at this frequency",
		     record__parse_freq),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
		    "Record the sample physical addresses"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
			"Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		  "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use -I ? to list register names", parse_regs),
	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use --user-regs=? to list register names", parse_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
		     "clockid", "clockid to use for events, see clock_gettime()",
		     parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
		    "Record timestamp boundary (time of first/last samples)"),
	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
			  &record.switch_output.set, "signal,size,time",
			  "Switch output when receiving SIGUSR2 or when crossing the size or time threshold",
			  "signal"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
	OPT_END()
};

struct option *record_options = __record_options;

int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");
	}
	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		return -EINVAL;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
			 errbuf);
		goto out;
	}

	err = -ENOMEM;

	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are explicitly
		 * required using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->nr_entries == 0 &&
	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains
	 * AUX area tracing data, because we do not decode the
	 * trace; decoding would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = __cmd_record(&record, argc, argv);
out:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(record.itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}
1897