xref: /linux/tools/perf/builtin-record.c (revision ec714e371f22f716a04e6ecb2a24988c92b26911)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10 
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include <internal/xyarray.h>
14 #include "util/parse-events.h"
15 #include "util/config.h"
16 
17 #include "util/callchain.h"
18 #include "util/cgroup.h"
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/mmap.h"
25 #include "util/mutex.h"
26 #include "util/target.h"
27 #include "util/session.h"
28 #include "util/tool.h"
29 #include "util/stat.h"
30 #include "util/symbol.h"
31 #include "util/record.h"
32 #include "util/cpumap.h"
33 #include "util/thread_map.h"
34 #include "util/data.h"
35 #include "util/perf_regs.h"
36 #include "util/auxtrace.h"
37 #include "util/tsc.h"
38 #include "util/parse-branch-options.h"
39 #include "util/parse-regs-options.h"
40 #include "util/perf_api_probe.h"
41 #include "util/trigger.h"
42 #include "util/perf-hooks.h"
43 #include "util/cpu-set-sched.h"
44 #include "util/synthetic-events.h"
45 #include "util/time-utils.h"
46 #include "util/units.h"
47 #include "util/bpf-event.h"
48 #include "util/util.h"
49 #include "util/pfm.h"
50 #include "util/pmu.h"
51 #include "util/pmus.h"
52 #include "util/clockid.h"
53 #include "util/off_cpu.h"
54 #include "util/bpf-filter.h"
55 #include "util/strbuf.h"
56 #include "asm/bug.h"
57 #include "perf.h"
58 #include "cputopo.h"
59 
60 #include <errno.h>
61 #include <inttypes.h>
62 #include <locale.h>
63 #include <poll.h>
64 #include <pthread.h>
65 #include <unistd.h>
66 #ifndef HAVE_GETTID
67 #include <syscall.h>
68 #endif
69 #include <sched.h>
70 #include <signal.h>
71 #ifdef HAVE_EVENTFD_SUPPORT
72 #include <sys/eventfd.h>
73 #endif
74 #include <sys/mman.h>
75 #include <sys/wait.h>
76 #include <sys/types.h>
77 #include <sys/stat.h>
78 #include <fcntl.h>
79 #include <linux/err.h>
80 #include <linux/string.h>
81 #include <linux/time64.h>
82 #include <linux/zalloc.h>
83 #include <linux/bitmap.h>
84 #include <sys/time.h>
85 
86 struct switch_output {
87 	bool		 enabled;
88 	bool		 signal;
89 	unsigned long	 size;
90 	unsigned long	 time;
91 	const char	*str;
92 	bool		 set;
93 	char		 **filenames;
94 	int		 num_files;
95 	int		 cur_file;
96 };
97 
98 struct thread_mask {
99 	struct mmap_cpu_mask	maps;
100 	struct mmap_cpu_mask	affinity;
101 };
102 
103 struct record_thread {
104 	pid_t			tid;
105 	struct thread_mask	*mask;
106 	struct {
107 		int		msg[2];
108 		int		ack[2];
109 	} pipes;
110 	struct fdarray		pollfd;
111 	int			ctlfd_pos;
112 	int			nr_mmaps;
113 	struct mmap		**maps;
114 	struct mmap		**overwrite_maps;
115 	struct record		*rec;
116 	unsigned long long	samples;
117 	unsigned long		waking;
118 	u64			bytes_written;
119 	u64			bytes_transferred;
120 	u64			bytes_compressed;
121 };
122 
123 static __thread struct record_thread *thread;
124 
125 enum thread_msg {
126 	THREAD_MSG__UNDEFINED = 0,
127 	THREAD_MSG__READY,
128 	THREAD_MSG__MAX,
129 };
130 
131 static const char *thread_msg_tags[THREAD_MSG__MAX] = {
132 	"UNDEFINED", "READY"
133 };
134 
135 enum thread_spec {
136 	THREAD_SPEC__UNDEFINED = 0,
137 	THREAD_SPEC__CPU,
138 	THREAD_SPEC__CORE,
139 	THREAD_SPEC__PACKAGE,
140 	THREAD_SPEC__NUMA,
141 	THREAD_SPEC__USER,
142 	THREAD_SPEC__MAX,
143 };
144 
145 static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
146 	"undefined", "cpu", "core", "package", "numa", "user"
147 };
148 
149 struct pollfd_index_map {
150 	int evlist_pollfd_index;
151 	int thread_pollfd_index;
152 };
153 
154 struct record {
155 	struct perf_tool	tool;
156 	struct record_opts	opts;
157 	u64			bytes_written;
158 	u64			thread_bytes_written;
159 	struct perf_data	data;
160 	struct auxtrace_record	*itr;
161 	struct evlist	*evlist;
162 	struct perf_session	*session;
163 	struct evlist		*sb_evlist;
164 	pthread_t		thread_id;
165 	int			realtime_prio;
166 	bool			latency;
167 	bool			switch_output_event_set;
168 	bool			no_buildid;
169 	bool			no_buildid_set;
170 	bool			no_buildid_cache;
171 	bool			no_buildid_cache_set;
172 	bool			buildid_all;
173 	bool			buildid_mmap;
174 	bool			buildid_mmap_set;
175 	bool			timestamp_filename;
176 	bool			timestamp_boundary;
177 	bool			off_cpu;
178 	const char		*filter_action;
179 	const char		*uid_str;
180 	struct switch_output	switch_output;
181 	unsigned long long	samples;
182 	unsigned long		output_max_size;	/* = 0: unlimited */
183 	struct perf_debuginfod	debuginfod;
184 	int			nr_threads;
185 	struct thread_mask	*thread_masks;
186 	struct record_thread	*thread_data;
187 	struct pollfd_index_map	*index_map;
188 	size_t			index_map_sz;
189 	size_t			index_map_cnt;
190 };
191 
192 static volatile int done;
193 
194 static volatile int auxtrace_record__snapshot_started;
195 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
196 static DEFINE_TRIGGER(switch_output_trigger);
197 
198 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
199 	"SYS", "NODE", "CPU"
200 };
201 
202 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
203 				  struct perf_sample *sample, struct machine *machine);
204 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
205 				   struct perf_sample *sample, struct machine *machine);
206 static int process_timestamp_boundary(const struct perf_tool *tool,
207 				      union perf_event *event,
208 				      struct perf_sample *sample,
209 				      struct machine *machine);
210 
211 #ifndef HAVE_GETTID
gettid(void)212 static inline pid_t gettid(void)
213 {
214 	return (pid_t)syscall(__NR_gettid);
215 }
216 #endif
217 
record__threads_enabled(struct record * rec)218 static int record__threads_enabled(struct record *rec)
219 {
220 	return rec->opts.threads_spec;
221 }
222 
switch_output_signal(struct record * rec)223 static bool switch_output_signal(struct record *rec)
224 {
225 	return rec->switch_output.signal &&
226 	       trigger_is_ready(&switch_output_trigger);
227 }
228 
switch_output_size(struct record * rec)229 static bool switch_output_size(struct record *rec)
230 {
231 	return rec->switch_output.size &&
232 	       trigger_is_ready(&switch_output_trigger) &&
233 	       (rec->bytes_written >= rec->switch_output.size);
234 }
235 
switch_output_time(struct record * rec)236 static bool switch_output_time(struct record *rec)
237 {
238 	return rec->switch_output.time &&
239 	       trigger_is_ready(&switch_output_trigger);
240 }
241 
record__bytes_written(struct record * rec)242 static u64 record__bytes_written(struct record *rec)
243 {
244 	return rec->bytes_written + rec->thread_bytes_written;
245 }
246 
record__output_max_size_exceeded(struct record * rec)247 static bool record__output_max_size_exceeded(struct record *rec)
248 {
249 	return rec->output_max_size &&
250 	       (record__bytes_written(rec) >= rec->output_max_size);
251 }
252 
record__write(struct record * rec,struct mmap * map __maybe_unused,void * bf,size_t size)253 static int record__write(struct record *rec, struct mmap *map __maybe_unused,
254 			 void *bf, size_t size)
255 {
256 	struct perf_data_file *file = &rec->session->data->file;
257 
258 	if (map && map->file)
259 		file = map->file;
260 
261 	if (perf_data_file__write(file, bf, size) < 0) {
262 		pr_err("failed to write perf data, error: %m\n");
263 		return -1;
264 	}
265 
266 	if (map && map->file) {
267 		thread->bytes_written += size;
268 		rec->thread_bytes_written += size;
269 	} else {
270 		rec->bytes_written += size;
271 	}
272 
273 	if (record__output_max_size_exceeded(rec) && !done) {
274 		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
275 				" stopping session ]\n",
276 				record__bytes_written(rec) >> 10);
277 		done = 1;
278 	}
279 
280 	if (switch_output_size(rec))
281 		trigger_hit(&switch_output_trigger);
282 
283 	return 0;
284 }
285 
286 static int record__aio_enabled(struct record *rec);
287 static int record__comp_enabled(struct record *rec);
288 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
289 			    void *dst, size_t dst_size, void *src, size_t src_size);
290 
291 #ifdef HAVE_AIO_SUPPORT
record__aio_write(struct aiocb * cblock,int trace_fd,void * buf,size_t size,off_t off)292 static int record__aio_write(struct aiocb *cblock, int trace_fd,
293 		void *buf, size_t size, off_t off)
294 {
295 	int rc;
296 
297 	cblock->aio_fildes = trace_fd;
298 	cblock->aio_buf    = buf;
299 	cblock->aio_nbytes = size;
300 	cblock->aio_offset = off;
301 	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
302 
303 	do {
304 		rc = aio_write(cblock);
305 		if (rc == 0) {
306 			break;
307 		} else if (errno != EAGAIN) {
308 			cblock->aio_fildes = -1;
309 			pr_err("failed to queue perf data, error: %m\n");
310 			break;
311 		}
312 	} while (1);
313 
314 	return rc;
315 }
316 
record__aio_complete(struct mmap * md,struct aiocb * cblock)317 static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
318 {
319 	void *rem_buf;
320 	off_t rem_off;
321 	size_t rem_size;
322 	int rc, aio_errno;
323 	ssize_t aio_ret, written;
324 
325 	aio_errno = aio_error(cblock);
326 	if (aio_errno == EINPROGRESS)
327 		return 0;
328 
329 	written = aio_ret = aio_return(cblock);
330 	if (aio_ret < 0) {
331 		if (aio_errno != EINTR)
332 			pr_err("failed to write perf data, error: %m\n");
333 		written = 0;
334 	}
335 
336 	rem_size = cblock->aio_nbytes - written;
337 
338 	if (rem_size == 0) {
339 		cblock->aio_fildes = -1;
340 		/*
341 		 * md->refcount is incremented in record__aio_pushfn() for
342 		 * every aio write request started in record__aio_push() so
343 		 * decrement it because the request is now complete.
344 		 */
345 		perf_mmap__put(&md->core);
346 		rc = 1;
347 	} else {
348 		/*
349 		 * aio write request may require restart with the
350 		 * remainder if the kernel didn't write whole
351 		 * chunk at once.
352 		 */
353 		rem_off = cblock->aio_offset + written;
354 		rem_buf = (void *)(cblock->aio_buf + written);
355 		record__aio_write(cblock, cblock->aio_fildes,
356 				rem_buf, rem_size, rem_off);
357 		rc = 0;
358 	}
359 
360 	return rc;
361 }
362 
record__aio_sync(struct mmap * md,bool sync_all)363 static int record__aio_sync(struct mmap *md, bool sync_all)
364 {
365 	struct aiocb **aiocb = md->aio.aiocb;
366 	struct aiocb *cblocks = md->aio.cblocks;
367 	struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
368 	int i, do_suspend;
369 
370 	do {
371 		do_suspend = 0;
372 		for (i = 0; i < md->aio.nr_cblocks; ++i) {
373 			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
374 				if (sync_all)
375 					aiocb[i] = NULL;
376 				else
377 					return i;
378 			} else {
379 				/*
380 				 * Started aio write is not complete yet
381 				 * so it has to be waited before the
382 				 * next allocation.
383 				 */
384 				aiocb[i] = &cblocks[i];
385 				do_suspend = 1;
386 			}
387 		}
388 		if (!do_suspend)
389 			return -1;
390 
391 		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
392 			if (!(errno == EAGAIN || errno == EINTR))
393 				pr_err("failed to sync perf data, error: %m\n");
394 		}
395 	} while (1);
396 }
397 
398 struct record_aio {
399 	struct record	*rec;
400 	void		*data;
401 	size_t		size;
402 };
403 
record__aio_pushfn(struct mmap * map,void * to,void * buf,size_t size)404 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
405 {
406 	struct record_aio *aio = to;
407 
408 	/*
409 	 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer
410 	 * to release space in the kernel buffer as fast as possible, calling
411 	 * perf_mmap__consume() from perf_mmap__push() function.
412 	 *
413 	 * That lets the kernel to proceed with storing more profiling data into
414 	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
415 	 *
416 	 * Coping can be done in two steps in case the chunk of profiling data
417 	 * crosses the upper bound of the kernel buffer. In this case we first move
418 	 * part of data from map->start till the upper bound and then the remainder
419 	 * from the beginning of the kernel buffer till the end of the data chunk.
420 	 */
421 
422 	if (record__comp_enabled(aio->rec)) {
423 		ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
424 						   mmap__mmap_len(map) - aio->size,
425 						   buf, size);
426 		if (compressed < 0)
427 			return (int)compressed;
428 
429 		size = compressed;
430 	} else {
431 		memcpy(aio->data + aio->size, buf, size);
432 	}
433 
434 	if (!aio->size) {
435 		/*
436 		 * Increment map->refcount to guard map->aio.data[] buffer
437 		 * from premature deallocation because map object can be
438 		 * released earlier than aio write request started on
439 		 * map->aio.data[] buffer is complete.
440 		 *
441 		 * perf_mmap__put() is done at record__aio_complete()
442 		 * after started aio request completion or at record__aio_push()
443 		 * if the request failed to start.
444 		 */
445 		perf_mmap__get(&map->core);
446 	}
447 
448 	aio->size += size;
449 
450 	return size;
451 }
452 
record__aio_push(struct record * rec,struct mmap * map,off_t * off)453 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
454 {
455 	int ret, idx;
456 	int trace_fd = rec->session->data->file.fd;
457 	struct record_aio aio = { .rec = rec, .size = 0 };
458 
459 	/*
460 	 * Call record__aio_sync() to wait till map->aio.data[] buffer
461 	 * becomes available after previous aio write operation.
462 	 */
463 
464 	idx = record__aio_sync(map, false);
465 	aio.data = map->aio.data[idx];
466 	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
467 	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
468 		return ret;
469 
470 	rec->samples++;
471 	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
472 	if (!ret) {
473 		*off += aio.size;
474 		rec->bytes_written += aio.size;
475 		if (switch_output_size(rec))
476 			trigger_hit(&switch_output_trigger);
477 	} else {
478 		/*
479 		 * Decrement map->refcount incremented in record__aio_pushfn()
480 		 * back if record__aio_write() operation failed to start, otherwise
481 		 * map->refcount is decremented in record__aio_complete() after
482 		 * aio write operation finishes successfully.
483 		 */
484 		perf_mmap__put(&map->core);
485 	}
486 
487 	return ret;
488 }
489 
record__aio_get_pos(int trace_fd)490 static off_t record__aio_get_pos(int trace_fd)
491 {
492 	return lseek(trace_fd, 0, SEEK_CUR);
493 }
494 
record__aio_set_pos(int trace_fd,off_t pos)495 static void record__aio_set_pos(int trace_fd, off_t pos)
496 {
497 	lseek(trace_fd, pos, SEEK_SET);
498 }
499 
record__aio_mmap_read_sync(struct record * rec)500 static void record__aio_mmap_read_sync(struct record *rec)
501 {
502 	int i;
503 	struct evlist *evlist = rec->evlist;
504 	struct mmap *maps = evlist->mmap;
505 
506 	if (!record__aio_enabled(rec))
507 		return;
508 
509 	for (i = 0; i < evlist->core.nr_mmaps; i++) {
510 		struct mmap *map = &maps[i];
511 
512 		if (map->core.base)
513 			record__aio_sync(map, true);
514 	}
515 }
516 
517 static int nr_cblocks_default = 1;
518 static int nr_cblocks_max = 4;
519 
record__aio_parse(const struct option * opt,const char * str,int unset)520 static int record__aio_parse(const struct option *opt,
521 			     const char *str,
522 			     int unset)
523 {
524 	struct record_opts *opts = (struct record_opts *)opt->value;
525 
526 	if (unset) {
527 		opts->nr_cblocks = 0;
528 	} else {
529 		if (str)
530 			opts->nr_cblocks = strtol(str, NULL, 0);
531 		if (!opts->nr_cblocks)
532 			opts->nr_cblocks = nr_cblocks_default;
533 	}
534 
535 	return 0;
536 }
537 #else /* HAVE_AIO_SUPPORT */
538 static int nr_cblocks_max = 0;
539 
record__aio_push(struct record * rec __maybe_unused,struct mmap * map __maybe_unused,off_t * off __maybe_unused)540 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
541 			    off_t *off __maybe_unused)
542 {
543 	return -1;
544 }
545 
record__aio_get_pos(int trace_fd __maybe_unused)546 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
547 {
548 	return -1;
549 }
550 
record__aio_set_pos(int trace_fd __maybe_unused,off_t pos __maybe_unused)551 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
552 {
553 }
554 
record__aio_mmap_read_sync(struct record * rec __maybe_unused)555 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
556 {
557 }
558 #endif
559 
record__aio_enabled(struct record * rec)560 static int record__aio_enabled(struct record *rec)
561 {
562 	return rec->opts.nr_cblocks > 0;
563 }
564 
565 #define MMAP_FLUSH_DEFAULT 1
record__mmap_flush_parse(const struct option * opt,const char * str,int unset)566 static int record__mmap_flush_parse(const struct option *opt,
567 				    const char *str,
568 				    int unset)
569 {
570 	int flush_max;
571 	struct record_opts *opts = (struct record_opts *)opt->value;
572 	static struct parse_tag tags[] = {
573 			{ .tag  = 'B', .mult = 1       },
574 			{ .tag  = 'K', .mult = 1 << 10 },
575 			{ .tag  = 'M', .mult = 1 << 20 },
576 			{ .tag  = 'G', .mult = 1 << 30 },
577 			{ .tag  = 0 },
578 	};
579 
580 	if (unset)
581 		return 0;
582 
583 	if (str) {
584 		opts->mmap_flush = parse_tag_value(str, tags);
585 		if (opts->mmap_flush == (int)-1)
586 			opts->mmap_flush = strtol(str, NULL, 0);
587 	}
588 
589 	if (!opts->mmap_flush)
590 		opts->mmap_flush = MMAP_FLUSH_DEFAULT;
591 
592 	flush_max = evlist__mmap_size(opts->mmap_pages);
593 	flush_max /= 4;
594 	if (opts->mmap_flush > flush_max)
595 		opts->mmap_flush = flush_max;
596 
597 	return 0;
598 }
599 
600 #ifdef HAVE_ZSTD_SUPPORT
601 static unsigned int comp_level_default = 1;
602 
record__parse_comp_level(const struct option * opt,const char * str,int unset)603 static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
604 {
605 	struct record_opts *opts = opt->value;
606 
607 	if (unset) {
608 		opts->comp_level = 0;
609 	} else {
610 		if (str)
611 			opts->comp_level = strtol(str, NULL, 0);
612 		if (!opts->comp_level)
613 			opts->comp_level = comp_level_default;
614 	}
615 
616 	return 0;
617 }
618 #endif
619 static unsigned int comp_level_max = 22;
620 
record__comp_enabled(struct record * rec)621 static int record__comp_enabled(struct record *rec)
622 {
623 	return rec->opts.comp_level > 0;
624 }
625 
process_synthesized_event(const struct perf_tool * tool,union perf_event * event,struct perf_sample * sample __maybe_unused,struct machine * machine __maybe_unused)626 static int process_synthesized_event(const struct perf_tool *tool,
627 				     union perf_event *event,
628 				     struct perf_sample *sample __maybe_unused,
629 				     struct machine *machine __maybe_unused)
630 {
631 	struct record *rec = container_of(tool, struct record, tool);
632 	return record__write(rec, NULL, event, event->header.size);
633 }
634 
635 static struct mutex synth_lock;
636 
process_locked_synthesized_event(const struct perf_tool * tool,union perf_event * event,struct perf_sample * sample __maybe_unused,struct machine * machine __maybe_unused)637 static int process_locked_synthesized_event(const struct perf_tool *tool,
638 				     union perf_event *event,
639 				     struct perf_sample *sample __maybe_unused,
640 				     struct machine *machine __maybe_unused)
641 {
642 	int ret;
643 
644 	mutex_lock(&synth_lock);
645 	ret = process_synthesized_event(tool, event, sample, machine);
646 	mutex_unlock(&synth_lock);
647 	return ret;
648 }
649 
record__pushfn(struct mmap * map,void * to,void * bf,size_t size)650 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
651 {
652 	struct record *rec = to;
653 
654 	if (record__comp_enabled(rec)) {
655 		struct perf_record_compressed2 *event = map->data;
656 		size_t padding = 0;
657 		u8 pad[8] = {0};
658 		ssize_t compressed = zstd_compress(rec->session, map, map->data,
659 						   mmap__mmap_len(map), bf, size);
660 
661 		if (compressed < 0)
662 			return (int)compressed;
663 
664 		bf = event;
665 		thread->samples++;
666 
667 		/*
668 		 * The record from `zstd_compress` is not 8 bytes aligned, which would cause asan
669 		 * error. We make it aligned here.
670 		 */
671 		event->data_size = compressed - sizeof(struct perf_record_compressed2);
672 		event->header.size = PERF_ALIGN(compressed, sizeof(u64));
673 		padding = event->header.size - compressed;
674 		return record__write(rec, map, bf, compressed) ||
675 		       record__write(rec, map, &pad, padding);
676 	}
677 
678 	thread->samples++;
679 	return record__write(rec, map, bf, size);
680 }
681 
682 static volatile sig_atomic_t signr = -1;
683 static volatile sig_atomic_t child_finished;
684 #ifdef HAVE_EVENTFD_SUPPORT
685 static volatile sig_atomic_t done_fd = -1;
686 #endif
687 
sig_handler(int sig)688 static void sig_handler(int sig)
689 {
690 	if (sig == SIGCHLD)
691 		child_finished = 1;
692 	else
693 		signr = sig;
694 
695 	done = 1;
696 #ifdef HAVE_EVENTFD_SUPPORT
697 	if (done_fd >= 0) {
698 		u64 tmp = 1;
699 		int orig_errno = errno;
700 
701 		/*
702 		 * It is possible for this signal handler to run after done is
703 		 * checked in the main loop, but before the perf counter fds are
704 		 * polled. If this happens, the poll() will continue to wait
705 		 * even though done is set, and will only break out if either
706 		 * another signal is received, or the counters are ready for
707 		 * read. To ensure the poll() doesn't sleep when done is set,
708 		 * use an eventfd (done_fd) to wake up the poll().
709 		 */
710 		if (write(done_fd, &tmp, sizeof(tmp)) < 0)
711 			pr_err("failed to signal wakeup fd, error: %m\n");
712 
713 		errno = orig_errno;
714 	}
715 #endif // HAVE_EVENTFD_SUPPORT
716 }
717 
sigsegv_handler(int sig)718 static void sigsegv_handler(int sig)
719 {
720 	perf_hooks__recover();
721 	sighandler_dump_stack(sig);
722 }
723 
record__sig_exit(void)724 static void record__sig_exit(void)
725 {
726 	if (signr == -1)
727 		return;
728 
729 	signal(signr, SIG_DFL);
730 	raise(signr);
731 }
732 
733 #ifdef HAVE_AUXTRACE_SUPPORT
734 
record__process_auxtrace(const struct perf_tool * tool,struct mmap * map,union perf_event * event,void * data1,size_t len1,void * data2,size_t len2)735 static int record__process_auxtrace(const struct perf_tool *tool,
736 				    struct mmap *map,
737 				    union perf_event *event, void *data1,
738 				    size_t len1, void *data2, size_t len2)
739 {
740 	struct record *rec = container_of(tool, struct record, tool);
741 	struct perf_data *data = &rec->data;
742 	size_t padding;
743 	u8 pad[8] = {0};
744 
745 	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
746 		off_t file_offset;
747 		int fd = perf_data__fd(data);
748 		int err;
749 
750 		file_offset = lseek(fd, 0, SEEK_CUR);
751 		if (file_offset == -1)
752 			return -1;
753 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
754 						     event, file_offset);
755 		if (err)
756 			return err;
757 	}
758 
759 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
760 	padding = (len1 + len2) & 7;
761 	if (padding)
762 		padding = 8 - padding;
763 
764 	record__write(rec, map, event, event->header.size);
765 	record__write(rec, map, data1, len1);
766 	if (len2)
767 		record__write(rec, map, data2, len2);
768 	record__write(rec, map, &pad, padding);
769 
770 	return 0;
771 }
772 
record__auxtrace_mmap_read(struct record * rec,struct mmap * map)773 static int record__auxtrace_mmap_read(struct record *rec,
774 				      struct mmap *map)
775 {
776 	int ret;
777 
778 	ret = auxtrace_mmap__read(map, rec->itr,
779 				  perf_session__env(rec->session),
780 				  &rec->tool,
781 				  record__process_auxtrace);
782 	if (ret < 0)
783 		return ret;
784 
785 	if (ret)
786 		rec->samples++;
787 
788 	return 0;
789 }
790 
record__auxtrace_mmap_read_snapshot(struct record * rec,struct mmap * map)791 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
792 					       struct mmap *map)
793 {
794 	int ret;
795 
796 	ret = auxtrace_mmap__read_snapshot(map, rec->itr,
797 					   perf_session__env(rec->session),
798 					   &rec->tool,
799 					   record__process_auxtrace,
800 					   rec->opts.auxtrace_snapshot_size);
801 	if (ret < 0)
802 		return ret;
803 
804 	if (ret)
805 		rec->samples++;
806 
807 	return 0;
808 }
809 
record__auxtrace_read_snapshot_all(struct record * rec)810 static int record__auxtrace_read_snapshot_all(struct record *rec)
811 {
812 	int i;
813 	int rc = 0;
814 
815 	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
816 		struct mmap *map = &rec->evlist->mmap[i];
817 
818 		if (!map->auxtrace_mmap.base)
819 			continue;
820 
821 		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
822 			rc = -1;
823 			goto out;
824 		}
825 	}
826 out:
827 	return rc;
828 }
829 
record__read_auxtrace_snapshot(struct record * rec,bool on_exit)830 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
831 {
832 	pr_debug("Recording AUX area tracing snapshot\n");
833 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
834 		trigger_error(&auxtrace_snapshot_trigger);
835 	} else {
836 		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
837 			trigger_error(&auxtrace_snapshot_trigger);
838 		else
839 			trigger_ready(&auxtrace_snapshot_trigger);
840 	}
841 }
842 
record__auxtrace_snapshot_exit(struct record * rec)843 static int record__auxtrace_snapshot_exit(struct record *rec)
844 {
845 	if (trigger_is_error(&auxtrace_snapshot_trigger))
846 		return 0;
847 
848 	if (!auxtrace_record__snapshot_started &&
849 	    auxtrace_record__snapshot_start(rec->itr))
850 		return -1;
851 
852 	record__read_auxtrace_snapshot(rec, true);
853 	if (trigger_is_error(&auxtrace_snapshot_trigger))
854 		return -1;
855 
856 	return 0;
857 }
858 
record__auxtrace_init(struct record * rec)859 static int record__auxtrace_init(struct record *rec)
860 {
861 	int err;
862 
863 	if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
864 	    && record__threads_enabled(rec)) {
865 		pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
866 		return -EINVAL;
867 	}
868 
869 	if (!rec->itr) {
870 		rec->itr = auxtrace_record__init(rec->evlist, &err);
871 		if (err)
872 			return err;
873 	}
874 
875 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
876 					      rec->opts.auxtrace_snapshot_opts);
877 	if (err)
878 		return err;
879 
880 	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
881 					    rec->opts.auxtrace_sample_opts);
882 	if (err)
883 		return err;
884 
885 	err = auxtrace_parse_aux_action(rec->evlist);
886 	if (err)
887 		return err;
888 
889 	return auxtrace_parse_filters(rec->evlist);
890 }
891 
892 #else
893 
894 static inline
record__auxtrace_mmap_read(struct record * rec __maybe_unused,struct mmap * map __maybe_unused)895 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
896 			       struct mmap *map __maybe_unused)
897 {
898 	return 0;
899 }
900 
901 static inline
record__read_auxtrace_snapshot(struct record * rec __maybe_unused,bool on_exit __maybe_unused)902 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
903 				    bool on_exit __maybe_unused)
904 {
905 }
906 
907 static inline
auxtrace_record__snapshot_start(struct auxtrace_record * itr __maybe_unused)908 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
909 {
910 	return 0;
911 }
912 
913 static inline
record__auxtrace_snapshot_exit(struct record * rec __maybe_unused)914 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
915 {
916 	return 0;
917 }
918 
record__auxtrace_init(struct record * rec __maybe_unused)919 static int record__auxtrace_init(struct record *rec __maybe_unused)
920 {
921 	return 0;
922 }
923 
924 #endif
925 
record__config_text_poke(struct evlist * evlist)926 static int record__config_text_poke(struct evlist *evlist)
927 {
928 	struct evsel *evsel;
929 
930 	/* Nothing to do if text poke is already configured */
931 	evlist__for_each_entry(evlist, evsel) {
932 		if (evsel->core.attr.text_poke)
933 			return 0;
934 	}
935 
936 	evsel = evlist__add_dummy_on_all_cpus(evlist);
937 	if (!evsel)
938 		return -ENOMEM;
939 
940 	evsel->core.attr.text_poke = 1;
941 	evsel->core.attr.ksymbol = 1;
942 	evsel->immediate = true;
943 	evsel__set_sample_bit(evsel, TIME);
944 
945 	return 0;
946 }
947 
record__config_off_cpu(struct record * rec)948 static int record__config_off_cpu(struct record *rec)
949 {
950 	return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
951 }
952 
record__tracking_system_wide(struct record * rec)953 static bool record__tracking_system_wide(struct record *rec)
954 {
955 	struct evlist *evlist = rec->evlist;
956 	struct evsel *evsel;
957 
958 	/*
959 	 * If non-dummy evsel exists, system_wide sideband is need to
960 	 * help parse sample information.
961 	 * For example, PERF_EVENT_MMAP event to help parse symbol,
962 	 * and PERF_EVENT_COMM event to help parse task executable name.
963 	 */
964 	evlist__for_each_entry(evlist, evsel) {
965 		if (!evsel__is_dummy_event(evsel))
966 			return true;
967 	}
968 
969 	return false;
970 }
971 
record__config_tracking_events(struct record * rec)972 static int record__config_tracking_events(struct record *rec)
973 {
974 	struct record_opts *opts = &rec->opts;
975 	struct evlist *evlist = rec->evlist;
976 	bool system_wide = false;
977 	struct evsel *evsel;
978 
979 	/*
980 	 * For initial_delay, system wide or a hybrid system, we need to add
981 	 * tracking event so that we can track PERF_RECORD_MMAP to cover the
982 	 * delay of waiting or event synthesis.
983 	 */
984 	if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
985 	    perf_pmus__num_core_pmus() > 1) {
986 
987 		/*
988 		 * User space tasks can migrate between CPUs, so when tracing
989 		 * selected CPUs, sideband for all CPUs is still needed.
990 		 */
991 		if (!!opts->target.cpu_list && record__tracking_system_wide(rec))
992 			system_wide = true;
993 
994 		evsel = evlist__findnew_tracking_event(evlist, system_wide);
995 		if (!evsel)
996 			return -ENOMEM;
997 
998 		/*
999 		 * Enable the tracking event when the process is forked for
1000 		 * initial_delay, immediately for system wide.
1001 		 */
1002 		if (opts->target.initial_delay && !evsel->immediate &&
1003 		    !target__has_cpu(&opts->target))
1004 			evsel->core.attr.enable_on_exec = 1;
1005 		else
1006 			evsel->immediate = 1;
1007 	}
1008 
1009 	return 0;
1010 }
1011 
record__kcore_readable(struct machine * machine)1012 static bool record__kcore_readable(struct machine *machine)
1013 {
1014 	char kcore[PATH_MAX];
1015 	int fd;
1016 
1017 	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
1018 
1019 	fd = open(kcore, O_RDONLY);
1020 	if (fd < 0)
1021 		return false;
1022 
1023 	close(fd);
1024 
1025 	return true;
1026 }
1027 
record__kcore_copy(struct machine * machine,struct perf_data * data)1028 static int record__kcore_copy(struct machine *machine, struct perf_data *data)
1029 {
1030 	char from_dir[PATH_MAX];
1031 	char kcore_dir[PATH_MAX];
1032 	int ret;
1033 
1034 	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
1035 
1036 	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
1037 	if (ret)
1038 		return ret;
1039 
1040 	return kcore_copy(from_dir, kcore_dir);
1041 }
1042 
record__thread_data_init_pipes(struct record_thread * thread_data)1043 static void record__thread_data_init_pipes(struct record_thread *thread_data)
1044 {
1045 	thread_data->pipes.msg[0] = -1;
1046 	thread_data->pipes.msg[1] = -1;
1047 	thread_data->pipes.ack[0] = -1;
1048 	thread_data->pipes.ack[1] = -1;
1049 }
1050 
record__thread_data_open_pipes(struct record_thread * thread_data)1051 static int record__thread_data_open_pipes(struct record_thread *thread_data)
1052 {
1053 	if (pipe(thread_data->pipes.msg))
1054 		return -EINVAL;
1055 
1056 	if (pipe(thread_data->pipes.ack)) {
1057 		close(thread_data->pipes.msg[0]);
1058 		thread_data->pipes.msg[0] = -1;
1059 		close(thread_data->pipes.msg[1]);
1060 		thread_data->pipes.msg[1] = -1;
1061 		return -EINVAL;
1062 	}
1063 
1064 	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
1065 		 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
1066 		 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
1067 
1068 	return 0;
1069 }
1070 
record__thread_data_close_pipes(struct record_thread * thread_data)1071 static void record__thread_data_close_pipes(struct record_thread *thread_data)
1072 {
1073 	if (thread_data->pipes.msg[0] != -1) {
1074 		close(thread_data->pipes.msg[0]);
1075 		thread_data->pipes.msg[0] = -1;
1076 	}
1077 	if (thread_data->pipes.msg[1] != -1) {
1078 		close(thread_data->pipes.msg[1]);
1079 		thread_data->pipes.msg[1] = -1;
1080 	}
1081 	if (thread_data->pipes.ack[0] != -1) {
1082 		close(thread_data->pipes.ack[0]);
1083 		thread_data->pipes.ack[0] = -1;
1084 	}
1085 	if (thread_data->pipes.ack[1] != -1) {
1086 		close(thread_data->pipes.ack[1]);
1087 		thread_data->pipes.ack[1] = -1;
1088 	}
1089 }
1090 
evlist__per_thread(struct evlist * evlist)1091 static bool evlist__per_thread(struct evlist *evlist)
1092 {
1093 	return cpu_map__is_dummy(evlist->core.user_requested_cpus);
1094 }
1095 
record__thread_data_init_maps(struct record_thread * thread_data,struct evlist * evlist)1096 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
1097 {
1098 	int m, tm, nr_mmaps = evlist->core.nr_mmaps;
1099 	struct mmap *mmap = evlist->mmap;
1100 	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
1101 	struct perf_cpu_map *cpus = evlist->core.all_cpus;
1102 	bool per_thread = evlist__per_thread(evlist);
1103 
1104 	if (per_thread)
1105 		thread_data->nr_mmaps = nr_mmaps;
1106 	else
1107 		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
1108 						      thread_data->mask->maps.nbits);
1109 	if (mmap) {
1110 		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1111 		if (!thread_data->maps)
1112 			return -ENOMEM;
1113 	}
1114 	if (overwrite_mmap) {
1115 		thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1116 		if (!thread_data->overwrite_maps) {
1117 			zfree(&thread_data->maps);
1118 			return -ENOMEM;
1119 		}
1120 	}
1121 	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
1122 		 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
1123 
1124 	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
1125 		if (per_thread ||
1126 		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
1127 			if (thread_data->maps) {
1128 				thread_data->maps[tm] = &mmap[m];
1129 				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
1130 					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1131 			}
1132 			if (thread_data->overwrite_maps) {
1133 				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
1134 				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
1135 					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1136 			}
1137 			tm++;
1138 		}
1139 	}
1140 
1141 	return 0;
1142 }
1143 
record__thread_data_init_pollfd(struct record_thread * thread_data,struct evlist * evlist)1144 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
1145 {
1146 	int f, tm, pos;
1147 	struct mmap *map, *overwrite_map;
1148 
1149 	fdarray__init(&thread_data->pollfd, 64);
1150 
1151 	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
1152 		map = thread_data->maps ? thread_data->maps[tm] : NULL;
1153 		overwrite_map = thread_data->overwrite_maps ?
1154 				thread_data->overwrite_maps[tm] : NULL;
1155 
1156 		for (f = 0; f < evlist->core.pollfd.nr; f++) {
1157 			void *ptr = evlist->core.pollfd.priv[f].ptr;
1158 
1159 			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
1160 				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
1161 							      &evlist->core.pollfd);
1162 				if (pos < 0)
1163 					return pos;
1164 				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
1165 					 thread_data, pos, evlist->core.pollfd.entries[f].fd);
1166 			}
1167 		}
1168 	}
1169 
1170 	return 0;
1171 }
1172 
record__free_thread_data(struct record * rec)1173 static void record__free_thread_data(struct record *rec)
1174 {
1175 	int t;
1176 	struct record_thread *thread_data = rec->thread_data;
1177 
1178 	if (thread_data == NULL)
1179 		return;
1180 
1181 	for (t = 0; t < rec->nr_threads; t++) {
1182 		record__thread_data_close_pipes(&thread_data[t]);
1183 		zfree(&thread_data[t].maps);
1184 		zfree(&thread_data[t].overwrite_maps);
1185 		fdarray__exit(&thread_data[t].pollfd);
1186 	}
1187 
1188 	zfree(&rec->thread_data);
1189 }
1190 
record__map_thread_evlist_pollfd_indexes(struct record * rec,int evlist_pollfd_index,int thread_pollfd_index)1191 static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
1192 						    int evlist_pollfd_index,
1193 						    int thread_pollfd_index)
1194 {
1195 	size_t x = rec->index_map_cnt;
1196 
1197 	if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
1198 		return -ENOMEM;
1199 	rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
1200 	rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
1201 	rec->index_map_cnt += 1;
1202 	return 0;
1203 }
1204 
record__update_evlist_pollfd_from_thread(struct record * rec,struct evlist * evlist,struct record_thread * thread_data)1205 static int record__update_evlist_pollfd_from_thread(struct record *rec,
1206 						    struct evlist *evlist,
1207 						    struct record_thread *thread_data)
1208 {
1209 	struct pollfd *e_entries = evlist->core.pollfd.entries;
1210 	struct pollfd *t_entries = thread_data->pollfd.entries;
1211 	int err = 0;
1212 	size_t i;
1213 
1214 	for (i = 0; i < rec->index_map_cnt; i++) {
1215 		int e_pos = rec->index_map[i].evlist_pollfd_index;
1216 		int t_pos = rec->index_map[i].thread_pollfd_index;
1217 
1218 		if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
1219 		    e_entries[e_pos].events != t_entries[t_pos].events) {
1220 			pr_err("Thread and evlist pollfd index mismatch\n");
1221 			err = -EINVAL;
1222 			continue;
1223 		}
1224 		e_entries[e_pos].revents = t_entries[t_pos].revents;
1225 	}
1226 	return err;
1227 }
1228 
record__dup_non_perf_events(struct record * rec,struct evlist * evlist,struct record_thread * thread_data)1229 static int record__dup_non_perf_events(struct record *rec,
1230 				       struct evlist *evlist,
1231 				       struct record_thread *thread_data)
1232 {
1233 	struct fdarray *fda = &evlist->core.pollfd;
1234 	int i, ret;
1235 
1236 	for (i = 0; i < fda->nr; i++) {
1237 		if (!(fda->priv[i].flags & fdarray_flag__non_perf_event))
1238 			continue;
1239 		ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda);
1240 		if (ret < 0) {
1241 			pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1242 			return ret;
1243 		}
1244 		pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
1245 			  thread_data, ret, fda->entries[i].fd);
1246 		ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret);
1247 		if (ret < 0) {
1248 			pr_err("Failed to map thread and evlist pollfd indexes\n");
1249 			return ret;
1250 		}
1251 	}
1252 	return 0;
1253 }
1254 
record__alloc_thread_data(struct record * rec,struct evlist * evlist)1255 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
1256 {
1257 	int t, ret;
1258 	struct record_thread *thread_data;
1259 
1260 	rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
1261 	if (!rec->thread_data) {
1262 		pr_err("Failed to allocate thread data\n");
1263 		return -ENOMEM;
1264 	}
1265 	thread_data = rec->thread_data;
1266 
1267 	for (t = 0; t < rec->nr_threads; t++)
1268 		record__thread_data_init_pipes(&thread_data[t]);
1269 
1270 	for (t = 0; t < rec->nr_threads; t++) {
1271 		thread_data[t].rec = rec;
1272 		thread_data[t].mask = &rec->thread_masks[t];
1273 		ret = record__thread_data_init_maps(&thread_data[t], evlist);
1274 		if (ret) {
1275 			pr_err("Failed to initialize thread[%d] maps\n", t);
1276 			goto out_free;
1277 		}
1278 		ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
1279 		if (ret) {
1280 			pr_err("Failed to initialize thread[%d] pollfd\n", t);
1281 			goto out_free;
1282 		}
1283 		if (t) {
1284 			thread_data[t].tid = -1;
1285 			ret = record__thread_data_open_pipes(&thread_data[t]);
1286 			if (ret) {
1287 				pr_err("Failed to open thread[%d] communication pipes\n", t);
1288 				goto out_free;
1289 			}
1290 			ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
1291 					   POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
1292 			if (ret < 0) {
1293 				pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
1294 				goto out_free;
1295 			}
1296 			thread_data[t].ctlfd_pos = ret;
1297 			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1298 				 thread_data, thread_data[t].ctlfd_pos,
1299 				 thread_data[t].pipes.msg[0]);
1300 		} else {
1301 			thread_data[t].tid = gettid();
1302 
1303 			ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]);
1304 			if (ret < 0)
1305 				goto out_free;
1306 
1307 			thread_data[t].ctlfd_pos = -1; /* Not used */
1308 		}
1309 	}
1310 
1311 	return 0;
1312 
1313 out_free:
1314 	record__free_thread_data(rec);
1315 
1316 	return ret;
1317 }
1318 
record__mmap_evlist(struct record * rec,struct evlist * evlist)1319 static int record__mmap_evlist(struct record *rec,
1320 			       struct evlist *evlist)
1321 {
1322 	int i, ret;
1323 	struct record_opts *opts = &rec->opts;
1324 	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
1325 				  opts->auxtrace_sample_mode;
1326 	char msg[512];
1327 
1328 	if (opts->affinity != PERF_AFFINITY_SYS)
1329 		cpu__setup_cpunode_map();
1330 
1331 	if (evlist__mmap_ex(evlist, opts->mmap_pages,
1332 				 opts->auxtrace_mmap_pages,
1333 				 auxtrace_overwrite,
1334 				 opts->nr_cblocks, opts->affinity,
1335 				 opts->mmap_flush, opts->comp_level) < 0) {
1336 		if (errno == EPERM) {
1337 			pr_err("Permission error mapping pages.\n"
1338 			       "Consider increasing "
1339 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
1340 			       "or try again with a smaller value of -m/--mmap_pages.\n"
1341 			       "(current value: %u,%u)\n",
1342 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
1343 			return -errno;
1344 		} else {
1345 			pr_err("failed to mmap with %d (%s)\n", errno,
1346 				str_error_r(errno, msg, sizeof(msg)));
1347 			if (errno)
1348 				return -errno;
1349 			else
1350 				return -EINVAL;
1351 		}
1352 	}
1353 
1354 	if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
1355 		return -1;
1356 
1357 	ret = record__alloc_thread_data(rec, evlist);
1358 	if (ret)
1359 		return ret;
1360 
1361 	if (record__threads_enabled(rec)) {
1362 		ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
1363 		if (ret) {
1364 			pr_err("Failed to create data directory: %s\n", strerror(-ret));
1365 			return ret;
1366 		}
1367 		for (i = 0; i < evlist->core.nr_mmaps; i++) {
1368 			if (evlist->mmap)
1369 				evlist->mmap[i].file = &rec->data.dir.files[i];
1370 			if (evlist->overwrite_mmap)
1371 				evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
1372 		}
1373 	}
1374 
1375 	return 0;
1376 }
1377 
record__mmap(struct record * rec)1378 static int record__mmap(struct record *rec)
1379 {
1380 	return record__mmap_evlist(rec, rec->evlist);
1381 }
1382 
record__open(struct record * rec)1383 static int record__open(struct record *rec)
1384 {
1385 	char msg[BUFSIZ];
1386 	struct evsel *pos;
1387 	struct evlist *evlist = rec->evlist;
1388 	struct perf_session *session = rec->session;
1389 	struct record_opts *opts = &rec->opts;
1390 	int rc = 0;
1391 
1392 	evlist__for_each_entry(evlist, pos) {
1393 try_again:
1394 		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
1395 			if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) {
1396 				if (verbose > 0)
1397 					ui__warning("%s\n", msg);
1398 				goto try_again;
1399 			}
1400 			if ((errno == EINVAL || errno == EBADF) &&
1401 			    pos->core.leader != &pos->core &&
1402 			    pos->weak_group) {
1403 			        pos = evlist__reset_weak_group(evlist, pos, true);
1404 				goto try_again;
1405 			}
1406 			rc = -errno;
1407 			evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
1408 			ui__error("%s\n", msg);
1409 			goto out;
1410 		}
1411 	}
1412 
1413 	if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
1414 		pr_warning(
1415 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1416 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1417 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1418 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1419 "Samples in kernel modules won't be resolved at all.\n\n"
1420 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1421 "even with a suitable vmlinux or kallsyms file.\n\n");
1422 	}
1423 
1424 	if (evlist__apply_filters(evlist, &pos, &opts->target)) {
1425 		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
1426 			pos->filter ?: "BPF", evsel__name(pos), errno,
1427 			str_error_r(errno, msg, sizeof(msg)));
1428 		rc = -1;
1429 		goto out;
1430 	}
1431 
1432 	rc = record__mmap(rec);
1433 	if (rc)
1434 		goto out;
1435 
1436 	session->evlist = evlist;
1437 	perf_session__set_id_hdr_size(session);
1438 out:
1439 	return rc;
1440 }
1441 
set_timestamp_boundary(struct record * rec,u64 sample_time)1442 static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1443 {
1444 	if (rec->evlist->first_sample_time == 0)
1445 		rec->evlist->first_sample_time = sample_time;
1446 
1447 	if (sample_time)
1448 		rec->evlist->last_sample_time = sample_time;
1449 }
1450 
process_sample_event(const struct perf_tool * tool,union perf_event * event,struct perf_sample * sample,struct evsel * evsel,struct machine * machine)1451 static int process_sample_event(const struct perf_tool *tool,
1452 				union perf_event *event,
1453 				struct perf_sample *sample,
1454 				struct evsel *evsel,
1455 				struct machine *machine)
1456 {
1457 	struct record *rec = container_of(tool, struct record, tool);
1458 
1459 	set_timestamp_boundary(rec, sample->time);
1460 
1461 	if (rec->buildid_all)
1462 		return 0;
1463 
1464 	rec->samples++;
1465 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1466 }
1467 
process_buildids(struct record * rec)1468 static int process_buildids(struct record *rec)
1469 {
1470 	struct perf_session *session = rec->session;
1471 
1472 	if (perf_data__size(&rec->data) == 0)
1473 		return 0;
1474 
1475 	/*
1476 	 * During this process, it'll load kernel map and replace the
1477 	 * dso->long_name to a real pathname it found.  In this case
1478 	 * we prefer the vmlinux path like
1479 	 *   /lib/modules/3.16.4/build/vmlinux
1480 	 *
1481 	 * rather than build-id path (in debug directory).
1482 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
1483 	 */
1484 	symbol_conf.ignore_vmlinux_buildid = true;
1485 
1486 	/*
1487 	 * If --buildid-all is given, it marks all DSO regardless of hits,
1488 	 * so no need to process samples. But if timestamp_boundary is enabled,
1489 	 * it still needs to walk on all samples to get the timestamps of
1490 	 * first/last samples.
1491 	 */
1492 	if (rec->buildid_all && !rec->timestamp_boundary)
1493 		rec->tool.sample = process_event_sample_stub;
1494 
1495 	return perf_session__process_events(session);
1496 }
1497 
perf_event__synthesize_guest_os(struct machine * machine,void * data)1498 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1499 {
1500 	int err;
1501 	struct perf_tool *tool = data;
1502 	/*
1503 	 *As for guest kernel when processing subcommand record&report,
1504 	 *we arrange module mmap prior to guest kernel mmap and trigger
1505 	 *a preload dso because default guest module symbols are loaded
1506 	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
1507 	 *method is used to avoid symbol missing when the first addr is
1508 	 *in module instead of in guest kernel.
1509 	 */
1510 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
1511 					     machine);
1512 	if (err < 0)
1513 		pr_err("Couldn't record guest kernel [%d]'s reference"
1514 		       " relocation symbol.\n", machine->pid);
1515 
1516 	/*
1517 	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
1518 	 * have no _text sometimes.
1519 	 */
1520 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1521 						 machine);
1522 	if (err < 0)
1523 		pr_err("Couldn't record guest kernel [%d]'s reference"
1524 		       " relocation symbol.\n", machine->pid);
1525 }
1526 
1527 static struct perf_event_header finished_round_event = {
1528 	.size = sizeof(struct perf_event_header),
1529 	.type = PERF_RECORD_FINISHED_ROUND,
1530 };
1531 
1532 static struct perf_event_header finished_init_event = {
1533 	.size = sizeof(struct perf_event_header),
1534 	.type = PERF_RECORD_FINISHED_INIT,
1535 };
1536 
record__adjust_affinity(struct record * rec,struct mmap * map)1537 static void record__adjust_affinity(struct record *rec, struct mmap *map)
1538 {
1539 	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
1540 	    !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
1541 			  thread->mask->affinity.nbits)) {
1542 		bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
1543 		bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
1544 			  map->affinity_mask.bits, thread->mask->affinity.nbits);
1545 		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
1546 					(cpu_set_t *)thread->mask->affinity.bits);
1547 		if (verbose == 2) {
1548 			pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
1549 			mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
1550 		}
1551 	}
1552 }
1553 
process_comp_header(void * record,size_t increment)1554 static size_t process_comp_header(void *record, size_t increment)
1555 {
1556 	struct perf_record_compressed2 *event = record;
1557 	size_t size = sizeof(*event);
1558 
1559 	if (increment) {
1560 		event->header.size += increment;
1561 		return increment;
1562 	}
1563 
1564 	event->header.type = PERF_RECORD_COMPRESSED2;
1565 	event->header.size = size;
1566 
1567 	return size;
1568 }
1569 
zstd_compress(struct perf_session * session,struct mmap * map,void * dst,size_t dst_size,void * src,size_t src_size)1570 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
1571 			    void *dst, size_t dst_size, void *src, size_t src_size)
1572 {
1573 	ssize_t compressed;
1574 	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed2) - 1;
1575 	struct zstd_data *zstd_data = &session->zstd_data;
1576 
1577 	if (map && map->file)
1578 		zstd_data = &map->zstd_data;
1579 
1580 	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
1581 						     max_record_size, process_comp_header);
1582 	if (compressed < 0)
1583 		return compressed;
1584 
1585 	if (map && map->file) {
1586 		thread->bytes_transferred += src_size;
1587 		thread->bytes_compressed  += compressed;
1588 	} else {
1589 		session->bytes_transferred += src_size;
1590 		session->bytes_compressed  += compressed;
1591 	}
1592 
1593 	return compressed;
1594 }
1595 
record__mmap_read_evlist(struct record * rec,struct evlist * evlist,bool overwrite,bool synch)1596 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1597 				    bool overwrite, bool synch)
1598 {
1599 	u64 bytes_written = rec->bytes_written;
1600 	int i;
1601 	int rc = 0;
1602 	int nr_mmaps;
1603 	struct mmap **maps;
1604 	int trace_fd = rec->data.file.fd;
1605 	off_t off = 0;
1606 
1607 	if (!evlist)
1608 		return 0;
1609 
1610 	nr_mmaps = thread->nr_mmaps;
1611 	maps = overwrite ? thread->overwrite_maps : thread->maps;
1612 
1613 	if (!maps)
1614 		return 0;
1615 
1616 	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1617 		return 0;
1618 
1619 	if (record__aio_enabled(rec))
1620 		off = record__aio_get_pos(trace_fd);
1621 
1622 	for (i = 0; i < nr_mmaps; i++) {
1623 		u64 flush = 0;
1624 		struct mmap *map = maps[i];
1625 
1626 		if (map->core.base) {
1627 			record__adjust_affinity(rec, map);
1628 			if (synch) {
1629 				flush = map->core.flush;
1630 				map->core.flush = 1;
1631 			}
1632 			if (!record__aio_enabled(rec)) {
1633 				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1634 					if (synch)
1635 						map->core.flush = flush;
1636 					rc = -1;
1637 					goto out;
1638 				}
1639 			} else {
1640 				if (record__aio_push(rec, map, &off) < 0) {
1641 					record__aio_set_pos(trace_fd, off);
1642 					if (synch)
1643 						map->core.flush = flush;
1644 					rc = -1;
1645 					goto out;
1646 				}
1647 			}
1648 			if (synch)
1649 				map->core.flush = flush;
1650 		}
1651 
1652 		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1653 		    !rec->opts.auxtrace_sample_mode &&
1654 		    record__auxtrace_mmap_read(rec, map) != 0) {
1655 			rc = -1;
1656 			goto out;
1657 		}
1658 	}
1659 
1660 	if (record__aio_enabled(rec))
1661 		record__aio_set_pos(trace_fd, off);
1662 
1663 	/*
1664 	 * Mark the round finished in case we wrote
1665 	 * at least one event.
1666 	 *
1667 	 * No need for round events in directory mode,
1668 	 * because per-cpu maps and files have data
1669 	 * sorted by kernel.
1670 	 */
1671 	if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
1672 		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1673 
1674 	if (overwrite)
1675 		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1676 out:
1677 	return rc;
1678 }
1679 
record__mmap_read_all(struct record * rec,bool synch)1680 static int record__mmap_read_all(struct record *rec, bool synch)
1681 {
1682 	int err;
1683 
1684 	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1685 	if (err)
1686 		return err;
1687 
1688 	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1689 }
1690 
record__thread_munmap_filtered(struct fdarray * fda,int fd,void * arg __maybe_unused)1691 static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1692 					   void *arg __maybe_unused)
1693 {
1694 	struct perf_mmap *map = fda->priv[fd].ptr;
1695 
1696 	if (map)
1697 		perf_mmap__put(map);
1698 }
1699 
record__thread(void * arg)1700 static void *record__thread(void *arg)
1701 {
1702 	enum thread_msg msg = THREAD_MSG__READY;
1703 	bool terminate = false;
1704 	struct fdarray *pollfd;
1705 	int err, ctlfd_pos;
1706 
1707 	thread = arg;
1708 	thread->tid = gettid();
1709 
1710 	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1711 	if (err == -1)
1712 		pr_warning("threads[%d]: failed to notify on start: %s\n",
1713 			   thread->tid, strerror(errno));
1714 
1715 	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
1716 
1717 	pollfd = &thread->pollfd;
1718 	ctlfd_pos = thread->ctlfd_pos;
1719 
1720 	for (;;) {
1721 		unsigned long long hits = thread->samples;
1722 
1723 		if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
1724 			break;
1725 
1726 		if (hits == thread->samples) {
1727 
1728 			err = fdarray__poll(pollfd, -1);
1729 			/*
1730 			 * Propagate error, only if there's any. Ignore positive
1731 			 * number of returned events and interrupt error.
1732 			 */
1733 			if (err > 0 || (err < 0 && errno == EINTR))
1734 				err = 0;
1735 			thread->waking++;
1736 
1737 			if (fdarray__filter(pollfd, POLLERR | POLLHUP,
1738 					    record__thread_munmap_filtered, NULL) == 0)
1739 				break;
1740 		}
1741 
1742 		if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
1743 			terminate = true;
1744 			close(thread->pipes.msg[0]);
1745 			thread->pipes.msg[0] = -1;
1746 			pollfd->entries[ctlfd_pos].fd = -1;
1747 			pollfd->entries[ctlfd_pos].events = 0;
1748 		}
1749 
1750 		pollfd->entries[ctlfd_pos].revents = 0;
1751 	}
1752 	record__mmap_read_all(thread->rec, true);
1753 
1754 	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1755 	if (err == -1)
1756 		pr_warning("threads[%d]: failed to notify on termination: %s\n",
1757 			   thread->tid, strerror(errno));
1758 
1759 	return NULL;
1760 }
1761 
record__init_features(struct record * rec)1762 static void record__init_features(struct record *rec)
1763 {
1764 	struct perf_session *session = rec->session;
1765 	int feat;
1766 
1767 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1768 		perf_header__set_feat(&session->header, feat);
1769 
1770 	if (rec->no_buildid)
1771 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1772 
1773 	if (!have_tracepoints(&rec->evlist->core.entries))
1774 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1775 
1776 	if (!rec->opts.branch_stack)
1777 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1778 
1779 	if (!rec->opts.full_auxtrace)
1780 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1781 
1782 	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1783 		perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1784 
1785 	if (!rec->opts.use_clockid)
1786 		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
1787 
1788 	if (!record__threads_enabled(rec))
1789 		perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1790 
1791 	if (!record__comp_enabled(rec))
1792 		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1793 
1794 	perf_header__clear_feat(&session->header, HEADER_STAT);
1795 }
1796 
1797 static void
record__finish_output(struct record * rec)1798 record__finish_output(struct record *rec)
1799 {
1800 	int i;
1801 	struct perf_data *data = &rec->data;
1802 	int fd = perf_data__fd(data);
1803 
1804 	if (data->is_pipe) {
1805 		/* Just to display approx. size */
1806 		data->file.size = rec->bytes_written;
1807 		return;
1808 	}
1809 
1810 	rec->session->header.data_size += rec->bytes_written;
1811 	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1812 	if (record__threads_enabled(rec)) {
1813 		for (i = 0; i < data->dir.nr; i++)
1814 			data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
1815 	}
1816 
1817 	/* Buildid scanning disabled or build ID in kernel and synthesized map events. */
1818 	if (!rec->no_buildid) {
1819 		process_buildids(rec);
1820 
1821 		if (rec->buildid_all)
1822 			perf_session__dsos_hit_all(rec->session);
1823 	}
1824 	perf_session__write_header(rec->session, rec->evlist, fd, true);
1825 
1826 	return;
1827 }
1828 
record__synthesize_workload(struct record * rec,bool tail)1829 static int record__synthesize_workload(struct record *rec, bool tail)
1830 {
1831 	int err;
1832 	struct perf_thread_map *thread_map;
1833 	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1834 
1835 	if (rec->opts.tail_synthesize != tail)
1836 		return 0;
1837 
1838 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1839 	if (thread_map == NULL)
1840 		return -1;
1841 
1842 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1843 						 process_synthesized_event,
1844 						 &rec->session->machines.host,
1845 						 needs_mmap,
1846 						 rec->opts.sample_address);
1847 	perf_thread_map__put(thread_map);
1848 	return err;
1849 }
1850 
write_finished_init(struct record * rec,bool tail)1851 static int write_finished_init(struct record *rec, bool tail)
1852 {
1853 	if (rec->opts.tail_synthesize != tail)
1854 		return 0;
1855 
1856 	return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
1857 }
1858 
1859 static int record__synthesize(struct record *rec, bool tail);
1860 
1861 static int
record__switch_output(struct record * rec,bool at_exit)1862 record__switch_output(struct record *rec, bool at_exit)
1863 {
1864 	struct perf_data *data = &rec->data;
1865 	char *new_filename = NULL;
1866 	int fd, err;
1867 
1868 	/* Same Size:      "2015122520103046"*/
1869 	char timestamp[] = "InvalidTimestamp";
1870 
1871 	record__aio_mmap_read_sync(rec);
1872 
1873 	write_finished_init(rec, true);
1874 
1875 	record__synthesize(rec, true);
1876 	if (target__none(&rec->opts.target))
1877 		record__synthesize_workload(rec, true);
1878 
1879 	rec->samples = 0;
1880 	record__finish_output(rec);
1881 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1882 	if (err) {
1883 		pr_err("Failed to get current timestamp\n");
1884 		return -EINVAL;
1885 	}
1886 
1887 	fd = perf_data__switch(data, timestamp,
1888 			       rec->session->header.data_offset,
1889 			       at_exit, &new_filename);
1890 	if (fd >= 0 && !at_exit) {
1891 		rec->bytes_written = 0;
1892 		rec->session->header.data_size = 0;
1893 	}
1894 
1895 	if (!quiet) {
1896 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1897 			data->path, timestamp);
1898 	}
1899 
1900 	if (rec->switch_output.num_files) {
1901 		int n = rec->switch_output.cur_file + 1;
1902 
1903 		if (n >= rec->switch_output.num_files)
1904 			n = 0;
1905 		rec->switch_output.cur_file = n;
1906 		if (rec->switch_output.filenames[n]) {
1907 			remove(rec->switch_output.filenames[n]);
1908 			zfree(&rec->switch_output.filenames[n]);
1909 		}
1910 		rec->switch_output.filenames[n] = new_filename;
1911 	} else {
1912 		free(new_filename);
1913 	}
1914 
1915 	/* Output tracking events */
1916 	if (!at_exit) {
1917 		record__synthesize(rec, false);
1918 
1919 		/*
1920 		 * In 'perf record --switch-output' without -a,
1921 		 * record__synthesize() in record__switch_output() won't
1922 		 * generate tracking events because there's no thread_map
1923 		 * in evlist. Which causes newly created perf.data doesn't
1924 		 * contain map and comm information.
1925 		 * Create a fake thread_map and directly call
1926 		 * perf_event__synthesize_thread_map() for those events.
1927 		 */
1928 		if (target__none(&rec->opts.target))
1929 			record__synthesize_workload(rec, false);
1930 		write_finished_init(rec, false);
1931 	}
1932 	return fd;
1933 }
1934 
__record__save_lost_samples(struct record * rec,struct evsel * evsel,struct perf_record_lost_samples * lost,int cpu_idx,int thread_idx,u64 lost_count,u16 misc_flag)1935 static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
1936 					struct perf_record_lost_samples *lost,
1937 					int cpu_idx, int thread_idx, u64 lost_count,
1938 					u16 misc_flag)
1939 {
1940 	struct perf_sample_id *sid;
1941 	struct perf_sample sample;
1942 	int id_hdr_size;
1943 
1944 	perf_sample__init(&sample, /*all=*/true);
1945 	lost->lost = lost_count;
1946 	if (evsel->core.ids) {
1947 		sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
1948 		sample.id = sid->id;
1949 	}
1950 
1951 	id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1),
1952 						       evsel->core.attr.sample_type, &sample);
1953 	lost->header.size = sizeof(*lost) + id_hdr_size;
1954 	lost->header.misc = misc_flag;
1955 	record__write(rec, NULL, lost, lost->header.size);
1956 	perf_sample__exit(&sample);
1957 }
1958 
record__read_lost_samples(struct record * rec)1959 static void record__read_lost_samples(struct record *rec)
1960 {
1961 	struct perf_session *session = rec->session;
1962 	struct perf_record_lost_samples_and_ids lost;
1963 	struct evsel *evsel;
1964 
1965 	/* there was an error during record__open */
1966 	if (session->evlist == NULL)
1967 		return;
1968 
1969 	evlist__for_each_entry(session->evlist, evsel) {
1970 		struct xyarray *xy = evsel->core.sample_id;
1971 		u64 lost_count;
1972 
1973 		if (xy == NULL || evsel->core.fd == NULL)
1974 			continue;
1975 		if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
1976 		    xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
1977 			pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
1978 			continue;
1979 		}
1980 
1981 		for (int x = 0; x < xyarray__max_x(xy); x++) {
1982 			for (int y = 0; y < xyarray__max_y(xy); y++) {
1983 				struct perf_counts_values count;
1984 
1985 				if (perf_evsel__read(&evsel->core, x, y, &count) < 0) {
1986 					pr_debug("read LOST count failed\n");
1987 					return;
1988 				}
1989 
1990 				if (count.lost) {
1991 					memset(&lost, 0, sizeof(lost));
1992 					lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
1993 					__record__save_lost_samples(rec, evsel, &lost.lost,
1994 								    x, y, count.lost, 0);
1995 				}
1996 			}
1997 		}
1998 
1999 		lost_count = perf_bpf_filter__lost_count(evsel);
2000 		if (lost_count) {
2001 			memset(&lost, 0, sizeof(lost));
2002 			lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
2003 			__record__save_lost_samples(rec, evsel, &lost.lost, 0, 0, lost_count,
2004 						    PERF_RECORD_MISC_LOST_SAMPLES_BPF);
2005 		}
2006 	}
2007 }
2008 
2009 static volatile sig_atomic_t workload_exec_errno;
2010 
2011 /*
2012  * evlist__prepare_workload will send a SIGUSR1
2013  * if the fork fails, since we asked by setting its
2014  * want_signal to true.
2015  */
workload_exec_failed_signal(int signo __maybe_unused,siginfo_t * info,void * ucontext __maybe_unused)2016 static void workload_exec_failed_signal(int signo __maybe_unused,
2017 					siginfo_t *info,
2018 					void *ucontext __maybe_unused)
2019 {
2020 	workload_exec_errno = info->si_value.sival_int;
2021 	done = 1;
2022 	child_finished = 1;
2023 }
2024 
2025 static void snapshot_sig_handler(int sig);
2026 static void alarm_sig_handler(int sig);
2027 
evlist__pick_pc(struct evlist * evlist)2028 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
2029 {
2030 	if (evlist) {
2031 		if (evlist->mmap && evlist->mmap[0].core.base)
2032 			return evlist->mmap[0].core.base;
2033 		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
2034 			return evlist->overwrite_mmap[0].core.base;
2035 	}
2036 	return NULL;
2037 }
2038 
record__pick_pc(struct record * rec)2039 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
2040 {
2041 	const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
2042 	if (pc)
2043 		return pc;
2044 	return NULL;
2045 }
2046 
record__synthesize(struct record * rec,bool tail)2047 static int record__synthesize(struct record *rec, bool tail)
2048 {
2049 	struct perf_session *session = rec->session;
2050 	struct machine *machine = &session->machines.host;
2051 	struct perf_data *data = &rec->data;
2052 	struct record_opts *opts = &rec->opts;
2053 	struct perf_tool *tool = &rec->tool;
2054 	int err = 0;
2055 	event_op f = process_synthesized_event;
2056 
2057 	if (rec->opts.tail_synthesize != tail)
2058 		return 0;
2059 
2060 	if (data->is_pipe) {
2061 		err = perf_event__synthesize_for_pipe(tool, session, data,
2062 						      process_synthesized_event);
2063 		if (err < 0)
2064 			goto out;
2065 
2066 		rec->bytes_written += err;
2067 	}
2068 
2069 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
2070 					  process_synthesized_event, machine);
2071 	if (err)
2072 		goto out;
2073 
2074 	/* Synthesize id_index before auxtrace_info */
2075 	err = perf_event__synthesize_id_index(tool,
2076 					      process_synthesized_event,
2077 					      session->evlist, machine);
2078 	if (err)
2079 		goto out;
2080 
2081 	if (rec->opts.full_auxtrace) {
2082 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
2083 					session, process_synthesized_event);
2084 		if (err)
2085 			goto out;
2086 	}
2087 
2088 	if (!evlist__exclude_kernel(rec->evlist)) {
2089 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
2090 							 machine);
2091 		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
2092 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2093 				   "Check /proc/kallsyms permission or run as root.\n");
2094 
2095 		err = perf_event__synthesize_modules(tool, process_synthesized_event,
2096 						     machine);
2097 		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
2098 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2099 				   "Check /proc/modules permission or run as root.\n");
2100 	}
2101 
2102 	if (perf_guest) {
2103 		machines__process_guests(&session->machines,
2104 					 perf_event__synthesize_guest_os, tool);
2105 	}
2106 
2107 	err = perf_event__synthesize_extra_attr(&rec->tool,
2108 						rec->evlist,
2109 						process_synthesized_event,
2110 						data->is_pipe);
2111 	if (err)
2112 		goto out;
2113 
2114 	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
2115 						 process_synthesized_event,
2116 						NULL);
2117 	if (err < 0) {
2118 		pr_err("Couldn't synthesize thread map.\n");
2119 		return err;
2120 	}
2121 
2122 	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
2123 					     process_synthesized_event, NULL);
2124 	if (err < 0) {
2125 		pr_err("Couldn't synthesize cpu map.\n");
2126 		return err;
2127 	}
2128 
2129 	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
2130 						machine, opts);
2131 	if (err < 0) {
2132 		pr_warning("Couldn't synthesize bpf events.\n");
2133 		err = 0;
2134 	}
2135 
2136 	if (rec->opts.synth & PERF_SYNTH_CGROUP) {
2137 		err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
2138 						     machine);
2139 		if (err < 0) {
2140 			pr_warning("Couldn't synthesize cgroup events.\n");
2141 			err = 0;
2142 		}
2143 	}
2144 
2145 	if (rec->opts.nr_threads_synthesize > 1) {
2146 		mutex_init(&synth_lock);
2147 		perf_set_multithreaded();
2148 		f = process_locked_synthesized_event;
2149 	}
2150 
2151 	if (rec->opts.synth & PERF_SYNTH_TASK) {
2152 		bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
2153 
2154 		err = __machine__synthesize_threads(machine, tool, &opts->target,
2155 						    rec->evlist->core.threads,
2156 						    f, needs_mmap, opts->sample_address,
2157 						    rec->opts.nr_threads_synthesize);
2158 	}
2159 
2160 	if (rec->opts.nr_threads_synthesize > 1) {
2161 		perf_set_singlethreaded();
2162 		mutex_destroy(&synth_lock);
2163 	}
2164 
2165 out:
2166 	return err;
2167 }
2168 
record__synthesize_final_bpf_metadata(struct record * rec __maybe_unused)2169 static void record__synthesize_final_bpf_metadata(struct record *rec __maybe_unused)
2170 {
2171 #ifdef HAVE_LIBBPF_SUPPORT
2172 	perf_event__synthesize_final_bpf_metadata(rec->session,
2173 						  process_synthesized_event);
2174 #endif
2175 }
2176 
record__process_signal_event(union perf_event * event __maybe_unused,void * data)2177 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
2178 {
2179 	struct record *rec = data;
2180 	pthread_kill(rec->thread_id, SIGUSR2);
2181 	return 0;
2182 }
2183 
record__setup_sb_evlist(struct record * rec)2184 static int record__setup_sb_evlist(struct record *rec)
2185 {
2186 	struct record_opts *opts = &rec->opts;
2187 
2188 	if (rec->sb_evlist != NULL) {
2189 		/*
2190 		 * We get here if --switch-output-event populated the
2191 		 * sb_evlist, so associate a callback that will send a SIGUSR2
2192 		 * to the main thread.
2193 		 */
2194 		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
2195 		rec->thread_id = pthread_self();
2196 	}
2197 #ifdef HAVE_LIBBPF_SUPPORT
2198 	if (!opts->no_bpf_event) {
2199 		if (rec->sb_evlist == NULL) {
2200 			rec->sb_evlist = evlist__new();
2201 
2202 			if (rec->sb_evlist == NULL) {
2203 				pr_err("Couldn't create side band evlist.\n.");
2204 				return -1;
2205 			}
2206 		}
2207 
2208 		if (evlist__add_bpf_sb_event(rec->sb_evlist, perf_session__env(rec->session))) {
2209 			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
2210 			return -1;
2211 		}
2212 	}
2213 #endif
2214 	if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
2215 		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
2216 		opts->no_bpf_event = true;
2217 	}
2218 
2219 	return 0;
2220 }
2221 
record__init_clock(struct record * rec)2222 static int record__init_clock(struct record *rec)
2223 {
2224 	struct perf_session *session = rec->session;
2225 	struct timespec ref_clockid;
2226 	struct timeval ref_tod;
2227 	struct perf_env *env = perf_session__env(session);
2228 	u64 ref;
2229 
2230 	if (!rec->opts.use_clockid)
2231 		return 0;
2232 
2233 	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
2234 		env->clock.clockid_res_ns = rec->opts.clockid_res_ns;
2235 
2236 	env->clock.clockid = rec->opts.clockid;
2237 
2238 	if (gettimeofday(&ref_tod, NULL) != 0) {
2239 		pr_err("gettimeofday failed, cannot set reference time.\n");
2240 		return -1;
2241 	}
2242 
2243 	if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
2244 		pr_err("clock_gettime failed, cannot set reference time.\n");
2245 		return -1;
2246 	}
2247 
2248 	ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
2249 	      (u64) ref_tod.tv_usec * NSEC_PER_USEC;
2250 
2251 	env->clock.tod_ns = ref;
2252 
2253 	ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2254 	      (u64) ref_clockid.tv_nsec;
2255 
2256 	env->clock.clockid_ns = ref;
2257 	return 0;
2258 }
2259 
hit_auxtrace_snapshot_trigger(struct record * rec)2260 static void hit_auxtrace_snapshot_trigger(struct record *rec)
2261 {
2262 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2263 		trigger_hit(&auxtrace_snapshot_trigger);
2264 		auxtrace_record__snapshot_started = 1;
2265 		if (auxtrace_record__snapshot_start(rec->itr))
2266 			trigger_error(&auxtrace_snapshot_trigger);
2267 	}
2268 }
2269 
record__terminate_thread(struct record_thread * thread_data)2270 static int record__terminate_thread(struct record_thread *thread_data)
2271 {
2272 	int err;
2273 	enum thread_msg ack = THREAD_MSG__UNDEFINED;
2274 	pid_t tid = thread_data->tid;
2275 
2276 	close(thread_data->pipes.msg[1]);
2277 	thread_data->pipes.msg[1] = -1;
2278 	err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
2279 	if (err > 0)
2280 		pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
2281 	else
2282 		pr_warning("threads[%d]: failed to receive termination notification from %d\n",
2283 			   thread->tid, tid);
2284 
2285 	return 0;
2286 }
2287 
record__start_threads(struct record * rec)2288 static int record__start_threads(struct record *rec)
2289 {
2290 	int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
2291 	struct record_thread *thread_data = rec->thread_data;
2292 	sigset_t full, mask;
2293 	pthread_t handle;
2294 	pthread_attr_t attrs;
2295 
2296 	thread = &thread_data[0];
2297 
2298 	if (!record__threads_enabled(rec))
2299 		return 0;
2300 
2301 	sigfillset(&full);
2302 	if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2303 		pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
2304 		return -1;
2305 	}
2306 
2307 	pthread_attr_init(&attrs);
2308 	pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2309 
2310 	for (t = 1; t < nr_threads; t++) {
2311 		enum thread_msg msg = THREAD_MSG__UNDEFINED;
2312 
2313 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2314 		pthread_attr_setaffinity_np(&attrs,
2315 					    MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2316 					    (cpu_set_t *)(thread_data[t].mask->affinity.bits));
2317 #endif
2318 		if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2319 			for (tt = 1; tt < t; tt++)
2320 				record__terminate_thread(&thread_data[t]);
2321 			pr_err("Failed to start threads: %s\n", strerror(errno));
2322 			ret = -1;
2323 			goto out_err;
2324 		}
2325 
2326 		err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2327 		if (err > 0)
2328 			pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2329 				  thread_msg_tags[msg]);
2330 		else
2331 			pr_warning("threads[%d]: failed to receive start notification from %d\n",
2332 				   thread->tid, rec->thread_data[t].tid);
2333 	}
2334 
2335 	sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2336 			(cpu_set_t *)thread->mask->affinity.bits);
2337 
2338 	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2339 
2340 out_err:
2341 	pthread_attr_destroy(&attrs);
2342 
2343 	if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2344 		pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
2345 		ret = -1;
2346 	}
2347 
2348 	return ret;
2349 }
2350 
record__stop_threads(struct record * rec)2351 static int record__stop_threads(struct record *rec)
2352 {
2353 	int t;
2354 	struct record_thread *thread_data = rec->thread_data;
2355 
2356 	for (t = 1; t < rec->nr_threads; t++)
2357 		record__terminate_thread(&thread_data[t]);
2358 
2359 	for (t = 0; t < rec->nr_threads; t++) {
2360 		rec->samples += thread_data[t].samples;
2361 		if (!record__threads_enabled(rec))
2362 			continue;
2363 		rec->session->bytes_transferred += thread_data[t].bytes_transferred;
2364 		rec->session->bytes_compressed += thread_data[t].bytes_compressed;
2365 		pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
2366 			 thread_data[t].samples, thread_data[t].waking);
2367 		if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
2368 			pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
2369 				 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
2370 		else
2371 			pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
2372 	}
2373 
2374 	return 0;
2375 }
2376 
record__waking(struct record * rec)2377 static unsigned long record__waking(struct record *rec)
2378 {
2379 	int t;
2380 	unsigned long waking = 0;
2381 	struct record_thread *thread_data = rec->thread_data;
2382 
2383 	for (t = 0; t < rec->nr_threads; t++)
2384 		waking += thread_data[t].waking;
2385 
2386 	return waking;
2387 }
2388 
__cmd_record(struct record * rec,int argc,const char ** argv)2389 static int __cmd_record(struct record *rec, int argc, const char **argv)
2390 {
2391 	int err;
2392 	int status = 0;
2393 	const bool forks = argc > 0;
2394 	struct perf_tool *tool = &rec->tool;
2395 	struct record_opts *opts = &rec->opts;
2396 	struct perf_data *data = &rec->data;
2397 	struct perf_session *session;
2398 	bool disabled = false, draining = false;
2399 	int fd;
2400 	float ratio = 0;
2401 	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
2402 	struct perf_env *env;
2403 
2404 	atexit(record__sig_exit);
2405 	signal(SIGCHLD, sig_handler);
2406 	signal(SIGINT, sig_handler);
2407 	signal(SIGTERM, sig_handler);
2408 	signal(SIGSEGV, sigsegv_handler);
2409 
2410 	if (rec->opts.record_cgroup) {
2411 #ifndef HAVE_FILE_HANDLE
2412 		pr_err("cgroup tracking is not supported\n");
2413 		return -1;
2414 #endif
2415 	}
2416 
2417 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
2418 		signal(SIGUSR2, snapshot_sig_handler);
2419 		if (rec->opts.auxtrace_snapshot_mode)
2420 			trigger_on(&auxtrace_snapshot_trigger);
2421 		if (rec->switch_output.enabled)
2422 			trigger_on(&switch_output_trigger);
2423 	} else {
2424 		signal(SIGUSR2, SIG_IGN);
2425 	}
2426 
2427 	perf_tool__init(tool, /*ordered_events=*/true);
2428 	tool->sample		= process_sample_event;
2429 	tool->fork		= perf_event__process_fork;
2430 	tool->exit		= perf_event__process_exit;
2431 	tool->comm		= perf_event__process_comm;
2432 	tool->namespaces	= perf_event__process_namespaces;
2433 	tool->mmap		= build_id__process_mmap;
2434 	tool->mmap2		= build_id__process_mmap2;
2435 	tool->itrace_start	= process_timestamp_boundary;
2436 	tool->aux		= process_timestamp_boundary;
2437 	tool->namespace_events	= rec->opts.record_namespaces;
2438 	tool->cgroup_events	= rec->opts.record_cgroup;
2439 	session = perf_session__new(data, tool);
2440 	if (IS_ERR(session)) {
2441 		pr_err("Perf session creation failed.\n");
2442 		return PTR_ERR(session);
2443 	}
2444 	env = perf_session__env(session);
2445 	if (record__threads_enabled(rec)) {
2446 		if (perf_data__is_pipe(&rec->data)) {
2447 			pr_err("Parallel trace streaming is not available in pipe mode.\n");
2448 			return -1;
2449 		}
2450 		if (rec->opts.full_auxtrace) {
2451 			pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
2452 			return -1;
2453 		}
2454 	}
2455 
2456 	fd = perf_data__fd(data);
2457 	rec->session = session;
2458 
2459 	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
2460 		pr_err("Compression initialization failed.\n");
2461 		return -1;
2462 	}
2463 #ifdef HAVE_EVENTFD_SUPPORT
2464 	done_fd = eventfd(0, EFD_NONBLOCK);
2465 	if (done_fd < 0) {
2466 		pr_err("Failed to create wakeup eventfd, error: %m\n");
2467 		status = -1;
2468 		goto out_delete_session;
2469 	}
2470 	err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
2471 	if (err < 0) {
2472 		pr_err("Failed to add wakeup eventfd to poll list\n");
2473 		status = err;
2474 		goto out_delete_session;
2475 	}
2476 #endif // HAVE_EVENTFD_SUPPORT
2477 
2478 	env->comp_type  = PERF_COMP_ZSTD;
2479 	env->comp_level = rec->opts.comp_level;
2480 
2481 	if (rec->opts.kcore &&
2482 	    !record__kcore_readable(&session->machines.host)) {
2483 		pr_err("ERROR: kcore is not readable.\n");
2484 		return -1;
2485 	}
2486 
2487 	if (record__init_clock(rec))
2488 		return -1;
2489 
2490 	record__init_features(rec);
2491 
2492 	if (forks) {
2493 		err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
2494 					       workload_exec_failed_signal);
2495 		if (err < 0) {
2496 			pr_err("Couldn't run the workload!\n");
2497 			status = err;
2498 			goto out_delete_session;
2499 		}
2500 	}
2501 
2502 	/*
2503 	 * If we have just single event and are sending data
2504 	 * through pipe, we need to force the ids allocation,
2505 	 * because we synthesize event name through the pipe
2506 	 * and need the id for that.
2507 	 */
2508 	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
2509 		rec->opts.sample_id = true;
2510 
2511 	if (rec->timestamp_filename && perf_data__is_pipe(data)) {
2512 		rec->timestamp_filename = false;
2513 		pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n");
2514 	}
2515 
2516 	/*
2517 	 * Use global stat_config that is zero meaning aggr_mode is AGGR_NONE
2518 	 * and hybrid_merge is false.
2519 	 */
2520 	evlist__uniquify_evsel_names(rec->evlist, &stat_config);
2521 
2522 	evlist__config(rec->evlist, opts, &callchain_param);
2523 
2524 	/* Debug message used by test scripts */
2525 	pr_debug3("perf record opening and mmapping events\n");
2526 	if (record__open(rec) != 0) {
2527 		err = -1;
2528 		goto out_free_threads;
2529 	}
2530 	/* Debug message used by test scripts */
2531 	pr_debug3("perf record done opening and mmapping events\n");
2532 	env->comp_mmap_len = session->evlist->core.mmap_len;
2533 
2534 	if (rec->opts.kcore) {
2535 		err = record__kcore_copy(&session->machines.host, data);
2536 		if (err) {
2537 			pr_err("ERROR: Failed to copy kcore\n");
2538 			goto out_free_threads;
2539 		}
2540 	}
2541 
2542 	/*
2543 	 * Normally perf_session__new would do this, but it doesn't have the
2544 	 * evlist.
2545 	 */
2546 	if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
2547 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2548 		rec->tool.ordered_events = false;
2549 	}
2550 
2551 	if (evlist__nr_groups(rec->evlist) == 0)
2552 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2553 
2554 	if (data->is_pipe) {
2555 		err = perf_header__write_pipe(fd);
2556 		if (err < 0)
2557 			goto out_free_threads;
2558 	} else {
2559 		err = perf_session__write_header(session, rec->evlist, fd, false);
2560 		if (err < 0)
2561 			goto out_free_threads;
2562 	}
2563 
2564 	err = -1;
2565 	if (!rec->no_buildid
2566 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
2567 		pr_err("Couldn't generate buildids. "
2568 		       "Use --no-buildid to profile anyway.\n");
2569 		goto out_free_threads;
2570 	}
2571 
2572 	if (!evlist__needs_bpf_sb_event(rec->evlist))
2573 		opts->no_bpf_event = true;
2574 
2575 	err = record__setup_sb_evlist(rec);
2576 	if (err)
2577 		goto out_free_threads;
2578 
2579 	err = record__synthesize(rec, false);
2580 	if (err < 0)
2581 		goto out_free_threads;
2582 
2583 	if (rec->realtime_prio) {
2584 		struct sched_param param;
2585 
2586 		param.sched_priority = rec->realtime_prio;
2587 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
2588 			pr_err("Could not set realtime priority.\n");
2589 			err = -1;
2590 			goto out_free_threads;
2591 		}
2592 	}
2593 
2594 	if (record__start_threads(rec))
2595 		goto out_free_threads;
2596 
2597 	/*
2598 	 * When perf is starting the traced process, all the events
2599 	 * (apart from group members) have enable_on_exec=1 set,
2600 	 * so don't spoil it by prematurely enabling them.
2601 	 */
2602 	if (!target__none(&opts->target) && !opts->target.initial_delay)
2603 		evlist__enable(rec->evlist);
2604 
2605 	/*
2606 	 * offcpu-time does not call execve, so enable_on_exe wouldn't work
2607 	 * when recording a workload, do it manually
2608 	 */
2609 	if (rec->off_cpu)
2610 		evlist__enable_evsel(rec->evlist, (char *)OFFCPU_EVENT);
2611 
2612 	/*
2613 	 * Let the child rip
2614 	 */
2615 	if (forks) {
2616 		struct machine *machine = &session->machines.host;
2617 		union perf_event *event;
2618 		pid_t tgid;
2619 
2620 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2621 		if (event == NULL) {
2622 			err = -ENOMEM;
2623 			goto out_child;
2624 		}
2625 
2626 		/*
2627 		 * Some H/W events are generated before COMM event
2628 		 * which is emitted during exec(), so perf script
2629 		 * cannot see a correct process name for those events.
2630 		 * Synthesize COMM event to prevent it.
2631 		 */
2632 		tgid = perf_event__synthesize_comm(tool, event,
2633 						   rec->evlist->workload.pid,
2634 						   process_synthesized_event,
2635 						   machine);
2636 		free(event);
2637 
2638 		if (tgid == -1)
2639 			goto out_child;
2640 
2641 		event = malloc(sizeof(event->namespaces) +
2642 			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
2643 			       machine->id_hdr_size);
2644 		if (event == NULL) {
2645 			err = -ENOMEM;
2646 			goto out_child;
2647 		}
2648 
2649 		/*
2650 		 * Synthesize NAMESPACES event for the command specified.
2651 		 */
2652 		perf_event__synthesize_namespaces(tool, event,
2653 						  rec->evlist->workload.pid,
2654 						  tgid, process_synthesized_event,
2655 						  machine);
2656 		free(event);
2657 
2658 		evlist__start_workload(rec->evlist);
2659 	}
2660 
2661 	if (opts->target.initial_delay) {
2662 		pr_info(EVLIST_DISABLED_MSG);
2663 		if (opts->target.initial_delay > 0) {
2664 			usleep(opts->target.initial_delay * USEC_PER_MSEC);
2665 			evlist__enable(rec->evlist);
2666 			pr_info(EVLIST_ENABLED_MSG);
2667 		}
2668 	}
2669 
2670 	err = event_enable_timer__start(rec->evlist->eet);
2671 	if (err)
2672 		goto out_child;
2673 
2674 	/* Debug message used by test scripts */
2675 	pr_debug3("perf record has started\n");
2676 	fflush(stderr);
2677 
2678 	trigger_ready(&auxtrace_snapshot_trigger);
2679 	trigger_ready(&switch_output_trigger);
2680 	perf_hooks__invoke_record_start();
2681 
2682 	/*
2683 	 * Must write FINISHED_INIT so it will be seen after all other
2684 	 * synthesized user events, but before any regular events.
2685 	 */
2686 	err = write_finished_init(rec, false);
2687 	if (err < 0)
2688 		goto out_child;
2689 
2690 	for (;;) {
2691 		unsigned long long hits = thread->samples;
2692 
2693 		/*
2694 		 * rec->evlist->bkw_mmap_state is possible to be
2695 		 * BKW_MMAP_EMPTY here: when done == true and
2696 		 * hits != rec->samples in previous round.
2697 		 *
2698 		 * evlist__toggle_bkw_mmap ensure we never
2699 		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
2700 		 */
2701 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
2702 			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
2703 
2704 		if (record__mmap_read_all(rec, false) < 0) {
2705 			trigger_error(&auxtrace_snapshot_trigger);
2706 			trigger_error(&switch_output_trigger);
2707 			err = -1;
2708 			goto out_child;
2709 		}
2710 
2711 		if (auxtrace_record__snapshot_started) {
2712 			auxtrace_record__snapshot_started = 0;
2713 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
2714 				record__read_auxtrace_snapshot(rec, false);
2715 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2716 				pr_err("AUX area tracing snapshot failed\n");
2717 				err = -1;
2718 				goto out_child;
2719 			}
2720 		}
2721 
2722 		if (trigger_is_hit(&switch_output_trigger)) {
2723 			/*
2724 			 * If switch_output_trigger is hit, the data in
2725 			 * overwritable ring buffer should have been collected,
2726 			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
2727 			 *
2728 			 * If SIGUSR2 raise after or during record__mmap_read_all(),
2729 			 * record__mmap_read_all() didn't collect data from
2730 			 * overwritable ring buffer. Read again.
2731 			 */
2732 			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2733 				continue;
2734 			trigger_ready(&switch_output_trigger);
2735 
2736 			/*
2737 			 * Reenable events in overwrite ring buffer after
2738 			 * record__mmap_read_all(): we should have collected
2739 			 * data from it.
2740 			 */
2741 			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
2742 
2743 			if (!quiet)
2744 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
2745 					record__waking(rec));
2746 			thread->waking = 0;
2747 			fd = record__switch_output(rec, false);
2748 			if (fd < 0) {
2749 				pr_err("Failed to switch to new file\n");
2750 				trigger_error(&switch_output_trigger);
2751 				err = fd;
2752 				goto out_child;
2753 			}
2754 
2755 			/* re-arm the alarm */
2756 			if (rec->switch_output.time)
2757 				alarm(rec->switch_output.time);
2758 		}
2759 
2760 		if (hits == thread->samples) {
2761 			if (done || draining)
2762 				break;
2763 			err = fdarray__poll(&thread->pollfd, -1);
2764 			/*
2765 			 * Propagate error, only if there's any. Ignore positive
2766 			 * number of returned events and interrupt error.
2767 			 */
2768 			if (err > 0 || (err < 0 && errno == EINTR))
2769 				err = 0;
2770 			thread->waking++;
2771 
2772 			if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2773 					    record__thread_munmap_filtered, NULL) == 0)
2774 				draining = true;
2775 
2776 			err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread);
2777 			if (err)
2778 				goto out_child;
2779 		}
2780 
2781 		if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2782 			switch (cmd) {
2783 			case EVLIST_CTL_CMD_SNAPSHOT:
2784 				hit_auxtrace_snapshot_trigger(rec);
2785 				evlist__ctlfd_ack(rec->evlist);
2786 				break;
2787 			case EVLIST_CTL_CMD_STOP:
2788 				done = 1;
2789 				break;
2790 			case EVLIST_CTL_CMD_ACK:
2791 			case EVLIST_CTL_CMD_UNSUPPORTED:
2792 			case EVLIST_CTL_CMD_ENABLE:
2793 			case EVLIST_CTL_CMD_DISABLE:
2794 			case EVLIST_CTL_CMD_EVLIST:
2795 			case EVLIST_CTL_CMD_PING:
2796 			default:
2797 				break;
2798 			}
2799 		}
2800 
2801 		err = event_enable_timer__process(rec->evlist->eet);
2802 		if (err < 0)
2803 			goto out_child;
2804 		if (err) {
2805 			err = 0;
2806 			done = 1;
2807 		}
2808 
2809 		/*
2810 		 * When perf is starting the traced process, at the end events
2811 		 * die with the process and we wait for that. Thus no need to
2812 		 * disable events in this case.
2813 		 */
2814 		if (done && !disabled && !target__none(&opts->target)) {
2815 			trigger_off(&auxtrace_snapshot_trigger);
2816 			evlist__disable(rec->evlist);
2817 			disabled = true;
2818 		}
2819 	}
2820 
2821 	trigger_off(&auxtrace_snapshot_trigger);
2822 	trigger_off(&switch_output_trigger);
2823 
2824 	record__synthesize_final_bpf_metadata(rec);
2825 
2826 	if (opts->auxtrace_snapshot_on_exit)
2827 		record__auxtrace_snapshot_exit(rec);
2828 
2829 	if (forks && workload_exec_errno) {
2830 		char msg[STRERR_BUFSIZE];
2831 		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
2832 		struct strbuf sb = STRBUF_INIT;
2833 
2834 		evlist__format_evsels(rec->evlist, &sb, 2048);
2835 
2836 		pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2837 			sb.buf, argv[0], emsg);
2838 		strbuf_release(&sb);
2839 		err = -1;
2840 		goto out_child;
2841 	}
2842 
2843 	if (!quiet)
2844 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2845 			record__waking(rec));
2846 
2847 	write_finished_init(rec, true);
2848 
2849 	if (target__none(&rec->opts.target))
2850 		record__synthesize_workload(rec, true);
2851 
2852 out_child:
2853 	record__stop_threads(rec);
2854 	record__mmap_read_all(rec, true);
2855 out_free_threads:
2856 	record__free_thread_data(rec);
2857 	evlist__finalize_ctlfd(rec->evlist);
2858 	record__aio_mmap_read_sync(rec);
2859 
2860 	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
2861 		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
2862 		env->comp_ratio = ratio + 0.5;
2863 	}
2864 
2865 	if (forks) {
2866 		int exit_status;
2867 
2868 		if (!child_finished)
2869 			kill(rec->evlist->workload.pid, SIGTERM);
2870 
2871 		wait(&exit_status);
2872 
2873 		if (err < 0)
2874 			status = err;
2875 		else if (WIFEXITED(exit_status))
2876 			status = WEXITSTATUS(exit_status);
2877 		else if (WIFSIGNALED(exit_status))
2878 			signr = WTERMSIG(exit_status);
2879 	} else
2880 		status = err;
2881 
2882 	if (rec->off_cpu)
2883 		rec->bytes_written += off_cpu_write(rec->session);
2884 
2885 	record__read_lost_samples(rec);
2886 	record__synthesize(rec, true);
2887 	/* this will be recalculated during process_buildids() */
2888 	rec->samples = 0;
2889 
2890 	if (!err) {
2891 		if (!rec->timestamp_filename) {
2892 			record__finish_output(rec);
2893 		} else {
2894 			fd = record__switch_output(rec, true);
2895 			if (fd < 0) {
2896 				status = fd;
2897 				goto out_delete_session;
2898 			}
2899 		}
2900 	}
2901 
2902 	perf_hooks__invoke_record_end();
2903 
2904 	if (!err && !quiet) {
2905 		char samples[128];
2906 		const char *postfix = rec->timestamp_filename ?
2907 					".<timestamp>" : "";
2908 
2909 		if (rec->samples && !rec->opts.full_auxtrace)
2910 			scnprintf(samples, sizeof(samples),
2911 				  " (%" PRIu64 " samples)", rec->samples);
2912 		else
2913 			samples[0] = '\0';
2914 
2915 		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s",
2916 			perf_data__size(data) / 1024.0 / 1024.0,
2917 			data->path, postfix, samples);
2918 		if (ratio) {
2919 			fprintf(stderr,	", compressed (original %.3f MB, ratio is %.3f)",
2920 					rec->session->bytes_transferred / 1024.0 / 1024.0,
2921 					ratio);
2922 		}
2923 		fprintf(stderr, " ]\n");
2924 	}
2925 
2926 out_delete_session:
2927 #ifdef HAVE_EVENTFD_SUPPORT
2928 	if (done_fd >= 0) {
2929 		fd = done_fd;
2930 		done_fd = -1;
2931 
2932 		close(fd);
2933 	}
2934 #endif
2935 	zstd_fini(&session->zstd_data);
2936 	if (!opts->no_bpf_event)
2937 		evlist__stop_sb_thread(rec->sb_evlist);
2938 
2939 	perf_session__delete(session);
2940 	return status;
2941 }
2942 
callchain_debug(struct callchain_param * callchain)2943 static void callchain_debug(struct callchain_param *callchain)
2944 {
2945 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2946 
2947 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2948 
2949 	if (callchain->record_mode == CALLCHAIN_DWARF)
2950 		pr_debug("callchain: stack dump size %d\n",
2951 			 callchain->dump_size);
2952 }
2953 
record_opts__parse_callchain(struct record_opts * record,struct callchain_param * callchain,const char * arg,bool unset)2954 int record_opts__parse_callchain(struct record_opts *record,
2955 				 struct callchain_param *callchain,
2956 				 const char *arg, bool unset)
2957 {
2958 	int ret;
2959 	callchain->enabled = !unset;
2960 
2961 	/* --no-call-graph */
2962 	if (unset) {
2963 		callchain->record_mode = CALLCHAIN_NONE;
2964 		pr_debug("callchain: disabled\n");
2965 		return 0;
2966 	}
2967 
2968 	ret = parse_callchain_record_opt(arg, callchain);
2969 	if (!ret) {
2970 		/* Enable data address sampling for DWARF unwind. */
2971 		if (callchain->record_mode == CALLCHAIN_DWARF)
2972 			record->sample_address = true;
2973 		callchain_debug(callchain);
2974 	}
2975 
2976 	return ret;
2977 }
2978 
record_parse_callchain_opt(const struct option * opt,const char * arg,int unset)2979 int record_parse_callchain_opt(const struct option *opt,
2980 			       const char *arg,
2981 			       int unset)
2982 {
2983 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
2984 }
2985 
record_callchain_opt(const struct option * opt,const char * arg __maybe_unused,int unset __maybe_unused)2986 int record_callchain_opt(const struct option *opt,
2987 			 const char *arg __maybe_unused,
2988 			 int unset __maybe_unused)
2989 {
2990 	struct callchain_param *callchain = opt->value;
2991 
2992 	callchain->enabled = true;
2993 
2994 	if (callchain->record_mode == CALLCHAIN_NONE)
2995 		callchain->record_mode = CALLCHAIN_FP;
2996 
2997 	callchain_debug(callchain);
2998 	return 0;
2999 }
3000 
perf_record_config(const char * var,const char * value,void * cb)3001 static int perf_record_config(const char *var, const char *value, void *cb)
3002 {
3003 	struct record *rec = cb;
3004 
3005 	if (!strcmp(var, "record.build-id")) {
3006 		if (!strcmp(value, "cache"))
3007 			rec->no_buildid_cache = false;
3008 		else if (!strcmp(value, "no-cache"))
3009 			rec->no_buildid_cache = true;
3010 		else if (!strcmp(value, "skip"))
3011 			rec->no_buildid = true;
3012 		else if (!strcmp(value, "mmap"))
3013 			rec->buildid_mmap = true;
3014 		else if (!strcmp(value, "no-mmap"))
3015 			rec->buildid_mmap = false;
3016 		else
3017 			return -1;
3018 		return 0;
3019 	}
3020 	if (!strcmp(var, "record.call-graph")) {
3021 		var = "call-graph.record-mode";
3022 		return perf_default_config(var, value, cb);
3023 	}
3024 #ifdef HAVE_AIO_SUPPORT
3025 	if (!strcmp(var, "record.aio")) {
3026 		rec->opts.nr_cblocks = strtol(value, NULL, 0);
3027 		if (!rec->opts.nr_cblocks)
3028 			rec->opts.nr_cblocks = nr_cblocks_default;
3029 	}
3030 #endif
3031 	if (!strcmp(var, "record.debuginfod")) {
3032 		rec->debuginfod.urls = strdup(value);
3033 		if (!rec->debuginfod.urls)
3034 			return -ENOMEM;
3035 		rec->debuginfod.set = true;
3036 	}
3037 
3038 	return 0;
3039 }
3040 
record__parse_event_enable_time(const struct option * opt,const char * str,int unset)3041 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset)
3042 {
3043 	struct record *rec = (struct record *)opt->value;
3044 
3045 	return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset);
3046 }
3047 
record__parse_affinity(const struct option * opt,const char * str,int unset)3048 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
3049 {
3050 	struct record_opts *opts = (struct record_opts *)opt->value;
3051 
3052 	if (unset || !str)
3053 		return 0;
3054 
3055 	if (!strcasecmp(str, "node"))
3056 		opts->affinity = PERF_AFFINITY_NODE;
3057 	else if (!strcasecmp(str, "cpu"))
3058 		opts->affinity = PERF_AFFINITY_CPU;
3059 
3060 	return 0;
3061 }
3062 
record__mmap_cpu_mask_alloc(struct mmap_cpu_mask * mask,int nr_bits)3063 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
3064 {
3065 	mask->nbits = nr_bits;
3066 	mask->bits = bitmap_zalloc(mask->nbits);
3067 	if (!mask->bits)
3068 		return -ENOMEM;
3069 
3070 	return 0;
3071 }
3072 
record__mmap_cpu_mask_free(struct mmap_cpu_mask * mask)3073 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
3074 {
3075 	bitmap_free(mask->bits);
3076 	mask->nbits = 0;
3077 }
3078 
record__thread_mask_alloc(struct thread_mask * mask,int nr_bits)3079 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
3080 {
3081 	int ret;
3082 
3083 	ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
3084 	if (ret) {
3085 		mask->affinity.bits = NULL;
3086 		return ret;
3087 	}
3088 
3089 	ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
3090 	if (ret) {
3091 		record__mmap_cpu_mask_free(&mask->maps);
3092 		mask->maps.bits = NULL;
3093 	}
3094 
3095 	return ret;
3096 }
3097 
record__thread_mask_free(struct thread_mask * mask)3098 static void record__thread_mask_free(struct thread_mask *mask)
3099 {
3100 	record__mmap_cpu_mask_free(&mask->maps);
3101 	record__mmap_cpu_mask_free(&mask->affinity);
3102 }
3103 
record__parse_threads(const struct option * opt,const char * str,int unset)3104 static int record__parse_threads(const struct option *opt, const char *str, int unset)
3105 {
3106 	int s;
3107 	struct record_opts *opts = opt->value;
3108 
3109 	if (unset || !str || !strlen(str)) {
3110 		opts->threads_spec = THREAD_SPEC__CPU;
3111 	} else {
3112 		for (s = 1; s < THREAD_SPEC__MAX; s++) {
3113 			if (s == THREAD_SPEC__USER) {
3114 				opts->threads_user_spec = strdup(str);
3115 				if (!opts->threads_user_spec)
3116 					return -ENOMEM;
3117 				opts->threads_spec = THREAD_SPEC__USER;
3118 				break;
3119 			}
3120 			if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
3121 				opts->threads_spec = s;
3122 				break;
3123 			}
3124 		}
3125 	}
3126 
3127 	if (opts->threads_spec == THREAD_SPEC__USER)
3128 		pr_debug("threads_spec: %s\n", opts->threads_user_spec);
3129 	else
3130 		pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
3131 
3132 	return 0;
3133 }
3134 
parse_output_max_size(const struct option * opt,const char * str,int unset)3135 static int parse_output_max_size(const struct option *opt,
3136 				 const char *str, int unset)
3137 {
3138 	unsigned long *s = (unsigned long *)opt->value;
3139 	static struct parse_tag tags_size[] = {
3140 		{ .tag  = 'B', .mult = 1       },
3141 		{ .tag  = 'K', .mult = 1 << 10 },
3142 		{ .tag  = 'M', .mult = 1 << 20 },
3143 		{ .tag  = 'G', .mult = 1 << 30 },
3144 		{ .tag  = 0 },
3145 	};
3146 	unsigned long val;
3147 
3148 	if (unset) {
3149 		*s = 0;
3150 		return 0;
3151 	}
3152 
3153 	val = parse_tag_value(str, tags_size);
3154 	if (val != (unsigned long) -1) {
3155 		*s = val;
3156 		return 0;
3157 	}
3158 
3159 	return -1;
3160 }
3161 
record__parse_mmap_pages(const struct option * opt,const char * str,int unset __maybe_unused)3162 static int record__parse_mmap_pages(const struct option *opt,
3163 				    const char *str,
3164 				    int unset __maybe_unused)
3165 {
3166 	struct record_opts *opts = opt->value;
3167 	char *s, *p;
3168 	unsigned int mmap_pages;
3169 	int ret;
3170 
3171 	if (!str)
3172 		return -EINVAL;
3173 
3174 	s = strdup(str);
3175 	if (!s)
3176 		return -ENOMEM;
3177 
3178 	p = strchr(s, ',');
3179 	if (p)
3180 		*p = '\0';
3181 
3182 	if (*s) {
3183 		ret = __evlist__parse_mmap_pages(&mmap_pages, s);
3184 		if (ret)
3185 			goto out_free;
3186 		opts->mmap_pages = mmap_pages;
3187 	}
3188 
3189 	if (!p) {
3190 		ret = 0;
3191 		goto out_free;
3192 	}
3193 
3194 	ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
3195 	if (ret)
3196 		goto out_free;
3197 
3198 	opts->auxtrace_mmap_pages = mmap_pages;
3199 
3200 out_free:
3201 	free(s);
3202 	return ret;
3203 }
3204 
record__parse_off_cpu_thresh(const struct option * opt,const char * str,int unset __maybe_unused)3205 static int record__parse_off_cpu_thresh(const struct option *opt,
3206 					const char *str,
3207 					int unset __maybe_unused)
3208 {
3209 	struct record_opts *opts = opt->value;
3210 	char *endptr;
3211 	u64 off_cpu_thresh_ms;
3212 
3213 	if (!str)
3214 		return -EINVAL;
3215 
3216 	off_cpu_thresh_ms = strtoull(str, &endptr, 10);
3217 
3218 	/* the threshold isn't string "0", yet strtoull() returns 0, parsing failed */
3219 	if (*endptr || (off_cpu_thresh_ms == 0 && strcmp(str, "0")))
3220 		return -EINVAL;
3221 	else
3222 		opts->off_cpu_thresh_ns = off_cpu_thresh_ms * NSEC_PER_MSEC;
3223 
3224 	return 0;
3225 }
3226 
arch__add_leaf_frame_record_opts(struct record_opts * opts __maybe_unused)3227 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
3228 {
3229 }
3230 
parse_control_option(const struct option * opt,const char * str,int unset __maybe_unused)3231 static int parse_control_option(const struct option *opt,
3232 				const char *str,
3233 				int unset __maybe_unused)
3234 {
3235 	struct record_opts *opts = opt->value;
3236 
3237 	return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
3238 }
3239 
switch_output_size_warn(struct record * rec)3240 static void switch_output_size_warn(struct record *rec)
3241 {
3242 	u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
3243 	struct switch_output *s = &rec->switch_output;
3244 
3245 	wakeup_size /= 2;
3246 
3247 	if (s->size < wakeup_size) {
3248 		char buf[100];
3249 
3250 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
3251 		pr_warning("WARNING: switch-output data size lower than "
3252 			   "wakeup kernel buffer size (%s) "
3253 			   "expect bigger perf.data sizes\n", buf);
3254 	}
3255 }
3256 
switch_output_setup(struct record * rec)3257 static int switch_output_setup(struct record *rec)
3258 {
3259 	struct switch_output *s = &rec->switch_output;
3260 	static struct parse_tag tags_size[] = {
3261 		{ .tag  = 'B', .mult = 1       },
3262 		{ .tag  = 'K', .mult = 1 << 10 },
3263 		{ .tag  = 'M', .mult = 1 << 20 },
3264 		{ .tag  = 'G', .mult = 1 << 30 },
3265 		{ .tag  = 0 },
3266 	};
3267 	static struct parse_tag tags_time[] = {
3268 		{ .tag  = 's', .mult = 1        },
3269 		{ .tag  = 'm', .mult = 60       },
3270 		{ .tag  = 'h', .mult = 60*60    },
3271 		{ .tag  = 'd', .mult = 60*60*24 },
3272 		{ .tag  = 0 },
3273 	};
3274 	unsigned long val;
3275 
3276 	/*
3277 	 * If we're using --switch-output-events, then we imply its
3278 	 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
3279 	 *  thread to its parent.
3280 	 */
3281 	if (rec->switch_output_event_set) {
3282 		if (record__threads_enabled(rec)) {
3283 			pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
3284 			return 0;
3285 		}
3286 		goto do_signal;
3287 	}
3288 
3289 	if (!s->set)
3290 		return 0;
3291 
3292 	if (record__threads_enabled(rec)) {
3293 		pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
3294 		return 0;
3295 	}
3296 
3297 	if (!strcmp(s->str, "signal")) {
3298 do_signal:
3299 		s->signal = true;
3300 		pr_debug("switch-output with SIGUSR2 signal\n");
3301 		goto enabled;
3302 	}
3303 
3304 	val = parse_tag_value(s->str, tags_size);
3305 	if (val != (unsigned long) -1) {
3306 		s->size = val;
3307 		pr_debug("switch-output with %s size threshold\n", s->str);
3308 		goto enabled;
3309 	}
3310 
3311 	val = parse_tag_value(s->str, tags_time);
3312 	if (val != (unsigned long) -1) {
3313 		s->time = val;
3314 		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
3315 			 s->str, s->time);
3316 		goto enabled;
3317 	}
3318 
3319 	return -1;
3320 
3321 enabled:
3322 	rec->timestamp_filename = true;
3323 	s->enabled              = true;
3324 
3325 	if (s->size && !rec->opts.no_buffering)
3326 		switch_output_size_warn(rec);
3327 
3328 	return 0;
3329 }
3330 
3331 static const char * const __record_usage[] = {
3332 	"perf record [<options>] [<command>]",
3333 	"perf record [<options>] -- <command> [<options>]",
3334 	NULL
3335 };
3336 const char * const *record_usage = __record_usage;
3337 
build_id__process_mmap(const struct perf_tool * tool,union perf_event * event,struct perf_sample * sample,struct machine * machine)3338 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
3339 				  struct perf_sample *sample, struct machine *machine)
3340 {
3341 	/*
3342 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3343 	 * no need to add them twice.
3344 	 */
3345 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3346 		return 0;
3347 	return perf_event__process_mmap(tool, event, sample, machine);
3348 }
3349 
build_id__process_mmap2(const struct perf_tool * tool,union perf_event * event,struct perf_sample * sample,struct machine * machine)3350 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
3351 				   struct perf_sample *sample, struct machine *machine)
3352 {
3353 	/*
3354 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3355 	 * no need to add them twice.
3356 	 */
3357 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3358 		return 0;
3359 
3360 	return perf_event__process_mmap2(tool, event, sample, machine);
3361 }
3362 
process_timestamp_boundary(const struct perf_tool * tool,union perf_event * event __maybe_unused,struct perf_sample * sample,struct machine * machine __maybe_unused)3363 static int process_timestamp_boundary(const struct perf_tool *tool,
3364 				      union perf_event *event __maybe_unused,
3365 				      struct perf_sample *sample,
3366 				      struct machine *machine __maybe_unused)
3367 {
3368 	struct record *rec = container_of(tool, struct record, tool);
3369 
3370 	set_timestamp_boundary(rec, sample->time);
3371 	return 0;
3372 }
3373 
parse_record_synth_option(const struct option * opt,const char * str,int unset __maybe_unused)3374 static int parse_record_synth_option(const struct option *opt,
3375 				     const char *str,
3376 				     int unset __maybe_unused)
3377 {
3378 	struct record_opts *opts = opt->value;
3379 	char *p = strdup(str);
3380 
3381 	if (p == NULL)
3382 		return -1;
3383 
3384 	opts->synth = parse_synth_opt(p);
3385 	free(p);
3386 
3387 	if (opts->synth < 0) {
3388 		pr_err("Invalid synth option: %s\n", str);
3389 		return -1;
3390 	}
3391 	return 0;
3392 }
3393 
3394 /*
3395  * XXX Ideally would be local to cmd_record() and passed to a record__new
3396  * because we need to have access to it in record__exit, that is called
3397  * after cmd_record() exits, but since record_options need to be accessible to
3398  * builtin-script, leave it here.
3399  *
3400  * At least we don't ouch it in all the other functions here directly.
3401  *
3402  * Just say no to tons of global variables, sigh.
3403  */
3404 static struct record record = {
3405 	.opts = {
3406 		.sample_time	     = true,
3407 		.mmap_pages	     = UINT_MAX,
3408 		.user_freq	     = UINT_MAX,
3409 		.user_interval	     = ULLONG_MAX,
3410 		.freq		     = 4000,
3411 		.target		     = {
3412 			.uses_mmap   = true,
3413 			.default_per_cpu = true,
3414 		},
3415 		.mmap_flush          = MMAP_FLUSH_DEFAULT,
3416 		.nr_threads_synthesize = 1,
3417 		.ctl_fd              = -1,
3418 		.ctl_fd_ack          = -1,
3419 		.synth               = PERF_SYNTH_ALL,
3420 		.off_cpu_thresh_ns   = OFFCPU_THRESH,
3421 	},
3422 	.buildid_mmap = true,
3423 };
3424 
3425 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3426 	"\n\t\t\t\tDefault: fp";
3427 
3428 static bool dry_run;
3429 
3430 static struct parse_events_option_args parse_events_option_args = {
3431 	.evlistp = &record.evlist,
3432 };
3433 
3434 static struct parse_events_option_args switch_output_parse_events_option_args = {
3435 	.evlistp = &record.sb_evlist,
3436 };
3437 
3438 /*
3439  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
3440  * with it and switch to use the library functions in perf_evlist that came
3441  * from builtin-record.c, i.e. use record_opts,
3442  * evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
3443  * using pipes, etc.
3444  */
3445 static struct option __record_options[] = {
3446 	OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
3447 		     "event selector. use 'perf list' to list available events",
3448 		     parse_events_option),
3449 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3450 		     "event filter", parse_filter),
3451 	OPT_BOOLEAN(0, "latency", &record.latency,
3452 		    "Enable data collection for latency profiling.\n"
3453 		    "\t\t\t  Use perf report --latency for latency-centric profile."),
3454 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3455 			   NULL, "don't record events from perf itself",
3456 			   exclude_perf),
3457 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3458 		    "record events on existing process id"),
3459 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3460 		    "record events on existing thread id"),
3461 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
3462 		    "collect data with this RT SCHED_FIFO priority"),
3463 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3464 		    "collect data without buffering"),
3465 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3466 		    "collect raw sample records from all opened counters"),
3467 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3468 			    "system-wide collection from all CPUs"),
3469 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3470 		    "list of cpus to monitor"),
3471 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3472 	OPT_STRING('o', "output", &record.data.path, "file",
3473 		    "output file name"),
3474 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3475 			&record.opts.no_inherit_set,
3476 			"child tasks do not inherit counters"),
3477 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3478 		    "synthesize non-sample events at the end of output"),
3479 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3480 	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3481 	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3482 		    "Fail if the specified frequency can't be used"),
3483 	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3484 		     "profile at this frequency",
3485 		      record__parse_freq),
3486 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3487 		     "number of mmap data pages and AUX area tracing mmap pages",
3488 		     record__parse_mmap_pages),
3489 	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3490 		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
3491 		     record__mmap_flush_parse),
3492 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3493 			   NULL, "enables call-graph recording" ,
3494 			   &record_callchain_opt),
3495 	OPT_CALLBACK(0, "call-graph", &record.opts,
3496 		     "record_mode[,record_size]", record_callchain_help,
3497 		     &record_parse_callchain_opt),
3498 	OPT_INCR('v', "verbose", &verbose,
3499 		    "be more verbose (show counter open errors, etc)"),
3500 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"),
3501 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3502 		    "per thread counts"),
3503 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3504 	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3505 		    "Record the sample physical addresses"),
3506 	OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3507 		    "Record the sampled data address data page size"),
3508 	OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3509 		    "Record the sampled code address (ip) page size"),
3510 	OPT_BOOLEAN(0, "sample-mem-info", &record.opts.sample_data_src,
3511 		    "Record the data source for memory operations"),
3512 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3513 	OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier,
3514 		    "Record the sample identifier"),
3515 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3516 			&record.opts.sample_time_set,
3517 			"Record the sample timestamps"),
3518 	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3519 			"Record the sample period"),
3520 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3521 		    "don't sample"),
3522 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3523 			&record.no_buildid_cache_set,
3524 			"do not update the buildid cache"),
3525 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3526 			&record.no_buildid_set,
3527 			"do not collect buildids in perf.data"),
3528 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3529 		     "monitor event in cgroup name only",
3530 		     parse_cgroups),
3531 	OPT_CALLBACK('D', "delay", &record, "ms",
3532 		     "ms to wait before starting measurement after program start (-1: start with events disabled), "
3533 		     "or ranges of time to enable events e.g. '-D 10-20,30-40'",
3534 		     record__parse_event_enable_time),
3535 	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3536 	OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
3537 
3538 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3539 		     "branch any", "sample any taken branches",
3540 		     parse_branch_stack),
3541 
3542 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3543 		     "branch filter mask", "branch stack filter modes",
3544 		     parse_branch_stack),
3545 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3546 		    "sample by weight (on special events only)"),
3547 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3548 		    "sample transaction flags (special events only)"),
3549 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3550 		    "use per-thread mmaps"),
3551 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3552 		    "sample selected machine registers on interrupt,"
3553 		    " use '-I?' to list register names", parse_intr_regs),
3554 	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3555 		    "sample selected machine registers in user space,"
3556 		    " use '--user-regs=?' to list register names", parse_user_regs),
3557 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3558 		    "Record running/enabled time of read (:S) events"),
3559 	OPT_CALLBACK('k', "clockid", &record.opts,
3560 	"clockid", "clockid to use for events, see clock_gettime()",
3561 	parse_clockid),
3562 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3563 			  "opts", "AUX area tracing Snapshot Mode", ""),
3564 	OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3565 			  "opts", "sample AUX area", ""),
3566 	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3567 			"per thread proc mmap processing timeout in ms"),
3568 	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3569 		    "Record namespaces events"),
3570 	OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3571 		    "Record cgroup events"),
3572 	OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3573 			&record.opts.record_switch_events_set,
3574 			"Record context switch events"),
3575 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3576 			 "Configure all used events to run in kernel space.",
3577 			 PARSE_OPT_EXCLUSIVE),
3578 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3579 			 "Configure all used events to run in user space.",
3580 			 PARSE_OPT_EXCLUSIVE),
3581 	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3582 		    "collect kernel callchains"),
3583 	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3584 		    "collect user callchains"),
3585 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3586 		   "file", "vmlinux pathname"),
3587 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3588 		    "Record build-id of all DSOs regardless of hits"),
3589 	OPT_BOOLEAN_SET(0, "buildid-mmap", &record.buildid_mmap, &record.buildid_mmap_set,
3590 			"Record build-id in mmap events and skip build-id processing."),
3591 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3592 		    "append timestamp to output filename"),
3593 	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3594 		    "Record timestamp boundary (time of first/last samples)"),
3595 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3596 			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3597 			  "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3598 			  "signal"),
3599 	OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args,
3600 			 &record.switch_output_event_set, "switch output event",
3601 			 "switch output event selector. use 'perf list' to list available events",
3602 			 parse_events_option_new_evlist),
3603 	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3604 		   "Limit number of switch output generated files"),
3605 	OPT_BOOLEAN(0, "dry-run", &dry_run,
3606 		    "Parse options then exit"),
3607 #ifdef HAVE_AIO_SUPPORT
3608 	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3609 		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3610 		     record__aio_parse),
3611 #endif
3612 	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3613 		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3614 		     record__parse_affinity),
3615 #ifdef HAVE_ZSTD_SUPPORT
3616 	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3617 			    "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3618 			    record__parse_comp_level),
3619 #endif
3620 	OPT_CALLBACK(0, "max-size", &record.output_max_size,
3621 		     "size", "Limit the maximum size of the output file", parse_output_max_size),
3622 	OPT_UINTEGER(0, "num-thread-synthesize",
3623 		     &record.opts.nr_threads_synthesize,
3624 		     "number of threads to run for event synthesis"),
3625 #ifdef HAVE_LIBPFM
3626 	OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3627 		"libpfm4 event selector. use 'perf list' to list available events",
3628 		parse_libpfm_events_option),
3629 #endif
3630 	OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3631 		     "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3632 		     "\t\t\t  'snapshot': AUX area tracing snapshot).\n"
3633 		     "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3634 		     "\t\t\t  Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3635 		      parse_control_option),
3636 	OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3637 		     "Fine-tune event synthesis: default=all", parse_record_synth_option),
3638 	OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3639 			  &record.debuginfod.set, "debuginfod urls",
3640 			  "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3641 			  "system"),
3642 	OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3643 			    "write collected trace data into several data files using parallel threads",
3644 			    record__parse_threads),
3645 	OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
3646 	OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin",
3647 		   "BPF filter action"),
3648 	OPT_CALLBACK(0, "off-cpu-thresh", &record.opts, "ms",
3649 		     "Dump off-cpu samples if off-cpu time exceeds this threshold (in milliseconds). (Default: 500ms)",
3650 		     record__parse_off_cpu_thresh),
3651 	OPT_END()
3652 };
3653 
3654 struct option *record_options = __record_options;
3655 
record__mmap_cpu_mask_init(struct mmap_cpu_mask * mask,struct perf_cpu_map * cpus)3656 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3657 {
3658 	struct perf_cpu cpu;
3659 	int idx;
3660 
3661 	if (cpu_map__is_dummy(cpus))
3662 		return 0;
3663 
3664 	perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) {
3665 		/* Return ENODEV is input cpu is greater than max cpu */
3666 		if ((unsigned long)cpu.cpu > mask->nbits)
3667 			return -ENODEV;
3668 		__set_bit(cpu.cpu, mask->bits);
3669 	}
3670 
3671 	return 0;
3672 }
3673 
record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask * mask,const char * mask_spec)3674 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3675 {
3676 	struct perf_cpu_map *cpus;
3677 
3678 	cpus = perf_cpu_map__new(mask_spec);
3679 	if (!cpus)
3680 		return -ENOMEM;
3681 
3682 	bitmap_zero(mask->bits, mask->nbits);
3683 	if (record__mmap_cpu_mask_init(mask, cpus))
3684 		return -ENODEV;
3685 
3686 	perf_cpu_map__put(cpus);
3687 
3688 	return 0;
3689 }
3690 
record__free_thread_masks(struct record * rec,int nr_threads)3691 static void record__free_thread_masks(struct record *rec, int nr_threads)
3692 {
3693 	int t;
3694 
3695 	if (rec->thread_masks)
3696 		for (t = 0; t < nr_threads; t++)
3697 			record__thread_mask_free(&rec->thread_masks[t]);
3698 
3699 	zfree(&rec->thread_masks);
3700 }
3701 
record__alloc_thread_masks(struct record * rec,int nr_threads,int nr_bits)3702 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3703 {
3704 	int t, ret;
3705 
3706 	rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3707 	if (!rec->thread_masks) {
3708 		pr_err("Failed to allocate thread masks\n");
3709 		return -ENOMEM;
3710 	}
3711 
3712 	for (t = 0; t < nr_threads; t++) {
3713 		ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3714 		if (ret) {
3715 			pr_err("Failed to allocate thread masks[%d]\n", t);
3716 			goto out_free;
3717 		}
3718 	}
3719 
3720 	return 0;
3721 
3722 out_free:
3723 	record__free_thread_masks(rec, nr_threads);
3724 
3725 	return ret;
3726 }
3727 
record__init_thread_cpu_masks(struct record * rec,struct perf_cpu_map * cpus)3728 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3729 {
3730 	int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3731 
3732 	ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3733 	if (ret)
3734 		return ret;
3735 
3736 	rec->nr_threads = nr_cpus;
3737 	pr_debug("nr_threads: %d\n", rec->nr_threads);
3738 
3739 	for (t = 0; t < rec->nr_threads; t++) {
3740 		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
3741 		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
3742 		if (verbose > 0) {
3743 			pr_debug("thread_masks[%d]: ", t);
3744 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3745 			pr_debug("thread_masks[%d]: ", t);
3746 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3747 		}
3748 	}
3749 
3750 	return 0;
3751 }
3752 
record__init_thread_masks_spec(struct record * rec,struct perf_cpu_map * cpus,const char ** maps_spec,const char ** affinity_spec,u32 nr_spec)3753 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
3754 					  const char **maps_spec, const char **affinity_spec,
3755 					  u32 nr_spec)
3756 {
3757 	u32 s;
3758 	int ret = 0, t = 0;
3759 	struct mmap_cpu_mask cpus_mask;
3760 	struct thread_mask thread_mask, full_mask, *thread_masks;
3761 
3762 	ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
3763 	if (ret) {
3764 		pr_err("Failed to allocate CPUs mask\n");
3765 		return ret;
3766 	}
3767 
3768 	ret = record__mmap_cpu_mask_init(&cpus_mask, cpus);
3769 	if (ret) {
3770 		pr_err("Failed to init cpu mask\n");
3771 		goto out_free_cpu_mask;
3772 	}
3773 
3774 	ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
3775 	if (ret) {
3776 		pr_err("Failed to allocate full mask\n");
3777 		goto out_free_cpu_mask;
3778 	}
3779 
3780 	ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3781 	if (ret) {
3782 		pr_err("Failed to allocate thread mask\n");
3783 		goto out_free_full_and_cpu_masks;
3784 	}
3785 
3786 	for (s = 0; s < nr_spec; s++) {
3787 		ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
3788 		if (ret) {
3789 			pr_err("Failed to initialize maps thread mask\n");
3790 			goto out_free;
3791 		}
3792 		ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
3793 		if (ret) {
3794 			pr_err("Failed to initialize affinity thread mask\n");
3795 			goto out_free;
3796 		}
3797 
3798 		/* ignore invalid CPUs but do not allow empty masks */
3799 		if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
3800 				cpus_mask.bits, thread_mask.maps.nbits)) {
3801 			pr_err("Empty maps mask: %s\n", maps_spec[s]);
3802 			ret = -EINVAL;
3803 			goto out_free;
3804 		}
3805 		if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
3806 				cpus_mask.bits, thread_mask.affinity.nbits)) {
3807 			pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
3808 			ret = -EINVAL;
3809 			goto out_free;
3810 		}
3811 
3812 		/* do not allow intersection with other masks (full_mask) */
3813 		if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
3814 				      thread_mask.maps.nbits)) {
3815 			pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
3816 			ret = -EINVAL;
3817 			goto out_free;
3818 		}
3819 		if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
3820 				      thread_mask.affinity.nbits)) {
3821 			pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
3822 			ret = -EINVAL;
3823 			goto out_free;
3824 		}
3825 
3826 		bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
3827 			  thread_mask.maps.bits, full_mask.maps.nbits);
3828 		bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
3829 			  thread_mask.affinity.bits, full_mask.maps.nbits);
3830 
3831 		thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
3832 		if (!thread_masks) {
3833 			pr_err("Failed to reallocate thread masks\n");
3834 			ret = -ENOMEM;
3835 			goto out_free;
3836 		}
3837 		rec->thread_masks = thread_masks;
3838 		rec->thread_masks[t] = thread_mask;
3839 		if (verbose > 0) {
3840 			pr_debug("thread_masks[%d]: ", t);
3841 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3842 			pr_debug("thread_masks[%d]: ", t);
3843 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3844 		}
3845 		t++;
3846 		ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3847 		if (ret) {
3848 			pr_err("Failed to allocate thread mask\n");
3849 			goto out_free_full_and_cpu_masks;
3850 		}
3851 	}
3852 	rec->nr_threads = t;
3853 	pr_debug("nr_threads: %d\n", rec->nr_threads);
3854 	if (!rec->nr_threads)
3855 		ret = -EINVAL;
3856 
3857 out_free:
3858 	record__thread_mask_free(&thread_mask);
3859 out_free_full_and_cpu_masks:
3860 	record__thread_mask_free(&full_mask);
3861 out_free_cpu_mask:
3862 	record__mmap_cpu_mask_free(&cpus_mask);
3863 
3864 	return ret;
3865 }
3866 
record__init_thread_core_masks(struct record * rec,struct perf_cpu_map * cpus)3867 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
3868 {
3869 	int ret;
3870 	struct cpu_topology *topo;
3871 
3872 	topo = cpu_topology__new();
3873 	if (!topo) {
3874 		pr_err("Failed to allocate CPU topology\n");
3875 		return -ENOMEM;
3876 	}
3877 
3878 	ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
3879 					     topo->core_cpus_list, topo->core_cpus_lists);
3880 	cpu_topology__delete(topo);
3881 
3882 	return ret;
3883 }
3884 
record__init_thread_package_masks(struct record * rec,struct perf_cpu_map * cpus)3885 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
3886 {
3887 	int ret;
3888 	struct cpu_topology *topo;
3889 
3890 	topo = cpu_topology__new();
3891 	if (!topo) {
3892 		pr_err("Failed to allocate CPU topology\n");
3893 		return -ENOMEM;
3894 	}
3895 
3896 	ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
3897 					     topo->package_cpus_list, topo->package_cpus_lists);
3898 	cpu_topology__delete(topo);
3899 
3900 	return ret;
3901 }
3902 
record__init_thread_numa_masks(struct record * rec,struct perf_cpu_map * cpus)3903 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
3904 {
3905 	u32 s;
3906 	int ret;
3907 	const char **spec;
3908 	struct numa_topology *topo;
3909 
3910 	topo = numa_topology__new();
3911 	if (!topo) {
3912 		pr_err("Failed to allocate NUMA topology\n");
3913 		return -ENOMEM;
3914 	}
3915 
3916 	spec = zalloc(topo->nr * sizeof(char *));
3917 	if (!spec) {
3918 		pr_err("Failed to allocate NUMA spec\n");
3919 		ret = -ENOMEM;
3920 		goto out_delete_topo;
3921 	}
3922 	for (s = 0; s < topo->nr; s++)
3923 		spec[s] = topo->nodes[s].cpus;
3924 
3925 	ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
3926 
3927 	zfree(&spec);
3928 
3929 out_delete_topo:
3930 	numa_topology__delete(topo);
3931 
3932 	return ret;
3933 }
3934 
record__init_thread_user_masks(struct record * rec,struct perf_cpu_map * cpus)3935 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
3936 {
3937 	int t, ret;
3938 	u32 s, nr_spec = 0;
3939 	char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
3940 	char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;
3941 
3942 	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
3943 		spec = strtok_r(user_spec, ":", &spec_ptr);
3944 		if (spec == NULL)
3945 			break;
3946 		pr_debug2("threads_spec[%d]: %s\n", t, spec);
3947 		mask = strtok_r(spec, "/", &mask_ptr);
3948 		if (mask == NULL)
3949 			break;
3950 		pr_debug2("  maps mask: %s\n", mask);
3951 		tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
3952 		if (!tmp_spec) {
3953 			pr_err("Failed to reallocate maps spec\n");
3954 			ret = -ENOMEM;
3955 			goto out_free;
3956 		}
3957 		maps_spec = tmp_spec;
3958 		maps_spec[nr_spec] = dup_mask = strdup(mask);
3959 		if (!maps_spec[nr_spec]) {
3960 			pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
3961 			ret = -ENOMEM;
3962 			goto out_free;
3963 		}
3964 		mask = strtok_r(NULL, "/", &mask_ptr);
3965 		if (mask == NULL) {
3966 			pr_err("Invalid thread maps or affinity specs\n");
3967 			ret = -EINVAL;
3968 			goto out_free;
3969 		}
3970 		pr_debug2("  affinity mask: %s\n", mask);
3971 		tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
3972 		if (!tmp_spec) {
3973 			pr_err("Failed to reallocate affinity spec\n");
3974 			ret = -ENOMEM;
3975 			goto out_free;
3976 		}
3977 		affinity_spec = tmp_spec;
3978 		affinity_spec[nr_spec] = strdup(mask);
3979 		if (!affinity_spec[nr_spec]) {
3980 			pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
3981 			ret = -ENOMEM;
3982 			goto out_free;
3983 		}
3984 		dup_mask = NULL;
3985 		nr_spec++;
3986 	}
3987 
3988 	ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
3989 					     (const char **)affinity_spec, nr_spec);
3990 
3991 out_free:
3992 	free(dup_mask);
3993 	for (s = 0; s < nr_spec; s++) {
3994 		if (maps_spec)
3995 			free(maps_spec[s]);
3996 		if (affinity_spec)
3997 			free(affinity_spec[s]);
3998 	}
3999 	free(affinity_spec);
4000 	free(maps_spec);
4001 
4002 	return ret;
4003 }
4004 
record__init_thread_default_masks(struct record * rec,struct perf_cpu_map * cpus)4005 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
4006 {
4007 	int ret;
4008 
4009 	ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
4010 	if (ret)
4011 		return ret;
4012 
4013 	if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
4014 		return -ENODEV;
4015 
4016 	rec->nr_threads = 1;
4017 
4018 	return 0;
4019 }
4020 
record__init_thread_masks(struct record * rec)4021 static int record__init_thread_masks(struct record *rec)
4022 {
4023 	int ret = 0;
4024 	struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;
4025 
4026 	if (!record__threads_enabled(rec))
4027 		return record__init_thread_default_masks(rec, cpus);
4028 
4029 	if (evlist__per_thread(rec->evlist)) {
4030 		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
4031 		return -EINVAL;
4032 	}
4033 
4034 	switch (rec->opts.threads_spec) {
4035 	case THREAD_SPEC__CPU:
4036 		ret = record__init_thread_cpu_masks(rec, cpus);
4037 		break;
4038 	case THREAD_SPEC__CORE:
4039 		ret = record__init_thread_core_masks(rec, cpus);
4040 		break;
4041 	case THREAD_SPEC__PACKAGE:
4042 		ret = record__init_thread_package_masks(rec, cpus);
4043 		break;
4044 	case THREAD_SPEC__NUMA:
4045 		ret = record__init_thread_numa_masks(rec, cpus);
4046 		break;
4047 	case THREAD_SPEC__USER:
4048 		ret = record__init_thread_user_masks(rec, cpus);
4049 		break;
4050 	default:
4051 		break;
4052 	}
4053 
4054 	return ret;
4055 }
4056 
cmd_record(int argc,const char ** argv)4057 int cmd_record(int argc, const char **argv)
4058 {
4059 	int err;
4060 	struct record *rec = &record;
4061 	char errbuf[BUFSIZ];
4062 
4063 	setlocale(LC_ALL, "");
4064 
4065 #ifndef HAVE_BPF_SKEL
4066 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
4067 	set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
4068 # undef set_nobuild
4069 #endif
4070 
4071 	/* Disable eager loading of kernel symbols that adds overhead to perf record. */
4072 	symbol_conf.lazy_load_kernel_maps = true;
4073 	rec->opts.affinity = PERF_AFFINITY_SYS;
4074 
4075 	rec->evlist = evlist__new();
4076 	if (rec->evlist == NULL)
4077 		return -ENOMEM;
4078 
4079 	err = perf_config(perf_record_config, rec);
4080 	if (err)
4081 		return err;
4082 
4083 	argc = parse_options(argc, argv, record_options, record_usage,
4084 			    PARSE_OPT_STOP_AT_NON_OPTION);
4085 	if (quiet)
4086 		perf_quiet_option();
4087 
4088 	err = symbol__validate_sym_arguments();
4089 	if (err)
4090 		return err;
4091 
4092 	perf_debuginfod_setup(&record.debuginfod);
4093 
4094 	/* Make system wide (-a) the default target. */
4095 	if (!argc && target__none(&rec->opts.target))
4096 		rec->opts.target.system_wide = true;
4097 
4098 	if (nr_cgroups && !rec->opts.target.system_wide) {
4099 		usage_with_options_msg(record_usage, record_options,
4100 			"cgroup monitoring only available in system-wide mode");
4101 
4102 	}
4103 
4104 	if (record.latency) {
4105 		/*
4106 		 * There is no fundamental reason why latency profiling
4107 		 * can't work for system-wide mode, but exact semantics
4108 		 * and details are to be defined.
4109 		 * See the following thread for details:
4110 		 * https://lore.kernel.org/all/Z4XDJyvjiie3howF@google.com/
4111 		 */
4112 		if (record.opts.target.system_wide) {
4113 			pr_err("Failed: latency profiling is not supported with system-wide collection.\n");
4114 			err = -EINVAL;
4115 			goto out_opts;
4116 		}
4117 		record.opts.record_switch_events = true;
4118 	}
4119 
4120 	if (!rec->buildid_mmap) {
4121 		pr_debug("Disabling build id in synthesized mmap2 events.\n");
4122 		symbol_conf.no_buildid_mmap2 = true;
4123 	} else if (rec->buildid_mmap_set) {
4124 		/*
4125 		 * Explicitly passing --buildid-mmap disables buildid processing
4126 		 * and cache generation.
4127 		 */
4128 		rec->no_buildid = true;
4129 	}
4130 	if (rec->buildid_mmap && !perf_can_record_build_id()) {
4131 		pr_warning("Missing support for build id in kernel mmap events.\n"
4132 			   "Disable this warning with --no-buildid-mmap\n");
4133 		rec->buildid_mmap = false;
4134 	}
4135 	if (rec->buildid_mmap) {
4136 		/* Enable perf_event_attr::build_id bit. */
4137 		rec->opts.build_id = true;
4138 	}
4139 
4140 	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
4141 		pr_err("Kernel has no cgroup sampling support.\n");
4142 		err = -EINVAL;
4143 		goto out_opts;
4144 	}
4145 
4146 	if (rec->opts.kcore)
4147 		rec->opts.text_poke = true;
4148 
4149 	if (rec->opts.kcore || record__threads_enabled(rec))
4150 		rec->data.is_dir = true;
4151 
4152 	if (record__threads_enabled(rec)) {
4153 		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
4154 			pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
4155 			goto out_opts;
4156 		}
4157 		if (record__aio_enabled(rec)) {
4158 			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
4159 			goto out_opts;
4160 		}
4161 	}
4162 
4163 	if (rec->opts.comp_level != 0) {
4164 		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
4165 		rec->no_buildid = true;
4166 	}
4167 
4168 	if (rec->opts.record_switch_events &&
4169 	    !perf_can_record_switch_events()) {
4170 		ui__error("kernel does not support recording context switch events\n");
4171 		parse_options_usage(record_usage, record_options, "switch-events", 0);
4172 		err = -EINVAL;
4173 		goto out_opts;
4174 	}
4175 
4176 	if (switch_output_setup(rec)) {
4177 		parse_options_usage(record_usage, record_options, "switch-output", 0);
4178 		err = -EINVAL;
4179 		goto out_opts;
4180 	}
4181 
4182 	if (rec->switch_output.time) {
4183 		signal(SIGALRM, alarm_sig_handler);
4184 		alarm(rec->switch_output.time);
4185 	}
4186 
4187 	if (rec->switch_output.num_files) {
4188 		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
4189 						      sizeof(char *));
4190 		if (!rec->switch_output.filenames) {
4191 			err = -EINVAL;
4192 			goto out_opts;
4193 		}
4194 	}
4195 
4196 	if (rec->timestamp_filename && record__threads_enabled(rec)) {
4197 		rec->timestamp_filename = false;
4198 		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
4199 	}
4200 
4201 	if (rec->filter_action) {
4202 		if (!strcmp(rec->filter_action, "pin"))
4203 			err = perf_bpf_filter__pin();
4204 		else if (!strcmp(rec->filter_action, "unpin"))
4205 			err = perf_bpf_filter__unpin();
4206 		else {
4207 			pr_warning("Unknown BPF filter action: %s\n", rec->filter_action);
4208 			err = -EINVAL;
4209 		}
4210 		goto out_opts;
4211 	}
4212 
4213 	/* For backward compatibility, -d implies --mem-info */
4214 	if (rec->opts.sample_address)
4215 		rec->opts.sample_data_src = true;
4216 
4217 	/*
4218 	 * Allow aliases to facilitate the lookup of symbols for address
4219 	 * filters. Refer to auxtrace_parse_filters().
4220 	 */
4221 	symbol_conf.allow_aliases = true;
4222 
4223 	symbol__init(NULL);
4224 
4225 	err = record__auxtrace_init(rec);
4226 	if (err)
4227 		goto out;
4228 
4229 	if (dry_run)
4230 		goto out;
4231 
4232 	err = -ENOMEM;
4233 
4234 	if (rec->no_buildid_cache || rec->no_buildid) {
4235 		disable_buildid_cache();
4236 	} else if (rec->switch_output.enabled) {
4237 		/*
4238 		 * In 'perf record --switch-output', disable buildid
4239 		 * generation by default to reduce data file switching
4240 		 * overhead. Still generate buildid if they are required
4241 		 * explicitly using
4242 		 *
4243 		 *  perf record --switch-output --no-no-buildid \
4244 		 *              --no-no-buildid-cache
4245 		 *
4246 		 * Following code equals to:
4247 		 *
4248 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
4249 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
4250 		 *         disable_buildid_cache();
4251 		 */
4252 		bool disable = true;
4253 
4254 		if (rec->no_buildid_set && !rec->no_buildid)
4255 			disable = false;
4256 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
4257 			disable = false;
4258 		if (disable) {
4259 			rec->no_buildid = true;
4260 			rec->no_buildid_cache = true;
4261 			disable_buildid_cache();
4262 		}
4263 	}
4264 
4265 	if (record.opts.overwrite)
4266 		record.opts.tail_synthesize = true;
4267 
4268 	if (rec->evlist->core.nr_entries == 0) {
4269 		err = parse_event(rec->evlist, "cycles:P");
4270 		if (err)
4271 			goto out;
4272 	}
4273 
4274 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
4275 		rec->opts.no_inherit = true;
4276 
4277 	err = target__validate(&rec->opts.target);
4278 	if (err) {
4279 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
4280 		ui__warning("%s\n", errbuf);
4281 	}
4282 
4283 	if (rec->uid_str) {
4284 		uid_t uid = parse_uid(rec->uid_str);
4285 
4286 		if (uid == UINT_MAX) {
4287 			ui__error("Invalid User: %s", rec->uid_str);
4288 			err = -EINVAL;
4289 			goto out;
4290 		}
4291 		err = parse_uid_filter(rec->evlist, uid);
4292 		if (err)
4293 			goto out;
4294 
4295 		/* User ID filtering implies system wide. */
4296 		rec->opts.target.system_wide = true;
4297 	}
4298 
4299 	/* Enable ignoring missing threads when -p option is defined. */
4300 	rec->opts.ignore_missing_thread = rec->opts.target.pid;
4301 
4302 	evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);
4303 
4304 	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
4305 		arch__add_leaf_frame_record_opts(&rec->opts);
4306 
4307 	err = -ENOMEM;
4308 	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
4309 		if (rec->opts.target.pid != NULL) {
4310 			pr_err("Couldn't create thread/CPU maps: %s\n",
4311 				errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
4312 			goto out;
4313 		}
4314 		else
4315 			usage_with_options(record_usage, record_options);
4316 	}
4317 
4318 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
4319 	if (err)
4320 		goto out;
4321 
4322 	/*
4323 	 * We take all buildids when the file contains
4324 	 * AUX area tracing data because we do not decode the
4325 	 * trace because it would take too long.
4326 	 */
4327 	if (rec->opts.full_auxtrace)
4328 		rec->buildid_all = true;
4329 
4330 	if (rec->opts.text_poke) {
4331 		err = record__config_text_poke(rec->evlist);
4332 		if (err) {
4333 			pr_err("record__config_text_poke failed, error %d\n", err);
4334 			goto out;
4335 		}
4336 	}
4337 
4338 	if (rec->off_cpu) {
4339 		err = record__config_off_cpu(rec);
4340 		if (err) {
4341 			pr_err("record__config_off_cpu failed, error %d\n", err);
4342 			goto out;
4343 		}
4344 	}
4345 
4346 	if (record_opts__config(&rec->opts)) {
4347 		err = -EINVAL;
4348 		goto out;
4349 	}
4350 
4351 	err = record__config_tracking_events(rec);
4352 	if (err) {
4353 		pr_err("record__config_tracking_events failed, error %d\n", err);
4354 		goto out;
4355 	}
4356 
4357 	err = record__init_thread_masks(rec);
4358 	if (err) {
4359 		pr_err("Failed to initialize parallel data streaming masks\n");
4360 		goto out;
4361 	}
4362 
4363 	if (rec->opts.nr_cblocks > nr_cblocks_max)
4364 		rec->opts.nr_cblocks = nr_cblocks_max;
4365 	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
4366 
4367 	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
4368 	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
4369 
4370 	if (rec->opts.comp_level > comp_level_max)
4371 		rec->opts.comp_level = comp_level_max;
4372 	pr_debug("comp level: %d\n", rec->opts.comp_level);
4373 
4374 	err = __cmd_record(&record, argc, argv);
4375 out:
4376 	record__free_thread_masks(rec, rec->nr_threads);
4377 	rec->nr_threads = 0;
4378 	symbol__exit();
4379 	auxtrace_record__free(rec->itr);
4380 out_opts:
4381 	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
4382 	evlist__delete(rec->evlist);
4383 	return err;
4384 }
4385 
snapshot_sig_handler(int sig __maybe_unused)4386 static void snapshot_sig_handler(int sig __maybe_unused)
4387 {
4388 	struct record *rec = &record;
4389 
4390 	hit_auxtrace_snapshot_trigger(rec);
4391 
4392 	if (switch_output_signal(rec))
4393 		trigger_hit(&switch_output_trigger);
4394 }
4395 
alarm_sig_handler(int sig __maybe_unused)4396 static void alarm_sig_handler(int sig __maybe_unused)
4397 {
4398 	struct record *rec = &record;
4399 
4400 	if (switch_output_time(rec))
4401 		trigger_hit(&switch_output_trigger);
4402 }
4403