xref: /linux/tools/perf/builtin-record.c (revision f4f346c3465949ebba80c6cc52cd8d2eeaa545fd)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10 
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include <internal/xyarray.h>
14 #include "util/parse-events.h"
15 #include "util/config.h"
16 
17 #include "util/callchain.h"
18 #include "util/cgroup.h"
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/mmap.h"
25 #include "util/mutex.h"
26 #include "util/target.h"
27 #include "util/session.h"
28 #include "util/tool.h"
29 #include "util/stat.h"
30 #include "util/symbol.h"
31 #include "util/record.h"
32 #include "util/cpumap.h"
33 #include "util/thread_map.h"
34 #include "util/data.h"
35 #include "util/perf_regs.h"
36 #include "util/auxtrace.h"
37 #include "util/tsc.h"
38 #include "util/parse-branch-options.h"
39 #include "util/parse-regs-options.h"
40 #include "util/perf_api_probe.h"
41 #include "util/trigger.h"
42 #include "util/perf-hooks.h"
43 #include "util/cpu-set-sched.h"
44 #include "util/synthetic-events.h"
45 #include "util/time-utils.h"
46 #include "util/units.h"
47 #include "util/bpf-event.h"
48 #include "util/util.h"
49 #include "util/pfm.h"
50 #include "util/pmu.h"
51 #include "util/pmus.h"
52 #include "util/clockid.h"
53 #include "util/off_cpu.h"
54 #include "util/bpf-filter.h"
55 #include "util/strbuf.h"
56 #include "asm/bug.h"
57 #include "perf.h"
58 #include "cputopo.h"
59 
60 #include <errno.h>
61 #include <inttypes.h>
62 #include <locale.h>
63 #include <poll.h>
64 #include <pthread.h>
65 #include <unistd.h>
66 #ifndef HAVE_GETTID
67 #include <syscall.h>
68 #endif
69 #include <sched.h>
70 #include <signal.h>
71 #ifdef HAVE_EVENTFD_SUPPORT
72 #include <sys/eventfd.h>
73 #endif
74 #include <sys/mman.h>
75 #include <sys/wait.h>
76 #include <sys/types.h>
77 #include <sys/stat.h>
78 #include <fcntl.h>
79 #include <linux/err.h>
80 #include <linux/string.h>
81 #include <linux/time64.h>
82 #include <linux/zalloc.h>
83 #include <linux/bitmap.h>
84 #include <sys/time.h>
85 
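/*
 * State backing the --switch-output family of options: rotate the perf.data
 * output when a SIGUSR2 arrives, when a size threshold is crossed, or when a
 * time interval elapses (e.g. --switch-output=signal, =100M or =30s), keeping
 * at most num_files rotated output filenames.
 */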
86 struct switch_output {
87 	bool		 enabled;
88 	bool		 signal;
89 	unsigned long	 size;
90 	unsigned long	 time;
91 	const char	*str;
92 	bool		 set;
93 	char		 **filenames;
94 	int		 num_files;
95 	int		 cur_file;
96 };
97 
98 struct thread_mask {
99 	struct mmap_cpu_mask	maps;
100 	struct mmap_cpu_mask	affinity;
101 };
102 
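/*
 * Per-worker state for parallel trace streaming (--threads).  Each worker
 * owns a subset of the evlist mmaps (and overwrite mmaps), its own pollfd
 * array and byte counters, plus msg/ack pipes used to synchronize startup
 * and shutdown with the main thread.
 */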
103 struct record_thread {
104 	pid_t			tid;
105 	struct thread_mask	*mask;
106 	struct {
107 		int		msg[2];
108 		int		ack[2];
109 	} pipes;
110 	struct fdarray		pollfd;
111 	int			ctlfd_pos;
112 	int			nr_mmaps;
113 	struct mmap		**maps;
114 	struct mmap		**overwrite_maps;
115 	struct record		*rec;
116 	unsigned long long	samples;
117 	unsigned long		waking;
118 	u64			bytes_written;
119 	u64			bytes_transferred;
120 	u64			bytes_compressed;
121 };
122 
123 static __thread struct record_thread *thread;
124 
125 enum thread_msg {
126 	THREAD_MSG__UNDEFINED = 0,
127 	THREAD_MSG__READY,
128 	THREAD_MSG__MAX,
129 };
130 
131 static const char *thread_msg_tags[THREAD_MSG__MAX] = {
132 	"UNDEFINED", "READY"
133 };
134 
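/*
 * How --threads derives the per-thread CPU masks: one worker per CPU, per
 * core, per package or per NUMA node, or masks specified explicitly by the
 * user.
 */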
135 enum thread_spec {
136 	THREAD_SPEC__UNDEFINED = 0,
137 	THREAD_SPEC__CPU,
138 	THREAD_SPEC__CORE,
139 	THREAD_SPEC__PACKAGE,
140 	THREAD_SPEC__NUMA,
141 	THREAD_SPEC__USER,
142 	THREAD_SPEC__MAX,
143 };
144 
145 static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
146 	"undefined", "cpu", "core", "package", "numa", "user"
147 };
148 
149 struct pollfd_index_map {
150 	int evlist_pollfd_index;
151 	int thread_pollfd_index;
152 };
153 
154 struct record {
155 	struct perf_tool	tool;
156 	struct record_opts	opts;
157 	u64			bytes_written;
158 	u64			thread_bytes_written;
159 	struct perf_data	data;
160 	struct auxtrace_record	*itr;
161 	struct evlist	*evlist;
162 	struct perf_session	*session;
163 	struct evlist		*sb_evlist;
164 	pthread_t		thread_id;
165 	int			realtime_prio;
166 	bool			latency;
167 	bool			switch_output_event_set;
168 	bool			no_buildid;
169 	bool			no_buildid_set;
170 	bool			no_buildid_cache;
171 	bool			no_buildid_cache_set;
172 	bool			buildid_all;
173 	bool			buildid_mmap;
174 	bool			buildid_mmap_set;
175 	bool			timestamp_filename;
176 	bool			timestamp_boundary;
177 	bool			off_cpu;
178 	const char		*filter_action;
179 	const char		*uid_str;
180 	struct switch_output	switch_output;
181 	unsigned long long	samples;
182 	unsigned long		output_max_size;	/* = 0: unlimited */
183 	struct perf_debuginfod	debuginfod;
184 	int			nr_threads;
185 	struct thread_mask	*thread_masks;
186 	struct record_thread	*thread_data;
187 	struct pollfd_index_map	*index_map;
188 	size_t			index_map_sz;
189 	size_t			index_map_cnt;
190 };
191 
192 static volatile int done;
193 
194 static volatile int auxtrace_record__snapshot_started;
195 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
196 static DEFINE_TRIGGER(switch_output_trigger);
197 
198 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
199 	"SYS", "NODE", "CPU"
200 };
201 
202 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
203 				  struct perf_sample *sample, struct machine *machine);
204 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
205 				   struct perf_sample *sample, struct machine *machine);
206 static int process_timestamp_boundary(const struct perf_tool *tool,
207 				      union perf_event *event,
208 				      struct perf_sample *sample,
209 				      struct machine *machine);
210 
211 #ifndef HAVE_GETTID
212 static inline pid_t gettid(void)
213 {
214 	return (pid_t)syscall(__NR_gettid);
215 }
216 #endif
217 
218 static int record__threads_enabled(struct record *rec)
219 {
220 	return rec->opts.threads_spec;
221 }
222 
223 static bool switch_output_signal(struct record *rec)
224 {
225 	return rec->switch_output.signal &&
226 	       trigger_is_ready(&switch_output_trigger);
227 }
228 
229 static bool switch_output_size(struct record *rec)
230 {
231 	return rec->switch_output.size &&
232 	       trigger_is_ready(&switch_output_trigger) &&
233 	       (rec->bytes_written >= rec->switch_output.size);
234 }
235 
236 static bool switch_output_time(struct record *rec)
237 {
238 	return rec->switch_output.time &&
239 	       trigger_is_ready(&switch_output_trigger);
240 }
241 
242 static u64 record__bytes_written(struct record *rec)
243 {
244 	return rec->bytes_written + rec->thread_bytes_written;
245 }
246 
247 static bool record__output_max_size_exceeded(struct record *rec)
248 {
249 	return rec->output_max_size &&
250 	       (record__bytes_written(rec) >= rec->output_max_size);
251 }
252 
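/*
 * Write a block of trace data either to the main perf.data file or, when the
 * mmap has its own file (parallel streaming mode), to that per-mmap file.
 * Accounts the written bytes, stops the session once the configured output
 * size limit is exceeded, and arms the switch-output trigger when its size
 * threshold is reached.
 */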
253 static int record__write(struct record *rec, struct mmap *map __maybe_unused,
254 			 void *bf, size_t size)
255 {
256 	struct perf_data_file *file = &rec->session->data->file;
257 
258 	if (map && map->file)
259 		file = map->file;
260 
261 	if (perf_data_file__write(file, bf, size) < 0) {
262 		pr_err("failed to write perf data, error: %m\n");
263 		return -1;
264 	}
265 
266 	if (map && map->file) {
267 		thread->bytes_written += size;
268 		rec->thread_bytes_written += size;
269 	} else {
270 		rec->bytes_written += size;
271 	}
272 
273 	if (record__output_max_size_exceeded(rec) && !done) {
274 		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
275 				" stopping session ]\n",
276 				record__bytes_written(rec) >> 10);
277 		done = 1;
278 	}
279 
280 	if (switch_output_size(rec))
281 		trigger_hit(&switch_output_trigger);
282 
283 	return 0;
284 }
285 
286 static int record__aio_enabled(struct record *rec);
287 static int record__comp_enabled(struct record *rec);
288 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
289 			    void *dst, size_t dst_size, void *src, size_t src_size);
290 
291 #ifdef HAVE_AIO_SUPPORT
292 static int record__aio_write(struct aiocb *cblock, int trace_fd,
293 		void *buf, size_t size, off_t off)
294 {
295 	int rc;
296 
297 	cblock->aio_fildes = trace_fd;
298 	cblock->aio_buf    = buf;
299 	cblock->aio_nbytes = size;
300 	cblock->aio_offset = off;
301 	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
302 
303 	do {
304 		rc = aio_write(cblock);
305 		if (rc == 0) {
306 			break;
307 		} else if (errno != EAGAIN) {
308 			cblock->aio_fildes = -1;
309 			pr_err("failed to queue perf data, error: %m\n");
310 			break;
311 		}
312 	} while (1);
313 
314 	return rc;
315 }
316 
317 static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
318 {
319 	void *rem_buf;
320 	off_t rem_off;
321 	size_t rem_size;
322 	int rc, aio_errno;
323 	ssize_t aio_ret, written;
324 
325 	aio_errno = aio_error(cblock);
326 	if (aio_errno == EINPROGRESS)
327 		return 0;
328 
329 	written = aio_ret = aio_return(cblock);
330 	if (aio_ret < 0) {
331 		if (aio_errno != EINTR)
332 			pr_err("failed to write perf data, error: %m\n");
333 		written = 0;
334 	}
335 
336 	rem_size = cblock->aio_nbytes - written;
337 
338 	if (rem_size == 0) {
339 		cblock->aio_fildes = -1;
340 		/*
341 		 * md->refcount is incremented in record__aio_pushfn() for
342 		 * every aio write request started in record__aio_push() so
343 		 * decrement it because the request is now complete.
344 		 */
345 		perf_mmap__put(&md->core);
346 		rc = 1;
347 	} else {
348 		/*
349 		 * aio write request may require restart with the
350 		 * remainder if the kernel didn't write whole
351 		 * chunk at once.
352 		 */
353 		rem_off = cblock->aio_offset + written;
354 		rem_buf = (void *)(cblock->aio_buf + written);
355 		record__aio_write(cblock, cblock->aio_fildes,
356 				rem_buf, rem_size, rem_off);
357 		rc = 0;
358 	}
359 
360 	return rc;
361 }
362 
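/*
 * Wait for in-flight aio writes on this mmap.  With sync_all == false, return
 * the index of the first control block that is free or has just completed so
 * its buffer can be reused; with sync_all == true, keep suspending until every
 * outstanding request has completed and then return -1.
 */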
363 static int record__aio_sync(struct mmap *md, bool sync_all)
364 {
365 	struct aiocb **aiocb = md->aio.aiocb;
366 	struct aiocb *cblocks = md->aio.cblocks;
367 	struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
368 	int i, do_suspend;
369 
370 	do {
371 		do_suspend = 0;
372 		for (i = 0; i < md->aio.nr_cblocks; ++i) {
373 			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
374 				if (sync_all)
375 					aiocb[i] = NULL;
376 				else
377 					return i;
378 			} else {
379 				/*
380 				 * Started aio write is not complete yet
381 				 * so it has to be waited before the
382 				 * so it has to be waited on before the
383 				 */
384 				aiocb[i] = &cblocks[i];
385 				do_suspend = 1;
386 			}
387 		}
388 		if (!do_suspend)
389 			return -1;
390 
391 		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
392 			if (!(errno == EAGAIN || errno == EINTR))
393 				pr_err("failed to sync perf data, error: %m\n");
394 		}
395 	} while (1);
396 }
397 
398 struct record_aio {
399 	struct record	*rec;
400 	void		*data;
401 	size_t		size;
402 };
403 
404 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
405 {
406 	struct record_aio *aio = to;
407 
408 	/*
409 	 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer
410 	 * to release space in the kernel buffer as fast as possible, calling
411 	 * perf_mmap__consume() from perf_mmap__push() function.
412 	 *
413 	 * That lets the kernel proceed with storing more profiling data into
414 	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
415 	 *
416 	 * Copying can be done in two steps in case the chunk of profiling data
417 	 * crosses the upper bound of the kernel buffer. In this case we first move
418 	 * part of data from map->start till the upper bound and then the remainder
419 	 * from the beginning of the kernel buffer till the end of the data chunk.
420 	 */
421 
422 	if (record__comp_enabled(aio->rec)) {
423 		ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
424 						   mmap__mmap_len(map) - aio->size,
425 						   buf, size);
426 		if (compressed < 0)
427 			return (int)compressed;
428 
429 		size = compressed;
430 	} else {
431 		memcpy(aio->data + aio->size, buf, size);
432 	}
433 
434 	if (!aio->size) {
435 		/*
436 		 * Increment map->refcount to guard map->aio.data[] buffer
437 		 * from premature deallocation because map object can be
438 		 * released earlier than aio write request started on
439 		 * map->aio.data[] buffer is complete.
440 		 *
441 		 * perf_mmap__put() is done at record__aio_complete()
442 		 * after started aio request completion or at record__aio_push()
443 		 * if the request failed to start.
444 		 */
445 		perf_mmap__get(&map->core);
446 	}
447 
448 	aio->size += size;
449 
450 	return size;
451 }
452 
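/*
 * Queue the ready part of the mmap ring buffer as an asynchronous write:
 * wait for a free aio buffer, copy (and optionally compress) the data into it
 * via record__aio_pushfn(), then start aio_write() at offset *off and advance
 * *off on success.
 */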
453 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
454 {
455 	int ret, idx;
456 	int trace_fd = rec->session->data->file.fd;
457 	struct record_aio aio = { .rec = rec, .size = 0 };
458 
459 	/*
460 	 * Call record__aio_sync() to wait till map->aio.data[] buffer
461 	 * becomes available after previous aio write operation.
462 	 */
463 
464 	idx = record__aio_sync(map, false);
465 	aio.data = map->aio.data[idx];
466 	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
467 	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
468 		return ret;
469 
470 	rec->samples++;
471 	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
472 	if (!ret) {
473 		*off += aio.size;
474 		rec->bytes_written += aio.size;
475 		if (switch_output_size(rec))
476 			trigger_hit(&switch_output_trigger);
477 	} else {
478 		/*
479 		 * Decrement map->refcount incremented in record__aio_pushfn()
480 		 * back if record__aio_write() operation failed to start, otherwise
481 		 * map->refcount is decremented in record__aio_complete() after
482 		 * aio write operation finishes successfully.
483 		 */
484 		perf_mmap__put(&map->core);
485 	}
486 
487 	return ret;
488 }
489 
490 static off_t record__aio_get_pos(int trace_fd)
491 {
492 	return lseek(trace_fd, 0, SEEK_CUR);
493 }
494 
495 static void record__aio_set_pos(int trace_fd, off_t pos)
496 {
497 	lseek(trace_fd, pos, SEEK_SET);
498 }
499 
500 static void record__aio_mmap_read_sync(struct record *rec)
501 {
502 	int i;
503 	struct evlist *evlist = rec->evlist;
504 	struct mmap *maps = evlist->mmap;
505 
506 	if (!record__aio_enabled(rec))
507 		return;
508 
509 	for (i = 0; i < evlist->core.nr_mmaps; i++) {
510 		struct mmap *map = &maps[i];
511 
512 		if (map->core.base)
513 			record__aio_sync(map, true);
514 	}
515 }
516 
517 static int nr_cblocks_default = 1;
518 static int nr_cblocks_max = 4;
519 
520 static int record__aio_parse(const struct option *opt,
521 			     const char *str,
522 			     int unset)
523 {
524 	struct record_opts *opts = (struct record_opts *)opt->value;
525 
526 	if (unset) {
527 		opts->nr_cblocks = 0;
528 	} else {
529 		if (str)
530 			opts->nr_cblocks = strtol(str, NULL, 0);
531 		if (!opts->nr_cblocks)
532 			opts->nr_cblocks = nr_cblocks_default;
533 	}
534 
535 	return 0;
536 }
537 #else /* HAVE_AIO_SUPPORT */
538 static int nr_cblocks_max = 0;
539 
540 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
541 			    off_t *off __maybe_unused)
542 {
543 	return -1;
544 }
545 
546 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
547 {
548 	return -1;
549 }
550 
551 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
552 {
553 }
554 
555 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
556 {
557 }
558 #endif
559 
560 static int record__aio_enabled(struct record *rec)
561 {
562 	return rec->opts.nr_cblocks > 0;
563 }
564 
565 #define MMAP_FLUSH_DEFAULT 1
566 static int record__mmap_flush_parse(const struct option *opt,
567 				    const char *str,
568 				    int unset)
569 {
570 	int flush_max;
571 	struct record_opts *opts = (struct record_opts *)opt->value;
572 	static struct parse_tag tags[] = {
573 			{ .tag  = 'B', .mult = 1       },
574 			{ .tag  = 'K', .mult = 1 << 10 },
575 			{ .tag  = 'M', .mult = 1 << 20 },
576 			{ .tag  = 'G', .mult = 1 << 30 },
577 			{ .tag  = 0 },
578 	};
579 
580 	if (unset)
581 		return 0;
582 
583 	if (str) {
584 		opts->mmap_flush = parse_tag_value(str, tags);
585 		if (opts->mmap_flush == (int)-1)
586 			opts->mmap_flush = strtol(str, NULL, 0);
587 	}
588 
589 	if (!opts->mmap_flush)
590 		opts->mmap_flush = MMAP_FLUSH_DEFAULT;
591 
592 	flush_max = evlist__mmap_size(opts->mmap_pages);
593 	flush_max /= 4;
594 	if (opts->mmap_flush > flush_max)
595 		opts->mmap_flush = flush_max;
596 
597 	return 0;
598 }
599 
600 #ifdef HAVE_ZSTD_SUPPORT
601 static unsigned int comp_level_default = 1;
602 
603 static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
604 {
605 	struct record_opts *opts = opt->value;
606 
607 	if (unset) {
608 		opts->comp_level = 0;
609 	} else {
610 		if (str)
611 			opts->comp_level = strtol(str, NULL, 0);
612 		if (!opts->comp_level)
613 			opts->comp_level = comp_level_default;
614 	}
615 
616 	return 0;
617 }
618 #endif
619 static unsigned int comp_level_max = 22;
620 
621 static int record__comp_enabled(struct record *rec)
622 {
623 	return rec->opts.comp_level > 0;
624 }
625 
626 static int process_synthesized_event(const struct perf_tool *tool,
627 				     union perf_event *event,
628 				     struct perf_sample *sample __maybe_unused,
629 				     struct machine *machine __maybe_unused)
630 {
631 	struct record *rec = container_of(tool, struct record, tool);
632 	return record__write(rec, NULL, event, event->header.size);
633 }
634 
635 static struct mutex synth_lock;
636 
637 static int process_locked_synthesized_event(const struct perf_tool *tool,
638 				     union perf_event *event,
639 				     struct perf_sample *sample __maybe_unused,
640 				     struct machine *machine __maybe_unused)
641 {
642 	int ret;
643 
644 	mutex_lock(&synth_lock);
645 	ret = process_synthesized_event(tool, event, sample, machine);
646 	mutex_unlock(&synth_lock);
647 	return ret;
648 }
649 
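/*
 * evlist mmap push callback for the synchronous write path: optionally
 * compress the chunk into a PERF_RECORD_COMPRESSED2 event (padded to 8-byte
 * alignment) and hand it to record__write().
 */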
650 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
651 {
652 	struct record *rec = to;
653 
654 	if (record__comp_enabled(rec)) {
655 		struct perf_record_compressed2 *event = map->data;
656 		size_t padding = 0;
657 		u8 pad[8] = {0};
658 		ssize_t compressed = zstd_compress(rec->session, map, map->data,
659 						   mmap__mmap_len(map), bf, size);
660 
661 		if (compressed < 0)
662 			return (int)compressed;
663 
664 		bf = event;
665 		thread->samples++;
666 
667 		/*
668 		 * The record from `zstd_compress` is not 8-byte aligned, which would cause an asan
669 		 * error. We make it aligned here.
670 		 */
671 		event->data_size = compressed - sizeof(struct perf_record_compressed2);
672 		event->header.size = PERF_ALIGN(compressed, sizeof(u64));
673 		padding = event->header.size - compressed;
674 		return record__write(rec, map, bf, compressed) ||
675 		       record__write(rec, map, &pad, padding);
676 	}
677 
678 	thread->samples++;
679 	return record__write(rec, map, bf, size);
680 }
681 
682 static volatile sig_atomic_t signr = -1;
683 static volatile sig_atomic_t child_finished;
684 #ifdef HAVE_EVENTFD_SUPPORT
685 static volatile sig_atomic_t done_fd = -1;
686 #endif
687 
688 static void sig_handler(int sig)
689 {
690 	if (sig == SIGCHLD)
691 		child_finished = 1;
692 	else
693 		signr = sig;
694 
695 	done = 1;
696 #ifdef HAVE_EVENTFD_SUPPORT
697 	if (done_fd >= 0) {
698 		u64 tmp = 1;
699 		int orig_errno = errno;
700 
701 		/*
702 		 * It is possible for this signal handler to run after done is
703 		 * checked in the main loop, but before the perf counter fds are
704 		 * polled. If this happens, the poll() will continue to wait
705 		 * even though done is set, and will only break out if either
706 		 * another signal is received, or the counters are ready for
707 		 * read. To ensure the poll() doesn't sleep when done is set,
708 		 * use an eventfd (done_fd) to wake up the poll().
709 		 */
710 		if (write(done_fd, &tmp, sizeof(tmp)) < 0)
711 			pr_err("failed to signal wakeup fd, error: %m\n");
712 
713 		errno = orig_errno;
714 	}
715 #endif // HAVE_EVENTFD_SUPPORT
716 }
717 
718 static void sigsegv_handler(int sig)
719 {
720 	perf_hooks__recover();
721 	sighandler_dump_stack(sig);
722 }
723 
724 static void record__sig_exit(void)
725 {
726 	if (signr == -1)
727 		return;
728 
729 	signal(signr, SIG_DFL);
730 	raise(signr);
731 }
732 
733 #ifdef HAVE_AUXTRACE_SUPPORT
734 
735 static int record__process_auxtrace(const struct perf_tool *tool,
736 				    struct mmap *map,
737 				    union perf_event *event, void *data1,
738 				    size_t len1, void *data2, size_t len2)
739 {
740 	struct record *rec = container_of(tool, struct record, tool);
741 	struct perf_data *data = &rec->data;
742 	size_t padding;
743 	u8 pad[8] = {0};
744 
745 	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
746 		off_t file_offset;
747 		int fd = perf_data__fd(data);
748 		int err;
749 
750 		file_offset = lseek(fd, 0, SEEK_CUR);
751 		if (file_offset == -1)
752 			return -1;
753 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
754 						     event, file_offset);
755 		if (err)
756 			return err;
757 	}
758 
759 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
760 	padding = (len1 + len2) & 7;
761 	if (padding)
762 		padding = 8 - padding;
763 
764 	record__write(rec, map, event, event->header.size);
765 	record__write(rec, map, data1, len1);
766 	if (len2)
767 		record__write(rec, map, data2, len2);
768 	record__write(rec, map, &pad, padding);
769 
770 	return 0;
771 }
772 
773 static int record__auxtrace_mmap_read(struct record *rec,
774 				      struct mmap *map)
775 {
776 	int ret;
777 
778 	ret = auxtrace_mmap__read(map, rec->itr,
779 				  perf_session__env(rec->session),
780 				  &rec->tool,
781 				  record__process_auxtrace);
782 	if (ret < 0)
783 		return ret;
784 
785 	if (ret)
786 		rec->samples++;
787 
788 	return 0;
789 }
790 
791 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
792 					       struct mmap *map)
793 {
794 	int ret;
795 
796 	ret = auxtrace_mmap__read_snapshot(map, rec->itr,
797 					   perf_session__env(rec->session),
798 					   &rec->tool,
799 					   record__process_auxtrace,
800 					   rec->opts.auxtrace_snapshot_size);
801 	if (ret < 0)
802 		return ret;
803 
804 	if (ret)
805 		rec->samples++;
806 
807 	return 0;
808 }
809 
810 static int record__auxtrace_read_snapshot_all(struct record *rec)
811 {
812 	int i;
813 	int rc = 0;
814 
815 	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
816 		struct mmap *map = &rec->evlist->mmap[i];
817 
818 		if (!map->auxtrace_mmap.base)
819 			continue;
820 
821 		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
822 			rc = -1;
823 			goto out;
824 		}
825 	}
826 out:
827 	return rc;
828 }
829 
830 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
831 {
832 	pr_debug("Recording AUX area tracing snapshot\n");
833 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
834 		trigger_error(&auxtrace_snapshot_trigger);
835 	} else {
836 		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
837 			trigger_error(&auxtrace_snapshot_trigger);
838 		else
839 			trigger_ready(&auxtrace_snapshot_trigger);
840 	}
841 }
842 
843 static int record__auxtrace_snapshot_exit(struct record *rec)
844 {
845 	if (trigger_is_error(&auxtrace_snapshot_trigger))
846 		return 0;
847 
848 	if (!auxtrace_record__snapshot_started &&
849 	    auxtrace_record__snapshot_start(rec->itr))
850 		return -1;
851 
852 	record__read_auxtrace_snapshot(rec, true);
853 	if (trigger_is_error(&auxtrace_snapshot_trigger))
854 		return -1;
855 
856 	return 0;
857 }
858 
859 static int record__auxtrace_init(struct record *rec)
860 {
861 	int err;
862 
863 	if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
864 	    && record__threads_enabled(rec)) {
865 		pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
866 		return -EINVAL;
867 	}
868 
869 	if (!rec->itr) {
870 		rec->itr = auxtrace_record__init(rec->evlist, &err);
871 		if (err)
872 			return err;
873 	}
874 
875 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
876 					      rec->opts.auxtrace_snapshot_opts);
877 	if (err)
878 		return err;
879 
880 	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
881 					    rec->opts.auxtrace_sample_opts);
882 	if (err)
883 		return err;
884 
885 	err = auxtrace_parse_aux_action(rec->evlist);
886 	if (err)
887 		return err;
888 
889 	return auxtrace_parse_filters(rec->evlist);
890 }
891 
892 #else
893 
894 static inline
895 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
896 			       struct mmap *map __maybe_unused)
897 {
898 	return 0;
899 }
900 
901 static inline
902 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
903 				    bool on_exit __maybe_unused)
904 {
905 }
906 
907 static inline
908 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
909 {
910 	return 0;
911 }
912 
913 static inline
914 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
915 {
916 	return 0;
917 }
918 
919 static int record__auxtrace_init(struct record *rec __maybe_unused)
920 {
921 	return 0;
922 }
923 
924 #endif
925 
926 static int record__config_text_poke(struct evlist *evlist)
927 {
928 	struct evsel *evsel;
929 
930 	/* Nothing to do if text poke is already configured */
931 	evlist__for_each_entry(evlist, evsel) {
932 		if (evsel->core.attr.text_poke)
933 			return 0;
934 	}
935 
936 	evsel = evlist__add_dummy_on_all_cpus(evlist);
937 	if (!evsel)
938 		return -ENOMEM;
939 
940 	evsel->core.attr.text_poke = 1;
941 	evsel->core.attr.ksymbol = 1;
942 	evsel->immediate = true;
943 	evsel__set_sample_bit(evsel, TIME);
944 
945 	return 0;
946 }
947 
948 static int record__config_off_cpu(struct record *rec)
949 {
950 	return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
951 }
952 
953 static bool record__tracking_system_wide(struct record *rec)
954 {
955 	struct evlist *evlist = rec->evlist;
956 	struct evsel *evsel;
957 
958 	/*
959 	 * If a non-dummy evsel exists, system_wide sideband is needed to
960 	 * help parse sample information.
961 	 * For example, PERF_EVENT_MMAP event to help parse symbol,
962 	 * and PERF_EVENT_COMM event to help parse task executable name.
963 	 */
964 	evlist__for_each_entry(evlist, evsel) {
965 		if (!evsel__is_dummy_event(evsel))
966 			return true;
967 	}
968 
969 	return false;
970 }
971 
972 static int record__config_tracking_events(struct record *rec)
973 {
974 	struct record_opts *opts = &rec->opts;
975 	struct evlist *evlist = rec->evlist;
976 	bool system_wide = false;
977 	struct evsel *evsel;
978 
979 	/*
980 	 * For initial_delay, system wide or a hybrid system, we need to add
981 	 * tracking event so that we can track PERF_RECORD_MMAP to cover the
982 	 * delay of waiting or event synthesis.
983 	 */
984 	if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
985 	    perf_pmus__num_core_pmus() > 1) {
986 
987 		/*
988 		 * User space tasks can migrate between CPUs, so when tracing
989 		 * selected CPUs, sideband for all CPUs is still needed.
990 		 */
991 		if (!!opts->target.cpu_list && record__tracking_system_wide(rec))
992 			system_wide = true;
993 
994 		evsel = evlist__findnew_tracking_event(evlist, system_wide);
995 		if (!evsel)
996 			return -ENOMEM;
997 
998 		/*
999 		 * Enable the tracking event when the process is forked for
1000 		 * initial_delay, immediately for system wide.
1001 		 */
1002 		if (opts->target.initial_delay && !evsel->immediate &&
1003 		    !target__has_cpu(&opts->target))
1004 			evsel->core.attr.enable_on_exec = 1;
1005 		else
1006 			evsel->immediate = 1;
1007 	}
1008 
1009 	return 0;
1010 }
1011 
1012 static bool record__kcore_readable(struct machine *machine)
1013 {
1014 	char kcore[PATH_MAX];
1015 	int fd;
1016 
1017 	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
1018 
1019 	fd = open(kcore, O_RDONLY);
1020 	if (fd < 0)
1021 		return false;
1022 
1023 	close(fd);
1024 
1025 	return true;
1026 }
1027 
1028 static int record__kcore_copy(struct machine *machine, struct perf_data *data)
1029 {
1030 	char from_dir[PATH_MAX];
1031 	char kcore_dir[PATH_MAX];
1032 	int ret;
1033 
1034 	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
1035 
1036 	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
1037 	if (ret)
1038 		return ret;
1039 
1040 	return kcore_copy(from_dir, kcore_dir);
1041 }
1042 
1043 static void record__thread_data_init_pipes(struct record_thread *thread_data)
1044 {
1045 	thread_data->pipes.msg[0] = -1;
1046 	thread_data->pipes.msg[1] = -1;
1047 	thread_data->pipes.ack[0] = -1;
1048 	thread_data->pipes.ack[1] = -1;
1049 }
1050 
1051 static int record__thread_data_open_pipes(struct record_thread *thread_data)
1052 {
1053 	if (pipe(thread_data->pipes.msg))
1054 		return -EINVAL;
1055 
1056 	if (pipe(thread_data->pipes.ack)) {
1057 		close(thread_data->pipes.msg[0]);
1058 		thread_data->pipes.msg[0] = -1;
1059 		close(thread_data->pipes.msg[1]);
1060 		thread_data->pipes.msg[1] = -1;
1061 		return -EINVAL;
1062 	}
1063 
1064 	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
1065 		 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
1066 		 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
1067 
1068 	return 0;
1069 }
1070 
1071 static void record__thread_data_close_pipes(struct record_thread *thread_data)
1072 {
1073 	if (thread_data->pipes.msg[0] != -1) {
1074 		close(thread_data->pipes.msg[0]);
1075 		thread_data->pipes.msg[0] = -1;
1076 	}
1077 	if (thread_data->pipes.msg[1] != -1) {
1078 		close(thread_data->pipes.msg[1]);
1079 		thread_data->pipes.msg[1] = -1;
1080 	}
1081 	if (thread_data->pipes.ack[0] != -1) {
1082 		close(thread_data->pipes.ack[0]);
1083 		thread_data->pipes.ack[0] = -1;
1084 	}
1085 	if (thread_data->pipes.ack[1] != -1) {
1086 		close(thread_data->pipes.ack[1]);
1087 		thread_data->pipes.ack[1] = -1;
1088 	}
1089 }
1090 
1091 static bool evlist__per_thread(struct evlist *evlist)
1092 {
1093 	return cpu_map__is_dummy(evlist->core.user_requested_cpus);
1094 }
1095 
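/*
 * Assign to this worker thread the mmaps of the CPUs present in its maps
 * mask (or all mmaps when recording per-thread, i.e. without CPUs), for both
 * the regular and the overwrite mmap arrays.
 */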
1096 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
1097 {
1098 	int m, tm, nr_mmaps = evlist->core.nr_mmaps;
1099 	struct mmap *mmap = evlist->mmap;
1100 	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
1101 	struct perf_cpu_map *cpus = evlist->core.all_cpus;
1102 	bool per_thread = evlist__per_thread(evlist);
1103 
1104 	if (per_thread)
1105 		thread_data->nr_mmaps = nr_mmaps;
1106 	else
1107 		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
1108 						      thread_data->mask->maps.nbits);
1109 	if (mmap) {
1110 		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1111 		if (!thread_data->maps)
1112 			return -ENOMEM;
1113 	}
1114 	if (overwrite_mmap) {
1115 		thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1116 		if (!thread_data->overwrite_maps) {
1117 			zfree(&thread_data->maps);
1118 			return -ENOMEM;
1119 		}
1120 	}
1121 	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
1122 		 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
1123 
1124 	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
1125 		if (per_thread ||
1126 		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
1127 			if (thread_data->maps) {
1128 				thread_data->maps[tm] = &mmap[m];
1129 				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
1130 					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1131 			}
1132 			if (thread_data->overwrite_maps) {
1133 				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
1134 				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
1135 					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1136 			}
1137 			tm++;
1138 		}
1139 	}
1140 
1141 	return 0;
1142 }
1143 
1144 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
1145 {
1146 	int f, tm, pos;
1147 	struct mmap *map, *overwrite_map;
1148 
1149 	fdarray__init(&thread_data->pollfd, 64);
1150 
1151 	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
1152 		map = thread_data->maps ? thread_data->maps[tm] : NULL;
1153 		overwrite_map = thread_data->overwrite_maps ?
1154 				thread_data->overwrite_maps[tm] : NULL;
1155 
1156 		for (f = 0; f < evlist->core.pollfd.nr; f++) {
1157 			void *ptr = evlist->core.pollfd.priv[f].ptr;
1158 
1159 			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
1160 				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
1161 							      &evlist->core.pollfd);
1162 				if (pos < 0)
1163 					return pos;
1164 				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
1165 					 thread_data, pos, evlist->core.pollfd.entries[f].fd);
1166 			}
1167 		}
1168 	}
1169 
1170 	return 0;
1171 }
1172 
1173 static void record__free_thread_data(struct record *rec)
1174 {
1175 	int t;
1176 	struct record_thread *thread_data = rec->thread_data;
1177 
1178 	if (thread_data == NULL)
1179 		return;
1180 
1181 	for (t = 0; t < rec->nr_threads; t++) {
1182 		record__thread_data_close_pipes(&thread_data[t]);
1183 		zfree(&thread_data[t].maps);
1184 		zfree(&thread_data[t].overwrite_maps);
1185 		fdarray__exit(&thread_data[t].pollfd);
1186 	}
1187 
1188 	zfree(&rec->thread_data);
1189 }
1190 
1191 static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
1192 						    int evlist_pollfd_index,
1193 						    int thread_pollfd_index)
1194 {
1195 	size_t x = rec->index_map_cnt;
1196 
1197 	if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
1198 		return -ENOMEM;
1199 	rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
1200 	rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
1201 	rec->index_map_cnt += 1;
1202 	return 0;
1203 }
1204 
1205 static int record__update_evlist_pollfd_from_thread(struct record *rec,
1206 						    struct evlist *evlist,
1207 						    struct record_thread *thread_data)
1208 {
1209 	struct pollfd *e_entries = evlist->core.pollfd.entries;
1210 	struct pollfd *t_entries = thread_data->pollfd.entries;
1211 	int err = 0;
1212 	size_t i;
1213 
1214 	for (i = 0; i < rec->index_map_cnt; i++) {
1215 		int e_pos = rec->index_map[i].evlist_pollfd_index;
1216 		int t_pos = rec->index_map[i].thread_pollfd_index;
1217 
1218 		if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
1219 		    e_entries[e_pos].events != t_entries[t_pos].events) {
1220 			pr_err("Thread and evlist pollfd index mismatch\n");
1221 			err = -EINVAL;
1222 			continue;
1223 		}
1224 		e_entries[e_pos].revents = t_entries[t_pos].revents;
1225 	}
1226 	return err;
1227 }
1228 
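/*
 * Duplicate the non-perf-event descriptors (control and wakeup fds and the
 * like) from the evlist pollfd array into the main thread's pollfd array and
 * remember the index mapping so their revents can be propagated back later.
 */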
1229 static int record__dup_non_perf_events(struct record *rec,
1230 				       struct evlist *evlist,
1231 				       struct record_thread *thread_data)
1232 {
1233 	struct fdarray *fda = &evlist->core.pollfd;
1234 	int i, ret;
1235 
1236 	for (i = 0; i < fda->nr; i++) {
1237 		if (!(fda->priv[i].flags & fdarray_flag__non_perf_event))
1238 			continue;
1239 		ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda);
1240 		if (ret < 0) {
1241 			pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1242 			return ret;
1243 		}
1244 		pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
1245 			  thread_data, ret, fda->entries[i].fd);
1246 		ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret);
1247 		if (ret < 0) {
1248 			pr_err("Failed to map thread and evlist pollfd indexes\n");
1249 			return ret;
1250 		}
1251 	}
1252 	return 0;
1253 }
1254 
1255 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
1256 {
1257 	int t, ret;
1258 	struct record_thread *thread_data;
1259 
1260 	rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
1261 	if (!rec->thread_data) {
1262 		pr_err("Failed to allocate thread data\n");
1263 		return -ENOMEM;
1264 	}
1265 	thread_data = rec->thread_data;
1266 
1267 	for (t = 0; t < rec->nr_threads; t++)
1268 		record__thread_data_init_pipes(&thread_data[t]);
1269 
1270 	for (t = 0; t < rec->nr_threads; t++) {
1271 		thread_data[t].rec = rec;
1272 		thread_data[t].mask = &rec->thread_masks[t];
1273 		ret = record__thread_data_init_maps(&thread_data[t], evlist);
1274 		if (ret) {
1275 			pr_err("Failed to initialize thread[%d] maps\n", t);
1276 			goto out_free;
1277 		}
1278 		ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
1279 		if (ret) {
1280 			pr_err("Failed to initialize thread[%d] pollfd\n", t);
1281 			goto out_free;
1282 		}
1283 		if (t) {
1284 			thread_data[t].tid = -1;
1285 			ret = record__thread_data_open_pipes(&thread_data[t]);
1286 			if (ret) {
1287 				pr_err("Failed to open thread[%d] communication pipes\n", t);
1288 				goto out_free;
1289 			}
1290 			ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
1291 					   POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
1292 			if (ret < 0) {
1293 				pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
1294 				goto out_free;
1295 			}
1296 			thread_data[t].ctlfd_pos = ret;
1297 			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1298 				 thread_data, thread_data[t].ctlfd_pos,
1299 				 thread_data[t].pipes.msg[0]);
1300 		} else {
1301 			thread_data[t].tid = gettid();
1302 
1303 			ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]);
1304 			if (ret < 0)
1305 				goto out_free;
1306 
1307 			thread_data[t].ctlfd_pos = -1; /* Not used */
1308 		}
1309 	}
1310 
1311 	return 0;
1312 
1313 out_free:
1314 	record__free_thread_data(rec);
1315 
1316 	return ret;
1317 }
1318 
1319 static int record__mmap_evlist(struct record *rec,
1320 			       struct evlist *evlist)
1321 {
1322 	int i, ret;
1323 	struct record_opts *opts = &rec->opts;
1324 	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
1325 				  opts->auxtrace_sample_mode;
1326 	char msg[512];
1327 
1328 	if (opts->affinity != PERF_AFFINITY_SYS)
1329 		cpu__setup_cpunode_map();
1330 
1331 	if (evlist__mmap_ex(evlist, opts->mmap_pages,
1332 				 opts->auxtrace_mmap_pages,
1333 				 auxtrace_overwrite,
1334 				 opts->nr_cblocks, opts->affinity,
1335 				 opts->mmap_flush, opts->comp_level) < 0) {
1336 		if (errno == EPERM) {
1337 			pr_err("Permission error mapping pages.\n"
1338 			       "Consider increasing "
1339 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
1340 			       "or try again with a smaller value of -m/--mmap_pages.\n"
1341 			       "(current value: %u,%u)\n",
1342 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
1343 			return -errno;
1344 		} else {
1345 			pr_err("failed to mmap with %d (%s)\n", errno,
1346 				str_error_r(errno, msg, sizeof(msg)));
1347 			if (errno)
1348 				return -errno;
1349 			else
1350 				return -EINVAL;
1351 		}
1352 	}
1353 
1354 	if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
1355 		return -1;
1356 
1357 	ret = record__alloc_thread_data(rec, evlist);
1358 	if (ret)
1359 		return ret;
1360 
1361 	if (record__threads_enabled(rec)) {
1362 		ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
1363 		if (ret) {
1364 			pr_err("Failed to create data directory: %s\n", strerror(-ret));
1365 			return ret;
1366 		}
1367 		for (i = 0; i < evlist->core.nr_mmaps; i++) {
1368 			if (evlist->mmap)
1369 				evlist->mmap[i].file = &rec->data.dir.files[i];
1370 			if (evlist->overwrite_mmap)
1371 				evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
1372 		}
1373 	}
1374 
1375 	return 0;
1376 }
1377 
1378 static int record__mmap(struct record *rec)
1379 {
1380 	return record__mmap_evlist(rec, rec->evlist);
1381 }
1382 
1383 static int record__open(struct record *rec)
1384 {
1385 	char msg[BUFSIZ];
1386 	struct evsel *pos;
1387 	struct evlist *evlist = rec->evlist;
1388 	struct perf_session *session = rec->session;
1389 	struct record_opts *opts = &rec->opts;
1390 	int rc = 0;
1391 
1392 	evlist__for_each_entry(evlist, pos) {
1393 try_again:
1394 		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
1395 			if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) {
1396 				if (verbose > 0)
1397 					ui__warning("%s\n", msg);
1398 				goto try_again;
1399 			}
1400 			if ((errno == EINVAL || errno == EBADF) &&
1401 			    pos->core.leader != &pos->core &&
1402 			    pos->weak_group) {
1403 			        pos = evlist__reset_weak_group(evlist, pos, true);
1404 				goto try_again;
1405 			}
1406 			rc = -errno;
1407 			evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
1408 			ui__error("%s\n", msg);
1409 			goto out;
1410 		}
1411 
1412 		pos->supported = true;
1413 	}
1414 
1415 	if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
1416 		pr_warning(
1417 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1418 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1419 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1420 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1421 "Samples in kernel modules won't be resolved at all.\n\n"
1422 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1423 "even with a suitable vmlinux or kallsyms file.\n\n");
1424 	}
1425 
1426 	if (evlist__apply_filters(evlist, &pos, &opts->target)) {
1427 		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
1428 			pos->filter ?: "BPF", evsel__name(pos), errno,
1429 			str_error_r(errno, msg, sizeof(msg)));
1430 		rc = -1;
1431 		goto out;
1432 	}
1433 
1434 	rc = record__mmap(rec);
1435 	if (rc)
1436 		goto out;
1437 
1438 	session->evlist = evlist;
1439 	perf_session__set_id_hdr_size(session);
1440 out:
1441 	return rc;
1442 }
1443 
1444 static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1445 {
1446 	if (rec->evlist->first_sample_time == 0)
1447 		rec->evlist->first_sample_time = sample_time;
1448 
1449 	if (sample_time)
1450 		rec->evlist->last_sample_time = sample_time;
1451 }
1452 
1453 static int process_sample_event(const struct perf_tool *tool,
1454 				union perf_event *event,
1455 				struct perf_sample *sample,
1456 				struct evsel *evsel,
1457 				struct machine *machine)
1458 {
1459 	struct record *rec = container_of(tool, struct record, tool);
1460 
1461 	set_timestamp_boundary(rec, sample->time);
1462 
1463 	if (rec->buildid_all)
1464 		return 0;
1465 
1466 	rec->samples++;
1467 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1468 }
1469 
1470 static int process_buildids(struct record *rec)
1471 {
1472 	struct perf_session *session = rec->session;
1473 
1474 	if (perf_data__size(&rec->data) == 0)
1475 		return 0;
1476 
1477 	/*
1478 	 * During this process, it'll load kernel map and replace the
1479 	 * dso->long_name to a real pathname it found.  In this case
1480 	 * we prefer the vmlinux path like
1481 	 *   /lib/modules/3.16.4/build/vmlinux
1482 	 *
1483 	 * rather than build-id path (in debug directory).
1484 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
1485 	 */
1486 	symbol_conf.ignore_vmlinux_buildid = true;
1487 
1488 	/*
1489 	 * If --buildid-all is given, it marks all DSOs regardless of hits,
1490 	 * so no need to process samples. But if timestamp_boundary is enabled,
1491 	 * it still needs to walk on all samples to get the timestamps of
1492 	 * first/last samples.
1493 	 */
1494 	if (rec->buildid_all && !rec->timestamp_boundary)
1495 		rec->tool.sample = process_event_sample_stub;
1496 
1497 	return perf_session__process_events(session);
1498 }
1499 
1500 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1501 {
1502 	int err;
1503 	struct perf_tool *tool = data;
1504 	/*
1505 	 * As for the guest kernel, when processing the record & report
1506 	 * subcommands we arrange the module mmaps prior to the guest kernel
1507 	 * mmap and trigger a preload of the dso, because by default guest
1508 	 * module symbols are loaded from guest kallsyms instead of
1509 	 * /lib/modules/XXX/XXX. This avoids missing symbols when the first
1510 	 * address falls in a module instead of in the guest kernel.
1511 	 */
1512 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
1513 					     machine);
1514 	if (err < 0)
1515 		pr_err("Couldn't record guest kernel [%d]'s reference"
1516 		       " relocation symbol.\n", machine->pid);
1517 
1518 	/*
1519 	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
1520 	 * have no _text sometimes.
1521 	 */
1522 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1523 						 machine);
1524 	if (err < 0)
1525 		pr_err("Couldn't record guest kernel [%d]'s reference"
1526 		       " relocation symbol.\n", machine->pid);
1527 }
1528 
1529 static struct perf_event_header finished_round_event = {
1530 	.size = sizeof(struct perf_event_header),
1531 	.type = PERF_RECORD_FINISHED_ROUND,
1532 };
1533 
1534 static struct perf_event_header finished_init_event = {
1535 	.size = sizeof(struct perf_event_header),
1536 	.type = PERF_RECORD_FINISHED_INIT,
1537 };
1538 
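/*
 * With --affinity=node or --affinity=cpu, migrate the reading thread so it
 * runs on the CPU set associated with the mmap buffer being drained, to avoid
 * remote NUMA accesses while copying it out.
 */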
1539 static void record__adjust_affinity(struct record *rec, struct mmap *map)
1540 {
1541 	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
1542 	    !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
1543 			  thread->mask->affinity.nbits)) {
1544 		bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
1545 		bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
1546 			  map->affinity_mask.bits, thread->mask->affinity.nbits);
1547 		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
1548 					(cpu_set_t *)thread->mask->affinity.bits);
1549 		if (verbose == 2) {
1550 			pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
1551 			mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
1552 		}
1553 	}
1554 }
1555 
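/*
 * Callback used by zstd_compress_stream_to_records(): on the first call
 * (increment == 0) it lays down a PERF_RECORD_COMPRESSED2 header and returns
 * its size; on subsequent calls it grows header.size by the number of
 * compressed bytes just produced.
 */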
1556 static size_t process_comp_header(void *record, size_t increment)
1557 {
1558 	struct perf_record_compressed2 *event = record;
1559 	size_t size = sizeof(*event);
1560 
1561 	if (increment) {
1562 		event->header.size += increment;
1563 		return increment;
1564 	}
1565 
1566 	event->header.type = PERF_RECORD_COMPRESSED2;
1567 	event->header.size = size;
1568 
1569 	return size;
1570 }
1571 
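/*
 * Compress a chunk of ring-buffer data into PERF_RECORD_COMPRESSED2 records,
 * using the per-mmap zstd stream in parallel streaming mode and the
 * session-wide stream otherwise, accounting transferred vs. compressed bytes
 * accordingly.
 */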
1572 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
1573 			    void *dst, size_t dst_size, void *src, size_t src_size)
1574 {
1575 	ssize_t compressed;
1576 	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed2) - 1;
1577 	struct zstd_data *zstd_data = &session->zstd_data;
1578 
1579 	if (map && map->file)
1580 		zstd_data = &map->zstd_data;
1581 
1582 	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
1583 						     max_record_size, process_comp_header);
1584 	if (compressed < 0)
1585 		return compressed;
1586 
1587 	if (map && map->file) {
1588 		thread->bytes_transferred += src_size;
1589 		thread->bytes_compressed  += compressed;
1590 	} else {
1591 		session->bytes_transferred += src_size;
1592 		session->bytes_compressed  += compressed;
1593 	}
1594 
1595 	return compressed;
1596 }
1597 
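/*
 * Drain every mmap owned by the current thread, pushing the data out through
 * either the synchronous (record__pushfn) or the aio path, reading AUX area
 * data where present, and emit a PERF_RECORD_FINISHED_ROUND once something
 * was written (single-file mode only).
 */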
1598 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1599 				    bool overwrite, bool synch)
1600 {
1601 	u64 bytes_written = rec->bytes_written;
1602 	int i;
1603 	int rc = 0;
1604 	int nr_mmaps;
1605 	struct mmap **maps;
1606 	int trace_fd = rec->data.file.fd;
1607 	off_t off = 0;
1608 
1609 	if (!evlist)
1610 		return 0;
1611 
1612 	nr_mmaps = thread->nr_mmaps;
1613 	maps = overwrite ? thread->overwrite_maps : thread->maps;
1614 
1615 	if (!maps)
1616 		return 0;
1617 
1618 	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1619 		return 0;
1620 
1621 	if (record__aio_enabled(rec))
1622 		off = record__aio_get_pos(trace_fd);
1623 
1624 	for (i = 0; i < nr_mmaps; i++) {
1625 		u64 flush = 0;
1626 		struct mmap *map = maps[i];
1627 
1628 		if (map->core.base) {
1629 			record__adjust_affinity(rec, map);
1630 			if (synch) {
1631 				flush = map->core.flush;
1632 				map->core.flush = 1;
1633 			}
1634 			if (!record__aio_enabled(rec)) {
1635 				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1636 					if (synch)
1637 						map->core.flush = flush;
1638 					rc = -1;
1639 					goto out;
1640 				}
1641 			} else {
1642 				if (record__aio_push(rec, map, &off) < 0) {
1643 					record__aio_set_pos(trace_fd, off);
1644 					if (synch)
1645 						map->core.flush = flush;
1646 					rc = -1;
1647 					goto out;
1648 				}
1649 			}
1650 			if (synch)
1651 				map->core.flush = flush;
1652 		}
1653 
1654 		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1655 		    !rec->opts.auxtrace_sample_mode &&
1656 		    record__auxtrace_mmap_read(rec, map) != 0) {
1657 			rc = -1;
1658 			goto out;
1659 		}
1660 	}
1661 
1662 	if (record__aio_enabled(rec))
1663 		record__aio_set_pos(trace_fd, off);
1664 
1665 	/*
1666 	 * Mark the round finished in case we wrote
1667 	 * at least one event.
1668 	 *
1669 	 * No need for round events in directory mode,
1670 	 * because per-cpu maps and files have data
1671 	 * sorted by kernel.
1672 	 */
1673 	if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
1674 		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1675 
1676 	if (overwrite)
1677 		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1678 out:
1679 	return rc;
1680 }
1681 
1682 static int record__mmap_read_all(struct record *rec, bool synch)
1683 {
1684 	int err;
1685 
1686 	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1687 	if (err)
1688 		return err;
1689 
1690 	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1691 }
1692 
1693 static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1694 					   void *arg __maybe_unused)
1695 {
1696 	struct perf_mmap *map = fda->priv[fd].ptr;
1697 
1698 	if (map)
1699 		perf_mmap__put(map);
1700 }
1701 
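/*
 * Body of a --threads worker: signal readiness over the ack pipe, then loop
 * draining its mmaps and polling its descriptors until the main thread closes
 * the msg pipe (POLLHUP), finish with a final synchronous flush and ack
 * termination.
 */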
1702 static void *record__thread(void *arg)
1703 {
1704 	enum thread_msg msg = THREAD_MSG__READY;
1705 	bool terminate = false;
1706 	struct fdarray *pollfd;
1707 	int err, ctlfd_pos;
1708 
1709 	thread = arg;
1710 	thread->tid = gettid();
1711 
1712 	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1713 	if (err == -1)
1714 		pr_warning("threads[%d]: failed to notify on start: %s\n",
1715 			   thread->tid, strerror(errno));
1716 
1717 	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
1718 
1719 	pollfd = &thread->pollfd;
1720 	ctlfd_pos = thread->ctlfd_pos;
1721 
1722 	for (;;) {
1723 		unsigned long long hits = thread->samples;
1724 
1725 		if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
1726 			break;
1727 
1728 		if (hits == thread->samples) {
1729 
1730 			err = fdarray__poll(pollfd, -1);
1731 			/*
1732 			 * Propagate error, only if there's any. Ignore positive
1733 			 * number of returned events and interrupt error.
1734 			 */
1735 			if (err > 0 || (err < 0 && errno == EINTR))
1736 				err = 0;
1737 			thread->waking++;
1738 
1739 			if (fdarray__filter(pollfd, POLLERR | POLLHUP,
1740 					    record__thread_munmap_filtered, NULL) == 0)
1741 				break;
1742 		}
1743 
1744 		if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
1745 			terminate = true;
1746 			close(thread->pipes.msg[0]);
1747 			thread->pipes.msg[0] = -1;
1748 			pollfd->entries[ctlfd_pos].fd = -1;
1749 			pollfd->entries[ctlfd_pos].events = 0;
1750 		}
1751 
1752 		pollfd->entries[ctlfd_pos].revents = 0;
1753 	}
1754 	record__mmap_read_all(thread->rec, true);
1755 
1756 	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1757 	if (err == -1)
1758 		pr_warning("threads[%d]: failed to notify on termination: %s\n",
1759 			   thread->tid, strerror(errno));
1760 
1761 	return NULL;
1762 }
1763 
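/*
 * Enable every perf.data header feature, then clear the ones that do
 * not apply to this session (no tracepoints, no branch stack, etc.).
 */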
1764 static void record__init_features(struct record *rec)
1765 {
1766 	struct perf_session *session = rec->session;
1767 	int feat;
1768 
1769 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1770 		perf_header__set_feat(&session->header, feat);
1771 
1772 	if (rec->no_buildid)
1773 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1774 
1775 	if (!have_tracepoints(&rec->evlist->core.entries))
1776 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1777 
1778 	if (!rec->opts.branch_stack)
1779 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1780 
1781 	if (!rec->opts.full_auxtrace)
1782 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1783 
1784 	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1785 		perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1786 
1787 	if (!rec->opts.use_clockid)
1788 		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
1789 
1790 	if (!record__threads_enabled(rec))
1791 		perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1792 
1793 	if (!record__comp_enabled(rec))
1794 		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1795 
1796 	perf_header__clear_feat(&session->header, HEADER_STAT);
1797 }
1798 
1799 static void
1800 record__finish_output(struct record *rec)
1801 {
1802 	int i;
1803 	struct perf_data *data = &rec->data;
1804 	int fd = perf_data__fd(data);
1805 
1806 	if (data->is_pipe) {
1807 		/* Just to display approx. size */
1808 		data->file.size = rec->bytes_written;
1809 		return;
1810 	}
1811 
1812 	rec->session->header.data_size += rec->bytes_written;
1813 	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1814 	if (record__threads_enabled(rec)) {
1815 		for (i = 0; i < data->dir.nr; i++)
1816 			data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
1817 	}
1818 
1819 	/* Buildid scanning disabled or build ID in kernel and synthesized map events. */
1820 	if (!rec->no_buildid) {
1821 		process_buildids(rec);
1822 
1823 		if (rec->buildid_all)
1824 			perf_session__dsos_hit_all(rec->session);
1825 	}
1826 	perf_session__write_header(rec->session, rec->evlist, fd, true);
1827 
1828 	return;
1829 }
1830 
1831 static int record__synthesize_workload(struct record *rec, bool tail)
1832 {
1833 	int err;
1834 	struct perf_thread_map *thread_map;
1835 	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1836 
1837 	if (rec->opts.tail_synthesize != tail)
1838 		return 0;
1839 
1840 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1841 	if (thread_map == NULL)
1842 		return -1;
1843 
1844 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1845 						 process_synthesized_event,
1846 						 &rec->session->machines.host,
1847 						 needs_mmap,
1848 						 rec->opts.sample_address);
1849 	perf_thread_map__put(thread_map);
1850 	return err;
1851 }
1852 
1853 static int write_finished_init(struct record *rec, bool tail)
1854 {
1855 	if (rec->opts.tail_synthesize != tail)
1856 		return 0;
1857 
1858 	return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
1859 }
1860 
1861 static int record__synthesize(struct record *rec, bool tail);
1862 
1863 static int
1864 record__switch_output(struct record *rec, bool at_exit)
1865 {
1866 	struct perf_data *data = &rec->data;
1867 	char *new_filename = NULL;
1868 	int fd, err;
1869 
1870 	/* Same size as "2015122520103046" */
1871 	char timestamp[] = "InvalidTimestamp";
1872 
1873 	record__aio_mmap_read_sync(rec);
1874 
1875 	write_finished_init(rec, true);
1876 
1877 	record__synthesize(rec, true);
1878 	if (target__none(&rec->opts.target))
1879 		record__synthesize_workload(rec, true);
1880 
1881 	rec->samples = 0;
1882 	record__finish_output(rec);
1883 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1884 	if (err) {
1885 		pr_err("Failed to get current timestamp\n");
1886 		return -EINVAL;
1887 	}
1888 
1889 	fd = perf_data__switch(data, timestamp,
1890 			       rec->session->header.data_offset,
1891 			       at_exit, &new_filename);
1892 	if (fd >= 0 && !at_exit) {
1893 		rec->bytes_written = 0;
1894 		rec->session->header.data_size = 0;
1895 	}
1896 
1897 	if (!quiet) {
1898 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1899 			data->path, timestamp);
1900 	}
1901 
1902 	if (rec->switch_output.num_files) {
1903 		int n = rec->switch_output.cur_file + 1;
1904 
1905 		if (n >= rec->switch_output.num_files)
1906 			n = 0;
1907 		rec->switch_output.cur_file = n;
1908 		if (rec->switch_output.filenames[n]) {
1909 			remove(rec->switch_output.filenames[n]);
1910 			zfree(&rec->switch_output.filenames[n]);
1911 		}
1912 		rec->switch_output.filenames[n] = new_filename;
1913 	} else {
1914 		free(new_filename);
1915 	}
1916 
1917 	/* Output tracking events */
1918 	if (!at_exit) {
1919 		record__synthesize(rec, false);
1920 
1921 		/*
1922 		 * In 'perf record --switch-output' without -a,
1923 		 * record__synthesize() in record__switch_output() won't
1924 		 * generate tracking events because there's no thread_map
1925 		 * in the evlist, so the newly created perf.data wouldn't
1926 		 * contain map and comm information.
1927 		 * Create a fake thread_map and directly call
1928 		 * perf_event__synthesize_thread_map() for those events.
1929 		 */
1930 		if (target__none(&rec->opts.target))
1931 			record__synthesize_workload(rec, false);
1932 		write_finished_init(rec, false);
1933 	}
1934 	return fd;
1935 }
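/*
 * Write a synthesized PERF_RECORD_LOST_SAMPLES event for the given
 * counter instance, appending an id sample so it can be attributed
 * to the right evsel.
 */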
1936 
1937 static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
1938 					struct perf_record_lost_samples *lost,
1939 					int cpu_idx, int thread_idx, u64 lost_count,
1940 					u16 misc_flag)
1941 {
1942 	struct perf_sample_id *sid;
1943 	struct perf_sample sample;
1944 	int id_hdr_size;
1945 
1946 	perf_sample__init(&sample, /*all=*/true);
1947 	lost->lost = lost_count;
1948 	if (evsel->core.ids) {
1949 		sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
1950 		sample.id = sid->id;
1951 	}
1952 
1953 	id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1),
1954 						       evsel->core.attr.sample_type, &sample);
1955 	lost->header.size = sizeof(*lost) + id_hdr_size;
1956 	lost->header.misc = misc_flag;
1957 	record__write(rec, NULL, lost, lost->header.size);
1958 	perf_sample__exit(&sample);
1959 }
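/*
 * Read the lost-sample counts from each counter (and any BPF filter)
 * and emit them as PERF_RECORD_LOST_SAMPLES events at the end of the
 * record session.
 */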
1960 
1961 static void record__read_lost_samples(struct record *rec)
1962 {
1963 	struct perf_session *session = rec->session;
1964 	struct perf_record_lost_samples_and_ids lost;
1965 	struct evsel *evsel;
1966 
1967 	/* there was an error during record__open */
1968 	if (session->evlist == NULL)
1969 		return;
1970 
1971 	evlist__for_each_entry(session->evlist, evsel) {
1972 		struct xyarray *xy = evsel->core.sample_id;
1973 		u64 lost_count;
1974 
1975 		if (xy == NULL || evsel->core.fd == NULL)
1976 			continue;
1977 		if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
1978 		    xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
1979 			pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
1980 			continue;
1981 		}
1982 
1983 		for (int x = 0; x < xyarray__max_x(xy); x++) {
1984 			for (int y = 0; y < xyarray__max_y(xy); y++) {
1985 				struct perf_counts_values count;
1986 
1987 				if (perf_evsel__read(&evsel->core, x, y, &count) < 0) {
1988 					pr_debug("read LOST count failed\n");
1989 					return;
1990 				}
1991 
1992 				if (count.lost) {
1993 					memset(&lost, 0, sizeof(lost));
1994 					lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
1995 					__record__save_lost_samples(rec, evsel, &lost.lost,
1996 								    x, y, count.lost, 0);
1997 				}
1998 			}
1999 		}
2000 
2001 		lost_count = perf_bpf_filter__lost_count(evsel);
2002 		if (lost_count) {
2003 			memset(&lost, 0, sizeof(lost));
2004 			lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
2005 			__record__save_lost_samples(rec, evsel, &lost.lost, 0, 0, lost_count,
2006 						    PERF_RECORD_MISC_LOST_SAMPLES_BPF);
2007 		}
2008 	}
2009 }
2010 
2011 static volatile sig_atomic_t workload_exec_errno;
2012 
2013 /*
2014  * evlist__prepare_workload will send a SIGUSR1
2015  * if the fork fails, since we asked for it by setting its
2016  * want_signal to true.
2017  */
2018 static void workload_exec_failed_signal(int signo __maybe_unused,
2019 					siginfo_t *info,
2020 					void *ucontext __maybe_unused)
2021 {
2022 	workload_exec_errno = info->si_value.sival_int;
2023 	done = 1;
2024 	child_finished = 1;
2025 }
2026 
2027 static void snapshot_sig_handler(int sig);
2028 static void alarm_sig_handler(int sig);
2029 
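/*
 * Pick any mapped perf_event_mmap_page: record__pick_pc() feeds it to
 * perf_event__synth_time_conv() for the time conversion parameters.
 */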
2030 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
2031 {
2032 	if (evlist) {
2033 		if (evlist->mmap && evlist->mmap[0].core.base)
2034 			return evlist->mmap[0].core.base;
2035 		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
2036 			return evlist->overwrite_mmap[0].core.base;
2037 	}
2038 	return NULL;
2039 }
2040 
2041 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
2042 {
2043 	const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
2044 	if (pc)
2045 		return pc;
2046 	return NULL;
2047 }
2048 
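/*
 * Synthesize the non-sample metadata the report tools need: time
 * conversion, id index, auxtrace info, kernel/module maps, extra
 * attributes, thread and cpu maps, and the existing threads/mmaps.
 */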
2049 static int record__synthesize(struct record *rec, bool tail)
2050 {
2051 	struct perf_session *session = rec->session;
2052 	struct machine *machine = &session->machines.host;
2053 	struct perf_data *data = &rec->data;
2054 	struct record_opts *opts = &rec->opts;
2055 	struct perf_tool *tool = &rec->tool;
2056 	int err = 0;
2057 	event_op f = process_synthesized_event;
2058 
2059 	if (rec->opts.tail_synthesize != tail)
2060 		return 0;
2061 
2062 	if (data->is_pipe) {
2063 		err = perf_event__synthesize_for_pipe(tool, session, data,
2064 						      process_synthesized_event);
2065 		if (err < 0)
2066 			goto out;
2067 
2068 		rec->bytes_written += err;
2069 	}
2070 
2071 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
2072 					  process_synthesized_event, machine);
2073 	if (err)
2074 		goto out;
2075 
2076 	/* Synthesize id_index before auxtrace_info */
2077 	err = perf_event__synthesize_id_index(tool,
2078 					      process_synthesized_event,
2079 					      session->evlist, machine);
2080 	if (err)
2081 		goto out;
2082 
2083 	if (rec->opts.full_auxtrace) {
2084 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
2085 					session, process_synthesized_event);
2086 		if (err)
2087 			goto out;
2088 	}
2089 
2090 	if (!evlist__exclude_kernel(rec->evlist)) {
2091 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
2092 							 machine);
2093 		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
2094 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2095 				   "Check /proc/kallsyms permission or run as root.\n");
2096 
2097 		err = perf_event__synthesize_modules(tool, process_synthesized_event,
2098 						     machine);
2099 		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
2100 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2101 				   "Check /proc/modules permission or run as root.\n");
2102 	}
2103 
2104 	if (perf_guest) {
2105 		machines__process_guests(&session->machines,
2106 					 perf_event__synthesize_guest_os, tool);
2107 	}
2108 
2109 	err = perf_event__synthesize_extra_attr(&rec->tool,
2110 						rec->evlist,
2111 						process_synthesized_event,
2112 						data->is_pipe);
2113 	if (err)
2114 		goto out;
2115 
2116 	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
2117 						 process_synthesized_event,
2118 						NULL);
2119 	if (err < 0) {
2120 		pr_err("Couldn't synthesize thread map.\n");
2121 		return err;
2122 	}
2123 
2124 	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
2125 					     process_synthesized_event, NULL);
2126 	if (err < 0) {
2127 		pr_err("Couldn't synthesize cpu map.\n");
2128 		return err;
2129 	}
2130 
2131 	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
2132 						machine, opts);
2133 	if (err < 0) {
2134 		pr_warning("Couldn't synthesize bpf events.\n");
2135 		err = 0;
2136 	}
2137 
2138 	if (rec->opts.synth & PERF_SYNTH_CGROUP) {
2139 		err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
2140 						     machine);
2141 		if (err < 0) {
2142 			pr_warning("Couldn't synthesize cgroup events.\n");
2143 			err = 0;
2144 		}
2145 	}
2146 
2147 	if (rec->opts.nr_threads_synthesize > 1) {
2148 		mutex_init(&synth_lock);
2149 		perf_set_multithreaded();
2150 		f = process_locked_synthesized_event;
2151 	}
2152 
2153 	if (rec->opts.synth & PERF_SYNTH_TASK) {
2154 		bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
2155 
2156 		err = __machine__synthesize_threads(machine, tool, &opts->target,
2157 						    rec->evlist->core.threads,
2158 						    f, needs_mmap, opts->sample_address,
2159 						    rec->opts.nr_threads_synthesize);
2160 	}
2161 
2162 	if (rec->opts.nr_threads_synthesize > 1) {
2163 		perf_set_singlethreaded();
2164 		mutex_destroy(&synth_lock);
2165 	}
2166 
2167 out:
2168 	return err;
2169 }
2170 
2171 static void record__synthesize_final_bpf_metadata(struct record *rec __maybe_unused)
2172 {
2173 #ifdef HAVE_LIBBPF_SUPPORT
2174 	perf_event__synthesize_final_bpf_metadata(rec->session,
2175 						  process_synthesized_event);
2176 #endif
2177 }
2178 
2179 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
2180 {
2181 	struct record *rec = data;
2182 	pthread_kill(rec->thread_id, SIGUSR2);
2183 	return 0;
2184 }
2185 
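/*
 * Set up the side-band evlist: hook --switch-output-event entries to a
 * SIGUSR2 callback and, with libbpf, add the PERF_RECORD_BPF_EVENT
 * side-band event, then start the side-band thread.
 */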
2186 static int record__setup_sb_evlist(struct record *rec)
2187 {
2188 	struct record_opts *opts = &rec->opts;
2189 
2190 	if (rec->sb_evlist != NULL) {
2191 		/*
2192 		 * We get here if --switch-output-event populated the
2193 		 * sb_evlist, so associate a callback that will send a SIGUSR2
2194 		 * to the main thread.
2195 		 */
2196 		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
2197 		rec->thread_id = pthread_self();
2198 	}
2199 #ifdef HAVE_LIBBPF_SUPPORT
2200 	if (!opts->no_bpf_event) {
2201 		if (rec->sb_evlist == NULL) {
2202 			rec->sb_evlist = evlist__new();
2203 
2204 			if (rec->sb_evlist == NULL) {
2205 				pr_err("Couldn't create side band evlist.\n.");
2206 				return -1;
2207 			}
2208 		}
2209 
2210 		if (evlist__add_bpf_sb_event(rec->sb_evlist, perf_session__env(rec->session))) {
2211 			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
2212 			return -1;
2213 		}
2214 	}
2215 #endif
2216 	if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
2217 		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
2218 		opts->no_bpf_event = true;
2219 	}
2220 
2221 	return 0;
2222 }
2223 
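/*
 * With --clockid, store the clock id/resolution and a pair of reference
 * timestamps (gettimeofday() vs. the sampling clock) in the session env
 * so reports can convert sample times to wall-clock time.
 */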
2224 static int record__init_clock(struct record *rec)
2225 {
2226 	struct perf_session *session = rec->session;
2227 	struct timespec ref_clockid;
2228 	struct timeval ref_tod;
2229 	struct perf_env *env = perf_session__env(session);
2230 	u64 ref;
2231 
2232 	if (!rec->opts.use_clockid)
2233 		return 0;
2234 
2235 	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
2236 		env->clock.clockid_res_ns = rec->opts.clockid_res_ns;
2237 
2238 	env->clock.clockid = rec->opts.clockid;
2239 
2240 	if (gettimeofday(&ref_tod, NULL) != 0) {
2241 		pr_err("gettimeofday failed, cannot set reference time.\n");
2242 		return -1;
2243 	}
2244 
2245 	if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
2246 		pr_err("clock_gettime failed, cannot set reference time.\n");
2247 		return -1;
2248 	}
2249 
2250 	ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
2251 	      (u64) ref_tod.tv_usec * NSEC_PER_USEC;
2252 
2253 	env->clock.tod_ns = ref;
2254 
2255 	ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2256 	      (u64) ref_clockid.tv_nsec;
2257 
2258 	env->clock.clockid_ns = ref;
2259 	return 0;
2260 }
2261 
2262 static void hit_auxtrace_snapshot_trigger(struct record *rec)
2263 {
2264 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2265 		trigger_hit(&auxtrace_snapshot_trigger);
2266 		auxtrace_record__snapshot_started = 1;
2267 		if (auxtrace_record__snapshot_start(rec->itr))
2268 			trigger_error(&auxtrace_snapshot_trigger);
2269 	}
2270 }
2271 
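/*
 * Ask a reader thread to stop by closing the write end of its message
 * pipe (it sees POLLHUP), then wait for the termination ack.
 */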
2272 static int record__terminate_thread(struct record_thread *thread_data)
2273 {
2274 	int err;
2275 	enum thread_msg ack = THREAD_MSG__UNDEFINED;
2276 	pid_t tid = thread_data->tid;
2277 
2278 	close(thread_data->pipes.msg[1]);
2279 	thread_data->pipes.msg[1] = -1;
2280 	err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
2281 	if (err > 0)
2282 		pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
2283 	else
2284 		pr_warning("threads[%d]: failed to receive termination notification from %d\n",
2285 			   thread->tid, tid);
2286 
2287 	return 0;
2288 }
2289 
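/*
 * For --threads: spawn the reader threads with all signals blocked,
 * pin each one to its affinity mask and wait for its READY ack, then
 * set the main thread's own affinity and restore the signal mask.
 */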
2290 static int record__start_threads(struct record *rec)
2291 {
2292 	int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
2293 	struct record_thread *thread_data = rec->thread_data;
2294 	sigset_t full, mask;
2295 	pthread_t handle;
2296 	pthread_attr_t attrs;
2297 
2298 	thread = &thread_data[0];
2299 
2300 	if (!record__threads_enabled(rec))
2301 		return 0;
2302 
2303 	sigfillset(&full);
2304 	if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2305 		pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
2306 		return -1;
2307 	}
2308 
2309 	pthread_attr_init(&attrs);
2310 	pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2311 
2312 	for (t = 1; t < nr_threads; t++) {
2313 		enum thread_msg msg = THREAD_MSG__UNDEFINED;
2314 
2315 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2316 		pthread_attr_setaffinity_np(&attrs,
2317 					    MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2318 					    (cpu_set_t *)(thread_data[t].mask->affinity.bits));
2319 #endif
2320 		if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2321 			for (tt = 1; tt < t; tt++)
2322 				record__terminate_thread(&thread_data[tt]);
2323 			pr_err("Failed to start threads: %s\n", strerror(errno));
2324 			ret = -1;
2325 			goto out_err;
2326 		}
2327 
2328 		err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2329 		if (err > 0)
2330 			pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2331 				  thread_msg_tags[msg]);
2332 		else
2333 			pr_warning("threads[%d]: failed to receive start notification from %d\n",
2334 				   thread->tid, rec->thread_data[t].tid);
2335 	}
2336 
2337 	sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2338 			(cpu_set_t *)thread->mask->affinity.bits);
2339 
2340 	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2341 
2342 out_err:
2343 	pthread_attr_destroy(&attrs);
2344 
2345 	if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2346 		pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
2347 		ret = -1;
2348 	}
2349 
2350 	return ret;
2351 }
2352 
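/*
 * Terminate the reader threads and fold their per-thread sample,
 * transfer and compression counters into the record/session totals.
 */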
2353 static int record__stop_threads(struct record *rec)
2354 {
2355 	int t;
2356 	struct record_thread *thread_data = rec->thread_data;
2357 
2358 	for (t = 1; t < rec->nr_threads; t++)
2359 		record__terminate_thread(&thread_data[t]);
2360 
2361 	for (t = 0; t < rec->nr_threads; t++) {
2362 		rec->samples += thread_data[t].samples;
2363 		if (!record__threads_enabled(rec))
2364 			continue;
2365 		rec->session->bytes_transferred += thread_data[t].bytes_transferred;
2366 		rec->session->bytes_compressed += thread_data[t].bytes_compressed;
2367 		pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
2368 			 thread_data[t].samples, thread_data[t].waking);
2369 		if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
2370 			pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
2371 				 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
2372 		else
2373 			pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
2374 	}
2375 
2376 	return 0;
2377 }
2378 
2379 static unsigned long record__waking(struct record *rec)
2380 {
2381 	int t;
2382 	unsigned long waking = 0;
2383 	struct record_thread *thread_data = rec->thread_data;
2384 
2385 	for (t = 0; t < rec->nr_threads; t++)
2386 		waking += thread_data[t].waking;
2387 
2388 	return waking;
2389 }
2390 
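/*
 * The main record loop: set up the session, synthesize the initial
 * metadata, start (or attach to) the workload, drain the ring buffers
 * until done, then write the final header and print the summary.
 */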
2391 static int __cmd_record(struct record *rec, int argc, const char **argv)
2392 {
2393 	int err;
2394 	int status = 0;
2395 	const bool forks = argc > 0;
2396 	struct perf_tool *tool = &rec->tool;
2397 	struct record_opts *opts = &rec->opts;
2398 	struct perf_data *data = &rec->data;
2399 	struct perf_session *session;
2400 	bool disabled = false, draining = false;
2401 	int fd;
2402 	float ratio = 0;
2403 	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
2404 	struct perf_env *env;
2405 
2406 	atexit(record__sig_exit);
2407 	signal(SIGCHLD, sig_handler);
2408 	signal(SIGINT, sig_handler);
2409 	signal(SIGTERM, sig_handler);
2410 	signal(SIGSEGV, sigsegv_handler);
2411 
2412 	if (rec->opts.record_cgroup) {
2413 #ifndef HAVE_FILE_HANDLE
2414 		pr_err("cgroup tracking is not supported\n");
2415 		return -1;
2416 #endif
2417 	}
2418 
2419 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
2420 		signal(SIGUSR2, snapshot_sig_handler);
2421 		if (rec->opts.auxtrace_snapshot_mode)
2422 			trigger_on(&auxtrace_snapshot_trigger);
2423 		if (rec->switch_output.enabled)
2424 			trigger_on(&switch_output_trigger);
2425 	} else {
2426 		signal(SIGUSR2, SIG_IGN);
2427 	}
2428 
2429 	perf_tool__init(tool, /*ordered_events=*/true);
2430 	tool->sample		= process_sample_event;
2431 	tool->fork		= perf_event__process_fork;
2432 	tool->exit		= perf_event__process_exit;
2433 	tool->comm		= perf_event__process_comm;
2434 	tool->namespaces	= perf_event__process_namespaces;
2435 	tool->mmap		= build_id__process_mmap;
2436 	tool->mmap2		= build_id__process_mmap2;
2437 	tool->itrace_start	= process_timestamp_boundary;
2438 	tool->aux		= process_timestamp_boundary;
2439 	tool->namespace_events	= rec->opts.record_namespaces;
2440 	tool->cgroup_events	= rec->opts.record_cgroup;
2441 	session = perf_session__new(data, tool);
2442 	if (IS_ERR(session)) {
2443 		pr_err("Perf session creation failed.\n");
2444 		return PTR_ERR(session);
2445 	}
2446 	env = perf_session__env(session);
2447 	if (record__threads_enabled(rec)) {
2448 		if (perf_data__is_pipe(&rec->data)) {
2449 			pr_err("Parallel trace streaming is not available in pipe mode.\n");
2450 			return -1;
2451 		}
2452 		if (rec->opts.full_auxtrace) {
2453 			pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
2454 			return -1;
2455 		}
2456 	}
2457 
2458 	fd = perf_data__fd(data);
2459 	rec->session = session;
2460 
2461 	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
2462 		pr_err("Compression initialization failed.\n");
2463 		return -1;
2464 	}
2465 #ifdef HAVE_EVENTFD_SUPPORT
2466 	done_fd = eventfd(0, EFD_NONBLOCK);
2467 	if (done_fd < 0) {
2468 		pr_err("Failed to create wakeup eventfd, error: %m\n");
2469 		status = -1;
2470 		goto out_delete_session;
2471 	}
2472 	err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
2473 	if (err < 0) {
2474 		pr_err("Failed to add wakeup eventfd to poll list\n");
2475 		status = err;
2476 		goto out_delete_session;
2477 	}
2478 #endif // HAVE_EVENTFD_SUPPORT
2479 
2480 	env->comp_type  = PERF_COMP_ZSTD;
2481 	env->comp_level = rec->opts.comp_level;
2482 
2483 	if (rec->opts.kcore &&
2484 	    !record__kcore_readable(&session->machines.host)) {
2485 		pr_err("ERROR: kcore is not readable.\n");
2486 		return -1;
2487 	}
2488 
2489 	if (record__init_clock(rec))
2490 		return -1;
2491 
2492 	record__init_features(rec);
2493 
2494 	if (forks) {
2495 		err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
2496 					       workload_exec_failed_signal);
2497 		if (err < 0) {
2498 			pr_err("Couldn't run the workload!\n");
2499 			status = err;
2500 			goto out_delete_session;
2501 		}
2502 	}
2503 
2504 	/*
2505 	 * If we have just a single event and are sending data
2506 	 * through a pipe, we need to force the id allocation,
2507 	 * because we synthesize event name through the pipe
2508 	 * and need the id for that.
2509 	 */
2510 	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
2511 		rec->opts.sample_id = true;
2512 
2513 	if (rec->timestamp_filename && perf_data__is_pipe(data)) {
2514 		rec->timestamp_filename = false;
2515 		pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n");
2516 	}
2517 
2518 	/*
2519 	 * Use the global stat_config, which is zeroed, meaning aggr_mode is AGGR_NONE
2520 	 * and hybrid_merge is false.
2521 	 */
2522 	evlist__uniquify_evsel_names(rec->evlist, &stat_config);
2523 
2524 	evlist__config(rec->evlist, opts, &callchain_param);
2525 
2526 	/* Debug message used by test scripts */
2527 	pr_debug3("perf record opening and mmapping events\n");
2528 	if (record__open(rec) != 0) {
2529 		err = -1;
2530 		goto out_free_threads;
2531 	}
2532 	/* Debug message used by test scripts */
2533 	pr_debug3("perf record done opening and mmapping events\n");
2534 	env->comp_mmap_len = session->evlist->core.mmap_len;
2535 
2536 	if (rec->opts.kcore) {
2537 		err = record__kcore_copy(&session->machines.host, data);
2538 		if (err) {
2539 			pr_err("ERROR: Failed to copy kcore\n");
2540 			goto out_free_threads;
2541 		}
2542 	}
2543 
2544 	/*
2545 	 * Normally perf_session__new would do this, but it doesn't have the
2546 	 * evlist.
2547 	 */
2548 	if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
2549 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2550 		rec->tool.ordered_events = false;
2551 	}
2552 
2553 	if (evlist__nr_groups(rec->evlist) == 0)
2554 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2555 
2556 	if (data->is_pipe) {
2557 		err = perf_header__write_pipe(fd);
2558 		if (err < 0)
2559 			goto out_free_threads;
2560 	} else {
2561 		err = perf_session__write_header(session, rec->evlist, fd, false);
2562 		if (err < 0)
2563 			goto out_free_threads;
2564 	}
2565 
2566 	err = -1;
2567 	if (!rec->no_buildid
2568 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
2569 		pr_err("Couldn't generate buildids. "
2570 		       "Use --no-buildid to profile anyway.\n");
2571 		goto out_free_threads;
2572 	}
2573 
2574 	if (!evlist__needs_bpf_sb_event(rec->evlist))
2575 		opts->no_bpf_event = true;
2576 
2577 	err = record__setup_sb_evlist(rec);
2578 	if (err)
2579 		goto out_free_threads;
2580 
2581 	err = record__synthesize(rec, false);
2582 	if (err < 0)
2583 		goto out_free_threads;
2584 
2585 	if (rec->realtime_prio) {
2586 		struct sched_param param;
2587 
2588 		param.sched_priority = rec->realtime_prio;
2589 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
2590 			pr_err("Could not set realtime priority.\n");
2591 			err = -1;
2592 			goto out_free_threads;
2593 		}
2594 	}
2595 
2596 	if (record__start_threads(rec))
2597 		goto out_free_threads;
2598 
2599 	/*
2600 	 * When perf is starting the traced process, all the events
2601 	 * (apart from group members) have enable_on_exec=1 set,
2602 	 * so don't spoil it by prematurely enabling them.
2603 	 */
2604 	if (!target__none(&opts->target) && !opts->target.initial_delay)
2605 		evlist__enable(rec->evlist);
2606 
2607 	/*
2608 	 * offcpu-time does not call execve, so enable_on_exec wouldn't work
2609 	 * when recording a workload; enable it manually
2610 	 */
2611 	if (rec->off_cpu)
2612 		evlist__enable_evsel(rec->evlist, (char *)OFFCPU_EVENT);
2613 
2614 	/*
2615 	 * Let the child rip
2616 	 */
2617 	if (forks) {
2618 		struct machine *machine = &session->machines.host;
2619 		union perf_event *event;
2620 		pid_t tgid;
2621 
2622 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2623 		if (event == NULL) {
2624 			err = -ENOMEM;
2625 			goto out_child;
2626 		}
2627 
2628 		/*
2629 		 * Some H/W events are generated before the COMM event,
2630 		 * which is emitted during exec(), so perf script
2631 		 * cannot see a correct process name for those events.
2632 		 * Synthesize a COMM event to prevent that.
2633 		 */
2634 		tgid = perf_event__synthesize_comm(tool, event,
2635 						   rec->evlist->workload.pid,
2636 						   process_synthesized_event,
2637 						   machine);
2638 		free(event);
2639 
2640 		if (tgid == -1)
2641 			goto out_child;
2642 
2643 		event = malloc(sizeof(event->namespaces) +
2644 			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
2645 			       machine->id_hdr_size);
2646 		if (event == NULL) {
2647 			err = -ENOMEM;
2648 			goto out_child;
2649 		}
2650 
2651 		/*
2652 		 * Synthesize NAMESPACES event for the command specified.
2653 		 */
2654 		perf_event__synthesize_namespaces(tool, event,
2655 						  rec->evlist->workload.pid,
2656 						  tgid, process_synthesized_event,
2657 						  machine);
2658 		free(event);
2659 
2660 		evlist__start_workload(rec->evlist);
2661 	}
2662 
2663 	if (opts->target.initial_delay) {
2664 		pr_info(EVLIST_DISABLED_MSG);
2665 		if (opts->target.initial_delay > 0) {
2666 			usleep(opts->target.initial_delay * USEC_PER_MSEC);
2667 			evlist__enable(rec->evlist);
2668 			pr_info(EVLIST_ENABLED_MSG);
2669 		}
2670 	}
2671 
2672 	err = event_enable_timer__start(rec->evlist->eet);
2673 	if (err)
2674 		goto out_child;
2675 
2676 	/* Debug message used by test scripts */
2677 	pr_debug3("perf record has started\n");
2678 	fflush(stderr);
2679 
2680 	trigger_ready(&auxtrace_snapshot_trigger);
2681 	trigger_ready(&switch_output_trigger);
2682 	perf_hooks__invoke_record_start();
2683 
2684 	/*
2685 	 * Must write FINISHED_INIT so it will be seen after all other
2686 	 * synthesized user events, but before any regular events.
2687 	 */
2688 	err = write_finished_init(rec, false);
2689 	if (err < 0)
2690 		goto out_child;
2691 
2692 	for (;;) {
2693 		unsigned long long hits = thread->samples;
2694 
2695 		/*
2696 		 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
2697 		 * here: when done == true and hits != rec->samples
2698 		 * in the previous round.
2699 		 *
2700 		 * evlist__toggle_bkw_mmap ensures we never
2701 		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
2702 		 */
2703 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
2704 			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
2705 
2706 		if (record__mmap_read_all(rec, false) < 0) {
2707 			trigger_error(&auxtrace_snapshot_trigger);
2708 			trigger_error(&switch_output_trigger);
2709 			err = -1;
2710 			goto out_child;
2711 		}
2712 
2713 		if (auxtrace_record__snapshot_started) {
2714 			auxtrace_record__snapshot_started = 0;
2715 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
2716 				record__read_auxtrace_snapshot(rec, false);
2717 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2718 				pr_err("AUX area tracing snapshot failed\n");
2719 				err = -1;
2720 				goto out_child;
2721 			}
2722 		}
2723 
2724 		if (trigger_is_hit(&switch_output_trigger)) {
2725 			/*
2726 			 * If switch_output_trigger is hit, the data in the
2727 			 * overwritable ring buffer should have been collected,
2728 			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
2729 			 *
2730 			 * If SIGUSR2 was raised after or during record__mmap_read_all(),
2731 			 * it didn't collect data from the overwritable ring
2732 			 * buffer. Read again.
2733 			 */
2734 			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2735 				continue;
2736 			trigger_ready(&switch_output_trigger);
2737 
2738 			/*
2739 			 * Re-enable events in the overwrite ring buffer after
2740 			 * record__mmap_read_all(): we should have collected
2741 			 * data from it.
2742 			 */
2743 			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
2744 
2745 			if (!quiet)
2746 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
2747 					record__waking(rec));
2748 			thread->waking = 0;
2749 			fd = record__switch_output(rec, false);
2750 			if (fd < 0) {
2751 				pr_err("Failed to switch to new file\n");
2752 				trigger_error(&switch_output_trigger);
2753 				err = fd;
2754 				goto out_child;
2755 			}
2756 
2757 			/* re-arm the alarm */
2758 			if (rec->switch_output.time)
2759 				alarm(rec->switch_output.time);
2760 		}
2761 
2762 		if (hits == thread->samples) {
2763 			if (done || draining)
2764 				break;
2765 			err = fdarray__poll(&thread->pollfd, -1);
2766 			/*
2767 			 * Propagate the error only if there is one. Ignore a positive
2768 			 * number of returned events and EINTR.
2769 			 */
2770 			if (err > 0 || (err < 0 && errno == EINTR))
2771 				err = 0;
2772 			thread->waking++;
2773 
2774 			if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2775 					    record__thread_munmap_filtered, NULL) == 0)
2776 				draining = true;
2777 
2778 			err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread);
2779 			if (err)
2780 				goto out_child;
2781 		}
2782 
2783 		if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2784 			switch (cmd) {
2785 			case EVLIST_CTL_CMD_SNAPSHOT:
2786 				hit_auxtrace_snapshot_trigger(rec);
2787 				evlist__ctlfd_ack(rec->evlist);
2788 				break;
2789 			case EVLIST_CTL_CMD_STOP:
2790 				done = 1;
2791 				break;
2792 			case EVLIST_CTL_CMD_ACK:
2793 			case EVLIST_CTL_CMD_UNSUPPORTED:
2794 			case EVLIST_CTL_CMD_ENABLE:
2795 			case EVLIST_CTL_CMD_DISABLE:
2796 			case EVLIST_CTL_CMD_EVLIST:
2797 			case EVLIST_CTL_CMD_PING:
2798 			default:
2799 				break;
2800 			}
2801 		}
2802 
2803 		err = event_enable_timer__process(rec->evlist->eet);
2804 		if (err < 0)
2805 			goto out_child;
2806 		if (err) {
2807 			err = 0;
2808 			done = 1;
2809 		}
2810 
2811 		/*
2812 		 * When perf is starting the traced process, at the end events
2813 		 * die with the process and we wait for that. Thus no need to
2814 		 * disable events in this case.
2815 		 */
2816 		if (done && !disabled && !target__none(&opts->target)) {
2817 			trigger_off(&auxtrace_snapshot_trigger);
2818 			evlist__disable(rec->evlist);
2819 			disabled = true;
2820 		}
2821 	}
2822 
2823 	trigger_off(&auxtrace_snapshot_trigger);
2824 	trigger_off(&switch_output_trigger);
2825 
2826 	record__synthesize_final_bpf_metadata(rec);
2827 
2828 	if (opts->auxtrace_snapshot_on_exit)
2829 		record__auxtrace_snapshot_exit(rec);
2830 
2831 	if (forks && workload_exec_errno) {
2832 		char msg[STRERR_BUFSIZE];
2833 		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
2834 		struct strbuf sb = STRBUF_INIT;
2835 
2836 		evlist__format_evsels(rec->evlist, &sb, 2048);
2837 
2838 		pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2839 			sb.buf, argv[0], emsg);
2840 		strbuf_release(&sb);
2841 		err = -1;
2842 		goto out_child;
2843 	}
2844 
2845 	if (!quiet)
2846 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2847 			record__waking(rec));
2848 
2849 	write_finished_init(rec, true);
2850 
2851 	if (target__none(&rec->opts.target))
2852 		record__synthesize_workload(rec, true);
2853 
2854 out_child:
2855 	record__stop_threads(rec);
2856 	record__mmap_read_all(rec, true);
2857 out_free_threads:
2858 	record__free_thread_data(rec);
2859 	evlist__finalize_ctlfd(rec->evlist);
2860 	record__aio_mmap_read_sync(rec);
2861 
2862 	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
2863 		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
2864 		env->comp_ratio = ratio + 0.5;
2865 	}
2866 
2867 	if (forks) {
2868 		int exit_status;
2869 
2870 		if (!child_finished)
2871 			kill(rec->evlist->workload.pid, SIGTERM);
2872 
2873 		wait(&exit_status);
2874 
2875 		if (err < 0)
2876 			status = err;
2877 		else if (WIFEXITED(exit_status))
2878 			status = WEXITSTATUS(exit_status);
2879 		else if (WIFSIGNALED(exit_status))
2880 			signr = WTERMSIG(exit_status);
2881 	} else
2882 		status = err;
2883 
2884 	if (rec->off_cpu)
2885 		rec->bytes_written += off_cpu_write(rec->session);
2886 
2887 	record__read_lost_samples(rec);
2888 	record__synthesize(rec, true);
2889 	/* this will be recalculated during process_buildids() */
2890 	rec->samples = 0;
2891 
2892 	if (!err) {
2893 		if (!rec->timestamp_filename) {
2894 			record__finish_output(rec);
2895 		} else {
2896 			fd = record__switch_output(rec, true);
2897 			if (fd < 0) {
2898 				status = fd;
2899 				goto out_delete_session;
2900 			}
2901 		}
2902 	}
2903 
2904 	perf_hooks__invoke_record_end();
2905 
2906 	if (!err && !quiet) {
2907 		char samples[128];
2908 		const char *postfix = rec->timestamp_filename ?
2909 					".<timestamp>" : "";
2910 
2911 		if (rec->samples && !rec->opts.full_auxtrace)
2912 			scnprintf(samples, sizeof(samples),
2913 				  " (%" PRIu64 " samples)", rec->samples);
2914 		else
2915 			samples[0] = '\0';
2916 
2917 		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s",
2918 			perf_data__size(data) / 1024.0 / 1024.0,
2919 			data->path, postfix, samples);
2920 		if (ratio) {
2921 			fprintf(stderr,	", compressed (original %.3f MB, ratio is %.3f)",
2922 					rec->session->bytes_transferred / 1024.0 / 1024.0,
2923 					ratio);
2924 		}
2925 		fprintf(stderr, " ]\n");
2926 	}
2927 
2928 out_delete_session:
2929 #ifdef HAVE_EVENTFD_SUPPORT
2930 	if (done_fd >= 0) {
2931 		fd = done_fd;
2932 		done_fd = -1;
2933 
2934 		close(fd);
2935 	}
2936 #endif
2937 	zstd_fini(&session->zstd_data);
2938 	if (!opts->no_bpf_event)
2939 		evlist__stop_sb_thread(rec->sb_evlist);
2940 
2941 	perf_session__delete(session);
2942 	return status;
2943 }
2944 
2945 static void callchain_debug(struct callchain_param *callchain)
2946 {
2947 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2948 
2949 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2950 
2951 	if (callchain->record_mode == CALLCHAIN_DWARF)
2952 		pr_debug("callchain: stack dump size %d\n",
2953 			 callchain->dump_size);
2954 }
2955 
2956 int record_opts__parse_callchain(struct record_opts *record,
2957 				 struct callchain_param *callchain,
2958 				 const char *arg, bool unset)
2959 {
2960 	int ret;
2961 	callchain->enabled = !unset;
2962 
2963 	/* --no-call-graph */
2964 	if (unset) {
2965 		callchain->record_mode = CALLCHAIN_NONE;
2966 		pr_debug("callchain: disabled\n");
2967 		return 0;
2968 	}
2969 
2970 	ret = parse_callchain_record_opt(arg, callchain);
2971 	if (!ret) {
2972 		/* Enable data address sampling for DWARF unwind. */
2973 		if (callchain->record_mode == CALLCHAIN_DWARF)
2974 			record->sample_address = true;
2975 		callchain_debug(callchain);
2976 	}
2977 
2978 	return ret;
2979 }
2980 
2981 int record_parse_callchain_opt(const struct option *opt,
2982 			       const char *arg,
2983 			       int unset)
2984 {
2985 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
2986 }
2987 
2988 int record_callchain_opt(const struct option *opt,
2989 			 const char *arg __maybe_unused,
2990 			 int unset __maybe_unused)
2991 {
2992 	struct callchain_param *callchain = opt->value;
2993 
2994 	callchain->enabled = true;
2995 
2996 	if (callchain->record_mode == CALLCHAIN_NONE)
2997 		callchain->record_mode = CALLCHAIN_FP;
2998 
2999 	callchain_debug(callchain);
3000 	return 0;
3001 }
3002 
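/*
 * perfconfig hook: handle the record.* variables (build-id handling,
 * call-graph mode, AIO block count, debuginfod URLs).
 */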
3003 static int perf_record_config(const char *var, const char *value, void *cb)
3004 {
3005 	struct record *rec = cb;
3006 
3007 	if (!strcmp(var, "record.build-id")) {
3008 		if (!strcmp(value, "cache"))
3009 			rec->no_buildid_cache = false;
3010 		else if (!strcmp(value, "no-cache"))
3011 			rec->no_buildid_cache = true;
3012 		else if (!strcmp(value, "skip"))
3013 			rec->no_buildid = true;
3014 		else if (!strcmp(value, "mmap"))
3015 			rec->buildid_mmap = true;
3016 		else if (!strcmp(value, "no-mmap"))
3017 			rec->buildid_mmap = false;
3018 		else
3019 			return -1;
3020 		return 0;
3021 	}
3022 	if (!strcmp(var, "record.call-graph")) {
3023 		var = "call-graph.record-mode";
3024 		return perf_default_config(var, value, cb);
3025 	}
3026 #ifdef HAVE_AIO_SUPPORT
3027 	if (!strcmp(var, "record.aio")) {
3028 		rec->opts.nr_cblocks = strtol(value, NULL, 0);
3029 		if (!rec->opts.nr_cblocks)
3030 			rec->opts.nr_cblocks = nr_cblocks_default;
3031 	}
3032 #endif
3033 	if (!strcmp(var, "record.debuginfod")) {
3034 		rec->debuginfod.urls = strdup(value);
3035 		if (!rec->debuginfod.urls)
3036 			return -ENOMEM;
3037 		rec->debuginfod.set = true;
3038 	}
3039 
3040 	return 0;
3041 }
3042 
3043 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset)
3044 {
3045 	struct record *rec = (struct record *)opt->value;
3046 
3047 	return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset);
3048 }
3049 
3050 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
3051 {
3052 	struct record_opts *opts = (struct record_opts *)opt->value;
3053 
3054 	if (unset || !str)
3055 		return 0;
3056 
3057 	if (!strcasecmp(str, "node"))
3058 		opts->affinity = PERF_AFFINITY_NODE;
3059 	else if (!strcasecmp(str, "cpu"))
3060 		opts->affinity = PERF_AFFINITY_CPU;
3061 
3062 	return 0;
3063 }
3064 
3065 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
3066 {
3067 	mask->nbits = nr_bits;
3068 	mask->bits = bitmap_zalloc(mask->nbits);
3069 	if (!mask->bits)
3070 		return -ENOMEM;
3071 
3072 	return 0;
3073 }
3074 
3075 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
3076 {
3077 	bitmap_free(mask->bits);
3078 	mask->nbits = 0;
3079 }
3080 
3081 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
3082 {
3083 	int ret;
3084 
3085 	ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
3086 	if (ret) {
3087 		mask->affinity.bits = NULL;
3088 		return ret;
3089 	}
3090 
3091 	ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
3092 	if (ret) {
3093 		record__mmap_cpu_mask_free(&mask->maps);
3094 		mask->maps.bits = NULL;
3095 	}
3096 
3097 	return ret;
3098 }
3099 
3100 static void record__thread_mask_free(struct thread_mask *mask)
3101 {
3102 	record__mmap_cpu_mask_free(&mask->maps);
3103 	record__mmap_cpu_mask_free(&mask->affinity);
3104 }
3105 
3106 static int record__parse_threads(const struct option *opt, const char *str, int unset)
3107 {
3108 	int s;
3109 	struct record_opts *opts = opt->value;
3110 
3111 	if (unset || !str || !strlen(str)) {
3112 		opts->threads_spec = THREAD_SPEC__CPU;
3113 	} else {
3114 		for (s = 1; s < THREAD_SPEC__MAX; s++) {
3115 			if (s == THREAD_SPEC__USER) {
3116 				opts->threads_user_spec = strdup(str);
3117 				if (!opts->threads_user_spec)
3118 					return -ENOMEM;
3119 				opts->threads_spec = THREAD_SPEC__USER;
3120 				break;
3121 			}
3122 			if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
3123 				opts->threads_spec = s;
3124 				break;
3125 			}
3126 		}
3127 	}
3128 
3129 	if (opts->threads_spec == THREAD_SPEC__USER)
3130 		pr_debug("threads_spec: %s\n", opts->threads_user_spec);
3131 	else
3132 		pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
3133 
3134 	return 0;
3135 }
3136 
3137 static int parse_output_max_size(const struct option *opt,
3138 				 const char *str, int unset)
3139 {
3140 	unsigned long *s = (unsigned long *)opt->value;
3141 	static struct parse_tag tags_size[] = {
3142 		{ .tag  = 'B', .mult = 1       },
3143 		{ .tag  = 'K', .mult = 1 << 10 },
3144 		{ .tag  = 'M', .mult = 1 << 20 },
3145 		{ .tag  = 'G', .mult = 1 << 30 },
3146 		{ .tag  = 0 },
3147 	};
3148 	unsigned long val;
3149 
3150 	if (unset) {
3151 		*s = 0;
3152 		return 0;
3153 	}
3154 
3155 	val = parse_tag_value(str, tags_size);
3156 	if (val != (unsigned long) -1) {
3157 		*s = val;
3158 		return 0;
3159 	}
3160 
3161 	return -1;
3162 }
3163 
3164 static int record__parse_mmap_pages(const struct option *opt,
3165 				    const char *str,
3166 				    int unset __maybe_unused)
3167 {
3168 	struct record_opts *opts = opt->value;
3169 	char *s, *p;
3170 	unsigned int mmap_pages;
3171 	int ret;
3172 
3173 	if (!str)
3174 		return -EINVAL;
3175 
3176 	s = strdup(str);
3177 	if (!s)
3178 		return -ENOMEM;
3179 
3180 	p = strchr(s, ',');
3181 	if (p)
3182 		*p = '\0';
3183 
3184 	if (*s) {
3185 		ret = __evlist__parse_mmap_pages(&mmap_pages, s);
3186 		if (ret)
3187 			goto out_free;
3188 		opts->mmap_pages = mmap_pages;
3189 	}
3190 
3191 	if (!p) {
3192 		ret = 0;
3193 		goto out_free;
3194 	}
3195 
3196 	ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
3197 	if (ret)
3198 		goto out_free;
3199 
3200 	opts->auxtrace_mmap_pages = mmap_pages;
3201 
3202 out_free:
3203 	free(s);
3204 	return ret;
3205 }
3206 
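/* Parse --off-cpu-thresh as milliseconds and store it in nanoseconds. */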
3207 static int record__parse_off_cpu_thresh(const struct option *opt,
3208 					const char *str,
3209 					int unset __maybe_unused)
3210 {
3211 	struct record_opts *opts = opt->value;
3212 	char *endptr;
3213 	u64 off_cpu_thresh_ms;
3214 
3215 	if (!str)
3216 		return -EINVAL;
3217 
3218 	off_cpu_thresh_ms = strtoull(str, &endptr, 10);
3219 
3220 	/* strtoull() returned 0 but the string isn't "0": parsing failed */
3221 	if (*endptr || (off_cpu_thresh_ms == 0 && strcmp(str, "0")))
3222 		return -EINVAL;
3223 	else
3224 		opts->off_cpu_thresh_ns = off_cpu_thresh_ms * NSEC_PER_MSEC;
3225 
3226 	return 0;
3227 }
3228 
3229 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
3230 {
3231 }
3232 
3233 static int parse_control_option(const struct option *opt,
3234 				const char *str,
3235 				int unset __maybe_unused)
3236 {
3237 	struct record_opts *opts = opt->value;
3238 
3239 	return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
3240 }
3241 
3242 static void switch_output_size_warn(struct record *rec)
3243 {
3244 	u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
3245 	struct switch_output *s = &rec->switch_output;
3246 
3247 	wakeup_size /= 2;
3248 
3249 	if (s->size < wakeup_size) {
3250 		char buf[100];
3251 
3252 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
3253 		pr_warning("WARNING: switch-output data size lower than "
3254 			   "wakeup kernel buffer size (%s) "
3255 			   "expect bigger perf.data sizes\n", buf);
3256 	}
3257 }
3258 
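/*
 * Parse the --switch-output argument: "signal", a size threshold (B/K/M/G)
 * or a time threshold (s/m/h/d). Any of them enables timestamped output
 * file names.
 */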
3259 static int switch_output_setup(struct record *rec)
3260 {
3261 	struct switch_output *s = &rec->switch_output;
3262 	static struct parse_tag tags_size[] = {
3263 		{ .tag  = 'B', .mult = 1       },
3264 		{ .tag  = 'K', .mult = 1 << 10 },
3265 		{ .tag  = 'M', .mult = 1 << 20 },
3266 		{ .tag  = 'G', .mult = 1 << 30 },
3267 		{ .tag  = 0 },
3268 	};
3269 	static struct parse_tag tags_time[] = {
3270 		{ .tag  = 's', .mult = 1        },
3271 		{ .tag  = 'm', .mult = 60       },
3272 		{ .tag  = 'h', .mult = 60*60    },
3273 		{ .tag  = 'd', .mult = 60*60*24 },
3274 		{ .tag  = 0 },
3275 	};
3276 	unsigned long val;
3277 
3278 	/*
3279 	 * If we're using --switch-output-events, then we imply
3280 	 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
3281 	 * thread to its parent.
3282 	 */
3283 	if (rec->switch_output_event_set) {
3284 		if (record__threads_enabled(rec)) {
3285 			pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
3286 			return 0;
3287 		}
3288 		goto do_signal;
3289 	}
3290 
3291 	if (!s->set)
3292 		return 0;
3293 
3294 	if (record__threads_enabled(rec)) {
3295 		pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
3296 		return 0;
3297 	}
3298 
3299 	if (!strcmp(s->str, "signal")) {
3300 do_signal:
3301 		s->signal = true;
3302 		pr_debug("switch-output with SIGUSR2 signal\n");
3303 		goto enabled;
3304 	}
3305 
3306 	val = parse_tag_value(s->str, tags_size);
3307 	if (val != (unsigned long) -1) {
3308 		s->size = val;
3309 		pr_debug("switch-output with %s size threshold\n", s->str);
3310 		goto enabled;
3311 	}
3312 
3313 	val = parse_tag_value(s->str, tags_time);
3314 	if (val != (unsigned long) -1) {
3315 		s->time = val;
3316 		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
3317 			 s->str, s->time);
3318 		goto enabled;
3319 	}
3320 
3321 	return -1;
3322 
3323 enabled:
3324 	rec->timestamp_filename = true;
3325 	s->enabled              = true;
3326 
3327 	if (s->size && !rec->opts.no_buffering)
3328 		switch_output_size_warn(rec);
3329 
3330 	return 0;
3331 }
3332 
3333 static const char * const __record_usage[] = {
3334 	"perf record [<options>] [<command>]",
3335 	"perf record [<options>] -- <command> [<options>]",
3336 	NULL
3337 };
3338 const char * const *record_usage = __record_usage;
3339 
3340 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
3341 				  struct perf_sample *sample, struct machine *machine)
3342 {
3343 	/*
3344 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps();
3345 	 * no need to add them twice.
3346 	 */
3347 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3348 		return 0;
3349 	return perf_event__process_mmap(tool, event, sample, machine);
3350 }
3351 
3352 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
3353 				   struct perf_sample *sample, struct machine *machine)
3354 {
3355 	/*
3356 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps();
3357 	 * no need to add them twice.
3358 	 */
3359 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3360 		return 0;
3361 
3362 	return perf_event__process_mmap2(tool, event, sample, machine);
3363 }
3364 
3365 static int process_timestamp_boundary(const struct perf_tool *tool,
3366 				      union perf_event *event __maybe_unused,
3367 				      struct perf_sample *sample,
3368 				      struct machine *machine __maybe_unused)
3369 {
3370 	struct record *rec = container_of(tool, struct record, tool);
3371 
3372 	set_timestamp_boundary(rec, sample->time);
3373 	return 0;
3374 }
3375 
3376 static int parse_record_synth_option(const struct option *opt,
3377 				     const char *str,
3378 				     int unset __maybe_unused)
3379 {
3380 	struct record_opts *opts = opt->value;
3381 	char *p = strdup(str);
3382 
3383 	if (p == NULL)
3384 		return -1;
3385 
3386 	opts->synth = parse_synth_opt(p);
3387 	free(p);
3388 
3389 	if (opts->synth < 0) {
3390 		pr_err("Invalid synth option: %s\n", str);
3391 		return -1;
3392 	}
3393 	return 0;
3394 }
3395 
3396 /*
3397  * XXX Ideally would be local to cmd_record() and passed to a record__new
3398  * because we need to have access to it in record__exit, which is called
3399  * after cmd_record() exits, but since record_options need to be accessible to
3400  * builtin-script, leave it here.
3401  *
3402  * At least we don't touch it in all the other functions here directly.
3403  *
3404  * Just say no to tons of global variables, sigh.
3405  */
3406 static struct record record = {
3407 	.opts = {
3408 		.sample_time	     = true,
3409 		.mmap_pages	     = UINT_MAX,
3410 		.user_freq	     = UINT_MAX,
3411 		.user_interval	     = ULLONG_MAX,
3412 		.freq		     = 4000,
3413 		.target		     = {
3414 			.uses_mmap   = true,
3415 			.default_per_cpu = true,
3416 		},
3417 		.mmap_flush          = MMAP_FLUSH_DEFAULT,
3418 		.nr_threads_synthesize = 1,
3419 		.ctl_fd              = -1,
3420 		.ctl_fd_ack          = -1,
3421 		.synth               = PERF_SYNTH_ALL,
3422 		.off_cpu_thresh_ns   = OFFCPU_THRESH,
3423 	},
3424 	.buildid_mmap = true,
3425 };
3426 
3427 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3428 	"\n\t\t\t\tDefault: fp";
3429 
3430 static bool dry_run;
3431 
3432 static struct parse_events_option_args parse_events_option_args = {
3433 	.evlistp = &record.evlist,
3434 };
3435 
3436 static struct parse_events_option_args switch_output_parse_events_option_args = {
3437 	.evlistp = &record.sb_evlist,
3438 };
3439 
3440 /*
3441  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
3442  * with it and switch to using the library functions in perf_evlist that came
3443  * from builtin-record.c, i.e. use record_opts,
3444  * evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record',
3445  * using pipes, etc.
3446  */
3447 static struct option __record_options[] = {
3448 	OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
3449 		     "event selector. use 'perf list' to list available events",
3450 		     parse_events_option),
3451 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3452 		     "event filter", parse_filter),
3453 	OPT_BOOLEAN(0, "latency", &record.latency,
3454 		    "Enable data collection for latency profiling.\n"
3455 		    "\t\t\t  Use perf report --latency for latency-centric profile."),
3456 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3457 			   NULL, "don't record events from perf itself",
3458 			   exclude_perf),
3459 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3460 		    "record events on existing process id"),
3461 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3462 		    "record events on existing thread id"),
3463 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
3464 		    "collect data with this RT SCHED_FIFO priority"),
3465 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3466 		    "collect data without buffering"),
3467 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3468 		    "collect raw sample records from all opened counters"),
3469 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3470 			    "system-wide collection from all CPUs"),
3471 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3472 		    "list of cpus to monitor"),
3473 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3474 	OPT_STRING('o', "output", &record.data.path, "file",
3475 		    "output file name"),
3476 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3477 			&record.opts.no_inherit_set,
3478 			"child tasks do not inherit counters"),
3479 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3480 		    "synthesize non-sample events at the end of output"),
3481 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3482 	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3483 	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3484 		    "Fail if the specified frequency can't be used"),
3485 	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3486 		     "profile at this frequency",
3487 		      record__parse_freq),
3488 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3489 		     "number of mmap data pages and AUX area tracing mmap pages",
3490 		     record__parse_mmap_pages),
3491 	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3492 		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
3493 		     record__mmap_flush_parse),
3494 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3495 			   NULL, "enables call-graph recording" ,
3496 			   &record_callchain_opt),
3497 	OPT_CALLBACK(0, "call-graph", &record.opts,
3498 		     "record_mode[,record_size]", record_callchain_help,
3499 		     &record_parse_callchain_opt),
3500 	OPT_INCR('v', "verbose", &verbose,
3501 		    "be more verbose (show counter open errors, etc)"),
3502 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"),
3503 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3504 		    "per thread counts"),
3505 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3506 	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3507 		    "Record the sample physical addresses"),
3508 	OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3509 		    "Record the sampled data address data page size"),
3510 	OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3511 		    "Record the sampled code address (ip) page size"),
3512 	OPT_BOOLEAN(0, "sample-mem-info", &record.opts.sample_data_src,
3513 		    "Record the data source for memory operations"),
3514 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3515 	OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier,
3516 		    "Record the sample identifier"),
3517 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3518 			&record.opts.sample_time_set,
3519 			"Record the sample timestamps"),
3520 	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3521 			"Record the sample period"),
3522 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3523 		    "don't sample"),
3524 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3525 			&record.no_buildid_cache_set,
3526 			"do not update the buildid cache"),
3527 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3528 			&record.no_buildid_set,
3529 			"do not collect buildids in perf.data"),
3530 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3531 		     "monitor event in cgroup name only",
3532 		     parse_cgroups),
3533 	OPT_CALLBACK('D', "delay", &record, "ms",
3534 		     "ms to wait before starting measurement after program start (-1: start with events disabled), "
3535 		     "or ranges of time to enable events e.g. '-D 10-20,30-40'",
3536 		     record__parse_event_enable_time),
3537 	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3538 	OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
3539 
3540 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3541 		     "branch any", "sample any taken branches",
3542 		     parse_branch_stack),
3543 
3544 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3545 		     "branch filter mask", "branch stack filter modes",
3546 		     parse_branch_stack),
3547 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3548 		    "sample by weight (on special events only)"),
3549 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3550 		    "sample transaction flags (special events only)"),
3551 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3552 		    "use per-thread mmaps"),
3553 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3554 		    "sample selected machine registers on interrupt,"
3555 		    " use '-I?' to list register names", parse_intr_regs),
3556 	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3557 		    "sample selected machine registers in user space,"
3558 		    " use '--user-regs=?' to list register names", parse_user_regs),
3559 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3560 		    "Record running/enabled time of read (:S) events"),
3561 	OPT_CALLBACK('k', "clockid", &record.opts,
3562 		     "clockid", "clockid to use for events, see clock_gettime()",
3563 		     parse_clockid),
3564 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3565 			  "opts", "AUX area tracing Snapshot Mode", ""),
3566 	OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3567 			  "opts", "sample AUX area", ""),
3568 	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3569 			"per thread proc mmap processing timeout in ms"),
3570 	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3571 		    "Record namespaces events"),
3572 	OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3573 		    "Record cgroup events"),
3574 	OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3575 			&record.opts.record_switch_events_set,
3576 			"Record context switch events"),
3577 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3578 			 "Configure all used events to run in kernel space.",
3579 			 PARSE_OPT_EXCLUSIVE),
3580 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3581 			 "Configure all used events to run in user space.",
3582 			 PARSE_OPT_EXCLUSIVE),
3583 	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3584 		    "collect kernel callchains"),
3585 	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3586 		    "collect user callchains"),
3587 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3588 		   "file", "vmlinux pathname"),
3589 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3590 		    "Record build-id of all DSOs regardless of hits"),
3591 	OPT_BOOLEAN_SET(0, "buildid-mmap", &record.buildid_mmap, &record.buildid_mmap_set,
3592 			"Record build-id in mmap events and skip build-id processing."),
3593 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3594 		    "append timestamp to output filename"),
3595 	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3596 		    "Record timestamp boundary (time of first/last samples)"),
3597 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3598 			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3599 			  "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3600 			  "signal"),
3601 	OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args,
3602 			 &record.switch_output_event_set, "switch output event",
3603 			 "switch output event selector. use 'perf list' to list available events",
3604 			 parse_events_option_new_evlist),
3605 	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3606 		   "Limit number of switch output generated files"),
3607 	OPT_BOOLEAN(0, "dry-run", &dry_run,
3608 		    "Parse options then exit"),
3609 #ifdef HAVE_AIO_SUPPORT
3610 	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3611 		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3612 		     record__aio_parse),
3613 #endif
3614 	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3615 		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3616 		     record__parse_affinity),
3617 #ifdef HAVE_ZSTD_SUPPORT
3618 	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3619 			    "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3620 			    record__parse_comp_level),
3621 #endif
3622 	OPT_CALLBACK(0, "max-size", &record.output_max_size,
3623 		     "size", "Limit the maximum size of the output file", parse_output_max_size),
3624 	OPT_UINTEGER(0, "num-thread-synthesize",
3625 		     &record.opts.nr_threads_synthesize,
3626 		     "number of threads to run for event synthesis"),
3627 #ifdef HAVE_LIBPFM
3628 	OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3629 		"libpfm4 event selector. use 'perf list' to list available events",
3630 		parse_libpfm_events_option),
3631 #endif
3632 	OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3633 		     "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3634 		     "\t\t\t  'snapshot': AUX area tracing snapshot).\n"
3635 		     "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3636 		     "\t\t\t  Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3637 		      parse_control_option),
3638 	OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3639 		     "Fine-tune event synthesis: default=all", parse_record_synth_option),
3640 	OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3641 			  &record.debuginfod.set, "debuginfod urls",
3642 			  "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3643 			  "system"),
3644 	OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3645 			    "write collected trace data into several data files using parallel threads",
3646 			    record__parse_threads),
3647 	OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
3648 	OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin",
3649 		   "BPF filter action"),
3650 	OPT_CALLBACK(0, "off-cpu-thresh", &record.opts, "ms",
3651 		     "Dump off-cpu samples if off-cpu time exceeds this threshold (in milliseconds). (Default: 500ms)",
3652 		     record__parse_off_cpu_thresh),
3653 	OPT_END()
3654 };
3655 
3656 struct option *record_options = __record_options;
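/*
 * Illustrative example of driving the --control interface above from a
 * shell using fifos (the fifo names here are arbitrary):
 *
 *   $ mkfifo perf.ctl perf.ack
 *   $ perf record --control=fifo:perf.ctl,perf.ack -a &
 *   $ echo disable > perf.ctl       # disable events
 *   $ echo enable > perf.ctl        # enable events
 *
 * If an ack fifo/descriptor is given, command completion ('ack\n') is
 * written to it.
 */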
3657 
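/* Set a bit in 'mask' for every CPU present in 'cpus'. */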
3658 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3659 {
3660 	struct perf_cpu cpu;
3661 	int idx;
3662 
3663 	if (cpu_map__is_dummy(cpus))
3664 		return 0;
3665 
3666 	perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) {
3667 		/* Return -ENODEV if the input cpu is greater than the max cpu */
3668 		if ((unsigned long)cpu.cpu > mask->nbits)
3669 			return -ENODEV;
3670 		__set_bit(cpu.cpu, mask->bits);
3671 	}
3672 
3673 	return 0;
3674 }
3675 
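/*
 * Same as record__mmap_cpu_mask_init(), but the CPUs come from a cpu list
 * string (e.g. a topology or user supplied spec) parsed by perf_cpu_map__new().
 */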
3676 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3677 {
3678 	struct perf_cpu_map *cpus;
3679 
3680 	cpus = perf_cpu_map__new(mask_spec);
3681 	if (!cpus)
3682 		return -ENOMEM;
3683 
3684 	bitmap_zero(mask->bits, mask->nbits);
3685 	if (record__mmap_cpu_mask_init(mask, cpus))
3686 		return -ENODEV;
3687 
3688 	perf_cpu_map__put(cpus);
3689 
3690 	return 0;
3691 }
3692 
3693 static void record__free_thread_masks(struct record *rec, int nr_threads)
3694 {
3695 	int t;
3696 
3697 	if (rec->thread_masks)
3698 		for (t = 0; t < nr_threads; t++)
3699 			record__thread_mask_free(&rec->thread_masks[t]);
3700 
3701 	zfree(&rec->thread_masks);
3702 }
3703 
3704 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3705 {
3706 	int t, ret;
3707 
3708 	rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3709 	if (!rec->thread_masks) {
3710 		pr_err("Failed to allocate thread masks\n");
3711 		return -ENOMEM;
3712 	}
3713 
3714 	for (t = 0; t < nr_threads; t++) {
3715 		ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3716 		if (ret) {
3717 			pr_err("Failed to allocate thread masks[%d]\n", t);
3718 			goto out_free;
3719 		}
3720 	}
3721 
3722 	return 0;
3723 
3724 out_free:
3725 	record__free_thread_masks(rec, nr_threads);
3726 
3727 	return ret;
3728 }
3729 
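/*
 * One data streaming thread per monitored CPU: thread t maps and is
 * affine to exactly the t-th CPU of 'cpus'.
 */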
3730 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3731 {
3732 	int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3733 
3734 	ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3735 	if (ret)
3736 		return ret;
3737 
3738 	rec->nr_threads = nr_cpus;
3739 	pr_debug("nr_threads: %d\n", rec->nr_threads);
3740 
3741 	for (t = 0; t < rec->nr_threads; t++) {
3742 		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
3743 		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
3744 		if (verbose > 0) {
3745 			pr_debug("thread_masks[%d]: ", t);
3746 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3747 			pr_debug("thread_masks[%d]: ", t);
3748 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3749 		}
3750 	}
3751 
3752 	return 0;
3753 }
3754 
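/*
 * Build rec->thread_masks from maps/affinity spec strings: each spec is
 * intersected with the monitored CPUs, rejected if it ends up empty or
 * overlaps a previously accepted spec, and otherwise appended as a new
 * thread mask.
 */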
3755 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
3756 					  const char **maps_spec, const char **affinity_spec,
3757 					  u32 nr_spec)
3758 {
3759 	u32 s;
3760 	int ret = 0, t = 0;
3761 	struct mmap_cpu_mask cpus_mask;
3762 	struct thread_mask thread_mask, full_mask, *thread_masks;
3763 
3764 	ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
3765 	if (ret) {
3766 		pr_err("Failed to allocate CPUs mask\n");
3767 		return ret;
3768 	}
3769 
3770 	ret = record__mmap_cpu_mask_init(&cpus_mask, cpus);
3771 	if (ret) {
3772 		pr_err("Failed to init cpu mask\n");
3773 		goto out_free_cpu_mask;
3774 	}
3775 
3776 	ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
3777 	if (ret) {
3778 		pr_err("Failed to allocate full mask\n");
3779 		goto out_free_cpu_mask;
3780 	}
3781 
3782 	ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3783 	if (ret) {
3784 		pr_err("Failed to allocate thread mask\n");
3785 		goto out_free_full_and_cpu_masks;
3786 	}
3787 
3788 	for (s = 0; s < nr_spec; s++) {
3789 		ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
3790 		if (ret) {
3791 			pr_err("Failed to initialize maps thread mask\n");
3792 			goto out_free;
3793 		}
3794 		ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
3795 		if (ret) {
3796 			pr_err("Failed to initialize affinity thread mask\n");
3797 			goto out_free;
3798 		}
3799 
3800 		/* ignore invalid CPUs but do not allow empty masks */
3801 		if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
3802 				cpus_mask.bits, thread_mask.maps.nbits)) {
3803 			pr_err("Empty maps mask: %s\n", maps_spec[s]);
3804 			ret = -EINVAL;
3805 			goto out_free;
3806 		}
3807 		if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
3808 				cpus_mask.bits, thread_mask.affinity.nbits)) {
3809 			pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
3810 			ret = -EINVAL;
3811 			goto out_free;
3812 		}
3813 
3814 		/* do not allow intersection with other masks (full_mask) */
3815 		if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
3816 				      thread_mask.maps.nbits)) {
3817 			pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
3818 			ret = -EINVAL;
3819 			goto out_free;
3820 		}
3821 		if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
3822 				      thread_mask.affinity.nbits)) {
3823 			pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
3824 			ret = -EINVAL;
3825 			goto out_free;
3826 		}
3827 
3828 		bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
3829 			  thread_mask.maps.bits, full_mask.maps.nbits);
3830 		bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
3831 			  thread_mask.affinity.bits, full_mask.affinity.nbits);
3832 
3833 		thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
3834 		if (!thread_masks) {
3835 			pr_err("Failed to reallocate thread masks\n");
3836 			ret = -ENOMEM;
3837 			goto out_free;
3838 		}
3839 		rec->thread_masks = thread_masks;
3840 		rec->thread_masks[t] = thread_mask;
3841 		if (verbose > 0) {
3842 			pr_debug("thread_masks[%d]: ", t);
3843 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3844 			pr_debug("thread_masks[%d]: ", t);
3845 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3846 		}
3847 		t++;
3848 		ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3849 		if (ret) {
3850 			pr_err("Failed to allocate thread mask\n");
3851 			goto out_free_full_and_cpu_masks;
3852 		}
3853 	}
3854 	rec->nr_threads = t;
3855 	pr_debug("nr_threads: %d\n", rec->nr_threads);
3856 	if (!rec->nr_threads)
3857 		ret = -EINVAL;
3858 
3859 out_free:
3860 	record__thread_mask_free(&thread_mask);
3861 out_free_full_and_cpu_masks:
3862 	record__thread_mask_free(&full_mask);
3863 out_free_cpu_mask:
3864 	record__mmap_cpu_mask_free(&cpus_mask);
3865 
3866 	return ret;
3867 }
3868 
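/* One thread per core, using the core CPU lists from the CPU topology. */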
3869 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
3870 {
3871 	int ret;
3872 	struct cpu_topology *topo;
3873 
3874 	topo = cpu_topology__new();
3875 	if (!topo) {
3876 		pr_err("Failed to allocate CPU topology\n");
3877 		return -ENOMEM;
3878 	}
3879 
3880 	ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
3881 					     topo->core_cpus_list, topo->core_cpus_lists);
3882 	cpu_topology__delete(topo);
3883 
3884 	return ret;
3885 }
3886 
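/* One thread per package (socket), using the package CPU lists from the CPU topology. */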
3887 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
3888 {
3889 	int ret;
3890 	struct cpu_topology *topo;
3891 
3892 	topo = cpu_topology__new();
3893 	if (!topo) {
3894 		pr_err("Failed to allocate CPU topology\n");
3895 		return -ENOMEM;
3896 	}
3897 
3898 	ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
3899 					     topo->package_cpus_list, topo->package_cpus_lists);
3900 	cpu_topology__delete(topo);
3901 
3902 	return ret;
3903 }
3904 
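/* One thread per NUMA node, using the node's CPU list for both maps and affinity. */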
3905 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
3906 {
3907 	u32 s;
3908 	int ret;
3909 	const char **spec;
3910 	struct numa_topology *topo;
3911 
3912 	topo = numa_topology__new();
3913 	if (!topo) {
3914 		pr_err("Failed to allocate NUMA topology\n");
3915 		return -ENOMEM;
3916 	}
3917 
3918 	spec = zalloc(topo->nr * sizeof(char *));
3919 	if (!spec) {
3920 		pr_err("Failed to allocate NUMA spec\n");
3921 		ret = -ENOMEM;
3922 		goto out_delete_topo;
3923 	}
3924 	for (s = 0; s < topo->nr; s++)
3925 		spec[s] = topo->nodes[s].cpus;
3926 
3927 	ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
3928 
3929 	zfree(&spec);
3930 
3931 out_delete_topo:
3932 	numa_topology__delete(topo);
3933 
3934 	return ret;
3935 }
3936 
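/*
 * Parse the user supplied --threads spec of the form
 * <maps cpus>/<affinity cpus>[:<maps cpus>/<affinity cpus>...], for
 * example (illustrative) --threads=0-3/0-3:4-7/4-7, into per-thread
 * maps and affinity cpu list strings.
 */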
3937 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
3938 {
3939 	int t, ret;
3940 	u32 s, nr_spec = 0;
3941 	char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
3942 	char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;
3943 
3944 	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
3945 		spec = strtok_r(user_spec, ":", &spec_ptr);
3946 		if (spec == NULL)
3947 			break;
3948 		pr_debug2("threads_spec[%d]: %s\n", t, spec);
3949 		mask = strtok_r(spec, "/", &mask_ptr);
3950 		if (mask == NULL)
3951 			break;
3952 		pr_debug2("  maps mask: %s\n", mask);
3953 		tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
3954 		if (!tmp_spec) {
3955 			pr_err("Failed to reallocate maps spec\n");
3956 			ret = -ENOMEM;
3957 			goto out_free;
3958 		}
3959 		maps_spec = tmp_spec;
3960 		maps_spec[nr_spec] = dup_mask = strdup(mask);
3961 		if (!maps_spec[nr_spec]) {
3962 			pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
3963 			ret = -ENOMEM;
3964 			goto out_free;
3965 		}
3966 		mask = strtok_r(NULL, "/", &mask_ptr);
3967 		if (mask == NULL) {
3968 			pr_err("Invalid thread maps or affinity specs\n");
3969 			ret = -EINVAL;
3970 			goto out_free;
3971 		}
3972 		pr_debug2("  affinity mask: %s\n", mask);
3973 		tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
3974 		if (!tmp_spec) {
3975 			pr_err("Failed to reallocate affinity spec\n");
3976 			ret = -ENOMEM;
3977 			goto out_free;
3978 		}
3979 		affinity_spec = tmp_spec;
3980 		affinity_spec[nr_spec] = strdup(mask);
3981 		if (!affinity_spec[nr_spec]) {
3982 			pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
3983 			ret = -ENOMEM;
3984 			goto out_free;
3985 		}
3986 		dup_mask = NULL;
3987 		nr_spec++;
3988 	}
3989 
3990 	ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
3991 					     (const char **)affinity_spec, nr_spec);
3992 
3993 out_free:
3994 	free(dup_mask);
3995 	for (s = 0; s < nr_spec; s++) {
3996 		if (maps_spec)
3997 			free(maps_spec[s]);
3998 		if (affinity_spec)
3999 			free(affinity_spec[s]);
4000 	}
4001 	free(affinity_spec);
4002 	free(maps_spec);
4003 
4004 	return ret;
4005 }
4006 
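/* Non-parallel default: a single thread whose maps mask covers all monitored CPUs. */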
4007 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
4008 {
4009 	int ret;
4010 
4011 	ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
4012 	if (ret)
4013 		return ret;
4014 
4015 	if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
4016 		return -ENODEV;
4017 
4018 	rec->nr_threads = 1;
4019 
4020 	return 0;
4021 }
4022 
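/*
 * Pick the thread masks initialization matching the --threads spec
 * (cpu, core, package, numa or user defined); without --threads fall
 * back to a single thread covering all monitored CPUs.
 */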
4023 static int record__init_thread_masks(struct record *rec)
4024 {
4025 	int ret = 0;
4026 	struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;
4027 
4028 	if (!record__threads_enabled(rec))
4029 		return record__init_thread_default_masks(rec, cpus);
4030 
4031 	if (evlist__per_thread(rec->evlist)) {
4032 		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
4033 		return -EINVAL;
4034 	}
4035 
4036 	switch (rec->opts.threads_spec) {
4037 	case THREAD_SPEC__CPU:
4038 		ret = record__init_thread_cpu_masks(rec, cpus);
4039 		break;
4040 	case THREAD_SPEC__CORE:
4041 		ret = record__init_thread_core_masks(rec, cpus);
4042 		break;
4043 	case THREAD_SPEC__PACKAGE:
4044 		ret = record__init_thread_package_masks(rec, cpus);
4045 		break;
4046 	case THREAD_SPEC__NUMA:
4047 		ret = record__init_thread_numa_masks(rec, cpus);
4048 		break;
4049 	case THREAD_SPEC__USER:
4050 		ret = record__init_thread_user_masks(rec, cpus);
4051 		break;
4052 	default:
4053 		break;
4054 	}
4055 
4056 	return ret;
4057 }
4058 
4059 int cmd_record(int argc, const char **argv)
4060 {
4061 	int err;
4062 	struct record *rec = &record;
4063 	char errbuf[BUFSIZ];
4064 
4065 	setlocale(LC_ALL, "");
4066 
4067 #ifndef HAVE_BPF_SKEL
4068 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
4069 	set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
4070 # undef set_nobuild
4071 #endif
4072 
4073 	/* Disable eager loading of kernel symbols, as it adds overhead to perf record. */
4074 	symbol_conf.lazy_load_kernel_maps = true;
4075 	rec->opts.affinity = PERF_AFFINITY_SYS;
4076 
4077 	rec->evlist = evlist__new();
4078 	if (rec->evlist == NULL)
4079 		return -ENOMEM;
4080 
4081 	err = perf_config(perf_record_config, rec);
4082 	if (err)
4083 		return err;
4084 
4085 	argc = parse_options(argc, argv, record_options, record_usage,
4086 			    PARSE_OPT_STOP_AT_NON_OPTION);
4087 	if (quiet)
4088 		perf_quiet_option();
4089 
4090 	err = symbol__validate_sym_arguments();
4091 	if (err)
4092 		return err;
4093 
4094 	perf_debuginfod_setup(&record.debuginfod);
4095 
4096 	/* Make system wide (-a) the default target. */
4097 	if (!argc && target__none(&rec->opts.target))
4098 		rec->opts.target.system_wide = true;
4099 
4100 	if (nr_cgroups && !rec->opts.target.system_wide) {
4101 		usage_with_options_msg(record_usage, record_options,
4102 			"cgroup monitoring only available in system-wide mode");
4103 
4104 	}
4105 
4106 	if (record.latency) {
4107 		/*
4108 		 * There is no fundamental reason why latency profiling
4109 		 * can't work for system-wide mode, but exact semantics
4110 		 * and details are to be defined.
4111 		 * See the following thread for details:
4112 		 * https://lore.kernel.org/all/Z4XDJyvjiie3howF@google.com/
4113 		 */
4114 		if (record.opts.target.system_wide) {
4115 			pr_err("Failed: latency profiling is not supported with system-wide collection.\n");
4116 			err = -EINVAL;
4117 			goto out_opts;
4118 		}
4119 		record.opts.record_switch_events = true;
4120 	}
4121 
4122 	if (!rec->buildid_mmap) {
4123 		pr_debug("Disabling build id in synthesized mmap2 events.\n");
4124 		symbol_conf.no_buildid_mmap2 = true;
4125 	} else if (rec->buildid_mmap_set) {
4126 		/*
4127 		 * Explicitly passing --buildid-mmap disables buildid processing
4128 		 * and cache generation.
4129 		 */
4130 		rec->no_buildid = true;
4131 	}
4132 	if (rec->buildid_mmap && !perf_can_record_build_id()) {
4133 		pr_warning("Missing support for build id in kernel mmap events.\n"
4134 			   "Disable this warning with --no-buildid-mmap\n");
4135 		rec->buildid_mmap = false;
4136 	}
4137 	if (rec->buildid_mmap) {
4138 		/* Enable perf_event_attr::build_id bit. */
4139 		rec->opts.build_id = true;
4140 	}
4141 
4142 	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
4143 		pr_err("Kernel has no cgroup sampling support.\n");
4144 		err = -EINVAL;
4145 		goto out_opts;
4146 	}
4147 
4148 	if (rec->opts.kcore)
4149 		rec->opts.text_poke = true;
4150 
4151 	if (rec->opts.kcore || record__threads_enabled(rec))
4152 		rec->data.is_dir = true;
4153 
4154 	if (record__threads_enabled(rec)) {
4155 		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
4156 			pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
4157 			goto out_opts;
4158 		}
4159 		if (record__aio_enabled(rec)) {
4160 			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
4161 			goto out_opts;
4162 		}
4163 	}
4164 
4165 	if (rec->opts.comp_level != 0) {
4166 		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
4167 		rec->no_buildid = true;
4168 	}
4169 
4170 	if (rec->opts.record_switch_events &&
4171 	    !perf_can_record_switch_events()) {
4172 		ui__error("kernel does not support recording context switch events\n");
4173 		parse_options_usage(record_usage, record_options, "switch-events", 0);
4174 		err = -EINVAL;
4175 		goto out_opts;
4176 	}
4177 
4178 	if (switch_output_setup(rec)) {
4179 		parse_options_usage(record_usage, record_options, "switch-output", 0);
4180 		err = -EINVAL;
4181 		goto out_opts;
4182 	}
4183 
4184 	if (rec->switch_output.time) {
4185 		signal(SIGALRM, alarm_sig_handler);
4186 		alarm(rec->switch_output.time);
4187 	}
4188 
4189 	if (rec->switch_output.num_files) {
4190 		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
4191 						      sizeof(char *));
4192 		if (!rec->switch_output.filenames) {
4193 			err = -EINVAL;
4194 			goto out_opts;
4195 		}
4196 	}
4197 
4198 	if (rec->timestamp_filename && record__threads_enabled(rec)) {
4199 		rec->timestamp_filename = false;
4200 		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
4201 	}
4202 
4203 	if (rec->filter_action) {
4204 		if (!strcmp(rec->filter_action, "pin"))
4205 			err = perf_bpf_filter__pin();
4206 		else if (!strcmp(rec->filter_action, "unpin"))
4207 			err = perf_bpf_filter__unpin();
4208 		else {
4209 			pr_warning("Unknown BPF filter action: %s\n", rec->filter_action);
4210 			err = -EINVAL;
4211 		}
4212 		goto out_opts;
4213 	}
4214 
4215 	/* For backward compatibility, -d implies --sample-mem-info */
4216 	if (rec->opts.sample_address)
4217 		rec->opts.sample_data_src = true;
4218 
4219 	/*
4220 	 * Allow aliases to facilitate the lookup of symbols for address
4221 	 * filters. Refer to auxtrace_parse_filters().
4222 	 */
4223 	symbol_conf.allow_aliases = true;
4224 
4225 	symbol__init(NULL);
4226 
4227 	err = record__auxtrace_init(rec);
4228 	if (err)
4229 		goto out;
4230 
4231 	if (dry_run)
4232 		goto out;
4233 
4234 	err = -ENOMEM;
4235 
4236 	if (rec->no_buildid_cache || rec->no_buildid) {
4237 		disable_buildid_cache();
4238 	} else if (rec->switch_output.enabled) {
4239 		/*
4240 		 * In 'perf record --switch-output', disable buildid
4241 		 * generation by default to reduce data file switching
4242 		 * overhead. Still generate buildids if they are required
4243 		 * explicitly using
4244 		 *
4245 		 *  perf record --switch-output --no-no-buildid \
4246 		 *              --no-no-buildid-cache
4247 		 *
4248 		 * The following code is equivalent to:
4249 		 *
4250 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
4251 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
4252 		 *         disable_buildid_cache();
4253 		 */
4254 		bool disable = true;
4255 
4256 		if (rec->no_buildid_set && !rec->no_buildid)
4257 			disable = false;
4258 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
4259 			disable = false;
4260 		if (disable) {
4261 			rec->no_buildid = true;
4262 			rec->no_buildid_cache = true;
4263 			disable_buildid_cache();
4264 		}
4265 	}
4266 
4267 	if (record.opts.overwrite)
4268 		record.opts.tail_synthesize = true;
4269 
4270 	if (rec->evlist->core.nr_entries == 0) {
4271 		err = parse_event(rec->evlist, "cycles:P");
4272 		if (err)
4273 			goto out;
4274 	}
4275 
4276 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
4277 		rec->opts.no_inherit = true;
4278 
4279 	err = target__validate(&rec->opts.target);
4280 	if (err) {
4281 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
4282 		ui__warning("%s\n", errbuf);
4283 	}
4284 
4285 	if (rec->uid_str) {
4286 		uid_t uid = parse_uid(rec->uid_str);
4287 
4288 		if (uid == UINT_MAX) {
4289 			ui__error("Invalid User: %s", rec->uid_str);
4290 			err = -EINVAL;
4291 			goto out;
4292 		}
4293 		err = parse_uid_filter(rec->evlist, uid);
4294 		if (err)
4295 			goto out;
4296 
4297 		/* User ID filtering implies system wide. */
4298 		rec->opts.target.system_wide = true;
4299 	}
4300 
4301 	/* Enable ignoring missing threads when -p option is defined. */
4302 	rec->opts.ignore_missing_thread = rec->opts.target.pid;
4303 
4304 	evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);
4305 
4306 	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
4307 		arch__add_leaf_frame_record_opts(&rec->opts);
4308 
4309 	err = -ENOMEM;
4310 	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
4311 		if (rec->opts.target.pid != NULL) {
4312 			pr_err("Couldn't create thread/CPU maps: %s\n",
4313 				errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
4314 			goto out;
4315 		}
4316 		else
4317 			usage_with_options(record_usage, record_options);
4318 	}
4319 
4320 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
4321 	if (err)
4322 		goto out;
4323 
4324 	/*
4325 	 * We take all buildids when the file contains
4326 	 * AUX area tracing data because we do not decode the
4327 	 * trace, as that would take too long.
4328 	 */
4329 	if (rec->opts.full_auxtrace)
4330 		rec->buildid_all = true;
4331 
4332 	if (rec->opts.text_poke) {
4333 		err = record__config_text_poke(rec->evlist);
4334 		if (err) {
4335 			pr_err("record__config_text_poke failed, error %d\n", err);
4336 			goto out;
4337 		}
4338 	}
4339 
4340 	if (rec->off_cpu) {
4341 		err = record__config_off_cpu(rec);
4342 		if (err) {
4343 			pr_err("record__config_off_cpu failed, error %d\n", err);
4344 			goto out;
4345 		}
4346 	}
4347 
4348 	if (record_opts__config(&rec->opts)) {
4349 		err = -EINVAL;
4350 		goto out;
4351 	}
4352 
4353 	err = record__config_tracking_events(rec);
4354 	if (err) {
4355 		pr_err("record__config_tracking_events failed, error %d\n", err);
4356 		goto out;
4357 	}
4358 
4359 	err = record__init_thread_masks(rec);
4360 	if (err) {
4361 		pr_err("Failed to initialize parallel data streaming masks\n");
4362 		goto out;
4363 	}
4364 
4365 	if (rec->opts.nr_cblocks > nr_cblocks_max)
4366 		rec->opts.nr_cblocks = nr_cblocks_max;
4367 	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
4368 
4369 	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
4370 	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
4371 
4372 	if (rec->opts.comp_level > comp_level_max)
4373 		rec->opts.comp_level = comp_level_max;
4374 	pr_debug("comp level: %d\n", rec->opts.comp_level);
4375 
4376 	err = __cmd_record(&record, argc, argv);
4377 out:
4378 	record__free_thread_masks(rec, rec->nr_threads);
4379 	rec->nr_threads = 0;
4380 	symbol__exit();
4381 	auxtrace_record__free(rec->itr);
4382 out_opts:
4383 	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
4384 	evlist__delete(rec->evlist);
4385 	return err;
4386 }
4387 
4388 static void snapshot_sig_handler(int sig __maybe_unused)
4389 {
4390 	struct record *rec = &record;
4391 
4392 	hit_auxtrace_snapshot_trigger(rec);
4393 
4394 	if (switch_output_signal(rec))
4395 		trigger_hit(&switch_output_trigger);
4396 }
4397 
4398 static void alarm_sig_handler(int sig __maybe_unused)
4399 {
4400 	struct record *rec = &record;
4401 
4402 	if (switch_output_time(rec))
4403 		trigger_hit(&switch_output_trigger);
4404 }
4405