xref: /linux/tools/perf/builtin-record.c (revision 9e906a9dead17d81d6c2687f65e159231d0e3286)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10 
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include <internal/xyarray.h>
14 #include "util/parse-events.h"
15 #include "util/config.h"
16 
17 #include "util/callchain.h"
18 #include "util/cgroup.h"
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/mmap.h"
25 #include "util/mutex.h"
26 #include "util/target.h"
27 #include "util/session.h"
28 #include "util/tool.h"
29 #include "util/stat.h"
30 #include "util/symbol.h"
31 #include "util/record.h"
32 #include "util/cpumap.h"
33 #include "util/thread_map.h"
34 #include "util/data.h"
35 #include "util/perf_regs.h"
36 #include "util/auxtrace.h"
37 #include "util/tsc.h"
38 #include "util/parse-branch-options.h"
39 #include "util/parse-regs-options.h"
40 #include "util/perf_api_probe.h"
41 #include "util/trigger.h"
42 #include "util/perf-hooks.h"
43 #include "util/cpu-set-sched.h"
44 #include "util/synthetic-events.h"
45 #include "util/time-utils.h"
46 #include "util/units.h"
47 #include "util/bpf-event.h"
48 #include "util/util.h"
49 #include "util/pfm.h"
50 #include "util/pmu.h"
51 #include "util/pmus.h"
52 #include "util/clockid.h"
53 #include "util/off_cpu.h"
54 #include "util/bpf-filter.h"
55 #include "util/strbuf.h"
56 #include "asm/bug.h"
57 #include "perf.h"
58 #include "cputopo.h"
59 
60 #include <errno.h>
61 #include <inttypes.h>
62 #include <locale.h>
63 #include <poll.h>
64 #include <pthread.h>
65 #include <unistd.h>
66 #ifndef HAVE_GETTID
67 #include <syscall.h>
68 #endif
69 #include <sched.h>
70 #include <signal.h>
71 #ifdef HAVE_EVENTFD_SUPPORT
72 #include <sys/eventfd.h>
73 #endif
74 #include <sys/mman.h>
75 #include <sys/wait.h>
76 #include <sys/types.h>
77 #include <sys/stat.h>
78 #include <fcntl.h>
79 #include <linux/err.h>
80 #include <linux/string.h>
81 #include <linux/time64.h>
82 #include <linux/zalloc.h>
83 #include <linux/bitmap.h>
84 #include <sys/time.h>
85 
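/*
 * Parameters and state for the --switch-output feature: rotate the perf.data
 * output when a signal (SIGUSR2) arrives, when a size threshold is crossed or
 * when a time interval elapses. filenames/num_files/cur_file track the limited
 * set of rotated output files kept on disk.
 */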
86 struct switch_output {
87 	bool		 enabled;
88 	bool		 signal;
89 	unsigned long	 size;
90 	unsigned long	 time;
91 	const char	*str;
92 	bool		 set;
93 	char		 **filenames;
94 	int		 num_files;
95 	int		 cur_file;
96 };
97 
98 struct thread_mask {
99 	struct mmap_cpu_mask	maps;
100 	struct mmap_cpu_mask	affinity;
101 };
102 
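/*
 * Per-thread state for parallel (--threads) trace streaming: the mmaps this
 * thread services, its pollfd set, msg/ack control pipes to the main thread,
 * and per-thread sample and byte counters.
 */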
103 struct record_thread {
104 	pid_t			tid;
105 	struct thread_mask	*mask;
106 	struct {
107 		int		msg[2];
108 		int		ack[2];
109 	} pipes;
110 	struct fdarray		pollfd;
111 	int			ctlfd_pos;
112 	int			nr_mmaps;
113 	struct mmap		**maps;
114 	struct mmap		**overwrite_maps;
115 	struct record		*rec;
116 	unsigned long long	samples;
117 	unsigned long		waking;
118 	u64			bytes_written;
119 	u64			bytes_transferred;
120 	u64			bytes_compressed;
121 };
122 
123 static __thread struct record_thread *thread;
124 
125 enum thread_msg {
126 	THREAD_MSG__UNDEFINED = 0,
127 	THREAD_MSG__READY,
128 	THREAD_MSG__MAX,
129 };
130 
131 static const char *thread_msg_tags[THREAD_MSG__MAX] = {
132 	"UNDEFINED", "READY"
133 };
134 
135 enum thread_spec {
136 	THREAD_SPEC__UNDEFINED = 0,
137 	THREAD_SPEC__CPU,
138 	THREAD_SPEC__CORE,
139 	THREAD_SPEC__PACKAGE,
140 	THREAD_SPEC__NUMA,
141 	THREAD_SPEC__USER,
142 	THREAD_SPEC__MAX,
143 };
144 
145 static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
146 	"undefined", "cpu", "core", "package", "numa", "user"
147 };
148 
149 struct pollfd_index_map {
150 	int evlist_pollfd_index;
151 	int thread_pollfd_index;
152 };
153 
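/*
 * Overall state of a perf record session: tool callbacks, recording options,
 * the output perf_data, AUX area tracing state, the event lists, build-id and
 * switch-output settings, plus the thread masks and per-thread data used for
 * parallel trace streaming.
 */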
154 struct record {
155 	struct perf_tool	tool;
156 	struct record_opts	opts;
157 	u64			bytes_written;
158 	u64			thread_bytes_written;
159 	struct perf_data	data;
160 	struct auxtrace_record	*itr;
161 	struct evlist	*evlist;
162 	struct perf_session	*session;
163 	struct evlist		*sb_evlist;
164 	pthread_t		thread_id;
165 	int			realtime_prio;
166 	bool			latency;
167 	bool			switch_output_event_set;
168 	bool			no_buildid;
169 	bool			no_buildid_set;
170 	bool			no_buildid_cache;
171 	bool			no_buildid_cache_set;
172 	bool			buildid_all;
173 	bool			buildid_mmap;
174 	bool			buildid_mmap_set;
175 	bool			timestamp_filename;
176 	bool			timestamp_boundary;
177 	bool			off_cpu;
178 	const char		*filter_action;
179 	const char		*uid_str;
180 	struct switch_output	switch_output;
181 	unsigned long long	samples;
182 	unsigned long		output_max_size;	/* = 0: unlimited */
183 	struct perf_debuginfod	debuginfod;
184 	int			nr_threads;
185 	struct thread_mask	*thread_masks;
186 	struct record_thread	*thread_data;
187 	struct pollfd_index_map	*index_map;
188 	size_t			index_map_sz;
189 	size_t			index_map_cnt;
190 };
191 
192 static volatile int done;
193 
194 static volatile int auxtrace_record__snapshot_started;
195 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
196 static DEFINE_TRIGGER(switch_output_trigger);
197 
198 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
199 	"SYS", "NODE", "CPU"
200 };
201 
202 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
203 				  struct perf_sample *sample, struct machine *machine);
204 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
205 				   struct perf_sample *sample, struct machine *machine);
206 static int process_timestamp_boundary(const struct perf_tool *tool,
207 				      union perf_event *event,
208 				      struct perf_sample *sample,
209 				      struct machine *machine);
210 
211 #ifndef HAVE_GETTID
212 static inline pid_t gettid(void)
213 {
214 	return (pid_t)syscall(__NR_gettid);
215 }
216 #endif
217 
218 static int record__threads_enabled(struct record *rec)
219 {
220 	return rec->opts.threads_spec;
221 }
222 
223 static bool switch_output_signal(struct record *rec)
224 {
225 	return rec->switch_output.signal &&
226 	       trigger_is_ready(&switch_output_trigger);
227 }
228 
229 static bool switch_output_size(struct record *rec)
230 {
231 	return rec->switch_output.size &&
232 	       trigger_is_ready(&switch_output_trigger) &&
233 	       (rec->bytes_written >= rec->switch_output.size);
234 }
235 
236 static bool switch_output_time(struct record *rec)
237 {
238 	return rec->switch_output.time &&
239 	       trigger_is_ready(&switch_output_trigger);
240 }
241 
242 static u64 record__bytes_written(struct record *rec)
243 {
244 	return rec->bytes_written + rec->thread_bytes_written;
245 }
246 
247 static bool record__output_max_size_exceeded(struct record *rec)
248 {
249 	return rec->output_max_size &&
250 	       (record__bytes_written(rec) >= rec->output_max_size);
251 }
252 
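/*
 * Write @size bytes from @bf to the mmap's own file when parallel streaming
 * gives each mmap a separate file, otherwise to the session's perf.data file.
 * Updates the byte counters, stops the session once the output size limit
 * (output_max_size) is exceeded and arms output switching when the
 * switch-output size threshold is reached.
 */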
253 static int record__write(struct record *rec, struct mmap *map __maybe_unused,
254 			 void *bf, size_t size)
255 {
256 	struct perf_data_file *file = &rec->session->data->file;
257 
258 	if (map && map->file)
259 		file = map->file;
260 
261 	if (perf_data_file__write(file, bf, size) < 0) {
262 		pr_err("failed to write perf data, error: %m\n");
263 		return -1;
264 	}
265 
266 	if (map && map->file) {
267 		thread->bytes_written += size;
268 		rec->thread_bytes_written += size;
269 	} else {
270 		rec->bytes_written += size;
271 	}
272 
273 	if (record__output_max_size_exceeded(rec) && !done) {
274 		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
275 				" stopping session ]\n",
276 				record__bytes_written(rec) >> 10);
277 		done = 1;
278 	}
279 
280 	if (switch_output_size(rec))
281 		trigger_hit(&switch_output_trigger);
282 
283 	return 0;
284 }
285 
286 static int record__aio_enabled(struct record *rec);
287 static int record__comp_enabled(struct record *rec);
288 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
289 			    void *dst, size_t dst_size, void *src, size_t src_size);
290 
291 #ifdef HAVE_AIO_SUPPORT
292 static int record__aio_write(struct aiocb *cblock, int trace_fd,
293 		void *buf, size_t size, off_t off)
294 {
295 	int rc;
296 
297 	cblock->aio_fildes = trace_fd;
298 	cblock->aio_buf    = buf;
299 	cblock->aio_nbytes = size;
300 	cblock->aio_offset = off;
301 	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
302 
303 	do {
304 		rc = aio_write(cblock);
305 		if (rc == 0) {
306 			break;
307 		} else if (errno != EAGAIN) {
308 			cblock->aio_fildes = -1;
309 			pr_err("failed to queue perf data, error: %m\n");
310 			break;
311 		}
312 	} while (1);
313 
314 	return rc;
315 }
316 
317 static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
318 {
319 	void *rem_buf;
320 	off_t rem_off;
321 	size_t rem_size;
322 	int rc, aio_errno;
323 	ssize_t aio_ret, written;
324 
325 	aio_errno = aio_error(cblock);
326 	if (aio_errno == EINPROGRESS)
327 		return 0;
328 
329 	written = aio_ret = aio_return(cblock);
330 	if (aio_ret < 0) {
331 		if (aio_errno != EINTR)
332 			pr_err("failed to write perf data, error: %m\n");
333 		written = 0;
334 	}
335 
336 	rem_size = cblock->aio_nbytes - written;
337 
338 	if (rem_size == 0) {
339 		cblock->aio_fildes = -1;
340 		/*
341 		 * md->refcount is incremented in record__aio_pushfn() for
342 		 * every aio write request started in record__aio_push() so
343 		 * decrement it because the request is now complete.
344 		 */
345 		perf_mmap__put(&md->core);
346 		rc = 1;
347 	} else {
348 		/*
349 		 * The aio write request may require a restart with the
350 		 * remainder if the kernel didn't write the whole
351 		 * chunk at once.
352 		 */
353 		rem_off = cblock->aio_offset + written;
354 		rem_buf = (void *)(cblock->aio_buf + written);
355 		record__aio_write(cblock, cblock->aio_fildes,
356 				rem_buf, rem_size, rem_off);
357 		rc = 0;
358 	}
359 
360 	return rc;
361 }
362 
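/*
 * Reap completed aio writes for this mmap. With sync_all == false, return the
 * index of the first free control block, suspending until one completes if
 * necessary. With sync_all == true, wait until every outstanding write has
 * completed and return -1.
 */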
363 static int record__aio_sync(struct mmap *md, bool sync_all)
364 {
365 	struct aiocb **aiocb = md->aio.aiocb;
366 	struct aiocb *cblocks = md->aio.cblocks;
367 	struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
368 	int i, do_suspend;
369 
370 	do {
371 		do_suspend = 0;
372 		for (i = 0; i < md->aio.nr_cblocks; ++i) {
373 			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
374 				if (sync_all)
375 					aiocb[i] = NULL;
376 				else
377 					return i;
378 			} else {
379 				/*
380 				 * The started aio write is not complete yet,
381 				 * so it has to be waited for before the
382 				 * next allocation.
383 				 */
384 				aiocb[i] = &cblocks[i];
385 				do_suspend = 1;
386 			}
387 		}
388 		if (!do_suspend)
389 			return -1;
390 
391 		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
392 			if (!(errno == EAGAIN || errno == EINTR))
393 				pr_err("failed to sync perf data, error: %m\n");
394 		}
395 	} while (1);
396 }
397 
398 struct record_aio {
399 	struct record	*rec;
400 	void		*data;
401 	size_t		size;
402 };
403 
404 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
405 {
406 	struct record_aio *aio = to;
407 
408 	/*
409 	 * The map->core.base data pointed to by buf is copied into a free map->aio.data[]
410 	 * buffer to release space in the kernel buffer as fast as possible, calling
411 	 * perf_mmap__consume() from the perf_mmap__push() function.
412 	 *
413 	 * That lets the kernel proceed with storing more profiling data into
414 	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
415 	 *
416 	 * Copying can be done in two steps in case the chunk of profiling data
417 	 * crosses the upper bound of the kernel buffer. In this case we first move
418 	 * part of data from map->start till the upper bound and then the remainder
419 	 * from the beginning of the kernel buffer till the end of the data chunk.
420 	 */
421 
422 	if (record__comp_enabled(aio->rec)) {
423 		ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
424 						   mmap__mmap_len(map) - aio->size,
425 						   buf, size);
426 		if (compressed < 0)
427 			return (int)compressed;
428 
429 		size = compressed;
430 	} else {
431 		memcpy(aio->data + aio->size, buf, size);
432 	}
433 
434 	if (!aio->size) {
435 		/*
436 		 * Increment map->refcount to guard map->aio.data[] buffer
437 		 * from premature deallocation because the map object can be
438 		 * released before the aio write request started on the
439 		 * map->aio.data[] buffer completes.
440 		 *
441 		 * perf_mmap__put() is done at record__aio_complete()
442 		 * after started aio request completion or at record__aio_push()
443 		 * if the request failed to start.
444 		 */
445 		perf_mmap__get(&map->core);
446 	}
447 
448 	aio->size += size;
449 
450 	return size;
451 }
452 
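/*
 * Push the mmap contents using aio: wait for a free aio buffer, let
 * perf_mmap__push() copy (and optionally compress) the data into it via
 * record__aio_pushfn(), then queue an asynchronous write at offset *off and
 * advance the offset on success.
 */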
453 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
454 {
455 	int ret, idx;
456 	int trace_fd = rec->session->data->file.fd;
457 	struct record_aio aio = { .rec = rec, .size = 0 };
458 
459 	/*
460 	 * Call record__aio_sync() to wait till map->aio.data[] buffer
461 	 * becomes available after previous aio write operation.
462 	 */
463 
464 	idx = record__aio_sync(map, false);
465 	aio.data = map->aio.data[idx];
466 	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
467 	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
468 		return ret;
469 
470 	rec->samples++;
471 	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
472 	if (!ret) {
473 		*off += aio.size;
474 		rec->bytes_written += aio.size;
475 		if (switch_output_size(rec))
476 			trigger_hit(&switch_output_trigger);
477 	} else {
478 		/*
479 		 * Decrement map->refcount incremented in record__aio_pushfn()
480 		 * back if record__aio_write() operation failed to start, otherwise
481 		 * map->refcount is decremented in record__aio_complete() after
482 		 * aio write operation finishes successfully.
483 		 */
484 		perf_mmap__put(&map->core);
485 	}
486 
487 	return ret;
488 }
489 
490 static off_t record__aio_get_pos(int trace_fd)
491 {
492 	return lseek(trace_fd, 0, SEEK_CUR);
493 }
494 
495 static void record__aio_set_pos(int trace_fd, off_t pos)
496 {
497 	lseek(trace_fd, pos, SEEK_SET);
498 }
499 
500 static void record__aio_mmap_read_sync(struct record *rec)
501 {
502 	int i;
503 	struct evlist *evlist = rec->evlist;
504 	struct mmap *maps = evlist->mmap;
505 
506 	if (!record__aio_enabled(rec))
507 		return;
508 
509 	for (i = 0; i < evlist->core.nr_mmaps; i++) {
510 		struct mmap *map = &maps[i];
511 
512 		if (map->core.base)
513 			record__aio_sync(map, true);
514 	}
515 }
516 
517 static int nr_cblocks_default = 1;
518 static int nr_cblocks_max = 4;
519 
520 static int record__aio_parse(const struct option *opt,
521 			     const char *str,
522 			     int unset)
523 {
524 	struct record_opts *opts = (struct record_opts *)opt->value;
525 
526 	if (unset) {
527 		opts->nr_cblocks = 0;
528 	} else {
529 		if (str)
530 			opts->nr_cblocks = strtol(str, NULL, 0);
531 		if (!opts->nr_cblocks)
532 			opts->nr_cblocks = nr_cblocks_default;
533 	}
534 
535 	return 0;
536 }
537 #else /* HAVE_AIO_SUPPORT */
538 static int nr_cblocks_max = 0;
539 
540 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
541 			    off_t *off __maybe_unused)
542 {
543 	return -1;
544 }
545 
546 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
547 {
548 	return -1;
549 }
550 
551 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
552 {
553 }
554 
555 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
556 {
557 }
558 #endif
559 
560 static int record__aio_enabled(struct record *rec)
561 {
562 	return rec->opts.nr_cblocks > 0;
563 }
564 
565 #define MMAP_FLUSH_DEFAULT 1
566 static int record__mmap_flush_parse(const struct option *opt,
567 				    const char *str,
568 				    int unset)
569 {
570 	int flush_max;
571 	struct record_opts *opts = (struct record_opts *)opt->value;
572 	static struct parse_tag tags[] = {
573 			{ .tag  = 'B', .mult = 1       },
574 			{ .tag  = 'K', .mult = 1 << 10 },
575 			{ .tag  = 'M', .mult = 1 << 20 },
576 			{ .tag  = 'G', .mult = 1 << 30 },
577 			{ .tag  = 0 },
578 	};
579 
580 	if (unset)
581 		return 0;
582 
583 	if (str) {
584 		opts->mmap_flush = parse_tag_value(str, tags);
585 		if (opts->mmap_flush == (int)-1)
586 			opts->mmap_flush = strtol(str, NULL, 0);
587 	}
588 
589 	if (!opts->mmap_flush)
590 		opts->mmap_flush = MMAP_FLUSH_DEFAULT;
591 
592 	flush_max = evlist__mmap_size(opts->mmap_pages);
593 	flush_max /= 4;
594 	if (opts->mmap_flush > flush_max)
595 		opts->mmap_flush = flush_max;
596 
597 	return 0;
598 }
599 
600 #ifdef HAVE_ZSTD_SUPPORT
601 static unsigned int comp_level_default = 1;
602 
603 static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
604 {
605 	struct record_opts *opts = opt->value;
606 
607 	if (unset) {
608 		opts->comp_level = 0;
609 	} else {
610 		if (str)
611 			opts->comp_level = strtol(str, NULL, 0);
612 		if (!opts->comp_level)
613 			opts->comp_level = comp_level_default;
614 	}
615 
616 	return 0;
617 }
618 #endif
619 static unsigned int comp_level_max = 22;
620 
621 static int record__comp_enabled(struct record *rec)
622 {
623 	return rec->opts.comp_level > 0;
624 }
625 
626 static int process_synthesized_event(const struct perf_tool *tool,
627 				     union perf_event *event,
628 				     struct perf_sample *sample __maybe_unused,
629 				     struct machine *machine __maybe_unused)
630 {
631 	struct record *rec = container_of(tool, struct record, tool);
632 	return record__write(rec, NULL, event, event->header.size);
633 }
634 
635 static struct mutex synth_lock;
636 
637 static int process_locked_synthesized_event(const struct perf_tool *tool,
638 				     union perf_event *event,
639 				     struct perf_sample *sample __maybe_unused,
640 				     struct machine *machine __maybe_unused)
641 {
642 	int ret;
643 
644 	mutex_lock(&synth_lock);
645 	ret = process_synthesized_event(tool, event, sample, machine);
646 	mutex_unlock(&synth_lock);
647 	return ret;
648 }
649 
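/*
 * Push callback used when aio is disabled: optionally Zstd-compress the chunk
 * into a PERF_RECORD_COMPRESSED2 record, pad it to 8-byte alignment and write
 * it out via record__write().
 */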
650 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
651 {
652 	struct record *rec = to;
653 
654 	if (record__comp_enabled(rec)) {
655 		struct perf_record_compressed2 *event = map->data;
656 		size_t padding = 0;
657 		u8 pad[8] = {0};
658 		ssize_t compressed = zstd_compress(rec->session, map, map->data,
659 						   mmap__mmap_len(map), bf, size);
660 
661 		if (compressed < 0)
662 			return (int)compressed;
663 
664 		bf = event;
665 		thread->samples++;
666 
667 		/*
668 		 * The record from zstd_compress() is not 8-byte aligned, which would
669 		 * cause an ASan error, so make it aligned here.
670 		 */
671 		event->data_size = compressed - sizeof(struct perf_record_compressed2);
672 		event->header.size = PERF_ALIGN(compressed, sizeof(u64));
673 		padding = event->header.size - compressed;
674 		return record__write(rec, map, bf, compressed) ||
675 		       record__write(rec, map, &pad, padding);
676 	}
677 
678 	thread->samples++;
679 	return record__write(rec, map, bf, size);
680 }
681 
682 static volatile sig_atomic_t signr = -1;
683 static volatile sig_atomic_t child_finished;
684 #ifdef HAVE_EVENTFD_SUPPORT
685 static volatile sig_atomic_t done_fd = -1;
686 #endif
687 
688 static void sig_handler(int sig)
689 {
690 	if (sig == SIGCHLD)
691 		child_finished = 1;
692 	else
693 		signr = sig;
694 
695 	done = 1;
696 #ifdef HAVE_EVENTFD_SUPPORT
697 	if (done_fd >= 0) {
698 		u64 tmp = 1;
699 		int orig_errno = errno;
700 
701 		/*
702 		 * It is possible for this signal handler to run after done is
703 		 * checked in the main loop, but before the perf counter fds are
704 		 * polled. If this happens, the poll() will continue to wait
705 		 * even though done is set, and will only break out if either
706 		 * another signal is received, or the counters are ready for
707 		 * read. To ensure the poll() doesn't sleep when done is set,
708 		 * use an eventfd (done_fd) to wake up the poll().
709 		 */
710 		if (write(done_fd, &tmp, sizeof(tmp)) < 0)
711 			pr_err("failed to signal wakeup fd, error: %m\n");
712 
713 		errno = orig_errno;
714 	}
715 #endif // HAVE_EVENTFD_SUPPORT
716 }
717 
718 static void sigsegv_handler(int sig)
719 {
720 	perf_hooks__recover();
721 	sighandler_dump_stack(sig);
722 }
723 
724 static void record__sig_exit(void)
725 {
726 	if (signr == -1)
727 		return;
728 
729 	signal(signr, SIG_DFL);
730 	raise(signr);
731 }
732 
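/*
 * Write out an AUX area tracing event: for single-file (non-pipe) output note
 * the event's file offset in the auxtrace index, then write the event header
 * followed by data1, optional data2 (buffer wrap-around) and padding up to an
 * 8-byte boundary.
 */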
733 static int record__process_auxtrace(const struct perf_tool *tool,
734 				    struct mmap *map,
735 				    union perf_event *event, void *data1,
736 				    size_t len1, void *data2, size_t len2)
737 {
738 	struct record *rec = container_of(tool, struct record, tool);
739 	struct perf_data *data = &rec->data;
740 	size_t padding;
741 	u8 pad[8] = {0};
742 
743 	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
744 		off_t file_offset;
745 		int fd = perf_data__fd(data);
746 		int err;
747 
748 		file_offset = lseek(fd, 0, SEEK_CUR);
749 		if (file_offset == -1)
750 			return -1;
751 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
752 						     event, file_offset);
753 		if (err)
754 			return err;
755 	}
756 
757 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
758 	padding = (len1 + len2) & 7;
759 	if (padding)
760 		padding = 8 - padding;
761 
762 	record__write(rec, map, event, event->header.size);
763 	record__write(rec, map, data1, len1);
764 	if (len2)
765 		record__write(rec, map, data2, len2);
766 	record__write(rec, map, &pad, padding);
767 
768 	return 0;
769 }
770 
771 static int record__auxtrace_mmap_read(struct record *rec,
772 				      struct mmap *map)
773 {
774 	int ret;
775 
776 	ret = auxtrace_mmap__read(map, rec->itr,
777 				  perf_session__env(rec->session),
778 				  &rec->tool,
779 				  record__process_auxtrace);
780 	if (ret < 0)
781 		return ret;
782 
783 	if (ret)
784 		rec->samples++;
785 
786 	return 0;
787 }
788 
789 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
790 					       struct mmap *map)
791 {
792 	int ret;
793 
794 	ret = auxtrace_mmap__read_snapshot(map, rec->itr,
795 					   perf_session__env(rec->session),
796 					   &rec->tool,
797 					   record__process_auxtrace,
798 					   rec->opts.auxtrace_snapshot_size);
799 	if (ret < 0)
800 		return ret;
801 
802 	if (ret)
803 		rec->samples++;
804 
805 	return 0;
806 }
807 
808 static int record__auxtrace_read_snapshot_all(struct record *rec)
809 {
810 	int i;
811 	int rc = 0;
812 
813 	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
814 		struct mmap *map = &rec->evlist->mmap[i];
815 
816 		if (!map->auxtrace_mmap.base)
817 			continue;
818 
819 		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
820 			rc = -1;
821 			goto out;
822 		}
823 	}
824 out:
825 	return rc;
826 }
827 
828 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
829 {
830 	pr_debug("Recording AUX area tracing snapshot\n");
831 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
832 		trigger_error(&auxtrace_snapshot_trigger);
833 	} else {
834 		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
835 			trigger_error(&auxtrace_snapshot_trigger);
836 		else
837 			trigger_ready(&auxtrace_snapshot_trigger);
838 	}
839 }
840 
841 static int record__auxtrace_snapshot_exit(struct record *rec)
842 {
843 	if (trigger_is_error(&auxtrace_snapshot_trigger))
844 		return 0;
845 
846 	if (!auxtrace_record__snapshot_started &&
847 	    auxtrace_record__snapshot_start(rec->itr))
848 		return -1;
849 
850 	record__read_auxtrace_snapshot(rec, true);
851 	if (trigger_is_error(&auxtrace_snapshot_trigger))
852 		return -1;
853 
854 	return 0;
855 }
856 
857 static int record__auxtrace_init(struct record *rec)
858 {
859 	int err;
860 
861 	if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
862 	    && record__threads_enabled(rec)) {
863 		pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
864 		return -EINVAL;
865 	}
866 
867 	if (!rec->itr) {
868 		rec->itr = auxtrace_record__init(rec->evlist, &err);
869 		if (err)
870 			return err;
871 	}
872 
873 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
874 					      rec->opts.auxtrace_snapshot_opts);
875 	if (err)
876 		return err;
877 
878 	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
879 					    rec->opts.auxtrace_sample_opts);
880 	if (err)
881 		return err;
882 
883 	err = auxtrace_parse_aux_action(rec->evlist);
884 	if (err)
885 		return err;
886 
887 	return auxtrace_parse_filters(rec->evlist);
888 }
889 
890 static int record__config_text_poke(struct evlist *evlist)
891 {
892 	struct evsel *evsel;
893 
894 	/* Nothing to do if text poke is already configured */
895 	evlist__for_each_entry(evlist, evsel) {
896 		if (evsel->core.attr.text_poke)
897 			return 0;
898 	}
899 
900 	evsel = evlist__add_dummy_on_all_cpus(evlist);
901 	if (!evsel)
902 		return -ENOMEM;
903 
904 	evsel->core.attr.text_poke = 1;
905 	evsel->core.attr.ksymbol = 1;
906 	evsel->immediate = true;
907 	evsel__set_sample_bit(evsel, TIME);
908 
909 	return 0;
910 }
911 
912 static int record__config_off_cpu(struct record *rec)
913 {
914 	return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
915 }
916 
917 static bool record__tracking_system_wide(struct record *rec)
918 {
919 	struct evlist *evlist = rec->evlist;
920 	struct evsel *evsel;
921 
922 	/*
923 	 * If a non-dummy evsel exists, system-wide sideband data is needed to
924 	 * help parse sample information.
925 	 * For example, PERF_RECORD_MMAP events help resolve symbols,
926 	 * and PERF_RECORD_COMM events help resolve task executable names.
927 	 */
928 	evlist__for_each_entry(evlist, evsel) {
929 		if (!evsel__is_dummy_event(evsel))
930 			return true;
931 	}
932 
933 	return false;
934 }
935 
936 static int record__config_tracking_events(struct record *rec)
937 {
938 	struct record_opts *opts = &rec->opts;
939 	struct evlist *evlist = rec->evlist;
940 	bool system_wide = false;
941 	struct evsel *evsel;
942 
943 	/*
944 	 * For initial_delay, system wide, or a hybrid system, we need to add
945 	 * a tracking event so that we can track PERF_RECORD_MMAP to cover the
946 	 * delay of waiting or of event synthesis.
947 	 */
948 	if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
949 	    perf_pmus__num_core_pmus() > 1) {
950 		/*
951 		 * User space tasks can migrate between CPUs, so when tracing
952 		 * selected CPUs, sideband for all CPUs is still needed.
953 		 */
954 		if (!!opts->target.cpu_list && record__tracking_system_wide(rec))
955 			system_wide = true;
956 
957 		evsel = evlist__findnew_tracking_event(evlist, system_wide);
958 		if (!evsel)
959 			return -ENOMEM;
960 
961 		/*
962 		 * Enable the tracking event when the process is forked for
963 		 * initial_delay, immediately for system wide.
964 		 */
965 		if (opts->target.initial_delay && !evsel->immediate &&
966 		    !target__has_cpu(&opts->target))
967 			evsel->core.attr.enable_on_exec = 1;
968 		else
969 			evsel->immediate = 1;
970 	}
971 
972 	return 0;
973 }
974 
975 static bool record__kcore_readable(struct machine *machine)
976 {
977 	char kcore[PATH_MAX];
978 	int fd;
979 
980 	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
981 
982 	fd = open(kcore, O_RDONLY);
983 	if (fd < 0)
984 		return false;
985 
986 	close(fd);
987 
988 	return true;
989 }
990 
991 static int record__kcore_copy(struct machine *machine, struct perf_data *data)
992 {
993 	char from_dir[PATH_MAX];
994 	char kcore_dir[PATH_MAX];
995 	int ret;
996 
997 	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
998 
999 	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
1000 	if (ret)
1001 		return ret;
1002 
1003 	return kcore_copy(from_dir, kcore_dir);
1004 }
1005 
1006 static void record__thread_data_init_pipes(struct record_thread *thread_data)
1007 {
1008 	thread_data->pipes.msg[0] = -1;
1009 	thread_data->pipes.msg[1] = -1;
1010 	thread_data->pipes.ack[0] = -1;
1011 	thread_data->pipes.ack[1] = -1;
1012 }
1013 
1014 static int record__thread_data_open_pipes(struct record_thread *thread_data)
1015 {
1016 	if (pipe(thread_data->pipes.msg))
1017 		return -EINVAL;
1018 
1019 	if (pipe(thread_data->pipes.ack)) {
1020 		close(thread_data->pipes.msg[0]);
1021 		thread_data->pipes.msg[0] = -1;
1022 		close(thread_data->pipes.msg[1]);
1023 		thread_data->pipes.msg[1] = -1;
1024 		return -EINVAL;
1025 	}
1026 
1027 	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
1028 		 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
1029 		 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
1030 
1031 	return 0;
1032 }
1033 
1034 static void record__thread_data_close_pipes(struct record_thread *thread_data)
1035 {
1036 	if (thread_data->pipes.msg[0] != -1) {
1037 		close(thread_data->pipes.msg[0]);
1038 		thread_data->pipes.msg[0] = -1;
1039 	}
1040 	if (thread_data->pipes.msg[1] != -1) {
1041 		close(thread_data->pipes.msg[1]);
1042 		thread_data->pipes.msg[1] = -1;
1043 	}
1044 	if (thread_data->pipes.ack[0] != -1) {
1045 		close(thread_data->pipes.ack[0]);
1046 		thread_data->pipes.ack[0] = -1;
1047 	}
1048 	if (thread_data->pipes.ack[1] != -1) {
1049 		close(thread_data->pipes.ack[1]);
1050 		thread_data->pipes.ack[1] = -1;
1051 	}
1052 }
1053 
1054 static bool evlist__per_thread(struct evlist *evlist)
1055 {
1056 	return cpu_map__is_dummy(evlist->core.user_requested_cpus);
1057 }
1058 
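/*
 * Hand this thread the subset of the evlist's mmaps it will service: every
 * mmap in per-thread mode, otherwise only the mmaps whose CPU is set in the
 * thread's maps mask.
 */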
1059 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
1060 {
1061 	int m, tm, nr_mmaps = evlist->core.nr_mmaps;
1062 	struct mmap *mmap = evlist->mmap;
1063 	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
1064 	struct perf_cpu_map *cpus = evlist->core.all_cpus;
1065 	bool per_thread = evlist__per_thread(evlist);
1066 
1067 	if (per_thread)
1068 		thread_data->nr_mmaps = nr_mmaps;
1069 	else
1070 		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
1071 						      thread_data->mask->maps.nbits);
1072 	if (mmap) {
1073 		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1074 		if (!thread_data->maps)
1075 			return -ENOMEM;
1076 	}
1077 	if (overwrite_mmap) {
1078 		thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1079 		if (!thread_data->overwrite_maps) {
1080 			zfree(&thread_data->maps);
1081 			return -ENOMEM;
1082 		}
1083 	}
1084 	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
1085 		 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
1086 
1087 	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
1088 		if (per_thread ||
1089 		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
1090 			if (thread_data->maps) {
1091 				thread_data->maps[tm] = &mmap[m];
1092 				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
1093 					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1094 			}
1095 			if (thread_data->overwrite_maps) {
1096 				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
1097 				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
1098 					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1099 			}
1100 			tm++;
1101 		}
1102 	}
1103 
1104 	return 0;
1105 }
1106 
1107 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
1108 {
1109 	int f, tm, pos;
1110 	struct mmap *map, *overwrite_map;
1111 
1112 	fdarray__init(&thread_data->pollfd, 64);
1113 
1114 	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
1115 		map = thread_data->maps ? thread_data->maps[tm] : NULL;
1116 		overwrite_map = thread_data->overwrite_maps ?
1117 				thread_data->overwrite_maps[tm] : NULL;
1118 
1119 		for (f = 0; f < evlist->core.pollfd.nr; f++) {
1120 			void *ptr = evlist->core.pollfd.priv[f].ptr;
1121 
1122 			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
1123 				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
1124 							      &evlist->core.pollfd);
1125 				if (pos < 0)
1126 					return pos;
1127 				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
1128 					 thread_data, pos, evlist->core.pollfd.entries[f].fd);
1129 			}
1130 		}
1131 	}
1132 
1133 	return 0;
1134 }
1135 
1136 static void record__free_thread_data(struct record *rec)
1137 {
1138 	int t;
1139 	struct record_thread *thread_data = rec->thread_data;
1140 
1141 	if (thread_data == NULL)
1142 		return;
1143 
1144 	for (t = 0; t < rec->nr_threads; t++) {
1145 		record__thread_data_close_pipes(&thread_data[t]);
1146 		zfree(&thread_data[t].maps);
1147 		zfree(&thread_data[t].overwrite_maps);
1148 		fdarray__exit(&thread_data[t].pollfd);
1149 	}
1150 
1151 	zfree(&rec->thread_data);
1152 }
1153 
1154 static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
1155 						    int evlist_pollfd_index,
1156 						    int thread_pollfd_index)
1157 {
1158 	size_t x = rec->index_map_cnt;
1159 
1160 	if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
1161 		return -ENOMEM;
1162 	rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
1163 	rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
1164 	rec->index_map_cnt += 1;
1165 	return 0;
1166 }
1167 
1168 static int record__update_evlist_pollfd_from_thread(struct record *rec,
1169 						    struct evlist *evlist,
1170 						    struct record_thread *thread_data)
1171 {
1172 	struct pollfd *e_entries = evlist->core.pollfd.entries;
1173 	struct pollfd *t_entries = thread_data->pollfd.entries;
1174 	int err = 0;
1175 	size_t i;
1176 
1177 	for (i = 0; i < rec->index_map_cnt; i++) {
1178 		int e_pos = rec->index_map[i].evlist_pollfd_index;
1179 		int t_pos = rec->index_map[i].thread_pollfd_index;
1180 
1181 		if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
1182 		    e_entries[e_pos].events != t_entries[t_pos].events) {
1183 			pr_err("Thread and evlist pollfd index mismatch\n");
1184 			err = -EINVAL;
1185 			continue;
1186 		}
1187 		e_entries[e_pos].revents = t_entries[t_pos].revents;
1188 	}
1189 	return err;
1190 }
1191 
1192 static int record__dup_non_perf_events(struct record *rec,
1193 				       struct evlist *evlist,
1194 				       struct record_thread *thread_data)
1195 {
1196 	struct fdarray *fda = &evlist->core.pollfd;
1197 	int i, ret;
1198 
1199 	for (i = 0; i < fda->nr; i++) {
1200 		if (!(fda->priv[i].flags & fdarray_flag__non_perf_event))
1201 			continue;
1202 		ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda);
1203 		if (ret < 0) {
1204 			pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1205 			return ret;
1206 		}
1207 		pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
1208 			  thread_data, ret, fda->entries[i].fd);
1209 		ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret);
1210 		if (ret < 0) {
1211 			pr_err("Failed to map thread and evlist pollfd indexes\n");
1212 			return ret;
1213 		}
1214 	}
1215 	return 0;
1216 }
1217 
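/*
 * Allocate and initialize the per-thread data: mmap and pollfd assignment for
 * every thread, plus msg/ack control pipes for the worker threads (index > 0).
 * Index 0 is the main thread; it instead duplicates the non-perf-event
 * descriptors (e.g. the control fd) into its own pollfd set.
 */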
1218 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
1219 {
1220 	int t, ret;
1221 	struct record_thread *thread_data;
1222 
1223 	rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
1224 	if (!rec->thread_data) {
1225 		pr_err("Failed to allocate thread data\n");
1226 		return -ENOMEM;
1227 	}
1228 	thread_data = rec->thread_data;
1229 
1230 	for (t = 0; t < rec->nr_threads; t++)
1231 		record__thread_data_init_pipes(&thread_data[t]);
1232 
1233 	for (t = 0; t < rec->nr_threads; t++) {
1234 		thread_data[t].rec = rec;
1235 		thread_data[t].mask = &rec->thread_masks[t];
1236 		ret = record__thread_data_init_maps(&thread_data[t], evlist);
1237 		if (ret) {
1238 			pr_err("Failed to initialize thread[%d] maps\n", t);
1239 			goto out_free;
1240 		}
1241 		ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
1242 		if (ret) {
1243 			pr_err("Failed to initialize thread[%d] pollfd\n", t);
1244 			goto out_free;
1245 		}
1246 		if (t) {
1247 			thread_data[t].tid = -1;
1248 			ret = record__thread_data_open_pipes(&thread_data[t]);
1249 			if (ret) {
1250 				pr_err("Failed to open thread[%d] communication pipes\n", t);
1251 				goto out_free;
1252 			}
1253 			ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
1254 					   POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
1255 			if (ret < 0) {
1256 				pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
1257 				goto out_free;
1258 			}
1259 			thread_data[t].ctlfd_pos = ret;
1260 			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1261 				 thread_data, thread_data[t].ctlfd_pos,
1262 				 thread_data[t].pipes.msg[0]);
1263 		} else {
1264 			thread_data[t].tid = gettid();
1265 
1266 			ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]);
1267 			if (ret < 0)
1268 				goto out_free;
1269 
1270 			thread_data[t].ctlfd_pos = -1; /* Not used */
1271 		}
1272 	}
1273 
1274 	return 0;
1275 
1276 out_free:
1277 	record__free_thread_data(rec);
1278 
1279 	return ret;
1280 }
1281 
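/*
 * Memory map the event ring buffers with the requested AIO, affinity, flush
 * and compression parameters, set up the control file descriptor, allocate the
 * per-thread data and, in parallel streaming mode, create the data directory
 * and attach one file per mmap.
 */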
1282 static int record__mmap_evlist(struct record *rec,
1283 			       struct evlist *evlist)
1284 {
1285 	int i, ret;
1286 	struct record_opts *opts = &rec->opts;
1287 	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
1288 				  opts->auxtrace_sample_mode;
1289 	char msg[512];
1290 
1291 	if (opts->affinity != PERF_AFFINITY_SYS)
1292 		cpu__setup_cpunode_map();
1293 
1294 	if (evlist__mmap_ex(evlist, opts->mmap_pages,
1295 				 opts->auxtrace_mmap_pages,
1296 				 auxtrace_overwrite,
1297 				 opts->nr_cblocks, opts->affinity,
1298 				 opts->mmap_flush, opts->comp_level) < 0) {
1299 		if (errno == EPERM) {
1300 			pr_err("Permission error mapping pages.\n"
1301 			       "Consider increasing "
1302 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
1303 			       "or try again with a smaller value of -m/--mmap_pages.\n"
1304 			       "(current value: %u,%u)\n",
1305 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
1306 			return -errno;
1307 		} else {
1308 			pr_err("failed to mmap with %d (%s)\n", errno,
1309 				str_error_r(errno, msg, sizeof(msg)));
1310 			if (errno)
1311 				return -errno;
1312 			else
1313 				return -EINVAL;
1314 		}
1315 	}
1316 
1317 	if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
1318 		return -1;
1319 
1320 	ret = record__alloc_thread_data(rec, evlist);
1321 	if (ret)
1322 		return ret;
1323 
1324 	if (record__threads_enabled(rec)) {
1325 		ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
1326 		if (ret) {
1327 			pr_err("Failed to create data directory: %s\n", strerror(-ret));
1328 			return ret;
1329 		}
1330 		for (i = 0; i < evlist->core.nr_mmaps; i++) {
1331 			if (evlist->mmap)
1332 				evlist->mmap[i].file = &rec->data.dir.files[i];
1333 			if (evlist->overwrite_mmap)
1334 				evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
1335 		}
1336 	}
1337 
1338 	return 0;
1339 }
1340 
1341 static int record__mmap(struct record *rec)
1342 {
1343 	return record__mmap_evlist(rec, rec->evlist);
1344 }
1345 
1346 static int record__open(struct record *rec)
1347 {
1348 	char msg[BUFSIZ];
1349 	struct evsel *pos;
1350 	struct evlist *evlist = rec->evlist;
1351 	struct perf_session *session = rec->session;
1352 	struct record_opts *opts = &rec->opts;
1353 	int rc = 0;
1354 	bool skipped = false;
1355 	bool removed_tracking = false;
1356 
1357 	evlist__for_each_entry(evlist, pos) {
1358 		if (removed_tracking) {
1359 			/*
1360 			 * Normally the head of the list has tracking enabled
1361 			 * for sideband data like mmaps. If this event is
1362 			 * removed, make sure to add tracking to the next
1363 			 * processed event.
1364 			 */
1365 			if (!pos->tracking) {
1366 				pos->tracking = true;
1367 				evsel__config(pos, opts, &callchain_param);
1368 			}
1369 			removed_tracking = false;
1370 		}
1371 try_again:
1372 		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
1373 			bool report_error = true;
1374 
1375 			if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) {
1376 				if (verbose > 0)
1377 					ui__warning("%s\n", msg);
1378 				goto try_again;
1379 			}
1380 			if ((errno == EINVAL || errno == EBADF) &&
1381 			    pos->core.leader != &pos->core &&
1382 			    pos->weak_group) {
1383 				pos = evlist__reset_weak_group(evlist, pos, true);
1384 				goto try_again;
1385 			}
1386 #if defined(__aarch64__) || defined(__arm__)
1387 			if (strstr(evsel__name(pos), "cycles")) {
1388 				struct evsel *pos2;
1389 				/*
1390 				 * Unfortunately ARM has many events named
1391 				 * "cycles" on PMUs like the system-level (L3)
1392 				 * cache which don't support sampling. Only
1393 				 * display such failures to open when there is
1394 				 * only 1 cycles event or verbose is enabled.
1395 				 */
1396 				evlist__for_each_entry(evlist, pos2) {
1397 					if (pos2 == pos)
1398 						continue;
1399 					if (strstr(evsel__name(pos2), "cycles")) {
1400 						report_error = false;
1401 						break;
1402 					}
1403 				}
1404 			}
1405 #endif
1406 			if (report_error || verbose > 0) {
1407 				ui__error("Failure to open event '%s' on PMU '%s' which will be "
1408 					  "removed.\n%s\n",
1409 					  evsel__name(pos), evsel__pmu_name(pos), msg);
1410 			}
1411 			if (pos->tracking)
1412 				removed_tracking = true;
1413 			pos->skippable = true;
1414 			skipped = true;
1415 		}
1416 	}
1417 
1418 	if (skipped) {
1419 		struct evsel *tmp;
1420 		int idx = 0;
1421 		bool evlist_empty = true;
1422 
1423 		/* Remove evsels that failed to open and update indices. */
1424 		evlist__for_each_entry_safe(evlist, tmp, pos) {
1425 			if (pos->skippable) {
1426 				evlist__remove(evlist, pos);
1427 				continue;
1428 			}
1429 
1430 			/*
1431 			 * Note, dummy events may be command line parsed or
1432 			 * added by the tool. We care about supporting `perf
1433 			 * record -e dummy` which may be used as a permission
1434 			 * check. Dummy events that are added to the command
1435 			 * line and opened along with other events that fail
1436 			 * will still fail, as if the dummy events were
1437 			 * tool-added events, for the sake of code simplicity.
1438 			 */
1439 			if (!evsel__is_dummy_event(pos))
1440 				evlist_empty = false;
1441 		}
1442 		evlist__for_each_entry(evlist, pos) {
1443 			pos->core.idx = idx++;
1444 		}
1445 		/* If list is empty then fail. */
1446 		if (evlist_empty) {
1447 			ui__error("Failure to open any events for recording.\n");
1448 			rc = -1;
1449 			goto out;
1450 		}
1451 	}
1452 	if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
1453 		pr_warning(
1454 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1455 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1456 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1457 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1458 "Samples in kernel modules won't be resolved at all.\n\n"
1459 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1460 "even with a suitable vmlinux or kallsyms file.\n\n");
1461 	}
1462 
1463 	if (evlist__apply_filters(evlist, &pos, &opts->target)) {
1464 		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
1465 			pos->filter ?: "BPF", evsel__name(pos), errno,
1466 			str_error_r(errno, msg, sizeof(msg)));
1467 		rc = -1;
1468 		goto out;
1469 	}
1470 
1471 	rc = record__mmap(rec);
1472 	if (rc)
1473 		goto out;
1474 
1475 	session->evlist = evlist;
1476 	perf_session__set_id_hdr_size(session);
1477 out:
1478 	return rc;
1479 }
1480 
1481 static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1482 {
1483 	if (rec->evlist->first_sample_time == 0)
1484 		rec->evlist->first_sample_time = sample_time;
1485 
1486 	if (sample_time)
1487 		rec->evlist->last_sample_time = sample_time;
1488 }
1489 
1490 static int process_sample_event(const struct perf_tool *tool,
1491 				union perf_event *event,
1492 				struct perf_sample *sample,
1493 				struct evsel *evsel,
1494 				struct machine *machine)
1495 {
1496 	struct record *rec = container_of(tool, struct record, tool);
1497 
1498 	set_timestamp_boundary(rec, sample->time);
1499 
1500 	if (rec->buildid_all)
1501 		return 0;
1502 
1503 	rec->samples++;
1504 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1505 }
1506 
1507 static int process_buildids(struct record *rec)
1508 {
1509 	struct perf_session *session = rec->session;
1510 
1511 	if (perf_data__size(&rec->data) == 0)
1512 		return 0;
1513 
1514 	/*
1515 	 * During this process, it'll load the kernel map and replace
1516 	 * dso->long_name with the real pathname it found.  In this case
1517 	 * we prefer the vmlinux path like
1518 	 *   /lib/modules/3.16.4/build/vmlinux
1519 	 *
1520 	 * rather than build-id path (in debug directory).
1521 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
1522 	 */
1523 	symbol_conf.ignore_vmlinux_buildid = true;
1524 
1525 	/*
1526 	 * If --buildid-all is given, it marks all DSOs regardless of hits,
1527 	 * so there is no need to process samples. But if timestamp_boundary is
1528 	 * enabled, it still needs to walk all samples to get the timestamps of
1529 	 * the first/last samples.
1530 	 */
1531 	if (rec->buildid_all && !rec->timestamp_boundary)
1532 		rec->tool.sample = process_event_sample_stub;
1533 
1534 	return perf_session__process_events(session);
1535 }
1536 
1537 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1538 {
1539 	int err;
1540 	struct perf_tool *tool = data;
1541 	/*
1542 	 * For the guest kernel, when processing the record & report subcommands,
1543 	 * we arrange the module mmap prior to the guest kernel mmap and trigger
1544 	 * a DSO preload, because by default guest module symbols are loaded
1545 	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
1546 	 * method is used to avoid missing symbols when the first address is
1547 	 * in a module instead of in the guest kernel.
1548 	 */
1549 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
1550 					     machine);
1551 	if (err < 0)
1552 		pr_err("Couldn't record guest kernel [%d]'s reference"
1553 		       " relocation symbol.\n", machine->pid);
1554 
1555 	/*
1556 	 * We use _stext for the guest kernel because the guest kernel's
1557 	 * /proc/kallsyms sometimes has no _text.
1558 	 */
1559 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1560 						 machine);
1561 	if (err < 0)
1562 		pr_err("Couldn't record guest kernel [%d]'s reference"
1563 		       " relocation symbol.\n", machine->pid);
1564 }
1565 
1566 static struct perf_event_header finished_round_event = {
1567 	.size = sizeof(struct perf_event_header),
1568 	.type = PERF_RECORD_FINISHED_ROUND,
1569 };
1570 
1571 static struct perf_event_header finished_init_event = {
1572 	.size = sizeof(struct perf_event_header),
1573 	.type = PERF_RECORD_FINISHED_INIT,
1574 };
1575 
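/*
 * When an mmap affinity mode other than SYS is in use and the current thread's
 * affinity mask differs from the mmap's, migrate the thread onto the CPUs
 * backing that mmap before reading it.
 */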
1576 static void record__adjust_affinity(struct record *rec, struct mmap *map)
1577 {
1578 	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
1579 	    !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
1580 			  thread->mask->affinity.nbits)) {
1581 		bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
1582 		bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
1583 			  map->affinity_mask.bits, thread->mask->affinity.nbits);
1584 		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
1585 					(cpu_set_t *)thread->mask->affinity.bits);
1586 		if (verbose == 2) {
1587 			pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
1588 			mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
1589 		}
1590 	}
1591 }
1592 
1593 static size_t process_comp_header(void *record, size_t increment)
1594 {
1595 	struct perf_record_compressed2 *event = record;
1596 	size_t size = sizeof(*event);
1597 
1598 	if (increment) {
1599 		event->header.size += increment;
1600 		return increment;
1601 	}
1602 
1603 	event->header.type = PERF_RECORD_COMPRESSED2;
1604 	event->header.size = size;
1605 
1606 	return size;
1607 }
1608 
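/*
 * Compress @src into @dst as one or more PERF_RECORD_COMPRESSED2 records,
 * using the mmap's Zstd stream when each mmap has its own output file
 * (parallel streaming) or the session's stream otherwise, and account the
 * transferred/compressed byte counts accordingly.
 */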
1609 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
1610 			    void *dst, size_t dst_size, void *src, size_t src_size)
1611 {
1612 	ssize_t compressed;
1613 	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed2) - 1;
1614 	struct zstd_data *zstd_data = &session->zstd_data;
1615 
1616 	if (map && map->file)
1617 		zstd_data = &map->zstd_data;
1618 
1619 	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
1620 						     max_record_size, process_comp_header);
1621 	if (compressed < 0)
1622 		return compressed;
1623 
1624 	if (map && map->file) {
1625 		thread->bytes_transferred += src_size;
1626 		thread->bytes_compressed  += compressed;
1627 	} else {
1628 		session->bytes_transferred += src_size;
1629 		session->bytes_compressed  += compressed;
1630 	}
1631 
1632 	return compressed;
1633 }
1634 
1635 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1636 				    bool overwrite, bool synch)
1637 {
1638 	u64 bytes_written = rec->bytes_written;
1639 	int i;
1640 	int rc = 0;
1641 	int nr_mmaps;
1642 	struct mmap **maps;
1643 	int trace_fd = rec->data.file.fd;
1644 	off_t off = 0;
1645 
1646 	if (!evlist)
1647 		return 0;
1648 
1649 	nr_mmaps = thread->nr_mmaps;
1650 	maps = overwrite ? thread->overwrite_maps : thread->maps;
1651 
1652 	if (!maps)
1653 		return 0;
1654 
1655 	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1656 		return 0;
1657 
1658 	if (record__aio_enabled(rec))
1659 		off = record__aio_get_pos(trace_fd);
1660 
1661 	for (i = 0; i < nr_mmaps; i++) {
1662 		u64 flush = 0;
1663 		struct mmap *map = maps[i];
1664 
1665 		if (map->core.base) {
1666 			record__adjust_affinity(rec, map);
1667 			if (synch) {
1668 				flush = map->core.flush;
1669 				map->core.flush = 1;
1670 			}
1671 			if (!record__aio_enabled(rec)) {
1672 				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1673 					if (synch)
1674 						map->core.flush = flush;
1675 					rc = -1;
1676 					goto out;
1677 				}
1678 			} else {
1679 				if (record__aio_push(rec, map, &off) < 0) {
1680 					record__aio_set_pos(trace_fd, off);
1681 					if (synch)
1682 						map->core.flush = flush;
1683 					rc = -1;
1684 					goto out;
1685 				}
1686 			}
1687 			if (synch)
1688 				map->core.flush = flush;
1689 		}
1690 
1691 		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1692 		    !rec->opts.auxtrace_sample_mode &&
1693 		    record__auxtrace_mmap_read(rec, map) != 0) {
1694 			rc = -1;
1695 			goto out;
1696 		}
1697 	}
1698 
1699 	if (record__aio_enabled(rec))
1700 		record__aio_set_pos(trace_fd, off);
1701 
1702 	/*
1703 	 * Mark the round finished in case we wrote
1704 	 * at least one event.
1705 	 *
1706 	 * No need for round events in directory mode,
1707 	 * because per-cpu maps and files have data
1708 	 * sorted by the kernel.
1709 	 */
1710 	if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
1711 		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1712 
1713 	if (overwrite)
1714 		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1715 out:
1716 	return rc;
1717 }
1718 
1719 static int record__mmap_read_all(struct record *rec, bool synch)
1720 {
1721 	int err;
1722 
1723 	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1724 	if (err)
1725 		return err;
1726 
1727 	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1728 }
1729 
1730 static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1731 					   void *arg __maybe_unused)
1732 {
1733 	struct perf_mmap *map = fda->priv[fd].ptr;
1734 
1735 	if (map)
1736 		perf_mmap__put(map);
1737 }
1738 
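/*
 * Body of each auxiliary streaming thread: acknowledge the start over the
 * ack pipe, then alternate between draining this thread's mmaps and
 * polling its fds until the msg pipe is closed (POLLHUP on ctlfd_pos),
 * which is the termination request sent by record__terminate_thread().
 */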
1739 static void *record__thread(void *arg)
1740 {
1741 	enum thread_msg msg = THREAD_MSG__READY;
1742 	bool terminate = false;
1743 	struct fdarray *pollfd;
1744 	int err, ctlfd_pos;
1745 
1746 	thread = arg;
1747 	thread->tid = gettid();
1748 
1749 	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1750 	if (err == -1)
1751 		pr_warning("threads[%d]: failed to notify on start: %s\n",
1752 			   thread->tid, strerror(errno));
1753 
1754 	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
1755 
1756 	pollfd = &thread->pollfd;
1757 	ctlfd_pos = thread->ctlfd_pos;
1758 
1759 	for (;;) {
1760 		unsigned long long hits = thread->samples;
1761 
1762 		if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
1763 			break;
1764 
1765 		if (hits == thread->samples) {
1766 
1767 			err = fdarray__poll(pollfd, -1);
1768 			/*
1769 			 * Propagate the error only if there is one. Ignore a positive
1770 			 * number of returned events and interrupted polls (EINTR).
1771 			 */
1772 			if (err > 0 || (err < 0 && errno == EINTR))
1773 				err = 0;
1774 			thread->waking++;
1775 
1776 			if (fdarray__filter(pollfd, POLLERR | POLLHUP,
1777 					    record__thread_munmap_filtered, NULL) == 0)
1778 				break;
1779 		}
1780 
1781 		if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
1782 			terminate = true;
1783 			close(thread->pipes.msg[0]);
1784 			thread->pipes.msg[0] = -1;
1785 			pollfd->entries[ctlfd_pos].fd = -1;
1786 			pollfd->entries[ctlfd_pos].events = 0;
1787 		}
1788 
1789 		pollfd->entries[ctlfd_pos].revents = 0;
1790 	}
1791 	record__mmap_read_all(thread->rec, true);
1792 
1793 	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1794 	if (err == -1)
1795 		pr_warning("threads[%d]: failed to notify on termination: %s\n",
1796 			   thread->tid, strerror(errno));
1797 
1798 	return NULL;
1799 }
1800 
1801 static void record__init_features(struct record *rec)
1802 {
1803 	struct perf_session *session = rec->session;
1804 	int feat;
1805 
1806 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1807 		perf_header__set_feat(&session->header, feat);
1808 
1809 	if (rec->no_buildid)
1810 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1811 
1812 	if (!have_tracepoints(&rec->evlist->core.entries))
1813 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1814 
1815 	if (!rec->opts.branch_stack)
1816 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1817 
1818 	if (!rec->opts.full_auxtrace)
1819 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1820 
1821 	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1822 		perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1823 
1824 	if (!rec->opts.use_clockid)
1825 		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
1826 
1827 	if (!record__threads_enabled(rec))
1828 		perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1829 
1830 	if (!record__comp_enabled(rec))
1831 		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1832 
1833 	perf_header__clear_feat(&session->header, HEADER_STAT);
1834 }
1835 
1836 static void
1837 record__finish_output(struct record *rec)
1838 {
1839 	int i;
1840 	struct perf_data *data = &rec->data;
1841 	int fd = perf_data__fd(data);
1842 
1843 	if (data->is_pipe) {
1844 		/* Just to display approx. size */
1845 		data->file.size = rec->bytes_written;
1846 		return;
1847 	}
1848 
1849 	rec->session->header.data_size += rec->bytes_written;
1850 	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1851 	if (record__threads_enabled(rec)) {
1852 		for (i = 0; i < data->dir.nr; i++)
1853 			data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
1854 	}
1855 
1856 	/* Buildid scanning disabled or build ID in kernel and synthesized map events. */
1857 	if (!rec->no_buildid || !rec->no_buildid_cache) {
1858 		process_buildids(rec);
1859 
1860 		if (rec->buildid_all)
1861 			perf_session__dsos_hit_all(rec->session);
1862 	}
1863 	perf_session__write_header(rec->session, rec->evlist, fd, true);
1864 	perf_session__cache_build_ids(rec->session);
1865 }
1866 
1867 static int record__synthesize_workload(struct record *rec, bool tail)
1868 {
1869 	int err;
1870 	struct perf_thread_map *thread_map;
1871 	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1872 
1873 	if (rec->opts.tail_synthesize != tail)
1874 		return 0;
1875 
1876 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1877 	if (thread_map == NULL)
1878 		return -1;
1879 
1880 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1881 						 process_synthesized_event,
1882 						 &rec->session->machines.host,
1883 						 needs_mmap,
1884 						 rec->opts.sample_address);
1885 	perf_thread_map__put(thread_map);
1886 	return err;
1887 }
1888 
1889 static int write_finished_init(struct record *rec, bool tail)
1890 {
1891 	if (rec->opts.tail_synthesize != tail)
1892 		return 0;
1893 
1894 	return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
1895 }
1896 
1897 static int record__synthesize(struct record *rec, bool tail);
1898 
1899 static int
1900 record__switch_output(struct record *rec, bool at_exit)
1901 {
1902 	struct perf_data *data = &rec->data;
1903 	char *new_filename = NULL;
1904 	int fd, err;
1905 
1906 	/* Placeholder with the same size as a real timestamp, e.g. "2015122520103046" */
1907 	char timestamp[] = "InvalidTimestamp";
1908 
1909 	record__aio_mmap_read_sync(rec);
1910 
1911 	write_finished_init(rec, true);
1912 
1913 	record__synthesize(rec, true);
1914 	if (target__none(&rec->opts.target))
1915 		record__synthesize_workload(rec, true);
1916 
1917 	rec->samples = 0;
1918 	record__finish_output(rec);
1919 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1920 	if (err) {
1921 		pr_err("Failed to get current timestamp\n");
1922 		return -EINVAL;
1923 	}
1924 
1925 	fd = perf_data__switch(data, timestamp,
1926 			       rec->session->header.data_offset,
1927 			       at_exit, &new_filename);
1928 	if (fd >= 0 && !at_exit) {
1929 		rec->bytes_written = 0;
1930 		rec->session->header.data_size = 0;
1931 	}
1932 
1933 	if (!quiet) {
1934 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1935 			data->path, timestamp);
1936 	}
1937 
1938 	if (rec->switch_output.num_files) {
1939 		int n = rec->switch_output.cur_file + 1;
1940 
1941 		if (n >= rec->switch_output.num_files)
1942 			n = 0;
1943 		rec->switch_output.cur_file = n;
1944 		if (rec->switch_output.filenames[n]) {
1945 			remove(rec->switch_output.filenames[n]);
1946 			zfree(&rec->switch_output.filenames[n]);
1947 		}
1948 		rec->switch_output.filenames[n] = new_filename;
1949 	} else {
1950 		free(new_filename);
1951 	}
1952 
1953 	/* Output tracking events */
1954 	if (!at_exit) {
1955 		record__synthesize(rec, false);
1956 
1957 		/*
1958 		 * In 'perf record --switch-output' without -a,
1959 		 * record__synthesize() in record__switch_output() won't
1960 		 * generate tracking events because there's no thread_map
1961 		 * in evlist, which causes the newly created perf.data to
1962 		 * lack map and comm information.
1963 		 * Create a fake thread_map and directly call
1964 		 * perf_event__synthesize_thread_map() for those events.
1965 		 */
1966 		if (target__none(&rec->opts.target))
1967 			record__synthesize_workload(rec, false);
1968 		write_finished_init(rec, false);
1969 	}
1970 	return fd;
1971 }
1972 
1973 static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
1974 					struct perf_record_lost_samples *lost,
1975 					int cpu_idx, int thread_idx, u64 lost_count,
1976 					u16 misc_flag)
1977 {
1978 	struct perf_sample_id *sid;
1979 	struct perf_sample sample;
1980 	int id_hdr_size;
1981 
1982 	perf_sample__init(&sample, /*all=*/true);
1983 	lost->lost = lost_count;
1984 	if (evsel->core.ids) {
1985 		sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
1986 		sample.id = sid->id;
1987 	}
1988 
1989 	id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1),
1990 						       evsel->core.attr.sample_type, &sample);
1991 	lost->header.size = sizeof(*lost) + id_hdr_size;
1992 	lost->header.misc = misc_flag;
1993 	record__write(rec, NULL, lost, lost->header.size);
1994 	perf_sample__exit(&sample);
1995 }
1996 
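/*
 * Read the lost-sample counts for every (cpu, thread) pair of each event
 * and emit synthetic PERF_RECORD_LOST_SAMPLES records for the non-zero
 * ones, so later analysis can account for dropped data; BPF-filter drops
 * are reported separately with PERF_RECORD_MISC_LOST_SAMPLES_BPF set.
 */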
1997 static void record__read_lost_samples(struct record *rec)
1998 {
1999 	struct perf_session *session = rec->session;
2000 	struct perf_record_lost_samples_and_ids lost;
2001 	struct evsel *evsel;
2002 
2003 	/* there was an error during record__open */
2004 	if (session->evlist == NULL)
2005 		return;
2006 
2007 	evlist__for_each_entry(session->evlist, evsel) {
2008 		struct xyarray *xy = evsel->core.sample_id;
2009 		u64 lost_count;
2010 
2011 		if (xy == NULL || evsel->core.fd == NULL)
2012 			continue;
2013 		if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
2014 		    xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
2015 			pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
2016 			continue;
2017 		}
2018 
2019 		for (int x = 0; x < xyarray__max_x(xy); x++) {
2020 			for (int y = 0; y < xyarray__max_y(xy); y++) {
2021 				struct perf_counts_values count;
2022 
2023 				if (perf_evsel__read(&evsel->core, x, y, &count) < 0) {
2024 					pr_debug("read LOST count failed\n");
2025 					return;
2026 				}
2027 
2028 				if (count.lost) {
2029 					memset(&lost, 0, sizeof(lost));
2030 					lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
2031 					__record__save_lost_samples(rec, evsel, &lost.lost,
2032 								    x, y, count.lost, 0);
2033 				}
2034 			}
2035 		}
2036 
2037 		lost_count = perf_bpf_filter__lost_count(evsel);
2038 		if (lost_count) {
2039 			memset(&lost, 0, sizeof(lost));
2040 			lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
2041 			__record__save_lost_samples(rec, evsel, &lost.lost, 0, 0, lost_count,
2042 						    PERF_RECORD_MISC_LOST_SAMPLES_BPF);
2043 		}
2044 	}
2045 }
2046 
2047 static volatile sig_atomic_t workload_exec_errno;
2048 
2049 /*
2050  * evlist__prepare_workload will send a SIGUSR1
2051  * if the fork fails, since we asked by setting its
2052  * if the fork fails, since we asked for it by setting its
2053  */
2054 static void workload_exec_failed_signal(int signo __maybe_unused,
2055 					siginfo_t *info,
2056 					void *ucontext __maybe_unused)
2057 {
2058 	workload_exec_errno = info->si_value.sival_int;
2059 	done = 1;
2060 	child_finished = 1;
2061 }
2062 
2063 static void snapshot_sig_handler(int sig);
2064 static void alarm_sig_handler(int sig);
2065 
2066 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
2067 {
2068 	if (evlist) {
2069 		if (evlist->mmap && evlist->mmap[0].core.base)
2070 			return evlist->mmap[0].core.base;
2071 		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
2072 			return evlist->overwrite_mmap[0].core.base;
2073 	}
2074 	return NULL;
2075 }
2076 
2077 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
2078 {
2079 	const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
2080 	if (pc)
2081 		return pc;
2082 	return NULL;
2083 }
2084 
2085 static int record__synthesize(struct record *rec, bool tail)
2086 {
2087 	struct perf_session *session = rec->session;
2088 	struct machine *machine = &session->machines.host;
2089 	struct perf_data *data = &rec->data;
2090 	struct record_opts *opts = &rec->opts;
2091 	struct perf_tool *tool = &rec->tool;
2092 	int err = 0;
2093 	event_op f = process_synthesized_event;
2094 
2095 	if (rec->opts.tail_synthesize != tail)
2096 		return 0;
2097 
2098 	if (data->is_pipe) {
2099 		err = perf_event__synthesize_for_pipe(tool, session, data,
2100 						      process_synthesized_event);
2101 		if (err < 0)
2102 			goto out;
2103 
2104 		rec->bytes_written += err;
2105 	}
2106 
2107 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
2108 					  process_synthesized_event, machine);
2109 	if (err)
2110 		goto out;
2111 
2112 	/* Synthesize id_index before auxtrace_info */
2113 	err = perf_event__synthesize_id_index(tool,
2114 					      process_synthesized_event,
2115 					      session->evlist, machine);
2116 	if (err)
2117 		goto out;
2118 
2119 	if (rec->opts.full_auxtrace) {
2120 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
2121 					session, process_synthesized_event);
2122 		if (err)
2123 			goto out;
2124 	}
2125 
2126 	if (!evlist__exclude_kernel(rec->evlist)) {
2127 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
2128 							 machine);
2129 		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
2130 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2131 				   "Check /proc/kallsyms permission or run as root.\n");
2132 
2133 		err = perf_event__synthesize_modules(tool, process_synthesized_event,
2134 						     machine);
2135 		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
2136 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2137 				   "Check /proc/modules permission or run as root.\n");
2138 	}
2139 
2140 	if (perf_guest) {
2141 		machines__process_guests(&session->machines,
2142 					 perf_event__synthesize_guest_os, tool);
2143 	}
2144 
2145 	err = perf_event__synthesize_extra_attr(&rec->tool,
2146 						rec->evlist,
2147 						process_synthesized_event,
2148 						data->is_pipe);
2149 	if (err)
2150 		goto out;
2151 
2152 	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
2153 						 process_synthesized_event,
2154 						NULL);
2155 	if (err < 0) {
2156 		pr_err("Couldn't synthesize thread map.\n");
2157 		return err;
2158 	}
2159 
2160 	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
2161 					     process_synthesized_event, NULL);
2162 	if (err < 0) {
2163 		pr_err("Couldn't synthesize cpu map.\n");
2164 		return err;
2165 	}
2166 
2167 	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
2168 						machine, opts);
2169 	if (err < 0) {
2170 		pr_warning("Couldn't synthesize bpf events.\n");
2171 		err = 0;
2172 	}
2173 
2174 	if (rec->opts.synth & PERF_SYNTH_CGROUP) {
2175 		err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
2176 						     machine);
2177 		if (err < 0) {
2178 			pr_warning("Couldn't synthesize cgroup events.\n");
2179 			err = 0;
2180 		}
2181 	}
2182 
2183 	if (rec->opts.nr_threads_synthesize > 1) {
2184 		mutex_init(&synth_lock);
2185 		perf_set_multithreaded();
2186 		f = process_locked_synthesized_event;
2187 	}
2188 
2189 	if (rec->opts.synth & PERF_SYNTH_TASK) {
2190 		bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
2191 
2192 		err = __machine__synthesize_threads(machine, tool, &opts->target,
2193 						    rec->evlist->core.threads,
2194 						    f, needs_mmap, opts->sample_address,
2195 						    rec->opts.nr_threads_synthesize);
2196 	}
2197 
2198 	if (rec->opts.nr_threads_synthesize > 1) {
2199 		perf_set_singlethreaded();
2200 		mutex_destroy(&synth_lock);
2201 	}
2202 
2203 out:
2204 	return err;
2205 }
2206 
2207 static void record__synthesize_final_bpf_metadata(struct record *rec __maybe_unused)
2208 {
2209 #ifdef HAVE_LIBBPF_SUPPORT
2210 	perf_event__synthesize_final_bpf_metadata(rec->session,
2211 						  process_synthesized_event);
2212 #endif
2213 }
2214 
2215 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
2216 {
2217 	struct record *rec = data;
2218 	pthread_kill(rec->thread_id, SIGUSR2);
2219 	return 0;
2220 }
2221 
2222 static int record__setup_sb_evlist(struct record *rec)
2223 {
2224 	struct record_opts *opts = &rec->opts;
2225 
2226 	if (rec->sb_evlist != NULL) {
2227 		/*
2228 		 * We get here if --switch-output-event populated the
2229 		 * sb_evlist, so associate a callback that will send a SIGUSR2
2230 		 * to the main thread.
2231 		 */
2232 		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
2233 		rec->thread_id = pthread_self();
2234 	}
2235 #ifdef HAVE_LIBBPF_SUPPORT
2236 	if (!opts->no_bpf_event) {
2237 		if (rec->sb_evlist == NULL) {
2238 			rec->sb_evlist = evlist__new();
2239 
2240 			if (rec->sb_evlist == NULL) {
2241 				pr_err("Couldn't create side band evlist.\n");
2242 				return -1;
2243 			}
2244 		}
2245 
2246 		if (evlist__add_bpf_sb_event(rec->sb_evlist, perf_session__env(rec->session))) {
2247 			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n");
2248 			return -1;
2249 		}
2250 	}
2251 #endif
2252 	if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
2253 		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
2254 		opts->no_bpf_event = true;
2255 	}
2256 
2257 	return 0;
2258 }
2259 
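/*
 * Capture a pair of reference timestamps, one from gettimeofday() and one
 * from the sampling clockid, both stored as nanoseconds:
 *
 *	tod_ns     = tv_sec * NSEC_PER_SEC + tv_usec * NSEC_PER_USEC
 *	clockid_ns = tv_sec * NSEC_PER_SEC + tv_nsec
 *
 * Keeping both in the perf_env clock data lets later tooling relate sample
 * timestamps to wall-clock time when a user-specified clockid is in use.
 */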
2260 static int record__init_clock(struct record *rec)
2261 {
2262 	struct perf_session *session = rec->session;
2263 	struct timespec ref_clockid;
2264 	struct timeval ref_tod;
2265 	struct perf_env *env = perf_session__env(session);
2266 	u64 ref;
2267 
2268 	if (!rec->opts.use_clockid)
2269 		return 0;
2270 
2271 	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
2272 		env->clock.clockid_res_ns = rec->opts.clockid_res_ns;
2273 
2274 	env->clock.clockid = rec->opts.clockid;
2275 
2276 	if (gettimeofday(&ref_tod, NULL) != 0) {
2277 		pr_err("gettimeofday failed, cannot set reference time.\n");
2278 		return -1;
2279 	}
2280 
2281 	if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
2282 		pr_err("clock_gettime failed, cannot set reference time.\n");
2283 		return -1;
2284 	}
2285 
2286 	ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
2287 	      (u64) ref_tod.tv_usec * NSEC_PER_USEC;
2288 
2289 	env->clock.tod_ns = ref;
2290 
2291 	ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2292 	      (u64) ref_clockid.tv_nsec;
2293 
2294 	env->clock.clockid_ns = ref;
2295 	return 0;
2296 }
2297 
2298 static void hit_auxtrace_snapshot_trigger(struct record *rec)
2299 {
2300 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2301 		trigger_hit(&auxtrace_snapshot_trigger);
2302 		auxtrace_record__snapshot_started = 1;
2303 		if (auxtrace_record__snapshot_start(rec->itr))
2304 			trigger_error(&auxtrace_snapshot_trigger);
2305 	}
2306 }
2307 
2308 static int record__terminate_thread(struct record_thread *thread_data)
2309 {
2310 	int err;
2311 	enum thread_msg ack = THREAD_MSG__UNDEFINED;
2312 	pid_t tid = thread_data->tid;
2313 
2314 	close(thread_data->pipes.msg[1]);
2315 	thread_data->pipes.msg[1] = -1;
2316 	err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
2317 	if (err > 0)
2318 		pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
2319 	else
2320 		pr_warning("threads[%d]: failed to receive termination notification from %d\n",
2321 			   thread->tid, tid);
2322 
2323 	return 0;
2324 }
2325 
2326 static int record__start_threads(struct record *rec)
2327 {
2328 	int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
2329 	struct record_thread *thread_data = rec->thread_data;
2330 	sigset_t full, mask;
2331 	pthread_t handle;
2332 	pthread_attr_t attrs;
2333 
2334 	thread = &thread_data[0];
2335 
2336 	if (!record__threads_enabled(rec))
2337 		return 0;
2338 
2339 	sigfillset(&full);
2340 	if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2341 		pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
2342 		return -1;
2343 	}
2344 
2345 	pthread_attr_init(&attrs);
2346 	pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2347 
2348 	for (t = 1; t < nr_threads; t++) {
2349 		enum thread_msg msg = THREAD_MSG__UNDEFINED;
2350 
2351 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2352 		pthread_attr_setaffinity_np(&attrs,
2353 					    MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2354 					    (cpu_set_t *)(thread_data[t].mask->affinity.bits));
2355 #endif
2356 		if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2357 			for (tt = 1; tt < t; tt++)
2358 				record__terminate_thread(&thread_data[tt]);
2359 			pr_err("Failed to start threads: %s\n", strerror(errno));
2360 			ret = -1;
2361 			goto out_err;
2362 		}
2363 
2364 		err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2365 		if (err > 0)
2366 			pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2367 				  thread_msg_tags[msg]);
2368 		else
2369 			pr_warning("threads[%d]: failed to receive start notification from %d\n",
2370 				   thread->tid, rec->thread_data[t].tid);
2371 	}
2372 
2373 	sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2374 			(cpu_set_t *)thread->mask->affinity.bits);
2375 
2376 	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2377 
2378 out_err:
2379 	pthread_attr_destroy(&attrs);
2380 
2381 	if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2382 		pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
2383 		ret = -1;
2384 	}
2385 
2386 	return ret;
2387 }
2388 
2389 static int record__stop_threads(struct record *rec)
2390 {
2391 	int t;
2392 	struct record_thread *thread_data = rec->thread_data;
2393 
2394 	for (t = 1; t < rec->nr_threads; t++)
2395 		record__terminate_thread(&thread_data[t]);
2396 
2397 	for (t = 0; t < rec->nr_threads; t++) {
2398 		rec->samples += thread_data[t].samples;
2399 		if (!record__threads_enabled(rec))
2400 			continue;
2401 		rec->session->bytes_transferred += thread_data[t].bytes_transferred;
2402 		rec->session->bytes_compressed += thread_data[t].bytes_compressed;
2403 		pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
2404 			 thread_data[t].samples, thread_data[t].waking);
2405 		if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
2406 			pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
2407 				 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
2408 		else
2409 			pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
2410 	}
2411 
2412 	return 0;
2413 }
2414 
2415 static unsigned long record__waking(struct record *rec)
2416 {
2417 	int t;
2418 	unsigned long waking = 0;
2419 	struct record_thread *thread_data = rec->thread_data;
2420 
2421 	for (t = 0; t < rec->nr_threads; t++)
2422 		waking += thread_data[t].waking;
2423 
2424 	return waking;
2425 }
2426 
2427 static int __cmd_record(struct record *rec, int argc, const char **argv)
2428 {
2429 	int err;
2430 	int status = 0;
2431 	const bool forks = argc > 0;
2432 	struct perf_tool *tool = &rec->tool;
2433 	struct record_opts *opts = &rec->opts;
2434 	struct perf_data *data = &rec->data;
2435 	struct perf_session *session;
2436 	bool disabled = false, draining = false;
2437 	int fd;
2438 	float ratio = 0;
2439 	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
2440 	struct perf_env *env;
2441 
2442 	atexit(record__sig_exit);
2443 	signal(SIGCHLD, sig_handler);
2444 	signal(SIGINT, sig_handler);
2445 	signal(SIGTERM, sig_handler);
2446 	signal(SIGSEGV, sigsegv_handler);
2447 
2448 	if (rec->opts.record_cgroup) {
2449 #ifndef HAVE_FILE_HANDLE
2450 		pr_err("cgroup tracking is not supported\n");
2451 		return -1;
2452 #endif
2453 	}
2454 
2455 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
2456 		signal(SIGUSR2, snapshot_sig_handler);
2457 		if (rec->opts.auxtrace_snapshot_mode)
2458 			trigger_on(&auxtrace_snapshot_trigger);
2459 		if (rec->switch_output.enabled)
2460 			trigger_on(&switch_output_trigger);
2461 	} else {
2462 		signal(SIGUSR2, SIG_IGN);
2463 	}
2464 
2465 	perf_tool__init(tool, /*ordered_events=*/true);
2466 	tool->sample		= process_sample_event;
2467 	tool->fork		= perf_event__process_fork;
2468 	tool->exit		= perf_event__process_exit;
2469 	tool->comm		= perf_event__process_comm;
2470 	tool->namespaces	= perf_event__process_namespaces;
2471 	tool->mmap		= build_id__process_mmap;
2472 	tool->mmap2		= build_id__process_mmap2;
2473 	tool->itrace_start	= process_timestamp_boundary;
2474 	tool->aux		= process_timestamp_boundary;
2475 	tool->namespace_events	= rec->opts.record_namespaces;
2476 	tool->cgroup_events	= rec->opts.record_cgroup;
2477 	session = perf_session__new(data, tool);
2478 	if (IS_ERR(session)) {
2479 		pr_err("Perf session creation failed.\n");
2480 		return PTR_ERR(session);
2481 	}
2482 	env = perf_session__env(session);
2483 	if (record__threads_enabled(rec)) {
2484 		if (perf_data__is_pipe(&rec->data)) {
2485 			pr_err("Parallel trace streaming is not available in pipe mode.\n");
2486 			return -1;
2487 		}
2488 		if (rec->opts.full_auxtrace) {
2489 			pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
2490 			return -1;
2491 		}
2492 	}
2493 
2494 	fd = perf_data__fd(data);
2495 	rec->session = session;
2496 
2497 	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
2498 		pr_err("Compression initialization failed.\n");
2499 		return -1;
2500 	}
2501 #ifdef HAVE_EVENTFD_SUPPORT
2502 	done_fd = eventfd(0, EFD_NONBLOCK);
2503 	if (done_fd < 0) {
2504 		pr_err("Failed to create wakeup eventfd, error: %m\n");
2505 		status = -1;
2506 		goto out_delete_session;
2507 	}
2508 	err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
2509 	if (err < 0) {
2510 		pr_err("Failed to add wakeup eventfd to poll list\n");
2511 		status = err;
2512 		goto out_delete_session;
2513 	}
2514 #endif // HAVE_EVENTFD_SUPPORT
2515 
2516 	env->comp_type  = PERF_COMP_ZSTD;
2517 	env->comp_level = rec->opts.comp_level;
2518 
2519 	if (rec->opts.kcore &&
2520 	    !record__kcore_readable(&session->machines.host)) {
2521 		pr_err("ERROR: kcore is not readable.\n");
2522 		return -1;
2523 	}
2524 
2525 	if (record__init_clock(rec))
2526 		return -1;
2527 
2528 	record__init_features(rec);
2529 
2530 	if (forks) {
2531 		err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
2532 					       workload_exec_failed_signal);
2533 		if (err < 0) {
2534 			pr_err("Couldn't run the workload!\n");
2535 			status = err;
2536 			goto out_delete_session;
2537 		}
2538 	}
2539 
2540 	/*
2541 	 * If we have just single event and are sending data
2542 	 * through pipe, we need to force the ids allocation,
2543 	 * because we synthesize event name through the pipe
2544 	 * and need the id for that.
2545 	 */
2546 	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
2547 		rec->opts.sample_id = true;
2548 
2549 	if (rec->timestamp_filename && perf_data__is_pipe(data)) {
2550 		rec->timestamp_filename = false;
2551 		pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n");
2552 	}
2553 
2554 	/*
2555 	 * Use global stat_config that is zero meaning aggr_mode is AGGR_NONE
2556 	 * and hybrid_merge is false.
2557 	 */
2558 	evlist__uniquify_evsel_names(rec->evlist, &stat_config);
2559 
2560 	evlist__config(rec->evlist, opts, &callchain_param);
2561 
2562 	/* Debug message used by test scripts */
2563 	pr_debug3("perf record opening and mmapping events\n");
2564 	if (record__open(rec) != 0) {
2565 		err = -1;
2566 		goto out_free_threads;
2567 	}
2568 	/* Debug message used by test scripts */
2569 	pr_debug3("perf record done opening and mmapping events\n");
2570 	env->comp_mmap_len = session->evlist->core.mmap_len;
2571 
2572 	if (rec->opts.kcore) {
2573 		err = record__kcore_copy(&session->machines.host, data);
2574 		if (err) {
2575 			pr_err("ERROR: Failed to copy kcore\n");
2576 			goto out_free_threads;
2577 		}
2578 	}
2579 
2580 	/*
2581 	 * Normally perf_session__new would do this, but it doesn't have the
2582 	 * evlist.
2583 	 */
2584 	if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
2585 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2586 		rec->tool.ordered_events = false;
2587 	}
2588 
2589 	if (evlist__nr_groups(rec->evlist) == 0)
2590 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2591 
2592 	if (data->is_pipe) {
2593 		err = perf_header__write_pipe(fd);
2594 		if (err < 0)
2595 			goto out_free_threads;
2596 	} else {
2597 		err = perf_session__write_header(session, rec->evlist, fd, false);
2598 		if (err < 0)
2599 			goto out_free_threads;
2600 	}
2601 
2602 	err = -1;
2603 	if (!rec->no_buildid
2604 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
2605 		pr_err("Couldn't generate buildids. "
2606 		       "Use --no-buildid to profile anyway.\n");
2607 		goto out_free_threads;
2608 	}
2609 
2610 	if (!evlist__needs_bpf_sb_event(rec->evlist))
2611 		opts->no_bpf_event = true;
2612 
2613 	err = record__setup_sb_evlist(rec);
2614 	if (err)
2615 		goto out_free_threads;
2616 
2617 	err = record__synthesize(rec, false);
2618 	if (err < 0)
2619 		goto out_free_threads;
2620 
2621 	if (rec->realtime_prio) {
2622 		struct sched_param param;
2623 
2624 		param.sched_priority = rec->realtime_prio;
2625 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
2626 			pr_err("Could not set realtime priority.\n");
2627 			err = -1;
2628 			goto out_free_threads;
2629 		}
2630 	}
2631 
2632 	if (record__start_threads(rec))
2633 		goto out_free_threads;
2634 
2635 	/*
2636 	 * When perf is starting the traced process, all the events
2637 	 * (apart from group members) have enable_on_exec=1 set,
2638 	 * so don't spoil it by prematurely enabling them.
2639 	 */
2640 	if (!target__none(&opts->target) && !opts->target.initial_delay)
2641 		evlist__enable(rec->evlist);
2642 
2643 	/*
2644 	 * offcpu-time does not call execve, so enable_on_exe wouldn't work
2645 	 * offcpu-time does not call execve, so enable_on_exec wouldn't work
2646 	 * when recording a workload; enable it manually.
2647 	if (rec->off_cpu)
2648 		evlist__enable_evsel(rec->evlist, (char *)OFFCPU_EVENT);
2649 
2650 	/*
2651 	 * Let the child rip
2652 	 */
2653 	if (forks) {
2654 		struct machine *machine = &session->machines.host;
2655 		union perf_event *event;
2656 		pid_t tgid;
2657 
2658 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2659 		if (event == NULL) {
2660 			err = -ENOMEM;
2661 			goto out_child;
2662 		}
2663 
2664 		/*
2665 		 * Some H/W events are generated before COMM event
2666 		 * which is emitted during exec(), so perf script
2667 		 * cannot see a correct process name for those events.
2668 		 * Synthesize COMM event to prevent it.
2669 		 */
2670 		tgid = perf_event__synthesize_comm(tool, event,
2671 						   rec->evlist->workload.pid,
2672 						   process_synthesized_event,
2673 						   machine);
2674 		free(event);
2675 
2676 		if (tgid == -1)
2677 			goto out_child;
2678 
2679 		event = malloc(sizeof(event->namespaces) +
2680 			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
2681 			       machine->id_hdr_size);
2682 		if (event == NULL) {
2683 			err = -ENOMEM;
2684 			goto out_child;
2685 		}
2686 
2687 		/*
2688 		 * Synthesize NAMESPACES event for the command specified.
2689 		 */
2690 		perf_event__synthesize_namespaces(tool, event,
2691 						  rec->evlist->workload.pid,
2692 						  tgid, process_synthesized_event,
2693 						  machine);
2694 		free(event);
2695 
2696 		evlist__start_workload(rec->evlist);
2697 	}
2698 
2699 	if (opts->target.initial_delay) {
2700 		pr_info(EVLIST_DISABLED_MSG);
2701 		if (opts->target.initial_delay > 0) {
2702 			usleep(opts->target.initial_delay * USEC_PER_MSEC);
2703 			evlist__enable(rec->evlist);
2704 			pr_info(EVLIST_ENABLED_MSG);
2705 		}
2706 	}
2707 
2708 	err = event_enable_timer__start(rec->evlist->eet);
2709 	if (err)
2710 		goto out_child;
2711 
2712 	/* Debug message used by test scripts */
2713 	pr_debug3("perf record has started\n");
2714 	fflush(stderr);
2715 
2716 	trigger_ready(&auxtrace_snapshot_trigger);
2717 	trigger_ready(&switch_output_trigger);
2718 	perf_hooks__invoke_record_start();
2719 
2720 	/*
2721 	 * Must write FINISHED_INIT so it will be seen after all other
2722 	 * synthesized user events, but before any regular events.
2723 	 */
2724 	err = write_finished_init(rec, false);
2725 	if (err < 0)
2726 		goto out_child;
2727 
2728 	for (;;) {
2729 		unsigned long long hits = thread->samples;
2730 
2731 		/*
2732 		 * rec->evlist->bkw_mmap_state can be BKW_MMAP_EMPTY
2733 		 * here: when done == true and hits != rec->samples
2734 		 * in the previous round.
2735 		 *
2736 		 * evlist__toggle_bkw_mmap() ensures we never convert
2737 		 * BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
2738 		 */
2739 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
2740 			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
2741 
2742 		if (record__mmap_read_all(rec, false) < 0) {
2743 			trigger_error(&auxtrace_snapshot_trigger);
2744 			trigger_error(&switch_output_trigger);
2745 			err = -1;
2746 			goto out_child;
2747 		}
2748 
2749 		if (auxtrace_record__snapshot_started) {
2750 			auxtrace_record__snapshot_started = 0;
2751 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
2752 				record__read_auxtrace_snapshot(rec, false);
2753 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2754 				pr_err("AUX area tracing snapshot failed\n");
2755 				err = -1;
2756 				goto out_child;
2757 			}
2758 		}
2759 
2760 		if (trigger_is_hit(&switch_output_trigger)) {
2761 			/*
2762 			 * If switch_output_trigger is hit, the data in
2763 			 * overwritable ring buffer should have been collected,
2764 			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
2765 			 *
2766 			 * If SIGUSR2 is raised after or during record__mmap_read_all(),
2767 			 * record__mmap_read_all() didn't collect data from the
2768 			 * overwritable ring buffer. Read again.
2769 			 */
2770 			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2771 				continue;
2772 			trigger_ready(&switch_output_trigger);
2773 
2774 			/*
2775 			 * Reenable events in overwrite ring buffer after
2776 			 * record__mmap_read_all(): we should have collected
2777 			 * data from it.
2778 			 */
2779 			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
2780 
2781 			if (!quiet)
2782 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
2783 					record__waking(rec));
2784 			thread->waking = 0;
2785 			fd = record__switch_output(rec, false);
2786 			if (fd < 0) {
2787 				pr_err("Failed to switch to new file\n");
2788 				trigger_error(&switch_output_trigger);
2789 				err = fd;
2790 				goto out_child;
2791 			}
2792 
2793 			/* re-arm the alarm */
2794 			if (rec->switch_output.time)
2795 				alarm(rec->switch_output.time);
2796 		}
2797 
2798 		if (hits == thread->samples) {
2799 			if (done || draining)
2800 				break;
2801 			err = fdarray__poll(&thread->pollfd, -1);
2802 			/*
2803 			 * Propagate the error only if there is one. Ignore a positive
2804 			 * number of returned events and interrupted polls (EINTR).
2805 			 */
2806 			if (err > 0 || (err < 0 && errno == EINTR))
2807 				err = 0;
2808 			thread->waking++;
2809 
2810 			if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2811 					    record__thread_munmap_filtered, NULL) == 0)
2812 				draining = true;
2813 
2814 			err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread);
2815 			if (err)
2816 				goto out_child;
2817 		}
2818 
2819 		if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2820 			switch (cmd) {
2821 			case EVLIST_CTL_CMD_SNAPSHOT:
2822 				hit_auxtrace_snapshot_trigger(rec);
2823 				evlist__ctlfd_ack(rec->evlist);
2824 				break;
2825 			case EVLIST_CTL_CMD_STOP:
2826 				done = 1;
2827 				break;
2828 			case EVLIST_CTL_CMD_ACK:
2829 			case EVLIST_CTL_CMD_UNSUPPORTED:
2830 			case EVLIST_CTL_CMD_ENABLE:
2831 			case EVLIST_CTL_CMD_DISABLE:
2832 			case EVLIST_CTL_CMD_EVLIST:
2833 			case EVLIST_CTL_CMD_PING:
2834 			default:
2835 				break;
2836 			}
2837 		}
2838 
2839 		err = event_enable_timer__process(rec->evlist->eet);
2840 		if (err < 0)
2841 			goto out_child;
2842 		if (err) {
2843 			err = 0;
2844 			done = 1;
2845 		}
2846 
2847 		/*
2848 		 * When perf is starting the traced process, at the end events
2849 		 * die with the process and we wait for that. Thus no need to
2850 		 * disable events in this case.
2851 		 */
2852 		if (done && !disabled && !target__none(&opts->target)) {
2853 			trigger_off(&auxtrace_snapshot_trigger);
2854 			evlist__disable(rec->evlist);
2855 			disabled = true;
2856 		}
2857 	}
2858 
2859 	trigger_off(&auxtrace_snapshot_trigger);
2860 	trigger_off(&switch_output_trigger);
2861 
2862 	record__synthesize_final_bpf_metadata(rec);
2863 
2864 	if (opts->auxtrace_snapshot_on_exit)
2865 		record__auxtrace_snapshot_exit(rec);
2866 
2867 	if (forks && workload_exec_errno) {
2868 		char msg[STRERR_BUFSIZE];
2869 		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
2870 		struct strbuf sb = STRBUF_INIT;
2871 
2872 		evlist__format_evsels(rec->evlist, &sb, 2048);
2873 
2874 		pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2875 			sb.buf, argv[0], emsg);
2876 		strbuf_release(&sb);
2877 		err = -1;
2878 		goto out_child;
2879 	}
2880 
2881 	if (!quiet)
2882 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2883 			record__waking(rec));
2884 
2885 	write_finished_init(rec, true);
2886 
2887 	if (target__none(&rec->opts.target))
2888 		record__synthesize_workload(rec, true);
2889 
2890 out_child:
2891 	record__stop_threads(rec);
2892 	record__mmap_read_all(rec, true);
2893 out_free_threads:
2894 	record__free_thread_data(rec);
2895 	evlist__finalize_ctlfd(rec->evlist);
2896 	record__aio_mmap_read_sync(rec);
2897 
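	/*
	 * With compression enabled, derive the average compression ratio from
	 * the accumulated byte counters; adding 0.5 rounds to the nearest
	 * integer when the float is stored in env->comp_ratio.
	 */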
2898 	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
2899 		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
2900 		env->comp_ratio = ratio + 0.5;
2901 	}
2902 
2903 	if (forks) {
2904 		int exit_status;
2905 
2906 		if (!child_finished)
2907 			kill(rec->evlist->workload.pid, SIGTERM);
2908 
2909 		wait(&exit_status);
2910 
2911 		if (err < 0)
2912 			status = err;
2913 		else if (WIFEXITED(exit_status))
2914 			status = WEXITSTATUS(exit_status);
2915 		else if (WIFSIGNALED(exit_status))
2916 			signr = WTERMSIG(exit_status);
2917 	} else
2918 		status = err;
2919 
2920 	if (rec->off_cpu)
2921 		rec->bytes_written += off_cpu_write(rec->session);
2922 
2923 	record__read_lost_samples(rec);
2924 	/* this will be recalculated during process_buildids() */
2925 	rec->samples = 0;
2926 
2927 	if (!err) {
2928 		record__synthesize(rec, true);
2929 		if (!rec->timestamp_filename) {
2930 			record__finish_output(rec);
2931 		} else {
2932 			fd = record__switch_output(rec, true);
2933 			if (fd < 0) {
2934 				status = fd;
2935 				goto out_delete_session;
2936 			}
2937 		}
2938 	}
2939 
2940 	perf_hooks__invoke_record_end();
2941 
2942 	if (!err && !quiet) {
2943 		char samples[128];
2944 		const char *postfix = rec->timestamp_filename ?
2945 					".<timestamp>" : "";
2946 
2947 		if (rec->samples && !rec->opts.full_auxtrace)
2948 			scnprintf(samples, sizeof(samples),
2949 				  " (%" PRIu64 " samples)", rec->samples);
2950 		else
2951 			samples[0] = '\0';
2952 
2953 		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s",
2954 			perf_data__size(data) / 1024.0 / 1024.0,
2955 			data->path, postfix, samples);
2956 		if (ratio) {
2957 			fprintf(stderr,	", compressed (original %.3f MB, ratio is %.3f)",
2958 					rec->session->bytes_transferred / 1024.0 / 1024.0,
2959 					ratio);
2960 		}
2961 		fprintf(stderr, " ]\n");
2962 	}
2963 
2964 out_delete_session:
2965 #ifdef HAVE_EVENTFD_SUPPORT
2966 	if (done_fd >= 0) {
2967 		fd = done_fd;
2968 		done_fd = -1;
2969 
2970 		close(fd);
2971 	}
2972 #endif
2973 	zstd_fini(&session->zstd_data);
2974 	if (!opts->no_bpf_event)
2975 		evlist__stop_sb_thread(rec->sb_evlist);
2976 
2977 	perf_session__delete(session);
2978 	return status;
2979 }
2980 
2981 static void callchain_debug(struct callchain_param *callchain)
2982 {
2983 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2984 
2985 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2986 
2987 	if (callchain->record_mode == CALLCHAIN_DWARF)
2988 		pr_debug("callchain: stack dump size %d\n",
2989 			 callchain->dump_size);
2990 }
2991 
2992 int record_opts__parse_callchain(struct record_opts *record,
2993 				 struct callchain_param *callchain,
2994 				 const char *arg, bool unset)
2995 {
2996 	int ret;
2997 	callchain->enabled = !unset;
2998 
2999 	/* --no-call-graph */
3000 	if (unset) {
3001 		callchain->record_mode = CALLCHAIN_NONE;
3002 		pr_debug("callchain: disabled\n");
3003 		return 0;
3004 	}
3005 
3006 	ret = parse_callchain_record_opt(arg, callchain);
3007 	if (!ret) {
3008 		/* Enable data address sampling for DWARF unwind. */
3009 		if (callchain->record_mode == CALLCHAIN_DWARF)
3010 			record->sample_address = true;
3011 		callchain_debug(callchain);
3012 	}
3013 
3014 	return ret;
3015 }
3016 
3017 int record_parse_callchain_opt(const struct option *opt,
3018 			       const char *arg,
3019 			       int unset)
3020 {
3021 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
3022 }
3023 
3024 int record_callchain_opt(const struct option *opt,
3025 			 const char *arg __maybe_unused,
3026 			 int unset __maybe_unused)
3027 {
3028 	struct callchain_param *callchain = opt->value;
3029 
3030 	callchain->enabled = true;
3031 
3032 	if (callchain->record_mode == CALLCHAIN_NONE)
3033 		callchain->record_mode = CALLCHAIN_FP;
3034 
3035 	callchain_debug(callchain);
3036 	return 0;
3037 }
3038 
3039 static int perf_record_config(const char *var, const char *value, void *cb)
3040 {
3041 	struct record *rec = cb;
3042 
3043 	if (!strcmp(var, "record.build-id")) {
3044 		if (!strcmp(value, "cache"))
3045 			rec->no_buildid_cache = false;
3046 		else if (!strcmp(value, "no-cache"))
3047 			rec->no_buildid_cache = true;
3048 		else if (!strcmp(value, "skip"))
3049 			rec->no_buildid = rec->no_buildid_cache = true;
3050 		else if (!strcmp(value, "mmap"))
3051 			rec->buildid_mmap = true;
3052 		else if (!strcmp(value, "no-mmap"))
3053 			rec->buildid_mmap = false;
3054 		else
3055 			return -1;
3056 		return 0;
3057 	}
3058 	if (!strcmp(var, "record.call-graph")) {
3059 		var = "call-graph.record-mode";
3060 		return perf_default_config(var, value, cb);
3061 	}
3062 #ifdef HAVE_AIO_SUPPORT
3063 	if (!strcmp(var, "record.aio")) {
3064 		rec->opts.nr_cblocks = strtol(value, NULL, 0);
3065 		if (!rec->opts.nr_cblocks)
3066 			rec->opts.nr_cblocks = nr_cblocks_default;
3067 	}
3068 #endif
3069 	if (!strcmp(var, "record.debuginfod")) {
3070 		rec->debuginfod.urls = strdup(value);
3071 		if (!rec->debuginfod.urls)
3072 			return -ENOMEM;
3073 		rec->debuginfod.set = true;
3074 	}
3075 
3076 	return 0;
3077 }
3078 
3079 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset)
3080 {
3081 	struct record *rec = (struct record *)opt->value;
3082 
3083 	return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset);
3084 }
3085 
3086 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
3087 {
3088 	struct record_opts *opts = (struct record_opts *)opt->value;
3089 
3090 	if (unset || !str)
3091 		return 0;
3092 
3093 	if (!strcasecmp(str, "node"))
3094 		opts->affinity = PERF_AFFINITY_NODE;
3095 	else if (!strcasecmp(str, "cpu"))
3096 		opts->affinity = PERF_AFFINITY_CPU;
3097 
3098 	return 0;
3099 }
3100 
3101 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
3102 {
3103 	mask->nbits = nr_bits;
3104 	mask->bits = bitmap_zalloc(mask->nbits);
3105 	if (!mask->bits)
3106 		return -ENOMEM;
3107 
3108 	return 0;
3109 }
3110 
3111 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
3112 {
3113 	bitmap_free(mask->bits);
3114 	mask->nbits = 0;
3115 }
3116 
3117 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
3118 {
3119 	int ret;
3120 
3121 	ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
3122 	if (ret) {
3123 		mask->affinity.bits = NULL;
3124 		return ret;
3125 	}
3126 
3127 	ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
3128 	if (ret) {
3129 		record__mmap_cpu_mask_free(&mask->maps);
3130 		mask->maps.bits = NULL;
3131 	}
3132 
3133 	return ret;
3134 }
3135 
3136 static void record__thread_mask_free(struct thread_mask *mask)
3137 {
3138 	record__mmap_cpu_mask_free(&mask->maps);
3139 	record__mmap_cpu_mask_free(&mask->affinity);
3140 }
3141 
3142 static int record__parse_threads(const struct option *opt, const char *str, int unset)
3143 {
3144 	int s;
3145 	struct record_opts *opts = opt->value;
3146 
3147 	if (unset || !str || !strlen(str)) {
3148 		opts->threads_spec = THREAD_SPEC__CPU;
3149 	} else {
3150 		for (s = 1; s < THREAD_SPEC__MAX; s++) {
3151 			if (s == THREAD_SPEC__USER) {
3152 				opts->threads_user_spec = strdup(str);
3153 				if (!opts->threads_user_spec)
3154 					return -ENOMEM;
3155 				opts->threads_spec = THREAD_SPEC__USER;
3156 				break;
3157 			}
3158 			if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
3159 				opts->threads_spec = s;
3160 				break;
3161 			}
3162 		}
3163 	}
3164 
3165 	if (opts->threads_spec == THREAD_SPEC__USER)
3166 		pr_debug("threads_spec: %s\n", opts->threads_user_spec);
3167 	else
3168 		pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
3169 
3170 	return 0;
3171 }
3172 
3173 static int parse_output_max_size(const struct option *opt,
3174 				 const char *str, int unset)
3175 {
3176 	unsigned long *s = (unsigned long *)opt->value;
3177 	static struct parse_tag tags_size[] = {
3178 		{ .tag  = 'B', .mult = 1       },
3179 		{ .tag  = 'K', .mult = 1 << 10 },
3180 		{ .tag  = 'M', .mult = 1 << 20 },
3181 		{ .tag  = 'G', .mult = 1 << 30 },
3182 		{ .tag  = 0 },
3183 	};
3184 	unsigned long val;
3185 
3186 	if (unset) {
3187 		*s = 0;
3188 		return 0;
3189 	}
3190 
3191 	val = parse_tag_value(str, tags_size);
3192 	if (val != (unsigned long) -1) {
3193 		*s = val;
3194 		return 0;
3195 	}
3196 
3197 	return -1;
3198 }
3199 
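/*
 * Parse the -m/--mmap-pages argument: an optional second, comma-separated
 * value sets the AUX area mmap size. Both parts go through
 * __evlist__parse_mmap_pages(), so (assuming the usual page-count/size
 * forms are accepted there) invocations like
 *	perf record -m 512 ...
 *	perf record -m 128,256 ...
 * request 512 data pages, or 128 data pages plus 256 AUX pages.
 */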
3200 static int record__parse_mmap_pages(const struct option *opt,
3201 				    const char *str,
3202 				    int unset __maybe_unused)
3203 {
3204 	struct record_opts *opts = opt->value;
3205 	char *s, *p;
3206 	unsigned int mmap_pages;
3207 	int ret;
3208 
3209 	if (!str)
3210 		return -EINVAL;
3211 
3212 	s = strdup(str);
3213 	if (!s)
3214 		return -ENOMEM;
3215 
3216 	p = strchr(s, ',');
3217 	if (p)
3218 		*p = '\0';
3219 
3220 	if (*s) {
3221 		ret = __evlist__parse_mmap_pages(&mmap_pages, s);
3222 		if (ret)
3223 			goto out_free;
3224 		opts->mmap_pages = mmap_pages;
3225 	}
3226 
3227 	if (!p) {
3228 		ret = 0;
3229 		goto out_free;
3230 	}
3231 
3232 	ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
3233 	if (ret)
3234 		goto out_free;
3235 
3236 	opts->auxtrace_mmap_pages = mmap_pages;
3237 
3238 out_free:
3239 	free(s);
3240 	return ret;
3241 }
3242 
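/*
 * The off-CPU threshold argument is taken in milliseconds and stored in
 * nanoseconds, so e.g. "500" becomes 500 * NSEC_PER_MSEC; only a plain
 * non-negative decimal integer parses, anything else yields -EINVAL.
 */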
3243 static int record__parse_off_cpu_thresh(const struct option *opt,
3244 					const char *str,
3245 					int unset __maybe_unused)
3246 {
3247 	struct record_opts *opts = opt->value;
3248 	char *endptr;
3249 	u64 off_cpu_thresh_ms;
3250 
3251 	if (!str)
3252 		return -EINVAL;
3253 
3254 	off_cpu_thresh_ms = strtoull(str, &endptr, 10);
3255 
3256 	/* the threshold isn't string "0", yet strtoull() returns 0, parsing failed */
3257 	/* strtoull() returned 0 but the string isn't "0": parsing failed */
3258 		return -EINVAL;
3259 	else
3260 		opts->off_cpu_thresh_ns = off_cpu_thresh_ms * NSEC_PER_MSEC;
3261 
3262 	return 0;
3263 }
3264 
3265 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
3266 {
3267 }
3268 
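/*
 * The --control option string is handed straight to evlist__parse_control(),
 * which fills in the command and optional ack file descriptors that
 * evlist__ctlfd_process() polls in the main loop to act on runtime commands
 * (EVLIST_CTL_CMD_ENABLE, _DISABLE, _SNAPSHOT, _STOP, ...).
 */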
3269 static int parse_control_option(const struct option *opt,
3270 				const char *str,
3271 				int unset __maybe_unused)
3272 {
3273 	struct record_opts *opts = opt->value;
3274 
3275 	return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
3276 }
3277 
3278 static void switch_output_size_warn(struct record *rec)
3279 {
3280 	u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
3281 	struct switch_output *s = &rec->switch_output;
3282 
3283 	wakeup_size /= 2;
3284 
3285 	if (s->size < wakeup_size) {
3286 		char buf[100];
3287 
3288 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
3289 		pr_warning("WARNING: switch-output data size lower than "
3290 			   "wakeup kernel buffer size (%s), "
3291 			   "expect bigger perf.data sizes\n", buf);
3292 	}
3293 }
3294 
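/*
 * --switch-output accepts three forms: "signal" (rotate on SIGUSR2), a size
 * with a B/K/M/G suffix or a time with an s/m/h/d suffix, e.g. (illustrative
 * invocations):
 *	perf record --switch-output=signal ...
 *	perf record --switch-output=100M ...
 *	perf record --switch-output=10m ...
 * Any of them implies timestamped output file names, and
 * --switch-output-event implies the signal form.
 */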
3295 static int switch_output_setup(struct record *rec)
3296 {
3297 	struct switch_output *s = &rec->switch_output;
3298 	static struct parse_tag tags_size[] = {
3299 		{ .tag  = 'B', .mult = 1       },
3300 		{ .tag  = 'K', .mult = 1 << 10 },
3301 		{ .tag  = 'M', .mult = 1 << 20 },
3302 		{ .tag  = 'G', .mult = 1 << 30 },
3303 		{ .tag  = 0 },
3304 	};
3305 	static struct parse_tag tags_time[] = {
3306 		{ .tag  = 's', .mult = 1        },
3307 		{ .tag  = 'm', .mult = 60       },
3308 		{ .tag  = 'h', .mult = 60*60    },
3309 		{ .tag  = 'd', .mult = 60*60*24 },
3310 		{ .tag  = 0 },
3311 	};
3312 	unsigned long val;
3313 
3314 	/*
3315 	 * If we're using --switch-output-events, then we imply its
3316 	 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
3317 	 *  thread to its parent.
3318 	 */
3319 	if (rec->switch_output_event_set) {
3320 		if (record__threads_enabled(rec)) {
3321 			pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
3322 			return 0;
3323 		}
3324 		goto do_signal;
3325 	}
3326 
3327 	if (!s->set)
3328 		return 0;
3329 
3330 	if (record__threads_enabled(rec)) {
3331 		pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
3332 		return 0;
3333 	}
3334 
3335 	if (!strcmp(s->str, "signal")) {
3336 do_signal:
3337 		s->signal = true;
3338 		pr_debug("switch-output with SIGUSR2 signal\n");
3339 		goto enabled;
3340 	}
3341 
3342 	val = parse_tag_value(s->str, tags_size);
3343 	if (val != (unsigned long) -1) {
3344 		s->size = val;
3345 		pr_debug("switch-output with %s size threshold\n", s->str);
3346 		goto enabled;
3347 	}
3348 
3349 	val = parse_tag_value(s->str, tags_time);
3350 	if (val != (unsigned long) -1) {
3351 		s->time = val;
3352 		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
3353 			 s->str, s->time);
3354 		goto enabled;
3355 	}
3356 
3357 	return -1;
3358 
3359 enabled:
3360 	rec->timestamp_filename = true;
3361 	s->enabled              = true;
3362 
3363 	if (s->size && !rec->opts.no_buffering)
3364 		switch_output_size_warn(rec);
3365 
3366 	return 0;
3367 }
3368 
3369 static const char * const __record_usage[] = {
3370 	"perf record [<options>] [<command>]",
3371 	"perf record [<options>] -- <command> [<options>]",
3372 	NULL
3373 };
3374 const char * const *record_usage = __record_usage;
3375 
3376 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
3377 				  struct perf_sample *sample, struct machine *machine)
3378 {
3379 	/*
3380 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3381 	 * no need to add them twice.
3382 	 */
3383 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3384 		return 0;
3385 	return perf_event__process_mmap(tool, event, sample, machine);
3386 }
3387 
3388 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
3389 				   struct perf_sample *sample, struct machine *machine)
3390 {
3391 	/*
3392 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3393 	 * no need to add them twice.
3394 	 */
3395 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3396 		return 0;
3397 
3398 	return perf_event__process_mmap2(tool, event, sample, machine);
3399 }
3400 
3401 static int process_timestamp_boundary(const struct perf_tool *tool,
3402 				      union perf_event *event __maybe_unused,
3403 				      struct perf_sample *sample,
3404 				      struct machine *machine __maybe_unused)
3405 {
3406 	struct record *rec = container_of(tool, struct record, tool);
3407 
3408 	set_timestamp_boundary(rec, sample->time);
3409 	return 0;
3410 }
3411 
3412 static int parse_record_synth_option(const struct option *opt,
3413 				     const char *str,
3414 				     int unset __maybe_unused)
3415 {
3416 	struct record_opts *opts = opt->value;
3417 	char *p = strdup(str);
3418 
3419 	if (p == NULL)
3420 		return -1;
3421 
3422 	opts->synth = parse_synth_opt(p);
3423 	free(p);
3424 
3425 	if (opts->synth < 0) {
3426 		pr_err("Invalid synth option: %s\n", str);
3427 		return -1;
3428 	}
3429 	return 0;
3430 }
3431 
3432 /*
3433  * XXX Ideally would be local to cmd_record() and passed to a record__new
3434  * because we need to have access to it in record__exit, that is called
3435  * after cmd_record() exits, but since record_options need to be accessible to
3436  * builtin-script, leave it here.
3437  *
3438  * At least we don't ouch it in all the other functions here directly.
3439  *
3440  * Just say no to tons of global variables, sigh.
3441  */
3442 static struct record record = {
3443 	.opts = {
3444 		.sample_time	     = true,
3445 		.mmap_pages	     = UINT_MAX,
3446 		.user_freq	     = UINT_MAX,
3447 		.user_interval	     = ULLONG_MAX,
3448 		.freq		     = 4000,
3449 		.target		     = {
3450 			.uses_mmap   = true,
3451 			.default_per_cpu = true,
3452 		},
3453 		.mmap_flush          = MMAP_FLUSH_DEFAULT,
3454 		.nr_threads_synthesize = 1,
3455 		.ctl_fd              = -1,
3456 		.ctl_fd_ack          = -1,
3457 		.synth               = PERF_SYNTH_ALL,
3458 		.off_cpu_thresh_ns   = OFFCPU_THRESH,
3459 	},
3460 	.buildid_mmap = true,
3461 };
3462 
3463 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3464 	"\n\t\t\t\tDefault: fp";
3465 
3466 static bool dry_run;
3467 
3468 static struct parse_events_option_args parse_events_option_args = {
3469 	.evlistp = &record.evlist,
3470 };
3471 
3472 static struct parse_events_option_args switch_output_parse_events_option_args = {
3473 	.evlistp = &record.sb_evlist,
3474 };
3475 
3476 /*
3477  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
3478  * with it and switch to use the library functions in perf_evlist that came
3479  * from builtin-record.c, i.e. use record_opts,
3480  * evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
3481  * using pipes, etc.
3482  */
3483 static struct option __record_options[] = {
3484 	OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
3485 		     "event selector. use 'perf list' to list available events",
3486 		     parse_events_option),
3487 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3488 		     "event filter", parse_filter),
3489 	OPT_BOOLEAN(0, "latency", &record.latency,
3490 		    "Enable data collection for latency profiling.\n"
3491 		    "\t\t\t  Use perf report --latency for latency-centric profile."),
3492 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3493 			   NULL, "don't record events from perf itself",
3494 			   exclude_perf),
3495 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3496 		    "record events on existing process id"),
3497 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3498 		    "record events on existing thread id"),
3499 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
3500 		    "collect data with this RT SCHED_FIFO priority"),
3501 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3502 		    "collect data without buffering"),
3503 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3504 		    "collect raw sample records from all opened counters"),
3505 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3506 			    "system-wide collection from all CPUs"),
3507 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3508 		    "list of cpus to monitor"),
3509 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3510 	OPT_STRING('o', "output", &record.data.path, "file",
3511 		    "output file name"),
3512 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3513 			&record.opts.no_inherit_set,
3514 			"child tasks do not inherit counters"),
3515 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3516 		    "synthesize non-sample events at the end of output"),
3517 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3518 	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3519 	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3520 		    "Fail if the specified frequency can't be used"),
3521 	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3522 		     "profile at this frequency",
3523 		      record__parse_freq),
3524 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3525 		     "number of mmap data pages and AUX area tracing mmap pages",
3526 		     record__parse_mmap_pages),
3527 	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3528 		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
3529 		     record__mmap_flush_parse),
3530 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3531 			   NULL, "enables call-graph recording",
3532 			   &record_callchain_opt),
3533 	OPT_CALLBACK(0, "call-graph", &record.opts,
3534 		     "record_mode[,record_size]", record_callchain_help,
3535 		     &record_parse_callchain_opt),
3536 	OPT_INCR('v', "verbose", &verbose,
3537 		    "be more verbose (show counter open errors, etc)"),
3538 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"),
3539 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3540 		    "per thread counts"),
3541 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3542 	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3543 		    "Record the sample physical addresses"),
3544 	OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3545 		    "Record the sampled data address data page size"),
3546 	OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3547 		    "Record the sampled code address (ip) page size"),
3548 	OPT_BOOLEAN(0, "sample-mem-info", &record.opts.sample_data_src,
3549 		    "Record the data source for memory operations"),
3550 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3551 	OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier,
3552 		    "Record the sample identifier"),
3553 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3554 			&record.opts.sample_time_set,
3555 			"Record the sample timestamps"),
3556 	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3557 			"Record the sample period"),
3558 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3559 		    "don't sample"),
3560 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3561 			&record.no_buildid_cache_set,
3562 			"do not update the buildid cache"),
3563 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3564 			&record.no_buildid_set,
3565 			"do not collect buildids in perf.data"),
3566 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3567 		     "monitor event in cgroup name only",
3568 		     parse_cgroups),
3569 	OPT_CALLBACK('D', "delay", &record, "ms",
3570 		     "ms to wait before starting measurement after program start (-1: start with events disabled), "
3571 		     "or ranges of time to enable events e.g. '-D 10-20,30-40'",
3572 		     record__parse_event_enable_time),
3573 	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3574 	OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
3575 
3576 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3577 		     "branch any", "sample any taken branches",
3578 		     parse_branch_stack),
3579 
3580 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3581 		     "branch filter mask", "branch stack filter modes",
3582 		     parse_branch_stack),
3583 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3584 		    "sample by weight (on special events only)"),
3585 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3586 		    "sample transaction flags (special events only)"),
3587 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3588 		    "use per-thread mmaps"),
3589 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3590 		    "sample selected machine registers on interrupt,"
3591 		    " use '-I?' to list register names", parse_intr_regs),
3592 	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3593 		    "sample selected machine registers in user space,"
3594 		    " use '--user-regs=?' to list register names", parse_user_regs),
3595 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3596 		    "Record running/enabled time of read (:S) events"),
3597 	OPT_CALLBACK('k', "clockid", &record.opts,
3598 	"clockid", "clockid to use for events, see clock_gettime()",
3599 	parse_clockid),
3600 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3601 			  "opts", "AUX area tracing Snapshot Mode", ""),
3602 	OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3603 			  "opts", "sample AUX area", ""),
3604 	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3605 			"per thread proc mmap processing timeout in ms"),
3606 	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3607 		    "Record namespaces events"),
3608 	OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3609 		    "Record cgroup events"),
3610 	OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3611 			&record.opts.record_switch_events_set,
3612 			"Record context switch events"),
3613 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3614 			 "Configure all used events to run in kernel space.",
3615 			 PARSE_OPT_EXCLUSIVE),
3616 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3617 			 "Configure all used events to run in user space.",
3618 			 PARSE_OPT_EXCLUSIVE),
3619 	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3620 		    "collect kernel callchains"),
3621 	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3622 		    "collect user callchains"),
3623 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3624 		   "file", "vmlinux pathname"),
3625 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3626 		    "Record build-id of all DSOs regardless of hits"),
3627 	OPT_BOOLEAN_SET(0, "buildid-mmap", &record.buildid_mmap, &record.buildid_mmap_set,
3628 			"Record build-id in mmap events and skip build-id processing."),
3629 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3630 		    "append timestamp to output filename"),
3631 	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3632 		    "Record timestamp boundary (time of first/last samples)"),
3633 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3634 			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3635 			  "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3636 			  "signal"),
3637 	OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args,
3638 			 &record.switch_output_event_set, "switch output event",
3639 			 "switch output event selector. use 'perf list' to list available events",
3640 			 parse_events_option_new_evlist),
3641 	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3642 		   "Limit number of switch output generated files"),
3643 	OPT_BOOLEAN(0, "dry-run", &dry_run,
3644 		    "Parse options then exit"),
3645 #ifdef HAVE_AIO_SUPPORT
3646 	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3647 		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3648 		     record__aio_parse),
3649 #endif
3650 	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3651 		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3652 		     record__parse_affinity),
3653 #ifdef HAVE_ZSTD_SUPPORT
3654 	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3655 			    "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3656 			    record__parse_comp_level),
3657 #endif
3658 	OPT_CALLBACK(0, "max-size", &record.output_max_size,
3659 		     "size", "Limit the maximum size of the output file", parse_output_max_size),
3660 	OPT_UINTEGER(0, "num-thread-synthesize",
3661 		     &record.opts.nr_threads_synthesize,
3662 		     "number of threads to run for event synthesis"),
3663 #ifdef HAVE_LIBPFM
3664 	OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3665 		"libpfm4 event selector. use 'perf list' to list available events",
3666 		parse_libpfm_events_option),
3667 #endif
3668 	OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3669 		     "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3670 		     "\t\t\t  'snapshot': AUX area tracing snapshot).\n"
3671 		     "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3672 		     "\t\t\t  Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3673 		      parse_control_option),
3674 	OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3675 		     "Fine-tune event synthesis: default=all", parse_record_synth_option),
3676 	OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3677 			  &record.debuginfod.set, "debuginfod urls",
3678 			  "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3679 			  "system"),
3680 	OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3681 			    "write collected trace data into several data files using parallel threads",
3682 			    record__parse_threads),
3683 	OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
3684 	OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin",
3685 		   "BPF filter action"),
3686 	OPT_CALLBACK(0, "off-cpu-thresh", &record.opts, "ms",
3687 		     "Dump off-cpu samples if off-cpu time exceeds this threshold (in milliseconds). (Default: 500ms)",
3688 		     record__parse_off_cpu_thresh),
3689 	OPT_END()
3690 };
3691 
3692 struct option *record_options = __record_options;
3693 
3694 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3695 {
3696 	struct perf_cpu cpu;
3697 	int idx;
3698 
3699 	if (cpu_map__is_dummy(cpus))
3700 		return 0;
3701 
3702 	perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) {
3703 		/* Return ENODEV if the input cpu is greater than max cpu */
3704 		if ((unsigned long)cpu.cpu > mask->nbits)
3705 			return -ENODEV;
3706 		__set_bit(cpu.cpu, mask->bits);
3707 	}
3708 
3709 	return 0;
3710 }
3711 
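/*
 * Initialize a CPU bitmap from a CPU list string: the spec (e.g. an
 * illustrative "0-3,7") is parsed by perf_cpu_map__new() and each CPU of the
 * resulting map sets one bit of the mask via record__mmap_cpu_mask_init().
 */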
3712 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3713 {
3714 	struct perf_cpu_map *cpus;
3715 
3716 	cpus = perf_cpu_map__new(mask_spec);
3717 	if (!cpus)
3718 		return -ENOMEM;
3719 
3720 	bitmap_zero(mask->bits, mask->nbits);
3721 	if (record__mmap_cpu_mask_init(mask, cpus))
3722 		return -ENODEV;
3723 
3724 	perf_cpu_map__put(cpus);
3725 
3726 	return 0;
3727 }
3728 
3729 static void record__free_thread_masks(struct record *rec, int nr_threads)
3730 {
3731 	int t;
3732 
3733 	if (rec->thread_masks)
3734 		for (t = 0; t < nr_threads; t++)
3735 			record__thread_mask_free(&rec->thread_masks[t]);
3736 
3737 	zfree(&rec->thread_masks);
3738 }
3739 
3740 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3741 {
3742 	int t, ret;
3743 
3744 	rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3745 	if (!rec->thread_masks) {
3746 		pr_err("Failed to allocate thread masks\n");
3747 		return -ENOMEM;
3748 	}
3749 
3750 	for (t = 0; t < nr_threads; t++) {
3751 		ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3752 		if (ret) {
3753 			pr_err("Failed to allocate thread masks[%d]\n", t);
3754 			goto out_free;
3755 		}
3756 	}
3757 
3758 	return 0;
3759 
3760 out_free:
3761 	record__free_thread_masks(rec, nr_threads);
3762 
3763 	return ret;
3764 }
3765 
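/*
 * THREAD_SPEC__CPU (i.e. --threads=cpu) layout: one writer thread per CPU of
 * the session CPU map, with both the maps and the affinity mask of each
 * thread reduced to that single CPU.
 */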
3766 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3767 {
3768 	int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3769 
3770 	ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3771 	if (ret)
3772 		return ret;
3773 
3774 	rec->nr_threads = nr_cpus;
3775 	pr_debug("nr_threads: %d\n", rec->nr_threads);
3776 
3777 	for (t = 0; t < rec->nr_threads; t++) {
3778 		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
3779 		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
3780 		if (verbose > 0) {
3781 			pr_debug("thread_masks[%d]: ", t);
3782 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3783 			pr_debug("thread_masks[%d]: ", t);
3784 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3785 		}
3786 	}
3787 
3788 	return 0;
3789 }
3790 
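/*
 * Build thread masks from per-thread maps/affinity CPU list specs: every spec
 * entry becomes one writer thread. Each mask must still be non-empty after
 * intersecting it with the session CPUs and must not overlap any previously
 * accepted mask, otherwise -EINVAL is returned.
 */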
3791 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
3792 					  const char **maps_spec, const char **affinity_spec,
3793 					  u32 nr_spec)
3794 {
3795 	u32 s;
3796 	int ret = 0, t = 0;
3797 	struct mmap_cpu_mask cpus_mask;
3798 	struct thread_mask thread_mask, full_mask, *thread_masks;
3799 
3800 	ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
3801 	if (ret) {
3802 		pr_err("Failed to allocate CPUs mask\n");
3803 		return ret;
3804 	}
3805 
3806 	ret = record__mmap_cpu_mask_init(&cpus_mask, cpus);
3807 	if (ret) {
3808 		pr_err("Failed to init cpu mask\n");
3809 		goto out_free_cpu_mask;
3810 	}
3811 
3812 	ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
3813 	if (ret) {
3814 		pr_err("Failed to allocate full mask\n");
3815 		goto out_free_cpu_mask;
3816 	}
3817 
3818 	ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3819 	if (ret) {
3820 		pr_err("Failed to allocate thread mask\n");
3821 		goto out_free_full_and_cpu_masks;
3822 	}
3823 
3824 	for (s = 0; s < nr_spec; s++) {
3825 		ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
3826 		if (ret) {
3827 			pr_err("Failed to initialize maps thread mask\n");
3828 			goto out_free;
3829 		}
3830 		ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
3831 		if (ret) {
3832 			pr_err("Failed to initialize affinity thread mask\n");
3833 			goto out_free;
3834 		}
3835 
3836 		/* ignore invalid CPUs but do not allow empty masks */
3837 		if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
3838 				cpus_mask.bits, thread_mask.maps.nbits)) {
3839 			pr_err("Empty maps mask: %s\n", maps_spec[s]);
3840 			ret = -EINVAL;
3841 			goto out_free;
3842 		}
3843 		if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
3844 				cpus_mask.bits, thread_mask.affinity.nbits)) {
3845 			pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
3846 			ret = -EINVAL;
3847 			goto out_free;
3848 		}
3849 
3850 		/* do not allow intersection with other masks (full_mask) */
3851 		if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
3852 				      thread_mask.maps.nbits)) {
3853 			pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
3854 			ret = -EINVAL;
3855 			goto out_free;
3856 		}
3857 		if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
3858 				      thread_mask.affinity.nbits)) {
3859 			pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
3860 			ret = -EINVAL;
3861 			goto out_free;
3862 		}
3863 
3864 		bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
3865 			  thread_mask.maps.bits, full_mask.maps.nbits);
3866 		bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
3867 			  thread_mask.affinity.bits, full_mask.affinity.nbits);
3868 
3869 		thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
3870 		if (!thread_masks) {
3871 			pr_err("Failed to reallocate thread masks\n");
3872 			ret = -ENOMEM;
3873 			goto out_free;
3874 		}
3875 		rec->thread_masks = thread_masks;
3876 		rec->thread_masks[t] = thread_mask;
3877 		if (verbose > 0) {
3878 			pr_debug("thread_masks[%d]: ", t);
3879 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3880 			pr_debug("thread_masks[%d]: ", t);
3881 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3882 		}
3883 		t++;
3884 		ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3885 		if (ret) {
3886 			pr_err("Failed to allocate thread mask\n");
3887 			goto out_free_full_and_cpu_masks;
3888 		}
3889 	}
3890 	rec->nr_threads = t;
3891 	pr_debug("nr_threads: %d\n", rec->nr_threads);
3892 	if (!rec->nr_threads)
3893 		ret = -EINVAL;
3894 
3895 out_free:
3896 	record__thread_mask_free(&thread_mask);
3897 out_free_full_and_cpu_masks:
3898 	record__thread_mask_free(&full_mask);
3899 out_free_cpu_mask:
3900 	record__mmap_cpu_mask_free(&cpus_mask);
3901 
3902 	return ret;
3903 }
3904 
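/*
 * For the core, package and NUMA layouts below, the corresponding topology
 * CPU lists are passed to record__init_thread_masks_spec() as both the maps
 * and the affinity specs, i.e. one writer thread per core/package/NUMA node.
 */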
3905 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
3906 {
3907 	int ret;
3908 	struct cpu_topology *topo;
3909 
3910 	topo = cpu_topology__new();
3911 	if (!topo) {
3912 		pr_err("Failed to allocate CPU topology\n");
3913 		return -ENOMEM;
3914 	}
3915 
3916 	ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
3917 					     topo->core_cpus_list, topo->core_cpus_lists);
3918 	cpu_topology__delete(topo);
3919 
3920 	return ret;
3921 }
3922 
3923 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
3924 {
3925 	int ret;
3926 	struct cpu_topology *topo;
3927 
3928 	topo = cpu_topology__new();
3929 	if (!topo) {
3930 		pr_err("Failed to allocate CPU topology\n");
3931 		return -ENOMEM;
3932 	}
3933 
3934 	ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
3935 					     topo->package_cpus_list, topo->package_cpus_lists);
3936 	cpu_topology__delete(topo);
3937 
3938 	return ret;
3939 }
3940 
3941 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
3942 {
3943 	u32 s;
3944 	int ret;
3945 	const char **spec;
3946 	struct numa_topology *topo;
3947 
3948 	topo = numa_topology__new();
3949 	if (!topo) {
3950 		pr_err("Failed to allocate NUMA topology\n");
3951 		return -ENOMEM;
3952 	}
3953 
3954 	spec = zalloc(topo->nr * sizeof(char *));
3955 	if (!spec) {
3956 		pr_err("Failed to allocate NUMA spec\n");
3957 		ret = -ENOMEM;
3958 		goto out_delete_topo;
3959 	}
3960 	for (s = 0; s < topo->nr; s++)
3961 		spec[s] = topo->nodes[s].cpus;
3962 
3963 	ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
3964 
3965 	zfree(&spec);
3966 
3967 out_delete_topo:
3968 	numa_topology__delete(topo);
3969 
3970 	return ret;
3971 }
3972 
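/*
 * Parse a user supplied --threads spec of the form
 * <maps cpus>/<affinity cpus>[:<maps cpus>/<affinity cpus>...], e.g. an
 * illustrative "0-3/0-3:4-7/4-7": entries are split on ':' and each entry on
 * '/' into its maps and affinity CPU lists.
 */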
3973 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
3974 {
3975 	int t, ret;
3976 	u32 s, nr_spec = 0;
3977 	char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
3978 	char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;
3979 
3980 	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
3981 		spec = strtok_r(user_spec, ":", &spec_ptr);
3982 		if (spec == NULL)
3983 			break;
3984 		pr_debug2("threads_spec[%d]: %s\n", t, spec);
3985 		mask = strtok_r(spec, "/", &mask_ptr);
3986 		if (mask == NULL)
3987 			break;
3988 		pr_debug2("  maps mask: %s\n", mask);
3989 		tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
3990 		if (!tmp_spec) {
3991 			pr_err("Failed to reallocate maps spec\n");
3992 			ret = -ENOMEM;
3993 			goto out_free;
3994 		}
3995 		maps_spec = tmp_spec;
3996 		maps_spec[nr_spec] = dup_mask = strdup(mask);
3997 		if (!maps_spec[nr_spec]) {
3998 			pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
3999 			ret = -ENOMEM;
4000 			goto out_free;
4001 		}
4002 		mask = strtok_r(NULL, "/", &mask_ptr);
4003 		if (mask == NULL) {
4004 			pr_err("Invalid thread maps or affinity specs\n");
4005 			ret = -EINVAL;
4006 			goto out_free;
4007 		}
4008 		pr_debug2("  affinity mask: %s\n", mask);
4009 		tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
4010 		if (!tmp_spec) {
4011 			pr_err("Failed to reallocate affinity spec\n");
4012 			ret = -ENOMEM;
4013 			goto out_free;
4014 		}
4015 		affinity_spec = tmp_spec;
4016 		affinity_spec[nr_spec] = strdup(mask);
4017 		if (!affinity_spec[nr_spec]) {
4018 			pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
4019 			ret = -ENOMEM;
4020 			goto out_free;
4021 		}
4022 		dup_mask = NULL;
4023 		nr_spec++;
4024 	}
4025 
4026 	ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
4027 					     (const char **)affinity_spec, nr_spec);
4028 
4029 out_free:
4030 	free(dup_mask);
4031 	for (s = 0; s < nr_spec; s++) {
4032 		if (maps_spec)
4033 			free(maps_spec[s]);
4034 		if (affinity_spec)
4035 			free(affinity_spec[s]);
4036 	}
4037 	free(affinity_spec);
4038 	free(maps_spec);
4039 
4040 	return ret;
4041 }
4042 
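/*
 * Default (non --threads) case: a single thread mask whose maps side covers
 * all session CPUs, i.e. the traditional single trace reader.
 */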
4043 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
4044 {
4045 	int ret;
4046 
4047 	ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
4048 	if (ret)
4049 		return ret;
4050 
4051 	if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
4052 		return -ENODEV;
4053 
4054 	rec->nr_threads = 1;
4055 
4056 	return 0;
4057 }
4058 
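/*
 * Choose the thread mask layout from the --threads spec (cpu, core, package,
 * numa or user provided masks). Parallel streaming is mutually exclusive with
 * --per-thread mmaps; without --threads a single default mask is used.
 */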
4059 static int record__init_thread_masks(struct record *rec)
4060 {
4061 	int ret = 0;
4062 	struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;
4063 
4064 	if (!record__threads_enabled(rec))
4065 		return record__init_thread_default_masks(rec, cpus);
4066 
4067 	if (evlist__per_thread(rec->evlist)) {
4068 		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
4069 		return -EINVAL;
4070 	}
4071 
4072 	switch (rec->opts.threads_spec) {
4073 	case THREAD_SPEC__CPU:
4074 		ret = record__init_thread_cpu_masks(rec, cpus);
4075 		break;
4076 	case THREAD_SPEC__CORE:
4077 		ret = record__init_thread_core_masks(rec, cpus);
4078 		break;
4079 	case THREAD_SPEC__PACKAGE:
4080 		ret = record__init_thread_package_masks(rec, cpus);
4081 		break;
4082 	case THREAD_SPEC__NUMA:
4083 		ret = record__init_thread_numa_masks(rec, cpus);
4084 		break;
4085 	case THREAD_SPEC__USER:
4086 		ret = record__init_thread_user_masks(rec, cpus);
4087 		break;
4088 	default:
4089 		break;
4090 	}
4091 
4092 	return ret;
4093 }
4094 
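/*
 * Entry point of 'perf record': parse and sanity check the options, build the
 * evlist and target maps, configure auxtrace/off-cpu/thread masks and then
 * hand off to __cmd_record() for the actual recording session.
 */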
4095 int cmd_record(int argc, const char **argv)
4096 {
4097 	int err;
4098 	struct record *rec = &record;
4099 	char errbuf[BUFSIZ];
4100 
4101 	setlocale(LC_ALL, "");
4102 
4103 #ifndef HAVE_BPF_SKEL
4104 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
4105 	set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
4106 # undef set_nobuild
4107 #endif
4108 
4109 	/* Disable eager loading of kernel symbols, which adds overhead to perf record. */
4110 	symbol_conf.lazy_load_kernel_maps = true;
4111 	rec->opts.affinity = PERF_AFFINITY_SYS;
4112 
4113 	rec->evlist = evlist__new();
4114 	if (rec->evlist == NULL)
4115 		return -ENOMEM;
4116 
4117 	err = perf_config(perf_record_config, rec);
4118 	if (err)
4119 		return err;
4120 
4121 	argc = parse_options(argc, argv, record_options, record_usage,
4122 			    PARSE_OPT_STOP_AT_NON_OPTION);
4123 	if (quiet)
4124 		perf_quiet_option();
4125 
4126 	err = symbol__validate_sym_arguments();
4127 	if (err)
4128 		return err;
4129 
4130 	perf_debuginfod_setup(&record.debuginfod);
4131 
4132 	/* Make system wide (-a) the default target. */
4133 	if (!argc && target__none(&rec->opts.target))
4134 		rec->opts.target.system_wide = true;
4135 
4136 	if (nr_cgroups && !rec->opts.target.system_wide) {
4137 		usage_with_options_msg(record_usage, record_options,
4138 			"cgroup monitoring only available in system-wide mode");
4139 
4140 	}
4141 
4142 	if (record.latency) {
4143 		/*
4144 		 * There is no fundamental reason why latency profiling
4145 		 * can't work for system-wide mode, but exact semantics
4146 		 * and details are to be defined.
4147 		 * See the following thread for details:
4148 		 * https://lore.kernel.org/all/Z4XDJyvjiie3howF@google.com/
4149 		 */
4150 		if (record.opts.target.system_wide) {
4151 			pr_err("Failed: latency profiling is not supported with system-wide collection.\n");
4152 			err = -EINVAL;
4153 			goto out_opts;
4154 		}
4155 		record.opts.record_switch_events = true;
4156 	}
4157 
4158 	if (rec->buildid_mmap && !perf_can_record_build_id()) {
4159 		pr_warning("Missing support for build id in kernel mmap events.\n"
4160 			   "Disable this warning with --no-buildid-mmap\n");
4161 		rec->buildid_mmap = false;
4162 	}
4163 
4164 	if (rec->buildid_mmap) {
4165 		/* Enable perf_event_attr::build_id bit. */
4166 		rec->opts.build_id = true;
4167 		/* Disable build-ID table in the header. */
4168 		rec->no_buildid = true;
4169 	} else {
4170 		pr_debug("Disabling build id in synthesized mmap2 events.\n");
4171 		symbol_conf.no_buildid_mmap2 = true;
4172 	}
4173 
4174 	if (rec->no_buildid_set && rec->no_buildid) {
4175 		/* -B implies -N for historic reasons. */
4176 		rec->no_buildid_cache = true;
4177 	}
4178 
4179 	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
4180 		pr_err("Kernel has no cgroup sampling support.\n");
4181 		err = -EINVAL;
4182 		goto out_opts;
4183 	}
4184 
4185 	if (rec->opts.kcore)
4186 		rec->opts.text_poke = true;
4187 
4188 	if (rec->opts.kcore || record__threads_enabled(rec))
4189 		rec->data.is_dir = true;
4190 
4191 	if (record__threads_enabled(rec)) {
4192 		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
4193 			pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
4194 			goto out_opts;
4195 		}
4196 		if (record__aio_enabled(rec)) {
4197 			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
4198 			goto out_opts;
4199 		}
4200 	}
4201 
4202 	if (rec->opts.comp_level != 0) {
4203 		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
4204 		rec->no_buildid = true;
4205 	}
4206 
4207 	if (rec->opts.record_switch_events &&
4208 	    !perf_can_record_switch_events()) {
4209 		ui__error("kernel does not support recording context switch events\n");
4210 		parse_options_usage(record_usage, record_options, "switch-events", 0);
4211 		err = -EINVAL;
4212 		goto out_opts;
4213 	}
4214 
4215 	if (switch_output_setup(rec)) {
4216 		parse_options_usage(record_usage, record_options, "switch-output", 0);
4217 		err = -EINVAL;
4218 		goto out_opts;
4219 	}
4220 
4221 	if (rec->switch_output.time) {
4222 		signal(SIGALRM, alarm_sig_handler);
4223 		alarm(rec->switch_output.time);
4224 	}
4225 
4226 	if (rec->switch_output.num_files) {
4227 		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
4228 						      sizeof(char *));
4229 		if (!rec->switch_output.filenames) {
4230 			err = -EINVAL;
4231 			goto out_opts;
4232 		}
4233 	}
4234 
4235 	if (rec->timestamp_filename && record__threads_enabled(rec)) {
4236 		rec->timestamp_filename = false;
4237 		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
4238 	}
4239 
4240 	if (rec->filter_action) {
4241 		if (!strcmp(rec->filter_action, "pin"))
4242 			err = perf_bpf_filter__pin();
4243 		else if (!strcmp(rec->filter_action, "unpin"))
4244 			err = perf_bpf_filter__unpin();
4245 		else {
4246 			pr_warning("Unknown BPF filter action: %s\n", rec->filter_action);
4247 			err = -EINVAL;
4248 		}
4249 		goto out_opts;
4250 	}
4251 
4252 	/* For backward compatibility, -d implies --sample-mem-info */
4253 	if (rec->opts.sample_address)
4254 		rec->opts.sample_data_src = true;
4255 
4256 	/*
4257 	 * Allow aliases to facilitate the lookup of symbols for address
4258 	 * filters. Refer to auxtrace_parse_filters().
4259 	 */
4260 	symbol_conf.allow_aliases = true;
4261 
4262 	symbol__init(NULL);
4263 
4264 	err = record__auxtrace_init(rec);
4265 	if (err)
4266 		goto out;
4267 
4268 	if (dry_run)
4269 		goto out;
4270 
4271 	err = -ENOMEM;
4272 
4273 	if (rec->no_buildid_cache) {
4274 		disable_buildid_cache();
4275 	} else if (rec->switch_output.enabled) {
4276 		/*
4277 		 * In 'perf record --switch-output', disable buildid
4278 		 * generation by default to reduce data file switching
4279 		 * overhead. Still generate buildid if they are required
4280 		 * explicitly using
4281 		 *
4282 		 *  perf record --switch-output --no-no-buildid \
4283 		 *              --no-no-buildid-cache
4284 		 *
4285 		 * Following code equals to:
4286 		 *
4287 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
4288 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
4289 		 *         disable_buildid_cache();
4290 		 */
4291 		bool disable = true;
4292 
4293 		if (rec->no_buildid_set && !rec->no_buildid)
4294 			disable = false;
4295 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
4296 			disable = false;
4297 		if (disable) {
4298 			rec->no_buildid = true;
4299 			rec->no_buildid_cache = true;
4300 			disable_buildid_cache();
4301 		}
4302 	}
4303 
4304 	if (record.opts.overwrite)
4305 		record.opts.tail_synthesize = true;
4306 
4307 	if (rec->evlist->core.nr_entries == 0) {
4308 		struct evlist *def_evlist = evlist__new_default();
4309 
4310 		if (!def_evlist)
4311 			goto out;
4312 
4313 		evlist__splice_list_tail(rec->evlist, &def_evlist->core.entries);
4314 		evlist__delete(def_evlist);
4315 	}
4316 
4317 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
4318 		rec->opts.no_inherit = true;
4319 
4320 	err = target__validate(&rec->opts.target);
4321 	if (err) {
4322 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
4323 		ui__warning("%s\n", errbuf);
4324 	}
4325 
4326 	if (rec->uid_str) {
4327 		uid_t uid = parse_uid(rec->uid_str);
4328 
4329 		if (uid == UINT_MAX) {
4330 			ui__error("Invalid User: %s", rec->uid_str);
4331 			err = -EINVAL;
4332 			goto out;
4333 		}
4334 		err = parse_uid_filter(rec->evlist, uid);
4335 		if (err)
4336 			goto out;
4337 
4338 		/* User ID filtering implies system wide. */
4339 		rec->opts.target.system_wide = true;
4340 	}
4341 
4342 	/* Enable ignoring missing threads when -p option is defined. */
4343 	rec->opts.ignore_missing_thread = rec->opts.target.pid;
4344 
4345 	evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);
4346 
4347 	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
4348 		arch__add_leaf_frame_record_opts(&rec->opts);
4349 
4350 	err = -ENOMEM;
4351 	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
4352 		if (rec->opts.target.pid != NULL) {
4353 			pr_err("Couldn't create thread/CPU maps: %s\n",
4354 				errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
4355 			goto out;
4356 		} else {
4357 			usage_with_options(record_usage, record_options);
4358 		}
4359 	}
4360 
4361 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
4362 	if (err)
4363 		goto out;
4364 
4365 	/*
4366 	 * We take all buildids when the file contains
4367 	 * AUX area tracing data, because we do not decode the
4368 	 * trace (it would take too long).
4369 	 */
4370 	if (rec->opts.full_auxtrace)
4371 		rec->buildid_all = true;
4372 
4373 	if (rec->opts.text_poke) {
4374 		err = record__config_text_poke(rec->evlist);
4375 		if (err) {
4376 			pr_err("record__config_text_poke failed, error %d\n", err);
4377 			goto out;
4378 		}
4379 	}
4380 
4381 	if (rec->off_cpu) {
4382 		err = record__config_off_cpu(rec);
4383 		if (err) {
4384 			pr_err("record__config_off_cpu failed, error %d\n", err);
4385 			goto out;
4386 		}
4387 	}
4388 
4389 	if (record_opts__config(&rec->opts)) {
4390 		err = -EINVAL;
4391 		goto out;
4392 	}
4393 
4394 	err = record__config_tracking_events(rec);
4395 	if (err) {
4396 		pr_err("record__config_tracking_events failed, error %d\n", err);
4397 		goto out;
4398 	}
4399 
4400 	err = record__init_thread_masks(rec);
4401 	if (err) {
4402 		pr_err("Failed to initialize parallel data streaming masks\n");
4403 		goto out;
4404 	}
4405 
4406 	if (rec->opts.nr_cblocks > nr_cblocks_max)
4407 		rec->opts.nr_cblocks = nr_cblocks_max;
4408 	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
4409 
4410 	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
4411 	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
4412 
4413 	if (rec->opts.comp_level > comp_level_max)
4414 		rec->opts.comp_level = comp_level_max;
4415 	pr_debug("comp level: %d\n", rec->opts.comp_level);
4416 
4417 	err = __cmd_record(&record, argc, argv);
4418 out:
4419 	record__free_thread_masks(rec, rec->nr_threads);
4420 	rec->nr_threads = 0;
4421 	symbol__exit();
4422 	auxtrace_record__free(rec->itr);
4423 out_opts:
4424 	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
4425 	evlist__delete(rec->evlist);
4426 	return err;
4427 }
4428 
4429 static void snapshot_sig_handler(int sig __maybe_unused)
4430 {
4431 	struct record *rec = &record;
4432 
4433 	hit_auxtrace_snapshot_trigger(rec);
4434 
4435 	if (switch_output_signal(rec))
4436 		trigger_hit(&switch_output_trigger);
4437 }
4438 
4439 static void alarm_sig_handler(int sig __maybe_unused)
4440 {
4441 	struct record *rec = &record;
4442 
4443 	if (switch_output_time(rec))
4444 		trigger_hit(&switch_output_trigger);
4445 }
4446