xref: /linux/tools/perf/builtin-record.c (revision c7decec2f2d2ab0366567f9e30c0e1418cece43f)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10 
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include <internal/xyarray.h>
14 #include "util/parse-events.h"
15 #include "util/config.h"
16 
17 #include "util/callchain.h"
18 #include "util/cgroup.h"
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/mmap.h"
25 #include "util/mutex.h"
26 #include "util/target.h"
27 #include "util/session.h"
28 #include "util/tool.h"
29 #include "util/stat.h"
30 #include "util/symbol.h"
31 #include "util/record.h"
32 #include "util/cpumap.h"
33 #include "util/thread_map.h"
34 #include "util/data.h"
35 #include "util/perf_regs.h"
36 #include "util/auxtrace.h"
37 #include "util/tsc.h"
38 #include "util/parse-branch-options.h"
39 #include "util/parse-regs-options.h"
40 #include "util/perf_api_probe.h"
41 #include "util/trigger.h"
42 #include "util/perf-hooks.h"
43 #include "util/cpu-set-sched.h"
44 #include "util/synthetic-events.h"
45 #include "util/time-utils.h"
46 #include "util/units.h"
47 #include "util/bpf-event.h"
48 #include "util/util.h"
49 #include "util/pfm.h"
50 #include "util/pmu.h"
51 #include "util/pmus.h"
52 #include "util/clockid.h"
53 #include "util/off_cpu.h"
54 #include "util/bpf-filter.h"
55 #include "util/strbuf.h"
56 #include "asm/bug.h"
57 #include "perf.h"
58 #include "cputopo.h"
59 
60 #include <errno.h>
61 #include <inttypes.h>
62 #include <locale.h>
63 #include <poll.h>
64 #include <pthread.h>
65 #include <unistd.h>
66 #ifndef HAVE_GETTID
67 #include <syscall.h>
68 #endif
69 #include <sched.h>
70 #include <signal.h>
71 #ifdef HAVE_EVENTFD_SUPPORT
72 #include <sys/eventfd.h>
73 #endif
74 #include <sys/mman.h>
75 #include <sys/wait.h>
76 #include <sys/types.h>
77 #include <sys/stat.h>
78 #include <fcntl.h>
79 #include <linux/err.h>
80 #include <linux/string.h>
81 #include <linux/time64.h>
82 #include <linux/zalloc.h>
83 #include <linux/bitmap.h>
84 #include <sys/time.h>
85 
86 struct switch_output {
87 	bool		 enabled;
88 	bool		 signal;
89 	unsigned long	 size;
90 	unsigned long	 time;
91 	const char	*str;
92 	bool		 set;
93 	char		 **filenames;
94 	int		 num_files;
95 	int		 cur_file;
96 };
97 
98 struct thread_mask {
99 	struct mmap_cpu_mask	maps;
100 	struct mmap_cpu_mask	affinity;
101 };
102 
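/*
 * Per-thread state for parallel data streaming: each record thread owns a
 * subset of the evlist mmaps (and overwrite mmaps), a pollfd array duplicated
 * from them, and msg/ack pipes used to hand-shake start and termination with
 * the main thread. The byte counters accumulate per-thread write and
 * compression statistics.
 */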
103 struct record_thread {
104 	pid_t			tid;
105 	struct thread_mask	*mask;
106 	struct {
107 		int		msg[2];
108 		int		ack[2];
109 	} pipes;
110 	struct fdarray		pollfd;
111 	int			ctlfd_pos;
112 	int			nr_mmaps;
113 	struct mmap		**maps;
114 	struct mmap		**overwrite_maps;
115 	struct record		*rec;
116 	unsigned long long	samples;
117 	unsigned long		waking;
118 	u64			bytes_written;
119 	u64			bytes_transferred;
120 	u64			bytes_compressed;
121 };
122 
123 static __thread struct record_thread *thread;
124 
125 enum thread_msg {
126 	THREAD_MSG__UNDEFINED = 0,
127 	THREAD_MSG__READY,
128 	THREAD_MSG__MAX,
129 };
130 
131 static const char *thread_msg_tags[THREAD_MSG__MAX] = {
132 	"UNDEFINED", "READY"
133 };
134 
135 enum thread_spec {
136 	THREAD_SPEC__UNDEFINED = 0,
137 	THREAD_SPEC__CPU,
138 	THREAD_SPEC__CORE,
139 	THREAD_SPEC__PACKAGE,
140 	THREAD_SPEC__NUMA,
141 	THREAD_SPEC__USER,
142 	THREAD_SPEC__MAX,
143 };
144 
145 static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
146 	"undefined", "cpu", "core", "package", "numa", "user"
147 };
148 
149 struct pollfd_index_map {
150 	int evlist_pollfd_index;
151 	int thread_pollfd_index;
152 };
153 
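/*
 * Top-level state of a record session: output data handling, the event list
 * and session, build-id and switch-output configuration, and the per-thread
 * masks/data used when parallel streaming is enabled.
 */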
154 struct record {
155 	struct perf_tool	tool;
156 	struct record_opts	opts;
157 	u64			bytes_written;
158 	u64			thread_bytes_written;
159 	struct perf_data	data;
160 	struct auxtrace_record	*itr;
161 	struct evlist	*evlist;
162 	struct perf_session	*session;
163 	struct evlist		*sb_evlist;
164 	pthread_t		thread_id;
165 	int			realtime_prio;
166 	bool			latency;
167 	bool			switch_output_event_set;
168 	bool			no_buildid;
169 	bool			no_buildid_set;
170 	bool			no_buildid_cache;
171 	bool			no_buildid_cache_set;
172 	bool			buildid_all;
173 	bool			buildid_mmap;
174 	bool			buildid_mmap_set;
175 	bool			timestamp_filename;
176 	bool			timestamp_boundary;
177 	bool			off_cpu;
178 	const char		*filter_action;
179 	const char		*uid_str;
180 	struct switch_output	switch_output;
181 	unsigned long long	samples;
182 	unsigned long		output_max_size;	/* = 0: unlimited */
183 	struct perf_debuginfod	debuginfod;
184 	int			nr_threads;
185 	struct thread_mask	*thread_masks;
186 	struct record_thread	*thread_data;
187 	struct pollfd_index_map	*index_map;
188 	size_t			index_map_sz;
189 	size_t			index_map_cnt;
190 };
191 
192 static volatile int done;
193 
194 static volatile int auxtrace_record__snapshot_started;
195 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
196 static DEFINE_TRIGGER(switch_output_trigger);
197 
198 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
199 	"SYS", "NODE", "CPU"
200 };
201 
202 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
203 				  struct perf_sample *sample, struct machine *machine);
204 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
205 				   struct perf_sample *sample, struct machine *machine);
206 static int process_timestamp_boundary(const struct perf_tool *tool,
207 				      union perf_event *event,
208 				      struct perf_sample *sample,
209 				      struct machine *machine);
210 
211 #ifndef HAVE_GETTID
212 static inline pid_t gettid(void)
213 {
214 	return (pid_t)syscall(__NR_gettid);
215 }
216 #endif
217 
218 static int record__threads_enabled(struct record *rec)
219 {
220 	return rec->opts.threads_spec;
221 }
222 
223 static bool switch_output_signal(struct record *rec)
224 {
225 	return rec->switch_output.signal &&
226 	       trigger_is_ready(&switch_output_trigger);
227 }
228 
229 static bool switch_output_size(struct record *rec)
230 {
231 	return rec->switch_output.size &&
232 	       trigger_is_ready(&switch_output_trigger) &&
233 	       (rec->bytes_written >= rec->switch_output.size);
234 }
235 
236 static bool switch_output_time(struct record *rec)
237 {
238 	return rec->switch_output.time &&
239 	       trigger_is_ready(&switch_output_trigger);
240 }
241 
242 static u64 record__bytes_written(struct record *rec)
243 {
244 	return rec->bytes_written + rec->thread_bytes_written;
245 }
246 
247 static bool record__output_max_size_exceeded(struct record *rec)
248 {
249 	return rec->output_max_size &&
250 	       (record__bytes_written(rec) >= rec->output_max_size);
251 }
252 
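/*
 * Write a chunk of trace data to the output. In parallel streaming mode each
 * mmap carries its own file (map->file), so writes from worker threads go to
 * their per-mmap files and are accounted in thread->bytes_written and
 * rec->thread_bytes_written; otherwise data goes to the single session file
 * and rec->bytes_written. Exceeding the configured output size limit stops
 * the session, and the switch-output size trigger is checked after every
 * write.
 */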
253 static int record__write(struct record *rec, struct mmap *map __maybe_unused,
254 			 void *bf, size_t size)
255 {
256 	struct perf_data_file *file = &rec->session->data->file;
257 
258 	if (map && map->file)
259 		file = map->file;
260 
261 	if (perf_data_file__write(file, bf, size) < 0) {
262 		pr_err("failed to write perf data, error: %m\n");
263 		return -1;
264 	}
265 
266 	if (map && map->file) {
267 		thread->bytes_written += size;
268 		rec->thread_bytes_written += size;
269 	} else {
270 		rec->bytes_written += size;
271 	}
272 
273 	if (record__output_max_size_exceeded(rec) && !done) {
274 		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
275 				" stopping session ]\n",
276 				record__bytes_written(rec) >> 10);
277 		done = 1;
278 	}
279 
280 	if (switch_output_size(rec))
281 		trigger_hit(&switch_output_trigger);
282 
283 	return 0;
284 }
285 
286 static int record__aio_enabled(struct record *rec);
287 static int record__comp_enabled(struct record *rec);
288 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
289 			    void *dst, size_t dst_size, void *src, size_t src_size);
290 
291 #ifdef HAVE_AIO_SUPPORT
292 static int record__aio_write(struct aiocb *cblock, int trace_fd,
293 		void *buf, size_t size, off_t off)
294 {
295 	int rc;
296 
297 	cblock->aio_fildes = trace_fd;
298 	cblock->aio_buf    = buf;
299 	cblock->aio_nbytes = size;
300 	cblock->aio_offset = off;
301 	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
302 
303 	do {
304 		rc = aio_write(cblock);
305 		if (rc == 0) {
306 			break;
307 		} else if (errno != EAGAIN) {
308 			cblock->aio_fildes = -1;
309 			pr_err("failed to queue perf data, error: %m\n");
310 			break;
311 		}
312 	} while (1);
313 
314 	return rc;
315 }
316 
317 static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
318 {
319 	void *rem_buf;
320 	off_t rem_off;
321 	size_t rem_size;
322 	int rc, aio_errno;
323 	ssize_t aio_ret, written;
324 
325 	aio_errno = aio_error(cblock);
326 	if (aio_errno == EINPROGRESS)
327 		return 0;
328 
329 	written = aio_ret = aio_return(cblock);
330 	if (aio_ret < 0) {
331 		if (aio_errno != EINTR)
332 			pr_err("failed to write perf data, error: %m\n");
333 		written = 0;
334 	}
335 
336 	rem_size = cblock->aio_nbytes - written;
337 
338 	if (rem_size == 0) {
339 		cblock->aio_fildes = -1;
340 		/*
341 		 * md->refcount is incremented in record__aio_pushfn() for
342 		 * every aio write request started in record__aio_push() so
343 		 * decrement it because the request is now complete.
344 		 */
345 		perf_mmap__put(&md->core);
346 		rc = 1;
347 	} else {
348 		/*
349 		 * The aio write request may need to be restarted with the
350 		 * remainder if the kernel didn't write the whole chunk at
351 		 * once.
352 		 */
353 		rem_off = cblock->aio_offset + written;
354 		rem_buf = (void *)(cblock->aio_buf + written);
355 		record__aio_write(cblock, cblock->aio_fildes,
356 				rem_buf, rem_size, rem_off);
357 		rc = 0;
358 	}
359 
360 	return rc;
361 }
362 
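/*
 * Wait for in-flight aio writes on this mmap. With sync_all == false the
 * function returns the index of the first control block that is unused or
 * whose request has just completed, so it can be reused; with sync_all ==
 * true it keeps suspending until every outstanding request has completed and
 * then returns -1.
 */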
363 static int record__aio_sync(struct mmap *md, bool sync_all)
364 {
365 	struct aiocb **aiocb = md->aio.aiocb;
366 	struct aiocb *cblocks = md->aio.cblocks;
367 	struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
368 	int i, do_suspend;
369 
370 	do {
371 		do_suspend = 0;
372 		for (i = 0; i < md->aio.nr_cblocks; ++i) {
373 			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
374 				if (sync_all)
375 					aiocb[i] = NULL;
376 				else
377 					return i;
378 			} else {
379 				/*
380 				 * The started aio write is not complete yet,
381 				 * so it has to be waited on before the
382 				 * next allocation.
383 				 */
384 				aiocb[i] = &cblocks[i];
385 				do_suspend = 1;
386 			}
387 		}
388 		if (!do_suspend)
389 			return -1;
390 
391 		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
392 			if (!(errno == EAGAIN || errno == EINTR))
393 				pr_err("failed to sync perf data, error: %m\n");
394 		}
395 	} while (1);
396 }
397 
398 struct record_aio {
399 	struct record	*rec;
400 	void		*data;
401 	size_t		size;
402 };
403 
404 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
405 {
406 	struct record_aio *aio = to;
407 
408 	/*
409 	 * The map->core.base data pointed to by buf is copied into a free map->aio.data[]
410 	 * buffer to release space in the kernel buffer as fast as possible, calling
411 	 * perf_mmap__consume() from the perf_mmap__push() function.
412 	 *
413 	 * That lets the kernel proceed with storing more profiling data into
414 	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
415 	 *
416 	 * Copying can be done in two steps in case the chunk of profiling data
417 	 * crosses the upper bound of the kernel buffer. In this case we first move
418 	 * the part of the data from map->start to the upper bound and then the remainder
419 	 * from the beginning of the kernel buffer to the end of the data chunk.
420 	 */
421 
422 	if (record__comp_enabled(aio->rec)) {
423 		ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
424 						   mmap__mmap_len(map) - aio->size,
425 						   buf, size);
426 		if (compressed < 0)
427 			return (int)compressed;
428 
429 		size = compressed;
430 	} else {
431 		memcpy(aio->data + aio->size, buf, size);
432 	}
433 
434 	if (!aio->size) {
435 		/*
436 		 * Increment map->refcount to guard the map->aio.data[] buffer
437 		 * from premature deallocation, because the map object can be
438 		 * released earlier than the aio write request started on the
439 		 * map->aio.data[] buffer completes.
440 		 *
441 		 * perf_mmap__put() is done at record__aio_complete() once the
442 		 * started aio request completes, or at record__aio_push()
443 		 * if the request failed to start.
444 		 */
445 		perf_mmap__get(&map->core);
446 	}
447 
448 	aio->size += size;
449 
450 	return size;
451 }
452 
453 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
454 {
455 	int ret, idx;
456 	int trace_fd = rec->session->data->file.fd;
457 	struct record_aio aio = { .rec = rec, .size = 0 };
458 
459 	/*
460 	 * Call record__aio_sync() to wait till map->aio.data[] buffer
461 	 * becomes available after previous aio write operation.
462 	 */
463 
464 	idx = record__aio_sync(map, false);
465 	aio.data = map->aio.data[idx];
466 	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
467 	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
468 		return ret;
469 
470 	rec->samples++;
471 	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
472 	if (!ret) {
473 		*off += aio.size;
474 		rec->bytes_written += aio.size;
475 		if (switch_output_size(rec))
476 			trigger_hit(&switch_output_trigger);
477 	} else {
478 		/*
479 		 * Decrement the map->refcount incremented in record__aio_pushfn()
480 		 * if the record__aio_write() operation failed to start; otherwise
481 		 * map->refcount is decremented in record__aio_complete() after the
482 		 * aio write operation finishes successfully.
483 		 */
484 		perf_mmap__put(&map->core);
485 	}
486 
487 	return ret;
488 }
489 
490 static off_t record__aio_get_pos(int trace_fd)
491 {
492 	return lseek(trace_fd, 0, SEEK_CUR);
493 }
494 
495 static void record__aio_set_pos(int trace_fd, off_t pos)
496 {
497 	lseek(trace_fd, pos, SEEK_SET);
498 }
499 
500 static void record__aio_mmap_read_sync(struct record *rec)
501 {
502 	int i;
503 	struct evlist *evlist = rec->evlist;
504 	struct mmap *maps = evlist->mmap;
505 
506 	if (!record__aio_enabled(rec))
507 		return;
508 
509 	for (i = 0; i < evlist->core.nr_mmaps; i++) {
510 		struct mmap *map = &maps[i];
511 
512 		if (map->core.base)
513 			record__aio_sync(map, true);
514 	}
515 }
516 
517 static int nr_cblocks_default = 1;
518 static int nr_cblocks_max = 4;
519 
520 static int record__aio_parse(const struct option *opt,
521 			     const char *str,
522 			     int unset)
523 {
524 	struct record_opts *opts = (struct record_opts *)opt->value;
525 
526 	if (unset) {
527 		opts->nr_cblocks = 0;
528 	} else {
529 		if (str)
530 			opts->nr_cblocks = strtol(str, NULL, 0);
531 		if (!opts->nr_cblocks)
532 			opts->nr_cblocks = nr_cblocks_default;
533 	}
534 
535 	return 0;
536 }
537 #else /* HAVE_AIO_SUPPORT */
538 static int nr_cblocks_max = 0;
539 
540 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
541 			    off_t *off __maybe_unused)
542 {
543 	return -1;
544 }
545 
546 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
547 {
548 	return -1;
549 }
550 
551 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
552 {
553 }
554 
555 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
556 {
557 }
558 #endif
559 
560 static int record__aio_enabled(struct record *rec)
561 {
562 	return rec->opts.nr_cblocks > 0;
563 }
564 
565 #define MMAP_FLUSH_DEFAULT 1
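/*
 * Parse the mmap flush threshold: a plain byte count or a value with a
 * B/K/M/G suffix handled by parse_tag_value(). The result defaults to
 * MMAP_FLUSH_DEFAULT and is capped at a quarter of the mmap buffer size.
 */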
566 static int record__mmap_flush_parse(const struct option *opt,
567 				    const char *str,
568 				    int unset)
569 {
570 	int flush_max;
571 	struct record_opts *opts = (struct record_opts *)opt->value;
572 	static struct parse_tag tags[] = {
573 			{ .tag  = 'B', .mult = 1       },
574 			{ .tag  = 'K', .mult = 1 << 10 },
575 			{ .tag  = 'M', .mult = 1 << 20 },
576 			{ .tag  = 'G', .mult = 1 << 30 },
577 			{ .tag  = 0 },
578 	};
579 
580 	if (unset)
581 		return 0;
582 
583 	if (str) {
584 		opts->mmap_flush = parse_tag_value(str, tags);
585 		if (opts->mmap_flush == (int)-1)
586 			opts->mmap_flush = strtol(str, NULL, 0);
587 	}
588 
589 	if (!opts->mmap_flush)
590 		opts->mmap_flush = MMAP_FLUSH_DEFAULT;
591 
592 	flush_max = evlist__mmap_size(opts->mmap_pages);
593 	flush_max /= 4;
594 	if (opts->mmap_flush > flush_max)
595 		opts->mmap_flush = flush_max;
596 
597 	return 0;
598 }
599 
600 #ifdef HAVE_ZSTD_SUPPORT
601 static unsigned int comp_level_default = 1;
602 
603 static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
604 {
605 	struct record_opts *opts = opt->value;
606 
607 	if (unset) {
608 		opts->comp_level = 0;
609 	} else {
610 		if (str)
611 			opts->comp_level = strtol(str, NULL, 0);
612 		if (!opts->comp_level)
613 			opts->comp_level = comp_level_default;
614 	}
615 
616 	return 0;
617 }
618 #endif
619 static unsigned int comp_level_max = 22;
620 
621 static int record__comp_enabled(struct record *rec)
622 {
623 	return rec->opts.comp_level > 0;
624 }
625 
626 static int process_synthesized_event(const struct perf_tool *tool,
627 				     union perf_event *event,
628 				     struct perf_sample *sample __maybe_unused,
629 				     struct machine *machine __maybe_unused)
630 {
631 	struct record *rec = container_of(tool, struct record, tool);
632 	return record__write(rec, NULL, event, event->header.size);
633 }
634 
635 static struct mutex synth_lock;
636 
637 static int process_locked_synthesized_event(const struct perf_tool *tool,
638 				     union perf_event *event,
639 				     struct perf_sample *sample __maybe_unused,
640 				     struct machine *machine __maybe_unused)
641 {
642 	int ret;
643 
644 	mutex_lock(&synth_lock);
645 	ret = process_synthesized_event(tool, event, sample, machine);
646 	mutex_unlock(&synth_lock);
647 	return ret;
648 }
649 
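/*
 * Push callback used by perf_mmap__push() in the non-AIO path. With
 * compression enabled the chunk is compressed into map->data as a
 * PERF_RECORD_COMPRESSED2 event whose size is padded up to an 8-byte
 * boundary; otherwise the raw chunk is written as-is.
 */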
650 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
651 {
652 	struct record *rec = to;
653 
654 	if (record__comp_enabled(rec)) {
655 		struct perf_record_compressed2 *event = map->data;
656 		size_t padding = 0;
657 		u8 pad[8] = {0};
658 		ssize_t compressed = zstd_compress(rec->session, map, map->data,
659 						   mmap__mmap_len(map), bf, size);
660 
661 		if (compressed < 0)
662 			return (int)compressed;
663 
664 		bf = event;
665 		thread->samples++;
666 
667 		/*
668 		 * The record from `zstd_compress` is not 8-byte aligned, which would cause an
669 		 * ASan error, so align it here.
670 		 */
671 		event->data_size = compressed - sizeof(struct perf_record_compressed2);
672 		event->header.size = PERF_ALIGN(compressed, sizeof(u64));
673 		padding = event->header.size - compressed;
674 		return record__write(rec, map, bf, compressed) ||
675 		       record__write(rec, map, &pad, padding);
676 	}
677 
678 	thread->samples++;
679 	return record__write(rec, map, bf, size);
680 }
681 
682 static volatile sig_atomic_t signr = -1;
683 static volatile sig_atomic_t child_finished;
684 #ifdef HAVE_EVENTFD_SUPPORT
685 static volatile sig_atomic_t done_fd = -1;
686 #endif
687 
688 static void sig_handler(int sig)
689 {
690 	if (sig == SIGCHLD)
691 		child_finished = 1;
692 	else
693 		signr = sig;
694 
695 	done = 1;
696 #ifdef HAVE_EVENTFD_SUPPORT
697 	if (done_fd >= 0) {
698 		u64 tmp = 1;
699 		int orig_errno = errno;
700 
701 		/*
702 		 * It is possible for this signal handler to run after done is
703 		 * checked in the main loop, but before the perf counter fds are
704 		 * polled. If this happens, the poll() will continue to wait
705 		 * even though done is set, and will only break out if either
706 		 * another signal is received, or the counters are ready for
707 		 * read. To ensure the poll() doesn't sleep when done is set,
708 		 * use an eventfd (done_fd) to wake up the poll().
709 		 */
710 		if (write(done_fd, &tmp, sizeof(tmp)) < 0)
711 			pr_err("failed to signal wakeup fd, error: %m\n");
712 
713 		errno = orig_errno;
714 	}
715 #endif // HAVE_EVENTFD_SUPPORT
716 }
717 
718 static void sigsegv_handler(int sig)
719 {
720 	perf_hooks__recover();
721 	sighandler_dump_stack(sig);
722 }
723 
724 static void record__sig_exit(void)
725 {
726 	if (signr == -1)
727 		return;
728 
729 	signal(signr, SIG_DFL);
730 	raise(signr);
731 }
732 
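/*
 * Emit an auxtrace event followed by its one or two data fragments, padding
 * the combined fragment length up to an 8-byte boundary. For single-file
 * (non-pipe) output the current file offset is also added to the auxtrace
 * index so the data can be located later.
 */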
733 static int record__process_auxtrace(const struct perf_tool *tool,
734 				    struct mmap *map,
735 				    union perf_event *event, void *data1,
736 				    size_t len1, void *data2, size_t len2)
737 {
738 	struct record *rec = container_of(tool, struct record, tool);
739 	struct perf_data *data = &rec->data;
740 	size_t padding;
741 	u8 pad[8] = {0};
742 
743 	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
744 		off_t file_offset;
745 		int fd = perf_data__fd(data);
746 		int err;
747 
748 		file_offset = lseek(fd, 0, SEEK_CUR);
749 		if (file_offset == -1)
750 			return -1;
751 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
752 						     event, file_offset);
753 		if (err)
754 			return err;
755 	}
756 
757 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
758 	padding = (len1 + len2) & 7;
759 	if (padding)
760 		padding = 8 - padding;
761 
762 	record__write(rec, map, event, event->header.size);
763 	record__write(rec, map, data1, len1);
764 	if (len2)
765 		record__write(rec, map, data2, len2);
766 	record__write(rec, map, &pad, padding);
767 
768 	return 0;
769 }
770 
771 static int record__auxtrace_mmap_read(struct record *rec,
772 				      struct mmap *map)
773 {
774 	int ret;
775 
776 	ret = auxtrace_mmap__read(map, rec->itr,
777 				  perf_session__env(rec->session),
778 				  &rec->tool,
779 				  record__process_auxtrace);
780 	if (ret < 0)
781 		return ret;
782 
783 	if (ret)
784 		rec->samples++;
785 
786 	return 0;
787 }
788 
789 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
790 					       struct mmap *map)
791 {
792 	int ret;
793 
794 	ret = auxtrace_mmap__read_snapshot(map, rec->itr,
795 					   perf_session__env(rec->session),
796 					   &rec->tool,
797 					   record__process_auxtrace,
798 					   rec->opts.auxtrace_snapshot_size);
799 	if (ret < 0)
800 		return ret;
801 
802 	if (ret)
803 		rec->samples++;
804 
805 	return 0;
806 }
807 
808 static int record__auxtrace_read_snapshot_all(struct record *rec)
809 {
810 	int i;
811 	int rc = 0;
812 
813 	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
814 		struct mmap *map = &rec->evlist->mmap[i];
815 
816 		if (!map->auxtrace_mmap.base)
817 			continue;
818 
819 		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
820 			rc = -1;
821 			goto out;
822 		}
823 	}
824 out:
825 	return rc;
826 }
827 
828 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
829 {
830 	pr_debug("Recording AUX area tracing snapshot\n");
831 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
832 		trigger_error(&auxtrace_snapshot_trigger);
833 	} else {
834 		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
835 			trigger_error(&auxtrace_snapshot_trigger);
836 		else
837 			trigger_ready(&auxtrace_snapshot_trigger);
838 	}
839 }
840 
841 static int record__auxtrace_snapshot_exit(struct record *rec)
842 {
843 	if (trigger_is_error(&auxtrace_snapshot_trigger))
844 		return 0;
845 
846 	if (!auxtrace_record__snapshot_started &&
847 	    auxtrace_record__snapshot_start(rec->itr))
848 		return -1;
849 
850 	record__read_auxtrace_snapshot(rec, true);
851 	if (trigger_is_error(&auxtrace_snapshot_trigger))
852 		return -1;
853 
854 	return 0;
855 }
856 
857 static int record__auxtrace_init(struct record *rec)
858 {
859 	int err;
860 
861 	if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
862 	    && record__threads_enabled(rec)) {
863 		pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
864 		return -EINVAL;
865 	}
866 
867 	if (!rec->itr) {
868 		rec->itr = auxtrace_record__init(rec->evlist, &err);
869 		if (err)
870 			return err;
871 	}
872 
873 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
874 					      rec->opts.auxtrace_snapshot_opts);
875 	if (err)
876 		return err;
877 
878 	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
879 					    rec->opts.auxtrace_sample_opts);
880 	if (err)
881 		return err;
882 
883 	err = auxtrace_parse_aux_action(rec->evlist);
884 	if (err)
885 		return err;
886 
887 	return auxtrace_parse_filters(rec->evlist);
888 }
889 
890 static int record__config_text_poke(struct evlist *evlist)
891 {
892 	struct evsel *evsel;
893 
894 	/* Nothing to do if text poke is already configured */
895 	evlist__for_each_entry(evlist, evsel) {
896 		if (evsel->core.attr.text_poke)
897 			return 0;
898 	}
899 
900 	evsel = evlist__add_dummy_on_all_cpus(evlist);
901 	if (!evsel)
902 		return -ENOMEM;
903 
904 	evsel->core.attr.text_poke = 1;
905 	evsel->core.attr.ksymbol = 1;
906 	evsel->immediate = true;
907 	evsel__set_sample_bit(evsel, TIME);
908 
909 	return 0;
910 }
911 
912 static int record__config_off_cpu(struct record *rec)
913 {
914 	return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
915 }
916 
917 static bool record__tracking_system_wide(struct record *rec)
918 {
919 	struct evlist *evlist = rec->evlist;
920 	struct evsel *evsel;
921 
922 	/*
923 	 * If a non-dummy evsel exists, system_wide sideband is needed to
924 	 * help parse sample information.
925 	 * For example, the PERF_RECORD_MMAP event helps parse symbols,
926 	 * and the PERF_RECORD_COMM event helps parse the task executable name.
927 	 */
928 	evlist__for_each_entry(evlist, evsel) {
929 		if (!evsel__is_dummy_event(evsel))
930 			return true;
931 	}
932 
933 	return false;
934 }
935 
936 static int record__config_tracking_events(struct record *rec)
937 {
938 	struct record_opts *opts = &rec->opts;
939 	struct evlist *evlist = rec->evlist;
940 	bool system_wide = false;
941 	struct evsel *evsel;
942 
943 	/*
944 	 * For initial_delay, system wide or a hybrid system, we need to add a
945 	 * tracking event so that we can track PERF_RECORD_MMAP to cover the
946 	 * delay of waiting or event synthesis.
947 	 */
948 	if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
949 	    perf_pmus__num_core_pmus() > 1) {
950 		/*
951 		 * User space tasks can migrate between CPUs, so when tracing
952 		 * selected CPUs, sideband for all CPUs is still needed.
953 		 */
954 		if (!!opts->target.cpu_list && record__tracking_system_wide(rec))
955 			system_wide = true;
956 
957 		evsel = evlist__findnew_tracking_event(evlist, system_wide);
958 		if (!evsel)
959 			return -ENOMEM;
960 
961 		/*
962 		 * Enable the tracking event when the process is forked for
963 		 * initial_delay, immediately for system wide.
964 		 */
965 		if (opts->target.initial_delay && !evsel->immediate &&
966 		    !target__has_cpu(&opts->target))
967 			evsel->core.attr.enable_on_exec = 1;
968 		else
969 			evsel->immediate = 1;
970 	}
971 
972 	return 0;
973 }
974 
975 static bool record__kcore_readable(struct machine *machine)
976 {
977 	char kcore[PATH_MAX];
978 	int fd;
979 
980 	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
981 
982 	fd = open(kcore, O_RDONLY);
983 	if (fd < 0)
984 		return false;
985 
986 	close(fd);
987 
988 	return true;
989 }
990 
991 static int record__kcore_copy(struct machine *machine, struct perf_data *data)
992 {
993 	char from_dir[PATH_MAX];
994 	char kcore_dir[PATH_MAX];
995 	int ret;
996 
997 	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
998 
999 	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
1000 	if (ret)
1001 		return ret;
1002 
1003 	return kcore_copy(from_dir, kcore_dir);
1004 }
1005 
1006 static void record__thread_data_init_pipes(struct record_thread *thread_data)
1007 {
1008 	thread_data->pipes.msg[0] = -1;
1009 	thread_data->pipes.msg[1] = -1;
1010 	thread_data->pipes.ack[0] = -1;
1011 	thread_data->pipes.ack[1] = -1;
1012 }
1013 
1014 static int record__thread_data_open_pipes(struct record_thread *thread_data)
1015 {
1016 	if (pipe(thread_data->pipes.msg))
1017 		return -EINVAL;
1018 
1019 	if (pipe(thread_data->pipes.ack)) {
1020 		close(thread_data->pipes.msg[0]);
1021 		thread_data->pipes.msg[0] = -1;
1022 		close(thread_data->pipes.msg[1]);
1023 		thread_data->pipes.msg[1] = -1;
1024 		return -EINVAL;
1025 	}
1026 
1027 	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
1028 		 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
1029 		 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
1030 
1031 	return 0;
1032 }
1033 
1034 static void record__thread_data_close_pipes(struct record_thread *thread_data)
1035 {
1036 	if (thread_data->pipes.msg[0] != -1) {
1037 		close(thread_data->pipes.msg[0]);
1038 		thread_data->pipes.msg[0] = -1;
1039 	}
1040 	if (thread_data->pipes.msg[1] != -1) {
1041 		close(thread_data->pipes.msg[1]);
1042 		thread_data->pipes.msg[1] = -1;
1043 	}
1044 	if (thread_data->pipes.ack[0] != -1) {
1045 		close(thread_data->pipes.ack[0]);
1046 		thread_data->pipes.ack[0] = -1;
1047 	}
1048 	if (thread_data->pipes.ack[1] != -1) {
1049 		close(thread_data->pipes.ack[1]);
1050 		thread_data->pipes.ack[1] = -1;
1051 	}
1052 }
1053 
1054 static bool evlist__per_thread(struct evlist *evlist)
1055 {
1056 	return cpu_map__is_dummy(evlist->core.user_requested_cpus);
1057 }
1058 
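/*
 * Assign this thread its share of the evlist mmaps: all of them for
 * per-thread (dummy CPU map) sessions, otherwise only the mmaps whose CPU is
 * set in the thread's maps mask. The same selection is applied to the
 * overwrite mmaps when they exist.
 */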
1059 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
1060 {
1061 	int m, tm, nr_mmaps = evlist->core.nr_mmaps;
1062 	struct mmap *mmap = evlist->mmap;
1063 	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
1064 	struct perf_cpu_map *cpus = evlist->core.all_cpus;
1065 	bool per_thread = evlist__per_thread(evlist);
1066 
1067 	if (per_thread)
1068 		thread_data->nr_mmaps = nr_mmaps;
1069 	else
1070 		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
1071 						      thread_data->mask->maps.nbits);
1072 	if (mmap) {
1073 		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1074 		if (!thread_data->maps)
1075 			return -ENOMEM;
1076 	}
1077 	if (overwrite_mmap) {
1078 		thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1079 		if (!thread_data->overwrite_maps) {
1080 			zfree(&thread_data->maps);
1081 			return -ENOMEM;
1082 		}
1083 	}
1084 	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
1085 		 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
1086 
1087 	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
1088 		if (per_thread ||
1089 		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
1090 			if (thread_data->maps) {
1091 				thread_data->maps[tm] = &mmap[m];
1092 				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
1093 					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1094 			}
1095 			if (thread_data->overwrite_maps) {
1096 				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
1097 				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
1098 					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1099 			}
1100 			tm++;
1101 		}
1102 	}
1103 
1104 	return 0;
1105 }
1106 
1107 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
1108 {
1109 	int f, tm, pos;
1110 	struct mmap *map, *overwrite_map;
1111 
1112 	fdarray__init(&thread_data->pollfd, 64);
1113 
1114 	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
1115 		map = thread_data->maps ? thread_data->maps[tm] : NULL;
1116 		overwrite_map = thread_data->overwrite_maps ?
1117 				thread_data->overwrite_maps[tm] : NULL;
1118 
1119 		for (f = 0; f < evlist->core.pollfd.nr; f++) {
1120 			void *ptr = evlist->core.pollfd.priv[f].ptr;
1121 
1122 			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
1123 				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
1124 							      &evlist->core.pollfd);
1125 				if (pos < 0)
1126 					return pos;
1127 				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
1128 					 thread_data, pos, evlist->core.pollfd.entries[f].fd);
1129 			}
1130 		}
1131 	}
1132 
1133 	return 0;
1134 }
1135 
1136 static void record__free_thread_data(struct record *rec)
1137 {
1138 	int t;
1139 	struct record_thread *thread_data = rec->thread_data;
1140 
1141 	if (thread_data == NULL)
1142 		return;
1143 
1144 	for (t = 0; t < rec->nr_threads; t++) {
1145 		record__thread_data_close_pipes(&thread_data[t]);
1146 		zfree(&thread_data[t].maps);
1147 		zfree(&thread_data[t].overwrite_maps);
1148 		fdarray__exit(&thread_data[t].pollfd);
1149 	}
1150 
1151 	zfree(&rec->thread_data);
1152 }
1153 
1154 static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
1155 						    int evlist_pollfd_index,
1156 						    int thread_pollfd_index)
1157 {
1158 	size_t x = rec->index_map_cnt;
1159 
1160 	if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
1161 		return -ENOMEM;
1162 	rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
1163 	rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
1164 	rec->index_map_cnt += 1;
1165 	return 0;
1166 }
1167 
1168 static int record__update_evlist_pollfd_from_thread(struct record *rec,
1169 						    struct evlist *evlist,
1170 						    struct record_thread *thread_data)
1171 {
1172 	struct pollfd *e_entries = evlist->core.pollfd.entries;
1173 	struct pollfd *t_entries = thread_data->pollfd.entries;
1174 	int err = 0;
1175 	size_t i;
1176 
1177 	for (i = 0; i < rec->index_map_cnt; i++) {
1178 		int e_pos = rec->index_map[i].evlist_pollfd_index;
1179 		int t_pos = rec->index_map[i].thread_pollfd_index;
1180 
1181 		if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
1182 		    e_entries[e_pos].events != t_entries[t_pos].events) {
1183 			pr_err("Thread and evlist pollfd index mismatch\n");
1184 			err = -EINVAL;
1185 			continue;
1186 		}
1187 		e_entries[e_pos].revents = t_entries[t_pos].revents;
1188 	}
1189 	return err;
1190 }
1191 
1192 static int record__dup_non_perf_events(struct record *rec,
1193 				       struct evlist *evlist,
1194 				       struct record_thread *thread_data)
1195 {
1196 	struct fdarray *fda = &evlist->core.pollfd;
1197 	int i, ret;
1198 
1199 	for (i = 0; i < fda->nr; i++) {
1200 		if (!(fda->priv[i].flags & fdarray_flag__non_perf_event))
1201 			continue;
1202 		ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda);
1203 		if (ret < 0) {
1204 			pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1205 			return ret;
1206 		}
1207 		pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
1208 			  thread_data, ret, fda->entries[i].fd);
1209 		ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret);
1210 		if (ret < 0) {
1211 			pr_err("Failed to map thread and evlist pollfd indexes\n");
1212 			return ret;
1213 		}
1214 	}
1215 	return 0;
1216 }
1217 
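/*
 * Allocate and initialize one record_thread per requested thread. Entry 0
 * describes the main process itself (its tid is filled in and the
 * non-perf-event descriptors are duplicated into its pollfd), while the
 * remaining entries get msg/ack pipes whose read end is polled for
 * termination requests.
 */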
1218 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
1219 {
1220 	int t, ret;
1221 	struct record_thread *thread_data;
1222 
1223 	rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
1224 	if (!rec->thread_data) {
1225 		pr_err("Failed to allocate thread data\n");
1226 		return -ENOMEM;
1227 	}
1228 	thread_data = rec->thread_data;
1229 
1230 	for (t = 0; t < rec->nr_threads; t++)
1231 		record__thread_data_init_pipes(&thread_data[t]);
1232 
1233 	for (t = 0; t < rec->nr_threads; t++) {
1234 		thread_data[t].rec = rec;
1235 		thread_data[t].mask = &rec->thread_masks[t];
1236 		ret = record__thread_data_init_maps(&thread_data[t], evlist);
1237 		if (ret) {
1238 			pr_err("Failed to initialize thread[%d] maps\n", t);
1239 			goto out_free;
1240 		}
1241 		ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
1242 		if (ret) {
1243 			pr_err("Failed to initialize thread[%d] pollfd\n", t);
1244 			goto out_free;
1245 		}
1246 		if (t) {
1247 			thread_data[t].tid = -1;
1248 			ret = record__thread_data_open_pipes(&thread_data[t]);
1249 			if (ret) {
1250 				pr_err("Failed to open thread[%d] communication pipes\n", t);
1251 				goto out_free;
1252 			}
1253 			ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
1254 					   POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
1255 			if (ret < 0) {
1256 				pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
1257 				goto out_free;
1258 			}
1259 			thread_data[t].ctlfd_pos = ret;
1260 			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1261 				 thread_data, thread_data[t].ctlfd_pos,
1262 				 thread_data[t].pipes.msg[0]);
1263 		} else {
1264 			thread_data[t].tid = gettid();
1265 
1266 			ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]);
1267 			if (ret < 0)
1268 				goto out_free;
1269 
1270 			thread_data[t].ctlfd_pos = -1; /* Not used */
1271 		}
1272 	}
1273 
1274 	return 0;
1275 
1276 out_free:
1277 	record__free_thread_data(rec);
1278 
1279 	return ret;
1280 }
1281 
1282 static int record__mmap_evlist(struct record *rec,
1283 			       struct evlist *evlist)
1284 {
1285 	int i, ret;
1286 	struct record_opts *opts = &rec->opts;
1287 	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
1288 				  opts->auxtrace_sample_mode;
1289 
1290 	if (opts->affinity != PERF_AFFINITY_SYS)
1291 		cpu__setup_cpunode_map();
1292 
1293 	if (evlist__mmap_ex(evlist, opts->mmap_pages,
1294 				 opts->auxtrace_mmap_pages,
1295 				 auxtrace_overwrite,
1296 				 opts->nr_cblocks, opts->affinity,
1297 				 opts->mmap_flush, opts->comp_level) < 0) {
1298 		if (errno == EPERM) {
1299 			pr_err("Permission error mapping pages.\n"
1300 			       "Consider increasing "
1301 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
1302 			       "or try again with a smaller value of -m/--mmap_pages.\n"
1303 			       "(current value: %u,%u)\n",
1304 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
1305 			return -errno;
1306 		} else {
1307 			pr_err("failed to mmap: %m\n");
1308 			if (errno)
1309 				return -errno;
1310 			else
1311 				return -EINVAL;
1312 		}
1313 	}
1314 
1315 	if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
1316 		return -1;
1317 
1318 	ret = record__alloc_thread_data(rec, evlist);
1319 	if (ret)
1320 		return ret;
1321 
1322 	if (record__threads_enabled(rec)) {
1323 		ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
1324 		if (ret) {
1325 			errno = -ret;
1326 			pr_err("Failed to create data directory: %m\n");
1327 			return ret;
1328 		}
1329 		for (i = 0; i < evlist->core.nr_mmaps; i++) {
1330 			if (evlist->mmap)
1331 				evlist->mmap[i].file = &rec->data.dir.files[i];
1332 			if (evlist->overwrite_mmap)
1333 				evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
1334 		}
1335 	}
1336 
1337 	return 0;
1338 }
1339 
1340 static int record__mmap(struct record *rec)
1341 {
1342 	return record__mmap_evlist(rec, rec->evlist);
1343 }
1344 
1345 static int record__open(struct record *rec)
1346 {
1347 	char msg[BUFSIZ];
1348 	struct evsel *pos;
1349 	struct evlist *evlist = rec->evlist;
1350 	struct perf_session *session = rec->session;
1351 	struct record_opts *opts = &rec->opts;
1352 	int rc = 0;
1353 	bool skipped = false;
1354 	bool removed_tracking = false;
1355 
1356 	evlist__for_each_entry(evlist, pos) {
1357 		if (removed_tracking) {
1358 			/*
1359 			 * Normally the head of the list has tracking enabled
1360 			 * for sideband data like mmaps. If this event is
1361 			 * removed, make sure to add tracking to the next
1362 			 * processed event.
1363 			 */
1364 			if (!pos->tracking) {
1365 				pos->tracking = true;
1366 				evsel__config(pos, opts, &callchain_param);
1367 			}
1368 			removed_tracking = false;
1369 		}
1370 try_again:
1371 		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
1372 			bool report_error = true;
1373 
1374 			if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) {
1375 				if (verbose > 0)
1376 					ui__warning("%s\n", msg);
1377 				goto try_again;
1378 			}
1379 			if ((errno == EINVAL || errno == EBADF) &&
1380 			    pos->core.leader != &pos->core &&
1381 			    pos->weak_group) {
1382 			        pos = evlist__reset_weak_group(evlist, pos, true);
1383 				goto try_again;
1384 			}
1385 #if defined(__aarch64__) || defined(__arm__)
1386 			if (strstr(evsel__name(pos), "cycles")) {
1387 				struct evsel *pos2;
1388 				/*
1389 				 * Unfortunately ARM has many events named
1390 				 * "cycles" on PMUs like the system-level (L3)
1391 				 * cache which don't support sampling. Only
1392 				 * display such failures to open when there is
1393 				 * only 1 cycles event or verbose is enabled.
1394 				 */
1395 				evlist__for_each_entry(evlist, pos2) {
1396 					if (pos2 == pos)
1397 						continue;
1398 					if (strstr(evsel__name(pos2), "cycles")) {
1399 						report_error = false;
1400 						break;
1401 					}
1402 				}
1403 			}
1404 #endif
1405 			if (report_error || verbose > 0) {
1406 				evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
1407 				ui__error("Failure to open event '%s' on PMU '%s' which will be "
1408 					  "removed.\n%s\n",
1409 					  evsel__name(pos), evsel__pmu_name(pos), msg);
1410 			}
1411 			if (pos->tracking)
1412 				removed_tracking = true;
1413 			pos->skippable = true;
1414 			skipped = true;
1415 		}
1416 	}
1417 
1418 	if (skipped) {
1419 		struct evsel *tmp;
1420 		int idx = 0;
1421 		bool evlist_empty = true;
1422 
1423 		/* Remove evsels that failed to open and update indices. */
1424 		evlist__for_each_entry_safe(evlist, tmp, pos) {
1425 			if (pos->skippable) {
1426 				evlist__remove(evlist, pos);
1427 				continue;
1428 			}
1429 
1430 			/*
1431 			 * Note, dummy events may be command line parsed or
1432 			 * added by the tool. We care about supporting `perf
1433 			 * record -e dummy` which may be used as a permission
1434 			 * check. Dummy events that are added to the command
1435 			 * line and opened along with other events that fail,
1436 			 * will still fail as if the dummy events were tool
1437 			 * added events for the sake of code simplicity.
1438 			 */
1439 			if (!evsel__is_dummy_event(pos))
1440 				evlist_empty = false;
1441 		}
1442 		evlist__for_each_entry(evlist, pos) {
1443 			pos->core.idx = idx++;
1444 		}
1445 		/* If list is empty then fail. */
1446 		if (evlist_empty) {
1447 			ui__error("Failure to open any events for recording.\n");
1448 			rc = -1;
1449 			goto out;
1450 		}
1451 	}
1452 	if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
1453 		pr_warning(
1454 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1455 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1456 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1457 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1458 "Samples in kernel modules won't be resolved at all.\n\n"
1459 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1460 "even with a suitable vmlinux or kallsyms file.\n\n");
1461 	}
1462 
1463 	if (evlist__apply_filters(evlist, &pos, &opts->target)) {
1464 		pr_err("failed to set filter \"%s\" on event %s: %m\n",
1465 			pos->filter ?: "BPF", evsel__name(pos));
1466 		rc = -1;
1467 		goto out;
1468 	}
1469 
1470 	rc = record__mmap(rec);
1471 	if (rc)
1472 		goto out;
1473 
1474 	session->evlist = evlist;
1475 	perf_session__set_id_hdr_size(session);
1476 out:
1477 	return rc;
1478 }
1479 
1480 static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1481 {
1482 	if (rec->evlist->first_sample_time == 0)
1483 		rec->evlist->first_sample_time = sample_time;
1484 
1485 	if (sample_time)
1486 		rec->evlist->last_sample_time = sample_time;
1487 }
1488 
1489 static int process_sample_event(const struct perf_tool *tool,
1490 				union perf_event *event,
1491 				struct perf_sample *sample,
1492 				struct evsel *evsel,
1493 				struct machine *machine)
1494 {
1495 	struct record *rec = container_of(tool, struct record, tool);
1496 
1497 	set_timestamp_boundary(rec, sample->time);
1498 
1499 	if (rec->buildid_all)
1500 		return 0;
1501 
1502 	rec->samples++;
1503 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1504 }
1505 
1506 static int process_buildids(struct record *rec)
1507 {
1508 	struct perf_session *session = rec->session;
1509 
1510 	if (perf_data__size(&rec->data) == 0)
1511 		return 0;
1512 
1513 	/* A single DSO is needed and not all inline frames. */
1514 	symbol_conf.inline_name = false;
1515 	/*
1516 	 * During this process, it'll load the kernel map and replace the
1517 	 * dso->long_name with a real pathname it found.  In this case
1518 	 * we prefer the vmlinux path like
1519 	 *   /lib/modules/3.16.4/build/vmlinux
1520 	 *
1521 	 * rather than build-id path (in debug directory).
1522 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
1523 	 */
1524 	symbol_conf.ignore_vmlinux_buildid = true;
1525 	/*
1526 	 * If --buildid-all is given, it marks all DSOs regardless of hits,
1527 	 * so there is no need to process samples. But if timestamp_boundary is
1528 	 * enabled, it still needs to walk all samples to get the timestamps of
1529 	 * the first/last samples.
1530 	 */
1531 	if (rec->buildid_all && !rec->timestamp_boundary)
1532 		rec->tool.sample = process_event_sample_stub;
1533 
1534 	return perf_session__process_events(session);
1535 }
1536 
1537 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1538 {
1539 	int err;
1540 	struct perf_tool *tool = data;
1541 	/*
1542 	 * As for the guest kernel, when processing the record & report
1543 	 * subcommands we arrange the module mmaps prior to the guest kernel
1544 	 * mmap and trigger a dso preload, because the default guest module
1545 	 * symbols are loaded from guest kallsyms instead of
1546 	 * /lib/modules/XXX/XXX. This method avoids missing symbols when the
1547 	 * first addr is in a module instead of in the guest kernel.
1548 	 */
1549 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
1550 					     machine);
1551 	if (err < 0)
1552 		pr_err("Couldn't record guest kernel [%d]'s reference"
1553 		       " relocation symbol.\n", machine->pid);
1554 
1555 	/*
1556 	 * We use _stext for the guest kernel because the guest kernel's
1557 	 * /proc/kallsyms sometimes has no _text.
1558 	 */
1559 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1560 						 machine);
1561 	if (err < 0)
1562 		pr_err("Couldn't record guest kernel [%d]'s reference"
1563 		       " relocation symbol.\n", machine->pid);
1564 }
1565 
1566 static struct perf_event_header finished_round_event = {
1567 	.size = sizeof(struct perf_event_header),
1568 	.type = PERF_RECORD_FINISHED_ROUND,
1569 };
1570 
1571 static struct perf_event_header finished_init_event = {
1572 	.size = sizeof(struct perf_event_header),
1573 	.type = PERF_RECORD_FINISHED_INIT,
1574 };
1575 
1576 static void record__adjust_affinity(struct record *rec, struct mmap *map)
1577 {
1578 	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
1579 	    !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
1580 			  thread->mask->affinity.nbits)) {
1581 		bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
1582 		bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
1583 			  map->affinity_mask.bits, thread->mask->affinity.nbits);
1584 		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
1585 					(cpu_set_t *)thread->mask->affinity.bits);
1586 		if (verbose == 2) {
1587 			pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
1588 			mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
1589 		}
1590 	}
1591 }
1592 
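/*
 * Layout callback handed to zstd_compress_stream_to_records(): the first call
 * (increment == 0) initializes a PERF_RECORD_COMPRESSED2 header and reserves
 * its size; later calls grow header.size by the amount of compressed payload
 * that was appended.
 */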
1593 static size_t process_comp_header(void *record, size_t increment)
1594 {
1595 	struct perf_record_compressed2 *event = record;
1596 	size_t size = sizeof(*event);
1597 
1598 	if (increment) {
1599 		event->header.size += increment;
1600 		return increment;
1601 	}
1602 
1603 	event->header.type = PERF_RECORD_COMPRESSED2;
1604 	event->header.size = size;
1605 
1606 	return size;
1607 }
1608 
1609 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
1610 			    void *dst, size_t dst_size, void *src, size_t src_size)
1611 {
1612 	ssize_t compressed;
1613 	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed2) - 1;
1614 	struct zstd_data *zstd_data = &session->zstd_data;
1615 
1616 	if (map && map->file)
1617 		zstd_data = &map->zstd_data;
1618 
1619 	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
1620 						     max_record_size, process_comp_header);
1621 	if (compressed < 0)
1622 		return compressed;
1623 
1624 	if (map && map->file) {
1625 		thread->bytes_transferred += src_size;
1626 		thread->bytes_compressed  += compressed;
1627 	} else {
1628 		session->bytes_transferred += src_size;
1629 		session->bytes_compressed  += compressed;
1630 	}
1631 
1632 	return compressed;
1633 }
1634 
1635 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1636 				    bool overwrite, bool synch)
1637 {
1638 	u64 bytes_written = rec->bytes_written;
1639 	int i;
1640 	int rc = 0;
1641 	int nr_mmaps;
1642 	struct mmap **maps;
1643 	int trace_fd = rec->data.file.fd;
1644 	off_t off = 0;
1645 
1646 	if (!evlist)
1647 		return 0;
1648 
1649 	nr_mmaps = thread->nr_mmaps;
1650 	maps = overwrite ? thread->overwrite_maps : thread->maps;
1651 
1652 	if (!maps)
1653 		return 0;
1654 
1655 	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1656 		return 0;
1657 
1658 	if (record__aio_enabled(rec))
1659 		off = record__aio_get_pos(trace_fd);
1660 
1661 	for (i = 0; i < nr_mmaps; i++) {
1662 		u64 flush = 0;
1663 		struct mmap *map = maps[i];
1664 
1665 		if (map->core.base) {
1666 			record__adjust_affinity(rec, map);
1667 			if (synch) {
1668 				flush = map->core.flush;
1669 				map->core.flush = 1;
1670 			}
1671 			if (!record__aio_enabled(rec)) {
1672 				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1673 					if (synch)
1674 						map->core.flush = flush;
1675 					rc = -1;
1676 					goto out;
1677 				}
1678 			} else {
1679 				if (record__aio_push(rec, map, &off) < 0) {
1680 					record__aio_set_pos(trace_fd, off);
1681 					if (synch)
1682 						map->core.flush = flush;
1683 					rc = -1;
1684 					goto out;
1685 				}
1686 			}
1687 			if (synch)
1688 				map->core.flush = flush;
1689 		}
1690 
1691 		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1692 		    !rec->opts.auxtrace_sample_mode &&
1693 		    record__auxtrace_mmap_read(rec, map) != 0) {
1694 			rc = -1;
1695 			goto out;
1696 		}
1697 	}
1698 
1699 	if (record__aio_enabled(rec))
1700 		record__aio_set_pos(trace_fd, off);
1701 
1702 	/*
1703 	 * Mark the round finished in case we wrote
1704 	 * at least one event.
1705 	 *
1706 	 * No need for round events in directory mode,
1707 	 * because per-cpu maps and files already have their data
1708 	 * sorted by the kernel.
1709 	 */
1710 	if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
1711 		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1712 
1713 	if (overwrite)
1714 		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1715 out:
1716 	return rc;
1717 }
1718 
1719 static int record__mmap_read_all(struct record *rec, bool synch)
1720 {
1721 	int err;
1722 
1723 	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1724 	if (err)
1725 		return err;
1726 
1727 	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1728 }
1729 
1730 static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1731 					   void *arg __maybe_unused)
1732 {
1733 	struct perf_mmap *map = fda->priv[fd].ptr;
1734 
1735 	if (map)
1736 		perf_mmap__put(map);
1737 }
1738 
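/*
 * Body of a parallel-mode (--threads) reader thread: repeatedly drain the
 * mmaps assigned to this thread, poll for new data, and terminate once the
 * main thread closes its end of the message pipe (seen here as POLLHUP on
 * the control fd). A final synchronous read flushes any remaining data.
 */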
1739 static void *record__thread(void *arg)
1740 {
1741 	enum thread_msg msg = THREAD_MSG__READY;
1742 	bool terminate = false;
1743 	struct fdarray *pollfd;
1744 	int err, ctlfd_pos;
1745 
1746 	thread = arg;
1747 	thread->tid = gettid();
1748 
1749 	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1750 	if (err == -1)
1751 		pr_warning("threads[%d]: failed to notify on start: %m\n", thread->tid);
1752 
1753 	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
1754 
1755 	pollfd = &thread->pollfd;
1756 	ctlfd_pos = thread->ctlfd_pos;
1757 
1758 	for (;;) {
1759 		unsigned long long hits = thread->samples;
1760 
1761 		if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
1762 			break;
1763 
1764 		if (hits == thread->samples) {
1765 
1766 			err = fdarray__poll(pollfd, -1);
1767 			/*
1768 			 * Propagate an error only if there is one. Ignore a positive
1769 			 * number of returned events and interrupted polls (EINTR).
1770 			 */
1771 			if (err > 0 || (err < 0 && errno == EINTR))
1772 				err = 0;
1773 			thread->waking++;
1774 
1775 			if (fdarray__filter(pollfd, POLLERR | POLLHUP,
1776 					    record__thread_munmap_filtered, NULL) == 0)
1777 				break;
1778 		}
1779 
1780 		if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
1781 			terminate = true;
1782 			close(thread->pipes.msg[0]);
1783 			thread->pipes.msg[0] = -1;
1784 			pollfd->entries[ctlfd_pos].fd = -1;
1785 			pollfd->entries[ctlfd_pos].events = 0;
1786 		}
1787 
1788 		pollfd->entries[ctlfd_pos].revents = 0;
1789 	}
1790 	record__mmap_read_all(thread->rec, true);
1791 
1792 	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1793 	if (err == -1)
1794 		pr_warning("threads[%d]: failed to notify on termination: %m\n", thread->tid);
1795 
1796 	return NULL;
1797 }
1798 
1799 static void record__init_features(struct record *rec)
1800 {
1801 	struct perf_session *session = rec->session;
1802 	int feat;
1803 
1804 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1805 		perf_header__set_feat(&session->header, feat);
1806 
1807 	if (rec->no_buildid)
1808 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1809 
1810 	if (!have_tracepoints(&rec->evlist->core.entries))
1811 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1812 
1813 	if (!rec->opts.branch_stack)
1814 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1815 
1816 	if (!rec->opts.full_auxtrace)
1817 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1818 
1819 	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1820 		perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1821 
1822 	if (!rec->opts.use_clockid)
1823 		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
1824 
1825 	if (!record__threads_enabled(rec))
1826 		perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1827 
1828 	if (!record__comp_enabled(rec))
1829 		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1830 
1831 	perf_header__clear_feat(&session->header, HEADER_STAT);
1832 }
1833 
1834 static void
1835 record__finish_output(struct record *rec)
1836 {
1837 	int i;
1838 	struct perf_data *data = &rec->data;
1839 	int fd = perf_data__fd(data);
1840 
1841 	if (data->is_pipe) {
1842 		/* Just to display approx. size */
1843 		data->file.size = rec->bytes_written;
1844 		return;
1845 	}
1846 
1847 	rec->session->header.data_size += rec->bytes_written;
1848 	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1849 	if (record__threads_enabled(rec)) {
1850 		for (i = 0; i < data->dir.nr; i++)
1851 			data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
1852 	}
1853 
1854 	/* Buildid scanning disabled or build ID in kernel and synthesized map events. */
1855 	if (!rec->no_buildid || !rec->no_buildid_cache) {
1856 		process_buildids(rec);
1857 
1858 		if (rec->buildid_all)
1859 			perf_session__dsos_hit_all(rec->session);
1860 	}
1861 	perf_session__write_header(rec->session, rec->evlist, fd, true);
1862 	perf_session__cache_build_ids(rec->session);
1863 }
1864 
1865 static int record__synthesize_workload(struct record *rec, bool tail)
1866 {
1867 	int err;
1868 	struct perf_thread_map *thread_map;
1869 	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1870 
1871 	if (rec->opts.tail_synthesize != tail)
1872 		return 0;
1873 
1874 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1875 	if (thread_map == NULL)
1876 		return -1;
1877 
1878 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1879 						 process_synthesized_event,
1880 						 &rec->session->machines.host,
1881 						 needs_mmap,
1882 						 rec->opts.record_data_mmap);
1883 	perf_thread_map__put(thread_map);
1884 	return err;
1885 }
1886 
1887 static int write_finished_init(struct record *rec, bool tail)
1888 {
1889 	if (rec->opts.tail_synthesize != tail)
1890 		return 0;
1891 
1892 	return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
1893 }
1894 
1895 static int record__synthesize(struct record *rec, bool tail);
1896 
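/*
 * Finish the current output and switch to a new, timestamp-suffixed
 * perf.data file. When a maximum number of output files is configured,
 * switch_output.filenames is used as a ring and the file being replaced
 * in the current slot is removed first.
 */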
1897 static int
1898 record__switch_output(struct record *rec, bool at_exit)
1899 {
1900 	struct perf_data *data = &rec->data;
1901 	char *new_filename = NULL;
1902 	int fd, err;
1903 
1904 	/* Same size:      "2015122520103046" */
1905 	char timestamp[] = "InvalidTimestamp";
1906 
1907 	record__aio_mmap_read_sync(rec);
1908 
1909 	write_finished_init(rec, true);
1910 
1911 	record__synthesize(rec, true);
1912 	if (target__none(&rec->opts.target))
1913 		record__synthesize_workload(rec, true);
1914 
1915 	rec->samples = 0;
1916 	record__finish_output(rec);
1917 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1918 	if (err) {
1919 		pr_err("Failed to get current timestamp\n");
1920 		return -EINVAL;
1921 	}
1922 
1923 	fd = perf_data__switch(data, timestamp,
1924 			       rec->session->header.data_offset,
1925 			       at_exit, &new_filename);
1926 	if (fd >= 0 && !at_exit) {
1927 		rec->bytes_written = 0;
1928 		rec->session->header.data_size = 0;
1929 	}
1930 
1931 	if (!quiet) {
1932 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1933 			data->path, timestamp);
1934 	}
1935 
1936 	if (rec->switch_output.num_files) {
1937 		int n = rec->switch_output.cur_file + 1;
1938 
1939 		if (n >= rec->switch_output.num_files)
1940 			n = 0;
1941 		rec->switch_output.cur_file = n;
1942 		if (rec->switch_output.filenames[n]) {
1943 			remove(rec->switch_output.filenames[n]);
1944 			zfree(&rec->switch_output.filenames[n]);
1945 		}
1946 		rec->switch_output.filenames[n] = new_filename;
1947 	} else {
1948 		free(new_filename);
1949 	}
1950 
1951 	/* Output tracking events */
1952 	if (!at_exit) {
1953 		record__synthesize(rec, false);
1954 
1955 		/*
1956 		 * In 'perf record --switch-output' without -a,
1957 		 * record__synthesize() in record__switch_output() won't
1958 		 * generate tracking events because there's no thread_map
1959 		 * in the evlist, so the newly created perf.data would not
1960 		 * contain map and comm information.
1961 		 * Create a fake thread_map and directly call
1962 		 * perf_event__synthesize_thread_map() for those events.
1963 		 */
1964 		if (target__none(&rec->opts.target))
1965 			record__synthesize_workload(rec, false);
1966 		write_finished_init(rec, false);
1967 	}
1968 	return fd;
1969 }
1970 
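/*
 * Write a synthesized PERF_RECORD_LOST_SAMPLES event carrying @lost_count,
 * followed by an id sample so the loss can be attributed to the right
 * evsel, CPU and thread when the file is processed later.
 */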
1971 static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
1972 					struct perf_record_lost_samples *lost,
1973 					int cpu_idx, int thread_idx, u64 lost_count,
1974 					u16 misc_flag)
1975 {
1976 	struct perf_sample_id *sid;
1977 	struct perf_sample sample;
1978 	int id_hdr_size;
1979 
1980 	perf_sample__init(&sample, /*all=*/true);
1981 	lost->lost = lost_count;
1982 	if (evsel->core.ids) {
1983 		sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
1984 		sample.id = sid->id;
1985 	}
1986 
1987 	id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1),
1988 						       evsel->core.attr.sample_type, &sample);
1989 	lost->header.size = sizeof(*lost) + id_hdr_size;
1990 	lost->header.misc = misc_flag;
1991 	record__write(rec, NULL, lost, lost->header.size);
1992 	perf_sample__exit(&sample);
1993 }
1994 
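/*
 * At the end of the session, read the kernel's per-fd lost-sample counts
 * (and the BPF filter's, if any) and emit them as PERF_RECORD_LOST_SAMPLES
 * events so the losses are visible in the resulting perf.data.
 */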
1995 static void record__read_lost_samples(struct record *rec)
1996 {
1997 	struct perf_session *session = rec->session;
1998 	struct perf_record_lost_samples_and_ids lost;
1999 	struct evsel *evsel;
2000 
2001 	/* there was an error during record__open */
2002 	if (session->evlist == NULL)
2003 		return;
2004 
2005 	evlist__for_each_entry(session->evlist, evsel) {
2006 		struct xyarray *xy = evsel->core.sample_id;
2007 		u64 lost_count;
2008 
2009 		if (xy == NULL || evsel->core.fd == NULL)
2010 			continue;
2011 		if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
2012 		    xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
2013 			pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
2014 			continue;
2015 		}
2016 
2017 		for (int x = 0; x < xyarray__max_x(xy); x++) {
2018 			for (int y = 0; y < xyarray__max_y(xy); y++) {
2019 				struct perf_counts_values count;
2020 
2021 				if (perf_evsel__read(&evsel->core, x, y, &count) < 0) {
2022 					pr_debug("read LOST count failed\n");
2023 					return;
2024 				}
2025 
2026 				if (count.lost) {
2027 					memset(&lost, 0, sizeof(lost));
2028 					lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
2029 					__record__save_lost_samples(rec, evsel, &lost.lost,
2030 								    x, y, count.lost, 0);
2031 				}
2032 			}
2033 		}
2034 
2035 		lost_count = perf_bpf_filter__lost_count(evsel);
2036 		if (lost_count) {
2037 			memset(&lost, 0, sizeof(lost));
2038 			lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
2039 			__record__save_lost_samples(rec, evsel, &lost.lost, 0, 0, lost_count,
2040 						    PERF_RECORD_MISC_LOST_SAMPLES_BPF);
2041 		}
2042 	}
2043 }
2044 
2045 static volatile sig_atomic_t workload_exec_errno;
2046 
2047 /*
2048  * evlist__prepare_workload will send a SIGUSR1
2049  * if the fork fails, since we asked by setting its
2050  * want_signal to true.
2051  */
2052 static void workload_exec_failed_signal(int signo __maybe_unused,
2053 					siginfo_t *info,
2054 					void *ucontext __maybe_unused)
2055 {
2056 	workload_exec_errno = info->si_value.sival_int;
2057 	done = 1;
2058 	child_finished = 1;
2059 }
2060 
2061 static void snapshot_sig_handler(int sig);
2062 static void alarm_sig_handler(int sig);
2063 
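/*
 * Pick any mmapped event userpage; one is enough to read the kernel's
 * time conversion parameters for the synthesized TIME_CONV event.
 */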
2064 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
2065 {
2066 	if (evlist) {
2067 		if (evlist->mmap && evlist->mmap[0].core.base)
2068 			return evlist->mmap[0].core.base;
2069 		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
2070 			return evlist->overwrite_mmap[0].core.base;
2071 	}
2072 	return NULL;
2073 }
2074 
2075 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
2076 {
2077 	const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
2078 	if (pc)
2079 		return pc;
2080 	return NULL;
2081 }
2082 
2083 static int record__synthesize(struct record *rec, bool tail)
2084 {
2085 	struct perf_session *session = rec->session;
2086 	struct machine *machine = &session->machines.host;
2087 	struct perf_data *data = &rec->data;
2088 	struct record_opts *opts = &rec->opts;
2089 	struct perf_tool *tool = &rec->tool;
2090 	int err = 0;
2091 	event_op f = process_synthesized_event;
2092 
2093 	if (rec->opts.tail_synthesize != tail)
2094 		return 0;
2095 
2096 	if (data->is_pipe) {
2097 		err = perf_event__synthesize_for_pipe(tool, session, data,
2098 						      process_synthesized_event);
2099 		if (err < 0)
2100 			goto out;
2101 
2102 		rec->bytes_written += err;
2103 	}
2104 
2105 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
2106 					  process_synthesized_event, machine);
2107 	if (err)
2108 		goto out;
2109 
2110 	/* Synthesize id_index before auxtrace_info */
2111 	err = perf_event__synthesize_id_index(tool,
2112 					      process_synthesized_event,
2113 					      session->evlist, machine);
2114 	if (err)
2115 		goto out;
2116 
2117 	if (rec->opts.full_auxtrace) {
2118 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
2119 					session, process_synthesized_event);
2120 		if (err)
2121 			goto out;
2122 	}
2123 
2124 	if (!evlist__exclude_kernel(rec->evlist)) {
2125 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
2126 							 machine);
2127 		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
2128 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2129 				   "Check /proc/kallsyms permission or run as root.\n");
2130 
2131 		err = perf_event__synthesize_modules(tool, process_synthesized_event,
2132 						     machine);
2133 		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
2134 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2135 				   "Check /proc/modules permission or run as root.\n");
2136 	}
2137 
2138 	if (perf_guest) {
2139 		machines__process_guests(&session->machines,
2140 					 perf_event__synthesize_guest_os, tool);
2141 	}
2142 
2143 	err = perf_event__synthesize_extra_attr(&rec->tool,
2144 						rec->evlist,
2145 						process_synthesized_event,
2146 						data->is_pipe);
2147 	if (err)
2148 		goto out;
2149 
2150 	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
2151 						 process_synthesized_event,
2152 						NULL);
2153 	if (err < 0) {
2154 		pr_err("Couldn't synthesize thread map.\n");
2155 		return err;
2156 	}
2157 
2158 	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
2159 					     process_synthesized_event, NULL);
2160 	if (err < 0) {
2161 		pr_err("Couldn't synthesize cpu map.\n");
2162 		return err;
2163 	}
2164 
2165 	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
2166 						machine, opts);
2167 	if (err < 0) {
2168 		pr_warning("Couldn't synthesize bpf events.\n");
2169 		err = 0;
2170 	}
2171 
2172 	if (rec->opts.synth & PERF_SYNTH_CGROUP) {
2173 		err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
2174 						     machine);
2175 		if (err < 0) {
2176 			pr_warning("Couldn't synthesize cgroup events.\n");
2177 			err = 0;
2178 		}
2179 	}
2180 
2181 	if (rec->opts.nr_threads_synthesize > 1) {
2182 		mutex_init(&synth_lock);
2183 		perf_set_multithreaded();
2184 		f = process_locked_synthesized_event;
2185 	}
2186 
2187 	if (rec->opts.synth & PERF_SYNTH_TASK) {
2188 		bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
2189 
2190 		err = __machine__synthesize_threads(machine, tool, &opts->target,
2191 						    rec->evlist->core.threads,
2192 						    f, needs_mmap, opts->record_data_mmap,
2193 						    rec->opts.nr_threads_synthesize);
2194 	}
2195 
2196 	if (rec->opts.nr_threads_synthesize > 1) {
2197 		perf_set_singlethreaded();
2198 		mutex_destroy(&synth_lock);
2199 	}
2200 
2201 out:
2202 	return err;
2203 }
2204 
2205 static void record__synthesize_final_bpf_metadata(struct record *rec __maybe_unused)
2206 {
2207 #ifdef HAVE_LIBBPF_SUPPORT
2208 	perf_event__synthesize_final_bpf_metadata(rec->session,
2209 						  process_synthesized_event);
2210 #endif
2211 }
2212 
2213 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
2214 {
2215 	struct record *rec = data;
2216 	pthread_kill(rec->thread_id, SIGUSR2);
2217 	return 0;
2218 }
2219 
2220 static int record__setup_sb_evlist(struct record *rec)
2221 {
2222 	struct record_opts *opts = &rec->opts;
2223 
2224 	if (rec->sb_evlist != NULL) {
2225 		/*
2226 		 * We get here if --switch-output-event populated the
2227 		 * sb_evlist, so associate a callback that will send a SIGUSR2
2228 		 * to the main thread.
2229 		 */
2230 		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
2231 		rec->thread_id = pthread_self();
2232 	}
2233 #ifdef HAVE_LIBBPF_SUPPORT
2234 	if (!opts->no_bpf_event) {
2235 		if (rec->sb_evlist == NULL) {
2236 			rec->sb_evlist = evlist__new();
2237 
2238 			if (rec->sb_evlist == NULL) {
2239 				pr_err("Couldn't create side band evlist.\n");
2240 				return -1;
2241 			}
2242 		}
2243 
2244 		if (evlist__add_bpf_sb_event(rec->sb_evlist, perf_session__env(rec->session))) {
2245 			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n");
2246 			return -1;
2247 		}
2248 	}
2249 #endif
2250 	if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
2251 		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
2252 		opts->no_bpf_event = true;
2253 	}
2254 
2255 	return 0;
2256 }
2257 
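/*
 * When recording with a user-selected clockid, store a reference pair of
 * (time of day, clockid) timestamps in the session env so sample times can
 * later be converted to wall-clock time.
 */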
2258 static int record__init_clock(struct record *rec)
2259 {
2260 	struct perf_session *session = rec->session;
2261 	struct timespec ref_clockid;
2262 	struct timeval ref_tod;
2263 	struct perf_env *env = perf_session__env(session);
2264 	u64 ref;
2265 
2266 	if (!rec->opts.use_clockid)
2267 		return 0;
2268 
2269 	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
2270 		env->clock.clockid_res_ns = rec->opts.clockid_res_ns;
2271 
2272 	env->clock.clockid = rec->opts.clockid;
2273 
2274 	if (gettimeofday(&ref_tod, NULL) != 0) {
2275 		pr_err("gettimeofday failed, cannot set reference time.\n");
2276 		return -1;
2277 	}
2278 
2279 	if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
2280 		pr_err("clock_gettime failed, cannot set reference time.\n");
2281 		return -1;
2282 	}
2283 
2284 	ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
2285 	      (u64) ref_tod.tv_usec * NSEC_PER_USEC;
2286 
2287 	env->clock.tod_ns = ref;
2288 
2289 	ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2290 	      (u64) ref_clockid.tv_nsec;
2291 
2292 	env->clock.clockid_ns = ref;
2293 	return 0;
2294 }
2295 
2296 static void hit_auxtrace_snapshot_trigger(struct record *rec)
2297 {
2298 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2299 		trigger_hit(&auxtrace_snapshot_trigger);
2300 		auxtrace_record__snapshot_started = 1;
2301 		if (auxtrace_record__snapshot_start(rec->itr))
2302 			trigger_error(&auxtrace_snapshot_trigger);
2303 	}
2304 }
2305 
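/*
 * Ask a reader thread to stop by closing the write end of its message pipe
 * (the thread observes POLLHUP on its control fd) and wait for its
 * acknowledgement on the ack pipe.
 */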
2306 static int record__terminate_thread(struct record_thread *thread_data)
2307 {
2308 	int err;
2309 	enum thread_msg ack = THREAD_MSG__UNDEFINED;
2310 	pid_t tid = thread_data->tid;
2311 
2312 	close(thread_data->pipes.msg[1]);
2313 	thread_data->pipes.msg[1] = -1;
2314 	err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
2315 	if (err > 0)
2316 		pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
2317 	else
2318 		pr_warning("threads[%d]: failed to receive termination notification from %d\n",
2319 			   thread->tid, tid);
2320 
2321 	return 0;
2322 }
2323 
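/*
 * Spawn one detached reader thread per thread mask; thread_data[0] stands
 * for the main thread itself. All signals are blocked while the workers are
 * created so they inherit a full mask and only the main thread handles
 * signals. Each worker is pinned to its affinity mask (when supported) and
 * must acknowledge startup before the next one is created.
 */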
2324 static int record__start_threads(struct record *rec)
2325 {
2326 	int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
2327 	struct record_thread *thread_data = rec->thread_data;
2328 	sigset_t full, mask;
2329 	pthread_t handle;
2330 	pthread_attr_t attrs;
2331 
2332 	thread = &thread_data[0];
2333 
2334 	if (!record__threads_enabled(rec))
2335 		return 0;
2336 
2337 	sigfillset(&full);
2338 	if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2339 		pr_err("Failed to block signals on threads start: %m\n");
2340 		return -1;
2341 	}
2342 
2343 	pthread_attr_init(&attrs);
2344 	pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2345 
2346 	for (t = 1; t < nr_threads; t++) {
2347 		enum thread_msg msg = THREAD_MSG__UNDEFINED;
2348 
2349 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2350 		pthread_attr_setaffinity_np(&attrs,
2351 					    MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2352 					    (cpu_set_t *)(thread_data[t].mask->affinity.bits));
2353 #endif
2354 		if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2355 			for (tt = 1; tt < t; tt++)
2356 				record__terminate_thread(&thread_data[tt]);
2357 			pr_err("Failed to start threads: %m\n");
2358 			ret = -1;
2359 			goto out_err;
2360 		}
2361 
2362 		err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2363 		if (err > 0)
2364 			pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2365 				  thread_msg_tags[msg]);
2366 		else
2367 			pr_warning("threads[%d]: failed to receive start notification from %d\n",
2368 				   thread->tid, rec->thread_data[t].tid);
2369 	}
2370 
2371 	sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2372 			(cpu_set_t *)thread->mask->affinity.bits);
2373 
2374 	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2375 
2376 out_err:
2377 	pthread_attr_destroy(&attrs);
2378 
2379 	if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2380 		pr_err("Failed to unblock signals on threads start: %m\n");
2381 		ret = -1;
2382 	}
2383 
2384 	return ret;
2385 }
2386 
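/*
 * Terminate all reader threads and fold their per-thread statistics
 * (samples, wakeups, transferred/compressed bytes) into the record and
 * session totals.
 */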
2387 static int record__stop_threads(struct record *rec)
2388 {
2389 	int t;
2390 	struct record_thread *thread_data = rec->thread_data;
2391 
2392 	for (t = 1; t < rec->nr_threads; t++)
2393 		record__terminate_thread(&thread_data[t]);
2394 
2395 	for (t = 0; t < rec->nr_threads; t++) {
2396 		rec->samples += thread_data[t].samples;
2397 		if (!record__threads_enabled(rec))
2398 			continue;
2399 		rec->session->bytes_transferred += thread_data[t].bytes_transferred;
2400 		rec->session->bytes_compressed += thread_data[t].bytes_compressed;
2401 		pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
2402 			 thread_data[t].samples, thread_data[t].waking);
2403 		if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
2404 			pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
2405 				 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
2406 		else
2407 			pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
2408 	}
2409 
2410 	return 0;
2411 }
2412 
2413 static unsigned long record__waking(struct record *rec)
2414 {
2415 	int t;
2416 	unsigned long waking = 0;
2417 	struct record_thread *thread_data = rec->thread_data;
2418 
2419 	for (t = 0; t < rec->nr_threads; t++)
2420 		waking += thread_data[t].waking;
2421 
2422 	return waking;
2423 }
2424 
2425 static int __cmd_record(struct record *rec, int argc, const char **argv)
2426 {
2427 	int err;
2428 	int status = 0;
2429 	const bool forks = argc > 0;
2430 	struct perf_tool *tool = &rec->tool;
2431 	struct record_opts *opts = &rec->opts;
2432 	struct perf_data *data = &rec->data;
2433 	struct perf_session *session;
2434 	bool disabled = false, draining = false;
2435 	int fd;
2436 	float ratio = 0;
2437 	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
2438 	struct perf_env *env;
2439 
2440 	atexit(record__sig_exit);
2441 	signal(SIGCHLD, sig_handler);
2442 	signal(SIGINT, sig_handler);
2443 	signal(SIGTERM, sig_handler);
2444 	signal(SIGSEGV, sigsegv_handler);
2445 
2446 	if (rec->opts.record_cgroup) {
2447 #ifndef HAVE_FILE_HANDLE
2448 		pr_err("cgroup tracking is not supported\n");
2449 		return -1;
2450 #endif
2451 	}
2452 
2453 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
2454 		signal(SIGUSR2, snapshot_sig_handler);
2455 		if (rec->opts.auxtrace_snapshot_mode)
2456 			trigger_on(&auxtrace_snapshot_trigger);
2457 		if (rec->switch_output.enabled)
2458 			trigger_on(&switch_output_trigger);
2459 	} else {
2460 		signal(SIGUSR2, SIG_IGN);
2461 	}
2462 
2463 	perf_tool__init(tool, /*ordered_events=*/true);
2464 	tool->sample		= process_sample_event;
2465 	tool->fork		= perf_event__process_fork;
2466 	tool->exit		= perf_event__process_exit;
2467 	tool->comm		= perf_event__process_comm;
2468 	tool->namespaces	= perf_event__process_namespaces;
2469 	tool->mmap		= build_id__process_mmap;
2470 	tool->mmap2		= build_id__process_mmap2;
2471 	tool->itrace_start	= process_timestamp_boundary;
2472 	tool->aux		= process_timestamp_boundary;
2473 	tool->namespace_events	= rec->opts.record_namespaces;
2474 	tool->cgroup_events	= rec->opts.record_cgroup;
2475 	session = perf_session__new(data, tool);
2476 	if (IS_ERR(session)) {
2477 		pr_err("Perf session creation failed.\n");
2478 		return PTR_ERR(session);
2479 	}
2480 	env = perf_session__env(session);
2481 	if (record__threads_enabled(rec)) {
2482 		if (perf_data__is_pipe(&rec->data)) {
2483 			pr_err("Parallel trace streaming is not available in pipe mode.\n");
2484 			return -1;
2485 		}
2486 		if (rec->opts.full_auxtrace) {
2487 			pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
2488 			return -1;
2489 		}
2490 	}
2491 
2492 	fd = perf_data__fd(data);
2493 	rec->session = session;
2494 
2495 	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
2496 		pr_err("Compression initialization failed.\n");
2497 		return -1;
2498 	}
2499 #ifdef HAVE_EVENTFD_SUPPORT
2500 	done_fd = eventfd(0, EFD_NONBLOCK);
2501 	if (done_fd < 0) {
2502 		pr_err("Failed to create wakeup eventfd, error: %m\n");
2503 		status = -1;
2504 		goto out_delete_session;
2505 	}
2506 	err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
2507 	if (err < 0) {
2508 		pr_err("Failed to add wakeup eventfd to poll list\n");
2509 		status = err;
2510 		goto out_delete_session;
2511 	}
2512 #endif // HAVE_EVENTFD_SUPPORT
2513 
2514 	env->comp_type  = PERF_COMP_ZSTD;
2515 	env->comp_level = rec->opts.comp_level;
2516 
2517 	if (rec->opts.kcore &&
2518 	    !record__kcore_readable(&session->machines.host)) {
2519 		pr_err("ERROR: kcore is not readable.\n");
2520 		return -1;
2521 	}
2522 
2523 	if (record__init_clock(rec))
2524 		return -1;
2525 
2526 	record__init_features(rec);
2527 
2528 	if (forks) {
2529 		err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
2530 					       workload_exec_failed_signal);
2531 		if (err < 0) {
2532 			pr_err("Couldn't run the workload!\n");
2533 			status = err;
2534 			goto out_delete_session;
2535 		}
2536 	}
2537 
2538 	/*
2539 	 * If we have just a single event and are sending data
2540 	 * through a pipe, we need to force ID allocation,
2541 	 * because we synthesize the event name through the pipe
2542 	 * and need the ID for that.
2543 	 */
2544 	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
2545 		rec->opts.sample_id = true;
2546 
2547 	if (rec->timestamp_filename && perf_data__is_pipe(data)) {
2548 		rec->timestamp_filename = false;
2549 		pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n");
2550 	}
2551 
2552 	/*
2553 	 * Use global stat_config that is zero meaning aggr_mode is AGGR_NONE
2554 	 * and hybrid_merge is false.
2555 	 */
2556 	evlist__uniquify_evsel_names(rec->evlist, &stat_config);
2557 
2558 	evlist__config(rec->evlist, opts, &callchain_param);
2559 
2560 	/* Debug message used by test scripts */
2561 	pr_debug3("perf record opening and mmapping events\n");
2562 	if (record__open(rec) != 0) {
2563 		err = -1;
2564 		goto out_free_threads;
2565 	}
2566 	/* Debug message used by test scripts */
2567 	pr_debug3("perf record done opening and mmapping events\n");
2568 	env->comp_mmap_len = session->evlist->core.mmap_len;
2569 
2570 	if (rec->opts.kcore) {
2571 		err = record__kcore_copy(&session->machines.host, data);
2572 		if (err) {
2573 			pr_err("ERROR: Failed to copy kcore\n");
2574 			goto out_free_threads;
2575 		}
2576 	}
2577 
2578 	/*
2579 	 * Normally perf_session__new would do this, but it doesn't have the
2580 	 * evlist.
2581 	 */
2582 	if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
2583 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2584 		rec->tool.ordered_events = false;
2585 	}
2586 
2587 	if (evlist__nr_groups(rec->evlist) == 0)
2588 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2589 
2590 	if (data->is_pipe) {
2591 		err = perf_header__write_pipe(fd);
2592 		if (err < 0)
2593 			goto out_free_threads;
2594 	} else {
2595 		err = perf_session__write_header(session, rec->evlist, fd, false);
2596 		if (err < 0)
2597 			goto out_free_threads;
2598 	}
2599 
2600 	err = -1;
2601 	if (!rec->no_buildid
2602 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
2603 		pr_err("Couldn't generate buildids. "
2604 		       "Use --no-buildid to profile anyway.\n");
2605 		goto out_free_threads;
2606 	}
2607 
2608 	if (!evlist__needs_bpf_sb_event(rec->evlist))
2609 		opts->no_bpf_event = true;
2610 
2611 	err = record__setup_sb_evlist(rec);
2612 	if (err)
2613 		goto out_free_threads;
2614 
2615 	err = record__synthesize(rec, false);
2616 	if (err < 0)
2617 		goto out_free_threads;
2618 
2619 	if (rec->realtime_prio) {
2620 		struct sched_param param;
2621 
2622 		param.sched_priority = rec->realtime_prio;
2623 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
2624 			pr_err("Could not set realtime priority.\n");
2625 			err = -1;
2626 			goto out_free_threads;
2627 		}
2628 	}
2629 
2630 	if (record__start_threads(rec))
2631 		goto out_free_threads;
2632 
2633 	/*
2634 	 * When perf is starting the traced process, all the events
2635 	 * (apart from group members) have enable_on_exec=1 set,
2636 	 * so don't spoil it by prematurely enabling them.
2637 	 */
2638 	if (!target__none(&opts->target) && !opts->target.initial_delay)
2639 		evlist__enable(rec->evlist);
2640 
2641 	/*
2642 	 * offcpu-time does not call execve, so enable_on_exec wouldn't work
2643 	 * when recording a workload; enable it manually.
2644 	 */
2645 	if (rec->off_cpu)
2646 		evlist__enable_evsel(rec->evlist, (char *)OFFCPU_EVENT);
2647 
2648 	/*
2649 	 * Let the child rip
2650 	 */
2651 	if (forks) {
2652 		struct machine *machine = &session->machines.host;
2653 		union perf_event *event;
2654 		pid_t tgid;
2655 
2656 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2657 		if (event == NULL) {
2658 			err = -ENOMEM;
2659 			goto out_child;
2660 		}
2661 
2662 		/*
2663 		 * Some H/W events are generated before the COMM event,
2664 		 * which is emitted during exec(), so perf script
2665 		 * cannot see a correct process name for those events.
2666 		 * Synthesize a COMM event to prevent that.
2667 		 */
2668 		tgid = perf_event__synthesize_comm(tool, event,
2669 						   rec->evlist->workload.pid,
2670 						   process_synthesized_event,
2671 						   machine);
2672 		free(event);
2673 
2674 		if (tgid == -1)
2675 			goto out_child;
2676 
2677 		event = malloc(sizeof(event->namespaces) +
2678 			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
2679 			       machine->id_hdr_size);
2680 		if (event == NULL) {
2681 			err = -ENOMEM;
2682 			goto out_child;
2683 		}
2684 
2685 		/*
2686 		 * Synthesize NAMESPACES event for the command specified.
2687 		 */
2688 		perf_event__synthesize_namespaces(tool, event,
2689 						  rec->evlist->workload.pid,
2690 						  tgid, process_synthesized_event,
2691 						  machine);
2692 		free(event);
2693 
2694 		evlist__start_workload(rec->evlist);
2695 	}
2696 
2697 	if (opts->target.initial_delay) {
2698 		pr_info(EVLIST_DISABLED_MSG);
2699 		if (opts->target.initial_delay > 0) {
2700 			usleep(opts->target.initial_delay * USEC_PER_MSEC);
2701 			evlist__enable(rec->evlist);
2702 			pr_info(EVLIST_ENABLED_MSG);
2703 		}
2704 	}
2705 
2706 	err = event_enable_timer__start(rec->evlist->eet);
2707 	if (err)
2708 		goto out_child;
2709 
2710 	/* Debug message used by test scripts */
2711 	pr_debug3("perf record has started\n");
2712 	fflush(stderr);
2713 
2714 	trigger_ready(&auxtrace_snapshot_trigger);
2715 	trigger_ready(&switch_output_trigger);
2716 	perf_hooks__invoke_record_start();
2717 
2718 	/*
2719 	 * Must write FINISHED_INIT so it will be seen after all other
2720 	 * synthesized user events, but before any regular events.
2721 	 */
2722 	err = write_finished_init(rec, false);
2723 	if (err < 0)
2724 		goto out_child;
2725 
2726 	for (;;) {
2727 		unsigned long long hits = thread->samples;
2728 
2729 		/*
2730 		 * rec->evlist->bkw_mmap_state may be
2731 		 * BKW_MMAP_EMPTY here: when done == true and
2732 		 * hits != rec->samples in previous round.
2733 		 *
2734 		 * evlist__toggle_bkw_mmap() ensures we never
2735 		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
2736 		 */
2737 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
2738 			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
2739 
2740 		if (record__mmap_read_all(rec, false) < 0) {
2741 			trigger_error(&auxtrace_snapshot_trigger);
2742 			trigger_error(&switch_output_trigger);
2743 			err = -1;
2744 			goto out_child;
2745 		}
2746 
2747 		if (auxtrace_record__snapshot_started) {
2748 			auxtrace_record__snapshot_started = 0;
2749 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
2750 				record__read_auxtrace_snapshot(rec, false);
2751 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2752 				pr_err("AUX area tracing snapshot failed\n");
2753 				err = -1;
2754 				goto out_child;
2755 			}
2756 		}
2757 
2758 		if (trigger_is_hit(&switch_output_trigger)) {
2759 			/*
2760 			 * If switch_output_trigger is hit, the data in
2761 			 * overwritable ring buffer should have been collected,
2762 			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
2763 			 *
2764 			 * If SIGUSR2 is raised after or during record__mmap_read_all(),
2765 			 * record__mmap_read_all() didn't collect data from the
2766 			 * overwritable ring buffer. Read again.
2767 			 */
2768 			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2769 				continue;
2770 			trigger_ready(&switch_output_trigger);
2771 
2772 			/*
2773 			 * Reenable events in overwrite ring buffer after
2774 			 * record__mmap_read_all(): we should have collected
2775 			 * data from it.
2776 			 */
2777 			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
2778 
2779 			if (!quiet)
2780 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
2781 					record__waking(rec));
2782 			thread->waking = 0;
2783 			fd = record__switch_output(rec, false);
2784 			if (fd < 0) {
2785 				pr_err("Failed to switch to new file\n");
2786 				trigger_error(&switch_output_trigger);
2787 				err = fd;
2788 				goto out_child;
2789 			}
2790 
2791 			/* re-arm the alarm */
2792 			if (rec->switch_output.time)
2793 				alarm(rec->switch_output.time);
2794 		}
2795 
2796 		if (hits == thread->samples) {
2797 			if (done || draining)
2798 				break;
2799 			err = fdarray__poll(&thread->pollfd, -1);
2800 			/*
2801 			 * Propagate an error only if there is one. Ignore a positive
2802 			 * number of returned events and interrupted polls (EINTR).
2803 			 */
2804 			if (err > 0 || (err < 0 && errno == EINTR))
2805 				err = 0;
2806 			thread->waking++;
2807 
2808 			if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2809 					    record__thread_munmap_filtered, NULL) == 0)
2810 				draining = true;
2811 
2812 			err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread);
2813 			if (err)
2814 				goto out_child;
2815 		}
2816 
2817 		if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2818 			switch (cmd) {
2819 			case EVLIST_CTL_CMD_SNAPSHOT:
2820 				hit_auxtrace_snapshot_trigger(rec);
2821 				evlist__ctlfd_ack(rec->evlist);
2822 				break;
2823 			case EVLIST_CTL_CMD_STOP:
2824 				done = 1;
2825 				break;
2826 			case EVLIST_CTL_CMD_ACK:
2827 			case EVLIST_CTL_CMD_UNSUPPORTED:
2828 			case EVLIST_CTL_CMD_ENABLE:
2829 			case EVLIST_CTL_CMD_DISABLE:
2830 			case EVLIST_CTL_CMD_EVLIST:
2831 			case EVLIST_CTL_CMD_PING:
2832 			default:
2833 				break;
2834 			}
2835 		}
2836 
2837 		err = event_enable_timer__process(rec->evlist->eet);
2838 		if (err < 0)
2839 			goto out_child;
2840 		if (err) {
2841 			err = 0;
2842 			done = 1;
2843 		}
2844 
2845 		/*
2846 		 * When perf is starting the traced process, at the end events
2847 		 * die with the process and we wait for that. Thus no need to
2848 		 * disable events in this case.
2849 		 */
2850 		if (done && !disabled && !target__none(&opts->target)) {
2851 			trigger_off(&auxtrace_snapshot_trigger);
2852 			evlist__disable(rec->evlist);
2853 			disabled = true;
2854 		}
2855 	}
2856 
2857 	trigger_off(&auxtrace_snapshot_trigger);
2858 	trigger_off(&switch_output_trigger);
2859 
2860 	record__synthesize_final_bpf_metadata(rec);
2861 
2862 	if (opts->auxtrace_snapshot_on_exit)
2863 		record__auxtrace_snapshot_exit(rec);
2864 
2865 	if (forks && workload_exec_errno) {
2866 		char msg[STRERR_BUFSIZE];
2867 		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
2868 		struct strbuf sb = STRBUF_INIT;
2869 
2870 		evlist__format_evsels(rec->evlist, &sb, 2048);
2871 
2872 		pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2873 			sb.buf, argv[0], emsg);
2874 		strbuf_release(&sb);
2875 		err = -1;
2876 		goto out_child;
2877 	}
2878 
2879 	if (!quiet)
2880 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2881 			record__waking(rec));
2882 
2883 	write_finished_init(rec, true);
2884 
2885 	if (target__none(&rec->opts.target))
2886 		record__synthesize_workload(rec, true);
2887 
2888 out_child:
2889 	record__stop_threads(rec);
2890 	record__mmap_read_all(rec, true);
2891 out_free_threads:
2892 	record__free_thread_data(rec);
2893 	evlist__finalize_ctlfd(rec->evlist);
2894 	record__aio_mmap_read_sync(rec);
2895 
2896 	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
2897 		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
2898 		env->comp_ratio = ratio + 0.5;
2899 	}
2900 
2901 	if (forks) {
2902 		int exit_status;
2903 
2904 		if (!child_finished)
2905 			kill(rec->evlist->workload.pid, SIGTERM);
2906 
2907 		wait(&exit_status);
2908 
2909 		if (err < 0)
2910 			status = err;
2911 		else if (WIFEXITED(exit_status))
2912 			status = WEXITSTATUS(exit_status);
2913 		else if (WIFSIGNALED(exit_status))
2914 			signr = WTERMSIG(exit_status);
2915 	} else
2916 		status = err;
2917 
2918 	if (rec->off_cpu)
2919 		rec->bytes_written += off_cpu_write(rec->session);
2920 
2921 	record__read_lost_samples(rec);
2922 	/* this will be recalculated during process_buildids() */
2923 	rec->samples = 0;
2924 
2925 	if (!err) {
2926 		record__synthesize(rec, true);
2927 		if (!rec->timestamp_filename) {
2928 			record__finish_output(rec);
2929 		} else {
2930 			fd = record__switch_output(rec, true);
2931 			if (fd < 0) {
2932 				status = fd;
2933 				goto out_delete_session;
2934 			}
2935 		}
2936 	}
2937 
2938 	perf_hooks__invoke_record_end();
2939 
2940 	if (!err && !quiet) {
2941 		char samples[128];
2942 		const char *postfix = rec->timestamp_filename ?
2943 					".<timestamp>" : "";
2944 
2945 		if (rec->samples && !rec->opts.full_auxtrace)
2946 			scnprintf(samples, sizeof(samples),
2947 				  " (%" PRIu64 " samples)", rec->samples);
2948 		else
2949 			samples[0] = '\0';
2950 
2951 		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s",
2952 			perf_data__size(data) / 1024.0 / 1024.0,
2953 			data->path, postfix, samples);
2954 		if (ratio) {
2955 			fprintf(stderr,	", compressed (original %.3f MB, ratio is %.3f)",
2956 					rec->session->bytes_transferred / 1024.0 / 1024.0,
2957 					ratio);
2958 		}
2959 		fprintf(stderr, " ]\n");
2960 	}
2961 
2962 out_delete_session:
2963 #ifdef HAVE_EVENTFD_SUPPORT
2964 	if (done_fd >= 0) {
2965 		fd = done_fd;
2966 		done_fd = -1;
2967 
2968 		close(fd);
2969 	}
2970 #endif
2971 	zstd_fini(&session->zstd_data);
2972 	if (!opts->no_bpf_event)
2973 		evlist__stop_sb_thread(rec->sb_evlist);
2974 
2975 	perf_session__delete(session);
2976 	return status;
2977 }
2978 
2979 static void callchain_debug(struct callchain_param *callchain)
2980 {
2981 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2982 
2983 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2984 
2985 	if (callchain->record_mode == CALLCHAIN_DWARF)
2986 		pr_debug("callchain: stack dump size %d\n",
2987 			 callchain->dump_size);
2988 }
2989 
2990 int record_opts__parse_callchain(struct record_opts *record,
2991 				 struct callchain_param *callchain,
2992 				 const char *arg, bool unset)
2993 {
2994 	int ret;
2995 	callchain->enabled = !unset;
2996 
2997 	/* --no-call-graph */
2998 	if (unset) {
2999 		callchain->record_mode = CALLCHAIN_NONE;
3000 		pr_debug("callchain: disabled\n");
3001 		return 0;
3002 	}
3003 
3004 	ret = parse_callchain_record_opt(arg, callchain);
3005 	if (!ret) {
3006 		/* Enable data address sampling for DWARF unwind. */
3007 		if (callchain->record_mode == CALLCHAIN_DWARF &&
3008 		    !record->record_data_mmap_set)
3009 			record->record_data_mmap = true;
3010 		callchain_debug(callchain);
3011 	}
3012 
3013 	return ret;
3014 }
3015 
3016 int record_parse_callchain_opt(const struct option *opt,
3017 			       const char *arg,
3018 			       int unset)
3019 {
3020 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
3021 }
3022 
3023 int record_callchain_opt(const struct option *opt,
3024 			 const char *arg __maybe_unused,
3025 			 int unset __maybe_unused)
3026 {
3027 	struct callchain_param *callchain = opt->value;
3028 
3029 	callchain->enabled = true;
3030 
3031 	if (callchain->record_mode == CALLCHAIN_NONE)
3032 		callchain->record_mode = CALLCHAIN_FP;
3033 
3034 	callchain_debug(callchain);
3035 	return 0;
3036 }
3037 
3038 static int perf_record_config(const char *var, const char *value, void *cb)
3039 {
3040 	struct record *rec = cb;
3041 
3042 	if (!strcmp(var, "record.build-id")) {
3043 		if (!strcmp(value, "cache"))
3044 			rec->no_buildid_cache = false;
3045 		else if (!strcmp(value, "no-cache"))
3046 			rec->no_buildid_cache = true;
3047 		else if (!strcmp(value, "skip"))
3048 			rec->no_buildid = rec->no_buildid_cache = true;
3049 		else if (!strcmp(value, "mmap"))
3050 			rec->buildid_mmap = true;
3051 		else if (!strcmp(value, "no-mmap"))
3052 			rec->buildid_mmap = false;
3053 		else
3054 			return -1;
3055 		return 0;
3056 	}
3057 	if (!strcmp(var, "record.call-graph")) {
3058 		var = "call-graph.record-mode";
3059 		return perf_default_config(var, value, cb);
3060 	}
3061 #ifdef HAVE_AIO_SUPPORT
3062 	if (!strcmp(var, "record.aio")) {
3063 		rec->opts.nr_cblocks = strtol(value, NULL, 0);
3064 		if (!rec->opts.nr_cblocks)
3065 			rec->opts.nr_cblocks = nr_cblocks_default;
3066 	}
3067 #endif
3068 	if (!strcmp(var, "record.debuginfod")) {
3069 		rec->debuginfod.urls = strdup(value);
3070 		if (!rec->debuginfod.urls)
3071 			return -ENOMEM;
3072 		rec->debuginfod.set = true;
3073 	}
3074 
3075 	return 0;
3076 }
3077 
3078 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset)
3079 {
3080 	struct record *rec = (struct record *)opt->value;
3081 
3082 	return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset);
3083 }
3084 
3085 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
3086 {
3087 	struct record_opts *opts = (struct record_opts *)opt->value;
3088 
3089 	if (unset || !str)
3090 		return 0;
3091 
3092 	if (!strcasecmp(str, "node"))
3093 		opts->affinity = PERF_AFFINITY_NODE;
3094 	else if (!strcasecmp(str, "cpu"))
3095 		opts->affinity = PERF_AFFINITY_CPU;
3096 
3097 	return 0;
3098 }
3099 
3100 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
3101 {
3102 	mask->nbits = nr_bits;
3103 	mask->bits = bitmap_zalloc(mask->nbits);
3104 	if (!mask->bits)
3105 		return -ENOMEM;
3106 
3107 	return 0;
3108 }
3109 
3110 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
3111 {
3112 	bitmap_free(mask->bits);
3113 	mask->nbits = 0;
3114 }
3115 
3116 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
3117 {
3118 	int ret;
3119 
3120 	ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
3121 	if (ret) {
3122 		mask->affinity.bits = NULL;
3123 		return ret;
3124 	}
3125 
3126 	ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
3127 	if (ret) {
3128 		record__mmap_cpu_mask_free(&mask->maps);
3129 		mask->maps.bits = NULL;
3130 	}
3131 
3132 	return ret;
3133 }
3134 
3135 static void record__thread_mask_free(struct thread_mask *mask)
3136 {
3137 	record__mmap_cpu_mask_free(&mask->maps);
3138 	record__mmap_cpu_mask_free(&mask->affinity);
3139 }
3140 
3141 static int record__parse_threads(const struct option *opt, const char *str, int unset)
3142 {
3143 	int s;
3144 	struct record_opts *opts = opt->value;
3145 
3146 	if (unset || !str || !strlen(str)) {
3147 		opts->threads_spec = THREAD_SPEC__CPU;
3148 	} else {
3149 		for (s = 1; s < THREAD_SPEC__MAX; s++) {
3150 			if (s == THREAD_SPEC__USER) {
3151 				opts->threads_user_spec = strdup(str);
3152 				if (!opts->threads_user_spec)
3153 					return -ENOMEM;
3154 				opts->threads_spec = THREAD_SPEC__USER;
3155 				break;
3156 			}
3157 			if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
3158 				opts->threads_spec = s;
3159 				break;
3160 			}
3161 		}
3162 	}
3163 
3164 	if (opts->threads_spec == THREAD_SPEC__USER)
3165 		pr_debug("threads_spec: %s\n", opts->threads_user_spec);
3166 	else
3167 		pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
3168 
3169 	return 0;
3170 }
3171 
3172 static int parse_output_max_size(const struct option *opt,
3173 				 const char *str, int unset)
3174 {
3175 	unsigned long *s = (unsigned long *)opt->value;
3176 	static struct parse_tag tags_size[] = {
3177 		{ .tag  = 'B', .mult = 1       },
3178 		{ .tag  = 'K', .mult = 1 << 10 },
3179 		{ .tag  = 'M', .mult = 1 << 20 },
3180 		{ .tag  = 'G', .mult = 1 << 30 },
3181 		{ .tag  = 0 },
3182 	};
3183 	unsigned long val;
3184 
3185 	if (unset) {
3186 		*s = 0;
3187 		return 0;
3188 	}
3189 
3190 	val = parse_tag_value(str, tags_size);
3191 	if (val != (unsigned long) -1) {
3192 		*s = val;
3193 		return 0;
3194 	}
3195 
3196 	return -1;
3197 }
3198 
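/*
 * Parse the "pages[,pages]" argument of -m/--mmap-pages: the value before
 * the comma sizes the data mmaps, the optional value after it sizes the
 * AUX area tracing mmaps.
 */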
3199 static int record__parse_mmap_pages(const struct option *opt,
3200 				    const char *str,
3201 				    int unset __maybe_unused)
3202 {
3203 	struct record_opts *opts = opt->value;
3204 	char *s, *p;
3205 	unsigned int mmap_pages;
3206 	int ret;
3207 
3208 	if (!str)
3209 		return -EINVAL;
3210 
3211 	s = strdup(str);
3212 	if (!s)
3213 		return -ENOMEM;
3214 
3215 	p = strchr(s, ',');
3216 	if (p)
3217 		*p = '\0';
3218 
3219 	if (*s) {
3220 		ret = __evlist__parse_mmap_pages(&mmap_pages, s);
3221 		if (ret)
3222 			goto out_free;
3223 		opts->mmap_pages = mmap_pages;
3224 	}
3225 
3226 	if (!p) {
3227 		ret = 0;
3228 		goto out_free;
3229 	}
3230 
3231 	ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
3232 	if (ret)
3233 		goto out_free;
3234 
3235 	opts->auxtrace_mmap_pages = mmap_pages;
3236 
3237 out_free:
3238 	free(s);
3239 	return ret;
3240 }
3241 
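/*
 * Parse the off-cpu threshold, given in milliseconds, and store it in
 * nanoseconds; a zero result is only accepted for the literal string "0".
 */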
3242 static int record__parse_off_cpu_thresh(const struct option *opt,
3243 					const char *str,
3244 					int unset __maybe_unused)
3245 {
3246 	struct record_opts *opts = opt->value;
3247 	char *endptr;
3248 	u64 off_cpu_thresh_ms;
3249 
3250 	if (!str)
3251 		return -EINVAL;
3252 
3253 	off_cpu_thresh_ms = strtoull(str, &endptr, 10);
3254 
3255 	/* the threshold isn't string "0", yet strtoull() returns 0, parsing failed */
3256 	/* strtoull() returned 0 but the string isn't "0", so parsing failed */
3257 		return -EINVAL;
3258 	else
3259 		opts->off_cpu_thresh_ns = off_cpu_thresh_ms * NSEC_PER_MSEC;
3260 
3261 	return 0;
3262 }
3263 
3264 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
3265 {
3266 }
3267 
3268 static int parse_control_option(const struct option *opt,
3269 				const char *str,
3270 				int unset __maybe_unused)
3271 {
3272 	struct record_opts *opts = opt->value;
3273 
3274 	return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
3275 }
3276 
3277 static void switch_output_size_warn(struct record *rec)
3278 {
3279 	u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
3280 	struct switch_output *s = &rec->switch_output;
3281 
3282 	wakeup_size /= 2;
3283 
3284 	if (s->size < wakeup_size) {
3285 		char buf[100];
3286 
3287 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
3288 		pr_warning("WARNING: switch-output data size lower than "
3289 			   "wakeup kernel buffer size (%s) "
3290 			   "expect bigger perf.data sizes\n", buf);
3291 	}
3292 }
3293 
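/*
 * Parse the --switch-output argument: "signal" selects SIGUSR2-driven
 * switching, otherwise the value is tried first as a size (B/K/M/G) and
 * then as a time (s/m/h/d) threshold. Using switch-output events implies
 * the signal mode. Any of these also implies timestamped output file names.
 */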
3294 static int switch_output_setup(struct record *rec)
3295 {
3296 	struct switch_output *s = &rec->switch_output;
3297 	static struct parse_tag tags_size[] = {
3298 		{ .tag  = 'B', .mult = 1       },
3299 		{ .tag  = 'K', .mult = 1 << 10 },
3300 		{ .tag  = 'M', .mult = 1 << 20 },
3301 		{ .tag  = 'G', .mult = 1 << 30 },
3302 		{ .tag  = 0 },
3303 	};
3304 	static struct parse_tag tags_time[] = {
3305 		{ .tag  = 's', .mult = 1        },
3306 		{ .tag  = 'm', .mult = 60       },
3307 		{ .tag  = 'h', .mult = 60*60    },
3308 		{ .tag  = 'd', .mult = 60*60*24 },
3309 		{ .tag  = 0 },
3310 	};
3311 	unsigned long val;
3312 
3313 	/*
3314 	 * If we're using --switch-output-events, then we imply
3315 	 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
3316 	 * thread to its parent.
3317 	 */
3318 	if (rec->switch_output_event_set) {
3319 		if (record__threads_enabled(rec)) {
3320 			pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
3321 			return 0;
3322 		}
3323 		goto do_signal;
3324 	}
3325 
3326 	if (!s->set)
3327 		return 0;
3328 
3329 	if (record__threads_enabled(rec)) {
3330 		pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
3331 		return 0;
3332 	}
3333 
3334 	if (!strcmp(s->str, "signal")) {
3335 do_signal:
3336 		s->signal = true;
3337 		pr_debug("switch-output with SIGUSR2 signal\n");
3338 		goto enabled;
3339 	}
3340 
3341 	val = parse_tag_value(s->str, tags_size);
3342 	if (val != (unsigned long) -1) {
3343 		s->size = val;
3344 		pr_debug("switch-output with %s size threshold\n", s->str);
3345 		goto enabled;
3346 	}
3347 
3348 	val = parse_tag_value(s->str, tags_time);
3349 	if (val != (unsigned long) -1) {
3350 		s->time = val;
3351 		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
3352 			 s->str, s->time);
3353 		goto enabled;
3354 	}
3355 
3356 	return -1;
3357 
3358 enabled:
3359 	rec->timestamp_filename = true;
3360 	s->enabled              = true;
3361 
3362 	if (s->size && !rec->opts.no_buffering)
3363 		switch_output_size_warn(rec);
3364 
3365 	return 0;
3366 }
3367 
3368 static const char * const __record_usage[] = {
3369 	"perf record [<options>] [<command>]",
3370 	"perf record [<options>] -- <command> [<options>]",
3371 	NULL
3372 };
3373 const char * const *record_usage = __record_usage;
3374 
3375 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
3376 				  struct perf_sample *sample, struct machine *machine)
3377 {
3378 	/*
3379 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3380 	 * no need to add them twice.
3381 	 */
3382 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3383 		return 0;
3384 	return perf_event__process_mmap(tool, event, sample, machine);
3385 }
3386 
3387 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
3388 				   struct perf_sample *sample, struct machine *machine)
3389 {
3390 	/*
3391 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3392 	 * no need to add them twice.
3393 	 */
3394 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3395 		return 0;
3396 
3397 	return perf_event__process_mmap2(tool, event, sample, machine);
3398 }
3399 
3400 static int process_timestamp_boundary(const struct perf_tool *tool,
3401 				      union perf_event *event __maybe_unused,
3402 				      struct perf_sample *sample,
3403 				      struct machine *machine __maybe_unused)
3404 {
3405 	struct record *rec = container_of(tool, struct record, tool);
3406 
3407 	set_timestamp_boundary(rec, sample->time);
3408 	return 0;
3409 }
3410 
3411 static int parse_record_synth_option(const struct option *opt,
3412 				     const char *str,
3413 				     int unset __maybe_unused)
3414 {
3415 	struct record_opts *opts = opt->value;
3416 	char *p = strdup(str);
3417 
3418 	if (p == NULL)
3419 		return -1;
3420 
3421 	opts->synth = parse_synth_opt(p);
3422 	free(p);
3423 
3424 	if (opts->synth < 0) {
3425 		pr_err("Invalid synth option: %s\n", str);
3426 		return -1;
3427 	}
3428 	return 0;
3429 }
3430 
3431 /*
3432  * XXX Ideally would be local to cmd_record() and passed to a record__new
3433  * because we need to have access to it in record__exit, that is called
3434  * after cmd_record() exits, but since record_options need to be accessible to
3435  * builtin-script, leave it here.
3436  *
3437  * At least we don't touch it in all the other functions here directly.
3438  *
3439  * Just say no to tons of global variables, sigh.
3440  */
3441 static struct record record = {
3442 	.opts = {
3443 		.sample_time	     = true,
3444 		.mmap_pages	     = UINT_MAX,
3445 		.user_freq	     = UINT_MAX,
3446 		.user_interval	     = ULLONG_MAX,
3447 		.freq		     = 4000,
3448 		.target		     = {
3449 			.uses_mmap   = true,
3450 			.default_per_cpu = true,
3451 		},
3452 		.mmap_flush          = MMAP_FLUSH_DEFAULT,
3453 		.nr_threads_synthesize = 1,
3454 		.ctl_fd              = -1,
3455 		.ctl_fd_ack          = -1,
3456 		.synth               = PERF_SYNTH_ALL,
3457 		.off_cpu_thresh_ns   = OFFCPU_THRESH,
3458 	},
3459 	.buildid_mmap = true,
3460 };
3461 
3462 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3463 	"\n\t\t\t\tDefault: fp";
3464 
3465 static bool dry_run;
3466 
3467 static struct parse_events_option_args parse_events_option_args = {
3468 	.evlistp = &record.evlist,
3469 };
3470 
3471 static struct parse_events_option_args switch_output_parse_events_option_args = {
3472 	.evlistp = &record.sb_evlist,
3473 };
3474 
3475 /*
3476  * XXX This will stay a global variable until we fix builtin-script.c to stop messing
3477  * with it and switch to using the library functions in perf_evlist that came
3478  * from builtin-record.c, i.e. use record_opts,
3479  * evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record'
3480  * using pipes, etc.
3481  */
3482 static struct option __record_options[] = {
3483 	OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
3484 		     "event selector. use 'perf list' to list available events",
3485 		     parse_events_option),
3486 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3487 		     "event filter", parse_filter),
3488 	OPT_BOOLEAN(0, "latency", &record.latency,
3489 		    "Enable data collection for latency profiling.\n"
3490 		    "\t\t\t  Use perf report --latency for latency-centric profile."),
3491 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3492 			   NULL, "don't record events from perf itself",
3493 			   exclude_perf),
3494 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3495 		    "record events on existing process id"),
3496 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3497 		    "record events on existing thread id"),
3498 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
3499 		    "collect data with this RT SCHED_FIFO priority"),
3500 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3501 		    "collect data without buffering"),
3502 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3503 		    "collect raw sample records from all opened counters"),
3504 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3505 			    "system-wide collection from all CPUs"),
3506 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3507 		    "list of cpus to monitor"),
3508 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3509 	OPT_STRING('o', "output", &record.data.path, "file",
3510 		    "output file name"),
3511 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3512 			&record.opts.no_inherit_set,
3513 			"child tasks do not inherit counters"),
3514 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3515 		    "synthesize non-sample events at the end of output"),
3516 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3517 	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3518 	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3519 		    "Fail if the specified frequency can't be used"),
3520 	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3521 		     "profile at this frequency",
3522 		      record__parse_freq),
3523 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3524 		     "number of mmap data pages and AUX area tracing mmap pages",
3525 		     record__parse_mmap_pages),
3526 	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3527 		     "Minimal number of bytes that are extracted from mmap data pages (default: 1)",
3528 		     record__mmap_flush_parse),
3529 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3530 			   NULL, "enables call-graph recording",
3531 			   &record_callchain_opt),
3532 	OPT_CALLBACK(0, "call-graph", &record.opts,
3533 		     "record_mode[,record_size]", record_callchain_help,
3534 		     &record_parse_callchain_opt),
3535 	OPT_INCR('v', "verbose", &verbose,
3536 		    "be more verbose (show counter open errors, etc)"),
3537 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"),
3538 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3539 		    "per thread counts"),
3540 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3541 	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3542 		    "Record the sample physical addresses"),
3543 	OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3544 		    "Record the sampled data address data page size"),
3545 	OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3546 		    "Record the sampled code address (ip) page size"),
3547 	OPT_BOOLEAN(0, "sample-mem-info", &record.opts.sample_data_src,
3548 		    "Record the data source for memory operations"),
3549 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3550 	OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier,
3551 		    "Record the sample identifier"),
3552 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3553 			&record.opts.sample_time_set,
3554 			"Record the sample timestamps"),
3555 	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3556 			"Record the sample period"),
3557 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3558 		    "don't sample"),
3559 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3560 			&record.no_buildid_cache_set,
3561 			"do not update the buildid cache"),
3562 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3563 			&record.no_buildid_set,
3564 			"do not collect buildids in perf.data"),
3565 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3566 		     "monitor event in cgroup name only",
3567 		     parse_cgroups),
3568 	OPT_CALLBACK('D', "delay", &record, "ms",
3569 		     "ms to wait before starting measurement after program start (-1: start with events disabled), "
3570 		     "or ranges of time to enable events e.g. '-D 10-20,30-40'",
3571 		     record__parse_event_enable_time),
3572 	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3573 	OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
3574 
3575 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3576 		     "branch any", "sample any taken branches",
3577 		     parse_branch_stack),
3578 
3579 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3580 		     "branch filter mask", "branch stack filter modes",
3581 		     parse_branch_stack),
3582 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3583 		    "sample by weight (on special events only)"),
3584 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3585 		    "sample transaction flags (special events only)"),
3586 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3587 		    "use per-thread mmaps"),
3588 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3589 		    "sample selected machine registers on interrupt,"
3590 		    " use '-I?' to list register names", parse_intr_regs),
3591 	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3592 		    "sample selected machine registers in user space,"
3593 		    " use '--user-regs=?' to list register names", parse_user_regs),
3594 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3595 		    "Record running/enabled time of read (:S) events"),
3596 	OPT_CALLBACK('k', "clockid", &record.opts,
3597 		     "clockid", "clockid to use for events, see clock_gettime()",
3598 		     parse_clockid),
3599 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3600 			  "opts", "AUX area tracing Snapshot Mode", ""),
3601 	OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3602 			  "opts", "sample AUX area", ""),
3603 	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3604 			"per thread proc mmap processing timeout in ms"),
3605 	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3606 		    "Record namespaces events"),
3607 	OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3608 		    "Record cgroup events"),
3609 	OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3610 			&record.opts.record_switch_events_set,
3611 			"Record context switch events"),
3612 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3613 			 "Configure all used events to run in kernel space.",
3614 			 PARSE_OPT_EXCLUSIVE),
3615 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3616 			 "Configure all used events to run in user space.",
3617 			 PARSE_OPT_EXCLUSIVE),
3618 	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3619 		    "collect kernel callchains"),
3620 	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3621 		    "collect user callchains"),
3622 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3623 		   "file", "vmlinux pathname"),
3624 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3625 		    "Record build-id of all DSOs regardless of hits"),
3626 	OPT_BOOLEAN_SET(0, "buildid-mmap", &record.buildid_mmap, &record.buildid_mmap_set,
3627 			"Record build-id in mmap events and skip build-id processing."),
3628 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3629 		    "append timestamp to output filename"),
3630 	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3631 		    "Record timestamp boundary (time of first/last samples)"),
3632 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3633 			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3634 			  "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3635 			  "signal"),
3636 	OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args,
3637 			 &record.switch_output_event_set, "switch output event",
3638 			 "switch output event selector. use 'perf list' to list available events",
3639 			 parse_events_option_new_evlist),
3640 	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3641 		   "Limit number of switch output generated files"),
3642 	OPT_BOOLEAN(0, "dry-run", &dry_run,
3643 		    "Parse options then exit"),
3644 #ifdef HAVE_AIO_SUPPORT
3645 	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3646 		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3647 		     record__aio_parse),
3648 #endif
3649 	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3650 		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3651 		     record__parse_affinity),
3652 #ifdef HAVE_ZSTD_SUPPORT
3653 	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3654 			    "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3655 			    record__parse_comp_level),
3656 #endif
3657 	OPT_CALLBACK(0, "max-size", &record.output_max_size,
3658 		     "size", "Limit the maximum size of the output file", parse_output_max_size),
3659 	OPT_UINTEGER(0, "num-thread-synthesize",
3660 		     &record.opts.nr_threads_synthesize,
3661 		     "number of threads to run for event synthesis"),
3662 #ifdef HAVE_LIBPFM
3663 	OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3664 		"libpfm4 event selector. use 'perf list' to list available events",
3665 		parse_libpfm_events_option),
3666 #endif
3667 	OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3668 		     "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3669 		     "\t\t\t  'snapshot': AUX area tracing snapshot).\n"
3670 		     "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3671 		     "\t\t\t  Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3672 		      parse_control_option),
3673 	OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3674 		     "Fine-tune event synthesis: default=all", parse_record_synth_option),
3675 	OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3676 			  &record.debuginfod.set, "debuginfod urls",
3677 			  "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3678 			  "system"),
3679 	OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3680 			    "write collected trace data into several data files using parallel threads",
3681 			    record__parse_threads),
3682 	OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
3683 	OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin",
3684 		   "BPF filter action"),
3685 	OPT_CALLBACK(0, "off-cpu-thresh", &record.opts, "ms",
3686 		     "Dump off-cpu samples if off-cpu time exceeds this threshold (in milliseconds). (Default: 500ms)",
3687 		     record__parse_off_cpu_thresh),
3688 	OPT_BOOLEAN_SET(0, "data-mmap", &record.opts.record_data_mmap,
3689 			&record.opts.record_data_mmap_set,
3690 			"Record mmap events for non-executable mappings"),
3691 	OPT_END()
3692 };
3693 
3694 struct option *record_options = __record_options;
3695 
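/*
 * Set in 'mask' the bit of every CPU present in 'cpus'.  A dummy CPU map
 * (no real CPUs) leaves the mask untouched, while a CPU number larger than
 * the mask capacity results in -ENODEV.
 */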
3696 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3697 {
3698 	struct perf_cpu cpu;
3699 	int idx;
3700 
3701 	if (cpu_map__is_dummy(cpus))
3702 		return 0;
3703 
3704 	perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) {
3705 		/* Return ENODEV if the input cpu is greater than the max cpu */
3706 		if ((unsigned long)cpu.cpu > mask->nbits)
3707 			return -ENODEV;
3708 		__set_bit(cpu.cpu, mask->bits);
3709 	}
3710 
3711 	return 0;
3712 }
3713 
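/*
 * Initialize 'mask' from a textual CPU list as accepted by
 * perf_cpu_map__new(), e.g. (illustrative) "0-3,7".
 */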
3714 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3715 {
3716 	struct perf_cpu_map *cpus;
3717 
3718 	cpus = perf_cpu_map__new(mask_spec);
3719 	if (!cpus)
3720 		return -ENOMEM;
3721 
3722 	bitmap_zero(mask->bits, mask->nbits);
3723 	if (record__mmap_cpu_mask_init(mask, cpus)) {
3724 		perf_cpu_map__put(cpus);
3725 		return -ENODEV;
3726 	}
3727 	perf_cpu_map__put(cpus);
3728 	return 0;
3729 }
3730 
3731 static void record__free_thread_masks(struct record *rec, int nr_threads)
3732 {
3733 	int t;
3734 
3735 	if (rec->thread_masks)
3736 		for (t = 0; t < nr_threads; t++)
3737 			record__thread_mask_free(&rec->thread_masks[t]);
3738 
3739 	zfree(&rec->thread_masks);
3740 }
3741 
3742 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3743 {
3744 	int t, ret;
3745 
3746 	rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3747 	if (!rec->thread_masks) {
3748 		pr_err("Failed to allocate thread masks\n");
3749 		return -ENOMEM;
3750 	}
3751 
3752 	for (t = 0; t < nr_threads; t++) {
3753 		ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3754 		if (ret) {
3755 			pr_err("Failed to allocate thread masks[%d]\n", t);
3756 			goto out_free;
3757 		}
3758 	}
3759 
3760 	return 0;
3761 
3762 out_free:
3763 	record__free_thread_masks(rec, nr_threads);
3764 
3765 	return ret;
3766 }
3767 
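/*
 * THREAD_SPEC__CPU: one data-streaming thread per monitored CPU, with both
 * the maps and the affinity mask of each thread containing just its CPU.
 */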
3768 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3769 {
3770 	int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3771 
3772 	ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3773 	if (ret)
3774 		return ret;
3775 
3776 	rec->nr_threads = nr_cpus;
3777 	pr_debug("nr_threads: %d\n", rec->nr_threads);
3778 
3779 	for (t = 0; t < rec->nr_threads; t++) {
3780 		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
3781 		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
3782 		if (verbose > 0) {
3783 			pr_debug("thread_masks[%d]: ", t);
3784 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3785 			pr_debug("thread_masks[%d]: ", t);
3786 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3787 		}
3788 	}
3789 
3790 	return 0;
3791 }
3792 
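/*
 * Build one thread_mask per maps/affinity spec pair.  Every mask is
 * intersected with the monitored CPUs and must remain non-empty, and masks
 * of different threads must not overlap (tracked via full_mask); otherwise
 * -EINVAL is returned.
 */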
3793 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
3794 					  const char **maps_spec, const char **affinity_spec,
3795 					  u32 nr_spec)
3796 {
3797 	u32 s;
3798 	int ret = 0, t = 0;
3799 	struct mmap_cpu_mask cpus_mask;
3800 	struct thread_mask thread_mask, full_mask, *thread_masks;
3801 
3802 	ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
3803 	if (ret) {
3804 		pr_err("Failed to allocate CPUs mask\n");
3805 		return ret;
3806 	}
3807 
3808 	ret = record__mmap_cpu_mask_init(&cpus_mask, cpus);
3809 	if (ret) {
3810 		pr_err("Failed to init cpu mask\n");
3811 		goto out_free_cpu_mask;
3812 	}
3813 
3814 	ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
3815 	if (ret) {
3816 		pr_err("Failed to allocate full mask\n");
3817 		goto out_free_cpu_mask;
3818 	}
3819 
3820 	ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3821 	if (ret) {
3822 		pr_err("Failed to allocate thread mask\n");
3823 		goto out_free_full_and_cpu_masks;
3824 	}
3825 
3826 	for (s = 0; s < nr_spec; s++) {
3827 		ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
3828 		if (ret) {
3829 			pr_err("Failed to initialize maps thread mask\n");
3830 			goto out_free;
3831 		}
3832 		ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
3833 		if (ret) {
3834 			pr_err("Failed to initialize affinity thread mask\n");
3835 			goto out_free;
3836 		}
3837 
3838 		/* ignore invalid CPUs but do not allow empty masks */
3839 		if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
3840 				cpus_mask.bits, thread_mask.maps.nbits)) {
3841 			pr_err("Empty maps mask: %s\n", maps_spec[s]);
3842 			ret = -EINVAL;
3843 			goto out_free;
3844 		}
3845 		if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
3846 				cpus_mask.bits, thread_mask.affinity.nbits)) {
3847 			pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
3848 			ret = -EINVAL;
3849 			goto out_free;
3850 		}
3851 
3852 		/* do not allow intersection with other masks (full_mask) */
3853 		if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
3854 				      thread_mask.maps.nbits)) {
3855 			pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
3856 			ret = -EINVAL;
3857 			goto out_free;
3858 		}
3859 		if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
3860 				      thread_mask.affinity.nbits)) {
3861 			pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
3862 			ret = -EINVAL;
3863 			goto out_free;
3864 		}
3865 
3866 		bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
3867 			  thread_mask.maps.bits, full_mask.maps.nbits);
3868 		bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
3869 			  thread_mask.affinity.bits, full_mask.affinity.nbits);
3870 
3871 		thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
3872 		if (!thread_masks) {
3873 			pr_err("Failed to reallocate thread masks\n");
3874 			ret = -ENOMEM;
3875 			goto out_free;
3876 		}
3877 		rec->thread_masks = thread_masks;
3878 		rec->thread_masks[t] = thread_mask;
3879 		if (verbose > 0) {
3880 			pr_debug("thread_masks[%d]: ", t);
3881 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3882 			pr_debug("thread_masks[%d]: ", t);
3883 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3884 		}
3885 		t++;
3886 		ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3887 		if (ret) {
3888 			pr_err("Failed to allocate thread mask\n");
3889 			goto out_free_full_and_cpu_masks;
3890 		}
3891 	}
3892 	rec->nr_threads = t;
3893 	pr_debug("nr_threads: %d\n", rec->nr_threads);
3894 	if (!rec->nr_threads)
3895 		ret = -EINVAL;
3896 
3897 out_free:
3898 	record__thread_mask_free(&thread_mask);
3899 out_free_full_and_cpu_masks:
3900 	record__thread_mask_free(&full_mask);
3901 out_free_cpu_mask:
3902 	record__mmap_cpu_mask_free(&cpus_mask);
3903 
3904 	return ret;
3905 }
3906 
3907 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
3908 {
3909 	int ret;
3910 	struct cpu_topology *topo;
3911 
3912 	topo = cpu_topology__new();
3913 	if (!topo) {
3914 		pr_err("Failed to allocate CPU topology\n");
3915 		return -ENOMEM;
3916 	}
3917 
3918 	ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
3919 					     topo->core_cpus_list, topo->core_cpus_lists);
3920 	cpu_topology__delete(topo);
3921 
3922 	return ret;
3923 }
3924 
3925 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
3926 {
3927 	int ret;
3928 	struct cpu_topology *topo;
3929 
3930 	topo = cpu_topology__new();
3931 	if (!topo) {
3932 		pr_err("Failed to allocate CPU topology\n");
3933 		return -ENOMEM;
3934 	}
3935 
3936 	ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
3937 					     topo->package_cpus_list, topo->package_cpus_lists);
3938 	cpu_topology__delete(topo);
3939 
3940 	return ret;
3941 }
3942 
3943 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
3944 {
3945 	u32 s;
3946 	int ret;
3947 	const char **spec;
3948 	struct numa_topology *topo;
3949 
3950 	topo = numa_topology__new();
3951 	if (!topo) {
3952 		pr_err("Failed to allocate NUMA topology\n");
3953 		return -ENOMEM;
3954 	}
3955 
3956 	spec = zalloc(topo->nr * sizeof(char *));
3957 	if (!spec) {
3958 		pr_err("Failed to allocate NUMA spec\n");
3959 		ret = -ENOMEM;
3960 		goto out_delete_topo;
3961 	}
3962 	for (s = 0; s < topo->nr; s++)
3963 		spec[s] = topo->nodes[s].cpus;
3964 
3965 	ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
3966 
3967 	zfree(&spec);
3968 
3969 out_delete_topo:
3970 	numa_topology__delete(topo);
3971 
3972 	return ret;
3973 }
3974 
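/*
 * Parse a user-supplied --threads spec: a colon-separated list of
 * <maps>/<affinity> CPU lists, e.g. (illustrative) "0,2-4/2-4:1,5-7/5-7",
 * where the part before '/' selects the CPUs whose mmap buffers a thread
 * reads and the part after '/' that thread's affinity mask.
 */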
3975 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
3976 {
3977 	int t, ret;
3978 	u32 s, nr_spec = 0;
3979 	char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
3980 	char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;
3981 
3982 	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
3983 		spec = strtok_r(user_spec, ":", &spec_ptr);
3984 		if (spec == NULL)
3985 			break;
3986 		pr_debug2("threads_spec[%d]: %s\n", t, spec);
3987 		mask = strtok_r(spec, "/", &mask_ptr);
3988 		if (mask == NULL)
3989 			break;
3990 		pr_debug2("  maps mask: %s\n", mask);
3991 		tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
3992 		if (!tmp_spec) {
3993 			pr_err("Failed to reallocate maps spec\n");
3994 			ret = -ENOMEM;
3995 			goto out_free;
3996 		}
3997 		maps_spec = tmp_spec;
3998 		maps_spec[nr_spec] = dup_mask = strdup(mask);
3999 		if (!maps_spec[nr_spec]) {
4000 			pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
4001 			ret = -ENOMEM;
4002 			goto out_free;
4003 		}
4004 		mask = strtok_r(NULL, "/", &mask_ptr);
4005 		if (mask == NULL) {
4006 			pr_err("Invalid thread maps or affinity specs\n");
4007 			ret = -EINVAL;
4008 			goto out_free;
4009 		}
4010 		pr_debug2("  affinity mask: %s\n", mask);
4011 		tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
4012 		if (!tmp_spec) {
4013 			pr_err("Failed to reallocate affinity spec\n");
4014 			ret = -ENOMEM;
4015 			goto out_free;
4016 		}
4017 		affinity_spec = tmp_spec;
4018 		affinity_spec[nr_spec] = strdup(mask);
4019 		if (!affinity_spec[nr_spec]) {
4020 			pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
4021 			ret = -ENOMEM;
4022 			goto out_free;
4023 		}
4024 		dup_mask = NULL;
4025 		nr_spec++;
4026 	}
4027 
4028 	ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
4029 					     (const char **)affinity_spec, nr_spec);
4030 
4031 out_free:
4032 	free(dup_mask);
4033 	for (s = 0; s < nr_spec; s++) {
4034 		if (maps_spec)
4035 			free(maps_spec[s]);
4036 		if (affinity_spec)
4037 			free(affinity_spec[s]);
4038 	}
4039 	free(affinity_spec);
4040 	free(maps_spec);
4041 
4042 	return ret;
4043 }
4044 
4045 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
4046 {
4047 	int ret;
4048 
4049 	ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
4050 	if (ret)
4051 		return ret;
4052 
4053 	if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
4054 		return -ENODEV;
4055 
4056 	rec->nr_threads = 1;
4057 
4058 	return 0;
4059 }
4060 
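/*
 * Decide how the monitored CPUs are split between data-streaming threads:
 * a single thread covering all CPUs when parallel mode is off, otherwise
 * masks derived from the CPU/core/package/NUMA topology or taken from the
 * user-supplied spec.
 */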
4061 static int record__init_thread_masks(struct record *rec)
4062 {
4063 	int ret = 0;
4064 	struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;
4065 
4066 	if (!record__threads_enabled(rec))
4067 		return record__init_thread_default_masks(rec, cpus);
4068 
4069 	if (evlist__per_thread(rec->evlist)) {
4070 		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
4071 		return -EINVAL;
4072 	}
4073 
4074 	switch (rec->opts.threads_spec) {
4075 	case THREAD_SPEC__CPU:
4076 		ret = record__init_thread_cpu_masks(rec, cpus);
4077 		break;
4078 	case THREAD_SPEC__CORE:
4079 		ret = record__init_thread_core_masks(rec, cpus);
4080 		break;
4081 	case THREAD_SPEC__PACKAGE:
4082 		ret = record__init_thread_package_masks(rec, cpus);
4083 		break;
4084 	case THREAD_SPEC__NUMA:
4085 		ret = record__init_thread_numa_masks(rec, cpus);
4086 		break;
4087 	case THREAD_SPEC__USER:
4088 		ret = record__init_thread_user_masks(rec, cpus);
4089 		break;
4090 	default:
4091 		break;
4092 	}
4093 
4094 	return ret;
4095 }
4096 
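/*
 * Entry point of 'perf record': parse the options, validate the target and
 * the kernel support for what was requested, set up build-ids, auxtrace and
 * the parallel streaming masks, then hand off to __cmd_record().
 */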
4097 int cmd_record(int argc, const char **argv)
4098 {
4099 	int err;
4100 	struct record *rec = &record;
4101 	char errbuf[BUFSIZ];
4102 
4103 	setlocale(LC_ALL, "");
4104 
4105 #ifndef HAVE_BPF_SKEL
4106 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
4107 	set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
4108 # undef set_nobuild
4109 #endif
4110 
4111 	/* Disable eager loading of kernel symbols, which adds overhead to perf record. */
4112 	symbol_conf.lazy_load_kernel_maps = true;
4113 	rec->opts.affinity = PERF_AFFINITY_SYS;
4114 
4115 	rec->evlist = evlist__new();
4116 	if (rec->evlist == NULL)
4117 		return -ENOMEM;
4118 
4119 	err = perf_config(perf_record_config, rec);
4120 	if (err)
4121 		return err;
4122 
4123 	argc = parse_options(argc, argv, record_options, record_usage,
4124 			    PARSE_OPT_STOP_AT_NON_OPTION);
4125 	if (quiet)
4126 		perf_quiet_option();
4127 
4128 	err = symbol__validate_sym_arguments();
4129 	if (err)
4130 		return err;
4131 
4132 	perf_debuginfod_setup(&record.debuginfod);
4133 
4134 	/* Make system wide (-a) the default target. */
4135 	if (!argc && target__none(&rec->opts.target))
4136 		rec->opts.target.system_wide = true;
4137 
4138 	if (nr_cgroups && !rec->opts.target.system_wide) {
4139 		usage_with_options_msg(record_usage, record_options,
4140 			"cgroup monitoring only available in system-wide mode");
4142 	}
4143 
4144 	if (record.latency) {
4145 		/*
4146 		 * There is no fundamental reason why latency profiling
4147 		 * can't work for system-wide mode, but exact semantics
4148 		 * and details are to be defined.
4149 		 * See the following thread for details:
4150 		 * https://lore.kernel.org/all/Z4XDJyvjiie3howF@google.com/
4151 		 */
4152 		if (record.opts.target.system_wide) {
4153 			pr_err("Failed: latency profiling is not supported with system-wide collection.\n");
4154 			err = -EINVAL;
4155 			goto out_opts;
4156 		}
4157 		record.opts.record_switch_events = true;
4158 	}
4159 
4160 	if (rec->buildid_mmap && !perf_can_record_build_id()) {
4161 		pr_warning("Missing support for build id in kernel mmap events.\n"
4162 			   "Disable this warning with --no-buildid-mmap\n");
4163 		rec->buildid_mmap = false;
4164 	}
4165 
4166 	if (rec->buildid_mmap) {
4167 		/* Enable perf_event_attr::build_id bit. */
4168 		rec->opts.build_id = true;
4169 		/* Disable build-ID table in the header. */
4170 		rec->no_buildid = true;
4171 	} else {
4172 		pr_debug("Disabling build id in synthesized mmap2 events.\n");
4173 		symbol_conf.no_buildid_mmap2 = true;
4174 	}
4175 
4176 	if (rec->no_buildid_set && rec->no_buildid) {
4177 		/* -B implies -N for historic reasons. */
4178 		rec->no_buildid_cache = true;
4179 	}
4180 
4181 	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
4182 		pr_err("Kernel has no cgroup sampling support.\n");
4183 		err = -EINVAL;
4184 		goto out_opts;
4185 	}
4186 
4187 	if (rec->opts.kcore)
4188 		rec->opts.text_poke = true;
4189 
4190 	if (rec->opts.kcore || record__threads_enabled(rec))
4191 		rec->data.is_dir = true;
4192 
4193 	if (record__threads_enabled(rec)) {
4194 		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
4195 			pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
4196 			goto out_opts;
4197 		}
4198 		if (record__aio_enabled(rec)) {
4199 			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
4200 			goto out_opts;
4201 		}
4202 	}
4203 
4204 	if (rec->opts.comp_level != 0) {
4205 		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
4206 		rec->no_buildid = true;
4207 	}
4208 
4209 	if (rec->opts.record_switch_events &&
4210 	    !perf_can_record_switch_events()) {
4211 		ui__error("kernel does not support recording context switch events\n");
4212 		parse_options_usage(record_usage, record_options, "switch-events", 0);
4213 		err = -EINVAL;
4214 		goto out_opts;
4215 	}
4216 
4217 	if (switch_output_setup(rec)) {
4218 		parse_options_usage(record_usage, record_options, "switch-output", 0);
4219 		err = -EINVAL;
4220 		goto out_opts;
4221 	}
4222 
4223 	if (rec->switch_output.time) {
4224 		signal(SIGALRM, alarm_sig_handler);
4225 		alarm(rec->switch_output.time);
4226 	}
4227 
4228 	if (rec->switch_output.num_files) {
4229 		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
4230 						      sizeof(char *));
4231 		if (!rec->switch_output.filenames) {
4232 			err = -EINVAL;
4233 			goto out_opts;
4234 		}
4235 	}
4236 
4237 	if (rec->timestamp_filename && record__threads_enabled(rec)) {
4238 		rec->timestamp_filename = false;
4239 		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
4240 	}
4241 
4242 	if (rec->filter_action) {
4243 		if (!strcmp(rec->filter_action, "pin"))
4244 			err = perf_bpf_filter__pin();
4245 		else if (!strcmp(rec->filter_action, "unpin"))
4246 			err = perf_bpf_filter__unpin();
4247 		else {
4248 			pr_warning("Unknown BPF filter action: %s\n", rec->filter_action);
4249 			err = -EINVAL;
4250 		}
4251 		goto out_opts;
4252 	}
4253 
4254 	/* For backward compatibility, -d implies --sample-mem-info and --data-mmap */
4255 	if (rec->opts.sample_address) {
4256 		rec->opts.sample_data_src = true;
4257 		if (!rec->opts.record_data_mmap_set)
4258 			rec->opts.record_data_mmap = true;
4259 	}
4260 
4261 	/*
4262 	 * Allow aliases to facilitate the lookup of symbols for address
4263 	 * filters. Refer to auxtrace_parse_filters().
4264 	 */
4265 	symbol_conf.allow_aliases = true;
4266 
4267 	symbol__init(NULL);
4268 
4269 	err = record__auxtrace_init(rec);
4270 	if (err)
4271 		goto out;
4272 
4273 	if (dry_run)
4274 		goto out;
4275 
4276 	err = -ENOMEM;
4277 
4278 	if (rec->no_buildid_cache) {
4279 		disable_buildid_cache();
4280 	} else if (rec->switch_output.enabled) {
4281 		/*
4282 		 * In 'perf record --switch-output', disable buildid
4283 		 * generation by default to reduce data file switching
4284 		 * overhead. Still generate buildids if they are explicitly
4285 		 * required using
4286 		 *
4287 		 *  perf record --switch-output --no-no-buildid \
4288 		 *              --no-no-buildid-cache
4289 		 *
4290 		 * Following code equals to:
4291 		 *
4292 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
4293 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
4294 		 *         disable_buildid_cache();
4295 		 */
4296 		bool disable = true;
4297 
4298 		if (rec->no_buildid_set && !rec->no_buildid)
4299 			disable = false;
4300 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
4301 			disable = false;
4302 		if (disable) {
4303 			rec->no_buildid = true;
4304 			rec->no_buildid_cache = true;
4305 			disable_buildid_cache();
4306 		}
4307 	}
4308 
4309 	if (record.opts.overwrite)
4310 		record.opts.tail_synthesize = true;
4311 
4312 	if (rec->evlist->core.nr_entries == 0) {
4313 		struct evlist *def_evlist = evlist__new_default();
4314 
4315 		if (!def_evlist)
4316 			goto out;
4317 
4318 		evlist__splice_list_tail(rec->evlist, &def_evlist->core.entries);
4319 		evlist__delete(def_evlist);
4320 	}
4321 
4322 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
4323 		rec->opts.no_inherit = true;
4324 
4325 	err = target__validate(&rec->opts.target);
4326 	if (err) {
4327 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
4328 		ui__warning("%s\n", errbuf);
4329 	}
4330 
4331 	if (rec->uid_str) {
4332 		uid_t uid = parse_uid(rec->uid_str);
4333 
4334 		if (uid == UINT_MAX) {
4335 			ui__error("Invalid User: %s", rec->uid_str);
4336 			err = -EINVAL;
4337 			goto out;
4338 		}
4339 		err = parse_uid_filter(rec->evlist, uid);
4340 		if (err)
4341 			goto out;
4342 
4343 		/* User ID filtering implies system wide. */
4344 		rec->opts.target.system_wide = true;
4345 	}
4346 
4347 	/* Enable ignoring missing threads when -p option is defined. */
4348 	rec->opts.ignore_missing_thread = rec->opts.target.pid;
4349 
4350 	evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);
4351 
4352 	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
4353 		arch__add_leaf_frame_record_opts(&rec->opts);
4354 
4355 	err = -ENOMEM;
4356 	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
4357 		if (rec->opts.target.pid != NULL) {
4358 			pr_err("Couldn't create thread/CPU maps: %s\n",
4359 				errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
4360 			goto out;
4361 		} else
4363 			usage_with_options(record_usage, record_options);
4364 	}
4365 
4366 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
4367 	if (err)
4368 		goto out;
4369 
4370 	/*
4371 	 * We take all buildids when the file contains
4372 	 * AUX area tracing data, because we do not decode the
4373 	 * trace - that would take too long.
4374 	 */
4375 	if (rec->opts.full_auxtrace)
4376 		rec->buildid_all = true;
4377 
4378 	if (rec->opts.text_poke) {
4379 		err = record__config_text_poke(rec->evlist);
4380 		if (err) {
4381 			pr_err("record__config_text_poke failed, error %d\n", err);
4382 			goto out;
4383 		}
4384 	}
4385 
4386 	if (rec->off_cpu) {
4387 		err = record__config_off_cpu(rec);
4388 		if (err) {
4389 			pr_err("record__config_off_cpu failed, error %d\n", err);
4390 			goto out;
4391 		}
4392 	}
4393 
4394 	if (record_opts__config(&rec->opts)) {
4395 		err = -EINVAL;
4396 		goto out;
4397 	}
4398 
4399 	err = record__config_tracking_events(rec);
4400 	if (err) {
4401 		pr_err("record__config_tracking_events failed, error %d\n", err);
4402 		goto out;
4403 	}
4404 
4405 	err = record__init_thread_masks(rec);
4406 	if (err) {
4407 		pr_err("Failed to initialize parallel data streaming masks\n");
4408 		goto out;
4409 	}
4410 
4411 	if (rec->opts.nr_cblocks > nr_cblocks_max)
4412 		rec->opts.nr_cblocks = nr_cblocks_max;
4413 	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
4414 
4415 	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
4416 	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
4417 
4418 	if (rec->opts.comp_level > comp_level_max)
4419 		rec->opts.comp_level = comp_level_max;
4420 	pr_debug("comp level: %d\n", rec->opts.comp_level);
4421 
4422 	err = __cmd_record(&record, argc, argv);
4423 out:
4424 	record__free_thread_masks(rec, rec->nr_threads);
4425 	rec->nr_threads = 0;
4426 	symbol__exit();
4427 	auxtrace_record__free(rec->itr);
4428 out_opts:
4429 	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
4430 	evlist__delete(rec->evlist);
4431 	return err;
4432 }
4433 
4434 static void snapshot_sig_handler(int sig __maybe_unused)
4435 {
4436 	struct record *rec = &record;
4437 
4438 	hit_auxtrace_snapshot_trigger(rec);
4439 
4440 	if (switch_output_signal(rec))
4441 		trigger_hit(&switch_output_trigger);
4442 }
4443 
4444 static void alarm_sig_handler(int sig __maybe_unused)
4445 {
4446 	struct record *rec = &record;
4447 
4448 	if (switch_output_time(rec))
4449 		trigger_hit(&switch_output_trigger);
4450 }
4451