xref: /linux/tools/perf/builtin-trace.c (revision 10accd2e6890b57db8e717e9aee91b791f90fe14)
1 /*
2  * builtin-trace.c
3  *
4  * Builtin 'trace' command:
5  *
6  * Display a continuously updated trace of any workload, CPU, specific PID,
7  * system wide, etc.  Default format is loosely strace like, but any other
8  * event may be specified using --event.
9  *
10  * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11  *
12  * Initially based on the 'trace' prototype by Thomas Gleixner:
13  *
14  * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15  *
16  * Released under the GPL v2. (and only v2, not any later version)
17  */
18 
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include <subcmd/exec-cmd.h>
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include <subcmd/parse-options.h>
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36 #include "util/bpf-loader.h"
37 #include "callchain.h"
38 #include "syscalltbl.h"
39 #include "rb_resort.h"
40 
41 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
42 #include <stdlib.h>
43 #include <linux/err.h>
44 #include <linux/filter.h>
45 #include <linux/audit.h>
46 #include <linux/random.h>
47 #include <linux/stringify.h>
48 
49 #ifndef O_CLOEXEC
50 # define O_CLOEXEC		02000000
51 #endif
52 
/*
 * Global state for one 'perf trace' session: the tool callbacks, the
 * syscall table and its enter/exit events, the target evlist, output
 * stream, qualifier/pid filters and assorted option flags.
 */
struct trace {
	struct perf_tool	tool;
	struct syscalltbl	*sctbl;
	struct {
		int		max;	/* highest syscall id in ->table */
		struct syscall  *table;
		struct {
			struct perf_evsel *sys_enter,
					  *sys_exit;
		}		events;
	} syscalls;
	struct record_opts	opts;
	struct perf_evlist	*evlist;
	struct machine		*host;
	struct thread		*current;	/* thread of the last sample seen */
	u64			base_time;
	FILE			*output;
	unsigned long		nr_events;
	struct strlist		*ev_qualifier;	/* syscall names from -e/--expr */
	struct {
		size_t		nr;
		int		*entries;	/* ->ev_qualifier resolved to ids */
	}			ev_qualifier_ids;
	struct intlist		*tid_list;
	struct intlist		*pid_list;
	struct {
		size_t		nr;
		pid_t		*entries;	/* pids whose events are discarded */
	}			filter_pids;
	double			duration_filter;	/* ms; skip faster syscalls */
	double			runtime_ms;
	struct {
		u64		vfs_getname,	/* tool stats, see --tool-stats */
				proc_getname;
	} stats;
	unsigned int		max_stack;
	unsigned int		min_stack;
	bool			not_ev_qualifier;	/* -e was negated (!name) */
	bool			live;
	bool			full_time;
	bool			sched;
	bool			multiple_threads;
	bool			summary;
	bool			summary_only;
	bool			show_comm;
	bool			show_tool_stats;
	bool			trace_syscalls;
	bool			kernel_syscallchains;
	bool			force;
	bool			vfs_getname;	/* vfs_getname probe is in place */
	int			trace_pgfaults;	/* TRACE_PFMAJ | TRACE_PFMIN */
	int			open_id;	/* syscall id of "open" */
};
106 
/*
 * Accessor for one tracepoint payload field: its byte offset into the
 * sample's raw_data plus a getter, either as an integer or as a pointer
 * into the raw data (the union members are alternatives, never both).
 */
struct tp_field {
	int offset;
	union {
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};
114 
/*
 * Generate tp_field__u{8,16,32,64}() getters that read an unsigned
 * integer of the given width from sample->raw_data at field->offset.
 * memcpy() is used instead of a cast to avoid unaligned accesses.
 */
#define TP_UINT_FIELD(bits) \
static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return value;  \
}

TP_UINT_FIELD(8);
TP_UINT_FIELD(16);
TP_UINT_FIELD(32);
TP_UINT_FIELD(64);

/*
 * Same as above, but byte-swapping the value: used when the perf.data
 * file was recorded on a machine of the opposite endianness.
 */
#define TP_UINT_FIELD__SWAPPED(bits) \
static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return bswap_##bits(value);\
}

TP_UINT_FIELD__SWAPPED(16);
TP_UINT_FIELD__SWAPPED(32);
TP_UINT_FIELD__SWAPPED(64);
139 
140 static int tp_field__init_uint(struct tp_field *field,
141 			       struct format_field *format_field,
142 			       bool needs_swap)
143 {
144 	field->offset = format_field->offset;
145 
146 	switch (format_field->size) {
147 	case 1:
148 		field->integer = tp_field__u8;
149 		break;
150 	case 2:
151 		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
152 		break;
153 	case 4:
154 		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
155 		break;
156 	case 8:
157 		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
158 		break;
159 	default:
160 		return -1;
161 	}
162 
163 	return 0;
164 }
165 
166 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
167 {
168 	return sample->raw_data + field->offset;
169 }
170 
171 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
172 {
173 	field->offset = format_field->offset;
174 	field->pointer = tp_field__ptr;
175 	return 0;
176 }
177 
/*
 * Payload layout shared by raw_syscalls:sys_enter/sys_exit: the syscall
 * id plus either the argument array (enter) or the return value (exit) —
 * hence the union.
 */
struct syscall_tp {
	struct tp_field id;
	union {
		struct tp_field args, ret;
	};
};
184 
185 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
186 					  struct tp_field *field,
187 					  const char *name)
188 {
189 	struct format_field *format_field = perf_evsel__field(evsel, name);
190 
191 	if (format_field == NULL)
192 		return -1;
193 
194 	return tp_field__init_uint(field, format_field, evsel->needs_swap);
195 }
196 
/* Init the syscall_tp member 'name' (id/args/ret) stashed in evsel->priv. */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
200 
/*
 * Look up the tracepoint field @name on @evsel and initialize @field as
 * a pointer accessor for it. Returns 0 on success, -1 if absent.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt ? tp_field__init_ptr(field, fmt) : -1;
}
212 
/* Pointer-field counterpart of perf_evsel__init_sc_tp_uint_field(). */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
216 
/* Free the evsel's private syscall_tp, then the evsel itself. */
static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
	zfree(&evsel->priv);
	perf_evsel__delete(evsel);
}
222 
223 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
224 {
225 	evsel->priv = malloc(sizeof(struct syscall_tp));
226 	if (evsel->priv != NULL) {
227 		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
228 			goto out_delete;
229 
230 		evsel->handler = handler;
231 		return 0;
232 	}
233 
234 	return -ENOMEM;
235 
236 out_delete:
237 	zfree(&evsel->priv);
238 	return -ENOENT;
239 }
240 
/*
 * Create the raw_syscalls:sys_{enter,exit} tracepoint evsel for the
 * given @direction ("sys_enter"/"sys_exit") with @handler attached.
 * Returns NULL if the tracepoint doesn't exist or setup fails.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (IS_ERR(evsel))
		evsel = perf_evsel__newtp("syscalls", direction);

	if (IS_ERR(evsel))
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler))
		goto out_delete;

	return evsel;

out_delete:
	perf_evsel__delete_priv(evsel);
	return NULL;
}
261 
/* Read the integer field 'name' (id/args/ret) from a syscall tp sample. */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

/* Same, but returning a pointer into the sample's raw data. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
269 
/*
 * Context handed to the per-argument pretty printers: the raw value,
 * the thread/trace it came from, an optional printer parameter (parm),
 * the argument index (idx) and a mask of args already consumed.
 */
struct syscall_arg {
	unsigned long val;
	struct thread *thread;
	struct trace  *trace;
	void	      *parm;
	u8	      idx;
	u8	      mask;
};

/*
 * A value -> string table: entries[val - offset] names the value, used
 * by the STRARRAY printers below.
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char **entries;
};
284 
/* Wrap a string array in a strarray named strarray__<array>. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

/* As above, for tables whose first entry maps to value 'off', not 0. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
295 
296 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
297 						const char *intfmt,
298 					        struct syscall_arg *arg)
299 {
300 	struct strarray *sa = arg->parm;
301 	int idx = arg->val - sa->offset;
302 
303 	if (idx < 0 || idx >= sa->nr_entries)
304 		return scnprintf(bf, size, intfmt, arg->val);
305 
306 	return scnprintf(bf, size, "%s", sa->entries[idx]);
307 }
308 
/* strarray printer with a decimal fallback for unknown values. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
316 
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * 	  gets rewritten to support all arches.
 */
/* strarray printer with a hex fallback, used for ioctl cmd values. */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
330 
/* Forward declaration: the fd printer needs the fd->path cache below. */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
					struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd

/* Fallback for libcs whose headers predate the *at() syscalls. */
#ifndef AT_FDCWD
#define AT_FDCWD	-100
#endif
339 
340 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
341 					   struct syscall_arg *arg)
342 {
343 	int fd = arg->val;
344 
345 	if (fd == AT_FDCWD)
346 		return scnprintf(bf, size, "CWD");
347 
348 	return syscall_arg__scnprintf_fd(bf, size, arg);
349 }
350 
351 #define SCA_FDAT syscall_arg__scnprintf_fd_at
352 
/* Forward declaration: close() needs to invalidate the fd->path cache. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd

/* Print an argument as 0x-prefixed hex (addresses, opaque values). */
static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
					 struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%#lx", arg->val);
}

#define SCA_HEX syscall_arg__scnprintf_hex
365 
366 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
367 					 struct syscall_arg *arg)
368 {
369 	return scnprintf(bf, size, "%d", arg->val);
370 }
371 
372 #define SCA_INT syscall_arg__scnprintf_int
373 
/*
 * Value -> name tables for enum-like syscall arguments, each wrapped in
 * a strarray for use with SCA_STRARRAY in the syscall_fmts table below.
 * Entry order mirrors the kernel's numeric values (UAPI headers).
 */
static const char *bpf_cmd[] = {
	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY", "PROG_LOAD",
};
static DEFINE_STRARRAY(bpf_cmd);

/* EPOLL_CTL_ADD starts at 1, hence the offset. */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);

static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);

static const char *keyctl_options[] = {
	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);

static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
	"F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);

static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid);

static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
	"ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
439 
/*
 * Print an access(2) mode bitmask as F or a RWX combination, with any
 * leftover unknown bits appended as "|0x...".
 */
static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	size_t printed = 0;
	int mode = arg->val;

	if (mode == F_OK) /* 0 */
		return scnprintf(bf, size, "F");
/* Print and clear one R_OK/W_OK/X_OK bit; token-pastes n with _OK. */
#define	P_MODE(n) \
	if (mode & n##_OK) { \
		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
		mode &= ~n##_OK; \
	}

	P_MODE(R);
	P_MODE(W);
	P_MODE(X);
#undef P_MODE

	if (mode)
		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);

	return printed;
}

#define SCA_ACCMODE syscall_arg__scnprintf_access_mode

/* Forward declaration: uses the vfs_getname machinery defined below. */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
471 
/*
 * Print pipe2(2) flags as a |-separated list of names, with any unknown
 * remaining bits appended in hex.
 */
static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
						struct syscall_arg *arg)
{
	int printed = 0, flags = arg->val;

/* Print and clear one O_* flag bit, |-separating consecutive names. */
#define	P_FLAG(n) \
	if (flags & O_##n) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
		flags &= ~O_##n; \
	}

	P_FLAG(CLOEXEC);
	P_FLAG(NONBLOCK);
#undef P_FLAG

	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}

#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
494 
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
#define TCGETS		0x5401

/*
 * tty ioctl cmd names, indexed relative to TCGETS (0x5401); the
 * designated initializers skip holes in the cmd number space.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
	[0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
};

static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
#endif /* defined(__i386__) || defined(__x86_64__) */
521 
/* Fallbacks for libcs whose headers predate getrandom(2). */
#ifndef GRND_NONBLOCK
#define GRND_NONBLOCK	0x0001
#endif
#ifndef GRND_RANDOM
#define GRND_RANDOM	0x0002
#endif

/*
 * Print getrandom(2) flags as a |-separated list of GRND_* names, with
 * any unknown remaining bits appended in hex.
 */
static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
						   struct syscall_arg *arg)
{
	int printed = 0, flags = arg->val;

/* Print and clear one GRND_* flag bit, |-separating consecutive names. */
#define	P_FLAG(n) \
	if (flags & GRND_##n) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
		flags &= ~GRND_##n; \
	}

	P_FLAG(RANDOM);
	P_FLAG(NONBLOCK);
#undef P_FLAG

	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}

#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
551 
/* Shorthand: wire argument 'arg' to the strarray printer + its table. */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
555 
556 #include "trace/beauty/eventfd.c"
557 #include "trace/beauty/flock.c"
558 #include "trace/beauty/futex_op.c"
559 #include "trace/beauty/mmap.c"
560 #include "trace/beauty/mode_t.c"
561 #include "trace/beauty/msg_flags.c"
562 #include "trace/beauty/open_flags.c"
563 #include "trace/beauty/perf_event_open.c"
564 #include "trace/beauty/pid.c"
565 #include "trace/beauty/sched_policy.c"
566 #include "trace/beauty/seccomp.c"
567 #include "trace/beauty/signum.c"
568 #include "trace/beauty/socket_type.c"
569 #include "trace/beauty/waitid_options.c"
570 
/*
 * Per-syscall formatting hints: an optional alias (the name the kernel
 * uses when it differs), up to six per-argument pretty printers with
 * their parameters, and flags describing how to render the return value.
 *
 * NOTE: syscall_fmt__find() looks entries up with bsearch(), so this
 * table must be kept sorted by ->name.
 */
static struct syscall_fmt {
	const char *name;
	const char *alias;	/* name as found in the syscall table */
	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
	void	   *arg_parm[6];	/* opaque param for each printer */
	bool	   errmsg;	/* negative return: print errno name */
	bool	   errpid;	/* negative return is error, else a pid */
	bool	   timeout;	/* zero return means a timeout */
	bool	   hexret;	/* print return value in hex */
} syscall_fmts[] = {
	{ .name	    = "access",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_ACCMODE,  /* mode */ }, },
	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
	{ .name	    = "bpf",	    .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
	{ .name	    = "brk",	    .hexret = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
	{ .name	    = "chdir",	    .errmsg = true, },
	{ .name	    = "chmod",	    .errmsg = true, },
	{ .name	    = "chroot",	    .errmsg = true, },
	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
	{ .name	    = "clone",	    .errpid = true, },
	{ .name	    = "close",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
	{ .name	    = "connect",    .errmsg = true, },
	{ .name	    = "creat",	    .errmsg = true, },
	{ .name	    = "dup",	    .errmsg = true, },
	{ .name	    = "dup2",	    .errmsg = true, },
	{ .name	    = "dup3",	    .errmsg = true, },
	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
	{ .name	    = "eventfd2",   .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
	{ .name	    = "faccessat",  .errmsg = true, },
	{ .name	    = "fadvise64",  .errmsg = true, },
	{ .name	    = "fallocate",  .errmsg = true, },
	{ .name	    = "fchdir",	    .errmsg = true, },
	{ .name	    = "fchmod",	    .errmsg = true, },
	{ .name	    = "fchmodat",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
	{ .name	    = "fchown",	    .errmsg = true, },
	{ .name	    = "fchownat",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
	{ .name	    = "fcntl",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
	{ .name	    = "fdatasync",  .errmsg = true, },
	{ .name	    = "flock",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
	{ .name	    = "fsetxattr",  .errmsg = true, },
	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat", },
	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat", },
	{ .name	    = "fstatfs",    .errmsg = true, },
	{ .name	    = "fsync",    .errmsg = true, },
	{ .name	    = "ftruncate", .errmsg = true, },
	{ .name	    = "futex",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
	{ .name	    = "futimesat", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
	{ .name	    = "getdents",   .errmsg = true, },
	{ .name	    = "getdents64", .errmsg = true, },
	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
	{ .name	    = "getpid",	    .errpid = true, },
	{ .name	    = "getpgid",    .errpid = true, },
	{ .name	    = "getppid",    .errpid = true, },
	{ .name	    = "getrandom",  .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
	{ .name	    = "getxattr",   .errmsg = true, },
	{ .name	    = "inotify_add_watch",	    .errmsg = true, },
	{ .name	    = "ioctl",	    .errmsg = true,
	  .arg_scnprintf = {
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
			     [1] = SCA_STRHEXARRAY, /* cmd */
			     [2] = SCA_HEX, /* arg */ },
	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
#else
			     [2] = SCA_HEX, /* arg */ }, },
#endif
	{ .name	    = "keyctl",	    .errmsg = true, STRARRAY(0, option, keyctl_options), },
	{ .name	    = "kill",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "lchown",    .errmsg = true, },
	{ .name	    = "lgetxattr",  .errmsg = true, },
	{ .name	    = "linkat",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
	{ .name	    = "listxattr",  .errmsg = true, },
	{ .name	    = "llistxattr", .errmsg = true, },
	{ .name	    = "lremovexattr",  .errmsg = true, },
	{ .name	    = "lseek",	    .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
	{ .name	    = "lsetxattr",  .errmsg = true, },
	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
	{ .name	    = "lsxattr",    .errmsg = true, },
	{ .name     = "madvise",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
			     [2] = SCA_MADV_BHV, /* behavior */ }, },
	{ .name	    = "mkdir",    .errmsg = true, },
	{ .name	    = "mkdirat",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
	{ .name	    = "mknod",      .errmsg = true, },
	{ .name	    = "mknodat",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
	{ .name	    = "mlock",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
	{ .name	    = "mlockall",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
	{ .name	    = "mmap",	    .hexret = true,
	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
			     [2] = SCA_MMAP_PROT, /* prot */
			     [3] = SCA_MMAP_FLAGS, /* flags */ }, },
	{ .name	    = "mprotect",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
			     [2] = SCA_MMAP_PROT, /* prot */ }, },
	{ .name	    = "mq_unlink", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
	{ .name	    = "mremap",	    .hexret = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
			     [3] = SCA_MREMAP_FLAGS, /* flags */
			     [4] = SCA_HEX, /* new_addr */ }, },
	{ .name	    = "munlock",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
	{ .name	    = "munmap",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
	{ .name	    = "name_to_handle_at", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
	{ .name	    = "newfstatat", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
	{ .name	    = "open",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
	{ .name	    = "open_by_handle_at", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
	{ .name	    = "openat",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
	{ .name	    = "perf_event_open", .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_INT, /* cpu */
			     [3] = SCA_FD,  /* group_fd */
			     [4] = SCA_PERF_FLAGS,  /* flags */ }, },
	{ .name	    = "pipe2",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64", },
	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread", },
	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64", },
	{ .name	    = "pwritev",    .errmsg = true, },
	{ .name	    = "read",	    .errmsg = true, },
	{ .name	    = "readlink",   .errmsg = true, },
	{ .name	    = "readlinkat", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
	{ .name	    = "readv",	    .errmsg = true, },
	{ .name	    = "recvfrom",   .errmsg = true,
	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "recvmmsg",   .errmsg = true,
	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "recvmsg",    .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "removexattr", .errmsg = true, },
	{ .name	    = "renameat",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
	{ .name	    = "rmdir",    .errmsg = true, },
	{ .name	    = "rt_sigaction", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "sched_setscheduler",   .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
	{ .name	    = "seccomp", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
			     [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
	{ .name	    = "sendmmsg",    .errmsg = true,
	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "sendmsg",    .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "sendto",	    .errmsg = true,
	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "set_tid_address", .errpid = true, },
	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
	{ .name	    = "setpgid",    .errmsg = true, },
	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
	{ .name	    = "setxattr",   .errmsg = true, },
	{ .name	    = "shutdown",   .errmsg = true, },
	{ .name	    = "socket",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
			     [1] = SCA_SK_TYPE, /* type */ },
	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
	{ .name	    = "socketpair", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
			     [1] = SCA_SK_TYPE, /* type */ },
	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat", },
	{ .name	    = "statfs",	    .errmsg = true, },
	{ .name	    = "swapoff",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
	{ .name	    = "swapon",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
	{ .name	    = "symlinkat",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
	{ .name	    = "tgkill",	    .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "tkill",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "truncate",   .errmsg = true, },
	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
	{ .name	    = "unlinkat",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
	{ .name	    = "utime",  .errmsg = true, },
	{ .name	    = "utimensat",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
	{ .name	    = "utimes",  .errmsg = true, },
	{ .name	    = "vmsplice",  .errmsg = true, },
	{ .name	    = "wait4",	    .errpid = true,
	  .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
	{ .name	    = "waitid",	    .errpid = true,
	  .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
	{ .name	    = "write",	    .errmsg = true, },
	{ .name	    = "writev",	    .errmsg = true, },
};
798 
799 static int syscall_fmt__cmp(const void *name, const void *fmtp)
800 {
801 	const struct syscall_fmt *fmt = fmtp;
802 	return strcmp(name, fmt->name);
803 }
804 
805 static struct syscall_fmt *syscall_fmt__find(const char *name)
806 {
807 	const int nmemb = ARRAY_SIZE(syscall_fmts);
808 	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
809 }
810 
/*
 * One resolved syscall: its tracepoint format, argument fields, name,
 * and the formatting hooks copied from syscall_fmts (if any).
 */
struct syscall {
	struct event_format *tp_format;
	int		    nr_args;
	struct format_field *args;	/* linked list of argument fields */
	const char	    *name;
	bool		    is_exit;	/* e.g. exit_group: no sys_exit event */
	struct syscall_fmt  *fmt;
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void		    **arg_parm;
};
821 
822 static size_t fprintf_duration(unsigned long t, FILE *fp)
823 {
824 	double duration = (double)t / NSEC_PER_MSEC;
825 	size_t printed = fprintf(fp, "(");
826 
827 	if (duration >= 1.0)
828 		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
829 	else if (duration >= 0.01)
830 		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
831 	else
832 		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
833 	return printed + fprintf(fp, "): ");
834 }
835 
/**
 * filename.ptr: The filename char pointer that will be vfs_getname'd
 * filename.entry_str_pos: Where to insert the string translated from
 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
 */
/*
 * Per-thread state (stashed in thread->priv): timestamps for the
 * in-flight syscall, page-fault counters, the fd -> pathname cache and
 * per-syscall statistics.
 */
struct thread_trace {
	u64		  entry_time;
	u64		  exit_time;
	bool		  entry_pending;	/* sys_enter printed, awaiting exit */
	unsigned long	  nr_events;
	unsigned long	  pfmaj, pfmin;	/* major/minor page faults */
	char		  *entry_str;	/* formatted sys_enter line */
	double		  runtime_ms;
        struct {
		unsigned long ptr;
		short int     entry_str_pos;
		bool	      pending_open;
		unsigned int  namelen;
		char	      *name;
	} filename;
	struct {
		int	  max;		/* highest fd cached; -1 = empty */
		char	  **table;	/* fd-indexed pathnames */
	} paths;

	struct intlist *syscall_stats;
};
863 
864 static struct thread_trace *thread_trace__new(void)
865 {
866 	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
867 
868 	if (ttrace)
869 		ttrace->paths.max = -1;
870 
871 	ttrace->syscall_stats = intlist__new(NULL);
872 
873 	return ttrace;
874 }
875 
/*
 * Get (lazily allocating) the thread_trace for @thread and bump its
 * event count. On NULL thread or allocation failure, warn on @fp and
 * return NULL so the caller drops the sample.
 */
static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
{
	struct thread_trace *ttrace;

	if (thread == NULL)
		goto fail;

	if (thread__priv(thread) == NULL)
		thread__set_priv(thread, thread_trace__new());

	/* re-check: thread_trace__new() may have returned NULL */
	if (thread__priv(thread) == NULL)
		goto fail;

	ttrace = thread__priv(thread);
	++ttrace->nr_events;

	return ttrace;
fail:
	color_fprintf(fp, PERF_COLOR_RED,
		      "WARNING: not enough memory, dropping samples!\n");
	return NULL;
}
898 
899 #define TRACE_PFMAJ		(1 << 0)
900 #define TRACE_PFMIN		(1 << 1)
901 
902 static const size_t trace__entry_str_size = 2048;
903 
904 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
905 {
906 	struct thread_trace *ttrace = thread__priv(thread);
907 
908 	if (fd > ttrace->paths.max) {
909 		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
910 
911 		if (npath == NULL)
912 			return -1;
913 
914 		if (ttrace->paths.max != -1) {
915 			memset(npath + ttrace->paths.max + 1, 0,
916 			       (fd - ttrace->paths.max) * sizeof(char *));
917 		} else {
918 			memset(npath, 0, (fd + 1) * sizeof(char *));
919 		}
920 
921 		ttrace->paths.table = npath;
922 		ttrace->paths.max   = fd;
923 	}
924 
925 	ttrace->paths.table[fd] = strdup(pathname);
926 
927 	return ttrace->paths.table[fd] != NULL ? 0 : -1;
928 }
929 
/*
 * Resolve /proc/<pid>[/task/<tid>]/fd/<fd> to its target pathname and
 * cache it via trace__set_fd_pathname().  Returns 0 on success, -1 on
 * lookup/overflow/allocation failure.
 */
static int thread__read_fd_path(struct thread *thread, int fd)
{
	char linkname[PATH_MAX], pathname[PATH_MAX];
	struct stat st;
	int ret;

	/* Group leaders and other threads live in different procfs dirs. */
	if (thread->pid_ == thread->tid) {
		scnprintf(linkname, sizeof(linkname),
			  "/proc/%d/fd/%d", thread->pid_, fd);
	} else {
		scnprintf(linkname, sizeof(linkname),
			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
	}

	/* st.st_size is the symlink target length; ensure it fits + NUL. */
	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
		return -1;

	ret = readlink(linkname, pathname, sizeof(pathname));

	/* The link may have changed between lstat() and readlink(). */
	if (ret < 0 || ret > st.st_size)
		return -1;

	/* readlink() does not NUL-terminate. */
	pathname[ret] = '\0';
	return trace__set_fd_pathname(thread, fd, pathname);
}
955 
956 static const char *thread__fd_path(struct thread *thread, int fd,
957 				   struct trace *trace)
958 {
959 	struct thread_trace *ttrace = thread__priv(thread);
960 
961 	if (ttrace == NULL)
962 		return NULL;
963 
964 	if (fd < 0)
965 		return NULL;
966 
967 	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
968 		if (!trace->live)
969 			return NULL;
970 		++trace->stats.proc_getname;
971 		if (thread__read_fd_path(thread, fd))
972 			return NULL;
973 	}
974 
975 	return ttrace->paths.table[fd];
976 }
977 
978 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
979 					struct syscall_arg *arg)
980 {
981 	int fd = arg->val;
982 	size_t printed = scnprintf(bf, size, "%d", fd);
983 	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
984 
985 	if (path)
986 		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
987 
988 	return printed;
989 }
990 
991 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
992 					      struct syscall_arg *arg)
993 {
994 	int fd = arg->val;
995 	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
996 	struct thread_trace *ttrace = thread__priv(arg->thread);
997 
998 	if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
999 		zfree(&ttrace->paths.table[fd]);
1000 
1001 	return printed;
1002 }
1003 
/*
 * Remember which userspace filename pointer the current syscall carries and
 * where in entry_str its resolved name should be spliced once the
 * vfs_getname probe fires (see trace__vfs_getname()).
 */
static void thread__set_filename_pos(struct thread *thread, const char *bf,
				     unsigned long ptr)
{
	struct thread_trace *ttrace = thread__priv(thread);

	ttrace->filename.ptr = ptr;
	ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
}
1012 
1013 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1014 					      struct syscall_arg *arg)
1015 {
1016 	unsigned long ptr = arg->val;
1017 
1018 	if (!arg->trace->vfs_getname)
1019 		return scnprintf(bf, size, "%#x", ptr);
1020 
1021 	thread__set_filename_pos(arg->thread, bf, ptr);
1022 	return 0;
1023 }
1024 
1025 static bool trace__filter_duration(struct trace *trace, double t)
1026 {
1027 	return t < (trace->duration_filter * NSEC_PER_MSEC);
1028 }
1029 
1030 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1031 {
1032 	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1033 
1034 	return fprintf(fp, "%10.3f ", ts);
1035 }
1036 
/*
 * Async-signal flags: written by the handler, polled by the main loop.
 * volatile sig_atomic_t is the only type the C standard guarantees safe
 * for handler <-> main-loop communication (plain bool risks the compiler
 * caching the value); other perf builtins use the same idiom.
 */
static volatile sig_atomic_t done;
static volatile sig_atomic_t interrupted;

static void sig_handler(int sig)
{
	done = 1;
	interrupted = sig == SIGINT; /* SIGINT aborts, others just stop */
}
1045 
1046 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1047 					u64 duration, u64 tstamp, FILE *fp)
1048 {
1049 	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1050 	printed += fprintf_duration(duration, fp);
1051 
1052 	if (trace->multiple_threads) {
1053 		if (trace->show_comm)
1054 			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1055 		printed += fprintf(fp, "%d ", thread->tid);
1056 	}
1057 
1058 	return printed;
1059 }
1060 
1061 static int trace__process_event(struct trace *trace, struct machine *machine,
1062 				union perf_event *event, struct perf_sample *sample)
1063 {
1064 	int ret = 0;
1065 
1066 	switch (event->header.type) {
1067 	case PERF_RECORD_LOST:
1068 		color_fprintf(trace->output, PERF_COLOR_RED,
1069 			      "LOST %" PRIu64 " events!\n", event->lost.lost);
1070 		ret = machine__process_lost_event(machine, event, sample);
1071 		break;
1072 	default:
1073 		ret = machine__process_event(machine, event, sample);
1074 		break;
1075 	}
1076 
1077 	return ret;
1078 }
1079 
1080 static int trace__tool_process(struct perf_tool *tool,
1081 			       union perf_event *event,
1082 			       struct perf_sample *sample,
1083 			       struct machine *machine)
1084 {
1085 	struct trace *trace = container_of(tool, struct trace, tool);
1086 	return trace__process_event(trace, machine, event, sample);
1087 }
1088 
1089 static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1090 {
1091 	struct machine *machine = vmachine;
1092 
1093 	if (machine->kptr_restrict_warned)
1094 		return NULL;
1095 
1096 	if (symbol_conf.kptr_restrict) {
1097 		pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1098 			   "Check /proc/sys/kernel/kptr_restrict.\n\n"
1099 			   "Kernel samples will not be resolved.\n");
1100 		machine->kptr_restrict_warned = true;
1101 		return NULL;
1102 	}
1103 
1104 	return machine__resolve_kernel_addr(vmachine, addrp, modp);
1105 }
1106 
/*
 * Initialize symbol resolution, create the host machine object and
 * synthesize mmap/comm records for pre-existing threads so their samples
 * can be decoded.  Returns 0 or a negative error code.
 */
static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
{
	int err = symbol__init(NULL);

	if (err)
		return err;

	trace->host = machine__new_host();
	if (trace->host == NULL)
		return -ENOMEM;

	/*
	 * Install the kptr_restrict-aware resolver.
	 * NOTE(review): trace->host is not destroyed on this or the error
	 * path below -- presumably torn down with struct trace at exit;
	 * confirm there is no leak if init can run more than once.
	 */
	if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
		return -errno;

	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
					    evlist->threads, trace__tool_process, false,
					    trace->opts.proc_map_timeout);
	if (err)
		symbol__exit();

	return err;
}
1129 
/*
 * Choose a pretty-printer for each argument of 'sc': an explicit entry in
 * the per-syscall fmt table wins, then heuristics keyed on the tracepoint
 * field's C type and name.  Returns 0, or -1 on allocation failure.
 */
static int syscall__set_arg_fmts(struct syscall *sc)
{
	struct format_field *field;
	int idx = 0, len;

	sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
	if (sc->arg_scnprintf == NULL)
		return -1;

	if (sc->fmt)
		sc->arg_parm = sc->fmt->arg_parm;

	for (field = sc->args; field; field = field->next) {
		/* Hand-written formatter from the syscall_fmt table wins. */
		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
		/* Char pointers named like a path get the filename printer. */
		else if (strcmp(field->type, "const char *") == 0 &&
			 (strcmp(field->name, "filename") == 0 ||
			  strcmp(field->name, "path") == 0 ||
			  strcmp(field->name, "pathname") == 0))
			sc->arg_scnprintf[idx] = SCA_FILENAME;
		else if (field->flags & FIELD_IS_POINTER)
			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
		else if (strcmp(field->type, "pid_t") == 0)
			sc->arg_scnprintf[idx] = SCA_PID;
		else if (strcmp(field->type, "umode_t") == 0)
			sc->arg_scnprintf[idx] = SCA_MODE_T;
		/* Integer fields whose name ends in "fd" get the fd printer. */
		else if ((strcmp(field->type, "int") == 0 ||
			  strcmp(field->type, "unsigned int") == 0 ||
			  strcmp(field->type, "long") == 0) &&
			 (len = strlen(field->name)) >= 2 &&
			 strcmp(field->name + len - 2, "fd") == 0) {
			/*
			 * /sys/kernel/tracing/events/syscalls/sys_enter*
			 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
			 * 65 int
			 * 23 unsigned int
			 * 7 unsigned long
			 */
			sc->arg_scnprintf[idx] = SCA_FD;
		}
		++idx;
	}

	return 0;
}
1175 
/*
 * Fill trace->syscalls.table[id] with the syscall's name, fmt table entry
 * and tracepoint format, growing the table as needed.  Returns 0 on
 * success, -1 when the id is unknown or its tracepoint format is missing.
 */
static int trace__read_syscall_info(struct trace *trace, int id)
{
	char tp_name[128];
	struct syscall *sc;
	const char *name = syscalltbl__name(trace->sctbl, id);

	if (name == NULL)
		return -1;

	/* Grow the table up to 'id', zero-filling the new slots. */
	if (id > trace->syscalls.max) {
		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));

		if (nsyscalls == NULL)
			return -1;

		if (trace->syscalls.max != -1) {
			memset(nsyscalls + trace->syscalls.max + 1, 0,
			       (id - trace->syscalls.max) * sizeof(*sc));
		} else {
			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
		}

		trace->syscalls.table = nsyscalls;
		trace->syscalls.max   = id;
	}

	sc = trace->syscalls.table + id;
	sc->name = name;

	sc->fmt  = syscall_fmt__find(sc->name);

	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
	sc->tp_format = trace_event__tp_format("syscalls", tp_name);

	/* Some syscalls are traced under an alias from the fmt table. */
	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
	}

	if (IS_ERR(sc->tp_format))
		return -1;

	sc->args = sc->tp_format->format.fields;
	sc->nr_args = sc->tp_format->format.nr_fields;
	/*
	 * Discard the leading '__syscall_nr' (or 'nr') field: it is just the
	 * syscall number, already known here.  Older kernels don't have it.
	 */
	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
		sc->args = sc->args->next;
		--sc->nr_args;
	}

	/* exit()/exit_group() never return, so their entry is printed eagerly. */
	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");

	return syscall__set_arg_fmts(sc);
}
1234 
/*
 * Translate the syscall names in trace->ev_qualifier into ids stored in
 * trace->ev_qualifier_ids.  On any unknown name, list all the bad ones on
 * trace->output and return -EINVAL with the id array freed.
 */
static int trace__validate_ev_qualifier(struct trace *trace)
{
	int err = 0, i;
	struct str_node *pos;

	trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
	trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
						 sizeof(trace->ev_qualifier_ids.entries[0]));

	/* NOTE(review): malloc(0) may return NULL for an empty list -- confirm
	 * callers never pass one, or this reports a spurious OOM. */
	if (trace->ev_qualifier_ids.entries == NULL) {
		fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
		       trace->output);
		err = -EINVAL;
		goto out;
	}

	i = 0;

	strlist__for_each_entry(pos, trace->ev_qualifier) {
		const char *sc = pos->s;
		int id = syscalltbl__id(trace->sctbl, sc);

		if (id < 0) {
			/* First bad name starts the error line, later ones append. */
			if (err == 0) {
				fputs("Error:\tInvalid syscall ", trace->output);
				err = -EINVAL;
			} else {
				fputs(", ", trace->output);
			}

			fputs(sc, trace->output);
		}

		trace->ev_qualifier_ids.entries[i++] = id;
	}

	if (err < 0) {
		fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
		      "\nHint:\tand: 'man syscalls'\n", trace->output);
		zfree(&trace->ev_qualifier_ids.entries);
		trace->ev_qualifier_ids.nr = 0;
	}
out:
	return err;
}
1280 
1281 /*
1282  * args is to be interpreted as a series of longs but we need to handle
1283  * 8-byte unaligned accesses. args points to raw_data within the event
1284  * and raw_data is guaranteed to be 8-byte unaligned because it is
1285  * preceded by raw_size which is a u32. So we need to copy args to a temp
1286  * variable to read it. Most notably this avoids extended load instructions
1287  * on unaligned addresses
1288  */
1289 
/*
 * Format the syscall's argument slots into 'bf'.  With tracepoint format
 * info (sc->args) each argument gets its name and, when available, a
 * specialized pretty-printer; without it, fall back to the six raw longs.
 * Returns the number of characters written.
 */
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned char *args, struct trace *trace,
				      struct thread *thread)
{
	size_t printed = 0;
	unsigned char *p;
	unsigned long val;

	if (sc->args != NULL) {
		struct format_field *field;
		u8 bit = 1;
		struct syscall_arg arg = {
			.idx	= 0,
			.mask	= 0,
			.trace  = trace,
			.thread = thread,
		};

		for (field = sc->args; field;
		     field = field->next, ++arg.idx, bit <<= 1) {
			/* Skip args a previous formatter already consumed. */
			if (arg.mask & bit)
				continue;

			/* special care for unaligned accesses */
			p = args + sizeof(unsigned long) * arg.idx;
			memcpy(&val, p, sizeof(val));

			/*
			 * Suppress this argument if its value is zero and we
			 * don't have a string associated in a strarray for it.
			 */
			if (val == 0 &&
			    !(sc->arg_scnprintf &&
			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
			      sc->arg_parm[arg.idx]))
				continue;

			printed += scnprintf(bf + printed, size - printed,
					     "%s%s: ", printed ? ", " : "", field->name);
			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
				arg.val = val;
				if (sc->arg_parm)
					arg.parm = sc->arg_parm[arg.idx];
				printed += sc->arg_scnprintf[arg.idx](bf + printed,
								      size - printed, &arg);
			} else {
				printed += scnprintf(bf + printed, size - printed,
						     "%ld", val);
			}
		}
	} else if (IS_ERR(sc->tp_format)) {
		/*
		 * If we managed to read the tracepoint /format file, then we
		 * may end up not having any args, like with gettid(), so only
		 * print the raw args when we didn't manage to read it.
		 */
		int i = 0;

		while (i < 6) {
			/* special care for unaligned accesses */
			p = args + sizeof(unsigned long) * i;
			memcpy(&val, p, sizeof(val));
			printed += scnprintf(bf + printed, size - printed,
					     "%sarg%d: %ld",
					     printed ? ", " : "", i, val);
			++i;
		}
	}

	return printed;
}
1362 
1363 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1364 				  union perf_event *event,
1365 				  struct perf_sample *sample);
1366 
/*
 * Return the struct syscall for 'id', lazily reading its tracepoint info
 * on first use.  Returns NULL for invalid ids or when the info cannot be
 * read, in which case the caller drops the sample.
 */
static struct syscall *trace__syscall_info(struct trace *trace,
					   struct perf_evsel *evsel, int id)
{

	if (id < 0) {

		/*
		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
		 * before that, leaving at a higher verbosity level till that is
		 * explained. Reproduced with plain ftrace with:
		 *
		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
		 * grep "NR -1 " /t/trace_pipe
		 *
		 * After generating some load on the machine.
 		 */
		if (verbose > 1) {
			static u64 n;
			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
				id, perf_evsel__name(evsel), ++n);
		}
		return NULL;
	}

	/* Lazily populate the table slot for this id. */
	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
	    trace__read_syscall_info(trace, id))
		goto out_cant_read;

	/* Re-check: trace__read_syscall_info() can leave the slot unfilled. */
	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
		goto out_cant_read;

	return &trace->syscalls.table[id];

out_cant_read:
	if (verbose) {
		fprintf(trace->output, "Problems reading syscall %d", id);
		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
		fputs(" information\n", trace->output);
	}
	return NULL;
}
1409 
1410 static void thread__update_stats(struct thread_trace *ttrace,
1411 				 int id, struct perf_sample *sample)
1412 {
1413 	struct int_node *inode;
1414 	struct stats *stats;
1415 	u64 duration = 0;
1416 
1417 	inode = intlist__findnew(ttrace->syscall_stats, id);
1418 	if (inode == NULL)
1419 		return;
1420 
1421 	stats = inode->priv;
1422 	if (stats == NULL) {
1423 		stats = malloc(sizeof(struct stats));
1424 		if (stats == NULL)
1425 			return;
1426 		init_stats(stats);
1427 		inode->priv = stats;
1428 	}
1429 
1430 	if (ttrace->entry_time && sample->time > ttrace->entry_time)
1431 		duration = sample->time - ttrace->entry_time;
1432 
1433 	update_stats(stats, duration);
1434 }
1435 
1436 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1437 {
1438 	struct thread_trace *ttrace;
1439 	u64 duration;
1440 	size_t printed;
1441 
1442 	if (trace->current == NULL)
1443 		return 0;
1444 
1445 	ttrace = thread__priv(trace->current);
1446 
1447 	if (!ttrace->entry_pending)
1448 		return 0;
1449 
1450 	duration = sample->time - ttrace->entry_time;
1451 
1452 	printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1453 	printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1454 	ttrace->entry_pending = false;
1455 
1456 	return printed;
1457 }
1458 
/*
 * raw_syscalls:sys_enter handler: format "name(args" into the thread's
 * entry_str and either print it immediately (non-returning syscalls) or
 * hold it until the matching sys_exit arrives.
 */
static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample)
{
	char *msg;
	void *args;
	size_t printed = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	args = perf_evsel__sc_tp_ptr(evsel, args, sample);

	/* Lazily allocate the buffer the formatted entry is staged in. */
	if (ttrace->entry_str == NULL) {
		ttrace->entry_str = malloc(trace__entry_str_size);
		if (!ttrace->entry_str)
			goto out_put;
	}

	/* Flush any half-printed entry from the previous current thread. */
	if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
		trace__printf_interrupted_entry(trace, sample);

	ttrace->entry_time = sample->time;
	msg = ttrace->entry_str;
	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);

	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
					   args, trace, thread);

	if (sc->is_exit) {
		/* exit()/exit_group() never return: print the entry now. */
		if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
			fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
		}
	} else {
		ttrace->entry_pending = true;
		/* See trace__vfs_getname & trace__sys_exit */
		ttrace->filename.pending_open = false;
	}

	/* Track the current thread for interrupted-entry bookkeeping. */
	if (trace->current != thread) {
		thread__put(trace->current);
		trace->current = thread__get(thread);
	}
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
1517 
1518 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1519 				    struct perf_sample *sample,
1520 				    struct callchain_cursor *cursor)
1521 {
1522 	struct addr_location al;
1523 
1524 	if (machine__resolve(trace->host, &al, sample) < 0 ||
1525 	    thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1526 		return -1;
1527 
1528 	return 0;
1529 }
1530 
1531 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1532 {
1533 	/* TODO: user-configurable print_opts */
1534 	const unsigned int print_opts = EVSEL__PRINT_SYM |
1535 				        EVSEL__PRINT_DSO |
1536 				        EVSEL__PRINT_UNKNOWN_AS_ADDR;
1537 
1538 	return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
1539 }
1540 
/*
 * raw_syscalls:sys_exit handler: pair with the pending entry on this
 * thread, apply the duration filter, then print "args) = ret" decorated
 * per the syscall's return-value convention (errno, pid, hex, timeout).
 */
static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
			   union perf_event *event __maybe_unused,
			   struct perf_sample *sample)
{
	long ret;
	u64 duration = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (trace->summary)
		thread__update_stats(ttrace, id, sample);

	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);

	/* open() succeeded: bind the vfs_getname'd path to the returned fd. */
	if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
		ttrace->filename.pending_open = false;
		++trace->stats.vfs_getname;
	}

	ttrace->exit_time = sample->time;

	if (ttrace->entry_time) {
		duration = sample->time - ttrace->entry_time;
		if (trace__filter_duration(trace, duration))
			goto out;
	} else if (trace->duration_filter)
		goto out;

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			/* Too shallow for --min-stack: suppress the event. */
			if (callchain_cursor.nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	if (trace->summary_only)
		goto out;

	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);

	if (ttrace->entry_pending) {
		fprintf(trace->output, "%-70s", ttrace->entry_str);
	} else {
		/* Entry was flushed earlier (interrupted): mark continuation. */
		fprintf(trace->output, " ... [");
		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
		fprintf(trace->output, "]: %s()", sc->name);
	}

	if (sc->fmt == NULL) {
signed_print:
		fprintf(trace->output, ") = %ld", ret);
	} else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
		/* Negative return: decode as -errno with name and message. */
		char bf[STRERR_BUFSIZE];
		const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
			   *e = audit_errno_to_name(-ret);

		fprintf(trace->output, ") = -1 %s %s", e, emsg);
	} else if (ret == 0 && sc->fmt->timeout)
		fprintf(trace->output, ") = 0 Timeout");
	else if (sc->fmt->hexret)
		fprintf(trace->output, ") = %#lx", ret);
	else if (sc->fmt->errpid) {
		/* pid-returning syscalls: show the child's comm if known. */
		struct thread *child = machine__find_thread(trace->host, ret, ret);

		if (child != NULL) {
			fprintf(trace->output, ") = %ld", ret);
			if (child->comm_set)
				fprintf(trace->output, " (%s)", thread__comm_str(child));
			thread__put(child);
		}
	} else
		goto signed_print;

	fputc('\n', trace->output);

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	ttrace->entry_pending = false;
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
1640 
/*
 * probe:vfs_getname handler: capture the filename the kernel just resolved
 * and, if a syscall entry registered interest (thread__set_filename_pos),
 * splice it into entry_str at the recorded offset, keeping only the tail
 * when it does not fit.
 */
static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
			      union perf_event *event __maybe_unused,
			      struct perf_sample *sample)
{
	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	struct thread_trace *ttrace;
	size_t filename_len, entry_str_len, to_move;
	ssize_t remaining_space;
	char *pos;
	const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");

	if (!thread)
		goto out;

	ttrace = thread__priv(thread);
	if (!ttrace)
		goto out;

	filename_len = strlen(filename);

	/* Grow the cached-name buffer when this filename doesn't fit. */
	if (ttrace->filename.namelen < filename_len) {
		char *f = realloc(ttrace->filename.name, filename_len + 1);

		if (f == NULL)
				goto out;

		ttrace->filename.namelen = filename_len;
		ttrace->filename.name = f;
	}

	strcpy(ttrace->filename.name, filename);
	ttrace->filename.pending_open = true;

	/* No syscall entry is waiting for this name. */
	if (!ttrace->filename.ptr)
		goto out;

	entry_str_len = strlen(ttrace->entry_str);
	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
	if (remaining_space <= 0)
		goto out;

	/* Keep the tail of the filename when it can't fit whole. */
	if (filename_len > (size_t)remaining_space) {
		filename += filename_len - remaining_space;
		filename_len = remaining_space;
	}

	/* Open a gap at the splice point and copy the filename in. */
	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
	memmove(pos + filename_len, pos, to_move);
	memcpy(pos, filename, filename_len);

	ttrace->filename.ptr = 0;
	ttrace->filename.entry_str_pos = 0;
out:
	/* NOTE(review): the thread ref from machine__findnew_thread() is not
	 * put here -- check whether this leaks a reference per event. */
	return 0;
}
1697 
/*
 * sched:sched_stat_runtime handler: accumulate on-CPU time per thread and
 * globally for the --summary output.  If per-thread state can't be
 * allocated, dump the raw event fields instead.
 */
static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
				     union perf_event *event __maybe_unused,
				     struct perf_sample *sample)
{
        u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
	struct thread *thread = machine__findnew_thread(trace->host,
							sample->pid,
							sample->tid);
	struct thread_trace *ttrace = thread__trace(thread, trace->output);

	if (ttrace == NULL)
		goto out_dump;

	ttrace->runtime_ms += runtime_ms;
	trace->runtime_ms += runtime_ms;
	thread__put(thread);
	return 0;

out_dump:
	/* NOTE(review): the trailing ')' below has no matching '(' -- cosmetic. */
	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
	       evsel->name,
	       perf_evsel__strval(evsel, sample, "comm"),
	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
	       runtime,
	       perf_evsel__intval(evsel, sample, "vruntime"));
	thread__put(thread);
	return 0;
}
1727 
1728 static void bpf_output__printer(enum binary_printer_ops op,
1729 				unsigned int val, void *extra)
1730 {
1731 	FILE *output = extra;
1732 	unsigned char ch = (unsigned char)val;
1733 
1734 	switch (op) {
1735 	case BINARY_PRINT_CHAR_DATA:
1736 		fprintf(output, "%c", isprint(ch) ? ch : '.');
1737 		break;
1738 	case BINARY_PRINT_DATA_BEGIN:
1739 	case BINARY_PRINT_LINE_BEGIN:
1740 	case BINARY_PRINT_ADDR:
1741 	case BINARY_PRINT_NUM_DATA:
1742 	case BINARY_PRINT_NUM_PAD:
1743 	case BINARY_PRINT_SEP:
1744 	case BINARY_PRINT_CHAR_PAD:
1745 	case BINARY_PRINT_LINE_END:
1746 	case BINARY_PRINT_DATA_END:
1747 	default:
1748 		break;
1749 	}
1750 }
1751 
/* Print a bpf-output event's raw payload via bpf_output__printer(). */
static void bpf_output__fprintf(struct trace *trace,
				struct perf_sample *sample)
{
	print_binary(sample->raw_data, sample->raw_size, 8,
		     bpf_output__printer, trace->output);
}
1758 
/*
 * Handler for non-syscall tracepoints (--event) and bpf-output events:
 * prints a timestamped line with the event's decoded fields.
 */
static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample)
{
	int callchain_ret = 0;

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			/* Below --min-stack: drop the event entirely. */
			if (callchain_cursor.nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	trace__printf_interrupted_entry(trace, sample);
	trace__fprintf_tstamp(trace, sample->time, trace->output);

	/* Pad to align with syscall lines, which carry a duration column. */
	if (trace->trace_syscalls)
		fprintf(trace->output, "(         ): ");

	fprintf(trace->output, "%s:", evsel->name);

	if (perf_evsel__is_bpf_output(evsel)) {
		bpf_output__fprintf(trace, sample);
	} else if (evsel->tp_format) {
		event_format__fprintf(evsel->tp_format, sample->cpu,
				      sample->raw_data, sample->raw_size,
				      trace->output);
	}

	fprintf(trace->output, ")\n");

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	return 0;
}
1799 
1800 static void print_location(FILE *f, struct perf_sample *sample,
1801 			   struct addr_location *al,
1802 			   bool print_dso, bool print_sym)
1803 {
1804 
1805 	if ((verbose || print_dso) && al->map)
1806 		fprintf(f, "%s@", al->map->dso->long_name);
1807 
1808 	if ((verbose || print_sym) && al->sym)
1809 		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1810 			al->addr - al->sym->start);
1811 	else if (al->map)
1812 		fprintf(f, "0x%" PRIx64, al->addr);
1813 	else
1814 		fprintf(f, "0x%" PRIx64, sample->addr);
1815 }
1816 
/*
 * Software page-fault event handler: count it per thread and, unless in
 * summary-only mode, print "maj|minfault [faulting ip] => target (type)".
 */
static int trace__pgfault(struct trace *trace,
			  struct perf_evsel *evsel,
			  union perf_event *event __maybe_unused,
			  struct perf_sample *sample)
{
	struct thread *thread;
	struct addr_location al;
	char map_type = 'd';	/* 'd'ata; becomes e'x'ecutable or '?' below */
	struct thread_trace *ttrace;
	int err = -1;
	int callchain_ret = 0;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			/* Below --min-stack: drop the event. */
			if (callchain_cursor.nr < trace->min_stack)
				goto out_put;
			callchain_ret = 1;
		}
	}

	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
		ttrace->pfmaj++;
	else
		ttrace->pfmin++;

	if (trace->summary_only)
		goto out;

	/* Where the fault occurred: the instruction pointer. */
	thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
			      sample->ip, &al);

	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);

	fprintf(trace->output, "%sfault [",
		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
		"maj" : "min");

	print_location(trace->output, sample, &al, false, true);

	fprintf(trace->output, "] => ");

	/* What was being accessed: try data maps first, then executable. */
	thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
				   sample->addr, &al);

	if (!al.map) {
		thread__find_addr_location(thread, sample->cpumode,
					   MAP__FUNCTION, sample->addr, &al);

		if (al.map)
			map_type = 'x';
		else
			map_type = '?';
	}

	print_location(trace->output, sample, &al, true, false);

	fprintf(trace->output, " (%c%c)\n", map_type, al.level);

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
1892 
1893 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1894 {
1895 	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1896 	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1897 		return false;
1898 
1899 	if (trace->pid_list || trace->tid_list)
1900 		return true;
1901 
1902 	return false;
1903 }
1904 
1905 static void trace__set_base_time(struct trace *trace,
1906 				 struct perf_evsel *evsel,
1907 				 struct perf_sample *sample)
1908 {
1909 	/*
1910 	 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1911 	 * and don't use sample->time unconditionally, we may end up having
1912 	 * some other event in the future without PERF_SAMPLE_TIME for good
1913 	 * reason, i.e. we may not be interested in its timestamps, just in
1914 	 * it taking place, picking some piece of information when it
1915 	 * appears in our event stream (vfs_getname comes to mind).
1916 	 */
1917 	if (trace->base_time == 0 && !trace->full_time &&
1918 	    (evsel->attr.sample_type & PERF_SAMPLE_TIME))
1919 		trace->base_time = sample->time;
1920 }
1921 
1922 static int trace__process_sample(struct perf_tool *tool,
1923 				 union perf_event *event,
1924 				 struct perf_sample *sample,
1925 				 struct perf_evsel *evsel,
1926 				 struct machine *machine __maybe_unused)
1927 {
1928 	struct trace *trace = container_of(tool, struct trace, tool);
1929 	int err = 0;
1930 
1931 	tracepoint_handler handler = evsel->handler;
1932 
1933 	if (skip_sample(trace, sample))
1934 		return 0;
1935 
1936 	trace__set_base_time(trace, evsel, sample);
1937 
1938 	if (handler) {
1939 		++trace->nr_events;
1940 		handler(trace, evsel, event, sample);
1941 	}
1942 
1943 	return err;
1944 }
1945 
1946 static int parse_target_str(struct trace *trace)
1947 {
1948 	if (trace->opts.target.pid) {
1949 		trace->pid_list = intlist__new(trace->opts.target.pid);
1950 		if (trace->pid_list == NULL) {
1951 			pr_err("Error parsing process id string\n");
1952 			return -EINVAL;
1953 		}
1954 	}
1955 
1956 	if (trace->opts.target.tid) {
1957 		trace->tid_list = intlist__new(trace->opts.target.tid);
1958 		if (trace->tid_list == NULL) {
1959 			pr_err("Error parsing thread id string\n");
1960 			return -EINVAL;
1961 		}
1962 	}
1963 
1964 	return 0;
1965 }
1966 
1967 static int trace__record(struct trace *trace, int argc, const char **argv)
1968 {
1969 	unsigned int rec_argc, i, j;
1970 	const char **rec_argv;
1971 	const char * const record_args[] = {
1972 		"record",
1973 		"-R",
1974 		"-m", "1024",
1975 		"-c", "1",
1976 	};
1977 
1978 	const char * const sc_args[] = { "-e", };
1979 	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1980 	const char * const majpf_args[] = { "-e", "major-faults" };
1981 	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1982 	const char * const minpf_args[] = { "-e", "minor-faults" };
1983 	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1984 
1985 	/* +1 is for the event string below */
1986 	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1987 		majpf_args_nr + minpf_args_nr + argc;
1988 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
1989 
1990 	if (rec_argv == NULL)
1991 		return -ENOMEM;
1992 
1993 	j = 0;
1994 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
1995 		rec_argv[j++] = record_args[i];
1996 
1997 	if (trace->trace_syscalls) {
1998 		for (i = 0; i < sc_args_nr; i++)
1999 			rec_argv[j++] = sc_args[i];
2000 
2001 		/* event string may be different for older kernels - e.g., RHEL6 */
2002 		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2003 			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2004 		else if (is_valid_tracepoint("syscalls:sys_enter"))
2005 			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2006 		else {
2007 			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2008 			return -1;
2009 		}
2010 	}
2011 
2012 	if (trace->trace_pgfaults & TRACE_PFMAJ)
2013 		for (i = 0; i < majpf_args_nr; i++)
2014 			rec_argv[j++] = majpf_args[i];
2015 
2016 	if (trace->trace_pgfaults & TRACE_PFMIN)
2017 		for (i = 0; i < minpf_args_nr; i++)
2018 			rec_argv[j++] = minpf_args[i];
2019 
2020 	for (i = 0; i < (unsigned int)argc; i++)
2021 		rec_argv[j++] = argv[i];
2022 
2023 	return cmd_record(j, rec_argv, NULL);
2024 }
2025 
2026 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2027 
2028 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2029 {
2030 	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2031 
2032 	if (IS_ERR(evsel))
2033 		return false;
2034 
2035 	if (perf_evsel__field(evsel, "pathname") == NULL) {
2036 		perf_evsel__delete(evsel);
2037 		return false;
2038 	}
2039 
2040 	evsel->handler = trace__vfs_getname;
2041 	perf_evlist__add(evlist, evsel);
2042 	return true;
2043 }
2044 
2045 static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
2046 {
2047 	struct perf_evsel *evsel;
2048 	struct perf_event_attr attr = {
2049 		.type = PERF_TYPE_SOFTWARE,
2050 		.mmap_data = 1,
2051 	};
2052 
2053 	attr.config = config;
2054 	attr.sample_period = 1;
2055 
2056 	event_attr_init(&attr);
2057 
2058 	evsel = perf_evsel__new(&attr);
2059 	if (evsel)
2060 		evsel->handler = trace__pgfault;
2061 
2062 	return evsel;
2063 }
2064 
2065 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2066 {
2067 	const u32 type = event->header.type;
2068 	struct perf_evsel *evsel;
2069 
2070 	if (type != PERF_RECORD_SAMPLE) {
2071 		trace__process_event(trace, trace->host, event, sample);
2072 		return;
2073 	}
2074 
2075 	evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2076 	if (evsel == NULL) {
2077 		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2078 		return;
2079 	}
2080 
2081 	trace__set_base_time(trace, evsel, sample);
2082 
2083 	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2084 	    sample->raw_data == NULL) {
2085 		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2086 		       perf_evsel__name(evsel), sample->tid,
2087 		       sample->cpu, sample->raw_size);
2088 	} else {
2089 		tracepoint_handler handler = evsel->handler;
2090 		handler(trace, evsel, event, sample);
2091 	}
2092 }
2093 
/*
 * Create the sys_enter/sys_exit raw_syscalls tracepoint events, resolve the
 * tracepoint fields used later (the "args" payload pointer on enter, the
 * "ret" value on exit), add both to the evlist and cache them in
 * trace->syscalls.events.
 *
 * Returns 0 on success; -1 on failure, with any partially-created evsels
 * deleted via the goto cleanup chain below.
 */
static int trace__add_syscall_newtp(struct trace *trace)
{
	int ret = -1;
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *sys_enter, *sys_exit;

	sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
	if (sys_enter == NULL)
		goto out;

	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
		goto out_delete_sys_enter;

	sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
	if (sys_exit == NULL)
		goto out_delete_sys_enter;

	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
		goto out_delete_sys_exit;

	perf_evlist__add(evlist, sys_enter);
	perf_evlist__add(evlist, sys_exit);

	if (callchain_param.enabled && !trace->kernel_syscallchains) {
		/*
		 * We're interested only in the user space callchain
		 * leading to the syscall, allow overriding that for
		 * debugging reasons using --kernel_syscall_callchains
		 */
		sys_exit->attr.exclude_callchain_kernel = 1;
	}

	trace->syscalls.events.sys_enter = sys_enter;
	trace->syscalls.events.sys_exit  = sys_exit;

	ret = 0;
out:
	return ret;

out_delete_sys_exit:
	perf_evsel__delete_priv(sys_exit);
out_delete_sys_enter:
	perf_evsel__delete_priv(sys_enter);
	goto out;
}
2139 
/*
 * Build an "id in (...)" (or "id not in (...)" for a '!'-prefixed -e list)
 * tracepoint filter expression from the syscall qualifier ids and AND it
 * into the filters of both the sys_enter and sys_exit events.
 *
 * Returns 0 on success, -1 on failure; errno is set to ENOMEM when the
 * filter string itself could not be allocated.
 */
static int trace__set_ev_qualifier_filter(struct trace *trace)
{
	int err = -1;
	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
						trace->ev_qualifier_ids.nr,
						trace->ev_qualifier_ids.entries);

	if (filter == NULL)
		goto out_enomem;

	/* Only touch sys_exit if appending to sys_enter succeeded (returns 0). */
	if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
		err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);

	free(filter);
out:
	return err;
out_enomem:
	errno = ENOMEM;
	goto out;
}
2160 
/*
 * Live tracing mode: set up the syscall/pagefault/sched events, create the
 * target maps, open + mmap the evlist, optionally fork the workload, then
 * loop consuming the ring buffers until interrupted or the workload exits.
 *
 * Error handling uses a goto tail at the bottom of the function; note the
 * unusual bare block that scopes errbuf for some of the labels.
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
	int err = -1, i;
	unsigned long before;
	const bool forks = argc > 0;
	bool draining = false;

	trace->live = true;

	if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
		goto out_error_raw_syscalls;

	if (trace->trace_syscalls)
		trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);

	if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
		pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
		if (pgfault_maj == NULL)
			goto out_error_mem;
		perf_evlist__add(evlist, pgfault_maj);
	}

	if ((trace->trace_pgfaults & TRACE_PFMIN)) {
		pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
		if (pgfault_min == NULL)
			goto out_error_mem;
		perf_evlist__add(evlist, pgfault_min);
	}

	if (trace->sched &&
	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
				   trace__sched_stat_runtime))
		goto out_error_sched_stat_runtime;

	err = perf_evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
		goto out_delete_evlist;
	}

	perf_evlist__config(evlist, &trace->opts, NULL);

	/* Callchains: only configure them on the evsels that asked for them. */
	if (callchain_param.enabled) {
		bool use_identifier = false;

		if (trace->syscalls.events.sys_exit) {
			perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
						     &trace->opts, &callchain_param);
			use_identifier = true;
		}

		if (pgfault_maj) {
			perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
			use_identifier = true;
		}

		if (pgfault_min) {
			perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
			use_identifier = true;
		}

		if (use_identifier) {
		       /*
			* Now we have evsels with different sample_ids, use
			* PERF_SAMPLE_IDENTIFIER to map from sample to evsel
			* from a fixed position in each ring buffer record.
			*
			* As of this the changeset introducing this comment, this
			* isn't strictly needed, as the fields that can come before
			* PERF_SAMPLE_ID are all used, but we'll probably disable
			* some of those for things like copying the payload of
			* pointer syscall arguments, and for vfs_getname we don't
			* need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
			* here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
			*/
			perf_evlist__set_sample_bit(evlist, IDENTIFIER);
			perf_evlist__reset_sample_bit(evlist, ID);
		}
	}

	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);

	if (forks) {
		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
						    argv, false, NULL);
		if (err < 0) {
			fprintf(trace->output, "Couldn't run the workload!\n");
			goto out_delete_evlist;
		}
	}

	err = perf_evlist__open(evlist);
	if (err < 0)
		goto out_error_open;

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_error_open;
	}

	/*
	 * Better not use !target__has_task() here because we need to cover the
	 * case where no threads were specified in the command line, but a
	 * workload was, and in that case we will fill in the thread_map when
	 * we fork the workload in perf_evlist__prepare_workload.
	 */
	if (trace->filter_pids.nr > 0)
		err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
	else if (thread_map__pid(evlist->threads, 0) == -1)
		err = perf_evlist__set_filter_pid(evlist, getpid());

	if (err < 0)
		goto out_error_mem;

	if (trace->ev_qualifier_ids.nr > 0) {
		err = trace__set_ev_qualifier_filter(trace);
		if (err < 0)
			goto out_errno;

		pr_debug("event qualifier tracepoint filter: %s\n",
			 trace->syscalls.events.sys_exit->filter);
	}

	err = perf_evlist__apply_filters(evlist, &evsel);
	if (err < 0)
		goto out_error_apply_filters;

	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
	if (err < 0)
		goto out_error_mmap;

	if (!target__none(&trace->opts.target))
		perf_evlist__enable(evlist);

	if (forks)
		perf_evlist__start_workload(evlist);

	trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
				  evlist->threads->nr > 1 ||
				  perf_evlist__first(evlist)->attr.inherit;
/* Main consume loop: drain all mmaps, then poll if nothing new arrived. */
again:
	before = trace->nr_events;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		union perf_event *event;

		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
			struct perf_sample sample;

			++trace->nr_events;

			err = perf_evlist__parse_sample(evlist, event, &sample);
			if (err) {
				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
				goto next_event;
			}

			trace__handle_event(trace, event, &sample);
next_event:
			perf_evlist__mmap_consume(evlist, i);

			if (interrupted)
				goto out_disable;

			/* Workload done: stop producing, keep draining buffers. */
			if (done && !draining) {
				perf_evlist__disable(evlist);
				draining = true;
			}
		}
	}

	if (trace->nr_events == before) {
		int timeout = done ? 100 : -1;

		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
				draining = true;

			goto again;
		}
	} else {
		goto again;
	}

out_disable:
	thread__zput(trace->current);

	perf_evlist__disable(evlist);

	if (!err) {
		if (trace->summary)
			trace__fprintf_thread_summary(trace, trace->output);

		if (trace->show_tool_stats) {
			fprintf(trace->output, "Stats:\n "
					       " vfs_getname : %" PRIu64 "\n"
					       " proc_getname: %" PRIu64 "\n",
				trace->stats.vfs_getname,
				trace->stats.proc_getname);
		}
	}

out_delete_evlist:
	perf_evlist__delete(evlist);
	trace->evlist = NULL;
	trace->live = false;
	return err;
/*
 * This bare block exists only to scope errbuf for the error labels inside
 * it; it is reachable exclusively via the gotos above.
 */
{
	char errbuf[BUFSIZ];

out_error_sched_stat_runtime:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
	goto out_error;

out_error_raw_syscalls:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
	goto out_error;

out_error_mmap:
	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
	goto out_error;

out_error_open:
	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

out_error:
	fprintf(trace->output, "%s\n", errbuf);
	goto out_delete_evlist;

out_error_apply_filters:
	fprintf(trace->output,
		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
		evsel->filter, perf_evsel__name(evsel), errno,
		str_error_r(errno, errbuf, sizeof(errbuf)));
	goto out_delete_evlist;
}
out_error_mem:
	fprintf(trace->output, "Not enough memory to run!\n");
	goto out_delete_evlist;

out_errno:
	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
	goto out_delete_evlist;
}
2419 
/*
 * 'perf trace -i perf.data' mode: open a recorded session and process its
 * events through the same sys_enter/sys_exit/pagefault handlers used live.
 * Falls back to the legacy "syscalls:" tracepoint names when the session
 * was recorded on a kernel without "raw_syscalls:".
 */
static int trace__replay(struct trace *trace)
{
	const struct perf_evsel_str_handler handlers[] = {
		{ "probe:vfs_getname",	     trace__vfs_getname, },
	};
	struct perf_data_file file = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = trace->force,
	};
	struct perf_session *session;
	struct perf_evsel *evsel;
	int err = -1;

	trace->tool.sample	  = trace__process_sample;
	trace->tool.mmap	  = perf_event__process_mmap;
	trace->tool.mmap2	  = perf_event__process_mmap2;
	trace->tool.comm	  = perf_event__process_comm;
	trace->tool.exit	  = perf_event__process_exit;
	trace->tool.fork	  = perf_event__process_fork;
	trace->tool.attr	  = perf_event__process_attr;
	trace->tool.tracing_data = perf_event__process_tracing_data;
	trace->tool.build_id	  = perf_event__process_build_id;

	trace->tool.ordered_events = true;
	trace->tool.ordering_requires_timestamps = true;

	/* add tid to output */
	trace->multiple_threads = true;

	session = perf_session__new(&file, false, &trace->tool);
	if (session == NULL)
		return -1;

	if (symbol__init(&session->header.env) < 0)
		goto out;

	trace->host = &session->machines.host;

	err = perf_session__set_tracepoints_handlers(session, handlers);
	if (err)
		goto out;

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_enter");
	/* older kernels have syscalls tp versus raw_syscalls */
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_enter");

	if (evsel &&
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
		goto out;
	}

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_exit");
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_exit");
	if (evsel &&
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
		goto out;
	}

	/* Any software page-fault counters in the session get the pf handler. */
	evlist__for_each_entry(session->evlist, evsel) {
		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
			evsel->handler = trace__pgfault;
	}

	err = parse_target_str(trace);
	if (err != 0)
		goto out;

	setup_pager();

	err = perf_session__process_events(session);
	if (err)
		pr_err("Failed to process events, error %d", err);

	else if (trace->summary)
		trace__fprintf_thread_summary(trace, trace->output);

out:
	perf_session__delete(session);

	return err;
}
2515 
/* Print the header that precedes the per-thread summary table. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2524 
/*
 * Resort helper: re-sort an intlist of per-syscall stats by total time
 * spent ('msecs'). Each source intlist node carries its struct stats in
 * ->priv and the syscall id in ->i; entry->msecs pre-computes
 * n * avg so the comparison expression stays cheap.
 */
DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
	struct stats 	*stats;
	double		msecs;
	int		syscall;
)
{
	struct int_node *source = rb_entry(nd, struct int_node, rb_node);
	struct stats *stats = source->priv;

	entry->syscall = source->i;
	entry->stats   = stats;
	entry->msecs   = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
}
2538 
/*
 * Print one thread's syscall summary table: a row per syscall with call
 * count, total/min/avg/max times in msec and relative stddev, sorted by
 * total time via the syscall_stats resort helper above.
 * Returns the number of characters printed.
 */
static size_t thread__dump_stats(struct thread_trace *ttrace,
				 struct trace *trace, FILE *fp)
{
	size_t printed = 0;
	struct syscall *sc;
	struct rb_node *nd;
	DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);

	if (syscall_stats == NULL)
		return 0;

	printed += fprintf(fp, "\n");

	printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
	printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");

	resort_rb__for_each_entry(nd, syscall_stats) {
		struct stats *stats = syscall_stats_entry->stats;
		if (stats) {
			/* min/max/avg are kept in ns; convert for display */
			double min = (double)(stats->min) / NSEC_PER_MSEC;
			double max = (double)(stats->max) / NSEC_PER_MSEC;
			double avg = avg_stats(stats);
			double pct;
			u64 n = (u64) stats->n;

			/* stddev as a percentage of the mean */
			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
			avg /= NSEC_PER_MSEC;

			sc = &trace->syscalls.table[syscall_stats_entry->syscall];
			printed += fprintf(fp, "   %-15s", sc->name);
			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
					   n, syscall_stats_entry->msecs, min, avg);
			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
		}
	}

	resort_rb__delete(syscall_stats);
	printed += fprintf(fp, "\n\n");

	return printed;
}
2581 
2582 static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
2583 {
2584 	size_t printed = 0;
2585 	struct thread_trace *ttrace = thread__priv(thread);
2586 	double ratio;
2587 
2588 	if (ttrace == NULL)
2589 		return 0;
2590 
2591 	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2592 
2593 	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2594 	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2595 	printed += fprintf(fp, "%.1f%%", ratio);
2596 	if (ttrace->pfmaj)
2597 		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2598 	if (ttrace->pfmin)
2599 		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2600 	if (trace->sched)
2601 		printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2602 	else if (fputc('\n', fp) != EOF)
2603 		++printed;
2604 
2605 	printed += thread__dump_stats(ttrace, trace, fp);
2606 
2607 	return printed;
2608 }
2609 
2610 static unsigned long thread__nr_events(struct thread_trace *ttrace)
2611 {
2612 	return ttrace ? ttrace->nr_events : 0;
2613 }
2614 
/*
 * Resort helper: re-sort the machine's thread rb_tree by per-thread event
 * count (thread__nr_events on each thread's priv thread_trace).
 */
DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
	struct thread *thread;
)
{
	entry->thread = rb_entry(nd, struct thread, rb_node);
}
2621 
/*
 * Print the "Summary of events" section: one trace__fprintf_thread() block
 * per thread, ordered by per-thread event count via the threads resort
 * helper above. Returns the number of characters printed.
 *
 * NOTE(review): on resort failure this returns 0 even though the header was
 * already written; callers only accumulate the count, so this is benign.
 */
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
{
	DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
	size_t printed = trace__fprintf_threads_header(fp);
	struct rb_node *nd;

	if (threads == NULL) {
		fprintf(fp, "%s", "Error sorting output by nr_events!\n");
		return 0;
	}

	resort_rb__for_each_entry(nd, threads)
		printed += trace__fprintf_thread(fp, threads_entry->thread, trace);

	resort_rb__delete(threads);

	return printed;
}
2640 
2641 static int trace__set_duration(const struct option *opt, const char *str,
2642 			       int unset __maybe_unused)
2643 {
2644 	struct trace *trace = opt->value;
2645 
2646 	trace->duration_filter = atof(str);
2647 	return 0;
2648 }
2649 
2650 static int trace__set_filter_pids(const struct option *opt, const char *str,
2651 				  int unset __maybe_unused)
2652 {
2653 	int ret = -1;
2654 	size_t i;
2655 	struct trace *trace = opt->value;
2656 	/*
2657 	 * FIXME: introduce a intarray class, plain parse csv and create a
2658 	 * { int nr, int entries[] } struct...
2659 	 */
2660 	struct intlist *list = intlist__new(str);
2661 
2662 	if (list == NULL)
2663 		return -1;
2664 
2665 	i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2666 	trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2667 
2668 	if (trace->filter_pids.entries == NULL)
2669 		goto out;
2670 
2671 	trace->filter_pids.entries[0] = getpid();
2672 
2673 	for (i = 1; i < trace->filter_pids.nr; ++i)
2674 		trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2675 
2676 	intlist__delete(list);
2677 	ret = 0;
2678 out:
2679 	return ret;
2680 }
2681 
2682 static int trace__open_output(struct trace *trace, const char *filename)
2683 {
2684 	struct stat st;
2685 
2686 	if (!stat(filename, &st) && st.st_size) {
2687 		char oldname[PATH_MAX];
2688 
2689 		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2690 		unlink(oldname);
2691 		rename(filename, oldname);
2692 	}
2693 
2694 	trace->output = fopen(filename, "w");
2695 
2696 	return trace->output == NULL ? -errno : 0;
2697 }
2698 
2699 static int parse_pagefaults(const struct option *opt, const char *str,
2700 			    int unset __maybe_unused)
2701 {
2702 	int *trace_pgfaults = opt->value;
2703 
2704 	if (strcmp(str, "all") == 0)
2705 		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2706 	else if (strcmp(str, "maj") == 0)
2707 		*trace_pgfaults |= TRACE_PFMAJ;
2708 	else if (strcmp(str, "min") == 0)
2709 		*trace_pgfaults |= TRACE_PFMIN;
2710 	else
2711 		return -1;
2712 
2713 	return 0;
2714 }
2715 
2716 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2717 {
2718 	struct perf_evsel *evsel;
2719 
2720 	evlist__for_each_entry(evlist, evsel)
2721 		evsel->handler = handler;
2722 }
2723 
2724 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2725 {
2726 	const char *trace_usage[] = {
2727 		"perf trace [<options>] [<command>]",
2728 		"perf trace [<options>] -- <command> [<options>]",
2729 		"perf trace record [<options>] [<command>]",
2730 		"perf trace record [<options>] -- <command> [<options>]",
2731 		NULL
2732 	};
2733 	struct trace trace = {
2734 		.syscalls = {
2735 			. max = -1,
2736 		},
2737 		.opts = {
2738 			.target = {
2739 				.uid	   = UINT_MAX,
2740 				.uses_mmap = true,
2741 			},
2742 			.user_freq     = UINT_MAX,
2743 			.user_interval = ULLONG_MAX,
2744 			.no_buffering  = true,
2745 			.mmap_pages    = UINT_MAX,
2746 			.proc_map_timeout  = 500,
2747 		},
2748 		.output = stderr,
2749 		.show_comm = true,
2750 		.trace_syscalls = true,
2751 		.kernel_syscallchains = false,
2752 		.max_stack = UINT_MAX,
2753 	};
2754 	const char *output_name = NULL;
2755 	const char *ev_qualifier_str = NULL;
2756 	const struct option trace_options[] = {
2757 	OPT_CALLBACK(0, "event", &trace.evlist, "event",
2758 		     "event selector. use 'perf list' to list available events",
2759 		     parse_events_option),
2760 	OPT_BOOLEAN(0, "comm", &trace.show_comm,
2761 		    "show the thread COMM next to its id"),
2762 	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2763 	OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2764 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
2765 	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2766 	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2767 		    "trace events on existing process id"),
2768 	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2769 		    "trace events on existing thread id"),
2770 	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2771 		     "pids to filter (by the kernel)", trace__set_filter_pids),
2772 	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2773 		    "system-wide collection from all CPUs"),
2774 	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2775 		    "list of cpus to monitor"),
2776 	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2777 		    "child tasks do not inherit counters"),
2778 	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2779 		     "number of mmap data pages",
2780 		     perf_evlist__parse_mmap_pages),
2781 	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2782 		   "user to profile"),
2783 	OPT_CALLBACK(0, "duration", &trace, "float",
2784 		     "show only events with duration > N.M ms",
2785 		     trace__set_duration),
2786 	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2787 	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2788 	OPT_BOOLEAN('T', "time", &trace.full_time,
2789 		    "Show full timestamp, not time relative to first start"),
2790 	OPT_BOOLEAN('s', "summary", &trace.summary_only,
2791 		    "Show only syscall summary with statistics"),
2792 	OPT_BOOLEAN('S', "with-summary", &trace.summary,
2793 		    "Show all syscalls and summary with statistics"),
2794 	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2795 		     "Trace pagefaults", parse_pagefaults, "maj"),
2796 	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2797 	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2798 	OPT_CALLBACK(0, "call-graph", &trace.opts,
2799 		     "record_mode[,record_size]", record_callchain_help,
2800 		     &record_parse_callchain_opt),
2801 	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2802 		    "Show the kernel callchains on the syscall exit path"),
2803 	OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2804 		     "Set the minimum stack depth when parsing the callchain, "
2805 		     "anything below the specified depth will be ignored."),
2806 	OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2807 		     "Set the maximum stack depth when parsing the callchain, "
2808 		     "anything beyond the specified depth will be ignored. "
2809 		     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
2810 	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2811 			"per thread proc mmap processing timeout in ms"),
2812 	OPT_END()
2813 	};
2814 	bool __maybe_unused max_stack_user_set = true;
2815 	bool mmap_pages_user_set = true;
2816 	const char * const trace_subcommands[] = { "record", NULL };
2817 	int err;
2818 	char bf[BUFSIZ];
2819 
2820 	signal(SIGSEGV, sighandler_dump_stack);
2821 	signal(SIGFPE, sighandler_dump_stack);
2822 
2823 	trace.evlist = perf_evlist__new();
2824 	trace.sctbl = syscalltbl__new();
2825 
2826 	if (trace.evlist == NULL || trace.sctbl == NULL) {
2827 		pr_err("Not enough memory to run!\n");
2828 		err = -ENOMEM;
2829 		goto out;
2830 	}
2831 
2832 	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2833 				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2834 
2835 	err = bpf__setup_stdout(trace.evlist);
2836 	if (err) {
2837 		bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2838 		pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2839 		goto out;
2840 	}
2841 
2842 	err = -1;
2843 
2844 	if (trace.trace_pgfaults) {
2845 		trace.opts.sample_address = true;
2846 		trace.opts.sample_time = true;
2847 	}
2848 
2849 	if (trace.opts.mmap_pages == UINT_MAX)
2850 		mmap_pages_user_set = false;
2851 
2852 	if (trace.max_stack == UINT_MAX) {
2853 		trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
2854 		max_stack_user_set = false;
2855 	}
2856 
2857 #ifdef HAVE_DWARF_UNWIND_SUPPORT
2858 	if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
2859 		record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2860 #endif
2861 
2862 	if (callchain_param.enabled) {
2863 		if (!mmap_pages_user_set && geteuid() == 0)
2864 			trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2865 
2866 		symbol_conf.use_callchain = true;
2867 	}
2868 
2869 	if (trace.evlist->nr_entries > 0)
2870 		evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2871 
2872 	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2873 		return trace__record(&trace, argc-1, &argv[1]);
2874 
2875 	/* summary_only implies summary option, but don't overwrite summary if set */
2876 	if (trace.summary_only)
2877 		trace.summary = trace.summary_only;
2878 
2879 	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2880 	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
2881 		pr_err("Please specify something to trace.\n");
2882 		return -1;
2883 	}
2884 
2885 	if (!trace.trace_syscalls && ev_qualifier_str) {
2886 		pr_err("The -e option can't be used with --no-syscalls.\n");
2887 		goto out;
2888 	}
2889 
2890 	if (output_name != NULL) {
2891 		err = trace__open_output(&trace, output_name);
2892 		if (err < 0) {
2893 			perror("failed to create output file");
2894 			goto out;
2895 		}
2896 	}
2897 
2898 	trace.open_id = syscalltbl__id(trace.sctbl, "open");
2899 
2900 	if (ev_qualifier_str != NULL) {
2901 		const char *s = ev_qualifier_str;
2902 		struct strlist_config slist_config = {
2903 			.dirname = system_path(STRACE_GROUPS_DIR),
2904 		};
2905 
2906 		trace.not_ev_qualifier = *s == '!';
2907 		if (trace.not_ev_qualifier)
2908 			++s;
2909 		trace.ev_qualifier = strlist__new(s, &slist_config);
2910 		if (trace.ev_qualifier == NULL) {
2911 			fputs("Not enough memory to parse event qualifier",
2912 			      trace.output);
2913 			err = -ENOMEM;
2914 			goto out_close;
2915 		}
2916 
2917 		err = trace__validate_ev_qualifier(&trace);
2918 		if (err)
2919 			goto out_close;
2920 	}
2921 
2922 	err = target__validate(&trace.opts.target);
2923 	if (err) {
2924 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2925 		fprintf(trace.output, "%s", bf);
2926 		goto out_close;
2927 	}
2928 
2929 	err = target__parse_uid(&trace.opts.target);
2930 	if (err) {
2931 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2932 		fprintf(trace.output, "%s", bf);
2933 		goto out_close;
2934 	}
2935 
2936 	if (!argc && target__none(&trace.opts.target))
2937 		trace.opts.target.system_wide = true;
2938 
2939 	if (input_name)
2940 		err = trace__replay(&trace);
2941 	else
2942 		err = trace__run(&trace, argc, argv);
2943 
2944 out_close:
2945 	if (output_name != NULL)
2946 		fclose(trace.output);
2947 out:
2948 	return err;
2949 }
2950