xref: /linux/tools/perf/builtin-trace.c (revision 32786fdc9506aeba98278c1844d4bfb766863832)
1 /*
2  * builtin-trace.c
3  *
4  * Builtin 'trace' command:
5  *
6  * Display a continuously updated trace of any workload, CPU, specific PID,
7  * system wide, etc.  Default format is loosely strace like, but any other
8  * event may be specified using --event.
9  *
10  * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11  *
12  * Initially based on the 'trace' prototype by Thomas Gleixner:
13  *
14  * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15  *
16  * Released under the GPL v2. (and only v2, not any later version)
17  */
18 
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include <subcmd/exec-cmd.h>
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include <subcmd/parse-options.h>
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36 #include "util/bpf-loader.h"
37 #include "callchain.h"
38 #include "syscalltbl.h"
39 #include "rb_resort.h"
40 
41 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
42 #include <stdlib.h>
43 #include <linux/err.h>
44 #include <linux/filter.h>
45 #include <linux/audit.h>
46 #include <linux/random.h>
47 #include <linux/stringify.h>
48 #include <linux/time64.h>
49 
50 #ifndef O_CLOEXEC
51 # define O_CLOEXEC		02000000
52 #endif
53 
/*
 * Global state for one 'perf trace' run: the tool callbacks, the syscall
 * table and its enter/exit evsels, the output stream and all the filters
 * and flags collected from the command line.
 */
struct trace {
	struct perf_tool	tool;
	struct syscalltbl	*sctbl;
	struct {
		int		max;		/* highest syscall id in 'table' */
		struct syscall  *table;
		struct {
			/* raw_syscalls:sys_{enter,exit} tracepoint evsels */
			struct perf_evsel *sys_enter,
					  *sys_exit;
		}		events;
	} syscalls;
	struct record_opts	opts;
	struct perf_evlist	*evlist;
	struct machine		*host;
	struct thread		*current;	/* thread of the last event seen */
	u64			base_time;
	FILE			*output;
	unsigned long		nr_events;
	struct strlist		*ev_qualifier;	/* -e syscall name list */
	struct {
		size_t		nr;
		int		*entries;	/* ev_qualifier resolved to ids */
	}			ev_qualifier_ids;
	struct {
		size_t		nr;
		pid_t		*entries;	/* pids to filter out of the trace */
	}			filter_pids;
	double			duration_filter;	/* in ms, presumably --duration */
	double			runtime_ms;
	struct {
		/* tool self-stats: how filenames got resolved */
		u64		vfs_getname,
				proc_getname;
	} stats;
	unsigned int		max_stack;
	unsigned int		min_stack;
	bool			not_ev_qualifier;	/* -e was negated ("!...") */
	bool			live;
	bool			full_time;
	bool			sched;
	bool			multiple_threads;
	bool			summary;
	bool			summary_only;
	bool			show_comm;
	bool			show_tool_stats;
	bool			trace_syscalls;
	bool			kernel_syscallchains;
	bool			force;
	bool			vfs_getname;	/* vfs_getname probe is in place */
	int			trace_pgfaults;	/* TRACE_PFMAJ/TRACE_PFMIN bits */
	int			open_id;	/* syscall id of open(), for fd tracking */
};
105 
/*
 * Accessor for a single tracepoint field: byte offset into the sample's
 * raw_data plus a reader function — integer or pointer flavour, chosen
 * when the field is initialized.
 */
struct tp_field {
	int offset;
	union {
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};
113 
/*
 * Generate tp_field__u{8,16,32,64}(): read an unsigned integer of the
 * given width from the sample's raw_data at the field's offset.
 * memcpy() is used instead of a cast because raw_data offsets may not
 * be suitably aligned.
 */
#define TP_UINT_FIELD(bits) \
static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return value;  \
}

TP_UINT_FIELD(8);
TP_UINT_FIELD(16);
TP_UINT_FIELD(32);
TP_UINT_FIELD(64);
126 
/*
 * Byte-swapping variants of the readers above, used when the recorded
 * data's endianness differs from the host's (evsel->needs_swap).
 * No 8-bit variant: a single byte needs no swapping.
 */
#define TP_UINT_FIELD__SWAPPED(bits) \
static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return bswap_##bits(value);\
}

TP_UINT_FIELD__SWAPPED(16);
TP_UINT_FIELD__SWAPPED(32);
TP_UINT_FIELD__SWAPPED(64);
138 
139 static int tp_field__init_uint(struct tp_field *field,
140 			       struct format_field *format_field,
141 			       bool needs_swap)
142 {
143 	field->offset = format_field->offset;
144 
145 	switch (format_field->size) {
146 	case 1:
147 		field->integer = tp_field__u8;
148 		break;
149 	case 2:
150 		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
151 		break;
152 	case 4:
153 		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
154 		break;
155 	case 8:
156 		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
157 		break;
158 	default:
159 		return -1;
160 	}
161 
162 	return 0;
163 }
164 
165 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
166 {
167 	return sample->raw_data + field->offset;
168 }
169 
170 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
171 {
172 	field->offset = format_field->offset;
173 	field->pointer = tp_field__ptr;
174 	return 0;
175 }
176 
/*
 * Per-evsel (evsel->priv) field accessors for the raw_syscalls
 * tracepoints: the syscall id plus either the entry args or the exit
 * return value — never both, hence the union.
 */
struct syscall_tp {
	struct tp_field id;
	union {
		struct tp_field args, ret;
	};
};
183 
184 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
185 					  struct tp_field *field,
186 					  const char *name)
187 {
188 	struct format_field *format_field = perf_evsel__field(evsel, name);
189 
190 	if (format_field == NULL)
191 		return -1;
192 
193 	return tp_field__init_uint(field, format_field, evsel->needs_swap);
194 }
195 
/*
 * Convenience wrapper: initialize the syscall_tp member called 'name'
 * (id, args, ret) in evsel->priv, stringifying 'name' to look up the
 * tracepoint field of the same name.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
199 
/*
 * Look up tracepoint field 'name' in the evsel's format and wire a
 * pointer accessor for it. Returns 0 on success, -1 if absent.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt ? tp_field__init_ptr(field, fmt) : -1;
}
211 
/* Pointer-flavoured counterpart of perf_evsel__init_sc_tp_uint_field(). */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
215 
/* Free the evsel's private syscall_tp, then the evsel itself (in that order). */
static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
	zfree(&evsel->priv);
	perf_evsel__delete(evsel);
}
221 
222 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
223 {
224 	evsel->priv = malloc(sizeof(struct syscall_tp));
225 	if (evsel->priv != NULL) {
226 		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
227 			goto out_delete;
228 
229 		evsel->handler = handler;
230 		return 0;
231 	}
232 
233 	return -ENOMEM;
234 
235 out_delete:
236 	zfree(&evsel->priv);
237 	return -ENOENT;
238 }
239 
240 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
241 {
242 	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
243 
244 	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
245 	if (IS_ERR(evsel))
246 		evsel = perf_evsel__newtp("syscalls", direction);
247 
248 	if (IS_ERR(evsel))
249 		return NULL;
250 
251 	if (perf_evsel__init_syscall_tp(evsel, handler))
252 		goto out_delete;
253 
254 	return evsel;
255 
256 out_delete:
257 	perf_evsel__delete_priv(evsel);
258 	return NULL;
259 }
260 
/* Read the syscall_tp member 'name' (id/args/ret) from a sample as an integer. */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

/* Same, but returning a pointer into the sample's raw data. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
268 
/*
 * Context handed to every argument beautifier: the raw argument value,
 * the thread/trace it belongs to, an optional per-formatter parameter
 * (e.g. a strarray), the argument's index and a mask of args already
 * consumed by a previous formatter.
 */
struct syscall_arg {
	unsigned long val;
	struct thread *thread;
	struct trace  *trace;
	void	      *parm;
	u8	      idx;
	u8	      mask;
};
277 
/*
 * A value -> name lookup table: entries[val - offset] is the symbolic
 * name, 'offset' handling enums that do not start at zero.
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char **entries;
};

/* Define strarray__<array> over a string array starting at value 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

/* Same, for enums whose first value is 'off' rather than 0. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
294 
295 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
296 						const char *intfmt,
297 					        struct syscall_arg *arg)
298 {
299 	struct strarray *sa = arg->parm;
300 	int idx = arg->val - sa->offset;
301 
302 	if (idx < 0 || idx >= sa->nr_entries)
303 		return scnprintf(bf, size, intfmt, arg->val);
304 
305 	return scnprintf(bf, size, "%s", sa->entries[idx]);
306 }
307 
/* strarray beautifier with a decimal fallback for out-of-range values. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
315 
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * 	  gets rewritten to support all arches.
 */
/* strarray beautifier with a hex fallback, used for ioctl cmd numbers. */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
329 
330 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
331 					struct syscall_arg *arg);
332 
333 #define SCA_FD syscall_arg__scnprintf_fd
334 
335 #ifndef AT_FDCWD
336 #define AT_FDCWD	-100
337 #endif
338 
339 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
340 					   struct syscall_arg *arg)
341 {
342 	int fd = arg->val;
343 
344 	if (fd == AT_FDCWD)
345 		return scnprintf(bf, size, "CWD");
346 
347 	return syscall_arg__scnprintf_fd(bf, size, arg);
348 }
349 
350 #define SCA_FDAT syscall_arg__scnprintf_fd_at
351 
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd

/* Render an argument as a 0x-prefixed hex value (addresses, opaque args). */
static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
					 struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%#lx", arg->val);
}

#define SCA_HEX syscall_arg__scnprintf_hex
364 
365 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
366 					 struct syscall_arg *arg)
367 {
368 	return scnprintf(bf, size, "%d", arg->val);
369 }
370 
371 #define SCA_INT syscall_arg__scnprintf_int
372 
/* bpf(2) command names, indexed by the cmd argument. */
static const char *bpf_cmd[] = {
	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY", "PROG_LOAD",
};
static DEFINE_STRARRAY(bpf_cmd);

/* epoll_ctl(2) op names; EPOLL_CTL_ADD starts at 1, hence the offset. */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);

/* {get,set}itimer(2) 'which' names. */
static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);

/* keyctl(2) operation names. */
static const char *keyctl_options[] = {
	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);

/* lseek(2) whence names; DATA/HOLE only where the libc defines them. */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

/* fcntl(2) command names. */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
	"F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

/* RLIMIT_* resource names for {get,set}rlimit(2)/prlimit64(2). */
static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

/* rt_sigprocmask(2) 'how' names. */
static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);

/* CLOCK_* id names for clock_gettime(2) and friends. */
static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid);

/* AF_* address family names for socket(2)/socketpair(2). */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
	"ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
438 
439 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
440 						 struct syscall_arg *arg)
441 {
442 	size_t printed = 0;
443 	int mode = arg->val;
444 
445 	if (mode == F_OK) /* 0 */
446 		return scnprintf(bf, size, "F");
447 #define	P_MODE(n) \
448 	if (mode & n##_OK) { \
449 		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
450 		mode &= ~n##_OK; \
451 	}
452 
453 	P_MODE(R);
454 	P_MODE(W);
455 	P_MODE(X);
456 #undef P_MODE
457 
458 	if (mode)
459 		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
460 
461 	return printed;
462 }
463 
464 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
465 
466 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
467 					      struct syscall_arg *arg);
468 
469 #define SCA_FILENAME syscall_arg__scnprintf_filename
470 
471 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
472 						struct syscall_arg *arg)
473 {
474 	int printed = 0, flags = arg->val;
475 
476 #define	P_FLAG(n) \
477 	if (flags & O_##n) { \
478 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
479 		flags &= ~O_##n; \
480 	}
481 
482 	P_FLAG(CLOEXEC);
483 	P_FLAG(NONBLOCK);
484 #undef P_FLAG
485 
486 	if (flags)
487 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
488 
489 	return printed;
490 }
491 
492 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
493 
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
/* First terminal ioctl number; the table below is offset by it. */
#define TCGETS		0x5401

/*
 * Terminal ioctl request names, indexed by (cmd - TCGETS). Designated
 * initializers ([0x27], [0x50], [0x60]) skip over holes in the number
 * space.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
	[0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
};

static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
#endif /* defined(__i386__) || defined(__x86_64__) */
520 
521 #ifndef GRND_NONBLOCK
522 #define GRND_NONBLOCK	0x0001
523 #endif
524 #ifndef GRND_RANDOM
525 #define GRND_RANDOM	0x0002
526 #endif
527 
528 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
529 						   struct syscall_arg *arg)
530 {
531 	int printed = 0, flags = arg->val;
532 
533 #define	P_FLAG(n) \
534 	if (flags & GRND_##n) { \
535 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
536 		flags &= ~GRND_##n; \
537 	}
538 
539 	P_FLAG(RANDOM);
540 	P_FLAG(NONBLOCK);
541 #undef P_FLAG
542 
543 	if (flags)
544 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
545 
546 	return printed;
547 }
548 
549 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
550 
/*
 * Shorthand for syscall_fmts entries: beautify argument 'arg' via the
 * generic strarray printer, parameterized with strarray__<array>.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
554 
555 #include "trace/beauty/eventfd.c"
556 #include "trace/beauty/flock.c"
557 #include "trace/beauty/futex_op.c"
558 #include "trace/beauty/mmap.c"
559 #include "trace/beauty/mode_t.c"
560 #include "trace/beauty/msg_flags.c"
561 #include "trace/beauty/open_flags.c"
562 #include "trace/beauty/perf_event_open.c"
563 #include "trace/beauty/pid.c"
564 #include "trace/beauty/sched_policy.c"
565 #include "trace/beauty/seccomp.c"
566 #include "trace/beauty/signum.c"
567 #include "trace/beauty/socket_type.c"
568 #include "trace/beauty/waitid_options.c"
569 
570 static struct syscall_fmt {
571 	const char *name;
572 	const char *alias;
573 	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
574 	void	   *arg_parm[6];
575 	bool	   errmsg;
576 	bool	   errpid;
577 	bool	   timeout;
578 	bool	   hexret;
579 } syscall_fmts[] = {
580 	{ .name	    = "access",	    .errmsg = true,
581 	  .arg_scnprintf = { [1] = SCA_ACCMODE,  /* mode */ }, },
582 	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
583 	{ .name	    = "bpf",	    .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
584 	{ .name	    = "brk",	    .hexret = true,
585 	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
586 	{ .name	    = "chdir",	    .errmsg = true, },
587 	{ .name	    = "chmod",	    .errmsg = true, },
588 	{ .name	    = "chroot",	    .errmsg = true, },
589 	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
590 	{ .name	    = "clone",	    .errpid = true, },
591 	{ .name	    = "close",	    .errmsg = true,
592 	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
593 	{ .name	    = "connect",    .errmsg = true, },
594 	{ .name	    = "creat",	    .errmsg = true, },
595 	{ .name	    = "dup",	    .errmsg = true, },
596 	{ .name	    = "dup2",	    .errmsg = true, },
597 	{ .name	    = "dup3",	    .errmsg = true, },
598 	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
599 	{ .name	    = "eventfd2",   .errmsg = true,
600 	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
601 	{ .name	    = "faccessat",  .errmsg = true, },
602 	{ .name	    = "fadvise64",  .errmsg = true, },
603 	{ .name	    = "fallocate",  .errmsg = true, },
604 	{ .name	    = "fchdir",	    .errmsg = true, },
605 	{ .name	    = "fchmod",	    .errmsg = true, },
606 	{ .name	    = "fchmodat",   .errmsg = true,
607 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
608 	{ .name	    = "fchown",	    .errmsg = true, },
609 	{ .name	    = "fchownat",   .errmsg = true,
610 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
611 	{ .name	    = "fcntl",	    .errmsg = true,
612 	  .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
613 	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
614 	{ .name	    = "fdatasync",  .errmsg = true, },
615 	{ .name	    = "flock",	    .errmsg = true,
616 	  .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
617 	{ .name	    = "fsetxattr",  .errmsg = true, },
618 	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat", },
619 	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat", },
620 	{ .name	    = "fstatfs",    .errmsg = true, },
621 	{ .name	    = "fsync",    .errmsg = true, },
622 	{ .name	    = "ftruncate", .errmsg = true, },
623 	{ .name	    = "futex",	    .errmsg = true,
624 	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
625 	{ .name	    = "futimesat", .errmsg = true,
626 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
627 	{ .name	    = "getdents",   .errmsg = true, },
628 	{ .name	    = "getdents64", .errmsg = true, },
629 	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
630 	{ .name	    = "getpid",	    .errpid = true, },
631 	{ .name	    = "getpgid",    .errpid = true, },
632 	{ .name	    = "getppid",    .errpid = true, },
633 	{ .name	    = "getrandom",  .errmsg = true,
634 	  .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
635 	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
636 	{ .name	    = "getxattr",   .errmsg = true, },
637 	{ .name	    = "inotify_add_watch",	    .errmsg = true, },
638 	{ .name	    = "ioctl",	    .errmsg = true,
639 	  .arg_scnprintf = {
640 #if defined(__i386__) || defined(__x86_64__)
641 /*
642  * FIXME: Make this available to all arches.
643  */
644 			     [1] = SCA_STRHEXARRAY, /* cmd */
645 			     [2] = SCA_HEX, /* arg */ },
646 	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
647 #else
648 			     [2] = SCA_HEX, /* arg */ }, },
649 #endif
650 	{ .name	    = "keyctl",	    .errmsg = true, STRARRAY(0, option, keyctl_options), },
651 	{ .name	    = "kill",	    .errmsg = true,
652 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
653 	{ .name	    = "lchown",    .errmsg = true, },
654 	{ .name	    = "lgetxattr",  .errmsg = true, },
655 	{ .name	    = "linkat",	    .errmsg = true,
656 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
657 	{ .name	    = "listxattr",  .errmsg = true, },
658 	{ .name	    = "llistxattr", .errmsg = true, },
659 	{ .name	    = "lremovexattr",  .errmsg = true, },
660 	{ .name	    = "lseek",	    .errmsg = true,
661 	  .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
662 	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
663 	{ .name	    = "lsetxattr",  .errmsg = true, },
664 	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
665 	{ .name	    = "lsxattr",    .errmsg = true, },
666 	{ .name     = "madvise",    .errmsg = true,
667 	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
668 			     [2] = SCA_MADV_BHV, /* behavior */ }, },
669 	{ .name	    = "mkdir",    .errmsg = true, },
670 	{ .name	    = "mkdirat",    .errmsg = true,
671 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
672 	{ .name	    = "mknod",      .errmsg = true, },
673 	{ .name	    = "mknodat",    .errmsg = true,
674 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
675 	{ .name	    = "mlock",	    .errmsg = true,
676 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
677 	{ .name	    = "mlockall",   .errmsg = true,
678 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
679 	{ .name	    = "mmap",	    .hexret = true,
680 	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
681 			     [2] = SCA_MMAP_PROT, /* prot */
682 			     [3] = SCA_MMAP_FLAGS, /* flags */ }, },
683 	{ .name	    = "mprotect",   .errmsg = true,
684 	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
685 			     [2] = SCA_MMAP_PROT, /* prot */ }, },
686 	{ .name	    = "mq_unlink", .errmsg = true,
687 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
688 	{ .name	    = "mremap",	    .hexret = true,
689 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
690 			     [3] = SCA_MREMAP_FLAGS, /* flags */
691 			     [4] = SCA_HEX, /* new_addr */ }, },
692 	{ .name	    = "munlock",    .errmsg = true,
693 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
694 	{ .name	    = "munmap",	    .errmsg = true,
695 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
696 	{ .name	    = "name_to_handle_at", .errmsg = true,
697 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
698 	{ .name	    = "newfstatat", .errmsg = true,
699 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
700 	{ .name	    = "open",	    .errmsg = true,
701 	  .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
702 	{ .name	    = "open_by_handle_at", .errmsg = true,
703 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
704 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
705 	{ .name	    = "openat",	    .errmsg = true,
706 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
707 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
708 	{ .name	    = "perf_event_open", .errmsg = true,
709 	  .arg_scnprintf = { [2] = SCA_INT, /* cpu */
710 			     [3] = SCA_FD,  /* group_fd */
711 			     [4] = SCA_PERF_FLAGS,  /* flags */ }, },
712 	{ .name	    = "pipe2",	    .errmsg = true,
713 	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
714 	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
715 	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
716 	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64", },
717 	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread", },
718 	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
719 	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64", },
720 	{ .name	    = "pwritev",    .errmsg = true, },
721 	{ .name	    = "read",	    .errmsg = true, },
722 	{ .name	    = "readlink",   .errmsg = true, },
723 	{ .name	    = "readlinkat", .errmsg = true,
724 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
725 	{ .name	    = "readv",	    .errmsg = true, },
726 	{ .name	    = "recvfrom",   .errmsg = true,
727 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
728 	{ .name	    = "recvmmsg",   .errmsg = true,
729 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
730 	{ .name	    = "recvmsg",    .errmsg = true,
731 	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
732 	{ .name	    = "removexattr", .errmsg = true, },
733 	{ .name	    = "renameat",   .errmsg = true,
734 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
735 	{ .name	    = "rmdir",    .errmsg = true, },
736 	{ .name	    = "rt_sigaction", .errmsg = true,
737 	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
738 	{ .name	    = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
739 	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
740 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
741 	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
742 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
743 	{ .name	    = "sched_getattr",	      .errmsg = true, },
744 	{ .name	    = "sched_setattr",	      .errmsg = true, },
745 	{ .name	    = "sched_setscheduler",   .errmsg = true,
746 	  .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
747 	{ .name	    = "seccomp", .errmsg = true,
748 	  .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
749 			     [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
750 	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
751 	{ .name	    = "sendmmsg",    .errmsg = true,
752 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
753 	{ .name	    = "sendmsg",    .errmsg = true,
754 	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
755 	{ .name	    = "sendto",	    .errmsg = true,
756 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
757 	{ .name	    = "set_tid_address", .errpid = true, },
758 	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
759 	{ .name	    = "setpgid",    .errmsg = true, },
760 	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
761 	{ .name	    = "setxattr",   .errmsg = true, },
762 	{ .name	    = "shutdown",   .errmsg = true, },
763 	{ .name	    = "socket",	    .errmsg = true,
764 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
765 			     [1] = SCA_SK_TYPE, /* type */ },
766 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
767 	{ .name	    = "socketpair", .errmsg = true,
768 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
769 			     [1] = SCA_SK_TYPE, /* type */ },
770 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
771 	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat", },
772 	{ .name	    = "statfs",	    .errmsg = true, },
773 	{ .name	    = "swapoff",    .errmsg = true,
774 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
775 	{ .name	    = "swapon",	    .errmsg = true,
776 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
777 	{ .name	    = "symlinkat",  .errmsg = true,
778 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
779 	{ .name	    = "tgkill",	    .errmsg = true,
780 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
781 	{ .name	    = "tkill",	    .errmsg = true,
782 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
783 	{ .name	    = "truncate",   .errmsg = true, },
784 	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
785 	{ .name	    = "unlinkat",   .errmsg = true,
786 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
787 	{ .name	    = "utime",  .errmsg = true, },
788 	{ .name	    = "utimensat",  .errmsg = true,
789 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
790 	{ .name	    = "utimes",  .errmsg = true, },
791 	{ .name	    = "vmsplice",  .errmsg = true, },
792 	{ .name	    = "wait4",	    .errpid = true,
793 	  .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
794 	{ .name	    = "waitid",	    .errpid = true,
795 	  .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
796 	{ .name	    = "write",	    .errmsg = true, },
797 	{ .name	    = "writev",	    .errmsg = true, },
798 };
799 
800 static int syscall_fmt__cmp(const void *name, const void *fmtp)
801 {
802 	const struct syscall_fmt *fmt = fmtp;
803 	return strcmp(name, fmt->name);
804 }
805 
806 static struct syscall_fmt *syscall_fmt__find(const char *name)
807 {
808 	const int nmemb = ARRAY_SIZE(syscall_fmts);
809 	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
810 }
811 
/*
 * One entry of trace->syscalls.table: the tracepoint format of a
 * syscall, its (possibly aliased) name, and shortcuts into its
 * syscall_fmt for per-argument beautifying.
 */
struct syscall {
	struct event_format *tp_format;
	int		    nr_args;
	struct format_field *args;	/* head of the arg field list */
	const char	    *name;
	bool		    is_exit;	/* e.g. exit_group: no sys_exit will follow */
	struct syscall_fmt  *fmt;
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void		    **arg_parm;
};
822 
823 static size_t fprintf_duration(unsigned long t, FILE *fp)
824 {
825 	double duration = (double)t / NSEC_PER_MSEC;
826 	size_t printed = fprintf(fp, "(");
827 
828 	if (duration >= 1.0)
829 		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
830 	else if (duration >= 0.01)
831 		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
832 	else
833 		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
834 	return printed + fprintf(fp, "): ");
835 }
836 
837 /**
838  * filename.ptr: The filename char pointer that will be vfs_getname'd
839  * filename.entry_str_pos: Where to insert the string translated from
840  *                         filename.ptr by the vfs_getname tracepoint/kprobe.
841  */
842 struct thread_trace {
843 	u64		  entry_time;
844 	bool		  entry_pending;
845 	unsigned long	  nr_events;
846 	unsigned long	  pfmaj, pfmin;
847 	char		  *entry_str;
848 	double		  runtime_ms;
849         struct {
850 		unsigned long ptr;
851 		short int     entry_str_pos;
852 		bool	      pending_open;
853 		unsigned int  namelen;
854 		char	      *name;
855 	} filename;
856 	struct {
857 		int	  max;
858 		char	  **table;
859 	} paths;
860 
861 	struct intlist *syscall_stats;
862 };
863 
864 static struct thread_trace *thread_trace__new(void)
865 {
866 	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
867 
868 	if (ttrace)
869 		ttrace->paths.max = -1;
870 
871 	ttrace->syscall_stats = intlist__new(NULL);
872 
873 	return ttrace;
874 }
875 
876 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
877 {
878 	struct thread_trace *ttrace;
879 
880 	if (thread == NULL)
881 		goto fail;
882 
883 	if (thread__priv(thread) == NULL)
884 		thread__set_priv(thread, thread_trace__new());
885 
886 	if (thread__priv(thread) == NULL)
887 		goto fail;
888 
889 	ttrace = thread__priv(thread);
890 	++ttrace->nr_events;
891 
892 	return ttrace;
893 fail:
894 	color_fprintf(fp, PERF_COLOR_RED,
895 		      "WARNING: not enough memory, dropping samples!\n");
896 	return NULL;
897 }
898 
/* Bit flags selecting which page-fault kinds to trace (see trace__pgfault). */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size of the per-thread buffer a syscall entry line is formatted into. */
static const size_t trace__entry_str_size = 2048;
903 
/*
 * Cache 'pathname' as the path of file descriptor 'fd' for 'thread',
 * growing the per-thread fd->path table on demand.
 * Returns 0 on success, -1 on allocation failure.
 */
static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
{
	struct thread_trace *ttrace = thread__priv(thread);

	if (fd > ttrace->paths.max) {
		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));

		if (npath == NULL)
			return -1;

		/* Zero only the newly added slots (all slots on first growth). */
		if (ttrace->paths.max != -1) {
			memset(npath + ttrace->paths.max + 1, 0,
			       (fd - ttrace->paths.max) * sizeof(char *));
		} else {
			memset(npath, 0, (fd + 1) * sizeof(char *));
		}

		ttrace->paths.table = npath;
		ttrace->paths.max   = fd;
	}

	/* NOTE(review): an existing entry is overwritten without free() —
	 * looks like a small leak on fd reuse; confirm with callers. */
	ttrace->paths.table[fd] = strdup(pathname);

	return ttrace->paths.table[fd] != NULL ? 0 : -1;
}
929 
930 static int thread__read_fd_path(struct thread *thread, int fd)
931 {
932 	char linkname[PATH_MAX], pathname[PATH_MAX];
933 	struct stat st;
934 	int ret;
935 
936 	if (thread->pid_ == thread->tid) {
937 		scnprintf(linkname, sizeof(linkname),
938 			  "/proc/%d/fd/%d", thread->pid_, fd);
939 	} else {
940 		scnprintf(linkname, sizeof(linkname),
941 			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
942 	}
943 
944 	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
945 		return -1;
946 
947 	ret = readlink(linkname, pathname, sizeof(pathname));
948 
949 	if (ret < 0 || ret > st.st_size)
950 		return -1;
951 
952 	pathname[ret] = '\0';
953 	return trace__set_fd_pathname(thread, fd, pathname);
954 }
955 
956 static const char *thread__fd_path(struct thread *thread, int fd,
957 				   struct trace *trace)
958 {
959 	struct thread_trace *ttrace = thread__priv(thread);
960 
961 	if (ttrace == NULL)
962 		return NULL;
963 
964 	if (fd < 0)
965 		return NULL;
966 
967 	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
968 		if (!trace->live)
969 			return NULL;
970 		++trace->stats.proc_getname;
971 		if (thread__read_fd_path(thread, fd))
972 			return NULL;
973 	}
974 
975 	return ttrace->paths.table[fd];
976 }
977 
978 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
979 					struct syscall_arg *arg)
980 {
981 	int fd = arg->val;
982 	size_t printed = scnprintf(bf, size, "%d", fd);
983 	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
984 
985 	if (path)
986 		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
987 
988 	return printed;
989 }
990 
991 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
992 					      struct syscall_arg *arg)
993 {
994 	int fd = arg->val;
995 	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
996 	struct thread_trace *ttrace = thread__priv(arg->thread);
997 
998 	if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
999 		zfree(&ttrace->paths.table[fd]);
1000 
1001 	return printed;
1002 }
1003 
1004 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1005 				     unsigned long ptr)
1006 {
1007 	struct thread_trace *ttrace = thread__priv(thread);
1008 
1009 	ttrace->filename.ptr = ptr;
1010 	ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1011 }
1012 
1013 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1014 					      struct syscall_arg *arg)
1015 {
1016 	unsigned long ptr = arg->val;
1017 
1018 	if (!arg->trace->vfs_getname)
1019 		return scnprintf(bf, size, "%#x", ptr);
1020 
1021 	thread__set_filename_pos(arg->thread, bf, ptr);
1022 	return 0;
1023 }
1024 
1025 static bool trace__filter_duration(struct trace *trace, double t)
1026 {
1027 	return t < (trace->duration_filter * NSEC_PER_MSEC);
1028 }
1029 
1030 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1031 {
1032 	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1033 
1034 	return fprintf(fp, "%10.3f ", ts);
1035 }
1036 
/* Set from the signal handler; polled by the main run loop. */
static bool done = false;
static bool interrupted = false;

/*
 * Termination signal handler: flag shutdown and whether it was SIGINT.
 * NOTE(review): plain bool rather than volatile sig_atomic_t — relies on
 * the loop re-reading these flags; confirm against the run loop's usage.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1045 
1046 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1047 					u64 duration, u64 tstamp, FILE *fp)
1048 {
1049 	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1050 	printed += fprintf_duration(duration, fp);
1051 
1052 	if (trace->multiple_threads) {
1053 		if (trace->show_comm)
1054 			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1055 		printed += fprintf(fp, "%d ", thread->tid);
1056 	}
1057 
1058 	return printed;
1059 }
1060 
1061 static int trace__process_event(struct trace *trace, struct machine *machine,
1062 				union perf_event *event, struct perf_sample *sample)
1063 {
1064 	int ret = 0;
1065 
1066 	switch (event->header.type) {
1067 	case PERF_RECORD_LOST:
1068 		color_fprintf(trace->output, PERF_COLOR_RED,
1069 			      "LOST %" PRIu64 " events!\n", event->lost.lost);
1070 		ret = machine__process_lost_event(machine, event, sample);
1071 		break;
1072 	default:
1073 		ret = machine__process_event(machine, event, sample);
1074 		break;
1075 	}
1076 
1077 	return ret;
1078 }
1079 
1080 static int trace__tool_process(struct perf_tool *tool,
1081 			       union perf_event *event,
1082 			       struct perf_sample *sample,
1083 			       struct machine *machine)
1084 {
1085 	struct trace *trace = container_of(tool, struct trace, tool);
1086 	return trace__process_event(trace, machine, event, sample);
1087 }
1088 
1089 static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1090 {
1091 	struct machine *machine = vmachine;
1092 
1093 	if (machine->kptr_restrict_warned)
1094 		return NULL;
1095 
1096 	if (symbol_conf.kptr_restrict) {
1097 		pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1098 			   "Check /proc/sys/kernel/kptr_restrict.\n\n"
1099 			   "Kernel samples will not be resolved.\n");
1100 		machine->kptr_restrict_warned = true;
1101 		return NULL;
1102 	}
1103 
1104 	return machine__resolve_kernel_addr(vmachine, addrp, modp);
1105 }
1106 
/*
 * Initialize symbol resolution, create the host machine object and
 * synthesize already-running threads so their samples can be resolved.
 * Returns 0 on success or a negative errno-style value.
 * NOTE(review): trace->host is not destroyed on the later error paths —
 * presumably it lives as long as the trace session; confirm.
 */
static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
{
	int err = symbol__init(NULL);

	if (err)
		return err;

	trace->host = machine__new_host();
	if (trace->host == NULL)
		return -ENOMEM;

	/* Install the kptr_restrict-aware resolver defined above. */
	if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
		return -errno;

	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
					    evlist->threads, trace__tool_process, false,
					    trace->opts.proc_map_timeout);
	if (err)
		symbol__exit();	/* undo symbol__init() on failure */

	return err;
}
1129 
1130 static int syscall__set_arg_fmts(struct syscall *sc)
1131 {
1132 	struct format_field *field;
1133 	int idx = 0, len;
1134 
1135 	sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1136 	if (sc->arg_scnprintf == NULL)
1137 		return -1;
1138 
1139 	if (sc->fmt)
1140 		sc->arg_parm = sc->fmt->arg_parm;
1141 
1142 	for (field = sc->args; field; field = field->next) {
1143 		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1144 			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1145 		else if (strcmp(field->type, "const char *") == 0 &&
1146 			 (strcmp(field->name, "filename") == 0 ||
1147 			  strcmp(field->name, "path") == 0 ||
1148 			  strcmp(field->name, "pathname") == 0))
1149 			sc->arg_scnprintf[idx] = SCA_FILENAME;
1150 		else if (field->flags & FIELD_IS_POINTER)
1151 			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1152 		else if (strcmp(field->type, "pid_t") == 0)
1153 			sc->arg_scnprintf[idx] = SCA_PID;
1154 		else if (strcmp(field->type, "umode_t") == 0)
1155 			sc->arg_scnprintf[idx] = SCA_MODE_T;
1156 		else if ((strcmp(field->type, "int") == 0 ||
1157 			  strcmp(field->type, "unsigned int") == 0 ||
1158 			  strcmp(field->type, "long") == 0) &&
1159 			 (len = strlen(field->name)) >= 2 &&
1160 			 strcmp(field->name + len - 2, "fd") == 0) {
1161 			/*
1162 			 * /sys/kernel/tracing/events/syscalls/sys_enter*
1163 			 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1164 			 * 65 int
1165 			 * 23 unsigned int
1166 			 * 7 unsigned long
1167 			 */
1168 			sc->arg_scnprintf[idx] = SCA_FD;
1169 		}
1170 		++idx;
1171 	}
1172 
1173 	return 0;
1174 }
1175 
/*
 * Lazily fill trace->syscalls.table[id]: resolve the syscall name, find
 * its formatting overrides and read the syscalls:sys_enter_<name>
 * tracepoint format, retrying with the fmt->alias spelling when needed.
 * Grows the table on demand.  Returns 0 on success, -1 on any failure.
 */
static int trace__read_syscall_info(struct trace *trace, int id)
{
	char tp_name[128];
	struct syscall *sc;
	const char *name = syscalltbl__name(trace->sctbl, id);

	if (name == NULL)
		return -1;

	if (id > trace->syscalls.max) {
		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));

		if (nsyscalls == NULL)
			return -1;

		/* Zero only the newly added slots (all slots on first growth). */
		if (trace->syscalls.max != -1) {
			memset(nsyscalls + trace->syscalls.max + 1, 0,
			       (id - trace->syscalls.max) * sizeof(*sc));
		} else {
			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
		}

		trace->syscalls.table = nsyscalls;
		trace->syscalls.max   = id;
	}

	sc = trace->syscalls.table + id;
	sc->name = name;

	sc->fmt  = syscall_fmt__find(sc->name);

	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
	sc->tp_format = trace_event__tp_format("syscalls", tp_name);

	/* Some syscalls are traced under an alias name, try that too. */
	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
	}

	if (IS_ERR(sc->tp_format))
		return -1;

	sc->args = sc->tp_format->format.fields;
	sc->nr_args = sc->tp_format->format.nr_fields;
	/*
	 * The first field may be '__syscall_nr' or 'nr' — the syscall number
	 * itself, absent on older kernels.  It is not a real argument, so
	 * skip it when present.
	 */
	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
		sc->args = sc->args->next;
		--sc->nr_args;
	}

	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");

	return syscall__set_arg_fmts(sc);
}
1234 
/*
 * Translate the user-supplied syscall-name list (trace->ev_qualifier) into
 * an array of ids (trace->ev_qualifier_ids), reporting every unknown name.
 * Returns 0 on success, -EINVAL on allocation failure or any invalid
 * syscall (in which case the ids array is freed again).
 */
static int trace__validate_ev_qualifier(struct trace *trace)
{
	int err = 0, i;
	struct str_node *pos;

	trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
	trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
						 sizeof(trace->ev_qualifier_ids.entries[0]));

	if (trace->ev_qualifier_ids.entries == NULL) {
		fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
		       trace->output);
		err = -EINVAL;
		goto out;
	}

	i = 0;

	strlist__for_each_entry(pos, trace->ev_qualifier) {
		const char *sc = pos->s;
		int id = syscalltbl__id(trace->sctbl, sc);

		if (id < 0) {
			/* Keep scanning so every bad name is reported at once. */
			if (err == 0) {
				fputs("Error:\tInvalid syscall ", trace->output);
				err = -EINVAL;
			} else {
				fputs(", ", trace->output);
			}

			fputs(sc, trace->output);
		}

		trace->ev_qualifier_ids.entries[i++] = id;
	}

	if (err < 0) {
		fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
		      "\nHint:\tand: 'man syscalls'\n", trace->output);
		zfree(&trace->ev_qualifier_ids.entries);
		trace->ev_qualifier_ids.nr = 0;
	}
out:
	return err;
}
1280 
1281 /*
1282  * args is to be interpreted as a series of longs but we need to handle
1283  * 8-byte unaligned accesses. args points to raw_data within the event
1284  * and raw_data is guaranteed to be 8-byte unaligned because it is
1285  * preceded by raw_size which is a u32. So we need to copy args to a temp
1286  * variable to read it. Most notably this avoids extended load instructions
1287  * on unaligned addresses
1288  */
1289 
/*
 * Format the raw syscall arguments in 'args' into 'bf' using the per-arg
 * formatters chosen by syscall__set_arg_fmts().  When the tracepoint
 * format could not be read, fall back to printing six raw longs.
 * Returns the number of characters written.
 */
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned char *args, struct trace *trace,
				      struct thread *thread)
{
	size_t printed = 0;
	unsigned char *p;
	unsigned long val;

	if (sc->args != NULL) {
		struct format_field *field;
		u8 bit = 1;	/* arg.mask bit: lets a formatter consume later args */
		struct syscall_arg arg = {
			.idx	= 0,
			.mask	= 0,
			.trace  = trace,
			.thread = thread,
		};

		for (field = sc->args; field;
		     field = field->next, ++arg.idx, bit <<= 1) {
			if (arg.mask & bit)
				continue;

			/* special care for unaligned accesses */
			p = args + sizeof(unsigned long) * arg.idx;
			memcpy(&val, p, sizeof(val));

			/*
			 * Suppress this argument if its value is zero and
			 * we don't have a string associated in a strarray
			 * for it.
			 */
			if (val == 0 &&
			    !(sc->arg_scnprintf &&
			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
			      sc->arg_parm[arg.idx]))
				continue;

			printed += scnprintf(bf + printed, size - printed,
					     "%s%s: ", printed ? ", " : "", field->name);
			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
				arg.val = val;
				if (sc->arg_parm)
					arg.parm = sc->arg_parm[arg.idx];
				printed += sc->arg_scnprintf[arg.idx](bf + printed,
								      size - printed, &arg);
			} else {
				printed += scnprintf(bf + printed, size - printed,
						     "%ld", val);
			}
		}
	} else if (IS_ERR(sc->tp_format)) {
		/*
		 * If we managed to read the tracepoint /format file, then we
		 * may end up not having any args, like with gettid(), so only
		 * print the raw args when we didn't manage to read it.
		 */
		int i = 0;

		while (i < 6) {
			/* special care for unaligned accesses */
			p = args + sizeof(unsigned long) * i;
			memcpy(&val, p, sizeof(val));
			printed += scnprintf(bf + printed, size - printed,
					     "%sarg%d: %ld",
					     printed ? ", " : "", i, val);
			++i;
		}
	}

	return printed;
}
1362 
/* Signature of the per-evsel sample handlers installed in evsel->handler. */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1366 
/*
 * Map a raw syscall id from a sample to its entry in trace->syscalls.table,
 * reading the tracepoint info on first use.  Returns NULL — with a
 * diagnostic when verbose — for invalid ids or unreadable syscall info.
 */
static struct syscall *trace__syscall_info(struct trace *trace,
					   struct perf_evsel *evsel, int id)
{

	if (id < 0) {

		/*
		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
		 * before that, leaving at a higher verbosity level till that is
		 * explained. Reproduced with plain ftrace with:
		 *
		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
		 * grep "NR -1 " /t/trace_pipe
		 *
		 * After generating some load on the machine.
		 */
		if (verbose > 1) {
			static u64 n;
			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
				id, perf_evsel__name(evsel), ++n);
		}
		return NULL;
	}

	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
	    trace__read_syscall_info(trace, id))
		goto out_cant_read;

	/* Re-check: trace__read_syscall_info() may still have left it empty. */
	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
		goto out_cant_read;

	return &trace->syscalls.table[id];

out_cant_read:
	if (verbose) {
		fprintf(trace->output, "Problems reading syscall %d", id);
		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
		fputs(" information\n", trace->output);
	}
	return NULL;
}
1409 
1410 static void thread__update_stats(struct thread_trace *ttrace,
1411 				 int id, struct perf_sample *sample)
1412 {
1413 	struct int_node *inode;
1414 	struct stats *stats;
1415 	u64 duration = 0;
1416 
1417 	inode = intlist__findnew(ttrace->syscall_stats, id);
1418 	if (inode == NULL)
1419 		return;
1420 
1421 	stats = inode->priv;
1422 	if (stats == NULL) {
1423 		stats = malloc(sizeof(struct stats));
1424 		if (stats == NULL)
1425 			return;
1426 		init_stats(stats);
1427 		inode->priv = stats;
1428 	}
1429 
1430 	if (ttrace->entry_time && sample->time > ttrace->entry_time)
1431 		duration = sample->time - ttrace->entry_time;
1432 
1433 	update_stats(stats, duration);
1434 }
1435 
1436 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1437 {
1438 	struct thread_trace *ttrace;
1439 	u64 duration;
1440 	size_t printed;
1441 
1442 	if (trace->current == NULL)
1443 		return 0;
1444 
1445 	ttrace = thread__priv(trace->current);
1446 
1447 	if (!ttrace->entry_pending)
1448 		return 0;
1449 
1450 	duration = sample->time - ttrace->entry_time;
1451 
1452 	printed  = trace__fprintf_entry_head(trace, trace->current, duration, ttrace->entry_time, trace->output);
1453 	printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1454 	ttrace->entry_pending = false;
1455 
1456 	return printed;
1457 }
1458 
/*
 * syscalls:sys_enter_* handler: format "<name>(<args>" into the per-thread
 * entry_str.  exit/exit_group never get a matching sys_exit, so their line
 * is printed immediately; for everything else printing is deferred to
 * trace__sys_exit so the return value can be appended.
 * Returns 0 on success, -1 on failure.
 */
static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample)
{
	char *msg;
	void *args;
	size_t printed = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	args = perf_evsel__sc_tp_ptr(evsel, args, sample);

	/* Lazily allocate the buffer the entry line is formatted into. */
	if (ttrace->entry_str == NULL) {
		ttrace->entry_str = malloc(trace__entry_str_size);
		if (!ttrace->entry_str)
			goto out_put;
	}

	/* Flush any still-pending entry from the previously current thread. */
	if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
		trace__printf_interrupted_entry(trace, sample);

	ttrace->entry_time = sample->time;
	msg = ttrace->entry_str;
	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);

	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
					   args, trace, thread);

	if (sc->is_exit) {
		if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
			trace__fprintf_entry_head(trace, thread, 1, ttrace->entry_time, trace->output);
			fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
		}
	} else {
		ttrace->entry_pending = true;
		/* See trace__vfs_getname & trace__sys_exit */
		ttrace->filename.pending_open = false;
	}

	/* Track the current thread so interleaved entries can be detected. */
	if (trace->current != thread) {
		thread__put(trace->current);
		trace->current = thread__get(thread);
	}
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
1517 
1518 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1519 				    struct perf_sample *sample,
1520 				    struct callchain_cursor *cursor)
1521 {
1522 	struct addr_location al;
1523 
1524 	if (machine__resolve(trace->host, &al, sample) < 0 ||
1525 	    thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1526 		return -1;
1527 
1528 	return 0;
1529 }
1530 
1531 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1532 {
1533 	/* TODO: user-configurable print_opts */
1534 	const unsigned int print_opts = EVSEL__PRINT_SYM |
1535 				        EVSEL__PRINT_DSO |
1536 				        EVSEL__PRINT_UNKNOWN_AS_ADDR;
1537 
1538 	return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
1539 }
1540 
/*
 * syscalls:sys_exit_* handler: finish the line started by trace__sys_enter
 * (or mark it "continued" if the entry was already flushed) and print the
 * return value — formatted per-syscall as errno name+message, child pid,
 * hex, "Timeout", or a plain signed long.  Also binds a pending
 * vfs_getname to the fd returned by open() and feeds --summary stats.
 * Returns 0 on success, -1 on failure.
 */
static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
			   union perf_event *event __maybe_unused,
			   struct perf_sample *sample)
{
	long ret;
	u64 duration = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (trace->summary)
		thread__update_stats(ttrace, id, sample);

	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);

	/* Successful open(): cache the vfs_getname'd path under the new fd. */
	if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
		ttrace->filename.pending_open = false;
		++trace->stats.vfs_getname;
	}

	if (ttrace->entry_time) {
		duration = sample->time - ttrace->entry_time;
		if (trace__filter_duration(trace, duration))
			goto out;
	} else if (trace->duration_filter)
		goto out;

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			if (callchain_cursor.nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	if (trace->summary_only)
		goto out;

	trace__fprintf_entry_head(trace, thread, duration, ttrace->entry_time, trace->output);

	if (ttrace->entry_pending) {
		fprintf(trace->output, "%-70s", ttrace->entry_str);
	} else {
		/* Entry line already flushed as interrupted; mark continuation. */
		fprintf(trace->output, " ... [");
		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
		fprintf(trace->output, "]: %s()", sc->name);
	}

	if (sc->fmt == NULL) {
signed_print:
		fprintf(trace->output, ") = %ld", ret);
	} else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
		char bf[STRERR_BUFSIZE];
		const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
			   *e = audit_errno_to_name(-ret);

		fprintf(trace->output, ") = -1 %s %s", e, emsg);
	} else if (ret == 0 && sc->fmt->timeout)
		fprintf(trace->output, ") = 0 Timeout");
	else if (sc->fmt->hexret)
		fprintf(trace->output, ") = %#lx", ret);
	else if (sc->fmt->errpid) {
		/* Return value is a pid (e.g. fork/wait): show the child's comm. */
		struct thread *child = machine__find_thread(trace->host, ret, ret);

		if (child != NULL) {
			fprintf(trace->output, ") = %ld", ret);
			if (child->comm_set)
				fprintf(trace->output, " (%s)", thread__comm_str(child));
			thread__put(child);
		}
	} else
		goto signed_print;

	fputc('\n', trace->output);

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	ttrace->entry_pending = false;
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
1638 
/*
 * vfs_getname tracepoint/kprobe handler: capture the filename currently
 * being resolved, keep a copy for the fd->path cache (pending_open), and
 * if a pointer argument in entry_str was left unformatted, splice the
 * resolved name into that line at filename.entry_str_pos — truncating the
 * name's head when the line buffer lacks space.  Always returns 0.
 */
static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
			      union perf_event *event __maybe_unused,
			      struct perf_sample *sample)
{
	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	struct thread_trace *ttrace;
	size_t filename_len, entry_str_len, to_move;
	ssize_t remaining_space;
	char *pos;
	const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");

	if (!thread)
		goto out;

	ttrace = thread__priv(thread);
	if (!ttrace)
		goto out;

	filename_len = strlen(filename);

	/* Grow the per-thread name buffer if this filename is longer. */
	if (ttrace->filename.namelen < filename_len) {
		char *f = realloc(ttrace->filename.name, filename_len + 1);

		if (f == NULL)
				goto out;

		ttrace->filename.namelen = filename_len;
		ttrace->filename.name = f;
	}

	strcpy(ttrace->filename.name, filename);
	ttrace->filename.pending_open = true;

	/* No pointer arg waiting for this name in entry_str. */
	if (!ttrace->filename.ptr)
		goto out;

	entry_str_len = strlen(ttrace->entry_str);
	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
	if (remaining_space <= 0)
		goto out;

	/* Keep the tail of the name — its most specific components. */
	if (filename_len > (size_t)remaining_space) {
		filename += filename_len - remaining_space;
		filename_len = remaining_space;
	}

	/* Shift the rest of the line right, then drop the name in. */
	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
	memmove(pos + filename_len, pos, to_move);
	memcpy(pos, filename, filename_len);

	ttrace->filename.ptr = 0;
	ttrace->filename.entry_str_pos = 0;
out:
	return 0;
}
1695 
1696 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1697 				     union perf_event *event __maybe_unused,
1698 				     struct perf_sample *sample)
1699 {
1700         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1701 	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1702 	struct thread *thread = machine__findnew_thread(trace->host,
1703 							sample->pid,
1704 							sample->tid);
1705 	struct thread_trace *ttrace = thread__trace(thread, trace->output);
1706 
1707 	if (ttrace == NULL)
1708 		goto out_dump;
1709 
1710 	ttrace->runtime_ms += runtime_ms;
1711 	trace->runtime_ms += runtime_ms;
1712 	thread__put(thread);
1713 	return 0;
1714 
1715 out_dump:
1716 	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1717 	       evsel->name,
1718 	       perf_evsel__strval(evsel, sample, "comm"),
1719 	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1720 	       runtime,
1721 	       perf_evsel__intval(evsel, sample, "vruntime"));
1722 	thread__put(thread);
1723 	return 0;
1724 }
1725 
1726 static void bpf_output__printer(enum binary_printer_ops op,
1727 				unsigned int val, void *extra)
1728 {
1729 	FILE *output = extra;
1730 	unsigned char ch = (unsigned char)val;
1731 
1732 	switch (op) {
1733 	case BINARY_PRINT_CHAR_DATA:
1734 		fprintf(output, "%c", isprint(ch) ? ch : '.');
1735 		break;
1736 	case BINARY_PRINT_DATA_BEGIN:
1737 	case BINARY_PRINT_LINE_BEGIN:
1738 	case BINARY_PRINT_ADDR:
1739 	case BINARY_PRINT_NUM_DATA:
1740 	case BINARY_PRINT_NUM_PAD:
1741 	case BINARY_PRINT_SEP:
1742 	case BINARY_PRINT_CHAR_PAD:
1743 	case BINARY_PRINT_LINE_END:
1744 	case BINARY_PRINT_DATA_END:
1745 	default:
1746 		break;
1747 	}
1748 }
1749 
1750 static void bpf_output__fprintf(struct trace *trace,
1751 				struct perf_sample *sample)
1752 {
1753 	print_binary(sample->raw_data, sample->raw_size, 8,
1754 		     bpf_output__printer, trace->output);
1755 }
1756 
/*
 * Handler for non-syscall events (--event): print the timestamp and the
 * tracepoint's formatted payload — or a raw dump for bpf-output events —
 * plus the callchain when requested.  Always returns 0.
 */
static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample)
{
	int callchain_ret = 0;

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			/* Below --min-stack: suppress the whole line. */
			if (callchain_cursor.nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	trace__printf_interrupted_entry(trace, sample);
	trace__fprintf_tstamp(trace, sample->time, trace->output);

	/* Placeholder column where syscall lines print their duration. */
	if (trace->trace_syscalls)
		fprintf(trace->output, "(         ): ");

	fprintf(trace->output, "%s:", evsel->name);

	if (perf_evsel__is_bpf_output(evsel)) {
		bpf_output__fprintf(trace, sample);
	} else if (evsel->tp_format) {
		event_format__fprintf(evsel->tp_format, sample->cpu,
				      sample->raw_data, sample->raw_size,
				      trace->output);
	}

	fprintf(trace->output, ")\n");

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	return 0;
}
1797 
1798 static void print_location(FILE *f, struct perf_sample *sample,
1799 			   struct addr_location *al,
1800 			   bool print_dso, bool print_sym)
1801 {
1802 
1803 	if ((verbose || print_dso) && al->map)
1804 		fprintf(f, "%s@", al->map->dso->long_name);
1805 
1806 	if ((verbose || print_sym) && al->sym)
1807 		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1808 			al->addr - al->sym->start);
1809 	else if (al->map)
1810 		fprintf(f, "0x%" PRIx64, al->addr);
1811 	else
1812 		fprintf(f, "0x%" PRIx64, sample->addr);
1813 }
1814 
/*
 * Page-fault software event handler: bump the per-thread maj/min fault
 * counters and, unless --summary-only, print
 * "maj|minfault [<ip location>] => <target location> (<map type><level>)"
 * with an optional callchain.  Returns 0 on success, -1 when per-thread
 * state cannot be allocated.
 */
static int trace__pgfault(struct trace *trace,
			  struct perf_evsel *evsel,
			  union perf_event *event __maybe_unused,
			  struct perf_sample *sample)
{
	struct thread *thread;
	struct addr_location al;
	char map_type = 'd';	/* 'd'ata; becomes 'x' for exec maps, '?' if unmapped */
	struct thread_trace *ttrace;
	int err = -1;
	int callchain_ret = 0;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			if (callchain_cursor.nr < trace->min_stack)
				goto out_put;
			callchain_ret = 1;
		}
	}

	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
		ttrace->pfmaj++;
	else
		ttrace->pfmin++;

	if (trace->summary_only)
		goto out;

	/* Where the faulting instruction lives. */
	thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
			      sample->ip, &al);

	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);

	fprintf(trace->output, "%sfault [",
		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
		"maj" : "min");

	print_location(trace->output, sample, &al, false, true);

	fprintf(trace->output, "] => ");

	/* The address being faulted on: try data maps first, then exec. */
	thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
				   sample->addr, &al);

	if (!al.map) {
		thread__find_addr_location(thread, sample->cpumode,
					   MAP__FUNCTION, sample->addr, &al);

		if (al.map)
			map_type = 'x';
		else
			map_type = '?';
	}

	print_location(trace->output, sample, &al, true, false);

	fprintf(trace->output, " (%c%c)\n", map_type, al.level);

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
1890 
1891 static void trace__set_base_time(struct trace *trace,
1892 				 struct perf_evsel *evsel,
1893 				 struct perf_sample *sample)
1894 {
1895 	/*
1896 	 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1897 	 * and don't use sample->time unconditionally, we may end up having
1898 	 * some other event in the future without PERF_SAMPLE_TIME for good
1899 	 * reason, i.e. we may not be interested in its timestamps, just in
1900 	 * it taking place, picking some piece of information when it
1901 	 * appears in our event stream (vfs_getname comes to mind).
1902 	 */
1903 	if (trace->base_time == 0 && !trace->full_time &&
1904 	    (evsel->attr.sample_type & PERF_SAMPLE_TIME))
1905 		trace->base_time = sample->time;
1906 }
1907 
1908 static int trace__process_sample(struct perf_tool *tool,
1909 				 union perf_event *event,
1910 				 struct perf_sample *sample,
1911 				 struct perf_evsel *evsel,
1912 				 struct machine *machine __maybe_unused)
1913 {
1914 	struct trace *trace = container_of(tool, struct trace, tool);
1915 	struct thread *thread;
1916 	int err = 0;
1917 
1918 	tracepoint_handler handler = evsel->handler;
1919 
1920 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1921 	if (thread && thread__is_filtered(thread))
1922 		return 0;
1923 
1924 	trace__set_base_time(trace, evsel, sample);
1925 
1926 	if (handler) {
1927 		++trace->nr_events;
1928 		handler(trace, evsel, event, sample);
1929 	}
1930 
1931 	return err;
1932 }
1933 
1934 static int trace__record(struct trace *trace, int argc, const char **argv)
1935 {
1936 	unsigned int rec_argc, i, j;
1937 	const char **rec_argv;
1938 	const char * const record_args[] = {
1939 		"record",
1940 		"-R",
1941 		"-m", "1024",
1942 		"-c", "1",
1943 	};
1944 
1945 	const char * const sc_args[] = { "-e", };
1946 	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1947 	const char * const majpf_args[] = { "-e", "major-faults" };
1948 	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1949 	const char * const minpf_args[] = { "-e", "minor-faults" };
1950 	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1951 
1952 	/* +1 is for the event string below */
1953 	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1954 		majpf_args_nr + minpf_args_nr + argc;
1955 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
1956 
1957 	if (rec_argv == NULL)
1958 		return -ENOMEM;
1959 
1960 	j = 0;
1961 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
1962 		rec_argv[j++] = record_args[i];
1963 
1964 	if (trace->trace_syscalls) {
1965 		for (i = 0; i < sc_args_nr; i++)
1966 			rec_argv[j++] = sc_args[i];
1967 
1968 		/* event string may be different for older kernels - e.g., RHEL6 */
1969 		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
1970 			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
1971 		else if (is_valid_tracepoint("syscalls:sys_enter"))
1972 			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
1973 		else {
1974 			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
1975 			return -1;
1976 		}
1977 	}
1978 
1979 	if (trace->trace_pgfaults & TRACE_PFMAJ)
1980 		for (i = 0; i < majpf_args_nr; i++)
1981 			rec_argv[j++] = majpf_args[i];
1982 
1983 	if (trace->trace_pgfaults & TRACE_PFMIN)
1984 		for (i = 0; i < minpf_args_nr; i++)
1985 			rec_argv[j++] = minpf_args[i];
1986 
1987 	for (i = 0; i < (unsigned int)argc; i++)
1988 		rec_argv[j++] = argv[i];
1989 
1990 	return cmd_record(j, rec_argv, NULL);
1991 }
1992 
1993 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1994 
1995 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1996 {
1997 	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1998 
1999 	if (IS_ERR(evsel))
2000 		return false;
2001 
2002 	if (perf_evsel__field(evsel, "pathname") == NULL) {
2003 		perf_evsel__delete(evsel);
2004 		return false;
2005 	}
2006 
2007 	evsel->handler = trace__vfs_getname;
2008 	perf_evlist__add(evlist, evsel);
2009 	return true;
2010 }
2011 
2012 static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
2013 {
2014 	struct perf_evsel *evsel;
2015 	struct perf_event_attr attr = {
2016 		.type = PERF_TYPE_SOFTWARE,
2017 		.mmap_data = 1,
2018 	};
2019 
2020 	attr.config = config;
2021 	attr.sample_period = 1;
2022 
2023 	event_attr_init(&attr);
2024 
2025 	evsel = perf_evsel__new(&attr);
2026 	if (evsel)
2027 		evsel->handler = trace__pgfault;
2028 
2029 	return evsel;
2030 }
2031 
2032 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2033 {
2034 	const u32 type = event->header.type;
2035 	struct perf_evsel *evsel;
2036 
2037 	if (type != PERF_RECORD_SAMPLE) {
2038 		trace__process_event(trace, trace->host, event, sample);
2039 		return;
2040 	}
2041 
2042 	evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2043 	if (evsel == NULL) {
2044 		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2045 		return;
2046 	}
2047 
2048 	trace__set_base_time(trace, evsel, sample);
2049 
2050 	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2051 	    sample->raw_data == NULL) {
2052 		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2053 		       perf_evsel__name(evsel), sample->tid,
2054 		       sample->cpu, sample->raw_size);
2055 	} else {
2056 		tracepoint_handler handler = evsel->handler;
2057 		handler(trace, evsel, event, sample);
2058 	}
2059 }
2060 
/*
 * Create the raw_syscalls:sys_enter/sys_exit tracepoint events, cache the
 * offsets of their payload fields ("args" and "ret") for fast access, and
 * add both to the evlist.
 *
 * Returns 0 on success, -1 on failure; on any intermediate error the
 * already-created evsels are deleted via the goto cleanup chain below.
 */
static int trace__add_syscall_newtp(struct trace *trace)
{
	int ret = -1;
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *sys_enter, *sys_exit;

	sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
	if (sys_enter == NULL)
		goto out;

	/* cache the offset of sys_enter's "args" field (macro takes the field name as a token) */
	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
		goto out_delete_sys_enter;

	sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
	if (sys_exit == NULL)
		goto out_delete_sys_enter;

	/* cache the offset of sys_exit's "ret" field */
	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
		goto out_delete_sys_exit;

	perf_evlist__add(evlist, sys_enter);
	perf_evlist__add(evlist, sys_exit);

	if (callchain_param.enabled && !trace->kernel_syscallchains) {
		/*
		 * We're interested only in the user space callchain
		 * leading to the syscall, allow overriding that for
		 * debugging reasons using --kernel_syscall_callchains
		 */
		sys_exit->attr.exclude_callchain_kernel = 1;
	}

	trace->syscalls.events.sys_enter = sys_enter;
	trace->syscalls.events.sys_exit  = sys_exit;

	ret = 0;
out:
	return ret;

out_delete_sys_exit:
	perf_evsel__delete_priv(sys_exit);
out_delete_sys_enter:
	perf_evsel__delete_priv(sys_enter);
	goto out;
}
2106 
2107 static int trace__set_ev_qualifier_filter(struct trace *trace)
2108 {
2109 	int err = -1;
2110 	struct perf_evsel *sys_exit;
2111 	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2112 						trace->ev_qualifier_ids.nr,
2113 						trace->ev_qualifier_ids.entries);
2114 
2115 	if (filter == NULL)
2116 		goto out_enomem;
2117 
2118 	if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2119 					  filter)) {
2120 		sys_exit = trace->syscalls.events.sys_exit;
2121 		err = perf_evsel__append_tp_filter(sys_exit, filter);
2122 	}
2123 
2124 	free(filter);
2125 out:
2126 	return err;
2127 out_enomem:
2128 	errno = ENOMEM;
2129 	goto out;
2130 }
2131 
/*
 * Live tracing mode: create the requested events (syscall tracepoints,
 * page faults, sched_stat_runtime), open and mmap them, optionally fork
 * the workload, then loop consuming the ring buffers until interrupted or
 * the workload finishes, printing each event as it arrives.
 *
 * Returns 0 on success or a negative error; summaries and tool stats are
 * printed on the success path before the evlist is torn down.
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
	int err = -1, i;
	unsigned long before;
	const bool forks = argc > 0;
	bool draining = false;

	trace->live = true;

	if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
		goto out_error_raw_syscalls;

	if (trace->trace_syscalls)
		trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);

	if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
		pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
		if (pgfault_maj == NULL)
			goto out_error_mem;
		perf_evlist__add(evlist, pgfault_maj);
	}

	if ((trace->trace_pgfaults & TRACE_PFMIN)) {
		pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
		if (pgfault_min == NULL)
			goto out_error_mem;
		perf_evlist__add(evlist, pgfault_min);
	}

	if (trace->sched &&
	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
				   trace__sched_stat_runtime))
		goto out_error_sched_stat_runtime;

	err = perf_evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
		goto out_delete_evlist;
	}

	perf_evlist__config(evlist, &trace->opts, NULL);

	if (callchain_param.enabled) {
		bool use_identifier = false;

		if (trace->syscalls.events.sys_exit) {
			perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
						     &trace->opts, &callchain_param);
			use_identifier = true;
		}

		if (pgfault_maj) {
			perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
			use_identifier = true;
		}

		if (pgfault_min) {
			perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
			use_identifier = true;
		}

		if (use_identifier) {
		       /*
			* Now we have evsels with different sample_ids, use
			* PERF_SAMPLE_IDENTIFIER to map from sample to evsel
			* from a fixed position in each ring buffer record.
			*
			* As of this the changeset introducing this comment, this
			* isn't strictly needed, as the fields that can come before
			* PERF_SAMPLE_ID are all used, but we'll probably disable
			* some of those for things like copying the payload of
			* pointer syscall arguments, and for vfs_getname we don't
			* need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
			* here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
			*/
			perf_evlist__set_sample_bit(evlist, IDENTIFIER);
			perf_evlist__reset_sample_bit(evlist, ID);
		}
	}

	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);

	if (forks) {
		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
						    argv, false, NULL);
		if (err < 0) {
			fprintf(trace->output, "Couldn't run the workload!\n");
			goto out_delete_evlist;
		}
	}

	err = perf_evlist__open(evlist);
	if (err < 0)
		goto out_error_open;

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_error_open;
	}

	/*
	 * Better not use !target__has_task() here because we need to cover the
	 * case where no threads were specified in the command line, but a
	 * workload was, and in that case we will fill in the thread_map when
	 * we fork the workload in perf_evlist__prepare_workload.
	 */
	if (trace->filter_pids.nr > 0)
		err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
	else if (thread_map__pid(evlist->threads, 0) == -1)
		err = perf_evlist__set_filter_pid(evlist, getpid());

	if (err < 0)
		goto out_error_mem;

	if (trace->ev_qualifier_ids.nr > 0) {
		err = trace__set_ev_qualifier_filter(trace);
		if (err < 0)
			goto out_errno;

		pr_debug("event qualifier tracepoint filter: %s\n",
			 trace->syscalls.events.sys_exit->filter);
	}

	err = perf_evlist__apply_filters(evlist, &evsel);
	if (err < 0)
		goto out_error_apply_filters;

	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
	if (err < 0)
		goto out_error_mmap;

	/* with --delay, enabling is postponed until after the workload starts */
	if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
		perf_evlist__enable(evlist);

	if (forks)
		perf_evlist__start_workload(evlist);

	if (trace->opts.initial_delay) {
		usleep(trace->opts.initial_delay * 1000);
		perf_evlist__enable(evlist);
	}

	trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
				  evlist->threads->nr > 1 ||
				  perf_evlist__first(evlist)->attr.inherit;
	/* main consume loop: sweep all mmaps, then poll if nothing new arrived */
again:
	before = trace->nr_events;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		union perf_event *event;

		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
			struct perf_sample sample;

			++trace->nr_events;

			err = perf_evlist__parse_sample(evlist, event, &sample);
			if (err) {
				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
				goto next_event;
			}

			trace__handle_event(trace, event, &sample);
next_event:
			perf_evlist__mmap_consume(evlist, i);

			if (interrupted)
				goto out_disable;

			/* on SIGCHLD/SIGINT: stop producing but keep draining what's buffered */
			if (done && !draining) {
				perf_evlist__disable(evlist);
				draining = true;
			}
		}
	}

	if (trace->nr_events == before) {
		int timeout = done ? 100 : -1;

		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
				draining = true;

			goto again;
		}
	} else {
		goto again;
	}

out_disable:
	thread__zput(trace->current);

	perf_evlist__disable(evlist);

	if (!err) {
		if (trace->summary)
			trace__fprintf_thread_summary(trace, trace->output);

		if (trace->show_tool_stats) {
			fprintf(trace->output, "Stats:\n "
					       " vfs_getname : %" PRIu64 "\n"
					       " proc_getname: %" PRIu64 "\n",
				trace->stats.vfs_getname,
				trace->stats.proc_getname);
		}
	}

out_delete_evlist:
	perf_evlist__delete(evlist);
	trace->evlist = NULL;
	trace->live = false;
	return err;
	/*
	 * Out-of-line error handling, placed after the unconditional return
	 * above so it is only reachable through the goto labels inside it;
	 * the bare braces exist solely to scope 'errbuf' for the
	 * strerror-style formatting helpers.
	 */
{
	char errbuf[BUFSIZ];

out_error_sched_stat_runtime:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
	goto out_error;

out_error_raw_syscalls:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
	goto out_error;

out_error_mmap:
	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
	goto out_error;

out_error_open:
	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

out_error:
	fprintf(trace->output, "%s\n", errbuf);
	goto out_delete_evlist;

out_error_apply_filters:
	fprintf(trace->output,
		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
		evsel->filter, perf_evsel__name(evsel), errno,
		str_error_r(errno, errbuf, sizeof(errbuf)));
	goto out_delete_evlist;
}
/* labels below don't need errbuf, so they live outside the scoped block */
out_error_mem:
	fprintf(trace->output, "Not enough memory to run!\n");
	goto out_delete_evlist;

out_errno:
	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
	goto out_delete_evlist;
}
2395 
/*
 * 'perf trace -i perf.data' mode: open a perf_session over the recorded
 * file, wire up the same per-event handlers used in live mode and replay
 * the recorded events, optionally printing the summary at the end.
 *
 * Returns 0 on success or a negative error from session processing.
 */
static int trace__replay(struct trace *trace)
{
	const struct perf_evsel_str_handler handlers[] = {
		{ "probe:vfs_getname",	     trace__vfs_getname, },
	};
	struct perf_data_file file = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = trace->force,
	};
	struct perf_session *session;
	struct perf_evsel *evsel;
	int err = -1;

	trace->tool.sample	  = trace__process_sample;
	trace->tool.mmap	  = perf_event__process_mmap;
	trace->tool.mmap2	  = perf_event__process_mmap2;
	trace->tool.comm	  = perf_event__process_comm;
	trace->tool.exit	  = perf_event__process_exit;
	trace->tool.fork	  = perf_event__process_fork;
	trace->tool.attr	  = perf_event__process_attr;
	trace->tool.tracing_data = perf_event__process_tracing_data;
	trace->tool.build_id	  = perf_event__process_build_id;

	/* the strace-like output needs events delivered in timestamp order */
	trace->tool.ordered_events = true;
	trace->tool.ordering_requires_timestamps = true;

	/* add tid to output */
	trace->multiple_threads = true;

	session = perf_session__new(&file, false, &trace->tool);
	if (session == NULL)
		return -1;

	if (trace->opts.target.pid)
		symbol_conf.pid_list_str = strdup(trace->opts.target.pid);

	if (trace->opts.target.tid)
		symbol_conf.tid_list_str = strdup(trace->opts.target.tid);

	if (symbol__init(&session->header.env) < 0)
		goto out;

	trace->host = &session->machines.host;

	err = perf_session__set_tracepoints_handlers(session, handlers);
	if (err)
		goto out;

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_enter");
	/* older kernels have syscalls tp versus raw_syscalls */
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_enter");

	if (evsel &&
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
		goto out;
	}

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_exit");
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_exit");
	if (evsel &&
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
		goto out;
	}

	/* route any recorded page fault software events to the pgfault printer */
	evlist__for_each_entry(session->evlist, evsel) {
		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
			evsel->handler = trace__pgfault;
	}

	setup_pager();

	err = perf_session__process_events(session);
	if (err)
		pr_err("Failed to process events, error %d", err);

	else if (trace->summary)
		trace__fprintf_thread_summary(trace, trace->output);

out:
	perf_session__delete(session);

	return err;
}
2493 
/*
 * Print the banner that precedes the per-thread summary, returning the
 * number of characters written.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2502 
/*
 * Resort the per-thread syscall stats by total time spent (msecs,
 * descending).  DEFINE_RESORT_RB() (see rb_resort.h) generates the sorted
 * copy machinery; the function body below is called for each source
 * rb_node 'nd' to fill in the resort 'entry' fields declared here.
 */
DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
	struct stats 	*stats;
	double		msecs;
	int		syscall;
)
{
	struct int_node *source = rb_entry(nd, struct int_node, rb_node);
	struct stats *stats = source->priv;

	entry->syscall = source->i;
	entry->stats   = stats;
	/* total msecs = nr_samples * average; the stats are kept in nanoseconds */
	entry->msecs   = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
}
2516 
/*
 * Print one thread's per-syscall statistics table (used by the -s/-S
 * summaries), sorted by total time spent in each syscall via the
 * syscall_stats resort tree declared above.  Returns the number of
 * characters printed (0 when the resort allocation fails).
 */
static size_t thread__dump_stats(struct thread_trace *ttrace,
				 struct trace *trace, FILE *fp)
{
	size_t printed = 0;
	struct syscall *sc;
	struct rb_node *nd;
	DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);

	if (syscall_stats == NULL)
		return 0;

	printed += fprintf(fp, "\n");

	printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
	printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");

	resort_rb__for_each_entry(nd, syscall_stats) {
		struct stats *stats = syscall_stats_entry->stats;
		if (stats) {
			/* stats accumulate nanoseconds, the table shows msecs */
			double min = (double)(stats->min) / NSEC_PER_MSEC;
			double max = (double)(stats->max) / NSEC_PER_MSEC;
			double avg = avg_stats(stats);
			double pct;
			u64 n = (u64) stats->n;

			/* stddev column is relative to the average, in percent */
			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
			avg /= NSEC_PER_MSEC;

			sc = &trace->syscalls.table[syscall_stats_entry->syscall];
			printed += fprintf(fp, "   %-15s", sc->name);
			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
					   n, syscall_stats_entry->msecs, min, avg);
			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
		}
	}

	resort_rb__delete(syscall_stats);
	printed += fprintf(fp, "\n\n");

	return printed;
}
2559 
2560 static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
2561 {
2562 	size_t printed = 0;
2563 	struct thread_trace *ttrace = thread__priv(thread);
2564 	double ratio;
2565 
2566 	if (ttrace == NULL)
2567 		return 0;
2568 
2569 	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2570 
2571 	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2572 	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2573 	printed += fprintf(fp, "%.1f%%", ratio);
2574 	if (ttrace->pfmaj)
2575 		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2576 	if (ttrace->pfmin)
2577 		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2578 	if (trace->sched)
2579 		printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2580 	else if (fputc('\n', fp) != EOF)
2581 		++printed;
2582 
2583 	printed += thread__dump_stats(ttrace, trace, fp);
2584 
2585 	return printed;
2586 }
2587 
2588 static unsigned long thread__nr_events(struct thread_trace *ttrace)
2589 {
2590 	return ttrace ? ttrace->nr_events : 0;
2591 }
2592 
/*
 * Resort the machine's threads rb_tree by per-thread event count for the
 * summary output.  The body below fills each resort 'entry' from its
 * source rb_node 'nd' (see the DEFINE_RESORT_RB() machinery in rb_resort.h).
 */
DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
	struct thread *thread;
)
{
	entry->thread = rb_entry(nd, struct thread, rb_node);
}
2599 
/*
 * Print the end-of-run summary: a header followed by one section per
 * thread, walked via the 'threads' resort tree.  Returns the number of
 * characters printed; note it returns 0 on resort failure even though the
 * header has already been written by then.
 */
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
{
	DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
	size_t printed = trace__fprintf_threads_header(fp);
	struct rb_node *nd;

	if (threads == NULL) {
		fprintf(fp, "%s", "Error sorting output by nr_events!\n");
		return 0;
	}

	resort_rb__for_each_entry(nd, threads)
		printed += trace__fprintf_thread(fp, threads_entry->thread, trace);

	resort_rb__delete(threads);

	return printed;
}
2618 
2619 static int trace__set_duration(const struct option *opt, const char *str,
2620 			       int unset __maybe_unused)
2621 {
2622 	struct trace *trace = opt->value;
2623 
2624 	trace->duration_filter = atof(str);
2625 	return 0;
2626 }
2627 
2628 static int trace__set_filter_pids(const struct option *opt, const char *str,
2629 				  int unset __maybe_unused)
2630 {
2631 	int ret = -1;
2632 	size_t i;
2633 	struct trace *trace = opt->value;
2634 	/*
2635 	 * FIXME: introduce a intarray class, plain parse csv and create a
2636 	 * { int nr, int entries[] } struct...
2637 	 */
2638 	struct intlist *list = intlist__new(str);
2639 
2640 	if (list == NULL)
2641 		return -1;
2642 
2643 	i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2644 	trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2645 
2646 	if (trace->filter_pids.entries == NULL)
2647 		goto out;
2648 
2649 	trace->filter_pids.entries[0] = getpid();
2650 
2651 	for (i = 1; i < trace->filter_pids.nr; ++i)
2652 		trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2653 
2654 	intlist__delete(list);
2655 	ret = 0;
2656 out:
2657 	return ret;
2658 }
2659 
2660 static int trace__open_output(struct trace *trace, const char *filename)
2661 {
2662 	struct stat st;
2663 
2664 	if (!stat(filename, &st) && st.st_size) {
2665 		char oldname[PATH_MAX];
2666 
2667 		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2668 		unlink(oldname);
2669 		rename(filename, oldname);
2670 	}
2671 
2672 	trace->output = fopen(filename, "w");
2673 
2674 	return trace->output == NULL ? -errno : 0;
2675 }
2676 
2677 static int parse_pagefaults(const struct option *opt, const char *str,
2678 			    int unset __maybe_unused)
2679 {
2680 	int *trace_pgfaults = opt->value;
2681 
2682 	if (strcmp(str, "all") == 0)
2683 		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2684 	else if (strcmp(str, "maj") == 0)
2685 		*trace_pgfaults |= TRACE_PFMAJ;
2686 	else if (strcmp(str, "min") == 0)
2687 		*trace_pgfaults |= TRACE_PFMIN;
2688 	else
2689 		return -1;
2690 
2691 	return 0;
2692 }
2693 
/*
 * Install the same sample handler on every event in the list; used for the
 * events added via --event, which all share one printing routine.
 */
static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		evsel->handler = handler;
}
2701 
2702 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2703 {
2704 	const char *trace_usage[] = {
2705 		"perf trace [<options>] [<command>]",
2706 		"perf trace [<options>] -- <command> [<options>]",
2707 		"perf trace record [<options>] [<command>]",
2708 		"perf trace record [<options>] -- <command> [<options>]",
2709 		NULL
2710 	};
2711 	struct trace trace = {
2712 		.syscalls = {
2713 			. max = -1,
2714 		},
2715 		.opts = {
2716 			.target = {
2717 				.uid	   = UINT_MAX,
2718 				.uses_mmap = true,
2719 			},
2720 			.user_freq     = UINT_MAX,
2721 			.user_interval = ULLONG_MAX,
2722 			.no_buffering  = true,
2723 			.mmap_pages    = UINT_MAX,
2724 			.proc_map_timeout  = 500,
2725 		},
2726 		.output = stderr,
2727 		.show_comm = true,
2728 		.trace_syscalls = true,
2729 		.kernel_syscallchains = false,
2730 		.max_stack = UINT_MAX,
2731 	};
2732 	const char *output_name = NULL;
2733 	const char *ev_qualifier_str = NULL;
2734 	const struct option trace_options[] = {
2735 	OPT_CALLBACK(0, "event", &trace.evlist, "event",
2736 		     "event selector. use 'perf list' to list available events",
2737 		     parse_events_option),
2738 	OPT_BOOLEAN(0, "comm", &trace.show_comm,
2739 		    "show the thread COMM next to its id"),
2740 	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2741 	OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2742 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
2743 	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2744 	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2745 		    "trace events on existing process id"),
2746 	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2747 		    "trace events on existing thread id"),
2748 	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2749 		     "pids to filter (by the kernel)", trace__set_filter_pids),
2750 	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2751 		    "system-wide collection from all CPUs"),
2752 	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2753 		    "list of cpus to monitor"),
2754 	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2755 		    "child tasks do not inherit counters"),
2756 	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2757 		     "number of mmap data pages",
2758 		     perf_evlist__parse_mmap_pages),
2759 	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2760 		   "user to profile"),
2761 	OPT_CALLBACK(0, "duration", &trace, "float",
2762 		     "show only events with duration > N.M ms",
2763 		     trace__set_duration),
2764 	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2765 	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2766 	OPT_BOOLEAN('T', "time", &trace.full_time,
2767 		    "Show full timestamp, not time relative to first start"),
2768 	OPT_BOOLEAN('s', "summary", &trace.summary_only,
2769 		    "Show only syscall summary with statistics"),
2770 	OPT_BOOLEAN('S', "with-summary", &trace.summary,
2771 		    "Show all syscalls and summary with statistics"),
2772 	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2773 		     "Trace pagefaults", parse_pagefaults, "maj"),
2774 	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2775 	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2776 	OPT_CALLBACK(0, "call-graph", &trace.opts,
2777 		     "record_mode[,record_size]", record_callchain_help,
2778 		     &record_parse_callchain_opt),
2779 	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2780 		    "Show the kernel callchains on the syscall exit path"),
2781 	OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2782 		     "Set the minimum stack depth when parsing the callchain, "
2783 		     "anything below the specified depth will be ignored."),
2784 	OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2785 		     "Set the maximum stack depth when parsing the callchain, "
2786 		     "anything beyond the specified depth will be ignored. "
2787 		     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
2788 	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2789 			"per thread proc mmap processing timeout in ms"),
2790 	OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
2791 		     "ms to wait before starting measurement after program "
2792 		     "start"),
2793 	OPT_END()
2794 	};
2795 	bool __maybe_unused max_stack_user_set = true;
2796 	bool mmap_pages_user_set = true;
2797 	const char * const trace_subcommands[] = { "record", NULL };
2798 	int err;
2799 	char bf[BUFSIZ];
2800 
2801 	signal(SIGSEGV, sighandler_dump_stack);
2802 	signal(SIGFPE, sighandler_dump_stack);
2803 
2804 	trace.evlist = perf_evlist__new();
2805 	trace.sctbl = syscalltbl__new();
2806 
2807 	if (trace.evlist == NULL || trace.sctbl == NULL) {
2808 		pr_err("Not enough memory to run!\n");
2809 		err = -ENOMEM;
2810 		goto out;
2811 	}
2812 
2813 	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2814 				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2815 
2816 	err = bpf__setup_stdout(trace.evlist);
2817 	if (err) {
2818 		bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2819 		pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2820 		goto out;
2821 	}
2822 
2823 	err = -1;
2824 
2825 	if (trace.trace_pgfaults) {
2826 		trace.opts.sample_address = true;
2827 		trace.opts.sample_time = true;
2828 	}
2829 
2830 	if (trace.opts.mmap_pages == UINT_MAX)
2831 		mmap_pages_user_set = false;
2832 
2833 	if (trace.max_stack == UINT_MAX) {
2834 		trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
2835 		max_stack_user_set = false;
2836 	}
2837 
2838 #ifdef HAVE_DWARF_UNWIND_SUPPORT
2839 	if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
2840 		record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2841 #endif
2842 
2843 	if (callchain_param.enabled) {
2844 		if (!mmap_pages_user_set && geteuid() == 0)
2845 			trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2846 
2847 		symbol_conf.use_callchain = true;
2848 	}
2849 
2850 	if (trace.evlist->nr_entries > 0)
2851 		evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2852 
2853 	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2854 		return trace__record(&trace, argc-1, &argv[1]);
2855 
2856 	/* summary_only implies summary option, but don't overwrite summary if set */
2857 	if (trace.summary_only)
2858 		trace.summary = trace.summary_only;
2859 
2860 	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2861 	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
2862 		pr_err("Please specify something to trace.\n");
2863 		return -1;
2864 	}
2865 
2866 	if (!trace.trace_syscalls && ev_qualifier_str) {
2867 		pr_err("The -e option can't be used with --no-syscalls.\n");
2868 		goto out;
2869 	}
2870 
2871 	if (output_name != NULL) {
2872 		err = trace__open_output(&trace, output_name);
2873 		if (err < 0) {
2874 			perror("failed to create output file");
2875 			goto out;
2876 		}
2877 	}
2878 
2879 	trace.open_id = syscalltbl__id(trace.sctbl, "open");
2880 
2881 	if (ev_qualifier_str != NULL) {
2882 		const char *s = ev_qualifier_str;
2883 		struct strlist_config slist_config = {
2884 			.dirname = system_path(STRACE_GROUPS_DIR),
2885 		};
2886 
2887 		trace.not_ev_qualifier = *s == '!';
2888 		if (trace.not_ev_qualifier)
2889 			++s;
2890 		trace.ev_qualifier = strlist__new(s, &slist_config);
2891 		if (trace.ev_qualifier == NULL) {
2892 			fputs("Not enough memory to parse event qualifier",
2893 			      trace.output);
2894 			err = -ENOMEM;
2895 			goto out_close;
2896 		}
2897 
2898 		err = trace__validate_ev_qualifier(&trace);
2899 		if (err)
2900 			goto out_close;
2901 	}
2902 
2903 	err = target__validate(&trace.opts.target);
2904 	if (err) {
2905 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2906 		fprintf(trace.output, "%s", bf);
2907 		goto out_close;
2908 	}
2909 
2910 	err = target__parse_uid(&trace.opts.target);
2911 	if (err) {
2912 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2913 		fprintf(trace.output, "%s", bf);
2914 		goto out_close;
2915 	}
2916 
2917 	if (!argc && target__none(&trace.opts.target))
2918 		trace.opts.target.system_wide = true;
2919 
2920 	if (input_name)
2921 		err = trace__replay(&trace);
2922 	else
2923 		err = trace__run(&trace, argc, argv);
2924 
2925 out_close:
2926 	if (output_name != NULL)
2927 		fclose(trace.output);
2928 out:
2929 	return err;
2930 }
2931