xref: /linux/tools/perf/builtin-trace.c (revision 49af9e93adfa11d50435aa079299a765843532fc)
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 
14 #include <libaudit.h>
15 #include <stdlib.h>
16 #include <sys/eventfd.h>
17 #include <sys/mman.h>
18 #include <linux/futex.h>
19 
/*
 * Fallbacks for constants that the system headers shipped by older
 * distros may not provide.
 */
#if !defined(MAP_STACK)
# define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE	13
#endif
36 
37 struct syscall_arg {
38 	unsigned long val;
39 	void	      *parm;
40 	u8	      idx;
41 	u8	      mask;
42 };
43 
/* A table of names, indexed by the integer value being decoded. */
struct strarray {
	int	    nr_entries;
	const char **entries;
};

/*
 * Define 'strarray__<array>', a struct strarray wrapping the string
 * array called 'array'.  Callers add the storage class themselves.
 */
#define DEFINE_STRARRAY(array)				\
	struct strarray strarray__##array = {		\
		.nr_entries = ARRAY_SIZE(array),	\
		.entries    = array,			\
	}
53 
54 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
55 					      struct syscall_arg *arg)
56 {
57 	int idx = arg->val;
58 	struct strarray *sa = arg->parm;
59 
60 	if (idx < 0 || idx >= sa->nr_entries)
61 		return scnprintf(bf, size, "%d", idx);
62 
63 	return scnprintf(bf, size, "%s", sa->entries[idx]);
64 }
65 
66 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
67 
68 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
69 					 struct syscall_arg *arg)
70 {
71 	return scnprintf(bf, size, "%#lx", arg->val);
72 }
73 
74 #define SCA_HEX syscall_arg__scnprintf_hex
75 
76 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
77 					       struct syscall_arg *arg)
78 {
79 	int printed = 0, prot = arg->val;
80 
81 	if (prot == PROT_NONE)
82 		return scnprintf(bf, size, "NONE");
83 #define	P_MMAP_PROT(n) \
84 	if (prot & PROT_##n) { \
85 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
86 		prot &= ~PROT_##n; \
87 	}
88 
89 	P_MMAP_PROT(EXEC);
90 	P_MMAP_PROT(READ);
91 	P_MMAP_PROT(WRITE);
92 #ifdef PROT_SEM
93 	P_MMAP_PROT(SEM);
94 #endif
95 	P_MMAP_PROT(GROWSDOWN);
96 	P_MMAP_PROT(GROWSUP);
97 #undef P_MMAP_PROT
98 
99 	if (prot)
100 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
101 
102 	return printed;
103 }
104 
105 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
106 
107 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
108 						struct syscall_arg *arg)
109 {
110 	int printed = 0, flags = arg->val;
111 
112 #define	P_MMAP_FLAG(n) \
113 	if (flags & MAP_##n) { \
114 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
115 		flags &= ~MAP_##n; \
116 	}
117 
118 	P_MMAP_FLAG(SHARED);
119 	P_MMAP_FLAG(PRIVATE);
120 #ifdef MAP_32BIT
121 	P_MMAP_FLAG(32BIT);
122 #endif
123 	P_MMAP_FLAG(ANONYMOUS);
124 	P_MMAP_FLAG(DENYWRITE);
125 	P_MMAP_FLAG(EXECUTABLE);
126 	P_MMAP_FLAG(FILE);
127 	P_MMAP_FLAG(FIXED);
128 	P_MMAP_FLAG(GROWSDOWN);
129 #ifdef MAP_HUGETLB
130 	P_MMAP_FLAG(HUGETLB);
131 #endif
132 	P_MMAP_FLAG(LOCKED);
133 	P_MMAP_FLAG(NONBLOCK);
134 	P_MMAP_FLAG(NORESERVE);
135 	P_MMAP_FLAG(POPULATE);
136 	P_MMAP_FLAG(STACK);
137 #ifdef MAP_UNINITIALIZED
138 	P_MMAP_FLAG(UNINITIALIZED);
139 #endif
140 #undef P_MMAP_FLAG
141 
142 	if (flags)
143 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
144 
145 	return printed;
146 }
147 
148 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
149 
150 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
151 						      struct syscall_arg *arg)
152 {
153 	int behavior = arg->val;
154 
155 	switch (behavior) {
156 #define	P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
157 	P_MADV_BHV(NORMAL);
158 	P_MADV_BHV(RANDOM);
159 	P_MADV_BHV(SEQUENTIAL);
160 	P_MADV_BHV(WILLNEED);
161 	P_MADV_BHV(DONTNEED);
162 	P_MADV_BHV(REMOVE);
163 	P_MADV_BHV(DONTFORK);
164 	P_MADV_BHV(DOFORK);
165 	P_MADV_BHV(HWPOISON);
166 #ifdef MADV_SOFT_OFFLINE
167 	P_MADV_BHV(SOFT_OFFLINE);
168 #endif
169 	P_MADV_BHV(MERGEABLE);
170 	P_MADV_BHV(UNMERGEABLE);
171 #ifdef MADV_HUGEPAGE
172 	P_MADV_BHV(HUGEPAGE);
173 #endif
174 #ifdef MADV_NOHUGEPAGE
175 	P_MADV_BHV(NOHUGEPAGE);
176 #endif
177 #ifdef MADV_DONTDUMP
178 	P_MADV_BHV(DONTDUMP);
179 #endif
180 #ifdef MADV_DODUMP
181 	P_MADV_BHV(DODUMP);
182 #endif
183 #undef P_MADV_PHV
184 	default: break;
185 	}
186 
187 	return scnprintf(bf, size, "%#x", behavior);
188 }
189 
190 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
191 
192 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
193 {
194 	enum syscall_futex_args {
195 		SCF_UADDR   = (1 << 0),
196 		SCF_OP	    = (1 << 1),
197 		SCF_VAL	    = (1 << 2),
198 		SCF_TIMEOUT = (1 << 3),
199 		SCF_UADDR2  = (1 << 4),
200 		SCF_VAL3    = (1 << 5),
201 	};
202 	int op = arg->val;
203 	int cmd = op & FUTEX_CMD_MASK;
204 	size_t printed = 0;
205 
206 	switch (cmd) {
207 #define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
208 	P_FUTEX_OP(WAIT);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
209 	P_FUTEX_OP(WAKE);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
210 	P_FUTEX_OP(FD);		    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
211 	P_FUTEX_OP(REQUEUE);	    arg->mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
212 	P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;			  break;
213 	P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;			  break;
214 	P_FUTEX_OP(WAKE_OP);							  break;
215 	P_FUTEX_OP(LOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
216 	P_FUTEX_OP(UNLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
217 	P_FUTEX_OP(TRYLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
218 	P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;			  break;
219 	P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;			  break;
220 	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
221 	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
222 	}
223 
224 	if (op & FUTEX_PRIVATE_FLAG)
225 		printed += scnprintf(bf + printed, size - printed, "|PRIV");
226 
227 	if (op & FUTEX_CLOCK_REALTIME)
228 		printed += scnprintf(bf + printed, size - printed, "|CLKRT");
229 
230 	return printed;
231 }
232 
233 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
234 
/*
 * Name tables for SCA_STRARRAY: the argument value is used directly as
 * the index, so the position of each string matters.
 */

/* 'which' argument of getitimer/setitimer. */
static const char *itimers[] = {
	"REAL",
	"VIRTUAL",
	"PROF",
};
static DEFINE_STRARRAY(itimers);

/* 'whence' argument of lseek. */
static const char *whences[] = {
	"SET",
	"CUR",
	"END",
#ifdef SEEK_DATA
	"DATA",
#endif
#ifdef SEEK_HOLE
	"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

/* 'cmd' argument of fcntl. */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK",
	"SETLK", "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG",
	"F_GETLK64", "F_SETLK64", "F_SETLKW64",
	"F_SETOWN_EX", "F_GETOWN_EX",
	"F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

/* 'resource' argument of getrlimit/setrlimit/prlimit64. */
static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK",
	"CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING",
	"MSGQUEUE", "NICE", "RTPRIO", "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

/* 'how' argument of rt_sigprocmask. */
static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
static DEFINE_STRARRAY(sighow);
265 
/*
 * Names for the 'family' argument of socket(), indexed by the AF_*
 * value.  The position of every string has to match the numeric value
 * of the corresponding AF_* constant: value 28 sits between IB (27) and
 * CAN (29), so it needs an entry of its own ("MPLS") to keep CAN and
 * everything after it from being printed with the name of the next
 * lower family.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",   /*  0 -  6 */
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",   /*  7 - 13 */
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC", /* 14 - 20 */
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB",		   /* 21 - 27 */
	"MPLS", "CAN", "TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN",	   /* 28 - 34 */
	"PHONET", "IEEE802154", "CAIF", "ALG", "NFC", "VSOCK",		   /* 35 - 40 */
};
static DEFINE_STRARRAY(socket_families);
275 
276 #ifndef SOCK_TYPE_MASK
277 #define SOCK_TYPE_MASK 0xf
278 #endif
279 
280 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
281 						      struct syscall_arg *arg)
282 {
283 	size_t printed;
284 	int type = arg->val,
285 	    flags = type & ~SOCK_TYPE_MASK;
286 
287 	type &= SOCK_TYPE_MASK;
288 	/*
289  	 * Can't use a strarray, MIPS may override for ABI reasons.
290  	 */
291 	switch (type) {
292 #define	P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
293 	P_SK_TYPE(STREAM);
294 	P_SK_TYPE(DGRAM);
295 	P_SK_TYPE(RAW);
296 	P_SK_TYPE(RDM);
297 	P_SK_TYPE(SEQPACKET);
298 	P_SK_TYPE(DCCP);
299 	P_SK_TYPE(PACKET);
300 #undef P_SK_TYPE
301 	default:
302 		printed = scnprintf(bf, size, "%#x", type);
303 	}
304 
305 #define	P_SK_FLAG(n) \
306 	if (flags & SOCK_##n) { \
307 		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
308 		flags &= ~SOCK_##n; \
309 	}
310 
311 	P_SK_FLAG(CLOEXEC);
312 	P_SK_FLAG(NONBLOCK);
313 #undef P_SK_FLAG
314 
315 	if (flags)
316 		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
317 
318 	return printed;
319 }
320 
321 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
322 
323 #ifndef MSG_PROBE
324 #define MSG_PROBE	     0x10
325 #endif
326 #ifndef MSG_SENDPAGE_NOTLAST
327 #define MSG_SENDPAGE_NOTLAST 0x20000
328 #endif
329 #ifndef MSG_FASTOPEN
330 #define MSG_FASTOPEN	     0x20000000
331 #endif
332 
333 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
334 					       struct syscall_arg *arg)
335 {
336 	int printed = 0, flags = arg->val;
337 
338 	if (flags == 0)
339 		return scnprintf(bf, size, "NONE");
340 #define	P_MSG_FLAG(n) \
341 	if (flags & MSG_##n) { \
342 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
343 		flags &= ~MSG_##n; \
344 	}
345 
346 	P_MSG_FLAG(OOB);
347 	P_MSG_FLAG(PEEK);
348 	P_MSG_FLAG(DONTROUTE);
349 	P_MSG_FLAG(TRYHARD);
350 	P_MSG_FLAG(CTRUNC);
351 	P_MSG_FLAG(PROBE);
352 	P_MSG_FLAG(TRUNC);
353 	P_MSG_FLAG(DONTWAIT);
354 	P_MSG_FLAG(EOR);
355 	P_MSG_FLAG(WAITALL);
356 	P_MSG_FLAG(FIN);
357 	P_MSG_FLAG(SYN);
358 	P_MSG_FLAG(CONFIRM);
359 	P_MSG_FLAG(RST);
360 	P_MSG_FLAG(ERRQUEUE);
361 	P_MSG_FLAG(NOSIGNAL);
362 	P_MSG_FLAG(MORE);
363 	P_MSG_FLAG(WAITFORONE);
364 	P_MSG_FLAG(SENDPAGE_NOTLAST);
365 	P_MSG_FLAG(FASTOPEN);
366 	P_MSG_FLAG(CMSG_CLOEXEC);
367 #undef P_MSG_FLAG
368 
369 	if (flags)
370 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
371 
372 	return printed;
373 }
374 
375 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
376 
377 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
378 						 struct syscall_arg *arg)
379 {
380 	size_t printed = 0;
381 	int mode = arg->val;
382 
383 	if (mode == F_OK) /* 0 */
384 		return scnprintf(bf, size, "F");
385 #define	P_MODE(n) \
386 	if (mode & n##_OK) { \
387 		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
388 		mode &= ~n##_OK; \
389 	}
390 
391 	P_MODE(R);
392 	P_MODE(W);
393 	P_MODE(X);
394 #undef P_MODE
395 
396 	if (mode)
397 		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
398 
399 	return printed;
400 }
401 
402 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
403 
404 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
405 					       struct syscall_arg *arg)
406 {
407 	int printed = 0, flags = arg->val;
408 
409 	if (!(flags & O_CREAT))
410 		arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
411 
412 	if (flags == 0)
413 		return scnprintf(bf, size, "RDONLY");
414 #define	P_FLAG(n) \
415 	if (flags & O_##n) { \
416 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
417 		flags &= ~O_##n; \
418 	}
419 
420 	P_FLAG(APPEND);
421 	P_FLAG(ASYNC);
422 	P_FLAG(CLOEXEC);
423 	P_FLAG(CREAT);
424 	P_FLAG(DIRECT);
425 	P_FLAG(DIRECTORY);
426 	P_FLAG(EXCL);
427 	P_FLAG(LARGEFILE);
428 	P_FLAG(NOATIME);
429 	P_FLAG(NOCTTY);
430 #ifdef O_NONBLOCK
431 	P_FLAG(NONBLOCK);
432 #elif O_NDELAY
433 	P_FLAG(NDELAY);
434 #endif
435 #ifdef O_PATH
436 	P_FLAG(PATH);
437 #endif
438 	P_FLAG(RDWR);
439 #ifdef O_DSYNC
440 	if ((flags & O_SYNC) == O_SYNC)
441 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
442 	else {
443 		P_FLAG(DSYNC);
444 	}
445 #else
446 	P_FLAG(SYNC);
447 #endif
448 	P_FLAG(TRUNC);
449 	P_FLAG(WRONLY);
450 #undef P_FLAG
451 
452 	if (flags)
453 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
454 
455 	return printed;
456 }
457 
458 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
459 
460 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
461 						   struct syscall_arg *arg)
462 {
463 	int printed = 0, flags = arg->val;
464 
465 	if (flags == 0)
466 		return scnprintf(bf, size, "NONE");
467 #define	P_FLAG(n) \
468 	if (flags & EFD_##n) { \
469 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
470 		flags &= ~EFD_##n; \
471 	}
472 
473 	P_FLAG(SEMAPHORE);
474 	P_FLAG(CLOEXEC);
475 	P_FLAG(NONBLOCK);
476 #undef P_FLAG
477 
478 	if (flags)
479 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
480 
481 	return printed;
482 }
483 
484 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
485 
486 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
487 {
488 	int sig = arg->val;
489 
490 	switch (sig) {
491 #define	P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
492 	P_SIGNUM(HUP);
493 	P_SIGNUM(INT);
494 	P_SIGNUM(QUIT);
495 	P_SIGNUM(ILL);
496 	P_SIGNUM(TRAP);
497 	P_SIGNUM(ABRT);
498 	P_SIGNUM(BUS);
499 	P_SIGNUM(FPE);
500 	P_SIGNUM(KILL);
501 	P_SIGNUM(USR1);
502 	P_SIGNUM(SEGV);
503 	P_SIGNUM(USR2);
504 	P_SIGNUM(PIPE);
505 	P_SIGNUM(ALRM);
506 	P_SIGNUM(TERM);
507 	P_SIGNUM(STKFLT);
508 	P_SIGNUM(CHLD);
509 	P_SIGNUM(CONT);
510 	P_SIGNUM(STOP);
511 	P_SIGNUM(TSTP);
512 	P_SIGNUM(TTIN);
513 	P_SIGNUM(TTOU);
514 	P_SIGNUM(URG);
515 	P_SIGNUM(XCPU);
516 	P_SIGNUM(XFSZ);
517 	P_SIGNUM(VTALRM);
518 	P_SIGNUM(PROF);
519 	P_SIGNUM(WINCH);
520 	P_SIGNUM(IO);
521 	P_SIGNUM(PWR);
522 	P_SIGNUM(SYS);
523 	default: break;
524 	}
525 
526 	return scnprintf(bf, size, "%#x", sig);
527 }
528 
529 #define SCA_SIGNUM syscall_arg__scnprintf_signum
530 
531 static struct syscall_fmt {
532 	const char *name;
533 	const char *alias;
534 	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
535 	void	   *arg_parm[6];
536 	bool	   errmsg;
537 	bool	   timeout;
538 	bool	   hexret;
539 } syscall_fmts[] = {
540 	{ .name	    = "access",	    .errmsg = true,
541 	  .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
542 	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
543 	{ .name	    = "brk",	    .hexret = true,
544 	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
545 	{ .name	    = "connect",    .errmsg = true, },
546 	{ .name	    = "eventfd2",   .errmsg = true,
547 	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
548 	{ .name	    = "fcntl",	    .errmsg = true,
549 	  .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
550 	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
551 	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat", },
552 	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat", },
553 	{ .name	    = "futex",	    .errmsg = true,
554 	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
555 	{ .name	    = "getitimer",  .errmsg = true,
556 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
557 	  .arg_parm	 = { [0] = &strarray__itimers, /* which */ }, },
558 	{ .name	    = "getrlimit",  .errmsg = true,
559 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
560 	  .arg_parm	 = { [0] = &strarray__rlimit_resources, /* resource */ }, },
561 	{ .name	    = "ioctl",	    .errmsg = true,
562 	  .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
563 	{ .name	    = "kill",	    .errmsg = true,
564 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
565 	{ .name	    = "lseek",	    .errmsg = true,
566 	  .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
567 	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
568 	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
569 	{ .name     = "madvise",    .errmsg = true,
570 	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
571 			     [2] = SCA_MADV_BHV, /* behavior */ }, },
572 	{ .name	    = "mmap",	    .hexret = true,
573 	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
574 			     [2] = SCA_MMAP_PROT, /* prot */
575 			     [3] = SCA_MMAP_FLAGS, /* flags */ }, },
576 	{ .name	    = "mprotect",   .errmsg = true,
577 	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
578 			     [2] = SCA_MMAP_PROT, /* prot */ }, },
579 	{ .name	    = "mremap",	    .hexret = true,
580 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
581 			     [4] = SCA_HEX, /* new_addr */ }, },
582 	{ .name	    = "munmap",	    .errmsg = true,
583 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
584 	{ .name	    = "open",	    .errmsg = true,
585 	  .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
586 	{ .name	    = "open_by_handle_at", .errmsg = true,
587 	  .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
588 	{ .name	    = "openat",	    .errmsg = true,
589 	  .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
590 	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
591 	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
592 	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64", },
593 	{ .name	    = "prlimit64",  .errmsg = true,
594 	  .arg_scnprintf = { [1] = SCA_STRARRAY, /* resource */ },
595 	  .arg_parm	 = { [1] = &strarray__rlimit_resources, /* resource */ }, },
596 	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64", },
597 	{ .name	    = "read",	    .errmsg = true, },
598 	{ .name	    = "recvfrom",   .errmsg = true,
599 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
600 	{ .name	    = "recvmmsg",   .errmsg = true,
601 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
602 	{ .name	    = "recvmsg",    .errmsg = true,
603 	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
604 	{ .name	    = "rt_sigaction", .errmsg = true,
605 	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
606 	{ .name	    = "rt_sigprocmask", .errmsg = true,
607 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* how */ },
608 	  .arg_parm	 = { [0] = &strarray__sighow, /* how */ }, },
609 	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
610 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
611 	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
612 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
613 	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
614 	{ .name	    = "sendmmsg",    .errmsg = true,
615 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
616 	{ .name	    = "sendmsg",    .errmsg = true,
617 	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
618 	{ .name	    = "sendto",	    .errmsg = true,
619 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
620 	{ .name	    = "setitimer",  .errmsg = true,
621 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
622 	  .arg_parm	 = { [0] = &strarray__itimers, /* which */ }, },
623 	{ .name	    = "setrlimit",  .errmsg = true,
624 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
625 	  .arg_parm	 = { [0] = &strarray__rlimit_resources, /* resource */ }, },
626 	{ .name	    = "socket",	    .errmsg = true,
627 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
628 			     [1] = SCA_SK_TYPE, /* type */ },
629 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
630 	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat", },
631 	{ .name	    = "tgkill",	    .errmsg = true,
632 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
633 	{ .name	    = "tkill",	    .errmsg = true,
634 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
635 	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
636 };
637 
638 static int syscall_fmt__cmp(const void *name, const void *fmtp)
639 {
640 	const struct syscall_fmt *fmt = fmtp;
641 	return strcmp(name, fmt->name);
642 }
643 
644 static struct syscall_fmt *syscall_fmt__find(const char *name)
645 {
646 	const int nmemb = ARRAY_SIZE(syscall_fmts);
647 	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
648 }
649 
/*
 * What is known about one syscall; entries live in trace->syscalls.table
 * and are indexed by syscall id.
 */
struct syscall {
	/* Format of the syscall's sys_enter tracepoint. */
	struct event_format *tp_format;
	/* Syscall name; NULL while the entry has not been read yet. */
	const char	    *name;
	/* Excluded by the event qualifier list, events are ignored. */
	bool		    filtered;
	/* Formatting hints from syscall_fmts, NULL when there are none. */
	struct syscall_fmt  *fmt;
	/* Formatter for each argument, NULL entries use the default. */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* Data for the formatter of the same argument position. */
	void		    **arg_parm;
};
658 
659 static size_t fprintf_duration(unsigned long t, FILE *fp)
660 {
661 	double duration = (double)t / NSEC_PER_MSEC;
662 	size_t printed = fprintf(fp, "(");
663 
664 	if (duration >= 1.0)
665 		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
666 	else if (duration >= 0.01)
667 		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
668 	else
669 		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
670 	return printed + fprintf(fp, "): ");
671 }
672 
673 struct thread_trace {
674 	u64		  entry_time;
675 	u64		  exit_time;
676 	bool		  entry_pending;
677 	unsigned long	  nr_events;
678 	char		  *entry_str;
679 	double		  runtime_ms;
680 };
681 
682 static struct thread_trace *thread_trace__new(void)
683 {
684 	return zalloc(sizeof(struct thread_trace));
685 }
686 
687 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
688 {
689 	struct thread_trace *ttrace;
690 
691 	if (thread == NULL)
692 		goto fail;
693 
694 	if (thread->priv == NULL)
695 		thread->priv = thread_trace__new();
696 
697 	if (thread->priv == NULL)
698 		goto fail;
699 
700 	ttrace = thread->priv;
701 	++ttrace->nr_events;
702 
703 	return ttrace;
704 fail:
705 	color_fprintf(fp, PERF_COLOR_RED,
706 		      "WARNING: not enough memory, dropping samples!\n");
707 	return NULL;
708 }
709 
710 struct trace {
711 	struct perf_tool	tool;
712 	int			audit_machine;
713 	struct {
714 		int		max;
715 		struct syscall  *table;
716 	} syscalls;
717 	struct perf_record_opts opts;
718 	struct machine		host;
719 	u64			base_time;
720 	bool			full_time;
721 	FILE			*output;
722 	unsigned long		nr_events;
723 	struct strlist		*ev_qualifier;
724 	bool			not_ev_qualifier;
725 	struct intlist		*tid_list;
726 	struct intlist		*pid_list;
727 	bool			sched;
728 	bool			multiple_threads;
729 	double			duration_filter;
730 	double			runtime_ms;
731 };
732 
733 static bool trace__filter_duration(struct trace *trace, double t)
734 {
735 	return t < (trace->duration_filter * NSEC_PER_MSEC);
736 }
737 
738 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
739 {
740 	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
741 
742 	return fprintf(fp, "%10.3f ", ts);
743 }
744 
745 static bool done = false;
746 
747 static void sig_handler(int sig __maybe_unused)
748 {
749 	done = true;
750 }
751 
752 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
753 					u64 duration, u64 tstamp, FILE *fp)
754 {
755 	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
756 	printed += fprintf_duration(duration, fp);
757 
758 	if (trace->multiple_threads)
759 		printed += fprintf(fp, "%d ", thread->tid);
760 
761 	return printed;
762 }
763 
764 static int trace__process_event(struct trace *trace, struct machine *machine,
765 				union perf_event *event)
766 {
767 	int ret = 0;
768 
769 	switch (event->header.type) {
770 	case PERF_RECORD_LOST:
771 		color_fprintf(trace->output, PERF_COLOR_RED,
772 			      "LOST %" PRIu64 " events!\n", event->lost.lost);
773 		ret = machine__process_lost_event(machine, event);
774 	default:
775 		ret = machine__process_event(machine, event);
776 		break;
777 	}
778 
779 	return ret;
780 }
781 
782 static int trace__tool_process(struct perf_tool *tool,
783 			       union perf_event *event,
784 			       struct perf_sample *sample __maybe_unused,
785 			       struct machine *machine)
786 {
787 	struct trace *trace = container_of(tool, struct trace, tool);
788 	return trace__process_event(trace, machine, event);
789 }
790 
791 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
792 {
793 	int err = symbol__init();
794 
795 	if (err)
796 		return err;
797 
798 	machine__init(&trace->host, "", HOST_KERNEL_ID);
799 	machine__create_kernel_maps(&trace->host);
800 
801 	if (perf_target__has_task(&trace->opts.target)) {
802 		err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
803 							trace__tool_process,
804 							&trace->host);
805 	} else {
806 		err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
807 						     &trace->host);
808 	}
809 
810 	if (err)
811 		symbol__exit();
812 
813 	return err;
814 }
815 
816 static int syscall__set_arg_fmts(struct syscall *sc)
817 {
818 	struct format_field *field;
819 	int idx = 0;
820 
821 	sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
822 	if (sc->arg_scnprintf == NULL)
823 		return -1;
824 
825 	if (sc->fmt)
826 		sc->arg_parm = sc->fmt->arg_parm;
827 
828 	for (field = sc->tp_format->format.fields->next; field; field = field->next) {
829 		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
830 			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
831 		else if (field->flags & FIELD_IS_POINTER)
832 			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
833 		++idx;
834 	}
835 
836 	return 0;
837 }
838 
839 static int trace__read_syscall_info(struct trace *trace, int id)
840 {
841 	char tp_name[128];
842 	struct syscall *sc;
843 	const char *name = audit_syscall_to_name(id, trace->audit_machine);
844 
845 	if (name == NULL)
846 		return -1;
847 
848 	if (id > trace->syscalls.max) {
849 		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
850 
851 		if (nsyscalls == NULL)
852 			return -1;
853 
854 		if (trace->syscalls.max != -1) {
855 			memset(nsyscalls + trace->syscalls.max + 1, 0,
856 			       (id - trace->syscalls.max) * sizeof(*sc));
857 		} else {
858 			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
859 		}
860 
861 		trace->syscalls.table = nsyscalls;
862 		trace->syscalls.max   = id;
863 	}
864 
865 	sc = trace->syscalls.table + id;
866 	sc->name = name;
867 
868 	if (trace->ev_qualifier) {
869 		bool in = strlist__find(trace->ev_qualifier, name) != NULL;
870 
871 		if (!(in ^ trace->not_ev_qualifier)) {
872 			sc->filtered = true;
873 			/*
874 			 * No need to do read tracepoint information since this will be
875 			 * filtered out.
876 			 */
877 			return 0;
878 		}
879 	}
880 
881 	sc->fmt  = syscall_fmt__find(sc->name);
882 
883 	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
884 	sc->tp_format = event_format__new("syscalls", tp_name);
885 
886 	if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
887 		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
888 		sc->tp_format = event_format__new("syscalls", tp_name);
889 	}
890 
891 	if (sc->tp_format == NULL)
892 		return -1;
893 
894 	return syscall__set_arg_fmts(sc);
895 }
896 
897 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
898 				      unsigned long *args)
899 {
900 	size_t printed = 0;
901 
902 	if (sc->tp_format != NULL) {
903 		struct format_field *field;
904 		u8 bit = 1;
905 		struct syscall_arg arg = {
906 			.idx  = 0,
907 			.mask = 0,
908 		};
909 
910 		for (field = sc->tp_format->format.fields->next; field;
911 		     field = field->next, ++arg.idx, bit <<= 1) {
912 			if (arg.mask & bit)
913 				continue;
914 
915 			if (args[arg.idx] == 0)
916 				continue;
917 
918 			printed += scnprintf(bf + printed, size - printed,
919 					     "%s%s: ", printed ? ", " : "", field->name);
920 			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
921 				arg.val = args[arg.idx];
922 				if (sc->arg_parm)
923 					arg.parm = sc->arg_parm[arg.idx];
924 				printed += sc->arg_scnprintf[arg.idx](bf + printed,
925 								      size - printed, &arg);
926 			} else {
927 				printed += scnprintf(bf + printed, size - printed,
928 						     "%ld", args[arg.idx]);
929 			}
930 		}
931 	} else {
932 		int i = 0;
933 
934 		while (i < 6) {
935 			printed += scnprintf(bf + printed, size - printed,
936 					     "%sarg%d: %ld",
937 					     printed ? ", " : "", i, args[i]);
938 			++i;
939 		}
940 	}
941 
942 	return printed;
943 }
944 
/*
 * Signature of the per-tracepoint sample handlers, such as
 * trace__sys_enter() and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  struct perf_sample *sample);
947 
948 static struct syscall *trace__syscall_info(struct trace *trace,
949 					   struct perf_evsel *evsel,
950 					   struct perf_sample *sample)
951 {
952 	int id = perf_evsel__intval(evsel, sample, "id");
953 
954 	if (id < 0) {
955 
956 		/*
957 		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
958 		 * before that, leaving at a higher verbosity level till that is
959 		 * explained. Reproduced with plain ftrace with:
960 		 *
961 		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
962 		 * grep "NR -1 " /t/trace_pipe
963 		 *
964 		 * After generating some load on the machine.
965  		 */
966 		if (verbose > 1) {
967 			static u64 n;
968 			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
969 				id, perf_evsel__name(evsel), ++n);
970 		}
971 		return NULL;
972 	}
973 
974 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
975 	    trace__read_syscall_info(trace, id))
976 		goto out_cant_read;
977 
978 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
979 		goto out_cant_read;
980 
981 	return &trace->syscalls.table[id];
982 
983 out_cant_read:
984 	if (verbose) {
985 		fprintf(trace->output, "Problems reading syscall %d", id);
986 		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
987 			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
988 		fputs(" information\n", trace->output);
989 	}
990 	return NULL;
991 }
992 
993 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
994 			    struct perf_sample *sample)
995 {
996 	char *msg;
997 	void *args;
998 	size_t printed = 0;
999 	struct thread *thread;
1000 	struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1001 	struct thread_trace *ttrace;
1002 
1003 	if (sc == NULL)
1004 		return -1;
1005 
1006 	if (sc->filtered)
1007 		return 0;
1008 
1009 	thread = machine__findnew_thread(&trace->host, sample->pid,
1010 					 sample->tid);
1011 	ttrace = thread__trace(thread, trace->output);
1012 	if (ttrace == NULL)
1013 		return -1;
1014 
1015 	args = perf_evsel__rawptr(evsel, sample, "args");
1016 	if (args == NULL) {
1017 		fprintf(trace->output, "Problems reading syscall arguments\n");
1018 		return -1;
1019 	}
1020 
1021 	ttrace = thread->priv;
1022 
1023 	if (ttrace->entry_str == NULL) {
1024 		ttrace->entry_str = malloc(1024);
1025 		if (!ttrace->entry_str)
1026 			return -1;
1027 	}
1028 
1029 	ttrace->entry_time = sample->time;
1030 	msg = ttrace->entry_str;
1031 	printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1032 
1033 	printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,  args);
1034 
1035 	if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1036 		if (!trace->duration_filter) {
1037 			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1038 			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1039 		}
1040 	} else
1041 		ttrace->entry_pending = true;
1042 
1043 	return 0;
1044 }
1045 
1046 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1047 			   struct perf_sample *sample)
1048 {
1049 	int ret;
1050 	u64 duration = 0;
1051 	struct thread *thread;
1052 	struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1053 	struct thread_trace *ttrace;
1054 
1055 	if (sc == NULL)
1056 		return -1;
1057 
1058 	if (sc->filtered)
1059 		return 0;
1060 
1061 	thread = machine__findnew_thread(&trace->host, sample->pid,
1062 					 sample->tid);
1063 	ttrace = thread__trace(thread, trace->output);
1064 	if (ttrace == NULL)
1065 		return -1;
1066 
1067 	ret = perf_evsel__intval(evsel, sample, "ret");
1068 
1069 	ttrace = thread->priv;
1070 
1071 	ttrace->exit_time = sample->time;
1072 
1073 	if (ttrace->entry_time) {
1074 		duration = sample->time - ttrace->entry_time;
1075 		if (trace__filter_duration(trace, duration))
1076 			goto out;
1077 	} else if (trace->duration_filter)
1078 		goto out;
1079 
1080 	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1081 
1082 	if (ttrace->entry_pending) {
1083 		fprintf(trace->output, "%-70s", ttrace->entry_str);
1084 	} else {
1085 		fprintf(trace->output, " ... [");
1086 		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1087 		fprintf(trace->output, "]: %s()", sc->name);
1088 	}
1089 
1090 	if (sc->fmt == NULL) {
1091 signed_print:
1092 		fprintf(trace->output, ") = %d", ret);
1093 	} else if (ret < 0 && sc->fmt->errmsg) {
1094 		char bf[256];
1095 		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1096 			   *e = audit_errno_to_name(-ret);
1097 
1098 		fprintf(trace->output, ") = -1 %s %s", e, emsg);
1099 	} else if (ret == 0 && sc->fmt->timeout)
1100 		fprintf(trace->output, ") = 0 Timeout");
1101 	else if (sc->fmt->hexret)
1102 		fprintf(trace->output, ") = %#x", ret);
1103 	else
1104 		goto signed_print;
1105 
1106 	fputc('\n', trace->output);
1107 out:
1108 	ttrace->entry_pending = false;
1109 
1110 	return 0;
1111 }
1112 
1113 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1114 				     struct perf_sample *sample)
1115 {
1116         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1117 	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1118 	struct thread *thread = machine__findnew_thread(&trace->host,
1119 							sample->pid,
1120 							sample->tid);
1121 	struct thread_trace *ttrace = thread__trace(thread, trace->output);
1122 
1123 	if (ttrace == NULL)
1124 		goto out_dump;
1125 
1126 	ttrace->runtime_ms += runtime_ms;
1127 	trace->runtime_ms += runtime_ms;
1128 	return 0;
1129 
1130 out_dump:
1131 	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1132 	       evsel->name,
1133 	       perf_evsel__strval(evsel, sample, "comm"),
1134 	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1135 	       runtime,
1136 	       perf_evsel__intval(evsel, sample, "vruntime"));
1137 	return 0;
1138 }
1139 
1140 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1141 {
1142 	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1143 	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1144 		return false;
1145 
1146 	if (trace->pid_list || trace->tid_list)
1147 		return true;
1148 
1149 	return false;
1150 }
1151 
1152 static int trace__process_sample(struct perf_tool *tool,
1153 				 union perf_event *event __maybe_unused,
1154 				 struct perf_sample *sample,
1155 				 struct perf_evsel *evsel,
1156 				 struct machine *machine __maybe_unused)
1157 {
1158 	struct trace *trace = container_of(tool, struct trace, tool);
1159 	int err = 0;
1160 
1161 	tracepoint_handler handler = evsel->handler.func;
1162 
1163 	if (skip_sample(trace, sample))
1164 		return 0;
1165 
1166 	if (!trace->full_time && trace->base_time == 0)
1167 		trace->base_time = sample->time;
1168 
1169 	if (handler)
1170 		handler(trace, evsel, sample);
1171 
1172 	return err;
1173 }
1174 
1175 static bool
1176 perf_session__has_tp(struct perf_session *session, const char *name)
1177 {
1178 	struct perf_evsel *evsel;
1179 
1180 	evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1181 
1182 	return evsel != NULL;
1183 }
1184 
1185 static int parse_target_str(struct trace *trace)
1186 {
1187 	if (trace->opts.target.pid) {
1188 		trace->pid_list = intlist__new(trace->opts.target.pid);
1189 		if (trace->pid_list == NULL) {
1190 			pr_err("Error parsing process id string\n");
1191 			return -EINVAL;
1192 		}
1193 	}
1194 
1195 	if (trace->opts.target.tid) {
1196 		trace->tid_list = intlist__new(trace->opts.target.tid);
1197 		if (trace->tid_list == NULL) {
1198 			pr_err("Error parsing thread id string\n");
1199 			return -EINVAL;
1200 		}
1201 	}
1202 
1203 	return 0;
1204 }
1205 
1206 static int trace__run(struct trace *trace, int argc, const char **argv)
1207 {
1208 	struct perf_evlist *evlist = perf_evlist__new();
1209 	struct perf_evsel *evsel;
1210 	int err = -1, i;
1211 	unsigned long before;
1212 	const bool forks = argc > 0;
1213 
1214 	if (evlist == NULL) {
1215 		fprintf(trace->output, "Not enough memory to run!\n");
1216 		goto out;
1217 	}
1218 
1219 	if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1220 	    perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
1221 		fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
1222 		goto out_delete_evlist;
1223 	}
1224 
1225 	if (trace->sched &&
1226 	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1227 				   trace__sched_stat_runtime)) {
1228 		fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
1229 		goto out_delete_evlist;
1230 	}
1231 
1232 	err = perf_evlist__create_maps(evlist, &trace->opts.target);
1233 	if (err < 0) {
1234 		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1235 		goto out_delete_evlist;
1236 	}
1237 
1238 	err = trace__symbols_init(trace, evlist);
1239 	if (err < 0) {
1240 		fprintf(trace->output, "Problems initializing symbol libraries!\n");
1241 		goto out_delete_maps;
1242 	}
1243 
1244 	perf_evlist__config(evlist, &trace->opts);
1245 
1246 	signal(SIGCHLD, sig_handler);
1247 	signal(SIGINT, sig_handler);
1248 
1249 	if (forks) {
1250 		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1251 						    argv, false, false);
1252 		if (err < 0) {
1253 			fprintf(trace->output, "Couldn't run the workload!\n");
1254 			goto out_delete_maps;
1255 		}
1256 	}
1257 
1258 	err = perf_evlist__open(evlist);
1259 	if (err < 0) {
1260 		fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1261 		goto out_delete_maps;
1262 	}
1263 
1264 	err = perf_evlist__mmap(evlist, UINT_MAX, false);
1265 	if (err < 0) {
1266 		fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1267 		goto out_close_evlist;
1268 	}
1269 
1270 	perf_evlist__enable(evlist);
1271 
1272 	if (forks)
1273 		perf_evlist__start_workload(evlist);
1274 
1275 	trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1276 again:
1277 	before = trace->nr_events;
1278 
1279 	for (i = 0; i < evlist->nr_mmaps; i++) {
1280 		union perf_event *event;
1281 
1282 		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1283 			const u32 type = event->header.type;
1284 			tracepoint_handler handler;
1285 			struct perf_sample sample;
1286 
1287 			++trace->nr_events;
1288 
1289 			err = perf_evlist__parse_sample(evlist, event, &sample);
1290 			if (err) {
1291 				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1292 				continue;
1293 			}
1294 
1295 			if (!trace->full_time && trace->base_time == 0)
1296 				trace->base_time = sample.time;
1297 
1298 			if (type != PERF_RECORD_SAMPLE) {
1299 				trace__process_event(trace, &trace->host, event);
1300 				continue;
1301 			}
1302 
1303 			evsel = perf_evlist__id2evsel(evlist, sample.id);
1304 			if (evsel == NULL) {
1305 				fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1306 				continue;
1307 			}
1308 
1309 			if (sample.raw_data == NULL) {
1310 				fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1311 				       perf_evsel__name(evsel), sample.tid,
1312 				       sample.cpu, sample.raw_size);
1313 				continue;
1314 			}
1315 
1316 			handler = evsel->handler.func;
1317 			handler(trace, evsel, &sample);
1318 
1319 			if (done)
1320 				goto out_unmap_evlist;
1321 		}
1322 	}
1323 
1324 	if (trace->nr_events == before) {
1325 		if (done)
1326 			goto out_unmap_evlist;
1327 
1328 		poll(evlist->pollfd, evlist->nr_fds, -1);
1329 	}
1330 
1331 	if (done)
1332 		perf_evlist__disable(evlist);
1333 
1334 	goto again;
1335 
1336 out_unmap_evlist:
1337 	perf_evlist__munmap(evlist);
1338 out_close_evlist:
1339 	perf_evlist__close(evlist);
1340 out_delete_maps:
1341 	perf_evlist__delete_maps(evlist);
1342 out_delete_evlist:
1343 	perf_evlist__delete(evlist);
1344 out:
1345 	return err;
1346 }
1347 
1348 static int trace__replay(struct trace *trace)
1349 {
1350 	const struct perf_evsel_str_handler handlers[] = {
1351 		{ "raw_syscalls:sys_enter",  trace__sys_enter, },
1352 		{ "raw_syscalls:sys_exit",   trace__sys_exit, },
1353 	};
1354 
1355 	struct perf_session *session;
1356 	int err = -1;
1357 
1358 	trace->tool.sample	  = trace__process_sample;
1359 	trace->tool.mmap	  = perf_event__process_mmap;
1360 	trace->tool.mmap2	  = perf_event__process_mmap2;
1361 	trace->tool.comm	  = perf_event__process_comm;
1362 	trace->tool.exit	  = perf_event__process_exit;
1363 	trace->tool.fork	  = perf_event__process_fork;
1364 	trace->tool.attr	  = perf_event__process_attr;
1365 	trace->tool.tracing_data = perf_event__process_tracing_data;
1366 	trace->tool.build_id	  = perf_event__process_build_id;
1367 
1368 	trace->tool.ordered_samples = true;
1369 	trace->tool.ordering_requires_timestamps = true;
1370 
1371 	/* add tid to output */
1372 	trace->multiple_threads = true;
1373 
1374 	if (symbol__init() < 0)
1375 		return -1;
1376 
1377 	session = perf_session__new(input_name, O_RDONLY, 0, false,
1378 				    &trace->tool);
1379 	if (session == NULL)
1380 		return -ENOMEM;
1381 
1382 	err = perf_session__set_tracepoints_handlers(session, handlers);
1383 	if (err)
1384 		goto out;
1385 
1386 	if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1387 		pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1388 		goto out;
1389 	}
1390 
1391 	if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1392 		pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1393 		goto out;
1394 	}
1395 
1396 	err = parse_target_str(trace);
1397 	if (err != 0)
1398 		goto out;
1399 
1400 	setup_pager();
1401 
1402 	err = perf_session__process_events(session, &trace->tool);
1403 	if (err)
1404 		pr_err("Failed to process events, error %d", err);
1405 
1406 out:
1407 	perf_session__delete(session);
1408 
1409 	return err;
1410 }
1411 
/*
 * Print the banner and column titles of the per-thread summary to @fp.
 *
 * Returns the sum of the values returned by fprintf() for each piece, i.e.
 * the number of bytes written when every call succeeds.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	static const char * const pieces[] = {
		"\n _____________________________________________________________________\n",
		" __)    Summary of events    (__\n\n",
		"              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n",
		" _____________________________________________________________________\n\n",
	};
	size_t total = 0;
	size_t i;

	for (i = 0; i < sizeof(pieces) / sizeof(pieces[0]); i++)
		total += fprintf(fp, "%s", pieces[i]);

	return total;
}
1423 
1424 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1425 {
1426 	size_t printed = trace__fprintf_threads_header(fp);
1427 	struct rb_node *nd;
1428 
1429 	for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1430 		struct thread *thread = rb_entry(nd, struct thread, rb_node);
1431 		struct thread_trace *ttrace = thread->priv;
1432 		const char *color;
1433 		double ratio;
1434 
1435 		if (ttrace == NULL)
1436 			continue;
1437 
1438 		ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1439 
1440 		color = PERF_COLOR_NORMAL;
1441 		if (ratio > 50.0)
1442 			color = PERF_COLOR_RED;
1443 		else if (ratio > 25.0)
1444 			color = PERF_COLOR_GREEN;
1445 		else if (ratio > 5.0)
1446 			color = PERF_COLOR_YELLOW;
1447 
1448 		printed += color_fprintf(fp, color, "%20s", thread->comm);
1449 		printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
1450 		printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1451 		printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1452 	}
1453 
1454 	return printed;
1455 }
1456 
1457 static int trace__set_duration(const struct option *opt, const char *str,
1458 			       int unset __maybe_unused)
1459 {
1460 	struct trace *trace = opt->value;
1461 
1462 	trace->duration_filter = atof(str);
1463 	return 0;
1464 }
1465 
1466 static int trace__open_output(struct trace *trace, const char *filename)
1467 {
1468 	struct stat st;
1469 
1470 	if (!stat(filename, &st) && st.st_size) {
1471 		char oldname[PATH_MAX];
1472 
1473 		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1474 		unlink(oldname);
1475 		rename(filename, oldname);
1476 	}
1477 
1478 	trace->output = fopen(filename, "w");
1479 
1480 	return trace->output == NULL ? -errno : 0;
1481 }
1482 
1483 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1484 {
1485 	const char * const trace_usage[] = {
1486 		"perf trace [<options>] [<command>]",
1487 		"perf trace [<options>] -- <command> [<options>]",
1488 		NULL
1489 	};
1490 	struct trace trace = {
1491 		.audit_machine = audit_detect_machine(),
1492 		.syscalls = {
1493 			. max = -1,
1494 		},
1495 		.opts = {
1496 			.target = {
1497 				.uid	   = UINT_MAX,
1498 				.uses_mmap = true,
1499 			},
1500 			.user_freq     = UINT_MAX,
1501 			.user_interval = ULLONG_MAX,
1502 			.no_delay      = true,
1503 			.mmap_pages    = 1024,
1504 		},
1505 		.output = stdout,
1506 	};
1507 	const char *output_name = NULL;
1508 	const char *ev_qualifier_str = NULL;
1509 	const struct option trace_options[] = {
1510 	OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1511 		    "list of events to trace"),
1512 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
1513 	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1514 	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1515 		    "trace events on existing process id"),
1516 	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1517 		    "trace events on existing thread id"),
1518 	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1519 		    "system-wide collection from all CPUs"),
1520 	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1521 		    "list of cpus to monitor"),
1522 	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1523 		    "child tasks do not inherit counters"),
1524 	OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages,
1525 		     "number of mmap data pages"),
1526 	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
1527 		   "user to profile"),
1528 	OPT_CALLBACK(0, "duration", &trace, "float",
1529 		     "show only events with duration > N.M ms",
1530 		     trace__set_duration),
1531 	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1532 	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1533 	OPT_BOOLEAN('T', "time", &trace.full_time,
1534 		    "Show full timestamp, not time relative to first start"),
1535 	OPT_END()
1536 	};
1537 	int err;
1538 	char bf[BUFSIZ];
1539 
1540 	argc = parse_options(argc, argv, trace_options, trace_usage, 0);
1541 
1542 	if (output_name != NULL) {
1543 		err = trace__open_output(&trace, output_name);
1544 		if (err < 0) {
1545 			perror("failed to create output file");
1546 			goto out;
1547 		}
1548 	}
1549 
1550 	if (ev_qualifier_str != NULL) {
1551 		const char *s = ev_qualifier_str;
1552 
1553 		trace.not_ev_qualifier = *s == '!';
1554 		if (trace.not_ev_qualifier)
1555 			++s;
1556 		trace.ev_qualifier = strlist__new(true, s);
1557 		if (trace.ev_qualifier == NULL) {
1558 			fputs("Not enough memory to parse event qualifier",
1559 			      trace.output);
1560 			err = -ENOMEM;
1561 			goto out_close;
1562 		}
1563 	}
1564 
1565 	err = perf_target__validate(&trace.opts.target);
1566 	if (err) {
1567 		perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1568 		fprintf(trace.output, "%s", bf);
1569 		goto out_close;
1570 	}
1571 
1572 	err = perf_target__parse_uid(&trace.opts.target);
1573 	if (err) {
1574 		perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1575 		fprintf(trace.output, "%s", bf);
1576 		goto out_close;
1577 	}
1578 
1579 	if (!argc && perf_target__none(&trace.opts.target))
1580 		trace.opts.target.system_wide = true;
1581 
1582 	if (input_name)
1583 		err = trace__replay(&trace);
1584 	else
1585 		err = trace__run(&trace, argc, argv);
1586 
1587 	if (trace.sched && !err)
1588 		trace__fprintf_thread_summary(&trace, trace.output);
1589 
1590 out_close:
1591 	if (output_name != NULL)
1592 		fclose(trace.output);
1593 out:
1594 	return err;
1595 }
1596