xref: /linux/tools/perf/builtin-trace.c (revision 50c95cbd70808aa2e5ba8d79e503456f1da37aeb)
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 
14 #include <libaudit.h>
15 #include <stdlib.h>
16 #include <sys/eventfd.h>
17 #include <sys/mman.h>
18 #include <linux/futex.h>
19 
/*
 * For older distros: fallback values for constants that their libc headers
 * may not provide. Each one is defined only when the system headers did not
 * already define it.
 */
#ifndef MAP_STACK
# define MAP_STACK		0x20000
#endif

#ifndef MADV_HWPOISON
# define MADV_HWPOISON		100
#endif

#ifndef MADV_MERGEABLE
# define MADV_MERGEABLE		12
#endif

#ifndef MADV_UNMERGEABLE
# define MADV_UNMERGEABLE	13
#endif
36 
/*
 * State handed to the syscall_arg__scnprintf_*() formatters for one syscall
 * argument. A single instance is reused across all arguments of a syscall,
 * so a formatter may set bits in ->mask to suppress later arguments.
 */
struct syscall_arg {
	unsigned long val;	/* raw value of the argument being formatted */
	void	      *parm;	/* formatter specific parameter, e.g. a struct strarray */
	u8	      idx;	/* zero based position of the argument */
	u8	      mask;	/* bit N set: argument N is not printed */
};
43 
/*
 * Table of names indexed by the integer value of a syscall argument, used
 * by syscall_arg__scnprintf_strarray() through syscall_arg->parm.
 */
struct strarray {
	int	    nr_entries;	/* number of elements in ->entries */
	const char **entries;	/* entries[value] is the name printed for value */
};

/*
 * Define "struct strarray strarray__<array>" describing the string array
 * named <array>; the entry count is taken from the array itself, so <array>
 * must be a real array, not a pointer.
 */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
53 
54 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
55 					      struct syscall_arg *arg)
56 {
57 	int idx = arg->val;
58 	struct strarray *sa = arg->parm;
59 
60 	if (idx < 0 || idx >= sa->nr_entries)
61 		return scnprintf(bf, size, "%d", idx);
62 
63 	return scnprintf(bf, size, "%s", sa->entries[idx]);
64 }
65 
66 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
67 
68 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
69 					 struct syscall_arg *arg)
70 {
71 	return scnprintf(bf, size, "%#lx", arg->val);
72 }
73 
74 #define SCA_HEX syscall_arg__scnprintf_hex
75 
76 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
77 					       struct syscall_arg *arg)
78 {
79 	int printed = 0, prot = arg->val;
80 
81 	if (prot == PROT_NONE)
82 		return scnprintf(bf, size, "NONE");
83 #define	P_MMAP_PROT(n) \
84 	if (prot & PROT_##n) { \
85 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
86 		prot &= ~PROT_##n; \
87 	}
88 
89 	P_MMAP_PROT(EXEC);
90 	P_MMAP_PROT(READ);
91 	P_MMAP_PROT(WRITE);
92 #ifdef PROT_SEM
93 	P_MMAP_PROT(SEM);
94 #endif
95 	P_MMAP_PROT(GROWSDOWN);
96 	P_MMAP_PROT(GROWSUP);
97 #undef P_MMAP_PROT
98 
99 	if (prot)
100 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
101 
102 	return printed;
103 }
104 
105 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
106 
107 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
108 						struct syscall_arg *arg)
109 {
110 	int printed = 0, flags = arg->val;
111 
112 #define	P_MMAP_FLAG(n) \
113 	if (flags & MAP_##n) { \
114 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
115 		flags &= ~MAP_##n; \
116 	}
117 
118 	P_MMAP_FLAG(SHARED);
119 	P_MMAP_FLAG(PRIVATE);
120 #ifdef MAP_32BIT
121 	P_MMAP_FLAG(32BIT);
122 #endif
123 	P_MMAP_FLAG(ANONYMOUS);
124 	P_MMAP_FLAG(DENYWRITE);
125 	P_MMAP_FLAG(EXECUTABLE);
126 	P_MMAP_FLAG(FILE);
127 	P_MMAP_FLAG(FIXED);
128 	P_MMAP_FLAG(GROWSDOWN);
129 #ifdef MAP_HUGETLB
130 	P_MMAP_FLAG(HUGETLB);
131 #endif
132 	P_MMAP_FLAG(LOCKED);
133 	P_MMAP_FLAG(NONBLOCK);
134 	P_MMAP_FLAG(NORESERVE);
135 	P_MMAP_FLAG(POPULATE);
136 	P_MMAP_FLAG(STACK);
137 #ifdef MAP_UNINITIALIZED
138 	P_MMAP_FLAG(UNINITIALIZED);
139 #endif
140 #undef P_MMAP_FLAG
141 
142 	if (flags)
143 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
144 
145 	return printed;
146 }
147 
148 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
149 
150 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
151 						      struct syscall_arg *arg)
152 {
153 	int behavior = arg->val;
154 
155 	switch (behavior) {
156 #define	P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
157 	P_MADV_BHV(NORMAL);
158 	P_MADV_BHV(RANDOM);
159 	P_MADV_BHV(SEQUENTIAL);
160 	P_MADV_BHV(WILLNEED);
161 	P_MADV_BHV(DONTNEED);
162 	P_MADV_BHV(REMOVE);
163 	P_MADV_BHV(DONTFORK);
164 	P_MADV_BHV(DOFORK);
165 	P_MADV_BHV(HWPOISON);
166 #ifdef MADV_SOFT_OFFLINE
167 	P_MADV_BHV(SOFT_OFFLINE);
168 #endif
169 	P_MADV_BHV(MERGEABLE);
170 	P_MADV_BHV(UNMERGEABLE);
171 #ifdef MADV_HUGEPAGE
172 	P_MADV_BHV(HUGEPAGE);
173 #endif
174 #ifdef MADV_NOHUGEPAGE
175 	P_MADV_BHV(NOHUGEPAGE);
176 #endif
177 #ifdef MADV_DONTDUMP
178 	P_MADV_BHV(DONTDUMP);
179 #endif
180 #ifdef MADV_DODUMP
181 	P_MADV_BHV(DODUMP);
182 #endif
183 #undef P_MADV_PHV
184 	default: break;
185 	}
186 
187 	return scnprintf(bf, size, "%#x", behavior);
188 }
189 
190 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
191 
192 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
193 {
194 	enum syscall_futex_args {
195 		SCF_UADDR   = (1 << 0),
196 		SCF_OP	    = (1 << 1),
197 		SCF_VAL	    = (1 << 2),
198 		SCF_TIMEOUT = (1 << 3),
199 		SCF_UADDR2  = (1 << 4),
200 		SCF_VAL3    = (1 << 5),
201 	};
202 	int op = arg->val;
203 	int cmd = op & FUTEX_CMD_MASK;
204 	size_t printed = 0;
205 
206 	switch (cmd) {
207 #define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
208 	P_FUTEX_OP(WAIT);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
209 	P_FUTEX_OP(WAKE);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
210 	P_FUTEX_OP(FD);		    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
211 	P_FUTEX_OP(REQUEUE);	    arg->mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
212 	P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;			  break;
213 	P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;			  break;
214 	P_FUTEX_OP(WAKE_OP);							  break;
215 	P_FUTEX_OP(LOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
216 	P_FUTEX_OP(UNLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
217 	P_FUTEX_OP(TRYLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
218 	P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;			  break;
219 	P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;			  break;
220 	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
221 	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
222 	}
223 
224 	if (op & FUTEX_PRIVATE_FLAG)
225 		printed += scnprintf(bf + printed, size - printed, "|PRIV");
226 
227 	if (op & FUTEX_CLOCK_REALTIME)
228 		printed += scnprintf(bf + printed, size - printed, "|CLKRT");
229 
230 	return printed;
231 }
232 
233 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
234 
/*
 * Name tables for syscall arguments printed via SCA_STRARRAY: the argument
 * value is used directly as the index, so the position of each string must
 * match the numeric value of the constant it names.
 */

/* "which" argument of getitimer()/setitimer() */
static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);

/* "whence" argument of lseek(); DATA/HOLE only when the headers define them */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

/* "cmd" argument of fcntl() */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
	"F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

/* "resource" argument of getrlimit()/setrlimit()/prlimit64() */
static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

/* "how" argument of rt_sigprocmask() */
static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);
265 
/*
 * "family" argument of socket(), indexed by the AF_* value.
 *
 * The position of every string must equal the numeric value of its AF_*
 * constant. Slot 28 (AF_MPLS in current kernels) has to be present, or
 * else CAN (29) and every family after it is printed with the name of its
 * neighbour.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
275 
276 #ifndef SOCK_TYPE_MASK
277 #define SOCK_TYPE_MASK 0xf
278 #endif
279 
280 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
281 						      struct syscall_arg *arg)
282 {
283 	size_t printed;
284 	int type = arg->val,
285 	    flags = type & ~SOCK_TYPE_MASK;
286 
287 	type &= SOCK_TYPE_MASK;
288 	/*
289  	 * Can't use a strarray, MIPS may override for ABI reasons.
290  	 */
291 	switch (type) {
292 #define	P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
293 	P_SK_TYPE(STREAM);
294 	P_SK_TYPE(DGRAM);
295 	P_SK_TYPE(RAW);
296 	P_SK_TYPE(RDM);
297 	P_SK_TYPE(SEQPACKET);
298 	P_SK_TYPE(DCCP);
299 	P_SK_TYPE(PACKET);
300 #undef P_SK_TYPE
301 	default:
302 		printed = scnprintf(bf, size, "%#x", type);
303 	}
304 
305 #define	P_SK_FLAG(n) \
306 	if (flags & SOCK_##n) { \
307 		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
308 		flags &= ~SOCK_##n; \
309 	}
310 
311 	P_SK_FLAG(CLOEXEC);
312 	P_SK_FLAG(NONBLOCK);
313 #undef P_SK_FLAG
314 
315 	if (flags)
316 		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
317 
318 	return printed;
319 }
320 
321 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
322 
323 #ifndef MSG_PROBE
324 #define MSG_PROBE	     0x10
325 #endif
326 #ifndef MSG_SENDPAGE_NOTLAST
327 #define MSG_SENDPAGE_NOTLAST 0x20000
328 #endif
329 #ifndef MSG_FASTOPEN
330 #define MSG_FASTOPEN	     0x20000000
331 #endif
332 
333 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
334 					       struct syscall_arg *arg)
335 {
336 	int printed = 0, flags = arg->val;
337 
338 	if (flags == 0)
339 		return scnprintf(bf, size, "NONE");
340 #define	P_MSG_FLAG(n) \
341 	if (flags & MSG_##n) { \
342 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
343 		flags &= ~MSG_##n; \
344 	}
345 
346 	P_MSG_FLAG(OOB);
347 	P_MSG_FLAG(PEEK);
348 	P_MSG_FLAG(DONTROUTE);
349 	P_MSG_FLAG(TRYHARD);
350 	P_MSG_FLAG(CTRUNC);
351 	P_MSG_FLAG(PROBE);
352 	P_MSG_FLAG(TRUNC);
353 	P_MSG_FLAG(DONTWAIT);
354 	P_MSG_FLAG(EOR);
355 	P_MSG_FLAG(WAITALL);
356 	P_MSG_FLAG(FIN);
357 	P_MSG_FLAG(SYN);
358 	P_MSG_FLAG(CONFIRM);
359 	P_MSG_FLAG(RST);
360 	P_MSG_FLAG(ERRQUEUE);
361 	P_MSG_FLAG(NOSIGNAL);
362 	P_MSG_FLAG(MORE);
363 	P_MSG_FLAG(WAITFORONE);
364 	P_MSG_FLAG(SENDPAGE_NOTLAST);
365 	P_MSG_FLAG(FASTOPEN);
366 	P_MSG_FLAG(CMSG_CLOEXEC);
367 #undef P_MSG_FLAG
368 
369 	if (flags)
370 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
371 
372 	return printed;
373 }
374 
375 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
376 
377 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
378 						 struct syscall_arg *arg)
379 {
380 	size_t printed = 0;
381 	int mode = arg->val;
382 
383 	if (mode == F_OK) /* 0 */
384 		return scnprintf(bf, size, "F");
385 #define	P_MODE(n) \
386 	if (mode & n##_OK) { \
387 		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
388 		mode &= ~n##_OK; \
389 	}
390 
391 	P_MODE(R);
392 	P_MODE(W);
393 	P_MODE(X);
394 #undef P_MODE
395 
396 	if (mode)
397 		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
398 
399 	return printed;
400 }
401 
402 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
403 
404 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
405 					       struct syscall_arg *arg)
406 {
407 	int printed = 0, flags = arg->val;
408 
409 	if (!(flags & O_CREAT))
410 		arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
411 
412 	if (flags == 0)
413 		return scnprintf(bf, size, "RDONLY");
414 #define	P_FLAG(n) \
415 	if (flags & O_##n) { \
416 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
417 		flags &= ~O_##n; \
418 	}
419 
420 	P_FLAG(APPEND);
421 	P_FLAG(ASYNC);
422 	P_FLAG(CLOEXEC);
423 	P_FLAG(CREAT);
424 	P_FLAG(DIRECT);
425 	P_FLAG(DIRECTORY);
426 	P_FLAG(EXCL);
427 	P_FLAG(LARGEFILE);
428 	P_FLAG(NOATIME);
429 	P_FLAG(NOCTTY);
430 #ifdef O_NONBLOCK
431 	P_FLAG(NONBLOCK);
432 #elif O_NDELAY
433 	P_FLAG(NDELAY);
434 #endif
435 #ifdef O_PATH
436 	P_FLAG(PATH);
437 #endif
438 	P_FLAG(RDWR);
439 #ifdef O_DSYNC
440 	if ((flags & O_SYNC) == O_SYNC)
441 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
442 	else {
443 		P_FLAG(DSYNC);
444 	}
445 #else
446 	P_FLAG(SYNC);
447 #endif
448 	P_FLAG(TRUNC);
449 	P_FLAG(WRONLY);
450 #undef P_FLAG
451 
452 	if (flags)
453 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
454 
455 	return printed;
456 }
457 
458 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
459 
460 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
461 						   struct syscall_arg *arg)
462 {
463 	int printed = 0, flags = arg->val;
464 
465 	if (flags == 0)
466 		return scnprintf(bf, size, "NONE");
467 #define	P_FLAG(n) \
468 	if (flags & EFD_##n) { \
469 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
470 		flags &= ~EFD_##n; \
471 	}
472 
473 	P_FLAG(SEMAPHORE);
474 	P_FLAG(CLOEXEC);
475 	P_FLAG(NONBLOCK);
476 #undef P_FLAG
477 
478 	if (flags)
479 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
480 
481 	return printed;
482 }
483 
484 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
485 
486 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
487 {
488 	int sig = arg->val;
489 
490 	switch (sig) {
491 #define	P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
492 	P_SIGNUM(HUP);
493 	P_SIGNUM(INT);
494 	P_SIGNUM(QUIT);
495 	P_SIGNUM(ILL);
496 	P_SIGNUM(TRAP);
497 	P_SIGNUM(ABRT);
498 	P_SIGNUM(BUS);
499 	P_SIGNUM(FPE);
500 	P_SIGNUM(KILL);
501 	P_SIGNUM(USR1);
502 	P_SIGNUM(SEGV);
503 	P_SIGNUM(USR2);
504 	P_SIGNUM(PIPE);
505 	P_SIGNUM(ALRM);
506 	P_SIGNUM(TERM);
507 	P_SIGNUM(STKFLT);
508 	P_SIGNUM(CHLD);
509 	P_SIGNUM(CONT);
510 	P_SIGNUM(STOP);
511 	P_SIGNUM(TSTP);
512 	P_SIGNUM(TTIN);
513 	P_SIGNUM(TTOU);
514 	P_SIGNUM(URG);
515 	P_SIGNUM(XCPU);
516 	P_SIGNUM(XFSZ);
517 	P_SIGNUM(VTALRM);
518 	P_SIGNUM(PROF);
519 	P_SIGNUM(WINCH);
520 	P_SIGNUM(IO);
521 	P_SIGNUM(PWR);
522 	P_SIGNUM(SYS);
523 	default: break;
524 	}
525 
526 	return scnprintf(bf, size, "%#x", sig);
527 }
528 
529 #define SCA_SIGNUM syscall_arg__scnprintf_signum
530 
/*
 * Per syscall formatting overrides.
 *
 * The table is searched by name with bsearch() in syscall_fmt__find(), so
 * the entries MUST be kept sorted by ->name in strcmp() order.
 *
 *  name:	   syscall name, the lookup key
 *  alias:	   tracepoint name tried when "sys_enter_<name>" does not exist
 *  arg_scnprintf: formatter for each argument, indexed by argument position;
 *		   NULL selects the default formatting
 *  arg_parm:	   parameter handed to the formatter of the same index through
 *		   syscall_arg->parm
 *  errmsg:	   a negative return value is printed as errno name + message
 *  timeout:	   a return value of 0 is printed as "Timeout"
 *  hexret:	   NOTE(review): presumably "print the return value in hex";
 *		   the code consuming it is not visible here - confirm
 */
static struct syscall_fmt {
	const char *name;
	const char *alias;
	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
	void	   *arg_parm[6];
	bool	   errmsg;
	bool	   timeout;
	bool	   hexret;
} syscall_fmts[] = {
	{ .name	    = "access",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
	{ .name	    = "brk",	    .hexret = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
	{ .name	    = "connect",    .errmsg = true, },
	{ .name	    = "eventfd2",   .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
	{ .name	    = "fcntl",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat", },
	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat", },
	{ .name	    = "futex",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
	{ .name	    = "getitimer",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
	  .arg_parm	 = { [0] = &strarray__itimers, /* which */ }, },
	{ .name	    = "getrlimit",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
	  .arg_parm	 = { [0] = &strarray__rlimit_resources, /* resource */ }, },
	{ .name	    = "ioctl",	    .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
	{ .name	    = "kill",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "lseek",	    .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
	{ .name     = "madvise",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
			     [2] = SCA_MADV_BHV, /* behavior */ }, },
	{ .name	    = "mmap",	    .hexret = true,
	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
			     [2] = SCA_MMAP_PROT, /* prot */
			     [3] = SCA_MMAP_FLAGS, /* flags */ }, },
	{ .name	    = "mprotect",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
			     [2] = SCA_MMAP_PROT, /* prot */ }, },
	{ .name	    = "mremap",	    .hexret = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
			     [4] = SCA_HEX, /* new_addr */ }, },
	{ .name	    = "munmap",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
	{ .name	    = "open",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
	{ .name	    = "open_by_handle_at", .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
	{ .name	    = "openat",	    .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64", },
	{ .name	    = "prlimit64",  .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_STRARRAY, /* resource */ },
	  .arg_parm	 = { [1] = &strarray__rlimit_resources, /* resource */ }, },
	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64", },
	{ .name	    = "read",	    .errmsg = true, },
	{ .name	    = "recvfrom",   .errmsg = true,
	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "recvmmsg",   .errmsg = true,
	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "recvmsg",    .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "rt_sigaction", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "rt_sigprocmask", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* how */ },
	  .arg_parm	 = { [0] = &strarray__sighow, /* how */ }, },
	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
	{ .name	    = "sendmmsg",    .errmsg = true,
	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "sendmsg",    .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "sendto",	    .errmsg = true,
	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "setitimer",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ },
	  .arg_parm	 = { [0] = &strarray__itimers, /* which */ }, },
	{ .name	    = "setrlimit",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ },
	  .arg_parm	 = { [0] = &strarray__rlimit_resources, /* resource */ }, },
	{ .name	    = "socket",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
			     [1] = SCA_SK_TYPE, /* type */ },
	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat", },
	{ .name	    = "tgkill",	    .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "tkill",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
};
637 
638 static int syscall_fmt__cmp(const void *name, const void *fmtp)
639 {
640 	const struct syscall_fmt *fmt = fmtp;
641 	return strcmp(name, fmt->name);
642 }
643 
644 static struct syscall_fmt *syscall_fmt__find(const char *name)
645 {
646 	const int nmemb = ARRAY_SIZE(syscall_fmts);
647 	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
648 }
649 
/*
 * What is known about one syscall id; an element of trace->syscalls.table,
 * filled in by trace__read_syscall_info().
 */
struct syscall {
	struct event_format *tp_format;	/* format of the sys_enter_<name> tracepoint */
	const char	    *name;	/* syscall name; NULL while the slot is unused */
	bool		    filtered;	/* excluded by the event qualifier, not printed */
	struct syscall_fmt  *fmt;	/* entry in syscall_fmts[], NULL if there is none */
	/* formatter for each argument, NULL entries use the default formatting */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void		    **arg_parm;	/* per argument formatter parameter, from ->fmt */
};
658 
659 static size_t fprintf_duration(unsigned long t, FILE *fp)
660 {
661 	double duration = (double)t / NSEC_PER_MSEC;
662 	size_t printed = fprintf(fp, "(");
663 
664 	if (duration >= 1.0)
665 		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
666 	else if (duration >= 0.01)
667 		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
668 	else
669 		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
670 	return printed + fprintf(fp, "): ");
671 }
672 
/*
 * Per thread tracing state, stored in thread->priv by thread__trace().
 */
struct thread_trace {
	u64		  entry_time;	 /* sample time of the last sys_enter seen */
	u64		  exit_time;	 /* sample time of the last sys_exit seen */
	bool		  entry_pending; /* ->entry_str was formatted but not printed yet */
	unsigned long	  nr_events;	 /* bumped on every thread__trace() call */
	char		  *entry_str;	 /* "name(args" text built at sys_enter */
	double		  runtime_ms;	 /* NOTE(review): not used in the visible code */
};
681 
682 static struct thread_trace *thread_trace__new(void)
683 {
684 	return zalloc(sizeof(struct thread_trace));
685 }
686 
687 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
688 {
689 	struct thread_trace *ttrace;
690 
691 	if (thread == NULL)
692 		goto fail;
693 
694 	if (thread->priv == NULL)
695 		thread->priv = thread_trace__new();
696 
697 	if (thread->priv == NULL)
698 		goto fail;
699 
700 	ttrace = thread->priv;
701 	++ttrace->nr_events;
702 
703 	return ttrace;
704 fail:
705 	color_fprintf(fp, PERF_COLOR_RED,
706 		      "WARNING: not enough memory, dropping samples!\n");
707 	return NULL;
708 }
709 
/*
 * State of one 'perf trace' session.
 *
 * Fields without a comment are not used in the part of the file this was
 * documented from.
 */
struct trace {
	struct perf_tool	tool;		/* embedded; container_of() gets back to the trace */
	int			audit_machine;	/* machine type passed to audit_syscall_to_name() */
	struct {
		int		max;		/* highest id with a slot in ->table, -1 while empty */
		struct syscall  *table;		/* indexed by syscall id, grown on demand */
	} syscalls;
	struct perf_record_opts opts;
	struct machine		host;		/* machine used for thread lookup */
	u64			base_time;	/* subtracted from timestamps before printing */
	bool			full_time;
	FILE			*output;	/* where all the output goes */
	unsigned long		nr_events;
	struct strlist		*ev_qualifier;	/* syscall names to filter on, NULL for no filter */
	bool			not_ev_qualifier; /* true: the names in ->ev_qualifier are excluded */
	struct intlist		*tid_list;
	struct intlist		*pid_list;
	bool			sched;
	bool			multiple_threads; /* print the tid on each line */
	bool			show_comm;	/* also print the comm, with multiple_threads */
	double			duration_filter; /* in ms; shorter syscalls are not printed */
	double			runtime_ms;
};
733 
734 static bool trace__filter_duration(struct trace *trace, double t)
735 {
736 	return t < (trace->duration_filter * NSEC_PER_MSEC);
737 }
738 
739 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
740 {
741 	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
742 
743 	return fprintf(fp, "%10.3f ", ts);
744 }
745 
746 static bool done = false;
747 
748 static void sig_handler(int sig __maybe_unused)
749 {
750 	done = true;
751 }
752 
753 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
754 					u64 duration, u64 tstamp, FILE *fp)
755 {
756 	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
757 	printed += fprintf_duration(duration, fp);
758 
759 	if (trace->multiple_threads) {
760 		if (trace->show_comm)
761 			printed += fprintf(fp, "%.14s/", thread->comm);
762 		printed += fprintf(fp, "%d ", thread->tid);
763 	}
764 
765 	return printed;
766 }
767 
768 static int trace__process_event(struct trace *trace, struct machine *machine,
769 				union perf_event *event)
770 {
771 	int ret = 0;
772 
773 	switch (event->header.type) {
774 	case PERF_RECORD_LOST:
775 		color_fprintf(trace->output, PERF_COLOR_RED,
776 			      "LOST %" PRIu64 " events!\n", event->lost.lost);
777 		ret = machine__process_lost_event(machine, event);
778 	default:
779 		ret = machine__process_event(machine, event);
780 		break;
781 	}
782 
783 	return ret;
784 }
785 
786 static int trace__tool_process(struct perf_tool *tool,
787 			       union perf_event *event,
788 			       struct perf_sample *sample __maybe_unused,
789 			       struct machine *machine)
790 {
791 	struct trace *trace = container_of(tool, struct trace, tool);
792 	return trace__process_event(trace, machine, event);
793 }
794 
795 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
796 {
797 	int err = symbol__init();
798 
799 	if (err)
800 		return err;
801 
802 	machine__init(&trace->host, "", HOST_KERNEL_ID);
803 	machine__create_kernel_maps(&trace->host);
804 
805 	if (perf_target__has_task(&trace->opts.target)) {
806 		err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
807 							trace__tool_process,
808 							&trace->host);
809 	} else {
810 		err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
811 						     &trace->host);
812 	}
813 
814 	if (err)
815 		symbol__exit();
816 
817 	return err;
818 }
819 
820 static int syscall__set_arg_fmts(struct syscall *sc)
821 {
822 	struct format_field *field;
823 	int idx = 0;
824 
825 	sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
826 	if (sc->arg_scnprintf == NULL)
827 		return -1;
828 
829 	if (sc->fmt)
830 		sc->arg_parm = sc->fmt->arg_parm;
831 
832 	for (field = sc->tp_format->format.fields->next; field; field = field->next) {
833 		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
834 			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
835 		else if (field->flags & FIELD_IS_POINTER)
836 			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
837 		++idx;
838 	}
839 
840 	return 0;
841 }
842 
843 static int trace__read_syscall_info(struct trace *trace, int id)
844 {
845 	char tp_name[128];
846 	struct syscall *sc;
847 	const char *name = audit_syscall_to_name(id, trace->audit_machine);
848 
849 	if (name == NULL)
850 		return -1;
851 
852 	if (id > trace->syscalls.max) {
853 		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
854 
855 		if (nsyscalls == NULL)
856 			return -1;
857 
858 		if (trace->syscalls.max != -1) {
859 			memset(nsyscalls + trace->syscalls.max + 1, 0,
860 			       (id - trace->syscalls.max) * sizeof(*sc));
861 		} else {
862 			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
863 		}
864 
865 		trace->syscalls.table = nsyscalls;
866 		trace->syscalls.max   = id;
867 	}
868 
869 	sc = trace->syscalls.table + id;
870 	sc->name = name;
871 
872 	if (trace->ev_qualifier) {
873 		bool in = strlist__find(trace->ev_qualifier, name) != NULL;
874 
875 		if (!(in ^ trace->not_ev_qualifier)) {
876 			sc->filtered = true;
877 			/*
878 			 * No need to do read tracepoint information since this will be
879 			 * filtered out.
880 			 */
881 			return 0;
882 		}
883 	}
884 
885 	sc->fmt  = syscall_fmt__find(sc->name);
886 
887 	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
888 	sc->tp_format = event_format__new("syscalls", tp_name);
889 
890 	if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
891 		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
892 		sc->tp_format = event_format__new("syscalls", tp_name);
893 	}
894 
895 	if (sc->tp_format == NULL)
896 		return -1;
897 
898 	return syscall__set_arg_fmts(sc);
899 }
900 
901 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
902 				      unsigned long *args)
903 {
904 	size_t printed = 0;
905 
906 	if (sc->tp_format != NULL) {
907 		struct format_field *field;
908 		u8 bit = 1;
909 		struct syscall_arg arg = {
910 			.idx  = 0,
911 			.mask = 0,
912 		};
913 
914 		for (field = sc->tp_format->format.fields->next; field;
915 		     field = field->next, ++arg.idx, bit <<= 1) {
916 			if (arg.mask & bit)
917 				continue;
918 
919 			if (args[arg.idx] == 0)
920 				continue;
921 
922 			printed += scnprintf(bf + printed, size - printed,
923 					     "%s%s: ", printed ? ", " : "", field->name);
924 			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
925 				arg.val = args[arg.idx];
926 				if (sc->arg_parm)
927 					arg.parm = sc->arg_parm[arg.idx];
928 				printed += sc->arg_scnprintf[arg.idx](bf + printed,
929 								      size - printed, &arg);
930 			} else {
931 				printed += scnprintf(bf + printed, size - printed,
932 						     "%ld", args[arg.idx]);
933 			}
934 		}
935 	} else {
936 		int i = 0;
937 
938 		while (i < 6) {
939 			printed += scnprintf(bf + printed, size - printed,
940 					     "%sarg%d: %ld",
941 					     printed ? ", " : "", i, args[i]);
942 			++i;
943 		}
944 	}
945 
946 	return printed;
947 }
948 
/*
 * Signature shared by the per tracepoint handlers, such as
 * trace__sys_enter() and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  struct perf_sample *sample);
951 
952 static struct syscall *trace__syscall_info(struct trace *trace,
953 					   struct perf_evsel *evsel,
954 					   struct perf_sample *sample)
955 {
956 	int id = perf_evsel__intval(evsel, sample, "id");
957 
958 	if (id < 0) {
959 
960 		/*
961 		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
962 		 * before that, leaving at a higher verbosity level till that is
963 		 * explained. Reproduced with plain ftrace with:
964 		 *
965 		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
966 		 * grep "NR -1 " /t/trace_pipe
967 		 *
968 		 * After generating some load on the machine.
969  		 */
970 		if (verbose > 1) {
971 			static u64 n;
972 			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
973 				id, perf_evsel__name(evsel), ++n);
974 		}
975 		return NULL;
976 	}
977 
978 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
979 	    trace__read_syscall_info(trace, id))
980 		goto out_cant_read;
981 
982 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
983 		goto out_cant_read;
984 
985 	return &trace->syscalls.table[id];
986 
987 out_cant_read:
988 	if (verbose) {
989 		fprintf(trace->output, "Problems reading syscall %d", id);
990 		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
991 			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
992 		fputs(" information\n", trace->output);
993 	}
994 	return NULL;
995 }
996 
997 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
998 			    struct perf_sample *sample)
999 {
1000 	char *msg;
1001 	void *args;
1002 	size_t printed = 0;
1003 	struct thread *thread;
1004 	struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1005 	struct thread_trace *ttrace;
1006 
1007 	if (sc == NULL)
1008 		return -1;
1009 
1010 	if (sc->filtered)
1011 		return 0;
1012 
1013 	thread = machine__findnew_thread(&trace->host, sample->pid,
1014 					 sample->tid);
1015 	ttrace = thread__trace(thread, trace->output);
1016 	if (ttrace == NULL)
1017 		return -1;
1018 
1019 	args = perf_evsel__rawptr(evsel, sample, "args");
1020 	if (args == NULL) {
1021 		fprintf(trace->output, "Problems reading syscall arguments\n");
1022 		return -1;
1023 	}
1024 
1025 	ttrace = thread->priv;
1026 
1027 	if (ttrace->entry_str == NULL) {
1028 		ttrace->entry_str = malloc(1024);
1029 		if (!ttrace->entry_str)
1030 			return -1;
1031 	}
1032 
1033 	ttrace->entry_time = sample->time;
1034 	msg = ttrace->entry_str;
1035 	printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1036 
1037 	printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,  args);
1038 
1039 	if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1040 		if (!trace->duration_filter) {
1041 			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1042 			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1043 		}
1044 	} else
1045 		ttrace->entry_pending = true;
1046 
1047 	return 0;
1048 }
1049 
1050 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1051 			   struct perf_sample *sample)
1052 {
1053 	int ret;
1054 	u64 duration = 0;
1055 	struct thread *thread;
1056 	struct syscall *sc = trace__syscall_info(trace, evsel, sample);
1057 	struct thread_trace *ttrace;
1058 
1059 	if (sc == NULL)
1060 		return -1;
1061 
1062 	if (sc->filtered)
1063 		return 0;
1064 
1065 	thread = machine__findnew_thread(&trace->host, sample->pid,
1066 					 sample->tid);
1067 	ttrace = thread__trace(thread, trace->output);
1068 	if (ttrace == NULL)
1069 		return -1;
1070 
1071 	ret = perf_evsel__intval(evsel, sample, "ret");
1072 
1073 	ttrace = thread->priv;
1074 
1075 	ttrace->exit_time = sample->time;
1076 
1077 	if (ttrace->entry_time) {
1078 		duration = sample->time - ttrace->entry_time;
1079 		if (trace__filter_duration(trace, duration))
1080 			goto out;
1081 	} else if (trace->duration_filter)
1082 		goto out;
1083 
1084 	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1085 
1086 	if (ttrace->entry_pending) {
1087 		fprintf(trace->output, "%-70s", ttrace->entry_str);
1088 	} else {
1089 		fprintf(trace->output, " ... [");
1090 		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1091 		fprintf(trace->output, "]: %s()", sc->name);
1092 	}
1093 
1094 	if (sc->fmt == NULL) {
1095 signed_print:
1096 		fprintf(trace->output, ") = %d", ret);
1097 	} else if (ret < 0 && sc->fmt->errmsg) {
1098 		char bf[256];
1099 		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1100 			   *e = audit_errno_to_name(-ret);
1101 
1102 		fprintf(trace->output, ") = -1 %s %s", e, emsg);
1103 	} else if (ret == 0 && sc->fmt->timeout)
1104 		fprintf(trace->output, ") = 0 Timeout");
1105 	else if (sc->fmt->hexret)
1106 		fprintf(trace->output, ") = %#x", ret);
1107 	else
1108 		goto signed_print;
1109 
1110 	fputc('\n', trace->output);
1111 out:
1112 	ttrace->entry_pending = false;
1113 
1114 	return 0;
1115 }
1116 
1117 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1118 				     struct perf_sample *sample)
1119 {
1120         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1121 	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1122 	struct thread *thread = machine__findnew_thread(&trace->host,
1123 							sample->pid,
1124 							sample->tid);
1125 	struct thread_trace *ttrace = thread__trace(thread, trace->output);
1126 
1127 	if (ttrace == NULL)
1128 		goto out_dump;
1129 
1130 	ttrace->runtime_ms += runtime_ms;
1131 	trace->runtime_ms += runtime_ms;
1132 	return 0;
1133 
1134 out_dump:
1135 	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1136 	       evsel->name,
1137 	       perf_evsel__strval(evsel, sample, "comm"),
1138 	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1139 	       runtime,
1140 	       perf_evsel__intval(evsel, sample, "vruntime"));
1141 	return 0;
1142 }
1143 
1144 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1145 {
1146 	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1147 	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1148 		return false;
1149 
1150 	if (trace->pid_list || trace->tid_list)
1151 		return true;
1152 
1153 	return false;
1154 }
1155 
1156 static int trace__process_sample(struct perf_tool *tool,
1157 				 union perf_event *event __maybe_unused,
1158 				 struct perf_sample *sample,
1159 				 struct perf_evsel *evsel,
1160 				 struct machine *machine __maybe_unused)
1161 {
1162 	struct trace *trace = container_of(tool, struct trace, tool);
1163 	int err = 0;
1164 
1165 	tracepoint_handler handler = evsel->handler.func;
1166 
1167 	if (skip_sample(trace, sample))
1168 		return 0;
1169 
1170 	if (!trace->full_time && trace->base_time == 0)
1171 		trace->base_time = sample->time;
1172 
1173 	if (handler)
1174 		handler(trace, evsel, sample);
1175 
1176 	return err;
1177 }
1178 
1179 static bool
1180 perf_session__has_tp(struct perf_session *session, const char *name)
1181 {
1182 	struct perf_evsel *evsel;
1183 
1184 	evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1185 
1186 	return evsel != NULL;
1187 }
1188 
1189 static int parse_target_str(struct trace *trace)
1190 {
1191 	if (trace->opts.target.pid) {
1192 		trace->pid_list = intlist__new(trace->opts.target.pid);
1193 		if (trace->pid_list == NULL) {
1194 			pr_err("Error parsing process id string\n");
1195 			return -EINVAL;
1196 		}
1197 	}
1198 
1199 	if (trace->opts.target.tid) {
1200 		trace->tid_list = intlist__new(trace->opts.target.tid);
1201 		if (trace->tid_list == NULL) {
1202 			pr_err("Error parsing thread id string\n");
1203 			return -EINVAL;
1204 		}
1205 	}
1206 
1207 	return 0;
1208 }
1209 
1210 static int trace__run(struct trace *trace, int argc, const char **argv)
1211 {
1212 	struct perf_evlist *evlist = perf_evlist__new();
1213 	struct perf_evsel *evsel;
1214 	int err = -1, i;
1215 	unsigned long before;
1216 	const bool forks = argc > 0;
1217 
1218 	if (evlist == NULL) {
1219 		fprintf(trace->output, "Not enough memory to run!\n");
1220 		goto out;
1221 	}
1222 
1223 	if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
1224 	    perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
1225 		fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
1226 		goto out_delete_evlist;
1227 	}
1228 
1229 	if (trace->sched &&
1230 	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1231 				   trace__sched_stat_runtime)) {
1232 		fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
1233 		goto out_delete_evlist;
1234 	}
1235 
1236 	err = perf_evlist__create_maps(evlist, &trace->opts.target);
1237 	if (err < 0) {
1238 		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1239 		goto out_delete_evlist;
1240 	}
1241 
1242 	err = trace__symbols_init(trace, evlist);
1243 	if (err < 0) {
1244 		fprintf(trace->output, "Problems initializing symbol libraries!\n");
1245 		goto out_delete_maps;
1246 	}
1247 
1248 	perf_evlist__config(evlist, &trace->opts);
1249 
1250 	signal(SIGCHLD, sig_handler);
1251 	signal(SIGINT, sig_handler);
1252 
1253 	if (forks) {
1254 		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1255 						    argv, false, false);
1256 		if (err < 0) {
1257 			fprintf(trace->output, "Couldn't run the workload!\n");
1258 			goto out_delete_maps;
1259 		}
1260 	}
1261 
1262 	err = perf_evlist__open(evlist);
1263 	if (err < 0) {
1264 		fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
1265 		goto out_delete_maps;
1266 	}
1267 
1268 	err = perf_evlist__mmap(evlist, UINT_MAX, false);
1269 	if (err < 0) {
1270 		fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1271 		goto out_close_evlist;
1272 	}
1273 
1274 	perf_evlist__enable(evlist);
1275 
1276 	if (forks)
1277 		perf_evlist__start_workload(evlist);
1278 
1279 	trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1280 again:
1281 	before = trace->nr_events;
1282 
1283 	for (i = 0; i < evlist->nr_mmaps; i++) {
1284 		union perf_event *event;
1285 
1286 		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1287 			const u32 type = event->header.type;
1288 			tracepoint_handler handler;
1289 			struct perf_sample sample;
1290 
1291 			++trace->nr_events;
1292 
1293 			err = perf_evlist__parse_sample(evlist, event, &sample);
1294 			if (err) {
1295 				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1296 				continue;
1297 			}
1298 
1299 			if (!trace->full_time && trace->base_time == 0)
1300 				trace->base_time = sample.time;
1301 
1302 			if (type != PERF_RECORD_SAMPLE) {
1303 				trace__process_event(trace, &trace->host, event);
1304 				continue;
1305 			}
1306 
1307 			evsel = perf_evlist__id2evsel(evlist, sample.id);
1308 			if (evsel == NULL) {
1309 				fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1310 				continue;
1311 			}
1312 
1313 			if (sample.raw_data == NULL) {
1314 				fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1315 				       perf_evsel__name(evsel), sample.tid,
1316 				       sample.cpu, sample.raw_size);
1317 				continue;
1318 			}
1319 
1320 			handler = evsel->handler.func;
1321 			handler(trace, evsel, &sample);
1322 
1323 			if (done)
1324 				goto out_unmap_evlist;
1325 		}
1326 	}
1327 
1328 	if (trace->nr_events == before) {
1329 		if (done)
1330 			goto out_unmap_evlist;
1331 
1332 		poll(evlist->pollfd, evlist->nr_fds, -1);
1333 	}
1334 
1335 	if (done)
1336 		perf_evlist__disable(evlist);
1337 
1338 	goto again;
1339 
1340 out_unmap_evlist:
1341 	perf_evlist__munmap(evlist);
1342 out_close_evlist:
1343 	perf_evlist__close(evlist);
1344 out_delete_maps:
1345 	perf_evlist__delete_maps(evlist);
1346 out_delete_evlist:
1347 	perf_evlist__delete(evlist);
1348 out:
1349 	return err;
1350 }
1351 
1352 static int trace__replay(struct trace *trace)
1353 {
1354 	const struct perf_evsel_str_handler handlers[] = {
1355 		{ "raw_syscalls:sys_enter",  trace__sys_enter, },
1356 		{ "raw_syscalls:sys_exit",   trace__sys_exit, },
1357 	};
1358 
1359 	struct perf_session *session;
1360 	int err = -1;
1361 
1362 	trace->tool.sample	  = trace__process_sample;
1363 	trace->tool.mmap	  = perf_event__process_mmap;
1364 	trace->tool.mmap2	  = perf_event__process_mmap2;
1365 	trace->tool.comm	  = perf_event__process_comm;
1366 	trace->tool.exit	  = perf_event__process_exit;
1367 	trace->tool.fork	  = perf_event__process_fork;
1368 	trace->tool.attr	  = perf_event__process_attr;
1369 	trace->tool.tracing_data = perf_event__process_tracing_data;
1370 	trace->tool.build_id	  = perf_event__process_build_id;
1371 
1372 	trace->tool.ordered_samples = true;
1373 	trace->tool.ordering_requires_timestamps = true;
1374 
1375 	/* add tid to output */
1376 	trace->multiple_threads = true;
1377 
1378 	if (symbol__init() < 0)
1379 		return -1;
1380 
1381 	session = perf_session__new(input_name, O_RDONLY, 0, false,
1382 				    &trace->tool);
1383 	if (session == NULL)
1384 		return -ENOMEM;
1385 
1386 	err = perf_session__set_tracepoints_handlers(session, handlers);
1387 	if (err)
1388 		goto out;
1389 
1390 	if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
1391 		pr_err("Data file does not have raw_syscalls:sys_enter events\n");
1392 		goto out;
1393 	}
1394 
1395 	if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
1396 		pr_err("Data file does not have raw_syscalls:sys_exit events\n");
1397 		goto out;
1398 	}
1399 
1400 	err = parse_target_str(trace);
1401 	if (err != 0)
1402 		goto out;
1403 
1404 	setup_pager();
1405 
1406 	err = perf_session__process_events(session, &trace->tool);
1407 	if (err)
1408 		pr_err("Failed to process events, error %d", err);
1409 
1410 out:
1411 	perf_session__delete(session);
1412 
1413 	return err;
1414 }
1415 
/*
 * Write the banner and column headings of the per-thread summary to fp.
 *
 * Returns the sum of the fprintf() return values for the lines written.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	static const char * const lines[] = {
		"\n _____________________________________________________________________\n",
		" __)    Summary of events    (__\n\n",
		"              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n",
		" _____________________________________________________________________\n\n",
	};
	size_t printed = 0;
	size_t i;

	for (i = 0; i < sizeof(lines) / sizeof(lines[0]); i++)
		printed += fprintf(fp, "%s", lines[i]);

	return printed;
}
1427 
1428 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
1429 {
1430 	size_t printed = trace__fprintf_threads_header(fp);
1431 	struct rb_node *nd;
1432 
1433 	for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
1434 		struct thread *thread = rb_entry(nd, struct thread, rb_node);
1435 		struct thread_trace *ttrace = thread->priv;
1436 		const char *color;
1437 		double ratio;
1438 
1439 		if (ttrace == NULL)
1440 			continue;
1441 
1442 		ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
1443 
1444 		color = PERF_COLOR_NORMAL;
1445 		if (ratio > 50.0)
1446 			color = PERF_COLOR_RED;
1447 		else if (ratio > 25.0)
1448 			color = PERF_COLOR_GREEN;
1449 		else if (ratio > 5.0)
1450 			color = PERF_COLOR_YELLOW;
1451 
1452 		printed += color_fprintf(fp, color, "%20s", thread->comm);
1453 		printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
1454 		printed += color_fprintf(fp, color, "%5.1f%%", ratio);
1455 		printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
1456 	}
1457 
1458 	return printed;
1459 }
1460 
1461 static int trace__set_duration(const struct option *opt, const char *str,
1462 			       int unset __maybe_unused)
1463 {
1464 	struct trace *trace = opt->value;
1465 
1466 	trace->duration_filter = atof(str);
1467 	return 0;
1468 }
1469 
1470 static int trace__open_output(struct trace *trace, const char *filename)
1471 {
1472 	struct stat st;
1473 
1474 	if (!stat(filename, &st) && st.st_size) {
1475 		char oldname[PATH_MAX];
1476 
1477 		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
1478 		unlink(oldname);
1479 		rename(filename, oldname);
1480 	}
1481 
1482 	trace->output = fopen(filename, "w");
1483 
1484 	return trace->output == NULL ? -errno : 0;
1485 }
1486 
1487 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
1488 {
1489 	const char * const trace_usage[] = {
1490 		"perf trace [<options>] [<command>]",
1491 		"perf trace [<options>] -- <command> [<options>]",
1492 		NULL
1493 	};
1494 	struct trace trace = {
1495 		.audit_machine = audit_detect_machine(),
1496 		.syscalls = {
1497 			. max = -1,
1498 		},
1499 		.opts = {
1500 			.target = {
1501 				.uid	   = UINT_MAX,
1502 				.uses_mmap = true,
1503 			},
1504 			.user_freq     = UINT_MAX,
1505 			.user_interval = ULLONG_MAX,
1506 			.no_delay      = true,
1507 			.mmap_pages    = 1024,
1508 		},
1509 		.output = stdout,
1510 		.show_comm = true,
1511 	};
1512 	const char *output_name = NULL;
1513 	const char *ev_qualifier_str = NULL;
1514 	const struct option trace_options[] = {
1515 	OPT_BOOLEAN(0, "comm", &trace.show_comm,
1516 		    "show the thread COMM next to its id"),
1517 	OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
1518 		    "list of events to trace"),
1519 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
1520 	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
1521 	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
1522 		    "trace events on existing process id"),
1523 	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
1524 		    "trace events on existing thread id"),
1525 	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
1526 		    "system-wide collection from all CPUs"),
1527 	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
1528 		    "list of cpus to monitor"),
1529 	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
1530 		    "child tasks do not inherit counters"),
1531 	OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages,
1532 		     "number of mmap data pages"),
1533 	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
1534 		   "user to profile"),
1535 	OPT_CALLBACK(0, "duration", &trace, "float",
1536 		     "show only events with duration > N.M ms",
1537 		     trace__set_duration),
1538 	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
1539 	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
1540 	OPT_BOOLEAN('T', "time", &trace.full_time,
1541 		    "Show full timestamp, not time relative to first start"),
1542 	OPT_END()
1543 	};
1544 	int err;
1545 	char bf[BUFSIZ];
1546 
1547 	argc = parse_options(argc, argv, trace_options, trace_usage, 0);
1548 
1549 	if (output_name != NULL) {
1550 		err = trace__open_output(&trace, output_name);
1551 		if (err < 0) {
1552 			perror("failed to create output file");
1553 			goto out;
1554 		}
1555 	}
1556 
1557 	if (ev_qualifier_str != NULL) {
1558 		const char *s = ev_qualifier_str;
1559 
1560 		trace.not_ev_qualifier = *s == '!';
1561 		if (trace.not_ev_qualifier)
1562 			++s;
1563 		trace.ev_qualifier = strlist__new(true, s);
1564 		if (trace.ev_qualifier == NULL) {
1565 			fputs("Not enough memory to parse event qualifier",
1566 			      trace.output);
1567 			err = -ENOMEM;
1568 			goto out_close;
1569 		}
1570 	}
1571 
1572 	err = perf_target__validate(&trace.opts.target);
1573 	if (err) {
1574 		perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1575 		fprintf(trace.output, "%s", bf);
1576 		goto out_close;
1577 	}
1578 
1579 	err = perf_target__parse_uid(&trace.opts.target);
1580 	if (err) {
1581 		perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
1582 		fprintf(trace.output, "%s", bf);
1583 		goto out_close;
1584 	}
1585 
1586 	if (!argc && perf_target__none(&trace.opts.target))
1587 		trace.opts.target.system_wide = true;
1588 
1589 	if (input_name)
1590 		err = trace__replay(&trace);
1591 	else
1592 		err = trace__run(&trace, argc, argv);
1593 
1594 	if (trace.sched && !err)
1595 		trace__fprintf_thread_summary(&trace, trace.output);
1596 
1597 out_close:
1598 	if (output_name != NULL)
1599 		fclose(trace.output);
1600 out:
1601 	return err;
1602 }
1603