xref: /linux/tools/perf/builtin-trace.c (revision 99ff7150547382ee612c40d8d6a0670ddec7c9fc)
#include <traceevent/event-parse.h>
#include "builtin.h"
#include "util/color.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/session.h"
#include "util/thread.h"
#include "util/parse-options.h"
#include "util/strlist.h"
#include "util/intlist.h"
#include "util/thread_map.h"
#include "util/stat.h"

#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
20 
21 /* For older distros: */
22 #ifndef MAP_STACK
23 # define MAP_STACK		0x20000
24 #endif
25 
26 #ifndef MADV_HWPOISON
27 # define MADV_HWPOISON		100
28 #endif
29 
30 #ifndef MADV_MERGEABLE
31 # define MADV_MERGEABLE		12
32 #endif
33 
34 #ifndef MADV_UNMERGEABLE
35 # define MADV_UNMERGEABLE	13
36 #endif
37 
38 struct tp_field {
39 	int offset;
40 	union {
41 		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
42 		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
43 	};
44 };
45 
46 #define TP_UINT_FIELD(bits) \
47 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
48 { \
49 	return *(u##bits *)(sample->raw_data + field->offset); \
50 }
51 
52 TP_UINT_FIELD(8);
53 TP_UINT_FIELD(16);
54 TP_UINT_FIELD(32);
55 TP_UINT_FIELD(64);
56 
57 #define TP_UINT_FIELD__SWAPPED(bits) \
58 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
59 { \
60 	u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
61 	return bswap_##bits(value);\
62 }
63 
64 TP_UINT_FIELD__SWAPPED(16);
65 TP_UINT_FIELD__SWAPPED(32);
66 TP_UINT_FIELD__SWAPPED(64);
67 
68 static int tp_field__init_uint(struct tp_field *field,
69 			       struct format_field *format_field,
70 			       bool needs_swap)
71 {
72 	field->offset = format_field->offset;
73 
74 	switch (format_field->size) {
75 	case 1:
76 		field->integer = tp_field__u8;
77 		break;
78 	case 2:
79 		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
80 		break;
81 	case 4:
82 		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
83 		break;
84 	case 8:
85 		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
86 		break;
87 	default:
88 		return -1;
89 	}
90 
91 	return 0;
92 }
93 
94 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
95 {
96 	return sample->raw_data + field->offset;
97 }
98 
99 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
100 {
101 	field->offset = format_field->offset;
102 	field->pointer = tp_field__ptr;
103 	return 0;
104 }
105 
106 struct syscall_tp {
107 	struct tp_field id;
108 	union {
109 		struct tp_field args, ret;
110 	};
111 };
112 
113 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
114 					  struct tp_field *field,
115 					  const char *name)
116 {
117 	struct format_field *format_field = perf_evsel__field(evsel, name);
118 
119 	if (format_field == NULL)
120 		return -1;
121 
122 	return tp_field__init_uint(field, format_field, evsel->needs_swap);
123 }
124 
/*
 * Initialise the integer member @name of the struct syscall_tp hanging off
 * evsel->priv from the tracepoint field of the same name.
 *
 * @evsel is evaluated exactly once and the temporaries use prefixed names so
 * that they cannot capture identifiers appearing in the caller's argument.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct perf_evsel *__sc_evsel = (evsel);\
	   struct syscall_tp *__sc_tp = __sc_evsel->priv;\
	   perf_evsel__init_tp_uint_field(__sc_evsel, &__sc_tp->name, #name); })
128 
/*
 * Look up the tracepoint field called @name on @evsel and bind @field to it
 * as an in-place (pointer) field.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt ? tp_field__init_ptr(field, fmt) : -1;
}
140 
/*
 * Initialise the pointer member @name of the struct syscall_tp hanging off
 * evsel->priv from the tracepoint field of the same name.
 *
 * @evsel is evaluated exactly once and the temporaries use prefixed names so
 * that they cannot capture identifiers appearing in the caller's argument.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct perf_evsel *__sc_evsel = (evsel);\
	   struct syscall_tp *__sc_tp = __sc_evsel->priv;\
	   perf_evsel__init_tp_ptr_field(__sc_evsel, &__sc_tp->name, #name); })
144 
145 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
146 {
147 	free(evsel->priv);
148 	evsel->priv = NULL;
149 	perf_evsel__delete(evsel);
150 }
151 
152 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
153 {
154 	evsel->priv = malloc(sizeof(struct syscall_tp));
155 	if (evsel->priv != NULL) {
156 		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
157 			goto out_delete;
158 
159 		evsel->handler = handler;
160 		return 0;
161 	}
162 
163 	return -ENOMEM;
164 
165 out_delete:
166 	free(evsel->priv);
167 	evsel->priv = NULL;
168 	return -ENOENT;
169 }
170 
/*
 * Create an evsel for the raw_syscalls:<direction> tracepoint with its
 * syscall_tp private data and @handler set up.
 *
 * Returns the new evsel, or NULL if it could not be created or initialised
 * (a partially set up evsel is destroyed before returning).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (evsel == NULL)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler) != 0) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
186 
/*
 * Read member @name of the evsel's struct syscall_tp from @sample through the
 * reader installed at init time: perf_evsel__sc_tp_uint() yields the integer
 * value, perf_evsel__sc_tp_ptr() the address of the field.
 *
 * Arguments are parenthesised and the temporary carries a prefixed name so it
 * cannot capture an identifier used in the caller's @evsel/@sample
 * expressions.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *__sc_fields = (evsel)->priv; \
	   __sc_fields->name.integer(&__sc_fields->name, (sample)); })

#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *__sc_fields = (evsel)->priv; \
	   __sc_fields->name.pointer(&__sc_fields->name, (sample)); })
194 
195 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
196 					  void *sys_enter_handler,
197 					  void *sys_exit_handler)
198 {
199 	int ret = -1;
200 	struct perf_evsel *sys_enter, *sys_exit;
201 
202 	sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
203 	if (sys_enter == NULL)
204 		goto out;
205 
206 	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
207 		goto out_delete_sys_enter;
208 
209 	sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
210 	if (sys_exit == NULL)
211 		goto out_delete_sys_enter;
212 
213 	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
214 		goto out_delete_sys_exit;
215 
216 	perf_evlist__add(evlist, sys_enter);
217 	perf_evlist__add(evlist, sys_exit);
218 
219 	ret = 0;
220 out:
221 	return ret;
222 
223 out_delete_sys_exit:
224 	perf_evsel__delete_priv(sys_exit);
225 out_delete_sys_enter:
226 	perf_evsel__delete_priv(sys_enter);
227 	goto out;
228 }
229 
230 
231 struct syscall_arg {
232 	unsigned long val;
233 	struct thread *thread;
234 	struct trace  *trace;
235 	void	      *parm;
236 	u8	      idx;
237 	u8	      mask;
238 };
239 
/*
 * Table mapping a contiguous range of integer values to names.
 *
 * offset:     value that corresponds to entries[0]
 * nr_entries: number of slots in 'entries'
 * entries:    the names, indexed by (value - offset)
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char **entries;
};

/* Define strarray__<array> describing <array>, with values starting at 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.offset	    = 0, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* Same as DEFINE_STRARRAY(), but entries[0] stands for the value 'off'. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}
256 
257 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
258 						const char *intfmt,
259 					        struct syscall_arg *arg)
260 {
261 	struct strarray *sa = arg->parm;
262 	int idx = arg->val - sa->offset;
263 
264 	if (idx < 0 || idx >= sa->nr_entries)
265 		return scnprintf(bf, size, intfmt, arg->val);
266 
267 	return scnprintf(bf, size, "%s", sa->entries[idx]);
268 }
269 
/* strarray formatter that prints values without a name in decimal. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *unnamed_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, unnamed_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
277 
/* strarray formatter that prints values without a name in hexadecimal. */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *unnamed_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, unnamed_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
285 
286 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
287 					struct syscall_arg *arg);
288 
289 #define SCA_FD syscall_arg__scnprintf_fd
290 
291 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
292 					   struct syscall_arg *arg)
293 {
294 	int fd = arg->val;
295 
296 	if (fd == AT_FDCWD)
297 		return scnprintf(bf, size, "CWD");
298 
299 	return syscall_arg__scnprintf_fd(bf, size, arg);
300 }
301 
302 #define SCA_FDAT syscall_arg__scnprintf_fd_at
303 
304 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
305 					      struct syscall_arg *arg);
306 
307 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
308 
309 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
310 					 struct syscall_arg *arg)
311 {
312 	return scnprintf(bf, size, "%#lx", arg->val);
313 }
314 
315 #define SCA_HEX syscall_arg__scnprintf_hex
316 
317 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
318 					       struct syscall_arg *arg)
319 {
320 	int printed = 0, prot = arg->val;
321 
322 	if (prot == PROT_NONE)
323 		return scnprintf(bf, size, "NONE");
324 #define	P_MMAP_PROT(n) \
325 	if (prot & PROT_##n) { \
326 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
327 		prot &= ~PROT_##n; \
328 	}
329 
330 	P_MMAP_PROT(EXEC);
331 	P_MMAP_PROT(READ);
332 	P_MMAP_PROT(WRITE);
333 #ifdef PROT_SEM
334 	P_MMAP_PROT(SEM);
335 #endif
336 	P_MMAP_PROT(GROWSDOWN);
337 	P_MMAP_PROT(GROWSUP);
338 #undef P_MMAP_PROT
339 
340 	if (prot)
341 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
342 
343 	return printed;
344 }
345 
346 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
347 
348 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
349 						struct syscall_arg *arg)
350 {
351 	int printed = 0, flags = arg->val;
352 
353 #define	P_MMAP_FLAG(n) \
354 	if (flags & MAP_##n) { \
355 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
356 		flags &= ~MAP_##n; \
357 	}
358 
359 	P_MMAP_FLAG(SHARED);
360 	P_MMAP_FLAG(PRIVATE);
361 #ifdef MAP_32BIT
362 	P_MMAP_FLAG(32BIT);
363 #endif
364 	P_MMAP_FLAG(ANONYMOUS);
365 	P_MMAP_FLAG(DENYWRITE);
366 	P_MMAP_FLAG(EXECUTABLE);
367 	P_MMAP_FLAG(FILE);
368 	P_MMAP_FLAG(FIXED);
369 	P_MMAP_FLAG(GROWSDOWN);
370 #ifdef MAP_HUGETLB
371 	P_MMAP_FLAG(HUGETLB);
372 #endif
373 	P_MMAP_FLAG(LOCKED);
374 	P_MMAP_FLAG(NONBLOCK);
375 	P_MMAP_FLAG(NORESERVE);
376 	P_MMAP_FLAG(POPULATE);
377 	P_MMAP_FLAG(STACK);
378 #ifdef MAP_UNINITIALIZED
379 	P_MMAP_FLAG(UNINITIALIZED);
380 #endif
381 #undef P_MMAP_FLAG
382 
383 	if (flags)
384 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
385 
386 	return printed;
387 }
388 
389 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
390 
391 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
392 						      struct syscall_arg *arg)
393 {
394 	int behavior = arg->val;
395 
396 	switch (behavior) {
397 #define	P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
398 	P_MADV_BHV(NORMAL);
399 	P_MADV_BHV(RANDOM);
400 	P_MADV_BHV(SEQUENTIAL);
401 	P_MADV_BHV(WILLNEED);
402 	P_MADV_BHV(DONTNEED);
403 	P_MADV_BHV(REMOVE);
404 	P_MADV_BHV(DONTFORK);
405 	P_MADV_BHV(DOFORK);
406 	P_MADV_BHV(HWPOISON);
407 #ifdef MADV_SOFT_OFFLINE
408 	P_MADV_BHV(SOFT_OFFLINE);
409 #endif
410 	P_MADV_BHV(MERGEABLE);
411 	P_MADV_BHV(UNMERGEABLE);
412 #ifdef MADV_HUGEPAGE
413 	P_MADV_BHV(HUGEPAGE);
414 #endif
415 #ifdef MADV_NOHUGEPAGE
416 	P_MADV_BHV(NOHUGEPAGE);
417 #endif
418 #ifdef MADV_DONTDUMP
419 	P_MADV_BHV(DONTDUMP);
420 #endif
421 #ifdef MADV_DODUMP
422 	P_MADV_BHV(DODUMP);
423 #endif
424 #undef P_MADV_PHV
425 	default: break;
426 	}
427 
428 	return scnprintf(bf, size, "%#x", behavior);
429 }
430 
431 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
432 
433 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
434 					   struct syscall_arg *arg)
435 {
436 	int printed = 0, op = arg->val;
437 
438 	if (op == 0)
439 		return scnprintf(bf, size, "NONE");
440 #define	P_CMD(cmd) \
441 	if ((op & LOCK_##cmd) == LOCK_##cmd) { \
442 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
443 		op &= ~LOCK_##cmd; \
444 	}
445 
446 	P_CMD(SH);
447 	P_CMD(EX);
448 	P_CMD(NB);
449 	P_CMD(UN);
450 	P_CMD(MAND);
451 	P_CMD(RW);
452 	P_CMD(READ);
453 	P_CMD(WRITE);
454 #undef P_OP
455 
456 	if (op)
457 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
458 
459 	return printed;
460 }
461 
462 #define SCA_FLOCK syscall_arg__scnprintf_flock
463 
464 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
465 {
466 	enum syscall_futex_args {
467 		SCF_UADDR   = (1 << 0),
468 		SCF_OP	    = (1 << 1),
469 		SCF_VAL	    = (1 << 2),
470 		SCF_TIMEOUT = (1 << 3),
471 		SCF_UADDR2  = (1 << 4),
472 		SCF_VAL3    = (1 << 5),
473 	};
474 	int op = arg->val;
475 	int cmd = op & FUTEX_CMD_MASK;
476 	size_t printed = 0;
477 
478 	switch (cmd) {
479 #define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
480 	P_FUTEX_OP(WAIT);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
481 	P_FUTEX_OP(WAKE);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
482 	P_FUTEX_OP(FD);		    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
483 	P_FUTEX_OP(REQUEUE);	    arg->mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
484 	P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;			  break;
485 	P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;			  break;
486 	P_FUTEX_OP(WAKE_OP);							  break;
487 	P_FUTEX_OP(LOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
488 	P_FUTEX_OP(UNLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
489 	P_FUTEX_OP(TRYLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
490 	P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;			  break;
491 	P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;			  break;
492 	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
493 	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
494 	}
495 
496 	if (op & FUTEX_PRIVATE_FLAG)
497 		printed += scnprintf(bf + printed, size - printed, "|PRIV");
498 
499 	if (op & FUTEX_CLOCK_REALTIME)
500 		printed += scnprintf(bf + printed, size - printed, "|CLKRT");
501 
502 	return printed;
503 }
504 
505 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
506 
507 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
508 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
509 
510 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
511 static DEFINE_STRARRAY(itimers);
512 
513 static const char *whences[] = { "SET", "CUR", "END",
514 #ifdef SEEK_DATA
515 "DATA",
516 #endif
517 #ifdef SEEK_HOLE
518 "HOLE",
519 #endif
520 };
521 static DEFINE_STRARRAY(whences);
522 
523 static const char *fcntl_cmds[] = {
524 	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
525 	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
526 	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
527 	"F_GETOWNER_UIDS",
528 };
529 static DEFINE_STRARRAY(fcntl_cmds);
530 
531 static const char *rlimit_resources[] = {
532 	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
533 	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
534 	"RTTIME",
535 };
536 static DEFINE_STRARRAY(rlimit_resources);
537 
538 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
539 static DEFINE_STRARRAY(sighow);
540 
541 static const char *clockid[] = {
542 	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
543 	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
544 };
545 static DEFINE_STRARRAY(clockid);
546 
/*
 * socket() address family names, indexed by the AF_* value.
 *
 * The table is positional, so every family number up to the last one needs a
 * slot.  Family 28 (AF_MPLS) was missing, which shifted "CAN" and everything
 * after it down by one: AF_CAN (29) was reported as "TIPC", AF_BLUETOOTH (31)
 * as "IUCV", and so on.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
555 static DEFINE_STRARRAY(socket_families);
556 
557 #ifndef SOCK_TYPE_MASK
558 #define SOCK_TYPE_MASK 0xf
559 #endif
560 
561 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
562 						      struct syscall_arg *arg)
563 {
564 	size_t printed;
565 	int type = arg->val,
566 	    flags = type & ~SOCK_TYPE_MASK;
567 
568 	type &= SOCK_TYPE_MASK;
569 	/*
570  	 * Can't use a strarray, MIPS may override for ABI reasons.
571  	 */
572 	switch (type) {
573 #define	P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
574 	P_SK_TYPE(STREAM);
575 	P_SK_TYPE(DGRAM);
576 	P_SK_TYPE(RAW);
577 	P_SK_TYPE(RDM);
578 	P_SK_TYPE(SEQPACKET);
579 	P_SK_TYPE(DCCP);
580 	P_SK_TYPE(PACKET);
581 #undef P_SK_TYPE
582 	default:
583 		printed = scnprintf(bf, size, "%#x", type);
584 	}
585 
586 #define	P_SK_FLAG(n) \
587 	if (flags & SOCK_##n) { \
588 		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
589 		flags &= ~SOCK_##n; \
590 	}
591 
592 	P_SK_FLAG(CLOEXEC);
593 	P_SK_FLAG(NONBLOCK);
594 #undef P_SK_FLAG
595 
596 	if (flags)
597 		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
598 
599 	return printed;
600 }
601 
602 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
603 
604 #ifndef MSG_PROBE
605 #define MSG_PROBE	     0x10
606 #endif
607 #ifndef MSG_WAITFORONE
608 #define MSG_WAITFORONE	0x10000
609 #endif
610 #ifndef MSG_SENDPAGE_NOTLAST
611 #define MSG_SENDPAGE_NOTLAST 0x20000
612 #endif
613 #ifndef MSG_FASTOPEN
614 #define MSG_FASTOPEN	     0x20000000
615 #endif
616 
617 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
618 					       struct syscall_arg *arg)
619 {
620 	int printed = 0, flags = arg->val;
621 
622 	if (flags == 0)
623 		return scnprintf(bf, size, "NONE");
624 #define	P_MSG_FLAG(n) \
625 	if (flags & MSG_##n) { \
626 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
627 		flags &= ~MSG_##n; \
628 	}
629 
630 	P_MSG_FLAG(OOB);
631 	P_MSG_FLAG(PEEK);
632 	P_MSG_FLAG(DONTROUTE);
633 	P_MSG_FLAG(TRYHARD);
634 	P_MSG_FLAG(CTRUNC);
635 	P_MSG_FLAG(PROBE);
636 	P_MSG_FLAG(TRUNC);
637 	P_MSG_FLAG(DONTWAIT);
638 	P_MSG_FLAG(EOR);
639 	P_MSG_FLAG(WAITALL);
640 	P_MSG_FLAG(FIN);
641 	P_MSG_FLAG(SYN);
642 	P_MSG_FLAG(CONFIRM);
643 	P_MSG_FLAG(RST);
644 	P_MSG_FLAG(ERRQUEUE);
645 	P_MSG_FLAG(NOSIGNAL);
646 	P_MSG_FLAG(MORE);
647 	P_MSG_FLAG(WAITFORONE);
648 	P_MSG_FLAG(SENDPAGE_NOTLAST);
649 	P_MSG_FLAG(FASTOPEN);
650 	P_MSG_FLAG(CMSG_CLOEXEC);
651 #undef P_MSG_FLAG
652 
653 	if (flags)
654 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
655 
656 	return printed;
657 }
658 
659 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
660 
661 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
662 						 struct syscall_arg *arg)
663 {
664 	size_t printed = 0;
665 	int mode = arg->val;
666 
667 	if (mode == F_OK) /* 0 */
668 		return scnprintf(bf, size, "F");
669 #define	P_MODE(n) \
670 	if (mode & n##_OK) { \
671 		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
672 		mode &= ~n##_OK; \
673 	}
674 
675 	P_MODE(R);
676 	P_MODE(W);
677 	P_MODE(X);
678 #undef P_MODE
679 
680 	if (mode)
681 		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
682 
683 	return printed;
684 }
685 
686 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
687 
688 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
689 					       struct syscall_arg *arg)
690 {
691 	int printed = 0, flags = arg->val;
692 
693 	if (!(flags & O_CREAT))
694 		arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
695 
696 	if (flags == 0)
697 		return scnprintf(bf, size, "RDONLY");
698 #define	P_FLAG(n) \
699 	if (flags & O_##n) { \
700 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
701 		flags &= ~O_##n; \
702 	}
703 
704 	P_FLAG(APPEND);
705 	P_FLAG(ASYNC);
706 	P_FLAG(CLOEXEC);
707 	P_FLAG(CREAT);
708 	P_FLAG(DIRECT);
709 	P_FLAG(DIRECTORY);
710 	P_FLAG(EXCL);
711 	P_FLAG(LARGEFILE);
712 	P_FLAG(NOATIME);
713 	P_FLAG(NOCTTY);
714 #ifdef O_NONBLOCK
715 	P_FLAG(NONBLOCK);
716 #elif O_NDELAY
717 	P_FLAG(NDELAY);
718 #endif
719 #ifdef O_PATH
720 	P_FLAG(PATH);
721 #endif
722 	P_FLAG(RDWR);
723 #ifdef O_DSYNC
724 	if ((flags & O_SYNC) == O_SYNC)
725 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
726 	else {
727 		P_FLAG(DSYNC);
728 	}
729 #else
730 	P_FLAG(SYNC);
731 #endif
732 	P_FLAG(TRUNC);
733 	P_FLAG(WRONLY);
734 #undef P_FLAG
735 
736 	if (flags)
737 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
738 
739 	return printed;
740 }
741 
742 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
743 
744 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
745 						   struct syscall_arg *arg)
746 {
747 	int printed = 0, flags = arg->val;
748 
749 	if (flags == 0)
750 		return scnprintf(bf, size, "NONE");
751 #define	P_FLAG(n) \
752 	if (flags & EFD_##n) { \
753 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
754 		flags &= ~EFD_##n; \
755 	}
756 
757 	P_FLAG(SEMAPHORE);
758 	P_FLAG(CLOEXEC);
759 	P_FLAG(NONBLOCK);
760 #undef P_FLAG
761 
762 	if (flags)
763 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
764 
765 	return printed;
766 }
767 
768 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
769 
770 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
771 						struct syscall_arg *arg)
772 {
773 	int printed = 0, flags = arg->val;
774 
775 #define	P_FLAG(n) \
776 	if (flags & O_##n) { \
777 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
778 		flags &= ~O_##n; \
779 	}
780 
781 	P_FLAG(CLOEXEC);
782 	P_FLAG(NONBLOCK);
783 #undef P_FLAG
784 
785 	if (flags)
786 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
787 
788 	return printed;
789 }
790 
791 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
792 
793 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
794 {
795 	int sig = arg->val;
796 
797 	switch (sig) {
798 #define	P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
799 	P_SIGNUM(HUP);
800 	P_SIGNUM(INT);
801 	P_SIGNUM(QUIT);
802 	P_SIGNUM(ILL);
803 	P_SIGNUM(TRAP);
804 	P_SIGNUM(ABRT);
805 	P_SIGNUM(BUS);
806 	P_SIGNUM(FPE);
807 	P_SIGNUM(KILL);
808 	P_SIGNUM(USR1);
809 	P_SIGNUM(SEGV);
810 	P_SIGNUM(USR2);
811 	P_SIGNUM(PIPE);
812 	P_SIGNUM(ALRM);
813 	P_SIGNUM(TERM);
814 	P_SIGNUM(STKFLT);
815 	P_SIGNUM(CHLD);
816 	P_SIGNUM(CONT);
817 	P_SIGNUM(STOP);
818 	P_SIGNUM(TSTP);
819 	P_SIGNUM(TTIN);
820 	P_SIGNUM(TTOU);
821 	P_SIGNUM(URG);
822 	P_SIGNUM(XCPU);
823 	P_SIGNUM(XFSZ);
824 	P_SIGNUM(VTALRM);
825 	P_SIGNUM(PROF);
826 	P_SIGNUM(WINCH);
827 	P_SIGNUM(IO);
828 	P_SIGNUM(PWR);
829 	P_SIGNUM(SYS);
830 	default: break;
831 	}
832 
833 	return scnprintf(bf, size, "%#x", sig);
834 }
835 
836 #define SCA_SIGNUM syscall_arg__scnprintf_signum
837 
#define TCGETS		0x5401

/*
 * Names of the tty ioctl requests.  The table is consulted with an offset of
 * 0x5401, i.e. slot i names request (0x5401 + i), so a designated index has
 * to be written as (request - 0x5401).  The previous [0x27]/[0x50]/[0x60]
 * designators used the request's low byte directly, which put TIOCSBRK,
 * FIONCLEX, FIOQSIZE and every name following them one slot too high.
 * TIOCGEXCL is request nr 0x40, not the successor of TIOCGPTLCK (0x39).
 *
 * Slots that are skipped by the designators are NULL.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x5427 - 0x5401] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK",
	[0x5440 - 0x5401] = "TIOCGEXCL",
	[0x5450 - 0x5401] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - 0x5401] = "FIOQSIZE",
};
857 
858 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
859 
/*
 * Initialiser fragment for a syscall_fmt entry: argument number 'arg' is
 * formatted by SCA_STRARRAY using the table strarray__<array>.  'name' does
 * not appear in the expansion; it only labels the argument where the macro
 * is used.
 */
#define STRARRAY(arg, name, array)			\
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, },	\
	  .arg_parm	 = { [arg] = &strarray__##array, }
863 
864 static struct syscall_fmt {
865 	const char *name;
866 	const char *alias;
867 	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
868 	void	   *arg_parm[6];
869 	bool	   errmsg;
870 	bool	   timeout;
871 	bool	   hexret;
872 } syscall_fmts[] = {
873 	{ .name	    = "access",	    .errmsg = true,
874 	  .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
875 	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
876 	{ .name	    = "brk",	    .hexret = true,
877 	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
878 	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
879 	{ .name	    = "close",	    .errmsg = true,
880 	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
881 	{ .name	    = "connect",    .errmsg = true, },
882 	{ .name	    = "dup",	    .errmsg = true,
883 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
884 	{ .name	    = "dup2",	    .errmsg = true,
885 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
886 	{ .name	    = "dup3",	    .errmsg = true,
887 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
888 	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
889 	{ .name	    = "eventfd2",   .errmsg = true,
890 	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
891 	{ .name	    = "faccessat",  .errmsg = true,
892 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
893 	{ .name	    = "fadvise64",  .errmsg = true,
894 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
895 	{ .name	    = "fallocate",  .errmsg = true,
896 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
897 	{ .name	    = "fchdir",	    .errmsg = true,
898 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
899 	{ .name	    = "fchmod",	    .errmsg = true,
900 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
901 	{ .name	    = "fchmodat",   .errmsg = true,
902 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
903 	{ .name	    = "fchown",	    .errmsg = true,
904 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
905 	{ .name	    = "fchownat",   .errmsg = true,
906 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
907 	{ .name	    = "fcntl",	    .errmsg = true,
908 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
909 			     [1] = SCA_STRARRAY, /* cmd */ },
910 	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
911 	{ .name	    = "fdatasync",  .errmsg = true,
912 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
913 	{ .name	    = "flock",	    .errmsg = true,
914 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
915 			     [1] = SCA_FLOCK, /* cmd */ }, },
916 	{ .name	    = "fsetxattr",  .errmsg = true,
917 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
918 	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat",
919 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
920 	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat",
921 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
922 	{ .name	    = "fstatfs",    .errmsg = true,
923 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
924 	{ .name	    = "fsync",    .errmsg = true,
925 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
926 	{ .name	    = "ftruncate", .errmsg = true,
927 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
928 	{ .name	    = "futex",	    .errmsg = true,
929 	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
930 	{ .name	    = "futimesat", .errmsg = true,
931 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
932 	{ .name	    = "getdents",   .errmsg = true,
933 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
934 	{ .name	    = "getdents64", .errmsg = true,
935 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
936 	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
937 	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
938 	{ .name	    = "ioctl",	    .errmsg = true,
939 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
940 			     [1] = SCA_STRHEXARRAY, /* cmd */
941 			     [2] = SCA_HEX, /* arg */ },
942 	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
943 	{ .name	    = "kill",	    .errmsg = true,
944 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
945 	{ .name	    = "linkat",	    .errmsg = true,
946 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
947 	{ .name	    = "lseek",	    .errmsg = true,
948 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
949 			     [2] = SCA_STRARRAY, /* whence */ },
950 	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
951 	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
952 	{ .name     = "madvise",    .errmsg = true,
953 	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
954 			     [2] = SCA_MADV_BHV, /* behavior */ }, },
955 	{ .name	    = "mkdirat",    .errmsg = true,
956 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
957 	{ .name	    = "mknodat",    .errmsg = true,
958 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
959 	{ .name	    = "mlock",	    .errmsg = true,
960 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
961 	{ .name	    = "mlockall",   .errmsg = true,
962 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
963 	{ .name	    = "mmap",	    .hexret = true,
964 	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
965 			     [2] = SCA_MMAP_PROT, /* prot */
966 			     [3] = SCA_MMAP_FLAGS, /* flags */
967 			     [4] = SCA_FD, 	  /* fd */ }, },
968 	{ .name	    = "mprotect",   .errmsg = true,
969 	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
970 			     [2] = SCA_MMAP_PROT, /* prot */ }, },
971 	{ .name	    = "mremap",	    .hexret = true,
972 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
973 			     [4] = SCA_HEX, /* new_addr */ }, },
974 	{ .name	    = "munlock",    .errmsg = true,
975 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
976 	{ .name	    = "munmap",	    .errmsg = true,
977 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
978 	{ .name	    = "name_to_handle_at", .errmsg = true,
979 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
980 	{ .name	    = "newfstatat", .errmsg = true,
981 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
982 	{ .name	    = "open",	    .errmsg = true,
983 	  .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
984 	{ .name	    = "open_by_handle_at", .errmsg = true,
985 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
986 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
987 	{ .name	    = "openat",	    .errmsg = true,
988 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
989 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
990 	{ .name	    = "pipe2",	    .errmsg = true,
991 	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
992 	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
993 	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
994 	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64",
995 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
996 	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread",
997 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
998 	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
999 	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64",
1000 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1001 	{ .name	    = "pwritev",    .errmsg = true,
1002 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1003 	{ .name	    = "read",	    .errmsg = true,
1004 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1005 	{ .name	    = "readlinkat", .errmsg = true,
1006 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1007 	{ .name	    = "readv",	    .errmsg = true,
1008 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1009 	{ .name	    = "recvfrom",   .errmsg = true,
1010 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1011 	{ .name	    = "recvmmsg",   .errmsg = true,
1012 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1013 	{ .name	    = "recvmsg",    .errmsg = true,
1014 	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1015 	{ .name	    = "renameat",   .errmsg = true,
1016 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1017 	{ .name	    = "rt_sigaction", .errmsg = true,
1018 	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1019 	{ .name	    = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1020 	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
1021 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1022 	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
1023 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1024 	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
1025 	{ .name	    = "sendmmsg",    .errmsg = true,
1026 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1027 	{ .name	    = "sendmsg",    .errmsg = true,
1028 	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1029 	{ .name	    = "sendto",	    .errmsg = true,
1030 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1031 	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1032 	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1033 	{ .name	    = "shutdown",   .errmsg = true,
1034 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1035 	{ .name	    = "socket",	    .errmsg = true,
1036 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1037 			     [1] = SCA_SK_TYPE, /* type */ },
1038 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1039 	{ .name	    = "socketpair", .errmsg = true,
1040 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1041 			     [1] = SCA_SK_TYPE, /* type */ },
1042 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1043 	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat", },
1044 	{ .name	    = "symlinkat",  .errmsg = true,
1045 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1046 	{ .name	    = "tgkill",	    .errmsg = true,
1047 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1048 	{ .name	    = "tkill",	    .errmsg = true,
1049 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1050 	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
1051 	{ .name	    = "unlinkat",   .errmsg = true,
1052 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1053 	{ .name	    = "utimensat",  .errmsg = true,
1054 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1055 	{ .name	    = "write",	    .errmsg = true,
1056 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1057 	{ .name	    = "writev",	    .errmsg = true,
1058 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1059 };
1060 
1061 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1062 {
1063 	const struct syscall_fmt *fmt = fmtp;
1064 	return strcmp(name, fmt->name);
1065 }
1066 
1067 static struct syscall_fmt *syscall_fmt__find(const char *name)
1068 {
1069 	const int nmemb = ARRAY_SIZE(syscall_fmts);
1070 	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1071 }
1072 
/*
 * Per syscall number state, one slot of trace->syscalls.table. Slots are
 * filled in on first use by trace__read_syscall_info().
 */
struct syscall {
	/* "syscalls:sys_enter_<name>" tracepoint format, NULL if it couldn't be read */
	struct event_format *tp_format;
	/* name as returned by audit_syscall_to_name(); NULL means "slot not read yet" */
	const char	    *name;
	/* set when the -e qualifier excludes this syscall, samples are then ignored */
	bool		    filtered;
	/* matching syscall_fmts entry, NULL when there is none */
	struct syscall_fmt  *fmt;
	/* per argument formatters, allocated by syscall__set_arg_fmts() */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* per argument formatter parameters, borrowed from fmt->arg_parm */
	void		    **arg_parm;
};
1081 
1082 static size_t fprintf_duration(unsigned long t, FILE *fp)
1083 {
1084 	double duration = (double)t / NSEC_PER_MSEC;
1085 	size_t printed = fprintf(fp, "(");
1086 
1087 	if (duration >= 1.0)
1088 		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1089 	else if (duration >= 0.01)
1090 		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1091 	else
1092 		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1093 	return printed + fprintf(fp, "): ");
1094 }
1095 
/* Per thread tracing state, hung off thread->priv by thread__trace(). */
struct thread_trace {
	/* sample time of the last sys_enter seen for this thread */
	u64		  entry_time;
	/* sample time of the last sys_exit seen for this thread */
	u64		  exit_time;
	/* true between a sys_enter and its sys_exit, i.e. entry_str wasn't printed yet */
	bool		  entry_pending;
	/* bumped every time thread__trace() is called for this thread */
	unsigned long	  nr_events;
	/* buffer holding the formatted "name(args" of the current syscall */
	char		  *entry_str;
	/* accumulated sched:sched_stat_runtime, in milliseconds */
	double		  runtime_ms;
	/* fd -> pathname cache: table is indexed by fd, max is the highest valid index (-1: empty) */
	struct {
		int	  max;
		char	  **table;
	} paths;

	/* keyed by syscall id, each node's ->priv is a struct stats of durations */
	struct intlist *syscall_stats;
};
1110 
1111 static struct thread_trace *thread_trace__new(void)
1112 {
1113 	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1114 
1115 	if (ttrace)
1116 		ttrace->paths.max = -1;
1117 
1118 	ttrace->syscall_stats = intlist__new(NULL);
1119 
1120 	return ttrace;
1121 }
1122 
1123 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1124 {
1125 	struct thread_trace *ttrace;
1126 
1127 	if (thread == NULL)
1128 		goto fail;
1129 
1130 	if (thread->priv == NULL)
1131 		thread->priv = thread_trace__new();
1132 
1133 	if (thread->priv == NULL)
1134 		goto fail;
1135 
1136 	ttrace = thread->priv;
1137 	++ttrace->nr_events;
1138 
1139 	return ttrace;
1140 fail:
1141 	color_fprintf(fp, PERF_COLOR_RED,
1142 		      "WARNING: not enough memory, dropping samples!\n");
1143 	return NULL;
1144 }
1145 
/* Global state of a 'perf trace' session. */
struct trace {
	/* embedded so that callbacks can get back here via container_of() */
	struct perf_tool	tool;
	struct {
		/* machine type handed to audit_syscall_to_name() */
		int		machine;
		/* syscall id compared against in sys_exit to pair with vfs_getname */
		int		open_id;
	}			audit;
	/* syscall id -> struct syscall table, max is the highest valid index */
	struct {
		int		max;
		struct syscall  *table;
	} syscalls;
	struct perf_record_opts opts;
	struct machine		*host;
	/* time of the first sample, subtracted from timestamps unless full_time */
	u64			base_time;
	bool			full_time;
	/* every message and trace line goes here */
	FILE			*output;
	/* events read from the ring buffers in trace__run() */
	unsigned long		nr_events;
	/* syscall names to trace, or to skip when not_ev_qualifier is set */
	struct strlist		*ev_qualifier;
	bool			not_ev_qualifier;
	/* true while trace__run() is active; /proc fd lookups depend on it */
	bool			live;
	/* pathname from the last probe:vfs_getname sample, consumed by sys_exit */
	const char 		*last_vfs_getname;
	/* tid/pid filters applied to samples by skip_sample() */
	struct intlist		*tid_list;
	struct intlist		*pid_list;
	/* also collect sched:sched_stat_runtime */
	bool			sched;
	/* prefix each line with the tid (and comm, if show_comm) */
	bool			multiple_threads;
	bool			summary;
	bool			show_comm;
	bool			show_tool_stats;
	/* in milliseconds, syscalls shorter than this are not printed */
	double			duration_filter;
	/* sum of all sched_stat_runtime samples, in milliseconds */
	double			runtime_ms;
	/* how many fd -> path resolutions came from each source */
	struct {
		u64		vfs_getname, proc_getname;
	} stats;
};
1179 
1180 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1181 {
1182 	struct thread_trace *ttrace = thread->priv;
1183 
1184 	if (fd > ttrace->paths.max) {
1185 		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1186 
1187 		if (npath == NULL)
1188 			return -1;
1189 
1190 		if (ttrace->paths.max != -1) {
1191 			memset(npath + ttrace->paths.max + 1, 0,
1192 			       (fd - ttrace->paths.max) * sizeof(char *));
1193 		} else {
1194 			memset(npath, 0, (fd + 1) * sizeof(char *));
1195 		}
1196 
1197 		ttrace->paths.table = npath;
1198 		ttrace->paths.max   = fd;
1199 	}
1200 
1201 	ttrace->paths.table[fd] = strdup(pathname);
1202 
1203 	return ttrace->paths.table[fd] != NULL ? 0 : -1;
1204 }
1205 
1206 static int thread__read_fd_path(struct thread *thread, int fd)
1207 {
1208 	char linkname[PATH_MAX], pathname[PATH_MAX];
1209 	struct stat st;
1210 	int ret;
1211 
1212 	if (thread->pid_ == thread->tid) {
1213 		scnprintf(linkname, sizeof(linkname),
1214 			  "/proc/%d/fd/%d", thread->pid_, fd);
1215 	} else {
1216 		scnprintf(linkname, sizeof(linkname),
1217 			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1218 	}
1219 
1220 	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1221 		return -1;
1222 
1223 	ret = readlink(linkname, pathname, sizeof(pathname));
1224 
1225 	if (ret < 0 || ret > st.st_size)
1226 		return -1;
1227 
1228 	pathname[ret] = '\0';
1229 	return trace__set_fd_pathname(thread, fd, pathname);
1230 }
1231 
1232 static const char *thread__fd_path(struct thread *thread, int fd,
1233 				   struct trace *trace)
1234 {
1235 	struct thread_trace *ttrace = thread->priv;
1236 
1237 	if (ttrace == NULL)
1238 		return NULL;
1239 
1240 	if (fd < 0)
1241 		return NULL;
1242 
1243 	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1244 		if (!trace->live)
1245 			return NULL;
1246 		++trace->stats.proc_getname;
1247 		if (thread__read_fd_path(thread, fd)) {
1248 			return NULL;
1249 	}
1250 
1251 	return ttrace->paths.table[fd];
1252 }
1253 
1254 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1255 					struct syscall_arg *arg)
1256 {
1257 	int fd = arg->val;
1258 	size_t printed = scnprintf(bf, size, "%d", fd);
1259 	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1260 
1261 	if (path)
1262 		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1263 
1264 	return printed;
1265 }
1266 
1267 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1268 					      struct syscall_arg *arg)
1269 {
1270 	int fd = arg->val;
1271 	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1272 	struct thread_trace *ttrace = arg->thread->priv;
1273 
1274 	if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1275 		free(ttrace->paths.table[fd]);
1276 		ttrace->paths.table[fd] = NULL;
1277 	}
1278 
1279 	return printed;
1280 }
1281 
1282 static bool trace__filter_duration(struct trace *trace, double t)
1283 {
1284 	return t < (trace->duration_filter * NSEC_PER_MSEC);
1285 }
1286 
1287 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1288 {
1289 	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1290 
1291 	return fprintf(fp, "%10.3f ", ts);
1292 }
1293 
/* Set by sig_handler(): stop waiting for more events. */
static bool done;
/* Set by sig_handler() when the signal was SIGINT: stop right away. */
static bool interrupted;

/*
 * Any handled signal flags the event loop as done, but only SIGINT marks it
 * as interrupted.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = (sig == SIGINT);
}
1302 
1303 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1304 					u64 duration, u64 tstamp, FILE *fp)
1305 {
1306 	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1307 	printed += fprintf_duration(duration, fp);
1308 
1309 	if (trace->multiple_threads) {
1310 		if (trace->show_comm)
1311 			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1312 		printed += fprintf(fp, "%d ", thread->tid);
1313 	}
1314 
1315 	return printed;
1316 }
1317 
1318 static int trace__process_event(struct trace *trace, struct machine *machine,
1319 				union perf_event *event, struct perf_sample *sample)
1320 {
1321 	int ret = 0;
1322 
1323 	switch (event->header.type) {
1324 	case PERF_RECORD_LOST:
1325 		color_fprintf(trace->output, PERF_COLOR_RED,
1326 			      "LOST %" PRIu64 " events!\n", event->lost.lost);
1327 		ret = machine__process_lost_event(machine, event, sample);
1328 	default:
1329 		ret = machine__process_event(machine, event, sample);
1330 		break;
1331 	}
1332 
1333 	return ret;
1334 }
1335 
1336 static int trace__tool_process(struct perf_tool *tool,
1337 			       union perf_event *event,
1338 			       struct perf_sample *sample,
1339 			       struct machine *machine)
1340 {
1341 	struct trace *trace = container_of(tool, struct trace, tool);
1342 	return trace__process_event(trace, machine, event, sample);
1343 }
1344 
1345 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1346 {
1347 	int err = symbol__init();
1348 
1349 	if (err)
1350 		return err;
1351 
1352 	trace->host = machine__new_host();
1353 	if (trace->host == NULL)
1354 		return -ENOMEM;
1355 
1356 	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1357 					    evlist->threads, trace__tool_process, false);
1358 	if (err)
1359 		symbol__exit();
1360 
1361 	return err;
1362 }
1363 
1364 static int syscall__set_arg_fmts(struct syscall *sc)
1365 {
1366 	struct format_field *field;
1367 	int idx = 0;
1368 
1369 	sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1370 	if (sc->arg_scnprintf == NULL)
1371 		return -1;
1372 
1373 	if (sc->fmt)
1374 		sc->arg_parm = sc->fmt->arg_parm;
1375 
1376 	for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1377 		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1378 			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1379 		else if (field->flags & FIELD_IS_POINTER)
1380 			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1381 		++idx;
1382 	}
1383 
1384 	return 0;
1385 }
1386 
1387 static int trace__read_syscall_info(struct trace *trace, int id)
1388 {
1389 	char tp_name[128];
1390 	struct syscall *sc;
1391 	const char *name = audit_syscall_to_name(id, trace->audit.machine);
1392 
1393 	if (name == NULL)
1394 		return -1;
1395 
1396 	if (id > trace->syscalls.max) {
1397 		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1398 
1399 		if (nsyscalls == NULL)
1400 			return -1;
1401 
1402 		if (trace->syscalls.max != -1) {
1403 			memset(nsyscalls + trace->syscalls.max + 1, 0,
1404 			       (id - trace->syscalls.max) * sizeof(*sc));
1405 		} else {
1406 			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1407 		}
1408 
1409 		trace->syscalls.table = nsyscalls;
1410 		trace->syscalls.max   = id;
1411 	}
1412 
1413 	sc = trace->syscalls.table + id;
1414 	sc->name = name;
1415 
1416 	if (trace->ev_qualifier) {
1417 		bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1418 
1419 		if (!(in ^ trace->not_ev_qualifier)) {
1420 			sc->filtered = true;
1421 			/*
1422 			 * No need to do read tracepoint information since this will be
1423 			 * filtered out.
1424 			 */
1425 			return 0;
1426 		}
1427 	}
1428 
1429 	sc->fmt  = syscall_fmt__find(sc->name);
1430 
1431 	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1432 	sc->tp_format = event_format__new("syscalls", tp_name);
1433 
1434 	if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1435 		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1436 		sc->tp_format = event_format__new("syscalls", tp_name);
1437 	}
1438 
1439 	if (sc->tp_format == NULL)
1440 		return -1;
1441 
1442 	return syscall__set_arg_fmts(sc);
1443 }
1444 
1445 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1446 				      unsigned long *args, struct trace *trace,
1447 				      struct thread *thread)
1448 {
1449 	size_t printed = 0;
1450 
1451 	if (sc->tp_format != NULL) {
1452 		struct format_field *field;
1453 		u8 bit = 1;
1454 		struct syscall_arg arg = {
1455 			.idx	= 0,
1456 			.mask	= 0,
1457 			.trace  = trace,
1458 			.thread = thread,
1459 		};
1460 
1461 		for (field = sc->tp_format->format.fields->next; field;
1462 		     field = field->next, ++arg.idx, bit <<= 1) {
1463 			if (arg.mask & bit)
1464 				continue;
1465 			/*
1466  			 * Suppress this argument if its value is zero and
1467  			 * and we don't have a string associated in an
1468  			 * strarray for it.
1469  			 */
1470 			if (args[arg.idx] == 0 &&
1471 			    !(sc->arg_scnprintf &&
1472 			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1473 			      sc->arg_parm[arg.idx]))
1474 				continue;
1475 
1476 			printed += scnprintf(bf + printed, size - printed,
1477 					     "%s%s: ", printed ? ", " : "", field->name);
1478 			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1479 				arg.val = args[arg.idx];
1480 				if (sc->arg_parm)
1481 					arg.parm = sc->arg_parm[arg.idx];
1482 				printed += sc->arg_scnprintf[arg.idx](bf + printed,
1483 								      size - printed, &arg);
1484 			} else {
1485 				printed += scnprintf(bf + printed, size - printed,
1486 						     "%ld", args[arg.idx]);
1487 			}
1488 		}
1489 	} else {
1490 		int i = 0;
1491 
1492 		while (i < 6) {
1493 			printed += scnprintf(bf + printed, size - printed,
1494 					     "%sarg%d: %ld",
1495 					     printed ? ", " : "", i, args[i]);
1496 			++i;
1497 		}
1498 	}
1499 
1500 	return printed;
1501 }
1502 
/*
 * Signature of the per tracepoint callbacks stored in evsel->handler and
 * invoked for each sample of that tracepoint.
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  struct perf_sample *sample);
1505 
1506 static struct syscall *trace__syscall_info(struct trace *trace,
1507 					   struct perf_evsel *evsel, int id)
1508 {
1509 
1510 	if (id < 0) {
1511 
1512 		/*
1513 		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1514 		 * before that, leaving at a higher verbosity level till that is
1515 		 * explained. Reproduced with plain ftrace with:
1516 		 *
1517 		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1518 		 * grep "NR -1 " /t/trace_pipe
1519 		 *
1520 		 * After generating some load on the machine.
1521  		 */
1522 		if (verbose > 1) {
1523 			static u64 n;
1524 			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1525 				id, perf_evsel__name(evsel), ++n);
1526 		}
1527 		return NULL;
1528 	}
1529 
1530 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1531 	    trace__read_syscall_info(trace, id))
1532 		goto out_cant_read;
1533 
1534 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1535 		goto out_cant_read;
1536 
1537 	return &trace->syscalls.table[id];
1538 
1539 out_cant_read:
1540 	if (verbose) {
1541 		fprintf(trace->output, "Problems reading syscall %d", id);
1542 		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1543 			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1544 		fputs(" information\n", trace->output);
1545 	}
1546 	return NULL;
1547 }
1548 
1549 static void thread__update_stats(struct thread_trace *ttrace,
1550 				 int id, struct perf_sample *sample)
1551 {
1552 	struct int_node *inode;
1553 	struct stats *stats;
1554 	u64 duration = 0;
1555 
1556 	inode = intlist__findnew(ttrace->syscall_stats, id);
1557 	if (inode == NULL)
1558 		return;
1559 
1560 	stats = inode->priv;
1561 	if (stats == NULL) {
1562 		stats = malloc(sizeof(struct stats));
1563 		if (stats == NULL)
1564 			return;
1565 		init_stats(stats);
1566 		inode->priv = stats;
1567 	}
1568 
1569 	if (ttrace->entry_time && sample->time > ttrace->entry_time)
1570 		duration = sample->time - ttrace->entry_time;
1571 
1572 	update_stats(stats, duration);
1573 }
1574 
1575 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1576 			    struct perf_sample *sample)
1577 {
1578 	char *msg;
1579 	void *args;
1580 	size_t printed = 0;
1581 	struct thread *thread;
1582 	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1583 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
1584 	struct thread_trace *ttrace;
1585 
1586 	if (sc == NULL)
1587 		return -1;
1588 
1589 	if (sc->filtered)
1590 		return 0;
1591 
1592 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1593 	ttrace = thread__trace(thread, trace->output);
1594 	if (ttrace == NULL)
1595 		return -1;
1596 
1597 	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1598 	ttrace = thread->priv;
1599 
1600 	if (ttrace->entry_str == NULL) {
1601 		ttrace->entry_str = malloc(1024);
1602 		if (!ttrace->entry_str)
1603 			return -1;
1604 	}
1605 
1606 	ttrace->entry_time = sample->time;
1607 	msg = ttrace->entry_str;
1608 	printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1609 
1610 	printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1611 					   args, trace, thread);
1612 
1613 	if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1614 		if (!trace->duration_filter) {
1615 			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1616 			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1617 		}
1618 	} else
1619 		ttrace->entry_pending = true;
1620 
1621 	return 0;
1622 }
1623 
1624 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1625 			   struct perf_sample *sample)
1626 {
1627 	int ret;
1628 	u64 duration = 0;
1629 	struct thread *thread;
1630 	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1631 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
1632 	struct thread_trace *ttrace;
1633 
1634 	if (sc == NULL)
1635 		return -1;
1636 
1637 	if (sc->filtered)
1638 		return 0;
1639 
1640 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1641 	ttrace = thread__trace(thread, trace->output);
1642 	if (ttrace == NULL)
1643 		return -1;
1644 
1645 	if (trace->summary)
1646 		thread__update_stats(ttrace, id, sample);
1647 
1648 	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1649 
1650 	if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1651 		trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1652 		trace->last_vfs_getname = NULL;
1653 		++trace->stats.vfs_getname;
1654 	}
1655 
1656 	ttrace = thread->priv;
1657 
1658 	ttrace->exit_time = sample->time;
1659 
1660 	if (ttrace->entry_time) {
1661 		duration = sample->time - ttrace->entry_time;
1662 		if (trace__filter_duration(trace, duration))
1663 			goto out;
1664 	} else if (trace->duration_filter)
1665 		goto out;
1666 
1667 	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1668 
1669 	if (ttrace->entry_pending) {
1670 		fprintf(trace->output, "%-70s", ttrace->entry_str);
1671 	} else {
1672 		fprintf(trace->output, " ... [");
1673 		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1674 		fprintf(trace->output, "]: %s()", sc->name);
1675 	}
1676 
1677 	if (sc->fmt == NULL) {
1678 signed_print:
1679 		fprintf(trace->output, ") = %d", ret);
1680 	} else if (ret < 0 && sc->fmt->errmsg) {
1681 		char bf[256];
1682 		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1683 			   *e = audit_errno_to_name(-ret);
1684 
1685 		fprintf(trace->output, ") = -1 %s %s", e, emsg);
1686 	} else if (ret == 0 && sc->fmt->timeout)
1687 		fprintf(trace->output, ") = 0 Timeout");
1688 	else if (sc->fmt->hexret)
1689 		fprintf(trace->output, ") = %#x", ret);
1690 	else
1691 		goto signed_print;
1692 
1693 	fputc('\n', trace->output);
1694 out:
1695 	ttrace->entry_pending = false;
1696 
1697 	return 0;
1698 }
1699 
1700 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1701 			      struct perf_sample *sample)
1702 {
1703 	trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1704 	return 0;
1705 }
1706 
1707 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1708 				     struct perf_sample *sample)
1709 {
1710         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1711 	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1712 	struct thread *thread = machine__findnew_thread(trace->host,
1713 							sample->pid,
1714 							sample->tid);
1715 	struct thread_trace *ttrace = thread__trace(thread, trace->output);
1716 
1717 	if (ttrace == NULL)
1718 		goto out_dump;
1719 
1720 	ttrace->runtime_ms += runtime_ms;
1721 	trace->runtime_ms += runtime_ms;
1722 	return 0;
1723 
1724 out_dump:
1725 	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1726 	       evsel->name,
1727 	       perf_evsel__strval(evsel, sample, "comm"),
1728 	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1729 	       runtime,
1730 	       perf_evsel__intval(evsel, sample, "vruntime"));
1731 	return 0;
1732 }
1733 
1734 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1735 {
1736 	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1737 	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1738 		return false;
1739 
1740 	if (trace->pid_list || trace->tid_list)
1741 		return true;
1742 
1743 	return false;
1744 }
1745 
1746 static int trace__process_sample(struct perf_tool *tool,
1747 				 union perf_event *event __maybe_unused,
1748 				 struct perf_sample *sample,
1749 				 struct perf_evsel *evsel,
1750 				 struct machine *machine __maybe_unused)
1751 {
1752 	struct trace *trace = container_of(tool, struct trace, tool);
1753 	int err = 0;
1754 
1755 	tracepoint_handler handler = evsel->handler;
1756 
1757 	if (skip_sample(trace, sample))
1758 		return 0;
1759 
1760 	if (!trace->full_time && trace->base_time == 0)
1761 		trace->base_time = sample->time;
1762 
1763 	if (handler)
1764 		handler(trace, evsel, sample);
1765 
1766 	return err;
1767 }
1768 
1769 static int parse_target_str(struct trace *trace)
1770 {
1771 	if (trace->opts.target.pid) {
1772 		trace->pid_list = intlist__new(trace->opts.target.pid);
1773 		if (trace->pid_list == NULL) {
1774 			pr_err("Error parsing process id string\n");
1775 			return -EINVAL;
1776 		}
1777 	}
1778 
1779 	if (trace->opts.target.tid) {
1780 		trace->tid_list = intlist__new(trace->opts.target.tid);
1781 		if (trace->tid_list == NULL) {
1782 			pr_err("Error parsing thread id string\n");
1783 			return -EINVAL;
1784 		}
1785 	}
1786 
1787 	return 0;
1788 }
1789 
/*
 * Implement 'perf trace record': run cmd_record() with a fixed set of
 * options selecting the raw_syscalls tracepoints, followed by the caller's
 * own @argv.
 *
 * Returns -ENOMEM if the argument vector can't be allocated, otherwise what
 * cmd_record() returned.
 */
static int trace__record(int argc, const char **argv)
{
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit",
	};
	const unsigned int nr_fixed = ARRAY_SIZE(record_args);
	const unsigned int rec_argc = nr_fixed + argc;
	const char **rec_argv;
	unsigned int i;

	/* one extra, zeroed slot so that the vector is NULL terminated */
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < nr_fixed; i++)
		rec_argv[i] = record_args[i];

	for (i = 0; i < (unsigned int)argc; i++)
		rec_argv[nr_fixed + i] = argv[i];

	return cmd_record(rec_argc, rec_argv, NULL);
}
1816 
1817 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1818 
1819 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1820 {
1821 	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1822 	if (evsel == NULL)
1823 		return;
1824 
1825 	if (perf_evsel__field(evsel, "pathname") == NULL) {
1826 		perf_evsel__delete(evsel);
1827 		return;
1828 	}
1829 
1830 	evsel->handler = trace__vfs_getname;
1831 	perf_evlist__add(evlist, evsel);
1832 }
1833 
1834 static int trace__run(struct trace *trace, int argc, const char **argv)
1835 {
1836 	struct perf_evlist *evlist = perf_evlist__new();
1837 	struct perf_evsel *evsel;
1838 	int err = -1, i;
1839 	unsigned long before;
1840 	const bool forks = argc > 0;
1841 
1842 	trace->live = true;
1843 
1844 	if (evlist == NULL) {
1845 		fprintf(trace->output, "Not enough memory to run!\n");
1846 		goto out;
1847 	}
1848 
1849 	if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1850 		goto out_error_tp;
1851 
1852 	perf_evlist__add_vfs_getname(evlist);
1853 
1854 	if (trace->sched &&
1855 		perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1856 				trace__sched_stat_runtime))
1857 		goto out_error_tp;
1858 
1859 	err = perf_evlist__create_maps(evlist, &trace->opts.target);
1860 	if (err < 0) {
1861 		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1862 		goto out_delete_evlist;
1863 	}
1864 
1865 	err = trace__symbols_init(trace, evlist);
1866 	if (err < 0) {
1867 		fprintf(trace->output, "Problems initializing symbol libraries!\n");
1868 		goto out_delete_maps;
1869 	}
1870 
1871 	perf_evlist__config(evlist, &trace->opts);
1872 
1873 	signal(SIGCHLD, sig_handler);
1874 	signal(SIGINT, sig_handler);
1875 
1876 	if (forks) {
1877 		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1878 						    argv, false, false);
1879 		if (err < 0) {
1880 			fprintf(trace->output, "Couldn't run the workload!\n");
1881 			goto out_delete_maps;
1882 		}
1883 	}
1884 
1885 	err = perf_evlist__open(evlist);
1886 	if (err < 0)
1887 		goto out_error_open;
1888 
1889 	err = perf_evlist__mmap(evlist, UINT_MAX, false);
1890 	if (err < 0) {
1891 		fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1892 		goto out_close_evlist;
1893 	}
1894 
1895 	perf_evlist__enable(evlist);
1896 
1897 	if (forks)
1898 		perf_evlist__start_workload(evlist);
1899 
1900 	trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1901 again:
1902 	before = trace->nr_events;
1903 
1904 	for (i = 0; i < evlist->nr_mmaps; i++) {
1905 		union perf_event *event;
1906 
1907 		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1908 			const u32 type = event->header.type;
1909 			tracepoint_handler handler;
1910 			struct perf_sample sample;
1911 
1912 			++trace->nr_events;
1913 
1914 			err = perf_evlist__parse_sample(evlist, event, &sample);
1915 			if (err) {
1916 				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1917 				goto next_event;
1918 			}
1919 
1920 			if (!trace->full_time && trace->base_time == 0)
1921 				trace->base_time = sample.time;
1922 
1923 			if (type != PERF_RECORD_SAMPLE) {
1924 				trace__process_event(trace, trace->host, event, &sample);
1925 				continue;
1926 			}
1927 
1928 			evsel = perf_evlist__id2evsel(evlist, sample.id);
1929 			if (evsel == NULL) {
1930 				fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1931 				goto next_event;
1932 			}
1933 
1934 			if (sample.raw_data == NULL) {
1935 				fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1936 				       perf_evsel__name(evsel), sample.tid,
1937 				       sample.cpu, sample.raw_size);
1938 				goto next_event;
1939 			}
1940 
1941 			handler = evsel->handler;
1942 			handler(trace, evsel, &sample);
1943 next_event:
1944 			perf_evlist__mmap_consume(evlist, i);
1945 
1946 			if (interrupted)
1947 				goto out_disable;
1948 		}
1949 	}
1950 
1951 	if (trace->nr_events == before) {
1952 		int timeout = done ? 100 : -1;
1953 
1954 		if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1955 			goto again;
1956 	} else {
1957 		goto again;
1958 	}
1959 
1960 out_disable:
1961 	perf_evlist__disable(evlist);
1962 
1963 	if (!err) {
1964 		if (trace->summary)
1965 			trace__fprintf_thread_summary(trace, trace->output);
1966 
1967 		if (trace->show_tool_stats) {
1968 			fprintf(trace->output, "Stats:\n "
1969 					       " vfs_getname : %" PRIu64 "\n"
1970 					       " proc_getname: %" PRIu64 "\n",
1971 				trace->stats.vfs_getname,
1972 				trace->stats.proc_getname);
1973 		}
1974 	}
1975 
1976 	perf_evlist__munmap(evlist);
1977 out_close_evlist:
1978 	perf_evlist__close(evlist);
1979 out_delete_maps:
1980 	perf_evlist__delete_maps(evlist);
1981 out_delete_evlist:
1982 	perf_evlist__delete(evlist);
1983 out:
1984 	trace->live = false;
1985 	return err;
1986 {
1987 	char errbuf[BUFSIZ];
1988 
1989 out_error_tp:
1990 	perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
1991 	goto out_error;
1992 
1993 out_error_open:
1994 	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
1995 
1996 out_error:
1997 	fprintf(trace->output, "%s\n", errbuf);
1998 	goto out_delete_evlist;
1999 }
2000 }
2001 
2002 static int trace__replay(struct trace *trace)
2003 {
2004 	const struct perf_evsel_str_handler handlers[] = {
2005 		{ "probe:vfs_getname",	     trace__vfs_getname, },
2006 	};
2007 	struct perf_data_file file = {
2008 		.path  = input_name,
2009 		.mode  = PERF_DATA_MODE_READ,
2010 	};
2011 	struct perf_session *session;
2012 	struct perf_evsel *evsel;
2013 	int err = -1;
2014 
2015 	trace->tool.sample	  = trace__process_sample;
2016 	trace->tool.mmap	  = perf_event__process_mmap;
2017 	trace->tool.mmap2	  = perf_event__process_mmap2;
2018 	trace->tool.comm	  = perf_event__process_comm;
2019 	trace->tool.exit	  = perf_event__process_exit;
2020 	trace->tool.fork	  = perf_event__process_fork;
2021 	trace->tool.attr	  = perf_event__process_attr;
2022 	trace->tool.tracing_data = perf_event__process_tracing_data;
2023 	trace->tool.build_id	  = perf_event__process_build_id;
2024 
2025 	trace->tool.ordered_samples = true;
2026 	trace->tool.ordering_requires_timestamps = true;
2027 
2028 	/* add tid to output */
2029 	trace->multiple_threads = true;
2030 
2031 	if (symbol__init() < 0)
2032 		return -1;
2033 
2034 	session = perf_session__new(&file, false, &trace->tool);
2035 	if (session == NULL)
2036 		return -ENOMEM;
2037 
2038 	trace->host = &session->machines.host;
2039 
2040 	err = perf_session__set_tracepoints_handlers(session, handlers);
2041 	if (err)
2042 		goto out;
2043 
2044 	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2045 						     "raw_syscalls:sys_enter");
2046 	if (evsel == NULL) {
2047 		pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2048 		goto out;
2049 	}
2050 
2051 	if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2052 	    perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2053 		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2054 		goto out;
2055 	}
2056 
2057 	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2058 						     "raw_syscalls:sys_exit");
2059 	if (evsel == NULL) {
2060 		pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2061 		goto out;
2062 	}
2063 
2064 	if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2065 	    perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2066 		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2067 		goto out;
2068 	}
2069 
2070 	err = parse_target_str(trace);
2071 	if (err != 0)
2072 		goto out;
2073 
2074 	setup_pager();
2075 
2076 	err = perf_session__process_events(session, &trace->tool);
2077 	if (err)
2078 		pr_err("Failed to process events, error %d", err);
2079 
2080 	else if (trace->summary)
2081 		trace__fprintf_thread_summary(trace, trace->output);
2082 
2083 out:
2084 	perf_session__delete(session);
2085 
2086 	return err;
2087 }
2088 
/*
 * Write the banner that introduces the per-thread summary to @fp.
 *
 * Returns the value reported by fprintf() for that banner.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2097 
2098 static size_t thread__dump_stats(struct thread_trace *ttrace,
2099 				 struct trace *trace, FILE *fp)
2100 {
2101 	struct stats *stats;
2102 	size_t printed = 0;
2103 	struct syscall *sc;
2104 	struct int_node *inode = intlist__first(ttrace->syscall_stats);
2105 
2106 	if (inode == NULL)
2107 		return 0;
2108 
2109 	printed += fprintf(fp, "\n");
2110 
2111 	printed += fprintf(fp, "                                                    msec/call\n");
2112 	printed += fprintf(fp, "   syscall            calls      min      avg      max stddev\n");
2113 	printed += fprintf(fp, "   --------------- -------- -------- -------- -------- ------\n");
2114 
2115 	/* each int_node is a syscall */
2116 	while (inode) {
2117 		stats = inode->priv;
2118 		if (stats) {
2119 			double min = (double)(stats->min) / NSEC_PER_MSEC;
2120 			double max = (double)(stats->max) / NSEC_PER_MSEC;
2121 			double avg = avg_stats(stats);
2122 			double pct;
2123 			u64 n = (u64) stats->n;
2124 
2125 			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2126 			avg /= NSEC_PER_MSEC;
2127 
2128 			sc = &trace->syscalls.table[inode->i];
2129 			printed += fprintf(fp, "   %-15s", sc->name);
2130 			printed += fprintf(fp, " %8" PRIu64 " %8.3f %8.3f",
2131 					   n, min, avg);
2132 			printed += fprintf(fp, " %8.3f %6.2f\n", max, pct);
2133 		}
2134 
2135 		inode = intlist__next(inode);
2136 	}
2137 
2138 	printed += fprintf(fp, "\n\n");
2139 
2140 	return printed;
2141 }
2142 
/*
 * Context handed to trace__fprintf_one_thread() through its opaque
 * per-thread callback argument.
 */
struct summary_data {
	FILE		*fp;		/* stream the summary is written to */
	struct trace	*trace;		/* trace state the threads belong to */
	size_t		printed;	/* accumulated fprintf() results */
};
2149 
2150 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2151 {
2152 	struct summary_data *data = priv;
2153 	FILE *fp = data->fp;
2154 	size_t printed = data->printed;
2155 	struct trace *trace = data->trace;
2156 	struct thread_trace *ttrace = thread->priv;
2157 	const char *color;
2158 	double ratio;
2159 
2160 	if (ttrace == NULL)
2161 		return 0;
2162 
2163 	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2164 
2165 	color = PERF_COLOR_NORMAL;
2166 	if (ratio > 50.0)
2167 		color = PERF_COLOR_RED;
2168 	else if (ratio > 25.0)
2169 		color = PERF_COLOR_GREEN;
2170 	else if (ratio > 5.0)
2171 		color = PERF_COLOR_YELLOW;
2172 
2173 	printed += color_fprintf(fp, color, " %s (%d), ", thread__comm_str(thread), thread->tid);
2174 	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2175 	printed += color_fprintf(fp, color, "%.1f%%", ratio);
2176 	printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2177 	printed += thread__dump_stats(ttrace, trace, fp);
2178 
2179 	data->printed += printed;
2180 
2181 	return 0;
2182 }
2183 
2184 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2185 {
2186 	struct summary_data data = {
2187 		.fp = fp,
2188 		.trace = trace
2189 	};
2190 	data.printed = trace__fprintf_threads_header(fp);
2191 
2192 	machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2193 
2194 	return data.printed;
2195 }
2196 
2197 static int trace__set_duration(const struct option *opt, const char *str,
2198 			       int unset __maybe_unused)
2199 {
2200 	struct trace *trace = opt->value;
2201 
2202 	trace->duration_filter = atof(str);
2203 	return 0;
2204 }
2205 
2206 static int trace__open_output(struct trace *trace, const char *filename)
2207 {
2208 	struct stat st;
2209 
2210 	if (!stat(filename, &st) && st.st_size) {
2211 		char oldname[PATH_MAX];
2212 
2213 		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2214 		unlink(oldname);
2215 		rename(filename, oldname);
2216 	}
2217 
2218 	trace->output = fopen(filename, "w");
2219 
2220 	return trace->output == NULL ? -errno : 0;
2221 }
2222 
2223 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2224 {
2225 	const char * const trace_usage[] = {
2226 		"perf trace [<options>] [<command>]",
2227 		"perf trace [<options>] -- <command> [<options>]",
2228 		"perf trace record [<options>] [<command>]",
2229 		"perf trace record [<options>] -- <command> [<options>]",
2230 		NULL
2231 	};
2232 	struct trace trace = {
2233 		.audit = {
2234 			.machine = audit_detect_machine(),
2235 			.open_id = audit_name_to_syscall("open", trace.audit.machine),
2236 		},
2237 		.syscalls = {
2238 			. max = -1,
2239 		},
2240 		.opts = {
2241 			.target = {
2242 				.uid	   = UINT_MAX,
2243 				.uses_mmap = true,
2244 			},
2245 			.user_freq     = UINT_MAX,
2246 			.user_interval = ULLONG_MAX,
2247 			.no_delay      = true,
2248 			.mmap_pages    = 1024,
2249 		},
2250 		.output = stdout,
2251 		.show_comm = true,
2252 	};
2253 	const char *output_name = NULL;
2254 	const char *ev_qualifier_str = NULL;
2255 	const struct option trace_options[] = {
2256 	OPT_BOOLEAN(0, "comm", &trace.show_comm,
2257 		    "show the thread COMM next to its id"),
2258 	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2259 	OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2260 		    "list of events to trace"),
2261 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
2262 	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2263 	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2264 		    "trace events on existing process id"),
2265 	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2266 		    "trace events on existing thread id"),
2267 	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2268 		    "system-wide collection from all CPUs"),
2269 	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2270 		    "list of cpus to monitor"),
2271 	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2272 		    "child tasks do not inherit counters"),
2273 	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2274 		     "number of mmap data pages",
2275 		     perf_evlist__parse_mmap_pages),
2276 	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2277 		   "user to profile"),
2278 	OPT_CALLBACK(0, "duration", &trace, "float",
2279 		     "show only events with duration > N.M ms",
2280 		     trace__set_duration),
2281 	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2282 	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2283 	OPT_BOOLEAN('T', "time", &trace.full_time,
2284 		    "Show full timestamp, not time relative to first start"),
2285 	OPT_BOOLEAN(0, "summary", &trace.summary,
2286 		    "Show syscall summary with statistics"),
2287 	OPT_END()
2288 	};
2289 	int err;
2290 	char bf[BUFSIZ];
2291 
2292 	if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2293 		return trace__record(argc-2, &argv[2]);
2294 
2295 	argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2296 
2297 	if (output_name != NULL) {
2298 		err = trace__open_output(&trace, output_name);
2299 		if (err < 0) {
2300 			perror("failed to create output file");
2301 			goto out;
2302 		}
2303 	}
2304 
2305 	if (ev_qualifier_str != NULL) {
2306 		const char *s = ev_qualifier_str;
2307 
2308 		trace.not_ev_qualifier = *s == '!';
2309 		if (trace.not_ev_qualifier)
2310 			++s;
2311 		trace.ev_qualifier = strlist__new(true, s);
2312 		if (trace.ev_qualifier == NULL) {
2313 			fputs("Not enough memory to parse event qualifier",
2314 			      trace.output);
2315 			err = -ENOMEM;
2316 			goto out_close;
2317 		}
2318 	}
2319 
2320 	err = perf_target__validate(&trace.opts.target);
2321 	if (err) {
2322 		perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2323 		fprintf(trace.output, "%s", bf);
2324 		goto out_close;
2325 	}
2326 
2327 	err = perf_target__parse_uid(&trace.opts.target);
2328 	if (err) {
2329 		perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2330 		fprintf(trace.output, "%s", bf);
2331 		goto out_close;
2332 	}
2333 
2334 	if (!argc && perf_target__none(&trace.opts.target))
2335 		trace.opts.target.system_wide = true;
2336 
2337 	if (input_name)
2338 		err = trace__replay(&trace);
2339 	else
2340 		err = trace__run(&trace, argc, argv);
2341 
2342 out_close:
2343 	if (output_name != NULL)
2344 		fclose(trace.output);
2345 out:
2346 	return err;
2347 }
2348