xref: /linux/tools/perf/builtin-trace.c (revision 1e28fe0a4ff8680d5a0fb84995fd2444dac19cc4)
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
16 
#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
22 
/*
 * For older distros: fallback definitions for constants used by the argument
 * formatters in this file.  Each one only takes effect when the headers
 * included above did not already provide the name.
 */
#ifndef MAP_STACK
# define MAP_STACK		0x20000
#endif

#ifndef MADV_HWPOISON
# define MADV_HWPOISON		100
#endif

#ifndef MADV_MERGEABLE
# define MADV_MERGEABLE		12
#endif

#ifndef MADV_UNMERGEABLE
# define MADV_UNMERGEABLE	13
#endif

#ifndef EFD_SEMAPHORE
# define EFD_SEMAPHORE		1
#endif
43 
/*
 * Accessor for one field of a tracepoint's raw payload.
 *
 * @offset: byte offset of the field, added to perf_sample->raw_data by the
 *	    reader callbacks.
 *
 * The anonymous union holds the reader callback, so only one of the two
 * members is meaningful for a given field:
 * @integer: returns the field's value as a u64.
 * @pointer: returns the address of the field inside the payload.
 */
struct tp_field {
	int offset;
	union {
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};
51 
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
54 { \
55 	return *(u##bits *)(sample->raw_data + field->offset); \
56 }
57 
58 TP_UINT_FIELD(8);
59 TP_UINT_FIELD(16);
60 TP_UINT_FIELD(32);
61 TP_UINT_FIELD(64);
62 
63 #define TP_UINT_FIELD__SWAPPED(bits) \
64 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
65 { \
66 	u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
67 	return bswap_##bits(value);\
68 }
69 
70 TP_UINT_FIELD__SWAPPED(16);
71 TP_UINT_FIELD__SWAPPED(32);
72 TP_UINT_FIELD__SWAPPED(64);
73 
74 static int tp_field__init_uint(struct tp_field *field,
75 			       struct format_field *format_field,
76 			       bool needs_swap)
77 {
78 	field->offset = format_field->offset;
79 
80 	switch (format_field->size) {
81 	case 1:
82 		field->integer = tp_field__u8;
83 		break;
84 	case 2:
85 		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
86 		break;
87 	case 4:
88 		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
89 		break;
90 	case 8:
91 		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
92 		break;
93 	default:
94 		return -1;
95 	}
96 
97 	return 0;
98 }
99 
100 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
101 {
102 	return sample->raw_data + field->offset;
103 }
104 
105 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
106 {
107 	field->offset = format_field->offset;
108 	field->pointer = tp_field__ptr;
109 	return 0;
110 }
111 
112 struct syscall_tp {
113 	struct tp_field id;
114 	union {
115 		struct tp_field args, ret;
116 	};
117 };
118 
119 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
120 					  struct tp_field *field,
121 					  const char *name)
122 {
123 	struct format_field *format_field = perf_evsel__field(evsel, name);
124 
125 	if (format_field == NULL)
126 		return -1;
127 
128 	return tp_field__init_uint(field, format_field, evsel->needs_swap);
129 }
130 
/*
 * Initialise the integer accessor for member @name of the struct syscall_tp
 * hanging off evsel->priv, using the tracepoint field that has the same name.
 * Evaluates to the result of perf_evsel__init_tp_uint_field().
 *
 * @evsel is parenthesized so that any pointer expression can be passed; note
 * that it is still evaluated twice.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
134 
/*
 * Look up the tracepoint field called @name in @evsel and set @field up as a
 * pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt ? tp_field__init_ptr(field, fmt) : -1;
}
146 
/*
 * Initialise the pointer accessor for member @name of the struct syscall_tp
 * hanging off evsel->priv, using the tracepoint field that has the same name.
 * Evaluates to the result of perf_evsel__init_tp_ptr_field().
 *
 * @evsel is parenthesized so that any pointer expression can be passed; note
 * that it is still evaluated twice.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
150 
/*
 * Release the private data attached to @evsel, then delete the evsel itself.
 * The private data must go first: perf_evsel__delete() is handed the evsel
 * and it is not touched afterwards.
 */
static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
	/* zfree() presumably frees and NULLs evsel->priv - confirm in util.h */
	zfree(&evsel->priv);
	perf_evsel__delete(evsel);
}
156 
157 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
158 {
159 	evsel->priv = malloc(sizeof(struct syscall_tp));
160 	if (evsel->priv != NULL) {
161 		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
162 			goto out_delete;
163 
164 		evsel->handler = handler;
165 		return 0;
166 	}
167 
168 	return -ENOMEM;
169 
170 out_delete:
171 	zfree(&evsel->priv);
172 	return -ENOENT;
173 }
174 
/*
 * Create the evsel for the syscall tracepoint named @direction and prepare
 * it with perf_evsel__init_syscall_tp().
 *
 * "raw_syscalls" is tried first, then "syscalls" as a fallback.  Returns the
 * new evsel, or NULL when neither tracepoint could be created or the
 * initialisation failed (the evsel is destroyed in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (evsel == NULL)
		evsel = perf_evsel__newtp("syscalls", direction);

	if (evsel == NULL)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
194 
/*
 * Read member @name of the struct syscall_tp attached to @evsel from @sample
 * through its integer accessor; evaluates to the u64 value.
 * @evsel is parenthesized so that any pointer expression can be passed.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, sample); })

/*
 * Same as above but through the pointer accessor; evaluates to the address
 * of the field inside the sample's raw payload.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, sample); })
202 
203 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
204 					  void *sys_enter_handler,
205 					  void *sys_exit_handler)
206 {
207 	int ret = -1;
208 	struct perf_evsel *sys_enter, *sys_exit;
209 
210 	sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
211 	if (sys_enter == NULL)
212 		goto out;
213 
214 	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
215 		goto out_delete_sys_enter;
216 
217 	sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
218 	if (sys_exit == NULL)
219 		goto out_delete_sys_enter;
220 
221 	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
222 		goto out_delete_sys_exit;
223 
224 	perf_evlist__add(evlist, sys_enter);
225 	perf_evlist__add(evlist, sys_exit);
226 
227 	ret = 0;
228 out:
229 	return ret;
230 
231 out_delete_sys_exit:
232 	perf_evsel__delete_priv(sys_exit);
233 out_delete_sys_enter:
234 	perf_evsel__delete_priv(sys_enter);
235 	goto out;
236 }
237 
238 
/*
 * Context handed to the per-argument formatters (the SCA_* functions).
 *
 * @val:    raw value of the argument being formatted.
 * @thread: not used by the formatters in this part of the file; presumably
 *	    the thread that issued the syscall - confirm against the caller.
 * @trace:  not used in this part of the file; presumably the tracing session.
 * @parm:   formatter specific parameter, e.g. the struct strarray used by
 *	    the strarray formatters.
 * @idx:    position of this argument in the syscall's argument list.
 * @mask:   bitmask indexed by argument position that formatters may update;
 *	    the open flags formatter sets the bit of the following argument
 *	    to "mask" it.  How the mask is consumed is up to the caller.
 */
struct syscall_arg {
	unsigned long val;
	struct thread *thread;
	struct trace  *trace;
	void	      *parm;
	u8	      idx;
	u8	      mask;
};
247 
/*
 * Table mapping a contiguous range of integer values to names.
 *
 * @offset:     value that corresponds to entries[0]; it is subtracted from
 *		the value being looked up.
 * @nr_entries: number of elements in @entries.
 * @entries:    the names, indexed by (value - @offset).
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char **entries;
};
253 
/*
 * DEFINE_STRARRAY(array) - define 'struct strarray strarray__<array>'
 * wrapping the string table @array.  .offset is not set, so it is
 * zero-initialised and entries[0] corresponds to the value 0.
 */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

/*
 * DEFINE_STRARRAY_OFFSET(array, off) - same as DEFINE_STRARRAY(), for tables
 * whose first entry corresponds to the value @off instead of 0.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
264 
265 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
266 						const char *intfmt,
267 					        struct syscall_arg *arg)
268 {
269 	struct strarray *sa = arg->parm;
270 	int idx = arg->val - sa->offset;
271 
272 	if (idx < 0 || idx >= sa->nr_entries)
273 		return scnprintf(bf, size, intfmt, arg->val);
274 
275 	return scnprintf(bf, size, "%s", sa->entries[idx]);
276 }
277 
/*
 * strarray formatter that prints values without a name in the table using
 * "%d".  arg->parm must point to the struct strarray to use.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
285 
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * 	  gets rewritten to support all arches.
 *
 * strarray formatter that prints values without a name in the table using
 * "%#x".  arg->parm must point to the struct strarray to use.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
299 
/*
 * Formatter for file descriptor arguments.  Forward declaration only: it is
 * needed by syscall_arg__scnprintf_fd_at() and by the syscall_fmts table
 * before the definition, which appears elsewhere in this file.
 */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
					struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
304 
305 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
306 					   struct syscall_arg *arg)
307 {
308 	int fd = arg->val;
309 
310 	if (fd == AT_FDCWD)
311 		return scnprintf(bf, size, "CWD");
312 
313 	return syscall_arg__scnprintf_fd(bf, size, arg);
314 }
315 
316 #define SCA_FDAT syscall_arg__scnprintf_fd_at
317 
/*
 * Formatter for the fd argument of close().  Forward declaration only: the
 * syscall_fmts table refers to it before the definition, which appears
 * elsewhere in this file.
 */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
322 
/*
 * Formatter that prints the argument as a hexadecimal number with the "0x"
 * prefix ("%#lx"); used for addresses and other opaque values.
 */
static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
					 struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%#lx", arg->val);
}

#define SCA_HEX syscall_arg__scnprintf_hex
330 
331 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
332 					       struct syscall_arg *arg)
333 {
334 	int printed = 0, prot = arg->val;
335 
336 	if (prot == PROT_NONE)
337 		return scnprintf(bf, size, "NONE");
338 #define	P_MMAP_PROT(n) \
339 	if (prot & PROT_##n) { \
340 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
341 		prot &= ~PROT_##n; \
342 	}
343 
344 	P_MMAP_PROT(EXEC);
345 	P_MMAP_PROT(READ);
346 	P_MMAP_PROT(WRITE);
347 #ifdef PROT_SEM
348 	P_MMAP_PROT(SEM);
349 #endif
350 	P_MMAP_PROT(GROWSDOWN);
351 	P_MMAP_PROT(GROWSUP);
352 #undef P_MMAP_PROT
353 
354 	if (prot)
355 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
356 
357 	return printed;
358 }
359 
360 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
361 
362 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
363 						struct syscall_arg *arg)
364 {
365 	int printed = 0, flags = arg->val;
366 
367 #define	P_MMAP_FLAG(n) \
368 	if (flags & MAP_##n) { \
369 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
370 		flags &= ~MAP_##n; \
371 	}
372 
373 	P_MMAP_FLAG(SHARED);
374 	P_MMAP_FLAG(PRIVATE);
375 #ifdef MAP_32BIT
376 	P_MMAP_FLAG(32BIT);
377 #endif
378 	P_MMAP_FLAG(ANONYMOUS);
379 	P_MMAP_FLAG(DENYWRITE);
380 	P_MMAP_FLAG(EXECUTABLE);
381 	P_MMAP_FLAG(FILE);
382 	P_MMAP_FLAG(FIXED);
383 	P_MMAP_FLAG(GROWSDOWN);
384 #ifdef MAP_HUGETLB
385 	P_MMAP_FLAG(HUGETLB);
386 #endif
387 	P_MMAP_FLAG(LOCKED);
388 	P_MMAP_FLAG(NONBLOCK);
389 	P_MMAP_FLAG(NORESERVE);
390 	P_MMAP_FLAG(POPULATE);
391 	P_MMAP_FLAG(STACK);
392 #ifdef MAP_UNINITIALIZED
393 	P_MMAP_FLAG(UNINITIALIZED);
394 #endif
395 #undef P_MMAP_FLAG
396 
397 	if (flags)
398 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
399 
400 	return printed;
401 }
402 
403 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
404 
405 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
406 						      struct syscall_arg *arg)
407 {
408 	int behavior = arg->val;
409 
410 	switch (behavior) {
411 #define	P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
412 	P_MADV_BHV(NORMAL);
413 	P_MADV_BHV(RANDOM);
414 	P_MADV_BHV(SEQUENTIAL);
415 	P_MADV_BHV(WILLNEED);
416 	P_MADV_BHV(DONTNEED);
417 	P_MADV_BHV(REMOVE);
418 	P_MADV_BHV(DONTFORK);
419 	P_MADV_BHV(DOFORK);
420 	P_MADV_BHV(HWPOISON);
421 #ifdef MADV_SOFT_OFFLINE
422 	P_MADV_BHV(SOFT_OFFLINE);
423 #endif
424 	P_MADV_BHV(MERGEABLE);
425 	P_MADV_BHV(UNMERGEABLE);
426 #ifdef MADV_HUGEPAGE
427 	P_MADV_BHV(HUGEPAGE);
428 #endif
429 #ifdef MADV_NOHUGEPAGE
430 	P_MADV_BHV(NOHUGEPAGE);
431 #endif
432 #ifdef MADV_DONTDUMP
433 	P_MADV_BHV(DONTDUMP);
434 #endif
435 #ifdef MADV_DODUMP
436 	P_MADV_BHV(DODUMP);
437 #endif
438 #undef P_MADV_PHV
439 	default: break;
440 	}
441 
442 	return scnprintf(bf, size, "%#x", behavior);
443 }
444 
445 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
446 
447 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
448 					   struct syscall_arg *arg)
449 {
450 	int printed = 0, op = arg->val;
451 
452 	if (op == 0)
453 		return scnprintf(bf, size, "NONE");
454 #define	P_CMD(cmd) \
455 	if ((op & LOCK_##cmd) == LOCK_##cmd) { \
456 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
457 		op &= ~LOCK_##cmd; \
458 	}
459 
460 	P_CMD(SH);
461 	P_CMD(EX);
462 	P_CMD(NB);
463 	P_CMD(UN);
464 	P_CMD(MAND);
465 	P_CMD(RW);
466 	P_CMD(READ);
467 	P_CMD(WRITE);
468 #undef P_OP
469 
470 	if (op)
471 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
472 
473 	return printed;
474 }
475 
476 #define SCA_FLOCK syscall_arg__scnprintf_flock
477 
478 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
479 {
480 	enum syscall_futex_args {
481 		SCF_UADDR   = (1 << 0),
482 		SCF_OP	    = (1 << 1),
483 		SCF_VAL	    = (1 << 2),
484 		SCF_TIMEOUT = (1 << 3),
485 		SCF_UADDR2  = (1 << 4),
486 		SCF_VAL3    = (1 << 5),
487 	};
488 	int op = arg->val;
489 	int cmd = op & FUTEX_CMD_MASK;
490 	size_t printed = 0;
491 
492 	switch (cmd) {
493 #define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
494 	P_FUTEX_OP(WAIT);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
495 	P_FUTEX_OP(WAKE);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
496 	P_FUTEX_OP(FD);		    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
497 	P_FUTEX_OP(REQUEUE);	    arg->mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
498 	P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;			  break;
499 	P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;			  break;
500 	P_FUTEX_OP(WAKE_OP);							  break;
501 	P_FUTEX_OP(LOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
502 	P_FUTEX_OP(UNLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
503 	P_FUTEX_OP(TRYLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
504 	P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;			  break;
505 	P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;			  break;
506 	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
507 	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
508 	}
509 
510 	if (op & FUTEX_PRIVATE_FLAG)
511 		printed += scnprintf(bf + printed, size - printed, "|PRIV");
512 
513 	if (op & FUTEX_CLOCK_REALTIME)
514 		printed += scnprintf(bf + printed, size - printed, "|CLKRT");
515 
516 	return printed;
517 }
518 
519 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
520 
/* epoll_ctl() 'op' names; the table starts at value 1, hence the offset. */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);

/* Interval timer names for {get,set}itimer() 'which', starting at 0. */
static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);

/*
 * lseek() 'whence' names, starting at 0.  "DATA" and "HOLE" are only present
 * when the libc headers define SEEK_DATA / SEEK_HOLE.
 */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
536 
/*
 * fcntl() command names, indexed by command number starting at F_DUPFD (0).
 * All names are spelled without the "F_" prefix so that the whole table is
 * printed in the same style.
 */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
	"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
	"GETOWNER_UIDS",
};
/* Defines strarray__fcntl_cmds, the zero-based lookup table over fcntl_cmds. */
static DEFINE_STRARRAY(fcntl_cmds);
544 
/* Resource names for {get,set}rlimit()/prlimit64(), starting at 0. */
static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

/* Names for the 'how' argument of rt_sigprocmask(), starting at 0. */
static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);

/* Clock names for the clk_id argument of clock_gettime(), starting at 0. */
static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
};
static DEFINE_STRARRAY(clockid);
560 
/*
 * Socket address family names, indexed by AF_* number starting at
 * AF_UNSPEC (0).
 *
 * The table has to be dense: AF_IB is 27 and AF_CAN is 29, so slot 28
 * (AF_MPLS) must be filled in, otherwise CAN and everything after it is
 * reported under the name of the following family.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
/* Defines strarray__socket_families, the zero-based lookup table over socket_families. */
static DEFINE_STRARRAY(socket_families);
570 
571 #ifndef SOCK_TYPE_MASK
572 #define SOCK_TYPE_MASK 0xf
573 #endif
574 
575 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
576 						      struct syscall_arg *arg)
577 {
578 	size_t printed;
579 	int type = arg->val,
580 	    flags = type & ~SOCK_TYPE_MASK;
581 
582 	type &= SOCK_TYPE_MASK;
583 	/*
584  	 * Can't use a strarray, MIPS may override for ABI reasons.
585  	 */
586 	switch (type) {
587 #define	P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
588 	P_SK_TYPE(STREAM);
589 	P_SK_TYPE(DGRAM);
590 	P_SK_TYPE(RAW);
591 	P_SK_TYPE(RDM);
592 	P_SK_TYPE(SEQPACKET);
593 	P_SK_TYPE(DCCP);
594 	P_SK_TYPE(PACKET);
595 #undef P_SK_TYPE
596 	default:
597 		printed = scnprintf(bf, size, "%#x", type);
598 	}
599 
600 #define	P_SK_FLAG(n) \
601 	if (flags & SOCK_##n) { \
602 		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
603 		flags &= ~SOCK_##n; \
604 	}
605 
606 	P_SK_FLAG(CLOEXEC);
607 	P_SK_FLAG(NONBLOCK);
608 #undef P_SK_FLAG
609 
610 	if (flags)
611 		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
612 
613 	return printed;
614 }
615 
616 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
617 
618 #ifndef MSG_PROBE
619 #define MSG_PROBE	     0x10
620 #endif
621 #ifndef MSG_WAITFORONE
622 #define MSG_WAITFORONE	0x10000
623 #endif
624 #ifndef MSG_SENDPAGE_NOTLAST
625 #define MSG_SENDPAGE_NOTLAST 0x20000
626 #endif
627 #ifndef MSG_FASTOPEN
628 #define MSG_FASTOPEN	     0x20000000
629 #endif
630 
631 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
632 					       struct syscall_arg *arg)
633 {
634 	int printed = 0, flags = arg->val;
635 
636 	if (flags == 0)
637 		return scnprintf(bf, size, "NONE");
638 #define	P_MSG_FLAG(n) \
639 	if (flags & MSG_##n) { \
640 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
641 		flags &= ~MSG_##n; \
642 	}
643 
644 	P_MSG_FLAG(OOB);
645 	P_MSG_FLAG(PEEK);
646 	P_MSG_FLAG(DONTROUTE);
647 	P_MSG_FLAG(TRYHARD);
648 	P_MSG_FLAG(CTRUNC);
649 	P_MSG_FLAG(PROBE);
650 	P_MSG_FLAG(TRUNC);
651 	P_MSG_FLAG(DONTWAIT);
652 	P_MSG_FLAG(EOR);
653 	P_MSG_FLAG(WAITALL);
654 	P_MSG_FLAG(FIN);
655 	P_MSG_FLAG(SYN);
656 	P_MSG_FLAG(CONFIRM);
657 	P_MSG_FLAG(RST);
658 	P_MSG_FLAG(ERRQUEUE);
659 	P_MSG_FLAG(NOSIGNAL);
660 	P_MSG_FLAG(MORE);
661 	P_MSG_FLAG(WAITFORONE);
662 	P_MSG_FLAG(SENDPAGE_NOTLAST);
663 	P_MSG_FLAG(FASTOPEN);
664 	P_MSG_FLAG(CMSG_CLOEXEC);
665 #undef P_MSG_FLAG
666 
667 	if (flags)
668 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
669 
670 	return printed;
671 }
672 
673 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
674 
675 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
676 						 struct syscall_arg *arg)
677 {
678 	size_t printed = 0;
679 	int mode = arg->val;
680 
681 	if (mode == F_OK) /* 0 */
682 		return scnprintf(bf, size, "F");
683 #define	P_MODE(n) \
684 	if (mode & n##_OK) { \
685 		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
686 		mode &= ~n##_OK; \
687 	}
688 
689 	P_MODE(R);
690 	P_MODE(W);
691 	P_MODE(X);
692 #undef P_MODE
693 
694 	if (mode)
695 		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
696 
697 	return printed;
698 }
699 
700 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
701 
702 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
703 					       struct syscall_arg *arg)
704 {
705 	int printed = 0, flags = arg->val;
706 
707 	if (!(flags & O_CREAT))
708 		arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
709 
710 	if (flags == 0)
711 		return scnprintf(bf, size, "RDONLY");
712 #define	P_FLAG(n) \
713 	if (flags & O_##n) { \
714 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
715 		flags &= ~O_##n; \
716 	}
717 
718 	P_FLAG(APPEND);
719 	P_FLAG(ASYNC);
720 	P_FLAG(CLOEXEC);
721 	P_FLAG(CREAT);
722 	P_FLAG(DIRECT);
723 	P_FLAG(DIRECTORY);
724 	P_FLAG(EXCL);
725 	P_FLAG(LARGEFILE);
726 	P_FLAG(NOATIME);
727 	P_FLAG(NOCTTY);
728 #ifdef O_NONBLOCK
729 	P_FLAG(NONBLOCK);
730 #elif O_NDELAY
731 	P_FLAG(NDELAY);
732 #endif
733 #ifdef O_PATH
734 	P_FLAG(PATH);
735 #endif
736 	P_FLAG(RDWR);
737 #ifdef O_DSYNC
738 	if ((flags & O_SYNC) == O_SYNC)
739 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
740 	else {
741 		P_FLAG(DSYNC);
742 	}
743 #else
744 	P_FLAG(SYNC);
745 #endif
746 	P_FLAG(TRUNC);
747 	P_FLAG(WRONLY);
748 #undef P_FLAG
749 
750 	if (flags)
751 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
752 
753 	return printed;
754 }
755 
756 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
757 
758 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
759 						   struct syscall_arg *arg)
760 {
761 	int printed = 0, flags = arg->val;
762 
763 	if (flags == 0)
764 		return scnprintf(bf, size, "NONE");
765 #define	P_FLAG(n) \
766 	if (flags & EFD_##n) { \
767 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
768 		flags &= ~EFD_##n; \
769 	}
770 
771 	P_FLAG(SEMAPHORE);
772 	P_FLAG(CLOEXEC);
773 	P_FLAG(NONBLOCK);
774 #undef P_FLAG
775 
776 	if (flags)
777 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
778 
779 	return printed;
780 }
781 
782 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
783 
784 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
785 						struct syscall_arg *arg)
786 {
787 	int printed = 0, flags = arg->val;
788 
789 #define	P_FLAG(n) \
790 	if (flags & O_##n) { \
791 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
792 		flags &= ~O_##n; \
793 	}
794 
795 	P_FLAG(CLOEXEC);
796 	P_FLAG(NONBLOCK);
797 #undef P_FLAG
798 
799 	if (flags)
800 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
801 
802 	return printed;
803 }
804 
805 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
806 
807 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
808 {
809 	int sig = arg->val;
810 
811 	switch (sig) {
812 #define	P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
813 	P_SIGNUM(HUP);
814 	P_SIGNUM(INT);
815 	P_SIGNUM(QUIT);
816 	P_SIGNUM(ILL);
817 	P_SIGNUM(TRAP);
818 	P_SIGNUM(ABRT);
819 	P_SIGNUM(BUS);
820 	P_SIGNUM(FPE);
821 	P_SIGNUM(KILL);
822 	P_SIGNUM(USR1);
823 	P_SIGNUM(SEGV);
824 	P_SIGNUM(USR2);
825 	P_SIGNUM(PIPE);
826 	P_SIGNUM(ALRM);
827 	P_SIGNUM(TERM);
828 	P_SIGNUM(CHLD);
829 	P_SIGNUM(CONT);
830 	P_SIGNUM(STOP);
831 	P_SIGNUM(TSTP);
832 	P_SIGNUM(TTIN);
833 	P_SIGNUM(TTOU);
834 	P_SIGNUM(URG);
835 	P_SIGNUM(XCPU);
836 	P_SIGNUM(XFSZ);
837 	P_SIGNUM(VTALRM);
838 	P_SIGNUM(PROF);
839 	P_SIGNUM(WINCH);
840 	P_SIGNUM(IO);
841 	P_SIGNUM(PWR);
842 	P_SIGNUM(SYS);
843 #ifdef SIGEMT
844 	P_SIGNUM(EMT);
845 #endif
846 #ifdef SIGSTKFLT
847 	P_SIGNUM(STKFLT);
848 #endif
849 #ifdef SIGSWI
850 	P_SIGNUM(SWI);
851 #endif
852 	default: break;
853 	}
854 
855 	return scnprintf(bf, size, "%#x", sig);
856 }
857 
858 #define SCA_SIGNUM syscall_arg__scnprintf_signum
859 
860 #if defined(__i386__) || defined(__x86_64__)
861 /*
862  * FIXME: Make this available to all arches.
863  */
#define TCGETS		0x5401

/*
 * Names of the tty ioctl commands, indexed by (cmd - TCGETS): the table is
 * looked up with an offset of 0x5401, so "TCGETS" sits in slot 0.
 *
 * The command numbers have gaps (e.g. nothing at 0x5426, 0x543b-0x544f),
 * hence the designated initializers.  Each designator is written as
 * "command number - TCGETS" so that it is expressed in the same index space
 * as the rest of the table; using the bare low byte of the command (0x27,
 * 0x50, 0x60) puts every following name one slot too high.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x5427 - TCGETS] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
	[0x5450 - TCGETS] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - TCGETS] = "FIOQSIZE",
};
883 
/* Defines strarray__tioctls: slot 0 of tioctls corresponds to command 0x5401. */
static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
885 #endif /* defined(__i386__) || defined(__x86_64__) */
886 
/*
 * STRARRAY(arg, name, array) - initializer fragment for a syscall_fmts entry:
 * format argument number @arg with SCA_STRARRAY, using strarray__<array> as
 * the lookup table.  @name is not used in the expansion; it only documents
 * the argument's name at the point of use.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
890 
891 static struct syscall_fmt {
892 	const char *name;
893 	const char *alias;
894 	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
895 	void	   *arg_parm[6];
896 	bool	   errmsg;
897 	bool	   timeout;
898 	bool	   hexret;
899 } syscall_fmts[] = {
900 	{ .name	    = "access",	    .errmsg = true,
901 	  .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
902 	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
903 	{ .name	    = "brk",	    .hexret = true,
904 	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
905 	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
906 	{ .name	    = "close",	    .errmsg = true,
907 	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
908 	{ .name	    = "connect",    .errmsg = true, },
909 	{ .name	    = "dup",	    .errmsg = true,
910 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
911 	{ .name	    = "dup2",	    .errmsg = true,
912 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
913 	{ .name	    = "dup3",	    .errmsg = true,
914 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
915 	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
916 	{ .name	    = "eventfd2",   .errmsg = true,
917 	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
918 	{ .name	    = "faccessat",  .errmsg = true,
919 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
920 	{ .name	    = "fadvise64",  .errmsg = true,
921 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
922 	{ .name	    = "fallocate",  .errmsg = true,
923 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
924 	{ .name	    = "fchdir",	    .errmsg = true,
925 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
926 	{ .name	    = "fchmod",	    .errmsg = true,
927 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
928 	{ .name	    = "fchmodat",   .errmsg = true,
929 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
930 	{ .name	    = "fchown",	    .errmsg = true,
931 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
932 	{ .name	    = "fchownat",   .errmsg = true,
933 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
934 	{ .name	    = "fcntl",	    .errmsg = true,
935 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
936 			     [1] = SCA_STRARRAY, /* cmd */ },
937 	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
938 	{ .name	    = "fdatasync",  .errmsg = true,
939 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
940 	{ .name	    = "flock",	    .errmsg = true,
941 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
942 			     [1] = SCA_FLOCK, /* cmd */ }, },
943 	{ .name	    = "fsetxattr",  .errmsg = true,
944 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
945 	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat",
946 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
947 	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat",
948 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
949 	{ .name	    = "fstatfs",    .errmsg = true,
950 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
951 	{ .name	    = "fsync",    .errmsg = true,
952 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
953 	{ .name	    = "ftruncate", .errmsg = true,
954 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
955 	{ .name	    = "futex",	    .errmsg = true,
956 	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
957 	{ .name	    = "futimesat", .errmsg = true,
958 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
959 	{ .name	    = "getdents",   .errmsg = true,
960 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
961 	{ .name	    = "getdents64", .errmsg = true,
962 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
963 	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
964 	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
965 	{ .name	    = "ioctl",	    .errmsg = true,
966 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
967 #if defined(__i386__) || defined(__x86_64__)
968 /*
969  * FIXME: Make this available to all arches.
970  */
971 			     [1] = SCA_STRHEXARRAY, /* cmd */
972 			     [2] = SCA_HEX, /* arg */ },
973 	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
974 #else
975 			     [2] = SCA_HEX, /* arg */ }, },
976 #endif
977 	{ .name	    = "kill",	    .errmsg = true,
978 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
979 	{ .name	    = "linkat",	    .errmsg = true,
980 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
981 	{ .name	    = "lseek",	    .errmsg = true,
982 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
983 			     [2] = SCA_STRARRAY, /* whence */ },
984 	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
985 	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
986 	{ .name     = "madvise",    .errmsg = true,
987 	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
988 			     [2] = SCA_MADV_BHV, /* behavior */ }, },
989 	{ .name	    = "mkdirat",    .errmsg = true,
990 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
991 	{ .name	    = "mknodat",    .errmsg = true,
992 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
993 	{ .name	    = "mlock",	    .errmsg = true,
994 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
995 	{ .name	    = "mlockall",   .errmsg = true,
996 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
997 	{ .name	    = "mmap",	    .hexret = true,
998 	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
999 			     [2] = SCA_MMAP_PROT, /* prot */
1000 			     [3] = SCA_MMAP_FLAGS, /* flags */
1001 			     [4] = SCA_FD, 	  /* fd */ }, },
1002 	{ .name	    = "mprotect",   .errmsg = true,
1003 	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
1004 			     [2] = SCA_MMAP_PROT, /* prot */ }, },
1005 	{ .name	    = "mremap",	    .hexret = true,
1006 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1007 			     [4] = SCA_HEX, /* new_addr */ }, },
1008 	{ .name	    = "munlock",    .errmsg = true,
1009 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1010 	{ .name	    = "munmap",	    .errmsg = true,
1011 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1012 	{ .name	    = "name_to_handle_at", .errmsg = true,
1013 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1014 	{ .name	    = "newfstatat", .errmsg = true,
1015 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1016 	{ .name	    = "open",	    .errmsg = true,
1017 	  .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1018 	{ .name	    = "open_by_handle_at", .errmsg = true,
1019 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1020 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1021 	{ .name	    = "openat",	    .errmsg = true,
1022 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1023 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1024 	{ .name	    = "pipe2",	    .errmsg = true,
1025 	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1026 	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
1027 	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
1028 	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64",
1029 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1030 	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread",
1031 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1032 	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1033 	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64",
1034 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1035 	{ .name	    = "pwritev",    .errmsg = true,
1036 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1037 	{ .name	    = "read",	    .errmsg = true,
1038 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1039 	{ .name	    = "readlinkat", .errmsg = true,
1040 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1041 	{ .name	    = "readv",	    .errmsg = true,
1042 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1043 	{ .name	    = "recvfrom",   .errmsg = true,
1044 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1045 	{ .name	    = "recvmmsg",   .errmsg = true,
1046 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1047 	{ .name	    = "recvmsg",    .errmsg = true,
1048 	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1049 	{ .name	    = "renameat",   .errmsg = true,
1050 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1051 	{ .name	    = "rt_sigaction", .errmsg = true,
1052 	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1053 	{ .name	    = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1054 	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
1055 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1056 	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
1057 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1058 	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
1059 	{ .name	    = "sendmmsg",    .errmsg = true,
1060 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1061 	{ .name	    = "sendmsg",    .errmsg = true,
1062 	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1063 	{ .name	    = "sendto",	    .errmsg = true,
1064 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1065 	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1066 	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1067 	{ .name	    = "shutdown",   .errmsg = true,
1068 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1069 	{ .name	    = "socket",	    .errmsg = true,
1070 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1071 			     [1] = SCA_SK_TYPE, /* type */ },
1072 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1073 	{ .name	    = "socketpair", .errmsg = true,
1074 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1075 			     [1] = SCA_SK_TYPE, /* type */ },
1076 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1077 	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat", },
1078 	{ .name	    = "symlinkat",  .errmsg = true,
1079 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1080 	{ .name	    = "tgkill",	    .errmsg = true,
1081 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1082 	{ .name	    = "tkill",	    .errmsg = true,
1083 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1084 	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
1085 	{ .name	    = "unlinkat",   .errmsg = true,
1086 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1087 	{ .name	    = "utimensat",  .errmsg = true,
1088 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1089 	{ .name	    = "write",	    .errmsg = true,
1090 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1091 	{ .name	    = "writev",	    .errmsg = true,
1092 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1093 };
1094 
1095 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1096 {
1097 	const struct syscall_fmt *fmt = fmtp;
1098 	return strcmp(name, fmt->name);
1099 }
1100 
1101 static struct syscall_fmt *syscall_fmt__find(const char *name)
1102 {
1103 	const int nmemb = ARRAY_SIZE(syscall_fmts);
1104 	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1105 }
1106 
/*
 * Per-syscall information, one slot per syscall id in trace->syscalls.table.
 * Slots are filled in lazily by trace__read_syscall_info().
 */
struct syscall {
	/* Format of the syscalls:sys_enter_<name> (or alias) tracepoint. */
	struct event_format *tp_format;
	/* Name as returned by audit_syscall_to_name(); NULL means "not read yet". */
	const char	    *name;
	/* Excluded by the event qualifier: handlers return early for it. */
	bool		    filtered;
	/* True for "exit" and "exit_group". */
	bool		    is_exit;
	/* Entry from syscall_fmts[], NULL when there is none for this syscall. */
	struct syscall_fmt  *fmt;
	/* Per-argument formatters, allocated by syscall__set_arg_fmts(). */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* Per-argument formatter parameters, taken from fmt->arg_parm. */
	void		    **arg_parm;
};
1116 
1117 static size_t fprintf_duration(unsigned long t, FILE *fp)
1118 {
1119 	double duration = (double)t / NSEC_PER_MSEC;
1120 	size_t printed = fprintf(fp, "(");
1121 
1122 	if (duration >= 1.0)
1123 		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1124 	else if (duration >= 0.01)
1125 		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1126 	else
1127 		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1128 	return printed + fprintf(fp, "): ");
1129 }
1130 
/*
 * Per-thread tracing state, hung off thread->priv by thread__trace().
 */
struct thread_trace {
	/* Timestamp of the last sys_enter seen for this thread. */
	u64		  entry_time;
	/* Timestamp of the last sys_exit seen for this thread. */
	u64		  exit_time;
	/* Set at sys_enter, cleared at sys_exit: entry_str is yet to be printed. */
	bool		  entry_pending;
	/* Bumped each time thread__trace() is called for this thread. */
	unsigned long	  nr_events;
	/* 1024 byte buffer holding the formatted "name(args" of the current syscall. */
	char		  *entry_str;
	/* Runtime accumulated from sched_stat_runtime events, in milliseconds. */
	double		  runtime_ms;
	/* fd -> pathname cache; max is the highest valid index, -1 when empty. */
	struct {
		int	  max;
		char	  **table;
	} paths;

	/* Keyed by syscall id; each node's ->priv is a struct stats of durations. */
	struct intlist *syscall_stats;
};
1145 
1146 static struct thread_trace *thread_trace__new(void)
1147 {
1148 	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1149 
1150 	if (ttrace)
1151 		ttrace->paths.max = -1;
1152 
1153 	ttrace->syscall_stats = intlist__new(NULL);
1154 
1155 	return ttrace;
1156 }
1157 
1158 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1159 {
1160 	struct thread_trace *ttrace;
1161 
1162 	if (thread == NULL)
1163 		goto fail;
1164 
1165 	if (thread->priv == NULL)
1166 		thread->priv = thread_trace__new();
1167 
1168 	if (thread->priv == NULL)
1169 		goto fail;
1170 
1171 	ttrace = thread->priv;
1172 	++ttrace->nr_events;
1173 
1174 	return ttrace;
1175 fail:
1176 	color_fprintf(fp, PERF_COLOR_RED,
1177 		      "WARNING: not enough memory, dropping samples!\n");
1178 	return NULL;
1179 }
1180 
/* Bits for trace->trace_pgfaults: which page fault events to trace. */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)
1183 
/*
 * Global state of one 'perf trace' session. The embedded perf_tool lets
 * the callbacks recover this structure with container_of().
 */
struct trace {
	struct perf_tool	tool;
	struct {
		/* Passed to audit_syscall_to_name() to translate syscall ids. */
		int		machine;
		/* Syscall id compared against sys_exit ids to cache fd -> pathname. */
		int		open_id;
	}			audit;
	/* Table indexed by syscall id; max is the highest valid index, -1 when empty. */
	struct {
		int		max;
		struct syscall  *table;
	} syscalls;
	struct record_opts	opts;
	struct machine		*host;
	/* Time of the first processed sample, unless full_time is set. */
	u64			base_time;
	FILE			*output;
	/* Number of samples dispatched to a handler. */
	unsigned long		nr_events;
	/* Syscall names to select; not_ev_qualifier inverts the selection. */
	struct strlist		*ev_qualifier;
	/* Pathname from the last vfs_getname probe hit, consumed at open's sys_exit. */
	const char 		*last_vfs_getname;
	/* Built by parse_target_str(), consulted by skip_sample(). */
	struct intlist		*tid_list;
	struct intlist		*pid_list;
	/* In milliseconds, see trace__filter_duration(). */
	double			duration_filter;
	/* Sum of sched_stat_runtime runtimes, in milliseconds. */
	double			runtime_ms;
	/* How many fd pathnames came from vfs_getname vs. reading /proc. */
	struct {
		u64		vfs_getname,
				proc_getname;
	} stats;
	bool			not_ev_qualifier;
	/* Set by trace__run(); allows resolving fds via /proc. */
	bool			live;
	bool			full_time;
	/* Also trace sched:sched_stat_runtime. */
	bool			sched;
	/* Prefix output lines with the tid (and comm, if show_comm). */
	bool			multiple_threads;
	/* Collect per-syscall duration statistics at sys_exit. */
	bool			summary;
	/* Suppress the per-syscall output lines. */
	bool			summary_only;
	bool			show_comm;
	bool			show_tool_stats;
	/* Mask of TRACE_PFMAJ / TRACE_PFMIN. */
	int			trace_pgfaults;
};
1220 
1221 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1222 {
1223 	struct thread_trace *ttrace = thread->priv;
1224 
1225 	if (fd > ttrace->paths.max) {
1226 		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1227 
1228 		if (npath == NULL)
1229 			return -1;
1230 
1231 		if (ttrace->paths.max != -1) {
1232 			memset(npath + ttrace->paths.max + 1, 0,
1233 			       (fd - ttrace->paths.max) * sizeof(char *));
1234 		} else {
1235 			memset(npath, 0, (fd + 1) * sizeof(char *));
1236 		}
1237 
1238 		ttrace->paths.table = npath;
1239 		ttrace->paths.max   = fd;
1240 	}
1241 
1242 	ttrace->paths.table[fd] = strdup(pathname);
1243 
1244 	return ttrace->paths.table[fd] != NULL ? 0 : -1;
1245 }
1246 
1247 static int thread__read_fd_path(struct thread *thread, int fd)
1248 {
1249 	char linkname[PATH_MAX], pathname[PATH_MAX];
1250 	struct stat st;
1251 	int ret;
1252 
1253 	if (thread->pid_ == thread->tid) {
1254 		scnprintf(linkname, sizeof(linkname),
1255 			  "/proc/%d/fd/%d", thread->pid_, fd);
1256 	} else {
1257 		scnprintf(linkname, sizeof(linkname),
1258 			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1259 	}
1260 
1261 	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1262 		return -1;
1263 
1264 	ret = readlink(linkname, pathname, sizeof(pathname));
1265 
1266 	if (ret < 0 || ret > st.st_size)
1267 		return -1;
1268 
1269 	pathname[ret] = '\0';
1270 	return trace__set_fd_pathname(thread, fd, pathname);
1271 }
1272 
1273 static const char *thread__fd_path(struct thread *thread, int fd,
1274 				   struct trace *trace)
1275 {
1276 	struct thread_trace *ttrace = thread->priv;
1277 
1278 	if (ttrace == NULL)
1279 		return NULL;
1280 
1281 	if (fd < 0)
1282 		return NULL;
1283 
1284 	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1285 		if (!trace->live)
1286 			return NULL;
1287 		++trace->stats.proc_getname;
1288 		if (thread__read_fd_path(thread, fd))
1289 			return NULL;
1290 	}
1291 
1292 	return ttrace->paths.table[fd];
1293 }
1294 
1295 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1296 					struct syscall_arg *arg)
1297 {
1298 	int fd = arg->val;
1299 	size_t printed = scnprintf(bf, size, "%d", fd);
1300 	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1301 
1302 	if (path)
1303 		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1304 
1305 	return printed;
1306 }
1307 
1308 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1309 					      struct syscall_arg *arg)
1310 {
1311 	int fd = arg->val;
1312 	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1313 	struct thread_trace *ttrace = arg->thread->priv;
1314 
1315 	if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1316 		zfree(&ttrace->paths.table[fd]);
1317 
1318 	return printed;
1319 }
1320 
1321 static bool trace__filter_duration(struct trace *trace, double t)
1322 {
1323 	return t < (trace->duration_filter * NSEC_PER_MSEC);
1324 }
1325 
1326 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1327 {
1328 	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1329 
1330 	return fprintf(fp, "%10.3f ", ts);
1331 }
1332 
/*
 * Flags set by sig_handler(). They are written asynchronously from signal
 * context, so they are volatile sig_atomic_t: the object type the C
 * standard guarantees can be stored to from a signal handler and reliably
 * observed by the interrupted code.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Ask the main loop to stop; additionally record whether the request came
 * from SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1341 
1342 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1343 					u64 duration, u64 tstamp, FILE *fp)
1344 {
1345 	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1346 	printed += fprintf_duration(duration, fp);
1347 
1348 	if (trace->multiple_threads) {
1349 		if (trace->show_comm)
1350 			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1351 		printed += fprintf(fp, "%d ", thread->tid);
1352 	}
1353 
1354 	return printed;
1355 }
1356 
1357 static int trace__process_event(struct trace *trace, struct machine *machine,
1358 				union perf_event *event, struct perf_sample *sample)
1359 {
1360 	int ret = 0;
1361 
1362 	switch (event->header.type) {
1363 	case PERF_RECORD_LOST:
1364 		color_fprintf(trace->output, PERF_COLOR_RED,
1365 			      "LOST %" PRIu64 " events!\n", event->lost.lost);
1366 		ret = machine__process_lost_event(machine, event, sample);
1367 	default:
1368 		ret = machine__process_event(machine, event, sample);
1369 		break;
1370 	}
1371 
1372 	return ret;
1373 }
1374 
1375 static int trace__tool_process(struct perf_tool *tool,
1376 			       union perf_event *event,
1377 			       struct perf_sample *sample,
1378 			       struct machine *machine)
1379 {
1380 	struct trace *trace = container_of(tool, struct trace, tool);
1381 	return trace__process_event(trace, machine, event, sample);
1382 }
1383 
1384 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1385 {
1386 	int err = symbol__init();
1387 
1388 	if (err)
1389 		return err;
1390 
1391 	trace->host = machine__new_host();
1392 	if (trace->host == NULL)
1393 		return -ENOMEM;
1394 
1395 	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1396 					    evlist->threads, trace__tool_process, false);
1397 	if (err)
1398 		symbol__exit();
1399 
1400 	return err;
1401 }
1402 
1403 static int syscall__set_arg_fmts(struct syscall *sc)
1404 {
1405 	struct format_field *field;
1406 	int idx = 0;
1407 
1408 	sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1409 	if (sc->arg_scnprintf == NULL)
1410 		return -1;
1411 
1412 	if (sc->fmt)
1413 		sc->arg_parm = sc->fmt->arg_parm;
1414 
1415 	for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1416 		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1417 			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1418 		else if (field->flags & FIELD_IS_POINTER)
1419 			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1420 		++idx;
1421 	}
1422 
1423 	return 0;
1424 }
1425 
1426 static int trace__read_syscall_info(struct trace *trace, int id)
1427 {
1428 	char tp_name[128];
1429 	struct syscall *sc;
1430 	const char *name = audit_syscall_to_name(id, trace->audit.machine);
1431 
1432 	if (name == NULL)
1433 		return -1;
1434 
1435 	if (id > trace->syscalls.max) {
1436 		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1437 
1438 		if (nsyscalls == NULL)
1439 			return -1;
1440 
1441 		if (trace->syscalls.max != -1) {
1442 			memset(nsyscalls + trace->syscalls.max + 1, 0,
1443 			       (id - trace->syscalls.max) * sizeof(*sc));
1444 		} else {
1445 			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1446 		}
1447 
1448 		trace->syscalls.table = nsyscalls;
1449 		trace->syscalls.max   = id;
1450 	}
1451 
1452 	sc = trace->syscalls.table + id;
1453 	sc->name = name;
1454 
1455 	if (trace->ev_qualifier) {
1456 		bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1457 
1458 		if (!(in ^ trace->not_ev_qualifier)) {
1459 			sc->filtered = true;
1460 			/*
1461 			 * No need to do read tracepoint information since this will be
1462 			 * filtered out.
1463 			 */
1464 			return 0;
1465 		}
1466 	}
1467 
1468 	sc->fmt  = syscall_fmt__find(sc->name);
1469 
1470 	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1471 	sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1472 
1473 	if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1474 		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1475 		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1476 	}
1477 
1478 	if (sc->tp_format == NULL)
1479 		return -1;
1480 
1481 	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1482 
1483 	return syscall__set_arg_fmts(sc);
1484 }
1485 
1486 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1487 				      unsigned long *args, struct trace *trace,
1488 				      struct thread *thread)
1489 {
1490 	size_t printed = 0;
1491 
1492 	if (sc->tp_format != NULL) {
1493 		struct format_field *field;
1494 		u8 bit = 1;
1495 		struct syscall_arg arg = {
1496 			.idx	= 0,
1497 			.mask	= 0,
1498 			.trace  = trace,
1499 			.thread = thread,
1500 		};
1501 
1502 		for (field = sc->tp_format->format.fields->next; field;
1503 		     field = field->next, ++arg.idx, bit <<= 1) {
1504 			if (arg.mask & bit)
1505 				continue;
1506 			/*
1507  			 * Suppress this argument if its value is zero and
1508  			 * and we don't have a string associated in an
1509  			 * strarray for it.
1510  			 */
1511 			if (args[arg.idx] == 0 &&
1512 			    !(sc->arg_scnprintf &&
1513 			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1514 			      sc->arg_parm[arg.idx]))
1515 				continue;
1516 
1517 			printed += scnprintf(bf + printed, size - printed,
1518 					     "%s%s: ", printed ? ", " : "", field->name);
1519 			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1520 				arg.val = args[arg.idx];
1521 				if (sc->arg_parm)
1522 					arg.parm = sc->arg_parm[arg.idx];
1523 				printed += sc->arg_scnprintf[arg.idx](bf + printed,
1524 								      size - printed, &arg);
1525 			} else {
1526 				printed += scnprintf(bf + printed, size - printed,
1527 						     "%ld", args[arg.idx]);
1528 			}
1529 		}
1530 	} else {
1531 		int i = 0;
1532 
1533 		while (i < 6) {
1534 			printed += scnprintf(bf + printed, size - printed,
1535 					     "%sarg%d: %ld",
1536 					     printed ? ", " : "", i, args[i]);
1537 			++i;
1538 		}
1539 	}
1540 
1541 	return printed;
1542 }
1543 
/*
 * Signature of the per-event handlers stored in evsel->handler and
 * invoked with the sample that was read for that evsel.
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1547 
1548 static struct syscall *trace__syscall_info(struct trace *trace,
1549 					   struct perf_evsel *evsel, int id)
1550 {
1551 
1552 	if (id < 0) {
1553 
1554 		/*
1555 		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1556 		 * before that, leaving at a higher verbosity level till that is
1557 		 * explained. Reproduced with plain ftrace with:
1558 		 *
1559 		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1560 		 * grep "NR -1 " /t/trace_pipe
1561 		 *
1562 		 * After generating some load on the machine.
1563  		 */
1564 		if (verbose > 1) {
1565 			static u64 n;
1566 			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1567 				id, perf_evsel__name(evsel), ++n);
1568 		}
1569 		return NULL;
1570 	}
1571 
1572 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1573 	    trace__read_syscall_info(trace, id))
1574 		goto out_cant_read;
1575 
1576 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1577 		goto out_cant_read;
1578 
1579 	return &trace->syscalls.table[id];
1580 
1581 out_cant_read:
1582 	if (verbose) {
1583 		fprintf(trace->output, "Problems reading syscall %d", id);
1584 		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1585 			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1586 		fputs(" information\n", trace->output);
1587 	}
1588 	return NULL;
1589 }
1590 
1591 static void thread__update_stats(struct thread_trace *ttrace,
1592 				 int id, struct perf_sample *sample)
1593 {
1594 	struct int_node *inode;
1595 	struct stats *stats;
1596 	u64 duration = 0;
1597 
1598 	inode = intlist__findnew(ttrace->syscall_stats, id);
1599 	if (inode == NULL)
1600 		return;
1601 
1602 	stats = inode->priv;
1603 	if (stats == NULL) {
1604 		stats = malloc(sizeof(struct stats));
1605 		if (stats == NULL)
1606 			return;
1607 		init_stats(stats);
1608 		inode->priv = stats;
1609 	}
1610 
1611 	if (ttrace->entry_time && sample->time > ttrace->entry_time)
1612 		duration = sample->time - ttrace->entry_time;
1613 
1614 	update_stats(stats, duration);
1615 }
1616 
1617 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1618 			    union perf_event *event __maybe_unused,
1619 			    struct perf_sample *sample)
1620 {
1621 	char *msg;
1622 	void *args;
1623 	size_t printed = 0;
1624 	struct thread *thread;
1625 	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1626 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
1627 	struct thread_trace *ttrace;
1628 
1629 	if (sc == NULL)
1630 		return -1;
1631 
1632 	if (sc->filtered)
1633 		return 0;
1634 
1635 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1636 	ttrace = thread__trace(thread, trace->output);
1637 	if (ttrace == NULL)
1638 		return -1;
1639 
1640 	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1641 
1642 	if (ttrace->entry_str == NULL) {
1643 		ttrace->entry_str = malloc(1024);
1644 		if (!ttrace->entry_str)
1645 			return -1;
1646 	}
1647 
1648 	ttrace->entry_time = sample->time;
1649 	msg = ttrace->entry_str;
1650 	printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1651 
1652 	printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1653 					   args, trace, thread);
1654 
1655 	if (sc->is_exit) {
1656 		if (!trace->duration_filter && !trace->summary_only) {
1657 			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1658 			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1659 		}
1660 	} else
1661 		ttrace->entry_pending = true;
1662 
1663 	return 0;
1664 }
1665 
1666 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1667 			   union perf_event *event __maybe_unused,
1668 			   struct perf_sample *sample)
1669 {
1670 	int ret;
1671 	u64 duration = 0;
1672 	struct thread *thread;
1673 	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1674 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
1675 	struct thread_trace *ttrace;
1676 
1677 	if (sc == NULL)
1678 		return -1;
1679 
1680 	if (sc->filtered)
1681 		return 0;
1682 
1683 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1684 	ttrace = thread__trace(thread, trace->output);
1685 	if (ttrace == NULL)
1686 		return -1;
1687 
1688 	if (trace->summary)
1689 		thread__update_stats(ttrace, id, sample);
1690 
1691 	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1692 
1693 	if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1694 		trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1695 		trace->last_vfs_getname = NULL;
1696 		++trace->stats.vfs_getname;
1697 	}
1698 
1699 	ttrace->exit_time = sample->time;
1700 
1701 	if (ttrace->entry_time) {
1702 		duration = sample->time - ttrace->entry_time;
1703 		if (trace__filter_duration(trace, duration))
1704 			goto out;
1705 	} else if (trace->duration_filter)
1706 		goto out;
1707 
1708 	if (trace->summary_only)
1709 		goto out;
1710 
1711 	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1712 
1713 	if (ttrace->entry_pending) {
1714 		fprintf(trace->output, "%-70s", ttrace->entry_str);
1715 	} else {
1716 		fprintf(trace->output, " ... [");
1717 		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1718 		fprintf(trace->output, "]: %s()", sc->name);
1719 	}
1720 
1721 	if (sc->fmt == NULL) {
1722 signed_print:
1723 		fprintf(trace->output, ") = %d", ret);
1724 	} else if (ret < 0 && sc->fmt->errmsg) {
1725 		char bf[256];
1726 		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1727 			   *e = audit_errno_to_name(-ret);
1728 
1729 		fprintf(trace->output, ") = -1 %s %s", e, emsg);
1730 	} else if (ret == 0 && sc->fmt->timeout)
1731 		fprintf(trace->output, ") = 0 Timeout");
1732 	else if (sc->fmt->hexret)
1733 		fprintf(trace->output, ") = %#x", ret);
1734 	else
1735 		goto signed_print;
1736 
1737 	fputc('\n', trace->output);
1738 out:
1739 	ttrace->entry_pending = false;
1740 
1741 	return 0;
1742 }
1743 
1744 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1745 			      union perf_event *event __maybe_unused,
1746 			      struct perf_sample *sample)
1747 {
1748 	trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1749 	return 0;
1750 }
1751 
1752 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1753 				     union perf_event *event __maybe_unused,
1754 				     struct perf_sample *sample)
1755 {
1756         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1757 	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1758 	struct thread *thread = machine__findnew_thread(trace->host,
1759 							sample->pid,
1760 							sample->tid);
1761 	struct thread_trace *ttrace = thread__trace(thread, trace->output);
1762 
1763 	if (ttrace == NULL)
1764 		goto out_dump;
1765 
1766 	ttrace->runtime_ms += runtime_ms;
1767 	trace->runtime_ms += runtime_ms;
1768 	return 0;
1769 
1770 out_dump:
1771 	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1772 	       evsel->name,
1773 	       perf_evsel__strval(evsel, sample, "comm"),
1774 	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1775 	       runtime,
1776 	       perf_evsel__intval(evsel, sample, "vruntime"));
1777 	return 0;
1778 }
1779 
1780 static void print_location(FILE *f, struct perf_sample *sample,
1781 			   struct addr_location *al,
1782 			   bool print_dso, bool print_sym)
1783 {
1784 
1785 	if ((verbose || print_dso) && al->map)
1786 		fprintf(f, "%s@", al->map->dso->long_name);
1787 
1788 	if ((verbose || print_sym) && al->sym)
1789 		fprintf(f, "%s+0x%lx", al->sym->name,
1790 			al->addr - al->sym->start);
1791 	else if (al->map)
1792 		fprintf(f, "0x%lx", al->addr);
1793 	else
1794 		fprintf(f, "0x%lx", sample->addr);
1795 }
1796 
1797 static int trace__pgfault(struct trace *trace,
1798 			  struct perf_evsel *evsel,
1799 			  union perf_event *event,
1800 			  struct perf_sample *sample)
1801 {
1802 	struct thread *thread;
1803 	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1804 	struct addr_location al;
1805 	char map_type = 'd';
1806 
1807 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1808 
1809 	thread__find_addr_location(thread, trace->host, cpumode, MAP__FUNCTION,
1810 			      sample->ip, &al);
1811 
1812 	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1813 
1814 	fprintf(trace->output, "%sfault [",
1815 		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1816 		"maj" : "min");
1817 
1818 	print_location(trace->output, sample, &al, false, true);
1819 
1820 	fprintf(trace->output, "] => ");
1821 
1822 	thread__find_addr_location(thread, trace->host, cpumode, MAP__VARIABLE,
1823 				   sample->addr, &al);
1824 
1825 	if (!al.map) {
1826 		thread__find_addr_location(thread, trace->host, cpumode,
1827 					   MAP__FUNCTION, sample->addr, &al);
1828 
1829 		if (al.map)
1830 			map_type = 'x';
1831 		else
1832 			map_type = '?';
1833 	}
1834 
1835 	print_location(trace->output, sample, &al, true, false);
1836 
1837 	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1838 
1839 	return 0;
1840 }
1841 
1842 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1843 {
1844 	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1845 	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1846 		return false;
1847 
1848 	if (trace->pid_list || trace->tid_list)
1849 		return true;
1850 
1851 	return false;
1852 }
1853 
1854 static int trace__process_sample(struct perf_tool *tool,
1855 				 union perf_event *event,
1856 				 struct perf_sample *sample,
1857 				 struct perf_evsel *evsel,
1858 				 struct machine *machine __maybe_unused)
1859 {
1860 	struct trace *trace = container_of(tool, struct trace, tool);
1861 	int err = 0;
1862 
1863 	tracepoint_handler handler = evsel->handler;
1864 
1865 	if (skip_sample(trace, sample))
1866 		return 0;
1867 
1868 	if (!trace->full_time && trace->base_time == 0)
1869 		trace->base_time = sample->time;
1870 
1871 	if (handler) {
1872 		++trace->nr_events;
1873 		handler(trace, evsel, event, sample);
1874 	}
1875 
1876 	return err;
1877 }
1878 
1879 static int parse_target_str(struct trace *trace)
1880 {
1881 	if (trace->opts.target.pid) {
1882 		trace->pid_list = intlist__new(trace->opts.target.pid);
1883 		if (trace->pid_list == NULL) {
1884 			pr_err("Error parsing process id string\n");
1885 			return -EINVAL;
1886 		}
1887 	}
1888 
1889 	if (trace->opts.target.tid) {
1890 		trace->tid_list = intlist__new(trace->opts.target.tid);
1891 		if (trace->tid_list == NULL) {
1892 			pr_err("Error parsing thread id string\n");
1893 			return -EINVAL;
1894 		}
1895 	}
1896 
1897 	return 0;
1898 }
1899 
1900 static int trace__record(struct trace *trace, int argc, const char **argv)
1901 {
1902 	unsigned int rec_argc, i, j;
1903 	const char **rec_argv;
1904 	const char * const record_args[] = {
1905 		"record",
1906 		"-R",
1907 		"-m", "1024",
1908 		"-c", "1",
1909 	};
1910 
1911 	const char * const sc_args[] = { "-e", };
1912 	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1913 	const char * const majpf_args[] = { "-e", "major-faults" };
1914 	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1915 	const char * const minpf_args[] = { "-e", "minor-faults" };
1916 	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1917 
1918 	/* +1 is for the event string below */
1919 	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1920 		majpf_args_nr + minpf_args_nr + argc;
1921 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
1922 
1923 	if (rec_argv == NULL)
1924 		return -ENOMEM;
1925 
1926 	j = 0;
1927 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
1928 		rec_argv[j++] = record_args[i];
1929 
1930 	for (i = 0; i < sc_args_nr; i++)
1931 		rec_argv[j++] = sc_args[i];
1932 
1933 	/* event string may be different for older kernels - e.g., RHEL6 */
1934 	if (is_valid_tracepoint("raw_syscalls:sys_enter"))
1935 		rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
1936 	else if (is_valid_tracepoint("syscalls:sys_enter"))
1937 		rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
1938 	else {
1939 		pr_err("Neither raw_syscalls nor syscalls events exist.\n");
1940 		return -1;
1941 	}
1942 
1943 	if (trace->trace_pgfaults & TRACE_PFMAJ)
1944 		for (i = 0; i < majpf_args_nr; i++)
1945 			rec_argv[j++] = majpf_args[i];
1946 
1947 	if (trace->trace_pgfaults & TRACE_PFMIN)
1948 		for (i = 0; i < minpf_args_nr; i++)
1949 			rec_argv[j++] = minpf_args[i];
1950 
1951 	for (i = 0; i < (unsigned int)argc; i++)
1952 		rec_argv[j++] = argv[i];
1953 
1954 	return cmd_record(j, rec_argv, NULL);
1955 }
1956 
/* Forward declaration: used before its definition further down in this file. */
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1958 
1959 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1960 {
1961 	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1962 	if (evsel == NULL)
1963 		return;
1964 
1965 	if (perf_evsel__field(evsel, "pathname") == NULL) {
1966 		perf_evsel__delete(evsel);
1967 		return;
1968 	}
1969 
1970 	evsel->handler = trace__vfs_getname;
1971 	perf_evlist__add(evlist, evsel);
1972 }
1973 
1974 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
1975 				    u64 config)
1976 {
1977 	struct perf_evsel *evsel;
1978 	struct perf_event_attr attr = {
1979 		.type = PERF_TYPE_SOFTWARE,
1980 		.mmap_data = 1,
1981 		.sample_period = 1,
1982 	};
1983 
1984 	attr.config = config;
1985 
1986 	event_attr_init(&attr);
1987 
1988 	evsel = perf_evsel__new(&attr);
1989 	if (!evsel)
1990 		return -ENOMEM;
1991 
1992 	evsel->handler = trace__pgfault;
1993 	perf_evlist__add(evlist, evsel);
1994 
1995 	return 0;
1996 }
1997 
1998 static int trace__run(struct trace *trace, int argc, const char **argv)
1999 {
2000 	struct perf_evlist *evlist = perf_evlist__new();
2001 	struct perf_evsel *evsel;
2002 	int err = -1, i;
2003 	unsigned long before;
2004 	const bool forks = argc > 0;
2005 
2006 	trace->live = true;
2007 
2008 	if (evlist == NULL) {
2009 		fprintf(trace->output, "Not enough memory to run!\n");
2010 		goto out;
2011 	}
2012 
2013 	if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
2014 		goto out_error_tp;
2015 
2016 	perf_evlist__add_vfs_getname(evlist);
2017 
2018 	if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2019 	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ))
2020 		goto out_error_tp;
2021 
2022 	if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2023 	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2024 		goto out_error_tp;
2025 
2026 	if (trace->sched &&
2027 		perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2028 				trace__sched_stat_runtime))
2029 		goto out_error_tp;
2030 
2031 	err = perf_evlist__create_maps(evlist, &trace->opts.target);
2032 	if (err < 0) {
2033 		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2034 		goto out_delete_evlist;
2035 	}
2036 
2037 	err = trace__symbols_init(trace, evlist);
2038 	if (err < 0) {
2039 		fprintf(trace->output, "Problems initializing symbol libraries!\n");
2040 		goto out_delete_evlist;
2041 	}
2042 
2043 	perf_evlist__config(evlist, &trace->opts);
2044 
2045 	signal(SIGCHLD, sig_handler);
2046 	signal(SIGINT, sig_handler);
2047 
2048 	if (forks) {
2049 		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2050 						    argv, false, NULL);
2051 		if (err < 0) {
2052 			fprintf(trace->output, "Couldn't run the workload!\n");
2053 			goto out_delete_evlist;
2054 		}
2055 	}
2056 
2057 	err = perf_evlist__open(evlist);
2058 	if (err < 0)
2059 		goto out_error_open;
2060 
2061 	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2062 	if (err < 0) {
2063 		fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
2064 		goto out_delete_evlist;
2065 	}
2066 
2067 	perf_evlist__enable(evlist);
2068 
2069 	if (forks)
2070 		perf_evlist__start_workload(evlist);
2071 
2072 	trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
2073 again:
2074 	before = trace->nr_events;
2075 
2076 	for (i = 0; i < evlist->nr_mmaps; i++) {
2077 		union perf_event *event;
2078 
2079 		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2080 			const u32 type = event->header.type;
2081 			tracepoint_handler handler;
2082 			struct perf_sample sample;
2083 
2084 			++trace->nr_events;
2085 
2086 			err = perf_evlist__parse_sample(evlist, event, &sample);
2087 			if (err) {
2088 				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2089 				goto next_event;
2090 			}
2091 
2092 			if (!trace->full_time && trace->base_time == 0)
2093 				trace->base_time = sample.time;
2094 
2095 			if (type != PERF_RECORD_SAMPLE) {
2096 				trace__process_event(trace, trace->host, event, &sample);
2097 				continue;
2098 			}
2099 
2100 			evsel = perf_evlist__id2evsel(evlist, sample.id);
2101 			if (evsel == NULL) {
2102 				fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
2103 				goto next_event;
2104 			}
2105 
2106 			if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2107 			    sample.raw_data == NULL) {
2108 				fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2109 				       perf_evsel__name(evsel), sample.tid,
2110 				       sample.cpu, sample.raw_size);
2111 				goto next_event;
2112 			}
2113 
2114 			handler = evsel->handler;
2115 			handler(trace, evsel, event, &sample);
2116 next_event:
2117 			perf_evlist__mmap_consume(evlist, i);
2118 
2119 			if (interrupted)
2120 				goto out_disable;
2121 		}
2122 	}
2123 
2124 	if (trace->nr_events == before) {
2125 		int timeout = done ? 100 : -1;
2126 
2127 		if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
2128 			goto again;
2129 	} else {
2130 		goto again;
2131 	}
2132 
2133 out_disable:
2134 	perf_evlist__disable(evlist);
2135 
2136 	if (!err) {
2137 		if (trace->summary)
2138 			trace__fprintf_thread_summary(trace, trace->output);
2139 
2140 		if (trace->show_tool_stats) {
2141 			fprintf(trace->output, "Stats:\n "
2142 					       " vfs_getname : %" PRIu64 "\n"
2143 					       " proc_getname: %" PRIu64 "\n",
2144 				trace->stats.vfs_getname,
2145 				trace->stats.proc_getname);
2146 		}
2147 	}
2148 
2149 out_delete_evlist:
2150 	perf_evlist__delete(evlist);
2151 out:
2152 	trace->live = false;
2153 	return err;
2154 {
2155 	char errbuf[BUFSIZ];
2156 
2157 out_error_tp:
2158 	perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2159 	goto out_error;
2160 
2161 out_error_open:
2162 	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2163 
2164 out_error:
2165 	fprintf(trace->output, "%s\n", errbuf);
2166 	goto out_delete_evlist;
2167 }
2168 }
2169 
2170 static int trace__replay(struct trace *trace)
2171 {
2172 	const struct perf_evsel_str_handler handlers[] = {
2173 		{ "probe:vfs_getname",	     trace__vfs_getname, },
2174 	};
2175 	struct perf_data_file file = {
2176 		.path  = input_name,
2177 		.mode  = PERF_DATA_MODE_READ,
2178 	};
2179 	struct perf_session *session;
2180 	struct perf_evsel *evsel;
2181 	int err = -1;
2182 
2183 	trace->tool.sample	  = trace__process_sample;
2184 	trace->tool.mmap	  = perf_event__process_mmap;
2185 	trace->tool.mmap2	  = perf_event__process_mmap2;
2186 	trace->tool.comm	  = perf_event__process_comm;
2187 	trace->tool.exit	  = perf_event__process_exit;
2188 	trace->tool.fork	  = perf_event__process_fork;
2189 	trace->tool.attr	  = perf_event__process_attr;
2190 	trace->tool.tracing_data = perf_event__process_tracing_data;
2191 	trace->tool.build_id	  = perf_event__process_build_id;
2192 
2193 	trace->tool.ordered_samples = true;
2194 	trace->tool.ordering_requires_timestamps = true;
2195 
2196 	/* add tid to output */
2197 	trace->multiple_threads = true;
2198 
2199 	if (symbol__init() < 0)
2200 		return -1;
2201 
2202 	session = perf_session__new(&file, false, &trace->tool);
2203 	if (session == NULL)
2204 		return -ENOMEM;
2205 
2206 	trace->host = &session->machines.host;
2207 
2208 	err = perf_session__set_tracepoints_handlers(session, handlers);
2209 	if (err)
2210 		goto out;
2211 
2212 	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2213 						     "raw_syscalls:sys_enter");
2214 	/* older kernels have syscalls tp versus raw_syscalls */
2215 	if (evsel == NULL)
2216 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2217 							     "syscalls:sys_enter");
2218 	if (evsel == NULL) {
2219 		pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2220 		goto out;
2221 	}
2222 
2223 	if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2224 	    perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2225 		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2226 		goto out;
2227 	}
2228 
2229 	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2230 						     "raw_syscalls:sys_exit");
2231 	if (evsel == NULL)
2232 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2233 							     "syscalls:sys_exit");
2234 	if (evsel == NULL) {
2235 		pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2236 		goto out;
2237 	}
2238 
2239 	if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2240 	    perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2241 		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2242 		goto out;
2243 	}
2244 
2245 	evlist__for_each(session->evlist, evsel) {
2246 		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2247 		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2248 		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2249 		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2250 			evsel->handler = trace__pgfault;
2251 	}
2252 
2253 	err = parse_target_str(trace);
2254 	if (err != 0)
2255 		goto out;
2256 
2257 	setup_pager();
2258 
2259 	err = perf_session__process_events(session, &trace->tool);
2260 	if (err)
2261 		pr_err("Failed to process events, error %d", err);
2262 
2263 	else if (trace->summary)
2264 		trace__fprintf_thread_summary(trace, trace->output);
2265 
2266 out:
2267 	perf_session__delete(session);
2268 
2269 	return err;
2270 }
2271 
/*
 * Write the title line of the per-thread summary to @fp.
 *
 * Returns the fprintf() result converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2280 
2281 static size_t thread__dump_stats(struct thread_trace *ttrace,
2282 				 struct trace *trace, FILE *fp)
2283 {
2284 	struct stats *stats;
2285 	size_t printed = 0;
2286 	struct syscall *sc;
2287 	struct int_node *inode = intlist__first(ttrace->syscall_stats);
2288 
2289 	if (inode == NULL)
2290 		return 0;
2291 
2292 	printed += fprintf(fp, "\n");
2293 
2294 	printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2295 	printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2296 	printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2297 
2298 	/* each int_node is a syscall */
2299 	while (inode) {
2300 		stats = inode->priv;
2301 		if (stats) {
2302 			double min = (double)(stats->min) / NSEC_PER_MSEC;
2303 			double max = (double)(stats->max) / NSEC_PER_MSEC;
2304 			double avg = avg_stats(stats);
2305 			double pct;
2306 			u64 n = (u64) stats->n;
2307 
2308 			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2309 			avg /= NSEC_PER_MSEC;
2310 
2311 			sc = &trace->syscalls.table[inode->i];
2312 			printed += fprintf(fp, "   %-15s", sc->name);
2313 			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2314 					   n, min, avg);
2315 			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2316 		}
2317 
2318 		inode = intlist__next(inode);
2319 	}
2320 
2321 	printed += fprintf(fp, "\n\n");
2322 
2323 	return printed;
2324 }
2325 
/*
 * Context handed to the per-thread summary callback through its void *priv
 * argument.
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* tracing state the summary is computed from */
	size_t printed;		/* running total of the fprintf() results */
};
2332 
2333 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2334 {
2335 	struct summary_data *data = priv;
2336 	FILE *fp = data->fp;
2337 	size_t printed = data->printed;
2338 	struct trace *trace = data->trace;
2339 	struct thread_trace *ttrace = thread->priv;
2340 	double ratio;
2341 
2342 	if (ttrace == NULL)
2343 		return 0;
2344 
2345 	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2346 
2347 	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2348 	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2349 	printed += fprintf(fp, "%.1f%%", ratio);
2350 	printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2351 	printed += thread__dump_stats(ttrace, trace, fp);
2352 
2353 	data->printed += printed;
2354 
2355 	return 0;
2356 }
2357 
2358 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2359 {
2360 	struct summary_data data = {
2361 		.fp = fp,
2362 		.trace = trace
2363 	};
2364 	data.printed = trace__fprintf_threads_header(fp);
2365 
2366 	machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2367 
2368 	return data.printed;
2369 }
2370 
2371 static int trace__set_duration(const struct option *opt, const char *str,
2372 			       int unset __maybe_unused)
2373 {
2374 	struct trace *trace = opt->value;
2375 
2376 	trace->duration_filter = atof(str);
2377 	return 0;
2378 }
2379 
2380 static int trace__open_output(struct trace *trace, const char *filename)
2381 {
2382 	struct stat st;
2383 
2384 	if (!stat(filename, &st) && st.st_size) {
2385 		char oldname[PATH_MAX];
2386 
2387 		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2388 		unlink(oldname);
2389 		rename(filename, oldname);
2390 	}
2391 
2392 	trace->output = fopen(filename, "w");
2393 
2394 	return trace->output == NULL ? -errno : 0;
2395 }
2396 
2397 static int parse_pagefaults(const struct option *opt, const char *str,
2398 			    int unset __maybe_unused)
2399 {
2400 	int *trace_pgfaults = opt->value;
2401 
2402 	if (strcmp(str, "all") == 0)
2403 		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2404 	else if (strcmp(str, "maj") == 0)
2405 		*trace_pgfaults |= TRACE_PFMAJ;
2406 	else if (strcmp(str, "min") == 0)
2407 		*trace_pgfaults |= TRACE_PFMIN;
2408 	else
2409 		return -1;
2410 
2411 	return 0;
2412 }
2413 
2414 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2415 {
2416 	const char * const trace_usage[] = {
2417 		"perf trace [<options>] [<command>]",
2418 		"perf trace [<options>] -- <command> [<options>]",
2419 		"perf trace record [<options>] [<command>]",
2420 		"perf trace record [<options>] -- <command> [<options>]",
2421 		NULL
2422 	};
2423 	struct trace trace = {
2424 		.audit = {
2425 			.machine = audit_detect_machine(),
2426 			.open_id = audit_name_to_syscall("open", trace.audit.machine),
2427 		},
2428 		.syscalls = {
2429 			. max = -1,
2430 		},
2431 		.opts = {
2432 			.target = {
2433 				.uid	   = UINT_MAX,
2434 				.uses_mmap = true,
2435 			},
2436 			.user_freq     = UINT_MAX,
2437 			.user_interval = ULLONG_MAX,
2438 			.no_buffering  = true,
2439 			.mmap_pages    = 1024,
2440 		},
2441 		.output = stdout,
2442 		.show_comm = true,
2443 	};
2444 	const char *output_name = NULL;
2445 	const char *ev_qualifier_str = NULL;
2446 	const struct option trace_options[] = {
2447 	OPT_BOOLEAN(0, "comm", &trace.show_comm,
2448 		    "show the thread COMM next to its id"),
2449 	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2450 	OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2451 		    "list of events to trace"),
2452 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
2453 	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2454 	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2455 		    "trace events on existing process id"),
2456 	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2457 		    "trace events on existing thread id"),
2458 	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2459 		    "system-wide collection from all CPUs"),
2460 	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2461 		    "list of cpus to monitor"),
2462 	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2463 		    "child tasks do not inherit counters"),
2464 	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2465 		     "number of mmap data pages",
2466 		     perf_evlist__parse_mmap_pages),
2467 	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2468 		   "user to profile"),
2469 	OPT_CALLBACK(0, "duration", &trace, "float",
2470 		     "show only events with duration > N.M ms",
2471 		     trace__set_duration),
2472 	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2473 	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2474 	OPT_BOOLEAN('T', "time", &trace.full_time,
2475 		    "Show full timestamp, not time relative to first start"),
2476 	OPT_BOOLEAN('s', "summary", &trace.summary_only,
2477 		    "Show only syscall summary with statistics"),
2478 	OPT_BOOLEAN('S', "with-summary", &trace.summary,
2479 		    "Show all syscalls and summary with statistics"),
2480 	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2481 		     "Trace pagefaults", parse_pagefaults, "maj"),
2482 	OPT_END()
2483 	};
2484 	int err;
2485 	char bf[BUFSIZ];
2486 
2487 	argc = parse_options(argc, argv, trace_options, trace_usage,
2488 			     PARSE_OPT_STOP_AT_NON_OPTION);
2489 
2490 	if (trace.trace_pgfaults) {
2491 		trace.opts.sample_address = true;
2492 		trace.opts.sample_time = true;
2493 	}
2494 
2495 	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2496 		return trace__record(&trace, argc-1, &argv[1]);
2497 
2498 	/* summary_only implies summary option, but don't overwrite summary if set */
2499 	if (trace.summary_only)
2500 		trace.summary = trace.summary_only;
2501 
2502 	if (output_name != NULL) {
2503 		err = trace__open_output(&trace, output_name);
2504 		if (err < 0) {
2505 			perror("failed to create output file");
2506 			goto out;
2507 		}
2508 	}
2509 
2510 	if (ev_qualifier_str != NULL) {
2511 		const char *s = ev_qualifier_str;
2512 
2513 		trace.not_ev_qualifier = *s == '!';
2514 		if (trace.not_ev_qualifier)
2515 			++s;
2516 		trace.ev_qualifier = strlist__new(true, s);
2517 		if (trace.ev_qualifier == NULL) {
2518 			fputs("Not enough memory to parse event qualifier",
2519 			      trace.output);
2520 			err = -ENOMEM;
2521 			goto out_close;
2522 		}
2523 	}
2524 
2525 	err = target__validate(&trace.opts.target);
2526 	if (err) {
2527 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2528 		fprintf(trace.output, "%s", bf);
2529 		goto out_close;
2530 	}
2531 
2532 	err = target__parse_uid(&trace.opts.target);
2533 	if (err) {
2534 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2535 		fprintf(trace.output, "%s", bf);
2536 		goto out_close;
2537 	}
2538 
2539 	if (!argc && target__none(&trace.opts.target))
2540 		trace.opts.target.system_wide = true;
2541 
2542 	if (input_name)
2543 		err = trace__replay(&trace);
2544 	else
2545 		err = trace__run(&trace, argc, argv);
2546 
2547 out_close:
2548 	if (output_name != NULL)
2549 		fclose(trace.output);
2550 out:
2551 	return err;
2552 }
2553