xref: /linux/tools/perf/builtin-trace.c (revision 598d02c5a07b60e5c824184cdaf697b70f3c452a)
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
16 
#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
22 
23 /* For older distros: */
24 #ifndef MAP_STACK
25 # define MAP_STACK		0x20000
26 #endif
27 
28 #ifndef MADV_HWPOISON
29 # define MADV_HWPOISON		100
30 #endif
31 
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE		12
34 #endif
35 
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE	13
38 #endif
39 
40 #ifndef EFD_SEMAPHORE
41 # define EFD_SEMAPHORE		1
42 #endif
43 
44 struct tp_field {
45 	int offset;
46 	union {
47 		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
48 		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
49 	};
50 };
51 
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
54 { \
55 	return *(u##bits *)(sample->raw_data + field->offset); \
56 }
57 
58 TP_UINT_FIELD(8);
59 TP_UINT_FIELD(16);
60 TP_UINT_FIELD(32);
61 TP_UINT_FIELD(64);
62 
63 #define TP_UINT_FIELD__SWAPPED(bits) \
64 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
65 { \
66 	u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
67 	return bswap_##bits(value);\
68 }
69 
70 TP_UINT_FIELD__SWAPPED(16);
71 TP_UINT_FIELD__SWAPPED(32);
72 TP_UINT_FIELD__SWAPPED(64);
73 
74 static int tp_field__init_uint(struct tp_field *field,
75 			       struct format_field *format_field,
76 			       bool needs_swap)
77 {
78 	field->offset = format_field->offset;
79 
80 	switch (format_field->size) {
81 	case 1:
82 		field->integer = tp_field__u8;
83 		break;
84 	case 2:
85 		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
86 		break;
87 	case 4:
88 		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
89 		break;
90 	case 8:
91 		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
92 		break;
93 	default:
94 		return -1;
95 	}
96 
97 	return 0;
98 }
99 
100 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
101 {
102 	return sample->raw_data + field->offset;
103 }
104 
105 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
106 {
107 	field->offset = format_field->offset;
108 	field->pointer = tp_field__ptr;
109 	return 0;
110 }
111 
112 struct syscall_tp {
113 	struct tp_field id;
114 	union {
115 		struct tp_field args, ret;
116 	};
117 };
118 
119 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
120 					  struct tp_field *field,
121 					  const char *name)
122 {
123 	struct format_field *format_field = perf_evsel__field(evsel, name);
124 
125 	if (format_field == NULL)
126 		return -1;
127 
128 	return tp_field__init_uint(field, format_field, evsel->needs_swap);
129 }
130 
/*
 * Set up the integer accessor for member 'name' of the struct syscall_tp
 * stored in evsel->priv, looking the tracepoint field up under that same
 * name.  Evaluates to the return value of perf_evsel__init_tp_uint_field().
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc_tp = (evsel)->priv; \
	   perf_evsel__init_tp_uint_field((evsel), &sc_tp->name, #name); })
134 
/*
 * Look up the tracepoint field called @name on @evsel and bind @field to it
 * as a pointer into the raw payload.
 *
 * Returns 0 on success, -1 if the field does not exist.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *ff = perf_evsel__field(evsel, name);

	return ff ? tp_field__init_ptr(field, ff) : -1;
}
146 
/*
 * Set up the pointer accessor for member 'name' of the struct syscall_tp
 * stored in evsel->priv, looking the tracepoint field up under that same
 * name.  Evaluates to the return value of perf_evsel__init_tp_ptr_field().
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc_tp = (evsel)->priv; \
	   perf_evsel__init_tp_ptr_field((evsel), &sc_tp->name, #name); })
150 
151 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
152 {
153 	zfree(&evsel->priv);
154 	perf_evsel__delete(evsel);
155 }
156 
157 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
158 {
159 	evsel->priv = malloc(sizeof(struct syscall_tp));
160 	if (evsel->priv != NULL) {
161 		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
162 			goto out_delete;
163 
164 		evsel->handler = handler;
165 		return 0;
166 	}
167 
168 	return -ENOMEM;
169 
170 out_delete:
171 	zfree(&evsel->priv);
172 	return -ENOENT;
173 }
174 
/*
 * Create the syscall tracepoint evsel for @direction ("sys_enter" or
 * "sys_exit" in the callers visible here) and prepare it with @handler.
 *
 * The "raw_syscalls" subsystem is tried first; older kernels (e.g., RHEL6)
 * expose the same events under "syscalls", so that is the fallback.
 *
 * Returns the new evsel, or NULL if it could not be created or initialized.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (evsel == NULL)
		evsel = perf_evsel__newtp("syscalls", direction);

	if (evsel == NULL)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
194 
/*
 * Read member 'name' of the evsel's struct syscall_tp from @sample as an
 * integer, through the reader installed for that member.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *sc_fields = (evsel)->priv; \
	   sc_fields->name.integer(&sc_fields->name, sample); })

/*
 * Same as above, but yields the address of the member's data inside
 * @sample's raw payload.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *sc_fields = (evsel)->priv; \
	   sc_fields->name.pointer(&sc_fields->name, sample); })
202 
203 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
204 					  void *sys_enter_handler,
205 					  void *sys_exit_handler)
206 {
207 	int ret = -1;
208 	struct perf_evsel *sys_enter, *sys_exit;
209 
210 	sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
211 	if (sys_enter == NULL)
212 		goto out;
213 
214 	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
215 		goto out_delete_sys_enter;
216 
217 	sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
218 	if (sys_exit == NULL)
219 		goto out_delete_sys_enter;
220 
221 	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
222 		goto out_delete_sys_exit;
223 
224 	perf_evlist__add(evlist, sys_enter);
225 	perf_evlist__add(evlist, sys_exit);
226 
227 	ret = 0;
228 out:
229 	return ret;
230 
231 out_delete_sys_exit:
232 	perf_evsel__delete_priv(sys_exit);
233 out_delete_sys_enter:
234 	perf_evsel__delete_priv(sys_enter);
235 	goto out;
236 }
237 
238 
239 struct syscall_arg {
240 	unsigned long val;
241 	struct thread *thread;
242 	struct trace  *trace;
243 	void	      *parm;
244 	u8	      idx;
245 	u8	      mask;
246 };
247 
/*
 * Table mapping a contiguous range of integer values to names.
 *
 * @offset:     value that corresponds to entries[0]
 * @nr_entries: number of slots in @entries
 * @entries:    the names, indexed by (value - @offset)
 */
struct strarray {
	int	     offset;
	int	     nr_entries;
	const char **entries;
};
253 
/*
 * Define 'struct strarray strarray__<array>' wrapping the string table
 * <array>, whose first entry corresponds to the value 'off'.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* Same, for tables whose first entry corresponds to the value 0. */
#define DEFINE_STRARRAY(array) DEFINE_STRARRAY_OFFSET(array, 0)
264 
265 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
266 						const char *intfmt,
267 					        struct syscall_arg *arg)
268 {
269 	struct strarray *sa = arg->parm;
270 	int idx = arg->val - sa->offset;
271 
272 	if (idx < 0 || idx >= sa->nr_entries)
273 		return scnprintf(bf, size, intfmt, arg->val);
274 
275 	return scnprintf(bf, size, "%s", sa->entries[idx]);
276 }
277 
/*
 * strarray formatter that prints values without a name in the table as
 * plain decimal.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *unknown_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, unknown_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
285 
#if defined(__i386__) || defined(__x86_64__)
/*
 * strarray formatter that prints values without a name in the table as
 * hexadecimal.
 *
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * 	  gets rewritten to support all arches.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *unknown_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, unknown_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
299 
/*
 * File descriptor formatter.  Only declared at this point so that the
 * formatters and tables below can refer to it; the definition is not in
 * this part of the file.
 */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
					struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
304 
305 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
306 					   struct syscall_arg *arg)
307 {
308 	int fd = arg->val;
309 
310 	if (fd == AT_FDCWD)
311 		return scnprintf(bf, size, "CWD");
312 
313 	return syscall_arg__scnprintf_fd(bf, size, arg);
314 }
315 
316 #define SCA_FDAT syscall_arg__scnprintf_fd_at
317 
/*
 * Formatter used for the fd argument of close().  Only declared at this
 * point; the definition is not in this part of the file.
 */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
322 
323 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
324 					 struct syscall_arg *arg)
325 {
326 	return scnprintf(bf, size, "%#lx", arg->val);
327 }
328 
329 #define SCA_HEX syscall_arg__scnprintf_hex
330 
331 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
332 					       struct syscall_arg *arg)
333 {
334 	int printed = 0, prot = arg->val;
335 
336 	if (prot == PROT_NONE)
337 		return scnprintf(bf, size, "NONE");
338 #define	P_MMAP_PROT(n) \
339 	if (prot & PROT_##n) { \
340 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
341 		prot &= ~PROT_##n; \
342 	}
343 
344 	P_MMAP_PROT(EXEC);
345 	P_MMAP_PROT(READ);
346 	P_MMAP_PROT(WRITE);
347 #ifdef PROT_SEM
348 	P_MMAP_PROT(SEM);
349 #endif
350 	P_MMAP_PROT(GROWSDOWN);
351 	P_MMAP_PROT(GROWSUP);
352 #undef P_MMAP_PROT
353 
354 	if (prot)
355 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
356 
357 	return printed;
358 }
359 
360 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
361 
362 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
363 						struct syscall_arg *arg)
364 {
365 	int printed = 0, flags = arg->val;
366 
367 #define	P_MMAP_FLAG(n) \
368 	if (flags & MAP_##n) { \
369 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
370 		flags &= ~MAP_##n; \
371 	}
372 
373 	P_MMAP_FLAG(SHARED);
374 	P_MMAP_FLAG(PRIVATE);
375 #ifdef MAP_32BIT
376 	P_MMAP_FLAG(32BIT);
377 #endif
378 	P_MMAP_FLAG(ANONYMOUS);
379 	P_MMAP_FLAG(DENYWRITE);
380 	P_MMAP_FLAG(EXECUTABLE);
381 	P_MMAP_FLAG(FILE);
382 	P_MMAP_FLAG(FIXED);
383 	P_MMAP_FLAG(GROWSDOWN);
384 #ifdef MAP_HUGETLB
385 	P_MMAP_FLAG(HUGETLB);
386 #endif
387 	P_MMAP_FLAG(LOCKED);
388 	P_MMAP_FLAG(NONBLOCK);
389 	P_MMAP_FLAG(NORESERVE);
390 	P_MMAP_FLAG(POPULATE);
391 	P_MMAP_FLAG(STACK);
392 #ifdef MAP_UNINITIALIZED
393 	P_MMAP_FLAG(UNINITIALIZED);
394 #endif
395 #undef P_MMAP_FLAG
396 
397 	if (flags)
398 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
399 
400 	return printed;
401 }
402 
403 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
404 
405 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
406 						      struct syscall_arg *arg)
407 {
408 	int behavior = arg->val;
409 
410 	switch (behavior) {
411 #define	P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
412 	P_MADV_BHV(NORMAL);
413 	P_MADV_BHV(RANDOM);
414 	P_MADV_BHV(SEQUENTIAL);
415 	P_MADV_BHV(WILLNEED);
416 	P_MADV_BHV(DONTNEED);
417 	P_MADV_BHV(REMOVE);
418 	P_MADV_BHV(DONTFORK);
419 	P_MADV_BHV(DOFORK);
420 	P_MADV_BHV(HWPOISON);
421 #ifdef MADV_SOFT_OFFLINE
422 	P_MADV_BHV(SOFT_OFFLINE);
423 #endif
424 	P_MADV_BHV(MERGEABLE);
425 	P_MADV_BHV(UNMERGEABLE);
426 #ifdef MADV_HUGEPAGE
427 	P_MADV_BHV(HUGEPAGE);
428 #endif
429 #ifdef MADV_NOHUGEPAGE
430 	P_MADV_BHV(NOHUGEPAGE);
431 #endif
432 #ifdef MADV_DONTDUMP
433 	P_MADV_BHV(DONTDUMP);
434 #endif
435 #ifdef MADV_DODUMP
436 	P_MADV_BHV(DODUMP);
437 #endif
438 #undef P_MADV_PHV
439 	default: break;
440 	}
441 
442 	return scnprintf(bf, size, "%#x", behavior);
443 }
444 
445 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
446 
447 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
448 					   struct syscall_arg *arg)
449 {
450 	int printed = 0, op = arg->val;
451 
452 	if (op == 0)
453 		return scnprintf(bf, size, "NONE");
454 #define	P_CMD(cmd) \
455 	if ((op & LOCK_##cmd) == LOCK_##cmd) { \
456 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
457 		op &= ~LOCK_##cmd; \
458 	}
459 
460 	P_CMD(SH);
461 	P_CMD(EX);
462 	P_CMD(NB);
463 	P_CMD(UN);
464 	P_CMD(MAND);
465 	P_CMD(RW);
466 	P_CMD(READ);
467 	P_CMD(WRITE);
468 #undef P_OP
469 
470 	if (op)
471 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
472 
473 	return printed;
474 }
475 
476 #define SCA_FLOCK syscall_arg__scnprintf_flock
477 
478 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
479 {
480 	enum syscall_futex_args {
481 		SCF_UADDR   = (1 << 0),
482 		SCF_OP	    = (1 << 1),
483 		SCF_VAL	    = (1 << 2),
484 		SCF_TIMEOUT = (1 << 3),
485 		SCF_UADDR2  = (1 << 4),
486 		SCF_VAL3    = (1 << 5),
487 	};
488 	int op = arg->val;
489 	int cmd = op & FUTEX_CMD_MASK;
490 	size_t printed = 0;
491 
492 	switch (cmd) {
493 #define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
494 	P_FUTEX_OP(WAIT);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
495 	P_FUTEX_OP(WAKE);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
496 	P_FUTEX_OP(FD);		    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
497 	P_FUTEX_OP(REQUEUE);	    arg->mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
498 	P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;			  break;
499 	P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;			  break;
500 	P_FUTEX_OP(WAKE_OP);							  break;
501 	P_FUTEX_OP(LOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
502 	P_FUTEX_OP(UNLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
503 	P_FUTEX_OP(TRYLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
504 	P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;			  break;
505 	P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;			  break;
506 	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
507 	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
508 	}
509 
510 	if (op & FUTEX_PRIVATE_FLAG)
511 		printed += scnprintf(bf + printed, size - printed, "|PRIV");
512 
513 	if (op & FUTEX_CLOCK_REALTIME)
514 		printed += scnprintf(bf + printed, size - printed, "|CLKRT");
515 
516 	return printed;
517 }
518 
519 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
520 
521 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
522 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
523 
524 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
525 static DEFINE_STRARRAY(itimers);
526 
527 static const char *whences[] = { "SET", "CUR", "END",
528 #ifdef SEEK_DATA
529 "DATA",
530 #endif
531 #ifdef SEEK_HOLE
532 "HOLE",
533 #endif
534 };
535 static DEFINE_STRARRAY(whences);
536 
537 static const char *fcntl_cmds[] = {
538 	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
539 	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
540 	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
541 	"F_GETOWNER_UIDS",
542 };
543 static DEFINE_STRARRAY(fcntl_cmds);
544 
545 static const char *rlimit_resources[] = {
546 	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
547 	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
548 	"RTTIME",
549 };
550 static DEFINE_STRARRAY(rlimit_resources);
551 
552 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
553 static DEFINE_STRARRAY(sighow);
554 
555 static const char *clockid[] = {
556 	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
557 	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
558 };
559 static DEFINE_STRARRAY(clockid);
560 
/*
 * Socket address/protocol families, indexed by their AF_/PF_ number.
 *
 * Family 28 (AF_MPLS) needs its own slot: without it "CAN" and every name
 * after it would sit one index too low and be reported for the wrong
 * family (AF_CAN is 29, ..., AF_VSOCK is 40).
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
570 
571 #ifndef SOCK_TYPE_MASK
572 #define SOCK_TYPE_MASK 0xf
573 #endif
574 
575 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
576 						      struct syscall_arg *arg)
577 {
578 	size_t printed;
579 	int type = arg->val,
580 	    flags = type & ~SOCK_TYPE_MASK;
581 
582 	type &= SOCK_TYPE_MASK;
583 	/*
584  	 * Can't use a strarray, MIPS may override for ABI reasons.
585  	 */
586 	switch (type) {
587 #define	P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
588 	P_SK_TYPE(STREAM);
589 	P_SK_TYPE(DGRAM);
590 	P_SK_TYPE(RAW);
591 	P_SK_TYPE(RDM);
592 	P_SK_TYPE(SEQPACKET);
593 	P_SK_TYPE(DCCP);
594 	P_SK_TYPE(PACKET);
595 #undef P_SK_TYPE
596 	default:
597 		printed = scnprintf(bf, size, "%#x", type);
598 	}
599 
600 #define	P_SK_FLAG(n) \
601 	if (flags & SOCK_##n) { \
602 		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
603 		flags &= ~SOCK_##n; \
604 	}
605 
606 	P_SK_FLAG(CLOEXEC);
607 	P_SK_FLAG(NONBLOCK);
608 #undef P_SK_FLAG
609 
610 	if (flags)
611 		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
612 
613 	return printed;
614 }
615 
616 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
617 
618 #ifndef MSG_PROBE
619 #define MSG_PROBE	     0x10
620 #endif
621 #ifndef MSG_WAITFORONE
622 #define MSG_WAITFORONE	0x10000
623 #endif
624 #ifndef MSG_SENDPAGE_NOTLAST
625 #define MSG_SENDPAGE_NOTLAST 0x20000
626 #endif
627 #ifndef MSG_FASTOPEN
628 #define MSG_FASTOPEN	     0x20000000
629 #endif
630 
631 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
632 					       struct syscall_arg *arg)
633 {
634 	int printed = 0, flags = arg->val;
635 
636 	if (flags == 0)
637 		return scnprintf(bf, size, "NONE");
638 #define	P_MSG_FLAG(n) \
639 	if (flags & MSG_##n) { \
640 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
641 		flags &= ~MSG_##n; \
642 	}
643 
644 	P_MSG_FLAG(OOB);
645 	P_MSG_FLAG(PEEK);
646 	P_MSG_FLAG(DONTROUTE);
647 	P_MSG_FLAG(TRYHARD);
648 	P_MSG_FLAG(CTRUNC);
649 	P_MSG_FLAG(PROBE);
650 	P_MSG_FLAG(TRUNC);
651 	P_MSG_FLAG(DONTWAIT);
652 	P_MSG_FLAG(EOR);
653 	P_MSG_FLAG(WAITALL);
654 	P_MSG_FLAG(FIN);
655 	P_MSG_FLAG(SYN);
656 	P_MSG_FLAG(CONFIRM);
657 	P_MSG_FLAG(RST);
658 	P_MSG_FLAG(ERRQUEUE);
659 	P_MSG_FLAG(NOSIGNAL);
660 	P_MSG_FLAG(MORE);
661 	P_MSG_FLAG(WAITFORONE);
662 	P_MSG_FLAG(SENDPAGE_NOTLAST);
663 	P_MSG_FLAG(FASTOPEN);
664 	P_MSG_FLAG(CMSG_CLOEXEC);
665 #undef P_MSG_FLAG
666 
667 	if (flags)
668 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
669 
670 	return printed;
671 }
672 
673 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
674 
675 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
676 						 struct syscall_arg *arg)
677 {
678 	size_t printed = 0;
679 	int mode = arg->val;
680 
681 	if (mode == F_OK) /* 0 */
682 		return scnprintf(bf, size, "F");
683 #define	P_MODE(n) \
684 	if (mode & n##_OK) { \
685 		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
686 		mode &= ~n##_OK; \
687 	}
688 
689 	P_MODE(R);
690 	P_MODE(W);
691 	P_MODE(X);
692 #undef P_MODE
693 
694 	if (mode)
695 		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
696 
697 	return printed;
698 }
699 
700 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
701 
702 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
703 					       struct syscall_arg *arg)
704 {
705 	int printed = 0, flags = arg->val;
706 
707 	if (!(flags & O_CREAT))
708 		arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
709 
710 	if (flags == 0)
711 		return scnprintf(bf, size, "RDONLY");
712 #define	P_FLAG(n) \
713 	if (flags & O_##n) { \
714 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
715 		flags &= ~O_##n; \
716 	}
717 
718 	P_FLAG(APPEND);
719 	P_FLAG(ASYNC);
720 	P_FLAG(CLOEXEC);
721 	P_FLAG(CREAT);
722 	P_FLAG(DIRECT);
723 	P_FLAG(DIRECTORY);
724 	P_FLAG(EXCL);
725 	P_FLAG(LARGEFILE);
726 	P_FLAG(NOATIME);
727 	P_FLAG(NOCTTY);
728 #ifdef O_NONBLOCK
729 	P_FLAG(NONBLOCK);
730 #elif O_NDELAY
731 	P_FLAG(NDELAY);
732 #endif
733 #ifdef O_PATH
734 	P_FLAG(PATH);
735 #endif
736 	P_FLAG(RDWR);
737 #ifdef O_DSYNC
738 	if ((flags & O_SYNC) == O_SYNC)
739 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
740 	else {
741 		P_FLAG(DSYNC);
742 	}
743 #else
744 	P_FLAG(SYNC);
745 #endif
746 	P_FLAG(TRUNC);
747 	P_FLAG(WRONLY);
748 #undef P_FLAG
749 
750 	if (flags)
751 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
752 
753 	return printed;
754 }
755 
756 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
757 
758 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
759 						   struct syscall_arg *arg)
760 {
761 	int printed = 0, flags = arg->val;
762 
763 	if (flags == 0)
764 		return scnprintf(bf, size, "NONE");
765 #define	P_FLAG(n) \
766 	if (flags & EFD_##n) { \
767 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
768 		flags &= ~EFD_##n; \
769 	}
770 
771 	P_FLAG(SEMAPHORE);
772 	P_FLAG(CLOEXEC);
773 	P_FLAG(NONBLOCK);
774 #undef P_FLAG
775 
776 	if (flags)
777 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
778 
779 	return printed;
780 }
781 
782 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
783 
784 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
785 						struct syscall_arg *arg)
786 {
787 	int printed = 0, flags = arg->val;
788 
789 #define	P_FLAG(n) \
790 	if (flags & O_##n) { \
791 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
792 		flags &= ~O_##n; \
793 	}
794 
795 	P_FLAG(CLOEXEC);
796 	P_FLAG(NONBLOCK);
797 #undef P_FLAG
798 
799 	if (flags)
800 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
801 
802 	return printed;
803 }
804 
805 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
806 
807 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
808 {
809 	int sig = arg->val;
810 
811 	switch (sig) {
812 #define	P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
813 	P_SIGNUM(HUP);
814 	P_SIGNUM(INT);
815 	P_SIGNUM(QUIT);
816 	P_SIGNUM(ILL);
817 	P_SIGNUM(TRAP);
818 	P_SIGNUM(ABRT);
819 	P_SIGNUM(BUS);
820 	P_SIGNUM(FPE);
821 	P_SIGNUM(KILL);
822 	P_SIGNUM(USR1);
823 	P_SIGNUM(SEGV);
824 	P_SIGNUM(USR2);
825 	P_SIGNUM(PIPE);
826 	P_SIGNUM(ALRM);
827 	P_SIGNUM(TERM);
828 	P_SIGNUM(CHLD);
829 	P_SIGNUM(CONT);
830 	P_SIGNUM(STOP);
831 	P_SIGNUM(TSTP);
832 	P_SIGNUM(TTIN);
833 	P_SIGNUM(TTOU);
834 	P_SIGNUM(URG);
835 	P_SIGNUM(XCPU);
836 	P_SIGNUM(XFSZ);
837 	P_SIGNUM(VTALRM);
838 	P_SIGNUM(PROF);
839 	P_SIGNUM(WINCH);
840 	P_SIGNUM(IO);
841 	P_SIGNUM(PWR);
842 	P_SIGNUM(SYS);
843 #ifdef SIGEMT
844 	P_SIGNUM(EMT);
845 #endif
846 #ifdef SIGSTKFLT
847 	P_SIGNUM(STKFLT);
848 #endif
849 #ifdef SIGSWI
850 	P_SIGNUM(SWI);
851 #endif
852 	default: break;
853 	}
854 
855 	return scnprintf(bf, size, "%#x", sig);
856 }
857 
858 #define SCA_SIGNUM syscall_arg__scnprintf_signum
859 
860 #if defined(__i386__) || defined(__x86_64__)
861 /*
862  * FIXME: Make this available to all arches.
863  */
864 #define TCGETS		0x5401
865 
866 static const char *tioctls[] = {
867 	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
868 	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
869 	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
870 	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
871 	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
872 	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
873 	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
874 	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
875 	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
876 	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
877 	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
878 	[0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
879 	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
880 	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
881 	"TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
882 };
883 
884 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
885 #endif /* defined(__i386__) || defined(__x86_64__) */
886 
/*
 * Initializer fragment for a struct syscall_fmt entry: format argument
 * number 'arg' through the strarray table strarray__<array>.  'name' is the
 * argument's name; it is not used in the expansion and only documents the
 * call site.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
890 
891 static struct syscall_fmt {
892 	const char *name;
893 	const char *alias;
894 	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
895 	void	   *arg_parm[6];
896 	bool	   errmsg;
897 	bool	   timeout;
898 	bool	   hexret;
899 } syscall_fmts[] = {
900 	{ .name	    = "access",	    .errmsg = true,
901 	  .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
902 	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
903 	{ .name	    = "brk",	    .hexret = true,
904 	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
905 	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
906 	{ .name	    = "close",	    .errmsg = true,
907 	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
908 	{ .name	    = "connect",    .errmsg = true, },
909 	{ .name	    = "dup",	    .errmsg = true,
910 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
911 	{ .name	    = "dup2",	    .errmsg = true,
912 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
913 	{ .name	    = "dup3",	    .errmsg = true,
914 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
915 	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
916 	{ .name	    = "eventfd2",   .errmsg = true,
917 	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
918 	{ .name	    = "faccessat",  .errmsg = true,
919 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
920 	{ .name	    = "fadvise64",  .errmsg = true,
921 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
922 	{ .name	    = "fallocate",  .errmsg = true,
923 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
924 	{ .name	    = "fchdir",	    .errmsg = true,
925 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
926 	{ .name	    = "fchmod",	    .errmsg = true,
927 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
928 	{ .name	    = "fchmodat",   .errmsg = true,
929 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
930 	{ .name	    = "fchown",	    .errmsg = true,
931 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
932 	{ .name	    = "fchownat",   .errmsg = true,
933 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
934 	{ .name	    = "fcntl",	    .errmsg = true,
935 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
936 			     [1] = SCA_STRARRAY, /* cmd */ },
937 	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
938 	{ .name	    = "fdatasync",  .errmsg = true,
939 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
940 	{ .name	    = "flock",	    .errmsg = true,
941 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
942 			     [1] = SCA_FLOCK, /* cmd */ }, },
943 	{ .name	    = "fsetxattr",  .errmsg = true,
944 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
945 	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat",
946 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
947 	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat",
948 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
949 	{ .name	    = "fstatfs",    .errmsg = true,
950 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
951 	{ .name	    = "fsync",    .errmsg = true,
952 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
953 	{ .name	    = "ftruncate", .errmsg = true,
954 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
955 	{ .name	    = "futex",	    .errmsg = true,
956 	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
957 	{ .name	    = "futimesat", .errmsg = true,
958 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
959 	{ .name	    = "getdents",   .errmsg = true,
960 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
961 	{ .name	    = "getdents64", .errmsg = true,
962 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
963 	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
964 	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
965 	{ .name	    = "ioctl",	    .errmsg = true,
966 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
967 #if defined(__i386__) || defined(__x86_64__)
968 /*
969  * FIXME: Make this available to all arches.
970  */
971 			     [1] = SCA_STRHEXARRAY, /* cmd */
972 			     [2] = SCA_HEX, /* arg */ },
973 	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
974 #else
975 			     [2] = SCA_HEX, /* arg */ }, },
976 #endif
977 	{ .name	    = "kill",	    .errmsg = true,
978 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
979 	{ .name	    = "linkat",	    .errmsg = true,
980 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
981 	{ .name	    = "lseek",	    .errmsg = true,
982 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
983 			     [2] = SCA_STRARRAY, /* whence */ },
984 	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
985 	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
986 	{ .name     = "madvise",    .errmsg = true,
987 	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
988 			     [2] = SCA_MADV_BHV, /* behavior */ }, },
989 	{ .name	    = "mkdirat",    .errmsg = true,
990 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
991 	{ .name	    = "mknodat",    .errmsg = true,
992 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
993 	{ .name	    = "mlock",	    .errmsg = true,
994 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
995 	{ .name	    = "mlockall",   .errmsg = true,
996 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
997 	{ .name	    = "mmap",	    .hexret = true,
998 	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
999 			     [2] = SCA_MMAP_PROT, /* prot */
1000 			     [3] = SCA_MMAP_FLAGS, /* flags */
1001 			     [4] = SCA_FD, 	  /* fd */ }, },
1002 	{ .name	    = "mprotect",   .errmsg = true,
1003 	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
1004 			     [2] = SCA_MMAP_PROT, /* prot */ }, },
1005 	{ .name	    = "mremap",	    .hexret = true,
1006 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1007 			     [4] = SCA_HEX, /* new_addr */ }, },
1008 	{ .name	    = "munlock",    .errmsg = true,
1009 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1010 	{ .name	    = "munmap",	    .errmsg = true,
1011 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1012 	{ .name	    = "name_to_handle_at", .errmsg = true,
1013 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1014 	{ .name	    = "newfstatat", .errmsg = true,
1015 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1016 	{ .name	    = "open",	    .errmsg = true,
1017 	  .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1018 	{ .name	    = "open_by_handle_at", .errmsg = true,
1019 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1020 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1021 	{ .name	    = "openat",	    .errmsg = true,
1022 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1023 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1024 	{ .name	    = "pipe2",	    .errmsg = true,
1025 	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1026 	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
1027 	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
1028 	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64",
1029 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1030 	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread",
1031 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1032 	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1033 	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64",
1034 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1035 	{ .name	    = "pwritev",    .errmsg = true,
1036 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1037 	{ .name	    = "read",	    .errmsg = true,
1038 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1039 	{ .name	    = "readlinkat", .errmsg = true,
1040 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1041 	{ .name	    = "readv",	    .errmsg = true,
1042 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1043 	{ .name	    = "recvfrom",   .errmsg = true,
1044 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1045 	{ .name	    = "recvmmsg",   .errmsg = true,
1046 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1047 	{ .name	    = "recvmsg",    .errmsg = true,
1048 	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1049 	{ .name	    = "renameat",   .errmsg = true,
1050 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1051 	{ .name	    = "rt_sigaction", .errmsg = true,
1052 	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1053 	{ .name	    = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1054 	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
1055 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1056 	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
1057 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1058 	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
1059 	{ .name	    = "sendmmsg",    .errmsg = true,
1060 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1061 	{ .name	    = "sendmsg",    .errmsg = true,
1062 	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1063 	{ .name	    = "sendto",	    .errmsg = true,
1064 	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1065 	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1066 	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1067 	{ .name	    = "shutdown",   .errmsg = true,
1068 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1069 	{ .name	    = "socket",	    .errmsg = true,
1070 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1071 			     [1] = SCA_SK_TYPE, /* type */ },
1072 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1073 	{ .name	    = "socketpair", .errmsg = true,
1074 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1075 			     [1] = SCA_SK_TYPE, /* type */ },
1076 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1077 	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat", },
1078 	{ .name	    = "symlinkat",  .errmsg = true,
1079 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1080 	{ .name	    = "tgkill",	    .errmsg = true,
1081 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1082 	{ .name	    = "tkill",	    .errmsg = true,
1083 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1084 	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
1085 	{ .name	    = "unlinkat",   .errmsg = true,
1086 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1087 	{ .name	    = "utimensat",  .errmsg = true,
1088 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1089 	{ .name	    = "write",	    .errmsg = true,
1090 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1091 	{ .name	    = "writev",	    .errmsg = true,
1092 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1093 };
1094 
1095 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1096 {
1097 	const struct syscall_fmt *fmt = fmtp;
1098 	return strcmp(name, fmt->name);
1099 }
1100 
1101 static struct syscall_fmt *syscall_fmt__find(const char *name)
1102 {
1103 	const int nmemb = ARRAY_SIZE(syscall_fmts);
1104 	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1105 }
1106 
/*
 * Per-syscall-id state, stored in trace->syscalls.table[] and filled in
 * lazily by trace__read_syscall_info().
 */
struct syscall {
	/* format of the syscalls:sys_enter_<name> (or alias) tracepoint */
	struct event_format *tp_format;
	/* name as returned by audit_syscall_to_name(); NULL means "not read yet" */
	const char	    *name;
	/* true when the event qualifier list excludes this syscall */
	bool		    filtered;
	/* true for "exit" and "exit_group", which are printed at entry time */
	bool		    is_exit;
	/* entry in syscall_fmts[], or NULL when there is none for this name */
	struct syscall_fmt  *fmt;
	/* per-argument formatters, allocated by syscall__set_arg_fmts() */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* per-argument formatter parameters, taken from fmt->arg_parm */
	void		    **arg_parm;
};
1116 
1117 static size_t fprintf_duration(unsigned long t, FILE *fp)
1118 {
1119 	double duration = (double)t / NSEC_PER_MSEC;
1120 	size_t printed = fprintf(fp, "(");
1121 
1122 	if (duration >= 1.0)
1123 		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1124 	else if (duration >= 0.01)
1125 		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1126 	else
1127 		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1128 	return printed + fprintf(fp, "): ");
1129 }
1130 
/* Per-thread tracing state, hung off thread->priv by thread__trace(). */
struct thread_trace {
	/* sample time of the most recent sys_enter seen for this thread */
	u64		  entry_time;
	/* sample time of the most recent sys_exit seen for this thread */
	u64		  exit_time;
	/* set at sys_enter (non-exit syscalls), cleared at sys_exit */
	bool		  entry_pending;
	/* bumped every time thread__trace() hands out this state */
	unsigned long	  nr_events;
	/* 1024-byte buffer holding the formatted "name(args" of the last entry */
	char		  *entry_str;
	/* accumulated sched_stat_runtime, in milliseconds */
	double		  runtime_ms;
	/* fd -> pathname cache; max is the highest valid index, -1 when empty */
	struct {
		int	  max;
		char	  **table;
	} paths;

	/* keyed by syscall id; each node's ->priv is a struct stats */
	struct intlist *syscall_stats;
};
1145 
1146 static struct thread_trace *thread_trace__new(void)
1147 {
1148 	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1149 
1150 	if (ttrace)
1151 		ttrace->paths.max = -1;
1152 
1153 	ttrace->syscall_stats = intlist__new(NULL);
1154 
1155 	return ttrace;
1156 }
1157 
1158 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1159 {
1160 	struct thread_trace *ttrace;
1161 
1162 	if (thread == NULL)
1163 		goto fail;
1164 
1165 	if (thread->priv == NULL)
1166 		thread->priv = thread_trace__new();
1167 
1168 	if (thread->priv == NULL)
1169 		goto fail;
1170 
1171 	ttrace = thread->priv;
1172 	++ttrace->nr_events;
1173 
1174 	return ttrace;
1175 fail:
1176 	color_fprintf(fp, PERF_COLOR_RED,
1177 		      "WARNING: not enough memory, dropping samples!\n");
1178 	return NULL;
1179 }
1180 
/*
 * Bits for trace->trace_pgfaults: which page fault software events
 * (major and/or minor) trace__run() adds to the event list.
 */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)
1183 
/* Global state of one 'perf trace' run. */
struct trace {
	/* embedded so callbacks can recover the trace via container_of() */
	struct perf_tool	tool;
	struct {
		/* passed to audit_syscall_to_name() when resolving ids */
		int		machine;
		/* syscall id whose successful return attaches a pathname to the fd */
		int		open_id;
	}			audit;
	/* table indexed by syscall id; max is the highest valid index */
	struct {
		int		max;
		struct syscall  *table;
	} syscalls;
	struct record_opts	opts;
	/* machine used for thread lookups and address resolution */
	struct machine		*host;
	/* time of the first sample; subtracted from timestamps unless full_time */
	u64			base_time;
	/* stream all trace output and warnings are written to */
	FILE			*output;
	unsigned long		nr_events;
	/* syscall names to include (or exclude, when not_ev_qualifier is set) */
	struct strlist		*ev_qualifier;
	/* pathname from the latest vfs_getname sample, consumed at sys_exit */
	const char 		*last_vfs_getname;
	/* when set, only samples whose tid/pid is listed are processed */
	struct intlist		*tid_list;
	struct intlist		*pid_list;
	/* in milliseconds; syscalls shorter than this are not printed */
	double			duration_filter;
	/* total sched_stat_runtime seen, in milliseconds */
	double			runtime_ms;
	/* how many fd names came from vfs_getname vs. attempted /proc lookups */
	struct {
		u64		vfs_getname,
				proc_getname;
	} stats;
	bool			not_ev_qualifier;
	/* set by trace__run(); allows fd names to be read from /proc */
	bool			live;
	bool			full_time;
	bool			sched;
	/* prefix each line with the tid (and comm, if show_comm) */
	bool			multiple_threads;
	/* collect per-thread, per-syscall duration statistics */
	bool			summary;
	/* suppress the per-syscall lines */
	bool			summary_only;
	bool			show_comm;
	bool			show_tool_stats;
	/* mask of TRACE_PFMAJ / TRACE_PFMIN */
	int			trace_pgfaults;
};
1220 
1221 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1222 {
1223 	struct thread_trace *ttrace = thread->priv;
1224 
1225 	if (fd > ttrace->paths.max) {
1226 		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1227 
1228 		if (npath == NULL)
1229 			return -1;
1230 
1231 		if (ttrace->paths.max != -1) {
1232 			memset(npath + ttrace->paths.max + 1, 0,
1233 			       (fd - ttrace->paths.max) * sizeof(char *));
1234 		} else {
1235 			memset(npath, 0, (fd + 1) * sizeof(char *));
1236 		}
1237 
1238 		ttrace->paths.table = npath;
1239 		ttrace->paths.max   = fd;
1240 	}
1241 
1242 	ttrace->paths.table[fd] = strdup(pathname);
1243 
1244 	return ttrace->paths.table[fd] != NULL ? 0 : -1;
1245 }
1246 
1247 static int thread__read_fd_path(struct thread *thread, int fd)
1248 {
1249 	char linkname[PATH_MAX], pathname[PATH_MAX];
1250 	struct stat st;
1251 	int ret;
1252 
1253 	if (thread->pid_ == thread->tid) {
1254 		scnprintf(linkname, sizeof(linkname),
1255 			  "/proc/%d/fd/%d", thread->pid_, fd);
1256 	} else {
1257 		scnprintf(linkname, sizeof(linkname),
1258 			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1259 	}
1260 
1261 	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1262 		return -1;
1263 
1264 	ret = readlink(linkname, pathname, sizeof(pathname));
1265 
1266 	if (ret < 0 || ret > st.st_size)
1267 		return -1;
1268 
1269 	pathname[ret] = '\0';
1270 	return trace__set_fd_pathname(thread, fd, pathname);
1271 }
1272 
1273 static const char *thread__fd_path(struct thread *thread, int fd,
1274 				   struct trace *trace)
1275 {
1276 	struct thread_trace *ttrace = thread->priv;
1277 
1278 	if (ttrace == NULL)
1279 		return NULL;
1280 
1281 	if (fd < 0)
1282 		return NULL;
1283 
1284 	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1285 		if (!trace->live)
1286 			return NULL;
1287 		++trace->stats.proc_getname;
1288 		if (thread__read_fd_path(thread, fd))
1289 			return NULL;
1290 	}
1291 
1292 	return ttrace->paths.table[fd];
1293 }
1294 
1295 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1296 					struct syscall_arg *arg)
1297 {
1298 	int fd = arg->val;
1299 	size_t printed = scnprintf(bf, size, "%d", fd);
1300 	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1301 
1302 	if (path)
1303 		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1304 
1305 	return printed;
1306 }
1307 
1308 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1309 					      struct syscall_arg *arg)
1310 {
1311 	int fd = arg->val;
1312 	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1313 	struct thread_trace *ttrace = arg->thread->priv;
1314 
1315 	if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1316 		zfree(&ttrace->paths.table[fd]);
1317 
1318 	return printed;
1319 }
1320 
1321 static bool trace__filter_duration(struct trace *trace, double t)
1322 {
1323 	return t < (trace->duration_filter * NSEC_PER_MSEC);
1324 }
1325 
1326 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1327 {
1328 	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1329 
1330 	return fprintf(fp, "%10.3f ", ts);
1331 }
1332 
/*
 * Flags shared between the signal handler and the main loop. They are
 * written asynchronously from sig_handler(), so they must be
 * volatile sig_atomic_t: a plain bool may be cached in a register by the
 * compiler across a polling loop.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: ask the main loop to stop, and record whether the request
 * came from SIGINT (as opposed to any other signal this is installed for).
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1341 
1342 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1343 					u64 duration, u64 tstamp, FILE *fp)
1344 {
1345 	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1346 	printed += fprintf_duration(duration, fp);
1347 
1348 	if (trace->multiple_threads) {
1349 		if (trace->show_comm)
1350 			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1351 		printed += fprintf(fp, "%d ", thread->tid);
1352 	}
1353 
1354 	return printed;
1355 }
1356 
1357 static int trace__process_event(struct trace *trace, struct machine *machine,
1358 				union perf_event *event, struct perf_sample *sample)
1359 {
1360 	int ret = 0;
1361 
1362 	switch (event->header.type) {
1363 	case PERF_RECORD_LOST:
1364 		color_fprintf(trace->output, PERF_COLOR_RED,
1365 			      "LOST %" PRIu64 " events!\n", event->lost.lost);
1366 		ret = machine__process_lost_event(machine, event, sample);
1367 	default:
1368 		ret = machine__process_event(machine, event, sample);
1369 		break;
1370 	}
1371 
1372 	return ret;
1373 }
1374 
1375 static int trace__tool_process(struct perf_tool *tool,
1376 			       union perf_event *event,
1377 			       struct perf_sample *sample,
1378 			       struct machine *machine)
1379 {
1380 	struct trace *trace = container_of(tool, struct trace, tool);
1381 	return trace__process_event(trace, machine, event, sample);
1382 }
1383 
1384 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1385 {
1386 	int err = symbol__init();
1387 
1388 	if (err)
1389 		return err;
1390 
1391 	trace->host = machine__new_host();
1392 	if (trace->host == NULL)
1393 		return -ENOMEM;
1394 
1395 	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1396 					    evlist->threads, trace__tool_process, false);
1397 	if (err)
1398 		symbol__exit();
1399 
1400 	return err;
1401 }
1402 
1403 static int syscall__set_arg_fmts(struct syscall *sc)
1404 {
1405 	struct format_field *field;
1406 	int idx = 0;
1407 
1408 	sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1409 	if (sc->arg_scnprintf == NULL)
1410 		return -1;
1411 
1412 	if (sc->fmt)
1413 		sc->arg_parm = sc->fmt->arg_parm;
1414 
1415 	for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1416 		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1417 			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1418 		else if (field->flags & FIELD_IS_POINTER)
1419 			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1420 		++idx;
1421 	}
1422 
1423 	return 0;
1424 }
1425 
1426 static int trace__read_syscall_info(struct trace *trace, int id)
1427 {
1428 	char tp_name[128];
1429 	struct syscall *sc;
1430 	const char *name = audit_syscall_to_name(id, trace->audit.machine);
1431 
1432 	if (name == NULL)
1433 		return -1;
1434 
1435 	if (id > trace->syscalls.max) {
1436 		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1437 
1438 		if (nsyscalls == NULL)
1439 			return -1;
1440 
1441 		if (trace->syscalls.max != -1) {
1442 			memset(nsyscalls + trace->syscalls.max + 1, 0,
1443 			       (id - trace->syscalls.max) * sizeof(*sc));
1444 		} else {
1445 			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1446 		}
1447 
1448 		trace->syscalls.table = nsyscalls;
1449 		trace->syscalls.max   = id;
1450 	}
1451 
1452 	sc = trace->syscalls.table + id;
1453 	sc->name = name;
1454 
1455 	if (trace->ev_qualifier) {
1456 		bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1457 
1458 		if (!(in ^ trace->not_ev_qualifier)) {
1459 			sc->filtered = true;
1460 			/*
1461 			 * No need to do read tracepoint information since this will be
1462 			 * filtered out.
1463 			 */
1464 			return 0;
1465 		}
1466 	}
1467 
1468 	sc->fmt  = syscall_fmt__find(sc->name);
1469 
1470 	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1471 	sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1472 
1473 	if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1474 		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1475 		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1476 	}
1477 
1478 	if (sc->tp_format == NULL)
1479 		return -1;
1480 
1481 	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1482 
1483 	return syscall__set_arg_fmts(sc);
1484 }
1485 
1486 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1487 				      unsigned long *args, struct trace *trace,
1488 				      struct thread *thread)
1489 {
1490 	size_t printed = 0;
1491 
1492 	if (sc->tp_format != NULL) {
1493 		struct format_field *field;
1494 		u8 bit = 1;
1495 		struct syscall_arg arg = {
1496 			.idx	= 0,
1497 			.mask	= 0,
1498 			.trace  = trace,
1499 			.thread = thread,
1500 		};
1501 
1502 		for (field = sc->tp_format->format.fields->next; field;
1503 		     field = field->next, ++arg.idx, bit <<= 1) {
1504 			if (arg.mask & bit)
1505 				continue;
1506 			/*
1507  			 * Suppress this argument if its value is zero and
1508  			 * and we don't have a string associated in an
1509  			 * strarray for it.
1510  			 */
1511 			if (args[arg.idx] == 0 &&
1512 			    !(sc->arg_scnprintf &&
1513 			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1514 			      sc->arg_parm[arg.idx]))
1515 				continue;
1516 
1517 			printed += scnprintf(bf + printed, size - printed,
1518 					     "%s%s: ", printed ? ", " : "", field->name);
1519 			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1520 				arg.val = args[arg.idx];
1521 				if (sc->arg_parm)
1522 					arg.parm = sc->arg_parm[arg.idx];
1523 				printed += sc->arg_scnprintf[arg.idx](bf + printed,
1524 								      size - printed, &arg);
1525 			} else {
1526 				printed += scnprintf(bf + printed, size - printed,
1527 						     "%ld", args[arg.idx]);
1528 			}
1529 		}
1530 	} else {
1531 		int i = 0;
1532 
1533 		while (i < 6) {
1534 			printed += scnprintf(bf + printed, size - printed,
1535 					     "%sarg%d: %ld",
1536 					     printed ? ", " : "", i, args[i]);
1537 			++i;
1538 		}
1539 	}
1540 
1541 	return printed;
1542 }
1543 
/*
 * Signature of the per-event handlers stored in evsel->handler and invoked
 * for each sample of that event.
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1547 
1548 static struct syscall *trace__syscall_info(struct trace *trace,
1549 					   struct perf_evsel *evsel, int id)
1550 {
1551 
1552 	if (id < 0) {
1553 
1554 		/*
1555 		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1556 		 * before that, leaving at a higher verbosity level till that is
1557 		 * explained. Reproduced with plain ftrace with:
1558 		 *
1559 		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1560 		 * grep "NR -1 " /t/trace_pipe
1561 		 *
1562 		 * After generating some load on the machine.
1563  		 */
1564 		if (verbose > 1) {
1565 			static u64 n;
1566 			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1567 				id, perf_evsel__name(evsel), ++n);
1568 		}
1569 		return NULL;
1570 	}
1571 
1572 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1573 	    trace__read_syscall_info(trace, id))
1574 		goto out_cant_read;
1575 
1576 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1577 		goto out_cant_read;
1578 
1579 	return &trace->syscalls.table[id];
1580 
1581 out_cant_read:
1582 	if (verbose) {
1583 		fprintf(trace->output, "Problems reading syscall %d", id);
1584 		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1585 			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1586 		fputs(" information\n", trace->output);
1587 	}
1588 	return NULL;
1589 }
1590 
1591 static void thread__update_stats(struct thread_trace *ttrace,
1592 				 int id, struct perf_sample *sample)
1593 {
1594 	struct int_node *inode;
1595 	struct stats *stats;
1596 	u64 duration = 0;
1597 
1598 	inode = intlist__findnew(ttrace->syscall_stats, id);
1599 	if (inode == NULL)
1600 		return;
1601 
1602 	stats = inode->priv;
1603 	if (stats == NULL) {
1604 		stats = malloc(sizeof(struct stats));
1605 		if (stats == NULL)
1606 			return;
1607 		init_stats(stats);
1608 		inode->priv = stats;
1609 	}
1610 
1611 	if (ttrace->entry_time && sample->time > ttrace->entry_time)
1612 		duration = sample->time - ttrace->entry_time;
1613 
1614 	update_stats(stats, duration);
1615 }
1616 
1617 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1618 			    union perf_event *event __maybe_unused,
1619 			    struct perf_sample *sample)
1620 {
1621 	char *msg;
1622 	void *args;
1623 	size_t printed = 0;
1624 	struct thread *thread;
1625 	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1626 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
1627 	struct thread_trace *ttrace;
1628 
1629 	if (sc == NULL)
1630 		return -1;
1631 
1632 	if (sc->filtered)
1633 		return 0;
1634 
1635 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1636 	ttrace = thread__trace(thread, trace->output);
1637 	if (ttrace == NULL)
1638 		return -1;
1639 
1640 	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1641 
1642 	if (ttrace->entry_str == NULL) {
1643 		ttrace->entry_str = malloc(1024);
1644 		if (!ttrace->entry_str)
1645 			return -1;
1646 	}
1647 
1648 	ttrace->entry_time = sample->time;
1649 	msg = ttrace->entry_str;
1650 	printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1651 
1652 	printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1653 					   args, trace, thread);
1654 
1655 	if (sc->is_exit) {
1656 		if (!trace->duration_filter && !trace->summary_only) {
1657 			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1658 			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1659 		}
1660 	} else
1661 		ttrace->entry_pending = true;
1662 
1663 	return 0;
1664 }
1665 
1666 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1667 			   union perf_event *event __maybe_unused,
1668 			   struct perf_sample *sample)
1669 {
1670 	int ret;
1671 	u64 duration = 0;
1672 	struct thread *thread;
1673 	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1674 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
1675 	struct thread_trace *ttrace;
1676 
1677 	if (sc == NULL)
1678 		return -1;
1679 
1680 	if (sc->filtered)
1681 		return 0;
1682 
1683 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1684 	ttrace = thread__trace(thread, trace->output);
1685 	if (ttrace == NULL)
1686 		return -1;
1687 
1688 	if (trace->summary)
1689 		thread__update_stats(ttrace, id, sample);
1690 
1691 	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1692 
1693 	if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1694 		trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1695 		trace->last_vfs_getname = NULL;
1696 		++trace->stats.vfs_getname;
1697 	}
1698 
1699 	ttrace->exit_time = sample->time;
1700 
1701 	if (ttrace->entry_time) {
1702 		duration = sample->time - ttrace->entry_time;
1703 		if (trace__filter_duration(trace, duration))
1704 			goto out;
1705 	} else if (trace->duration_filter)
1706 		goto out;
1707 
1708 	if (trace->summary_only)
1709 		goto out;
1710 
1711 	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1712 
1713 	if (ttrace->entry_pending) {
1714 		fprintf(trace->output, "%-70s", ttrace->entry_str);
1715 	} else {
1716 		fprintf(trace->output, " ... [");
1717 		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1718 		fprintf(trace->output, "]: %s()", sc->name);
1719 	}
1720 
1721 	if (sc->fmt == NULL) {
1722 signed_print:
1723 		fprintf(trace->output, ") = %d", ret);
1724 	} else if (ret < 0 && sc->fmt->errmsg) {
1725 		char bf[256];
1726 		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1727 			   *e = audit_errno_to_name(-ret);
1728 
1729 		fprintf(trace->output, ") = -1 %s %s", e, emsg);
1730 	} else if (ret == 0 && sc->fmt->timeout)
1731 		fprintf(trace->output, ") = 0 Timeout");
1732 	else if (sc->fmt->hexret)
1733 		fprintf(trace->output, ") = %#x", ret);
1734 	else
1735 		goto signed_print;
1736 
1737 	fputc('\n', trace->output);
1738 out:
1739 	ttrace->entry_pending = false;
1740 
1741 	return 0;
1742 }
1743 
1744 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1745 			      union perf_event *event __maybe_unused,
1746 			      struct perf_sample *sample)
1747 {
1748 	trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1749 	return 0;
1750 }
1751 
1752 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1753 				     union perf_event *event __maybe_unused,
1754 				     struct perf_sample *sample)
1755 {
1756         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1757 	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1758 	struct thread *thread = machine__findnew_thread(trace->host,
1759 							sample->pid,
1760 							sample->tid);
1761 	struct thread_trace *ttrace = thread__trace(thread, trace->output);
1762 
1763 	if (ttrace == NULL)
1764 		goto out_dump;
1765 
1766 	ttrace->runtime_ms += runtime_ms;
1767 	trace->runtime_ms += runtime_ms;
1768 	return 0;
1769 
1770 out_dump:
1771 	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1772 	       evsel->name,
1773 	       perf_evsel__strval(evsel, sample, "comm"),
1774 	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1775 	       runtime,
1776 	       perf_evsel__intval(evsel, sample, "vruntime"));
1777 	return 0;
1778 }
1779 
1780 static void print_location(FILE *f, struct perf_sample *sample,
1781 			   struct addr_location *al,
1782 			   bool print_dso, bool print_sym)
1783 {
1784 
1785 	if ((verbose || print_dso) && al->map)
1786 		fprintf(f, "%s@", al->map->dso->long_name);
1787 
1788 	if ((verbose || print_sym) && al->sym)
1789 		fprintf(f, "%s+0x%lx", al->sym->name,
1790 			al->addr - al->sym->start);
1791 	else if (al->map)
1792 		fprintf(f, "0x%lx", al->addr);
1793 	else
1794 		fprintf(f, "0x%lx", sample->addr);
1795 }
1796 
1797 static int trace__pgfault(struct trace *trace,
1798 			  struct perf_evsel *evsel,
1799 			  union perf_event *event,
1800 			  struct perf_sample *sample)
1801 {
1802 	struct thread *thread;
1803 	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1804 	struct addr_location al;
1805 	char map_type = 'd';
1806 
1807 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1808 
1809 	thread__find_addr_location(thread, trace->host, cpumode, MAP__FUNCTION,
1810 			      sample->ip, &al);
1811 
1812 	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1813 
1814 	fprintf(trace->output, "%sfault [",
1815 		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1816 		"maj" : "min");
1817 
1818 	print_location(trace->output, sample, &al, false, true);
1819 
1820 	fprintf(trace->output, "] => ");
1821 
1822 	thread__find_addr_location(thread, trace->host, cpumode, MAP__VARIABLE,
1823 				   sample->addr, &al);
1824 
1825 	if (!al.map) {
1826 		thread__find_addr_location(thread, trace->host, cpumode,
1827 					   MAP__FUNCTION, sample->addr, &al);
1828 
1829 		if (al.map)
1830 			map_type = 'x';
1831 		else
1832 			map_type = '?';
1833 	}
1834 
1835 	print_location(trace->output, sample, &al, true, false);
1836 
1837 	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1838 
1839 	return 0;
1840 }
1841 
1842 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1843 {
1844 	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1845 	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1846 		return false;
1847 
1848 	if (trace->pid_list || trace->tid_list)
1849 		return true;
1850 
1851 	return false;
1852 }
1853 
1854 static int trace__process_sample(struct perf_tool *tool,
1855 				 union perf_event *event,
1856 				 struct perf_sample *sample,
1857 				 struct perf_evsel *evsel,
1858 				 struct machine *machine __maybe_unused)
1859 {
1860 	struct trace *trace = container_of(tool, struct trace, tool);
1861 	int err = 0;
1862 
1863 	tracepoint_handler handler = evsel->handler;
1864 
1865 	if (skip_sample(trace, sample))
1866 		return 0;
1867 
1868 	if (!trace->full_time && trace->base_time == 0)
1869 		trace->base_time = sample->time;
1870 
1871 	if (handler) {
1872 		++trace->nr_events;
1873 		handler(trace, evsel, event, sample);
1874 	}
1875 
1876 	return err;
1877 }
1878 
1879 static int parse_target_str(struct trace *trace)
1880 {
1881 	if (trace->opts.target.pid) {
1882 		trace->pid_list = intlist__new(trace->opts.target.pid);
1883 		if (trace->pid_list == NULL) {
1884 			pr_err("Error parsing process id string\n");
1885 			return -EINVAL;
1886 		}
1887 	}
1888 
1889 	if (trace->opts.target.tid) {
1890 		trace->tid_list = intlist__new(trace->opts.target.tid);
1891 		if (trace->tid_list == NULL) {
1892 			pr_err("Error parsing thread id string\n");
1893 			return -EINVAL;
1894 		}
1895 	}
1896 
1897 	return 0;
1898 }
1899 
/*
 * Implement 'perf trace record': build an argv for 'perf record' that
 * selects the syscall entry/exit tracepoints and append the user's own
 * arguments, then hand it to cmd_record().
 *
 * Returns -ENOMEM when the argv cannot be allocated, -1 when neither the
 * raw_syscalls nor the syscalls tracepoints exist, otherwise whatever
 * cmd_record() returns.
 */
static int trace__record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e",
	};

	/* +1 is for the event string below */
	rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = record_args[i];

	/* event string may be different for older kernels - e.g., RHEL6 */
	if (is_valid_tracepoint("raw_syscalls:sys_enter"))
		rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
	else if (is_valid_tracepoint("syscalls:sys_enter"))
		rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
	else {
		pr_err("Neither raw_syscalls nor syscalls events exist.\n");
		/* nothing will consume the argv, so do not leak it */
		free(rec_argv);
		return -1;
	}
	i++;

	for (j = 0; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	return cmd_record(i, rec_argv, NULL);
}
1938 
1939 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1940 
1941 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1942 {
1943 	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1944 	if (evsel == NULL)
1945 		return;
1946 
1947 	if (perf_evsel__field(evsel, "pathname") == NULL) {
1948 		perf_evsel__delete(evsel);
1949 		return;
1950 	}
1951 
1952 	evsel->handler = trace__vfs_getname;
1953 	perf_evlist__add(evlist, evsel);
1954 }
1955 
1956 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
1957 				    u64 config)
1958 {
1959 	struct perf_evsel *evsel;
1960 	struct perf_event_attr attr = {
1961 		.type = PERF_TYPE_SOFTWARE,
1962 		.mmap_data = 1,
1963 		.sample_period = 1,
1964 	};
1965 
1966 	attr.config = config;
1967 
1968 	event_attr_init(&attr);
1969 
1970 	evsel = perf_evsel__new(&attr);
1971 	if (!evsel)
1972 		return -ENOMEM;
1973 
1974 	evsel->handler = trace__pgfault;
1975 	perf_evlist__add(evlist, evsel);
1976 
1977 	return 0;
1978 }
1979 
1980 static int trace__run(struct trace *trace, int argc, const char **argv)
1981 {
1982 	struct perf_evlist *evlist = perf_evlist__new();
1983 	struct perf_evsel *evsel;
1984 	int err = -1, i;
1985 	unsigned long before;
1986 	const bool forks = argc > 0;
1987 
1988 	trace->live = true;
1989 
1990 	if (evlist == NULL) {
1991 		fprintf(trace->output, "Not enough memory to run!\n");
1992 		goto out;
1993 	}
1994 
1995 	if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1996 		goto out_error_tp;
1997 
1998 	perf_evlist__add_vfs_getname(evlist);
1999 
2000 	if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2001 	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ))
2002 		goto out_error_tp;
2003 
2004 	if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2005 	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2006 		goto out_error_tp;
2007 
2008 	if (trace->sched &&
2009 		perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2010 				trace__sched_stat_runtime))
2011 		goto out_error_tp;
2012 
2013 	err = perf_evlist__create_maps(evlist, &trace->opts.target);
2014 	if (err < 0) {
2015 		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2016 		goto out_delete_evlist;
2017 	}
2018 
2019 	err = trace__symbols_init(trace, evlist);
2020 	if (err < 0) {
2021 		fprintf(trace->output, "Problems initializing symbol libraries!\n");
2022 		goto out_delete_evlist;
2023 	}
2024 
2025 	perf_evlist__config(evlist, &trace->opts);
2026 
2027 	signal(SIGCHLD, sig_handler);
2028 	signal(SIGINT, sig_handler);
2029 
2030 	if (forks) {
2031 		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2032 						    argv, false, NULL);
2033 		if (err < 0) {
2034 			fprintf(trace->output, "Couldn't run the workload!\n");
2035 			goto out_delete_evlist;
2036 		}
2037 	}
2038 
2039 	err = perf_evlist__open(evlist);
2040 	if (err < 0)
2041 		goto out_error_open;
2042 
2043 	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2044 	if (err < 0) {
2045 		fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
2046 		goto out_delete_evlist;
2047 	}
2048 
2049 	perf_evlist__enable(evlist);
2050 
2051 	if (forks)
2052 		perf_evlist__start_workload(evlist);
2053 
2054 	trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
2055 again:
2056 	before = trace->nr_events;
2057 
2058 	for (i = 0; i < evlist->nr_mmaps; i++) {
2059 		union perf_event *event;
2060 
2061 		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2062 			const u32 type = event->header.type;
2063 			tracepoint_handler handler;
2064 			struct perf_sample sample;
2065 
2066 			++trace->nr_events;
2067 
2068 			err = perf_evlist__parse_sample(evlist, event, &sample);
2069 			if (err) {
2070 				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2071 				goto next_event;
2072 			}
2073 
2074 			if (!trace->full_time && trace->base_time == 0)
2075 				trace->base_time = sample.time;
2076 
2077 			if (type != PERF_RECORD_SAMPLE) {
2078 				trace__process_event(trace, trace->host, event, &sample);
2079 				continue;
2080 			}
2081 
2082 			evsel = perf_evlist__id2evsel(evlist, sample.id);
2083 			if (evsel == NULL) {
2084 				fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
2085 				goto next_event;
2086 			}
2087 
2088 			if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2089 			    sample.raw_data == NULL) {
2090 				fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2091 				       perf_evsel__name(evsel), sample.tid,
2092 				       sample.cpu, sample.raw_size);
2093 				goto next_event;
2094 			}
2095 
2096 			handler = evsel->handler;
2097 			handler(trace, evsel, event, &sample);
2098 next_event:
2099 			perf_evlist__mmap_consume(evlist, i);
2100 
2101 			if (interrupted)
2102 				goto out_disable;
2103 		}
2104 	}
2105 
2106 	if (trace->nr_events == before) {
2107 		int timeout = done ? 100 : -1;
2108 
2109 		if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
2110 			goto again;
2111 	} else {
2112 		goto again;
2113 	}
2114 
2115 out_disable:
2116 	perf_evlist__disable(evlist);
2117 
2118 	if (!err) {
2119 		if (trace->summary)
2120 			trace__fprintf_thread_summary(trace, trace->output);
2121 
2122 		if (trace->show_tool_stats) {
2123 			fprintf(trace->output, "Stats:\n "
2124 					       " vfs_getname : %" PRIu64 "\n"
2125 					       " proc_getname: %" PRIu64 "\n",
2126 				trace->stats.vfs_getname,
2127 				trace->stats.proc_getname);
2128 		}
2129 	}
2130 
2131 out_delete_evlist:
2132 	perf_evlist__delete(evlist);
2133 out:
2134 	trace->live = false;
2135 	return err;
2136 {
2137 	char errbuf[BUFSIZ];
2138 
2139 out_error_tp:
2140 	perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2141 	goto out_error;
2142 
2143 out_error_open:
2144 	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2145 
2146 out_error:
2147 	fprintf(trace->output, "%s\n", errbuf);
2148 	goto out_delete_evlist;
2149 }
2150 }
2151 
2152 static int trace__replay(struct trace *trace)
2153 {
2154 	const struct perf_evsel_str_handler handlers[] = {
2155 		{ "probe:vfs_getname",	     trace__vfs_getname, },
2156 	};
2157 	struct perf_data_file file = {
2158 		.path  = input_name,
2159 		.mode  = PERF_DATA_MODE_READ,
2160 	};
2161 	struct perf_session *session;
2162 	struct perf_evsel *evsel;
2163 	int err = -1;
2164 
2165 	trace->tool.sample	  = trace__process_sample;
2166 	trace->tool.mmap	  = perf_event__process_mmap;
2167 	trace->tool.mmap2	  = perf_event__process_mmap2;
2168 	trace->tool.comm	  = perf_event__process_comm;
2169 	trace->tool.exit	  = perf_event__process_exit;
2170 	trace->tool.fork	  = perf_event__process_fork;
2171 	trace->tool.attr	  = perf_event__process_attr;
2172 	trace->tool.tracing_data = perf_event__process_tracing_data;
2173 	trace->tool.build_id	  = perf_event__process_build_id;
2174 
2175 	trace->tool.ordered_samples = true;
2176 	trace->tool.ordering_requires_timestamps = true;
2177 
2178 	/* add tid to output */
2179 	trace->multiple_threads = true;
2180 
2181 	if (symbol__init() < 0)
2182 		return -1;
2183 
2184 	session = perf_session__new(&file, false, &trace->tool);
2185 	if (session == NULL)
2186 		return -ENOMEM;
2187 
2188 	trace->host = &session->machines.host;
2189 
2190 	err = perf_session__set_tracepoints_handlers(session, handlers);
2191 	if (err)
2192 		goto out;
2193 
2194 	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2195 						     "raw_syscalls:sys_enter");
2196 	/* older kernels have syscalls tp versus raw_syscalls */
2197 	if (evsel == NULL)
2198 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2199 							     "syscalls:sys_enter");
2200 	if (evsel == NULL) {
2201 		pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2202 		goto out;
2203 	}
2204 
2205 	if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2206 	    perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2207 		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2208 		goto out;
2209 	}
2210 
2211 	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2212 						     "raw_syscalls:sys_exit");
2213 	if (evsel == NULL)
2214 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2215 							     "syscalls:sys_exit");
2216 	if (evsel == NULL) {
2217 		pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2218 		goto out;
2219 	}
2220 
2221 	if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2222 	    perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2223 		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2224 		goto out;
2225 	}
2226 
2227 	err = parse_target_str(trace);
2228 	if (err != 0)
2229 		goto out;
2230 
2231 	setup_pager();
2232 
2233 	err = perf_session__process_events(session, &trace->tool);
2234 	if (err)
2235 		pr_err("Failed to process events, error %d", err);
2236 
2237 	else if (trace->summary)
2238 		trace__fprintf_thread_summary(trace, trace->output);
2239 
2240 out:
2241 	perf_session__delete(session);
2242 
2243 	return err;
2244 }
2245 
/*
 * Write the banner that precedes the per-thread summary to @fp.
 * Returns the fprintf() result converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2254 
2255 static size_t thread__dump_stats(struct thread_trace *ttrace,
2256 				 struct trace *trace, FILE *fp)
2257 {
2258 	struct stats *stats;
2259 	size_t printed = 0;
2260 	struct syscall *sc;
2261 	struct int_node *inode = intlist__first(ttrace->syscall_stats);
2262 
2263 	if (inode == NULL)
2264 		return 0;
2265 
2266 	printed += fprintf(fp, "\n");
2267 
2268 	printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2269 	printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2270 	printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2271 
2272 	/* each int_node is a syscall */
2273 	while (inode) {
2274 		stats = inode->priv;
2275 		if (stats) {
2276 			double min = (double)(stats->min) / NSEC_PER_MSEC;
2277 			double max = (double)(stats->max) / NSEC_PER_MSEC;
2278 			double avg = avg_stats(stats);
2279 			double pct;
2280 			u64 n = (u64) stats->n;
2281 
2282 			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2283 			avg /= NSEC_PER_MSEC;
2284 
2285 			sc = &trace->syscalls.table[inode->i];
2286 			printed += fprintf(fp, "   %-15s", sc->name);
2287 			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2288 					   n, min, avg);
2289 			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2290 		}
2291 
2292 		inode = intlist__next(inode);
2293 	}
2294 
2295 	printed += fprintf(fp, "\n\n");
2296 
2297 	return printed;
2298 }
2299 
/*
 * struct used to pass data to per-thread function
 * (handed to trace__fprintf_one_thread() as its 'priv' argument)
 */
struct summary_data {
	FILE *fp;		/* stream the summary lines are written to */
	struct trace *trace;	/* trace whose nr_events and syscall table are read */
	size_t printed;		/* running total of the fprintf() results */
};
2306 
2307 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2308 {
2309 	struct summary_data *data = priv;
2310 	FILE *fp = data->fp;
2311 	size_t printed = data->printed;
2312 	struct trace *trace = data->trace;
2313 	struct thread_trace *ttrace = thread->priv;
2314 	double ratio;
2315 
2316 	if (ttrace == NULL)
2317 		return 0;
2318 
2319 	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2320 
2321 	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2322 	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2323 	printed += fprintf(fp, "%.1f%%", ratio);
2324 	printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2325 	printed += thread__dump_stats(ttrace, trace, fp);
2326 
2327 	data->printed += printed;
2328 
2329 	return 0;
2330 }
2331 
2332 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2333 {
2334 	struct summary_data data = {
2335 		.fp = fp,
2336 		.trace = trace
2337 	};
2338 	data.printed = trace__fprintf_threads_header(fp);
2339 
2340 	machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2341 
2342 	return data.printed;
2343 }
2344 
2345 static int trace__set_duration(const struct option *opt, const char *str,
2346 			       int unset __maybe_unused)
2347 {
2348 	struct trace *trace = opt->value;
2349 
2350 	trace->duration_filter = atof(str);
2351 	return 0;
2352 }
2353 
2354 static int trace__open_output(struct trace *trace, const char *filename)
2355 {
2356 	struct stat st;
2357 
2358 	if (!stat(filename, &st) && st.st_size) {
2359 		char oldname[PATH_MAX];
2360 
2361 		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2362 		unlink(oldname);
2363 		rename(filename, oldname);
2364 	}
2365 
2366 	trace->output = fopen(filename, "w");
2367 
2368 	return trace->output == NULL ? -errno : 0;
2369 }
2370 
2371 static int parse_pagefaults(const struct option *opt, const char *str,
2372 			    int unset __maybe_unused)
2373 {
2374 	int *trace_pgfaults = opt->value;
2375 
2376 	if (strcmp(str, "all") == 0)
2377 		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2378 	else if (strcmp(str, "maj") == 0)
2379 		*trace_pgfaults |= TRACE_PFMAJ;
2380 	else if (strcmp(str, "min") == 0)
2381 		*trace_pgfaults |= TRACE_PFMIN;
2382 	else
2383 		return -1;
2384 
2385 	return 0;
2386 }
2387 
2388 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2389 {
2390 	const char * const trace_usage[] = {
2391 		"perf trace [<options>] [<command>]",
2392 		"perf trace [<options>] -- <command> [<options>]",
2393 		"perf trace record [<options>] [<command>]",
2394 		"perf trace record [<options>] -- <command> [<options>]",
2395 		NULL
2396 	};
2397 	struct trace trace = {
2398 		.audit = {
2399 			.machine = audit_detect_machine(),
2400 			.open_id = audit_name_to_syscall("open", trace.audit.machine),
2401 		},
2402 		.syscalls = {
2403 			. max = -1,
2404 		},
2405 		.opts = {
2406 			.target = {
2407 				.uid	   = UINT_MAX,
2408 				.uses_mmap = true,
2409 			},
2410 			.user_freq     = UINT_MAX,
2411 			.user_interval = ULLONG_MAX,
2412 			.no_buffering  = true,
2413 			.mmap_pages    = 1024,
2414 		},
2415 		.output = stdout,
2416 		.show_comm = true,
2417 	};
2418 	const char *output_name = NULL;
2419 	const char *ev_qualifier_str = NULL;
2420 	const struct option trace_options[] = {
2421 	OPT_BOOLEAN(0, "comm", &trace.show_comm,
2422 		    "show the thread COMM next to its id"),
2423 	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2424 	OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2425 		    "list of events to trace"),
2426 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
2427 	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2428 	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2429 		    "trace events on existing process id"),
2430 	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2431 		    "trace events on existing thread id"),
2432 	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2433 		    "system-wide collection from all CPUs"),
2434 	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2435 		    "list of cpus to monitor"),
2436 	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2437 		    "child tasks do not inherit counters"),
2438 	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2439 		     "number of mmap data pages",
2440 		     perf_evlist__parse_mmap_pages),
2441 	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2442 		   "user to profile"),
2443 	OPT_CALLBACK(0, "duration", &trace, "float",
2444 		     "show only events with duration > N.M ms",
2445 		     trace__set_duration),
2446 	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2447 	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2448 	OPT_BOOLEAN('T', "time", &trace.full_time,
2449 		    "Show full timestamp, not time relative to first start"),
2450 	OPT_BOOLEAN('s', "summary", &trace.summary_only,
2451 		    "Show only syscall summary with statistics"),
2452 	OPT_BOOLEAN('S', "with-summary", &trace.summary,
2453 		    "Show all syscalls and summary with statistics"),
2454 	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2455 		     "Trace pagefaults", parse_pagefaults, "maj"),
2456 	OPT_END()
2457 	};
2458 	int err;
2459 	char bf[BUFSIZ];
2460 
2461 	if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2462 		return trace__record(argc-2, &argv[2]);
2463 
2464 	argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2465 
2466 	/* summary_only implies summary option, but don't overwrite summary if set */
2467 	if (trace.summary_only)
2468 		trace.summary = trace.summary_only;
2469 
2470 	if (trace.trace_pgfaults) {
2471 		trace.opts.sample_address = true;
2472 		trace.opts.sample_time = true;
2473 	}
2474 
2475 	if (output_name != NULL) {
2476 		err = trace__open_output(&trace, output_name);
2477 		if (err < 0) {
2478 			perror("failed to create output file");
2479 			goto out;
2480 		}
2481 	}
2482 
2483 	if (ev_qualifier_str != NULL) {
2484 		const char *s = ev_qualifier_str;
2485 
2486 		trace.not_ev_qualifier = *s == '!';
2487 		if (trace.not_ev_qualifier)
2488 			++s;
2489 		trace.ev_qualifier = strlist__new(true, s);
2490 		if (trace.ev_qualifier == NULL) {
2491 			fputs("Not enough memory to parse event qualifier",
2492 			      trace.output);
2493 			err = -ENOMEM;
2494 			goto out_close;
2495 		}
2496 	}
2497 
2498 	err = target__validate(&trace.opts.target);
2499 	if (err) {
2500 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2501 		fprintf(trace.output, "%s", bf);
2502 		goto out_close;
2503 	}
2504 
2505 	err = target__parse_uid(&trace.opts.target);
2506 	if (err) {
2507 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2508 		fprintf(trace.output, "%s", bf);
2509 		goto out_close;
2510 	}
2511 
2512 	if (!argc && target__none(&trace.opts.target))
2513 		trace.opts.target.system_wide = true;
2514 
2515 	if (input_name)
2516 		err = trace__replay(&trace);
2517 	else
2518 		err = trace__run(&trace, argc, argv);
2519 
2520 out_close:
2521 	if (output_name != NULL)
2522 		fclose(trace.output);
2523 out:
2524 	return err;
2525 }
2526