xref: /linux/tools/perf/builtin-trace.c (revision cf2f33a4e54096f90652cca3511fd6a456ea5abe)
1 /*
2  * builtin-trace.c
3  *
4  * Builtin 'trace' command:
5  *
6  * Display a continuously updated trace of any workload, CPU, specific PID,
7  * system wide, etc.  Default format is loosely strace like, but any other
8  * event may be specified using --event.
9  *
10  * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11  *
12  * Initially based on the 'trace' prototype by Thomas Gleixner:
13  *
14  * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15  *
16  * Released under the GPL v2. (and only v2, not any later version)
17  */
18 
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include "util/exec_cmd.h"
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include "util/parse-options.h"
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36 
37 #include <libaudit.h>
38 #include <stdlib.h>
39 #include <sys/mman.h>
40 #include <linux/futex.h>
41 
/*
 * For older distros: each constant below is supplied only when the headers
 * included above did not already define it.
 */
#if !defined(MAP_STACK)
# define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE	13
#endif

/* eventfd2() flags */
#if !defined(EFD_SEMAPHORE)
# define EFD_SEMAPHORE		1
#endif

#if !defined(EFD_NONBLOCK)
# define EFD_NONBLOCK		00004000
#endif

#if !defined(EFD_CLOEXEC)
# define EFD_CLOEXEC		02000000
#endif

#if !defined(O_CLOEXEC)
# define O_CLOEXEC		02000000
#endif

/* socket() type and type flags */
#if !defined(SOCK_DCCP)
# define SOCK_DCCP		6
#endif

#if !defined(SOCK_CLOEXEC)
# define SOCK_CLOEXEC		02000000
#endif

#if !defined(SOCK_NONBLOCK)
# define SOCK_NONBLOCK		00004000
#endif

#if !defined(MSG_CMSG_CLOEXEC)
# define MSG_CMSG_CLOEXEC	0x40000000
#endif

/* perf_event_open() flags */
#if !defined(PERF_FLAG_FD_NO_GROUP)
# define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
#endif

#if !defined(PERF_FLAG_FD_OUTPUT)
# define PERF_FLAG_FD_OUTPUT		(1UL << 1)
#endif

#if !defined(PERF_FLAG_PID_CGROUP)
# define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#if !defined(PERF_FLAG_FD_CLOEXEC)
# define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
#endif
107 
108 
109 struct tp_field {
110 	int offset;
111 	union {
112 		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
113 		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
114 	};
115 };
116 
117 #define TP_UINT_FIELD(bits) \
118 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
119 { \
120 	u##bits value; \
121 	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
122 	return value;  \
123 }
124 
125 TP_UINT_FIELD(8);
126 TP_UINT_FIELD(16);
127 TP_UINT_FIELD(32);
128 TP_UINT_FIELD(64);
129 
130 #define TP_UINT_FIELD__SWAPPED(bits) \
131 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
132 { \
133 	u##bits value; \
134 	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
135 	return bswap_##bits(value);\
136 }
137 
138 TP_UINT_FIELD__SWAPPED(16);
139 TP_UINT_FIELD__SWAPPED(32);
140 TP_UINT_FIELD__SWAPPED(64);
141 
142 static int tp_field__init_uint(struct tp_field *field,
143 			       struct format_field *format_field,
144 			       bool needs_swap)
145 {
146 	field->offset = format_field->offset;
147 
148 	switch (format_field->size) {
149 	case 1:
150 		field->integer = tp_field__u8;
151 		break;
152 	case 2:
153 		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
154 		break;
155 	case 4:
156 		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
157 		break;
158 	case 8:
159 		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
160 		break;
161 	default:
162 		return -1;
163 	}
164 
165 	return 0;
166 }
167 
168 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
169 {
170 	return sample->raw_data + field->offset;
171 }
172 
173 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
174 {
175 	field->offset = format_field->offset;
176 	field->pointer = tp_field__ptr;
177 	return 0;
178 }
179 
180 struct syscall_tp {
181 	struct tp_field id;
182 	union {
183 		struct tp_field args, ret;
184 	};
185 };
186 
187 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
188 					  struct tp_field *field,
189 					  const char *name)
190 {
191 	struct format_field *format_field = perf_evsel__field(evsel, name);
192 
193 	if (format_field == NULL)
194 		return -1;
195 
196 	return tp_field__init_uint(field, format_field, evsel->needs_swap);
197 }
198 
199 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
200 	({ struct syscall_tp *sc = evsel->priv;\
201 	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
202 
/*
 * Look up the tracepoint field @name on @evsel and set @field up as a
 * pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt != NULL ? tp_field__init_ptr(field, fmt) : -1;
}

/*
 * Convenience form: initialize the member called 'name' of the
 * struct syscall_tp hanging off evsel->priv from the tracepoint field of
 * the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *tp = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &tp->name, #name); })
218 
219 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
220 {
221 	zfree(&evsel->priv);
222 	perf_evsel__delete(evsel);
223 }
224 
225 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
226 {
227 	evsel->priv = malloc(sizeof(struct syscall_tp));
228 	if (evsel->priv != NULL) {
229 		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
230 			goto out_delete;
231 
232 		evsel->handler = handler;
233 		return 0;
234 	}
235 
236 	return -ENOMEM;
237 
238 out_delete:
239 	zfree(&evsel->priv);
240 	return -ENOENT;
241 }
242 
/*
 * Create the evsel for the syscall tracepoint named by @direction and
 * prepare it with perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL if neither tracepoint system provides the
 * event or the initialization fails (the evsel is destroyed in that case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (!evsel)
		evsel = perf_evsel__newtp("syscalls", direction);

	if (!evsel)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler) != 0) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
262 
/*
 * Read the syscall_tp member 'name' of evsel->priv from 'sample' through
 * that member's integer accessor.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *tp = (evsel)->priv; \
	   tp->name.integer(&tp->name, sample); })

/* Same, but through the member's pointer accessor. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *tp = (evsel)->priv; \
	   tp->name.pointer(&tp->name, sample); })
270 
271 struct syscall_arg {
272 	unsigned long val;
273 	struct thread *thread;
274 	struct trace  *trace;
275 	void	      *parm;
276 	u8	      idx;
277 	u8	      mask;
278 };
279 
/*
 * Table of names indexed by (value - offset); used by the strarray
 * beautifiers to turn small integers into symbolic names.
 */
struct strarray {
	int	    offset;	/* value that maps to entries[0] */
	int	    nr_entries;
	const char **entries;
};

/* Define strarray__<array> wrapping 'array', whose first entry stands for value 'off'. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.entries = array, \
	.nr_entries = ARRAY_SIZE(array), \
	.offset	    = off, \
}

/* Same, for tables whose first entry stands for value 0. */
#define DEFINE_STRARRAY(array) DEFINE_STRARRAY_OFFSET(array, 0)
296 
297 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
298 						const char *intfmt,
299 					        struct syscall_arg *arg)
300 {
301 	struct strarray *sa = arg->parm;
302 	int idx = arg->val - sa->offset;
303 
304 	if (idx < 0 || idx >= sa->nr_entries)
305 		return scnprintf(bf, size, intfmt, arg->val);
306 
307 	return scnprintf(bf, size, "%s", sa->entries[idx]);
308 }
309 
/* strarray beautifier that falls back to decimal for values without a name. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
317 
#if defined(__i386__) || defined(__x86_64__)
/*
 * strarray beautifier that falls back to hexadecimal for values without a
 * name.
 *
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * 	  gets rewritten to support all arches.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
331 
332 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
333 					struct syscall_arg *arg);
334 
335 #define SCA_FD syscall_arg__scnprintf_fd
336 
337 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
338 					   struct syscall_arg *arg)
339 {
340 	int fd = arg->val;
341 
342 	if (fd == AT_FDCWD)
343 		return scnprintf(bf, size, "CWD");
344 
345 	return syscall_arg__scnprintf_fd(bf, size, arg);
346 }
347 
348 #define SCA_FDAT syscall_arg__scnprintf_fd_at
349 
350 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
351 					      struct syscall_arg *arg);
352 
353 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
354 
355 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
356 					 struct syscall_arg *arg)
357 {
358 	return scnprintf(bf, size, "%#lx", arg->val);
359 }
360 
361 #define SCA_HEX syscall_arg__scnprintf_hex
362 
363 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
364 					 struct syscall_arg *arg)
365 {
366 	return scnprintf(bf, size, "%d", arg->val);
367 }
368 
369 #define SCA_INT syscall_arg__scnprintf_int
370 
371 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
372 					       struct syscall_arg *arg)
373 {
374 	int printed = 0, prot = arg->val;
375 
376 	if (prot == PROT_NONE)
377 		return scnprintf(bf, size, "NONE");
378 #define	P_MMAP_PROT(n) \
379 	if (prot & PROT_##n) { \
380 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
381 		prot &= ~PROT_##n; \
382 	}
383 
384 	P_MMAP_PROT(EXEC);
385 	P_MMAP_PROT(READ);
386 	P_MMAP_PROT(WRITE);
387 #ifdef PROT_SEM
388 	P_MMAP_PROT(SEM);
389 #endif
390 	P_MMAP_PROT(GROWSDOWN);
391 	P_MMAP_PROT(GROWSUP);
392 #undef P_MMAP_PROT
393 
394 	if (prot)
395 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
396 
397 	return printed;
398 }
399 
400 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
401 
402 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
403 						struct syscall_arg *arg)
404 {
405 	int printed = 0, flags = arg->val;
406 
407 #define	P_MMAP_FLAG(n) \
408 	if (flags & MAP_##n) { \
409 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
410 		flags &= ~MAP_##n; \
411 	}
412 
413 	P_MMAP_FLAG(SHARED);
414 	P_MMAP_FLAG(PRIVATE);
415 #ifdef MAP_32BIT
416 	P_MMAP_FLAG(32BIT);
417 #endif
418 	P_MMAP_FLAG(ANONYMOUS);
419 	P_MMAP_FLAG(DENYWRITE);
420 	P_MMAP_FLAG(EXECUTABLE);
421 	P_MMAP_FLAG(FILE);
422 	P_MMAP_FLAG(FIXED);
423 	P_MMAP_FLAG(GROWSDOWN);
424 #ifdef MAP_HUGETLB
425 	P_MMAP_FLAG(HUGETLB);
426 #endif
427 	P_MMAP_FLAG(LOCKED);
428 	P_MMAP_FLAG(NONBLOCK);
429 	P_MMAP_FLAG(NORESERVE);
430 	P_MMAP_FLAG(POPULATE);
431 	P_MMAP_FLAG(STACK);
432 #ifdef MAP_UNINITIALIZED
433 	P_MMAP_FLAG(UNINITIALIZED);
434 #endif
435 #undef P_MMAP_FLAG
436 
437 	if (flags)
438 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
439 
440 	return printed;
441 }
442 
443 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
444 
445 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
446 						  struct syscall_arg *arg)
447 {
448 	int printed = 0, flags = arg->val;
449 
450 #define P_MREMAP_FLAG(n) \
451 	if (flags & MREMAP_##n) { \
452 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
453 		flags &= ~MREMAP_##n; \
454 	}
455 
456 	P_MREMAP_FLAG(MAYMOVE);
457 #ifdef MREMAP_FIXED
458 	P_MREMAP_FLAG(FIXED);
459 #endif
460 #undef P_MREMAP_FLAG
461 
462 	if (flags)
463 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
464 
465 	return printed;
466 }
467 
468 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
469 
470 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
471 						      struct syscall_arg *arg)
472 {
473 	int behavior = arg->val;
474 
475 	switch (behavior) {
476 #define	P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
477 	P_MADV_BHV(NORMAL);
478 	P_MADV_BHV(RANDOM);
479 	P_MADV_BHV(SEQUENTIAL);
480 	P_MADV_BHV(WILLNEED);
481 	P_MADV_BHV(DONTNEED);
482 	P_MADV_BHV(REMOVE);
483 	P_MADV_BHV(DONTFORK);
484 	P_MADV_BHV(DOFORK);
485 	P_MADV_BHV(HWPOISON);
486 #ifdef MADV_SOFT_OFFLINE
487 	P_MADV_BHV(SOFT_OFFLINE);
488 #endif
489 	P_MADV_BHV(MERGEABLE);
490 	P_MADV_BHV(UNMERGEABLE);
491 #ifdef MADV_HUGEPAGE
492 	P_MADV_BHV(HUGEPAGE);
493 #endif
494 #ifdef MADV_NOHUGEPAGE
495 	P_MADV_BHV(NOHUGEPAGE);
496 #endif
497 #ifdef MADV_DONTDUMP
498 	P_MADV_BHV(DONTDUMP);
499 #endif
500 #ifdef MADV_DODUMP
501 	P_MADV_BHV(DODUMP);
502 #endif
503 #undef P_MADV_PHV
504 	default: break;
505 	}
506 
507 	return scnprintf(bf, size, "%#x", behavior);
508 }
509 
510 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
511 
512 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
513 					   struct syscall_arg *arg)
514 {
515 	int printed = 0, op = arg->val;
516 
517 	if (op == 0)
518 		return scnprintf(bf, size, "NONE");
519 #define	P_CMD(cmd) \
520 	if ((op & LOCK_##cmd) == LOCK_##cmd) { \
521 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
522 		op &= ~LOCK_##cmd; \
523 	}
524 
525 	P_CMD(SH);
526 	P_CMD(EX);
527 	P_CMD(NB);
528 	P_CMD(UN);
529 	P_CMD(MAND);
530 	P_CMD(RW);
531 	P_CMD(READ);
532 	P_CMD(WRITE);
533 #undef P_OP
534 
535 	if (op)
536 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
537 
538 	return printed;
539 }
540 
541 #define SCA_FLOCK syscall_arg__scnprintf_flock
542 
543 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
544 {
545 	enum syscall_futex_args {
546 		SCF_UADDR   = (1 << 0),
547 		SCF_OP	    = (1 << 1),
548 		SCF_VAL	    = (1 << 2),
549 		SCF_TIMEOUT = (1 << 3),
550 		SCF_UADDR2  = (1 << 4),
551 		SCF_VAL3    = (1 << 5),
552 	};
553 	int op = arg->val;
554 	int cmd = op & FUTEX_CMD_MASK;
555 	size_t printed = 0;
556 
557 	switch (cmd) {
558 #define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
559 	P_FUTEX_OP(WAIT);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
560 	P_FUTEX_OP(WAKE);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
561 	P_FUTEX_OP(FD);		    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
562 	P_FUTEX_OP(REQUEUE);	    arg->mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
563 	P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;			  break;
564 	P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;			  break;
565 	P_FUTEX_OP(WAKE_OP);							  break;
566 	P_FUTEX_OP(LOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
567 	P_FUTEX_OP(UNLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
568 	P_FUTEX_OP(TRYLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
569 	P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;			  break;
570 	P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;			  break;
571 	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
572 	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
573 	}
574 
575 	if (op & FUTEX_PRIVATE_FLAG)
576 		printed += scnprintf(bf + printed, size - printed, "|PRIV");
577 
578 	if (op & FUTEX_CLOCK_REALTIME)
579 		printed += scnprintf(bf + printed, size - printed, "|CLKRT");
580 
581 	return printed;
582 }
583 
584 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
585 
586 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
587 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
588 
589 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
590 static DEFINE_STRARRAY(itimers);
591 
592 static const char *keyctl_options[] = {
593 	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
594 	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
595 	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
596 	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
597 	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
598 };
599 static DEFINE_STRARRAY(keyctl_options);
600 
601 static const char *whences[] = { "SET", "CUR", "END",
602 #ifdef SEEK_DATA
603 "DATA",
604 #endif
605 #ifdef SEEK_HOLE
606 "HOLE",
607 #endif
608 };
609 static DEFINE_STRARRAY(whences);
610 
611 static const char *fcntl_cmds[] = {
612 	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
613 	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
614 	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
615 	"F_GETOWNER_UIDS",
616 };
617 static DEFINE_STRARRAY(fcntl_cmds);
618 
619 static const char *rlimit_resources[] = {
620 	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
621 	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
622 	"RTTIME",
623 };
624 static DEFINE_STRARRAY(rlimit_resources);
625 
626 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
627 static DEFINE_STRARRAY(sighow);
628 
629 static const char *clockid[] = {
630 	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
631 	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
632 	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
633 };
634 static DEFINE_STRARRAY(clockid);
635 
/*
 * Socket address family names, indexed by the AF_* value.
 *
 * Value 28 (AF_MPLS) needs its own slot: without it "CAN" and everything
 * after it end up one position too low (AF_CAN is 29, AF_VSOCK is 40).
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
644 static DEFINE_STRARRAY(socket_families);
645 
646 #ifndef SOCK_TYPE_MASK
647 #define SOCK_TYPE_MASK 0xf
648 #endif
649 
650 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
651 						      struct syscall_arg *arg)
652 {
653 	size_t printed;
654 	int type = arg->val,
655 	    flags = type & ~SOCK_TYPE_MASK;
656 
657 	type &= SOCK_TYPE_MASK;
658 	/*
659  	 * Can't use a strarray, MIPS may override for ABI reasons.
660  	 */
661 	switch (type) {
662 #define	P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
663 	P_SK_TYPE(STREAM);
664 	P_SK_TYPE(DGRAM);
665 	P_SK_TYPE(RAW);
666 	P_SK_TYPE(RDM);
667 	P_SK_TYPE(SEQPACKET);
668 	P_SK_TYPE(DCCP);
669 	P_SK_TYPE(PACKET);
670 #undef P_SK_TYPE
671 	default:
672 		printed = scnprintf(bf, size, "%#x", type);
673 	}
674 
675 #define	P_SK_FLAG(n) \
676 	if (flags & SOCK_##n) { \
677 		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
678 		flags &= ~SOCK_##n; \
679 	}
680 
681 	P_SK_FLAG(CLOEXEC);
682 	P_SK_FLAG(NONBLOCK);
683 #undef P_SK_FLAG
684 
685 	if (flags)
686 		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
687 
688 	return printed;
689 }
690 
691 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
692 
693 #ifndef MSG_PROBE
694 #define MSG_PROBE	     0x10
695 #endif
696 #ifndef MSG_WAITFORONE
697 #define MSG_WAITFORONE	0x10000
698 #endif
699 #ifndef MSG_SENDPAGE_NOTLAST
700 #define MSG_SENDPAGE_NOTLAST 0x20000
701 #endif
702 #ifndef MSG_FASTOPEN
703 #define MSG_FASTOPEN	     0x20000000
704 #endif
705 
706 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
707 					       struct syscall_arg *arg)
708 {
709 	int printed = 0, flags = arg->val;
710 
711 	if (flags == 0)
712 		return scnprintf(bf, size, "NONE");
713 #define	P_MSG_FLAG(n) \
714 	if (flags & MSG_##n) { \
715 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
716 		flags &= ~MSG_##n; \
717 	}
718 
719 	P_MSG_FLAG(OOB);
720 	P_MSG_FLAG(PEEK);
721 	P_MSG_FLAG(DONTROUTE);
722 	P_MSG_FLAG(TRYHARD);
723 	P_MSG_FLAG(CTRUNC);
724 	P_MSG_FLAG(PROBE);
725 	P_MSG_FLAG(TRUNC);
726 	P_MSG_FLAG(DONTWAIT);
727 	P_MSG_FLAG(EOR);
728 	P_MSG_FLAG(WAITALL);
729 	P_MSG_FLAG(FIN);
730 	P_MSG_FLAG(SYN);
731 	P_MSG_FLAG(CONFIRM);
732 	P_MSG_FLAG(RST);
733 	P_MSG_FLAG(ERRQUEUE);
734 	P_MSG_FLAG(NOSIGNAL);
735 	P_MSG_FLAG(MORE);
736 	P_MSG_FLAG(WAITFORONE);
737 	P_MSG_FLAG(SENDPAGE_NOTLAST);
738 	P_MSG_FLAG(FASTOPEN);
739 	P_MSG_FLAG(CMSG_CLOEXEC);
740 #undef P_MSG_FLAG
741 
742 	if (flags)
743 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
744 
745 	return printed;
746 }
747 
748 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
749 
750 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
751 						 struct syscall_arg *arg)
752 {
753 	size_t printed = 0;
754 	int mode = arg->val;
755 
756 	if (mode == F_OK) /* 0 */
757 		return scnprintf(bf, size, "F");
758 #define	P_MODE(n) \
759 	if (mode & n##_OK) { \
760 		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
761 		mode &= ~n##_OK; \
762 	}
763 
764 	P_MODE(R);
765 	P_MODE(W);
766 	P_MODE(X);
767 #undef P_MODE
768 
769 	if (mode)
770 		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
771 
772 	return printed;
773 }
774 
775 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
776 
777 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
778 					      struct syscall_arg *arg);
779 
780 #define SCA_FILENAME syscall_arg__scnprintf_filename
781 
782 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
783 					       struct syscall_arg *arg)
784 {
785 	int printed = 0, flags = arg->val;
786 
787 	if (!(flags & O_CREAT))
788 		arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
789 
790 	if (flags == 0)
791 		return scnprintf(bf, size, "RDONLY");
792 #define	P_FLAG(n) \
793 	if (flags & O_##n) { \
794 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
795 		flags &= ~O_##n; \
796 	}
797 
798 	P_FLAG(APPEND);
799 	P_FLAG(ASYNC);
800 	P_FLAG(CLOEXEC);
801 	P_FLAG(CREAT);
802 	P_FLAG(DIRECT);
803 	P_FLAG(DIRECTORY);
804 	P_FLAG(EXCL);
805 	P_FLAG(LARGEFILE);
806 	P_FLAG(NOATIME);
807 	P_FLAG(NOCTTY);
808 #ifdef O_NONBLOCK
809 	P_FLAG(NONBLOCK);
810 #elif O_NDELAY
811 	P_FLAG(NDELAY);
812 #endif
813 #ifdef O_PATH
814 	P_FLAG(PATH);
815 #endif
816 	P_FLAG(RDWR);
817 #ifdef O_DSYNC
818 	if ((flags & O_SYNC) == O_SYNC)
819 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
820 	else {
821 		P_FLAG(DSYNC);
822 	}
823 #else
824 	P_FLAG(SYNC);
825 #endif
826 	P_FLAG(TRUNC);
827 	P_FLAG(WRONLY);
828 #undef P_FLAG
829 
830 	if (flags)
831 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
832 
833 	return printed;
834 }
835 
836 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
837 
838 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
839 						struct syscall_arg *arg)
840 {
841 	int printed = 0, flags = arg->val;
842 
843 	if (flags == 0)
844 		return 0;
845 
846 #define	P_FLAG(n) \
847 	if (flags & PERF_FLAG_##n) { \
848 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
849 		flags &= ~PERF_FLAG_##n; \
850 	}
851 
852 	P_FLAG(FD_NO_GROUP);
853 	P_FLAG(FD_OUTPUT);
854 	P_FLAG(PID_CGROUP);
855 	P_FLAG(FD_CLOEXEC);
856 #undef P_FLAG
857 
858 	if (flags)
859 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
860 
861 	return printed;
862 }
863 
864 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
865 
866 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
867 						   struct syscall_arg *arg)
868 {
869 	int printed = 0, flags = arg->val;
870 
871 	if (flags == 0)
872 		return scnprintf(bf, size, "NONE");
873 #define	P_FLAG(n) \
874 	if (flags & EFD_##n) { \
875 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
876 		flags &= ~EFD_##n; \
877 	}
878 
879 	P_FLAG(SEMAPHORE);
880 	P_FLAG(CLOEXEC);
881 	P_FLAG(NONBLOCK);
882 #undef P_FLAG
883 
884 	if (flags)
885 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
886 
887 	return printed;
888 }
889 
890 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
891 
892 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
893 						struct syscall_arg *arg)
894 {
895 	int printed = 0, flags = arg->val;
896 
897 #define	P_FLAG(n) \
898 	if (flags & O_##n) { \
899 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
900 		flags &= ~O_##n; \
901 	}
902 
903 	P_FLAG(CLOEXEC);
904 	P_FLAG(NONBLOCK);
905 #undef P_FLAG
906 
907 	if (flags)
908 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
909 
910 	return printed;
911 }
912 
913 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
914 
915 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
916 {
917 	int sig = arg->val;
918 
919 	switch (sig) {
920 #define	P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
921 	P_SIGNUM(HUP);
922 	P_SIGNUM(INT);
923 	P_SIGNUM(QUIT);
924 	P_SIGNUM(ILL);
925 	P_SIGNUM(TRAP);
926 	P_SIGNUM(ABRT);
927 	P_SIGNUM(BUS);
928 	P_SIGNUM(FPE);
929 	P_SIGNUM(KILL);
930 	P_SIGNUM(USR1);
931 	P_SIGNUM(SEGV);
932 	P_SIGNUM(USR2);
933 	P_SIGNUM(PIPE);
934 	P_SIGNUM(ALRM);
935 	P_SIGNUM(TERM);
936 	P_SIGNUM(CHLD);
937 	P_SIGNUM(CONT);
938 	P_SIGNUM(STOP);
939 	P_SIGNUM(TSTP);
940 	P_SIGNUM(TTIN);
941 	P_SIGNUM(TTOU);
942 	P_SIGNUM(URG);
943 	P_SIGNUM(XCPU);
944 	P_SIGNUM(XFSZ);
945 	P_SIGNUM(VTALRM);
946 	P_SIGNUM(PROF);
947 	P_SIGNUM(WINCH);
948 	P_SIGNUM(IO);
949 	P_SIGNUM(PWR);
950 	P_SIGNUM(SYS);
951 #ifdef SIGEMT
952 	P_SIGNUM(EMT);
953 #endif
954 #ifdef SIGSTKFLT
955 	P_SIGNUM(STKFLT);
956 #endif
957 #ifdef SIGSWI
958 	P_SIGNUM(SWI);
959 #endif
960 	default: break;
961 	}
962 
963 	return scnprintf(bf, size, "%#x", sig);
964 }
965 
966 #define SCA_SIGNUM syscall_arg__scnprintf_signum
967 
968 #if defined(__i386__) || defined(__x86_64__)
969 /*
970  * FIXME: Make this available to all arches.
971  */
972 #define TCGETS		0x5401
973 
974 static const char *tioctls[] = {
975 	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
976 	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
977 	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
978 	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
979 	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
980 	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
981 	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
982 	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
983 	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
984 	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
985 	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
986 	[0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
987 	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
988 	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
989 	"TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
990 };
991 
992 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
993 #endif /* defined(__i386__) || defined(__x86_64__) */
994 
/*
 * Initializer fragment for a struct syscall_fmt entry: format argument
 * number 'pos' with SCA_STRARRAY, using strarray__<table> as its parameter.
 * The second macro argument is not used in the expansion; it only labels
 * the argument at the point of use.
 */
#define STRARRAY(pos, label, table) \
	  .arg_scnprintf = { [pos] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [pos] = &strarray__##table, }
998 
999 static struct syscall_fmt {
1000 	const char *name;
1001 	const char *alias;
1002 	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1003 	void	   *arg_parm[6];
1004 	bool	   errmsg;
1005 	bool	   timeout;
1006 	bool	   hexret;
1007 } syscall_fmts[] = {
1008 	{ .name	    = "access",	    .errmsg = true,
1009 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1010 			     [1] = SCA_ACCMODE,  /* mode */ }, },
1011 	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
1012 	{ .name	    = "brk",	    .hexret = true,
1013 	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
1014 	{ .name	    = "chdir",	    .errmsg = true,
1015 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1016 	{ .name	    = "chmod",	    .errmsg = true,
1017 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1018 	{ .name	    = "chroot",	    .errmsg = true,
1019 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1020 	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
1021 	{ .name	    = "close",	    .errmsg = true,
1022 	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1023 	{ .name	    = "connect",    .errmsg = true, },
1024 	{ .name	    = "creat",	    .errmsg = true,
1025 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1026 	{ .name	    = "dup",	    .errmsg = true,
1027 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1028 	{ .name	    = "dup2",	    .errmsg = true,
1029 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1030 	{ .name	    = "dup3",	    .errmsg = true,
1031 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1032 	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1033 	{ .name	    = "eventfd2",   .errmsg = true,
1034 	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1035 	{ .name	    = "faccessat",  .errmsg = true,
1036 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1037 			     [1] = SCA_FILENAME, /* filename */ }, },
1038 	{ .name	    = "fadvise64",  .errmsg = true,
1039 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1040 	{ .name	    = "fallocate",  .errmsg = true,
1041 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1042 	{ .name	    = "fchdir",	    .errmsg = true,
1043 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1044 	{ .name	    = "fchmod",	    .errmsg = true,
1045 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1046 	{ .name	    = "fchmodat",   .errmsg = true,
1047 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1048 			     [1] = SCA_FILENAME, /* filename */ }, },
1049 	{ .name	    = "fchown",	    .errmsg = true,
1050 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1051 	{ .name	    = "fchownat",   .errmsg = true,
1052 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1053 			     [1] = SCA_FILENAME, /* filename */ }, },
1054 	{ .name	    = "fcntl",	    .errmsg = true,
1055 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1056 			     [1] = SCA_STRARRAY, /* cmd */ },
1057 	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1058 	{ .name	    = "fdatasync",  .errmsg = true,
1059 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1060 	{ .name	    = "flock",	    .errmsg = true,
1061 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1062 			     [1] = SCA_FLOCK, /* cmd */ }, },
1063 	{ .name	    = "fsetxattr",  .errmsg = true,
1064 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1065 	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat",
1066 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1067 	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat",
1068 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1069 			     [1] = SCA_FILENAME, /* filename */ }, },
1070 	{ .name	    = "fstatfs",    .errmsg = true,
1071 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1072 	{ .name	    = "fsync",    .errmsg = true,
1073 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1074 	{ .name	    = "ftruncate", .errmsg = true,
1075 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1076 	{ .name	    = "futex",	    .errmsg = true,
1077 	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1078 	{ .name	    = "futimesat", .errmsg = true,
1079 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1080 			     [1] = SCA_FILENAME, /* filename */ }, },
1081 	{ .name	    = "getdents",   .errmsg = true,
1082 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1083 	{ .name	    = "getdents64", .errmsg = true,
1084 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1085 	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1086 	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1087 	{ .name	    = "getxattr",    .errmsg = true,
1088 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1089 	{ .name	    = "inotify_add_watch",	    .errmsg = true,
1090 	  .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1091 	{ .name	    = "ioctl",	    .errmsg = true,
1092 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1093 #if defined(__i386__) || defined(__x86_64__)
1094 /*
1095  * FIXME: Make this available to all arches.
1096  */
1097 			     [1] = SCA_STRHEXARRAY, /* cmd */
1098 			     [2] = SCA_HEX, /* arg */ },
1099 	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
1100 #else
1101 			     [2] = SCA_HEX, /* arg */ }, },
1102 #endif
1103 	{ .name	    = "keyctl",	    .errmsg = true, STRARRAY(0, option, keyctl_options), },
1104 	{ .name	    = "kill",	    .errmsg = true,
1105 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1106 	{ .name	    = "lchown",    .errmsg = true,
1107 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1108 	{ .name	    = "lgetxattr",  .errmsg = true,
1109 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1110 	{ .name	    = "linkat",	    .errmsg = true,
1111 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1112 	{ .name	    = "listxattr",  .errmsg = true,
1113 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1114 	{ .name	    = "llistxattr", .errmsg = true,
1115 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1116 	{ .name	    = "lremovexattr",  .errmsg = true,
1117 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1118 	{ .name	    = "lseek",	    .errmsg = true,
1119 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1120 			     [2] = SCA_STRARRAY, /* whence */ },
1121 	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
1122 	{ .name	    = "lsetxattr",  .errmsg = true,
1123 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1124 	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat",
1125 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1126 	{ .name	    = "lsxattr",    .errmsg = true,
1127 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1128 	{ .name     = "madvise",    .errmsg = true,
1129 	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
1130 			     [2] = SCA_MADV_BHV, /* behavior */ }, },
1131 	{ .name	    = "mkdir",    .errmsg = true,
1132 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1133 	{ .name	    = "mkdirat",    .errmsg = true,
1134 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1135 			     [1] = SCA_FILENAME, /* pathname */ }, },
1136 	{ .name	    = "mknod",      .errmsg = true,
1137 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1138 	{ .name	    = "mknodat",    .errmsg = true,
1139 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1140 			     [1] = SCA_FILENAME, /* filename */ }, },
1141 	{ .name	    = "mlock",	    .errmsg = true,
1142 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1143 	{ .name	    = "mlockall",   .errmsg = true,
1144 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1145 	{ .name	    = "mmap",	    .hexret = true,
1146 	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
1147 			     [2] = SCA_MMAP_PROT, /* prot */
1148 			     [3] = SCA_MMAP_FLAGS, /* flags */
1149 			     [4] = SCA_FD, 	  /* fd */ }, },
1150 	{ .name	    = "mprotect",   .errmsg = true,
1151 	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
1152 			     [2] = SCA_MMAP_PROT, /* prot */ }, },
1153 	{ .name	    = "mq_unlink", .errmsg = true,
1154 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1155 	{ .name	    = "mremap",	    .hexret = true,
1156 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1157 			     [3] = SCA_MREMAP_FLAGS, /* flags */
1158 			     [4] = SCA_HEX, /* new_addr */ }, },
1159 	{ .name	    = "munlock",    .errmsg = true,
1160 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1161 	{ .name	    = "munmap",	    .errmsg = true,
1162 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1163 	{ .name	    = "name_to_handle_at", .errmsg = true,
1164 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1165 	{ .name	    = "newfstatat", .errmsg = true,
1166 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1167 			     [1] = SCA_FILENAME, /* filename */ }, },
1168 	{ .name	    = "open",	    .errmsg = true,
1169 	  .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
1170 			     [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1171 	{ .name	    = "open_by_handle_at", .errmsg = true,
1172 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1173 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1174 	{ .name	    = "openat",	    .errmsg = true,
1175 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1176 			     [1] = SCA_FILENAME, /* filename */
1177 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1178 	{ .name	    = "perf_event_open", .errmsg = true,
1179 	  .arg_scnprintf = { [1] = SCA_INT, /* pid */
1180 			     [2] = SCA_INT, /* cpu */
1181 			     [3] = SCA_FD,  /* group_fd */
1182 			     [4] = SCA_PERF_FLAGS,  /* flags */ }, },
1183 	{ .name	    = "pipe2",	    .errmsg = true,
1184 	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1185 	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
1186 	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
1187 	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64",
1188 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1189 	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread",
1190 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1191 	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1192 	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64",
1193 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1194 	{ .name	    = "pwritev",    .errmsg = true,
1195 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1196 	{ .name	    = "read",	    .errmsg = true,
1197 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1198 	{ .name	    = "readlink",   .errmsg = true,
1199 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1200 	{ .name	    = "readlinkat", .errmsg = true,
1201 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1202 			     [1] = SCA_FILENAME, /* pathname */ }, },
1203 	{ .name	    = "readv",	    .errmsg = true,
1204 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1205 	{ .name	    = "recvfrom",   .errmsg = true,
1206 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1207 			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1208 	{ .name	    = "recvmmsg",   .errmsg = true,
1209 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1210 			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1211 	{ .name	    = "recvmsg",    .errmsg = true,
1212 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1213 			     [2] = SCA_MSG_FLAGS, /* flags */ }, },
1214 	{ .name	    = "removexattr", .errmsg = true,
1215 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1216 	{ .name	    = "renameat",   .errmsg = true,
1217 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1218 	{ .name	    = "rmdir",    .errmsg = true,
1219 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1220 	{ .name	    = "rt_sigaction", .errmsg = true,
1221 	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1222 	{ .name	    = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1223 	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
1224 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1225 	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
1226 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1227 	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
1228 	{ .name	    = "sendmmsg",    .errmsg = true,
1229 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1230 			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1231 	{ .name	    = "sendmsg",    .errmsg = true,
1232 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1233 			     [2] = SCA_MSG_FLAGS, /* flags */ }, },
1234 	{ .name	    = "sendto",	    .errmsg = true,
1235 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1236 			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1237 	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1238 	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1239 	{ .name	    = "setxattr",   .errmsg = true,
1240 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1241 	{ .name	    = "shutdown",   .errmsg = true,
1242 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1243 	{ .name	    = "socket",	    .errmsg = true,
1244 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1245 			     [1] = SCA_SK_TYPE, /* type */ },
1246 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1247 	{ .name	    = "socketpair", .errmsg = true,
1248 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1249 			     [1] = SCA_SK_TYPE, /* type */ },
1250 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1251 	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat",
1252 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1253 	{ .name	    = "statfs",	    .errmsg = true,
1254 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1255 	{ .name	    = "swapoff",    .errmsg = true,
1256 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1257 	{ .name	    = "swapon",	    .errmsg = true,
1258 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1259 	{ .name	    = "symlinkat",  .errmsg = true,
1260 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1261 	{ .name	    = "tgkill",	    .errmsg = true,
1262 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1263 	{ .name	    = "tkill",	    .errmsg = true,
1264 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1265 	{ .name	    = "truncate",   .errmsg = true,
1266 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1267 	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
1268 	{ .name	    = "unlinkat",   .errmsg = true,
1269 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1270 			     [1] = SCA_FILENAME, /* pathname */ }, },
1271 	{ .name	    = "utime",  .errmsg = true,
1272 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1273 	{ .name	    = "utimensat",  .errmsg = true,
1274 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1275 			     [1] = SCA_FILENAME, /* filename */ }, },
1276 	{ .name	    = "utimes",  .errmsg = true,
1277 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1278 	{ .name	    = "vmsplice",  .errmsg = true,
1279 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1280 	{ .name	    = "write",	    .errmsg = true,
1281 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1282 	{ .name	    = "writev",	    .errmsg = true,
1283 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1284 };
1285 
1286 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1287 {
1288 	const struct syscall_fmt *fmt = fmtp;
1289 	return strcmp(name, fmt->name);
1290 }
1291 
1292 static struct syscall_fmt *syscall_fmt__find(const char *name)
1293 {
1294 	const int nmemb = ARRAY_SIZE(syscall_fmts);
1295 	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1296 }
1297 
/*
 * Per syscall-id information, one slot per id in trace->syscalls.table.
 * Filled in by trace__read_syscall_info().
 */
struct syscall {
	/* format of the syscalls:sys_enter_<name> (or alias) tracepoint */
	struct event_format *tp_format;
	/* number of tracepoint fields, not counting a leading "nr" field */
	int		    nr_args;
	/* first argument field of tp_format, with a leading "nr" skipped */
	struct format_field *args;
	/* name returned by audit_syscall_to_name() for this id */
	const char	    *name;
	/* true for "exit" and "exit_group" */
	bool		    is_exit;
	/* matching syscall_fmts[] entry, NULL when there is none */
	struct syscall_fmt  *fmt;
	/* per argument formatter, indexed by argument; NULL entry == no formatter */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* per argument formatter parameter, taken from fmt->arg_parm */
	void		    **arg_parm;
};
1308 
1309 static size_t fprintf_duration(unsigned long t, FILE *fp)
1310 {
1311 	double duration = (double)t / NSEC_PER_MSEC;
1312 	size_t printed = fprintf(fp, "(");
1313 
1314 	if (duration >= 1.0)
1315 		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1316 	else if (duration >= 0.01)
1317 		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1318 	else
1319 		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1320 	return printed + fprintf(fp, "): ");
1321 }
1322 
1323 /**
1324  * filename.ptr: The filename char pointer that will be vfs_getname'd
1325  * filename.entry_str_pos: Where to insert the string translated from
1326  *                         filename.ptr by the vfs_getname tracepoint/kprobe.
1327  */
1328 struct thread_trace {
1329 	u64		  entry_time;
1330 	u64		  exit_time;
1331 	bool		  entry_pending;
1332 	unsigned long	  nr_events;
1333 	unsigned long	  pfmaj, pfmin;
1334 	char		  *entry_str;
1335 	double		  runtime_ms;
1336         struct {
1337 		unsigned long ptr;
1338 		short int     entry_str_pos;
1339 		bool	      pending_open;
1340 		unsigned int  namelen;
1341 		char	      *name;
1342 	} filename;
1343 	struct {
1344 		int	  max;
1345 		char	  **table;
1346 	} paths;
1347 
1348 	struct intlist *syscall_stats;
1349 };
1350 
1351 static struct thread_trace *thread_trace__new(void)
1352 {
1353 	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1354 
1355 	if (ttrace)
1356 		ttrace->paths.max = -1;
1357 
1358 	ttrace->syscall_stats = intlist__new(NULL);
1359 
1360 	return ttrace;
1361 }
1362 
1363 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1364 {
1365 	struct thread_trace *ttrace;
1366 
1367 	if (thread == NULL)
1368 		goto fail;
1369 
1370 	if (thread__priv(thread) == NULL)
1371 		thread__set_priv(thread, thread_trace__new());
1372 
1373 	if (thread__priv(thread) == NULL)
1374 		goto fail;
1375 
1376 	ttrace = thread__priv(thread);
1377 	++ttrace->nr_events;
1378 
1379 	return ttrace;
1380 fail:
1381 	color_fprintf(fp, PERF_COLOR_RED,
1382 		      "WARNING: not enough memory, dropping samples!\n");
1383 	return NULL;
1384 }
1385 
/* Bit flags, one per page fault kind. */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size in bytes of the per thread buffer a syscall entry is formatted into. */
static const size_t trace__entry_str_size = 2048;
1390 
1391 struct trace {
1392 	struct perf_tool	tool;
1393 	struct {
1394 		int		machine;
1395 		int		open_id;
1396 	}			audit;
1397 	struct {
1398 		int		max;
1399 		struct syscall  *table;
1400 		struct {
1401 			struct perf_evsel *sys_enter,
1402 					  *sys_exit;
1403 		}		events;
1404 	} syscalls;
1405 	struct record_opts	opts;
1406 	struct perf_evlist	*evlist;
1407 	struct machine		*host;
1408 	struct thread		*current;
1409 	u64			base_time;
1410 	FILE			*output;
1411 	unsigned long		nr_events;
1412 	struct strlist		*ev_qualifier;
1413 	struct {
1414 		size_t		nr;
1415 		int		*entries;
1416 	}			ev_qualifier_ids;
1417 	struct intlist		*tid_list;
1418 	struct intlist		*pid_list;
1419 	struct {
1420 		size_t		nr;
1421 		pid_t		*entries;
1422 	}			filter_pids;
1423 	double			duration_filter;
1424 	double			runtime_ms;
1425 	struct {
1426 		u64		vfs_getname,
1427 				proc_getname;
1428 	} stats;
1429 	bool			not_ev_qualifier;
1430 	bool			live;
1431 	bool			full_time;
1432 	bool			sched;
1433 	bool			multiple_threads;
1434 	bool			summary;
1435 	bool			summary_only;
1436 	bool			show_comm;
1437 	bool			show_tool_stats;
1438 	bool			trace_syscalls;
1439 	bool			force;
1440 	bool			vfs_getname;
1441 	int			trace_pgfaults;
1442 };
1443 
1444 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1445 {
1446 	struct thread_trace *ttrace = thread__priv(thread);
1447 
1448 	if (fd > ttrace->paths.max) {
1449 		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1450 
1451 		if (npath == NULL)
1452 			return -1;
1453 
1454 		if (ttrace->paths.max != -1) {
1455 			memset(npath + ttrace->paths.max + 1, 0,
1456 			       (fd - ttrace->paths.max) * sizeof(char *));
1457 		} else {
1458 			memset(npath, 0, (fd + 1) * sizeof(char *));
1459 		}
1460 
1461 		ttrace->paths.table = npath;
1462 		ttrace->paths.max   = fd;
1463 	}
1464 
1465 	ttrace->paths.table[fd] = strdup(pathname);
1466 
1467 	return ttrace->paths.table[fd] != NULL ? 0 : -1;
1468 }
1469 
1470 static int thread__read_fd_path(struct thread *thread, int fd)
1471 {
1472 	char linkname[PATH_MAX], pathname[PATH_MAX];
1473 	struct stat st;
1474 	int ret;
1475 
1476 	if (thread->pid_ == thread->tid) {
1477 		scnprintf(linkname, sizeof(linkname),
1478 			  "/proc/%d/fd/%d", thread->pid_, fd);
1479 	} else {
1480 		scnprintf(linkname, sizeof(linkname),
1481 			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1482 	}
1483 
1484 	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1485 		return -1;
1486 
1487 	ret = readlink(linkname, pathname, sizeof(pathname));
1488 
1489 	if (ret < 0 || ret > st.st_size)
1490 		return -1;
1491 
1492 	pathname[ret] = '\0';
1493 	return trace__set_fd_pathname(thread, fd, pathname);
1494 }
1495 
1496 static const char *thread__fd_path(struct thread *thread, int fd,
1497 				   struct trace *trace)
1498 {
1499 	struct thread_trace *ttrace = thread__priv(thread);
1500 
1501 	if (ttrace == NULL)
1502 		return NULL;
1503 
1504 	if (fd < 0)
1505 		return NULL;
1506 
1507 	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1508 		if (!trace->live)
1509 			return NULL;
1510 		++trace->stats.proc_getname;
1511 		if (thread__read_fd_path(thread, fd))
1512 			return NULL;
1513 	}
1514 
1515 	return ttrace->paths.table[fd];
1516 }
1517 
1518 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1519 					struct syscall_arg *arg)
1520 {
1521 	int fd = arg->val;
1522 	size_t printed = scnprintf(bf, size, "%d", fd);
1523 	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1524 
1525 	if (path)
1526 		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1527 
1528 	return printed;
1529 }
1530 
1531 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1532 					      struct syscall_arg *arg)
1533 {
1534 	int fd = arg->val;
1535 	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1536 	struct thread_trace *ttrace = thread__priv(arg->thread);
1537 
1538 	if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1539 		zfree(&ttrace->paths.table[fd]);
1540 
1541 	return printed;
1542 }
1543 
1544 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1545 				     unsigned long ptr)
1546 {
1547 	struct thread_trace *ttrace = thread__priv(thread);
1548 
1549 	ttrace->filename.ptr = ptr;
1550 	ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1551 }
1552 
1553 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1554 					      struct syscall_arg *arg)
1555 {
1556 	unsigned long ptr = arg->val;
1557 
1558 	if (!arg->trace->vfs_getname)
1559 		return scnprintf(bf, size, "%#x", ptr);
1560 
1561 	thread__set_filename_pos(arg->thread, bf, ptr);
1562 	return 0;
1563 }
1564 
1565 static bool trace__filter_duration(struct trace *trace, double t)
1566 {
1567 	return t < (trace->duration_filter * NSEC_PER_MSEC);
1568 }
1569 
1570 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1571 {
1572 	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1573 
1574 	return fprintf(fp, "%10.3f ", ts);
1575 }
1576 
/* Set by sig_handler(): a signal arrived ... */
static bool done = false;
/* ... and the last one delivered was SIGINT. */
static bool interrupted = false;

/*
 * Signal handler: flag that we are done and note whether the signal was
 * SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;

	if (sig == SIGINT)
		interrupted = true;
	else
		interrupted = false;
}
1585 
1586 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1587 					u64 duration, u64 tstamp, FILE *fp)
1588 {
1589 	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1590 	printed += fprintf_duration(duration, fp);
1591 
1592 	if (trace->multiple_threads) {
1593 		if (trace->show_comm)
1594 			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1595 		printed += fprintf(fp, "%d ", thread->tid);
1596 	}
1597 
1598 	return printed;
1599 }
1600 
1601 static int trace__process_event(struct trace *trace, struct machine *machine,
1602 				union perf_event *event, struct perf_sample *sample)
1603 {
1604 	int ret = 0;
1605 
1606 	switch (event->header.type) {
1607 	case PERF_RECORD_LOST:
1608 		color_fprintf(trace->output, PERF_COLOR_RED,
1609 			      "LOST %" PRIu64 " events!\n", event->lost.lost);
1610 		ret = machine__process_lost_event(machine, event, sample);
1611 	default:
1612 		ret = machine__process_event(machine, event, sample);
1613 		break;
1614 	}
1615 
1616 	return ret;
1617 }
1618 
1619 static int trace__tool_process(struct perf_tool *tool,
1620 			       union perf_event *event,
1621 			       struct perf_sample *sample,
1622 			       struct machine *machine)
1623 {
1624 	struct trace *trace = container_of(tool, struct trace, tool);
1625 	return trace__process_event(trace, machine, event, sample);
1626 }
1627 
1628 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1629 {
1630 	int err = symbol__init(NULL);
1631 
1632 	if (err)
1633 		return err;
1634 
1635 	trace->host = machine__new_host();
1636 	if (trace->host == NULL)
1637 		return -ENOMEM;
1638 
1639 	if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1640 		return -errno;
1641 
1642 	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1643 					    evlist->threads, trace__tool_process, false,
1644 					    trace->opts.proc_map_timeout);
1645 	if (err)
1646 		symbol__exit();
1647 
1648 	return err;
1649 }
1650 
1651 static int syscall__set_arg_fmts(struct syscall *sc)
1652 {
1653 	struct format_field *field;
1654 	int idx = 0;
1655 
1656 	sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1657 	if (sc->arg_scnprintf == NULL)
1658 		return -1;
1659 
1660 	if (sc->fmt)
1661 		sc->arg_parm = sc->fmt->arg_parm;
1662 
1663 	for (field = sc->args; field; field = field->next) {
1664 		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1665 			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1666 		else if (field->flags & FIELD_IS_POINTER)
1667 			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1668 		++idx;
1669 	}
1670 
1671 	return 0;
1672 }
1673 
1674 static int trace__read_syscall_info(struct trace *trace, int id)
1675 {
1676 	char tp_name[128];
1677 	struct syscall *sc;
1678 	const char *name = audit_syscall_to_name(id, trace->audit.machine);
1679 
1680 	if (name == NULL)
1681 		return -1;
1682 
1683 	if (id > trace->syscalls.max) {
1684 		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1685 
1686 		if (nsyscalls == NULL)
1687 			return -1;
1688 
1689 		if (trace->syscalls.max != -1) {
1690 			memset(nsyscalls + trace->syscalls.max + 1, 0,
1691 			       (id - trace->syscalls.max) * sizeof(*sc));
1692 		} else {
1693 			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1694 		}
1695 
1696 		trace->syscalls.table = nsyscalls;
1697 		trace->syscalls.max   = id;
1698 	}
1699 
1700 	sc = trace->syscalls.table + id;
1701 	sc->name = name;
1702 
1703 	sc->fmt  = syscall_fmt__find(sc->name);
1704 
1705 	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1706 	sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1707 
1708 	if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1709 		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1710 		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1711 	}
1712 
1713 	if (sc->tp_format == NULL)
1714 		return -1;
1715 
1716 	sc->args = sc->tp_format->format.fields;
1717 	sc->nr_args = sc->tp_format->format.nr_fields;
1718 	/* drop nr field - not relevant here; does not exist on older kernels */
1719 	if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1720 		sc->args = sc->args->next;
1721 		--sc->nr_args;
1722 	}
1723 
1724 	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1725 
1726 	return syscall__set_arg_fmts(sc);
1727 }
1728 
1729 static int trace__validate_ev_qualifier(struct trace *trace)
1730 {
1731 	int err = 0, i;
1732 	struct str_node *pos;
1733 
1734 	trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1735 	trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1736 						 sizeof(trace->ev_qualifier_ids.entries[0]));
1737 
1738 	if (trace->ev_qualifier_ids.entries == NULL) {
1739 		fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1740 		       trace->output);
1741 		err = -EINVAL;
1742 		goto out;
1743 	}
1744 
1745 	i = 0;
1746 
1747 	strlist__for_each(pos, trace->ev_qualifier) {
1748 		const char *sc = pos->s;
1749 		int id = audit_name_to_syscall(sc, trace->audit.machine);
1750 
1751 		if (id < 0) {
1752 			if (err == 0) {
1753 				fputs("Error:\tInvalid syscall ", trace->output);
1754 				err = -EINVAL;
1755 			} else {
1756 				fputs(", ", trace->output);
1757 			}
1758 
1759 			fputs(sc, trace->output);
1760 		}
1761 
1762 		trace->ev_qualifier_ids.entries[i++] = id;
1763 	}
1764 
1765 	if (err < 0) {
1766 		fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1767 		      "\nHint:\tand: 'man syscalls'\n", trace->output);
1768 		zfree(&trace->ev_qualifier_ids.entries);
1769 		trace->ev_qualifier_ids.nr = 0;
1770 	}
1771 out:
1772 	return err;
1773 }
1774 
1775 /*
1776  * args is to be interpreted as a series of longs but we need to handle
1777  * 8-byte unaligned accesses. args points to raw_data within the event
1778  * and raw_data is guaranteed to be 8-byte unaligned because it is
1779  * preceded by raw_size which is a u32. So we need to copy args to a temp
1780  * variable to read it. Most notably this avoids extended load instructions
1781  * on unaligned addresses
1782  */
1783 
1784 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1785 				      unsigned char *args, struct trace *trace,
1786 				      struct thread *thread)
1787 {
1788 	size_t printed = 0;
1789 	unsigned char *p;
1790 	unsigned long val;
1791 
1792 	if (sc->args != NULL) {
1793 		struct format_field *field;
1794 		u8 bit = 1;
1795 		struct syscall_arg arg = {
1796 			.idx	= 0,
1797 			.mask	= 0,
1798 			.trace  = trace,
1799 			.thread = thread,
1800 		};
1801 
1802 		for (field = sc->args; field;
1803 		     field = field->next, ++arg.idx, bit <<= 1) {
1804 			if (arg.mask & bit)
1805 				continue;
1806 
1807 			/* special care for unaligned accesses */
1808 			p = args + sizeof(unsigned long) * arg.idx;
1809 			memcpy(&val, p, sizeof(val));
1810 
1811 			/*
1812  			 * Suppress this argument if its value is zero and
1813  			 * and we don't have a string associated in an
1814  			 * strarray for it.
1815  			 */
1816 			if (val == 0 &&
1817 			    !(sc->arg_scnprintf &&
1818 			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1819 			      sc->arg_parm[arg.idx]))
1820 				continue;
1821 
1822 			printed += scnprintf(bf + printed, size - printed,
1823 					     "%s%s: ", printed ? ", " : "", field->name);
1824 			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1825 				arg.val = val;
1826 				if (sc->arg_parm)
1827 					arg.parm = sc->arg_parm[arg.idx];
1828 				printed += sc->arg_scnprintf[arg.idx](bf + printed,
1829 								      size - printed, &arg);
1830 			} else {
1831 				printed += scnprintf(bf + printed, size - printed,
1832 						     "%ld", val);
1833 			}
1834 		}
1835 	} else {
1836 		int i = 0;
1837 
1838 		while (i < 6) {
1839 			/* special care for unaligned accesses */
1840 			p = args + sizeof(unsigned long) * i;
1841 			memcpy(&val, p, sizeof(val));
1842 			printed += scnprintf(bf + printed, size - printed,
1843 					     "%sarg%d: %ld",
1844 					     printed ? ", " : "", i, val);
1845 			++i;
1846 		}
1847 	}
1848 
1849 	return printed;
1850 }
1851 
/*
 * Signature of the per tracepoint sample handlers, e.g. trace__sys_enter()
 * and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1855 
1856 static struct syscall *trace__syscall_info(struct trace *trace,
1857 					   struct perf_evsel *evsel, int id)
1858 {
1859 
1860 	if (id < 0) {
1861 
1862 		/*
1863 		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1864 		 * before that, leaving at a higher verbosity level till that is
1865 		 * explained. Reproduced with plain ftrace with:
1866 		 *
1867 		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1868 		 * grep "NR -1 " /t/trace_pipe
1869 		 *
1870 		 * After generating some load on the machine.
1871  		 */
1872 		if (verbose > 1) {
1873 			static u64 n;
1874 			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1875 				id, perf_evsel__name(evsel), ++n);
1876 		}
1877 		return NULL;
1878 	}
1879 
1880 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1881 	    trace__read_syscall_info(trace, id))
1882 		goto out_cant_read;
1883 
1884 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1885 		goto out_cant_read;
1886 
1887 	return &trace->syscalls.table[id];
1888 
1889 out_cant_read:
1890 	if (verbose) {
1891 		fprintf(trace->output, "Problems reading syscall %d", id);
1892 		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1893 			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1894 		fputs(" information\n", trace->output);
1895 	}
1896 	return NULL;
1897 }
1898 
1899 static void thread__update_stats(struct thread_trace *ttrace,
1900 				 int id, struct perf_sample *sample)
1901 {
1902 	struct int_node *inode;
1903 	struct stats *stats;
1904 	u64 duration = 0;
1905 
1906 	inode = intlist__findnew(ttrace->syscall_stats, id);
1907 	if (inode == NULL)
1908 		return;
1909 
1910 	stats = inode->priv;
1911 	if (stats == NULL) {
1912 		stats = malloc(sizeof(struct stats));
1913 		if (stats == NULL)
1914 			return;
1915 		init_stats(stats);
1916 		inode->priv = stats;
1917 	}
1918 
1919 	if (ttrace->entry_time && sample->time > ttrace->entry_time)
1920 		duration = sample->time - ttrace->entry_time;
1921 
1922 	update_stats(stats, duration);
1923 }
1924 
1925 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1926 {
1927 	struct thread_trace *ttrace;
1928 	u64 duration;
1929 	size_t printed;
1930 
1931 	if (trace->current == NULL)
1932 		return 0;
1933 
1934 	ttrace = thread__priv(trace->current);
1935 
1936 	if (!ttrace->entry_pending)
1937 		return 0;
1938 
1939 	duration = sample->time - ttrace->entry_time;
1940 
1941 	printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1942 	printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1943 	ttrace->entry_pending = false;
1944 
1945 	return printed;
1946 }
1947 
1948 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1949 			    union perf_event *event __maybe_unused,
1950 			    struct perf_sample *sample)
1951 {
1952 	char *msg;
1953 	void *args;
1954 	size_t printed = 0;
1955 	struct thread *thread;
1956 	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1957 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
1958 	struct thread_trace *ttrace;
1959 
1960 	if (sc == NULL)
1961 		return -1;
1962 
1963 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1964 	ttrace = thread__trace(thread, trace->output);
1965 	if (ttrace == NULL)
1966 		goto out_put;
1967 
1968 	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1969 
1970 	if (ttrace->entry_str == NULL) {
1971 		ttrace->entry_str = malloc(trace__entry_str_size);
1972 		if (!ttrace->entry_str)
1973 			goto out_put;
1974 	}
1975 
1976 	if (!trace->summary_only)
1977 		trace__printf_interrupted_entry(trace, sample);
1978 
1979 	ttrace->entry_time = sample->time;
1980 	msg = ttrace->entry_str;
1981 	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1982 
1983 	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1984 					   args, trace, thread);
1985 
1986 	if (sc->is_exit) {
1987 		if (!trace->duration_filter && !trace->summary_only) {
1988 			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1989 			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1990 		}
1991 	} else {
1992 		ttrace->entry_pending = true;
1993 		/* See trace__vfs_getname & trace__sys_exit */
1994 		ttrace->filename.pending_open = false;
1995 	}
1996 
1997 	if (trace->current != thread) {
1998 		thread__put(trace->current);
1999 		trace->current = thread__get(thread);
2000 	}
2001 	err = 0;
2002 out_put:
2003 	thread__put(thread);
2004 	return err;
2005 }
2006 
2007 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
2008 			   union perf_event *event __maybe_unused,
2009 			   struct perf_sample *sample)
2010 {
2011 	long ret;
2012 	u64 duration = 0;
2013 	struct thread *thread;
2014 	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2015 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
2016 	struct thread_trace *ttrace;
2017 
2018 	if (sc == NULL)
2019 		return -1;
2020 
2021 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2022 	ttrace = thread__trace(thread, trace->output);
2023 	if (ttrace == NULL)
2024 		goto out_put;
2025 
2026 	if (trace->summary)
2027 		thread__update_stats(ttrace, id, sample);
2028 
2029 	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2030 
2031 	if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
2032 		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2033 		ttrace->filename.pending_open = false;
2034 		++trace->stats.vfs_getname;
2035 	}
2036 
2037 	ttrace->exit_time = sample->time;
2038 
2039 	if (ttrace->entry_time) {
2040 		duration = sample->time - ttrace->entry_time;
2041 		if (trace__filter_duration(trace, duration))
2042 			goto out;
2043 	} else if (trace->duration_filter)
2044 		goto out;
2045 
2046 	if (trace->summary_only)
2047 		goto out;
2048 
2049 	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2050 
2051 	if (ttrace->entry_pending) {
2052 		fprintf(trace->output, "%-70s", ttrace->entry_str);
2053 	} else {
2054 		fprintf(trace->output, " ... [");
2055 		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2056 		fprintf(trace->output, "]: %s()", sc->name);
2057 	}
2058 
2059 	if (sc->fmt == NULL) {
2060 signed_print:
2061 		fprintf(trace->output, ") = %ld", ret);
2062 	} else if (ret < 0 && sc->fmt->errmsg) {
2063 		char bf[STRERR_BUFSIZE];
2064 		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2065 			   *e = audit_errno_to_name(-ret);
2066 
2067 		fprintf(trace->output, ") = -1 %s %s", e, emsg);
2068 	} else if (ret == 0 && sc->fmt->timeout)
2069 		fprintf(trace->output, ") = 0 Timeout");
2070 	else if (sc->fmt->hexret)
2071 		fprintf(trace->output, ") = %#lx", ret);
2072 	else
2073 		goto signed_print;
2074 
2075 	fputc('\n', trace->output);
2076 out:
2077 	ttrace->entry_pending = false;
2078 	err = 0;
2079 out_put:
2080 	thread__put(thread);
2081 	return err;
2082 }
2083 
2084 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2085 			      union perf_event *event __maybe_unused,
2086 			      struct perf_sample *sample)
2087 {
2088 	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2089 	struct thread_trace *ttrace;
2090 	size_t filename_len, entry_str_len, to_move;
2091 	ssize_t remaining_space;
2092 	char *pos;
2093 	const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2094 
2095 	if (!thread)
2096 		goto out;
2097 
2098 	ttrace = thread__priv(thread);
2099 	if (!ttrace)
2100 		goto out;
2101 
2102 	filename_len = strlen(filename);
2103 
2104 	if (ttrace->filename.namelen < filename_len) {
2105 		char *f = realloc(ttrace->filename.name, filename_len + 1);
2106 
2107 		if (f == NULL)
2108 				goto out;
2109 
2110 		ttrace->filename.namelen = filename_len;
2111 		ttrace->filename.name = f;
2112 	}
2113 
2114 	strcpy(ttrace->filename.name, filename);
2115 	ttrace->filename.pending_open = true;
2116 
2117 	if (!ttrace->filename.ptr)
2118 		goto out;
2119 
2120 	entry_str_len = strlen(ttrace->entry_str);
2121 	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2122 	if (remaining_space <= 0)
2123 		goto out;
2124 
2125 	if (filename_len > (size_t)remaining_space) {
2126 		filename += filename_len - remaining_space;
2127 		filename_len = remaining_space;
2128 	}
2129 
2130 	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2131 	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2132 	memmove(pos + filename_len, pos, to_move);
2133 	memcpy(pos, filename, filename_len);
2134 
2135 	ttrace->filename.ptr = 0;
2136 	ttrace->filename.entry_str_pos = 0;
2137 out:
2138 	return 0;
2139 }
2140 
2141 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2142 				     union perf_event *event __maybe_unused,
2143 				     struct perf_sample *sample)
2144 {
2145         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2146 	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2147 	struct thread *thread = machine__findnew_thread(trace->host,
2148 							sample->pid,
2149 							sample->tid);
2150 	struct thread_trace *ttrace = thread__trace(thread, trace->output);
2151 
2152 	if (ttrace == NULL)
2153 		goto out_dump;
2154 
2155 	ttrace->runtime_ms += runtime_ms;
2156 	trace->runtime_ms += runtime_ms;
2157 	thread__put(thread);
2158 	return 0;
2159 
2160 out_dump:
2161 	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2162 	       evsel->name,
2163 	       perf_evsel__strval(evsel, sample, "comm"),
2164 	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2165 	       runtime,
2166 	       perf_evsel__intval(evsel, sample, "vruntime"));
2167 	thread__put(thread);
2168 	return 0;
2169 }
2170 
2171 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2172 				union perf_event *event __maybe_unused,
2173 				struct perf_sample *sample)
2174 {
2175 	trace__printf_interrupted_entry(trace, sample);
2176 	trace__fprintf_tstamp(trace, sample->time, trace->output);
2177 
2178 	if (trace->trace_syscalls)
2179 		fprintf(trace->output, "(         ): ");
2180 
2181 	fprintf(trace->output, "%s:", evsel->name);
2182 
2183 	if (evsel->tp_format) {
2184 		event_format__fprintf(evsel->tp_format, sample->cpu,
2185 				      sample->raw_data, sample->raw_size,
2186 				      trace->output);
2187 	}
2188 
2189 	fprintf(trace->output, ")\n");
2190 	return 0;
2191 }
2192 
2193 static void print_location(FILE *f, struct perf_sample *sample,
2194 			   struct addr_location *al,
2195 			   bool print_dso, bool print_sym)
2196 {
2197 
2198 	if ((verbose || print_dso) && al->map)
2199 		fprintf(f, "%s@", al->map->dso->long_name);
2200 
2201 	if ((verbose || print_sym) && al->sym)
2202 		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2203 			al->addr - al->sym->start);
2204 	else if (al->map)
2205 		fprintf(f, "0x%" PRIx64, al->addr);
2206 	else
2207 		fprintf(f, "0x%" PRIx64, sample->addr);
2208 }
2209 
2210 static int trace__pgfault(struct trace *trace,
2211 			  struct perf_evsel *evsel,
2212 			  union perf_event *event,
2213 			  struct perf_sample *sample)
2214 {
2215 	struct thread *thread;
2216 	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2217 	struct addr_location al;
2218 	char map_type = 'd';
2219 	struct thread_trace *ttrace;
2220 	int err = -1;
2221 
2222 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2223 	ttrace = thread__trace(thread, trace->output);
2224 	if (ttrace == NULL)
2225 		goto out_put;
2226 
2227 	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2228 		ttrace->pfmaj++;
2229 	else
2230 		ttrace->pfmin++;
2231 
2232 	if (trace->summary_only)
2233 		goto out;
2234 
2235 	thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2236 			      sample->ip, &al);
2237 
2238 	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2239 
2240 	fprintf(trace->output, "%sfault [",
2241 		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2242 		"maj" : "min");
2243 
2244 	print_location(trace->output, sample, &al, false, true);
2245 
2246 	fprintf(trace->output, "] => ");
2247 
2248 	thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2249 				   sample->addr, &al);
2250 
2251 	if (!al.map) {
2252 		thread__find_addr_location(thread, cpumode,
2253 					   MAP__FUNCTION, sample->addr, &al);
2254 
2255 		if (al.map)
2256 			map_type = 'x';
2257 		else
2258 			map_type = '?';
2259 	}
2260 
2261 	print_location(trace->output, sample, &al, true, false);
2262 
2263 	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2264 out:
2265 	err = 0;
2266 out_put:
2267 	thread__put(thread);
2268 	return err;
2269 }
2270 
2271 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2272 {
2273 	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2274 	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2275 		return false;
2276 
2277 	if (trace->pid_list || trace->tid_list)
2278 		return true;
2279 
2280 	return false;
2281 }
2282 
2283 static int trace__process_sample(struct perf_tool *tool,
2284 				 union perf_event *event,
2285 				 struct perf_sample *sample,
2286 				 struct perf_evsel *evsel,
2287 				 struct machine *machine __maybe_unused)
2288 {
2289 	struct trace *trace = container_of(tool, struct trace, tool);
2290 	int err = 0;
2291 
2292 	tracepoint_handler handler = evsel->handler;
2293 
2294 	if (skip_sample(trace, sample))
2295 		return 0;
2296 
2297 	if (!trace->full_time && trace->base_time == 0)
2298 		trace->base_time = sample->time;
2299 
2300 	if (handler) {
2301 		++trace->nr_events;
2302 		handler(trace, evsel, event, sample);
2303 	}
2304 
2305 	return err;
2306 }
2307 
2308 static int parse_target_str(struct trace *trace)
2309 {
2310 	if (trace->opts.target.pid) {
2311 		trace->pid_list = intlist__new(trace->opts.target.pid);
2312 		if (trace->pid_list == NULL) {
2313 			pr_err("Error parsing process id string\n");
2314 			return -EINVAL;
2315 		}
2316 	}
2317 
2318 	if (trace->opts.target.tid) {
2319 		trace->tid_list = intlist__new(trace->opts.target.tid);
2320 		if (trace->tid_list == NULL) {
2321 			pr_err("Error parsing thread id string\n");
2322 			return -EINVAL;
2323 		}
2324 	}
2325 
2326 	return 0;
2327 }
2328 
2329 static int trace__record(struct trace *trace, int argc, const char **argv)
2330 {
2331 	unsigned int rec_argc, i, j;
2332 	const char **rec_argv;
2333 	const char * const record_args[] = {
2334 		"record",
2335 		"-R",
2336 		"-m", "1024",
2337 		"-c", "1",
2338 	};
2339 
2340 	const char * const sc_args[] = { "-e", };
2341 	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2342 	const char * const majpf_args[] = { "-e", "major-faults" };
2343 	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2344 	const char * const minpf_args[] = { "-e", "minor-faults" };
2345 	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2346 
2347 	/* +1 is for the event string below */
2348 	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2349 		majpf_args_nr + minpf_args_nr + argc;
2350 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
2351 
2352 	if (rec_argv == NULL)
2353 		return -ENOMEM;
2354 
2355 	j = 0;
2356 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
2357 		rec_argv[j++] = record_args[i];
2358 
2359 	if (trace->trace_syscalls) {
2360 		for (i = 0; i < sc_args_nr; i++)
2361 			rec_argv[j++] = sc_args[i];
2362 
2363 		/* event string may be different for older kernels - e.g., RHEL6 */
2364 		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2365 			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2366 		else if (is_valid_tracepoint("syscalls:sys_enter"))
2367 			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2368 		else {
2369 			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2370 			return -1;
2371 		}
2372 	}
2373 
2374 	if (trace->trace_pgfaults & TRACE_PFMAJ)
2375 		for (i = 0; i < majpf_args_nr; i++)
2376 			rec_argv[j++] = majpf_args[i];
2377 
2378 	if (trace->trace_pgfaults & TRACE_PFMIN)
2379 		for (i = 0; i < minpf_args_nr; i++)
2380 			rec_argv[j++] = minpf_args[i];
2381 
2382 	for (i = 0; i < (unsigned int)argc; i++)
2383 		rec_argv[j++] = argv[i];
2384 
2385 	return cmd_record(j, rec_argv, NULL);
2386 }
2387 
2388 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2389 
2390 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2391 {
2392 	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2393 	if (evsel == NULL)
2394 		return false;
2395 
2396 	if (perf_evsel__field(evsel, "pathname") == NULL) {
2397 		perf_evsel__delete(evsel);
2398 		return false;
2399 	}
2400 
2401 	evsel->handler = trace__vfs_getname;
2402 	perf_evlist__add(evlist, evsel);
2403 	return true;
2404 }
2405 
2406 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2407 				    u64 config)
2408 {
2409 	struct perf_evsel *evsel;
2410 	struct perf_event_attr attr = {
2411 		.type = PERF_TYPE_SOFTWARE,
2412 		.mmap_data = 1,
2413 	};
2414 
2415 	attr.config = config;
2416 	attr.sample_period = 1;
2417 
2418 	event_attr_init(&attr);
2419 
2420 	evsel = perf_evsel__new(&attr);
2421 	if (!evsel)
2422 		return -ENOMEM;
2423 
2424 	evsel->handler = trace__pgfault;
2425 	perf_evlist__add(evlist, evsel);
2426 
2427 	return 0;
2428 }
2429 
2430 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2431 {
2432 	const u32 type = event->header.type;
2433 	struct perf_evsel *evsel;
2434 
2435 	if (!trace->full_time && trace->base_time == 0)
2436 		trace->base_time = sample->time;
2437 
2438 	if (type != PERF_RECORD_SAMPLE) {
2439 		trace__process_event(trace, trace->host, event, sample);
2440 		return;
2441 	}
2442 
2443 	evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2444 	if (evsel == NULL) {
2445 		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2446 		return;
2447 	}
2448 
2449 	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2450 	    sample->raw_data == NULL) {
2451 		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2452 		       perf_evsel__name(evsel), sample->tid,
2453 		       sample->cpu, sample->raw_size);
2454 	} else {
2455 		tracepoint_handler handler = evsel->handler;
2456 		handler(trace, evsel, event, sample);
2457 	}
2458 }
2459 
2460 static int trace__add_syscall_newtp(struct trace *trace)
2461 {
2462 	int ret = -1;
2463 	struct perf_evlist *evlist = trace->evlist;
2464 	struct perf_evsel *sys_enter, *sys_exit;
2465 
2466 	sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2467 	if (sys_enter == NULL)
2468 		goto out;
2469 
2470 	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2471 		goto out_delete_sys_enter;
2472 
2473 	sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2474 	if (sys_exit == NULL)
2475 		goto out_delete_sys_enter;
2476 
2477 	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2478 		goto out_delete_sys_exit;
2479 
2480 	perf_evlist__add(evlist, sys_enter);
2481 	perf_evlist__add(evlist, sys_exit);
2482 
2483 	trace->syscalls.events.sys_enter = sys_enter;
2484 	trace->syscalls.events.sys_exit  = sys_exit;
2485 
2486 	ret = 0;
2487 out:
2488 	return ret;
2489 
2490 out_delete_sys_exit:
2491 	perf_evsel__delete_priv(sys_exit);
2492 out_delete_sys_enter:
2493 	perf_evsel__delete_priv(sys_enter);
2494 	goto out;
2495 }
2496 
2497 static int trace__set_ev_qualifier_filter(struct trace *trace)
2498 {
2499 	int err = -1;
2500 	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2501 						trace->ev_qualifier_ids.nr,
2502 						trace->ev_qualifier_ids.entries);
2503 
2504 	if (filter == NULL)
2505 		goto out_enomem;
2506 
2507 	if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2508 		err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2509 
2510 	free(filter);
2511 out:
2512 	return err;
2513 out_enomem:
2514 	errno = ENOMEM;
2515 	goto out;
2516 }
2517 
/*
 * Live tracing mode.
 *
 * Adds the requested events (syscall tracepoints, vfs_getname probe, page
 * faults, sched_stat_runtime) to trace->evlist, creates the maps for the
 * target, optionally prepares the workload given in argv, opens, filters and
 * mmaps the events and then consumes the ring buffers until the session is
 * done or interrupted.  On the way out the optional summary and tool stats
 * are printed and the evlist is deleted.
 *
 * Every error label below prints a message on trace->output and then joins
 * the common teardown at out_delete_evlist, which returns err.
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *evsel;
	int err = -1, i;
	unsigned long before;
	/* A command on the command line means we fork and trace a workload. */
	const bool forks = argc > 0;
	/* Set once the events are disabled or all pollfds are gone: only empty the buffers. */
	bool draining = false;

	trace->live = true;

	if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
		goto out_error_raw_syscalls;

	/* Remember whether the vfs_getname probe could be added. */
	if (trace->trace_syscalls)
		trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);

	if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
		goto out_error_mem;
	}

	if ((trace->trace_pgfaults & TRACE_PFMIN) &&
	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
		goto out_error_mem;

	if (trace->sched &&
	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
				   trace__sched_stat_runtime))
		goto out_error_sched_stat_runtime;

	err = perf_evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
		goto out_delete_evlist;
	}

	perf_evlist__config(evlist, &trace->opts);

	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);

	if (forks) {
		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
						    argv, false, NULL);
		if (err < 0) {
			fprintf(trace->output, "Couldn't run the workload!\n");
			goto out_delete_evlist;
		}
	}

	err = perf_evlist__open(evlist);
	if (err < 0)
		goto out_error_open;

	/*
	 * Better not use !target__has_task() here because we need to cover the
	 * case where no threads were specified in the command line, but a
	 * workload was, and in that case we will fill in the thread_map when
	 * we fork the workload in perf_evlist__prepare_workload.
	 */
	if (trace->filter_pids.nr > 0)
		err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
	else if (thread_map__pid(evlist->threads, 0) == -1)
		err = perf_evlist__set_filter_pid(evlist, getpid());

	if (err < 0)
		goto out_error_mem;

	if (trace->ev_qualifier_ids.nr > 0) {
		err = trace__set_ev_qualifier_filter(trace);
		if (err < 0)
			goto out_errno;

		pr_debug("event qualifier tracepoint filter: %s\n",
			 trace->syscalls.events.sys_exit->filter);
	}

	/* On failure evsel is used by the error path to name the offending event. */
	err = perf_evlist__apply_filters(evlist, &evsel);
	if (err < 0)
		goto out_error_apply_filters;

	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
	if (err < 0)
		goto out_error_mmap;

	if (!target__none(&trace->opts.target))
		perf_evlist__enable(evlist);

	if (forks)
		perf_evlist__start_workload(evlist);

	trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
				  evlist->threads->nr > 1 ||
				  perf_evlist__first(evlist)->attr.inherit;
again:
	/* Snapshot the event count to tell whether this pass consumed anything. */
	before = trace->nr_events;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		union perf_event *event;

		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
			struct perf_sample sample;

			++trace->nr_events;

			err = perf_evlist__parse_sample(evlist, event, &sample);
			if (err) {
				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
				goto next_event;
			}

			trace__handle_event(trace, event, &sample);
next_event:
			perf_evlist__mmap_consume(evlist, i);

			if (interrupted)
				goto out_disable;

			/* Finished: stop producing events but keep emptying the buffers. */
			if (done && !draining) {
				perf_evlist__disable(evlist);
				draining = true;
			}
		}
	}

	if (trace->nr_events == before) {
		/* Nothing was read: wait for more, without a timeout until done is set. */
		int timeout = done ? 100 : -1;

		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
			/* No descriptor left after dropping the ones in error/hangup: just drain. */
			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
				draining = true;

			goto again;
		}
	} else {
		goto again;
	}

out_disable:
	thread__zput(trace->current);

	perf_evlist__disable(evlist);

	if (!err) {
		if (trace->summary)
			trace__fprintf_thread_summary(trace, trace->output);

		if (trace->show_tool_stats) {
			fprintf(trace->output, "Stats:\n "
					       " vfs_getname : %" PRIu64 "\n"
					       " proc_getname: %" PRIu64 "\n",
				trace->stats.vfs_getname,
				trace->stats.proc_getname);
		}
	}

out_delete_evlist:
	perf_evlist__delete(evlist);
	trace->evlist = NULL;
	trace->live = false;
	return err;
/*
 * The block below is only reachable through its labels (it follows a
 * return); the braces scope errbuf to the error paths that need it.
 */
{
	char errbuf[BUFSIZ];

out_error_sched_stat_runtime:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
	goto out_error;

out_error_raw_syscalls:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
	goto out_error;

out_error_mmap:
	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
	goto out_error;

out_error_open:
	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

out_error:
	fprintf(trace->output, "%s\n", errbuf);
	goto out_delete_evlist;

out_error_apply_filters:
	fprintf(trace->output,
		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
		evsel->filter, perf_evsel__name(evsel), errno,
		strerror_r(errno, errbuf, sizeof(errbuf)));
	goto out_delete_evlist;
}
out_error_mem:
	fprintf(trace->output, "Not enough memory to run!\n");
	goto out_delete_evlist;

out_errno:
	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
	goto out_delete_evlist;
}
2723 
2724 static int trace__replay(struct trace *trace)
2725 {
2726 	const struct perf_evsel_str_handler handlers[] = {
2727 		{ "probe:vfs_getname",	     trace__vfs_getname, },
2728 	};
2729 	struct perf_data_file file = {
2730 		.path  = input_name,
2731 		.mode  = PERF_DATA_MODE_READ,
2732 		.force = trace->force,
2733 	};
2734 	struct perf_session *session;
2735 	struct perf_evsel *evsel;
2736 	int err = -1;
2737 
2738 	trace->tool.sample	  = trace__process_sample;
2739 	trace->tool.mmap	  = perf_event__process_mmap;
2740 	trace->tool.mmap2	  = perf_event__process_mmap2;
2741 	trace->tool.comm	  = perf_event__process_comm;
2742 	trace->tool.exit	  = perf_event__process_exit;
2743 	trace->tool.fork	  = perf_event__process_fork;
2744 	trace->tool.attr	  = perf_event__process_attr;
2745 	trace->tool.tracing_data = perf_event__process_tracing_data;
2746 	trace->tool.build_id	  = perf_event__process_build_id;
2747 
2748 	trace->tool.ordered_events = true;
2749 	trace->tool.ordering_requires_timestamps = true;
2750 
2751 	/* add tid to output */
2752 	trace->multiple_threads = true;
2753 
2754 	session = perf_session__new(&file, false, &trace->tool);
2755 	if (session == NULL)
2756 		return -1;
2757 
2758 	if (symbol__init(&session->header.env) < 0)
2759 		goto out;
2760 
2761 	trace->host = &session->machines.host;
2762 
2763 	err = perf_session__set_tracepoints_handlers(session, handlers);
2764 	if (err)
2765 		goto out;
2766 
2767 	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2768 						     "raw_syscalls:sys_enter");
2769 	/* older kernels have syscalls tp versus raw_syscalls */
2770 	if (evsel == NULL)
2771 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2772 							     "syscalls:sys_enter");
2773 
2774 	if (evsel &&
2775 	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2776 	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2777 		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2778 		goto out;
2779 	}
2780 
2781 	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2782 						     "raw_syscalls:sys_exit");
2783 	if (evsel == NULL)
2784 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2785 							     "syscalls:sys_exit");
2786 	if (evsel &&
2787 	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2788 	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2789 		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2790 		goto out;
2791 	}
2792 
2793 	evlist__for_each(session->evlist, evsel) {
2794 		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2795 		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2796 		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2797 		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2798 			evsel->handler = trace__pgfault;
2799 	}
2800 
2801 	err = parse_target_str(trace);
2802 	if (err != 0)
2803 		goto out;
2804 
2805 	setup_pager();
2806 
2807 	err = perf_session__process_events(session);
2808 	if (err)
2809 		pr_err("Failed to process events, error %d", err);
2810 
2811 	else if (trace->summary)
2812 		trace__fprintf_thread_summary(trace, trace->output);
2813 
2814 out:
2815 	perf_session__delete(session);
2816 
2817 	return err;
2818 }
2819 
/*
 * Print the heading of the per-thread summary to fp.
 * Returns the value returned by fprintf() (characters written).
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2828 
2829 static size_t thread__dump_stats(struct thread_trace *ttrace,
2830 				 struct trace *trace, FILE *fp)
2831 {
2832 	struct stats *stats;
2833 	size_t printed = 0;
2834 	struct syscall *sc;
2835 	struct int_node *inode = intlist__first(ttrace->syscall_stats);
2836 
2837 	if (inode == NULL)
2838 		return 0;
2839 
2840 	printed += fprintf(fp, "\n");
2841 
2842 	printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2843 	printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2844 	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2845 
2846 	/* each int_node is a syscall */
2847 	while (inode) {
2848 		stats = inode->priv;
2849 		if (stats) {
2850 			double min = (double)(stats->min) / NSEC_PER_MSEC;
2851 			double max = (double)(stats->max) / NSEC_PER_MSEC;
2852 			double avg = avg_stats(stats);
2853 			double pct;
2854 			u64 n = (u64) stats->n;
2855 
2856 			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2857 			avg /= NSEC_PER_MSEC;
2858 
2859 			sc = &trace->syscalls.table[inode->i];
2860 			printed += fprintf(fp, "   %-15s", sc->name);
2861 			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2862 					   n, avg * n, min, avg);
2863 			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2864 		}
2865 
2866 		inode = intlist__next(inode);
2867 	}
2868 
2869 	printed += fprintf(fp, "\n\n");
2870 
2871 	return printed;
2872 }
2873 
/*
 * struct used to pass data to per-thread function
 * (trace__fprintf_one_thread, called once per thread through
 * machine__for_each_thread).
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* tracer state (event totals, syscall table) */
	size_t printed;		/* running count of characters written */
};
2880 
2881 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2882 {
2883 	struct summary_data *data = priv;
2884 	FILE *fp = data->fp;
2885 	size_t printed = data->printed;
2886 	struct trace *trace = data->trace;
2887 	struct thread_trace *ttrace = thread__priv(thread);
2888 	double ratio;
2889 
2890 	if (ttrace == NULL)
2891 		return 0;
2892 
2893 	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2894 
2895 	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2896 	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2897 	printed += fprintf(fp, "%.1f%%", ratio);
2898 	if (ttrace->pfmaj)
2899 		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2900 	if (ttrace->pfmin)
2901 		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2902 	printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2903 	printed += thread__dump_stats(ttrace, trace, fp);
2904 
2905 	data->printed += printed;
2906 
2907 	return 0;
2908 }
2909 
2910 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2911 {
2912 	struct summary_data data = {
2913 		.fp = fp,
2914 		.trace = trace
2915 	};
2916 	data.printed = trace__fprintf_threads_header(fp);
2917 
2918 	machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2919 
2920 	return data.printed;
2921 }
2922 
2923 static int trace__set_duration(const struct option *opt, const char *str,
2924 			       int unset __maybe_unused)
2925 {
2926 	struct trace *trace = opt->value;
2927 
2928 	trace->duration_filter = atof(str);
2929 	return 0;
2930 }
2931 
2932 static int trace__set_filter_pids(const struct option *opt, const char *str,
2933 				  int unset __maybe_unused)
2934 {
2935 	int ret = -1;
2936 	size_t i;
2937 	struct trace *trace = opt->value;
2938 	/*
2939 	 * FIXME: introduce a intarray class, plain parse csv and create a
2940 	 * { int nr, int entries[] } struct...
2941 	 */
2942 	struct intlist *list = intlist__new(str);
2943 
2944 	if (list == NULL)
2945 		return -1;
2946 
2947 	i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2948 	trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2949 
2950 	if (trace->filter_pids.entries == NULL)
2951 		goto out;
2952 
2953 	trace->filter_pids.entries[0] = getpid();
2954 
2955 	for (i = 1; i < trace->filter_pids.nr; ++i)
2956 		trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2957 
2958 	intlist__delete(list);
2959 	ret = 0;
2960 out:
2961 	return ret;
2962 }
2963 
2964 static int trace__open_output(struct trace *trace, const char *filename)
2965 {
2966 	struct stat st;
2967 
2968 	if (!stat(filename, &st) && st.st_size) {
2969 		char oldname[PATH_MAX];
2970 
2971 		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2972 		unlink(oldname);
2973 		rename(filename, oldname);
2974 	}
2975 
2976 	trace->output = fopen(filename, "w");
2977 
2978 	return trace->output == NULL ? -errno : 0;
2979 }
2980 
2981 static int parse_pagefaults(const struct option *opt, const char *str,
2982 			    int unset __maybe_unused)
2983 {
2984 	int *trace_pgfaults = opt->value;
2985 
2986 	if (strcmp(str, "all") == 0)
2987 		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2988 	else if (strcmp(str, "maj") == 0)
2989 		*trace_pgfaults |= TRACE_PFMAJ;
2990 	else if (strcmp(str, "min") == 0)
2991 		*trace_pgfaults |= TRACE_PFMIN;
2992 	else
2993 		return -1;
2994 
2995 	return 0;
2996 }
2997 
2998 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2999 {
3000 	struct perf_evsel *evsel;
3001 
3002 	evlist__for_each(evlist, evsel)
3003 		evsel->handler = handler;
3004 }
3005 
3006 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
3007 {
3008 	const char *trace_usage[] = {
3009 		"perf trace [<options>] [<command>]",
3010 		"perf trace [<options>] -- <command> [<options>]",
3011 		"perf trace record [<options>] [<command>]",
3012 		"perf trace record [<options>] -- <command> [<options>]",
3013 		NULL
3014 	};
3015 	struct trace trace = {
3016 		.audit = {
3017 			.machine = audit_detect_machine(),
3018 			.open_id = audit_name_to_syscall("open", trace.audit.machine),
3019 		},
3020 		.syscalls = {
3021 			. max = -1,
3022 		},
3023 		.opts = {
3024 			.target = {
3025 				.uid	   = UINT_MAX,
3026 				.uses_mmap = true,
3027 			},
3028 			.user_freq     = UINT_MAX,
3029 			.user_interval = ULLONG_MAX,
3030 			.no_buffering  = true,
3031 			.mmap_pages    = UINT_MAX,
3032 			.proc_map_timeout  = 500,
3033 		},
3034 		.output = stderr,
3035 		.show_comm = true,
3036 		.trace_syscalls = true,
3037 	};
3038 	const char *output_name = NULL;
3039 	const char *ev_qualifier_str = NULL;
3040 	const struct option trace_options[] = {
3041 	OPT_CALLBACK(0, "event", &trace.evlist, "event",
3042 		     "event selector. use 'perf list' to list available events",
3043 		     parse_events_option),
3044 	OPT_BOOLEAN(0, "comm", &trace.show_comm,
3045 		    "show the thread COMM next to its id"),
3046 	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3047 	OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
3048 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
3049 	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3050 	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3051 		    "trace events on existing process id"),
3052 	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3053 		    "trace events on existing thread id"),
3054 	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3055 		     "pids to filter (by the kernel)", trace__set_filter_pids),
3056 	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3057 		    "system-wide collection from all CPUs"),
3058 	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3059 		    "list of cpus to monitor"),
3060 	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3061 		    "child tasks do not inherit counters"),
3062 	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3063 		     "number of mmap data pages",
3064 		     perf_evlist__parse_mmap_pages),
3065 	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3066 		   "user to profile"),
3067 	OPT_CALLBACK(0, "duration", &trace, "float",
3068 		     "show only events with duration > N.M ms",
3069 		     trace__set_duration),
3070 	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3071 	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3072 	OPT_BOOLEAN('T', "time", &trace.full_time,
3073 		    "Show full timestamp, not time relative to first start"),
3074 	OPT_BOOLEAN('s', "summary", &trace.summary_only,
3075 		    "Show only syscall summary with statistics"),
3076 	OPT_BOOLEAN('S', "with-summary", &trace.summary,
3077 		    "Show all syscalls and summary with statistics"),
3078 	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3079 		     "Trace pagefaults", parse_pagefaults, "maj"),
3080 	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3081 	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3082 	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3083 			"per thread proc mmap processing timeout in ms"),
3084 	OPT_END()
3085 	};
3086 	const char * const trace_subcommands[] = { "record", NULL };
3087 	int err;
3088 	char bf[BUFSIZ];
3089 
3090 	signal(SIGSEGV, sighandler_dump_stack);
3091 	signal(SIGFPE, sighandler_dump_stack);
3092 
3093 	trace.evlist = perf_evlist__new();
3094 
3095 	if (trace.evlist == NULL) {
3096 		pr_err("Not enough memory to run!\n");
3097 		err = -ENOMEM;
3098 		goto out;
3099 	}
3100 
3101 	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3102 				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3103 
3104 	if (trace.trace_pgfaults) {
3105 		trace.opts.sample_address = true;
3106 		trace.opts.sample_time = true;
3107 	}
3108 
3109 	if (trace.evlist->nr_entries > 0)
3110 		evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3111 
3112 	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3113 		return trace__record(&trace, argc-1, &argv[1]);
3114 
3115 	/* summary_only implies summary option, but don't overwrite summary if set */
3116 	if (trace.summary_only)
3117 		trace.summary = trace.summary_only;
3118 
3119 	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3120 	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
3121 		pr_err("Please specify something to trace.\n");
3122 		return -1;
3123 	}
3124 
3125 	if (output_name != NULL) {
3126 		err = trace__open_output(&trace, output_name);
3127 		if (err < 0) {
3128 			perror("failed to create output file");
3129 			goto out;
3130 		}
3131 	}
3132 
3133 	if (ev_qualifier_str != NULL) {
3134 		const char *s = ev_qualifier_str;
3135 		struct strlist_config slist_config = {
3136 			.dirname = system_path(STRACE_GROUPS_DIR),
3137 		};
3138 
3139 		trace.not_ev_qualifier = *s == '!';
3140 		if (trace.not_ev_qualifier)
3141 			++s;
3142 		trace.ev_qualifier = strlist__new(s, &slist_config);
3143 		if (trace.ev_qualifier == NULL) {
3144 			fputs("Not enough memory to parse event qualifier",
3145 			      trace.output);
3146 			err = -ENOMEM;
3147 			goto out_close;
3148 		}
3149 
3150 		err = trace__validate_ev_qualifier(&trace);
3151 		if (err)
3152 			goto out_close;
3153 	}
3154 
3155 	err = target__validate(&trace.opts.target);
3156 	if (err) {
3157 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3158 		fprintf(trace.output, "%s", bf);
3159 		goto out_close;
3160 	}
3161 
3162 	err = target__parse_uid(&trace.opts.target);
3163 	if (err) {
3164 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3165 		fprintf(trace.output, "%s", bf);
3166 		goto out_close;
3167 	}
3168 
3169 	if (!argc && target__none(&trace.opts.target))
3170 		trace.opts.target.system_wide = true;
3171 
3172 	if (input_name)
3173 		err = trace__replay(&trace);
3174 	else
3175 		err = trace__run(&trace, argc, argv);
3176 
3177 out_close:
3178 	if (output_name != NULL)
3179 		fclose(trace.output);
3180 out:
3181 	return err;
3182 }
3183