xref: /linux/tools/perf/builtin-trace.c (revision 8e947f1e84fd1588f66e5f2ea69c80647de72cd4)
1 /*
2  * builtin-trace.c
3  *
4  * Builtin 'trace' command:
5  *
6  * Display a continuously updated trace of any workload, CPU, specific PID,
7  * system wide, etc.  Default format is loosely strace like, but any other
8  * event may be specified using --event.
9  *
10  * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11  *
12  * Initially based on the 'trace' prototype by Thomas Gleixner:
13  *
14  * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15  *
16  * Released under the GPL v2. (and only v2, not any later version)
17  */
18 
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include "util/exec_cmd.h"
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include "util/parse-options.h"
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36 
37 #include <libaudit.h>
38 #include <stdlib.h>
39 #include <sys/mman.h>
40 #include <linux/futex.h>
41 #include <linux/err.h>
42 
/*
 * Compatibility fallbacks: the headers of older distros may lack some of the
 * constants referenced below, so each one is defined here only when missing.
 */

/* mmap() flags */
#ifndef MAP_STACK
#define MAP_STACK		0x20000
#endif

/* madvise() behaviours */
#ifndef MADV_HWPOISON
#define MADV_HWPOISON		100
#endif

#ifndef MADV_MERGEABLE
#define MADV_MERGEABLE		12
#endif

#ifndef MADV_UNMERGEABLE
#define MADV_UNMERGEABLE	13
#endif

/* eventfd flags */
#ifndef EFD_SEMAPHORE
#define EFD_SEMAPHORE		1
#endif

#ifndef EFD_NONBLOCK
#define EFD_NONBLOCK		00004000
#endif

#ifndef EFD_CLOEXEC
#define EFD_CLOEXEC		02000000
#endif

/* open() flags */
#ifndef O_CLOEXEC
#define O_CLOEXEC		02000000
#endif

/* socket types and flags */
#ifndef SOCK_DCCP
#define SOCK_DCCP		6
#endif

#ifndef SOCK_CLOEXEC
#define SOCK_CLOEXEC		02000000
#endif

#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK		00004000
#endif

/* message flags */
#ifndef MSG_CMSG_CLOEXEC
#define MSG_CMSG_CLOEXEC	0x40000000
#endif

/* PERF_FLAG_* bits */
#ifndef PERF_FLAG_FD_NO_GROUP
#define PERF_FLAG_FD_NO_GROUP	(1UL << 0)
#endif

#ifndef PERF_FLAG_FD_OUTPUT
#define PERF_FLAG_FD_OUTPUT	(1UL << 1)
#endif

#ifndef PERF_FLAG_PID_CGROUP
#define PERF_FLAG_PID_CGROUP	(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#ifndef PERF_FLAG_FD_CLOEXEC
#define PERF_FLAG_FD_CLOEXEC	(1UL << 3) /* O_CLOEXEC */
#endif
108 
109 
110 struct tp_field {
111 	int offset;
112 	union {
113 		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
114 		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
115 	};
116 };
117 
118 #define TP_UINT_FIELD(bits) \
119 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
120 { \
121 	u##bits value; \
122 	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
123 	return value;  \
124 }
125 
126 TP_UINT_FIELD(8);
127 TP_UINT_FIELD(16);
128 TP_UINT_FIELD(32);
129 TP_UINT_FIELD(64);
130 
131 #define TP_UINT_FIELD__SWAPPED(bits) \
132 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
133 { \
134 	u##bits value; \
135 	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
136 	return bswap_##bits(value);\
137 }
138 
139 TP_UINT_FIELD__SWAPPED(16);
140 TP_UINT_FIELD__SWAPPED(32);
141 TP_UINT_FIELD__SWAPPED(64);
142 
143 static int tp_field__init_uint(struct tp_field *field,
144 			       struct format_field *format_field,
145 			       bool needs_swap)
146 {
147 	field->offset = format_field->offset;
148 
149 	switch (format_field->size) {
150 	case 1:
151 		field->integer = tp_field__u8;
152 		break;
153 	case 2:
154 		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
155 		break;
156 	case 4:
157 		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
158 		break;
159 	case 8:
160 		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
161 		break;
162 	default:
163 		return -1;
164 	}
165 
166 	return 0;
167 }
168 
169 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
170 {
171 	return sample->raw_data + field->offset;
172 }
173 
174 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
175 {
176 	field->offset = format_field->offset;
177 	field->pointer = tp_field__ptr;
178 	return 0;
179 }
180 
181 struct syscall_tp {
182 	struct tp_field id;
183 	union {
184 		struct tp_field args, ret;
185 	};
186 };
187 
188 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
189 					  struct tp_field *field,
190 					  const char *name)
191 {
192 	struct format_field *format_field = perf_evsel__field(evsel, name);
193 
194 	if (format_field == NULL)
195 		return -1;
196 
197 	return tp_field__init_uint(field, format_field, evsel->needs_swap);
198 }
199 
/*
 * Initialise the 'name' member of the struct syscall_tp stored in
 * evsel->priv from the tracepoint field of the same name.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc_tp_ = (evsel)->priv; \
	   perf_evsel__init_tp_uint_field(evsel, &sc_tp_->name, #name); })
203 
/*
 * Look up the tracepoint field called @name in @evsel and initialise @field
 * as a pointer accessor for it.
 *
 * Returns -1 when the field is not found, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *ff = perf_evsel__field(evsel, name);

	return ff != NULL ? tp_field__init_ptr(field, ff) : -1;
}
215 
/*
 * Pointer flavour of perf_evsel__init_sc_tp_uint_field(): initialise the
 * 'name' member of the struct syscall_tp stored in evsel->priv as a pointer
 * accessor for the tracepoint field of the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc_tp_ = (evsel)->priv; \
	   perf_evsel__init_tp_ptr_field(evsel, &sc_tp_->name, #name); })
219 
220 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
221 {
222 	zfree(&evsel->priv);
223 	perf_evsel__delete(evsel);
224 }
225 
226 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
227 {
228 	evsel->priv = malloc(sizeof(struct syscall_tp));
229 	if (evsel->priv != NULL) {
230 		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
231 			goto out_delete;
232 
233 		evsel->handler = handler;
234 		return 0;
235 	}
236 
237 	return -ENOMEM;
238 
239 out_delete:
240 	zfree(&evsel->priv);
241 	return -ENOENT;
242 }
243 
/*
 * Create the evsel for the syscall tracepoint named @direction and bind
 * @handler to it.
 *
 * The "raw_syscalls" subsystem is tried first, falling back to "syscalls".
 * Returns the new evsel, or NULL if neither tracepoint could be created or
 * the syscall fields could not be initialised.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (IS_ERR(evsel)) {
		/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
		evsel = perf_evsel__newtp("syscalls", direction);
		if (IS_ERR(evsel))
			return NULL;
	}

	if (perf_evsel__init_syscall_tp(evsel, handler) != 0) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
264 
/*
 * Read the 'name' member of the struct syscall_tp stored in evsel->priv
 * from @sample through its integer reader.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *sc_tp_ = (evsel)->priv; \
	   sc_tp_->name.integer(&sc_tp_->name, sample); })

/* Same as perf_evsel__sc_tp_uint(), but through the pointer reader. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *sc_tp_ = (evsel)->priv; \
	   sc_tp_->name.pointer(&sc_tp_->name, sample); })
272 
273 struct syscall_arg {
274 	unsigned long val;
275 	struct thread *thread;
276 	struct trace  *trace;
277 	void	      *parm;
278 	u8	      idx;
279 	u8	      mask;
280 };
281 
/*
 * Table mapping integer values to names: a value v is looked up as
 * entries[v - offset], provided the index falls inside [0, nr_entries).
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char **entries;
};

/* Define strarray__<array> for the string table <array>, with offset 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* Same, for a table whose first entry corresponds to the value <off>. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}
298 
299 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
300 						const char *intfmt,
301 					        struct syscall_arg *arg)
302 {
303 	struct strarray *sa = arg->parm;
304 	int idx = arg->val - sa->offset;
305 
306 	if (idx < 0 || idx >= sa->nr_entries)
307 		return scnprintf(bf, size, intfmt, arg->val);
308 
309 	return scnprintf(bf, size, "%s", sa->entries[idx]);
310 }
311 
/*
 * Format arg->val through the strarray in arg->parm, falling back to a
 * decimal number when the value has no name.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *intfmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, intfmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
319 
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * 	  gets rewritten to support all arches.
 */

/*
 * Format arg->val through the strarray in arg->parm, falling back to a
 * '0x'-prefixed hexadecimal number when the value has no name.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *intfmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, intfmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
333 
334 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
335 					struct syscall_arg *arg);
336 
337 #define SCA_FD syscall_arg__scnprintf_fd
338 
339 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
340 					   struct syscall_arg *arg)
341 {
342 	int fd = arg->val;
343 
344 	if (fd == AT_FDCWD)
345 		return scnprintf(bf, size, "CWD");
346 
347 	return syscall_arg__scnprintf_fd(bf, size, arg);
348 }
349 
350 #define SCA_FDAT syscall_arg__scnprintf_fd_at
351 
352 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
353 					      struct syscall_arg *arg);
354 
355 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
356 
357 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
358 					 struct syscall_arg *arg)
359 {
360 	return scnprintf(bf, size, "%#lx", arg->val);
361 }
362 
363 #define SCA_HEX syscall_arg__scnprintf_hex
364 
365 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
366 					 struct syscall_arg *arg)
367 {
368 	return scnprintf(bf, size, "%d", arg->val);
369 }
370 
371 #define SCA_INT syscall_arg__scnprintf_int
372 
373 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
374 					       struct syscall_arg *arg)
375 {
376 	int printed = 0, prot = arg->val;
377 
378 	if (prot == PROT_NONE)
379 		return scnprintf(bf, size, "NONE");
380 #define	P_MMAP_PROT(n) \
381 	if (prot & PROT_##n) { \
382 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
383 		prot &= ~PROT_##n; \
384 	}
385 
386 	P_MMAP_PROT(EXEC);
387 	P_MMAP_PROT(READ);
388 	P_MMAP_PROT(WRITE);
389 #ifdef PROT_SEM
390 	P_MMAP_PROT(SEM);
391 #endif
392 	P_MMAP_PROT(GROWSDOWN);
393 	P_MMAP_PROT(GROWSUP);
394 #undef P_MMAP_PROT
395 
396 	if (prot)
397 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
398 
399 	return printed;
400 }
401 
402 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
403 
404 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
405 						struct syscall_arg *arg)
406 {
407 	int printed = 0, flags = arg->val;
408 
409 #define	P_MMAP_FLAG(n) \
410 	if (flags & MAP_##n) { \
411 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
412 		flags &= ~MAP_##n; \
413 	}
414 
415 	P_MMAP_FLAG(SHARED);
416 	P_MMAP_FLAG(PRIVATE);
417 #ifdef MAP_32BIT
418 	P_MMAP_FLAG(32BIT);
419 #endif
420 	P_MMAP_FLAG(ANONYMOUS);
421 	P_MMAP_FLAG(DENYWRITE);
422 	P_MMAP_FLAG(EXECUTABLE);
423 	P_MMAP_FLAG(FILE);
424 	P_MMAP_FLAG(FIXED);
425 	P_MMAP_FLAG(GROWSDOWN);
426 #ifdef MAP_HUGETLB
427 	P_MMAP_FLAG(HUGETLB);
428 #endif
429 	P_MMAP_FLAG(LOCKED);
430 	P_MMAP_FLAG(NONBLOCK);
431 	P_MMAP_FLAG(NORESERVE);
432 	P_MMAP_FLAG(POPULATE);
433 	P_MMAP_FLAG(STACK);
434 #ifdef MAP_UNINITIALIZED
435 	P_MMAP_FLAG(UNINITIALIZED);
436 #endif
437 #undef P_MMAP_FLAG
438 
439 	if (flags)
440 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
441 
442 	return printed;
443 }
444 
445 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
446 
447 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
448 						  struct syscall_arg *arg)
449 {
450 	int printed = 0, flags = arg->val;
451 
452 #define P_MREMAP_FLAG(n) \
453 	if (flags & MREMAP_##n) { \
454 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
455 		flags &= ~MREMAP_##n; \
456 	}
457 
458 	P_MREMAP_FLAG(MAYMOVE);
459 #ifdef MREMAP_FIXED
460 	P_MREMAP_FLAG(FIXED);
461 #endif
462 #undef P_MREMAP_FLAG
463 
464 	if (flags)
465 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
466 
467 	return printed;
468 }
469 
470 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
471 
472 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
473 						      struct syscall_arg *arg)
474 {
475 	int behavior = arg->val;
476 
477 	switch (behavior) {
478 #define	P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
479 	P_MADV_BHV(NORMAL);
480 	P_MADV_BHV(RANDOM);
481 	P_MADV_BHV(SEQUENTIAL);
482 	P_MADV_BHV(WILLNEED);
483 	P_MADV_BHV(DONTNEED);
484 	P_MADV_BHV(REMOVE);
485 	P_MADV_BHV(DONTFORK);
486 	P_MADV_BHV(DOFORK);
487 	P_MADV_BHV(HWPOISON);
488 #ifdef MADV_SOFT_OFFLINE
489 	P_MADV_BHV(SOFT_OFFLINE);
490 #endif
491 	P_MADV_BHV(MERGEABLE);
492 	P_MADV_BHV(UNMERGEABLE);
493 #ifdef MADV_HUGEPAGE
494 	P_MADV_BHV(HUGEPAGE);
495 #endif
496 #ifdef MADV_NOHUGEPAGE
497 	P_MADV_BHV(NOHUGEPAGE);
498 #endif
499 #ifdef MADV_DONTDUMP
500 	P_MADV_BHV(DONTDUMP);
501 #endif
502 #ifdef MADV_DODUMP
503 	P_MADV_BHV(DODUMP);
504 #endif
505 #undef P_MADV_PHV
506 	default: break;
507 	}
508 
509 	return scnprintf(bf, size, "%#x", behavior);
510 }
511 
512 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
513 
514 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
515 					   struct syscall_arg *arg)
516 {
517 	int printed = 0, op = arg->val;
518 
519 	if (op == 0)
520 		return scnprintf(bf, size, "NONE");
521 #define	P_CMD(cmd) \
522 	if ((op & LOCK_##cmd) == LOCK_##cmd) { \
523 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
524 		op &= ~LOCK_##cmd; \
525 	}
526 
527 	P_CMD(SH);
528 	P_CMD(EX);
529 	P_CMD(NB);
530 	P_CMD(UN);
531 	P_CMD(MAND);
532 	P_CMD(RW);
533 	P_CMD(READ);
534 	P_CMD(WRITE);
535 #undef P_OP
536 
537 	if (op)
538 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
539 
540 	return printed;
541 }
542 
543 #define SCA_FLOCK syscall_arg__scnprintf_flock
544 
545 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
546 {
547 	enum syscall_futex_args {
548 		SCF_UADDR   = (1 << 0),
549 		SCF_OP	    = (1 << 1),
550 		SCF_VAL	    = (1 << 2),
551 		SCF_TIMEOUT = (1 << 3),
552 		SCF_UADDR2  = (1 << 4),
553 		SCF_VAL3    = (1 << 5),
554 	};
555 	int op = arg->val;
556 	int cmd = op & FUTEX_CMD_MASK;
557 	size_t printed = 0;
558 
559 	switch (cmd) {
560 #define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
561 	P_FUTEX_OP(WAIT);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
562 	P_FUTEX_OP(WAKE);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
563 	P_FUTEX_OP(FD);		    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
564 	P_FUTEX_OP(REQUEUE);	    arg->mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
565 	P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;			  break;
566 	P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;			  break;
567 	P_FUTEX_OP(WAKE_OP);							  break;
568 	P_FUTEX_OP(LOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
569 	P_FUTEX_OP(UNLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
570 	P_FUTEX_OP(TRYLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
571 	P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;			  break;
572 	P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;			  break;
573 	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
574 	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
575 	}
576 
577 	if (op & FUTEX_PRIVATE_FLAG)
578 		printed += scnprintf(bf + printed, size - printed, "|PRIV");
579 
580 	if (op & FUTEX_CLOCK_REALTIME)
581 		printed += scnprintf(bf + printed, size - printed, "|CLKRT");
582 
583 	return printed;
584 }
585 
586 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
587 
588 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
589 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
590 
591 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
592 static DEFINE_STRARRAY(itimers);
593 
594 static const char *keyctl_options[] = {
595 	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
596 	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
597 	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
598 	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
599 	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
600 };
601 static DEFINE_STRARRAY(keyctl_options);
602 
603 static const char *whences[] = { "SET", "CUR", "END",
604 #ifdef SEEK_DATA
605 "DATA",
606 #endif
607 #ifdef SEEK_HOLE
608 "HOLE",
609 #endif
610 };
611 static DEFINE_STRARRAY(whences);
612 
/*
 * fcntl 'cmd' names, indexed from 0.  Every entry is spelled without the
 * "F_" prefix, so that all commands are printed in the same form.
 */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
	"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
	"GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
620 
/* rlimit 'resource' names, indexed from 0 */
static const char *rlimit_resources[] = {
	"CPU",	      "FSIZE",	  "DATA",     "STACK",
	"CORE",	      "RSS",	  "NPROC",    "NOFILE",
	"MEMLOCK",    "AS",	  "LOCKS",    "SIGPENDING",
	"MSGQUEUE",   "NICE",	  "RTPRIO",   "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

/* signal mask 'how' names, indexed from 0 */
static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
static DEFINE_STRARRAY(sighow);

/* clock id names, indexed from 0 */
static const char *clockid[] = {
	"REALTIME",	      "MONOTONIC",
	"PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW",      "REALTIME_COARSE",
	"MONOTONIC_COARSE",   "BOOTTIME",
	"REALTIME_ALARM",     "BOOTTIME_ALARM",
	"SGI_CYCLE",	      "TAI",
};
static DEFINE_STRARRAY(clockid);

/* socket address/protocol family names, indexed from 0 */
static const char *socket_families[] = {
	"UNSPEC",    "LOCAL",	"INET",	     "AX25",	   "IPX",
	"APPLETALK", "NETROM",	"BRIDGE",    "ATMPVC",	   "X25",
	"INET6",     "ROSE",	"DECnet",    "NETBEUI",	   "SECURITY",
	"KEY",	     "NETLINK",	"PACKET",    "ASH",	   "ECONET",
	"ATMSVC",    "RDS",	"SNA",	     "IRDA",	   "PPPOX",
	"WANPIPE",   "LLC",	"IB",	     "CAN",	   "TIPC",
	"BLUETOOTH", "IUCV",	"RXRPC",     "ISDN",	   "PHONET",
	"IEEE802154", "CAIF",	"ALG",	     "NFC",	   "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
647 
648 #ifndef SOCK_TYPE_MASK
649 #define SOCK_TYPE_MASK 0xf
650 #endif
651 
652 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
653 						      struct syscall_arg *arg)
654 {
655 	size_t printed;
656 	int type = arg->val,
657 	    flags = type & ~SOCK_TYPE_MASK;
658 
659 	type &= SOCK_TYPE_MASK;
660 	/*
661  	 * Can't use a strarray, MIPS may override for ABI reasons.
662  	 */
663 	switch (type) {
664 #define	P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
665 	P_SK_TYPE(STREAM);
666 	P_SK_TYPE(DGRAM);
667 	P_SK_TYPE(RAW);
668 	P_SK_TYPE(RDM);
669 	P_SK_TYPE(SEQPACKET);
670 	P_SK_TYPE(DCCP);
671 	P_SK_TYPE(PACKET);
672 #undef P_SK_TYPE
673 	default:
674 		printed = scnprintf(bf, size, "%#x", type);
675 	}
676 
677 #define	P_SK_FLAG(n) \
678 	if (flags & SOCK_##n) { \
679 		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
680 		flags &= ~SOCK_##n; \
681 	}
682 
683 	P_SK_FLAG(CLOEXEC);
684 	P_SK_FLAG(NONBLOCK);
685 #undef P_SK_FLAG
686 
687 	if (flags)
688 		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
689 
690 	return printed;
691 }
692 
693 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
694 
695 #ifndef MSG_PROBE
696 #define MSG_PROBE	     0x10
697 #endif
698 #ifndef MSG_WAITFORONE
699 #define MSG_WAITFORONE	0x10000
700 #endif
701 #ifndef MSG_SENDPAGE_NOTLAST
702 #define MSG_SENDPAGE_NOTLAST 0x20000
703 #endif
704 #ifndef MSG_FASTOPEN
705 #define MSG_FASTOPEN	     0x20000000
706 #endif
707 
708 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
709 					       struct syscall_arg *arg)
710 {
711 	int printed = 0, flags = arg->val;
712 
713 	if (flags == 0)
714 		return scnprintf(bf, size, "NONE");
715 #define	P_MSG_FLAG(n) \
716 	if (flags & MSG_##n) { \
717 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
718 		flags &= ~MSG_##n; \
719 	}
720 
721 	P_MSG_FLAG(OOB);
722 	P_MSG_FLAG(PEEK);
723 	P_MSG_FLAG(DONTROUTE);
724 	P_MSG_FLAG(TRYHARD);
725 	P_MSG_FLAG(CTRUNC);
726 	P_MSG_FLAG(PROBE);
727 	P_MSG_FLAG(TRUNC);
728 	P_MSG_FLAG(DONTWAIT);
729 	P_MSG_FLAG(EOR);
730 	P_MSG_FLAG(WAITALL);
731 	P_MSG_FLAG(FIN);
732 	P_MSG_FLAG(SYN);
733 	P_MSG_FLAG(CONFIRM);
734 	P_MSG_FLAG(RST);
735 	P_MSG_FLAG(ERRQUEUE);
736 	P_MSG_FLAG(NOSIGNAL);
737 	P_MSG_FLAG(MORE);
738 	P_MSG_FLAG(WAITFORONE);
739 	P_MSG_FLAG(SENDPAGE_NOTLAST);
740 	P_MSG_FLAG(FASTOPEN);
741 	P_MSG_FLAG(CMSG_CLOEXEC);
742 #undef P_MSG_FLAG
743 
744 	if (flags)
745 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
746 
747 	return printed;
748 }
749 
750 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
751 
752 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
753 						 struct syscall_arg *arg)
754 {
755 	size_t printed = 0;
756 	int mode = arg->val;
757 
758 	if (mode == F_OK) /* 0 */
759 		return scnprintf(bf, size, "F");
760 #define	P_MODE(n) \
761 	if (mode & n##_OK) { \
762 		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
763 		mode &= ~n##_OK; \
764 	}
765 
766 	P_MODE(R);
767 	P_MODE(W);
768 	P_MODE(X);
769 #undef P_MODE
770 
771 	if (mode)
772 		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
773 
774 	return printed;
775 }
776 
777 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
778 
779 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
780 					      struct syscall_arg *arg);
781 
782 #define SCA_FILENAME syscall_arg__scnprintf_filename
783 
784 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
785 					       struct syscall_arg *arg)
786 {
787 	int printed = 0, flags = arg->val;
788 
789 	if (!(flags & O_CREAT))
790 		arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
791 
792 	if (flags == 0)
793 		return scnprintf(bf, size, "RDONLY");
794 #define	P_FLAG(n) \
795 	if (flags & O_##n) { \
796 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
797 		flags &= ~O_##n; \
798 	}
799 
800 	P_FLAG(APPEND);
801 	P_FLAG(ASYNC);
802 	P_FLAG(CLOEXEC);
803 	P_FLAG(CREAT);
804 	P_FLAG(DIRECT);
805 	P_FLAG(DIRECTORY);
806 	P_FLAG(EXCL);
807 	P_FLAG(LARGEFILE);
808 	P_FLAG(NOATIME);
809 	P_FLAG(NOCTTY);
810 #ifdef O_NONBLOCK
811 	P_FLAG(NONBLOCK);
812 #elif O_NDELAY
813 	P_FLAG(NDELAY);
814 #endif
815 #ifdef O_PATH
816 	P_FLAG(PATH);
817 #endif
818 	P_FLAG(RDWR);
819 #ifdef O_DSYNC
820 	if ((flags & O_SYNC) == O_SYNC)
821 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
822 	else {
823 		P_FLAG(DSYNC);
824 	}
825 #else
826 	P_FLAG(SYNC);
827 #endif
828 	P_FLAG(TRUNC);
829 	P_FLAG(WRONLY);
830 #undef P_FLAG
831 
832 	if (flags)
833 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
834 
835 	return printed;
836 }
837 
838 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
839 
840 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
841 						struct syscall_arg *arg)
842 {
843 	int printed = 0, flags = arg->val;
844 
845 	if (flags == 0)
846 		return 0;
847 
848 #define	P_FLAG(n) \
849 	if (flags & PERF_FLAG_##n) { \
850 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
851 		flags &= ~PERF_FLAG_##n; \
852 	}
853 
854 	P_FLAG(FD_NO_GROUP);
855 	P_FLAG(FD_OUTPUT);
856 	P_FLAG(PID_CGROUP);
857 	P_FLAG(FD_CLOEXEC);
858 #undef P_FLAG
859 
860 	if (flags)
861 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
862 
863 	return printed;
864 }
865 
866 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
867 
868 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
869 						   struct syscall_arg *arg)
870 {
871 	int printed = 0, flags = arg->val;
872 
873 	if (flags == 0)
874 		return scnprintf(bf, size, "NONE");
875 #define	P_FLAG(n) \
876 	if (flags & EFD_##n) { \
877 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
878 		flags &= ~EFD_##n; \
879 	}
880 
881 	P_FLAG(SEMAPHORE);
882 	P_FLAG(CLOEXEC);
883 	P_FLAG(NONBLOCK);
884 #undef P_FLAG
885 
886 	if (flags)
887 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
888 
889 	return printed;
890 }
891 
892 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
893 
894 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
895 						struct syscall_arg *arg)
896 {
897 	int printed = 0, flags = arg->val;
898 
899 #define	P_FLAG(n) \
900 	if (flags & O_##n) { \
901 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
902 		flags &= ~O_##n; \
903 	}
904 
905 	P_FLAG(CLOEXEC);
906 	P_FLAG(NONBLOCK);
907 #undef P_FLAG
908 
909 	if (flags)
910 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
911 
912 	return printed;
913 }
914 
915 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
916 
917 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
918 {
919 	int sig = arg->val;
920 
921 	switch (sig) {
922 #define	P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
923 	P_SIGNUM(HUP);
924 	P_SIGNUM(INT);
925 	P_SIGNUM(QUIT);
926 	P_SIGNUM(ILL);
927 	P_SIGNUM(TRAP);
928 	P_SIGNUM(ABRT);
929 	P_SIGNUM(BUS);
930 	P_SIGNUM(FPE);
931 	P_SIGNUM(KILL);
932 	P_SIGNUM(USR1);
933 	P_SIGNUM(SEGV);
934 	P_SIGNUM(USR2);
935 	P_SIGNUM(PIPE);
936 	P_SIGNUM(ALRM);
937 	P_SIGNUM(TERM);
938 	P_SIGNUM(CHLD);
939 	P_SIGNUM(CONT);
940 	P_SIGNUM(STOP);
941 	P_SIGNUM(TSTP);
942 	P_SIGNUM(TTIN);
943 	P_SIGNUM(TTOU);
944 	P_SIGNUM(URG);
945 	P_SIGNUM(XCPU);
946 	P_SIGNUM(XFSZ);
947 	P_SIGNUM(VTALRM);
948 	P_SIGNUM(PROF);
949 	P_SIGNUM(WINCH);
950 	P_SIGNUM(IO);
951 	P_SIGNUM(PWR);
952 	P_SIGNUM(SYS);
953 #ifdef SIGEMT
954 	P_SIGNUM(EMT);
955 #endif
956 #ifdef SIGSTKFLT
957 	P_SIGNUM(STKFLT);
958 #endif
959 #ifdef SIGSWI
960 	P_SIGNUM(SWI);
961 #endif
962 	default: break;
963 	}
964 
965 	return scnprintf(bf, size, "%#x", sig);
966 }
967 
968 #define SCA_SIGNUM syscall_arg__scnprintf_signum
969 
970 #if defined(__i386__) || defined(__x86_64__)
971 /*
972  * FIXME: Make this available to all arches.
973  */
974 #define TCGETS		0x5401
975 
976 static const char *tioctls[] = {
977 	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
978 	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
979 	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
980 	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
981 	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
982 	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
983 	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
984 	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
985 	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
986 	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
987 	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
988 	[0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
989 	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
990 	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
991 	"TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
992 };
993 
994 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
995 #endif /* defined(__i386__) || defined(__x86_64__) */
996 
/*
 * Initialiser fragment for a struct syscall_fmt entry: format argument
 * number 'arg' through SCA_STRARRAY using strarray__<array>.  'name' is not
 * used by the expansion; it only labels the argument at the use site.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
1000 
1001 static struct syscall_fmt {
1002 	const char *name;
1003 	const char *alias;
1004 	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1005 	void	   *arg_parm[6];
1006 	bool	   errmsg;
1007 	bool	   timeout;
1008 	bool	   hexret;
1009 } syscall_fmts[] = {
1010 	{ .name	    = "access",	    .errmsg = true,
1011 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1012 			     [1] = SCA_ACCMODE,  /* mode */ }, },
1013 	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
1014 	{ .name	    = "brk",	    .hexret = true,
1015 	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
1016 	{ .name	    = "chdir",	    .errmsg = true,
1017 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1018 	{ .name	    = "chmod",	    .errmsg = true,
1019 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1020 	{ .name	    = "chroot",	    .errmsg = true,
1021 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1022 	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
1023 	{ .name	    = "close",	    .errmsg = true,
1024 	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1025 	{ .name	    = "connect",    .errmsg = true, },
1026 	{ .name	    = "creat",	    .errmsg = true,
1027 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1028 	{ .name	    = "dup",	    .errmsg = true,
1029 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1030 	{ .name	    = "dup2",	    .errmsg = true,
1031 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1032 	{ .name	    = "dup3",	    .errmsg = true,
1033 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1034 	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1035 	{ .name	    = "eventfd2",   .errmsg = true,
1036 	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1037 	{ .name	    = "faccessat",  .errmsg = true,
1038 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1039 			     [1] = SCA_FILENAME, /* filename */ }, },
1040 	{ .name	    = "fadvise64",  .errmsg = true,
1041 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1042 	{ .name	    = "fallocate",  .errmsg = true,
1043 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1044 	{ .name	    = "fchdir",	    .errmsg = true,
1045 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1046 	{ .name	    = "fchmod",	    .errmsg = true,
1047 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1048 	{ .name	    = "fchmodat",   .errmsg = true,
1049 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1050 			     [1] = SCA_FILENAME, /* filename */ }, },
1051 	{ .name	    = "fchown",	    .errmsg = true,
1052 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1053 	{ .name	    = "fchownat",   .errmsg = true,
1054 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1055 			     [1] = SCA_FILENAME, /* filename */ }, },
1056 	{ .name	    = "fcntl",	    .errmsg = true,
1057 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1058 			     [1] = SCA_STRARRAY, /* cmd */ },
1059 	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1060 	{ .name	    = "fdatasync",  .errmsg = true,
1061 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1062 	{ .name	    = "flock",	    .errmsg = true,
1063 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1064 			     [1] = SCA_FLOCK, /* cmd */ }, },
1065 	{ .name	    = "fsetxattr",  .errmsg = true,
1066 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1067 	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat",
1068 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1069 	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat",
1070 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1071 			     [1] = SCA_FILENAME, /* filename */ }, },
1072 	{ .name	    = "fstatfs",    .errmsg = true,
1073 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1074 	{ .name	    = "fsync",    .errmsg = true,
1075 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1076 	{ .name	    = "ftruncate", .errmsg = true,
1077 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1078 	{ .name	    = "futex",	    .errmsg = true,
1079 	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1080 	{ .name	    = "futimesat", .errmsg = true,
1081 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1082 			     [1] = SCA_FILENAME, /* filename */ }, },
1083 	{ .name	    = "getdents",   .errmsg = true,
1084 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1085 	{ .name	    = "getdents64", .errmsg = true,
1086 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1087 	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1088 	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1089 	{ .name	    = "getxattr",    .errmsg = true,
1090 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1091 	{ .name	    = "inotify_add_watch",	    .errmsg = true,
1092 	  .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1093 	{ .name	    = "ioctl",	    .errmsg = true,
1094 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1095 #if defined(__i386__) || defined(__x86_64__)
1096 /*
1097  * FIXME: Make this available to all arches.
1098  */
1099 			     [1] = SCA_STRHEXARRAY, /* cmd */
1100 			     [2] = SCA_HEX, /* arg */ },
1101 	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
1102 #else
1103 			     [2] = SCA_HEX, /* arg */ }, },
1104 #endif
1105 	{ .name	    = "keyctl",	    .errmsg = true, STRARRAY(0, option, keyctl_options), },
1106 	{ .name	    = "kill",	    .errmsg = true,
1107 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1108 	{ .name	    = "lchown",    .errmsg = true,
1109 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1110 	{ .name	    = "lgetxattr",  .errmsg = true,
1111 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1112 	{ .name	    = "linkat",	    .errmsg = true,
1113 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1114 	{ .name	    = "listxattr",  .errmsg = true,
1115 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1116 	{ .name	    = "llistxattr", .errmsg = true,
1117 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1118 	{ .name	    = "lremovexattr",  .errmsg = true,
1119 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1120 	{ .name	    = "lseek",	    .errmsg = true,
1121 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1122 			     [2] = SCA_STRARRAY, /* whence */ },
1123 	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
1124 	{ .name	    = "lsetxattr",  .errmsg = true,
1125 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1126 	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat",
1127 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1128 	{ .name	    = "lsxattr",    .errmsg = true,
1129 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1130 	{ .name     = "madvise",    .errmsg = true,
1131 	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
1132 			     [2] = SCA_MADV_BHV, /* behavior */ }, },
1133 	{ .name	    = "mkdir",    .errmsg = true,
1134 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1135 	{ .name	    = "mkdirat",    .errmsg = true,
1136 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1137 			     [1] = SCA_FILENAME, /* pathname */ }, },
1138 	{ .name	    = "mknod",      .errmsg = true,
1139 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1140 	{ .name	    = "mknodat",    .errmsg = true,
1141 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1142 			     [1] = SCA_FILENAME, /* filename */ }, },
1143 	{ .name	    = "mlock",	    .errmsg = true,
1144 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1145 	{ .name	    = "mlockall",   .errmsg = true,
1146 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1147 	{ .name	    = "mmap",	    .hexret = true,
1148 	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
1149 			     [2] = SCA_MMAP_PROT, /* prot */
1150 			     [3] = SCA_MMAP_FLAGS, /* flags */
1151 			     [4] = SCA_FD, 	  /* fd */ }, },
1152 	{ .name	    = "mprotect",   .errmsg = true,
1153 	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
1154 			     [2] = SCA_MMAP_PROT, /* prot */ }, },
1155 	{ .name	    = "mq_unlink", .errmsg = true,
1156 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1157 	{ .name	    = "mremap",	    .hexret = true,
1158 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1159 			     [3] = SCA_MREMAP_FLAGS, /* flags */
1160 			     [4] = SCA_HEX, /* new_addr */ }, },
1161 	{ .name	    = "munlock",    .errmsg = true,
1162 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1163 	{ .name	    = "munmap",	    .errmsg = true,
1164 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1165 	{ .name	    = "name_to_handle_at", .errmsg = true,
1166 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1167 	{ .name	    = "newfstatat", .errmsg = true,
1168 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1169 			     [1] = SCA_FILENAME, /* filename */ }, },
1170 	{ .name	    = "open",	    .errmsg = true,
1171 	  .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
1172 			     [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1173 	{ .name	    = "open_by_handle_at", .errmsg = true,
1174 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1175 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1176 	{ .name	    = "openat",	    .errmsg = true,
1177 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1178 			     [1] = SCA_FILENAME, /* filename */
1179 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1180 	{ .name	    = "perf_event_open", .errmsg = true,
1181 	  .arg_scnprintf = { [1] = SCA_INT, /* pid */
1182 			     [2] = SCA_INT, /* cpu */
1183 			     [3] = SCA_FD,  /* group_fd */
1184 			     [4] = SCA_PERF_FLAGS,  /* flags */ }, },
1185 	{ .name	    = "pipe2",	    .errmsg = true,
1186 	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1187 	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
1188 	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
1189 	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64",
1190 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1191 	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread",
1192 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1193 	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1194 	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64",
1195 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1196 	{ .name	    = "pwritev",    .errmsg = true,
1197 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1198 	{ .name	    = "read",	    .errmsg = true,
1199 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1200 	{ .name	    = "readlink",   .errmsg = true,
1201 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1202 	{ .name	    = "readlinkat", .errmsg = true,
1203 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1204 			     [1] = SCA_FILENAME, /* pathname */ }, },
1205 	{ .name	    = "readv",	    .errmsg = true,
1206 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1207 	{ .name	    = "recvfrom",   .errmsg = true,
1208 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1209 			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1210 	{ .name	    = "recvmmsg",   .errmsg = true,
1211 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1212 			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1213 	{ .name	    = "recvmsg",    .errmsg = true,
1214 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1215 			     [2] = SCA_MSG_FLAGS, /* flags */ }, },
1216 	{ .name	    = "removexattr", .errmsg = true,
1217 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1218 	{ .name	    = "renameat",   .errmsg = true,
1219 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1220 	{ .name	    = "rmdir",    .errmsg = true,
1221 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1222 	{ .name	    = "rt_sigaction", .errmsg = true,
1223 	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1224 	{ .name	    = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1225 	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
1226 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1227 	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
1228 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1229 	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
1230 	{ .name	    = "sendmmsg",    .errmsg = true,
1231 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1232 			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1233 	{ .name	    = "sendmsg",    .errmsg = true,
1234 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1235 			     [2] = SCA_MSG_FLAGS, /* flags */ }, },
1236 	{ .name	    = "sendto",	    .errmsg = true,
1237 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1238 			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1239 	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1240 	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1241 	{ .name	    = "setxattr",   .errmsg = true,
1242 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1243 	{ .name	    = "shutdown",   .errmsg = true,
1244 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1245 	{ .name	    = "socket",	    .errmsg = true,
1246 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1247 			     [1] = SCA_SK_TYPE, /* type */ },
1248 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1249 	{ .name	    = "socketpair", .errmsg = true,
1250 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1251 			     [1] = SCA_SK_TYPE, /* type */ },
1252 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1253 	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat",
1254 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1255 	{ .name	    = "statfs",	    .errmsg = true,
1256 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1257 	{ .name	    = "swapoff",    .errmsg = true,
1258 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1259 	{ .name	    = "swapon",	    .errmsg = true,
1260 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1261 	{ .name	    = "symlinkat",  .errmsg = true,
1262 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1263 	{ .name	    = "tgkill",	    .errmsg = true,
1264 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1265 	{ .name	    = "tkill",	    .errmsg = true,
1266 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1267 	{ .name	    = "truncate",   .errmsg = true,
1268 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1269 	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
1270 	{ .name	    = "unlinkat",   .errmsg = true,
1271 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1272 			     [1] = SCA_FILENAME, /* pathname */ }, },
1273 	{ .name	    = "utime",  .errmsg = true,
1274 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1275 	{ .name	    = "utimensat",  .errmsg = true,
1276 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1277 			     [1] = SCA_FILENAME, /* filename */ }, },
1278 	{ .name	    = "utimes",  .errmsg = true,
1279 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1280 	{ .name	    = "vmsplice",  .errmsg = true,
1281 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1282 	{ .name	    = "write",	    .errmsg = true,
1283 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1284 	{ .name	    = "writev",	    .errmsg = true,
1285 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1286 };
1287 
1288 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1289 {
1290 	const struct syscall_fmt *fmt = fmtp;
1291 	return strcmp(name, fmt->name);
1292 }
1293 
1294 static struct syscall_fmt *syscall_fmt__find(const char *name)
1295 {
1296 	const int nmemb = ARRAY_SIZE(syscall_fmts);
1297 	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1298 }
1299 
/*
 * Per-syscall-id information, one slot per id in trace->syscalls.table.
 * Filled in lazily by trace__read_syscall_info().
 */
struct syscall {
	/* Format of the syscalls:sys_enter_<name> tracepoint (may be an ERR_PTR) */
	struct event_format *tp_format;
	/* Number of argument fields, not counting a leading "nr" field */
	int nr_args;
	/* First argument field of tp_format (after skipping "nr", if present) */
	struct format_field *args;
	/* Name as returned by audit_syscall_to_name() */
	const char *name;
	/* True for "exit" and "exit_group" */
	bool is_exit;
	/* Matching syscall_fmts[] entry, NULL if none */
	struct syscall_fmt *fmt;
	/* Per-argument formatters, nr_args entries, NULL entry means default */
	size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* Per-argument formatter parameters, taken from fmt->arg_parm */
	void **arg_parm;
};
1310 
1311 static size_t fprintf_duration(unsigned long t, FILE *fp)
1312 {
1313 	double duration = (double)t / NSEC_PER_MSEC;
1314 	size_t printed = fprintf(fp, "(");
1315 
1316 	if (duration >= 1.0)
1317 		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1318 	else if (duration >= 0.01)
1319 		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1320 	else
1321 		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1322 	return printed + fprintf(fp, "): ");
1323 }
1324 
1325 /**
1326  * filename.ptr: The filename char pointer that will be vfs_getname'd
1327  * filename.entry_str_pos: Where to insert the string translated from
1328  *                         filename.ptr by the vfs_getname tracepoint/kprobe.
1329  */
1330 struct thread_trace {
1331 	u64		  entry_time;
1332 	u64		  exit_time;
1333 	bool		  entry_pending;
1334 	unsigned long	  nr_events;
1335 	unsigned long	  pfmaj, pfmin;
1336 	char		  *entry_str;
1337 	double		  runtime_ms;
1338         struct {
1339 		unsigned long ptr;
1340 		short int     entry_str_pos;
1341 		bool	      pending_open;
1342 		unsigned int  namelen;
1343 		char	      *name;
1344 	} filename;
1345 	struct {
1346 		int	  max;
1347 		char	  **table;
1348 	} paths;
1349 
1350 	struct intlist *syscall_stats;
1351 };
1352 
1353 static struct thread_trace *thread_trace__new(void)
1354 {
1355 	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1356 
1357 	if (ttrace)
1358 		ttrace->paths.max = -1;
1359 
1360 	ttrace->syscall_stats = intlist__new(NULL);
1361 
1362 	return ttrace;
1363 }
1364 
1365 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1366 {
1367 	struct thread_trace *ttrace;
1368 
1369 	if (thread == NULL)
1370 		goto fail;
1371 
1372 	if (thread__priv(thread) == NULL)
1373 		thread__set_priv(thread, thread_trace__new());
1374 
1375 	if (thread__priv(thread) == NULL)
1376 		goto fail;
1377 
1378 	ttrace = thread__priv(thread);
1379 	++ttrace->nr_events;
1380 
1381 	return ttrace;
1382 fail:
1383 	color_fprintf(fp, PERF_COLOR_RED,
1384 		      "WARNING: not enough memory, dropping samples!\n");
1385 	return NULL;
1386 }
1387 
/*
 * Bit flags; presumably OR-ed into trace->trace_pgfaults to select major
 * and/or minor page fault tracing (the users are not in view here).
 */
enum {
	TRACE_PFMAJ = 1 << 0,
	TRACE_PFMIN = 1 << 1,
};

/* Size in bytes of the per-thread thread_trace->entry_str buffer */
static const size_t trace__entry_str_size = 2048;
1392 
1393 struct trace {
1394 	struct perf_tool	tool;
1395 	struct {
1396 		int		machine;
1397 		int		open_id;
1398 	}			audit;
1399 	struct {
1400 		int		max;
1401 		struct syscall  *table;
1402 		struct {
1403 			struct perf_evsel *sys_enter,
1404 					  *sys_exit;
1405 		}		events;
1406 	} syscalls;
1407 	struct record_opts	opts;
1408 	struct perf_evlist	*evlist;
1409 	struct machine		*host;
1410 	struct thread		*current;
1411 	u64			base_time;
1412 	FILE			*output;
1413 	unsigned long		nr_events;
1414 	struct strlist		*ev_qualifier;
1415 	struct {
1416 		size_t		nr;
1417 		int		*entries;
1418 	}			ev_qualifier_ids;
1419 	struct intlist		*tid_list;
1420 	struct intlist		*pid_list;
1421 	struct {
1422 		size_t		nr;
1423 		pid_t		*entries;
1424 	}			filter_pids;
1425 	double			duration_filter;
1426 	double			runtime_ms;
1427 	struct {
1428 		u64		vfs_getname,
1429 				proc_getname;
1430 	} stats;
1431 	bool			not_ev_qualifier;
1432 	bool			live;
1433 	bool			full_time;
1434 	bool			sched;
1435 	bool			multiple_threads;
1436 	bool			summary;
1437 	bool			summary_only;
1438 	bool			show_comm;
1439 	bool			show_tool_stats;
1440 	bool			trace_syscalls;
1441 	bool			force;
1442 	bool			vfs_getname;
1443 	int			trace_pgfaults;
1444 };
1445 
1446 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1447 {
1448 	struct thread_trace *ttrace = thread__priv(thread);
1449 
1450 	if (fd > ttrace->paths.max) {
1451 		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1452 
1453 		if (npath == NULL)
1454 			return -1;
1455 
1456 		if (ttrace->paths.max != -1) {
1457 			memset(npath + ttrace->paths.max + 1, 0,
1458 			       (fd - ttrace->paths.max) * sizeof(char *));
1459 		} else {
1460 			memset(npath, 0, (fd + 1) * sizeof(char *));
1461 		}
1462 
1463 		ttrace->paths.table = npath;
1464 		ttrace->paths.max   = fd;
1465 	}
1466 
1467 	ttrace->paths.table[fd] = strdup(pathname);
1468 
1469 	return ttrace->paths.table[fd] != NULL ? 0 : -1;
1470 }
1471 
1472 static int thread__read_fd_path(struct thread *thread, int fd)
1473 {
1474 	char linkname[PATH_MAX], pathname[PATH_MAX];
1475 	struct stat st;
1476 	int ret;
1477 
1478 	if (thread->pid_ == thread->tid) {
1479 		scnprintf(linkname, sizeof(linkname),
1480 			  "/proc/%d/fd/%d", thread->pid_, fd);
1481 	} else {
1482 		scnprintf(linkname, sizeof(linkname),
1483 			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1484 	}
1485 
1486 	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1487 		return -1;
1488 
1489 	ret = readlink(linkname, pathname, sizeof(pathname));
1490 
1491 	if (ret < 0 || ret > st.st_size)
1492 		return -1;
1493 
1494 	pathname[ret] = '\0';
1495 	return trace__set_fd_pathname(thread, fd, pathname);
1496 }
1497 
1498 static const char *thread__fd_path(struct thread *thread, int fd,
1499 				   struct trace *trace)
1500 {
1501 	struct thread_trace *ttrace = thread__priv(thread);
1502 
1503 	if (ttrace == NULL)
1504 		return NULL;
1505 
1506 	if (fd < 0)
1507 		return NULL;
1508 
1509 	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1510 		if (!trace->live)
1511 			return NULL;
1512 		++trace->stats.proc_getname;
1513 		if (thread__read_fd_path(thread, fd))
1514 			return NULL;
1515 	}
1516 
1517 	return ttrace->paths.table[fd];
1518 }
1519 
1520 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1521 					struct syscall_arg *arg)
1522 {
1523 	int fd = arg->val;
1524 	size_t printed = scnprintf(bf, size, "%d", fd);
1525 	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1526 
1527 	if (path)
1528 		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1529 
1530 	return printed;
1531 }
1532 
1533 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1534 					      struct syscall_arg *arg)
1535 {
1536 	int fd = arg->val;
1537 	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1538 	struct thread_trace *ttrace = thread__priv(arg->thread);
1539 
1540 	if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1541 		zfree(&ttrace->paths.table[fd]);
1542 
1543 	return printed;
1544 }
1545 
1546 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1547 				     unsigned long ptr)
1548 {
1549 	struct thread_trace *ttrace = thread__priv(thread);
1550 
1551 	ttrace->filename.ptr = ptr;
1552 	ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1553 }
1554 
1555 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1556 					      struct syscall_arg *arg)
1557 {
1558 	unsigned long ptr = arg->val;
1559 
1560 	if (!arg->trace->vfs_getname)
1561 		return scnprintf(bf, size, "%#x", ptr);
1562 
1563 	thread__set_filename_pos(arg->thread, bf, ptr);
1564 	return 0;
1565 }
1566 
1567 static bool trace__filter_duration(struct trace *trace, double t)
1568 {
1569 	return t < (trace->duration_filter * NSEC_PER_MSEC);
1570 }
1571 
1572 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1573 {
1574 	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1575 
1576 	return fprintf(fp, "%10.3f ", ts);
1577 }
1578 
/*
 * Flags set asynchronously by sig_handler() and read outside of it (the
 * readers are not in view here). They are volatile sig_atomic_t rather than
 * plain bool: that is the only object type ISO C lets a signal handler
 * write portably, and volatile keeps the compiler from caching the value
 * in code that polls them.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination and remember whether the signal
 * was SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1587 
1588 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1589 					u64 duration, u64 tstamp, FILE *fp)
1590 {
1591 	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1592 	printed += fprintf_duration(duration, fp);
1593 
1594 	if (trace->multiple_threads) {
1595 		if (trace->show_comm)
1596 			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1597 		printed += fprintf(fp, "%d ", thread->tid);
1598 	}
1599 
1600 	return printed;
1601 }
1602 
1603 static int trace__process_event(struct trace *trace, struct machine *machine,
1604 				union perf_event *event, struct perf_sample *sample)
1605 {
1606 	int ret = 0;
1607 
1608 	switch (event->header.type) {
1609 	case PERF_RECORD_LOST:
1610 		color_fprintf(trace->output, PERF_COLOR_RED,
1611 			      "LOST %" PRIu64 " events!\n", event->lost.lost);
1612 		ret = machine__process_lost_event(machine, event, sample);
1613 	default:
1614 		ret = machine__process_event(machine, event, sample);
1615 		break;
1616 	}
1617 
1618 	return ret;
1619 }
1620 
1621 static int trace__tool_process(struct perf_tool *tool,
1622 			       union perf_event *event,
1623 			       struct perf_sample *sample,
1624 			       struct machine *machine)
1625 {
1626 	struct trace *trace = container_of(tool, struct trace, tool);
1627 	return trace__process_event(trace, machine, event, sample);
1628 }
1629 
1630 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1631 {
1632 	int err = symbol__init(NULL);
1633 
1634 	if (err)
1635 		return err;
1636 
1637 	trace->host = machine__new_host();
1638 	if (trace->host == NULL)
1639 		return -ENOMEM;
1640 
1641 	if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1642 		return -errno;
1643 
1644 	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1645 					    evlist->threads, trace__tool_process, false,
1646 					    trace->opts.proc_map_timeout);
1647 	if (err)
1648 		symbol__exit();
1649 
1650 	return err;
1651 }
1652 
1653 static int syscall__set_arg_fmts(struct syscall *sc)
1654 {
1655 	struct format_field *field;
1656 	int idx = 0;
1657 
1658 	sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1659 	if (sc->arg_scnprintf == NULL)
1660 		return -1;
1661 
1662 	if (sc->fmt)
1663 		sc->arg_parm = sc->fmt->arg_parm;
1664 
1665 	for (field = sc->args; field; field = field->next) {
1666 		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1667 			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1668 		else if (field->flags & FIELD_IS_POINTER)
1669 			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1670 		++idx;
1671 	}
1672 
1673 	return 0;
1674 }
1675 
1676 static int trace__read_syscall_info(struct trace *trace, int id)
1677 {
1678 	char tp_name[128];
1679 	struct syscall *sc;
1680 	const char *name = audit_syscall_to_name(id, trace->audit.machine);
1681 
1682 	if (name == NULL)
1683 		return -1;
1684 
1685 	if (id > trace->syscalls.max) {
1686 		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1687 
1688 		if (nsyscalls == NULL)
1689 			return -1;
1690 
1691 		if (trace->syscalls.max != -1) {
1692 			memset(nsyscalls + trace->syscalls.max + 1, 0,
1693 			       (id - trace->syscalls.max) * sizeof(*sc));
1694 		} else {
1695 			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1696 		}
1697 
1698 		trace->syscalls.table = nsyscalls;
1699 		trace->syscalls.max   = id;
1700 	}
1701 
1702 	sc = trace->syscalls.table + id;
1703 	sc->name = name;
1704 
1705 	sc->fmt  = syscall_fmt__find(sc->name);
1706 
1707 	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1708 	sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1709 
1710 	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1711 		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1712 		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1713 	}
1714 
1715 	if (IS_ERR(sc->tp_format))
1716 		return -1;
1717 
1718 	sc->args = sc->tp_format->format.fields;
1719 	sc->nr_args = sc->tp_format->format.nr_fields;
1720 	/* drop nr field - not relevant here; does not exist on older kernels */
1721 	if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1722 		sc->args = sc->args->next;
1723 		--sc->nr_args;
1724 	}
1725 
1726 	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1727 
1728 	return syscall__set_arg_fmts(sc);
1729 }
1730 
1731 static int trace__validate_ev_qualifier(struct trace *trace)
1732 {
1733 	int err = 0, i;
1734 	struct str_node *pos;
1735 
1736 	trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1737 	trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1738 						 sizeof(trace->ev_qualifier_ids.entries[0]));
1739 
1740 	if (trace->ev_qualifier_ids.entries == NULL) {
1741 		fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1742 		       trace->output);
1743 		err = -EINVAL;
1744 		goto out;
1745 	}
1746 
1747 	i = 0;
1748 
1749 	strlist__for_each(pos, trace->ev_qualifier) {
1750 		const char *sc = pos->s;
1751 		int id = audit_name_to_syscall(sc, trace->audit.machine);
1752 
1753 		if (id < 0) {
1754 			if (err == 0) {
1755 				fputs("Error:\tInvalid syscall ", trace->output);
1756 				err = -EINVAL;
1757 			} else {
1758 				fputs(", ", trace->output);
1759 			}
1760 
1761 			fputs(sc, trace->output);
1762 		}
1763 
1764 		trace->ev_qualifier_ids.entries[i++] = id;
1765 	}
1766 
1767 	if (err < 0) {
1768 		fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1769 		      "\nHint:\tand: 'man syscalls'\n", trace->output);
1770 		zfree(&trace->ev_qualifier_ids.entries);
1771 		trace->ev_qualifier_ids.nr = 0;
1772 	}
1773 out:
1774 	return err;
1775 }
1776 
1777 /*
1778  * args is to be interpreted as a series of longs but we need to handle
1779  * 8-byte unaligned accesses. args points to raw_data within the event
1780  * and raw_data is guaranteed to be 8-byte unaligned because it is
1781  * preceded by raw_size which is a u32. So we need to copy args to a temp
1782  * variable to read it. Most notably this avoids extended load instructions
1783  * on unaligned addresses
1784  */
1785 
1786 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1787 				      unsigned char *args, struct trace *trace,
1788 				      struct thread *thread)
1789 {
1790 	size_t printed = 0;
1791 	unsigned char *p;
1792 	unsigned long val;
1793 
1794 	if (sc->args != NULL) {
1795 		struct format_field *field;
1796 		u8 bit = 1;
1797 		struct syscall_arg arg = {
1798 			.idx	= 0,
1799 			.mask	= 0,
1800 			.trace  = trace,
1801 			.thread = thread,
1802 		};
1803 
1804 		for (field = sc->args; field;
1805 		     field = field->next, ++arg.idx, bit <<= 1) {
1806 			if (arg.mask & bit)
1807 				continue;
1808 
1809 			/* special care for unaligned accesses */
1810 			p = args + sizeof(unsigned long) * arg.idx;
1811 			memcpy(&val, p, sizeof(val));
1812 
1813 			/*
1814  			 * Suppress this argument if its value is zero and
1815  			 * and we don't have a string associated in an
1816  			 * strarray for it.
1817  			 */
1818 			if (val == 0 &&
1819 			    !(sc->arg_scnprintf &&
1820 			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1821 			      sc->arg_parm[arg.idx]))
1822 				continue;
1823 
1824 			printed += scnprintf(bf + printed, size - printed,
1825 					     "%s%s: ", printed ? ", " : "", field->name);
1826 			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1827 				arg.val = val;
1828 				if (sc->arg_parm)
1829 					arg.parm = sc->arg_parm[arg.idx];
1830 				printed += sc->arg_scnprintf[arg.idx](bf + printed,
1831 								      size - printed, &arg);
1832 			} else {
1833 				printed += scnprintf(bf + printed, size - printed,
1834 						     "%ld", val);
1835 			}
1836 		}
1837 	} else {
1838 		int i = 0;
1839 
1840 		while (i < 6) {
1841 			/* special care for unaligned accesses */
1842 			p = args + sizeof(unsigned long) * i;
1843 			memcpy(&val, p, sizeof(val));
1844 			printed += scnprintf(bf + printed, size - printed,
1845 					     "%sarg%d: %ld",
1846 					     printed ? ", " : "", i, val);
1847 			++i;
1848 		}
1849 	}
1850 
1851 	return printed;
1852 }
1853 
/*
 * Signature shared by the per-tracepoint sample handlers in this file,
 * e.g. trace__sys_enter() and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1857 
1858 static struct syscall *trace__syscall_info(struct trace *trace,
1859 					   struct perf_evsel *evsel, int id)
1860 {
1861 
1862 	if (id < 0) {
1863 
1864 		/*
1865 		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1866 		 * before that, leaving at a higher verbosity level till that is
1867 		 * explained. Reproduced with plain ftrace with:
1868 		 *
1869 		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1870 		 * grep "NR -1 " /t/trace_pipe
1871 		 *
1872 		 * After generating some load on the machine.
1873  		 */
1874 		if (verbose > 1) {
1875 			static u64 n;
1876 			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1877 				id, perf_evsel__name(evsel), ++n);
1878 		}
1879 		return NULL;
1880 	}
1881 
1882 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1883 	    trace__read_syscall_info(trace, id))
1884 		goto out_cant_read;
1885 
1886 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1887 		goto out_cant_read;
1888 
1889 	return &trace->syscalls.table[id];
1890 
1891 out_cant_read:
1892 	if (verbose) {
1893 		fprintf(trace->output, "Problems reading syscall %d", id);
1894 		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1895 			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1896 		fputs(" information\n", trace->output);
1897 	}
1898 	return NULL;
1899 }
1900 
1901 static void thread__update_stats(struct thread_trace *ttrace,
1902 				 int id, struct perf_sample *sample)
1903 {
1904 	struct int_node *inode;
1905 	struct stats *stats;
1906 	u64 duration = 0;
1907 
1908 	inode = intlist__findnew(ttrace->syscall_stats, id);
1909 	if (inode == NULL)
1910 		return;
1911 
1912 	stats = inode->priv;
1913 	if (stats == NULL) {
1914 		stats = malloc(sizeof(struct stats));
1915 		if (stats == NULL)
1916 			return;
1917 		init_stats(stats);
1918 		inode->priv = stats;
1919 	}
1920 
1921 	if (ttrace->entry_time && sample->time > ttrace->entry_time)
1922 		duration = sample->time - ttrace->entry_time;
1923 
1924 	update_stats(stats, duration);
1925 }
1926 
1927 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1928 {
1929 	struct thread_trace *ttrace;
1930 	u64 duration;
1931 	size_t printed;
1932 
1933 	if (trace->current == NULL)
1934 		return 0;
1935 
1936 	ttrace = thread__priv(trace->current);
1937 
1938 	if (!ttrace->entry_pending)
1939 		return 0;
1940 
1941 	duration = sample->time - ttrace->entry_time;
1942 
1943 	printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1944 	printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1945 	ttrace->entry_pending = false;
1946 
1947 	return printed;
1948 }
1949 
1950 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1951 			    union perf_event *event __maybe_unused,
1952 			    struct perf_sample *sample)
1953 {
1954 	char *msg;
1955 	void *args;
1956 	size_t printed = 0;
1957 	struct thread *thread;
1958 	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1959 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
1960 	struct thread_trace *ttrace;
1961 
1962 	if (sc == NULL)
1963 		return -1;
1964 
1965 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1966 	ttrace = thread__trace(thread, trace->output);
1967 	if (ttrace == NULL)
1968 		goto out_put;
1969 
1970 	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1971 
1972 	if (ttrace->entry_str == NULL) {
1973 		ttrace->entry_str = malloc(trace__entry_str_size);
1974 		if (!ttrace->entry_str)
1975 			goto out_put;
1976 	}
1977 
1978 	if (!trace->summary_only)
1979 		trace__printf_interrupted_entry(trace, sample);
1980 
1981 	ttrace->entry_time = sample->time;
1982 	msg = ttrace->entry_str;
1983 	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1984 
1985 	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1986 					   args, trace, thread);
1987 
1988 	if (sc->is_exit) {
1989 		if (!trace->duration_filter && !trace->summary_only) {
1990 			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1991 			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1992 		}
1993 	} else {
1994 		ttrace->entry_pending = true;
1995 		/* See trace__vfs_getname & trace__sys_exit */
1996 		ttrace->filename.pending_open = false;
1997 	}
1998 
1999 	if (trace->current != thread) {
2000 		thread__put(trace->current);
2001 		trace->current = thread__get(thread);
2002 	}
2003 	err = 0;
2004 out_put:
2005 	thread__put(thread);
2006 	return err;
2007 }
2008 
2009 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
2010 			   union perf_event *event __maybe_unused,
2011 			   struct perf_sample *sample)
2012 {
2013 	long ret;
2014 	u64 duration = 0;
2015 	struct thread *thread;
2016 	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2017 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
2018 	struct thread_trace *ttrace;
2019 
2020 	if (sc == NULL)
2021 		return -1;
2022 
2023 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2024 	ttrace = thread__trace(thread, trace->output);
2025 	if (ttrace == NULL)
2026 		goto out_put;
2027 
2028 	if (trace->summary)
2029 		thread__update_stats(ttrace, id, sample);
2030 
2031 	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2032 
2033 	if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
2034 		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2035 		ttrace->filename.pending_open = false;
2036 		++trace->stats.vfs_getname;
2037 	}
2038 
2039 	ttrace->exit_time = sample->time;
2040 
2041 	if (ttrace->entry_time) {
2042 		duration = sample->time - ttrace->entry_time;
2043 		if (trace__filter_duration(trace, duration))
2044 			goto out;
2045 	} else if (trace->duration_filter)
2046 		goto out;
2047 
2048 	if (trace->summary_only)
2049 		goto out;
2050 
2051 	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2052 
2053 	if (ttrace->entry_pending) {
2054 		fprintf(trace->output, "%-70s", ttrace->entry_str);
2055 	} else {
2056 		fprintf(trace->output, " ... [");
2057 		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2058 		fprintf(trace->output, "]: %s()", sc->name);
2059 	}
2060 
2061 	if (sc->fmt == NULL) {
2062 signed_print:
2063 		fprintf(trace->output, ") = %ld", ret);
2064 	} else if (ret < 0 && sc->fmt->errmsg) {
2065 		char bf[STRERR_BUFSIZE];
2066 		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2067 			   *e = audit_errno_to_name(-ret);
2068 
2069 		fprintf(trace->output, ") = -1 %s %s", e, emsg);
2070 	} else if (ret == 0 && sc->fmt->timeout)
2071 		fprintf(trace->output, ") = 0 Timeout");
2072 	else if (sc->fmt->hexret)
2073 		fprintf(trace->output, ") = %#lx", ret);
2074 	else
2075 		goto signed_print;
2076 
2077 	fputc('\n', trace->output);
2078 out:
2079 	ttrace->entry_pending = false;
2080 	err = 0;
2081 out_put:
2082 	thread__put(thread);
2083 	return err;
2084 }
2085 
2086 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2087 			      union perf_event *event __maybe_unused,
2088 			      struct perf_sample *sample)
2089 {
2090 	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2091 	struct thread_trace *ttrace;
2092 	size_t filename_len, entry_str_len, to_move;
2093 	ssize_t remaining_space;
2094 	char *pos;
2095 	const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2096 
2097 	if (!thread)
2098 		goto out;
2099 
2100 	ttrace = thread__priv(thread);
2101 	if (!ttrace)
2102 		goto out;
2103 
2104 	filename_len = strlen(filename);
2105 
2106 	if (ttrace->filename.namelen < filename_len) {
2107 		char *f = realloc(ttrace->filename.name, filename_len + 1);
2108 
2109 		if (f == NULL)
2110 				goto out;
2111 
2112 		ttrace->filename.namelen = filename_len;
2113 		ttrace->filename.name = f;
2114 	}
2115 
2116 	strcpy(ttrace->filename.name, filename);
2117 	ttrace->filename.pending_open = true;
2118 
2119 	if (!ttrace->filename.ptr)
2120 		goto out;
2121 
2122 	entry_str_len = strlen(ttrace->entry_str);
2123 	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2124 	if (remaining_space <= 0)
2125 		goto out;
2126 
2127 	if (filename_len > (size_t)remaining_space) {
2128 		filename += filename_len - remaining_space;
2129 		filename_len = remaining_space;
2130 	}
2131 
2132 	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2133 	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2134 	memmove(pos + filename_len, pos, to_move);
2135 	memcpy(pos, filename, filename_len);
2136 
2137 	ttrace->filename.ptr = 0;
2138 	ttrace->filename.entry_str_pos = 0;
2139 out:
2140 	return 0;
2141 }
2142 
2143 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2144 				     union perf_event *event __maybe_unused,
2145 				     struct perf_sample *sample)
2146 {
2147         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2148 	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2149 	struct thread *thread = machine__findnew_thread(trace->host,
2150 							sample->pid,
2151 							sample->tid);
2152 	struct thread_trace *ttrace = thread__trace(thread, trace->output);
2153 
2154 	if (ttrace == NULL)
2155 		goto out_dump;
2156 
2157 	ttrace->runtime_ms += runtime_ms;
2158 	trace->runtime_ms += runtime_ms;
2159 	thread__put(thread);
2160 	return 0;
2161 
2162 out_dump:
2163 	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2164 	       evsel->name,
2165 	       perf_evsel__strval(evsel, sample, "comm"),
2166 	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2167 	       runtime,
2168 	       perf_evsel__intval(evsel, sample, "vruntime"));
2169 	thread__put(thread);
2170 	return 0;
2171 }
2172 
2173 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2174 				union perf_event *event __maybe_unused,
2175 				struct perf_sample *sample)
2176 {
2177 	trace__printf_interrupted_entry(trace, sample);
2178 	trace__fprintf_tstamp(trace, sample->time, trace->output);
2179 
2180 	if (trace->trace_syscalls)
2181 		fprintf(trace->output, "(         ): ");
2182 
2183 	fprintf(trace->output, "%s:", evsel->name);
2184 
2185 	if (evsel->tp_format) {
2186 		event_format__fprintf(evsel->tp_format, sample->cpu,
2187 				      sample->raw_data, sample->raw_size,
2188 				      trace->output);
2189 	}
2190 
2191 	fprintf(trace->output, ")\n");
2192 	return 0;
2193 }
2194 
2195 static void print_location(FILE *f, struct perf_sample *sample,
2196 			   struct addr_location *al,
2197 			   bool print_dso, bool print_sym)
2198 {
2199 
2200 	if ((verbose || print_dso) && al->map)
2201 		fprintf(f, "%s@", al->map->dso->long_name);
2202 
2203 	if ((verbose || print_sym) && al->sym)
2204 		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2205 			al->addr - al->sym->start);
2206 	else if (al->map)
2207 		fprintf(f, "0x%" PRIx64, al->addr);
2208 	else
2209 		fprintf(f, "0x%" PRIx64, sample->addr);
2210 }
2211 
2212 static int trace__pgfault(struct trace *trace,
2213 			  struct perf_evsel *evsel,
2214 			  union perf_event *event,
2215 			  struct perf_sample *sample)
2216 {
2217 	struct thread *thread;
2218 	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2219 	struct addr_location al;
2220 	char map_type = 'd';
2221 	struct thread_trace *ttrace;
2222 	int err = -1;
2223 
2224 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2225 	ttrace = thread__trace(thread, trace->output);
2226 	if (ttrace == NULL)
2227 		goto out_put;
2228 
2229 	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2230 		ttrace->pfmaj++;
2231 	else
2232 		ttrace->pfmin++;
2233 
2234 	if (trace->summary_only)
2235 		goto out;
2236 
2237 	thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2238 			      sample->ip, &al);
2239 
2240 	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2241 
2242 	fprintf(trace->output, "%sfault [",
2243 		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2244 		"maj" : "min");
2245 
2246 	print_location(trace->output, sample, &al, false, true);
2247 
2248 	fprintf(trace->output, "] => ");
2249 
2250 	thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2251 				   sample->addr, &al);
2252 
2253 	if (!al.map) {
2254 		thread__find_addr_location(thread, cpumode,
2255 					   MAP__FUNCTION, sample->addr, &al);
2256 
2257 		if (al.map)
2258 			map_type = 'x';
2259 		else
2260 			map_type = '?';
2261 	}
2262 
2263 	print_location(trace->output, sample, &al, true, false);
2264 
2265 	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2266 out:
2267 	err = 0;
2268 out_put:
2269 	thread__put(thread);
2270 	return err;
2271 }
2272 
2273 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2274 {
2275 	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2276 	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2277 		return false;
2278 
2279 	if (trace->pid_list || trace->tid_list)
2280 		return true;
2281 
2282 	return false;
2283 }
2284 
2285 static int trace__process_sample(struct perf_tool *tool,
2286 				 union perf_event *event,
2287 				 struct perf_sample *sample,
2288 				 struct perf_evsel *evsel,
2289 				 struct machine *machine __maybe_unused)
2290 {
2291 	struct trace *trace = container_of(tool, struct trace, tool);
2292 	int err = 0;
2293 
2294 	tracepoint_handler handler = evsel->handler;
2295 
2296 	if (skip_sample(trace, sample))
2297 		return 0;
2298 
2299 	if (!trace->full_time && trace->base_time == 0)
2300 		trace->base_time = sample->time;
2301 
2302 	if (handler) {
2303 		++trace->nr_events;
2304 		handler(trace, evsel, event, sample);
2305 	}
2306 
2307 	return err;
2308 }
2309 
2310 static int parse_target_str(struct trace *trace)
2311 {
2312 	if (trace->opts.target.pid) {
2313 		trace->pid_list = intlist__new(trace->opts.target.pid);
2314 		if (trace->pid_list == NULL) {
2315 			pr_err("Error parsing process id string\n");
2316 			return -EINVAL;
2317 		}
2318 	}
2319 
2320 	if (trace->opts.target.tid) {
2321 		trace->tid_list = intlist__new(trace->opts.target.tid);
2322 		if (trace->tid_list == NULL) {
2323 			pr_err("Error parsing thread id string\n");
2324 			return -EINVAL;
2325 		}
2326 	}
2327 
2328 	return 0;
2329 }
2330 
2331 static int trace__record(struct trace *trace, int argc, const char **argv)
2332 {
2333 	unsigned int rec_argc, i, j;
2334 	const char **rec_argv;
2335 	const char * const record_args[] = {
2336 		"record",
2337 		"-R",
2338 		"-m", "1024",
2339 		"-c", "1",
2340 	};
2341 
2342 	const char * const sc_args[] = { "-e", };
2343 	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2344 	const char * const majpf_args[] = { "-e", "major-faults" };
2345 	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2346 	const char * const minpf_args[] = { "-e", "minor-faults" };
2347 	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2348 
2349 	/* +1 is for the event string below */
2350 	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2351 		majpf_args_nr + minpf_args_nr + argc;
2352 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
2353 
2354 	if (rec_argv == NULL)
2355 		return -ENOMEM;
2356 
2357 	j = 0;
2358 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
2359 		rec_argv[j++] = record_args[i];
2360 
2361 	if (trace->trace_syscalls) {
2362 		for (i = 0; i < sc_args_nr; i++)
2363 			rec_argv[j++] = sc_args[i];
2364 
2365 		/* event string may be different for older kernels - e.g., RHEL6 */
2366 		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2367 			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2368 		else if (is_valid_tracepoint("syscalls:sys_enter"))
2369 			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2370 		else {
2371 			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2372 			return -1;
2373 		}
2374 	}
2375 
2376 	if (trace->trace_pgfaults & TRACE_PFMAJ)
2377 		for (i = 0; i < majpf_args_nr; i++)
2378 			rec_argv[j++] = majpf_args[i];
2379 
2380 	if (trace->trace_pgfaults & TRACE_PFMIN)
2381 		for (i = 0; i < minpf_args_nr; i++)
2382 			rec_argv[j++] = minpf_args[i];
2383 
2384 	for (i = 0; i < (unsigned int)argc; i++)
2385 		rec_argv[j++] = argv[i];
2386 
2387 	return cmd_record(j, rec_argv, NULL);
2388 }
2389 
2390 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2391 
2392 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2393 {
2394 	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2395 
2396 	if (IS_ERR(evsel))
2397 		return false;
2398 
2399 	if (perf_evsel__field(evsel, "pathname") == NULL) {
2400 		perf_evsel__delete(evsel);
2401 		return false;
2402 	}
2403 
2404 	evsel->handler = trace__vfs_getname;
2405 	perf_evlist__add(evlist, evsel);
2406 	return true;
2407 }
2408 
2409 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2410 				    u64 config)
2411 {
2412 	struct perf_evsel *evsel;
2413 	struct perf_event_attr attr = {
2414 		.type = PERF_TYPE_SOFTWARE,
2415 		.mmap_data = 1,
2416 	};
2417 
2418 	attr.config = config;
2419 	attr.sample_period = 1;
2420 
2421 	event_attr_init(&attr);
2422 
2423 	evsel = perf_evsel__new(&attr);
2424 	if (!evsel)
2425 		return -ENOMEM;
2426 
2427 	evsel->handler = trace__pgfault;
2428 	perf_evlist__add(evlist, evsel);
2429 
2430 	return 0;
2431 }
2432 
2433 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2434 {
2435 	const u32 type = event->header.type;
2436 	struct perf_evsel *evsel;
2437 
2438 	if (!trace->full_time && trace->base_time == 0)
2439 		trace->base_time = sample->time;
2440 
2441 	if (type != PERF_RECORD_SAMPLE) {
2442 		trace__process_event(trace, trace->host, event, sample);
2443 		return;
2444 	}
2445 
2446 	evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2447 	if (evsel == NULL) {
2448 		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2449 		return;
2450 	}
2451 
2452 	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2453 	    sample->raw_data == NULL) {
2454 		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2455 		       perf_evsel__name(evsel), sample->tid,
2456 		       sample->cpu, sample->raw_size);
2457 	} else {
2458 		tracepoint_handler handler = evsel->handler;
2459 		handler(trace, evsel, event, sample);
2460 	}
2461 }
2462 
2463 static int trace__add_syscall_newtp(struct trace *trace)
2464 {
2465 	int ret = -1;
2466 	struct perf_evlist *evlist = trace->evlist;
2467 	struct perf_evsel *sys_enter, *sys_exit;
2468 
2469 	sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2470 	if (sys_enter == NULL)
2471 		goto out;
2472 
2473 	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2474 		goto out_delete_sys_enter;
2475 
2476 	sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2477 	if (sys_exit == NULL)
2478 		goto out_delete_sys_enter;
2479 
2480 	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2481 		goto out_delete_sys_exit;
2482 
2483 	perf_evlist__add(evlist, sys_enter);
2484 	perf_evlist__add(evlist, sys_exit);
2485 
2486 	trace->syscalls.events.sys_enter = sys_enter;
2487 	trace->syscalls.events.sys_exit  = sys_exit;
2488 
2489 	ret = 0;
2490 out:
2491 	return ret;
2492 
2493 out_delete_sys_exit:
2494 	perf_evsel__delete_priv(sys_exit);
2495 out_delete_sys_enter:
2496 	perf_evsel__delete_priv(sys_enter);
2497 	goto out;
2498 }
2499 
2500 static int trace__set_ev_qualifier_filter(struct trace *trace)
2501 {
2502 	int err = -1;
2503 	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2504 						trace->ev_qualifier_ids.nr,
2505 						trace->ev_qualifier_ids.entries);
2506 
2507 	if (filter == NULL)
2508 		goto out_enomem;
2509 
2510 	if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2511 		err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2512 
2513 	free(filter);
2514 out:
2515 	return err;
2516 out_enomem:
2517 	errno = ENOMEM;
2518 	goto out;
2519 }
2520 
/*
 * Live tracing driver.
 *
 * Adds the events selected in @trace (syscall tracepoints, vfs_getname,
 * page faults, sched_stat_runtime) to trace->evlist, optionally prepares
 * the workload given by @argv (when @argc > 0), opens, filters and mmaps
 * the events and then loops reading the ring buffers, handing each parsed
 * event to trace__handle_event().
 *
 * On exit the evlist is always deleted and trace->evlist set to NULL.  The
 * thread summary and tool stats are printed only when err is 0 at that
 * point.
 *
 * Returns the last value stored in err: -1 or a negative value from the
 * failing setup step, otherwise the result of the last operation performed.
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *evsel;
	int err = -1, i;
	unsigned long before;
	const bool forks = argc > 0;
	bool draining = false;

	trace->live = true;

	if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
		goto out_error_raw_syscalls;

	/* Optional: without the vfs_getname probe tracing still works */
	if (trace->trace_syscalls)
		trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);

	if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
		goto out_error_mem;
	}

	if ((trace->trace_pgfaults & TRACE_PFMIN) &&
	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
		goto out_error_mem;

	if (trace->sched &&
	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
				   trace__sched_stat_runtime))
		goto out_error_sched_stat_runtime;

	err = perf_evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
		goto out_delete_evlist;
	}

	perf_evlist__config(evlist, &trace->opts);

	/* NOTE(review): sig_handler presumably sets done/interrupted - defined outside this chunk */
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);

	if (forks) {
		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
						    argv, false, NULL);
		if (err < 0) {
			fprintf(trace->output, "Couldn't run the workload!\n");
			goto out_delete_evlist;
		}
	}

	err = perf_evlist__open(evlist);
	if (err < 0)
		goto out_error_open;

	/*
	 * Better not use !target__has_task() here because we need to cover the
	 * case where no threads were specified in the command line, but a
	 * workload was, and in that case we will fill in the thread_map when
	 * we fork the workload in perf_evlist__prepare_workload.
	 */
	if (trace->filter_pids.nr > 0)
		err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
	else if (thread_map__pid(evlist->threads, 0) == -1)
		err = perf_evlist__set_filter_pid(evlist, getpid());

	if (err < 0)
		goto out_error_mem;

	if (trace->ev_qualifier_ids.nr > 0) {
		err = trace__set_ev_qualifier_filter(trace);
		if (err < 0)
			goto out_errno;

		pr_debug("event qualifier tracepoint filter: %s\n",
			 trace->syscalls.events.sys_exit->filter);
	}

	/* evsel is passed by address and used in the error message on failure */
	err = perf_evlist__apply_filters(evlist, &evsel);
	if (err < 0)
		goto out_error_apply_filters;

	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
	if (err < 0)
		goto out_error_mmap;

	if (!target__none(&trace->opts.target))
		perf_evlist__enable(evlist);

	if (forks)
		perf_evlist__start_workload(evlist);

	trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
				  evlist->threads->nr > 1 ||
				  perf_evlist__first(evlist)->attr.inherit;
again:
	/* Snapshot to detect whether this pass consumed any event */
	before = trace->nr_events;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		union perf_event *event;

		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
			struct perf_sample sample;

			++trace->nr_events;

			err = perf_evlist__parse_sample(evlist, event, &sample);
			if (err) {
				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
				goto next_event;
			}

			trace__handle_event(trace, event, &sample);
next_event:
			perf_evlist__mmap_consume(evlist, i);

			if (interrupted)
				goto out_disable;

			/* Stop producing new events, then drain what is buffered */
			if (done && !draining) {
				perf_evlist__disable(evlist);
				draining = true;
			}
		}
	}

	if (trace->nr_events == before) {
		/* Nothing read: block until more data, or 100ms once done is set */
		int timeout = done ? 100 : -1;

		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
				draining = true;

			goto again;
		}
	} else {
		goto again;
	}

out_disable:
	thread__zput(trace->current);

	perf_evlist__disable(evlist);

	if (!err) {
		if (trace->summary)
			trace__fprintf_thread_summary(trace, trace->output);

		if (trace->show_tool_stats) {
			fprintf(trace->output, "Stats:\n "
					       " vfs_getname : %" PRIu64 "\n"
					       " proc_getname: %" PRIu64 "\n",
				trace->stats.vfs_getname,
				trace->stats.proc_getname);
		}
	}

out_delete_evlist:
	perf_evlist__delete(evlist);
	trace->evlist = NULL;
	trace->live = false;
	return err;
/*
 * Error reporting. The block below is never entered by falling through (it
 * follows a return); its labels are only reached via goto. The braces just
 * scope errbuf.
 */
{
	char errbuf[BUFSIZ];

out_error_sched_stat_runtime:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
	goto out_error;

out_error_raw_syscalls:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
	goto out_error;

out_error_mmap:
	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
	goto out_error;

out_error_open:
	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

	/* Common tail: print whatever message was formatted into errbuf */
out_error:
	fprintf(trace->output, "%s\n", errbuf);
	goto out_delete_evlist;

out_error_apply_filters:
	fprintf(trace->output,
		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
		evsel->filter, perf_evsel__name(evsel), errno,
		strerror_r(errno, errbuf, sizeof(errbuf)));
	goto out_delete_evlist;
}
out_error_mem:
	fprintf(trace->output, "Not enough memory to run!\n");
	goto out_delete_evlist;

out_errno:
	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
	goto out_delete_evlist;
}
2726 
2727 static int trace__replay(struct trace *trace)
2728 {
2729 	const struct perf_evsel_str_handler handlers[] = {
2730 		{ "probe:vfs_getname",	     trace__vfs_getname, },
2731 	};
2732 	struct perf_data_file file = {
2733 		.path  = input_name,
2734 		.mode  = PERF_DATA_MODE_READ,
2735 		.force = trace->force,
2736 	};
2737 	struct perf_session *session;
2738 	struct perf_evsel *evsel;
2739 	int err = -1;
2740 
2741 	trace->tool.sample	  = trace__process_sample;
2742 	trace->tool.mmap	  = perf_event__process_mmap;
2743 	trace->tool.mmap2	  = perf_event__process_mmap2;
2744 	trace->tool.comm	  = perf_event__process_comm;
2745 	trace->tool.exit	  = perf_event__process_exit;
2746 	trace->tool.fork	  = perf_event__process_fork;
2747 	trace->tool.attr	  = perf_event__process_attr;
2748 	trace->tool.tracing_data = perf_event__process_tracing_data;
2749 	trace->tool.build_id	  = perf_event__process_build_id;
2750 
2751 	trace->tool.ordered_events = true;
2752 	trace->tool.ordering_requires_timestamps = true;
2753 
2754 	/* add tid to output */
2755 	trace->multiple_threads = true;
2756 
2757 	session = perf_session__new(&file, false, &trace->tool);
2758 	if (session == NULL)
2759 		return -1;
2760 
2761 	if (symbol__init(&session->header.env) < 0)
2762 		goto out;
2763 
2764 	trace->host = &session->machines.host;
2765 
2766 	err = perf_session__set_tracepoints_handlers(session, handlers);
2767 	if (err)
2768 		goto out;
2769 
2770 	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2771 						     "raw_syscalls:sys_enter");
2772 	/* older kernels have syscalls tp versus raw_syscalls */
2773 	if (evsel == NULL)
2774 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2775 							     "syscalls:sys_enter");
2776 
2777 	if (evsel &&
2778 	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2779 	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2780 		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2781 		goto out;
2782 	}
2783 
2784 	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2785 						     "raw_syscalls:sys_exit");
2786 	if (evsel == NULL)
2787 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2788 							     "syscalls:sys_exit");
2789 	if (evsel &&
2790 	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2791 	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2792 		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2793 		goto out;
2794 	}
2795 
2796 	evlist__for_each(session->evlist, evsel) {
2797 		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2798 		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2799 		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2800 		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2801 			evsel->handler = trace__pgfault;
2802 	}
2803 
2804 	err = parse_target_str(trace);
2805 	if (err != 0)
2806 		goto out;
2807 
2808 	setup_pager();
2809 
2810 	err = perf_session__process_events(session);
2811 	if (err)
2812 		pr_err("Failed to process events, error %d", err);
2813 
2814 	else if (trace->summary)
2815 		trace__fprintf_thread_summary(trace, trace->output);
2816 
2817 out:
2818 	perf_session__delete(session);
2819 
2820 	return err;
2821 }
2822 
/*
 * Print the heading of the per-thread summary to @fp.
 *
 * Returns fprintf()'s result converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2831 
2832 static size_t thread__dump_stats(struct thread_trace *ttrace,
2833 				 struct trace *trace, FILE *fp)
2834 {
2835 	struct stats *stats;
2836 	size_t printed = 0;
2837 	struct syscall *sc;
2838 	struct int_node *inode = intlist__first(ttrace->syscall_stats);
2839 
2840 	if (inode == NULL)
2841 		return 0;
2842 
2843 	printed += fprintf(fp, "\n");
2844 
2845 	printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2846 	printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2847 	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2848 
2849 	/* each int_node is a syscall */
2850 	while (inode) {
2851 		stats = inode->priv;
2852 		if (stats) {
2853 			double min = (double)(stats->min) / NSEC_PER_MSEC;
2854 			double max = (double)(stats->max) / NSEC_PER_MSEC;
2855 			double avg = avg_stats(stats);
2856 			double pct;
2857 			u64 n = (u64) stats->n;
2858 
2859 			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2860 			avg /= NSEC_PER_MSEC;
2861 
2862 			sc = &trace->syscalls.table[inode->i];
2863 			printed += fprintf(fp, "   %-15s", sc->name);
2864 			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2865 					   n, avg * n, min, avg);
2866 			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2867 		}
2868 
2869 		inode = intlist__next(inode);
2870 	}
2871 
2872 	printed += fprintf(fp, "\n\n");
2873 
2874 	return printed;
2875 }
2876 
/*
 * Context passed to trace__fprintf_one_thread() through the opaque 'priv'
 * argument of machine__for_each_thread().
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* tracer state: event total and syscall table */
	size_t printed;		/* running sum of the fprintf() results */
};
2883 
2884 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2885 {
2886 	struct summary_data *data = priv;
2887 	FILE *fp = data->fp;
2888 	size_t printed = data->printed;
2889 	struct trace *trace = data->trace;
2890 	struct thread_trace *ttrace = thread__priv(thread);
2891 	double ratio;
2892 
2893 	if (ttrace == NULL)
2894 		return 0;
2895 
2896 	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2897 
2898 	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2899 	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2900 	printed += fprintf(fp, "%.1f%%", ratio);
2901 	if (ttrace->pfmaj)
2902 		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2903 	if (ttrace->pfmin)
2904 		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2905 	printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2906 	printed += thread__dump_stats(ttrace, trace, fp);
2907 
2908 	data->printed += printed;
2909 
2910 	return 0;
2911 }
2912 
2913 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2914 {
2915 	struct summary_data data = {
2916 		.fp = fp,
2917 		.trace = trace
2918 	};
2919 	data.printed = trace__fprintf_threads_header(fp);
2920 
2921 	machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2922 
2923 	return data.printed;
2924 }
2925 
2926 static int trace__set_duration(const struct option *opt, const char *str,
2927 			       int unset __maybe_unused)
2928 {
2929 	struct trace *trace = opt->value;
2930 
2931 	trace->duration_filter = atof(str);
2932 	return 0;
2933 }
2934 
2935 static int trace__set_filter_pids(const struct option *opt, const char *str,
2936 				  int unset __maybe_unused)
2937 {
2938 	int ret = -1;
2939 	size_t i;
2940 	struct trace *trace = opt->value;
2941 	/*
2942 	 * FIXME: introduce a intarray class, plain parse csv and create a
2943 	 * { int nr, int entries[] } struct...
2944 	 */
2945 	struct intlist *list = intlist__new(str);
2946 
2947 	if (list == NULL)
2948 		return -1;
2949 
2950 	i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2951 	trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2952 
2953 	if (trace->filter_pids.entries == NULL)
2954 		goto out;
2955 
2956 	trace->filter_pids.entries[0] = getpid();
2957 
2958 	for (i = 1; i < trace->filter_pids.nr; ++i)
2959 		trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2960 
2961 	intlist__delete(list);
2962 	ret = 0;
2963 out:
2964 	return ret;
2965 }
2966 
2967 static int trace__open_output(struct trace *trace, const char *filename)
2968 {
2969 	struct stat st;
2970 
2971 	if (!stat(filename, &st) && st.st_size) {
2972 		char oldname[PATH_MAX];
2973 
2974 		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2975 		unlink(oldname);
2976 		rename(filename, oldname);
2977 	}
2978 
2979 	trace->output = fopen(filename, "w");
2980 
2981 	return trace->output == NULL ? -errno : 0;
2982 }
2983 
2984 static int parse_pagefaults(const struct option *opt, const char *str,
2985 			    int unset __maybe_unused)
2986 {
2987 	int *trace_pgfaults = opt->value;
2988 
2989 	if (strcmp(str, "all") == 0)
2990 		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2991 	else if (strcmp(str, "maj") == 0)
2992 		*trace_pgfaults |= TRACE_PFMAJ;
2993 	else if (strcmp(str, "min") == 0)
2994 		*trace_pgfaults |= TRACE_PFMIN;
2995 	else
2996 		return -1;
2997 
2998 	return 0;
2999 }
3000 
3001 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
3002 {
3003 	struct perf_evsel *evsel;
3004 
3005 	evlist__for_each(evlist, evsel)
3006 		evsel->handler = handler;
3007 }
3008 
3009 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
3010 {
3011 	const char *trace_usage[] = {
3012 		"perf trace [<options>] [<command>]",
3013 		"perf trace [<options>] -- <command> [<options>]",
3014 		"perf trace record [<options>] [<command>]",
3015 		"perf trace record [<options>] -- <command> [<options>]",
3016 		NULL
3017 	};
3018 	struct trace trace = {
3019 		.audit = {
3020 			.machine = audit_detect_machine(),
3021 			.open_id = audit_name_to_syscall("open", trace.audit.machine),
3022 		},
3023 		.syscalls = {
3024 			. max = -1,
3025 		},
3026 		.opts = {
3027 			.target = {
3028 				.uid	   = UINT_MAX,
3029 				.uses_mmap = true,
3030 			},
3031 			.user_freq     = UINT_MAX,
3032 			.user_interval = ULLONG_MAX,
3033 			.no_buffering  = true,
3034 			.mmap_pages    = UINT_MAX,
3035 			.proc_map_timeout  = 500,
3036 		},
3037 		.output = stderr,
3038 		.show_comm = true,
3039 		.trace_syscalls = true,
3040 	};
3041 	const char *output_name = NULL;
3042 	const char *ev_qualifier_str = NULL;
3043 	const struct option trace_options[] = {
3044 	OPT_CALLBACK(0, "event", &trace.evlist, "event",
3045 		     "event selector. use 'perf list' to list available events",
3046 		     parse_events_option),
3047 	OPT_BOOLEAN(0, "comm", &trace.show_comm,
3048 		    "show the thread COMM next to its id"),
3049 	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3050 	OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
3051 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
3052 	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3053 	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3054 		    "trace events on existing process id"),
3055 	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3056 		    "trace events on existing thread id"),
3057 	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3058 		     "pids to filter (by the kernel)", trace__set_filter_pids),
3059 	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3060 		    "system-wide collection from all CPUs"),
3061 	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3062 		    "list of cpus to monitor"),
3063 	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3064 		    "child tasks do not inherit counters"),
3065 	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3066 		     "number of mmap data pages",
3067 		     perf_evlist__parse_mmap_pages),
3068 	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3069 		   "user to profile"),
3070 	OPT_CALLBACK(0, "duration", &trace, "float",
3071 		     "show only events with duration > N.M ms",
3072 		     trace__set_duration),
3073 	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3074 	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3075 	OPT_BOOLEAN('T', "time", &trace.full_time,
3076 		    "Show full timestamp, not time relative to first start"),
3077 	OPT_BOOLEAN('s', "summary", &trace.summary_only,
3078 		    "Show only syscall summary with statistics"),
3079 	OPT_BOOLEAN('S', "with-summary", &trace.summary,
3080 		    "Show all syscalls and summary with statistics"),
3081 	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3082 		     "Trace pagefaults", parse_pagefaults, "maj"),
3083 	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3084 	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3085 	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3086 			"per thread proc mmap processing timeout in ms"),
3087 	OPT_END()
3088 	};
3089 	const char * const trace_subcommands[] = { "record", NULL };
3090 	int err;
3091 	char bf[BUFSIZ];
3092 
3093 	signal(SIGSEGV, sighandler_dump_stack);
3094 	signal(SIGFPE, sighandler_dump_stack);
3095 
3096 	trace.evlist = perf_evlist__new();
3097 
3098 	if (trace.evlist == NULL) {
3099 		pr_err("Not enough memory to run!\n");
3100 		err = -ENOMEM;
3101 		goto out;
3102 	}
3103 
3104 	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3105 				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3106 
3107 	if (trace.trace_pgfaults) {
3108 		trace.opts.sample_address = true;
3109 		trace.opts.sample_time = true;
3110 	}
3111 
3112 	if (trace.evlist->nr_entries > 0)
3113 		evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3114 
3115 	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3116 		return trace__record(&trace, argc-1, &argv[1]);
3117 
3118 	/* summary_only implies summary option, but don't overwrite summary if set */
3119 	if (trace.summary_only)
3120 		trace.summary = trace.summary_only;
3121 
3122 	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3123 	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
3124 		pr_err("Please specify something to trace.\n");
3125 		return -1;
3126 	}
3127 
3128 	if (output_name != NULL) {
3129 		err = trace__open_output(&trace, output_name);
3130 		if (err < 0) {
3131 			perror("failed to create output file");
3132 			goto out;
3133 		}
3134 	}
3135 
3136 	if (ev_qualifier_str != NULL) {
3137 		const char *s = ev_qualifier_str;
3138 		struct strlist_config slist_config = {
3139 			.dirname = system_path(STRACE_GROUPS_DIR),
3140 		};
3141 
3142 		trace.not_ev_qualifier = *s == '!';
3143 		if (trace.not_ev_qualifier)
3144 			++s;
3145 		trace.ev_qualifier = strlist__new(s, &slist_config);
3146 		if (trace.ev_qualifier == NULL) {
3147 			fputs("Not enough memory to parse event qualifier",
3148 			      trace.output);
3149 			err = -ENOMEM;
3150 			goto out_close;
3151 		}
3152 
3153 		err = trace__validate_ev_qualifier(&trace);
3154 		if (err)
3155 			goto out_close;
3156 	}
3157 
3158 	err = target__validate(&trace.opts.target);
3159 	if (err) {
3160 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3161 		fprintf(trace.output, "%s", bf);
3162 		goto out_close;
3163 	}
3164 
3165 	err = target__parse_uid(&trace.opts.target);
3166 	if (err) {
3167 		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3168 		fprintf(trace.output, "%s", bf);
3169 		goto out_close;
3170 	}
3171 
3172 	if (!argc && target__none(&trace.opts.target))
3173 		trace.opts.target.system_wide = true;
3174 
3175 	if (input_name)
3176 		err = trace__replay(&trace);
3177 	else
3178 		err = trace__run(&trace, argc, argv);
3179 
3180 out_close:
3181 	if (output_name != NULL)
3182 		fclose(trace.output);
3183 out:
3184 	return err;
3185 }
3186