1 #include <traceevent/event-parse.h> 2 #include "builtin.h" 3 #include "util/color.h" 4 #include "util/debug.h" 5 #include "util/evlist.h" 6 #include "util/machine.h" 7 #include "util/session.h" 8 #include "util/thread.h" 9 #include "util/parse-options.h" 10 #include "util/strlist.h" 11 #include "util/intlist.h" 12 #include "util/thread_map.h" 13 #include "util/stat.h" 14 15 #include <libaudit.h> 16 #include <stdlib.h> 17 #include <sys/eventfd.h> 18 #include <sys/mman.h> 19 #include <linux/futex.h> 20 21 /* For older distros: */ 22 #ifndef MAP_STACK 23 # define MAP_STACK 0x20000 24 #endif 25 26 #ifndef MADV_HWPOISON 27 # define MADV_HWPOISON 100 28 #endif 29 30 #ifndef MADV_MERGEABLE 31 # define MADV_MERGEABLE 12 32 #endif 33 34 #ifndef MADV_UNMERGEABLE 35 # define MADV_UNMERGEABLE 13 36 #endif 37 38 struct tp_field { 39 int offset; 40 union { 41 u64 (*integer)(struct tp_field *field, struct perf_sample *sample); 42 void *(*pointer)(struct tp_field *field, struct perf_sample *sample); 43 }; 44 }; 45 46 #define TP_UINT_FIELD(bits) \ 47 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \ 48 { \ 49 return *(u##bits *)(sample->raw_data + field->offset); \ 50 } 51 52 TP_UINT_FIELD(8); 53 TP_UINT_FIELD(16); 54 TP_UINT_FIELD(32); 55 TP_UINT_FIELD(64); 56 57 #define TP_UINT_FIELD__SWAPPED(bits) \ 58 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \ 59 { \ 60 u##bits value = *(u##bits *)(sample->raw_data + field->offset); \ 61 return bswap_##bits(value);\ 62 } 63 64 TP_UINT_FIELD__SWAPPED(16); 65 TP_UINT_FIELD__SWAPPED(32); 66 TP_UINT_FIELD__SWAPPED(64); 67 68 static int tp_field__init_uint(struct tp_field *field, 69 struct format_field *format_field, 70 bool needs_swap) 71 { 72 field->offset = format_field->offset; 73 74 switch (format_field->size) { 75 case 1: 76 field->integer = tp_field__u8; 77 break; 78 case 2: 79 field->integer = needs_swap ? 
tp_field__swapped_u16 : tp_field__u16; 80 break; 81 case 4: 82 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32; 83 break; 84 case 8: 85 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64; 86 break; 87 default: 88 return -1; 89 } 90 91 return 0; 92 } 93 94 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample) 95 { 96 return sample->raw_data + field->offset; 97 } 98 99 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field) 100 { 101 field->offset = format_field->offset; 102 field->pointer = tp_field__ptr; 103 return 0; 104 } 105 106 struct syscall_tp { 107 struct tp_field id; 108 union { 109 struct tp_field args, ret; 110 }; 111 }; 112 113 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel, 114 struct tp_field *field, 115 const char *name) 116 { 117 struct format_field *format_field = perf_evsel__field(evsel, name); 118 119 if (format_field == NULL) 120 return -1; 121 122 return tp_field__init_uint(field, format_field, evsel->needs_swap); 123 } 124 125 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \ 126 ({ struct syscall_tp *sc = evsel->priv;\ 127 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); }) 128 129 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel, 130 struct tp_field *field, 131 const char *name) 132 { 133 struct format_field *format_field = perf_evsel__field(evsel, name); 134 135 if (format_field == NULL) 136 return -1; 137 138 return tp_field__init_ptr(field, format_field); 139 } 140 141 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \ 142 ({ struct syscall_tp *sc = evsel->priv;\ 143 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); }) 144 145 static void perf_evsel__delete_priv(struct perf_evsel *evsel) 146 { 147 free(evsel->priv); 148 evsel->priv = NULL; 149 perf_evsel__delete(evsel); 150 } 151 152 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler) 153 { 154 evsel->priv 
= malloc(sizeof(struct syscall_tp)); 155 if (evsel->priv != NULL) { 156 if (perf_evsel__init_sc_tp_uint_field(evsel, id)) 157 goto out_delete; 158 159 evsel->handler = handler; 160 return 0; 161 } 162 163 return -ENOMEM; 164 165 out_delete: 166 free(evsel->priv); 167 evsel->priv = NULL; 168 return -ENOENT; 169 } 170 171 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler) 172 { 173 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction); 174 175 if (evsel) { 176 if (perf_evsel__init_syscall_tp(evsel, handler)) 177 goto out_delete; 178 } 179 180 return evsel; 181 182 out_delete: 183 perf_evsel__delete_priv(evsel); 184 return NULL; 185 } 186 187 #define perf_evsel__sc_tp_uint(evsel, name, sample) \ 188 ({ struct syscall_tp *fields = evsel->priv; \ 189 fields->name.integer(&fields->name, sample); }) 190 191 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \ 192 ({ struct syscall_tp *fields = evsel->priv; \ 193 fields->name.pointer(&fields->name, sample); }) 194 195 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist, 196 void *sys_enter_handler, 197 void *sys_exit_handler) 198 { 199 int ret = -1; 200 struct perf_evsel *sys_enter, *sys_exit; 201 202 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler); 203 if (sys_enter == NULL) 204 goto out; 205 206 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args)) 207 goto out_delete_sys_enter; 208 209 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler); 210 if (sys_exit == NULL) 211 goto out_delete_sys_enter; 212 213 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) 214 goto out_delete_sys_exit; 215 216 perf_evlist__add(evlist, sys_enter); 217 perf_evlist__add(evlist, sys_exit); 218 219 ret = 0; 220 out: 221 return ret; 222 223 out_delete_sys_exit: 224 perf_evsel__delete_priv(sys_exit); 225 out_delete_sys_enter: 226 perf_evsel__delete_priv(sys_enter); 227 goto out; 228 } 229 230 231 struct syscall_arg { 232 unsigned long 
val; 233 struct thread *thread; 234 struct trace *trace; 235 void *parm; 236 u8 idx; 237 u8 mask; 238 }; 239 240 struct strarray { 241 int offset; 242 int nr_entries; 243 const char **entries; 244 }; 245 246 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \ 247 .nr_entries = ARRAY_SIZE(array), \ 248 .entries = array, \ 249 } 250 251 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \ 252 .offset = off, \ 253 .nr_entries = ARRAY_SIZE(array), \ 254 .entries = array, \ 255 } 256 257 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size, 258 const char *intfmt, 259 struct syscall_arg *arg) 260 { 261 struct strarray *sa = arg->parm; 262 int idx = arg->val - sa->offset; 263 264 if (idx < 0 || idx >= sa->nr_entries) 265 return scnprintf(bf, size, intfmt, arg->val); 266 267 return scnprintf(bf, size, "%s", sa->entries[idx]); 268 } 269 270 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, 271 struct syscall_arg *arg) 272 { 273 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg); 274 } 275 276 #define SCA_STRARRAY syscall_arg__scnprintf_strarray 277 278 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size, 279 struct syscall_arg *arg) 280 { 281 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg); 282 } 283 284 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray 285 286 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, 287 struct syscall_arg *arg); 288 289 #define SCA_FD syscall_arg__scnprintf_fd 290 291 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size, 292 struct syscall_arg *arg) 293 { 294 int fd = arg->val; 295 296 if (fd == AT_FDCWD) 297 return scnprintf(bf, size, "CWD"); 298 299 return syscall_arg__scnprintf_fd(bf, size, arg); 300 } 301 302 #define SCA_FDAT syscall_arg__scnprintf_fd_at 303 304 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 305 struct syscall_arg *arg); 306 307 #define 
SCA_CLOSE_FD syscall_arg__scnprintf_close_fd 308 309 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, 310 struct syscall_arg *arg) 311 { 312 return scnprintf(bf, size, "%#lx", arg->val); 313 } 314 315 #define SCA_HEX syscall_arg__scnprintf_hex 316 317 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, 318 struct syscall_arg *arg) 319 { 320 int printed = 0, prot = arg->val; 321 322 if (prot == PROT_NONE) 323 return scnprintf(bf, size, "NONE"); 324 #define P_MMAP_PROT(n) \ 325 if (prot & PROT_##n) { \ 326 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 327 prot &= ~PROT_##n; \ 328 } 329 330 P_MMAP_PROT(EXEC); 331 P_MMAP_PROT(READ); 332 P_MMAP_PROT(WRITE); 333 #ifdef PROT_SEM 334 P_MMAP_PROT(SEM); 335 #endif 336 P_MMAP_PROT(GROWSDOWN); 337 P_MMAP_PROT(GROWSUP); 338 #undef P_MMAP_PROT 339 340 if (prot) 341 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot); 342 343 return printed; 344 } 345 346 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot 347 348 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, 349 struct syscall_arg *arg) 350 { 351 int printed = 0, flags = arg->val; 352 353 #define P_MMAP_FLAG(n) \ 354 if (flags & MAP_##n) { \ 355 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? 
"|" : "", #n); \ 356 flags &= ~MAP_##n; \ 357 } 358 359 P_MMAP_FLAG(SHARED); 360 P_MMAP_FLAG(PRIVATE); 361 #ifdef MAP_32BIT 362 P_MMAP_FLAG(32BIT); 363 #endif 364 P_MMAP_FLAG(ANONYMOUS); 365 P_MMAP_FLAG(DENYWRITE); 366 P_MMAP_FLAG(EXECUTABLE); 367 P_MMAP_FLAG(FILE); 368 P_MMAP_FLAG(FIXED); 369 P_MMAP_FLAG(GROWSDOWN); 370 #ifdef MAP_HUGETLB 371 P_MMAP_FLAG(HUGETLB); 372 #endif 373 P_MMAP_FLAG(LOCKED); 374 P_MMAP_FLAG(NONBLOCK); 375 P_MMAP_FLAG(NORESERVE); 376 P_MMAP_FLAG(POPULATE); 377 P_MMAP_FLAG(STACK); 378 #ifdef MAP_UNINITIALIZED 379 P_MMAP_FLAG(UNINITIALIZED); 380 #endif 381 #undef P_MMAP_FLAG 382 383 if (flags) 384 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 385 386 return printed; 387 } 388 389 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags 390 391 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, 392 struct syscall_arg *arg) 393 { 394 int behavior = arg->val; 395 396 switch (behavior) { 397 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n) 398 P_MADV_BHV(NORMAL); 399 P_MADV_BHV(RANDOM); 400 P_MADV_BHV(SEQUENTIAL); 401 P_MADV_BHV(WILLNEED); 402 P_MADV_BHV(DONTNEED); 403 P_MADV_BHV(REMOVE); 404 P_MADV_BHV(DONTFORK); 405 P_MADV_BHV(DOFORK); 406 P_MADV_BHV(HWPOISON); 407 #ifdef MADV_SOFT_OFFLINE 408 P_MADV_BHV(SOFT_OFFLINE); 409 #endif 410 P_MADV_BHV(MERGEABLE); 411 P_MADV_BHV(UNMERGEABLE); 412 #ifdef MADV_HUGEPAGE 413 P_MADV_BHV(HUGEPAGE); 414 #endif 415 #ifdef MADV_NOHUGEPAGE 416 P_MADV_BHV(NOHUGEPAGE); 417 #endif 418 #ifdef MADV_DONTDUMP 419 P_MADV_BHV(DONTDUMP); 420 #endif 421 #ifdef MADV_DODUMP 422 P_MADV_BHV(DODUMP); 423 #endif 424 #undef P_MADV_PHV 425 default: break; 426 } 427 428 return scnprintf(bf, size, "%#x", behavior); 429 } 430 431 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior 432 433 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size, 434 struct syscall_arg *arg) 435 { 436 int printed = 0, op = arg->val; 437 438 if (op 
== 0) 439 return scnprintf(bf, size, "NONE"); 440 #define P_CMD(cmd) \ 441 if ((op & LOCK_##cmd) == LOCK_##cmd) { \ 442 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \ 443 op &= ~LOCK_##cmd; \ 444 } 445 446 P_CMD(SH); 447 P_CMD(EX); 448 P_CMD(NB); 449 P_CMD(UN); 450 P_CMD(MAND); 451 P_CMD(RW); 452 P_CMD(READ); 453 P_CMD(WRITE); 454 #undef P_OP 455 456 if (op) 457 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op); 458 459 return printed; 460 } 461 462 #define SCA_FLOCK syscall_arg__scnprintf_flock 463 464 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg) 465 { 466 enum syscall_futex_args { 467 SCF_UADDR = (1 << 0), 468 SCF_OP = (1 << 1), 469 SCF_VAL = (1 << 2), 470 SCF_TIMEOUT = (1 << 3), 471 SCF_UADDR2 = (1 << 4), 472 SCF_VAL3 = (1 << 5), 473 }; 474 int op = arg->val; 475 int cmd = op & FUTEX_CMD_MASK; 476 size_t printed = 0; 477 478 switch (cmd) { 479 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n); 480 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break; 481 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 482 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 483 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break; 484 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break; 485 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break; 486 P_FUTEX_OP(WAKE_OP); break; 487 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 488 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 489 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break; 490 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break; 491 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break; 492 P_FUTEX_OP(WAIT_REQUEUE_PI); break; 493 default: printed = scnprintf(bf, size, "%#x", cmd); break; 494 } 495 496 if (op & FUTEX_PRIVATE_FLAG) 497 printed 
+= scnprintf(bf + printed, size - printed, "|PRIV"); 498 499 if (op & FUTEX_CLOCK_REALTIME) 500 printed += scnprintf(bf + printed, size - printed, "|CLKRT"); 501 502 return printed; 503 } 504 505 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op 506 507 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", }; 508 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1); 509 510 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", }; 511 static DEFINE_STRARRAY(itimers); 512 513 static const char *whences[] = { "SET", "CUR", "END", 514 #ifdef SEEK_DATA 515 "DATA", 516 #endif 517 #ifdef SEEK_HOLE 518 "HOLE", 519 #endif 520 }; 521 static DEFINE_STRARRAY(whences); 522 523 static const char *fcntl_cmds[] = { 524 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK", 525 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64", 526 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX", 527 "F_GETOWNER_UIDS", 528 }; 529 static DEFINE_STRARRAY(fcntl_cmds); 530 531 static const char *rlimit_resources[] = { 532 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE", 533 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO", 534 "RTTIME", 535 }; 536 static DEFINE_STRARRAY(rlimit_resources); 537 538 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", }; 539 static DEFINE_STRARRAY(sighow); 540 541 static const char *clockid[] = { 542 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID", 543 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", 544 }; 545 static DEFINE_STRARRAY(clockid); 546 547 static const char *socket_families[] = { 548 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM", 549 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI", 550 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC", 551 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC", 552 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF", 553 "ALG", "NFC", 
"VSOCK", 554 }; 555 static DEFINE_STRARRAY(socket_families); 556 557 #ifndef SOCK_TYPE_MASK 558 #define SOCK_TYPE_MASK 0xf 559 #endif 560 561 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size, 562 struct syscall_arg *arg) 563 { 564 size_t printed; 565 int type = arg->val, 566 flags = type & ~SOCK_TYPE_MASK; 567 568 type &= SOCK_TYPE_MASK; 569 /* 570 * Can't use a strarray, MIPS may override for ABI reasons. 571 */ 572 switch (type) { 573 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break; 574 P_SK_TYPE(STREAM); 575 P_SK_TYPE(DGRAM); 576 P_SK_TYPE(RAW); 577 P_SK_TYPE(RDM); 578 P_SK_TYPE(SEQPACKET); 579 P_SK_TYPE(DCCP); 580 P_SK_TYPE(PACKET); 581 #undef P_SK_TYPE 582 default: 583 printed = scnprintf(bf, size, "%#x", type); 584 } 585 586 #define P_SK_FLAG(n) \ 587 if (flags & SOCK_##n) { \ 588 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \ 589 flags &= ~SOCK_##n; \ 590 } 591 592 P_SK_FLAG(CLOEXEC); 593 P_SK_FLAG(NONBLOCK); 594 #undef P_SK_FLAG 595 596 if (flags) 597 printed += scnprintf(bf + printed, size - printed, "|%#x", flags); 598 599 return printed; 600 } 601 602 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type 603 604 #ifndef MSG_PROBE 605 #define MSG_PROBE 0x10 606 #endif 607 #ifndef MSG_WAITFORONE 608 #define MSG_WAITFORONE 0x10000 609 #endif 610 #ifndef MSG_SENDPAGE_NOTLAST 611 #define MSG_SENDPAGE_NOTLAST 0x20000 612 #endif 613 #ifndef MSG_FASTOPEN 614 #define MSG_FASTOPEN 0x20000000 615 #endif 616 617 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size, 618 struct syscall_arg *arg) 619 { 620 int printed = 0, flags = arg->val; 621 622 if (flags == 0) 623 return scnprintf(bf, size, "NONE"); 624 #define P_MSG_FLAG(n) \ 625 if (flags & MSG_##n) { \ 626 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? 
"|" : "", #n); \ 627 flags &= ~MSG_##n; \ 628 } 629 630 P_MSG_FLAG(OOB); 631 P_MSG_FLAG(PEEK); 632 P_MSG_FLAG(DONTROUTE); 633 P_MSG_FLAG(TRYHARD); 634 P_MSG_FLAG(CTRUNC); 635 P_MSG_FLAG(PROBE); 636 P_MSG_FLAG(TRUNC); 637 P_MSG_FLAG(DONTWAIT); 638 P_MSG_FLAG(EOR); 639 P_MSG_FLAG(WAITALL); 640 P_MSG_FLAG(FIN); 641 P_MSG_FLAG(SYN); 642 P_MSG_FLAG(CONFIRM); 643 P_MSG_FLAG(RST); 644 P_MSG_FLAG(ERRQUEUE); 645 P_MSG_FLAG(NOSIGNAL); 646 P_MSG_FLAG(MORE); 647 P_MSG_FLAG(WAITFORONE); 648 P_MSG_FLAG(SENDPAGE_NOTLAST); 649 P_MSG_FLAG(FASTOPEN); 650 P_MSG_FLAG(CMSG_CLOEXEC); 651 #undef P_MSG_FLAG 652 653 if (flags) 654 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 655 656 return printed; 657 } 658 659 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags 660 661 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, 662 struct syscall_arg *arg) 663 { 664 size_t printed = 0; 665 int mode = arg->val; 666 667 if (mode == F_OK) /* 0 */ 668 return scnprintf(bf, size, "F"); 669 #define P_MODE(n) \ 670 if (mode & n##_OK) { \ 671 printed += scnprintf(bf + printed, size - printed, "%s", #n); \ 672 mode &= ~n##_OK; \ 673 } 674 675 P_MODE(R); 676 P_MODE(W); 677 P_MODE(X); 678 #undef P_MODE 679 680 if (mode) 681 printed += scnprintf(bf + printed, size - printed, "|%#x", mode); 682 683 return printed; 684 } 685 686 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode 687 688 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, 689 struct syscall_arg *arg) 690 { 691 int printed = 0, flags = arg->val; 692 693 if (!(flags & O_CREAT)) 694 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */ 695 696 if (flags == 0) 697 return scnprintf(bf, size, "RDONLY"); 698 #define P_FLAG(n) \ 699 if (flags & O_##n) { \ 700 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? 
"|" : "", #n); \ 701 flags &= ~O_##n; \ 702 } 703 704 P_FLAG(APPEND); 705 P_FLAG(ASYNC); 706 P_FLAG(CLOEXEC); 707 P_FLAG(CREAT); 708 P_FLAG(DIRECT); 709 P_FLAG(DIRECTORY); 710 P_FLAG(EXCL); 711 P_FLAG(LARGEFILE); 712 P_FLAG(NOATIME); 713 P_FLAG(NOCTTY); 714 #ifdef O_NONBLOCK 715 P_FLAG(NONBLOCK); 716 #elif O_NDELAY 717 P_FLAG(NDELAY); 718 #endif 719 #ifdef O_PATH 720 P_FLAG(PATH); 721 #endif 722 P_FLAG(RDWR); 723 #ifdef O_DSYNC 724 if ((flags & O_SYNC) == O_SYNC) 725 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC"); 726 else { 727 P_FLAG(DSYNC); 728 } 729 #else 730 P_FLAG(SYNC); 731 #endif 732 P_FLAG(TRUNC); 733 P_FLAG(WRONLY); 734 #undef P_FLAG 735 736 if (flags) 737 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 738 739 return printed; 740 } 741 742 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags 743 744 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size, 745 struct syscall_arg *arg) 746 { 747 int printed = 0, flags = arg->val; 748 749 if (flags == 0) 750 return scnprintf(bf, size, "NONE"); 751 #define P_FLAG(n) \ 752 if (flags & EFD_##n) { \ 753 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 754 flags &= ~EFD_##n; \ 755 } 756 757 P_FLAG(SEMAPHORE); 758 P_FLAG(CLOEXEC); 759 P_FLAG(NONBLOCK); 760 #undef P_FLAG 761 762 if (flags) 763 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 764 765 return printed; 766 } 767 768 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags 769 770 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size, 771 struct syscall_arg *arg) 772 { 773 int printed = 0, flags = arg->val; 774 775 #define P_FLAG(n) \ 776 if (flags & O_##n) { \ 777 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? 
"|" : "", #n); \ 778 flags &= ~O_##n; \ 779 } 780 781 P_FLAG(CLOEXEC); 782 P_FLAG(NONBLOCK); 783 #undef P_FLAG 784 785 if (flags) 786 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 787 788 return printed; 789 } 790 791 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags 792 793 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg) 794 { 795 int sig = arg->val; 796 797 switch (sig) { 798 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n) 799 P_SIGNUM(HUP); 800 P_SIGNUM(INT); 801 P_SIGNUM(QUIT); 802 P_SIGNUM(ILL); 803 P_SIGNUM(TRAP); 804 P_SIGNUM(ABRT); 805 P_SIGNUM(BUS); 806 P_SIGNUM(FPE); 807 P_SIGNUM(KILL); 808 P_SIGNUM(USR1); 809 P_SIGNUM(SEGV); 810 P_SIGNUM(USR2); 811 P_SIGNUM(PIPE); 812 P_SIGNUM(ALRM); 813 P_SIGNUM(TERM); 814 P_SIGNUM(STKFLT); 815 P_SIGNUM(CHLD); 816 P_SIGNUM(CONT); 817 P_SIGNUM(STOP); 818 P_SIGNUM(TSTP); 819 P_SIGNUM(TTIN); 820 P_SIGNUM(TTOU); 821 P_SIGNUM(URG); 822 P_SIGNUM(XCPU); 823 P_SIGNUM(XFSZ); 824 P_SIGNUM(VTALRM); 825 P_SIGNUM(PROF); 826 P_SIGNUM(WINCH); 827 P_SIGNUM(IO); 828 P_SIGNUM(PWR); 829 P_SIGNUM(SYS); 830 default: break; 831 } 832 833 return scnprintf(bf, size, "%#x", sig); 834 } 835 836 #define SCA_SIGNUM syscall_arg__scnprintf_signum 837 838 #define TCGETS 0x5401 839 840 static const char *tioctls[] = { 841 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW", 842 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL", 843 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI", 844 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC", 845 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX", 846 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO", 847 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK", 848 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2", 849 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK", 850 
"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG", 851 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL", 852 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG", 853 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS", 854 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI", 855 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE", 856 }; 857 858 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401); 859 860 #define STRARRAY(arg, name, array) \ 861 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \ 862 .arg_parm = { [arg] = &strarray__##array, } 863 864 static struct syscall_fmt { 865 const char *name; 866 const char *alias; 867 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg); 868 void *arg_parm[6]; 869 bool errmsg; 870 bool timeout; 871 bool hexret; 872 } syscall_fmts[] = { 873 { .name = "access", .errmsg = true, 874 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, }, 875 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, 876 { .name = "brk", .hexret = true, 877 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, 878 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, 879 { .name = "close", .errmsg = true, 880 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 881 { .name = "connect", .errmsg = true, }, 882 { .name = "dup", .errmsg = true, 883 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 884 { .name = "dup2", .errmsg = true, 885 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 886 { .name = "dup3", .errmsg = true, 887 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 888 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), }, 889 { .name = "eventfd2", .errmsg = true, 890 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, 891 { .name = "faccessat", .errmsg = true, 892 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 893 { .name = "fadvise64", .errmsg = true, 894 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 895 { .name = 
"fallocate", .errmsg = true, 896 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 897 { .name = "fchdir", .errmsg = true, 898 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 899 { .name = "fchmod", .errmsg = true, 900 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 901 { .name = "fchmodat", .errmsg = true, 902 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 903 { .name = "fchown", .errmsg = true, 904 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 905 { .name = "fchownat", .errmsg = true, 906 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 907 { .name = "fcntl", .errmsg = true, 908 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 909 [1] = SCA_STRARRAY, /* cmd */ }, 910 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, }, 911 { .name = "fdatasync", .errmsg = true, 912 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 913 { .name = "flock", .errmsg = true, 914 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 915 [1] = SCA_FLOCK, /* cmd */ }, }, 916 { .name = "fsetxattr", .errmsg = true, 917 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 918 { .name = "fstat", .errmsg = true, .alias = "newfstat", 919 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 920 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", 921 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 922 { .name = "fstatfs", .errmsg = true, 923 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 924 { .name = "fsync", .errmsg = true, 925 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 926 { .name = "ftruncate", .errmsg = true, 927 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 928 { .name = "futex", .errmsg = true, 929 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, 930 { .name = "futimesat", .errmsg = true, 931 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 932 { .name = "getdents", .errmsg = true, 933 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 934 { .name = "getdents64", .errmsg = true, 935 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 936 { .name = "getitimer", .errmsg = true, STRARRAY(0, 
which, itimers), }, 937 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, 938 { .name = "ioctl", .errmsg = true, 939 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 940 [1] = SCA_STRHEXARRAY, /* cmd */ 941 [2] = SCA_HEX, /* arg */ }, 942 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, }, 943 { .name = "kill", .errmsg = true, 944 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 945 { .name = "linkat", .errmsg = true, 946 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 947 { .name = "lseek", .errmsg = true, 948 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 949 [2] = SCA_STRARRAY, /* whence */ }, 950 .arg_parm = { [2] = &strarray__whences, /* whence */ }, }, 951 { .name = "lstat", .errmsg = true, .alias = "newlstat", }, 952 { .name = "madvise", .errmsg = true, 953 .arg_scnprintf = { [0] = SCA_HEX, /* start */ 954 [2] = SCA_MADV_BHV, /* behavior */ }, }, 955 { .name = "mkdirat", .errmsg = true, 956 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 957 { .name = "mknodat", .errmsg = true, 958 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 959 { .name = "mlock", .errmsg = true, 960 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 961 { .name = "mlockall", .errmsg = true, 962 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 963 { .name = "mmap", .hexret = true, 964 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ 965 [2] = SCA_MMAP_PROT, /* prot */ 966 [3] = SCA_MMAP_FLAGS, /* flags */ 967 [4] = SCA_FD, /* fd */ }, }, 968 { .name = "mprotect", .errmsg = true, 969 .arg_scnprintf = { [0] = SCA_HEX, /* start */ 970 [2] = SCA_MMAP_PROT, /* prot */ }, }, 971 { .name = "mremap", .hexret = true, 972 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ 973 [4] = SCA_HEX, /* new_addr */ }, }, 974 { .name = "munlock", .errmsg = true, 975 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 976 { .name = "munmap", .errmsg = true, 977 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 978 { .name = "name_to_handle_at", .errmsg = true, 979 .arg_scnprintf = { 
[0] = SCA_FDAT, /* dfd */ }, }, 980 { .name = "newfstatat", .errmsg = true, 981 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 982 { .name = "open", .errmsg = true, 983 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, 984 { .name = "open_by_handle_at", .errmsg = true, 985 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 986 [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 987 { .name = "openat", .errmsg = true, 988 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 989 [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 990 { .name = "pipe2", .errmsg = true, 991 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, }, 992 { .name = "poll", .errmsg = true, .timeout = true, }, 993 { .name = "ppoll", .errmsg = true, .timeout = true, }, 994 { .name = "pread", .errmsg = true, .alias = "pread64", 995 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 996 { .name = "preadv", .errmsg = true, .alias = "pread", 997 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 998 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), }, 999 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", 1000 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1001 { .name = "pwritev", .errmsg = true, 1002 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1003 { .name = "read", .errmsg = true, 1004 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1005 { .name = "readlinkat", .errmsg = true, 1006 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1007 { .name = "readv", .errmsg = true, 1008 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1009 { .name = "recvfrom", .errmsg = true, 1010 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1011 { .name = "recvmmsg", .errmsg = true, 1012 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1013 { .name = "recvmsg", .errmsg = true, 1014 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, 1015 { .name = "renameat", .errmsg = true, 1016 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1017 { .name = "rt_sigaction", .errmsg = true, 1018 
.arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, 1019 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), }, 1020 { .name = "rt_sigqueueinfo", .errmsg = true, 1021 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 1022 { .name = "rt_tgsigqueueinfo", .errmsg = true, 1023 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, 1024 { .name = "select", .errmsg = true, .timeout = true, }, 1025 { .name = "sendmmsg", .errmsg = true, 1026 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1027 { .name = "sendmsg", .errmsg = true, 1028 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, 1029 { .name = "sendto", .errmsg = true, 1030 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1031 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, 1032 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, 1033 { .name = "shutdown", .errmsg = true, 1034 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1035 { .name = "socket", .errmsg = true, 1036 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ 1037 [1] = SCA_SK_TYPE, /* type */ }, 1038 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, 1039 { .name = "socketpair", .errmsg = true, 1040 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ 1041 [1] = SCA_SK_TYPE, /* type */ }, 1042 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, 1043 { .name = "stat", .errmsg = true, .alias = "newstat", }, 1044 { .name = "symlinkat", .errmsg = true, 1045 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1046 { .name = "tgkill", .errmsg = true, 1047 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, 1048 { .name = "tkill", .errmsg = true, 1049 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 1050 { .name = "uname", .errmsg = true, .alias = "newuname", }, 1051 { .name = "unlinkat", .errmsg = true, 1052 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1053 { .name = "utimensat", .errmsg = true, 1054 .arg_scnprintf = { [0] 
= SCA_FDAT, /* dirfd */ }, }, 1055 { .name = "write", .errmsg = true, 1056 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1057 { .name = "writev", .errmsg = true, 1058 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1059 }; 1060 1061 static int syscall_fmt__cmp(const void *name, const void *fmtp) 1062 { 1063 const struct syscall_fmt *fmt = fmtp; 1064 return strcmp(name, fmt->name); 1065 } 1066 1067 static struct syscall_fmt *syscall_fmt__find(const char *name) 1068 { 1069 const int nmemb = ARRAY_SIZE(syscall_fmts); 1070 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); 1071 } 1072 1073 struct syscall { 1074 struct event_format *tp_format; 1075 const char *name; 1076 bool filtered; 1077 struct syscall_fmt *fmt; 1078 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); 1079 void **arg_parm; 1080 }; 1081 1082 static size_t fprintf_duration(unsigned long t, FILE *fp) 1083 { 1084 double duration = (double)t / NSEC_PER_MSEC; 1085 size_t printed = fprintf(fp, "("); 1086 1087 if (duration >= 1.0) 1088 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); 1089 else if (duration >= 0.01) 1090 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); 1091 else 1092 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration); 1093 return printed + fprintf(fp, "): "); 1094 } 1095 1096 struct thread_trace { 1097 u64 entry_time; 1098 u64 exit_time; 1099 bool entry_pending; 1100 unsigned long nr_events; 1101 char *entry_str; 1102 double runtime_ms; 1103 struct { 1104 int max; 1105 char **table; 1106 } paths; 1107 1108 struct intlist *syscall_stats; 1109 }; 1110 1111 static struct thread_trace *thread_trace__new(void) 1112 { 1113 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace)); 1114 1115 if (ttrace) 1116 ttrace->paths.max = -1; 1117 1118 ttrace->syscall_stats = intlist__new(NULL); 1119 1120 return ttrace; 1121 } 1122 1123 static struct thread_trace 
/* Global state of one 'perf trace' invocation (live tracing or replay). */
struct trace {
	struct perf_tool	tool;		/* embedded tool; callbacks recover the trace via container_of() */
	struct {
		int		machine;	/* passed to audit_syscall_to_name() when resolving ids */
		int		open_id;	/* syscall id compared in trace__sys_exit() to pair a vfs_getname path with the returned fd */
	} audit;
	struct {
		int		max;		/* highest valid index in table, -1 while the table is empty */
		struct syscall  *table;		/* indexed by syscall id, grown in trace__read_syscall_info() */
	} syscalls;
	struct perf_record_opts opts;		/* handed to perf_evlist__config(); opts.target selects what to trace */
	struct machine		*host;		/* machine used for thread lookups */
	u64			base_time;	/* time of the first sample; subtracted when printing timestamps */
	bool			full_time;	/* when set, base_time is left at 0 */
	FILE			*output;	/* destination of all trace output */
	unsigned long		nr_events;	/* events read in trace__run(); denominator of the per-thread ratio */
	struct strlist		*ev_qualifier;	/* syscall names used to filter in trace__read_syscall_info() */
	bool			not_ev_qualifier; /* inverts the sense of ev_qualifier */
	bool			live;		/* true while trace__run() is active; enables /proc fd lookups */
	const char		*last_vfs_getname; /* set by trace__vfs_getname(), consumed by trace__sys_exit() */
	struct intlist		*tid_list;	/* tid filter consulted by skip_sample() */
	struct intlist		*pid_list;	/* pid filter consulted by skip_sample() */
	bool			sched;		/* also trace sched:sched_stat_runtime */
	bool			multiple_threads; /* print the tid (and comm) in front of each line */
	bool			summary;	/* collect per-syscall stats and print the thread summary */
	bool			show_comm;	/* print the thread comm next to its tid */
	bool			show_tool_stats; /* print the counters in 'stats' at the end of trace__run() */
	double			duration_filter; /* in msec; compared against durations scaled by NSEC_PER_MSEC */
	double			runtime_ms;	/* sum of runtimes seen by trace__sched_stat_runtime() */
	struct {
		u64		vfs_getname, proc_getname; /* how many fd paths came from the probe vs. from /proc */
	} stats;
};
0 : -1; 1204 } 1205 1206 static int thread__read_fd_path(struct thread *thread, int fd) 1207 { 1208 char linkname[PATH_MAX], pathname[PATH_MAX]; 1209 struct stat st; 1210 int ret; 1211 1212 if (thread->pid_ == thread->tid) { 1213 scnprintf(linkname, sizeof(linkname), 1214 "/proc/%d/fd/%d", thread->pid_, fd); 1215 } else { 1216 scnprintf(linkname, sizeof(linkname), 1217 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd); 1218 } 1219 1220 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname)) 1221 return -1; 1222 1223 ret = readlink(linkname, pathname, sizeof(pathname)); 1224 1225 if (ret < 0 || ret > st.st_size) 1226 return -1; 1227 1228 pathname[ret] = '\0'; 1229 return trace__set_fd_pathname(thread, fd, pathname); 1230 } 1231 1232 static const char *thread__fd_path(struct thread *thread, int fd, 1233 struct trace *trace) 1234 { 1235 struct thread_trace *ttrace = thread->priv; 1236 1237 if (ttrace == NULL) 1238 return NULL; 1239 1240 if (fd < 0) 1241 return NULL; 1242 1243 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) 1244 if (!trace->live) 1245 return NULL; 1246 ++trace->stats.proc_getname; 1247 if (thread__read_fd_path(thread, fd)) { 1248 return NULL; 1249 } 1250 1251 return ttrace->paths.table[fd]; 1252 } 1253 1254 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, 1255 struct syscall_arg *arg) 1256 { 1257 int fd = arg->val; 1258 size_t printed = scnprintf(bf, size, "%d", fd); 1259 const char *path = thread__fd_path(arg->thread, fd, arg->trace); 1260 1261 if (path) 1262 printed += scnprintf(bf + printed, size - printed, "<%s>", path); 1263 1264 return printed; 1265 } 1266 1267 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 1268 struct syscall_arg *arg) 1269 { 1270 int fd = arg->val; 1271 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg); 1272 struct thread_trace *ttrace = arg->thread->priv; 1273 1274 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) { 1275 
free(ttrace->paths.table[fd]); 1276 ttrace->paths.table[fd] = NULL; 1277 } 1278 1279 return printed; 1280 } 1281 1282 static bool trace__filter_duration(struct trace *trace, double t) 1283 { 1284 return t < (trace->duration_filter * NSEC_PER_MSEC); 1285 } 1286 1287 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) 1288 { 1289 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; 1290 1291 return fprintf(fp, "%10.3f ", ts); 1292 } 1293 1294 static bool done = false; 1295 static bool interrupted = false; 1296 1297 static void sig_handler(int sig) 1298 { 1299 done = true; 1300 interrupted = sig == SIGINT; 1301 } 1302 1303 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, 1304 u64 duration, u64 tstamp, FILE *fp) 1305 { 1306 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); 1307 printed += fprintf_duration(duration, fp); 1308 1309 if (trace->multiple_threads) { 1310 if (trace->show_comm) 1311 printed += fprintf(fp, "%.14s/", thread__comm_str(thread)); 1312 printed += fprintf(fp, "%d ", thread->tid); 1313 } 1314 1315 return printed; 1316 } 1317 1318 static int trace__process_event(struct trace *trace, struct machine *machine, 1319 union perf_event *event, struct perf_sample *sample) 1320 { 1321 int ret = 0; 1322 1323 switch (event->header.type) { 1324 case PERF_RECORD_LOST: 1325 color_fprintf(trace->output, PERF_COLOR_RED, 1326 "LOST %" PRIu64 " events!\n", event->lost.lost); 1327 ret = machine__process_lost_event(machine, event, sample); 1328 default: 1329 ret = machine__process_event(machine, event, sample); 1330 break; 1331 } 1332 1333 return ret; 1334 } 1335 1336 static int trace__tool_process(struct perf_tool *tool, 1337 union perf_event *event, 1338 struct perf_sample *sample, 1339 struct machine *machine) 1340 { 1341 struct trace *trace = container_of(tool, struct trace, tool); 1342 return trace__process_event(trace, machine, event, sample); 1343 } 1344 1345 static int 
trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) 1346 { 1347 int err = symbol__init(); 1348 1349 if (err) 1350 return err; 1351 1352 trace->host = machine__new_host(); 1353 if (trace->host == NULL) 1354 return -ENOMEM; 1355 1356 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, 1357 evlist->threads, trace__tool_process, false); 1358 if (err) 1359 symbol__exit(); 1360 1361 return err; 1362 } 1363 1364 static int syscall__set_arg_fmts(struct syscall *sc) 1365 { 1366 struct format_field *field; 1367 int idx = 0; 1368 1369 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *)); 1370 if (sc->arg_scnprintf == NULL) 1371 return -1; 1372 1373 if (sc->fmt) 1374 sc->arg_parm = sc->fmt->arg_parm; 1375 1376 for (field = sc->tp_format->format.fields->next; field; field = field->next) { 1377 if (sc->fmt && sc->fmt->arg_scnprintf[idx]) 1378 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx]; 1379 else if (field->flags & FIELD_IS_POINTER) 1380 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex; 1381 ++idx; 1382 } 1383 1384 return 0; 1385 } 1386 1387 static int trace__read_syscall_info(struct trace *trace, int id) 1388 { 1389 char tp_name[128]; 1390 struct syscall *sc; 1391 const char *name = audit_syscall_to_name(id, trace->audit.machine); 1392 1393 if (name == NULL) 1394 return -1; 1395 1396 if (id > trace->syscalls.max) { 1397 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); 1398 1399 if (nsyscalls == NULL) 1400 return -1; 1401 1402 if (trace->syscalls.max != -1) { 1403 memset(nsyscalls + trace->syscalls.max + 1, 0, 1404 (id - trace->syscalls.max) * sizeof(*sc)); 1405 } else { 1406 memset(nsyscalls, 0, (id + 1) * sizeof(*sc)); 1407 } 1408 1409 trace->syscalls.table = nsyscalls; 1410 trace->syscalls.max = id; 1411 } 1412 1413 sc = trace->syscalls.table + id; 1414 sc->name = name; 1415 1416 if (trace->ev_qualifier) { 1417 bool in = strlist__find(trace->ev_qualifier, 
name) != NULL; 1418 1419 if (!(in ^ trace->not_ev_qualifier)) { 1420 sc->filtered = true; 1421 /* 1422 * No need to do read tracepoint information since this will be 1423 * filtered out. 1424 */ 1425 return 0; 1426 } 1427 } 1428 1429 sc->fmt = syscall_fmt__find(sc->name); 1430 1431 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); 1432 sc->tp_format = event_format__new("syscalls", tp_name); 1433 1434 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) { 1435 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias); 1436 sc->tp_format = event_format__new("syscalls", tp_name); 1437 } 1438 1439 if (sc->tp_format == NULL) 1440 return -1; 1441 1442 return syscall__set_arg_fmts(sc); 1443 } 1444 1445 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, 1446 unsigned long *args, struct trace *trace, 1447 struct thread *thread) 1448 { 1449 size_t printed = 0; 1450 1451 if (sc->tp_format != NULL) { 1452 struct format_field *field; 1453 u8 bit = 1; 1454 struct syscall_arg arg = { 1455 .idx = 0, 1456 .mask = 0, 1457 .trace = trace, 1458 .thread = thread, 1459 }; 1460 1461 for (field = sc->tp_format->format.fields->next; field; 1462 field = field->next, ++arg.idx, bit <<= 1) { 1463 if (arg.mask & bit) 1464 continue; 1465 /* 1466 * Suppress this argument if its value is zero and 1467 * and we don't have a string associated in an 1468 * strarray for it. 1469 */ 1470 if (args[arg.idx] == 0 && 1471 !(sc->arg_scnprintf && 1472 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY && 1473 sc->arg_parm[arg.idx])) 1474 continue; 1475 1476 printed += scnprintf(bf + printed, size - printed, 1477 "%s%s: ", printed ? 
", " : "", field->name); 1478 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) { 1479 arg.val = args[arg.idx]; 1480 if (sc->arg_parm) 1481 arg.parm = sc->arg_parm[arg.idx]; 1482 printed += sc->arg_scnprintf[arg.idx](bf + printed, 1483 size - printed, &arg); 1484 } else { 1485 printed += scnprintf(bf + printed, size - printed, 1486 "%ld", args[arg.idx]); 1487 } 1488 } 1489 } else { 1490 int i = 0; 1491 1492 while (i < 6) { 1493 printed += scnprintf(bf + printed, size - printed, 1494 "%sarg%d: %ld", 1495 printed ? ", " : "", i, args[i]); 1496 ++i; 1497 } 1498 } 1499 1500 return printed; 1501 } 1502 1503 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel, 1504 struct perf_sample *sample); 1505 1506 static struct syscall *trace__syscall_info(struct trace *trace, 1507 struct perf_evsel *evsel, int id) 1508 { 1509 1510 if (id < 0) { 1511 1512 /* 1513 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried 1514 * before that, leaving at a higher verbosity level till that is 1515 * explained. Reproduced with plain ftrace with: 1516 * 1517 * echo 1 > /t/events/raw_syscalls/sys_exit/enable 1518 * grep "NR -1 " /t/trace_pipe 1519 * 1520 * After generating some load on the machine. 
1521 */ 1522 if (verbose > 1) { 1523 static u64 n; 1524 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n", 1525 id, perf_evsel__name(evsel), ++n); 1526 } 1527 return NULL; 1528 } 1529 1530 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) && 1531 trace__read_syscall_info(trace, id)) 1532 goto out_cant_read; 1533 1534 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL)) 1535 goto out_cant_read; 1536 1537 return &trace->syscalls.table[id]; 1538 1539 out_cant_read: 1540 if (verbose) { 1541 fprintf(trace->output, "Problems reading syscall %d", id); 1542 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL) 1543 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name); 1544 fputs(" information\n", trace->output); 1545 } 1546 return NULL; 1547 } 1548 1549 static void thread__update_stats(struct thread_trace *ttrace, 1550 int id, struct perf_sample *sample) 1551 { 1552 struct int_node *inode; 1553 struct stats *stats; 1554 u64 duration = 0; 1555 1556 inode = intlist__findnew(ttrace->syscall_stats, id); 1557 if (inode == NULL) 1558 return; 1559 1560 stats = inode->priv; 1561 if (stats == NULL) { 1562 stats = malloc(sizeof(struct stats)); 1563 if (stats == NULL) 1564 return; 1565 init_stats(stats); 1566 inode->priv = stats; 1567 } 1568 1569 if (ttrace->entry_time && sample->time > ttrace->entry_time) 1570 duration = sample->time - ttrace->entry_time; 1571 1572 update_stats(stats, duration); 1573 } 1574 1575 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, 1576 struct perf_sample *sample) 1577 { 1578 char *msg; 1579 void *args; 1580 size_t printed = 0; 1581 struct thread *thread; 1582 int id = perf_evsel__sc_tp_uint(evsel, id, sample); 1583 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1584 struct thread_trace *ttrace; 1585 1586 if (sc == NULL) 1587 return -1; 1588 1589 if (sc->filtered) 1590 return 0; 1591 1592 thread = 
machine__findnew_thread(trace->host, sample->pid, sample->tid); 1593 ttrace = thread__trace(thread, trace->output); 1594 if (ttrace == NULL) 1595 return -1; 1596 1597 args = perf_evsel__sc_tp_ptr(evsel, args, sample); 1598 ttrace = thread->priv; 1599 1600 if (ttrace->entry_str == NULL) { 1601 ttrace->entry_str = malloc(1024); 1602 if (!ttrace->entry_str) 1603 return -1; 1604 } 1605 1606 ttrace->entry_time = sample->time; 1607 msg = ttrace->entry_str; 1608 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name); 1609 1610 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, 1611 args, trace, thread); 1612 1613 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) { 1614 if (!trace->duration_filter) { 1615 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); 1616 fprintf(trace->output, "%-70s\n", ttrace->entry_str); 1617 } 1618 } else 1619 ttrace->entry_pending = true; 1620 1621 return 0; 1622 } 1623 1624 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, 1625 struct perf_sample *sample) 1626 { 1627 int ret; 1628 u64 duration = 0; 1629 struct thread *thread; 1630 int id = perf_evsel__sc_tp_uint(evsel, id, sample); 1631 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1632 struct thread_trace *ttrace; 1633 1634 if (sc == NULL) 1635 return -1; 1636 1637 if (sc->filtered) 1638 return 0; 1639 1640 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1641 ttrace = thread__trace(thread, trace->output); 1642 if (ttrace == NULL) 1643 return -1; 1644 1645 if (trace->summary) 1646 thread__update_stats(ttrace, id, sample); 1647 1648 ret = perf_evsel__sc_tp_uint(evsel, ret, sample); 1649 1650 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) { 1651 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname); 1652 trace->last_vfs_getname = NULL; 1653 ++trace->stats.vfs_getname; 1654 } 1655 1656 ttrace = thread->priv; 1657 1658 
ttrace->exit_time = sample->time; 1659 1660 if (ttrace->entry_time) { 1661 duration = sample->time - ttrace->entry_time; 1662 if (trace__filter_duration(trace, duration)) 1663 goto out; 1664 } else if (trace->duration_filter) 1665 goto out; 1666 1667 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output); 1668 1669 if (ttrace->entry_pending) { 1670 fprintf(trace->output, "%-70s", ttrace->entry_str); 1671 } else { 1672 fprintf(trace->output, " ... ["); 1673 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued"); 1674 fprintf(trace->output, "]: %s()", sc->name); 1675 } 1676 1677 if (sc->fmt == NULL) { 1678 signed_print: 1679 fprintf(trace->output, ") = %d", ret); 1680 } else if (ret < 0 && sc->fmt->errmsg) { 1681 char bf[256]; 1682 const char *emsg = strerror_r(-ret, bf, sizeof(bf)), 1683 *e = audit_errno_to_name(-ret); 1684 1685 fprintf(trace->output, ") = -1 %s %s", e, emsg); 1686 } else if (ret == 0 && sc->fmt->timeout) 1687 fprintf(trace->output, ") = 0 Timeout"); 1688 else if (sc->fmt->hexret) 1689 fprintf(trace->output, ") = %#x", ret); 1690 else 1691 goto signed_print; 1692 1693 fputc('\n', trace->output); 1694 out: 1695 ttrace->entry_pending = false; 1696 1697 return 0; 1698 } 1699 1700 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, 1701 struct perf_sample *sample) 1702 { 1703 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname"); 1704 return 0; 1705 } 1706 1707 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel, 1708 struct perf_sample *sample) 1709 { 1710 u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); 1711 double runtime_ms = (double)runtime / NSEC_PER_MSEC; 1712 struct thread *thread = machine__findnew_thread(trace->host, 1713 sample->pid, 1714 sample->tid); 1715 struct thread_trace *ttrace = thread__trace(thread, trace->output); 1716 1717 if (ttrace == NULL) 1718 goto out_dump; 1719 1720 ttrace->runtime_ms += runtime_ms; 1721 
trace->runtime_ms += runtime_ms; 1722 return 0; 1723 1724 out_dump: 1725 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n", 1726 evsel->name, 1727 perf_evsel__strval(evsel, sample, "comm"), 1728 (pid_t)perf_evsel__intval(evsel, sample, "pid"), 1729 runtime, 1730 perf_evsel__intval(evsel, sample, "vruntime")); 1731 return 0; 1732 } 1733 1734 static bool skip_sample(struct trace *trace, struct perf_sample *sample) 1735 { 1736 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) || 1737 (trace->tid_list && intlist__find(trace->tid_list, sample->tid))) 1738 return false; 1739 1740 if (trace->pid_list || trace->tid_list) 1741 return true; 1742 1743 return false; 1744 } 1745 1746 static int trace__process_sample(struct perf_tool *tool, 1747 union perf_event *event __maybe_unused, 1748 struct perf_sample *sample, 1749 struct perf_evsel *evsel, 1750 struct machine *machine __maybe_unused) 1751 { 1752 struct trace *trace = container_of(tool, struct trace, tool); 1753 int err = 0; 1754 1755 tracepoint_handler handler = evsel->handler; 1756 1757 if (skip_sample(trace, sample)) 1758 return 0; 1759 1760 if (!trace->full_time && trace->base_time == 0) 1761 trace->base_time = sample->time; 1762 1763 if (handler) 1764 handler(trace, evsel, sample); 1765 1766 return err; 1767 } 1768 1769 static int parse_target_str(struct trace *trace) 1770 { 1771 if (trace->opts.target.pid) { 1772 trace->pid_list = intlist__new(trace->opts.target.pid); 1773 if (trace->pid_list == NULL) { 1774 pr_err("Error parsing process id string\n"); 1775 return -EINVAL; 1776 } 1777 } 1778 1779 if (trace->opts.target.tid) { 1780 trace->tid_list = intlist__new(trace->opts.target.tid); 1781 if (trace->tid_list == NULL) { 1782 pr_err("Error parsing thread id string\n"); 1783 return -EINVAL; 1784 } 1785 } 1786 1787 return 0; 1788 } 1789 1790 static int trace__record(int argc, const char **argv) 1791 { 1792 unsigned int rec_argc, i, j; 1793 const char **rec_argv; 
1794 const char * const record_args[] = { 1795 "record", 1796 "-R", 1797 "-m", "1024", 1798 "-c", "1", 1799 "-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit", 1800 }; 1801 1802 rec_argc = ARRAY_SIZE(record_args) + argc; 1803 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 1804 1805 if (rec_argv == NULL) 1806 return -ENOMEM; 1807 1808 for (i = 0; i < ARRAY_SIZE(record_args); i++) 1809 rec_argv[i] = record_args[i]; 1810 1811 for (j = 0; j < (unsigned int)argc; j++, i++) 1812 rec_argv[i] = argv[j]; 1813 1814 return cmd_record(i, rec_argv, NULL); 1815 } 1816 1817 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); 1818 1819 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist) 1820 { 1821 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname"); 1822 if (evsel == NULL) 1823 return; 1824 1825 if (perf_evsel__field(evsel, "pathname") == NULL) { 1826 perf_evsel__delete(evsel); 1827 return; 1828 } 1829 1830 evsel->handler = trace__vfs_getname; 1831 perf_evlist__add(evlist, evsel); 1832 } 1833 1834 static int trace__run(struct trace *trace, int argc, const char **argv) 1835 { 1836 struct perf_evlist *evlist = perf_evlist__new(); 1837 struct perf_evsel *evsel; 1838 int err = -1, i; 1839 unsigned long before; 1840 const bool forks = argc > 0; 1841 1842 trace->live = true; 1843 1844 if (evlist == NULL) { 1845 fprintf(trace->output, "Not enough memory to run!\n"); 1846 goto out; 1847 } 1848 1849 if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit)) 1850 goto out_error_tp; 1851 1852 perf_evlist__add_vfs_getname(evlist); 1853 1854 if (trace->sched && 1855 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", 1856 trace__sched_stat_runtime)) 1857 goto out_error_tp; 1858 1859 err = perf_evlist__create_maps(evlist, &trace->opts.target); 1860 if (err < 0) { 1861 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n"); 1862 goto out_delete_evlist; 1863 } 1864 1865 err 
= trace__symbols_init(trace, evlist); 1866 if (err < 0) { 1867 fprintf(trace->output, "Problems initializing symbol libraries!\n"); 1868 goto out_delete_maps; 1869 } 1870 1871 perf_evlist__config(evlist, &trace->opts); 1872 1873 signal(SIGCHLD, sig_handler); 1874 signal(SIGINT, sig_handler); 1875 1876 if (forks) { 1877 err = perf_evlist__prepare_workload(evlist, &trace->opts.target, 1878 argv, false, false); 1879 if (err < 0) { 1880 fprintf(trace->output, "Couldn't run the workload!\n"); 1881 goto out_delete_maps; 1882 } 1883 } 1884 1885 err = perf_evlist__open(evlist); 1886 if (err < 0) 1887 goto out_error_open; 1888 1889 err = perf_evlist__mmap(evlist, UINT_MAX, false); 1890 if (err < 0) { 1891 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno)); 1892 goto out_close_evlist; 1893 } 1894 1895 perf_evlist__enable(evlist); 1896 1897 if (forks) 1898 perf_evlist__start_workload(evlist); 1899 1900 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1; 1901 again: 1902 before = trace->nr_events; 1903 1904 for (i = 0; i < evlist->nr_mmaps; i++) { 1905 union perf_event *event; 1906 1907 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { 1908 const u32 type = event->header.type; 1909 tracepoint_handler handler; 1910 struct perf_sample sample; 1911 1912 ++trace->nr_events; 1913 1914 err = perf_evlist__parse_sample(evlist, event, &sample); 1915 if (err) { 1916 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err); 1917 goto next_event; 1918 } 1919 1920 if (!trace->full_time && trace->base_time == 0) 1921 trace->base_time = sample.time; 1922 1923 if (type != PERF_RECORD_SAMPLE) { 1924 trace__process_event(trace, trace->host, event, &sample); 1925 continue; 1926 } 1927 1928 evsel = perf_evlist__id2evsel(evlist, sample.id); 1929 if (evsel == NULL) { 1930 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id); 1931 goto next_event; 1932 } 1933 1934 if (sample.raw_data == 
NULL) { 1935 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", 1936 perf_evsel__name(evsel), sample.tid, 1937 sample.cpu, sample.raw_size); 1938 goto next_event; 1939 } 1940 1941 handler = evsel->handler; 1942 handler(trace, evsel, &sample); 1943 next_event: 1944 perf_evlist__mmap_consume(evlist, i); 1945 1946 if (interrupted) 1947 goto out_disable; 1948 } 1949 } 1950 1951 if (trace->nr_events == before) { 1952 int timeout = done ? 100 : -1; 1953 1954 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0) 1955 goto again; 1956 } else { 1957 goto again; 1958 } 1959 1960 out_disable: 1961 perf_evlist__disable(evlist); 1962 1963 if (!err) { 1964 if (trace->summary) 1965 trace__fprintf_thread_summary(trace, trace->output); 1966 1967 if (trace->show_tool_stats) { 1968 fprintf(trace->output, "Stats:\n " 1969 " vfs_getname : %" PRIu64 "\n" 1970 " proc_getname: %" PRIu64 "\n", 1971 trace->stats.vfs_getname, 1972 trace->stats.proc_getname); 1973 } 1974 } 1975 1976 perf_evlist__munmap(evlist); 1977 out_close_evlist: 1978 perf_evlist__close(evlist); 1979 out_delete_maps: 1980 perf_evlist__delete_maps(evlist); 1981 out_delete_evlist: 1982 perf_evlist__delete(evlist); 1983 out: 1984 trace->live = false; 1985 return err; 1986 { 1987 char errbuf[BUFSIZ]; 1988 1989 out_error_tp: 1990 perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf)); 1991 goto out_error; 1992 1993 out_error_open: 1994 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); 1995 1996 out_error: 1997 fprintf(trace->output, "%s\n", errbuf); 1998 goto out_delete_evlist; 1999 } 2000 } 2001 2002 static int trace__replay(struct trace *trace) 2003 { 2004 const struct perf_evsel_str_handler handlers[] = { 2005 { "probe:vfs_getname", trace__vfs_getname, }, 2006 }; 2007 struct perf_data_file file = { 2008 .path = input_name, 2009 .mode = PERF_DATA_MODE_READ, 2010 }; 2011 struct perf_session *session; 2012 struct perf_evsel *evsel; 2013 int err = -1; 
2014 2015 trace->tool.sample = trace__process_sample; 2016 trace->tool.mmap = perf_event__process_mmap; 2017 trace->tool.mmap2 = perf_event__process_mmap2; 2018 trace->tool.comm = perf_event__process_comm; 2019 trace->tool.exit = perf_event__process_exit; 2020 trace->tool.fork = perf_event__process_fork; 2021 trace->tool.attr = perf_event__process_attr; 2022 trace->tool.tracing_data = perf_event__process_tracing_data; 2023 trace->tool.build_id = perf_event__process_build_id; 2024 2025 trace->tool.ordered_samples = true; 2026 trace->tool.ordering_requires_timestamps = true; 2027 2028 /* add tid to output */ 2029 trace->multiple_threads = true; 2030 2031 if (symbol__init() < 0) 2032 return -1; 2033 2034 session = perf_session__new(&file, false, &trace->tool); 2035 if (session == NULL) 2036 return -ENOMEM; 2037 2038 trace->host = &session->machines.host; 2039 2040 err = perf_session__set_tracepoints_handlers(session, handlers); 2041 if (err) 2042 goto out; 2043 2044 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2045 "raw_syscalls:sys_enter"); 2046 if (evsel == NULL) { 2047 pr_err("Data file does not have raw_syscalls:sys_enter event\n"); 2048 goto out; 2049 } 2050 2051 if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 || 2052 perf_evsel__init_sc_tp_ptr_field(evsel, args)) { 2053 pr_err("Error during initialize raw_syscalls:sys_enter event\n"); 2054 goto out; 2055 } 2056 2057 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2058 "raw_syscalls:sys_exit"); 2059 if (evsel == NULL) { 2060 pr_err("Data file does not have raw_syscalls:sys_exit event\n"); 2061 goto out; 2062 } 2063 2064 if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 || 2065 perf_evsel__init_sc_tp_uint_field(evsel, ret)) { 2066 pr_err("Error during initialize raw_syscalls:sys_exit event\n"); 2067 goto out; 2068 } 2069 2070 err = parse_target_str(trace); 2071 if (err != 0) 2072 goto out; 2073 2074 setup_pager(); 2075 2076 err = 
perf_session__process_events(session, &trace->tool); 2077 if (err) 2078 pr_err("Failed to process events, error %d", err); 2079 2080 else if (trace->summary) 2081 trace__fprintf_thread_summary(trace, trace->output); 2082 2083 out: 2084 perf_session__delete(session); 2085 2086 return err; 2087 } 2088 2089 static size_t trace__fprintf_threads_header(FILE *fp) 2090 { 2091 size_t printed; 2092 2093 printed = fprintf(fp, "\n Summary of events:\n\n"); 2094 2095 return printed; 2096 } 2097 2098 static size_t thread__dump_stats(struct thread_trace *ttrace, 2099 struct trace *trace, FILE *fp) 2100 { 2101 struct stats *stats; 2102 size_t printed = 0; 2103 struct syscall *sc; 2104 struct int_node *inode = intlist__first(ttrace->syscall_stats); 2105 2106 if (inode == NULL) 2107 return 0; 2108 2109 printed += fprintf(fp, "\n"); 2110 2111 printed += fprintf(fp, " msec/call\n"); 2112 printed += fprintf(fp, " syscall calls min avg max stddev\n"); 2113 printed += fprintf(fp, " --------------- -------- -------- -------- -------- ------\n"); 2114 2115 /* each int_node is a syscall */ 2116 while (inode) { 2117 stats = inode->priv; 2118 if (stats) { 2119 double min = (double)(stats->min) / NSEC_PER_MSEC; 2120 double max = (double)(stats->max) / NSEC_PER_MSEC; 2121 double avg = avg_stats(stats); 2122 double pct; 2123 u64 n = (u64) stats->n; 2124 2125 pct = avg ? 
100.0 * stddev_stats(stats)/avg : 0.0; 2126 avg /= NSEC_PER_MSEC; 2127 2128 sc = &trace->syscalls.table[inode->i]; 2129 printed += fprintf(fp, " %-15s", sc->name); 2130 printed += fprintf(fp, " %8" PRIu64 " %8.3f %8.3f", 2131 n, min, avg); 2132 printed += fprintf(fp, " %8.3f %6.2f\n", max, pct); 2133 } 2134 2135 inode = intlist__next(inode); 2136 } 2137 2138 printed += fprintf(fp, "\n\n"); 2139 2140 return printed; 2141 } 2142 2143 /* struct used to pass data to per-thread function */ 2144 struct summary_data { 2145 FILE *fp; 2146 struct trace *trace; 2147 size_t printed; 2148 }; 2149 2150 static int trace__fprintf_one_thread(struct thread *thread, void *priv) 2151 { 2152 struct summary_data *data = priv; 2153 FILE *fp = data->fp; 2154 size_t printed = data->printed; 2155 struct trace *trace = data->trace; 2156 struct thread_trace *ttrace = thread->priv; 2157 const char *color; 2158 double ratio; 2159 2160 if (ttrace == NULL) 2161 return 0; 2162 2163 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0; 2164 2165 color = PERF_COLOR_NORMAL; 2166 if (ratio > 50.0) 2167 color = PERF_COLOR_RED; 2168 else if (ratio > 25.0) 2169 color = PERF_COLOR_GREEN; 2170 else if (ratio > 5.0) 2171 color = PERF_COLOR_YELLOW; 2172 2173 printed += color_fprintf(fp, color, " %s (%d), ", thread__comm_str(thread), thread->tid); 2174 printed += fprintf(fp, "%lu events, ", ttrace->nr_events); 2175 printed += color_fprintf(fp, color, "%.1f%%", ratio); 2176 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms); 2177 printed += thread__dump_stats(ttrace, trace, fp); 2178 2179 data->printed += printed; 2180 2181 return 0; 2182 } 2183 2184 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) 2185 { 2186 struct summary_data data = { 2187 .fp = fp, 2188 .trace = trace 2189 }; 2190 data.printed = trace__fprintf_threads_header(fp); 2191 2192 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data); 2193 2194 return data.printed; 2195 } 2196 2197 
static int trace__set_duration(const struct option *opt, const char *str, 2198 int unset __maybe_unused) 2199 { 2200 struct trace *trace = opt->value; 2201 2202 trace->duration_filter = atof(str); 2203 return 0; 2204 } 2205 2206 static int trace__open_output(struct trace *trace, const char *filename) 2207 { 2208 struct stat st; 2209 2210 if (!stat(filename, &st) && st.st_size) { 2211 char oldname[PATH_MAX]; 2212 2213 scnprintf(oldname, sizeof(oldname), "%s.old", filename); 2214 unlink(oldname); 2215 rename(filename, oldname); 2216 } 2217 2218 trace->output = fopen(filename, "w"); 2219 2220 return trace->output == NULL ? -errno : 0; 2221 } 2222 2223 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) 2224 { 2225 const char * const trace_usage[] = { 2226 "perf trace [<options>] [<command>]", 2227 "perf trace [<options>] -- <command> [<options>]", 2228 "perf trace record [<options>] [<command>]", 2229 "perf trace record [<options>] -- <command> [<options>]", 2230 NULL 2231 }; 2232 struct trace trace = { 2233 .audit = { 2234 .machine = audit_detect_machine(), 2235 .open_id = audit_name_to_syscall("open", trace.audit.machine), 2236 }, 2237 .syscalls = { 2238 . 
max = -1, 2239 }, 2240 .opts = { 2241 .target = { 2242 .uid = UINT_MAX, 2243 .uses_mmap = true, 2244 }, 2245 .user_freq = UINT_MAX, 2246 .user_interval = ULLONG_MAX, 2247 .no_delay = true, 2248 .mmap_pages = 1024, 2249 }, 2250 .output = stdout, 2251 .show_comm = true, 2252 }; 2253 const char *output_name = NULL; 2254 const char *ev_qualifier_str = NULL; 2255 const struct option trace_options[] = { 2256 OPT_BOOLEAN(0, "comm", &trace.show_comm, 2257 "show the thread COMM next to its id"), 2258 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), 2259 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", 2260 "list of events to trace"), 2261 OPT_STRING('o', "output", &output_name, "file", "output file name"), 2262 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"), 2263 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", 2264 "trace events on existing process id"), 2265 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid", 2266 "trace events on existing thread id"), 2267 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide, 2268 "system-wide collection from all CPUs"), 2269 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu", 2270 "list of cpus to monitor"), 2271 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, 2272 "child tasks do not inherit counters"), 2273 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages", 2274 "number of mmap data pages", 2275 perf_evlist__parse_mmap_pages), 2276 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user", 2277 "user to profile"), 2278 OPT_CALLBACK(0, "duration", &trace, "float", 2279 "show only events with duration > N.M ms", 2280 trace__set_duration), 2281 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), 2282 OPT_INCR('v', "verbose", &verbose, "be more verbose"), 2283 OPT_BOOLEAN('T', "time", &trace.full_time, 2284 "Show full timestamp, not time relative to first start"), 2285 OPT_BOOLEAN(0, "summary", &trace.summary, 2286 "Show syscall 
summary with statistics"), 2287 OPT_END() 2288 }; 2289 int err; 2290 char bf[BUFSIZ]; 2291 2292 if ((argc > 1) && (strcmp(argv[1], "record") == 0)) 2293 return trace__record(argc-2, &argv[2]); 2294 2295 argc = parse_options(argc, argv, trace_options, trace_usage, 0); 2296 2297 if (output_name != NULL) { 2298 err = trace__open_output(&trace, output_name); 2299 if (err < 0) { 2300 perror("failed to create output file"); 2301 goto out; 2302 } 2303 } 2304 2305 if (ev_qualifier_str != NULL) { 2306 const char *s = ev_qualifier_str; 2307 2308 trace.not_ev_qualifier = *s == '!'; 2309 if (trace.not_ev_qualifier) 2310 ++s; 2311 trace.ev_qualifier = strlist__new(true, s); 2312 if (trace.ev_qualifier == NULL) { 2313 fputs("Not enough memory to parse event qualifier", 2314 trace.output); 2315 err = -ENOMEM; 2316 goto out_close; 2317 } 2318 } 2319 2320 err = perf_target__validate(&trace.opts.target); 2321 if (err) { 2322 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 2323 fprintf(trace.output, "%s", bf); 2324 goto out_close; 2325 } 2326 2327 err = perf_target__parse_uid(&trace.opts.target); 2328 if (err) { 2329 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 2330 fprintf(trace.output, "%s", bf); 2331 goto out_close; 2332 } 2333 2334 if (!argc && perf_target__none(&trace.opts.target)) 2335 trace.opts.target.system_wide = true; 2336 2337 if (input_name) 2338 err = trace__replay(&trace); 2339 else 2340 err = trace__run(&trace, argc, argv); 2341 2342 out_close: 2343 if (output_name != NULL) 2344 fclose(trace.output); 2345 out: 2346 return err; 2347 } 2348