1 #include <traceevent/event-parse.h> 2 #include "builtin.h" 3 #include "util/color.h" 4 #include "util/debug.h" 5 #include "util/evlist.h" 6 #include "util/machine.h" 7 #include "util/session.h" 8 #include "util/thread.h" 9 #include "util/parse-options.h" 10 #include "util/strlist.h" 11 #include "util/intlist.h" 12 #include "util/thread_map.h" 13 #include "util/stat.h" 14 #include "trace-event.h" 15 #include "util/parse-events.h" 16 17 #include <libaudit.h> 18 #include <stdlib.h> 19 #include <sys/eventfd.h> 20 #include <sys/mman.h> 21 #include <linux/futex.h> 22 23 /* For older distros: */ 24 #ifndef MAP_STACK 25 # define MAP_STACK 0x20000 26 #endif 27 28 #ifndef MADV_HWPOISON 29 # define MADV_HWPOISON 100 30 #endif 31 32 #ifndef MADV_MERGEABLE 33 # define MADV_MERGEABLE 12 34 #endif 35 36 #ifndef MADV_UNMERGEABLE 37 # define MADV_UNMERGEABLE 13 38 #endif 39 40 #ifndef EFD_SEMAPHORE 41 # define EFD_SEMAPHORE 1 42 #endif 43 44 struct tp_field { 45 int offset; 46 union { 47 u64 (*integer)(struct tp_field *field, struct perf_sample *sample); 48 void *(*pointer)(struct tp_field *field, struct perf_sample *sample); 49 }; 50 }; 51 52 #define TP_UINT_FIELD(bits) \ 53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \ 54 { \ 55 return *(u##bits *)(sample->raw_data + field->offset); \ 56 } 57 58 TP_UINT_FIELD(8); 59 TP_UINT_FIELD(16); 60 TP_UINT_FIELD(32); 61 TP_UINT_FIELD(64); 62 63 #define TP_UINT_FIELD__SWAPPED(bits) \ 64 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \ 65 { \ 66 u##bits value = *(u##bits *)(sample->raw_data + field->offset); \ 67 return bswap_##bits(value);\ 68 } 69 70 TP_UINT_FIELD__SWAPPED(16); 71 TP_UINT_FIELD__SWAPPED(32); 72 TP_UINT_FIELD__SWAPPED(64); 73 74 static int tp_field__init_uint(struct tp_field *field, 75 struct format_field *format_field, 76 bool needs_swap) 77 { 78 field->offset = format_field->offset; 79 80 switch (format_field->size) { 81 case 1: 82 
field->integer = tp_field__u8; 83 break; 84 case 2: 85 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16; 86 break; 87 case 4: 88 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32; 89 break; 90 case 8: 91 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64; 92 break; 93 default: 94 return -1; 95 } 96 97 return 0; 98 } 99 100 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample) 101 { 102 return sample->raw_data + field->offset; 103 } 104 105 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field) 106 { 107 field->offset = format_field->offset; 108 field->pointer = tp_field__ptr; 109 return 0; 110 } 111 112 struct syscall_tp { 113 struct tp_field id; 114 union { 115 struct tp_field args, ret; 116 }; 117 }; 118 119 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel, 120 struct tp_field *field, 121 const char *name) 122 { 123 struct format_field *format_field = perf_evsel__field(evsel, name); 124 125 if (format_field == NULL) 126 return -1; 127 128 return tp_field__init_uint(field, format_field, evsel->needs_swap); 129 } 130 131 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \ 132 ({ struct syscall_tp *sc = evsel->priv;\ 133 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); }) 134 135 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel, 136 struct tp_field *field, 137 const char *name) 138 { 139 struct format_field *format_field = perf_evsel__field(evsel, name); 140 141 if (format_field == NULL) 142 return -1; 143 144 return tp_field__init_ptr(field, format_field); 145 } 146 147 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \ 148 ({ struct syscall_tp *sc = evsel->priv;\ 149 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); }) 150 151 static void perf_evsel__delete_priv(struct perf_evsel *evsel) 152 { 153 zfree(&evsel->priv); 154 perf_evsel__delete(evsel); 155 } 156 157 static int 
perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler) 158 { 159 evsel->priv = malloc(sizeof(struct syscall_tp)); 160 if (evsel->priv != NULL) { 161 if (perf_evsel__init_sc_tp_uint_field(evsel, id)) 162 goto out_delete; 163 164 evsel->handler = handler; 165 return 0; 166 } 167 168 return -ENOMEM; 169 170 out_delete: 171 zfree(&evsel->priv); 172 return -ENOENT; 173 } 174 175 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler) 176 { 177 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction); 178 179 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */ 180 if (evsel == NULL) 181 evsel = perf_evsel__newtp("syscalls", direction); 182 183 if (evsel) { 184 if (perf_evsel__init_syscall_tp(evsel, handler)) 185 goto out_delete; 186 } 187 188 return evsel; 189 190 out_delete: 191 perf_evsel__delete_priv(evsel); 192 return NULL; 193 } 194 195 #define perf_evsel__sc_tp_uint(evsel, name, sample) \ 196 ({ struct syscall_tp *fields = evsel->priv; \ 197 fields->name.integer(&fields->name, sample); }) 198 199 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \ 200 ({ struct syscall_tp *fields = evsel->priv; \ 201 fields->name.pointer(&fields->name, sample); }) 202 203 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist, 204 void *sys_enter_handler, 205 void *sys_exit_handler) 206 { 207 int ret = -1; 208 struct perf_evsel *sys_enter, *sys_exit; 209 210 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler); 211 if (sys_enter == NULL) 212 goto out; 213 214 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args)) 215 goto out_delete_sys_enter; 216 217 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler); 218 if (sys_exit == NULL) 219 goto out_delete_sys_enter; 220 221 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) 222 goto out_delete_sys_exit; 223 224 perf_evlist__add(evlist, sys_enter); 225 perf_evlist__add(evlist, sys_exit); 226 227 ret = 0; 228 out: 229 
return ret; 230 231 out_delete_sys_exit: 232 perf_evsel__delete_priv(sys_exit); 233 out_delete_sys_enter: 234 perf_evsel__delete_priv(sys_enter); 235 goto out; 236 } 237 238 239 struct syscall_arg { 240 unsigned long val; 241 struct thread *thread; 242 struct trace *trace; 243 void *parm; 244 u8 idx; 245 u8 mask; 246 }; 247 248 struct strarray { 249 int offset; 250 int nr_entries; 251 const char **entries; 252 }; 253 254 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \ 255 .nr_entries = ARRAY_SIZE(array), \ 256 .entries = array, \ 257 } 258 259 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \ 260 .offset = off, \ 261 .nr_entries = ARRAY_SIZE(array), \ 262 .entries = array, \ 263 } 264 265 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size, 266 const char *intfmt, 267 struct syscall_arg *arg) 268 { 269 struct strarray *sa = arg->parm; 270 int idx = arg->val - sa->offset; 271 272 if (idx < 0 || idx >= sa->nr_entries) 273 return scnprintf(bf, size, intfmt, arg->val); 274 275 return scnprintf(bf, size, "%s", sa->entries[idx]); 276 } 277 278 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, 279 struct syscall_arg *arg) 280 { 281 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg); 282 } 283 284 #define SCA_STRARRAY syscall_arg__scnprintf_strarray 285 286 #if defined(__i386__) || defined(__x86_64__) 287 /* 288 * FIXME: Make this available to all arches as soon as the ioctl beautifier 289 * gets rewritten to support all arches. 
290 */ 291 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size, 292 struct syscall_arg *arg) 293 { 294 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg); 295 } 296 297 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray 298 #endif /* defined(__i386__) || defined(__x86_64__) */ 299 300 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, 301 struct syscall_arg *arg); 302 303 #define SCA_FD syscall_arg__scnprintf_fd 304 305 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size, 306 struct syscall_arg *arg) 307 { 308 int fd = arg->val; 309 310 if (fd == AT_FDCWD) 311 return scnprintf(bf, size, "CWD"); 312 313 return syscall_arg__scnprintf_fd(bf, size, arg); 314 } 315 316 #define SCA_FDAT syscall_arg__scnprintf_fd_at 317 318 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 319 struct syscall_arg *arg); 320 321 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd 322 323 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, 324 struct syscall_arg *arg) 325 { 326 return scnprintf(bf, size, "%#lx", arg->val); 327 } 328 329 #define SCA_HEX syscall_arg__scnprintf_hex 330 331 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, 332 struct syscall_arg *arg) 333 { 334 int printed = 0, prot = arg->val; 335 336 if (prot == PROT_NONE) 337 return scnprintf(bf, size, "NONE"); 338 #define P_MMAP_PROT(n) \ 339 if (prot & PROT_##n) { \ 340 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 341 prot &= ~PROT_##n; \ 342 } 343 344 P_MMAP_PROT(EXEC); 345 P_MMAP_PROT(READ); 346 P_MMAP_PROT(WRITE); 347 #ifdef PROT_SEM 348 P_MMAP_PROT(SEM); 349 #endif 350 P_MMAP_PROT(GROWSDOWN); 351 P_MMAP_PROT(GROWSUP); 352 #undef P_MMAP_PROT 353 354 if (prot) 355 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? 
"|" : "", prot); 356 357 return printed; 358 } 359 360 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot 361 362 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, 363 struct syscall_arg *arg) 364 { 365 int printed = 0, flags = arg->val; 366 367 #define P_MMAP_FLAG(n) \ 368 if (flags & MAP_##n) { \ 369 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 370 flags &= ~MAP_##n; \ 371 } 372 373 P_MMAP_FLAG(SHARED); 374 P_MMAP_FLAG(PRIVATE); 375 #ifdef MAP_32BIT 376 P_MMAP_FLAG(32BIT); 377 #endif 378 P_MMAP_FLAG(ANONYMOUS); 379 P_MMAP_FLAG(DENYWRITE); 380 P_MMAP_FLAG(EXECUTABLE); 381 P_MMAP_FLAG(FILE); 382 P_MMAP_FLAG(FIXED); 383 P_MMAP_FLAG(GROWSDOWN); 384 #ifdef MAP_HUGETLB 385 P_MMAP_FLAG(HUGETLB); 386 #endif 387 P_MMAP_FLAG(LOCKED); 388 P_MMAP_FLAG(NONBLOCK); 389 P_MMAP_FLAG(NORESERVE); 390 P_MMAP_FLAG(POPULATE); 391 P_MMAP_FLAG(STACK); 392 #ifdef MAP_UNINITIALIZED 393 P_MMAP_FLAG(UNINITIALIZED); 394 #endif 395 #undef P_MMAP_FLAG 396 397 if (flags) 398 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? 
"|" : "", flags); 399 400 return printed; 401 } 402 403 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags 404 405 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, 406 struct syscall_arg *arg) 407 { 408 int behavior = arg->val; 409 410 switch (behavior) { 411 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n) 412 P_MADV_BHV(NORMAL); 413 P_MADV_BHV(RANDOM); 414 P_MADV_BHV(SEQUENTIAL); 415 P_MADV_BHV(WILLNEED); 416 P_MADV_BHV(DONTNEED); 417 P_MADV_BHV(REMOVE); 418 P_MADV_BHV(DONTFORK); 419 P_MADV_BHV(DOFORK); 420 P_MADV_BHV(HWPOISON); 421 #ifdef MADV_SOFT_OFFLINE 422 P_MADV_BHV(SOFT_OFFLINE); 423 #endif 424 P_MADV_BHV(MERGEABLE); 425 P_MADV_BHV(UNMERGEABLE); 426 #ifdef MADV_HUGEPAGE 427 P_MADV_BHV(HUGEPAGE); 428 #endif 429 #ifdef MADV_NOHUGEPAGE 430 P_MADV_BHV(NOHUGEPAGE); 431 #endif 432 #ifdef MADV_DONTDUMP 433 P_MADV_BHV(DONTDUMP); 434 #endif 435 #ifdef MADV_DODUMP 436 P_MADV_BHV(DODUMP); 437 #endif 438 #undef P_MADV_PHV 439 default: break; 440 } 441 442 return scnprintf(bf, size, "%#x", behavior); 443 } 444 445 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior 446 447 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size, 448 struct syscall_arg *arg) 449 { 450 int printed = 0, op = arg->val; 451 452 if (op == 0) 453 return scnprintf(bf, size, "NONE"); 454 #define P_CMD(cmd) \ 455 if ((op & LOCK_##cmd) == LOCK_##cmd) { \ 456 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \ 457 op &= ~LOCK_##cmd; \ 458 } 459 460 P_CMD(SH); 461 P_CMD(EX); 462 P_CMD(NB); 463 P_CMD(UN); 464 P_CMD(MAND); 465 P_CMD(RW); 466 P_CMD(READ); 467 P_CMD(WRITE); 468 #undef P_OP 469 470 if (op) 471 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? 
"|" : "", op); 472 473 return printed; 474 } 475 476 #define SCA_FLOCK syscall_arg__scnprintf_flock 477 478 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg) 479 { 480 enum syscall_futex_args { 481 SCF_UADDR = (1 << 0), 482 SCF_OP = (1 << 1), 483 SCF_VAL = (1 << 2), 484 SCF_TIMEOUT = (1 << 3), 485 SCF_UADDR2 = (1 << 4), 486 SCF_VAL3 = (1 << 5), 487 }; 488 int op = arg->val; 489 int cmd = op & FUTEX_CMD_MASK; 490 size_t printed = 0; 491 492 switch (cmd) { 493 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n); 494 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break; 495 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 496 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 497 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break; 498 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break; 499 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break; 500 P_FUTEX_OP(WAKE_OP); break; 501 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 502 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 503 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break; 504 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break; 505 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break; 506 P_FUTEX_OP(WAIT_REQUEUE_PI); break; 507 default: printed = scnprintf(bf, size, "%#x", cmd); break; 508 } 509 510 if (op & FUTEX_PRIVATE_FLAG) 511 printed += scnprintf(bf + printed, size - printed, "|PRIV"); 512 513 if (op & FUTEX_CLOCK_REALTIME) 514 printed += scnprintf(bf + printed, size - printed, "|CLKRT"); 515 516 return printed; 517 } 518 519 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op 520 521 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", }; 522 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1); 523 524 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", }; 525 static 
DEFINE_STRARRAY(itimers); 526 527 static const char *whences[] = { "SET", "CUR", "END", 528 #ifdef SEEK_DATA 529 "DATA", 530 #endif 531 #ifdef SEEK_HOLE 532 "HOLE", 533 #endif 534 }; 535 static DEFINE_STRARRAY(whences); 536 537 static const char *fcntl_cmds[] = { 538 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK", 539 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64", 540 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX", 541 "F_GETOWNER_UIDS", 542 }; 543 static DEFINE_STRARRAY(fcntl_cmds); 544 545 static const char *rlimit_resources[] = { 546 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE", 547 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO", 548 "RTTIME", 549 }; 550 static DEFINE_STRARRAY(rlimit_resources); 551 552 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", }; 553 static DEFINE_STRARRAY(sighow); 554 555 static const char *clockid[] = { 556 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID", 557 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", 558 }; 559 static DEFINE_STRARRAY(clockid); 560 561 static const char *socket_families[] = { 562 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM", 563 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI", 564 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC", 565 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC", 566 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF", 567 "ALG", "NFC", "VSOCK", 568 }; 569 static DEFINE_STRARRAY(socket_families); 570 571 #ifndef SOCK_TYPE_MASK 572 #define SOCK_TYPE_MASK 0xf 573 #endif 574 575 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size, 576 struct syscall_arg *arg) 577 { 578 size_t printed; 579 int type = arg->val, 580 flags = type & ~SOCK_TYPE_MASK; 581 582 type &= SOCK_TYPE_MASK; 583 /* 584 * Can't use a strarray, MIPS may override for ABI reasons. 
585 */ 586 switch (type) { 587 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break; 588 P_SK_TYPE(STREAM); 589 P_SK_TYPE(DGRAM); 590 P_SK_TYPE(RAW); 591 P_SK_TYPE(RDM); 592 P_SK_TYPE(SEQPACKET); 593 P_SK_TYPE(DCCP); 594 P_SK_TYPE(PACKET); 595 #undef P_SK_TYPE 596 default: 597 printed = scnprintf(bf, size, "%#x", type); 598 } 599 600 #define P_SK_FLAG(n) \ 601 if (flags & SOCK_##n) { \ 602 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \ 603 flags &= ~SOCK_##n; \ 604 } 605 606 P_SK_FLAG(CLOEXEC); 607 P_SK_FLAG(NONBLOCK); 608 #undef P_SK_FLAG 609 610 if (flags) 611 printed += scnprintf(bf + printed, size - printed, "|%#x", flags); 612 613 return printed; 614 } 615 616 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type 617 618 #ifndef MSG_PROBE 619 #define MSG_PROBE 0x10 620 #endif 621 #ifndef MSG_WAITFORONE 622 #define MSG_WAITFORONE 0x10000 623 #endif 624 #ifndef MSG_SENDPAGE_NOTLAST 625 #define MSG_SENDPAGE_NOTLAST 0x20000 626 #endif 627 #ifndef MSG_FASTOPEN 628 #define MSG_FASTOPEN 0x20000000 629 #endif 630 631 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size, 632 struct syscall_arg *arg) 633 { 634 int printed = 0, flags = arg->val; 635 636 if (flags == 0) 637 return scnprintf(bf, size, "NONE"); 638 #define P_MSG_FLAG(n) \ 639 if (flags & MSG_##n) { \ 640 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? 
"|" : "", #n); \ 641 flags &= ~MSG_##n; \ 642 } 643 644 P_MSG_FLAG(OOB); 645 P_MSG_FLAG(PEEK); 646 P_MSG_FLAG(DONTROUTE); 647 P_MSG_FLAG(TRYHARD); 648 P_MSG_FLAG(CTRUNC); 649 P_MSG_FLAG(PROBE); 650 P_MSG_FLAG(TRUNC); 651 P_MSG_FLAG(DONTWAIT); 652 P_MSG_FLAG(EOR); 653 P_MSG_FLAG(WAITALL); 654 P_MSG_FLAG(FIN); 655 P_MSG_FLAG(SYN); 656 P_MSG_FLAG(CONFIRM); 657 P_MSG_FLAG(RST); 658 P_MSG_FLAG(ERRQUEUE); 659 P_MSG_FLAG(NOSIGNAL); 660 P_MSG_FLAG(MORE); 661 P_MSG_FLAG(WAITFORONE); 662 P_MSG_FLAG(SENDPAGE_NOTLAST); 663 P_MSG_FLAG(FASTOPEN); 664 P_MSG_FLAG(CMSG_CLOEXEC); 665 #undef P_MSG_FLAG 666 667 if (flags) 668 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 669 670 return printed; 671 } 672 673 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags 674 675 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, 676 struct syscall_arg *arg) 677 { 678 size_t printed = 0; 679 int mode = arg->val; 680 681 if (mode == F_OK) /* 0 */ 682 return scnprintf(bf, size, "F"); 683 #define P_MODE(n) \ 684 if (mode & n##_OK) { \ 685 printed += scnprintf(bf + printed, size - printed, "%s", #n); \ 686 mode &= ~n##_OK; \ 687 } 688 689 P_MODE(R); 690 P_MODE(W); 691 P_MODE(X); 692 #undef P_MODE 693 694 if (mode) 695 printed += scnprintf(bf + printed, size - printed, "|%#x", mode); 696 697 return printed; 698 } 699 700 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode 701 702 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, 703 struct syscall_arg *arg) 704 { 705 int printed = 0, flags = arg->val; 706 707 if (!(flags & O_CREAT)) 708 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */ 709 710 if (flags == 0) 711 return scnprintf(bf, size, "RDONLY"); 712 #define P_FLAG(n) \ 713 if (flags & O_##n) { \ 714 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? 
"|" : "", #n); \ 715 flags &= ~O_##n; \ 716 } 717 718 P_FLAG(APPEND); 719 P_FLAG(ASYNC); 720 P_FLAG(CLOEXEC); 721 P_FLAG(CREAT); 722 P_FLAG(DIRECT); 723 P_FLAG(DIRECTORY); 724 P_FLAG(EXCL); 725 P_FLAG(LARGEFILE); 726 P_FLAG(NOATIME); 727 P_FLAG(NOCTTY); 728 #ifdef O_NONBLOCK 729 P_FLAG(NONBLOCK); 730 #elif O_NDELAY 731 P_FLAG(NDELAY); 732 #endif 733 #ifdef O_PATH 734 P_FLAG(PATH); 735 #endif 736 P_FLAG(RDWR); 737 #ifdef O_DSYNC 738 if ((flags & O_SYNC) == O_SYNC) 739 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC"); 740 else { 741 P_FLAG(DSYNC); 742 } 743 #else 744 P_FLAG(SYNC); 745 #endif 746 P_FLAG(TRUNC); 747 P_FLAG(WRONLY); 748 #undef P_FLAG 749 750 if (flags) 751 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 752 753 return printed; 754 } 755 756 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags 757 758 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size, 759 struct syscall_arg *arg) 760 { 761 int printed = 0, flags = arg->val; 762 763 if (flags == 0) 764 return scnprintf(bf, size, "NONE"); 765 #define P_FLAG(n) \ 766 if (flags & EFD_##n) { \ 767 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 768 flags &= ~EFD_##n; \ 769 } 770 771 P_FLAG(SEMAPHORE); 772 P_FLAG(CLOEXEC); 773 P_FLAG(NONBLOCK); 774 #undef P_FLAG 775 776 if (flags) 777 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 778 779 return printed; 780 } 781 782 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags 783 784 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size, 785 struct syscall_arg *arg) 786 { 787 int printed = 0, flags = arg->val; 788 789 #define P_FLAG(n) \ 790 if (flags & O_##n) { \ 791 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? 
"|" : "", #n); \ 792 flags &= ~O_##n; \ 793 } 794 795 P_FLAG(CLOEXEC); 796 P_FLAG(NONBLOCK); 797 #undef P_FLAG 798 799 if (flags) 800 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 801 802 return printed; 803 } 804 805 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags 806 807 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg) 808 { 809 int sig = arg->val; 810 811 switch (sig) { 812 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n) 813 P_SIGNUM(HUP); 814 P_SIGNUM(INT); 815 P_SIGNUM(QUIT); 816 P_SIGNUM(ILL); 817 P_SIGNUM(TRAP); 818 P_SIGNUM(ABRT); 819 P_SIGNUM(BUS); 820 P_SIGNUM(FPE); 821 P_SIGNUM(KILL); 822 P_SIGNUM(USR1); 823 P_SIGNUM(SEGV); 824 P_SIGNUM(USR2); 825 P_SIGNUM(PIPE); 826 P_SIGNUM(ALRM); 827 P_SIGNUM(TERM); 828 P_SIGNUM(STKFLT); 829 P_SIGNUM(CHLD); 830 P_SIGNUM(CONT); 831 P_SIGNUM(STOP); 832 P_SIGNUM(TSTP); 833 P_SIGNUM(TTIN); 834 P_SIGNUM(TTOU); 835 P_SIGNUM(URG); 836 P_SIGNUM(XCPU); 837 P_SIGNUM(XFSZ); 838 P_SIGNUM(VTALRM); 839 P_SIGNUM(PROF); 840 P_SIGNUM(WINCH); 841 P_SIGNUM(IO); 842 P_SIGNUM(PWR); 843 P_SIGNUM(SYS); 844 default: break; 845 } 846 847 return scnprintf(bf, size, "%#x", sig); 848 } 849 850 #define SCA_SIGNUM syscall_arg__scnprintf_signum 851 852 #if defined(__i386__) || defined(__x86_64__) 853 /* 854 * FIXME: Make this available to all arches. 
855 */ 856 #define TCGETS 0x5401 857 858 static const char *tioctls[] = { 859 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW", 860 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL", 861 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI", 862 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC", 863 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX", 864 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO", 865 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK", 866 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2", 867 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK", 868 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG", 869 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL", 870 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG", 871 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS", 872 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI", 873 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE", 874 }; 875 876 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401); 877 #endif /* defined(__i386__) || defined(__x86_64__) */ 878 879 #define STRARRAY(arg, name, array) \ 880 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \ 881 .arg_parm = { [arg] = &strarray__##array, } 882 883 static struct syscall_fmt { 884 const char *name; 885 const char *alias; 886 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg); 887 void *arg_parm[6]; 888 bool errmsg; 889 bool timeout; 890 bool hexret; 891 } syscall_fmts[] = { 892 { .name = "access", .errmsg = true, 893 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, }, 894 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, 895 { .name = "brk", .hexret = true, 896 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, 897 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, 898 { .name = "close", .errmsg = true, 899 
.arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 900 { .name = "connect", .errmsg = true, }, 901 { .name = "dup", .errmsg = true, 902 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 903 { .name = "dup2", .errmsg = true, 904 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 905 { .name = "dup3", .errmsg = true, 906 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 907 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), }, 908 { .name = "eventfd2", .errmsg = true, 909 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, 910 { .name = "faccessat", .errmsg = true, 911 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 912 { .name = "fadvise64", .errmsg = true, 913 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 914 { .name = "fallocate", .errmsg = true, 915 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 916 { .name = "fchdir", .errmsg = true, 917 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 918 { .name = "fchmod", .errmsg = true, 919 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 920 { .name = "fchmodat", .errmsg = true, 921 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 922 { .name = "fchown", .errmsg = true, 923 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 924 { .name = "fchownat", .errmsg = true, 925 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 926 { .name = "fcntl", .errmsg = true, 927 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 928 [1] = SCA_STRARRAY, /* cmd */ }, 929 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, }, 930 { .name = "fdatasync", .errmsg = true, 931 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 932 { .name = "flock", .errmsg = true, 933 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 934 [1] = SCA_FLOCK, /* cmd */ }, }, 935 { .name = "fsetxattr", .errmsg = true, 936 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 937 { .name = "fstat", .errmsg = true, .alias = "newfstat", 938 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 939 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", 940 .arg_scnprintf = 
{ [0] = SCA_FDAT, /* dfd */ }, }, 941 { .name = "fstatfs", .errmsg = true, 942 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 943 { .name = "fsync", .errmsg = true, 944 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 945 { .name = "ftruncate", .errmsg = true, 946 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 947 { .name = "futex", .errmsg = true, 948 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, 949 { .name = "futimesat", .errmsg = true, 950 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 951 { .name = "getdents", .errmsg = true, 952 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 953 { .name = "getdents64", .errmsg = true, 954 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 955 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, 956 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, 957 { .name = "ioctl", .errmsg = true, 958 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 959 #if defined(__i386__) || defined(__x86_64__) 960 /* 961 * FIXME: Make this available to all arches. 
962 */ 963 [1] = SCA_STRHEXARRAY, /* cmd */ 964 [2] = SCA_HEX, /* arg */ }, 965 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, }, 966 #else 967 [2] = SCA_HEX, /* arg */ }, }, 968 #endif 969 { .name = "kill", .errmsg = true, 970 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 971 { .name = "linkat", .errmsg = true, 972 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 973 { .name = "lseek", .errmsg = true, 974 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 975 [2] = SCA_STRARRAY, /* whence */ }, 976 .arg_parm = { [2] = &strarray__whences, /* whence */ }, }, 977 { .name = "lstat", .errmsg = true, .alias = "newlstat", }, 978 { .name = "madvise", .errmsg = true, 979 .arg_scnprintf = { [0] = SCA_HEX, /* start */ 980 [2] = SCA_MADV_BHV, /* behavior */ }, }, 981 { .name = "mkdirat", .errmsg = true, 982 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 983 { .name = "mknodat", .errmsg = true, 984 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 985 { .name = "mlock", .errmsg = true, 986 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 987 { .name = "mlockall", .errmsg = true, 988 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 989 { .name = "mmap", .hexret = true, 990 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ 991 [2] = SCA_MMAP_PROT, /* prot */ 992 [3] = SCA_MMAP_FLAGS, /* flags */ 993 [4] = SCA_FD, /* fd */ }, }, 994 { .name = "mprotect", .errmsg = true, 995 .arg_scnprintf = { [0] = SCA_HEX, /* start */ 996 [2] = SCA_MMAP_PROT, /* prot */ }, }, 997 { .name = "mremap", .hexret = true, 998 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ 999 [4] = SCA_HEX, /* new_addr */ }, }, 1000 { .name = "munlock", .errmsg = true, 1001 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 1002 { .name = "munmap", .errmsg = true, 1003 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 1004 { .name = "name_to_handle_at", .errmsg = true, 1005 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1006 { .name = "newfstatat", .errmsg = true, 1007 .arg_scnprintf = { [0] = SCA_FDAT, /* 
dfd */ }, }, 1008 { .name = "open", .errmsg = true, 1009 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, 1010 { .name = "open_by_handle_at", .errmsg = true, 1011 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 1012 [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 1013 { .name = "openat", .errmsg = true, 1014 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 1015 [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 1016 { .name = "pipe2", .errmsg = true, 1017 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, }, 1018 { .name = "poll", .errmsg = true, .timeout = true, }, 1019 { .name = "ppoll", .errmsg = true, .timeout = true, }, 1020 { .name = "pread", .errmsg = true, .alias = "pread64", 1021 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1022 { .name = "preadv", .errmsg = true, .alias = "pread", 1023 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1024 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), }, 1025 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", 1026 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1027 { .name = "pwritev", .errmsg = true, 1028 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1029 { .name = "read", .errmsg = true, 1030 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1031 { .name = "readlinkat", .errmsg = true, 1032 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1033 { .name = "readv", .errmsg = true, 1034 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1035 { .name = "recvfrom", .errmsg = true, 1036 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1037 { .name = "recvmmsg", .errmsg = true, 1038 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1039 { .name = "recvmsg", .errmsg = true, 1040 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, 1041 { .name = "renameat", .errmsg = true, 1042 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1043 { .name = "rt_sigaction", .errmsg = true, 1044 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, 1045 { .name = "rt_sigprocmask", .errmsg = 
true, STRARRAY(0, how, sighow), }, 1046 { .name = "rt_sigqueueinfo", .errmsg = true, 1047 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 1048 { .name = "rt_tgsigqueueinfo", .errmsg = true, 1049 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, 1050 { .name = "select", .errmsg = true, .timeout = true, }, 1051 { .name = "sendmmsg", .errmsg = true, 1052 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1053 { .name = "sendmsg", .errmsg = true, 1054 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, 1055 { .name = "sendto", .errmsg = true, 1056 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1057 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, 1058 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, 1059 { .name = "shutdown", .errmsg = true, 1060 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1061 { .name = "socket", .errmsg = true, 1062 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ 1063 [1] = SCA_SK_TYPE, /* type */ }, 1064 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, 1065 { .name = "socketpair", .errmsg = true, 1066 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ 1067 [1] = SCA_SK_TYPE, /* type */ }, 1068 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, 1069 { .name = "stat", .errmsg = true, .alias = "newstat", }, 1070 { .name = "symlinkat", .errmsg = true, 1071 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1072 { .name = "tgkill", .errmsg = true, 1073 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, 1074 { .name = "tkill", .errmsg = true, 1075 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 1076 { .name = "uname", .errmsg = true, .alias = "newuname", }, 1077 { .name = "unlinkat", .errmsg = true, 1078 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1079 { .name = "utimensat", .errmsg = true, 1080 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, }, 1081 { .name = "write", .errmsg = true, 1082 .arg_scnprintf = { 
[0] = SCA_FD, /* fd */ }, }, 1083 { .name = "writev", .errmsg = true, 1084 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1085 }; 1086 1087 static int syscall_fmt__cmp(const void *name, const void *fmtp) 1088 { 1089 const struct syscall_fmt *fmt = fmtp; 1090 return strcmp(name, fmt->name); 1091 } 1092 1093 static struct syscall_fmt *syscall_fmt__find(const char *name) 1094 { 1095 const int nmemb = ARRAY_SIZE(syscall_fmts); 1096 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); 1097 } 1098 1099 struct syscall { 1100 struct event_format *tp_format; 1101 const char *name; 1102 bool filtered; 1103 struct syscall_fmt *fmt; 1104 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); 1105 void **arg_parm; 1106 }; 1107 1108 static size_t fprintf_duration(unsigned long t, FILE *fp) 1109 { 1110 double duration = (double)t / NSEC_PER_MSEC; 1111 size_t printed = fprintf(fp, "("); 1112 1113 if (duration >= 1.0) 1114 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); 1115 else if (duration >= 0.01) 1116 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); 1117 else 1118 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration); 1119 return printed + fprintf(fp, "): "); 1120 } 1121 1122 struct thread_trace { 1123 u64 entry_time; 1124 u64 exit_time; 1125 bool entry_pending; 1126 unsigned long nr_events; 1127 char *entry_str; 1128 double runtime_ms; 1129 struct { 1130 int max; 1131 char **table; 1132 } paths; 1133 1134 struct intlist *syscall_stats; 1135 }; 1136 1137 static struct thread_trace *thread_trace__new(void) 1138 { 1139 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace)); 1140 1141 if (ttrace) 1142 ttrace->paths.max = -1; 1143 1144 ttrace->syscall_stats = intlist__new(NULL); 1145 1146 return ttrace; 1147 } 1148 1149 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) 1150 { 1151 struct thread_trace *ttrace; 1152 1153 if (thread == 
/* State of one 'perf trace' invocation, used by both the live and replay paths. */
struct trace {
	/* Embedded so tool callbacks can container_of() back to this struct. */
	struct perf_tool	tool;
	struct {
		int		machine;	/* initialised from audit_detect_machine() */
		int		open_id;	/* syscall id of "open" for that machine */
	} audit;
	struct {
		int		max;		/* highest valid index in table, -1 while empty */
		struct syscall  *table;		/* indexed by syscall id, grown on demand */
	} syscalls;
	struct record_opts	opts;
	struct machine		*host;		/* machine used for thread lookups */
	u64			base_time;	/* first sample time, unless full_time is set */
	FILE			*output;	/* stdout or the -o/--output file */
	unsigned long		nr_events;
	struct strlist		*ev_qualifier;	/* syscall names given with -e/--expr */
	/* pathname from the most recent probe:vfs_getname sample, consumed at open() exit */
	const char		*last_vfs_getname;
	struct intlist		*tid_list;	/* built by parse_target_str() from opts.target.tid */
	struct intlist		*pid_list;	/* built by parse_target_str() from opts.target.pid */
	double			duration_filter; /* --duration threshold, in milliseconds */
	double			runtime_ms;	/* accumulated sched_stat_runtime, in milliseconds */
	struct {
		u64		vfs_getname,	/* fd pathnames recorded from vfs_getname */
				proc_getname;	/* fd pathnames looked up through /proc */
	} stats;
	bool			not_ev_qualifier; /* the -e list started with '!' */
	bool			live;		/* true while trace__run() is executing */
	bool			full_time;	/* -T: print absolute timestamps */
	bool			sched;		/* --sched: also trace sched_stat_runtime */
	bool			multiple_threads; /* prefix each line with comm/tid */
	bool			summary;	/* collect and print per-syscall statistics */
	bool			summary_only;	/* print the summary but not individual syscalls */
	bool			show_comm;
	bool			show_tool_stats;
};
0 : -1; 1232 } 1233 1234 static int thread__read_fd_path(struct thread *thread, int fd) 1235 { 1236 char linkname[PATH_MAX], pathname[PATH_MAX]; 1237 struct stat st; 1238 int ret; 1239 1240 if (thread->pid_ == thread->tid) { 1241 scnprintf(linkname, sizeof(linkname), 1242 "/proc/%d/fd/%d", thread->pid_, fd); 1243 } else { 1244 scnprintf(linkname, sizeof(linkname), 1245 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd); 1246 } 1247 1248 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname)) 1249 return -1; 1250 1251 ret = readlink(linkname, pathname, sizeof(pathname)); 1252 1253 if (ret < 0 || ret > st.st_size) 1254 return -1; 1255 1256 pathname[ret] = '\0'; 1257 return trace__set_fd_pathname(thread, fd, pathname); 1258 } 1259 1260 static const char *thread__fd_path(struct thread *thread, int fd, 1261 struct trace *trace) 1262 { 1263 struct thread_trace *ttrace = thread->priv; 1264 1265 if (ttrace == NULL) 1266 return NULL; 1267 1268 if (fd < 0) 1269 return NULL; 1270 1271 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) 1272 if (!trace->live) 1273 return NULL; 1274 ++trace->stats.proc_getname; 1275 if (thread__read_fd_path(thread, fd)) { 1276 return NULL; 1277 } 1278 1279 return ttrace->paths.table[fd]; 1280 } 1281 1282 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, 1283 struct syscall_arg *arg) 1284 { 1285 int fd = arg->val; 1286 size_t printed = scnprintf(bf, size, "%d", fd); 1287 const char *path = thread__fd_path(arg->thread, fd, arg->trace); 1288 1289 if (path) 1290 printed += scnprintf(bf + printed, size - printed, "<%s>", path); 1291 1292 return printed; 1293 } 1294 1295 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 1296 struct syscall_arg *arg) 1297 { 1298 int fd = arg->val; 1299 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg); 1300 struct thread_trace *ttrace = arg->thread->priv; 1301 1302 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) 1303 
zfree(&ttrace->paths.table[fd]); 1304 1305 return printed; 1306 } 1307 1308 static bool trace__filter_duration(struct trace *trace, double t) 1309 { 1310 return t < (trace->duration_filter * NSEC_PER_MSEC); 1311 } 1312 1313 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) 1314 { 1315 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; 1316 1317 return fprintf(fp, "%10.3f ", ts); 1318 } 1319 1320 static bool done = false; 1321 static bool interrupted = false; 1322 1323 static void sig_handler(int sig) 1324 { 1325 done = true; 1326 interrupted = sig == SIGINT; 1327 } 1328 1329 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, 1330 u64 duration, u64 tstamp, FILE *fp) 1331 { 1332 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); 1333 printed += fprintf_duration(duration, fp); 1334 1335 if (trace->multiple_threads) { 1336 if (trace->show_comm) 1337 printed += fprintf(fp, "%.14s/", thread__comm_str(thread)); 1338 printed += fprintf(fp, "%d ", thread->tid); 1339 } 1340 1341 return printed; 1342 } 1343 1344 static int trace__process_event(struct trace *trace, struct machine *machine, 1345 union perf_event *event, struct perf_sample *sample) 1346 { 1347 int ret = 0; 1348 1349 switch (event->header.type) { 1350 case PERF_RECORD_LOST: 1351 color_fprintf(trace->output, PERF_COLOR_RED, 1352 "LOST %" PRIu64 " events!\n", event->lost.lost); 1353 ret = machine__process_lost_event(machine, event, sample); 1354 default: 1355 ret = machine__process_event(machine, event, sample); 1356 break; 1357 } 1358 1359 return ret; 1360 } 1361 1362 static int trace__tool_process(struct perf_tool *tool, 1363 union perf_event *event, 1364 struct perf_sample *sample, 1365 struct machine *machine) 1366 { 1367 struct trace *trace = container_of(tool, struct trace, tool); 1368 return trace__process_event(trace, machine, event, sample); 1369 } 1370 1371 static int trace__symbols_init(struct trace *trace, struct 
perf_evlist *evlist) 1372 { 1373 int err = symbol__init(); 1374 1375 if (err) 1376 return err; 1377 1378 trace->host = machine__new_host(); 1379 if (trace->host == NULL) 1380 return -ENOMEM; 1381 1382 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, 1383 evlist->threads, trace__tool_process, false); 1384 if (err) 1385 symbol__exit(); 1386 1387 return err; 1388 } 1389 1390 static int syscall__set_arg_fmts(struct syscall *sc) 1391 { 1392 struct format_field *field; 1393 int idx = 0; 1394 1395 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *)); 1396 if (sc->arg_scnprintf == NULL) 1397 return -1; 1398 1399 if (sc->fmt) 1400 sc->arg_parm = sc->fmt->arg_parm; 1401 1402 for (field = sc->tp_format->format.fields->next; field; field = field->next) { 1403 if (sc->fmt && sc->fmt->arg_scnprintf[idx]) 1404 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx]; 1405 else if (field->flags & FIELD_IS_POINTER) 1406 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex; 1407 ++idx; 1408 } 1409 1410 return 0; 1411 } 1412 1413 static int trace__read_syscall_info(struct trace *trace, int id) 1414 { 1415 char tp_name[128]; 1416 struct syscall *sc; 1417 const char *name = audit_syscall_to_name(id, trace->audit.machine); 1418 1419 if (name == NULL) 1420 return -1; 1421 1422 if (id > trace->syscalls.max) { 1423 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); 1424 1425 if (nsyscalls == NULL) 1426 return -1; 1427 1428 if (trace->syscalls.max != -1) { 1429 memset(nsyscalls + trace->syscalls.max + 1, 0, 1430 (id - trace->syscalls.max) * sizeof(*sc)); 1431 } else { 1432 memset(nsyscalls, 0, (id + 1) * sizeof(*sc)); 1433 } 1434 1435 trace->syscalls.table = nsyscalls; 1436 trace->syscalls.max = id; 1437 } 1438 1439 sc = trace->syscalls.table + id; 1440 sc->name = name; 1441 1442 if (trace->ev_qualifier) { 1443 bool in = strlist__find(trace->ev_qualifier, name) != NULL; 1444 1445 if (!(in ^ 
trace->not_ev_qualifier)) { 1446 sc->filtered = true; 1447 /* 1448 * No need to do read tracepoint information since this will be 1449 * filtered out. 1450 */ 1451 return 0; 1452 } 1453 } 1454 1455 sc->fmt = syscall_fmt__find(sc->name); 1456 1457 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); 1458 sc->tp_format = trace_event__tp_format("syscalls", tp_name); 1459 1460 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) { 1461 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias); 1462 sc->tp_format = trace_event__tp_format("syscalls", tp_name); 1463 } 1464 1465 if (sc->tp_format == NULL) 1466 return -1; 1467 1468 return syscall__set_arg_fmts(sc); 1469 } 1470 1471 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, 1472 unsigned long *args, struct trace *trace, 1473 struct thread *thread) 1474 { 1475 size_t printed = 0; 1476 1477 if (sc->tp_format != NULL) { 1478 struct format_field *field; 1479 u8 bit = 1; 1480 struct syscall_arg arg = { 1481 .idx = 0, 1482 .mask = 0, 1483 .trace = trace, 1484 .thread = thread, 1485 }; 1486 1487 for (field = sc->tp_format->format.fields->next; field; 1488 field = field->next, ++arg.idx, bit <<= 1) { 1489 if (arg.mask & bit) 1490 continue; 1491 /* 1492 * Suppress this argument if its value is zero and 1493 * and we don't have a string associated in an 1494 * strarray for it. 1495 */ 1496 if (args[arg.idx] == 0 && 1497 !(sc->arg_scnprintf && 1498 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY && 1499 sc->arg_parm[arg.idx])) 1500 continue; 1501 1502 printed += scnprintf(bf + printed, size - printed, 1503 "%s%s: ", printed ? 
", " : "", field->name); 1504 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) { 1505 arg.val = args[arg.idx]; 1506 if (sc->arg_parm) 1507 arg.parm = sc->arg_parm[arg.idx]; 1508 printed += sc->arg_scnprintf[arg.idx](bf + printed, 1509 size - printed, &arg); 1510 } else { 1511 printed += scnprintf(bf + printed, size - printed, 1512 "%ld", args[arg.idx]); 1513 } 1514 } 1515 } else { 1516 int i = 0; 1517 1518 while (i < 6) { 1519 printed += scnprintf(bf + printed, size - printed, 1520 "%sarg%d: %ld", 1521 printed ? ", " : "", i, args[i]); 1522 ++i; 1523 } 1524 } 1525 1526 return printed; 1527 } 1528 1529 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel, 1530 struct perf_sample *sample); 1531 1532 static struct syscall *trace__syscall_info(struct trace *trace, 1533 struct perf_evsel *evsel, int id) 1534 { 1535 1536 if (id < 0) { 1537 1538 /* 1539 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried 1540 * before that, leaving at a higher verbosity level till that is 1541 * explained. Reproduced with plain ftrace with: 1542 * 1543 * echo 1 > /t/events/raw_syscalls/sys_exit/enable 1544 * grep "NR -1 " /t/trace_pipe 1545 * 1546 * After generating some load on the machine. 
1547 */ 1548 if (verbose > 1) { 1549 static u64 n; 1550 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n", 1551 id, perf_evsel__name(evsel), ++n); 1552 } 1553 return NULL; 1554 } 1555 1556 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) && 1557 trace__read_syscall_info(trace, id)) 1558 goto out_cant_read; 1559 1560 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL)) 1561 goto out_cant_read; 1562 1563 return &trace->syscalls.table[id]; 1564 1565 out_cant_read: 1566 if (verbose) { 1567 fprintf(trace->output, "Problems reading syscall %d", id); 1568 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL) 1569 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name); 1570 fputs(" information\n", trace->output); 1571 } 1572 return NULL; 1573 } 1574 1575 static void thread__update_stats(struct thread_trace *ttrace, 1576 int id, struct perf_sample *sample) 1577 { 1578 struct int_node *inode; 1579 struct stats *stats; 1580 u64 duration = 0; 1581 1582 inode = intlist__findnew(ttrace->syscall_stats, id); 1583 if (inode == NULL) 1584 return; 1585 1586 stats = inode->priv; 1587 if (stats == NULL) { 1588 stats = malloc(sizeof(struct stats)); 1589 if (stats == NULL) 1590 return; 1591 init_stats(stats); 1592 inode->priv = stats; 1593 } 1594 1595 if (ttrace->entry_time && sample->time > ttrace->entry_time) 1596 duration = sample->time - ttrace->entry_time; 1597 1598 update_stats(stats, duration); 1599 } 1600 1601 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, 1602 struct perf_sample *sample) 1603 { 1604 char *msg; 1605 void *args; 1606 size_t printed = 0; 1607 struct thread *thread; 1608 int id = perf_evsel__sc_tp_uint(evsel, id, sample); 1609 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1610 struct thread_trace *ttrace; 1611 1612 if (sc == NULL) 1613 return -1; 1614 1615 if (sc->filtered) 1616 return 0; 1617 1618 thread = 
/*
 * Tracepoint handler for syscall exit.
 *
 * Looks up the syscall and the thread state, updates the per-thread
 * statistics when a summary was requested, remembers the pathname of a
 * successfully opened fd, and finally prints the line for this syscall
 * (entry text saved by trace__sys_enter() plus the formatted return value)
 * unless it is filtered out by duration or by summary-only mode.
 *
 * Returns 0, or -1 when the syscall or thread state cannot be obtained.
 */
static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
			   struct perf_sample *sample)
{
	int ret;
	u64 duration = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	if (sc->filtered)
		return 0;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		return -1;

	if (trace->summary)
		thread__update_stats(ttrace, id, sample);

	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);

	/*
	 * A successful open(): bind the pathname captured by the preceding
	 * vfs_getname probe sample to the returned fd.
	 */
	if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
		trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
		trace->last_vfs_getname = NULL;
		++trace->stats.vfs_getname;
	}

	ttrace = thread->priv;

	ttrace->exit_time = sample->time;

	/* Without a recorded entry time the duration is unknown and stays 0. */
	if (ttrace->entry_time) {
		duration = sample->time - ttrace->entry_time;
		if (trace__filter_duration(trace, duration))
			goto out;
	} else if (trace->duration_filter)
		goto out;

	if (trace->summary_only)
		goto out;

	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);

	/* No pending entry text: print a "continued" marker with the syscall name instead. */
	if (ttrace->entry_pending) {
		fprintf(trace->output, "%-70s", ttrace->entry_str);
	} else {
		fprintf(trace->output, " ... [");
		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
		fprintf(trace->output, "]: %s()", sc->name);
	}

	/*
	 * Return value formatting: plain signed decimal is the fallback
	 * (signed_print); the syscall_fmt flags select errno decoding,
	 * "Timeout" for a zero return, or hexadecimal output.
	 */
	if (sc->fmt == NULL) {
signed_print:
		fprintf(trace->output, ") = %d", ret);
	} else if (ret < 0 && sc->fmt->errmsg) {
		char bf[256];
		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
			   *e = audit_errno_to_name(-ret);

		fprintf(trace->output, ") = -1 %s %s", e, emsg);
	} else if (ret == 0 && sc->fmt->timeout)
		fprintf(trace->output, ") = 0 Timeout");
	else if (sc->fmt->hexret)
		fprintf(trace->output, ") = %#x", ret);
	else
		goto signed_print;

	fputc('\n', trace->output);
out:
	/* The entry, printed or not, has now been consumed. */
	ttrace->entry_pending = false;

	return 0;
}
out_dump; 1748 1749 ttrace->runtime_ms += runtime_ms; 1750 trace->runtime_ms += runtime_ms; 1751 return 0; 1752 1753 out_dump: 1754 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n", 1755 evsel->name, 1756 perf_evsel__strval(evsel, sample, "comm"), 1757 (pid_t)perf_evsel__intval(evsel, sample, "pid"), 1758 runtime, 1759 perf_evsel__intval(evsel, sample, "vruntime")); 1760 return 0; 1761 } 1762 1763 static bool skip_sample(struct trace *trace, struct perf_sample *sample) 1764 { 1765 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) || 1766 (trace->tid_list && intlist__find(trace->tid_list, sample->tid))) 1767 return false; 1768 1769 if (trace->pid_list || trace->tid_list) 1770 return true; 1771 1772 return false; 1773 } 1774 1775 static int trace__process_sample(struct perf_tool *tool, 1776 union perf_event *event __maybe_unused, 1777 struct perf_sample *sample, 1778 struct perf_evsel *evsel, 1779 struct machine *machine __maybe_unused) 1780 { 1781 struct trace *trace = container_of(tool, struct trace, tool); 1782 int err = 0; 1783 1784 tracepoint_handler handler = evsel->handler; 1785 1786 if (skip_sample(trace, sample)) 1787 return 0; 1788 1789 if (!trace->full_time && trace->base_time == 0) 1790 trace->base_time = sample->time; 1791 1792 if (handler) { 1793 ++trace->nr_events; 1794 handler(trace, evsel, sample); 1795 } 1796 1797 return err; 1798 } 1799 1800 static int parse_target_str(struct trace *trace) 1801 { 1802 if (trace->opts.target.pid) { 1803 trace->pid_list = intlist__new(trace->opts.target.pid); 1804 if (trace->pid_list == NULL) { 1805 pr_err("Error parsing process id string\n"); 1806 return -EINVAL; 1807 } 1808 } 1809 1810 if (trace->opts.target.tid) { 1811 trace->tid_list = intlist__new(trace->opts.target.tid); 1812 if (trace->tid_list == NULL) { 1813 pr_err("Error parsing thread id string\n"); 1814 return -EINVAL; 1815 } 1816 } 1817 1818 return 0; 1819 } 1820 1821 static int trace__record(int 
argc, const char **argv) 1822 { 1823 unsigned int rec_argc, i, j; 1824 const char **rec_argv; 1825 const char * const record_args[] = { 1826 "record", 1827 "-R", 1828 "-m", "1024", 1829 "-c", "1", 1830 "-e", 1831 }; 1832 1833 /* +1 is for the event string below */ 1834 rec_argc = ARRAY_SIZE(record_args) + 1 + argc; 1835 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 1836 1837 if (rec_argv == NULL) 1838 return -ENOMEM; 1839 1840 for (i = 0; i < ARRAY_SIZE(record_args); i++) 1841 rec_argv[i] = record_args[i]; 1842 1843 /* event string may be different for older kernels - e.g., RHEL6 */ 1844 if (is_valid_tracepoint("raw_syscalls:sys_enter")) 1845 rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit"; 1846 else if (is_valid_tracepoint("syscalls:sys_enter")) 1847 rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit"; 1848 else { 1849 pr_err("Neither raw_syscalls nor syscalls events exist.\n"); 1850 return -1; 1851 } 1852 i++; 1853 1854 for (j = 0; j < (unsigned int)argc; j++, i++) 1855 rec_argv[i] = argv[j]; 1856 1857 return cmd_record(i, rec_argv, NULL); 1858 } 1859 1860 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); 1861 1862 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist) 1863 { 1864 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname"); 1865 if (evsel == NULL) 1866 return; 1867 1868 if (perf_evsel__field(evsel, "pathname") == NULL) { 1869 perf_evsel__delete(evsel); 1870 return; 1871 } 1872 1873 evsel->handler = trace__vfs_getname; 1874 perf_evlist__add(evlist, evsel); 1875 } 1876 1877 static int trace__run(struct trace *trace, int argc, const char **argv) 1878 { 1879 struct perf_evlist *evlist = perf_evlist__new(); 1880 struct perf_evsel *evsel; 1881 int err = -1, i; 1882 unsigned long before; 1883 const bool forks = argc > 0; 1884 1885 trace->live = true; 1886 1887 if (evlist == NULL) { 1888 fprintf(trace->output, "Not enough memory to run!\n"); 1889 goto out; 1890 } 1891 1892 if 
(perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit)) 1893 goto out_error_tp; 1894 1895 perf_evlist__add_vfs_getname(evlist); 1896 1897 if (trace->sched && 1898 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", 1899 trace__sched_stat_runtime)) 1900 goto out_error_tp; 1901 1902 err = perf_evlist__create_maps(evlist, &trace->opts.target); 1903 if (err < 0) { 1904 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n"); 1905 goto out_delete_evlist; 1906 } 1907 1908 err = trace__symbols_init(trace, evlist); 1909 if (err < 0) { 1910 fprintf(trace->output, "Problems initializing symbol libraries!\n"); 1911 goto out_delete_evlist; 1912 } 1913 1914 perf_evlist__config(evlist, &trace->opts); 1915 1916 signal(SIGCHLD, sig_handler); 1917 signal(SIGINT, sig_handler); 1918 1919 if (forks) { 1920 err = perf_evlist__prepare_workload(evlist, &trace->opts.target, 1921 argv, false, NULL); 1922 if (err < 0) { 1923 fprintf(trace->output, "Couldn't run the workload!\n"); 1924 goto out_delete_evlist; 1925 } 1926 } 1927 1928 err = perf_evlist__open(evlist); 1929 if (err < 0) 1930 goto out_error_open; 1931 1932 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false); 1933 if (err < 0) { 1934 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno)); 1935 goto out_delete_evlist; 1936 } 1937 1938 perf_evlist__enable(evlist); 1939 1940 if (forks) 1941 perf_evlist__start_workload(evlist); 1942 1943 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1; 1944 again: 1945 before = trace->nr_events; 1946 1947 for (i = 0; i < evlist->nr_mmaps; i++) { 1948 union perf_event *event; 1949 1950 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { 1951 const u32 type = event->header.type; 1952 tracepoint_handler handler; 1953 struct perf_sample sample; 1954 1955 ++trace->nr_events; 1956 1957 err = perf_evlist__parse_sample(evlist, event, &sample); 1958 if (err) { 1959 
fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err); 1960 goto next_event; 1961 } 1962 1963 if (!trace->full_time && trace->base_time == 0) 1964 trace->base_time = sample.time; 1965 1966 if (type != PERF_RECORD_SAMPLE) { 1967 trace__process_event(trace, trace->host, event, &sample); 1968 continue; 1969 } 1970 1971 evsel = perf_evlist__id2evsel(evlist, sample.id); 1972 if (evsel == NULL) { 1973 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id); 1974 goto next_event; 1975 } 1976 1977 if (sample.raw_data == NULL) { 1978 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", 1979 perf_evsel__name(evsel), sample.tid, 1980 sample.cpu, sample.raw_size); 1981 goto next_event; 1982 } 1983 1984 handler = evsel->handler; 1985 handler(trace, evsel, &sample); 1986 next_event: 1987 perf_evlist__mmap_consume(evlist, i); 1988 1989 if (interrupted) 1990 goto out_disable; 1991 } 1992 } 1993 1994 if (trace->nr_events == before) { 1995 int timeout = done ? 
100 : -1; 1996 1997 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0) 1998 goto again; 1999 } else { 2000 goto again; 2001 } 2002 2003 out_disable: 2004 perf_evlist__disable(evlist); 2005 2006 if (!err) { 2007 if (trace->summary) 2008 trace__fprintf_thread_summary(trace, trace->output); 2009 2010 if (trace->show_tool_stats) { 2011 fprintf(trace->output, "Stats:\n " 2012 " vfs_getname : %" PRIu64 "\n" 2013 " proc_getname: %" PRIu64 "\n", 2014 trace->stats.vfs_getname, 2015 trace->stats.proc_getname); 2016 } 2017 } 2018 2019 out_delete_evlist: 2020 perf_evlist__delete(evlist); 2021 out: 2022 trace->live = false; 2023 return err; 2024 { 2025 char errbuf[BUFSIZ]; 2026 2027 out_error_tp: 2028 perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf)); 2029 goto out_error; 2030 2031 out_error_open: 2032 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); 2033 2034 out_error: 2035 fprintf(trace->output, "%s\n", errbuf); 2036 goto out_delete_evlist; 2037 } 2038 } 2039 2040 static int trace__replay(struct trace *trace) 2041 { 2042 const struct perf_evsel_str_handler handlers[] = { 2043 { "probe:vfs_getname", trace__vfs_getname, }, 2044 }; 2045 struct perf_data_file file = { 2046 .path = input_name, 2047 .mode = PERF_DATA_MODE_READ, 2048 }; 2049 struct perf_session *session; 2050 struct perf_evsel *evsel; 2051 int err = -1; 2052 2053 trace->tool.sample = trace__process_sample; 2054 trace->tool.mmap = perf_event__process_mmap; 2055 trace->tool.mmap2 = perf_event__process_mmap2; 2056 trace->tool.comm = perf_event__process_comm; 2057 trace->tool.exit = perf_event__process_exit; 2058 trace->tool.fork = perf_event__process_fork; 2059 trace->tool.attr = perf_event__process_attr; 2060 trace->tool.tracing_data = perf_event__process_tracing_data; 2061 trace->tool.build_id = perf_event__process_build_id; 2062 2063 trace->tool.ordered_samples = true; 2064 trace->tool.ordering_requires_timestamps = true; 2065 2066 /* add tid to output */ 2067 
/*
 * Print the heading of the per-thread summary to @fp.
 *
 * Returns the value reported by fprintf() for the heading.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
100.0 * stddev_stats(stats)/avg : 0.0; 2171 avg /= NSEC_PER_MSEC; 2172 2173 sc = &trace->syscalls.table[inode->i]; 2174 printed += fprintf(fp, " %-15s", sc->name); 2175 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f", 2176 n, min, avg); 2177 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); 2178 } 2179 2180 inode = intlist__next(inode); 2181 } 2182 2183 printed += fprintf(fp, "\n\n"); 2184 2185 return printed; 2186 } 2187 2188 /* struct used to pass data to per-thread function */ 2189 struct summary_data { 2190 FILE *fp; 2191 struct trace *trace; 2192 size_t printed; 2193 }; 2194 2195 static int trace__fprintf_one_thread(struct thread *thread, void *priv) 2196 { 2197 struct summary_data *data = priv; 2198 FILE *fp = data->fp; 2199 size_t printed = data->printed; 2200 struct trace *trace = data->trace; 2201 struct thread_trace *ttrace = thread->priv; 2202 double ratio; 2203 2204 if (ttrace == NULL) 2205 return 0; 2206 2207 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0; 2208 2209 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid); 2210 printed += fprintf(fp, "%lu events, ", ttrace->nr_events); 2211 printed += fprintf(fp, "%.1f%%", ratio); 2212 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms); 2213 printed += thread__dump_stats(ttrace, trace, fp); 2214 2215 data->printed += printed; 2216 2217 return 0; 2218 } 2219 2220 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) 2221 { 2222 struct summary_data data = { 2223 .fp = fp, 2224 .trace = trace 2225 }; 2226 data.printed = trace__fprintf_threads_header(fp); 2227 2228 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data); 2229 2230 return data.printed; 2231 } 2232 2233 static int trace__set_duration(const struct option *opt, const char *str, 2234 int unset __maybe_unused) 2235 { 2236 struct trace *trace = opt->value; 2237 2238 trace->duration_filter = atof(str); 2239 return 0; 2240 } 2241 2242 static int 
trace__open_output(struct trace *trace, const char *filename) 2243 { 2244 struct stat st; 2245 2246 if (!stat(filename, &st) && st.st_size) { 2247 char oldname[PATH_MAX]; 2248 2249 scnprintf(oldname, sizeof(oldname), "%s.old", filename); 2250 unlink(oldname); 2251 rename(filename, oldname); 2252 } 2253 2254 trace->output = fopen(filename, "w"); 2255 2256 return trace->output == NULL ? -errno : 0; 2257 } 2258 2259 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) 2260 { 2261 const char * const trace_usage[] = { 2262 "perf trace [<options>] [<command>]", 2263 "perf trace [<options>] -- <command> [<options>]", 2264 "perf trace record [<options>] [<command>]", 2265 "perf trace record [<options>] -- <command> [<options>]", 2266 NULL 2267 }; 2268 struct trace trace = { 2269 .audit = { 2270 .machine = audit_detect_machine(), 2271 .open_id = audit_name_to_syscall("open", trace.audit.machine), 2272 }, 2273 .syscalls = { 2274 . max = -1, 2275 }, 2276 .opts = { 2277 .target = { 2278 .uid = UINT_MAX, 2279 .uses_mmap = true, 2280 }, 2281 .user_freq = UINT_MAX, 2282 .user_interval = ULLONG_MAX, 2283 .no_buffering = true, 2284 .mmap_pages = 1024, 2285 }, 2286 .output = stdout, 2287 .show_comm = true, 2288 }; 2289 const char *output_name = NULL; 2290 const char *ev_qualifier_str = NULL; 2291 const struct option trace_options[] = { 2292 OPT_BOOLEAN(0, "comm", &trace.show_comm, 2293 "show the thread COMM next to its id"), 2294 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), 2295 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", 2296 "list of events to trace"), 2297 OPT_STRING('o', "output", &output_name, "file", "output file name"), 2298 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"), 2299 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", 2300 "trace events on existing process id"), 2301 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid", 2302 "trace events on existing thread id"), 2303 
OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide, 2304 "system-wide collection from all CPUs"), 2305 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu", 2306 "list of cpus to monitor"), 2307 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, 2308 "child tasks do not inherit counters"), 2309 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages", 2310 "number of mmap data pages", 2311 perf_evlist__parse_mmap_pages), 2312 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user", 2313 "user to profile"), 2314 OPT_CALLBACK(0, "duration", &trace, "float", 2315 "show only events with duration > N.M ms", 2316 trace__set_duration), 2317 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), 2318 OPT_INCR('v', "verbose", &verbose, "be more verbose"), 2319 OPT_BOOLEAN('T', "time", &trace.full_time, 2320 "Show full timestamp, not time relative to first start"), 2321 OPT_BOOLEAN('s', "summary", &trace.summary_only, 2322 "Show only syscall summary with statistics"), 2323 OPT_BOOLEAN('S', "with-summary", &trace.summary, 2324 "Show all syscalls and summary with statistics"), 2325 OPT_END() 2326 }; 2327 int err; 2328 char bf[BUFSIZ]; 2329 2330 if ((argc > 1) && (strcmp(argv[1], "record") == 0)) 2331 return trace__record(argc-2, &argv[2]); 2332 2333 argc = parse_options(argc, argv, trace_options, trace_usage, 0); 2334 2335 /* summary_only implies summary option, but don't overwrite summary if set */ 2336 if (trace.summary_only) 2337 trace.summary = trace.summary_only; 2338 2339 if (output_name != NULL) { 2340 err = trace__open_output(&trace, output_name); 2341 if (err < 0) { 2342 perror("failed to create output file"); 2343 goto out; 2344 } 2345 } 2346 2347 if (ev_qualifier_str != NULL) { 2348 const char *s = ev_qualifier_str; 2349 2350 trace.not_ev_qualifier = *s == '!'; 2351 if (trace.not_ev_qualifier) 2352 ++s; 2353 trace.ev_qualifier = strlist__new(true, s); 2354 if (trace.ev_qualifier == NULL) { 2355 fputs("Not enough memory to 
parse event qualifier", 2356 trace.output); 2357 err = -ENOMEM; 2358 goto out_close; 2359 } 2360 } 2361 2362 err = target__validate(&trace.opts.target); 2363 if (err) { 2364 target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 2365 fprintf(trace.output, "%s", bf); 2366 goto out_close; 2367 } 2368 2369 err = target__parse_uid(&trace.opts.target); 2370 if (err) { 2371 target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 2372 fprintf(trace.output, "%s", bf); 2373 goto out_close; 2374 } 2375 2376 if (!argc && target__none(&trace.opts.target)) 2377 trace.opts.target.system_wide = true; 2378 2379 if (input_name) 2380 err = trace__replay(&trace); 2381 else 2382 err = trace__run(&trace, argc, argv); 2383 2384 out_close: 2385 if (output_name != NULL) 2386 fclose(trace.output); 2387 out: 2388 return err; 2389 } 2390