1 #include <traceevent/event-parse.h> 2 #include "builtin.h" 3 #include "util/color.h" 4 #include "util/debug.h" 5 #include "util/evlist.h" 6 #include "util/exec_cmd.h" 7 #include "util/machine.h" 8 #include "util/session.h" 9 #include "util/thread.h" 10 #include "util/parse-options.h" 11 #include "util/strlist.h" 12 #include "util/intlist.h" 13 #include "util/thread_map.h" 14 #include "util/stat.h" 15 #include "trace-event.h" 16 #include "util/parse-events.h" 17 18 #include <libaudit.h> 19 #include <stdlib.h> 20 #include <sys/mman.h> 21 #include <linux/futex.h> 22 23 /* For older distros: */ 24 #ifndef MAP_STACK 25 # define MAP_STACK 0x20000 26 #endif 27 28 #ifndef MADV_HWPOISON 29 # define MADV_HWPOISON 100 30 #endif 31 32 #ifndef MADV_MERGEABLE 33 # define MADV_MERGEABLE 12 34 #endif 35 36 #ifndef MADV_UNMERGEABLE 37 # define MADV_UNMERGEABLE 13 38 #endif 39 40 #ifndef EFD_SEMAPHORE 41 # define EFD_SEMAPHORE 1 42 #endif 43 44 #ifndef EFD_NONBLOCK 45 # define EFD_NONBLOCK 00004000 46 #endif 47 48 #ifndef EFD_CLOEXEC 49 # define EFD_CLOEXEC 02000000 50 #endif 51 52 #ifndef O_CLOEXEC 53 # define O_CLOEXEC 02000000 54 #endif 55 56 #ifndef SOCK_DCCP 57 # define SOCK_DCCP 6 58 #endif 59 60 #ifndef SOCK_CLOEXEC 61 # define SOCK_CLOEXEC 02000000 62 #endif 63 64 #ifndef SOCK_NONBLOCK 65 # define SOCK_NONBLOCK 00004000 66 #endif 67 68 #ifndef MSG_CMSG_CLOEXEC 69 # define MSG_CMSG_CLOEXEC 0x40000000 70 #endif 71 72 #ifndef PERF_FLAG_FD_NO_GROUP 73 # define PERF_FLAG_FD_NO_GROUP (1UL << 0) 74 #endif 75 76 #ifndef PERF_FLAG_FD_OUTPUT 77 # define PERF_FLAG_FD_OUTPUT (1UL << 1) 78 #endif 79 80 #ifndef PERF_FLAG_PID_CGROUP 81 # define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ 82 #endif 83 84 #ifndef PERF_FLAG_FD_CLOEXEC 85 # define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ 86 #endif 87 88 89 struct tp_field { 90 int offset; 91 union { 92 u64 (*integer)(struct tp_field *field, struct perf_sample *sample); 93 void *(*pointer)(struct tp_field *field, struct perf_sample *sample); 94 }; 95 }; 96 97 #define TP_UINT_FIELD(bits) \ 98 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \ 99 { \ 100 u##bits value; \ 101 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ 102 return value; \ 103 } 104 105 TP_UINT_FIELD(8); 106 TP_UINT_FIELD(16); 107 TP_UINT_FIELD(32); 108 TP_UINT_FIELD(64); 109 110 #define TP_UINT_FIELD__SWAPPED(bits) \ 111 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \ 112 { \ 113 u##bits value; \ 114 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ 115 return bswap_##bits(value);\ 116 } 117 118 TP_UINT_FIELD__SWAPPED(16); 119 TP_UINT_FIELD__SWAPPED(32); 120 TP_UINT_FIELD__SWAPPED(64); 121 122 static int tp_field__init_uint(struct tp_field *field, 123 struct format_field *format_field, 124 bool needs_swap) 125 { 126 field->offset = format_field->offset; 127 128 switch (format_field->size) { 129 case 1: 130 field->integer = tp_field__u8; 131 break; 132 case 2: 133 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16; 134 break; 135 case 4: 136 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32; 137 break; 138 case 8: 139 field->integer = needs_swap ? 
tp_field__swapped_u64 : tp_field__u64; 140 break; 141 default: 142 return -1; 143 } 144 145 return 0; 146 } 147 148 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample) 149 { 150 return sample->raw_data + field->offset; 151 } 152 153 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field) 154 { 155 field->offset = format_field->offset; 156 field->pointer = tp_field__ptr; 157 return 0; 158 } 159 160 struct syscall_tp { 161 struct tp_field id; 162 union { 163 struct tp_field args, ret; 164 }; 165 }; 166 167 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel, 168 struct tp_field *field, 169 const char *name) 170 { 171 struct format_field *format_field = perf_evsel__field(evsel, name); 172 173 if (format_field == NULL) 174 return -1; 175 176 return tp_field__init_uint(field, format_field, evsel->needs_swap); 177 } 178 179 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \ 180 ({ struct syscall_tp *sc = evsel->priv;\ 181 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); }) 182 183 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel, 184 struct tp_field *field, 185 const char *name) 186 { 187 struct format_field *format_field = perf_evsel__field(evsel, name); 188 189 if (format_field == NULL) 190 return -1; 191 192 return tp_field__init_ptr(field, format_field); 193 } 194 195 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \ 196 ({ struct syscall_tp *sc = evsel->priv;\ 197 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); }) 198 199 static void perf_evsel__delete_priv(struct perf_evsel *evsel) 200 { 201 zfree(&evsel->priv); 202 perf_evsel__delete(evsel); 203 } 204 205 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler) 206 { 207 evsel->priv = malloc(sizeof(struct syscall_tp)); 208 if (evsel->priv != NULL) { 209 if (perf_evsel__init_sc_tp_uint_field(evsel, id)) 210 goto out_delete; 211 212 evsel->handler = handler; 213 return 0; 214 } 215 216 return -ENOMEM; 217 218 out_delete: 219 zfree(&evsel->priv); 220 return -ENOENT; 221 } 222 223 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler) 224 { 225 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction); 226 227 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */ 228 if (evsel == NULL) 229 evsel = perf_evsel__newtp("syscalls", direction); 230 231 if (evsel) { 232 if (perf_evsel__init_syscall_tp(evsel, handler)) 233 goto out_delete; 234 } 235 236 return evsel; 237 238 out_delete: 239 perf_evsel__delete_priv(evsel); 240 return NULL; 241 } 242 243 #define perf_evsel__sc_tp_uint(evsel, name, sample) \ 244 ({ struct syscall_tp *fields = evsel->priv; \ 245 fields->name.integer(&fields->name, sample); }) 246 247 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \ 248 ({ struct syscall_tp *fields = evsel->priv; \ 249 fields->name.pointer(&fields->name, sample); }) 250 251 struct syscall_arg { 252 unsigned long val; 253 struct thread *thread; 254 struct trace *trace; 255 void *parm; 256 u8 idx; 257 u8 mask; 258 }; 259 260 struct strarray { 261 int offset; 262 int nr_entries; 263 const char **entries; 264 }; 265 266 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \ 267 .nr_entries = ARRAY_SIZE(array), \ 268 .entries = array, \ 269 } 270 271 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \ 272 .offset = off, \ 273 .nr_entries = ARRAY_SIZE(array), \ 274 .entries = array, \ 275 } 276 277 static size_t 
__syscall_arg__scnprintf_strarray(char *bf, size_t size, 278 const char *intfmt, 279 struct syscall_arg *arg) 280 { 281 struct strarray *sa = arg->parm; 282 int idx = arg->val - sa->offset; 283 284 if (idx < 0 || idx >= sa->nr_entries) 285 return scnprintf(bf, size, intfmt, arg->val); 286 287 return scnprintf(bf, size, "%s", sa->entries[idx]); 288 } 289 290 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, 291 struct syscall_arg *arg) 292 { 293 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg); 294 } 295 296 #define SCA_STRARRAY syscall_arg__scnprintf_strarray 297 298 #if defined(__i386__) || defined(__x86_64__) 299 /* 300 * FIXME: Make this available to all arches as soon as the ioctl beautifier 301 * gets rewritten to support all arches. 302 */ 303 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size, 304 struct syscall_arg *arg) 305 { 306 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg); 307 } 308 309 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray 310 #endif /* defined(__i386__) || defined(__x86_64__) */ 311 312 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, 313 struct syscall_arg *arg); 314 315 #define SCA_FD syscall_arg__scnprintf_fd 316 317 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size, 318 struct syscall_arg *arg) 319 { 320 int fd = arg->val; 321 322 if (fd == AT_FDCWD) 323 return scnprintf(bf, size, "CWD"); 324 325 return syscall_arg__scnprintf_fd(bf, size, arg); 326 } 327 328 #define SCA_FDAT syscall_arg__scnprintf_fd_at 329 330 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 331 struct syscall_arg *arg); 332 333 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd 334 335 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, 336 struct syscall_arg *arg) 337 { 338 return scnprintf(bf, size, "%#lx", arg->val); 339 } 340 341 #define SCA_HEX syscall_arg__scnprintf_hex 342 343 static size_t syscall_arg__scnprintf_int(char *bf, size_t size, 344 struct syscall_arg *arg) 345 { 346 return scnprintf(bf, size, "%d", arg->val); 347 } 348 349 #define SCA_INT syscall_arg__scnprintf_int 350 351 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, 352 struct syscall_arg *arg) 353 { 354 int printed = 0, prot = arg->val; 355 356 if (prot == PROT_NONE) 357 return scnprintf(bf, size, "NONE"); 358 #define P_MMAP_PROT(n) \ 359 if (prot & PROT_##n) { \ 360 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 361 prot &= ~PROT_##n; \ 362 } 363 364 P_MMAP_PROT(EXEC); 365 P_MMAP_PROT(READ); 366 P_MMAP_PROT(WRITE); 367 #ifdef PROT_SEM 368 P_MMAP_PROT(SEM); 369 #endif 370 P_MMAP_PROT(GROWSDOWN); 371 P_MMAP_PROT(GROWSUP); 372 #undef P_MMAP_PROT 373 374 if (prot) 375 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot); 376 377 return printed; 378 } 379 380 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot 381 382 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, 383 struct syscall_arg *arg) 384 { 385 int printed = 0, flags = arg->val; 386 387 #define P_MMAP_FLAG(n) \ 388 if (flags & MAP_##n) { \ 389 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? 
"|" : "", #n); \ 390 flags &= ~MAP_##n; \ 391 } 392 393 P_MMAP_FLAG(SHARED); 394 P_MMAP_FLAG(PRIVATE); 395 #ifdef MAP_32BIT 396 P_MMAP_FLAG(32BIT); 397 #endif 398 P_MMAP_FLAG(ANONYMOUS); 399 P_MMAP_FLAG(DENYWRITE); 400 P_MMAP_FLAG(EXECUTABLE); 401 P_MMAP_FLAG(FILE); 402 P_MMAP_FLAG(FIXED); 403 P_MMAP_FLAG(GROWSDOWN); 404 #ifdef MAP_HUGETLB 405 P_MMAP_FLAG(HUGETLB); 406 #endif 407 P_MMAP_FLAG(LOCKED); 408 P_MMAP_FLAG(NONBLOCK); 409 P_MMAP_FLAG(NORESERVE); 410 P_MMAP_FLAG(POPULATE); 411 P_MMAP_FLAG(STACK); 412 #ifdef MAP_UNINITIALIZED 413 P_MMAP_FLAG(UNINITIALIZED); 414 #endif 415 #undef P_MMAP_FLAG 416 417 if (flags) 418 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 419 420 return printed; 421 } 422 423 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags 424 425 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size, 426 struct syscall_arg *arg) 427 { 428 int printed = 0, flags = arg->val; 429 430 #define P_MREMAP_FLAG(n) \ 431 if (flags & MREMAP_##n) { \ 432 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 433 flags &= ~MREMAP_##n; \ 434 } 435 436 P_MREMAP_FLAG(MAYMOVE); 437 #ifdef MREMAP_FIXED 438 P_MREMAP_FLAG(FIXED); 439 #endif 440 #undef P_MREMAP_FLAG 441 442 if (flags) 443 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 444 445 return printed; 446 } 447 448 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags 449 450 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, 451 struct syscall_arg *arg) 452 { 453 int behavior = arg->val; 454 455 switch (behavior) { 456 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n) 457 P_MADV_BHV(NORMAL); 458 P_MADV_BHV(RANDOM); 459 P_MADV_BHV(SEQUENTIAL); 460 P_MADV_BHV(WILLNEED); 461 P_MADV_BHV(DONTNEED); 462 P_MADV_BHV(REMOVE); 463 P_MADV_BHV(DONTFORK); 464 P_MADV_BHV(DOFORK); 465 P_MADV_BHV(HWPOISON); 466 #ifdef MADV_SOFT_OFFLINE 467 P_MADV_BHV(SOFT_OFFLINE); 468 #endif 469 P_MADV_BHV(MERGEABLE); 470 P_MADV_BHV(UNMERGEABLE); 471 #ifdef MADV_HUGEPAGE 472 P_MADV_BHV(HUGEPAGE); 473 #endif 474 #ifdef MADV_NOHUGEPAGE 475 P_MADV_BHV(NOHUGEPAGE); 476 #endif 477 #ifdef MADV_DONTDUMP 478 P_MADV_BHV(DONTDUMP); 479 #endif 480 #ifdef MADV_DODUMP 481 P_MADV_BHV(DODUMP); 482 #endif 483 #undef P_MADV_PHV 484 default: break; 485 } 486 487 return scnprintf(bf, size, "%#x", behavior); 488 } 489 490 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior 491 492 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size, 493 struct syscall_arg *arg) 494 { 495 int printed = 0, op = arg->val; 496 497 if (op == 0) 498 return scnprintf(bf, size, "NONE"); 499 #define P_CMD(cmd) \ 500 if ((op & LOCK_##cmd) == LOCK_##cmd) { \ 501 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \ 502 op &= ~LOCK_##cmd; \ 503 } 504 505 P_CMD(SH); 506 P_CMD(EX); 507 P_CMD(NB); 508 P_CMD(UN); 509 P_CMD(MAND); 510 P_CMD(RW); 511 P_CMD(READ); 512 P_CMD(WRITE); 513 #undef P_OP 514 515 if (op) 516 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? 
"|" : "", op); 517 518 return printed; 519 } 520 521 #define SCA_FLOCK syscall_arg__scnprintf_flock 522 523 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg) 524 { 525 enum syscall_futex_args { 526 SCF_UADDR = (1 << 0), 527 SCF_OP = (1 << 1), 528 SCF_VAL = (1 << 2), 529 SCF_TIMEOUT = (1 << 3), 530 SCF_UADDR2 = (1 << 4), 531 SCF_VAL3 = (1 << 5), 532 }; 533 int op = arg->val; 534 int cmd = op & FUTEX_CMD_MASK; 535 size_t printed = 0; 536 537 switch (cmd) { 538 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n); 539 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break; 540 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 541 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 542 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break; 543 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break; 544 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break; 545 P_FUTEX_OP(WAKE_OP); break; 546 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 547 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 548 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break; 549 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break; 550 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break; 551 P_FUTEX_OP(WAIT_REQUEUE_PI); break; 552 default: printed = scnprintf(bf, size, "%#x", cmd); break; 553 } 554 555 if (op & FUTEX_PRIVATE_FLAG) 556 printed += scnprintf(bf + printed, size - printed, "|PRIV"); 557 558 if (op & FUTEX_CLOCK_REALTIME) 559 printed += scnprintf(bf + printed, size - printed, "|CLKRT"); 560 561 return printed; 562 } 563 564 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op 565 566 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", }; 567 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1); 568 569 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", }; 570 static DEFINE_STRARRAY(itimers); 571 572 static const char *whences[] = { "SET", "CUR", "END", 573 #ifdef SEEK_DATA 574 "DATA", 575 #endif 576 #ifdef SEEK_HOLE 577 "HOLE", 578 #endif 579 }; 580 static DEFINE_STRARRAY(whences); 581 582 static const char *fcntl_cmds[] = { 583 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK", 584 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64", 585 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX", 586 "F_GETOWNER_UIDS", 587 }; 588 static DEFINE_STRARRAY(fcntl_cmds); 589 590 static const char *rlimit_resources[] = { 591 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE", 592 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO", 593 "RTTIME", 594 }; 595 static DEFINE_STRARRAY(rlimit_resources); 596 597 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", }; 598 static DEFINE_STRARRAY(sighow); 599 600 static const char *clockid[] = { 601 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID", 602 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", 603 }; 604 static DEFINE_STRARRAY(clockid); 605 606 static const char *socket_families[] = { 607 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM", 608 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI", 609 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC", 610 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC", 611 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF", 612 "ALG", "NFC", "VSOCK", 613 }; 614 static 
DEFINE_STRARRAY(socket_families); 615 616 #ifndef SOCK_TYPE_MASK 617 #define SOCK_TYPE_MASK 0xf 618 #endif 619 620 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size, 621 struct syscall_arg *arg) 622 { 623 size_t printed; 624 int type = arg->val, 625 flags = type & ~SOCK_TYPE_MASK; 626 627 type &= SOCK_TYPE_MASK; 628 /* 629 * Can't use a strarray, MIPS may override for ABI reasons. 630 */ 631 switch (type) { 632 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break; 633 P_SK_TYPE(STREAM); 634 P_SK_TYPE(DGRAM); 635 P_SK_TYPE(RAW); 636 P_SK_TYPE(RDM); 637 P_SK_TYPE(SEQPACKET); 638 P_SK_TYPE(DCCP); 639 P_SK_TYPE(PACKET); 640 #undef P_SK_TYPE 641 default: 642 printed = scnprintf(bf, size, "%#x", type); 643 } 644 645 #define P_SK_FLAG(n) \ 646 if (flags & SOCK_##n) { \ 647 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \ 648 flags &= ~SOCK_##n; \ 649 } 650 651 P_SK_FLAG(CLOEXEC); 652 P_SK_FLAG(NONBLOCK); 653 #undef P_SK_FLAG 654 655 if (flags) 656 printed += scnprintf(bf + printed, size - printed, "|%#x", flags); 657 658 return printed; 659 } 660 661 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type 662 663 #ifndef MSG_PROBE 664 #define MSG_PROBE 0x10 665 #endif 666 #ifndef MSG_WAITFORONE 667 #define MSG_WAITFORONE 0x10000 668 #endif 669 #ifndef MSG_SENDPAGE_NOTLAST 670 #define MSG_SENDPAGE_NOTLAST 0x20000 671 #endif 672 #ifndef MSG_FASTOPEN 673 #define MSG_FASTOPEN 0x20000000 674 #endif 675 676 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size, 677 struct syscall_arg *arg) 678 { 679 int printed = 0, flags = arg->val; 680 681 if (flags == 0) 682 return scnprintf(bf, size, "NONE"); 683 #define P_MSG_FLAG(n) \ 684 if (flags & MSG_##n) { \ 685 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 686 flags &= ~MSG_##n; \ 687 } 688 689 P_MSG_FLAG(OOB); 690 P_MSG_FLAG(PEEK); 691 P_MSG_FLAG(DONTROUTE); 692 P_MSG_FLAG(TRYHARD); 693 P_MSG_FLAG(CTRUNC); 694 P_MSG_FLAG(PROBE); 695 P_MSG_FLAG(TRUNC); 696 P_MSG_FLAG(DONTWAIT); 697 P_MSG_FLAG(EOR); 698 P_MSG_FLAG(WAITALL); 699 P_MSG_FLAG(FIN); 700 P_MSG_FLAG(SYN); 701 P_MSG_FLAG(CONFIRM); 702 P_MSG_FLAG(RST); 703 P_MSG_FLAG(ERRQUEUE); 704 P_MSG_FLAG(NOSIGNAL); 705 P_MSG_FLAG(MORE); 706 P_MSG_FLAG(WAITFORONE); 707 P_MSG_FLAG(SENDPAGE_NOTLAST); 708 P_MSG_FLAG(FASTOPEN); 709 P_MSG_FLAG(CMSG_CLOEXEC); 710 #undef P_MSG_FLAG 711 712 if (flags) 713 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? 
"|" : "", flags); 714 715 return printed; 716 } 717 718 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags 719 720 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, 721 struct syscall_arg *arg) 722 { 723 size_t printed = 0; 724 int mode = arg->val; 725 726 if (mode == F_OK) /* 0 */ 727 return scnprintf(bf, size, "F"); 728 #define P_MODE(n) \ 729 if (mode & n##_OK) { \ 730 printed += scnprintf(bf + printed, size - printed, "%s", #n); \ 731 mode &= ~n##_OK; \ 732 } 733 734 P_MODE(R); 735 P_MODE(W); 736 P_MODE(X); 737 #undef P_MODE 738 739 if (mode) 740 printed += scnprintf(bf + printed, size - printed, "|%#x", mode); 741 742 return printed; 743 } 744 745 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode 746 747 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, 748 struct syscall_arg *arg) 749 { 750 int printed = 0, flags = arg->val; 751 752 if (!(flags & O_CREAT)) 753 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */ 754 755 if (flags == 0) 756 return scnprintf(bf, size, "RDONLY"); 757 #define P_FLAG(n) \ 758 if (flags & O_##n) { \ 759 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 760 flags &= ~O_##n; \ 761 } 762 763 P_FLAG(APPEND); 764 P_FLAG(ASYNC); 765 P_FLAG(CLOEXEC); 766 P_FLAG(CREAT); 767 P_FLAG(DIRECT); 768 P_FLAG(DIRECTORY); 769 P_FLAG(EXCL); 770 P_FLAG(LARGEFILE); 771 P_FLAG(NOATIME); 772 P_FLAG(NOCTTY); 773 #ifdef O_NONBLOCK 774 P_FLAG(NONBLOCK); 775 #elif O_NDELAY 776 P_FLAG(NDELAY); 777 #endif 778 #ifdef O_PATH 779 P_FLAG(PATH); 780 #endif 781 P_FLAG(RDWR); 782 #ifdef O_DSYNC 783 if ((flags & O_SYNC) == O_SYNC) 784 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC"); 785 else { 786 P_FLAG(DSYNC); 787 } 788 #else 789 P_FLAG(SYNC); 790 #endif 791 P_FLAG(TRUNC); 792 P_FLAG(WRONLY); 793 #undef P_FLAG 794 795 if (flags) 796 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 797 798 return printed; 799 } 800 801 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags 802 803 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size, 804 struct syscall_arg *arg) 805 { 806 int printed = 0, flags = arg->val; 807 808 if (flags == 0) 809 return 0; 810 811 #define P_FLAG(n) \ 812 if (flags & PERF_FLAG_##n) { \ 813 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 814 flags &= ~PERF_FLAG_##n; \ 815 } 816 817 P_FLAG(FD_NO_GROUP); 818 P_FLAG(FD_OUTPUT); 819 P_FLAG(PID_CGROUP); 820 P_FLAG(FD_CLOEXEC); 821 #undef P_FLAG 822 823 if (flags) 824 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 825 826 return printed; 827 } 828 829 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags 830 831 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size, 832 struct syscall_arg *arg) 833 { 834 int printed = 0, flags = arg->val; 835 836 if (flags == 0) 837 return scnprintf(bf, size, "NONE"); 838 #define P_FLAG(n) \ 839 if (flags & EFD_##n) { \ 840 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 841 flags &= ~EFD_##n; \ 842 } 843 844 P_FLAG(SEMAPHORE); 845 P_FLAG(CLOEXEC); 846 P_FLAG(NONBLOCK); 847 #undef P_FLAG 848 849 if (flags) 850 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? 
"|" : "", flags); 851 852 return printed; 853 } 854 855 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags 856 857 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size, 858 struct syscall_arg *arg) 859 { 860 int printed = 0, flags = arg->val; 861 862 #define P_FLAG(n) \ 863 if (flags & O_##n) { \ 864 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 865 flags &= ~O_##n; \ 866 } 867 868 P_FLAG(CLOEXEC); 869 P_FLAG(NONBLOCK); 870 #undef P_FLAG 871 872 if (flags) 873 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 874 875 return printed; 876 } 877 878 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags 879 880 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg) 881 { 882 int sig = arg->val; 883 884 switch (sig) { 885 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n) 886 P_SIGNUM(HUP); 887 P_SIGNUM(INT); 888 P_SIGNUM(QUIT); 889 P_SIGNUM(ILL); 890 P_SIGNUM(TRAP); 891 P_SIGNUM(ABRT); 892 P_SIGNUM(BUS); 893 P_SIGNUM(FPE); 894 P_SIGNUM(KILL); 895 P_SIGNUM(USR1); 896 P_SIGNUM(SEGV); 897 P_SIGNUM(USR2); 898 P_SIGNUM(PIPE); 899 P_SIGNUM(ALRM); 900 P_SIGNUM(TERM); 901 P_SIGNUM(CHLD); 902 P_SIGNUM(CONT); 903 P_SIGNUM(STOP); 904 P_SIGNUM(TSTP); 905 P_SIGNUM(TTIN); 906 P_SIGNUM(TTOU); 907 P_SIGNUM(URG); 908 P_SIGNUM(XCPU); 909 P_SIGNUM(XFSZ); 910 P_SIGNUM(VTALRM); 911 P_SIGNUM(PROF); 912 P_SIGNUM(WINCH); 913 P_SIGNUM(IO); 914 P_SIGNUM(PWR); 915 P_SIGNUM(SYS); 916 #ifdef SIGEMT 917 P_SIGNUM(EMT); 918 #endif 919 #ifdef SIGSTKFLT 920 P_SIGNUM(STKFLT); 921 #endif 922 #ifdef SIGSWI 923 P_SIGNUM(SWI); 924 #endif 925 default: break; 926 } 927 928 return scnprintf(bf, size, "%#x", sig); 929 } 930 931 #define SCA_SIGNUM syscall_arg__scnprintf_signum 932 933 #if defined(__i386__) || defined(__x86_64__) 934 /* 935 * FIXME: Make this available to all arches. 
936 */ 937 #define TCGETS 0x5401 938 939 static const char *tioctls[] = { 940 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW", 941 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL", 942 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI", 943 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC", 944 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX", 945 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO", 946 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK", 947 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2", 948 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK", 949 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG", 950 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL", 951 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG", 952 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS", 953 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI", 954 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE", 955 }; 956 957 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401); 958 #endif /* defined(__i386__) || defined(__x86_64__) */ 959 960 #define STRARRAY(arg, name, array) \ 961 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \ 962 .arg_parm = { [arg] = &strarray__##array, } 963 964 static struct syscall_fmt { 965 const char *name; 966 const char *alias; 967 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg); 968 void *arg_parm[6]; 969 bool errmsg; 970 bool timeout; 971 bool hexret; 972 } syscall_fmts[] = { 973 { .name = "access", .errmsg = true, 974 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, }, 975 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, 976 { .name = "brk", .hexret = true, 977 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, 978 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, 979 { .name = "close", .errmsg = true, 980 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 981 { .name = "connect", .errmsg = true, }, 982 { .name = "dup", .errmsg = true, 983 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 984 { .name = "dup2", .errmsg = true, 985 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 986 { .name = "dup3", .errmsg = true, 987 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 988 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), }, 989 { .name = "eventfd2", .errmsg = true, 990 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, 991 { .name = "faccessat", .errmsg = true, 992 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 993 { .name = "fadvise64", .errmsg = true, 994 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 995 { .name = "fallocate", .errmsg = true, 996 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 997 { .name = "fchdir", .errmsg = true, 998 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 999 { .name = "fchmod", .errmsg = true, 1000 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1001 { .name = "fchmodat", .errmsg = true, 1002 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1003 { .name = "fchown", .errmsg = true, 1004 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1005 { .name = "fchownat", .errmsg = true, 1006 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1007 { .name = "fcntl", .errmsg = true, 1008 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 1009 [1] = SCA_STRARRAY, /* cmd */ }, 1010 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, }, 1011 { .name = "fdatasync", .errmsg = true, 1012 
.arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1013 { .name = "flock", .errmsg = true, 1014 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 1015 [1] = SCA_FLOCK, /* cmd */ }, }, 1016 { .name = "fsetxattr", .errmsg = true, 1017 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1018 { .name = "fstat", .errmsg = true, .alias = "newfstat", 1019 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1020 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", 1021 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1022 { .name = "fstatfs", .errmsg = true, 1023 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1024 { .name = "fsync", .errmsg = true, 1025 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1026 { .name = "ftruncate", .errmsg = true, 1027 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1028 { .name = "futex", .errmsg = true, 1029 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, 1030 { .name = "futimesat", .errmsg = true, 1031 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1032 { .name = "getdents", .errmsg = true, 1033 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1034 { .name = "getdents64", .errmsg = true, 1035 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1036 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, 1037 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, 1038 { .name = "ioctl", .errmsg = true, 1039 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 1040 #if defined(__i386__) || defined(__x86_64__) 1041 /* 1042 * FIXME: Make this available to all arches. 1043 */ 1044 [1] = SCA_STRHEXARRAY, /* cmd */ 1045 [2] = SCA_HEX, /* arg */ }, 1046 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, }, 1047 #else 1048 [2] = SCA_HEX, /* arg */ }, }, 1049 #endif 1050 { .name = "kill", .errmsg = true, 1051 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 1052 { .name = "linkat", .errmsg = true, 1053 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1054 { .name = "lseek", .errmsg = true, 1055 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 1056 [2] = SCA_STRARRAY, /* whence */ }, 1057 .arg_parm = { [2] = &strarray__whences, /* whence */ }, }, 1058 { .name = "lstat", .errmsg = true, .alias = "newlstat", }, 1059 { .name = "madvise", .errmsg = true, 1060 .arg_scnprintf = { [0] = SCA_HEX, /* start */ 1061 [2] = SCA_MADV_BHV, /* behavior */ }, }, 1062 { .name = "mkdirat", .errmsg = true, 1063 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1064 { .name = "mknodat", .errmsg = true, 1065 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 1066 { .name = "mlock", .errmsg = true, 1067 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 1068 { .name = "mlockall", .errmsg = true, 1069 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 1070 { .name = "mmap", .hexret = true, 1071 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ 1072 [2] = SCA_MMAP_PROT, /* prot */ 1073 [3] = SCA_MMAP_FLAGS, /* flags */ 1074 [4] = SCA_FD, /* fd */ }, }, 1075 { .name = "mprotect", .errmsg = true, 1076 .arg_scnprintf = { [0] = SCA_HEX, /* start */ 1077 [2] = SCA_MMAP_PROT, /* prot */ }, }, 1078 { .name = "mremap", .hexret = true, 1079 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ 1080 [3] = SCA_MREMAP_FLAGS, /* flags */ 1081 [4] = SCA_HEX, /* new_addr */ }, }, 1082 { .name = "munlock", .errmsg = true, 1083 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 1084 { .name = "munmap", .errmsg = true, 1085 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 1086 { .name = "name_to_handle_at", .errmsg = true, 1087 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1088 { .name = 
"newfstatat", .errmsg = true, 1089 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1090 { .name = "open", .errmsg = true, 1091 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, 1092 { .name = "open_by_handle_at", .errmsg = true, 1093 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 1094 [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 1095 { .name = "openat", .errmsg = true, 1096 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 1097 [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 1098 { .name = "perf_event_open", .errmsg = true, 1099 .arg_scnprintf = { [1] = SCA_INT, /* pid */ 1100 [2] = SCA_INT, /* cpu */ 1101 [3] = SCA_FD, /* group_fd */ 1102 [4] = SCA_PERF_FLAGS, /* flags */ }, }, 1103 { .name = "pipe2", .errmsg = true, 1104 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, }, 1105 { .name = "poll", .errmsg = true, .timeout = true, }, 1106 { .name = "ppoll", .errmsg = true, .timeout = true, }, 1107 { .name = "pread", .errmsg = true, .alias = "pread64", 1108 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1109 { .name = "preadv", .errmsg = true, .alias = "pread", 1110 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1111 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), }, 1112 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", 1113 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1114 { .name = "pwritev", .errmsg = true, 1115 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1116 { .name = "read", .errmsg = true, 1117 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1118 { .name = "readlinkat", .errmsg = true, 1119 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1120 { .name = "readv", .errmsg = true, 1121 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1122 { .name = "recvfrom", .errmsg = true, 1123 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1124 { .name = "recvmmsg", .errmsg = true, 1125 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1126 { .name = "recvmsg", .errmsg = true, 1127 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, 1128 { .name = "renameat", .errmsg = true, 1129 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1130 { .name = "rt_sigaction", .errmsg = true, 1131 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, 1132 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), }, 1133 { .name = "rt_sigqueueinfo", .errmsg = true, 1134 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 1135 { .name = "rt_tgsigqueueinfo", .errmsg = true, 1136 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, 1137 { .name = "select", .errmsg = true, .timeout = true, }, 1138 { .name = "sendmmsg", .errmsg = true, 1139 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1140 { .name = "sendmsg", .errmsg = true, 1141 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, 1142 { .name = "sendto", .errmsg = true, 1143 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 1144 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, 1145 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, 1146 { .name = "shutdown", .errmsg = true, 1147 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1148 { .name = "socket", .errmsg = true, 1149 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ 1150 [1] = SCA_SK_TYPE, /* type */ }, 1151 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, 1152 { .name = "socketpair", .errmsg = true, 1153 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ 1154 [1] = SCA_SK_TYPE, /* type */ }, 1155 .arg_parm = { [0] = 
&strarray__socket_families, /* family */ }, }, 1156 { .name = "stat", .errmsg = true, .alias = "newstat", }, 1157 { .name = "symlinkat", .errmsg = true, 1158 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1159 { .name = "tgkill", .errmsg = true, 1160 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, 1161 { .name = "tkill", .errmsg = true, 1162 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 1163 { .name = "uname", .errmsg = true, .alias = "newuname", }, 1164 { .name = "unlinkat", .errmsg = true, 1165 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 1166 { .name = "utimensat", .errmsg = true, 1167 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, }, 1168 { .name = "write", .errmsg = true, 1169 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1170 { .name = "writev", .errmsg = true, 1171 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 1172 }; 1173 1174 static int syscall_fmt__cmp(const void *name, const void *fmtp) 1175 { 1176 const struct syscall_fmt *fmt = fmtp; 1177 return strcmp(name, fmt->name); 1178 } 1179 1180 static struct syscall_fmt *syscall_fmt__find(const char *name) 1181 { 1182 const int nmemb = ARRAY_SIZE(syscall_fmts); 1183 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); 1184 } 1185 1186 struct syscall { 1187 struct event_format *tp_format; 1188 int nr_args; 1189 struct format_field *args; 1190 const char *name; 1191 bool is_exit; 1192 struct syscall_fmt *fmt; 1193 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); 1194 void **arg_parm; 1195 }; 1196 1197 static size_t fprintf_duration(unsigned long t, FILE *fp) 1198 { 1199 double duration = (double)t / NSEC_PER_MSEC; 1200 size_t printed = fprintf(fp, "("); 1201 1202 if (duration >= 1.0) 1203 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); 1204 else if (duration >= 0.01) 1205 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); 1206 else 1207 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration); 1208 return printed + fprintf(fp, "): "); 1209 } 1210 1211 struct thread_trace { 1212 u64 entry_time; 1213 u64 exit_time; 1214 bool entry_pending; 1215 unsigned long nr_events; 1216 unsigned long pfmaj, pfmin; 1217 char *entry_str; 1218 double runtime_ms; 1219 struct { 1220 int max; 1221 char **table; 1222 } paths; 1223 1224 struct intlist *syscall_stats; 1225 }; 1226 1227 static struct thread_trace *thread_trace__new(void) 1228 { 1229 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace)); 1230 1231 if (ttrace) 1232 ttrace->paths.max = -1; 1233 1234 ttrace->syscall_stats = intlist__new(NULL); 1235 1236 return ttrace; 1237 } 1238 1239 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) 1240 { 1241 struct thread_trace *ttrace; 1242 1243 if (thread == NULL) 1244 goto fail; 1245 1246 if (thread__priv(thread) == NULL) 1247 thread__set_priv(thread, thread_trace__new()); 1248 1249 if (thread__priv(thread) == NULL) 1250 goto fail; 1251 1252 ttrace = thread__priv(thread); 1253 ++ttrace->nr_events; 1254 1255 return ttrace; 1256 fail: 1257 color_fprintf(fp, PERF_COLOR_RED, 1258 "WARNING: not enough memory, dropping samples!\n"); 1259 return NULL; 1260 } 1261 1262 #define TRACE_PFMAJ (1 << 0) 1263 #define TRACE_PFMIN (1 << 1) 1264 1265 struct trace { 1266 struct perf_tool tool; 1267 struct { 1268 int machine; 1269 int open_id; 1270 } audit; 1271 struct { 1272 int max; 1273 struct syscall *table; 1274 struct { 1275 struct perf_evsel *sys_enter, 1276 *sys_exit; 1277 } events; 1278 } 
syscalls; 1279 struct record_opts opts; 1280 struct perf_evlist *evlist; 1281 struct machine *host; 1282 struct thread *current; 1283 u64 base_time; 1284 FILE *output; 1285 unsigned long nr_events; 1286 struct strlist *ev_qualifier; 1287 struct { 1288 size_t nr; 1289 int *entries; 1290 } ev_qualifier_ids; 1291 const char *last_vfs_getname; 1292 struct intlist *tid_list; 1293 struct intlist *pid_list; 1294 struct { 1295 size_t nr; 1296 pid_t *entries; 1297 } filter_pids; 1298 double duration_filter; 1299 double runtime_ms; 1300 struct { 1301 u64 vfs_getname, 1302 proc_getname; 1303 } stats; 1304 bool not_ev_qualifier; 1305 bool live; 1306 bool full_time; 1307 bool sched; 1308 bool multiple_threads; 1309 bool summary; 1310 bool summary_only; 1311 bool show_comm; 1312 bool show_tool_stats; 1313 bool trace_syscalls; 1314 bool force; 1315 int trace_pgfaults; 1316 }; 1317 1318 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) 1319 { 1320 struct thread_trace *ttrace = thread__priv(thread); 1321 1322 if (fd > ttrace->paths.max) { 1323 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *)); 1324 1325 if (npath == NULL) 1326 return -1; 1327 1328 if (ttrace->paths.max != -1) { 1329 memset(npath + ttrace->paths.max + 1, 0, 1330 (fd - ttrace->paths.max) * sizeof(char *)); 1331 } else { 1332 memset(npath, 0, (fd + 1) * sizeof(char *)); 1333 } 1334 1335 ttrace->paths.table = npath; 1336 ttrace->paths.max = fd; 1337 } 1338 1339 ttrace->paths.table[fd] = strdup(pathname); 1340 1341 return ttrace->paths.table[fd] != NULL ? 0 : -1; 1342 } 1343 1344 static int thread__read_fd_path(struct thread *thread, int fd) 1345 { 1346 char linkname[PATH_MAX], pathname[PATH_MAX]; 1347 struct stat st; 1348 int ret; 1349 1350 if (thread->pid_ == thread->tid) { 1351 scnprintf(linkname, sizeof(linkname), 1352 "/proc/%d/fd/%d", thread->pid_, fd); 1353 } else { 1354 scnprintf(linkname, sizeof(linkname), 1355 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd); 1356 } 1357 1358 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname)) 1359 return -1; 1360 1361 ret = readlink(linkname, pathname, sizeof(pathname)); 1362 1363 if (ret < 0 || ret > st.st_size) 1364 return -1; 1365 1366 pathname[ret] = '\0'; 1367 return trace__set_fd_pathname(thread, fd, pathname); 1368 } 1369 1370 static const char *thread__fd_path(struct thread *thread, int fd, 1371 struct trace *trace) 1372 { 1373 struct thread_trace *ttrace = thread__priv(thread); 1374 1375 if (ttrace == NULL) 1376 return NULL; 1377 1378 if (fd < 0) 1379 return NULL; 1380 1381 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) { 1382 if (!trace->live) 1383 return NULL; 1384 ++trace->stats.proc_getname; 1385 if (thread__read_fd_path(thread, fd)) 1386 return NULL; 1387 } 1388 1389 return ttrace->paths.table[fd]; 1390 } 1391 1392 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, 1393 struct syscall_arg *arg) 1394 { 1395 int fd = arg->val; 1396 size_t printed = scnprintf(bf, size, "%d", fd); 1397 const char *path = thread__fd_path(arg->thread, fd, arg->trace); 1398 1399 if (path) 1400 printed += scnprintf(bf + printed, size - printed, "<%s>", path); 1401 1402 return printed; 1403 } 1404 1405 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 1406 struct syscall_arg *arg) 1407 { 1408 int fd = arg->val; 1409 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg); 1410 struct thread_trace *ttrace = thread__priv(arg->thread); 1411 1412 if (ttrace && fd >= 0 && fd <= 
ttrace->paths.max) 1413 zfree(&ttrace->paths.table[fd]); 1414 1415 return printed; 1416 } 1417 1418 static bool trace__filter_duration(struct trace *trace, double t) 1419 { 1420 return t < (trace->duration_filter * NSEC_PER_MSEC); 1421 } 1422 1423 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) 1424 { 1425 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; 1426 1427 return fprintf(fp, "%10.3f ", ts); 1428 } 1429 1430 static bool done = false; 1431 static bool interrupted = false; 1432 1433 static void sig_handler(int sig) 1434 { 1435 done = true; 1436 interrupted = sig == SIGINT; 1437 } 1438 1439 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, 1440 u64 duration, u64 tstamp, FILE *fp) 1441 { 1442 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); 1443 printed += fprintf_duration(duration, fp); 1444 1445 if (trace->multiple_threads) { 1446 if (trace->show_comm) 1447 printed += fprintf(fp, "%.14s/", thread__comm_str(thread)); 1448 printed += fprintf(fp, "%d ", thread->tid); 1449 } 1450 1451 return printed; 1452 } 1453 1454 static int trace__process_event(struct trace *trace, struct machine *machine, 1455 union perf_event *event, struct perf_sample *sample) 1456 { 1457 int ret = 0; 1458 1459 switch (event->header.type) { 1460 case PERF_RECORD_LOST: 1461 color_fprintf(trace->output, PERF_COLOR_RED, 1462 "LOST %" PRIu64 " events!\n", event->lost.lost); 1463 ret = machine__process_lost_event(machine, event, sample); 1464 default: 1465 ret = machine__process_event(machine, event, sample); 1466 break; 1467 } 1468 1469 return ret; 1470 } 1471 1472 static int trace__tool_process(struct perf_tool *tool, 1473 union perf_event *event, 1474 struct perf_sample *sample, 1475 struct machine *machine) 1476 { 1477 struct trace *trace = container_of(tool, struct trace, tool); 1478 return trace__process_event(trace, machine, event, sample); 1479 } 1480 1481 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) 1482 { 1483 int err = symbol__init(NULL); 1484 1485 if (err) 1486 return err; 1487 1488 trace->host = machine__new_host(); 1489 if (trace->host == NULL) 1490 return -ENOMEM; 1491 1492 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, 1493 evlist->threads, trace__tool_process, false, 1494 trace->opts.proc_map_timeout); 1495 if (err) 1496 symbol__exit(); 1497 1498 return err; 1499 } 1500 1501 static int syscall__set_arg_fmts(struct syscall *sc) 1502 { 1503 struct format_field *field; 1504 int idx = 0; 1505 1506 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *)); 1507 if (sc->arg_scnprintf == NULL) 1508 return -1; 1509 1510 if (sc->fmt) 1511 sc->arg_parm = sc->fmt->arg_parm; 1512 1513 for (field = sc->args; field; field = field->next) { 1514 if (sc->fmt && sc->fmt->arg_scnprintf[idx]) 1515 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx]; 1516 else if (field->flags & FIELD_IS_POINTER) 1517 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex; 1518 ++idx; 1519 } 1520 1521 return 0; 1522 } 1523 1524 static int trace__read_syscall_info(struct trace *trace, int id) 1525 { 1526 char tp_name[128]; 1527 struct syscall *sc; 1528 const char *name = audit_syscall_to_name(id, trace->audit.machine); 1529 1530 if (name == NULL) 1531 return -1; 1532 1533 if (id > trace->syscalls.max) { 1534 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); 1535 1536 if (nsyscalls == NULL) 1537 return -1; 1538 1539 if (trace->syscalls.max != -1) { 1540 
memset(nsyscalls + trace->syscalls.max + 1, 0, 1541 (id - trace->syscalls.max) * sizeof(*sc)); 1542 } else { 1543 memset(nsyscalls, 0, (id + 1) * sizeof(*sc)); 1544 } 1545 1546 trace->syscalls.table = nsyscalls; 1547 trace->syscalls.max = id; 1548 } 1549 1550 sc = trace->syscalls.table + id; 1551 sc->name = name; 1552 1553 sc->fmt = syscall_fmt__find(sc->name); 1554 1555 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); 1556 sc->tp_format = trace_event__tp_format("syscalls", tp_name); 1557 1558 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) { 1559 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias); 1560 sc->tp_format = trace_event__tp_format("syscalls", tp_name); 1561 } 1562 1563 if (sc->tp_format == NULL) 1564 return -1; 1565 1566 sc->args = sc->tp_format->format.fields; 1567 sc->nr_args = sc->tp_format->format.nr_fields; 1568 /* drop nr field - not relevant here; does not exist on older kernels */ 1569 if (sc->args && strcmp(sc->args->name, "nr") == 0) { 1570 sc->args = sc->args->next; 1571 --sc->nr_args; 1572 } 1573 1574 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit"); 1575 1576 return syscall__set_arg_fmts(sc); 1577 } 1578 1579 static int trace__validate_ev_qualifier(struct trace *trace) 1580 { 1581 int err = 0, i; 1582 struct str_node *pos; 1583 1584 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier); 1585 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr * 1586 sizeof(trace->ev_qualifier_ids.entries[0])); 1587 1588 if (trace->ev_qualifier_ids.entries == NULL) { 1589 fputs("Error:\tNot enough memory for allocating events qualifier ids\n", 1590 trace->output); 1591 err = -EINVAL; 1592 goto out; 1593 } 1594 1595 i = 0; 1596 1597 strlist__for_each(pos, trace->ev_qualifier) { 1598 const char *sc = pos->s; 1599 int id = audit_name_to_syscall(sc, trace->audit.machine); 1600 1601 if (id < 0) { 1602 if (err == 0) { 1603 fputs("Error:\tInvalid syscall ", trace->output); 1604 err = -EINVAL; 1605 } else { 1606 fputs(", ", trace->output); 1607 } 1608 1609 fputs(sc, trace->output); 1610 } 1611 1612 trace->ev_qualifier_ids.entries[i++] = id; 1613 } 1614 1615 if (err < 0) { 1616 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" 1617 "\nHint:\tand: 'man syscalls'\n", trace->output); 1618 zfree(&trace->ev_qualifier_ids.entries); 1619 trace->ev_qualifier_ids.nr = 0; 1620 } 1621 out: 1622 return err; 1623 } 1624 1625 /* 1626 * args is to be interpreted as a series of longs but we need to handle 1627 * 8-byte unaligned accesses. args points to raw_data within the event 1628 * and raw_data is guaranteed to be 8-byte unaligned because it is 1629 * preceded by raw_size which is a u32. So we need to copy args to a temp 1630 * variable to read it. 
Most notably this avoids extended load instructions 1631 * on unaligned addresses 1632 */ 1633 1634 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, 1635 unsigned char *args, struct trace *trace, 1636 struct thread *thread) 1637 { 1638 size_t printed = 0; 1639 unsigned char *p; 1640 unsigned long val; 1641 1642 if (sc->args != NULL) { 1643 struct format_field *field; 1644 u8 bit = 1; 1645 struct syscall_arg arg = { 1646 .idx = 0, 1647 .mask = 0, 1648 .trace = trace, 1649 .thread = thread, 1650 }; 1651 1652 for (field = sc->args; field; 1653 field = field->next, ++arg.idx, bit <<= 1) { 1654 if (arg.mask & bit) 1655 continue; 1656 1657 /* special care for unaligned accesses */ 1658 p = args + sizeof(unsigned long) * arg.idx; 1659 memcpy(&val, p, sizeof(val)); 1660 1661 /* 1662 * Suppress this argument if its value is zero and 1663 * and we don't have a string associated in an 1664 * strarray for it. 1665 */ 1666 if (val == 0 && 1667 !(sc->arg_scnprintf && 1668 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY && 1669 sc->arg_parm[arg.idx])) 1670 continue; 1671 1672 printed += scnprintf(bf + printed, size - printed, 1673 "%s%s: ", printed ? ", " : "", field->name); 1674 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) { 1675 arg.val = val; 1676 if (sc->arg_parm) 1677 arg.parm = sc->arg_parm[arg.idx]; 1678 printed += sc->arg_scnprintf[arg.idx](bf + printed, 1679 size - printed, &arg); 1680 } else { 1681 printed += scnprintf(bf + printed, size - printed, 1682 "%ld", val); 1683 } 1684 } 1685 } else { 1686 int i = 0; 1687 1688 while (i < 6) { 1689 /* special care for unaligned accesses */ 1690 p = args + sizeof(unsigned long) * i; 1691 memcpy(&val, p, sizeof(val)); 1692 printed += scnprintf(bf + printed, size - printed, 1693 "%sarg%d: %ld", 1694 printed ? ", " : "", i, val); 1695 ++i; 1696 } 1697 } 1698 1699 return printed; 1700 } 1701 1702 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel, 1703 union perf_event *event, 1704 struct perf_sample *sample); 1705 1706 static struct syscall *trace__syscall_info(struct trace *trace, 1707 struct perf_evsel *evsel, int id) 1708 { 1709 1710 if (id < 0) { 1711 1712 /* 1713 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried 1714 * before that, leaving at a higher verbosity level till that is 1715 * explained. Reproduced with plain ftrace with: 1716 * 1717 * echo 1 > /t/events/raw_syscalls/sys_exit/enable 1718 * grep "NR -1 " /t/trace_pipe 1719 * 1720 * After generating some load on the machine. 
1721 */ 1722 if (verbose > 1) { 1723 static u64 n; 1724 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n", 1725 id, perf_evsel__name(evsel), ++n); 1726 } 1727 return NULL; 1728 } 1729 1730 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) && 1731 trace__read_syscall_info(trace, id)) 1732 goto out_cant_read; 1733 1734 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL)) 1735 goto out_cant_read; 1736 1737 return &trace->syscalls.table[id]; 1738 1739 out_cant_read: 1740 if (verbose) { 1741 fprintf(trace->output, "Problems reading syscall %d", id); 1742 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL) 1743 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name); 1744 fputs(" information\n", trace->output); 1745 } 1746 return NULL; 1747 } 1748 1749 static void thread__update_stats(struct thread_trace *ttrace, 1750 int id, struct perf_sample *sample) 1751 { 1752 struct int_node *inode; 1753 struct stats *stats; 1754 u64 duration = 0; 1755 1756 inode = intlist__findnew(ttrace->syscall_stats, id); 1757 if (inode == NULL) 1758 return; 1759 1760 stats = inode->priv; 1761 if (stats == NULL) { 1762 stats = malloc(sizeof(struct stats)); 1763 if (stats == NULL) 1764 return; 1765 init_stats(stats); 1766 inode->priv = stats; 1767 } 1768 1769 if (ttrace->entry_time && sample->time > ttrace->entry_time) 1770 duration = sample->time - ttrace->entry_time; 1771 1772 update_stats(stats, duration); 1773 } 1774 1775 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample) 1776 { 1777 struct thread_trace *ttrace; 1778 u64 duration; 1779 size_t printed; 1780 1781 if (trace->current == NULL) 1782 return 0; 1783 1784 ttrace = thread__priv(trace->current); 1785 1786 if (!ttrace->entry_pending) 1787 return 0; 1788 1789 duration = sample->time - ttrace->entry_time; 1790 1791 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output); 1792 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); 1793 ttrace->entry_pending = false; 1794 1795 return printed; 1796 } 1797 1798 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, 1799 union perf_event *event __maybe_unused, 1800 struct perf_sample *sample) 1801 { 1802 char *msg; 1803 void *args; 1804 size_t printed = 0; 1805 struct thread *thread; 1806 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; 1807 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1808 struct thread_trace *ttrace; 1809 1810 if (sc == NULL) 1811 return -1; 1812 1813 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1814 ttrace = thread__trace(thread, trace->output); 1815 if (ttrace == NULL) 1816 goto out_put; 1817 1818 args = perf_evsel__sc_tp_ptr(evsel, args, sample); 1819 1820 if (ttrace->entry_str == NULL) { 1821 ttrace->entry_str = malloc(1024); 1822 if (!ttrace->entry_str) 1823 goto out_put; 1824 } 1825 1826 if (!trace->summary_only) 1827 trace__printf_interrupted_entry(trace, sample); 1828 1829 ttrace->entry_time = sample->time; 1830 msg = ttrace->entry_str; 1831 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name); 1832 1833 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, 1834 args, trace, thread); 1835 1836 if (sc->is_exit) { 1837 if (!trace->duration_filter && !trace->summary_only) { 1838 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); 1839 fprintf(trace->output, "%-70s\n", 
ttrace->entry_str); 1840 } 1841 } else 1842 ttrace->entry_pending = true; 1843 1844 if (trace->current != thread) { 1845 thread__put(trace->current); 1846 trace->current = thread__get(thread); 1847 } 1848 err = 0; 1849 out_put: 1850 thread__put(thread); 1851 return err; 1852 } 1853 1854 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, 1855 union perf_event *event __maybe_unused, 1856 struct perf_sample *sample) 1857 { 1858 long ret; 1859 u64 duration = 0; 1860 struct thread *thread; 1861 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; 1862 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1863 struct thread_trace *ttrace; 1864 1865 if (sc == NULL) 1866 return -1; 1867 1868 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1869 ttrace = thread__trace(thread, trace->output); 1870 if (ttrace == NULL) 1871 goto out_put; 1872 1873 if (trace->summary) 1874 thread__update_stats(ttrace, id, sample); 1875 1876 ret = perf_evsel__sc_tp_uint(evsel, ret, sample); 1877 1878 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) { 1879 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname); 1880 trace->last_vfs_getname = NULL; 1881 ++trace->stats.vfs_getname; 1882 } 1883 1884 ttrace->exit_time = sample->time; 1885 1886 if (ttrace->entry_time) { 1887 duration = sample->time - ttrace->entry_time; 1888 if (trace__filter_duration(trace, duration)) 1889 goto out; 1890 } else if (trace->duration_filter) 1891 goto out; 1892 1893 if (trace->summary_only) 1894 goto out; 1895 1896 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output); 1897 1898 if (ttrace->entry_pending) { 1899 fprintf(trace->output, "%-70s", ttrace->entry_str); 1900 } else { 1901 fprintf(trace->output, " ... 
["); 1902 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued"); 1903 fprintf(trace->output, "]: %s()", sc->name); 1904 } 1905 1906 if (sc->fmt == NULL) { 1907 signed_print: 1908 fprintf(trace->output, ") = %ld", ret); 1909 } else if (ret < 0 && sc->fmt->errmsg) { 1910 char bf[STRERR_BUFSIZE]; 1911 const char *emsg = strerror_r(-ret, bf, sizeof(bf)), 1912 *e = audit_errno_to_name(-ret); 1913 1914 fprintf(trace->output, ") = -1 %s %s", e, emsg); 1915 } else if (ret == 0 && sc->fmt->timeout) 1916 fprintf(trace->output, ") = 0 Timeout"); 1917 else if (sc->fmt->hexret) 1918 fprintf(trace->output, ") = %#lx", ret); 1919 else 1920 goto signed_print; 1921 1922 fputc('\n', trace->output); 1923 out: 1924 ttrace->entry_pending = false; 1925 err = 0; 1926 out_put: 1927 thread__put(thread); 1928 return err; 1929 } 1930 1931 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, 1932 union perf_event *event __maybe_unused, 1933 struct perf_sample *sample) 1934 { 1935 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname"); 1936 return 0; 1937 } 1938 1939 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel, 1940 union perf_event *event __maybe_unused, 1941 struct perf_sample *sample) 1942 { 1943 u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); 1944 double runtime_ms = (double)runtime / NSEC_PER_MSEC; 1945 struct thread *thread = machine__findnew_thread(trace->host, 1946 sample->pid, 1947 sample->tid); 1948 struct thread_trace *ttrace = thread__trace(thread, trace->output); 1949 1950 if (ttrace == NULL) 1951 goto out_dump; 1952 1953 ttrace->runtime_ms += runtime_ms; 1954 trace->runtime_ms += runtime_ms; 1955 thread__put(thread); 1956 return 0; 1957 1958 out_dump: 1959 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n", 1960 evsel->name, 1961 perf_evsel__strval(evsel, sample, "comm"), 1962 (pid_t)perf_evsel__intval(evsel, sample, "pid"), 1963 runtime, 1964 perf_evsel__intval(evsel, sample, "vruntime")); 1965 thread__put(thread); 1966 return 0; 1967 } 1968 1969 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, 1970 union perf_event *event __maybe_unused, 1971 struct perf_sample *sample) 1972 { 1973 trace__printf_interrupted_entry(trace, sample); 1974 trace__fprintf_tstamp(trace, sample->time, trace->output); 1975 1976 if (trace->trace_syscalls) 1977 fprintf(trace->output, "( ): "); 1978 1979 fprintf(trace->output, "%s:", evsel->name); 1980 1981 if (evsel->tp_format) { 1982 event_format__fprintf(evsel->tp_format, sample->cpu, 1983 sample->raw_data, sample->raw_size, 1984 trace->output); 1985 } 1986 1987 fprintf(trace->output, ")\n"); 1988 return 0; 1989 } 1990 1991 static void print_location(FILE *f, struct perf_sample *sample, 1992 struct addr_location *al, 1993 bool print_dso, bool print_sym) 1994 { 1995 1996 if ((verbose || print_dso) && al->map) 1997 fprintf(f, "%s@", al->map->dso->long_name); 1998 1999 if ((verbose || print_sym) && al->sym) 2000 fprintf(f, "%s+0x%" PRIx64, al->sym->name, 2001 al->addr - al->sym->start); 2002 else if (al->map) 2003 fprintf(f, "0x%" PRIx64, al->addr); 2004 else 2005 fprintf(f, "0x%" PRIx64, sample->addr); 2006 } 2007 2008 static int trace__pgfault(struct trace *trace, 2009 struct perf_evsel *evsel, 2010 union perf_event *event, 2011 struct perf_sample *sample) 2012 { 2013 struct thread *thread; 2014 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 2015 struct addr_location al; 2016 char map_type = 'd'; 
static int trace__pgfault(struct trace *trace,
			  struct perf_evsel *evsel,
			  union perf_event *event,
			  struct perf_sample *sample)
{
	struct thread *thread;
	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
	struct addr_location al;
	char map_type = 'd';
	struct thread_trace *ttrace;
	int err = -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
		ttrace->pfmaj++;
	else
		ttrace->pfmin++;

	if (trace->summary_only)
		goto out;

	thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
				   sample->ip, &al);

	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);

	fprintf(trace->output, "%sfault [",
		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
		"maj" : "min");

	print_location(trace->output, sample, &al, false, true);

	fprintf(trace->output, "] => ");

	thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
				   sample->addr, &al);

	if (!al.map) {
		thread__find_addr_location(thread, cpumode,
					   MAP__FUNCTION, sample->addr, &al);

		if (al.map)
			map_type = 'x';
		else
			map_type = '?';
	}

	print_location(trace->output, sample, &al, true, false);

	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
out:
	err = 0;
out_put:
	thread__put(thread);
	return err;
}

static bool skip_sample(struct trace *trace, struct perf_sample *sample)
{
	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
		return false;

	if (trace->pid_list || trace->tid_list)
		return true;

	return false;
}

static int trace__process_sample(struct perf_tool *tool,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_evsel *evsel,
				 struct machine *machine __maybe_unused)
{
	struct trace *trace = container_of(tool, struct trace, tool);
	int err = 0;

	tracepoint_handler handler = evsel->handler;

	if (skip_sample(trace, sample))
		return 0;

	if (!trace->full_time && trace->base_time == 0)
		trace->base_time = sample->time;

	if (handler) {
		++trace->nr_events;
		handler(trace, evsel, event, sample);
	}

	return err;
}

static int parse_target_str(struct trace *trace)
{
	if (trace->opts.target.pid) {
		trace->pid_list = intlist__new(trace->opts.target.pid);
		if (trace->pid_list == NULL) {
			pr_err("Error parsing process id string\n");
			return -EINVAL;
		}
	}

	if (trace->opts.target.tid) {
		trace->tid_list = intlist__new(trace->opts.target.tid);
		if (trace->tid_list == NULL) {
			pr_err("Error parsing thread id string\n");
			return -EINVAL;
		}
	}

	return 0;
}

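/*
 * 'perf trace record' is implemented by building an argv for 'perf record'
 * out of fixed arguments, the syscall tracepoints (raw_syscalls, or the
 * older syscalls names), the requested page fault events and the user's
 * remaining arguments, then handing it over to cmd_record().
 */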
static int trace__record(struct trace *trace, int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
	};

	const char * const sc_args[] = { "-e", };
	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
	const char * const majpf_args[] = { "-e", "major-faults" };
	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
	const char * const minpf_args[] = { "-e", "minor-faults" };
	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);

	/* +1 is for the event string below */
	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
		   majpf_args_nr + minpf_args_nr + argc;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	j = 0;
	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[j++] = record_args[i];

	if (trace->trace_syscalls) {
		for (i = 0; i < sc_args_nr; i++)
			rec_argv[j++] = sc_args[i];

		/* event string may be different for older kernels - e.g., RHEL6 */
		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
		else if (is_valid_tracepoint("syscalls:sys_enter"))
			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
		else {
			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
			free(rec_argv);
			return -1;
		}
	}

	if (trace->trace_pgfaults & TRACE_PFMAJ)
		for (i = 0; i < majpf_args_nr; i++)
			rec_argv[j++] = majpf_args[i];

	if (trace->trace_pgfaults & TRACE_PFMIN)
		for (i = 0; i < minpf_args_nr; i++)
			rec_argv[j++] = minpf_args[i];

	for (i = 0; i < (unsigned int)argc; i++)
		rec_argv[j++] = argv[i];

	return cmd_record(j, rec_argv, NULL);
}

static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);

static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
	if (evsel == NULL)
		return;

	if (perf_evsel__field(evsel, "pathname") == NULL) {
		perf_evsel__delete(evsel);
		return;
	}

	evsel->handler = trace__vfs_getname;
	perf_evlist__add(evlist, evsel);
}

static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
				    u64 config)
{
	struct perf_evsel *evsel;
	struct perf_event_attr attr = {
		.type = PERF_TYPE_SOFTWARE,
		.mmap_data = 1,
	};

	attr.config = config;
	attr.sample_period = 1;

	event_attr_init(&attr);

	evsel = perf_evsel__new(&attr);
	if (!evsel)
		return -ENOMEM;

	evsel->handler = trace__pgfault;
	perf_evlist__add(evlist, evsel);

	return 0;
}

static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
{
	const u32 type = event->header.type;
	struct perf_evsel *evsel;

	if (!trace->full_time && trace->base_time == 0)
		trace->base_time = sample->time;

	if (type != PERF_RECORD_SAMPLE) {
		trace__process_event(trace, trace->host, event, sample);
		return;
	}

	evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
	if (evsel == NULL) {
		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
		return;
	}

	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
	    sample->raw_data == NULL) {
		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
			perf_evsel__name(evsel), sample->tid,
			sample->cpu, sample->raw_size);
	} else {
		tracepoint_handler handler = evsel->handler;
		handler(trace, evsel, event, sample);
	}
}

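/*
 * Create the raw_syscalls:sys_enter/sys_exit tracepoint events (falling back
 * to the older syscalls:* names inside perf_evsel__syscall_newtp()), hook up
 * their 'args'/'ret' payload fields, and add them to the evlist used by
 * 'perf trace' live mode.
 */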
static int trace__add_syscall_newtp(struct trace *trace)
{
	int ret = -1;
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *sys_enter, *sys_exit;

	sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
	if (sys_enter == NULL)
		goto out;

	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
		goto out_delete_sys_enter;

	sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
	if (sys_exit == NULL)
		goto out_delete_sys_enter;

	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
		goto out_delete_sys_exit;

	perf_evlist__add(evlist, sys_enter);
	perf_evlist__add(evlist, sys_exit);

	trace->syscalls.events.sys_enter = sys_enter;
	trace->syscalls.events.sys_exit = sys_exit;

	ret = 0;
out:
	return ret;

out_delete_sys_exit:
	perf_evsel__delete_priv(sys_exit);
out_delete_sys_enter:
	perf_evsel__delete_priv(sys_enter);
	goto out;
}

static int trace__set_ev_qualifier_filter(struct trace *trace)
{
	int err = -1;
	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
						trace->ev_qualifier_ids.nr,
						trace->ev_qualifier_ids.entries);

	if (filter == NULL)
		goto out_enomem;

	if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
		err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);

	free(filter);
out:
	return err;
out_enomem:
	errno = ENOMEM;
	goto out;
}

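/*
 * Illustrative example of the qualifier filter built above: tracing
 * "-e open,close" on x86_64 resolves to syscall ids 2 and 3, so the
 * expression comes out roughly as "id == 2 || id == 3" (or
 * "id != 2 && id != 3" when the list is negated with a leading '!'),
 * and is appended with "&&" to any filter the sys_enter/sys_exit
 * events already carry.
 */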
static int trace__run(struct trace *trace, int argc, const char **argv)
{
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *evsel;
	int err = -1, i;
	unsigned long before;
	const bool forks = argc > 0;
	bool draining = false;

	trace->live = true;

	if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
		goto out_error_raw_syscalls;

	if (trace->trace_syscalls)
		perf_evlist__add_vfs_getname(evlist);

	if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
		goto out_error_mem;
	}

	if ((trace->trace_pgfaults & TRACE_PFMIN) &&
	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
		goto out_error_mem;

	if (trace->sched &&
	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
				   trace__sched_stat_runtime))
		goto out_error_sched_stat_runtime;

	err = perf_evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
		goto out_delete_evlist;
	}

	perf_evlist__config(evlist, &trace->opts);

	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);

	if (forks) {
		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
						    argv, false, NULL);
		if (err < 0) {
			fprintf(trace->output, "Couldn't run the workload!\n");
			goto out_delete_evlist;
		}
	}

	err = perf_evlist__open(evlist);
	if (err < 0)
		goto out_error_open;

	/*
	 * Better not use !target__has_task() here because we need to cover the
	 * case where no threads were specified in the command line, but a
	 * workload was, and in that case we will fill in the thread_map when
	 * we fork the workload in perf_evlist__prepare_workload.
	 */
	if (trace->filter_pids.nr > 0)
		err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
	else if (thread_map__pid(evlist->threads, 0) == -1)
		err = perf_evlist__set_filter_pid(evlist, getpid());

	if (err < 0)
		goto out_error_mem;

	if (trace->ev_qualifier_ids.nr > 0) {
		err = trace__set_ev_qualifier_filter(trace);
		if (err < 0)
			goto out_errno;
	}

	pr_debug("%s\n", trace->syscalls.events.sys_exit->filter);

	err = perf_evlist__apply_filters(evlist, &evsel);
	if (err < 0)
		goto out_error_apply_filters;

	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
	if (err < 0)
		goto out_error_mmap;

	if (!target__none(&trace->opts.target))
		perf_evlist__enable(evlist);

	if (forks)
		perf_evlist__start_workload(evlist);

	trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
				  evlist->threads->nr > 1 ||
				  perf_evlist__first(evlist)->attr.inherit;
again:
	before = trace->nr_events;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		union perf_event *event;

		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
			struct perf_sample sample;

			++trace->nr_events;

			err = perf_evlist__parse_sample(evlist, event, &sample);
			if (err) {
				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
				goto next_event;
			}

			trace__handle_event(trace, event, &sample);
next_event:
			perf_evlist__mmap_consume(evlist, i);

			if (interrupted)
				goto out_disable;

			if (done && !draining) {
				perf_evlist__disable(evlist);
				draining = true;
			}
		}
	}

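	/*
	 * If no new events showed up in this pass over the mmap buffers,
	 * poll for more: block indefinitely while we are not done, but only
	 * for 100ms afterwards, so the remaining buffers get drained before
	 * we disable the events and print the summary.
	 */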
	if (trace->nr_events == before) {
		int timeout = done ? 100 : -1;

		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
				draining = true;

			goto again;
		}
	} else {
		goto again;
	}

out_disable:
	thread__zput(trace->current);

	perf_evlist__disable(evlist);

	if (!err) {
		if (trace->summary)
			trace__fprintf_thread_summary(trace, trace->output);

		if (trace->show_tool_stats) {
			fprintf(trace->output, "Stats:\n "
					       " vfs_getname : %" PRIu64 "\n"
					       " proc_getname: %" PRIu64 "\n",
				trace->stats.vfs_getname,
				trace->stats.proc_getname);
		}
	}

out_delete_evlist:
	perf_evlist__delete(evlist);
	trace->evlist = NULL;
	trace->live = false;
	return err;
{
	char errbuf[BUFSIZ];

out_error_sched_stat_runtime:
	debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
	goto out_error;

out_error_raw_syscalls:
	debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
	goto out_error;

out_error_mmap:
	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
	goto out_error;

out_error_open:
	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

out_error:
	fprintf(trace->output, "%s\n", errbuf);
	goto out_delete_evlist;

out_error_apply_filters:
	fprintf(trace->output,
		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
		evsel->filter, perf_evsel__name(evsel), errno,
		strerror_r(errno, errbuf, sizeof(errbuf)));
	goto out_delete_evlist;
}
out_error_mem:
	fprintf(trace->output, "Not enough memory to run!\n");
	goto out_delete_evlist;

out_errno:
	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
	goto out_delete_evlist;
}

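/*
 * Replay mode: process a perf.data file (typically produced by
 * 'perf trace record'), wiring up the same sys_enter/sys_exit/vfs_getname/
 * page fault handlers used in live mode and feeding the recorded events
 * through the session machinery in timestamp order.
 */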
static int trace__replay(struct trace *trace)
{
	const struct perf_evsel_str_handler handlers[] = {
		{ "probe:vfs_getname", trace__vfs_getname, },
	};
	struct perf_data_file file = {
		.path = input_name,
		.mode = PERF_DATA_MODE_READ,
		.force = trace->force,
	};
	struct perf_session *session;
	struct perf_evsel *evsel;
	int err = -1;

	trace->tool.sample = trace__process_sample;
	trace->tool.mmap = perf_event__process_mmap;
	trace->tool.mmap2 = perf_event__process_mmap2;
	trace->tool.comm = perf_event__process_comm;
	trace->tool.exit = perf_event__process_exit;
	trace->tool.fork = perf_event__process_fork;
	trace->tool.attr = perf_event__process_attr;
	trace->tool.tracing_data = perf_event__process_tracing_data;
	trace->tool.build_id = perf_event__process_build_id;

	trace->tool.ordered_events = true;
	trace->tool.ordering_requires_timestamps = true;

	/* add tid to output */
	trace->multiple_threads = true;

	session = perf_session__new(&file, false, &trace->tool);
	if (session == NULL)
		return -1;

	if (symbol__init(&session->header.env) < 0)
		goto out;

	trace->host = &session->machines.host;

	err = perf_session__set_tracepoints_handlers(session, handlers);
	if (err)
		goto out;

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_enter");
	/* older kernels have syscalls tp versus raw_syscalls */
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_enter");

	if (evsel &&
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
	     perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
		pr_err("Error initializing raw_syscalls:sys_enter event\n");
		goto out;
	}

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_exit");
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_exit");
	if (evsel &&
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
	     perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
		pr_err("Error initializing raw_syscalls:sys_exit event\n");
		goto out;
	}

	evlist__for_each(session->evlist, evsel) {
		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
			evsel->handler = trace__pgfault;
	}

	err = parse_target_str(trace);
	if (err != 0)
		goto out;

	setup_pager();

	err = perf_session__process_events(session);
	if (err)
		pr_err("Failed to process events, error %d", err);

	else if (trace->summary)
		trace__fprintf_thread_summary(trace, trace->output);

out:
	perf_session__delete(session);

	return err;
}

static size_t trace__fprintf_threads_header(FILE *fp)
{
	size_t printed;

	printed = fprintf(fp, "\n Summary of events:\n\n");

	return printed;
}

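/*
 * Print the per-thread syscall statistics table: one row per syscall id
 * found in the thread's syscall_stats intlist, with call count, min/avg/max
 * latency in milliseconds and the relative standard deviation.
 */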
static size_t thread__dump_stats(struct thread_trace *ttrace,
				 struct trace *trace, FILE *fp)
{
	struct stats *stats;
	size_t printed = 0;
	struct syscall *sc;
	struct int_node *inode = intlist__first(ttrace->syscall_stats);

	if (inode == NULL)
		return 0;

	printed += fprintf(fp, "\n");

	printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
	printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ------\n");

	/* each int_node is a syscall */
	while (inode) {
		stats = inode->priv;
		if (stats) {
			double min = (double)(stats->min) / NSEC_PER_MSEC;
			double max = (double)(stats->max) / NSEC_PER_MSEC;
			double avg = avg_stats(stats);
			double pct;
			u64 n = (u64) stats->n;

			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
			avg /= NSEC_PER_MSEC;

			sc = &trace->syscalls.table[inode->i];
			printed += fprintf(fp, "   %-15s", sc->name);
			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
					   n, min, avg);
			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
		}

		inode = intlist__next(inode);
	}

	printed += fprintf(fp, "\n\n");

	return printed;
}

/* struct used to pass data to per-thread function */
struct summary_data {
	FILE *fp;
	struct trace *trace;
	size_t printed;
};

static int trace__fprintf_one_thread(struct thread *thread, void *priv)
{
	struct summary_data *data = priv;
	FILE *fp = data->fp;
	size_t printed = data->printed;
	struct trace *trace = data->trace;
	struct thread_trace *ttrace = thread__priv(thread);
	double ratio;

	if (ttrace == NULL)
		return 0;

	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;

	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
	printed += fprintf(fp, "%.1f%%", ratio);
	if (ttrace->pfmaj)
		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
	if (ttrace->pfmin)
		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
	printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
	printed += thread__dump_stats(ttrace, trace, fp);

	data->printed += printed;

	return 0;
}

static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
{
	struct summary_data data = {
		.fp = fp,
		.trace = trace
	};
	data.printed = trace__fprintf_threads_header(fp);

	machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);

	return data.printed;
}

static int trace__set_duration(const struct option *opt, const char *str,
			       int unset __maybe_unused)
{
	struct trace *trace = opt->value;

	trace->duration_filter = atof(str);
	return 0;
}

static int trace__set_filter_pids(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	int ret = -1;
	size_t i;
	struct trace *trace = opt->value;
	/*
	 * FIXME: introduce a intarray class, plain parse csv and create a
	 * { int nr, int entries[] } struct...
	 */
	struct intlist *list = intlist__new(str);

	if (list == NULL)
		return -1;

	i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
	trace->filter_pids.entries = calloc(i, sizeof(pid_t));

	if (trace->filter_pids.entries == NULL)
		goto out;

	trace->filter_pids.entries[0] = getpid();

	for (i = 1; i < trace->filter_pids.nr; ++i)
		trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;

	intlist__delete(list);
	ret = 0;
out:
	return ret;
}

static int trace__open_output(struct trace *trace, const char *filename)
{
	struct stat st;

	if (!stat(filename, &st) && st.st_size) {
		char oldname[PATH_MAX];

		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
		unlink(oldname);
		rename(filename, oldname);
	}

	trace->output = fopen(filename, "w");

	return trace->output == NULL ? -errno : 0;
}

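/*
 * Parser for the -F/--pf option: "all" enables tracing of both fault types,
 * "maj" and "min" select just one; anything else is rejected. The option
 * defaults to "maj" when given without an argument (illustrative use:
 * "perf trace --pf=all <workload>").
 */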
static int parse_pagefaults(const struct option *opt, const char *str,
			    int unset __maybe_unused)
{
	int *trace_pgfaults = opt->value;

	if (strcmp(str, "all") == 0)
		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
	else if (strcmp(str, "maj") == 0)
		*trace_pgfaults |= TRACE_PFMAJ;
	else if (strcmp(str, "min") == 0)
		*trace_pgfaults |= TRACE_PFMIN;
	else
		return -1;

	return 0;
}

static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel)
		evsel->handler = handler;
}

int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
{
	const char *trace_usage[] = {
		"perf trace [<options>] [<command>]",
		"perf trace [<options>] -- <command> [<options>]",
		"perf trace record [<options>] [<command>]",
		"perf trace record [<options>] -- <command> [<options>]",
		NULL
	};
	struct trace trace = {
		.audit = {
			.machine = audit_detect_machine(),
			.open_id = audit_name_to_syscall("open", trace.audit.machine),
		},
		.syscalls = {
			.max = -1,
		},
		.opts = {
			.target = {
				.uid = UINT_MAX,
				.uses_mmap = true,
			},
			.user_freq = UINT_MAX,
			.user_interval = ULLONG_MAX,
			.no_buffering = true,
			.mmap_pages = UINT_MAX,
			.proc_map_timeout = 500,
		},
		.output = stdout,
		.show_comm = true,
		.trace_syscalls = true,
	};
	const char *output_name = NULL;
	const char *ev_qualifier_str = NULL;
	const struct option trace_options[] = {
	OPT_CALLBACK(0, "event", &trace.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_BOOLEAN(0, "comm", &trace.show_comm,
		    "show the thread COMM next to its id"),
	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
	OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
		   "trace events on existing process id"),
	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
		   "trace events on existing thread id"),
	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
		     "pids to filter (by the kernel)", trace__set_filter_pids),
	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
		   "list of cpus to monitor"),
	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
		     "number of mmap data pages",
		     perf_evlist__parse_mmap_pages),
	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
		   "user to profile"),
	OPT_CALLBACK(0, "duration", &trace, "float",
		     "show only events with duration > N.M ms",
		     trace__set_duration),
	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
	OPT_BOOLEAN('T', "time", &trace.full_time,
		    "Show full timestamp, not time relative to first start"),
	OPT_BOOLEAN('s', "summary", &trace.summary_only,
		    "Show only syscall summary with statistics"),
	OPT_BOOLEAN('S', "with-summary", &trace.summary,
		    "Show all syscalls and summary with statistics"),
	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
			     "Trace pagefaults", parse_pagefaults, "maj"),
	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
		     "per thread proc mmap processing timeout in ms"),
	OPT_END()
	};
	const char * const trace_subcommands[] = { "record", NULL };
	int err;
	char bf[BUFSIZ];

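	/*
	 * Entry point glue: parse the options above, dispatch
	 * "perf trace record" to trace__record(), and otherwise either
	 * replay a perf.data file (-i) or run live via trace__run().
	 * Illustrative invocations (arguments are examples only):
	 *
	 *   perf trace ls                      # trace syscalls of a workload
	 *   perf trace -e open,close -p 1234   # restrict to some syscalls of a pid
	 *   perf trace -s sleep 1              # summary-only output
	 */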
	signal(SIGSEGV, sighandler_dump_stack);
	signal(SIGFPE, sighandler_dump_stack);

	trace.evlist = perf_evlist__new();

	if (trace.evlist == NULL) {
		pr_err("Not enough memory to run!\n");
		err = -ENOMEM;
		goto out;
	}

	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
					trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);

	if (trace.trace_pgfaults) {
		trace.opts.sample_address = true;
		trace.opts.sample_time = true;
	}

	if (trace.evlist->nr_entries > 0)
		evlist__set_evsel_handler(trace.evlist, trace__event_handler);

	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
		return trace__record(&trace, argc-1, &argv[1]);

	/* summary_only implies summary option, but don't overwrite summary if set */
	if (trace.summary_only)
		trace.summary = trace.summary_only;

	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
		pr_err("Please specify something to trace.\n");
		return -1;
	}

	if (output_name != NULL) {
		err = trace__open_output(&trace, output_name);
		if (err < 0) {
			perror("failed to create output file");
			goto out;
		}
	}

	if (ev_qualifier_str != NULL) {
		const char *s = ev_qualifier_str;
		struct strlist_config slist_config = {
			.dirname = system_path(STRACE_GROUPS_DIR),
		};

		trace.not_ev_qualifier = *s == '!';
		if (trace.not_ev_qualifier)
			++s;
		trace.ev_qualifier = strlist__new(s, &slist_config);
		if (trace.ev_qualifier == NULL) {
			fputs("Not enough memory to parse event qualifier",
			      trace.output);
			err = -ENOMEM;
			goto out_close;
		}

		err = trace__validate_ev_qualifier(&trace);
		if (err)
			goto out_close;
	}

	err = target__validate(&trace.opts.target);
	if (err) {
		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		fprintf(trace.output, "%s", bf);
		goto out_close;
	}

	err = target__parse_uid(&trace.opts.target);
	if (err) {
		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		fprintf(trace.output, "%s", bf);
		goto out_close;
	}

	if (!argc && target__none(&trace.opts.target))
		trace.opts.target.system_wide = true;

	if (input_name)
		err = trace__replay(&trace);
	else
		err = trace__run(&trace, argc, argv);

out_close:
	if (output_name != NULL)
		fclose(trace.output);
out:
	return err;
}