1 /* 2 * builtin-trace.c 3 * 4 * Builtin 'trace' command: 5 * 6 * Display a continuously updated trace of any workload, CPU, specific PID, 7 * system wide, etc. Default format is loosely strace like, but any other 8 * event may be specified using --event. 9 * 10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> 11 * 12 * Initially based on the 'trace' prototype by Thomas Gleixner: 13 * 14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'") 15 * 16 * Released under the GPL v2. (and only v2, not any later version) 17 */ 18 19 #include <traceevent/event-parse.h> 20 #include <api/fs/tracing_path.h> 21 #include "builtin.h" 22 #include "util/color.h" 23 #include "util/debug.h" 24 #include "util/evlist.h" 25 #include <subcmd/exec-cmd.h> 26 #include "util/machine.h" 27 #include "util/session.h" 28 #include "util/thread.h" 29 #include <subcmd/parse-options.h> 30 #include "util/strlist.h" 31 #include "util/intlist.h" 32 #include "util/thread_map.h" 33 #include "util/stat.h" 34 #include "trace-event.h" 35 #include "util/parse-events.h" 36 #include "util/bpf-loader.h" 37 #include "callchain.h" 38 #include "syscalltbl.h" 39 #include "rb_resort.h" 40 41 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */ 42 #include <stdlib.h> 43 #include <linux/err.h> 44 #include <linux/filter.h> 45 #include <linux/audit.h> 46 #include <sys/ptrace.h> 47 #include <linux/random.h> 48 #include <linux/stringify.h> 49 50 #ifndef O_CLOEXEC 51 # define O_CLOEXEC 02000000 52 #endif 53 54 struct trace { 55 struct perf_tool tool; 56 struct syscalltbl *sctbl; 57 struct { 58 int max; 59 struct syscall *table; 60 struct { 61 struct perf_evsel *sys_enter, 62 *sys_exit; 63 } events; 64 } syscalls; 65 struct record_opts opts; 66 struct perf_evlist *evlist; 67 struct machine *host; 68 struct thread *current; 69 u64 base_time; 70 FILE *output; 71 unsigned long nr_events; 72 struct strlist *ev_qualifier; 73 struct { 74 size_t nr; 75 int *entries; 76 } ev_qualifier_ids; 77 struct intlist *tid_list; 78 struct intlist *pid_list; 79 struct { 80 size_t nr; 81 pid_t *entries; 82 } filter_pids; 83 double duration_filter; 84 double runtime_ms; 85 struct { 86 u64 vfs_getname, 87 proc_getname; 88 } stats; 89 unsigned int max_stack; 90 unsigned int min_stack; 91 bool not_ev_qualifier; 92 bool live; 93 bool full_time; 94 bool sched; 95 bool multiple_threads; 96 bool summary; 97 bool summary_only; 98 bool show_comm; 99 bool show_tool_stats; 100 bool trace_syscalls; 101 bool kernel_syscallchains; 102 bool force; 103 bool vfs_getname; 104 int trace_pgfaults; 105 int open_id; 106 }; 107 108 struct tp_field { 109 int offset; 110 union { 111 u64 (*integer)(struct tp_field *field, struct perf_sample *sample); 112 void *(*pointer)(struct tp_field *field, struct perf_sample *sample); 113 }; 114 }; 115 116 #define TP_UINT_FIELD(bits) \ 117 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \ 118 { \ 119 u##bits value; \ 120 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ 121 return value; \ 122 } 123 124 TP_UINT_FIELD(8); 125 TP_UINT_FIELD(16); 126 TP_UINT_FIELD(32); 127 TP_UINT_FIELD(64); 128 129 #define TP_UINT_FIELD__SWAPPED(bits) \ 130 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \ 131 { \ 132 u##bits value; \ 133 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ 134 return bswap_##bits(value);\ 135 } 136 137 TP_UINT_FIELD__SWAPPED(16); 138 TP_UINT_FIELD__SWAPPED(32); 139 TP_UINT_FIELD__SWAPPED(64); 140 141 static int tp_field__init_uint(struct tp_field *field, 142 struct format_field *format_field, 143 bool needs_swap) 144 { 145 field->offset = format_field->offset; 146 147 switch (format_field->size) { 148 case 1: 149 field->integer = tp_field__u8; 150 break; 151 case 2: 152 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16; 153 break; 154 case 4: 155 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32; 156 break; 157 case 8: 158 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64; 159 break; 160 default: 161 return -1; 162 } 163 164 return 0; 165 } 166 167 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample) 168 { 169 return sample->raw_data + field->offset; 170 } 171 172 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field) 173 { 174 field->offset = format_field->offset; 175 field->pointer = tp_field__ptr; 176 return 0; 177 } 178 179 struct syscall_tp { 180 struct tp_field id; 181 union { 182 struct tp_field args, ret; 183 }; 184 }; 185 186 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel, 187 struct tp_field *field, 188 const char *name) 189 { 190 struct format_field *format_field = perf_evsel__field(evsel, name); 191 192 if (format_field == NULL) 193 return -1; 194 195 return tp_field__init_uint(field, format_field, evsel->needs_swap); 196 } 197 198 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \ 199 ({ struct syscall_tp *sc = evsel->priv;\ 200 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); }) 201 202 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel, 203 struct tp_field *field, 204 const char *name) 205 { 206 struct format_field *format_field = perf_evsel__field(evsel, name); 207 208 if (format_field == NULL) 209 return -1; 210 211 return tp_field__init_ptr(field, format_field); 212 } 213 214 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \ 215 ({ struct syscall_tp *sc = evsel->priv;\ 216 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); }) 217 218 static void perf_evsel__delete_priv(struct perf_evsel *evsel) 219 { 220 zfree(&evsel->priv); 221 perf_evsel__delete(evsel); 222 } 223 224 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler) 225 { 226 evsel->priv = malloc(sizeof(struct syscall_tp)); 227 if (evsel->priv != NULL) { 228 if (perf_evsel__init_sc_tp_uint_field(evsel, id)) 229 goto out_delete; 230 231 evsel->handler = handler; 232 return 0; 233 } 234 235 return -ENOMEM; 236 237 out_delete: 238 zfree(&evsel->priv); 239 return -ENOENT; 240 } 241 242 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler) 243 { 244 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction); 245 246 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */ 247 if (IS_ERR(evsel)) 248 evsel = perf_evsel__newtp("syscalls", direction); 249 250 if (IS_ERR(evsel)) 251 return NULL; 252 253 if (perf_evsel__init_syscall_tp(evsel, handler)) 254 goto out_delete; 255 256 return evsel; 257 258 out_delete: 259 perf_evsel__delete_priv(evsel); 260 return NULL; 261 } 262 263 #define perf_evsel__sc_tp_uint(evsel, name, sample) \ 264 ({ struct syscall_tp *fields = evsel->priv; \ 265 fields->name.integer(&fields->name, sample); }) 266 267 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \ 268 ({ struct syscall_tp *fields = evsel->priv; \ 269 fields->name.pointer(&fields->name, sample); }) 270 271 struct syscall_arg { 272 unsigned long val; 273 struct thread *thread; 274 struct trace *trace; 275 void *parm; 276 u8 idx; 277 u8 mask; 278 }; 279 280 struct strarray { 281 int offset; 282 int nr_entries; 283 const char **entries; 284 }; 285 286 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \ 287 .nr_entries = ARRAY_SIZE(array), \ 288 .entries = array, \ 289 } 290 291 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \ 292 .offset = off, \ 293 .nr_entries = ARRAY_SIZE(array), \ 294 .entries = array, \ 295 } 296 297 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size, 298 const char *intfmt, 299 struct syscall_arg *arg) 300 { 301 struct strarray *sa = arg->parm; 302 int idx = arg->val - sa->offset; 303 304 if (idx < 0 || idx >= sa->nr_entries) 305 return scnprintf(bf, size, intfmt, arg->val); 306 307 return scnprintf(bf, size, "%s", sa->entries[idx]); 308 } 309 310 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, 311 struct syscall_arg *arg) 312 { 313 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg); 314 } 315 316 #define SCA_STRARRAY syscall_arg__scnprintf_strarray 317 318 #if defined(__i386__) || defined(__x86_64__) 319 /* 320 * FIXME: Make this available to all arches as soon as the ioctl beautifier 321 * gets rewritten to support all arches. 322 */ 323 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size, 324 struct syscall_arg *arg) 325 { 326 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg); 327 } 328 329 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray 330 #endif /* defined(__i386__) || defined(__x86_64__) */ 331 332 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, 333 struct syscall_arg *arg); 334 335 #define SCA_FD syscall_arg__scnprintf_fd 336 337 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size, 338 struct syscall_arg *arg) 339 { 340 int fd = arg->val; 341 342 if (fd == AT_FDCWD) 343 return scnprintf(bf, size, "CWD"); 344 345 return syscall_arg__scnprintf_fd(bf, size, arg); 346 } 347 348 #define SCA_FDAT syscall_arg__scnprintf_fd_at 349 350 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 351 struct syscall_arg *arg); 352 353 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd 354 355 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, 356 struct syscall_arg *arg) 357 { 358 return scnprintf(bf, size, "%#lx", arg->val); 359 } 360 361 #define SCA_HEX syscall_arg__scnprintf_hex 362 363 static size_t syscall_arg__scnprintf_int(char *bf, size_t size, 364 struct syscall_arg *arg) 365 { 366 return scnprintf(bf, size, "%d", arg->val); 367 } 368 369 #define SCA_INT syscall_arg__scnprintf_int 370 371 static const char *bpf_cmd[] = { 372 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM", 373 "MAP_GET_NEXT_KEY", "PROG_LOAD", 374 }; 375 static DEFINE_STRARRAY(bpf_cmd); 376 377 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", }; 378 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1); 379 380 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", }; 381 static DEFINE_STRARRAY(itimers); 382 383 static const char *keyctl_options[] = { 384 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN", 385 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ", 386 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT", 387 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT", 388 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT", 389 }; 390 static DEFINE_STRARRAY(keyctl_options); 391 392 static const char *whences[] = { "SET", "CUR", "END", 393 #ifdef SEEK_DATA 394 "DATA", 395 #endif 396 #ifdef SEEK_HOLE 397 "HOLE", 398 #endif 399 }; 400 static DEFINE_STRARRAY(whences); 401 402 static const char *fcntl_cmds[] = { 403 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK", 404 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64", 405 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX", 406 "F_GETOWNER_UIDS", 407 }; 408 static DEFINE_STRARRAY(fcntl_cmds); 409 410 static const char *rlimit_resources[] = { 411 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE", 412 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO", 413 "RTTIME", 414 }; 415 static DEFINE_STRARRAY(rlimit_resources); 416 417 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", }; 418 static DEFINE_STRARRAY(sighow); 419 420 static const char *clockid[] = { 421 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID", 422 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME", 423 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI" 424 }; 425 static DEFINE_STRARRAY(clockid); 426 427 static const char *socket_families[] = { 428 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM", 429 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI", 430 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC", 431 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC", 432 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF", 433 "ALG", "NFC", "VSOCK", 434 }; 435 static DEFINE_STRARRAY(socket_families); 436 437 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, 438 struct syscall_arg *arg) 439 { 440 size_t printed = 0; 441 int mode = arg->val; 442 443 if (mode == F_OK) /* 0 */ 444 return scnprintf(bf, size, "F"); 445 #define P_MODE(n) \ 446 if (mode & n##_OK) { \ 447 printed += scnprintf(bf + printed, size - printed, "%s", #n); \ 448 mode &= ~n##_OK; \ 449 } 450 451 P_MODE(R); 452 P_MODE(W); 453 P_MODE(X); 454 #undef P_MODE 455 456 if (mode) 457 printed += scnprintf(bf + printed, size - printed, "|%#x", mode); 458 459 return printed; 460 } 461 462 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode 463 464 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, 465 struct syscall_arg *arg); 466 467 #define SCA_FILENAME syscall_arg__scnprintf_filename 468 469 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size, 470 struct syscall_arg *arg) 471 { 472 int printed = 0, flags = arg->val; 473 474 #define P_FLAG(n) \ 475 if (flags & O_##n) { \ 476 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 477 flags &= ~O_##n; \ 478 } 479 480 P_FLAG(CLOEXEC); 481 P_FLAG(NONBLOCK); 482 #undef P_FLAG 483 484 if (flags) 485 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 486 487 return printed; 488 } 489 490 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags 491 492 #if defined(__i386__) || defined(__x86_64__) 493 /* 494 * FIXME: Make this available to all arches. 495 */ 496 #define TCGETS 0x5401 497 498 static const char *tioctls[] = { 499 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW", 500 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL", 501 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI", 502 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC", 503 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX", 504 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO", 505 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK", 506 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2", 507 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK", 508 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG", 509 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL", 510 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG", 511 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS", 512 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI", 513 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE", 514 }; 515 516 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401); 517 #endif /* defined(__i386__) || defined(__x86_64__) */ 518 519 #ifndef GRND_NONBLOCK 520 #define GRND_NONBLOCK 0x0001 521 #endif 522 #ifndef GRND_RANDOM 523 #define GRND_RANDOM 0x0002 524 #endif 525 526 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, 527 struct syscall_arg *arg) 528 { 529 int printed = 0, flags = arg->val; 530 531 #define P_FLAG(n) \ 532 if (flags & GRND_##n) { \ 533 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 534 flags &= ~GRND_##n; \ 535 } 536 537 P_FLAG(RANDOM); 538 P_FLAG(NONBLOCK); 539 #undef P_FLAG 540 541 if (flags) 542 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 543 544 return printed; 545 } 546 547 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags 548 549 #define STRARRAY(arg, name, array) \ 550 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \ 551 .arg_parm = { [arg] = &strarray__##array, } 552 553 #include "trace/beauty/eventfd.c" 554 #include "trace/beauty/flock.c" 555 #include "trace/beauty/futex_op.c" 556 #include "trace/beauty/mmap.c" 557 #include "trace/beauty/mode_t.c" 558 #include "trace/beauty/msg_flags.c" 559 #include "trace/beauty/open_flags.c" 560 #include "trace/beauty/perf_event_open.c" 561 #include "trace/beauty/pid.c" 562 #include "trace/beauty/sched_policy.c" 563 #include "trace/beauty/seccomp.c" 564 #include "trace/beauty/signum.c" 565 #include "trace/beauty/socket_type.c" 566 #include "trace/beauty/waitid_options.c" 567 568 static struct syscall_fmt { 569 const char *name; 570 const char *alias; 571 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg); 572 void *arg_parm[6]; 573 bool errmsg; 574 bool errpid; 575 bool timeout; 576 bool hexret; 577 } syscall_fmts[] = { 578 { .name = "access", .errmsg = true, 579 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ 580 [1] = SCA_ACCMODE, /* mode */ }, }, 581 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, 582 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), }, 583 { .name = "brk", .hexret = true, 584 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, 585 { .name = "chdir", .errmsg = true, 586 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, 587 { .name = "chmod", .errmsg = true, 588 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, 589 { .name = "chroot", .errmsg = true, 590 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, 591 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, 592 { .name = "clone", .errpid = true, }, 593 { .name = "close", .errmsg = true, 594 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 595 { .name = "connect", .errmsg = true, }, 596 { .name = "creat", .errmsg = true, 597 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, 598 { .name = "dup", .errmsg = true, 599 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 600 { .name = "dup2", .errmsg = true, 601 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 602 { .name = "dup3", .errmsg = true, 603 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 604 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), }, 605 { .name = "eventfd2", .errmsg = true, 606 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, 607 { .name = "faccessat", .errmsg = true, 608 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 609 [1] = SCA_FILENAME, /* filename */ }, }, 610 { .name = "fadvise64", .errmsg = true, 611 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 612 { .name = "fallocate", .errmsg = true, 613 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 614 { .name = "fchdir", .errmsg = true, 615 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 616 { .name = "fchmod", .errmsg = true, 617 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 618 { .name = "fchmodat", .errmsg = true, 619 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ 620 [1] = SCA_FILENAME, /* filename */ }, }, 621 { .name = "fchown", .errmsg = true, 622 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 623 { .name = "fchownat", .errmsg = true, 624 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ 625 [1] = SCA_FILENAME, /* filename */ }, }, 626 { .name = "fcntl", .errmsg = true, 627 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 628 [1] = SCA_STRARRAY, /* cmd */ }, 629 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, }, 630 { .name = "fdatasync", .errmsg = true, 631 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 632 { .name = "flock", .errmsg = true, 633 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 634 [1] = SCA_FLOCK, /* cmd */ }, }, 635 { .name = "fsetxattr", .errmsg = true, 636 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 637 { .name = "fstat", .errmsg = true, .alias = "newfstat", 638 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 639 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", 640 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 641 [1] = SCA_FILENAME, /* filename */ }, }, 642 { .name = "fstatfs", .errmsg = true, 643 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 644 { .name = "fsync", .errmsg = true, 645 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 646 { .name = "ftruncate", .errmsg = true, 647 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 648 { .name = "futex", .errmsg = true, 649 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, 650 { .name = "futimesat", .errmsg = true, 651 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ 652 [1] = SCA_FILENAME, /* filename */ }, }, 653 { .name = "getdents", .errmsg = true, 654 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 655 { .name = "getdents64", .errmsg = true, 656 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 657 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, 658 { .name = "getpid", .errpid = true, }, 659 { .name = "getpgid", .errpid = true, }, 660 { .name = "getppid", .errpid = true, }, 661 { .name = "getrandom", .errmsg = true, 662 .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, }, 663 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, 664 { .name = "getxattr", .errmsg = true, 665 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, 666 { .name = "inotify_add_watch", .errmsg = true, 667 .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, }, 668 { .name = "ioctl", .errmsg = true, 669 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 670 #if defined(__i386__) || defined(__x86_64__) 671 /* 672 * FIXME: Make this available to all arches. 673 */ 674 [1] = SCA_STRHEXARRAY, /* cmd */ 675 [2] = SCA_HEX, /* arg */ }, 676 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, }, 677 #else 678 [2] = SCA_HEX, /* arg */ }, }, 679 #endif 680 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), }, 681 { .name = "kill", .errmsg = true, 682 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 683 { .name = "lchown", .errmsg = true, 684 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, 685 { .name = "lgetxattr", .errmsg = true, 686 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, 687 { .name = "linkat", .errmsg = true, 688 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 689 { .name = "listxattr", .errmsg = true, 690 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, 691 { .name = "llistxattr", .errmsg = true, 692 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, 693 { .name = "lremovexattr", .errmsg = true, 694 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, 695 { .name = "lseek", .errmsg = true, 696 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 697 [2] = SCA_STRARRAY, /* whence */ }, 698 .arg_parm = { [2] = &strarray__whences, /* whence */ }, }, 699 { .name = "lsetxattr", .errmsg = true, 700 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, 701 { .name = "lstat", .errmsg = true, .alias = "newlstat", 702 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, 703 { .name = "lsxattr", .errmsg = true, 704 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, 705 { .name = "madvise", .errmsg = true, 706 .arg_scnprintf = { [0] = SCA_HEX, /* start */ 707 [2] = SCA_MADV_BHV, /* behavior */ }, }, 708 { .name = "mkdir", .errmsg = true, 709 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, 710 { .name = "mkdirat", .errmsg = true, 711 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ 712 [1] = SCA_FILENAME, /* pathname */ }, }, 713 { .name = "mknod", .errmsg = true, 714 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, 715 { .name = "mknodat", .errmsg = true, 716 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ 717 [1] = SCA_FILENAME, /* filename */ }, }, 718 { .name = "mlock", .errmsg = true, 719 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 720 { .name = "mlockall", .errmsg = true, 721 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 722 { .name = "mmap", .hexret = true, 723 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ 724 [2] = SCA_MMAP_PROT, /* prot */ 725 [3] = SCA_MMAP_FLAGS, /* flags */ 726 [4] = SCA_FD, /* fd */ }, }, 727 { .name = "mprotect", .errmsg = true, 728 .arg_scnprintf = { [0] = SCA_HEX, /* start */ 729 [2] = SCA_MMAP_PROT, /* prot */ }, }, 730 { .name = "mq_unlink", .errmsg = true, 731 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, }, 732 { .name = "mremap", .hexret = true, 733 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ 734 [3] = SCA_MREMAP_FLAGS, /* flags */ 735 [4] = SCA_HEX, /* new_addr */ }, }, 736 { .name = "munlock", .errmsg = true, 737 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 738 { .name = "munmap", .errmsg = true, 739 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 740 { .name = "name_to_handle_at", .errmsg = true, 741 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 742 { .name = "newfstatat", .errmsg = true, 743 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 744 [1] = SCA_FILENAME, /* filename */ }, }, 745 { .name = "open", .errmsg = true, 746 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ 747 [1] = SCA_OPEN_FLAGS, /* flags */ }, }, 748 { .name = "open_by_handle_at", .errmsg = true, 749 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 750 [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 751 { .name = "openat", .errmsg = true, 752 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 753 [1] = SCA_FILENAME, /* filename */ 754 [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 755 { .name = "perf_event_open", .errmsg = true, 756 .arg_scnprintf = { [2] = SCA_INT, /* cpu */ 757 [3] = SCA_FD, /* group_fd */ 758 [4] = SCA_PERF_FLAGS, /* flags */ }, }, 759 { .name = "pipe2", .errmsg = true, 760 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, }, 761 { .name = "poll", .errmsg = true, .timeout = true, }, 762 { .name = "ppoll", .errmsg = true, .timeout = true, }, 763 { .name = "pread", .errmsg = true, .alias = "pread64", 764 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 765 { .name = "preadv", .errmsg = true, .alias = "pread", 766 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 767 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), }, 768 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", 769 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 770 { .name = "pwritev", .errmsg = true, 771 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 772 { .name = "read", .errmsg = true, 773 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 774 { .name = "readlink", .errmsg = true, 775 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, }, 776 { .name = "readlinkat", .errmsg = true, 777 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 778 [1] = SCA_FILENAME, /* pathname */ }, }, 779 { .name = "readv", .errmsg = true, 780 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 781 { .name = "recvfrom", .errmsg = true, 782 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 783 [3] = SCA_MSG_FLAGS, /* flags */ }, }, 784 { .name = "recvmmsg", .errmsg = true, 785 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 786 [3] = SCA_MSG_FLAGS, /* flags */ }, }, 787 { .name = "recvmsg", .errmsg = true, 788 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 789 [2] = SCA_MSG_FLAGS, /* flags */ }, }, 790 { .name = "removexattr", .errmsg = true, 791 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, 792 { .name = "renameat", .errmsg = true, 793 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 794 { .name = "rmdir", .errmsg = true, 795 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, 796 { .name = "rt_sigaction", .errmsg = true, 797 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, 798 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), }, 799 { .name = "rt_sigqueueinfo", .errmsg = true, 800 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 801 { .name = "rt_tgsigqueueinfo", .errmsg = true, 802 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, 803 { .name = "sched_setscheduler", .errmsg = true, 804 .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, }, 805 { .name = "seccomp", .errmsg = true, 806 .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */ 807 [1] = SCA_SECCOMP_FLAGS, /* flags */ }, }, 808 { .name = "select", .errmsg = true, .timeout = true, }, 809 { .name = "sendmmsg", .errmsg = true, 810 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 811 [3] = SCA_MSG_FLAGS, /* flags */ }, }, 812 { .name = "sendmsg", .errmsg = true, 813 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 814 [2] = SCA_MSG_FLAGS, /* flags */ }, }, 815 { .name = "sendto", .errmsg = true, 816 .arg_scnprintf = { [0] = SCA_FD, /* fd */ 817 [3] = SCA_MSG_FLAGS, /* flags */ }, }, 818 { .name = "set_tid_address", .errpid = true, }, 819 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, 820 { .name = "setpgid", .errmsg = true, }, 821 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, 822 { .name = "setxattr", .errmsg = true, 823 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, 824 { .name = "shutdown", .errmsg = true, 825 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 826 { .name = "socket", .errmsg = true, 827 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ 828 [1] = SCA_SK_TYPE, /* type */ }, 829 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, 830 { .name = "socketpair", .errmsg = true, 831 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ 832 [1] = SCA_SK_TYPE, /* type */ }, 833 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, 834 { .name = "stat", .errmsg = true, .alias = "newstat", 835 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, 836 { .name = "statfs", .errmsg = true, 837 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, 838 { .name = "swapoff", .errmsg = true, 839 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, }, 840 { .name = "swapon", .errmsg = true, 841 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, }, 842 { .name = "symlinkat", .errmsg = true, 843 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 844 { .name = "tgkill", .errmsg = true, 845 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, 846 { .name = "tkill", .errmsg = true, 847 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 848 { .name = "truncate", .errmsg = true, 849 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, }, 850 { .name = "uname", .errmsg = true, .alias = "newuname", }, 851 { .name = "unlinkat", .errmsg = true, 852 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 853 [1] = SCA_FILENAME, /* pathname */ }, }, 854 { .name = "utime", .errmsg = true, 855 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, 856 { .name = "utimensat", .errmsg = true, 857 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ 858 [1] = SCA_FILENAME, /* filename */ }, }, 859 { .name = "utimes", .errmsg = true, 860 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, 861 { .name = "vmsplice", .errmsg = true, 862 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 863 { .name = "wait4", .errpid = true, 864 .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, }, 865 { .name = "waitid", .errpid = true, 866 .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, }, 867 { .name = "write", .errmsg = true, 868 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 869 { .name = "writev", .errmsg = true, 870 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 871 }; 872 873 static int syscall_fmt__cmp(const void *name, const void *fmtp) 874 { 875 const struct syscall_fmt *fmt = fmtp; 876 return strcmp(name, fmt->name); 877 } 878 879 static struct syscall_fmt *syscall_fmt__find(const char *name) 880 { 881 const int nmemb = ARRAY_SIZE(syscall_fmts); 882 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); 883 } 884 885 struct syscall { 886 struct event_format *tp_format; 887 int nr_args; 888 struct format_field *args; 889 const char *name; 890 bool is_exit; 891 struct syscall_fmt *fmt; 892 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); 893 void **arg_parm; 894 }; 895 896 static size_t fprintf_duration(unsigned long t, FILE *fp) 897 { 898 double duration = (double)t / NSEC_PER_MSEC; 899 size_t printed = fprintf(fp, "("); 900 901 if (duration >= 1.0) 902 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); 903 else if (duration >= 0.01) 904 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); 905 else 906 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration); 907 return printed + fprintf(fp, "): "); 908 } 909 910 /** 911 * filename.ptr: The filename char pointer that will be vfs_getname'd 912 * filename.entry_str_pos: Where to insert the string translated from 913 * filename.ptr by the vfs_getname tracepoint/kprobe. 914 */ 915 struct thread_trace { 916 u64 entry_time; 917 u64 exit_time; 918 bool entry_pending; 919 unsigned long nr_events; 920 unsigned long pfmaj, pfmin; 921 char *entry_str; 922 double runtime_ms; 923 struct { 924 unsigned long ptr; 925 short int entry_str_pos; 926 bool pending_open; 927 unsigned int namelen; 928 char *name; 929 } filename; 930 struct { 931 int max; 932 char **table; 933 } paths; 934 935 struct intlist *syscall_stats; 936 }; 937 938 static struct thread_trace *thread_trace__new(void) 939 { 940 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace)); 941 942 if (ttrace) 943 ttrace->paths.max = -1; 944 945 ttrace->syscall_stats = intlist__new(NULL); 946 947 return ttrace; 948 } 949 950 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) 951 { 952 struct thread_trace *ttrace; 953 954 if (thread == NULL) 955 goto fail; 956 957 if (thread__priv(thread) == NULL) 958 thread__set_priv(thread, thread_trace__new()); 959 960 if (thread__priv(thread) == NULL) 961 goto fail; 962 963 ttrace = thread__priv(thread); 964 ++ttrace->nr_events; 965 966 return ttrace; 967 fail: 968 color_fprintf(fp, PERF_COLOR_RED, 969 "WARNING: not enough memory, dropping samples!\n"); 970 return NULL; 971 } 972 973 #define TRACE_PFMAJ (1 << 0) 974 #define TRACE_PFMIN (1 << 1) 975 976 static const size_t trace__entry_str_size = 2048; 977 978 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) 979 { 980 struct thread_trace *ttrace = thread__priv(thread); 981 982 if (fd > ttrace->paths.max) { 983 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *)); 984 985 if (npath == NULL) 986 return -1; 987 988 if (ttrace->paths.max != -1) { 989 memset(npath + ttrace->paths.max + 1, 0, 990 (fd - ttrace->paths.max) * sizeof(char *)); 991 } else { 992 memset(npath, 0, (fd + 1) * sizeof(char *)); 993 } 994 995 ttrace->paths.table = npath; 996 ttrace->paths.max = fd; 997 } 998 999 ttrace->paths.table[fd] = strdup(pathname); 1000 1001 return ttrace->paths.table[fd] != NULL ? 0 : -1; 1002 } 1003 1004 static int thread__read_fd_path(struct thread *thread, int fd) 1005 { 1006 char linkname[PATH_MAX], pathname[PATH_MAX]; 1007 struct stat st; 1008 int ret; 1009 1010 if (thread->pid_ == thread->tid) { 1011 scnprintf(linkname, sizeof(linkname), 1012 "/proc/%d/fd/%d", thread->pid_, fd); 1013 } else { 1014 scnprintf(linkname, sizeof(linkname), 1015 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd); 1016 } 1017 1018 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname)) 1019 return -1; 1020 1021 ret = readlink(linkname, pathname, sizeof(pathname)); 1022 1023 if (ret < 0 || ret > st.st_size) 1024 return -1; 1025 1026 pathname[ret] = '\0'; 1027 return trace__set_fd_pathname(thread, fd, pathname); 1028 } 1029 1030 static const char *thread__fd_path(struct thread *thread, int fd, 1031 struct trace *trace) 1032 { 1033 struct thread_trace *ttrace = thread__priv(thread); 1034 1035 if (ttrace == NULL) 1036 return NULL; 1037 1038 if (fd < 0) 1039 return NULL; 1040 1041 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) { 1042 if (!trace->live) 1043 return NULL; 1044 ++trace->stats.proc_getname; 1045 if (thread__read_fd_path(thread, fd)) 1046 return NULL; 1047 } 1048 1049 return ttrace->paths.table[fd]; 1050 } 1051 1052 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, 1053 struct syscall_arg *arg) 1054 { 1055 int fd = arg->val; 1056 size_t printed = scnprintf(bf, size, "%d", fd); 1057 const char *path = thread__fd_path(arg->thread, fd, arg->trace); 1058 1059 if (path) 1060 printed += scnprintf(bf + printed, size - printed, "<%s>", path); 1061 1062 return printed; 1063 } 1064 1065 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 1066 struct syscall_arg *arg) 1067 { 1068 int fd = arg->val; 1069 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg); 1070 struct thread_trace *ttrace = thread__priv(arg->thread); 1071 1072 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) 1073 zfree(&ttrace->paths.table[fd]); 1074 1075 return printed; 1076 } 1077 1078 static void thread__set_filename_pos(struct thread *thread, const char *bf, 1079 unsigned long ptr) 1080 { 1081 struct thread_trace *ttrace = thread__priv(thread); 1082 1083 ttrace->filename.ptr = ptr; 1084 ttrace->filename.entry_str_pos = bf - ttrace->entry_str; 1085 } 1086 1087 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, 1088 struct syscall_arg *arg) 1089 { 1090 unsigned long ptr = arg->val; 1091 1092 if (!arg->trace->vfs_getname) 1093 return scnprintf(bf, size, "%#x", ptr); 1094 1095 thread__set_filename_pos(arg->thread, bf, ptr); 1096 return 0; 1097 } 1098 1099 static bool trace__filter_duration(struct trace *trace, double t) 1100 { 1101 return t < (trace->duration_filter * NSEC_PER_MSEC); 1102 } 1103 1104 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) 1105 { 1106 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; 1107 1108 return fprintf(fp, "%10.3f ", ts); 1109 } 1110 1111 static bool done = false; 1112 static bool interrupted = false; 1113 1114 static void sig_handler(int sig) 1115 { 1116 done = true; 1117 interrupted = sig == SIGINT; 1118 } 1119 1120 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, 1121 u64 duration, u64 tstamp, FILE *fp) 1122 { 1123 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); 1124 printed += fprintf_duration(duration, fp); 1125 1126 if (trace->multiple_threads) { 1127 if (trace->show_comm) 1128 printed += fprintf(fp, "%.14s/", thread__comm_str(thread)); 1129 printed += fprintf(fp, "%d ", thread->tid); 1130 } 1131 1132 return printed; 1133 } 1134 1135 static int trace__process_event(struct trace *trace, struct machine *machine, 1136 union perf_event *event, struct perf_sample *sample) 1137 { 1138 int ret = 0; 1139 1140 switch (event->header.type) { 1141 case PERF_RECORD_LOST: 1142 color_fprintf(trace->output, PERF_COLOR_RED, 1143 "LOST %" PRIu64 " events!\n", event->lost.lost); 1144 ret = machine__process_lost_event(machine, event, sample); 1145 break; 1146 default: 1147 ret = machine__process_event(machine, event, sample); 1148 break; 1149 } 1150 1151 return ret; 1152 } 1153 1154 static int trace__tool_process(struct perf_tool *tool, 1155 union perf_event *event, 1156 struct perf_sample *sample, 1157 struct machine *machine) 1158 { 1159 struct trace *trace = container_of(tool, struct trace, tool); 1160 return trace__process_event(trace, machine, event, sample); 1161 } 1162 1163 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) 1164 { 1165 int err = symbol__init(NULL); 1166 1167 if (err) 1168 return err; 1169 1170 trace->host = machine__new_host(); 1171 if (trace->host == NULL) 1172 return -ENOMEM; 1173 1174 if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0) 1175 return -errno; 1176 1177 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, 1178 evlist->threads, trace__tool_process, false, 1179 trace->opts.proc_map_timeout); 1180 if (err) 1181 symbol__exit(); 1182 1183 return err; 1184 } 1185 1186 static int syscall__set_arg_fmts(struct syscall *sc) 1187 { 1188 struct format_field *field; 1189 int idx = 0; 1190 1191 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *)); 1192 if (sc->arg_scnprintf == NULL) 1193 return -1; 1194 1195 if (sc->fmt) 1196 sc->arg_parm = sc->fmt->arg_parm; 1197 1198 for (field = sc->args; field; field = field->next) { 1199 if (sc->fmt && sc->fmt->arg_scnprintf[idx]) 1200 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx]; 1201 else if (field->flags & FIELD_IS_POINTER) 1202 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex; 1203 else if (strcmp(field->type, "pid_t") == 0) 1204 sc->arg_scnprintf[idx] = SCA_PID; 1205 else if (strcmp(field->type, "umode_t") == 0) 1206 sc->arg_scnprintf[idx] = SCA_MODE_T; 1207 ++idx; 1208 } 1209 1210 return 0; 1211 } 1212 1213 static int trace__read_syscall_info(struct trace *trace, int id) 1214 { 1215 char tp_name[128]; 1216 struct syscall *sc; 1217 const char *name = syscalltbl__name(trace->sctbl, id); 1218 1219 if (name == NULL) 1220 return -1; 1221 1222 if (id > trace->syscalls.max) { 1223 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); 1224 1225 if (nsyscalls == NULL) 1226 return -1; 1227 1228 if (trace->syscalls.max != -1) { 1229 memset(nsyscalls + trace->syscalls.max + 1, 0, 1230 (id - trace->syscalls.max) * sizeof(*sc)); 1231 } else { 1232 memset(nsyscalls, 0, (id + 1) * sizeof(*sc)); 1233 } 1234 1235 trace->syscalls.table = nsyscalls; 1236 trace->syscalls.max = id; 1237 } 1238 1239 sc = trace->syscalls.table + id; 1240 sc->name = name; 1241 1242 sc->fmt = syscall_fmt__find(sc->name); 1243 1244 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); 1245 sc->tp_format = trace_event__tp_format("syscalls", tp_name); 1246 1247 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) { 1248 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias); 1249 sc->tp_format = trace_event__tp_format("syscalls", tp_name); 1250 } 1251 1252 if (IS_ERR(sc->tp_format)) 1253 return -1; 1254 1255 sc->args = sc->tp_format->format.fields; 1256 sc->nr_args = sc->tp_format->format.nr_fields; 1257 /* 1258 * We need to check and discard the first variable '__syscall_nr' 1259 * or 'nr' that mean the syscall number. It is needless here. 1260 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels. 1261 */ 1262 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) { 1263 sc->args = sc->args->next; 1264 --sc->nr_args; 1265 } 1266 1267 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit"); 1268 1269 return syscall__set_arg_fmts(sc); 1270 } 1271 1272 static int trace__validate_ev_qualifier(struct trace *trace) 1273 { 1274 int err = 0, i; 1275 struct str_node *pos; 1276 1277 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier); 1278 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr * 1279 sizeof(trace->ev_qualifier_ids.entries[0])); 1280 1281 if (trace->ev_qualifier_ids.entries == NULL) { 1282 fputs("Error:\tNot enough memory for allocating events qualifier ids\n", 1283 trace->output); 1284 err = -EINVAL; 1285 goto out; 1286 } 1287 1288 i = 0; 1289 1290 strlist__for_each(pos, trace->ev_qualifier) { 1291 const char *sc = pos->s; 1292 int id = syscalltbl__id(trace->sctbl, sc); 1293 1294 if (id < 0) { 1295 if (err == 0) { 1296 fputs("Error:\tInvalid syscall ", trace->output); 1297 err = -EINVAL; 1298 } else { 1299 fputs(", ", trace->output); 1300 } 1301 1302 fputs(sc, trace->output); 1303 } 1304 1305 trace->ev_qualifier_ids.entries[i++] = id; 1306 } 1307 1308 if (err < 0) { 1309 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" 1310 "\nHint:\tand: 'man syscalls'\n", trace->output); 1311 zfree(&trace->ev_qualifier_ids.entries); 1312 trace->ev_qualifier_ids.nr = 0; 1313 } 1314 out: 1315 return err; 1316 } 1317 1318 /* 1319 * args is to be interpreted as a series of longs but we need to handle 1320 * 8-byte unaligned accesses. args points to raw_data within the event 1321 * and raw_data is guaranteed to be 8-byte unaligned because it is 1322 * preceded by raw_size which is a u32. So we need to copy args to a temp 1323 * variable to read it. Most notably this avoids extended load instructions 1324 * on unaligned addresses 1325 */ 1326 1327 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, 1328 unsigned char *args, struct trace *trace, 1329 struct thread *thread) 1330 { 1331 size_t printed = 0; 1332 unsigned char *p; 1333 unsigned long val; 1334 1335 if (sc->args != NULL) { 1336 struct format_field *field; 1337 u8 bit = 1; 1338 struct syscall_arg arg = { 1339 .idx = 0, 1340 .mask = 0, 1341 .trace = trace, 1342 .thread = thread, 1343 }; 1344 1345 for (field = sc->args; field; 1346 field = field->next, ++arg.idx, bit <<= 1) { 1347 if (arg.mask & bit) 1348 continue; 1349 1350 /* special care for unaligned accesses */ 1351 p = args + sizeof(unsigned long) * arg.idx; 1352 memcpy(&val, p, sizeof(val)); 1353 1354 /* 1355 * Suppress this argument if its value is zero and 1356 * and we don't have a string associated in an 1357 * strarray for it. 1358 */ 1359 if (val == 0 && 1360 !(sc->arg_scnprintf && 1361 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY && 1362 sc->arg_parm[arg.idx])) 1363 continue; 1364 1365 printed += scnprintf(bf + printed, size - printed, 1366 "%s%s: ", printed ? ", " : "", field->name); 1367 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) { 1368 arg.val = val; 1369 if (sc->arg_parm) 1370 arg.parm = sc->arg_parm[arg.idx]; 1371 printed += sc->arg_scnprintf[arg.idx](bf + printed, 1372 size - printed, &arg); 1373 } else { 1374 printed += scnprintf(bf + printed, size - printed, 1375 "%ld", val); 1376 } 1377 } 1378 } else if (IS_ERR(sc->tp_format)) { 1379 /* 1380 * If we managed to read the tracepoint /format file, then we 1381 * may end up not having any args, like with gettid(), so only 1382 * print the raw args when we didn't manage to read it. 1383 */ 1384 int i = 0; 1385 1386 while (i < 6) { 1387 /* special care for unaligned accesses */ 1388 p = args + sizeof(unsigned long) * i; 1389 memcpy(&val, p, sizeof(val)); 1390 printed += scnprintf(bf + printed, size - printed, 1391 "%sarg%d: %ld", 1392 printed ? ", " : "", i, val); 1393 ++i; 1394 } 1395 } 1396 1397 return printed; 1398 } 1399 1400 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel, 1401 union perf_event *event, 1402 struct perf_sample *sample); 1403 1404 static struct syscall *trace__syscall_info(struct trace *trace, 1405 struct perf_evsel *evsel, int id) 1406 { 1407 1408 if (id < 0) { 1409 1410 /* 1411 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried 1412 * before that, leaving at a higher verbosity level till that is 1413 * explained. Reproduced with plain ftrace with: 1414 * 1415 * echo 1 > /t/events/raw_syscalls/sys_exit/enable 1416 * grep "NR -1 " /t/trace_pipe 1417 * 1418 * After generating some load on the machine. 1419 */ 1420 if (verbose > 1) { 1421 static u64 n; 1422 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n", 1423 id, perf_evsel__name(evsel), ++n); 1424 } 1425 return NULL; 1426 } 1427 1428 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) && 1429 trace__read_syscall_info(trace, id)) 1430 goto out_cant_read; 1431 1432 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL)) 1433 goto out_cant_read; 1434 1435 return &trace->syscalls.table[id]; 1436 1437 out_cant_read: 1438 if (verbose) { 1439 fprintf(trace->output, "Problems reading syscall %d", id); 1440 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL) 1441 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name); 1442 fputs(" information\n", trace->output); 1443 } 1444 return NULL; 1445 } 1446 1447 static void thread__update_stats(struct thread_trace *ttrace, 1448 int id, struct perf_sample *sample) 1449 { 1450 struct int_node *inode; 1451 struct stats *stats; 1452 u64 duration = 0; 1453 1454 inode = intlist__findnew(ttrace->syscall_stats, id); 1455 if (inode == NULL) 1456 return; 1457 1458 stats = inode->priv; 1459 if (stats == NULL) { 1460 stats = malloc(sizeof(struct stats)); 1461 if (stats == NULL) 1462 return; 1463 init_stats(stats); 1464 inode->priv = stats; 1465 } 1466 1467 if (ttrace->entry_time && sample->time > ttrace->entry_time) 1468 duration = sample->time - ttrace->entry_time; 1469 1470 update_stats(stats, duration); 1471 } 1472 1473 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample) 1474 { 1475 struct thread_trace *ttrace; 1476 u64 duration; 1477 size_t printed; 1478 1479 if (trace->current == NULL) 1480 return 0; 1481 1482 ttrace = thread__priv(trace->current); 1483 1484 if (!ttrace->entry_pending) 1485 return 0; 1486 1487 duration = sample->time - ttrace->entry_time; 1488 1489 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output); 1490 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); 1491 ttrace->entry_pending = false; 1492 1493 return printed; 1494 } 1495 1496 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, 1497 union perf_event *event __maybe_unused, 1498 struct perf_sample *sample) 1499 { 1500 char *msg; 1501 void *args; 1502 size_t printed = 0; 1503 struct thread *thread; 1504 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; 1505 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1506 struct thread_trace *ttrace; 1507 1508 if (sc == NULL) 1509 return -1; 1510 1511 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1512 ttrace = thread__trace(thread, trace->output); 1513 if (ttrace == NULL) 1514 goto out_put; 1515 1516 args = perf_evsel__sc_tp_ptr(evsel, args, sample); 1517 1518 if (ttrace->entry_str == NULL) { 1519 ttrace->entry_str = malloc(trace__entry_str_size); 1520 if (!ttrace->entry_str) 1521 goto out_put; 1522 } 1523 1524 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) 1525 trace__printf_interrupted_entry(trace, sample); 1526 1527 ttrace->entry_time = sample->time; 1528 msg = ttrace->entry_str; 1529 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name); 1530 1531 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed, 1532 args, trace, thread); 1533 1534 if (sc->is_exit) { 1535 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) { 1536 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); 1537 fprintf(trace->output, "%-70s\n", ttrace->entry_str); 1538 } 1539 } else { 1540 ttrace->entry_pending = true; 1541 /* See trace__vfs_getname & trace__sys_exit */ 1542 ttrace->filename.pending_open = false; 1543 } 1544 1545 if (trace->current != thread) { 1546 thread__put(trace->current); 1547 trace->current = thread__get(thread); 1548 } 1549 err = 0; 1550 out_put: 1551 thread__put(thread); 1552 return err; 1553 } 1554 1555 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel, 1556 struct perf_sample *sample, 1557 struct callchain_cursor *cursor) 1558 { 1559 struct addr_location al; 1560 1561 if (machine__resolve(trace->host, &al, sample) < 0 || 1562 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack)) 1563 return -1; 1564 1565 return 0; 1566 } 1567 1568 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample) 1569 { 1570 /* TODO: user-configurable print_opts */ 1571 const unsigned int print_opts = EVSEL__PRINT_SYM | 1572 EVSEL__PRINT_DSO | 1573 EVSEL__PRINT_UNKNOWN_AS_ADDR; 1574 1575 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output); 1576 } 1577 1578 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, 1579 union perf_event *event __maybe_unused, 1580 struct perf_sample *sample) 1581 { 1582 long ret; 1583 u64 duration = 0; 1584 struct thread *thread; 1585 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0; 1586 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1587 struct thread_trace *ttrace; 1588 1589 if (sc == NULL) 1590 return -1; 1591 1592 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1593 ttrace = thread__trace(thread, trace->output); 1594 if (ttrace == NULL) 1595 goto out_put; 1596 1597 if (trace->summary) 1598 thread__update_stats(ttrace, id, sample); 1599 1600 ret = perf_evsel__sc_tp_uint(evsel, ret, sample); 1601 1602 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) { 1603 trace__set_fd_pathname(thread, ret, ttrace->filename.name); 1604 ttrace->filename.pending_open = false; 1605 ++trace->stats.vfs_getname; 1606 } 1607 1608 ttrace->exit_time = sample->time; 1609 1610 if (ttrace->entry_time) { 1611 duration = sample->time - ttrace->entry_time; 1612 if (trace__filter_duration(trace, duration)) 1613 goto out; 1614 } else if (trace->duration_filter) 1615 goto out; 1616 1617 if (sample->callchain) { 1618 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); 1619 if (callchain_ret == 0) { 1620 if (callchain_cursor.nr < trace->min_stack) 1621 goto out; 1622 callchain_ret = 1; 1623 } 1624 } 1625 1626 if (trace->summary_only) 1627 goto out; 1628 1629 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output); 1630 1631 if (ttrace->entry_pending) { 1632 fprintf(trace->output, "%-70s", ttrace->entry_str); 1633 } else { 1634 fprintf(trace->output, " ... ["); 1635 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued"); 1636 fprintf(trace->output, "]: %s()", sc->name); 1637 } 1638 1639 if (sc->fmt == NULL) { 1640 signed_print: 1641 fprintf(trace->output, ") = %ld", ret); 1642 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) { 1643 char bf[STRERR_BUFSIZE]; 1644 const char *emsg = strerror_r(-ret, bf, sizeof(bf)), 1645 *e = audit_errno_to_name(-ret); 1646 1647 fprintf(trace->output, ") = -1 %s %s", e, emsg); 1648 } else if (ret == 0 && sc->fmt->timeout) 1649 fprintf(trace->output, ") = 0 Timeout"); 1650 else if (sc->fmt->hexret) 1651 fprintf(trace->output, ") = %#lx", ret); 1652 else if (sc->fmt->errpid) { 1653 struct thread *child = machine__find_thread(trace->host, ret, ret); 1654 1655 if (child != NULL) { 1656 fprintf(trace->output, ") = %ld", ret); 1657 if (child->comm_set) 1658 fprintf(trace->output, " (%s)", thread__comm_str(child)); 1659 thread__put(child); 1660 } 1661 } else 1662 goto signed_print; 1663 1664 fputc('\n', trace->output); 1665 1666 if (callchain_ret > 0) 1667 trace__fprintf_callchain(trace, sample); 1668 else if (callchain_ret < 0) 1669 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); 1670 out: 1671 ttrace->entry_pending = false; 1672 err = 0; 1673 out_put: 1674 thread__put(thread); 1675 return err; 1676 } 1677 1678 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, 1679 union perf_event *event __maybe_unused, 1680 struct perf_sample *sample) 1681 { 1682 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1683 struct thread_trace *ttrace; 1684 size_t filename_len, entry_str_len, to_move; 1685 ssize_t remaining_space; 1686 char *pos; 1687 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname"); 1688 1689 if (!thread) 1690 goto out; 1691 1692 ttrace = thread__priv(thread); 1693 if (!ttrace) 1694 goto out; 1695 1696 filename_len = strlen(filename); 1697 1698 if (ttrace->filename.namelen < filename_len) { 1699 char *f = realloc(ttrace->filename.name, filename_len + 1); 1700 1701 if (f == NULL) 1702 goto out; 1703 1704 ttrace->filename.namelen = filename_len; 1705 ttrace->filename.name = f; 1706 } 1707 1708 strcpy(ttrace->filename.name, filename); 1709 ttrace->filename.pending_open = true; 1710 1711 if (!ttrace->filename.ptr) 1712 goto out; 1713 1714 entry_str_len = strlen(ttrace->entry_str); 1715 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */ 1716 if (remaining_space <= 0) 1717 goto out; 1718 1719 if (filename_len > (size_t)remaining_space) { 1720 filename += filename_len - remaining_space; 1721 filename_len = remaining_space; 1722 } 1723 1724 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */ 1725 pos = ttrace->entry_str + ttrace->filename.entry_str_pos; 1726 memmove(pos + filename_len, pos, to_move); 1727 memcpy(pos, filename, filename_len); 1728 1729 ttrace->filename.ptr = 0; 1730 ttrace->filename.entry_str_pos = 0; 1731 out: 1732 return 0; 1733 } 1734 1735 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel, 1736 union perf_event *event __maybe_unused, 1737 struct perf_sample *sample) 1738 { 1739 u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); 1740 double runtime_ms = (double)runtime / NSEC_PER_MSEC; 1741 struct thread *thread = machine__findnew_thread(trace->host, 1742 sample->pid, 1743 sample->tid); 1744 struct thread_trace *ttrace = thread__trace(thread, trace->output); 1745 1746 if (ttrace == NULL) 1747 goto out_dump; 1748 1749 ttrace->runtime_ms += runtime_ms; 1750 trace->runtime_ms += runtime_ms; 1751 thread__put(thread); 1752 return 0; 1753 1754 out_dump: 1755 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n", 1756 evsel->name, 1757 perf_evsel__strval(evsel, sample, "comm"), 1758 (pid_t)perf_evsel__intval(evsel, sample, "pid"), 1759 runtime, 1760 perf_evsel__intval(evsel, sample, "vruntime")); 1761 thread__put(thread); 1762 return 0; 1763 } 1764 1765 static void bpf_output__printer(enum binary_printer_ops op, 1766 unsigned int val, void *extra) 1767 { 1768 FILE *output = extra; 1769 unsigned char ch = (unsigned char)val; 1770 1771 switch (op) { 1772 case BINARY_PRINT_CHAR_DATA: 1773 fprintf(output, "%c", isprint(ch) ? ch : '.'); 1774 break; 1775 case BINARY_PRINT_DATA_BEGIN: 1776 case BINARY_PRINT_LINE_BEGIN: 1777 case BINARY_PRINT_ADDR: 1778 case BINARY_PRINT_NUM_DATA: 1779 case BINARY_PRINT_NUM_PAD: 1780 case BINARY_PRINT_SEP: 1781 case BINARY_PRINT_CHAR_PAD: 1782 case BINARY_PRINT_LINE_END: 1783 case BINARY_PRINT_DATA_END: 1784 default: 1785 break; 1786 } 1787 } 1788 1789 static void bpf_output__fprintf(struct trace *trace, 1790 struct perf_sample *sample) 1791 { 1792 print_binary(sample->raw_data, sample->raw_size, 8, 1793 bpf_output__printer, trace->output); 1794 } 1795 1796 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, 1797 union perf_event *event __maybe_unused, 1798 struct perf_sample *sample) 1799 { 1800 int callchain_ret = 0; 1801 1802 if (sample->callchain) { 1803 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); 1804 if (callchain_ret == 0) { 1805 if (callchain_cursor.nr < trace->min_stack) 1806 goto out; 1807 callchain_ret = 1; 1808 } 1809 } 1810 1811 trace__printf_interrupted_entry(trace, sample); 1812 trace__fprintf_tstamp(trace, sample->time, trace->output); 1813 1814 if (trace->trace_syscalls) 1815 fprintf(trace->output, "( ): "); 1816 1817 fprintf(trace->output, "%s:", evsel->name); 1818 1819 if (perf_evsel__is_bpf_output(evsel)) { 1820 bpf_output__fprintf(trace, sample); 1821 } else if (evsel->tp_format) { 1822 event_format__fprintf(evsel->tp_format, sample->cpu, 1823 sample->raw_data, sample->raw_size, 1824 trace->output); 1825 } 1826 1827 fprintf(trace->output, ")\n"); 1828 1829 if (callchain_ret > 0) 1830 trace__fprintf_callchain(trace, sample); 1831 else if (callchain_ret < 0) 1832 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); 1833 out: 1834 return 0; 1835 } 1836 1837 static void print_location(FILE *f, struct perf_sample *sample, 1838 struct addr_location *al, 1839 bool print_dso, bool print_sym) 1840 { 1841 1842 if ((verbose || print_dso) && al->map) 1843 fprintf(f, "%s@", al->map->dso->long_name); 1844 1845 if ((verbose || print_sym) && al->sym) 1846 fprintf(f, "%s+0x%" PRIx64, al->sym->name, 1847 al->addr - al->sym->start); 1848 else if (al->map) 1849 fprintf(f, "0x%" PRIx64, al->addr); 1850 else 1851 fprintf(f, "0x%" PRIx64, sample->addr); 1852 } 1853 1854 static int trace__pgfault(struct trace *trace, 1855 struct perf_evsel *evsel, 1856 union perf_event *event __maybe_unused, 1857 struct perf_sample *sample) 1858 { 1859 struct thread *thread; 1860 struct addr_location al; 1861 char map_type = 'd'; 1862 struct thread_trace *ttrace; 1863 int err = -1; 1864 int callchain_ret = 0; 1865 1866 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1867 1868 if (sample->callchain) { 1869 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); 1870 if (callchain_ret == 0) { 1871 if (callchain_cursor.nr < trace->min_stack) 1872 goto out_put; 1873 callchain_ret = 1; 1874 } 1875 } 1876 1877 ttrace = thread__trace(thread, trace->output); 1878 if (ttrace == NULL) 1879 goto out_put; 1880 1881 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ) 1882 ttrace->pfmaj++; 1883 else 1884 ttrace->pfmin++; 1885 1886 if (trace->summary_only) 1887 goto out; 1888 1889 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION, 1890 sample->ip, &al); 1891 1892 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output); 1893 1894 fprintf(trace->output, "%sfault [", 1895 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ? 1896 "maj" : "min"); 1897 1898 print_location(trace->output, sample, &al, false, true); 1899 1900 fprintf(trace->output, "] => "); 1901 1902 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE, 1903 sample->addr, &al); 1904 1905 if (!al.map) { 1906 thread__find_addr_location(thread, sample->cpumode, 1907 MAP__FUNCTION, sample->addr, &al); 1908 1909 if (al.map) 1910 map_type = 'x'; 1911 else 1912 map_type = '?'; 1913 } 1914 1915 print_location(trace->output, sample, &al, true, false); 1916 1917 fprintf(trace->output, " (%c%c)\n", map_type, al.level); 1918 1919 if (callchain_ret > 0) 1920 trace__fprintf_callchain(trace, sample); 1921 else if (callchain_ret < 0) 1922 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); 1923 out: 1924 err = 0; 1925 out_put: 1926 thread__put(thread); 1927 return err; 1928 } 1929 1930 static bool skip_sample(struct trace *trace, struct perf_sample *sample) 1931 { 1932 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) || 1933 (trace->tid_list && intlist__find(trace->tid_list, sample->tid))) 1934 return false; 1935 1936 if (trace->pid_list || trace->tid_list) 1937 return true; 1938 1939 return false; 1940 } 1941 1942 static void trace__set_base_time(struct trace *trace, 1943 struct perf_evsel *evsel, 1944 struct perf_sample *sample) 1945 { 1946 /* 1947 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust 1948 * and don't use sample->time unconditionally, we may end up having 1949 * some other event in the future without PERF_SAMPLE_TIME for good 1950 * reason, i.e. we may not be interested in its timestamps, just in 1951 * it taking place, picking some piece of information when it 1952 * appears in our event stream (vfs_getname comes to mind). 1953 */ 1954 if (trace->base_time == 0 && !trace->full_time && 1955 (evsel->attr.sample_type & PERF_SAMPLE_TIME)) 1956 trace->base_time = sample->time; 1957 } 1958 1959 static int trace__process_sample(struct perf_tool *tool, 1960 union perf_event *event, 1961 struct perf_sample *sample, 1962 struct perf_evsel *evsel, 1963 struct machine *machine __maybe_unused) 1964 { 1965 struct trace *trace = container_of(tool, struct trace, tool); 1966 int err = 0; 1967 1968 tracepoint_handler handler = evsel->handler; 1969 1970 if (skip_sample(trace, sample)) 1971 return 0; 1972 1973 trace__set_base_time(trace, evsel, sample); 1974 1975 if (handler) { 1976 ++trace->nr_events; 1977 handler(trace, evsel, event, sample); 1978 } 1979 1980 return err; 1981 } 1982 1983 static int parse_target_str(struct trace *trace) 1984 { 1985 if (trace->opts.target.pid) { 1986 trace->pid_list = intlist__new(trace->opts.target.pid); 1987 if (trace->pid_list == NULL) { 1988 pr_err("Error parsing process id string\n"); 1989 return -EINVAL; 1990 } 1991 } 1992 1993 if (trace->opts.target.tid) { 1994 trace->tid_list = intlist__new(trace->opts.target.tid); 1995 if (trace->tid_list == NULL) { 1996 pr_err("Error parsing thread id string\n"); 1997 return -EINVAL; 1998 } 1999 } 2000 2001 return 0; 2002 } 2003 2004 static int trace__record(struct trace *trace, int argc, const char **argv) 2005 { 2006 unsigned int rec_argc, i, j; 2007 const char **rec_argv; 2008 const char * const record_args[] = { 2009 "record", 2010 "-R", 2011 "-m", "1024", 2012 "-c", "1", 2013 }; 2014 2015 const char * const sc_args[] = { "-e", }; 2016 unsigned int sc_args_nr = ARRAY_SIZE(sc_args); 2017 const char * const majpf_args[] = { "-e", "major-faults" }; 2018 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args); 2019 const char * const minpf_args[] = { "-e", "minor-faults" }; 2020 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args); 2021 2022 /* +1 is for the event string below */ 2023 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 + 2024 majpf_args_nr + minpf_args_nr + argc; 2025 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 2026 2027 if (rec_argv == NULL) 2028 return -ENOMEM; 2029 2030 j = 0; 2031 for (i = 0; i < ARRAY_SIZE(record_args); i++) 2032 rec_argv[j++] = record_args[i]; 2033 2034 if (trace->trace_syscalls) { 2035 for (i = 0; i < sc_args_nr; i++) 2036 rec_argv[j++] = sc_args[i]; 2037 2038 /* event string may be different for older kernels - e.g., RHEL6 */ 2039 if (is_valid_tracepoint("raw_syscalls:sys_enter")) 2040 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit"; 2041 else if (is_valid_tracepoint("syscalls:sys_enter")) 2042 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit"; 2043 else { 2044 pr_err("Neither raw_syscalls nor syscalls events exist.\n"); 2045 return -1; 2046 } 2047 } 2048 2049 if (trace->trace_pgfaults & TRACE_PFMAJ) 2050 for (i = 0; i < majpf_args_nr; i++) 2051 rec_argv[j++] = majpf_args[i]; 2052 2053 if (trace->trace_pgfaults & TRACE_PFMIN) 2054 for (i = 0; i < minpf_args_nr; i++) 2055 rec_argv[j++] = minpf_args[i]; 2056 2057 for (i = 0; i < (unsigned int)argc; i++) 2058 rec_argv[j++] = argv[i]; 2059 2060 return cmd_record(j, rec_argv, NULL); 2061 } 2062 2063 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); 2064 2065 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist) 2066 { 2067 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname"); 2068 2069 if (IS_ERR(evsel)) 2070 return false; 2071 2072 if (perf_evsel__field(evsel, "pathname") == NULL) { 2073 perf_evsel__delete(evsel); 2074 return false; 2075 } 2076 2077 evsel->handler = trace__vfs_getname; 2078 perf_evlist__add(evlist, evsel); 2079 return true; 2080 } 2081 2082 static struct perf_evsel *perf_evsel__new_pgfault(u64 config) 2083 { 2084 struct perf_evsel *evsel; 2085 struct perf_event_attr attr = { 2086 .type = PERF_TYPE_SOFTWARE, 2087 .mmap_data = 1, 2088 }; 2089 2090 attr.config = config; 2091 attr.sample_period = 1; 2092 2093 event_attr_init(&attr); 2094 2095 evsel = perf_evsel__new(&attr); 2096 if (evsel) 2097 evsel->handler = trace__pgfault; 2098 2099 return evsel; 2100 } 2101 2102 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample) 2103 { 2104 const u32 type = event->header.type; 2105 struct perf_evsel *evsel; 2106 2107 if (type != PERF_RECORD_SAMPLE) { 2108 trace__process_event(trace, trace->host, event, sample); 2109 return; 2110 } 2111 2112 evsel = perf_evlist__id2evsel(trace->evlist, sample->id); 2113 if (evsel == NULL) { 2114 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id); 2115 return; 2116 } 2117 2118 trace__set_base_time(trace, evsel, sample); 2119 2120 if (evsel->attr.type == PERF_TYPE_TRACEPOINT && 2121 sample->raw_data == NULL) { 2122 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", 2123 perf_evsel__name(evsel), sample->tid, 2124 sample->cpu, sample->raw_size); 2125 } else { 2126 tracepoint_handler handler = evsel->handler; 2127 handler(trace, evsel, event, sample); 2128 } 2129 } 2130 2131 static int trace__add_syscall_newtp(struct trace *trace) 2132 { 2133 int ret = -1; 2134 struct perf_evlist *evlist = trace->evlist; 2135 struct perf_evsel *sys_enter, *sys_exit; 2136 2137 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter); 2138 if (sys_enter == NULL) 2139 goto out; 2140 2141 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args)) 2142 goto out_delete_sys_enter; 2143 2144 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit); 2145 if (sys_exit == NULL) 2146 goto out_delete_sys_enter; 2147 2148 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) 2149 goto out_delete_sys_exit; 2150 2151 perf_evlist__add(evlist, sys_enter); 2152 perf_evlist__add(evlist, sys_exit); 2153 2154 if (callchain_param.enabled && !trace->kernel_syscallchains) { 2155 /* 2156 * We're interested only in the user space callchain 2157 * leading to the syscall, allow overriding that for 2158 * debugging reasons using --kernel_syscall_callchains 2159 */ 2160 sys_exit->attr.exclude_callchain_kernel = 1; 2161 } 2162 2163 trace->syscalls.events.sys_enter = sys_enter; 2164 trace->syscalls.events.sys_exit = sys_exit; 2165 2166 ret = 0; 2167 out: 2168 return ret; 2169 2170 out_delete_sys_exit: 2171 perf_evsel__delete_priv(sys_exit); 2172 out_delete_sys_enter: 2173 perf_evsel__delete_priv(sys_enter); 2174 goto out; 2175 } 2176 2177 static int trace__set_ev_qualifier_filter(struct trace *trace) 2178 { 2179 int err = -1; 2180 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier, 2181 trace->ev_qualifier_ids.nr, 2182 trace->ev_qualifier_ids.entries); 2183 2184 if (filter == NULL) 2185 goto out_enomem; 2186 2187 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter)) 2188 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter); 2189 2190 free(filter); 2191 out: 2192 return err; 2193 out_enomem: 2194 errno = ENOMEM; 2195 goto out; 2196 } 2197 2198 static int trace__run(struct trace *trace, int argc, const char **argv) 2199 { 2200 struct perf_evlist *evlist = trace->evlist; 2201 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL; 2202 int err = -1, i; 2203 unsigned long before; 2204 const bool forks = argc > 0; 2205 bool draining = false; 2206 2207 trace->live = true; 2208 2209 if (trace->trace_syscalls && trace__add_syscall_newtp(trace)) 2210 goto out_error_raw_syscalls; 2211 2212 if (trace->trace_syscalls) 2213 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist); 2214 2215 if ((trace->trace_pgfaults & TRACE_PFMAJ)) { 2216 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ); 2217 if (pgfault_maj == NULL) 2218 goto out_error_mem; 2219 perf_evlist__add(evlist, pgfault_maj); 2220 } 2221 2222 if ((trace->trace_pgfaults & TRACE_PFMIN)) { 2223 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN); 2224 if (pgfault_min == NULL) 2225 goto out_error_mem; 2226 perf_evlist__add(evlist, pgfault_min); 2227 } 2228 2229 if (trace->sched && 2230 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", 2231 trace__sched_stat_runtime)) 2232 goto out_error_sched_stat_runtime; 2233 2234 err = perf_evlist__create_maps(evlist, &trace->opts.target); 2235 if (err < 0) { 2236 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n"); 2237 goto out_delete_evlist; 2238 } 2239 2240 err = trace__symbols_init(trace, evlist); 2241 if (err < 0) { 2242 fprintf(trace->output, "Problems initializing symbol libraries!\n"); 2243 goto out_delete_evlist; 2244 } 2245 2246 perf_evlist__config(evlist, &trace->opts, NULL); 2247 2248 if (callchain_param.enabled) { 2249 bool use_identifier = false; 2250 2251 if (trace->syscalls.events.sys_exit) { 2252 perf_evsel__config_callchain(trace->syscalls.events.sys_exit, 2253 &trace->opts, &callchain_param); 2254 use_identifier = true; 2255 } 2256 2257 if (pgfault_maj) { 2258 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param); 2259 use_identifier = true; 2260 } 2261 2262 if (pgfault_min) { 2263 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param); 2264 use_identifier = true; 2265 } 2266 2267 if (use_identifier) { 2268 /* 2269 * Now we have evsels with different sample_ids, use 2270 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel 2271 * from a fixed position in each ring buffer record. 2272 * 2273 * As of this the changeset introducing this comment, this 2274 * isn't strictly needed, as the fields that can come before 2275 * PERF_SAMPLE_ID are all used, but we'll probably disable 2276 * some of those for things like copying the payload of 2277 * pointer syscall arguments, and for vfs_getname we don't 2278 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this 2279 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER. 2280 */ 2281 perf_evlist__set_sample_bit(evlist, IDENTIFIER); 2282 perf_evlist__reset_sample_bit(evlist, ID); 2283 } 2284 } 2285 2286 signal(SIGCHLD, sig_handler); 2287 signal(SIGINT, sig_handler); 2288 2289 if (forks) { 2290 err = perf_evlist__prepare_workload(evlist, &trace->opts.target, 2291 argv, false, NULL); 2292 if (err < 0) { 2293 fprintf(trace->output, "Couldn't run the workload!\n"); 2294 goto out_delete_evlist; 2295 } 2296 } 2297 2298 err = perf_evlist__open(evlist); 2299 if (err < 0) 2300 goto out_error_open; 2301 2302 err = bpf__apply_obj_config(); 2303 if (err) { 2304 char errbuf[BUFSIZ]; 2305 2306 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); 2307 pr_err("ERROR: Apply config to BPF failed: %s\n", 2308 errbuf); 2309 goto out_error_open; 2310 } 2311 2312 /* 2313 * Better not use !target__has_task() here because we need to cover the 2314 * case where no threads were specified in the command line, but a 2315 * workload was, and in that case we will fill in the thread_map when 2316 * we fork the workload in perf_evlist__prepare_workload. 2317 */ 2318 if (trace->filter_pids.nr > 0) 2319 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries); 2320 else if (thread_map__pid(evlist->threads, 0) == -1) 2321 err = perf_evlist__set_filter_pid(evlist, getpid()); 2322 2323 if (err < 0) 2324 goto out_error_mem; 2325 2326 if (trace->ev_qualifier_ids.nr > 0) { 2327 err = trace__set_ev_qualifier_filter(trace); 2328 if (err < 0) 2329 goto out_errno; 2330 2331 pr_debug("event qualifier tracepoint filter: %s\n", 2332 trace->syscalls.events.sys_exit->filter); 2333 } 2334 2335 err = perf_evlist__apply_filters(evlist, &evsel); 2336 if (err < 0) 2337 goto out_error_apply_filters; 2338 2339 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false); 2340 if (err < 0) 2341 goto out_error_mmap; 2342 2343 if (!target__none(&trace->opts.target)) 2344 perf_evlist__enable(evlist); 2345 2346 if (forks) 2347 perf_evlist__start_workload(evlist); 2348 2349 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 || 2350 evlist->threads->nr > 1 || 2351 perf_evlist__first(evlist)->attr.inherit; 2352 again: 2353 before = trace->nr_events; 2354 2355 for (i = 0; i < evlist->nr_mmaps; i++) { 2356 union perf_event *event; 2357 2358 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { 2359 struct perf_sample sample; 2360 2361 ++trace->nr_events; 2362 2363 err = perf_evlist__parse_sample(evlist, event, &sample); 2364 if (err) { 2365 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err); 2366 goto next_event; 2367 } 2368 2369 trace__handle_event(trace, event, &sample); 2370 next_event: 2371 perf_evlist__mmap_consume(evlist, i); 2372 2373 if (interrupted) 2374 goto out_disable; 2375 2376 if (done && !draining) { 2377 perf_evlist__disable(evlist); 2378 draining = true; 2379 } 2380 } 2381 } 2382 2383 if (trace->nr_events == before) { 2384 int timeout = done ? 100 : -1; 2385 2386 if (!draining && perf_evlist__poll(evlist, timeout) > 0) { 2387 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0) 2388 draining = true; 2389 2390 goto again; 2391 } 2392 } else { 2393 goto again; 2394 } 2395 2396 out_disable: 2397 thread__zput(trace->current); 2398 2399 perf_evlist__disable(evlist); 2400 2401 if (!err) { 2402 if (trace->summary) 2403 trace__fprintf_thread_summary(trace, trace->output); 2404 2405 if (trace->show_tool_stats) { 2406 fprintf(trace->output, "Stats:\n " 2407 " vfs_getname : %" PRIu64 "\n" 2408 " proc_getname: %" PRIu64 "\n", 2409 trace->stats.vfs_getname, 2410 trace->stats.proc_getname); 2411 } 2412 } 2413 2414 out_delete_evlist: 2415 perf_evlist__delete(evlist); 2416 trace->evlist = NULL; 2417 trace->live = false; 2418 return err; 2419 { 2420 char errbuf[BUFSIZ]; 2421 2422 out_error_sched_stat_runtime: 2423 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime"); 2424 goto out_error; 2425 2426 out_error_raw_syscalls: 2427 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)"); 2428 goto out_error; 2429 2430 out_error_mmap: 2431 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf)); 2432 goto out_error; 2433 2434 out_error_open: 2435 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); 2436 2437 out_error: 2438 fprintf(trace->output, "%s\n", errbuf); 2439 goto out_delete_evlist; 2440 2441 out_error_apply_filters: 2442 fprintf(trace->output, 2443 "Failed to set filter \"%s\" on event %s with %d (%s)\n", 2444 evsel->filter, perf_evsel__name(evsel), errno, 2445 strerror_r(errno, errbuf, sizeof(errbuf))); 2446 goto out_delete_evlist; 2447 } 2448 out_error_mem: 2449 fprintf(trace->output, "Not enough memory to run!\n"); 2450 goto out_delete_evlist; 2451 2452 out_errno: 2453 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno)); 2454 goto out_delete_evlist; 2455 } 2456 2457 static int trace__replay(struct trace *trace) 2458 { 2459 const struct perf_evsel_str_handler handlers[] = { 2460 { "probe:vfs_getname", trace__vfs_getname, }, 2461 }; 2462 struct perf_data_file file = { 2463 .path = input_name, 2464 .mode = PERF_DATA_MODE_READ, 2465 .force = trace->force, 2466 }; 2467 struct perf_session *session; 2468 struct perf_evsel *evsel; 2469 int err = -1; 2470 2471 trace->tool.sample = trace__process_sample; 2472 trace->tool.mmap = perf_event__process_mmap; 2473 trace->tool.mmap2 = perf_event__process_mmap2; 2474 trace->tool.comm = perf_event__process_comm; 2475 trace->tool.exit = perf_event__process_exit; 2476 trace->tool.fork = perf_event__process_fork; 2477 trace->tool.attr = perf_event__process_attr; 2478 trace->tool.tracing_data = perf_event__process_tracing_data; 2479 trace->tool.build_id = perf_event__process_build_id; 2480 2481 trace->tool.ordered_events = true; 2482 trace->tool.ordering_requires_timestamps = true; 2483 2484 /* add tid to output */ 2485 trace->multiple_threads = true; 2486 2487 session = perf_session__new(&file, false, &trace->tool); 2488 if (session == NULL) 2489 return -1; 2490 2491 if (symbol__init(&session->header.env) < 0) 2492 goto out; 2493 2494 trace->host = &session->machines.host; 2495 2496 err = perf_session__set_tracepoints_handlers(session, handlers); 2497 if (err) 2498 goto out; 2499 2500 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2501 "raw_syscalls:sys_enter"); 2502 /* older kernels have syscalls tp versus raw_syscalls */ 2503 if (evsel == NULL) 2504 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2505 "syscalls:sys_enter"); 2506 2507 if (evsel && 2508 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 || 2509 perf_evsel__init_sc_tp_ptr_field(evsel, args))) { 2510 pr_err("Error during initialize raw_syscalls:sys_enter event\n"); 2511 goto out; 2512 } 2513 2514 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2515 "raw_syscalls:sys_exit"); 2516 if (evsel == NULL) 2517 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2518 "syscalls:sys_exit"); 2519 if (evsel && 2520 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 || 2521 perf_evsel__init_sc_tp_uint_field(evsel, ret))) { 2522 pr_err("Error during initialize raw_syscalls:sys_exit event\n"); 2523 goto out; 2524 } 2525 2526 evlist__for_each(session->evlist, evsel) { 2527 if (evsel->attr.type == PERF_TYPE_SOFTWARE && 2528 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ || 2529 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN || 2530 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS)) 2531 evsel->handler = trace__pgfault; 2532 } 2533 2534 err = parse_target_str(trace); 2535 if (err != 0) 2536 goto out; 2537 2538 setup_pager(); 2539 2540 err = perf_session__process_events(session); 2541 if (err) 2542 pr_err("Failed to process events, error %d", err); 2543 2544 else if (trace->summary) 2545 trace__fprintf_thread_summary(trace, trace->output); 2546 2547 out: 2548 perf_session__delete(session); 2549 2550 return err; 2551 } 2552 2553 static size_t trace__fprintf_threads_header(FILE *fp) 2554 { 2555 size_t printed; 2556 2557 printed = fprintf(fp, "\n Summary of events:\n\n"); 2558 2559 return printed; 2560 } 2561 2562 DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs, 2563 struct stats *stats; 2564 double msecs; 2565 int syscall; 2566 ) 2567 { 2568 struct int_node *source = rb_entry(nd, struct int_node, rb_node); 2569 struct stats *stats = source->priv; 2570 2571 entry->syscall = source->i; 2572 entry->stats = stats; 2573 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0; 2574 } 2575 2576 static size_t thread__dump_stats(struct thread_trace *ttrace, 2577 struct trace *trace, FILE *fp) 2578 { 2579 size_t printed = 0; 2580 struct syscall *sc; 2581 struct rb_node *nd; 2582 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats); 2583 2584 if (syscall_stats == NULL) 2585 return 0; 2586 2587 printed += fprintf(fp, "\n"); 2588 2589 printed += fprintf(fp, " syscall calls total min avg max stddev\n"); 2590 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); 2591 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n"); 2592 2593 resort_rb__for_each(nd, syscall_stats) { 2594 struct stats *stats = syscall_stats_entry->stats; 2595 if (stats) { 2596 double min = (double)(stats->min) / NSEC_PER_MSEC; 2597 double max = (double)(stats->max) / NSEC_PER_MSEC; 2598 double avg = avg_stats(stats); 2599 double pct; 2600 u64 n = (u64) stats->n; 2601 2602 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0; 2603 avg /= NSEC_PER_MSEC; 2604 2605 sc = &trace->syscalls.table[syscall_stats_entry->syscall]; 2606 printed += fprintf(fp, " %-15s", sc->name); 2607 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f", 2608 n, syscall_stats_entry->msecs, min, avg); 2609 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); 2610 } 2611 } 2612 2613 resort_rb__delete(syscall_stats); 2614 printed += fprintf(fp, "\n\n"); 2615 2616 return printed; 2617 } 2618 2619 static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace) 2620 { 2621 size_t printed = 0; 2622 struct thread_trace *ttrace = thread__priv(thread); 2623 double ratio; 2624 2625 if (ttrace == NULL) 2626 return 0; 2627 2628 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0; 2629 2630 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid); 2631 printed += fprintf(fp, "%lu events, ", ttrace->nr_events); 2632 printed += fprintf(fp, "%.1f%%", ratio); 2633 if (ttrace->pfmaj) 2634 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj); 2635 if (ttrace->pfmin) 2636 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin); 2637 if (trace->sched) 2638 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms); 2639 else if (fputc('\n', fp) != EOF) 2640 ++printed; 2641 2642 printed += thread__dump_stats(ttrace, trace, fp); 2643 2644 return printed; 2645 } 2646 2647 static unsigned long thread__nr_events(struct thread_trace *ttrace) 2648 { 2649 return ttrace ? ttrace->nr_events : 0; 2650 } 2651 2652 DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)), 2653 struct thread *thread; 2654 ) 2655 { 2656 entry->thread = rb_entry(nd, struct thread, rb_node); 2657 } 2658 2659 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) 2660 { 2661 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host); 2662 size_t printed = trace__fprintf_threads_header(fp); 2663 struct rb_node *nd; 2664 2665 if (threads == NULL) { 2666 fprintf(fp, "%s", "Error sorting output by nr_events!\n"); 2667 return 0; 2668 } 2669 2670 resort_rb__for_each(nd, threads) 2671 printed += trace__fprintf_thread(fp, threads_entry->thread, trace); 2672 2673 resort_rb__delete(threads); 2674 2675 return printed; 2676 } 2677 2678 static int trace__set_duration(const struct option *opt, const char *str, 2679 int unset __maybe_unused) 2680 { 2681 struct trace *trace = opt->value; 2682 2683 trace->duration_filter = atof(str); 2684 return 0; 2685 } 2686 2687 static int trace__set_filter_pids(const struct option *opt, const char *str, 2688 int unset __maybe_unused) 2689 { 2690 int ret = -1; 2691 size_t i; 2692 struct trace *trace = opt->value; 2693 /* 2694 * FIXME: introduce a intarray class, plain parse csv and create a 2695 * { int nr, int entries[] } struct... 2696 */ 2697 struct intlist *list = intlist__new(str); 2698 2699 if (list == NULL) 2700 return -1; 2701 2702 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1; 2703 trace->filter_pids.entries = calloc(i, sizeof(pid_t)); 2704 2705 if (trace->filter_pids.entries == NULL) 2706 goto out; 2707 2708 trace->filter_pids.entries[0] = getpid(); 2709 2710 for (i = 1; i < trace->filter_pids.nr; ++i) 2711 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i; 2712 2713 intlist__delete(list); 2714 ret = 0; 2715 out: 2716 return ret; 2717 } 2718 2719 static int trace__open_output(struct trace *trace, const char *filename) 2720 { 2721 struct stat st; 2722 2723 if (!stat(filename, &st) && st.st_size) { 2724 char oldname[PATH_MAX]; 2725 2726 scnprintf(oldname, sizeof(oldname), "%s.old", filename); 2727 unlink(oldname); 2728 rename(filename, oldname); 2729 } 2730 2731 trace->output = fopen(filename, "w"); 2732 2733 return trace->output == NULL ? -errno : 0; 2734 } 2735 2736 static int parse_pagefaults(const struct option *opt, const char *str, 2737 int unset __maybe_unused) 2738 { 2739 int *trace_pgfaults = opt->value; 2740 2741 if (strcmp(str, "all") == 0) 2742 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN; 2743 else if (strcmp(str, "maj") == 0) 2744 *trace_pgfaults |= TRACE_PFMAJ; 2745 else if (strcmp(str, "min") == 0) 2746 *trace_pgfaults |= TRACE_PFMIN; 2747 else 2748 return -1; 2749 2750 return 0; 2751 } 2752 2753 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler) 2754 { 2755 struct perf_evsel *evsel; 2756 2757 evlist__for_each(evlist, evsel) 2758 evsel->handler = handler; 2759 } 2760 2761 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) 2762 { 2763 const char *trace_usage[] = { 2764 "perf trace [<options>] [<command>]", 2765 "perf trace [<options>] -- <command> [<options>]", 2766 "perf trace record [<options>] [<command>]", 2767 "perf trace record [<options>] -- <command> [<options>]", 2768 NULL 2769 }; 2770 struct trace trace = { 2771 .syscalls = { 2772 . max = -1, 2773 }, 2774 .opts = { 2775 .target = { 2776 .uid = UINT_MAX, 2777 .uses_mmap = true, 2778 }, 2779 .user_freq = UINT_MAX, 2780 .user_interval = ULLONG_MAX, 2781 .no_buffering = true, 2782 .mmap_pages = UINT_MAX, 2783 .proc_map_timeout = 500, 2784 }, 2785 .output = stderr, 2786 .show_comm = true, 2787 .trace_syscalls = true, 2788 .kernel_syscallchains = false, 2789 .max_stack = UINT_MAX, 2790 }; 2791 const char *output_name = NULL; 2792 const char *ev_qualifier_str = NULL; 2793 const struct option trace_options[] = { 2794 OPT_CALLBACK(0, "event", &trace.evlist, "event", 2795 "event selector. use 'perf list' to list available events", 2796 parse_events_option), 2797 OPT_BOOLEAN(0, "comm", &trace.show_comm, 2798 "show the thread COMM next to its id"), 2799 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), 2800 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"), 2801 OPT_STRING('o', "output", &output_name, "file", "output file name"), 2802 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"), 2803 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", 2804 "trace events on existing process id"), 2805 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid", 2806 "trace events on existing thread id"), 2807 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids", 2808 "pids to filter (by the kernel)", trace__set_filter_pids), 2809 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide, 2810 "system-wide collection from all CPUs"), 2811 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu", 2812 "list of cpus to monitor"), 2813 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, 2814 "child tasks do not inherit counters"), 2815 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages", 2816 "number of mmap data pages", 2817 perf_evlist__parse_mmap_pages), 2818 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user", 2819 "user to profile"), 2820 OPT_CALLBACK(0, "duration", &trace, "float", 2821 "show only events with duration > N.M ms", 2822 trace__set_duration), 2823 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), 2824 OPT_INCR('v', "verbose", &verbose, "be more verbose"), 2825 OPT_BOOLEAN('T', "time", &trace.full_time, 2826 "Show full timestamp, not time relative to first start"), 2827 OPT_BOOLEAN('s', "summary", &trace.summary_only, 2828 "Show only syscall summary with statistics"), 2829 OPT_BOOLEAN('S', "with-summary", &trace.summary, 2830 "Show all syscalls and summary with statistics"), 2831 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min", 2832 "Trace pagefaults", parse_pagefaults, "maj"), 2833 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"), 2834 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"), 2835 OPT_CALLBACK(0, "call-graph", &trace.opts, 2836 "record_mode[,record_size]", record_callchain_help, 2837 &record_parse_callchain_opt), 2838 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains, 2839 "Show the kernel callchains on the syscall exit path"), 2840 OPT_UINTEGER(0, "min-stack", &trace.min_stack, 2841 "Set the minimum stack depth when parsing the callchain, " 2842 "anything below the specified depth will be ignored."), 2843 OPT_UINTEGER(0, "max-stack", &trace.max_stack, 2844 "Set the maximum stack depth when parsing the callchain, " 2845 "anything beyond the specified depth will be ignored. " 2846 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), 2847 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout, 2848 "per thread proc mmap processing timeout in ms"), 2849 OPT_END() 2850 }; 2851 bool __maybe_unused max_stack_user_set = true; 2852 bool mmap_pages_user_set = true; 2853 const char * const trace_subcommands[] = { "record", NULL }; 2854 int err; 2855 char bf[BUFSIZ]; 2856 2857 signal(SIGSEGV, sighandler_dump_stack); 2858 signal(SIGFPE, sighandler_dump_stack); 2859 2860 trace.evlist = perf_evlist__new(); 2861 trace.sctbl = syscalltbl__new(); 2862 2863 if (trace.evlist == NULL || trace.sctbl == NULL) { 2864 pr_err("Not enough memory to run!\n"); 2865 err = -ENOMEM; 2866 goto out; 2867 } 2868 2869 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands, 2870 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION); 2871 2872 err = bpf__setup_stdout(trace.evlist); 2873 if (err) { 2874 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf)); 2875 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf); 2876 goto out; 2877 } 2878 2879 err = -1; 2880 2881 if (trace.trace_pgfaults) { 2882 trace.opts.sample_address = true; 2883 trace.opts.sample_time = true; 2884 } 2885 2886 if (trace.opts.mmap_pages == UINT_MAX) 2887 mmap_pages_user_set = false; 2888 2889 if (trace.max_stack == UINT_MAX) { 2890 trace.max_stack = sysctl_perf_event_max_stack; 2891 max_stack_user_set = false; 2892 } 2893 2894 #ifdef HAVE_DWARF_UNWIND_SUPPORT 2895 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) 2896 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false); 2897 #endif 2898 2899 if (callchain_param.enabled) { 2900 if (!mmap_pages_user_set && geteuid() == 0) 2901 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4; 2902 2903 symbol_conf.use_callchain = true; 2904 } 2905 2906 if (trace.evlist->nr_entries > 0) 2907 evlist__set_evsel_handler(trace.evlist, trace__event_handler); 2908 2909 if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) 2910 return trace__record(&trace, argc-1, &argv[1]); 2911 2912 /* summary_only implies summary option, but don't overwrite summary if set */ 2913 if (trace.summary_only) 2914 trace.summary = trace.summary_only; 2915 2916 if (!trace.trace_syscalls && !trace.trace_pgfaults && 2917 trace.evlist->nr_entries == 0 /* Was --events used? */) { 2918 pr_err("Please specify something to trace.\n"); 2919 return -1; 2920 } 2921 2922 if (!trace.trace_syscalls && ev_qualifier_str) { 2923 pr_err("The -e option can't be used with --no-syscalls.\n"); 2924 goto out; 2925 } 2926 2927 if (output_name != NULL) { 2928 err = trace__open_output(&trace, output_name); 2929 if (err < 0) { 2930 perror("failed to create output file"); 2931 goto out; 2932 } 2933 } 2934 2935 trace.open_id = syscalltbl__id(trace.sctbl, "open"); 2936 2937 if (ev_qualifier_str != NULL) { 2938 const char *s = ev_qualifier_str; 2939 struct strlist_config slist_config = { 2940 .dirname = system_path(STRACE_GROUPS_DIR), 2941 }; 2942 2943 trace.not_ev_qualifier = *s == '!'; 2944 if (trace.not_ev_qualifier) 2945 ++s; 2946 trace.ev_qualifier = strlist__new(s, &slist_config); 2947 if (trace.ev_qualifier == NULL) { 2948 fputs("Not enough memory to parse event qualifier", 2949 trace.output); 2950 err = -ENOMEM; 2951 goto out_close; 2952 } 2953 2954 err = trace__validate_ev_qualifier(&trace); 2955 if (err) 2956 goto out_close; 2957 } 2958 2959 err = target__validate(&trace.opts.target); 2960 if (err) { 2961 target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 2962 fprintf(trace.output, "%s", bf); 2963 goto out_close; 2964 } 2965 2966 err = target__parse_uid(&trace.opts.target); 2967 if (err) { 2968 target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 2969 fprintf(trace.output, "%s", bf); 2970 goto out_close; 2971 } 2972 2973 if (!argc && target__none(&trace.opts.target)) 2974 trace.opts.target.system_wide = true; 2975 2976 if (input_name) 2977 err = trace__replay(&trace); 2978 else 2979 err = trace__run(&trace, argc, argv); 2980 2981 out_close: 2982 if (output_name != NULL) 2983 fclose(trace.output); 2984 out: 2985 return err; 2986 } 2987