1 /* 2 * builtin-trace.c 3 * 4 * Builtin 'trace' command: 5 * 6 * Display a continuously updated trace of any workload, CPU, specific PID, 7 * system wide, etc. Default format is loosely strace like, but any other 8 * event may be specified using --event. 9 * 10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> 11 * 12 * Initially based on the 'trace' prototype by Thomas Gleixner: 13 * 14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'") 15 * 16 * Released under the GPL v2. (and only v2, not any later version) 17 */ 18 19 #include <traceevent/event-parse.h> 20 #include <api/fs/tracing_path.h> 21 #include "builtin.h" 22 #include "util/cgroup.h" 23 #include "util/color.h" 24 #include "util/debug.h" 25 #include "util/env.h" 26 #include "util/event.h" 27 #include "util/evlist.h" 28 #include <subcmd/exec-cmd.h> 29 #include "util/machine.h" 30 #include "util/path.h" 31 #include "util/session.h" 32 #include "util/thread.h" 33 #include <subcmd/parse-options.h> 34 #include "util/strlist.h" 35 #include "util/intlist.h" 36 #include "util/thread_map.h" 37 #include "util/stat.h" 38 #include "trace/beauty/beauty.h" 39 #include "trace-event.h" 40 #include "util/parse-events.h" 41 #include "util/bpf-loader.h" 42 #include "callchain.h" 43 #include "print_binary.h" 44 #include "string2.h" 45 #include "syscalltbl.h" 46 #include "rb_resort.h" 47 48 #include <errno.h> 49 #include <inttypes.h> 50 #include <poll.h> 51 #include <signal.h> 52 #include <stdlib.h> 53 #include <string.h> 54 #include <linux/err.h> 55 #include <linux/filter.h> 56 #include <linux/kernel.h> 57 #include <linux/random.h> 58 #include <linux/stringify.h> 59 #include <linux/time64.h> 60 #include <fcntl.h> 61 62 #include "sane_ctype.h" 63 64 #ifndef O_CLOEXEC 65 # define O_CLOEXEC 02000000 66 #endif 67 68 #ifndef F_LINUX_SPECIFIC_BASE 69 # define F_LINUX_SPECIFIC_BASE 1024 70 #endif 71 72 struct trace { 73 struct perf_tool tool; 74 struct syscalltbl *sctbl; 75 struct { 76 int max; 77 struct syscall *table; 78 struct { 79 struct perf_evsel *sys_enter, 80 *sys_exit; 81 } events; 82 } syscalls; 83 struct record_opts opts; 84 struct perf_evlist *evlist; 85 struct machine *host; 86 struct thread *current; 87 struct cgroup *cgroup; 88 u64 base_time; 89 FILE *output; 90 unsigned long nr_events; 91 struct strlist *ev_qualifier; 92 struct { 93 size_t nr; 94 int *entries; 95 } ev_qualifier_ids; 96 struct { 97 size_t nr; 98 pid_t *entries; 99 } filter_pids; 100 double duration_filter; 101 double runtime_ms; 102 struct { 103 u64 vfs_getname, 104 proc_getname; 105 } stats; 106 unsigned int max_stack; 107 unsigned int min_stack; 108 bool not_ev_qualifier; 109 bool live; 110 bool full_time; 111 bool sched; 112 bool multiple_threads; 113 bool summary; 114 bool summary_only; 115 bool show_comm; 116 bool print_sample; 117 bool show_tool_stats; 118 bool trace_syscalls; 119 bool kernel_syscallchains; 120 bool force; 121 bool vfs_getname; 122 int trace_pgfaults; 123 int open_id; 124 }; 125 126 struct tp_field { 127 int offset; 128 union { 129 u64 (*integer)(struct tp_field *field, struct perf_sample *sample); 130 void *(*pointer)(struct tp_field *field, struct perf_sample *sample); 131 }; 132 }; 133 134 #define TP_UINT_FIELD(bits) \ 135 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \ 136 { \ 137 u##bits value; \ 138 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ 139 return value; \ 140 } 141 142 TP_UINT_FIELD(8); 143 TP_UINT_FIELD(16); 144 TP_UINT_FIELD(32); 145 TP_UINT_FIELD(64); 146 147 #define TP_UINT_FIELD__SWAPPED(bits) \ 148 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \ 149 { \ 150 u##bits value; \ 151 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ 152 return bswap_##bits(value);\ 153 } 154 155 TP_UINT_FIELD__SWAPPED(16); 156 TP_UINT_FIELD__SWAPPED(32); 157 TP_UINT_FIELD__SWAPPED(64); 158 159 static int tp_field__init_uint(struct tp_field *field, 160 struct format_field *format_field, 161 bool needs_swap) 162 { 163 field->offset = format_field->offset; 164 165 switch (format_field->size) { 166 case 1: 167 field->integer = tp_field__u8; 168 break; 169 case 2: 170 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16; 171 break; 172 case 4: 173 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32; 174 break; 175 case 8: 176 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64; 177 break; 178 default: 179 return -1; 180 } 181 182 return 0; 183 } 184 185 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample) 186 { 187 return sample->raw_data + field->offset; 188 } 189 190 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field) 191 { 192 field->offset = format_field->offset; 193 field->pointer = tp_field__ptr; 194 return 0; 195 } 196 197 struct syscall_tp { 198 struct tp_field id; 199 union { 200 struct tp_field args, ret; 201 }; 202 }; 203 204 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel, 205 struct tp_field *field, 206 const char *name) 207 { 208 struct format_field *format_field = perf_evsel__field(evsel, name); 209 210 if (format_field == NULL) 211 return -1; 212 213 return tp_field__init_uint(field, format_field, evsel->needs_swap); 214 } 215 216 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \ 217 ({ struct syscall_tp *sc = evsel->priv;\ 218 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); }) 219 220 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel, 221 struct tp_field *field, 222 const char *name) 223 { 224 struct format_field *format_field = perf_evsel__field(evsel, name); 225 226 if (format_field == NULL) 227 return -1; 228 229 return tp_field__init_ptr(field, format_field); 230 } 231 232 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \ 233 ({ struct syscall_tp *sc = evsel->priv;\ 234 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); }) 235 236 static void perf_evsel__delete_priv(struct perf_evsel *evsel) 237 { 238 zfree(&evsel->priv); 239 perf_evsel__delete(evsel); 240 } 241 242 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler) 243 { 244 evsel->priv = malloc(sizeof(struct syscall_tp)); 245 if (evsel->priv != NULL) { 246 if (perf_evsel__init_sc_tp_uint_field(evsel, id)) 247 goto out_delete; 248 249 evsel->handler = handler; 250 return 0; 251 } 252 253 return -ENOMEM; 254 255 out_delete: 256 zfree(&evsel->priv); 257 return -ENOENT; 258 } 259 260 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler) 261 { 262 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction); 263 264 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */ 265 if (IS_ERR(evsel)) 266 evsel = perf_evsel__newtp("syscalls", direction); 267 268 if (IS_ERR(evsel)) 269 return NULL; 270 271 if (perf_evsel__init_syscall_tp(evsel, handler)) 272 goto out_delete; 273 274 return evsel; 275 276 out_delete: 277 perf_evsel__delete_priv(evsel); 278 return NULL; 279 } 280 281 #define perf_evsel__sc_tp_uint(evsel, name, sample) \ 282 ({ struct syscall_tp *fields = evsel->priv; \ 283 fields->name.integer(&fields->name, sample); }) 284 285 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \ 286 ({ struct syscall_tp *fields = evsel->priv; \ 287 fields->name.pointer(&fields->name, sample); }) 288 289 size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val) 290 { 291 int idx = val - sa->offset; 292 293 if (idx < 0 || idx >= sa->nr_entries) 294 return scnprintf(bf, size, intfmt, val); 295 296 return scnprintf(bf, size, "%s", sa->entries[idx]); 297 } 298 299 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size, 300 const char *intfmt, 301 struct syscall_arg *arg) 302 { 303 return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->val); 304 } 305 306 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, 307 struct syscall_arg *arg) 308 { 309 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg); 310 } 311 312 #define SCA_STRARRAY syscall_arg__scnprintf_strarray 313 314 struct strarrays { 315 int nr_entries; 316 struct strarray **entries; 317 }; 318 319 #define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \ 320 .nr_entries = ARRAY_SIZE(array), \ 321 .entries = array, \ 322 } 323 324 size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, 325 struct syscall_arg *arg) 326 { 327 struct strarrays *sas = arg->parm; 328 int i; 329 330 for (i = 0; i < sas->nr_entries; ++i) { 331 struct strarray *sa = sas->entries[i]; 332 int idx = arg->val - sa->offset; 333 334 if (idx >= 0 && idx < sa->nr_entries) { 335 if (sa->entries[idx] == NULL) 336 break; 337 return scnprintf(bf, size, "%s", sa->entries[idx]); 338 } 339 } 340 341 return scnprintf(bf, size, "%d", arg->val); 342 } 343 344 #ifndef AT_FDCWD 345 #define AT_FDCWD -100 346 #endif 347 348 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size, 349 struct syscall_arg *arg) 350 { 351 int fd = arg->val; 352 353 if (fd == AT_FDCWD) 354 return scnprintf(bf, size, "CWD"); 355 356 return syscall_arg__scnprintf_fd(bf, size, arg); 357 } 358 359 #define SCA_FDAT syscall_arg__scnprintf_fd_at 360 361 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 362 struct syscall_arg *arg); 363 364 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd 365 366 size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg) 367 { 368 return scnprintf(bf, size, "%#lx", arg->val); 369 } 370 371 size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg) 372 { 373 return scnprintf(bf, size, "%d", arg->val); 374 } 375 376 size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg) 377 { 378 return scnprintf(bf, size, "%ld", arg->val); 379 } 380 381 static const char *bpf_cmd[] = { 382 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM", 383 "MAP_GET_NEXT_KEY", "PROG_LOAD", 384 }; 385 static DEFINE_STRARRAY(bpf_cmd); 386 387 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", }; 388 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1); 389 390 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", }; 391 static DEFINE_STRARRAY(itimers); 392 393 static const char *keyctl_options[] = { 394 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN", 395 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ", 396 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT", 397 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT", 398 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT", 399 }; 400 static DEFINE_STRARRAY(keyctl_options); 401 402 static const char *whences[] = { "SET", "CUR", "END", 403 #ifdef SEEK_DATA 404 "DATA", 405 #endif 406 #ifdef SEEK_HOLE 407 "HOLE", 408 #endif 409 }; 410 static DEFINE_STRARRAY(whences); 411 412 static const char *fcntl_cmds[] = { 413 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK", 414 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64", 415 "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX", 416 "GETOWNER_UIDS", 417 }; 418 static DEFINE_STRARRAY(fcntl_cmds); 419 420 static const char *fcntl_linux_specific_cmds[] = { 421 "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC", 422 "SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS", 423 "GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT", 424 }; 425 426 static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE); 427 428 static struct strarray *fcntl_cmds_arrays[] = { 429 &strarray__fcntl_cmds, 430 &strarray__fcntl_linux_specific_cmds, 431 }; 432 433 static DEFINE_STRARRAYS(fcntl_cmds_arrays); 434 435 static const char *rlimit_resources[] = { 436 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE", 437 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO", 438 "RTTIME", 439 }; 440 static DEFINE_STRARRAY(rlimit_resources); 441 442 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", }; 443 static DEFINE_STRARRAY(sighow); 444 445 static const char *clockid[] = { 446 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID", 447 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME", 448 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI" 449 }; 450 static DEFINE_STRARRAY(clockid); 451 452 static const char *socket_families[] = { 453 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM", 454 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI", 455 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC", 456 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC", 457 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF", 458 "ALG", "NFC", "VSOCK", 459 }; 460 static DEFINE_STRARRAY(socket_families); 461 462 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, 463 struct syscall_arg *arg) 464 { 465 size_t printed = 0; 466 int mode = arg->val; 467 468 if (mode == F_OK) /* 0 */ 469 return scnprintf(bf, size, "F"); 470 #define P_MODE(n) \ 471 if (mode & n##_OK) { \ 472 printed += scnprintf(bf + printed, size - printed, "%s", #n); \ 473 mode &= ~n##_OK; \ 474 } 475 476 P_MODE(R); 477 P_MODE(W); 478 P_MODE(X); 479 #undef P_MODE 480 481 if (mode) 482 printed += scnprintf(bf + printed, size - printed, "|%#x", mode); 483 484 return printed; 485 } 486 487 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode 488 489 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, 490 struct syscall_arg *arg); 491 492 #define SCA_FILENAME syscall_arg__scnprintf_filename 493 494 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size, 495 struct syscall_arg *arg) 496 { 497 int printed = 0, flags = arg->val; 498 499 #define P_FLAG(n) \ 500 if (flags & O_##n) { \ 501 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 502 flags &= ~O_##n; \ 503 } 504 505 P_FLAG(CLOEXEC); 506 P_FLAG(NONBLOCK); 507 #undef P_FLAG 508 509 if (flags) 510 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 511 512 return printed; 513 } 514 515 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags 516 517 #ifndef GRND_NONBLOCK 518 #define GRND_NONBLOCK 0x0001 519 #endif 520 #ifndef GRND_RANDOM 521 #define GRND_RANDOM 0x0002 522 #endif 523 524 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, 525 struct syscall_arg *arg) 526 { 527 int printed = 0, flags = arg->val; 528 529 #define P_FLAG(n) \ 530 if (flags & GRND_##n) { \ 531 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 532 flags &= ~GRND_##n; \ 533 } 534 535 P_FLAG(RANDOM); 536 P_FLAG(NONBLOCK); 537 #undef P_FLAG 538 539 if (flags) 540 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 541 542 return printed; 543 } 544 545 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags 546 547 #define STRARRAY(name, array) \ 548 { .scnprintf = SCA_STRARRAY, \ 549 .parm = &strarray__##array, } 550 551 #include "trace/beauty/arch_errno_names.c" 552 #include "trace/beauty/eventfd.c" 553 #include "trace/beauty/futex_op.c" 554 #include "trace/beauty/futex_val3.c" 555 #include "trace/beauty/mmap.c" 556 #include "trace/beauty/mode_t.c" 557 #include "trace/beauty/msg_flags.c" 558 #include "trace/beauty/open_flags.c" 559 #include "trace/beauty/perf_event_open.c" 560 #include "trace/beauty/pid.c" 561 #include "trace/beauty/sched_policy.c" 562 #include "trace/beauty/seccomp.c" 563 #include "trace/beauty/signum.c" 564 #include "trace/beauty/socket_type.c" 565 #include "trace/beauty/waitid_options.c" 566 567 struct syscall_arg_fmt { 568 size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg); 569 void *parm; 570 const char *name; 571 bool show_zero; 572 }; 573 574 static struct syscall_fmt { 575 const char *name; 576 const char *alias; 577 struct syscall_arg_fmt arg[6]; 578 u8 nr_args; 579 bool errpid; 580 bool timeout; 581 bool hexret; 582 } syscall_fmts[] = { 583 { .name = "access", 584 .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, }, 585 { .name = "bpf", 586 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, }, 587 { .name = "brk", .hexret = true, 588 .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, }, 589 { .name = "clock_gettime", 590 .arg = { [0] = STRARRAY(clk_id, clockid), }, }, 591 { .name = "clone", .errpid = true, .nr_args = 5, 592 .arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, }, 593 [1] = { .name = "child_stack", .scnprintf = SCA_HEX, }, 594 [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, }, 595 [3] = { .name = "child_tidptr", .scnprintf = SCA_HEX, }, 596 [4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, }, 597 { .name = "close", 598 .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, }, 599 { .name = "epoll_ctl", 600 .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, }, 601 { .name = "eventfd2", 602 .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, }, 603 { .name = "fchmodat", 604 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, 605 { .name = "fchownat", 606 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, 607 { .name = "fcntl", 608 .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */ 609 .parm = &strarrays__fcntl_cmds_arrays, 610 .show_zero = true, }, 611 [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, }, 612 { .name = "flock", 613 .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, }, 614 { .name = "fstat", .alias = "newfstat", }, 615 { .name = "fstatat", .alias = "newfstatat", }, 616 { .name = "futex", 617 .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ }, 618 [5] = { .scnprintf = SCA_FUTEX_VAL3, /* val3 */ }, }, }, 619 { .name = "futimesat", 620 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, 621 { .name = "getitimer", 622 .arg = { [0] = STRARRAY(which, itimers), }, }, 623 { .name = "getpid", .errpid = true, }, 624 { .name = "getpgid", .errpid = true, }, 625 { .name = "getppid", .errpid = true, }, 626 { .name = "getrandom", 627 .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, }, 628 { .name = "getrlimit", 629 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, }, 630 { .name = "gettid", .errpid = true, }, 631 { .name = "ioctl", 632 .arg = { 633 #if defined(__i386__) || defined(__x86_64__) 634 /* 635 * FIXME: Make this available to all arches. 636 */ 637 [1] = { .scnprintf = SCA_IOCTL_CMD, /* cmd */ }, 638 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, }, 639 #else 640 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, }, 641 #endif 642 { .name = "kcmp", .nr_args = 5, 643 .arg = { [0] = { .name = "pid1", .scnprintf = SCA_PID, }, 644 [1] = { .name = "pid2", .scnprintf = SCA_PID, }, 645 [2] = { .name = "type", .scnprintf = SCA_KCMP_TYPE, }, 646 [3] = { .name = "idx1", .scnprintf = SCA_KCMP_IDX, }, 647 [4] = { .name = "idx2", .scnprintf = SCA_KCMP_IDX, }, }, }, 648 { .name = "keyctl", 649 .arg = { [0] = STRARRAY(option, keyctl_options), }, }, 650 { .name = "kill", 651 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, 652 { .name = "linkat", 653 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, 654 { .name = "lseek", 655 .arg = { [2] = STRARRAY(whence, whences), }, }, 656 { .name = "lstat", .alias = "newlstat", }, 657 { .name = "madvise", 658 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, 659 [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, }, 660 { .name = "mkdirat", 661 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, 662 { .name = "mknodat", 663 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, 664 { .name = "mlock", 665 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, }, 666 { .name = "mlockall", 667 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, }, 668 { .name = "mmap", .hexret = true, 669 /* The standard mmap maps to old_mmap on s390x */ 670 #if defined(__s390x__) 671 .alias = "old_mmap", 672 #endif 673 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, 674 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, 675 [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, }, 676 { .name = "mprotect", 677 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, 678 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, }, 679 { .name = "mq_unlink", 680 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, }, 681 { .name = "mremap", .hexret = true, 682 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, 683 [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ }, 684 [4] = { .scnprintf = SCA_HEX, /* new_addr */ }, }, }, 685 { .name = "munlock", 686 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, }, 687 { .name = "munmap", 688 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, }, 689 { .name = "name_to_handle_at", 690 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, 691 { .name = "newfstatat", 692 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, 693 { .name = "open", 694 .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, 695 { .name = "open_by_handle_at", 696 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, 697 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, 698 { .name = "openat", 699 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, 700 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, 701 { .name = "perf_event_open", 702 .arg = { [2] = { .scnprintf = SCA_INT, /* cpu */ }, 703 [3] = { .scnprintf = SCA_FD, /* group_fd */ }, 704 [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, }, 705 { .name = "pipe2", 706 .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, }, 707 { .name = "pkey_alloc", 708 .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS, /* access_rights */ }, }, }, 709 { .name = "pkey_free", 710 .arg = { [0] = { .scnprintf = SCA_INT, /* key */ }, }, }, 711 { .name = "pkey_mprotect", 712 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, 713 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, 714 [3] = { .scnprintf = SCA_INT, /* pkey */ }, }, }, 715 { .name = "poll", .timeout = true, }, 716 { .name = "ppoll", .timeout = true, }, 717 { .name = "prctl", .alias = "arch_prctl", 718 .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ }, 719 [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ }, 720 [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, }, 721 { .name = "pread", .alias = "pread64", }, 722 { .name = "preadv", .alias = "pread", }, 723 { .name = "prlimit64", 724 .arg = { [1] = STRARRAY(resource, rlimit_resources), }, }, 725 { .name = "pwrite", .alias = "pwrite64", }, 726 { .name = "readlinkat", 727 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, 728 { .name = "recvfrom", 729 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, 730 { .name = "recvmmsg", 731 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, 732 { .name = "recvmsg", 733 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, 734 { .name = "renameat", 735 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, 736 { .name = "rt_sigaction", 737 .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, 738 { .name = "rt_sigprocmask", 739 .arg = { [0] = STRARRAY(how, sighow), }, }, 740 { .name = "rt_sigqueueinfo", 741 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, 742 { .name = "rt_tgsigqueueinfo", 743 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, 744 { .name = "sched_setscheduler", 745 .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, }, 746 { .name = "seccomp", 747 .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, /* op */ }, 748 [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, }, 749 { .name = "select", .timeout = true, }, 750 { .name = "sendmmsg", 751 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, 752 { .name = "sendmsg", 753 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, 754 { .name = "sendto", 755 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, 756 { .name = "set_tid_address", .errpid = true, }, 757 { .name = "setitimer", 758 .arg = { [0] = STRARRAY(which, itimers), }, }, 759 { .name = "setrlimit", 760 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, }, 761 { .name = "socket", 762 .arg = { [0] = STRARRAY(family, socket_families), 763 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, }, 764 { .name = "socketpair", 765 .arg = { [0] = STRARRAY(family, socket_families), 766 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, }, 767 { .name = "stat", .alias = "newstat", }, 768 { .name = "statx", 769 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fdat */ }, 770 [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } , 771 [3] = { .scnprintf = SCA_STATX_MASK, /* mask */ }, }, }, 772 { .name = "swapoff", 773 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, }, 774 { .name = "swapon", 775 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, }, 776 { .name = "symlinkat", 777 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, 778 { .name = "tgkill", 779 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, 780 { .name = "tkill", 781 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, 782 { .name = "uname", .alias = "newuname", }, 783 { .name = "unlinkat", 784 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, 785 { .name = "utimensat", 786 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, }, 787 { .name = "wait4", .errpid = true, 788 .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, }, 789 { .name = "waitid", .errpid = true, 790 .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, }, 791 }; 792 793 static int syscall_fmt__cmp(const void *name, const void *fmtp) 794 { 795 const struct syscall_fmt *fmt = fmtp; 796 return strcmp(name, fmt->name); 797 } 798 799 static struct syscall_fmt *syscall_fmt__find(const char *name) 800 { 801 const int nmemb = ARRAY_SIZE(syscall_fmts); 802 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); 803 } 804 805 struct syscall { 806 struct event_format *tp_format; 807 int nr_args; 808 struct format_field *args; 809 const char *name; 810 bool is_exit; 811 struct syscall_fmt *fmt; 812 struct syscall_arg_fmt *arg_fmt; 813 }; 814 815 /* 816 * We need to have this 'calculated' boolean because in some cases we really 817 * don't know what is the duration of a syscall, for instance, when we start 818 * a session and some threads are waiting for a syscall to finish, say 'poll', 819 * in which case all we can do is to print "( ? ) for duration and for the 820 * start timestamp. 821 */ 822 static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp) 823 { 824 double duration = (double)t / NSEC_PER_MSEC; 825 size_t printed = fprintf(fp, "("); 826 827 if (!calculated) 828 printed += fprintf(fp, " "); 829 else if (duration >= 1.0) 830 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); 831 else if (duration >= 0.01) 832 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); 833 else 834 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration); 835 return printed + fprintf(fp, "): "); 836 } 837 838 /** 839 * filename.ptr: The filename char pointer that will be vfs_getname'd 840 * filename.entry_str_pos: Where to insert the string translated from 841 * filename.ptr by the vfs_getname tracepoint/kprobe. 842 * ret_scnprintf: syscall args may set this to a different syscall return 843 * formatter, for instance, fcntl may return fds, file flags, etc. 844 */ 845 struct thread_trace { 846 u64 entry_time; 847 bool entry_pending; 848 unsigned long nr_events; 849 unsigned long pfmaj, pfmin; 850 char *entry_str; 851 double runtime_ms; 852 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); 853 struct { 854 unsigned long ptr; 855 short int entry_str_pos; 856 bool pending_open; 857 unsigned int namelen; 858 char *name; 859 } filename; 860 struct { 861 int max; 862 char **table; 863 } paths; 864 865 struct intlist *syscall_stats; 866 }; 867 868 static struct thread_trace *thread_trace__new(void) 869 { 870 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace)); 871 872 if (ttrace) 873 ttrace->paths.max = -1; 874 875 ttrace->syscall_stats = intlist__new(NULL); 876 877 return ttrace; 878 } 879 880 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) 881 { 882 struct thread_trace *ttrace; 883 884 if (thread == NULL) 885 goto fail; 886 887 if (thread__priv(thread) == NULL) 888 thread__set_priv(thread, thread_trace__new()); 889 890 if (thread__priv(thread) == NULL) 891 goto fail; 892 893 ttrace = thread__priv(thread); 894 ++ttrace->nr_events; 895 896 return ttrace; 897 fail: 898 color_fprintf(fp, PERF_COLOR_RED, 899 "WARNING: not enough memory, dropping samples!\n"); 900 return NULL; 901 } 902 903 904 void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg, 905 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg)) 906 { 907 struct thread_trace *ttrace = thread__priv(arg->thread); 908 909 ttrace->ret_scnprintf = ret_scnprintf; 910 } 911 912 #define TRACE_PFMAJ (1 << 0) 913 #define TRACE_PFMIN (1 << 1) 914 915 static const size_t trace__entry_str_size = 2048; 916 917 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) 918 { 919 struct thread_trace *ttrace = thread__priv(thread); 920 921 if (fd > ttrace->paths.max) { 922 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *)); 923 924 if (npath == NULL) 925 return -1; 926 927 if (ttrace->paths.max != -1) { 928 memset(npath + ttrace->paths.max + 1, 0, 929 (fd - ttrace->paths.max) * sizeof(char *)); 930 } else { 931 memset(npath, 0, (fd + 1) * sizeof(char *)); 932 } 933 934 ttrace->paths.table = npath; 935 ttrace->paths.max = fd; 936 } 937 938 ttrace->paths.table[fd] = strdup(pathname); 939 940 return ttrace->paths.table[fd] != NULL ? 0 : -1; 941 } 942 943 static int thread__read_fd_path(struct thread *thread, int fd) 944 { 945 char linkname[PATH_MAX], pathname[PATH_MAX]; 946 struct stat st; 947 int ret; 948 949 if (thread->pid_ == thread->tid) { 950 scnprintf(linkname, sizeof(linkname), 951 "/proc/%d/fd/%d", thread->pid_, fd); 952 } else { 953 scnprintf(linkname, sizeof(linkname), 954 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd); 955 } 956 957 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname)) 958 return -1; 959 960 ret = readlink(linkname, pathname, sizeof(pathname)); 961 962 if (ret < 0 || ret > st.st_size) 963 return -1; 964 965 pathname[ret] = '\0'; 966 return trace__set_fd_pathname(thread, fd, pathname); 967 } 968 969 static const char *thread__fd_path(struct thread *thread, int fd, 970 struct trace *trace) 971 { 972 struct thread_trace *ttrace = thread__priv(thread); 973 974 if (ttrace == NULL) 975 return NULL; 976 977 if (fd < 0) 978 return NULL; 979 980 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) { 981 if (!trace->live) 982 return NULL; 983 ++trace->stats.proc_getname; 984 if (thread__read_fd_path(thread, fd)) 985 return NULL; 986 } 987 988 return ttrace->paths.table[fd]; 989 } 990 991 size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg) 992 { 993 int fd = arg->val; 994 size_t printed = scnprintf(bf, size, "%d", fd); 995 const char *path = thread__fd_path(arg->thread, fd, arg->trace); 996 997 if (path) 998 printed += scnprintf(bf + printed, size - printed, "<%s>", path); 999 1000 return printed; 1001 } 1002 1003 size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size) 1004 { 1005 size_t printed = scnprintf(bf, size, "%d", fd); 1006 struct thread *thread = machine__find_thread(trace->host, pid, pid); 1007 1008 if (thread) { 1009 const char *path = thread__fd_path(thread, fd, trace); 1010 1011 if (path) 1012 printed += scnprintf(bf + printed, size - printed, "<%s>", path); 1013 1014 thread__put(thread); 1015 } 1016 1017 return printed; 1018 } 1019 1020 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 1021 struct syscall_arg *arg) 1022 { 1023 int fd = arg->val; 1024 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg); 1025 struct thread_trace *ttrace = thread__priv(arg->thread); 1026 1027 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) 1028 zfree(&ttrace->paths.table[fd]); 1029 1030 return printed; 1031 } 1032 1033 static void thread__set_filename_pos(struct thread *thread, const char *bf, 1034 unsigned long ptr) 1035 { 1036 struct thread_trace *ttrace = thread__priv(thread); 1037 1038 ttrace->filename.ptr = ptr; 1039 ttrace->filename.entry_str_pos = bf - ttrace->entry_str; 1040 } 1041 1042 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, 1043 struct syscall_arg *arg) 1044 { 1045 unsigned long ptr = arg->val; 1046 1047 if (!arg->trace->vfs_getname) 1048 return scnprintf(bf, size, "%#x", ptr); 1049 1050 thread__set_filename_pos(arg->thread, bf, ptr); 1051 return 0; 1052 } 1053 1054 static bool trace__filter_duration(struct trace *trace, double t) 1055 { 1056 return t < (trace->duration_filter * NSEC_PER_MSEC); 1057 } 1058 1059 static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) 1060 { 1061 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; 1062 1063 return fprintf(fp, "%10.3f ", ts); 1064 } 1065 1066 /* 1067 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are 1068 * using ttrace->entry_time for a thread that receives a sys_exit without 1069 * first having received a sys_enter ("poll" issued before tracing session 1070 * starts, lost sys_enter exit due to ring buffer overflow). 1071 */ 1072 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) 1073 { 1074 if (tstamp > 0) 1075 return __trace__fprintf_tstamp(trace, tstamp, fp); 1076 1077 return fprintf(fp, " ? "); 1078 } 1079 1080 static bool done = false; 1081 static bool interrupted = false; 1082 1083 static void sig_handler(int sig) 1084 { 1085 done = true; 1086 interrupted = sig == SIGINT; 1087 } 1088 1089 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, 1090 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp) 1091 { 1092 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); 1093 printed += fprintf_duration(duration, duration_calculated, fp); 1094 1095 if (trace->multiple_threads) { 1096 if (trace->show_comm) 1097 printed += fprintf(fp, "%.14s/", thread__comm_str(thread)); 1098 printed += fprintf(fp, "%d ", thread->tid); 1099 } 1100 1101 return printed; 1102 } 1103 1104 static int trace__process_event(struct trace *trace, struct machine *machine, 1105 union perf_event *event, struct perf_sample *sample) 1106 { 1107 int ret = 0; 1108 1109 switch (event->header.type) { 1110 case PERF_RECORD_LOST: 1111 color_fprintf(trace->output, PERF_COLOR_RED, 1112 "LOST %" PRIu64 " events!\n", event->lost.lost); 1113 ret = machine__process_lost_event(machine, event, sample); 1114 break; 1115 default: 1116 ret = machine__process_event(machine, event, sample); 1117 break; 1118 } 1119 1120 return ret; 1121 } 1122 1123 static int trace__tool_process(struct perf_tool *tool, 1124 union perf_event *event, 1125 struct perf_sample *sample, 1126 struct machine *machine) 1127 { 1128 struct trace *trace = container_of(tool, struct trace, tool); 1129 return trace__process_event(trace, machine, event, sample); 1130 } 1131 1132 static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp) 1133 { 1134 struct machine *machine = vmachine; 1135 1136 if (machine->kptr_restrict_warned) 1137 return NULL; 1138 1139 if (symbol_conf.kptr_restrict) { 1140 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n" 1141 "Check /proc/sys/kernel/kptr_restrict.\n\n" 1142 "Kernel samples will not be resolved.\n"); 1143 machine->kptr_restrict_warned = true; 1144 return NULL; 1145 } 1146 1147 return machine__resolve_kernel_addr(vmachine, addrp, modp); 1148 } 1149 1150 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) 1151 { 1152 int err = symbol__init(NULL); 1153 1154 if (err) 1155 return err; 1156 1157 trace->host = machine__new_host(); 1158 if (trace->host == NULL) 1159 return -ENOMEM; 1160 1161 err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr); 1162 if (err < 0) 1163 goto out; 1164 1165 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, 1166 evlist->threads, trace__tool_process, false, 1167 trace->opts.proc_map_timeout, 1); 1168 out: 1169 if (err) 1170 symbol__exit(); 1171 1172 return err; 1173 } 1174 1175 static void trace__symbols__exit(struct trace *trace) 1176 { 1177 machine__exit(trace->host); 1178 trace->host = NULL; 1179 1180 symbol__exit(); 1181 } 1182 1183 static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args) 1184 { 1185 int idx; 1186 1187 if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0) 1188 nr_args = sc->fmt->nr_args; 1189 1190 sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt)); 1191 if (sc->arg_fmt == NULL) 1192 return -1; 1193 1194 for (idx = 0; idx < nr_args; ++idx) { 1195 if (sc->fmt) 1196 sc->arg_fmt[idx] = sc->fmt->arg[idx]; 1197 } 1198 1199 sc->nr_args = nr_args; 1200 return 0; 1201 } 1202 1203 static int syscall__set_arg_fmts(struct syscall *sc) 1204 { 1205 struct format_field *field; 1206 int idx = 0, len; 1207 1208 for (field = sc->args; field; field = field->next, ++idx) { 1209 if (sc->fmt && sc->fmt->arg[idx].scnprintf) 1210 continue; 1211 1212 if (strcmp(field->type, "const char *") == 0 && 1213 (strcmp(field->name, "filename") == 0 || 1214 strcmp(field->name, "path") == 0 || 1215 strcmp(field->name, "pathname") == 0)) 1216 sc->arg_fmt[idx].scnprintf = SCA_FILENAME; 1217 else if (field->flags & FIELD_IS_POINTER) 1218 sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex; 1219 else if (strcmp(field->type, "pid_t") == 0) 1220 sc->arg_fmt[idx].scnprintf = SCA_PID; 1221 else if (strcmp(field->type, "umode_t") == 0) 1222 sc->arg_fmt[idx].scnprintf = SCA_MODE_T; 1223 else if ((strcmp(field->type, "int") == 0 || 1224 strcmp(field->type, "unsigned int") == 0 || 1225 strcmp(field->type, "long") == 0) && 1226 (len = strlen(field->name)) >= 2 && 1227 strcmp(field->name + len - 2, "fd") == 0) { 1228 /* 1229 * /sys/kernel/tracing/events/syscalls/sys_enter* 1230 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c 1231 * 65 int 1232 * 23 unsigned int 1233 * 7 unsigned long 1234 */ 1235 sc->arg_fmt[idx].scnprintf = SCA_FD; 1236 } 1237 } 1238 1239 return 0; 1240 } 1241 1242 static int trace__read_syscall_info(struct trace *trace, int id) 1243 { 1244 char tp_name[128]; 1245 struct syscall *sc; 1246 const char *name = syscalltbl__name(trace->sctbl, id); 1247 1248 if (name == NULL) 1249 return -1; 1250 1251 if (id > trace->syscalls.max) { 1252 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); 1253 1254 if (nsyscalls == NULL) 1255 return -1; 1256 1257 if (trace->syscalls.max != -1) { 1258 memset(nsyscalls + trace->syscalls.max + 1, 0, 1259 (id - trace->syscalls.max) * sizeof(*sc)); 1260 } else { 1261 memset(nsyscalls, 0, (id + 1) * sizeof(*sc)); 1262 } 1263 1264 trace->syscalls.table = nsyscalls; 1265 trace->syscalls.max = id; 1266 } 1267 1268 sc = trace->syscalls.table + id; 1269 sc->name = name; 1270 1271 sc->fmt = syscall_fmt__find(sc->name); 1272 1273 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); 1274 sc->tp_format = trace_event__tp_format("syscalls", tp_name); 1275 1276 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) { 1277 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias); 1278 sc->tp_format = trace_event__tp_format("syscalls", tp_name); 1279 } 1280 1281 if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields)) 1282 return -1; 1283 1284 if (IS_ERR(sc->tp_format)) 1285 return -1; 1286 1287 sc->args = sc->tp_format->format.fields; 1288 /* 1289 * We need to check and discard the first variable '__syscall_nr' 1290 * or 'nr' that mean the syscall number. It is needless here. 1291 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels. 1292 */ 1293 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) { 1294 sc->args = sc->args->next; 1295 --sc->nr_args; 1296 } 1297 1298 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit"); 1299 1300 return syscall__set_arg_fmts(sc); 1301 } 1302 1303 static int trace__validate_ev_qualifier(struct trace *trace) 1304 { 1305 int err = 0, i; 1306 size_t nr_allocated; 1307 struct str_node *pos; 1308 1309 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier); 1310 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr * 1311 sizeof(trace->ev_qualifier_ids.entries[0])); 1312 1313 if (trace->ev_qualifier_ids.entries == NULL) { 1314 fputs("Error:\tNot enough memory for allocating events qualifier ids\n", 1315 trace->output); 1316 err = -EINVAL; 1317 goto out; 1318 } 1319 1320 nr_allocated = trace->ev_qualifier_ids.nr; 1321 i = 0; 1322 1323 strlist__for_each_entry(pos, trace->ev_qualifier) { 1324 const char *sc = pos->s; 1325 int id = syscalltbl__id(trace->sctbl, sc), match_next = -1; 1326 1327 if (id < 0) { 1328 id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next); 1329 if (id >= 0) 1330 goto matches; 1331 1332 if (err == 0) { 1333 fputs("Error:\tInvalid syscall ", trace->output); 1334 err = -EINVAL; 1335 } else { 1336 fputs(", ", trace->output); 1337 } 1338 1339 fputs(sc, trace->output); 1340 } 1341 matches: 1342 trace->ev_qualifier_ids.entries[i++] = id; 1343 if (match_next == -1) 1344 continue; 1345 1346 while (1) { 1347 id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next); 1348 if (id < 0) 1349 break; 1350 if (nr_allocated == trace->ev_qualifier_ids.nr) { 1351 void *entries; 1352 1353 nr_allocated += 8; 1354 entries = realloc(trace->ev_qualifier_ids.entries, 1355 nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0])); 1356 if (entries == NULL) { 1357 err = -ENOMEM; 1358 fputs("\nError:\t Not enough memory for parsing\n", trace->output); 1359 goto out_free; 1360 } 1361 trace->ev_qualifier_ids.entries = entries; 1362 } 1363 trace->ev_qualifier_ids.nr++; 1364 trace->ev_qualifier_ids.entries[i++] = id; 1365 } 1366 } 1367 1368 if (err < 0) { 1369 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" 1370 "\nHint:\tand: 'man syscalls'\n", trace->output); 1371 out_free: 1372 zfree(&trace->ev_qualifier_ids.entries); 1373 trace->ev_qualifier_ids.nr = 0; 1374 } 1375 out: 1376 return err; 1377 } 1378 1379 /* 1380 * args is to be interpreted as a series of longs but we need to handle 1381 * 8-byte unaligned accesses. args points to raw_data within the event 1382 * and raw_data is guaranteed to be 8-byte unaligned because it is 1383 * preceded by raw_size which is a u32. So we need to copy args to a temp 1384 * variable to read it. Most notably this avoids extended load instructions 1385 * on unaligned addresses 1386 */ 1387 unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx) 1388 { 1389 unsigned long val; 1390 unsigned char *p = arg->args + sizeof(unsigned long) * idx; 1391 1392 memcpy(&val, p, sizeof(val)); 1393 return val; 1394 } 1395 1396 static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size, 1397 struct syscall_arg *arg) 1398 { 1399 if (sc->arg_fmt && sc->arg_fmt[arg->idx].name) 1400 return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name); 1401 1402 return scnprintf(bf, size, "arg%d: ", arg->idx); 1403 } 1404 1405 static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size, 1406 struct syscall_arg *arg, unsigned long val) 1407 { 1408 if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) { 1409 arg->val = val; 1410 if (sc->arg_fmt[arg->idx].parm) 1411 arg->parm = sc->arg_fmt[arg->idx].parm; 1412 return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg); 1413 } 1414 return scnprintf(bf, size, "%ld", val); 1415 } 1416 1417 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, 1418 unsigned char *args, struct trace *trace, 1419 struct thread *thread) 1420 { 1421 size_t printed = 0; 1422 unsigned long val; 1423 u8 bit = 1; 1424 struct syscall_arg arg = { 1425 .args = args, 1426 .idx = 0, 1427 .mask = 0, 1428 .trace = trace, 1429 .thread = thread, 1430 }; 1431 struct thread_trace *ttrace = thread__priv(thread); 1432 1433 /* 1434 * Things like fcntl will set this in its 'cmd' formatter to pick the 1435 * right formatter for the return value (an fd? file flags?), which is 1436 * not needed for syscalls that always return a given type, say an fd. 1437 */ 1438 ttrace->ret_scnprintf = NULL; 1439 1440 if (sc->args != NULL) { 1441 struct format_field *field; 1442 1443 for (field = sc->args; field; 1444 field = field->next, ++arg.idx, bit <<= 1) { 1445 if (arg.mask & bit) 1446 continue; 1447 1448 val = syscall_arg__val(&arg, arg.idx); 1449 1450 /* 1451 * Suppress this argument if its value is zero and 1452 * and we don't have a string associated in an 1453 * strarray for it. 1454 */ 1455 if (val == 0 && 1456 !(sc->arg_fmt && 1457 (sc->arg_fmt[arg.idx].show_zero || 1458 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY || 1459 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) && 1460 sc->arg_fmt[arg.idx].parm)) 1461 continue; 1462 1463 printed += scnprintf(bf + printed, size - printed, 1464 "%s%s: ", printed ? ", " : "", field->name); 1465 printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val); 1466 } 1467 } else if (IS_ERR(sc->tp_format)) { 1468 /* 1469 * If we managed to read the tracepoint /format file, then we 1470 * may end up not having any args, like with gettid(), so only 1471 * print the raw args when we didn't manage to read it. 1472 */ 1473 while (arg.idx < sc->nr_args) { 1474 if (arg.mask & bit) 1475 goto next_arg; 1476 val = syscall_arg__val(&arg, arg.idx); 1477 if (printed) 1478 printed += scnprintf(bf + printed, size - printed, ", "); 1479 printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg); 1480 printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val); 1481 next_arg: 1482 ++arg.idx; 1483 bit <<= 1; 1484 } 1485 } 1486 1487 return printed; 1488 } 1489 1490 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel, 1491 union perf_event *event, 1492 struct perf_sample *sample); 1493 1494 static struct syscall *trace__syscall_info(struct trace *trace, 1495 struct perf_evsel *evsel, int id) 1496 { 1497 1498 if (id < 0) { 1499 1500 /* 1501 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried 1502 * before that, leaving at a higher verbosity level till that is 1503 * explained. Reproduced with plain ftrace with: 1504 * 1505 * echo 1 > /t/events/raw_syscalls/sys_exit/enable 1506 * grep "NR -1 " /t/trace_pipe 1507 * 1508 * After generating some load on the machine. 1509 */ 1510 if (verbose > 1) { 1511 static u64 n; 1512 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n", 1513 id, perf_evsel__name(evsel), ++n); 1514 } 1515 return NULL; 1516 } 1517 1518 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) && 1519 trace__read_syscall_info(trace, id)) 1520 goto out_cant_read; 1521 1522 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL)) 1523 goto out_cant_read; 1524 1525 return &trace->syscalls.table[id]; 1526 1527 out_cant_read: 1528 if (verbose > 0) { 1529 fprintf(trace->output, "Problems reading syscall %d", id); 1530 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL) 1531 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name); 1532 fputs(" information\n", trace->output); 1533 } 1534 return NULL; 1535 } 1536 1537 static void thread__update_stats(struct thread_trace *ttrace, 1538 int id, struct perf_sample *sample) 1539 { 1540 struct int_node *inode; 1541 struct stats *stats; 1542 u64 duration = 0; 1543 1544 inode = intlist__findnew(ttrace->syscall_stats, id); 1545 if (inode == NULL) 1546 return; 1547 1548 stats = inode->priv; 1549 if (stats == NULL) { 1550 stats = malloc(sizeof(struct stats)); 1551 if (stats == NULL) 1552 return; 1553 init_stats(stats); 1554 inode->priv = stats; 1555 } 1556 1557 if (ttrace->entry_time && sample->time > ttrace->entry_time) 1558 duration = sample->time - ttrace->entry_time; 1559 1560 update_stats(stats, duration); 1561 } 1562 1563 static int trace__printf_interrupted_entry(struct trace *trace) 1564 { 1565 struct thread_trace *ttrace; 1566 size_t printed; 1567 1568 if (trace->current == NULL) 1569 return 0; 1570 1571 ttrace = thread__priv(trace->current); 1572 1573 if (!ttrace->entry_pending) 1574 return 0; 1575 1576 printed = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output); 1577 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); 1578 ttrace->entry_pending = false; 1579 1580 return printed; 1581 } 1582 1583 static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel, 1584 struct perf_sample *sample, struct thread *thread) 1585 { 1586 int printed = 0; 1587 1588 if (trace->print_sample) { 1589 double ts = (double)sample->time / NSEC_PER_MSEC; 1590 1591 printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n", 1592 perf_evsel__name(evsel), ts, 1593 thread__comm_str(thread), 1594 sample->pid, sample->tid, sample->cpu); 1595 } 1596 1597 return printed; 1598 } 1599 1600 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, 1601 union perf_event *event __maybe_unused, 1602 struct perf_sample *sample) 1603 { 1604 char *msg; 1605 void *args; 1606 size_t printed = 0; 1607 struct thread *thread; 1608 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; 1609 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1610 struct thread_trace *ttrace; 1611 1612 if (sc == NULL) 1613 return -1; 1614 1615 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1616 ttrace = thread__trace(thread, trace->output); 1617 if (ttrace == NULL) 1618 goto out_put; 1619 1620 trace__fprintf_sample(trace, evsel, sample, thread); 1621 1622 args = perf_evsel__sc_tp_ptr(evsel, args, sample); 1623 1624 if (ttrace->entry_str == NULL) { 1625 ttrace->entry_str = malloc(trace__entry_str_size); 1626 if (!ttrace->entry_str) 1627 goto out_put; 1628 } 1629 1630 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) 1631 trace__printf_interrupted_entry(trace); 1632 1633 ttrace->entry_time = sample->time; 1634 msg = ttrace->entry_str; 1635 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name); 1636 1637 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed, 1638 args, trace, thread); 1639 1640 if (sc->is_exit) { 1641 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) { 1642 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output); 1643 fprintf(trace->output, "%-70s)\n", ttrace->entry_str); 1644 } 1645 } else { 1646 ttrace->entry_pending = true; 1647 /* See trace__vfs_getname & trace__sys_exit */ 1648 ttrace->filename.pending_open = false; 1649 } 1650 1651 if (trace->current != thread) { 1652 thread__put(trace->current); 1653 trace->current = thread__get(thread); 1654 } 1655 err = 0; 1656 out_put: 1657 thread__put(thread); 1658 return err; 1659 } 1660 1661 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel, 1662 struct perf_sample *sample, 1663 struct callchain_cursor *cursor) 1664 { 1665 struct addr_location al; 1666 int max_stack = evsel->attr.sample_max_stack ? 1667 evsel->attr.sample_max_stack : 1668 trace->max_stack; 1669 1670 if (machine__resolve(trace->host, &al, sample) < 0 || 1671 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack)) 1672 return -1; 1673 1674 return 0; 1675 } 1676 1677 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample) 1678 { 1679 /* TODO: user-configurable print_opts */ 1680 const unsigned int print_opts = EVSEL__PRINT_SYM | 1681 EVSEL__PRINT_DSO | 1682 EVSEL__PRINT_UNKNOWN_AS_ADDR; 1683 1684 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output); 1685 } 1686 1687 static const char *errno_to_name(struct perf_evsel *evsel, int err) 1688 { 1689 struct perf_env *env = perf_evsel__env(evsel); 1690 const char *arch_name = perf_env__arch(env); 1691 1692 return arch_syscalls__strerrno(arch_name, err); 1693 } 1694 1695 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, 1696 union perf_event *event __maybe_unused, 1697 struct perf_sample *sample) 1698 { 1699 long ret; 1700 u64 duration = 0; 1701 bool duration_calculated = false; 1702 struct thread *thread; 1703 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0; 1704 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1705 struct thread_trace *ttrace; 1706 1707 if (sc == NULL) 1708 return -1; 1709 1710 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1711 ttrace = thread__trace(thread, trace->output); 1712 if (ttrace == NULL) 1713 goto out_put; 1714 1715 trace__fprintf_sample(trace, evsel, sample, thread); 1716 1717 if (trace->summary) 1718 thread__update_stats(ttrace, id, sample); 1719 1720 ret = perf_evsel__sc_tp_uint(evsel, ret, sample); 1721 1722 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) { 1723 trace__set_fd_pathname(thread, ret, ttrace->filename.name); 1724 ttrace->filename.pending_open = false; 1725 ++trace->stats.vfs_getname; 1726 } 1727 1728 if (ttrace->entry_time) { 1729 duration = sample->time - ttrace->entry_time; 1730 if (trace__filter_duration(trace, duration)) 1731 goto out; 1732 duration_calculated = true; 1733 } else if (trace->duration_filter) 1734 goto out; 1735 1736 if (sample->callchain) { 1737 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); 1738 if (callchain_ret == 0) { 1739 if (callchain_cursor.nr < trace->min_stack) 1740 goto out; 1741 callchain_ret = 1; 1742 } 1743 } 1744 1745 if (trace->summary_only) 1746 goto out; 1747 1748 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output); 1749 1750 if (ttrace->entry_pending) { 1751 fprintf(trace->output, "%-70s", ttrace->entry_str); 1752 } else { 1753 fprintf(trace->output, " ... ["); 1754 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued"); 1755 fprintf(trace->output, "]: %s()", sc->name); 1756 } 1757 1758 if (sc->fmt == NULL) { 1759 if (ret < 0) 1760 goto errno_print; 1761 signed_print: 1762 fprintf(trace->output, ") = %ld", ret); 1763 } else if (ret < 0) { 1764 errno_print: { 1765 char bf[STRERR_BUFSIZE]; 1766 const char *emsg = str_error_r(-ret, bf, sizeof(bf)), 1767 *e = errno_to_name(evsel, -ret); 1768 1769 fprintf(trace->output, ") = -1 %s %s", e, emsg); 1770 } 1771 } else if (ret == 0 && sc->fmt->timeout) 1772 fprintf(trace->output, ") = 0 Timeout"); 1773 else if (ttrace->ret_scnprintf) { 1774 char bf[1024]; 1775 struct syscall_arg arg = { 1776 .val = ret, 1777 .thread = thread, 1778 .trace = trace, 1779 }; 1780 ttrace->ret_scnprintf(bf, sizeof(bf), &arg); 1781 ttrace->ret_scnprintf = NULL; 1782 fprintf(trace->output, ") = %s", bf); 1783 } else if (sc->fmt->hexret) 1784 fprintf(trace->output, ") = %#lx", ret); 1785 else if (sc->fmt->errpid) { 1786 struct thread *child = machine__find_thread(trace->host, ret, ret); 1787 1788 if (child != NULL) { 1789 fprintf(trace->output, ") = %ld", ret); 1790 if (child->comm_set) 1791 fprintf(trace->output, " (%s)", thread__comm_str(child)); 1792 thread__put(child); 1793 } 1794 } else 1795 goto signed_print; 1796 1797 fputc('\n', trace->output); 1798 1799 if (callchain_ret > 0) 1800 trace__fprintf_callchain(trace, sample); 1801 else if (callchain_ret < 0) 1802 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); 1803 out: 1804 ttrace->entry_pending = false; 1805 err = 0; 1806 out_put: 1807 thread__put(thread); 1808 return err; 1809 } 1810 1811 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, 1812 union perf_event *event __maybe_unused, 1813 struct perf_sample *sample) 1814 { 1815 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1816 struct thread_trace *ttrace; 1817 size_t filename_len, entry_str_len, to_move; 1818 ssize_t remaining_space; 1819 char *pos; 1820 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname"); 1821 1822 if (!thread) 1823 goto out; 1824 1825 ttrace = thread__priv(thread); 1826 if (!ttrace) 1827 goto out_put; 1828 1829 filename_len = strlen(filename); 1830 if (filename_len == 0) 1831 goto out_put; 1832 1833 if (ttrace->filename.namelen < filename_len) { 1834 char *f = realloc(ttrace->filename.name, filename_len + 1); 1835 1836 if (f == NULL) 1837 goto out_put; 1838 1839 ttrace->filename.namelen = filename_len; 1840 ttrace->filename.name = f; 1841 } 1842 1843 strcpy(ttrace->filename.name, filename); 1844 ttrace->filename.pending_open = true; 1845 1846 if (!ttrace->filename.ptr) 1847 goto out_put; 1848 1849 entry_str_len = strlen(ttrace->entry_str); 1850 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */ 1851 if (remaining_space <= 0) 1852 goto out_put; 1853 1854 if (filename_len > (size_t)remaining_space) { 1855 filename += filename_len - remaining_space; 1856 filename_len = remaining_space; 1857 } 1858 1859 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */ 1860 pos = ttrace->entry_str + ttrace->filename.entry_str_pos; 1861 memmove(pos + filename_len, pos, to_move); 1862 memcpy(pos, filename, filename_len); 1863 1864 ttrace->filename.ptr = 0; 1865 ttrace->filename.entry_str_pos = 0; 1866 out_put: 1867 thread__put(thread); 1868 out: 1869 return 0; 1870 } 1871 1872 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel, 1873 union perf_event *event __maybe_unused, 1874 struct perf_sample *sample) 1875 { 1876 u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); 1877 double runtime_ms = (double)runtime / NSEC_PER_MSEC; 1878 struct thread *thread = machine__findnew_thread(trace->host, 1879 sample->pid, 1880 sample->tid); 1881 struct thread_trace *ttrace = thread__trace(thread, trace->output); 1882 1883 if (ttrace == NULL) 1884 goto out_dump; 1885 1886 ttrace->runtime_ms += runtime_ms; 1887 trace->runtime_ms += runtime_ms; 1888 out_put: 1889 thread__put(thread); 1890 return 0; 1891 1892 out_dump: 1893 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n", 1894 evsel->name, 1895 perf_evsel__strval(evsel, sample, "comm"), 1896 (pid_t)perf_evsel__intval(evsel, sample, "pid"), 1897 runtime, 1898 perf_evsel__intval(evsel, sample, "vruntime")); 1899 goto out_put; 1900 } 1901 1902 static int bpf_output__printer(enum binary_printer_ops op, 1903 unsigned int val, void *extra __maybe_unused, FILE *fp) 1904 { 1905 unsigned char ch = (unsigned char)val; 1906 1907 switch (op) { 1908 case BINARY_PRINT_CHAR_DATA: 1909 return fprintf(fp, "%c", isprint(ch) ? ch : '.'); 1910 case BINARY_PRINT_DATA_BEGIN: 1911 case BINARY_PRINT_LINE_BEGIN: 1912 case BINARY_PRINT_ADDR: 1913 case BINARY_PRINT_NUM_DATA: 1914 case BINARY_PRINT_NUM_PAD: 1915 case BINARY_PRINT_SEP: 1916 case BINARY_PRINT_CHAR_PAD: 1917 case BINARY_PRINT_LINE_END: 1918 case BINARY_PRINT_DATA_END: 1919 default: 1920 break; 1921 } 1922 1923 return 0; 1924 } 1925 1926 static void bpf_output__fprintf(struct trace *trace, 1927 struct perf_sample *sample) 1928 { 1929 binary__fprintf(sample->raw_data, sample->raw_size, 8, 1930 bpf_output__printer, NULL, trace->output); 1931 } 1932 1933 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, 1934 union perf_event *event __maybe_unused, 1935 struct perf_sample *sample) 1936 { 1937 int callchain_ret = 0; 1938 1939 if (sample->callchain) { 1940 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); 1941 if (callchain_ret == 0) { 1942 if (callchain_cursor.nr < trace->min_stack) 1943 goto out; 1944 callchain_ret = 1; 1945 } 1946 } 1947 1948 trace__printf_interrupted_entry(trace); 1949 trace__fprintf_tstamp(trace, sample->time, trace->output); 1950 1951 if (trace->trace_syscalls) 1952 fprintf(trace->output, "( ): "); 1953 1954 fprintf(trace->output, "%s:", evsel->name); 1955 1956 if (perf_evsel__is_bpf_output(evsel)) { 1957 bpf_output__fprintf(trace, sample); 1958 } else if (evsel->tp_format) { 1959 event_format__fprintf(evsel->tp_format, sample->cpu, 1960 sample->raw_data, sample->raw_size, 1961 trace->output); 1962 } 1963 1964 fprintf(trace->output, ")\n"); 1965 1966 if (callchain_ret > 0) 1967 trace__fprintf_callchain(trace, sample); 1968 else if (callchain_ret < 0) 1969 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); 1970 out: 1971 return 0; 1972 } 1973 1974 static void print_location(FILE *f, struct perf_sample *sample, 1975 struct addr_location *al, 1976 bool print_dso, bool print_sym) 1977 { 1978 1979 if ((verbose > 0 || print_dso) && al->map) 1980 fprintf(f, "%s@", al->map->dso->long_name); 1981 1982 if ((verbose > 0 || print_sym) && al->sym) 1983 fprintf(f, "%s+0x%" PRIx64, al->sym->name, 1984 al->addr - al->sym->start); 1985 else if (al->map) 1986 fprintf(f, "0x%" PRIx64, al->addr); 1987 else 1988 fprintf(f, "0x%" PRIx64, sample->addr); 1989 } 1990 1991 static int trace__pgfault(struct trace *trace, 1992 struct perf_evsel *evsel, 1993 union perf_event *event __maybe_unused, 1994 struct perf_sample *sample) 1995 { 1996 struct thread *thread; 1997 struct addr_location al; 1998 char map_type = 'd'; 1999 struct thread_trace *ttrace; 2000 int err = -1; 2001 int callchain_ret = 0; 2002 2003 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 2004 2005 if (sample->callchain) { 2006 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); 2007 if (callchain_ret == 0) { 2008 if (callchain_cursor.nr < trace->min_stack) 2009 goto out_put; 2010 callchain_ret = 1; 2011 } 2012 } 2013 2014 ttrace = thread__trace(thread, trace->output); 2015 if (ttrace == NULL) 2016 goto out_put; 2017 2018 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ) 2019 ttrace->pfmaj++; 2020 else 2021 ttrace->pfmin++; 2022 2023 if (trace->summary_only) 2024 goto out; 2025 2026 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION, 2027 sample->ip, &al); 2028 2029 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output); 2030 2031 fprintf(trace->output, "%sfault [", 2032 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ? 2033 "maj" : "min"); 2034 2035 print_location(trace->output, sample, &al, false, true); 2036 2037 fprintf(trace->output, "] => "); 2038 2039 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE, 2040 sample->addr, &al); 2041 2042 if (!al.map) { 2043 thread__find_addr_location(thread, sample->cpumode, 2044 MAP__FUNCTION, sample->addr, &al); 2045 2046 if (al.map) 2047 map_type = 'x'; 2048 else 2049 map_type = '?'; 2050 } 2051 2052 print_location(trace->output, sample, &al, true, false); 2053 2054 fprintf(trace->output, " (%c%c)\n", map_type, al.level); 2055 2056 if (callchain_ret > 0) 2057 trace__fprintf_callchain(trace, sample); 2058 else if (callchain_ret < 0) 2059 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); 2060 out: 2061 err = 0; 2062 out_put: 2063 thread__put(thread); 2064 return err; 2065 } 2066 2067 static void trace__set_base_time(struct trace *trace, 2068 struct perf_evsel *evsel, 2069 struct perf_sample *sample) 2070 { 2071 /* 2072 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust 2073 * and don't use sample->time unconditionally, we may end up having 2074 * some other event in the future without PERF_SAMPLE_TIME for good 2075 * reason, i.e. we may not be interested in its timestamps, just in 2076 * it taking place, picking some piece of information when it 2077 * appears in our event stream (vfs_getname comes to mind). 2078 */ 2079 if (trace->base_time == 0 && !trace->full_time && 2080 (evsel->attr.sample_type & PERF_SAMPLE_TIME)) 2081 trace->base_time = sample->time; 2082 } 2083 2084 static int trace__process_sample(struct perf_tool *tool, 2085 union perf_event *event, 2086 struct perf_sample *sample, 2087 struct perf_evsel *evsel, 2088 struct machine *machine __maybe_unused) 2089 { 2090 struct trace *trace = container_of(tool, struct trace, tool); 2091 struct thread *thread; 2092 int err = 0; 2093 2094 tracepoint_handler handler = evsel->handler; 2095 2096 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 2097 if (thread && thread__is_filtered(thread)) 2098 goto out; 2099 2100 trace__set_base_time(trace, evsel, sample); 2101 2102 if (handler) { 2103 ++trace->nr_events; 2104 handler(trace, evsel, event, sample); 2105 } 2106 out: 2107 thread__put(thread); 2108 return err; 2109 } 2110 2111 static int trace__record(struct trace *trace, int argc, const char **argv) 2112 { 2113 unsigned int rec_argc, i, j; 2114 const char **rec_argv; 2115 const char * const record_args[] = { 2116 "record", 2117 "-R", 2118 "-m", "1024", 2119 "-c", "1", 2120 }; 2121 2122 const char * const sc_args[] = { "-e", }; 2123 unsigned int sc_args_nr = ARRAY_SIZE(sc_args); 2124 const char * const majpf_args[] = { "-e", "major-faults" }; 2125 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args); 2126 const char * const minpf_args[] = { "-e", "minor-faults" }; 2127 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args); 2128 2129 /* +1 is for the event string below */ 2130 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 + 2131 majpf_args_nr + minpf_args_nr + argc; 2132 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 2133 2134 if (rec_argv == NULL) 2135 return -ENOMEM; 2136 2137 j = 0; 2138 for (i = 0; i < ARRAY_SIZE(record_args); i++) 2139 rec_argv[j++] = record_args[i]; 2140 2141 if (trace->trace_syscalls) { 2142 for (i = 0; i < sc_args_nr; i++) 2143 rec_argv[j++] = sc_args[i]; 2144 2145 /* event string may be different for older kernels - e.g., RHEL6 */ 2146 if (is_valid_tracepoint("raw_syscalls:sys_enter")) 2147 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit"; 2148 else if (is_valid_tracepoint("syscalls:sys_enter")) 2149 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit"; 2150 else { 2151 pr_err("Neither raw_syscalls nor syscalls events exist.\n"); 2152 free(rec_argv); 2153 return -1; 2154 } 2155 } 2156 2157 if (trace->trace_pgfaults & TRACE_PFMAJ) 2158 for (i = 0; i < majpf_args_nr; i++) 2159 rec_argv[j++] = majpf_args[i]; 2160 2161 if (trace->trace_pgfaults & TRACE_PFMIN) 2162 for (i = 0; i < minpf_args_nr; i++) 2163 rec_argv[j++] = minpf_args[i]; 2164 2165 for (i = 0; i < (unsigned int)argc; i++) 2166 rec_argv[j++] = argv[i]; 2167 2168 return cmd_record(j, rec_argv); 2169 } 2170 2171 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); 2172 2173 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist) 2174 { 2175 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname"); 2176 2177 if (IS_ERR(evsel)) 2178 return false; 2179 2180 if (perf_evsel__field(evsel, "pathname") == NULL) { 2181 perf_evsel__delete(evsel); 2182 return false; 2183 } 2184 2185 evsel->handler = trace__vfs_getname; 2186 perf_evlist__add(evlist, evsel); 2187 return true; 2188 } 2189 2190 static struct perf_evsel *perf_evsel__new_pgfault(u64 config) 2191 { 2192 struct perf_evsel *evsel; 2193 struct perf_event_attr attr = { 2194 .type = PERF_TYPE_SOFTWARE, 2195 .mmap_data = 1, 2196 }; 2197 2198 attr.config = config; 2199 attr.sample_period = 1; 2200 2201 event_attr_init(&attr); 2202 2203 evsel = perf_evsel__new(&attr); 2204 if (evsel) 2205 evsel->handler = trace__pgfault; 2206 2207 return evsel; 2208 } 2209 2210 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample) 2211 { 2212 const u32 type = event->header.type; 2213 struct perf_evsel *evsel; 2214 2215 if (type != PERF_RECORD_SAMPLE) { 2216 trace__process_event(trace, trace->host, event, sample); 2217 return; 2218 } 2219 2220 evsel = perf_evlist__id2evsel(trace->evlist, sample->id); 2221 if (evsel == NULL) { 2222 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id); 2223 return; 2224 } 2225 2226 trace__set_base_time(trace, evsel, sample); 2227 2228 if (evsel->attr.type == PERF_TYPE_TRACEPOINT && 2229 sample->raw_data == NULL) { 2230 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", 2231 perf_evsel__name(evsel), sample->tid, 2232 sample->cpu, sample->raw_size); 2233 } else { 2234 tracepoint_handler handler = evsel->handler; 2235 handler(trace, evsel, event, sample); 2236 } 2237 } 2238 2239 static int trace__add_syscall_newtp(struct trace *trace) 2240 { 2241 int ret = -1; 2242 struct perf_evlist *evlist = trace->evlist; 2243 struct perf_evsel *sys_enter, *sys_exit; 2244 2245 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter); 2246 if (sys_enter == NULL) 2247 goto out; 2248 2249 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args)) 2250 goto out_delete_sys_enter; 2251 2252 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit); 2253 if (sys_exit == NULL) 2254 goto out_delete_sys_enter; 2255 2256 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) 2257 goto out_delete_sys_exit; 2258 2259 perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param); 2260 perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param); 2261 2262 perf_evlist__add(evlist, sys_enter); 2263 perf_evlist__add(evlist, sys_exit); 2264 2265 if (callchain_param.enabled && !trace->kernel_syscallchains) { 2266 /* 2267 * We're interested only in the user space callchain 2268 * leading to the syscall, allow overriding that for 2269 * debugging reasons using --kernel_syscall_callchains 2270 */ 2271 sys_exit->attr.exclude_callchain_kernel = 1; 2272 } 2273 2274 trace->syscalls.events.sys_enter = sys_enter; 2275 trace->syscalls.events.sys_exit = sys_exit; 2276 2277 ret = 0; 2278 out: 2279 return ret; 2280 2281 out_delete_sys_exit: 2282 perf_evsel__delete_priv(sys_exit); 2283 out_delete_sys_enter: 2284 perf_evsel__delete_priv(sys_enter); 2285 goto out; 2286 } 2287 2288 static int trace__set_ev_qualifier_filter(struct trace *trace) 2289 { 2290 int err = -1; 2291 struct perf_evsel *sys_exit; 2292 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier, 2293 trace->ev_qualifier_ids.nr, 2294 trace->ev_qualifier_ids.entries); 2295 2296 if (filter == NULL) 2297 goto out_enomem; 2298 2299 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter, 2300 filter)) { 2301 sys_exit = trace->syscalls.events.sys_exit; 2302 err = perf_evsel__append_tp_filter(sys_exit, filter); 2303 } 2304 2305 free(filter); 2306 out: 2307 return err; 2308 out_enomem: 2309 errno = ENOMEM; 2310 goto out; 2311 } 2312 2313 static int trace__set_filter_loop_pids(struct trace *trace) 2314 { 2315 unsigned int nr = 1; 2316 pid_t pids[32] = { 2317 getpid(), 2318 }; 2319 struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]); 2320 2321 while (thread && nr < ARRAY_SIZE(pids)) { 2322 struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid); 2323 2324 if (parent == NULL) 2325 break; 2326 2327 if (!strcmp(thread__comm_str(parent), "sshd")) { 2328 pids[nr++] = parent->tid; 2329 break; 2330 } 2331 thread = parent; 2332 } 2333 2334 return perf_evlist__set_filter_pids(trace->evlist, nr, pids); 2335 } 2336 2337 static int trace__run(struct trace *trace, int argc, const char **argv) 2338 { 2339 struct perf_evlist *evlist = trace->evlist; 2340 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL; 2341 int err = -1, i; 2342 unsigned long before; 2343 const bool forks = argc > 0; 2344 bool draining = false; 2345 2346 trace->live = true; 2347 2348 if (trace->trace_syscalls && trace__add_syscall_newtp(trace)) 2349 goto out_error_raw_syscalls; 2350 2351 if (trace->trace_syscalls) 2352 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist); 2353 2354 if ((trace->trace_pgfaults & TRACE_PFMAJ)) { 2355 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ); 2356 if (pgfault_maj == NULL) 2357 goto out_error_mem; 2358 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param); 2359 perf_evlist__add(evlist, pgfault_maj); 2360 } 2361 2362 if ((trace->trace_pgfaults & TRACE_PFMIN)) { 2363 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN); 2364 if (pgfault_min == NULL) 2365 goto out_error_mem; 2366 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param); 2367 perf_evlist__add(evlist, pgfault_min); 2368 } 2369 2370 if (trace->sched && 2371 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", 2372 trace__sched_stat_runtime)) 2373 goto out_error_sched_stat_runtime; 2374 2375 /* 2376 * If a global cgroup was set, apply it to all the events without an 2377 * explicit cgroup. I.e.: 2378 * 2379 * trace -G A -e sched:*switch 2380 * 2381 * Will set all raw_syscalls:sys_{enter,exit}, pgfault, vfs_getname, etc 2382 * _and_ sched:sched_switch to the 'A' cgroup, while: 2383 * 2384 * trace -e sched:*switch -G A 2385 * 2386 * will only set the sched:sched_switch event to the 'A' cgroup, all the 2387 * other events (raw_syscalls:sys_{enter,exit}, etc are left "without" 2388 * a cgroup (on the root cgroup, sys wide, etc). 2389 * 2390 * Multiple cgroups: 2391 * 2392 * trace -G A -e sched:*switch -G B 2393 * 2394 * the syscall ones go to the 'A' cgroup, the sched:sched_switch goes 2395 * to the 'B' cgroup. 2396 * 2397 * evlist__set_default_cgroup() grabs a reference of the passed cgroup 2398 * only for the evsels still without a cgroup, i.e. evsel->cgroup == NULL. 2399 */ 2400 if (trace->cgroup) 2401 evlist__set_default_cgroup(trace->evlist, trace->cgroup); 2402 2403 err = perf_evlist__create_maps(evlist, &trace->opts.target); 2404 if (err < 0) { 2405 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n"); 2406 goto out_delete_evlist; 2407 } 2408 2409 err = trace__symbols_init(trace, evlist); 2410 if (err < 0) { 2411 fprintf(trace->output, "Problems initializing symbol libraries!\n"); 2412 goto out_delete_evlist; 2413 } 2414 2415 perf_evlist__config(evlist, &trace->opts, &callchain_param); 2416 2417 signal(SIGCHLD, sig_handler); 2418 signal(SIGINT, sig_handler); 2419 2420 if (forks) { 2421 err = perf_evlist__prepare_workload(evlist, &trace->opts.target, 2422 argv, false, NULL); 2423 if (err < 0) { 2424 fprintf(trace->output, "Couldn't run the workload!\n"); 2425 goto out_delete_evlist; 2426 } 2427 } 2428 2429 err = perf_evlist__open(evlist); 2430 if (err < 0) 2431 goto out_error_open; 2432 2433 err = bpf__apply_obj_config(); 2434 if (err) { 2435 char errbuf[BUFSIZ]; 2436 2437 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); 2438 pr_err("ERROR: Apply config to BPF failed: %s\n", 2439 errbuf); 2440 goto out_error_open; 2441 } 2442 2443 /* 2444 * Better not use !target__has_task() here because we need to cover the 2445 * case where no threads were specified in the command line, but a 2446 * workload was, and in that case we will fill in the thread_map when 2447 * we fork the workload in perf_evlist__prepare_workload. 2448 */ 2449 if (trace->filter_pids.nr > 0) 2450 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries); 2451 else if (thread_map__pid(evlist->threads, 0) == -1) 2452 err = trace__set_filter_loop_pids(trace); 2453 2454 if (err < 0) 2455 goto out_error_mem; 2456 2457 if (trace->ev_qualifier_ids.nr > 0) { 2458 err = trace__set_ev_qualifier_filter(trace); 2459 if (err < 0) 2460 goto out_errno; 2461 2462 pr_debug("event qualifier tracepoint filter: %s\n", 2463 trace->syscalls.events.sys_exit->filter); 2464 } 2465 2466 err = perf_evlist__apply_filters(evlist, &evsel); 2467 if (err < 0) 2468 goto out_error_apply_filters; 2469 2470 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages); 2471 if (err < 0) 2472 goto out_error_mmap; 2473 2474 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay) 2475 perf_evlist__enable(evlist); 2476 2477 if (forks) 2478 perf_evlist__start_workload(evlist); 2479 2480 if (trace->opts.initial_delay) { 2481 usleep(trace->opts.initial_delay * 1000); 2482 perf_evlist__enable(evlist); 2483 } 2484 2485 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 || 2486 evlist->threads->nr > 1 || 2487 perf_evlist__first(evlist)->attr.inherit; 2488 2489 /* 2490 * Now that we already used evsel->attr to ask the kernel to setup the 2491 * events, lets reuse evsel->attr.sample_max_stack as the limit in 2492 * trace__resolve_callchain(), allowing per-event max-stack settings 2493 * to override an explicitely set --max-stack global setting. 2494 */ 2495 evlist__for_each_entry(evlist, evsel) { 2496 if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) && 2497 evsel->attr.sample_max_stack == 0) 2498 evsel->attr.sample_max_stack = trace->max_stack; 2499 } 2500 again: 2501 before = trace->nr_events; 2502 2503 for (i = 0; i < evlist->nr_mmaps; i++) { 2504 union perf_event *event; 2505 struct perf_mmap *md; 2506 2507 md = &evlist->mmap[i]; 2508 if (perf_mmap__read_init(md) < 0) 2509 continue; 2510 2511 while ((event = perf_mmap__read_event(md)) != NULL) { 2512 struct perf_sample sample; 2513 2514 ++trace->nr_events; 2515 2516 err = perf_evlist__parse_sample(evlist, event, &sample); 2517 if (err) { 2518 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err); 2519 goto next_event; 2520 } 2521 2522 trace__handle_event(trace, event, &sample); 2523 next_event: 2524 perf_mmap__consume(md); 2525 2526 if (interrupted) 2527 goto out_disable; 2528 2529 if (done && !draining) { 2530 perf_evlist__disable(evlist); 2531 draining = true; 2532 } 2533 } 2534 perf_mmap__read_done(md); 2535 } 2536 2537 if (trace->nr_events == before) { 2538 int timeout = done ? 100 : -1; 2539 2540 if (!draining && perf_evlist__poll(evlist, timeout) > 0) { 2541 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0) 2542 draining = true; 2543 2544 goto again; 2545 } 2546 } else { 2547 goto again; 2548 } 2549 2550 out_disable: 2551 thread__zput(trace->current); 2552 2553 perf_evlist__disable(evlist); 2554 2555 if (!err) { 2556 if (trace->summary) 2557 trace__fprintf_thread_summary(trace, trace->output); 2558 2559 if (trace->show_tool_stats) { 2560 fprintf(trace->output, "Stats:\n " 2561 " vfs_getname : %" PRIu64 "\n" 2562 " proc_getname: %" PRIu64 "\n", 2563 trace->stats.vfs_getname, 2564 trace->stats.proc_getname); 2565 } 2566 } 2567 2568 out_delete_evlist: 2569 trace__symbols__exit(trace); 2570 2571 perf_evlist__delete(evlist); 2572 cgroup__put(trace->cgroup); 2573 trace->evlist = NULL; 2574 trace->live = false; 2575 return err; 2576 { 2577 char errbuf[BUFSIZ]; 2578 2579 out_error_sched_stat_runtime: 2580 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime"); 2581 goto out_error; 2582 2583 out_error_raw_syscalls: 2584 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)"); 2585 goto out_error; 2586 2587 out_error_mmap: 2588 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf)); 2589 goto out_error; 2590 2591 out_error_open: 2592 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); 2593 2594 out_error: 2595 fprintf(trace->output, "%s\n", errbuf); 2596 goto out_delete_evlist; 2597 2598 out_error_apply_filters: 2599 fprintf(trace->output, 2600 "Failed to set filter \"%s\" on event %s with %d (%s)\n", 2601 evsel->filter, perf_evsel__name(evsel), errno, 2602 str_error_r(errno, errbuf, sizeof(errbuf))); 2603 goto out_delete_evlist; 2604 } 2605 out_error_mem: 2606 fprintf(trace->output, "Not enough memory to run!\n"); 2607 goto out_delete_evlist; 2608 2609 out_errno: 2610 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno)); 2611 goto out_delete_evlist; 2612 } 2613 2614 static int trace__replay(struct trace *trace) 2615 { 2616 const struct perf_evsel_str_handler handlers[] = { 2617 { "probe:vfs_getname", trace__vfs_getname, }, 2618 }; 2619 struct perf_data data = { 2620 .file = { 2621 .path = input_name, 2622 }, 2623 .mode = PERF_DATA_MODE_READ, 2624 .force = trace->force, 2625 }; 2626 struct perf_session *session; 2627 struct perf_evsel *evsel; 2628 int err = -1; 2629 2630 trace->tool.sample = trace__process_sample; 2631 trace->tool.mmap = perf_event__process_mmap; 2632 trace->tool.mmap2 = perf_event__process_mmap2; 2633 trace->tool.comm = perf_event__process_comm; 2634 trace->tool.exit = perf_event__process_exit; 2635 trace->tool.fork = perf_event__process_fork; 2636 trace->tool.attr = perf_event__process_attr; 2637 trace->tool.tracing_data = perf_event__process_tracing_data; 2638 trace->tool.build_id = perf_event__process_build_id; 2639 trace->tool.namespaces = perf_event__process_namespaces; 2640 2641 trace->tool.ordered_events = true; 2642 trace->tool.ordering_requires_timestamps = true; 2643 2644 /* add tid to output */ 2645 trace->multiple_threads = true; 2646 2647 session = perf_session__new(&data, false, &trace->tool); 2648 if (session == NULL) 2649 return -1; 2650 2651 if (trace->opts.target.pid) 2652 symbol_conf.pid_list_str = strdup(trace->opts.target.pid); 2653 2654 if (trace->opts.target.tid) 2655 symbol_conf.tid_list_str = strdup(trace->opts.target.tid); 2656 2657 if (symbol__init(&session->header.env) < 0) 2658 goto out; 2659 2660 trace->host = &session->machines.host; 2661 2662 err = perf_session__set_tracepoints_handlers(session, handlers); 2663 if (err) 2664 goto out; 2665 2666 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2667 "raw_syscalls:sys_enter"); 2668 /* older kernels have syscalls tp versus raw_syscalls */ 2669 if (evsel == NULL) 2670 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2671 "syscalls:sys_enter"); 2672 2673 if (evsel && 2674 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 || 2675 perf_evsel__init_sc_tp_ptr_field(evsel, args))) { 2676 pr_err("Error during initialize raw_syscalls:sys_enter event\n"); 2677 goto out; 2678 } 2679 2680 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2681 "raw_syscalls:sys_exit"); 2682 if (evsel == NULL) 2683 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2684 "syscalls:sys_exit"); 2685 if (evsel && 2686 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 || 2687 perf_evsel__init_sc_tp_uint_field(evsel, ret))) { 2688 pr_err("Error during initialize raw_syscalls:sys_exit event\n"); 2689 goto out; 2690 } 2691 2692 evlist__for_each_entry(session->evlist, evsel) { 2693 if (evsel->attr.type == PERF_TYPE_SOFTWARE && 2694 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ || 2695 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN || 2696 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS)) 2697 evsel->handler = trace__pgfault; 2698 } 2699 2700 setup_pager(); 2701 2702 err = perf_session__process_events(session); 2703 if (err) 2704 pr_err("Failed to process events, error %d", err); 2705 2706 else if (trace->summary) 2707 trace__fprintf_thread_summary(trace, trace->output); 2708 2709 out: 2710 perf_session__delete(session); 2711 2712 return err; 2713 } 2714 2715 static size_t trace__fprintf_threads_header(FILE *fp) 2716 { 2717 size_t printed; 2718 2719 printed = fprintf(fp, "\n Summary of events:\n\n"); 2720 2721 return printed; 2722 } 2723 2724 DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs, 2725 struct stats *stats; 2726 double msecs; 2727 int syscall; 2728 ) 2729 { 2730 struct int_node *source = rb_entry(nd, struct int_node, rb_node); 2731 struct stats *stats = source->priv; 2732 2733 entry->syscall = source->i; 2734 entry->stats = stats; 2735 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0; 2736 } 2737 2738 static size_t thread__dump_stats(struct thread_trace *ttrace, 2739 struct trace *trace, FILE *fp) 2740 { 2741 size_t printed = 0; 2742 struct syscall *sc; 2743 struct rb_node *nd; 2744 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats); 2745 2746 if (syscall_stats == NULL) 2747 return 0; 2748 2749 printed += fprintf(fp, "\n"); 2750 2751 printed += fprintf(fp, " syscall calls total min avg max stddev\n"); 2752 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); 2753 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n"); 2754 2755 resort_rb__for_each_entry(nd, syscall_stats) { 2756 struct stats *stats = syscall_stats_entry->stats; 2757 if (stats) { 2758 double min = (double)(stats->min) / NSEC_PER_MSEC; 2759 double max = (double)(stats->max) / NSEC_PER_MSEC; 2760 double avg = avg_stats(stats); 2761 double pct; 2762 u64 n = (u64) stats->n; 2763 2764 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0; 2765 avg /= NSEC_PER_MSEC; 2766 2767 sc = &trace->syscalls.table[syscall_stats_entry->syscall]; 2768 printed += fprintf(fp, " %-15s", sc->name); 2769 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f", 2770 n, syscall_stats_entry->msecs, min, avg); 2771 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); 2772 } 2773 } 2774 2775 resort_rb__delete(syscall_stats); 2776 printed += fprintf(fp, "\n\n"); 2777 2778 return printed; 2779 } 2780 2781 static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace) 2782 { 2783 size_t printed = 0; 2784 struct thread_trace *ttrace = thread__priv(thread); 2785 double ratio; 2786 2787 if (ttrace == NULL) 2788 return 0; 2789 2790 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0; 2791 2792 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid); 2793 printed += fprintf(fp, "%lu events, ", ttrace->nr_events); 2794 printed += fprintf(fp, "%.1f%%", ratio); 2795 if (ttrace->pfmaj) 2796 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj); 2797 if (ttrace->pfmin) 2798 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin); 2799 if (trace->sched) 2800 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms); 2801 else if (fputc('\n', fp) != EOF) 2802 ++printed; 2803 2804 printed += thread__dump_stats(ttrace, trace, fp); 2805 2806 return printed; 2807 } 2808 2809 static unsigned long thread__nr_events(struct thread_trace *ttrace) 2810 { 2811 return ttrace ? ttrace->nr_events : 0; 2812 } 2813 2814 DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)), 2815 struct thread *thread; 2816 ) 2817 { 2818 entry->thread = rb_entry(nd, struct thread, rb_node); 2819 } 2820 2821 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) 2822 { 2823 size_t printed = trace__fprintf_threads_header(fp); 2824 struct rb_node *nd; 2825 int i; 2826 2827 for (i = 0; i < THREADS__TABLE_SIZE; i++) { 2828 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i); 2829 2830 if (threads == NULL) { 2831 fprintf(fp, "%s", "Error sorting output by nr_events!\n"); 2832 return 0; 2833 } 2834 2835 resort_rb__for_each_entry(nd, threads) 2836 printed += trace__fprintf_thread(fp, threads_entry->thread, trace); 2837 2838 resort_rb__delete(threads); 2839 } 2840 return printed; 2841 } 2842 2843 static int trace__set_duration(const struct option *opt, const char *str, 2844 int unset __maybe_unused) 2845 { 2846 struct trace *trace = opt->value; 2847 2848 trace->duration_filter = atof(str); 2849 return 0; 2850 } 2851 2852 static int trace__set_filter_pids(const struct option *opt, const char *str, 2853 int unset __maybe_unused) 2854 { 2855 int ret = -1; 2856 size_t i; 2857 struct trace *trace = opt->value; 2858 /* 2859 * FIXME: introduce a intarray class, plain parse csv and create a 2860 * { int nr, int entries[] } struct... 2861 */ 2862 struct intlist *list = intlist__new(str); 2863 2864 if (list == NULL) 2865 return -1; 2866 2867 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1; 2868 trace->filter_pids.entries = calloc(i, sizeof(pid_t)); 2869 2870 if (trace->filter_pids.entries == NULL) 2871 goto out; 2872 2873 trace->filter_pids.entries[0] = getpid(); 2874 2875 for (i = 1; i < trace->filter_pids.nr; ++i) 2876 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i; 2877 2878 intlist__delete(list); 2879 ret = 0; 2880 out: 2881 return ret; 2882 } 2883 2884 static int trace__open_output(struct trace *trace, const char *filename) 2885 { 2886 struct stat st; 2887 2888 if (!stat(filename, &st) && st.st_size) { 2889 char oldname[PATH_MAX]; 2890 2891 scnprintf(oldname, sizeof(oldname), "%s.old", filename); 2892 unlink(oldname); 2893 rename(filename, oldname); 2894 } 2895 2896 trace->output = fopen(filename, "w"); 2897 2898 return trace->output == NULL ? -errno : 0; 2899 } 2900 2901 static int parse_pagefaults(const struct option *opt, const char *str, 2902 int unset __maybe_unused) 2903 { 2904 int *trace_pgfaults = opt->value; 2905 2906 if (strcmp(str, "all") == 0) 2907 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN; 2908 else if (strcmp(str, "maj") == 0) 2909 *trace_pgfaults |= TRACE_PFMAJ; 2910 else if (strcmp(str, "min") == 0) 2911 *trace_pgfaults |= TRACE_PFMIN; 2912 else 2913 return -1; 2914 2915 return 0; 2916 } 2917 2918 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler) 2919 { 2920 struct perf_evsel *evsel; 2921 2922 evlist__for_each_entry(evlist, evsel) 2923 evsel->handler = handler; 2924 } 2925 2926 /* 2927 * XXX: Hackish, just splitting the combined -e+--event (syscalls 2928 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use 2929 * existing facilities unchanged (trace->ev_qualifier + parse_options()). 2930 * 2931 * It'd be better to introduce a parse_options() variant that would return a 2932 * list with the terms it didn't match to an event... 2933 */ 2934 static int trace__parse_events_option(const struct option *opt, const char *str, 2935 int unset __maybe_unused) 2936 { 2937 struct trace *trace = (struct trace *)opt->value; 2938 const char *s = str; 2939 char *sep = NULL, *lists[2] = { NULL, NULL, }; 2940 int len = strlen(str) + 1, err = -1, list, idx; 2941 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR); 2942 char group_name[PATH_MAX]; 2943 2944 if (strace_groups_dir == NULL) 2945 return -1; 2946 2947 if (*s == '!') { 2948 ++s; 2949 trace->not_ev_qualifier = true; 2950 } 2951 2952 while (1) { 2953 if ((sep = strchr(s, ',')) != NULL) 2954 *sep = '\0'; 2955 2956 list = 0; 2957 if (syscalltbl__id(trace->sctbl, s) >= 0 || 2958 syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) { 2959 list = 1; 2960 } else { 2961 path__join(group_name, sizeof(group_name), strace_groups_dir, s); 2962 if (access(group_name, R_OK) == 0) 2963 list = 1; 2964 } 2965 2966 if (lists[list]) { 2967 sprintf(lists[list] + strlen(lists[list]), ",%s", s); 2968 } else { 2969 lists[list] = malloc(len); 2970 if (lists[list] == NULL) 2971 goto out; 2972 strcpy(lists[list], s); 2973 } 2974 2975 if (!sep) 2976 break; 2977 2978 *sep = ','; 2979 s = sep + 1; 2980 } 2981 2982 if (lists[1] != NULL) { 2983 struct strlist_config slist_config = { 2984 .dirname = strace_groups_dir, 2985 }; 2986 2987 trace->ev_qualifier = strlist__new(lists[1], &slist_config); 2988 if (trace->ev_qualifier == NULL) { 2989 fputs("Not enough memory to parse event qualifier", trace->output); 2990 goto out; 2991 } 2992 2993 if (trace__validate_ev_qualifier(trace)) 2994 goto out; 2995 } 2996 2997 err = 0; 2998 2999 if (lists[0]) { 3000 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event", 3001 "event selector. use 'perf list' to list available events", 3002 parse_events_option); 3003 err = parse_events_option(&o, lists[0], 0); 3004 } 3005 out: 3006 if (sep) 3007 *sep = ','; 3008 3009 return err; 3010 } 3011 3012 static int trace__parse_cgroups(const struct option *opt, const char *str, int unset) 3013 { 3014 struct trace *trace = opt->value; 3015 3016 if (!list_empty(&trace->evlist->entries)) 3017 return parse_cgroups(opt, str, unset); 3018 3019 trace->cgroup = evlist__findnew_cgroup(trace->evlist, str); 3020 3021 return 0; 3022 } 3023 3024 int cmd_trace(int argc, const char **argv) 3025 { 3026 const char *trace_usage[] = { 3027 "perf trace [<options>] [<command>]", 3028 "perf trace [<options>] -- <command> [<options>]", 3029 "perf trace record [<options>] [<command>]", 3030 "perf trace record [<options>] -- <command> [<options>]", 3031 NULL 3032 }; 3033 struct trace trace = { 3034 .syscalls = { 3035 . max = -1, 3036 }, 3037 .opts = { 3038 .target = { 3039 .uid = UINT_MAX, 3040 .uses_mmap = true, 3041 }, 3042 .user_freq = UINT_MAX, 3043 .user_interval = ULLONG_MAX, 3044 .no_buffering = true, 3045 .mmap_pages = UINT_MAX, 3046 .proc_map_timeout = 500, 3047 }, 3048 .output = stderr, 3049 .show_comm = true, 3050 .trace_syscalls = true, 3051 .kernel_syscallchains = false, 3052 .max_stack = UINT_MAX, 3053 }; 3054 const char *output_name = NULL; 3055 const struct option trace_options[] = { 3056 OPT_CALLBACK('e', "event", &trace, "event", 3057 "event/syscall selector. use 'perf list' to list available events", 3058 trace__parse_events_option), 3059 OPT_BOOLEAN(0, "comm", &trace.show_comm, 3060 "show the thread COMM next to its id"), 3061 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), 3062 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace", 3063 trace__parse_events_option), 3064 OPT_STRING('o', "output", &output_name, "file", "output file name"), 3065 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"), 3066 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", 3067 "trace events on existing process id"), 3068 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid", 3069 "trace events on existing thread id"), 3070 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids", 3071 "pids to filter (by the kernel)", trace__set_filter_pids), 3072 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide, 3073 "system-wide collection from all CPUs"), 3074 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu", 3075 "list of cpus to monitor"), 3076 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, 3077 "child tasks do not inherit counters"), 3078 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages", 3079 "number of mmap data pages", 3080 perf_evlist__parse_mmap_pages), 3081 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user", 3082 "user to profile"), 3083 OPT_CALLBACK(0, "duration", &trace, "float", 3084 "show only events with duration > N.M ms", 3085 trace__set_duration), 3086 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), 3087 OPT_INCR('v', "verbose", &verbose, "be more verbose"), 3088 OPT_BOOLEAN('T', "time", &trace.full_time, 3089 "Show full timestamp, not time relative to first start"), 3090 OPT_BOOLEAN('s', "summary", &trace.summary_only, 3091 "Show only syscall summary with statistics"), 3092 OPT_BOOLEAN('S', "with-summary", &trace.summary, 3093 "Show all syscalls and summary with statistics"), 3094 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min", 3095 "Trace pagefaults", parse_pagefaults, "maj"), 3096 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"), 3097 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"), 3098 OPT_CALLBACK(0, "call-graph", &trace.opts, 3099 "record_mode[,record_size]", record_callchain_help, 3100 &record_parse_callchain_opt), 3101 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains, 3102 "Show the kernel callchains on the syscall exit path"), 3103 OPT_UINTEGER(0, "min-stack", &trace.min_stack, 3104 "Set the minimum stack depth when parsing the callchain, " 3105 "anything below the specified depth will be ignored."), 3106 OPT_UINTEGER(0, "max-stack", &trace.max_stack, 3107 "Set the maximum stack depth when parsing the callchain, " 3108 "anything beyond the specified depth will be ignored. " 3109 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), 3110 OPT_BOOLEAN(0, "print-sample", &trace.print_sample, 3111 "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"), 3112 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout, 3113 "per thread proc mmap processing timeout in ms"), 3114 OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only", 3115 trace__parse_cgroups), 3116 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay, 3117 "ms to wait before starting measurement after program " 3118 "start"), 3119 OPT_END() 3120 }; 3121 bool __maybe_unused max_stack_user_set = true; 3122 bool mmap_pages_user_set = true; 3123 const char * const trace_subcommands[] = { "record", NULL }; 3124 int err; 3125 char bf[BUFSIZ]; 3126 3127 signal(SIGSEGV, sighandler_dump_stack); 3128 signal(SIGFPE, sighandler_dump_stack); 3129 3130 trace.evlist = perf_evlist__new(); 3131 trace.sctbl = syscalltbl__new(); 3132 3133 if (trace.evlist == NULL || trace.sctbl == NULL) { 3134 pr_err("Not enough memory to run!\n"); 3135 err = -ENOMEM; 3136 goto out; 3137 } 3138 3139 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands, 3140 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION); 3141 3142 if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) { 3143 usage_with_options_msg(trace_usage, trace_options, 3144 "cgroup monitoring only available in system-wide mode"); 3145 } 3146 3147 err = bpf__setup_stdout(trace.evlist); 3148 if (err) { 3149 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf)); 3150 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf); 3151 goto out; 3152 } 3153 3154 err = -1; 3155 3156 if (trace.trace_pgfaults) { 3157 trace.opts.sample_address = true; 3158 trace.opts.sample_time = true; 3159 } 3160 3161 if (trace.opts.mmap_pages == UINT_MAX) 3162 mmap_pages_user_set = false; 3163 3164 if (trace.max_stack == UINT_MAX) { 3165 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack; 3166 max_stack_user_set = false; 3167 } 3168 3169 #ifdef HAVE_DWARF_UNWIND_SUPPORT 3170 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) { 3171 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false); 3172 } 3173 #endif 3174 3175 if (callchain_param.enabled) { 3176 if (!mmap_pages_user_set && geteuid() == 0) 3177 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4; 3178 3179 symbol_conf.use_callchain = true; 3180 } 3181 3182 if (trace.evlist->nr_entries > 0) 3183 evlist__set_evsel_handler(trace.evlist, trace__event_handler); 3184 3185 if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) 3186 return trace__record(&trace, argc-1, &argv[1]); 3187 3188 /* summary_only implies summary option, but don't overwrite summary if set */ 3189 if (trace.summary_only) 3190 trace.summary = trace.summary_only; 3191 3192 if (!trace.trace_syscalls && !trace.trace_pgfaults && 3193 trace.evlist->nr_entries == 0 /* Was --events used? */) { 3194 pr_err("Please specify something to trace.\n"); 3195 return -1; 3196 } 3197 3198 if (!trace.trace_syscalls && trace.ev_qualifier) { 3199 pr_err("The -e option can't be used with --no-syscalls.\n"); 3200 goto out; 3201 } 3202 3203 if (output_name != NULL) { 3204 err = trace__open_output(&trace, output_name); 3205 if (err < 0) { 3206 perror("failed to create output file"); 3207 goto out; 3208 } 3209 } 3210 3211 trace.open_id = syscalltbl__id(trace.sctbl, "open"); 3212 3213 err = target__validate(&trace.opts.target); 3214 if (err) { 3215 target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 3216 fprintf(trace.output, "%s", bf); 3217 goto out_close; 3218 } 3219 3220 err = target__parse_uid(&trace.opts.target); 3221 if (err) { 3222 target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 3223 fprintf(trace.output, "%s", bf); 3224 goto out_close; 3225 } 3226 3227 if (!argc && target__none(&trace.opts.target)) 3228 trace.opts.target.system_wide = true; 3229 3230 if (input_name) 3231 err = trace__replay(&trace); 3232 else 3233 err = trace__run(&trace, argc, argv); 3234 3235 out_close: 3236 if (output_name != NULL) 3237 fclose(trace.output); 3238 out: 3239 return err; 3240 } 3241