/*
 * builtin-trace.c
 *
 * Builtin 'trace' command:
 *
 * Display a continuously updated trace of any workload, CPU, specific PID,
 * system wide, etc. Default format is loosely strace like, but any other
 * event may be specified using --event.
 *
 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Initially based on the 'trace' prototype by Thomas Gleixner:
 *
 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include <traceevent/event-parse.h>
#include <api/fs/tracing_path.h>
#include "builtin.h"
#include "util/color.h"
#include "util/debug.h"
#include "util/env.h"
#include "util/event.h"
#include "util/evlist.h"
#include <subcmd/exec-cmd.h>
#include "util/machine.h"
#include "util/path.h"
#include "util/session.h"
#include "util/thread.h"
#include <subcmd/parse-options.h>
#include "util/strlist.h"
#include "util/intlist.h"
#include "util/thread_map.h"
#include "util/stat.h"
#include "trace/beauty/beauty.h"
#include "trace-event.h"
#include "util/parse-events.h"
#include "util/bpf-loader.h"
#include "callchain.h"
#include "print_binary.h"
#include "string2.h"
#include "syscalltbl.h"
#include "rb_resort.h"

#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <linux/err.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/random.h>
#include <linux/stringify.h>
#include <linux/time64.h>

#include "sane_ctype.h"

/* Fallbacks for libcs whose headers predate these definitions. */
#ifndef O_CLOEXEC
# define O_CLOEXEC		02000000
#endif

#ifndef F_LINUX_SPECIFIC_BASE
# define F_LINUX_SPECIFIC_BASE	1024
#endif

/*
 * Global state for one 'perf trace' session: the evlist/machine being
 * monitored, the per-syscall table, output options and accumulated stats.
 */
struct trace {
	struct perf_tool	tool;
	struct syscalltbl	*sctbl;
	struct {
		int		max;	/* highest syscall id seen, sizes 'table' */
		struct syscall  *table;
		struct {
			struct perf_evsel *sys_enter,
					  *sys_exit;
		}		events;
	} syscalls;
	struct record_opts	opts;
	struct perf_evlist	*evlist;
	struct machine		*host;
	struct thread		*current;
	u64			base_time;	/* first sample timestamp, for relative tstamps */
	FILE			*output;
	unsigned long		nr_events;
	struct strlist		*ev_qualifier;
	struct {
		size_t		nr;
		int		*entries;
	}			ev_qualifier_ids;
	struct {
		size_t		nr;
		pid_t		*entries;
	}			filter_pids;
	double			duration_filter;
	double			runtime_ms;
	struct {
		u64		vfs_getname,
				proc_getname;
	} stats;
	unsigned int		max_stack;
	unsigned int		min_stack;
	bool			not_ev_qualifier;
	bool			live;
	bool			full_time;
	bool			sched;
	bool			multiple_threads;
	bool			summary;
	bool			summary_only;
	bool			show_comm;
	bool			print_sample;
	bool			show_tool_stats;
	bool			trace_syscalls;
	bool			kernel_syscallchains;
	bool			force;
	bool			vfs_getname;
	int			trace_pgfaults;
	int			open_id;
};

/*
 * Accessor for one tracepoint payload field: 'offset' into the raw sample
 * data plus a typed reader (integer of some width, possibly byte-swapped,
 * or a raw pointer into the payload).
 */
struct tp_field {
	int offset;
	union {
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};

/* Generate tp_field__u{8,16,32,64}(): read an unaligned uint from raw_data. */
#define TP_UINT_FIELD(bits) \
static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return value;  \
}

TP_UINT_FIELD(8);
TP_UINT_FIELD(16);
TP_UINT_FIELD(32);
TP_UINT_FIELD(64);

/* Same as above but byte-swapping, for samples from other-endian files. */
#define TP_UINT_FIELD__SWAPPED(bits) \
static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return bswap_##bits(value);\
}

TP_UINT_FIELD__SWAPPED(16);
TP_UINT_FIELD__SWAPPED(32);
TP_UINT_FIELD__SWAPPED(64);

/* Bind an integer reader of the right width/endianness to 'field'. */
static int tp_field__init_uint(struct tp_field *field,
			       struct format_field *format_field,
			       bool needs_swap)
{
	field->offset = format_field->offset;

	switch (format_field->size) {
	case 1:
		field->integer = tp_field__u8;
		break;
	case 2:
		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
		break;
	case 4:
		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
		break;
	case 8:
		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
		break;
	default:
		return -1;
	}

	return 0;
}

static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
{
	return sample->raw_data + field->offset;
}

static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
{
	field->offset = format_field->offset;
	field->pointer = tp_field__ptr;
	return 0;
}

/*
 * Per-evsel private data for the raw_syscalls:{sys_enter,sys_exit}
 * tracepoints: the syscall id field plus either the args array (enter)
 * or the return value (exit).
 */
struct syscall_tp {
	struct tp_field id;
	union {
		struct tp_field args, ret;
	};
};

static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
					  struct tp_field *field,
					  const char *name)
{
	struct format_field *format_field = perf_evsel__field(evsel, name);

	if (format_field == NULL)
		return -1;

	return tp_field__init_uint(field, format_field, evsel->needs_swap);
}

#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })

static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *format_field = perf_evsel__field(evsel, name);

	if (format_field == NULL)
		return -1;

	return tp_field__init_ptr(field, format_field);
}

#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })

static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
	zfree(&evsel->priv);
	perf_evsel__delete(evsel);
}

/* Allocate evsel->priv (struct syscall_tp) and resolve the 'id' field. */
static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
{
	evsel->priv = malloc(sizeof(struct syscall_tp));
	if (evsel->priv != NULL) {
		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
			goto out_delete;

		evsel->handler = handler;
		return 0;
	}

	return -ENOMEM;

out_delete:
	zfree(&evsel->priv);
	return -ENOENT;
}

static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (IS_ERR(evsel))
		evsel = perf_evsel__newtp("syscalls", direction);

	if (IS_ERR(evsel))
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler))
		goto out_delete;

	return evsel;

out_delete:
	perf_evsel__delete_priv(evsel);
	return NULL;
}

#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })

/*
 * Print a value as its symbolic name from 'sa', falling back to 'intfmt'
 * when it is outside the table.
 */
size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val)
{
	int idx = val - sa->offset;

	if (idx < 0 || idx >= sa->nr_entries)
		return scnprintf(bf, size, intfmt, val);

	return scnprintf(bf, size, "%s", sa->entries[idx]);
}

static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
						const char *intfmt,
					        struct syscall_arg *arg)
{
	return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->val);
}

static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray

/* Several strarrays chained together, e.g. fcntl's two command ranges. */
struct strarrays {
	int		nr_entries;
	struct strarray **entries;
};

#define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
					struct syscall_arg *arg)
{
	struct strarrays *sas = arg->parm;
	int i;

	for (i = 0; i < sas->nr_entries; ++i) {
		struct strarray *sa = sas->entries[i];
		int idx = arg->val - sa->offset;

		if (idx >= 0 && idx < sa->nr_entries) {
			if (sa->entries[idx] == NULL)
				break;
			return scnprintf(bf, size, "%s", sa->entries[idx]);
		}
	}

	return scnprintf(bf, size, "%d", arg->val);
}

#ifndef AT_FDCWD
#define AT_FDCWD	-100
#endif

static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
					   struct syscall_arg *arg)
{
	int fd = arg->val;

	if (fd == AT_FDCWD)
		return scnprintf(bf, size, "CWD");

	return syscall_arg__scnprintf_fd(bf, size, arg);
}

#define SCA_FDAT syscall_arg__scnprintf_fd_at

static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd

size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%#lx", arg->val);
}

size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%d", arg->val);
}

size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%ld", arg->val);
}

static const char *bpf_cmd[] = {
	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY", "PROG_LOAD",
};
static DEFINE_STRARRAY(bpf_cmd);

static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);

static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);

static const char *keyctl_options[] = {
	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);

static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
	"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
	"GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

static const char *fcntl_linux_specific_cmds[] = {
	"SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
	"SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
	"GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
};

static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE);

static struct strarray *fcntl_cmds_arrays[] = {
	&strarray__fcntl_cmds,
	&strarray__fcntl_linux_specific_cmds,
};

static DEFINE_STRARRAYS(fcntl_cmds_arrays);

static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);

static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid);

static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
	"ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);

static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	size_t printed = 0;
	int mode = arg->val;

	if (mode == F_OK) /* 0 */
		return scnprintf(bf, size, "F");
#define	P_MODE(n) \
	if (mode & n##_OK) { \
		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
		mode &= ~n##_OK; \
	}

	P_MODE(R);
	P_MODE(W);
	P_MODE(X);
#undef P_MODE

	if (mode)
		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);

	return printed;
}

#define SCA_ACCMODE syscall_arg__scnprintf_access_mode

static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename

static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
						struct syscall_arg *arg)
{
	int printed = 0, flags = arg->val;

#define	P_FLAG(n) \
	if (flags & O_##n) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
		flags &= ~O_##n; \
	}

	P_FLAG(CLOEXEC);
	P_FLAG(NONBLOCK);
#undef P_FLAG

	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}

#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags

#ifndef GRND_NONBLOCK
#define GRND_NONBLOCK	0x0001
#endif
#ifndef GRND_RANDOM
#define GRND_RANDOM	0x0002
#endif

static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
						   struct syscall_arg *arg)
{
	int printed = 0, flags = arg->val;

#define	P_FLAG(n) \
	if (flags & GRND_##n) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
		flags &= ~GRND_##n; \
	}

	P_FLAG(RANDOM);
	P_FLAG(NONBLOCK);
#undef P_FLAG

	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}

#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags

#define STRARRAY(name, array) \
	  { .scnprintf	= SCA_STRARRAY, \
	    .parm	= &strarray__##array, }

#include "trace/beauty/arch_errno_names.c"
#include "trace/beauty/eventfd.c"
#include "trace/beauty/flock.c"
#include "trace/beauty/futex_op.c"
#include "trace/beauty/mmap.c"
#include "trace/beauty/mode_t.c"
#include "trace/beauty/msg_flags.c"
#include "trace/beauty/open_flags.c"
#include "trace/beauty/perf_event_open.c"
#include "trace/beauty/pid.c"
#include "trace/beauty/sched_policy.c"
#include "trace/beauty/seccomp.c"
#include "trace/beauty/signum.c"
#include "trace/beauty/socket_type.c"
#include "trace/beauty/waitid_options.c"

/* How to pretty-print one syscall argument. */
struct syscall_arg_fmt {
	size_t	   (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void	   *parm;
	const char *name;
	bool	   show_zero;
};

/*
 * Per-syscall formatting table.  NOTE: must be kept sorted by .name,
 * syscall_fmt__find() bsearch()es it.
 */
static struct syscall_fmt {
	const char *name;
	const char *alias;
	struct syscall_arg_fmt arg[6];
	u8	   nr_args;
	bool	   errpid;
	bool	   timeout;
	bool	   hexret;
} syscall_fmts[] = {
	{ .name	    = "access",
	  .arg = { [1] = { .scnprintf = SCA_ACCMODE,  /* mode */ }, }, },
	{ .name	    = "bpf",
	  .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
	{ .name	    = "brk",	    .hexret = true,
	  .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, },
	{ .name     = "clock_gettime",
	  .arg = { [0] = STRARRAY(clk_id, clockid), }, },
	{ .name	    = "clone",	    .errpid = true, .nr_args = 5,
	  .arg = { [0] = { .name = "flags",	    .scnprintf = SCA_CLONE_FLAGS, },
		   [1] = { .name = "child_stack",   .scnprintf = SCA_HEX, },
		   [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
		   [3] = { .name = "child_tidptr",  .scnprintf = SCA_HEX, },
		   [4] = { .name = "tls",	    .scnprintf = SCA_HEX, }, }, },
	{ .name	    = "close",
	  .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
	{ .name	    = "epoll_ctl",
	  .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
	{ .name	    = "eventfd2",
	  .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
	{ .name	    = "fchmodat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "fchownat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "fcntl",
	  .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD,  /* cmd */
			   .parm      = &strarrays__fcntl_cmds_arrays,
			   .show_zero = true, },
		   [2] = { .scnprintf =  SCA_FCNTL_ARG, /* arg */ }, }, },
	{ .name	    = "flock",
	  .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
	{ .name	    = "fstat", .alias = "newfstat", },
	{ .name	    = "fstatat", .alias = "newfstatat", },
	{ .name	    = "futex",
	  .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ }, }, },
	{ .name	    = "futimesat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "getitimer",
	  .arg = { [0] = STRARRAY(which, itimers), }, },
	{ .name	    = "getpid",	    .errpid = true, },
	{ .name	    = "getpgid",    .errpid = true, },
	{ .name	    = "getppid",    .errpid = true, },
	{ .name	    = "getrandom",
	  .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
	{ .name	    = "getrlimit",
	  .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
	{ .name	    = "gettid",	    .errpid = true, },
	{ .name	    = "ioctl",
	  .arg = {
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
		   [1] = { .scnprintf = SCA_IOCTL_CMD, /* cmd */ },
		   [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
#else
		   [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
#endif
	{ .name	    = "kcmp",	    .nr_args = 5,
	  .arg = { [0] = { .name = "pid1",	.scnprintf = SCA_PID, },
		   [1] = { .name = "pid2",	.scnprintf = SCA_PID, },
		   [2] = { .name = "type",	.scnprintf = SCA_KCMP_TYPE, },
		   [3] = { .name = "idx1",	.scnprintf = SCA_KCMP_IDX, },
		   [4] = { .name = "idx2",	.scnprintf = SCA_KCMP_IDX, }, }, },
	{ .name	    = "keyctl",
	  .arg = { [0] = STRARRAY(option, keyctl_options), }, },
	{ .name	    = "kill",
	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "linkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "lseek",
	  .arg = { [2] = STRARRAY(whence, whences), }, },
	{ .name	    = "lstat", .alias = "newlstat", },
	{ .name     = "madvise",
	  .arg = { [0] = { .scnprintf = SCA_HEX,      /* start */ },
		   [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, },
	{ .name	    = "mkdirat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "mknodat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
	{ .name	    = "mlock",
	  .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
	{ .name	    = "mlockall",
	  .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
	{ .name	    = "mmap",	    .hexret = true,
/* The standard mmap maps to old_mmap on s390x */
#if defined(__s390x__)
	.alias = "old_mmap",
#endif
	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* addr */ },
		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ },
		   [3] = { .scnprintf = SCA_MMAP_FLAGS,	/* flags */ }, }, },
	{ .name	    = "mprotect",
	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* start */ },
		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ }, }, },
	{ .name	    = "mq_unlink",
	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
	{ .name	    = "mremap",	    .hexret = true,
	  .arg = { [0] = { .scnprintf = SCA_HEX,	  /* addr */ },
		   [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ },
		   [4] = { .scnprintf = SCA_HEX,	  /* new_addr */ }, }, },
	{ .name	    = "munlock",
	  .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
	{ .name	    = "munmap",
	  .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
	{ .name	    = "name_to_handle_at",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "newfstatat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "open",
	  .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
	{ .name	    = "open_by_handle_at",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	/* dfd */ },
		   [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
	{ .name	    = "openat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	/* dfd */ },
		   [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
	{ .name	    = "perf_event_open",
	  .arg = { [2] = { .scnprintf = SCA_INT,	/* cpu */ },
		   [3] = { .scnprintf = SCA_FD,		/* group_fd */ },
		   [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
	{ .name	    = "pipe2",
	  .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, },
	{ .name	    = "pkey_alloc",
	  .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS,	/* access_rights */ }, }, },
	{ .name	    = "pkey_free",
	  .arg = { [0] = { .scnprintf = SCA_INT,	/* key */ }, }, },
	{ .name	    = "pkey_mprotect",
	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* start */ },
		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ },
		   [3] = { .scnprintf = SCA_INT,	/* pkey */ }, }, },
	{ .name	    = "poll", .timeout = true, },
	{ .name	    = "ppoll", .timeout = true, },
	{ .name	    = "prctl", .alias = "arch_prctl",
	  .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ },
		   [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ },
		   [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, },
	{ .name	    = "pread", .alias = "pread64", },
	{ .name	    = "preadv", .alias = "pread", },
	{ .name	    = "prlimit64",
	  .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
	{ .name	    = "pwrite", .alias = "pwrite64", },
	{ .name	    = "readlinkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "recvfrom",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "recvmmsg",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "recvmsg",
	  .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "renameat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "rt_sigaction",
	  .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "rt_sigprocmask",
	  .arg = { [0] = STRARRAY(how, sighow), }, },
	{ .name	    = "rt_sigqueueinfo",
	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "rt_tgsigqueueinfo",
	  .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "sched_setscheduler",
	  .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, },
	{ .name	    = "seccomp",
	  .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP,	   /* op */ },
		   [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
	{ .name	    = "select", .timeout = true, },
	{ .name	    = "sendmmsg",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "sendmsg",
	  .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "sendto",
	  .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
	{ .name	    = "set_tid_address", .errpid = true, },
	{ .name	    = "setitimer",
	  .arg = { [0] = STRARRAY(which, itimers), }, },
	{ .name	    = "setrlimit",
	  .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
	{ .name	    = "socket",
	  .arg = { [0] = STRARRAY(family, socket_families),
		   [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
	{ .name	    = "socketpair",
	  .arg = { [0] = STRARRAY(family, socket_families),
		   [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
	{ .name	    = "stat", .alias = "newstat", },
	{ .name	    = "statx",
	  .arg = { [0] = { .scnprintf = SCA_FDAT,	 /* fdat */ },
		   [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } ,
		   [3] = { .scnprintf = SCA_STATX_MASK,	 /* mask */ }, }, },
	{ .name	    = "swapoff",
	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
	{ .name	    = "swapon",
	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
	{ .name	    = "symlinkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "tgkill",
	  .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "tkill",
	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
	{ .name	    = "uname", .alias = "newuname", },
	{ .name	    = "unlinkat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
	{ .name	    = "utimensat",
	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
	{ .name	    = "wait4",	    .errpid = true,
	  .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
	{ .name	    = "waitid",	    .errpid = true,
	  .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
};

static int syscall_fmt__cmp(const void *name, const void *fmtp)
{
	const struct syscall_fmt *fmt = fmtp;
	return strcmp(name, fmt->name);
}

static struct syscall_fmt *syscall_fmt__find(const char *name)
{
	const int nmemb = ARRAY_SIZE(syscall_fmts);
	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
}

/* One entry in trace->syscalls.table, indexed by syscall id. */
struct syscall {
	struct event_format *tp_format;
	int		    nr_args;
	struct format_field *args;
	const char	    *name;
	bool		    is_exit;
	struct syscall_fmt  *fmt;
	struct syscall_arg_fmt *arg_fmt;
};

/*
 * We need to have this 'calculated' boolean because in some cases we really
don't know what is the duration of a syscall, for instance, when we start 814 * a session and some threads are waiting for a syscall to finish, say 'poll', 815 * in which case all we can do is to print "( ? ) for duration and for the 816 * start timestamp. 817 */ 818 static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp) 819 { 820 double duration = (double)t / NSEC_PER_MSEC; 821 size_t printed = fprintf(fp, "("); 822 823 if (!calculated) 824 printed += fprintf(fp, " ? "); 825 else if (duration >= 1.0) 826 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); 827 else if (duration >= 0.01) 828 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); 829 else 830 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration); 831 return printed + fprintf(fp, "): "); 832 } 833 834 /** 835 * filename.ptr: The filename char pointer that will be vfs_getname'd 836 * filename.entry_str_pos: Where to insert the string translated from 837 * filename.ptr by the vfs_getname tracepoint/kprobe. 838 * ret_scnprintf: syscall args may set this to a different syscall return 839 * formatter, for instance, fcntl may return fds, file flags, etc. 
840 */ 841 struct thread_trace { 842 u64 entry_time; 843 bool entry_pending; 844 unsigned long nr_events; 845 unsigned long pfmaj, pfmin; 846 char *entry_str; 847 double runtime_ms; 848 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); 849 struct { 850 unsigned long ptr; 851 short int entry_str_pos; 852 bool pending_open; 853 unsigned int namelen; 854 char *name; 855 } filename; 856 struct { 857 int max; 858 char **table; 859 } paths; 860 861 struct intlist *syscall_stats; 862 }; 863 864 static struct thread_trace *thread_trace__new(void) 865 { 866 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace)); 867 868 if (ttrace) 869 ttrace->paths.max = -1; 870 871 ttrace->syscall_stats = intlist__new(NULL); 872 873 return ttrace; 874 } 875 876 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) 877 { 878 struct thread_trace *ttrace; 879 880 if (thread == NULL) 881 goto fail; 882 883 if (thread__priv(thread) == NULL) 884 thread__set_priv(thread, thread_trace__new()); 885 886 if (thread__priv(thread) == NULL) 887 goto fail; 888 889 ttrace = thread__priv(thread); 890 ++ttrace->nr_events; 891 892 return ttrace; 893 fail: 894 color_fprintf(fp, PERF_COLOR_RED, 895 "WARNING: not enough memory, dropping samples!\n"); 896 return NULL; 897 } 898 899 900 void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg, 901 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg)) 902 { 903 struct thread_trace *ttrace = thread__priv(arg->thread); 904 905 ttrace->ret_scnprintf = ret_scnprintf; 906 } 907 908 #define TRACE_PFMAJ (1 << 0) 909 #define TRACE_PFMIN (1 << 1) 910 911 static const size_t trace__entry_str_size = 2048; 912 913 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) 914 { 915 struct thread_trace *ttrace = thread__priv(thread); 916 917 if (fd > ttrace->paths.max) { 918 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *)); 919 920 if (npath == NULL) 
921 return -1; 922 923 if (ttrace->paths.max != -1) { 924 memset(npath + ttrace->paths.max + 1, 0, 925 (fd - ttrace->paths.max) * sizeof(char *)); 926 } else { 927 memset(npath, 0, (fd + 1) * sizeof(char *)); 928 } 929 930 ttrace->paths.table = npath; 931 ttrace->paths.max = fd; 932 } 933 934 ttrace->paths.table[fd] = strdup(pathname); 935 936 return ttrace->paths.table[fd] != NULL ? 0 : -1; 937 } 938 939 static int thread__read_fd_path(struct thread *thread, int fd) 940 { 941 char linkname[PATH_MAX], pathname[PATH_MAX]; 942 struct stat st; 943 int ret; 944 945 if (thread->pid_ == thread->tid) { 946 scnprintf(linkname, sizeof(linkname), 947 "/proc/%d/fd/%d", thread->pid_, fd); 948 } else { 949 scnprintf(linkname, sizeof(linkname), 950 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd); 951 } 952 953 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname)) 954 return -1; 955 956 ret = readlink(linkname, pathname, sizeof(pathname)); 957 958 if (ret < 0 || ret > st.st_size) 959 return -1; 960 961 pathname[ret] = '\0'; 962 return trace__set_fd_pathname(thread, fd, pathname); 963 } 964 965 static const char *thread__fd_path(struct thread *thread, int fd, 966 struct trace *trace) 967 { 968 struct thread_trace *ttrace = thread__priv(thread); 969 970 if (ttrace == NULL) 971 return NULL; 972 973 if (fd < 0) 974 return NULL; 975 976 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) { 977 if (!trace->live) 978 return NULL; 979 ++trace->stats.proc_getname; 980 if (thread__read_fd_path(thread, fd)) 981 return NULL; 982 } 983 984 return ttrace->paths.table[fd]; 985 } 986 987 size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg) 988 { 989 int fd = arg->val; 990 size_t printed = scnprintf(bf, size, "%d", fd); 991 const char *path = thread__fd_path(arg->thread, fd, arg->trace); 992 993 if (path) 994 printed += scnprintf(bf + printed, size - printed, "<%s>", path); 995 996 return printed; 997 } 998 999 size_t 
pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size) 1000 { 1001 size_t printed = scnprintf(bf, size, "%d", fd); 1002 struct thread *thread = machine__find_thread(trace->host, pid, pid); 1003 1004 if (thread) { 1005 const char *path = thread__fd_path(thread, fd, trace); 1006 1007 if (path) 1008 printed += scnprintf(bf + printed, size - printed, "<%s>", path); 1009 1010 thread__put(thread); 1011 } 1012 1013 return printed; 1014 } 1015 1016 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 1017 struct syscall_arg *arg) 1018 { 1019 int fd = arg->val; 1020 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg); 1021 struct thread_trace *ttrace = thread__priv(arg->thread); 1022 1023 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) 1024 zfree(&ttrace->paths.table[fd]); 1025 1026 return printed; 1027 } 1028 1029 static void thread__set_filename_pos(struct thread *thread, const char *bf, 1030 unsigned long ptr) 1031 { 1032 struct thread_trace *ttrace = thread__priv(thread); 1033 1034 ttrace->filename.ptr = ptr; 1035 ttrace->filename.entry_str_pos = bf - ttrace->entry_str; 1036 } 1037 1038 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, 1039 struct syscall_arg *arg) 1040 { 1041 unsigned long ptr = arg->val; 1042 1043 if (!arg->trace->vfs_getname) 1044 return scnprintf(bf, size, "%#x", ptr); 1045 1046 thread__set_filename_pos(arg->thread, bf, ptr); 1047 return 0; 1048 } 1049 1050 static bool trace__filter_duration(struct trace *trace, double t) 1051 { 1052 return t < (trace->duration_filter * NSEC_PER_MSEC); 1053 } 1054 1055 static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) 1056 { 1057 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; 1058 1059 return fprintf(fp, "%10.3f ", ts); 1060 } 1061 1062 /* 1063 * We're handling tstamp=0 as an undefined tstamp, i.e. 
like when we are
 * using ttrace->entry_time for a thread that receives a sys_exit without
 * first having received a sys_enter ("poll" issued before tracing session
 * starts, lost sys_enter exit due to ring buffer overflow).
 */
static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
{
	if (tstamp > 0)
		return __trace__fprintf_tstamp(trace, tstamp, fp);

	return fprintf(fp, " ? ");
}

/* Set from the signal handler, polled by the main event loop. */
static bool done = false;
static bool interrupted = false;

static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}

/* Print the "timestamp duration [comm/]tid " prefix of an event line. */
static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
					u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
{
	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
	printed += fprintf_duration(duration, duration_calculated, fp);

	if (trace->multiple_threads) {
		if (trace->show_comm)
			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
		printed += fprintf(fp, "%d ", thread->tid);
	}

	return printed;
}

/* Handle side-band events: warn about lost events, let the machine code do the rest. */
static int trace__process_event(struct trace *trace, struct machine *machine,
				union perf_event *event, struct perf_sample *sample)
{
	int ret = 0;

	switch (event->header.type) {
	case PERF_RECORD_LOST:
		color_fprintf(trace->output, PERF_COLOR_RED,
			      "LOST %" PRIu64 " events!\n", event->lost.lost);
		ret = machine__process_lost_event(machine, event, sample);
		break;
	default:
		ret = machine__process_event(machine, event, sample);
		break;
	}

	return ret;
}

/* perf_tool callback: recover the enclosing struct trace and dispatch. */
static int trace__tool_process(struct perf_tool *tool,
			       union perf_event *event,
			       struct perf_sample *sample,
			       struct machine *machine)
{
	struct trace *trace = container_of(tool, struct trace, tool);
	return trace__process_event(trace, machine, event, sample);
}

/*
 * Kernel address resolver that warns once and gives up when kptr_restrict
 * prevents resolving kernel symbols.
 */
static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
{
	struct machine *machine = vmachine;

	if (machine->kptr_restrict_warned)
		return NULL;

	if (symbol_conf.kptr_restrict) {
		pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
			   "Check /proc/sys/kernel/kptr_restrict.\n\n"
			   "Kernel samples will not be resolved.\n");
		machine->kptr_restrict_warned = true;
		return NULL;
	}

	return machine__resolve_kernel_addr(vmachine, addrp, modp);
}

/* Set up symbol resolution and synthesize pre-existing threads for the host machine. */
static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
{
	int err = symbol__init(NULL);

	if (err)
		return err;

	trace->host = machine__new_host();
	if (trace->host == NULL)
		return -ENOMEM;

	err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
	if (err < 0)
		goto out;

	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
					    evlist->threads, trace__tool_process, false,
					    trace->opts.proc_map_timeout, 1);
out:
	if (err)
		symbol__exit();

	return err;
}

static void trace__symbols__exit(struct trace *trace)
{
	machine__exit(trace->host);
	trace->host = NULL;

	symbol__exit();
}

/*
 * Allocate the per-argument formatter array.  When the tracepoint format
 * could not be read (nr_args == 6, the generic maximum) prefer the argument
 * count from the hand-written syscall_fmt entry, if there is one.
 */
static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
{
	int idx;

	if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
		nr_args = sc->fmt->nr_args;

	sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
	if (sc->arg_fmt == NULL)
		return -1;

	for (idx = 0; idx < nr_args; ++idx) {
		if (sc->fmt)
			sc->arg_fmt[idx] = sc->fmt->arg[idx];
	}

	sc->nr_args = nr_args;
	return 0;
}

/*
 * Pick default pretty-printers for arguments that have no hand-written
 * formatter, based on the tracepoint field type and name heuristics.
 */
static int syscall__set_arg_fmts(struct syscall *sc)
{
	struct format_field *field;
	int idx = 0, len;

	for (field = sc->args; field; field = field->next, ++idx) {
		if (sc->fmt && sc->fmt->arg[idx].scnprintf)
			continue;

		if (strcmp(field->type, "const char *") == 0 &&
		    (strcmp(field->name, "filename") == 0 ||
		     strcmp(field->name, "path") == 0 ||
		     strcmp(field->name, "pathname") == 0))
			sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
		else if (field->flags & FIELD_IS_POINTER)
			sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex;
		else if (strcmp(field->type, "pid_t") == 0)
			sc->arg_fmt[idx].scnprintf = SCA_PID;
		else if (strcmp(field->type, "umode_t") == 0)
			sc->arg_fmt[idx].scnprintf = SCA_MODE_T;
		else if ((strcmp(field->type, "int") == 0 ||
			  strcmp(field->type, "unsigned int") == 0 ||
			  strcmp(field->type, "long") == 0) &&
			 (len = strlen(field->name)) >= 2 &&
			 strcmp(field->name + len - 2, "fd") == 0) {
			/*
			 * /sys/kernel/tracing/events/syscalls/sys_enter*
			 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
			 * 65 int
			 * 23 unsigned int
			 * 7 unsigned long
			 */
			sc->arg_fmt[idx].scnprintf = SCA_FD;
		}
	}

	return 0;
}

/*
 * Lazily fill trace->syscalls.table[id]: resolve the syscall name, its
 * sys_enter_* tracepoint format and the per-argument formatters, growing
 * the table as needed.
 */
static int trace__read_syscall_info(struct trace *trace, int id)
{
	char tp_name[128];
	struct syscall *sc;
	const char *name = syscalltbl__name(trace->sctbl, id);

	if (name == NULL)
		return -1;

	if (id > trace->syscalls.max) {
		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));

		if (nsyscalls == NULL)
			return -1;

		/* Zero only the newly grown tail (or everything on first use). */
		if (trace->syscalls.max != -1) {
			memset(nsyscalls + trace->syscalls.max + 1, 0,
			       (id - trace->syscalls.max) * sizeof(*sc));
		} else {
			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
		}

		trace->syscalls.table = nsyscalls;
		trace->syscalls.max = id;
	}

	sc = trace->syscalls.table + id;
	sc->name = name;

	sc->fmt = syscall_fmt__find(sc->name);

	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
	sc->tp_format = trace_event__tp_format("syscalls", tp_name);

	/* Some syscalls are only known to the tracepoint layer by an alias. */
	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
	}

	if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
		return -1;

	if (IS_ERR(sc->tp_format))
		return -1;

	sc->args = sc->tp_format->format.fields;
	/*
	 * We need to check and discard the first variable '__syscall_nr'
	 * or 'nr' that mean the syscall number. It is needless here.
	 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
	 */
	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
		sc->args = sc->args->next;
		--sc->nr_args;
	}

	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");

	return syscall__set_arg_fmts(sc);
}

/*
 * Turn the -e syscall qualifier strings into syscall ids, expanding glob
 * patterns; on any invalid name the whole list is rejected and freed.
 */
static int trace__validate_ev_qualifier(struct trace *trace)
{
	int err = 0, i;
	size_t nr_allocated;
	struct str_node *pos;

	trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
	trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
						 sizeof(trace->ev_qualifier_ids.entries[0]));

	if (trace->ev_qualifier_ids.entries == NULL) {
		fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
		      trace->output);
		err = -EINVAL;
		goto out;
	}

	nr_allocated = trace->ev_qualifier_ids.nr;
	i = 0;

	strlist__for_each_entry(pos, trace->ev_qualifier) {
		const char *sc = pos->s;
		int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;

		if (id < 0) {
			id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
			if (id >= 0)
				goto matches;

			if (err == 0) {
				fputs("Error:\tInvalid syscall ", trace->output);
				err = -EINVAL;
			} else {
				fputs(", ", trace->output);
			}

			fputs(sc, trace->output);
			/*
			 * NOTE(review): on no-match we fall through and store
			 * the negative id at 'matches:' below; harmless today
			 * because err < 0 frees the whole array on exit, but
			 * fragile if that invariant ever changes.
			 */
		}
matches:
		trace->ev_qualifier_ids.entries[i++] = id;
		if (match_next == -1)
			continue;

		/* A glob matched: collect every further syscall matching the pattern. */
		while (1) {
			id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
			if (id < 0)
				break;
			if (nr_allocated == trace->ev_qualifier_ids.nr) {
				void *entries;

				nr_allocated += 8;
				entries = realloc(trace->ev_qualifier_ids.entries,
						  nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
				if (entries == NULL) {
					err = -ENOMEM;
					fputs("\nError:\t Not enough memory for parsing\n", trace->output);
					goto out_free;
				}
				trace->ev_qualifier_ids.entries = entries;
			}
			trace->ev_qualifier_ids.nr++;
			trace->ev_qualifier_ids.entries[i++] = id;
		}
	}

	if (err < 0) {
		fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
		      "\nHint:\tand: 'man syscalls'\n", trace->output);
out_free:
		zfree(&trace->ev_qualifier_ids.entries);
		trace->ev_qualifier_ids.nr = 0;
	}
out:
	return err;
}

/*
 * args is to be interpreted as a series of longs but we need to handle
 * 8-byte unaligned accesses. args points to raw_data within the event
 * and raw_data is guaranteed to be 8-byte unaligned because it is
 * preceded by raw_size which is a u32. So we need to copy args to a temp
 * variable to read it.
Most notably this avoids extended load instructions
 * on unaligned addresses
 */
unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
{
	unsigned long val;
	unsigned char *p = arg->args + sizeof(unsigned long) * idx;

	memcpy(&val, p, sizeof(val));
	return val;
}

/* Print "name: " for an argument, falling back to "argN: " when unnamed. */
static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
				      struct syscall_arg *arg)
{
	if (sc->arg_fmt && sc->arg_fmt[arg->idx].name)
		return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name);

	return scnprintf(bf, size, "arg%d: ", arg->idx);
}

/* Print one argument value, using its per-arg formatter when available. */
static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
				     struct syscall_arg *arg, unsigned long val)
{
	if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) {
		arg->val = val;
		if (sc->arg_fmt[arg->idx].parm)
			arg->parm = sc->arg_fmt[arg->idx].parm;
		return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg);
	}
	return scnprintf(bf, size, "%ld", val);
}

/* Format all arguments of a sys_enter event into 'bf'; returns chars printed. */
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned char *args, struct trace *trace,
				      struct thread *thread)
{
	size_t printed = 0;
	unsigned long val;
	u8 bit = 1;
	struct syscall_arg arg = {
		.args	= args,
		.idx	= 0,
		.mask	= 0,
		.trace  = trace,
		.thread = thread,
	};
	struct thread_trace *ttrace = thread__priv(thread);

	/*
	 * Things like fcntl will set this in its 'cmd' formatter to pick the
	 * right formatter for the return value (an fd? file flags?), which is
	 * not needed for syscalls that always return a given type, say an fd.
	 */
	ttrace->ret_scnprintf = NULL;

	if (sc->args != NULL) {
		struct format_field *field;

		for (field = sc->args; field;
		     field = field->next, ++arg.idx, bit <<= 1) {
			if (arg.mask & bit)
				continue;

			val = syscall_arg__val(&arg, arg.idx);

			/*
			 * Suppress this argument if its value is zero and
			 * we don't have a string associated in an
			 * strarray for it.
			 */
			if (val == 0 &&
			    !(sc->arg_fmt &&
			      (sc->arg_fmt[arg.idx].show_zero ||
			       sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
			       sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
			      sc->arg_fmt[arg.idx].parm))
				continue;

			printed += scnprintf(bf + printed, size - printed,
					     "%s%s: ", printed ? ", " : "", field->name);
			printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
		}
	} else if (IS_ERR(sc->tp_format)) {
		/*
		 * If we managed to read the tracepoint /format file, then we
		 * may end up not having any args, like with gettid(), so only
		 * print the raw args when we didn't manage to read it.
		 */
		while (arg.idx < sc->nr_args) {
			if (arg.mask & bit)
				goto next_arg;
			val = syscall_arg__val(&arg, arg.idx);
			if (printed)
				printed += scnprintf(bf + printed, size - printed, ", ");
			printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
			printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
next_arg:
			++arg.idx;
			bit <<= 1;
		}
	}

	return printed;
}

typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);

/* Return the (lazily initialized) syscall descriptor for 'id', or NULL. */
static struct syscall *trace__syscall_info(struct trace *trace,
					   struct perf_evsel *evsel, int id)
{

	if (id < 0) {

		/*
		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
		 * before that, leaving at a higher verbosity level till that is
		 * explained. Reproduced with plain ftrace with:
		 *
		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
		 * grep "NR -1 " /t/trace_pipe
		 *
		 * After generating some load on the machine.
		 */
		if (verbose > 1) {
			static u64 n;
			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
				id, perf_evsel__name(evsel), ++n);
		}
		return NULL;
	}

	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
	    trace__read_syscall_info(trace, id))
		goto out_cant_read;

	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
		goto out_cant_read;

	return &trace->syscalls.table[id];

out_cant_read:
	if (verbose > 0) {
		fprintf(trace->output, "Problems reading syscall %d", id);
		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
		fputs(" information\n", trace->output);
	}
	return NULL;
}

/* Fold this syscall's duration into the per-thread, per-syscall summary stats. */
static void thread__update_stats(struct thread_trace *ttrace,
				 int id, struct perf_sample *sample)
{
	struct int_node *inode;
	struct stats *stats;
	u64 duration = 0;

	inode = intlist__findnew(ttrace->syscall_stats, id);
	if (inode == NULL)
		return;

	stats = inode->priv;
	if (stats == NULL) {
		stats = malloc(sizeof(struct stats));
		if (stats == NULL)
			return;
		init_stats(stats);
		inode->priv = stats;
	}

	if (ttrace->entry_time && sample->time > ttrace->entry_time)
		duration = sample->time - ttrace->entry_time;

	update_stats(stats, duration);
}

/*
 * Another thread's event arrived before the current thread's sys_exit:
 * flush its pending entry line, marked with ") ...".
 */
static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
{
	struct thread_trace *ttrace;
	u64 duration;
	size_t printed;

	if (trace->current == NULL)
		return 0;

	ttrace = thread__priv(trace->current);

	if (!ttrace->entry_pending)
		return 0;

	duration = sample->time - ttrace->entry_time;

	printed  = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
	printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
	ttrace->entry_pending = false;

	return printed;
}

/* --print-sample: dump raw sample info (evsel, time, comm, pid/tid, cpu). */
static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
				 struct perf_sample *sample, struct thread *thread)
{
	int printed = 0;

	if (trace->print_sample) {
		double ts = (double)sample->time / NSEC_PER_MSEC;

		printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
				   perf_evsel__name(evsel), ts,
				   thread__comm_str(thread),
				   sample->pid, sample->tid, sample->cpu);
	}

	return printed;
}

/* sys_enter handler: format the entry and keep it pending until sys_exit arrives. */
static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample)
{
	char *msg;
	void *args;
	size_t printed = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	trace__fprintf_sample(trace, evsel, sample, thread);

	args = perf_evsel__sc_tp_ptr(evsel, args, sample);

	if (ttrace->entry_str == NULL) {
		ttrace->entry_str = malloc(trace__entry_str_size);
		if (!ttrace->entry_str)
			goto out_put;
	}

	if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
		trace__printf_interrupted_entry(trace, sample);

	ttrace->entry_time = sample->time;
	msg = ttrace->entry_str;
	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);

	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
					   args, trace, thread);

	if (sc->is_exit) {
		/* exit()/exit_group() never return: print the entry line right away. */
		if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
			trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
			fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
		}
	} else {
		ttrace->entry_pending = true;
		/* See trace__vfs_getname & trace__sys_exit */
		ttrace->filename.pending_open = false;
	}

	if (trace->current != thread) {
		thread__put(trace->current);
		trace->current = thread__get(thread);
	}
	err = 0;
out_put:
	thread__put(thread);
	return err;
}

/* Resolve the sample's callchain into 'cursor'; returns 0 on success. */
static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
				    struct perf_sample *sample,
				    struct callchain_cursor *cursor)
{
	struct addr_location al;

	if (machine__resolve(trace->host, &al, sample) < 0 ||
	    thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, evsel->attr.sample_max_stack))
		return -1;

	return 0;
}

static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
{
	/* TODO: user-configurable print_opts */
	const unsigned int print_opts = EVSEL__PRINT_SYM |
					EVSEL__PRINT_DSO |
					EVSEL__PRINT_UNKNOWN_AS_ADDR;

	return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
}

/* Map an errno value to its symbolic name for the evsel's architecture. */
static const char *errno_to_name(struct perf_evsel *evsel, int err)
{
	struct perf_env *env = perf_evsel__env(evsel);
	const char *arch_name = perf_env__arch(env);

	return arch_syscalls__strerrno(arch_name, err);
}

/* sys_exit handler: complete the pending entry line and pretty-print the return value. */
static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
			   union perf_event *event __maybe_unused,
			   struct perf_sample *sample)
{
	long ret;
	u64 duration = 0;
	bool duration_calculated = false;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	trace__fprintf_sample(trace, evsel, sample, thread);

	if (trace->summary)
		thread__update_stats(ttrace, id, sample);

	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);

	/* An open-like syscall returned an fd: bind it to the vfs_getname'd path. */
	if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
		ttrace->filename.pending_open = false;
		++trace->stats.vfs_getname;
	}

	if (ttrace->entry_time) {
		duration = sample->time - ttrace->entry_time;
		if (trace__filter_duration(trace, duration))
			goto out;
		duration_calculated = true;
	} else if (trace->duration_filter)
		goto out;

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			if (callchain_cursor.nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	if (trace->summary_only)
		goto out;

	trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);

	if (ttrace->entry_pending) {
		fprintf(trace->output, "%-70s", ttrace->entry_str);
	} else {
		/* The matching sys_enter was lost or predates the session. */
		fprintf(trace->output, " ... [");
		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
		fprintf(trace->output, "]: %s()", sc->name);
	}

	if (sc->fmt == NULL) {
		if (ret < 0)
			goto errno_print;
signed_print:
		fprintf(trace->output, ") = %ld", ret);
	} else if (ret < 0) {
errno_print: {
		char bf[STRERR_BUFSIZE];
		const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
			   *e = errno_to_name(evsel, -ret);

		fprintf(trace->output, ") = -1 %s %s", e, emsg);
	}
	} else if (ret == 0 && sc->fmt->timeout)
		fprintf(trace->output, ") = 0 Timeout");
	else if (ttrace->ret_scnprintf) {
		/* A formatter (e.g. fcntl's 'cmd') requested custom return formatting. */
		char bf[1024];
		struct syscall_arg arg = {
			.val	= ret,
			.thread	= thread,
			.trace	= trace,
		};
		ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
		ttrace->ret_scnprintf = NULL;
		fprintf(trace->output, ") = %s", bf);
	} else if (sc->fmt->hexret)
		fprintf(trace->output, ") = %#lx", ret);
	else if (sc->fmt->errpid) {
		/* Return value is a pid (fork, clone, ...): show the child's comm. */
		struct thread *child = machine__find_thread(trace->host, ret, ret);

		if (child != NULL) {
			fprintf(trace->output, ") = %ld", ret);
			if (child->comm_set)
				fprintf(trace->output, " (%s)", thread__comm_str(child));
			thread__put(child);
		}
	} else
		goto signed_print;

	fputc('\n', trace->output);

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	ttrace->entry_pending = false;
	err = 0;
out_put:
	thread__put(thread);
	return err;
}

/* probe:vfs_getname handler: capture the filename and splice it into the pending entry. */
static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
			      union perf_event *event __maybe_unused,
			      struct perf_sample *sample)
{
	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	struct thread_trace *ttrace;
	size_t filename_len,
 entry_str_len, to_move;
	ssize_t remaining_space;
	char *pos;
	const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");

	if (!thread)
		goto out;

	ttrace = thread__priv(thread);
	if (!ttrace)
		goto out_put;

	filename_len = strlen(filename);
	if (filename_len == 0)
		goto out_put;

	/* Grow the per-thread filename buffer as needed. */
	if (ttrace->filename.namelen < filename_len) {
		char *f = realloc(ttrace->filename.name, filename_len + 1);

		if (f == NULL)
			goto out_put;

		ttrace->filename.namelen = filename_len;
		ttrace->filename.name = f;
	}

	strcpy(ttrace->filename.name, filename);
	ttrace->filename.pending_open = true;

	/* No pending entry string is waiting for a filename splice. */
	if (!ttrace->filename.ptr)
		goto out_put;

	entry_str_len = strlen(ttrace->entry_str);
	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
	if (remaining_space <= 0)
		goto out_put;

	/* Keep the tail of an overlong filename, it is the most informative part. */
	if (filename_len > (size_t)remaining_space) {
		filename += filename_len - remaining_space;
		filename_len = remaining_space;
	}

	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
	memmove(pos + filename_len, pos, to_move);
	memcpy(pos, filename, filename_len);

	ttrace->filename.ptr = 0;
	ttrace->filename.entry_str_pos = 0;
out_put:
	thread__put(thread);
out:
	return 0;
}

/* sched:sched_stat_runtime handler: accumulate on-CPU time per thread and globally. */
static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
				     union perf_event *event __maybe_unused,
				     struct perf_sample *sample)
{
	u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
	struct thread *thread = machine__findnew_thread(trace->host,
							sample->pid,
							sample->tid);
	struct thread_trace *ttrace = thread__trace(thread, trace->output);

	if (ttrace == NULL)
		goto out_dump;

	ttrace->runtime_ms += runtime_ms;
	trace->runtime_ms += runtime_ms;
out_put:
	thread__put(thread);
	return 0;

out_dump:
	/* NOTE(review): format string has a stray trailing ')' with no opening one. */
	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
		evsel->name,
		perf_evsel__strval(evsel, sample, "comm"),
		(pid_t)perf_evsel__intval(evsel, sample, "pid"),
		runtime,
		perf_evsel__intval(evsel, sample, "vruntime"));
	goto out_put;
}

/* binary__fprintf() callback that only emits the printable-char column. */
static int bpf_output__printer(enum binary_printer_ops op,
			       unsigned int val, void *extra __maybe_unused, FILE *fp)
{
	unsigned char ch = (unsigned char)val;

	switch (op) {
	case BINARY_PRINT_CHAR_DATA:
		return fprintf(fp, "%c", isprint(ch) ? ch : '.');
	case BINARY_PRINT_DATA_BEGIN:
	case BINARY_PRINT_LINE_BEGIN:
	case BINARY_PRINT_ADDR:
	case BINARY_PRINT_NUM_DATA:
	case BINARY_PRINT_NUM_PAD:
	case BINARY_PRINT_SEP:
	case BINARY_PRINT_CHAR_PAD:
	case BINARY_PRINT_LINE_END:
	case BINARY_PRINT_DATA_END:
	default:
		break;
	}

	return 0;
}

static void bpf_output__fprintf(struct trace *trace,
				struct perf_sample *sample)
{
	binary__fprintf(sample->raw_data, sample->raw_size, 8,
			bpf_output__printer, NULL, trace->output);
}

/* Handler for the non-syscall events added with --event. */
static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample)
{
	int callchain_ret = 0;

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			if (callchain_cursor.nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	trace__printf_interrupted_entry(trace, sample);
	trace__fprintf_tstamp(trace, sample->time, trace->output);

	if (trace->trace_syscalls)
		fprintf(trace->output, "( ): ");

	fprintf(trace->output, "%s:", evsel->name);

	if (perf_evsel__is_bpf_output(evsel)) {
		bpf_output__fprintf(trace, sample);
	} else if (evsel->tp_format) {
		event_format__fprintf(evsel->tp_format, sample->cpu,
				      sample->raw_data, sample->raw_size,
				      trace->output);
	}

	fprintf(trace->output, ")\n");

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	return 0;
}

/* Print "dso@sym+off" (or raw addresses) for a fault location. */
static void print_location(FILE *f, struct perf_sample *sample,
			   struct addr_location *al,
			   bool print_dso, bool print_sym)
{

	if ((verbose > 0 || print_dso) && al->map)
		fprintf(f, "%s@", al->map->dso->long_name);

	if ((verbose > 0 || print_sym) && al->sym)
		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
			al->addr - al->sym->start);
	else if (al->map)
		fprintf(f, "0x%" PRIx64, al->addr);
	else
		fprintf(f, "0x%" PRIx64, sample->addr);
}

/* Software page-fault (major/minor) event handler. */
static int trace__pgfault(struct trace *trace,
			  struct perf_evsel *evsel,
			  union perf_event *event __maybe_unused,
			  struct perf_sample *sample)
{
	struct thread *thread;
	struct addr_location al;
	char map_type = 'd';
	struct thread_trace *ttrace;
	int err = -1;
	int callchain_ret = 0;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			if (callchain_cursor.nr < trace->min_stack)
				goto out_put;
			callchain_ret = 1;
		}
	}

	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
		ttrace->pfmaj++;
	else
		ttrace->pfmin++;

	if
(trace->summary_only) 2020 goto out; 2021 2022 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION, 2023 sample->ip, &al); 2024 2025 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output); 2026 2027 fprintf(trace->output, "%sfault [", 2028 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ? 2029 "maj" : "min"); 2030 2031 print_location(trace->output, sample, &al, false, true); 2032 2033 fprintf(trace->output, "] => "); 2034 2035 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE, 2036 sample->addr, &al); 2037 2038 if (!al.map) { 2039 thread__find_addr_location(thread, sample->cpumode, 2040 MAP__FUNCTION, sample->addr, &al); 2041 2042 if (al.map) 2043 map_type = 'x'; 2044 else 2045 map_type = '?'; 2046 } 2047 2048 print_location(trace->output, sample, &al, true, false); 2049 2050 fprintf(trace->output, " (%c%c)\n", map_type, al.level); 2051 2052 if (callchain_ret > 0) 2053 trace__fprintf_callchain(trace, sample); 2054 else if (callchain_ret < 0) 2055 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); 2056 out: 2057 err = 0; 2058 out_put: 2059 thread__put(thread); 2060 return err; 2061 } 2062 2063 static void trace__set_base_time(struct trace *trace, 2064 struct perf_evsel *evsel, 2065 struct perf_sample *sample) 2066 { 2067 /* 2068 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust 2069 * and don't use sample->time unconditionally, we may end up having 2070 * some other event in the future without PERF_SAMPLE_TIME for good 2071 * reason, i.e. we may not be interested in its timestamps, just in 2072 * it taking place, picking some piece of information when it 2073 * appears in our event stream (vfs_getname comes to mind). 
2074 */ 2075 if (trace->base_time == 0 && !trace->full_time && 2076 (evsel->attr.sample_type & PERF_SAMPLE_TIME)) 2077 trace->base_time = sample->time; 2078 } 2079 2080 static int trace__process_sample(struct perf_tool *tool, 2081 union perf_event *event, 2082 struct perf_sample *sample, 2083 struct perf_evsel *evsel, 2084 struct machine *machine __maybe_unused) 2085 { 2086 struct trace *trace = container_of(tool, struct trace, tool); 2087 struct thread *thread; 2088 int err = 0; 2089 2090 tracepoint_handler handler = evsel->handler; 2091 2092 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 2093 if (thread && thread__is_filtered(thread)) 2094 goto out; 2095 2096 trace__set_base_time(trace, evsel, sample); 2097 2098 if (handler) { 2099 ++trace->nr_events; 2100 handler(trace, evsel, event, sample); 2101 } 2102 out: 2103 thread__put(thread); 2104 return err; 2105 } 2106 2107 static int trace__record(struct trace *trace, int argc, const char **argv) 2108 { 2109 unsigned int rec_argc, i, j; 2110 const char **rec_argv; 2111 const char * const record_args[] = { 2112 "record", 2113 "-R", 2114 "-m", "1024", 2115 "-c", "1", 2116 }; 2117 2118 const char * const sc_args[] = { "-e", }; 2119 unsigned int sc_args_nr = ARRAY_SIZE(sc_args); 2120 const char * const majpf_args[] = { "-e", "major-faults" }; 2121 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args); 2122 const char * const minpf_args[] = { "-e", "minor-faults" }; 2123 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args); 2124 2125 /* +1 is for the event string below */ 2126 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 + 2127 majpf_args_nr + minpf_args_nr + argc; 2128 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 2129 2130 if (rec_argv == NULL) 2131 return -ENOMEM; 2132 2133 j = 0; 2134 for (i = 0; i < ARRAY_SIZE(record_args); i++) 2135 rec_argv[j++] = record_args[i]; 2136 2137 if (trace->trace_syscalls) { 2138 for (i = 0; i < sc_args_nr; i++) 2139 rec_argv[j++] = sc_args[i]; 2140 2141 
		/* event string may be different for older kernels - e.g., RHEL6 */
		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
		else if (is_valid_tracepoint("syscalls:sys_enter"))
			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
		else {
			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
			free(rec_argv);
			return -1;
		}
	}

	if (trace->trace_pgfaults & TRACE_PFMAJ)
		for (i = 0; i < majpf_args_nr; i++)
			rec_argv[j++] = majpf_args[i];

	if (trace->trace_pgfaults & TRACE_PFMIN)
		for (i = 0; i < minpf_args_nr; i++)
			rec_argv[j++] = minpf_args[i];

	for (i = 0; i < (unsigned int)argc; i++)
		rec_argv[j++] = argv[i];

	/*
	 * NOTE(review): rec_argv is not freed on this path — cmd_record()
	 * runs for the process lifetime, so this looks intentional.
	 */
	return cmd_record(j, rec_argv);
}

static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);

/*
 * Try to add the probe:vfs_getname tracepoint (set up out-of-band via
 * 'perf probe') so pathnames can be shown for fd-based syscalls.
 * Returns false when the probe or its "pathname" field is unavailable.
 */
static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");

	if (IS_ERR(evsel))
		return false;

	if (perf_evsel__field(evsel, "pathname") == NULL) {
		perf_evsel__delete(evsel);
		return false;
	}

	evsel->handler = trace__vfs_getname;
	perf_evlist__add(evlist, evsel);
	return true;
}

/*
 * Create a software page-fault evsel (major or minor, per 'config'),
 * sampling every fault (period 1) with mmap data tracking enabled.
 */
static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
{
	struct perf_evsel *evsel;
	struct perf_event_attr attr = {
		.type = PERF_TYPE_SOFTWARE,
		.mmap_data = 1,
	};

	attr.config = config;
	attr.sample_period = 1;

	event_attr_init(&attr);

	evsel = perf_evsel__new(&attr);
	if (evsel)
		evsel->handler = trace__pgfault;

	return evsel;
}

/*
 * Live-mode event dispatch: side-band events go to trace__process_event(),
 * samples are routed to the handler of the evsel that produced them.
 */
static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
{
	const u32 type = event->header.type;
	struct perf_evsel *evsel;

	if (type != PERF_RECORD_SAMPLE) {
		trace__process_event(trace, trace->host, event, sample);
		return;
	}

	evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
	if (evsel == NULL) {
		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
		return;
	}

	trace__set_base_time(trace, evsel, sample);

	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
	    sample->raw_data == NULL) {
		/* Tracepoint handlers need raw_data; don't crash on its absence. */
		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
		       perf_evsel__name(evsel), sample->tid,
		       sample->cpu, sample->raw_size);
	} else {
		tracepoint_handler handler = evsel->handler;
		handler(trace, evsel, event, sample);
	}
}

/*
 * Set up the raw_syscalls:sys_enter/sys_exit tracepoint pair and add both
 * to the evlist, configuring callchain collection on each.
 */
static int trace__add_syscall_newtp(struct trace *trace)
{
	int ret = -1;
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *sys_enter, *sys_exit;

	sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
	if (sys_enter == NULL)
		goto out;

	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
		goto out_delete_sys_enter;

	sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
	if (sys_exit == NULL)
		goto out_delete_sys_enter;

	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
		goto out_delete_sys_exit;

	perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
	perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);

	perf_evlist__add(evlist, sys_enter);
	perf_evlist__add(evlist, sys_exit);

	if (callchain_param.enabled && !trace->kernel_syscallchains) {
		/*
		 * We're interested only in the user space callchain
		 * leading to the syscall, allow overriding that for
		 * debugging reasons using --kernel_syscall_callchains
		 */
		sys_exit->attr.exclude_callchain_kernel = 1;
	}

	trace->syscalls.events.sys_enter = sys_enter;
	trace->syscalls.events.sys_exit = sys_exit;

	ret = 0;
out:
	return ret;

out_delete_sys_exit:
	perf_evsel__delete_priv(sys_exit);
out_delete_sys_enter:
	perf_evsel__delete_priv(sys_enter);
	goto out;
}

/*
 * Build an "id in (...)"/"id not in (...)" tracepoint filter from the -e
 * qualifier list and append it to both sys_enter and sys_exit.
 * Returns 0 on success, negative on error (errno set to ENOMEM when the
 * filter expression could not be allocated).
 */
static int trace__set_ev_qualifier_filter(struct trace *trace)
{
	int err = -1;
	struct perf_evsel *sys_exit;
	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
						trace->ev_qualifier_ids.nr,
						trace->ev_qualifier_ids.entries);

	if (filter == NULL)
		goto out_enomem;

	/* Only bother with sys_exit if sys_enter accepted the filter. */
	if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
					  filter)) {
		sys_exit = trace->syscalls.events.sys_exit;
		err = perf_evsel__append_tp_filter(sys_exit, filter);
	}

	free(filter);
out:
	return err;
out_enomem:
	errno = ENOMEM;
	goto out;
}

/*
 * Build the kernel-side pid filter for system-wide tracing: always filter
 * out perf itself, and walk up the parent chain so that the interactive
 * session's sshd (feedback loop source) gets filtered too.
 */
static int trace__set_filter_loop_pids(struct trace *trace)
{
	unsigned int nr = 1;
	pid_t pids[32] = {
		getpid(),
	};
	struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]);

	while (thread && nr < ARRAY_SIZE(pids)) {
		struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid);

		if (parent == NULL)
			break;

		if (!strcmp(thread__comm_str(parent), "sshd")) {
			pids[nr++] = parent->tid;
			break;
		}
		thread = parent;
	}

	return perf_evlist__set_filter_pids(trace->evlist, nr, pids);
}

/*
 * Live tracing main loop: set up the requested events, (optionally) fork
 * the workload, mmap and poll the ring buffers, dispatching every event
 * through trace__handle_event() until interrupted or drained.
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
	int err = -1, i;
	unsigned long before;
	const bool forks = argc > 0;
	bool draining = false;

	trace->live = true;

	if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
		goto out_error_raw_syscalls;

	if (trace->trace_syscalls)
		trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);

	if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
		pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
		if (pgfault_maj == NULL)
			goto out_error_mem;
		perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
		perf_evlist__add(evlist, pgfault_maj);
	}

	if ((trace->trace_pgfaults & TRACE_PFMIN)) {
		pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
		if (pgfault_min == NULL)
			goto out_error_mem;
		perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
		perf_evlist__add(evlist, pgfault_min);
	}

	if (trace->sched &&
	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
				   trace__sched_stat_runtime))
		goto out_error_sched_stat_runtime;

	err = perf_evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
		goto out_delete_evlist;
	}

	perf_evlist__config(evlist, &trace->opts, &callchain_param);

	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);

	if (forks) {
		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
						    argv, false, NULL);
		if (err < 0) {
			fprintf(trace->output, "Couldn't run the workload!\n");
			goto out_delete_evlist;
		}
	}

	err = perf_evlist__open(evlist);
	if (err < 0)
		goto out_error_open;

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_error_open;
	}

	/*
	 * Better not use !target__has_task() here because we need to cover the
	 * case where no threads were specified in the command line, but a
	 * workload was, and in that case we will fill in the thread_map when
	 * we fork the workload in perf_evlist__prepare_workload.
	 */
	if (trace->filter_pids.nr > 0)
		err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
	else if (thread_map__pid(evlist->threads, 0) == -1)
		err = trace__set_filter_loop_pids(trace);

	if (err < 0)
		goto out_error_mem;

	if (trace->ev_qualifier_ids.nr > 0) {
		err = trace__set_ev_qualifier_filter(trace);
		if (err < 0)
			goto out_errno;

		pr_debug("event qualifier tracepoint filter: %s\n",
			 trace->syscalls.events.sys_exit->filter);
	}

	err = perf_evlist__apply_filters(evlist, &evsel);
	if (err < 0)
		goto out_error_apply_filters;

	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages);
	if (err < 0)
		goto out_error_mmap;

	/* With a workload and --delay, enabling is deferred to below. */
	if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
		perf_evlist__enable(evlist);

	if (forks)
		perf_evlist__start_workload(evlist);

	if (trace->opts.initial_delay) {
		usleep(trace->opts.initial_delay * 1000);
		perf_evlist__enable(evlist);
	}

	trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
				  evlist->threads->nr > 1 ||
				  perf_evlist__first(evlist)->attr.inherit;

	/*
	 * Now that we already used evsel->attr to ask the kernel to setup the
	 * events, lets reuse evsel->attr.sample_max_stack as the limit in
	 * trace__resolve_callchain(), allowing per-event max-stack settings
	 * to override an explicitely set --max-stack global setting.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) &&
		    evsel->attr.sample_max_stack == 0)
			evsel->attr.sample_max_stack = trace->max_stack;
	}
again:
	before = trace->nr_events;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		union perf_event *event;

		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
			struct perf_sample sample;

			++trace->nr_events;

			err = perf_evlist__parse_sample(evlist, event, &sample);
			if (err) {
				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
				goto next_event;
			}

			trace__handle_event(trace, event, &sample);
next_event:
			perf_evlist__mmap_consume(evlist, i);

			if (interrupted)
				goto out_disable;

			/* Workload exited: disable and drain what's left. */
			if (done && !draining) {
				perf_evlist__disable(evlist);
				draining = true;
			}
		}
	}

	if (trace->nr_events == before) {
		/* Nothing read this pass: poll (bounded once 'done'). */
		int timeout = done ? 100 : -1;

		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
				draining = true;

			goto again;
		}
	} else {
		goto again;
	}

out_disable:
	thread__zput(trace->current);

	perf_evlist__disable(evlist);

	if (!err) {
		if (trace->summary)
			trace__fprintf_thread_summary(trace, trace->output);

		if (trace->show_tool_stats) {
			fprintf(trace->output, "Stats:\n "
					       " vfs_getname : %" PRIu64 "\n"
					       " proc_getname: %" PRIu64 "\n",
				trace->stats.vfs_getname,
				trace->stats.proc_getname);
		}
	}

out_delete_evlist:
	trace__symbols__exit(trace);

	perf_evlist__delete(evlist);
	trace->evlist = NULL;
	trace->live = false;
	return err;
/*
 * Error labels below share errbuf; the bare block scopes it without
 * growing the frame of the non-error paths.
 */
{
	char errbuf[BUFSIZ];

out_error_sched_stat_runtime:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
	goto out_error;

out_error_raw_syscalls:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
	goto out_error;

out_error_mmap:
	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
	goto out_error;

out_error_open:
	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

out_error:
	fprintf(trace->output, "%s\n", errbuf);
	goto out_delete_evlist;

out_error_apply_filters:
	fprintf(trace->output,
		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
		evsel->filter, perf_evsel__name(evsel), errno,
		str_error_r(errno, errbuf, sizeof(errbuf)));
	goto out_delete_evlist;
}
out_error_mem:
	fprintf(trace->output, "Not enough memory to run!\n");
	goto out_delete_evlist;

out_errno:
	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
	goto out_delete_evlist;
}

/*
 * 'perf trace -i': replay a previously recorded perf.data file, wiring the
 * same syscall/page-fault handlers used in live mode into a perf_session.
 */
static int trace__replay(struct trace *trace)
{
	const struct perf_evsel_str_handler handlers[] = {
		{ "probe:vfs_getname", trace__vfs_getname, },
	};
	struct perf_data data = {
		.file = {
			.path = input_name,
		},
		.mode = PERF_DATA_MODE_READ,
		.force = trace->force,
	};
	struct perf_session *session;
	struct perf_evsel *evsel;
	int err = -1;

	trace->tool.sample = trace__process_sample;
	trace->tool.mmap = perf_event__process_mmap;
	trace->tool.mmap2 = perf_event__process_mmap2;
	trace->tool.comm = perf_event__process_comm;
	trace->tool.exit = perf_event__process_exit;
	trace->tool.fork = perf_event__process_fork;
	trace->tool.attr = perf_event__process_attr;
	trace->tool.tracing_data = perf_event__process_tracing_data;
	trace->tool.build_id = perf_event__process_build_id;
	trace->tool.namespaces = perf_event__process_namespaces;

	trace->tool.ordered_events = true;
	trace->tool.ordering_requires_timestamps = true;

	/* add tid to output */
	trace->multiple_threads = true;

	session = perf_session__new(&data, false, &trace->tool);
	if (session == NULL)
		return -1;

	if (trace->opts.target.pid)
		symbol_conf.pid_list_str = strdup(trace->opts.target.pid);

	if (trace->opts.target.tid)
		symbol_conf.tid_list_str = strdup(trace->opts.target.tid);

	if (symbol__init(&session->header.env) < 0)
		goto out;

	trace->host = &session->machines.host;

	err = perf_session__set_tracepoints_handlers(session, handlers);
	if (err)
		goto out;

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_enter");
	/* older kernels have syscalls tp versus raw_syscalls */
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_enter");

	if (evsel &&
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
		goto out;
	}

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_exit");
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_exit");
	if (evsel &&
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
		goto out;
	}

	/* Route any recorded software page-fault events to the pgfault printer. */
	evlist__for_each_entry(session->evlist, evsel) {
		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
			evsel->handler = trace__pgfault;
	}

	setup_pager();

	err = perf_session__process_events(session);
	if (err)
		pr_err("Failed to process events, error %d", err);

	else if (trace->summary)
		trace__fprintf_thread_summary(trace, trace->output);

out:
	perf_session__delete(session);

	return err;
}

/* Print the banner preceding the per-thread summary table. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	size_t printed;

	printed = fprintf(fp, "\n Summary of events:\n\n");

	return printed;
}

/*
 * Resort the per-thread syscall stats intlist by total time spent
 * (msecs, descending) for the summary table.
 */
DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
	struct stats 	*stats;
	double		msecs;
	int		syscall;
)
{
	struct int_node *source = rb_entry(nd, struct int_node, rb_node);
	struct stats *stats = source->priv;

	entry->syscall = source->i;
	entry->stats   = stats;
	/* total = number of calls * average duration, scaled to msec */
	entry->msecs   = stats ?
(u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
}

/*
 * Print one thread's per-syscall statistics table (calls, total/min/avg/max
 * duration in msec, relative stddev), sorted by total time descending.
 * Returns the number of characters printed.
 */
static size_t thread__dump_stats(struct thread_trace *ttrace,
				 struct trace *trace, FILE *fp)
{
	size_t printed = 0;
	struct syscall *sc;
	struct rb_node *nd;
	DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);

	if (syscall_stats == NULL)
		return 0;

	printed += fprintf(fp, "\n");

	/*
	 * NOTE(review): column spacing reconstructed to line up with the
	 * dashes ruler and the %-15s/%8/%9.3f formats below — confirm
	 * against the pristine source.
	 */
	printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
	printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- --------- ------\n");

	resort_rb__for_each_entry(nd, syscall_stats) {
		struct stats *stats = syscall_stats_entry->stats;
		if (stats) {
			double min = (double)(stats->min) / NSEC_PER_MSEC;
			double max = (double)(stats->max) / NSEC_PER_MSEC;
			double avg = avg_stats(stats);
			double pct;
			u64 n = (u64) stats->n;

			/* stddev as a percentage of the mean */
			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
			avg /= NSEC_PER_MSEC;

			sc = &trace->syscalls.table[syscall_stats_entry->syscall];
			printed += fprintf(fp, "   %-15s", sc->name);
			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
					   n, syscall_stats_entry->msecs, min, avg);
			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
		}
	}

	resort_rb__delete(syscall_stats);
	printed += fprintf(fp, "\n\n");

	return printed;
}

/*
 * Print one thread's line in the summary: comm, tid, event count, share of
 * all events, fault counts, optional runtime, then its syscall stats table.
 */
static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
{
	size_t printed = 0;
	struct thread_trace *ttrace = thread__priv(thread);
	double ratio;

	if (ttrace == NULL)
		return 0;

	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;

	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
	printed += fprintf(fp, "%.1f%%", ratio);
	if (ttrace->pfmaj)
		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
	if (ttrace->pfmin)
		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
	if (trace->sched)
		printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
	else if (fputc('\n', fp) != EOF)
		++printed;

	printed += thread__dump_stats(ttrace, trace, fp);

	return printed;
}

/* NULL-safe accessor for a thread's event count, used by the resort below. */
static unsigned long thread__nr_events(struct thread_trace *ttrace)
{
	return ttrace ?
ttrace->nr_events : 0; 2773 } 2774 2775 DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)), 2776 struct thread *thread; 2777 ) 2778 { 2779 entry->thread = rb_entry(nd, struct thread, rb_node); 2780 } 2781 2782 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) 2783 { 2784 size_t printed = trace__fprintf_threads_header(fp); 2785 struct rb_node *nd; 2786 int i; 2787 2788 for (i = 0; i < THREADS__TABLE_SIZE; i++) { 2789 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i); 2790 2791 if (threads == NULL) { 2792 fprintf(fp, "%s", "Error sorting output by nr_events!\n"); 2793 return 0; 2794 } 2795 2796 resort_rb__for_each_entry(nd, threads) 2797 printed += trace__fprintf_thread(fp, threads_entry->thread, trace); 2798 2799 resort_rb__delete(threads); 2800 } 2801 return printed; 2802 } 2803 2804 static int trace__set_duration(const struct option *opt, const char *str, 2805 int unset __maybe_unused) 2806 { 2807 struct trace *trace = opt->value; 2808 2809 trace->duration_filter = atof(str); 2810 return 0; 2811 } 2812 2813 static int trace__set_filter_pids(const struct option *opt, const char *str, 2814 int unset __maybe_unused) 2815 { 2816 int ret = -1; 2817 size_t i; 2818 struct trace *trace = opt->value; 2819 /* 2820 * FIXME: introduce a intarray class, plain parse csv and create a 2821 * { int nr, int entries[] } struct... 
2822 */ 2823 struct intlist *list = intlist__new(str); 2824 2825 if (list == NULL) 2826 return -1; 2827 2828 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1; 2829 trace->filter_pids.entries = calloc(i, sizeof(pid_t)); 2830 2831 if (trace->filter_pids.entries == NULL) 2832 goto out; 2833 2834 trace->filter_pids.entries[0] = getpid(); 2835 2836 for (i = 1; i < trace->filter_pids.nr; ++i) 2837 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i; 2838 2839 intlist__delete(list); 2840 ret = 0; 2841 out: 2842 return ret; 2843 } 2844 2845 static int trace__open_output(struct trace *trace, const char *filename) 2846 { 2847 struct stat st; 2848 2849 if (!stat(filename, &st) && st.st_size) { 2850 char oldname[PATH_MAX]; 2851 2852 scnprintf(oldname, sizeof(oldname), "%s.old", filename); 2853 unlink(oldname); 2854 rename(filename, oldname); 2855 } 2856 2857 trace->output = fopen(filename, "w"); 2858 2859 return trace->output == NULL ? -errno : 0; 2860 } 2861 2862 static int parse_pagefaults(const struct option *opt, const char *str, 2863 int unset __maybe_unused) 2864 { 2865 int *trace_pgfaults = opt->value; 2866 2867 if (strcmp(str, "all") == 0) 2868 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN; 2869 else if (strcmp(str, "maj") == 0) 2870 *trace_pgfaults |= TRACE_PFMAJ; 2871 else if (strcmp(str, "min") == 0) 2872 *trace_pgfaults |= TRACE_PFMIN; 2873 else 2874 return -1; 2875 2876 return 0; 2877 } 2878 2879 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler) 2880 { 2881 struct perf_evsel *evsel; 2882 2883 evlist__for_each_entry(evlist, evsel) 2884 evsel->handler = handler; 2885 } 2886 2887 /* 2888 * XXX: Hackish, just splitting the combined -e+--event (syscalls 2889 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use 2890 * existing facilities unchanged (trace->ev_qualifier + parse_options()). 
2891 * 2892 * It'd be better to introduce a parse_options() variant that would return a 2893 * list with the terms it didn't match to an event... 2894 */ 2895 static int trace__parse_events_option(const struct option *opt, const char *str, 2896 int unset __maybe_unused) 2897 { 2898 struct trace *trace = (struct trace *)opt->value; 2899 const char *s = str; 2900 char *sep = NULL, *lists[2] = { NULL, NULL, }; 2901 int len = strlen(str) + 1, err = -1, list, idx; 2902 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR); 2903 char group_name[PATH_MAX]; 2904 2905 if (strace_groups_dir == NULL) 2906 return -1; 2907 2908 if (*s == '!') { 2909 ++s; 2910 trace->not_ev_qualifier = true; 2911 } 2912 2913 while (1) { 2914 if ((sep = strchr(s, ',')) != NULL) 2915 *sep = '\0'; 2916 2917 list = 0; 2918 if (syscalltbl__id(trace->sctbl, s) >= 0 || 2919 syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) { 2920 list = 1; 2921 } else { 2922 path__join(group_name, sizeof(group_name), strace_groups_dir, s); 2923 if (access(group_name, R_OK) == 0) 2924 list = 1; 2925 } 2926 2927 if (lists[list]) { 2928 sprintf(lists[list] + strlen(lists[list]), ",%s", s); 2929 } else { 2930 lists[list] = malloc(len); 2931 if (lists[list] == NULL) 2932 goto out; 2933 strcpy(lists[list], s); 2934 } 2935 2936 if (!sep) 2937 break; 2938 2939 *sep = ','; 2940 s = sep + 1; 2941 } 2942 2943 if (lists[1] != NULL) { 2944 struct strlist_config slist_config = { 2945 .dirname = strace_groups_dir, 2946 }; 2947 2948 trace->ev_qualifier = strlist__new(lists[1], &slist_config); 2949 if (trace->ev_qualifier == NULL) { 2950 fputs("Not enough memory to parse event qualifier", trace->output); 2951 goto out; 2952 } 2953 2954 if (trace__validate_ev_qualifier(trace)) 2955 goto out; 2956 } 2957 2958 err = 0; 2959 2960 if (lists[0]) { 2961 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event", 2962 "event selector. 
use 'perf list' to list available events", 2963 parse_events_option); 2964 err = parse_events_option(&o, lists[0], 0); 2965 } 2966 out: 2967 if (sep) 2968 *sep = ','; 2969 2970 return err; 2971 } 2972 2973 int cmd_trace(int argc, const char **argv) 2974 { 2975 const char *trace_usage[] = { 2976 "perf trace [<options>] [<command>]", 2977 "perf trace [<options>] -- <command> [<options>]", 2978 "perf trace record [<options>] [<command>]", 2979 "perf trace record [<options>] -- <command> [<options>]", 2980 NULL 2981 }; 2982 struct trace trace = { 2983 .syscalls = { 2984 . max = -1, 2985 }, 2986 .opts = { 2987 .target = { 2988 .uid = UINT_MAX, 2989 .uses_mmap = true, 2990 }, 2991 .user_freq = UINT_MAX, 2992 .user_interval = ULLONG_MAX, 2993 .no_buffering = true, 2994 .mmap_pages = UINT_MAX, 2995 .proc_map_timeout = 500, 2996 }, 2997 .output = stderr, 2998 .show_comm = true, 2999 .trace_syscalls = true, 3000 .kernel_syscallchains = false, 3001 .max_stack = UINT_MAX, 3002 }; 3003 const char *output_name = NULL; 3004 const struct option trace_options[] = { 3005 OPT_CALLBACK('e', "event", &trace, "event", 3006 "event/syscall selector. 
use 'perf list' to list available events", 3007 trace__parse_events_option), 3008 OPT_BOOLEAN(0, "comm", &trace.show_comm, 3009 "show the thread COMM next to its id"), 3010 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), 3011 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace", 3012 trace__parse_events_option), 3013 OPT_STRING('o', "output", &output_name, "file", "output file name"), 3014 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"), 3015 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", 3016 "trace events on existing process id"), 3017 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid", 3018 "trace events on existing thread id"), 3019 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids", 3020 "pids to filter (by the kernel)", trace__set_filter_pids), 3021 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide, 3022 "system-wide collection from all CPUs"), 3023 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu", 3024 "list of cpus to monitor"), 3025 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, 3026 "child tasks do not inherit counters"), 3027 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages", 3028 "number of mmap data pages", 3029 perf_evlist__parse_mmap_pages), 3030 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user", 3031 "user to profile"), 3032 OPT_CALLBACK(0, "duration", &trace, "float", 3033 "show only events with duration > N.M ms", 3034 trace__set_duration), 3035 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), 3036 OPT_INCR('v', "verbose", &verbose, "be more verbose"), 3037 OPT_BOOLEAN('T', "time", &trace.full_time, 3038 "Show full timestamp, not time relative to first start"), 3039 OPT_BOOLEAN('s', "summary", &trace.summary_only, 3040 "Show only syscall summary with statistics"), 3041 OPT_BOOLEAN('S', "with-summary", &trace.summary, 3042 "Show all syscalls and summary with statistics"), 3043 
OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min", 3044 "Trace pagefaults", parse_pagefaults, "maj"), 3045 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"), 3046 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"), 3047 OPT_CALLBACK(0, "call-graph", &trace.opts, 3048 "record_mode[,record_size]", record_callchain_help, 3049 &record_parse_callchain_opt), 3050 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains, 3051 "Show the kernel callchains on the syscall exit path"), 3052 OPT_UINTEGER(0, "min-stack", &trace.min_stack, 3053 "Set the minimum stack depth when parsing the callchain, " 3054 "anything below the specified depth will be ignored."), 3055 OPT_UINTEGER(0, "max-stack", &trace.max_stack, 3056 "Set the maximum stack depth when parsing the callchain, " 3057 "anything beyond the specified depth will be ignored. " 3058 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), 3059 OPT_BOOLEAN(0, "print-sample", &trace.print_sample, 3060 "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"), 3061 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout, 3062 "per thread proc mmap processing timeout in ms"), 3063 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay, 3064 "ms to wait before starting measurement after program " 3065 "start"), 3066 OPT_END() 3067 }; 3068 bool __maybe_unused max_stack_user_set = true; 3069 bool mmap_pages_user_set = true; 3070 const char * const trace_subcommands[] = { "record", NULL }; 3071 int err; 3072 char bf[BUFSIZ]; 3073 3074 signal(SIGSEGV, sighandler_dump_stack); 3075 signal(SIGFPE, sighandler_dump_stack); 3076 3077 trace.evlist = perf_evlist__new(); 3078 trace.sctbl = syscalltbl__new(); 3079 3080 if (trace.evlist == NULL || trace.sctbl == NULL) { 3081 pr_err("Not enough memory to run!\n"); 3082 err = -ENOMEM; 3083 goto out; 3084 } 3085 3086 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands, 3087 
trace_usage, PARSE_OPT_STOP_AT_NON_OPTION); 3088 3089 err = bpf__setup_stdout(trace.evlist); 3090 if (err) { 3091 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf)); 3092 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf); 3093 goto out; 3094 } 3095 3096 err = -1; 3097 3098 if (trace.trace_pgfaults) { 3099 trace.opts.sample_address = true; 3100 trace.opts.sample_time = true; 3101 } 3102 3103 if (trace.opts.mmap_pages == UINT_MAX) 3104 mmap_pages_user_set = false; 3105 3106 if (trace.max_stack == UINT_MAX) { 3107 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack; 3108 max_stack_user_set = false; 3109 } 3110 3111 #ifdef HAVE_DWARF_UNWIND_SUPPORT 3112 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) { 3113 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false); 3114 } 3115 #endif 3116 3117 if (callchain_param.enabled) { 3118 if (!mmap_pages_user_set && geteuid() == 0) 3119 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4; 3120 3121 symbol_conf.use_callchain = true; 3122 } 3123 3124 if (trace.evlist->nr_entries > 0) 3125 evlist__set_evsel_handler(trace.evlist, trace__event_handler); 3126 3127 if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) 3128 return trace__record(&trace, argc-1, &argv[1]); 3129 3130 /* summary_only implies summary option, but don't overwrite summary if set */ 3131 if (trace.summary_only) 3132 trace.summary = trace.summary_only; 3133 3134 if (!trace.trace_syscalls && !trace.trace_pgfaults && 3135 trace.evlist->nr_entries == 0 /* Was --events used? 
*/) { 3136 pr_err("Please specify something to trace.\n"); 3137 return -1; 3138 } 3139 3140 if (!trace.trace_syscalls && trace.ev_qualifier) { 3141 pr_err("The -e option can't be used with --no-syscalls.\n"); 3142 goto out; 3143 } 3144 3145 if (output_name != NULL) { 3146 err = trace__open_output(&trace, output_name); 3147 if (err < 0) { 3148 perror("failed to create output file"); 3149 goto out; 3150 } 3151 } 3152 3153 trace.open_id = syscalltbl__id(trace.sctbl, "open"); 3154 3155 err = target__validate(&trace.opts.target); 3156 if (err) { 3157 target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 3158 fprintf(trace.output, "%s", bf); 3159 goto out_close; 3160 } 3161 3162 err = target__parse_uid(&trace.opts.target); 3163 if (err) { 3164 target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 3165 fprintf(trace.output, "%s", bf); 3166 goto out_close; 3167 } 3168 3169 if (!argc && target__none(&trace.opts.target)) 3170 trace.opts.target.system_wide = true; 3171 3172 if (input_name) 3173 err = trace__replay(&trace); 3174 else 3175 err = trace__run(&trace, argc, argv); 3176 3177 out_close: 3178 if (output_name != NULL) 3179 fclose(trace.output); 3180 out: 3181 return err; 3182 } 3183