1 /* 2 * builtin-trace.c 3 * 4 * Builtin 'trace' command: 5 * 6 * Display a continuously updated trace of any workload, CPU, specific PID, 7 * system wide, etc. Default format is loosely strace like, but any other 8 * event may be specified using --event. 9 * 10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> 11 * 12 * Initially based on the 'trace' prototype by Thomas Gleixner: 13 * 14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'") 15 * 16 * Released under the GPL v2. (and only v2, not any later version) 17 */ 18 19 #include <traceevent/event-parse.h> 20 #include <api/fs/tracing_path.h> 21 #include "builtin.h" 22 #include "util/color.h" 23 #include "util/debug.h" 24 #include "util/env.h" 25 #include "util/event.h" 26 #include "util/evlist.h" 27 #include <subcmd/exec-cmd.h> 28 #include "util/machine.h" 29 #include "util/path.h" 30 #include "util/session.h" 31 #include "util/thread.h" 32 #include <subcmd/parse-options.h> 33 #include "util/strlist.h" 34 #include "util/intlist.h" 35 #include "util/thread_map.h" 36 #include "util/stat.h" 37 #include "trace/beauty/beauty.h" 38 #include "trace-event.h" 39 #include "util/parse-events.h" 40 #include "util/bpf-loader.h" 41 #include "callchain.h" 42 #include "print_binary.h" 43 #include "string2.h" 44 #include "syscalltbl.h" 45 #include "rb_resort.h" 46 47 #include <errno.h> 48 #include <inttypes.h> 49 #include <poll.h> 50 #include <signal.h> 51 #include <stdlib.h> 52 #include <string.h> 53 #include <linux/err.h> 54 #include <linux/filter.h> 55 #include <linux/kernel.h> 56 #include <linux/random.h> 57 #include <linux/stringify.h> 58 #include <linux/time64.h> 59 #include <fcntl.h> 60 61 #include "sane_ctype.h" 62 63 #ifndef O_CLOEXEC 64 # define O_CLOEXEC 02000000 65 #endif 66 67 #ifndef F_LINUX_SPECIFIC_BASE 68 # define F_LINUX_SPECIFIC_BASE 1024 69 #endif 70 71 struct trace { 72 struct perf_tool tool; 73 struct syscalltbl *sctbl; 74 struct { 75 int max; 76 struct syscall *table; 77 struct { 78 struct perf_evsel *sys_enter, 79 *sys_exit; 80 } events; 81 } syscalls; 82 struct record_opts opts; 83 struct perf_evlist *evlist; 84 struct machine *host; 85 struct thread *current; 86 u64 base_time; 87 FILE *output; 88 unsigned long nr_events; 89 struct strlist *ev_qualifier; 90 struct { 91 size_t nr; 92 int *entries; 93 } ev_qualifier_ids; 94 struct { 95 size_t nr; 96 pid_t *entries; 97 } filter_pids; 98 double duration_filter; 99 double runtime_ms; 100 struct { 101 u64 vfs_getname, 102 proc_getname; 103 } stats; 104 unsigned int max_stack; 105 unsigned int min_stack; 106 bool not_ev_qualifier; 107 bool live; 108 bool full_time; 109 bool sched; 110 bool multiple_threads; 111 bool summary; 112 bool summary_only; 113 bool show_comm; 114 bool print_sample; 115 bool show_tool_stats; 116 bool trace_syscalls; 117 bool kernel_syscallchains; 118 bool force; 119 bool vfs_getname; 120 int trace_pgfaults; 121 int open_id; 122 }; 123 124 struct tp_field { 125 int offset; 126 union { 127 u64 (*integer)(struct tp_field *field, struct perf_sample *sample); 128 void *(*pointer)(struct tp_field *field, struct perf_sample *sample); 129 }; 130 }; 131 132 #define TP_UINT_FIELD(bits) \ 133 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \ 134 { \ 135 u##bits value; \ 136 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ 137 return value; \ 138 } 139 140 TP_UINT_FIELD(8); 141 TP_UINT_FIELD(16); 142 TP_UINT_FIELD(32); 143 TP_UINT_FIELD(64); 144 145 #define TP_UINT_FIELD__SWAPPED(bits) \ 146 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \ 147 { \ 148 u##bits value; \ 149 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ 150 return bswap_##bits(value);\ 151 } 152 153 TP_UINT_FIELD__SWAPPED(16); 154 TP_UINT_FIELD__SWAPPED(32); 155 TP_UINT_FIELD__SWAPPED(64); 156 157 static int tp_field__init_uint(struct tp_field *field, 158 struct format_field *format_field, 159 bool needs_swap) 160 { 161 field->offset = format_field->offset; 162 163 switch (format_field->size) { 164 case 1: 165 field->integer = tp_field__u8; 166 break; 167 case 2: 168 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16; 169 break; 170 case 4: 171 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32; 172 break; 173 case 8: 174 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64; 175 break; 176 default: 177 return -1; 178 } 179 180 return 0; 181 } 182 183 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample) 184 { 185 return sample->raw_data + field->offset; 186 } 187 188 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field) 189 { 190 field->offset = format_field->offset; 191 field->pointer = tp_field__ptr; 192 return 0; 193 } 194 195 struct syscall_tp { 196 struct tp_field id; 197 union { 198 struct tp_field args, ret; 199 }; 200 }; 201 202 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel, 203 struct tp_field *field, 204 const char *name) 205 { 206 struct format_field *format_field = perf_evsel__field(evsel, name); 207 208 if (format_field == NULL) 209 return -1; 210 211 return tp_field__init_uint(field, format_field, evsel->needs_swap); 212 } 213 214 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \ 215 ({ struct syscall_tp *sc = evsel->priv;\ 216 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); }) 217 218 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel, 219 struct tp_field *field, 220 const char *name) 221 { 222 struct format_field *format_field = perf_evsel__field(evsel, name); 223 224 if (format_field == NULL) 225 return -1; 226 227 return tp_field__init_ptr(field, format_field); 228 } 229 230 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \ 231 ({ struct syscall_tp *sc = evsel->priv;\ 232 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); }) 233 234 static void perf_evsel__delete_priv(struct perf_evsel *evsel) 235 { 236 zfree(&evsel->priv); 237 perf_evsel__delete(evsel); 238 } 239 240 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler) 241 { 242 evsel->priv = malloc(sizeof(struct syscall_tp)); 243 if (evsel->priv != NULL) { 244 if (perf_evsel__init_sc_tp_uint_field(evsel, id)) 245 goto out_delete; 246 247 evsel->handler = handler; 248 return 0; 249 } 250 251 return -ENOMEM; 252 253 out_delete: 254 zfree(&evsel->priv); 255 return -ENOENT; 256 } 257 258 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler) 259 { 260 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction); 261 262 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */ 263 if (IS_ERR(evsel)) 264 evsel = perf_evsel__newtp("syscalls", direction); 265 266 if (IS_ERR(evsel)) 267 return NULL; 268 269 if (perf_evsel__init_syscall_tp(evsel, handler)) 270 goto out_delete; 271 272 return evsel; 273 274 out_delete: 275 perf_evsel__delete_priv(evsel); 276 return NULL; 277 } 278 279 #define perf_evsel__sc_tp_uint(evsel, name, sample) \ 280 ({ struct syscall_tp *fields = evsel->priv; \ 281 fields->name.integer(&fields->name, sample); }) 282 283 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \ 284 ({ struct syscall_tp *fields = evsel->priv; \ 285 fields->name.pointer(&fields->name, sample); }) 286 287 size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val) 288 { 289 int idx = val - sa->offset; 290 291 if (idx < 0 || idx >= sa->nr_entries) 292 return scnprintf(bf, size, intfmt, val); 293 294 return scnprintf(bf, size, "%s", sa->entries[idx]); 295 } 296 297 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size, 298 const char *intfmt, 299 struct syscall_arg *arg) 300 { 301 return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->val); 302 } 303 304 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, 305 struct syscall_arg *arg) 306 { 307 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg); 308 } 309 310 #define SCA_STRARRAY syscall_arg__scnprintf_strarray 311 312 struct strarrays { 313 int nr_entries; 314 struct strarray **entries; 315 }; 316 317 #define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \ 318 .nr_entries = ARRAY_SIZE(array), \ 319 .entries = array, \ 320 } 321 322 size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, 323 struct syscall_arg *arg) 324 { 325 struct strarrays *sas = arg->parm; 326 int i; 327 328 for (i = 0; i < sas->nr_entries; ++i) { 329 struct strarray *sa = sas->entries[i]; 330 int idx = arg->val - sa->offset; 331 332 if (idx >= 0 && idx < sa->nr_entries) { 333 if (sa->entries[idx] == NULL) 334 break; 335 return scnprintf(bf, size, "%s", sa->entries[idx]); 336 } 337 } 338 339 return scnprintf(bf, size, "%d", arg->val); 340 } 341 342 #ifndef AT_FDCWD 343 #define AT_FDCWD -100 344 #endif 345 346 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size, 347 struct syscall_arg *arg) 348 { 349 int fd = arg->val; 350 351 if (fd == AT_FDCWD) 352 return scnprintf(bf, size, "CWD"); 353 354 return syscall_arg__scnprintf_fd(bf, size, arg); 355 } 356 357 #define SCA_FDAT syscall_arg__scnprintf_fd_at 358 359 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 360 struct syscall_arg *arg); 361 362 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd 363 364 size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg) 365 { 366 return scnprintf(bf, size, "%#lx", arg->val); 367 } 368 369 size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg) 370 { 371 return scnprintf(bf, size, "%d", arg->val); 372 } 373 374 size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg) 375 { 376 return scnprintf(bf, size, "%ld", arg->val); 377 } 378 379 static const char *bpf_cmd[] = { 380 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM", 381 "MAP_GET_NEXT_KEY", "PROG_LOAD", 382 }; 383 static DEFINE_STRARRAY(bpf_cmd); 384 385 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", }; 386 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1); 387 388 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", }; 389 static DEFINE_STRARRAY(itimers); 390 391 static const char *keyctl_options[] = { 392 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN", 393 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ", 394 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT", 395 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT", 396 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT", 397 }; 398 static DEFINE_STRARRAY(keyctl_options); 399 400 static const char *whences[] = { "SET", "CUR", "END", 401 #ifdef SEEK_DATA 402 "DATA", 403 #endif 404 #ifdef SEEK_HOLE 405 "HOLE", 406 #endif 407 }; 408 static DEFINE_STRARRAY(whences); 409 410 static const char *fcntl_cmds[] = { 411 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK", 412 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64", 413 "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX", 414 "GETOWNER_UIDS", 415 }; 416 static DEFINE_STRARRAY(fcntl_cmds); 417 418 static const char *fcntl_linux_specific_cmds[] = { 419 "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC", 420 "SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS", 421 "GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT", 422 }; 423 424 static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE); 425 426 static struct strarray *fcntl_cmds_arrays[] = { 427 &strarray__fcntl_cmds, 428 &strarray__fcntl_linux_specific_cmds, 429 }; 430 431 static DEFINE_STRARRAYS(fcntl_cmds_arrays); 432 433 static const char *rlimit_resources[] = { 434 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE", 435 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO", 436 "RTTIME", 437 }; 438 static DEFINE_STRARRAY(rlimit_resources); 439 440 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", }; 441 static DEFINE_STRARRAY(sighow); 442 443 static const char *clockid[] = { 444 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID", 445 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME", 446 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI" 447 }; 448 static DEFINE_STRARRAY(clockid); 449 450 static const char *socket_families[] = { 451 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM", 452 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI", 453 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC", 454 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC", 455 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF", 456 "ALG", "NFC", "VSOCK", 457 }; 458 static DEFINE_STRARRAY(socket_families); 459 460 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, 461 struct syscall_arg *arg) 462 { 463 size_t printed = 0; 464 int mode = arg->val; 465 466 if (mode == F_OK) /* 0 */ 467 return scnprintf(bf, size, "F"); 468 #define P_MODE(n) \ 469 if (mode & n##_OK) { \ 470 printed += scnprintf(bf + printed, size - printed, "%s", #n); \ 471 mode &= ~n##_OK; \ 472 } 473 474 P_MODE(R); 475 P_MODE(W); 476 P_MODE(X); 477 #undef P_MODE 478 479 if (mode) 480 printed += scnprintf(bf + printed, size - printed, "|%#x", mode); 481 482 return printed; 483 } 484 485 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode 486 487 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, 488 struct syscall_arg *arg); 489 490 #define SCA_FILENAME syscall_arg__scnprintf_filename 491 492 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size, 493 struct syscall_arg *arg) 494 { 495 int printed = 0, flags = arg->val; 496 497 #define P_FLAG(n) \ 498 if (flags & O_##n) { \ 499 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 500 flags &= ~O_##n; \ 501 } 502 503 P_FLAG(CLOEXEC); 504 P_FLAG(NONBLOCK); 505 #undef P_FLAG 506 507 if (flags) 508 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 509 510 return printed; 511 } 512 513 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags 514 515 #ifndef GRND_NONBLOCK 516 #define GRND_NONBLOCK 0x0001 517 #endif 518 #ifndef GRND_RANDOM 519 #define GRND_RANDOM 0x0002 520 #endif 521 522 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, 523 struct syscall_arg *arg) 524 { 525 int printed = 0, flags = arg->val; 526 527 #define P_FLAG(n) \ 528 if (flags & GRND_##n) { \ 529 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 530 flags &= ~GRND_##n; \ 531 } 532 533 P_FLAG(RANDOM); 534 P_FLAG(NONBLOCK); 535 #undef P_FLAG 536 537 if (flags) 538 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 539 540 return printed; 541 } 542 543 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags 544 545 #define STRARRAY(name, array) \ 546 { .scnprintf = SCA_STRARRAY, \ 547 .parm = &strarray__##array, } 548 549 #include "trace/beauty/arch_errno_names.c" 550 #include "trace/beauty/eventfd.c" 551 #include "trace/beauty/futex_op.c" 552 #include "trace/beauty/futex_val3.c" 553 #include "trace/beauty/mmap.c" 554 #include "trace/beauty/mode_t.c" 555 #include "trace/beauty/msg_flags.c" 556 #include "trace/beauty/open_flags.c" 557 #include "trace/beauty/perf_event_open.c" 558 #include "trace/beauty/pid.c" 559 #include "trace/beauty/sched_policy.c" 560 #include "trace/beauty/seccomp.c" 561 #include "trace/beauty/signum.c" 562 #include "trace/beauty/socket_type.c" 563 #include "trace/beauty/waitid_options.c" 564 565 struct syscall_arg_fmt { 566 size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg); 567 void *parm; 568 const char *name; 569 bool show_zero; 570 }; 571 572 static struct syscall_fmt { 573 const char *name; 574 const char *alias; 575 struct syscall_arg_fmt arg[6]; 576 u8 nr_args; 577 bool errpid; 578 bool timeout; 579 bool hexret; 580 } syscall_fmts[] = { 581 { .name = "access", 582 .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, }, 583 { .name = "bpf", 584 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, }, 585 { .name = "brk", .hexret = true, 586 .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, }, 587 { .name = "clock_gettime", 588 .arg = { [0] = STRARRAY(clk_id, clockid), }, }, 589 { .name = "clone", .errpid = true, .nr_args = 5, 590 .arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, }, 591 [1] = { .name = "child_stack", .scnprintf = SCA_HEX, }, 592 [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, }, 593 [3] = { .name = "child_tidptr", .scnprintf = SCA_HEX, }, 594 [4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, }, 595 { .name = "close", 596 .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, }, 597 { .name = "epoll_ctl", 598 .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, }, 599 { .name = "eventfd2", 600 .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, }, 601 { .name = "fchmodat", 602 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, 603 { .name = "fchownat", 604 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, 605 { .name = "fcntl", 606 .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */ 607 .parm = &strarrays__fcntl_cmds_arrays, 608 .show_zero = true, }, 609 [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, }, 610 { .name = "flock", 611 .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, }, 612 { .name = "fstat", .alias = "newfstat", }, 613 { .name = "fstatat", .alias = "newfstatat", }, 614 { .name = "futex", 615 .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ }, 616 [5] = { .scnprintf = SCA_FUTEX_VAL3, /* val3 */ }, }, }, 617 { .name = "futimesat", 618 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, 619 { .name = "getitimer", 620 .arg = { [0] = STRARRAY(which, itimers), }, }, 621 { .name = "getpid", .errpid = true, }, 622 { .name = "getpgid", .errpid = true, }, 623 { .name = "getppid", .errpid = true, }, 624 { .name = "getrandom", 625 .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, }, 626 { .name = "getrlimit", 627 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, }, 628 { .name = "gettid", .errpid = true, }, 629 { .name = "ioctl", 630 .arg = { 631 #if defined(__i386__) || defined(__x86_64__) 632 /* 633 * FIXME: Make this available to all arches. 634 */ 635 [1] = { .scnprintf = SCA_IOCTL_CMD, /* cmd */ }, 636 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, }, 637 #else 638 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, }, 639 #endif 640 { .name = "kcmp", .nr_args = 5, 641 .arg = { [0] = { .name = "pid1", .scnprintf = SCA_PID, }, 642 [1] = { .name = "pid2", .scnprintf = SCA_PID, }, 643 [2] = { .name = "type", .scnprintf = SCA_KCMP_TYPE, }, 644 [3] = { .name = "idx1", .scnprintf = SCA_KCMP_IDX, }, 645 [4] = { .name = "idx2", .scnprintf = SCA_KCMP_IDX, }, }, }, 646 { .name = "keyctl", 647 .arg = { [0] = STRARRAY(option, keyctl_options), }, }, 648 { .name = "kill", 649 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, 650 { .name = "linkat", 651 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, 652 { .name = "lseek", 653 .arg = { [2] = STRARRAY(whence, whences), }, }, 654 { .name = "lstat", .alias = "newlstat", }, 655 { .name = "madvise", 656 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, 657 [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, }, 658 { .name = "mkdirat", 659 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, 660 { .name = "mknodat", 661 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, 662 { .name = "mlock", 663 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, }, 664 { .name = "mlockall", 665 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, }, 666 { .name = "mmap", .hexret = true, 667 /* The standard mmap maps to old_mmap on s390x */ 668 #if defined(__s390x__) 669 .alias = "old_mmap", 670 #endif 671 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, 672 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, 673 [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, }, 674 { .name = "mprotect", 675 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, 676 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, }, 677 { .name = "mq_unlink", 678 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, }, 679 { .name = "mremap", .hexret = true, 680 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, 681 [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ }, 682 [4] = { .scnprintf = SCA_HEX, /* new_addr */ }, }, }, 683 { .name = "munlock", 684 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, }, 685 { .name = "munmap", 686 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, }, 687 { .name = "name_to_handle_at", 688 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, 689 { .name = "newfstatat", 690 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, 691 { .name = "open", 692 .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, 693 { .name = "open_by_handle_at", 694 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, 695 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, 696 { .name = "openat", 697 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, 698 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, 699 { .name = "perf_event_open", 700 .arg = { [2] = { .scnprintf = SCA_INT, /* cpu */ }, 701 [3] = { .scnprintf = SCA_FD, /* group_fd */ }, 702 [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, }, 703 { .name = "pipe2", 704 .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, }, 705 { .name = "pkey_alloc", 706 .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS, /* access_rights */ }, }, }, 707 { .name = "pkey_free", 708 .arg = { [0] = { .scnprintf = SCA_INT, /* key */ }, }, }, 709 { .name = "pkey_mprotect", 710 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, 711 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, 712 [3] = { .scnprintf = SCA_INT, /* pkey */ }, }, }, 713 { .name = "poll", .timeout = true, }, 714 { .name = "ppoll", .timeout = true, }, 715 { .name = "prctl", .alias = "arch_prctl", 716 .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ }, 717 [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ }, 718 [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, }, 719 { .name = "pread", .alias = "pread64", }, 720 { .name = "preadv", .alias = "pread", }, 721 { .name = "prlimit64", 722 .arg = { [1] = STRARRAY(resource, rlimit_resources), }, }, 723 { .name = "pwrite", .alias = "pwrite64", }, 724 { .name = "readlinkat", 725 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, 726 { .name = "recvfrom", 727 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, 728 { .name = "recvmmsg", 729 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, 730 { .name = "recvmsg", 731 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, 732 { .name = "renameat", 733 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, 734 { .name = "rt_sigaction", 735 .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, 736 { .name = "rt_sigprocmask", 737 .arg = { [0] = STRARRAY(how, sighow), }, }, 738 { .name = "rt_sigqueueinfo", 739 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, 740 { .name = "rt_tgsigqueueinfo", 741 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, 742 { .name = "sched_setscheduler", 743 .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, }, 744 { .name = "seccomp", 745 .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, /* op */ }, 746 [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, }, 747 { .name = "select", .timeout = true, }, 748 { .name = "sendmmsg", 749 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, 750 { .name = "sendmsg", 751 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, 752 { .name = "sendto", 753 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, 754 { .name = "set_tid_address", .errpid = true, }, 755 { .name = "setitimer", 756 .arg = { [0] = STRARRAY(which, itimers), }, }, 757 { .name = "setrlimit", 758 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, }, 759 { .name = "socket", 760 .arg = { [0] = STRARRAY(family, socket_families), 761 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, }, 762 { .name = "socketpair", 763 .arg = { [0] = STRARRAY(family, socket_families), 764 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, }, 765 { .name = "stat", .alias = "newstat", }, 766 { .name = "statx", 767 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fdat */ }, 768 [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } , 769 [3] = { .scnprintf = SCA_STATX_MASK, /* mask */ }, }, }, 770 { .name = "swapoff", 771 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, }, 772 { .name = "swapon", 773 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, }, 774 { .name = "symlinkat", 775 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, 776 { .name = "tgkill", 777 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, 778 { .name = "tkill", 779 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, 780 { .name = "uname", .alias = "newuname", }, 781 { .name = "unlinkat", 782 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, 783 { .name = "utimensat", 784 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, }, 785 { .name = "wait4", .errpid = true, 786 .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, }, 787 { .name = "waitid", .errpid = true, 788 .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, }, 789 }; 790 791 static int syscall_fmt__cmp(const void *name, const void *fmtp) 792 { 793 const struct syscall_fmt *fmt = fmtp; 794 return strcmp(name, fmt->name); 795 } 796 797 static struct syscall_fmt *syscall_fmt__find(const char *name) 798 { 799 const int nmemb = ARRAY_SIZE(syscall_fmts); 800 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); 801 } 802 803 struct syscall { 804 struct event_format *tp_format; 805 int nr_args; 806 struct format_field *args; 807 const char *name; 808 bool is_exit; 809 struct syscall_fmt *fmt; 810 struct syscall_arg_fmt *arg_fmt; 811 }; 812 813 /* 814 * We need to have this 'calculated' boolean because in some cases we really 815 * don't know what is the duration of a syscall, for instance, when we start 816 * a session and some threads are waiting for a syscall to finish, say 'poll', 817 * in which case all we can do is to print "( ? ) for duration and for the 818 * start timestamp. 819 */ 820 static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp) 821 { 822 double duration = (double)t / NSEC_PER_MSEC; 823 size_t printed = fprintf(fp, "("); 824 825 if (!calculated) 826 printed += fprintf(fp, " "); 827 else if (duration >= 1.0) 828 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); 829 else if (duration >= 0.01) 830 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); 831 else 832 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration); 833 return printed + fprintf(fp, "): "); 834 } 835 836 /** 837 * filename.ptr: The filename char pointer that will be vfs_getname'd 838 * filename.entry_str_pos: Where to insert the string translated from 839 * filename.ptr by the vfs_getname tracepoint/kprobe. 840 * ret_scnprintf: syscall args may set this to a different syscall return 841 * formatter, for instance, fcntl may return fds, file flags, etc. 842 */ 843 struct thread_trace { 844 u64 entry_time; 845 bool entry_pending; 846 unsigned long nr_events; 847 unsigned long pfmaj, pfmin; 848 char *entry_str; 849 double runtime_ms; 850 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); 851 struct { 852 unsigned long ptr; 853 short int entry_str_pos; 854 bool pending_open; 855 unsigned int namelen; 856 char *name; 857 } filename; 858 struct { 859 int max; 860 char **table; 861 } paths; 862 863 struct intlist *syscall_stats; 864 }; 865 866 static struct thread_trace *thread_trace__new(void) 867 { 868 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace)); 869 870 if (ttrace) 871 ttrace->paths.max = -1; 872 873 ttrace->syscall_stats = intlist__new(NULL); 874 875 return ttrace; 876 } 877 878 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) 879 { 880 struct thread_trace *ttrace; 881 882 if (thread == NULL) 883 goto fail; 884 885 if (thread__priv(thread) == NULL) 886 thread__set_priv(thread, thread_trace__new()); 887 888 if (thread__priv(thread) == NULL) 889 goto fail; 890 891 ttrace = thread__priv(thread); 892 ++ttrace->nr_events; 893 894 return ttrace; 895 fail: 896 color_fprintf(fp, PERF_COLOR_RED, 897 "WARNING: not enough memory, dropping samples!\n"); 898 return NULL; 899 } 900 901 902 void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg, 903 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg)) 904 { 905 struct thread_trace *ttrace = thread__priv(arg->thread); 906 907 ttrace->ret_scnprintf = ret_scnprintf; 908 } 909 910 #define TRACE_PFMAJ (1 << 0) 911 #define TRACE_PFMIN (1 << 1) 912 913 static const size_t trace__entry_str_size = 2048; 914 915 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) 916 { 917 struct thread_trace *ttrace = thread__priv(thread); 918 919 if (fd > ttrace->paths.max) { 920 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *)); 921 922 if (npath == NULL) 923 return -1; 924 925 if (ttrace->paths.max != -1) { 926 memset(npath + ttrace->paths.max + 1, 0, 927 (fd - ttrace->paths.max) * sizeof(char *)); 928 } else { 929 memset(npath, 0, (fd + 1) * sizeof(char *)); 930 } 931 932 ttrace->paths.table = npath; 933 ttrace->paths.max = fd; 934 } 935 936 ttrace->paths.table[fd] = strdup(pathname); 937 938 return ttrace->paths.table[fd] != NULL ? 0 : -1; 939 } 940 941 static int thread__read_fd_path(struct thread *thread, int fd) 942 { 943 char linkname[PATH_MAX], pathname[PATH_MAX]; 944 struct stat st; 945 int ret; 946 947 if (thread->pid_ == thread->tid) { 948 scnprintf(linkname, sizeof(linkname), 949 "/proc/%d/fd/%d", thread->pid_, fd); 950 } else { 951 scnprintf(linkname, sizeof(linkname), 952 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd); 953 } 954 955 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname)) 956 return -1; 957 958 ret = readlink(linkname, pathname, sizeof(pathname)); 959 960 if (ret < 0 || ret > st.st_size) 961 return -1; 962 963 pathname[ret] = '\0'; 964 return trace__set_fd_pathname(thread, fd, pathname); 965 } 966 967 static const char *thread__fd_path(struct thread *thread, int fd, 968 struct trace *trace) 969 { 970 struct thread_trace *ttrace = thread__priv(thread); 971 972 if (ttrace == NULL) 973 return NULL; 974 975 if (fd < 0) 976 return NULL; 977 978 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) { 979 if (!trace->live) 980 return NULL; 981 ++trace->stats.proc_getname; 982 if (thread__read_fd_path(thread, fd)) 983 return NULL; 984 } 985 986 return ttrace->paths.table[fd]; 987 } 988 989 size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg) 990 { 991 int fd = arg->val; 992 size_t printed = scnprintf(bf, size, "%d", fd); 993 const char *path = thread__fd_path(arg->thread, fd, arg->trace); 994 995 if (path) 996 printed += scnprintf(bf + printed, size - printed, "<%s>", path); 997 998 return printed; 999 } 1000 1001 size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size) 1002 { 1003 size_t printed = scnprintf(bf, size, "%d", fd); 1004 struct thread *thread = machine__find_thread(trace->host, pid, pid); 1005 1006 if (thread) { 1007 const char *path = thread__fd_path(thread, fd, trace); 1008 1009 if (path) 1010 printed += scnprintf(bf + printed, size - printed, "<%s>", path); 1011 1012 thread__put(thread); 1013 } 1014 1015 return printed; 1016 } 1017 1018 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, 1019 struct syscall_arg *arg) 1020 { 1021 int fd = arg->val; 1022 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg); 1023 struct thread_trace *ttrace = thread__priv(arg->thread); 1024 1025 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) 1026 zfree(&ttrace->paths.table[fd]); 1027 1028 return printed; 1029 } 1030 1031 static void thread__set_filename_pos(struct thread *thread, const char *bf, 1032 unsigned long ptr) 1033 { 1034 struct thread_trace *ttrace = thread__priv(thread); 1035 1036 ttrace->filename.ptr = ptr; 1037 ttrace->filename.entry_str_pos = bf - ttrace->entry_str; 1038 } 1039 1040 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, 1041 struct syscall_arg *arg) 1042 { 1043 unsigned long ptr = arg->val; 1044 1045 if (!arg->trace->vfs_getname) 1046 return scnprintf(bf, size, "%#x", ptr); 1047 1048 thread__set_filename_pos(arg->thread, bf, ptr); 1049 return 0; 1050 } 1051 1052 static bool trace__filter_duration(struct trace *trace, double t) 1053 { 1054 return t < (trace->duration_filter * NSEC_PER_MSEC); 1055 } 1056 1057 static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) 1058 { 1059 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; 1060 1061 return fprintf(fp, "%10.3f ", ts); 1062 } 1063 1064 /* 1065 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are 1066 * using ttrace->entry_time for a thread that receives a sys_exit without 1067 * first having received a sys_enter ("poll" issued before tracing session 1068 * starts, lost sys_enter exit due to ring buffer overflow). 1069 */ 1070 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) 1071 { 1072 if (tstamp > 0) 1073 return __trace__fprintf_tstamp(trace, tstamp, fp); 1074 1075 return fprintf(fp, " ? "); 1076 } 1077 1078 static bool done = false; 1079 static bool interrupted = false; 1080 1081 static void sig_handler(int sig) 1082 { 1083 done = true; 1084 interrupted = sig == SIGINT; 1085 } 1086 1087 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, 1088 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp) 1089 { 1090 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); 1091 printed += fprintf_duration(duration, duration_calculated, fp); 1092 1093 if (trace->multiple_threads) { 1094 if (trace->show_comm) 1095 printed += fprintf(fp, "%.14s/", thread__comm_str(thread)); 1096 printed += fprintf(fp, "%d ", thread->tid); 1097 } 1098 1099 return printed; 1100 } 1101 1102 static int trace__process_event(struct trace *trace, struct machine *machine, 1103 union perf_event *event, struct perf_sample *sample) 1104 { 1105 int ret = 0; 1106 1107 switch (event->header.type) { 1108 case PERF_RECORD_LOST: 1109 color_fprintf(trace->output, PERF_COLOR_RED, 1110 "LOST %" PRIu64 " events!\n", event->lost.lost); 1111 ret = machine__process_lost_event(machine, event, sample); 1112 break; 1113 default: 1114 ret = machine__process_event(machine, event, sample); 1115 break; 1116 } 1117 1118 return ret; 1119 } 1120 1121 static int trace__tool_process(struct perf_tool *tool, 1122 union perf_event *event, 1123 struct perf_sample *sample, 1124 struct machine *machine) 1125 { 1126 struct trace *trace = container_of(tool, struct trace, tool); 1127 return trace__process_event(trace, machine, event, sample); 1128 } 1129 1130 static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp) 1131 { 1132 struct machine *machine = vmachine; 1133 1134 if (machine->kptr_restrict_warned) 1135 return NULL; 1136 1137 if (symbol_conf.kptr_restrict) { 1138 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n" 1139 "Check /proc/sys/kernel/kptr_restrict.\n\n" 1140 "Kernel samples will not be resolved.\n"); 1141 machine->kptr_restrict_warned = true; 1142 return NULL; 1143 } 1144 1145 return machine__resolve_kernel_addr(vmachine, addrp, modp); 1146 } 1147 1148 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) 1149 { 1150 int err = symbol__init(NULL); 1151 1152 if (err) 1153 return err; 1154 1155 trace->host = machine__new_host(); 1156 if (trace->host == NULL) 1157 return -ENOMEM; 1158 1159 err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr); 1160 if (err < 0) 1161 goto out; 1162 1163 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, 1164 evlist->threads, trace__tool_process, false, 1165 trace->opts.proc_map_timeout, 1); 1166 out: 1167 if (err) 1168 symbol__exit(); 1169 1170 return err; 1171 } 1172 1173 static void trace__symbols__exit(struct trace *trace) 1174 { 1175 machine__exit(trace->host); 1176 trace->host = NULL; 1177 1178 symbol__exit(); 1179 } 1180 1181 static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args) 1182 { 1183 int idx; 1184 1185 if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0) 1186 nr_args = sc->fmt->nr_args; 1187 1188 sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt)); 1189 if (sc->arg_fmt == NULL) 1190 return -1; 1191 1192 for (idx = 0; idx < nr_args; ++idx) { 1193 if (sc->fmt) 1194 sc->arg_fmt[idx] = sc->fmt->arg[idx]; 1195 } 1196 1197 sc->nr_args = nr_args; 1198 return 0; 1199 } 1200 1201 static int syscall__set_arg_fmts(struct syscall *sc) 1202 { 1203 struct format_field *field; 1204 int idx = 0, len; 1205 1206 for (field = sc->args; field; field = field->next, ++idx) { 1207 if (sc->fmt && sc->fmt->arg[idx].scnprintf) 1208 continue; 1209 1210 if (strcmp(field->type, "const char *") == 0 && 1211 (strcmp(field->name, "filename") == 0 || 1212 strcmp(field->name, "path") == 0 || 1213 strcmp(field->name, "pathname") == 0)) 1214 sc->arg_fmt[idx].scnprintf = SCA_FILENAME; 1215 else if (field->flags & FIELD_IS_POINTER) 1216 sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex; 1217 else if (strcmp(field->type, "pid_t") == 0) 1218 sc->arg_fmt[idx].scnprintf = SCA_PID; 1219 else if (strcmp(field->type, "umode_t") == 0) 1220 sc->arg_fmt[idx].scnprintf = SCA_MODE_T; 1221 else if ((strcmp(field->type, "int") == 0 || 1222 strcmp(field->type, "unsigned int") == 0 || 1223 strcmp(field->type, "long") == 0) && 1224 (len = strlen(field->name)) >= 2 && 1225 strcmp(field->name + len - 2, "fd") == 0) { 1226 /* 1227 * /sys/kernel/tracing/events/syscalls/sys_enter* 1228 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c 1229 * 65 int 1230 * 23 unsigned int 1231 * 7 unsigned long 1232 */ 1233 sc->arg_fmt[idx].scnprintf = SCA_FD; 1234 } 1235 } 1236 1237 return 0; 1238 } 1239 1240 static int trace__read_syscall_info(struct trace *trace, int id) 1241 { 1242 char tp_name[128]; 1243 struct syscall *sc; 1244 const char *name = syscalltbl__name(trace->sctbl, id); 1245 1246 if (name == NULL) 1247 return -1; 1248 1249 if (id > trace->syscalls.max) { 1250 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); 1251 1252 if (nsyscalls == NULL) 1253 return -1; 1254 1255 if (trace->syscalls.max != -1) { 1256 memset(nsyscalls + trace->syscalls.max + 1, 0, 1257 (id - trace->syscalls.max) * sizeof(*sc)); 1258 } else { 1259 memset(nsyscalls, 0, (id + 1) * sizeof(*sc)); 1260 } 1261 1262 trace->syscalls.table = nsyscalls; 1263 trace->syscalls.max = id; 1264 } 1265 1266 sc = trace->syscalls.table + id; 1267 sc->name = name; 1268 1269 sc->fmt = syscall_fmt__find(sc->name); 1270 1271 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); 1272 sc->tp_format = trace_event__tp_format("syscalls", tp_name); 1273 1274 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) { 1275 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias); 1276 sc->tp_format = trace_event__tp_format("syscalls", tp_name); 1277 } 1278 1279 if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields)) 1280 return -1; 1281 1282 if (IS_ERR(sc->tp_format)) 1283 return -1; 1284 1285 sc->args = sc->tp_format->format.fields; 1286 /* 1287 * We need to check and discard the first variable '__syscall_nr' 1288 * or 'nr' that mean the syscall number. It is needless here. 1289 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels. 1290 */ 1291 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) { 1292 sc->args = sc->args->next; 1293 --sc->nr_args; 1294 } 1295 1296 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit"); 1297 1298 return syscall__set_arg_fmts(sc); 1299 } 1300 1301 static int trace__validate_ev_qualifier(struct trace *trace) 1302 { 1303 int err = 0, i; 1304 size_t nr_allocated; 1305 struct str_node *pos; 1306 1307 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier); 1308 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr * 1309 sizeof(trace->ev_qualifier_ids.entries[0])); 1310 1311 if (trace->ev_qualifier_ids.entries == NULL) { 1312 fputs("Error:\tNot enough memory for allocating events qualifier ids\n", 1313 trace->output); 1314 err = -EINVAL; 1315 goto out; 1316 } 1317 1318 nr_allocated = trace->ev_qualifier_ids.nr; 1319 i = 0; 1320 1321 strlist__for_each_entry(pos, trace->ev_qualifier) { 1322 const char *sc = pos->s; 1323 int id = syscalltbl__id(trace->sctbl, sc), match_next = -1; 1324 1325 if (id < 0) { 1326 id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next); 1327 if (id >= 0) 1328 goto matches; 1329 1330 if (err == 0) { 1331 fputs("Error:\tInvalid syscall ", trace->output); 1332 err = -EINVAL; 1333 } else { 1334 fputs(", ", trace->output); 1335 } 1336 1337 fputs(sc, trace->output); 1338 } 1339 matches: 1340 trace->ev_qualifier_ids.entries[i++] = id; 1341 if (match_next == -1) 1342 continue; 1343 1344 while (1) { 1345 id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next); 1346 if (id < 0) 1347 break; 1348 if (nr_allocated == trace->ev_qualifier_ids.nr) { 1349 void *entries; 1350 1351 nr_allocated += 8; 1352 entries = realloc(trace->ev_qualifier_ids.entries, 1353 nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0])); 1354 if (entries == NULL) { 1355 err = -ENOMEM; 1356 fputs("\nError:\t Not enough memory for parsing\n", trace->output); 1357 goto out_free; 1358 } 1359 trace->ev_qualifier_ids.entries = entries; 1360 } 1361 trace->ev_qualifier_ids.nr++; 1362 trace->ev_qualifier_ids.entries[i++] = id; 1363 } 1364 } 1365 1366 if (err < 0) { 1367 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" 1368 "\nHint:\tand: 'man syscalls'\n", trace->output); 1369 out_free: 1370 zfree(&trace->ev_qualifier_ids.entries); 1371 trace->ev_qualifier_ids.nr = 0; 1372 } 1373 out: 1374 return err; 1375 } 1376 1377 /* 1378 * args is to be interpreted as a series of longs but we need to handle 1379 * 8-byte unaligned accesses. args points to raw_data within the event 1380 * and raw_data is guaranteed to be 8-byte unaligned because it is 1381 * preceded by raw_size which is a u32. So we need to copy args to a temp 1382 * variable to read it. Most notably this avoids extended load instructions 1383 * on unaligned addresses 1384 */ 1385 unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx) 1386 { 1387 unsigned long val; 1388 unsigned char *p = arg->args + sizeof(unsigned long) * idx; 1389 1390 memcpy(&val, p, sizeof(val)); 1391 return val; 1392 } 1393 1394 static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size, 1395 struct syscall_arg *arg) 1396 { 1397 if (sc->arg_fmt && sc->arg_fmt[arg->idx].name) 1398 return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name); 1399 1400 return scnprintf(bf, size, "arg%d: ", arg->idx); 1401 } 1402 1403 static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size, 1404 struct syscall_arg *arg, unsigned long val) 1405 { 1406 if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) { 1407 arg->val = val; 1408 if (sc->arg_fmt[arg->idx].parm) 1409 arg->parm = sc->arg_fmt[arg->idx].parm; 1410 return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg); 1411 } 1412 return scnprintf(bf, size, "%ld", val); 1413 } 1414 1415 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, 1416 unsigned char *args, struct trace *trace, 1417 struct thread *thread) 1418 { 1419 size_t printed = 0; 1420 unsigned long val; 1421 u8 bit = 1; 1422 struct syscall_arg arg = { 1423 .args = args, 1424 .idx = 0, 1425 .mask = 0, 1426 .trace = trace, 1427 .thread = thread, 1428 }; 1429 struct thread_trace *ttrace = thread__priv(thread); 1430 1431 /* 1432 * Things like fcntl will set this in its 'cmd' formatter to pick the 1433 * right formatter for the return value (an fd? file flags?), which is 1434 * not needed for syscalls that always return a given type, say an fd. 1435 */ 1436 ttrace->ret_scnprintf = NULL; 1437 1438 if (sc->args != NULL) { 1439 struct format_field *field; 1440 1441 for (field = sc->args; field; 1442 field = field->next, ++arg.idx, bit <<= 1) { 1443 if (arg.mask & bit) 1444 continue; 1445 1446 val = syscall_arg__val(&arg, arg.idx); 1447 1448 /* 1449 * Suppress this argument if its value is zero and 1450 * and we don't have a string associated in an 1451 * strarray for it. 1452 */ 1453 if (val == 0 && 1454 !(sc->arg_fmt && 1455 (sc->arg_fmt[arg.idx].show_zero || 1456 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY || 1457 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) && 1458 sc->arg_fmt[arg.idx].parm)) 1459 continue; 1460 1461 printed += scnprintf(bf + printed, size - printed, 1462 "%s%s: ", printed ? ", " : "", field->name); 1463 printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val); 1464 } 1465 } else if (IS_ERR(sc->tp_format)) { 1466 /* 1467 * If we managed to read the tracepoint /format file, then we 1468 * may end up not having any args, like with gettid(), so only 1469 * print the raw args when we didn't manage to read it. 1470 */ 1471 while (arg.idx < sc->nr_args) { 1472 if (arg.mask & bit) 1473 goto next_arg; 1474 val = syscall_arg__val(&arg, arg.idx); 1475 if (printed) 1476 printed += scnprintf(bf + printed, size - printed, ", "); 1477 printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg); 1478 printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val); 1479 next_arg: 1480 ++arg.idx; 1481 bit <<= 1; 1482 } 1483 } 1484 1485 return printed; 1486 } 1487 1488 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel, 1489 union perf_event *event, 1490 struct perf_sample *sample); 1491 1492 static struct syscall *trace__syscall_info(struct trace *trace, 1493 struct perf_evsel *evsel, int id) 1494 { 1495 1496 if (id < 0) { 1497 1498 /* 1499 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried 1500 * before that, leaving at a higher verbosity level till that is 1501 * explained. Reproduced with plain ftrace with: 1502 * 1503 * echo 1 > /t/events/raw_syscalls/sys_exit/enable 1504 * grep "NR -1 " /t/trace_pipe 1505 * 1506 * After generating some load on the machine. 1507 */ 1508 if (verbose > 1) { 1509 static u64 n; 1510 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n", 1511 id, perf_evsel__name(evsel), ++n); 1512 } 1513 return NULL; 1514 } 1515 1516 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) && 1517 trace__read_syscall_info(trace, id)) 1518 goto out_cant_read; 1519 1520 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL)) 1521 goto out_cant_read; 1522 1523 return &trace->syscalls.table[id]; 1524 1525 out_cant_read: 1526 if (verbose > 0) { 1527 fprintf(trace->output, "Problems reading syscall %d", id); 1528 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL) 1529 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name); 1530 fputs(" information\n", trace->output); 1531 } 1532 return NULL; 1533 } 1534 1535 static void thread__update_stats(struct thread_trace *ttrace, 1536 int id, struct perf_sample *sample) 1537 { 1538 struct int_node *inode; 1539 struct stats *stats; 1540 u64 duration = 0; 1541 1542 inode = intlist__findnew(ttrace->syscall_stats, id); 1543 if (inode == NULL) 1544 return; 1545 1546 stats = inode->priv; 1547 if (stats == NULL) { 1548 stats = malloc(sizeof(struct stats)); 1549 if (stats == NULL) 1550 return; 1551 init_stats(stats); 1552 inode->priv = stats; 1553 } 1554 1555 if (ttrace->entry_time && sample->time > ttrace->entry_time) 1556 duration = sample->time - ttrace->entry_time; 1557 1558 update_stats(stats, duration); 1559 } 1560 1561 static int trace__printf_interrupted_entry(struct trace *trace) 1562 { 1563 struct thread_trace *ttrace; 1564 size_t printed; 1565 1566 if (trace->current == NULL) 1567 return 0; 1568 1569 ttrace = thread__priv(trace->current); 1570 1571 if (!ttrace->entry_pending) 1572 return 0; 1573 1574 printed = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output); 1575 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); 1576 ttrace->entry_pending = false; 1577 1578 return printed; 1579 } 1580 1581 static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel, 1582 struct perf_sample *sample, struct thread *thread) 1583 { 1584 int printed = 0; 1585 1586 if (trace->print_sample) { 1587 double ts = (double)sample->time / NSEC_PER_MSEC; 1588 1589 printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n", 1590 perf_evsel__name(evsel), ts, 1591 thread__comm_str(thread), 1592 sample->pid, sample->tid, sample->cpu); 1593 } 1594 1595 return printed; 1596 } 1597 1598 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, 1599 union perf_event *event __maybe_unused, 1600 struct perf_sample *sample) 1601 { 1602 char *msg; 1603 void *args; 1604 size_t printed = 0; 1605 struct thread *thread; 1606 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; 1607 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1608 struct thread_trace *ttrace; 1609 1610 if (sc == NULL) 1611 return -1; 1612 1613 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1614 ttrace = thread__trace(thread, trace->output); 1615 if (ttrace == NULL) 1616 goto out_put; 1617 1618 trace__fprintf_sample(trace, evsel, sample, thread); 1619 1620 args = perf_evsel__sc_tp_ptr(evsel, args, sample); 1621 1622 if (ttrace->entry_str == NULL) { 1623 ttrace->entry_str = malloc(trace__entry_str_size); 1624 if (!ttrace->entry_str) 1625 goto out_put; 1626 } 1627 1628 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) 1629 trace__printf_interrupted_entry(trace); 1630 1631 ttrace->entry_time = sample->time; 1632 msg = ttrace->entry_str; 1633 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name); 1634 1635 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed, 1636 args, trace, thread); 1637 1638 if (sc->is_exit) { 1639 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) { 1640 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output); 1641 fprintf(trace->output, "%-70s)\n", ttrace->entry_str); 1642 } 1643 } else { 1644 ttrace->entry_pending = true; 1645 /* See trace__vfs_getname & trace__sys_exit */ 1646 ttrace->filename.pending_open = false; 1647 } 1648 1649 if (trace->current != thread) { 1650 thread__put(trace->current); 1651 trace->current = thread__get(thread); 1652 } 1653 err = 0; 1654 out_put: 1655 thread__put(thread); 1656 return err; 1657 } 1658 1659 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel, 1660 struct perf_sample *sample, 1661 struct callchain_cursor *cursor) 1662 { 1663 struct addr_location al; 1664 1665 if (machine__resolve(trace->host, &al, sample) < 0 || 1666 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, evsel->attr.sample_max_stack)) 1667 return -1; 1668 1669 return 0; 1670 } 1671 1672 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample) 1673 { 1674 /* TODO: user-configurable print_opts */ 1675 const unsigned int print_opts = EVSEL__PRINT_SYM | 1676 EVSEL__PRINT_DSO | 1677 EVSEL__PRINT_UNKNOWN_AS_ADDR; 1678 1679 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output); 1680 } 1681 1682 static const char *errno_to_name(struct perf_evsel *evsel, int err) 1683 { 1684 struct perf_env *env = perf_evsel__env(evsel); 1685 const char *arch_name = perf_env__arch(env); 1686 1687 return arch_syscalls__strerrno(arch_name, err); 1688 } 1689 1690 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, 1691 union perf_event *event __maybe_unused, 1692 struct perf_sample *sample) 1693 { 1694 long ret; 1695 u64 duration = 0; 1696 bool duration_calculated = false; 1697 struct thread *thread; 1698 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0; 1699 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1700 struct thread_trace *ttrace; 1701 1702 if (sc == NULL) 1703 return -1; 1704 1705 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1706 ttrace = thread__trace(thread, trace->output); 1707 if (ttrace == NULL) 1708 goto out_put; 1709 1710 trace__fprintf_sample(trace, evsel, sample, thread); 1711 1712 if (trace->summary) 1713 thread__update_stats(ttrace, id, sample); 1714 1715 ret = perf_evsel__sc_tp_uint(evsel, ret, sample); 1716 1717 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) { 1718 trace__set_fd_pathname(thread, ret, ttrace->filename.name); 1719 ttrace->filename.pending_open = false; 1720 ++trace->stats.vfs_getname; 1721 } 1722 1723 if (ttrace->entry_time) { 1724 duration = sample->time - ttrace->entry_time; 1725 if (trace__filter_duration(trace, duration)) 1726 goto out; 1727 duration_calculated = true; 1728 } else if (trace->duration_filter) 1729 goto out; 1730 1731 if (sample->callchain) { 1732 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); 1733 if (callchain_ret == 0) { 1734 if (callchain_cursor.nr < trace->min_stack) 1735 goto out; 1736 callchain_ret = 1; 1737 } 1738 } 1739 1740 if (trace->summary_only) 1741 goto out; 1742 1743 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output); 1744 1745 if (ttrace->entry_pending) { 1746 fprintf(trace->output, "%-70s", ttrace->entry_str); 1747 } else { 1748 fprintf(trace->output, " ... ["); 1749 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued"); 1750 fprintf(trace->output, "]: %s()", sc->name); 1751 } 1752 1753 if (sc->fmt == NULL) { 1754 if (ret < 0) 1755 goto errno_print; 1756 signed_print: 1757 fprintf(trace->output, ") = %ld", ret); 1758 } else if (ret < 0) { 1759 errno_print: { 1760 char bf[STRERR_BUFSIZE]; 1761 const char *emsg = str_error_r(-ret, bf, sizeof(bf)), 1762 *e = errno_to_name(evsel, -ret); 1763 1764 fprintf(trace->output, ") = -1 %s %s", e, emsg); 1765 } 1766 } else if (ret == 0 && sc->fmt->timeout) 1767 fprintf(trace->output, ") = 0 Timeout"); 1768 else if (ttrace->ret_scnprintf) { 1769 char bf[1024]; 1770 struct syscall_arg arg = { 1771 .val = ret, 1772 .thread = thread, 1773 .trace = trace, 1774 }; 1775 ttrace->ret_scnprintf(bf, sizeof(bf), &arg); 1776 ttrace->ret_scnprintf = NULL; 1777 fprintf(trace->output, ") = %s", bf); 1778 } else if (sc->fmt->hexret) 1779 fprintf(trace->output, ") = %#lx", ret); 1780 else if (sc->fmt->errpid) { 1781 struct thread *child = machine__find_thread(trace->host, ret, ret); 1782 1783 if (child != NULL) { 1784 fprintf(trace->output, ") = %ld", ret); 1785 if (child->comm_set) 1786 fprintf(trace->output, " (%s)", thread__comm_str(child)); 1787 thread__put(child); 1788 } 1789 } else 1790 goto signed_print; 1791 1792 fputc('\n', trace->output); 1793 1794 if (callchain_ret > 0) 1795 trace__fprintf_callchain(trace, sample); 1796 else if (callchain_ret < 0) 1797 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); 1798 out: 1799 ttrace->entry_pending = false; 1800 err = 0; 1801 out_put: 1802 thread__put(thread); 1803 return err; 1804 } 1805 1806 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, 1807 union perf_event *event __maybe_unused, 1808 struct perf_sample *sample) 1809 { 1810 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1811 struct thread_trace *ttrace; 1812 size_t filename_len, entry_str_len, to_move; 1813 ssize_t remaining_space; 1814 char *pos; 1815 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname"); 1816 1817 if (!thread) 1818 goto out; 1819 1820 ttrace = thread__priv(thread); 1821 if (!ttrace) 1822 goto out_put; 1823 1824 filename_len = strlen(filename); 1825 if (filename_len == 0) 1826 goto out_put; 1827 1828 if (ttrace->filename.namelen < filename_len) { 1829 char *f = realloc(ttrace->filename.name, filename_len + 1); 1830 1831 if (f == NULL) 1832 goto out_put; 1833 1834 ttrace->filename.namelen = filename_len; 1835 ttrace->filename.name = f; 1836 } 1837 1838 strcpy(ttrace->filename.name, filename); 1839 ttrace->filename.pending_open = true; 1840 1841 if (!ttrace->filename.ptr) 1842 goto out_put; 1843 1844 entry_str_len = strlen(ttrace->entry_str); 1845 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */ 1846 if (remaining_space <= 0) 1847 goto out_put; 1848 1849 if (filename_len > (size_t)remaining_space) { 1850 filename += filename_len - remaining_space; 1851 filename_len = remaining_space; 1852 } 1853 1854 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */ 1855 pos = ttrace->entry_str + ttrace->filename.entry_str_pos; 1856 memmove(pos + filename_len, pos, to_move); 1857 memcpy(pos, filename, filename_len); 1858 1859 ttrace->filename.ptr = 0; 1860 ttrace->filename.entry_str_pos = 0; 1861 out_put: 1862 thread__put(thread); 1863 out: 1864 return 0; 1865 } 1866 1867 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel, 1868 union perf_event *event __maybe_unused, 1869 struct perf_sample *sample) 1870 { 1871 u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); 1872 double runtime_ms = (double)runtime / NSEC_PER_MSEC; 1873 struct thread *thread = machine__findnew_thread(trace->host, 1874 sample->pid, 1875 sample->tid); 1876 struct thread_trace *ttrace = thread__trace(thread, trace->output); 1877 1878 if (ttrace == NULL) 1879 goto out_dump; 1880 1881 ttrace->runtime_ms += runtime_ms; 1882 trace->runtime_ms += runtime_ms; 1883 out_put: 1884 thread__put(thread); 1885 return 0; 1886 1887 out_dump: 1888 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n", 1889 evsel->name, 1890 perf_evsel__strval(evsel, sample, "comm"), 1891 (pid_t)perf_evsel__intval(evsel, sample, "pid"), 1892 runtime, 1893 perf_evsel__intval(evsel, sample, "vruntime")); 1894 goto out_put; 1895 } 1896 1897 static int bpf_output__printer(enum binary_printer_ops op, 1898 unsigned int val, void *extra __maybe_unused, FILE *fp) 1899 { 1900 unsigned char ch = (unsigned char)val; 1901 1902 switch (op) { 1903 case BINARY_PRINT_CHAR_DATA: 1904 return fprintf(fp, "%c", isprint(ch) ? ch : '.'); 1905 case BINARY_PRINT_DATA_BEGIN: 1906 case BINARY_PRINT_LINE_BEGIN: 1907 case BINARY_PRINT_ADDR: 1908 case BINARY_PRINT_NUM_DATA: 1909 case BINARY_PRINT_NUM_PAD: 1910 case BINARY_PRINT_SEP: 1911 case BINARY_PRINT_CHAR_PAD: 1912 case BINARY_PRINT_LINE_END: 1913 case BINARY_PRINT_DATA_END: 1914 default: 1915 break; 1916 } 1917 1918 return 0; 1919 } 1920 1921 static void bpf_output__fprintf(struct trace *trace, 1922 struct perf_sample *sample) 1923 { 1924 binary__fprintf(sample->raw_data, sample->raw_size, 8, 1925 bpf_output__printer, NULL, trace->output); 1926 } 1927 1928 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, 1929 union perf_event *event __maybe_unused, 1930 struct perf_sample *sample) 1931 { 1932 int callchain_ret = 0; 1933 1934 if (sample->callchain) { 1935 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); 1936 if (callchain_ret == 0) { 1937 if (callchain_cursor.nr < trace->min_stack) 1938 goto out; 1939 callchain_ret = 1; 1940 } 1941 } 1942 1943 trace__printf_interrupted_entry(trace); 1944 trace__fprintf_tstamp(trace, sample->time, trace->output); 1945 1946 if (trace->trace_syscalls) 1947 fprintf(trace->output, "( ): "); 1948 1949 fprintf(trace->output, "%s:", evsel->name); 1950 1951 if (perf_evsel__is_bpf_output(evsel)) { 1952 bpf_output__fprintf(trace, sample); 1953 } else if (evsel->tp_format) { 1954 event_format__fprintf(evsel->tp_format, sample->cpu, 1955 sample->raw_data, sample->raw_size, 1956 trace->output); 1957 } 1958 1959 fprintf(trace->output, ")\n"); 1960 1961 if (callchain_ret > 0) 1962 trace__fprintf_callchain(trace, sample); 1963 else if (callchain_ret < 0) 1964 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); 1965 out: 1966 return 0; 1967 } 1968 1969 static void print_location(FILE *f, struct perf_sample *sample, 1970 struct addr_location *al, 1971 bool print_dso, bool print_sym) 1972 { 1973 1974 if ((verbose > 0 || print_dso) && al->map) 1975 fprintf(f, "%s@", al->map->dso->long_name); 1976 1977 if ((verbose > 0 || print_sym) && al->sym) 1978 fprintf(f, "%s+0x%" PRIx64, al->sym->name, 1979 al->addr - al->sym->start); 1980 else if (al->map) 1981 fprintf(f, "0x%" PRIx64, al->addr); 1982 else 1983 fprintf(f, "0x%" PRIx64, sample->addr); 1984 } 1985 1986 static int trace__pgfault(struct trace *trace, 1987 struct perf_evsel *evsel, 1988 union perf_event *event __maybe_unused, 1989 struct perf_sample *sample) 1990 { 1991 struct thread *thread; 1992 struct addr_location al; 1993 char map_type = 'd'; 1994 struct thread_trace *ttrace; 1995 int err = -1; 1996 int callchain_ret = 0; 1997 1998 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1999 2000 if (sample->callchain) { 2001 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); 2002 if (callchain_ret == 0) { 2003 if (callchain_cursor.nr < trace->min_stack) 2004 goto out_put; 2005 callchain_ret = 1; 2006 } 2007 } 2008 2009 ttrace = thread__trace(thread, trace->output); 2010 if (ttrace == NULL) 2011 goto out_put; 2012 2013 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ) 2014 ttrace->pfmaj++; 2015 else 2016 ttrace->pfmin++; 2017 2018 if (trace->summary_only) 2019 goto out; 2020 2021 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION, 2022 sample->ip, &al); 2023 2024 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output); 2025 2026 fprintf(trace->output, "%sfault [", 2027 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ? 2028 "maj" : "min"); 2029 2030 print_location(trace->output, sample, &al, false, true); 2031 2032 fprintf(trace->output, "] => "); 2033 2034 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE, 2035 sample->addr, &al); 2036 2037 if (!al.map) { 2038 thread__find_addr_location(thread, sample->cpumode, 2039 MAP__FUNCTION, sample->addr, &al); 2040 2041 if (al.map) 2042 map_type = 'x'; 2043 else 2044 map_type = '?'; 2045 } 2046 2047 print_location(trace->output, sample, &al, true, false); 2048 2049 fprintf(trace->output, " (%c%c)\n", map_type, al.level); 2050 2051 if (callchain_ret > 0) 2052 trace__fprintf_callchain(trace, sample); 2053 else if (callchain_ret < 0) 2054 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); 2055 out: 2056 err = 0; 2057 out_put: 2058 thread__put(thread); 2059 return err; 2060 } 2061 2062 static void trace__set_base_time(struct trace *trace, 2063 struct perf_evsel *evsel, 2064 struct perf_sample *sample) 2065 { 2066 /* 2067 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust 2068 * and don't use sample->time unconditionally, we may end up having 2069 * some other event in the future without PERF_SAMPLE_TIME for good 2070 * reason, i.e. we may not be interested in its timestamps, just in 2071 * it taking place, picking some piece of information when it 2072 * appears in our event stream (vfs_getname comes to mind). 2073 */ 2074 if (trace->base_time == 0 && !trace->full_time && 2075 (evsel->attr.sample_type & PERF_SAMPLE_TIME)) 2076 trace->base_time = sample->time; 2077 } 2078 2079 static int trace__process_sample(struct perf_tool *tool, 2080 union perf_event *event, 2081 struct perf_sample *sample, 2082 struct perf_evsel *evsel, 2083 struct machine *machine __maybe_unused) 2084 { 2085 struct trace *trace = container_of(tool, struct trace, tool); 2086 struct thread *thread; 2087 int err = 0; 2088 2089 tracepoint_handler handler = evsel->handler; 2090 2091 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 2092 if (thread && thread__is_filtered(thread)) 2093 goto out; 2094 2095 trace__set_base_time(trace, evsel, sample); 2096 2097 if (handler) { 2098 ++trace->nr_events; 2099 handler(trace, evsel, event, sample); 2100 } 2101 out: 2102 thread__put(thread); 2103 return err; 2104 } 2105 2106 static int trace__record(struct trace *trace, int argc, const char **argv) 2107 { 2108 unsigned int rec_argc, i, j; 2109 const char **rec_argv; 2110 const char * const record_args[] = { 2111 "record", 2112 "-R", 2113 "-m", "1024", 2114 "-c", "1", 2115 }; 2116 2117 const char * const sc_args[] = { "-e", }; 2118 unsigned int sc_args_nr = ARRAY_SIZE(sc_args); 2119 const char * const majpf_args[] = { "-e", "major-faults" }; 2120 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args); 2121 const char * const minpf_args[] = { "-e", "minor-faults" }; 2122 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args); 2123 2124 /* +1 is for the event string below */ 2125 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 + 2126 majpf_args_nr + minpf_args_nr + argc; 2127 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 2128 2129 if (rec_argv == NULL) 2130 return -ENOMEM; 2131 2132 j = 0; 2133 for (i = 0; i < ARRAY_SIZE(record_args); i++) 2134 rec_argv[j++] = record_args[i]; 2135 2136 if (trace->trace_syscalls) { 2137 for (i = 0; i < sc_args_nr; i++) 2138 rec_argv[j++] = sc_args[i]; 2139 2140 /* event string may be different for older kernels - e.g., RHEL6 */ 2141 if (is_valid_tracepoint("raw_syscalls:sys_enter")) 2142 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit"; 2143 else if (is_valid_tracepoint("syscalls:sys_enter")) 2144 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit"; 2145 else { 2146 pr_err("Neither raw_syscalls nor syscalls events exist.\n"); 2147 free(rec_argv); 2148 return -1; 2149 } 2150 } 2151 2152 if (trace->trace_pgfaults & TRACE_PFMAJ) 2153 for (i = 0; i < majpf_args_nr; i++) 2154 rec_argv[j++] = majpf_args[i]; 2155 2156 if (trace->trace_pgfaults & TRACE_PFMIN) 2157 for (i = 0; i < minpf_args_nr; i++) 2158 rec_argv[j++] = minpf_args[i]; 2159 2160 for (i = 0; i < (unsigned int)argc; i++) 2161 rec_argv[j++] = argv[i]; 2162 2163 return cmd_record(j, rec_argv); 2164 } 2165 2166 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); 2167 2168 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist) 2169 { 2170 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname"); 2171 2172 if (IS_ERR(evsel)) 2173 return false; 2174 2175 if (perf_evsel__field(evsel, "pathname") == NULL) { 2176 perf_evsel__delete(evsel); 2177 return false; 2178 } 2179 2180 evsel->handler = trace__vfs_getname; 2181 perf_evlist__add(evlist, evsel); 2182 return true; 2183 } 2184 2185 static struct perf_evsel *perf_evsel__new_pgfault(u64 config) 2186 { 2187 struct perf_evsel *evsel; 2188 struct perf_event_attr attr = { 2189 .type = PERF_TYPE_SOFTWARE, 2190 .mmap_data = 1, 2191 }; 2192 2193 attr.config = config; 2194 attr.sample_period = 1; 2195 2196 event_attr_init(&attr); 2197 2198 evsel = perf_evsel__new(&attr); 2199 if (evsel) 2200 evsel->handler = trace__pgfault; 2201 2202 return evsel; 2203 } 2204 2205 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample) 2206 { 2207 const u32 type = event->header.type; 2208 struct perf_evsel *evsel; 2209 2210 if (type != PERF_RECORD_SAMPLE) { 2211 trace__process_event(trace, trace->host, event, sample); 2212 return; 2213 } 2214 2215 evsel = perf_evlist__id2evsel(trace->evlist, sample->id); 2216 if (evsel == NULL) { 2217 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id); 2218 return; 2219 } 2220 2221 trace__set_base_time(trace, evsel, sample); 2222 2223 if (evsel->attr.type == PERF_TYPE_TRACEPOINT && 2224 sample->raw_data == NULL) { 2225 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", 2226 perf_evsel__name(evsel), sample->tid, 2227 sample->cpu, sample->raw_size); 2228 } else { 2229 tracepoint_handler handler = evsel->handler; 2230 handler(trace, evsel, event, sample); 2231 } 2232 } 2233 2234 static int trace__add_syscall_newtp(struct trace *trace) 2235 { 2236 int ret = -1; 2237 struct perf_evlist *evlist = trace->evlist; 2238 struct perf_evsel *sys_enter, *sys_exit; 2239 2240 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter); 2241 if (sys_enter == NULL) 2242 goto out; 2243 2244 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args)) 2245 goto out_delete_sys_enter; 2246 2247 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit); 2248 if (sys_exit == NULL) 2249 goto out_delete_sys_enter; 2250 2251 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) 2252 goto out_delete_sys_exit; 2253 2254 perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param); 2255 perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param); 2256 2257 perf_evlist__add(evlist, sys_enter); 2258 perf_evlist__add(evlist, sys_exit); 2259 2260 if (callchain_param.enabled && !trace->kernel_syscallchains) { 2261 /* 2262 * We're interested only in the user space callchain 2263 * leading to the syscall, allow overriding that for 2264 * debugging reasons using --kernel_syscall_callchains 2265 */ 2266 sys_exit->attr.exclude_callchain_kernel = 1; 2267 } 2268 2269 trace->syscalls.events.sys_enter = sys_enter; 2270 trace->syscalls.events.sys_exit = sys_exit; 2271 2272 ret = 0; 2273 out: 2274 return ret; 2275 2276 out_delete_sys_exit: 2277 perf_evsel__delete_priv(sys_exit); 2278 out_delete_sys_enter: 2279 perf_evsel__delete_priv(sys_enter); 2280 goto out; 2281 } 2282 2283 static int trace__set_ev_qualifier_filter(struct trace *trace) 2284 { 2285 int err = -1; 2286 struct perf_evsel *sys_exit; 2287 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier, 2288 trace->ev_qualifier_ids.nr, 2289 trace->ev_qualifier_ids.entries); 2290 2291 if (filter == NULL) 2292 goto out_enomem; 2293 2294 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter, 2295 filter)) { 2296 sys_exit = trace->syscalls.events.sys_exit; 2297 err = perf_evsel__append_tp_filter(sys_exit, filter); 2298 } 2299 2300 free(filter); 2301 out: 2302 return err; 2303 out_enomem: 2304 errno = ENOMEM; 2305 goto out; 2306 } 2307 2308 static int trace__set_filter_loop_pids(struct trace *trace) 2309 { 2310 unsigned int nr = 1; 2311 pid_t pids[32] = { 2312 getpid(), 2313 }; 2314 struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]); 2315 2316 while (thread && nr < ARRAY_SIZE(pids)) { 2317 struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid); 2318 2319 if (parent == NULL) 2320 break; 2321 2322 if (!strcmp(thread__comm_str(parent), "sshd")) { 2323 pids[nr++] = parent->tid; 2324 break; 2325 } 2326 thread = parent; 2327 } 2328 2329 return perf_evlist__set_filter_pids(trace->evlist, nr, pids); 2330 } 2331 2332 static int trace__run(struct trace *trace, int argc, const char **argv) 2333 { 2334 struct perf_evlist *evlist = trace->evlist; 2335 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL; 2336 int err = -1, i; 2337 unsigned long before; 2338 const bool forks = argc > 0; 2339 bool draining = false; 2340 2341 trace->live = true; 2342 2343 if (trace->trace_syscalls && trace__add_syscall_newtp(trace)) 2344 goto out_error_raw_syscalls; 2345 2346 if (trace->trace_syscalls) 2347 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist); 2348 2349 if ((trace->trace_pgfaults & TRACE_PFMAJ)) { 2350 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ); 2351 if (pgfault_maj == NULL) 2352 goto out_error_mem; 2353 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param); 2354 perf_evlist__add(evlist, pgfault_maj); 2355 } 2356 2357 if ((trace->trace_pgfaults & TRACE_PFMIN)) { 2358 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN); 2359 if (pgfault_min == NULL) 2360 goto out_error_mem; 2361 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param); 2362 perf_evlist__add(evlist, pgfault_min); 2363 } 2364 2365 if (trace->sched && 2366 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", 2367 trace__sched_stat_runtime)) 2368 goto out_error_sched_stat_runtime; 2369 2370 err = perf_evlist__create_maps(evlist, &trace->opts.target); 2371 if (err < 0) { 2372 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n"); 2373 goto out_delete_evlist; 2374 } 2375 2376 err = trace__symbols_init(trace, evlist); 2377 if (err < 0) { 2378 fprintf(trace->output, "Problems initializing symbol libraries!\n"); 2379 goto out_delete_evlist; 2380 } 2381 2382 perf_evlist__config(evlist, &trace->opts, &callchain_param); 2383 2384 signal(SIGCHLD, sig_handler); 2385 signal(SIGINT, sig_handler); 2386 2387 if (forks) { 2388 err = perf_evlist__prepare_workload(evlist, &trace->opts.target, 2389 argv, false, NULL); 2390 if (err < 0) { 2391 fprintf(trace->output, "Couldn't run the workload!\n"); 2392 goto out_delete_evlist; 2393 } 2394 } 2395 2396 err = perf_evlist__open(evlist); 2397 if (err < 0) 2398 goto out_error_open; 2399 2400 err = bpf__apply_obj_config(); 2401 if (err) { 2402 char errbuf[BUFSIZ]; 2403 2404 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); 2405 pr_err("ERROR: Apply config to BPF failed: %s\n", 2406 errbuf); 2407 goto out_error_open; 2408 } 2409 2410 /* 2411 * Better not use !target__has_task() here because we need to cover the 2412 * case where no threads were specified in the command line, but a 2413 * workload was, and in that case we will fill in the thread_map when 2414 * we fork the workload in perf_evlist__prepare_workload. 2415 */ 2416 if (trace->filter_pids.nr > 0) 2417 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries); 2418 else if (thread_map__pid(evlist->threads, 0) == -1) 2419 err = trace__set_filter_loop_pids(trace); 2420 2421 if (err < 0) 2422 goto out_error_mem; 2423 2424 if (trace->ev_qualifier_ids.nr > 0) { 2425 err = trace__set_ev_qualifier_filter(trace); 2426 if (err < 0) 2427 goto out_errno; 2428 2429 pr_debug("event qualifier tracepoint filter: %s\n", 2430 trace->syscalls.events.sys_exit->filter); 2431 } 2432 2433 err = perf_evlist__apply_filters(evlist, &evsel); 2434 if (err < 0) 2435 goto out_error_apply_filters; 2436 2437 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages); 2438 if (err < 0) 2439 goto out_error_mmap; 2440 2441 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay) 2442 perf_evlist__enable(evlist); 2443 2444 if (forks) 2445 perf_evlist__start_workload(evlist); 2446 2447 if (trace->opts.initial_delay) { 2448 usleep(trace->opts.initial_delay * 1000); 2449 perf_evlist__enable(evlist); 2450 } 2451 2452 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 || 2453 evlist->threads->nr > 1 || 2454 perf_evlist__first(evlist)->attr.inherit; 2455 2456 /* 2457 * Now that we already used evsel->attr to ask the kernel to setup the 2458 * events, lets reuse evsel->attr.sample_max_stack as the limit in 2459 * trace__resolve_callchain(), allowing per-event max-stack settings 2460 * to override an explicitely set --max-stack global setting. 2461 */ 2462 evlist__for_each_entry(evlist, evsel) { 2463 if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) && 2464 evsel->attr.sample_max_stack == 0) 2465 evsel->attr.sample_max_stack = trace->max_stack; 2466 } 2467 again: 2468 before = trace->nr_events; 2469 2470 for (i = 0; i < evlist->nr_mmaps; i++) { 2471 union perf_event *event; 2472 2473 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { 2474 struct perf_sample sample; 2475 2476 ++trace->nr_events; 2477 2478 err = perf_evlist__parse_sample(evlist, event, &sample); 2479 if (err) { 2480 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err); 2481 goto next_event; 2482 } 2483 2484 trace__handle_event(trace, event, &sample); 2485 next_event: 2486 perf_evlist__mmap_consume(evlist, i); 2487 2488 if (interrupted) 2489 goto out_disable; 2490 2491 if (done && !draining) { 2492 perf_evlist__disable(evlist); 2493 draining = true; 2494 } 2495 } 2496 } 2497 2498 if (trace->nr_events == before) { 2499 int timeout = done ? 100 : -1; 2500 2501 if (!draining && perf_evlist__poll(evlist, timeout) > 0) { 2502 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0) 2503 draining = true; 2504 2505 goto again; 2506 } 2507 } else { 2508 goto again; 2509 } 2510 2511 out_disable: 2512 thread__zput(trace->current); 2513 2514 perf_evlist__disable(evlist); 2515 2516 if (!err) { 2517 if (trace->summary) 2518 trace__fprintf_thread_summary(trace, trace->output); 2519 2520 if (trace->show_tool_stats) { 2521 fprintf(trace->output, "Stats:\n " 2522 " vfs_getname : %" PRIu64 "\n" 2523 " proc_getname: %" PRIu64 "\n", 2524 trace->stats.vfs_getname, 2525 trace->stats.proc_getname); 2526 } 2527 } 2528 2529 out_delete_evlist: 2530 trace__symbols__exit(trace); 2531 2532 perf_evlist__delete(evlist); 2533 trace->evlist = NULL; 2534 trace->live = false; 2535 return err; 2536 { 2537 char errbuf[BUFSIZ]; 2538 2539 out_error_sched_stat_runtime: 2540 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime"); 2541 goto out_error; 2542 2543 out_error_raw_syscalls: 2544 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)"); 2545 goto out_error; 2546 2547 out_error_mmap: 2548 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf)); 2549 goto out_error; 2550 2551 out_error_open: 2552 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); 2553 2554 out_error: 2555 fprintf(trace->output, "%s\n", errbuf); 2556 goto out_delete_evlist; 2557 2558 out_error_apply_filters: 2559 fprintf(trace->output, 2560 "Failed to set filter \"%s\" on event %s with %d (%s)\n", 2561 evsel->filter, perf_evsel__name(evsel), errno, 2562 str_error_r(errno, errbuf, sizeof(errbuf))); 2563 goto out_delete_evlist; 2564 } 2565 out_error_mem: 2566 fprintf(trace->output, "Not enough memory to run!\n"); 2567 goto out_delete_evlist; 2568 2569 out_errno: 2570 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno)); 2571 goto out_delete_evlist; 2572 } 2573 2574 static int trace__replay(struct trace *trace) 2575 { 2576 const struct perf_evsel_str_handler handlers[] = { 2577 { "probe:vfs_getname", trace__vfs_getname, }, 2578 }; 2579 struct perf_data data = { 2580 .file = { 2581 .path = input_name, 2582 }, 2583 .mode = PERF_DATA_MODE_READ, 2584 .force = trace->force, 2585 }; 2586 struct perf_session *session; 2587 struct perf_evsel *evsel; 2588 int err = -1; 2589 2590 trace->tool.sample = trace__process_sample; 2591 trace->tool.mmap = perf_event__process_mmap; 2592 trace->tool.mmap2 = perf_event__process_mmap2; 2593 trace->tool.comm = perf_event__process_comm; 2594 trace->tool.exit = perf_event__process_exit; 2595 trace->tool.fork = perf_event__process_fork; 2596 trace->tool.attr = perf_event__process_attr; 2597 trace->tool.tracing_data = perf_event__process_tracing_data; 2598 trace->tool.build_id = perf_event__process_build_id; 2599 trace->tool.namespaces = perf_event__process_namespaces; 2600 2601 trace->tool.ordered_events = true; 2602 trace->tool.ordering_requires_timestamps = true; 2603 2604 /* add tid to output */ 2605 trace->multiple_threads = true; 2606 2607 session = perf_session__new(&data, false, &trace->tool); 2608 if (session == NULL) 2609 return -1; 2610 2611 if (trace->opts.target.pid) 2612 symbol_conf.pid_list_str = strdup(trace->opts.target.pid); 2613 2614 if (trace->opts.target.tid) 2615 symbol_conf.tid_list_str = strdup(trace->opts.target.tid); 2616 2617 if (symbol__init(&session->header.env) < 0) 2618 goto out; 2619 2620 trace->host = &session->machines.host; 2621 2622 err = perf_session__set_tracepoints_handlers(session, handlers); 2623 if (err) 2624 goto out; 2625 2626 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2627 "raw_syscalls:sys_enter"); 2628 /* older kernels have syscalls tp versus raw_syscalls */ 2629 if (evsel == NULL) 2630 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2631 "syscalls:sys_enter"); 2632 2633 if (evsel && 2634 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 || 2635 perf_evsel__init_sc_tp_ptr_field(evsel, args))) { 2636 pr_err("Error during initialize raw_syscalls:sys_enter event\n"); 2637 goto out; 2638 } 2639 2640 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2641 "raw_syscalls:sys_exit"); 2642 if (evsel == NULL) 2643 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, 2644 "syscalls:sys_exit"); 2645 if (evsel && 2646 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 || 2647 perf_evsel__init_sc_tp_uint_field(evsel, ret))) { 2648 pr_err("Error during initialize raw_syscalls:sys_exit event\n"); 2649 goto out; 2650 } 2651 2652 evlist__for_each_entry(session->evlist, evsel) { 2653 if (evsel->attr.type == PERF_TYPE_SOFTWARE && 2654 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ || 2655 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN || 2656 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS)) 2657 evsel->handler = trace__pgfault; 2658 } 2659 2660 setup_pager(); 2661 2662 err = perf_session__process_events(session); 2663 if (err) 2664 pr_err("Failed to process events, error %d", err); 2665 2666 else if (trace->summary) 2667 trace__fprintf_thread_summary(trace, trace->output); 2668 2669 out: 2670 perf_session__delete(session); 2671 2672 return err; 2673 } 2674 2675 static size_t trace__fprintf_threads_header(FILE *fp) 2676 { 2677 size_t printed; 2678 2679 printed = fprintf(fp, "\n Summary of events:\n\n"); 2680 2681 return printed; 2682 } 2683 2684 DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs, 2685 struct stats *stats; 2686 double msecs; 2687 int syscall; 2688 ) 2689 { 2690 struct int_node *source = rb_entry(nd, struct int_node, rb_node); 2691 struct stats *stats = source->priv; 2692 2693 entry->syscall = source->i; 2694 entry->stats = stats; 2695 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0; 2696 } 2697 2698 static size_t thread__dump_stats(struct thread_trace *ttrace, 2699 struct trace *trace, FILE *fp) 2700 { 2701 size_t printed = 0; 2702 struct syscall *sc; 2703 struct rb_node *nd; 2704 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats); 2705 2706 if (syscall_stats == NULL) 2707 return 0; 2708 2709 printed += fprintf(fp, "\n"); 2710 2711 printed += fprintf(fp, " syscall calls total min avg max stddev\n"); 2712 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); 2713 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n"); 2714 2715 resort_rb__for_each_entry(nd, syscall_stats) { 2716 struct stats *stats = syscall_stats_entry->stats; 2717 if (stats) { 2718 double min = (double)(stats->min) / NSEC_PER_MSEC; 2719 double max = (double)(stats->max) / NSEC_PER_MSEC; 2720 double avg = avg_stats(stats); 2721 double pct; 2722 u64 n = (u64) stats->n; 2723 2724 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0; 2725 avg /= NSEC_PER_MSEC; 2726 2727 sc = &trace->syscalls.table[syscall_stats_entry->syscall]; 2728 printed += fprintf(fp, " %-15s", sc->name); 2729 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f", 2730 n, syscall_stats_entry->msecs, min, avg); 2731 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); 2732 } 2733 } 2734 2735 resort_rb__delete(syscall_stats); 2736 printed += fprintf(fp, "\n\n"); 2737 2738 return printed; 2739 } 2740 2741 static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace) 2742 { 2743 size_t printed = 0; 2744 struct thread_trace *ttrace = thread__priv(thread); 2745 double ratio; 2746 2747 if (ttrace == NULL) 2748 return 0; 2749 2750 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0; 2751 2752 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid); 2753 printed += fprintf(fp, "%lu events, ", ttrace->nr_events); 2754 printed += fprintf(fp, "%.1f%%", ratio); 2755 if (ttrace->pfmaj) 2756 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj); 2757 if (ttrace->pfmin) 2758 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin); 2759 if (trace->sched) 2760 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms); 2761 else if (fputc('\n', fp) != EOF) 2762 ++printed; 2763 2764 printed += thread__dump_stats(ttrace, trace, fp); 2765 2766 return printed; 2767 } 2768 2769 static unsigned long thread__nr_events(struct thread_trace *ttrace) 2770 { 2771 return ttrace ? ttrace->nr_events : 0; 2772 } 2773 2774 DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)), 2775 struct thread *thread; 2776 ) 2777 { 2778 entry->thread = rb_entry(nd, struct thread, rb_node); 2779 } 2780 2781 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) 2782 { 2783 size_t printed = trace__fprintf_threads_header(fp); 2784 struct rb_node *nd; 2785 int i; 2786 2787 for (i = 0; i < THREADS__TABLE_SIZE; i++) { 2788 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i); 2789 2790 if (threads == NULL) { 2791 fprintf(fp, "%s", "Error sorting output by nr_events!\n"); 2792 return 0; 2793 } 2794 2795 resort_rb__for_each_entry(nd, threads) 2796 printed += trace__fprintf_thread(fp, threads_entry->thread, trace); 2797 2798 resort_rb__delete(threads); 2799 } 2800 return printed; 2801 } 2802 2803 static int trace__set_duration(const struct option *opt, const char *str, 2804 int unset __maybe_unused) 2805 { 2806 struct trace *trace = opt->value; 2807 2808 trace->duration_filter = atof(str); 2809 return 0; 2810 } 2811 2812 static int trace__set_filter_pids(const struct option *opt, const char *str, 2813 int unset __maybe_unused) 2814 { 2815 int ret = -1; 2816 size_t i; 2817 struct trace *trace = opt->value; 2818 /* 2819 * FIXME: introduce a intarray class, plain parse csv and create a 2820 * { int nr, int entries[] } struct... 2821 */ 2822 struct intlist *list = intlist__new(str); 2823 2824 if (list == NULL) 2825 return -1; 2826 2827 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1; 2828 trace->filter_pids.entries = calloc(i, sizeof(pid_t)); 2829 2830 if (trace->filter_pids.entries == NULL) 2831 goto out; 2832 2833 trace->filter_pids.entries[0] = getpid(); 2834 2835 for (i = 1; i < trace->filter_pids.nr; ++i) 2836 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i; 2837 2838 intlist__delete(list); 2839 ret = 0; 2840 out: 2841 return ret; 2842 } 2843 2844 static int trace__open_output(struct trace *trace, const char *filename) 2845 { 2846 struct stat st; 2847 2848 if (!stat(filename, &st) && st.st_size) { 2849 char oldname[PATH_MAX]; 2850 2851 scnprintf(oldname, sizeof(oldname), "%s.old", filename); 2852 unlink(oldname); 2853 rename(filename, oldname); 2854 } 2855 2856 trace->output = fopen(filename, "w"); 2857 2858 return trace->output == NULL ? -errno : 0; 2859 } 2860 2861 static int parse_pagefaults(const struct option *opt, const char *str, 2862 int unset __maybe_unused) 2863 { 2864 int *trace_pgfaults = opt->value; 2865 2866 if (strcmp(str, "all") == 0) 2867 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN; 2868 else if (strcmp(str, "maj") == 0) 2869 *trace_pgfaults |= TRACE_PFMAJ; 2870 else if (strcmp(str, "min") == 0) 2871 *trace_pgfaults |= TRACE_PFMIN; 2872 else 2873 return -1; 2874 2875 return 0; 2876 } 2877 2878 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler) 2879 { 2880 struct perf_evsel *evsel; 2881 2882 evlist__for_each_entry(evlist, evsel) 2883 evsel->handler = handler; 2884 } 2885 2886 /* 2887 * XXX: Hackish, just splitting the combined -e+--event (syscalls 2888 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use 2889 * existing facilities unchanged (trace->ev_qualifier + parse_options()). 2890 * 2891 * It'd be better to introduce a parse_options() variant that would return a 2892 * list with the terms it didn't match to an event... 2893 */ 2894 static int trace__parse_events_option(const struct option *opt, const char *str, 2895 int unset __maybe_unused) 2896 { 2897 struct trace *trace = (struct trace *)opt->value; 2898 const char *s = str; 2899 char *sep = NULL, *lists[2] = { NULL, NULL, }; 2900 int len = strlen(str) + 1, err = -1, list, idx; 2901 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR); 2902 char group_name[PATH_MAX]; 2903 2904 if (strace_groups_dir == NULL) 2905 return -1; 2906 2907 if (*s == '!') { 2908 ++s; 2909 trace->not_ev_qualifier = true; 2910 } 2911 2912 while (1) { 2913 if ((sep = strchr(s, ',')) != NULL) 2914 *sep = '\0'; 2915 2916 list = 0; 2917 if (syscalltbl__id(trace->sctbl, s) >= 0 || 2918 syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) { 2919 list = 1; 2920 } else { 2921 path__join(group_name, sizeof(group_name), strace_groups_dir, s); 2922 if (access(group_name, R_OK) == 0) 2923 list = 1; 2924 } 2925 2926 if (lists[list]) { 2927 sprintf(lists[list] + strlen(lists[list]), ",%s", s); 2928 } else { 2929 lists[list] = malloc(len); 2930 if (lists[list] == NULL) 2931 goto out; 2932 strcpy(lists[list], s); 2933 } 2934 2935 if (!sep) 2936 break; 2937 2938 *sep = ','; 2939 s = sep + 1; 2940 } 2941 2942 if (lists[1] != NULL) { 2943 struct strlist_config slist_config = { 2944 .dirname = strace_groups_dir, 2945 }; 2946 2947 trace->ev_qualifier = strlist__new(lists[1], &slist_config); 2948 if (trace->ev_qualifier == NULL) { 2949 fputs("Not enough memory to parse event qualifier", trace->output); 2950 goto out; 2951 } 2952 2953 if (trace__validate_ev_qualifier(trace)) 2954 goto out; 2955 } 2956 2957 err = 0; 2958 2959 if (lists[0]) { 2960 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event", 2961 "event selector. use 'perf list' to list available events", 2962 parse_events_option); 2963 err = parse_events_option(&o, lists[0], 0); 2964 } 2965 out: 2966 if (sep) 2967 *sep = ','; 2968 2969 return err; 2970 } 2971 2972 int cmd_trace(int argc, const char **argv) 2973 { 2974 const char *trace_usage[] = { 2975 "perf trace [<options>] [<command>]", 2976 "perf trace [<options>] -- <command> [<options>]", 2977 "perf trace record [<options>] [<command>]", 2978 "perf trace record [<options>] -- <command> [<options>]", 2979 NULL 2980 }; 2981 struct trace trace = { 2982 .syscalls = { 2983 . max = -1, 2984 }, 2985 .opts = { 2986 .target = { 2987 .uid = UINT_MAX, 2988 .uses_mmap = true, 2989 }, 2990 .user_freq = UINT_MAX, 2991 .user_interval = ULLONG_MAX, 2992 .no_buffering = true, 2993 .mmap_pages = UINT_MAX, 2994 .proc_map_timeout = 500, 2995 }, 2996 .output = stderr, 2997 .show_comm = true, 2998 .trace_syscalls = true, 2999 .kernel_syscallchains = false, 3000 .max_stack = UINT_MAX, 3001 }; 3002 const char *output_name = NULL; 3003 const struct option trace_options[] = { 3004 OPT_CALLBACK('e', "event", &trace, "event", 3005 "event/syscall selector. use 'perf list' to list available events", 3006 trace__parse_events_option), 3007 OPT_BOOLEAN(0, "comm", &trace.show_comm, 3008 "show the thread COMM next to its id"), 3009 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), 3010 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace", 3011 trace__parse_events_option), 3012 OPT_STRING('o', "output", &output_name, "file", "output file name"), 3013 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"), 3014 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", 3015 "trace events on existing process id"), 3016 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid", 3017 "trace events on existing thread id"), 3018 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids", 3019 "pids to filter (by the kernel)", trace__set_filter_pids), 3020 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide, 3021 "system-wide collection from all CPUs"), 3022 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu", 3023 "list of cpus to monitor"), 3024 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, 3025 "child tasks do not inherit counters"), 3026 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages", 3027 "number of mmap data pages", 3028 perf_evlist__parse_mmap_pages), 3029 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user", 3030 "user to profile"), 3031 OPT_CALLBACK(0, "duration", &trace, "float", 3032 "show only events with duration > N.M ms", 3033 trace__set_duration), 3034 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), 3035 OPT_INCR('v', "verbose", &verbose, "be more verbose"), 3036 OPT_BOOLEAN('T', "time", &trace.full_time, 3037 "Show full timestamp, not time relative to first start"), 3038 OPT_BOOLEAN('s', "summary", &trace.summary_only, 3039 "Show only syscall summary with statistics"), 3040 OPT_BOOLEAN('S', "with-summary", &trace.summary, 3041 "Show all syscalls and summary with statistics"), 3042 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min", 3043 "Trace pagefaults", parse_pagefaults, "maj"), 3044 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"), 3045 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"), 3046 OPT_CALLBACK(0, "call-graph", &trace.opts, 3047 "record_mode[,record_size]", record_callchain_help, 3048 &record_parse_callchain_opt), 3049 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains, 3050 "Show the kernel callchains on the syscall exit path"), 3051 OPT_UINTEGER(0, "min-stack", &trace.min_stack, 3052 "Set the minimum stack depth when parsing the callchain, " 3053 "anything below the specified depth will be ignored."), 3054 OPT_UINTEGER(0, "max-stack", &trace.max_stack, 3055 "Set the maximum stack depth when parsing the callchain, " 3056 "anything beyond the specified depth will be ignored. " 3057 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), 3058 OPT_BOOLEAN(0, "print-sample", &trace.print_sample, 3059 "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"), 3060 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout, 3061 "per thread proc mmap processing timeout in ms"), 3062 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay, 3063 "ms to wait before starting measurement after program " 3064 "start"), 3065 OPT_END() 3066 }; 3067 bool __maybe_unused max_stack_user_set = true; 3068 bool mmap_pages_user_set = true; 3069 const char * const trace_subcommands[] = { "record", NULL }; 3070 int err; 3071 char bf[BUFSIZ]; 3072 3073 signal(SIGSEGV, sighandler_dump_stack); 3074 signal(SIGFPE, sighandler_dump_stack); 3075 3076 trace.evlist = perf_evlist__new(); 3077 trace.sctbl = syscalltbl__new(); 3078 3079 if (trace.evlist == NULL || trace.sctbl == NULL) { 3080 pr_err("Not enough memory to run!\n"); 3081 err = -ENOMEM; 3082 goto out; 3083 } 3084 3085 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands, 3086 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION); 3087 3088 err = bpf__setup_stdout(trace.evlist); 3089 if (err) { 3090 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf)); 3091 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf); 3092 goto out; 3093 } 3094 3095 err = -1; 3096 3097 if (trace.trace_pgfaults) { 3098 trace.opts.sample_address = true; 3099 trace.opts.sample_time = true; 3100 } 3101 3102 if (trace.opts.mmap_pages == UINT_MAX) 3103 mmap_pages_user_set = false; 3104 3105 if (trace.max_stack == UINT_MAX) { 3106 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack; 3107 max_stack_user_set = false; 3108 } 3109 3110 #ifdef HAVE_DWARF_UNWIND_SUPPORT 3111 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) { 3112 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false); 3113 } 3114 #endif 3115 3116 if (callchain_param.enabled) { 3117 if (!mmap_pages_user_set && geteuid() == 0) 3118 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4; 3119 3120 symbol_conf.use_callchain = true; 3121 } 3122 3123 if (trace.evlist->nr_entries > 0) 3124 evlist__set_evsel_handler(trace.evlist, trace__event_handler); 3125 3126 if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) 3127 return trace__record(&trace, argc-1, &argv[1]); 3128 3129 /* summary_only implies summary option, but don't overwrite summary if set */ 3130 if (trace.summary_only) 3131 trace.summary = trace.summary_only; 3132 3133 if (!trace.trace_syscalls && !trace.trace_pgfaults && 3134 trace.evlist->nr_entries == 0 /* Was --events used? */) { 3135 pr_err("Please specify something to trace.\n"); 3136 return -1; 3137 } 3138 3139 if (!trace.trace_syscalls && trace.ev_qualifier) { 3140 pr_err("The -e option can't be used with --no-syscalls.\n"); 3141 goto out; 3142 } 3143 3144 if (output_name != NULL) { 3145 err = trace__open_output(&trace, output_name); 3146 if (err < 0) { 3147 perror("failed to create output file"); 3148 goto out; 3149 } 3150 } 3151 3152 trace.open_id = syscalltbl__id(trace.sctbl, "open"); 3153 3154 err = target__validate(&trace.opts.target); 3155 if (err) { 3156 target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 3157 fprintf(trace.output, "%s", bf); 3158 goto out_close; 3159 } 3160 3161 err = target__parse_uid(&trace.opts.target); 3162 if (err) { 3163 target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 3164 fprintf(trace.output, "%s", bf); 3165 goto out_close; 3166 } 3167 3168 if (!argc && target__none(&trace.opts.target)) 3169 trace.opts.target.system_wide = true; 3170 3171 if (input_name) 3172 err = trace__replay(&trace); 3173 else 3174 err = trace__run(&trace, argc, argv); 3175 3176 out_close: 3177 if (output_name != NULL) 3178 fclose(trace.output); 3179 out: 3180 return err; 3181 } 3182