1 #include <traceevent/event-parse.h> 2 #include "builtin.h" 3 #include "util/color.h" 4 #include "util/debug.h" 5 #include "util/evlist.h" 6 #include "util/machine.h" 7 #include "util/session.h" 8 #include "util/thread.h" 9 #include "util/parse-options.h" 10 #include "util/strlist.h" 11 #include "util/intlist.h" 12 #include "util/thread_map.h" 13 14 #include <libaudit.h> 15 #include <stdlib.h> 16 #include <sys/eventfd.h> 17 #include <sys/mman.h> 18 #include <linux/futex.h> 19 20 /* For older distros: */ 21 #ifndef MAP_STACK 22 # define MAP_STACK 0x20000 23 #endif 24 25 #ifndef MADV_HWPOISON 26 # define MADV_HWPOISON 100 27 #endif 28 29 #ifndef MADV_MERGEABLE 30 # define MADV_MERGEABLE 12 31 #endif 32 33 #ifndef MADV_UNMERGEABLE 34 # define MADV_UNMERGEABLE 13 35 #endif 36 37 struct syscall_arg { 38 unsigned long val; 39 void *parm; 40 u8 idx; 41 u8 mask; 42 }; 43 44 struct strarray { 45 int nr_entries; 46 const char **entries; 47 }; 48 49 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \ 50 .nr_entries = ARRAY_SIZE(array), \ 51 .entries = array, \ 52 } 53 54 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, 55 struct syscall_arg *arg) 56 { 57 int idx = arg->val; 58 struct strarray *sa = arg->parm; 59 60 if (idx < 0 || idx >= sa->nr_entries) 61 return scnprintf(bf, size, "%d", idx); 62 63 return scnprintf(bf, size, "%s", sa->entries[idx]); 64 } 65 66 #define SCA_STRARRAY syscall_arg__scnprintf_strarray 67 68 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, 69 struct syscall_arg *arg) 70 { 71 return scnprintf(bf, size, "%#lx", arg->val); 72 } 73 74 #define SCA_HEX syscall_arg__scnprintf_hex 75 76 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, 77 struct syscall_arg *arg) 78 { 79 int printed = 0, prot = arg->val; 80 81 if (prot == PROT_NONE) 82 return scnprintf(bf, size, "NONE"); 83 #define P_MMAP_PROT(n) \ 84 if (prot & PROT_##n) { \ 85 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 86 prot &= ~PROT_##n; \ 87 } 88 89 P_MMAP_PROT(EXEC); 90 P_MMAP_PROT(READ); 91 P_MMAP_PROT(WRITE); 92 #ifdef PROT_SEM 93 P_MMAP_PROT(SEM); 94 #endif 95 P_MMAP_PROT(GROWSDOWN); 96 P_MMAP_PROT(GROWSUP); 97 #undef P_MMAP_PROT 98 99 if (prot) 100 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot); 101 102 return printed; 103 } 104 105 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot 106 107 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, 108 struct syscall_arg *arg) 109 { 110 int printed = 0, flags = arg->val; 111 112 #define P_MMAP_FLAG(n) \ 113 if (flags & MAP_##n) { \ 114 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 115 flags &= ~MAP_##n; \ 116 } 117 118 P_MMAP_FLAG(SHARED); 119 P_MMAP_FLAG(PRIVATE); 120 #ifdef MAP_32BIT 121 P_MMAP_FLAG(32BIT); 122 #endif 123 P_MMAP_FLAG(ANONYMOUS); 124 P_MMAP_FLAG(DENYWRITE); 125 P_MMAP_FLAG(EXECUTABLE); 126 P_MMAP_FLAG(FILE); 127 P_MMAP_FLAG(FIXED); 128 P_MMAP_FLAG(GROWSDOWN); 129 #ifdef MAP_HUGETLB 130 P_MMAP_FLAG(HUGETLB); 131 #endif 132 P_MMAP_FLAG(LOCKED); 133 P_MMAP_FLAG(NONBLOCK); 134 P_MMAP_FLAG(NORESERVE); 135 P_MMAP_FLAG(POPULATE); 136 P_MMAP_FLAG(STACK); 137 #ifdef MAP_UNINITIALIZED 138 P_MMAP_FLAG(UNINITIALIZED); 139 #endif 140 #undef P_MMAP_FLAG 141 142 if (flags) 143 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 144 145 return printed; 146 } 147 148 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags 149 150 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, 151 struct syscall_arg *arg) 152 { 153 int behavior = arg->val; 154 155 switch (behavior) { 156 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n) 157 P_MADV_BHV(NORMAL); 158 P_MADV_BHV(RANDOM); 159 P_MADV_BHV(SEQUENTIAL); 160 P_MADV_BHV(WILLNEED); 161 P_MADV_BHV(DONTNEED); 162 P_MADV_BHV(REMOVE); 163 P_MADV_BHV(DONTFORK); 164 P_MADV_BHV(DOFORK); 165 P_MADV_BHV(HWPOISON); 166 #ifdef MADV_SOFT_OFFLINE 167 P_MADV_BHV(SOFT_OFFLINE); 168 #endif 169 P_MADV_BHV(MERGEABLE); 170 P_MADV_BHV(UNMERGEABLE); 171 #ifdef MADV_HUGEPAGE 172 P_MADV_BHV(HUGEPAGE); 173 #endif 174 #ifdef MADV_NOHUGEPAGE 175 P_MADV_BHV(NOHUGEPAGE); 176 #endif 177 #ifdef MADV_DONTDUMP 178 P_MADV_BHV(DONTDUMP); 179 #endif 180 #ifdef MADV_DODUMP 181 P_MADV_BHV(DODUMP); 182 #endif 183 #undef P_MADV_PHV 184 default: break; 185 } 186 187 return scnprintf(bf, size, "%#x", behavior); 188 } 189 190 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior 191 192 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg) 193 { 194 enum syscall_futex_args { 195 SCF_UADDR = (1 << 0), 196 SCF_OP = (1 << 1), 197 SCF_VAL = (1 << 2), 198 SCF_TIMEOUT = (1 << 3), 199 SCF_UADDR2 = (1 << 4), 200 SCF_VAL3 = (1 << 5), 201 }; 202 int op = arg->val; 203 int cmd = op & FUTEX_CMD_MASK; 204 size_t printed = 0; 205 206 switch (cmd) { 207 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n); 208 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break; 209 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 210 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 211 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break; 212 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break; 213 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break; 214 P_FUTEX_OP(WAKE_OP); break; 215 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 216 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break; 217 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break; 218 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break; 219 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break; 220 P_FUTEX_OP(WAIT_REQUEUE_PI); break; 221 default: printed = scnprintf(bf, size, "%#x", cmd); break; 222 } 223 224 if (op & FUTEX_PRIVATE_FLAG) 225 printed += scnprintf(bf + printed, size - printed, "|PRIV"); 226 227 if (op & FUTEX_CLOCK_REALTIME) 228 printed += scnprintf(bf + printed, size - printed, "|CLKRT"); 229 230 return printed; 231 } 232 233 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op 234 235 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", }; 236 static DEFINE_STRARRAY(itimers); 237 238 static const char *whences[] = { "SET", "CUR", "END", 239 #ifdef SEEK_DATA 240 "DATA", 241 #endif 242 #ifdef SEEK_HOLE 243 "HOLE", 244 #endif 245 }; 246 static DEFINE_STRARRAY(whences); 247 248 static const char *fcntl_cmds[] = { 249 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK", 250 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64", 251 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX", 252 "F_GETOWNER_UIDS", 253 }; 254 static DEFINE_STRARRAY(fcntl_cmds); 255 256 static const char *rlimit_resources[] = { 257 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE", 258 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO", 259 "RTTIME", 260 }; 261 static DEFINE_STRARRAY(rlimit_resources); 262 263 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", }; 264 static DEFINE_STRARRAY(sighow); 265 266 static const char *socket_families[] = { 267 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM", 268 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI", 269 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC", 270 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC", 271 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF", 272 "ALG", "NFC", "VSOCK", 273 }; 274 static DEFINE_STRARRAY(socket_families); 275 276 #ifndef SOCK_TYPE_MASK 277 #define SOCK_TYPE_MASK 0xf 278 #endif 279 280 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size, 281 struct syscall_arg *arg) 282 { 283 size_t printed; 284 int type = arg->val, 285 flags = type & ~SOCK_TYPE_MASK; 286 287 type &= SOCK_TYPE_MASK; 288 /* 289 * Can't use a strarray, MIPS may override for ABI reasons. 290 */ 291 switch (type) { 292 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break; 293 P_SK_TYPE(STREAM); 294 P_SK_TYPE(DGRAM); 295 P_SK_TYPE(RAW); 296 P_SK_TYPE(RDM); 297 P_SK_TYPE(SEQPACKET); 298 P_SK_TYPE(DCCP); 299 P_SK_TYPE(PACKET); 300 #undef P_SK_TYPE 301 default: 302 printed = scnprintf(bf, size, "%#x", type); 303 } 304 305 #define P_SK_FLAG(n) \ 306 if (flags & SOCK_##n) { \ 307 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \ 308 flags &= ~SOCK_##n; \ 309 } 310 311 P_SK_FLAG(CLOEXEC); 312 P_SK_FLAG(NONBLOCK); 313 #undef P_SK_FLAG 314 315 if (flags) 316 printed += scnprintf(bf + printed, size - printed, "|%#x", flags); 317 318 return printed; 319 } 320 321 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type 322 323 #ifndef MSG_PROBE 324 #define MSG_PROBE 0x10 325 #endif 326 #ifndef MSG_SENDPAGE_NOTLAST 327 #define MSG_SENDPAGE_NOTLAST 0x20000 328 #endif 329 #ifndef MSG_FASTOPEN 330 #define MSG_FASTOPEN 0x20000000 331 #endif 332 333 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size, 334 struct syscall_arg *arg) 335 { 336 int printed = 0, flags = arg->val; 337 338 if (flags == 0) 339 return scnprintf(bf, size, "NONE"); 340 #define P_MSG_FLAG(n) \ 341 if (flags & MSG_##n) { \ 342 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 343 flags &= ~MSG_##n; \ 344 } 345 346 P_MSG_FLAG(OOB); 347 P_MSG_FLAG(PEEK); 348 P_MSG_FLAG(DONTROUTE); 349 P_MSG_FLAG(TRYHARD); 350 P_MSG_FLAG(CTRUNC); 351 P_MSG_FLAG(PROBE); 352 P_MSG_FLAG(TRUNC); 353 P_MSG_FLAG(DONTWAIT); 354 P_MSG_FLAG(EOR); 355 P_MSG_FLAG(WAITALL); 356 P_MSG_FLAG(FIN); 357 P_MSG_FLAG(SYN); 358 P_MSG_FLAG(CONFIRM); 359 P_MSG_FLAG(RST); 360 P_MSG_FLAG(ERRQUEUE); 361 P_MSG_FLAG(NOSIGNAL); 362 P_MSG_FLAG(MORE); 363 P_MSG_FLAG(WAITFORONE); 364 P_MSG_FLAG(SENDPAGE_NOTLAST); 365 P_MSG_FLAG(FASTOPEN); 366 P_MSG_FLAG(CMSG_CLOEXEC); 367 #undef P_MSG_FLAG 368 369 if (flags) 370 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 371 372 return printed; 373 } 374 375 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags 376 377 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, 378 struct syscall_arg *arg) 379 { 380 size_t printed = 0; 381 int mode = arg->val; 382 383 if (mode == F_OK) /* 0 */ 384 return scnprintf(bf, size, "F"); 385 #define P_MODE(n) \ 386 if (mode & n##_OK) { \ 387 printed += scnprintf(bf + printed, size - printed, "%s", #n); \ 388 mode &= ~n##_OK; \ 389 } 390 391 P_MODE(R); 392 P_MODE(W); 393 P_MODE(X); 394 #undef P_MODE 395 396 if (mode) 397 printed += scnprintf(bf + printed, size - printed, "|%#x", mode); 398 399 return printed; 400 } 401 402 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode 403 404 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, 405 struct syscall_arg *arg) 406 { 407 int printed = 0, flags = arg->val; 408 409 if (!(flags & O_CREAT)) 410 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */ 411 412 if (flags == 0) 413 return scnprintf(bf, size, "RDONLY"); 414 #define P_FLAG(n) \ 415 if (flags & O_##n) { \ 416 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 417 flags &= ~O_##n; \ 418 } 419 420 P_FLAG(APPEND); 421 P_FLAG(ASYNC); 422 P_FLAG(CLOEXEC); 423 P_FLAG(CREAT); 424 P_FLAG(DIRECT); 425 P_FLAG(DIRECTORY); 426 P_FLAG(EXCL); 427 P_FLAG(LARGEFILE); 428 P_FLAG(NOATIME); 429 P_FLAG(NOCTTY); 430 #ifdef O_NONBLOCK 431 P_FLAG(NONBLOCK); 432 #elif O_NDELAY 433 P_FLAG(NDELAY); 434 #endif 435 #ifdef O_PATH 436 P_FLAG(PATH); 437 #endif 438 P_FLAG(RDWR); 439 #ifdef O_DSYNC 440 if ((flags & O_SYNC) == O_SYNC) 441 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC"); 442 else { 443 P_FLAG(DSYNC); 444 } 445 #else 446 P_FLAG(SYNC); 447 #endif 448 P_FLAG(TRUNC); 449 P_FLAG(WRONLY); 450 #undef P_FLAG 451 452 if (flags) 453 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 454 455 return printed; 456 } 457 458 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags 459 460 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size, 461 struct syscall_arg *arg) 462 { 463 int printed = 0, flags = arg->val; 464 465 if (flags == 0) 466 return scnprintf(bf, size, "NONE"); 467 #define P_FLAG(n) \ 468 if (flags & EFD_##n) { \ 469 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ 470 flags &= ~EFD_##n; \ 471 } 472 473 P_FLAG(SEMAPHORE); 474 P_FLAG(CLOEXEC); 475 P_FLAG(NONBLOCK); 476 #undef P_FLAG 477 478 if (flags) 479 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); 480 481 return printed; 482 } 483 484 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags 485 486 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg) 487 { 488 int sig = arg->val; 489 490 switch (sig) { 491 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n) 492 P_SIGNUM(HUP); 493 P_SIGNUM(INT); 494 P_SIGNUM(QUIT); 495 P_SIGNUM(ILL); 496 P_SIGNUM(TRAP); 497 P_SIGNUM(ABRT); 498 P_SIGNUM(BUS); 499 P_SIGNUM(FPE); 500 P_SIGNUM(KILL); 501 P_SIGNUM(USR1); 502 P_SIGNUM(SEGV); 503 P_SIGNUM(USR2); 504 P_SIGNUM(PIPE); 505 P_SIGNUM(ALRM); 506 P_SIGNUM(TERM); 507 P_SIGNUM(STKFLT); 508 P_SIGNUM(CHLD); 509 P_SIGNUM(CONT); 510 P_SIGNUM(STOP); 511 P_SIGNUM(TSTP); 512 P_SIGNUM(TTIN); 513 P_SIGNUM(TTOU); 514 P_SIGNUM(URG); 515 P_SIGNUM(XCPU); 516 P_SIGNUM(XFSZ); 517 P_SIGNUM(VTALRM); 518 P_SIGNUM(PROF); 519 P_SIGNUM(WINCH); 520 P_SIGNUM(IO); 521 P_SIGNUM(PWR); 522 P_SIGNUM(SYS); 523 default: break; 524 } 525 526 return scnprintf(bf, size, "%#x", sig); 527 } 528 529 #define SCA_SIGNUM syscall_arg__scnprintf_signum 530 531 static struct syscall_fmt { 532 const char *name; 533 const char *alias; 534 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg); 535 void *arg_parm[6]; 536 bool errmsg; 537 bool timeout; 538 bool hexret; 539 } syscall_fmts[] = { 540 { .name = "access", .errmsg = true, 541 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, }, 542 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, 543 { .name = "brk", .hexret = true, 544 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, 545 { .name = "connect", .errmsg = true, }, 546 { .name = "eventfd2", .errmsg = true, 547 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, 548 { .name = "fcntl", .errmsg = true, 549 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ }, 550 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, }, 551 { .name = "fstat", .errmsg = true, .alias = "newfstat", }, 552 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", }, 553 { .name = "futex", .errmsg = true, 554 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, 555 { .name = "getitimer", .errmsg = true, 556 .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ }, 557 .arg_parm = { [0] = &strarray__itimers, /* which */ }, }, 558 { .name = "getrlimit", .errmsg = true, 559 .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ }, 560 .arg_parm = { [0] = &strarray__rlimit_resources, /* resource */ }, }, 561 { .name = "ioctl", .errmsg = true, 562 .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, }, 563 { .name = "kill", .errmsg = true, 564 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 565 { .name = "lseek", .errmsg = true, 566 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ }, 567 .arg_parm = { [2] = &strarray__whences, /* whence */ }, }, 568 { .name = "lstat", .errmsg = true, .alias = "newlstat", }, 569 { .name = "madvise", .errmsg = true, 570 .arg_scnprintf = { [0] = SCA_HEX, /* start */ 571 [2] = SCA_MADV_BHV, /* behavior */ }, }, 572 { .name = "mmap", .hexret = true, 573 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ 574 [2] = SCA_MMAP_PROT, /* prot */ 575 [3] = SCA_MMAP_FLAGS, /* flags */ }, }, 576 { .name = "mprotect", .errmsg = true, 577 .arg_scnprintf = { [0] = SCA_HEX, /* start */ 578 [2] = SCA_MMAP_PROT, /* prot */ }, }, 579 { .name = "mremap", .hexret = true, 580 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ 581 [4] = SCA_HEX, /* new_addr */ }, }, 582 { .name = "munmap", .errmsg = true, 583 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, 584 { .name = "open", .errmsg = true, 585 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, 586 { .name = "open_by_handle_at", .errmsg = true, 587 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 588 { .name = "openat", .errmsg = true, 589 .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 590 { .name = "poll", .errmsg = true, .timeout = true, }, 591 { .name = "ppoll", .errmsg = true, .timeout = true, }, 592 { .name = "pread", .errmsg = true, .alias = "pread64", }, 593 { .name = "prlimit64", .errmsg = true, 594 .arg_scnprintf = { [1] = SCA_STRARRAY, /* resource */ }, 595 .arg_parm = { [1] = &strarray__rlimit_resources, /* resource */ }, }, 596 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", }, 597 { .name = "read", .errmsg = true, }, 598 { .name = "recvfrom", .errmsg = true, 599 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 600 { .name = "recvmmsg", .errmsg = true, 601 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 602 { .name = "recvmsg", .errmsg = true, 603 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, 604 { .name = "rt_sigaction", .errmsg = true, 605 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, 606 { .name = "rt_sigprocmask", .errmsg = true, 607 .arg_scnprintf = { [0] = SCA_STRARRAY, /* how */ }, 608 .arg_parm = { [0] = &strarray__sighow, /* how */ }, }, 609 { .name = "rt_sigqueueinfo", .errmsg = true, 610 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 611 { .name = "rt_tgsigqueueinfo", .errmsg = true, 612 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, 613 { .name = "select", .errmsg = true, .timeout = true, }, 614 { .name = "sendmmsg", .errmsg = true, 615 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 616 { .name = "sendmsg", .errmsg = true, 617 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, 618 { .name = "sendto", .errmsg = true, 619 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, 620 { .name = "setitimer", .errmsg = true, 621 .arg_scnprintf = { [0] = SCA_STRARRAY, /* which */ }, 622 .arg_parm = { [0] = &strarray__itimers, /* which */ }, }, 623 { .name = "setrlimit", .errmsg = true, 624 .arg_scnprintf = { [0] = SCA_STRARRAY, /* resource */ }, 625 .arg_parm = { [0] = &strarray__rlimit_resources, /* resource */ }, }, 626 { .name = "socket", .errmsg = true, 627 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ 628 [1] = SCA_SK_TYPE, /* type */ }, 629 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, 630 { .name = "stat", .errmsg = true, .alias = "newstat", }, 631 { .name = "tgkill", .errmsg = true, 632 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, 633 { .name = "tkill", .errmsg = true, 634 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, 635 { .name = "uname", .errmsg = true, .alias = "newuname", }, 636 }; 637 638 static int syscall_fmt__cmp(const void *name, const void *fmtp) 639 { 640 const struct syscall_fmt *fmt = fmtp; 641 return strcmp(name, fmt->name); 642 } 643 644 static struct syscall_fmt *syscall_fmt__find(const char *name) 645 { 646 const int nmemb = ARRAY_SIZE(syscall_fmts); 647 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); 648 } 649 650 struct syscall { 651 struct event_format *tp_format; 652 const char *name; 653 bool filtered; 654 struct syscall_fmt *fmt; 655 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); 656 void **arg_parm; 657 }; 658 659 static size_t fprintf_duration(unsigned long t, FILE *fp) 660 { 661 double duration = (double)t / NSEC_PER_MSEC; 662 size_t printed = fprintf(fp, "("); 663 664 if (duration >= 1.0) 665 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); 666 else if (duration >= 0.01) 667 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); 668 else 669 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration); 670 return printed + fprintf(fp, "): "); 671 } 672 673 struct thread_trace { 674 u64 entry_time; 675 u64 exit_time; 676 bool entry_pending; 677 unsigned long nr_events; 678 char *entry_str; 679 double runtime_ms; 680 }; 681 682 static struct thread_trace *thread_trace__new(void) 683 { 684 return zalloc(sizeof(struct thread_trace)); 685 } 686 687 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) 688 { 689 struct thread_trace *ttrace; 690 691 if (thread == NULL) 692 goto fail; 693 694 if (thread->priv == NULL) 695 thread->priv = thread_trace__new(); 696 697 if (thread->priv == NULL) 698 goto fail; 699 700 ttrace = thread->priv; 701 ++ttrace->nr_events; 702 703 return ttrace; 704 fail: 705 color_fprintf(fp, PERF_COLOR_RED, 706 "WARNING: not enough memory, dropping samples!\n"); 707 return NULL; 708 } 709 710 struct trace { 711 struct perf_tool tool; 712 int audit_machine; 713 struct { 714 int max; 715 struct syscall *table; 716 } syscalls; 717 struct perf_record_opts opts; 718 struct machine host; 719 u64 base_time; 720 bool full_time; 721 FILE *output; 722 unsigned long nr_events; 723 struct strlist *ev_qualifier; 724 bool not_ev_qualifier; 725 struct intlist *tid_list; 726 struct intlist *pid_list; 727 bool sched; 728 bool multiple_threads; 729 bool show_comm; 730 double duration_filter; 731 double runtime_ms; 732 }; 733 734 static bool trace__filter_duration(struct trace *trace, double t) 735 { 736 return t < (trace->duration_filter * NSEC_PER_MSEC); 737 } 738 739 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) 740 { 741 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; 742 743 return fprintf(fp, "%10.3f ", ts); 744 } 745 746 static bool done = false; 747 748 static void sig_handler(int sig __maybe_unused) 749 { 750 done = true; 751 } 752 753 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, 754 u64 duration, u64 tstamp, FILE *fp) 755 { 756 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); 757 printed += fprintf_duration(duration, fp); 758 759 if (trace->multiple_threads) { 760 if (trace->show_comm) 761 printed += fprintf(fp, "%.14s/", thread->comm); 762 printed += fprintf(fp, "%d ", thread->tid); 763 } 764 765 return printed; 766 } 767 768 static int trace__process_event(struct trace *trace, struct machine *machine, 769 union perf_event *event) 770 { 771 int ret = 0; 772 773 switch (event->header.type) { 774 case PERF_RECORD_LOST: 775 color_fprintf(trace->output, PERF_COLOR_RED, 776 "LOST %" PRIu64 " events!\n", event->lost.lost); 777 ret = machine__process_lost_event(machine, event); 778 default: 779 ret = machine__process_event(machine, event); 780 break; 781 } 782 783 return ret; 784 } 785 786 static int trace__tool_process(struct perf_tool *tool, 787 union perf_event *event, 788 struct perf_sample *sample __maybe_unused, 789 struct machine *machine) 790 { 791 struct trace *trace = container_of(tool, struct trace, tool); 792 return trace__process_event(trace, machine, event); 793 } 794 795 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) 796 { 797 int err = symbol__init(); 798 799 if (err) 800 return err; 801 802 machine__init(&trace->host, "", HOST_KERNEL_ID); 803 machine__create_kernel_maps(&trace->host); 804 805 if (perf_target__has_task(&trace->opts.target)) { 806 err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads, 807 trace__tool_process, 808 &trace->host); 809 } else { 810 err = perf_event__synthesize_threads(&trace->tool, trace__tool_process, 811 &trace->host); 812 } 813 814 if (err) 815 symbol__exit(); 816 817 return err; 818 } 819 820 static int syscall__set_arg_fmts(struct syscall *sc) 821 { 822 struct format_field *field; 823 int idx = 0; 824 825 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *)); 826 if (sc->arg_scnprintf == NULL) 827 return -1; 828 829 if (sc->fmt) 830 sc->arg_parm = sc->fmt->arg_parm; 831 832 for (field = sc->tp_format->format.fields->next; field; field = field->next) { 833 if (sc->fmt && sc->fmt->arg_scnprintf[idx]) 834 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx]; 835 else if (field->flags & FIELD_IS_POINTER) 836 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex; 837 ++idx; 838 } 839 840 return 0; 841 } 842 843 static int trace__read_syscall_info(struct trace *trace, int id) 844 { 845 char tp_name[128]; 846 struct syscall *sc; 847 const char *name = audit_syscall_to_name(id, trace->audit_machine); 848 849 if (name == NULL) 850 return -1; 851 852 if (id > trace->syscalls.max) { 853 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); 854 855 if (nsyscalls == NULL) 856 return -1; 857 858 if (trace->syscalls.max != -1) { 859 memset(nsyscalls + trace->syscalls.max + 1, 0, 860 (id - trace->syscalls.max) * sizeof(*sc)); 861 } else { 862 memset(nsyscalls, 0, (id + 1) * sizeof(*sc)); 863 } 864 865 trace->syscalls.table = nsyscalls; 866 trace->syscalls.max = id; 867 } 868 869 sc = trace->syscalls.table + id; 870 sc->name = name; 871 872 if (trace->ev_qualifier) { 873 bool in = strlist__find(trace->ev_qualifier, name) != NULL; 874 875 if (!(in ^ trace->not_ev_qualifier)) { 876 sc->filtered = true; 877 /* 878 * No need to do read tracepoint information since this will be 879 * filtered out. 880 */ 881 return 0; 882 } 883 } 884 885 sc->fmt = syscall_fmt__find(sc->name); 886 887 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); 888 sc->tp_format = event_format__new("syscalls", tp_name); 889 890 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) { 891 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias); 892 sc->tp_format = event_format__new("syscalls", tp_name); 893 } 894 895 if (sc->tp_format == NULL) 896 return -1; 897 898 return syscall__set_arg_fmts(sc); 899 } 900 901 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, 902 unsigned long *args) 903 { 904 size_t printed = 0; 905 906 if (sc->tp_format != NULL) { 907 struct format_field *field; 908 u8 bit = 1; 909 struct syscall_arg arg = { 910 .idx = 0, 911 .mask = 0, 912 }; 913 914 for (field = sc->tp_format->format.fields->next; field; 915 field = field->next, ++arg.idx, bit <<= 1) { 916 if (arg.mask & bit) 917 continue; 918 919 if (args[arg.idx] == 0) 920 continue; 921 922 printed += scnprintf(bf + printed, size - printed, 923 "%s%s: ", printed ? ", " : "", field->name); 924 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) { 925 arg.val = args[arg.idx]; 926 if (sc->arg_parm) 927 arg.parm = sc->arg_parm[arg.idx]; 928 printed += sc->arg_scnprintf[arg.idx](bf + printed, 929 size - printed, &arg); 930 } else { 931 printed += scnprintf(bf + printed, size - printed, 932 "%ld", args[arg.idx]); 933 } 934 } 935 } else { 936 int i = 0; 937 938 while (i < 6) { 939 printed += scnprintf(bf + printed, size - printed, 940 "%sarg%d: %ld", 941 printed ? ", " : "", i, args[i]); 942 ++i; 943 } 944 } 945 946 return printed; 947 } 948 949 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel, 950 struct perf_sample *sample); 951 952 static struct syscall *trace__syscall_info(struct trace *trace, 953 struct perf_evsel *evsel, 954 struct perf_sample *sample) 955 { 956 int id = perf_evsel__intval(evsel, sample, "id"); 957 958 if (id < 0) { 959 960 /* 961 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried 962 * before that, leaving at a higher verbosity level till that is 963 * explained. Reproduced with plain ftrace with: 964 * 965 * echo 1 > /t/events/raw_syscalls/sys_exit/enable 966 * grep "NR -1 " /t/trace_pipe 967 * 968 * After generating some load on the machine. 969 */ 970 if (verbose > 1) { 971 static u64 n; 972 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n", 973 id, perf_evsel__name(evsel), ++n); 974 } 975 return NULL; 976 } 977 978 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) && 979 trace__read_syscall_info(trace, id)) 980 goto out_cant_read; 981 982 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL)) 983 goto out_cant_read; 984 985 return &trace->syscalls.table[id]; 986 987 out_cant_read: 988 if (verbose) { 989 fprintf(trace->output, "Problems reading syscall %d", id); 990 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL) 991 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name); 992 fputs(" information\n", trace->output); 993 } 994 return NULL; 995 } 996 997 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, 998 struct perf_sample *sample) 999 { 1000 char *msg; 1001 void *args; 1002 size_t printed = 0; 1003 struct thread *thread; 1004 struct syscall *sc = trace__syscall_info(trace, evsel, sample); 1005 struct thread_trace *ttrace; 1006 1007 if (sc == NULL) 1008 return -1; 1009 1010 if (sc->filtered) 1011 return 0; 1012 1013 thread = machine__findnew_thread(&trace->host, sample->pid, 1014 sample->tid); 1015 ttrace = thread__trace(thread, trace->output); 1016 if (ttrace == NULL) 1017 return -1; 1018 1019 args = perf_evsel__rawptr(evsel, sample, "args"); 1020 if (args == NULL) { 1021 fprintf(trace->output, "Problems reading syscall arguments\n"); 1022 return -1; 1023 } 1024 1025 ttrace = thread->priv; 1026 1027 if (ttrace->entry_str == NULL) { 1028 ttrace->entry_str = malloc(1024); 1029 if (!ttrace->entry_str) 1030 return -1; 1031 } 1032 1033 ttrace->entry_time = sample->time; 1034 msg = ttrace->entry_str; 1035 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name); 1036 1037 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, args); 1038 1039 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) { 1040 if (!trace->duration_filter) { 1041 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); 1042 fprintf(trace->output, "%-70s\n", ttrace->entry_str); 1043 } 1044 } else 1045 ttrace->entry_pending = true; 1046 1047 return 0; 1048 } 1049 1050 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, 1051 struct perf_sample *sample) 1052 { 1053 int ret; 1054 u64 duration = 0; 1055 struct thread *thread; 1056 struct syscall *sc = trace__syscall_info(trace, evsel, sample); 1057 struct thread_trace *ttrace; 1058 1059 if (sc == NULL) 1060 return -1; 1061 1062 if (sc->filtered) 1063 return 0; 1064 1065 thread = machine__findnew_thread(&trace->host, sample->pid, 1066 sample->tid); 1067 ttrace = thread__trace(thread, trace->output); 1068 if (ttrace == NULL) 1069 return -1; 1070 1071 ret = perf_evsel__intval(evsel, sample, "ret"); 1072 1073 ttrace = thread->priv; 1074 1075 ttrace->exit_time = sample->time; 1076 1077 if (ttrace->entry_time) { 1078 duration = sample->time - ttrace->entry_time; 1079 if (trace__filter_duration(trace, duration)) 1080 goto out; 1081 } else if (trace->duration_filter) 1082 goto out; 1083 1084 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output); 1085 1086 if (ttrace->entry_pending) { 1087 fprintf(trace->output, "%-70s", ttrace->entry_str); 1088 } else { 1089 fprintf(trace->output, " ... ["); 1090 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued"); 1091 fprintf(trace->output, "]: %s()", sc->name); 1092 } 1093 1094 if (sc->fmt == NULL) { 1095 signed_print: 1096 fprintf(trace->output, ") = %d", ret); 1097 } else if (ret < 0 && sc->fmt->errmsg) { 1098 char bf[256]; 1099 const char *emsg = strerror_r(-ret, bf, sizeof(bf)), 1100 *e = audit_errno_to_name(-ret); 1101 1102 fprintf(trace->output, ") = -1 %s %s", e, emsg); 1103 } else if (ret == 0 && sc->fmt->timeout) 1104 fprintf(trace->output, ") = 0 Timeout"); 1105 else if (sc->fmt->hexret) 1106 fprintf(trace->output, ") = %#x", ret); 1107 else 1108 goto signed_print; 1109 1110 fputc('\n', trace->output); 1111 out: 1112 ttrace->entry_pending = false; 1113 1114 return 0; 1115 } 1116 1117 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel, 1118 struct perf_sample *sample) 1119 { 1120 u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); 1121 double runtime_ms = (double)runtime / NSEC_PER_MSEC; 1122 struct thread *thread = machine__findnew_thread(&trace->host, 1123 sample->pid, 1124 sample->tid); 1125 struct thread_trace *ttrace = thread__trace(thread, trace->output); 1126 1127 if (ttrace == NULL) 1128 goto out_dump; 1129 1130 ttrace->runtime_ms += runtime_ms; 1131 trace->runtime_ms += runtime_ms; 1132 return 0; 1133 1134 out_dump: 1135 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n", 1136 evsel->name, 1137 perf_evsel__strval(evsel, sample, "comm"), 1138 (pid_t)perf_evsel__intval(evsel, sample, "pid"), 1139 runtime, 1140 perf_evsel__intval(evsel, sample, "vruntime")); 1141 return 0; 1142 } 1143 1144 static bool skip_sample(struct trace *trace, struct perf_sample *sample) 1145 { 1146 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) || 1147 (trace->tid_list && intlist__find(trace->tid_list, sample->tid))) 1148 return false; 1149 1150 if (trace->pid_list || trace->tid_list) 1151 return true; 1152 1153 return false; 1154 } 1155 1156 static int trace__process_sample(struct perf_tool *tool, 1157 union perf_event *event __maybe_unused, 1158 struct perf_sample *sample, 1159 struct perf_evsel *evsel, 1160 struct machine *machine __maybe_unused) 1161 { 1162 struct trace *trace = container_of(tool, struct trace, tool); 1163 int err = 0; 1164 1165 tracepoint_handler handler = evsel->handler.func; 1166 1167 if (skip_sample(trace, sample)) 1168 return 0; 1169 1170 if (!trace->full_time && trace->base_time == 0) 1171 trace->base_time = sample->time; 1172 1173 if (handler) 1174 handler(trace, evsel, sample); 1175 1176 return err; 1177 } 1178 1179 static bool 1180 perf_session__has_tp(struct perf_session *session, const char *name) 1181 { 1182 struct perf_evsel *evsel; 1183 1184 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name); 1185 1186 return evsel != NULL; 1187 } 1188 1189 static int parse_target_str(struct trace *trace) 1190 { 1191 if (trace->opts.target.pid) { 1192 trace->pid_list = intlist__new(trace->opts.target.pid); 1193 if (trace->pid_list == NULL) { 1194 pr_err("Error parsing process id string\n"); 1195 return -EINVAL; 1196 } 1197 } 1198 1199 if (trace->opts.target.tid) { 1200 trace->tid_list = intlist__new(trace->opts.target.tid); 1201 if (trace->tid_list == NULL) { 1202 pr_err("Error parsing thread id string\n"); 1203 return -EINVAL; 1204 } 1205 } 1206 1207 return 0; 1208 } 1209 1210 static int trace__run(struct trace *trace, int argc, const char **argv) 1211 { 1212 struct perf_evlist *evlist = perf_evlist__new(); 1213 struct perf_evsel *evsel; 1214 int err = -1, i; 1215 unsigned long before; 1216 const bool forks = argc > 0; 1217 1218 if (evlist == NULL) { 1219 fprintf(trace->output, "Not enough memory to run!\n"); 1220 goto out; 1221 } 1222 1223 if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) || 1224 perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) { 1225 fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n"); 1226 goto out_delete_evlist; 1227 } 1228 1229 if (trace->sched && 1230 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", 1231 trace__sched_stat_runtime)) { 1232 fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n"); 1233 goto out_delete_evlist; 1234 } 1235 1236 err = perf_evlist__create_maps(evlist, &trace->opts.target); 1237 if (err < 0) { 1238 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n"); 1239 goto out_delete_evlist; 1240 } 1241 1242 err = trace__symbols_init(trace, evlist); 1243 if (err < 0) { 1244 fprintf(trace->output, "Problems initializing symbol libraries!\n"); 1245 goto out_delete_maps; 1246 } 1247 1248 perf_evlist__config(evlist, &trace->opts); 1249 1250 signal(SIGCHLD, sig_handler); 1251 signal(SIGINT, sig_handler); 1252 1253 if (forks) { 1254 err = perf_evlist__prepare_workload(evlist, &trace->opts.target, 1255 argv, false, false); 1256 if (err < 0) { 1257 fprintf(trace->output, "Couldn't run the workload!\n"); 1258 goto out_delete_maps; 1259 } 1260 } 1261 1262 err = perf_evlist__open(evlist); 1263 if (err < 0) { 1264 fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno)); 1265 goto out_delete_maps; 1266 } 1267 1268 err = perf_evlist__mmap(evlist, UINT_MAX, false); 1269 if (err < 0) { 1270 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno)); 1271 goto out_close_evlist; 1272 } 1273 1274 perf_evlist__enable(evlist); 1275 1276 if (forks) 1277 perf_evlist__start_workload(evlist); 1278 1279 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1; 1280 again: 1281 before = trace->nr_events; 1282 1283 for (i = 0; i < evlist->nr_mmaps; i++) { 1284 union perf_event *event; 1285 1286 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { 1287 const u32 type = event->header.type; 1288 tracepoint_handler handler; 1289 struct perf_sample sample; 1290 1291 ++trace->nr_events; 1292 1293 err = perf_evlist__parse_sample(evlist, event, &sample); 1294 if (err) { 1295 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err); 1296 continue; 1297 } 1298 1299 if (!trace->full_time && trace->base_time == 0) 1300 trace->base_time = sample.time; 1301 1302 if (type != PERF_RECORD_SAMPLE) { 1303 trace__process_event(trace, &trace->host, event); 1304 continue; 1305 } 1306 1307 evsel = perf_evlist__id2evsel(evlist, sample.id); 1308 if (evsel == NULL) { 1309 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id); 1310 continue; 1311 } 1312 1313 if (sample.raw_data == NULL) { 1314 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", 1315 perf_evsel__name(evsel), sample.tid, 1316 sample.cpu, sample.raw_size); 1317 continue; 1318 } 1319 1320 handler = evsel->handler.func; 1321 handler(trace, evsel, &sample); 1322 1323 if (done) 1324 goto out_unmap_evlist; 1325 } 1326 } 1327 1328 if (trace->nr_events == before) { 1329 if (done) 1330 goto out_unmap_evlist; 1331 1332 poll(evlist->pollfd, evlist->nr_fds, -1); 1333 } 1334 1335 if (done) 1336 perf_evlist__disable(evlist); 1337 1338 goto again; 1339 1340 out_unmap_evlist: 1341 perf_evlist__munmap(evlist); 1342 out_close_evlist: 1343 perf_evlist__close(evlist); 1344 out_delete_maps: 1345 perf_evlist__delete_maps(evlist); 1346 out_delete_evlist: 1347 perf_evlist__delete(evlist); 1348 out: 1349 return err; 1350 } 1351 1352 static int trace__replay(struct trace *trace) 1353 { 1354 const struct perf_evsel_str_handler handlers[] = { 1355 { "raw_syscalls:sys_enter", trace__sys_enter, }, 1356 { "raw_syscalls:sys_exit", trace__sys_exit, }, 1357 }; 1358 1359 struct perf_session *session; 1360 int err = -1; 1361 1362 trace->tool.sample = trace__process_sample; 1363 trace->tool.mmap = perf_event__process_mmap; 1364 trace->tool.mmap2 = perf_event__process_mmap2; 1365 trace->tool.comm = perf_event__process_comm; 1366 trace->tool.exit = perf_event__process_exit; 1367 trace->tool.fork = perf_event__process_fork; 1368 trace->tool.attr = perf_event__process_attr; 1369 trace->tool.tracing_data = perf_event__process_tracing_data; 1370 trace->tool.build_id = perf_event__process_build_id; 1371 1372 trace->tool.ordered_samples = true; 1373 trace->tool.ordering_requires_timestamps = true; 1374 1375 /* add tid to output */ 1376 trace->multiple_threads = true; 1377 1378 if (symbol__init() < 0) 1379 return -1; 1380 1381 session = perf_session__new(input_name, O_RDONLY, 0, false, 1382 &trace->tool); 1383 if (session == NULL) 1384 return -ENOMEM; 1385 1386 err = perf_session__set_tracepoints_handlers(session, handlers); 1387 if (err) 1388 goto out; 1389 1390 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) { 1391 pr_err("Data file does not have raw_syscalls:sys_enter events\n"); 1392 goto out; 1393 } 1394 1395 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) { 1396 pr_err("Data file does not have raw_syscalls:sys_exit events\n"); 1397 goto out; 1398 } 1399 1400 err = parse_target_str(trace); 1401 if (err != 0) 1402 goto out; 1403 1404 setup_pager(); 1405 1406 err = perf_session__process_events(session, &trace->tool); 1407 if (err) 1408 pr_err("Failed to process events, error %d", err); 1409 1410 out: 1411 perf_session__delete(session); 1412 1413 return err; 1414 } 1415 1416 static size_t trace__fprintf_threads_header(FILE *fp) 1417 { 1418 size_t printed; 1419 1420 printed = fprintf(fp, "\n _____________________________________________________________________\n"); 1421 printed += fprintf(fp," __) Summary of events (__\n\n"); 1422 printed += fprintf(fp," [ task - pid ] [ events ] [ ratio ] [ runtime ]\n"); 1423 printed += fprintf(fp," _____________________________________________________________________\n\n"); 1424 1425 return printed; 1426 } 1427 1428 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) 1429 { 1430 size_t printed = trace__fprintf_threads_header(fp); 1431 struct rb_node *nd; 1432 1433 for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) { 1434 struct thread *thread = rb_entry(nd, struct thread, rb_node); 1435 struct thread_trace *ttrace = thread->priv; 1436 const char *color; 1437 double ratio; 1438 1439 if (ttrace == NULL) 1440 continue; 1441 1442 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0; 1443 1444 color = PERF_COLOR_NORMAL; 1445 if (ratio > 50.0) 1446 color = PERF_COLOR_RED; 1447 else if (ratio > 25.0) 1448 color = PERF_COLOR_GREEN; 1449 else if (ratio > 5.0) 1450 color = PERF_COLOR_YELLOW; 1451 1452 printed += color_fprintf(fp, color, "%20s", thread->comm); 1453 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events); 1454 printed += color_fprintf(fp, color, "%5.1f%%", ratio); 1455 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms); 1456 } 1457 1458 return printed; 1459 } 1460 1461 static int trace__set_duration(const struct option *opt, const char *str, 1462 int unset __maybe_unused) 1463 { 1464 struct trace *trace = opt->value; 1465 1466 trace->duration_filter = atof(str); 1467 return 0; 1468 } 1469 1470 static int trace__open_output(struct trace *trace, const char *filename) 1471 { 1472 struct stat st; 1473 1474 if (!stat(filename, &st) && st.st_size) { 1475 char oldname[PATH_MAX]; 1476 1477 scnprintf(oldname, sizeof(oldname), "%s.old", filename); 1478 unlink(oldname); 1479 rename(filename, oldname); 1480 } 1481 1482 trace->output = fopen(filename, "w"); 1483 1484 return trace->output == NULL ? -errno : 0; 1485 } 1486 1487 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) 1488 { 1489 const char * const trace_usage[] = { 1490 "perf trace [<options>] [<command>]", 1491 "perf trace [<options>] -- <command> [<options>]", 1492 NULL 1493 }; 1494 struct trace trace = { 1495 .audit_machine = audit_detect_machine(), 1496 .syscalls = { 1497 . max = -1, 1498 }, 1499 .opts = { 1500 .target = { 1501 .uid = UINT_MAX, 1502 .uses_mmap = true, 1503 }, 1504 .user_freq = UINT_MAX, 1505 .user_interval = ULLONG_MAX, 1506 .no_delay = true, 1507 .mmap_pages = 1024, 1508 }, 1509 .output = stdout, 1510 .show_comm = true, 1511 }; 1512 const char *output_name = NULL; 1513 const char *ev_qualifier_str = NULL; 1514 const struct option trace_options[] = { 1515 OPT_BOOLEAN(0, "comm", &trace.show_comm, 1516 "show the thread COMM next to its id"), 1517 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", 1518 "list of events to trace"), 1519 OPT_STRING('o', "output", &output_name, "file", "output file name"), 1520 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"), 1521 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", 1522 "trace events on existing process id"), 1523 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid", 1524 "trace events on existing thread id"), 1525 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide, 1526 "system-wide collection from all CPUs"), 1527 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu", 1528 "list of cpus to monitor"), 1529 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, 1530 "child tasks do not inherit counters"), 1531 OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages, 1532 "number of mmap data pages"), 1533 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user", 1534 "user to profile"), 1535 OPT_CALLBACK(0, "duration", &trace, "float", 1536 "show only events with duration > N.M ms", 1537 trace__set_duration), 1538 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), 1539 OPT_INCR('v', "verbose", &verbose, "be more verbose"), 1540 OPT_BOOLEAN('T', "time", &trace.full_time, 1541 "Show full timestamp, not time relative to first start"), 1542 OPT_END() 1543 }; 1544 int err; 1545 char bf[BUFSIZ]; 1546 1547 argc = parse_options(argc, argv, trace_options, trace_usage, 0); 1548 1549 if (output_name != NULL) { 1550 err = trace__open_output(&trace, output_name); 1551 if (err < 0) { 1552 perror("failed to create output file"); 1553 goto out; 1554 } 1555 } 1556 1557 if (ev_qualifier_str != NULL) { 1558 const char *s = ev_qualifier_str; 1559 1560 trace.not_ev_qualifier = *s == '!'; 1561 if (trace.not_ev_qualifier) 1562 ++s; 1563 trace.ev_qualifier = strlist__new(true, s); 1564 if (trace.ev_qualifier == NULL) { 1565 fputs("Not enough memory to parse event qualifier", 1566 trace.output); 1567 err = -ENOMEM; 1568 goto out_close; 1569 } 1570 } 1571 1572 err = perf_target__validate(&trace.opts.target); 1573 if (err) { 1574 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 1575 fprintf(trace.output, "%s", bf); 1576 goto out_close; 1577 } 1578 1579 err = perf_target__parse_uid(&trace.opts.target); 1580 if (err) { 1581 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf)); 1582 fprintf(trace.output, "%s", bf); 1583 goto out_close; 1584 } 1585 1586 if (!argc && perf_target__none(&trace.opts.target)) 1587 trace.opts.target.system_wide = true; 1588 1589 if (input_name) 1590 err = trace__replay(&trace); 1591 else 1592 err = trace__run(&trace, argc, argv); 1593 1594 if (trace.sched && !err) 1595 trace__fprintf_thread_summary(&trace, trace.output); 1596 1597 out_close: 1598 if (output_name != NULL) 1599 fclose(trace.output); 1600 out: 1601 return err; 1602 } 1603