#include "builtin.h"
#include "perf.h"

#include "util/util.h"
#include "util/evlist.h"
#include "util/cache.h"
#include "util/evsel.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/session.h"
#include "util/tool.h"

#include "util/parse-options.h"
#include "util/trace-event.h"

#include "util/debug.h"

#include <sys/prctl.h>
#include <sys/resource.h>

#include <semaphore.h>
#include <pthread.h>
#include <math.h>

static const char *input_name;

static char default_sort_order[] = "avg, max, switch, runtime";
static const char *sort_order = default_sort_order;

static int profile_cpu = -1;

#define PR_SET_NAME	15	/* Set process name */
#define MAX_CPUS	4096

static u64 run_measurement_overhead;
static u64 sleep_measurement_overhead;

#define COMM_LEN	20
#define SYM_LEN		129

#define MAX_PID		65536

static unsigned long nr_tasks;

struct sched_atom;

struct task_desc {
	unsigned long nr;
	unsigned long pid;
	char comm[COMM_LEN];

	unsigned long nr_events;
	unsigned long curr_event;
	struct sched_atom **atoms;

	pthread_t thread;
	sem_t sleep_sem;

	sem_t ready_for_work;
	sem_t work_done_sem;

	u64 cpu_usage;
};

enum sched_event_type {
	SCHED_EVENT_RUN,
	SCHED_EVENT_SLEEP,
	SCHED_EVENT_WAKEUP,
	SCHED_EVENT_MIGRATION,
};

struct sched_atom {
	enum sched_event_type type;
	int specific_wait;
	u64 timestamp;
	u64 duration;
	unsigned long nr;
	sem_t *wait_sem;
	struct task_desc *wakee;
};

static struct task_desc *pid_to_task[MAX_PID];

static struct task_desc **tasks;

static pthread_mutex_t start_work_mutex = PTHREAD_MUTEX_INITIALIZER;
static u64 start_time;

static pthread_mutex_t work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER;

static unsigned long nr_run_events;
static unsigned long nr_sleep_events;
static unsigned long nr_wakeup_events;

static unsigned long nr_sleep_corrections;
static unsigned long nr_run_events_optimized;

static unsigned long targetless_wakeups;
static unsigned long multitarget_wakeups;

static u64 cpu_usage;
static u64 runavg_cpu_usage;
static u64 parent_cpu_usage;
static u64 runavg_parent_cpu_usage;

static unsigned long nr_runs;
static u64 sum_runtime;
static u64 sum_fluct;
static u64 run_avg;

static unsigned int replay_repeat = 10;
static unsigned long nr_timestamps;
static unsigned long nr_unordered_timestamps;
static unsigned long nr_state_machine_bugs;
static unsigned long nr_context_switch_bugs;
static unsigned long nr_events;
static unsigned long nr_lost_chunks;
static unsigned long nr_lost_events;

#define TASK_STATE_TO_CHAR_STR "RSDTtZX"

enum thread_state {
	THREAD_SLEEPING = 0,
	THREAD_WAIT_CPU,
	THREAD_SCHED_IN,
	THREAD_IGNORE
};

struct work_atom {
	struct list_head list;
	enum thread_state state;
	u64 sched_out_time;
	u64 wake_up_time;
	u64 sched_in_time;
	u64 runtime;
};

struct work_atoms {
	struct list_head work_list;
	struct thread *thread;
	struct rb_node node;
	u64 max_lat;
	u64 max_lat_at;
	u64 total_lat;
	u64 nb_atoms;
	u64 total_runtime;
};

typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);

static struct rb_root atom_root, sorted_atom_root;

static u64 all_runtime;
static u64 all_count;
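/*
 * Timing helpers for the replay workload: get_nsecs() reads
 * CLOCK_MONOTONIC, burn_nsecs() spins for roughly the requested
 * duration (compensating for the calibrated measurement overhead),
 * and sleep_nsecs() blocks in nanosleep().
 */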
static u64 get_nsecs(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);

	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

static void burn_nsecs(u64 nsecs)
{
	u64 T0 = get_nsecs(), T1;

	do {
		T1 = get_nsecs();
	} while (T1 + run_measurement_overhead < T0 + nsecs);
}

static void sleep_nsecs(u64 nsecs)
{
	struct timespec ts;

	ts.tv_nsec = nsecs % 999999999;
	ts.tv_sec = nsecs / 999999999;

	nanosleep(&ts, NULL);
}

static void calibrate_run_measurement_overhead(void)
{
	u64 T0, T1, delta, min_delta = 1000000000ULL;
	int i;

	for (i = 0; i < 10; i++) {
		T0 = get_nsecs();
		burn_nsecs(0);
		T1 = get_nsecs();
		delta = T1 - T0;
		min_delta = min(min_delta, delta);
	}
	run_measurement_overhead = min_delta;

	printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta);
}

static void calibrate_sleep_measurement_overhead(void)
{
	u64 T0, T1, delta, min_delta = 1000000000ULL;
	int i;

	for (i = 0; i < 10; i++) {
		T0 = get_nsecs();
		sleep_nsecs(10000);
		T1 = get_nsecs();
		delta = T1 - T0;
		min_delta = min(min_delta, delta);
	}
	min_delta -= 10000;
	sleep_measurement_overhead = min_delta;

	printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta);
}

static struct sched_atom *
get_new_event(struct task_desc *task, u64 timestamp)
{
	struct sched_atom *event = zalloc(sizeof(*event));
	unsigned long idx = task->nr_events;
	size_t size;

	event->timestamp = timestamp;
	event->nr = idx;

	task->nr_events++;
	size = sizeof(struct sched_atom *) * task->nr_events;
	task->atoms = realloc(task->atoms, size);
	BUG_ON(!task->atoms);

	task->atoms[idx] = event;

	return event;
}

static struct sched_atom *last_event(struct task_desc *task)
{
	if (!task->nr_events)
		return NULL;

	return task->atoms[task->nr_events - 1];
}

static void
add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration)
{
	struct sched_atom *event, *curr_event = last_event(task);

	/*
	 * optimize an existing RUN event by merging this one
	 * to it:
	 */
	if (curr_event && curr_event->type == SCHED_EVENT_RUN) {
		nr_run_events_optimized++;
		curr_event->duration += duration;
		return;
	}

	event = get_new_event(task, timestamp);

	event->type = SCHED_EVENT_RUN;
	event->duration = duration;

	nr_run_events++;
}

static void
add_sched_event_wakeup(struct task_desc *task, u64 timestamp,
		       struct task_desc *wakee)
{
	struct sched_atom *event, *wakee_event;

	event = get_new_event(task, timestamp);
	event->type = SCHED_EVENT_WAKEUP;
	event->wakee = wakee;

	wakee_event = last_event(wakee);
	if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) {
		targetless_wakeups++;
		return;
	}
	if (wakee_event->wait_sem) {
		multitarget_wakeups++;
		return;
	}

	wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem));
	sem_init(wakee_event->wait_sem, 0, 0);
	wakee_event->specific_wait = 1;
	event->wait_sem = wakee_event->wait_sem;

	nr_wakeup_events++;
}
static void
add_sched_event_sleep(struct task_desc *task, u64 timestamp,
		      u64 task_state __used)
{
	struct sched_atom *event = get_new_event(task, timestamp);

	event->type = SCHED_EVENT_SLEEP;

	nr_sleep_events++;
}

static struct task_desc *register_pid(unsigned long pid, const char *comm)
{
	struct task_desc *task;

	BUG_ON(pid >= MAX_PID);

	task = pid_to_task[pid];

	if (task)
		return task;

	task = zalloc(sizeof(*task));
	task->pid = pid;
	task->nr = nr_tasks;
	strcpy(task->comm, comm);
	/*
	 * every task starts in sleeping state - this gets ignored
	 * if there's no wakeup pointing to this sleep state:
	 */
	add_sched_event_sleep(task, 0, 0);

	pid_to_task[pid] = task;
	nr_tasks++;
	tasks = realloc(tasks, nr_tasks * sizeof(struct task_desc *));
	BUG_ON(!tasks);
	tasks[task->nr] = task;

	if (verbose)
		printf("registered task #%ld, PID %ld (%s)\n", nr_tasks, pid, comm);

	return task;
}


static void print_task_traces(void)
{
	struct task_desc *task;
	unsigned long i;

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		printf("task %6ld (%20s:%10ld), nr_events: %ld\n",
			task->nr, task->comm, task->pid, task->nr_events);
	}
}

static void add_cross_task_wakeups(void)
{
	struct task_desc *task1, *task2;
	unsigned long i, j;

	for (i = 0; i < nr_tasks; i++) {
		task1 = tasks[i];
		j = i + 1;
		if (j == nr_tasks)
			j = 0;
		task2 = tasks[j];
		add_sched_event_wakeup(task1, 0, task2);
	}
}

static void
process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom)
{
	int ret = 0;

	switch (atom->type) {
	case SCHED_EVENT_RUN:
		burn_nsecs(atom->duration);
		break;
	case SCHED_EVENT_SLEEP:
		if (atom->wait_sem)
			ret = sem_wait(atom->wait_sem);
		BUG_ON(ret);
		break;
	case SCHED_EVENT_WAKEUP:
		if (atom->wait_sem)
			ret = sem_post(atom->wait_sem);
		BUG_ON(ret);
		break;
	case SCHED_EVENT_MIGRATION:
		break;
	default:
		BUG_ON(1);
	}
}

static u64 get_cpu_usage_nsec_parent(void)
{
	struct rusage ru;
	u64 sum;
	int err;

	err = getrusage(RUSAGE_SELF, &ru);
	BUG_ON(err);

	sum = ru.ru_utime.tv_sec * 1e9 + ru.ru_utime.tv_usec * 1e3;
	sum += ru.ru_stime.tv_sec * 1e9 + ru.ru_stime.tv_usec * 1e3;

	return sum;
}

static int self_open_counters(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));

	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_TASK_CLOCK;

	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);

	if (fd < 0)
		die("Error: sys_perf_event_open() syscall returned "
		    "with %d (%s)\n", fd, strerror(errno));
	return fd;
}

static u64 get_cpu_usage_nsec_self(int fd)
{
	u64 runtime;
	int ret;

	ret = read(fd, &runtime, sizeof(runtime));
	BUG_ON(ret != sizeof(runtime));

	return runtime;
}
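/*
 * Each recorded task is replayed by its own pthread: thread_func()
 * waits for the parent to release start_work_mutex, replays the
 * task's atoms (run/sleep/wakeup), measures its own CPU time via the
 * task-clock software counter opened above, and posts work_done_sem
 * before waiting for the next round.
 */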
static void *thread_func(void *ctx)
{
	struct task_desc *this_task = ctx;
	u64 cpu_usage_0, cpu_usage_1;
	unsigned long i, ret;
	char comm2[22];
	int fd;

	sprintf(comm2, ":%s", this_task->comm);
	prctl(PR_SET_NAME, comm2);
	fd = self_open_counters();

again:
	ret = sem_post(&this_task->ready_for_work);
	BUG_ON(ret);
	ret = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(ret);
	ret = pthread_mutex_unlock(&start_work_mutex);
	BUG_ON(ret);

	cpu_usage_0 = get_cpu_usage_nsec_self(fd);

	for (i = 0; i < this_task->nr_events; i++) {
		this_task->curr_event = i;
		process_sched_event(this_task, this_task->atoms[i]);
	}

	cpu_usage_1 = get_cpu_usage_nsec_self(fd);
	this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;
	ret = sem_post(&this_task->work_done_sem);
	BUG_ON(ret);

	ret = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(ret);
	ret = pthread_mutex_unlock(&work_done_wait_mutex);
	BUG_ON(ret);

	goto again;
}

static void create_tasks(void)
{
	struct task_desc *task;
	pthread_attr_t attr;
	unsigned long i;
	int err;

	err = pthread_attr_init(&attr);
	BUG_ON(err);
	err = pthread_attr_setstacksize(&attr,
			(size_t) max(16 * 1024, PTHREAD_STACK_MIN));
	BUG_ON(err);
	err = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(err);
	err = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(err);
	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		sem_init(&task->sleep_sem, 0, 0);
		sem_init(&task->ready_for_work, 0, 0);
		sem_init(&task->work_done_sem, 0, 0);
		task->curr_event = 0;
		err = pthread_create(&task->thread, &attr, thread_func, task);
		BUG_ON(err);
	}
}

static void wait_for_tasks(void)
{
	u64 cpu_usage_0, cpu_usage_1;
	struct task_desc *task;
	unsigned long i, ret;

	start_time = get_nsecs();
	cpu_usage = 0;
	pthread_mutex_unlock(&work_done_wait_mutex);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		ret = sem_wait(&task->ready_for_work);
		BUG_ON(ret);
		sem_init(&task->ready_for_work, 0, 0);
	}
	ret = pthread_mutex_lock(&work_done_wait_mutex);
	BUG_ON(ret);

	cpu_usage_0 = get_cpu_usage_nsec_parent();

	pthread_mutex_unlock(&start_work_mutex);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		ret = sem_wait(&task->work_done_sem);
		BUG_ON(ret);
		sem_init(&task->work_done_sem, 0, 0);
		cpu_usage += task->cpu_usage;
		task->cpu_usage = 0;
	}

	cpu_usage_1 = get_cpu_usage_nsec_parent();
	if (!runavg_cpu_usage)
		runavg_cpu_usage = cpu_usage;
	runavg_cpu_usage = (runavg_cpu_usage * 9 + cpu_usage) / 10;

	parent_cpu_usage = cpu_usage_1 - cpu_usage_0;
	if (!runavg_parent_cpu_usage)
		runavg_parent_cpu_usage = parent_cpu_usage;
	runavg_parent_cpu_usage = (runavg_parent_cpu_usage * 9 +
				   parent_cpu_usage) / 10;

	ret = pthread_mutex_lock(&start_work_mutex);
	BUG_ON(ret);

	for (i = 0; i < nr_tasks; i++) {
		task = tasks[i];
		sem_init(&task->sleep_sem, 0, 0);
		task->curr_event = 0;
	}
}

static void run_one_test(void)
{
	u64 T0, T1, delta, avg_delta, fluct;

	T0 = get_nsecs();
	wait_for_tasks();
	T1 = get_nsecs();

	delta = T1 - T0;
	sum_runtime += delta;
	nr_runs++;

	avg_delta = sum_runtime / nr_runs;
	if (delta < avg_delta)
		fluct = avg_delta - delta;
	else
		fluct = delta - avg_delta;
	sum_fluct += fluct;
	if (!run_avg)
		run_avg = delta;
	run_avg = (run_avg * 9 + delta) / 10;

	printf("#%-3ld: %0.3f, ",
		nr_runs, (double)delta / 1000000.0);

	printf("ravg: %0.2f, ",
		(double)run_avg / 1e6);

	printf("cpu: %0.2f / %0.2f",
		(double)cpu_usage / 1e6, (double)runavg_cpu_usage / 1e6);

#if 0
	/*
	 * rusage statistics done by the parent, these are less
	 * accurate than the sum_exec_runtime based statistics:
	 */
	printf(" [%0.2f / %0.2f]",
		(double)parent_cpu_usage / 1e6,
		(double)runavg_parent_cpu_usage / 1e6);
#endif

	printf("\n");

	if (nr_sleep_corrections)
		printf(" (%ld sleep corrections)\n", nr_sleep_corrections);
	nr_sleep_corrections = 0;
}

static void test_calibrations(void)
{
	u64 T0, T1;

	T0 = get_nsecs();
	burn_nsecs(1e6);
	T1 = get_nsecs();

	printf("the run test took %" PRIu64 " nsecs\n", T1 - T0);

	T0 = get_nsecs();
	sleep_nsecs(1e6);
	T1 = get_nsecs();

	printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0);
}

#define FILL_FIELD(ptr, field, event, data) \
	ptr.field = (typeof(ptr.field)) raw_field_value(event, #field, data)

#define FILL_ARRAY(ptr, array, event, data) \
do { \
	void *__array = raw_field_ptr(event, #array, data); \
	memcpy(ptr.array, __array, sizeof(ptr.array)); \
} while (0)

#define FILL_COMMON_FIELDS(ptr, event, data) \
do { \
	FILL_FIELD(ptr, common_type, event, data); \
	FILL_FIELD(ptr, common_flags, event, data); \
	FILL_FIELD(ptr, common_preempt_count, event, data); \
	FILL_FIELD(ptr, common_pid, event, data); \
	FILL_FIELD(ptr, common_tgid, event, data); \
} while (0)



struct trace_switch_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char prev_comm[16];
	u32 prev_pid;
	u32 prev_prio;
	u64 prev_state;
	char next_comm[16];
	u32 next_pid;
	u32 next_prio;
};

struct trace_runtime_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char comm[16];
	u32 pid;
	u64 runtime;
	u64 vruntime;
};

struct trace_wakeup_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char comm[16];
	u32 pid;

	u32 prio;
	u32 success;
	u32 cpu;
};

struct trace_fork_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char parent_comm[16];
	u32 parent_pid;
	char child_comm[16];
	u32 child_pid;
};

struct trace_migrate_task_event {
	u32 size;

	u16 common_type;
	u8 common_flags;
	u8 common_preempt_count;
	u32 common_pid;
	u32 common_tgid;

	char comm[16];
	u32 pid;

	u32 prio;
	u32 cpu;
};

struct trace_sched_handler {
	void (*switch_event)(struct trace_switch_event *,
			     struct machine *,
			     struct event_format *,
			     int cpu,
			     u64 timestamp,
			     struct thread *thread);

	void (*runtime_event)(struct trace_runtime_event *,
			      struct machine *,
			      struct event_format *,
			      int cpu,
			      u64 timestamp,
			      struct thread *thread);

	void (*wakeup_event)(struct trace_wakeup_event *,
			     struct machine *,
			     struct event_format *,
			     int cpu,
			     u64 timestamp,
			     struct thread *thread);

	void (*fork_event)(struct trace_fork_event *,
			   struct event_format *,
			   int cpu,
			   u64 timestamp,
			   struct thread *thread);

	void (*migrate_task_event)(struct trace_migrate_task_event *,
				   struct machine *machine,
				   struct event_format *,
				   int cpu,
				   u64 timestamp,
				   struct thread *thread);
};

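/*
 * Replay handlers: they rebuild the per-task lists of sched atoms
 * (run, sleep, wakeup) from the recorded sched_switch, sched_wakeup
 * and sched_process_fork tracepoints so that 'perf sched replay' can
 * re-execute the recorded scheduling pattern.
 */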
static void
replay_wakeup_event(struct trace_wakeup_event *wakeup_event,
		    struct machine *machine __used,
		    struct event_format *event,
		    int cpu __used,
		    u64 timestamp __used,
		    struct thread *thread __used)
{
	struct task_desc *waker, *wakee;

	if (verbose) {
		printf("sched_wakeup event %p\n", event);

		printf(" ... pid %d woke up %s/%d\n",
			wakeup_event->common_pid,
			wakeup_event->comm,
			wakeup_event->pid);
	}

	waker = register_pid(wakeup_event->common_pid, "<unknown>");
	wakee = register_pid(wakeup_event->pid, wakeup_event->comm);

	add_sched_event_wakeup(waker, timestamp, wakee);
}

static u64 cpu_last_switched[MAX_CPUS];

static void
replay_switch_event(struct trace_switch_event *switch_event,
		    struct machine *machine __used,
		    struct event_format *event,
		    int cpu,
		    u64 timestamp,
		    struct thread *thread __used)
{
	struct task_desc *prev, __used *next;
	u64 timestamp0;
	s64 delta;

	if (verbose)
		printf("sched_switch event %p\n", event);

	if (cpu >= MAX_CPUS || cpu < 0)
		return;

	timestamp0 = cpu_last_switched[cpu];
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0)
		die("hm, delta: %" PRIu64 " < 0 ?\n", delta);

	if (verbose) {
		printf(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n",
			switch_event->prev_comm, switch_event->prev_pid,
			switch_event->next_comm, switch_event->next_pid,
			delta);
	}

	prev = register_pid(switch_event->prev_pid, switch_event->prev_comm);
	next = register_pid(switch_event->next_pid, switch_event->next_comm);

	cpu_last_switched[cpu] = timestamp;

	add_sched_event_run(prev, timestamp, delta);
	add_sched_event_sleep(prev, timestamp, switch_event->prev_state);
}


static void
replay_fork_event(struct trace_fork_event *fork_event,
		  struct event_format *event,
		  int cpu __used,
		  u64 timestamp __used,
		  struct thread *thread __used)
{
	if (verbose) {
		printf("sched_fork event %p\n", event);
		printf("... parent: %s/%d\n", fork_event->parent_comm, fork_event->parent_pid);
		printf("...  child: %s/%d\n", fork_event->child_comm, fork_event->child_pid);
	}
	register_pid(fork_event->parent_pid, fork_event->parent_comm);
	register_pid(fork_event->child_pid, fork_event->child_comm);
}

static struct trace_sched_handler replay_ops = {
	.wakeup_event = replay_wakeup_event,
	.switch_event = replay_switch_event,
	.fork_event = replay_fork_event,
};

struct sort_dimension {
	const char *name;
	sort_fn_t cmp;
	struct list_head list;
};

static LIST_HEAD(cmp_pid);

static int
thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r)
{
	struct sort_dimension *sort;
	int ret = 0;

	BUG_ON(list_empty(list));

	list_for_each_entry(sort, list, list) {
		ret = sort->cmp(l, r);
		if (ret)
			return ret;
	}

	return ret;
}

static struct work_atoms *
thread_atoms_search(struct rb_root *root, struct thread *thread,
		    struct list_head *sort_list)
{
	struct rb_node *node = root->rb_node;
	struct work_atoms key = { .thread = thread };

	while (node) {
		struct work_atoms *atoms;
		int cmp;

		atoms = container_of(node, struct work_atoms, node);

		cmp = thread_lat_cmp(sort_list, &key, atoms);
		if (cmp > 0)
			node = node->rb_left;
		else if (cmp < 0)
			node = node->rb_right;
		else {
			BUG_ON(thread != atoms->thread);
			return atoms;
		}
	}
	return NULL;
}

static void
__thread_latency_insert(struct rb_root *root, struct work_atoms *data,
			struct list_head *sort_list)
{
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	while (*new) {
		struct work_atoms *this;
		int cmp;

		this = container_of(*new, struct work_atoms, node);
		parent = *new;

		cmp = thread_lat_cmp(sort_list, data, this);

		if (cmp > 0)
			new = &((*new)->rb_left);
		else
			new = &((*new)->rb_right);
	}

	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

static void thread_atoms_insert(struct thread *thread)
{
	struct work_atoms *atoms = zalloc(sizeof(*atoms));
	if (!atoms)
		die("No memory");

	atoms->thread = thread;
	INIT_LIST_HEAD(&atoms->work_list);
	__thread_latency_insert(&atom_root, atoms, &cmp_pid);
}

static void
latency_fork_event(struct trace_fork_event *fork_event __used,
		   struct event_format *event __used,
		   int cpu __used,
		   u64 timestamp __used,
		   struct thread *thread __used)
{
	/* should insert the newcomer */
}

__used
static char sched_out_state(struct trace_switch_event *switch_event)
{
	const char *str = TASK_STATE_TO_CHAR_STR;

	return str[switch_event->prev_state];
}

static void
add_sched_out_event(struct work_atoms *atoms,
		    char run_state,
		    u64 timestamp)
{
	struct work_atom *atom = zalloc(sizeof(*atom));
	if (!atom)
		die("No memory");

	atom->sched_out_time = timestamp;

	if (run_state == 'R') {
		atom->state = THREAD_WAIT_CPU;
		atom->wake_up_time = atom->sched_out_time;
	}

	list_add_tail(&atom->list, &atoms->work_list);
}

static void
add_runtime_event(struct work_atoms *atoms, u64 delta, u64 timestamp __used)
{
	struct work_atom *atom;

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	atom->runtime += delta;
	atoms->total_runtime += delta;
}
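/*
 * add_sched_in_event() closes a wakeup -> schedule-in interval: the
 * latency of an atom is sched_in_time - wake_up_time, and the
 * per-thread totals plus the maximum latency (with the time at which
 * it occurred) are accumulated in struct work_atoms.
 */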
static void
add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
{
	struct work_atom *atom;
	u64 delta;

	if (list_empty(&atoms->work_list))
		return;

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	if (atom->state != THREAD_WAIT_CPU)
		return;

	if (timestamp < atom->wake_up_time) {
		atom->state = THREAD_IGNORE;
		return;
	}

	atom->state = THREAD_SCHED_IN;
	atom->sched_in_time = timestamp;

	delta = atom->sched_in_time - atom->wake_up_time;
	atoms->total_lat += delta;
	if (delta > atoms->max_lat) {
		atoms->max_lat = delta;
		atoms->max_lat_at = timestamp;
	}
	atoms->nb_atoms++;
}

static void
latency_switch_event(struct trace_switch_event *switch_event,
		     struct machine *machine,
		     struct event_format *event __used,
		     int cpu,
		     u64 timestamp,
		     struct thread *thread __used)
{
	struct work_atoms *out_events, *in_events;
	struct thread *sched_out, *sched_in;
	u64 timestamp0;
	s64 delta;

	BUG_ON(cpu >= MAX_CPUS || cpu < 0);

	timestamp0 = cpu_last_switched[cpu];
	cpu_last_switched[cpu] = timestamp;
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0)
		die("hm, delta: %" PRIu64 " < 0 ?\n", delta);


	sched_out = machine__findnew_thread(machine, switch_event->prev_pid);
	sched_in = machine__findnew_thread(machine, switch_event->next_pid);

	out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
	if (!out_events) {
		thread_atoms_insert(sched_out);
		out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
		if (!out_events)
			die("out-event: Internal tree error");
	}
	add_sched_out_event(out_events, sched_out_state(switch_event), timestamp);

	in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
	if (!in_events) {
		thread_atoms_insert(sched_in);
		in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
		if (!in_events)
			die("in-event: Internal tree error");
		/*
		 * Task came in that we have not heard about yet;
		 * add an initial atom in runnable state:
		 */
		add_sched_out_event(in_events, 'R', timestamp);
	}
	add_sched_in_event(in_events, timestamp);
}

static void
latency_runtime_event(struct trace_runtime_event *runtime_event,
		      struct machine *machine,
		      struct event_format *event __used,
		      int cpu,
		      u64 timestamp,
		      struct thread *this_thread __used)
{
	struct thread *thread = machine__findnew_thread(machine, runtime_event->pid);
	struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);

	BUG_ON(cpu >= MAX_CPUS || cpu < 0);
	if (!atoms) {
		thread_atoms_insert(thread);
		atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
		if (!atoms)
			die("in-event: Internal tree error");
		add_sched_out_event(atoms, 'R', timestamp);
	}

	add_runtime_event(atoms, runtime_event->runtime, timestamp);
}
static void
latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
		     struct machine *machine,
		     struct event_format *__event __used,
		     int cpu __used,
		     u64 timestamp,
		     struct thread *thread __used)
{
	struct work_atoms *atoms;
	struct work_atom *atom;
	struct thread *wakee;

	/* Note for later, it may be interesting to observe the failing cases */
	if (!wakeup_event->success)
		return;

	wakee = machine__findnew_thread(machine, wakeup_event->pid);
	atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
	if (!atoms) {
		thread_atoms_insert(wakee);
		atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
		if (!atoms)
			die("wakeup-event: Internal tree error");
		add_sched_out_event(atoms, 'S', timestamp);
	}

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	/*
	 * You WILL be missing events if you've recorded only
	 * one CPU, or are looking at only one, so don't
	 * make useless noise.
	 */
	if (profile_cpu == -1 && atom->state != THREAD_SLEEPING)
		nr_state_machine_bugs++;

	nr_timestamps++;
	if (atom->sched_out_time > timestamp) {
		nr_unordered_timestamps++;
		return;
	}

	atom->state = THREAD_WAIT_CPU;
	atom->wake_up_time = timestamp;
}

static void
latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event,
			   struct machine *machine,
			   struct event_format *__event __used,
			   int cpu __used,
			   u64 timestamp,
			   struct thread *thread __used)
{
	struct work_atoms *atoms;
	struct work_atom *atom;
	struct thread *migrant;

	/*
	 * Only need to worry about migration when profiling one CPU.
	 */
	if (profile_cpu == -1)
		return;

	migrant = machine__findnew_thread(machine, migrate_task_event->pid);
	atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
	if (!atoms) {
		thread_atoms_insert(migrant);
		register_pid(migrant->pid, migrant->comm);
		atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
		if (!atoms)
			die("migration-event: Internal tree error");
		add_sched_out_event(atoms, 'R', timestamp);
	}

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);
	atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp;

	nr_timestamps++;

	if (atom->sched_out_time > timestamp)
		nr_unordered_timestamps++;
}

static struct trace_sched_handler lat_ops = {
	.wakeup_event = latency_wakeup_event,
	.switch_event = latency_switch_event,
	.runtime_event = latency_runtime_event,
	.fork_event = latency_fork_event,
	.migrate_task_event = latency_migrate_task_event,
};

static void output_lat_thread(struct work_atoms *work_list)
{
	int i;
	int ret;
	u64 avg;

	if (!work_list->nb_atoms)
		return;
	/*
	 * Ignore idle threads:
	 */
	if (!strcmp(work_list->thread->comm, "swapper"))
		return;

	all_runtime += work_list->total_runtime;
	all_count += work_list->nb_atoms;

	ret = printf(" %s:%d ", work_list->thread->comm, work_list->thread->pid);

	for (i = 0; i < 24 - ret; i++)
		printf(" ");

	avg = work_list->total_lat / work_list->nb_atoms;

	printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %9.6f s\n",
		(double)work_list->total_runtime / 1e6,
		work_list->nb_atoms, (double)avg / 1e6,
		(double)work_list->max_lat / 1e6,
		(double)work_list->max_lat_at / 1e9);
}
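/*
 * Sort dimensions for 'perf sched latency': each comparator orders
 * two struct work_atoms entries. The keys given via -s/--sort are
 * collected in sort_list and applied in order by thread_lat_cmp().
 */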
static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->thread->pid < r->thread->pid)
		return -1;
	if (l->thread->pid > r->thread->pid)
		return 1;

	return 0;
}

static struct sort_dimension pid_sort_dimension = {
	.name = "pid",
	.cmp = pid_cmp,
};

static int avg_cmp(struct work_atoms *l, struct work_atoms *r)
{
	u64 avgl, avgr;

	if (!l->nb_atoms)
		return -1;

	if (!r->nb_atoms)
		return 1;

	avgl = l->total_lat / l->nb_atoms;
	avgr = r->total_lat / r->nb_atoms;

	if (avgl < avgr)
		return -1;
	if (avgl > avgr)
		return 1;

	return 0;
}

static struct sort_dimension avg_sort_dimension = {
	.name = "avg",
	.cmp = avg_cmp,
};

static int max_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->max_lat < r->max_lat)
		return -1;
	if (l->max_lat > r->max_lat)
		return 1;

	return 0;
}

static struct sort_dimension max_sort_dimension = {
	.name = "max",
	.cmp = max_cmp,
};

static int switch_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->nb_atoms < r->nb_atoms)
		return -1;
	if (l->nb_atoms > r->nb_atoms)
		return 1;

	return 0;
}

static struct sort_dimension switch_sort_dimension = {
	.name = "switch",
	.cmp = switch_cmp,
};

static int runtime_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->total_runtime < r->total_runtime)
		return -1;
	if (l->total_runtime > r->total_runtime)
		return 1;

	return 0;
}

static struct sort_dimension runtime_sort_dimension = {
	.name = "runtime",
	.cmp = runtime_cmp,
};

static struct sort_dimension *available_sorts[] = {
	&pid_sort_dimension,
	&avg_sort_dimension,
	&max_sort_dimension,
	&switch_sort_dimension,
	&runtime_sort_dimension,
};

#define NB_AVAILABLE_SORTS	(int)(sizeof(available_sorts) / sizeof(struct sort_dimension *))

static LIST_HEAD(sort_list);

static int sort_dimension__add(const char *tok, struct list_head *list)
{
	int i;

	for (i = 0; i < NB_AVAILABLE_SORTS; i++) {
		if (!strcmp(available_sorts[i]->name, tok)) {
			list_add_tail(&available_sorts[i]->list, list);

			return 0;
		}
	}

	return -1;
}

static void setup_sorting(void);

static void sort_lat(void)
{
	struct rb_node *node;

	for (;;) {
		struct work_atoms *data;
		node = rb_first(&atom_root);
		if (!node)
			break;

		rb_erase(node, &atom_root);
		data = rb_entry(node, struct work_atoms, node);
		__thread_latency_insert(&sorted_atom_root, data, &sort_list);
	}
}

static struct trace_sched_handler *trace_handler;

static void
process_sched_wakeup_event(struct perf_tool *tool __used,
			   struct event_format *event,
			   struct perf_sample *sample,
			   struct machine *machine,
			   struct thread *thread)
{
	void *data = sample->raw_data;
	struct trace_wakeup_event wakeup_event;

	FILL_COMMON_FIELDS(wakeup_event, event, data);

	FILL_ARRAY(wakeup_event, comm, event, data);
	FILL_FIELD(wakeup_event, pid, event, data);
	FILL_FIELD(wakeup_event, prio, event, data);
	FILL_FIELD(wakeup_event, success, event, data);
	FILL_FIELD(wakeup_event, cpu, event, data);

	if (trace_handler->wakeup_event)
		trace_handler->wakeup_event(&wakeup_event, machine, event,
					    sample->cpu, sample->time, thread);
}
/*
 * Track the current task - that way we can know whether there are any
 * weird events, such as a task being switched away that is not current.
 */
static int max_cpu;

static u32 curr_pid[MAX_CPUS] = { [0 ... MAX_CPUS-1] = -1 };

static struct thread *curr_thread[MAX_CPUS];

static char next_shortname1 = 'A';
static char next_shortname2 = '0';

static void
map_switch_event(struct trace_switch_event *switch_event,
		 struct machine *machine,
		 struct event_format *event __used,
		 int this_cpu,
		 u64 timestamp,
		 struct thread *thread __used)
{
	struct thread *sched_out __used, *sched_in;
	int new_shortname;
	u64 timestamp0;
	s64 delta;
	int cpu;

	BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);

	if (this_cpu > max_cpu)
		max_cpu = this_cpu;

	timestamp0 = cpu_last_switched[this_cpu];
	cpu_last_switched[this_cpu] = timestamp;
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0)
		die("hm, delta: %" PRIu64 " < 0 ?\n", delta);


	sched_out = machine__findnew_thread(machine, switch_event->prev_pid);
	sched_in = machine__findnew_thread(machine, switch_event->next_pid);

	curr_thread[this_cpu] = sched_in;

	printf("  ");

	new_shortname = 0;
	if (!sched_in->shortname[0]) {
		sched_in->shortname[0] = next_shortname1;
		sched_in->shortname[1] = next_shortname2;

		if (next_shortname1 < 'Z') {
			next_shortname1++;
		} else {
			next_shortname1 = 'A';
			if (next_shortname2 < '9') {
				next_shortname2++;
			} else {
				next_shortname2 = '0';
			}
		}
		new_shortname = 1;
	}

	for (cpu = 0; cpu <= max_cpu; cpu++) {
		if (cpu != this_cpu)
			printf(" ");
		else
			printf("*");

		if (curr_thread[cpu]) {
			if (curr_thread[cpu]->pid)
				printf("%2s ", curr_thread[cpu]->shortname);
			else
				printf(".  ");
		} else
			printf("   ");
	}

	printf("  %12.6f secs ", (double)timestamp / 1e9);
	if (new_shortname) {
		printf("%s => %s:%d\n",
			sched_in->shortname, sched_in->comm, sched_in->pid);
	} else {
		printf("\n");
	}
}
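/*
 * The process_sched_*_event() wrappers below unpack the raw
 * tracepoint payload with the FILL_* macros and forward it to
 * whichever trace_sched_handler (replay_ops, lat_ops or map_ops)
 * is currently installed in trace_handler.
 */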
static void
process_sched_switch_event(struct perf_tool *tool __used,
			   struct event_format *event,
			   struct perf_sample *sample,
			   struct machine *machine,
			   struct thread *thread)
{
	int this_cpu = sample->cpu;
	void *data = sample->raw_data;
	struct trace_switch_event switch_event;

	FILL_COMMON_FIELDS(switch_event, event, data);

	FILL_ARRAY(switch_event, prev_comm, event, data);
	FILL_FIELD(switch_event, prev_pid, event, data);
	FILL_FIELD(switch_event, prev_prio, event, data);
	FILL_FIELD(switch_event, prev_state, event, data);
	FILL_ARRAY(switch_event, next_comm, event, data);
	FILL_FIELD(switch_event, next_pid, event, data);
	FILL_FIELD(switch_event, next_prio, event, data);

	if (curr_pid[this_cpu] != (u32)-1) {
		/*
		 * Are we trying to switch away a PID that is
		 * not current?
		 */
		if (curr_pid[this_cpu] != switch_event.prev_pid)
			nr_context_switch_bugs++;
	}
	if (trace_handler->switch_event)
		trace_handler->switch_event(&switch_event, machine, event,
					    this_cpu, sample->time, thread);

	curr_pid[this_cpu] = switch_event.next_pid;
}

static void
process_sched_runtime_event(struct perf_tool *tool __used,
			    struct event_format *event,
			    struct perf_sample *sample,
			    struct machine *machine,
			    struct thread *thread)
{
	void *data = sample->raw_data;
	struct trace_runtime_event runtime_event;

	FILL_ARRAY(runtime_event, comm, event, data);
	FILL_FIELD(runtime_event, pid, event, data);
	FILL_FIELD(runtime_event, runtime, event, data);
	FILL_FIELD(runtime_event, vruntime, event, data);

	if (trace_handler->runtime_event)
		trace_handler->runtime_event(&runtime_event, machine, event,
					     sample->cpu, sample->time, thread);
}

static void
process_sched_fork_event(struct perf_tool *tool __used,
			 struct event_format *event,
			 struct perf_sample *sample,
			 struct machine *machine __used,
			 struct thread *thread)
{
	void *data = sample->raw_data;
	struct trace_fork_event fork_event;

	FILL_COMMON_FIELDS(fork_event, event, data);

	FILL_ARRAY(fork_event, parent_comm, event, data);
	FILL_FIELD(fork_event, parent_pid, event, data);
	FILL_ARRAY(fork_event, child_comm, event, data);
	FILL_FIELD(fork_event, child_pid, event, data);

	if (trace_handler->fork_event)
		trace_handler->fork_event(&fork_event, event,
					  sample->cpu, sample->time, thread);
}

static void
process_sched_exit_event(struct perf_tool *tool __used,
			 struct event_format *event,
			 struct perf_sample *sample __used,
			 struct machine *machine __used,
			 struct thread *thread __used)
{
	if (verbose)
		printf("sched_exit event %p\n", event);
}

static void
process_sched_migrate_task_event(struct perf_tool *tool __used,
				 struct event_format *event,
				 struct perf_sample *sample,
				 struct machine *machine,
				 struct thread *thread)
{
	void *data = sample->raw_data;
	struct trace_migrate_task_event migrate_task_event;

	FILL_COMMON_FIELDS(migrate_task_event, event, data);

	FILL_ARRAY(migrate_task_event, comm, event, data);
	FILL_FIELD(migrate_task_event, pid, event, data);
	FILL_FIELD(migrate_task_event, prio, event, data);
	FILL_FIELD(migrate_task_event, cpu, event, data);

	if (trace_handler->migrate_task_event)
		trace_handler->migrate_task_event(&migrate_task_event, machine,
						  event, sample->cpu,
						  sample->time, thread);
}

typedef void (*tracepoint_handler)(struct perf_tool *tool, struct event_format *event,
				   struct perf_sample *sample,
				   struct machine *machine,
				   struct thread *thread);

static int perf_sched__process_tracepoint_sample(struct perf_tool *tool,
						 union perf_event *event __used,
						 struct perf_sample *sample,
						 struct perf_evsel *evsel,
						 struct machine *machine)
{
	struct thread *thread = machine__findnew_thread(machine, sample->pid);

	if (thread == NULL) {
		pr_debug("problem processing %s event, skipping it.\n",
			 evsel->name);
		return -1;
	}

	evsel->hists.stats.total_period += sample->period;
	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);

	if (evsel->handler.func != NULL) {
		tracepoint_handler f = evsel->handler.func;

		if (evsel->handler.data == NULL)
			evsel->handler.data = trace_find_event(evsel->attr.config);

		f(tool, evsel->handler.data, sample, machine, thread);
	}

	return 0;
}

static struct perf_tool perf_sched = {
	.sample = perf_sched__process_tracepoint_sample,
	.comm = perf_event__process_comm,
	.lost = perf_event__process_lost,
	.fork = perf_event__process_task,
	.ordered_samples = true,
};

static void read_events(bool destroy, struct perf_session **psession)
{
	int err = -EINVAL;
	const struct perf_evsel_str_handler handlers[] = {
		{ "sched:sched_switch",       process_sched_switch_event, },
		{ "sched:sched_stat_runtime", process_sched_runtime_event, },
		{ "sched:sched_wakeup",       process_sched_wakeup_event, },
		{ "sched:sched_wakeup_new",   process_sched_wakeup_event, },
		{ "sched:sched_process_fork", process_sched_fork_event, },
		{ "sched:sched_process_exit", process_sched_exit_event, },
		{ "sched:sched_migrate_task", process_sched_migrate_task_event, },
	};
	struct perf_session *session = perf_session__new(input_name, O_RDONLY,
							 0, false, &perf_sched);
	if (session == NULL)
		die("No Memory");

	err = perf_evlist__set_tracepoints_handlers_array(session->evlist, handlers);
	assert(err == 0);

	if (perf_session__has_traces(session, "record -R")) {
		err = perf_session__process_events(session, &perf_sched);
		if (err)
			die("Failed to process events, error %d", err);

		nr_events = session->hists.stats.nr_events[0];
		nr_lost_events = session->hists.stats.total_lost;
		nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST];
	}

	if (destroy)
		perf_session__delete(session);

	if (psession)
		*psession = session;
}

static void print_bad_events(void)
{
	if (nr_unordered_timestamps && nr_timestamps) {
		printf("  INFO: %.3f%% unordered timestamps (%ld out of %ld)\n",
			(double)nr_unordered_timestamps/(double)nr_timestamps*100.0,
			nr_unordered_timestamps, nr_timestamps);
	}
	if (nr_lost_events && nr_events) {
		printf("  INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
			(double)nr_lost_events/(double)nr_events*100.0,
			nr_lost_events, nr_events, nr_lost_chunks);
	}
	if (nr_state_machine_bugs && nr_timestamps) {
		printf("  INFO: %.3f%% state machine bugs (%ld out of %ld)",
			(double)nr_state_machine_bugs/(double)nr_timestamps*100.0,
			nr_state_machine_bugs, nr_timestamps);
		if (nr_lost_events)
			printf(" (due to lost events?)");
		printf("\n");
	}
	if (nr_context_switch_bugs && nr_timestamps) {
		printf("  INFO: %.3f%% context switch bugs (%ld out of %ld)",
			(double)nr_context_switch_bugs/(double)nr_timestamps*100.0,
			nr_context_switch_bugs, nr_timestamps);
		if (nr_lost_events)
			printf(" (due to lost events?)");
		printf("\n");
	}
}

static void __cmd_lat(void)
{
	struct rb_node *next;
	struct perf_session *session;

	setup_pager();
	read_events(false, &session);
	sort_lat();

	printf("\n ---------------------------------------------------------------------------------------------------------------\n");
	printf("  Task                  |   Runtime ms  | Switches | Average delay ms | Maximum delay ms | Maximum delay at     |\n");
	printf(" ---------------------------------------------------------------------------------------------------------------\n");

	next = rb_first(&sorted_atom_root);

	while (next) {
		struct work_atoms *work_list;

		work_list = rb_entry(next, struct work_atoms, node);
		output_lat_thread(work_list);
		next = rb_next(next);
	}

	printf(" -----------------------------------------------------------------------------------------\n");
	printf("  TOTAL:                |%11.3f ms |%9" PRIu64 " |\n",
		(double)all_runtime / 1e6, all_count);

	printf(" ---------------------------------------------------\n");

	print_bad_events();
	printf("\n");

	perf_session__delete(session);
}

static struct trace_sched_handler map_ops = {
	.wakeup_event = NULL,
	.switch_event = map_switch_event,
	.runtime_event = NULL,
	.fork_event = NULL,
};

static void __cmd_map(void)
{
	max_cpu = sysconf(_SC_NPROCESSORS_CONF);

	setup_pager();
	read_events(true, NULL);
	print_bad_events();
}

static void __cmd_replay(void)
{
	unsigned long i;

	calibrate_run_measurement_overhead();
	calibrate_sleep_measurement_overhead();

	test_calibrations();

	read_events(true, NULL);

	printf("nr_run_events:        %ld\n", nr_run_events);
	printf("nr_sleep_events:      %ld\n", nr_sleep_events);
	printf("nr_wakeup_events:     %ld\n", nr_wakeup_events);

	if (targetless_wakeups)
		printf("target-less wakeups:  %ld\n", targetless_wakeups);
	if (multitarget_wakeups)
		printf("multi-target wakeups: %ld\n", multitarget_wakeups);
	if (nr_run_events_optimized)
		printf("run atoms optimized:  %ld\n",
			nr_run_events_optimized);

	print_task_traces();
	add_cross_task_wakeups();

	create_tasks();
	printf("------------------------------------------------------------\n");
	for (i = 0; i < replay_repeat; i++)
		run_one_test();
}


static const char * const sched_usage[] = {
	"perf sched [<options>] {record|latency|map|replay|script}",
	NULL
};

static const struct option sched_options[] = {
	OPT_STRING('i', "input", &input_name, "file",
		   "input file name"),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_END()
};

static const char * const latency_usage[] = {
	"perf sched latency [<options>]",
	NULL
};

static const struct option latency_options[] = {
	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
		   "sort by key(s): runtime, switch, avg, max"),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show symbol address, etc)"),
	OPT_INTEGER('C', "CPU", &profile_cpu,
		    "CPU to profile on"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_END()
};

static const char * const replay_usage[] = {
	"perf sched replay [<options>]",
	NULL
};

static const struct option replay_options[] = {
	OPT_UINTEGER('r', "repeat", &replay_repeat,
		     "repeat the workload replay N times (-1: infinite)"),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_END()
};

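/*
 * Parse the --sort string into a list of sort dimensions; unknown
 * keys abort with the latency usage text. The separate cmp_pid list
 * always sorts by pid, since the rbtree of per-thread work_atoms is
 * keyed by pid.
 */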
static void setup_sorting(void)
{
	char *tmp, *tok, *str = strdup(sort_order);

	for (tok = strtok_r(str, ", ", &tmp);
			tok; tok = strtok_r(NULL, ", ", &tmp)) {
		if (sort_dimension__add(tok, &sort_list) < 0) {
			error("Unknown --sort key: `%s'", tok);
			usage_with_options(latency_usage, latency_options);
		}
	}

	free(str);

	sort_dimension__add("pid", &cmp_pid);
}

static const char *record_args[] = {
	"record",
	"-a",
	"-R",
	"-f",
	"-m", "1024",
	"-c", "1",
	"-e", "sched:sched_switch",
	"-e", "sched:sched_stat_wait",
	"-e", "sched:sched_stat_sleep",
	"-e", "sched:sched_stat_iowait",
	"-e", "sched:sched_stat_runtime",
	"-e", "sched:sched_process_exit",
	"-e", "sched:sched_process_fork",
	"-e", "sched:sched_wakeup",
	"-e", "sched:sched_migrate_task",
};

static int __cmd_record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;

	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_record(i, rec_argv, NULL);
}

int cmd_sched(int argc, const char **argv, const char *prefix __used)
{
	argc = parse_options(argc, argv, sched_options, sched_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc)
		usage_with_options(sched_usage, sched_options);

	/*
	 * Aliased to 'perf script' for now:
	 */
	if (!strcmp(argv[0], "script"))
		return cmd_script(argc, argv, prefix);

	symbol__init();
	if (!strncmp(argv[0], "rec", 3)) {
		return __cmd_record(argc, argv);
	} else if (!strncmp(argv[0], "lat", 3)) {
		trace_handler = &lat_ops;
		if (argc > 1) {
			argc = parse_options(argc, argv, latency_options, latency_usage, 0);
			if (argc)
				usage_with_options(latency_usage, latency_options);
		}
		setup_sorting();
		__cmd_lat();
	} else if (!strcmp(argv[0], "map")) {
		trace_handler = &map_ops;
		setup_sorting();
		__cmd_map();
	} else if (!strncmp(argv[0], "rep", 3)) {
		trace_handler = &replay_ops;
		if (argc) {
			argc = parse_options(argc, argv, replay_options, replay_usage, 0);
			if (argc)
				usage_with_options(replay_usage, replay_options);
		}
		__cmd_replay();
	} else {
		usage_with_options(sched_usage, sched_options);
	}

	return 0;
}