1 #include "builtin.h" 2 #include "perf.h" 3 4 #include "util/util.h" 5 #include "util/cache.h" 6 #include "util/symbol.h" 7 #include "util/thread.h" 8 #include "util/header.h" 9 10 #include "util/parse-options.h" 11 #include "util/trace-event.h" 12 13 #include "util/debug.h" 14 #include "util/data_map.h" 15 16 #include <sys/prctl.h> 17 18 #include <semaphore.h> 19 #include <pthread.h> 20 #include <math.h> 21 22 static char const *input_name = "perf.data"; 23 24 static struct perf_header *header; 25 static u64 sample_type; 26 27 static char default_sort_order[] = "avg, max, switch, runtime"; 28 static char *sort_order = default_sort_order; 29 30 static int profile_cpu = -1; 31 32 #define PR_SET_NAME 15 /* Set process name */ 33 #define MAX_CPUS 4096 34 35 static u64 run_measurement_overhead; 36 static u64 sleep_measurement_overhead; 37 38 #define COMM_LEN 20 39 #define SYM_LEN 129 40 41 #define MAX_PID 65536 42 43 static unsigned long nr_tasks; 44 45 struct sched_atom; 46 47 struct task_desc { 48 unsigned long nr; 49 unsigned long pid; 50 char comm[COMM_LEN]; 51 52 unsigned long nr_events; 53 unsigned long curr_event; 54 struct sched_atom **atoms; 55 56 pthread_t thread; 57 sem_t sleep_sem; 58 59 sem_t ready_for_work; 60 sem_t work_done_sem; 61 62 u64 cpu_usage; 63 }; 64 65 enum sched_event_type { 66 SCHED_EVENT_RUN, 67 SCHED_EVENT_SLEEP, 68 SCHED_EVENT_WAKEUP, 69 SCHED_EVENT_MIGRATION, 70 }; 71 72 struct sched_atom { 73 enum sched_event_type type; 74 u64 timestamp; 75 u64 duration; 76 unsigned long nr; 77 int specific_wait; 78 sem_t *wait_sem; 79 struct task_desc *wakee; 80 }; 81 82 static struct task_desc *pid_to_task[MAX_PID]; 83 84 static struct task_desc **tasks; 85 86 static pthread_mutex_t start_work_mutex = PTHREAD_MUTEX_INITIALIZER; 87 static u64 start_time; 88 89 static pthread_mutex_t work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER; 90 91 static unsigned long nr_run_events; 92 static unsigned long nr_sleep_events; 93 static unsigned long nr_wakeup_events; 94 95 static unsigned long nr_sleep_corrections; 96 static unsigned long nr_run_events_optimized; 97 98 static unsigned long targetless_wakeups; 99 static unsigned long multitarget_wakeups; 100 101 static u64 cpu_usage; 102 static u64 runavg_cpu_usage; 103 static u64 parent_cpu_usage; 104 static u64 runavg_parent_cpu_usage; 105 106 static unsigned long nr_runs; 107 static u64 sum_runtime; 108 static u64 sum_fluct; 109 static u64 run_avg; 110 111 static unsigned long replay_repeat = 10; 112 static unsigned long nr_timestamps; 113 static unsigned long nr_unordered_timestamps; 114 static unsigned long nr_state_machine_bugs; 115 static unsigned long nr_context_switch_bugs; 116 static unsigned long nr_events; 117 static unsigned long nr_lost_chunks; 118 static unsigned long nr_lost_events; 119 120 #define TASK_STATE_TO_CHAR_STR "RSDTtZX" 121 122 enum thread_state { 123 THREAD_SLEEPING = 0, 124 THREAD_WAIT_CPU, 125 THREAD_SCHED_IN, 126 THREAD_IGNORE 127 }; 128 129 struct work_atom { 130 struct list_head list; 131 enum thread_state state; 132 u64 sched_out_time; 133 u64 wake_up_time; 134 u64 sched_in_time; 135 u64 runtime; 136 }; 137 138 struct work_atoms { 139 struct list_head work_list; 140 struct thread *thread; 141 struct rb_node node; 142 u64 max_lat; 143 u64 max_lat_at; 144 u64 total_lat; 145 u64 nb_atoms; 146 u64 total_runtime; 147 }; 148 149 typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *); 150 151 static struct rb_root atom_root, sorted_atom_root; 152 153 static u64 all_runtime; 154 static u64 all_count; 155 156 157 static u64 get_nsecs(void) 158 { 159 struct timespec ts; 160 161 clock_gettime(CLOCK_MONOTONIC, &ts); 162 163 return ts.tv_sec * 1000000000ULL + ts.tv_nsec; 164 } 165 166 static void burn_nsecs(u64 nsecs) 167 { 168 u64 T0 = get_nsecs(), T1; 169 170 do { 171 T1 = get_nsecs(); 172 } while (T1 + run_measurement_overhead < T0 + nsecs); 173 } 174 175 static void sleep_nsecs(u64 nsecs) 176 { 177 struct timespec ts; 178 179 ts.tv_nsec = nsecs % 999999999; 180 ts.tv_sec = nsecs / 999999999; 181 182 nanosleep(&ts, NULL); 183 } 184 185 static void calibrate_run_measurement_overhead(void) 186 { 187 u64 T0, T1, delta, min_delta = 1000000000ULL; 188 int i; 189 190 for (i = 0; i < 10; i++) { 191 T0 = get_nsecs(); 192 burn_nsecs(0); 193 T1 = get_nsecs(); 194 delta = T1-T0; 195 min_delta = min(min_delta, delta); 196 } 197 run_measurement_overhead = min_delta; 198 199 printf("run measurement overhead: %Ld nsecs\n", min_delta); 200 } 201 202 static void calibrate_sleep_measurement_overhead(void) 203 { 204 u64 T0, T1, delta, min_delta = 1000000000ULL; 205 int i; 206 207 for (i = 0; i < 10; i++) { 208 T0 = get_nsecs(); 209 sleep_nsecs(10000); 210 T1 = get_nsecs(); 211 delta = T1-T0; 212 min_delta = min(min_delta, delta); 213 } 214 min_delta -= 10000; 215 sleep_measurement_overhead = min_delta; 216 217 printf("sleep measurement overhead: %Ld nsecs\n", min_delta); 218 } 219 220 static struct sched_atom * 221 get_new_event(struct task_desc *task, u64 timestamp) 222 { 223 struct sched_atom *event = zalloc(sizeof(*event)); 224 unsigned long idx = task->nr_events; 225 size_t size; 226 227 event->timestamp = timestamp; 228 event->nr = idx; 229 230 task->nr_events++; 231 size = sizeof(struct sched_atom *) * task->nr_events; 232 task->atoms = realloc(task->atoms, size); 233 BUG_ON(!task->atoms); 234 235 task->atoms[idx] = event; 236 237 return event; 238 } 239 240 static struct sched_atom *last_event(struct task_desc *task) 241 { 242 if (!task->nr_events) 243 return NULL; 244 245 return task->atoms[task->nr_events - 1]; 246 } 247 248 static void 249 add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration) 250 { 251 struct sched_atom *event, *curr_event = last_event(task); 252 253 /* 254 * optimize an existing RUN event by merging this one 255 * to it: 256 */ 257 if (curr_event && curr_event->type == SCHED_EVENT_RUN) { 258 nr_run_events_optimized++; 259 curr_event->duration += duration; 260 return; 261 } 262 263 event = get_new_event(task, timestamp); 264 265 event->type = SCHED_EVENT_RUN; 266 event->duration = duration; 267 268 nr_run_events++; 269 } 270 271 static void 272 add_sched_event_wakeup(struct task_desc *task, u64 timestamp, 273 struct task_desc *wakee) 274 { 275 struct sched_atom *event, *wakee_event; 276 277 event = get_new_event(task, timestamp); 278 event->type = SCHED_EVENT_WAKEUP; 279 event->wakee = wakee; 280 281 wakee_event = last_event(wakee); 282 if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) { 283 targetless_wakeups++; 284 return; 285 } 286 if (wakee_event->wait_sem) { 287 multitarget_wakeups++; 288 return; 289 } 290 291 wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem)); 292 sem_init(wakee_event->wait_sem, 0, 0); 293 wakee_event->specific_wait = 1; 294 event->wait_sem = wakee_event->wait_sem; 295 296 nr_wakeup_events++; 297 } 298 299 static void 300 add_sched_event_sleep(struct task_desc *task, u64 timestamp, 301 u64 task_state __used) 302 { 303 struct sched_atom *event = get_new_event(task, timestamp); 304 305 event->type = SCHED_EVENT_SLEEP; 306 307 nr_sleep_events++; 308 } 309 310 static struct task_desc *register_pid(unsigned long pid, const char *comm) 311 { 312 struct task_desc *task; 313 314 BUG_ON(pid >= MAX_PID); 315 316 task = pid_to_task[pid]; 317 318 if (task) 319 return task; 320 321 task = zalloc(sizeof(*task)); 322 task->pid = pid; 323 task->nr = nr_tasks; 324 strcpy(task->comm, comm); 325 /* 326 * every task starts in sleeping state - this gets ignored 327 * if there's no wakeup pointing to this sleep state: 328 */ 329 add_sched_event_sleep(task, 0, 0); 330 331 pid_to_task[pid] = task; 332 nr_tasks++; 333 tasks = realloc(tasks, nr_tasks*sizeof(struct task_task *)); 334 BUG_ON(!tasks); 335 tasks[task->nr] = task; 336 337 if (verbose) 338 printf("registered task #%ld, PID %ld (%s)\n", nr_tasks, pid, comm); 339 340 return task; 341 } 342 343 344 static void print_task_traces(void) 345 { 346 struct task_desc *task; 347 unsigned long i; 348 349 for (i = 0; i < nr_tasks; i++) { 350 task = tasks[i]; 351 printf("task %6ld (%20s:%10ld), nr_events: %ld\n", 352 task->nr, task->comm, task->pid, task->nr_events); 353 } 354 } 355 356 static void add_cross_task_wakeups(void) 357 { 358 struct task_desc *task1, *task2; 359 unsigned long i, j; 360 361 for (i = 0; i < nr_tasks; i++) { 362 task1 = tasks[i]; 363 j = i + 1; 364 if (j == nr_tasks) 365 j = 0; 366 task2 = tasks[j]; 367 add_sched_event_wakeup(task1, 0, task2); 368 } 369 } 370 371 static void 372 process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom) 373 { 374 int ret = 0; 375 u64 now; 376 long long delta; 377 378 now = get_nsecs(); 379 delta = start_time + atom->timestamp - now; 380 381 switch (atom->type) { 382 case SCHED_EVENT_RUN: 383 burn_nsecs(atom->duration); 384 break; 385 case SCHED_EVENT_SLEEP: 386 if (atom->wait_sem) 387 ret = sem_wait(atom->wait_sem); 388 BUG_ON(ret); 389 break; 390 case SCHED_EVENT_WAKEUP: 391 if (atom->wait_sem) 392 ret = sem_post(atom->wait_sem); 393 BUG_ON(ret); 394 break; 395 case SCHED_EVENT_MIGRATION: 396 break; 397 default: 398 BUG_ON(1); 399 } 400 } 401 402 static u64 get_cpu_usage_nsec_parent(void) 403 { 404 struct rusage ru; 405 u64 sum; 406 int err; 407 408 err = getrusage(RUSAGE_SELF, &ru); 409 BUG_ON(err); 410 411 sum = ru.ru_utime.tv_sec*1e9 + ru.ru_utime.tv_usec*1e3; 412 sum += ru.ru_stime.tv_sec*1e9 + ru.ru_stime.tv_usec*1e3; 413 414 return sum; 415 } 416 417 static int self_open_counters(void) 418 { 419 struct perf_event_attr attr; 420 int fd; 421 422 memset(&attr, 0, sizeof(attr)); 423 424 attr.type = PERF_TYPE_SOFTWARE; 425 attr.config = PERF_COUNT_SW_TASK_CLOCK; 426 427 fd = sys_perf_event_open(&attr, 0, -1, -1, 0); 428 429 if (fd < 0) 430 die("Error: sys_perf_event_open() syscall returned" 431 "with %d (%s)\n", fd, strerror(errno)); 432 return fd; 433 } 434 435 static u64 get_cpu_usage_nsec_self(int fd) 436 { 437 u64 runtime; 438 int ret; 439 440 ret = read(fd, &runtime, sizeof(runtime)); 441 BUG_ON(ret != sizeof(runtime)); 442 443 return runtime; 444 } 445 446 static void *thread_func(void *ctx) 447 { 448 struct task_desc *this_task = ctx; 449 u64 cpu_usage_0, cpu_usage_1; 450 unsigned long i, ret; 451 char comm2[22]; 452 int fd; 453 454 sprintf(comm2, ":%s", this_task->comm); 455 prctl(PR_SET_NAME, comm2); 456 fd = self_open_counters(); 457 458 again: 459 ret = sem_post(&this_task->ready_for_work); 460 BUG_ON(ret); 461 ret = pthread_mutex_lock(&start_work_mutex); 462 BUG_ON(ret); 463 ret = pthread_mutex_unlock(&start_work_mutex); 464 BUG_ON(ret); 465 466 cpu_usage_0 = get_cpu_usage_nsec_self(fd); 467 468 for (i = 0; i < this_task->nr_events; i++) { 469 this_task->curr_event = i; 470 process_sched_event(this_task, this_task->atoms[i]); 471 } 472 473 cpu_usage_1 = get_cpu_usage_nsec_self(fd); 474 this_task->cpu_usage = cpu_usage_1 - cpu_usage_0; 475 ret = sem_post(&this_task->work_done_sem); 476 BUG_ON(ret); 477 478 ret = pthread_mutex_lock(&work_done_wait_mutex); 479 BUG_ON(ret); 480 ret = pthread_mutex_unlock(&work_done_wait_mutex); 481 BUG_ON(ret); 482 483 goto again; 484 } 485 486 static void create_tasks(void) 487 { 488 struct task_desc *task; 489 pthread_attr_t attr; 490 unsigned long i; 491 int err; 492 493 err = pthread_attr_init(&attr); 494 BUG_ON(err); 495 err = pthread_attr_setstacksize(&attr, (size_t)(16*1024)); 496 BUG_ON(err); 497 err = pthread_mutex_lock(&start_work_mutex); 498 BUG_ON(err); 499 err = pthread_mutex_lock(&work_done_wait_mutex); 500 BUG_ON(err); 501 for (i = 0; i < nr_tasks; i++) { 502 task = tasks[i]; 503 sem_init(&task->sleep_sem, 0, 0); 504 sem_init(&task->ready_for_work, 0, 0); 505 sem_init(&task->work_done_sem, 0, 0); 506 task->curr_event = 0; 507 err = pthread_create(&task->thread, &attr, thread_func, task); 508 BUG_ON(err); 509 } 510 } 511 512 static void wait_for_tasks(void) 513 { 514 u64 cpu_usage_0, cpu_usage_1; 515 struct task_desc *task; 516 unsigned long i, ret; 517 518 start_time = get_nsecs(); 519 cpu_usage = 0; 520 pthread_mutex_unlock(&work_done_wait_mutex); 521 522 for (i = 0; i < nr_tasks; i++) { 523 task = tasks[i]; 524 ret = sem_wait(&task->ready_for_work); 525 BUG_ON(ret); 526 sem_init(&task->ready_for_work, 0, 0); 527 } 528 ret = pthread_mutex_lock(&work_done_wait_mutex); 529 BUG_ON(ret); 530 531 cpu_usage_0 = get_cpu_usage_nsec_parent(); 532 533 pthread_mutex_unlock(&start_work_mutex); 534 535 for (i = 0; i < nr_tasks; i++) { 536 task = tasks[i]; 537 ret = sem_wait(&task->work_done_sem); 538 BUG_ON(ret); 539 sem_init(&task->work_done_sem, 0, 0); 540 cpu_usage += task->cpu_usage; 541 task->cpu_usage = 0; 542 } 543 544 cpu_usage_1 = get_cpu_usage_nsec_parent(); 545 if (!runavg_cpu_usage) 546 runavg_cpu_usage = cpu_usage; 547 runavg_cpu_usage = (runavg_cpu_usage*9 + cpu_usage)/10; 548 549 parent_cpu_usage = cpu_usage_1 - cpu_usage_0; 550 if (!runavg_parent_cpu_usage) 551 runavg_parent_cpu_usage = parent_cpu_usage; 552 runavg_parent_cpu_usage = (runavg_parent_cpu_usage*9 + 553 parent_cpu_usage)/10; 554 555 ret = pthread_mutex_lock(&start_work_mutex); 556 BUG_ON(ret); 557 558 for (i = 0; i < nr_tasks; i++) { 559 task = tasks[i]; 560 sem_init(&task->sleep_sem, 0, 0); 561 task->curr_event = 0; 562 } 563 } 564 565 static void run_one_test(void) 566 { 567 u64 T0, T1, delta, avg_delta, fluct, std_dev; 568 569 T0 = get_nsecs(); 570 wait_for_tasks(); 571 T1 = get_nsecs(); 572 573 delta = T1 - T0; 574 sum_runtime += delta; 575 nr_runs++; 576 577 avg_delta = sum_runtime / nr_runs; 578 if (delta < avg_delta) 579 fluct = avg_delta - delta; 580 else 581 fluct = delta - avg_delta; 582 sum_fluct += fluct; 583 std_dev = sum_fluct / nr_runs / sqrt(nr_runs); 584 if (!run_avg) 585 run_avg = delta; 586 run_avg = (run_avg*9 + delta)/10; 587 588 printf("#%-3ld: %0.3f, ", 589 nr_runs, (double)delta/1000000.0); 590 591 printf("ravg: %0.2f, ", 592 (double)run_avg/1e6); 593 594 printf("cpu: %0.2f / %0.2f", 595 (double)cpu_usage/1e6, (double)runavg_cpu_usage/1e6); 596 597 #if 0 598 /* 599 * rusage statistics done by the parent, these are less 600 * accurate than the sum_exec_runtime based statistics: 601 */ 602 printf(" [%0.2f / %0.2f]", 603 (double)parent_cpu_usage/1e6, 604 (double)runavg_parent_cpu_usage/1e6); 605 #endif 606 607 printf("\n"); 608 609 if (nr_sleep_corrections) 610 printf(" (%ld sleep corrections)\n", nr_sleep_corrections); 611 nr_sleep_corrections = 0; 612 } 613 614 static void test_calibrations(void) 615 { 616 u64 T0, T1; 617 618 T0 = get_nsecs(); 619 burn_nsecs(1e6); 620 T1 = get_nsecs(); 621 622 printf("the run test took %Ld nsecs\n", T1-T0); 623 624 T0 = get_nsecs(); 625 sleep_nsecs(1e6); 626 T1 = get_nsecs(); 627 628 printf("the sleep test took %Ld nsecs\n", T1-T0); 629 } 630 631 #define FILL_FIELD(ptr, field, event, data) \ 632 ptr.field = (typeof(ptr.field)) raw_field_value(event, #field, data) 633 634 #define FILL_ARRAY(ptr, array, event, data) \ 635 do { \ 636 void *__array = raw_field_ptr(event, #array, data); \ 637 memcpy(ptr.array, __array, sizeof(ptr.array)); \ 638 } while(0) 639 640 #define FILL_COMMON_FIELDS(ptr, event, data) \ 641 do { \ 642 FILL_FIELD(ptr, common_type, event, data); \ 643 FILL_FIELD(ptr, common_flags, event, data); \ 644 FILL_FIELD(ptr, common_preempt_count, event, data); \ 645 FILL_FIELD(ptr, common_pid, event, data); \ 646 FILL_FIELD(ptr, common_tgid, event, data); \ 647 } while (0) 648 649 650 651 struct trace_switch_event { 652 u32 size; 653 654 u16 common_type; 655 u8 common_flags; 656 u8 common_preempt_count; 657 u32 common_pid; 658 u32 common_tgid; 659 660 char prev_comm[16]; 661 u32 prev_pid; 662 u32 prev_prio; 663 u64 prev_state; 664 char next_comm[16]; 665 u32 next_pid; 666 u32 next_prio; 667 }; 668 669 struct trace_runtime_event { 670 u32 size; 671 672 u16 common_type; 673 u8 common_flags; 674 u8 common_preempt_count; 675 u32 common_pid; 676 u32 common_tgid; 677 678 char comm[16]; 679 u32 pid; 680 u64 runtime; 681 u64 vruntime; 682 }; 683 684 struct trace_wakeup_event { 685 u32 size; 686 687 u16 common_type; 688 u8 common_flags; 689 u8 common_preempt_count; 690 u32 common_pid; 691 u32 common_tgid; 692 693 char comm[16]; 694 u32 pid; 695 696 u32 prio; 697 u32 success; 698 u32 cpu; 699 }; 700 701 struct trace_fork_event { 702 u32 size; 703 704 u16 common_type; 705 u8 common_flags; 706 u8 common_preempt_count; 707 u32 common_pid; 708 u32 common_tgid; 709 710 char parent_comm[16]; 711 u32 parent_pid; 712 char child_comm[16]; 713 u32 child_pid; 714 }; 715 716 struct trace_migrate_task_event { 717 u32 size; 718 719 u16 common_type; 720 u8 common_flags; 721 u8 common_preempt_count; 722 u32 common_pid; 723 u32 common_tgid; 724 725 char comm[16]; 726 u32 pid; 727 728 u32 prio; 729 u32 cpu; 730 }; 731 732 struct trace_sched_handler { 733 void (*switch_event)(struct trace_switch_event *, 734 struct event *, 735 int cpu, 736 u64 timestamp, 737 struct thread *thread); 738 739 void (*runtime_event)(struct trace_runtime_event *, 740 struct event *, 741 int cpu, 742 u64 timestamp, 743 struct thread *thread); 744 745 void (*wakeup_event)(struct trace_wakeup_event *, 746 struct event *, 747 int cpu, 748 u64 timestamp, 749 struct thread *thread); 750 751 void (*fork_event)(struct trace_fork_event *, 752 struct event *, 753 int cpu, 754 u64 timestamp, 755 struct thread *thread); 756 757 void (*migrate_task_event)(struct trace_migrate_task_event *, 758 struct event *, 759 int cpu, 760 u64 timestamp, 761 struct thread *thread); 762 }; 763 764 765 static void 766 replay_wakeup_event(struct trace_wakeup_event *wakeup_event, 767 struct event *event, 768 int cpu __used, 769 u64 timestamp __used, 770 struct thread *thread __used) 771 { 772 struct task_desc *waker, *wakee; 773 774 if (verbose) { 775 printf("sched_wakeup event %p\n", event); 776 777 printf(" ... pid %d woke up %s/%d\n", 778 wakeup_event->common_pid, 779 wakeup_event->comm, 780 wakeup_event->pid); 781 } 782 783 waker = register_pid(wakeup_event->common_pid, "<unknown>"); 784 wakee = register_pid(wakeup_event->pid, wakeup_event->comm); 785 786 add_sched_event_wakeup(waker, timestamp, wakee); 787 } 788 789 static u64 cpu_last_switched[MAX_CPUS]; 790 791 static void 792 replay_switch_event(struct trace_switch_event *switch_event, 793 struct event *event, 794 int cpu, 795 u64 timestamp, 796 struct thread *thread __used) 797 { 798 struct task_desc *prev, *next; 799 u64 timestamp0; 800 s64 delta; 801 802 if (verbose) 803 printf("sched_switch event %p\n", event); 804 805 if (cpu >= MAX_CPUS || cpu < 0) 806 return; 807 808 timestamp0 = cpu_last_switched[cpu]; 809 if (timestamp0) 810 delta = timestamp - timestamp0; 811 else 812 delta = 0; 813 814 if (delta < 0) 815 die("hm, delta: %Ld < 0 ?\n", delta); 816 817 if (verbose) { 818 printf(" ... switch from %s/%d to %s/%d [ran %Ld nsecs]\n", 819 switch_event->prev_comm, switch_event->prev_pid, 820 switch_event->next_comm, switch_event->next_pid, 821 delta); 822 } 823 824 prev = register_pid(switch_event->prev_pid, switch_event->prev_comm); 825 next = register_pid(switch_event->next_pid, switch_event->next_comm); 826 827 cpu_last_switched[cpu] = timestamp; 828 829 add_sched_event_run(prev, timestamp, delta); 830 add_sched_event_sleep(prev, timestamp, switch_event->prev_state); 831 } 832 833 834 static void 835 replay_fork_event(struct trace_fork_event *fork_event, 836 struct event *event, 837 int cpu __used, 838 u64 timestamp __used, 839 struct thread *thread __used) 840 { 841 if (verbose) { 842 printf("sched_fork event %p\n", event); 843 printf("... parent: %s/%d\n", fork_event->parent_comm, fork_event->parent_pid); 844 printf("... child: %s/%d\n", fork_event->child_comm, fork_event->child_pid); 845 } 846 register_pid(fork_event->parent_pid, fork_event->parent_comm); 847 register_pid(fork_event->child_pid, fork_event->child_comm); 848 } 849 850 static struct trace_sched_handler replay_ops = { 851 .wakeup_event = replay_wakeup_event, 852 .switch_event = replay_switch_event, 853 .fork_event = replay_fork_event, 854 }; 855 856 struct sort_dimension { 857 const char *name; 858 sort_fn_t cmp; 859 struct list_head list; 860 }; 861 862 static LIST_HEAD(cmp_pid); 863 864 static int 865 thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r) 866 { 867 struct sort_dimension *sort; 868 int ret = 0; 869 870 BUG_ON(list_empty(list)); 871 872 list_for_each_entry(sort, list, list) { 873 ret = sort->cmp(l, r); 874 if (ret) 875 return ret; 876 } 877 878 return ret; 879 } 880 881 static struct work_atoms * 882 thread_atoms_search(struct rb_root *root, struct thread *thread, 883 struct list_head *sort_list) 884 { 885 struct rb_node *node = root->rb_node; 886 struct work_atoms key = { .thread = thread }; 887 888 while (node) { 889 struct work_atoms *atoms; 890 int cmp; 891 892 atoms = container_of(node, struct work_atoms, node); 893 894 cmp = thread_lat_cmp(sort_list, &key, atoms); 895 if (cmp > 0) 896 node = node->rb_left; 897 else if (cmp < 0) 898 node = node->rb_right; 899 else { 900 BUG_ON(thread != atoms->thread); 901 return atoms; 902 } 903 } 904 return NULL; 905 } 906 907 static void 908 __thread_latency_insert(struct rb_root *root, struct work_atoms *data, 909 struct list_head *sort_list) 910 { 911 struct rb_node **new = &(root->rb_node), *parent = NULL; 912 913 while (*new) { 914 struct work_atoms *this; 915 int cmp; 916 917 this = container_of(*new, struct work_atoms, node); 918 parent = *new; 919 920 cmp = thread_lat_cmp(sort_list, data, this); 921 922 if (cmp > 0) 923 new = &((*new)->rb_left); 924 else 925 new = &((*new)->rb_right); 926 } 927 928 rb_link_node(&data->node, parent, new); 929 rb_insert_color(&data->node, root); 930 } 931 932 static void thread_atoms_insert(struct thread *thread) 933 { 934 struct work_atoms *atoms = zalloc(sizeof(*atoms)); 935 if (!atoms) 936 die("No memory"); 937 938 atoms->thread = thread; 939 INIT_LIST_HEAD(&atoms->work_list); 940 __thread_latency_insert(&atom_root, atoms, &cmp_pid); 941 } 942 943 static void 944 latency_fork_event(struct trace_fork_event *fork_event __used, 945 struct event *event __used, 946 int cpu __used, 947 u64 timestamp __used, 948 struct thread *thread __used) 949 { 950 /* should insert the newcomer */ 951 } 952 953 __used 954 static char sched_out_state(struct trace_switch_event *switch_event) 955 { 956 const char *str = TASK_STATE_TO_CHAR_STR; 957 958 return str[switch_event->prev_state]; 959 } 960 961 static void 962 add_sched_out_event(struct work_atoms *atoms, 963 char run_state, 964 u64 timestamp) 965 { 966 struct work_atom *atom = zalloc(sizeof(*atom)); 967 if (!atom) 968 die("Non memory"); 969 970 atom->sched_out_time = timestamp; 971 972 if (run_state == 'R') { 973 atom->state = THREAD_WAIT_CPU; 974 atom->wake_up_time = atom->sched_out_time; 975 } 976 977 list_add_tail(&atom->list, &atoms->work_list); 978 } 979 980 static void 981 add_runtime_event(struct work_atoms *atoms, u64 delta, u64 timestamp __used) 982 { 983 struct work_atom *atom; 984 985 BUG_ON(list_empty(&atoms->work_list)); 986 987 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 988 989 atom->runtime += delta; 990 atoms->total_runtime += delta; 991 } 992 993 static void 994 add_sched_in_event(struct work_atoms *atoms, u64 timestamp) 995 { 996 struct work_atom *atom; 997 u64 delta; 998 999 if (list_empty(&atoms->work_list)) 1000 return; 1001 1002 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1003 1004 if (atom->state != THREAD_WAIT_CPU) 1005 return; 1006 1007 if (timestamp < atom->wake_up_time) { 1008 atom->state = THREAD_IGNORE; 1009 return; 1010 } 1011 1012 atom->state = THREAD_SCHED_IN; 1013 atom->sched_in_time = timestamp; 1014 1015 delta = atom->sched_in_time - atom->wake_up_time; 1016 atoms->total_lat += delta; 1017 if (delta > atoms->max_lat) { 1018 atoms->max_lat = delta; 1019 atoms->max_lat_at = timestamp; 1020 } 1021 atoms->nb_atoms++; 1022 } 1023 1024 static void 1025 latency_switch_event(struct trace_switch_event *switch_event, 1026 struct event *event __used, 1027 int cpu, 1028 u64 timestamp, 1029 struct thread *thread __used) 1030 { 1031 struct work_atoms *out_events, *in_events; 1032 struct thread *sched_out, *sched_in; 1033 u64 timestamp0; 1034 s64 delta; 1035 1036 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1037 1038 timestamp0 = cpu_last_switched[cpu]; 1039 cpu_last_switched[cpu] = timestamp; 1040 if (timestamp0) 1041 delta = timestamp - timestamp0; 1042 else 1043 delta = 0; 1044 1045 if (delta < 0) 1046 die("hm, delta: %Ld < 0 ?\n", delta); 1047 1048 1049 sched_out = threads__findnew(switch_event->prev_pid); 1050 sched_in = threads__findnew(switch_event->next_pid); 1051 1052 out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); 1053 if (!out_events) { 1054 thread_atoms_insert(sched_out); 1055 out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); 1056 if (!out_events) 1057 die("out-event: Internal tree error"); 1058 } 1059 add_sched_out_event(out_events, sched_out_state(switch_event), timestamp); 1060 1061 in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid); 1062 if (!in_events) { 1063 thread_atoms_insert(sched_in); 1064 in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid); 1065 if (!in_events) 1066 die("in-event: Internal tree error"); 1067 /* 1068 * Take came in we have not heard about yet, 1069 * add in an initial atom in runnable state: 1070 */ 1071 add_sched_out_event(in_events, 'R', timestamp); 1072 } 1073 add_sched_in_event(in_events, timestamp); 1074 } 1075 1076 static void 1077 latency_runtime_event(struct trace_runtime_event *runtime_event, 1078 struct event *event __used, 1079 int cpu, 1080 u64 timestamp, 1081 struct thread *this_thread __used) 1082 { 1083 struct thread *thread = threads__findnew(runtime_event->pid); 1084 struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); 1085 1086 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1087 if (!atoms) { 1088 thread_atoms_insert(thread); 1089 atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); 1090 if (!atoms) 1091 die("in-event: Internal tree error"); 1092 add_sched_out_event(atoms, 'R', timestamp); 1093 } 1094 1095 add_runtime_event(atoms, runtime_event->runtime, timestamp); 1096 } 1097 1098 static void 1099 latency_wakeup_event(struct trace_wakeup_event *wakeup_event, 1100 struct event *__event __used, 1101 int cpu __used, 1102 u64 timestamp, 1103 struct thread *thread __used) 1104 { 1105 struct work_atoms *atoms; 1106 struct work_atom *atom; 1107 struct thread *wakee; 1108 1109 /* Note for later, it may be interesting to observe the failing cases */ 1110 if (!wakeup_event->success) 1111 return; 1112 1113 wakee = threads__findnew(wakeup_event->pid); 1114 atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); 1115 if (!atoms) { 1116 thread_atoms_insert(wakee); 1117 atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); 1118 if (!atoms) 1119 die("wakeup-event: Internal tree error"); 1120 add_sched_out_event(atoms, 'S', timestamp); 1121 } 1122 1123 BUG_ON(list_empty(&atoms->work_list)); 1124 1125 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1126 1127 /* 1128 * You WILL be missing events if you've recorded only 1129 * one CPU, or are only looking at only one, so don't 1130 * make useless noise. 1131 */ 1132 if (profile_cpu == -1 && atom->state != THREAD_SLEEPING) 1133 nr_state_machine_bugs++; 1134 1135 nr_timestamps++; 1136 if (atom->sched_out_time > timestamp) { 1137 nr_unordered_timestamps++; 1138 return; 1139 } 1140 1141 atom->state = THREAD_WAIT_CPU; 1142 atom->wake_up_time = timestamp; 1143 } 1144 1145 static void 1146 latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, 1147 struct event *__event __used, 1148 int cpu __used, 1149 u64 timestamp, 1150 struct thread *thread __used) 1151 { 1152 struct work_atoms *atoms; 1153 struct work_atom *atom; 1154 struct thread *migrant; 1155 1156 /* 1157 * Only need to worry about migration when profiling one CPU. 1158 */ 1159 if (profile_cpu == -1) 1160 return; 1161 1162 migrant = threads__findnew(migrate_task_event->pid); 1163 atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid); 1164 if (!atoms) { 1165 thread_atoms_insert(migrant); 1166 register_pid(migrant->pid, migrant->comm); 1167 atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid); 1168 if (!atoms) 1169 die("migration-event: Internal tree error"); 1170 add_sched_out_event(atoms, 'R', timestamp); 1171 } 1172 1173 BUG_ON(list_empty(&atoms->work_list)); 1174 1175 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1176 atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp; 1177 1178 nr_timestamps++; 1179 1180 if (atom->sched_out_time > timestamp) 1181 nr_unordered_timestamps++; 1182 } 1183 1184 static struct trace_sched_handler lat_ops = { 1185 .wakeup_event = latency_wakeup_event, 1186 .switch_event = latency_switch_event, 1187 .runtime_event = latency_runtime_event, 1188 .fork_event = latency_fork_event, 1189 .migrate_task_event = latency_migrate_task_event, 1190 }; 1191 1192 static void output_lat_thread(struct work_atoms *work_list) 1193 { 1194 int i; 1195 int ret; 1196 u64 avg; 1197 1198 if (!work_list->nb_atoms) 1199 return; 1200 /* 1201 * Ignore idle threads: 1202 */ 1203 if (!strcmp(work_list->thread->comm, "swapper")) 1204 return; 1205 1206 all_runtime += work_list->total_runtime; 1207 all_count += work_list->nb_atoms; 1208 1209 ret = printf(" %s:%d ", work_list->thread->comm, work_list->thread->pid); 1210 1211 for (i = 0; i < 24 - ret; i++) 1212 printf(" "); 1213 1214 avg = work_list->total_lat / work_list->nb_atoms; 1215 1216 printf("|%11.3f ms |%9llu | avg:%9.3f ms | max:%9.3f ms | max at: %9.6f s\n", 1217 (double)work_list->total_runtime / 1e6, 1218 work_list->nb_atoms, (double)avg / 1e6, 1219 (double)work_list->max_lat / 1e6, 1220 (double)work_list->max_lat_at / 1e9); 1221 } 1222 1223 static int pid_cmp(struct work_atoms *l, struct work_atoms *r) 1224 { 1225 if (l->thread->pid < r->thread->pid) 1226 return -1; 1227 if (l->thread->pid > r->thread->pid) 1228 return 1; 1229 1230 return 0; 1231 } 1232 1233 static struct sort_dimension pid_sort_dimension = { 1234 .name = "pid", 1235 .cmp = pid_cmp, 1236 }; 1237 1238 static int avg_cmp(struct work_atoms *l, struct work_atoms *r) 1239 { 1240 u64 avgl, avgr; 1241 1242 if (!l->nb_atoms) 1243 return -1; 1244 1245 if (!r->nb_atoms) 1246 return 1; 1247 1248 avgl = l->total_lat / l->nb_atoms; 1249 avgr = r->total_lat / r->nb_atoms; 1250 1251 if (avgl < avgr) 1252 return -1; 1253 if (avgl > avgr) 1254 return 1; 1255 1256 return 0; 1257 } 1258 1259 static struct sort_dimension avg_sort_dimension = { 1260 .name = "avg", 1261 .cmp = avg_cmp, 1262 }; 1263 1264 static int max_cmp(struct work_atoms *l, struct work_atoms *r) 1265 { 1266 if (l->max_lat < r->max_lat) 1267 return -1; 1268 if (l->max_lat > r->max_lat) 1269 return 1; 1270 1271 return 0; 1272 } 1273 1274 static struct sort_dimension max_sort_dimension = { 1275 .name = "max", 1276 .cmp = max_cmp, 1277 }; 1278 1279 static int switch_cmp(struct work_atoms *l, struct work_atoms *r) 1280 { 1281 if (l->nb_atoms < r->nb_atoms) 1282 return -1; 1283 if (l->nb_atoms > r->nb_atoms) 1284 return 1; 1285 1286 return 0; 1287 } 1288 1289 static struct sort_dimension switch_sort_dimension = { 1290 .name = "switch", 1291 .cmp = switch_cmp, 1292 }; 1293 1294 static int runtime_cmp(struct work_atoms *l, struct work_atoms *r) 1295 { 1296 if (l->total_runtime < r->total_runtime) 1297 return -1; 1298 if (l->total_runtime > r->total_runtime) 1299 return 1; 1300 1301 return 0; 1302 } 1303 1304 static struct sort_dimension runtime_sort_dimension = { 1305 .name = "runtime", 1306 .cmp = runtime_cmp, 1307 }; 1308 1309 static struct sort_dimension *available_sorts[] = { 1310 &pid_sort_dimension, 1311 &avg_sort_dimension, 1312 &max_sort_dimension, 1313 &switch_sort_dimension, 1314 &runtime_sort_dimension, 1315 }; 1316 1317 #define NB_AVAILABLE_SORTS (int)(sizeof(available_sorts) / sizeof(struct sort_dimension *)) 1318 1319 static LIST_HEAD(sort_list); 1320 1321 static int sort_dimension__add(const char *tok, struct list_head *list) 1322 { 1323 int i; 1324 1325 for (i = 0; i < NB_AVAILABLE_SORTS; i++) { 1326 if (!strcmp(available_sorts[i]->name, tok)) { 1327 list_add_tail(&available_sorts[i]->list, list); 1328 1329 return 0; 1330 } 1331 } 1332 1333 return -1; 1334 } 1335 1336 static void setup_sorting(void); 1337 1338 static void sort_lat(void) 1339 { 1340 struct rb_node *node; 1341 1342 for (;;) { 1343 struct work_atoms *data; 1344 node = rb_first(&atom_root); 1345 if (!node) 1346 break; 1347 1348 rb_erase(node, &atom_root); 1349 data = rb_entry(node, struct work_atoms, node); 1350 __thread_latency_insert(&sorted_atom_root, data, &sort_list); 1351 } 1352 } 1353 1354 static struct trace_sched_handler *trace_handler; 1355 1356 static void 1357 process_sched_wakeup_event(void *data, 1358 struct event *event, 1359 int cpu __used, 1360 u64 timestamp __used, 1361 struct thread *thread __used) 1362 { 1363 struct trace_wakeup_event wakeup_event; 1364 1365 FILL_COMMON_FIELDS(wakeup_event, event, data); 1366 1367 FILL_ARRAY(wakeup_event, comm, event, data); 1368 FILL_FIELD(wakeup_event, pid, event, data); 1369 FILL_FIELD(wakeup_event, prio, event, data); 1370 FILL_FIELD(wakeup_event, success, event, data); 1371 FILL_FIELD(wakeup_event, cpu, event, data); 1372 1373 if (trace_handler->wakeup_event) 1374 trace_handler->wakeup_event(&wakeup_event, event, cpu, timestamp, thread); 1375 } 1376 1377 /* 1378 * Track the current task - that way we can know whether there's any 1379 * weird events, such as a task being switched away that is not current. 1380 */ 1381 static int max_cpu; 1382 1383 static u32 curr_pid[MAX_CPUS] = { [0 ... MAX_CPUS-1] = -1 }; 1384 1385 static struct thread *curr_thread[MAX_CPUS]; 1386 1387 static char next_shortname1 = 'A'; 1388 static char next_shortname2 = '0'; 1389 1390 static void 1391 map_switch_event(struct trace_switch_event *switch_event, 1392 struct event *event __used, 1393 int this_cpu, 1394 u64 timestamp, 1395 struct thread *thread __used) 1396 { 1397 struct thread *sched_out, *sched_in; 1398 int new_shortname; 1399 u64 timestamp0; 1400 s64 delta; 1401 int cpu; 1402 1403 BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); 1404 1405 if (this_cpu > max_cpu) 1406 max_cpu = this_cpu; 1407 1408 timestamp0 = cpu_last_switched[this_cpu]; 1409 cpu_last_switched[this_cpu] = timestamp; 1410 if (timestamp0) 1411 delta = timestamp - timestamp0; 1412 else 1413 delta = 0; 1414 1415 if (delta < 0) 1416 die("hm, delta: %Ld < 0 ?\n", delta); 1417 1418 1419 sched_out = threads__findnew(switch_event->prev_pid); 1420 sched_in = threads__findnew(switch_event->next_pid); 1421 1422 curr_thread[this_cpu] = sched_in; 1423 1424 printf(" "); 1425 1426 new_shortname = 0; 1427 if (!sched_in->shortname[0]) { 1428 sched_in->shortname[0] = next_shortname1; 1429 sched_in->shortname[1] = next_shortname2; 1430 1431 if (next_shortname1 < 'Z') { 1432 next_shortname1++; 1433 } else { 1434 next_shortname1='A'; 1435 if (next_shortname2 < '9') { 1436 next_shortname2++; 1437 } else { 1438 next_shortname2='0'; 1439 } 1440 } 1441 new_shortname = 1; 1442 } 1443 1444 for (cpu = 0; cpu <= max_cpu; cpu++) { 1445 if (cpu != this_cpu) 1446 printf(" "); 1447 else 1448 printf("*"); 1449 1450 if (curr_thread[cpu]) { 1451 if (curr_thread[cpu]->pid) 1452 printf("%2s ", curr_thread[cpu]->shortname); 1453 else 1454 printf(". "); 1455 } else 1456 printf(" "); 1457 } 1458 1459 printf(" %12.6f secs ", (double)timestamp/1e9); 1460 if (new_shortname) { 1461 printf("%s => %s:%d\n", 1462 sched_in->shortname, sched_in->comm, sched_in->pid); 1463 } else { 1464 printf("\n"); 1465 } 1466 } 1467 1468 1469 static void 1470 process_sched_switch_event(void *data, 1471 struct event *event, 1472 int this_cpu, 1473 u64 timestamp __used, 1474 struct thread *thread __used) 1475 { 1476 struct trace_switch_event switch_event; 1477 1478 FILL_COMMON_FIELDS(switch_event, event, data); 1479 1480 FILL_ARRAY(switch_event, prev_comm, event, data); 1481 FILL_FIELD(switch_event, prev_pid, event, data); 1482 FILL_FIELD(switch_event, prev_prio, event, data); 1483 FILL_FIELD(switch_event, prev_state, event, data); 1484 FILL_ARRAY(switch_event, next_comm, event, data); 1485 FILL_FIELD(switch_event, next_pid, event, data); 1486 FILL_FIELD(switch_event, next_prio, event, data); 1487 1488 if (curr_pid[this_cpu] != (u32)-1) { 1489 /* 1490 * Are we trying to switch away a PID that is 1491 * not current? 1492 */ 1493 if (curr_pid[this_cpu] != switch_event.prev_pid) 1494 nr_context_switch_bugs++; 1495 } 1496 if (trace_handler->switch_event) 1497 trace_handler->switch_event(&switch_event, event, this_cpu, timestamp, thread); 1498 1499 curr_pid[this_cpu] = switch_event.next_pid; 1500 } 1501 1502 static void 1503 process_sched_runtime_event(void *data, 1504 struct event *event, 1505 int cpu __used, 1506 u64 timestamp __used, 1507 struct thread *thread __used) 1508 { 1509 struct trace_runtime_event runtime_event; 1510 1511 FILL_ARRAY(runtime_event, comm, event, data); 1512 FILL_FIELD(runtime_event, pid, event, data); 1513 FILL_FIELD(runtime_event, runtime, event, data); 1514 FILL_FIELD(runtime_event, vruntime, event, data); 1515 1516 if (trace_handler->runtime_event) 1517 trace_handler->runtime_event(&runtime_event, event, cpu, timestamp, thread); 1518 } 1519 1520 static void 1521 process_sched_fork_event(void *data, 1522 struct event *event, 1523 int cpu __used, 1524 u64 timestamp __used, 1525 struct thread *thread __used) 1526 { 1527 struct trace_fork_event fork_event; 1528 1529 FILL_COMMON_FIELDS(fork_event, event, data); 1530 1531 FILL_ARRAY(fork_event, parent_comm, event, data); 1532 FILL_FIELD(fork_event, parent_pid, event, data); 1533 FILL_ARRAY(fork_event, child_comm, event, data); 1534 FILL_FIELD(fork_event, child_pid, event, data); 1535 1536 if (trace_handler->fork_event) 1537 trace_handler->fork_event(&fork_event, event, cpu, timestamp, thread); 1538 } 1539 1540 static void 1541 process_sched_exit_event(struct event *event, 1542 int cpu __used, 1543 u64 timestamp __used, 1544 struct thread *thread __used) 1545 { 1546 if (verbose) 1547 printf("sched_exit event %p\n", event); 1548 } 1549 1550 static void 1551 process_sched_migrate_task_event(void *data, 1552 struct event *event, 1553 int cpu __used, 1554 u64 timestamp __used, 1555 struct thread *thread __used) 1556 { 1557 struct trace_migrate_task_event migrate_task_event; 1558 1559 FILL_COMMON_FIELDS(migrate_task_event, event, data); 1560 1561 FILL_ARRAY(migrate_task_event, comm, event, data); 1562 FILL_FIELD(migrate_task_event, pid, event, data); 1563 FILL_FIELD(migrate_task_event, prio, event, data); 1564 FILL_FIELD(migrate_task_event, cpu, event, data); 1565 1566 if (trace_handler->migrate_task_event) 1567 trace_handler->migrate_task_event(&migrate_task_event, event, cpu, timestamp, thread); 1568 } 1569 1570 static void 1571 process_raw_event(event_t *raw_event __used, void *data, 1572 int cpu, u64 timestamp, struct thread *thread) 1573 { 1574 struct event *event; 1575 int type; 1576 1577 1578 type = trace_parse_common_type(data); 1579 event = trace_find_event(type); 1580 1581 if (!strcmp(event->name, "sched_switch")) 1582 process_sched_switch_event(data, event, cpu, timestamp, thread); 1583 if (!strcmp(event->name, "sched_stat_runtime")) 1584 process_sched_runtime_event(data, event, cpu, timestamp, thread); 1585 if (!strcmp(event->name, "sched_wakeup")) 1586 process_sched_wakeup_event(data, event, cpu, timestamp, thread); 1587 if (!strcmp(event->name, "sched_wakeup_new")) 1588 process_sched_wakeup_event(data, event, cpu, timestamp, thread); 1589 if (!strcmp(event->name, "sched_process_fork")) 1590 process_sched_fork_event(data, event, cpu, timestamp, thread); 1591 if (!strcmp(event->name, "sched_process_exit")) 1592 process_sched_exit_event(event, cpu, timestamp, thread); 1593 if (!strcmp(event->name, "sched_migrate_task")) 1594 process_sched_migrate_task_event(data, event, cpu, timestamp, thread); 1595 } 1596 1597 static int process_sample_event(event_t *event) 1598 { 1599 struct sample_data data; 1600 struct thread *thread; 1601 1602 if (!(sample_type & PERF_SAMPLE_RAW)) 1603 return 0; 1604 1605 memset(&data, 0, sizeof(data)); 1606 data.time = -1; 1607 data.cpu = -1; 1608 data.period = -1; 1609 1610 event__parse_sample(event, sample_type, &data); 1611 1612 dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", 1613 event->header.misc, 1614 data.pid, data.tid, 1615 (void *)(long)data.ip, 1616 (long long)data.period); 1617 1618 thread = threads__findnew(data.pid); 1619 if (thread == NULL) { 1620 pr_debug("problem processing %d event, skipping it.\n", 1621 event->header.type); 1622 return -1; 1623 } 1624 1625 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); 1626 1627 if (profile_cpu != -1 && profile_cpu != (int)data.cpu) 1628 return 0; 1629 1630 process_raw_event(event, data.raw_data, data.cpu, data.time, thread); 1631 1632 return 0; 1633 } 1634 1635 static int process_lost_event(event_t *event __used) 1636 { 1637 nr_lost_chunks++; 1638 nr_lost_events += event->lost.lost; 1639 1640 return 0; 1641 } 1642 1643 static int sample_type_check(u64 type) 1644 { 1645 sample_type = type; 1646 1647 if (!(sample_type & PERF_SAMPLE_RAW)) { 1648 fprintf(stderr, 1649 "No trace sample to read. Did you call perf record " 1650 "without -R?"); 1651 return -1; 1652 } 1653 1654 return 0; 1655 } 1656 1657 static struct perf_file_handler file_handler = { 1658 .process_sample_event = process_sample_event, 1659 .process_comm_event = event__process_comm, 1660 .process_lost_event = process_lost_event, 1661 .sample_type_check = sample_type_check, 1662 }; 1663 1664 static int read_events(void) 1665 { 1666 register_idle_thread(); 1667 register_perf_file_handler(&file_handler); 1668 1669 return mmap_dispatch_perf_file(&header, input_name, 0, 0, 1670 &event__cwdlen, &event__cwd); 1671 } 1672 1673 static void print_bad_events(void) 1674 { 1675 if (nr_unordered_timestamps && nr_timestamps) { 1676 printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n", 1677 (double)nr_unordered_timestamps/(double)nr_timestamps*100.0, 1678 nr_unordered_timestamps, nr_timestamps); 1679 } 1680 if (nr_lost_events && nr_events) { 1681 printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n", 1682 (double)nr_lost_events/(double)nr_events*100.0, 1683 nr_lost_events, nr_events, nr_lost_chunks); 1684 } 1685 if (nr_state_machine_bugs && nr_timestamps) { 1686 printf(" INFO: %.3f%% state machine bugs (%ld out of %ld)", 1687 (double)nr_state_machine_bugs/(double)nr_timestamps*100.0, 1688 nr_state_machine_bugs, nr_timestamps); 1689 if (nr_lost_events) 1690 printf(" (due to lost events?)"); 1691 printf("\n"); 1692 } 1693 if (nr_context_switch_bugs && nr_timestamps) { 1694 printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)", 1695 (double)nr_context_switch_bugs/(double)nr_timestamps*100.0, 1696 nr_context_switch_bugs, nr_timestamps); 1697 if (nr_lost_events) 1698 printf(" (due to lost events?)"); 1699 printf("\n"); 1700 } 1701 } 1702 1703 static void __cmd_lat(void) 1704 { 1705 struct rb_node *next; 1706 1707 setup_pager(); 1708 read_events(); 1709 sort_lat(); 1710 1711 printf("\n ---------------------------------------------------------------------------------------------------------------\n"); 1712 printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n"); 1713 printf(" ---------------------------------------------------------------------------------------------------------------\n"); 1714 1715 next = rb_first(&sorted_atom_root); 1716 1717 while (next) { 1718 struct work_atoms *work_list; 1719 1720 work_list = rb_entry(next, struct work_atoms, node); 1721 output_lat_thread(work_list); 1722 next = rb_next(next); 1723 } 1724 1725 printf(" -----------------------------------------------------------------------------------------\n"); 1726 printf(" TOTAL: |%11.3f ms |%9Ld |\n", 1727 (double)all_runtime/1e6, all_count); 1728 1729 printf(" ---------------------------------------------------\n"); 1730 1731 print_bad_events(); 1732 printf("\n"); 1733 1734 } 1735 1736 static struct trace_sched_handler map_ops = { 1737 .wakeup_event = NULL, 1738 .switch_event = map_switch_event, 1739 .runtime_event = NULL, 1740 .fork_event = NULL, 1741 }; 1742 1743 static void __cmd_map(void) 1744 { 1745 max_cpu = sysconf(_SC_NPROCESSORS_CONF); 1746 1747 setup_pager(); 1748 read_events(); 1749 print_bad_events(); 1750 } 1751 1752 static void __cmd_replay(void) 1753 { 1754 unsigned long i; 1755 1756 calibrate_run_measurement_overhead(); 1757 calibrate_sleep_measurement_overhead(); 1758 1759 test_calibrations(); 1760 1761 read_events(); 1762 1763 printf("nr_run_events: %ld\n", nr_run_events); 1764 printf("nr_sleep_events: %ld\n", nr_sleep_events); 1765 printf("nr_wakeup_events: %ld\n", nr_wakeup_events); 1766 1767 if (targetless_wakeups) 1768 printf("target-less wakeups: %ld\n", targetless_wakeups); 1769 if (multitarget_wakeups) 1770 printf("multi-target wakeups: %ld\n", multitarget_wakeups); 1771 if (nr_run_events_optimized) 1772 printf("run atoms optimized: %ld\n", 1773 nr_run_events_optimized); 1774 1775 print_task_traces(); 1776 add_cross_task_wakeups(); 1777 1778 create_tasks(); 1779 printf("------------------------------------------------------------\n"); 1780 for (i = 0; i < replay_repeat; i++) 1781 run_one_test(); 1782 } 1783 1784 1785 static const char * const sched_usage[] = { 1786 "perf sched [<options>] {record|latency|map|replay|trace}", 1787 NULL 1788 }; 1789 1790 static const struct option sched_options[] = { 1791 OPT_STRING('i', "input", &input_name, "file", 1792 "input file name"), 1793 OPT_BOOLEAN('v', "verbose", &verbose, 1794 "be more verbose (show symbol address, etc)"), 1795 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 1796 "dump raw trace in ASCII"), 1797 OPT_END() 1798 }; 1799 1800 static const char * const latency_usage[] = { 1801 "perf sched latency [<options>]", 1802 NULL 1803 }; 1804 1805 static const struct option latency_options[] = { 1806 OPT_STRING('s', "sort", &sort_order, "key[,key2...]", 1807 "sort by key(s): runtime, switch, avg, max"), 1808 OPT_BOOLEAN('v', "verbose", &verbose, 1809 "be more verbose (show symbol address, etc)"), 1810 OPT_INTEGER('C', "CPU", &profile_cpu, 1811 "CPU to profile on"), 1812 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 1813 "dump raw trace in ASCII"), 1814 OPT_END() 1815 }; 1816 1817 static const char * const replay_usage[] = { 1818 "perf sched replay [<options>]", 1819 NULL 1820 }; 1821 1822 static const struct option replay_options[] = { 1823 OPT_INTEGER('r', "repeat", &replay_repeat, 1824 "repeat the workload replay N times (-1: infinite)"), 1825 OPT_BOOLEAN('v', "verbose", &verbose, 1826 "be more verbose (show symbol address, etc)"), 1827 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 1828 "dump raw trace in ASCII"), 1829 OPT_END() 1830 }; 1831 1832 static void setup_sorting(void) 1833 { 1834 char *tmp, *tok, *str = strdup(sort_order); 1835 1836 for (tok = strtok_r(str, ", ", &tmp); 1837 tok; tok = strtok_r(NULL, ", ", &tmp)) { 1838 if (sort_dimension__add(tok, &sort_list) < 0) { 1839 error("Unknown --sort key: `%s'", tok); 1840 usage_with_options(latency_usage, latency_options); 1841 } 1842 } 1843 1844 free(str); 1845 1846 sort_dimension__add("pid", &cmp_pid); 1847 } 1848 1849 static const char *record_args[] = { 1850 "record", 1851 "-a", 1852 "-R", 1853 "-M", 1854 "-f", 1855 "-m", "1024", 1856 "-c", "1", 1857 "-e", "sched:sched_switch:r", 1858 "-e", "sched:sched_stat_wait:r", 1859 "-e", "sched:sched_stat_sleep:r", 1860 "-e", "sched:sched_stat_iowait:r", 1861 "-e", "sched:sched_stat_runtime:r", 1862 "-e", "sched:sched_process_exit:r", 1863 "-e", "sched:sched_process_fork:r", 1864 "-e", "sched:sched_wakeup:r", 1865 "-e", "sched:sched_migrate_task:r", 1866 }; 1867 1868 static int __cmd_record(int argc, const char **argv) 1869 { 1870 unsigned int rec_argc, i, j; 1871 const char **rec_argv; 1872 1873 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 1874 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 1875 1876 for (i = 0; i < ARRAY_SIZE(record_args); i++) 1877 rec_argv[i] = strdup(record_args[i]); 1878 1879 for (j = 1; j < (unsigned int)argc; j++, i++) 1880 rec_argv[i] = argv[j]; 1881 1882 BUG_ON(i != rec_argc); 1883 1884 return cmd_record(i, rec_argv, NULL); 1885 } 1886 1887 int cmd_sched(int argc, const char **argv, const char *prefix __used) 1888 { 1889 argc = parse_options(argc, argv, sched_options, sched_usage, 1890 PARSE_OPT_STOP_AT_NON_OPTION); 1891 if (!argc) 1892 usage_with_options(sched_usage, sched_options); 1893 1894 /* 1895 * Aliased to 'perf trace' for now: 1896 */ 1897 if (!strcmp(argv[0], "trace")) 1898 return cmd_trace(argc, argv, prefix); 1899 1900 symbol__init(0); 1901 if (!strncmp(argv[0], "rec", 3)) { 1902 return __cmd_record(argc, argv); 1903 } else if (!strncmp(argv[0], "lat", 3)) { 1904 trace_handler = &lat_ops; 1905 if (argc > 1) { 1906 argc = parse_options(argc, argv, latency_options, latency_usage, 0); 1907 if (argc) 1908 usage_with_options(latency_usage, latency_options); 1909 } 1910 setup_sorting(); 1911 __cmd_lat(); 1912 } else if (!strcmp(argv[0], "map")) { 1913 trace_handler = &map_ops; 1914 setup_sorting(); 1915 __cmd_map(); 1916 } else if (!strncmp(argv[0], "rep", 3)) { 1917 trace_handler = &replay_ops; 1918 if (argc) { 1919 argc = parse_options(argc, argv, replay_options, replay_usage, 0); 1920 if (argc) 1921 usage_with_options(replay_usage, replay_options); 1922 } 1923 __cmd_replay(); 1924 } else { 1925 usage_with_options(sched_usage, sched_options); 1926 } 1927 1928 return 0; 1929 } 1930