1 #include "builtin.h" 2 #include "perf.h" 3 4 #include "util/util.h" 5 #include "util/evlist.h" 6 #include "util/cache.h" 7 #include "util/evsel.h" 8 #include "util/symbol.h" 9 #include "util/thread.h" 10 #include "util/header.h" 11 #include "util/session.h" 12 #include "util/tool.h" 13 #include "util/cloexec.h" 14 #include "util/thread_map.h" 15 #include "util/color.h" 16 #include "util/stat.h" 17 #include "util/callchain.h" 18 #include "util/time-utils.h" 19 20 #include <subcmd/parse-options.h> 21 #include "util/trace-event.h" 22 23 #include "util/debug.h" 24 25 #include <linux/log2.h> 26 #include <sys/prctl.h> 27 #include <sys/resource.h> 28 29 #include <semaphore.h> 30 #include <pthread.h> 31 #include <math.h> 32 #include <api/fs/fs.h> 33 #include <linux/time64.h> 34 35 #define PR_SET_NAME 15 /* Set process name */ 36 #define MAX_CPUS 4096 37 #define COMM_LEN 20 38 #define SYM_LEN 129 39 #define MAX_PID 1024000 40 41 struct sched_atom; 42 43 struct task_desc { 44 unsigned long nr; 45 unsigned long pid; 46 char comm[COMM_LEN]; 47 48 unsigned long nr_events; 49 unsigned long curr_event; 50 struct sched_atom **atoms; 51 52 pthread_t thread; 53 sem_t sleep_sem; 54 55 sem_t ready_for_work; 56 sem_t work_done_sem; 57 58 u64 cpu_usage; 59 }; 60 61 enum sched_event_type { 62 SCHED_EVENT_RUN, 63 SCHED_EVENT_SLEEP, 64 SCHED_EVENT_WAKEUP, 65 SCHED_EVENT_MIGRATION, 66 }; 67 68 struct sched_atom { 69 enum sched_event_type type; 70 int specific_wait; 71 u64 timestamp; 72 u64 duration; 73 unsigned long nr; 74 sem_t *wait_sem; 75 struct task_desc *wakee; 76 }; 77 78 #define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP" 79 80 /* task state bitmask, copied from include/linux/sched.h */ 81 #define TASK_RUNNING 0 82 #define TASK_INTERRUPTIBLE 1 83 #define TASK_UNINTERRUPTIBLE 2 84 #define __TASK_STOPPED 4 85 #define __TASK_TRACED 8 86 /* in tsk->exit_state */ 87 #define EXIT_DEAD 16 88 #define EXIT_ZOMBIE 32 89 #define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) 90 /* in tsk->state again */ 91 #define TASK_DEAD 64 92 #define TASK_WAKEKILL 128 93 #define TASK_WAKING 256 94 #define TASK_PARKED 512 95 96 enum thread_state { 97 THREAD_SLEEPING = 0, 98 THREAD_WAIT_CPU, 99 THREAD_SCHED_IN, 100 THREAD_IGNORE 101 }; 102 103 struct work_atom { 104 struct list_head list; 105 enum thread_state state; 106 u64 sched_out_time; 107 u64 wake_up_time; 108 u64 sched_in_time; 109 u64 runtime; 110 }; 111 112 struct work_atoms { 113 struct list_head work_list; 114 struct thread *thread; 115 struct rb_node node; 116 u64 max_lat; 117 u64 max_lat_at; 118 u64 total_lat; 119 u64 nb_atoms; 120 u64 total_runtime; 121 int num_merged; 122 }; 123 124 typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *); 125 126 struct perf_sched; 127 128 struct trace_sched_handler { 129 int (*switch_event)(struct perf_sched *sched, struct perf_evsel *evsel, 130 struct perf_sample *sample, struct machine *machine); 131 132 int (*runtime_event)(struct perf_sched *sched, struct perf_evsel *evsel, 133 struct perf_sample *sample, struct machine *machine); 134 135 int (*wakeup_event)(struct perf_sched *sched, struct perf_evsel *evsel, 136 struct perf_sample *sample, struct machine *machine); 137 138 /* PERF_RECORD_FORK event, not sched_process_fork tracepoint */ 139 int (*fork_event)(struct perf_sched *sched, union perf_event *event, 140 struct machine *machine); 141 142 int (*migrate_task_event)(struct perf_sched *sched, 143 struct perf_evsel *evsel, 144 struct perf_sample *sample, 145 struct machine *machine); 146 }; 147 148 #define COLOR_PIDS 
PERF_COLOR_BLUE 149 #define COLOR_CPUS PERF_COLOR_BG_RED 150 151 struct perf_sched_map { 152 DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS); 153 int *comp_cpus; 154 bool comp; 155 struct thread_map *color_pids; 156 const char *color_pids_str; 157 struct cpu_map *color_cpus; 158 const char *color_cpus_str; 159 struct cpu_map *cpus; 160 const char *cpus_str; 161 }; 162 163 struct perf_sched { 164 struct perf_tool tool; 165 const char *sort_order; 166 unsigned long nr_tasks; 167 struct task_desc **pid_to_task; 168 struct task_desc **tasks; 169 const struct trace_sched_handler *tp_handler; 170 pthread_mutex_t start_work_mutex; 171 pthread_mutex_t work_done_wait_mutex; 172 int profile_cpu; 173 /* 174 * Track the current task - that way we can know whether there's any 175 * weird events, such as a task being switched away that is not current. 176 */ 177 int max_cpu; 178 u32 curr_pid[MAX_CPUS]; 179 struct thread *curr_thread[MAX_CPUS]; 180 char next_shortname1; 181 char next_shortname2; 182 unsigned int replay_repeat; 183 unsigned long nr_run_events; 184 unsigned long nr_sleep_events; 185 unsigned long nr_wakeup_events; 186 unsigned long nr_sleep_corrections; 187 unsigned long nr_run_events_optimized; 188 unsigned long targetless_wakeups; 189 unsigned long multitarget_wakeups; 190 unsigned long nr_runs; 191 unsigned long nr_timestamps; 192 unsigned long nr_unordered_timestamps; 193 unsigned long nr_context_switch_bugs; 194 unsigned long nr_events; 195 unsigned long nr_lost_chunks; 196 unsigned long nr_lost_events; 197 u64 run_measurement_overhead; 198 u64 sleep_measurement_overhead; 199 u64 start_time; 200 u64 cpu_usage; 201 u64 runavg_cpu_usage; 202 u64 parent_cpu_usage; 203 u64 runavg_parent_cpu_usage; 204 u64 sum_runtime; 205 u64 sum_fluct; 206 u64 run_avg; 207 u64 all_runtime; 208 u64 all_count; 209 u64 cpu_last_switched[MAX_CPUS]; 210 struct rb_root atom_root, sorted_atom_root, merged_atom_root; 211 struct list_head sort_list, cmp_pid; 212 bool force; 213 bool skip_merge; 214 struct perf_sched_map map; 215 216 /* options for timehist command */ 217 bool summary; 218 bool summary_only; 219 bool idle_hist; 220 bool show_callchain; 221 unsigned int max_stack; 222 bool show_cpu_visual; 223 bool show_wakeups; 224 bool show_migrations; 225 bool show_state; 226 u64 skipped_samples; 227 const char *time_str; 228 struct perf_time_interval ptime; 229 struct perf_time_interval hist_time; 230 }; 231 232 /* per thread run time data */ 233 struct thread_runtime { 234 u64 last_time; /* time of previous sched in/out event */ 235 u64 dt_run; /* run time */ 236 u64 dt_sleep; /* time between CPU access by sleep (off cpu) */ 237 u64 dt_iowait; /* time between CPU access by iowait (off cpu) */ 238 u64 dt_preempt; /* time between CPU access by preempt (off cpu) */ 239 u64 dt_delay; /* time between wakeup and sched-in */ 240 u64 ready_to_run; /* time of wakeup */ 241 242 struct stats run_stats; 243 u64 total_run_time; 244 u64 total_sleep_time; 245 u64 total_iowait_time; 246 u64 total_preempt_time; 247 u64 total_delay_time; 248 249 int last_state; 250 u64 migrations; 251 }; 252 253 /* per event run time data */ 254 struct evsel_runtime { 255 u64 *last_time; /* time this event was last seen per cpu */ 256 u32 ncpu; /* highest cpu slot allocated */ 257 }; 258 259 /* per cpu idle time data */ 260 struct idle_thread_runtime { 261 struct thread_runtime tr; 262 struct thread *last_thread; 263 struct rb_root sorted_root; 264 struct callchain_root callchain; 265 struct callchain_cursor cursor; 266 }; 267 268 /* track idle times 
per cpu */ 269 static struct thread **idle_threads; 270 static int idle_max_cpu; 271 static char idle_comm[] = "<idle>"; 272 273 static u64 get_nsecs(void) 274 { 275 struct timespec ts; 276 277 clock_gettime(CLOCK_MONOTONIC, &ts); 278 279 return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; 280 } 281 282 static void burn_nsecs(struct perf_sched *sched, u64 nsecs) 283 { 284 u64 T0 = get_nsecs(), T1; 285 286 do { 287 T1 = get_nsecs(); 288 } while (T1 + sched->run_measurement_overhead < T0 + nsecs); 289 } 290 291 static void sleep_nsecs(u64 nsecs) 292 { 293 struct timespec ts; 294 295 ts.tv_nsec = nsecs % 999999999; 296 ts.tv_sec = nsecs / 999999999; 297 298 nanosleep(&ts, NULL); 299 } 300 301 static void calibrate_run_measurement_overhead(struct perf_sched *sched) 302 { 303 u64 T0, T1, delta, min_delta = NSEC_PER_SEC; 304 int i; 305 306 for (i = 0; i < 10; i++) { 307 T0 = get_nsecs(); 308 burn_nsecs(sched, 0); 309 T1 = get_nsecs(); 310 delta = T1-T0; 311 min_delta = min(min_delta, delta); 312 } 313 sched->run_measurement_overhead = min_delta; 314 315 printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta); 316 } 317 318 static void calibrate_sleep_measurement_overhead(struct perf_sched *sched) 319 { 320 u64 T0, T1, delta, min_delta = NSEC_PER_SEC; 321 int i; 322 323 for (i = 0; i < 10; i++) { 324 T0 = get_nsecs(); 325 sleep_nsecs(10000); 326 T1 = get_nsecs(); 327 delta = T1-T0; 328 min_delta = min(min_delta, delta); 329 } 330 min_delta -= 10000; 331 sched->sleep_measurement_overhead = min_delta; 332 333 printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta); 334 } 335 336 static struct sched_atom * 337 get_new_event(struct task_desc *task, u64 timestamp) 338 { 339 struct sched_atom *event = zalloc(sizeof(*event)); 340 unsigned long idx = task->nr_events; 341 size_t size; 342 343 event->timestamp = timestamp; 344 event->nr = idx; 345 346 task->nr_events++; 347 size = sizeof(struct sched_atom *) * task->nr_events; 348 task->atoms = realloc(task->atoms, size); 349 BUG_ON(!task->atoms); 350 351 task->atoms[idx] = event; 352 353 return event; 354 } 355 356 static struct sched_atom *last_event(struct task_desc *task) 357 { 358 if (!task->nr_events) 359 return NULL; 360 361 return task->atoms[task->nr_events - 1]; 362 } 363 364 static void add_sched_event_run(struct perf_sched *sched, struct task_desc *task, 365 u64 timestamp, u64 duration) 366 { 367 struct sched_atom *event, *curr_event = last_event(task); 368 369 /* 370 * optimize an existing RUN event by merging this one 371 * to it: 372 */ 373 if (curr_event && curr_event->type == SCHED_EVENT_RUN) { 374 sched->nr_run_events_optimized++; 375 curr_event->duration += duration; 376 return; 377 } 378 379 event = get_new_event(task, timestamp); 380 381 event->type = SCHED_EVENT_RUN; 382 event->duration = duration; 383 384 sched->nr_run_events++; 385 } 386 387 static void add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *task, 388 u64 timestamp, struct task_desc *wakee) 389 { 390 struct sched_atom *event, *wakee_event; 391 392 event = get_new_event(task, timestamp); 393 event->type = SCHED_EVENT_WAKEUP; 394 event->wakee = wakee; 395 396 wakee_event = last_event(wakee); 397 if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) { 398 sched->targetless_wakeups++; 399 return; 400 } 401 if (wakee_event->wait_sem) { 402 sched->multitarget_wakeups++; 403 return; 404 } 405 406 wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem)); 407 sem_init(wakee_event->wait_sem, 0, 0); 408 wakee_event->specific_wait = 1; 
409 event->wait_sem = wakee_event->wait_sem; 410 411 sched->nr_wakeup_events++; 412 } 413 414 static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task, 415 u64 timestamp, u64 task_state __maybe_unused) 416 { 417 struct sched_atom *event = get_new_event(task, timestamp); 418 419 event->type = SCHED_EVENT_SLEEP; 420 421 sched->nr_sleep_events++; 422 } 423 424 static struct task_desc *register_pid(struct perf_sched *sched, 425 unsigned long pid, const char *comm) 426 { 427 struct task_desc *task; 428 static int pid_max; 429 430 if (sched->pid_to_task == NULL) { 431 if (sysctl__read_int("kernel/pid_max", &pid_max) < 0) 432 pid_max = MAX_PID; 433 BUG_ON((sched->pid_to_task = calloc(pid_max, sizeof(struct task_desc *))) == NULL); 434 } 435 if (pid >= (unsigned long)pid_max) { 436 BUG_ON((sched->pid_to_task = realloc(sched->pid_to_task, (pid + 1) * 437 sizeof(struct task_desc *))) == NULL); 438 while (pid >= (unsigned long)pid_max) 439 sched->pid_to_task[pid_max++] = NULL; 440 } 441 442 task = sched->pid_to_task[pid]; 443 444 if (task) 445 return task; 446 447 task = zalloc(sizeof(*task)); 448 task->pid = pid; 449 task->nr = sched->nr_tasks; 450 strcpy(task->comm, comm); 451 /* 452 * every task starts in sleeping state - this gets ignored 453 * if there's no wakeup pointing to this sleep state: 454 */ 455 add_sched_event_sleep(sched, task, 0, 0); 456 457 sched->pid_to_task[pid] = task; 458 sched->nr_tasks++; 459 sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_desc *)); 460 BUG_ON(!sched->tasks); 461 sched->tasks[task->nr] = task; 462 463 if (verbose > 0) 464 printf("registered task #%ld, PID %ld (%s)\n", sched->nr_tasks, pid, comm); 465 466 return task; 467 } 468 469 470 static void print_task_traces(struct perf_sched *sched) 471 { 472 struct task_desc *task; 473 unsigned long i; 474 475 for (i = 0; i < sched->nr_tasks; i++) { 476 task = sched->tasks[i]; 477 printf("task %6ld (%20s:%10ld), nr_events: %ld\n", 478 task->nr, task->comm, task->pid, task->nr_events); 479 } 480 } 481 482 static void add_cross_task_wakeups(struct perf_sched *sched) 483 { 484 struct task_desc *task1, *task2; 485 unsigned long i, j; 486 487 for (i = 0; i < sched->nr_tasks; i++) { 488 task1 = sched->tasks[i]; 489 j = i + 1; 490 if (j == sched->nr_tasks) 491 j = 0; 492 task2 = sched->tasks[j]; 493 add_sched_event_wakeup(sched, task1, 0, task2); 494 } 495 } 496 497 static void perf_sched__process_event(struct perf_sched *sched, 498 struct sched_atom *atom) 499 { 500 int ret = 0; 501 502 switch (atom->type) { 503 case SCHED_EVENT_RUN: 504 burn_nsecs(sched, atom->duration); 505 break; 506 case SCHED_EVENT_SLEEP: 507 if (atom->wait_sem) 508 ret = sem_wait(atom->wait_sem); 509 BUG_ON(ret); 510 break; 511 case SCHED_EVENT_WAKEUP: 512 if (atom->wait_sem) 513 ret = sem_post(atom->wait_sem); 514 BUG_ON(ret); 515 break; 516 case SCHED_EVENT_MIGRATION: 517 break; 518 default: 519 BUG_ON(1); 520 } 521 } 522 523 static u64 get_cpu_usage_nsec_parent(void) 524 { 525 struct rusage ru; 526 u64 sum; 527 int err; 528 529 err = getrusage(RUSAGE_SELF, &ru); 530 BUG_ON(err); 531 532 sum = ru.ru_utime.tv_sec * NSEC_PER_SEC + ru.ru_utime.tv_usec * NSEC_PER_USEC; 533 sum += ru.ru_stime.tv_sec * NSEC_PER_SEC + ru.ru_stime.tv_usec * NSEC_PER_USEC; 534 535 return sum; 536 } 537 538 static int self_open_counters(struct perf_sched *sched, unsigned long cur_task) 539 { 540 struct perf_event_attr attr; 541 char sbuf[STRERR_BUFSIZE], info[STRERR_BUFSIZE]; 542 int fd; 543 struct rlimit limit; 544 bool 
need_privilege = false; 545 546 memset(&attr, 0, sizeof(attr)); 547 548 attr.type = PERF_TYPE_SOFTWARE; 549 attr.config = PERF_COUNT_SW_TASK_CLOCK; 550 551 force_again: 552 fd = sys_perf_event_open(&attr, 0, -1, -1, 553 perf_event_open_cloexec_flag()); 554 555 if (fd < 0) { 556 if (errno == EMFILE) { 557 if (sched->force) { 558 BUG_ON(getrlimit(RLIMIT_NOFILE, &limit) == -1); 559 limit.rlim_cur += sched->nr_tasks - cur_task; 560 if (limit.rlim_cur > limit.rlim_max) { 561 limit.rlim_max = limit.rlim_cur; 562 need_privilege = true; 563 } 564 if (setrlimit(RLIMIT_NOFILE, &limit) == -1) { 565 if (need_privilege && errno == EPERM) 566 strcpy(info, "Need privilege\n"); 567 } else 568 goto force_again; 569 } else 570 strcpy(info, "Have a try with -f option\n"); 571 } 572 pr_err("Error: sys_perf_event_open() syscall returned " 573 "with %d (%s)\n%s", fd, 574 str_error_r(errno, sbuf, sizeof(sbuf)), info); 575 exit(EXIT_FAILURE); 576 } 577 return fd; 578 } 579 580 static u64 get_cpu_usage_nsec_self(int fd) 581 { 582 u64 runtime; 583 int ret; 584 585 ret = read(fd, &runtime, sizeof(runtime)); 586 BUG_ON(ret != sizeof(runtime)); 587 588 return runtime; 589 } 590 591 struct sched_thread_parms { 592 struct task_desc *task; 593 struct perf_sched *sched; 594 int fd; 595 }; 596 597 static void *thread_func(void *ctx) 598 { 599 struct sched_thread_parms *parms = ctx; 600 struct task_desc *this_task = parms->task; 601 struct perf_sched *sched = parms->sched; 602 u64 cpu_usage_0, cpu_usage_1; 603 unsigned long i, ret; 604 char comm2[22]; 605 int fd = parms->fd; 606 607 zfree(&parms); 608 609 sprintf(comm2, ":%s", this_task->comm); 610 prctl(PR_SET_NAME, comm2); 611 if (fd < 0) 612 return NULL; 613 again: 614 ret = sem_post(&this_task->ready_for_work); 615 BUG_ON(ret); 616 ret = pthread_mutex_lock(&sched->start_work_mutex); 617 BUG_ON(ret); 618 ret = pthread_mutex_unlock(&sched->start_work_mutex); 619 BUG_ON(ret); 620 621 cpu_usage_0 = get_cpu_usage_nsec_self(fd); 622 623 for (i = 0; i < this_task->nr_events; i++) { 624 this_task->curr_event = i; 625 perf_sched__process_event(sched, this_task->atoms[i]); 626 } 627 628 cpu_usage_1 = get_cpu_usage_nsec_self(fd); 629 this_task->cpu_usage = cpu_usage_1 - cpu_usage_0; 630 ret = sem_post(&this_task->work_done_sem); 631 BUG_ON(ret); 632 633 ret = pthread_mutex_lock(&sched->work_done_wait_mutex); 634 BUG_ON(ret); 635 ret = pthread_mutex_unlock(&sched->work_done_wait_mutex); 636 BUG_ON(ret); 637 638 goto again; 639 } 640 641 static void create_tasks(struct perf_sched *sched) 642 { 643 struct task_desc *task; 644 pthread_attr_t attr; 645 unsigned long i; 646 int err; 647 648 err = pthread_attr_init(&attr); 649 BUG_ON(err); 650 err = pthread_attr_setstacksize(&attr, 651 (size_t) max(16 * 1024, PTHREAD_STACK_MIN)); 652 BUG_ON(err); 653 err = pthread_mutex_lock(&sched->start_work_mutex); 654 BUG_ON(err); 655 err = pthread_mutex_lock(&sched->work_done_wait_mutex); 656 BUG_ON(err); 657 for (i = 0; i < sched->nr_tasks; i++) { 658 struct sched_thread_parms *parms = malloc(sizeof(*parms)); 659 BUG_ON(parms == NULL); 660 parms->task = task = sched->tasks[i]; 661 parms->sched = sched; 662 parms->fd = self_open_counters(sched, i); 663 sem_init(&task->sleep_sem, 0, 0); 664 sem_init(&task->ready_for_work, 0, 0); 665 sem_init(&task->work_done_sem, 0, 0); 666 task->curr_event = 0; 667 err = pthread_create(&task->thread, &attr, thread_func, parms); 668 BUG_ON(err); 669 } 670 } 671 672 static void wait_for_tasks(struct perf_sched *sched) 673 { 674 u64 cpu_usage_0, cpu_usage_1; 675 struct 
task_desc *task; 676 unsigned long i, ret; 677 678 sched->start_time = get_nsecs(); 679 sched->cpu_usage = 0; 680 pthread_mutex_unlock(&sched->work_done_wait_mutex); 681 682 for (i = 0; i < sched->nr_tasks; i++) { 683 task = sched->tasks[i]; 684 ret = sem_wait(&task->ready_for_work); 685 BUG_ON(ret); 686 sem_init(&task->ready_for_work, 0, 0); 687 } 688 ret = pthread_mutex_lock(&sched->work_done_wait_mutex); 689 BUG_ON(ret); 690 691 cpu_usage_0 = get_cpu_usage_nsec_parent(); 692 693 pthread_mutex_unlock(&sched->start_work_mutex); 694 695 for (i = 0; i < sched->nr_tasks; i++) { 696 task = sched->tasks[i]; 697 ret = sem_wait(&task->work_done_sem); 698 BUG_ON(ret); 699 sem_init(&task->work_done_sem, 0, 0); 700 sched->cpu_usage += task->cpu_usage; 701 task->cpu_usage = 0; 702 } 703 704 cpu_usage_1 = get_cpu_usage_nsec_parent(); 705 if (!sched->runavg_cpu_usage) 706 sched->runavg_cpu_usage = sched->cpu_usage; 707 sched->runavg_cpu_usage = (sched->runavg_cpu_usage * (sched->replay_repeat - 1) + sched->cpu_usage) / sched->replay_repeat; 708 709 sched->parent_cpu_usage = cpu_usage_1 - cpu_usage_0; 710 if (!sched->runavg_parent_cpu_usage) 711 sched->runavg_parent_cpu_usage = sched->parent_cpu_usage; 712 sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * (sched->replay_repeat - 1) + 713 sched->parent_cpu_usage)/sched->replay_repeat; 714 715 ret = pthread_mutex_lock(&sched->start_work_mutex); 716 BUG_ON(ret); 717 718 for (i = 0; i < sched->nr_tasks; i++) { 719 task = sched->tasks[i]; 720 sem_init(&task->sleep_sem, 0, 0); 721 task->curr_event = 0; 722 } 723 } 724 725 static void run_one_test(struct perf_sched *sched) 726 { 727 u64 T0, T1, delta, avg_delta, fluct; 728 729 T0 = get_nsecs(); 730 wait_for_tasks(sched); 731 T1 = get_nsecs(); 732 733 delta = T1 - T0; 734 sched->sum_runtime += delta; 735 sched->nr_runs++; 736 737 avg_delta = sched->sum_runtime / sched->nr_runs; 738 if (delta < avg_delta) 739 fluct = avg_delta - delta; 740 else 741 fluct = delta - avg_delta; 742 sched->sum_fluct += fluct; 743 if (!sched->run_avg) 744 sched->run_avg = delta; 745 sched->run_avg = (sched->run_avg * (sched->replay_repeat - 1) + delta) / sched->replay_repeat; 746 747 printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / NSEC_PER_MSEC); 748 749 printf("ravg: %0.2f, ", (double)sched->run_avg / NSEC_PER_MSEC); 750 751 printf("cpu: %0.2f / %0.2f", 752 (double)sched->cpu_usage / NSEC_PER_MSEC, (double)sched->runavg_cpu_usage / NSEC_PER_MSEC); 753 754 #if 0 755 /* 756 * rusage statistics done by the parent, these are less 757 * accurate than the sched->sum_exec_runtime based statistics: 758 */ 759 printf(" [%0.2f / %0.2f]", 760 (double)sched->parent_cpu_usage / NSEC_PER_MSEC, 761 (double)sched->runavg_parent_cpu_usage / NSEC_PER_MSEC); 762 #endif 763 764 printf("\n"); 765 766 if (sched->nr_sleep_corrections) 767 printf(" (%ld sleep corrections)\n", sched->nr_sleep_corrections); 768 sched->nr_sleep_corrections = 0; 769 } 770 771 static void test_calibrations(struct perf_sched *sched) 772 { 773 u64 T0, T1; 774 775 T0 = get_nsecs(); 776 burn_nsecs(sched, NSEC_PER_MSEC); 777 T1 = get_nsecs(); 778 779 printf("the run test took %" PRIu64 " nsecs\n", T1 - T0); 780 781 T0 = get_nsecs(); 782 sleep_nsecs(NSEC_PER_MSEC); 783 T1 = get_nsecs(); 784 785 printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0); 786 } 787 788 static int 789 replay_wakeup_event(struct perf_sched *sched, 790 struct perf_evsel *evsel, struct perf_sample *sample, 791 struct machine *machine __maybe_unused) 792 { 793 const char *comm = 
perf_evsel__strval(evsel, sample, "comm"); 794 const u32 pid = perf_evsel__intval(evsel, sample, "pid"); 795 struct task_desc *waker, *wakee; 796 797 if (verbose > 0) { 798 printf("sched_wakeup event %p\n", evsel); 799 800 printf(" ... pid %d woke up %s/%d\n", sample->tid, comm, pid); 801 } 802 803 waker = register_pid(sched, sample->tid, "<unknown>"); 804 wakee = register_pid(sched, pid, comm); 805 806 add_sched_event_wakeup(sched, waker, sample->time, wakee); 807 return 0; 808 } 809 810 static int replay_switch_event(struct perf_sched *sched, 811 struct perf_evsel *evsel, 812 struct perf_sample *sample, 813 struct machine *machine __maybe_unused) 814 { 815 const char *prev_comm = perf_evsel__strval(evsel, sample, "prev_comm"), 816 *next_comm = perf_evsel__strval(evsel, sample, "next_comm"); 817 const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), 818 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); 819 const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state"); 820 struct task_desc *prev, __maybe_unused *next; 821 u64 timestamp0, timestamp = sample->time; 822 int cpu = sample->cpu; 823 s64 delta; 824 825 if (verbose > 0) 826 printf("sched_switch event %p\n", evsel); 827 828 if (cpu >= MAX_CPUS || cpu < 0) 829 return 0; 830 831 timestamp0 = sched->cpu_last_switched[cpu]; 832 if (timestamp0) 833 delta = timestamp - timestamp0; 834 else 835 delta = 0; 836 837 if (delta < 0) { 838 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 839 return -1; 840 } 841 842 pr_debug(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n", 843 prev_comm, prev_pid, next_comm, next_pid, delta); 844 845 prev = register_pid(sched, prev_pid, prev_comm); 846 next = register_pid(sched, next_pid, next_comm); 847 848 sched->cpu_last_switched[cpu] = timestamp; 849 850 add_sched_event_run(sched, prev, timestamp, delta); 851 add_sched_event_sleep(sched, prev, timestamp, prev_state); 852 853 return 0; 854 } 855 856 static int replay_fork_event(struct perf_sched *sched, 857 union perf_event *event, 858 struct machine *machine) 859 { 860 struct thread *child, *parent; 861 862 child = machine__findnew_thread(machine, event->fork.pid, 863 event->fork.tid); 864 parent = machine__findnew_thread(machine, event->fork.ppid, 865 event->fork.ptid); 866 867 if (child == NULL || parent == NULL) { 868 pr_debug("thread does not exist on fork event: child %p, parent %p\n", 869 child, parent); 870 goto out_put; 871 } 872 873 if (verbose > 0) { 874 printf("fork event\n"); 875 printf("... parent: %s/%d\n", thread__comm_str(parent), parent->tid); 876 printf("... 
child: %s/%d\n", thread__comm_str(child), child->tid); 877 } 878 879 register_pid(sched, parent->tid, thread__comm_str(parent)); 880 register_pid(sched, child->tid, thread__comm_str(child)); 881 out_put: 882 thread__put(child); 883 thread__put(parent); 884 return 0; 885 } 886 887 struct sort_dimension { 888 const char *name; 889 sort_fn_t cmp; 890 struct list_head list; 891 }; 892 893 static int 894 thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r) 895 { 896 struct sort_dimension *sort; 897 int ret = 0; 898 899 BUG_ON(list_empty(list)); 900 901 list_for_each_entry(sort, list, list) { 902 ret = sort->cmp(l, r); 903 if (ret) 904 return ret; 905 } 906 907 return ret; 908 } 909 910 static struct work_atoms * 911 thread_atoms_search(struct rb_root *root, struct thread *thread, 912 struct list_head *sort_list) 913 { 914 struct rb_node *node = root->rb_node; 915 struct work_atoms key = { .thread = thread }; 916 917 while (node) { 918 struct work_atoms *atoms; 919 int cmp; 920 921 atoms = container_of(node, struct work_atoms, node); 922 923 cmp = thread_lat_cmp(sort_list, &key, atoms); 924 if (cmp > 0) 925 node = node->rb_left; 926 else if (cmp < 0) 927 node = node->rb_right; 928 else { 929 BUG_ON(thread != atoms->thread); 930 return atoms; 931 } 932 } 933 return NULL; 934 } 935 936 static void 937 __thread_latency_insert(struct rb_root *root, struct work_atoms *data, 938 struct list_head *sort_list) 939 { 940 struct rb_node **new = &(root->rb_node), *parent = NULL; 941 942 while (*new) { 943 struct work_atoms *this; 944 int cmp; 945 946 this = container_of(*new, struct work_atoms, node); 947 parent = *new; 948 949 cmp = thread_lat_cmp(sort_list, data, this); 950 951 if (cmp > 0) 952 new = &((*new)->rb_left); 953 else 954 new = &((*new)->rb_right); 955 } 956 957 rb_link_node(&data->node, parent, new); 958 rb_insert_color(&data->node, root); 959 } 960 961 static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread) 962 { 963 struct work_atoms *atoms = zalloc(sizeof(*atoms)); 964 if (!atoms) { 965 pr_err("No memory at %s\n", __func__); 966 return -1; 967 } 968 969 atoms->thread = thread__get(thread); 970 INIT_LIST_HEAD(&atoms->work_list); 971 __thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid); 972 return 0; 973 } 974 975 static char sched_out_state(u64 prev_state) 976 { 977 const char *str = TASK_STATE_TO_CHAR_STR; 978 979 return str[prev_state]; 980 } 981 982 static int 983 add_sched_out_event(struct work_atoms *atoms, 984 char run_state, 985 u64 timestamp) 986 { 987 struct work_atom *atom = zalloc(sizeof(*atom)); 988 if (!atom) { 989 pr_err("Non memory at %s", __func__); 990 return -1; 991 } 992 993 atom->sched_out_time = timestamp; 994 995 if (run_state == 'R') { 996 atom->state = THREAD_WAIT_CPU; 997 atom->wake_up_time = atom->sched_out_time; 998 } 999 1000 list_add_tail(&atom->list, &atoms->work_list); 1001 return 0; 1002 } 1003 1004 static void 1005 add_runtime_event(struct work_atoms *atoms, u64 delta, 1006 u64 timestamp __maybe_unused) 1007 { 1008 struct work_atom *atom; 1009 1010 BUG_ON(list_empty(&atoms->work_list)); 1011 1012 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1013 1014 atom->runtime += delta; 1015 atoms->total_runtime += delta; 1016 } 1017 1018 static void 1019 add_sched_in_event(struct work_atoms *atoms, u64 timestamp) 1020 { 1021 struct work_atom *atom; 1022 u64 delta; 1023 1024 if (list_empty(&atoms->work_list)) 1025 return; 1026 1027 atom = list_entry(atoms->work_list.prev, struct 
work_atom, list); 1028 1029 if (atom->state != THREAD_WAIT_CPU) 1030 return; 1031 1032 if (timestamp < atom->wake_up_time) { 1033 atom->state = THREAD_IGNORE; 1034 return; 1035 } 1036 1037 atom->state = THREAD_SCHED_IN; 1038 atom->sched_in_time = timestamp; 1039 1040 delta = atom->sched_in_time - atom->wake_up_time; 1041 atoms->total_lat += delta; 1042 if (delta > atoms->max_lat) { 1043 atoms->max_lat = delta; 1044 atoms->max_lat_at = timestamp; 1045 } 1046 atoms->nb_atoms++; 1047 } 1048 1049 static int latency_switch_event(struct perf_sched *sched, 1050 struct perf_evsel *evsel, 1051 struct perf_sample *sample, 1052 struct machine *machine) 1053 { 1054 const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), 1055 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); 1056 const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state"); 1057 struct work_atoms *out_events, *in_events; 1058 struct thread *sched_out, *sched_in; 1059 u64 timestamp0, timestamp = sample->time; 1060 int cpu = sample->cpu, err = -1; 1061 s64 delta; 1062 1063 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1064 1065 timestamp0 = sched->cpu_last_switched[cpu]; 1066 sched->cpu_last_switched[cpu] = timestamp; 1067 if (timestamp0) 1068 delta = timestamp - timestamp0; 1069 else 1070 delta = 0; 1071 1072 if (delta < 0) { 1073 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 1074 return -1; 1075 } 1076 1077 sched_out = machine__findnew_thread(machine, -1, prev_pid); 1078 sched_in = machine__findnew_thread(machine, -1, next_pid); 1079 if (sched_out == NULL || sched_in == NULL) 1080 goto out_put; 1081 1082 out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); 1083 if (!out_events) { 1084 if (thread_atoms_insert(sched, sched_out)) 1085 goto out_put; 1086 out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); 1087 if (!out_events) { 1088 pr_err("out-event: Internal tree error"); 1089 goto out_put; 1090 } 1091 } 1092 if (add_sched_out_event(out_events, sched_out_state(prev_state), timestamp)) 1093 return -1; 1094 1095 in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); 1096 if (!in_events) { 1097 if (thread_atoms_insert(sched, sched_in)) 1098 goto out_put; 1099 in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); 1100 if (!in_events) { 1101 pr_err("in-event: Internal tree error"); 1102 goto out_put; 1103 } 1104 /* 1105 * Take came in we have not heard about yet, 1106 * add in an initial atom in runnable state: 1107 */ 1108 if (add_sched_out_event(in_events, 'R', timestamp)) 1109 goto out_put; 1110 } 1111 add_sched_in_event(in_events, timestamp); 1112 err = 0; 1113 out_put: 1114 thread__put(sched_out); 1115 thread__put(sched_in); 1116 return err; 1117 } 1118 1119 static int latency_runtime_event(struct perf_sched *sched, 1120 struct perf_evsel *evsel, 1121 struct perf_sample *sample, 1122 struct machine *machine) 1123 { 1124 const u32 pid = perf_evsel__intval(evsel, sample, "pid"); 1125 const u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); 1126 struct thread *thread = machine__findnew_thread(machine, -1, pid); 1127 struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); 1128 u64 timestamp = sample->time; 1129 int cpu = sample->cpu, err = -1; 1130 1131 if (thread == NULL) 1132 return -1; 1133 1134 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1135 if (!atoms) { 1136 if (thread_atoms_insert(sched, thread)) 1137 goto out_put; 1138 atoms = thread_atoms_search(&sched->atom_root, 
thread, &sched->cmp_pid); 1139 if (!atoms) { 1140 pr_err("in-event: Internal tree error"); 1141 goto out_put; 1142 } 1143 if (add_sched_out_event(atoms, 'R', timestamp)) 1144 goto out_put; 1145 } 1146 1147 add_runtime_event(atoms, runtime, timestamp); 1148 err = 0; 1149 out_put: 1150 thread__put(thread); 1151 return err; 1152 } 1153 1154 static int latency_wakeup_event(struct perf_sched *sched, 1155 struct perf_evsel *evsel, 1156 struct perf_sample *sample, 1157 struct machine *machine) 1158 { 1159 const u32 pid = perf_evsel__intval(evsel, sample, "pid"); 1160 struct work_atoms *atoms; 1161 struct work_atom *atom; 1162 struct thread *wakee; 1163 u64 timestamp = sample->time; 1164 int err = -1; 1165 1166 wakee = machine__findnew_thread(machine, -1, pid); 1167 if (wakee == NULL) 1168 return -1; 1169 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); 1170 if (!atoms) { 1171 if (thread_atoms_insert(sched, wakee)) 1172 goto out_put; 1173 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); 1174 if (!atoms) { 1175 pr_err("wakeup-event: Internal tree error"); 1176 goto out_put; 1177 } 1178 if (add_sched_out_event(atoms, 'S', timestamp)) 1179 goto out_put; 1180 } 1181 1182 BUG_ON(list_empty(&atoms->work_list)); 1183 1184 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1185 1186 /* 1187 * As we do not guarantee the wakeup event happens when 1188 * task is out of run queue, also may happen when task is 1189 * on run queue and wakeup only change ->state to TASK_RUNNING, 1190 * then we should not set the ->wake_up_time when wake up a 1191 * task which is on run queue. 1192 * 1193 * You WILL be missing events if you've recorded only 1194 * one CPU, or are only looking at only one, so don't 1195 * skip in this case. 1196 */ 1197 if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING) 1198 goto out_ok; 1199 1200 sched->nr_timestamps++; 1201 if (atom->sched_out_time > timestamp) { 1202 sched->nr_unordered_timestamps++; 1203 goto out_ok; 1204 } 1205 1206 atom->state = THREAD_WAIT_CPU; 1207 atom->wake_up_time = timestamp; 1208 out_ok: 1209 err = 0; 1210 out_put: 1211 thread__put(wakee); 1212 return err; 1213 } 1214 1215 static int latency_migrate_task_event(struct perf_sched *sched, 1216 struct perf_evsel *evsel, 1217 struct perf_sample *sample, 1218 struct machine *machine) 1219 { 1220 const u32 pid = perf_evsel__intval(evsel, sample, "pid"); 1221 u64 timestamp = sample->time; 1222 struct work_atoms *atoms; 1223 struct work_atom *atom; 1224 struct thread *migrant; 1225 int err = -1; 1226 1227 /* 1228 * Only need to worry about migration when profiling one CPU. 
1229 */ 1230 if (sched->profile_cpu == -1) 1231 return 0; 1232 1233 migrant = machine__findnew_thread(machine, -1, pid); 1234 if (migrant == NULL) 1235 return -1; 1236 atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); 1237 if (!atoms) { 1238 if (thread_atoms_insert(sched, migrant)) 1239 goto out_put; 1240 register_pid(sched, migrant->tid, thread__comm_str(migrant)); 1241 atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); 1242 if (!atoms) { 1243 pr_err("migration-event: Internal tree error"); 1244 goto out_put; 1245 } 1246 if (add_sched_out_event(atoms, 'R', timestamp)) 1247 goto out_put; 1248 } 1249 1250 BUG_ON(list_empty(&atoms->work_list)); 1251 1252 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1253 atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp; 1254 1255 sched->nr_timestamps++; 1256 1257 if (atom->sched_out_time > timestamp) 1258 sched->nr_unordered_timestamps++; 1259 err = 0; 1260 out_put: 1261 thread__put(migrant); 1262 return err; 1263 } 1264 1265 static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list) 1266 { 1267 int i; 1268 int ret; 1269 u64 avg; 1270 char max_lat_at[32]; 1271 1272 if (!work_list->nb_atoms) 1273 return; 1274 /* 1275 * Ignore idle threads: 1276 */ 1277 if (!strcmp(thread__comm_str(work_list->thread), "swapper")) 1278 return; 1279 1280 sched->all_runtime += work_list->total_runtime; 1281 sched->all_count += work_list->nb_atoms; 1282 1283 if (work_list->num_merged > 1) 1284 ret = printf(" %s:(%d) ", thread__comm_str(work_list->thread), work_list->num_merged); 1285 else 1286 ret = printf(" %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid); 1287 1288 for (i = 0; i < 24 - ret; i++) 1289 printf(" "); 1290 1291 avg = work_list->total_lat / work_list->nb_atoms; 1292 timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at)); 1293 1294 printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n", 1295 (double)work_list->total_runtime / NSEC_PER_MSEC, 1296 work_list->nb_atoms, (double)avg / NSEC_PER_MSEC, 1297 (double)work_list->max_lat / NSEC_PER_MSEC, 1298 max_lat_at); 1299 } 1300 1301 static int pid_cmp(struct work_atoms *l, struct work_atoms *r) 1302 { 1303 if (l->thread == r->thread) 1304 return 0; 1305 if (l->thread->tid < r->thread->tid) 1306 return -1; 1307 if (l->thread->tid > r->thread->tid) 1308 return 1; 1309 return (int)(l->thread - r->thread); 1310 } 1311 1312 static int avg_cmp(struct work_atoms *l, struct work_atoms *r) 1313 { 1314 u64 avgl, avgr; 1315 1316 if (!l->nb_atoms) 1317 return -1; 1318 1319 if (!r->nb_atoms) 1320 return 1; 1321 1322 avgl = l->total_lat / l->nb_atoms; 1323 avgr = r->total_lat / r->nb_atoms; 1324 1325 if (avgl < avgr) 1326 return -1; 1327 if (avgl > avgr) 1328 return 1; 1329 1330 return 0; 1331 } 1332 1333 static int max_cmp(struct work_atoms *l, struct work_atoms *r) 1334 { 1335 if (l->max_lat < r->max_lat) 1336 return -1; 1337 if (l->max_lat > r->max_lat) 1338 return 1; 1339 1340 return 0; 1341 } 1342 1343 static int switch_cmp(struct work_atoms *l, struct work_atoms *r) 1344 { 1345 if (l->nb_atoms < r->nb_atoms) 1346 return -1; 1347 if (l->nb_atoms > r->nb_atoms) 1348 return 1; 1349 1350 return 0; 1351 } 1352 1353 static int runtime_cmp(struct work_atoms *l, struct work_atoms *r) 1354 { 1355 if (l->total_runtime < r->total_runtime) 1356 return -1; 1357 if (l->total_runtime > r->total_runtime) 1358 return 1; 1359 1360 return 0; 1361 } 1362 1363 
static int sort_dimension__add(const char *tok, struct list_head *list) 1364 { 1365 size_t i; 1366 static struct sort_dimension avg_sort_dimension = { 1367 .name = "avg", 1368 .cmp = avg_cmp, 1369 }; 1370 static struct sort_dimension max_sort_dimension = { 1371 .name = "max", 1372 .cmp = max_cmp, 1373 }; 1374 static struct sort_dimension pid_sort_dimension = { 1375 .name = "pid", 1376 .cmp = pid_cmp, 1377 }; 1378 static struct sort_dimension runtime_sort_dimension = { 1379 .name = "runtime", 1380 .cmp = runtime_cmp, 1381 }; 1382 static struct sort_dimension switch_sort_dimension = { 1383 .name = "switch", 1384 .cmp = switch_cmp, 1385 }; 1386 struct sort_dimension *available_sorts[] = { 1387 &pid_sort_dimension, 1388 &avg_sort_dimension, 1389 &max_sort_dimension, 1390 &switch_sort_dimension, 1391 &runtime_sort_dimension, 1392 }; 1393 1394 for (i = 0; i < ARRAY_SIZE(available_sorts); i++) { 1395 if (!strcmp(available_sorts[i]->name, tok)) { 1396 list_add_tail(&available_sorts[i]->list, list); 1397 1398 return 0; 1399 } 1400 } 1401 1402 return -1; 1403 } 1404 1405 static void perf_sched__sort_lat(struct perf_sched *sched) 1406 { 1407 struct rb_node *node; 1408 struct rb_root *root = &sched->atom_root; 1409 again: 1410 for (;;) { 1411 struct work_atoms *data; 1412 node = rb_first(root); 1413 if (!node) 1414 break; 1415 1416 rb_erase(node, root); 1417 data = rb_entry(node, struct work_atoms, node); 1418 __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list); 1419 } 1420 if (root == &sched->atom_root) { 1421 root = &sched->merged_atom_root; 1422 goto again; 1423 } 1424 } 1425 1426 static int process_sched_wakeup_event(struct perf_tool *tool, 1427 struct perf_evsel *evsel, 1428 struct perf_sample *sample, 1429 struct machine *machine) 1430 { 1431 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1432 1433 if (sched->tp_handler->wakeup_event) 1434 return sched->tp_handler->wakeup_event(sched, evsel, sample, machine); 1435 1436 return 0; 1437 } 1438 1439 union map_priv { 1440 void *ptr; 1441 bool color; 1442 }; 1443 1444 static bool thread__has_color(struct thread *thread) 1445 { 1446 union map_priv priv = { 1447 .ptr = thread__priv(thread), 1448 }; 1449 1450 return priv.color; 1451 } 1452 1453 static struct thread* 1454 map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid) 1455 { 1456 struct thread *thread = machine__findnew_thread(machine, pid, tid); 1457 union map_priv priv = { 1458 .color = false, 1459 }; 1460 1461 if (!sched->map.color_pids || !thread || thread__priv(thread)) 1462 return thread; 1463 1464 if (thread_map__has(sched->map.color_pids, tid)) 1465 priv.color = true; 1466 1467 thread__set_priv(thread, priv.ptr); 1468 return thread; 1469 } 1470 1471 static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, 1472 struct perf_sample *sample, struct machine *machine) 1473 { 1474 const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); 1475 struct thread *sched_in; 1476 int new_shortname; 1477 u64 timestamp0, timestamp = sample->time; 1478 s64 delta; 1479 int i, this_cpu = sample->cpu; 1480 int cpus_nr; 1481 bool new_cpu = false; 1482 const char *color = PERF_COLOR_NORMAL; 1483 char stimestamp[32]; 1484 1485 BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); 1486 1487 if (this_cpu > sched->max_cpu) 1488 sched->max_cpu = this_cpu; 1489 1490 if (sched->map.comp) { 1491 cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS); 1492 if (!test_and_set_bit(this_cpu, 
sched->map.comp_cpus_mask)) { 1493 sched->map.comp_cpus[cpus_nr++] = this_cpu; 1494 new_cpu = true; 1495 } 1496 } else 1497 cpus_nr = sched->max_cpu; 1498 1499 timestamp0 = sched->cpu_last_switched[this_cpu]; 1500 sched->cpu_last_switched[this_cpu] = timestamp; 1501 if (timestamp0) 1502 delta = timestamp - timestamp0; 1503 else 1504 delta = 0; 1505 1506 if (delta < 0) { 1507 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 1508 return -1; 1509 } 1510 1511 sched_in = map__findnew_thread(sched, machine, -1, next_pid); 1512 if (sched_in == NULL) 1513 return -1; 1514 1515 sched->curr_thread[this_cpu] = thread__get(sched_in); 1516 1517 printf(" "); 1518 1519 new_shortname = 0; 1520 if (!sched_in->shortname[0]) { 1521 if (!strcmp(thread__comm_str(sched_in), "swapper")) { 1522 /* 1523 * Don't allocate a letter-number for swapper:0 1524 * as a shortname. Instead, we use '.' for it. 1525 */ 1526 sched_in->shortname[0] = '.'; 1527 sched_in->shortname[1] = ' '; 1528 } else { 1529 sched_in->shortname[0] = sched->next_shortname1; 1530 sched_in->shortname[1] = sched->next_shortname2; 1531 1532 if (sched->next_shortname1 < 'Z') { 1533 sched->next_shortname1++; 1534 } else { 1535 sched->next_shortname1 = 'A'; 1536 if (sched->next_shortname2 < '9') 1537 sched->next_shortname2++; 1538 else 1539 sched->next_shortname2 = '0'; 1540 } 1541 } 1542 new_shortname = 1; 1543 } 1544 1545 for (i = 0; i < cpus_nr; i++) { 1546 int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i; 1547 struct thread *curr_thread = sched->curr_thread[cpu]; 1548 const char *pid_color = color; 1549 const char *cpu_color = color; 1550 1551 if (curr_thread && thread__has_color(curr_thread)) 1552 pid_color = COLOR_PIDS; 1553 1554 if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu)) 1555 continue; 1556 1557 if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu)) 1558 cpu_color = COLOR_CPUS; 1559 1560 if (cpu != this_cpu) 1561 color_fprintf(stdout, color, " "); 1562 else 1563 color_fprintf(stdout, cpu_color, "*"); 1564 1565 if (sched->curr_thread[cpu]) 1566 color_fprintf(stdout, pid_color, "%2s ", sched->curr_thread[cpu]->shortname); 1567 else 1568 color_fprintf(stdout, color, " "); 1569 } 1570 1571 if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu)) 1572 goto out; 1573 1574 timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp)); 1575 color_fprintf(stdout, color, " %12s secs ", stimestamp); 1576 if (new_shortname || (verbose > 0 && sched_in->tid)) { 1577 const char *pid_color = color; 1578 1579 if (thread__has_color(sched_in)) 1580 pid_color = COLOR_PIDS; 1581 1582 color_fprintf(stdout, pid_color, "%s => %s:%d", 1583 sched_in->shortname, thread__comm_str(sched_in), sched_in->tid); 1584 } 1585 1586 if (sched->map.comp && new_cpu) 1587 color_fprintf(stdout, color, " (CPU %d)", this_cpu); 1588 1589 out: 1590 color_fprintf(stdout, color, "\n"); 1591 1592 thread__put(sched_in); 1593 1594 return 0; 1595 } 1596 1597 static int process_sched_switch_event(struct perf_tool *tool, 1598 struct perf_evsel *evsel, 1599 struct perf_sample *sample, 1600 struct machine *machine) 1601 { 1602 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1603 int this_cpu = sample->cpu, err = 0; 1604 u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), 1605 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); 1606 1607 if (sched->curr_pid[this_cpu] != (u32)-1) { 1608 /* 1609 * Are we trying to switch away a PID that is 1610 * not current? 
1611 */ 1612 if (sched->curr_pid[this_cpu] != prev_pid) 1613 sched->nr_context_switch_bugs++; 1614 } 1615 1616 if (sched->tp_handler->switch_event) 1617 err = sched->tp_handler->switch_event(sched, evsel, sample, machine); 1618 1619 sched->curr_pid[this_cpu] = next_pid; 1620 return err; 1621 } 1622 1623 static int process_sched_runtime_event(struct perf_tool *tool, 1624 struct perf_evsel *evsel, 1625 struct perf_sample *sample, 1626 struct machine *machine) 1627 { 1628 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1629 1630 if (sched->tp_handler->runtime_event) 1631 return sched->tp_handler->runtime_event(sched, evsel, sample, machine); 1632 1633 return 0; 1634 } 1635 1636 static int perf_sched__process_fork_event(struct perf_tool *tool, 1637 union perf_event *event, 1638 struct perf_sample *sample, 1639 struct machine *machine) 1640 { 1641 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1642 1643 /* run the fork event through the perf machineruy */ 1644 perf_event__process_fork(tool, event, sample, machine); 1645 1646 /* and then run additional processing needed for this command */ 1647 if (sched->tp_handler->fork_event) 1648 return sched->tp_handler->fork_event(sched, event, machine); 1649 1650 return 0; 1651 } 1652 1653 static int process_sched_migrate_task_event(struct perf_tool *tool, 1654 struct perf_evsel *evsel, 1655 struct perf_sample *sample, 1656 struct machine *machine) 1657 { 1658 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1659 1660 if (sched->tp_handler->migrate_task_event) 1661 return sched->tp_handler->migrate_task_event(sched, evsel, sample, machine); 1662 1663 return 0; 1664 } 1665 1666 typedef int (*tracepoint_handler)(struct perf_tool *tool, 1667 struct perf_evsel *evsel, 1668 struct perf_sample *sample, 1669 struct machine *machine); 1670 1671 static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_unused, 1672 union perf_event *event __maybe_unused, 1673 struct perf_sample *sample, 1674 struct perf_evsel *evsel, 1675 struct machine *machine) 1676 { 1677 int err = 0; 1678 1679 if (evsel->handler != NULL) { 1680 tracepoint_handler f = evsel->handler; 1681 err = f(tool, evsel, sample, machine); 1682 } 1683 1684 return err; 1685 } 1686 1687 static int perf_sched__read_events(struct perf_sched *sched) 1688 { 1689 const struct perf_evsel_str_handler handlers[] = { 1690 { "sched:sched_switch", process_sched_switch_event, }, 1691 { "sched:sched_stat_runtime", process_sched_runtime_event, }, 1692 { "sched:sched_wakeup", process_sched_wakeup_event, }, 1693 { "sched:sched_wakeup_new", process_sched_wakeup_event, }, 1694 { "sched:sched_migrate_task", process_sched_migrate_task_event, }, 1695 }; 1696 struct perf_session *session; 1697 struct perf_data_file file = { 1698 .path = input_name, 1699 .mode = PERF_DATA_MODE_READ, 1700 .force = sched->force, 1701 }; 1702 int rc = -1; 1703 1704 session = perf_session__new(&file, false, &sched->tool); 1705 if (session == NULL) { 1706 pr_debug("No Memory for session\n"); 1707 return -1; 1708 } 1709 1710 symbol__init(&session->header.env); 1711 1712 if (perf_session__set_tracepoints_handlers(session, handlers)) 1713 goto out_delete; 1714 1715 if (perf_session__has_traces(session, "record -R")) { 1716 int err = perf_session__process_events(session); 1717 if (err) { 1718 pr_err("Failed to process events, error %d", err); 1719 goto out_delete; 1720 } 1721 1722 sched->nr_events = session->evlist->stats.nr_events[0]; 1723 sched->nr_lost_events = 
session->evlist->stats.total_lost; 1724 sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST]; 1725 } 1726 1727 rc = 0; 1728 out_delete: 1729 perf_session__delete(session); 1730 return rc; 1731 } 1732 1733 /* 1734 * scheduling times are printed as msec.usec 1735 */ 1736 static inline void print_sched_time(unsigned long long nsecs, int width) 1737 { 1738 unsigned long msecs; 1739 unsigned long usecs; 1740 1741 msecs = nsecs / NSEC_PER_MSEC; 1742 nsecs -= msecs * NSEC_PER_MSEC; 1743 usecs = nsecs / NSEC_PER_USEC; 1744 printf("%*lu.%03lu ", width, msecs, usecs); 1745 } 1746 1747 /* 1748 * returns runtime data for event, allocating memory for it the 1749 * first time it is used. 1750 */ 1751 static struct evsel_runtime *perf_evsel__get_runtime(struct perf_evsel *evsel) 1752 { 1753 struct evsel_runtime *r = evsel->priv; 1754 1755 if (r == NULL) { 1756 r = zalloc(sizeof(struct evsel_runtime)); 1757 evsel->priv = r; 1758 } 1759 1760 return r; 1761 } 1762 1763 /* 1764 * save last time event was seen per cpu 1765 */ 1766 static void perf_evsel__save_time(struct perf_evsel *evsel, 1767 u64 timestamp, u32 cpu) 1768 { 1769 struct evsel_runtime *r = perf_evsel__get_runtime(evsel); 1770 1771 if (r == NULL) 1772 return; 1773 1774 if ((cpu >= r->ncpu) || (r->last_time == NULL)) { 1775 int i, n = __roundup_pow_of_two(cpu+1); 1776 void *p = r->last_time; 1777 1778 p = realloc(r->last_time, n * sizeof(u64)); 1779 if (!p) 1780 return; 1781 1782 r->last_time = p; 1783 for (i = r->ncpu; i < n; ++i) 1784 r->last_time[i] = (u64) 0; 1785 1786 r->ncpu = n; 1787 } 1788 1789 r->last_time[cpu] = timestamp; 1790 } 1791 1792 /* returns last time this event was seen on the given cpu */ 1793 static u64 perf_evsel__get_time(struct perf_evsel *evsel, u32 cpu) 1794 { 1795 struct evsel_runtime *r = perf_evsel__get_runtime(evsel); 1796 1797 if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu)) 1798 return 0; 1799 1800 return r->last_time[cpu]; 1801 } 1802 1803 static int comm_width = 30; 1804 1805 static char *timehist_get_commstr(struct thread *thread) 1806 { 1807 static char str[32]; 1808 const char *comm = thread__comm_str(thread); 1809 pid_t tid = thread->tid; 1810 pid_t pid = thread->pid_; 1811 int n; 1812 1813 if (pid == 0) 1814 n = scnprintf(str, sizeof(str), "%s", comm); 1815 1816 else if (tid != pid) 1817 n = scnprintf(str, sizeof(str), "%s[%d/%d]", comm, tid, pid); 1818 1819 else 1820 n = scnprintf(str, sizeof(str), "%s[%d]", comm, tid); 1821 1822 if (n > comm_width) 1823 comm_width = n; 1824 1825 return str; 1826 } 1827 1828 static void timehist_header(struct perf_sched *sched) 1829 { 1830 u32 ncpus = sched->max_cpu + 1; 1831 u32 i, j; 1832 1833 printf("%15s %6s ", "time", "cpu"); 1834 1835 if (sched->show_cpu_visual) { 1836 printf(" "); 1837 for (i = 0, j = 0; i < ncpus; ++i) { 1838 printf("%x", j++); 1839 if (j > 15) 1840 j = 0; 1841 } 1842 printf(" "); 1843 } 1844 1845 printf(" %-*s %9s %9s %9s", comm_width, 1846 "task name", "wait time", "sch delay", "run time"); 1847 1848 if (sched->show_state) 1849 printf(" %s", "state"); 1850 1851 printf("\n"); 1852 1853 /* 1854 * units row 1855 */ 1856 printf("%15s %-6s ", "", ""); 1857 1858 if (sched->show_cpu_visual) 1859 printf(" %*s ", ncpus, ""); 1860 1861 printf(" %-*s %9s %9s %9s", comm_width, 1862 "[tid/pid]", "(msec)", "(msec)", "(msec)"); 1863 1864 if (sched->show_state) 1865 printf(" %5s", ""); 1866 1867 printf("\n"); 1868 1869 /* 1870 * separator 1871 */ 1872 printf("%.15s %.6s ", graph_dotted_line, graph_dotted_line); 1873 1874 if 
(sched->show_cpu_visual) 1875 printf(" %.*s ", ncpus, graph_dotted_line); 1876 1877 printf(" %.*s %.9s %.9s %.9s", comm_width, 1878 graph_dotted_line, graph_dotted_line, graph_dotted_line, 1879 graph_dotted_line); 1880 1881 if (sched->show_state) 1882 printf(" %.5s", graph_dotted_line); 1883 1884 printf("\n"); 1885 } 1886 1887 static char task_state_char(struct thread *thread, int state) 1888 { 1889 static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; 1890 unsigned bit = state ? ffs(state) : 0; 1891 1892 /* 'I' for idle */ 1893 if (thread->tid == 0) 1894 return 'I'; 1895 1896 return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?'; 1897 } 1898 1899 static void timehist_print_sample(struct perf_sched *sched, 1900 struct perf_sample *sample, 1901 struct addr_location *al, 1902 struct thread *thread, 1903 u64 t, int state) 1904 { 1905 struct thread_runtime *tr = thread__priv(thread); 1906 u32 max_cpus = sched->max_cpu + 1; 1907 char tstr[64]; 1908 u64 wait_time; 1909 1910 timestamp__scnprintf_usec(t, tstr, sizeof(tstr)); 1911 printf("%15s [%04d] ", tstr, sample->cpu); 1912 1913 if (sched->show_cpu_visual) { 1914 u32 i; 1915 char c; 1916 1917 printf(" "); 1918 for (i = 0; i < max_cpus; ++i) { 1919 /* flag idle times with 'i'; others are sched events */ 1920 if (i == sample->cpu) 1921 c = (thread->tid == 0) ? 'i' : 's'; 1922 else 1923 c = ' '; 1924 printf("%c", c); 1925 } 1926 printf(" "); 1927 } 1928 1929 printf(" %-*s ", comm_width, timehist_get_commstr(thread)); 1930 1931 wait_time = tr->dt_sleep + tr->dt_iowait + tr->dt_preempt; 1932 print_sched_time(wait_time, 6); 1933 1934 print_sched_time(tr->dt_delay, 6); 1935 print_sched_time(tr->dt_run, 6); 1936 1937 if (sched->show_state) 1938 printf(" %5c ", task_state_char(thread, state)); 1939 1940 if (sched->show_wakeups) 1941 printf(" %-*s", comm_width, ""); 1942 1943 if (thread->tid == 0) 1944 goto out; 1945 1946 if (sched->show_callchain) 1947 printf(" "); 1948 1949 sample__fprintf_sym(sample, al, 0, 1950 EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE | 1951 EVSEL__PRINT_CALLCHAIN_ARROW | 1952 EVSEL__PRINT_SKIP_IGNORED, 1953 &callchain_cursor, stdout); 1954 1955 out: 1956 printf("\n"); 1957 } 1958 1959 /* 1960 * Explanation of delta-time stats: 1961 * 1962 * t = time of current schedule out event 1963 * tprev = time of previous sched out event 1964 * also time of schedule-in event for current task 1965 * last_time = time of last sched change event for current task 1966 * (i.e, time process was last scheduled out) 1967 * ready_to_run = time of wakeup for current task 1968 * 1969 * -----|------------|------------|------------|------ 1970 * last ready tprev t 1971 * time to run 1972 * 1973 * |-------- dt_wait --------| 1974 * |- dt_delay -|-- dt_run --| 1975 * 1976 * dt_run = run time of current task 1977 * dt_wait = time between last schedule out event for task and tprev 1978 * represents time spent off the cpu 1979 * dt_delay = time between wakeup and schedule-in of task 1980 */ 1981 1982 static void timehist_update_runtime_stats(struct thread_runtime *r, 1983 u64 t, u64 tprev) 1984 { 1985 r->dt_delay = 0; 1986 r->dt_sleep = 0; 1987 r->dt_iowait = 0; 1988 r->dt_preempt = 0; 1989 r->dt_run = 0; 1990 1991 if (tprev) { 1992 r->dt_run = t - tprev; 1993 if (r->ready_to_run) { 1994 if (r->ready_to_run > tprev) 1995 pr_debug("time travel: wakeup time for task > previous sched_switch event\n"); 1996 else 1997 r->dt_delay = tprev - r->ready_to_run; 1998 } 1999 2000 if (r->last_time > tprev) 2001 pr_debug("time travel: last sched out time for 
task > previous sched_switch event\n"); 2002 else if (r->last_time) { 2003 u64 dt_wait = tprev - r->last_time; 2004 2005 if (r->last_state == TASK_RUNNING) 2006 r->dt_preempt = dt_wait; 2007 else if (r->last_state == TASK_UNINTERRUPTIBLE) 2008 r->dt_iowait = dt_wait; 2009 else 2010 r->dt_sleep = dt_wait; 2011 } 2012 } 2013 2014 update_stats(&r->run_stats, r->dt_run); 2015 2016 r->total_run_time += r->dt_run; 2017 r->total_delay_time += r->dt_delay; 2018 r->total_sleep_time += r->dt_sleep; 2019 r->total_iowait_time += r->dt_iowait; 2020 r->total_preempt_time += r->dt_preempt; 2021 } 2022 2023 static bool is_idle_sample(struct perf_sample *sample, 2024 struct perf_evsel *evsel) 2025 { 2026 /* pid 0 == swapper == idle task */ 2027 if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0) 2028 return perf_evsel__intval(evsel, sample, "prev_pid") == 0; 2029 2030 return sample->pid == 0; 2031 } 2032 2033 static void save_task_callchain(struct perf_sched *sched, 2034 struct perf_sample *sample, 2035 struct perf_evsel *evsel, 2036 struct machine *machine) 2037 { 2038 struct callchain_cursor *cursor = &callchain_cursor; 2039 struct thread *thread; 2040 2041 /* want main thread for process - has maps */ 2042 thread = machine__findnew_thread(machine, sample->pid, sample->pid); 2043 if (thread == NULL) { 2044 pr_debug("Failed to get thread for pid %d.\n", sample->pid); 2045 return; 2046 } 2047 2048 if (!symbol_conf.use_callchain || sample->callchain == NULL) 2049 return; 2050 2051 if (thread__resolve_callchain(thread, cursor, evsel, sample, 2052 NULL, NULL, sched->max_stack + 2) != 0) { 2053 if (verbose > 0) 2054 error("Failed to resolve callchain. Skipping\n"); 2055 2056 return; 2057 } 2058 2059 callchain_cursor_commit(cursor); 2060 2061 while (true) { 2062 struct callchain_cursor_node *node; 2063 struct symbol *sym; 2064 2065 node = callchain_cursor_current(cursor); 2066 if (node == NULL) 2067 break; 2068 2069 sym = node->sym; 2070 if (sym) { 2071 if (!strcmp(sym->name, "schedule") || 2072 !strcmp(sym->name, "__schedule") || 2073 !strcmp(sym->name, "preempt_schedule")) 2074 sym->ignore = 1; 2075 } 2076 2077 callchain_cursor_advance(cursor); 2078 } 2079 } 2080 2081 static int init_idle_thread(struct thread *thread) 2082 { 2083 struct idle_thread_runtime *itr; 2084 2085 thread__set_comm(thread, idle_comm, 0); 2086 2087 itr = zalloc(sizeof(*itr)); 2088 if (itr == NULL) 2089 return -ENOMEM; 2090 2091 init_stats(&itr->tr.run_stats); 2092 callchain_init(&itr->callchain); 2093 callchain_cursor_reset(&itr->cursor); 2094 thread__set_priv(thread, itr); 2095 2096 return 0; 2097 } 2098 2099 /* 2100 * Track idle stats per cpu by maintaining a local thread 2101 * struct for the idle task on each cpu. 
2102 */ 2103 static int init_idle_threads(int ncpu) 2104 { 2105 int i, ret; 2106 2107 idle_threads = zalloc(ncpu * sizeof(struct thread *)); 2108 if (!idle_threads) 2109 return -ENOMEM; 2110 2111 idle_max_cpu = ncpu; 2112 2113 /* allocate the actual thread struct if needed */ 2114 for (i = 0; i < ncpu; ++i) { 2115 idle_threads[i] = thread__new(0, 0); 2116 if (idle_threads[i] == NULL) 2117 return -ENOMEM; 2118 2119 ret = init_idle_thread(idle_threads[i]); 2120 if (ret < 0) 2121 return ret; 2122 } 2123 2124 return 0; 2125 } 2126 2127 static void free_idle_threads(void) 2128 { 2129 int i; 2130 2131 if (idle_threads == NULL) 2132 return; 2133 2134 for (i = 0; i < idle_max_cpu; ++i) { 2135 if ((idle_threads[i])) 2136 thread__delete(idle_threads[i]); 2137 } 2138 2139 free(idle_threads); 2140 } 2141 2142 static struct thread *get_idle_thread(int cpu) 2143 { 2144 /* 2145 * expand/allocate array of pointers to local thread 2146 * structs if needed 2147 */ 2148 if ((cpu >= idle_max_cpu) || (idle_threads == NULL)) { 2149 int i, j = __roundup_pow_of_two(cpu+1); 2150 void *p; 2151 2152 p = realloc(idle_threads, j * sizeof(struct thread *)); 2153 if (!p) 2154 return NULL; 2155 2156 idle_threads = (struct thread **) p; 2157 for (i = idle_max_cpu; i < j; ++i) 2158 idle_threads[i] = NULL; 2159 2160 idle_max_cpu = j; 2161 } 2162 2163 /* allocate a new thread struct if needed */ 2164 if (idle_threads[cpu] == NULL) { 2165 idle_threads[cpu] = thread__new(0, 0); 2166 2167 if (idle_threads[cpu]) { 2168 if (init_idle_thread(idle_threads[cpu]) < 0) 2169 return NULL; 2170 } 2171 } 2172 2173 return idle_threads[cpu]; 2174 } 2175 2176 static void save_idle_callchain(struct idle_thread_runtime *itr, 2177 struct perf_sample *sample) 2178 { 2179 if (!symbol_conf.use_callchain || sample->callchain == NULL) 2180 return; 2181 2182 callchain_cursor__copy(&itr->cursor, &callchain_cursor); 2183 } 2184 2185 /* 2186 * handle runtime stats saved per thread 2187 */ 2188 static struct thread_runtime *thread__init_runtime(struct thread *thread) 2189 { 2190 struct thread_runtime *r; 2191 2192 r = zalloc(sizeof(struct thread_runtime)); 2193 if (!r) 2194 return NULL; 2195 2196 init_stats(&r->run_stats); 2197 thread__set_priv(thread, r); 2198 2199 return r; 2200 } 2201 2202 static struct thread_runtime *thread__get_runtime(struct thread *thread) 2203 { 2204 struct thread_runtime *tr; 2205 2206 tr = thread__priv(thread); 2207 if (tr == NULL) { 2208 tr = thread__init_runtime(thread); 2209 if (tr == NULL) 2210 pr_debug("Failed to malloc memory for runtime data.\n"); 2211 } 2212 2213 return tr; 2214 } 2215 2216 static struct thread *timehist_get_thread(struct perf_sched *sched, 2217 struct perf_sample *sample, 2218 struct machine *machine, 2219 struct perf_evsel *evsel) 2220 { 2221 struct thread *thread; 2222 2223 if (is_idle_sample(sample, evsel)) { 2224 thread = get_idle_thread(sample->cpu); 2225 if (thread == NULL) 2226 pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); 2227 2228 } else { 2229 /* there were samples with tid 0 but non-zero pid */ 2230 thread = machine__findnew_thread(machine, sample->pid, 2231 sample->tid ?: sample->pid); 2232 if (thread == NULL) { 2233 pr_debug("Failed to get thread for tid %d. 
skipping sample.\n", 2234 sample->tid); 2235 } 2236 2237 save_task_callchain(sched, sample, evsel, machine); 2238 if (sched->idle_hist) { 2239 struct thread *idle; 2240 struct idle_thread_runtime *itr; 2241 2242 idle = get_idle_thread(sample->cpu); 2243 if (idle == NULL) { 2244 pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); 2245 return NULL; 2246 } 2247 2248 itr = thread__priv(idle); 2249 if (itr == NULL) 2250 return NULL; 2251 2252 itr->last_thread = thread; 2253 2254 /* copy task callchain when entering to idle */ 2255 if (perf_evsel__intval(evsel, sample, "next_pid") == 0) 2256 save_idle_callchain(itr, sample); 2257 } 2258 } 2259 2260 return thread; 2261 } 2262 2263 static bool timehist_skip_sample(struct perf_sched *sched, 2264 struct thread *thread, 2265 struct perf_evsel *evsel, 2266 struct perf_sample *sample) 2267 { 2268 bool rc = false; 2269 2270 if (thread__is_filtered(thread)) { 2271 rc = true; 2272 sched->skipped_samples++; 2273 } 2274 2275 if (sched->idle_hist) { 2276 if (strcmp(perf_evsel__name(evsel), "sched:sched_switch")) 2277 rc = true; 2278 else if (perf_evsel__intval(evsel, sample, "prev_pid") != 0 && 2279 perf_evsel__intval(evsel, sample, "next_pid") != 0) 2280 rc = true; 2281 } 2282 2283 return rc; 2284 } 2285 2286 static void timehist_print_wakeup_event(struct perf_sched *sched, 2287 struct perf_evsel *evsel, 2288 struct perf_sample *sample, 2289 struct machine *machine, 2290 struct thread *awakened) 2291 { 2292 struct thread *thread; 2293 char tstr[64]; 2294 2295 thread = machine__findnew_thread(machine, sample->pid, sample->tid); 2296 if (thread == NULL) 2297 return; 2298 2299 /* show wakeup unless both awakee and awaker are filtered */ 2300 if (timehist_skip_sample(sched, thread, evsel, sample) && 2301 timehist_skip_sample(sched, awakened, evsel, sample)) { 2302 return; 2303 } 2304 2305 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); 2306 printf("%15s [%04d] ", tstr, sample->cpu); 2307 if (sched->show_cpu_visual) 2308 printf(" %*s ", sched->max_cpu + 1, ""); 2309 2310 printf(" %-*s ", comm_width, timehist_get_commstr(thread)); 2311 2312 /* dt spacer */ 2313 printf(" %9s %9s %9s ", "", "", ""); 2314 2315 printf("awakened: %s", timehist_get_commstr(awakened)); 2316 2317 printf("\n"); 2318 } 2319 2320 static int timehist_sched_wakeup_event(struct perf_tool *tool, 2321 union perf_event *event __maybe_unused, 2322 struct perf_evsel *evsel, 2323 struct perf_sample *sample, 2324 struct machine *machine) 2325 { 2326 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 2327 struct thread *thread; 2328 struct thread_runtime *tr = NULL; 2329 /* want pid of awakened task not pid in sample */ 2330 const u32 pid = perf_evsel__intval(evsel, sample, "pid"); 2331 2332 thread = machine__findnew_thread(machine, 0, pid); 2333 if (thread == NULL) 2334 return -1; 2335 2336 tr = thread__get_runtime(thread); 2337 if (tr == NULL) 2338 return -1; 2339 2340 if (tr->ready_to_run == 0) 2341 tr->ready_to_run = sample->time; 2342 2343 /* show wakeups if requested */ 2344 if (sched->show_wakeups && 2345 !perf_time__skip_sample(&sched->ptime, sample->time)) 2346 timehist_print_wakeup_event(sched, evsel, sample, machine, thread); 2347 2348 return 0; 2349 } 2350 2351 static void timehist_print_migration_event(struct perf_sched *sched, 2352 struct perf_evsel *evsel, 2353 struct perf_sample *sample, 2354 struct machine *machine, 2355 struct thread *migrated) 2356 { 2357 struct thread *thread; 2358 char tstr[64]; 2359 u32 max_cpus = sched->max_cpu + 1; 
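	/* origin and destination CPUs come from the migrate tracepoint fields read below */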
2360 u32 ocpu, dcpu; 2361 2362 if (sched->summary_only) 2363 return; 2364 2365 max_cpus = sched->max_cpu + 1; 2366 ocpu = perf_evsel__intval(evsel, sample, "orig_cpu"); 2367 dcpu = perf_evsel__intval(evsel, sample, "dest_cpu"); 2368 2369 thread = machine__findnew_thread(machine, sample->pid, sample->tid); 2370 if (thread == NULL) 2371 return; 2372 2373 if (timehist_skip_sample(sched, thread, evsel, sample) && 2374 timehist_skip_sample(sched, migrated, evsel, sample)) { 2375 return; 2376 } 2377 2378 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); 2379 printf("%15s [%04d] ", tstr, sample->cpu); 2380 2381 if (sched->show_cpu_visual) { 2382 u32 i; 2383 char c; 2384 2385 printf(" "); 2386 for (i = 0; i < max_cpus; ++i) { 2387 c = (i == sample->cpu) ? 'm' : ' '; 2388 printf("%c", c); 2389 } 2390 printf(" "); 2391 } 2392 2393 printf(" %-*s ", comm_width, timehist_get_commstr(thread)); 2394 2395 /* dt spacer */ 2396 printf(" %9s %9s %9s ", "", "", ""); 2397 2398 printf("migrated: %s", timehist_get_commstr(migrated)); 2399 printf(" cpu %d => %d", ocpu, dcpu); 2400 2401 printf("\n"); 2402 } 2403 2404 static int timehist_migrate_task_event(struct perf_tool *tool, 2405 union perf_event *event __maybe_unused, 2406 struct perf_evsel *evsel, 2407 struct perf_sample *sample, 2408 struct machine *machine) 2409 { 2410 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 2411 struct thread *thread; 2412 struct thread_runtime *tr = NULL; 2413 /* want pid of migrated task not pid in sample */ 2414 const u32 pid = perf_evsel__intval(evsel, sample, "pid"); 2415 2416 thread = machine__findnew_thread(machine, 0, pid); 2417 if (thread == NULL) 2418 return -1; 2419 2420 tr = thread__get_runtime(thread); 2421 if (tr == NULL) 2422 return -1; 2423 2424 tr->migrations++; 2425 2426 /* show migrations if requested */ 2427 timehist_print_migration_event(sched, evsel, sample, machine, thread); 2428 2429 return 0; 2430 } 2431 2432 static int timehist_sched_change_event(struct perf_tool *tool, 2433 union perf_event *event, 2434 struct perf_evsel *evsel, 2435 struct perf_sample *sample, 2436 struct machine *machine) 2437 { 2438 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 2439 struct perf_time_interval *ptime = &sched->ptime; 2440 struct addr_location al; 2441 struct thread *thread; 2442 struct thread_runtime *tr = NULL; 2443 u64 tprev, t = sample->time; 2444 int rc = 0; 2445 int state = perf_evsel__intval(evsel, sample, "prev_state"); 2446 2447 2448 if (machine__resolve(machine, &al, sample) < 0) { 2449 pr_err("problem processing %d event. 
skipping it\n", 2450 event->header.type); 2451 rc = -1; 2452 goto out; 2453 } 2454 2455 thread = timehist_get_thread(sched, sample, machine, evsel); 2456 if (thread == NULL) { 2457 rc = -1; 2458 goto out; 2459 } 2460 2461 if (timehist_skip_sample(sched, thread, evsel, sample)) 2462 goto out; 2463 2464 tr = thread__get_runtime(thread); 2465 if (tr == NULL) { 2466 rc = -1; 2467 goto out; 2468 } 2469 2470 tprev = perf_evsel__get_time(evsel, sample->cpu); 2471 2472 /* 2473 * If start time given: 2474 * - sample time is under window user cares about - skip sample 2475 * - tprev is under window user cares about - reset to start of window 2476 */ 2477 if (ptime->start && ptime->start > t) 2478 goto out; 2479 2480 if (tprev && ptime->start > tprev) 2481 tprev = ptime->start; 2482 2483 /* 2484 * If end time given: 2485 * - previous sched event is out of window - we are done 2486 * - sample time is beyond window user cares about - reset it 2487 * to close out stats for time window interest 2488 */ 2489 if (ptime->end) { 2490 if (tprev > ptime->end) 2491 goto out; 2492 2493 if (t > ptime->end) 2494 t = ptime->end; 2495 } 2496 2497 if (!sched->idle_hist || thread->tid == 0) { 2498 timehist_update_runtime_stats(tr, t, tprev); 2499 2500 if (sched->idle_hist) { 2501 struct idle_thread_runtime *itr = (void *)tr; 2502 struct thread_runtime *last_tr; 2503 2504 BUG_ON(thread->tid != 0); 2505 2506 if (itr->last_thread == NULL) 2507 goto out; 2508 2509 /* add current idle time as last thread's runtime */ 2510 last_tr = thread__get_runtime(itr->last_thread); 2511 if (last_tr == NULL) 2512 goto out; 2513 2514 timehist_update_runtime_stats(last_tr, t, tprev); 2515 /* 2516 * remove delta time of last thread as it's not updated 2517 * and otherwise it will show an invalid value next 2518 * time. we only care total run time and run stat. 
2519 */ 2520 last_tr->dt_run = 0; 2521 last_tr->dt_delay = 0; 2522 last_tr->dt_sleep = 0; 2523 last_tr->dt_iowait = 0; 2524 last_tr->dt_preempt = 0; 2525 2526 if (itr->cursor.nr) 2527 callchain_append(&itr->callchain, &itr->cursor, t - tprev); 2528 2529 itr->last_thread = NULL; 2530 } 2531 } 2532 2533 if (!sched->summary_only) 2534 timehist_print_sample(sched, sample, &al, thread, t, state); 2535 2536 out: 2537 if (sched->hist_time.start == 0 && t >= ptime->start) 2538 sched->hist_time.start = t; 2539 if (ptime->end == 0 || t <= ptime->end) 2540 sched->hist_time.end = t; 2541 2542 if (tr) { 2543 /* time of this sched_switch event becomes last time task seen */ 2544 tr->last_time = sample->time; 2545 2546 /* last state is used to determine where to account wait time */ 2547 tr->last_state = state; 2548 2549 /* sched out event for task so reset ready to run time */ 2550 tr->ready_to_run = 0; 2551 } 2552 2553 perf_evsel__save_time(evsel, sample->time, sample->cpu); 2554 2555 return rc; 2556 } 2557 2558 static int timehist_sched_switch_event(struct perf_tool *tool, 2559 union perf_event *event, 2560 struct perf_evsel *evsel, 2561 struct perf_sample *sample, 2562 struct machine *machine __maybe_unused) 2563 { 2564 return timehist_sched_change_event(tool, event, evsel, sample, machine); 2565 } 2566 2567 static int process_lost(struct perf_tool *tool __maybe_unused, 2568 union perf_event *event, 2569 struct perf_sample *sample, 2570 struct machine *machine __maybe_unused) 2571 { 2572 char tstr[64]; 2573 2574 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); 2575 printf("%15s ", tstr); 2576 printf("lost %" PRIu64 " events on cpu %d\n", event->lost.lost, sample->cpu); 2577 2578 return 0; 2579 } 2580 2581 2582 static void print_thread_runtime(struct thread *t, 2583 struct thread_runtime *r) 2584 { 2585 double mean = avg_stats(&r->run_stats); 2586 float stddev; 2587 2588 printf("%*s %5d %9" PRIu64 " ", 2589 comm_width, timehist_get_commstr(t), t->ppid, 2590 (u64) r->run_stats.n); 2591 2592 print_sched_time(r->total_run_time, 8); 2593 stddev = rel_stddev_stats(stddev_stats(&r->run_stats), mean); 2594 print_sched_time(r->run_stats.min, 6); 2595 printf(" "); 2596 print_sched_time((u64) mean, 6); 2597 printf(" "); 2598 print_sched_time(r->run_stats.max, 6); 2599 printf(" "); 2600 printf("%5.2f", stddev); 2601 printf(" %5" PRIu64, r->migrations); 2602 printf("\n"); 2603 } 2604 2605 static void print_thread_waittime(struct thread *t, 2606 struct thread_runtime *r) 2607 { 2608 printf("%*s %5d %9" PRIu64 " ", 2609 comm_width, timehist_get_commstr(t), t->ppid, 2610 (u64) r->run_stats.n); 2611 2612 print_sched_time(r->total_run_time, 8); 2613 print_sched_time(r->total_sleep_time, 6); 2614 printf(" "); 2615 print_sched_time(r->total_iowait_time, 6); 2616 printf(" "); 2617 print_sched_time(r->total_preempt_time, 6); 2618 printf(" "); 2619 print_sched_time(r->total_delay_time, 6); 2620 printf("\n"); 2621 } 2622 2623 struct total_run_stats { 2624 struct perf_sched *sched; 2625 u64 sched_count; 2626 u64 task_count; 2627 u64 total_run_time; 2628 }; 2629 2630 static int __show_thread_runtime(struct thread *t, void *priv) 2631 { 2632 struct total_run_stats *stats = priv; 2633 struct thread_runtime *r; 2634 2635 if (thread__is_filtered(t)) 2636 return 0; 2637 2638 r = thread__priv(t); 2639 if (r && r->run_stats.n) { 2640 stats->task_count++; 2641 stats->sched_count += r->run_stats.n; 2642 stats->total_run_time += r->total_run_time; 2643 2644 if (stats->sched->show_state) 2645 print_thread_waittime(t, r); 
2646 else 2647 print_thread_runtime(t, r); 2648 } 2649 2650 return 0; 2651 } 2652 2653 static int show_thread_runtime(struct thread *t, void *priv) 2654 { 2655 if (t->dead) 2656 return 0; 2657 2658 return __show_thread_runtime(t, priv); 2659 } 2660 2661 static int show_deadthread_runtime(struct thread *t, void *priv) 2662 { 2663 if (!t->dead) 2664 return 0; 2665 2666 return __show_thread_runtime(t, priv); 2667 } 2668 2669 static size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node) 2670 { 2671 const char *sep = " <- "; 2672 struct callchain_list *chain; 2673 size_t ret = 0; 2674 char bf[1024]; 2675 bool first; 2676 2677 if (node == NULL) 2678 return 0; 2679 2680 ret = callchain__fprintf_folded(fp, node->parent); 2681 first = (ret == 0); 2682 2683 list_for_each_entry(chain, &node->val, list) { 2684 if (chain->ip >= PERF_CONTEXT_MAX) 2685 continue; 2686 if (chain->ms.sym && chain->ms.sym->ignore) 2687 continue; 2688 ret += fprintf(fp, "%s%s", first ? "" : sep, 2689 callchain_list__sym_name(chain, bf, sizeof(bf), 2690 false)); 2691 first = false; 2692 } 2693 2694 return ret; 2695 } 2696 2697 static size_t timehist_print_idlehist_callchain(struct rb_root *root) 2698 { 2699 size_t ret = 0; 2700 FILE *fp = stdout; 2701 struct callchain_node *chain; 2702 struct rb_node *rb_node = rb_first(root); 2703 2704 printf(" %16s %8s %s\n", "Idle time (msec)", "Count", "Callchains"); 2705 printf(" %.16s %.8s %.50s\n", graph_dotted_line, graph_dotted_line, 2706 graph_dotted_line); 2707 2708 while (rb_node) { 2709 chain = rb_entry(rb_node, struct callchain_node, rb_node); 2710 rb_node = rb_next(rb_node); 2711 2712 ret += fprintf(fp, " "); 2713 print_sched_time(chain->hit, 12); 2714 ret += 16; /* print_sched_time returns 2nd arg + 4 */ 2715 ret += fprintf(fp, " %8d ", chain->count); 2716 ret += callchain__fprintf_folded(fp, chain); 2717 ret += fprintf(fp, "\n"); 2718 } 2719 2720 return ret; 2721 } 2722 2723 static void timehist_print_summary(struct perf_sched *sched, 2724 struct perf_session *session) 2725 { 2726 struct machine *m = &session->machines.host; 2727 struct total_run_stats totals; 2728 u64 task_count; 2729 struct thread *t; 2730 struct thread_runtime *r; 2731 int i; 2732 u64 hist_time = sched->hist_time.end - sched->hist_time.start; 2733 2734 memset(&totals, 0, sizeof(totals)); 2735 totals.sched = sched; 2736 2737 if (sched->idle_hist) { 2738 printf("\nIdle-time summary\n"); 2739 printf("%*s parent sched-out ", comm_width, "comm"); 2740 printf(" idle-time min-idle avg-idle max-idle stddev migrations\n"); 2741 } else if (sched->show_state) { 2742 printf("\nWait-time summary\n"); 2743 printf("%*s parent sched-in ", comm_width, "comm"); 2744 printf(" run-time sleep iowait preempt delay\n"); 2745 } else { 2746 printf("\nRuntime summary\n"); 2747 printf("%*s parent sched-in ", comm_width, "comm"); 2748 printf(" run-time min-run avg-run max-run stddev migrations\n"); 2749 } 2750 printf("%*s (count) ", comm_width, ""); 2751 printf(" (msec) (msec) (msec) (msec) %s\n", 2752 sched->show_state ? 
"(msec)" : "%"); 2753 printf("%.117s\n", graph_dotted_line); 2754 2755 machine__for_each_thread(m, show_thread_runtime, &totals); 2756 task_count = totals.task_count; 2757 if (!task_count) 2758 printf("<no still running tasks>\n"); 2759 2760 printf("\nTerminated tasks:\n"); 2761 machine__for_each_thread(m, show_deadthread_runtime, &totals); 2762 if (task_count == totals.task_count) 2763 printf("<no terminated tasks>\n"); 2764 2765 /* CPU idle stats not tracked when samples were skipped */ 2766 if (sched->skipped_samples && !sched->idle_hist) 2767 return; 2768 2769 printf("\nIdle stats:\n"); 2770 for (i = 0; i < idle_max_cpu; ++i) { 2771 t = idle_threads[i]; 2772 if (!t) 2773 continue; 2774 2775 r = thread__priv(t); 2776 if (r && r->run_stats.n) { 2777 totals.sched_count += r->run_stats.n; 2778 printf(" CPU %2d idle for ", i); 2779 print_sched_time(r->total_run_time, 6); 2780 printf(" msec (%6.2f%%)\n", 100.0 * r->total_run_time / hist_time); 2781 } else 2782 printf(" CPU %2d idle entire time window\n", i); 2783 } 2784 2785 if (sched->idle_hist && symbol_conf.use_callchain) { 2786 callchain_param.mode = CHAIN_FOLDED; 2787 callchain_param.value = CCVAL_PERIOD; 2788 2789 callchain_register_param(&callchain_param); 2790 2791 printf("\nIdle stats by callchain:\n"); 2792 for (i = 0; i < idle_max_cpu; ++i) { 2793 struct idle_thread_runtime *itr; 2794 2795 t = idle_threads[i]; 2796 if (!t) 2797 continue; 2798 2799 itr = thread__priv(t); 2800 if (itr == NULL) 2801 continue; 2802 2803 callchain_param.sort(&itr->sorted_root, &itr->callchain, 2804 0, &callchain_param); 2805 2806 printf(" CPU %2d:", i); 2807 print_sched_time(itr->tr.total_run_time, 6); 2808 printf(" msec\n"); 2809 timehist_print_idlehist_callchain(&itr->sorted_root); 2810 printf("\n"); 2811 } 2812 } 2813 2814 printf("\n" 2815 " Total number of unique tasks: %" PRIu64 "\n" 2816 "Total number of context switches: %" PRIu64 "\n", 2817 totals.task_count, totals.sched_count); 2818 2819 printf(" Total run time (msec): "); 2820 print_sched_time(totals.total_run_time, 2); 2821 printf("\n"); 2822 2823 printf(" Total scheduling time (msec): "); 2824 print_sched_time(hist_time, 2); 2825 printf(" (x %d)\n", sched->max_cpu); 2826 } 2827 2828 typedef int (*sched_handler)(struct perf_tool *tool, 2829 union perf_event *event, 2830 struct perf_evsel *evsel, 2831 struct perf_sample *sample, 2832 struct machine *machine); 2833 2834 static int perf_timehist__process_sample(struct perf_tool *tool, 2835 union perf_event *event, 2836 struct perf_sample *sample, 2837 struct perf_evsel *evsel, 2838 struct machine *machine) 2839 { 2840 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 2841 int err = 0; 2842 int this_cpu = sample->cpu; 2843 2844 if (this_cpu > sched->max_cpu) 2845 sched->max_cpu = this_cpu; 2846 2847 if (evsel->handler != NULL) { 2848 sched_handler f = evsel->handler; 2849 2850 err = f(tool, event, evsel, sample, machine); 2851 } 2852 2853 return err; 2854 } 2855 2856 static int timehist_check_attr(struct perf_sched *sched, 2857 struct perf_evlist *evlist) 2858 { 2859 struct perf_evsel *evsel; 2860 struct evsel_runtime *er; 2861 2862 list_for_each_entry(evsel, &evlist->entries, node) { 2863 er = perf_evsel__get_runtime(evsel); 2864 if (er == NULL) { 2865 pr_err("Failed to allocate memory for evsel runtime data\n"); 2866 return -1; 2867 } 2868 2869 if (sched->show_callchain && 2870 !(evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) { 2871 pr_info("Samples do not have callchains.\n"); 2872 sched->show_callchain = 0; 2873 
symbol_conf.use_callchain = 0; 2874 } 2875 } 2876 2877 return 0; 2878 } 2879 2880 static int perf_sched__timehist(struct perf_sched *sched) 2881 { 2882 const struct perf_evsel_str_handler handlers[] = { 2883 { "sched:sched_switch", timehist_sched_switch_event, }, 2884 { "sched:sched_wakeup", timehist_sched_wakeup_event, }, 2885 { "sched:sched_wakeup_new", timehist_sched_wakeup_event, }, 2886 }; 2887 const struct perf_evsel_str_handler migrate_handlers[] = { 2888 { "sched:sched_migrate_task", timehist_migrate_task_event, }, 2889 }; 2890 struct perf_data_file file = { 2891 .path = input_name, 2892 .mode = PERF_DATA_MODE_READ, 2893 .force = sched->force, 2894 }; 2895 2896 struct perf_session *session; 2897 struct perf_evlist *evlist; 2898 int err = -1; 2899 2900 /* 2901 * event handlers for timehist option 2902 */ 2903 sched->tool.sample = perf_timehist__process_sample; 2904 sched->tool.mmap = perf_event__process_mmap; 2905 sched->tool.comm = perf_event__process_comm; 2906 sched->tool.exit = perf_event__process_exit; 2907 sched->tool.fork = perf_event__process_fork; 2908 sched->tool.lost = process_lost; 2909 sched->tool.attr = perf_event__process_attr; 2910 sched->tool.tracing_data = perf_event__process_tracing_data; 2911 sched->tool.build_id = perf_event__process_build_id; 2912 2913 sched->tool.ordered_events = true; 2914 sched->tool.ordering_requires_timestamps = true; 2915 2916 symbol_conf.use_callchain = sched->show_callchain; 2917 2918 session = perf_session__new(&file, false, &sched->tool); 2919 if (session == NULL) 2920 return -ENOMEM; 2921 2922 evlist = session->evlist; 2923 2924 symbol__init(&session->header.env); 2925 2926 if (perf_time__parse_str(&sched->ptime, sched->time_str) != 0) { 2927 pr_err("Invalid time string\n"); 2928 return -EINVAL; 2929 } 2930 2931 if (timehist_check_attr(sched, evlist) != 0) 2932 goto out; 2933 2934 setup_pager(); 2935 2936 /* setup per-evsel handlers */ 2937 if (perf_session__set_tracepoints_handlers(session, handlers)) 2938 goto out; 2939 2940 /* sched_switch event at a minimum needs to exist */ 2941 if (!perf_evlist__find_tracepoint_by_name(session->evlist, 2942 "sched:sched_switch")) { 2943 pr_err("No sched_switch events found. 
Have you run 'perf sched record'?\n"); 2944 goto out; 2945 } 2946 2947 if (sched->show_migrations && 2948 perf_session__set_tracepoints_handlers(session, migrate_handlers)) 2949 goto out; 2950 2951 /* pre-allocate struct for per-CPU idle stats */ 2952 sched->max_cpu = session->header.env.nr_cpus_online; 2953 if (sched->max_cpu == 0) 2954 sched->max_cpu = 4; 2955 if (init_idle_threads(sched->max_cpu)) 2956 goto out; 2957 2958 /* summary_only implies summary option, but don't overwrite summary if set */ 2959 if (sched->summary_only) 2960 sched->summary = sched->summary_only; 2961 2962 if (!sched->summary_only) 2963 timehist_header(sched); 2964 2965 err = perf_session__process_events(session); 2966 if (err) { 2967 pr_err("Failed to process events, error %d", err); 2968 goto out; 2969 } 2970 2971 sched->nr_events = evlist->stats.nr_events[0]; 2972 sched->nr_lost_events = evlist->stats.total_lost; 2973 sched->nr_lost_chunks = evlist->stats.nr_events[PERF_RECORD_LOST]; 2974 2975 if (sched->summary) 2976 timehist_print_summary(sched, session); 2977 2978 out: 2979 free_idle_threads(); 2980 perf_session__delete(session); 2981 2982 return err; 2983 } 2984 2985 2986 static void print_bad_events(struct perf_sched *sched) 2987 { 2988 if (sched->nr_unordered_timestamps && sched->nr_timestamps) { 2989 printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n", 2990 (double)sched->nr_unordered_timestamps/(double)sched->nr_timestamps*100.0, 2991 sched->nr_unordered_timestamps, sched->nr_timestamps); 2992 } 2993 if (sched->nr_lost_events && sched->nr_events) { 2994 printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n", 2995 (double)sched->nr_lost_events/(double)sched->nr_events * 100.0, 2996 sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks); 2997 } 2998 if (sched->nr_context_switch_bugs && sched->nr_timestamps) { 2999 printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)", 3000 (double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0, 3001 sched->nr_context_switch_bugs, sched->nr_timestamps); 3002 if (sched->nr_lost_events) 3003 printf(" (due to lost events?)"); 3004 printf("\n"); 3005 } 3006 } 3007 3008 static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data) 3009 { 3010 struct rb_node **new = &(root->rb_node), *parent = NULL; 3011 struct work_atoms *this; 3012 const char *comm = thread__comm_str(data->thread), *this_comm; 3013 3014 while (*new) { 3015 int cmp; 3016 3017 this = container_of(*new, struct work_atoms, node); 3018 parent = *new; 3019 3020 this_comm = thread__comm_str(this->thread); 3021 cmp = strcmp(comm, this_comm); 3022 if (cmp > 0) { 3023 new = &((*new)->rb_left); 3024 } else if (cmp < 0) { 3025 new = &((*new)->rb_right); 3026 } else { 3027 this->num_merged++; 3028 this->total_runtime += data->total_runtime; 3029 this->nb_atoms += data->nb_atoms; 3030 this->total_lat += data->total_lat; 3031 list_splice(&data->work_list, &this->work_list); 3032 if (this->max_lat < data->max_lat) { 3033 this->max_lat = data->max_lat; 3034 this->max_lat_at = data->max_lat_at; 3035 } 3036 zfree(&data); 3037 return; 3038 } 3039 } 3040 3041 data->num_merged++; 3042 rb_link_node(&data->node, parent, new); 3043 rb_insert_color(&data->node, root); 3044 } 3045 3046 static void perf_sched__merge_lat(struct perf_sched *sched) 3047 { 3048 struct work_atoms *data; 3049 struct rb_node *node; 3050 3051 if (sched->skip_merge) 3052 return; 3053 3054 while ((node = rb_first(&sched->atom_root))) { 3055 rb_erase(node, &sched->atom_root); 3056 data = 
rb_entry(node, struct work_atoms, node); 3057 __merge_work_atoms(&sched->merged_atom_root, data); 3058 } 3059 } 3060 3061 static int perf_sched__lat(struct perf_sched *sched) 3062 { 3063 struct rb_node *next; 3064 3065 setup_pager(); 3066 3067 if (perf_sched__read_events(sched)) 3068 return -1; 3069 3070 perf_sched__merge_lat(sched); 3071 perf_sched__sort_lat(sched); 3072 3073 printf("\n -----------------------------------------------------------------------------------------------------------------\n"); 3074 printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n"); 3075 printf(" -----------------------------------------------------------------------------------------------------------------\n"); 3076 3077 next = rb_first(&sched->sorted_atom_root); 3078 3079 while (next) { 3080 struct work_atoms *work_list; 3081 3082 work_list = rb_entry(next, struct work_atoms, node); 3083 output_lat_thread(sched, work_list); 3084 next = rb_next(next); 3085 thread__zput(work_list->thread); 3086 } 3087 3088 printf(" -----------------------------------------------------------------------------------------------------------------\n"); 3089 printf(" TOTAL: |%11.3f ms |%9" PRIu64 " |\n", 3090 (double)sched->all_runtime / NSEC_PER_MSEC, sched->all_count); 3091 3092 printf(" ---------------------------------------------------\n"); 3093 3094 print_bad_events(sched); 3095 printf("\n"); 3096 3097 return 0; 3098 } 3099 3100 static int setup_map_cpus(struct perf_sched *sched) 3101 { 3102 struct cpu_map *map; 3103 3104 sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF); 3105 3106 if (sched->map.comp) { 3107 sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int)); 3108 if (!sched->map.comp_cpus) 3109 return -1; 3110 } 3111 3112 if (!sched->map.cpus_str) 3113 return 0; 3114 3115 map = cpu_map__new(sched->map.cpus_str); 3116 if (!map) { 3117 pr_err("failed to get cpus map from %s\n", sched->map.cpus_str); 3118 return -1; 3119 } 3120 3121 sched->map.cpus = map; 3122 return 0; 3123 } 3124 3125 static int setup_color_pids(struct perf_sched *sched) 3126 { 3127 struct thread_map *map; 3128 3129 if (!sched->map.color_pids_str) 3130 return 0; 3131 3132 map = thread_map__new_by_tid_str(sched->map.color_pids_str); 3133 if (!map) { 3134 pr_err("failed to get thread map from %s\n", sched->map.color_pids_str); 3135 return -1; 3136 } 3137 3138 sched->map.color_pids = map; 3139 return 0; 3140 } 3141 3142 static int setup_color_cpus(struct perf_sched *sched) 3143 { 3144 struct cpu_map *map; 3145 3146 if (!sched->map.color_cpus_str) 3147 return 0; 3148 3149 map = cpu_map__new(sched->map.color_cpus_str); 3150 if (!map) { 3151 pr_err("failed to get thread map from %s\n", sched->map.color_cpus_str); 3152 return -1; 3153 } 3154 3155 sched->map.color_cpus = map; 3156 return 0; 3157 } 3158 3159 static int perf_sched__map(struct perf_sched *sched) 3160 { 3161 if (setup_map_cpus(sched)) 3162 return -1; 3163 3164 if (setup_color_pids(sched)) 3165 return -1; 3166 3167 if (setup_color_cpus(sched)) 3168 return -1; 3169 3170 setup_pager(); 3171 if (perf_sched__read_events(sched)) 3172 return -1; 3173 print_bad_events(sched); 3174 return 0; 3175 } 3176 3177 static int perf_sched__replay(struct perf_sched *sched) 3178 { 3179 unsigned long i; 3180 3181 calibrate_run_measurement_overhead(sched); 3182 calibrate_sleep_measurement_overhead(sched); 3183 3184 test_calibrations(sched); 3185 3186 if (perf_sched__read_events(sched)) 3187 return -1; 3188 3189 printf("nr_run_events: %ld\n", sched->nr_run_events); 3190 
printf("nr_sleep_events: %ld\n", sched->nr_sleep_events); 3191 printf("nr_wakeup_events: %ld\n", sched->nr_wakeup_events); 3192 3193 if (sched->targetless_wakeups) 3194 printf("target-less wakeups: %ld\n", sched->targetless_wakeups); 3195 if (sched->multitarget_wakeups) 3196 printf("multi-target wakeups: %ld\n", sched->multitarget_wakeups); 3197 if (sched->nr_run_events_optimized) 3198 printf("run atoms optimized: %ld\n", 3199 sched->nr_run_events_optimized); 3200 3201 print_task_traces(sched); 3202 add_cross_task_wakeups(sched); 3203 3204 create_tasks(sched); 3205 printf("------------------------------------------------------------\n"); 3206 for (i = 0; i < sched->replay_repeat; i++) 3207 run_one_test(sched); 3208 3209 return 0; 3210 } 3211 3212 static void setup_sorting(struct perf_sched *sched, const struct option *options, 3213 const char * const usage_msg[]) 3214 { 3215 char *tmp, *tok, *str = strdup(sched->sort_order); 3216 3217 for (tok = strtok_r(str, ", ", &tmp); 3218 tok; tok = strtok_r(NULL, ", ", &tmp)) { 3219 if (sort_dimension__add(tok, &sched->sort_list) < 0) { 3220 usage_with_options_msg(usage_msg, options, 3221 "Unknown --sort key: `%s'", tok); 3222 } 3223 } 3224 3225 free(str); 3226 3227 sort_dimension__add("pid", &sched->cmp_pid); 3228 } 3229 3230 static int __cmd_record(int argc, const char **argv) 3231 { 3232 unsigned int rec_argc, i, j; 3233 const char **rec_argv; 3234 const char * const record_args[] = { 3235 "record", 3236 "-a", 3237 "-R", 3238 "-m", "1024", 3239 "-c", "1", 3240 "-e", "sched:sched_switch", 3241 "-e", "sched:sched_stat_wait", 3242 "-e", "sched:sched_stat_sleep", 3243 "-e", "sched:sched_stat_iowait", 3244 "-e", "sched:sched_stat_runtime", 3245 "-e", "sched:sched_process_fork", 3246 "-e", "sched:sched_wakeup", 3247 "-e", "sched:sched_wakeup_new", 3248 "-e", "sched:sched_migrate_task", 3249 }; 3250 3251 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 3252 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 3253 3254 if (rec_argv == NULL) 3255 return -ENOMEM; 3256 3257 for (i = 0; i < ARRAY_SIZE(record_args); i++) 3258 rec_argv[i] = strdup(record_args[i]); 3259 3260 for (j = 1; j < (unsigned int)argc; j++, i++) 3261 rec_argv[i] = argv[j]; 3262 3263 BUG_ON(i != rec_argc); 3264 3265 return cmd_record(i, rec_argv, NULL); 3266 } 3267 3268 int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) 3269 { 3270 const char default_sort_order[] = "avg, max, switch, runtime"; 3271 struct perf_sched sched = { 3272 .tool = { 3273 .sample = perf_sched__process_tracepoint_sample, 3274 .comm = perf_event__process_comm, 3275 .lost = perf_event__process_lost, 3276 .fork = perf_sched__process_fork_event, 3277 .ordered_events = true, 3278 }, 3279 .cmp_pid = LIST_HEAD_INIT(sched.cmp_pid), 3280 .sort_list = LIST_HEAD_INIT(sched.sort_list), 3281 .start_work_mutex = PTHREAD_MUTEX_INITIALIZER, 3282 .work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER, 3283 .sort_order = default_sort_order, 3284 .replay_repeat = 10, 3285 .profile_cpu = -1, 3286 .next_shortname1 = 'A', 3287 .next_shortname2 = '0', 3288 .skip_merge = 0, 3289 .show_callchain = 1, 3290 .max_stack = 5, 3291 }; 3292 const struct option sched_options[] = { 3293 OPT_STRING('i', "input", &input_name, "file", 3294 "input file name"), 3295 OPT_INCR('v', "verbose", &verbose, 3296 "be more verbose (show symbol address, etc)"), 3297 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 3298 "dump raw trace in ASCII"), 3299 OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"), 3300 OPT_END() 3301 }; 3302 
const struct option latency_options[] = { 3303 OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]", 3304 "sort by key(s): runtime, switch, avg, max"), 3305 OPT_INTEGER('C', "CPU", &sched.profile_cpu, 3306 "CPU to profile on"), 3307 OPT_BOOLEAN('p', "pids", &sched.skip_merge, 3308 "latency stats per pid instead of per comm"), 3309 OPT_PARENT(sched_options) 3310 }; 3311 const struct option replay_options[] = { 3312 OPT_UINTEGER('r', "repeat", &sched.replay_repeat, 3313 "repeat the workload replay N times (-1: infinite)"), 3314 OPT_PARENT(sched_options) 3315 }; 3316 const struct option map_options[] = { 3317 OPT_BOOLEAN(0, "compact", &sched.map.comp, 3318 "map output in compact mode"), 3319 OPT_STRING(0, "color-pids", &sched.map.color_pids_str, "pids", 3320 "highlight given pids in map"), 3321 OPT_STRING(0, "color-cpus", &sched.map.color_cpus_str, "cpus", 3322 "highlight given CPUs in map"), 3323 OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus", 3324 "display given CPUs in map"), 3325 OPT_PARENT(sched_options) 3326 }; 3327 const struct option timehist_options[] = { 3328 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 3329 "file", "vmlinux pathname"), 3330 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, 3331 "file", "kallsyms pathname"), 3332 OPT_BOOLEAN('g', "call-graph", &sched.show_callchain, 3333 "Display call chains if present (default on)"), 3334 OPT_UINTEGER(0, "max-stack", &sched.max_stack, 3335 "Maximum number of functions to display backtrace."), 3336 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", 3337 "Look for files with symbols relative to this directory"), 3338 OPT_BOOLEAN('s', "summary", &sched.summary_only, 3339 "Show only syscall summary with statistics"), 3340 OPT_BOOLEAN('S', "with-summary", &sched.summary, 3341 "Show all syscalls and summary with statistics"), 3342 OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"), 3343 OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"), 3344 OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"), 3345 OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"), 3346 OPT_STRING(0, "time", &sched.time_str, "str", 3347 "Time span for analysis (start,stop)"), 3348 OPT_BOOLEAN(0, "state", &sched.show_state, "Show task state when sched-out"), 3349 OPT_PARENT(sched_options) 3350 }; 3351 3352 const char * const latency_usage[] = { 3353 "perf sched latency [<options>]", 3354 NULL 3355 }; 3356 const char * const replay_usage[] = { 3357 "perf sched replay [<options>]", 3358 NULL 3359 }; 3360 const char * const map_usage[] = { 3361 "perf sched map [<options>]", 3362 NULL 3363 }; 3364 const char * const timehist_usage[] = { 3365 "perf sched timehist [<options>]", 3366 NULL 3367 }; 3368 const char *const sched_subcommands[] = { "record", "latency", "map", 3369 "replay", "script", 3370 "timehist", NULL }; 3371 const char *sched_usage[] = { 3372 NULL, 3373 NULL 3374 }; 3375 struct trace_sched_handler lat_ops = { 3376 .wakeup_event = latency_wakeup_event, 3377 .switch_event = latency_switch_event, 3378 .runtime_event = latency_runtime_event, 3379 .migrate_task_event = latency_migrate_task_event, 3380 }; 3381 struct trace_sched_handler map_ops = { 3382 .switch_event = map_switch_event, 3383 }; 3384 struct trace_sched_handler replay_ops = { 3385 .wakeup_event = replay_wakeup_event, 3386 .switch_event = replay_switch_event, 3387 .fork_event = replay_fork_event, 3388 }; 3389 unsigned int i; 3390 3391 for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++) 
3392 sched.curr_pid[i] = -1; 3393 3394 argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands, 3395 sched_usage, PARSE_OPT_STOP_AT_NON_OPTION); 3396 if (!argc) 3397 usage_with_options(sched_usage, sched_options); 3398 3399 /* 3400 * Aliased to 'perf script' for now: 3401 */ 3402 if (!strcmp(argv[0], "script")) 3403 return cmd_script(argc, argv, prefix); 3404 3405 if (!strncmp(argv[0], "rec", 3)) { 3406 return __cmd_record(argc, argv); 3407 } else if (!strncmp(argv[0], "lat", 3)) { 3408 sched.tp_handler = &lat_ops; 3409 if (argc > 1) { 3410 argc = parse_options(argc, argv, latency_options, latency_usage, 0); 3411 if (argc) 3412 usage_with_options(latency_usage, latency_options); 3413 } 3414 setup_sorting(&sched, latency_options, latency_usage); 3415 return perf_sched__lat(&sched); 3416 } else if (!strcmp(argv[0], "map")) { 3417 if (argc) { 3418 argc = parse_options(argc, argv, map_options, map_usage, 0); 3419 if (argc) 3420 usage_with_options(map_usage, map_options); 3421 } 3422 sched.tp_handler = &map_ops; 3423 setup_sorting(&sched, latency_options, latency_usage); 3424 return perf_sched__map(&sched); 3425 } else if (!strncmp(argv[0], "rep", 3)) { 3426 sched.tp_handler = &replay_ops; 3427 if (argc) { 3428 argc = parse_options(argc, argv, replay_options, replay_usage, 0); 3429 if (argc) 3430 usage_with_options(replay_usage, replay_options); 3431 } 3432 return perf_sched__replay(&sched); 3433 } else if (!strcmp(argv[0], "timehist")) { 3434 if (argc) { 3435 argc = parse_options(argc, argv, timehist_options, 3436 timehist_usage, 0); 3437 if (argc) 3438 usage_with_options(timehist_usage, timehist_options); 3439 } 3440 if (sched.show_wakeups && sched.summary_only) { 3441 pr_err(" Error: -s and -w are mutually exclusive.\n"); 3442 parse_options_usage(timehist_usage, timehist_options, "s", true); 3443 parse_options_usage(NULL, timehist_options, "w", true); 3444 return -EINVAL; 3445 } 3446 3447 return perf_sched__timehist(&sched); 3448 } else { 3449 usage_with_options(sched_usage, sched_options); 3450 } 3451 3452 return 0; 3453 } 3454
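
/*
 * Illustrative usage of the subcommands wired up above (not part of the
 * original source; option letters are taken from the option tables in
 * cmd_sched(), and the exact output depends on the recorded perf.data):
 *
 *   perf sched record sleep 1        # record the sched_* tracepoints
 *   perf sched latency --sort max    # per-task wakeup latency table
 *   perf sched map --compact         # per-CPU context-switch map
 *   perf sched replay -r 10          # re-simulate the recorded workload
 *   perf sched timehist -w -M -V     # per-event timeline with wakeups,
 *                                    # migrations and the CPU visual
 *   perf sched timehist -s --state   # summary only, with sched-out state
 *
 * Note that -s (summary only) and -w (show wakeups) are rejected as
 * mutually exclusive in cmd_sched().
 */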