1 // SPDX-License-Identifier: GPL-2.0 2 #include "builtin.h" 3 #include "perf-sys.h" 4 5 #include "util/cpumap.h" 6 #include "util/evlist.h" 7 #include "util/evsel.h" 8 #include "util/evsel_fprintf.h" 9 #include "util/mutex.h" 10 #include "util/symbol.h" 11 #include "util/thread.h" 12 #include "util/header.h" 13 #include "util/session.h" 14 #include "util/tool.h" 15 #include "util/cloexec.h" 16 #include "util/thread_map.h" 17 #include "util/color.h" 18 #include "util/stat.h" 19 #include "util/string2.h" 20 #include "util/callchain.h" 21 #include "util/time-utils.h" 22 23 #include <subcmd/pager.h> 24 #include <subcmd/parse-options.h> 25 #include "util/trace-event.h" 26 27 #include "util/debug.h" 28 #include "util/event.h" 29 #include "util/util.h" 30 31 #include <linux/kernel.h> 32 #include <linux/log2.h> 33 #include <linux/zalloc.h> 34 #include <sys/prctl.h> 35 #include <sys/resource.h> 36 #include <inttypes.h> 37 38 #include <errno.h> 39 #include <semaphore.h> 40 #include <pthread.h> 41 #include <math.h> 42 #include <api/fs/fs.h> 43 #include <perf/cpumap.h> 44 #include <linux/time64.h> 45 #include <linux/err.h> 46 47 #include <linux/ctype.h> 48 49 #define PR_SET_NAME 15 /* Set process name */ 50 #define MAX_CPUS 4096 51 #define COMM_LEN 20 52 #define SYM_LEN 129 53 #define MAX_PID 1024000 54 55 static const char *cpu_list; 56 static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); 57 58 struct sched_atom; 59 60 struct task_desc { 61 unsigned long nr; 62 unsigned long pid; 63 char comm[COMM_LEN]; 64 65 unsigned long nr_events; 66 unsigned long curr_event; 67 struct sched_atom **atoms; 68 69 pthread_t thread; 70 sem_t sleep_sem; 71 72 sem_t ready_for_work; 73 sem_t work_done_sem; 74 75 u64 cpu_usage; 76 }; 77 78 enum sched_event_type { 79 SCHED_EVENT_RUN, 80 SCHED_EVENT_SLEEP, 81 SCHED_EVENT_WAKEUP, 82 SCHED_EVENT_MIGRATION, 83 }; 84 85 struct sched_atom { 86 enum sched_event_type type; 87 int specific_wait; 88 u64 timestamp; 89 u64 duration; 90 unsigned long nr; 91 sem_t *wait_sem; 92 struct task_desc *wakee; 93 }; 94 95 enum thread_state { 96 THREAD_SLEEPING = 0, 97 THREAD_WAIT_CPU, 98 THREAD_SCHED_IN, 99 THREAD_IGNORE 100 }; 101 102 struct work_atom { 103 struct list_head list; 104 enum thread_state state; 105 u64 sched_out_time; 106 u64 wake_up_time; 107 u64 sched_in_time; 108 u64 runtime; 109 }; 110 111 struct work_atoms { 112 struct list_head work_list; 113 struct thread *thread; 114 struct rb_node node; 115 u64 max_lat; 116 u64 max_lat_start; 117 u64 max_lat_end; 118 u64 total_lat; 119 u64 nb_atoms; 120 u64 total_runtime; 121 int num_merged; 122 }; 123 124 typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *); 125 126 struct perf_sched; 127 128 struct trace_sched_handler { 129 int (*switch_event)(struct perf_sched *sched, struct evsel *evsel, 130 struct perf_sample *sample, struct machine *machine); 131 132 int (*runtime_event)(struct perf_sched *sched, struct evsel *evsel, 133 struct perf_sample *sample, struct machine *machine); 134 135 int (*wakeup_event)(struct perf_sched *sched, struct evsel *evsel, 136 struct perf_sample *sample, struct machine *machine); 137 138 /* PERF_RECORD_FORK event, not sched_process_fork tracepoint */ 139 int (*fork_event)(struct perf_sched *sched, union perf_event *event, 140 struct machine *machine); 141 142 int (*migrate_task_event)(struct perf_sched *sched, 143 struct evsel *evsel, 144 struct perf_sample *sample, 145 struct machine *machine); 146 }; 147 148 #define COLOR_PIDS PERF_COLOR_BLUE 149 #define COLOR_CPUS PERF_COLOR_BG_RED 150 151 struct perf_sched_map { 152 DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS); 153 struct perf_cpu *comp_cpus; 154 bool comp; 155 struct perf_thread_map *color_pids; 156 const char *color_pids_str; 157 struct perf_cpu_map *color_cpus; 158 const char *color_cpus_str; 159 struct perf_cpu_map *cpus; 160 const char *cpus_str; 161 }; 162 163 struct perf_sched { 164 struct perf_tool tool; 165 const char *sort_order; 166 unsigned long nr_tasks; 167 struct task_desc **pid_to_task; 168 struct task_desc **tasks; 169 const struct trace_sched_handler *tp_handler; 170 struct mutex start_work_mutex; 171 struct mutex work_done_wait_mutex; 172 int profile_cpu; 173 /* 174 * Track the current task - that way we can know whether there's any 175 * weird events, such as a task being switched away that is not current. 176 */ 177 struct perf_cpu max_cpu; 178 u32 *curr_pid; 179 struct thread **curr_thread; 180 char next_shortname1; 181 char next_shortname2; 182 unsigned int replay_repeat; 183 unsigned long nr_run_events; 184 unsigned long nr_sleep_events; 185 unsigned long nr_wakeup_events; 186 unsigned long nr_sleep_corrections; 187 unsigned long nr_run_events_optimized; 188 unsigned long targetless_wakeups; 189 unsigned long multitarget_wakeups; 190 unsigned long nr_runs; 191 unsigned long nr_timestamps; 192 unsigned long nr_unordered_timestamps; 193 unsigned long nr_context_switch_bugs; 194 unsigned long nr_events; 195 unsigned long nr_lost_chunks; 196 unsigned long nr_lost_events; 197 u64 run_measurement_overhead; 198 u64 sleep_measurement_overhead; 199 u64 start_time; 200 u64 cpu_usage; 201 u64 runavg_cpu_usage; 202 u64 parent_cpu_usage; 203 u64 runavg_parent_cpu_usage; 204 u64 sum_runtime; 205 u64 sum_fluct; 206 u64 run_avg; 207 u64 all_runtime; 208 u64 all_count; 209 u64 *cpu_last_switched; 210 struct rb_root_cached atom_root, sorted_atom_root, merged_atom_root; 211 struct list_head sort_list, cmp_pid; 212 bool force; 213 bool skip_merge; 214 struct perf_sched_map map; 215 216 /* options for timehist command */ 217 bool summary; 218 bool summary_only; 219 bool idle_hist; 220 bool show_callchain; 221 unsigned int max_stack; 222 bool show_cpu_visual; 223 bool show_wakeups; 224 bool show_next; 225 bool show_migrations; 226 bool show_state; 227 u64 skipped_samples; 228 const char *time_str; 229 struct perf_time_interval ptime; 230 struct perf_time_interval hist_time; 231 volatile bool thread_funcs_exit; 232 }; 233 234 /* per thread run time data */ 235 struct thread_runtime { 236 u64 last_time; /* time of previous sched in/out event */ 237 u64 dt_run; /* run time */ 238 u64 dt_sleep; /* time between CPU access by sleep (off cpu) */ 239 u64 dt_iowait; /* time between CPU access by iowait (off cpu) */ 240 u64 dt_preempt; /* time between CPU access by preempt (off cpu) */ 241 u64 dt_delay; /* time between wakeup and sched-in */ 242 u64 ready_to_run; /* time of wakeup */ 243 244 struct stats run_stats; 245 u64 total_run_time; 246 u64 total_sleep_time; 247 u64 total_iowait_time; 248 u64 total_preempt_time; 249 u64 total_delay_time; 250 251 char last_state; 252 253 char shortname[3]; 254 bool comm_changed; 255 256 u64 migrations; 257 }; 258 259 /* per event run time data */ 260 struct evsel_runtime { 261 u64 *last_time; /* time this event was last seen per cpu */ 262 u32 ncpu; /* highest cpu slot allocated */ 263 }; 264 265 /* per cpu idle time data */ 266 struct idle_thread_runtime { 267 struct thread_runtime tr; 268 struct thread *last_thread; 269 struct rb_root_cached sorted_root; 270 struct callchain_root callchain; 271 struct callchain_cursor cursor; 272 }; 273 274 /* track idle times per cpu */ 275 static struct thread **idle_threads; 276 static int idle_max_cpu; 277 static char idle_comm[] = "<idle>"; 278 279 static u64 get_nsecs(void) 280 { 281 struct timespec ts; 282 283 clock_gettime(CLOCK_MONOTONIC, &ts); 284 285 return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; 286 } 287 288 static void burn_nsecs(struct perf_sched *sched, u64 nsecs) 289 { 290 u64 T0 = get_nsecs(), T1; 291 292 do { 293 T1 = get_nsecs(); 294 } while (T1 + sched->run_measurement_overhead < T0 + nsecs); 295 } 296 297 static void sleep_nsecs(u64 nsecs) 298 { 299 struct timespec ts; 300 301 ts.tv_nsec = nsecs % 999999999; 302 ts.tv_sec = nsecs / 999999999; 303 304 nanosleep(&ts, NULL); 305 } 306 307 static void calibrate_run_measurement_overhead(struct perf_sched *sched) 308 { 309 u64 T0, T1, delta, min_delta = NSEC_PER_SEC; 310 int i; 311 312 for (i = 0; i < 10; i++) { 313 T0 = get_nsecs(); 314 burn_nsecs(sched, 0); 315 T1 = get_nsecs(); 316 delta = T1-T0; 317 min_delta = min(min_delta, delta); 318 } 319 sched->run_measurement_overhead = min_delta; 320 321 printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta); 322 } 323 324 static void calibrate_sleep_measurement_overhead(struct perf_sched *sched) 325 { 326 u64 T0, T1, delta, min_delta = NSEC_PER_SEC; 327 int i; 328 329 for (i = 0; i < 10; i++) { 330 T0 = get_nsecs(); 331 sleep_nsecs(10000); 332 T1 = get_nsecs(); 333 delta = T1-T0; 334 min_delta = min(min_delta, delta); 335 } 336 min_delta -= 10000; 337 sched->sleep_measurement_overhead = min_delta; 338 339 printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta); 340 } 341 342 static struct sched_atom * 343 get_new_event(struct task_desc *task, u64 timestamp) 344 { 345 struct sched_atom *event = zalloc(sizeof(*event)); 346 unsigned long idx = task->nr_events; 347 size_t size; 348 349 event->timestamp = timestamp; 350 event->nr = idx; 351 352 task->nr_events++; 353 size = sizeof(struct sched_atom *) * task->nr_events; 354 task->atoms = realloc(task->atoms, size); 355 BUG_ON(!task->atoms); 356 357 task->atoms[idx] = event; 358 359 return event; 360 } 361 362 static struct sched_atom *last_event(struct task_desc *task) 363 { 364 if (!task->nr_events) 365 return NULL; 366 367 return task->atoms[task->nr_events - 1]; 368 } 369 370 static void add_sched_event_run(struct perf_sched *sched, struct task_desc *task, 371 u64 timestamp, u64 duration) 372 { 373 struct sched_atom *event, *curr_event = last_event(task); 374 375 /* 376 * optimize an existing RUN event by merging this one 377 * to it: 378 */ 379 if (curr_event && curr_event->type == SCHED_EVENT_RUN) { 380 sched->nr_run_events_optimized++; 381 curr_event->duration += duration; 382 return; 383 } 384 385 event = get_new_event(task, timestamp); 386 387 event->type = SCHED_EVENT_RUN; 388 event->duration = duration; 389 390 sched->nr_run_events++; 391 } 392 393 static void add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *task, 394 u64 timestamp, struct task_desc *wakee) 395 { 396 struct sched_atom *event, *wakee_event; 397 398 event = get_new_event(task, timestamp); 399 event->type = SCHED_EVENT_WAKEUP; 400 event->wakee = wakee; 401 402 wakee_event = last_event(wakee); 403 if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) { 404 sched->targetless_wakeups++; 405 return; 406 } 407 if (wakee_event->wait_sem) { 408 sched->multitarget_wakeups++; 409 return; 410 } 411 412 wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem)); 413 sem_init(wakee_event->wait_sem, 0, 0); 414 wakee_event->specific_wait = 1; 415 event->wait_sem = wakee_event->wait_sem; 416 417 sched->nr_wakeup_events++; 418 } 419 420 static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task, 421 u64 timestamp, const char task_state __maybe_unused) 422 { 423 struct sched_atom *event = get_new_event(task, timestamp); 424 425 event->type = SCHED_EVENT_SLEEP; 426 427 sched->nr_sleep_events++; 428 } 429 430 static struct task_desc *register_pid(struct perf_sched *sched, 431 unsigned long pid, const char *comm) 432 { 433 struct task_desc *task; 434 static int pid_max; 435 436 if (sched->pid_to_task == NULL) { 437 if (sysctl__read_int("kernel/pid_max", &pid_max) < 0) 438 pid_max = MAX_PID; 439 BUG_ON((sched->pid_to_task = calloc(pid_max, sizeof(struct task_desc *))) == NULL); 440 } 441 if (pid >= (unsigned long)pid_max) { 442 BUG_ON((sched->pid_to_task = realloc(sched->pid_to_task, (pid + 1) * 443 sizeof(struct task_desc *))) == NULL); 444 while (pid >= (unsigned long)pid_max) 445 sched->pid_to_task[pid_max++] = NULL; 446 } 447 448 task = sched->pid_to_task[pid]; 449 450 if (task) 451 return task; 452 453 task = zalloc(sizeof(*task)); 454 task->pid = pid; 455 task->nr = sched->nr_tasks; 456 strcpy(task->comm, comm); 457 /* 458 * every task starts in sleeping state - this gets ignored 459 * if there's no wakeup pointing to this sleep state: 460 */ 461 add_sched_event_sleep(sched, task, 0, 0); 462 463 sched->pid_to_task[pid] = task; 464 sched->nr_tasks++; 465 sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_desc *)); 466 BUG_ON(!sched->tasks); 467 sched->tasks[task->nr] = task; 468 469 if (verbose > 0) 470 printf("registered task #%ld, PID %ld (%s)\n", sched->nr_tasks, pid, comm); 471 472 return task; 473 } 474 475 476 static void print_task_traces(struct perf_sched *sched) 477 { 478 struct task_desc *task; 479 unsigned long i; 480 481 for (i = 0; i < sched->nr_tasks; i++) { 482 task = sched->tasks[i]; 483 printf("task %6ld (%20s:%10ld), nr_events: %ld\n", 484 task->nr, task->comm, task->pid, task->nr_events); 485 } 486 } 487 488 static void add_cross_task_wakeups(struct perf_sched *sched) 489 { 490 struct task_desc *task1, *task2; 491 unsigned long i, j; 492 493 for (i = 0; i < sched->nr_tasks; i++) { 494 task1 = sched->tasks[i]; 495 j = i + 1; 496 if (j == sched->nr_tasks) 497 j = 0; 498 task2 = sched->tasks[j]; 499 add_sched_event_wakeup(sched, task1, 0, task2); 500 } 501 } 502 503 static void perf_sched__process_event(struct perf_sched *sched, 504 struct sched_atom *atom) 505 { 506 int ret = 0; 507 508 switch (atom->type) { 509 case SCHED_EVENT_RUN: 510 burn_nsecs(sched, atom->duration); 511 break; 512 case SCHED_EVENT_SLEEP: 513 if (atom->wait_sem) 514 ret = sem_wait(atom->wait_sem); 515 BUG_ON(ret); 516 break; 517 case SCHED_EVENT_WAKEUP: 518 if (atom->wait_sem) 519 ret = sem_post(atom->wait_sem); 520 BUG_ON(ret); 521 break; 522 case SCHED_EVENT_MIGRATION: 523 break; 524 default: 525 BUG_ON(1); 526 } 527 } 528 529 static u64 get_cpu_usage_nsec_parent(void) 530 { 531 struct rusage ru; 532 u64 sum; 533 int err; 534 535 err = getrusage(RUSAGE_SELF, &ru); 536 BUG_ON(err); 537 538 sum = ru.ru_utime.tv_sec * NSEC_PER_SEC + ru.ru_utime.tv_usec * NSEC_PER_USEC; 539 sum += ru.ru_stime.tv_sec * NSEC_PER_SEC + ru.ru_stime.tv_usec * NSEC_PER_USEC; 540 541 return sum; 542 } 543 544 static int self_open_counters(struct perf_sched *sched, unsigned long cur_task) 545 { 546 struct perf_event_attr attr; 547 char sbuf[STRERR_BUFSIZE], info[STRERR_BUFSIZE]; 548 int fd; 549 struct rlimit limit; 550 bool need_privilege = false; 551 552 memset(&attr, 0, sizeof(attr)); 553 554 attr.type = PERF_TYPE_SOFTWARE; 555 attr.config = PERF_COUNT_SW_TASK_CLOCK; 556 557 force_again: 558 fd = sys_perf_event_open(&attr, 0, -1, -1, 559 perf_event_open_cloexec_flag()); 560 561 if (fd < 0) { 562 if (errno == EMFILE) { 563 if (sched->force) { 564 BUG_ON(getrlimit(RLIMIT_NOFILE, &limit) == -1); 565 limit.rlim_cur += sched->nr_tasks - cur_task; 566 if (limit.rlim_cur > limit.rlim_max) { 567 limit.rlim_max = limit.rlim_cur; 568 need_privilege = true; 569 } 570 if (setrlimit(RLIMIT_NOFILE, &limit) == -1) { 571 if (need_privilege && errno == EPERM) 572 strcpy(info, "Need privilege\n"); 573 } else 574 goto force_again; 575 } else 576 strcpy(info, "Have a try with -f option\n"); 577 } 578 pr_err("Error: sys_perf_event_open() syscall returned " 579 "with %d (%s)\n%s", fd, 580 str_error_r(errno, sbuf, sizeof(sbuf)), info); 581 exit(EXIT_FAILURE); 582 } 583 return fd; 584 } 585 586 static u64 get_cpu_usage_nsec_self(int fd) 587 { 588 u64 runtime; 589 int ret; 590 591 ret = read(fd, &runtime, sizeof(runtime)); 592 BUG_ON(ret != sizeof(runtime)); 593 594 return runtime; 595 } 596 597 struct sched_thread_parms { 598 struct task_desc *task; 599 struct perf_sched *sched; 600 int fd; 601 }; 602 603 static void *thread_func(void *ctx) 604 { 605 struct sched_thread_parms *parms = ctx; 606 struct task_desc *this_task = parms->task; 607 struct perf_sched *sched = parms->sched; 608 u64 cpu_usage_0, cpu_usage_1; 609 unsigned long i, ret; 610 char comm2[22]; 611 int fd = parms->fd; 612 613 zfree(&parms); 614 615 sprintf(comm2, ":%s", this_task->comm); 616 prctl(PR_SET_NAME, comm2); 617 if (fd < 0) 618 return NULL; 619 620 while (!sched->thread_funcs_exit) { 621 ret = sem_post(&this_task->ready_for_work); 622 BUG_ON(ret); 623 mutex_lock(&sched->start_work_mutex); 624 mutex_unlock(&sched->start_work_mutex); 625 626 cpu_usage_0 = get_cpu_usage_nsec_self(fd); 627 628 for (i = 0; i < this_task->nr_events; i++) { 629 this_task->curr_event = i; 630 perf_sched__process_event(sched, this_task->atoms[i]); 631 } 632 633 cpu_usage_1 = get_cpu_usage_nsec_self(fd); 634 this_task->cpu_usage = cpu_usage_1 - cpu_usage_0; 635 ret = sem_post(&this_task->work_done_sem); 636 BUG_ON(ret); 637 638 mutex_lock(&sched->work_done_wait_mutex); 639 mutex_unlock(&sched->work_done_wait_mutex); 640 } 641 return NULL; 642 } 643 644 static void create_tasks(struct perf_sched *sched) 645 EXCLUSIVE_LOCK_FUNCTION(sched->start_work_mutex) 646 EXCLUSIVE_LOCK_FUNCTION(sched->work_done_wait_mutex) 647 { 648 struct task_desc *task; 649 pthread_attr_t attr; 650 unsigned long i; 651 int err; 652 653 err = pthread_attr_init(&attr); 654 BUG_ON(err); 655 err = pthread_attr_setstacksize(&attr, 656 (size_t) max(16 * 1024, (int)PTHREAD_STACK_MIN)); 657 BUG_ON(err); 658 mutex_lock(&sched->start_work_mutex); 659 mutex_lock(&sched->work_done_wait_mutex); 660 for (i = 0; i < sched->nr_tasks; i++) { 661 struct sched_thread_parms *parms = malloc(sizeof(*parms)); 662 BUG_ON(parms == NULL); 663 parms->task = task = sched->tasks[i]; 664 parms->sched = sched; 665 parms->fd = self_open_counters(sched, i); 666 sem_init(&task->sleep_sem, 0, 0); 667 sem_init(&task->ready_for_work, 0, 0); 668 sem_init(&task->work_done_sem, 0, 0); 669 task->curr_event = 0; 670 err = pthread_create(&task->thread, &attr, thread_func, parms); 671 BUG_ON(err); 672 } 673 } 674 675 static void destroy_tasks(struct perf_sched *sched) 676 UNLOCK_FUNCTION(sched->start_work_mutex) 677 UNLOCK_FUNCTION(sched->work_done_wait_mutex) 678 { 679 struct task_desc *task; 680 unsigned long i; 681 int err; 682 683 mutex_unlock(&sched->start_work_mutex); 684 mutex_unlock(&sched->work_done_wait_mutex); 685 /* Get rid of threads so they won't be upset by mutex destrunction */ 686 for (i = 0; i < sched->nr_tasks; i++) { 687 task = sched->tasks[i]; 688 err = pthread_join(task->thread, NULL); 689 BUG_ON(err); 690 sem_destroy(&task->sleep_sem); 691 sem_destroy(&task->ready_for_work); 692 sem_destroy(&task->work_done_sem); 693 } 694 } 695 696 static void wait_for_tasks(struct perf_sched *sched) 697 EXCLUSIVE_LOCKS_REQUIRED(sched->work_done_wait_mutex) 698 EXCLUSIVE_LOCKS_REQUIRED(sched->start_work_mutex) 699 { 700 u64 cpu_usage_0, cpu_usage_1; 701 struct task_desc *task; 702 unsigned long i, ret; 703 704 sched->start_time = get_nsecs(); 705 sched->cpu_usage = 0; 706 mutex_unlock(&sched->work_done_wait_mutex); 707 708 for (i = 0; i < sched->nr_tasks; i++) { 709 task = sched->tasks[i]; 710 ret = sem_wait(&task->ready_for_work); 711 BUG_ON(ret); 712 sem_init(&task->ready_for_work, 0, 0); 713 } 714 mutex_lock(&sched->work_done_wait_mutex); 715 716 cpu_usage_0 = get_cpu_usage_nsec_parent(); 717 718 mutex_unlock(&sched->start_work_mutex); 719 720 for (i = 0; i < sched->nr_tasks; i++) { 721 task = sched->tasks[i]; 722 ret = sem_wait(&task->work_done_sem); 723 BUG_ON(ret); 724 sem_init(&task->work_done_sem, 0, 0); 725 sched->cpu_usage += task->cpu_usage; 726 task->cpu_usage = 0; 727 } 728 729 cpu_usage_1 = get_cpu_usage_nsec_parent(); 730 if (!sched->runavg_cpu_usage) 731 sched->runavg_cpu_usage = sched->cpu_usage; 732 sched->runavg_cpu_usage = (sched->runavg_cpu_usage * (sched->replay_repeat - 1) + sched->cpu_usage) / sched->replay_repeat; 733 734 sched->parent_cpu_usage = cpu_usage_1 - cpu_usage_0; 735 if (!sched->runavg_parent_cpu_usage) 736 sched->runavg_parent_cpu_usage = sched->parent_cpu_usage; 737 sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * (sched->replay_repeat - 1) + 738 sched->parent_cpu_usage)/sched->replay_repeat; 739 740 mutex_lock(&sched->start_work_mutex); 741 742 for (i = 0; i < sched->nr_tasks; i++) { 743 task = sched->tasks[i]; 744 sem_init(&task->sleep_sem, 0, 0); 745 task->curr_event = 0; 746 } 747 } 748 749 static void run_one_test(struct perf_sched *sched) 750 EXCLUSIVE_LOCKS_REQUIRED(sched->work_done_wait_mutex) 751 EXCLUSIVE_LOCKS_REQUIRED(sched->start_work_mutex) 752 { 753 u64 T0, T1, delta, avg_delta, fluct; 754 755 T0 = get_nsecs(); 756 wait_for_tasks(sched); 757 T1 = get_nsecs(); 758 759 delta = T1 - T0; 760 sched->sum_runtime += delta; 761 sched->nr_runs++; 762 763 avg_delta = sched->sum_runtime / sched->nr_runs; 764 if (delta < avg_delta) 765 fluct = avg_delta - delta; 766 else 767 fluct = delta - avg_delta; 768 sched->sum_fluct += fluct; 769 if (!sched->run_avg) 770 sched->run_avg = delta; 771 sched->run_avg = (sched->run_avg * (sched->replay_repeat - 1) + delta) / sched->replay_repeat; 772 773 printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / NSEC_PER_MSEC); 774 775 printf("ravg: %0.2f, ", (double)sched->run_avg / NSEC_PER_MSEC); 776 777 printf("cpu: %0.2f / %0.2f", 778 (double)sched->cpu_usage / NSEC_PER_MSEC, (double)sched->runavg_cpu_usage / NSEC_PER_MSEC); 779 780 #if 0 781 /* 782 * rusage statistics done by the parent, these are less 783 * accurate than the sched->sum_exec_runtime based statistics: 784 */ 785 printf(" [%0.2f / %0.2f]", 786 (double)sched->parent_cpu_usage / NSEC_PER_MSEC, 787 (double)sched->runavg_parent_cpu_usage / NSEC_PER_MSEC); 788 #endif 789 790 printf("\n"); 791 792 if (sched->nr_sleep_corrections) 793 printf(" (%ld sleep corrections)\n", sched->nr_sleep_corrections); 794 sched->nr_sleep_corrections = 0; 795 } 796 797 static void test_calibrations(struct perf_sched *sched) 798 { 799 u64 T0, T1; 800 801 T0 = get_nsecs(); 802 burn_nsecs(sched, NSEC_PER_MSEC); 803 T1 = get_nsecs(); 804 805 printf("the run test took %" PRIu64 " nsecs\n", T1 - T0); 806 807 T0 = get_nsecs(); 808 sleep_nsecs(NSEC_PER_MSEC); 809 T1 = get_nsecs(); 810 811 printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0); 812 } 813 814 static int 815 replay_wakeup_event(struct perf_sched *sched, 816 struct evsel *evsel, struct perf_sample *sample, 817 struct machine *machine __maybe_unused) 818 { 819 const char *comm = evsel__strval(evsel, sample, "comm"); 820 const u32 pid = evsel__intval(evsel, sample, "pid"); 821 struct task_desc *waker, *wakee; 822 823 if (verbose > 0) { 824 printf("sched_wakeup event %p\n", evsel); 825 826 printf(" ... pid %d woke up %s/%d\n", sample->tid, comm, pid); 827 } 828 829 waker = register_pid(sched, sample->tid, "<unknown>"); 830 wakee = register_pid(sched, pid, comm); 831 832 add_sched_event_wakeup(sched, waker, sample->time, wakee); 833 return 0; 834 } 835 836 static int replay_switch_event(struct perf_sched *sched, 837 struct evsel *evsel, 838 struct perf_sample *sample, 839 struct machine *machine __maybe_unused) 840 { 841 const char *prev_comm = evsel__strval(evsel, sample, "prev_comm"), 842 *next_comm = evsel__strval(evsel, sample, "next_comm"); 843 const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"), 844 next_pid = evsel__intval(evsel, sample, "next_pid"); 845 const char prev_state = evsel__taskstate(evsel, sample, "prev_state"); 846 struct task_desc *prev, __maybe_unused *next; 847 u64 timestamp0, timestamp = sample->time; 848 int cpu = sample->cpu; 849 s64 delta; 850 851 if (verbose > 0) 852 printf("sched_switch event %p\n", evsel); 853 854 if (cpu >= MAX_CPUS || cpu < 0) 855 return 0; 856 857 timestamp0 = sched->cpu_last_switched[cpu]; 858 if (timestamp0) 859 delta = timestamp - timestamp0; 860 else 861 delta = 0; 862 863 if (delta < 0) { 864 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 865 return -1; 866 } 867 868 pr_debug(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n", 869 prev_comm, prev_pid, next_comm, next_pid, delta); 870 871 prev = register_pid(sched, prev_pid, prev_comm); 872 next = register_pid(sched, next_pid, next_comm); 873 874 sched->cpu_last_switched[cpu] = timestamp; 875 876 add_sched_event_run(sched, prev, timestamp, delta); 877 add_sched_event_sleep(sched, prev, timestamp, prev_state); 878 879 return 0; 880 } 881 882 static int replay_fork_event(struct perf_sched *sched, 883 union perf_event *event, 884 struct machine *machine) 885 { 886 struct thread *child, *parent; 887 888 child = machine__findnew_thread(machine, event->fork.pid, 889 event->fork.tid); 890 parent = machine__findnew_thread(machine, event->fork.ppid, 891 event->fork.ptid); 892 893 if (child == NULL || parent == NULL) { 894 pr_debug("thread does not exist on fork event: child %p, parent %p\n", 895 child, parent); 896 goto out_put; 897 } 898 899 if (verbose > 0) { 900 printf("fork event\n"); 901 printf("... parent: %s/%d\n", thread__comm_str(parent), thread__tid(parent)); 902 printf("... child: %s/%d\n", thread__comm_str(child), thread__tid(child)); 903 } 904 905 register_pid(sched, thread__tid(parent), thread__comm_str(parent)); 906 register_pid(sched, thread__tid(child), thread__comm_str(child)); 907 out_put: 908 thread__put(child); 909 thread__put(parent); 910 return 0; 911 } 912 913 struct sort_dimension { 914 const char *name; 915 sort_fn_t cmp; 916 struct list_head list; 917 }; 918 919 /* 920 * handle runtime stats saved per thread 921 */ 922 static struct thread_runtime *thread__init_runtime(struct thread *thread) 923 { 924 struct thread_runtime *r; 925 926 r = zalloc(sizeof(struct thread_runtime)); 927 if (!r) 928 return NULL; 929 930 init_stats(&r->run_stats); 931 thread__set_priv(thread, r); 932 933 return r; 934 } 935 936 static struct thread_runtime *thread__get_runtime(struct thread *thread) 937 { 938 struct thread_runtime *tr; 939 940 tr = thread__priv(thread); 941 if (tr == NULL) { 942 tr = thread__init_runtime(thread); 943 if (tr == NULL) 944 pr_debug("Failed to malloc memory for runtime data.\n"); 945 } 946 947 return tr; 948 } 949 950 static int 951 thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r) 952 { 953 struct sort_dimension *sort; 954 int ret = 0; 955 956 BUG_ON(list_empty(list)); 957 958 list_for_each_entry(sort, list, list) { 959 ret = sort->cmp(l, r); 960 if (ret) 961 return ret; 962 } 963 964 return ret; 965 } 966 967 static struct work_atoms * 968 thread_atoms_search(struct rb_root_cached *root, struct thread *thread, 969 struct list_head *sort_list) 970 { 971 struct rb_node *node = root->rb_root.rb_node; 972 struct work_atoms key = { .thread = thread }; 973 974 while (node) { 975 struct work_atoms *atoms; 976 int cmp; 977 978 atoms = container_of(node, struct work_atoms, node); 979 980 cmp = thread_lat_cmp(sort_list, &key, atoms); 981 if (cmp > 0) 982 node = node->rb_left; 983 else if (cmp < 0) 984 node = node->rb_right; 985 else { 986 BUG_ON(thread != atoms->thread); 987 return atoms; 988 } 989 } 990 return NULL; 991 } 992 993 static void 994 __thread_latency_insert(struct rb_root_cached *root, struct work_atoms *data, 995 struct list_head *sort_list) 996 { 997 struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL; 998 bool leftmost = true; 999 1000 while (*new) { 1001 struct work_atoms *this; 1002 int cmp; 1003 1004 this = container_of(*new, struct work_atoms, node); 1005 parent = *new; 1006 1007 cmp = thread_lat_cmp(sort_list, data, this); 1008 1009 if (cmp > 0) 1010 new = &((*new)->rb_left); 1011 else { 1012 new = &((*new)->rb_right); 1013 leftmost = false; 1014 } 1015 } 1016 1017 rb_link_node(&data->node, parent, new); 1018 rb_insert_color_cached(&data->node, root, leftmost); 1019 } 1020 1021 static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread) 1022 { 1023 struct work_atoms *atoms = zalloc(sizeof(*atoms)); 1024 if (!atoms) { 1025 pr_err("No memory at %s\n", __func__); 1026 return -1; 1027 } 1028 1029 atoms->thread = thread__get(thread); 1030 INIT_LIST_HEAD(&atoms->work_list); 1031 __thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid); 1032 return 0; 1033 } 1034 1035 static int 1036 add_sched_out_event(struct work_atoms *atoms, 1037 char run_state, 1038 u64 timestamp) 1039 { 1040 struct work_atom *atom = zalloc(sizeof(*atom)); 1041 if (!atom) { 1042 pr_err("Non memory at %s", __func__); 1043 return -1; 1044 } 1045 1046 atom->sched_out_time = timestamp; 1047 1048 if (run_state == 'R') { 1049 atom->state = THREAD_WAIT_CPU; 1050 atom->wake_up_time = atom->sched_out_time; 1051 } 1052 1053 list_add_tail(&atom->list, &atoms->work_list); 1054 return 0; 1055 } 1056 1057 static void 1058 add_runtime_event(struct work_atoms *atoms, u64 delta, 1059 u64 timestamp __maybe_unused) 1060 { 1061 struct work_atom *atom; 1062 1063 BUG_ON(list_empty(&atoms->work_list)); 1064 1065 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1066 1067 atom->runtime += delta; 1068 atoms->total_runtime += delta; 1069 } 1070 1071 static void 1072 add_sched_in_event(struct work_atoms *atoms, u64 timestamp) 1073 { 1074 struct work_atom *atom; 1075 u64 delta; 1076 1077 if (list_empty(&atoms->work_list)) 1078 return; 1079 1080 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1081 1082 if (atom->state != THREAD_WAIT_CPU) 1083 return; 1084 1085 if (timestamp < atom->wake_up_time) { 1086 atom->state = THREAD_IGNORE; 1087 return; 1088 } 1089 1090 atom->state = THREAD_SCHED_IN; 1091 atom->sched_in_time = timestamp; 1092 1093 delta = atom->sched_in_time - atom->wake_up_time; 1094 atoms->total_lat += delta; 1095 if (delta > atoms->max_lat) { 1096 atoms->max_lat = delta; 1097 atoms->max_lat_start = atom->wake_up_time; 1098 atoms->max_lat_end = timestamp; 1099 } 1100 atoms->nb_atoms++; 1101 } 1102 1103 static int latency_switch_event(struct perf_sched *sched, 1104 struct evsel *evsel, 1105 struct perf_sample *sample, 1106 struct machine *machine) 1107 { 1108 const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"), 1109 next_pid = evsel__intval(evsel, sample, "next_pid"); 1110 const char prev_state = evsel__taskstate(evsel, sample, "prev_state"); 1111 struct work_atoms *out_events, *in_events; 1112 struct thread *sched_out, *sched_in; 1113 u64 timestamp0, timestamp = sample->time; 1114 int cpu = sample->cpu, err = -1; 1115 s64 delta; 1116 1117 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1118 1119 timestamp0 = sched->cpu_last_switched[cpu]; 1120 sched->cpu_last_switched[cpu] = timestamp; 1121 if (timestamp0) 1122 delta = timestamp - timestamp0; 1123 else 1124 delta = 0; 1125 1126 if (delta < 0) { 1127 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 1128 return -1; 1129 } 1130 1131 sched_out = machine__findnew_thread(machine, -1, prev_pid); 1132 sched_in = machine__findnew_thread(machine, -1, next_pid); 1133 if (sched_out == NULL || sched_in == NULL) 1134 goto out_put; 1135 1136 out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); 1137 if (!out_events) { 1138 if (thread_atoms_insert(sched, sched_out)) 1139 goto out_put; 1140 out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); 1141 if (!out_events) { 1142 pr_err("out-event: Internal tree error"); 1143 goto out_put; 1144 } 1145 } 1146 if (add_sched_out_event(out_events, prev_state, timestamp)) 1147 return -1; 1148 1149 in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); 1150 if (!in_events) { 1151 if (thread_atoms_insert(sched, sched_in)) 1152 goto out_put; 1153 in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); 1154 if (!in_events) { 1155 pr_err("in-event: Internal tree error"); 1156 goto out_put; 1157 } 1158 /* 1159 * Take came in we have not heard about yet, 1160 * add in an initial atom in runnable state: 1161 */ 1162 if (add_sched_out_event(in_events, 'R', timestamp)) 1163 goto out_put; 1164 } 1165 add_sched_in_event(in_events, timestamp); 1166 err = 0; 1167 out_put: 1168 thread__put(sched_out); 1169 thread__put(sched_in); 1170 return err; 1171 } 1172 1173 static int latency_runtime_event(struct perf_sched *sched, 1174 struct evsel *evsel, 1175 struct perf_sample *sample, 1176 struct machine *machine) 1177 { 1178 const u32 pid = evsel__intval(evsel, sample, "pid"); 1179 const u64 runtime = evsel__intval(evsel, sample, "runtime"); 1180 struct thread *thread = machine__findnew_thread(machine, -1, pid); 1181 struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); 1182 u64 timestamp = sample->time; 1183 int cpu = sample->cpu, err = -1; 1184 1185 if (thread == NULL) 1186 return -1; 1187 1188 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1189 if (!atoms) { 1190 if (thread_atoms_insert(sched, thread)) 1191 goto out_put; 1192 atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); 1193 if (!atoms) { 1194 pr_err("in-event: Internal tree error"); 1195 goto out_put; 1196 } 1197 if (add_sched_out_event(atoms, 'R', timestamp)) 1198 goto out_put; 1199 } 1200 1201 add_runtime_event(atoms, runtime, timestamp); 1202 err = 0; 1203 out_put: 1204 thread__put(thread); 1205 return err; 1206 } 1207 1208 static int latency_wakeup_event(struct perf_sched *sched, 1209 struct evsel *evsel, 1210 struct perf_sample *sample, 1211 struct machine *machine) 1212 { 1213 const u32 pid = evsel__intval(evsel, sample, "pid"); 1214 struct work_atoms *atoms; 1215 struct work_atom *atom; 1216 struct thread *wakee; 1217 u64 timestamp = sample->time; 1218 int err = -1; 1219 1220 wakee = machine__findnew_thread(machine, -1, pid); 1221 if (wakee == NULL) 1222 return -1; 1223 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); 1224 if (!atoms) { 1225 if (thread_atoms_insert(sched, wakee)) 1226 goto out_put; 1227 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); 1228 if (!atoms) { 1229 pr_err("wakeup-event: Internal tree error"); 1230 goto out_put; 1231 } 1232 if (add_sched_out_event(atoms, 'S', timestamp)) 1233 goto out_put; 1234 } 1235 1236 BUG_ON(list_empty(&atoms->work_list)); 1237 1238 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1239 1240 /* 1241 * As we do not guarantee the wakeup event happens when 1242 * task is out of run queue, also may happen when task is 1243 * on run queue and wakeup only change ->state to TASK_RUNNING, 1244 * then we should not set the ->wake_up_time when wake up a 1245 * task which is on run queue. 1246 * 1247 * You WILL be missing events if you've recorded only 1248 * one CPU, or are only looking at only one, so don't 1249 * skip in this case. 1250 */ 1251 if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING) 1252 goto out_ok; 1253 1254 sched->nr_timestamps++; 1255 if (atom->sched_out_time > timestamp) { 1256 sched->nr_unordered_timestamps++; 1257 goto out_ok; 1258 } 1259 1260 atom->state = THREAD_WAIT_CPU; 1261 atom->wake_up_time = timestamp; 1262 out_ok: 1263 err = 0; 1264 out_put: 1265 thread__put(wakee); 1266 return err; 1267 } 1268 1269 static int latency_migrate_task_event(struct perf_sched *sched, 1270 struct evsel *evsel, 1271 struct perf_sample *sample, 1272 struct machine *machine) 1273 { 1274 const u32 pid = evsel__intval(evsel, sample, "pid"); 1275 u64 timestamp = sample->time; 1276 struct work_atoms *atoms; 1277 struct work_atom *atom; 1278 struct thread *migrant; 1279 int err = -1; 1280 1281 /* 1282 * Only need to worry about migration when profiling one CPU. 1283 */ 1284 if (sched->profile_cpu == -1) 1285 return 0; 1286 1287 migrant = machine__findnew_thread(machine, -1, pid); 1288 if (migrant == NULL) 1289 return -1; 1290 atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); 1291 if (!atoms) { 1292 if (thread_atoms_insert(sched, migrant)) 1293 goto out_put; 1294 register_pid(sched, thread__tid(migrant), thread__comm_str(migrant)); 1295 atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); 1296 if (!atoms) { 1297 pr_err("migration-event: Internal tree error"); 1298 goto out_put; 1299 } 1300 if (add_sched_out_event(atoms, 'R', timestamp)) 1301 goto out_put; 1302 } 1303 1304 BUG_ON(list_empty(&atoms->work_list)); 1305 1306 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1307 atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp; 1308 1309 sched->nr_timestamps++; 1310 1311 if (atom->sched_out_time > timestamp) 1312 sched->nr_unordered_timestamps++; 1313 err = 0; 1314 out_put: 1315 thread__put(migrant); 1316 return err; 1317 } 1318 1319 static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list) 1320 { 1321 int i; 1322 int ret; 1323 u64 avg; 1324 char max_lat_start[32], max_lat_end[32]; 1325 1326 if (!work_list->nb_atoms) 1327 return; 1328 /* 1329 * Ignore idle threads: 1330 */ 1331 if (!strcmp(thread__comm_str(work_list->thread), "swapper")) 1332 return; 1333 1334 sched->all_runtime += work_list->total_runtime; 1335 sched->all_count += work_list->nb_atoms; 1336 1337 if (work_list->num_merged > 1) { 1338 ret = printf(" %s:(%d) ", thread__comm_str(work_list->thread), 1339 work_list->num_merged); 1340 } else { 1341 ret = printf(" %s:%d ", thread__comm_str(work_list->thread), 1342 thread__tid(work_list->thread)); 1343 } 1344 1345 for (i = 0; i < 24 - ret; i++) 1346 printf(" "); 1347 1348 avg = work_list->total_lat / work_list->nb_atoms; 1349 timestamp__scnprintf_usec(work_list->max_lat_start, max_lat_start, sizeof(max_lat_start)); 1350 timestamp__scnprintf_usec(work_list->max_lat_end, max_lat_end, sizeof(max_lat_end)); 1351 1352 printf("|%11.3f ms |%9" PRIu64 " | avg:%8.3f ms | max:%8.3f ms | max start: %12s s | max end: %12s s\n", 1353 (double)work_list->total_runtime / NSEC_PER_MSEC, 1354 work_list->nb_atoms, (double)avg / NSEC_PER_MSEC, 1355 (double)work_list->max_lat / NSEC_PER_MSEC, 1356 max_lat_start, max_lat_end); 1357 } 1358 1359 static int pid_cmp(struct work_atoms *l, struct work_atoms *r) 1360 { 1361 pid_t l_tid, r_tid; 1362 1363 if (RC_CHK_EQUAL(l->thread, r->thread)) 1364 return 0; 1365 l_tid = thread__tid(l->thread); 1366 r_tid = thread__tid(r->thread); 1367 if (l_tid < r_tid) 1368 return -1; 1369 if (l_tid > r_tid) 1370 return 1; 1371 return (int)(RC_CHK_ACCESS(l->thread) - RC_CHK_ACCESS(r->thread)); 1372 } 1373 1374 static int avg_cmp(struct work_atoms *l, struct work_atoms *r) 1375 { 1376 u64 avgl, avgr; 1377 1378 if (!l->nb_atoms) 1379 return -1; 1380 1381 if (!r->nb_atoms) 1382 return 1; 1383 1384 avgl = l->total_lat / l->nb_atoms; 1385 avgr = r->total_lat / r->nb_atoms; 1386 1387 if (avgl < avgr) 1388 return -1; 1389 if (avgl > avgr) 1390 return 1; 1391 1392 return 0; 1393 } 1394 1395 static int max_cmp(struct work_atoms *l, struct work_atoms *r) 1396 { 1397 if (l->max_lat < r->max_lat) 1398 return -1; 1399 if (l->max_lat > r->max_lat) 1400 return 1; 1401 1402 return 0; 1403 } 1404 1405 static int switch_cmp(struct work_atoms *l, struct work_atoms *r) 1406 { 1407 if (l->nb_atoms < r->nb_atoms) 1408 return -1; 1409 if (l->nb_atoms > r->nb_atoms) 1410 return 1; 1411 1412 return 0; 1413 } 1414 1415 static int runtime_cmp(struct work_atoms *l, struct work_atoms *r) 1416 { 1417 if (l->total_runtime < r->total_runtime) 1418 return -1; 1419 if (l->total_runtime > r->total_runtime) 1420 return 1; 1421 1422 return 0; 1423 } 1424 1425 static int sort_dimension__add(const char *tok, struct list_head *list) 1426 { 1427 size_t i; 1428 static struct sort_dimension avg_sort_dimension = { 1429 .name = "avg", 1430 .cmp = avg_cmp, 1431 }; 1432 static struct sort_dimension max_sort_dimension = { 1433 .name = "max", 1434 .cmp = max_cmp, 1435 }; 1436 static struct sort_dimension pid_sort_dimension = { 1437 .name = "pid", 1438 .cmp = pid_cmp, 1439 }; 1440 static struct sort_dimension runtime_sort_dimension = { 1441 .name = "runtime", 1442 .cmp = runtime_cmp, 1443 }; 1444 static struct sort_dimension switch_sort_dimension = { 1445 .name = "switch", 1446 .cmp = switch_cmp, 1447 }; 1448 struct sort_dimension *available_sorts[] = { 1449 &pid_sort_dimension, 1450 &avg_sort_dimension, 1451 &max_sort_dimension, 1452 &switch_sort_dimension, 1453 &runtime_sort_dimension, 1454 }; 1455 1456 for (i = 0; i < ARRAY_SIZE(available_sorts); i++) { 1457 if (!strcmp(available_sorts[i]->name, tok)) { 1458 list_add_tail(&available_sorts[i]->list, list); 1459 1460 return 0; 1461 } 1462 } 1463 1464 return -1; 1465 } 1466 1467 static void perf_sched__sort_lat(struct perf_sched *sched) 1468 { 1469 struct rb_node *node; 1470 struct rb_root_cached *root = &sched->atom_root; 1471 again: 1472 for (;;) { 1473 struct work_atoms *data; 1474 node = rb_first_cached(root); 1475 if (!node) 1476 break; 1477 1478 rb_erase_cached(node, root); 1479 data = rb_entry(node, struct work_atoms, node); 1480 __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list); 1481 } 1482 if (root == &sched->atom_root) { 1483 root = &sched->merged_atom_root; 1484 goto again; 1485 } 1486 } 1487 1488 static int process_sched_wakeup_event(struct perf_tool *tool, 1489 struct evsel *evsel, 1490 struct perf_sample *sample, 1491 struct machine *machine) 1492 { 1493 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1494 1495 if (sched->tp_handler->wakeup_event) 1496 return sched->tp_handler->wakeup_event(sched, evsel, sample, machine); 1497 1498 return 0; 1499 } 1500 1501 static int process_sched_wakeup_ignore(struct perf_tool *tool __maybe_unused, 1502 struct evsel *evsel __maybe_unused, 1503 struct perf_sample *sample __maybe_unused, 1504 struct machine *machine __maybe_unused) 1505 { 1506 return 0; 1507 } 1508 1509 union map_priv { 1510 void *ptr; 1511 bool color; 1512 }; 1513 1514 static bool thread__has_color(struct thread *thread) 1515 { 1516 union map_priv priv = { 1517 .ptr = thread__priv(thread), 1518 }; 1519 1520 return priv.color; 1521 } 1522 1523 static struct thread* 1524 map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid) 1525 { 1526 struct thread *thread = machine__findnew_thread(machine, pid, tid); 1527 union map_priv priv = { 1528 .color = false, 1529 }; 1530 1531 if (!sched->map.color_pids || !thread || thread__priv(thread)) 1532 return thread; 1533 1534 if (thread_map__has(sched->map.color_pids, tid)) 1535 priv.color = true; 1536 1537 thread__set_priv(thread, priv.ptr); 1538 return thread; 1539 } 1540 1541 static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, 1542 struct perf_sample *sample, struct machine *machine) 1543 { 1544 const u32 next_pid = evsel__intval(evsel, sample, "next_pid"); 1545 struct thread *sched_in; 1546 struct thread_runtime *tr; 1547 int new_shortname; 1548 u64 timestamp0, timestamp = sample->time; 1549 s64 delta; 1550 int i; 1551 struct perf_cpu this_cpu = { 1552 .cpu = sample->cpu, 1553 }; 1554 int cpus_nr; 1555 bool new_cpu = false; 1556 const char *color = PERF_COLOR_NORMAL; 1557 char stimestamp[32]; 1558 1559 BUG_ON(this_cpu.cpu >= MAX_CPUS || this_cpu.cpu < 0); 1560 1561 if (this_cpu.cpu > sched->max_cpu.cpu) 1562 sched->max_cpu = this_cpu; 1563 1564 if (sched->map.comp) { 1565 cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS); 1566 if (!__test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) { 1567 sched->map.comp_cpus[cpus_nr++] = this_cpu; 1568 new_cpu = true; 1569 } 1570 } else 1571 cpus_nr = sched->max_cpu.cpu; 1572 1573 timestamp0 = sched->cpu_last_switched[this_cpu.cpu]; 1574 sched->cpu_last_switched[this_cpu.cpu] = timestamp; 1575 if (timestamp0) 1576 delta = timestamp - timestamp0; 1577 else 1578 delta = 0; 1579 1580 if (delta < 0) { 1581 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 1582 return -1; 1583 } 1584 1585 sched_in = map__findnew_thread(sched, machine, -1, next_pid); 1586 if (sched_in == NULL) 1587 return -1; 1588 1589 tr = thread__get_runtime(sched_in); 1590 if (tr == NULL) { 1591 thread__put(sched_in); 1592 return -1; 1593 } 1594 1595 sched->curr_thread[this_cpu.cpu] = thread__get(sched_in); 1596 1597 printf(" "); 1598 1599 new_shortname = 0; 1600 if (!tr->shortname[0]) { 1601 if (!strcmp(thread__comm_str(sched_in), "swapper")) { 1602 /* 1603 * Don't allocate a letter-number for swapper:0 1604 * as a shortname. Instead, we use '.' for it. 1605 */ 1606 tr->shortname[0] = '.'; 1607 tr->shortname[1] = ' '; 1608 } else { 1609 tr->shortname[0] = sched->next_shortname1; 1610 tr->shortname[1] = sched->next_shortname2; 1611 1612 if (sched->next_shortname1 < 'Z') { 1613 sched->next_shortname1++; 1614 } else { 1615 sched->next_shortname1 = 'A'; 1616 if (sched->next_shortname2 < '9') 1617 sched->next_shortname2++; 1618 else 1619 sched->next_shortname2 = '0'; 1620 } 1621 } 1622 new_shortname = 1; 1623 } 1624 1625 for (i = 0; i < cpus_nr; i++) { 1626 struct perf_cpu cpu = { 1627 .cpu = sched->map.comp ? sched->map.comp_cpus[i].cpu : i, 1628 }; 1629 struct thread *curr_thread = sched->curr_thread[cpu.cpu]; 1630 struct thread_runtime *curr_tr; 1631 const char *pid_color = color; 1632 const char *cpu_color = color; 1633 1634 if (curr_thread && thread__has_color(curr_thread)) 1635 pid_color = COLOR_PIDS; 1636 1637 if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, cpu)) 1638 continue; 1639 1640 if (sched->map.color_cpus && perf_cpu_map__has(sched->map.color_cpus, cpu)) 1641 cpu_color = COLOR_CPUS; 1642 1643 if (cpu.cpu != this_cpu.cpu) 1644 color_fprintf(stdout, color, " "); 1645 else 1646 color_fprintf(stdout, cpu_color, "*"); 1647 1648 if (sched->curr_thread[cpu.cpu]) { 1649 curr_tr = thread__get_runtime(sched->curr_thread[cpu.cpu]); 1650 if (curr_tr == NULL) { 1651 thread__put(sched_in); 1652 return -1; 1653 } 1654 color_fprintf(stdout, pid_color, "%2s ", curr_tr->shortname); 1655 } else 1656 color_fprintf(stdout, color, " "); 1657 } 1658 1659 if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, this_cpu)) 1660 goto out; 1661 1662 timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp)); 1663 color_fprintf(stdout, color, " %12s secs ", stimestamp); 1664 if (new_shortname || tr->comm_changed || (verbose > 0 && thread__tid(sched_in))) { 1665 const char *pid_color = color; 1666 1667 if (thread__has_color(sched_in)) 1668 pid_color = COLOR_PIDS; 1669 1670 color_fprintf(stdout, pid_color, "%s => %s:%d", 1671 tr->shortname, thread__comm_str(sched_in), thread__tid(sched_in)); 1672 tr->comm_changed = false; 1673 } 1674 1675 if (sched->map.comp && new_cpu) 1676 color_fprintf(stdout, color, " (CPU %d)", this_cpu); 1677 1678 out: 1679 color_fprintf(stdout, color, "\n"); 1680 1681 thread__put(sched_in); 1682 1683 return 0; 1684 } 1685 1686 static int process_sched_switch_event(struct perf_tool *tool, 1687 struct evsel *evsel, 1688 struct perf_sample *sample, 1689 struct machine *machine) 1690 { 1691 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1692 int this_cpu = sample->cpu, err = 0; 1693 u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"), 1694 next_pid = evsel__intval(evsel, sample, "next_pid"); 1695 1696 if (sched->curr_pid[this_cpu] != (u32)-1) { 1697 /* 1698 * Are we trying to switch away a PID that is 1699 * not current? 1700 */ 1701 if (sched->curr_pid[this_cpu] != prev_pid) 1702 sched->nr_context_switch_bugs++; 1703 } 1704 1705 if (sched->tp_handler->switch_event) 1706 err = sched->tp_handler->switch_event(sched, evsel, sample, machine); 1707 1708 sched->curr_pid[this_cpu] = next_pid; 1709 return err; 1710 } 1711 1712 static int process_sched_runtime_event(struct perf_tool *tool, 1713 struct evsel *evsel, 1714 struct perf_sample *sample, 1715 struct machine *machine) 1716 { 1717 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1718 1719 if (sched->tp_handler->runtime_event) 1720 return sched->tp_handler->runtime_event(sched, evsel, sample, machine); 1721 1722 return 0; 1723 } 1724 1725 static int perf_sched__process_fork_event(struct perf_tool *tool, 1726 union perf_event *event, 1727 struct perf_sample *sample, 1728 struct machine *machine) 1729 { 1730 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1731 1732 /* run the fork event through the perf machinery */ 1733 perf_event__process_fork(tool, event, sample, machine); 1734 1735 /* and then run additional processing needed for this command */ 1736 if (sched->tp_handler->fork_event) 1737 return sched->tp_handler->fork_event(sched, event, machine); 1738 1739 return 0; 1740 } 1741 1742 static int process_sched_migrate_task_event(struct perf_tool *tool, 1743 struct evsel *evsel, 1744 struct perf_sample *sample, 1745 struct machine *machine) 1746 { 1747 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1748 1749 if (sched->tp_handler->migrate_task_event) 1750 return sched->tp_handler->migrate_task_event(sched, evsel, sample, machine); 1751 1752 return 0; 1753 } 1754 1755 typedef int (*tracepoint_handler)(struct perf_tool *tool, 1756 struct evsel *evsel, 1757 struct perf_sample *sample, 1758 struct machine *machine); 1759 1760 static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_unused, 1761 union perf_event *event __maybe_unused, 1762 struct perf_sample *sample, 1763 struct evsel *evsel, 1764 struct machine *machine) 1765 { 1766 int err = 0; 1767 1768 if (evsel->handler != NULL) { 1769 tracepoint_handler f = evsel->handler; 1770 err = f(tool, evsel, sample, machine); 1771 } 1772 1773 return err; 1774 } 1775 1776 static int perf_sched__process_comm(struct perf_tool *tool __maybe_unused, 1777 union perf_event *event, 1778 struct perf_sample *sample, 1779 struct machine *machine) 1780 { 1781 struct thread *thread; 1782 struct thread_runtime *tr; 1783 int err; 1784 1785 err = perf_event__process_comm(tool, event, sample, machine); 1786 if (err) 1787 return err; 1788 1789 thread = machine__find_thread(machine, sample->pid, sample->tid); 1790 if (!thread) { 1791 pr_err("Internal error: can't find thread\n"); 1792 return -1; 1793 } 1794 1795 tr = thread__get_runtime(thread); 1796 if (tr == NULL) { 1797 thread__put(thread); 1798 return -1; 1799 } 1800 1801 tr->comm_changed = true; 1802 thread__put(thread); 1803 1804 return 0; 1805 } 1806 1807 static int perf_sched__read_events(struct perf_sched *sched) 1808 { 1809 struct evsel_str_handler handlers[] = { 1810 { "sched:sched_switch", process_sched_switch_event, }, 1811 { "sched:sched_stat_runtime", process_sched_runtime_event, }, 1812 { "sched:sched_wakeup", process_sched_wakeup_event, }, 1813 { "sched:sched_waking", process_sched_wakeup_event, }, 1814 { "sched:sched_wakeup_new", process_sched_wakeup_event, }, 1815 { "sched:sched_migrate_task", process_sched_migrate_task_event, }, 1816 }; 1817 struct perf_session *session; 1818 struct perf_data data = { 1819 .path = input_name, 1820 .mode = PERF_DATA_MODE_READ, 1821 .force = sched->force, 1822 }; 1823 int rc = -1; 1824 1825 session = perf_session__new(&data, &sched->tool); 1826 if (IS_ERR(session)) { 1827 pr_debug("Error creating perf session"); 1828 return PTR_ERR(session); 1829 } 1830 1831 symbol__init(&session->header.env); 1832 1833 /* prefer sched_waking if it is captured */ 1834 if (evlist__find_tracepoint_by_name(session->evlist, "sched:sched_waking")) 1835 handlers[2].handler = process_sched_wakeup_ignore; 1836 1837 if (perf_session__set_tracepoints_handlers(session, handlers)) 1838 goto out_delete; 1839 1840 if (perf_session__has_traces(session, "record -R")) { 1841 int err = perf_session__process_events(session); 1842 if (err) { 1843 pr_err("Failed to process events, error %d", err); 1844 goto out_delete; 1845 } 1846 1847 sched->nr_events = session->evlist->stats.nr_events[0]; 1848 sched->nr_lost_events = session->evlist->stats.total_lost; 1849 sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST]; 1850 } 1851 1852 rc = 0; 1853 out_delete: 1854 perf_session__delete(session); 1855 return rc; 1856 } 1857 1858 /* 1859 * scheduling times are printed as msec.usec 1860 */ 1861 static inline void print_sched_time(unsigned long long nsecs, int width) 1862 { 1863 unsigned long msecs; 1864 unsigned long usecs; 1865 1866 msecs = nsecs / NSEC_PER_MSEC; 1867 nsecs -= msecs * NSEC_PER_MSEC; 1868 usecs = nsecs / NSEC_PER_USEC; 1869 printf("%*lu.%03lu ", width, msecs, usecs); 1870 } 1871 1872 /* 1873 * returns runtime data for event, allocating memory for it the 1874 * first time it is used. 1875 */ 1876 static struct evsel_runtime *evsel__get_runtime(struct evsel *evsel) 1877 { 1878 struct evsel_runtime *r = evsel->priv; 1879 1880 if (r == NULL) { 1881 r = zalloc(sizeof(struct evsel_runtime)); 1882 evsel->priv = r; 1883 } 1884 1885 return r; 1886 } 1887 1888 /* 1889 * save last time event was seen per cpu 1890 */ 1891 static void evsel__save_time(struct evsel *evsel, u64 timestamp, u32 cpu) 1892 { 1893 struct evsel_runtime *r = evsel__get_runtime(evsel); 1894 1895 if (r == NULL) 1896 return; 1897 1898 if ((cpu >= r->ncpu) || (r->last_time == NULL)) { 1899 int i, n = __roundup_pow_of_two(cpu+1); 1900 void *p = r->last_time; 1901 1902 p = realloc(r->last_time, n * sizeof(u64)); 1903 if (!p) 1904 return; 1905 1906 r->last_time = p; 1907 for (i = r->ncpu; i < n; ++i) 1908 r->last_time[i] = (u64) 0; 1909 1910 r->ncpu = n; 1911 } 1912 1913 r->last_time[cpu] = timestamp; 1914 } 1915 1916 /* returns last time this event was seen on the given cpu */ 1917 static u64 evsel__get_time(struct evsel *evsel, u32 cpu) 1918 { 1919 struct evsel_runtime *r = evsel__get_runtime(evsel); 1920 1921 if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu)) 1922 return 0; 1923 1924 return r->last_time[cpu]; 1925 } 1926 1927 static int comm_width = 30; 1928 1929 static char *timehist_get_commstr(struct thread *thread) 1930 { 1931 static char str[32]; 1932 const char *comm = thread__comm_str(thread); 1933 pid_t tid = thread__tid(thread); 1934 pid_t pid = thread__pid(thread); 1935 int n; 1936 1937 if (pid == 0) 1938 n = scnprintf(str, sizeof(str), "%s", comm); 1939 1940 else if (tid != pid) 1941 n = scnprintf(str, sizeof(str), "%s[%d/%d]", comm, tid, pid); 1942 1943 else 1944 n = scnprintf(str, sizeof(str), "%s[%d]", comm, tid); 1945 1946 if (n > comm_width) 1947 comm_width = n; 1948 1949 return str; 1950 } 1951 1952 static void timehist_header(struct perf_sched *sched) 1953 { 1954 u32 ncpus = sched->max_cpu.cpu + 1; 1955 u32 i, j; 1956 1957 printf("%15s %6s ", "time", "cpu"); 1958 1959 if (sched->show_cpu_visual) { 1960 printf(" "); 1961 for (i = 0, j = 0; i < ncpus; ++i) { 1962 printf("%x", j++); 1963 if (j > 15) 1964 j = 0; 1965 } 1966 printf(" "); 1967 } 1968 1969 printf(" %-*s %9s %9s %9s", comm_width, 1970 "task name", "wait time", "sch delay", "run time"); 1971 1972 if (sched->show_state) 1973 printf(" %s", "state"); 1974 1975 printf("\n"); 1976 1977 /* 1978 * units row 1979 */ 1980 printf("%15s %-6s ", "", ""); 1981 1982 if (sched->show_cpu_visual) 1983 printf(" %*s ", ncpus, ""); 1984 1985 printf(" %-*s %9s %9s %9s", comm_width, 1986 "[tid/pid]", "(msec)", "(msec)", "(msec)"); 1987 1988 if (sched->show_state) 1989 printf(" %5s", ""); 1990 1991 printf("\n"); 1992 1993 /* 1994 * separator 1995 */ 1996 printf("%.15s %.6s ", graph_dotted_line, graph_dotted_line); 1997 1998 if (sched->show_cpu_visual) 1999 printf(" %.*s ", ncpus, graph_dotted_line); 2000 2001 printf(" %.*s %.9s %.9s %.9s", comm_width, 2002 graph_dotted_line, graph_dotted_line, graph_dotted_line, 2003 graph_dotted_line); 2004 2005 if (sched->show_state) 2006 printf(" %.5s", graph_dotted_line); 2007 2008 printf("\n"); 2009 } 2010 2011 static void timehist_print_sample(struct perf_sched *sched, 2012 struct evsel *evsel, 2013 struct perf_sample *sample, 2014 struct addr_location *al, 2015 struct thread *thread, 2016 u64 t, const char state) 2017 { 2018 struct thread_runtime *tr = thread__priv(thread); 2019 const char *next_comm = evsel__strval(evsel, sample, "next_comm"); 2020 const u32 next_pid = evsel__intval(evsel, sample, "next_pid"); 2021 u32 max_cpus = sched->max_cpu.cpu + 1; 2022 char tstr[64]; 2023 char nstr[30]; 2024 u64 wait_time; 2025 2026 if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) 2027 return; 2028 2029 timestamp__scnprintf_usec(t, tstr, sizeof(tstr)); 2030 printf("%15s [%04d] ", tstr, sample->cpu); 2031 2032 if (sched->show_cpu_visual) { 2033 u32 i; 2034 char c; 2035 2036 printf(" "); 2037 for (i = 0; i < max_cpus; ++i) { 2038 /* flag idle times with 'i'; others are sched events */ 2039 if (i == sample->cpu) 2040 c = (thread__tid(thread) == 0) ? 'i' : 's'; 2041 else 2042 c = ' '; 2043 printf("%c", c); 2044 } 2045 printf(" "); 2046 } 2047 2048 printf(" %-*s ", comm_width, timehist_get_commstr(thread)); 2049 2050 wait_time = tr->dt_sleep + tr->dt_iowait + tr->dt_preempt; 2051 print_sched_time(wait_time, 6); 2052 2053 print_sched_time(tr->dt_delay, 6); 2054 print_sched_time(tr->dt_run, 6); 2055 2056 if (sched->show_state) 2057 printf(" %5c ", thread__tid(thread) == 0 ? 'I' : state); 2058 2059 if (sched->show_next) { 2060 snprintf(nstr, sizeof(nstr), "next: %s[%d]", next_comm, next_pid); 2061 printf(" %-*s", comm_width, nstr); 2062 } 2063 2064 if (sched->show_wakeups && !sched->show_next) 2065 printf(" %-*s", comm_width, ""); 2066 2067 if (thread__tid(thread) == 0) 2068 goto out; 2069 2070 if (sched->show_callchain) 2071 printf(" "); 2072 2073 sample__fprintf_sym(sample, al, 0, 2074 EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE | 2075 EVSEL__PRINT_CALLCHAIN_ARROW | 2076 EVSEL__PRINT_SKIP_IGNORED, 2077 get_tls_callchain_cursor(), symbol_conf.bt_stop_list, stdout); 2078 2079 out: 2080 printf("\n"); 2081 } 2082 2083 /* 2084 * Explanation of delta-time stats: 2085 * 2086 * t = time of current schedule out event 2087 * tprev = time of previous sched out event 2088 * also time of schedule-in event for current task 2089 * last_time = time of last sched change event for current task 2090 * (i.e, time process was last scheduled out) 2091 * ready_to_run = time of wakeup for current task 2092 * 2093 * -----|------------|------------|------------|------ 2094 * last ready tprev t 2095 * time to run 2096 * 2097 * |-------- dt_wait --------| 2098 * |- dt_delay -|-- dt_run --| 2099 * 2100 * dt_run = run time of current task 2101 * dt_wait = time between last schedule out event for task and tprev 2102 * represents time spent off the cpu 2103 * dt_delay = time between wakeup and schedule-in of task 2104 */ 2105 2106 static void timehist_update_runtime_stats(struct thread_runtime *r, 2107 u64 t, u64 tprev) 2108 { 2109 r->dt_delay = 0; 2110 r->dt_sleep = 0; 2111 r->dt_iowait = 0; 2112 r->dt_preempt = 0; 2113 r->dt_run = 0; 2114 2115 if (tprev) { 2116 r->dt_run = t - tprev; 2117 if (r->ready_to_run) { 2118 if (r->ready_to_run > tprev) 2119 pr_debug("time travel: wakeup time for task > previous sched_switch event\n"); 2120 else 2121 r->dt_delay = tprev - r->ready_to_run; 2122 } 2123 2124 if (r->last_time > tprev) 2125 pr_debug("time travel: last sched out time for task > previous sched_switch event\n"); 2126 else if (r->last_time) { 2127 u64 dt_wait = tprev - r->last_time; 2128 2129 if (r->last_state == 'R') 2130 r->dt_preempt = dt_wait; 2131 else if (r->last_state == 'D') 2132 r->dt_iowait = dt_wait; 2133 else 2134 r->dt_sleep = dt_wait; 2135 } 2136 } 2137 2138 update_stats(&r->run_stats, r->dt_run); 2139 2140 r->total_run_time += r->dt_run; 2141 r->total_delay_time += r->dt_delay; 2142 r->total_sleep_time += r->dt_sleep; 2143 r->total_iowait_time += r->dt_iowait; 2144 r->total_preempt_time += r->dt_preempt; 2145 } 2146 2147 static bool is_idle_sample(struct perf_sample *sample, 2148 struct evsel *evsel) 2149 { 2150 /* pid 0 == swapper == idle task */ 2151 if (evsel__name_is(evsel, "sched:sched_switch")) 2152 return evsel__intval(evsel, sample, "prev_pid") == 0; 2153 2154 return sample->pid == 0; 2155 } 2156 2157 static void save_task_callchain(struct perf_sched *sched, 2158 struct perf_sample *sample, 2159 struct evsel *evsel, 2160 struct machine *machine) 2161 { 2162 struct callchain_cursor *cursor; 2163 struct thread *thread; 2164 2165 /* want main thread for process - has maps */ 2166 thread = machine__findnew_thread(machine, sample->pid, sample->pid); 2167 if (thread == NULL) { 2168 pr_debug("Failed to get thread for pid %d.\n", sample->pid); 2169 return; 2170 } 2171 2172 if (!sched->show_callchain || sample->callchain == NULL) 2173 return; 2174 2175 cursor = get_tls_callchain_cursor(); 2176 2177 if (thread__resolve_callchain(thread, cursor, evsel, sample, 2178 NULL, NULL, sched->max_stack + 2) != 0) { 2179 if (verbose > 0) 2180 pr_err("Failed to resolve callchain. Skipping\n"); 2181 2182 return; 2183 } 2184 2185 callchain_cursor_commit(cursor); 2186 2187 while (true) { 2188 struct callchain_cursor_node *node; 2189 struct symbol *sym; 2190 2191 node = callchain_cursor_current(cursor); 2192 if (node == NULL) 2193 break; 2194 2195 sym = node->ms.sym; 2196 if (sym) { 2197 if (!strcmp(sym->name, "schedule") || 2198 !strcmp(sym->name, "__schedule") || 2199 !strcmp(sym->name, "preempt_schedule")) 2200 sym->ignore = 1; 2201 } 2202 2203 callchain_cursor_advance(cursor); 2204 } 2205 } 2206 2207 static int init_idle_thread(struct thread *thread) 2208 { 2209 struct idle_thread_runtime *itr; 2210 2211 thread__set_comm(thread, idle_comm, 0); 2212 2213 itr = zalloc(sizeof(*itr)); 2214 if (itr == NULL) 2215 return -ENOMEM; 2216 2217 init_stats(&itr->tr.run_stats); 2218 callchain_init(&itr->callchain); 2219 callchain_cursor_reset(&itr->cursor); 2220 thread__set_priv(thread, itr); 2221 2222 return 0; 2223 } 2224 2225 /* 2226 * Track idle stats per cpu by maintaining a local thread 2227 * struct for the idle task on each cpu. 2228 */ 2229 static int init_idle_threads(int ncpu) 2230 { 2231 int i, ret; 2232 2233 idle_threads = zalloc(ncpu * sizeof(struct thread *)); 2234 if (!idle_threads) 2235 return -ENOMEM; 2236 2237 idle_max_cpu = ncpu; 2238 2239 /* allocate the actual thread struct if needed */ 2240 for (i = 0; i < ncpu; ++i) { 2241 idle_threads[i] = thread__new(0, 0); 2242 if (idle_threads[i] == NULL) 2243 return -ENOMEM; 2244 2245 ret = init_idle_thread(idle_threads[i]); 2246 if (ret < 0) 2247 return ret; 2248 } 2249 2250 return 0; 2251 } 2252 2253 static void free_idle_threads(void) 2254 { 2255 int i; 2256 2257 if (idle_threads == NULL) 2258 return; 2259 2260 for (i = 0; i < idle_max_cpu; ++i) { 2261 if ((idle_threads[i])) 2262 thread__delete(idle_threads[i]); 2263 } 2264 2265 free(idle_threads); 2266 } 2267 2268 static struct thread *get_idle_thread(int cpu) 2269 { 2270 /* 2271 * expand/allocate array of pointers to local thread 2272 * structs if needed 2273 */ 2274 if ((cpu >= idle_max_cpu) || (idle_threads == NULL)) { 2275 int i, j = __roundup_pow_of_two(cpu+1); 2276 void *p; 2277 2278 p = realloc(idle_threads, j * sizeof(struct thread *)); 2279 if (!p) 2280 return NULL; 2281 2282 idle_threads = (struct thread **) p; 2283 for (i = idle_max_cpu; i < j; ++i) 2284 idle_threads[i] = NULL; 2285 2286 idle_max_cpu = j; 2287 } 2288 2289 /* allocate a new thread struct if needed */ 2290 if (idle_threads[cpu] == NULL) { 2291 idle_threads[cpu] = thread__new(0, 0); 2292 2293 if (idle_threads[cpu]) { 2294 if (init_idle_thread(idle_threads[cpu]) < 0) 2295 return NULL; 2296 } 2297 } 2298 2299 return idle_threads[cpu]; 2300 } 2301 2302 static void save_idle_callchain(struct perf_sched *sched, 2303 struct idle_thread_runtime *itr, 2304 struct perf_sample *sample) 2305 { 2306 struct callchain_cursor *cursor; 2307 2308 if (!sched->show_callchain || sample->callchain == NULL) 2309 return; 2310 2311 cursor = get_tls_callchain_cursor(); 2312 if (cursor == NULL) 2313 return; 2314 2315 callchain_cursor__copy(&itr->cursor, cursor); 2316 } 2317 2318 static struct thread *timehist_get_thread(struct perf_sched *sched, 2319 struct perf_sample *sample, 2320 struct machine *machine, 2321 struct evsel *evsel) 2322 { 2323 struct thread *thread; 2324 2325 if (is_idle_sample(sample, evsel)) { 2326 thread = get_idle_thread(sample->cpu); 2327 if (thread == NULL) 2328 pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); 2329 2330 } else { 2331 /* there were samples with tid 0 but non-zero pid */ 2332 thread = machine__findnew_thread(machine, sample->pid, 2333 sample->tid ?: sample->pid); 2334 if (thread == NULL) { 2335 pr_debug("Failed to get thread for tid %d. skipping sample.\n", 2336 sample->tid); 2337 } 2338 2339 save_task_callchain(sched, sample, evsel, machine); 2340 if (sched->idle_hist) { 2341 struct thread *idle; 2342 struct idle_thread_runtime *itr; 2343 2344 idle = get_idle_thread(sample->cpu); 2345 if (idle == NULL) { 2346 pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); 2347 return NULL; 2348 } 2349 2350 itr = thread__priv(idle); 2351 if (itr == NULL) 2352 return NULL; 2353 2354 itr->last_thread = thread; 2355 2356 /* copy task callchain when entering to idle */ 2357 if (evsel__intval(evsel, sample, "next_pid") == 0) 2358 save_idle_callchain(sched, itr, sample); 2359 } 2360 } 2361 2362 return thread; 2363 } 2364 2365 static bool timehist_skip_sample(struct perf_sched *sched, 2366 struct thread *thread, 2367 struct evsel *evsel, 2368 struct perf_sample *sample) 2369 { 2370 bool rc = false; 2371 2372 if (thread__is_filtered(thread)) { 2373 rc = true; 2374 sched->skipped_samples++; 2375 } 2376 2377 if (sched->idle_hist) { 2378 if (!evsel__name_is(evsel, "sched:sched_switch")) 2379 rc = true; 2380 else if (evsel__intval(evsel, sample, "prev_pid") != 0 && 2381 evsel__intval(evsel, sample, "next_pid") != 0) 2382 rc = true; 2383 } 2384 2385 return rc; 2386 } 2387 2388 static void timehist_print_wakeup_event(struct perf_sched *sched, 2389 struct evsel *evsel, 2390 struct perf_sample *sample, 2391 struct machine *machine, 2392 struct thread *awakened) 2393 { 2394 struct thread *thread; 2395 char tstr[64]; 2396 2397 thread = machine__findnew_thread(machine, sample->pid, sample->tid); 2398 if (thread == NULL) 2399 return; 2400 2401 /* show wakeup unless both awakee and awaker are filtered */ 2402 if (timehist_skip_sample(sched, thread, evsel, sample) && 2403 timehist_skip_sample(sched, awakened, evsel, sample)) { 2404 return; 2405 } 2406 2407 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); 2408 printf("%15s [%04d] ", tstr, sample->cpu); 2409 if (sched->show_cpu_visual) 2410 printf(" %*s ", sched->max_cpu.cpu + 1, ""); 2411 2412 printf(" %-*s ", comm_width, timehist_get_commstr(thread)); 2413 2414 /* dt spacer */ 2415 printf(" %9s %9s %9s ", "", "", ""); 2416 2417 printf("awakened: %s", timehist_get_commstr(awakened)); 2418 2419 printf("\n"); 2420 } 2421 2422 static int timehist_sched_wakeup_ignore(struct perf_tool *tool __maybe_unused, 2423 union perf_event *event __maybe_unused, 2424 struct evsel *evsel __maybe_unused, 2425 struct perf_sample *sample __maybe_unused, 2426 struct machine *machine __maybe_unused) 2427 { 2428 return 0; 2429 } 2430 2431 static int timehist_sched_wakeup_event(struct perf_tool *tool, 2432 union perf_event *event __maybe_unused, 2433 struct evsel *evsel, 2434 struct perf_sample *sample, 2435 struct machine *machine) 2436 { 2437 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 2438 struct thread *thread; 2439 struct thread_runtime *tr = NULL; 2440 /* want pid of awakened task not pid in sample */ 2441 const u32 pid = evsel__intval(evsel, sample, "pid"); 2442 2443 thread = machine__findnew_thread(machine, 0, pid); 2444 if (thread == NULL) 2445 return -1; 2446 2447 tr = thread__get_runtime(thread); 2448 if (tr == NULL) 2449 return -1; 2450 2451 if (tr->ready_to_run == 0) 2452 tr->ready_to_run = sample->time; 2453 2454 /* show wakeups if requested */ 2455 if (sched->show_wakeups && 2456 !perf_time__skip_sample(&sched->ptime, sample->time)) 2457 timehist_print_wakeup_event(sched, evsel, sample, machine, thread); 2458 2459 return 0; 2460 } 2461 2462 static void timehist_print_migration_event(struct perf_sched *sched, 2463 struct evsel *evsel, 2464 struct perf_sample *sample, 2465 struct machine *machine, 2466 struct thread *migrated) 2467 { 2468 struct thread *thread; 2469 char tstr[64]; 2470 u32 max_cpus; 2471 u32 ocpu, dcpu; 2472 2473 if (sched->summary_only) 2474 return; 2475 2476 max_cpus = sched->max_cpu.cpu + 1; 2477 ocpu = evsel__intval(evsel, sample, "orig_cpu"); 2478 dcpu = evsel__intval(evsel, sample, "dest_cpu"); 2479 2480 thread = machine__findnew_thread(machine, sample->pid, sample->tid); 2481 if (thread == NULL) 2482 return; 2483 2484 if (timehist_skip_sample(sched, thread, evsel, sample) && 2485 timehist_skip_sample(sched, migrated, evsel, sample)) { 2486 return; 2487 } 2488 2489 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); 2490 printf("%15s [%04d] ", tstr, sample->cpu); 2491 2492 if (sched->show_cpu_visual) { 2493 u32 i; 2494 char c; 2495 2496 printf(" "); 2497 for (i = 0; i < max_cpus; ++i) { 2498 c = (i == sample->cpu) ? 'm' : ' '; 2499 printf("%c", c); 2500 } 2501 printf(" "); 2502 } 2503 2504 printf(" %-*s ", comm_width, timehist_get_commstr(thread)); 2505 2506 /* dt spacer */ 2507 printf(" %9s %9s %9s ", "", "", ""); 2508 2509 printf("migrated: %s", timehist_get_commstr(migrated)); 2510 printf(" cpu %d => %d", ocpu, dcpu); 2511 2512 printf("\n"); 2513 } 2514 2515 static int timehist_migrate_task_event(struct perf_tool *tool, 2516 union perf_event *event __maybe_unused, 2517 struct evsel *evsel, 2518 struct perf_sample *sample, 2519 struct machine *machine) 2520 { 2521 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 2522 struct thread *thread; 2523 struct thread_runtime *tr = NULL; 2524 /* want pid of migrated task not pid in sample */ 2525 const u32 pid = evsel__intval(evsel, sample, "pid"); 2526 2527 thread = machine__findnew_thread(machine, 0, pid); 2528 if (thread == NULL) 2529 return -1; 2530 2531 tr = thread__get_runtime(thread); 2532 if (tr == NULL) 2533 return -1; 2534 2535 tr->migrations++; 2536 2537 /* show migrations if requested */ 2538 timehist_print_migration_event(sched, evsel, sample, machine, thread); 2539 2540 return 0; 2541 } 2542 2543 static int timehist_sched_change_event(struct perf_tool *tool, 2544 union perf_event *event, 2545 struct evsel *evsel, 2546 struct perf_sample *sample, 2547 struct machine *machine) 2548 { 2549 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 2550 struct perf_time_interval *ptime = &sched->ptime; 2551 struct addr_location al; 2552 struct thread *thread; 2553 struct thread_runtime *tr = NULL; 2554 u64 tprev, t = sample->time; 2555 int rc = 0; 2556 const char state = evsel__taskstate(evsel, sample, "prev_state"); 2557 2558 addr_location__init(&al); 2559 if (machine__resolve(machine, &al, sample) < 0) { 2560 pr_err("problem processing %d event. skipping it\n", 2561 event->header.type); 2562 rc = -1; 2563 goto out; 2564 } 2565 2566 thread = timehist_get_thread(sched, sample, machine, evsel); 2567 if (thread == NULL) { 2568 rc = -1; 2569 goto out; 2570 } 2571 2572 if (timehist_skip_sample(sched, thread, evsel, sample)) 2573 goto out; 2574 2575 tr = thread__get_runtime(thread); 2576 if (tr == NULL) { 2577 rc = -1; 2578 goto out; 2579 } 2580 2581 tprev = evsel__get_time(evsel, sample->cpu); 2582 2583 /* 2584 * If start time given: 2585 * - sample time is under window user cares about - skip sample 2586 * - tprev is under window user cares about - reset to start of window 2587 */ 2588 if (ptime->start && ptime->start > t) 2589 goto out; 2590 2591 if (tprev && ptime->start > tprev) 2592 tprev = ptime->start; 2593 2594 /* 2595 * If end time given: 2596 * - previous sched event is out of window - we are done 2597 * - sample time is beyond window user cares about - reset it 2598 * to close out stats for time window interest 2599 */ 2600 if (ptime->end) { 2601 if (tprev > ptime->end) 2602 goto out; 2603 2604 if (t > ptime->end) 2605 t = ptime->end; 2606 } 2607 2608 if (!sched->idle_hist || thread__tid(thread) == 0) { 2609 if (!cpu_list || test_bit(sample->cpu, cpu_bitmap)) 2610 timehist_update_runtime_stats(tr, t, tprev); 2611 2612 if (sched->idle_hist) { 2613 struct idle_thread_runtime *itr = (void *)tr; 2614 struct thread_runtime *last_tr; 2615 2616 BUG_ON(thread__tid(thread) != 0); 2617 2618 if (itr->last_thread == NULL) 2619 goto out; 2620 2621 /* add current idle time as last thread's runtime */ 2622 last_tr = thread__get_runtime(itr->last_thread); 2623 if (last_tr == NULL) 2624 goto out; 2625 2626 timehist_update_runtime_stats(last_tr, t, tprev); 2627 /* 2628 * remove delta time of last thread as it's not updated 2629 * and otherwise it will show an invalid value next 2630 * time. we only care total run time and run stat. 2631 */ 2632 last_tr->dt_run = 0; 2633 last_tr->dt_delay = 0; 2634 last_tr->dt_sleep = 0; 2635 last_tr->dt_iowait = 0; 2636 last_tr->dt_preempt = 0; 2637 2638 if (itr->cursor.nr) 2639 callchain_append(&itr->callchain, &itr->cursor, t - tprev); 2640 2641 itr->last_thread = NULL; 2642 } 2643 } 2644 2645 if (!sched->summary_only) 2646 timehist_print_sample(sched, evsel, sample, &al, thread, t, state); 2647 2648 out: 2649 if (sched->hist_time.start == 0 && t >= ptime->start) 2650 sched->hist_time.start = t; 2651 if (ptime->end == 0 || t <= ptime->end) 2652 sched->hist_time.end = t; 2653 2654 if (tr) { 2655 /* time of this sched_switch event becomes last time task seen */ 2656 tr->last_time = sample->time; 2657 2658 /* last state is used to determine where to account wait time */ 2659 tr->last_state = state; 2660 2661 /* sched out event for task so reset ready to run time */ 2662 tr->ready_to_run = 0; 2663 } 2664 2665 evsel__save_time(evsel, sample->time, sample->cpu); 2666 2667 addr_location__exit(&al); 2668 return rc; 2669 } 2670 2671 static int timehist_sched_switch_event(struct perf_tool *tool, 2672 union perf_event *event, 2673 struct evsel *evsel, 2674 struct perf_sample *sample, 2675 struct machine *machine __maybe_unused) 2676 { 2677 return timehist_sched_change_event(tool, event, evsel, sample, machine); 2678 } 2679 2680 static int process_lost(struct perf_tool *tool __maybe_unused, 2681 union perf_event *event, 2682 struct perf_sample *sample, 2683 struct machine *machine __maybe_unused) 2684 { 2685 char tstr[64]; 2686 2687 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); 2688 printf("%15s ", tstr); 2689 printf("lost %" PRI_lu64 " events on cpu %d\n", event->lost.lost, sample->cpu); 2690 2691 return 0; 2692 } 2693 2694 2695 static void print_thread_runtime(struct thread *t, 2696 struct thread_runtime *r) 2697 { 2698 double mean = avg_stats(&r->run_stats); 2699 float stddev; 2700 2701 printf("%*s %5d %9" PRIu64 " ", 2702 comm_width, timehist_get_commstr(t), thread__ppid(t), 2703 (u64) r->run_stats.n); 2704 2705 print_sched_time(r->total_run_time, 8); 2706 stddev = rel_stddev_stats(stddev_stats(&r->run_stats), mean); 2707 print_sched_time(r->run_stats.min, 6); 2708 printf(" "); 2709 print_sched_time((u64) mean, 6); 2710 printf(" "); 2711 print_sched_time(r->run_stats.max, 6); 2712 printf(" "); 2713 printf("%5.2f", stddev); 2714 printf(" %5" PRIu64, r->migrations); 2715 printf("\n"); 2716 } 2717 2718 static void print_thread_waittime(struct thread *t, 2719 struct thread_runtime *r) 2720 { 2721 printf("%*s %5d %9" PRIu64 " ", 2722 comm_width, timehist_get_commstr(t), thread__ppid(t), 2723 (u64) r->run_stats.n); 2724 2725 print_sched_time(r->total_run_time, 8); 2726 print_sched_time(r->total_sleep_time, 6); 2727 printf(" "); 2728 print_sched_time(r->total_iowait_time, 6); 2729 printf(" "); 2730 print_sched_time(r->total_preempt_time, 6); 2731 printf(" "); 2732 print_sched_time(r->total_delay_time, 6); 2733 printf("\n"); 2734 } 2735 2736 struct total_run_stats { 2737 struct perf_sched *sched; 2738 u64 sched_count; 2739 u64 task_count; 2740 u64 total_run_time; 2741 }; 2742 2743 static int show_thread_runtime(struct thread *t, void *priv) 2744 { 2745 struct total_run_stats *stats = priv; 2746 struct thread_runtime *r; 2747 2748 if (thread__is_filtered(t)) 2749 return 0; 2750 2751 r = thread__priv(t); 2752 if (r && r->run_stats.n) { 2753 stats->task_count++; 2754 stats->sched_count += r->run_stats.n; 2755 stats->total_run_time += r->total_run_time; 2756 2757 if (stats->sched->show_state) 2758 print_thread_waittime(t, r); 2759 else 2760 print_thread_runtime(t, r); 2761 } 2762 2763 return 0; 2764 } 2765 2766 static size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node) 2767 { 2768 const char *sep = " <- "; 2769 struct callchain_list *chain; 2770 size_t ret = 0; 2771 char bf[1024]; 2772 bool first; 2773 2774 if (node == NULL) 2775 return 0; 2776 2777 ret = callchain__fprintf_folded(fp, node->parent); 2778 first = (ret == 0); 2779 2780 list_for_each_entry(chain, &node->val, list) { 2781 if (chain->ip >= PERF_CONTEXT_MAX) 2782 continue; 2783 if (chain->ms.sym && chain->ms.sym->ignore) 2784 continue; 2785 ret += fprintf(fp, "%s%s", first ? "" : sep, 2786 callchain_list__sym_name(chain, bf, sizeof(bf), 2787 false)); 2788 first = false; 2789 } 2790 2791 return ret; 2792 } 2793 2794 static size_t timehist_print_idlehist_callchain(struct rb_root_cached *root) 2795 { 2796 size_t ret = 0; 2797 FILE *fp = stdout; 2798 struct callchain_node *chain; 2799 struct rb_node *rb_node = rb_first_cached(root); 2800 2801 printf(" %16s %8s %s\n", "Idle time (msec)", "Count", "Callchains"); 2802 printf(" %.16s %.8s %.50s\n", graph_dotted_line, graph_dotted_line, 2803 graph_dotted_line); 2804 2805 while (rb_node) { 2806 chain = rb_entry(rb_node, struct callchain_node, rb_node); 2807 rb_node = rb_next(rb_node); 2808 2809 ret += fprintf(fp, " "); 2810 print_sched_time(chain->hit, 12); 2811 ret += 16; /* print_sched_time returns 2nd arg + 4 */ 2812 ret += fprintf(fp, " %8d ", chain->count); 2813 ret += callchain__fprintf_folded(fp, chain); 2814 ret += fprintf(fp, "\n"); 2815 } 2816 2817 return ret; 2818 } 2819 2820 static void timehist_print_summary(struct perf_sched *sched, 2821 struct perf_session *session) 2822 { 2823 struct machine *m = &session->machines.host; 2824 struct total_run_stats totals; 2825 u64 task_count; 2826 struct thread *t; 2827 struct thread_runtime *r; 2828 int i; 2829 u64 hist_time = sched->hist_time.end - sched->hist_time.start; 2830 2831 memset(&totals, 0, sizeof(totals)); 2832 totals.sched = sched; 2833 2834 if (sched->idle_hist) { 2835 printf("\nIdle-time summary\n"); 2836 printf("%*s parent sched-out ", comm_width, "comm"); 2837 printf(" idle-time min-idle avg-idle max-idle stddev migrations\n"); 2838 } else if (sched->show_state) { 2839 printf("\nWait-time summary\n"); 2840 printf("%*s parent sched-in ", comm_width, "comm"); 2841 printf(" run-time sleep iowait preempt delay\n"); 2842 } else { 2843 printf("\nRuntime summary\n"); 2844 printf("%*s parent sched-in ", comm_width, "comm"); 2845 printf(" run-time min-run avg-run max-run stddev migrations\n"); 2846 } 2847 printf("%*s (count) ", comm_width, ""); 2848 printf(" (msec) (msec) (msec) (msec) %s\n", 2849 sched->show_state ? "(msec)" : "%"); 2850 printf("%.117s\n", graph_dotted_line); 2851 2852 machine__for_each_thread(m, show_thread_runtime, &totals); 2853 task_count = totals.task_count; 2854 if (!task_count) 2855 printf("<no still running tasks>\n"); 2856 2857 /* CPU idle stats not tracked when samples were skipped */ 2858 if (sched->skipped_samples && !sched->idle_hist) 2859 return; 2860 2861 printf("\nIdle stats:\n"); 2862 for (i = 0; i < idle_max_cpu; ++i) { 2863 if (cpu_list && !test_bit(i, cpu_bitmap)) 2864 continue; 2865 2866 t = idle_threads[i]; 2867 if (!t) 2868 continue; 2869 2870 r = thread__priv(t); 2871 if (r && r->run_stats.n) { 2872 totals.sched_count += r->run_stats.n; 2873 printf(" CPU %2d idle for ", i); 2874 print_sched_time(r->total_run_time, 6); 2875 printf(" msec (%6.2f%%)\n", 100.0 * r->total_run_time / hist_time); 2876 } else 2877 printf(" CPU %2d idle entire time window\n", i); 2878 } 2879 2880 if (sched->idle_hist && sched->show_callchain) { 2881 callchain_param.mode = CHAIN_FOLDED; 2882 callchain_param.value = CCVAL_PERIOD; 2883 2884 callchain_register_param(&callchain_param); 2885 2886 printf("\nIdle stats by callchain:\n"); 2887 for (i = 0; i < idle_max_cpu; ++i) { 2888 struct idle_thread_runtime *itr; 2889 2890 t = idle_threads[i]; 2891 if (!t) 2892 continue; 2893 2894 itr = thread__priv(t); 2895 if (itr == NULL) 2896 continue; 2897 2898 callchain_param.sort(&itr->sorted_root.rb_root, &itr->callchain, 2899 0, &callchain_param); 2900 2901 printf(" CPU %2d:", i); 2902 print_sched_time(itr->tr.total_run_time, 6); 2903 printf(" msec\n"); 2904 timehist_print_idlehist_callchain(&itr->sorted_root); 2905 printf("\n"); 2906 } 2907 } 2908 2909 printf("\n" 2910 " Total number of unique tasks: %" PRIu64 "\n" 2911 "Total number of context switches: %" PRIu64 "\n", 2912 totals.task_count, totals.sched_count); 2913 2914 printf(" Total run time (msec): "); 2915 print_sched_time(totals.total_run_time, 2); 2916 printf("\n"); 2917 2918 printf(" Total scheduling time (msec): "); 2919 print_sched_time(hist_time, 2); 2920 printf(" (x %d)\n", sched->max_cpu.cpu); 2921 } 2922 2923 typedef int (*sched_handler)(struct perf_tool *tool, 2924 union perf_event *event, 2925 struct evsel *evsel, 2926 struct perf_sample *sample, 2927 struct machine *machine); 2928 2929 static int perf_timehist__process_sample(struct perf_tool *tool, 2930 union perf_event *event, 2931 struct perf_sample *sample, 2932 struct evsel *evsel, 2933 struct machine *machine) 2934 { 2935 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 2936 int err = 0; 2937 struct perf_cpu this_cpu = { 2938 .cpu = sample->cpu, 2939 }; 2940 2941 if (this_cpu.cpu > sched->max_cpu.cpu) 2942 sched->max_cpu = this_cpu; 2943 2944 if (evsel->handler != NULL) { 2945 sched_handler f = evsel->handler; 2946 2947 err = f(tool, event, evsel, sample, machine); 2948 } 2949 2950 return err; 2951 } 2952 2953 static int timehist_check_attr(struct perf_sched *sched, 2954 struct evlist *evlist) 2955 { 2956 struct evsel *evsel; 2957 struct evsel_runtime *er; 2958 2959 list_for_each_entry(evsel, &evlist->core.entries, core.node) { 2960 er = evsel__get_runtime(evsel); 2961 if (er == NULL) { 2962 pr_err("Failed to allocate memory for evsel runtime data\n"); 2963 return -1; 2964 } 2965 2966 /* only need to save callchain related to sched_switch event */ 2967 if (sched->show_callchain && 2968 evsel__name_is(evsel, "sched:sched_switch") && 2969 !evsel__has_callchain(evsel)) { 2970 pr_info("Samples of sched_switch event do not have callchains.\n"); 2971 sched->show_callchain = 0; 2972 symbol_conf.use_callchain = 0; 2973 } 2974 } 2975 2976 return 0; 2977 } 2978 2979 static int perf_sched__timehist(struct perf_sched *sched) 2980 { 2981 struct evsel_str_handler handlers[] = { 2982 { "sched:sched_switch", timehist_sched_switch_event, }, 2983 { "sched:sched_wakeup", timehist_sched_wakeup_event, }, 2984 { "sched:sched_waking", timehist_sched_wakeup_event, }, 2985 { "sched:sched_wakeup_new", timehist_sched_wakeup_event, }, 2986 }; 2987 const struct evsel_str_handler migrate_handlers[] = { 2988 { "sched:sched_migrate_task", timehist_migrate_task_event, }, 2989 }; 2990 struct perf_data data = { 2991 .path = input_name, 2992 .mode = PERF_DATA_MODE_READ, 2993 .force = sched->force, 2994 }; 2995 2996 struct perf_session *session; 2997 struct evlist *evlist; 2998 int err = -1; 2999 3000 /* 3001 * event handlers for timehist option 3002 */ 3003 sched->tool.sample = perf_timehist__process_sample; 3004 sched->tool.mmap = perf_event__process_mmap; 3005 sched->tool.comm = perf_event__process_comm; 3006 sched->tool.exit = perf_event__process_exit; 3007 sched->tool.fork = perf_event__process_fork; 3008 sched->tool.lost = process_lost; 3009 sched->tool.attr = perf_event__process_attr; 3010 sched->tool.tracing_data = perf_event__process_tracing_data; 3011 sched->tool.build_id = perf_event__process_build_id; 3012 3013 sched->tool.ordered_events = true; 3014 sched->tool.ordering_requires_timestamps = true; 3015 3016 symbol_conf.use_callchain = sched->show_callchain; 3017 3018 session = perf_session__new(&data, &sched->tool); 3019 if (IS_ERR(session)) 3020 return PTR_ERR(session); 3021 3022 if (cpu_list) { 3023 err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap); 3024 if (err < 0) 3025 goto out; 3026 } 3027 3028 evlist = session->evlist; 3029 3030 symbol__init(&session->header.env); 3031 3032 if (perf_time__parse_str(&sched->ptime, sched->time_str) != 0) { 3033 pr_err("Invalid time string\n"); 3034 return -EINVAL; 3035 } 3036 3037 if (timehist_check_attr(sched, evlist) != 0) 3038 goto out; 3039 3040 setup_pager(); 3041 3042 /* prefer sched_waking if it is captured */ 3043 if (evlist__find_tracepoint_by_name(session->evlist, "sched:sched_waking")) 3044 handlers[1].handler = timehist_sched_wakeup_ignore; 3045 3046 /* setup per-evsel handlers */ 3047 if (perf_session__set_tracepoints_handlers(session, handlers)) 3048 goto out; 3049 3050 /* sched_switch event at a minimum needs to exist */ 3051 if (!evlist__find_tracepoint_by_name(session->evlist, "sched:sched_switch")) { 3052 pr_err("No sched_switch events found. Have you run 'perf sched record'?\n"); 3053 goto out; 3054 } 3055 3056 if (sched->show_migrations && 3057 perf_session__set_tracepoints_handlers(session, migrate_handlers)) 3058 goto out; 3059 3060 /* pre-allocate struct for per-CPU idle stats */ 3061 sched->max_cpu.cpu = session->header.env.nr_cpus_online; 3062 if (sched->max_cpu.cpu == 0) 3063 sched->max_cpu.cpu = 4; 3064 if (init_idle_threads(sched->max_cpu.cpu)) 3065 goto out; 3066 3067 /* summary_only implies summary option, but don't overwrite summary if set */ 3068 if (sched->summary_only) 3069 sched->summary = sched->summary_only; 3070 3071 if (!sched->summary_only) 3072 timehist_header(sched); 3073 3074 err = perf_session__process_events(session); 3075 if (err) { 3076 pr_err("Failed to process events, error %d", err); 3077 goto out; 3078 } 3079 3080 sched->nr_events = evlist->stats.nr_events[0]; 3081 sched->nr_lost_events = evlist->stats.total_lost; 3082 sched->nr_lost_chunks = evlist->stats.nr_events[PERF_RECORD_LOST]; 3083 3084 if (sched->summary) 3085 timehist_print_summary(sched, session); 3086 3087 out: 3088 free_idle_threads(); 3089 perf_session__delete(session); 3090 3091 return err; 3092 } 3093 3094 3095 static void print_bad_events(struct perf_sched *sched) 3096 { 3097 if (sched->nr_unordered_timestamps && sched->nr_timestamps) { 3098 printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n", 3099 (double)sched->nr_unordered_timestamps/(double)sched->nr_timestamps*100.0, 3100 sched->nr_unordered_timestamps, sched->nr_timestamps); 3101 } 3102 if (sched->nr_lost_events && sched->nr_events) { 3103 printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n", 3104 (double)sched->nr_lost_events/(double)sched->nr_events * 100.0, 3105 sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks); 3106 } 3107 if (sched->nr_context_switch_bugs && sched->nr_timestamps) { 3108 printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)", 3109 (double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0, 3110 sched->nr_context_switch_bugs, sched->nr_timestamps); 3111 if (sched->nr_lost_events) 3112 printf(" (due to lost events?)"); 3113 printf("\n"); 3114 } 3115 } 3116 3117 static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *data) 3118 { 3119 struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL; 3120 struct work_atoms *this; 3121 const char *comm = thread__comm_str(data->thread), *this_comm; 3122 bool leftmost = true; 3123 3124 while (*new) { 3125 int cmp; 3126 3127 this = container_of(*new, struct work_atoms, node); 3128 parent = *new; 3129 3130 this_comm = thread__comm_str(this->thread); 3131 cmp = strcmp(comm, this_comm); 3132 if (cmp > 0) { 3133 new = &((*new)->rb_left); 3134 } else if (cmp < 0) { 3135 new = &((*new)->rb_right); 3136 leftmost = false; 3137 } else { 3138 this->num_merged++; 3139 this->total_runtime += data->total_runtime; 3140 this->nb_atoms += data->nb_atoms; 3141 this->total_lat += data->total_lat; 3142 list_splice(&data->work_list, &this->work_list); 3143 if (this->max_lat < data->max_lat) { 3144 this->max_lat = data->max_lat; 3145 this->max_lat_start = data->max_lat_start; 3146 this->max_lat_end = data->max_lat_end; 3147 } 3148 zfree(&data); 3149 return; 3150 } 3151 } 3152 3153 data->num_merged++; 3154 rb_link_node(&data->node, parent, new); 3155 rb_insert_color_cached(&data->node, root, leftmost); 3156 } 3157 3158 static void perf_sched__merge_lat(struct perf_sched *sched) 3159 { 3160 struct work_atoms *data; 3161 struct rb_node *node; 3162 3163 if (sched->skip_merge) 3164 return; 3165 3166 while ((node = rb_first_cached(&sched->atom_root))) { 3167 rb_erase_cached(node, &sched->atom_root); 3168 data = rb_entry(node, struct work_atoms, node); 3169 __merge_work_atoms(&sched->merged_atom_root, data); 3170 } 3171 } 3172 3173 static int setup_cpus_switch_event(struct perf_sched *sched) 3174 { 3175 unsigned int i; 3176 3177 sched->cpu_last_switched = calloc(MAX_CPUS, sizeof(*(sched->cpu_last_switched))); 3178 if (!sched->cpu_last_switched) 3179 return -1; 3180 3181 sched->curr_pid = malloc(MAX_CPUS * sizeof(*(sched->curr_pid))); 3182 if (!sched->curr_pid) { 3183 zfree(&sched->cpu_last_switched); 3184 return -1; 3185 } 3186 3187 for (i = 0; i < MAX_CPUS; i++) 3188 sched->curr_pid[i] = -1; 3189 3190 return 0; 3191 } 3192 3193 static void free_cpus_switch_event(struct perf_sched *sched) 3194 { 3195 zfree(&sched->curr_pid); 3196 zfree(&sched->cpu_last_switched); 3197 } 3198 3199 static int perf_sched__lat(struct perf_sched *sched) 3200 { 3201 int rc = -1; 3202 struct rb_node *next; 3203 3204 setup_pager(); 3205 3206 if (setup_cpus_switch_event(sched)) 3207 return rc; 3208 3209 if (perf_sched__read_events(sched)) 3210 goto out_free_cpus_switch_event; 3211 3212 perf_sched__merge_lat(sched); 3213 perf_sched__sort_lat(sched); 3214 3215 printf("\n -------------------------------------------------------------------------------------------------------------------------------------------\n"); 3216 printf(" Task | Runtime ms | Count | Avg delay ms | Max delay ms | Max delay start | Max delay end |\n"); 3217 printf(" -------------------------------------------------------------------------------------------------------------------------------------------\n"); 3218 3219 next = rb_first_cached(&sched->sorted_atom_root); 3220 3221 while (next) { 3222 struct work_atoms *work_list; 3223 3224 work_list = rb_entry(next, struct work_atoms, node); 3225 output_lat_thread(sched, work_list); 3226 next = rb_next(next); 3227 thread__zput(work_list->thread); 3228 } 3229 3230 printf(" -----------------------------------------------------------------------------------------------------------------\n"); 3231 printf(" TOTAL: |%11.3f ms |%9" PRIu64 " |\n", 3232 (double)sched->all_runtime / NSEC_PER_MSEC, sched->all_count); 3233 3234 printf(" ---------------------------------------------------\n"); 3235 3236 print_bad_events(sched); 3237 printf("\n"); 3238 3239 rc = 0; 3240 3241 out_free_cpus_switch_event: 3242 free_cpus_switch_event(sched); 3243 return rc; 3244 } 3245 3246 static int setup_map_cpus(struct perf_sched *sched) 3247 { 3248 sched->max_cpu.cpu = sysconf(_SC_NPROCESSORS_CONF); 3249 3250 if (sched->map.comp) { 3251 sched->map.comp_cpus = zalloc(sched->max_cpu.cpu * sizeof(int)); 3252 if (!sched->map.comp_cpus) 3253 return -1; 3254 } 3255 3256 if (sched->map.cpus_str) { 3257 sched->map.cpus = perf_cpu_map__new(sched->map.cpus_str); 3258 if (!sched->map.cpus) { 3259 pr_err("failed to get cpus map from %s\n", sched->map.cpus_str); 3260 zfree(&sched->map.comp_cpus); 3261 return -1; 3262 } 3263 } 3264 3265 return 0; 3266 } 3267 3268 static int setup_color_pids(struct perf_sched *sched) 3269 { 3270 struct perf_thread_map *map; 3271 3272 if (!sched->map.color_pids_str) 3273 return 0; 3274 3275 map = thread_map__new_by_tid_str(sched->map.color_pids_str); 3276 if (!map) { 3277 pr_err("failed to get thread map from %s\n", sched->map.color_pids_str); 3278 return -1; 3279 } 3280 3281 sched->map.color_pids = map; 3282 return 0; 3283 } 3284 3285 static int setup_color_cpus(struct perf_sched *sched) 3286 { 3287 struct perf_cpu_map *map; 3288 3289 if (!sched->map.color_cpus_str) 3290 return 0; 3291 3292 map = perf_cpu_map__new(sched->map.color_cpus_str); 3293 if (!map) { 3294 pr_err("failed to get thread map from %s\n", sched->map.color_cpus_str); 3295 return -1; 3296 } 3297 3298 sched->map.color_cpus = map; 3299 return 0; 3300 } 3301 3302 static int perf_sched__map(struct perf_sched *sched) 3303 { 3304 int rc = -1; 3305 3306 sched->curr_thread = calloc(MAX_CPUS, sizeof(*(sched->curr_thread))); 3307 if (!sched->curr_thread) 3308 return rc; 3309 3310 if (setup_cpus_switch_event(sched)) 3311 goto out_free_curr_thread; 3312 3313 if (setup_map_cpus(sched)) 3314 goto out_free_cpus_switch_event; 3315 3316 if (setup_color_pids(sched)) 3317 goto out_put_map_cpus; 3318 3319 if (setup_color_cpus(sched)) 3320 goto out_put_color_pids; 3321 3322 setup_pager(); 3323 if (perf_sched__read_events(sched)) 3324 goto out_put_color_cpus; 3325 3326 rc = 0; 3327 print_bad_events(sched); 3328 3329 out_put_color_cpus: 3330 perf_cpu_map__put(sched->map.color_cpus); 3331 3332 out_put_color_pids: 3333 perf_thread_map__put(sched->map.color_pids); 3334 3335 out_put_map_cpus: 3336 zfree(&sched->map.comp_cpus); 3337 perf_cpu_map__put(sched->map.cpus); 3338 3339 out_free_cpus_switch_event: 3340 free_cpus_switch_event(sched); 3341 3342 out_free_curr_thread: 3343 zfree(&sched->curr_thread); 3344 return rc; 3345 } 3346 3347 static int perf_sched__replay(struct perf_sched *sched) 3348 { 3349 int ret; 3350 unsigned long i; 3351 3352 mutex_init(&sched->start_work_mutex); 3353 mutex_init(&sched->work_done_wait_mutex); 3354 3355 ret = setup_cpus_switch_event(sched); 3356 if (ret) 3357 goto out_mutex_destroy; 3358 3359 calibrate_run_measurement_overhead(sched); 3360 calibrate_sleep_measurement_overhead(sched); 3361 3362 test_calibrations(sched); 3363 3364 ret = perf_sched__read_events(sched); 3365 if (ret) 3366 goto out_free_cpus_switch_event; 3367 3368 printf("nr_run_events: %ld\n", sched->nr_run_events); 3369 printf("nr_sleep_events: %ld\n", sched->nr_sleep_events); 3370 printf("nr_wakeup_events: %ld\n", sched->nr_wakeup_events); 3371 3372 if (sched->targetless_wakeups) 3373 printf("target-less wakeups: %ld\n", sched->targetless_wakeups); 3374 if (sched->multitarget_wakeups) 3375 printf("multi-target wakeups: %ld\n", sched->multitarget_wakeups); 3376 if (sched->nr_run_events_optimized) 3377 printf("run atoms optimized: %ld\n", 3378 sched->nr_run_events_optimized); 3379 3380 print_task_traces(sched); 3381 add_cross_task_wakeups(sched); 3382 3383 sched->thread_funcs_exit = false; 3384 create_tasks(sched); 3385 printf("------------------------------------------------------------\n"); 3386 for (i = 0; i < sched->replay_repeat; i++) 3387 run_one_test(sched); 3388 3389 sched->thread_funcs_exit = true; 3390 destroy_tasks(sched); 3391 3392 out_free_cpus_switch_event: 3393 free_cpus_switch_event(sched); 3394 3395 out_mutex_destroy: 3396 mutex_destroy(&sched->start_work_mutex); 3397 mutex_destroy(&sched->work_done_wait_mutex); 3398 return ret; 3399 } 3400 3401 static void setup_sorting(struct perf_sched *sched, const struct option *options, 3402 const char * const usage_msg[]) 3403 { 3404 char *tmp, *tok, *str = strdup(sched->sort_order); 3405 3406 for (tok = strtok_r(str, ", ", &tmp); 3407 tok; tok = strtok_r(NULL, ", ", &tmp)) { 3408 if (sort_dimension__add(tok, &sched->sort_list) < 0) { 3409 usage_with_options_msg(usage_msg, options, 3410 "Unknown --sort key: `%s'", tok); 3411 } 3412 } 3413 3414 free(str); 3415 3416 sort_dimension__add("pid", &sched->cmp_pid); 3417 } 3418 3419 static bool schedstat_events_exposed(void) 3420 { 3421 /* 3422 * Select "sched:sched_stat_wait" event to check 3423 * whether schedstat tracepoints are exposed. 3424 */ 3425 return IS_ERR(trace_event__tp_format("sched", "sched_stat_wait")) ? 3426 false : true; 3427 } 3428 3429 static int __cmd_record(int argc, const char **argv) 3430 { 3431 unsigned int rec_argc, i, j; 3432 char **rec_argv; 3433 const char **rec_argv_copy; 3434 const char * const record_args[] = { 3435 "record", 3436 "-a", 3437 "-R", 3438 "-m", "1024", 3439 "-c", "1", 3440 "-e", "sched:sched_switch", 3441 "-e", "sched:sched_stat_runtime", 3442 "-e", "sched:sched_process_fork", 3443 "-e", "sched:sched_wakeup_new", 3444 "-e", "sched:sched_migrate_task", 3445 }; 3446 3447 /* 3448 * The tracepoints trace_sched_stat_{wait, sleep, iowait} 3449 * are not exposed to user if CONFIG_SCHEDSTATS is not set, 3450 * to prevent "perf sched record" execution failure, determine 3451 * whether to record schedstat events according to actual situation. 3452 */ 3453 const char * const schedstat_args[] = { 3454 "-e", "sched:sched_stat_wait", 3455 "-e", "sched:sched_stat_sleep", 3456 "-e", "sched:sched_stat_iowait", 3457 }; 3458 unsigned int schedstat_argc = schedstat_events_exposed() ? 3459 ARRAY_SIZE(schedstat_args) : 0; 3460 3461 struct tep_event *waking_event; 3462 int ret; 3463 3464 /* 3465 * +2 for either "-e", "sched:sched_wakeup" or 3466 * "-e", "sched:sched_waking" 3467 */ 3468 rec_argc = ARRAY_SIZE(record_args) + 2 + schedstat_argc + argc - 1; 3469 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 3470 if (rec_argv == NULL) 3471 return -ENOMEM; 3472 rec_argv_copy = calloc(rec_argc + 1, sizeof(char *)); 3473 if (rec_argv_copy == NULL) { 3474 free(rec_argv); 3475 return -ENOMEM; 3476 } 3477 3478 for (i = 0; i < ARRAY_SIZE(record_args); i++) 3479 rec_argv[i] = strdup(record_args[i]); 3480 3481 rec_argv[i++] = strdup("-e"); 3482 waking_event = trace_event__tp_format("sched", "sched_waking"); 3483 if (!IS_ERR(waking_event)) 3484 rec_argv[i++] = strdup("sched:sched_waking"); 3485 else 3486 rec_argv[i++] = strdup("sched:sched_wakeup"); 3487 3488 for (j = 0; j < schedstat_argc; j++) 3489 rec_argv[i++] = strdup(schedstat_args[j]); 3490 3491 for (j = 1; j < (unsigned int)argc; j++, i++) 3492 rec_argv[i] = strdup(argv[j]); 3493 3494 BUG_ON(i != rec_argc); 3495 3496 memcpy(rec_argv_copy, rec_argv, sizeof(char *) * rec_argc); 3497 ret = cmd_record(rec_argc, rec_argv_copy); 3498 3499 for (i = 0; i < rec_argc; i++) 3500 free(rec_argv[i]); 3501 free(rec_argv); 3502 free(rec_argv_copy); 3503 3504 return ret; 3505 } 3506 3507 int cmd_sched(int argc, const char **argv) 3508 { 3509 static const char default_sort_order[] = "avg, max, switch, runtime"; 3510 struct perf_sched sched = { 3511 .tool = { 3512 .sample = perf_sched__process_tracepoint_sample, 3513 .comm = perf_sched__process_comm, 3514 .namespaces = perf_event__process_namespaces, 3515 .lost = perf_event__process_lost, 3516 .fork = perf_sched__process_fork_event, 3517 .ordered_events = true, 3518 }, 3519 .cmp_pid = LIST_HEAD_INIT(sched.cmp_pid), 3520 .sort_list = LIST_HEAD_INIT(sched.sort_list), 3521 .sort_order = default_sort_order, 3522 .replay_repeat = 10, 3523 .profile_cpu = -1, 3524 .next_shortname1 = 'A', 3525 .next_shortname2 = '0', 3526 .skip_merge = 0, 3527 .show_callchain = 1, 3528 .max_stack = 5, 3529 }; 3530 const struct option sched_options[] = { 3531 OPT_STRING('i', "input", &input_name, "file", 3532 "input file name"), 3533 OPT_INCR('v', "verbose", &verbose, 3534 "be more verbose (show symbol address, etc)"), 3535 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 3536 "dump raw trace in ASCII"), 3537 OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"), 3538 OPT_END() 3539 }; 3540 const struct option latency_options[] = { 3541 OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]", 3542 "sort by key(s): runtime, switch, avg, max"), 3543 OPT_INTEGER('C', "CPU", &sched.profile_cpu, 3544 "CPU to profile on"), 3545 OPT_BOOLEAN('p', "pids", &sched.skip_merge, 3546 "latency stats per pid instead of per comm"), 3547 OPT_PARENT(sched_options) 3548 }; 3549 const struct option replay_options[] = { 3550 OPT_UINTEGER('r', "repeat", &sched.replay_repeat, 3551 "repeat the workload replay N times (-1: infinite)"), 3552 OPT_PARENT(sched_options) 3553 }; 3554 const struct option map_options[] = { 3555 OPT_BOOLEAN(0, "compact", &sched.map.comp, 3556 "map output in compact mode"), 3557 OPT_STRING(0, "color-pids", &sched.map.color_pids_str, "pids", 3558 "highlight given pids in map"), 3559 OPT_STRING(0, "color-cpus", &sched.map.color_cpus_str, "cpus", 3560 "highlight given CPUs in map"), 3561 OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus", 3562 "display given CPUs in map"), 3563 OPT_PARENT(sched_options) 3564 }; 3565 const struct option timehist_options[] = { 3566 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 3567 "file", "vmlinux pathname"), 3568 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, 3569 "file", "kallsyms pathname"), 3570 OPT_BOOLEAN('g', "call-graph", &sched.show_callchain, 3571 "Display call chains if present (default on)"), 3572 OPT_UINTEGER(0, "max-stack", &sched.max_stack, 3573 "Maximum number of functions to display backtrace."), 3574 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", 3575 "Look for files with symbols relative to this directory"), 3576 OPT_BOOLEAN('s', "summary", &sched.summary_only, 3577 "Show only syscall summary with statistics"), 3578 OPT_BOOLEAN('S', "with-summary", &sched.summary, 3579 "Show all syscalls and summary with statistics"), 3580 OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"), 3581 OPT_BOOLEAN('n', "next", &sched.show_next, "Show next task"), 3582 OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"), 3583 OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"), 3584 OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"), 3585 OPT_STRING(0, "time", &sched.time_str, "str", 3586 "Time span for analysis (start,stop)"), 3587 OPT_BOOLEAN(0, "state", &sched.show_state, "Show task state when sched-out"), 3588 OPT_STRING('p', "pid", &symbol_conf.pid_list_str, "pid[,pid...]", 3589 "analyze events only for given process id(s)"), 3590 OPT_STRING('t', "tid", &symbol_conf.tid_list_str, "tid[,tid...]", 3591 "analyze events only for given thread id(s)"), 3592 OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"), 3593 OPT_PARENT(sched_options) 3594 }; 3595 3596 const char * const latency_usage[] = { 3597 "perf sched latency [<options>]", 3598 NULL 3599 }; 3600 const char * const replay_usage[] = { 3601 "perf sched replay [<options>]", 3602 NULL 3603 }; 3604 const char * const map_usage[] = { 3605 "perf sched map [<options>]", 3606 NULL 3607 }; 3608 const char * const timehist_usage[] = { 3609 "perf sched timehist [<options>]", 3610 NULL 3611 }; 3612 const char *const sched_subcommands[] = { "record", "latency", "map", 3613 "replay", "script", 3614 "timehist", NULL }; 3615 const char *sched_usage[] = { 3616 NULL, 3617 NULL 3618 }; 3619 struct trace_sched_handler lat_ops = { 3620 .wakeup_event = latency_wakeup_event, 3621 .switch_event = latency_switch_event, 3622 .runtime_event = latency_runtime_event, 3623 .migrate_task_event = latency_migrate_task_event, 3624 }; 3625 struct trace_sched_handler map_ops = { 3626 .switch_event = map_switch_event, 3627 }; 3628 struct trace_sched_handler replay_ops = { 3629 .wakeup_event = replay_wakeup_event, 3630 .switch_event = replay_switch_event, 3631 .fork_event = replay_fork_event, 3632 }; 3633 int ret; 3634 3635 argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands, 3636 sched_usage, PARSE_OPT_STOP_AT_NON_OPTION); 3637 if (!argc) 3638 usage_with_options(sched_usage, sched_options); 3639 3640 /* 3641 * Aliased to 'perf script' for now: 3642 */ 3643 if (!strcmp(argv[0], "script")) { 3644 return cmd_script(argc, argv); 3645 } else if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) { 3646 return __cmd_record(argc, argv); 3647 } else if (strlen(argv[0]) > 2 && strstarts("latency", argv[0])) { 3648 sched.tp_handler = &lat_ops; 3649 if (argc > 1) { 3650 argc = parse_options(argc, argv, latency_options, latency_usage, 0); 3651 if (argc) 3652 usage_with_options(latency_usage, latency_options); 3653 } 3654 setup_sorting(&sched, latency_options, latency_usage); 3655 return perf_sched__lat(&sched); 3656 } else if (!strcmp(argv[0], "map")) { 3657 if (argc) { 3658 argc = parse_options(argc, argv, map_options, map_usage, 0); 3659 if (argc) 3660 usage_with_options(map_usage, map_options); 3661 } 3662 sched.tp_handler = &map_ops; 3663 setup_sorting(&sched, latency_options, latency_usage); 3664 return perf_sched__map(&sched); 3665 } else if (strlen(argv[0]) > 2 && strstarts("replay", argv[0])) { 3666 sched.tp_handler = &replay_ops; 3667 if (argc) { 3668 argc = parse_options(argc, argv, replay_options, replay_usage, 0); 3669 if (argc) 3670 usage_with_options(replay_usage, replay_options); 3671 } 3672 return perf_sched__replay(&sched); 3673 } else if (!strcmp(argv[0], "timehist")) { 3674 if (argc) { 3675 argc = parse_options(argc, argv, timehist_options, 3676 timehist_usage, 0); 3677 if (argc) 3678 usage_with_options(timehist_usage, timehist_options); 3679 } 3680 if ((sched.show_wakeups || sched.show_next) && 3681 sched.summary_only) { 3682 pr_err(" Error: -s and -[n|w] are mutually exclusive.\n"); 3683 parse_options_usage(timehist_usage, timehist_options, "s", true); 3684 if (sched.show_wakeups) 3685 parse_options_usage(NULL, timehist_options, "w", true); 3686 if (sched.show_next) 3687 parse_options_usage(NULL, timehist_options, "n", true); 3688 return -EINVAL; 3689 } 3690 ret = symbol__validate_sym_arguments(); 3691 if (ret) 3692 return ret; 3693 3694 return perf_sched__timehist(&sched); 3695 } else { 3696 usage_with_options(sched_usage, sched_options); 3697 } 3698 3699 return 0; 3700 } 3701