1 // SPDX-License-Identifier: GPL-2.0 2 #include "builtin.h" 3 #include "perf-sys.h" 4 5 #include "util/cpumap.h" 6 #include "util/evlist.h" 7 #include "util/evsel.h" 8 #include "util/evsel_fprintf.h" 9 #include "util/mutex.h" 10 #include "util/symbol.h" 11 #include "util/thread.h" 12 #include "util/header.h" 13 #include "util/session.h" 14 #include "util/tool.h" 15 #include "util/cloexec.h" 16 #include "util/thread_map.h" 17 #include "util/color.h" 18 #include "util/stat.h" 19 #include "util/string2.h" 20 #include "util/callchain.h" 21 #include "util/time-utils.h" 22 23 #include <subcmd/pager.h> 24 #include <subcmd/parse-options.h> 25 #include "util/trace-event.h" 26 27 #include "util/debug.h" 28 #include "util/event.h" 29 #include "util/util.h" 30 31 #include <linux/kernel.h> 32 #include <linux/log2.h> 33 #include <linux/zalloc.h> 34 #include <sys/prctl.h> 35 #include <sys/resource.h> 36 #include <inttypes.h> 37 38 #include <errno.h> 39 #include <semaphore.h> 40 #include <pthread.h> 41 #include <math.h> 42 #include <api/fs/fs.h> 43 #include <perf/cpumap.h> 44 #include <linux/time64.h> 45 #include <linux/err.h> 46 47 #include <linux/ctype.h> 48 49 #define PR_SET_NAME 15 /* Set process name */ 50 #define MAX_CPUS 4096 51 #define COMM_LEN 20 52 #define SYM_LEN 129 53 #define MAX_PID 1024000 54 #define MAX_PRIO 140 55 56 static const char *cpu_list; 57 static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); 58 59 struct sched_atom; 60 61 struct task_desc { 62 unsigned long nr; 63 unsigned long pid; 64 char comm[COMM_LEN]; 65 66 unsigned long nr_events; 67 unsigned long curr_event; 68 struct sched_atom **atoms; 69 70 pthread_t thread; 71 72 sem_t ready_for_work; 73 sem_t work_done_sem; 74 75 u64 cpu_usage; 76 }; 77 78 enum sched_event_type { 79 SCHED_EVENT_RUN, 80 SCHED_EVENT_SLEEP, 81 SCHED_EVENT_WAKEUP, 82 }; 83 84 struct sched_atom { 85 enum sched_event_type type; 86 u64 timestamp; 87 u64 duration; 88 unsigned long nr; 89 sem_t *wait_sem; 90 struct task_desc *wakee; 91 }; 92 93 enum thread_state { 94 THREAD_SLEEPING = 0, 95 THREAD_WAIT_CPU, 96 THREAD_SCHED_IN, 97 THREAD_IGNORE 98 }; 99 100 struct work_atom { 101 struct list_head list; 102 enum thread_state state; 103 u64 sched_out_time; 104 u64 wake_up_time; 105 u64 sched_in_time; 106 u64 runtime; 107 }; 108 109 struct work_atoms { 110 struct list_head work_list; 111 struct thread *thread; 112 struct rb_node node; 113 u64 max_lat; 114 u64 max_lat_start; 115 u64 max_lat_end; 116 u64 total_lat; 117 u64 nb_atoms; 118 u64 total_runtime; 119 int num_merged; 120 }; 121 122 typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *); 123 124 struct perf_sched; 125 126 struct trace_sched_handler { 127 int (*switch_event)(struct perf_sched *sched, struct evsel *evsel, 128 struct perf_sample *sample, struct machine *machine); 129 130 int (*runtime_event)(struct perf_sched *sched, struct evsel *evsel, 131 struct perf_sample *sample, struct machine *machine); 132 133 int (*wakeup_event)(struct perf_sched *sched, struct evsel *evsel, 134 struct perf_sample *sample, struct machine *machine); 135 136 /* PERF_RECORD_FORK event, not sched_process_fork tracepoint */ 137 int (*fork_event)(struct perf_sched *sched, union perf_event *event, 138 struct machine *machine); 139 140 int (*migrate_task_event)(struct perf_sched *sched, 141 struct evsel *evsel, 142 struct perf_sample *sample, 143 struct machine *machine); 144 }; 145 146 #define COLOR_PIDS PERF_COLOR_BLUE 147 #define COLOR_CPUS PERF_COLOR_BG_RED 148 149 struct perf_sched_map { 150 DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS); 151 struct perf_cpu *comp_cpus; 152 bool comp; 153 struct perf_thread_map *color_pids; 154 const char *color_pids_str; 155 struct perf_cpu_map *color_cpus; 156 const char *color_cpus_str; 157 const char *task_name; 158 struct strlist *task_names; 159 bool fuzzy; 160 struct perf_cpu_map *cpus; 161 const char *cpus_str; 162 }; 163 164 struct perf_sched { 165 struct perf_tool tool; 166 const char *sort_order; 167 unsigned long nr_tasks; 168 struct task_desc **pid_to_task; 169 struct task_desc **tasks; 170 const struct trace_sched_handler *tp_handler; 171 struct mutex start_work_mutex; 172 struct mutex work_done_wait_mutex; 173 int profile_cpu; 174 /* 175 * Track the current task - that way we can know whether there's any 176 * weird events, such as a task being switched away that is not current. 177 */ 178 struct perf_cpu max_cpu; 179 u32 *curr_pid; 180 struct thread **curr_thread; 181 struct thread **curr_out_thread; 182 char next_shortname1; 183 char next_shortname2; 184 unsigned int replay_repeat; 185 unsigned long nr_run_events; 186 unsigned long nr_sleep_events; 187 unsigned long nr_wakeup_events; 188 unsigned long nr_sleep_corrections; 189 unsigned long nr_run_events_optimized; 190 unsigned long targetless_wakeups; 191 unsigned long multitarget_wakeups; 192 unsigned long nr_runs; 193 unsigned long nr_timestamps; 194 unsigned long nr_unordered_timestamps; 195 unsigned long nr_context_switch_bugs; 196 unsigned long nr_events; 197 unsigned long nr_lost_chunks; 198 unsigned long nr_lost_events; 199 u64 run_measurement_overhead; 200 u64 sleep_measurement_overhead; 201 u64 start_time; 202 u64 cpu_usage; 203 u64 runavg_cpu_usage; 204 u64 parent_cpu_usage; 205 u64 runavg_parent_cpu_usage; 206 u64 sum_runtime; 207 u64 sum_fluct; 208 u64 run_avg; 209 u64 all_runtime; 210 u64 all_count; 211 u64 *cpu_last_switched; 212 struct rb_root_cached atom_root, sorted_atom_root, merged_atom_root; 213 struct list_head sort_list, cmp_pid; 214 bool force; 215 bool skip_merge; 216 struct perf_sched_map map; 217 218 /* options for timehist command */ 219 bool summary; 220 bool summary_only; 221 bool idle_hist; 222 bool show_callchain; 223 unsigned int max_stack; 224 bool show_cpu_visual; 225 bool show_wakeups; 226 bool show_next; 227 bool show_migrations; 228 bool show_state; 229 bool show_prio; 230 u64 skipped_samples; 231 const char *time_str; 232 struct perf_time_interval ptime; 233 struct perf_time_interval hist_time; 234 volatile bool thread_funcs_exit; 235 const char *prio_str; 236 DECLARE_BITMAP(prio_bitmap, MAX_PRIO); 237 }; 238 239 /* per thread run time data */ 240 struct thread_runtime { 241 u64 last_time; /* time of previous sched in/out event */ 242 u64 dt_run; /* run time */ 243 u64 dt_sleep; /* time between CPU access by sleep (off cpu) */ 244 u64 dt_iowait; /* time between CPU access by iowait (off cpu) */ 245 u64 dt_preempt; /* time between CPU access by preempt (off cpu) */ 246 u64 dt_delay; /* time between wakeup and sched-in */ 247 u64 ready_to_run; /* time of wakeup */ 248 249 struct stats run_stats; 250 u64 total_run_time; 251 u64 total_sleep_time; 252 u64 total_iowait_time; 253 u64 total_preempt_time; 254 u64 total_delay_time; 255 256 char last_state; 257 258 char shortname[3]; 259 bool comm_changed; 260 261 u64 migrations; 262 263 int prio; 264 }; 265 266 /* per event run time data */ 267 struct evsel_runtime { 268 u64 *last_time; /* time this event was last seen per cpu */ 269 u32 ncpu; /* highest cpu slot allocated */ 270 }; 271 272 /* per cpu idle time data */ 273 struct idle_thread_runtime { 274 struct thread_runtime tr; 275 struct thread *last_thread; 276 struct rb_root_cached sorted_root; 277 struct callchain_root callchain; 278 struct callchain_cursor cursor; 279 }; 280 281 /* track idle times per cpu */ 282 static struct thread **idle_threads; 283 static int idle_max_cpu; 284 static char idle_comm[] = "<idle>"; 285 286 static u64 get_nsecs(void) 287 { 288 struct timespec ts; 289 290 clock_gettime(CLOCK_MONOTONIC, &ts); 291 292 return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; 293 } 294 295 static void burn_nsecs(struct perf_sched *sched, u64 nsecs) 296 { 297 u64 T0 = get_nsecs(), T1; 298 299 do { 300 T1 = get_nsecs(); 301 } while (T1 + sched->run_measurement_overhead < T0 + nsecs); 302 } 303 304 static void sleep_nsecs(u64 nsecs) 305 { 306 struct timespec ts; 307 308 ts.tv_nsec = nsecs % 999999999; 309 ts.tv_sec = nsecs / 999999999; 310 311 nanosleep(&ts, NULL); 312 } 313 314 static void calibrate_run_measurement_overhead(struct perf_sched *sched) 315 { 316 u64 T0, T1, delta, min_delta = NSEC_PER_SEC; 317 int i; 318 319 for (i = 0; i < 10; i++) { 320 T0 = get_nsecs(); 321 burn_nsecs(sched, 0); 322 T1 = get_nsecs(); 323 delta = T1-T0; 324 min_delta = min(min_delta, delta); 325 } 326 sched->run_measurement_overhead = min_delta; 327 328 printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta); 329 } 330 331 static void calibrate_sleep_measurement_overhead(struct perf_sched *sched) 332 { 333 u64 T0, T1, delta, min_delta = NSEC_PER_SEC; 334 int i; 335 336 for (i = 0; i < 10; i++) { 337 T0 = get_nsecs(); 338 sleep_nsecs(10000); 339 T1 = get_nsecs(); 340 delta = T1-T0; 341 min_delta = min(min_delta, delta); 342 } 343 min_delta -= 10000; 344 sched->sleep_measurement_overhead = min_delta; 345 346 printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta); 347 } 348 349 static struct sched_atom * 350 get_new_event(struct task_desc *task, u64 timestamp) 351 { 352 struct sched_atom *event = zalloc(sizeof(*event)); 353 unsigned long idx = task->nr_events; 354 size_t size; 355 356 event->timestamp = timestamp; 357 event->nr = idx; 358 359 task->nr_events++; 360 size = sizeof(struct sched_atom *) * task->nr_events; 361 task->atoms = realloc(task->atoms, size); 362 BUG_ON(!task->atoms); 363 364 task->atoms[idx] = event; 365 366 return event; 367 } 368 369 static struct sched_atom *last_event(struct task_desc *task) 370 { 371 if (!task->nr_events) 372 return NULL; 373 374 return task->atoms[task->nr_events - 1]; 375 } 376 377 static void add_sched_event_run(struct perf_sched *sched, struct task_desc *task, 378 u64 timestamp, u64 duration) 379 { 380 struct sched_atom *event, *curr_event = last_event(task); 381 382 /* 383 * optimize an existing RUN event by merging this one 384 * to it: 385 */ 386 if (curr_event && curr_event->type == SCHED_EVENT_RUN) { 387 sched->nr_run_events_optimized++; 388 curr_event->duration += duration; 389 return; 390 } 391 392 event = get_new_event(task, timestamp); 393 394 event->type = SCHED_EVENT_RUN; 395 event->duration = duration; 396 397 sched->nr_run_events++; 398 } 399 400 static void add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *task, 401 u64 timestamp, struct task_desc *wakee) 402 { 403 struct sched_atom *event, *wakee_event; 404 405 event = get_new_event(task, timestamp); 406 event->type = SCHED_EVENT_WAKEUP; 407 event->wakee = wakee; 408 409 wakee_event = last_event(wakee); 410 if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) { 411 sched->targetless_wakeups++; 412 return; 413 } 414 if (wakee_event->wait_sem) { 415 sched->multitarget_wakeups++; 416 return; 417 } 418 419 wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem)); 420 sem_init(wakee_event->wait_sem, 0, 0); 421 event->wait_sem = wakee_event->wait_sem; 422 423 sched->nr_wakeup_events++; 424 } 425 426 static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task, 427 u64 timestamp) 428 { 429 struct sched_atom *event = get_new_event(task, timestamp); 430 431 event->type = SCHED_EVENT_SLEEP; 432 433 sched->nr_sleep_events++; 434 } 435 436 static struct task_desc *register_pid(struct perf_sched *sched, 437 unsigned long pid, const char *comm) 438 { 439 struct task_desc *task; 440 static int pid_max; 441 442 if (sched->pid_to_task == NULL) { 443 if (sysctl__read_int("kernel/pid_max", &pid_max) < 0) 444 pid_max = MAX_PID; 445 BUG_ON((sched->pid_to_task = calloc(pid_max, sizeof(struct task_desc *))) == NULL); 446 } 447 if (pid >= (unsigned long)pid_max) { 448 BUG_ON((sched->pid_to_task = realloc(sched->pid_to_task, (pid + 1) * 449 sizeof(struct task_desc *))) == NULL); 450 while (pid >= (unsigned long)pid_max) 451 sched->pid_to_task[pid_max++] = NULL; 452 } 453 454 task = sched->pid_to_task[pid]; 455 456 if (task) 457 return task; 458 459 task = zalloc(sizeof(*task)); 460 task->pid = pid; 461 task->nr = sched->nr_tasks; 462 strcpy(task->comm, comm); 463 /* 464 * every task starts in sleeping state - this gets ignored 465 * if there's no wakeup pointing to this sleep state: 466 */ 467 add_sched_event_sleep(sched, task, 0); 468 469 sched->pid_to_task[pid] = task; 470 sched->nr_tasks++; 471 sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_desc *)); 472 BUG_ON(!sched->tasks); 473 sched->tasks[task->nr] = task; 474 475 if (verbose > 0) 476 printf("registered task #%ld, PID %ld (%s)\n", sched->nr_tasks, pid, comm); 477 478 return task; 479 } 480 481 482 static void print_task_traces(struct perf_sched *sched) 483 { 484 struct task_desc *task; 485 unsigned long i; 486 487 for (i = 0; i < sched->nr_tasks; i++) { 488 task = sched->tasks[i]; 489 printf("task %6ld (%20s:%10ld), nr_events: %ld\n", 490 task->nr, task->comm, task->pid, task->nr_events); 491 } 492 } 493 494 static void add_cross_task_wakeups(struct perf_sched *sched) 495 { 496 struct task_desc *task1, *task2; 497 unsigned long i, j; 498 499 for (i = 0; i < sched->nr_tasks; i++) { 500 task1 = sched->tasks[i]; 501 j = i + 1; 502 if (j == sched->nr_tasks) 503 j = 0; 504 task2 = sched->tasks[j]; 505 add_sched_event_wakeup(sched, task1, 0, task2); 506 } 507 } 508 509 static void perf_sched__process_event(struct perf_sched *sched, 510 struct sched_atom *atom) 511 { 512 int ret = 0; 513 514 switch (atom->type) { 515 case SCHED_EVENT_RUN: 516 burn_nsecs(sched, atom->duration); 517 break; 518 case SCHED_EVENT_SLEEP: 519 if (atom->wait_sem) 520 ret = sem_wait(atom->wait_sem); 521 BUG_ON(ret); 522 break; 523 case SCHED_EVENT_WAKEUP: 524 if (atom->wait_sem) 525 ret = sem_post(atom->wait_sem); 526 BUG_ON(ret); 527 break; 528 default: 529 BUG_ON(1); 530 } 531 } 532 533 static u64 get_cpu_usage_nsec_parent(void) 534 { 535 struct rusage ru; 536 u64 sum; 537 int err; 538 539 err = getrusage(RUSAGE_SELF, &ru); 540 BUG_ON(err); 541 542 sum = ru.ru_utime.tv_sec * NSEC_PER_SEC + ru.ru_utime.tv_usec * NSEC_PER_USEC; 543 sum += ru.ru_stime.tv_sec * NSEC_PER_SEC + ru.ru_stime.tv_usec * NSEC_PER_USEC; 544 545 return sum; 546 } 547 548 static int self_open_counters(struct perf_sched *sched, unsigned long cur_task) 549 { 550 struct perf_event_attr attr; 551 char sbuf[STRERR_BUFSIZE], info[STRERR_BUFSIZE]; 552 int fd; 553 struct rlimit limit; 554 bool need_privilege = false; 555 556 memset(&attr, 0, sizeof(attr)); 557 558 attr.type = PERF_TYPE_SOFTWARE; 559 attr.config = PERF_COUNT_SW_TASK_CLOCK; 560 561 force_again: 562 fd = sys_perf_event_open(&attr, 0, -1, -1, 563 perf_event_open_cloexec_flag()); 564 565 if (fd < 0) { 566 if (errno == EMFILE) { 567 if (sched->force) { 568 BUG_ON(getrlimit(RLIMIT_NOFILE, &limit) == -1); 569 limit.rlim_cur += sched->nr_tasks - cur_task; 570 if (limit.rlim_cur > limit.rlim_max) { 571 limit.rlim_max = limit.rlim_cur; 572 need_privilege = true; 573 } 574 if (setrlimit(RLIMIT_NOFILE, &limit) == -1) { 575 if (need_privilege && errno == EPERM) 576 strcpy(info, "Need privilege\n"); 577 } else 578 goto force_again; 579 } else 580 strcpy(info, "Have a try with -f option\n"); 581 } 582 pr_err("Error: sys_perf_event_open() syscall returned " 583 "with %d (%s)\n%s", fd, 584 str_error_r(errno, sbuf, sizeof(sbuf)), info); 585 exit(EXIT_FAILURE); 586 } 587 return fd; 588 } 589 590 static u64 get_cpu_usage_nsec_self(int fd) 591 { 592 u64 runtime; 593 int ret; 594 595 ret = read(fd, &runtime, sizeof(runtime)); 596 BUG_ON(ret != sizeof(runtime)); 597 598 return runtime; 599 } 600 601 struct sched_thread_parms { 602 struct task_desc *task; 603 struct perf_sched *sched; 604 int fd; 605 }; 606 607 static void *thread_func(void *ctx) 608 { 609 struct sched_thread_parms *parms = ctx; 610 struct task_desc *this_task = parms->task; 611 struct perf_sched *sched = parms->sched; 612 u64 cpu_usage_0, cpu_usage_1; 613 unsigned long i, ret; 614 char comm2[22]; 615 int fd = parms->fd; 616 617 zfree(&parms); 618 619 sprintf(comm2, ":%s", this_task->comm); 620 prctl(PR_SET_NAME, comm2); 621 if (fd < 0) 622 return NULL; 623 624 while (!sched->thread_funcs_exit) { 625 ret = sem_post(&this_task->ready_for_work); 626 BUG_ON(ret); 627 mutex_lock(&sched->start_work_mutex); 628 mutex_unlock(&sched->start_work_mutex); 629 630 cpu_usage_0 = get_cpu_usage_nsec_self(fd); 631 632 for (i = 0; i < this_task->nr_events; i++) { 633 this_task->curr_event = i; 634 perf_sched__process_event(sched, this_task->atoms[i]); 635 } 636 637 cpu_usage_1 = get_cpu_usage_nsec_self(fd); 638 this_task->cpu_usage = cpu_usage_1 - cpu_usage_0; 639 ret = sem_post(&this_task->work_done_sem); 640 BUG_ON(ret); 641 642 mutex_lock(&sched->work_done_wait_mutex); 643 mutex_unlock(&sched->work_done_wait_mutex); 644 } 645 return NULL; 646 } 647 648 static void create_tasks(struct perf_sched *sched) 649 EXCLUSIVE_LOCK_FUNCTION(sched->start_work_mutex) 650 EXCLUSIVE_LOCK_FUNCTION(sched->work_done_wait_mutex) 651 { 652 struct task_desc *task; 653 pthread_attr_t attr; 654 unsigned long i; 655 int err; 656 657 err = pthread_attr_init(&attr); 658 BUG_ON(err); 659 err = pthread_attr_setstacksize(&attr, 660 (size_t) max(16 * 1024, (int)PTHREAD_STACK_MIN)); 661 BUG_ON(err); 662 mutex_lock(&sched->start_work_mutex); 663 mutex_lock(&sched->work_done_wait_mutex); 664 for (i = 0; i < sched->nr_tasks; i++) { 665 struct sched_thread_parms *parms = malloc(sizeof(*parms)); 666 BUG_ON(parms == NULL); 667 parms->task = task = sched->tasks[i]; 668 parms->sched = sched; 669 parms->fd = self_open_counters(sched, i); 670 sem_init(&task->ready_for_work, 0, 0); 671 sem_init(&task->work_done_sem, 0, 0); 672 task->curr_event = 0; 673 err = pthread_create(&task->thread, &attr, thread_func, parms); 674 BUG_ON(err); 675 } 676 } 677 678 static void destroy_tasks(struct perf_sched *sched) 679 UNLOCK_FUNCTION(sched->start_work_mutex) 680 UNLOCK_FUNCTION(sched->work_done_wait_mutex) 681 { 682 struct task_desc *task; 683 unsigned long i; 684 int err; 685 686 mutex_unlock(&sched->start_work_mutex); 687 mutex_unlock(&sched->work_done_wait_mutex); 688 /* Get rid of threads so they won't be upset by mutex destrunction */ 689 for (i = 0; i < sched->nr_tasks; i++) { 690 task = sched->tasks[i]; 691 err = pthread_join(task->thread, NULL); 692 BUG_ON(err); 693 sem_destroy(&task->ready_for_work); 694 sem_destroy(&task->work_done_sem); 695 } 696 } 697 698 static void wait_for_tasks(struct perf_sched *sched) 699 EXCLUSIVE_LOCKS_REQUIRED(sched->work_done_wait_mutex) 700 EXCLUSIVE_LOCKS_REQUIRED(sched->start_work_mutex) 701 { 702 u64 cpu_usage_0, cpu_usage_1; 703 struct task_desc *task; 704 unsigned long i, ret; 705 706 sched->start_time = get_nsecs(); 707 sched->cpu_usage = 0; 708 mutex_unlock(&sched->work_done_wait_mutex); 709 710 for (i = 0; i < sched->nr_tasks; i++) { 711 task = sched->tasks[i]; 712 ret = sem_wait(&task->ready_for_work); 713 BUG_ON(ret); 714 sem_init(&task->ready_for_work, 0, 0); 715 } 716 mutex_lock(&sched->work_done_wait_mutex); 717 718 cpu_usage_0 = get_cpu_usage_nsec_parent(); 719 720 mutex_unlock(&sched->start_work_mutex); 721 722 for (i = 0; i < sched->nr_tasks; i++) { 723 task = sched->tasks[i]; 724 ret = sem_wait(&task->work_done_sem); 725 BUG_ON(ret); 726 sem_init(&task->work_done_sem, 0, 0); 727 sched->cpu_usage += task->cpu_usage; 728 task->cpu_usage = 0; 729 } 730 731 cpu_usage_1 = get_cpu_usage_nsec_parent(); 732 if (!sched->runavg_cpu_usage) 733 sched->runavg_cpu_usage = sched->cpu_usage; 734 sched->runavg_cpu_usage = (sched->runavg_cpu_usage * (sched->replay_repeat - 1) + sched->cpu_usage) / sched->replay_repeat; 735 736 sched->parent_cpu_usage = cpu_usage_1 - cpu_usage_0; 737 if (!sched->runavg_parent_cpu_usage) 738 sched->runavg_parent_cpu_usage = sched->parent_cpu_usage; 739 sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * (sched->replay_repeat - 1) + 740 sched->parent_cpu_usage)/sched->replay_repeat; 741 742 mutex_lock(&sched->start_work_mutex); 743 744 for (i = 0; i < sched->nr_tasks; i++) { 745 task = sched->tasks[i]; 746 task->curr_event = 0; 747 } 748 } 749 750 static void run_one_test(struct perf_sched *sched) 751 EXCLUSIVE_LOCKS_REQUIRED(sched->work_done_wait_mutex) 752 EXCLUSIVE_LOCKS_REQUIRED(sched->start_work_mutex) 753 { 754 u64 T0, T1, delta, avg_delta, fluct; 755 756 T0 = get_nsecs(); 757 wait_for_tasks(sched); 758 T1 = get_nsecs(); 759 760 delta = T1 - T0; 761 sched->sum_runtime += delta; 762 sched->nr_runs++; 763 764 avg_delta = sched->sum_runtime / sched->nr_runs; 765 if (delta < avg_delta) 766 fluct = avg_delta - delta; 767 else 768 fluct = delta - avg_delta; 769 sched->sum_fluct += fluct; 770 if (!sched->run_avg) 771 sched->run_avg = delta; 772 sched->run_avg = (sched->run_avg * (sched->replay_repeat - 1) + delta) / sched->replay_repeat; 773 774 printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / NSEC_PER_MSEC); 775 776 printf("ravg: %0.2f, ", (double)sched->run_avg / NSEC_PER_MSEC); 777 778 printf("cpu: %0.2f / %0.2f", 779 (double)sched->cpu_usage / NSEC_PER_MSEC, (double)sched->runavg_cpu_usage / NSEC_PER_MSEC); 780 781 #if 0 782 /* 783 * rusage statistics done by the parent, these are less 784 * accurate than the sched->sum_exec_runtime based statistics: 785 */ 786 printf(" [%0.2f / %0.2f]", 787 (double)sched->parent_cpu_usage / NSEC_PER_MSEC, 788 (double)sched->runavg_parent_cpu_usage / NSEC_PER_MSEC); 789 #endif 790 791 printf("\n"); 792 793 if (sched->nr_sleep_corrections) 794 printf(" (%ld sleep corrections)\n", sched->nr_sleep_corrections); 795 sched->nr_sleep_corrections = 0; 796 } 797 798 static void test_calibrations(struct perf_sched *sched) 799 { 800 u64 T0, T1; 801 802 T0 = get_nsecs(); 803 burn_nsecs(sched, NSEC_PER_MSEC); 804 T1 = get_nsecs(); 805 806 printf("the run test took %" PRIu64 " nsecs\n", T1 - T0); 807 808 T0 = get_nsecs(); 809 sleep_nsecs(NSEC_PER_MSEC); 810 T1 = get_nsecs(); 811 812 printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0); 813 } 814 815 static int 816 replay_wakeup_event(struct perf_sched *sched, 817 struct evsel *evsel, struct perf_sample *sample, 818 struct machine *machine __maybe_unused) 819 { 820 const char *comm = evsel__strval(evsel, sample, "comm"); 821 const u32 pid = evsel__intval(evsel, sample, "pid"); 822 struct task_desc *waker, *wakee; 823 824 if (verbose > 0) { 825 printf("sched_wakeup event %p\n", evsel); 826 827 printf(" ... pid %d woke up %s/%d\n", sample->tid, comm, pid); 828 } 829 830 waker = register_pid(sched, sample->tid, "<unknown>"); 831 wakee = register_pid(sched, pid, comm); 832 833 add_sched_event_wakeup(sched, waker, sample->time, wakee); 834 return 0; 835 } 836 837 static int replay_switch_event(struct perf_sched *sched, 838 struct evsel *evsel, 839 struct perf_sample *sample, 840 struct machine *machine __maybe_unused) 841 { 842 const char *prev_comm = evsel__strval(evsel, sample, "prev_comm"), 843 *next_comm = evsel__strval(evsel, sample, "next_comm"); 844 const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"), 845 next_pid = evsel__intval(evsel, sample, "next_pid"); 846 struct task_desc *prev, __maybe_unused *next; 847 u64 timestamp0, timestamp = sample->time; 848 int cpu = sample->cpu; 849 s64 delta; 850 851 if (verbose > 0) 852 printf("sched_switch event %p\n", evsel); 853 854 if (cpu >= MAX_CPUS || cpu < 0) 855 return 0; 856 857 timestamp0 = sched->cpu_last_switched[cpu]; 858 if (timestamp0) 859 delta = timestamp - timestamp0; 860 else 861 delta = 0; 862 863 if (delta < 0) { 864 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 865 return -1; 866 } 867 868 pr_debug(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n", 869 prev_comm, prev_pid, next_comm, next_pid, delta); 870 871 prev = register_pid(sched, prev_pid, prev_comm); 872 next = register_pid(sched, next_pid, next_comm); 873 874 sched->cpu_last_switched[cpu] = timestamp; 875 876 add_sched_event_run(sched, prev, timestamp, delta); 877 add_sched_event_sleep(sched, prev, timestamp); 878 879 return 0; 880 } 881 882 static int replay_fork_event(struct perf_sched *sched, 883 union perf_event *event, 884 struct machine *machine) 885 { 886 struct thread *child, *parent; 887 888 child = machine__findnew_thread(machine, event->fork.pid, 889 event->fork.tid); 890 parent = machine__findnew_thread(machine, event->fork.ppid, 891 event->fork.ptid); 892 893 if (child == NULL || parent == NULL) { 894 pr_debug("thread does not exist on fork event: child %p, parent %p\n", 895 child, parent); 896 goto out_put; 897 } 898 899 if (verbose > 0) { 900 printf("fork event\n"); 901 printf("... parent: %s/%d\n", thread__comm_str(parent), thread__tid(parent)); 902 printf("... child: %s/%d\n", thread__comm_str(child), thread__tid(child)); 903 } 904 905 register_pid(sched, thread__tid(parent), thread__comm_str(parent)); 906 register_pid(sched, thread__tid(child), thread__comm_str(child)); 907 out_put: 908 thread__put(child); 909 thread__put(parent); 910 return 0; 911 } 912 913 struct sort_dimension { 914 const char *name; 915 sort_fn_t cmp; 916 struct list_head list; 917 }; 918 919 static inline void init_prio(struct thread_runtime *r) 920 { 921 r->prio = -1; 922 } 923 924 /* 925 * handle runtime stats saved per thread 926 */ 927 static struct thread_runtime *thread__init_runtime(struct thread *thread) 928 { 929 struct thread_runtime *r; 930 931 r = zalloc(sizeof(struct thread_runtime)); 932 if (!r) 933 return NULL; 934 935 init_stats(&r->run_stats); 936 init_prio(r); 937 thread__set_priv(thread, r); 938 939 return r; 940 } 941 942 static struct thread_runtime *thread__get_runtime(struct thread *thread) 943 { 944 struct thread_runtime *tr; 945 946 tr = thread__priv(thread); 947 if (tr == NULL) { 948 tr = thread__init_runtime(thread); 949 if (tr == NULL) 950 pr_debug("Failed to malloc memory for runtime data.\n"); 951 } 952 953 return tr; 954 } 955 956 static int 957 thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r) 958 { 959 struct sort_dimension *sort; 960 int ret = 0; 961 962 BUG_ON(list_empty(list)); 963 964 list_for_each_entry(sort, list, list) { 965 ret = sort->cmp(l, r); 966 if (ret) 967 return ret; 968 } 969 970 return ret; 971 } 972 973 static struct work_atoms * 974 thread_atoms_search(struct rb_root_cached *root, struct thread *thread, 975 struct list_head *sort_list) 976 { 977 struct rb_node *node = root->rb_root.rb_node; 978 struct work_atoms key = { .thread = thread }; 979 980 while (node) { 981 struct work_atoms *atoms; 982 int cmp; 983 984 atoms = container_of(node, struct work_atoms, node); 985 986 cmp = thread_lat_cmp(sort_list, &key, atoms); 987 if (cmp > 0) 988 node = node->rb_left; 989 else if (cmp < 0) 990 node = node->rb_right; 991 else { 992 BUG_ON(thread != atoms->thread); 993 return atoms; 994 } 995 } 996 return NULL; 997 } 998 999 static void 1000 __thread_latency_insert(struct rb_root_cached *root, struct work_atoms *data, 1001 struct list_head *sort_list) 1002 { 1003 struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL; 1004 bool leftmost = true; 1005 1006 while (*new) { 1007 struct work_atoms *this; 1008 int cmp; 1009 1010 this = container_of(*new, struct work_atoms, node); 1011 parent = *new; 1012 1013 cmp = thread_lat_cmp(sort_list, data, this); 1014 1015 if (cmp > 0) 1016 new = &((*new)->rb_left); 1017 else { 1018 new = &((*new)->rb_right); 1019 leftmost = false; 1020 } 1021 } 1022 1023 rb_link_node(&data->node, parent, new); 1024 rb_insert_color_cached(&data->node, root, leftmost); 1025 } 1026 1027 static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread) 1028 { 1029 struct work_atoms *atoms = zalloc(sizeof(*atoms)); 1030 if (!atoms) { 1031 pr_err("No memory at %s\n", __func__); 1032 return -1; 1033 } 1034 1035 atoms->thread = thread__get(thread); 1036 INIT_LIST_HEAD(&atoms->work_list); 1037 __thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid); 1038 return 0; 1039 } 1040 1041 static int 1042 add_sched_out_event(struct work_atoms *atoms, 1043 char run_state, 1044 u64 timestamp) 1045 { 1046 struct work_atom *atom = zalloc(sizeof(*atom)); 1047 if (!atom) { 1048 pr_err("Non memory at %s", __func__); 1049 return -1; 1050 } 1051 1052 atom->sched_out_time = timestamp; 1053 1054 if (run_state == 'R') { 1055 atom->state = THREAD_WAIT_CPU; 1056 atom->wake_up_time = atom->sched_out_time; 1057 } 1058 1059 list_add_tail(&atom->list, &atoms->work_list); 1060 return 0; 1061 } 1062 1063 static void 1064 add_runtime_event(struct work_atoms *atoms, u64 delta, 1065 u64 timestamp __maybe_unused) 1066 { 1067 struct work_atom *atom; 1068 1069 BUG_ON(list_empty(&atoms->work_list)); 1070 1071 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1072 1073 atom->runtime += delta; 1074 atoms->total_runtime += delta; 1075 } 1076 1077 static void 1078 add_sched_in_event(struct work_atoms *atoms, u64 timestamp) 1079 { 1080 struct work_atom *atom; 1081 u64 delta; 1082 1083 if (list_empty(&atoms->work_list)) 1084 return; 1085 1086 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1087 1088 if (atom->state != THREAD_WAIT_CPU) 1089 return; 1090 1091 if (timestamp < atom->wake_up_time) { 1092 atom->state = THREAD_IGNORE; 1093 return; 1094 } 1095 1096 atom->state = THREAD_SCHED_IN; 1097 atom->sched_in_time = timestamp; 1098 1099 delta = atom->sched_in_time - atom->wake_up_time; 1100 atoms->total_lat += delta; 1101 if (delta > atoms->max_lat) { 1102 atoms->max_lat = delta; 1103 atoms->max_lat_start = atom->wake_up_time; 1104 atoms->max_lat_end = timestamp; 1105 } 1106 atoms->nb_atoms++; 1107 } 1108 1109 static int latency_switch_event(struct perf_sched *sched, 1110 struct evsel *evsel, 1111 struct perf_sample *sample, 1112 struct machine *machine) 1113 { 1114 const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"), 1115 next_pid = evsel__intval(evsel, sample, "next_pid"); 1116 const char prev_state = evsel__taskstate(evsel, sample, "prev_state"); 1117 struct work_atoms *out_events, *in_events; 1118 struct thread *sched_out, *sched_in; 1119 u64 timestamp0, timestamp = sample->time; 1120 int cpu = sample->cpu, err = -1; 1121 s64 delta; 1122 1123 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1124 1125 timestamp0 = sched->cpu_last_switched[cpu]; 1126 sched->cpu_last_switched[cpu] = timestamp; 1127 if (timestamp0) 1128 delta = timestamp - timestamp0; 1129 else 1130 delta = 0; 1131 1132 if (delta < 0) { 1133 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 1134 return -1; 1135 } 1136 1137 sched_out = machine__findnew_thread(machine, -1, prev_pid); 1138 sched_in = machine__findnew_thread(machine, -1, next_pid); 1139 if (sched_out == NULL || sched_in == NULL) 1140 goto out_put; 1141 1142 out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); 1143 if (!out_events) { 1144 if (thread_atoms_insert(sched, sched_out)) 1145 goto out_put; 1146 out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); 1147 if (!out_events) { 1148 pr_err("out-event: Internal tree error"); 1149 goto out_put; 1150 } 1151 } 1152 if (add_sched_out_event(out_events, prev_state, timestamp)) 1153 return -1; 1154 1155 in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); 1156 if (!in_events) { 1157 if (thread_atoms_insert(sched, sched_in)) 1158 goto out_put; 1159 in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); 1160 if (!in_events) { 1161 pr_err("in-event: Internal tree error"); 1162 goto out_put; 1163 } 1164 /* 1165 * Take came in we have not heard about yet, 1166 * add in an initial atom in runnable state: 1167 */ 1168 if (add_sched_out_event(in_events, 'R', timestamp)) 1169 goto out_put; 1170 } 1171 add_sched_in_event(in_events, timestamp); 1172 err = 0; 1173 out_put: 1174 thread__put(sched_out); 1175 thread__put(sched_in); 1176 return err; 1177 } 1178 1179 static int latency_runtime_event(struct perf_sched *sched, 1180 struct evsel *evsel, 1181 struct perf_sample *sample, 1182 struct machine *machine) 1183 { 1184 const u32 pid = evsel__intval(evsel, sample, "pid"); 1185 const u64 runtime = evsel__intval(evsel, sample, "runtime"); 1186 struct thread *thread = machine__findnew_thread(machine, -1, pid); 1187 struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); 1188 u64 timestamp = sample->time; 1189 int cpu = sample->cpu, err = -1; 1190 1191 if (thread == NULL) 1192 return -1; 1193 1194 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1195 if (!atoms) { 1196 if (thread_atoms_insert(sched, thread)) 1197 goto out_put; 1198 atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); 1199 if (!atoms) { 1200 pr_err("in-event: Internal tree error"); 1201 goto out_put; 1202 } 1203 if (add_sched_out_event(atoms, 'R', timestamp)) 1204 goto out_put; 1205 } 1206 1207 add_runtime_event(atoms, runtime, timestamp); 1208 err = 0; 1209 out_put: 1210 thread__put(thread); 1211 return err; 1212 } 1213 1214 static int latency_wakeup_event(struct perf_sched *sched, 1215 struct evsel *evsel, 1216 struct perf_sample *sample, 1217 struct machine *machine) 1218 { 1219 const u32 pid = evsel__intval(evsel, sample, "pid"); 1220 struct work_atoms *atoms; 1221 struct work_atom *atom; 1222 struct thread *wakee; 1223 u64 timestamp = sample->time; 1224 int err = -1; 1225 1226 wakee = machine__findnew_thread(machine, -1, pid); 1227 if (wakee == NULL) 1228 return -1; 1229 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); 1230 if (!atoms) { 1231 if (thread_atoms_insert(sched, wakee)) 1232 goto out_put; 1233 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); 1234 if (!atoms) { 1235 pr_err("wakeup-event: Internal tree error"); 1236 goto out_put; 1237 } 1238 if (add_sched_out_event(atoms, 'S', timestamp)) 1239 goto out_put; 1240 } 1241 1242 BUG_ON(list_empty(&atoms->work_list)); 1243 1244 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1245 1246 /* 1247 * As we do not guarantee the wakeup event happens when 1248 * task is out of run queue, also may happen when task is 1249 * on run queue and wakeup only change ->state to TASK_RUNNING, 1250 * then we should not set the ->wake_up_time when wake up a 1251 * task which is on run queue. 1252 * 1253 * You WILL be missing events if you've recorded only 1254 * one CPU, or are only looking at only one, so don't 1255 * skip in this case. 1256 */ 1257 if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING) 1258 goto out_ok; 1259 1260 sched->nr_timestamps++; 1261 if (atom->sched_out_time > timestamp) { 1262 sched->nr_unordered_timestamps++; 1263 goto out_ok; 1264 } 1265 1266 atom->state = THREAD_WAIT_CPU; 1267 atom->wake_up_time = timestamp; 1268 out_ok: 1269 err = 0; 1270 out_put: 1271 thread__put(wakee); 1272 return err; 1273 } 1274 1275 static int latency_migrate_task_event(struct perf_sched *sched, 1276 struct evsel *evsel, 1277 struct perf_sample *sample, 1278 struct machine *machine) 1279 { 1280 const u32 pid = evsel__intval(evsel, sample, "pid"); 1281 u64 timestamp = sample->time; 1282 struct work_atoms *atoms; 1283 struct work_atom *atom; 1284 struct thread *migrant; 1285 int err = -1; 1286 1287 /* 1288 * Only need to worry about migration when profiling one CPU. 1289 */ 1290 if (sched->profile_cpu == -1) 1291 return 0; 1292 1293 migrant = machine__findnew_thread(machine, -1, pid); 1294 if (migrant == NULL) 1295 return -1; 1296 atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); 1297 if (!atoms) { 1298 if (thread_atoms_insert(sched, migrant)) 1299 goto out_put; 1300 register_pid(sched, thread__tid(migrant), thread__comm_str(migrant)); 1301 atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); 1302 if (!atoms) { 1303 pr_err("migration-event: Internal tree error"); 1304 goto out_put; 1305 } 1306 if (add_sched_out_event(atoms, 'R', timestamp)) 1307 goto out_put; 1308 } 1309 1310 BUG_ON(list_empty(&atoms->work_list)); 1311 1312 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1313 atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp; 1314 1315 sched->nr_timestamps++; 1316 1317 if (atom->sched_out_time > timestamp) 1318 sched->nr_unordered_timestamps++; 1319 err = 0; 1320 out_put: 1321 thread__put(migrant); 1322 return err; 1323 } 1324 1325 static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list) 1326 { 1327 int i; 1328 int ret; 1329 u64 avg; 1330 char max_lat_start[32], max_lat_end[32]; 1331 1332 if (!work_list->nb_atoms) 1333 return; 1334 /* 1335 * Ignore idle threads: 1336 */ 1337 if (!strcmp(thread__comm_str(work_list->thread), "swapper")) 1338 return; 1339 1340 sched->all_runtime += work_list->total_runtime; 1341 sched->all_count += work_list->nb_atoms; 1342 1343 if (work_list->num_merged > 1) { 1344 ret = printf(" %s:(%d) ", thread__comm_str(work_list->thread), 1345 work_list->num_merged); 1346 } else { 1347 ret = printf(" %s:%d ", thread__comm_str(work_list->thread), 1348 thread__tid(work_list->thread)); 1349 } 1350 1351 for (i = 0; i < 24 - ret; i++) 1352 printf(" "); 1353 1354 avg = work_list->total_lat / work_list->nb_atoms; 1355 timestamp__scnprintf_usec(work_list->max_lat_start, max_lat_start, sizeof(max_lat_start)); 1356 timestamp__scnprintf_usec(work_list->max_lat_end, max_lat_end, sizeof(max_lat_end)); 1357 1358 printf("|%11.3f ms |%9" PRIu64 " | avg:%8.3f ms | max:%8.3f ms | max start: %12s s | max end: %12s s\n", 1359 (double)work_list->total_runtime / NSEC_PER_MSEC, 1360 work_list->nb_atoms, (double)avg / NSEC_PER_MSEC, 1361 (double)work_list->max_lat / NSEC_PER_MSEC, 1362 max_lat_start, max_lat_end); 1363 } 1364 1365 static int pid_cmp(struct work_atoms *l, struct work_atoms *r) 1366 { 1367 pid_t l_tid, r_tid; 1368 1369 if (RC_CHK_EQUAL(l->thread, r->thread)) 1370 return 0; 1371 l_tid = thread__tid(l->thread); 1372 r_tid = thread__tid(r->thread); 1373 if (l_tid < r_tid) 1374 return -1; 1375 if (l_tid > r_tid) 1376 return 1; 1377 return (int)(RC_CHK_ACCESS(l->thread) - RC_CHK_ACCESS(r->thread)); 1378 } 1379 1380 static int avg_cmp(struct work_atoms *l, struct work_atoms *r) 1381 { 1382 u64 avgl, avgr; 1383 1384 if (!l->nb_atoms) 1385 return -1; 1386 1387 if (!r->nb_atoms) 1388 return 1; 1389 1390 avgl = l->total_lat / l->nb_atoms; 1391 avgr = r->total_lat / r->nb_atoms; 1392 1393 if (avgl < avgr) 1394 return -1; 1395 if (avgl > avgr) 1396 return 1; 1397 1398 return 0; 1399 } 1400 1401 static int max_cmp(struct work_atoms *l, struct work_atoms *r) 1402 { 1403 if (l->max_lat < r->max_lat) 1404 return -1; 1405 if (l->max_lat > r->max_lat) 1406 return 1; 1407 1408 return 0; 1409 } 1410 1411 static int switch_cmp(struct work_atoms *l, struct work_atoms *r) 1412 { 1413 if (l->nb_atoms < r->nb_atoms) 1414 return -1; 1415 if (l->nb_atoms > r->nb_atoms) 1416 return 1; 1417 1418 return 0; 1419 } 1420 1421 static int runtime_cmp(struct work_atoms *l, struct work_atoms *r) 1422 { 1423 if (l->total_runtime < r->total_runtime) 1424 return -1; 1425 if (l->total_runtime > r->total_runtime) 1426 return 1; 1427 1428 return 0; 1429 } 1430 1431 static int sort_dimension__add(const char *tok, struct list_head *list) 1432 { 1433 size_t i; 1434 static struct sort_dimension avg_sort_dimension = { 1435 .name = "avg", 1436 .cmp = avg_cmp, 1437 }; 1438 static struct sort_dimension max_sort_dimension = { 1439 .name = "max", 1440 .cmp = max_cmp, 1441 }; 1442 static struct sort_dimension pid_sort_dimension = { 1443 .name = "pid", 1444 .cmp = pid_cmp, 1445 }; 1446 static struct sort_dimension runtime_sort_dimension = { 1447 .name = "runtime", 1448 .cmp = runtime_cmp, 1449 }; 1450 static struct sort_dimension switch_sort_dimension = { 1451 .name = "switch", 1452 .cmp = switch_cmp, 1453 }; 1454 struct sort_dimension *available_sorts[] = { 1455 &pid_sort_dimension, 1456 &avg_sort_dimension, 1457 &max_sort_dimension, 1458 &switch_sort_dimension, 1459 &runtime_sort_dimension, 1460 }; 1461 1462 for (i = 0; i < ARRAY_SIZE(available_sorts); i++) { 1463 if (!strcmp(available_sorts[i]->name, tok)) { 1464 list_add_tail(&available_sorts[i]->list, list); 1465 1466 return 0; 1467 } 1468 } 1469 1470 return -1; 1471 } 1472 1473 static void perf_sched__sort_lat(struct perf_sched *sched) 1474 { 1475 struct rb_node *node; 1476 struct rb_root_cached *root = &sched->atom_root; 1477 again: 1478 for (;;) { 1479 struct work_atoms *data; 1480 node = rb_first_cached(root); 1481 if (!node) 1482 break; 1483 1484 rb_erase_cached(node, root); 1485 data = rb_entry(node, struct work_atoms, node); 1486 __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list); 1487 } 1488 if (root == &sched->atom_root) { 1489 root = &sched->merged_atom_root; 1490 goto again; 1491 } 1492 } 1493 1494 static int process_sched_wakeup_event(const struct perf_tool *tool, 1495 struct evsel *evsel, 1496 struct perf_sample *sample, 1497 struct machine *machine) 1498 { 1499 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1500 1501 if (sched->tp_handler->wakeup_event) 1502 return sched->tp_handler->wakeup_event(sched, evsel, sample, machine); 1503 1504 return 0; 1505 } 1506 1507 static int process_sched_wakeup_ignore(const struct perf_tool *tool __maybe_unused, 1508 struct evsel *evsel __maybe_unused, 1509 struct perf_sample *sample __maybe_unused, 1510 struct machine *machine __maybe_unused) 1511 { 1512 return 0; 1513 } 1514 1515 union map_priv { 1516 void *ptr; 1517 bool color; 1518 }; 1519 1520 static bool thread__has_color(struct thread *thread) 1521 { 1522 union map_priv priv = { 1523 .ptr = thread__priv(thread), 1524 }; 1525 1526 return priv.color; 1527 } 1528 1529 static struct thread* 1530 map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid) 1531 { 1532 struct thread *thread = machine__findnew_thread(machine, pid, tid); 1533 union map_priv priv = { 1534 .color = false, 1535 }; 1536 1537 if (!sched->map.color_pids || !thread || thread__priv(thread)) 1538 return thread; 1539 1540 if (thread_map__has(sched->map.color_pids, tid)) 1541 priv.color = true; 1542 1543 thread__set_priv(thread, priv.ptr); 1544 return thread; 1545 } 1546 1547 static bool sched_match_task(struct perf_sched *sched, const char *comm_str) 1548 { 1549 bool fuzzy_match = sched->map.fuzzy; 1550 struct strlist *task_names = sched->map.task_names; 1551 struct str_node *node; 1552 1553 strlist__for_each_entry(node, task_names) { 1554 bool match_found = fuzzy_match ? !!strstr(comm_str, node->s) : 1555 !strcmp(comm_str, node->s); 1556 if (match_found) 1557 return true; 1558 } 1559 1560 return false; 1561 } 1562 1563 static void print_sched_map(struct perf_sched *sched, struct perf_cpu this_cpu, int cpus_nr, 1564 const char *color, bool sched_out) 1565 { 1566 for (int i = 0; i < cpus_nr; i++) { 1567 struct perf_cpu cpu = { 1568 .cpu = sched->map.comp ? sched->map.comp_cpus[i].cpu : i, 1569 }; 1570 struct thread *curr_thread = sched->curr_thread[cpu.cpu]; 1571 struct thread *curr_out_thread = sched->curr_out_thread[cpu.cpu]; 1572 struct thread_runtime *curr_tr; 1573 const char *pid_color = color; 1574 const char *cpu_color = color; 1575 char symbol = ' '; 1576 struct thread *thread_to_check = sched_out ? curr_out_thread : curr_thread; 1577 1578 if (thread_to_check && thread__has_color(thread_to_check)) 1579 pid_color = COLOR_PIDS; 1580 1581 if (sched->map.color_cpus && perf_cpu_map__has(sched->map.color_cpus, cpu)) 1582 cpu_color = COLOR_CPUS; 1583 1584 if (cpu.cpu == this_cpu.cpu) 1585 symbol = '*'; 1586 1587 color_fprintf(stdout, cpu.cpu != this_cpu.cpu ? color : cpu_color, "%c", symbol); 1588 1589 thread_to_check = sched_out ? sched->curr_out_thread[cpu.cpu] : 1590 sched->curr_thread[cpu.cpu]; 1591 1592 if (thread_to_check) { 1593 curr_tr = thread__get_runtime(thread_to_check); 1594 if (curr_tr == NULL) 1595 return; 1596 1597 if (sched_out) { 1598 if (cpu.cpu == this_cpu.cpu) 1599 color_fprintf(stdout, color, "- "); 1600 else { 1601 curr_tr = thread__get_runtime(sched->curr_thread[cpu.cpu]); 1602 if (curr_tr != NULL) 1603 color_fprintf(stdout, pid_color, "%2s ", 1604 curr_tr->shortname); 1605 } 1606 } else 1607 color_fprintf(stdout, pid_color, "%2s ", curr_tr->shortname); 1608 } else 1609 color_fprintf(stdout, color, " "); 1610 } 1611 } 1612 1613 static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, 1614 struct perf_sample *sample, struct machine *machine) 1615 { 1616 const u32 next_pid = evsel__intval(evsel, sample, "next_pid"); 1617 const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"); 1618 struct thread *sched_in, *sched_out; 1619 struct thread_runtime *tr; 1620 int new_shortname; 1621 u64 timestamp0, timestamp = sample->time; 1622 s64 delta; 1623 struct perf_cpu this_cpu = { 1624 .cpu = sample->cpu, 1625 }; 1626 int cpus_nr; 1627 int proceed; 1628 bool new_cpu = false; 1629 const char *color = PERF_COLOR_NORMAL; 1630 char stimestamp[32]; 1631 const char *str; 1632 1633 BUG_ON(this_cpu.cpu >= MAX_CPUS || this_cpu.cpu < 0); 1634 1635 if (this_cpu.cpu > sched->max_cpu.cpu) 1636 sched->max_cpu = this_cpu; 1637 1638 if (sched->map.comp) { 1639 cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS); 1640 if (!__test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) { 1641 sched->map.comp_cpus[cpus_nr++] = this_cpu; 1642 new_cpu = true; 1643 } 1644 } else 1645 cpus_nr = sched->max_cpu.cpu; 1646 1647 timestamp0 = sched->cpu_last_switched[this_cpu.cpu]; 1648 sched->cpu_last_switched[this_cpu.cpu] = timestamp; 1649 if (timestamp0) 1650 delta = timestamp - timestamp0; 1651 else 1652 delta = 0; 1653 1654 if (delta < 0) { 1655 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 1656 return -1; 1657 } 1658 1659 sched_in = map__findnew_thread(sched, machine, -1, next_pid); 1660 sched_out = map__findnew_thread(sched, machine, -1, prev_pid); 1661 if (sched_in == NULL || sched_out == NULL) 1662 return -1; 1663 1664 tr = thread__get_runtime(sched_in); 1665 if (tr == NULL) { 1666 thread__put(sched_in); 1667 return -1; 1668 } 1669 1670 sched->curr_thread[this_cpu.cpu] = thread__get(sched_in); 1671 sched->curr_out_thread[this_cpu.cpu] = thread__get(sched_out); 1672 1673 str = thread__comm_str(sched_in); 1674 new_shortname = 0; 1675 if (!tr->shortname[0]) { 1676 if (!strcmp(thread__comm_str(sched_in), "swapper")) { 1677 /* 1678 * Don't allocate a letter-number for swapper:0 1679 * as a shortname. Instead, we use '.' for it. 1680 */ 1681 tr->shortname[0] = '.'; 1682 tr->shortname[1] = ' '; 1683 } else if (!sched->map.task_name || sched_match_task(sched, str)) { 1684 tr->shortname[0] = sched->next_shortname1; 1685 tr->shortname[1] = sched->next_shortname2; 1686 1687 if (sched->next_shortname1 < 'Z') { 1688 sched->next_shortname1++; 1689 } else { 1690 sched->next_shortname1 = 'A'; 1691 if (sched->next_shortname2 < '9') 1692 sched->next_shortname2++; 1693 else 1694 sched->next_shortname2 = '0'; 1695 } 1696 } else { 1697 tr->shortname[0] = '-'; 1698 tr->shortname[1] = ' '; 1699 } 1700 new_shortname = 1; 1701 } 1702 1703 if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, this_cpu)) 1704 goto out; 1705 1706 proceed = 0; 1707 str = thread__comm_str(sched_in); 1708 /* 1709 * Check which of sched_in and sched_out matches the passed --task-name 1710 * arguments and call the corresponding print_sched_map. 1711 */ 1712 if (sched->map.task_name && !sched_match_task(sched, str)) { 1713 if (!sched_match_task(sched, thread__comm_str(sched_out))) 1714 goto out; 1715 else 1716 goto sched_out; 1717 1718 } else { 1719 str = thread__comm_str(sched_out); 1720 if (!(sched->map.task_name && !sched_match_task(sched, str))) 1721 proceed = 1; 1722 } 1723 1724 printf(" "); 1725 1726 print_sched_map(sched, this_cpu, cpus_nr, color, false); 1727 1728 timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp)); 1729 color_fprintf(stdout, color, " %12s secs ", stimestamp); 1730 if (new_shortname || tr->comm_changed || (verbose > 0 && thread__tid(sched_in))) { 1731 const char *pid_color = color; 1732 1733 if (thread__has_color(sched_in)) 1734 pid_color = COLOR_PIDS; 1735 1736 color_fprintf(stdout, pid_color, "%s => %s:%d", 1737 tr->shortname, thread__comm_str(sched_in), thread__tid(sched_in)); 1738 tr->comm_changed = false; 1739 } 1740 1741 if (sched->map.comp && new_cpu) 1742 color_fprintf(stdout, color, " (CPU %d)", this_cpu); 1743 1744 if (proceed != 1) { 1745 color_fprintf(stdout, color, "\n"); 1746 goto out; 1747 } 1748 1749 sched_out: 1750 if (sched->map.task_name) { 1751 tr = thread__get_runtime(sched->curr_out_thread[this_cpu.cpu]); 1752 if (strcmp(tr->shortname, "") == 0) 1753 goto out; 1754 1755 if (proceed == 1) 1756 color_fprintf(stdout, color, "\n"); 1757 1758 printf(" "); 1759 print_sched_map(sched, this_cpu, cpus_nr, color, true); 1760 timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp)); 1761 color_fprintf(stdout, color, " %12s secs ", stimestamp); 1762 } 1763 1764 color_fprintf(stdout, color, "\n"); 1765 1766 out: 1767 if (sched->map.task_name) 1768 thread__put(sched_out); 1769 1770 thread__put(sched_in); 1771 1772 return 0; 1773 } 1774 1775 static int process_sched_switch_event(const struct perf_tool *tool, 1776 struct evsel *evsel, 1777 struct perf_sample *sample, 1778 struct machine *machine) 1779 { 1780 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1781 int this_cpu = sample->cpu, err = 0; 1782 u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"), 1783 next_pid = evsel__intval(evsel, sample, "next_pid"); 1784 1785 if (sched->curr_pid[this_cpu] != (u32)-1) { 1786 /* 1787 * Are we trying to switch away a PID that is 1788 * not current? 1789 */ 1790 if (sched->curr_pid[this_cpu] != prev_pid) 1791 sched->nr_context_switch_bugs++; 1792 } 1793 1794 if (sched->tp_handler->switch_event) 1795 err = sched->tp_handler->switch_event(sched, evsel, sample, machine); 1796 1797 sched->curr_pid[this_cpu] = next_pid; 1798 return err; 1799 } 1800 1801 static int process_sched_runtime_event(const struct perf_tool *tool, 1802 struct evsel *evsel, 1803 struct perf_sample *sample, 1804 struct machine *machine) 1805 { 1806 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1807 1808 if (sched->tp_handler->runtime_event) 1809 return sched->tp_handler->runtime_event(sched, evsel, sample, machine); 1810 1811 return 0; 1812 } 1813 1814 static int perf_sched__process_fork_event(const struct perf_tool *tool, 1815 union perf_event *event, 1816 struct perf_sample *sample, 1817 struct machine *machine) 1818 { 1819 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1820 1821 /* run the fork event through the perf machinery */ 1822 perf_event__process_fork(tool, event, sample, machine); 1823 1824 /* and then run additional processing needed for this command */ 1825 if (sched->tp_handler->fork_event) 1826 return sched->tp_handler->fork_event(sched, event, machine); 1827 1828 return 0; 1829 } 1830 1831 static int process_sched_migrate_task_event(const struct perf_tool *tool, 1832 struct evsel *evsel, 1833 struct perf_sample *sample, 1834 struct machine *machine) 1835 { 1836 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1837 1838 if (sched->tp_handler->migrate_task_event) 1839 return sched->tp_handler->migrate_task_event(sched, evsel, sample, machine); 1840 1841 return 0; 1842 } 1843 1844 typedef int (*tracepoint_handler)(const struct perf_tool *tool, 1845 struct evsel *evsel, 1846 struct perf_sample *sample, 1847 struct machine *machine); 1848 1849 static int perf_sched__process_tracepoint_sample(const struct perf_tool *tool __maybe_unused, 1850 union perf_event *event __maybe_unused, 1851 struct perf_sample *sample, 1852 struct evsel *evsel, 1853 struct machine *machine) 1854 { 1855 int err = 0; 1856 1857 if (evsel->handler != NULL) { 1858 tracepoint_handler f = evsel->handler; 1859 err = f(tool, evsel, sample, machine); 1860 } 1861 1862 return err; 1863 } 1864 1865 static int perf_sched__process_comm(const struct perf_tool *tool __maybe_unused, 1866 union perf_event *event, 1867 struct perf_sample *sample, 1868 struct machine *machine) 1869 { 1870 struct thread *thread; 1871 struct thread_runtime *tr; 1872 int err; 1873 1874 err = perf_event__process_comm(tool, event, sample, machine); 1875 if (err) 1876 return err; 1877 1878 thread = machine__find_thread(machine, sample->pid, sample->tid); 1879 if (!thread) { 1880 pr_err("Internal error: can't find thread\n"); 1881 return -1; 1882 } 1883 1884 tr = thread__get_runtime(thread); 1885 if (tr == NULL) { 1886 thread__put(thread); 1887 return -1; 1888 } 1889 1890 tr->comm_changed = true; 1891 thread__put(thread); 1892 1893 return 0; 1894 } 1895 1896 static int perf_sched__read_events(struct perf_sched *sched) 1897 { 1898 struct evsel_str_handler handlers[] = { 1899 { "sched:sched_switch", process_sched_switch_event, }, 1900 { "sched:sched_stat_runtime", process_sched_runtime_event, }, 1901 { "sched:sched_wakeup", process_sched_wakeup_event, }, 1902 { "sched:sched_waking", process_sched_wakeup_event, }, 1903 { "sched:sched_wakeup_new", process_sched_wakeup_event, }, 1904 { "sched:sched_migrate_task", process_sched_migrate_task_event, }, 1905 }; 1906 struct perf_session *session; 1907 struct perf_data data = { 1908 .path = input_name, 1909 .mode = PERF_DATA_MODE_READ, 1910 .force = sched->force, 1911 }; 1912 int rc = -1; 1913 1914 session = perf_session__new(&data, &sched->tool); 1915 if (IS_ERR(session)) { 1916 pr_debug("Error creating perf session"); 1917 return PTR_ERR(session); 1918 } 1919 1920 symbol__init(&session->header.env); 1921 1922 /* prefer sched_waking if it is captured */ 1923 if (evlist__find_tracepoint_by_name(session->evlist, "sched:sched_waking")) 1924 handlers[2].handler = process_sched_wakeup_ignore; 1925 1926 if (perf_session__set_tracepoints_handlers(session, handlers)) 1927 goto out_delete; 1928 1929 if (perf_session__has_traces(session, "record -R")) { 1930 int err = perf_session__process_events(session); 1931 if (err) { 1932 pr_err("Failed to process events, error %d", err); 1933 goto out_delete; 1934 } 1935 1936 sched->nr_events = session->evlist->stats.nr_events[0]; 1937 sched->nr_lost_events = session->evlist->stats.total_lost; 1938 sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST]; 1939 } 1940 1941 rc = 0; 1942 out_delete: 1943 perf_session__delete(session); 1944 return rc; 1945 } 1946 1947 /* 1948 * scheduling times are printed as msec.usec 1949 */ 1950 static inline void print_sched_time(unsigned long long nsecs, int width) 1951 { 1952 unsigned long msecs; 1953 unsigned long usecs; 1954 1955 msecs = nsecs / NSEC_PER_MSEC; 1956 nsecs -= msecs * NSEC_PER_MSEC; 1957 usecs = nsecs / NSEC_PER_USEC; 1958 printf("%*lu.%03lu ", width, msecs, usecs); 1959 } 1960 1961 /* 1962 * returns runtime data for event, allocating memory for it the 1963 * first time it is used. 1964 */ 1965 static struct evsel_runtime *evsel__get_runtime(struct evsel *evsel) 1966 { 1967 struct evsel_runtime *r = evsel->priv; 1968 1969 if (r == NULL) { 1970 r = zalloc(sizeof(struct evsel_runtime)); 1971 evsel->priv = r; 1972 } 1973 1974 return r; 1975 } 1976 1977 /* 1978 * save last time event was seen per cpu 1979 */ 1980 static void evsel__save_time(struct evsel *evsel, u64 timestamp, u32 cpu) 1981 { 1982 struct evsel_runtime *r = evsel__get_runtime(evsel); 1983 1984 if (r == NULL) 1985 return; 1986 1987 if ((cpu >= r->ncpu) || (r->last_time == NULL)) { 1988 int i, n = __roundup_pow_of_two(cpu+1); 1989 void *p = r->last_time; 1990 1991 p = realloc(r->last_time, n * sizeof(u64)); 1992 if (!p) 1993 return; 1994 1995 r->last_time = p; 1996 for (i = r->ncpu; i < n; ++i) 1997 r->last_time[i] = (u64) 0; 1998 1999 r->ncpu = n; 2000 } 2001 2002 r->last_time[cpu] = timestamp; 2003 } 2004 2005 /* returns last time this event was seen on the given cpu */ 2006 static u64 evsel__get_time(struct evsel *evsel, u32 cpu) 2007 { 2008 struct evsel_runtime *r = evsel__get_runtime(evsel); 2009 2010 if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu)) 2011 return 0; 2012 2013 return r->last_time[cpu]; 2014 } 2015 2016 static int comm_width = 30; 2017 2018 static char *timehist_get_commstr(struct thread *thread) 2019 { 2020 static char str[32]; 2021 const char *comm = thread__comm_str(thread); 2022 pid_t tid = thread__tid(thread); 2023 pid_t pid = thread__pid(thread); 2024 int n; 2025 2026 if (pid == 0) 2027 n = scnprintf(str, sizeof(str), "%s", comm); 2028 2029 else if (tid != pid) 2030 n = scnprintf(str, sizeof(str), "%s[%d/%d]", comm, tid, pid); 2031 2032 else 2033 n = scnprintf(str, sizeof(str), "%s[%d]", comm, tid); 2034 2035 if (n > comm_width) 2036 comm_width = n; 2037 2038 return str; 2039 } 2040 2041 /* prio field format: xxx or xxx->yyy */ 2042 #define MAX_PRIO_STR_LEN 8 2043 static char *timehist_get_priostr(struct evsel *evsel, 2044 struct thread *thread, 2045 struct perf_sample *sample) 2046 { 2047 static char prio_str[16]; 2048 int prev_prio = (int)evsel__intval(evsel, sample, "prev_prio"); 2049 struct thread_runtime *tr = thread__priv(thread); 2050 2051 if (tr->prio != prev_prio && tr->prio != -1) 2052 scnprintf(prio_str, sizeof(prio_str), "%d->%d", tr->prio, prev_prio); 2053 else 2054 scnprintf(prio_str, sizeof(prio_str), "%d", prev_prio); 2055 2056 return prio_str; 2057 } 2058 2059 static void timehist_header(struct perf_sched *sched) 2060 { 2061 u32 ncpus = sched->max_cpu.cpu + 1; 2062 u32 i, j; 2063 2064 printf("%15s %6s ", "time", "cpu"); 2065 2066 if (sched->show_cpu_visual) { 2067 printf(" "); 2068 for (i = 0, j = 0; i < ncpus; ++i) { 2069 printf("%x", j++); 2070 if (j > 15) 2071 j = 0; 2072 } 2073 printf(" "); 2074 } 2075 2076 if (sched->show_prio) { 2077 printf(" %-*s %-*s %9s %9s %9s", 2078 comm_width, "task name", MAX_PRIO_STR_LEN, "prio", 2079 "wait time", "sch delay", "run time"); 2080 } else { 2081 printf(" %-*s %9s %9s %9s", comm_width, 2082 "task name", "wait time", "sch delay", "run time"); 2083 } 2084 2085 if (sched->show_state) 2086 printf(" %s", "state"); 2087 2088 printf("\n"); 2089 2090 /* 2091 * units row 2092 */ 2093 printf("%15s %-6s ", "", ""); 2094 2095 if (sched->show_cpu_visual) 2096 printf(" %*s ", ncpus, ""); 2097 2098 if (sched->show_prio) { 2099 printf(" %-*s %-*s %9s %9s %9s", 2100 comm_width, "[tid/pid]", MAX_PRIO_STR_LEN, "", 2101 "(msec)", "(msec)", "(msec)"); 2102 } else { 2103 printf(" %-*s %9s %9s %9s", comm_width, 2104 "[tid/pid]", "(msec)", "(msec)", "(msec)"); 2105 } 2106 2107 if (sched->show_state) 2108 printf(" %5s", ""); 2109 2110 printf("\n"); 2111 2112 /* 2113 * separator 2114 */ 2115 printf("%.15s %.6s ", graph_dotted_line, graph_dotted_line); 2116 2117 if (sched->show_cpu_visual) 2118 printf(" %.*s ", ncpus, graph_dotted_line); 2119 2120 if (sched->show_prio) { 2121 printf(" %.*s %.*s %.9s %.9s %.9s", 2122 comm_width, graph_dotted_line, MAX_PRIO_STR_LEN, graph_dotted_line, 2123 graph_dotted_line, graph_dotted_line, graph_dotted_line); 2124 } else { 2125 printf(" %.*s %.9s %.9s %.9s", comm_width, 2126 graph_dotted_line, graph_dotted_line, graph_dotted_line, 2127 graph_dotted_line); 2128 } 2129 2130 if (sched->show_state) 2131 printf(" %.5s", graph_dotted_line); 2132 2133 printf("\n"); 2134 } 2135 2136 static void timehist_print_sample(struct perf_sched *sched, 2137 struct evsel *evsel, 2138 struct perf_sample *sample, 2139 struct addr_location *al, 2140 struct thread *thread, 2141 u64 t, const char state) 2142 { 2143 struct thread_runtime *tr = thread__priv(thread); 2144 const char *next_comm = evsel__strval(evsel, sample, "next_comm"); 2145 const u32 next_pid = evsel__intval(evsel, sample, "next_pid"); 2146 u32 max_cpus = sched->max_cpu.cpu + 1; 2147 char tstr[64]; 2148 char nstr[30]; 2149 u64 wait_time; 2150 2151 if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) 2152 return; 2153 2154 timestamp__scnprintf_usec(t, tstr, sizeof(tstr)); 2155 printf("%15s [%04d] ", tstr, sample->cpu); 2156 2157 if (sched->show_cpu_visual) { 2158 u32 i; 2159 char c; 2160 2161 printf(" "); 2162 for (i = 0; i < max_cpus; ++i) { 2163 /* flag idle times with 'i'; others are sched events */ 2164 if (i == sample->cpu) 2165 c = (thread__tid(thread) == 0) ? 'i' : 's'; 2166 else 2167 c = ' '; 2168 printf("%c", c); 2169 } 2170 printf(" "); 2171 } 2172 2173 printf(" %-*s ", comm_width, timehist_get_commstr(thread)); 2174 2175 if (sched->show_prio) 2176 printf(" %-*s ", MAX_PRIO_STR_LEN, timehist_get_priostr(evsel, thread, sample)); 2177 2178 wait_time = tr->dt_sleep + tr->dt_iowait + tr->dt_preempt; 2179 print_sched_time(wait_time, 6); 2180 2181 print_sched_time(tr->dt_delay, 6); 2182 print_sched_time(tr->dt_run, 6); 2183 2184 if (sched->show_state) 2185 printf(" %5c ", thread__tid(thread) == 0 ? 'I' : state); 2186 2187 if (sched->show_next) { 2188 snprintf(nstr, sizeof(nstr), "next: %s[%d]", next_comm, next_pid); 2189 printf(" %-*s", comm_width, nstr); 2190 } 2191 2192 if (sched->show_wakeups && !sched->show_next) 2193 printf(" %-*s", comm_width, ""); 2194 2195 if (thread__tid(thread) == 0) 2196 goto out; 2197 2198 if (sched->show_callchain) 2199 printf(" "); 2200 2201 sample__fprintf_sym(sample, al, 0, 2202 EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE | 2203 EVSEL__PRINT_CALLCHAIN_ARROW | 2204 EVSEL__PRINT_SKIP_IGNORED, 2205 get_tls_callchain_cursor(), symbol_conf.bt_stop_list, stdout); 2206 2207 out: 2208 printf("\n"); 2209 } 2210 2211 /* 2212 * Explanation of delta-time stats: 2213 * 2214 * t = time of current schedule out event 2215 * tprev = time of previous sched out event 2216 * also time of schedule-in event for current task 2217 * last_time = time of last sched change event for current task 2218 * (i.e, time process was last scheduled out) 2219 * ready_to_run = time of wakeup for current task 2220 * 2221 * -----|------------|------------|------------|------ 2222 * last ready tprev t 2223 * time to run 2224 * 2225 * |-------- dt_wait --------| 2226 * |- dt_delay -|-- dt_run --| 2227 * 2228 * dt_run = run time of current task 2229 * dt_wait = time between last schedule out event for task and tprev 2230 * represents time spent off the cpu 2231 * dt_delay = time between wakeup and schedule-in of task 2232 */ 2233 2234 static void timehist_update_runtime_stats(struct thread_runtime *r, 2235 u64 t, u64 tprev) 2236 { 2237 r->dt_delay = 0; 2238 r->dt_sleep = 0; 2239 r->dt_iowait = 0; 2240 r->dt_preempt = 0; 2241 r->dt_run = 0; 2242 2243 if (tprev) { 2244 r->dt_run = t - tprev; 2245 if (r->ready_to_run) { 2246 if (r->ready_to_run > tprev) 2247 pr_debug("time travel: wakeup time for task > previous sched_switch event\n"); 2248 else 2249 r->dt_delay = tprev - r->ready_to_run; 2250 } 2251 2252 if (r->last_time > tprev) 2253 pr_debug("time travel: last sched out time for task > previous sched_switch event\n"); 2254 else if (r->last_time) { 2255 u64 dt_wait = tprev - r->last_time; 2256 2257 if (r->last_state == 'R') 2258 r->dt_preempt = dt_wait; 2259 else if (r->last_state == 'D') 2260 r->dt_iowait = dt_wait; 2261 else 2262 r->dt_sleep = dt_wait; 2263 } 2264 } 2265 2266 update_stats(&r->run_stats, r->dt_run); 2267 2268 r->total_run_time += r->dt_run; 2269 r->total_delay_time += r->dt_delay; 2270 r->total_sleep_time += r->dt_sleep; 2271 r->total_iowait_time += r->dt_iowait; 2272 r->total_preempt_time += r->dt_preempt; 2273 } 2274 2275 static bool is_idle_sample(struct perf_sample *sample, 2276 struct evsel *evsel) 2277 { 2278 /* pid 0 == swapper == idle task */ 2279 if (evsel__name_is(evsel, "sched:sched_switch")) 2280 return evsel__intval(evsel, sample, "prev_pid") == 0; 2281 2282 return sample->pid == 0; 2283 } 2284 2285 static void save_task_callchain(struct perf_sched *sched, 2286 struct perf_sample *sample, 2287 struct evsel *evsel, 2288 struct machine *machine) 2289 { 2290 struct callchain_cursor *cursor; 2291 struct thread *thread; 2292 2293 /* want main thread for process - has maps */ 2294 thread = machine__findnew_thread(machine, sample->pid, sample->pid); 2295 if (thread == NULL) { 2296 pr_debug("Failed to get thread for pid %d.\n", sample->pid); 2297 return; 2298 } 2299 2300 if (!sched->show_callchain || sample->callchain == NULL) 2301 return; 2302 2303 cursor = get_tls_callchain_cursor(); 2304 2305 if (thread__resolve_callchain(thread, cursor, evsel, sample, 2306 NULL, NULL, sched->max_stack + 2) != 0) { 2307 if (verbose > 0) 2308 pr_err("Failed to resolve callchain. Skipping\n"); 2309 2310 return; 2311 } 2312 2313 callchain_cursor_commit(cursor); 2314 2315 while (true) { 2316 struct callchain_cursor_node *node; 2317 struct symbol *sym; 2318 2319 node = callchain_cursor_current(cursor); 2320 if (node == NULL) 2321 break; 2322 2323 sym = node->ms.sym; 2324 if (sym) { 2325 if (!strcmp(sym->name, "schedule") || 2326 !strcmp(sym->name, "__schedule") || 2327 !strcmp(sym->name, "preempt_schedule")) 2328 sym->ignore = 1; 2329 } 2330 2331 callchain_cursor_advance(cursor); 2332 } 2333 } 2334 2335 static int init_idle_thread(struct thread *thread) 2336 { 2337 struct idle_thread_runtime *itr; 2338 2339 thread__set_comm(thread, idle_comm, 0); 2340 2341 itr = zalloc(sizeof(*itr)); 2342 if (itr == NULL) 2343 return -ENOMEM; 2344 2345 init_prio(&itr->tr); 2346 init_stats(&itr->tr.run_stats); 2347 callchain_init(&itr->callchain); 2348 callchain_cursor_reset(&itr->cursor); 2349 thread__set_priv(thread, itr); 2350 2351 return 0; 2352 } 2353 2354 /* 2355 * Track idle stats per cpu by maintaining a local thread 2356 * struct for the idle task on each cpu. 2357 */ 2358 static int init_idle_threads(int ncpu) 2359 { 2360 int i, ret; 2361 2362 idle_threads = zalloc(ncpu * sizeof(struct thread *)); 2363 if (!idle_threads) 2364 return -ENOMEM; 2365 2366 idle_max_cpu = ncpu; 2367 2368 /* allocate the actual thread struct if needed */ 2369 for (i = 0; i < ncpu; ++i) { 2370 idle_threads[i] = thread__new(0, 0); 2371 if (idle_threads[i] == NULL) 2372 return -ENOMEM; 2373 2374 ret = init_idle_thread(idle_threads[i]); 2375 if (ret < 0) 2376 return ret; 2377 } 2378 2379 return 0; 2380 } 2381 2382 static void free_idle_threads(void) 2383 { 2384 int i; 2385 2386 if (idle_threads == NULL) 2387 return; 2388 2389 for (i = 0; i < idle_max_cpu; ++i) { 2390 if ((idle_threads[i])) 2391 thread__delete(idle_threads[i]); 2392 } 2393 2394 free(idle_threads); 2395 } 2396 2397 static struct thread *get_idle_thread(int cpu) 2398 { 2399 /* 2400 * expand/allocate array of pointers to local thread 2401 * structs if needed 2402 */ 2403 if ((cpu >= idle_max_cpu) || (idle_threads == NULL)) { 2404 int i, j = __roundup_pow_of_two(cpu+1); 2405 void *p; 2406 2407 p = realloc(idle_threads, j * sizeof(struct thread *)); 2408 if (!p) 2409 return NULL; 2410 2411 idle_threads = (struct thread **) p; 2412 for (i = idle_max_cpu; i < j; ++i) 2413 idle_threads[i] = NULL; 2414 2415 idle_max_cpu = j; 2416 } 2417 2418 /* allocate a new thread struct if needed */ 2419 if (idle_threads[cpu] == NULL) { 2420 idle_threads[cpu] = thread__new(0, 0); 2421 2422 if (idle_threads[cpu]) { 2423 if (init_idle_thread(idle_threads[cpu]) < 0) 2424 return NULL; 2425 } 2426 } 2427 2428 return idle_threads[cpu]; 2429 } 2430 2431 static void save_idle_callchain(struct perf_sched *sched, 2432 struct idle_thread_runtime *itr, 2433 struct perf_sample *sample) 2434 { 2435 struct callchain_cursor *cursor; 2436 2437 if (!sched->show_callchain || sample->callchain == NULL) 2438 return; 2439 2440 cursor = get_tls_callchain_cursor(); 2441 if (cursor == NULL) 2442 return; 2443 2444 callchain_cursor__copy(&itr->cursor, cursor); 2445 } 2446 2447 static struct thread *timehist_get_thread(struct perf_sched *sched, 2448 struct perf_sample *sample, 2449 struct machine *machine, 2450 struct evsel *evsel) 2451 { 2452 struct thread *thread; 2453 2454 if (is_idle_sample(sample, evsel)) { 2455 thread = get_idle_thread(sample->cpu); 2456 if (thread == NULL) 2457 pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); 2458 2459 } else { 2460 /* there were samples with tid 0 but non-zero pid */ 2461 thread = machine__findnew_thread(machine, sample->pid, 2462 sample->tid ?: sample->pid); 2463 if (thread == NULL) { 2464 pr_debug("Failed to get thread for tid %d. skipping sample.\n", 2465 sample->tid); 2466 } 2467 2468 save_task_callchain(sched, sample, evsel, machine); 2469 if (sched->idle_hist) { 2470 struct thread *idle; 2471 struct idle_thread_runtime *itr; 2472 2473 idle = get_idle_thread(sample->cpu); 2474 if (idle == NULL) { 2475 pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); 2476 return NULL; 2477 } 2478 2479 itr = thread__priv(idle); 2480 if (itr == NULL) 2481 return NULL; 2482 2483 itr->last_thread = thread; 2484 2485 /* copy task callchain when entering to idle */ 2486 if (evsel__intval(evsel, sample, "next_pid") == 0) 2487 save_idle_callchain(sched, itr, sample); 2488 } 2489 } 2490 2491 return thread; 2492 } 2493 2494 static bool timehist_skip_sample(struct perf_sched *sched, 2495 struct thread *thread, 2496 struct evsel *evsel, 2497 struct perf_sample *sample) 2498 { 2499 bool rc = false; 2500 int prio = -1; 2501 struct thread_runtime *tr = NULL; 2502 2503 if (thread__is_filtered(thread)) { 2504 rc = true; 2505 sched->skipped_samples++; 2506 } 2507 2508 if (sched->prio_str) { 2509 /* 2510 * Because priority may be changed during task execution, 2511 * first read priority from prev sched_in event for current task. 2512 * If prev sched_in event is not saved, then read priority from 2513 * current task sched_out event. 2514 */ 2515 tr = thread__get_runtime(thread); 2516 if (tr && tr->prio != -1) 2517 prio = tr->prio; 2518 else if (evsel__name_is(evsel, "sched:sched_switch")) 2519 prio = evsel__intval(evsel, sample, "prev_prio"); 2520 2521 if (prio != -1 && !test_bit(prio, sched->prio_bitmap)) { 2522 rc = true; 2523 sched->skipped_samples++; 2524 } 2525 } 2526 2527 if (sched->idle_hist) { 2528 if (!evsel__name_is(evsel, "sched:sched_switch")) 2529 rc = true; 2530 else if (evsel__intval(evsel, sample, "prev_pid") != 0 && 2531 evsel__intval(evsel, sample, "next_pid") != 0) 2532 rc = true; 2533 } 2534 2535 return rc; 2536 } 2537 2538 static void timehist_print_wakeup_event(struct perf_sched *sched, 2539 struct evsel *evsel, 2540 struct perf_sample *sample, 2541 struct machine *machine, 2542 struct thread *awakened) 2543 { 2544 struct thread *thread; 2545 char tstr[64]; 2546 2547 thread = machine__findnew_thread(machine, sample->pid, sample->tid); 2548 if (thread == NULL) 2549 return; 2550 2551 /* show wakeup unless both awakee and awaker are filtered */ 2552 if (timehist_skip_sample(sched, thread, evsel, sample) && 2553 timehist_skip_sample(sched, awakened, evsel, sample)) { 2554 return; 2555 } 2556 2557 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); 2558 printf("%15s [%04d] ", tstr, sample->cpu); 2559 if (sched->show_cpu_visual) 2560 printf(" %*s ", sched->max_cpu.cpu + 1, ""); 2561 2562 printf(" %-*s ", comm_width, timehist_get_commstr(thread)); 2563 2564 /* dt spacer */ 2565 printf(" %9s %9s %9s ", "", "", ""); 2566 2567 printf("awakened: %s", timehist_get_commstr(awakened)); 2568 2569 printf("\n"); 2570 } 2571 2572 static int timehist_sched_wakeup_ignore(const struct perf_tool *tool __maybe_unused, 2573 union perf_event *event __maybe_unused, 2574 struct evsel *evsel __maybe_unused, 2575 struct perf_sample *sample __maybe_unused, 2576 struct machine *machine __maybe_unused) 2577 { 2578 return 0; 2579 } 2580 2581 static int timehist_sched_wakeup_event(const struct perf_tool *tool, 2582 union perf_event *event __maybe_unused, 2583 struct evsel *evsel, 2584 struct perf_sample *sample, 2585 struct machine *machine) 2586 { 2587 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 2588 struct thread *thread; 2589 struct thread_runtime *tr = NULL; 2590 /* want pid of awakened task not pid in sample */ 2591 const u32 pid = evsel__intval(evsel, sample, "pid"); 2592 2593 thread = machine__findnew_thread(machine, 0, pid); 2594 if (thread == NULL) 2595 return -1; 2596 2597 tr = thread__get_runtime(thread); 2598 if (tr == NULL) 2599 return -1; 2600 2601 if (tr->ready_to_run == 0) 2602 tr->ready_to_run = sample->time; 2603 2604 /* show wakeups if requested */ 2605 if (sched->show_wakeups && 2606 !perf_time__skip_sample(&sched->ptime, sample->time)) 2607 timehist_print_wakeup_event(sched, evsel, sample, machine, thread); 2608 2609 return 0; 2610 } 2611 2612 static void timehist_print_migration_event(struct perf_sched *sched, 2613 struct evsel *evsel, 2614 struct perf_sample *sample, 2615 struct machine *machine, 2616 struct thread *migrated) 2617 { 2618 struct thread *thread; 2619 char tstr[64]; 2620 u32 max_cpus; 2621 u32 ocpu, dcpu; 2622 2623 if (sched->summary_only) 2624 return; 2625 2626 max_cpus = sched->max_cpu.cpu + 1; 2627 ocpu = evsel__intval(evsel, sample, "orig_cpu"); 2628 dcpu = evsel__intval(evsel, sample, "dest_cpu"); 2629 2630 thread = machine__findnew_thread(machine, sample->pid, sample->tid); 2631 if (thread == NULL) 2632 return; 2633 2634 if (timehist_skip_sample(sched, thread, evsel, sample) && 2635 timehist_skip_sample(sched, migrated, evsel, sample)) { 2636 return; 2637 } 2638 2639 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); 2640 printf("%15s [%04d] ", tstr, sample->cpu); 2641 2642 if (sched->show_cpu_visual) { 2643 u32 i; 2644 char c; 2645 2646 printf(" "); 2647 for (i = 0; i < max_cpus; ++i) { 2648 c = (i == sample->cpu) ? 'm' : ' '; 2649 printf("%c", c); 2650 } 2651 printf(" "); 2652 } 2653 2654 printf(" %-*s ", comm_width, timehist_get_commstr(thread)); 2655 2656 /* dt spacer */ 2657 printf(" %9s %9s %9s ", "", "", ""); 2658 2659 printf("migrated: %s", timehist_get_commstr(migrated)); 2660 printf(" cpu %d => %d", ocpu, dcpu); 2661 2662 printf("\n"); 2663 } 2664 2665 static int timehist_migrate_task_event(const struct perf_tool *tool, 2666 union perf_event *event __maybe_unused, 2667 struct evsel *evsel, 2668 struct perf_sample *sample, 2669 struct machine *machine) 2670 { 2671 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 2672 struct thread *thread; 2673 struct thread_runtime *tr = NULL; 2674 /* want pid of migrated task not pid in sample */ 2675 const u32 pid = evsel__intval(evsel, sample, "pid"); 2676 2677 thread = machine__findnew_thread(machine, 0, pid); 2678 if (thread == NULL) 2679 return -1; 2680 2681 tr = thread__get_runtime(thread); 2682 if (tr == NULL) 2683 return -1; 2684 2685 tr->migrations++; 2686 2687 /* show migrations if requested */ 2688 timehist_print_migration_event(sched, evsel, sample, machine, thread); 2689 2690 return 0; 2691 } 2692 2693 static void timehist_update_task_prio(struct evsel *evsel, 2694 struct perf_sample *sample, 2695 struct machine *machine) 2696 { 2697 struct thread *thread; 2698 struct thread_runtime *tr = NULL; 2699 const u32 next_pid = evsel__intval(evsel, sample, "next_pid"); 2700 const u32 next_prio = evsel__intval(evsel, sample, "next_prio"); 2701 2702 if (next_pid == 0) 2703 thread = get_idle_thread(sample->cpu); 2704 else 2705 thread = machine__findnew_thread(machine, -1, next_pid); 2706 2707 if (thread == NULL) 2708 return; 2709 2710 tr = thread__get_runtime(thread); 2711 if (tr == NULL) 2712 return; 2713 2714 tr->prio = next_prio; 2715 } 2716 2717 static int timehist_sched_change_event(const struct perf_tool *tool, 2718 union perf_event *event, 2719 struct evsel *evsel, 2720 struct perf_sample *sample, 2721 struct machine *machine) 2722 { 2723 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 2724 struct perf_time_interval *ptime = &sched->ptime; 2725 struct addr_location al; 2726 struct thread *thread; 2727 struct thread_runtime *tr = NULL; 2728 u64 tprev, t = sample->time; 2729 int rc = 0; 2730 const char state = evsel__taskstate(evsel, sample, "prev_state"); 2731 2732 addr_location__init(&al); 2733 if (machine__resolve(machine, &al, sample) < 0) { 2734 pr_err("problem processing %d event. skipping it\n", 2735 event->header.type); 2736 rc = -1; 2737 goto out; 2738 } 2739 2740 if (sched->show_prio || sched->prio_str) 2741 timehist_update_task_prio(evsel, sample, machine); 2742 2743 thread = timehist_get_thread(sched, sample, machine, evsel); 2744 if (thread == NULL) { 2745 rc = -1; 2746 goto out; 2747 } 2748 2749 if (timehist_skip_sample(sched, thread, evsel, sample)) 2750 goto out; 2751 2752 tr = thread__get_runtime(thread); 2753 if (tr == NULL) { 2754 rc = -1; 2755 goto out; 2756 } 2757 2758 tprev = evsel__get_time(evsel, sample->cpu); 2759 2760 /* 2761 * If start time given: 2762 * - sample time is under window user cares about - skip sample 2763 * - tprev is under window user cares about - reset to start of window 2764 */ 2765 if (ptime->start && ptime->start > t) 2766 goto out; 2767 2768 if (tprev && ptime->start > tprev) 2769 tprev = ptime->start; 2770 2771 /* 2772 * If end time given: 2773 * - previous sched event is out of window - we are done 2774 * - sample time is beyond window user cares about - reset it 2775 * to close out stats for time window interest 2776 * - If tprev is 0, that is, sched_in event for current task is 2777 * not recorded, cannot determine whether sched_in event is 2778 * within time window interest - ignore it 2779 */ 2780 if (ptime->end) { 2781 if (!tprev || tprev > ptime->end) 2782 goto out; 2783 2784 if (t > ptime->end) 2785 t = ptime->end; 2786 } 2787 2788 if (!sched->idle_hist || thread__tid(thread) == 0) { 2789 if (!cpu_list || test_bit(sample->cpu, cpu_bitmap)) 2790 timehist_update_runtime_stats(tr, t, tprev); 2791 2792 if (sched->idle_hist) { 2793 struct idle_thread_runtime *itr = (void *)tr; 2794 struct thread_runtime *last_tr; 2795 2796 if (itr->last_thread == NULL) 2797 goto out; 2798 2799 /* add current idle time as last thread's runtime */ 2800 last_tr = thread__get_runtime(itr->last_thread); 2801 if (last_tr == NULL) 2802 goto out; 2803 2804 timehist_update_runtime_stats(last_tr, t, tprev); 2805 /* 2806 * remove delta time of last thread as it's not updated 2807 * and otherwise it will show an invalid value next 2808 * time. we only care total run time and run stat. 2809 */ 2810 last_tr->dt_run = 0; 2811 last_tr->dt_delay = 0; 2812 last_tr->dt_sleep = 0; 2813 last_tr->dt_iowait = 0; 2814 last_tr->dt_preempt = 0; 2815 2816 if (itr->cursor.nr) 2817 callchain_append(&itr->callchain, &itr->cursor, t - tprev); 2818 2819 itr->last_thread = NULL; 2820 } 2821 2822 if (!sched->summary_only) 2823 timehist_print_sample(sched, evsel, sample, &al, thread, t, state); 2824 } 2825 2826 out: 2827 if (sched->hist_time.start == 0 && t >= ptime->start) 2828 sched->hist_time.start = t; 2829 if (ptime->end == 0 || t <= ptime->end) 2830 sched->hist_time.end = t; 2831 2832 if (tr) { 2833 /* time of this sched_switch event becomes last time task seen */ 2834 tr->last_time = sample->time; 2835 2836 /* last state is used to determine where to account wait time */ 2837 tr->last_state = state; 2838 2839 /* sched out event for task so reset ready to run time */ 2840 if (state == 'R') 2841 tr->ready_to_run = t; 2842 else 2843 tr->ready_to_run = 0; 2844 } 2845 2846 evsel__save_time(evsel, sample->time, sample->cpu); 2847 2848 addr_location__exit(&al); 2849 return rc; 2850 } 2851 2852 static int timehist_sched_switch_event(const struct perf_tool *tool, 2853 union perf_event *event, 2854 struct evsel *evsel, 2855 struct perf_sample *sample, 2856 struct machine *machine __maybe_unused) 2857 { 2858 return timehist_sched_change_event(tool, event, evsel, sample, machine); 2859 } 2860 2861 static int process_lost(const struct perf_tool *tool __maybe_unused, 2862 union perf_event *event, 2863 struct perf_sample *sample, 2864 struct machine *machine __maybe_unused) 2865 { 2866 char tstr[64]; 2867 2868 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); 2869 printf("%15s ", tstr); 2870 printf("lost %" PRI_lu64 " events on cpu %d\n", event->lost.lost, sample->cpu); 2871 2872 return 0; 2873 } 2874 2875 2876 static void print_thread_runtime(struct thread *t, 2877 struct thread_runtime *r) 2878 { 2879 double mean = avg_stats(&r->run_stats); 2880 float stddev; 2881 2882 printf("%*s %5d %9" PRIu64 " ", 2883 comm_width, timehist_get_commstr(t), thread__ppid(t), 2884 (u64) r->run_stats.n); 2885 2886 print_sched_time(r->total_run_time, 8); 2887 stddev = rel_stddev_stats(stddev_stats(&r->run_stats), mean); 2888 print_sched_time(r->run_stats.min, 6); 2889 printf(" "); 2890 print_sched_time((u64) mean, 6); 2891 printf(" "); 2892 print_sched_time(r->run_stats.max, 6); 2893 printf(" "); 2894 printf("%5.2f", stddev); 2895 printf(" %5" PRIu64, r->migrations); 2896 printf("\n"); 2897 } 2898 2899 static void print_thread_waittime(struct thread *t, 2900 struct thread_runtime *r) 2901 { 2902 printf("%*s %5d %9" PRIu64 " ", 2903 comm_width, timehist_get_commstr(t), thread__ppid(t), 2904 (u64) r->run_stats.n); 2905 2906 print_sched_time(r->total_run_time, 8); 2907 print_sched_time(r->total_sleep_time, 6); 2908 printf(" "); 2909 print_sched_time(r->total_iowait_time, 6); 2910 printf(" "); 2911 print_sched_time(r->total_preempt_time, 6); 2912 printf(" "); 2913 print_sched_time(r->total_delay_time, 6); 2914 printf("\n"); 2915 } 2916 2917 struct total_run_stats { 2918 struct perf_sched *sched; 2919 u64 sched_count; 2920 u64 task_count; 2921 u64 total_run_time; 2922 }; 2923 2924 static int show_thread_runtime(struct thread *t, void *priv) 2925 { 2926 struct total_run_stats *stats = priv; 2927 struct thread_runtime *r; 2928 2929 if (thread__is_filtered(t)) 2930 return 0; 2931 2932 r = thread__priv(t); 2933 if (r && r->run_stats.n) { 2934 stats->task_count++; 2935 stats->sched_count += r->run_stats.n; 2936 stats->total_run_time += r->total_run_time; 2937 2938 if (stats->sched->show_state) 2939 print_thread_waittime(t, r); 2940 else 2941 print_thread_runtime(t, r); 2942 } 2943 2944 return 0; 2945 } 2946 2947 static size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node) 2948 { 2949 const char *sep = " <- "; 2950 struct callchain_list *chain; 2951 size_t ret = 0; 2952 char bf[1024]; 2953 bool first; 2954 2955 if (node == NULL) 2956 return 0; 2957 2958 ret = callchain__fprintf_folded(fp, node->parent); 2959 first = (ret == 0); 2960 2961 list_for_each_entry(chain, &node->val, list) { 2962 if (chain->ip >= PERF_CONTEXT_MAX) 2963 continue; 2964 if (chain->ms.sym && chain->ms.sym->ignore) 2965 continue; 2966 ret += fprintf(fp, "%s%s", first ? "" : sep, 2967 callchain_list__sym_name(chain, bf, sizeof(bf), 2968 false)); 2969 first = false; 2970 } 2971 2972 return ret; 2973 } 2974 2975 static size_t timehist_print_idlehist_callchain(struct rb_root_cached *root) 2976 { 2977 size_t ret = 0; 2978 FILE *fp = stdout; 2979 struct callchain_node *chain; 2980 struct rb_node *rb_node = rb_first_cached(root); 2981 2982 printf(" %16s %8s %s\n", "Idle time (msec)", "Count", "Callchains"); 2983 printf(" %.16s %.8s %.50s\n", graph_dotted_line, graph_dotted_line, 2984 graph_dotted_line); 2985 2986 while (rb_node) { 2987 chain = rb_entry(rb_node, struct callchain_node, rb_node); 2988 rb_node = rb_next(rb_node); 2989 2990 ret += fprintf(fp, " "); 2991 print_sched_time(chain->hit, 12); 2992 ret += 16; /* print_sched_time returns 2nd arg + 4 */ 2993 ret += fprintf(fp, " %8d ", chain->count); 2994 ret += callchain__fprintf_folded(fp, chain); 2995 ret += fprintf(fp, "\n"); 2996 } 2997 2998 return ret; 2999 } 3000 3001 static void timehist_print_summary(struct perf_sched *sched, 3002 struct perf_session *session) 3003 { 3004 struct machine *m = &session->machines.host; 3005 struct total_run_stats totals; 3006 u64 task_count; 3007 struct thread *t; 3008 struct thread_runtime *r; 3009 int i; 3010 u64 hist_time = sched->hist_time.end - sched->hist_time.start; 3011 3012 memset(&totals, 0, sizeof(totals)); 3013 totals.sched = sched; 3014 3015 if (sched->idle_hist) { 3016 printf("\nIdle-time summary\n"); 3017 printf("%*s parent sched-out ", comm_width, "comm"); 3018 printf(" idle-time min-idle avg-idle max-idle stddev migrations\n"); 3019 } else if (sched->show_state) { 3020 printf("\nWait-time summary\n"); 3021 printf("%*s parent sched-in ", comm_width, "comm"); 3022 printf(" run-time sleep iowait preempt delay\n"); 3023 } else { 3024 printf("\nRuntime summary\n"); 3025 printf("%*s parent sched-in ", comm_width, "comm"); 3026 printf(" run-time min-run avg-run max-run stddev migrations\n"); 3027 } 3028 printf("%*s (count) ", comm_width, ""); 3029 printf(" (msec) (msec) (msec) (msec) %s\n", 3030 sched->show_state ? "(msec)" : "%"); 3031 printf("%.117s\n", graph_dotted_line); 3032 3033 machine__for_each_thread(m, show_thread_runtime, &totals); 3034 task_count = totals.task_count; 3035 if (!task_count) 3036 printf("<no still running tasks>\n"); 3037 3038 /* CPU idle stats not tracked when samples were skipped */ 3039 if (sched->skipped_samples && !sched->idle_hist) 3040 return; 3041 3042 printf("\nIdle stats:\n"); 3043 for (i = 0; i < idle_max_cpu; ++i) { 3044 if (cpu_list && !test_bit(i, cpu_bitmap)) 3045 continue; 3046 3047 t = idle_threads[i]; 3048 if (!t) 3049 continue; 3050 3051 r = thread__priv(t); 3052 if (r && r->run_stats.n) { 3053 totals.sched_count += r->run_stats.n; 3054 printf(" CPU %2d idle for ", i); 3055 print_sched_time(r->total_run_time, 6); 3056 printf(" msec (%6.2f%%)\n", 100.0 * r->total_run_time / hist_time); 3057 } else 3058 printf(" CPU %2d idle entire time window\n", i); 3059 } 3060 3061 if (sched->idle_hist && sched->show_callchain) { 3062 callchain_param.mode = CHAIN_FOLDED; 3063 callchain_param.value = CCVAL_PERIOD; 3064 3065 callchain_register_param(&callchain_param); 3066 3067 printf("\nIdle stats by callchain:\n"); 3068 for (i = 0; i < idle_max_cpu; ++i) { 3069 struct idle_thread_runtime *itr; 3070 3071 t = idle_threads[i]; 3072 if (!t) 3073 continue; 3074 3075 itr = thread__priv(t); 3076 if (itr == NULL) 3077 continue; 3078 3079 callchain_param.sort(&itr->sorted_root.rb_root, &itr->callchain, 3080 0, &callchain_param); 3081 3082 printf(" CPU %2d:", i); 3083 print_sched_time(itr->tr.total_run_time, 6); 3084 printf(" msec\n"); 3085 timehist_print_idlehist_callchain(&itr->sorted_root); 3086 printf("\n"); 3087 } 3088 } 3089 3090 printf("\n" 3091 " Total number of unique tasks: %" PRIu64 "\n" 3092 "Total number of context switches: %" PRIu64 "\n", 3093 totals.task_count, totals.sched_count); 3094 3095 printf(" Total run time (msec): "); 3096 print_sched_time(totals.total_run_time, 2); 3097 printf("\n"); 3098 3099 printf(" Total scheduling time (msec): "); 3100 print_sched_time(hist_time, 2); 3101 printf(" (x %d)\n", sched->max_cpu.cpu); 3102 } 3103 3104 typedef int (*sched_handler)(const struct perf_tool *tool, 3105 union perf_event *event, 3106 struct evsel *evsel, 3107 struct perf_sample *sample, 3108 struct machine *machine); 3109 3110 static int perf_timehist__process_sample(const struct perf_tool *tool, 3111 union perf_event *event, 3112 struct perf_sample *sample, 3113 struct evsel *evsel, 3114 struct machine *machine) 3115 { 3116 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 3117 int err = 0; 3118 struct perf_cpu this_cpu = { 3119 .cpu = sample->cpu, 3120 }; 3121 3122 if (this_cpu.cpu > sched->max_cpu.cpu) 3123 sched->max_cpu = this_cpu; 3124 3125 if (evsel->handler != NULL) { 3126 sched_handler f = evsel->handler; 3127 3128 err = f(tool, event, evsel, sample, machine); 3129 } 3130 3131 return err; 3132 } 3133 3134 static int timehist_check_attr(struct perf_sched *sched, 3135 struct evlist *evlist) 3136 { 3137 struct evsel *evsel; 3138 struct evsel_runtime *er; 3139 3140 list_for_each_entry(evsel, &evlist->core.entries, core.node) { 3141 er = evsel__get_runtime(evsel); 3142 if (er == NULL) { 3143 pr_err("Failed to allocate memory for evsel runtime data\n"); 3144 return -1; 3145 } 3146 3147 /* only need to save callchain related to sched_switch event */ 3148 if (sched->show_callchain && 3149 evsel__name_is(evsel, "sched:sched_switch") && 3150 !evsel__has_callchain(evsel)) { 3151 pr_info("Samples of sched_switch event do not have callchains.\n"); 3152 sched->show_callchain = 0; 3153 symbol_conf.use_callchain = 0; 3154 } 3155 } 3156 3157 return 0; 3158 } 3159 3160 static int timehist_parse_prio_str(struct perf_sched *sched) 3161 { 3162 char *p; 3163 unsigned long start_prio, end_prio; 3164 const char *str = sched->prio_str; 3165 3166 if (!str) 3167 return 0; 3168 3169 while (isdigit(*str)) { 3170 p = NULL; 3171 start_prio = strtoul(str, &p, 0); 3172 if (start_prio >= MAX_PRIO || (*p != '\0' && *p != ',' && *p != '-')) 3173 return -1; 3174 3175 if (*p == '-') { 3176 str = ++p; 3177 p = NULL; 3178 end_prio = strtoul(str, &p, 0); 3179 3180 if (end_prio >= MAX_PRIO || (*p != '\0' && *p != ',')) 3181 return -1; 3182 3183 if (end_prio < start_prio) 3184 return -1; 3185 } else { 3186 end_prio = start_prio; 3187 } 3188 3189 for (; start_prio <= end_prio; start_prio++) 3190 __set_bit(start_prio, sched->prio_bitmap); 3191 3192 if (*p) 3193 ++p; 3194 3195 str = p; 3196 } 3197 3198 return 0; 3199 } 3200 3201 static int perf_sched__timehist(struct perf_sched *sched) 3202 { 3203 struct evsel_str_handler handlers[] = { 3204 { "sched:sched_switch", timehist_sched_switch_event, }, 3205 { "sched:sched_wakeup", timehist_sched_wakeup_event, }, 3206 { "sched:sched_waking", timehist_sched_wakeup_event, }, 3207 { "sched:sched_wakeup_new", timehist_sched_wakeup_event, }, 3208 }; 3209 const struct evsel_str_handler migrate_handlers[] = { 3210 { "sched:sched_migrate_task", timehist_migrate_task_event, }, 3211 }; 3212 struct perf_data data = { 3213 .path = input_name, 3214 .mode = PERF_DATA_MODE_READ, 3215 .force = sched->force, 3216 }; 3217 3218 struct perf_session *session; 3219 struct evlist *evlist; 3220 int err = -1; 3221 3222 /* 3223 * event handlers for timehist option 3224 */ 3225 sched->tool.sample = perf_timehist__process_sample; 3226 sched->tool.mmap = perf_event__process_mmap; 3227 sched->tool.comm = perf_event__process_comm; 3228 sched->tool.exit = perf_event__process_exit; 3229 sched->tool.fork = perf_event__process_fork; 3230 sched->tool.lost = process_lost; 3231 sched->tool.attr = perf_event__process_attr; 3232 sched->tool.tracing_data = perf_event__process_tracing_data; 3233 sched->tool.build_id = perf_event__process_build_id; 3234 3235 sched->tool.ordering_requires_timestamps = true; 3236 3237 symbol_conf.use_callchain = sched->show_callchain; 3238 3239 session = perf_session__new(&data, &sched->tool); 3240 if (IS_ERR(session)) 3241 return PTR_ERR(session); 3242 3243 if (cpu_list) { 3244 err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap); 3245 if (err < 0) 3246 goto out; 3247 } 3248 3249 evlist = session->evlist; 3250 3251 symbol__init(&session->header.env); 3252 3253 if (perf_time__parse_str(&sched->ptime, sched->time_str) != 0) { 3254 pr_err("Invalid time string\n"); 3255 err = -EINVAL; 3256 goto out; 3257 } 3258 3259 if (timehist_check_attr(sched, evlist) != 0) 3260 goto out; 3261 3262 if (timehist_parse_prio_str(sched) != 0) { 3263 pr_err("Invalid prio string\n"); 3264 goto out; 3265 } 3266 3267 setup_pager(); 3268 3269 /* prefer sched_waking if it is captured */ 3270 if (evlist__find_tracepoint_by_name(session->evlist, "sched:sched_waking")) 3271 handlers[1].handler = timehist_sched_wakeup_ignore; 3272 3273 /* setup per-evsel handlers */ 3274 if (perf_session__set_tracepoints_handlers(session, handlers)) 3275 goto out; 3276 3277 /* sched_switch event at a minimum needs to exist */ 3278 if (!evlist__find_tracepoint_by_name(session->evlist, "sched:sched_switch")) { 3279 pr_err("No sched_switch events found. Have you run 'perf sched record'?\n"); 3280 goto out; 3281 } 3282 3283 if (sched->show_migrations && 3284 perf_session__set_tracepoints_handlers(session, migrate_handlers)) 3285 goto out; 3286 3287 /* pre-allocate struct for per-CPU idle stats */ 3288 sched->max_cpu.cpu = session->header.env.nr_cpus_online; 3289 if (sched->max_cpu.cpu == 0) 3290 sched->max_cpu.cpu = 4; 3291 if (init_idle_threads(sched->max_cpu.cpu)) 3292 goto out; 3293 3294 /* summary_only implies summary option, but don't overwrite summary if set */ 3295 if (sched->summary_only) 3296 sched->summary = sched->summary_only; 3297 3298 if (!sched->summary_only) 3299 timehist_header(sched); 3300 3301 err = perf_session__process_events(session); 3302 if (err) { 3303 pr_err("Failed to process events, error %d", err); 3304 goto out; 3305 } 3306 3307 sched->nr_events = evlist->stats.nr_events[0]; 3308 sched->nr_lost_events = evlist->stats.total_lost; 3309 sched->nr_lost_chunks = evlist->stats.nr_events[PERF_RECORD_LOST]; 3310 3311 if (sched->summary) 3312 timehist_print_summary(sched, session); 3313 3314 out: 3315 free_idle_threads(); 3316 perf_session__delete(session); 3317 3318 return err; 3319 } 3320 3321 3322 static void print_bad_events(struct perf_sched *sched) 3323 { 3324 if (sched->nr_unordered_timestamps && sched->nr_timestamps) { 3325 printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n", 3326 (double)sched->nr_unordered_timestamps/(double)sched->nr_timestamps*100.0, 3327 sched->nr_unordered_timestamps, sched->nr_timestamps); 3328 } 3329 if (sched->nr_lost_events && sched->nr_events) { 3330 printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n", 3331 (double)sched->nr_lost_events/(double)sched->nr_events * 100.0, 3332 sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks); 3333 } 3334 if (sched->nr_context_switch_bugs && sched->nr_timestamps) { 3335 printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)", 3336 (double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0, 3337 sched->nr_context_switch_bugs, sched->nr_timestamps); 3338 if (sched->nr_lost_events) 3339 printf(" (due to lost events?)"); 3340 printf("\n"); 3341 } 3342 } 3343 3344 static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *data) 3345 { 3346 struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL; 3347 struct work_atoms *this; 3348 const char *comm = thread__comm_str(data->thread), *this_comm; 3349 bool leftmost = true; 3350 3351 while (*new) { 3352 int cmp; 3353 3354 this = container_of(*new, struct work_atoms, node); 3355 parent = *new; 3356 3357 this_comm = thread__comm_str(this->thread); 3358 cmp = strcmp(comm, this_comm); 3359 if (cmp > 0) { 3360 new = &((*new)->rb_left); 3361 } else if (cmp < 0) { 3362 new = &((*new)->rb_right); 3363 leftmost = false; 3364 } else { 3365 this->num_merged++; 3366 this->total_runtime += data->total_runtime; 3367 this->nb_atoms += data->nb_atoms; 3368 this->total_lat += data->total_lat; 3369 list_splice(&data->work_list, &this->work_list); 3370 if (this->max_lat < data->max_lat) { 3371 this->max_lat = data->max_lat; 3372 this->max_lat_start = data->max_lat_start; 3373 this->max_lat_end = data->max_lat_end; 3374 } 3375 zfree(&data); 3376 return; 3377 } 3378 } 3379 3380 data->num_merged++; 3381 rb_link_node(&data->node, parent, new); 3382 rb_insert_color_cached(&data->node, root, leftmost); 3383 } 3384 3385 static void perf_sched__merge_lat(struct perf_sched *sched) 3386 { 3387 struct work_atoms *data; 3388 struct rb_node *node; 3389 3390 if (sched->skip_merge) 3391 return; 3392 3393 while ((node = rb_first_cached(&sched->atom_root))) { 3394 rb_erase_cached(node, &sched->atom_root); 3395 data = rb_entry(node, struct work_atoms, node); 3396 __merge_work_atoms(&sched->merged_atom_root, data); 3397 } 3398 } 3399 3400 static int setup_cpus_switch_event(struct perf_sched *sched) 3401 { 3402 unsigned int i; 3403 3404 sched->cpu_last_switched = calloc(MAX_CPUS, sizeof(*(sched->cpu_last_switched))); 3405 if (!sched->cpu_last_switched) 3406 return -1; 3407 3408 sched->curr_pid = malloc(MAX_CPUS * sizeof(*(sched->curr_pid))); 3409 if (!sched->curr_pid) { 3410 zfree(&sched->cpu_last_switched); 3411 return -1; 3412 } 3413 3414 for (i = 0; i < MAX_CPUS; i++) 3415 sched->curr_pid[i] = -1; 3416 3417 return 0; 3418 } 3419 3420 static void free_cpus_switch_event(struct perf_sched *sched) 3421 { 3422 zfree(&sched->curr_pid); 3423 zfree(&sched->cpu_last_switched); 3424 } 3425 3426 static int perf_sched__lat(struct perf_sched *sched) 3427 { 3428 int rc = -1; 3429 struct rb_node *next; 3430 3431 setup_pager(); 3432 3433 if (setup_cpus_switch_event(sched)) 3434 return rc; 3435 3436 if (perf_sched__read_events(sched)) 3437 goto out_free_cpus_switch_event; 3438 3439 perf_sched__merge_lat(sched); 3440 perf_sched__sort_lat(sched); 3441 3442 printf("\n -------------------------------------------------------------------------------------------------------------------------------------------\n"); 3443 printf(" Task | Runtime ms | Count | Avg delay ms | Max delay ms | Max delay start | Max delay end |\n"); 3444 printf(" -------------------------------------------------------------------------------------------------------------------------------------------\n"); 3445 3446 next = rb_first_cached(&sched->sorted_atom_root); 3447 3448 while (next) { 3449 struct work_atoms *work_list; 3450 3451 work_list = rb_entry(next, struct work_atoms, node); 3452 output_lat_thread(sched, work_list); 3453 next = rb_next(next); 3454 thread__zput(work_list->thread); 3455 } 3456 3457 printf(" -----------------------------------------------------------------------------------------------------------------\n"); 3458 printf(" TOTAL: |%11.3f ms |%9" PRIu64 " |\n", 3459 (double)sched->all_runtime / NSEC_PER_MSEC, sched->all_count); 3460 3461 printf(" ---------------------------------------------------\n"); 3462 3463 print_bad_events(sched); 3464 printf("\n"); 3465 3466 rc = 0; 3467 3468 out_free_cpus_switch_event: 3469 free_cpus_switch_event(sched); 3470 return rc; 3471 } 3472 3473 static int setup_map_cpus(struct perf_sched *sched) 3474 { 3475 sched->max_cpu.cpu = sysconf(_SC_NPROCESSORS_CONF); 3476 3477 if (sched->map.comp) { 3478 sched->map.comp_cpus = zalloc(sched->max_cpu.cpu * sizeof(int)); 3479 if (!sched->map.comp_cpus) 3480 return -1; 3481 } 3482 3483 if (sched->map.cpus_str) { 3484 sched->map.cpus = perf_cpu_map__new(sched->map.cpus_str); 3485 if (!sched->map.cpus) { 3486 pr_err("failed to get cpus map from %s\n", sched->map.cpus_str); 3487 zfree(&sched->map.comp_cpus); 3488 return -1; 3489 } 3490 } 3491 3492 return 0; 3493 } 3494 3495 static int setup_color_pids(struct perf_sched *sched) 3496 { 3497 struct perf_thread_map *map; 3498 3499 if (!sched->map.color_pids_str) 3500 return 0; 3501 3502 map = thread_map__new_by_tid_str(sched->map.color_pids_str); 3503 if (!map) { 3504 pr_err("failed to get thread map from %s\n", sched->map.color_pids_str); 3505 return -1; 3506 } 3507 3508 sched->map.color_pids = map; 3509 return 0; 3510 } 3511 3512 static int setup_color_cpus(struct perf_sched *sched) 3513 { 3514 struct perf_cpu_map *map; 3515 3516 if (!sched->map.color_cpus_str) 3517 return 0; 3518 3519 map = perf_cpu_map__new(sched->map.color_cpus_str); 3520 if (!map) { 3521 pr_err("failed to get thread map from %s\n", sched->map.color_cpus_str); 3522 return -1; 3523 } 3524 3525 sched->map.color_cpus = map; 3526 return 0; 3527 } 3528 3529 static int perf_sched__map(struct perf_sched *sched) 3530 { 3531 int rc = -1; 3532 3533 sched->curr_thread = calloc(MAX_CPUS, sizeof(*(sched->curr_thread))); 3534 if (!sched->curr_thread) 3535 return rc; 3536 3537 sched->curr_out_thread = calloc(MAX_CPUS, sizeof(*(sched->curr_out_thread))); 3538 if (!sched->curr_out_thread) 3539 return rc; 3540 3541 if (setup_cpus_switch_event(sched)) 3542 goto out_free_curr_thread; 3543 3544 if (setup_map_cpus(sched)) 3545 goto out_free_cpus_switch_event; 3546 3547 if (setup_color_pids(sched)) 3548 goto out_put_map_cpus; 3549 3550 if (setup_color_cpus(sched)) 3551 goto out_put_color_pids; 3552 3553 setup_pager(); 3554 if (perf_sched__read_events(sched)) 3555 goto out_put_color_cpus; 3556 3557 rc = 0; 3558 print_bad_events(sched); 3559 3560 out_put_color_cpus: 3561 perf_cpu_map__put(sched->map.color_cpus); 3562 3563 out_put_color_pids: 3564 perf_thread_map__put(sched->map.color_pids); 3565 3566 out_put_map_cpus: 3567 zfree(&sched->map.comp_cpus); 3568 perf_cpu_map__put(sched->map.cpus); 3569 3570 out_free_cpus_switch_event: 3571 free_cpus_switch_event(sched); 3572 3573 out_free_curr_thread: 3574 zfree(&sched->curr_thread); 3575 return rc; 3576 } 3577 3578 static int perf_sched__replay(struct perf_sched *sched) 3579 { 3580 int ret; 3581 unsigned long i; 3582 3583 mutex_init(&sched->start_work_mutex); 3584 mutex_init(&sched->work_done_wait_mutex); 3585 3586 ret = setup_cpus_switch_event(sched); 3587 if (ret) 3588 goto out_mutex_destroy; 3589 3590 calibrate_run_measurement_overhead(sched); 3591 calibrate_sleep_measurement_overhead(sched); 3592 3593 test_calibrations(sched); 3594 3595 ret = perf_sched__read_events(sched); 3596 if (ret) 3597 goto out_free_cpus_switch_event; 3598 3599 printf("nr_run_events: %ld\n", sched->nr_run_events); 3600 printf("nr_sleep_events: %ld\n", sched->nr_sleep_events); 3601 printf("nr_wakeup_events: %ld\n", sched->nr_wakeup_events); 3602 3603 if (sched->targetless_wakeups) 3604 printf("target-less wakeups: %ld\n", sched->targetless_wakeups); 3605 if (sched->multitarget_wakeups) 3606 printf("multi-target wakeups: %ld\n", sched->multitarget_wakeups); 3607 if (sched->nr_run_events_optimized) 3608 printf("run atoms optimized: %ld\n", 3609 sched->nr_run_events_optimized); 3610 3611 print_task_traces(sched); 3612 add_cross_task_wakeups(sched); 3613 3614 sched->thread_funcs_exit = false; 3615 create_tasks(sched); 3616 printf("------------------------------------------------------------\n"); 3617 if (sched->replay_repeat == 0) 3618 sched->replay_repeat = UINT_MAX; 3619 3620 for (i = 0; i < sched->replay_repeat; i++) 3621 run_one_test(sched); 3622 3623 sched->thread_funcs_exit = true; 3624 destroy_tasks(sched); 3625 3626 out_free_cpus_switch_event: 3627 free_cpus_switch_event(sched); 3628 3629 out_mutex_destroy: 3630 mutex_destroy(&sched->start_work_mutex); 3631 mutex_destroy(&sched->work_done_wait_mutex); 3632 return ret; 3633 } 3634 3635 static void setup_sorting(struct perf_sched *sched, const struct option *options, 3636 const char * const usage_msg[]) 3637 { 3638 char *tmp, *tok, *str = strdup(sched->sort_order); 3639 3640 for (tok = strtok_r(str, ", ", &tmp); 3641 tok; tok = strtok_r(NULL, ", ", &tmp)) { 3642 if (sort_dimension__add(tok, &sched->sort_list) < 0) { 3643 usage_with_options_msg(usage_msg, options, 3644 "Unknown --sort key: `%s'", tok); 3645 } 3646 } 3647 3648 free(str); 3649 3650 sort_dimension__add("pid", &sched->cmp_pid); 3651 } 3652 3653 static bool schedstat_events_exposed(void) 3654 { 3655 /* 3656 * Select "sched:sched_stat_wait" event to check 3657 * whether schedstat tracepoints are exposed. 3658 */ 3659 return IS_ERR(trace_event__tp_format("sched", "sched_stat_wait")) ? 3660 false : true; 3661 } 3662 3663 static int __cmd_record(int argc, const char **argv) 3664 { 3665 unsigned int rec_argc, i, j; 3666 char **rec_argv; 3667 const char **rec_argv_copy; 3668 const char * const record_args[] = { 3669 "record", 3670 "-a", 3671 "-R", 3672 "-m", "1024", 3673 "-c", "1", 3674 "-e", "sched:sched_switch", 3675 "-e", "sched:sched_stat_runtime", 3676 "-e", "sched:sched_process_fork", 3677 "-e", "sched:sched_wakeup_new", 3678 "-e", "sched:sched_migrate_task", 3679 }; 3680 3681 /* 3682 * The tracepoints trace_sched_stat_{wait, sleep, iowait} 3683 * are not exposed to user if CONFIG_SCHEDSTATS is not set, 3684 * to prevent "perf sched record" execution failure, determine 3685 * whether to record schedstat events according to actual situation. 3686 */ 3687 const char * const schedstat_args[] = { 3688 "-e", "sched:sched_stat_wait", 3689 "-e", "sched:sched_stat_sleep", 3690 "-e", "sched:sched_stat_iowait", 3691 }; 3692 unsigned int schedstat_argc = schedstat_events_exposed() ? 3693 ARRAY_SIZE(schedstat_args) : 0; 3694 3695 struct tep_event *waking_event; 3696 int ret; 3697 3698 /* 3699 * +2 for either "-e", "sched:sched_wakeup" or 3700 * "-e", "sched:sched_waking" 3701 */ 3702 rec_argc = ARRAY_SIZE(record_args) + 2 + schedstat_argc + argc - 1; 3703 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 3704 if (rec_argv == NULL) 3705 return -ENOMEM; 3706 rec_argv_copy = calloc(rec_argc + 1, sizeof(char *)); 3707 if (rec_argv_copy == NULL) { 3708 free(rec_argv); 3709 return -ENOMEM; 3710 } 3711 3712 for (i = 0; i < ARRAY_SIZE(record_args); i++) 3713 rec_argv[i] = strdup(record_args[i]); 3714 3715 rec_argv[i++] = strdup("-e"); 3716 waking_event = trace_event__tp_format("sched", "sched_waking"); 3717 if (!IS_ERR(waking_event)) 3718 rec_argv[i++] = strdup("sched:sched_waking"); 3719 else 3720 rec_argv[i++] = strdup("sched:sched_wakeup"); 3721 3722 for (j = 0; j < schedstat_argc; j++) 3723 rec_argv[i++] = strdup(schedstat_args[j]); 3724 3725 for (j = 1; j < (unsigned int)argc; j++, i++) 3726 rec_argv[i] = strdup(argv[j]); 3727 3728 BUG_ON(i != rec_argc); 3729 3730 memcpy(rec_argv_copy, rec_argv, sizeof(char *) * rec_argc); 3731 ret = cmd_record(rec_argc, rec_argv_copy); 3732 3733 for (i = 0; i < rec_argc; i++) 3734 free(rec_argv[i]); 3735 free(rec_argv); 3736 free(rec_argv_copy); 3737 3738 return ret; 3739 } 3740 3741 int cmd_sched(int argc, const char **argv) 3742 { 3743 static const char default_sort_order[] = "avg, max, switch, runtime"; 3744 struct perf_sched sched = { 3745 .cmp_pid = LIST_HEAD_INIT(sched.cmp_pid), 3746 .sort_list = LIST_HEAD_INIT(sched.sort_list), 3747 .sort_order = default_sort_order, 3748 .replay_repeat = 10, 3749 .profile_cpu = -1, 3750 .next_shortname1 = 'A', 3751 .next_shortname2 = '0', 3752 .skip_merge = 0, 3753 .show_callchain = 1, 3754 .max_stack = 5, 3755 }; 3756 const struct option sched_options[] = { 3757 OPT_STRING('i', "input", &input_name, "file", 3758 "input file name"), 3759 OPT_INCR('v', "verbose", &verbose, 3760 "be more verbose (show symbol address, etc)"), 3761 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 3762 "dump raw trace in ASCII"), 3763 OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"), 3764 OPT_END() 3765 }; 3766 const struct option latency_options[] = { 3767 OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]", 3768 "sort by key(s): runtime, switch, avg, max"), 3769 OPT_INTEGER('C', "CPU", &sched.profile_cpu, 3770 "CPU to profile on"), 3771 OPT_BOOLEAN('p', "pids", &sched.skip_merge, 3772 "latency stats per pid instead of per comm"), 3773 OPT_PARENT(sched_options) 3774 }; 3775 const struct option replay_options[] = { 3776 OPT_UINTEGER('r', "repeat", &sched.replay_repeat, 3777 "repeat the workload replay N times (0: infinite)"), 3778 OPT_PARENT(sched_options) 3779 }; 3780 const struct option map_options[] = { 3781 OPT_BOOLEAN(0, "compact", &sched.map.comp, 3782 "map output in compact mode"), 3783 OPT_STRING(0, "color-pids", &sched.map.color_pids_str, "pids", 3784 "highlight given pids in map"), 3785 OPT_STRING(0, "color-cpus", &sched.map.color_cpus_str, "cpus", 3786 "highlight given CPUs in map"), 3787 OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus", 3788 "display given CPUs in map"), 3789 OPT_STRING(0, "task-name", &sched.map.task_name, "task", 3790 "map output only for the given task name(s)."), 3791 OPT_BOOLEAN(0, "fuzzy-name", &sched.map.fuzzy, 3792 "given command name can be partially matched (fuzzy matching)"), 3793 OPT_PARENT(sched_options) 3794 }; 3795 const struct option timehist_options[] = { 3796 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 3797 "file", "vmlinux pathname"), 3798 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, 3799 "file", "kallsyms pathname"), 3800 OPT_BOOLEAN('g', "call-graph", &sched.show_callchain, 3801 "Display call chains if present (default on)"), 3802 OPT_UINTEGER(0, "max-stack", &sched.max_stack, 3803 "Maximum number of functions to display backtrace."), 3804 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", 3805 "Look for files with symbols relative to this directory"), 3806 OPT_BOOLEAN('s', "summary", &sched.summary_only, 3807 "Show only syscall summary with statistics"), 3808 OPT_BOOLEAN('S', "with-summary", &sched.summary, 3809 "Show all syscalls and summary with statistics"), 3810 OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"), 3811 OPT_BOOLEAN('n', "next", &sched.show_next, "Show next task"), 3812 OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"), 3813 OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"), 3814 OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"), 3815 OPT_STRING(0, "time", &sched.time_str, "str", 3816 "Time span for analysis (start,stop)"), 3817 OPT_BOOLEAN(0, "state", &sched.show_state, "Show task state when sched-out"), 3818 OPT_STRING('p', "pid", &symbol_conf.pid_list_str, "pid[,pid...]", 3819 "analyze events only for given process id(s)"), 3820 OPT_STRING('t', "tid", &symbol_conf.tid_list_str, "tid[,tid...]", 3821 "analyze events only for given thread id(s)"), 3822 OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"), 3823 OPT_BOOLEAN(0, "show-prio", &sched.show_prio, "Show task priority"), 3824 OPT_STRING(0, "prio", &sched.prio_str, "prio", 3825 "analyze events only for given task priority(ies)"), 3826 OPT_PARENT(sched_options) 3827 }; 3828 3829 const char * const latency_usage[] = { 3830 "perf sched latency [<options>]", 3831 NULL 3832 }; 3833 const char * const replay_usage[] = { 3834 "perf sched replay [<options>]", 3835 NULL 3836 }; 3837 const char * const map_usage[] = { 3838 "perf sched map [<options>]", 3839 NULL 3840 }; 3841 const char * const timehist_usage[] = { 3842 "perf sched timehist [<options>]", 3843 NULL 3844 }; 3845 const char *const sched_subcommands[] = { "record", "latency", "map", 3846 "replay", "script", 3847 "timehist", NULL }; 3848 const char *sched_usage[] = { 3849 NULL, 3850 NULL 3851 }; 3852 struct trace_sched_handler lat_ops = { 3853 .wakeup_event = latency_wakeup_event, 3854 .switch_event = latency_switch_event, 3855 .runtime_event = latency_runtime_event, 3856 .migrate_task_event = latency_migrate_task_event, 3857 }; 3858 struct trace_sched_handler map_ops = { 3859 .switch_event = map_switch_event, 3860 }; 3861 struct trace_sched_handler replay_ops = { 3862 .wakeup_event = replay_wakeup_event, 3863 .switch_event = replay_switch_event, 3864 .fork_event = replay_fork_event, 3865 }; 3866 int ret; 3867 3868 perf_tool__init(&sched.tool, /*ordered_events=*/true); 3869 sched.tool.sample = perf_sched__process_tracepoint_sample; 3870 sched.tool.comm = perf_sched__process_comm; 3871 sched.tool.namespaces = perf_event__process_namespaces; 3872 sched.tool.lost = perf_event__process_lost; 3873 sched.tool.fork = perf_sched__process_fork_event; 3874 3875 argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands, 3876 sched_usage, PARSE_OPT_STOP_AT_NON_OPTION); 3877 if (!argc) 3878 usage_with_options(sched_usage, sched_options); 3879 3880 /* 3881 * Aliased to 'perf script' for now: 3882 */ 3883 if (!strcmp(argv[0], "script")) { 3884 return cmd_script(argc, argv); 3885 } else if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) { 3886 return __cmd_record(argc, argv); 3887 } else if (strlen(argv[0]) > 2 && strstarts("latency", argv[0])) { 3888 sched.tp_handler = &lat_ops; 3889 if (argc > 1) { 3890 argc = parse_options(argc, argv, latency_options, latency_usage, 0); 3891 if (argc) 3892 usage_with_options(latency_usage, latency_options); 3893 } 3894 setup_sorting(&sched, latency_options, latency_usage); 3895 return perf_sched__lat(&sched); 3896 } else if (!strcmp(argv[0], "map")) { 3897 if (argc) { 3898 argc = parse_options(argc, argv, map_options, map_usage, 0); 3899 if (argc) 3900 usage_with_options(map_usage, map_options); 3901 3902 if (sched.map.task_name) { 3903 sched.map.task_names = strlist__new(sched.map.task_name, NULL); 3904 if (sched.map.task_names == NULL) { 3905 fprintf(stderr, "Failed to parse task names\n"); 3906 return -1; 3907 } 3908 } 3909 } 3910 sched.tp_handler = &map_ops; 3911 setup_sorting(&sched, latency_options, latency_usage); 3912 return perf_sched__map(&sched); 3913 } else if (strlen(argv[0]) > 2 && strstarts("replay", argv[0])) { 3914 sched.tp_handler = &replay_ops; 3915 if (argc) { 3916 argc = parse_options(argc, argv, replay_options, replay_usage, 0); 3917 if (argc) 3918 usage_with_options(replay_usage, replay_options); 3919 } 3920 return perf_sched__replay(&sched); 3921 } else if (!strcmp(argv[0], "timehist")) { 3922 if (argc) { 3923 argc = parse_options(argc, argv, timehist_options, 3924 timehist_usage, 0); 3925 if (argc) 3926 usage_with_options(timehist_usage, timehist_options); 3927 } 3928 if ((sched.show_wakeups || sched.show_next) && 3929 sched.summary_only) { 3930 pr_err(" Error: -s and -[n|w] are mutually exclusive.\n"); 3931 parse_options_usage(timehist_usage, timehist_options, "s", true); 3932 if (sched.show_wakeups) 3933 parse_options_usage(NULL, timehist_options, "w", true); 3934 if (sched.show_next) 3935 parse_options_usage(NULL, timehist_options, "n", true); 3936 return -EINVAL; 3937 } 3938 ret = symbol__validate_sym_arguments(); 3939 if (ret) 3940 return ret; 3941 3942 return perf_sched__timehist(&sched); 3943 } else { 3944 usage_with_options(sched_usage, sched_options); 3945 } 3946 3947 /* free usage string allocated by parse_options_subcommand */ 3948 free((void *)sched_usage[0]); 3949 3950 return 0; 3951 } 3952