// SPDX-License-Identifier: GPL-2.0
#include "builtin.h"
#include "perf-sys.h"

#include "util/cpumap.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/evsel_fprintf.h"
#include "util/mutex.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/cloexec.h"
#include "util/thread_map.h"
#include "util/color.h"
#include "util/stat.h"
#include "util/string2.h"
#include "util/callchain.h"
#include "util/time-utils.h"

#include <subcmd/pager.h>
#include <subcmd/parse-options.h>
#include "util/trace-event.h"

#include "util/debug.h"
#include "util/event.h"
#include "util/util.h"

#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/zalloc.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <inttypes.h>

#include <errno.h>
#include <semaphore.h>
#include <pthread.h>
#include <math.h>
#include <api/fs/fs.h>
#include <perf/cpumap.h>
#include <linux/time64.h>
#include <linux/err.h>

#include <linux/ctype.h>

#define PR_SET_NAME		15	/* Set process name */
#define MAX_CPUS		4096
#define COMM_LEN		20
#define SYM_LEN			129
#define MAX_PID			1024000
#define MAX_PRIO		140

static const char *cpu_list;
static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);

struct sched_atom;

struct task_desc {
	unsigned long		nr;
	unsigned long		pid;
	char			comm[COMM_LEN];

	unsigned long		nr_events;
	unsigned long		curr_event;
	struct sched_atom	**atoms;

	pthread_t		thread;

	sem_t			ready_for_work;
	sem_t			work_done_sem;

	u64			cpu_usage;
};

enum sched_event_type {
	SCHED_EVENT_RUN,
	SCHED_EVENT_SLEEP,
	SCHED_EVENT_WAKEUP,
};

struct sched_atom {
	enum sched_event_type	type;
	u64			timestamp;
	u64			duration;
	unsigned long		nr;
	sem_t			*wait_sem;
	struct task_desc	*wakee;
};

enum thread_state {
	THREAD_SLEEPING = 0,
	THREAD_WAIT_CPU,
	THREAD_SCHED_IN,
	THREAD_IGNORE
};

struct work_atom {
	struct list_head	list;
	enum thread_state	state;
	u64			sched_out_time;
	u64			wake_up_time;
	u64			sched_in_time;
	u64			runtime;
};

struct work_atoms {
	struct list_head	work_list;
	struct thread		*thread;
	struct rb_node		node;
	u64			max_lat;
	u64			max_lat_start;
	u64			max_lat_end;
	u64			total_lat;
	u64			nb_atoms;
	u64			total_runtime;
	int			num_merged;
};

typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);

struct perf_sched;

struct trace_sched_handler {
	int (*switch_event)(struct perf_sched *sched, struct evsel *evsel,
			    struct perf_sample *sample, struct machine *machine);

	int (*runtime_event)(struct perf_sched *sched, struct evsel *evsel,
			     struct perf_sample *sample, struct machine *machine);

	int (*wakeup_event)(struct perf_sched *sched, struct evsel *evsel,
			    struct perf_sample *sample, struct machine *machine);

	/* PERF_RECORD_FORK event, not sched_process_fork tracepoint */
	int (*fork_event)(struct perf_sched *sched, union perf_event *event,
			  struct machine *machine);

	int (*migrate_task_event)(struct perf_sched *sched,
				  struct evsel *evsel,
				  struct perf_sample *sample,
				  struct machine *machine);
};

#define COLOR_PIDS PERF_COLOR_BLUE
#define COLOR_CPUS PERF_COLOR_BG_RED
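/*
 * Options controlling the 'perf sched map' output: which CPUs get a
 * column when compressing the map, which pids/cpus are highlighted,
 * and which task names are shown (exact or fuzzy matching).
 */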
struct perf_sched_map {
	DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS);
	struct perf_cpu		*comp_cpus;
	bool			 comp;
	struct perf_thread_map	*color_pids;
	const char		*color_pids_str;
	struct perf_cpu_map	*color_cpus;
	const char		*color_cpus_str;
	const char		*task_name;
	struct strlist		*task_names;
	bool			 fuzzy;
	struct perf_cpu_map	*cpus;
	const char		*cpus_str;
};

struct perf_sched {
	struct perf_tool tool;
	const char	 *sort_order;
	unsigned long	 nr_tasks;
	struct task_desc **pid_to_task;
	struct task_desc **tasks;
	const struct trace_sched_handler *tp_handler;
	struct mutex	 start_work_mutex;
	struct mutex	 work_done_wait_mutex;
	int		 profile_cpu;
	/*
	 * Track the current task - that way we can know whether there are
	 * any weird events, such as a task being switched away that is not
	 * current.
	 */
	struct perf_cpu	 max_cpu;
	u32		 *curr_pid;
	struct thread	 **curr_thread;
	struct thread	 **curr_out_thread;
	char		 next_shortname1;
	char		 next_shortname2;
	unsigned int	 replay_repeat;
	unsigned long	 nr_run_events;
	unsigned long	 nr_sleep_events;
	unsigned long	 nr_wakeup_events;
	unsigned long	 nr_sleep_corrections;
	unsigned long	 nr_run_events_optimized;
	unsigned long	 targetless_wakeups;
	unsigned long	 multitarget_wakeups;
	unsigned long	 nr_runs;
	unsigned long	 nr_timestamps;
	unsigned long	 nr_unordered_timestamps;
	unsigned long	 nr_context_switch_bugs;
	unsigned long	 nr_events;
	unsigned long	 nr_lost_chunks;
	unsigned long	 nr_lost_events;
	u64		 run_measurement_overhead;
	u64		 sleep_measurement_overhead;
	u64		 start_time;
	u64		 cpu_usage;
	u64		 runavg_cpu_usage;
	u64		 parent_cpu_usage;
	u64		 runavg_parent_cpu_usage;
	u64		 sum_runtime;
	u64		 sum_fluct;
	u64		 run_avg;
	u64		 all_runtime;
	u64		 all_count;
	u64		 *cpu_last_switched;
	struct rb_root_cached atom_root, sorted_atom_root, merged_atom_root;
	struct list_head sort_list, cmp_pid;
	bool		 force;
	bool		 skip_merge;
	struct perf_sched_map map;

	/* options for timehist command */
	bool		summary;
	bool		summary_only;
	bool		idle_hist;
	bool		show_callchain;
	unsigned int	max_stack;
	bool		show_cpu_visual;
	bool		show_wakeups;
	bool		show_next;
	bool		show_migrations;
	bool		pre_migrations;
	bool		show_state;
	bool		show_prio;
	u64		skipped_samples;
	const char	*time_str;
	struct perf_time_interval ptime;
	struct perf_time_interval hist_time;
	volatile bool	thread_funcs_exit;
	const char	*prio_str;
	DECLARE_BITMAP(prio_bitmap, MAX_PRIO);
};

/* per thread run time data */
struct thread_runtime {
	u64 last_time;      /* time of previous sched in/out event */
	u64 dt_run;         /* run time */
	u64 dt_sleep;       /* time between CPU access by sleep (off cpu) */
	u64 dt_iowait;      /* time between CPU access by iowait (off cpu) */
	u64 dt_preempt;     /* time between CPU access by preempt (off cpu) */
	u64 dt_delay;       /* time between wakeup and sched-in */
	u64 dt_pre_mig;     /* time between wakeup and migration */
	u64 ready_to_run;   /* time of wakeup */
	u64 migrated;       /* time when a thread is migrated */

	struct stats run_stats;
	u64 total_run_time;
	u64 total_sleep_time;
	u64 total_iowait_time;
	u64 total_preempt_time;
	u64 total_delay_time;
	u64 total_pre_mig_time;

	char last_state;

	char shortname[3];
	bool comm_changed;

	u64 migrations;

	int prio;
};
/* per event run time data */
struct evsel_runtime {
	u64 *last_time; /* time this event was last seen per cpu */
	u32 ncpu;       /* highest cpu slot allocated */
};

/* per cpu idle time data */
struct idle_thread_runtime {
	struct thread_runtime	tr;
	struct thread		*last_thread;
	struct rb_root_cached	sorted_root;
	struct callchain_root	callchain;
	struct callchain_cursor	cursor;
};

/* track idle times per cpu */
static struct thread **idle_threads;
static int idle_max_cpu;
static char idle_comm[] = "<idle>";

static u64 get_nsecs(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);

	return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
}

static void burn_nsecs(struct perf_sched *sched, u64 nsecs)
{
	u64 T0 = get_nsecs(), T1;

	do {
		T1 = get_nsecs();
	} while (T1 + sched->run_measurement_overhead < T0 + nsecs);
}

static void sleep_nsecs(u64 nsecs)
{
	struct timespec ts;

	ts.tv_nsec = nsecs % 999999999;
	ts.tv_sec = nsecs / 999999999;

	nanosleep(&ts, NULL);
}

static void calibrate_run_measurement_overhead(struct perf_sched *sched)
{
	u64 T0, T1, delta, min_delta = NSEC_PER_SEC;
	int i;

	for (i = 0; i < 10; i++) {
		T0 = get_nsecs();
		burn_nsecs(sched, 0);
		T1 = get_nsecs();
		delta = T1-T0;
		min_delta = min(min_delta, delta);
	}
	sched->run_measurement_overhead = min_delta;

	printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta);
}

static void calibrate_sleep_measurement_overhead(struct perf_sched *sched)
{
	u64 T0, T1, delta, min_delta = NSEC_PER_SEC;
	int i;

	for (i = 0; i < 10; i++) {
		T0 = get_nsecs();
		sleep_nsecs(10000);
		T1 = get_nsecs();
		delta = T1-T0;
		min_delta = min(min_delta, delta);
	}
	min_delta -= 10000;
	sched->sleep_measurement_overhead = min_delta;

	printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta);
}

static struct sched_atom *
get_new_event(struct task_desc *task, u64 timestamp)
{
	struct sched_atom *event = zalloc(sizeof(*event));
	unsigned long idx = task->nr_events;
	size_t size;

	event->timestamp = timestamp;
	event->nr = idx;

	task->nr_events++;
	size = sizeof(struct sched_atom *) * task->nr_events;
	task->atoms = realloc(task->atoms, size);
	BUG_ON(!task->atoms);

	task->atoms[idx] = event;

	return event;
}

static struct sched_atom *last_event(struct task_desc *task)
{
	if (!task->nr_events)
		return NULL;

	return task->atoms[task->nr_events - 1];
}

static void add_sched_event_run(struct perf_sched *sched, struct task_desc *task,
				u64 timestamp, u64 duration)
{
	struct sched_atom *event, *curr_event = last_event(task);

	/*
	 * optimize an existing RUN event by merging this one
	 * to it:
	 */
	if (curr_event && curr_event->type == SCHED_EVENT_RUN) {
		sched->nr_run_events_optimized++;
		curr_event->duration += duration;
		return;
	}

	event = get_new_event(task, timestamp);

	event->type = SCHED_EVENT_RUN;
	event->duration = duration;

	sched->nr_run_events++;
}
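/*
 * Record a wakeup atom for 'task'. When the wakee's last atom is a sleep
 * that no other wakeup targets yet, pair the two through a shared
 * semaphore so that replay can post/wait on it; otherwise the wakeup is
 * only counted as targetless or multitarget.
 */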
static void add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *task,
				   u64 timestamp, struct task_desc *wakee)
{
	struct sched_atom *event, *wakee_event;

	event = get_new_event(task, timestamp);
	event->type = SCHED_EVENT_WAKEUP;
	event->wakee = wakee;

	wakee_event = last_event(wakee);
	if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) {
		sched->targetless_wakeups++;
		return;
	}
	if (wakee_event->wait_sem) {
		sched->multitarget_wakeups++;
		return;
	}

	wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem));
	sem_init(wakee_event->wait_sem, 0, 0);
	event->wait_sem = wakee_event->wait_sem;

	sched->nr_wakeup_events++;
}

static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task,
				  u64 timestamp)
{
	struct sched_atom *event = get_new_event(task, timestamp);

	event->type = SCHED_EVENT_SLEEP;

	sched->nr_sleep_events++;
}

static struct task_desc *register_pid(struct perf_sched *sched,
				      unsigned long pid, const char *comm)
{
	struct task_desc *task;
	static int pid_max;

	if (sched->pid_to_task == NULL) {
		if (sysctl__read_int("kernel/pid_max", &pid_max) < 0)
			pid_max = MAX_PID;
		BUG_ON((sched->pid_to_task = calloc(pid_max, sizeof(struct task_desc *))) == NULL);
	}
	if (pid >= (unsigned long)pid_max) {
		BUG_ON((sched->pid_to_task = realloc(sched->pid_to_task, (pid + 1) *
			sizeof(struct task_desc *))) == NULL);
		while (pid >= (unsigned long)pid_max)
			sched->pid_to_task[pid_max++] = NULL;
	}

	task = sched->pid_to_task[pid];

	if (task)
		return task;

	task = zalloc(sizeof(*task));
	task->pid = pid;
	task->nr = sched->nr_tasks;
	strcpy(task->comm, comm);
	/*
	 * every task starts in sleeping state - this gets ignored
	 * if there's no wakeup pointing to this sleep state:
	 */
	add_sched_event_sleep(sched, task, 0);

	sched->pid_to_task[pid] = task;
	sched->nr_tasks++;
	sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_desc *));
	BUG_ON(!sched->tasks);
	sched->tasks[task->nr] = task;

	if (verbose > 0)
		printf("registered task #%ld, PID %ld (%s)\n", sched->nr_tasks, pid, comm);

	return task;
}


static void print_task_traces(struct perf_sched *sched)
{
	struct task_desc *task;
	unsigned long i;

	for (i = 0; i < sched->nr_tasks; i++) {
		task = sched->tasks[i];
		printf("task %6ld (%20s:%10ld), nr_events: %ld\n",
			task->nr, task->comm, task->pid, task->nr_events);
	}
}

static void add_cross_task_wakeups(struct perf_sched *sched)
{
	struct task_desc *task1, *task2;
	unsigned long i, j;

	for (i = 0; i < sched->nr_tasks; i++) {
		task1 = sched->tasks[i];
		j = i + 1;
		if (j == sched->nr_tasks)
			j = 0;
		task2 = sched->tasks[j];
		add_sched_event_wakeup(sched, task1, 0, task2);
	}
}

static void perf_sched__process_event(struct perf_sched *sched,
				      struct sched_atom *atom)
{
	int ret = 0;

	switch (atom->type) {
		case SCHED_EVENT_RUN:
			burn_nsecs(sched, atom->duration);
			break;
		case SCHED_EVENT_SLEEP:
			if (atom->wait_sem)
				ret = sem_wait(atom->wait_sem);
			BUG_ON(ret);
			break;
		case SCHED_EVENT_WAKEUP:
			if (atom->wait_sem)
				ret = sem_post(atom->wait_sem);
			BUG_ON(ret);
			break;
		default:
			BUG_ON(1);
	}
}
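/* user + system time consumed by the parent (the perf process itself), in nanoseconds */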
static u64 get_cpu_usage_nsec_parent(void)
{
	struct rusage ru;
	u64 sum;
	int err;

	err = getrusage(RUSAGE_SELF, &ru);
	BUG_ON(err);

	sum  = ru.ru_utime.tv_sec * NSEC_PER_SEC + ru.ru_utime.tv_usec * NSEC_PER_USEC;
	sum += ru.ru_stime.tv_sec * NSEC_PER_SEC + ru.ru_stime.tv_usec * NSEC_PER_USEC;

	return sum;
}

static int self_open_counters(struct perf_sched *sched, unsigned long cur_task)
{
	struct perf_event_attr attr;
	char sbuf[STRERR_BUFSIZE], info[STRERR_BUFSIZE];
	int fd;
	struct rlimit limit;
	bool need_privilege = false;

	memset(&attr, 0, sizeof(attr));

	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_TASK_CLOCK;

force_again:
	fd = sys_perf_event_open(&attr, 0, -1, -1,
				 perf_event_open_cloexec_flag());

	if (fd < 0) {
		if (errno == EMFILE) {
			if (sched->force) {
				BUG_ON(getrlimit(RLIMIT_NOFILE, &limit) == -1);
				limit.rlim_cur += sched->nr_tasks - cur_task;
				if (limit.rlim_cur > limit.rlim_max) {
					limit.rlim_max = limit.rlim_cur;
					need_privilege = true;
				}
				if (setrlimit(RLIMIT_NOFILE, &limit) == -1) {
					if (need_privilege && errno == EPERM)
						strcpy(info, "Need privilege\n");
				} else
					goto force_again;
			} else
				strcpy(info, "Have a try with -f option\n");
		}
		pr_err("Error: sys_perf_event_open() syscall returned "
		       "with %d (%s)\n%s", fd,
		       str_error_r(errno, sbuf, sizeof(sbuf)), info);
		exit(EXIT_FAILURE);
	}
	return fd;
}

static u64 get_cpu_usage_nsec_self(int fd)
{
	u64 runtime;
	int ret;

	ret = read(fd, &runtime, sizeof(runtime));
	BUG_ON(ret != sizeof(runtime));

	return runtime;
}

struct sched_thread_parms {
	struct task_desc  *task;
	struct perf_sched *sched;
	int fd;
};

static void *thread_func(void *ctx)
{
	struct sched_thread_parms *parms = ctx;
	struct task_desc *this_task = parms->task;
	struct perf_sched *sched = parms->sched;
	u64 cpu_usage_0, cpu_usage_1;
	unsigned long i, ret;
	char comm2[22];
	int fd = parms->fd;

	zfree(&parms);

	sprintf(comm2, ":%s", this_task->comm);
	prctl(PR_SET_NAME, comm2);
	if (fd < 0)
		return NULL;

	while (!sched->thread_funcs_exit) {
		ret = sem_post(&this_task->ready_for_work);
		BUG_ON(ret);
		mutex_lock(&sched->start_work_mutex);
		mutex_unlock(&sched->start_work_mutex);

		cpu_usage_0 = get_cpu_usage_nsec_self(fd);

		for (i = 0; i < this_task->nr_events; i++) {
			this_task->curr_event = i;
			perf_sched__process_event(sched, this_task->atoms[i]);
		}

		cpu_usage_1 = get_cpu_usage_nsec_self(fd);
		this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;
		ret = sem_post(&this_task->work_done_sem);
		BUG_ON(ret);

		mutex_lock(&sched->work_done_wait_mutex);
		mutex_unlock(&sched->work_done_wait_mutex);
	}
	return NULL;
}
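/*
 * Spawn one worker thread per recorded task. Both mutexes are taken here
 * and only released by wait_for_tasks()/destroy_tasks(), so the workers
 * block on them as start/stop barriers for each replay iteration.
 */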
static void create_tasks(struct perf_sched *sched)
	EXCLUSIVE_LOCK_FUNCTION(sched->start_work_mutex)
	EXCLUSIVE_LOCK_FUNCTION(sched->work_done_wait_mutex)
{
	struct task_desc *task;
	pthread_attr_t attr;
	unsigned long i;
	int err;

	err = pthread_attr_init(&attr);
	BUG_ON(err);
	err = pthread_attr_setstacksize(&attr,
			(size_t) max(16 * 1024, (int)PTHREAD_STACK_MIN));
	BUG_ON(err);
	mutex_lock(&sched->start_work_mutex);
	mutex_lock(&sched->work_done_wait_mutex);
	for (i = 0; i < sched->nr_tasks; i++) {
		struct sched_thread_parms *parms = malloc(sizeof(*parms));
		BUG_ON(parms == NULL);
		parms->task = task = sched->tasks[i];
		parms->sched = sched;
		parms->fd = self_open_counters(sched, i);
		sem_init(&task->ready_for_work, 0, 0);
		sem_init(&task->work_done_sem, 0, 0);
		task->curr_event = 0;
		err = pthread_create(&task->thread, &attr, thread_func, parms);
		BUG_ON(err);
	}
}

static void destroy_tasks(struct perf_sched *sched)
	UNLOCK_FUNCTION(sched->start_work_mutex)
	UNLOCK_FUNCTION(sched->work_done_wait_mutex)
{
	struct task_desc *task;
	unsigned long i;
	int err;

	mutex_unlock(&sched->start_work_mutex);
	mutex_unlock(&sched->work_done_wait_mutex);
	/* Get rid of threads so they won't be upset by mutex destruction */
	for (i = 0; i < sched->nr_tasks; i++) {
		task = sched->tasks[i];
		err = pthread_join(task->thread, NULL);
		BUG_ON(err);
		sem_destroy(&task->ready_for_work);
		sem_destroy(&task->work_done_sem);
	}
}

static void wait_for_tasks(struct perf_sched *sched)
	EXCLUSIVE_LOCKS_REQUIRED(sched->work_done_wait_mutex)
	EXCLUSIVE_LOCKS_REQUIRED(sched->start_work_mutex)
{
	u64 cpu_usage_0, cpu_usage_1;
	struct task_desc *task;
	unsigned long i, ret;

	sched->start_time = get_nsecs();
	sched->cpu_usage = 0;
	mutex_unlock(&sched->work_done_wait_mutex);

	for (i = 0; i < sched->nr_tasks; i++) {
		task = sched->tasks[i];
		ret = sem_wait(&task->ready_for_work);
		BUG_ON(ret);
		sem_init(&task->ready_for_work, 0, 0);
	}
	mutex_lock(&sched->work_done_wait_mutex);

	cpu_usage_0 = get_cpu_usage_nsec_parent();

	mutex_unlock(&sched->start_work_mutex);

	for (i = 0; i < sched->nr_tasks; i++) {
		task = sched->tasks[i];
		ret = sem_wait(&task->work_done_sem);
		BUG_ON(ret);
		sem_init(&task->work_done_sem, 0, 0);
		sched->cpu_usage += task->cpu_usage;
		task->cpu_usage = 0;
	}

	cpu_usage_1 = get_cpu_usage_nsec_parent();
	if (!sched->runavg_cpu_usage)
		sched->runavg_cpu_usage = sched->cpu_usage;
	sched->runavg_cpu_usage = (sched->runavg_cpu_usage * (sched->replay_repeat - 1) +
				   sched->cpu_usage) / sched->replay_repeat;

	sched->parent_cpu_usage = cpu_usage_1 - cpu_usage_0;
	if (!sched->runavg_parent_cpu_usage)
		sched->runavg_parent_cpu_usage = sched->parent_cpu_usage;
	sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * (sched->replay_repeat - 1) +
					  sched->parent_cpu_usage) / sched->replay_repeat;

	mutex_lock(&sched->start_work_mutex);

	for (i = 0; i < sched->nr_tasks; i++) {
		task = sched->tasks[i];
		task->curr_event = 0;
	}
}
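/*
 * One replay iteration: let the workers run, then fold the wall-clock
 * and cpu-usage numbers into running averages weighted by ->replay_repeat.
 */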
static void run_one_test(struct perf_sched *sched)
	EXCLUSIVE_LOCKS_REQUIRED(sched->work_done_wait_mutex)
	EXCLUSIVE_LOCKS_REQUIRED(sched->start_work_mutex)
{
	u64 T0, T1, delta, avg_delta, fluct;

	T0 = get_nsecs();
	wait_for_tasks(sched);
	T1 = get_nsecs();

	delta = T1 - T0;
	sched->sum_runtime += delta;
	sched->nr_runs++;

	avg_delta = sched->sum_runtime / sched->nr_runs;
	if (delta < avg_delta)
		fluct = avg_delta - delta;
	else
		fluct = delta - avg_delta;
	sched->sum_fluct += fluct;
	if (!sched->run_avg)
		sched->run_avg = delta;
	sched->run_avg = (sched->run_avg * (sched->replay_repeat - 1) + delta) / sched->replay_repeat;

	printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / NSEC_PER_MSEC);

	printf("ravg: %0.2f, ", (double)sched->run_avg / NSEC_PER_MSEC);

	printf("cpu: %0.2f / %0.2f",
		(double)sched->cpu_usage / NSEC_PER_MSEC, (double)sched->runavg_cpu_usage / NSEC_PER_MSEC);

#if 0
	/*
	 * rusage statistics done by the parent, these are less
	 * accurate than the sched->sum_exec_runtime based statistics:
	 */
	printf(" [%0.2f / %0.2f]",
		(double)sched->parent_cpu_usage / NSEC_PER_MSEC,
		(double)sched->runavg_parent_cpu_usage / NSEC_PER_MSEC);
#endif

	printf("\n");

	if (sched->nr_sleep_corrections)
		printf(" (%ld sleep corrections)\n", sched->nr_sleep_corrections);
	sched->nr_sleep_corrections = 0;
}

static void test_calibrations(struct perf_sched *sched)
{
	u64 T0, T1;

	T0 = get_nsecs();
	burn_nsecs(sched, NSEC_PER_MSEC);
	T1 = get_nsecs();

	printf("the run test took %" PRIu64 " nsecs\n", T1 - T0);

	T0 = get_nsecs();
	sleep_nsecs(NSEC_PER_MSEC);
	T1 = get_nsecs();

	printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0);
}

static int
replay_wakeup_event(struct perf_sched *sched,
		    struct evsel *evsel, struct perf_sample *sample,
		    struct machine *machine __maybe_unused)
{
	const char *comm = evsel__strval(evsel, sample, "comm");
	const u32 pid	 = evsel__intval(evsel, sample, "pid");
	struct task_desc *waker, *wakee;

	if (verbose > 0) {
		printf("sched_wakeup event %p\n", evsel);

		printf(" ... pid %d woke up %s/%d\n", sample->tid, comm, pid);
	}

	waker = register_pid(sched, sample->tid, "<unknown>");
	wakee = register_pid(sched, pid, comm);

	add_sched_event_wakeup(sched, waker, sample->time, wakee);
	return 0;
}

static int replay_switch_event(struct perf_sched *sched,
			       struct evsel *evsel,
			       struct perf_sample *sample,
			       struct machine *machine __maybe_unused)
{
	const char *prev_comm  = evsel__strval(evsel, sample, "prev_comm"),
		   *next_comm  = evsel__strval(evsel, sample, "next_comm");
	const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"),
		  next_pid = evsel__intval(evsel, sample, "next_pid");
	struct task_desc *prev, __maybe_unused *next;
	u64 timestamp0, timestamp = sample->time;
	int cpu = sample->cpu;
	s64 delta;

	if (verbose > 0)
		printf("sched_switch event %p\n", evsel);

	if (cpu >= MAX_CPUS || cpu < 0)
		return 0;

	timestamp0 = sched->cpu_last_switched[cpu];
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0) {
		pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta);
		return -1;
	}

	pr_debug(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n",
		 prev_comm, prev_pid, next_comm, next_pid, delta);

	prev = register_pid(sched, prev_pid, prev_comm);
	next = register_pid(sched, next_pid, next_comm);

	sched->cpu_last_switched[cpu] = timestamp;

	add_sched_event_run(sched, prev, timestamp, delta);
	add_sched_event_sleep(sched, prev, timestamp);

	return 0;
}
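/* PERF_RECORD_FORK handler for replay: register parent and child as tasks. */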
static int replay_fork_event(struct perf_sched *sched,
			     union perf_event *event,
			     struct machine *machine)
{
	struct thread *child, *parent;

	child = machine__findnew_thread(machine, event->fork.pid,
					event->fork.tid);
	parent = machine__findnew_thread(machine, event->fork.ppid,
					 event->fork.ptid);

	if (child == NULL || parent == NULL) {
		pr_debug("thread does not exist on fork event: child %p, parent %p\n",
			 child, parent);
		goto out_put;
	}

	if (verbose > 0) {
		printf("fork event\n");
		printf("... parent: %s/%d\n", thread__comm_str(parent), thread__tid(parent));
		printf("...  child: %s/%d\n", thread__comm_str(child), thread__tid(child));
	}

	register_pid(sched, thread__tid(parent), thread__comm_str(parent));
	register_pid(sched, thread__tid(child), thread__comm_str(child));
out_put:
	thread__put(child);
	thread__put(parent);
	return 0;
}

struct sort_dimension {
	const char	*name;
	sort_fn_t	cmp;
	struct list_head list;
};

static inline void init_prio(struct thread_runtime *r)
{
	r->prio = -1;
}

/*
 * handle runtime stats saved per thread
 */
static struct thread_runtime *thread__init_runtime(struct thread *thread)
{
	struct thread_runtime *r;

	r = zalloc(sizeof(struct thread_runtime));
	if (!r)
		return NULL;

	init_stats(&r->run_stats);
	init_prio(r);
	thread__set_priv(thread, r);

	return r;
}

static struct thread_runtime *thread__get_runtime(struct thread *thread)
{
	struct thread_runtime *tr;

	tr = thread__priv(thread);
	if (tr == NULL) {
		tr = thread__init_runtime(thread);
		if (tr == NULL)
			pr_debug("Failed to malloc memory for runtime data.\n");
	}

	return tr;
}

static int
thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r)
{
	struct sort_dimension *sort;
	int ret = 0;

	BUG_ON(list_empty(list));

	list_for_each_entry(sort, list, list) {
		ret = sort->cmp(l, r);
		if (ret)
			return ret;
	}

	return ret;
}

static struct work_atoms *
thread_atoms_search(struct rb_root_cached *root, struct thread *thread,
		    struct list_head *sort_list)
{
	struct rb_node *node = root->rb_root.rb_node;
	struct work_atoms key = { .thread = thread };

	while (node) {
		struct work_atoms *atoms;
		int cmp;

		atoms = container_of(node, struct work_atoms, node);

		cmp = thread_lat_cmp(sort_list, &key, atoms);
		if (cmp > 0)
			node = node->rb_left;
		else if (cmp < 0)
			node = node->rb_right;
		else {
			BUG_ON(thread != atoms->thread);
			return atoms;
		}
	}
	return NULL;
}

static void
__thread_latency_insert(struct rb_root_cached *root, struct work_atoms *data,
			struct list_head *sort_list)
{
	struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL;
	bool leftmost = true;

	while (*new) {
		struct work_atoms *this;
		int cmp;

		this = container_of(*new, struct work_atoms, node);
		parent = *new;

		cmp = thread_lat_cmp(sort_list, data, this);

		if (cmp > 0)
			new = &((*new)->rb_left);
		else {
			new = &((*new)->rb_right);
			leftmost = false;
		}
	}

	rb_link_node(&data->node, parent, new);
	rb_insert_color_cached(&data->node, root, leftmost);
}

static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread)
{
	struct work_atoms *atoms = zalloc(sizeof(*atoms));
	if (!atoms) {
		pr_err("No memory at %s\n", __func__);
		return -1;
	}

	atoms->thread = thread__get(thread);
	INIT_LIST_HEAD(&atoms->work_list);
	__thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid);
	return 0;
}
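/*
 * Start a new latency atom at sched-out time. A task switched out in
 * state 'R' is still runnable and merely waiting for a CPU, so its
 * wake_up_time is the sched-out time itself; anything else sleeps until
 * a wakeup event moves the atom to THREAD_WAIT_CPU.
 */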
static int
add_sched_out_event(struct work_atoms *atoms,
		    char run_state,
		    u64 timestamp)
{
	struct work_atom *atom = zalloc(sizeof(*atom));
	if (!atom) {
		pr_err("No memory at %s", __func__);
		return -1;
	}

	atom->sched_out_time = timestamp;

	if (run_state == 'R') {
		atom->state = THREAD_WAIT_CPU;
		atom->wake_up_time = atom->sched_out_time;
	}

	list_add_tail(&atom->list, &atoms->work_list);
	return 0;
}

static void
add_runtime_event(struct work_atoms *atoms, u64 delta,
		  u64 timestamp __maybe_unused)
{
	struct work_atom *atom;

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	atom->runtime += delta;
	atoms->total_runtime += delta;
}

static void
add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
{
	struct work_atom *atom;
	u64 delta;

	if (list_empty(&atoms->work_list))
		return;

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	if (atom->state != THREAD_WAIT_CPU)
		return;

	if (timestamp < atom->wake_up_time) {
		atom->state = THREAD_IGNORE;
		return;
	}

	atom->state = THREAD_SCHED_IN;
	atom->sched_in_time = timestamp;

	delta = atom->sched_in_time - atom->wake_up_time;
	atoms->total_lat += delta;
	if (delta > atoms->max_lat) {
		atoms->max_lat = delta;
		atoms->max_lat_start = atom->wake_up_time;
		atoms->max_lat_end = timestamp;
	}
	atoms->nb_atoms++;
}

static int latency_switch_event(struct perf_sched *sched,
				struct evsel *evsel,
				struct perf_sample *sample,
				struct machine *machine)
{
	const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"),
		  next_pid = evsel__intval(evsel, sample, "next_pid");
	const char prev_state = evsel__taskstate(evsel, sample, "prev_state");
	struct work_atoms *out_events, *in_events;
	struct thread *sched_out, *sched_in;
	u64 timestamp0, timestamp = sample->time;
	int cpu = sample->cpu, err = -1;
	s64 delta;

	BUG_ON(cpu >= MAX_CPUS || cpu < 0);

	timestamp0 = sched->cpu_last_switched[cpu];
	sched->cpu_last_switched[cpu] = timestamp;
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0) {
		pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta);
		return -1;
	}

	sched_out = machine__findnew_thread(machine, -1, prev_pid);
	sched_in = machine__findnew_thread(machine, -1, next_pid);
	if (sched_out == NULL || sched_in == NULL)
		goto out_put;

	out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
	if (!out_events) {
		if (thread_atoms_insert(sched, sched_out))
			goto out_put;
		out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
		if (!out_events) {
			pr_err("out-event: Internal tree error");
			goto out_put;
		}
	}
	if (add_sched_out_event(out_events, prev_state, timestamp))
		return -1;

	in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
	if (!in_events) {
		if (thread_atoms_insert(sched, sched_in))
			goto out_put;
		in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
		if (!in_events) {
			pr_err("in-event: Internal tree error");
			goto out_put;
		}
		/*
		 * Task came in that we have not heard about yet,
		 * add an initial atom in runnable state:
		 */
		if (add_sched_out_event(in_events, 'R', timestamp))
			goto out_put;
	}
	add_sched_in_event(in_events, timestamp);
	err = 0;
out_put:
	thread__put(sched_out);
	thread__put(sched_in);
	return err;
}
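/*
 * sched_stat_runtime: account the reported runtime to the thread's
 * current atom, creating a fresh runnable atom for threads seen here
 * for the first time.
 */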
static int latency_runtime_event(struct perf_sched *sched,
				 struct evsel *evsel,
				 struct perf_sample *sample,
				 struct machine *machine)
{
	const u32 pid = evsel__intval(evsel, sample, "pid");
	const u64 runtime = evsel__intval(evsel, sample, "runtime");
	struct thread *thread = machine__findnew_thread(machine, -1, pid);
	struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
	u64 timestamp = sample->time;
	int cpu = sample->cpu, err = -1;

	if (thread == NULL)
		return -1;

	BUG_ON(cpu >= MAX_CPUS || cpu < 0);
	if (!atoms) {
		if (thread_atoms_insert(sched, thread))
			goto out_put;
		atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
		if (!atoms) {
			pr_err("in-event: Internal tree error");
			goto out_put;
		}
		if (add_sched_out_event(atoms, 'R', timestamp))
			goto out_put;
	}

	add_runtime_event(atoms, runtime, timestamp);
	err = 0;
out_put:
	thread__put(thread);
	return err;
}

static int latency_wakeup_event(struct perf_sched *sched,
				struct evsel *evsel,
				struct perf_sample *sample,
				struct machine *machine)
{
	const u32 pid = evsel__intval(evsel, sample, "pid");
	struct work_atoms *atoms;
	struct work_atom *atom;
	struct thread *wakee;
	u64 timestamp = sample->time;
	int err = -1;

	wakee = machine__findnew_thread(machine, -1, pid);
	if (wakee == NULL)
		return -1;
	atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
	if (!atoms) {
		if (thread_atoms_insert(sched, wakee))
			goto out_put;
		atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
		if (!atoms) {
			pr_err("wakeup-event: Internal tree error");
			goto out_put;
		}
		if (add_sched_out_event(atoms, 'S', timestamp))
			goto out_put;
	}

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);

	/*
	 * A wakeup event is not guaranteed to arrive while the task is off
	 * the run queue: it may also fire while the task is already on the
	 * run queue, where it only changes ->state to TASK_RUNNING. In that
	 * case we should not set ->wake_up_time.
	 *
	 * You WILL be missing events if you've recorded only one CPU,
	 * or are looking at only one, so don't skip in this case.
	 */
	if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING)
		goto out_ok;

	sched->nr_timestamps++;
	if (atom->sched_out_time > timestamp) {
		sched->nr_unordered_timestamps++;
		goto out_ok;
	}

	atom->state = THREAD_WAIT_CPU;
	atom->wake_up_time = timestamp;
out_ok:
	err = 0;
out_put:
	thread__put(wakee);
	return err;
}
static int latency_migrate_task_event(struct perf_sched *sched,
				      struct evsel *evsel,
				      struct perf_sample *sample,
				      struct machine *machine)
{
	const u32 pid = evsel__intval(evsel, sample, "pid");
	u64 timestamp = sample->time;
	struct work_atoms *atoms;
	struct work_atom *atom;
	struct thread *migrant;
	int err = -1;

	/*
	 * Only need to worry about migration when profiling one CPU.
	 */
	if (sched->profile_cpu == -1)
		return 0;

	migrant = machine__findnew_thread(machine, -1, pid);
	if (migrant == NULL)
		return -1;
	atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
	if (!atoms) {
		if (thread_atoms_insert(sched, migrant))
			goto out_put;
		register_pid(sched, thread__tid(migrant), thread__comm_str(migrant));
		atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
		if (!atoms) {
			pr_err("migration-event: Internal tree error");
			goto out_put;
		}
		if (add_sched_out_event(atoms, 'R', timestamp))
			goto out_put;
	}

	BUG_ON(list_empty(&atoms->work_list));

	atom = list_entry(atoms->work_list.prev, struct work_atom, list);
	atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp;

	sched->nr_timestamps++;

	if (atom->sched_out_time > timestamp)
		sched->nr_unordered_timestamps++;
	err = 0;
out_put:
	thread__put(migrant);
	return err;
}

static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list)
{
	int i;
	int ret;
	u64 avg;
	char max_lat_start[32], max_lat_end[32];

	if (!work_list->nb_atoms)
		return;
	/*
	 * Ignore idle threads:
	 */
	if (!strcmp(thread__comm_str(work_list->thread), "swapper"))
		return;

	sched->all_runtime += work_list->total_runtime;
	sched->all_count   += work_list->nb_atoms;

	if (work_list->num_merged > 1) {
		ret = printf("  %s:(%d) ", thread__comm_str(work_list->thread),
			     work_list->num_merged);
	} else {
		ret = printf("  %s:%d ", thread__comm_str(work_list->thread),
			     thread__tid(work_list->thread));
	}

	for (i = 0; i < 24 - ret; i++)
		printf(" ");

	avg = work_list->total_lat / work_list->nb_atoms;
	timestamp__scnprintf_usec(work_list->max_lat_start, max_lat_start, sizeof(max_lat_start));
	timestamp__scnprintf_usec(work_list->max_lat_end, max_lat_end, sizeof(max_lat_end));

	printf("|%11.3f ms |%9" PRIu64 " | avg:%8.3f ms | max:%8.3f ms | max start: %12s s | max end: %12s s\n",
	       (double)work_list->total_runtime / NSEC_PER_MSEC,
	       work_list->nb_atoms, (double)avg / NSEC_PER_MSEC,
	       (double)work_list->max_lat / NSEC_PER_MSEC,
	       max_lat_start, max_lat_end);
}

static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
{
	pid_t l_tid, r_tid;

	if (RC_CHK_EQUAL(l->thread, r->thread))
		return 0;
	l_tid = thread__tid(l->thread);
	r_tid = thread__tid(r->thread);
	if (l_tid < r_tid)
		return -1;
	if (l_tid > r_tid)
		return 1;
	return (int)(RC_CHK_ACCESS(l->thread) - RC_CHK_ACCESS(r->thread));
}

static int avg_cmp(struct work_atoms *l, struct work_atoms *r)
{
	u64 avgl, avgr;

	if (!l->nb_atoms)
		return -1;

	if (!r->nb_atoms)
		return 1;

	avgl = l->total_lat / l->nb_atoms;
	avgr = r->total_lat / r->nb_atoms;

	if (avgl < avgr)
		return -1;
	if (avgl > avgr)
		return 1;

	return 0;
}

static int max_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->max_lat < r->max_lat)
		return -1;
	if (l->max_lat > r->max_lat)
		return 1;

	return 0;
}
static int switch_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->nb_atoms < r->nb_atoms)
		return -1;
	if (l->nb_atoms > r->nb_atoms)
		return 1;

	return 0;
}

static int runtime_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->total_runtime < r->total_runtime)
		return -1;
	if (l->total_runtime > r->total_runtime)
		return 1;

	return 0;
}

static int sort_dimension__add(const char *tok, struct list_head *list)
{
	size_t i;
	static struct sort_dimension avg_sort_dimension = {
		.name = "avg",
		.cmp  = avg_cmp,
	};
	static struct sort_dimension max_sort_dimension = {
		.name = "max",
		.cmp  = max_cmp,
	};
	static struct sort_dimension pid_sort_dimension = {
		.name = "pid",
		.cmp  = pid_cmp,
	};
	static struct sort_dimension runtime_sort_dimension = {
		.name = "runtime",
		.cmp  = runtime_cmp,
	};
	static struct sort_dimension switch_sort_dimension = {
		.name = "switch",
		.cmp  = switch_cmp,
	};
	struct sort_dimension *available_sorts[] = {
		&pid_sort_dimension,
		&avg_sort_dimension,
		&max_sort_dimension,
		&switch_sort_dimension,
		&runtime_sort_dimension,
	};

	for (i = 0; i < ARRAY_SIZE(available_sorts); i++) {
		if (!strcmp(available_sorts[i]->name, tok)) {
			list_add_tail(&available_sorts[i]->list, list);

			return 0;
		}
	}

	return -1;
}

static void perf_sched__sort_lat(struct perf_sched *sched)
{
	struct rb_node *node;
	struct rb_root_cached *root = &sched->atom_root;
again:
	for (;;) {
		struct work_atoms *data;
		node = rb_first_cached(root);
		if (!node)
			break;

		rb_erase_cached(node, root);
		data = rb_entry(node, struct work_atoms, node);
		__thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list);
	}
	if (root == &sched->atom_root) {
		root = &sched->merged_atom_root;
		goto again;
	}
}

static int process_sched_wakeup_event(const struct perf_tool *tool,
				      struct evsel *evsel,
				      struct perf_sample *sample,
				      struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);

	if (sched->tp_handler->wakeup_event)
		return sched->tp_handler->wakeup_event(sched, evsel, sample, machine);

	return 0;
}

static int process_sched_wakeup_ignore(const struct perf_tool *tool __maybe_unused,
				       struct evsel *evsel __maybe_unused,
				       struct perf_sample *sample __maybe_unused,
				       struct machine *machine __maybe_unused)
{
	return 0;
}

union map_priv {
	void	*ptr;
	bool	 color;
};

static bool thread__has_color(struct thread *thread)
{
	union map_priv priv = {
		.ptr = thread__priv(thread),
	};

	return priv.color;
}

static struct thread*
map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid)
{
	struct thread *thread = machine__findnew_thread(machine, pid, tid);
	union map_priv priv = {
		.color = false,
	};

	if (!sched->map.color_pids || !thread || thread__priv(thread))
		return thread;

	if (thread_map__has(sched->map.color_pids, tid))
		priv.color = true;

	thread__set_priv(thread, priv.ptr);
	return thread;
}
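/*
 * Does the comm match one of the names passed via --task-name?
 * Exact string compare by default, substring match when fuzzy
 * matching is enabled.
 */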
static bool sched_match_task(struct perf_sched *sched, const char *comm_str)
{
	bool fuzzy_match = sched->map.fuzzy;
	struct strlist *task_names = sched->map.task_names;
	struct str_node *node;

	strlist__for_each_entry(node, task_names) {
		bool match_found = fuzzy_match ? !!strstr(comm_str, node->s) :
						 !strcmp(comm_str, node->s);
		if (match_found)
			return true;
	}

	return false;
}

static void print_sched_map(struct perf_sched *sched, struct perf_cpu this_cpu, int cpus_nr,
			    const char *color, bool sched_out)
{
	for (int i = 0; i < cpus_nr; i++) {
		struct perf_cpu cpu = {
			.cpu = sched->map.comp ? sched->map.comp_cpus[i].cpu : i,
		};
		struct thread *curr_thread = sched->curr_thread[cpu.cpu];
		struct thread *curr_out_thread = sched->curr_out_thread[cpu.cpu];
		struct thread_runtime *curr_tr;
		const char *pid_color = color;
		const char *cpu_color = color;
		char symbol = ' ';
		struct thread *thread_to_check = sched_out ? curr_out_thread : curr_thread;

		if (thread_to_check && thread__has_color(thread_to_check))
			pid_color = COLOR_PIDS;

		if (sched->map.color_cpus && perf_cpu_map__has(sched->map.color_cpus, cpu))
			cpu_color = COLOR_CPUS;

		if (cpu.cpu == this_cpu.cpu)
			symbol = '*';

		color_fprintf(stdout, cpu.cpu != this_cpu.cpu ? color : cpu_color, "%c", symbol);

		thread_to_check = sched_out ? sched->curr_out_thread[cpu.cpu] :
					      sched->curr_thread[cpu.cpu];

		if (thread_to_check) {
			curr_tr = thread__get_runtime(thread_to_check);
			if (curr_tr == NULL)
				return;

			if (sched_out) {
				if (cpu.cpu == this_cpu.cpu)
					color_fprintf(stdout, color, "-  ");
				else {
					curr_tr = thread__get_runtime(sched->curr_thread[cpu.cpu]);
					if (curr_tr != NULL)
						color_fprintf(stdout, pid_color, "%2s ",
							      curr_tr->shortname);
				}
			} else
				color_fprintf(stdout, pid_color, "%2s ", curr_tr->shortname);
		} else
			color_fprintf(stdout, color, "   ");
	}
}
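/*
 * sched_switch handler for 'perf sched map': assign a two-character
 * shortname to each newly seen task and redraw the per-CPU row, with
 * this CPU marked by '*'.
 */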
static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
			    struct perf_sample *sample, struct machine *machine)
{
	const u32 next_pid = evsel__intval(evsel, sample, "next_pid");
	const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid");
	struct thread *sched_in, *sched_out;
	struct thread_runtime *tr;
	int new_shortname;
	u64 timestamp0, timestamp = sample->time;
	s64 delta;
	struct perf_cpu this_cpu = {
		.cpu = sample->cpu,
	};
	int cpus_nr;
	int proceed;
	bool new_cpu = false;
	const char *color = PERF_COLOR_NORMAL;
	char stimestamp[32];
	const char *str;

	BUG_ON(this_cpu.cpu >= MAX_CPUS || this_cpu.cpu < 0);

	if (this_cpu.cpu > sched->max_cpu.cpu)
		sched->max_cpu = this_cpu;

	if (sched->map.comp) {
		cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS);
		if (!__test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) {
			sched->map.comp_cpus[cpus_nr++] = this_cpu;
			new_cpu = true;
		}
	} else
		cpus_nr = sched->max_cpu.cpu;

	timestamp0 = sched->cpu_last_switched[this_cpu.cpu];
	sched->cpu_last_switched[this_cpu.cpu] = timestamp;
	if (timestamp0)
		delta = timestamp - timestamp0;
	else
		delta = 0;

	if (delta < 0) {
		pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta);
		return -1;
	}

	sched_in = map__findnew_thread(sched, machine, -1, next_pid);
	sched_out = map__findnew_thread(sched, machine, -1, prev_pid);
	if (sched_in == NULL || sched_out == NULL)
		return -1;

	tr = thread__get_runtime(sched_in);
	if (tr == NULL) {
		thread__put(sched_in);
		return -1;
	}

	sched->curr_thread[this_cpu.cpu] = thread__get(sched_in);
	sched->curr_out_thread[this_cpu.cpu] = thread__get(sched_out);

	str = thread__comm_str(sched_in);
	new_shortname = 0;
	if (!tr->shortname[0]) {
		if (!strcmp(thread__comm_str(sched_in), "swapper")) {
			/*
			 * Don't allocate a letter-number for swapper:0
			 * as a shortname. Instead, we use '.' for it.
			 */
			tr->shortname[0] = '.';
			tr->shortname[1] = ' ';
		} else if (!sched->map.task_name || sched_match_task(sched, str)) {
			tr->shortname[0] = sched->next_shortname1;
			tr->shortname[1] = sched->next_shortname2;

			if (sched->next_shortname1 < 'Z') {
				sched->next_shortname1++;
			} else {
				sched->next_shortname1 = 'A';
				if (sched->next_shortname2 < '9')
					sched->next_shortname2++;
				else
					sched->next_shortname2 = '0';
			}
		} else {
			tr->shortname[0] = '-';
			tr->shortname[1] = ' ';
		}
		new_shortname = 1;
	}

	if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, this_cpu))
		goto out;

	proceed = 0;
	str = thread__comm_str(sched_in);
	/*
	 * Check which of sched_in and sched_out matches the passed --task-name
	 * arguments and call the corresponding print_sched_map.
	 */
	if (sched->map.task_name && !sched_match_task(sched, str)) {
		if (!sched_match_task(sched, thread__comm_str(sched_out)))
			goto out;
		else
			goto sched_out;

	} else {
		str = thread__comm_str(sched_out);
		if (!(sched->map.task_name && !sched_match_task(sched, str)))
			proceed = 1;
	}

	printf("  ");

	print_sched_map(sched, this_cpu, cpus_nr, color, false);

	timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp));
	color_fprintf(stdout, color, "  %12s secs ", stimestamp);
	if (new_shortname || tr->comm_changed || (verbose > 0 && thread__tid(sched_in))) {
		const char *pid_color = color;

		if (thread__has_color(sched_in))
			pid_color = COLOR_PIDS;

		color_fprintf(stdout, pid_color, "%s => %s:%d",
			      tr->shortname, thread__comm_str(sched_in), thread__tid(sched_in));
		tr->comm_changed = false;
	}

	if (sched->map.comp && new_cpu)
		color_fprintf(stdout, color, " (CPU %d)", this_cpu.cpu);

	if (proceed != 1) {
		color_fprintf(stdout, color, "\n");
		goto out;
	}

sched_out:
	if (sched->map.task_name) {
		tr = thread__get_runtime(sched->curr_out_thread[this_cpu.cpu]);
		if (strcmp(tr->shortname, "") == 0)
			goto out;

		if (proceed == 1)
			color_fprintf(stdout, color, "\n");

		printf("  ");
		print_sched_map(sched, this_cpu, cpus_nr, color, true);
		timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp));
		color_fprintf(stdout, color, "  %12s secs ", stimestamp);
	}

	color_fprintf(stdout, color, "\n");

out:
	if (sched->map.task_name)
		thread__put(sched_out);

	thread__put(sched_in);

	return 0;
}
sample, "prev_pid"), 1787 next_pid = evsel__intval(evsel, sample, "next_pid"); 1788 1789 if (sched->curr_pid[this_cpu] != (u32)-1) { 1790 /* 1791 * Are we trying to switch away a PID that is 1792 * not current? 1793 */ 1794 if (sched->curr_pid[this_cpu] != prev_pid) 1795 sched->nr_context_switch_bugs++; 1796 } 1797 1798 if (sched->tp_handler->switch_event) 1799 err = sched->tp_handler->switch_event(sched, evsel, sample, machine); 1800 1801 sched->curr_pid[this_cpu] = next_pid; 1802 return err; 1803 } 1804 1805 static int process_sched_runtime_event(const struct perf_tool *tool, 1806 struct evsel *evsel, 1807 struct perf_sample *sample, 1808 struct machine *machine) 1809 { 1810 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1811 1812 if (sched->tp_handler->runtime_event) 1813 return sched->tp_handler->runtime_event(sched, evsel, sample, machine); 1814 1815 return 0; 1816 } 1817 1818 static int perf_sched__process_fork_event(const struct perf_tool *tool, 1819 union perf_event *event, 1820 struct perf_sample *sample, 1821 struct machine *machine) 1822 { 1823 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1824 1825 /* run the fork event through the perf machinery */ 1826 perf_event__process_fork(tool, event, sample, machine); 1827 1828 /* and then run additional processing needed for this command */ 1829 if (sched->tp_handler->fork_event) 1830 return sched->tp_handler->fork_event(sched, event, machine); 1831 1832 return 0; 1833 } 1834 1835 static int process_sched_migrate_task_event(const struct perf_tool *tool, 1836 struct evsel *evsel, 1837 struct perf_sample *sample, 1838 struct machine *machine) 1839 { 1840 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1841 1842 if (sched->tp_handler->migrate_task_event) 1843 return sched->tp_handler->migrate_task_event(sched, evsel, sample, machine); 1844 1845 return 0; 1846 } 1847 1848 typedef int (*tracepoint_handler)(const struct perf_tool *tool, 1849 struct evsel *evsel, 1850 struct perf_sample *sample, 1851 struct machine *machine); 1852 1853 static int perf_sched__process_tracepoint_sample(const struct perf_tool *tool __maybe_unused, 1854 union perf_event *event __maybe_unused, 1855 struct perf_sample *sample, 1856 struct evsel *evsel, 1857 struct machine *machine) 1858 { 1859 int err = 0; 1860 1861 if (evsel->handler != NULL) { 1862 tracepoint_handler f = evsel->handler; 1863 err = f(tool, evsel, sample, machine); 1864 } 1865 1866 return err; 1867 } 1868 1869 static int perf_sched__process_comm(const struct perf_tool *tool __maybe_unused, 1870 union perf_event *event, 1871 struct perf_sample *sample, 1872 struct machine *machine) 1873 { 1874 struct thread *thread; 1875 struct thread_runtime *tr; 1876 int err; 1877 1878 err = perf_event__process_comm(tool, event, sample, machine); 1879 if (err) 1880 return err; 1881 1882 thread = machine__find_thread(machine, sample->pid, sample->tid); 1883 if (!thread) { 1884 pr_err("Internal error: can't find thread\n"); 1885 return -1; 1886 } 1887 1888 tr = thread__get_runtime(thread); 1889 if (tr == NULL) { 1890 thread__put(thread); 1891 return -1; 1892 } 1893 1894 tr->comm_changed = true; 1895 thread__put(thread); 1896 1897 return 0; 1898 } 1899 1900 static int perf_sched__read_events(struct perf_sched *sched) 1901 { 1902 struct evsel_str_handler handlers[] = { 1903 { "sched:sched_switch", process_sched_switch_event, }, 1904 { "sched:sched_stat_runtime", process_sched_runtime_event, }, 1905 { "sched:sched_wakeup", 
static int perf_sched__read_events(struct perf_sched *sched)
{
	struct evsel_str_handler handlers[] = {
		{ "sched:sched_switch",	      process_sched_switch_event, },
		{ "sched:sched_stat_runtime", process_sched_runtime_event, },
		{ "sched:sched_wakeup",	      process_sched_wakeup_event, },
		{ "sched:sched_waking",	      process_sched_wakeup_event, },
		{ "sched:sched_wakeup_new",   process_sched_wakeup_event, },
		{ "sched:sched_migrate_task", process_sched_migrate_task_event, },
	};
	struct perf_session *session;
	struct perf_data data = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = sched->force,
	};
	int rc = -1;

	session = perf_session__new(&data, &sched->tool);
	if (IS_ERR(session)) {
		pr_debug("Error creating perf session");
		return PTR_ERR(session);
	}

	symbol__init(&session->header.env);

	/* prefer sched_waking if it is captured */
	if (evlist__find_tracepoint_by_name(session->evlist, "sched:sched_waking"))
		handlers[2].handler = process_sched_wakeup_ignore;

	if (perf_session__set_tracepoints_handlers(session, handlers))
		goto out_delete;

	if (perf_session__has_traces(session, "record -R")) {
		int err = perf_session__process_events(session);
		if (err) {
			pr_err("Failed to process events, error %d", err);
			goto out_delete;
		}

		sched->nr_events      = session->evlist->stats.nr_events[0];
		sched->nr_lost_events = session->evlist->stats.total_lost;
		sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST];
	}

	rc = 0;
out_delete:
	perf_session__delete(session);
	return rc;
}

/*
 * scheduling times are printed as msec.usec
 */
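/* e.g. 12345678 nsecs prints as "12.345" (12 msec, 345 usec) */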
static inline void print_sched_time(unsigned long long nsecs, int width)
{
	unsigned long msecs;
	unsigned long usecs;

	msecs  = nsecs / NSEC_PER_MSEC;
	nsecs -= msecs * NSEC_PER_MSEC;
	usecs  = nsecs / NSEC_PER_USEC;
	printf("%*lu.%03lu ", width, msecs, usecs);
}

/*
 * returns runtime data for event, allocating memory for it the
 * first time it is used.
 */
static struct evsel_runtime *evsel__get_runtime(struct evsel *evsel)
{
	struct evsel_runtime *r = evsel->priv;

	if (r == NULL) {
		r = zalloc(sizeof(struct evsel_runtime));
		evsel->priv = r;
	}

	return r;
}

/*
 * save last time event was seen per cpu
 */
static void evsel__save_time(struct evsel *evsel, u64 timestamp, u32 cpu)
{
	struct evsel_runtime *r = evsel__get_runtime(evsel);

	if (r == NULL)
		return;

	if ((cpu >= r->ncpu) || (r->last_time == NULL)) {
		int i, n = __roundup_pow_of_two(cpu+1);
		void *p = r->last_time;

		p = realloc(r->last_time, n * sizeof(u64));
		if (!p)
			return;

		r->last_time = p;
		for (i = r->ncpu; i < n; ++i)
			r->last_time[i] = (u64) 0;

		r->ncpu = n;
	}

	r->last_time[cpu] = timestamp;
}

/* returns last time this event was seen on the given cpu */
static u64 evsel__get_time(struct evsel *evsel, u32 cpu)
{
	struct evsel_runtime *r = evsel__get_runtime(evsel);

	if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu))
		return 0;

	return r->last_time[cpu];
}

static int comm_width = 30;

static char *timehist_get_commstr(struct thread *thread)
{
	static char str[32];
	const char *comm = thread__comm_str(thread);
	pid_t tid = thread__tid(thread);
	pid_t pid = thread__pid(thread);
	int n;

	if (pid == 0)
		n = scnprintf(str, sizeof(str), "%s", comm);

	else if (tid != pid)
		n = scnprintf(str, sizeof(str), "%s[%d/%d]", comm, tid, pid);

	else
		n = scnprintf(str, sizeof(str), "%s[%d]", comm, tid);

	if (n > comm_width)
		comm_width = n;

	return str;
}

/* prio field format: xxx or xxx->yyy */
#define MAX_PRIO_STR_LEN 8
static char *timehist_get_priostr(struct evsel *evsel,
				  struct thread *thread,
				  struct perf_sample *sample)
{
	static char prio_str[16];
	int prev_prio = (int)evsel__intval(evsel, sample, "prev_prio");
	struct thread_runtime *tr = thread__priv(thread);

	if (tr->prio != prev_prio && tr->prio != -1)
		scnprintf(prio_str, sizeof(prio_str), "%d->%d", tr->prio, prev_prio);
	else
		scnprintf(prio_str, sizeof(prio_str), "%d", prev_prio);

	return prio_str;
}
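/*
 * Three header rows: column names, units, and a dotted separator,
 * laid out to match the sample rows printed by timehist_print_sample().
 */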
"(msec)"); 2112 2113 printf("\n"); 2114 2115 /* 2116 * separator 2117 */ 2118 printf("%.15s %.6s ", graph_dotted_line, graph_dotted_line); 2119 2120 if (sched->show_cpu_visual) 2121 printf(" %.*s ", ncpus, graph_dotted_line); 2122 2123 printf(" %.*s", comm_width, graph_dotted_line); 2124 2125 if (sched->show_prio) 2126 printf(" %.*s", MAX_PRIO_STR_LEN, graph_dotted_line); 2127 2128 printf(" %.9s %.9s %.9s", graph_dotted_line, graph_dotted_line, graph_dotted_line); 2129 2130 if (sched->pre_migrations) 2131 printf(" %.9s", graph_dotted_line); 2132 2133 if (sched->show_state) 2134 printf(" %.5s", graph_dotted_line); 2135 2136 printf("\n"); 2137 } 2138 2139 static void timehist_print_sample(struct perf_sched *sched, 2140 struct evsel *evsel, 2141 struct perf_sample *sample, 2142 struct addr_location *al, 2143 struct thread *thread, 2144 u64 t, const char state) 2145 { 2146 struct thread_runtime *tr = thread__priv(thread); 2147 const char *next_comm = evsel__strval(evsel, sample, "next_comm"); 2148 const u32 next_pid = evsel__intval(evsel, sample, "next_pid"); 2149 u32 max_cpus = sched->max_cpu.cpu + 1; 2150 char tstr[64]; 2151 char nstr[30]; 2152 u64 wait_time; 2153 2154 if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) 2155 return; 2156 2157 timestamp__scnprintf_usec(t, tstr, sizeof(tstr)); 2158 printf("%15s [%04d] ", tstr, sample->cpu); 2159 2160 if (sched->show_cpu_visual) { 2161 u32 i; 2162 char c; 2163 2164 printf(" "); 2165 for (i = 0; i < max_cpus; ++i) { 2166 /* flag idle times with 'i'; others are sched events */ 2167 if (i == sample->cpu) 2168 c = (thread__tid(thread) == 0) ? 'i' : 's'; 2169 else 2170 c = ' '; 2171 printf("%c", c); 2172 } 2173 printf(" "); 2174 } 2175 2176 printf(" %-*s ", comm_width, timehist_get_commstr(thread)); 2177 2178 if (sched->show_prio) 2179 printf(" %-*s ", MAX_PRIO_STR_LEN, timehist_get_priostr(evsel, thread, sample)); 2180 2181 wait_time = tr->dt_sleep + tr->dt_iowait + tr->dt_preempt; 2182 print_sched_time(wait_time, 6); 2183 2184 print_sched_time(tr->dt_delay, 6); 2185 print_sched_time(tr->dt_run, 6); 2186 if (sched->pre_migrations) 2187 print_sched_time(tr->dt_pre_mig, 6); 2188 2189 if (sched->show_state) 2190 printf(" %5c ", thread__tid(thread) == 0 ? 

static void timehist_print_sample(struct perf_sched *sched,
				  struct evsel *evsel,
				  struct perf_sample *sample,
				  struct addr_location *al,
				  struct thread *thread,
				  u64 t, const char state)
{
	struct thread_runtime *tr = thread__priv(thread);
	const char *next_comm = evsel__strval(evsel, sample, "next_comm");
	const u32 next_pid = evsel__intval(evsel, sample, "next_pid");
	u32 max_cpus = sched->max_cpu.cpu + 1;
	char tstr[64];
	char nstr[30];
	u64 wait_time;

	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
		return;

	timestamp__scnprintf_usec(t, tstr, sizeof(tstr));
	printf("%15s [%04d] ", tstr, sample->cpu);

	if (sched->show_cpu_visual) {
		u32 i;
		char c;

		printf(" ");
		for (i = 0; i < max_cpus; ++i) {
			/* flag idle times with 'i'; others are sched events */
			if (i == sample->cpu)
				c = (thread__tid(thread) == 0) ? 'i' : 's';
			else
				c = ' ';
			printf("%c", c);
		}
		printf(" ");
	}

	printf(" %-*s ", comm_width, timehist_get_commstr(thread));

	if (sched->show_prio)
		printf(" %-*s ", MAX_PRIO_STR_LEN, timehist_get_priostr(evsel, thread, sample));

	wait_time = tr->dt_sleep + tr->dt_iowait + tr->dt_preempt;
	print_sched_time(wait_time, 6);

	print_sched_time(tr->dt_delay, 6);
	print_sched_time(tr->dt_run, 6);
	if (sched->pre_migrations)
		print_sched_time(tr->dt_pre_mig, 6);

	if (sched->show_state)
		printf(" %5c ", thread__tid(thread) == 0 ? 'I' : state);

	if (sched->show_next) {
		snprintf(nstr, sizeof(nstr), "next: %s[%d]", next_comm, next_pid);
		printf(" %-*s", comm_width, nstr);
	}

	if (sched->show_wakeups && !sched->show_next)
		printf(" %-*s", comm_width, "");

	if (thread__tid(thread) == 0)
		goto out;

	if (sched->show_callchain)
		printf(" ");

	sample__fprintf_sym(sample, al, 0,
			    EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE |
			    EVSEL__PRINT_CALLCHAIN_ARROW |
			    EVSEL__PRINT_SKIP_IGNORED,
			    get_tls_callchain_cursor(), symbol_conf.bt_stop_list, stdout);

out:
	printf("\n");
}

/*
 * Explanation of delta-time stats:
 *
 *            t = time of current schedule out event
 *        tprev = time of previous sched out event
 *                also time of schedule-in event for current task
 *    last_time = time of last sched change event for current task
 *                (i.e, time process was last scheduled out)
 * ready_to_run = time of wakeup for current task
 *     migrated = time of task migration to another CPU
 *
 * -----|-------------|-------------|-------------|-------------|-----
 *     last          ready       migrated       tprev            t
 *     time         to run
 *
 *      |---------------- dt_wait ----------------|
 *                    |--------- dt_delay --------|-- dt_run --|
 *                    |- dt_pre_mig -|
 *
 *      dt_run = run time of current task
 *     dt_wait = time between last schedule out event for task and tprev
 *               represents time spent off the cpu
 *    dt_delay = time between wakeup and schedule-in of task
 *  dt_pre_mig = time between wakeup and migration to another CPU
 */

static void timehist_update_runtime_stats(struct thread_runtime *r,
					  u64 t, u64 tprev)
{
	r->dt_delay   = 0;
	r->dt_sleep   = 0;
	r->dt_iowait  = 0;
	r->dt_preempt = 0;
	r->dt_run     = 0;
	r->dt_pre_mig = 0;

	if (tprev) {
		r->dt_run = t - tprev;
		if (r->ready_to_run) {
			if (r->ready_to_run > tprev)
				pr_debug("time travel: wakeup time for task > previous sched_switch event\n");
			else
				r->dt_delay = tprev - r->ready_to_run;

			if ((r->migrated > r->ready_to_run) && (r->migrated < tprev))
				r->dt_pre_mig = r->migrated - r->ready_to_run;
		}

		if (r->last_time > tprev)
			pr_debug("time travel: last sched out time for task > previous sched_switch event\n");
		else if (r->last_time) {
			u64 dt_wait = tprev - r->last_time;

			if (r->last_state == 'R')
				r->dt_preempt = dt_wait;
			else if (r->last_state == 'D')
				r->dt_iowait = dt_wait;
			else
				r->dt_sleep = dt_wait;
		}
	}

	update_stats(&r->run_stats, r->dt_run);

	r->total_run_time     += r->dt_run;
	r->total_delay_time   += r->dt_delay;
	r->total_sleep_time   += r->dt_sleep;
	r->total_iowait_time  += r->dt_iowait;
	r->total_preempt_time += r->dt_preempt;
	r->total_pre_mig_time += r->dt_pre_mig;
}
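
/*
 * Worked example for the bookkeeping above, with illustrative values in
 * nsecs: last_time=600000, ready_to_run=1000000, migrated=1200000,
 * tprev=1500000 and t=1800000 give
 *   dt_run     = t - tprev               = 300000
 *   dt_delay   = tprev - ready_to_run    = 500000
 *   dt_pre_mig = migrated - ready_to_run = 200000
 *   dt_wait    = tprev - last_time       = 900000, credited to
 * dt_preempt, dt_iowait or dt_sleep according to last_state.
 */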

static bool is_idle_sample(struct perf_sample *sample,
			   struct evsel *evsel)
{
	/* pid 0 == swapper == idle task */
	if (evsel__name_is(evsel, "sched:sched_switch"))
		return evsel__intval(evsel, sample, "prev_pid") == 0;

	return sample->pid == 0;
}

static void save_task_callchain(struct perf_sched *sched,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	struct callchain_cursor *cursor;
	struct thread *thread;

	/* want main thread for process - has maps */
	thread = machine__findnew_thread(machine, sample->pid, sample->pid);
	if (thread == NULL) {
		pr_debug("Failed to get thread for pid %d.\n", sample->pid);
		return;
	}

	if (!sched->show_callchain || sample->callchain == NULL)
		return;

	cursor = get_tls_callchain_cursor();

	if (thread__resolve_callchain(thread, cursor, evsel, sample,
				      NULL, NULL, sched->max_stack + 2) != 0) {
		if (verbose > 0)
			pr_err("Failed to resolve callchain. Skipping\n");

		return;
	}

	callchain_cursor_commit(cursor);

	while (true) {
		struct callchain_cursor_node *node;
		struct symbol *sym;

		node = callchain_cursor_current(cursor);
		if (node == NULL)
			break;

		sym = node->ms.sym;
		if (sym) {
			if (!strcmp(sym->name, "schedule") ||
			    !strcmp(sym->name, "__schedule") ||
			    !strcmp(sym->name, "preempt_schedule"))
				sym->ignore = 1;
		}

		callchain_cursor_advance(cursor);
	}
}

static int init_idle_thread(struct thread *thread)
{
	struct idle_thread_runtime *itr;

	thread__set_comm(thread, idle_comm, 0);

	itr = zalloc(sizeof(*itr));
	if (itr == NULL)
		return -ENOMEM;

	init_prio(&itr->tr);
	init_stats(&itr->tr.run_stats);
	callchain_init(&itr->callchain);
	callchain_cursor_reset(&itr->cursor);
	thread__set_priv(thread, itr);

	return 0;
}

/*
 * Track idle stats per cpu by maintaining a local thread
 * struct for the idle task on each cpu.
 */
static int init_idle_threads(int ncpu)
{
	int i, ret;

	idle_threads = zalloc(ncpu * sizeof(struct thread *));
	if (!idle_threads)
		return -ENOMEM;

	idle_max_cpu = ncpu;

	/* allocate the actual thread struct if needed */
	for (i = 0; i < ncpu; ++i) {
		idle_threads[i] = thread__new(0, 0);
		if (idle_threads[i] == NULL)
			return -ENOMEM;

		ret = init_idle_thread(idle_threads[i]);
		if (ret < 0)
			return ret;
	}

	return 0;
}

static void free_idle_threads(void)
{
	int i;

	if (idle_threads == NULL)
		return;

	for (i = 0; i < idle_max_cpu; ++i) {
		if ((idle_threads[i]))
			thread__delete(idle_threads[i]);
	}

	free(idle_threads);
}

static struct thread *get_idle_thread(int cpu)
{
	/*
	 * expand/allocate array of pointers to local thread
	 * structs if needed
	 */
	if ((cpu >= idle_max_cpu) || (idle_threads == NULL)) {
		int i, j = __roundup_pow_of_two(cpu+1);
		void *p;

		p = realloc(idle_threads, j * sizeof(struct thread *));
		if (!p)
			return NULL;

		idle_threads = (struct thread **) p;
		for (i = idle_max_cpu; i < j; ++i)
			idle_threads[i] = NULL;

		idle_max_cpu = j;
	}

	/* allocate a new thread struct if needed */
	if (idle_threads[cpu] == NULL) {
		idle_threads[cpu] = thread__new(0, 0);

		if (idle_threads[cpu]) {
			if (init_idle_thread(idle_threads[cpu]) < 0)
				return NULL;
		}
	}

	return idle_threads[cpu];
}
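
/*
 * Note on sizing: like evsel__save_time(), get_idle_thread() grows its
 * array in power-of-two steps, e.g. the first sample seen on cpu 5
 * allocates __roundup_pow_of_two(5 + 1) == 8 slots.
 */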

static void save_idle_callchain(struct perf_sched *sched,
				struct idle_thread_runtime *itr,
				struct perf_sample *sample)
{
	struct callchain_cursor *cursor;

	if (!sched->show_callchain || sample->callchain == NULL)
		return;

	cursor = get_tls_callchain_cursor();
	if (cursor == NULL)
		return;

	callchain_cursor__copy(&itr->cursor, cursor);
}

static struct thread *timehist_get_thread(struct perf_sched *sched,
					  struct perf_sample *sample,
					  struct machine *machine,
					  struct evsel *evsel)
{
	struct thread *thread;

	if (is_idle_sample(sample, evsel)) {
		thread = get_idle_thread(sample->cpu);
		if (thread == NULL)
			pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu);

	} else {
		/* there were samples with tid 0 but non-zero pid */
		thread = machine__findnew_thread(machine, sample->pid,
						 sample->tid ?: sample->pid);
		if (thread == NULL) {
			pr_debug("Failed to get thread for tid %d. skipping sample.\n",
				 sample->tid);
		}

		save_task_callchain(sched, sample, evsel, machine);
		if (sched->idle_hist) {
			struct thread *idle;
			struct idle_thread_runtime *itr;

			idle = get_idle_thread(sample->cpu);
			if (idle == NULL) {
				pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu);
				return NULL;
			}

			itr = thread__priv(idle);
			if (itr == NULL)
				return NULL;

			itr->last_thread = thread;

			/* copy task callchain when entering to idle */
			if (evsel__intval(evsel, sample, "next_pid") == 0)
				save_idle_callchain(sched, itr, sample);
		}
	}

	return thread;
}
2527 */ 2528 tr = thread__get_runtime(thread); 2529 if (tr && tr->prio != -1) 2530 prio = tr->prio; 2531 else if (evsel__name_is(evsel, "sched:sched_switch")) 2532 prio = evsel__intval(evsel, sample, "prev_prio"); 2533 2534 if (prio != -1 && !test_bit(prio, sched->prio_bitmap)) { 2535 rc = true; 2536 sched->skipped_samples++; 2537 } 2538 } 2539 2540 if (sched->idle_hist) { 2541 if (!evsel__name_is(evsel, "sched:sched_switch")) 2542 rc = true; 2543 else if (evsel__intval(evsel, sample, "prev_pid") != 0 && 2544 evsel__intval(evsel, sample, "next_pid") != 0) 2545 rc = true; 2546 } 2547 2548 return rc; 2549 } 2550 2551 static void timehist_print_wakeup_event(struct perf_sched *sched, 2552 struct evsel *evsel, 2553 struct perf_sample *sample, 2554 struct machine *machine, 2555 struct thread *awakened) 2556 { 2557 struct thread *thread; 2558 char tstr[64]; 2559 2560 thread = machine__findnew_thread(machine, sample->pid, sample->tid); 2561 if (thread == NULL) 2562 return; 2563 2564 /* show wakeup unless both awakee and awaker are filtered */ 2565 if (timehist_skip_sample(sched, thread, evsel, sample) && 2566 timehist_skip_sample(sched, awakened, evsel, sample)) { 2567 return; 2568 } 2569 2570 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); 2571 printf("%15s [%04d] ", tstr, sample->cpu); 2572 if (sched->show_cpu_visual) 2573 printf(" %*s ", sched->max_cpu.cpu + 1, ""); 2574 2575 printf(" %-*s ", comm_width, timehist_get_commstr(thread)); 2576 2577 /* dt spacer */ 2578 printf(" %9s %9s %9s ", "", "", ""); 2579 2580 printf("awakened: %s", timehist_get_commstr(awakened)); 2581 2582 printf("\n"); 2583 } 2584 2585 static int timehist_sched_wakeup_ignore(const struct perf_tool *tool __maybe_unused, 2586 union perf_event *event __maybe_unused, 2587 struct evsel *evsel __maybe_unused, 2588 struct perf_sample *sample __maybe_unused, 2589 struct machine *machine __maybe_unused) 2590 { 2591 return 0; 2592 } 2593 2594 static int timehist_sched_wakeup_event(const struct perf_tool *tool, 2595 union perf_event *event __maybe_unused, 2596 struct evsel *evsel, 2597 struct perf_sample *sample, 2598 struct machine *machine) 2599 { 2600 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 2601 struct thread *thread; 2602 struct thread_runtime *tr = NULL; 2603 /* want pid of awakened task not pid in sample */ 2604 const u32 pid = evsel__intval(evsel, sample, "pid"); 2605 2606 thread = machine__findnew_thread(machine, 0, pid); 2607 if (thread == NULL) 2608 return -1; 2609 2610 tr = thread__get_runtime(thread); 2611 if (tr == NULL) 2612 return -1; 2613 2614 if (tr->ready_to_run == 0) 2615 tr->ready_to_run = sample->time; 2616 2617 /* show wakeups if requested */ 2618 if (sched->show_wakeups && 2619 !perf_time__skip_sample(&sched->ptime, sample->time)) 2620 timehist_print_wakeup_event(sched, evsel, sample, machine, thread); 2621 2622 return 0; 2623 } 2624 2625 static void timehist_print_migration_event(struct perf_sched *sched, 2626 struct evsel *evsel, 2627 struct perf_sample *sample, 2628 struct machine *machine, 2629 struct thread *migrated) 2630 { 2631 struct thread *thread; 2632 char tstr[64]; 2633 u32 max_cpus; 2634 u32 ocpu, dcpu; 2635 2636 if (sched->summary_only) 2637 return; 2638 2639 max_cpus = sched->max_cpu.cpu + 1; 2640 ocpu = evsel__intval(evsel, sample, "orig_cpu"); 2641 dcpu = evsel__intval(evsel, sample, "dest_cpu"); 2642 2643 thread = machine__findnew_thread(machine, sample->pid, sample->tid); 2644 if (thread == NULL) 2645 return; 2646 2647 if 

static void timehist_print_migration_event(struct perf_sched *sched,
					   struct evsel *evsel,
					   struct perf_sample *sample,
					   struct machine *machine,
					   struct thread *migrated)
{
	struct thread *thread;
	char tstr[64];
	u32 max_cpus;
	u32 ocpu, dcpu;

	if (sched->summary_only)
		return;

	max_cpus = sched->max_cpu.cpu + 1;
	ocpu = evsel__intval(evsel, sample, "orig_cpu");
	dcpu = evsel__intval(evsel, sample, "dest_cpu");

	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
	if (thread == NULL)
		return;

	if (timehist_skip_sample(sched, thread, evsel, sample) &&
	    timehist_skip_sample(sched, migrated, evsel, sample)) {
		return;
	}

	timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
	printf("%15s [%04d] ", tstr, sample->cpu);

	if (sched->show_cpu_visual) {
		u32 i;
		char c;

		printf(" ");
		for (i = 0; i < max_cpus; ++i) {
			c = (i == sample->cpu) ? 'm' : ' ';
			printf("%c", c);
		}
		printf(" ");
	}

	printf(" %-*s ", comm_width, timehist_get_commstr(thread));

	/* dt spacer */
	printf(" %9s %9s %9s ", "", "", "");

	printf("migrated: %s", timehist_get_commstr(migrated));
	printf(" cpu %d => %d", ocpu, dcpu);

	printf("\n");
}

static int timehist_migrate_task_event(const struct perf_tool *tool,
				       union perf_event *event __maybe_unused,
				       struct evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
	struct thread *thread;
	struct thread_runtime *tr = NULL;
	/* want pid of migrated task not pid in sample */
	const u32 pid = evsel__intval(evsel, sample, "pid");

	thread = machine__findnew_thread(machine, 0, pid);
	if (thread == NULL)
		return -1;

	tr = thread__get_runtime(thread);
	if (tr == NULL)
		return -1;

	tr->migrations++;
	tr->migrated = sample->time;

	/* show migrations if requested */
	if (sched->show_migrations) {
		timehist_print_migration_event(sched, evsel, sample,
						machine, thread);
	}

	return 0;
}
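
/*
 * With -M/--migrations the handler above reports lines such as this
 * (illustrative):
 *
 *    79371.874576 [0011]  gcc[31949]                       migrated: gcc[31949] cpu 11 => 2
 */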

static void timehist_update_task_prio(struct evsel *evsel,
				      struct perf_sample *sample,
				      struct machine *machine)
{
	struct thread *thread;
	struct thread_runtime *tr = NULL;
	const u32 next_pid = evsel__intval(evsel, sample, "next_pid");
	const u32 next_prio = evsel__intval(evsel, sample, "next_prio");

	if (next_pid == 0)
		thread = get_idle_thread(sample->cpu);
	else
		thread = machine__findnew_thread(machine, -1, next_pid);

	if (thread == NULL)
		return;

	tr = thread__get_runtime(thread);
	if (tr == NULL)
		return;

	tr->prio = next_prio;
}

static int timehist_sched_change_event(const struct perf_tool *tool,
				       union perf_event *event,
				       struct evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
	struct perf_time_interval *ptime = &sched->ptime;
	struct addr_location al;
	struct thread *thread;
	struct thread_runtime *tr = NULL;
	u64 tprev, t = sample->time;
	int rc = 0;
	const char state = evsel__taskstate(evsel, sample, "prev_state");

	addr_location__init(&al);
	if (machine__resolve(machine, &al, sample) < 0) {
		pr_err("problem processing %d event. skipping it\n",
		       event->header.type);
		rc = -1;
		goto out;
	}

	if (sched->show_prio || sched->prio_str)
		timehist_update_task_prio(evsel, sample, machine);

	thread = timehist_get_thread(sched, sample, machine, evsel);
	if (thread == NULL) {
		rc = -1;
		goto out;
	}

	if (timehist_skip_sample(sched, thread, evsel, sample))
		goto out;

	tr = thread__get_runtime(thread);
	if (tr == NULL) {
		rc = -1;
		goto out;
	}

	tprev = evsel__get_time(evsel, sample->cpu);

	/*
	 * If start time given:
	 * - sample time is under window user cares about - skip sample
	 * - tprev is under window user cares about - reset to start of window
	 */
	if (ptime->start && ptime->start > t)
		goto out;

	if (tprev && ptime->start > tprev)
		tprev = ptime->start;

	/*
	 * If end time given:
	 * - previous sched event is out of window - we are done
	 * - sample time is beyond window user cares about - reset it
	 *   to close out stats for time window interest
	 * - If tprev is 0, that is, sched_in event for current task is
	 *   not recorded, cannot determine whether sched_in event is
	 *   within time window interest - ignore it
	 */
	if (ptime->end) {
		if (!tprev || tprev > ptime->end)
			goto out;

		if (t > ptime->end)
			t = ptime->end;
	}

	if (!sched->idle_hist || thread__tid(thread) == 0) {
		if (!cpu_list || test_bit(sample->cpu, cpu_bitmap))
			timehist_update_runtime_stats(tr, t, tprev);

		if (sched->idle_hist) {
			struct idle_thread_runtime *itr = (void *)tr;
			struct thread_runtime *last_tr;

			if (itr->last_thread == NULL)
				goto out;

			/* add current idle time as last thread's runtime */
			last_tr = thread__get_runtime(itr->last_thread);
			if (last_tr == NULL)
				goto out;

			timehist_update_runtime_stats(last_tr, t, tprev);
2826 */ 2827 last_tr->dt_run = 0; 2828 last_tr->dt_delay = 0; 2829 last_tr->dt_sleep = 0; 2830 last_tr->dt_iowait = 0; 2831 last_tr->dt_preempt = 0; 2832 2833 if (itr->cursor.nr) 2834 callchain_append(&itr->callchain, &itr->cursor, t - tprev); 2835 2836 itr->last_thread = NULL; 2837 } 2838 2839 if (!sched->summary_only) 2840 timehist_print_sample(sched, evsel, sample, &al, thread, t, state); 2841 } 2842 2843 out: 2844 if (sched->hist_time.start == 0 && t >= ptime->start) 2845 sched->hist_time.start = t; 2846 if (ptime->end == 0 || t <= ptime->end) 2847 sched->hist_time.end = t; 2848 2849 if (tr) { 2850 /* time of this sched_switch event becomes last time task seen */ 2851 tr->last_time = sample->time; 2852 2853 /* last state is used to determine where to account wait time */ 2854 tr->last_state = state; 2855 2856 /* sched out event for task so reset ready to run time and migrated time */ 2857 if (state == 'R') 2858 tr->ready_to_run = t; 2859 else 2860 tr->ready_to_run = 0; 2861 2862 tr->migrated = 0; 2863 } 2864 2865 evsel__save_time(evsel, sample->time, sample->cpu); 2866 2867 addr_location__exit(&al); 2868 return rc; 2869 } 2870 2871 static int timehist_sched_switch_event(const struct perf_tool *tool, 2872 union perf_event *event, 2873 struct evsel *evsel, 2874 struct perf_sample *sample, 2875 struct machine *machine __maybe_unused) 2876 { 2877 return timehist_sched_change_event(tool, event, evsel, sample, machine); 2878 } 2879 2880 static int process_lost(const struct perf_tool *tool __maybe_unused, 2881 union perf_event *event, 2882 struct perf_sample *sample, 2883 struct machine *machine __maybe_unused) 2884 { 2885 char tstr[64]; 2886 2887 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); 2888 printf("%15s ", tstr); 2889 printf("lost %" PRI_lu64 " events on cpu %d\n", event->lost.lost, sample->cpu); 2890 2891 return 0; 2892 } 2893 2894 2895 static void print_thread_runtime(struct thread *t, 2896 struct thread_runtime *r) 2897 { 2898 double mean = avg_stats(&r->run_stats); 2899 float stddev; 2900 2901 printf("%*s %5d %9" PRIu64 " ", 2902 comm_width, timehist_get_commstr(t), thread__ppid(t), 2903 (u64) r->run_stats.n); 2904 2905 print_sched_time(r->total_run_time, 8); 2906 stddev = rel_stddev_stats(stddev_stats(&r->run_stats), mean); 2907 print_sched_time(r->run_stats.min, 6); 2908 printf(" "); 2909 print_sched_time((u64) mean, 6); 2910 printf(" "); 2911 print_sched_time(r->run_stats.max, 6); 2912 printf(" "); 2913 printf("%5.2f", stddev); 2914 printf(" %5" PRIu64, r->migrations); 2915 printf("\n"); 2916 } 2917 2918 static void print_thread_waittime(struct thread *t, 2919 struct thread_runtime *r) 2920 { 2921 printf("%*s %5d %9" PRIu64 " ", 2922 comm_width, timehist_get_commstr(t), thread__ppid(t), 2923 (u64) r->run_stats.n); 2924 2925 print_sched_time(r->total_run_time, 8); 2926 print_sched_time(r->total_sleep_time, 6); 2927 printf(" "); 2928 print_sched_time(r->total_iowait_time, 6); 2929 printf(" "); 2930 print_sched_time(r->total_preempt_time, 6); 2931 printf(" "); 2932 print_sched_time(r->total_delay_time, 6); 2933 printf("\n"); 2934 } 2935 2936 struct total_run_stats { 2937 struct perf_sched *sched; 2938 u64 sched_count; 2939 u64 task_count; 2940 u64 total_run_time; 2941 }; 2942 2943 static int show_thread_runtime(struct thread *t, void *priv) 2944 { 2945 struct total_run_stats *stats = priv; 2946 struct thread_runtime *r; 2947 2948 if (thread__is_filtered(t)) 2949 return 0; 2950 2951 r = thread__priv(t); 2952 if (r && r->run_stats.n) { 2953 stats->task_count++; 2954 
static int show_thread_runtime(struct thread *t, void *priv)
{
	struct total_run_stats *stats = priv;
	struct thread_runtime *r;

	if (thread__is_filtered(t))
		return 0;

	r = thread__priv(t);
	if (r && r->run_stats.n) {
		stats->task_count++;
		stats->sched_count += r->run_stats.n;
		stats->total_run_time += r->total_run_time;

		if (stats->sched->show_state)
			print_thread_waittime(t, r);
		else
			print_thread_runtime(t, r);
	}

	return 0;
}

static size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node)
{
	const char *sep = " <- ";
	struct callchain_list *chain;
	size_t ret = 0;
	char bf[1024];
	bool first;

	if (node == NULL)
		return 0;

	ret = callchain__fprintf_folded(fp, node->parent);
	first = (ret == 0);

	list_for_each_entry(chain, &node->val, list) {
		if (chain->ip >= PERF_CONTEXT_MAX)
			continue;
		if (chain->ms.sym && chain->ms.sym->ignore)
			continue;
		ret += fprintf(fp, "%s%s", first ? "" : sep,
			       callchain_list__sym_name(chain, bf, sizeof(bf),
							false));
		first = false;
	}

	return ret;
}

static size_t timehist_print_idlehist_callchain(struct rb_root_cached *root)
{
	size_t ret = 0;
	FILE *fp = stdout;
	struct callchain_node *chain;
	struct rb_node *rb_node = rb_first_cached(root);

	printf(" %16s %8s %s\n", "Idle time (msec)", "Count", "Callchains");
	printf(" %.16s %.8s %.50s\n", graph_dotted_line, graph_dotted_line,
	       graph_dotted_line);

	while (rb_node) {
		chain = rb_entry(rb_node, struct callchain_node, rb_node);
		rb_node = rb_next(rb_node);

		ret += fprintf(fp, " ");
		print_sched_time(chain->hit, 12);
		ret += 16;  /* print_sched_time returns 2nd arg + 4 */
		ret += fprintf(fp, " %8d ", chain->count);
		ret += callchain__fprintf_folded(fp, chain);
		ret += fprintf(fp, "\n");
	}

	return ret;
}
"(msec)" : "%"); 3050 printf("%.117s\n", graph_dotted_line); 3051 3052 machine__for_each_thread(m, show_thread_runtime, &totals); 3053 task_count = totals.task_count; 3054 if (!task_count) 3055 printf("<no still running tasks>\n"); 3056 3057 /* CPU idle stats not tracked when samples were skipped */ 3058 if (sched->skipped_samples && !sched->idle_hist) 3059 return; 3060 3061 printf("\nIdle stats:\n"); 3062 for (i = 0; i < idle_max_cpu; ++i) { 3063 if (cpu_list && !test_bit(i, cpu_bitmap)) 3064 continue; 3065 3066 t = idle_threads[i]; 3067 if (!t) 3068 continue; 3069 3070 r = thread__priv(t); 3071 if (r && r->run_stats.n) { 3072 totals.sched_count += r->run_stats.n; 3073 printf(" CPU %2d idle for ", i); 3074 print_sched_time(r->total_run_time, 6); 3075 printf(" msec (%6.2f%%)\n", 100.0 * r->total_run_time / hist_time); 3076 } else 3077 printf(" CPU %2d idle entire time window\n", i); 3078 } 3079 3080 if (sched->idle_hist && sched->show_callchain) { 3081 callchain_param.mode = CHAIN_FOLDED; 3082 callchain_param.value = CCVAL_PERIOD; 3083 3084 callchain_register_param(&callchain_param); 3085 3086 printf("\nIdle stats by callchain:\n"); 3087 for (i = 0; i < idle_max_cpu; ++i) { 3088 struct idle_thread_runtime *itr; 3089 3090 t = idle_threads[i]; 3091 if (!t) 3092 continue; 3093 3094 itr = thread__priv(t); 3095 if (itr == NULL) 3096 continue; 3097 3098 callchain_param.sort(&itr->sorted_root.rb_root, &itr->callchain, 3099 0, &callchain_param); 3100 3101 printf(" CPU %2d:", i); 3102 print_sched_time(itr->tr.total_run_time, 6); 3103 printf(" msec\n"); 3104 timehist_print_idlehist_callchain(&itr->sorted_root); 3105 printf("\n"); 3106 } 3107 } 3108 3109 printf("\n" 3110 " Total number of unique tasks: %" PRIu64 "\n" 3111 "Total number of context switches: %" PRIu64 "\n", 3112 totals.task_count, totals.sched_count); 3113 3114 printf(" Total run time (msec): "); 3115 print_sched_time(totals.total_run_time, 2); 3116 printf("\n"); 3117 3118 printf(" Total scheduling time (msec): "); 3119 print_sched_time(hist_time, 2); 3120 printf(" (x %d)\n", sched->max_cpu.cpu); 3121 } 3122 3123 typedef int (*sched_handler)(const struct perf_tool *tool, 3124 union perf_event *event, 3125 struct evsel *evsel, 3126 struct perf_sample *sample, 3127 struct machine *machine); 3128 3129 static int perf_timehist__process_sample(const struct perf_tool *tool, 3130 union perf_event *event, 3131 struct perf_sample *sample, 3132 struct evsel *evsel, 3133 struct machine *machine) 3134 { 3135 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 3136 int err = 0; 3137 struct perf_cpu this_cpu = { 3138 .cpu = sample->cpu, 3139 }; 3140 3141 if (this_cpu.cpu > sched->max_cpu.cpu) 3142 sched->max_cpu = this_cpu; 3143 3144 if (evsel->handler != NULL) { 3145 sched_handler f = evsel->handler; 3146 3147 err = f(tool, event, evsel, sample, machine); 3148 } 3149 3150 return err; 3151 } 3152 3153 static int timehist_check_attr(struct perf_sched *sched, 3154 struct evlist *evlist) 3155 { 3156 struct evsel *evsel; 3157 struct evsel_runtime *er; 3158 3159 list_for_each_entry(evsel, &evlist->core.entries, core.node) { 3160 er = evsel__get_runtime(evsel); 3161 if (er == NULL) { 3162 pr_err("Failed to allocate memory for evsel runtime data\n"); 3163 return -1; 3164 } 3165 3166 /* only need to save callchain related to sched_switch event */ 3167 if (sched->show_callchain && 3168 evsel__name_is(evsel, "sched:sched_switch") && 3169 !evsel__has_callchain(evsel)) { 3170 pr_info("Samples of sched_switch event do not have 

static int timehist_check_attr(struct perf_sched *sched,
			       struct evlist *evlist)
{
	struct evsel *evsel;
	struct evsel_runtime *er;

	list_for_each_entry(evsel, &evlist->core.entries, core.node) {
		er = evsel__get_runtime(evsel);
		if (er == NULL) {
			pr_err("Failed to allocate memory for evsel runtime data\n");
			return -1;
		}

		/* only need to save callchain related to sched_switch event */
		if (sched->show_callchain &&
		    evsel__name_is(evsel, "sched:sched_switch") &&
		    !evsel__has_callchain(evsel)) {
			pr_info("Samples of sched_switch event do not have callchains.\n");
			sched->show_callchain = 0;
			symbol_conf.use_callchain = 0;
		}
	}

	return 0;
}

static int timehist_parse_prio_str(struct perf_sched *sched)
{
	char *p;
	unsigned long start_prio, end_prio;
	const char *str = sched->prio_str;

	if (!str)
		return 0;

	while (isdigit(*str)) {
		p = NULL;
		start_prio = strtoul(str, &p, 0);
		if (start_prio >= MAX_PRIO || (*p != '\0' && *p != ',' && *p != '-'))
			return -1;

		if (*p == '-') {
			str = ++p;
			p = NULL;
			end_prio = strtoul(str, &p, 0);

			if (end_prio >= MAX_PRIO || (*p != '\0' && *p != ','))
				return -1;

			if (end_prio < start_prio)
				return -1;
		} else {
			end_prio = start_prio;
		}

		for (; start_prio <= end_prio; start_prio++)
			__set_bit(start_prio, sched->prio_bitmap);

		if (*p)
			++p;

		str = p;
	}

	return 0;
}
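
/*
 * Example of the accepted syntax: "--prio 9,120-122" sets bits 9 and
 * 120..122 in prio_bitmap; priorities >= MAX_PRIO, malformed input and
 * reversed ranges (e.g. "122-120") all make the parser return -1.
 */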

static int perf_sched__timehist(struct perf_sched *sched)
{
	struct evsel_str_handler handlers[] = {
		{ "sched:sched_switch",       timehist_sched_switch_event, },
		{ "sched:sched_wakeup",       timehist_sched_wakeup_event, },
		{ "sched:sched_waking",       timehist_sched_wakeup_event, },
		{ "sched:sched_wakeup_new",   timehist_sched_wakeup_event, },
	};
	const struct evsel_str_handler migrate_handlers[] = {
		{ "sched:sched_migrate_task", timehist_migrate_task_event, },
	};
	struct perf_data data = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = sched->force,
	};

	struct perf_session *session;
	struct evlist *evlist;
	int err = -1;

	/*
	 * event handlers for timehist option
	 */
	sched->tool.sample	 = perf_timehist__process_sample;
	sched->tool.mmap	 = perf_event__process_mmap;
	sched->tool.comm	 = perf_event__process_comm;
	sched->tool.exit	 = perf_event__process_exit;
	sched->tool.fork	 = perf_event__process_fork;
	sched->tool.lost	 = process_lost;
	sched->tool.attr	 = perf_event__process_attr;
	sched->tool.tracing_data = perf_event__process_tracing_data;
	sched->tool.build_id	 = perf_event__process_build_id;

	sched->tool.ordering_requires_timestamps = true;

	symbol_conf.use_callchain = sched->show_callchain;

	session = perf_session__new(&data, &sched->tool);
	if (IS_ERR(session))
		return PTR_ERR(session);

	if (cpu_list) {
		err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
		if (err < 0)
			goto out;
	}

	evlist = session->evlist;

	symbol__init(&session->header.env);

	if (perf_time__parse_str(&sched->ptime, sched->time_str) != 0) {
		pr_err("Invalid time string\n");
		err = -EINVAL;
		goto out;
	}

	if (timehist_check_attr(sched, evlist) != 0)
		goto out;

	if (timehist_parse_prio_str(sched) != 0) {
		pr_err("Invalid prio string\n");
		goto out;
	}

	setup_pager();

	/* prefer sched_waking if it is captured */
	if (evlist__find_tracepoint_by_name(session->evlist, "sched:sched_waking"))
		handlers[1].handler = timehist_sched_wakeup_ignore;

	/* setup per-evsel handlers */
	if (perf_session__set_tracepoints_handlers(session, handlers))
		goto out;

	/* sched_switch event at a minimum needs to exist */
	if (!evlist__find_tracepoint_by_name(session->evlist, "sched:sched_switch")) {
		pr_err("No sched_switch events found. Have you run 'perf sched record'?\n");
		goto out;
	}

	if ((sched->show_migrations || sched->pre_migrations) &&
	    perf_session__set_tracepoints_handlers(session, migrate_handlers))
		goto out;

	/* pre-allocate struct for per-CPU idle stats */
	sched->max_cpu.cpu = session->header.env.nr_cpus_online;
	if (sched->max_cpu.cpu == 0)
		sched->max_cpu.cpu = 4;
	if (init_idle_threads(sched->max_cpu.cpu))
		goto out;

	/* summary_only implies summary option, but don't overwrite summary if set */
	if (sched->summary_only)
		sched->summary = sched->summary_only;

	if (!sched->summary_only)
		timehist_header(sched);

	err = perf_session__process_events(session);
	if (err) {
		pr_err("Failed to process events, error %d\n", err);
		goto out;
	}

	sched->nr_events      = evlist->stats.nr_events[0];
	sched->nr_lost_events = evlist->stats.total_lost;
	sched->nr_lost_chunks = evlist->stats.nr_events[PERF_RECORD_LOST];

	if (sched->summary)
		timehist_print_summary(sched, session);

out:
	free_idle_threads();
	perf_session__delete(session);

	return err;
}

static void print_bad_events(struct perf_sched *sched)
{
	if (sched->nr_unordered_timestamps && sched->nr_timestamps) {
		printf("  INFO: %.3f%% unordered timestamps (%ld out of %ld)\n",
			(double)sched->nr_unordered_timestamps/(double)sched->nr_timestamps*100.0,
			sched->nr_unordered_timestamps, sched->nr_timestamps);
	}
	if (sched->nr_lost_events && sched->nr_events) {
		printf("  INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
			(double)sched->nr_lost_events/(double)sched->nr_events * 100.0,
			sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks);
	}
	if (sched->nr_context_switch_bugs && sched->nr_timestamps) {
		printf("  INFO: %.3f%% context switch bugs (%ld out of %ld)",
			(double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0,
			sched->nr_context_switch_bugs, sched->nr_timestamps);
		if (sched->nr_lost_events)
			printf(" (due to lost events?)");
		printf("\n");
	}
}

static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *data)
{
	struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL;
	struct work_atoms *this;
	const char *comm = thread__comm_str(data->thread), *this_comm;
	bool leftmost = true;

	while (*new) {
		int cmp;

		this = container_of(*new, struct work_atoms, node);
		parent = *new;

		this_comm = thread__comm_str(this->thread);
		cmp = strcmp(comm, this_comm);
		if (cmp > 0) {
			new = &((*new)->rb_left);
		} else if (cmp < 0) {
			new = &((*new)->rb_right);
			leftmost = false;
		} else {
			this->num_merged++;
			this->total_runtime += data->total_runtime;
			this->nb_atoms += data->nb_atoms;
			this->total_lat += data->total_lat;
			list_splice(&data->work_list, &this->work_list);
			if (this->max_lat < data->max_lat) {
				this->max_lat = data->max_lat;
				this->max_lat_start = data->max_lat_start;
				this->max_lat_end = data->max_lat_end;
			}
			zfree(&data);
			return;
		}
	}

	data->num_merged++;
	rb_link_node(&data->node, parent, new);
	rb_insert_color_cached(&data->node, root, leftmost);
}
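
/*
 * Unless -p/--pids was given (sched->skip_merge), latency atoms are
 * re-keyed by comm here so that all threads sharing a name collapse
 * into one line of the latency report.
 */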
static void perf_sched__merge_lat(struct perf_sched *sched)
{
	struct work_atoms *data;
	struct rb_node *node;

	if (sched->skip_merge)
		return;

	while ((node = rb_first_cached(&sched->atom_root))) {
		rb_erase_cached(node, &sched->atom_root);
		data = rb_entry(node, struct work_atoms, node);
		__merge_work_atoms(&sched->merged_atom_root, data);
	}
}

static int setup_cpus_switch_event(struct perf_sched *sched)
{
	unsigned int i;

	sched->cpu_last_switched = calloc(MAX_CPUS, sizeof(*(sched->cpu_last_switched)));
	if (!sched->cpu_last_switched)
		return -1;

	sched->curr_pid = malloc(MAX_CPUS * sizeof(*(sched->curr_pid)));
	if (!sched->curr_pid) {
		zfree(&sched->cpu_last_switched);
		return -1;
	}

	for (i = 0; i < MAX_CPUS; i++)
		sched->curr_pid[i] = -1;

	return 0;
}

static void free_cpus_switch_event(struct perf_sched *sched)
{
	zfree(&sched->curr_pid);
	zfree(&sched->cpu_last_switched);
}

static int perf_sched__lat(struct perf_sched *sched)
{
	int rc = -1;
	struct rb_node *next;

	setup_pager();

	if (setup_cpus_switch_event(sched))
		return rc;

	if (perf_sched__read_events(sched))
		goto out_free_cpus_switch_event;

	perf_sched__merge_lat(sched);
	perf_sched__sort_lat(sched);

	printf("\n -------------------------------------------------------------------------------------------------------------------------------------------\n");
	printf("  Task                  |   Runtime ms  |  Count   | Avg delay ms    | Max delay ms    | Max delay start           | Max delay end          |\n");
	printf(" -------------------------------------------------------------------------------------------------------------------------------------------\n");

	next = rb_first_cached(&sched->sorted_atom_root);

	while (next) {
		struct work_atoms *work_list;

		work_list = rb_entry(next, struct work_atoms, node);
		output_lat_thread(sched, work_list);
		next = rb_next(next);
		thread__zput(work_list->thread);
	}

	printf(" -----------------------------------------------------------------------------------------------------------------\n");
	printf("  TOTAL:                |%11.3f ms |%9" PRIu64 " |\n",
		(double)sched->all_runtime / NSEC_PER_MSEC, sched->all_count);

	printf(" ---------------------------------------------------\n");

	print_bad_events(sched);
	printf("\n");

	rc = 0;

out_free_cpus_switch_event:
	free_cpus_switch_event(sched);
	return rc;
}

static int setup_map_cpus(struct perf_sched *sched)
{
	sched->max_cpu.cpu = sysconf(_SC_NPROCESSORS_CONF);

	if (sched->map.comp) {
		sched->map.comp_cpus = zalloc(sched->max_cpu.cpu * sizeof(int));
		if (!sched->map.comp_cpus)
			return -1;
	}

	if (sched->map.cpus_str) {
		sched->map.cpus = perf_cpu_map__new(sched->map.cpus_str);
		if (!sched->map.cpus) {
			pr_err("failed to get cpus map from %s\n", sched->map.cpus_str);
			zfree(&sched->map.comp_cpus);
			return -1;
		}
	}

	return 0;
}

static int setup_color_pids(struct perf_sched *sched)
{
	struct perf_thread_map *map;

	if (!sched->map.color_pids_str)
		return 0;

	map = thread_map__new_by_tid_str(sched->map.color_pids_str);
	if (!map) {
		pr_err("failed to get thread map from %s\n", sched->map.color_pids_str);
		return -1;
	}

	sched->map.color_pids = map;
	return 0;
}

static int setup_color_cpus(struct perf_sched *sched)
{
	struct perf_cpu_map *map;

	if (!sched->map.color_cpus_str)
		return 0;

	map = perf_cpu_map__new(sched->map.color_cpus_str);
	if (!map) {
		pr_err("failed to get cpus map from %s\n", sched->map.color_cpus_str);
		return -1;
	}

	sched->map.color_cpus = map;
	return 0;
}

static int perf_sched__map(struct perf_sched *sched)
{
	int rc = -1;

	sched->curr_thread = calloc(MAX_CPUS, sizeof(*(sched->curr_thread)));
	if (!sched->curr_thread)
		return rc;

	sched->curr_out_thread = calloc(MAX_CPUS, sizeof(*(sched->curr_out_thread)));
	if (!sched->curr_out_thread)
		goto out_free_curr_thread;

	if (setup_cpus_switch_event(sched))
		goto out_free_curr_thread;

	if (setup_map_cpus(sched))
		goto out_free_cpus_switch_event;

	if (setup_color_pids(sched))
		goto out_put_map_cpus;

	if (setup_color_cpus(sched))
		goto out_put_color_pids;

	setup_pager();
	if (perf_sched__read_events(sched))
		goto out_put_color_cpus;

	rc = 0;
	print_bad_events(sched);

out_put_color_cpus:
	perf_cpu_map__put(sched->map.color_cpus);

out_put_color_pids:
	perf_thread_map__put(sched->map.color_pids);

out_put_map_cpus:
	zfree(&sched->map.comp_cpus);
	perf_cpu_map__put(sched->map.cpus);

out_free_cpus_switch_event:
	free_cpus_switch_event(sched);

out_free_curr_thread:
	zfree(&sched->curr_out_thread);
	zfree(&sched->curr_thread);
	return rc;
}

static int perf_sched__replay(struct perf_sched *sched)
{
	int ret;
	unsigned long i;

	mutex_init(&sched->start_work_mutex);
	mutex_init(&sched->work_done_wait_mutex);

	ret = setup_cpus_switch_event(sched);
	if (ret)
		goto out_mutex_destroy;

	calibrate_run_measurement_overhead(sched);
	calibrate_sleep_measurement_overhead(sched);

	test_calibrations(sched);

	ret = perf_sched__read_events(sched);
	if (ret)
		goto out_free_cpus_switch_event;

	printf("nr_run_events:        %ld\n", sched->nr_run_events);
	printf("nr_sleep_events:      %ld\n", sched->nr_sleep_events);
	printf("nr_wakeup_events:     %ld\n", sched->nr_wakeup_events);

	if (sched->targetless_wakeups)
		printf("target-less wakeups:  %ld\n", sched->targetless_wakeups);
	if (sched->multitarget_wakeups)
		printf("multi-target wakeups: %ld\n", sched->multitarget_wakeups);
	if (sched->nr_run_events_optimized)
		printf("run atoms optimized:  %ld\n",
			sched->nr_run_events_optimized);

	print_task_traces(sched);
	add_cross_task_wakeups(sched);

	sched->thread_funcs_exit = false;
	create_tasks(sched);
	printf("------------------------------------------------------------\n");
	if (sched->replay_repeat == 0)
		sched->replay_repeat = UINT_MAX;

	for (i = 0; i < sched->replay_repeat; i++)
		run_one_test(sched);

	sched->thread_funcs_exit = true;
	destroy_tasks(sched);

out_free_cpus_switch_event:
	free_cpus_switch_event(sched);

out_mutex_destroy:
	mutex_destroy(&sched->start_work_mutex);
	mutex_destroy(&sched->work_done_wait_mutex);
	return ret;
}

static void setup_sorting(struct perf_sched *sched, const struct option *options,
			  const char * const usage_msg[])
{
	char *tmp, *tok, *str = strdup(sched->sort_order);

	for (tok = strtok_r(str, ", ", &tmp);
			tok; tok = strtok_r(NULL, ", ", &tmp)) {
		if (sort_dimension__add(tok, &sched->sort_list) < 0) {
			usage_with_options_msg(usage_msg, options,
					"Unknown --sort key: `%s'", tok);
		}
	}

	free(str);

	sort_dimension__add("pid", &sched->cmp_pid);
}

static bool schedstat_events_exposed(void)
{
	/*
	 * Select "sched:sched_stat_wait" event to check
	 * whether schedstat tracepoints are exposed.
	 */
	return IS_ERR(trace_event__tp_format("sched", "sched_stat_wait")) ?
		false : true;
}

static int __cmd_record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	char **rec_argv;
	const char **rec_argv_copy;
	const char * const record_args[] = {
		"record",
		"-a",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e", "sched:sched_switch",
		"-e", "sched:sched_stat_runtime",
		"-e", "sched:sched_process_fork",
		"-e", "sched:sched_wakeup_new",
		"-e", "sched:sched_migrate_task",
	};

	/*
	 * The tracepoints trace_sched_stat_{wait, sleep, iowait}
	 * are not exposed to user if CONFIG_SCHEDSTATS is not set,
	 * to prevent "perf sched record" execution failure, determine
	 * whether to record schedstat events according to actual situation.
	 */
	const char * const schedstat_args[] = {
		"-e", "sched:sched_stat_wait",
		"-e", "sched:sched_stat_sleep",
		"-e", "sched:sched_stat_iowait",
	};
	unsigned int schedstat_argc = schedstat_events_exposed() ?
		ARRAY_SIZE(schedstat_args) : 0;

	struct tep_event *waking_event;
	int ret;

	/*
	 * +2 for either "-e", "sched:sched_wakeup" or
	 * "-e", "sched:sched_waking"
	 */
	rec_argc = ARRAY_SIZE(record_args) + 2 + schedstat_argc + argc - 1;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -ENOMEM;
	rec_argv_copy = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv_copy == NULL) {
		free(rec_argv);
		return -ENOMEM;
	}

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	rec_argv[i++] = strdup("-e");
	waking_event = trace_event__tp_format("sched", "sched_waking");
	if (!IS_ERR(waking_event))
		rec_argv[i++] = strdup("sched:sched_waking");
	else
		rec_argv[i++] = strdup("sched:sched_wakeup");

	for (j = 0; j < schedstat_argc; j++)
		rec_argv[i++] = strdup(schedstat_args[j]);

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = strdup(argv[j]);

	BUG_ON(i != rec_argc);

	memcpy(rec_argv_copy, rec_argv, sizeof(char *) * rec_argc);
	ret = cmd_record(rec_argc, rec_argv_copy);

	for (i = 0; i < rec_argc; i++)
		free(rec_argv[i]);
	free(rec_argv);
	free(rec_argv_copy);

	return ret;
}
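
/*
 * Roughly equivalent invocation built above, assuming sched_waking and
 * the schedstat tracepoints are both available (illustrative):
 *
 *   perf record -a -R -m 1024 -c 1 \
 *        -e sched:sched_switch -e sched:sched_stat_runtime \
 *        -e sched:sched_process_fork -e sched:sched_wakeup_new \
 *        -e sched:sched_migrate_task -e sched:sched_waking \
 *        -e sched:sched_stat_wait -e sched:sched_stat_sleep \
 *        -e sched:sched_stat_iowait <user args>
 */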
"verbose", &verbose, 3779 "be more verbose (show symbol address, etc)"), 3780 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 3781 "dump raw trace in ASCII"), 3782 OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"), 3783 OPT_END() 3784 }; 3785 const struct option latency_options[] = { 3786 OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]", 3787 "sort by key(s): runtime, switch, avg, max"), 3788 OPT_INTEGER('C', "CPU", &sched.profile_cpu, 3789 "CPU to profile on"), 3790 OPT_BOOLEAN('p', "pids", &sched.skip_merge, 3791 "latency stats per pid instead of per comm"), 3792 OPT_PARENT(sched_options) 3793 }; 3794 const struct option replay_options[] = { 3795 OPT_UINTEGER('r', "repeat", &sched.replay_repeat, 3796 "repeat the workload replay N times (0: infinite)"), 3797 OPT_PARENT(sched_options) 3798 }; 3799 const struct option map_options[] = { 3800 OPT_BOOLEAN(0, "compact", &sched.map.comp, 3801 "map output in compact mode"), 3802 OPT_STRING(0, "color-pids", &sched.map.color_pids_str, "pids", 3803 "highlight given pids in map"), 3804 OPT_STRING(0, "color-cpus", &sched.map.color_cpus_str, "cpus", 3805 "highlight given CPUs in map"), 3806 OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus", 3807 "display given CPUs in map"), 3808 OPT_STRING(0, "task-name", &sched.map.task_name, "task", 3809 "map output only for the given task name(s)."), 3810 OPT_BOOLEAN(0, "fuzzy-name", &sched.map.fuzzy, 3811 "given command name can be partially matched (fuzzy matching)"), 3812 OPT_PARENT(sched_options) 3813 }; 3814 const struct option timehist_options[] = { 3815 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 3816 "file", "vmlinux pathname"), 3817 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, 3818 "file", "kallsyms pathname"), 3819 OPT_BOOLEAN('g', "call-graph", &sched.show_callchain, 3820 "Display call chains if present (default on)"), 3821 OPT_UINTEGER(0, "max-stack", &sched.max_stack, 3822 "Maximum number of functions to display backtrace."), 3823 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", 3824 "Look for files with symbols relative to this directory"), 3825 OPT_BOOLEAN('s', "summary", &sched.summary_only, 3826 "Show only syscall summary with statistics"), 3827 OPT_BOOLEAN('S', "with-summary", &sched.summary, 3828 "Show all syscalls and summary with statistics"), 3829 OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"), 3830 OPT_BOOLEAN('n', "next", &sched.show_next, "Show next task"), 3831 OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"), 3832 OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"), 3833 OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"), 3834 OPT_STRING(0, "time", &sched.time_str, "str", 3835 "Time span for analysis (start,stop)"), 3836 OPT_BOOLEAN(0, "state", &sched.show_state, "Show task state when sched-out"), 3837 OPT_STRING('p', "pid", &symbol_conf.pid_list_str, "pid[,pid...]", 3838 "analyze events only for given process id(s)"), 3839 OPT_STRING('t', "tid", &symbol_conf.tid_list_str, "tid[,tid...]", 3840 "analyze events only for given thread id(s)"), 3841 OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"), 3842 OPT_BOOLEAN(0, "show-prio", &sched.show_prio, "Show task priority"), 3843 OPT_STRING(0, "prio", &sched.prio_str, "prio", 3844 "analyze events only for given task priority(ies)"), 3845 OPT_BOOLEAN('P', "pre-migrations", &sched.pre_migrations, "Show pre-migration wait time"), 3846 OPT_PARENT(sched_options) 3847 }; 3848 3849 const 

	const char * const latency_usage[] = {
		"perf sched latency [<options>]",
		NULL
	};
	const char * const replay_usage[] = {
		"perf sched replay [<options>]",
		NULL
	};
	const char * const map_usage[] = {
		"perf sched map [<options>]",
		NULL
	};
	const char * const timehist_usage[] = {
		"perf sched timehist [<options>]",
		NULL
	};
	const char *const sched_subcommands[] = { "record", "latency", "map",
						  "replay", "script",
						  "timehist", NULL };
	const char *sched_usage[] = {
		NULL,
		NULL
	};
	struct trace_sched_handler lat_ops  = {
		.wakeup_event	    = latency_wakeup_event,
		.switch_event	    = latency_switch_event,
		.runtime_event	    = latency_runtime_event,
		.migrate_task_event = latency_migrate_task_event,
	};
	struct trace_sched_handler map_ops  = {
		.switch_event	    = map_switch_event,
	};
	struct trace_sched_handler replay_ops  = {
		.wakeup_event	    = replay_wakeup_event,
		.switch_event	    = replay_switch_event,
		.fork_event	    = replay_fork_event,
	};
	int ret;

	perf_tool__init(&sched.tool, /*ordered_events=*/true);
	sched.tool.sample	 = perf_sched__process_tracepoint_sample;
	sched.tool.comm		 = perf_sched__process_comm;
	sched.tool.namespaces	 = perf_event__process_namespaces;
	sched.tool.lost		 = perf_event__process_lost;
	sched.tool.fork		 = perf_sched__process_fork_event;

	argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands,
					sched_usage, PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc)
		usage_with_options(sched_usage, sched_options);

	/*
	 * Aliased to 'perf script' for now:
	 */
	if (!strcmp(argv[0], "script")) {
		return cmd_script(argc, argv);
	} else if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
		return __cmd_record(argc, argv);
	} else if (strlen(argv[0]) > 2 && strstarts("latency", argv[0])) {
		sched.tp_handler = &lat_ops;
		if (argc > 1) {
			argc = parse_options(argc, argv, latency_options, latency_usage, 0);
			if (argc)
				usage_with_options(latency_usage, latency_options);
		}
		setup_sorting(&sched, latency_options, latency_usage);
		return perf_sched__lat(&sched);
	} else if (!strcmp(argv[0], "map")) {
		if (argc) {
			argc = parse_options(argc, argv, map_options, map_usage, 0);
			if (argc)
				usage_with_options(map_usage, map_options);

			if (sched.map.task_name) {
				sched.map.task_names = strlist__new(sched.map.task_name, NULL);
				if (sched.map.task_names == NULL) {
					fprintf(stderr, "Failed to parse task names\n");
					return -1;
				}
			}
		}
		sched.tp_handler = &map_ops;
		setup_sorting(&sched, latency_options, latency_usage);
		return perf_sched__map(&sched);
	} else if (strlen(argv[0]) > 2 && strstarts("replay", argv[0])) {
		sched.tp_handler = &replay_ops;
		if (argc) {
			argc = parse_options(argc, argv, replay_options, replay_usage, 0);
			if (argc)
				usage_with_options(replay_usage, replay_options);
		}
		return perf_sched__replay(&sched);
	} else if (!strcmp(argv[0], "timehist")) {
		if (argc) {
			argc = parse_options(argc, argv, timehist_options,
					     timehist_usage, 0);
			if (argc)
				usage_with_options(timehist_usage, timehist_options);
		}
		if ((sched.show_wakeups || sched.show_next) &&
		    sched.summary_only) {
			pr_err(" Error: -s and -[n|w] are mutually exclusive.\n");
			parse_options_usage(timehist_usage, timehist_options, "s", true);
			if (sched.show_wakeups)
				parse_options_usage(NULL, timehist_options, "w", true);
			if (sched.show_next)
				parse_options_usage(NULL, timehist_options, "n", true);
			return -EINVAL;
		}
		ret = symbol__validate_sym_arguments();
		if (ret)
			return ret;

		return perf_sched__timehist(&sched);
	} else {
		usage_with_options(sched_usage, sched_options);
	}

	/* free usage string allocated by parse_options_subcommand */
	free((void *)sched_usage[0]);

	return 0;
}