// SPDX-License-Identifier: GPL-2.0
#include "builtin.h"
#include "perf.h"
#include "perf-sys.h"

#include "util/cpumap.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/evsel_fprintf.h"
#include "util/mutex.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/cloexec.h"
#include "util/thread_map.h"
#include "util/color.h"
#include "util/stat.h"
#include "util/string2.h"
#include "util/callchain.h"
#include "util/time-utils.h"

#include <subcmd/pager.h>
#include <subcmd/parse-options.h>
#include "util/trace-event.h"

#include "util/debug.h"
#include "util/event.h"
#include "util/util.h"
#include "util/synthetic-events.h"
#include "util/target.h"

#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/zalloc.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <inttypes.h>

#include <errno.h>
#include <semaphore.h>
#include <pthread.h>
#include <math.h>
#include <api/fs/fs.h>
#include <perf/cpumap.h>
#include <linux/time64.h>
#include <linux/err.h>

#include <linux/ctype.h>

#define PR_SET_NAME		15               /* Set process name */
#define MAX_CPUS		4096
#define COMM_LEN		20
#define SYM_LEN			129
/* Fallback upper bound on PIDs when kernel/pid_max cannot be read. */
#define MAX_PID			1024000
#define MAX_PRIO		140
#define SEP_LEN			100

/* --cpu filter: raw string from the command line and its parsed forms. */
static const char *cpu_list;
static struct perf_cpu_map *user_requested_cpus;
static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);

struct sched_atom;

/*
 * One task recreated during 'perf sched replay': identity, its recorded
 * event stream, and the thread/semaphores used to re-execute it.
 */
struct task_desc {
	unsigned long		nr;		/* sequential task index (slot in sched->tasks) */
	unsigned long		pid;		/* recorded PID */
	char			comm[COMM_LEN];	/* recorded command name */

	unsigned long		nr_events;	/* number of entries in atoms[] */
	unsigned long		curr_event;	/* index of the atom currently being replayed */
	struct sched_atom	**atoms;	/* recorded per-task event stream */

	pthread_t		thread;		/* worker thread replaying this task */

	sem_t			ready_for_work;	/* posted by the worker when ready for a run */
	sem_t			work_done_sem;	/* posted by the worker after a run completes */

	u64			cpu_usage;	/* CPU time consumed during the last run */
};

/* Kinds of recorded scheduler activity that can be replayed. */
enum sched_event_type {
	SCHED_EVENT_RUN,
	SCHED_EVENT_SLEEP,
	SCHED_EVENT_WAKEUP,
};

/* A single recorded scheduling event in a task's replay stream. */
struct sched_atom {
	enum sched_event_type	type;
	u64			timestamp;
	u64			duration;	/* run length for SCHED_EVENT_RUN */
	unsigned long		nr;		/* index within the owning task's atoms[] */
	sem_t			*wait_sem;	/* sleep<->wakeup pairing semaphore, if any */
	struct task_desc	*wakee;		/* target task for SCHED_EVENT_WAKEUP */
};

/* Latency-tracking state machine for a work atom. */
enum thread_state {
	THREAD_SLEEPING = 0,
	THREAD_WAIT_CPU,
	THREAD_SCHED_IN,
	THREAD_IGNORE
};

/* One sched-out .. sched-in interval used for latency accounting. */
struct work_atom {
	struct list_head	list;
	enum thread_state	state;
	u64			sched_out_time;
	u64			wake_up_time;
	u64			sched_in_time;
	u64			runtime;
};

/* Per-thread aggregation of work atoms, kept in an rbtree for sorting. */
struct work_atoms {
	struct list_head	work_list;
	struct thread		*thread;
	struct rb_node		node;
	u64			max_lat;	/* worst wakeup->sched-in latency seen */
	u64			max_lat_start;	/* wakeup time of the worst latency */
	u64			max_lat_end;	/* sched-in time of the worst latency */
	u64			total_lat;
	u64			nb_atoms;
	u64			total_runtime;
	int			num_merged;	/* threads folded in when merging by comm */
};

typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);

struct perf_sched;

/* Per-subcommand callbacks dispatched from the tracepoint handlers. */
struct trace_sched_handler {
	int (*switch_event)(struct perf_sched *sched, struct evsel *evsel,
			    struct perf_sample *sample, struct machine *machine);

	int (*runtime_event)(struct perf_sched *sched, struct evsel *evsel,
			     struct perf_sample *sample, struct machine *machine);

	int (*wakeup_event)(struct perf_sched *sched, struct evsel *evsel,
			    struct perf_sample *sample, struct machine *machine);

	/* PERF_RECORD_FORK event, not sched_process_fork tracepoint */
	int (*fork_event)(struct perf_sched *sched, union perf_event *event,
			  struct machine *machine);

	int (*migrate_task_event)(struct perf_sched *sched,
				  struct evsel *evsel,
				  struct perf_sample *sample,
				  struct machine *machine);
};

#define COLOR_PIDS PERF_COLOR_BLUE
#define COLOR_CPUS PERF_COLOR_BG_RED

/* Options and state specific to 'perf sched map'. */
struct perf_sched_map {
	DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS);
	struct perf_cpu		*comp_cpus;
	bool			 comp;		/* compact the CPU columns to CPUs seen */
	struct perf_thread_map *color_pids;	/* highlight these PIDs */
	const char		*color_pids_str;
	struct perf_cpu_map	*color_cpus;	/* highlight these CPUs */
	const char		*color_cpus_str;
	const char		*task_name;	/* only show tasks matching this name */
	struct strlist		*task_names;
	bool			 fuzzy;		/* substring match for task names */
	struct perf_cpu_map	*cpus;		/* restrict output to these CPUs */
	const char		*cpus_str;
};

/* Top-level state shared by all 'perf sched' subcommands. */
struct perf_sched {
	struct perf_tool tool;
	const char	 *sort_order;
	unsigned long	 nr_tasks;
	struct task_desc **pid_to_task;	/* PID -> task_desc lookup, grown on demand */
	struct task_desc **tasks;	/* dense array indexed by task->nr */
	const struct trace_sched_handler *tp_handler;
	struct mutex	 start_work_mutex;	/* gates worker threads at run start */
	struct mutex	 work_done_wait_mutex;	/* gates worker threads at run end */
	int		 profile_cpu;
/*
 * Track the current task - that way we can know whether there's any
 * weird events, such as a task being switched away that is not current.
 */
	struct perf_cpu	 max_cpu;
	u32		 *curr_pid;
	struct thread	 **curr_thread;
	struct thread	 **curr_out_thread;
	char		 next_shortname1;	/* next 2-char map label, first char */
	char		 next_shortname2;	/* next 2-char map label, second char */
	unsigned int	 replay_repeat;
	unsigned long	 nr_run_events;
	unsigned long	 nr_sleep_events;
	unsigned long	 nr_wakeup_events;
	unsigned long	 nr_sleep_corrections;
	unsigned long	 nr_run_events_optimized;	/* RUN atoms merged into a predecessor */
	unsigned long	 targetless_wakeups;
	unsigned long	 multitarget_wakeups;
	unsigned long	 nr_runs;
	unsigned long	 nr_timestamps;
	unsigned long	 nr_unordered_timestamps;
	unsigned long	 nr_context_switch_bugs;
	unsigned long	 nr_events;
	unsigned long	 nr_lost_chunks;
	unsigned long	 nr_lost_events;
	u64		 run_measurement_overhead;	/* see calibrate_run_measurement_overhead() */
	u64		 sleep_measurement_overhead;	/* see calibrate_sleep_measurement_overhead() */
	u64		 start_time;
	u64		 cpu_usage;
	u64		 runavg_cpu_usage;		/* running average over replay_repeat runs */
	u64		 parent_cpu_usage;
	u64		 runavg_parent_cpu_usage;
	u64		 sum_runtime;
	u64		 sum_fluct;
	u64		 run_avg;
	u64		 all_runtime;
	u64		 all_count;
	u64		 *cpu_last_switched;	/* per-CPU timestamp of the last sched_switch */
	struct rb_root_cached atom_root, sorted_atom_root, merged_atom_root;
	struct list_head sort_list, cmp_pid;
	bool force;
	bool skip_merge;
	struct perf_sched_map map;

	/* options for timehist command */
	bool		summary;
	bool		summary_only;
	bool		idle_hist;
	bool		show_callchain;
	unsigned int	max_stack;
	bool		show_cpu_visual;
	bool		show_wakeups;
	bool		show_next;
	bool		show_migrations;
	bool		pre_migrations;
	bool		show_state;
	bool		show_prio;
	u64		skipped_samples;
	const char	*time_str;
	struct perf_time_interval ptime;
	struct perf_time_interval hist_time;
	volatile bool   thread_funcs_exit;	/* tells replay workers to stop looping */
	const char	*prio_str;
	DECLARE_BITMAP(prio_bitmap, MAX_PRIO);

	struct perf_session *session;
	struct perf_data *data;
};

/* per thread run time data */
struct thread_runtime {
	u64 last_time;      /* time of previous sched in/out event */
	u64 dt_run;         /* run time */
	u64 dt_sleep;       /* time between CPU access by sleep (off cpu) */
	u64 dt_iowait;      /* time between CPU access by iowait (off cpu) */
	u64 dt_preempt;     /* time between CPU access by preempt (off cpu) */
	u64 dt_delay;       /* time between wakeup and sched-in */
	u64 dt_pre_mig;     /* time between migration and wakeup */
	u64 ready_to_run;   /* time of wakeup */
	u64 migrated;       /* time when a thread is migrated */

	struct stats run_stats;
	u64 total_run_time;
	u64 total_sleep_time;
	u64 total_iowait_time;
	u64 total_preempt_time;
	u64 total_delay_time;
	u64 total_pre_mig_time;

	char last_state;    /* task state at the most recent sched-out */

	char shortname[3];  /* two-char label used by 'perf sched map' */
	bool comm_changed;

	u64 migrations;

	int prio;
};

/* per event run time data */
struct evsel_runtime {
	u64 *last_time; /* time this event was last seen per cpu */
	u32 ncpu;       /* highest cpu slot allocated */
};

/* per cpu idle time data */
struct idle_thread_runtime {
	struct thread_runtime	tr;
	struct thread		*last_thread;
	struct rb_root_cached	sorted_root;
	struct callchain_root	callchain;
	struct callchain_cursor	cursor;
};

/* track idle times per cpu */
static struct thread **idle_threads;
static int idle_max_cpu;
static char idle_comm[] = "<idle>";

/* Monotonic clock timestamp in nanoseconds. */
static u64 get_nsecs(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);

	return
ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; 305 } 306 307 static void burn_nsecs(struct perf_sched *sched, u64 nsecs) 308 { 309 u64 T0 = get_nsecs(), T1; 310 311 do { 312 T1 = get_nsecs(); 313 } while (T1 + sched->run_measurement_overhead < T0 + nsecs); 314 } 315 316 static void sleep_nsecs(u64 nsecs) 317 { 318 struct timespec ts; 319 320 ts.tv_nsec = nsecs % 999999999; 321 ts.tv_sec = nsecs / 999999999; 322 323 nanosleep(&ts, NULL); 324 } 325 326 static void calibrate_run_measurement_overhead(struct perf_sched *sched) 327 { 328 u64 T0, T1, delta, min_delta = NSEC_PER_SEC; 329 int i; 330 331 for (i = 0; i < 10; i++) { 332 T0 = get_nsecs(); 333 burn_nsecs(sched, 0); 334 T1 = get_nsecs(); 335 delta = T1-T0; 336 min_delta = min(min_delta, delta); 337 } 338 sched->run_measurement_overhead = min_delta; 339 340 printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta); 341 } 342 343 static void calibrate_sleep_measurement_overhead(struct perf_sched *sched) 344 { 345 u64 T0, T1, delta, min_delta = NSEC_PER_SEC; 346 int i; 347 348 for (i = 0; i < 10; i++) { 349 T0 = get_nsecs(); 350 sleep_nsecs(10000); 351 T1 = get_nsecs(); 352 delta = T1-T0; 353 min_delta = min(min_delta, delta); 354 } 355 min_delta -= 10000; 356 sched->sleep_measurement_overhead = min_delta; 357 358 printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta); 359 } 360 361 static struct sched_atom * 362 get_new_event(struct task_desc *task, u64 timestamp) 363 { 364 struct sched_atom *event = zalloc(sizeof(*event)); 365 unsigned long idx = task->nr_events; 366 size_t size; 367 368 event->timestamp = timestamp; 369 event->nr = idx; 370 371 task->nr_events++; 372 size = sizeof(struct sched_atom *) * task->nr_events; 373 task->atoms = realloc(task->atoms, size); 374 BUG_ON(!task->atoms); 375 376 task->atoms[idx] = event; 377 378 return event; 379 } 380 381 static struct sched_atom *last_event(struct task_desc *task) 382 { 383 if (!task->nr_events) 384 return NULL; 385 386 return 
task->atoms[task->nr_events - 1]; 387 } 388 389 static void add_sched_event_run(struct perf_sched *sched, struct task_desc *task, 390 u64 timestamp, u64 duration) 391 { 392 struct sched_atom *event, *curr_event = last_event(task); 393 394 /* 395 * optimize an existing RUN event by merging this one 396 * to it: 397 */ 398 if (curr_event && curr_event->type == SCHED_EVENT_RUN) { 399 sched->nr_run_events_optimized++; 400 curr_event->duration += duration; 401 return; 402 } 403 404 event = get_new_event(task, timestamp); 405 406 event->type = SCHED_EVENT_RUN; 407 event->duration = duration; 408 409 sched->nr_run_events++; 410 } 411 412 static void add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *task, 413 u64 timestamp, struct task_desc *wakee) 414 { 415 struct sched_atom *event, *wakee_event; 416 417 event = get_new_event(task, timestamp); 418 event->type = SCHED_EVENT_WAKEUP; 419 event->wakee = wakee; 420 421 wakee_event = last_event(wakee); 422 if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) { 423 sched->targetless_wakeups++; 424 return; 425 } 426 if (wakee_event->wait_sem) { 427 sched->multitarget_wakeups++; 428 return; 429 } 430 431 wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem)); 432 sem_init(wakee_event->wait_sem, 0, 0); 433 event->wait_sem = wakee_event->wait_sem; 434 435 sched->nr_wakeup_events++; 436 } 437 438 static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task, 439 u64 timestamp) 440 { 441 struct sched_atom *event = get_new_event(task, timestamp); 442 443 event->type = SCHED_EVENT_SLEEP; 444 445 sched->nr_sleep_events++; 446 } 447 448 static struct task_desc *register_pid(struct perf_sched *sched, 449 unsigned long pid, const char *comm) 450 { 451 struct task_desc *task; 452 static int pid_max; 453 454 if (sched->pid_to_task == NULL) { 455 if (sysctl__read_int("kernel/pid_max", &pid_max) < 0) 456 pid_max = MAX_PID; 457 BUG_ON((sched->pid_to_task = calloc(pid_max, sizeof(struct 
task_desc *))) == NULL); 458 } 459 if (pid >= (unsigned long)pid_max) { 460 BUG_ON((sched->pid_to_task = realloc(sched->pid_to_task, (pid + 1) * 461 sizeof(struct task_desc *))) == NULL); 462 while (pid >= (unsigned long)pid_max) 463 sched->pid_to_task[pid_max++] = NULL; 464 } 465 466 task = sched->pid_to_task[pid]; 467 468 if (task) 469 return task; 470 471 task = zalloc(sizeof(*task)); 472 task->pid = pid; 473 task->nr = sched->nr_tasks; 474 strcpy(task->comm, comm); 475 /* 476 * every task starts in sleeping state - this gets ignored 477 * if there's no wakeup pointing to this sleep state: 478 */ 479 add_sched_event_sleep(sched, task, 0); 480 481 sched->pid_to_task[pid] = task; 482 sched->nr_tasks++; 483 sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_desc *)); 484 BUG_ON(!sched->tasks); 485 sched->tasks[task->nr] = task; 486 487 if (verbose > 0) 488 printf("registered task #%ld, PID %ld (%s)\n", sched->nr_tasks, pid, comm); 489 490 return task; 491 } 492 493 494 static void print_task_traces(struct perf_sched *sched) 495 { 496 struct task_desc *task; 497 unsigned long i; 498 499 for (i = 0; i < sched->nr_tasks; i++) { 500 task = sched->tasks[i]; 501 printf("task %6ld (%20s:%10ld), nr_events: %ld\n", 502 task->nr, task->comm, task->pid, task->nr_events); 503 } 504 } 505 506 static void add_cross_task_wakeups(struct perf_sched *sched) 507 { 508 struct task_desc *task1, *task2; 509 unsigned long i, j; 510 511 for (i = 0; i < sched->nr_tasks; i++) { 512 task1 = sched->tasks[i]; 513 j = i + 1; 514 if (j == sched->nr_tasks) 515 j = 0; 516 task2 = sched->tasks[j]; 517 add_sched_event_wakeup(sched, task1, 0, task2); 518 } 519 } 520 521 static void perf_sched__process_event(struct perf_sched *sched, 522 struct sched_atom *atom) 523 { 524 int ret = 0; 525 526 switch (atom->type) { 527 case SCHED_EVENT_RUN: 528 burn_nsecs(sched, atom->duration); 529 break; 530 case SCHED_EVENT_SLEEP: 531 if (atom->wait_sem) 532 ret = sem_wait(atom->wait_sem); 533 
BUG_ON(ret); 534 break; 535 case SCHED_EVENT_WAKEUP: 536 if (atom->wait_sem) 537 ret = sem_post(atom->wait_sem); 538 BUG_ON(ret); 539 break; 540 default: 541 BUG_ON(1); 542 } 543 } 544 545 static u64 get_cpu_usage_nsec_parent(void) 546 { 547 struct rusage ru; 548 u64 sum; 549 int err; 550 551 err = getrusage(RUSAGE_SELF, &ru); 552 BUG_ON(err); 553 554 sum = ru.ru_utime.tv_sec * NSEC_PER_SEC + ru.ru_utime.tv_usec * NSEC_PER_USEC; 555 sum += ru.ru_stime.tv_sec * NSEC_PER_SEC + ru.ru_stime.tv_usec * NSEC_PER_USEC; 556 557 return sum; 558 } 559 560 static int self_open_counters(struct perf_sched *sched, unsigned long cur_task) 561 { 562 struct perf_event_attr attr; 563 char sbuf[STRERR_BUFSIZE], info[STRERR_BUFSIZE]; 564 int fd; 565 struct rlimit limit; 566 bool need_privilege = false; 567 568 memset(&attr, 0, sizeof(attr)); 569 570 attr.type = PERF_TYPE_SOFTWARE; 571 attr.config = PERF_COUNT_SW_TASK_CLOCK; 572 573 force_again: 574 fd = sys_perf_event_open(&attr, 0, -1, -1, 575 perf_event_open_cloexec_flag()); 576 577 if (fd < 0) { 578 if (errno == EMFILE) { 579 if (sched->force) { 580 BUG_ON(getrlimit(RLIMIT_NOFILE, &limit) == -1); 581 limit.rlim_cur += sched->nr_tasks - cur_task; 582 if (limit.rlim_cur > limit.rlim_max) { 583 limit.rlim_max = limit.rlim_cur; 584 need_privilege = true; 585 } 586 if (setrlimit(RLIMIT_NOFILE, &limit) == -1) { 587 if (need_privilege && errno == EPERM) 588 strcpy(info, "Need privilege\n"); 589 } else 590 goto force_again; 591 } else 592 strcpy(info, "Have a try with -f option\n"); 593 } 594 pr_err("Error: sys_perf_event_open() syscall returned " 595 "with %d (%s)\n%s", fd, 596 str_error_r(errno, sbuf, sizeof(sbuf)), info); 597 exit(EXIT_FAILURE); 598 } 599 return fd; 600 } 601 602 static u64 get_cpu_usage_nsec_self(int fd) 603 { 604 u64 runtime; 605 int ret; 606 607 ret = read(fd, &runtime, sizeof(runtime)); 608 BUG_ON(ret != sizeof(runtime)); 609 610 return runtime; 611 } 612 613 struct sched_thread_parms { 614 struct task_desc *task; 
615 struct perf_sched *sched; 616 int fd; 617 }; 618 619 static void *thread_func(void *ctx) 620 { 621 struct sched_thread_parms *parms = ctx; 622 struct task_desc *this_task = parms->task; 623 struct perf_sched *sched = parms->sched; 624 u64 cpu_usage_0, cpu_usage_1; 625 unsigned long i, ret; 626 char comm2[22]; 627 int fd = parms->fd; 628 629 zfree(&parms); 630 631 sprintf(comm2, ":%s", this_task->comm); 632 prctl(PR_SET_NAME, comm2); 633 if (fd < 0) 634 return NULL; 635 636 while (!sched->thread_funcs_exit) { 637 ret = sem_post(&this_task->ready_for_work); 638 BUG_ON(ret); 639 mutex_lock(&sched->start_work_mutex); 640 mutex_unlock(&sched->start_work_mutex); 641 642 cpu_usage_0 = get_cpu_usage_nsec_self(fd); 643 644 for (i = 0; i < this_task->nr_events; i++) { 645 this_task->curr_event = i; 646 perf_sched__process_event(sched, this_task->atoms[i]); 647 } 648 649 cpu_usage_1 = get_cpu_usage_nsec_self(fd); 650 this_task->cpu_usage = cpu_usage_1 - cpu_usage_0; 651 ret = sem_post(&this_task->work_done_sem); 652 BUG_ON(ret); 653 654 mutex_lock(&sched->work_done_wait_mutex); 655 mutex_unlock(&sched->work_done_wait_mutex); 656 } 657 return NULL; 658 } 659 660 static void create_tasks(struct perf_sched *sched) 661 EXCLUSIVE_LOCK_FUNCTION(sched->start_work_mutex) 662 EXCLUSIVE_LOCK_FUNCTION(sched->work_done_wait_mutex) 663 { 664 struct task_desc *task; 665 pthread_attr_t attr; 666 unsigned long i; 667 int err; 668 669 err = pthread_attr_init(&attr); 670 BUG_ON(err); 671 err = pthread_attr_setstacksize(&attr, 672 (size_t) max(16 * 1024, (int)PTHREAD_STACK_MIN)); 673 BUG_ON(err); 674 mutex_lock(&sched->start_work_mutex); 675 mutex_lock(&sched->work_done_wait_mutex); 676 for (i = 0; i < sched->nr_tasks; i++) { 677 struct sched_thread_parms *parms = malloc(sizeof(*parms)); 678 BUG_ON(parms == NULL); 679 parms->task = task = sched->tasks[i]; 680 parms->sched = sched; 681 parms->fd = self_open_counters(sched, i); 682 sem_init(&task->ready_for_work, 0, 0); 683 
sem_init(&task->work_done_sem, 0, 0); 684 task->curr_event = 0; 685 err = pthread_create(&task->thread, &attr, thread_func, parms); 686 BUG_ON(err); 687 } 688 } 689 690 static void destroy_tasks(struct perf_sched *sched) 691 UNLOCK_FUNCTION(sched->start_work_mutex) 692 UNLOCK_FUNCTION(sched->work_done_wait_mutex) 693 { 694 struct task_desc *task; 695 unsigned long i; 696 int err; 697 698 mutex_unlock(&sched->start_work_mutex); 699 mutex_unlock(&sched->work_done_wait_mutex); 700 /* Get rid of threads so they won't be upset by mutex destrunction */ 701 for (i = 0; i < sched->nr_tasks; i++) { 702 task = sched->tasks[i]; 703 err = pthread_join(task->thread, NULL); 704 BUG_ON(err); 705 sem_destroy(&task->ready_for_work); 706 sem_destroy(&task->work_done_sem); 707 } 708 } 709 710 static void wait_for_tasks(struct perf_sched *sched) 711 EXCLUSIVE_LOCKS_REQUIRED(sched->work_done_wait_mutex) 712 EXCLUSIVE_LOCKS_REQUIRED(sched->start_work_mutex) 713 { 714 u64 cpu_usage_0, cpu_usage_1; 715 struct task_desc *task; 716 unsigned long i, ret; 717 718 sched->start_time = get_nsecs(); 719 sched->cpu_usage = 0; 720 mutex_unlock(&sched->work_done_wait_mutex); 721 722 for (i = 0; i < sched->nr_tasks; i++) { 723 task = sched->tasks[i]; 724 ret = sem_wait(&task->ready_for_work); 725 BUG_ON(ret); 726 sem_init(&task->ready_for_work, 0, 0); 727 } 728 mutex_lock(&sched->work_done_wait_mutex); 729 730 cpu_usage_0 = get_cpu_usage_nsec_parent(); 731 732 mutex_unlock(&sched->start_work_mutex); 733 734 for (i = 0; i < sched->nr_tasks; i++) { 735 task = sched->tasks[i]; 736 ret = sem_wait(&task->work_done_sem); 737 BUG_ON(ret); 738 sem_init(&task->work_done_sem, 0, 0); 739 sched->cpu_usage += task->cpu_usage; 740 task->cpu_usage = 0; 741 } 742 743 cpu_usage_1 = get_cpu_usage_nsec_parent(); 744 if (!sched->runavg_cpu_usage) 745 sched->runavg_cpu_usage = sched->cpu_usage; 746 sched->runavg_cpu_usage = (sched->runavg_cpu_usage * (sched->replay_repeat - 1) + sched->cpu_usage) / sched->replay_repeat; 
747 748 sched->parent_cpu_usage = cpu_usage_1 - cpu_usage_0; 749 if (!sched->runavg_parent_cpu_usage) 750 sched->runavg_parent_cpu_usage = sched->parent_cpu_usage; 751 sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * (sched->replay_repeat - 1) + 752 sched->parent_cpu_usage)/sched->replay_repeat; 753 754 mutex_lock(&sched->start_work_mutex); 755 756 for (i = 0; i < sched->nr_tasks; i++) { 757 task = sched->tasks[i]; 758 task->curr_event = 0; 759 } 760 } 761 762 static void run_one_test(struct perf_sched *sched) 763 EXCLUSIVE_LOCKS_REQUIRED(sched->work_done_wait_mutex) 764 EXCLUSIVE_LOCKS_REQUIRED(sched->start_work_mutex) 765 { 766 u64 T0, T1, delta, avg_delta, fluct; 767 768 T0 = get_nsecs(); 769 wait_for_tasks(sched); 770 T1 = get_nsecs(); 771 772 delta = T1 - T0; 773 sched->sum_runtime += delta; 774 sched->nr_runs++; 775 776 avg_delta = sched->sum_runtime / sched->nr_runs; 777 if (delta < avg_delta) 778 fluct = avg_delta - delta; 779 else 780 fluct = delta - avg_delta; 781 sched->sum_fluct += fluct; 782 if (!sched->run_avg) 783 sched->run_avg = delta; 784 sched->run_avg = (sched->run_avg * (sched->replay_repeat - 1) + delta) / sched->replay_repeat; 785 786 printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / NSEC_PER_MSEC); 787 788 printf("ravg: %0.2f, ", (double)sched->run_avg / NSEC_PER_MSEC); 789 790 printf("cpu: %0.2f / %0.2f", 791 (double)sched->cpu_usage / NSEC_PER_MSEC, (double)sched->runavg_cpu_usage / NSEC_PER_MSEC); 792 793 #if 0 794 /* 795 * rusage statistics done by the parent, these are less 796 * accurate than the sched->sum_exec_runtime based statistics: 797 */ 798 printf(" [%0.2f / %0.2f]", 799 (double)sched->parent_cpu_usage / NSEC_PER_MSEC, 800 (double)sched->runavg_parent_cpu_usage / NSEC_PER_MSEC); 801 #endif 802 803 printf("\n"); 804 805 if (sched->nr_sleep_corrections) 806 printf(" (%ld sleep corrections)\n", sched->nr_sleep_corrections); 807 sched->nr_sleep_corrections = 0; 808 } 809 810 static void 
test_calibrations(struct perf_sched *sched) 811 { 812 u64 T0, T1; 813 814 T0 = get_nsecs(); 815 burn_nsecs(sched, NSEC_PER_MSEC); 816 T1 = get_nsecs(); 817 818 printf("the run test took %" PRIu64 " nsecs\n", T1 - T0); 819 820 T0 = get_nsecs(); 821 sleep_nsecs(NSEC_PER_MSEC); 822 T1 = get_nsecs(); 823 824 printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0); 825 } 826 827 static int 828 replay_wakeup_event(struct perf_sched *sched, 829 struct evsel *evsel, struct perf_sample *sample, 830 struct machine *machine __maybe_unused) 831 { 832 const char *comm = evsel__strval(evsel, sample, "comm"); 833 const u32 pid = evsel__intval(evsel, sample, "pid"); 834 struct task_desc *waker, *wakee; 835 836 if (verbose > 0) { 837 printf("sched_wakeup event %p\n", evsel); 838 839 printf(" ... pid %d woke up %s/%d\n", sample->tid, comm, pid); 840 } 841 842 waker = register_pid(sched, sample->tid, "<unknown>"); 843 wakee = register_pid(sched, pid, comm); 844 845 add_sched_event_wakeup(sched, waker, sample->time, wakee); 846 return 0; 847 } 848 849 static int replay_switch_event(struct perf_sched *sched, 850 struct evsel *evsel, 851 struct perf_sample *sample, 852 struct machine *machine __maybe_unused) 853 { 854 const char *prev_comm = evsel__strval(evsel, sample, "prev_comm"), 855 *next_comm = evsel__strval(evsel, sample, "next_comm"); 856 const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"), 857 next_pid = evsel__intval(evsel, sample, "next_pid"); 858 struct task_desc *prev, __maybe_unused *next; 859 u64 timestamp0, timestamp = sample->time; 860 int cpu = sample->cpu; 861 s64 delta; 862 863 if (verbose > 0) 864 printf("sched_switch event %p\n", evsel); 865 866 if (cpu >= MAX_CPUS || cpu < 0) 867 return 0; 868 869 timestamp0 = sched->cpu_last_switched[cpu]; 870 if (timestamp0) 871 delta = timestamp - timestamp0; 872 else 873 delta = 0; 874 875 if (delta < 0) { 876 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 877 return -1; 878 } 879 880 pr_debug(" ... 
switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n", 881 prev_comm, prev_pid, next_comm, next_pid, delta); 882 883 prev = register_pid(sched, prev_pid, prev_comm); 884 next = register_pid(sched, next_pid, next_comm); 885 886 sched->cpu_last_switched[cpu] = timestamp; 887 888 add_sched_event_run(sched, prev, timestamp, delta); 889 add_sched_event_sleep(sched, prev, timestamp); 890 891 return 0; 892 } 893 894 static int replay_fork_event(struct perf_sched *sched, 895 union perf_event *event, 896 struct machine *machine) 897 { 898 struct thread *child, *parent; 899 900 child = machine__findnew_thread(machine, event->fork.pid, 901 event->fork.tid); 902 parent = machine__findnew_thread(machine, event->fork.ppid, 903 event->fork.ptid); 904 905 if (child == NULL || parent == NULL) { 906 pr_debug("thread does not exist on fork event: child %p, parent %p\n", 907 child, parent); 908 goto out_put; 909 } 910 911 if (verbose > 0) { 912 printf("fork event\n"); 913 printf("... parent: %s/%d\n", thread__comm_str(parent), thread__tid(parent)); 914 printf("... 
child: %s/%d\n", thread__comm_str(child), thread__tid(child)); 915 } 916 917 register_pid(sched, thread__tid(parent), thread__comm_str(parent)); 918 register_pid(sched, thread__tid(child), thread__comm_str(child)); 919 out_put: 920 thread__put(child); 921 thread__put(parent); 922 return 0; 923 } 924 925 struct sort_dimension { 926 const char *name; 927 sort_fn_t cmp; 928 struct list_head list; 929 }; 930 931 static inline void init_prio(struct thread_runtime *r) 932 { 933 r->prio = -1; 934 } 935 936 /* 937 * handle runtime stats saved per thread 938 */ 939 static struct thread_runtime *thread__init_runtime(struct thread *thread) 940 { 941 struct thread_runtime *r; 942 943 r = zalloc(sizeof(struct thread_runtime)); 944 if (!r) 945 return NULL; 946 947 init_stats(&r->run_stats); 948 init_prio(r); 949 thread__set_priv(thread, r); 950 951 return r; 952 } 953 954 static struct thread_runtime *thread__get_runtime(struct thread *thread) 955 { 956 struct thread_runtime *tr; 957 958 tr = thread__priv(thread); 959 if (tr == NULL) { 960 tr = thread__init_runtime(thread); 961 if (tr == NULL) 962 pr_debug("Failed to malloc memory for runtime data.\n"); 963 } 964 965 return tr; 966 } 967 968 static int 969 thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r) 970 { 971 struct sort_dimension *sort; 972 int ret = 0; 973 974 BUG_ON(list_empty(list)); 975 976 list_for_each_entry(sort, list, list) { 977 ret = sort->cmp(l, r); 978 if (ret) 979 return ret; 980 } 981 982 return ret; 983 } 984 985 static struct work_atoms * 986 thread_atoms_search(struct rb_root_cached *root, struct thread *thread, 987 struct list_head *sort_list) 988 { 989 struct rb_node *node = root->rb_root.rb_node; 990 struct work_atoms key = { .thread = thread }; 991 992 while (node) { 993 struct work_atoms *atoms; 994 int cmp; 995 996 atoms = container_of(node, struct work_atoms, node); 997 998 cmp = thread_lat_cmp(sort_list, &key, atoms); 999 if (cmp > 0) 1000 node = node->rb_left; 
1001 else if (cmp < 0) 1002 node = node->rb_right; 1003 else { 1004 BUG_ON(!RC_CHK_EQUAL(thread, atoms->thread)); 1005 return atoms; 1006 } 1007 } 1008 return NULL; 1009 } 1010 1011 static void 1012 __thread_latency_insert(struct rb_root_cached *root, struct work_atoms *data, 1013 struct list_head *sort_list) 1014 { 1015 struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL; 1016 bool leftmost = true; 1017 1018 while (*new) { 1019 struct work_atoms *this; 1020 int cmp; 1021 1022 this = container_of(*new, struct work_atoms, node); 1023 parent = *new; 1024 1025 cmp = thread_lat_cmp(sort_list, data, this); 1026 1027 if (cmp > 0) 1028 new = &((*new)->rb_left); 1029 else { 1030 new = &((*new)->rb_right); 1031 leftmost = false; 1032 } 1033 } 1034 1035 rb_link_node(&data->node, parent, new); 1036 rb_insert_color_cached(&data->node, root, leftmost); 1037 } 1038 1039 static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread) 1040 { 1041 struct work_atoms *atoms = zalloc(sizeof(*atoms)); 1042 if (!atoms) { 1043 pr_err("No memory at %s\n", __func__); 1044 return -1; 1045 } 1046 1047 atoms->thread = thread__get(thread); 1048 INIT_LIST_HEAD(&atoms->work_list); 1049 __thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid); 1050 return 0; 1051 } 1052 1053 static int 1054 add_sched_out_event(struct work_atoms *atoms, 1055 char run_state, 1056 u64 timestamp) 1057 { 1058 struct work_atom *atom = zalloc(sizeof(*atom)); 1059 if (!atom) { 1060 pr_err("Non memory at %s", __func__); 1061 return -1; 1062 } 1063 1064 atom->sched_out_time = timestamp; 1065 1066 if (run_state == 'R') { 1067 atom->state = THREAD_WAIT_CPU; 1068 atom->wake_up_time = atom->sched_out_time; 1069 } 1070 1071 list_add_tail(&atom->list, &atoms->work_list); 1072 return 0; 1073 } 1074 1075 static void 1076 add_runtime_event(struct work_atoms *atoms, u64 delta, 1077 u64 timestamp __maybe_unused) 1078 { 1079 struct work_atom *atom; 1080 1081 BUG_ON(list_empty(&atoms->work_list)); 
1082 1083 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1084 1085 atom->runtime += delta; 1086 atoms->total_runtime += delta; 1087 } 1088 1089 static void 1090 add_sched_in_event(struct work_atoms *atoms, u64 timestamp) 1091 { 1092 struct work_atom *atom; 1093 u64 delta; 1094 1095 if (list_empty(&atoms->work_list)) 1096 return; 1097 1098 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1099 1100 if (atom->state != THREAD_WAIT_CPU) 1101 return; 1102 1103 if (timestamp < atom->wake_up_time) { 1104 atom->state = THREAD_IGNORE; 1105 return; 1106 } 1107 1108 atom->state = THREAD_SCHED_IN; 1109 atom->sched_in_time = timestamp; 1110 1111 delta = atom->sched_in_time - atom->wake_up_time; 1112 atoms->total_lat += delta; 1113 if (delta > atoms->max_lat) { 1114 atoms->max_lat = delta; 1115 atoms->max_lat_start = atom->wake_up_time; 1116 atoms->max_lat_end = timestamp; 1117 } 1118 atoms->nb_atoms++; 1119 } 1120 1121 static void free_work_atoms(struct work_atoms *atoms) 1122 { 1123 struct work_atom *atom, *tmp; 1124 1125 if (atoms == NULL) 1126 return; 1127 1128 list_for_each_entry_safe(atom, tmp, &atoms->work_list, list) { 1129 list_del(&atom->list); 1130 free(atom); 1131 } 1132 thread__zput(atoms->thread); 1133 free(atoms); 1134 } 1135 1136 static int latency_switch_event(struct perf_sched *sched, 1137 struct evsel *evsel, 1138 struct perf_sample *sample, 1139 struct machine *machine) 1140 { 1141 const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"), 1142 next_pid = evsel__intval(evsel, sample, "next_pid"); 1143 const char prev_state = evsel__taskstate(evsel, sample, "prev_state"); 1144 struct work_atoms *out_events, *in_events; 1145 struct thread *sched_out, *sched_in; 1146 u64 timestamp0, timestamp = sample->time; 1147 int cpu = sample->cpu, err = -1; 1148 s64 delta; 1149 1150 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1151 1152 timestamp0 = sched->cpu_last_switched[cpu]; 1153 sched->cpu_last_switched[cpu] = timestamp; 1154 if 
(timestamp0) 1155 delta = timestamp - timestamp0; 1156 else 1157 delta = 0; 1158 1159 if (delta < 0) { 1160 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 1161 return -1; 1162 } 1163 1164 sched_out = machine__findnew_thread(machine, -1, prev_pid); 1165 sched_in = machine__findnew_thread(machine, -1, next_pid); 1166 if (sched_out == NULL || sched_in == NULL) 1167 goto out_put; 1168 1169 out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); 1170 if (!out_events) { 1171 if (thread_atoms_insert(sched, sched_out)) 1172 goto out_put; 1173 out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); 1174 if (!out_events) { 1175 pr_err("out-event: Internal tree error"); 1176 goto out_put; 1177 } 1178 } 1179 if (add_sched_out_event(out_events, prev_state, timestamp)) 1180 return -1; 1181 1182 in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); 1183 if (!in_events) { 1184 if (thread_atoms_insert(sched, sched_in)) 1185 goto out_put; 1186 in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); 1187 if (!in_events) { 1188 pr_err("in-event: Internal tree error"); 1189 goto out_put; 1190 } 1191 /* 1192 * Take came in we have not heard about yet, 1193 * add in an initial atom in runnable state: 1194 */ 1195 if (add_sched_out_event(in_events, 'R', timestamp)) 1196 goto out_put; 1197 } 1198 add_sched_in_event(in_events, timestamp); 1199 err = 0; 1200 out_put: 1201 thread__put(sched_out); 1202 thread__put(sched_in); 1203 return err; 1204 } 1205 1206 static int latency_runtime_event(struct perf_sched *sched, 1207 struct evsel *evsel, 1208 struct perf_sample *sample, 1209 struct machine *machine) 1210 { 1211 const u32 pid = evsel__intval(evsel, sample, "pid"); 1212 const u64 runtime = evsel__intval(evsel, sample, "runtime"); 1213 struct thread *thread = machine__findnew_thread(machine, -1, pid); 1214 struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, 
&sched->cmp_pid); 1215 u64 timestamp = sample->time; 1216 int cpu = sample->cpu, err = -1; 1217 1218 if (thread == NULL) 1219 return -1; 1220 1221 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1222 if (!atoms) { 1223 if (thread_atoms_insert(sched, thread)) 1224 goto out_put; 1225 atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); 1226 if (!atoms) { 1227 pr_err("in-event: Internal tree error"); 1228 goto out_put; 1229 } 1230 if (add_sched_out_event(atoms, 'R', timestamp)) 1231 goto out_put; 1232 } 1233 1234 add_runtime_event(atoms, runtime, timestamp); 1235 err = 0; 1236 out_put: 1237 thread__put(thread); 1238 return err; 1239 } 1240 1241 static int latency_wakeup_event(struct perf_sched *sched, 1242 struct evsel *evsel, 1243 struct perf_sample *sample, 1244 struct machine *machine) 1245 { 1246 const u32 pid = evsel__intval(evsel, sample, "pid"); 1247 struct work_atoms *atoms; 1248 struct work_atom *atom; 1249 struct thread *wakee; 1250 u64 timestamp = sample->time; 1251 int err = -1; 1252 1253 wakee = machine__findnew_thread(machine, -1, pid); 1254 if (wakee == NULL) 1255 return -1; 1256 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); 1257 if (!atoms) { 1258 if (thread_atoms_insert(sched, wakee)) 1259 goto out_put; 1260 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); 1261 if (!atoms) { 1262 pr_err("wakeup-event: Internal tree error"); 1263 goto out_put; 1264 } 1265 if (add_sched_out_event(atoms, 'S', timestamp)) 1266 goto out_put; 1267 } 1268 1269 BUG_ON(list_empty(&atoms->work_list)); 1270 1271 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1272 1273 /* 1274 * As we do not guarantee the wakeup event happens when 1275 * task is out of run queue, also may happen when task is 1276 * on run queue and wakeup only change ->state to TASK_RUNNING, 1277 * then we should not set the ->wake_up_time when wake up a 1278 * task which is on run queue. 
1279 * 1280 * You WILL be missing events if you've recorded only 1281 * one CPU, or are only looking at only one, so don't 1282 * skip in this case. 1283 */ 1284 if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING) 1285 goto out_ok; 1286 1287 sched->nr_timestamps++; 1288 if (atom->sched_out_time > timestamp) { 1289 sched->nr_unordered_timestamps++; 1290 goto out_ok; 1291 } 1292 1293 atom->state = THREAD_WAIT_CPU; 1294 atom->wake_up_time = timestamp; 1295 out_ok: 1296 err = 0; 1297 out_put: 1298 thread__put(wakee); 1299 return err; 1300 } 1301 1302 static int latency_migrate_task_event(struct perf_sched *sched, 1303 struct evsel *evsel, 1304 struct perf_sample *sample, 1305 struct machine *machine) 1306 { 1307 const u32 pid = evsel__intval(evsel, sample, "pid"); 1308 u64 timestamp = sample->time; 1309 struct work_atoms *atoms; 1310 struct work_atom *atom; 1311 struct thread *migrant; 1312 int err = -1; 1313 1314 /* 1315 * Only need to worry about migration when profiling one CPU. 
	 */
	if (sched->profile_cpu == -1)
		return 0;

	migrant = machine__findnew_thread(machine, -1, pid);
	if (migrant == NULL)
		return -1;
	atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
	if (!atoms) {
		if (thread_atoms_insert(sched, migrant))
			goto out_put;
		register_pid(sched, thread__tid(migrant), thread__comm_str(migrant));
		atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
		if (!atoms) {
			pr_err("migration-event: Internal tree error");
			goto out_put;
		}
		if (add_sched_out_event(atoms, 'R', timestamp))
			goto out_put;
	}

	BUG_ON(list_empty(&atoms->work_list));

	/* Reset the newest atom's timestamps to the migration time. */
	atom = list_entry(atoms->work_list.prev, struct work_atom, list);
	atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp;

	sched->nr_timestamps++;

	if (atom->sched_out_time > timestamp)
		sched->nr_unordered_timestamps++;
	err = 0;
out_put:
	thread__put(migrant);
	return err;
}

/* Print one line of the latency summary table for @work_list's thread. */
static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list)
{
	int i;
	int ret;
	u64 avg;
	char max_lat_start[32], max_lat_end[32];

	if (!work_list->nb_atoms)
		return;
	/*
	 * Ignore idle threads:
	 */
	if (!strcmp(thread__comm_str(work_list->thread), "swapper"))
		return;

	sched->all_runtime += work_list->total_runtime;
	sched->all_count += work_list->nb_atoms;

	/* Merged entries show a count instead of a tid. */
	if (work_list->num_merged > 1) {
		ret = printf(" %s:(%d) ", thread__comm_str(work_list->thread),
			     work_list->num_merged);
	} else {
		ret = printf(" %s:%d ", thread__comm_str(work_list->thread),
			     thread__tid(work_list->thread));
	}

	/* Pad the name column out to a fixed width. */
	for (i = 0; i < 24 - ret; i++)
		printf(" ");

	avg = work_list->total_lat / work_list->nb_atoms;
	timestamp__scnprintf_usec(work_list->max_lat_start, max_lat_start, sizeof(max_lat_start));
	timestamp__scnprintf_usec(work_list->max_lat_end, max_lat_end, sizeof(max_lat_end));

	printf("|%11.3f ms |%9" PRIu64 " | avg:%8.3f ms | max:%8.3f ms | max start: %12s s | max end: %12s s\n",
	       (double)work_list->total_runtime / NSEC_PER_MSEC,
	       work_list->nb_atoms, (double)avg / NSEC_PER_MSEC,
	       (double)work_list->max_lat / NSEC_PER_MSEC,
	       max_lat_start, max_lat_end);
}

/* Sort helper: order by tid, tie-breaking on the underlying thread object. */
static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
{
	pid_t l_tid, r_tid;

	if (RC_CHK_EQUAL(l->thread, r->thread))
		return 0;
	l_tid = thread__tid(l->thread);
	r_tid = thread__tid(r->thread);
	if (l_tid < r_tid)
		return -1;
	if (l_tid > r_tid)
		return 1;
	return (int)(RC_CHK_ACCESS(l->thread) - RC_CHK_ACCESS(r->thread));
}

/* Sort helper: order by average wakeup latency; empty entries sort first. */
static int avg_cmp(struct work_atoms *l, struct work_atoms *r)
{
	u64 avgl, avgr;

	if (!l->nb_atoms)
		return -1;

	if (!r->nb_atoms)
		return 1;

	avgl = l->total_lat / l->nb_atoms;
	avgr = r->total_lat / r->nb_atoms;

	if (avgl < avgr)
		return -1;
	if (avgl > avgr)
		return 1;

	return 0;
}

/* Sort helper: order by maximum wakeup latency. */
static int max_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->max_lat < r->max_lat)
		return -1;
	if (l->max_lat > r->max_lat)
		return 1;

	return 0;
}

/* Sort helper: order by number of context switches (atoms). */
static int switch_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->nb_atoms < r->nb_atoms)
		return -1;
	if (l->nb_atoms > r->nb_atoms)
		return 1;

	return 0;
}

/* Sort helper: order by total runtime. */
static int runtime_cmp(struct work_atoms *l, struct work_atoms *r)
{
	if (l->total_runtime < r->total_runtime)
		return -1;
	if (l->total_runtime > r->total_runtime)
		return 1;

	return 0;
}

/*
 * Map a --sort token onto one of the static sort dimensions below and
 * append it to @list.  Returns 0 if @tok was recognized, -1 otherwise.
 */
static int sort_dimension__add(const char *tok, struct list_head *list)
{
	size_t i;
	static struct
sort_dimension avg_sort_dimension = {
		.name = "avg",
		.cmp = avg_cmp,
	};
	static struct sort_dimension max_sort_dimension = {
		.name = "max",
		.cmp = max_cmp,
	};
	static struct sort_dimension pid_sort_dimension = {
		.name = "pid",
		.cmp = pid_cmp,
	};
	static struct sort_dimension runtime_sort_dimension = {
		.name = "runtime",
		.cmp = runtime_cmp,
	};
	static struct sort_dimension switch_sort_dimension = {
		.name = "switch",
		.cmp = switch_cmp,
	};
	struct sort_dimension *available_sorts[] = {
		&pid_sort_dimension,
		&avg_sort_dimension,
		&max_sort_dimension,
		&switch_sort_dimension,
		&runtime_sort_dimension,
	};

	/* Linear scan is fine: the table is tiny. */
	for (i = 0; i < ARRAY_SIZE(available_sorts); i++) {
		if (!strcmp(available_sorts[i]->name, tok)) {
			list_add_tail(&available_sorts[i]->list, list);

			return 0;
		}
	}

	return -1;
}

/*
 * Drain every work_atoms node from the unsorted tree(s) into
 * sorted_atom_root, ordered by the configured sort_list.  Processes
 * atom_root first, then merged_atom_root.
 */
static void perf_sched__sort_lat(struct perf_sched *sched)
{
	struct rb_node *node;
	struct rb_root_cached *root = &sched->atom_root;
again:
	for (;;) {
		struct work_atoms *data;
		node = rb_first_cached(root);
		if (!node)
			break;

		rb_erase_cached(node, root);
		data = rb_entry(node, struct work_atoms, node);
		__thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list);
	}
	if (root == &sched->atom_root) {
		root = &sched->merged_atom_root;
		goto again;
	}
}

/* Dispatch a wakeup tracepoint to the active tool handler, if any. */
static int process_sched_wakeup_event(const struct perf_tool *tool,
				      struct evsel *evsel,
				      struct perf_sample *sample,
				      struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);

	if (sched->tp_handler->wakeup_event)
		return sched->tp_handler->wakeup_event(sched, evsel, sample, machine);

	return 0;
}

/* No-op sched_wakeup handler, installed when sched_waking is preferred. */
static int process_sched_wakeup_ignore(const struct perf_tool *tool __maybe_unused,
				       struct evsel *evsel __maybe_unused,
				       struct perf_sample *sample __maybe_unused,
				       struct machine *machine __maybe_unused)
{
	return 0;
}

/* A thread is "colored" iff its priv pointer was set by map__findnew_thread(). */
static bool thread__has_color(struct thread *thread)
{
	return thread__priv(thread) != NULL;
}

/*
 * Find or create the thread and, when --color-pids is active, record
 * whether it should be colored (via the thread's priv pointer).
 */
static struct thread*
map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid)
{
	struct thread *thread = machine__findnew_thread(machine, pid, tid);
	bool color = false;

	if (!sched->map.color_pids || !thread || thread__priv(thread))
		return thread;

	if (thread_map__has(sched->map.color_pids, tid))
		color = true;

	/* priv doubles as the "has color" flag; see thread__has_color(). */
	thread__set_priv(thread, color ? ((void*)1) : NULL);
	return thread;
}

/*
 * Check whether @comm_str matches any of the --task-name arguments,
 * exactly or as a substring when fuzzy matching is enabled.
 */
static bool sched_match_task(struct perf_sched *sched, const char *comm_str)
{
	bool fuzzy_match = sched->map.fuzzy;
	struct strlist *task_names = sched->map.task_names;
	struct str_node *node;

	strlist__for_each_entry(node, task_names) {
		bool match_found = fuzzy_match ? !!strstr(comm_str, node->s) :
						 !strcmp(comm_str, node->s);
		if (match_found)
			return true;
	}

	return false;
}

/*
 * Print one row of the CPU map: a shortname cell per CPU, with '*'
 * marking the CPU the current sample came from.
 */
static void print_sched_map(struct perf_sched *sched, struct perf_cpu this_cpu, int cpus_nr,
			    const char *color, bool sched_out)
{
	for (int i = 0; i < cpus_nr; i++) {
		struct perf_cpu cpu = {
			.cpu = sched->map.comp ? sched->map.comp_cpus[i].cpu : i,
		};
		struct thread *curr_thread = sched->curr_thread[cpu.cpu];
		struct thread *curr_out_thread = sched->curr_out_thread[cpu.cpu];
		struct thread_runtime *curr_tr;
		const char *pid_color = color;
		const char *cpu_color = color;
		char symbol = ' ';
		struct thread *thread_to_check = sched_out ?
curr_out_thread : curr_thread; 1593 1594 if (thread_to_check && thread__has_color(thread_to_check)) 1595 pid_color = COLOR_PIDS; 1596 1597 if (sched->map.color_cpus && perf_cpu_map__has(sched->map.color_cpus, cpu)) 1598 cpu_color = COLOR_CPUS; 1599 1600 if (cpu.cpu == this_cpu.cpu) 1601 symbol = '*'; 1602 1603 color_fprintf(stdout, cpu.cpu != this_cpu.cpu ? color : cpu_color, "%c", symbol); 1604 1605 thread_to_check = sched_out ? sched->curr_out_thread[cpu.cpu] : 1606 sched->curr_thread[cpu.cpu]; 1607 1608 if (thread_to_check) { 1609 curr_tr = thread__get_runtime(thread_to_check); 1610 if (curr_tr == NULL) 1611 return; 1612 1613 if (sched_out) { 1614 if (cpu.cpu == this_cpu.cpu) 1615 color_fprintf(stdout, color, "- "); 1616 else { 1617 curr_tr = thread__get_runtime(sched->curr_thread[cpu.cpu]); 1618 if (curr_tr != NULL) 1619 color_fprintf(stdout, pid_color, "%2s ", 1620 curr_tr->shortname); 1621 } 1622 } else 1623 color_fprintf(stdout, pid_color, "%2s ", curr_tr->shortname); 1624 } else 1625 color_fprintf(stdout, color, " "); 1626 } 1627 } 1628 1629 static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, 1630 struct perf_sample *sample, struct machine *machine) 1631 { 1632 const u32 next_pid = evsel__intval(evsel, sample, "next_pid"); 1633 const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"); 1634 struct thread *sched_in, *sched_out; 1635 struct thread_runtime *tr; 1636 int new_shortname; 1637 u64 timestamp0, timestamp = sample->time; 1638 s64 delta; 1639 struct perf_cpu this_cpu = { 1640 .cpu = sample->cpu, 1641 }; 1642 int cpus_nr; 1643 int proceed; 1644 bool new_cpu = false; 1645 const char *color = PERF_COLOR_NORMAL; 1646 char stimestamp[32]; 1647 const char *str; 1648 int ret = -1; 1649 1650 BUG_ON(this_cpu.cpu >= MAX_CPUS || this_cpu.cpu < 0); 1651 1652 if (this_cpu.cpu > sched->max_cpu.cpu) 1653 sched->max_cpu = this_cpu; 1654 1655 if (sched->map.comp) { 1656 cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS); 1657 if 
(!__test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) { 1658 sched->map.comp_cpus[cpus_nr++] = this_cpu; 1659 new_cpu = true; 1660 } 1661 } else 1662 cpus_nr = sched->max_cpu.cpu; 1663 1664 timestamp0 = sched->cpu_last_switched[this_cpu.cpu]; 1665 sched->cpu_last_switched[this_cpu.cpu] = timestamp; 1666 if (timestamp0) 1667 delta = timestamp - timestamp0; 1668 else 1669 delta = 0; 1670 1671 if (delta < 0) { 1672 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 1673 return -1; 1674 } 1675 1676 sched_in = map__findnew_thread(sched, machine, -1, next_pid); 1677 sched_out = map__findnew_thread(sched, machine, -1, prev_pid); 1678 if (sched_in == NULL || sched_out == NULL) 1679 goto out; 1680 1681 tr = thread__get_runtime(sched_in); 1682 if (tr == NULL) 1683 goto out; 1684 1685 thread__put(sched->curr_thread[this_cpu.cpu]); 1686 thread__put(sched->curr_out_thread[this_cpu.cpu]); 1687 1688 sched->curr_thread[this_cpu.cpu] = thread__get(sched_in); 1689 sched->curr_out_thread[this_cpu.cpu] = thread__get(sched_out); 1690 1691 ret = 0; 1692 1693 str = thread__comm_str(sched_in); 1694 new_shortname = 0; 1695 if (!tr->shortname[0]) { 1696 if (!strcmp(thread__comm_str(sched_in), "swapper")) { 1697 /* 1698 * Don't allocate a letter-number for swapper:0 1699 * as a shortname. Instead, we use '.' for it. 
1700 */ 1701 tr->shortname[0] = '.'; 1702 tr->shortname[1] = ' '; 1703 } else if (!sched->map.task_name || sched_match_task(sched, str)) { 1704 tr->shortname[0] = sched->next_shortname1; 1705 tr->shortname[1] = sched->next_shortname2; 1706 1707 if (sched->next_shortname1 < 'Z') { 1708 sched->next_shortname1++; 1709 } else { 1710 sched->next_shortname1 = 'A'; 1711 if (sched->next_shortname2 < '9') 1712 sched->next_shortname2++; 1713 else 1714 sched->next_shortname2 = '0'; 1715 } 1716 } else { 1717 tr->shortname[0] = '-'; 1718 tr->shortname[1] = ' '; 1719 } 1720 new_shortname = 1; 1721 } 1722 1723 if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, this_cpu)) 1724 goto out; 1725 1726 proceed = 0; 1727 str = thread__comm_str(sched_in); 1728 /* 1729 * Check which of sched_in and sched_out matches the passed --task-name 1730 * arguments and call the corresponding print_sched_map. 1731 */ 1732 if (sched->map.task_name && !sched_match_task(sched, str)) { 1733 if (!sched_match_task(sched, thread__comm_str(sched_out))) 1734 goto out; 1735 else 1736 goto sched_out; 1737 1738 } else { 1739 str = thread__comm_str(sched_out); 1740 if (!(sched->map.task_name && !sched_match_task(sched, str))) 1741 proceed = 1; 1742 } 1743 1744 printf(" "); 1745 1746 print_sched_map(sched, this_cpu, cpus_nr, color, false); 1747 1748 timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp)); 1749 color_fprintf(stdout, color, " %12s secs ", stimestamp); 1750 if (new_shortname || tr->comm_changed || (verbose > 0 && thread__tid(sched_in))) { 1751 const char *pid_color = color; 1752 1753 if (thread__has_color(sched_in)) 1754 pid_color = COLOR_PIDS; 1755 1756 color_fprintf(stdout, pid_color, "%s => %s:%d", 1757 tr->shortname, thread__comm_str(sched_in), thread__tid(sched_in)); 1758 tr->comm_changed = false; 1759 } 1760 1761 if (sched->map.comp && new_cpu) 1762 color_fprintf(stdout, color, " (CPU %d)", this_cpu.cpu); 1763 1764 if (proceed != 1) { 1765 color_fprintf(stdout, color, 
"\n"); 1766 goto out; 1767 } 1768 1769 sched_out: 1770 if (sched->map.task_name) { 1771 tr = thread__get_runtime(sched->curr_out_thread[this_cpu.cpu]); 1772 if (strcmp(tr->shortname, "") == 0) 1773 goto out; 1774 1775 if (proceed == 1) 1776 color_fprintf(stdout, color, "\n"); 1777 1778 printf(" "); 1779 print_sched_map(sched, this_cpu, cpus_nr, color, true); 1780 timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp)); 1781 color_fprintf(stdout, color, " %12s secs ", stimestamp); 1782 } 1783 1784 color_fprintf(stdout, color, "\n"); 1785 1786 out: 1787 thread__put(sched_out); 1788 thread__put(sched_in); 1789 1790 return ret; 1791 } 1792 1793 static int process_sched_switch_event(const struct perf_tool *tool, 1794 struct evsel *evsel, 1795 struct perf_sample *sample, 1796 struct machine *machine) 1797 { 1798 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1799 int this_cpu = sample->cpu, err = 0; 1800 u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"), 1801 next_pid = evsel__intval(evsel, sample, "next_pid"); 1802 1803 if (sched->curr_pid[this_cpu] != (u32)-1) { 1804 /* 1805 * Are we trying to switch away a PID that is 1806 * not current? 
		 */
		if (sched->curr_pid[this_cpu] != prev_pid)
			sched->nr_context_switch_bugs++;
	}

	if (sched->tp_handler->switch_event)
		err = sched->tp_handler->switch_event(sched, evsel, sample, machine);

	sched->curr_pid[this_cpu] = next_pid;
	return err;
}

/* Dispatch a sched_stat_runtime sample to the active handler, if any. */
static int process_sched_runtime_event(const struct perf_tool *tool,
				       struct evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);

	if (sched->tp_handler->runtime_event)
		return sched->tp_handler->runtime_event(sched, evsel, sample, machine);

	return 0;
}

/* Process a fork: generic perf bookkeeping first, then tool-specific work. */
static int perf_sched__process_fork_event(const struct perf_tool *tool,
					  union perf_event *event,
					  struct perf_sample *sample,
					  struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);

	/* run the fork event through the perf machinery */
	perf_event__process_fork(tool, event, sample, machine);

	/* and then run additional processing needed for this command */
	if (sched->tp_handler->fork_event)
		return sched->tp_handler->fork_event(sched, event, machine);

	return 0;
}

/* Dispatch a sched_migrate_task sample to the active handler, if any. */
static int process_sched_migrate_task_event(const struct perf_tool *tool,
					    struct evsel *evsel,
					    struct perf_sample *sample,
					    struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);

	if (sched->tp_handler->migrate_task_event)
		return sched->tp_handler->migrate_task_event(sched, evsel, sample, machine);

	return 0;
}

/* Signature shared by all per-tracepoint handlers registered below. */
typedef int (*tracepoint_handler)(const struct perf_tool *tool,
				  struct evsel *evsel,
				  struct perf_sample *sample,
				  struct machine *machine);

/* Generic sample callback: forward to the handler attached to the evsel. */
static int perf_sched__process_tracepoint_sample(const struct perf_tool *tool __maybe_unused,
						 union perf_event *event __maybe_unused,
						 struct perf_sample *sample,
						 struct evsel *evsel,
						 struct machine *machine)
{
	int err = 0;

	if (evsel->handler != NULL) {
		tracepoint_handler f = evsel->handler;
		err = f(tool, evsel, sample, machine);
	}

	return err;
}

/*
 * COMM event callback: run the generic processing, then flag the
 * thread's runtime so the map view reprints its name.
 */
static int perf_sched__process_comm(const struct perf_tool *tool __maybe_unused,
				    union perf_event *event,
				    struct perf_sample *sample,
				    struct machine *machine)
{
	struct thread *thread;
	struct thread_runtime *tr;
	int err;

	err = perf_event__process_comm(tool, event, sample, machine);
	if (err)
		return err;

	thread = machine__find_thread(machine, sample->pid, sample->tid);
	if (!thread) {
		pr_err("Internal error: can't find thread\n");
		return -1;
	}

	tr = thread__get_runtime(thread);
	if (tr == NULL) {
		thread__put(thread);
		return -1;
	}

	tr->comm_changed = true;
	thread__put(thread);

	return 0;
}

/*
 * Open the perf.data session, install the sched tracepoint handlers and
 * replay all recorded events.  Returns 0 on success, negative on error.
 */
static int perf_sched__read_events(struct perf_sched *sched)
{
	struct evsel_str_handler handlers[] = {
		{ "sched:sched_switch",	      process_sched_switch_event, },
		{ "sched:sched_stat_runtime", process_sched_runtime_event, },
		{ "sched:sched_wakeup",	      process_sched_wakeup_event, },
		{ "sched:sched_waking",	      process_sched_wakeup_event, },
		{ "sched:sched_wakeup_new",   process_sched_wakeup_event, },
		{ "sched:sched_migrate_task", process_sched_migrate_task_event, },
	};
	struct perf_session *session;
	struct perf_data data = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = sched->force,
	};
	int rc = -1;

	session = perf_session__new(&data, &sched->tool);
	if (IS_ERR(session)) {
		pr_debug("Error creating perf session");
		return PTR_ERR(session);
	}

	symbol__init(perf_session__env(session));

	/* prefer sched_waking if it
is captured */
	if (evlist__find_tracepoint_by_name(session->evlist, "sched:sched_waking"))
		handlers[2].handler = process_sched_wakeup_ignore;

	if (perf_session__set_tracepoints_handlers(session, handlers))
		goto out_delete;

	if (perf_session__has_traces(session, "record -R")) {
		int err = perf_session__process_events(session);
		if (err) {
			pr_err("Failed to process events, error %d", err);
			goto out_delete;
		}

		sched->nr_events      = session->evlist->stats.nr_events[0];
		sched->nr_lost_events = session->evlist->stats.total_lost;
		sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST];
	}

	rc = 0;
out_delete:
	perf_session__delete(session);
	return rc;
}

/*
 * scheduling times are printed as msec.usec
 */
static inline void print_sched_time(unsigned long long nsecs, int width)
{
	unsigned long msecs;
	unsigned long usecs;

	msecs = nsecs / NSEC_PER_MSEC;
	nsecs -= msecs * NSEC_PER_MSEC;
	usecs = nsecs / NSEC_PER_USEC;
	printf("%*lu.%03lu ", width, msecs, usecs);
}

/*
 * returns runtime data for event, allocating memory for it the
 * first time it is used.
 */
static struct evsel_runtime *evsel__get_runtime(struct evsel *evsel)
{
	struct evsel_runtime *r = evsel->priv;

	if (r == NULL) {
		/* May return NULL on allocation failure; callers check. */
		r = zalloc(sizeof(struct evsel_runtime));
		evsel->priv = r;
	}

	return r;
}

/*
 * save last time event was seen per cpu
 */
static void evsel__save_time(struct evsel *evsel, u64 timestamp, u32 cpu)
{
	struct evsel_runtime *r = evsel__get_runtime(evsel);

	if (r == NULL)
		return;

	/* Grow the per-cpu array to the next power of two covering @cpu. */
	if ((cpu >= r->ncpu) || (r->last_time == NULL)) {
		int i, n = __roundup_pow_of_two(cpu+1);
		void *p = r->last_time;

		p = realloc(r->last_time, n * sizeof(u64));
		if (!p)
			return;

		r->last_time = p;
		for (i = r->ncpu; i < n; ++i)
			r->last_time[i] = (u64) 0;

		r->ncpu = n;
	}

	r->last_time[cpu] = timestamp;
}

/* returns last time this event was seen on the given cpu */
static u64 evsel__get_time(struct evsel *evsel, u32 cpu)
{
	struct evsel_runtime *r = evsel__get_runtime(evsel);

	if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu))
		return 0;

	return r->last_time[cpu];
}

/* evsel->priv destructor: free the evsel_runtime and its per-cpu array. */
static void timehist__evsel_priv_destructor(void *priv)
{
	struct evsel_runtime *r = priv;

	if (r) {
		free(r->last_time);
		free(r);
	}
}

/* Width of the task-name column; widened on demand by timehist_get_commstr(). */
static int comm_width = 30;

/*
 * Format "comm[tid/pid]" into a static buffer and widen comm_width as a
 * side effect so later rows line up.  NOTE: returns static storage.
 */
static char *timehist_get_commstr(struct thread *thread)
{
	static char str[32];
	const char *comm = thread__comm_str(thread);
	pid_t tid = thread__tid(thread);
	pid_t pid = thread__pid(thread);
	int n;

	if (pid == 0)
		n = scnprintf(str, sizeof(str), "%s", comm);

	else if (tid != pid)
		n = scnprintf(str, sizeof(str), "%s[%d/%d]", comm, tid, pid);

	else
		n = scnprintf(str, sizeof(str), "%s[%d]", comm, tid);

	if (n > comm_width)
		comm_width = n;

	return
str;
}

/* prio field format: xxx or xxx->yyy */
#define MAX_PRIO_STR_LEN 8
/*
 * Format the prev_prio field into a static buffer; shows "old->new"
 * when the thread's tracked priority changed.  Returns static storage.
 */
static char *timehist_get_priostr(struct evsel *evsel,
				  struct thread *thread,
				  struct perf_sample *sample)
{
	static char prio_str[16];
	int prev_prio = (int)evsel__intval(evsel, sample, "prev_prio");
	struct thread_runtime *tr = thread__priv(thread);

	if (tr->prio != prev_prio && tr->prio != -1)
		scnprintf(prio_str, sizeof(prio_str), "%d->%d", tr->prio, prev_prio);
	else
		scnprintf(prio_str, sizeof(prio_str), "%d", prev_prio);

	return prio_str;
}

/* Print the three-row column header for 'perf sched timehist' output. */
static void timehist_header(struct perf_sched *sched)
{
	u32 ncpus = sched->max_cpu.cpu + 1;
	u32 i, j;

	printf("%15s %6s ", "time", "cpu");

	/* One hex digit per CPU, cycling 0..f, for the visual CPU strip. */
	if (sched->show_cpu_visual) {
		printf(" ");
		for (i = 0, j = 0; i < ncpus; ++i) {
			printf("%x", j++);
			if (j > 15)
				j = 0;
		}
		printf(" ");
	}

	printf(" %-*s", comm_width, "task name");

	if (sched->show_prio)
		printf(" %-*s", MAX_PRIO_STR_LEN, "prio");

	printf(" %9s %9s %9s", "wait time", "sch delay", "run time");

	if (sched->pre_migrations)
		printf(" %9s", "pre-mig time");

	if (sched->show_state)
		printf(" %s", "state");

	printf("\n");

	/*
	 * units row
	 */
	printf("%15s %-6s ", "", "");

	if (sched->show_cpu_visual)
		printf(" %*s ", ncpus, "");

	printf(" %-*s", comm_width, "[tid/pid]");

	if (sched->show_prio)
		printf(" %-*s", MAX_PRIO_STR_LEN, "");

	printf(" %9s %9s %9s", "(msec)", "(msec)", "(msec)");

	if (sched->pre_migrations)
		printf(" %9s", "(msec)");

	printf("\n");

	/*
	 * separator
	 */
	printf("%.15s %.6s ", graph_dotted_line, graph_dotted_line);

	if (sched->show_cpu_visual)
		printf(" %.*s ", ncpus, graph_dotted_line);

	printf(" %.*s", comm_width, graph_dotted_line);

	if (sched->show_prio)
		printf(" %.*s", MAX_PRIO_STR_LEN, graph_dotted_line);

	printf(" %.9s %.9s %.9s", graph_dotted_line, graph_dotted_line, graph_dotted_line);

	if (sched->pre_migrations)
		printf(" %.9s", graph_dotted_line);

	if (sched->show_state)
		printf(" %.5s", graph_dotted_line);

	printf("\n");
}

/* Print one timehist row for the task being scheduled out at time @t. */
static void timehist_print_sample(struct perf_sched *sched,
				  struct evsel *evsel,
				  struct perf_sample *sample,
				  struct addr_location *al,
				  struct thread *thread,
				  u64 t, const char state)
{
	struct thread_runtime *tr = thread__priv(thread);
	const char *next_comm = evsel__strval(evsel, sample, "next_comm");
	const u32 next_pid = evsel__intval(evsel, sample, "next_pid");
	u32 max_cpus = sched->max_cpu.cpu + 1;
	char tstr[64];
	char nstr[30];
	u64 wait_time;

	/* Honor a --cpu filter, if one was given. */
	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
		return;

	timestamp__scnprintf_usec(t, tstr, sizeof(tstr));
	printf("%15s [%04d] ", tstr, sample->cpu);

	if (sched->show_cpu_visual) {
		u32 i;
		char c;

		printf(" ");
		for (i = 0; i < max_cpus; ++i) {
			/* flag idle times with 'i'; others are sched events */
			if (i == sample->cpu)
				c = (thread__tid(thread) == 0) ?
'i' : 's';
			else
				c = ' ';
			printf("%c", c);
		}
		printf(" ");
	}

	/* Late-binding of the comm: take it from the sample if still unset. */
	if (!thread__comm_set(thread)) {
		const char *prev_comm = evsel__strval(evsel, sample, "prev_comm");
		thread__set_comm(thread, prev_comm, sample->time);
	}

	printf(" %-*s ", comm_width, timehist_get_commstr(thread));

	if (sched->show_prio)
		printf(" %-*s ", MAX_PRIO_STR_LEN, timehist_get_priostr(evsel, thread, sample));

	wait_time = tr->dt_sleep + tr->dt_iowait + tr->dt_preempt;
	print_sched_time(wait_time, 6);

	print_sched_time(tr->dt_delay, 6);
	print_sched_time(tr->dt_run, 6);
	if (sched->pre_migrations)
		print_sched_time(tr->dt_pre_mig, 6);

	if (sched->show_state)
		printf(" %5c ", thread__tid(thread) == 0 ? 'I' : state);

	if (sched->show_next) {
		snprintf(nstr, sizeof(nstr), "next: %s[%d]", next_comm, next_pid);
		printf(" %-*s", comm_width, nstr);
	}

	if (sched->show_wakeups && !sched->show_next)
		printf(" %-*s", comm_width, "");

	/* No callchain output for the idle task. */
	if (thread__tid(thread) == 0)
		goto out;

	if (sched->show_callchain)
		printf(" ");

	sample__fprintf_sym(sample, al, 0,
			    EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE |
			    EVSEL__PRINT_CALLCHAIN_ARROW |
			    EVSEL__PRINT_SKIP_IGNORED,
			    get_tls_callchain_cursor(), symbol_conf.bt_stop_list, stdout);

out:
	printf("\n");
}

/*
 * Explanation of delta-time stats:
 *
 *            t = time of current schedule out event
 *        tprev = time of previous sched out event
 *                also time of schedule-in event for current task
 *    last_time = time of last sched change event for current task
 *                (i.e, time process was last scheduled out)
 * ready_to_run = time of wakeup for current task
 *     migrated = time of task migration to another CPU
 *
 * -----|-------------|-------------|-------------|-------------|-----
 *    last         ready        migrated       tprev            t
 *    time         to run
 *
 *      |---------------- dt_wait ----------------|
 *                   |--------- dt_delay ---------|-- dt_run --|
 *                   |- dt_pre_mig -|
 *
 *     dt_run = run time of current task
 *    dt_wait = time between last schedule out event for task and tprev
 *              represents time spent off the cpu
 *   dt_delay = time between wakeup and schedule-in of task
 * dt_pre_mig = time between wakeup and migration to another CPU
 */

static void timehist_update_runtime_stats(struct thread_runtime *r,
					  u64 t, u64 tprev)
{
	r->dt_delay = 0;
	r->dt_sleep = 0;
	r->dt_iowait = 0;
	r->dt_preempt = 0;
	r->dt_run = 0;
	r->dt_pre_mig = 0;

	if (tprev) {
		r->dt_run = t - tprev;
		if (r->ready_to_run) {
			if (r->ready_to_run > tprev)
				pr_debug("time travel: wakeup time for task > previous sched_switch event\n");
			else
				r->dt_delay = tprev - r->ready_to_run;

			if ((r->migrated > r->ready_to_run) && (r->migrated < tprev))
				r->dt_pre_mig = r->migrated - r->ready_to_run;
		}

		if (r->last_time > tprev)
			pr_debug("time travel: last sched out time for task > previous sched_switch event\n");
		else if (r->last_time) {
			u64 dt_wait = tprev - r->last_time;

			/* Classify off-cpu time by the state the task slept in. */
			if (r->last_state == 'R')
				r->dt_preempt = dt_wait;
			else if (r->last_state == 'D')
				r->dt_iowait = dt_wait;
			else
				r->dt_sleep = dt_wait;
		}
	}

	update_stats(&r->run_stats, r->dt_run);

	r->total_run_time     += r->dt_run;
	r->total_delay_time   += r->dt_delay;
	r->total_sleep_time   += r->dt_sleep;
	r->total_iowait_time  += r->dt_iowait;
	r->total_preempt_time += r->dt_preempt;
	r->total_pre_mig_time += r->dt_pre_mig;
}

/* Is this sample for the idle task (swapper, pid 0)? */
static bool is_idle_sample(struct perf_sample *sample,
			   struct evsel *evsel)
{
	/* pid 0 == swapper == idle task */
	if (evsel__name_is(evsel, "sched:sched_switch"))
		return evsel__intval(evsel,
sample, "prev_pid") == 0; 2323 2324 return sample->pid == 0; 2325 } 2326 2327 static void save_task_callchain(struct perf_sched *sched, 2328 struct perf_sample *sample, 2329 struct evsel *evsel, 2330 struct machine *machine) 2331 { 2332 struct callchain_cursor *cursor; 2333 struct thread *thread; 2334 2335 /* want main thread for process - has maps */ 2336 thread = machine__findnew_thread(machine, sample->pid, sample->pid); 2337 if (thread == NULL) { 2338 pr_debug("Failed to get thread for pid %d.\n", sample->pid); 2339 return; 2340 } 2341 2342 if (!sched->show_callchain || sample->callchain == NULL) { 2343 thread__put(thread); 2344 return; 2345 } 2346 2347 cursor = get_tls_callchain_cursor(); 2348 2349 if (thread__resolve_callchain(thread, cursor, evsel, sample, 2350 NULL, NULL, sched->max_stack + 2) != 0) { 2351 if (verbose > 0) 2352 pr_err("Failed to resolve callchain. Skipping\n"); 2353 2354 thread__put(thread); 2355 return; 2356 } 2357 2358 callchain_cursor_commit(cursor); 2359 thread__put(thread); 2360 2361 while (true) { 2362 struct callchain_cursor_node *node; 2363 struct symbol *sym; 2364 2365 node = callchain_cursor_current(cursor); 2366 if (node == NULL) 2367 break; 2368 2369 sym = node->ms.sym; 2370 if (sym) { 2371 if (!strcmp(sym->name, "schedule") || 2372 !strcmp(sym->name, "__schedule") || 2373 !strcmp(sym->name, "preempt_schedule")) 2374 sym->ignore = 1; 2375 } 2376 2377 callchain_cursor_advance(cursor); 2378 } 2379 } 2380 2381 static int init_idle_thread(struct thread *thread) 2382 { 2383 struct idle_thread_runtime *itr; 2384 2385 thread__set_comm(thread, idle_comm, 0); 2386 2387 itr = zalloc(sizeof(*itr)); 2388 if (itr == NULL) 2389 return -ENOMEM; 2390 2391 init_prio(&itr->tr); 2392 init_stats(&itr->tr.run_stats); 2393 callchain_init(&itr->callchain); 2394 callchain_cursor_reset(&itr->cursor); 2395 thread__set_priv(thread, itr); 2396 2397 return 0; 2398 } 2399 2400 /* 2401 * Track idle stats per cpu by maintaining a local thread 2402 * struct for 
the idle task on each cpu.
 */
static int init_idle_threads(int ncpu)
{
	int i, ret;

	idle_threads = zalloc(ncpu * sizeof(struct thread *));
	if (!idle_threads)
		return -ENOMEM;

	idle_max_cpu = ncpu;

	/* allocate the actual thread struct if needed */
	for (i = 0; i < ncpu; ++i) {
		/*
		 * Partially-initialized entries are left in the array on
		 * failure; the caller is expected to run free_idle_threads().
		 */
		idle_threads[i] = thread__new(0, 0);
		if (idle_threads[i] == NULL)
			return -ENOMEM;

		ret = init_idle_thread(idle_threads[i]);
		if (ret < 0)
			return ret;
	}

	return 0;
}

/* Release the per-cpu idle thread array and each thread's last_thread ref. */
static void free_idle_threads(void)
{
	int i;

	if (idle_threads == NULL)
		return;

	for (i = 0; i < idle_max_cpu; ++i) {
		struct thread *idle = idle_threads[i];

		if (idle) {
			struct idle_thread_runtime *itr;

			itr = thread__priv(idle);
			if (itr)
				thread__put(itr->last_thread);

			thread__delete(idle);
		}
	}

	free(idle_threads);
}

/*
 * Look up (creating/growing as needed) the local idle thread for @cpu.
 * Returns a new reference (thread__get) that the caller must put, or NULL
 * on allocation failure.
 */
static struct thread *get_idle_thread(int cpu)
{
	/*
	 * expand/allocate array of pointers to local thread
	 * structs if needed
	 */
	if ((cpu >= idle_max_cpu) || (idle_threads == NULL)) {
		int i, j = __roundup_pow_of_two(cpu+1);
		void *p;

		p = realloc(idle_threads, j * sizeof(struct thread *));
		if (!p)
			return NULL;

		idle_threads = (struct thread **) p;
		for (i = idle_max_cpu; i < j; ++i)
			idle_threads[i] = NULL;

		idle_max_cpu = j;
	}

	/* allocate a new thread struct if needed */
	if (idle_threads[cpu] == NULL) {
		idle_threads[cpu] = thread__new(0, 0);

		if (idle_threads[cpu]) {
			/*
			 * NOTE(review): on init failure the thread stays in
			 * the array without priv data; a later call would
			 * return it uninitialized — verify callers tolerate
			 * a NULL thread__priv().
			 */
			if (init_idle_thread(idle_threads[cpu]) < 0)
				return NULL;
		}
	}

	return thread__get(idle_threads[cpu]);
}

/* Copy the current TLS callchain cursor into the idle runtime's cursor. */
static void save_idle_callchain(struct perf_sched *sched,
				struct idle_thread_runtime *itr,
				struct perf_sample *sample)
{
	struct
callchain_cursor *cursor; 2491 2492 if (!sched->show_callchain || sample->callchain == NULL) 2493 return; 2494 2495 cursor = get_tls_callchain_cursor(); 2496 if (cursor == NULL) 2497 return; 2498 2499 callchain_cursor__copy(&itr->cursor, cursor); 2500 } 2501 2502 static struct thread *timehist_get_thread(struct perf_sched *sched, 2503 struct perf_sample *sample, 2504 struct machine *machine, 2505 struct evsel *evsel) 2506 { 2507 struct thread *thread; 2508 2509 if (is_idle_sample(sample, evsel)) { 2510 thread = get_idle_thread(sample->cpu); 2511 if (thread == NULL) 2512 pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); 2513 2514 } else { 2515 /* there were samples with tid 0 but non-zero pid */ 2516 thread = machine__findnew_thread(machine, sample->pid, 2517 sample->tid ?: sample->pid); 2518 if (thread == NULL) { 2519 pr_debug("Failed to get thread for tid %d. skipping sample.\n", 2520 sample->tid); 2521 } 2522 2523 save_task_callchain(sched, sample, evsel, machine); 2524 if (sched->idle_hist) { 2525 struct thread *idle; 2526 struct idle_thread_runtime *itr; 2527 2528 idle = get_idle_thread(sample->cpu); 2529 if (idle == NULL) { 2530 pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu); 2531 return NULL; 2532 } 2533 2534 itr = thread__priv(idle); 2535 if (itr == NULL) 2536 return NULL; 2537 2538 thread__put(itr->last_thread); 2539 itr->last_thread = thread__get(thread); 2540 2541 /* copy task callchain when entering to idle */ 2542 if (evsel__intval(evsel, sample, "next_pid") == 0) 2543 save_idle_callchain(sched, itr, sample); 2544 } 2545 } 2546 2547 return thread; 2548 } 2549 2550 static bool timehist_skip_sample(struct perf_sched *sched, 2551 struct thread *thread, 2552 struct evsel *evsel, 2553 struct perf_sample *sample) 2554 { 2555 bool rc = false; 2556 int prio = -1; 2557 struct thread_runtime *tr = NULL; 2558 2559 if (thread__is_filtered(thread)) { 2560 rc = true; 2561 sched->skipped_samples++; 2562 } 2563 2564 if (sched->prio_str) { 
2565 /* 2566 * Because priority may be changed during task execution, 2567 * first read priority from prev sched_in event for current task. 2568 * If prev sched_in event is not saved, then read priority from 2569 * current task sched_out event. 2570 */ 2571 tr = thread__get_runtime(thread); 2572 if (tr && tr->prio != -1) 2573 prio = tr->prio; 2574 else if (evsel__name_is(evsel, "sched:sched_switch")) 2575 prio = evsel__intval(evsel, sample, "prev_prio"); 2576 2577 if (prio != -1 && !test_bit(prio, sched->prio_bitmap)) { 2578 rc = true; 2579 sched->skipped_samples++; 2580 } 2581 } 2582 2583 if (sched->idle_hist) { 2584 if (!evsel__name_is(evsel, "sched:sched_switch")) 2585 rc = true; 2586 else if (evsel__intval(evsel, sample, "prev_pid") != 0 && 2587 evsel__intval(evsel, sample, "next_pid") != 0) 2588 rc = true; 2589 } 2590 2591 return rc; 2592 } 2593 2594 static void timehist_print_wakeup_event(struct perf_sched *sched, 2595 struct evsel *evsel, 2596 struct perf_sample *sample, 2597 struct machine *machine, 2598 struct thread *awakened) 2599 { 2600 struct thread *thread; 2601 char tstr[64]; 2602 2603 thread = machine__findnew_thread(machine, sample->pid, sample->tid); 2604 if (thread == NULL) 2605 return; 2606 2607 /* show wakeup unless both awakee and awaker are filtered */ 2608 if (timehist_skip_sample(sched, thread, evsel, sample) && 2609 timehist_skip_sample(sched, awakened, evsel, sample)) { 2610 thread__put(thread); 2611 return; 2612 } 2613 2614 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); 2615 printf("%15s [%04d] ", tstr, sample->cpu); 2616 if (sched->show_cpu_visual) 2617 printf(" %*s ", sched->max_cpu.cpu + 1, ""); 2618 2619 printf(" %-*s ", comm_width, timehist_get_commstr(thread)); 2620 2621 /* dt spacer */ 2622 printf(" %9s %9s %9s ", "", "", ""); 2623 2624 printf("awakened: %s", timehist_get_commstr(awakened)); 2625 2626 printf("\n"); 2627 2628 thread__put(thread); 2629 } 2630 2631 static int timehist_sched_wakeup_ignore(const struct 
perf_tool *tool __maybe_unused,
					union perf_event *event __maybe_unused,
					struct evsel *evsel __maybe_unused,
					struct perf_sample *sample __maybe_unused,
					struct machine *machine __maybe_unused)
{
	/* no-op handler: used to suppress sched_wakeup when sched_waking exists */
	return 0;
}

/*
 * Record the wakeup time of the awakened task (for delay accounting) and
 * optionally print the wakeup line (-w/--wakeups).
 */
static int timehist_sched_wakeup_event(const struct perf_tool *tool,
				       union perf_event *event __maybe_unused,
				       struct evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
	struct thread *thread;
	struct thread_runtime *tr = NULL;
	/* want pid of awakened task not pid in sample */
	const u32 pid = evsel__intval(evsel, sample, "pid");

	thread = machine__findnew_thread(machine, 0, pid);
	if (thread == NULL)
		return -1;

	tr = thread__get_runtime(thread);
	if (tr == NULL) {
		thread__put(thread);
		return -1;
	}

	/* first wakeup since last sched-in wins */
	if (tr->ready_to_run == 0)
		tr->ready_to_run = sample->time;

	/* show wakeups if requested */
	if (sched->show_wakeups &&
	    !perf_time__skip_sample(&sched->ptime, sample->time))
		timehist_print_wakeup_event(sched, evsel, sample, machine, thread);

	thread__put(thread);
	return 0;
}

/* Print one "migrated: ... cpu X => Y" line, honoring filters. */
static void timehist_print_migration_event(struct perf_sched *sched,
					   struct evsel *evsel,
					   struct perf_sample *sample,
					   struct machine *machine,
					   struct thread *migrated)
{
	struct thread *thread;
	char tstr[64];
	u32 max_cpus;
	u32 ocpu, dcpu;

	if (sched->summary_only)
		return;

	max_cpus = sched->max_cpu.cpu + 1;
	ocpu = evsel__intval(evsel, sample, "orig_cpu");
	dcpu = evsel__intval(evsel, sample, "dest_cpu");

	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
	if (thread == NULL)
		return;

	/* show migration unless both migrated task and current are filtered */
	if (timehist_skip_sample(sched, thread, evsel, sample) &&
	    timehist_skip_sample(sched, migrated, evsel, sample)) {
		thread__put(thread);
		return;
	}

	timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
	printf("%15s [%04d] ", tstr, sample->cpu);

	if (sched->show_cpu_visual) {
		u32 i;
		char c;

		/* mark the sampling cpu with 'm' in the cpu strip */
		printf(" ");
		for (i = 0; i < max_cpus; ++i) {
			c = (i == sample->cpu) ? 'm' : ' ';
			printf("%c", c);
		}
		printf(" ");
	}

	printf(" %-*s ", comm_width, timehist_get_commstr(thread));

	/* dt spacer */
	printf(" %9s %9s %9s ", "", "", "");

	printf("migrated: %s", timehist_get_commstr(migrated));
	printf(" cpu %d => %d", ocpu, dcpu);

	printf("\n");
	thread__put(thread);
}

/*
 * Count a migration for the migrated task, remember its time, and
 * optionally print it (-M/--migrations).
 */
static int timehist_migrate_task_event(const struct perf_tool *tool,
				       union perf_event *event __maybe_unused,
				       struct evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
	struct thread *thread;
	struct thread_runtime *tr = NULL;
	/* want pid of migrated task not pid in sample */
	const u32 pid = evsel__intval(evsel, sample, "pid");

	thread = machine__findnew_thread(machine, 0, pid);
	if (thread == NULL)
		return -1;

	tr = thread__get_runtime(thread);
	if (tr == NULL) {
		thread__put(thread);
		return -1;
	}

	tr->migrations++;
	tr->migrated = sample->time;

	/* show migrations if requested */
	if (sched->show_migrations) {
		timehist_print_migration_event(sched, evsel, sample,
					       machine, thread);
	}
	thread__put(thread);

	return 0;
}

/*
 * Cache the priority of the next (sched-in) task on its runtime struct so
 * later sched-out samples can be filtered by --prio.
 */
static void timehist_update_task_prio(struct evsel *evsel,
				      struct perf_sample *sample,
				      struct machine *machine)
{
	struct thread *thread;
	struct thread_runtime *tr = NULL;
	const u32 next_pid = evsel__intval(evsel, sample, "next_pid");
	const u32 next_prio = evsel__intval(evsel,
sample, "next_prio");

	if (next_pid == 0)
		thread = get_idle_thread(sample->cpu);
	else
		thread = machine__findnew_thread(machine, -1, next_pid);

	if (thread == NULL)
		return;

	tr = thread__get_runtime(thread);
	if (tr != NULL)
		tr->prio = next_prio;

	thread__put(thread);
}

/*
 * Core sched_switch handler for 'perf sched timehist': accounts run /
 * wait / delay time for the switched-out task within the requested time
 * window, handles idle-hist bookkeeping, and prints one sample line.
 */
static int timehist_sched_change_event(const struct perf_tool *tool,
				       union perf_event *event,
				       struct evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
	struct perf_time_interval *ptime = &sched->ptime;
	struct addr_location al;
	struct thread *thread = NULL;
	struct thread_runtime *tr = NULL;
	u64 tprev, t = sample->time;
	int rc = 0;
	const char state = evsel__taskstate(evsel, sample, "prev_state");

	addr_location__init(&al);
	if (machine__resolve(machine, &al, sample) < 0) {
		pr_err("problem processing %d event. skipping it\n",
		       event->header.type);
		rc = -1;
		goto out;
	}

	if (sched->show_prio || sched->prio_str)
		timehist_update_task_prio(evsel, sample, machine);

	thread = timehist_get_thread(sched, sample, machine, evsel);
	if (thread == NULL) {
		rc = -1;
		goto out;
	}

	if (timehist_skip_sample(sched, thread, evsel, sample))
		goto out;

	tr = thread__get_runtime(thread);
	if (tr == NULL) {
		rc = -1;
		goto out;
	}

	/* time this task was last switched in on this cpu */
	tprev = evsel__get_time(evsel, sample->cpu);

	/*
	 * If start time given:
	 * - sample time is under window user cares about - skip sample
	 * - tprev is under window user cares about - reset to start of window
	 */
	if (ptime->start && ptime->start > t)
		goto out;

	if (tprev && ptime->start > tprev)
		tprev = ptime->start;

	/*
	 * If end time given:
	 * - previous sched event is out of window - we are done
	 * - sample time is beyond window user cares about - reset it
	 * to close out stats for time window interest
	 * - If tprev is 0, that is, sched_in event for current task is
	 * not recorded, cannot determine whether sched_in event is
	 * within time window interest - ignore it
	 */
	if (ptime->end) {
		if (!tprev || tprev > ptime->end)
			goto out;

		if (t > ptime->end)
			t = ptime->end;
	}

	/* in idle-hist mode only the idle task (tid 0) is accounted directly */
	if (!sched->idle_hist || thread__tid(thread) == 0) {
		if (!cpu_list || test_bit(sample->cpu, cpu_bitmap))
			timehist_update_runtime_stats(tr, t, tprev);

		if (sched->idle_hist) {
			struct idle_thread_runtime *itr = (void *)tr;
			struct thread_runtime *last_tr;

			if (itr->last_thread == NULL)
				goto out;

			/* add current idle time as last thread's runtime */
			last_tr = thread__get_runtime(itr->last_thread);
			if (last_tr == NULL)
				goto out;

			timehist_update_runtime_stats(last_tr, t, tprev);
			/*
			 * remove delta time of last thread as it's not updated
			 * and otherwise it will show an invalid value next
			 * time. we only care total run time and run stat.
			 */
			last_tr->dt_run = 0;
			last_tr->dt_delay = 0;
			last_tr->dt_sleep = 0;
			last_tr->dt_iowait = 0;
			last_tr->dt_preempt = 0;

			if (itr->cursor.nr)
				callchain_append(&itr->callchain, &itr->cursor, t - tprev);

			/*
			 * NOTE(review): last_thread was stored via
			 * thread__get() in timehist_get_thread(); clearing it
			 * without thread__put() looks like a reference leak —
			 * verify against the thread refcounting rules.
			 */
			itr->last_thread = NULL;
		}

		if (!sched->summary_only)
			timehist_print_sample(sched, evsel, sample, &al, thread, t, state);
	}

out:
	/* track the overall [start,end] of accounted time for the summary */
	if (sched->hist_time.start == 0 && t >= ptime->start)
		sched->hist_time.start = t;
	if (ptime->end == 0 || t <= ptime->end)
		sched->hist_time.end = t;

	if (tr) {
		/* time of this sched_switch event becomes last time task seen */
		tr->last_time = sample->time;

		/* last state is used to determine where to account wait time */
		tr->last_state = state;

		/* sched out event for task so reset ready to run time and migrated time */
		if (state == 'R')
			tr->ready_to_run = t;
		else
			tr->ready_to_run = 0;

		tr->migrated = 0;
	}

	evsel__save_time(evsel, sample->time, sample->cpu);

	thread__put(thread);
	addr_location__exit(&al);
	return rc;
}

/* Thin wrapper: sched_switch tracepoint -> timehist_sched_change_event(). */
static int timehist_sched_switch_event(const struct perf_tool *tool,
				       union perf_event *event,
				       struct evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine __maybe_unused)
{
	return timehist_sched_change_event(tool, event, evsel, sample, machine);
}

/* Report PERF_RECORD_LOST events inline in the timehist output. */
static int process_lost(const struct perf_tool *tool __maybe_unused,
			union perf_event *event,
			struct perf_sample *sample,
			struct machine *machine __maybe_unused)
{
	char tstr[64];

	timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
	printf("%15s ", tstr);
	printf("lost %" PRI_lu64 " events on cpu 
%d\n", event->lost.lost, sample->cpu);

	return 0;
}


/* One summary row per task: run stats (count/min/avg/max/stddev/migrations). */
static void print_thread_runtime(struct thread *t,
				 struct thread_runtime *r)
{
	double mean = avg_stats(&r->run_stats);
	float stddev;

	printf("%*s %5d %9" PRIu64 " ",
	       comm_width, timehist_get_commstr(t), thread__ppid(t),
	       (u64) r->run_stats.n);

	print_sched_time(r->total_run_time, 8);
	stddev = rel_stddev_stats(stddev_stats(&r->run_stats), mean);
	print_sched_time(r->run_stats.min, 6);
	printf(" ");
	print_sched_time((u64) mean, 6);
	printf(" ");
	print_sched_time(r->run_stats.max, 6);
	printf(" ");
	printf("%5.2f", stddev);
	printf(" %5" PRIu64, r->migrations);
	printf("\n");
}

/* One summary row per task for -s/--state: run/sleep/iowait/preempt/delay. */
static void print_thread_waittime(struct thread *t,
				  struct thread_runtime *r)
{
	printf("%*s %5d %9" PRIu64 " ",
	       comm_width, timehist_get_commstr(t), thread__ppid(t),
	       (u64) r->run_stats.n);

	print_sched_time(r->total_run_time, 8);
	print_sched_time(r->total_sleep_time, 6);
	printf(" ");
	print_sched_time(r->total_iowait_time, 6);
	printf(" ");
	print_sched_time(r->total_preempt_time, 6);
	printf(" ");
	print_sched_time(r->total_delay_time, 6);
	printf("\n");
}

/* Accumulator passed through machine__for_each_thread() for the summary. */
struct total_run_stats {
	struct perf_sched *sched;	/* for show_state option */
	u64  sched_count;		/* total context switches seen */
	u64  task_count;		/* tasks that actually ran */
	u64  total_run_time;		/* summed run time across tasks */
};

/* Per-thread callback: print the task's row and fold into the totals. */
static int show_thread_runtime(struct thread *t, void *priv)
{
	struct total_run_stats *stats = priv;
	struct thread_runtime *r;

	if (thread__is_filtered(t))
		return 0;

	r = thread__priv(t);
	if (r && r->run_stats.n) {
		stats->task_count++;
		stats->sched_count += r->run_stats.n;
		stats->total_run_time += r->total_run_time;

		if (stats->sched->show_state)
			print_thread_waittime(t, r);
		else
			print_thread_runtime(t, r);
	}

	return 0;
}

static
/*
 * Recursively print a callchain root-first as a single " <- "-separated
 * folded line, skipping context markers and ignored symbols.
 */
size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node)
{
	const char *sep = " <- ";
	struct callchain_list *chain;
	size_t ret = 0;
	char bf[1024];
	bool first;

	if (node == NULL)
		return 0;

	/* print ancestors first so the chain reads caller -> callee */
	ret = callchain__fprintf_folded(fp, node->parent);
	first = (ret == 0);

	list_for_each_entry(chain, &node->val, list) {
		if (chain->ip >= PERF_CONTEXT_MAX)
			continue;
		if (chain->ms.sym && chain->ms.sym->ignore)
			continue;
		ret += fprintf(fp, "%s%s", first ? "" : sep,
			       callchain_list__sym_name(chain, bf, sizeof(bf),
							false));
		first = false;
	}

	return ret;
}

/* Print the sorted idle-time callchain table for one cpu. */
static size_t timehist_print_idlehist_callchain(struct rb_root_cached *root)
{
	size_t ret = 0;
	FILE *fp = stdout;
	struct callchain_node *chain;
	struct rb_node *rb_node = rb_first_cached(root);

	printf(" %16s %8s %s\n", "Idle time (msec)", "Count", "Callchains");
	printf(" %.16s %.8s %.50s\n", graph_dotted_line, graph_dotted_line,
	       graph_dotted_line);

	while (rb_node) {
		chain = rb_entry(rb_node, struct callchain_node, rb_node);
		rb_node = rb_next(rb_node);

		ret += fprintf(fp, " ");
		print_sched_time(chain->hit, 12);
		ret += 16; /* print_sched_time returns 2nd arg + 4 */
		ret += fprintf(fp, " %8d ", chain->count);
		ret += callchain__fprintf_folded(fp, chain);
		ret += fprintf(fp, "\n");
	}

	return ret;
}

/*
 * Print the end-of-run summary: per-task table, per-cpu idle stats,
 * optional idle callchains, and overall totals.
 */
static void timehist_print_summary(struct perf_sched *sched,
				   struct perf_session *session)
{
	struct machine *m = &session->machines.host;
	struct total_run_stats totals;
	u64 task_count;
	struct thread *t;
	struct thread_runtime *r;
	int i;
	u64 hist_time = sched->hist_time.end - sched->hist_time.start;

	memset(&totals, 0, sizeof(totals));
	totals.sched = sched;

	/* header depends on which report mode is active */
	if (sched->idle_hist) {
		printf("\nIdle-time summary\n");
		printf("%*s parent sched-out ", comm_width, "comm");
		printf(" idle-time min-idle avg-idle max-idle stddev migrations\n");
	} else if (sched->show_state) {
		printf("\nWait-time summary\n");
		printf("%*s parent sched-in ", comm_width, "comm");
		printf(" run-time sleep iowait preempt delay\n");
	} else {
		printf("\nRuntime summary\n");
		printf("%*s parent sched-in ", comm_width, "comm");
		printf(" run-time min-run avg-run max-run stddev migrations\n");
	}
	printf("%*s (count) ", comm_width, "");
	printf(" (msec) (msec) (msec) (msec) %s\n",
	       sched->show_state ? "(msec)" : "%");
	printf("%.117s\n", graph_dotted_line);

	machine__for_each_thread(m, show_thread_runtime, &totals);
	task_count = totals.task_count;
	if (!task_count)
		printf("<no still running tasks>\n");

	/* CPU idle stats not tracked when samples were skipped */
	if (sched->skipped_samples && !sched->idle_hist)
		return;

	printf("\nIdle stats:\n");
	for (i = 0; i < idle_max_cpu; ++i) {
		if (cpu_list && !test_bit(i, cpu_bitmap))
			continue;

		t = idle_threads[i];
		if (!t)
			continue;

		r = thread__priv(t);
		if (r && r->run_stats.n) {
			totals.sched_count += r->run_stats.n;
			printf(" CPU %2d idle for ", i);
			print_sched_time(r->total_run_time, 6);
			printf(" msec (%6.2f%%)\n", 100.0 * r->total_run_time / hist_time);
		} else
			printf(" CPU %2d idle entire time window\n", i);
	}

	if (sched->idle_hist && sched->show_callchain) {
		callchain_param.mode = CHAIN_FOLDED;
		callchain_param.value = CCVAL_PERIOD;

		callchain_register_param(&callchain_param);

		printf("\nIdle stats by callchain:\n");
		for (i = 0; i < idle_max_cpu; ++i) {
			struct idle_thread_runtime *itr;

			t = idle_threads[i];
			if (!t)
				continue;

			itr = thread__priv(t);
			if (itr == NULL)
				continue;

			callchain_param.sort(&itr->sorted_root.rb_root, &itr->callchain,
					     0, &callchain_param);

			printf(" CPU %2d:", i);
			print_sched_time(itr->tr.total_run_time, 6);
			printf(" msec\n");
			timehist_print_idlehist_callchain(&itr->sorted_root);
			printf("\n");
		}
	}

	printf("\n"
	       " Total number of unique tasks: %" PRIu64 "\n"
	       "Total number of context switches: %" PRIu64 "\n",
	       totals.task_count, totals.sched_count);

	printf(" Total run time (msec): ");
	print_sched_time(totals.total_run_time, 2);
	printf("\n");

	printf(" Total scheduling time (msec): ");
	print_sched_time(hist_time, 2);
	printf(" (x %d)\n", sched->max_cpu.cpu);
}

/* Signature shared by all per-tracepoint timehist handlers. */
typedef int (*sched_handler)(const struct perf_tool *tool,
			     union perf_event *event,
			     struct evsel *evsel,
			     struct perf_sample *sample,
			     struct machine *machine);

/* Top-level sample dispatcher: track max cpu, then call the evsel handler. */
static int perf_timehist__process_sample(const struct perf_tool *tool,
					 union perf_event *event,
					 struct perf_sample *sample,
					 struct evsel *evsel,
					 struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
	int err = 0;
	struct perf_cpu this_cpu = {
		.cpu = sample->cpu,
	};

	if (this_cpu.cpu > sched->max_cpu.cpu)
		sched->max_cpu = this_cpu;

	if (evsel->handler != NULL) {
		sched_handler f = evsel->handler;

		err = f(tool, event, evsel, sample, machine);
	}

	return err;
}

/*
 * Allocate per-evsel runtime data and disable callchain output when the
 * recorded sched_switch samples do not carry callchains.
 */
static int timehist_check_attr(struct perf_sched *sched,
			       struct evlist *evlist)
{
	struct evsel *evsel;
	struct evsel_runtime *er;

	list_for_each_entry(evsel, &evlist->core.entries, core.node) {
		er = evsel__get_runtime(evsel);
		if (er == NULL) {
			pr_err("Failed to allocate memory for evsel runtime data\n");
			return -1;
		}

		/* only need to save callchain related to sched_switch event */
		if (sched->show_callchain &&
		    evsel__name_is(evsel, "sched:sched_switch") &&
		    !evsel__has_callchain(evsel)) {
			pr_info("Samples of sched_switch event do not have callchains.\n");
			sched->show_callchain = 0;
			symbol_conf.use_callchain = 0;
		}
	}

	return 0;
}

/*
 * Parse --prio into sched->prio_bitmap.  Accepts a comma-separated list
 * of priorities and inclusive ranges, e.g. "0,10-20,99".
 * Returns 0 on success, -1 on malformed input or out-of-range priority.
 */
static int timehist_parse_prio_str(struct perf_sched *sched)
{
	char *p;
	unsigned long start_prio, end_prio;
	const char *str = sched->prio_str;

	if (!str)
		return 0;

	while (isdigit(*str)) {
		p = NULL;
		start_prio = strtoul(str, &p, 0);
		if (start_prio >= MAX_PRIO || (*p != '\0' && *p != ',' && *p != '-'))
			return -1;

		if (*p == '-') {
			/* "lo-hi" range form */
			str = ++p;
			p = NULL;
			end_prio = strtoul(str, &p, 0);

			if (end_prio >= MAX_PRIO || (*p != '\0' && *p != ','))
				return -1;

			if (end_prio < start_prio)
				return -1;
		} else {
			end_prio = start_prio;
		}

		for (; start_prio <= end_prio; start_prio++)
			__set_bit(start_prio, sched->prio_bitmap);

		/* step over the ',' separator, if any */
		if (*p)
			++p;

		str = p;
	}

	return 0;
}

/*
 * Entry point for 'perf sched timehist': set up tool callbacks and
 * tracepoint handlers, process the recorded session, and print the
 * per-sample output and/or summary.
 */
static int perf_sched__timehist(struct perf_sched *sched)
{
	struct evsel_str_handler handlers[] = {
		{ "sched:sched_switch", timehist_sched_switch_event, },
		{ "sched:sched_wakeup", timehist_sched_wakeup_event, },
		{ "sched:sched_waking", timehist_sched_wakeup_event, },
		{ "sched:sched_wakeup_new", timehist_sched_wakeup_event, },
	};
	const struct evsel_str_handler migrate_handlers[] = {
		{ "sched:sched_migrate_task", timehist_migrate_task_event, },
	};
	struct perf_data data = {
		.path = input_name,
		.mode = PERF_DATA_MODE_READ,
		.force = sched->force,
	};

	struct perf_session *session;
	struct perf_env *env;
	struct evlist *evlist;
	int err = -1;

	/*
	 * event handlers for timehist option
	 */
	sched->tool.sample = perf_timehist__process_sample;
	sched->tool.mmap = perf_event__process_mmap;
	sched->tool.comm = perf_event__process_comm;
	sched->tool.exit = perf_event__process_exit;
	sched->tool.fork = perf_event__process_fork;
	sched->tool.lost = process_lost;
	sched->tool.attr = perf_event__process_attr;
	sched->tool.tracing_data = perf_event__process_tracing_data;
	sched->tool.build_id = perf_event__process_build_id;

	sched->tool.ordering_requires_timestamps = true;

	symbol_conf.use_callchain = sched->show_callchain;

	session = perf_session__new(&data, &sched->tool);
	if (IS_ERR(session))
		return PTR_ERR(session);

	env = perf_session__env(session);
	if (cpu_list) {
		err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
		if (err < 0)
			goto out;
	}

	evlist = session->evlist;

	symbol__init(env);

	if (perf_time__parse_str(&sched->ptime, sched->time_str) != 0) {
		pr_err("Invalid time string\n");
		err = -EINVAL;
		goto out;
	}

	if (timehist_check_attr(sched, evlist) != 0)
		goto out;

	if (timehist_parse_prio_str(sched) != 0) {
		pr_err("Invalid prio string\n");
		goto out;
	}

	setup_pager();

	evsel__set_priv_destructor(timehist__evsel_priv_destructor);

	/* prefer sched_waking if it is captured */
	if (evlist__find_tracepoint_by_name(session->evlist, "sched:sched_waking"))
		handlers[1].handler = timehist_sched_wakeup_ignore;

	/* setup per-evsel handlers */
	if (perf_session__set_tracepoints_handlers(session, handlers))
		goto out;

	/* sched_switch event at a minimum needs to exist */
	if (!evlist__find_tracepoint_by_name(session->evlist, "sched:sched_switch")) {
		pr_err("No sched_switch events found. Have you run 'perf sched record'?\n");
		goto out;
	}

	if ((sched->show_migrations || sched->pre_migrations) &&
	    perf_session__set_tracepoints_handlers(session, migrate_handlers))
		goto out;

	/* pre-allocate struct for per-CPU idle stats */
	sched->max_cpu.cpu = env->nr_cpus_online;
	if (sched->max_cpu.cpu == 0)
		sched->max_cpu.cpu = 4;
	if (init_idle_threads(sched->max_cpu.cpu))
		goto out;

	/* summary_only implies summary option, but don't overwrite summary if set */
	if (sched->summary_only)
		sched->summary = sched->summary_only;

	if (!sched->summary_only)
		timehist_header(sched);

	err = perf_session__process_events(session);
	if (err) {
		pr_err("Failed to process events, error %d", err);
		goto out;
	}

	sched->nr_events = evlist->stats.nr_events[0];
	sched->nr_lost_events = evlist->stats.total_lost;
	sched->nr_lost_chunks = evlist->stats.nr_events[PERF_RECORD_LOST];

	if (sched->summary)
		timehist_print_summary(sched, session);

out:
	free_idle_threads();
	perf_session__delete(session);

	return err;
}


/* Print data-quality warnings (unordered timestamps, lost events, ...). */
static void print_bad_events(struct perf_sched *sched)
{
	if (sched->nr_unordered_timestamps && sched->nr_timestamps) {
		printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n",
		       (double)sched->nr_unordered_timestamps/(double)sched->nr_timestamps*100.0,
		       sched->nr_unordered_timestamps, sched->nr_timestamps);
	}
	if (sched->nr_lost_events && sched->nr_events) {
		printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
		       (double)sched->nr_lost_events/(double)sched->nr_events * 100.0,
		       sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks);
	}
	if (sched->nr_context_switch_bugs && sched->nr_timestamps) {
		printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)",
		       (double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0,
		       sched->nr_context_switch_bugs, sched->nr_timestamps);
		if (sched->nr_lost_events)
			printf(" (due to lost events?)");
		printf("\n");
	}
}

/*
 * Insert @data into the comm-sorted rbtree @root, merging its atoms and
 * latency totals into an existing node with the same comm (the merged
 * source node is then freed).
 */
static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *data)
{
	struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL;
	struct work_atoms *this;
	const char *comm = thread__comm_str(data->thread), *this_comm;
	bool leftmost = true;

	while (*new) {
		int cmp;

		this = container_of(*new, struct work_atoms, node);
		parent = *new;

		this_comm = thread__comm_str(this->thread);
		cmp = strcmp(comm, this_comm);
		if (cmp > 0) {
			new = &((*new)->rb_left);
		} else if (cmp < 0) {
			new = &((*new)->rb_right);
			leftmost = false;
		} else {
			/* same comm: fold data into the existing node */
			this->num_merged++;
			this->total_runtime += data->total_runtime;
			this->nb_atoms += data->nb_atoms;
			this->total_lat += data->total_lat;
			list_splice_init(&data->work_list, &this->work_list);
			if (this->max_lat < data->max_lat) {
				this->max_lat = data->max_lat;
				this->max_lat_start = data->max_lat_start;
				this->max_lat_end = data->max_lat_end;
			}
			free_work_atoms(data);
			return;
		}
	}

	data->num_merged++;
	rb_link_node(&data->node, parent, new);
	rb_insert_color_cached(&data->node, root, leftmost);
}

/* Merge per-thread atoms by comm unless -p/--pids (skip_merge) was given. */
static void perf_sched__merge_lat(struct perf_sched *sched)
{
	struct work_atoms *data;
	struct rb_node *node;

	if (sched->skip_merge)
		return;

	while ((node = rb_first_cached(&sched->atom_root))) {
		rb_erase_cached(node, &sched->atom_root);
		data = rb_entry(node, struct work_atoms, node);
		__merge_work_atoms(&sched->merged_atom_root, data);
	}
}

/*
 * Allocate the per-cpu last-switch-time and current-pid arrays.
 * Returns 0 on success, -1 on allocation failure (nothing left allocated).
 */
static int setup_cpus_switch_event(struct perf_sched *sched)
{
	unsigned int i;

	sched->cpu_last_switched = calloc(MAX_CPUS, sizeof(*(sched->cpu_last_switched)));
	if (!sched->cpu_last_switched)
		return -1;

	sched->curr_pid = malloc(MAX_CPUS * sizeof(*(sched->curr_pid)));
	if (!sched->curr_pid) {
		zfree(&sched->cpu_last_switched);
		return -1;
	}

	for (i = 0; i < MAX_CPUS; i++)
		sched->curr_pid[i] = -1;

	return 0;
}

/* Counterpart of setup_cpus_switch_event(). */
static void free_cpus_switch_event(struct perf_sched *sched)
{
	zfree(&sched->curr_pid);
	zfree(&sched->cpu_last_switched);
}

/*
 * Entry point for 'perf sched latency': read events, merge and sort the
 * per-task atoms, and print the latency table.
 */
static int perf_sched__lat(struct perf_sched *sched)
{
	int rc = -1;
	struct rb_node *next;

	setup_pager();

	if (setup_cpus_switch_event(sched))
		return rc;

	if (perf_sched__read_events(sched))
		goto out_free_cpus_switch_event;

	perf_sched__merge_lat(sched);
	perf_sched__sort_lat(sched);

	printf("\n -------------------------------------------------------------------------------------------------------------------------------------------\n");
	printf(" Task | Runtime ms | Count | Avg delay ms | Max delay ms | Max delay start | Max delay end |\n");
	printf(" -------------------------------------------------------------------------------------------------------------------------------------------\n");

	next = rb_first_cached(&sched->sorted_atom_root);

	while (next) {
		struct work_atoms *work_list;

		work_list = rb_entry(next, struct work_atoms, node);
		output_lat_thread(sched, work_list);
		next = rb_next(next);
	}

	printf(" -----------------------------------------------------------------------------------------------------------------\n");
	printf(" TOTAL: |%11.3f ms |%9" PRIu64 " |\n",
	       (double)sched->all_runtime / NSEC_PER_MSEC, sched->all_count);

	printf(" ---------------------------------------------------\n");

	print_bad_events(sched);
	printf("\n");
	rc = 0;

	/* drain the sorted tree, freeing every work_atoms entry */
	while ((next = rb_first_cached(&sched->sorted_atom_root))) {
		struct work_atoms *data;

		data = rb_entry(next, struct work_atoms, node);
		rb_erase_cached(next, &sched->sorted_atom_root);
		free_work_atoms(data);
	}
out_free_cpus_switch_event:
	free_cpus_switch_event(sched);
	return rc;
}

/*
 * Prepare the CPU maps used by 'perf sched map': the optional compressed
 * CPU list and the optional user-supplied --cpus filter.
 */
static int setup_map_cpus(struct perf_sched *sched)
{
	sched->max_cpu.cpu = sysconf(_SC_NPROCESSORS_CONF);

	if (sched->map.comp) {
		sched->map.comp_cpus = zalloc(sched->max_cpu.cpu * sizeof(int));
		if (!sched->map.comp_cpus)
			return -1;
	}

	if (sched->map.cpus_str) {
		sched->map.cpus = perf_cpu_map__new(sched->map.cpus_str);
		if (!sched->map.cpus) {
			pr_err("failed to get cpus map from %s\n", sched->map.cpus_str);
			zfree(&sched->map.comp_cpus);
			return -1;
		}
	}

	return 0;
}

/* Parse --color-pids into a thread map; no-op when the option is unset. */
static int setup_color_pids(struct perf_sched *sched)
{
	struct perf_thread_map *map;

	if (!sched->map.color_pids_str)
		return 0;

	map = thread_map__new_by_tid_str(sched->map.color_pids_str);
	if (!map) {
		pr_err("failed to get thread map from %s\n", sched->map.color_pids_str);
		return -1;
	}

	sched->map.color_pids = map;
	return 0;
}

/* Parse --color-cpus into a cpu map; no-op when the option is unset. */
static int setup_color_cpus(struct perf_sched *sched)
{
	struct perf_cpu_map *map;

	if (!sched->map.color_cpus_str)
		return 0;

	map = perf_cpu_map__new(sched->map.color_cpus_str);
	if (!map) {
		pr_err("failed to get thread map from %s\n", sched->map.color_cpus_str);
		return -1;
	}

	sched->map.color_cpus = map;
	return 0;
}

/*
 * 'perf sched map' entry point.  Allocates per-CPU current-thread arrays,
 * sets up the optional cpu/pid filters, reads the events and prints the
 * map.  The goto chain below releases resources in reverse acquisition
 * order on both the success and every failure path.
 */
static int perf_sched__map(struct perf_sched *sched)
{
	int rc = -1;

	sched->curr_thread = calloc(MAX_CPUS, sizeof(*(sched->curr_thread)));
	if (!sched->curr_thread)
		return rc;

	sched->curr_out_thread = calloc(MAX_CPUS, sizeof(*(sched->curr_out_thread)));
	if (!sched->curr_out_thread)
		goto out_free_curr_thread;

	if (setup_cpus_switch_event(sched))
		goto out_free_curr_out_thread;

	if (setup_map_cpus(sched))
		goto out_free_cpus_switch_event;

	if (setup_color_pids(sched))
		goto out_put_map_cpus;

	if (setup_color_cpus(sched))
		goto out_put_color_pids;

	setup_pager();
	if (perf_sched__read_events(sched))
		goto out_put_color_cpus;

	rc = 0;
	print_bad_events(sched);

out_put_color_cpus:
	perf_cpu_map__put(sched->map.color_cpus);

out_put_color_pids:
	perf_thread_map__put(sched->map.color_pids);

out_put_map_cpus:
	zfree(&sched->map.comp_cpus);
	perf_cpu_map__put(sched->map.cpus);

out_free_cpus_switch_event:
	free_cpus_switch_event(sched);

out_free_curr_out_thread:
	/* drop the reference each slot may still hold before freeing */
	for (int i = 0; i < MAX_CPUS; i++)
		thread__put(sched->curr_out_thread[i]);
	zfree(&sched->curr_out_thread);

out_free_curr_thread:
	for (int i = 0; i < MAX_CPUS; i++)
		thread__put(sched->curr_thread[i]);
	zfree(&sched->curr_thread);
	return rc;
}

/*
 * 'perf sched replay' entry point: read the recorded traces, calibrate the
 * run/sleep measurement overhead, then spawn worker threads and re-execute
 * the recorded scheduling pattern --repeat times.
 */
static int perf_sched__replay(struct perf_sched *sched)
{
	int ret;
	unsigned long i;

	mutex_init(&sched->start_work_mutex);
	mutex_init(&sched->work_done_wait_mutex);

	ret = setup_cpus_switch_event(sched);
	if (ret)
		goto out_mutex_destroy;

	calibrate_run_measurement_overhead(sched);
	calibrate_sleep_measurement_overhead(sched);

	test_calibrations(sched);

	ret = perf_sched__read_events(sched);
	if (ret)
		goto out_free_cpus_switch_event;

	printf("nr_run_events: %ld\n", sched->nr_run_events);
	printf("nr_sleep_events: %ld\n", sched->nr_sleep_events);
	printf("nr_wakeup_events: %ld\n", sched->nr_wakeup_events);

	if (sched->targetless_wakeups)
printf("target-less wakeups: %ld\n", sched->targetless_wakeups); 3696 if (sched->multitarget_wakeups) 3697 printf("multi-target wakeups: %ld\n", sched->multitarget_wakeups); 3698 if (sched->nr_run_events_optimized) 3699 printf("run atoms optimized: %ld\n", 3700 sched->nr_run_events_optimized); 3701 3702 print_task_traces(sched); 3703 add_cross_task_wakeups(sched); 3704 3705 sched->thread_funcs_exit = false; 3706 create_tasks(sched); 3707 printf("------------------------------------------------------------\n"); 3708 if (sched->replay_repeat == 0) 3709 sched->replay_repeat = UINT_MAX; 3710 3711 for (i = 0; i < sched->replay_repeat; i++) 3712 run_one_test(sched); 3713 3714 sched->thread_funcs_exit = true; 3715 destroy_tasks(sched); 3716 3717 out_free_cpus_switch_event: 3718 free_cpus_switch_event(sched); 3719 3720 out_mutex_destroy: 3721 mutex_destroy(&sched->start_work_mutex); 3722 mutex_destroy(&sched->work_done_wait_mutex); 3723 return ret; 3724 } 3725 3726 static void setup_sorting(struct perf_sched *sched, const struct option *options, 3727 const char * const usage_msg[]) 3728 { 3729 char *tmp, *tok, *str = strdup(sched->sort_order); 3730 3731 for (tok = strtok_r(str, ", ", &tmp); 3732 tok; tok = strtok_r(NULL, ", ", &tmp)) { 3733 if (sort_dimension__add(tok, &sched->sort_list) < 0) { 3734 usage_with_options_msg(usage_msg, options, 3735 "Unknown --sort key: `%s'", tok); 3736 } 3737 } 3738 3739 free(str); 3740 3741 sort_dimension__add("pid", &sched->cmp_pid); 3742 } 3743 3744 static int process_synthesized_schedstat_event(const struct perf_tool *tool, 3745 union perf_event *event, 3746 struct perf_sample *sample __maybe_unused, 3747 struct machine *machine __maybe_unused) 3748 { 3749 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 3750 3751 if (perf_data__write(sched->data, event, event->header.size) <= 0) { 3752 pr_err("failed to write perf data, error: %m\n"); 3753 return -1; 3754 } 3755 3756 sched->session->header.data_size += 
event->header.size; 3757 return 0; 3758 } 3759 3760 static void sighandler(int sig __maybe_unused) 3761 { 3762 } 3763 3764 static int enable_sched_schedstats(int *reset) 3765 { 3766 char path[PATH_MAX]; 3767 FILE *fp; 3768 char ch; 3769 3770 snprintf(path, PATH_MAX, "%s/sys/kernel/sched_schedstats", procfs__mountpoint()); 3771 fp = fopen(path, "w+"); 3772 if (!fp) { 3773 pr_err("Failed to open %s\n", path); 3774 return -1; 3775 } 3776 3777 ch = getc(fp); 3778 if (ch == '0') { 3779 *reset = 1; 3780 rewind(fp); 3781 putc('1', fp); 3782 fclose(fp); 3783 } 3784 return 0; 3785 } 3786 3787 static int disable_sched_schedstat(void) 3788 { 3789 char path[PATH_MAX]; 3790 FILE *fp; 3791 3792 snprintf(path, PATH_MAX, "%s/sys/kernel/sched_schedstats", procfs__mountpoint()); 3793 fp = fopen(path, "w"); 3794 if (!fp) { 3795 pr_err("Failed to open %s\n", path); 3796 return -1; 3797 } 3798 3799 putc('0', fp); 3800 fclose(fp); 3801 return 0; 3802 } 3803 3804 /* perf.data or any other output file name used by stats subcommand (only). 
 */
const char *output_name;

/*
 * 'perf sched stats record': synthesize a schedstat snapshot before and
 * after the (optional) workload / interactive period and write both into a
 * perf.data file together with the usual header features.
 */
static int perf_sched__schedstat_record(struct perf_sched *sched,
					int argc, const char **argv)
{
	struct perf_session *session;
	struct target target = {};
	struct evlist *evlist;
	int reset = 0;
	int err = 0;
	int fd;
	struct perf_data data = {
		.path = output_name,
		.mode = PERF_DATA_MODE_WRITE,
	};

	/* any of these signals ends the measurement window (see pause() below) */
	signal(SIGINT, sighandler);
	signal(SIGCHLD, sighandler);
	signal(SIGTERM, sighandler);

	evlist = evlist__new();
	if (!evlist)
		return -ENOMEM;

	session = perf_session__new(&data, &sched->tool);
	if (IS_ERR(session)) {
		pr_err("Perf session creation failed.\n");
		evlist__delete(evlist);
		return PTR_ERR(session);
	}

	session->evlist = evlist;

	sched->session = session;
	sched->data = &data;

	fd = perf_data__fd(&data);

	/*
	 * Capture all important metadata about the system. Although they are
	 * not used by `perf sched stats` tool directly, they provide useful
	 * information about profiled environment.
	 */
	perf_header__set_feat(&session->header, HEADER_HOSTNAME);
	perf_header__set_feat(&session->header, HEADER_OSRELEASE);
	perf_header__set_feat(&session->header, HEADER_VERSION);
	perf_header__set_feat(&session->header, HEADER_ARCH);
	perf_header__set_feat(&session->header, HEADER_NRCPUS);
	perf_header__set_feat(&session->header, HEADER_CPUDESC);
	perf_header__set_feat(&session->header, HEADER_CPUID);
	perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
	perf_header__set_feat(&session->header, HEADER_CMDLINE);
	perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
	perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
	perf_header__set_feat(&session->header, HEADER_CACHE);
	perf_header__set_feat(&session->header, HEADER_MEM_TOPOLOGY);
	perf_header__set_feat(&session->header, HEADER_HYBRID_TOPOLOGY);
	perf_header__set_feat(&session->header, HEADER_CPU_DOMAIN_INFO);

	err = perf_session__write_header(session, evlist, fd, false);
	if (err < 0)
		goto out;

	/*
	 * `perf sched stats` does not support workload profiling (-p pid)
	 * since /proc/schedstat file contains cpu specific data only. Hence, a
	 * profile target is either set of cpus or systemwide, never a process.
	 * Note that, although `-- <workload>` is supported, profile data are
	 * still cpu/systemwide.
	 */
	if (cpu_list)
		target.cpu_list = cpu_list;
	else
		target.system_wide = true;

	if (argc) {
		err = evlist__prepare_workload(evlist, &target, argv, false, NULL);
		if (err)
			goto out;
	}

	err = evlist__create_maps(evlist, &target);
	if (err < 0)
		goto out;

	user_requested_cpus = evlist->core.user_requested_cpus;

	/* "before" snapshot */
	err = perf_event__synthesize_schedstat(&(sched->tool),
					       process_synthesized_schedstat_event,
					       user_requested_cpus);
	if (err < 0)
		goto out;

	err = enable_sched_schedstats(&reset);
	if (err < 0)
		goto out;

	if (argc)
		evlist__start_workload(evlist);

	/* wait for signal */
	pause();

	if (reset) {
		err = disable_sched_schedstat();
		if (err < 0)
			goto out;
	}

	/* "after" snapshot */
	err = perf_event__synthesize_schedstat(&(sched->tool),
					       process_synthesized_schedstat_event,
					       user_requested_cpus);
	if (err < 0)
		goto out;

	err = perf_session__write_header(session, evlist, fd, true);

out:
	if (!err)
		fprintf(stderr, "[ perf sched stats: Wrote samples to %s ]\n", data.path);
	else
		fprintf(stderr, "[ perf sched stats: Failed !! ]\n");

	evlist__delete(evlist);
	close(fd);
	return err;
}

/* One sched domain's counters, linked under its owning CPU. */
struct schedstat_domain {
	struct list_head domain_list;
	struct perf_record_schedstat_domain *domain_data;
};

/* One CPU's counters plus the list head of its domains. */
struct schedstat_cpu {
	struct list_head cpu_list;
	struct list_head domain_head;
	struct perf_record_schedstat_cpu *cpu_data;
};

/* global list of parsed schedstat CPU entries (see diagram further below) */
static struct list_head cpu_head = LIST_HEAD_INIT(cpu_head);
/* cursors used while diffing the second (after-workload) pass into the first */
static struct schedstat_cpu *cpu_second_pass;
static struct schedstat_domain *domain_second_pass;
static bool after_workload_flag;
static bool verbose_field;

/*
 * Replace the stored "before" CPU counters with (after - before).  The
 * field list comes from the version-specific schedstat x-macro headers.
 */
static void store_schedstat_cpu_diff(struct schedstat_cpu *after_workload)
{
	struct perf_record_schedstat_cpu *before = cpu_second_pass->cpu_data;
	struct perf_record_schedstat_cpu *after = after_workload->cpu_data;
	__u16 version = after_workload->cpu_data->version;

#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver) \
	(before->_ver._name = after->_ver._name - before->_ver._name)

	if (version == 15) {
#include <perf/schedstat-v15.h>
	} else if (version == 16) {
#include <perf/schedstat-v16.h>
	} else if (version == 17) {
#include <perf/schedstat-v17.h>
	}

#undef CPU_FIELD
}

/* Same as store_schedstat_cpu_diff() but for a domain's counters. */
static void store_schedstat_domain_diff(struct schedstat_domain *after_workload)
{
	struct perf_record_schedstat_domain *before = domain_second_pass->domain_data;
	struct perf_record_schedstat_domain *after = after_workload->domain_data;
	__u16 version = after_workload->domain_data->version;

#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver) \
	(before->_ver._name = after->_ver._name - before->_ver._name)

	if (version == 15) {
#include <perf/schedstat-v15.h>
	} else if (version == 16) {
#include <perf/schedstat-v16.h>
	} else if (version == 17) {
#include <perf/schedstat-v17.h>
	}
#undef DOMAIN_FIELD
}

/* percent change of _y relative to _x; 0.0 when the base is zero */
#define PCT_CHNG(_x, _y) ((_x) ? ((double)((double)(_y) - (_x)) / (_x)) * 100 : 0.0)

/*
 * Print one CPU's schedstat fields.  With only cs1 this is a plain report;
 * with cs2 it is a diff report (count1, count2, percent change).  The field
 * list is expanded from the version-specific x-macro headers.
 */
static inline void print_cpu_stats(struct perf_record_schedstat_cpu *cs1,
				   struct perf_record_schedstat_cpu *cs2)
{
	printf("%-65s ", "DESC");
	if (!cs2)
		printf("%12s %12s", "COUNT", "PCT_CHANGE");
	else
		printf("%12s %11s %12s %14s %10s", "COUNT1", "COUNT2", "PCT_CHANGE",
		       "PCT_CHANGE1", "PCT_CHANGE2");

	printf("\n");
	print_separator2(SEP_LEN, "", 0);

/* percentage of _x within _y; 0.0 when _y is zero */
#define CALC_PCT(_x, _y) ((_y) ? ((double)(_x) / (_y)) * 100 : 0.0)

#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver)	\
	do {								\
		printf("%-65s: " _format, verbose_field ? _desc : #_name, \
		       cs1->_ver._name);				\
		if (!cs2) {						\
			if (_is_pct)					\
				printf(" ( %8.2lf%% )",			\
				       CALC_PCT(cs1->_ver._name, cs1->_ver._pct_of)); \
		} else {						\
			printf("," _format " | %8.2lf%% |", cs2->_ver._name, \
			       PCT_CHNG(cs1->_ver._name, cs2->_ver._name)); \
			if (_is_pct)					\
				printf(" ( %8.2lf%%, %8.2lf%% )",	\
				       CALC_PCT(cs1->_ver._name, cs1->_ver._pct_of), \
				       CALC_PCT(cs2->_ver._name, cs2->_ver._pct_of)); \
		}							\
		printf("\n");						\
	} while (0)

	if (cs1->version == 15) {
#include <perf/schedstat-v15.h>
	} else if (cs1->version == 16) {
#include <perf/schedstat-v16.h>
	} else if (cs1->version == 17) {
#include <perf/schedstat-v17.h>
	}

#undef CPU_FIELD
#undef CALC_PCT
}

/*
 * Print one domain's schedstat fields, optionally as a diff against ds2.
 * jiffies1/jiffies2 are the elapsed times used for the AVG_JIFFIES columns.
 */
static inline void print_domain_stats(struct perf_record_schedstat_domain *ds1,
				      struct perf_record_schedstat_domain *ds2,
				      __u64 jiffies1, __u64 jiffies2)
{
	printf("%-65s ", "DESC");
	if (!ds2)
		printf("%12s %14s", "COUNT", "AVG_JIFFIES");
	else
		printf("%12s %11s %12s %16s %12s", "COUNT1", "COUNT2", "PCT_CHANGE",
		       "AVG_JIFFIES1", "AVG_JIFFIES2");
	printf("\n");

/* print a centered "---- <desc> ----" separator line */
#define DOMAIN_CATEGORY(_desc)						\
	do {								\
		size_t _len = strlen(_desc);				\
		size_t _pre_dash_cnt = (SEP_LEN - _len) / 2;		\
		size_t _post_dash_cnt = SEP_LEN - _len - _pre_dash_cnt;	\
		print_separator2((int)_pre_dash_cnt, _desc, (int)_post_dash_cnt);\
	} while (0)

/* average of _x per _y event; 0.0 when _y is zero */
#define CALC_AVG(_x, _y) ((_y) ? (long double)(_x) / (_y) : 0.0)

#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver)	\
	do {								\
		printf("%-65s: " _format, verbose_field ? _desc : #_name, \
		       ds1->_ver._name);				\
		if (!ds2) {						\
			if (_is_jiffies)				\
				printf(" $ %11.2Lf $",			\
				       CALC_AVG(jiffies1, ds1->_ver._name)); \
		} else {						\
			printf("," _format " | %8.2lf%% |", ds2->_ver._name, \
			       PCT_CHNG(ds1->_ver._name, ds2->_ver._name)); \
			if (_is_jiffies)				\
				printf(" $ %11.2Lf, %11.2Lf $",		\
				       CALC_AVG(jiffies1, ds1->_ver._name), \
				       CALC_AVG(jiffies2, ds2->_ver._name)); \
		}							\
		printf("\n");						\
	} while (0)

/* derived count: _x - _y - _z (marked with a leading '*' in the output) */
#define DERIVED_CNT_FIELD(_name, _desc, _format, _x, _y, _z, _ver)	\
	do {								\
		__u32 t1 = ds1->_ver._x - ds1->_ver._y - ds1->_ver._z;	\
		printf("*%-64s: " _format, verbose_field ? _desc : #_name, t1); \
		if (ds2) {						\
			__u32 t2 = ds2->_ver._x - ds2->_ver._y - ds2->_ver._z; \
			printf("," _format " | %8.2lf%% |", t2,		\
			       PCT_CHNG(t1, t2));			\
		}							\
		printf("\n");						\
	} while (0)

/* derived average: _w / (_x - _y - _z) */
#define DERIVED_AVG_FIELD(_name, _desc, _format, _x, _y, _z, _w, _ver)	\
	do {								\
		__u32 t1 = ds1->_ver._x - ds1->_ver._y - ds1->_ver._z;	\
		printf("*%-64s: " _format, verbose_field ? _desc : #_name, \
		       CALC_AVG(ds1->_ver._w, t1));			\
		if (ds2) {						\
			__u32 t2 = ds2->_ver._x - ds2->_ver._y - ds2->_ver._z; \
			printf("," _format " | %8.2Lf%% |",		\
			       CALC_AVG(ds2->_ver._w, t2),		\
			       PCT_CHNG(CALC_AVG(ds1->_ver._w, t1),	\
					CALC_AVG(ds2->_ver._w, t2)));	\
		}							\
		printf("\n");						\
	} while (0)

	if (ds1->version == 15) {
#include <perf/schedstat-v15.h>
	} else if (ds1->version == 16) {
#include <perf/schedstat-v16.h>
	} else if (ds1->version == 17) {
#include <perf/schedstat-v17.h>
	}

#undef DERIVED_AVG_FIELD
#undef DERIVED_CNT_FIELD
#undef DOMAIN_FIELD
#undef CALC_AVG
#undef DOMAIN_CATEGORY
}
#undef PCT_CHNG

/*
 * Accumulate one CPU's counters into the running summary; on the last
 * entry, divide by cnt to turn the sums into averages.
 */
static void summarize_schedstat_cpu(struct schedstat_cpu *summary_cpu,
				    struct schedstat_cpu *cptr,
				    int cnt, bool is_last)
{
	struct perf_record_schedstat_cpu *summary_cs = summary_cpu->cpu_data,
					 *temp_cs = cptr->cpu_data;

#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver)	\
	do {								\
		summary_cs->_ver._name += temp_cs->_ver._name;		\
		if (is_last)						\
			summary_cs->_ver._name /= cnt;			\
	} while (0)

	if (cptr->cpu_data->version == 15) {
#include <perf/schedstat-v15.h>
	} else if (cptr->cpu_data->version == 16) {
#include <perf/schedstat-v16.h>
	} else if (cptr->cpu_data->version == 17) {
#include <perf/schedstat-v17.h>
	}
#undef CPU_FIELD
}

/* Domain counterpart of summarize_schedstat_cpu(). */
static void summarize_schedstat_domain(struct schedstat_domain *summary_domain,
				       struct schedstat_domain *dptr,
				       int cnt, bool is_last)
{
	struct perf_record_schedstat_domain *summary_ds = summary_domain->domain_data,
					    *temp_ds = dptr->domain_data;

#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver)	\
	do {								\
		summary_ds->_ver._name += temp_ds->_ver._name;		\
		if (is_last)						\
			summary_ds->_ver._name /= cnt;			\
4156 } while (0) 4157 4158 if (dptr->domain_data->version == 15) { 4159 #include <perf/schedstat-v15.h> 4160 } else if (dptr->domain_data->version == 16) { 4161 #include <perf/schedstat-v16.h> 4162 } else if (dptr->domain_data->version == 17) { 4163 #include <perf/schedstat-v17.h> 4164 } 4165 #undef DOMAIN_FIELD 4166 } 4167 4168 /* 4169 * get_all_cpu_stats() appends the summary to the head of the list. 4170 */ 4171 static int get_all_cpu_stats(struct list_head *head) 4172 { 4173 struct schedstat_cpu *cptr = list_first_entry(head, struct schedstat_cpu, cpu_list); 4174 struct schedstat_cpu *summary_head = NULL; 4175 struct perf_record_schedstat_domain *ds; 4176 struct perf_record_schedstat_cpu *cs; 4177 struct schedstat_domain *dptr, *tdptr; 4178 bool is_last = false; 4179 int cnt = 1; 4180 int ret = 0; 4181 4182 if (cptr) { 4183 summary_head = zalloc(sizeof(*summary_head)); 4184 if (!summary_head) 4185 return -ENOMEM; 4186 4187 summary_head->cpu_data = zalloc(sizeof(*cs)); 4188 memcpy(summary_head->cpu_data, cptr->cpu_data, sizeof(*cs)); 4189 4190 INIT_LIST_HEAD(&summary_head->domain_head); 4191 4192 list_for_each_entry(dptr, &cptr->domain_head, domain_list) { 4193 tdptr = zalloc(sizeof(*tdptr)); 4194 if (!tdptr) 4195 return -ENOMEM; 4196 4197 tdptr->domain_data = zalloc(sizeof(*ds)); 4198 if (!tdptr->domain_data) 4199 return -ENOMEM; 4200 4201 memcpy(tdptr->domain_data, dptr->domain_data, sizeof(*ds)); 4202 list_add_tail(&tdptr->domain_list, &summary_head->domain_head); 4203 } 4204 } 4205 4206 list_for_each_entry(cptr, head, cpu_list) { 4207 if (list_is_first(&cptr->cpu_list, head)) 4208 continue; 4209 4210 if (list_is_last(&cptr->cpu_list, head)) 4211 is_last = true; 4212 4213 cnt++; 4214 summarize_schedstat_cpu(summary_head, cptr, cnt, is_last); 4215 tdptr = list_first_entry(&summary_head->domain_head, struct schedstat_domain, 4216 domain_list); 4217 4218 list_for_each_entry(dptr, &cptr->domain_head, domain_list) { 4219 summarize_schedstat_domain(tdptr, dptr, 
cnt, is_last); 4220 tdptr = list_next_entry(tdptr, domain_list); 4221 } 4222 } 4223 4224 list_add(&summary_head->cpu_list, head); 4225 return ret; 4226 } 4227 4228 static int show_schedstat_data(struct list_head *head1, struct cpu_domain_map **cd_map1, 4229 struct list_head *head2, struct cpu_domain_map **cd_map2, 4230 bool summary_only) 4231 { 4232 struct schedstat_cpu *cptr1 = list_first_entry(head1, struct schedstat_cpu, cpu_list); 4233 struct perf_record_schedstat_domain *ds1 = NULL, *ds2 = NULL; 4234 struct perf_record_schedstat_cpu *cs1 = NULL, *cs2 = NULL; 4235 struct schedstat_domain *dptr1 = NULL, *dptr2 = NULL; 4236 struct schedstat_cpu *cptr2 = NULL; 4237 __u64 jiffies1 = 0, jiffies2 = 0; 4238 bool is_summary = true; 4239 int ret = 0; 4240 4241 printf("Description\n"); 4242 print_separator2(SEP_LEN, "", 0); 4243 printf("%-30s-> %s\n", "DESC", "Description of the field"); 4244 printf("%-30s-> %s\n", "COUNT", "Value of the field"); 4245 printf("%-30s-> %s\n", "PCT_CHANGE", "Percent change with corresponding base value"); 4246 printf("%-30s-> %s\n", "AVG_JIFFIES", 4247 "Avg time in jiffies between two consecutive occurrence of event"); 4248 4249 print_separator2(SEP_LEN, "", 0); 4250 printf("\n"); 4251 4252 printf("%-65s: ", "Time elapsed (in jiffies)"); 4253 jiffies1 = cptr1->cpu_data->timestamp; 4254 printf("%11llu", jiffies1); 4255 if (head2) { 4256 cptr2 = list_first_entry(head2, struct schedstat_cpu, cpu_list); 4257 jiffies2 = cptr2->cpu_data->timestamp; 4258 printf(",%11llu", jiffies2); 4259 } 4260 printf("\n"); 4261 4262 ret = get_all_cpu_stats(head1); 4263 if (cptr2) { 4264 ret = get_all_cpu_stats(head2); 4265 cptr2 = list_first_entry(head2, struct schedstat_cpu, cpu_list); 4266 } 4267 4268 list_for_each_entry(cptr1, head1, cpu_list) { 4269 struct cpu_domain_map *cd_info1 = NULL, *cd_info2 = NULL; 4270 4271 cs1 = cptr1->cpu_data; 4272 cd_info1 = cd_map1[cs1->cpu]; 4273 if (cptr2) { 4274 cs2 = cptr2->cpu_data; 4275 cd_info2 = cd_map2[cs2->cpu]; 4276 
			dptr2 = list_first_entry(&cptr2->domain_head, struct schedstat_domain,
						 domain_list);
		}

		/* diff mode requires both lists to describe the same CPUs */
		if (cs2 && cs1->cpu != cs2->cpu) {
			pr_err("Failed because matching cpus not found for diff\n");
			return -1;
		}

		if (cd_info2 && cd_info1->nr_domains != cd_info2->nr_domains) {
			pr_err("Failed because nr_domains is not same for cpus\n");
			return -1;
		}

		print_separator2(SEP_LEN, "", 0);

		/* the first list entry is the summary prepended by get_all_cpu_stats() */
		if (is_summary)
			printf("CPU: <ALL CPUS SUMMARY>\n");
		else
			printf("CPU: %d\n", cs1->cpu);

		print_separator2(SEP_LEN, "", 0);
		print_cpu_stats(cs1, cs2);
		print_separator2(SEP_LEN, "", 0);

		list_for_each_entry(dptr1, &cptr1->domain_head, domain_list) {
			struct domain_info *dinfo1 = NULL, *dinfo2 = NULL;

			ds1 = dptr1->domain_data;
			dinfo1 = cd_info1->domains[ds1->domain];
			if (dptr2) {
				ds2 = dptr2->domain_data;
				dinfo2 = cd_info2->domains[ds2->domain];
			}

			if (dinfo2 && dinfo1->domain != dinfo2->domain) {
				pr_err("Failed because matching domain not found for diff\n");
				return -1;
			}

			if (is_summary) {
				if (dinfo1->dname)
					printf("CPU: <ALL CPUS SUMMARY> | DOMAIN: %s\n",
					       dinfo1->dname);
				else
					printf("CPU: <ALL CPUS SUMMARY> | DOMAIN: %d\n",
					       dinfo1->domain);
			} else {
				if (dinfo1->dname)
					printf("CPU: %d | DOMAIN: %s | DOMAIN_CPUS: ",
					       cs1->cpu, dinfo1->dname);
				else
					printf("CPU: %d | DOMAIN: %d | DOMAIN_CPUS: ",
					       cs1->cpu, dinfo1->domain);

				printf("%s\n", dinfo1->cpulist);
			}
			print_separator2(SEP_LEN, "", 0);
			print_domain_stats(ds1, ds2, jiffies1, jiffies2);
			print_separator2(SEP_LEN, "", 0);

			if (dptr2)
				dptr2 = list_next_entry(dptr2, domain_list);
		}
		if (summary_only)
			break;

		if (cptr2)
			cptr2 = list_next_entry(cptr2, cpu_list);

		is_summary = false;
	}
	return ret;
}

/*
 * Creates a linked list of cpu_data and domain_data. Below represents the structure of the linked
 * list where CPU0,CPU1,CPU2, ..., CPU(N-1) stores the cpu_data. Here N is the total number of cpus.
 * Each of the CPU points to the list of domain_data. Here DOMAIN0, DOMAIN1, DOMAIN2, ... represents
 * the domain_data. Here D0, D1, D2, ..., Dm are the number of domains in the respective cpus.
 *
 * +----------+
 * | CPU_HEAD |
 * +----------+
 *      |
 *      v
 * +----------+    +---------+    +---------+    +---------+           +--------------+
 * | CPU0     | -> | DOMAIN0 | -> | DOMAIN1 | -> | DOMAIN2 | -> ... -> | DOMAIN(D0-1) |
 * +----------+    +---------+    +---------+    +---------+           +--------------+
 *      |
 *      v
 * +----------+    +---------+    +---------+    +---------+           +--------------+
 * | CPU1     | -> | DOMAIN0 | -> | DOMAIN1 | -> | DOMAIN2 | -> ... -> | DOMAIN(D1-1) |
 * +----------+    +---------+    +---------+    +---------+           +--------------+
 *      |
 *      v
 * +----------+    +---------+    +---------+    +---------+           +--------------+
 * | CPU2     | -> | DOMAIN0 | -> | DOMAIN1 | -> | DOMAIN2 | -> ... -> | DOMAIN(D2-1) |
 * +----------+    +---------+    +---------+    +---------+           +--------------+
 *      |
 *      v
 *     ...
 *      |
 *      v
 * +----------+    +---------+    +---------+    +---------+           +--------------+
 * | CPU(N-1) | -> | DOMAIN0 | -> | DOMAIN1 | -> | DOMAIN2 | -> ... -> | DOMAIN(Dm-1) |
 * +----------+    +---------+    +---------+    +---------+           +--------------+
 *
 * Each cpu as well as domain has 2 enties in the event list one before the workload starts and
 * other after completion of the workload. The above linked list stores the diff of the cpu and
 * domain statistics.
 */
static int perf_sched__process_schedstat(const struct perf_tool *tool __maybe_unused,
					 struct perf_session *session __maybe_unused,
					 union perf_event *event)
{
	struct perf_cpu this_cpu;
	static __u32 initial_cpu;

	switch (event->header.type) {
	case PERF_RECORD_SCHEDSTAT_CPU:
		this_cpu.cpu = event->schedstat_cpu.cpu;
		break;
	case PERF_RECORD_SCHEDSTAT_DOMAIN:
		this_cpu.cpu = event->schedstat_domain.cpu;
		break;
	default:
		return 0;
	}

	/* honor the user's --cpu filter, if any */
	if (user_requested_cpus && !perf_cpu_map__has(user_requested_cpus, this_cpu))
		return 0;

	if (event->header.type == PERF_RECORD_SCHEDSTAT_CPU) {
		struct schedstat_cpu *temp = zalloc(sizeof(*temp));

		if (!temp)
			return -ENOMEM;

		/* NOTE(review): 'temp' leaks if this zalloc() fails — worth fixing */
		temp->cpu_data = zalloc(sizeof(*temp->cpu_data));
		if (!temp->cpu_data)
			return -ENOMEM;

		memcpy(temp->cpu_data, &event->schedstat_cpu, sizeof(*temp->cpu_data));

		/* seeing the first CPU again means the "after" snapshot started */
		if (!list_empty(&cpu_head) && temp->cpu_data->cpu == initial_cpu)
			after_workload_flag = true;

		if (!after_workload_flag) {
			/* first pass: build the list */
			if (list_empty(&cpu_head))
				initial_cpu = temp->cpu_data->cpu;

			list_add_tail(&temp->cpu_list, &cpu_head);
			INIT_LIST_HEAD(&temp->domain_head);
		} else {
			/* second pass: diff into the stored first-pass entries */
			if (temp->cpu_data->cpu == initial_cpu) {
				cpu_second_pass = list_first_entry(&cpu_head, struct schedstat_cpu,
								   cpu_list);
				/* timestamp becomes the elapsed jiffies */
				cpu_second_pass->cpu_data->timestamp =
					temp->cpu_data->timestamp - cpu_second_pass->cpu_data->timestamp;
			} else {
				cpu_second_pass = list_next_entry(cpu_second_pass, cpu_list);
			}
			domain_second_pass = list_first_entry(&cpu_second_pass->domain_head,
							      struct schedstat_domain, domain_list);
			store_schedstat_cpu_diff(temp);
		}
	} else if (event->header.type == PERF_RECORD_SCHEDSTAT_DOMAIN) {
		struct schedstat_cpu *cpu_tail;
		struct schedstat_domain *temp = zalloc(sizeof(*temp));

		if (!temp)
return -ENOMEM; 4449 4450 temp->domain_data = zalloc(sizeof(*temp->domain_data)); 4451 if (!temp->domain_data) 4452 return -ENOMEM; 4453 4454 memcpy(temp->domain_data, &event->schedstat_domain, sizeof(*temp->domain_data)); 4455 4456 if (!after_workload_flag) { 4457 cpu_tail = list_last_entry(&cpu_head, struct schedstat_cpu, cpu_list); 4458 list_add_tail(&temp->domain_list, &cpu_tail->domain_head); 4459 } else { 4460 store_schedstat_domain_diff(temp); 4461 domain_second_pass = list_next_entry(domain_second_pass, domain_list); 4462 } 4463 } 4464 4465 return 0; 4466 } 4467 4468 static void free_schedstat(struct list_head *head) 4469 { 4470 struct schedstat_domain *dptr, *n1; 4471 struct schedstat_cpu *cptr, *n2; 4472 4473 list_for_each_entry_safe(cptr, n2, head, cpu_list) { 4474 list_for_each_entry_safe(dptr, n1, &cptr->domain_head, domain_list) { 4475 list_del_init(&dptr->domain_list); 4476 free(dptr); 4477 } 4478 list_del_init(&cptr->cpu_list); 4479 free(cptr); 4480 } 4481 } 4482 4483 static int perf_sched__schedstat_report(struct perf_sched *sched) 4484 { 4485 struct cpu_domain_map **cd_map; 4486 struct perf_session *session; 4487 struct target target = {}; 4488 struct perf_data data = { 4489 .path = input_name, 4490 .mode = PERF_DATA_MODE_READ, 4491 }; 4492 int err = 0; 4493 4494 sched->tool.schedstat_cpu = perf_sched__process_schedstat; 4495 sched->tool.schedstat_domain = perf_sched__process_schedstat; 4496 4497 session = perf_session__new(&data, &sched->tool); 4498 if (IS_ERR(session)) { 4499 pr_err("Perf session creation failed.\n"); 4500 return PTR_ERR(session); 4501 } 4502 4503 if (cpu_list) 4504 target.cpu_list = cpu_list; 4505 else 4506 target.system_wide = true; 4507 4508 err = evlist__create_maps(session->evlist, &target); 4509 if (err < 0) 4510 goto out; 4511 4512 user_requested_cpus = session->evlist->core.user_requested_cpus; 4513 4514 err = perf_session__process_events(session); 4515 4516 if (!err) { 4517 setup_pager(); 4518 4519 if 
(list_empty(&cpu_head)) { 4520 pr_err("Data is not available\n"); 4521 err = -1; 4522 goto out; 4523 } 4524 4525 cd_map = session->header.env.cpu_domain; 4526 err = show_schedstat_data(&cpu_head, cd_map, NULL, NULL, false); 4527 } 4528 4529 out: 4530 free_schedstat(&cpu_head); 4531 perf_session__delete(session); 4532 return err; 4533 } 4534 4535 static int perf_sched__schedstat_diff(struct perf_sched *sched, 4536 int argc, const char **argv) 4537 { 4538 struct cpu_domain_map **cd_map0 = NULL, **cd_map1 = NULL; 4539 struct list_head cpu_head_ses0, cpu_head_ses1; 4540 struct perf_session *session[2]; 4541 struct perf_data data[2]; 4542 int ret = 0, err = 0; 4543 static const char *defaults[] = { 4544 "perf.data.old", 4545 "perf.data", 4546 }; 4547 4548 if (argc) { 4549 if (argc == 1) 4550 defaults[1] = argv[0]; 4551 else if (argc == 2) { 4552 defaults[0] = argv[0]; 4553 defaults[1] = argv[1]; 4554 } else { 4555 pr_err("perf sched stats diff is not supported with more than 2 files.\n"); 4556 goto out_ret; 4557 } 4558 } 4559 4560 INIT_LIST_HEAD(&cpu_head_ses0); 4561 INIT_LIST_HEAD(&cpu_head_ses1); 4562 4563 sched->tool.schedstat_cpu = perf_sched__process_schedstat; 4564 sched->tool.schedstat_domain = perf_sched__process_schedstat; 4565 4566 data[0].path = defaults[0]; 4567 data[0].mode = PERF_DATA_MODE_READ; 4568 session[0] = perf_session__new(&data[0], &sched->tool); 4569 if (IS_ERR(session[0])) { 4570 ret = PTR_ERR(session[0]); 4571 pr_err("Failed to open %s\n", data[0].path); 4572 goto out_delete_ses0; 4573 } 4574 4575 err = perf_session__process_events(session[0]); 4576 if (err) 4577 goto out_delete_ses0; 4578 4579 cd_map0 = session[0]->header.env.cpu_domain; 4580 list_replace_init(&cpu_head, &cpu_head_ses0); 4581 after_workload_flag = false; 4582 4583 data[1].path = defaults[1]; 4584 data[1].mode = PERF_DATA_MODE_READ; 4585 session[1] = perf_session__new(&data[1], &sched->tool); 4586 if (IS_ERR(session[1])) { 4587 ret = PTR_ERR(session[1]); 4588 pr_err("Failed to 
open %s\n", data[1].path); 4589 goto out_delete_ses1; 4590 } 4591 4592 err = perf_session__process_events(session[1]); 4593 if (err) 4594 goto out_delete_ses1; 4595 4596 cd_map1 = session[1]->header.env.cpu_domain; 4597 list_replace_init(&cpu_head, &cpu_head_ses1); 4598 after_workload_flag = false; 4599 setup_pager(); 4600 4601 if (list_empty(&cpu_head_ses1)) { 4602 pr_err("Data is not available\n"); 4603 ret = -1; 4604 goto out_delete_ses1; 4605 } 4606 4607 if (list_empty(&cpu_head_ses0)) { 4608 pr_err("Data is not available\n"); 4609 ret = -1; 4610 goto out_delete_ses0; 4611 } 4612 4613 show_schedstat_data(&cpu_head_ses0, cd_map0, &cpu_head_ses1, cd_map1, true); 4614 4615 out_delete_ses1: 4616 free_schedstat(&cpu_head_ses1); 4617 if (!IS_ERR(session[1])) 4618 perf_session__delete(session[1]); 4619 4620 out_delete_ses0: 4621 free_schedstat(&cpu_head_ses0); 4622 if (!IS_ERR(session[0])) 4623 perf_session__delete(session[0]); 4624 4625 out_ret: 4626 return ret; 4627 } 4628 4629 static int process_synthesized_event_live(const struct perf_tool *tool __maybe_unused, 4630 union perf_event *event, 4631 struct perf_sample *sample __maybe_unused, 4632 struct machine *machine __maybe_unused) 4633 { 4634 return perf_sched__process_schedstat(tool, NULL, event); 4635 } 4636 4637 static int perf_sched__schedstat_live(struct perf_sched *sched, 4638 int argc, const char **argv) 4639 { 4640 struct cpu_domain_map **cd_map = NULL; 4641 struct target target = {}; 4642 u32 __maybe_unused md; 4643 struct evlist *evlist; 4644 u32 nr = 0, sv; 4645 int reset = 0; 4646 int err = 0; 4647 4648 signal(SIGINT, sighandler); 4649 signal(SIGCHLD, sighandler); 4650 signal(SIGTERM, sighandler); 4651 4652 evlist = evlist__new(); 4653 if (!evlist) 4654 return -ENOMEM; 4655 4656 /* 4657 * `perf sched schedstat` does not support workload profiling (-p pid) 4658 * since /proc/schedstat file contains cpu specific data only. 
Hence, a 4659 * profile target is either set of cpus or systemwide, never a process. 4660 * Note that, although `-- <workload>` is supported, profile data are 4661 * still cpu/systemwide. 4662 */ 4663 if (cpu_list) 4664 target.cpu_list = cpu_list; 4665 else 4666 target.system_wide = true; 4667 4668 if (argc) { 4669 err = evlist__prepare_workload(evlist, &target, argv, false, NULL); 4670 if (err) 4671 goto out; 4672 } 4673 4674 err = evlist__create_maps(evlist, &target); 4675 if (err < 0) 4676 goto out; 4677 4678 user_requested_cpus = evlist->core.user_requested_cpus; 4679 4680 err = perf_event__synthesize_schedstat(&(sched->tool), 4681 process_synthesized_event_live, 4682 user_requested_cpus); 4683 if (err < 0) 4684 goto out; 4685 4686 err = enable_sched_schedstats(&reset); 4687 if (err < 0) 4688 goto out; 4689 4690 if (argc) 4691 evlist__start_workload(evlist); 4692 4693 /* wait for signal */ 4694 pause(); 4695 4696 if (reset) { 4697 err = disable_sched_schedstat(); 4698 if (err < 0) 4699 goto out; 4700 } 4701 4702 err = perf_event__synthesize_schedstat(&(sched->tool), 4703 process_synthesized_event_live, 4704 user_requested_cpus); 4705 if (err) 4706 goto out; 4707 4708 setup_pager(); 4709 4710 if (list_empty(&cpu_head)) { 4711 pr_err("Data is not available\n"); 4712 err = -1; 4713 goto out; 4714 } 4715 4716 nr = cpu__max_present_cpu().cpu; 4717 cd_map = build_cpu_domain_map(&sv, &md, nr); 4718 if (!cd_map) { 4719 pr_err("Unable to generate cpu-domain relation info"); 4720 goto out; 4721 } 4722 4723 show_schedstat_data(&cpu_head, cd_map, NULL, NULL, false); 4724 free_cpu_domain_info(cd_map, sv, nr); 4725 out: 4726 free_schedstat(&cpu_head); 4727 evlist__delete(evlist); 4728 return err; 4729 } 4730 4731 static bool schedstat_events_exposed(void) 4732 { 4733 /* 4734 * Select "sched:sched_stat_wait" event to check 4735 * whether schedstat tracepoints are exposed. 4736 */ 4737 return IS_ERR(trace_event__tp_format("sched", "sched_stat_wait")) ? 
4738 false : true; 4739 } 4740 4741 static int __cmd_record(int argc, const char **argv) 4742 { 4743 unsigned int rec_argc, i, j; 4744 char **rec_argv; 4745 const char **rec_argv_copy; 4746 const char * const record_args[] = { 4747 "record", 4748 "-a", 4749 "-R", 4750 "-m", "1024", 4751 "-c", "1", 4752 "-e", "sched:sched_switch", 4753 "-e", "sched:sched_stat_runtime", 4754 "-e", "sched:sched_process_fork", 4755 "-e", "sched:sched_wakeup_new", 4756 "-e", "sched:sched_migrate_task", 4757 }; 4758 4759 /* 4760 * The tracepoints trace_sched_stat_{wait, sleep, iowait} 4761 * are not exposed to user if CONFIG_SCHEDSTATS is not set, 4762 * to prevent "perf sched record" execution failure, determine 4763 * whether to record schedstat events according to actual situation. 4764 */ 4765 const char * const schedstat_args[] = { 4766 "-e", "sched:sched_stat_wait", 4767 "-e", "sched:sched_stat_sleep", 4768 "-e", "sched:sched_stat_iowait", 4769 }; 4770 unsigned int schedstat_argc = schedstat_events_exposed() ? 
ARRAY_SIZE(schedstat_args) : 0;

	struct tep_event *waking_event;
	int ret;

	/*
	 * +2 for either "-e", "sched:sched_wakeup" or
	 * "-e", "sched:sched_waking"
	 */
	rec_argc = ARRAY_SIZE(record_args) + 2 + schedstat_argc + argc - 1;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -ENOMEM;
	rec_argv_copy = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv_copy == NULL) {
		free(rec_argv);
		return -ENOMEM;
	}

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	/* Prefer sched_waking when the kernel exposes it, else sched_wakeup. */
	rec_argv[i++] = strdup("-e");
	waking_event = trace_event__tp_format("sched", "sched_waking");
	if (!IS_ERR(waking_event))
		rec_argv[i++] = strdup("sched:sched_waking");
	else
		rec_argv[i++] = strdup("sched:sched_wakeup");

	for (j = 0; j < schedstat_argc; j++)
		rec_argv[i++] = strdup(schedstat_args[j]);

	/* Forward user arguments, skipping argv[0] ("record" itself). */
	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = strdup(argv[j]);

	BUG_ON(i != rec_argc);

	/*
	 * NOTE(review): cmd_record() gets a copy, presumably because it may
	 * permute/consume its argv; rec_argv stays intact so every strdup'd
	 * entry can be freed below — confirm against cmd_record().
	 */
	memcpy(rec_argv_copy, rec_argv, sizeof(char *) * rec_argc);
	ret = cmd_record(rec_argc, rec_argv_copy);

	for (i = 0; i < rec_argc; i++)
		free(rec_argv[i]);
	free(rec_argv);
	free(rec_argv_copy);

	return ret;
}

/*
 * Entry point for 'perf sched': parses the subcommand and dispatches to
 * record/latency/map/replay/script/timehist/stats handling.
 */
int cmd_sched(int argc, const char **argv)
{
	static const char default_sort_order[] = "avg, max, switch, runtime";
	struct perf_sched sched = {
		.cmp_pid	 = LIST_HEAD_INIT(sched.cmp_pid),
		.sort_list	 = LIST_HEAD_INIT(sched.sort_list),
		.sort_order	 = default_sort_order,
		.replay_repeat	 = 10,
		.profile_cpu	 = -1,
		.next_shortname1 = 'A',
		.next_shortname2 = '0',
		.skip_merge	 = 0,
		.show_callchain	 = 1,
		.max_stack	 = 5,
	};
	/* Options shared by all subcommands (via OPT_PARENT below). */
	const struct option sched_options[] = {
	OPT_STRING('i', "input", &input_name, "file",
		    "input file name"),
	OPT_INCR('v', "verbose", &verbose,
4838 "be more verbose (show symbol address, etc)"), 4839 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 4840 "dump raw trace in ASCII"), 4841 OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"), 4842 OPT_END() 4843 }; 4844 const struct option latency_options[] = { 4845 OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]", 4846 "sort by key(s): runtime, switch, avg, max"), 4847 OPT_INTEGER('C', "CPU", &sched.profile_cpu, 4848 "CPU to profile on"), 4849 OPT_BOOLEAN('p', "pids", &sched.skip_merge, 4850 "latency stats per pid instead of per comm"), 4851 OPT_PARENT(sched_options) 4852 }; 4853 const struct option replay_options[] = { 4854 OPT_UINTEGER('r', "repeat", &sched.replay_repeat, 4855 "repeat the workload replay N times (0: infinite)"), 4856 OPT_PARENT(sched_options) 4857 }; 4858 const struct option map_options[] = { 4859 OPT_BOOLEAN(0, "compact", &sched.map.comp, 4860 "map output in compact mode"), 4861 OPT_STRING(0, "color-pids", &sched.map.color_pids_str, "pids", 4862 "highlight given pids in map"), 4863 OPT_STRING(0, "color-cpus", &sched.map.color_cpus_str, "cpus", 4864 "highlight given CPUs in map"), 4865 OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus", 4866 "display given CPUs in map"), 4867 OPT_STRING(0, "task-name", &sched.map.task_name, "task", 4868 "map output only for the given task name(s)."), 4869 OPT_BOOLEAN(0, "fuzzy-name", &sched.map.fuzzy, 4870 "given command name can be partially matched (fuzzy matching)"), 4871 OPT_PARENT(sched_options) 4872 }; 4873 const struct option timehist_options[] = { 4874 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 4875 "file", "vmlinux pathname"), 4876 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, 4877 "file", "kallsyms pathname"), 4878 OPT_BOOLEAN('g', "call-graph", &sched.show_callchain, 4879 "Display call chains if present (default on)"), 4880 OPT_UINTEGER(0, "max-stack", &sched.max_stack, 4881 "Maximum number of functions to display backtrace."), 4882 OPT_STRING(0, "symfs", 
&symbol_conf.symfs, "directory", 4883 "Look for files with symbols relative to this directory"), 4884 OPT_BOOLEAN('s', "summary", &sched.summary_only, 4885 "Show only syscall summary with statistics"), 4886 OPT_BOOLEAN('S', "with-summary", &sched.summary, 4887 "Show all syscalls and summary with statistics"), 4888 OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"), 4889 OPT_BOOLEAN('n', "next", &sched.show_next, "Show next task"), 4890 OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"), 4891 OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"), 4892 OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"), 4893 OPT_STRING(0, "time", &sched.time_str, "str", 4894 "Time span for analysis (start,stop)"), 4895 OPT_BOOLEAN(0, "state", &sched.show_state, "Show task state when sched-out"), 4896 OPT_STRING('p', "pid", &symbol_conf.pid_list_str, "pid[,pid...]", 4897 "analyze events only for given process id(s)"), 4898 OPT_STRING('t', "tid", &symbol_conf.tid_list_str, "tid[,tid...]", 4899 "analyze events only for given thread id(s)"), 4900 OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"), 4901 OPT_BOOLEAN(0, "show-prio", &sched.show_prio, "Show task priority"), 4902 OPT_STRING(0, "prio", &sched.prio_str, "prio", 4903 "analyze events only for given task priority(ies)"), 4904 OPT_BOOLEAN('P', "pre-migrations", &sched.pre_migrations, "Show pre-migration wait time"), 4905 OPT_PARENT(sched_options) 4906 }; 4907 const struct option stats_options[] = { 4908 OPT_STRING('i', "input", &input_name, "file", 4909 "`stats report` with input filename"), 4910 OPT_STRING('o', "output", &output_name, "file", 4911 "`stats record` with output filename"), 4912 OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"), 4913 OPT_BOOLEAN('v', "verbose", &verbose_field, "Show explanation for fields in the report"), 4914 OPT_END() 4915 }; 4916 4917 const char * const latency_usage[] = { 
4918 "perf sched latency [<options>]", 4919 NULL 4920 }; 4921 const char * const replay_usage[] = { 4922 "perf sched replay [<options>]", 4923 NULL 4924 }; 4925 const char * const map_usage[] = { 4926 "perf sched map [<options>]", 4927 NULL 4928 }; 4929 const char * const timehist_usage[] = { 4930 "perf sched timehist [<options>]", 4931 NULL 4932 }; 4933 const char *stats_usage[] = { 4934 "perf sched stats {record|report} [<options>]", 4935 NULL 4936 }; 4937 const char *const sched_subcommands[] = { "record", "latency", "map", 4938 "replay", "script", 4939 "timehist", "stats", NULL }; 4940 const char *sched_usage[] = { 4941 NULL, 4942 NULL 4943 }; 4944 struct trace_sched_handler lat_ops = { 4945 .wakeup_event = latency_wakeup_event, 4946 .switch_event = latency_switch_event, 4947 .runtime_event = latency_runtime_event, 4948 .migrate_task_event = latency_migrate_task_event, 4949 }; 4950 struct trace_sched_handler map_ops = { 4951 .switch_event = map_switch_event, 4952 }; 4953 struct trace_sched_handler replay_ops = { 4954 .wakeup_event = replay_wakeup_event, 4955 .switch_event = replay_switch_event, 4956 .fork_event = replay_fork_event, 4957 }; 4958 int ret; 4959 4960 perf_tool__init(&sched.tool, /*ordered_events=*/true); 4961 sched.tool.sample = perf_sched__process_tracepoint_sample; 4962 sched.tool.comm = perf_sched__process_comm; 4963 sched.tool.namespaces = perf_event__process_namespaces; 4964 sched.tool.lost = perf_event__process_lost; 4965 sched.tool.fork = perf_sched__process_fork_event; 4966 4967 argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands, 4968 sched_usage, PARSE_OPT_STOP_AT_NON_OPTION); 4969 if (!argc) 4970 usage_with_options(sched_usage, sched_options); 4971 4972 thread__set_priv_destructor(free); 4973 4974 /* 4975 * Aliased to 'perf script' for now: 4976 */ 4977 if (!strcmp(argv[0], "script")) { 4978 ret = cmd_script(argc, argv); 4979 } else if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) { 4980 ret = 
__cmd_record(argc, argv); 4981 } else if (strlen(argv[0]) > 2 && strstarts("latency", argv[0])) { 4982 sched.tp_handler = &lat_ops; 4983 if (argc > 1) { 4984 argc = parse_options(argc, argv, latency_options, latency_usage, 0); 4985 if (argc) 4986 usage_with_options(latency_usage, latency_options); 4987 } 4988 setup_sorting(&sched, latency_options, latency_usage); 4989 ret = perf_sched__lat(&sched); 4990 } else if (!strcmp(argv[0], "map")) { 4991 if (argc) { 4992 argc = parse_options(argc, argv, map_options, map_usage, 0); 4993 if (argc) 4994 usage_with_options(map_usage, map_options); 4995 4996 if (sched.map.task_name) { 4997 sched.map.task_names = strlist__new(sched.map.task_name, NULL); 4998 if (sched.map.task_names == NULL) { 4999 fprintf(stderr, "Failed to parse task names\n"); 5000 ret = -1; 5001 goto out; 5002 } 5003 } 5004 } 5005 sched.tp_handler = &map_ops; 5006 setup_sorting(&sched, latency_options, latency_usage); 5007 ret = perf_sched__map(&sched); 5008 } else if (strlen(argv[0]) > 2 && strstarts("replay", argv[0])) { 5009 sched.tp_handler = &replay_ops; 5010 if (argc) { 5011 argc = parse_options(argc, argv, replay_options, replay_usage, 0); 5012 if (argc) 5013 usage_with_options(replay_usage, replay_options); 5014 } 5015 ret = perf_sched__replay(&sched); 5016 } else if (!strcmp(argv[0], "timehist")) { 5017 if (argc) { 5018 argc = parse_options(argc, argv, timehist_options, 5019 timehist_usage, 0); 5020 if (argc) 5021 usage_with_options(timehist_usage, timehist_options); 5022 } 5023 if ((sched.show_wakeups || sched.show_next) && 5024 sched.summary_only) { 5025 pr_err(" Error: -s and -[n|w] are mutually exclusive.\n"); 5026 parse_options_usage(timehist_usage, timehist_options, "s", true); 5027 if (sched.show_wakeups) 5028 parse_options_usage(NULL, timehist_options, "w", true); 5029 if (sched.show_next) 5030 parse_options_usage(NULL, timehist_options, "n", true); 5031 ret = -EINVAL; 5032 goto out; 5033 } 5034 ret = symbol__validate_sym_arguments(); 5035 if 
(!ret) 5036 ret = perf_sched__timehist(&sched); 5037 } else if (!strcmp(argv[0], "stats")) { 5038 const char *const stats_subcommands[] = {"record", "report", NULL}; 5039 5040 argc = parse_options_subcommand(argc, argv, stats_options, 5041 stats_subcommands, 5042 stats_usage, 5043 PARSE_OPT_STOP_AT_NON_OPTION); 5044 5045 if (argv[0] && !strcmp(argv[0], "record")) { 5046 if (argc) 5047 argc = parse_options(argc, argv, stats_options, 5048 stats_usage, 0); 5049 return perf_sched__schedstat_record(&sched, argc, argv); 5050 } else if (argv[0] && !strcmp(argv[0], "report")) { 5051 if (argc) 5052 argc = parse_options(argc, argv, stats_options, 5053 stats_usage, 0); 5054 return perf_sched__schedstat_report(&sched); 5055 } else if (argv[0] && !strcmp(argv[0], "diff")) { 5056 if (argc) 5057 argc = parse_options(argc, argv, stats_options, 5058 stats_usage, 0); 5059 return perf_sched__schedstat_diff(&sched, argc, argv); 5060 } 5061 return perf_sched__schedstat_live(&sched, argc, argv); 5062 } else { 5063 usage_with_options(sched_usage, sched_options); 5064 } 5065 5066 out: 5067 /* free usage string allocated by parse_options_subcommand */ 5068 free((void *)sched_usage[0]); 5069 5070 return ret; 5071 } 5072