1 #include "builtin.h" 2 #include "perf.h" 3 4 #include "util/util.h" 5 #include "util/evlist.h" 6 #include "util/cache.h" 7 #include "util/evsel.h" 8 #include "util/symbol.h" 9 #include "util/thread.h" 10 #include "util/header.h" 11 #include "util/session.h" 12 #include "util/tool.h" 13 #include "util/cloexec.h" 14 #include "util/thread_map.h" 15 #include "util/color.h" 16 #include "util/stat.h" 17 #include "util/callchain.h" 18 #include "util/time-utils.h" 19 20 #include <subcmd/parse-options.h> 21 #include "util/trace-event.h" 22 23 #include "util/debug.h" 24 25 #include <linux/log2.h> 26 #include <sys/prctl.h> 27 #include <sys/resource.h> 28 29 #include <semaphore.h> 30 #include <pthread.h> 31 #include <math.h> 32 #include <api/fs/fs.h> 33 #include <linux/time64.h> 34 35 #define PR_SET_NAME 15 /* Set process name */ 36 #define MAX_CPUS 4096 37 #define COMM_LEN 20 38 #define SYM_LEN 129 39 #define MAX_PID 1024000 40 41 struct sched_atom; 42 43 struct task_desc { 44 unsigned long nr; 45 unsigned long pid; 46 char comm[COMM_LEN]; 47 48 unsigned long nr_events; 49 unsigned long curr_event; 50 struct sched_atom **atoms; 51 52 pthread_t thread; 53 sem_t sleep_sem; 54 55 sem_t ready_for_work; 56 sem_t work_done_sem; 57 58 u64 cpu_usage; 59 }; 60 61 enum sched_event_type { 62 SCHED_EVENT_RUN, 63 SCHED_EVENT_SLEEP, 64 SCHED_EVENT_WAKEUP, 65 SCHED_EVENT_MIGRATION, 66 }; 67 68 struct sched_atom { 69 enum sched_event_type type; 70 int specific_wait; 71 u64 timestamp; 72 u64 duration; 73 unsigned long nr; 74 sem_t *wait_sem; 75 struct task_desc *wakee; 76 }; 77 78 #define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP" 79 80 enum thread_state { 81 THREAD_SLEEPING = 0, 82 THREAD_WAIT_CPU, 83 THREAD_SCHED_IN, 84 THREAD_IGNORE 85 }; 86 87 struct work_atom { 88 struct list_head list; 89 enum thread_state state; 90 u64 sched_out_time; 91 u64 wake_up_time; 92 u64 sched_in_time; 93 u64 runtime; 94 }; 95 96 struct work_atoms { 97 struct list_head work_list; 98 struct thread *thread; 99 struct rb_node node; 100 u64 max_lat; 101 u64 max_lat_at; 102 u64 total_lat; 103 u64 nb_atoms; 104 u64 total_runtime; 105 int num_merged; 106 }; 107 108 typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *); 109 110 struct perf_sched; 111 112 struct trace_sched_handler { 113 int (*switch_event)(struct perf_sched *sched, struct perf_evsel *evsel, 114 struct perf_sample *sample, struct machine *machine); 115 116 int (*runtime_event)(struct perf_sched *sched, struct perf_evsel *evsel, 117 struct perf_sample *sample, struct machine *machine); 118 119 int (*wakeup_event)(struct perf_sched *sched, struct perf_evsel *evsel, 120 struct perf_sample *sample, struct machine *machine); 121 122 /* PERF_RECORD_FORK event, not sched_process_fork tracepoint */ 123 int (*fork_event)(struct perf_sched *sched, union perf_event *event, 124 struct machine *machine); 125 126 int (*migrate_task_event)(struct perf_sched *sched, 127 struct perf_evsel *evsel, 128 struct perf_sample *sample, 129 struct machine *machine); 130 }; 131 132 #define COLOR_PIDS PERF_COLOR_BLUE 133 #define COLOR_CPUS PERF_COLOR_BG_RED 134 135 struct perf_sched_map { 136 DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS); 137 int *comp_cpus; 138 bool comp; 139 struct thread_map *color_pids; 140 const char *color_pids_str; 141 struct cpu_map *color_cpus; 142 const char *color_cpus_str; 143 struct cpu_map *cpus; 144 const char *cpus_str; 145 }; 146 147 struct perf_sched { 148 struct perf_tool tool; 149 const char *sort_order; 150 unsigned long nr_tasks; 151 struct task_desc 
**pid_to_task; 152 struct task_desc **tasks; 153 const struct trace_sched_handler *tp_handler; 154 pthread_mutex_t start_work_mutex; 155 pthread_mutex_t work_done_wait_mutex; 156 int profile_cpu; 157 /* 158 * Track the current task - that way we can know whether there's any 159 * weird events, such as a task being switched away that is not current. 160 */ 161 int max_cpu; 162 u32 curr_pid[MAX_CPUS]; 163 struct thread *curr_thread[MAX_CPUS]; 164 char next_shortname1; 165 char next_shortname2; 166 unsigned int replay_repeat; 167 unsigned long nr_run_events; 168 unsigned long nr_sleep_events; 169 unsigned long nr_wakeup_events; 170 unsigned long nr_sleep_corrections; 171 unsigned long nr_run_events_optimized; 172 unsigned long targetless_wakeups; 173 unsigned long multitarget_wakeups; 174 unsigned long nr_runs; 175 unsigned long nr_timestamps; 176 unsigned long nr_unordered_timestamps; 177 unsigned long nr_context_switch_bugs; 178 unsigned long nr_events; 179 unsigned long nr_lost_chunks; 180 unsigned long nr_lost_events; 181 u64 run_measurement_overhead; 182 u64 sleep_measurement_overhead; 183 u64 start_time; 184 u64 cpu_usage; 185 u64 runavg_cpu_usage; 186 u64 parent_cpu_usage; 187 u64 runavg_parent_cpu_usage; 188 u64 sum_runtime; 189 u64 sum_fluct; 190 u64 run_avg; 191 u64 all_runtime; 192 u64 all_count; 193 u64 cpu_last_switched[MAX_CPUS]; 194 struct rb_root atom_root, sorted_atom_root, merged_atom_root; 195 struct list_head sort_list, cmp_pid; 196 bool force; 197 bool skip_merge; 198 struct perf_sched_map map; 199 200 /* options for timehist command */ 201 bool summary; 202 bool summary_only; 203 bool show_callchain; 204 unsigned int max_stack; 205 bool show_cpu_visual; 206 bool show_wakeups; 207 bool show_migrations; 208 u64 skipped_samples; 209 const char *time_str; 210 struct perf_time_interval ptime; 211 }; 212 213 /* per thread run time data */ 214 struct thread_runtime { 215 u64 last_time; /* time of previous sched in/out event */ 216 u64 dt_run; /* run time */ 217 u64 dt_wait; /* time between CPU access (off cpu) */ 218 u64 dt_delay; /* time between wakeup and sched-in */ 219 u64 ready_to_run; /* time of wakeup */ 220 221 struct stats run_stats; 222 u64 total_run_time; 223 224 u64 migrations; 225 }; 226 227 /* per event run time data */ 228 struct evsel_runtime { 229 u64 *last_time; /* time this event was last seen per cpu */ 230 u32 ncpu; /* highest cpu slot allocated */ 231 }; 232 233 /* track idle times per cpu */ 234 static struct thread **idle_threads; 235 static int idle_max_cpu; 236 static char idle_comm[] = "<idle>"; 237 238 static u64 get_nsecs(void) 239 { 240 struct timespec ts; 241 242 clock_gettime(CLOCK_MONOTONIC, &ts); 243 244 return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; 245 } 246 247 static void burn_nsecs(struct perf_sched *sched, u64 nsecs) 248 { 249 u64 T0 = get_nsecs(), T1; 250 251 do { 252 T1 = get_nsecs(); 253 } while (T1 + sched->run_measurement_overhead < T0 + nsecs); 254 } 255 256 static void sleep_nsecs(u64 nsecs) 257 { 258 struct timespec ts; 259 260 ts.tv_nsec = nsecs % 999999999; 261 ts.tv_sec = nsecs / 999999999; 262 263 nanosleep(&ts, NULL); 264 } 265 266 static void calibrate_run_measurement_overhead(struct perf_sched *sched) 267 { 268 u64 T0, T1, delta, min_delta = NSEC_PER_SEC; 269 int i; 270 271 for (i = 0; i < 10; i++) { 272 T0 = get_nsecs(); 273 burn_nsecs(sched, 0); 274 T1 = get_nsecs(); 275 delta = T1-T0; 276 min_delta = min(min_delta, delta); 277 } 278 sched->run_measurement_overhead = min_delta; 279 280 printf("run measurement overhead: %" 
PRIu64 " nsecs\n", min_delta); 281 } 282 283 static void calibrate_sleep_measurement_overhead(struct perf_sched *sched) 284 { 285 u64 T0, T1, delta, min_delta = NSEC_PER_SEC; 286 int i; 287 288 for (i = 0; i < 10; i++) { 289 T0 = get_nsecs(); 290 sleep_nsecs(10000); 291 T1 = get_nsecs(); 292 delta = T1-T0; 293 min_delta = min(min_delta, delta); 294 } 295 min_delta -= 10000; 296 sched->sleep_measurement_overhead = min_delta; 297 298 printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta); 299 } 300 301 static struct sched_atom * 302 get_new_event(struct task_desc *task, u64 timestamp) 303 { 304 struct sched_atom *event = zalloc(sizeof(*event)); 305 unsigned long idx = task->nr_events; 306 size_t size; 307 308 event->timestamp = timestamp; 309 event->nr = idx; 310 311 task->nr_events++; 312 size = sizeof(struct sched_atom *) * task->nr_events; 313 task->atoms = realloc(task->atoms, size); 314 BUG_ON(!task->atoms); 315 316 task->atoms[idx] = event; 317 318 return event; 319 } 320 321 static struct sched_atom *last_event(struct task_desc *task) 322 { 323 if (!task->nr_events) 324 return NULL; 325 326 return task->atoms[task->nr_events - 1]; 327 } 328 329 static void add_sched_event_run(struct perf_sched *sched, struct task_desc *task, 330 u64 timestamp, u64 duration) 331 { 332 struct sched_atom *event, *curr_event = last_event(task); 333 334 /* 335 * optimize an existing RUN event by merging this one 336 * to it: 337 */ 338 if (curr_event && curr_event->type == SCHED_EVENT_RUN) { 339 sched->nr_run_events_optimized++; 340 curr_event->duration += duration; 341 return; 342 } 343 344 event = get_new_event(task, timestamp); 345 346 event->type = SCHED_EVENT_RUN; 347 event->duration = duration; 348 349 sched->nr_run_events++; 350 } 351 352 static void add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *task, 353 u64 timestamp, struct task_desc *wakee) 354 { 355 struct sched_atom *event, *wakee_event; 356 357 event = get_new_event(task, timestamp); 358 event->type = SCHED_EVENT_WAKEUP; 359 event->wakee = wakee; 360 361 wakee_event = last_event(wakee); 362 if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) { 363 sched->targetless_wakeups++; 364 return; 365 } 366 if (wakee_event->wait_sem) { 367 sched->multitarget_wakeups++; 368 return; 369 } 370 371 wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem)); 372 sem_init(wakee_event->wait_sem, 0, 0); 373 wakee_event->specific_wait = 1; 374 event->wait_sem = wakee_event->wait_sem; 375 376 sched->nr_wakeup_events++; 377 } 378 379 static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task, 380 u64 timestamp, u64 task_state __maybe_unused) 381 { 382 struct sched_atom *event = get_new_event(task, timestamp); 383 384 event->type = SCHED_EVENT_SLEEP; 385 386 sched->nr_sleep_events++; 387 } 388 389 static struct task_desc *register_pid(struct perf_sched *sched, 390 unsigned long pid, const char *comm) 391 { 392 struct task_desc *task; 393 static int pid_max; 394 395 if (sched->pid_to_task == NULL) { 396 if (sysctl__read_int("kernel/pid_max", &pid_max) < 0) 397 pid_max = MAX_PID; 398 BUG_ON((sched->pid_to_task = calloc(pid_max, sizeof(struct task_desc *))) == NULL); 399 } 400 if (pid >= (unsigned long)pid_max) { 401 BUG_ON((sched->pid_to_task = realloc(sched->pid_to_task, (pid + 1) * 402 sizeof(struct task_desc *))) == NULL); 403 while (pid >= (unsigned long)pid_max) 404 sched->pid_to_task[pid_max++] = NULL; 405 } 406 407 task = sched->pid_to_task[pid]; 408 409 if (task) 410 return task; 411 412 
task = zalloc(sizeof(*task)); 413 task->pid = pid; 414 task->nr = sched->nr_tasks; 415 strcpy(task->comm, comm); 416 /* 417 * every task starts in sleeping state - this gets ignored 418 * if there's no wakeup pointing to this sleep state: 419 */ 420 add_sched_event_sleep(sched, task, 0, 0); 421 422 sched->pid_to_task[pid] = task; 423 sched->nr_tasks++; 424 sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_desc *)); 425 BUG_ON(!sched->tasks); 426 sched->tasks[task->nr] = task; 427 428 if (verbose) 429 printf("registered task #%ld, PID %ld (%s)\n", sched->nr_tasks, pid, comm); 430 431 return task; 432 } 433 434 435 static void print_task_traces(struct perf_sched *sched) 436 { 437 struct task_desc *task; 438 unsigned long i; 439 440 for (i = 0; i < sched->nr_tasks; i++) { 441 task = sched->tasks[i]; 442 printf("task %6ld (%20s:%10ld), nr_events: %ld\n", 443 task->nr, task->comm, task->pid, task->nr_events); 444 } 445 } 446 447 static void add_cross_task_wakeups(struct perf_sched *sched) 448 { 449 struct task_desc *task1, *task2; 450 unsigned long i, j; 451 452 for (i = 0; i < sched->nr_tasks; i++) { 453 task1 = sched->tasks[i]; 454 j = i + 1; 455 if (j == sched->nr_tasks) 456 j = 0; 457 task2 = sched->tasks[j]; 458 add_sched_event_wakeup(sched, task1, 0, task2); 459 } 460 } 461 462 static void perf_sched__process_event(struct perf_sched *sched, 463 struct sched_atom *atom) 464 { 465 int ret = 0; 466 467 switch (atom->type) { 468 case SCHED_EVENT_RUN: 469 burn_nsecs(sched, atom->duration); 470 break; 471 case SCHED_EVENT_SLEEP: 472 if (atom->wait_sem) 473 ret = sem_wait(atom->wait_sem); 474 BUG_ON(ret); 475 break; 476 case SCHED_EVENT_WAKEUP: 477 if (atom->wait_sem) 478 ret = sem_post(atom->wait_sem); 479 BUG_ON(ret); 480 break; 481 case SCHED_EVENT_MIGRATION: 482 break; 483 default: 484 BUG_ON(1); 485 } 486 } 487 488 static u64 get_cpu_usage_nsec_parent(void) 489 { 490 struct rusage ru; 491 u64 sum; 492 int err; 493 494 err = getrusage(RUSAGE_SELF, &ru); 495 BUG_ON(err); 496 497 sum = ru.ru_utime.tv_sec * NSEC_PER_SEC + ru.ru_utime.tv_usec * NSEC_PER_USEC; 498 sum += ru.ru_stime.tv_sec * NSEC_PER_SEC + ru.ru_stime.tv_usec * NSEC_PER_USEC; 499 500 return sum; 501 } 502 503 static int self_open_counters(struct perf_sched *sched, unsigned long cur_task) 504 { 505 struct perf_event_attr attr; 506 char sbuf[STRERR_BUFSIZE], info[STRERR_BUFSIZE]; 507 int fd; 508 struct rlimit limit; 509 bool need_privilege = false; 510 511 memset(&attr, 0, sizeof(attr)); 512 513 attr.type = PERF_TYPE_SOFTWARE; 514 attr.config = PERF_COUNT_SW_TASK_CLOCK; 515 516 force_again: 517 fd = sys_perf_event_open(&attr, 0, -1, -1, 518 perf_event_open_cloexec_flag()); 519 520 if (fd < 0) { 521 if (errno == EMFILE) { 522 if (sched->force) { 523 BUG_ON(getrlimit(RLIMIT_NOFILE, &limit) == -1); 524 limit.rlim_cur += sched->nr_tasks - cur_task; 525 if (limit.rlim_cur > limit.rlim_max) { 526 limit.rlim_max = limit.rlim_cur; 527 need_privilege = true; 528 } 529 if (setrlimit(RLIMIT_NOFILE, &limit) == -1) { 530 if (need_privilege && errno == EPERM) 531 strcpy(info, "Need privilege\n"); 532 } else 533 goto force_again; 534 } else 535 strcpy(info, "Have a try with -f option\n"); 536 } 537 pr_err("Error: sys_perf_event_open() syscall returned " 538 "with %d (%s)\n%s", fd, 539 str_error_r(errno, sbuf, sizeof(sbuf)), info); 540 exit(EXIT_FAILURE); 541 } 542 return fd; 543 } 544 545 static u64 get_cpu_usage_nsec_self(int fd) 546 { 547 u64 runtime; 548 int ret; 549 550 ret = read(fd, &runtime, sizeof(runtime)); 551 
BUG_ON(ret != sizeof(runtime)); 552 553 return runtime; 554 } 555 556 struct sched_thread_parms { 557 struct task_desc *task; 558 struct perf_sched *sched; 559 int fd; 560 }; 561 562 static void *thread_func(void *ctx) 563 { 564 struct sched_thread_parms *parms = ctx; 565 struct task_desc *this_task = parms->task; 566 struct perf_sched *sched = parms->sched; 567 u64 cpu_usage_0, cpu_usage_1; 568 unsigned long i, ret; 569 char comm2[22]; 570 int fd = parms->fd; 571 572 zfree(&parms); 573 574 sprintf(comm2, ":%s", this_task->comm); 575 prctl(PR_SET_NAME, comm2); 576 if (fd < 0) 577 return NULL; 578 again: 579 ret = sem_post(&this_task->ready_for_work); 580 BUG_ON(ret); 581 ret = pthread_mutex_lock(&sched->start_work_mutex); 582 BUG_ON(ret); 583 ret = pthread_mutex_unlock(&sched->start_work_mutex); 584 BUG_ON(ret); 585 586 cpu_usage_0 = get_cpu_usage_nsec_self(fd); 587 588 for (i = 0; i < this_task->nr_events; i++) { 589 this_task->curr_event = i; 590 perf_sched__process_event(sched, this_task->atoms[i]); 591 } 592 593 cpu_usage_1 = get_cpu_usage_nsec_self(fd); 594 this_task->cpu_usage = cpu_usage_1 - cpu_usage_0; 595 ret = sem_post(&this_task->work_done_sem); 596 BUG_ON(ret); 597 598 ret = pthread_mutex_lock(&sched->work_done_wait_mutex); 599 BUG_ON(ret); 600 ret = pthread_mutex_unlock(&sched->work_done_wait_mutex); 601 BUG_ON(ret); 602 603 goto again; 604 } 605 606 static void create_tasks(struct perf_sched *sched) 607 { 608 struct task_desc *task; 609 pthread_attr_t attr; 610 unsigned long i; 611 int err; 612 613 err = pthread_attr_init(&attr); 614 BUG_ON(err); 615 err = pthread_attr_setstacksize(&attr, 616 (size_t) max(16 * 1024, PTHREAD_STACK_MIN)); 617 BUG_ON(err); 618 err = pthread_mutex_lock(&sched->start_work_mutex); 619 BUG_ON(err); 620 err = pthread_mutex_lock(&sched->work_done_wait_mutex); 621 BUG_ON(err); 622 for (i = 0; i < sched->nr_tasks; i++) { 623 struct sched_thread_parms *parms = malloc(sizeof(*parms)); 624 BUG_ON(parms == NULL); 625 parms->task = task = sched->tasks[i]; 626 parms->sched = sched; 627 parms->fd = self_open_counters(sched, i); 628 sem_init(&task->sleep_sem, 0, 0); 629 sem_init(&task->ready_for_work, 0, 0); 630 sem_init(&task->work_done_sem, 0, 0); 631 task->curr_event = 0; 632 err = pthread_create(&task->thread, &attr, thread_func, parms); 633 BUG_ON(err); 634 } 635 } 636 637 static void wait_for_tasks(struct perf_sched *sched) 638 { 639 u64 cpu_usage_0, cpu_usage_1; 640 struct task_desc *task; 641 unsigned long i, ret; 642 643 sched->start_time = get_nsecs(); 644 sched->cpu_usage = 0; 645 pthread_mutex_unlock(&sched->work_done_wait_mutex); 646 647 for (i = 0; i < sched->nr_tasks; i++) { 648 task = sched->tasks[i]; 649 ret = sem_wait(&task->ready_for_work); 650 BUG_ON(ret); 651 sem_init(&task->ready_for_work, 0, 0); 652 } 653 ret = pthread_mutex_lock(&sched->work_done_wait_mutex); 654 BUG_ON(ret); 655 656 cpu_usage_0 = get_cpu_usage_nsec_parent(); 657 658 pthread_mutex_unlock(&sched->start_work_mutex); 659 660 for (i = 0; i < sched->nr_tasks; i++) { 661 task = sched->tasks[i]; 662 ret = sem_wait(&task->work_done_sem); 663 BUG_ON(ret); 664 sem_init(&task->work_done_sem, 0, 0); 665 sched->cpu_usage += task->cpu_usage; 666 task->cpu_usage = 0; 667 } 668 669 cpu_usage_1 = get_cpu_usage_nsec_parent(); 670 if (!sched->runavg_cpu_usage) 671 sched->runavg_cpu_usage = sched->cpu_usage; 672 sched->runavg_cpu_usage = (sched->runavg_cpu_usage * (sched->replay_repeat - 1) + sched->cpu_usage) / sched->replay_repeat; 673 674 sched->parent_cpu_usage = cpu_usage_1 - 
cpu_usage_0; 675 if (!sched->runavg_parent_cpu_usage) 676 sched->runavg_parent_cpu_usage = sched->parent_cpu_usage; 677 sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * (sched->replay_repeat - 1) + 678 sched->parent_cpu_usage)/sched->replay_repeat; 679 680 ret = pthread_mutex_lock(&sched->start_work_mutex); 681 BUG_ON(ret); 682 683 for (i = 0; i < sched->nr_tasks; i++) { 684 task = sched->tasks[i]; 685 sem_init(&task->sleep_sem, 0, 0); 686 task->curr_event = 0; 687 } 688 } 689 690 static void run_one_test(struct perf_sched *sched) 691 { 692 u64 T0, T1, delta, avg_delta, fluct; 693 694 T0 = get_nsecs(); 695 wait_for_tasks(sched); 696 T1 = get_nsecs(); 697 698 delta = T1 - T0; 699 sched->sum_runtime += delta; 700 sched->nr_runs++; 701 702 avg_delta = sched->sum_runtime / sched->nr_runs; 703 if (delta < avg_delta) 704 fluct = avg_delta - delta; 705 else 706 fluct = delta - avg_delta; 707 sched->sum_fluct += fluct; 708 if (!sched->run_avg) 709 sched->run_avg = delta; 710 sched->run_avg = (sched->run_avg * (sched->replay_repeat - 1) + delta) / sched->replay_repeat; 711 712 printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / NSEC_PER_MSEC); 713 714 printf("ravg: %0.2f, ", (double)sched->run_avg / NSEC_PER_MSEC); 715 716 printf("cpu: %0.2f / %0.2f", 717 (double)sched->cpu_usage / NSEC_PER_MSEC, (double)sched->runavg_cpu_usage / NSEC_PER_MSEC); 718 719 #if 0 720 /* 721 * rusage statistics done by the parent, these are less 722 * accurate than the sched->sum_exec_runtime based statistics: 723 */ 724 printf(" [%0.2f / %0.2f]", 725 (double)sched->parent_cpu_usage / NSEC_PER_MSEC, 726 (double)sched->runavg_parent_cpu_usage / NSEC_PER_MSEC); 727 #endif 728 729 printf("\n"); 730 731 if (sched->nr_sleep_corrections) 732 printf(" (%ld sleep corrections)\n", sched->nr_sleep_corrections); 733 sched->nr_sleep_corrections = 0; 734 } 735 736 static void test_calibrations(struct perf_sched *sched) 737 { 738 u64 T0, T1; 739 740 T0 = get_nsecs(); 741 burn_nsecs(sched, NSEC_PER_MSEC); 742 T1 = get_nsecs(); 743 744 printf("the run test took %" PRIu64 " nsecs\n", T1 - T0); 745 746 T0 = get_nsecs(); 747 sleep_nsecs(NSEC_PER_MSEC); 748 T1 = get_nsecs(); 749 750 printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0); 751 } 752 753 static int 754 replay_wakeup_event(struct perf_sched *sched, 755 struct perf_evsel *evsel, struct perf_sample *sample, 756 struct machine *machine __maybe_unused) 757 { 758 const char *comm = perf_evsel__strval(evsel, sample, "comm"); 759 const u32 pid = perf_evsel__intval(evsel, sample, "pid"); 760 struct task_desc *waker, *wakee; 761 762 if (verbose) { 763 printf("sched_wakeup event %p\n", evsel); 764 765 printf(" ... 
pid %d woke up %s/%d\n", sample->tid, comm, pid); 766 } 767 768 waker = register_pid(sched, sample->tid, "<unknown>"); 769 wakee = register_pid(sched, pid, comm); 770 771 add_sched_event_wakeup(sched, waker, sample->time, wakee); 772 return 0; 773 } 774 775 static int replay_switch_event(struct perf_sched *sched, 776 struct perf_evsel *evsel, 777 struct perf_sample *sample, 778 struct machine *machine __maybe_unused) 779 { 780 const char *prev_comm = perf_evsel__strval(evsel, sample, "prev_comm"), 781 *next_comm = perf_evsel__strval(evsel, sample, "next_comm"); 782 const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), 783 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); 784 const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state"); 785 struct task_desc *prev, __maybe_unused *next; 786 u64 timestamp0, timestamp = sample->time; 787 int cpu = sample->cpu; 788 s64 delta; 789 790 if (verbose) 791 printf("sched_switch event %p\n", evsel); 792 793 if (cpu >= MAX_CPUS || cpu < 0) 794 return 0; 795 796 timestamp0 = sched->cpu_last_switched[cpu]; 797 if (timestamp0) 798 delta = timestamp - timestamp0; 799 else 800 delta = 0; 801 802 if (delta < 0) { 803 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 804 return -1; 805 } 806 807 pr_debug(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n", 808 prev_comm, prev_pid, next_comm, next_pid, delta); 809 810 prev = register_pid(sched, prev_pid, prev_comm); 811 next = register_pid(sched, next_pid, next_comm); 812 813 sched->cpu_last_switched[cpu] = timestamp; 814 815 add_sched_event_run(sched, prev, timestamp, delta); 816 add_sched_event_sleep(sched, prev, timestamp, prev_state); 817 818 return 0; 819 } 820 821 static int replay_fork_event(struct perf_sched *sched, 822 union perf_event *event, 823 struct machine *machine) 824 { 825 struct thread *child, *parent; 826 827 child = machine__findnew_thread(machine, event->fork.pid, 828 event->fork.tid); 829 parent = machine__findnew_thread(machine, event->fork.ppid, 830 event->fork.ptid); 831 832 if (child == NULL || parent == NULL) { 833 pr_debug("thread does not exist on fork event: child %p, parent %p\n", 834 child, parent); 835 goto out_put; 836 } 837 838 if (verbose) { 839 printf("fork event\n"); 840 printf("... parent: %s/%d\n", thread__comm_str(parent), parent->tid); 841 printf("... 
child: %s/%d\n", thread__comm_str(child), child->tid); 842 } 843 844 register_pid(sched, parent->tid, thread__comm_str(parent)); 845 register_pid(sched, child->tid, thread__comm_str(child)); 846 out_put: 847 thread__put(child); 848 thread__put(parent); 849 return 0; 850 } 851 852 struct sort_dimension { 853 const char *name; 854 sort_fn_t cmp; 855 struct list_head list; 856 }; 857 858 static int 859 thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r) 860 { 861 struct sort_dimension *sort; 862 int ret = 0; 863 864 BUG_ON(list_empty(list)); 865 866 list_for_each_entry(sort, list, list) { 867 ret = sort->cmp(l, r); 868 if (ret) 869 return ret; 870 } 871 872 return ret; 873 } 874 875 static struct work_atoms * 876 thread_atoms_search(struct rb_root *root, struct thread *thread, 877 struct list_head *sort_list) 878 { 879 struct rb_node *node = root->rb_node; 880 struct work_atoms key = { .thread = thread }; 881 882 while (node) { 883 struct work_atoms *atoms; 884 int cmp; 885 886 atoms = container_of(node, struct work_atoms, node); 887 888 cmp = thread_lat_cmp(sort_list, &key, atoms); 889 if (cmp > 0) 890 node = node->rb_left; 891 else if (cmp < 0) 892 node = node->rb_right; 893 else { 894 BUG_ON(thread != atoms->thread); 895 return atoms; 896 } 897 } 898 return NULL; 899 } 900 901 static void 902 __thread_latency_insert(struct rb_root *root, struct work_atoms *data, 903 struct list_head *sort_list) 904 { 905 struct rb_node **new = &(root->rb_node), *parent = NULL; 906 907 while (*new) { 908 struct work_atoms *this; 909 int cmp; 910 911 this = container_of(*new, struct work_atoms, node); 912 parent = *new; 913 914 cmp = thread_lat_cmp(sort_list, data, this); 915 916 if (cmp > 0) 917 new = &((*new)->rb_left); 918 else 919 new = &((*new)->rb_right); 920 } 921 922 rb_link_node(&data->node, parent, new); 923 rb_insert_color(&data->node, root); 924 } 925 926 static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread) 927 { 928 struct work_atoms *atoms = zalloc(sizeof(*atoms)); 929 if (!atoms) { 930 pr_err("No memory at %s\n", __func__); 931 return -1; 932 } 933 934 atoms->thread = thread__get(thread); 935 INIT_LIST_HEAD(&atoms->work_list); 936 __thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid); 937 return 0; 938 } 939 940 static char sched_out_state(u64 prev_state) 941 { 942 const char *str = TASK_STATE_TO_CHAR_STR; 943 944 return str[prev_state]; 945 } 946 947 static int 948 add_sched_out_event(struct work_atoms *atoms, 949 char run_state, 950 u64 timestamp) 951 { 952 struct work_atom *atom = zalloc(sizeof(*atom)); 953 if (!atom) { 954 pr_err("Non memory at %s", __func__); 955 return -1; 956 } 957 958 atom->sched_out_time = timestamp; 959 960 if (run_state == 'R') { 961 atom->state = THREAD_WAIT_CPU; 962 atom->wake_up_time = atom->sched_out_time; 963 } 964 965 list_add_tail(&atom->list, &atoms->work_list); 966 return 0; 967 } 968 969 static void 970 add_runtime_event(struct work_atoms *atoms, u64 delta, 971 u64 timestamp __maybe_unused) 972 { 973 struct work_atom *atom; 974 975 BUG_ON(list_empty(&atoms->work_list)); 976 977 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 978 979 atom->runtime += delta; 980 atoms->total_runtime += delta; 981 } 982 983 static void 984 add_sched_in_event(struct work_atoms *atoms, u64 timestamp) 985 { 986 struct work_atom *atom; 987 u64 delta; 988 989 if (list_empty(&atoms->work_list)) 990 return; 991 992 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 993 994 if 
(atom->state != THREAD_WAIT_CPU) 995 return; 996 997 if (timestamp < atom->wake_up_time) { 998 atom->state = THREAD_IGNORE; 999 return; 1000 } 1001 1002 atom->state = THREAD_SCHED_IN; 1003 atom->sched_in_time = timestamp; 1004 1005 delta = atom->sched_in_time - atom->wake_up_time; 1006 atoms->total_lat += delta; 1007 if (delta > atoms->max_lat) { 1008 atoms->max_lat = delta; 1009 atoms->max_lat_at = timestamp; 1010 } 1011 atoms->nb_atoms++; 1012 } 1013 1014 static int latency_switch_event(struct perf_sched *sched, 1015 struct perf_evsel *evsel, 1016 struct perf_sample *sample, 1017 struct machine *machine) 1018 { 1019 const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), 1020 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); 1021 const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state"); 1022 struct work_atoms *out_events, *in_events; 1023 struct thread *sched_out, *sched_in; 1024 u64 timestamp0, timestamp = sample->time; 1025 int cpu = sample->cpu, err = -1; 1026 s64 delta; 1027 1028 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1029 1030 timestamp0 = sched->cpu_last_switched[cpu]; 1031 sched->cpu_last_switched[cpu] = timestamp; 1032 if (timestamp0) 1033 delta = timestamp - timestamp0; 1034 else 1035 delta = 0; 1036 1037 if (delta < 0) { 1038 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 1039 return -1; 1040 } 1041 1042 sched_out = machine__findnew_thread(machine, -1, prev_pid); 1043 sched_in = machine__findnew_thread(machine, -1, next_pid); 1044 if (sched_out == NULL || sched_in == NULL) 1045 goto out_put; 1046 1047 out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); 1048 if (!out_events) { 1049 if (thread_atoms_insert(sched, sched_out)) 1050 goto out_put; 1051 out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); 1052 if (!out_events) { 1053 pr_err("out-event: Internal tree error"); 1054 goto out_put; 1055 } 1056 } 1057 if (add_sched_out_event(out_events, sched_out_state(prev_state), timestamp)) 1058 return -1; 1059 1060 in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); 1061 if (!in_events) { 1062 if (thread_atoms_insert(sched, sched_in)) 1063 goto out_put; 1064 in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); 1065 if (!in_events) { 1066 pr_err("in-event: Internal tree error"); 1067 goto out_put; 1068 } 1069 /* 1070 * Take came in we have not heard about yet, 1071 * add in an initial atom in runnable state: 1072 */ 1073 if (add_sched_out_event(in_events, 'R', timestamp)) 1074 goto out_put; 1075 } 1076 add_sched_in_event(in_events, timestamp); 1077 err = 0; 1078 out_put: 1079 thread__put(sched_out); 1080 thread__put(sched_in); 1081 return err; 1082 } 1083 1084 static int latency_runtime_event(struct perf_sched *sched, 1085 struct perf_evsel *evsel, 1086 struct perf_sample *sample, 1087 struct machine *machine) 1088 { 1089 const u32 pid = perf_evsel__intval(evsel, sample, "pid"); 1090 const u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); 1091 struct thread *thread = machine__findnew_thread(machine, -1, pid); 1092 struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); 1093 u64 timestamp = sample->time; 1094 int cpu = sample->cpu, err = -1; 1095 1096 if (thread == NULL) 1097 return -1; 1098 1099 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1100 if (!atoms) { 1101 if (thread_atoms_insert(sched, thread)) 1102 goto out_put; 1103 atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); 1104 if 
(!atoms) { 1105 pr_err("in-event: Internal tree error"); 1106 goto out_put; 1107 } 1108 if (add_sched_out_event(atoms, 'R', timestamp)) 1109 goto out_put; 1110 } 1111 1112 add_runtime_event(atoms, runtime, timestamp); 1113 err = 0; 1114 out_put: 1115 thread__put(thread); 1116 return err; 1117 } 1118 1119 static int latency_wakeup_event(struct perf_sched *sched, 1120 struct perf_evsel *evsel, 1121 struct perf_sample *sample, 1122 struct machine *machine) 1123 { 1124 const u32 pid = perf_evsel__intval(evsel, sample, "pid"); 1125 struct work_atoms *atoms; 1126 struct work_atom *atom; 1127 struct thread *wakee; 1128 u64 timestamp = sample->time; 1129 int err = -1; 1130 1131 wakee = machine__findnew_thread(machine, -1, pid); 1132 if (wakee == NULL) 1133 return -1; 1134 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); 1135 if (!atoms) { 1136 if (thread_atoms_insert(sched, wakee)) 1137 goto out_put; 1138 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); 1139 if (!atoms) { 1140 pr_err("wakeup-event: Internal tree error"); 1141 goto out_put; 1142 } 1143 if (add_sched_out_event(atoms, 'S', timestamp)) 1144 goto out_put; 1145 } 1146 1147 BUG_ON(list_empty(&atoms->work_list)); 1148 1149 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1150 1151 /* 1152 * As we do not guarantee the wakeup event happens when 1153 * task is out of run queue, also may happen when task is 1154 * on run queue and wakeup only change ->state to TASK_RUNNING, 1155 * then we should not set the ->wake_up_time when wake up a 1156 * task which is on run queue. 1157 * 1158 * You WILL be missing events if you've recorded only 1159 * one CPU, or are only looking at only one, so don't 1160 * skip in this case. 1161 */ 1162 if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING) 1163 goto out_ok; 1164 1165 sched->nr_timestamps++; 1166 if (atom->sched_out_time > timestamp) { 1167 sched->nr_unordered_timestamps++; 1168 goto out_ok; 1169 } 1170 1171 atom->state = THREAD_WAIT_CPU; 1172 atom->wake_up_time = timestamp; 1173 out_ok: 1174 err = 0; 1175 out_put: 1176 thread__put(wakee); 1177 return err; 1178 } 1179 1180 static int latency_migrate_task_event(struct perf_sched *sched, 1181 struct perf_evsel *evsel, 1182 struct perf_sample *sample, 1183 struct machine *machine) 1184 { 1185 const u32 pid = perf_evsel__intval(evsel, sample, "pid"); 1186 u64 timestamp = sample->time; 1187 struct work_atoms *atoms; 1188 struct work_atom *atom; 1189 struct thread *migrant; 1190 int err = -1; 1191 1192 /* 1193 * Only need to worry about migration when profiling one CPU. 
1194 */ 1195 if (sched->profile_cpu == -1) 1196 return 0; 1197 1198 migrant = machine__findnew_thread(machine, -1, pid); 1199 if (migrant == NULL) 1200 return -1; 1201 atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); 1202 if (!atoms) { 1203 if (thread_atoms_insert(sched, migrant)) 1204 goto out_put; 1205 register_pid(sched, migrant->tid, thread__comm_str(migrant)); 1206 atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); 1207 if (!atoms) { 1208 pr_err("migration-event: Internal tree error"); 1209 goto out_put; 1210 } 1211 if (add_sched_out_event(atoms, 'R', timestamp)) 1212 goto out_put; 1213 } 1214 1215 BUG_ON(list_empty(&atoms->work_list)); 1216 1217 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1218 atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp; 1219 1220 sched->nr_timestamps++; 1221 1222 if (atom->sched_out_time > timestamp) 1223 sched->nr_unordered_timestamps++; 1224 err = 0; 1225 out_put: 1226 thread__put(migrant); 1227 return err; 1228 } 1229 1230 static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list) 1231 { 1232 int i; 1233 int ret; 1234 u64 avg; 1235 char max_lat_at[32]; 1236 1237 if (!work_list->nb_atoms) 1238 return; 1239 /* 1240 * Ignore idle threads: 1241 */ 1242 if (!strcmp(thread__comm_str(work_list->thread), "swapper")) 1243 return; 1244 1245 sched->all_runtime += work_list->total_runtime; 1246 sched->all_count += work_list->nb_atoms; 1247 1248 if (work_list->num_merged > 1) 1249 ret = printf(" %s:(%d) ", thread__comm_str(work_list->thread), work_list->num_merged); 1250 else 1251 ret = printf(" %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid); 1252 1253 for (i = 0; i < 24 - ret; i++) 1254 printf(" "); 1255 1256 avg = work_list->total_lat / work_list->nb_atoms; 1257 timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at)); 1258 1259 printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n", 1260 (double)work_list->total_runtime / NSEC_PER_MSEC, 1261 work_list->nb_atoms, (double)avg / NSEC_PER_MSEC, 1262 (double)work_list->max_lat / NSEC_PER_MSEC, 1263 max_lat_at); 1264 } 1265 1266 static int pid_cmp(struct work_atoms *l, struct work_atoms *r) 1267 { 1268 if (l->thread == r->thread) 1269 return 0; 1270 if (l->thread->tid < r->thread->tid) 1271 return -1; 1272 if (l->thread->tid > r->thread->tid) 1273 return 1; 1274 return (int)(l->thread - r->thread); 1275 } 1276 1277 static int avg_cmp(struct work_atoms *l, struct work_atoms *r) 1278 { 1279 u64 avgl, avgr; 1280 1281 if (!l->nb_atoms) 1282 return -1; 1283 1284 if (!r->nb_atoms) 1285 return 1; 1286 1287 avgl = l->total_lat / l->nb_atoms; 1288 avgr = r->total_lat / r->nb_atoms; 1289 1290 if (avgl < avgr) 1291 return -1; 1292 if (avgl > avgr) 1293 return 1; 1294 1295 return 0; 1296 } 1297 1298 static int max_cmp(struct work_atoms *l, struct work_atoms *r) 1299 { 1300 if (l->max_lat < r->max_lat) 1301 return -1; 1302 if (l->max_lat > r->max_lat) 1303 return 1; 1304 1305 return 0; 1306 } 1307 1308 static int switch_cmp(struct work_atoms *l, struct work_atoms *r) 1309 { 1310 if (l->nb_atoms < r->nb_atoms) 1311 return -1; 1312 if (l->nb_atoms > r->nb_atoms) 1313 return 1; 1314 1315 return 0; 1316 } 1317 1318 static int runtime_cmp(struct work_atoms *l, struct work_atoms *r) 1319 { 1320 if (l->total_runtime < r->total_runtime) 1321 return -1; 1322 if (l->total_runtime > r->total_runtime) 1323 return 1; 1324 1325 return 0; 1326 } 1327 1328 
static int sort_dimension__add(const char *tok, struct list_head *list) 1329 { 1330 size_t i; 1331 static struct sort_dimension avg_sort_dimension = { 1332 .name = "avg", 1333 .cmp = avg_cmp, 1334 }; 1335 static struct sort_dimension max_sort_dimension = { 1336 .name = "max", 1337 .cmp = max_cmp, 1338 }; 1339 static struct sort_dimension pid_sort_dimension = { 1340 .name = "pid", 1341 .cmp = pid_cmp, 1342 }; 1343 static struct sort_dimension runtime_sort_dimension = { 1344 .name = "runtime", 1345 .cmp = runtime_cmp, 1346 }; 1347 static struct sort_dimension switch_sort_dimension = { 1348 .name = "switch", 1349 .cmp = switch_cmp, 1350 }; 1351 struct sort_dimension *available_sorts[] = { 1352 &pid_sort_dimension, 1353 &avg_sort_dimension, 1354 &max_sort_dimension, 1355 &switch_sort_dimension, 1356 &runtime_sort_dimension, 1357 }; 1358 1359 for (i = 0; i < ARRAY_SIZE(available_sorts); i++) { 1360 if (!strcmp(available_sorts[i]->name, tok)) { 1361 list_add_tail(&available_sorts[i]->list, list); 1362 1363 return 0; 1364 } 1365 } 1366 1367 return -1; 1368 } 1369 1370 static void perf_sched__sort_lat(struct perf_sched *sched) 1371 { 1372 struct rb_node *node; 1373 struct rb_root *root = &sched->atom_root; 1374 again: 1375 for (;;) { 1376 struct work_atoms *data; 1377 node = rb_first(root); 1378 if (!node) 1379 break; 1380 1381 rb_erase(node, root); 1382 data = rb_entry(node, struct work_atoms, node); 1383 __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list); 1384 } 1385 if (root == &sched->atom_root) { 1386 root = &sched->merged_atom_root; 1387 goto again; 1388 } 1389 } 1390 1391 static int process_sched_wakeup_event(struct perf_tool *tool, 1392 struct perf_evsel *evsel, 1393 struct perf_sample *sample, 1394 struct machine *machine) 1395 { 1396 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1397 1398 if (sched->tp_handler->wakeup_event) 1399 return sched->tp_handler->wakeup_event(sched, evsel, sample, machine); 1400 1401 return 0; 1402 } 1403 1404 union map_priv { 1405 void *ptr; 1406 bool color; 1407 }; 1408 1409 static bool thread__has_color(struct thread *thread) 1410 { 1411 union map_priv priv = { 1412 .ptr = thread__priv(thread), 1413 }; 1414 1415 return priv.color; 1416 } 1417 1418 static struct thread* 1419 map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid) 1420 { 1421 struct thread *thread = machine__findnew_thread(machine, pid, tid); 1422 union map_priv priv = { 1423 .color = false, 1424 }; 1425 1426 if (!sched->map.color_pids || !thread || thread__priv(thread)) 1427 return thread; 1428 1429 if (thread_map__has(sched->map.color_pids, tid)) 1430 priv.color = true; 1431 1432 thread__set_priv(thread, priv.ptr); 1433 return thread; 1434 } 1435 1436 static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, 1437 struct perf_sample *sample, struct machine *machine) 1438 { 1439 const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); 1440 struct thread *sched_in; 1441 int new_shortname; 1442 u64 timestamp0, timestamp = sample->time; 1443 s64 delta; 1444 int i, this_cpu = sample->cpu; 1445 int cpus_nr; 1446 bool new_cpu = false; 1447 const char *color = PERF_COLOR_NORMAL; 1448 char stimestamp[32]; 1449 1450 BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); 1451 1452 if (this_cpu > sched->max_cpu) 1453 sched->max_cpu = this_cpu; 1454 1455 if (sched->map.comp) { 1456 cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS); 1457 if (!test_and_set_bit(this_cpu, 
sched->map.comp_cpus_mask)) { 1458 sched->map.comp_cpus[cpus_nr++] = this_cpu; 1459 new_cpu = true; 1460 } 1461 } else 1462 cpus_nr = sched->max_cpu; 1463 1464 timestamp0 = sched->cpu_last_switched[this_cpu]; 1465 sched->cpu_last_switched[this_cpu] = timestamp; 1466 if (timestamp0) 1467 delta = timestamp - timestamp0; 1468 else 1469 delta = 0; 1470 1471 if (delta < 0) { 1472 pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); 1473 return -1; 1474 } 1475 1476 sched_in = map__findnew_thread(sched, machine, -1, next_pid); 1477 if (sched_in == NULL) 1478 return -1; 1479 1480 sched->curr_thread[this_cpu] = thread__get(sched_in); 1481 1482 printf(" "); 1483 1484 new_shortname = 0; 1485 if (!sched_in->shortname[0]) { 1486 if (!strcmp(thread__comm_str(sched_in), "swapper")) { 1487 /* 1488 * Don't allocate a letter-number for swapper:0 1489 * as a shortname. Instead, we use '.' for it. 1490 */ 1491 sched_in->shortname[0] = '.'; 1492 sched_in->shortname[1] = ' '; 1493 } else { 1494 sched_in->shortname[0] = sched->next_shortname1; 1495 sched_in->shortname[1] = sched->next_shortname2; 1496 1497 if (sched->next_shortname1 < 'Z') { 1498 sched->next_shortname1++; 1499 } else { 1500 sched->next_shortname1 = 'A'; 1501 if (sched->next_shortname2 < '9') 1502 sched->next_shortname2++; 1503 else 1504 sched->next_shortname2 = '0'; 1505 } 1506 } 1507 new_shortname = 1; 1508 } 1509 1510 for (i = 0; i < cpus_nr; i++) { 1511 int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i; 1512 struct thread *curr_thread = sched->curr_thread[cpu]; 1513 const char *pid_color = color; 1514 const char *cpu_color = color; 1515 1516 if (curr_thread && thread__has_color(curr_thread)) 1517 pid_color = COLOR_PIDS; 1518 1519 if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu)) 1520 continue; 1521 1522 if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu)) 1523 cpu_color = COLOR_CPUS; 1524 1525 if (cpu != this_cpu) 1526 color_fprintf(stdout, color, " "); 1527 else 1528 color_fprintf(stdout, cpu_color, "*"); 1529 1530 if (sched->curr_thread[cpu]) 1531 color_fprintf(stdout, pid_color, "%2s ", sched->curr_thread[cpu]->shortname); 1532 else 1533 color_fprintf(stdout, color, " "); 1534 } 1535 1536 if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu)) 1537 goto out; 1538 1539 timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp)); 1540 color_fprintf(stdout, color, " %12s secs ", stimestamp); 1541 if (new_shortname || (verbose && sched_in->tid)) { 1542 const char *pid_color = color; 1543 1544 if (thread__has_color(sched_in)) 1545 pid_color = COLOR_PIDS; 1546 1547 color_fprintf(stdout, pid_color, "%s => %s:%d", 1548 sched_in->shortname, thread__comm_str(sched_in), sched_in->tid); 1549 } 1550 1551 if (sched->map.comp && new_cpu) 1552 color_fprintf(stdout, color, " (CPU %d)", this_cpu); 1553 1554 out: 1555 color_fprintf(stdout, color, "\n"); 1556 1557 thread__put(sched_in); 1558 1559 return 0; 1560 } 1561 1562 static int process_sched_switch_event(struct perf_tool *tool, 1563 struct perf_evsel *evsel, 1564 struct perf_sample *sample, 1565 struct machine *machine) 1566 { 1567 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1568 int this_cpu = sample->cpu, err = 0; 1569 u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), 1570 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); 1571 1572 if (sched->curr_pid[this_cpu] != (u32)-1) { 1573 /* 1574 * Are we trying to switch away a PID that is 1575 * not current? 
1576 */ 1577 if (sched->curr_pid[this_cpu] != prev_pid) 1578 sched->nr_context_switch_bugs++; 1579 } 1580 1581 if (sched->tp_handler->switch_event) 1582 err = sched->tp_handler->switch_event(sched, evsel, sample, machine); 1583 1584 sched->curr_pid[this_cpu] = next_pid; 1585 return err; 1586 } 1587 1588 static int process_sched_runtime_event(struct perf_tool *tool, 1589 struct perf_evsel *evsel, 1590 struct perf_sample *sample, 1591 struct machine *machine) 1592 { 1593 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1594 1595 if (sched->tp_handler->runtime_event) 1596 return sched->tp_handler->runtime_event(sched, evsel, sample, machine); 1597 1598 return 0; 1599 } 1600 1601 static int perf_sched__process_fork_event(struct perf_tool *tool, 1602 union perf_event *event, 1603 struct perf_sample *sample, 1604 struct machine *machine) 1605 { 1606 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1607 1608 /* run the fork event through the perf machineruy */ 1609 perf_event__process_fork(tool, event, sample, machine); 1610 1611 /* and then run additional processing needed for this command */ 1612 if (sched->tp_handler->fork_event) 1613 return sched->tp_handler->fork_event(sched, event, machine); 1614 1615 return 0; 1616 } 1617 1618 static int process_sched_migrate_task_event(struct perf_tool *tool, 1619 struct perf_evsel *evsel, 1620 struct perf_sample *sample, 1621 struct machine *machine) 1622 { 1623 struct perf_sched *sched = container_of(tool, struct perf_sched, tool); 1624 1625 if (sched->tp_handler->migrate_task_event) 1626 return sched->tp_handler->migrate_task_event(sched, evsel, sample, machine); 1627 1628 return 0; 1629 } 1630 1631 typedef int (*tracepoint_handler)(struct perf_tool *tool, 1632 struct perf_evsel *evsel, 1633 struct perf_sample *sample, 1634 struct machine *machine); 1635 1636 static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_unused, 1637 union perf_event *event __maybe_unused, 1638 struct perf_sample *sample, 1639 struct perf_evsel *evsel, 1640 struct machine *machine) 1641 { 1642 int err = 0; 1643 1644 if (evsel->handler != NULL) { 1645 tracepoint_handler f = evsel->handler; 1646 err = f(tool, evsel, sample, machine); 1647 } 1648 1649 return err; 1650 } 1651 1652 static int perf_sched__read_events(struct perf_sched *sched) 1653 { 1654 const struct perf_evsel_str_handler handlers[] = { 1655 { "sched:sched_switch", process_sched_switch_event, }, 1656 { "sched:sched_stat_runtime", process_sched_runtime_event, }, 1657 { "sched:sched_wakeup", process_sched_wakeup_event, }, 1658 { "sched:sched_wakeup_new", process_sched_wakeup_event, }, 1659 { "sched:sched_migrate_task", process_sched_migrate_task_event, }, 1660 }; 1661 struct perf_session *session; 1662 struct perf_data_file file = { 1663 .path = input_name, 1664 .mode = PERF_DATA_MODE_READ, 1665 .force = sched->force, 1666 }; 1667 int rc = -1; 1668 1669 session = perf_session__new(&file, false, &sched->tool); 1670 if (session == NULL) { 1671 pr_debug("No Memory for session\n"); 1672 return -1; 1673 } 1674 1675 symbol__init(&session->header.env); 1676 1677 if (perf_session__set_tracepoints_handlers(session, handlers)) 1678 goto out_delete; 1679 1680 if (perf_session__has_traces(session, "record -R")) { 1681 int err = perf_session__process_events(session); 1682 if (err) { 1683 pr_err("Failed to process events, error %d", err); 1684 goto out_delete; 1685 } 1686 1687 sched->nr_events = session->evlist->stats.nr_events[0]; 1688 sched->nr_lost_events = 
session->evlist->stats.total_lost; 1689 sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST]; 1690 } 1691 1692 rc = 0; 1693 out_delete: 1694 perf_session__delete(session); 1695 return rc; 1696 } 1697 1698 /* 1699 * scheduling times are printed as msec.usec 1700 */ 1701 static inline void print_sched_time(unsigned long long nsecs, int width) 1702 { 1703 unsigned long msecs; 1704 unsigned long usecs; 1705 1706 msecs = nsecs / NSEC_PER_MSEC; 1707 nsecs -= msecs * NSEC_PER_MSEC; 1708 usecs = nsecs / NSEC_PER_USEC; 1709 printf("%*lu.%03lu ", width, msecs, usecs); 1710 } 1711 1712 /* 1713 * returns runtime data for event, allocating memory for it the 1714 * first time it is used. 1715 */ 1716 static struct evsel_runtime *perf_evsel__get_runtime(struct perf_evsel *evsel) 1717 { 1718 struct evsel_runtime *r = evsel->priv; 1719 1720 if (r == NULL) { 1721 r = zalloc(sizeof(struct evsel_runtime)); 1722 evsel->priv = r; 1723 } 1724 1725 return r; 1726 } 1727 1728 /* 1729 * save last time event was seen per cpu 1730 */ 1731 static void perf_evsel__save_time(struct perf_evsel *evsel, 1732 u64 timestamp, u32 cpu) 1733 { 1734 struct evsel_runtime *r = perf_evsel__get_runtime(evsel); 1735 1736 if (r == NULL) 1737 return; 1738 1739 if ((cpu >= r->ncpu) || (r->last_time == NULL)) { 1740 int i, n = __roundup_pow_of_two(cpu+1); 1741 void *p = r->last_time; 1742 1743 p = realloc(r->last_time, n * sizeof(u64)); 1744 if (!p) 1745 return; 1746 1747 r->last_time = p; 1748 for (i = r->ncpu; i < n; ++i) 1749 r->last_time[i] = (u64) 0; 1750 1751 r->ncpu = n; 1752 } 1753 1754 r->last_time[cpu] = timestamp; 1755 } 1756 1757 /* returns last time this event was seen on the given cpu */ 1758 static u64 perf_evsel__get_time(struct perf_evsel *evsel, u32 cpu) 1759 { 1760 struct evsel_runtime *r = perf_evsel__get_runtime(evsel); 1761 1762 if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu)) 1763 return 0; 1764 1765 return r->last_time[cpu]; 1766 } 1767 1768 static int comm_width = 20; 1769 1770 static char *timehist_get_commstr(struct thread *thread) 1771 { 1772 static char str[32]; 1773 const char *comm = thread__comm_str(thread); 1774 pid_t tid = thread->tid; 1775 pid_t pid = thread->pid_; 1776 int n; 1777 1778 if (pid == 0) 1779 n = scnprintf(str, sizeof(str), "%s", comm); 1780 1781 else if (tid != pid) 1782 n = scnprintf(str, sizeof(str), "%s[%d/%d]", comm, tid, pid); 1783 1784 else 1785 n = scnprintf(str, sizeof(str), "%s[%d]", comm, tid); 1786 1787 if (n > comm_width) 1788 comm_width = n; 1789 1790 return str; 1791 } 1792 1793 static void timehist_header(struct perf_sched *sched) 1794 { 1795 u32 ncpus = sched->max_cpu + 1; 1796 u32 i, j; 1797 1798 printf("%15s %6s ", "time", "cpu"); 1799 1800 if (sched->show_cpu_visual) { 1801 printf(" "); 1802 for (i = 0, j = 0; i < ncpus; ++i) { 1803 printf("%x", j++); 1804 if (j > 15) 1805 j = 0; 1806 } 1807 printf(" "); 1808 } 1809 1810 printf(" %-20s %9s %9s %9s", 1811 "task name", "wait time", "sch delay", "run time"); 1812 1813 printf("\n"); 1814 1815 /* 1816 * units row 1817 */ 1818 printf("%15s %-6s ", "", ""); 1819 1820 if (sched->show_cpu_visual) 1821 printf(" %*s ", ncpus, ""); 1822 1823 printf(" %-20s %9s %9s %9s\n", "[tid/pid]", "(msec)", "(msec)", "(msec)"); 1824 1825 /* 1826 * separator 1827 */ 1828 printf("%.15s %.6s ", graph_dotted_line, graph_dotted_line); 1829 1830 if (sched->show_cpu_visual) 1831 printf(" %.*s ", ncpus, graph_dotted_line); 1832 1833 printf(" %.20s %.9s %.9s %.9s", 1834 graph_dotted_line, graph_dotted_line, 
graph_dotted_line, 1835 graph_dotted_line); 1836 1837 printf("\n"); 1838 } 1839 1840 static void timehist_print_sample(struct perf_sched *sched, 1841 struct perf_sample *sample, 1842 struct addr_location *al, 1843 struct thread *thread, 1844 u64 t) 1845 { 1846 struct thread_runtime *tr = thread__priv(thread); 1847 u32 max_cpus = sched->max_cpu + 1; 1848 char tstr[64]; 1849 1850 timestamp__scnprintf_usec(t, tstr, sizeof(tstr)); 1851 printf("%15s [%04d] ", tstr, sample->cpu); 1852 1853 if (sched->show_cpu_visual) { 1854 u32 i; 1855 char c; 1856 1857 printf(" "); 1858 for (i = 0; i < max_cpus; ++i) { 1859 /* flag idle times with 'i'; others are sched events */ 1860 if (i == sample->cpu) 1861 c = (thread->tid == 0) ? 'i' : 's'; 1862 else 1863 c = ' '; 1864 printf("%c", c); 1865 } 1866 printf(" "); 1867 } 1868 1869 printf(" %-*s ", comm_width, timehist_get_commstr(thread)); 1870 1871 print_sched_time(tr->dt_wait, 6); 1872 print_sched_time(tr->dt_delay, 6); 1873 print_sched_time(tr->dt_run, 6); 1874 1875 if (sched->show_wakeups) 1876 printf(" %-*s", comm_width, ""); 1877 1878 if (thread->tid == 0) 1879 goto out; 1880 1881 if (sched->show_callchain) 1882 printf(" "); 1883 1884 sample__fprintf_sym(sample, al, 0, 1885 EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE | 1886 EVSEL__PRINT_CALLCHAIN_ARROW | 1887 EVSEL__PRINT_SKIP_IGNORED, 1888 &callchain_cursor, stdout); 1889 1890 out: 1891 printf("\n"); 1892 } 1893 1894 /* 1895 * Explanation of delta-time stats: 1896 * 1897 * t = time of current schedule out event 1898 * tprev = time of previous sched out event 1899 * also time of schedule-in event for current task 1900 * last_time = time of last sched change event for current task 1901 * (i.e, time process was last scheduled out) 1902 * ready_to_run = time of wakeup for current task 1903 * 1904 * -----|------------|------------|------------|------ 1905 * last ready tprev t 1906 * time to run 1907 * 1908 * |-------- dt_wait --------| 1909 * |- dt_delay -|-- dt_run --| 1910 * 1911 * dt_run = run time of current task 1912 * dt_wait = time between last schedule out event for task and tprev 1913 * represents time spent off the cpu 1914 * dt_delay = time between wakeup and schedule-in of task 1915 */ 1916 1917 static void timehist_update_runtime_stats(struct thread_runtime *r, 1918 u64 t, u64 tprev) 1919 { 1920 r->dt_delay = 0; 1921 r->dt_wait = 0; 1922 r->dt_run = 0; 1923 if (tprev) { 1924 r->dt_run = t - tprev; 1925 if (r->ready_to_run) { 1926 if (r->ready_to_run > tprev) 1927 pr_debug("time travel: wakeup time for task > previous sched_switch event\n"); 1928 else 1929 r->dt_delay = tprev - r->ready_to_run; 1930 } 1931 1932 if (r->last_time > tprev) 1933 pr_debug("time travel: last sched out time for task > previous sched_switch event\n"); 1934 else if (r->last_time) 1935 r->dt_wait = tprev - r->last_time; 1936 } 1937 1938 update_stats(&r->run_stats, r->dt_run); 1939 r->total_run_time += r->dt_run; 1940 } 1941 1942 static bool is_idle_sample(struct perf_sched *sched, 1943 struct perf_sample *sample, 1944 struct perf_evsel *evsel, 1945 struct machine *machine) 1946 { 1947 struct thread *thread; 1948 struct callchain_cursor *cursor = &callchain_cursor; 1949 1950 /* pid 0 == swapper == idle task */ 1951 if (sample->pid == 0) 1952 return true; 1953 1954 if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0) { 1955 if (perf_evsel__intval(evsel, sample, "prev_pid") == 0) 1956 return true; 1957 } 1958 1959 /* want main thread for process - has maps */ 1960 thread = machine__findnew_thread(machine, sample->pid, 
sample->pid); 1961 if (thread == NULL) { 1962 pr_debug("Failed to get thread for pid %d.\n", sample->pid); 1963 return false; 1964 } 1965 1966 if (!symbol_conf.use_callchain || sample->callchain == NULL) 1967 return false; 1968 1969 if (thread__resolve_callchain(thread, cursor, evsel, sample, 1970 NULL, NULL, sched->max_stack + 2) != 0) { 1971 if (verbose) 1972 error("Failed to resolve callchain. Skipping\n"); 1973 1974 return false; 1975 } 1976 1977 callchain_cursor_commit(cursor); 1978 1979 while (true) { 1980 struct callchain_cursor_node *node; 1981 struct symbol *sym; 1982 1983 node = callchain_cursor_current(cursor); 1984 if (node == NULL) 1985 break; 1986 1987 sym = node->sym; 1988 if (sym && sym->name) { 1989 if (!strcmp(sym->name, "schedule") || 1990 !strcmp(sym->name, "__schedule") || 1991 !strcmp(sym->name, "preempt_schedule")) 1992 sym->ignore = 1; 1993 } 1994 1995 callchain_cursor_advance(cursor); 1996 } 1997 1998 return false; 1999 } 2000 2001 /* 2002 * Track idle stats per cpu by maintaining a local thread 2003 * struct for the idle task on each cpu. 2004 */ 2005 static int init_idle_threads(int ncpu) 2006 { 2007 int i; 2008 2009 idle_threads = zalloc(ncpu * sizeof(struct thread *)); 2010 if (!idle_threads) 2011 return -ENOMEM; 2012 2013 idle_max_cpu = ncpu; 2014 2015 /* allocate the actual thread struct if needed */ 2016 for (i = 0; i < ncpu; ++i) { 2017 idle_threads[i] = thread__new(0, 0); 2018 if (idle_threads[i] == NULL) 2019 return -ENOMEM; 2020 2021 thread__set_comm(idle_threads[i], idle_comm, 0); 2022 } 2023 2024 return 0; 2025 } 2026 2027 static void free_idle_threads(void) 2028 { 2029 int i; 2030 2031 if (idle_threads == NULL) 2032 return; 2033 2034 for (i = 0; i < idle_max_cpu; ++i) { 2035 if ((idle_threads[i])) 2036 thread__delete(idle_threads[i]); 2037 } 2038 2039 free(idle_threads); 2040 } 2041 2042 static struct thread *get_idle_thread(int cpu) 2043 { 2044 /* 2045 * expand/allocate array of pointers to local thread 2046 * structs if needed 2047 */ 2048 if ((cpu >= idle_max_cpu) || (idle_threads == NULL)) { 2049 int i, j = __roundup_pow_of_two(cpu+1); 2050 void *p; 2051 2052 p = realloc(idle_threads, j * sizeof(struct thread *)); 2053 if (!p) 2054 return NULL; 2055 2056 idle_threads = (struct thread **) p; 2057 for (i = idle_max_cpu; i < j; ++i) 2058 idle_threads[i] = NULL; 2059 2060 idle_max_cpu = j; 2061 } 2062 2063 /* allocate a new thread struct if needed */ 2064 if (idle_threads[cpu] == NULL) { 2065 idle_threads[cpu] = thread__new(0, 0); 2066 2067 if (idle_threads[cpu]) { 2068 idle_threads[cpu]->tid = 0; 2069 thread__set_comm(idle_threads[cpu], idle_comm, 0); 2070 } 2071 } 2072 2073 return idle_threads[cpu]; 2074 } 2075 2076 /* 2077 * handle runtime stats saved per thread 2078 */ 2079 static struct thread_runtime *thread__init_runtime(struct thread *thread) 2080 { 2081 struct thread_runtime *r; 2082 2083 r = zalloc(sizeof(struct thread_runtime)); 2084 if (!r) 2085 return NULL; 2086 2087 init_stats(&r->run_stats); 2088 thread__set_priv(thread, r); 2089 2090 return r; 2091 } 2092 2093 static struct thread_runtime *thread__get_runtime(struct thread *thread) 2094 { 2095 struct thread_runtime *tr; 2096 2097 tr = thread__priv(thread); 2098 if (tr == NULL) { 2099 tr = thread__init_runtime(thread); 2100 if (tr == NULL) 2101 pr_debug("Failed to malloc memory for runtime data.\n"); 2102 } 2103 2104 return tr; 2105 } 2106 2107 static struct thread *timehist_get_thread(struct perf_sched *sched, 2108 struct perf_sample *sample, 2109 struct machine *machine, 2110 struct 
					  struct perf_evsel *evsel)
{
	struct thread *thread;

	if (is_idle_sample(sched, sample, evsel, machine)) {
		thread = get_idle_thread(sample->cpu);
		if (thread == NULL)
			pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu);

	} else {
		/* there were samples with tid 0 but non-zero pid */
		thread = machine__findnew_thread(machine, sample->pid,
						 sample->tid ?: sample->pid);
		if (thread == NULL) {
			pr_debug("Failed to get thread for tid %d. skipping sample.\n",
				 sample->tid);
		}
	}

	return thread;
}

static bool timehist_skip_sample(struct perf_sched *sched,
				 struct thread *thread)
{
	bool rc = false;

	if (thread__is_filtered(thread)) {
		rc = true;
		sched->skipped_samples++;
	}

	return rc;
}

static void timehist_print_wakeup_event(struct perf_sched *sched,
					struct perf_sample *sample,
					struct machine *machine,
					struct thread *awakened)
{
	struct thread *thread;
	char tstr[64];

	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
	if (thread == NULL)
		return;

	/* show wakeup unless both awakee and awaker are filtered */
	if (timehist_skip_sample(sched, thread) &&
	    timehist_skip_sample(sched, awakened)) {
		return;
	}

	timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
	printf("%15s [%04d] ", tstr, sample->cpu);
	if (sched->show_cpu_visual)
		printf(" %*s ", sched->max_cpu + 1, "");

	printf(" %-*s ", comm_width, timehist_get_commstr(thread));

	/* dt spacer */
	printf(" %9s %9s %9s ", "", "", "");

	printf("awakened: %s", timehist_get_commstr(awakened));

	printf("\n");
}

static int timehist_sched_wakeup_event(struct perf_tool *tool,
				       union perf_event *event __maybe_unused,
				       struct perf_evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
	struct thread *thread;
	struct thread_runtime *tr = NULL;
	/* want pid of awakened task not pid in sample */
	const u32 pid = perf_evsel__intval(evsel, sample, "pid");

	thread = machine__findnew_thread(machine, 0, pid);
	if (thread == NULL)
		return -1;

	tr = thread__get_runtime(thread);
	if (tr == NULL)
		return -1;

	if (tr->ready_to_run == 0)
		tr->ready_to_run = sample->time;

	/* show wakeups if requested */
	if (sched->show_wakeups &&
	    !perf_time__skip_sample(&sched->ptime, sample->time))
		timehist_print_wakeup_event(sched, sample, machine, thread);

	return 0;
}

static void timehist_print_migration_event(struct perf_sched *sched,
					   struct perf_evsel *evsel,
					   struct perf_sample *sample,
					   struct machine *machine,
					   struct thread *migrated)
{
	struct thread *thread;
	char tstr[64];
	u32 max_cpus = sched->max_cpu + 1;
	u32 ocpu, dcpu;

	if (sched->summary_only)
		return;

	ocpu = perf_evsel__intval(evsel, sample, "orig_cpu");
	dcpu = perf_evsel__intval(evsel, sample, "dest_cpu");

	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
	if (thread == NULL)
		return;

	if (timehist_skip_sample(sched, thread) &&
	    timehist_skip_sample(sched, migrated)) {
		return;
	}

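	/*
	 * One line per migration: timestamp, sampling CPU, the optional
	 * per-CPU visual (the CPU the event was recorded on is marked 'm'),
	 * the comm of the current task from the sample, empty spacer columns
	 * for the dt fields, then the migrated task and its orig_cpu => dest_cpu
	 * move.
	 */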
	timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
	printf("%15s [%04d] ", tstr, sample->cpu);

	if (sched->show_cpu_visual) {
		u32 i;
		char c;

		printf(" ");
		for (i = 0; i < max_cpus; ++i) {
			c = (i == sample->cpu) ? 'm' : ' ';
			printf("%c", c);
		}
		printf(" ");
	}

	printf(" %-*s ", comm_width, timehist_get_commstr(thread));

	/* dt spacer */
	printf(" %9s %9s %9s ", "", "", "");

	printf("migrated: %s", timehist_get_commstr(migrated));
	printf(" cpu %d => %d", ocpu, dcpu);

	printf("\n");
}

static int timehist_migrate_task_event(struct perf_tool *tool,
				       union perf_event *event __maybe_unused,
				       struct perf_evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
	struct thread *thread;
	struct thread_runtime *tr = NULL;
	/* want pid of migrated task not pid in sample */
	const u32 pid = perf_evsel__intval(evsel, sample, "pid");

	thread = machine__findnew_thread(machine, 0, pid);
	if (thread == NULL)
		return -1;

	tr = thread__get_runtime(thread);
	if (tr == NULL)
		return -1;

	tr->migrations++;

	/* show migrations if requested */
	timehist_print_migration_event(sched, evsel, sample, machine, thread);

	return 0;
}

static int timehist_sched_change_event(struct perf_tool *tool,
				       union perf_event *event,
				       struct perf_evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
	struct perf_time_interval *ptime = &sched->ptime;
	struct addr_location al;
	struct thread *thread;
	struct thread_runtime *tr = NULL;
	u64 tprev, t = sample->time;
	int rc = 0;

	if (machine__resolve(machine, &al, sample) < 0) {
		pr_err("problem processing %d event. skipping it\n",
		       event->header.type);
		rc = -1;
		goto out;
	}

	thread = timehist_get_thread(sched, sample, machine, evsel);
	if (thread == NULL) {
		rc = -1;
		goto out;
	}

	if (timehist_skip_sample(sched, thread))
		goto out;

	tr = thread__get_runtime(thread);
	if (tr == NULL) {
		rc = -1;
		goto out;
	}

	tprev = perf_evsel__get_time(evsel, sample->cpu);

	/*
	 * If start time given:
	 * - sample time is under window user cares about - skip sample
	 * - tprev is under window user cares about - reset to start of window
	 */
	if (ptime->start && ptime->start > t)
		goto out;

	if (ptime->start > tprev)
		tprev = ptime->start;

	/*
	 * If end time given:
	 * - previous sched event is out of window - we are done
	 * - sample time is beyond window user cares about - reset it
	 *   to close out stats for time window interest
	 */
	if (ptime->end) {
		if (tprev > ptime->end)
			goto out;

		if (t > ptime->end)
			t = ptime->end;
	}

	timehist_update_runtime_stats(tr, t, tprev);

	if (!sched->summary_only)
		timehist_print_sample(sched, sample, &al, thread, t);

out:
	if (tr) {
		/* time of this sched_switch event becomes last time task seen */
		tr->last_time = sample->time;

		/* sched out event for task so reset ready to run time */
		tr->ready_to_run = 0;
	}

	perf_evsel__save_time(evsel, sample->time, sample->cpu);

	return rc;
}

static int timehist_sched_switch_event(struct perf_tool *tool,
				       union perf_event *event,
				       struct perf_evsel *evsel,
				       struct perf_sample *sample,
				       struct machine *machine __maybe_unused)
{
	return timehist_sched_change_event(tool, event, evsel, sample, machine);
}

static int process_lost(struct perf_tool *tool __maybe_unused,
			union perf_event *event,
			struct perf_sample *sample,
			struct machine *machine __maybe_unused)
{
	char tstr[64];

	timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
	printf("%15s ", tstr);
	printf("lost %" PRIu64 " events on cpu %d\n", event->lost.lost, sample->cpu);

	return 0;
}


static void print_thread_runtime(struct thread *t,
				 struct thread_runtime *r)
{
	double mean = avg_stats(&r->run_stats);
	float stddev;

	printf("%*s   %5d  %9" PRIu64 " ",
	       comm_width, timehist_get_commstr(t), t->ppid,
	       (u64) r->run_stats.n);

	print_sched_time(r->total_run_time, 8);
	stddev = rel_stddev_stats(stddev_stats(&r->run_stats), mean);
	print_sched_time(r->run_stats.min, 6);
	printf(" ");
	print_sched_time((u64) mean, 6);
	printf(" ");
	print_sched_time(r->run_stats.max, 6);
	printf(" ");
	printf("%5.2f", stddev);
	printf(" %5" PRIu64, r->migrations);
	printf("\n");
}

struct total_run_stats {
	u64 sched_count;
	u64 task_count;
	u64 total_run_time;
};

static int __show_thread_runtime(struct thread *t, void *priv)
{
	struct total_run_stats *stats = priv;
	struct thread_runtime *r;

	if (thread__is_filtered(t))
		return 0;

	r = thread__priv(t);
	if (r && r->run_stats.n) {
		stats->task_count++;
		stats->sched_count += r->run_stats.n;
		stats->total_run_time += r->total_run_time;
		print_thread_runtime(t, r);
	}

	return 0;
}

static int show_thread_runtime(struct thread *t, void *priv)
{
	if (t->dead)
		return 0;

	return __show_thread_runtime(t, priv);
}

static int show_deadthread_runtime(struct thread *t, void *priv)
{
	if (!t->dead)
		return 0;

	return __show_thread_runtime(t, priv);
}

static void timehist_print_summary(struct perf_sched *sched,
				   struct perf_session *session)
{
	struct machine *m = &session->machines.host;
	struct total_run_stats totals;
	u64 task_count;
	struct thread *t;
	struct thread_runtime *r;
	int i;

	memset(&totals, 0, sizeof(totals));

	if (comm_width < 30)
		comm_width = 30;

	printf("\nRuntime summary\n");
	printf("%*s  parent   sched-in  ", comm_width, "comm");
	printf("   run-time    min-run     avg-run     max-run  stddev  migrations\n");
	printf("%*s            (count)  ", comm_width, "");
	printf("     (msec)     (msec)      (msec)      (msec)       %%\n");
	printf("%.117s\n", graph_dotted_line);

	machine__for_each_thread(m, show_thread_runtime, &totals);
	task_count = totals.task_count;
	if (!task_count)
		printf("<no still running tasks>\n");

	printf("\nTerminated tasks:\n");
	machine__for_each_thread(m, show_deadthread_runtime, &totals);
	if (task_count == totals.task_count)
		printf("<no terminated tasks>\n");

	/* CPU idle stats not tracked when samples were skipped */
	if (sched->skipped_samples)
		return;

	printf("\nIdle stats:\n");
	for (i = 0; i < idle_max_cpu; ++i) {
		t = idle_threads[i];
		if (!t)
			continue;

		r = thread__priv(t);
		if (r && r->run_stats.n) {
			totals.sched_count += r->run_stats.n;
			printf("    CPU %2d idle for ", i);
			print_sched_time(r->total_run_time, 6);
			printf(" msec\n");
		} else
			printf("    CPU %2d idle entire time window\n", i);
	}

	printf("\n"
	       "    Total number of unique tasks: %" PRIu64 "\n"
	       "Total number of context switches: %" PRIu64 "\n"
	       "           Total run time (msec): ",
	       totals.task_count, totals.sched_count);

	print_sched_time(totals.total_run_time, 2);
	printf("\n");
}

typedef int (*sched_handler)(struct perf_tool *tool,
			     union perf_event *event,
			     struct perf_evsel *evsel,
			     struct perf_sample *sample,
			     struct machine *machine);

static int perf_timehist__process_sample(struct perf_tool *tool,
					 union perf_event *event,
					 struct perf_sample *sample,
					 struct perf_evsel *evsel,
					 struct machine *machine)
{
	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
	int err = 0;
	int this_cpu = sample->cpu;

	if (this_cpu > sched->max_cpu)
		sched->max_cpu = this_cpu;

	if (evsel->handler != NULL) {
		sched_handler f = evsel->handler;

		err = f(tool, event, evsel, sample, machine);
	}

	return err;
}

static int timehist_check_attr(struct perf_sched *sched,
			       struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	struct evsel_runtime *er;

	list_for_each_entry(evsel, &evlist->entries, node) {
		er = perf_evsel__get_runtime(evsel);
		if (er == NULL) {
			pr_err("Failed to allocate memory for evsel runtime data\n");
			return -1;
		}

		if (sched->show_callchain &&
		    !(evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) {
			pr_info("Samples do not have callchains.\n");
			sched->show_callchain = 0;
			symbol_conf.use_callchain = 0;
		}
	}

	return 0;
}

static int perf_sched__timehist(struct perf_sched *sched)
{
	const struct perf_evsel_str_handler handlers[] = {
		{ "sched:sched_switch",       timehist_sched_switch_event, },
		{ "sched:sched_wakeup",       timehist_sched_wakeup_event, },
		{ "sched:sched_wakeup_new",   timehist_sched_wakeup_event, },
	};
	const struct perf_evsel_str_handler migrate_handlers[] = {
		{ "sched:sched_migrate_task", timehist_migrate_task_event, },
	};
	struct perf_data_file file = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = sched->force,
	};

	struct perf_session *session;
	struct perf_evlist *evlist;
	int err = -1;

	/*
	 * event handlers for timehist option
	 */
	sched->tool.sample	 = perf_timehist__process_sample;
	sched->tool.mmap	 = perf_event__process_mmap;
	sched->tool.comm	 = perf_event__process_comm;
	sched->tool.exit	 = perf_event__process_exit;
	sched->tool.fork	 = perf_event__process_fork;
	sched->tool.lost	 = process_lost;
	sched->tool.attr	 = perf_event__process_attr;
	sched->tool.tracing_data = perf_event__process_tracing_data;
	sched->tool.build_id	 = perf_event__process_build_id;

	sched->tool.ordered_events = true;
	sched->tool.ordering_requires_timestamps = true;

	symbol_conf.use_callchain = sched->show_callchain;

	session = perf_session__new(&file, false, &sched->tool);
	if (session == NULL)
		return -ENOMEM;

	evlist = session->evlist;

	symbol__init(&session->header.env);

	if (perf_time__parse_str(&sched->ptime, sched->time_str) != 0) {
		pr_err("Invalid time string\n");
		/* don't leak the session on the error path */
		err = -EINVAL;
		goto out;
	}

	if (timehist_check_attr(sched, evlist) != 0)
		goto out;

	setup_pager();

	/* setup per-evsel handlers */
	if (perf_session__set_tracepoints_handlers(session, handlers))
		goto out;

	/* sched_switch event at a minimum needs to exist */
	if (!perf_evlist__find_tracepoint_by_name(session->evlist,
						  "sched:sched_switch")) {
		pr_err("No sched_switch events found. Have you run 'perf sched record'?\n");
		goto out;
	}

	if (sched->show_migrations &&
	    perf_session__set_tracepoints_handlers(session, migrate_handlers))
		goto out;

	/* pre-allocate struct for per-CPU idle stats */
	sched->max_cpu = session->header.env.nr_cpus_online;
	if (sched->max_cpu == 0)
		sched->max_cpu = 4;
	if (init_idle_threads(sched->max_cpu))
		goto out;

	/* summary_only implies summary option, but don't overwrite summary if set */
	if (sched->summary_only)
		sched->summary = sched->summary_only;

	if (!sched->summary_only)
		timehist_header(sched);

	err = perf_session__process_events(session);
	if (err) {
		pr_err("Failed to process events, error %d\n", err);
		goto out;
	}

	sched->nr_events      = evlist->stats.nr_events[0];
	sched->nr_lost_events = evlist->stats.total_lost;
	sched->nr_lost_chunks = evlist->stats.nr_events[PERF_RECORD_LOST];

	if (sched->summary)
		timehist_print_summary(sched, session);

out:
	free_idle_threads();
	perf_session__delete(session);

	return err;
}


static void print_bad_events(struct perf_sched *sched)
{
	if (sched->nr_unordered_timestamps && sched->nr_timestamps) {
		printf("  INFO: %.3f%% unordered timestamps (%ld out of %ld)\n",
			(double)sched->nr_unordered_timestamps/(double)sched->nr_timestamps*100.0,
			sched->nr_unordered_timestamps, sched->nr_timestamps);
	}
	if (sched->nr_lost_events && sched->nr_events) {
		printf("  INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
			(double)sched->nr_lost_events/(double)sched->nr_events * 100.0,
			sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks);
	}
	if (sched->nr_context_switch_bugs && sched->nr_timestamps) {
		printf("  INFO: %.3f%% context switch bugs (%ld out of %ld)",
			(double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0,
			sched->nr_context_switch_bugs, sched->nr_timestamps);
		if (sched->nr_lost_events)
			printf(" (due to lost events?)");
		printf("\n");
	}
}

static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data)
{
	struct rb_node **new = &(root->rb_node), *parent = NULL;
	struct work_atoms *this;
	const char *comm = thread__comm_str(data->thread), *this_comm;

	while (*new) {
		int cmp;

		this = container_of(*new, struct work_atoms, node);
		parent = *new;

		this_comm = thread__comm_str(this->thread);
		cmp = strcmp(comm, this_comm);
		if (cmp > 0) {
			new = &((*new)->rb_left);
		} else if (cmp < 0) {
			new = &((*new)->rb_right);
		} else {
			this->num_merged++;
			this->total_runtime += data->total_runtime;
			this->nb_atoms += data->nb_atoms;
			this->total_lat += data->total_lat;
			list_splice(&data->work_list, &this->work_list);
			if (this->max_lat < data->max_lat) {
				this->max_lat = data->max_lat;
				this->max_lat_at = data->max_lat_at;
			}
			zfree(&data);
			return;
		}
	}

	data->num_merged++;
	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

static void perf_sched__merge_lat(struct perf_sched *sched)
{
	struct work_atoms *data;
	struct rb_node *node;

	if (sched->skip_merge)
		return;

	while ((node = rb_first(&sched->atom_root))) {
		rb_erase(node, &sched->atom_root);
		data = rb_entry(node, struct work_atoms, node);
		__merge_work_atoms(&sched->merged_atom_root, data);
	}
}

static int perf_sched__lat(struct perf_sched *sched)
{
	struct rb_node *next;

	setup_pager();

	if (perf_sched__read_events(sched))
		return -1;

	perf_sched__merge_lat(sched);
	perf_sched__sort_lat(sched);

	printf("\n -----------------------------------------------------------------------------------------------------------------\n");
	printf("  Task                  |   Runtime ms  | Switches | Average delay ms | Maximum delay ms | Maximum delay at       |\n");
	printf(" -----------------------------------------------------------------------------------------------------------------\n");

	next = rb_first(&sched->sorted_atom_root);

	while (next) {
		struct work_atoms *work_list;

		work_list = rb_entry(next, struct work_atoms, node);
		output_lat_thread(sched, work_list);
		next = rb_next(next);
		thread__zput(work_list->thread);
	}

	printf(" -----------------------------------------------------------------------------------------------------------------\n");
	printf("  TOTAL:                |%11.3f ms |%9" PRIu64 " |\n",
		(double)sched->all_runtime / NSEC_PER_MSEC, sched->all_count);

	printf(" ---------------------------------------------------\n");

	print_bad_events(sched);
	printf("\n");

	return 0;
}

static int setup_map_cpus(struct perf_sched *sched)
{
	struct cpu_map *map;

	sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);

	if (sched->map.comp) {
		sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int));
		if (!sched->map.comp_cpus)
			return -1;
	}

	if (!sched->map.cpus_str)
		return 0;

	map = cpu_map__new(sched->map.cpus_str);
	if (!map) {
		pr_err("failed to get cpus map from %s\n", sched->map.cpus_str);
		return -1;
	}

	sched->map.cpus = map;
	return 0;
}

static int setup_color_pids(struct perf_sched *sched)
{
	struct thread_map *map;

	if (!sched->map.color_pids_str)
		return 0;

	map = thread_map__new_by_tid_str(sched->map.color_pids_str);
	if (!map) {
		pr_err("failed to get thread map from %s\n", sched->map.color_pids_str);
		return -1;
	}

	sched->map.color_pids = map;
	return 0;
}

static int setup_color_cpus(struct perf_sched *sched)
{
	struct cpu_map *map;

	if (!sched->map.color_cpus_str)
		return 0;

	map = cpu_map__new(sched->map.color_cpus_str);
	if (!map) {
		pr_err("failed to get cpus map from %s\n", sched->map.color_cpus_str);
		return -1;
	}

	sched->map.color_cpus = map;
	return 0;
}

static int perf_sched__map(struct perf_sched *sched)
{
	if (setup_map_cpus(sched))
		return -1;

	if (setup_color_pids(sched))
		return -1;

	if (setup_color_cpus(sched))
		return -1;

	setup_pager();
	if (perf_sched__read_events(sched))
		return -1;
	print_bad_events(sched);
	return 0;
}
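/*
 * 'perf sched replay' flow: measure the cost of the run/sleep primitives,
 * sanity-check those calibrations, rebuild the per-task event atoms from
 * the recorded trace, then spawn the tasks and re-run the workload
 * replay_repeat times.
 */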
static int perf_sched__replay(struct perf_sched *sched)
{
	unsigned long i;

	calibrate_run_measurement_overhead(sched);
	calibrate_sleep_measurement_overhead(sched);

	test_calibrations(sched);

	if (perf_sched__read_events(sched))
		return -1;

	printf("nr_run_events:        %ld\n", sched->nr_run_events);
	printf("nr_sleep_events:      %ld\n", sched->nr_sleep_events);
	printf("nr_wakeup_events:     %ld\n", sched->nr_wakeup_events);

	if (sched->targetless_wakeups)
		printf("target-less wakeups:  %ld\n", sched->targetless_wakeups);
	if (sched->multitarget_wakeups)
		printf("multi-target wakeups: %ld\n", sched->multitarget_wakeups);
	if (sched->nr_run_events_optimized)
		printf("run atoms optimized: %ld\n",
			sched->nr_run_events_optimized);

	print_task_traces(sched);
	add_cross_task_wakeups(sched);

	create_tasks(sched);
	printf("------------------------------------------------------------\n");
	for (i = 0; i < sched->replay_repeat; i++)
		run_one_test(sched);

	return 0;
}

static void setup_sorting(struct perf_sched *sched, const struct option *options,
			  const char * const usage_msg[])
{
	char *tmp, *tok, *str = strdup(sched->sort_order);

	for (tok = strtok_r(str, ", ", &tmp);
			tok; tok = strtok_r(NULL, ", ", &tmp)) {
		if (sort_dimension__add(tok, &sched->sort_list) < 0) {
			usage_with_options_msg(usage_msg, options,
					"Unknown --sort key: `%s'", tok);
		}
	}

	free(str);

	sort_dimension__add("pid", &sched->cmp_pid);
}

static int __cmd_record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;
	const char * const record_args[] = {
		"record",
		"-a",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e", "sched:sched_switch",
		"-e", "sched:sched_stat_wait",
		"-e", "sched:sched_stat_sleep",
		"-e", "sched:sched_stat_iowait",
		"-e", "sched:sched_stat_runtime",
		"-e", "sched:sched_process_fork",
		"-e", "sched:sched_wakeup",
		"-e", "sched:sched_wakeup_new",
		"-e", "sched:sched_migrate_task",
	};

	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_record(i, rec_argv, NULL);
}

int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
{
	const char default_sort_order[] = "avg, max, switch, runtime";
	struct perf_sched sched = {
		.tool = {
			.sample		 = perf_sched__process_tracepoint_sample,
			.comm		 = perf_event__process_comm,
			.lost		 = perf_event__process_lost,
			.fork		 = perf_sched__process_fork_event,
			.ordered_events  = true,
		},
		.cmp_pid	      = LIST_HEAD_INIT(sched.cmp_pid),
		.sort_list	      = LIST_HEAD_INIT(sched.sort_list),
		.start_work_mutex     = PTHREAD_MUTEX_INITIALIZER,
		.work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER,
		.sort_order	      = default_sort_order,
		.replay_repeat	      = 10,
		.profile_cpu	      = -1,
		.next_shortname1      = 'A',
		.next_shortname2      = '0',
		.skip_merge	      = 0,
		.show_callchain	      = 1,
		.max_stack	      = 5,
	};
	const struct option sched_options[] = {
	OPT_STRING('i', "input", &input_name, "file",
		    "input file name"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
		    "dump raw trace in ASCII"),
	OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"),
	OPT_END()
	};
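	/*
	 * Per-subcommand option tables; each one chains back to the common
	 * sched_options above via OPT_PARENT so -i/-v/-D/-f work everywhere.
	 */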
	const struct option latency_options[] = {
	OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]",
		   "sort by key(s): runtime, switch, avg, max"),
	OPT_INTEGER('C', "CPU", &sched.profile_cpu,
		    "CPU to profile on"),
	OPT_BOOLEAN('p', "pids", &sched.skip_merge,
		    "latency stats per pid instead of per comm"),
	OPT_PARENT(sched_options)
	};
	const struct option replay_options[] = {
	OPT_UINTEGER('r', "repeat", &sched.replay_repeat,
		     "repeat the workload replay N times (-1: infinite)"),
	OPT_PARENT(sched_options)
	};
	const struct option map_options[] = {
	OPT_BOOLEAN(0, "compact", &sched.map.comp,
		    "map output in compact mode"),
	OPT_STRING(0, "color-pids", &sched.map.color_pids_str, "pids",
		   "highlight given pids in map"),
	OPT_STRING(0, "color-cpus", &sched.map.color_cpus_str, "cpus",
		   "highlight given CPUs in map"),
	OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus",
		   "display given CPUs in map"),
	OPT_PARENT(sched_options)
	};
	const struct option timehist_options[] = {
	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
		   "file", "kallsyms pathname"),
	OPT_BOOLEAN('g', "call-graph", &sched.show_callchain,
		    "Display call chains if present (default on)"),
	OPT_UINTEGER(0, "max-stack", &sched.max_stack,
		     "Maximum number of functions to display in a backtrace"),
	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
		   "Look for files with symbols relative to this directory"),
	OPT_BOOLEAN('s', "summary", &sched.summary_only,
		    "Show only the per-task runtime summary with statistics"),
	OPT_BOOLEAN('S', "with-summary", &sched.summary,
		    "Show all events and the runtime summary with statistics"),
	OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"),
	OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"),
	OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"),
	OPT_STRING(0, "time", &sched.time_str, "str",
		   "Time span for analysis (start,stop)"),
	OPT_PARENT(sched_options)
	};

	const char * const latency_usage[] = {
		"perf sched latency [<options>]",
		NULL
	};
	const char * const replay_usage[] = {
		"perf sched replay [<options>]",
		NULL
	};
	const char * const map_usage[] = {
		"perf sched map [<options>]",
		NULL
	};
	const char * const timehist_usage[] = {
		"perf sched timehist [<options>]",
		NULL
	};
	const char *const sched_subcommands[] = { "record", "latency", "map",
						  "replay", "script",
						  "timehist", NULL };
	const char *sched_usage[] = {
		NULL,
		NULL
	};
	struct trace_sched_handler lat_ops = {
		.wakeup_event	    = latency_wakeup_event,
		.switch_event	    = latency_switch_event,
		.runtime_event	    = latency_runtime_event,
		.migrate_task_event = latency_migrate_task_event,
	};
	struct trace_sched_handler map_ops = {
		.switch_event	    = map_switch_event,
	};
	struct trace_sched_handler replay_ops = {
		.wakeup_event	    = replay_wakeup_event,
		.switch_event	    = replay_switch_event,
		.fork_event	    = replay_fork_event,
	};
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++)
		sched.curr_pid[i] = -1;

	argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands,
					sched_usage, PARSE_OPT_STOP_AT_NON_OPTION);
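	/*
	 * PARSE_OPT_STOP_AT_NON_OPTION leaves the subcommand name in argv[0]
	 * (record, latency, map, replay, script or timehist); everything after
	 * it is handed to that subcommand's own option parser below.
	 */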
	if (!argc)
		usage_with_options(sched_usage, sched_options);

	/*
	 * Aliased to 'perf script' for now:
	 */
	if (!strcmp(argv[0], "script"))
		return cmd_script(argc, argv, prefix);

	if (!strncmp(argv[0], "rec", 3)) {
		return __cmd_record(argc, argv);
	} else if (!strncmp(argv[0], "lat", 3)) {
		sched.tp_handler = &lat_ops;
		if (argc > 1) {
			argc = parse_options(argc, argv, latency_options, latency_usage, 0);
			if (argc)
				usage_with_options(latency_usage, latency_options);
		}
		setup_sorting(&sched, latency_options, latency_usage);
		return perf_sched__lat(&sched);
	} else if (!strcmp(argv[0], "map")) {
		if (argc) {
			argc = parse_options(argc, argv, map_options, map_usage, 0);
			if (argc)
				usage_with_options(map_usage, map_options);
		}
		sched.tp_handler = &map_ops;
		setup_sorting(&sched, latency_options, latency_usage);
		return perf_sched__map(&sched);
	} else if (!strncmp(argv[0], "rep", 3)) {
		sched.tp_handler = &replay_ops;
		if (argc) {
			argc = parse_options(argc, argv, replay_options, replay_usage, 0);
			if (argc)
				usage_with_options(replay_usage, replay_options);
		}
		return perf_sched__replay(&sched);
	} else if (!strcmp(argv[0], "timehist")) {
		if (argc) {
			argc = parse_options(argc, argv, timehist_options,
					     timehist_usage, 0);
			if (argc)
				usage_with_options(timehist_usage, timehist_options);
		}
		if (sched.show_wakeups && sched.summary_only) {
			pr_err(" Error: -s and -w are mutually exclusive.\n");
			parse_options_usage(timehist_usage, timehist_options, "s", true);
			parse_options_usage(NULL, timehist_options, "w", true);
			return -EINVAL;
		}

		return perf_sched__timehist(&sched);
	} else {
		usage_with_options(sched_usage, sched_options);
	}

	return 0;
}
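
/*
 * Example invocations (illustrative only, derived from the subcommands and
 * options defined above):
 *
 *   perf sched record -- sleep 1        # capture the sched_* tracepoints
 *   perf sched latency --sort max       # per-task wakeup latency table
 *   perf sched map --compact            # per-CPU context-switch map
 *   perf sched timehist --summary       # per-task runtime summary only
 *   perf sched replay --repeat 5        # re-run the recorded workload 5 times
 */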