1 // SPDX-License-Identifier: GPL-2.0
2 #include "builtin.h"
3 #include "perf.h"
4 #include "perf-sys.h"
5
6 #include "util/cpumap.h"
7 #include "util/evlist.h"
8 #include "util/evsel.h"
9 #include "util/evsel_fprintf.h"
10 #include "util/mutex.h"
11 #include "util/symbol.h"
12 #include "util/thread.h"
13 #include "util/header.h"
14 #include "util/session.h"
15 #include "util/tool.h"
16 #include "util/cloexec.h"
17 #include "util/thread_map.h"
18 #include "util/color.h"
19 #include "util/stat.h"
20 #include "util/string2.h"
21 #include "util/callchain.h"
22 #include "util/time-utils.h"
23
24 #include <subcmd/pager.h>
25 #include <subcmd/parse-options.h>
26 #include "util/trace-event.h"
27
28 #include "util/debug.h"
29 #include "util/event.h"
30 #include "util/util.h"
31 #include "util/synthetic-events.h"
32 #include "util/target.h"
33
34 #include <linux/kernel.h>
35 #include <linux/log2.h>
36 #include <linux/zalloc.h>
37 #include <sys/prctl.h>
38 #include <sys/resource.h>
39 #include <inttypes.h>
40
41 #include <errno.h>
42 #include <semaphore.h>
43 #include <pthread.h>
44 #include <math.h>
45 #include <api/fs/fs.h>
46 #include <perf/cpumap.h>
47 #include <linux/time64.h>
48 #include <linux/err.h>
49
50 #include <linux/ctype.h>
51
52 #define PR_SET_NAME 15 /* Set process name */
53 #define MAX_CPUS 4096
54 #define COMM_LEN 20
55 #define SYM_LEN 129
56 #define MAX_PID 1024000
57 #define MAX_PRIO 140
58 #define SEP_LEN 100
59
60 static const char *cpu_list;
61 static struct perf_cpu_map *user_requested_cpus;
62 static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
63
64 struct sched_atom;
65
66 struct task_desc {
67 unsigned long nr;
68 unsigned long pid;
69 char comm[COMM_LEN];
70
71 unsigned long nr_events;
72 unsigned long curr_event;
73 struct sched_atom **atoms;
74
75 pthread_t thread;
76
77 sem_t ready_for_work;
78 sem_t work_done_sem;
79
80 u64 cpu_usage;
81 };
82
83 enum sched_event_type {
84 SCHED_EVENT_RUN,
85 SCHED_EVENT_SLEEP,
86 SCHED_EVENT_WAKEUP,
87 };
88
89 struct sched_atom {
90 enum sched_event_type type;
91 u64 timestamp;
92 u64 duration;
93 unsigned long nr;
94 sem_t *wait_sem;
95 struct task_desc *wakee;
96 };
97
98 enum thread_state {
99 THREAD_SLEEPING = 0,
100 THREAD_WAIT_CPU,
101 THREAD_SCHED_IN,
102 THREAD_IGNORE
103 };
104
105 struct work_atom {
106 struct list_head list;
107 enum thread_state state;
108 u64 sched_out_time;
109 u64 wake_up_time;
110 u64 sched_in_time;
111 u64 runtime;
112 };
113
114 struct work_atoms {
115 struct list_head work_list;
116 struct thread *thread;
117 struct rb_node node;
118 u64 max_lat;
119 u64 max_lat_start;
120 u64 max_lat_end;
121 u64 total_lat;
122 u64 nb_atoms;
123 u64 total_runtime;
124 int num_merged;
125 };
126
127 typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);
128
129 struct perf_sched;
130
131 struct trace_sched_handler {
132 int (*switch_event)(struct perf_sched *sched, struct evsel *evsel,
133 struct perf_sample *sample, struct machine *machine);
134
135 int (*runtime_event)(struct perf_sched *sched, struct evsel *evsel,
136 struct perf_sample *sample, struct machine *machine);
137
138 int (*wakeup_event)(struct perf_sched *sched, struct evsel *evsel,
139 struct perf_sample *sample, struct machine *machine);
140
141 /* PERF_RECORD_FORK event, not sched_process_fork tracepoint */
142 int (*fork_event)(struct perf_sched *sched, union perf_event *event,
143 struct machine *machine);
144
145 int (*migrate_task_event)(struct perf_sched *sched,
146 struct evsel *evsel,
147 struct perf_sample *sample,
148 struct machine *machine);
149 };
150
151 #define COLOR_PIDS PERF_COLOR_BLUE
152 #define COLOR_CPUS PERF_COLOR_BG_RED
153
154 struct perf_sched_map {
155 DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS);
156 struct perf_cpu *comp_cpus;
157 bool comp;
158 struct perf_thread_map *color_pids;
159 const char *color_pids_str;
160 struct perf_cpu_map *color_cpus;
161 const char *color_cpus_str;
162 const char *task_name;
163 struct strlist *task_names;
164 bool fuzzy;
165 struct perf_cpu_map *cpus;
166 const char *cpus_str;
167 };
168
169 struct perf_sched {
170 struct perf_tool tool;
171 const char *sort_order;
172 unsigned long nr_tasks;
173 struct task_desc **pid_to_task;
174 struct task_desc **tasks;
175 const struct trace_sched_handler *tp_handler;
176 struct mutex start_work_mutex;
177 struct mutex work_done_wait_mutex;
178 int profile_cpu;
179 /*
180 * Track the current task - that way we can know whether there are any
181 * weird events, such as a task being switched away that is not current.
182 */
183 struct perf_cpu max_cpu;
184 u32 *curr_pid;
185 struct thread **curr_thread;
186 struct thread **curr_out_thread;
187 char next_shortname1;
188 char next_shortname2;
189 unsigned int replay_repeat;
190 unsigned long nr_run_events;
191 unsigned long nr_sleep_events;
192 unsigned long nr_wakeup_events;
193 unsigned long nr_sleep_corrections;
194 unsigned long nr_run_events_optimized;
195 unsigned long targetless_wakeups;
196 unsigned long multitarget_wakeups;
197 unsigned long nr_runs;
198 unsigned long nr_timestamps;
199 unsigned long nr_unordered_timestamps;
200 unsigned long nr_context_switch_bugs;
201 unsigned long nr_events;
202 unsigned long nr_lost_chunks;
203 unsigned long nr_lost_events;
204 u64 run_measurement_overhead;
205 u64 sleep_measurement_overhead;
206 u64 start_time;
207 u64 cpu_usage;
208 u64 runavg_cpu_usage;
209 u64 parent_cpu_usage;
210 u64 runavg_parent_cpu_usage;
211 u64 sum_runtime;
212 u64 sum_fluct;
213 u64 run_avg;
214 u64 all_runtime;
215 u64 all_count;
216 u64 *cpu_last_switched;
217 struct rb_root_cached atom_root, sorted_atom_root, merged_atom_root;
218 struct list_head sort_list, cmp_pid;
219 bool force;
220 bool skip_merge;
221 struct perf_sched_map map;
222
223 /* options for timehist command */
224 bool summary;
225 bool summary_only;
226 bool idle_hist;
227 bool show_callchain;
228 unsigned int max_stack;
229 bool show_cpu_visual;
230 bool show_wakeups;
231 bool show_next;
232 bool show_migrations;
233 bool pre_migrations;
234 bool show_state;
235 bool show_prio;
236 u64 skipped_samples;
237 const char *time_str;
238 struct perf_time_interval ptime;
239 struct perf_time_interval hist_time;
240 volatile bool thread_funcs_exit;
241 const char *prio_str;
242 DECLARE_BITMAP(prio_bitmap, MAX_PRIO);
243
244 struct perf_session *session;
245 struct perf_data *data;
246 };
247
248 /* per thread run time data */
249 struct thread_runtime {
250 u64 last_time; /* time of previous sched in/out event */
251 u64 dt_run; /* run time */
252 u64 dt_sleep; /* time spent sleeping between runs (off cpu) */
253 u64 dt_iowait; /* time spent in iowait between runs (off cpu) */
254 u64 dt_preempt; /* time spent preempted between runs (off cpu) */
255 u64 dt_delay; /* time between wakeup and sched-in */
256 u64 dt_pre_mig; /* time between migration and wakeup */
257 u64 ready_to_run; /* time of wakeup */
258 u64 migrated; /* time when a thread is migrated */
259
260 struct stats run_stats;
261 u64 total_run_time;
262 u64 total_sleep_time;
263 u64 total_iowait_time;
264 u64 total_preempt_time;
265 u64 total_delay_time;
266 u64 total_pre_mig_time;
267
268 char last_state;
269
270 char shortname[3];
271 bool comm_changed;
272
273 u64 migrations;
274
275 int prio;
276 };
277
278 /* per event run time data */
279 struct evsel_runtime {
280 u64 *last_time; /* time this event was last seen per cpu */
281 u32 ncpu; /* highest cpu slot allocated */
282 };
283
284 /* per cpu idle time data */
285 struct idle_thread_runtime {
286 struct thread_runtime tr;
287 struct thread *last_thread;
288 struct rb_root_cached sorted_root;
289 struct callchain_root callchain;
290 struct callchain_cursor cursor;
291 };
292
293 /* track idle times per cpu */
294 static struct thread **idle_threads;
295 static int idle_max_cpu;
296 static char idle_comm[] = "<idle>";
297
298 static u64 get_nsecs(void)
299 {
300 struct timespec ts;
301
302 clock_gettime(CLOCK_MONOTONIC, &ts);
303
304 return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
305 }
306
307 static void burn_nsecs(struct perf_sched *sched, u64 nsecs)
308 {
309 u64 T0 = get_nsecs(), T1;
310
311 do {
312 T1 = get_nsecs();
313 } while (T1 + sched->run_measurement_overhead < T0 + nsecs);
314 }
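
/*
 * Worked example of the loop above (illustrative numbers, not from the
 * original): to burn 1000 nsecs with a calibrated run_measurement_overhead
 * of 40 nsecs, the busy loop exits once T1 - T0 reaches ~960 nsecs, so the
 * cost of the measurement itself is not double-counted in replayed runtime.
 */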
315
316 static void sleep_nsecs(u64 nsecs)
317 {
318 struct timespec ts;
319
320 ts.tv_nsec = nsecs % 999999999;
321 ts.tv_sec = nsecs / 999999999;
322
323 nanosleep(&ts, NULL);
324 }
325
326 static void calibrate_run_measurement_overhead(struct perf_sched *sched)
327 {
328 u64 T0, T1, delta, min_delta = NSEC_PER_SEC;
329 int i;
330
331 for (i = 0; i < 10; i++) {
332 T0 = get_nsecs();
333 burn_nsecs(sched, 0);
334 T1 = get_nsecs();
335 delta = T1-T0;
336 min_delta = min(min_delta, delta);
337 }
338 sched->run_measurement_overhead = min_delta;
339
340 printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta);
341 }
342
343 static void calibrate_sleep_measurement_overhead(struct perf_sched *sched)
344 {
345 u64 T0, T1, delta, min_delta = NSEC_PER_SEC;
346 int i;
347
348 for (i = 0; i < 10; i++) {
349 T0 = get_nsecs();
350 sleep_nsecs(10000);
351 T1 = get_nsecs();
352 delta = T1-T0;
353 min_delta = min(min_delta, delta);
354 }
355 min_delta -= 10000;
356 sched->sleep_measurement_overhead = min_delta;
357
358 printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta);
359 }
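
/*
 * Both calibration helpers above keep the minimum of ten trials rather than
 * the mean, on the assumption that the shortest observed delta is the one
 * least disturbed by preemption; the sleep variant additionally subtracts
 * the 10000 nsecs that were actually requested from nanosleep().
 */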
360
361 static struct sched_atom *
362 get_new_event(struct task_desc *task, u64 timestamp)
363 {
364 struct sched_atom *event = zalloc(sizeof(*event));
365 unsigned long idx = task->nr_events;
366 size_t size;
367
368 event->timestamp = timestamp;
369 event->nr = idx;
370
371 task->nr_events++;
372 size = sizeof(struct sched_atom *) * task->nr_events;
373 task->atoms = realloc(task->atoms, size);
374 BUG_ON(!task->atoms);
375
376 task->atoms[idx] = event;
377
378 return event;
379 }
380
381 static struct sched_atom *last_event(struct task_desc *task)
382 {
383 if (!task->nr_events)
384 return NULL;
385
386 return task->atoms[task->nr_events - 1];
387 }
388
389 static void add_sched_event_run(struct perf_sched *sched, struct task_desc *task,
390 u64 timestamp, u64 duration)
391 {
392 struct sched_atom *event, *curr_event = last_event(task);
393
394 /*
395 * optimize an existing RUN event by merging this one
396 * into it:
397 */
398 if (curr_event && curr_event->type == SCHED_EVENT_RUN) {
399 sched->nr_run_events_optimized++;
400 curr_event->duration += duration;
401 return;
402 }
403
404 event = get_new_event(task, timestamp);
405
406 event->type = SCHED_EVENT_RUN;
407 event->duration = duration;
408
409 sched->nr_run_events++;
410 }
411
412 static void add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *task,
413 u64 timestamp, struct task_desc *wakee)
414 {
415 struct sched_atom *event, *wakee_event;
416
417 event = get_new_event(task, timestamp);
418 event->type = SCHED_EVENT_WAKEUP;
419 event->wakee = wakee;
420
421 wakee_event = last_event(wakee);
422 if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) {
423 sched->targetless_wakeups++;
424 return;
425 }
426 if (wakee_event->wait_sem) {
427 sched->multitarget_wakeups++;
428 return;
429 }
430
431 wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem));
432 sem_init(wakee_event->wait_sem, 0, 0);
433 event->wait_sem = wakee_event->wait_sem;
434
435 sched->nr_wakeup_events++;
436 }
437
438 static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task,
439 u64 timestamp)
440 {
441 struct sched_atom *event = get_new_event(task, timestamp);
442
443 event->type = SCHED_EVENT_SLEEP;
444
445 sched->nr_sleep_events++;
446 }
447
448 static struct task_desc *register_pid(struct perf_sched *sched,
449 unsigned long pid, const char *comm)
450 {
451 struct task_desc *task;
452 static int pid_max;
453
454 if (sched->pid_to_task == NULL) {
455 if (sysctl__read_int("kernel/pid_max", &pid_max) < 0)
456 pid_max = MAX_PID;
457 BUG_ON((sched->pid_to_task = calloc(pid_max, sizeof(struct task_desc *))) == NULL);
458 }
459 if (pid >= (unsigned long)pid_max) {
460 BUG_ON((sched->pid_to_task = realloc(sched->pid_to_task, (pid + 1) *
461 sizeof(struct task_desc *))) == NULL);
462 while (pid >= (unsigned long)pid_max)
463 sched->pid_to_task[pid_max++] = NULL;
464 }
465
466 task = sched->pid_to_task[pid];
467
468 if (task)
469 return task;
470
471 task = zalloc(sizeof(*task));
472 task->pid = pid;
473 task->nr = sched->nr_tasks;
474 strcpy(task->comm, comm);
475 /*
476 * every task starts in sleeping state - this gets ignored
477 * if there's no wakeup pointing to this sleep state:
478 */
479 add_sched_event_sleep(sched, task, 0);
480
481 sched->pid_to_task[pid] = task;
482 sched->nr_tasks++;
483 sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_desc *));
484 BUG_ON(!sched->tasks);
485 sched->tasks[task->nr] = task;
486
487 if (verbose > 0)
488 printf("registered task #%ld, PID %ld (%s)\n", sched->nr_tasks, pid, comm);
489
490 return task;
491 }
492
493
494 static void print_task_traces(struct perf_sched *sched)
495 {
496 struct task_desc *task;
497 unsigned long i;
498
499 for (i = 0; i < sched->nr_tasks; i++) {
500 task = sched->tasks[i];
501 printf("task %6ld (%20s:%10ld), nr_events: %ld\n",
502 task->nr, task->comm, task->pid, task->nr_events);
503 }
504 }
505
506 static void add_cross_task_wakeups(struct perf_sched *sched)
507 {
508 struct task_desc *task1, *task2;
509 unsigned long i, j;
510
511 for (i = 0; i < sched->nr_tasks; i++) {
512 task1 = sched->tasks[i];
513 j = i + 1;
514 if (j == sched->nr_tasks)
515 j = 0;
516 task2 = sched->tasks[j];
517 add_sched_event_wakeup(sched, task1, 0, task2);
518 }
519 }
520
521 static void perf_sched__process_event(struct perf_sched *sched,
522 struct sched_atom *atom)
523 {
524 int ret = 0;
525
526 switch (atom->type) {
527 case SCHED_EVENT_RUN:
528 burn_nsecs(sched, atom->duration);
529 break;
530 case SCHED_EVENT_SLEEP:
531 if (atom->wait_sem)
532 ret = sem_wait(atom->wait_sem);
533 BUG_ON(ret);
534 break;
535 case SCHED_EVENT_WAKEUP:
536 if (atom->wait_sem)
537 ret = sem_post(atom->wait_sem);
538 BUG_ON(ret);
539 break;
540 default:
541 BUG_ON(1);
542 }
543 }
544
545 static u64 get_cpu_usage_nsec_parent(void)
546 {
547 struct rusage ru;
548 u64 sum;
549 int err;
550
551 err = getrusage(RUSAGE_SELF, &ru);
552 BUG_ON(err);
553
554 sum = ru.ru_utime.tv_sec * NSEC_PER_SEC + ru.ru_utime.tv_usec * NSEC_PER_USEC;
555 sum += ru.ru_stime.tv_sec * NSEC_PER_SEC + ru.ru_stime.tv_usec * NSEC_PER_USEC;
556
557 return sum;
558 }
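
/*
 * Worked conversion for the rusage sum above (illustrative figures only):
 * 1 sec + 500 usec of combined user+system time becomes
 * 1 * NSEC_PER_SEC + 500 * NSEC_PER_USEC = 1000500000 nsecs.
 */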
559
560 static int self_open_counters(struct perf_sched *sched, unsigned long cur_task)
561 {
562 struct perf_event_attr attr;
563 char sbuf[STRERR_BUFSIZE], info[STRERR_BUFSIZE];
564 int fd;
565 struct rlimit limit;
566 bool need_privilege = false;
567
568 memset(&attr, 0, sizeof(attr));
569
570 attr.type = PERF_TYPE_SOFTWARE;
571 attr.config = PERF_COUNT_SW_TASK_CLOCK;
572
573 force_again:
574 fd = sys_perf_event_open(&attr, 0, -1, -1,
575 perf_event_open_cloexec_flag());
576
577 if (fd < 0) {
578 if (errno == EMFILE) {
579 if (sched->force) {
580 BUG_ON(getrlimit(RLIMIT_NOFILE, &limit) == -1);
581 limit.rlim_cur += sched->nr_tasks - cur_task;
582 if (limit.rlim_cur > limit.rlim_max) {
583 limit.rlim_max = limit.rlim_cur;
584 need_privilege = true;
585 }
586 if (setrlimit(RLIMIT_NOFILE, &limit) == -1) {
587 if (need_privilege && errno == EPERM)
588 strcpy(info, "Need privilege\n");
589 } else
590 goto force_again;
591 } else
592 strcpy(info, "Have a try with -f option\n");
593 }
594 pr_err("Error: sys_perf_event_open() syscall returned "
595 "with %d (%s)\n%s", fd,
596 str_error_r(errno, sbuf, sizeof(sbuf)), info);
597 exit(EXIT_FAILURE);
598 }
599 return fd;
600 }
601
602 static u64 get_cpu_usage_nsec_self(int fd)
603 {
604 u64 runtime;
605 int ret;
606
607 ret = read(fd, &runtime, sizeof(runtime));
608 BUG_ON(ret != sizeof(runtime));
609
610 return runtime;
611 }
612
613 struct sched_thread_parms {
614 struct task_desc *task;
615 struct perf_sched *sched;
616 int fd;
617 };
618
619 static void *thread_func(void *ctx)
620 {
621 struct sched_thread_parms *parms = ctx;
622 struct task_desc *this_task = parms->task;
623 struct perf_sched *sched = parms->sched;
624 u64 cpu_usage_0, cpu_usage_1;
625 unsigned long i, ret;
626 char comm2[22];
627 int fd = parms->fd;
628
629 zfree(&parms);
630
631 sprintf(comm2, ":%s", this_task->comm);
632 prctl(PR_SET_NAME, comm2);
633 if (fd < 0)
634 return NULL;
635
636 while (!sched->thread_funcs_exit) {
637 ret = sem_post(&this_task->ready_for_work);
638 BUG_ON(ret);
639 mutex_lock(&sched->start_work_mutex);
640 mutex_unlock(&sched->start_work_mutex);
641
642 cpu_usage_0 = get_cpu_usage_nsec_self(fd);
643
644 for (i = 0; i < this_task->nr_events; i++) {
645 this_task->curr_event = i;
646 perf_sched__process_event(sched, this_task->atoms[i]);
647 }
648
649 cpu_usage_1 = get_cpu_usage_nsec_self(fd);
650 this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;
651 ret = sem_post(&this_task->work_done_sem);
652 BUG_ON(ret);
653
654 mutex_lock(&sched->work_done_wait_mutex);
655 mutex_unlock(&sched->work_done_wait_mutex);
656 }
657 return NULL;
658 }
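
/*
 * Handshake between thread_func() above and wait_for_tasks() below, as read
 * from the code (a summary, not an addition to the protocol): each worker
 * posts ready_for_work and then blocks on start_work_mutex, which the parent
 * holds until every worker has checked in; after replaying its atoms the
 * worker posts work_done_sem and parks on work_done_wait_mutex until the
 * parent releases it for the next repetition.
 */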
659
660 static void create_tasks(struct perf_sched *sched)
661 EXCLUSIVE_LOCK_FUNCTION(sched->start_work_mutex)
662 EXCLUSIVE_LOCK_FUNCTION(sched->work_done_wait_mutex)
663 {
664 struct task_desc *task;
665 pthread_attr_t attr;
666 unsigned long i;
667 int err;
668
669 err = pthread_attr_init(&attr);
670 BUG_ON(err);
671 err = pthread_attr_setstacksize(&attr,
672 (size_t) max(16 * 1024, (int)PTHREAD_STACK_MIN));
673 BUG_ON(err);
674 mutex_lock(&sched->start_work_mutex);
675 mutex_lock(&sched->work_done_wait_mutex);
676 for (i = 0; i < sched->nr_tasks; i++) {
677 struct sched_thread_parms *parms = malloc(sizeof(*parms));
678 BUG_ON(parms == NULL);
679 parms->task = task = sched->tasks[i];
680 parms->sched = sched;
681 parms->fd = self_open_counters(sched, i);
682 sem_init(&task->ready_for_work, 0, 0);
683 sem_init(&task->work_done_sem, 0, 0);
684 task->curr_event = 0;
685 err = pthread_create(&task->thread, &attr, thread_func, parms);
686 BUG_ON(err);
687 }
688 }
689
690 static void destroy_tasks(struct perf_sched *sched)
691 UNLOCK_FUNCTION(sched->start_work_mutex)
692 UNLOCK_FUNCTION(sched->work_done_wait_mutex)
693 {
694 struct task_desc *task;
695 unsigned long i;
696 int err;
697
698 mutex_unlock(&sched->start_work_mutex);
699 mutex_unlock(&sched->work_done_wait_mutex);
700 /* Get rid of threads so they won't be upset by mutex destruction */
701 for (i = 0; i < sched->nr_tasks; i++) {
702 task = sched->tasks[i];
703 err = pthread_join(task->thread, NULL);
704 BUG_ON(err);
705 sem_destroy(&task->ready_for_work);
706 sem_destroy(&task->work_done_sem);
707 }
708 }
709
710 static void wait_for_tasks(struct perf_sched *sched)
711 EXCLUSIVE_LOCKS_REQUIRED(sched->work_done_wait_mutex)
712 EXCLUSIVE_LOCKS_REQUIRED(sched->start_work_mutex)
713 {
714 u64 cpu_usage_0, cpu_usage_1;
715 struct task_desc *task;
716 unsigned long i, ret;
717
718 sched->start_time = get_nsecs();
719 sched->cpu_usage = 0;
720 mutex_unlock(&sched->work_done_wait_mutex);
721
722 for (i = 0; i < sched->nr_tasks; i++) {
723 task = sched->tasks[i];
724 ret = sem_wait(&task->ready_for_work);
725 BUG_ON(ret);
726 sem_init(&task->ready_for_work, 0, 0);
727 }
728 mutex_lock(&sched->work_done_wait_mutex);
729
730 cpu_usage_0 = get_cpu_usage_nsec_parent();
731
732 mutex_unlock(&sched->start_work_mutex);
733
734 for (i = 0; i < sched->nr_tasks; i++) {
735 task = sched->tasks[i];
736 ret = sem_wait(&task->work_done_sem);
737 BUG_ON(ret);
738 sem_init(&task->work_done_sem, 0, 0);
739 sched->cpu_usage += task->cpu_usage;
740 task->cpu_usage = 0;
741 }
742
743 cpu_usage_1 = get_cpu_usage_nsec_parent();
744 if (!sched->runavg_cpu_usage)
745 sched->runavg_cpu_usage = sched->cpu_usage;
746 sched->runavg_cpu_usage = (sched->runavg_cpu_usage * (sched->replay_repeat - 1) + sched->cpu_usage) / sched->replay_repeat;
747
748 sched->parent_cpu_usage = cpu_usage_1 - cpu_usage_0;
749 if (!sched->runavg_parent_cpu_usage)
750 sched->runavg_parent_cpu_usage = sched->parent_cpu_usage;
751 sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * (sched->replay_repeat - 1) +
752 sched->parent_cpu_usage)/sched->replay_repeat;
753
754 mutex_lock(&sched->start_work_mutex);
755
756 for (i = 0; i < sched->nr_tasks; i++) {
757 task = sched->tasks[i];
758 task->curr_event = 0;
759 }
760 }
761
762 static void run_one_test(struct perf_sched *sched)
763 EXCLUSIVE_LOCKS_REQUIRED(sched->work_done_wait_mutex)
764 EXCLUSIVE_LOCKS_REQUIRED(sched->start_work_mutex)
765 {
766 u64 T0, T1, delta, avg_delta, fluct;
767
768 T0 = get_nsecs();
769 wait_for_tasks(sched);
770 T1 = get_nsecs();
771
772 delta = T1 - T0;
773 sched->sum_runtime += delta;
774 sched->nr_runs++;
775
776 avg_delta = sched->sum_runtime / sched->nr_runs;
777 if (delta < avg_delta)
778 fluct = avg_delta - delta;
779 else
780 fluct = delta - avg_delta;
781 sched->sum_fluct += fluct;
782 if (!sched->run_avg)
783 sched->run_avg = delta;
784 sched->run_avg = (sched->run_avg * (sched->replay_repeat - 1) + delta) / sched->replay_repeat;
785
786 printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / NSEC_PER_MSEC);
787
788 printf("ravg: %0.2f, ", (double)sched->run_avg / NSEC_PER_MSEC);
789
790 printf("cpu: %0.2f / %0.2f",
791 (double)sched->cpu_usage / NSEC_PER_MSEC, (double)sched->runavg_cpu_usage / NSEC_PER_MSEC);
792
793 #if 0
794 /*
795 * rusage statistics done by the parent; these are less
796 * accurate than the sched->sum_exec_runtime based statistics:
797 */
798 printf(" [%0.2f / %0.2f]",
799 (double)sched->parent_cpu_usage / NSEC_PER_MSEC,
800 (double)sched->runavg_parent_cpu_usage / NSEC_PER_MSEC);
801 #endif
802
803 printf("\n");
804
805 if (sched->nr_sleep_corrections)
806 printf(" (%ld sleep corrections)\n", sched->nr_sleep_corrections);
807 sched->nr_sleep_corrections = 0;
808 }
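
/*
 * The "ravg" smoothing above is a simple recursive mean. Assuming
 * replay_repeat = 10 (an illustrative value), new = (old * 9 + delta) / 10,
 * so a single outlier repetition shifts the reported average by only about a
 * tenth of its deviation.
 */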
809
810 static void test_calibrations(struct perf_sched *sched)
811 {
812 u64 T0, T1;
813
814 T0 = get_nsecs();
815 burn_nsecs(sched, NSEC_PER_MSEC);
816 T1 = get_nsecs();
817
818 printf("the run test took %" PRIu64 " nsecs\n", T1 - T0);
819
820 T0 = get_nsecs();
821 sleep_nsecs(NSEC_PER_MSEC);
822 T1 = get_nsecs();
823
824 printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0);
825 }
826
827 static int
828 replay_wakeup_event(struct perf_sched *sched,
829 struct evsel *evsel, struct perf_sample *sample,
830 struct machine *machine __maybe_unused)
831 {
832 const char *comm = evsel__strval(evsel, sample, "comm");
833 const u32 pid = evsel__intval(evsel, sample, "pid");
834 struct task_desc *waker, *wakee;
835
836 if (verbose > 0) {
837 printf("sched_wakeup event %p\n", evsel);
838
839 printf(" ... pid %d woke up %s/%d\n", sample->tid, comm, pid);
840 }
841
842 waker = register_pid(sched, sample->tid, "<unknown>");
843 wakee = register_pid(sched, pid, comm);
844
845 add_sched_event_wakeup(sched, waker, sample->time, wakee);
846 return 0;
847 }
848
849 static int replay_switch_event(struct perf_sched *sched,
850 struct evsel *evsel,
851 struct perf_sample *sample,
852 struct machine *machine __maybe_unused)
853 {
854 const char *prev_comm = evsel__strval(evsel, sample, "prev_comm"),
855 *next_comm = evsel__strval(evsel, sample, "next_comm");
856 const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"),
857 next_pid = evsel__intval(evsel, sample, "next_pid");
858 struct task_desc *prev, __maybe_unused *next;
859 u64 timestamp0, timestamp = sample->time;
860 int cpu = sample->cpu;
861 s64 delta;
862
863 if (verbose > 0)
864 printf("sched_switch event %p\n", evsel);
865
866 if (cpu >= MAX_CPUS || cpu < 0)
867 return 0;
868
869 timestamp0 = sched->cpu_last_switched[cpu];
870 if (timestamp0)
871 delta = timestamp - timestamp0;
872 else
873 delta = 0;
874
875 if (delta < 0) {
876 pr_err("hm, delta: %" PRId64 " < 0 ?\n", delta);
877 return -1;
878 }
879
880 pr_debug(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n",
881 prev_comm, prev_pid, next_comm, next_pid, delta);
882
883 prev = register_pid(sched, prev_pid, prev_comm);
884 next = register_pid(sched, next_pid, next_comm);
885
886 sched->cpu_last_switched[cpu] = timestamp;
887
888 add_sched_event_run(sched, prev, timestamp, delta);
889 add_sched_event_sleep(sched, prev, timestamp);
890
891 return 0;
892 }
893
894 static int replay_fork_event(struct perf_sched *sched,
895 union perf_event *event,
896 struct machine *machine)
897 {
898 struct thread *child, *parent;
899
900 child = machine__findnew_thread(machine, event->fork.pid,
901 event->fork.tid);
902 parent = machine__findnew_thread(machine, event->fork.ppid,
903 event->fork.ptid);
904
905 if (child == NULL || parent == NULL) {
906 pr_debug("thread does not exist on fork event: child %p, parent %p\n",
907 child, parent);
908 goto out_put;
909 }
910
911 if (verbose > 0) {
912 printf("fork event\n");
913 printf("... parent: %s/%d\n", thread__comm_str(parent), thread__tid(parent));
914 printf("... child: %s/%d\n", thread__comm_str(child), thread__tid(child));
915 }
916
917 register_pid(sched, thread__tid(parent), thread__comm_str(parent));
918 register_pid(sched, thread__tid(child), thread__comm_str(child));
919 out_put:
920 thread__put(child);
921 thread__put(parent);
922 return 0;
923 }
924
925 struct sort_dimension {
926 const char *name;
927 sort_fn_t cmp;
928 struct list_head list;
929 };
930
931 static inline void init_prio(struct thread_runtime *r)
932 {
933 r->prio = -1;
934 }
935
936 /*
937 * handle runtime stats saved per thread
938 */
939 static struct thread_runtime *thread__init_runtime(struct thread *thread)
940 {
941 struct thread_runtime *r;
942
943 r = zalloc(sizeof(struct thread_runtime));
944 if (!r)
945 return NULL;
946
947 init_stats(&r->run_stats);
948 init_prio(r);
949 thread__set_priv(thread, r);
950
951 return r;
952 }
953
954 static struct thread_runtime *thread__get_runtime(struct thread *thread)
955 {
956 struct thread_runtime *tr;
957
958 tr = thread__priv(thread);
959 if (tr == NULL) {
960 tr = thread__init_runtime(thread);
961 if (tr == NULL)
962 pr_debug("Failed to malloc memory for runtime data.\n");
963 }
964
965 return tr;
966 }
967
968 static int
969 thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r)
970 {
971 struct sort_dimension *sort;
972 int ret = 0;
973
974 BUG_ON(list_empty(list));
975
976 list_for_each_entry(sort, list, list) {
977 ret = sort->cmp(l, r);
978 if (ret)
979 return ret;
980 }
981
982 return ret;
983 }
984
985 static struct work_atoms *
986 thread_atoms_search(struct rb_root_cached *root, struct thread *thread,
987 struct list_head *sort_list)
988 {
989 struct rb_node *node = root->rb_root.rb_node;
990 struct work_atoms key = { .thread = thread };
991
992 while (node) {
993 struct work_atoms *atoms;
994 int cmp;
995
996 atoms = container_of(node, struct work_atoms, node);
997
998 cmp = thread_lat_cmp(sort_list, &key, atoms);
999 if (cmp > 0)
1000 node = node->rb_left;
1001 else if (cmp < 0)
1002 node = node->rb_right;
1003 else {
1004 BUG_ON(!RC_CHK_EQUAL(thread, atoms->thread));
1005 return atoms;
1006 }
1007 }
1008 return NULL;
1009 }
1010
1011 static void
1012 __thread_latency_insert(struct rb_root_cached *root, struct work_atoms *data,
1013 struct list_head *sort_list)
1014 {
1015 struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL;
1016 bool leftmost = true;
1017
1018 while (*new) {
1019 struct work_atoms *this;
1020 int cmp;
1021
1022 this = container_of(*new, struct work_atoms, node);
1023 parent = *new;
1024
1025 cmp = thread_lat_cmp(sort_list, data, this);
1026
1027 if (cmp > 0)
1028 new = &((*new)->rb_left);
1029 else {
1030 new = &((*new)->rb_right);
1031 leftmost = false;
1032 }
1033 }
1034
1035 rb_link_node(&data->node, parent, new);
1036 rb_insert_color_cached(&data->node, root, leftmost);
1037 }
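
/*
 * Note on the insert above: the tree is ordered by whichever comparator
 * chain is passed in sort_list (cmp_pid when building atom_root, the
 * sort keys when producing the sorted output), and the leftmost flag lets
 * rb_insert_color_cached() keep the cached first node valid for cheap
 * in-order walks.
 */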
1038
1039 static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread)
1040 {
1041 struct work_atoms *atoms = zalloc(sizeof(*atoms));
1042 if (!atoms) {
1043 pr_err("No memory at %s\n", __func__);
1044 return -1;
1045 }
1046
1047 atoms->thread = thread__get(thread);
1048 INIT_LIST_HEAD(&atoms->work_list);
1049 __thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid);
1050 return 0;
1051 }
1052
1053 static int
1054 add_sched_out_event(struct work_atoms *atoms,
1055 char run_state,
1056 u64 timestamp)
1057 {
1058 struct work_atom *atom = zalloc(sizeof(*atom));
1059 if (!atom) {
1060 pr_err("No memory at %s\n", __func__);
1061 return -1;
1062 }
1063
1064 atom->sched_out_time = timestamp;
1065
1066 if (run_state == 'R') {
1067 atom->state = THREAD_WAIT_CPU;
1068 atom->wake_up_time = atom->sched_out_time;
1069 }
1070
1071 list_add_tail(&atom->list, &atoms->work_list);
1072 return 0;
1073 }
1074
1075 static void
1076 add_runtime_event(struct work_atoms *atoms, u64 delta,
1077 u64 timestamp __maybe_unused)
1078 {
1079 struct work_atom *atom;
1080
1081 BUG_ON(list_empty(&atoms->work_list));
1082
1083 atom = list_entry(atoms->work_list.prev, struct work_atom, list);
1084
1085 atom->runtime += delta;
1086 atoms->total_runtime += delta;
1087 }
1088
1089 static void
1090 add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
1091 {
1092 struct work_atom *atom;
1093 u64 delta;
1094
1095 if (list_empty(&atoms->work_list))
1096 return;
1097
1098 atom = list_entry(atoms->work_list.prev, struct work_atom, list);
1099
1100 if (atom->state != THREAD_WAIT_CPU)
1101 return;
1102
1103 if (timestamp < atom->wake_up_time) {
1104 atom->state = THREAD_IGNORE;
1105 return;
1106 }
1107
1108 atom->state = THREAD_SCHED_IN;
1109 atom->sched_in_time = timestamp;
1110
1111 delta = atom->sched_in_time - atom->wake_up_time;
1112 atoms->total_lat += delta;
1113 if (delta > atoms->max_lat) {
1114 atoms->max_lat = delta;
1115 atoms->max_lat_start = atom->wake_up_time;
1116 atoms->max_lat_end = timestamp;
1117 }
1118 atoms->nb_atoms++;
1119 }
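
/*
 * Worked latency example for the accounting above (made-up timestamps):
 * wake_up_time = 100000 ns and sched_in_time = 250000 ns yield a 150000 ns
 * delta that is added to total_lat; it becomes the new max_lat if it exceeds
 * the previous maximum, with max_lat_start/max_lat_end recording the pair.
 */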
1120
1121 static void free_work_atoms(struct work_atoms *atoms)
1122 {
1123 struct work_atom *atom, *tmp;
1124
1125 if (atoms == NULL)
1126 return;
1127
1128 list_for_each_entry_safe(atom, tmp, &atoms->work_list, list) {
1129 list_del(&atom->list);
1130 free(atom);
1131 }
1132 thread__zput(atoms->thread);
1133 free(atoms);
1134 }
1135
1136 static int latency_switch_event(struct perf_sched *sched,
1137 struct evsel *evsel,
1138 struct perf_sample *sample,
1139 struct machine *machine)
1140 {
1141 const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"),
1142 next_pid = evsel__intval(evsel, sample, "next_pid");
1143 const char prev_state = evsel__taskstate(evsel, sample, "prev_state");
1144 struct work_atoms *out_events, *in_events;
1145 struct thread *sched_out, *sched_in;
1146 u64 timestamp0, timestamp = sample->time;
1147 int cpu = sample->cpu, err = -1;
1148 s64 delta;
1149
1150 BUG_ON(cpu >= MAX_CPUS || cpu < 0);
1151
1152 timestamp0 = sched->cpu_last_switched[cpu];
1153 sched->cpu_last_switched[cpu] = timestamp;
1154 if (timestamp0)
1155 delta = timestamp - timestamp0;
1156 else
1157 delta = 0;
1158
1159 if (delta < 0) {
1160 pr_err("hm, delta: %" PRId64 " < 0 ?\n", delta);
1161 return -1;
1162 }
1163
1164 sched_out = machine__findnew_thread(machine, -1, prev_pid);
1165 sched_in = machine__findnew_thread(machine, -1, next_pid);
1166 if (sched_out == NULL || sched_in == NULL)
1167 goto out_put;
1168
1169 out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
1170 if (!out_events) {
1171 if (thread_atoms_insert(sched, sched_out))
1172 goto out_put;
1173 out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
1174 if (!out_events) {
1175 pr_err("out-event: Internal tree error");
1176 goto out_put;
1177 }
1178 }
1179 if (add_sched_out_event(out_events, prev_state, timestamp))
1180 return -1;
1181
1182 in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
1183 if (!in_events) {
1184 if (thread_atoms_insert(sched, sched_in))
1185 goto out_put;
1186 in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
1187 if (!in_events) {
1188 pr_err("in-event: Internal tree error");
1189 goto out_put;
1190 }
1191 /*
1192 * Task came in that we have not heard about yet;
1193 * add an initial atom in runnable state:
1194 */
1195 if (add_sched_out_event(in_events, 'R', timestamp))
1196 goto out_put;
1197 }
1198 add_sched_in_event(in_events, timestamp);
1199 err = 0;
1200 out_put:
1201 thread__put(sched_out);
1202 thread__put(sched_in);
1203 return err;
1204 }
1205
1206 static int latency_runtime_event(struct perf_sched *sched,
1207 struct evsel *evsel,
1208 struct perf_sample *sample,
1209 struct machine *machine)
1210 {
1211 const u32 pid = evsel__intval(evsel, sample, "pid");
1212 const u64 runtime = evsel__intval(evsel, sample, "runtime");
1213 struct thread *thread = machine__findnew_thread(machine, -1, pid);
1214 struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
1215 u64 timestamp = sample->time;
1216 int cpu = sample->cpu, err = -1;
1217
1218 if (thread == NULL)
1219 return -1;
1220
1221 BUG_ON(cpu >= MAX_CPUS || cpu < 0);
1222 if (!atoms) {
1223 if (thread_atoms_insert(sched, thread))
1224 goto out_put;
1225 atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
1226 if (!atoms) {
1227 pr_err("in-event: Internal tree error");
1228 goto out_put;
1229 }
1230 if (add_sched_out_event(atoms, 'R', timestamp))
1231 goto out_put;
1232 }
1233
1234 add_runtime_event(atoms, runtime, timestamp);
1235 err = 0;
1236 out_put:
1237 thread__put(thread);
1238 return err;
1239 }
1240
1241 static int latency_wakeup_event(struct perf_sched *sched,
1242 struct evsel *evsel,
1243 struct perf_sample *sample,
1244 struct machine *machine)
1245 {
1246 const u32 pid = evsel__intval(evsel, sample, "pid");
1247 struct work_atoms *atoms;
1248 struct work_atom *atom;
1249 struct thread *wakee;
1250 u64 timestamp = sample->time;
1251 int err = -1;
1252
1253 wakee = machine__findnew_thread(machine, -1, pid);
1254 if (wakee == NULL)
1255 return -1;
1256 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
1257 if (!atoms) {
1258 if (thread_atoms_insert(sched, wakee))
1259 goto out_put;
1260 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
1261 if (!atoms) {
1262 pr_err("wakeup-event: Internal tree error");
1263 goto out_put;
1264 }
1265 if (add_sched_out_event(atoms, 'S', timestamp))
1266 goto out_put;
1267 }
1268
1269 BUG_ON(list_empty(&atoms->work_list));
1270
1271 atom = list_entry(atoms->work_list.prev, struct work_atom, list);
1272
1273 /*
1274 * A wakeup event is not guaranteed to fire only while the task
1275 * is off the run queue; it may also fire while the task is still
1276 * on the run queue and merely sets ->state to TASK_RUNNING. In
1277 * that case we should not reset ->wake_up_time for a task that is
1278 * already runnable.
1279 *
1280 * You WILL be missing events if you've recorded only
1281 * one CPU, or are only looking at one, so don't
1282 * skip in this case.
1283 */
1284 if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING)
1285 goto out_ok;
1286
1287 sched->nr_timestamps++;
1288 if (atom->sched_out_time > timestamp) {
1289 sched->nr_unordered_timestamps++;
1290 goto out_ok;
1291 }
1292
1293 atom->state = THREAD_WAIT_CPU;
1294 atom->wake_up_time = timestamp;
1295 out_ok:
1296 err = 0;
1297 out_put:
1298 thread__put(wakee);
1299 return err;
1300 }
1301
1302 static int latency_migrate_task_event(struct perf_sched *sched,
1303 struct evsel *evsel,
1304 struct perf_sample *sample,
1305 struct machine *machine)
1306 {
1307 const u32 pid = evsel__intval(evsel, sample, "pid");
1308 u64 timestamp = sample->time;
1309 struct work_atoms *atoms;
1310 struct work_atom *atom;
1311 struct thread *migrant;
1312 int err = -1;
1313
1314 /*
1315 * Only need to worry about migration when profiling one CPU.
1316 */
1317 if (sched->profile_cpu == -1)
1318 return 0;
1319
1320 migrant = machine__findnew_thread(machine, -1, pid);
1321 if (migrant == NULL)
1322 return -1;
1323 atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
1324 if (!atoms) {
1325 if (thread_atoms_insert(sched, migrant))
1326 goto out_put;
1327 register_pid(sched, thread__tid(migrant), thread__comm_str(migrant));
1328 atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
1329 if (!atoms) {
1330 pr_err("migration-event: Internal tree error");
1331 goto out_put;
1332 }
1333 if (add_sched_out_event(atoms, 'R', timestamp))
1334 goto out_put;
1335 }
1336
1337 BUG_ON(list_empty(&atoms->work_list));
1338
1339 atom = list_entry(atoms->work_list.prev, struct work_atom, list);
1340 atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp;
1341
1342 sched->nr_timestamps++;
1343
1344 if (atom->sched_out_time > timestamp)
1345 sched->nr_unordered_timestamps++;
1346 err = 0;
1347 out_put:
1348 thread__put(migrant);
1349 return err;
1350 }
1351
1352 static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list)
1353 {
1354 int i;
1355 int ret;
1356 u64 avg;
1357 char max_lat_start[32], max_lat_end[32];
1358
1359 if (!work_list->nb_atoms)
1360 return;
1361 /*
1362 * Ignore idle threads:
1363 */
1364 if (!strcmp(thread__comm_str(work_list->thread), "swapper"))
1365 return;
1366
1367 sched->all_runtime += work_list->total_runtime;
1368 sched->all_count += work_list->nb_atoms;
1369
1370 if (work_list->num_merged > 1) {
1371 ret = printf(" %s:(%d) ", thread__comm_str(work_list->thread),
1372 work_list->num_merged);
1373 } else {
1374 ret = printf(" %s:%d ", thread__comm_str(work_list->thread),
1375 thread__tid(work_list->thread));
1376 }
1377
1378 for (i = 0; i < 24 - ret; i++)
1379 printf(" ");
1380
1381 avg = work_list->total_lat / work_list->nb_atoms;
1382 timestamp__scnprintf_usec(work_list->max_lat_start, max_lat_start, sizeof(max_lat_start));
1383 timestamp__scnprintf_usec(work_list->max_lat_end, max_lat_end, sizeof(max_lat_end));
1384
1385 printf("|%11.3f ms |%9" PRIu64 " | avg:%8.3f ms | max:%8.3f ms | max start: %12s s | max end: %12s s\n",
1386 (double)work_list->total_runtime / NSEC_PER_MSEC,
1387 work_list->nb_atoms, (double)avg / NSEC_PER_MSEC,
1388 (double)work_list->max_lat / NSEC_PER_MSEC,
1389 max_lat_start, max_lat_end);
1390 }
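
/*
 * The printf above emits one summary row per thread, roughly of the form
 * (values are illustrative only):
 *   foo:1234   |    12.345 ms |       42 | avg:   0.294 ms | max:   1.100 ms | max start: ... s | max end: ... s
 * where runtime and latencies are in milliseconds and the max start/end
 * columns are timestamps in seconds.
 */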
1391
1392 static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
1393 {
1394 pid_t l_tid, r_tid;
1395
1396 if (RC_CHK_EQUAL(l->thread, r->thread))
1397 return 0;
1398 l_tid = thread__tid(l->thread);
1399 r_tid = thread__tid(r->thread);
1400 if (l_tid < r_tid)
1401 return -1;
1402 if (l_tid > r_tid)
1403 return 1;
1404 return (int)(RC_CHK_ACCESS(l->thread) - RC_CHK_ACCESS(r->thread));
1405 }
1406
1407 static int avg_cmp(struct work_atoms *l, struct work_atoms *r)
1408 {
1409 u64 avgl, avgr;
1410
1411 if (!l->nb_atoms)
1412 return -1;
1413
1414 if (!r->nb_atoms)
1415 return 1;
1416
1417 avgl = l->total_lat / l->nb_atoms;
1418 avgr = r->total_lat / r->nb_atoms;
1419
1420 if (avgl < avgr)
1421 return -1;
1422 if (avgl > avgr)
1423 return 1;
1424
1425 return 0;
1426 }
1427
1428 static int max_cmp(struct work_atoms *l, struct work_atoms *r)
1429 {
1430 if (l->max_lat < r->max_lat)
1431 return -1;
1432 if (l->max_lat > r->max_lat)
1433 return 1;
1434
1435 return 0;
1436 }
1437
1438 static int switch_cmp(struct work_atoms *l, struct work_atoms *r)
1439 {
1440 if (l->nb_atoms < r->nb_atoms)
1441 return -1;
1442 if (l->nb_atoms > r->nb_atoms)
1443 return 1;
1444
1445 return 0;
1446 }
1447
1448 static int runtime_cmp(struct work_atoms *l, struct work_atoms *r)
1449 {
1450 if (l->total_runtime < r->total_runtime)
1451 return -1;
1452 if (l->total_runtime > r->total_runtime)
1453 return 1;
1454
1455 return 0;
1456 }
1457
1458 static int sort_dimension__add(const char *tok, struct list_head *list)
1459 {
1460 size_t i;
1461 static struct sort_dimension avg_sort_dimension = {
1462 .name = "avg",
1463 .cmp = avg_cmp,
1464 };
1465 static struct sort_dimension max_sort_dimension = {
1466 .name = "max",
1467 .cmp = max_cmp,
1468 };
1469 static struct sort_dimension pid_sort_dimension = {
1470 .name = "pid",
1471 .cmp = pid_cmp,
1472 };
1473 static struct sort_dimension runtime_sort_dimension = {
1474 .name = "runtime",
1475 .cmp = runtime_cmp,
1476 };
1477 static struct sort_dimension switch_sort_dimension = {
1478 .name = "switch",
1479 .cmp = switch_cmp,
1480 };
1481 struct sort_dimension *available_sorts[] = {
1482 &pid_sort_dimension,
1483 &avg_sort_dimension,
1484 &max_sort_dimension,
1485 &switch_sort_dimension,
1486 &runtime_sort_dimension,
1487 };
1488
1489 for (i = 0; i < ARRAY_SIZE(available_sorts); i++) {
1490 if (!strcmp(available_sorts[i]->name, tok)) {
1491 list_add_tail(&available_sorts[i]->list, list);
1492
1493 return 0;
1494 }
1495 }
1496
1497 return -1;
1498 }
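
/*
 * Usage sketch for the helper above (hypothetical call; the tokens are
 * presumably taken from the -s/--sort order string elsewhere in this file):
 * sort_dimension__add("avg", &sched->sort_list) appends the avg comparator
 * so latency output is ordered by average delay, while an unknown token
 * makes the function return -1.
 */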
1499
1500 static void perf_sched__sort_lat(struct perf_sched *sched)
1501 {
1502 struct rb_node *node;
1503 struct rb_root_cached *root = &sched->atom_root;
1504 again:
1505 for (;;) {
1506 struct work_atoms *data;
1507 node = rb_first_cached(root);
1508 if (!node)
1509 break;
1510
1511 rb_erase_cached(node, root);
1512 data = rb_entry(node, struct work_atoms, node);
1513 __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list);
1514 }
1515 if (root == &sched->atom_root) {
1516 root = &sched->merged_atom_root;
1517 goto again;
1518 }
1519 }
1520
1521 static int process_sched_wakeup_event(const struct perf_tool *tool,
1522 struct evsel *evsel,
1523 struct perf_sample *sample,
1524 struct machine *machine)
1525 {
1526 struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
1527
1528 if (sched->tp_handler->wakeup_event)
1529 return sched->tp_handler->wakeup_event(sched, evsel, sample, machine);
1530
1531 return 0;
1532 }
1533
1534 static int process_sched_wakeup_ignore(const struct perf_tool *tool __maybe_unused,
1535 struct evsel *evsel __maybe_unused,
1536 struct perf_sample *sample __maybe_unused,
1537 struct machine *machine __maybe_unused)
1538 {
1539 return 0;
1540 }
1541
1542 static bool thread__has_color(struct thread *thread)
1543 {
1544 return thread__priv(thread) != NULL;
1545 }
1546
1547 static struct thread*
1548 map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid)
1549 {
1550 struct thread *thread = machine__findnew_thread(machine, pid, tid);
1551 bool color = false;
1552
1553 if (!sched->map.color_pids || !thread || thread__priv(thread))
1554 return thread;
1555
1556 if (thread_map__has(sched->map.color_pids, tid))
1557 color = true;
1558
1559 thread__set_priv(thread, color ? ((void*)1) : NULL);
1560 return thread;
1561 }
1562
1563 static bool sched_match_task(struct perf_sched *sched, const char *comm_str)
1564 {
1565 bool fuzzy_match = sched->map.fuzzy;
1566 struct strlist *task_names = sched->map.task_names;
1567 struct str_node *node;
1568
1569 strlist__for_each_entry(node, task_names) {
1570 bool match_found = fuzzy_match ? !!strstr(comm_str, node->s) :
1571 !strcmp(comm_str, node->s);
1572 if (match_found)
1573 return true;
1574 }
1575
1576 return false;
1577 }
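
/*
 * Matching example for the helper above (hypothetical comm names): with
 * --task-name "gcc" and fuzzy matching enabled, a comm of "gcc-12" matches
 * via strstr() while "cc1plus" does not; with fuzzy matching off only an
 * exact "gcc" comm matches.
 */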
1578
1579 static void print_sched_map(struct perf_sched *sched, struct perf_cpu this_cpu, int cpus_nr,
1580 const char *color, bool sched_out)
1581 {
1582 for (int i = 0; i < cpus_nr; i++) {
1583 struct perf_cpu cpu = {
1584 .cpu = sched->map.comp ? sched->map.comp_cpus[i].cpu : i,
1585 };
1586 struct thread *curr_thread = sched->curr_thread[cpu.cpu];
1587 struct thread *curr_out_thread = sched->curr_out_thread[cpu.cpu];
1588 struct thread_runtime *curr_tr;
1589 const char *pid_color = color;
1590 const char *cpu_color = color;
1591 char symbol = ' ';
1592 struct thread *thread_to_check = sched_out ? curr_out_thread : curr_thread;
1593
1594 if (thread_to_check && thread__has_color(thread_to_check))
1595 pid_color = COLOR_PIDS;
1596
1597 if (sched->map.color_cpus && perf_cpu_map__has(sched->map.color_cpus, cpu))
1598 cpu_color = COLOR_CPUS;
1599
1600 if (cpu.cpu == this_cpu.cpu)
1601 symbol = '*';
1602
1603 color_fprintf(stdout, cpu.cpu != this_cpu.cpu ? color : cpu_color, "%c", symbol);
1604
1605 thread_to_check = sched_out ? sched->curr_out_thread[cpu.cpu] :
1606 sched->curr_thread[cpu.cpu];
1607
1608 if (thread_to_check) {
1609 curr_tr = thread__get_runtime(thread_to_check);
1610 if (curr_tr == NULL)
1611 return;
1612
1613 if (sched_out) {
1614 if (cpu.cpu == this_cpu.cpu)
1615 color_fprintf(stdout, color, "- ");
1616 else {
1617 curr_tr = thread__get_runtime(sched->curr_thread[cpu.cpu]);
1618 if (curr_tr != NULL)
1619 color_fprintf(stdout, pid_color, "%2s ",
1620 curr_tr->shortname);
1621 }
1622 } else
1623 color_fprintf(stdout, pid_color, "%2s ", curr_tr->shortname);
1624 } else
1625 color_fprintf(stdout, color, " ");
1626 }
1627 }
1628
1629 static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
1630 struct perf_sample *sample, struct machine *machine)
1631 {
1632 const u32 next_pid = evsel__intval(evsel, sample, "next_pid");
1633 const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid");
1634 struct thread *sched_in, *sched_out;
1635 struct thread_runtime *tr;
1636 int new_shortname;
1637 u64 timestamp0, timestamp = sample->time;
1638 s64 delta;
1639 struct perf_cpu this_cpu = {
1640 .cpu = sample->cpu,
1641 };
1642 int cpus_nr;
1643 int proceed;
1644 bool new_cpu = false;
1645 const char *color = PERF_COLOR_NORMAL;
1646 char stimestamp[32];
1647 const char *str;
1648 int ret = -1;
1649
1650 BUG_ON(this_cpu.cpu >= MAX_CPUS || this_cpu.cpu < 0);
1651
1652 if (this_cpu.cpu > sched->max_cpu.cpu)
1653 sched->max_cpu = this_cpu;
1654
1655 if (sched->map.comp) {
1656 cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS);
1657 if (!__test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) {
1658 sched->map.comp_cpus[cpus_nr++] = this_cpu;
1659 new_cpu = true;
1660 }
1661 } else
1662 cpus_nr = sched->max_cpu.cpu;
1663
1664 timestamp0 = sched->cpu_last_switched[this_cpu.cpu];
1665 sched->cpu_last_switched[this_cpu.cpu] = timestamp;
1666 if (timestamp0)
1667 delta = timestamp - timestamp0;
1668 else
1669 delta = 0;
1670
1671 if (delta < 0) {
1672 pr_err("hm, delta: %" PRId64 " < 0 ?\n", delta);
1673 return -1;
1674 }
1675
1676 sched_in = map__findnew_thread(sched, machine, -1, next_pid);
1677 sched_out = map__findnew_thread(sched, machine, -1, prev_pid);
1678 if (sched_in == NULL || sched_out == NULL)
1679 goto out;
1680
1681 tr = thread__get_runtime(sched_in);
1682 if (tr == NULL)
1683 goto out;
1684
1685 thread__put(sched->curr_thread[this_cpu.cpu]);
1686 thread__put(sched->curr_out_thread[this_cpu.cpu]);
1687
1688 sched->curr_thread[this_cpu.cpu] = thread__get(sched_in);
1689 sched->curr_out_thread[this_cpu.cpu] = thread__get(sched_out);
1690
1691 ret = 0;
1692
1693 str = thread__comm_str(sched_in);
1694 new_shortname = 0;
1695 if (!tr->shortname[0]) {
1696 if (!strcmp(thread__comm_str(sched_in), "swapper")) {
1697 /*
1698 * Don't allocate a letter-number for swapper:0
1699 * as a shortname. Instead, we use '.' for it.
1700 */
1701 tr->shortname[0] = '.';
1702 tr->shortname[1] = ' ';
1703 } else if (!sched->map.task_name || sched_match_task(sched, str)) {
1704 tr->shortname[0] = sched->next_shortname1;
1705 tr->shortname[1] = sched->next_shortname2;
1706
1707 if (sched->next_shortname1 < 'Z') {
1708 sched->next_shortname1++;
1709 } else {
1710 sched->next_shortname1 = 'A';
1711 if (sched->next_shortname2 < '9')
1712 sched->next_shortname2++;
1713 else
1714 sched->next_shortname2 = '0';
1715 }
1716 } else {
1717 tr->shortname[0] = '-';
1718 tr->shortname[1] = ' ';
1719 }
1720 new_shortname = 1;
1721 }
1722
1723 if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, this_cpu))
1724 goto out;
1725
1726 proceed = 0;
1727 str = thread__comm_str(sched_in);
1728 /*
1729 * Check which of sched_in and sched_out matches the passed --task-name
1730 * arguments and call the corresponding print_sched_map.
1731 */
1732 if (sched->map.task_name && !sched_match_task(sched, str)) {
1733 if (!sched_match_task(sched, thread__comm_str(sched_out)))
1734 goto out;
1735 else
1736 goto sched_out;
1737
1738 } else {
1739 str = thread__comm_str(sched_out);
1740 if (!(sched->map.task_name && !sched_match_task(sched, str)))
1741 proceed = 1;
1742 }
1743
1744 printf(" ");
1745
1746 print_sched_map(sched, this_cpu, cpus_nr, color, false);
1747
1748 timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp));
1749 color_fprintf(stdout, color, " %12s secs ", stimestamp);
1750 if (new_shortname || tr->comm_changed || (verbose > 0 && thread__tid(sched_in))) {
1751 const char *pid_color = color;
1752
1753 if (thread__has_color(sched_in))
1754 pid_color = COLOR_PIDS;
1755
1756 color_fprintf(stdout, pid_color, "%s => %s:%d",
1757 tr->shortname, thread__comm_str(sched_in), thread__tid(sched_in));
1758 tr->comm_changed = false;
1759 }
1760
1761 if (sched->map.comp && new_cpu)
1762 color_fprintf(stdout, color, " (CPU %d)", this_cpu.cpu);
1763
1764 if (proceed != 1) {
1765 color_fprintf(stdout, color, "\n");
1766 goto out;
1767 }
1768
1769 sched_out:
1770 if (sched->map.task_name) {
1771 tr = thread__get_runtime(sched->curr_out_thread[this_cpu.cpu]);
1772 if (strcmp(tr->shortname, "") == 0)
1773 goto out;
1774
1775 if (proceed == 1)
1776 color_fprintf(stdout, color, "\n");
1777
1778 printf(" ");
1779 print_sched_map(sched, this_cpu, cpus_nr, color, true);
1780 timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp));
1781 color_fprintf(stdout, color, " %12s secs ", stimestamp);
1782 }
1783
1784 color_fprintf(stdout, color, "\n");
1785
1786 out:
1787 thread__put(sched_out);
1788 thread__put(sched_in);
1789
1790 return ret;
1791 }
1792
1793 static int process_sched_switch_event(const struct perf_tool *tool,
1794 struct evsel *evsel,
1795 struct perf_sample *sample,
1796 struct machine *machine)
1797 {
1798 struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
1799 int this_cpu = sample->cpu, err = 0;
1800 u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"),
1801 next_pid = evsel__intval(evsel, sample, "next_pid");
1802
1803 if (sched->curr_pid[this_cpu] != (u32)-1) {
1804 /*
1805 * Are we trying to switch away a PID that is
1806 * not current?
1807 */
1808 if (sched->curr_pid[this_cpu] != prev_pid)
1809 sched->nr_context_switch_bugs++;
1810 }
1811
1812 if (sched->tp_handler->switch_event)
1813 err = sched->tp_handler->switch_event(sched, evsel, sample, machine);
1814
1815 sched->curr_pid[this_cpu] = next_pid;
1816 return err;
1817 }
1818
1819 static int process_sched_runtime_event(const struct perf_tool *tool,
1820 struct evsel *evsel,
1821 struct perf_sample *sample,
1822 struct machine *machine)
1823 {
1824 struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
1825
1826 if (sched->tp_handler->runtime_event)
1827 return sched->tp_handler->runtime_event(sched, evsel, sample, machine);
1828
1829 return 0;
1830 }
1831
1832 static int perf_sched__process_fork_event(const struct perf_tool *tool,
1833 union perf_event *event,
1834 struct perf_sample *sample,
1835 struct machine *machine)
1836 {
1837 struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
1838
1839 /* run the fork event through the perf machinery */
1840 perf_event__process_fork(tool, event, sample, machine);
1841
1842 /* and then run additional processing needed for this command */
1843 if (sched->tp_handler->fork_event)
1844 return sched->tp_handler->fork_event(sched, event, machine);
1845
1846 return 0;
1847 }
1848
1849 static int process_sched_migrate_task_event(const struct perf_tool *tool,
1850 struct evsel *evsel,
1851 struct perf_sample *sample,
1852 struct machine *machine)
1853 {
1854 struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
1855
1856 if (sched->tp_handler->migrate_task_event)
1857 return sched->tp_handler->migrate_task_event(sched, evsel, sample, machine);
1858
1859 return 0;
1860 }
1861
1862 typedef int (*tracepoint_handler)(const struct perf_tool *tool,
1863 struct evsel *evsel,
1864 struct perf_sample *sample,
1865 struct machine *machine);
1866
1867 static int perf_sched__process_tracepoint_sample(const struct perf_tool *tool __maybe_unused,
1868 union perf_event *event __maybe_unused,
1869 struct perf_sample *sample,
1870 struct evsel *evsel,
1871 struct machine *machine)
1872 {
1873 int err = 0;
1874
1875 if (evsel->handler != NULL) {
1876 tracepoint_handler f = evsel->handler;
1877 err = f(tool, evsel, sample, machine);
1878 }
1879
1880 return err;
1881 }
1882
1883 static int perf_sched__process_comm(const struct perf_tool *tool __maybe_unused,
1884 union perf_event *event,
1885 struct perf_sample *sample,
1886 struct machine *machine)
1887 {
1888 struct thread *thread;
1889 struct thread_runtime *tr;
1890 int err;
1891
1892 err = perf_event__process_comm(tool, event, sample, machine);
1893 if (err)
1894 return err;
1895
1896 thread = machine__find_thread(machine, sample->pid, sample->tid);
1897 if (!thread) {
1898 pr_err("Internal error: can't find thread\n");
1899 return -1;
1900 }
1901
1902 tr = thread__get_runtime(thread);
1903 if (tr == NULL) {
1904 thread__put(thread);
1905 return -1;
1906 }
1907
1908 tr->comm_changed = true;
1909 thread__put(thread);
1910
1911 return 0;
1912 }
1913
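/*
 * Open the recorded perf.data, register the sched tracepoint handlers above
 * (preferring sched_waking over sched_wakeup when both were captured) and
 * process the events, keeping the lost-event statistics for later reporting.
 */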
1914 static int perf_sched__read_events(struct perf_sched *sched)
1915 {
1916 struct evsel_str_handler handlers[] = {
1917 { "sched:sched_switch", process_sched_switch_event, },
1918 { "sched:sched_stat_runtime", process_sched_runtime_event, },
1919 { "sched:sched_wakeup", process_sched_wakeup_event, },
1920 { "sched:sched_waking", process_sched_wakeup_event, },
1921 { "sched:sched_wakeup_new", process_sched_wakeup_event, },
1922 { "sched:sched_migrate_task", process_sched_migrate_task_event, },
1923 };
1924 struct perf_session *session;
1925 struct perf_data data = {
1926 .path = input_name,
1927 .mode = PERF_DATA_MODE_READ,
1928 .force = sched->force,
1929 };
1930 int rc = -1;
1931
1932 session = perf_session__new(&data, &sched->tool);
1933 if (IS_ERR(session)) {
1934 pr_debug("Error creating perf session");
1935 return PTR_ERR(session);
1936 }
1937
1938 symbol__init(perf_session__env(session));
1939
1940 /* prefer sched_waking if it is captured */
1941 if (evlist__find_tracepoint_by_name(session->evlist, "sched:sched_waking"))
1942 handlers[2].handler = process_sched_wakeup_ignore;
1943
1944 if (perf_session__set_tracepoints_handlers(session, handlers))
1945 goto out_delete;
1946
1947 if (perf_session__has_traces(session, "record -R")) {
1948 int err = perf_session__process_events(session);
1949 if (err) {
1950 pr_err("Failed to process events, error %d", err);
1951 goto out_delete;
1952 }
1953
1954 sched->nr_events = session->evlist->stats.nr_events[0];
1955 sched->nr_lost_events = session->evlist->stats.total_lost;
1956 sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST];
1957 }
1958
1959 rc = 0;
1960 out_delete:
1961 perf_session__delete(session);
1962 return rc;
1963 }
1964
1965 /*
1966 * scheduling times are printed as msec.usec
1967 */
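/*
 * For example, 1234567 nsec is printed as "1.234", with the msec part
 * right-aligned to the requested width and the usec part zero-padded.
 */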
1968 static inline void print_sched_time(unsigned long long nsecs, int width)
1969 {
1970 unsigned long msecs;
1971 unsigned long usecs;
1972
1973 msecs = nsecs / NSEC_PER_MSEC;
1974 nsecs -= msecs * NSEC_PER_MSEC;
1975 usecs = nsecs / NSEC_PER_USEC;
1976 printf("%*lu.%03lu ", width, msecs, usecs);
1977 }
1978
1979 /*
1980 * returns runtime data for event, allocating memory for it the
1981 * first time it is used.
1982 */
1983 static struct evsel_runtime *evsel__get_runtime(struct evsel *evsel)
1984 {
1985 struct evsel_runtime *r = evsel->priv;
1986
1987 if (r == NULL) {
1988 r = zalloc(sizeof(struct evsel_runtime));
1989 evsel->priv = r;
1990 }
1991
1992 return r;
1993 }
1994
1995 /*
1996 * save last time event was seen per cpu
1997 */
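/*
 * The per-cpu last_time[] array grows on demand to the next power of two
 * above the highest cpu index seen so far, e.g. a sample on cpu 5 grows it
 * to 8 entries.
 */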
1998 static void evsel__save_time(struct evsel *evsel, u64 timestamp, u32 cpu)
1999 {
2000 struct evsel_runtime *r = evsel__get_runtime(evsel);
2001
2002 if (r == NULL)
2003 return;
2004
2005 if ((cpu >= r->ncpu) || (r->last_time == NULL)) {
2006 int i, n = __roundup_pow_of_two(cpu+1);
2007 void *p = r->last_time;
2008
2009 p = realloc(r->last_time, n * sizeof(u64));
2010 if (!p)
2011 return;
2012
2013 r->last_time = p;
2014 for (i = r->ncpu; i < n; ++i)
2015 r->last_time[i] = (u64) 0;
2016
2017 r->ncpu = n;
2018 }
2019
2020 r->last_time[cpu] = timestamp;
2021 }
2022
2023 /* returns last time this event was seen on the given cpu */
2024 static u64 evsel__get_time(struct evsel *evsel, u32 cpu)
2025 {
2026 struct evsel_runtime *r = evsel__get_runtime(evsel);
2027
2028 if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu))
2029 return 0;
2030
2031 return r->last_time[cpu];
2032 }
2033
2034 static void timehist__evsel_priv_destructor(void *priv)
2035 {
2036 struct evsel_runtime *r = priv;
2037
2038 if (r) {
2039 free(r->last_time);
2040 free(r);
2041 }
2042 }
2043
2044 static int comm_width = 30;
2045
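/*
 * Build "comm[tid/pid]" for threads, "comm[tid]" when tid == pid, and the
 * bare comm for the idle task (pid 0). comm_width grows to the longest
 * string seen so the task-name columns stay aligned.
 */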
2046 static char *timehist_get_commstr(struct thread *thread)
2047 {
2048 static char str[32];
2049 const char *comm = thread__comm_str(thread);
2050 pid_t tid = thread__tid(thread);
2051 pid_t pid = thread__pid(thread);
2052 int n;
2053
2054 if (pid == 0)
2055 n = scnprintf(str, sizeof(str), "%s", comm);
2056
2057 else if (tid != pid)
2058 n = scnprintf(str, sizeof(str), "%s[%d/%d]", comm, tid, pid);
2059
2060 else
2061 n = scnprintf(str, sizeof(str), "%s[%d]", comm, tid);
2062
2063 if (n > comm_width)
2064 comm_width = n;
2065
2066 return str;
2067 }
2068
2069 /* prio field format: xxx or xxx->yyy */
2070 #define MAX_PRIO_STR_LEN 8
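/* e.g. "120", or "110->120" when the priority changed while the task was running */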
2071 static char *timehist_get_priostr(struct evsel *evsel,
2072 struct thread *thread,
2073 struct perf_sample *sample)
2074 {
2075 static char prio_str[16];
2076 int prev_prio = (int)evsel__intval(evsel, sample, "prev_prio");
2077 struct thread_runtime *tr = thread__priv(thread);
2078
2079 if (tr->prio != prev_prio && tr->prio != -1)
2080 scnprintf(prio_str, sizeof(prio_str), "%d->%d", tr->prio, prev_prio);
2081 else
2082 scnprintf(prio_str, sizeof(prio_str), "%d", prev_prio);
2083
2084 return prio_str;
2085 }
2086
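/*
 * Emit the timehist column headers: a row of labels, a row of units and a
 * dotted separator, widened as needed for the optional cpu visual, prio,
 * pre-migration and state columns.
 */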
2087 static void timehist_header(struct perf_sched *sched)
2088 {
2089 u32 ncpus = sched->max_cpu.cpu + 1;
2090 u32 i, j;
2091
2092 printf("%15s %6s ", "time", "cpu");
2093
2094 if (sched->show_cpu_visual) {
2095 printf(" ");
2096 for (i = 0, j = 0; i < ncpus; ++i) {
2097 printf("%x", j++);
2098 if (j > 15)
2099 j = 0;
2100 }
2101 printf(" ");
2102 }
2103
2104 printf(" %-*s", comm_width, "task name");
2105
2106 if (sched->show_prio)
2107 printf(" %-*s", MAX_PRIO_STR_LEN, "prio");
2108
2109 printf(" %9s %9s %9s", "wait time", "sch delay", "run time");
2110
2111 if (sched->pre_migrations)
2112 printf(" %9s", "pre-mig time");
2113
2114 if (sched->show_state)
2115 printf(" %s", "state");
2116
2117 printf("\n");
2118
2119 /*
2120 * units row
2121 */
2122 printf("%15s %-6s ", "", "");
2123
2124 if (sched->show_cpu_visual)
2125 printf(" %*s ", ncpus, "");
2126
2127 printf(" %-*s", comm_width, "[tid/pid]");
2128
2129 if (sched->show_prio)
2130 printf(" %-*s", MAX_PRIO_STR_LEN, "");
2131
2132 printf(" %9s %9s %9s", "(msec)", "(msec)", "(msec)");
2133
2134 if (sched->pre_migrations)
2135 printf(" %9s", "(msec)");
2136
2137 printf("\n");
2138
2139 /*
2140 * separator
2141 */
2142 printf("%.15s %.6s ", graph_dotted_line, graph_dotted_line);
2143
2144 if (sched->show_cpu_visual)
2145 printf(" %.*s ", ncpus, graph_dotted_line);
2146
2147 printf(" %.*s", comm_width, graph_dotted_line);
2148
2149 if (sched->show_prio)
2150 printf(" %.*s", MAX_PRIO_STR_LEN, graph_dotted_line);
2151
2152 printf(" %.9s %.9s %.9s", graph_dotted_line, graph_dotted_line, graph_dotted_line);
2153
2154 if (sched->pre_migrations)
2155 printf(" %.9s", graph_dotted_line);
2156
2157 if (sched->show_state)
2158 printf(" %.5s", graph_dotted_line);
2159
2160 printf("\n");
2161 }
2162
2163 static void timehist_print_sample(struct perf_sched *sched,
2164 struct evsel *evsel,
2165 struct perf_sample *sample,
2166 struct addr_location *al,
2167 struct thread *thread,
2168 u64 t, const char state)
2169 {
2170 struct thread_runtime *tr = thread__priv(thread);
2171 const char *next_comm = evsel__strval(evsel, sample, "next_comm");
2172 const u32 next_pid = evsel__intval(evsel, sample, "next_pid");
2173 u32 max_cpus = sched->max_cpu.cpu + 1;
2174 char tstr[64];
2175 char nstr[30];
2176 u64 wait_time;
2177
2178 if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
2179 return;
2180
2181 timestamp__scnprintf_usec(t, tstr, sizeof(tstr));
2182 printf("%15s [%04d] ", tstr, sample->cpu);
2183
2184 if (sched->show_cpu_visual) {
2185 u32 i;
2186 char c;
2187
2188 printf(" ");
2189 for (i = 0; i < max_cpus; ++i) {
2190 /* flag idle times with 'i'; others are sched events */
2191 if (i == sample->cpu)
2192 c = (thread__tid(thread) == 0) ? 'i' : 's';
2193 else
2194 c = ' ';
2195 printf("%c", c);
2196 }
2197 printf(" ");
2198 }
2199
2200 if (!thread__comm_set(thread)) {
2201 const char *prev_comm = evsel__strval(evsel, sample, "prev_comm");
2202 thread__set_comm(thread, prev_comm, sample->time);
2203 }
2204
2205 printf(" %-*s ", comm_width, timehist_get_commstr(thread));
2206
2207 if (sched->show_prio)
2208 printf(" %-*s ", MAX_PRIO_STR_LEN, timehist_get_priostr(evsel, thread, sample));
2209
2210 wait_time = tr->dt_sleep + tr->dt_iowait + tr->dt_preempt;
2211 print_sched_time(wait_time, 6);
2212
2213 print_sched_time(tr->dt_delay, 6);
2214 print_sched_time(tr->dt_run, 6);
2215 if (sched->pre_migrations)
2216 print_sched_time(tr->dt_pre_mig, 6);
2217
2218 if (sched->show_state)
2219 printf(" %5c ", thread__tid(thread) == 0 ? 'I' : state);
2220
2221 if (sched->show_next) {
2222 snprintf(nstr, sizeof(nstr), "next: %s[%d]", next_comm, next_pid);
2223 printf(" %-*s", comm_width, nstr);
2224 }
2225
2226 if (sched->show_wakeups && !sched->show_next)
2227 printf(" %-*s", comm_width, "");
2228
2229 if (thread__tid(thread) == 0)
2230 goto out;
2231
2232 if (sched->show_callchain)
2233 printf(" ");
2234
2235 sample__fprintf_sym(sample, al, 0,
2236 EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE |
2237 EVSEL__PRINT_CALLCHAIN_ARROW |
2238 EVSEL__PRINT_SKIP_IGNORED,
2239 get_tls_callchain_cursor(), symbol_conf.bt_stop_list, stdout);
2240
2241 out:
2242 printf("\n");
2243 }
2244
2245 /*
2246 * Explanation of delta-time stats:
2247 *
2248 * t = time of current schedule out event
2249 * tprev = time of previous sched out event
2250 * also time of schedule-in event for current task
2251 * last_time = time of last sched change event for current task
2252  *              (i.e., the time the process was last scheduled out)
2253 * ready_to_run = time of wakeup for current task
2254 * migrated = time of task migration to another CPU
2255 *
2256 * -----|-------------|-------------|-------------|-------------|-----
2257 * last ready migrated tprev t
2258 * time to run
2259 *
2260 * |---------------- dt_wait ----------------|
2261 * |--------- dt_delay ---------|-- dt_run --|
2262 * |- dt_pre_mig -|
2263 *
2264 * dt_run = run time of current task
2265 * dt_wait = time between last schedule out event for task and tprev
2266 * represents time spent off the cpu
2267 * dt_delay = time between wakeup and schedule-in of task
2268 * dt_pre_mig = time between wakeup and migration to another CPU
2269 */
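/*
 * Hypothetical example (timestamps in arbitrary units): last_time = 100,
 * ready_to_run = 150, migrated = 160, tprev = 170 and t = 200 give
 * dt_wait = 70, dt_delay = 20, dt_pre_mig = 10 and dt_run = 30, with dt_wait
 * accounted to dt_preempt, dt_iowait or dt_sleep depending on last_state.
 */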
2270
2271 static void timehist_update_runtime_stats(struct thread_runtime *r,
2272 u64 t, u64 tprev)
2273 {
2274 r->dt_delay = 0;
2275 r->dt_sleep = 0;
2276 r->dt_iowait = 0;
2277 r->dt_preempt = 0;
2278 r->dt_run = 0;
2279 r->dt_pre_mig = 0;
2280
2281 if (tprev) {
2282 r->dt_run = t - tprev;
2283 if (r->ready_to_run) {
2284 if (r->ready_to_run > tprev)
2285 pr_debug("time travel: wakeup time for task > previous sched_switch event\n");
2286 else
2287 r->dt_delay = tprev - r->ready_to_run;
2288
2289 if ((r->migrated > r->ready_to_run) && (r->migrated < tprev))
2290 r->dt_pre_mig = r->migrated - r->ready_to_run;
2291 }
2292
2293 if (r->last_time > tprev)
2294 pr_debug("time travel: last sched out time for task > previous sched_switch event\n");
2295 else if (r->last_time) {
2296 u64 dt_wait = tprev - r->last_time;
2297
2298 if (r->last_state == 'R')
2299 r->dt_preempt = dt_wait;
2300 else if (r->last_state == 'D')
2301 r->dt_iowait = dt_wait;
2302 else
2303 r->dt_sleep = dt_wait;
2304 }
2305 }
2306
2307 update_stats(&r->run_stats, r->dt_run);
2308
2309 r->total_run_time += r->dt_run;
2310 r->total_delay_time += r->dt_delay;
2311 r->total_sleep_time += r->dt_sleep;
2312 r->total_iowait_time += r->dt_iowait;
2313 r->total_preempt_time += r->dt_preempt;
2314 r->total_pre_mig_time += r->dt_pre_mig;
2315 }
2316
2317 static bool is_idle_sample(struct perf_sample *sample,
2318 struct evsel *evsel)
2319 {
2320 /* pid 0 == swapper == idle task */
2321 if (evsel__name_is(evsel, "sched:sched_switch"))
2322 return evsel__intval(evsel, sample, "prev_pid") == 0;
2323
2324 return sample->pid == 0;
2325 }
2326
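/*
 * Resolve the sample's callchain against the process' main thread and mark
 * the scheduler entry points (schedule, __schedule, preempt_schedule) as
 * ignored so they are skipped when the chain is printed.
 */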
2327 static void save_task_callchain(struct perf_sched *sched,
2328 struct perf_sample *sample,
2329 struct evsel *evsel,
2330 struct machine *machine)
2331 {
2332 struct callchain_cursor *cursor;
2333 struct thread *thread;
2334
2335 /* want main thread for process - has maps */
2336 thread = machine__findnew_thread(machine, sample->pid, sample->pid);
2337 if (thread == NULL) {
2338 pr_debug("Failed to get thread for pid %d.\n", sample->pid);
2339 return;
2340 }
2341
2342 if (!sched->show_callchain || sample->callchain == NULL) {
2343 thread__put(thread);
2344 return;
2345 }
2346
2347 cursor = get_tls_callchain_cursor();
2348
2349 if (thread__resolve_callchain(thread, cursor, evsel, sample,
2350 NULL, NULL, sched->max_stack + 2) != 0) {
2351 if (verbose > 0)
2352 pr_err("Failed to resolve callchain. Skipping\n");
2353
2354 thread__put(thread);
2355 return;
2356 }
2357
2358 callchain_cursor_commit(cursor);
2359 thread__put(thread);
2360
2361 while (true) {
2362 struct callchain_cursor_node *node;
2363 struct symbol *sym;
2364
2365 node = callchain_cursor_current(cursor);
2366 if (node == NULL)
2367 break;
2368
2369 sym = node->ms.sym;
2370 if (sym) {
2371 if (!strcmp(sym->name, "schedule") ||
2372 !strcmp(sym->name, "__schedule") ||
2373 !strcmp(sym->name, "preempt_schedule"))
2374 sym->ignore = 1;
2375 }
2376
2377 callchain_cursor_advance(cursor);
2378 }
2379 }
2380
2381 static int init_idle_thread(struct thread *thread)
2382 {
2383 struct idle_thread_runtime *itr;
2384
2385 thread__set_comm(thread, idle_comm, 0);
2386
2387 itr = zalloc(sizeof(*itr));
2388 if (itr == NULL)
2389 return -ENOMEM;
2390
2391 init_prio(&itr->tr);
2392 init_stats(&itr->tr.run_stats);
2393 callchain_init(&itr->callchain);
2394 callchain_cursor_reset(&itr->cursor);
2395 thread__set_priv(thread, itr);
2396
2397 return 0;
2398 }
2399
2400 /*
2401 * Track idle stats per cpu by maintaining a local thread
2402 * struct for the idle task on each cpu.
2403 */
2404 static int init_idle_threads(int ncpu)
2405 {
2406 int i, ret;
2407
2408 idle_threads = zalloc(ncpu * sizeof(struct thread *));
2409 if (!idle_threads)
2410 return -ENOMEM;
2411
2412 idle_max_cpu = ncpu;
2413
2414 /* allocate the actual thread struct if needed */
2415 for (i = 0; i < ncpu; ++i) {
2416 idle_threads[i] = thread__new(0, 0);
2417 if (idle_threads[i] == NULL)
2418 return -ENOMEM;
2419
2420 ret = init_idle_thread(idle_threads[i]);
2421 if (ret < 0)
2422 return ret;
2423 }
2424
2425 return 0;
2426 }
2427
2428 static void free_idle_threads(void)
2429 {
2430 int i;
2431
2432 if (idle_threads == NULL)
2433 return;
2434
2435 for (i = 0; i < idle_max_cpu; ++i) {
2436 struct thread *idle = idle_threads[i];
2437
2438 if (idle) {
2439 struct idle_thread_runtime *itr;
2440
2441 itr = thread__priv(idle);
2442 if (itr)
2443 thread__put(itr->last_thread);
2444
2445 thread__delete(idle);
2446 }
2447 }
2448
2449 free(idle_threads);
2450 }
2451
2452 static struct thread *get_idle_thread(int cpu)
2453 {
2454 /*
2455 * expand/allocate array of pointers to local thread
2456 * structs if needed
2457 */
2458 if ((cpu >= idle_max_cpu) || (idle_threads == NULL)) {
2459 int i, j = __roundup_pow_of_two(cpu+1);
2460 void *p;
2461
2462 p = realloc(idle_threads, j * sizeof(struct thread *));
2463 if (!p)
2464 return NULL;
2465
2466 idle_threads = (struct thread **) p;
2467 for (i = idle_max_cpu; i < j; ++i)
2468 idle_threads[i] = NULL;
2469
2470 idle_max_cpu = j;
2471 }
2472
2473 /* allocate a new thread struct if needed */
2474 if (idle_threads[cpu] == NULL) {
2475 idle_threads[cpu] = thread__new(0, 0);
2476
2477 if (idle_threads[cpu]) {
2478 if (init_idle_thread(idle_threads[cpu]) < 0)
2479 return NULL;
2480 }
2481 }
2482
2483 return thread__get(idle_threads[cpu]);
2484 }
2485
2486 static void save_idle_callchain(struct perf_sched *sched,
2487 struct idle_thread_runtime *itr,
2488 struct perf_sample *sample)
2489 {
2490 struct callchain_cursor *cursor;
2491
2492 if (!sched->show_callchain || sample->callchain == NULL)
2493 return;
2494
2495 cursor = get_tls_callchain_cursor();
2496 if (cursor == NULL)
2497 return;
2498
2499 callchain_cursor__copy(&itr->cursor, cursor);
2500 }
2501
2502 static struct thread *timehist_get_thread(struct perf_sched *sched,
2503 struct perf_sample *sample,
2504 struct machine *machine,
2505 struct evsel *evsel)
2506 {
2507 struct thread *thread;
2508
2509 if (is_idle_sample(sample, evsel)) {
2510 thread = get_idle_thread(sample->cpu);
2511 if (thread == NULL)
2512 pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu);
2513
2514 } else {
2515 /* there were samples with tid 0 but non-zero pid */
2516 thread = machine__findnew_thread(machine, sample->pid,
2517 sample->tid ?: sample->pid);
2518 if (thread == NULL) {
2519 pr_debug("Failed to get thread for tid %d. skipping sample.\n",
2520 sample->tid);
2521 }
2522
2523 save_task_callchain(sched, sample, evsel, machine);
2524 if (sched->idle_hist) {
2525 struct thread *idle;
2526 struct idle_thread_runtime *itr;
2527
2528 idle = get_idle_thread(sample->cpu);
2529 if (idle == NULL) {
2530 pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu);
2531 return NULL;
2532 }
2533
2534 itr = thread__priv(idle);
2535 if (itr == NULL)
2536 return NULL;
2537
2538 thread__put(itr->last_thread);
2539 itr->last_thread = thread__get(thread);
2540
2541 			/* copy the task's callchain when the cpu is entering idle */
2542 if (evsel__intval(evsel, sample, "next_pid") == 0)
2543 save_idle_callchain(sched, itr, sample);
2544 }
2545 }
2546
2547 return thread;
2548 }
2549
2550 static bool timehist_skip_sample(struct perf_sched *sched,
2551 struct thread *thread,
2552 struct evsel *evsel,
2553 struct perf_sample *sample)
2554 {
2555 bool rc = false;
2556 int prio = -1;
2557 struct thread_runtime *tr = NULL;
2558
2559 if (thread__is_filtered(thread)) {
2560 rc = true;
2561 sched->skipped_samples++;
2562 }
2563
2564 if (sched->prio_str) {
2565 /*
2566 		 * Because the priority may change while a task runs, first use
2567 		 * the priority saved at the previous sched_in event for the
2568 		 * current task. If no previous sched_in event was saved, fall
2569 		 * back to the priority in this task's sched_out event.
2570 */
2571 tr = thread__get_runtime(thread);
2572 if (tr && tr->prio != -1)
2573 prio = tr->prio;
2574 else if (evsel__name_is(evsel, "sched:sched_switch"))
2575 prio = evsel__intval(evsel, sample, "prev_prio");
2576
2577 if (prio != -1 && !test_bit(prio, sched->prio_bitmap)) {
2578 rc = true;
2579 sched->skipped_samples++;
2580 }
2581 }
2582
2583 if (sched->idle_hist) {
2584 if (!evsel__name_is(evsel, "sched:sched_switch"))
2585 rc = true;
2586 else if (evsel__intval(evsel, sample, "prev_pid") != 0 &&
2587 evsel__intval(evsel, sample, "next_pid") != 0)
2588 rc = true;
2589 }
2590
2591 return rc;
2592 }
2593
2594 static void timehist_print_wakeup_event(struct perf_sched *sched,
2595 struct evsel *evsel,
2596 struct perf_sample *sample,
2597 struct machine *machine,
2598 struct thread *awakened)
2599 {
2600 struct thread *thread;
2601 char tstr[64];
2602
2603 thread = machine__findnew_thread(machine, sample->pid, sample->tid);
2604 if (thread == NULL)
2605 return;
2606
2607 	/* show the wakeup unless both the awakened task and the waker are filtered */
2608 if (timehist_skip_sample(sched, thread, evsel, sample) &&
2609 timehist_skip_sample(sched, awakened, evsel, sample)) {
2610 thread__put(thread);
2611 return;
2612 }
2613
2614 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
2615 printf("%15s [%04d] ", tstr, sample->cpu);
2616 if (sched->show_cpu_visual)
2617 printf(" %*s ", sched->max_cpu.cpu + 1, "");
2618
2619 printf(" %-*s ", comm_width, timehist_get_commstr(thread));
2620
2621 /* dt spacer */
2622 printf(" %9s %9s %9s ", "", "", "");
2623
2624 printf("awakened: %s", timehist_get_commstr(awakened));
2625
2626 printf("\n");
2627
2628 thread__put(thread);
2629 }
2630
2631 static int timehist_sched_wakeup_ignore(const struct perf_tool *tool __maybe_unused,
2632 union perf_event *event __maybe_unused,
2633 struct evsel *evsel __maybe_unused,
2634 struct perf_sample *sample __maybe_unused,
2635 struct machine *machine __maybe_unused)
2636 {
2637 return 0;
2638 }
2639
2640 static int timehist_sched_wakeup_event(const struct perf_tool *tool,
2641 union perf_event *event __maybe_unused,
2642 struct evsel *evsel,
2643 struct perf_sample *sample,
2644 struct machine *machine)
2645 {
2646 struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
2647 struct thread *thread;
2648 struct thread_runtime *tr = NULL;
2649 /* want pid of awakened task not pid in sample */
2650 const u32 pid = evsel__intval(evsel, sample, "pid");
2651
2652 thread = machine__findnew_thread(machine, 0, pid);
2653 if (thread == NULL)
2654 return -1;
2655
2656 tr = thread__get_runtime(thread);
2657 if (tr == NULL) {
2658 thread__put(thread);
2659 return -1;
2660 }
2661
2662 if (tr->ready_to_run == 0)
2663 tr->ready_to_run = sample->time;
2664
2665 /* show wakeups if requested */
2666 if (sched->show_wakeups &&
2667 !perf_time__skip_sample(&sched->ptime, sample->time))
2668 timehist_print_wakeup_event(sched, evsel, sample, machine, thread);
2669
2670 thread__put(thread);
2671 return 0;
2672 }
2673
2674 static void timehist_print_migration_event(struct perf_sched *sched,
2675 struct evsel *evsel,
2676 struct perf_sample *sample,
2677 struct machine *machine,
2678 struct thread *migrated)
2679 {
2680 struct thread *thread;
2681 char tstr[64];
2682 u32 max_cpus;
2683 u32 ocpu, dcpu;
2684
2685 if (sched->summary_only)
2686 return;
2687
2688 max_cpus = sched->max_cpu.cpu + 1;
2689 ocpu = evsel__intval(evsel, sample, "orig_cpu");
2690 dcpu = evsel__intval(evsel, sample, "dest_cpu");
2691
2692 thread = machine__findnew_thread(machine, sample->pid, sample->tid);
2693 if (thread == NULL)
2694 return;
2695
2696 if (timehist_skip_sample(sched, thread, evsel, sample) &&
2697 timehist_skip_sample(sched, migrated, evsel, sample)) {
2698 thread__put(thread);
2699 return;
2700 }
2701
2702 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
2703 printf("%15s [%04d] ", tstr, sample->cpu);
2704
2705 if (sched->show_cpu_visual) {
2706 u32 i;
2707 char c;
2708
2709 printf(" ");
2710 for (i = 0; i < max_cpus; ++i) {
2711 c = (i == sample->cpu) ? 'm' : ' ';
2712 printf("%c", c);
2713 }
2714 printf(" ");
2715 }
2716
2717 printf(" %-*s ", comm_width, timehist_get_commstr(thread));
2718
2719 /* dt spacer */
2720 printf(" %9s %9s %9s ", "", "", "");
2721
2722 printf("migrated: %s", timehist_get_commstr(migrated));
2723 printf(" cpu %d => %d", ocpu, dcpu);
2724
2725 printf("\n");
2726 thread__put(thread);
2727 }
2728
2729 static int timehist_migrate_task_event(const struct perf_tool *tool,
2730 union perf_event *event __maybe_unused,
2731 struct evsel *evsel,
2732 struct perf_sample *sample,
2733 struct machine *machine)
2734 {
2735 struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
2736 struct thread *thread;
2737 struct thread_runtime *tr = NULL;
2738 /* want pid of migrated task not pid in sample */
2739 const u32 pid = evsel__intval(evsel, sample, "pid");
2740
2741 thread = machine__findnew_thread(machine, 0, pid);
2742 if (thread == NULL)
2743 return -1;
2744
2745 tr = thread__get_runtime(thread);
2746 if (tr == NULL) {
2747 thread__put(thread);
2748 return -1;
2749 }
2750
2751 tr->migrations++;
2752 tr->migrated = sample->time;
2753
2754 /* show migrations if requested */
2755 if (sched->show_migrations) {
2756 timehist_print_migration_event(sched, evsel, sample,
2757 machine, thread);
2758 }
2759 thread__put(thread);
2760
2761 return 0;
2762 }
2763
2764 static void timehist_update_task_prio(struct evsel *evsel,
2765 struct perf_sample *sample,
2766 struct machine *machine)
2767 {
2768 struct thread *thread;
2769 struct thread_runtime *tr = NULL;
2770 const u32 next_pid = evsel__intval(evsel, sample, "next_pid");
2771 const u32 next_prio = evsel__intval(evsel, sample, "next_prio");
2772
2773 if (next_pid == 0)
2774 thread = get_idle_thread(sample->cpu);
2775 else
2776 thread = machine__findnew_thread(machine, -1, next_pid);
2777
2778 if (thread == NULL)
2779 return;
2780
2781 tr = thread__get_runtime(thread);
2782 if (tr != NULL)
2783 tr->prio = next_prio;
2784
2785 thread__put(thread);
2786 }
2787
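/*
 * Handle one scheduler switch sample: account the run/wait/delay deltas for
 * the task being switched out, fold idle time into the previous task in
 * idle-hist mode, and print the per-event line unless only a summary was
 * requested.
 */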
2788 static int timehist_sched_change_event(const struct perf_tool *tool,
2789 union perf_event *event,
2790 struct evsel *evsel,
2791 struct perf_sample *sample,
2792 struct machine *machine)
2793 {
2794 struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
2795 struct perf_time_interval *ptime = &sched->ptime;
2796 struct addr_location al;
2797 struct thread *thread = NULL;
2798 struct thread_runtime *tr = NULL;
2799 u64 tprev, t = sample->time;
2800 int rc = 0;
2801 const char state = evsel__taskstate(evsel, sample, "prev_state");
2802
2803 addr_location__init(&al);
2804 if (machine__resolve(machine, &al, sample) < 0) {
2805 pr_err("problem processing %d event. skipping it\n",
2806 event->header.type);
2807 rc = -1;
2808 goto out;
2809 }
2810
2811 if (sched->show_prio || sched->prio_str)
2812 timehist_update_task_prio(evsel, sample, machine);
2813
2814 thread = timehist_get_thread(sched, sample, machine, evsel);
2815 if (thread == NULL) {
2816 rc = -1;
2817 goto out;
2818 }
2819
2820 if (timehist_skip_sample(sched, thread, evsel, sample))
2821 goto out;
2822
2823 tr = thread__get_runtime(thread);
2824 if (tr == NULL) {
2825 rc = -1;
2826 goto out;
2827 }
2828
2829 tprev = evsel__get_time(evsel, sample->cpu);
2830
2831 /*
2832 * If start time given:
2833 	 * - sample time is before the window of interest - skip the sample
2834 	 * - tprev is before the window of interest - reset it to the start of the window
2835 */
2836 if (ptime->start && ptime->start > t)
2837 goto out;
2838
2839 if (tprev && ptime->start > tprev)
2840 tprev = ptime->start;
2841
2842 /*
2843 * If end time given:
2844 * - previous sched event is out of window - we are done
2845 	 * - sample time is beyond the window of interest - clamp it to the end
2846 	 *   of the window so the stats for the window are closed out
2847 	 * - if tprev is 0, i.e. no sched_in event was recorded for the current
2848 	 *   task, we cannot tell whether that sched_in falls within the window
2849 	 *   of interest - ignore the sample
2850 */
2851 if (ptime->end) {
2852 if (!tprev || tprev > ptime->end)
2853 goto out;
2854
2855 if (t > ptime->end)
2856 t = ptime->end;
2857 }
2858
2859 if (!sched->idle_hist || thread__tid(thread) == 0) {
2860 if (!cpu_list || test_bit(sample->cpu, cpu_bitmap))
2861 timehist_update_runtime_stats(tr, t, tprev);
2862
2863 if (sched->idle_hist) {
2864 struct idle_thread_runtime *itr = (void *)tr;
2865 struct thread_runtime *last_tr;
2866
2867 if (itr->last_thread == NULL)
2868 goto out;
2869
2870 /* add current idle time as last thread's runtime */
2871 last_tr = thread__get_runtime(itr->last_thread);
2872 if (last_tr == NULL)
2873 goto out;
2874
2875 timehist_update_runtime_stats(last_tr, t, tprev);
2876 /*
2877 			 * Clear the last thread's delta times: they are not
2878 			 * updated here and would otherwise show stale values
2879 			 * next time. Only the total run time and run stats matter.
2880 */
2881 last_tr->dt_run = 0;
2882 last_tr->dt_delay = 0;
2883 last_tr->dt_sleep = 0;
2884 last_tr->dt_iowait = 0;
2885 last_tr->dt_preempt = 0;
2886
2887 if (itr->cursor.nr)
2888 callchain_append(&itr->callchain, &itr->cursor, t - tprev);
2889
2890 itr->last_thread = NULL;
2891 }
2892
2893 if (!sched->summary_only)
2894 timehist_print_sample(sched, evsel, sample, &al, thread, t, state);
2895 }
2896
2897 out:
2898 if (sched->hist_time.start == 0 && t >= ptime->start)
2899 sched->hist_time.start = t;
2900 if (ptime->end == 0 || t <= ptime->end)
2901 sched->hist_time.end = t;
2902
2903 if (tr) {
2904 /* time of this sched_switch event becomes last time task seen */
2905 tr->last_time = sample->time;
2906
2907 /* last state is used to determine where to account wait time */
2908 tr->last_state = state;
2909
2910 /* sched out event for task so reset ready to run time and migrated time */
2911 if (state == 'R')
2912 tr->ready_to_run = t;
2913 else
2914 tr->ready_to_run = 0;
2915
2916 tr->migrated = 0;
2917 }
2918
2919 evsel__save_time(evsel, sample->time, sample->cpu);
2920
2921 thread__put(thread);
2922 addr_location__exit(&al);
2923 return rc;
2924 }
2925
2926 static int timehist_sched_switch_event(const struct perf_tool *tool,
2927 union perf_event *event,
2928 struct evsel *evsel,
2929 struct perf_sample *sample,
2930 struct machine *machine __maybe_unused)
2931 {
2932 return timehist_sched_change_event(tool, event, evsel, sample, machine);
2933 }
2934
2935 static int process_lost(const struct perf_tool *tool __maybe_unused,
2936 union perf_event *event,
2937 struct perf_sample *sample,
2938 struct machine *machine __maybe_unused)
2939 {
2940 char tstr[64];
2941
2942 timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
2943 printf("%15s ", tstr);
2944 printf("lost %" PRI_lu64 " events on cpu %d\n", event->lost.lost, sample->cpu);
2945
2946 return 0;
2947 }
2948
2949
2950 static void print_thread_runtime(struct thread *t,
2951 struct thread_runtime *r)
2952 {
2953 double mean = avg_stats(&r->run_stats);
2954 float stddev;
2955
2956 printf("%*s %5d %9" PRIu64 " ",
2957 comm_width, timehist_get_commstr(t), thread__ppid(t),
2958 (u64) r->run_stats.n);
2959
2960 print_sched_time(r->total_run_time, 8);
2961 stddev = rel_stddev_stats(stddev_stats(&r->run_stats), mean);
2962 print_sched_time(r->run_stats.min, 6);
2963 printf(" ");
2964 print_sched_time((u64) mean, 6);
2965 printf(" ");
2966 print_sched_time(r->run_stats.max, 6);
2967 printf(" ");
2968 printf("%5.2f", stddev);
2969 printf(" %5" PRIu64, r->migrations);
2970 printf("\n");
2971 }
2972
2973 static void print_thread_waittime(struct thread *t,
2974 struct thread_runtime *r)
2975 {
2976 printf("%*s %5d %9" PRIu64 " ",
2977 comm_width, timehist_get_commstr(t), thread__ppid(t),
2978 (u64) r->run_stats.n);
2979
2980 print_sched_time(r->total_run_time, 8);
2981 print_sched_time(r->total_sleep_time, 6);
2982 printf(" ");
2983 print_sched_time(r->total_iowait_time, 6);
2984 printf(" ");
2985 print_sched_time(r->total_preempt_time, 6);
2986 printf(" ");
2987 print_sched_time(r->total_delay_time, 6);
2988 printf("\n");
2989 }
2990
2991 struct total_run_stats {
2992 struct perf_sched *sched;
2993 u64 sched_count;
2994 u64 task_count;
2995 u64 total_run_time;
2996 };
2997
2998 static int show_thread_runtime(struct thread *t, void *priv)
2999 {
3000 struct total_run_stats *stats = priv;
3001 struct thread_runtime *r;
3002
3003 if (thread__is_filtered(t))
3004 return 0;
3005
3006 r = thread__priv(t);
3007 if (r && r->run_stats.n) {
3008 stats->task_count++;
3009 stats->sched_count += r->run_stats.n;
3010 stats->total_run_time += r->total_run_time;
3011
3012 if (stats->sched->show_state)
3013 print_thread_waittime(t, r);
3014 else
3015 print_thread_runtime(t, r);
3016 }
3017
3018 return 0;
3019 }
3020
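/*
 * Print a callchain folded onto a single line, entries separated by " <- ",
 * skipping context markers and symbols flagged as ignored.
 */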
3021 static size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node)
3022 {
3023 const char *sep = " <- ";
3024 struct callchain_list *chain;
3025 size_t ret = 0;
3026 char bf[1024];
3027 bool first;
3028
3029 if (node == NULL)
3030 return 0;
3031
3032 ret = callchain__fprintf_folded(fp, node->parent);
3033 first = (ret == 0);
3034
3035 list_for_each_entry(chain, &node->val, list) {
3036 if (chain->ip >= PERF_CONTEXT_MAX)
3037 continue;
3038 if (chain->ms.sym && chain->ms.sym->ignore)
3039 continue;
3040 ret += fprintf(fp, "%s%s", first ? "" : sep,
3041 callchain_list__sym_name(chain, bf, sizeof(bf),
3042 false));
3043 first = false;
3044 }
3045
3046 return ret;
3047 }
3048
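/*
 * Print one line per idle callchain: the accumulated idle time, the hit
 * count and the chain folded onto a single line.
 */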
3049 static size_t timehist_print_idlehist_callchain(struct rb_root_cached *root)
3050 {
3051 size_t ret = 0;
3052 FILE *fp = stdout;
3053 struct callchain_node *chain;
3054 struct rb_node *rb_node = rb_first_cached(root);
3055
3056 printf(" %16s %8s %s\n", "Idle time (msec)", "Count", "Callchains");
3057 printf(" %.16s %.8s %.50s\n", graph_dotted_line, graph_dotted_line,
3058 graph_dotted_line);
3059
3060 while (rb_node) {
3061 chain = rb_entry(rb_node, struct callchain_node, rb_node);
3062 rb_node = rb_next(rb_node);
3063
3064 ret += fprintf(fp, " ");
3065 print_sched_time(chain->hit, 12);
3066 		ret += 16; /* print_sched_time() prints roughly 2nd arg + 4 chars */
3067 ret += fprintf(fp, " %8d ", chain->count);
3068 ret += callchain__fprintf_folded(fp, chain);
3069 ret += fprintf(fp, "\n");
3070 }
3071
3072 return ret;
3073 }
3074
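/*
 * Print the end-of-run summary: the per-task runtime (or wait-time /
 * idle-time) table, per-cpu idle statistics, optional idle callchains and
 * the totals for the analysed time window.
 */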
3075 static void timehist_print_summary(struct perf_sched *sched,
3076 struct perf_session *session)
3077 {
3078 struct machine *m = &session->machines.host;
3079 struct total_run_stats totals;
3080 u64 task_count;
3081 struct thread *t;
3082 struct thread_runtime *r;
3083 int i;
3084 u64 hist_time = sched->hist_time.end - sched->hist_time.start;
3085
3086 memset(&totals, 0, sizeof(totals));
3087 totals.sched = sched;
3088
3089 if (sched->idle_hist) {
3090 printf("\nIdle-time summary\n");
3091 printf("%*s parent sched-out ", comm_width, "comm");
3092 printf(" idle-time min-idle avg-idle max-idle stddev migrations\n");
3093 } else if (sched->show_state) {
3094 printf("\nWait-time summary\n");
3095 printf("%*s parent sched-in ", comm_width, "comm");
3096 printf(" run-time sleep iowait preempt delay\n");
3097 } else {
3098 printf("\nRuntime summary\n");
3099 printf("%*s parent sched-in ", comm_width, "comm");
3100 printf(" run-time min-run avg-run max-run stddev migrations\n");
3101 }
3102 printf("%*s (count) ", comm_width, "");
3103 printf(" (msec) (msec) (msec) (msec) %s\n",
3104 sched->show_state ? "(msec)" : "%");
3105 printf("%.117s\n", graph_dotted_line);
3106
3107 machine__for_each_thread(m, show_thread_runtime, &totals);
3108 task_count = totals.task_count;
3109 if (!task_count)
3110 printf("<no still running tasks>\n");
3111
3112 /* CPU idle stats not tracked when samples were skipped */
3113 if (sched->skipped_samples && !sched->idle_hist)
3114 return;
3115
3116 printf("\nIdle stats:\n");
3117 for (i = 0; i < idle_max_cpu; ++i) {
3118 if (cpu_list && !test_bit(i, cpu_bitmap))
3119 continue;
3120
3121 t = idle_threads[i];
3122 if (!t)
3123 continue;
3124
3125 r = thread__priv(t);
3126 if (r && r->run_stats.n) {
3127 totals.sched_count += r->run_stats.n;
3128 printf(" CPU %2d idle for ", i);
3129 print_sched_time(r->total_run_time, 6);
3130 printf(" msec (%6.2f%%)\n", 100.0 * r->total_run_time / hist_time);
3131 } else
3132 printf(" CPU %2d idle entire time window\n", i);
3133 }
3134
3135 if (sched->idle_hist && sched->show_callchain) {
3136 callchain_param.mode = CHAIN_FOLDED;
3137 callchain_param.value = CCVAL_PERIOD;
3138
3139 callchain_register_param(&callchain_param);
3140
3141 printf("\nIdle stats by callchain:\n");
3142 for (i = 0; i < idle_max_cpu; ++i) {
3143 struct idle_thread_runtime *itr;
3144
3145 t = idle_threads[i];
3146 if (!t)
3147 continue;
3148
3149 itr = thread__priv(t);
3150 if (itr == NULL)
3151 continue;
3152
3153 callchain_param.sort(&itr->sorted_root.rb_root, &itr->callchain,
3154 0, &callchain_param);
3155
3156 printf(" CPU %2d:", i);
3157 print_sched_time(itr->tr.total_run_time, 6);
3158 printf(" msec\n");
3159 timehist_print_idlehist_callchain(&itr->sorted_root);
3160 printf("\n");
3161 }
3162 }
3163
3164 printf("\n"
3165 " Total number of unique tasks: %" PRIu64 "\n"
3166 "Total number of context switches: %" PRIu64 "\n",
3167 totals.task_count, totals.sched_count);
3168
3169 printf(" Total run time (msec): ");
3170 print_sched_time(totals.total_run_time, 2);
3171 printf("\n");
3172
3173 printf(" Total scheduling time (msec): ");
3174 print_sched_time(hist_time, 2);
3175 printf(" (x %d)\n", sched->max_cpu.cpu);
3176 }
3177
3178 typedef int (*sched_handler)(const struct perf_tool *tool,
3179 union perf_event *event,
3180 struct evsel *evsel,
3181 struct perf_sample *sample,
3182 struct machine *machine);
3183
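/*
 * Per-sample entry point for timehist: track the highest cpu number seen and
 * hand the sample to the handler installed on its evsel.
 */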
3184 static int perf_timehist__process_sample(const struct perf_tool *tool,
3185 union perf_event *event,
3186 struct perf_sample *sample,
3187 struct evsel *evsel,
3188 struct machine *machine)
3189 {
3190 struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
3191 int err = 0;
3192 struct perf_cpu this_cpu = {
3193 .cpu = sample->cpu,
3194 };
3195
3196 if (this_cpu.cpu > sched->max_cpu.cpu)
3197 sched->max_cpu = this_cpu;
3198
3199 if (evsel->handler != NULL) {
3200 sched_handler f = evsel->handler;
3201
3202 err = f(tool, event, evsel, sample, machine);
3203 }
3204
3205 return err;
3206 }
3207
3208 static int timehist_check_attr(struct perf_sched *sched,
3209 struct evlist *evlist)
3210 {
3211 struct evsel *evsel;
3212 struct evsel_runtime *er;
3213
3214 list_for_each_entry(evsel, &evlist->core.entries, core.node) {
3215 er = evsel__get_runtime(evsel);
3216 if (er == NULL) {
3217 pr_err("Failed to allocate memory for evsel runtime data\n");
3218 return -1;
3219 }
3220
3221 /* only need to save callchain related to sched_switch event */
3222 if (sched->show_callchain &&
3223 evsel__name_is(evsel, "sched:sched_switch") &&
3224 !evsel__has_callchain(evsel)) {
3225 pr_info("Samples of sched_switch event do not have callchains.\n");
3226 sched->show_callchain = 0;
3227 symbol_conf.use_callchain = 0;
3228 }
3229 }
3230
3231 return 0;
3232 }
3233
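/*
 * Parse a priority filter string such as "9", "100-120" or "0-39,120" into
 * sched->prio_bitmap; ranges are inclusive.
 */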
3234 static int timehist_parse_prio_str(struct perf_sched *sched)
3235 {
3236 char *p;
3237 unsigned long start_prio, end_prio;
3238 const char *str = sched->prio_str;
3239
3240 if (!str)
3241 return 0;
3242
3243 while (isdigit(*str)) {
3244 p = NULL;
3245 start_prio = strtoul(str, &p, 0);
3246 if (start_prio >= MAX_PRIO || (*p != '\0' && *p != ',' && *p != '-'))
3247 return -1;
3248
3249 if (*p == '-') {
3250 str = ++p;
3251 p = NULL;
3252 end_prio = strtoul(str, &p, 0);
3253
3254 if (end_prio >= MAX_PRIO || (*p != '\0' && *p != ','))
3255 return -1;
3256
3257 if (end_prio < start_prio)
3258 return -1;
3259 } else {
3260 end_prio = start_prio;
3261 }
3262
3263 for (; start_prio <= end_prio; start_prio++)
3264 __set_bit(start_prio, sched->prio_bitmap);
3265
3266 if (*p)
3267 ++p;
3268
3269 str = p;
3270 }
3271
3272 return 0;
3273 }
3274
3275 static int perf_sched__timehist(struct perf_sched *sched)
3276 {
3277 struct evsel_str_handler handlers[] = {
3278 { "sched:sched_switch", timehist_sched_switch_event, },
3279 { "sched:sched_wakeup", timehist_sched_wakeup_event, },
3280 { "sched:sched_waking", timehist_sched_wakeup_event, },
3281 { "sched:sched_wakeup_new", timehist_sched_wakeup_event, },
3282 };
3283 const struct evsel_str_handler migrate_handlers[] = {
3284 { "sched:sched_migrate_task", timehist_migrate_task_event, },
3285 };
3286 struct perf_data data = {
3287 .path = input_name,
3288 .mode = PERF_DATA_MODE_READ,
3289 .force = sched->force,
3290 };
3291
3292 struct perf_session *session;
3293 struct perf_env *env;
3294 struct evlist *evlist;
3295 int err = -1;
3296
3297 /*
3298 * event handlers for timehist option
3299 */
3300 sched->tool.sample = perf_timehist__process_sample;
3301 sched->tool.mmap = perf_event__process_mmap;
3302 sched->tool.comm = perf_event__process_comm;
3303 sched->tool.exit = perf_event__process_exit;
3304 sched->tool.fork = perf_event__process_fork;
3305 sched->tool.lost = process_lost;
3306 sched->tool.attr = perf_event__process_attr;
3307 sched->tool.tracing_data = perf_event__process_tracing_data;
3308 sched->tool.build_id = perf_event__process_build_id;
3309
3310 sched->tool.ordering_requires_timestamps = true;
3311
3312 symbol_conf.use_callchain = sched->show_callchain;
3313
3314 session = perf_session__new(&data, &sched->tool);
3315 if (IS_ERR(session))
3316 return PTR_ERR(session);
3317
3318 env = perf_session__env(session);
3319 if (cpu_list) {
3320 err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
3321 if (err < 0)
3322 goto out;
3323 }
3324
3325 evlist = session->evlist;
3326
3327 symbol__init(env);
3328
3329 if (perf_time__parse_str(&sched->ptime, sched->time_str) != 0) {
3330 pr_err("Invalid time string\n");
3331 err = -EINVAL;
3332 goto out;
3333 }
3334
3335 if (timehist_check_attr(sched, evlist) != 0)
3336 goto out;
3337
3338 if (timehist_parse_prio_str(sched) != 0) {
3339 pr_err("Invalid prio string\n");
3340 goto out;
3341 }
3342
3343 setup_pager();
3344
3345 evsel__set_priv_destructor(timehist__evsel_priv_destructor);
3346
3347 /* prefer sched_waking if it is captured */
3348 if (evlist__find_tracepoint_by_name(session->evlist, "sched:sched_waking"))
3349 handlers[1].handler = timehist_sched_wakeup_ignore;
3350
3351 /* setup per-evsel handlers */
3352 if (perf_session__set_tracepoints_handlers(session, handlers))
3353 goto out;
3354
3355 /* sched_switch event at a minimum needs to exist */
3356 if (!evlist__find_tracepoint_by_name(session->evlist, "sched:sched_switch")) {
3357 pr_err("No sched_switch events found. Have you run 'perf sched record'?\n");
3358 goto out;
3359 }
3360
3361 if ((sched->show_migrations || sched->pre_migrations) &&
3362 perf_session__set_tracepoints_handlers(session, migrate_handlers))
3363 goto out;
3364
3365 /* pre-allocate struct for per-CPU idle stats */
3366 sched->max_cpu.cpu = env->nr_cpus_online;
3367 if (sched->max_cpu.cpu == 0)
3368 sched->max_cpu.cpu = 4;
3369 if (init_idle_threads(sched->max_cpu.cpu))
3370 goto out;
3371
3372 /* summary_only implies summary option, but don't overwrite summary if set */
3373 if (sched->summary_only)
3374 sched->summary = sched->summary_only;
3375
3376 if (!sched->summary_only)
3377 timehist_header(sched);
3378
3379 err = perf_session__process_events(session);
3380 if (err) {
3381 pr_err("Failed to process events, error %d", err);
3382 goto out;
3383 }
3384
3385 sched->nr_events = evlist->stats.nr_events[0];
3386 sched->nr_lost_events = evlist->stats.total_lost;
3387 sched->nr_lost_chunks = evlist->stats.nr_events[PERF_RECORD_LOST];
3388
3389 if (sched->summary)
3390 timehist_print_summary(sched, session);
3391
3392 out:
3393 free_idle_threads();
3394 perf_session__delete(session);
3395
3396 return err;
3397 }
3398
3399
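/*
 * Report unordered timestamps, lost events and apparent context switch bugs
 * noticed while processing the trace.
 */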
3400 static void print_bad_events(struct perf_sched *sched)
3401 {
3402 if (sched->nr_unordered_timestamps && sched->nr_timestamps) {
3403 printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n",
3404 (double)sched->nr_unordered_timestamps/(double)sched->nr_timestamps*100.0,
3405 sched->nr_unordered_timestamps, sched->nr_timestamps);
3406 }
3407 if (sched->nr_lost_events && sched->nr_events) {
3408 printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
3409 (double)sched->nr_lost_events/(double)sched->nr_events * 100.0,
3410 sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks);
3411 }
3412 if (sched->nr_context_switch_bugs && sched->nr_timestamps) {
3413 printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)",
3414 (double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0,
3415 sched->nr_context_switch_bugs, sched->nr_timestamps);
3416 if (sched->nr_lost_events)
3417 printf(" (due to lost events?)");
3418 printf("\n");
3419 }
3420 }
3421
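/*
 * Merge work atoms that share the same comm into a single node keyed by
 * comm, accumulating runtime, latency and atom counts and keeping the
 * largest maximum latency (and its start/end times).
 */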
3422 static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *data)
3423 {
3424 struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL;
3425 struct work_atoms *this;
3426 const char *comm = thread__comm_str(data->thread), *this_comm;
3427 bool leftmost = true;
3428
3429 while (*new) {
3430 int cmp;
3431
3432 this = container_of(*new, struct work_atoms, node);
3433 parent = *new;
3434
3435 this_comm = thread__comm_str(this->thread);
3436 cmp = strcmp(comm, this_comm);
3437 if (cmp > 0) {
3438 new = &((*new)->rb_left);
3439 } else if (cmp < 0) {
3440 new = &((*new)->rb_right);
3441 leftmost = false;
3442 } else {
3443 this->num_merged++;
3444 this->total_runtime += data->total_runtime;
3445 this->nb_atoms += data->nb_atoms;
3446 this->total_lat += data->total_lat;
3447 list_splice_init(&data->work_list, &this->work_list);
3448 if (this->max_lat < data->max_lat) {
3449 this->max_lat = data->max_lat;
3450 this->max_lat_start = data->max_lat_start;
3451 this->max_lat_end = data->max_lat_end;
3452 }
3453 free_work_atoms(data);
3454 return;
3455 }
3456 }
3457
3458 data->num_merged++;
3459 rb_link_node(&data->node, parent, new);
3460 rb_insert_color_cached(&data->node, root, leftmost);
3461 }
3462
3463 static void perf_sched__merge_lat(struct perf_sched *sched)
3464 {
3465 struct work_atoms *data;
3466 struct rb_node *node;
3467
3468 if (sched->skip_merge)
3469 return;
3470
3471 while ((node = rb_first_cached(&sched->atom_root))) {
3472 rb_erase_cached(node, &sched->atom_root);
3473 data = rb_entry(node, struct work_atoms, node);
3474 __merge_work_atoms(&sched->merged_atom_root, data);
3475 }
3476 }
3477
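/*
 * Allocate the per-cpu bookkeeping for switch events: the last switch
 * timestamp and the currently running pid, initialised to -1 (unknown).
 */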
3478 static int setup_cpus_switch_event(struct perf_sched *sched)
3479 {
3480 unsigned int i;
3481
3482 sched->cpu_last_switched = calloc(MAX_CPUS, sizeof(*(sched->cpu_last_switched)));
3483 if (!sched->cpu_last_switched)
3484 return -1;
3485
3486 sched->curr_pid = malloc(MAX_CPUS * sizeof(*(sched->curr_pid)));
3487 if (!sched->curr_pid) {
3488 zfree(&sched->cpu_last_switched);
3489 return -1;
3490 }
3491
3492 for (i = 0; i < MAX_CPUS; i++)
3493 sched->curr_pid[i] = -1;
3494
3495 return 0;
3496 }
3497
3498 static void free_cpus_switch_event(struct perf_sched *sched)
3499 {
3500 zfree(&sched->curr_pid);
3501 zfree(&sched->cpu_last_switched);
3502 }
3503
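/*
 * 'perf sched latency' body: read the trace, merge atoms belonging to the
 * same comm (unless merging is disabled), sort them, then print one latency
 * line per task followed by totals and any bad-event warnings.
 */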
3504 static int perf_sched__lat(struct perf_sched *sched)
3505 {
3506 int rc = -1;
3507 struct rb_node *next;
3508
3509 setup_pager();
3510
3511 if (setup_cpus_switch_event(sched))
3512 return rc;
3513
3514 if (perf_sched__read_events(sched))
3515 goto out_free_cpus_switch_event;
3516
3517 perf_sched__merge_lat(sched);
3518 perf_sched__sort_lat(sched);
3519
3520 printf("\n -------------------------------------------------------------------------------------------------------------------------------------------\n");
3521 printf(" Task | Runtime ms | Count | Avg delay ms | Max delay ms | Max delay start | Max delay end |\n");
3522 printf(" -------------------------------------------------------------------------------------------------------------------------------------------\n");
3523
3524 next = rb_first_cached(&sched->sorted_atom_root);
3525
3526 while (next) {
3527 struct work_atoms *work_list;
3528
3529 work_list = rb_entry(next, struct work_atoms, node);
3530 output_lat_thread(sched, work_list);
3531 next = rb_next(next);
3532 }
3533
3534 printf(" -----------------------------------------------------------------------------------------------------------------\n");
3535 printf(" TOTAL: |%11.3f ms |%9" PRIu64 " |\n",
3536 (double)sched->all_runtime / NSEC_PER_MSEC, sched->all_count);
3537
3538 printf(" ---------------------------------------------------\n");
3539
3540 print_bad_events(sched);
3541 printf("\n");
3542
3543 rc = 0;
3544
3545 while ((next = rb_first_cached(&sched->sorted_atom_root))) {
3546 struct work_atoms *data;
3547
3548 data = rb_entry(next, struct work_atoms, node);
3549 rb_erase_cached(next, &sched->sorted_atom_root);
3550 free_work_atoms(data);
3551 }
3552 out_free_cpus_switch_event:
3553 free_cpus_switch_event(sched);
3554 return rc;
3555 }
3556
3557 static int setup_map_cpus(struct perf_sched *sched)
3558 {
3559 sched->max_cpu.cpu = sysconf(_SC_NPROCESSORS_CONF);
3560
3561 if (sched->map.comp) {
3562 sched->map.comp_cpus = zalloc(sched->max_cpu.cpu * sizeof(int));
3563 if (!sched->map.comp_cpus)
3564 return -1;
3565 }
3566
3567 if (sched->map.cpus_str) {
3568 sched->map.cpus = perf_cpu_map__new(sched->map.cpus_str);
3569 if (!sched->map.cpus) {
3570 pr_err("failed to get cpus map from %s\n", sched->map.cpus_str);
3571 zfree(&sched->map.comp_cpus);
3572 return -1;
3573 }
3574 }
3575
3576 return 0;
3577 }
3578
3579 static int setup_color_pids(struct perf_sched *sched)
3580 {
3581 struct perf_thread_map *map;
3582
3583 if (!sched->map.color_pids_str)
3584 return 0;
3585
3586 map = thread_map__new_by_tid_str(sched->map.color_pids_str);
3587 if (!map) {
3588 pr_err("failed to get thread map from %s\n", sched->map.color_pids_str);
3589 return -1;
3590 }
3591
3592 sched->map.color_pids = map;
3593 return 0;
3594 }
3595
3596 static int setup_color_cpus(struct perf_sched *sched)
3597 {
3598 struct perf_cpu_map *map;
3599
3600 if (!sched->map.color_cpus_str)
3601 return 0;
3602
3603 map = perf_cpu_map__new(sched->map.color_cpus_str);
3604 if (!map) {
3605 		pr_err("failed to get cpus map from %s\n", sched->map.color_cpus_str);
3606 return -1;
3607 }
3608
3609 sched->map.color_cpus = map;
3610 return 0;
3611 }
3612
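/*
 * 'perf sched map' body: set up per-cpu bookkeeping plus the optional cpu
 * compaction and cpu/pid colouring, then replay the trace to print the
 * scheduling map.
 */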
3613 static int perf_sched__map(struct perf_sched *sched)
3614 {
3615 int rc = -1;
3616
3617 sched->curr_thread = calloc(MAX_CPUS, sizeof(*(sched->curr_thread)));
3618 if (!sched->curr_thread)
3619 return rc;
3620
3621 sched->curr_out_thread = calloc(MAX_CPUS, sizeof(*(sched->curr_out_thread)));
3622 if (!sched->curr_out_thread)
3623 goto out_free_curr_thread;
3624
3625 if (setup_cpus_switch_event(sched))
3626 goto out_free_curr_out_thread;
3627
3628 if (setup_map_cpus(sched))
3629 goto out_free_cpus_switch_event;
3630
3631 if (setup_color_pids(sched))
3632 goto out_put_map_cpus;
3633
3634 if (setup_color_cpus(sched))
3635 goto out_put_color_pids;
3636
3637 setup_pager();
3638 if (perf_sched__read_events(sched))
3639 goto out_put_color_cpus;
3640
3641 rc = 0;
3642 print_bad_events(sched);
3643
3644 out_put_color_cpus:
3645 perf_cpu_map__put(sched->map.color_cpus);
3646
3647 out_put_color_pids:
3648 perf_thread_map__put(sched->map.color_pids);
3649
3650 out_put_map_cpus:
3651 zfree(&sched->map.comp_cpus);
3652 perf_cpu_map__put(sched->map.cpus);
3653
3654 out_free_cpus_switch_event:
3655 free_cpus_switch_event(sched);
3656
3657 out_free_curr_out_thread:
3658 for (int i = 0; i < MAX_CPUS; i++)
3659 thread__put(sched->curr_out_thread[i]);
3660 zfree(&sched->curr_out_thread);
3661
3662 out_free_curr_thread:
3663 for (int i = 0; i < MAX_CPUS; i++)
3664 thread__put(sched->curr_thread[i]);
3665 zfree(&sched->curr_thread);
3666 return rc;
3667 }
3668
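/*
 * 'perf sched replay' body: calibrate measurement overheads, rebuild the
 * recorded workload as real threads and re-run it replay_repeat times
 * (a repeat count of 0 means effectively forever).
 */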
3669 static int perf_sched__replay(struct perf_sched *sched)
3670 {
3671 int ret;
3672 unsigned long i;
3673
3674 mutex_init(&sched->start_work_mutex);
3675 mutex_init(&sched->work_done_wait_mutex);
3676
3677 ret = setup_cpus_switch_event(sched);
3678 if (ret)
3679 goto out_mutex_destroy;
3680
3681 calibrate_run_measurement_overhead(sched);
3682 calibrate_sleep_measurement_overhead(sched);
3683
3684 test_calibrations(sched);
3685
3686 ret = perf_sched__read_events(sched);
3687 if (ret)
3688 goto out_free_cpus_switch_event;
3689
3690 printf("nr_run_events: %ld\n", sched->nr_run_events);
3691 printf("nr_sleep_events: %ld\n", sched->nr_sleep_events);
3692 printf("nr_wakeup_events: %ld\n", sched->nr_wakeup_events);
3693
3694 if (sched->targetless_wakeups)
3695 printf("target-less wakeups: %ld\n", sched->targetless_wakeups);
3696 if (sched->multitarget_wakeups)
3697 printf("multi-target wakeups: %ld\n", sched->multitarget_wakeups);
3698 if (sched->nr_run_events_optimized)
3699 printf("run atoms optimized: %ld\n",
3700 sched->nr_run_events_optimized);
3701
3702 print_task_traces(sched);
3703 add_cross_task_wakeups(sched);
3704
3705 sched->thread_funcs_exit = false;
3706 create_tasks(sched);
3707 printf("------------------------------------------------------------\n");
3708 if (sched->replay_repeat == 0)
3709 sched->replay_repeat = UINT_MAX;
3710
3711 for (i = 0; i < sched->replay_repeat; i++)
3712 run_one_test(sched);
3713
3714 sched->thread_funcs_exit = true;
3715 destroy_tasks(sched);
3716
3717 out_free_cpus_switch_event:
3718 free_cpus_switch_event(sched);
3719
3720 out_mutex_destroy:
3721 mutex_destroy(&sched->start_work_mutex);
3722 mutex_destroy(&sched->work_done_wait_mutex);
3723 return ret;
3724 }
3725
3726 static void setup_sorting(struct perf_sched *sched, const struct option *options,
3727 const char * const usage_msg[])
3728 {
3729 char *tmp, *tok, *str = strdup(sched->sort_order);
3730
3731 for (tok = strtok_r(str, ", ", &tmp);
3732 tok; tok = strtok_r(NULL, ", ", &tmp)) {
3733 if (sort_dimension__add(tok, &sched->sort_list) < 0) {
3734 usage_with_options_msg(usage_msg, options,
3735 "Unknown --sort key: `%s'", tok);
3736 }
3737 }
3738
3739 free(str);
3740
3741 sort_dimension__add("pid", &sched->cmp_pid);
3742 }
3743
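/*
 * Write a synthesized schedstat event straight into the output perf.data
 * file and account for its size in the session header.
 */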
3744 static int process_synthesized_schedstat_event(const struct perf_tool *tool,
3745 union perf_event *event,
3746 struct perf_sample *sample __maybe_unused,
3747 struct machine *machine __maybe_unused)
3748 {
3749 struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
3750
3751 if (perf_data__write(sched->data, event, event->header.size) <= 0) {
3752 pr_err("failed to write perf data, error: %m\n");
3753 return -1;
3754 }
3755
3756 sched->session->header.data_size += event->header.size;
3757 return 0;
3758 }
3759
3760 static void sighandler(int sig __maybe_unused)
3761 {
3762 }
3763
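/*
 * Make sure schedstat accounting is on for the duration of the profile:
 * if /proc/sys/kernel/sched_schedstats currently reads '0', flip it to '1'
 * and remember via *reset that it must be restored afterwards.
 */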
3764 static int enable_sched_schedstats(int *reset)
3765 {
3766 char path[PATH_MAX];
3767 FILE *fp;
3768 char ch;
3769
3770 snprintf(path, PATH_MAX, "%s/sys/kernel/sched_schedstats", procfs__mountpoint());
3771 fp = fopen(path, "w+");
3772 if (!fp) {
3773 pr_err("Failed to open %s\n", path);
3774 return -1;
3775 }
3776
3777 ch = getc(fp);
3778 if (ch == '0') {
3779 *reset = 1;
3780 rewind(fp);
3781 putc('1', fp);
3782 fclose(fp);
3783 }
3784 return 0;
3785 }
3786
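/* Restore /proc/sys/kernel/sched_schedstats to '0' after profiling. */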
3787 static int disable_sched_schedstat(void)
3788 {
3789 char path[PATH_MAX];
3790 FILE *fp;
3791
3792 snprintf(path, PATH_MAX, "%s/sys/kernel/sched_schedstats", procfs__mountpoint());
3793 fp = fopen(path, "w");
3794 if (!fp) {
3795 pr_err("Failed to open %s\n", path);
3796 return -1;
3797 }
3798
3799 putc('0', fp);
3800 fclose(fp);
3801 return 0;
3802 }
3803
3804 /* perf.data or any other output file name used by the stats subcommand (only). */
3805 const char *output_name;
3806
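/*
 * `perf sched stats record`: take a schedstat snapshot of the requested
 * cpus, optionally start a workload, wait for a signal (workload exit or
 * Ctrl-C), take a second snapshot, and write both snapshots plus the usual
 * header features to the output perf.data file.
 */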
3807 static int perf_sched__schedstat_record(struct perf_sched *sched,
3808 int argc, const char **argv)
3809 {
3810 struct perf_session *session;
3811 struct target target = {};
3812 struct evlist *evlist;
3813 int reset = 0;
3814 int err = 0;
3815 int fd;
3816 struct perf_data data = {
3817 .path = output_name,
3818 .mode = PERF_DATA_MODE_WRITE,
3819 };
3820
3821 signal(SIGINT, sighandler);
3822 signal(SIGCHLD, sighandler);
3823 signal(SIGTERM, sighandler);
3824
3825 evlist = evlist__new();
3826 if (!evlist)
3827 return -ENOMEM;
3828
3829 session = perf_session__new(&data, &sched->tool);
3830 if (IS_ERR(session)) {
3831 pr_err("Perf session creation failed.\n");
3832 evlist__delete(evlist);
3833 return PTR_ERR(session);
3834 }
3835
3836 session->evlist = evlist;
3837
3838 sched->session = session;
3839 sched->data = &data;
3840
3841 fd = perf_data__fd(&data);
3842
3843 /*
3844 * Capture all important metadata about the system. Although they are
3845 * not used by the `perf sched stats` tool directly, they provide useful
3846 * information about the profiled environment.
3847 */
3848 perf_header__set_feat(&session->header, HEADER_HOSTNAME);
3849 perf_header__set_feat(&session->header, HEADER_OSRELEASE);
3850 perf_header__set_feat(&session->header, HEADER_VERSION);
3851 perf_header__set_feat(&session->header, HEADER_ARCH);
3852 perf_header__set_feat(&session->header, HEADER_NRCPUS);
3853 perf_header__set_feat(&session->header, HEADER_CPUDESC);
3854 perf_header__set_feat(&session->header, HEADER_CPUID);
3855 perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
3856 perf_header__set_feat(&session->header, HEADER_CMDLINE);
3857 perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
3858 perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
3859 perf_header__set_feat(&session->header, HEADER_CACHE);
3860 perf_header__set_feat(&session->header, HEADER_MEM_TOPOLOGY);
3861 perf_header__set_feat(&session->header, HEADER_HYBRID_TOPOLOGY);
3862 perf_header__set_feat(&session->header, HEADER_CPU_DOMAIN_INFO);
3863
3864 err = perf_session__write_header(session, evlist, fd, false);
3865 if (err < 0)
3866 goto out;
3867
3868 /*
3869 * `perf sched stats` does not support workload profiling (-p pid)
3870 * since the /proc/schedstat file contains cpu-specific data only. Hence, a
3871 * profile target is either a set of cpus or systemwide, never a process.
3872 * Note that, although `-- <workload>` is supported, the profile data are
3873 * still cpu/systemwide.
3874 */
3875 if (cpu_list)
3876 target.cpu_list = cpu_list;
3877 else
3878 target.system_wide = true;
3879
3880 if (argc) {
3881 err = evlist__prepare_workload(evlist, &target, argv, false, NULL);
3882 if (err)
3883 goto out;
3884 }
3885
3886 err = evlist__create_maps(evlist, &target);
3887 if (err < 0)
3888 goto out;
3889
3890 user_requested_cpus = evlist->core.user_requested_cpus;
3891
3892 err = perf_event__synthesize_schedstat(&(sched->tool),
3893 process_synthesized_schedstat_event,
3894 user_requested_cpus);
3895 if (err < 0)
3896 goto out;
3897
3898 err = enable_sched_schedstats(&reset);
3899 if (err < 0)
3900 goto out;
3901
3902 if (argc)
3903 evlist__start_workload(evlist);
3904
3905 /* wait for signal */
3906 pause();
3907
3908 if (reset) {
3909 err = disable_sched_schedstat();
3910 if (err < 0)
3911 goto out;
3912 }
3913
3914 err = perf_event__synthesize_schedstat(&(sched->tool),
3915 process_synthesized_schedstat_event,
3916 user_requested_cpus);
3917 if (err < 0)
3918 goto out;
3919
3920 err = perf_session__write_header(session, evlist, fd, true);
3921
3922 out:
3923 if (!err)
3924 fprintf(stderr, "[ perf sched stats: Wrote samples to %s ]\n", data.path);
3925 else
3926 fprintf(stderr, "[ perf sched stats: Failed !! ]\n");
3927
3928 evlist__delete(evlist);
3929 close(fd);
3930 return err;
3931 }
3932
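/*
 * In-memory representation of the schedstat records read back from the
 * perf.data file: one schedstat_cpu node per cpu, each carrying a list of
 * schedstat_domain nodes (see the linked-list diagram above
 * perf_sched__process_schedstat()).
 */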
3933 struct schedstat_domain {
3934 struct list_head domain_list;
3935 struct perf_record_schedstat_domain *domain_data;
3936 };
3937
3938 struct schedstat_cpu {
3939 struct list_head cpu_list;
3940 struct list_head domain_head;
3941 struct perf_record_schedstat_cpu *cpu_data;
3942 };
3943
3944 static struct list_head cpu_head = LIST_HEAD_INIT(cpu_head);
3945 static struct schedstat_cpu *cpu_second_pass;
3946 static struct schedstat_domain *domain_second_pass;
3947 static bool after_workload_flag;
3948 static bool verbose_field;
3949
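/*
 * Second pass over a cpu record: overwrite the first-pass ("before")
 * values with the after - before delta for every versioned cpu field.
 */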
3950 static void store_schedstat_cpu_diff(struct schedstat_cpu *after_workload)
3951 {
3952 struct perf_record_schedstat_cpu *before = cpu_second_pass->cpu_data;
3953 struct perf_record_schedstat_cpu *after = after_workload->cpu_data;
3954 __u16 version = after_workload->cpu_data->version;
3955
3956 #define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver) \
3957 (before->_ver._name = after->_ver._name - before->_ver._name)
3958
3959 if (version == 15) {
3960 #include <perf/schedstat-v15.h>
3961 } else if (version == 16) {
3962 #include <perf/schedstat-v16.h>
3963 } else if (version == 17) {
3964 #include <perf/schedstat-v17.h>
3965 }
3966
3967 #undef CPU_FIELD
3968 }
3969
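/* Same as store_schedstat_cpu_diff() but for the per-domain fields. */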
3970 static void store_schedstat_domain_diff(struct schedstat_domain *after_workload)
3971 {
3972 struct perf_record_schedstat_domain *before = domain_second_pass->domain_data;
3973 struct perf_record_schedstat_domain *after = after_workload->domain_data;
3974 __u16 version = after_workload->domain_data->version;
3975
3976 #define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver) \
3977 (before->_ver._name = after->_ver._name - before->_ver._name)
3978
3979 if (version == 15) {
3980 #include <perf/schedstat-v15.h>
3981 } else if (version == 16) {
3982 #include <perf/schedstat-v16.h>
3983 } else if (version == 17) {
3984 #include <perf/schedstat-v17.h>
3985 }
3986 #undef DOMAIN_FIELD
3987 }
3988
3989 #define PCT_CHNG(_x, _y) ((_x) ? ((double)((double)(_y) - (_x)) / (_x)) * 100 : 0.0)
3990 static inline void print_cpu_stats(struct perf_record_schedstat_cpu *cs1,
3991 struct perf_record_schedstat_cpu *cs2)
3992 {
3993 printf("%-65s ", "DESC");
3994 if (!cs2)
3995 printf("%12s %12s", "COUNT", "PCT_CHANGE");
3996 else
3997 printf("%12s %11s %12s %14s %10s", "COUNT1", "COUNT2", "PCT_CHANGE",
3998 "PCT_CHANGE1", "PCT_CHANGE2");
3999
4000 printf("\n");
4001 print_separator2(SEP_LEN, "", 0);
4002
4003 #define CALC_PCT(_x, _y) ((_y) ? ((double)(_x) / (_y)) * 100 : 0.0)
4004
4005 #define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver) \
4006 do { \
4007 printf("%-65s: " _format, verbose_field ? _desc : #_name, \
4008 cs1->_ver._name); \
4009 if (!cs2) { \
4010 if (_is_pct) \
4011 printf(" ( %8.2lf%% )", \
4012 CALC_PCT(cs1->_ver._name, cs1->_ver._pct_of)); \
4013 } else { \
4014 printf("," _format " | %8.2lf%% |", cs2->_ver._name, \
4015 PCT_CHNG(cs1->_ver._name, cs2->_ver._name)); \
4016 if (_is_pct) \
4017 printf(" ( %8.2lf%%, %8.2lf%% )", \
4018 CALC_PCT(cs1->_ver._name, cs1->_ver._pct_of), \
4019 CALC_PCT(cs2->_ver._name, cs2->_ver._pct_of)); \
4020 } \
4021 printf("\n"); \
4022 } while (0)
4023
4024 if (cs1->version == 15) {
4025 #include <perf/schedstat-v15.h>
4026 } else if (cs1->version == 16) {
4027 #include <perf/schedstat-v16.h>
4028 } else if (cs1->version == 17) {
4029 #include <perf/schedstat-v17.h>
4030 }
4031
4032 #undef CPU_FIELD
4033 #undef CALC_PCT
4034 }
4035
4036 static inline void print_domain_stats(struct perf_record_schedstat_domain *ds1,
4037 struct perf_record_schedstat_domain *ds2,
4038 __u64 jiffies1, __u64 jiffies2)
4039 {
4040 printf("%-65s ", "DESC");
4041 if (!ds2)
4042 printf("%12s %14s", "COUNT", "AVG_JIFFIES");
4043 else
4044 printf("%12s %11s %12s %16s %12s", "COUNT1", "COUNT2", "PCT_CHANGE",
4045 "AVG_JIFFIES1", "AVG_JIFFIES2");
4046 printf("\n");
4047
4048 #define DOMAIN_CATEGORY(_desc) \
4049 do { \
4050 size_t _len = strlen(_desc); \
4051 size_t _pre_dash_cnt = (SEP_LEN - _len) / 2; \
4052 size_t _post_dash_cnt = SEP_LEN - _len - _pre_dash_cnt; \
4053 print_separator2((int)_pre_dash_cnt, _desc, (int)_post_dash_cnt);\
4054 } while (0)
4055
4056 #define CALC_AVG(_x, _y) ((_y) ? (long double)(_x) / (_y) : 0.0)
4057
4058 #define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver) \
4059 do { \
4060 printf("%-65s: " _format, verbose_field ? _desc : #_name, \
4061 ds1->_ver._name); \
4062 if (!ds2) { \
4063 if (_is_jiffies) \
4064 printf(" $ %11.2Lf $", \
4065 CALC_AVG(jiffies1, ds1->_ver._name)); \
4066 } else { \
4067 printf("," _format " | %8.2lf%% |", ds2->_ver._name, \
4068 PCT_CHNG(ds1->_ver._name, ds2->_ver._name)); \
4069 if (_is_jiffies) \
4070 printf(" $ %11.2Lf, %11.2Lf $", \
4071 CALC_AVG(jiffies1, ds1->_ver._name), \
4072 CALC_AVG(jiffies2, ds2->_ver._name)); \
4073 } \
4074 printf("\n"); \
4075 } while (0)
4076
4077 #define DERIVED_CNT_FIELD(_name, _desc, _format, _x, _y, _z, _ver) \
4078 do { \
4079 __u32 t1 = ds1->_ver._x - ds1->_ver._y - ds1->_ver._z; \
4080 printf("*%-64s: " _format, verbose_field ? _desc : #_name, t1); \
4081 if (ds2) { \
4082 __u32 t2 = ds2->_ver._x - ds2->_ver._y - ds2->_ver._z; \
4083 printf("," _format " | %8.2lf%% |", t2, \
4084 PCT_CHNG(t1, t2)); \
4085 } \
4086 printf("\n"); \
4087 } while (0)
4088
4089 #define DERIVED_AVG_FIELD(_name, _desc, _format, _x, _y, _z, _w, _ver) \
4090 do { \
4091 __u32 t1 = ds1->_ver._x - ds1->_ver._y - ds1->_ver._z; \
4092 printf("*%-64s: " _format, verbose_field ? _desc : #_name, \
4093 CALC_AVG(ds1->_ver._w, t1)); \
4094 if (ds2) { \
4095 __u32 t2 = ds2->_ver._x - ds2->_ver._y - ds2->_ver._z; \
4096 printf("," _format " | %8.2Lf%% |", \
4097 CALC_AVG(ds2->_ver._w, t2), \
4098 PCT_CHNG(CALC_AVG(ds1->_ver._w, t1), \
4099 CALC_AVG(ds2->_ver._w, t2))); \
4100 } \
4101 printf("\n"); \
4102 } while (0)
4103
4104 if (ds1->version == 15) {
4105 #include <perf/schedstat-v15.h>
4106 } else if (ds1->version == 16) {
4107 #include <perf/schedstat-v16.h>
4108 } else if (ds1->version == 17) {
4109 #include <perf/schedstat-v17.h>
4110 }
4111
4112 #undef DERIVED_AVG_FIELD
4113 #undef DERIVED_CNT_FIELD
4114 #undef DOMAIN_FIELD
4115 #undef CALC_AVG
4116 #undef DOMAIN_CATEGORY
4117 }
4118 #undef PCT_CHNG
4119
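/*
 * Accumulate the per-cpu values into the summary entry; once the last cpu
 * has been folded in, divide by the cpu count to turn the sums into
 * averages.
 */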
4120 static void summarize_schedstat_cpu(struct schedstat_cpu *summary_cpu,
4121 struct schedstat_cpu *cptr,
4122 int cnt, bool is_last)
4123 {
4124 struct perf_record_schedstat_cpu *summary_cs = summary_cpu->cpu_data,
4125 *temp_cs = cptr->cpu_data;
4126
4127 #define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver) \
4128 do { \
4129 summary_cs->_ver._name += temp_cs->_ver._name; \
4130 if (is_last) \
4131 summary_cs->_ver._name /= cnt; \
4132 } while (0)
4133
4134 if (cptr->cpu_data->version == 15) {
4135 #include <perf/schedstat-v15.h>
4136 } else if (cptr->cpu_data->version == 16) {
4137 #include <perf/schedstat-v16.h>
4138 } else if (cptr->cpu_data->version == 17) {
4139 #include <perf/schedstat-v17.h>
4140 }
4141 #undef CPU_FIELD
4142 }
4143
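/* Domain counterpart of summarize_schedstat_cpu(). */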
4144 static void summarize_schedstat_domain(struct schedstat_domain *summary_domain,
4145 struct schedstat_domain *dptr,
4146 int cnt, bool is_last)
4147 {
4148 struct perf_record_schedstat_domain *summary_ds = summary_domain->domain_data,
4149 *temp_ds = dptr->domain_data;
4150
4151 #define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver) \
4152 do { \
4153 summary_ds->_ver._name += temp_ds->_ver._name; \
4154 if (is_last) \
4155 summary_ds->_ver._name /= cnt; \
4156 } while (0)
4157
4158 if (dptr->domain_data->version == 15) {
4159 #include <perf/schedstat-v15.h>
4160 } else if (dptr->domain_data->version == 16) {
4161 #include <perf/schedstat-v16.h>
4162 } else if (dptr->domain_data->version == 17) {
4163 #include <perf/schedstat-v17.h>
4164 }
4165 #undef DOMAIN_FIELD
4166 }
4167
4168 /*
4169 * get_all_cpu_stats() builds a summary over all cpus and inserts it at the head of the list.
4170 */
4171 static int get_all_cpu_stats(struct list_head *head)
4172 {
4173 struct schedstat_cpu *cptr = list_first_entry(head, struct schedstat_cpu, cpu_list);
4174 struct schedstat_cpu *summary_head = NULL;
4175 struct perf_record_schedstat_domain *ds;
4176 struct perf_record_schedstat_cpu *cs;
4177 struct schedstat_domain *dptr, *tdptr;
4178 bool is_last = false;
4179 int cnt = 1;
4180 int ret = 0;
4181
4182 if (cptr) {
4183 summary_head = zalloc(sizeof(*summary_head));
4184 if (!summary_head)
4185 return -ENOMEM;
4186
4187 summary_head->cpu_data = zalloc(sizeof(*cs));
4188 memcpy(summary_head->cpu_data, cptr->cpu_data, sizeof(*cs));
4189
4190 INIT_LIST_HEAD(&summary_head->domain_head);
4191
4192 list_for_each_entry(dptr, &cptr->domain_head, domain_list) {
4193 tdptr = zalloc(sizeof(*tdptr));
4194 if (!tdptr)
4195 return -ENOMEM;
4196
4197 tdptr->domain_data = zalloc(sizeof(*ds));
4198 if (!tdptr->domain_data)
4199 return -ENOMEM;
4200
4201 memcpy(tdptr->domain_data, dptr->domain_data, sizeof(*ds));
4202 list_add_tail(&tdptr->domain_list, &summary_head->domain_head);
4203 }
4204 }
4205
4206 list_for_each_entry(cptr, head, cpu_list) {
4207 if (list_is_first(&cptr->cpu_list, head))
4208 continue;
4209
4210 if (list_is_last(&cptr->cpu_list, head))
4211 is_last = true;
4212
4213 cnt++;
4214 summarize_schedstat_cpu(summary_head, cptr, cnt, is_last);
4215 tdptr = list_first_entry(&summary_head->domain_head, struct schedstat_domain,
4216 domain_list);
4217
4218 list_for_each_entry(dptr, &cptr->domain_head, domain_list) {
4219 summarize_schedstat_domain(tdptr, dptr, cnt, is_last);
4220 tdptr = list_next_entry(tdptr, domain_list);
4221 }
4222 }
4223
4224 list_add(&summary_head->cpu_list, head);
4225 return ret;
4226 }
4227
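/*
 * Print the report: a legend, the elapsed jiffies, an <ALL CPUS SUMMARY>
 * block and then, unless summary_only is set, one block of cpu and domain
 * statistics per cpu. When head2/cd_map2 are given (stats diff), each line
 * shows both values plus the percent change.
 */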
4228 static int show_schedstat_data(struct list_head *head1, struct cpu_domain_map **cd_map1,
4229 struct list_head *head2, struct cpu_domain_map **cd_map2,
4230 bool summary_only)
4231 {
4232 struct schedstat_cpu *cptr1 = list_first_entry(head1, struct schedstat_cpu, cpu_list);
4233 struct perf_record_schedstat_domain *ds1 = NULL, *ds2 = NULL;
4234 struct perf_record_schedstat_cpu *cs1 = NULL, *cs2 = NULL;
4235 struct schedstat_domain *dptr1 = NULL, *dptr2 = NULL;
4236 struct schedstat_cpu *cptr2 = NULL;
4237 __u64 jiffies1 = 0, jiffies2 = 0;
4238 bool is_summary = true;
4239 int ret = 0;
4240
4241 printf("Description\n");
4242 print_separator2(SEP_LEN, "", 0);
4243 printf("%-30s-> %s\n", "DESC", "Description of the field");
4244 printf("%-30s-> %s\n", "COUNT", "Value of the field");
4245 printf("%-30s-> %s\n", "PCT_CHANGE", "Percent change with corresponding base value");
4246 printf("%-30s-> %s\n", "AVG_JIFFIES",
4247 "Avg time in jiffies between two consecutive occurrence of event");
4248
4249 print_separator2(SEP_LEN, "", 0);
4250 printf("\n");
4251
4252 printf("%-65s: ", "Time elapsed (in jiffies)");
4253 jiffies1 = cptr1->cpu_data->timestamp;
4254 printf("%11llu", jiffies1);
4255 if (head2) {
4256 cptr2 = list_first_entry(head2, struct schedstat_cpu, cpu_list);
4257 jiffies2 = cptr2->cpu_data->timestamp;
4258 printf(",%11llu", jiffies2);
4259 }
4260 printf("\n");
4261
4262 ret = get_all_cpu_stats(head1);
4263 if (cptr2) {
4264 ret = get_all_cpu_stats(head2);
4265 cptr2 = list_first_entry(head2, struct schedstat_cpu, cpu_list);
4266 }
4267
4268 list_for_each_entry(cptr1, head1, cpu_list) {
4269 struct cpu_domain_map *cd_info1 = NULL, *cd_info2 = NULL;
4270
4271 cs1 = cptr1->cpu_data;
4272 cd_info1 = cd_map1[cs1->cpu];
4273 if (cptr2) {
4274 cs2 = cptr2->cpu_data;
4275 cd_info2 = cd_map2[cs2->cpu];
4276 dptr2 = list_first_entry(&cptr2->domain_head, struct schedstat_domain,
4277 domain_list);
4278 }
4279
4280 if (cs2 && cs1->cpu != cs2->cpu) {
4281 pr_err("Failed because matching cpus not found for diff\n");
4282 return -1;
4283 }
4284
4285 if (cd_info2 && cd_info1->nr_domains != cd_info2->nr_domains) {
4286 pr_err("Failed because nr_domains is not same for cpus\n");
4287 return -1;
4288 }
4289
4290 print_separator2(SEP_LEN, "", 0);
4291
4292 if (is_summary)
4293 printf("CPU: <ALL CPUS SUMMARY>\n");
4294 else
4295 printf("CPU: %d\n", cs1->cpu);
4296
4297 print_separator2(SEP_LEN, "", 0);
4298 print_cpu_stats(cs1, cs2);
4299 print_separator2(SEP_LEN, "", 0);
4300
4301 list_for_each_entry(dptr1, &cptr1->domain_head, domain_list) {
4302 struct domain_info *dinfo1 = NULL, *dinfo2 = NULL;
4303
4304 ds1 = dptr1->domain_data;
4305 dinfo1 = cd_info1->domains[ds1->domain];
4306 if (dptr2) {
4307 ds2 = dptr2->domain_data;
4308 dinfo2 = cd_info2->domains[ds2->domain];
4309 }
4310
4311 if (dinfo2 && dinfo1->domain != dinfo2->domain) {
4312 pr_err("Failed because matching domain not found for diff\n");
4313 return -1;
4314 }
4315
4316 if (is_summary) {
4317 if (dinfo1->dname)
4318 printf("CPU: <ALL CPUS SUMMARY> | DOMAIN: %s\n",
4319 dinfo1->dname);
4320 else
4321 printf("CPU: <ALL CPUS SUMMARY> | DOMAIN: %d\n",
4322 dinfo1->domain);
4323 } else {
4324 if (dinfo1->dname)
4325 printf("CPU: %d | DOMAIN: %s | DOMAIN_CPUS: ",
4326 cs1->cpu, dinfo1->dname);
4327 else
4328 printf("CPU: %d | DOMAIN: %d | DOMAIN_CPUS: ",
4329 cs1->cpu, dinfo1->domain);
4330
4331 printf("%s\n", dinfo1->cpulist);
4332 }
4333 print_separator2(SEP_LEN, "", 0);
4334 print_domain_stats(ds1, ds2, jiffies1, jiffies2);
4335 print_separator2(SEP_LEN, "", 0);
4336
4337 if (dptr2)
4338 dptr2 = list_next_entry(dptr2, domain_list);
4339 }
4340 if (summary_only)
4341 break;
4342
4343 if (cptr2)
4344 cptr2 = list_next_entry(cptr2, cpu_list);
4345
4346 is_summary = false;
4347 }
4348 return ret;
4349 }
4350
4351 /*
4352 * Creates a linked list of cpu_data and domain_data. Below is the structure of the linked list,
4353 * where CPU0, CPU1, CPU2, ..., CPU(N-1) store the cpu_data and N is the total number of cpus.
4354 * Each CPU node points to a list of domain_data, represented by DOMAIN0, DOMAIN1, DOMAIN2, ....
4355 * Here D0, D1, D2, ..., Dm are the numbers of domains in the respective cpus.
4356 *
4357 * +----------+
4358 * | CPU_HEAD |
4359 * +----------+
4360 * |
4361 * v
4362 * +----------+ +---------+ +---------+ +---------+ +--------------+
4363 * | CPU0 | -> | DOMAIN0 | -> | DOMAIN1 | -> | DOMAIN2 | -> ... -> | DOMAIN(D0-1) |
4364 * +----------+ +---------+ +---------+ +---------+ +--------------+
4365 * |
4366 * v
4367 * +----------+ +---------+ +---------+ +---------+ +--------------+
4368 * | CPU1 | -> | DOMAIN0 | -> | DOMAIN1 | -> | DOMAIN2 | -> ... -> | DOMAIN(D1-1) |
4369 * +----------+ +---------+ +---------+ +---------+ +--------------+
4370 * |
4371 * v
4372 * +----------+ +---------+ +---------+ +---------+ +--------------+
4373 * | CPU2 | -> | DOMAIN0 | -> | DOMAIN1 | -> | DOMAIN2 | -> ... -> | DOMAIN(D2-1) |
4374 * +----------+ +---------+ +---------+ +---------+ +--------------+
4375 * |
4376 * v
4377 * ...
4378 * |
4379 * v
4380 * +----------+ +---------+ +---------+ +---------+ +--------------+
4381 * | CPU(N-1) | -> | DOMAIN0 | -> | DOMAIN1 | -> | DOMAIN2 | -> ... -> | DOMAIN(Dm-1) |
4382 * +----------+ +---------+ +---------+ +---------+ +--------------+
4383 *
4384 * Each cpu as well as each domain has 2 entries in the event list: one before the workload
4385 * starts and the other after its completion. The above linked list stores the diff of the cpu and
4386 * domain statistics.
4387 */
4388 static int perf_sched__process_schedstat(const struct perf_tool *tool __maybe_unused,
4389 struct perf_session *session __maybe_unused,
4390 union perf_event *event)
4391 {
4392 struct perf_cpu this_cpu;
4393 static __u32 initial_cpu;
4394
4395 switch (event->header.type) {
4396 case PERF_RECORD_SCHEDSTAT_CPU:
4397 this_cpu.cpu = event->schedstat_cpu.cpu;
4398 break;
4399 case PERF_RECORD_SCHEDSTAT_DOMAIN:
4400 this_cpu.cpu = event->schedstat_domain.cpu;
4401 break;
4402 default:
4403 return 0;
4404 }
4405
4406 if (user_requested_cpus && !perf_cpu_map__has(user_requested_cpus, this_cpu))
4407 return 0;
4408
4409 if (event->header.type == PERF_RECORD_SCHEDSTAT_CPU) {
4410 struct schedstat_cpu *temp = zalloc(sizeof(*temp));
4411
4412 if (!temp)
4413 return -ENOMEM;
4414
4415 temp->cpu_data = zalloc(sizeof(*temp->cpu_data));
4416 if (!temp->cpu_data)
4417 return -ENOMEM;
4418
4419 memcpy(temp->cpu_data, &event->schedstat_cpu, sizeof(*temp->cpu_data));
4420
4421 if (!list_empty(&cpu_head) && temp->cpu_data->cpu == initial_cpu)
4422 after_workload_flag = true;
4423
4424 if (!after_workload_flag) {
4425 if (list_empty(&cpu_head))
4426 initial_cpu = temp->cpu_data->cpu;
4427
4428 list_add_tail(&temp->cpu_list, &cpu_head);
4429 INIT_LIST_HEAD(&temp->domain_head);
4430 } else {
4431 if (temp->cpu_data->cpu == initial_cpu) {
4432 cpu_second_pass = list_first_entry(&cpu_head, struct schedstat_cpu,
4433 cpu_list);
4434 cpu_second_pass->cpu_data->timestamp =
4435 temp->cpu_data->timestamp - cpu_second_pass->cpu_data->timestamp;
4436 } else {
4437 cpu_second_pass = list_next_entry(cpu_second_pass, cpu_list);
4438 }
4439 domain_second_pass = list_first_entry(&cpu_second_pass->domain_head,
4440 struct schedstat_domain, domain_list);
4441 store_schedstat_cpu_diff(temp);
4442 }
4443 } else if (event->header.type == PERF_RECORD_SCHEDSTAT_DOMAIN) {
4444 struct schedstat_cpu *cpu_tail;
4445 struct schedstat_domain *temp = zalloc(sizeof(*temp));
4446
4447 if (!temp)
4448 return -ENOMEM;
4449
4450 temp->domain_data = zalloc(sizeof(*temp->domain_data));
4451 if (!temp->domain_data)
4452 return -ENOMEM;
4453
4454 memcpy(temp->domain_data, &event->schedstat_domain, sizeof(*temp->domain_data));
4455
4456 if (!after_workload_flag) {
4457 cpu_tail = list_last_entry(&cpu_head, struct schedstat_cpu, cpu_list);
4458 list_add_tail(&temp->domain_list, &cpu_tail->domain_head);
4459 } else {
4460 store_schedstat_domain_diff(temp);
4461 domain_second_pass = list_next_entry(domain_second_pass, domain_list);
4462 }
4463 }
4464
4465 return 0;
4466 }
4467
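/* Release every cpu node and its list of domain nodes. */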
4468 static void free_schedstat(struct list_head *head)
4469 {
4470 struct schedstat_domain *dptr, *n1;
4471 struct schedstat_cpu *cptr, *n2;
4472
4473 list_for_each_entry_safe(cptr, n2, head, cpu_list) {
4474 list_for_each_entry_safe(dptr, n1, &cptr->domain_head, domain_list) {
4475 list_del_init(&dptr->domain_list);
4476 free(dptr);
4477 }
4478 list_del_init(&cptr->cpu_list);
4479 free(cptr);
4480 }
4481 }
4482
4483 static int perf_sched__schedstat_report(struct perf_sched *sched)
4484 {
4485 struct cpu_domain_map **cd_map;
4486 struct perf_session *session;
4487 struct target target = {};
4488 struct perf_data data = {
4489 .path = input_name,
4490 .mode = PERF_DATA_MODE_READ,
4491 };
4492 int err = 0;
4493
4494 sched->tool.schedstat_cpu = perf_sched__process_schedstat;
4495 sched->tool.schedstat_domain = perf_sched__process_schedstat;
4496
4497 session = perf_session__new(&data, &sched->tool);
4498 if (IS_ERR(session)) {
4499 pr_err("Perf session creation failed.\n");
4500 return PTR_ERR(session);
4501 }
4502
4503 if (cpu_list)
4504 target.cpu_list = cpu_list;
4505 else
4506 target.system_wide = true;
4507
4508 err = evlist__create_maps(session->evlist, &target);
4509 if (err < 0)
4510 goto out;
4511
4512 user_requested_cpus = session->evlist->core.user_requested_cpus;
4513
4514 err = perf_session__process_events(session);
4515
4516 if (!err) {
4517 setup_pager();
4518
4519 if (list_empty(&cpu_head)) {
4520 pr_err("Data is not available\n");
4521 err = -1;
4522 goto out;
4523 }
4524
4525 cd_map = session->header.env.cpu_domain;
4526 err = show_schedstat_data(&cpu_head, cd_map, NULL, NULL, false);
4527 }
4528
4529 out:
4530 free_schedstat(&cpu_head);
4531 perf_session__delete(session);
4532 return err;
4533 }
4534
4535 static int perf_sched__schedstat_diff(struct perf_sched *sched,
4536 int argc, const char **argv)
4537 {
4538 struct cpu_domain_map **cd_map0 = NULL, **cd_map1 = NULL;
4539 struct list_head cpu_head_ses0, cpu_head_ses1;
4540 struct perf_session *session[2];
4541 struct perf_data data[2];
4542 int ret = 0, err = 0;
4543 static const char *defaults[] = {
4544 "perf.data.old",
4545 "perf.data",
4546 };
4547
4548 if (argc) {
4549 if (argc == 1)
4550 defaults[1] = argv[0];
4551 else if (argc == 2) {
4552 defaults[0] = argv[0];
4553 defaults[1] = argv[1];
4554 } else {
4555 pr_err("perf sched stats diff is not supported with more than 2 files.\n");
4556 goto out_ret;
4557 }
4558 }
4559
4560 INIT_LIST_HEAD(&cpu_head_ses0);
4561 INIT_LIST_HEAD(&cpu_head_ses1);
4562
4563 sched->tool.schedstat_cpu = perf_sched__process_schedstat;
4564 sched->tool.schedstat_domain = perf_sched__process_schedstat;
4565
4566 data[0].path = defaults[0];
4567 data[0].mode = PERF_DATA_MODE_READ;
4568 session[0] = perf_session__new(&data[0], &sched->tool);
4569 if (IS_ERR(session[0])) {
4570 ret = PTR_ERR(session[0]);
4571 pr_err("Failed to open %s\n", data[0].path);
4572 goto out_delete_ses0;
4573 }
4574
4575 err = perf_session__process_events(session[0]);
4576 if (err)
4577 goto out_delete_ses0;
4578
4579 cd_map0 = session[0]->header.env.cpu_domain;
4580 list_replace_init(&cpu_head, &cpu_head_ses0);
4581 after_workload_flag = false;
4582
4583 data[1].path = defaults[1];
4584 data[1].mode = PERF_DATA_MODE_READ;
4585 session[1] = perf_session__new(&data[1], &sched->tool);
4586 if (IS_ERR(session[1])) {
4587 ret = PTR_ERR(session[1]);
4588 pr_err("Failed to open %s\n", data[1].path);
4589 goto out_delete_ses1;
4590 }
4591
4592 err = perf_session__process_events(session[1]);
4593 if (err)
4594 goto out_delete_ses1;
4595
4596 cd_map1 = session[1]->header.env.cpu_domain;
4597 list_replace_init(&cpu_head, &cpu_head_ses1);
4598 after_workload_flag = false;
4599 setup_pager();
4600
4601 if (list_empty(&cpu_head_ses1)) {
4602 pr_err("Data is not available\n");
4603 ret = -1;
4604 goto out_delete_ses1;
4605 }
4606
4607 if (list_empty(&cpu_head_ses0)) {
4608 pr_err("Data is not available\n");
4609 ret = -1;
4610 goto out_delete_ses0;
4611 }
4612
4613 show_schedstat_data(&cpu_head_ses0, cd_map0, &cpu_head_ses1, cd_map1, true);
4614
4615 out_delete_ses1:
4616 free_schedstat(&cpu_head_ses1);
4617 if (!IS_ERR(session[1]))
4618 perf_session__delete(session[1]);
4619
4620 out_delete_ses0:
4621 free_schedstat(&cpu_head_ses0);
4622 if (!IS_ERR(session[0]))
4623 perf_session__delete(session[0]);
4624
4625 out_ret:
4626 return ret;
4627 }
4628
4629 static int process_synthesized_event_live(const struct perf_tool *tool __maybe_unused,
4630 union perf_event *event,
4631 struct perf_sample *sample __maybe_unused,
4632 struct machine *machine __maybe_unused)
4633 {
4634 return perf_sched__process_schedstat(tool, NULL, event);
4635 }
4636
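/*
 * Live mode (`perf sched stats` without a subcommand): like stats record
 * followed by stats report, but the before/after snapshots are synthesized
 * straight into the in-memory lists and the diff is printed immediately,
 * without writing a perf.data file.
 */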
4637 static int perf_sched__schedstat_live(struct perf_sched *sched,
4638 int argc, const char **argv)
4639 {
4640 struct cpu_domain_map **cd_map = NULL;
4641 struct target target = {};
4642 u32 __maybe_unused md;
4643 struct evlist *evlist;
4644 u32 nr = 0, sv;
4645 int reset = 0;
4646 int err = 0;
4647
4648 signal(SIGINT, sighandler);
4649 signal(SIGCHLD, sighandler);
4650 signal(SIGTERM, sighandler);
4651
4652 evlist = evlist__new();
4653 if (!evlist)
4654 return -ENOMEM;
4655
4656 /*
4657 * `perf sched stats` does not support workload profiling (-p pid)
4658 * since the /proc/schedstat file contains cpu-specific data only. Hence, a
4659 * profile target is either a set of cpus or systemwide, never a process.
4660 * Note that, although `-- <workload>` is supported, the profile data are
4661 * still cpu/systemwide.
4662 */
4663 if (cpu_list)
4664 target.cpu_list = cpu_list;
4665 else
4666 target.system_wide = true;
4667
4668 if (argc) {
4669 err = evlist__prepare_workload(evlist, &target, argv, false, NULL);
4670 if (err)
4671 goto out;
4672 }
4673
4674 err = evlist__create_maps(evlist, &target);
4675 if (err < 0)
4676 goto out;
4677
4678 user_requested_cpus = evlist->core.user_requested_cpus;
4679
4680 err = perf_event__synthesize_schedstat(&(sched->tool),
4681 process_synthesized_event_live,
4682 user_requested_cpus);
4683 if (err < 0)
4684 goto out;
4685
4686 err = enable_sched_schedstats(&reset);
4687 if (err < 0)
4688 goto out;
4689
4690 if (argc)
4691 evlist__start_workload(evlist);
4692
4693 /* wait for signal */
4694 pause();
4695
4696 if (reset) {
4697 err = disable_sched_schedstat();
4698 if (err < 0)
4699 goto out;
4700 }
4701
4702 err = perf_event__synthesize_schedstat(&(sched->tool),
4703 process_synthesized_event_live,
4704 user_requested_cpus);
4705 if (err)
4706 goto out;
4707
4708 setup_pager();
4709
4710 if (list_empty(&cpu_head)) {
4711 pr_err("Data is not available\n");
4712 err = -1;
4713 goto out;
4714 }
4715
4716 nr = cpu__max_present_cpu().cpu;
4717 cd_map = build_cpu_domain_map(&sv, &md, nr);
4718 if (!cd_map) {
4719 pr_err("Unable to generate cpu-domain relation info");
4720 goto out;
4721 }
4722
4723 show_schedstat_data(&cpu_head, cd_map, NULL, NULL, false);
4724 free_cpu_domain_info(cd_map, sv, nr);
4725 out:
4726 free_schedstat(&cpu_head);
4727 evlist__delete(evlist);
4728 return err;
4729 }
4730
4731 static bool schedstat_events_exposed(void)
4732 {
4733 /*
4734 * Select "sched:sched_stat_wait" event to check
4735 * whether schedstat tracepoints are exposed.
4736 */
4737 return IS_ERR(trace_event__tp_format("sched", "sched_stat_wait")) ?
4738 false : true;
4739 }
4740
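/*
 * `perf sched record` is a thin wrapper around `perf record` with a fixed
 * set of sched tracepoints, roughly:
 *
 *   perf record -a -R -m 1024 -c 1 -e sched:sched_switch ... [user args]
 *
 * plus sched:sched_waking (or sched:sched_wakeup on older kernels) and,
 * when exposed, the sched_stat_* tracepoints.
 */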
4741 static int __cmd_record(int argc, const char **argv)
4742 {
4743 unsigned int rec_argc, i, j;
4744 char **rec_argv;
4745 const char **rec_argv_copy;
4746 const char * const record_args[] = {
4747 "record",
4748 "-a",
4749 "-R",
4750 "-m", "1024",
4751 "-c", "1",
4752 "-e", "sched:sched_switch",
4753 "-e", "sched:sched_stat_runtime",
4754 "-e", "sched:sched_process_fork",
4755 "-e", "sched:sched_wakeup_new",
4756 "-e", "sched:sched_migrate_task",
4757 };
4758
4759 /*
4760 * The tracepoints trace_sched_stat_{wait, sleep, iowait}
4761 * are not exposed to the user if CONFIG_SCHEDSTATS is not set.
4762 * To prevent "perf sched record" from failing, determine
4763 * whether to record the schedstat events based on whether they are actually exposed.
4764 */
4765 const char * const schedstat_args[] = {
4766 "-e", "sched:sched_stat_wait",
4767 "-e", "sched:sched_stat_sleep",
4768 "-e", "sched:sched_stat_iowait",
4769 };
4770 unsigned int schedstat_argc = schedstat_events_exposed() ?
4771 ARRAY_SIZE(schedstat_args) : 0;
4772
4773 struct tep_event *waking_event;
4774 int ret;
4775
4776 /*
4777 * +2 for either "-e", "sched:sched_wakeup" or
4778 * "-e", "sched:sched_waking"
4779 */
4780 rec_argc = ARRAY_SIZE(record_args) + 2 + schedstat_argc + argc - 1;
4781 rec_argv = calloc(rec_argc + 1, sizeof(char *));
4782 if (rec_argv == NULL)
4783 return -ENOMEM;
4784 rec_argv_copy = calloc(rec_argc + 1, sizeof(char *));
4785 if (rec_argv_copy == NULL) {
4786 free(rec_argv);
4787 return -ENOMEM;
4788 }
4789
4790 for (i = 0; i < ARRAY_SIZE(record_args); i++)
4791 rec_argv[i] = strdup(record_args[i]);
4792
4793 rec_argv[i++] = strdup("-e");
4794 waking_event = trace_event__tp_format("sched", "sched_waking");
4795 if (!IS_ERR(waking_event))
4796 rec_argv[i++] = strdup("sched:sched_waking");
4797 else
4798 rec_argv[i++] = strdup("sched:sched_wakeup");
4799
4800 for (j = 0; j < schedstat_argc; j++)
4801 rec_argv[i++] = strdup(schedstat_args[j]);
4802
4803 for (j = 1; j < (unsigned int)argc; j++, i++)
4804 rec_argv[i] = strdup(argv[j]);
4805
4806 BUG_ON(i != rec_argc);
4807
4808 memcpy(rec_argv_copy, rec_argv, sizeof(char *) * rec_argc);
4809 ret = cmd_record(rec_argc, rec_argv_copy);
4810
4811 for (i = 0; i < rec_argc; i++)
4812 free(rec_argv[i]);
4813 free(rec_argv);
4814 free(rec_argv_copy);
4815
4816 return ret;
4817 }
4818
4819 int cmd_sched(int argc, const char **argv)
4820 {
4821 static const char default_sort_order[] = "avg, max, switch, runtime";
4822 struct perf_sched sched = {
4823 .cmp_pid = LIST_HEAD_INIT(sched.cmp_pid),
4824 .sort_list = LIST_HEAD_INIT(sched.sort_list),
4825 .sort_order = default_sort_order,
4826 .replay_repeat = 10,
4827 .profile_cpu = -1,
4828 .next_shortname1 = 'A',
4829 .next_shortname2 = '0',
4830 .skip_merge = 0,
4831 .show_callchain = 1,
4832 .max_stack = 5,
4833 };
4834 const struct option sched_options[] = {
4835 OPT_STRING('i', "input", &input_name, "file",
4836 "input file name"),
4837 OPT_INCR('v', "verbose", &verbose,
4838 "be more verbose (show symbol address, etc)"),
4839 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
4840 "dump raw trace in ASCII"),
4841 OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"),
4842 OPT_END()
4843 };
4844 const struct option latency_options[] = {
4845 OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]",
4846 "sort by key(s): runtime, switch, avg, max"),
4847 OPT_INTEGER('C', "CPU", &sched.profile_cpu,
4848 "CPU to profile on"),
4849 OPT_BOOLEAN('p', "pids", &sched.skip_merge,
4850 "latency stats per pid instead of per comm"),
4851 OPT_PARENT(sched_options)
4852 };
4853 const struct option replay_options[] = {
4854 OPT_UINTEGER('r', "repeat", &sched.replay_repeat,
4855 "repeat the workload replay N times (0: infinite)"),
4856 OPT_PARENT(sched_options)
4857 };
4858 const struct option map_options[] = {
4859 OPT_BOOLEAN(0, "compact", &sched.map.comp,
4860 "map output in compact mode"),
4861 OPT_STRING(0, "color-pids", &sched.map.color_pids_str, "pids",
4862 "highlight given pids in map"),
4863 OPT_STRING(0, "color-cpus", &sched.map.color_cpus_str, "cpus",
4864 "highlight given CPUs in map"),
4865 OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus",
4866 "display given CPUs in map"),
4867 OPT_STRING(0, "task-name", &sched.map.task_name, "task",
4868 "map output only for the given task name(s)."),
4869 OPT_BOOLEAN(0, "fuzzy-name", &sched.map.fuzzy,
4870 "given command name can be partially matched (fuzzy matching)"),
4871 OPT_PARENT(sched_options)
4872 };
4873 const struct option timehist_options[] = {
4874 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
4875 "file", "vmlinux pathname"),
4876 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
4877 "file", "kallsyms pathname"),
4878 OPT_BOOLEAN('g', "call-graph", &sched.show_callchain,
4879 "Display call chains if present (default on)"),
4880 OPT_UINTEGER(0, "max-stack", &sched.max_stack,
4881 "Maximum number of functions to display backtrace."),
4882 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
4883 "Look for files with symbols relative to this directory"),
4884 OPT_BOOLEAN('s', "summary", &sched.summary_only,
4885 "Show only syscall summary with statistics"),
4886 OPT_BOOLEAN('S', "with-summary", &sched.summary,
4887 "Show all syscalls and summary with statistics"),
4888 OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"),
4889 OPT_BOOLEAN('n', "next", &sched.show_next, "Show next task"),
4890 OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"),
4891 OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"),
4892 OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"),
4893 OPT_STRING(0, "time", &sched.time_str, "str",
4894 "Time span for analysis (start,stop)"),
4895 OPT_BOOLEAN(0, "state", &sched.show_state, "Show task state when sched-out"),
4896 OPT_STRING('p', "pid", &symbol_conf.pid_list_str, "pid[,pid...]",
4897 "analyze events only for given process id(s)"),
4898 OPT_STRING('t', "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
4899 "analyze events only for given thread id(s)"),
4900 OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
4901 OPT_BOOLEAN(0, "show-prio", &sched.show_prio, "Show task priority"),
4902 OPT_STRING(0, "prio", &sched.prio_str, "prio",
4903 "analyze events only for given task priority(ies)"),
4904 OPT_BOOLEAN('P', "pre-migrations", &sched.pre_migrations, "Show pre-migration wait time"),
4905 OPT_PARENT(sched_options)
4906 };
4907 const struct option stats_options[] = {
4908 OPT_STRING('i', "input", &input_name, "file",
4909 "`stats report` with input filename"),
4910 OPT_STRING('o', "output", &output_name, "file",
4911 "`stats record` with output filename"),
4912 OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
4913 OPT_BOOLEAN('v', "verbose", &verbose_field, "Show explanation for fields in the report"),
4914 OPT_END()
4915 };
4916
4917 const char * const latency_usage[] = {
4918 "perf sched latency [<options>]",
4919 NULL
4920 };
4921 const char * const replay_usage[] = {
4922 "perf sched replay [<options>]",
4923 NULL
4924 };
4925 const char * const map_usage[] = {
4926 "perf sched map [<options>]",
4927 NULL
4928 };
4929 const char * const timehist_usage[] = {
4930 "perf sched timehist [<options>]",
4931 NULL
4932 };
4933 const char *stats_usage[] = {
4934 "perf sched stats {record|report} [<options>]",
4935 NULL
4936 };
4937 const char *const sched_subcommands[] = { "record", "latency", "map",
4938 "replay", "script",
4939 "timehist", "stats", NULL };
4940 const char *sched_usage[] = {
4941 NULL,
4942 NULL
4943 };
4944 struct trace_sched_handler lat_ops = {
4945 .wakeup_event = latency_wakeup_event,
4946 .switch_event = latency_switch_event,
4947 .runtime_event = latency_runtime_event,
4948 .migrate_task_event = latency_migrate_task_event,
4949 };
4950 struct trace_sched_handler map_ops = {
4951 .switch_event = map_switch_event,
4952 };
4953 struct trace_sched_handler replay_ops = {
4954 .wakeup_event = replay_wakeup_event,
4955 .switch_event = replay_switch_event,
4956 .fork_event = replay_fork_event,
4957 };
4958 int ret;
4959
4960 perf_tool__init(&sched.tool, /*ordered_events=*/true);
4961 sched.tool.sample = perf_sched__process_tracepoint_sample;
4962 sched.tool.comm = perf_sched__process_comm;
4963 sched.tool.namespaces = perf_event__process_namespaces;
4964 sched.tool.lost = perf_event__process_lost;
4965 sched.tool.fork = perf_sched__process_fork_event;
4966
4967 argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands,
4968 sched_usage, PARSE_OPT_STOP_AT_NON_OPTION);
4969 if (!argc)
4970 usage_with_options(sched_usage, sched_options);
4971
4972 thread__set_priv_destructor(free);
4973
4974 /*
4975 * Aliased to 'perf script' for now:
4976 */
4977 if (!strcmp(argv[0], "script")) {
4978 ret = cmd_script(argc, argv);
4979 } else if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
4980 ret = __cmd_record(argc, argv);
4981 } else if (strlen(argv[0]) > 2 && strstarts("latency", argv[0])) {
4982 sched.tp_handler = &lat_ops;
4983 if (argc > 1) {
4984 argc = parse_options(argc, argv, latency_options, latency_usage, 0);
4985 if (argc)
4986 usage_with_options(latency_usage, latency_options);
4987 }
4988 setup_sorting(&sched, latency_options, latency_usage);
4989 ret = perf_sched__lat(&sched);
4990 } else if (!strcmp(argv[0], "map")) {
4991 if (argc) {
4992 argc = parse_options(argc, argv, map_options, map_usage, 0);
4993 if (argc)
4994 usage_with_options(map_usage, map_options);
4995
4996 if (sched.map.task_name) {
4997 sched.map.task_names = strlist__new(sched.map.task_name, NULL);
4998 if (sched.map.task_names == NULL) {
4999 fprintf(stderr, "Failed to parse task names\n");
5000 ret = -1;
5001 goto out;
5002 }
5003 }
5004 }
5005 sched.tp_handler = &map_ops;
5006 setup_sorting(&sched, latency_options, latency_usage);
5007 ret = perf_sched__map(&sched);
5008 } else if (strlen(argv[0]) > 2 && strstarts("replay", argv[0])) {
5009 sched.tp_handler = &replay_ops;
5010 if (argc) {
5011 argc = parse_options(argc, argv, replay_options, replay_usage, 0);
5012 if (argc)
5013 usage_with_options(replay_usage, replay_options);
5014 }
5015 ret = perf_sched__replay(&sched);
5016 } else if (!strcmp(argv[0], "timehist")) {
5017 if (argc) {
5018 argc = parse_options(argc, argv, timehist_options,
5019 timehist_usage, 0);
5020 if (argc)
5021 usage_with_options(timehist_usage, timehist_options);
5022 }
5023 if ((sched.show_wakeups || sched.show_next) &&
5024 sched.summary_only) {
5025 pr_err(" Error: -s and -[n|w] are mutually exclusive.\n");
5026 parse_options_usage(timehist_usage, timehist_options, "s", true);
5027 if (sched.show_wakeups)
5028 parse_options_usage(NULL, timehist_options, "w", true);
5029 if (sched.show_next)
5030 parse_options_usage(NULL, timehist_options, "n", true);
5031 ret = -EINVAL;
5032 goto out;
5033 }
5034 ret = symbol__validate_sym_arguments();
5035 if (!ret)
5036 ret = perf_sched__timehist(&sched);
5037 } else if (!strcmp(argv[0], "stats")) {
5038 const char *const stats_subcommands[] = {"record", "report", NULL};
5039
5040 argc = parse_options_subcommand(argc, argv, stats_options,
5041 stats_subcommands,
5042 stats_usage,
5043 PARSE_OPT_STOP_AT_NON_OPTION);
5044
5045 if (argv[0] && !strcmp(argv[0], "record")) {
5046 if (argc)
5047 argc = parse_options(argc, argv, stats_options,
5048 stats_usage, 0);
5049 return perf_sched__schedstat_record(&sched, argc, argv);
5050 } else if (argv[0] && !strcmp(argv[0], "report")) {
5051 if (argc)
5052 argc = parse_options(argc, argv, stats_options,
5053 stats_usage, 0);
5054 return perf_sched__schedstat_report(&sched);
5055 } else if (argv[0] && !strcmp(argv[0], "diff")) {
5056 if (argc)
5057 argc = parse_options(argc, argv, stats_options,
5058 stats_usage, 0);
5059 return perf_sched__schedstat_diff(&sched, argc, argv);
5060 }
5061 return perf_sched__schedstat_live(&sched, argc, argv);
5062 } else {
5063 usage_with_options(sched_usage, sched_options);
5064 }
5065
5066 out:
5067 /* free usage string allocated by parse_options_subcommand */
5068 free((void *)sched_usage[0]);
5069
5070 return ret;
5071 }
5072