/* linux/include/trace/events/sched.h (revision e58e871becec2d3b04ed91c0c16fe8deac9c9dfa) */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM sched

#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_SCHED_H

#include <linux/sched/numa_balancing.h>
#include <linux/tracepoint.h>
#include <linux/binfmts.h>

/*
 * Tracepoint for calling kthread_stop, performed to end a kthread:
 */
TRACE_EVENT(sched_kthread_stop,

	TP_PROTO(struct task_struct *t),

	TP_ARGS(t),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
		__entry->pid	= t->pid;
	),

	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
);

/*
 * Tracepoint for the return value of kthread_stop():
 */
TRACE_EVENT(sched_kthread_stop_ret,

	TP_PROTO(int ret),

	TP_ARGS(ret),

	TP_STRUCT__entry(
		__field(	int,	ret	)
	),

	TP_fast_assign(
		__entry->ret	= ret;
	),

	TP_printk("ret=%d", __entry->ret)
);
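
/*
 * Illustrative sketch, not part of this header: kernel/kthread.c's
 * kthread_stop() is the emitter of the two events above, bracketing the
 * stop operation roughly like so:
 *
 *	int kthread_stop(struct task_struct *k)
 *	{
 *		int ret;
 *
 *		trace_sched_kthread_stop(k);
 *		... wake the kthread and wait for it to exit ...
 *		trace_sched_kthread_stop_ret(ret);
 *		return ret;
 *	}
 */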

/*
 * Tracepoint for waking up a task:
 */
DECLARE_EVENT_CLASS(sched_wakeup_template,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(__perf_task(p)),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
		__field(	int,	success			)
		__field(	int,	target_cpu		)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid		= p->pid;
		__entry->prio		= p->prio; /* XXX SCHED_DEADLINE */
		__entry->success	= 1; /* rudiment, kill when possible */
		__entry->target_cpu	= task_cpu(p);
	),

	TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d",
		  __entry->comm, __entry->pid, __entry->prio,
		  __entry->target_cpu)
);

/*
 * Tracepoint called when waking a task; this tracepoint is guaranteed to be
 * called from the waking context.
 */
DEFINE_EVENT(sched_wakeup_template, sched_waking,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING.
 * It is not always called from the waking context.
 */
DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint for waking up a new task:
 */
DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));
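
/*
 * Illustrative only: try_to_wake_up() in kernel/sched/core.c emits
 * sched_waking early, from the waker's context, and sched_wakeup once the
 * task is actually marked runnable (possibly from another CPU);
 * wake_up_new_task() emits sched_wakeup_new. A rendered record looks
 * roughly like (values made up):
 *
 *	comm=kworker/0:1 pid=42 prio=120 target_cpu=000
 */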

#ifdef CREATE_TRACE_POINTS
static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */

	/*
	 * Preemption ignores task state, therefore preempted tasks are always
	 * RUNNING (we will not have dequeued if state != RUNNING).
	 */
	return preempt ? TASK_RUNNING | TASK_STATE_MAX : p->state;
}
#endif /* CREATE_TRACE_POINTS */
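
/*
 * Worked example for the helper above: a task preempted while runnable
 * reports TASK_RUNNING | TASK_STATE_MAX, which the sched_switch format
 * below renders as "R+"; a task that blocked in TASK_INTERRUPTIBLE (0x1)
 * renders as "S".
 */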

/*
 * Tracepoint for task switches, performed by the scheduler:
 */
TRACE_EVENT(sched_switch,

	TP_PROTO(bool preempt,
		 struct task_struct *prev,
		 struct task_struct *next),

	TP_ARGS(preempt, prev, next),

	TP_STRUCT__entry(
		__array(	char,	prev_comm,	TASK_COMM_LEN	)
		__field(	pid_t,	prev_pid			)
		__field(	int,	prev_prio			)
		__field(	long,	prev_state			)
		__array(	char,	next_comm,	TASK_COMM_LEN	)
		__field(	pid_t,	next_pid			)
		__field(	int,	next_prio			)
	),

	TP_fast_assign(
		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
		__entry->prev_pid	= prev->pid;
		__entry->prev_prio	= prev->prio;
		__entry->prev_state	= __trace_sched_switch_state(preempt, prev);
		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
		__entry->next_pid	= next->pid;
		__entry->next_prio	= next->prio;
		/* XXX SCHED_DEADLINE */
	),

	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
		__entry->prev_state & (TASK_STATE_MAX-1) ?
		  __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
				{ 1, "S" }, { 2, "D" }, { 4, "T" }, { 8, "t" },
				{ 16, "Z" }, { 32, "X" }, { 64, "x" },
				{ 128, "K" }, { 256, "W" }, { 512, "P" },
				{ 1024, "N" }) : "R",
		__entry->prev_state & TASK_STATE_MAX ? "+" : "",
		__entry->next_comm, __entry->next_pid, __entry->next_prio)
);
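
/*
 * Illustrative only: __schedule() emits this event just before switching
 * contexts, with preempt set when prev was preempted rather than blocking
 * voluntarily. A rendered record looks roughly like (values made up):
 *
 *	prev_comm=bash prev_pid=1234 prev_prio=120 prev_state=S
 *	==> next_comm=swapper/0 next_pid=0 next_prio=120
 */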

/*
 * Tracepoint for a task being migrated:
 */
TRACE_EVENT(sched_migrate_task,

	TP_PROTO(struct task_struct *p, int dest_cpu),

	TP_ARGS(p, dest_cpu),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
		__field(	int,	orig_cpu		)
		__field(	int,	dest_cpu		)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid		= p->pid;
		__entry->prio		= p->prio; /* XXX SCHED_DEADLINE */
		__entry->orig_cpu	= task_cpu(p);
		__entry->dest_cpu	= dest_cpu;
	),

	TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
		  __entry->comm, __entry->pid, __entry->prio,
		  __entry->orig_cpu, __entry->dest_cpu)
);
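
/*
 * Illustrative only: set_task_cpu() in kernel/sched/core.c emits this
 * event when a task is assigned a new CPU; orig_cpu is sampled via
 * task_cpu(p) before the move takes effect.
 */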

DECLARE_EVENT_CLASS(sched_process_template,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(p),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid		= p->pid;
		__entry->prio		= p->prio; /* XXX SCHED_DEADLINE */
	),

	TP_printk("comm=%s pid=%d prio=%d",
		  __entry->comm, __entry->pid, __entry->prio)
);

/*
 * Tracepoint for freeing a task:
 */
DEFINE_EVENT(sched_process_template, sched_process_free,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));


/*
 * Tracepoint for a task exiting:
 */
DEFINE_EVENT(sched_process_template, sched_process_exit,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint for waiting on task to unschedule:
 */
DEFINE_EVENT(sched_process_template, sched_wait_task,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p));
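
/*
 * Typical emitters, for illustration (this header does not enforce them):
 * do_exit() fires sched_process_exit, the deferred task_struct release
 * (delayed_put_task_struct()) fires sched_process_free, and
 * wait_task_inactive() fires sched_wait_task.
 */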

/*
 * Tracepoint for a waiting task:
 */
TRACE_EVENT(sched_process_wait,

	TP_PROTO(struct pid *pid),

	TP_ARGS(pid),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
		__entry->pid		= pid_nr(pid);
		__entry->prio		= current->prio; /* XXX SCHED_DEADLINE */
	),

	TP_printk("comm=%s pid=%d prio=%d",
		  __entry->comm, __entry->pid, __entry->prio)
);
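
/*
 * Illustrative only: the wait side (e.g. do_wait() in kernel/exit.c)
 * emits this for the pid being waited on; note that comm and prio
 * describe the waiting task (current), not the child.
 */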

/*
 * Tracepoint for do_fork:
 */
TRACE_EVENT(sched_process_fork,

	TP_PROTO(struct task_struct *parent, struct task_struct *child),

	TP_ARGS(parent, child),

	TP_STRUCT__entry(
		__array(	char,	parent_comm,	TASK_COMM_LEN	)
		__field(	pid_t,	parent_pid			)
		__array(	char,	child_comm,	TASK_COMM_LEN	)
		__field(	pid_t,	child_pid			)
	),

	TP_fast_assign(
		memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
		__entry->parent_pid	= parent->pid;
		memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
		__entry->child_pid	= child->pid;
	),

	TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",
		__entry->parent_comm, __entry->parent_pid,
		__entry->child_comm, __entry->child_pid)
);
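
/*
 * Illustrative only: the fork path in kernel/fork.c emits this once the
 * child exists, roughly trace_sched_process_fork(current, p). The child
 * comm is copied at fork time, so it matches the parent until the child
 * execs or renames itself.
 */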

/*
 * Tracepoint for exec:
 */
TRACE_EVENT(sched_process_exec,

	TP_PROTO(struct task_struct *p, pid_t old_pid,
		 struct linux_binprm *bprm),

	TP_ARGS(p, old_pid, bprm),

	TP_STRUCT__entry(
		__string(	filename,	bprm->filename	)
		__field(	pid_t,		pid		)
		__field(	pid_t,		old_pid		)
	),

	TP_fast_assign(
		__assign_str(filename, bprm->filename);
		__entry->pid		= p->pid;
		__entry->old_pid	= old_pid;
	),

	TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename),
		  __entry->pid, __entry->old_pid)
);
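
/*
 * Illustrative only: fs/exec.c emits this after a successful exec.
 * old_pid differs from pid only when exec changes the task's pid, e.g.
 * when a non-leader thread execs and takes over the thread group. A
 * rendered record looks roughly like (values made up):
 *
 *	filename=/bin/ls pid=1234 old_pid=1234
 */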

/*
 * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE;
 *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
 */
DECLARE_EVENT_CLASS(sched_stat_template,

	TP_PROTO(struct task_struct *tsk, u64 delay),

	TP_ARGS(__perf_task(tsk), __perf_count(delay)),

	TP_STRUCT__entry(
		__array( char,	comm,	TASK_COMM_LEN	)
		__field( pid_t,	pid			)
		__field( u64,	delay			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid	= tsk->pid;
		__entry->delay	= delay;
	),

	TP_printk("comm=%s pid=%d delay=%Lu [ns]",
			__entry->comm, __entry->pid,
			(unsigned long long)__entry->delay)
);


/*
 * Tracepoint for accounting wait time (time the task is runnable
 * but not actually running due to scheduler contention).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_wait,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting sleep time (time the task is not runnable,
 * including iowait, see below).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting iowait time (time the task is not runnable
 * due to waiting on IO to complete).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting blocked time (time the task is in
 * uninterruptible sleep).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));
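
/*
 * Usage sketch (shell, illustrative paths): the sched_stat_* events only
 * produce data when schedstats are enabled, so consuming them from
 * tracefs looks roughly like:
 *
 *	sysctl kernel.sched_schedstats=1
 *	echo 1 > /sys/kernel/debug/tracing/events/sched/sched_stat_wait/enable
 *	cat /sys/kernel/debug/tracing/trace_pipe
 */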

/*
 * Tracepoint for accounting runtime (time the task is executing
 * on a CPU).
 */
DECLARE_EVENT_CLASS(sched_stat_runtime,

	TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),

	TP_ARGS(tsk, __perf_count(runtime), vruntime),

	TP_STRUCT__entry(
		__array( char,	comm,	TASK_COMM_LEN	)
		__field( pid_t,	pid			)
		__field( u64,	runtime			)
		__field( u64,	vruntime		)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid		= tsk->pid;
		__entry->runtime	= runtime;
		__entry->vruntime	= vruntime;
	),

	TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
			__entry->comm, __entry->pid,
			(unsigned long long)__entry->runtime,
			(unsigned long long)__entry->vruntime)
);

DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
	     TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
	     TP_ARGS(tsk, runtime, vruntime));
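
/*
 * Illustrative only: update_curr() in kernel/sched/fair.c emits this as
 * it charges execution time, passing the just-accounted delta as runtime
 * and the task's updated vruntime.
 */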

/*
 * Tracepoint for showing priority inheritance modifying a task's
 * priority.
 */
TRACE_EVENT(sched_pi_setprio,

	TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task),

	TP_ARGS(tsk, pi_task),

	TP_STRUCT__entry(
		__array( char,	comm,	TASK_COMM_LEN	)
		__field( pid_t,	pid			)
		__field( int,	oldprio			)
		__field( int,	newprio			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid		= tsk->pid;
		__entry->oldprio	= tsk->prio;
		__entry->newprio	= pi_task ? pi_task->prio : tsk->prio;
		/* XXX SCHED_DEADLINE bits missing */
	),

	TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
			__entry->comm, __entry->pid,
			__entry->oldprio, __entry->newprio)
);
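
/*
 * Illustrative only: the rt_mutex priority-inheritance code (e.g.
 * rt_mutex_setprio() in kernel/sched/core.c) emits this; pi_task is the
 * top waiter donating its priority, or NULL when the boost is undone,
 * in which case newprio falls back to the task's own prio.
 */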

#ifdef CONFIG_DETECT_HUNG_TASK
TRACE_EVENT(sched_process_hang,
	TP_PROTO(struct task_struct *tsk),
	TP_ARGS(tsk),

	TP_STRUCT__entry(
		__array( char,	comm,	TASK_COMM_LEN	)
		__field( pid_t,	pid			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid = tsk->pid;
	),

	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
);
#endif /* CONFIG_DETECT_HUNG_TASK */
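
/*
 * Illustrative only: the hung-task watchdog (kernel/hung_task.c) emits
 * sched_process_hang when a TASK_UNINTERRUPTIBLE task has not scheduled
 * for longer than the configured timeout.
 */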

DECLARE_EVENT_CLASS(sched_move_task_template,

	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),

	TP_ARGS(tsk, src_cpu, dst_cpu),

	TP_STRUCT__entry(
		__field( pid_t,	pid			)
		__field( pid_t,	tgid			)
		__field( pid_t,	ngid			)
		__field( int,	src_cpu			)
		__field( int,	src_nid			)
		__field( int,	dst_cpu			)
		__field( int,	dst_nid			)
	),

	TP_fast_assign(
		__entry->pid		= task_pid_nr(tsk);
		__entry->tgid		= task_tgid_nr(tsk);
		__entry->ngid		= task_numa_group_id(tsk);
		__entry->src_cpu	= src_cpu;
		__entry->src_nid	= cpu_to_node(src_cpu);
		__entry->dst_cpu	= dst_cpu;
		__entry->dst_nid	= cpu_to_node(dst_cpu);
	),

	TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d",
			__entry->pid, __entry->tgid, __entry->ngid,
			__entry->src_cpu, __entry->src_nid,
			__entry->dst_cpu, __entry->dst_nid)
);

/*
 * Tracks migration of tasks from one runqueue to another. Can be used to
 * detect if automatic NUMA balancing is bouncing between nodes.
 */
DEFINE_EVENT(sched_move_task_template, sched_move_numa,
	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),

	TP_ARGS(tsk, src_cpu, dst_cpu)
);

DEFINE_EVENT(sched_move_task_template, sched_stick_numa,
	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),

	TP_ARGS(tsk, src_cpu, dst_cpu)
);

TRACE_EVENT(sched_swap_numa,

	TP_PROTO(struct task_struct *src_tsk, int src_cpu,
		 struct task_struct *dst_tsk, int dst_cpu),

	TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu),

	TP_STRUCT__entry(
		__field( pid_t,	src_pid			)
		__field( pid_t,	src_tgid		)
		__field( pid_t,	src_ngid		)
		__field( int,	src_cpu			)
		__field( int,	src_nid			)
		__field( pid_t,	dst_pid			)
		__field( pid_t,	dst_tgid		)
		__field( pid_t,	dst_ngid		)
		__field( int,	dst_cpu			)
		__field( int,	dst_nid			)
	),

	TP_fast_assign(
		__entry->src_pid	= task_pid_nr(src_tsk);
		__entry->src_tgid	= task_tgid_nr(src_tsk);
		__entry->src_ngid	= task_numa_group_id(src_tsk);
		__entry->src_cpu	= src_cpu;
		__entry->src_nid	= cpu_to_node(src_cpu);
		__entry->dst_pid	= task_pid_nr(dst_tsk);
		__entry->dst_tgid	= task_tgid_nr(dst_tsk);
		__entry->dst_ngid	= task_numa_group_id(dst_tsk);
		__entry->dst_cpu	= dst_cpu;
		__entry->dst_nid	= cpu_to_node(dst_cpu);
	),

	TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d",
			__entry->src_pid, __entry->src_tgid, __entry->src_ngid,
			__entry->src_cpu, __entry->src_nid,
			__entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
			__entry->dst_cpu, __entry->dst_nid)
);
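
/*
 * Illustrative only: these NUMA events fire from the automatic NUMA
 * balancing migration paths, e.g. task_numa_migrate() in
 * kernel/sched/fair.c fires sched_stick_numa when a planned migration
 * fails, and migrate_swap() in kernel/sched/core.c fires sched_swap_numa
 * when two tasks trade CPUs across nodes.
 */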

/*
 * Tracepoint for waking a polling cpu without an IPI.
 */
TRACE_EVENT(sched_wake_idle_without_ipi,

	TP_PROTO(int cpu),

	TP_ARGS(cpu),

	TP_STRUCT__entry(
		__field(	int,	cpu	)
	),

	TP_fast_assign(
		__entry->cpu	= cpu;
	),

	TP_printk("cpu=%d", __entry->cpu)
);
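
/*
 * Illustrative only: when the target CPU is polling on need_resched()
 * (TIF_POLLING_NRFLAG set, e.g. in the polling idle loop), setting the
 * resched flag is enough to wake it; the IPI is skipped and this event
 * fires in its place.
 */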
#endif /* _TRACE_SCHED_H */

/* This part must be outside protection */
#include <trace/define_trace.h>