#undef TRACE_SYSTEM
#define TRACE_SYSTEM sched

#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_SCHED_H

#include <linux/sched.h>
#include <linux/tracepoint.h>
#include <linux/binfmts.h>

/*
 * Tracepoint for calling kthread_stop, performed to end a kthread:
 */
TRACE_EVENT(sched_kthread_stop,

	TP_PROTO(struct task_struct *t),

	TP_ARGS(t),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
		__entry->pid	= t->pid;
	),

	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
);

/*
 * Tracepoint for the return value of kthread_stop():
 */
TRACE_EVENT(sched_kthread_stop_ret,

	TP_PROTO(int ret),

	TP_ARGS(ret),

	TP_STRUCT__entry(
		__field(	int,	ret	)
	),

	TP_fast_assign(
		__entry->ret	= ret;
	),

	TP_printk("ret=%d", __entry->ret)
);
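
/*
 * Usage sketch (an illustration, assuming the kernel/kthread.c call
 * sites): TRACE_EVENT() generates a trace_<name>() static inline for
 * each event, so kthread_stop() can bracket the shutdown with the pair:
 *
 *	trace_sched_kthread_stop(k);
 *	... wake the kthread and wait for it to exit ...
 *	trace_sched_kthread_stop_ret(ret);
 */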

/*
 * Tracepoint for waking up a task:
 */
DECLARE_EVENT_CLASS(sched_wakeup_template,

	TP_PROTO(struct task_struct *p, int success),

	TP_ARGS(__perf_task(p), success),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
		__field(	int,	success			)
		__field(	int,	target_cpu		)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid		= p->pid;
		__entry->prio		= p->prio;
		__entry->success	= success;
		__entry->target_cpu	= task_cpu(p);
	),

	TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
		  __entry->comm, __entry->pid, __entry->prio,
		  __entry->success, __entry->target_cpu)
);

DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
	     TP_PROTO(struct task_struct *p, int success),
	     TP_ARGS(p, success));

/*
 * Tracepoint for waking up a new task:
 */
DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
	     TP_PROTO(struct task_struct *p, int success),
	     TP_ARGS(p, success));
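
/*
 * Note: __perf_task(p) above attributes perf samples for these events to
 * the woken task p rather than to the waker, even though the tracepoint
 * fires in the waker's context.
 *
 * Usage sketch (assuming the kernel/sched/core.c call sites): the
 * generated helpers fire when a task becomes runnable, e.g.
 *
 *	trace_sched_wakeup(p, true);		from the try_to_wake_up() path
 *	trace_sched_wakeup_new(p, true);	from wake_up_new_task()
 */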

#ifdef CREATE_TRACE_POINTS
static inline long __trace_sched_switch_state(struct task_struct *p)
{
	long state = p->state;

#ifdef CONFIG_PREEMPT
#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */
	/*
	 * For all intents and purposes a preempted task is a running task.
	 */
	if (preempt_count() & PREEMPT_ACTIVE)
		state = TASK_RUNNING | TASK_STATE_MAX;
#endif /* CONFIG_PREEMPT */

	return state;
}
#endif /* CREATE_TRACE_POINTS */

/*
 * Tracepoint for task switches, performed by the scheduler:
 */
TRACE_EVENT(sched_switch,

	TP_PROTO(struct task_struct *prev,
		 struct task_struct *next),

	TP_ARGS(prev, next),

	TP_STRUCT__entry(
		__array(	char,	prev_comm,	TASK_COMM_LEN	)
		__field(	pid_t,	prev_pid			)
		__field(	int,	prev_prio			)
		__field(	long,	prev_state			)
		__array(	char,	next_comm,	TASK_COMM_LEN	)
		__field(	pid_t,	next_pid			)
		__field(	int,	next_prio			)
	),

	TP_fast_assign(
		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
		__entry->prev_pid	= prev->pid;
		__entry->prev_prio	= prev->prio;
		__entry->prev_state	= __trace_sched_switch_state(prev);
		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
		__entry->next_pid	= next->pid;
		__entry->next_prio	= next->prio;
	),

	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
		__entry->prev_state & (TASK_STATE_MAX-1) ?
		  __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
				{ 1, "S" }, { 2, "D" }, { 4, "T" }, { 8, "t" },
				{ 16, "Z" }, { 32, "X" }, { 64, "x" },
				{ 128, "K" }, { 256, "W" }, { 512, "P" },
				{ 1024, "N" }) : "R",
		__entry->prev_state & TASK_STATE_MAX ? "+" : "",
		__entry->next_comm, __entry->next_pid, __entry->next_prio)
);
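
/*
 * Rendered example, derived from the TP_printk() format above: a task
 * that blocked prints its state letter, while a preempted task prints
 * "R+" because __trace_sched_switch_state() ORs in TASK_STATE_MAX:
 *
 *	prev_comm=bash prev_pid=1234 prev_prio=120 prev_state=S ==>
 *		next_comm=swapper/0 next_pid=0 next_prio=120
 */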

/*
 * Tracepoint for a task being migrated:
 */
TRACE_EVENT(sched_migrate_task,

	TP_PROTO(struct task_struct *p, int dest_cpu),

	TP_ARGS(p, dest_cpu),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
		__field(	int,	orig_cpu		)
		__field(	int,	dest_cpu		)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid		= p->pid;
		__entry->prio		= p->prio;
		__entry->orig_cpu	= task_cpu(p);
		__entry->dest_cpu	= dest_cpu;
	),

	TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
		  __entry->comm, __entry->pid, __entry->prio,
		  __entry->orig_cpu, __entry->dest_cpu)
);
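
/*
 * Usage sketch (assuming the kernel/sched/core.c call site):
 * set_task_cpu() emits this before rewriting the task's CPU, which is
 * why TP_fast_assign() can still read orig_cpu via task_cpu(p):
 *
 *	trace_sched_migrate_task(p, new_cpu);
 */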

DECLARE_EVENT_CLASS(sched_process_template,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(p),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid		= p->pid;
		__entry->prio		= p->prio;
	),

	TP_printk("comm=%s pid=%d prio=%d",
		  __entry->comm, __entry->pid, __entry->prio)
);

/*
 * Tracepoint for freeing a task:
 */
DEFINE_EVENT(sched_process_template, sched_process_free,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint for a task exiting:
 */
DEFINE_EVENT(sched_process_template, sched_process_exit,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint for waiting on a task to unschedule:
 */
DEFINE_EVENT(sched_process_template, sched_wait_task,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p));

/*
 * Tracepoint for a waiting task:
 */
TRACE_EVENT(sched_process_wait,

	TP_PROTO(struct pid *pid),

	TP_ARGS(pid),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
		__entry->pid		= pid_nr(pid);
		__entry->prio		= current->prio;
	),

	TP_printk("comm=%s pid=%d prio=%d",
		  __entry->comm, __entry->pid, __entry->prio)
);
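
/*
 * Usage sketch (assuming the usual kernel/exit.c and scheduler call
 * sites for the events above):
 *
 *	trace_sched_process_exit(tsk);		do_exit()
 *	trace_sched_process_free(tsk);		delayed_put_task_struct()
 *	trace_sched_wait_task(p);		wait_task_inactive()
 *	trace_sched_process_wait(wo->wo_pid);	do_wait()
 */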

/*
 * Tracepoint for do_fork:
 */
TRACE_EVENT(sched_process_fork,

	TP_PROTO(struct task_struct *parent, struct task_struct *child),

	TP_ARGS(parent, child),

	TP_STRUCT__entry(
		__array(	char,	parent_comm,	TASK_COMM_LEN	)
		__field(	pid_t,	parent_pid			)
		__array(	char,	child_comm,	TASK_COMM_LEN	)
		__field(	pid_t,	child_pid			)
	),

	TP_fast_assign(
		memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
		__entry->parent_pid	= parent->pid;
		memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
		__entry->child_pid	= child->pid;
	),

	TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",
		__entry->parent_comm, __entry->parent_pid,
		__entry->child_comm, __entry->child_pid)
);
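
/*
 * Usage sketch (assuming the kernel/fork.c call site): emitted once the
 * child task exists but before it has been woken for the first time:
 *
 *	trace_sched_process_fork(current, p);
 */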

/*
 * Tracepoint for exec:
 */
TRACE_EVENT(sched_process_exec,

	TP_PROTO(struct task_struct *p, pid_t old_pid,
		 struct linux_binprm *bprm),

	TP_ARGS(p, old_pid, bprm),

	TP_STRUCT__entry(
		__string(	filename,	bprm->filename	)
		__field(	pid_t,		pid		)
		__field(	pid_t,		old_pid		)
	),

	TP_fast_assign(
		__assign_str(filename, bprm->filename);
		__entry->pid		= p->pid;
		__entry->old_pid	= old_pid;
	),

	TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename),
		  __entry->pid, __entry->old_pid)
);
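
/*
 * Note: __string()/__assign_str() record bprm->filename as a variable-
 * length field in the ring buffer instead of a fixed array. old_pid lets
 * tools correlate the exec with the pre-exec pid, which differs when a
 * non-leader thread calls exec.
 *
 * Usage sketch (assuming the fs/exec.c call site):
 *
 *	trace_sched_process_exec(current, old_pid, bprm);
 */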

/*
 * XXX The sched_stat tracepoints below apply only to SCHED_OTHER/BATCH/IDLE
 *     tasks; adding sched_stat support to SCHED_FIFO/RR would be welcome.
 */
DECLARE_EVENT_CLASS(sched_stat_template,

	TP_PROTO(struct task_struct *tsk, u64 delay),

	TP_ARGS(__perf_task(tsk), __perf_count(delay)),

	TP_STRUCT__entry(
		__array( char,	comm,	TASK_COMM_LEN	)
		__field( pid_t,	pid			)
		__field( u64,	delay			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid	= tsk->pid;
		__entry->delay	= delay;
	),

	TP_printk("comm=%s pid=%d delay=%Lu [ns]",
			__entry->comm, __entry->pid,
			(unsigned long long)__entry->delay)
);

/*
 * Tracepoint for accounting wait time (time the task is runnable
 * but not actually running due to scheduler contention).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_wait,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting sleep time (time the task is not runnable,
 * including iowait, see below).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting iowait time (time the task is not runnable
 * due to waiting on IO to complete).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting blocked time (time the task is in
 * uninterruptible sleep).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));
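
/*
 * Note: __perf_count(delay) makes perf add the measured delay to the
 * event count, so sampling on these events is weighted by time rather
 * than by the number of occurrences, and __perf_task(tsk) attributes the
 * sample to the task that waited or slept.
 *
 * Usage sketch (assuming the schedstats-guarded kernel/sched/fair.c call
 * sites, with delta measured in ns):
 *
 *	trace_sched_stat_wait(tsk, delta);
 *	trace_sched_stat_sleep(tsk, delta);
 *	trace_sched_stat_iowait(tsk, delta);
 *	trace_sched_stat_blocked(tsk, delta);
 */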

/*
 * Tracepoint for accounting runtime (time the task is executing
 * on a CPU).
 */
DECLARE_EVENT_CLASS(sched_stat_runtime,

	TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),

	TP_ARGS(tsk, __perf_count(runtime), vruntime),

	TP_STRUCT__entry(
		__array( char,	comm,	TASK_COMM_LEN	)
		__field( pid_t,	pid			)
		__field( u64,	runtime			)
		__field( u64,	vruntime		)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid		= tsk->pid;
		__entry->runtime	= runtime;
		__entry->vruntime	= vruntime;
	),

	TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
			__entry->comm, __entry->pid,
			(unsigned long long)__entry->runtime,
			(unsigned long long)__entry->vruntime)
);

DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
	     TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
	     TP_ARGS(tsk, runtime, vruntime));
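
/*
 * Usage sketch (assuming the kernel/sched/fair.c call site): CFS runtime
 * accounting in update_curr() reports each executed slice together with
 * the entity's updated vruntime:
 *
 *	trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
 */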

/*
 * Tracepoint for showing priority inheritance modifying a task's
 * priority.
 */
TRACE_EVENT(sched_pi_setprio,

	TP_PROTO(struct task_struct *tsk, int newprio),

	TP_ARGS(tsk, newprio),

	TP_STRUCT__entry(
		__array( char,	comm,	TASK_COMM_LEN	)
		__field( pid_t,	pid			)
		__field( int,	oldprio			)
		__field( int,	newprio			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid		= tsk->pid;
		__entry->oldprio	= tsk->prio;
		__entry->newprio	= newprio;
	),

	TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
			__entry->comm, __entry->pid,
			__entry->oldprio, __entry->newprio)
);
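
/*
 * Usage sketch (assuming the rt_mutex_setprio() call site in
 * kernel/sched/core.c): the event is emitted before tsk->prio is
 * rewritten, which is why TP_fast_assign() can still read oldprio from
 * tsk->prio:
 *
 *	trace_sched_pi_setprio(p, prio);
 */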

#ifdef CONFIG_DETECT_HUNG_TASK
TRACE_EVENT(sched_process_hang,
	TP_PROTO(struct task_struct *tsk),
	TP_ARGS(tsk),

	TP_STRUCT__entry(
		__array( char,	comm,	TASK_COMM_LEN	)
		__field( pid_t,	pid			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid = tsk->pid;
	),

	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
);
#endif /* CONFIG_DETECT_HUNG_TASK */
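
/*
 * Usage sketch (assuming the kernel/hung_task.c call site): the
 * khungtaskd watchdog emits this when a task has sat in uninterruptible
 * sleep for longer than the configured timeout:
 *
 *	trace_sched_process_hang(t);
 */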

DECLARE_EVENT_CLASS(sched_move_task_template,

	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),

	TP_ARGS(tsk, src_cpu, dst_cpu),

	TP_STRUCT__entry(
		__field( pid_t,	pid			)
		__field( pid_t,	tgid			)
		__field( pid_t,	ngid			)
		__field( int,	src_cpu			)
		__field( int,	src_nid			)
		__field( int,	dst_cpu			)
		__field( int,	dst_nid			)
	),

	TP_fast_assign(
		__entry->pid		= task_pid_nr(tsk);
		__entry->tgid		= task_tgid_nr(tsk);
		__entry->ngid		= task_numa_group_id(tsk);
		__entry->src_cpu	= src_cpu;
		__entry->src_nid	= cpu_to_node(src_cpu);
		__entry->dst_cpu	= dst_cpu;
		__entry->dst_nid	= cpu_to_node(dst_cpu);
	),

	TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d",
			__entry->pid, __entry->tgid, __entry->ngid,
			__entry->src_cpu, __entry->src_nid,
			__entry->dst_cpu, __entry->dst_nid)
);

/*
 * Tracks migration of tasks from one runqueue to another. Can be used to
 * detect whether automatic NUMA balancing is bouncing between nodes.
 */
DEFINE_EVENT(sched_move_task_template, sched_move_numa,
	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),

	TP_ARGS(tsk, src_cpu, dst_cpu)
);

DEFINE_EVENT(sched_move_task_template, sched_stick_numa,
	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),

	TP_ARGS(tsk, src_cpu, dst_cpu)
);

TRACE_EVENT(sched_swap_numa,

	TP_PROTO(struct task_struct *src_tsk, int src_cpu,
		 struct task_struct *dst_tsk, int dst_cpu),

	TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu),

	TP_STRUCT__entry(
		__field( pid_t,	src_pid			)
		__field( pid_t,	src_tgid		)
		__field( pid_t,	src_ngid		)
		__field( int,	src_cpu			)
		__field( int,	src_nid			)
		__field( pid_t,	dst_pid			)
		__field( pid_t,	dst_tgid		)
		__field( pid_t,	dst_ngid		)
		__field( int,	dst_cpu			)
		__field( int,	dst_nid			)
	),

	TP_fast_assign(
		__entry->src_pid	= task_pid_nr(src_tsk);
		__entry->src_tgid	= task_tgid_nr(src_tsk);
		__entry->src_ngid	= task_numa_group_id(src_tsk);
		__entry->src_cpu	= src_cpu;
		__entry->src_nid	= cpu_to_node(src_cpu);
		__entry->dst_pid	= task_pid_nr(dst_tsk);
		__entry->dst_tgid	= task_tgid_nr(dst_tsk);
		__entry->dst_ngid	= task_numa_group_id(dst_tsk);
		__entry->dst_cpu	= dst_cpu;
		__entry->dst_nid	= cpu_to_node(dst_cpu);
	),

	TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d",
			__entry->src_pid, __entry->src_tgid, __entry->src_ngid,
			__entry->src_cpu, __entry->src_nid,
			__entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
			__entry->dst_cpu, __entry->dst_nid)
);
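
/*
 * Usage sketch (an assumption about the NUMA balancing call sites under
 * kernel/sched/): sched_move_numa reports a successful cross-node move,
 * sched_stick_numa a move attempt that failed (the task "sticks"), and
 * sched_swap_numa a pairwise exchange of two tasks, invoked with the
 * prototype's own arguments, e.g.
 *
 *	trace_sched_swap_numa(src_tsk, src_cpu, dst_tsk, dst_cpu);
 */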

/*
 * Tracepoint for waking a polling CPU without an IPI.
 */
TRACE_EVENT(sched_wake_idle_without_ipi,

	TP_PROTO(int cpu),

	TP_ARGS(cpu),

	TP_STRUCT__entry(
		__field(	int,	cpu	)
	),

	TP_fast_assign(
		__entry->cpu	= cpu;
	),

	TP_printk("cpu=%d", __entry->cpu)
);
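
/*
 * Usage sketch (assuming the kernel/sched/core.c wake-up path): when the
 * remote CPU's idle task polls on TIF_NEED_RESCHED, setting the flag is
 * enough to wake it and the reschedule IPI can be skipped:
 *
 *	if (set_nr_and_not_polling(rq->idle))
 *		smp_send_reschedule(cpu);
 *	else
 *		trace_sched_wake_idle_without_ipi(cpu);
 */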
#endif /* _TRACE_SCHED_H */

/* This part must be outside protection */
#include <trace/define_trace.h>