xref: /linux/include/trace/events/sched.h (revision cf2f33a4e54096f90652cca3511fd6a456ea5abe)
1 #undef TRACE_SYSTEM
2 #define TRACE_SYSTEM sched
3 
4 #if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
5 #define _TRACE_SCHED_H
6 
7 #include <linux/sched.h>
8 #include <linux/tracepoint.h>
9 #include <linux/binfmts.h>
10 
11 /*
12  * Tracepoint for calling kthread_stop, performed to end a kthread:
13  */
14 TRACE_EVENT(sched_kthread_stop,	/* records comm and pid of the task t handed to the tracepoint */
15 
16 	TP_PROTO(struct task_struct *t),
17 
18 	TP_ARGS(t),
19 
20 	TP_STRUCT__entry(
21 		__array(	char,	comm,	TASK_COMM_LEN	)	/* snapshot of t->comm */
22 		__field(	pid_t,	pid			)	/* snapshot of t->pid */
23 	),
24 
25 	TP_fast_assign(
26 		memcpy(__entry->comm, t->comm, TASK_COMM_LEN);	/* copies the whole fixed-size buffer, not just up to a NUL */
27 		__entry->pid	= t->pid;
28 	),
29 
30 	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
31 );
32 
33 /*
34  * Tracepoint for the return value of the kthread stopping:
35  */
36 TRACE_EVENT(sched_kthread_stop_ret,	/* records the single integer ret passed to the tracepoint */
37 
38 	TP_PROTO(int ret),
39 
40 	TP_ARGS(ret),
41 
42 	TP_STRUCT__entry(
43 		__field(	int,	ret	)	/* the only recorded field */
44 	),
45 
46 	TP_fast_assign(
47 		__entry->ret	= ret;	/* stored unmodified */
48 	),
49 
50 	TP_printk("ret=%d", __entry->ret)
51 );
52 
53 /*
54  * Tracepoint for waking up a task:
55  */
56 DECLARE_EVENT_CLASS(sched_wakeup_template,	/* shared layout for sched_waking, sched_wakeup and sched_wakeup_new */
57 
58 	TP_PROTO(struct task_struct *p),
59 
60 	TP_ARGS(__perf_task(p)),	/* __perf_task() is defined outside this file - presumably marks p as the task perf attributes the event to; TODO confirm */
61 
62 	TP_STRUCT__entry(
63 		__array(	char,	comm,	TASK_COMM_LEN	)	/* snapshot of p->comm */
64 		__field(	pid_t,	pid			)
65 		__field(	int,	prio			)
66 		__field(	int,	success			)	/* always written as 1 below and not printed by TP_printk */
67 		__field(	int,	target_cpu		)	/* task_cpu(p) at the time the event fires */
68 	),
69 
70 	TP_fast_assign(
71 		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
72 		__entry->pid		= p->pid;
73 		__entry->prio		= p->prio;
74 		__entry->success	= 1; /* rudiment, kill when possible */
75 		__entry->target_cpu	= task_cpu(p);
76 	),
77 
78 	TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d",	/* target_cpu is zero-padded to three digits; success is omitted */
79 		  __entry->comm, __entry->pid, __entry->prio,
80 		  __entry->target_cpu)
81 );
82 
83 /*
84  * Tracepoint called when waking a task; this tracepoint is guaranteed to be
85  * called from the waking context.
86  */
87 DEFINE_EVENT(sched_wakeup_template, sched_waking,	/* event sched_waking, using the sched_wakeup_template class */
88 	     TP_PROTO(struct task_struct *p),	/* same prototype as the class */
89 	     TP_ARGS(p));
90 
91 /*
92  * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING.
93  * It is not always called from the waking context.
94  */
95 DEFINE_EVENT(sched_wakeup_template, sched_wakeup,	/* event sched_wakeup, using the sched_wakeup_template class */
96 	     TP_PROTO(struct task_struct *p),	/* same prototype as the class */
97 	     TP_ARGS(p));
98 
99 /*
100  * Tracepoint for waking up a new task:
101  */
102 DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,	/* event sched_wakeup_new, using the sched_wakeup_template class */
103 	     TP_PROTO(struct task_struct *p),	/* same prototype as the class */
104 	     TP_ARGS(p));
105 
106 #ifdef CREATE_TRACE_POINTS	/* helper is only compiled when CREATE_TRACE_POINTS is defined */
107 static inline long __trace_sched_switch_state(struct task_struct *p)	/* yields the value sched_switch stores as prev_state */
108 {
109 	long state = p->state;	/* default: report the task's state field unchanged */
110 
111 #ifdef CONFIG_PREEMPT
112 #ifdef CONFIG_SCHED_DEBUG
113 	BUG_ON(p != current);	/* debug-only check; presumably needed because preempt_count() below describes the current context - confirm */
114 #endif /* CONFIG_SCHED_DEBUG */
115 	/*
116 	 * For all intents and purposes a preempted task is a running task.
117 	 */
118 	if (preempt_count() & PREEMPT_ACTIVE)
119 		state = TASK_RUNNING | TASK_STATE_MAX;	/* TASK_STATE_MAX is a marker bit; sched_switch's TP_printk prints "+" when it is set */
120 #endif /* CONFIG_PREEMPT */
121 
122 	return state;	/* without CONFIG_PREEMPT this is always p->state */
123 }
124 #endif /* CREATE_TRACE_POINTS */
125 
126 /*
127  * Tracepoint for task switches, performed by the scheduler:
128  */
129 TRACE_EVENT(sched_switch,	/* records comm/pid/prio of both tasks plus the outgoing task's state */
130 
131 	TP_PROTO(struct task_struct *prev,
132 		 struct task_struct *next),
133 
134 	TP_ARGS(prev, next),
135 
136 	TP_STRUCT__entry(
137 		__array(	char,	prev_comm,	TASK_COMM_LEN	)
138 		__field(	pid_t,	prev_pid			)
139 		__field(	int,	prev_prio			)
140 		__field(	long,	prev_state			)	/* only prev has a state field; next has none */
141 		__array(	char,	next_comm,	TASK_COMM_LEN	)
142 		__field(	pid_t,	next_pid			)
143 		__field(	int,	next_prio			)
144 	),
145 
146 	TP_fast_assign(
147 		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
148 		__entry->prev_pid	= prev->pid;
149 		__entry->prev_prio	= prev->prio;
150 		__entry->prev_state	= __trace_sched_switch_state(prev);	/* may carry the TASK_STATE_MAX marker bit, see helper above */
151 		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
152 		__entry->next_pid	= next->pid;
153 		__entry->next_prio	= next->prio;
154 	),
155 
156 	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
157 		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
158 		__entry->prev_state & (TASK_STATE_MAX-1) ?	/* first %s of prev_state: any bit below TASK_STATE_MAX set? */
159 		  __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",	/* yes: decode the set bits to letters, "|" as delimiter */
160 				{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },	/* bit value -> letter table */
161 				{ 16, "Z" }, { 32, "X" }, { 64, "x" },
162 				{ 128, "K" }, { 256, "W" }, { 512, "P" },
163 				{ 1024, "N" }) : "R",	/* no: none of those bits set, print "R" */
164 		__entry->prev_state & TASK_STATE_MAX ? "+" : "",	/* second %s: "+" when the TASK_STATE_MAX marker bit is set */
165 		__entry->next_comm, __entry->next_pid, __entry->next_prio)
166 );
167 
168 /*
169  * Tracepoint for a task being migrated:
170  */
171 TRACE_EVENT(sched_migrate_task,	/* records task identity, its current CPU and the requested destination CPU */
172 
173 	TP_PROTO(struct task_struct *p, int dest_cpu),
174 
175 	TP_ARGS(p, dest_cpu),
176 
177 	TP_STRUCT__entry(
178 		__array(	char,	comm,	TASK_COMM_LEN	)
179 		__field(	pid_t,	pid			)
180 		__field(	int,	prio			)
181 		__field(	int,	orig_cpu		)	/* task_cpu(p) when the event fires */
182 		__field(	int,	dest_cpu		)	/* the dest_cpu argument */
183 	),
184 
185 	TP_fast_assign(
186 		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
187 		__entry->pid		= p->pid;
188 		__entry->prio		= p->prio;
189 		__entry->orig_cpu	= task_cpu(p);	/* read from the task, not passed by the caller */
190 		__entry->dest_cpu	= dest_cpu;
191 	),
192 
193 	TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
194 		  __entry->comm, __entry->pid, __entry->prio,
195 		  __entry->orig_cpu, __entry->dest_cpu)
196 );
197 
198 DECLARE_EVENT_CLASS(sched_process_template,	/* shared layout for sched_process_free, sched_process_exit and sched_wait_task */
199 
200 	TP_PROTO(struct task_struct *p),
201 
202 	TP_ARGS(p),
203 
204 	TP_STRUCT__entry(
205 		__array(	char,	comm,	TASK_COMM_LEN	)	/* snapshot of p->comm */
206 		__field(	pid_t,	pid			)	/* snapshot of p->pid */
207 		__field(	int,	prio			)	/* snapshot of p->prio */
208 	),
209 
210 	TP_fast_assign(
211 		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
212 		__entry->pid		= p->pid;
213 		__entry->prio		= p->prio;
214 	),
215 
216 	TP_printk("comm=%s pid=%d prio=%d",
217 		  __entry->comm, __entry->pid, __entry->prio)
218 );
219 
220 /*
221  * Tracepoint for freeing a task:
222  */
223 DEFINE_EVENT(sched_process_template, sched_process_free,	/* event sched_process_free, using the sched_process_template class */
224 	     TP_PROTO(struct task_struct *p),	/* same prototype as the class */
225 	     TP_ARGS(p));
226 
227 
228 /*
229  * Tracepoint for a task exiting:
230  */
231 DEFINE_EVENT(sched_process_template, sched_process_exit,	/* event sched_process_exit, using the sched_process_template class */
232 	     TP_PROTO(struct task_struct *p),	/* same prototype as the class */
233 	     TP_ARGS(p));
234 
235 /*
236  * Tracepoint for waiting on task to unschedule:
237  */
238 DEFINE_EVENT(sched_process_template, sched_wait_task,	/* event sched_wait_task, using the sched_process_template class */
239 	TP_PROTO(struct task_struct *p),	/* same prototype as the class */
240 	TP_ARGS(p));
241 
242 /*
243  * Tracepoint for a waiting task:
244  */
245 TRACE_EVENT(sched_process_wait,	/* mixes data from two sources: comm/prio come from current, pid from the argument */
246 
247 	TP_PROTO(struct pid *pid),
248 
249 	TP_ARGS(pid),
250 
251 	TP_STRUCT__entry(
252 		__array(	char,	comm,	TASK_COMM_LEN	)	/* current->comm */
253 		__field(	pid_t,	pid			)	/* pid_nr() of the struct pid argument */
254 		__field(	int,	prio			)	/* current->prio */
255 	),
256 
257 	TP_fast_assign(
258 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
259 		__entry->pid		= pid_nr(pid);	/* numeric id derived from the argument, not current->pid */
260 		__entry->prio		= current->prio;
261 	),
262 
263 	TP_printk("comm=%s pid=%d prio=%d",
264 		  __entry->comm, __entry->pid, __entry->prio)
265 );
266 
267 /*
268  * Tracepoint for do_fork:
269  */
270 TRACE_EVENT(sched_process_fork,	/* records comm and pid of both the parent and the child task */
271 
272 	TP_PROTO(struct task_struct *parent, struct task_struct *child),
273 
274 	TP_ARGS(parent, child),
275 
276 	TP_STRUCT__entry(
277 		__array(	char,	parent_comm,	TASK_COMM_LEN	)
278 		__field(	pid_t,	parent_pid			)
279 		__array(	char,	child_comm,	TASK_COMM_LEN	)
280 		__field(	pid_t,	child_pid			)
281 	),
282 
283 	TP_fast_assign(
284 		memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
285 		__entry->parent_pid	= parent->pid;
286 		memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
287 		__entry->child_pid	= child->pid;
288 	),
289 
290 	TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",	/* parent fields are printed under the unprefixed labels comm= and pid= */
291 		__entry->parent_comm, __entry->parent_pid,
292 		__entry->child_comm, __entry->child_pid)
293 );
294 
295 /*
296  * Tracepoint for exec:
297  */
298 TRACE_EVENT(sched_process_exec,	/* records the exec'd filename together with the task's pid and the caller-supplied old_pid */
299 
300 	TP_PROTO(struct task_struct *p, pid_t old_pid,
301 		 struct linux_binprm *bprm),
302 
303 	TP_ARGS(p, old_pid, bprm),
304 
305 	TP_STRUCT__entry(
306 		__string(	filename,	bprm->filename	)	/* string field sourced from bprm->filename; uses __string rather than a fixed __array */
307 		__field(	pid_t,		pid		)
308 		__field(	pid_t,		old_pid		)
309 	),
310 
311 	TP_fast_assign(
312 		__assign_str(filename, bprm->filename);	/* fills the field declared with __string above */
313 		__entry->pid		= p->pid;
314 		__entry->old_pid	= old_pid;	/* passed in by the caller, not read from p */
315 	),
316 
317 	TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename),	/* __get_str() reads back the __string field */
318 		  __entry->pid, __entry->old_pid)
319 );
320 
321 /*
322  * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
323  *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
324  */
325 DECLARE_EVENT_CLASS(sched_stat_template,	/* shared layout for sched_stat_wait, _sleep, _iowait and _blocked */
326 
327 	TP_PROTO(struct task_struct *tsk, u64 delay),
328 
329 	TP_ARGS(__perf_task(tsk), __perf_count(delay)),	/* both wrappers are defined outside this file - presumably tell perf which task and which count the event carries; TODO confirm */
330 
331 	TP_STRUCT__entry(
332 		__array( char,	comm,	TASK_COMM_LEN	)
333 		__field( pid_t,	pid			)
334 		__field( u64,	delay			)	/* the delay argument, stored unmodified */
335 	),
336 
337 	TP_fast_assign(
338 		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
339 		__entry->pid	= tsk->pid;
340 		__entry->delay	= delay;
341 	),
342 
343 	TP_printk("comm=%s pid=%d delay=%Lu [ns]",	/* output labels the delay value with "[ns]" */
344 			__entry->comm, __entry->pid,
345 			(unsigned long long)__entry->delay)	/* cast so the u64 matches the %Lu conversion */
346 );
347 
348 
349 /*
350  * Tracepoint for accounting wait time (time the task is runnable
351  * but not actually running due to scheduler contention).
352  */
353 DEFINE_EVENT(sched_stat_template, sched_stat_wait,	/* event sched_stat_wait, using the sched_stat_template class */
354 	     TP_PROTO(struct task_struct *tsk, u64 delay),	/* same prototype as the class */
355 	     TP_ARGS(tsk, delay));
356 
357 /*
358  * Tracepoint for accounting sleep time (time the task is not runnable,
359  * including iowait, see below).
360  */
361 DEFINE_EVENT(sched_stat_template, sched_stat_sleep,	/* event sched_stat_sleep, using the sched_stat_template class */
362 	     TP_PROTO(struct task_struct *tsk, u64 delay),	/* same prototype as the class */
363 	     TP_ARGS(tsk, delay));
364 
365 /*
366  * Tracepoint for accounting iowait time (time the task is not runnable
367  * due to waiting on IO to complete).
368  */
369 DEFINE_EVENT(sched_stat_template, sched_stat_iowait,	/* event sched_stat_iowait, using the sched_stat_template class */
370 	     TP_PROTO(struct task_struct *tsk, u64 delay),	/* same prototype as the class */
371 	     TP_ARGS(tsk, delay));
372 
373 /*
374  * Tracepoint for accounting blocked time (time the task is in uninterruptible sleep).
375  */
376 DEFINE_EVENT(sched_stat_template, sched_stat_blocked,	/* event sched_stat_blocked, using the sched_stat_template class */
377 	     TP_PROTO(struct task_struct *tsk, u64 delay),	/* same prototype as the class */
378 	     TP_ARGS(tsk, delay));
379 
380 /*
381  * Tracepoint for accounting runtime (time the task is executing
382  * on a CPU).
383  */
384 DECLARE_EVENT_CLASS(sched_stat_runtime,	/* class; the event of the same name is defined right after it */
385 
386 	TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
387 
388 	TP_ARGS(tsk, __perf_count(runtime), vruntime),	/* only runtime goes through __perf_count() (defined outside this file); tsk is passed without __perf_task() here */
389 
390 	TP_STRUCT__entry(
391 		__array( char,	comm,	TASK_COMM_LEN	)
392 		__field( pid_t,	pid			)
393 		__field( u64,	runtime			)
394 		__field( u64,	vruntime			)
395 	),
396 
397 	TP_fast_assign(
398 		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
399 		__entry->pid		= tsk->pid;
400 		__entry->runtime	= runtime;
401 		__entry->vruntime	= vruntime;
402 	),
403 
404 	TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",	/* both values are labelled "[ns]" in the output */
405 			__entry->comm, __entry->pid,
406 			(unsigned long long)__entry->runtime,	/* casts make the u64 fields match %Lu */
407 			(unsigned long long)__entry->vruntime)
408 );
409 
410 DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,	/* event and class share the name sched_stat_runtime */
411 	     TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),	/* same prototype as the class */
412 	     TP_ARGS(tsk, runtime, vruntime));
413 
414 /*
415  * Tracepoint for showing priority inheritance modifying a task's
416  * priority.
417  */
418 TRACE_EVENT(sched_pi_setprio,	/* records a task's current prio next to the new prio supplied by the caller */
419 
420 	TP_PROTO(struct task_struct *tsk, int newprio),
421 
422 	TP_ARGS(tsk, newprio),
423 
424 	TP_STRUCT__entry(
425 		__array( char,	comm,	TASK_COMM_LEN	)
426 		__field( pid_t,	pid			)
427 		__field( int,	oldprio			)	/* tsk->prio as read when the event fires */
428 		__field( int,	newprio			)	/* the newprio argument */
429 	),
430 
431 	TP_fast_assign(
432 		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
433 		__entry->pid		= tsk->pid;
434 		__entry->oldprio	= tsk->prio;	/* "old" only if the caller fires this before updating tsk->prio - verify at the call site */
435 		__entry->newprio	= newprio;
436 	),
437 
438 	TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
439 			__entry->comm, __entry->pid,
440 			__entry->oldprio, __entry->newprio)
441 );
442 
443 #ifdef CONFIG_DETECT_HUNG_TASK	/* event only exists when hung-task detection is configured */
444 TRACE_EVENT(sched_process_hang,	/* records comm and pid of the task tsk handed to the tracepoint */
445 	TP_PROTO(struct task_struct *tsk),
446 	TP_ARGS(tsk),
447 
448 	TP_STRUCT__entry(
449 		__array( char,	comm,	TASK_COMM_LEN	)	/* snapshot of tsk->comm */
450 		__field( pid_t,	pid			)	/* snapshot of tsk->pid */
451 	),
452 
453 	TP_fast_assign(
454 		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
455 		__entry->pid = tsk->pid;
456 	),
457 
458 	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
459 );
460 #endif /* CONFIG_DETECT_HUNG_TASK */
461 
462 DECLARE_EVENT_CLASS(sched_move_task_template,	/* shared layout for sched_move_numa and sched_stick_numa */
463 
464 	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
465 
466 	TP_ARGS(tsk, src_cpu, dst_cpu),
467 
468 	TP_STRUCT__entry(
469 		__field( pid_t,	pid			)	/* task_pid_nr(tsk) */
470 		__field( pid_t,	tgid			)	/* task_tgid_nr(tsk) */
471 		__field( pid_t,	ngid			)	/* task_numa_group_id(tsk) */
472 		__field( int,	src_cpu			)
473 		__field( int,	src_nid			)	/* cpu_to_node(src_cpu) */
474 		__field( int,	dst_cpu			)
475 		__field( int,	dst_nid			)	/* cpu_to_node(dst_cpu) */
476 	),
477 
478 	TP_fast_assign(
479 		__entry->pid		= task_pid_nr(tsk);
480 		__entry->tgid		= task_tgid_nr(tsk);
481 		__entry->ngid		= task_numa_group_id(tsk);
482 		__entry->src_cpu	= src_cpu;
483 		__entry->src_nid	= cpu_to_node(src_cpu);	/* node ids are derived here from the CPU arguments, not passed in */
484 		__entry->dst_cpu	= dst_cpu;
485 		__entry->dst_nid	= cpu_to_node(dst_cpu);
486 	),
487 
488 	TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d",	/* no comm field in this class */
489 			__entry->pid, __entry->tgid, __entry->ngid,
490 			__entry->src_cpu, __entry->src_nid,
491 			__entry->dst_cpu, __entry->dst_nid)
492 );
493 
494 /*
495  * Tracks migration of tasks from one runqueue to another. Can be used to
496  * detect if automatic NUMA balancing is bouncing between nodes
497  */
498 DEFINE_EVENT(sched_move_task_template, sched_move_numa,	/* event sched_move_numa, using the sched_move_task_template class */
499 	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),	/* same prototype as the class */
500 
501 	TP_ARGS(tsk, src_cpu, dst_cpu)
502 );
503 
504 DEFINE_EVENT(sched_move_task_template, sched_stick_numa,	/* event sched_stick_numa, using the sched_move_task_template class */
505 	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),	/* same prototype as the class */
506 
507 	TP_ARGS(tsk, src_cpu, dst_cpu)
508 );
509 
510 TRACE_EVENT(sched_swap_numa,	/* records the same pid/tgid/ngid/cpu/nid set for two tasks, src and dst */
511 
512 	TP_PROTO(struct task_struct *src_tsk, int src_cpu,
513 		 struct task_struct *dst_tsk, int dst_cpu),
514 
515 	TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu),
516 
517 	TP_STRUCT__entry(
518 		__field( pid_t,	src_pid			)
519 		__field( pid_t,	src_tgid		)
520 		__field( pid_t,	src_ngid		)
521 		__field( int,	src_cpu			)
522 		__field( int,	src_nid			)	/* cpu_to_node(src_cpu) */
523 		__field( pid_t,	dst_pid			)
524 		__field( pid_t,	dst_tgid		)
525 		__field( pid_t,	dst_ngid		)
526 		__field( int,	dst_cpu			)
527 		__field( int,	dst_nid			)	/* cpu_to_node(dst_cpu) */
528 	),
529 
530 	TP_fast_assign(
531 		__entry->src_pid	= task_pid_nr(src_tsk);	/* both task pointers are dereferenced unconditionally */
532 		__entry->src_tgid	= task_tgid_nr(src_tsk);
533 		__entry->src_ngid	= task_numa_group_id(src_tsk);
534 		__entry->src_cpu	= src_cpu;
535 		__entry->src_nid	= cpu_to_node(src_cpu);
536 		__entry->dst_pid	= task_pid_nr(dst_tsk);
537 		__entry->dst_tgid	= task_tgid_nr(dst_tsk);
538 		__entry->dst_ngid	= task_numa_group_id(dst_tsk);
539 		__entry->dst_cpu	= dst_cpu;
540 		__entry->dst_nid	= cpu_to_node(dst_cpu);
541 	),
542 
543 	TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d",
544 			__entry->src_pid, __entry->src_tgid, __entry->src_ngid,
545 			__entry->src_cpu, __entry->src_nid,
546 			__entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
547 			__entry->dst_cpu, __entry->dst_nid)
548 );
549 
550 /*
551  * Tracepoint for waking a polling cpu without an IPI.
552  */
553 TRACE_EVENT(sched_wake_idle_without_ipi,	/* records only the cpu number passed to the tracepoint */
554 
555 	TP_PROTO(int cpu),
556 
557 	TP_ARGS(cpu),
558 
559 	TP_STRUCT__entry(
560 		__field(	int,	cpu	)	/* the only recorded field */
561 	),
562 
563 	TP_fast_assign(
564 		__entry->cpu	= cpu;	/* stored unmodified */
565 	),
566 
567 	TP_printk("cpu=%d", __entry->cpu)
568 );
569 #endif /* _TRACE_SCHED_H */
570 
571 /* This part must be outside protection */
572 #include <trace/define_trace.h>
573