/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM sched

#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_SCHED_H

#include <linux/sched/numa_balancing.h>
#include <linux/tracepoint.h>
#include <linux/binfmts.h>

/*
 * Tracepoint for calling kthread_stop, performed to end a kthread:
 */
TRACE_EVENT(sched_kthread_stop,

	TP_PROTO(struct task_struct *t),

	TP_ARGS(t),

	TP_STRUCT__entry(
		__array(char,	comm,	TASK_COMM_LEN)
		__field(pid_t,	pid)
	),

	TP_fast_assign(
		memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
		__entry->pid = t->pid;
	),

	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
);

/*
 * Tracepoint for the return value of the kthread stopping:
 */
TRACE_EVENT(sched_kthread_stop_ret,

	TP_PROTO(int ret),

	TP_ARGS(ret),

	TP_STRUCT__entry(
		__field(int,	ret)
	),

	TP_fast_assign(
		__entry->ret = ret;
	),

	TP_printk("ret=%d", __entry->ret)
);

/*
 * Tracepoint for waking up a task:
 */
DECLARE_EVENT_CLASS(sched_wakeup_template,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(__perf_task(p)),

	TP_STRUCT__entry(
		__array(char,	comm,	TASK_COMM_LEN)
		__field(pid_t,	pid)
		__field(int,	prio)
		__field(int,	success)
		__field(int,	target_cpu)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid		= p->pid;
		__entry->prio		= p->prio; /* XXX SCHED_DEADLINE */
		__entry->success	= 1; /* rudiment, kill when possible */
		__entry->target_cpu	= task_cpu(p);
	),

	TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d",
		  __entry->comm, __entry->pid, __entry->prio,
		  __entry->target_cpu)
);

/*
 * Tracepoint called when waking a task; this tracepoint is guaranteed to be
 * called from the waking context.
 */
DEFINE_EVENT(sched_wakeup_template, sched_waking,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING.
 * It is not always called from the waking context.
 */
DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint for waking up a new task:
 */
DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));
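/*
 * Example (illustrative sketch, not part of the upstream header): kernel
 * code can attach a probe to any of the wakeup events above with the
 * generated register_trace_<name>() helpers.  A probe receives the
 * private data pointer passed at registration, followed by the TP_PROTO
 * arguments.  probe_wakeup is a hypothetical name.
 *
 *	static void probe_wakeup(void *data, struct task_struct *p)
 *	{
 *		// Runs in the wakeup path with preemption disabled;
 *		// keep it short and never sleep here.
 *		pr_debug("wakeup: comm=%s pid=%d cpu=%d\n",
 *			 p->comm, p->pid, task_cpu(p));
 *	}
 *
 *	register_trace_sched_wakeup(probe_wakeup, NULL);
 *	...
 *	unregister_trace_sched_wakeup(probe_wakeup, NULL);
 */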
#ifdef CREATE_TRACE_POINTS
static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */

	/*
	 * Preemption ignores task state, therefore preempted tasks are always
	 * RUNNING (we will not have dequeued if state != RUNNING).
	 */
	if (preempt)
		return TASK_REPORT_MAX;

	return 1 << task_state_index(p);
}
#endif /* CREATE_TRACE_POINTS */

/*
 * Tracepoint for task switches, performed by the scheduler:
 */
TRACE_EVENT(sched_switch,

	TP_PROTO(bool preempt,
		 struct task_struct *prev,
		 struct task_struct *next),

	TP_ARGS(preempt, prev, next),

	TP_STRUCT__entry(
		__array(char,	prev_comm,	TASK_COMM_LEN)
		__field(pid_t,	prev_pid)
		__field(int,	prev_prio)
		__field(long,	prev_state)
		__array(char,	next_comm,	TASK_COMM_LEN)
		__field(pid_t,	next_pid)
		__field(int,	next_prio)
	),

	TP_fast_assign(
		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
		__entry->prev_pid	= prev->pid;
		__entry->prev_prio	= prev->prio;
		__entry->prev_state	= __trace_sched_switch_state(preempt, prev);
		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
		__entry->next_pid	= next->pid;
		__entry->next_prio	= next->prio;
		/* XXX SCHED_DEADLINE */
	),

	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,

		(__entry->prev_state & (TASK_REPORT_MAX - 1)) ?
		  __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|",
				{ 0x01, "S" }, { 0x02, "D" }, { 0x04, "T" },
				{ 0x08, "t" }, { 0x10, "X" }, { 0x20, "Z" },
				{ 0x40, "P" }, { 0x80, "I" }) :
		  "R",

		__entry->prev_state & TASK_REPORT_MAX ? "+" : "",
		__entry->next_comm, __entry->next_pid, __entry->next_prio)
);

/*
 * Tracepoint for a task being migrated:
 */
TRACE_EVENT(sched_migrate_task,

	TP_PROTO(struct task_struct *p, int dest_cpu),

	TP_ARGS(p, dest_cpu),

	TP_STRUCT__entry(
		__array(char,	comm,	TASK_COMM_LEN)
		__field(pid_t,	pid)
		__field(int,	prio)
		__field(int,	orig_cpu)
		__field(int,	dest_cpu)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid		= p->pid;
		__entry->prio		= p->prio; /* XXX SCHED_DEADLINE */
		__entry->orig_cpu	= task_cpu(p);
		__entry->dest_cpu	= dest_cpu;
	),

	TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
		  __entry->comm, __entry->pid, __entry->prio,
		  __entry->orig_cpu, __entry->dest_cpu)
);

DECLARE_EVENT_CLASS(sched_process_template,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(p),

	TP_STRUCT__entry(
		__array(char,	comm,	TASK_COMM_LEN)
		__field(pid_t,	pid)
		__field(int,	prio)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid	= p->pid;
		__entry->prio	= p->prio; /* XXX SCHED_DEADLINE */
	),

	TP_printk("comm=%s pid=%d prio=%d",
		  __entry->comm, __entry->pid, __entry->prio)
);

/*
 * Tracepoint for freeing a task:
 */
DEFINE_EVENT(sched_process_template, sched_process_free,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint for a task exiting:
 */
DEFINE_EVENT(sched_process_template, sched_process_exit,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));

/*
 * Tracepoint for waiting on a task to unschedule:
 */
DEFINE_EVENT(sched_process_template, sched_wait_task,
	     TP_PROTO(struct task_struct *p),
	     TP_ARGS(p));
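/*
 * Example (illustrative sketch, not part of the upstream header): a
 * probe attached to sched_switch sees the raw TP_PROTO arguments, so it
 * can test the preempt flag directly rather than decoding the
 * TASK_REPORT_MAX bit that __trace_sched_switch_state() folds into the
 * recorded prev_state.  probe_switch and nr_blocked are hypothetical
 * names.
 *
 *	static atomic64_t nr_blocked;
 *
 *	static void probe_switch(void *data, bool preempt,
 *				 struct task_struct *prev,
 *				 struct task_struct *next)
 *	{
 *		// A preempted prev is still runnable; otherwise it
 *		// blocked, and task_state_index(prev) says why.
 *		if (!preempt)
 *			atomic64_inc(&nr_blocked);
 *	}
 *
 *	register_trace_sched_switch(probe_switch, NULL);
 */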
/*
 * Tracepoint for a waiting task:
 */
TRACE_EVENT(sched_process_wait,

	TP_PROTO(struct pid *pid),

	TP_ARGS(pid),

	TP_STRUCT__entry(
		__array(char,	comm,	TASK_COMM_LEN)
		__field(pid_t,	pid)
		__field(int,	prio)
	),

	TP_fast_assign(
		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
		__entry->pid	= pid_nr(pid);
		__entry->prio	= current->prio; /* XXX SCHED_DEADLINE */
	),

	TP_printk("comm=%s pid=%d prio=%d",
		  __entry->comm, __entry->pid, __entry->prio)
);

/*
 * Tracepoint for do_fork:
 */
TRACE_EVENT(sched_process_fork,

	TP_PROTO(struct task_struct *parent, struct task_struct *child),

	TP_ARGS(parent, child),

	TP_STRUCT__entry(
		__array(char,	parent_comm,	TASK_COMM_LEN)
		__field(pid_t,	parent_pid)
		__array(char,	child_comm,	TASK_COMM_LEN)
		__field(pid_t,	child_pid)
	),

	TP_fast_assign(
		memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
		__entry->parent_pid	= parent->pid;
		memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
		__entry->child_pid	= child->pid;
	),

	TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",
		  __entry->parent_comm, __entry->parent_pid,
		  __entry->child_comm, __entry->child_pid)
);

/*
 * Tracepoint for exec:
 */
TRACE_EVENT(sched_process_exec,

	TP_PROTO(struct task_struct *p, pid_t old_pid,
		 struct linux_binprm *bprm),

	TP_ARGS(p, old_pid, bprm),

	TP_STRUCT__entry(
		__string(filename,	bprm->filename)
		__field(pid_t,		pid)
		__field(pid_t,		old_pid)
	),

	TP_fast_assign(
		__assign_str(filename, bprm->filename);
		__entry->pid		= p->pid;
		__entry->old_pid	= old_pid;
	),

	TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename),
		  __entry->pid, __entry->old_pid)
);

/*
 * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
 *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
 */
DECLARE_EVENT_CLASS(sched_stat_template,

	TP_PROTO(struct task_struct *tsk, u64 delay),

	TP_ARGS(__perf_task(tsk), __perf_count(delay)),

	TP_STRUCT__entry(
		__array(char,	comm,	TASK_COMM_LEN)
		__field(pid_t,	pid)
		__field(u64,	delay)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid	= tsk->pid;
		__entry->delay	= delay;
	),

	TP_printk("comm=%s pid=%d delay=%Lu [ns]",
		  __entry->comm, __entry->pid,
		  (unsigned long long)__entry->delay)
);

/*
 * Tracepoint for accounting wait time (time the task is runnable
 * but not actually running due to scheduler contention).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_wait,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting sleep time (time the task is not runnable,
 * including iowait, see below).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting iowait time (time the task is not runnable
 * due to waiting on IO to complete).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));
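/*
 * Example (illustrative sketch, not part of the upstream header): all
 * sched_stat_* events share the sched_stat_template signature, so one
 * probe body works for any of them.  Note that these events typically
 * fire only when schedstats are enabled (e.g. via the
 * kernel.sched_schedstats sysctl).  probe_stat and total_iowait_ns are
 * hypothetical names.
 *
 *	static u64 total_iowait_ns;
 *
 *	static void probe_stat(void *data, struct task_struct *tsk, u64 delay)
 *	{
 *		// delay is in nanoseconds, matching the [ns] suffix
 *		// printed by TP_printk above.
 *		total_iowait_ns += delay;
 *	}
 *
 *	register_trace_sched_stat_iowait(probe_stat, NULL);
 */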
/*
 * Tracepoint for accounting blocked time (time the task is in
 * uninterruptible sleep).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
	     TP_PROTO(struct task_struct *tsk, u64 delay),
	     TP_ARGS(tsk, delay));

/*
 * Tracepoint for accounting runtime (time the task is executing
 * on a CPU).
 */
DECLARE_EVENT_CLASS(sched_stat_runtime,

	TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),

	TP_ARGS(tsk, __perf_count(runtime), vruntime),

	TP_STRUCT__entry(
		__array(char,	comm,	TASK_COMM_LEN)
		__field(pid_t,	pid)
		__field(u64,	runtime)
		__field(u64,	vruntime)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid		= tsk->pid;
		__entry->runtime	= runtime;
		__entry->vruntime	= vruntime;
	),

	TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
		  __entry->comm, __entry->pid,
		  (unsigned long long)__entry->runtime,
		  (unsigned long long)__entry->vruntime)
);

DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
	     TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
	     TP_ARGS(tsk, runtime, vruntime));

/*
 * Tracepoint for showing priority inheritance modifying a task's
 * priority.
 */
TRACE_EVENT(sched_pi_setprio,

	TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task),

	TP_ARGS(tsk, pi_task),

	TP_STRUCT__entry(
		__array(char,	comm,	TASK_COMM_LEN)
		__field(pid_t,	pid)
		__field(int,	oldprio)
		__field(int,	newprio)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid		= tsk->pid;
		__entry->oldprio	= tsk->prio;
		__entry->newprio	= pi_task ?
				min(tsk->normal_prio, pi_task->prio) :
				tsk->normal_prio;
		/* XXX SCHED_DEADLINE bits missing */
	),

	TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
		  __entry->comm, __entry->pid,
		  __entry->oldprio, __entry->newprio)
);

#ifdef CONFIG_DETECT_HUNG_TASK
TRACE_EVENT(sched_process_hang,
	TP_PROTO(struct task_struct *tsk),
	TP_ARGS(tsk),

	TP_STRUCT__entry(
		__array(char,	comm,	TASK_COMM_LEN)
		__field(pid_t,	pid)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid = tsk->pid;
	),

	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
);
#endif /* CONFIG_DETECT_HUNG_TASK */
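/*
 * Example (illustrative sketch, not part of the upstream header):
 * sched_stat_runtime fires from hot scheduler paths, so a probe should
 * do as little work as possible; a lockless per-CPU counter is one
 * option.  probe_runtime and cpu_runtime_ns are hypothetical names.
 *
 *	static DEFINE_PER_CPU(u64, cpu_runtime_ns);
 *
 *	static void probe_runtime(void *data, struct task_struct *tsk,
 *				  u64 runtime, u64 vruntime)
 *	{
 *		// runtime and vruntime are both in nanoseconds.
 *		this_cpu_add(cpu_runtime_ns, runtime);
 *	}
 *
 *	register_trace_sched_stat_runtime(probe_runtime, NULL);
 */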
DECLARE_EVENT_CLASS(sched_move_task_template,

	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),

	TP_ARGS(tsk, src_cpu, dst_cpu),

	TP_STRUCT__entry(
		__field(pid_t,	pid)
		__field(pid_t,	tgid)
		__field(pid_t,	ngid)
		__field(int,	src_cpu)
		__field(int,	src_nid)
		__field(int,	dst_cpu)
		__field(int,	dst_nid)
	),

	TP_fast_assign(
		__entry->pid		= task_pid_nr(tsk);
		__entry->tgid		= task_tgid_nr(tsk);
		__entry->ngid		= task_numa_group_id(tsk);
		__entry->src_cpu	= src_cpu;
		__entry->src_nid	= cpu_to_node(src_cpu);
		__entry->dst_cpu	= dst_cpu;
		__entry->dst_nid	= cpu_to_node(dst_cpu);
	),

	TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d",
		  __entry->pid, __entry->tgid, __entry->ngid,
		  __entry->src_cpu, __entry->src_nid,
		  __entry->dst_cpu, __entry->dst_nid)
);

/*
 * Tracks migration of tasks from one runqueue to another. Can be used to
 * detect if automatic NUMA balancing is bouncing between nodes.
 */
DEFINE_EVENT(sched_move_task_template, sched_move_numa,
	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),

	TP_ARGS(tsk, src_cpu, dst_cpu)
);

DEFINE_EVENT(sched_move_task_template, sched_stick_numa,
	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),

	TP_ARGS(tsk, src_cpu, dst_cpu)
);

TRACE_EVENT(sched_swap_numa,

	TP_PROTO(struct task_struct *src_tsk, int src_cpu,
		 struct task_struct *dst_tsk, int dst_cpu),

	TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu),

	TP_STRUCT__entry(
		__field(pid_t,	src_pid)
		__field(pid_t,	src_tgid)
		__field(pid_t,	src_ngid)
		__field(int,	src_cpu)
		__field(int,	src_nid)
		__field(pid_t,	dst_pid)
		__field(pid_t,	dst_tgid)
		__field(pid_t,	dst_ngid)
		__field(int,	dst_cpu)
		__field(int,	dst_nid)
	),

	TP_fast_assign(
		__entry->src_pid	= task_pid_nr(src_tsk);
		__entry->src_tgid	= task_tgid_nr(src_tsk);
		__entry->src_ngid	= task_numa_group_id(src_tsk);
		__entry->src_cpu	= src_cpu;
		__entry->src_nid	= cpu_to_node(src_cpu);
		__entry->dst_pid	= task_pid_nr(dst_tsk);
		__entry->dst_tgid	= task_tgid_nr(dst_tsk);
		__entry->dst_ngid	= task_numa_group_id(dst_tsk);
		__entry->dst_cpu	= dst_cpu;
		__entry->dst_nid	= cpu_to_node(dst_cpu);
	),

	TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d",
		  __entry->src_pid, __entry->src_tgid, __entry->src_ngid,
		  __entry->src_cpu, __entry->src_nid,
		  __entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
		  __entry->dst_cpu, __entry->dst_nid)
);

/*
 * Tracepoint for waking a polling cpu without an IPI.
 */
TRACE_EVENT(sched_wake_idle_without_ipi,

	TP_PROTO(int cpu),

	TP_ARGS(cpu),

	TP_STRUCT__entry(
		__field(int,	cpu)
	),

	TP_fast_assign(
		__entry->cpu = cpu;
	),

	TP_printk("cpu=%d", __entry->cpu)
);
#endif /* _TRACE_SCHED_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
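/*
 * Example (illustrative, hypothetical module skeleton): attaching and
 * detaching a probe over a module's lifetime.  Whether a given sched
 * tracepoint can be reached from a module depends on the kernel
 * exporting it (EXPORT_TRACEPOINT_SYMBOL*), so treat this as a sketch
 * rather than something guaranteed to link on every kernel.
 *
 *	static void probe_ipi(void *data, int cpu)
 *	{
 *		pr_debug("polling wakeup of cpu %d without IPI\n", cpu);
 *	}
 *
 *	static int __init sched_probe_init(void)
 *	{
 *		return register_trace_sched_wake_idle_without_ipi(probe_ipi, NULL);
 *	}
 *
 *	static void __exit sched_probe_exit(void)
 *	{
 *		unregister_trace_sched_wake_idle_without_ipi(probe_ipi, NULL);
 *		// Wait for in-flight probes to finish before the
 *		// module text goes away.
 *		tracepoint_synchronize_unregister();
 *	}
 */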