1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * trace context switch 4 * 5 * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com> 6 * 7 */ 8 #include <linux/module.h> 9 #include <linux/kallsyms.h> 10 #include <linux/uaccess.h> 11 #include <linux/kmemleak.h> 12 #include <linux/ftrace.h> 13 #include <trace/events/sched.h> 14 15 #include "trace.h" 16 17 #define RECORD_CMDLINE 1 18 #define RECORD_TGID 2 19 20 static int sched_cmdline_ref; 21 static int sched_tgid_ref; 22 static DEFINE_MUTEX(sched_register_mutex); 23 24 static void 25 probe_sched_switch(void *ignore, bool preempt, 26 struct task_struct *prev, struct task_struct *next, 27 unsigned int prev_state) 28 { 29 int flags; 30 31 flags = (RECORD_TGID * !!sched_tgid_ref) + 32 (RECORD_CMDLINE * !!sched_cmdline_ref); 33 34 if (!flags) 35 return; 36 tracing_record_taskinfo_sched_switch(prev, next, flags); 37 } 38 39 static void 40 probe_sched_wakeup(void *ignore, struct task_struct *wakee) 41 { 42 int flags; 43 44 flags = (RECORD_TGID * !!sched_tgid_ref) + 45 (RECORD_CMDLINE * !!sched_cmdline_ref); 46 47 if (!flags) 48 return; 49 tracing_record_taskinfo_sched_switch(current, wakee, flags); 50 } 51 52 static int tracing_sched_register(void) 53 { 54 int ret; 55 56 ret = register_trace_sched_wakeup(probe_sched_wakeup, NULL); 57 if (ret) { 58 pr_info("wakeup trace: Couldn't activate tracepoint" 59 " probe to kernel_sched_wakeup\n"); 60 return ret; 61 } 62 63 ret = register_trace_sched_wakeup_new(probe_sched_wakeup, NULL); 64 if (ret) { 65 pr_info("wakeup trace: Couldn't activate tracepoint" 66 " probe to kernel_sched_wakeup_new\n"); 67 goto fail_deprobe; 68 } 69 70 ret = register_trace_sched_switch(probe_sched_switch, NULL); 71 if (ret) { 72 pr_info("sched trace: Couldn't activate tracepoint" 73 " probe to kernel_sched_switch\n"); 74 goto fail_deprobe_wake_new; 75 } 76 77 return ret; 78 fail_deprobe_wake_new: 79 unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL); 80 fail_deprobe: 81 unregister_trace_sched_wakeup(probe_sched_wakeup, NULL); 82 return ret; 83 } 84 85 static void tracing_sched_unregister(void) 86 { 87 unregister_trace_sched_switch(probe_sched_switch, NULL); 88 unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL); 89 unregister_trace_sched_wakeup(probe_sched_wakeup, NULL); 90 } 91 92 static void tracing_start_sched_switch(int ops) 93 { 94 bool sched_register; 95 96 mutex_lock(&sched_register_mutex); 97 sched_register = (!sched_cmdline_ref && !sched_tgid_ref); 98 99 switch (ops) { 100 case RECORD_CMDLINE: 101 sched_cmdline_ref++; 102 break; 103 104 case RECORD_TGID: 105 sched_tgid_ref++; 106 break; 107 } 108 109 if (sched_register && (sched_cmdline_ref || sched_tgid_ref)) 110 tracing_sched_register(); 111 mutex_unlock(&sched_register_mutex); 112 } 113 114 static void tracing_stop_sched_switch(int ops) 115 { 116 mutex_lock(&sched_register_mutex); 117 118 switch (ops) { 119 case RECORD_CMDLINE: 120 sched_cmdline_ref--; 121 break; 122 123 case RECORD_TGID: 124 sched_tgid_ref--; 125 break; 126 } 127 128 if (!sched_cmdline_ref && !sched_tgid_ref) 129 tracing_sched_unregister(); 130 mutex_unlock(&sched_register_mutex); 131 } 132 133 void tracing_start_cmdline_record(void) 134 { 135 tracing_start_sched_switch(RECORD_CMDLINE); 136 } 137 138 void tracing_stop_cmdline_record(void) 139 { 140 tracing_stop_sched_switch(RECORD_CMDLINE); 141 } 142 143 void tracing_start_tgid_record(void) 144 { 145 tracing_start_sched_switch(RECORD_TGID); 146 } 147 148 void tracing_stop_tgid_record(void) 149 { 150 tracing_stop_sched_switch(RECORD_TGID); 151 } 152 153 /* 154 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i 155 * is the tgid last observed corresponding to pid=i. 156 */ 157 static int *tgid_map; 158 159 /* The maximum valid index into tgid_map. */ 160 static size_t tgid_map_max; 161 162 #define SAVED_CMDLINES_DEFAULT 128 163 #define NO_CMDLINE_MAP UINT_MAX 164 /* 165 * Preemption must be disabled before acquiring trace_cmdline_lock. 166 * The various trace_arrays' max_lock must be acquired in a context 167 * where interrupt is disabled. 168 */ 169 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED; 170 struct saved_cmdlines_buffer { 171 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; 172 unsigned *map_cmdline_to_pid; 173 unsigned cmdline_num; 174 int cmdline_idx; 175 char saved_cmdlines[]; 176 }; 177 static struct saved_cmdlines_buffer *savedcmd; 178 179 /* Holds the size of a cmdline and pid element */ 180 #define SAVED_CMDLINE_MAP_ELEMENT_SIZE(s) \ 181 (TASK_COMM_LEN + sizeof((s)->map_cmdline_to_pid[0])) 182 183 static inline char *get_saved_cmdlines(int idx) 184 { 185 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN]; 186 } 187 188 static inline void set_cmdline(int idx, const char *cmdline) 189 { 190 strscpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN); 191 } 192 193 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s) 194 { 195 int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN); 196 197 kmemleak_free(s); 198 free_pages((unsigned long)s, order); 199 } 200 201 static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val) 202 { 203 struct saved_cmdlines_buffer *s; 204 struct page *page; 205 int orig_size, size; 206 int order; 207 208 /* Figure out how much is needed to hold the given number of cmdlines */ 209 orig_size = sizeof(*s) + val * SAVED_CMDLINE_MAP_ELEMENT_SIZE(s); 210 order = get_order(orig_size); 211 size = 1 << (order + PAGE_SHIFT); 212 page = alloc_pages(GFP_KERNEL, order); 213 if (!page) 214 return NULL; 215 216 s = page_address(page); 217 kmemleak_alloc(s, size, 1, GFP_KERNEL); 218 memset(s, 0, sizeof(*s)); 219 220 /* Round up to actual allocation */ 221 val = (size - sizeof(*s)) / SAVED_CMDLINE_MAP_ELEMENT_SIZE(s); 222 s->cmdline_num = val; 223 224 /* Place map_cmdline_to_pid array right after saved_cmdlines */ 225 s->map_cmdline_to_pid = (unsigned *)&s->saved_cmdlines[val * TASK_COMM_LEN]; 226 227 s->cmdline_idx = 0; 228 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP, 229 sizeof(s->map_pid_to_cmdline)); 230 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP, 231 val * sizeof(*s->map_cmdline_to_pid)); 232 233 return s; 234 } 235 236 int trace_create_savedcmd(void) 237 { 238 savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT); 239 240 return savedcmd ? 0 : -ENOMEM; 241 } 242 243 int trace_save_cmdline(struct task_struct *tsk) 244 { 245 unsigned tpid, idx; 246 247 /* treat recording of idle task as a success */ 248 if (!tsk->pid) 249 return 1; 250 251 tpid = tsk->pid & (PID_MAX_DEFAULT - 1); 252 253 /* 254 * It's not the end of the world if we don't get 255 * the lock, but we also don't want to spin 256 * nor do we want to disable interrupts, 257 * so if we miss here, then better luck next time. 258 * 259 * This is called within the scheduler and wake up, so interrupts 260 * had better been disabled and run queue lock been held. 261 */ 262 lockdep_assert_preemption_disabled(); 263 if (!arch_spin_trylock(&trace_cmdline_lock)) 264 return 0; 265 266 idx = savedcmd->map_pid_to_cmdline[tpid]; 267 if (idx == NO_CMDLINE_MAP) { 268 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num; 269 270 savedcmd->map_pid_to_cmdline[tpid] = idx; 271 savedcmd->cmdline_idx = idx; 272 } 273 274 savedcmd->map_cmdline_to_pid[idx] = tsk->pid; 275 set_cmdline(idx, tsk->comm); 276 277 arch_spin_unlock(&trace_cmdline_lock); 278 279 return 1; 280 } 281 282 static void __trace_find_cmdline(int pid, char comm[]) 283 { 284 unsigned map; 285 int tpid; 286 287 if (!pid) { 288 strcpy(comm, "<idle>"); 289 return; 290 } 291 292 if (WARN_ON_ONCE(pid < 0)) { 293 strcpy(comm, "<XXX>"); 294 return; 295 } 296 297 tpid = pid & (PID_MAX_DEFAULT - 1); 298 map = savedcmd->map_pid_to_cmdline[tpid]; 299 if (map != NO_CMDLINE_MAP) { 300 tpid = savedcmd->map_cmdline_to_pid[map]; 301 if (tpid == pid) { 302 strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN); 303 return; 304 } 305 } 306 strcpy(comm, "<...>"); 307 } 308 309 void trace_find_cmdline(int pid, char comm[]) 310 { 311 preempt_disable(); 312 arch_spin_lock(&trace_cmdline_lock); 313 314 __trace_find_cmdline(pid, comm); 315 316 arch_spin_unlock(&trace_cmdline_lock); 317 preempt_enable(); 318 } 319 320 static int *trace_find_tgid_ptr(int pid) 321 { 322 /* 323 * Pairs with the smp_store_release in set_tracer_flag() to ensure that 324 * if we observe a non-NULL tgid_map then we also observe the correct 325 * tgid_map_max. 326 */ 327 int *map = smp_load_acquire(&tgid_map); 328 329 if (unlikely(!map || pid > tgid_map_max)) 330 return NULL; 331 332 return &map[pid]; 333 } 334 335 int trace_find_tgid(int pid) 336 { 337 int *ptr = trace_find_tgid_ptr(pid); 338 339 return ptr ? *ptr : 0; 340 } 341 342 static int trace_save_tgid(struct task_struct *tsk) 343 { 344 int *ptr; 345 346 /* treat recording of idle task as a success */ 347 if (!tsk->pid) 348 return 1; 349 350 ptr = trace_find_tgid_ptr(tsk->pid); 351 if (!ptr) 352 return 0; 353 354 *ptr = tsk->tgid; 355 return 1; 356 } 357 358 static bool tracing_record_taskinfo_skip(int flags) 359 { 360 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID)))) 361 return true; 362 if (!__this_cpu_read(trace_taskinfo_save)) 363 return true; 364 return false; 365 } 366 367 /** 368 * tracing_record_taskinfo - record the task info of a task 369 * 370 * @task: task to record 371 * @flags: TRACE_RECORD_CMDLINE for recording comm 372 * TRACE_RECORD_TGID for recording tgid 373 */ 374 void tracing_record_taskinfo(struct task_struct *task, int flags) 375 { 376 bool done; 377 378 if (tracing_record_taskinfo_skip(flags)) 379 return; 380 381 /* 382 * Record as much task information as possible. If some fail, continue 383 * to try to record the others. 384 */ 385 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task); 386 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task); 387 388 /* If recording any information failed, retry again soon. */ 389 if (!done) 390 return; 391 392 __this_cpu_write(trace_taskinfo_save, false); 393 } 394 395 /** 396 * tracing_record_taskinfo_sched_switch - record task info for sched_switch 397 * 398 * @prev: previous task during sched_switch 399 * @next: next task during sched_switch 400 * @flags: TRACE_RECORD_CMDLINE for recording comm 401 * TRACE_RECORD_TGID for recording tgid 402 */ 403 void tracing_record_taskinfo_sched_switch(struct task_struct *prev, 404 struct task_struct *next, int flags) 405 { 406 bool done; 407 408 if (tracing_record_taskinfo_skip(flags)) 409 return; 410 411 /* 412 * Record as much task information as possible. If some fail, continue 413 * to try to record the others. 414 */ 415 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev); 416 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next); 417 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev); 418 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next); 419 420 /* If recording any information failed, retry again soon. */ 421 if (!done) 422 return; 423 424 __this_cpu_write(trace_taskinfo_save, false); 425 } 426 427 /* Helpers to record a specific task information */ 428 void tracing_record_cmdline(struct task_struct *task) 429 { 430 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE); 431 } 432 433 void tracing_record_tgid(struct task_struct *task) 434 { 435 tracing_record_taskinfo(task, TRACE_RECORD_TGID); 436 } 437 438 int trace_alloc_tgid_map(void) 439 { 440 int *map; 441 442 if (tgid_map) 443 return 0; 444 445 tgid_map_max = pid_max; 446 map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map), 447 GFP_KERNEL); 448 if (!map) 449 return -ENOMEM; 450 451 /* 452 * Pairs with smp_load_acquire() in 453 * trace_find_tgid_ptr() to ensure that if it observes 454 * the tgid_map we just allocated then it also observes 455 * the corresponding tgid_map_max value. 456 */ 457 smp_store_release(&tgid_map, map); 458 return 0; 459 } 460 461 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos) 462 { 463 int pid = ++(*pos); 464 465 return trace_find_tgid_ptr(pid); 466 } 467 468 static void *saved_tgids_start(struct seq_file *m, loff_t *pos) 469 { 470 int pid = *pos; 471 472 return trace_find_tgid_ptr(pid); 473 } 474 475 static void saved_tgids_stop(struct seq_file *m, void *v) 476 { 477 } 478 479 static int saved_tgids_show(struct seq_file *m, void *v) 480 { 481 int *entry = (int *)v; 482 int pid = entry - tgid_map; 483 int tgid = *entry; 484 485 if (tgid == 0) 486 return SEQ_SKIP; 487 488 seq_printf(m, "%d %d\n", pid, tgid); 489 return 0; 490 } 491 492 static const struct seq_operations tracing_saved_tgids_seq_ops = { 493 .start = saved_tgids_start, 494 .stop = saved_tgids_stop, 495 .next = saved_tgids_next, 496 .show = saved_tgids_show, 497 }; 498 499 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp) 500 { 501 int ret; 502 503 ret = tracing_check_open_get_tr(NULL); 504 if (ret) 505 return ret; 506 507 return seq_open(filp, &tracing_saved_tgids_seq_ops); 508 } 509 510 511 const struct file_operations tracing_saved_tgids_fops = { 512 .open = tracing_saved_tgids_open, 513 .read = seq_read, 514 .llseek = seq_lseek, 515 .release = seq_release, 516 }; 517 518 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos) 519 { 520 unsigned int *ptr = v; 521 522 if (*pos || m->count) 523 ptr++; 524 525 (*pos)++; 526 527 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num]; 528 ptr++) { 529 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP) 530 continue; 531 532 return ptr; 533 } 534 535 return NULL; 536 } 537 538 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos) 539 { 540 void *v; 541 loff_t l = 0; 542 543 preempt_disable(); 544 arch_spin_lock(&trace_cmdline_lock); 545 546 v = &savedcmd->map_cmdline_to_pid[0]; 547 while (l <= *pos) { 548 v = saved_cmdlines_next(m, v, &l); 549 if (!v) 550 return NULL; 551 } 552 553 return v; 554 } 555 556 static void saved_cmdlines_stop(struct seq_file *m, void *v) 557 { 558 arch_spin_unlock(&trace_cmdline_lock); 559 preempt_enable(); 560 } 561 562 static int saved_cmdlines_show(struct seq_file *m, void *v) 563 { 564 char buf[TASK_COMM_LEN]; 565 unsigned int *pid = v; 566 567 __trace_find_cmdline(*pid, buf); 568 seq_printf(m, "%d %s\n", *pid, buf); 569 return 0; 570 } 571 572 static const struct seq_operations tracing_saved_cmdlines_seq_ops = { 573 .start = saved_cmdlines_start, 574 .next = saved_cmdlines_next, 575 .stop = saved_cmdlines_stop, 576 .show = saved_cmdlines_show, 577 }; 578 579 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp) 580 { 581 int ret; 582 583 ret = tracing_check_open_get_tr(NULL); 584 if (ret) 585 return ret; 586 587 return seq_open(filp, &tracing_saved_cmdlines_seq_ops); 588 } 589 590 const struct file_operations tracing_saved_cmdlines_fops = { 591 .open = tracing_saved_cmdlines_open, 592 .read = seq_read, 593 .llseek = seq_lseek, 594 .release = seq_release, 595 }; 596 597 static ssize_t 598 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf, 599 size_t cnt, loff_t *ppos) 600 { 601 char buf[64]; 602 int r; 603 604 preempt_disable(); 605 arch_spin_lock(&trace_cmdline_lock); 606 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num); 607 arch_spin_unlock(&trace_cmdline_lock); 608 preempt_enable(); 609 610 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 611 } 612 613 void trace_free_saved_cmdlines_buffer(void) 614 { 615 free_saved_cmdlines_buffer(savedcmd); 616 } 617 618 static int tracing_resize_saved_cmdlines(unsigned int val) 619 { 620 struct saved_cmdlines_buffer *s, *savedcmd_temp; 621 622 s = allocate_cmdlines_buffer(val); 623 if (!s) 624 return -ENOMEM; 625 626 preempt_disable(); 627 arch_spin_lock(&trace_cmdline_lock); 628 savedcmd_temp = savedcmd; 629 savedcmd = s; 630 arch_spin_unlock(&trace_cmdline_lock); 631 preempt_enable(); 632 free_saved_cmdlines_buffer(savedcmd_temp); 633 634 return 0; 635 } 636 637 static ssize_t 638 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf, 639 size_t cnt, loff_t *ppos) 640 { 641 unsigned long val; 642 int ret; 643 644 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 645 if (ret) 646 return ret; 647 648 /* must have at least 1 entry or less than PID_MAX_DEFAULT */ 649 if (!val || val > PID_MAX_DEFAULT) 650 return -EINVAL; 651 652 ret = tracing_resize_saved_cmdlines((unsigned int)val); 653 if (ret < 0) 654 return ret; 655 656 *ppos += cnt; 657 658 return cnt; 659 } 660 661 const struct file_operations tracing_saved_cmdlines_size_fops = { 662 .open = tracing_open_generic, 663 .read = tracing_saved_cmdlines_size_read, 664 .write = tracing_saved_cmdlines_size_write, 665 }; 666