// SPDX-License-Identifier: GPL-2.0
/*
 * trace context switch
 *
 * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
 *
 */
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/kmemleak.h>
#include <linux/ftrace.h>
#include <trace/events/sched.h>

#include "trace.h"

#define RECORD_CMDLINE	1
#define RECORD_TGID	2

static int		sched_cmdline_ref;
static int		sched_tgid_ref;
static DEFINE_MUTEX(sched_register_mutex);

static void
probe_sched_switch(void *ignore, bool preempt,
		   struct task_struct *prev, struct task_struct *next,
		   unsigned int prev_state)
{
	int flags;

	flags = (RECORD_TGID * !!sched_tgid_ref) +
		(RECORD_CMDLINE * !!sched_cmdline_ref);

	if (!flags)
		return;
	tracing_record_taskinfo_sched_switch(prev, next, flags);
}

static void
probe_sched_wakeup(void *ignore, struct task_struct *wakee)
{
	int flags;

	flags = (RECORD_TGID * !!sched_tgid_ref) +
		(RECORD_CMDLINE * !!sched_cmdline_ref);

	if (!flags)
		return;
	tracing_record_taskinfo_sched_switch(current, wakee, flags);
}

static int tracing_sched_register(void)
{
	int ret;

	ret = register_trace_sched_wakeup(probe_sched_wakeup, NULL);
	if (ret) {
		pr_info("wakeup trace: Couldn't activate tracepoint"
			" probe to kernel_sched_wakeup\n");
		return ret;
	}

	ret = register_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
	if (ret) {
		pr_info("wakeup trace: Couldn't activate tracepoint"
			" probe to kernel_sched_wakeup_new\n");
		goto fail_deprobe;
	}

	ret = register_trace_sched_switch(probe_sched_switch, NULL);
	if (ret) {
		pr_info("sched trace: Couldn't activate tracepoint"
			" probe to kernel_sched_switch\n");
		goto fail_deprobe_wake_new;
	}

	return ret;
fail_deprobe_wake_new:
	unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
fail_deprobe:
	unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
	return ret;
}

static void tracing_sched_unregister(void)
{
	unregister_trace_sched_switch(probe_sched_switch, NULL);
	unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
	unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
}

static void tracing_start_sched_switch(int ops)
{
	bool sched_register;

	mutex_lock(&sched_register_mutex);
	sched_register = (!sched_cmdline_ref && !sched_tgid_ref);

	switch (ops) {
	case RECORD_CMDLINE:
		sched_cmdline_ref++;
		break;

	case RECORD_TGID:
		sched_tgid_ref++;
		break;
	}

	if (sched_register && (sched_cmdline_ref || sched_tgid_ref))
		tracing_sched_register();
	mutex_unlock(&sched_register_mutex);
}

static void tracing_stop_sched_switch(int ops)
{
	mutex_lock(&sched_register_mutex);

	switch (ops) {
	case RECORD_CMDLINE:
		sched_cmdline_ref--;
		break;

	case RECORD_TGID:
		sched_tgid_ref--;
		break;
	}

	if (!sched_cmdline_ref && !sched_tgid_ref)
		tracing_sched_unregister();
	mutex_unlock(&sched_register_mutex);
}
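
/*
 * The wrappers below are the refcounted entry points used by tracers to
 * pin comm/tgid recording. A rough usage sketch, from a tracer that
 * wants pids in its output resolved to comms:
 *
 *	tracing_start_cmdline_record();
 *	...
 *	trace_find_cmdline(pid, comm);	// while rendering the trace
 *	...
 *	tracing_stop_cmdline_record();
 */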
void tracing_start_cmdline_record(void)
{
	tracing_start_sched_switch(RECORD_CMDLINE);
}

void tracing_stop_cmdline_record(void)
{
	tracing_stop_sched_switch(RECORD_CMDLINE);
}

void tracing_start_tgid_record(void)
{
	tracing_start_sched_switch(RECORD_TGID);
}

void tracing_stop_tgid_record(void)
{
	tracing_stop_sched_switch(RECORD_TGID);
}

/*
 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
 * is the tgid last observed corresponding to pid=i.
 */
static int *tgid_map;

/* The maximum valid index into tgid_map. */
static size_t tgid_map_max;

#define SAVED_CMDLINES_DEFAULT 128
#define NO_CMDLINE_MAP UINT_MAX
/*
 * Preemption must be disabled before acquiring trace_cmdline_lock.
 * The various trace_arrays' max_lock must be acquired in a context
 * where interrupts are disabled.
 */
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
struct saved_cmdlines_buffer {
	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
	unsigned *map_cmdline_to_pid;
	unsigned cmdline_num;
	int cmdline_idx;
	char saved_cmdlines[];
};
static struct saved_cmdlines_buffer *savedcmd;

/* Holds the size of a cmdline and pid element */
#define SAVED_CMDLINE_MAP_ELEMENT_SIZE(s)			\
	(TASK_COMM_LEN + sizeof((s)->map_cmdline_to_pid[0]))

static inline char *get_saved_cmdlines(int idx)
{
	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
}

static inline void set_cmdline(int idx, const char *cmdline)
{
	strscpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
}

static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
{
	int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);

	kmemleak_free(s);
	free_pages((unsigned long)s, order);
}

static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
{
	struct saved_cmdlines_buffer *s;
	struct page *page;
	int orig_size, size;
	int order;

	/* Figure out how much is needed to hold the given number of cmdlines */
	orig_size = sizeof(*s) + val * SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
	order = get_order(orig_size);
	size = 1 << (order + PAGE_SHIFT);
	page = alloc_pages(GFP_KERNEL, order);
	if (!page)
		return NULL;

	s = page_address(page);
	kmemleak_alloc(s, size, 1, GFP_KERNEL);
	memset(s, 0, sizeof(*s));

	/* Round up to actual allocation */
	val = (size - sizeof(*s)) / SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
	s->cmdline_num = val;

	/* Place map_cmdline_to_pid array right after saved_cmdlines */
	s->map_cmdline_to_pid = (unsigned *)&s->saved_cmdlines[val * TASK_COMM_LEN];

	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
	       sizeof(s->map_pid_to_cmdline));
	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
	       val * sizeof(*s->map_cmdline_to_pid));

	return s;
}

int trace_create_savedcmd(void)
{
	savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);

	return savedcmd ? 0 : -ENOMEM;
}
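
/*
 * Note that the pid is masked to PID_MAX_DEFAULT - 1 below, so pids that
 * collide in the low bits share a map_pid_to_cmdline slot. The reverse
 * map_cmdline_to_pid entry keeps the full pid, which lets
 * __trace_find_cmdline() detect a stale slot and report "<...>" rather
 * than another task's comm.
 */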
int trace_save_cmdline(struct task_struct *tsk)
{
	unsigned tpid, idx;

	/* treat recording of idle task as a success */
	if (!tsk->pid)
		return 1;

	BUILD_BUG_ON(!is_power_of_2(PID_MAX_DEFAULT));

	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);

	/*
	 * It's not the end of the world if we don't get
	 * the lock, but we also don't want to spin
	 * nor do we want to disable interrupts,
	 * so if we miss here, then better luck next time.
	 *
	 * This is called from within the scheduler and from wakeups, so
	 * interrupts had better be disabled and the run queue lock held.
	 */
	lockdep_assert_preemption_disabled();
	if (!arch_spin_trylock(&trace_cmdline_lock))
		return 0;

	idx = savedcmd->map_pid_to_cmdline[tpid];
	if (idx == NO_CMDLINE_MAP) {
		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;

		savedcmd->map_pid_to_cmdline[tpid] = idx;
		savedcmd->cmdline_idx = idx;
	}

	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
	set_cmdline(idx, tsk->comm);

	arch_spin_unlock(&trace_cmdline_lock);

	return 1;
}

static void __trace_find_cmdline(int pid, char comm[])
{
	unsigned map;
	int tpid;

	if (!pid) {
		strcpy(comm, "<idle>");
		return;
	}

	if (WARN_ON_ONCE(pid < 0)) {
		strcpy(comm, "<XXX>");
		return;
	}

	tpid = pid & (PID_MAX_DEFAULT - 1);
	map = savedcmd->map_pid_to_cmdline[tpid];
	if (map != NO_CMDLINE_MAP) {
		tpid = savedcmd->map_cmdline_to_pid[map];
		if (tpid == pid) {
			strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
			return;
		}
	}
	strcpy(comm, "<...>");
}

void trace_find_cmdline(int pid, char comm[])
{
	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	__trace_find_cmdline(pid, comm);

	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
}

static int *trace_find_tgid_ptr(int pid)
{
	/*
	 * Pairs with the smp_store_release in trace_alloc_tgid_map() to
	 * ensure that if we observe a non-NULL tgid_map then we also
	 * observe the correct tgid_map_max.
	 */
	int *map = smp_load_acquire(&tgid_map);

	if (unlikely(!map || pid > tgid_map_max))
		return NULL;

	return &map[pid];
}

int trace_find_tgid(int pid)
{
	int *ptr = trace_find_tgid_ptr(pid);

	return ptr ? *ptr : 0;
}

static int trace_save_tgid(struct task_struct *tsk)
{
	int *ptr;

	/* treat recording of idle task as a success */
	if (!tsk->pid)
		return 1;

	ptr = trace_find_tgid_ptr(tsk->pid);
	if (!ptr)
		return 0;

	*ptr = tsk->tgid;
	return 1;
}
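
/*
 * trace_taskinfo_save is a per-CPU flag declared in trace.h: the record
 * functions below bail out early while it is clear and reset it only
 * once every requested piece of task info was saved successfully, so a
 * task's info is captured at most once per recorded event instead of on
 * every sched_switch. The tracing core re-arms the flag whenever fresh
 * task info is needed.
 */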
static bool tracing_record_taskinfo_skip(int flags)
{
	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
		return true;
	if (!__this_cpu_read(trace_taskinfo_save))
		return true;
	return false;
}

/**
 * tracing_record_taskinfo - record the task info of a task
 *
 * @task:  task to record
 * @flags: TRACE_RECORD_CMDLINE for recording comm
 *         TRACE_RECORD_TGID for recording tgid
 */
void tracing_record_taskinfo(struct task_struct *task, int flags)
{
	bool done;

	if (tracing_record_taskinfo_skip(flags))
		return;

	/*
	 * Record as much task information as possible. If some fail, continue
	 * to try to record the others.
	 */
	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);

	/* If recording any information failed, retry again soon. */
	if (!done)
		return;

	__this_cpu_write(trace_taskinfo_save, false);
}

/**
 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
 *
 * @prev:  previous task during sched_switch
 * @next:  next task during sched_switch
 * @flags: TRACE_RECORD_CMDLINE for recording comm
 *         TRACE_RECORD_TGID for recording tgid
 */
void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
					  struct task_struct *next, int flags)
{
	bool done;

	if (tracing_record_taskinfo_skip(flags))
		return;

	/*
	 * Record as much task information as possible. If some fail, continue
	 * to try to record the others.
	 */
	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);

	/* If recording any information failed, retry again soon. */
	if (!done)
		return;

	__this_cpu_write(trace_taskinfo_save, false);
}

/* Helpers to record specific task information */
void tracing_record_cmdline(struct task_struct *task)
{
	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
}

void tracing_record_tgid(struct task_struct *task)
{
	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
}
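
/*
 * tgid_map is a flat pid-indexed array, so a lookup is just a bounds
 * check plus one load. The trade-off is memory proportional to pid_max:
 * with the default pid_max of 32768 and 4-byte entries that is roughly
 * 128K of kvmalloc'ed memory, allocated once on first use.
 */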
int trace_alloc_tgid_map(void)
{
	int *map;

	if (tgid_map)
		return 0;

	tgid_map_max = init_pid_ns.pid_max;
	map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
		       GFP_KERNEL);
	if (!map)
		return -ENOMEM;

	/*
	 * Pairs with smp_load_acquire() in
	 * trace_find_tgid_ptr() to ensure that if it observes
	 * the tgid_map we just allocated then it also observes
	 * the corresponding tgid_map_max value.
	 */
	smp_store_release(&tgid_map, map);
	return 0;
}

static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
{
	int pid = ++(*pos);

	return trace_find_tgid_ptr(pid);
}

static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
{
	int pid = *pos;

	return trace_find_tgid_ptr(pid);
}

static void saved_tgids_stop(struct seq_file *m, void *v)
{
}

static int saved_tgids_show(struct seq_file *m, void *v)
{
	int *entry = (int *)v;
	int pid = entry - tgid_map;
	int tgid = *entry;

	if (tgid == 0)
		return SEQ_SKIP;

	seq_printf(m, "%d %d\n", pid, tgid);
	return 0;
}

static const struct seq_operations tracing_saved_tgids_seq_ops = {
	.start		= saved_tgids_start,
	.stop		= saved_tgids_stop,
	.next		= saved_tgids_next,
	.show		= saved_tgids_show,
};

static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
{
	int ret;

	ret = tracing_check_open_get_tr(NULL);
	if (ret)
		return ret;

	return seq_open(filp, &tracing_saved_tgids_seq_ops);
}

const struct file_operations tracing_saved_tgids_fops = {
	.open		= tracing_saved_tgids_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
{
	unsigned int *ptr = v;

	if (*pos || m->count)
		ptr++;

	(*pos)++;

	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
	     ptr++) {
		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
			continue;

		return ptr;
	}

	return NULL;
}

static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
{
	void *v;
	loff_t l = 0;

	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	v = &savedcmd->map_cmdline_to_pid[0];
	while (l <= *pos) {
		v = saved_cmdlines_next(m, v, &l);
		if (!v)
			return NULL;
	}

	return v;
}

static void saved_cmdlines_stop(struct seq_file *m, void *v)
{
	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
}

static int saved_cmdlines_show(struct seq_file *m, void *v)
{
	char buf[TASK_COMM_LEN];
	unsigned int *pid = v;

	__trace_find_cmdline(*pid, buf);
	seq_printf(m, "%d %s\n", *pid, buf);
	return 0;
}

static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
	.start		= saved_cmdlines_start,
	.next		= saved_cmdlines_next,
	.stop		= saved_cmdlines_stop,
	.show		= saved_cmdlines_show,
};

static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
{
	int ret;

	ret = tracing_check_open_get_tr(NULL);
	if (ret)
		return ret;

	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
}

const struct file_operations tracing_saved_cmdlines_fops = {
	.open		= tracing_saved_cmdlines_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static ssize_t
tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
				 size_t cnt, loff_t *ppos)
{
	char buf[64];
	int r;

	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);
	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
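
/*
 * The saved_cmdlines_size file is exposed via tracefs; a rough sketch of
 * resizing it from user space:
 *
 *	# cat /sys/kernel/tracing/saved_cmdlines_size
 *	128
 *	# echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * The write path below allocates a whole new buffer and swaps it in
 * under trace_cmdline_lock, so previously saved comms are dropped.
 */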
void trace_free_saved_cmdlines_buffer(void)
{
	free_saved_cmdlines_buffer(savedcmd);
}

static int tracing_resize_saved_cmdlines(unsigned int val)
{
	struct saved_cmdlines_buffer *s, *savedcmd_temp;

	s = allocate_cmdlines_buffer(val);
	if (!s)
		return -ENOMEM;

	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);
	savedcmd_temp = savedcmd;
	savedcmd = s;
	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
	free_saved_cmdlines_buffer(savedcmd_temp);

	return 0;
}

static ssize_t
tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
				  size_t cnt, loff_t *ppos)
{
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
	if (!val || val > PID_MAX_DEFAULT)
		return -EINVAL;

	ret = tracing_resize_saved_cmdlines((unsigned int)val);
	if (ret < 0)
		return ret;

	*ppos += cnt;

	return cnt;
}

const struct file_operations tracing_saved_cmdlines_size_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_saved_cmdlines_size_read,
	.write		= tracing_saved_cmdlines_size_write,
};