// SPDX-License-Identifier: GPL-2.0
/*
 * uprobes-based tracing events
 *
 * Copyright (C) IBM Corporation, 2010-2012
 * Author:	Srikar Dronamraju <srikar@linux.vnet.ibm.com>
 */
#define pr_fmt(fmt)	"trace_uprobe: " fmt

#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/uprobes.h>
#include <linux/namei.h>
#include <linux/string.h>
#include <linux/rculist.h>

#include "trace_probe.h"

#define UPROBE_EVENT_SYSTEM	"uprobes"

struct uprobe_trace_entry_head {
	struct trace_entry	ent;
	unsigned long		vaddr[];
};

#define SIZEOF_TRACE_ENTRY(is_return)			\
	(sizeof(struct uprobe_trace_entry_head) +	\
	 sizeof(unsigned long) * (is_return ? 2 : 1))

#define DATAOF_TRACE_ENTRY(entry, is_return)		\
	((void*)(entry) + SIZEOF_TRACE_ENTRY(is_return))

struct trace_uprobe_filter {
	rwlock_t		rwlock;
	int			nr_systemwide;
	struct list_head	perf_events;
};

/*
 * uprobe event core functions
 */
struct trace_uprobe {
	struct list_head		list;
	struct trace_uprobe_filter	filter;
	struct uprobe_consumer		consumer;
	struct path			path;
	struct inode			*inode;
	char				*filename;
	unsigned long			offset;
	unsigned long			nhit;
	struct trace_probe		tp;
};

#define SIZEOF_TRACE_UPROBE(n)				\
	(offsetof(struct trace_uprobe, tp.args) +	\
	(sizeof(struct probe_arg) * (n)))

static int register_uprobe_event(struct trace_uprobe *tu);
static int unregister_uprobe_event(struct trace_uprobe *tu);

static DEFINE_MUTEX(uprobe_lock);
static LIST_HEAD(uprobe_list);

struct uprobe_dispatch_data {
	struct trace_uprobe	*tu;
	unsigned long		bp_addr;
};

static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
static int uretprobe_dispatcher(struct uprobe_consumer *con,
				unsigned long func, struct pt_regs *regs);

#ifdef CONFIG_STACK_GROWSUP
static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
{
	return addr - (n * sizeof(long));
}
#else
static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
{
	return addr + (n * sizeof(long));
}
#endif

static unsigned long get_user_stack_nth(struct pt_regs *regs, unsigned int n)
{
	unsigned long ret;
	unsigned long addr = user_stack_pointer(regs);

	addr = adjust_stack_addr(addr, n);

	if (copy_from_user(&ret, (void __force __user *) addr, sizeof(ret)))
		return 0;

	return ret;
}

/*
 * Uprobes-specific fetch functions
 */
#define DEFINE_FETCH_stack(type)					\
static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,		\
					 void *offset, void *dest)	\
{									\
	*(type *)dest = (type)get_user_stack_nth(regs,			\
					      ((unsigned long)offset));	\
}
DEFINE_BASIC_FETCH_FUNCS(stack)
/* No string on the stack entry */
#define fetch_stack_string	NULL
#define fetch_stack_string_size	NULL

#define DEFINE_FETCH_memory(type)					\
static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,	\
					  void *addr, void *dest)	\
{									\
	type retval;							\
	void __user *vaddr = (void __force __user *) addr;		\
									\
	if (copy_from_user(&retval, vaddr, sizeof(type)))		\
		*(type *)dest = 0;					\
	else								\
		*(type *) dest = retval;				\
}
DEFINE_BASIC_FETCH_FUNCS(memory)
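/*
 * Added note (sketch of the encoding, see make_data_rloc() and friends in
 * trace_probe.h): the string fetchers below report their result through a
 * u32 "data location" word whose upper 16 bits carry the copied length and
 * whose lower 16 bits carry the offset of the string data within the record.
 * This is what the "__data_loc char[]" type in the fetch type table refers to.
 */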
/*
 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
 * length and relative data location.
 */
static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
					    void *addr, void *dest)
{
	long ret;
	u32 rloc = *(u32 *)dest;
	int maxlen = get_rloc_len(rloc);
	u8 *dst = get_rloc_data(dest);
	void __user *src = (void __force __user *) addr;

	if (!maxlen)
		return;

	ret = strncpy_from_user(dst, src, maxlen);
	if (ret == maxlen)
		dst[--ret] = '\0';

	if (ret < 0) {	/* Failed to fetch string */
		((u8 *)get_rloc_data(dest))[0] = '\0';
		*(u32 *)dest = make_data_rloc(0, get_rloc_offs(rloc));
	} else {
		*(u32 *)dest = make_data_rloc(ret, get_rloc_offs(rloc));
	}
}

static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
						 void *addr, void *dest)
{
	int len;
	void __user *vaddr = (void __force __user *) addr;

	len = strnlen_user(vaddr, MAX_STRING_SIZE);

	if (len == 0 || len > MAX_STRING_SIZE)	/* Failed to check length */
		*(u32 *)dest = 0;
	else
		*(u32 *)dest = len;
}

static unsigned long translate_user_vaddr(void *file_offset)
{
	unsigned long base_addr;
	struct uprobe_dispatch_data *udd;

	udd = (void *) current->utask->vaddr;

	base_addr = udd->bp_addr - udd->tu->offset;
	return base_addr + (unsigned long)file_offset;
}

#define DEFINE_FETCH_file_offset(type)					\
static void FETCH_FUNC_NAME(file_offset, type)(struct pt_regs *regs,	\
					       void *offset, void *dest)\
{									\
	void *vaddr = (void *)translate_user_vaddr(offset);		\
									\
	FETCH_FUNC_NAME(memory, type)(regs, vaddr, dest);		\
}
DEFINE_BASIC_FETCH_FUNCS(file_offset)
DEFINE_FETCH_file_offset(string)
DEFINE_FETCH_file_offset(string_size)

/* Fetch type information table */
static const struct fetch_type uprobes_fetch_type_table[] = {
	/* Special types */
	[FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
					sizeof(u32), 1, "__data_loc char[]"),
	[FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
					string_size, sizeof(u32), 0, "u32"),
	/* Basic types */
	ASSIGN_FETCH_TYPE(u8, u8, 0),
	ASSIGN_FETCH_TYPE(u16, u16, 0),
	ASSIGN_FETCH_TYPE(u32, u32, 0),
	ASSIGN_FETCH_TYPE(u64, u64, 0),
	ASSIGN_FETCH_TYPE(s8, u8, 1),
	ASSIGN_FETCH_TYPE(s16, u16, 1),
	ASSIGN_FETCH_TYPE(s32, u32, 1),
	ASSIGN_FETCH_TYPE(s64, u64, 1),
	ASSIGN_FETCH_TYPE_ALIAS(x8, u8, u8, 0),
	ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0),
	ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0),
	ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0),

	ASSIGN_FETCH_TYPE_END
};

static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
{
	rwlock_init(&filter->rwlock);
	filter->nr_systemwide = 0;
	INIT_LIST_HEAD(&filter->perf_events);
}

static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter)
{
	return !filter->nr_systemwide && list_empty(&filter->perf_events);
}

static inline bool is_ret_probe(struct trace_uprobe *tu)
{
	return tu->consumer.ret_handler != NULL;
}
/*
 * Allocate new trace_uprobe and initialize it (including uprobes).
 */
static struct trace_uprobe *
alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
{
	struct trace_uprobe *tu;

	if (!event || !is_good_name(event))
		return ERR_PTR(-EINVAL);

	if (!group || !is_good_name(group))
		return ERR_PTR(-EINVAL);

	tu = kzalloc(SIZEOF_TRACE_UPROBE(nargs), GFP_KERNEL);
	if (!tu)
		return ERR_PTR(-ENOMEM);

	tu->tp.call.class = &tu->tp.class;
	tu->tp.call.name = kstrdup(event, GFP_KERNEL);
	if (!tu->tp.call.name)
		goto error;

	tu->tp.class.system = kstrdup(group, GFP_KERNEL);
	if (!tu->tp.class.system)
		goto error;

	INIT_LIST_HEAD(&tu->list);
	INIT_LIST_HEAD(&tu->tp.files);
	tu->consumer.handler = uprobe_dispatcher;
	if (is_ret)
		tu->consumer.ret_handler = uretprobe_dispatcher;
	init_trace_uprobe_filter(&tu->filter);
	return tu;

error:
	kfree(tu->tp.call.name);
	kfree(tu);

	return ERR_PTR(-ENOMEM);
}

static void free_trace_uprobe(struct trace_uprobe *tu)
{
	int i;

	for (i = 0; i < tu->tp.nr_args; i++)
		traceprobe_free_probe_arg(&tu->tp.args[i]);

	path_put(&tu->path);
	kfree(tu->tp.call.class->system);
	kfree(tu->tp.call.name);
	kfree(tu->filename);
	kfree(tu);
}

static struct trace_uprobe *find_probe_event(const char *event, const char *group)
{
	struct trace_uprobe *tu;

	list_for_each_entry(tu, &uprobe_list, list)
		if (strcmp(trace_event_name(&tu->tp.call), event) == 0 &&
		    strcmp(tu->tp.call.class->system, group) == 0)
			return tu;

	return NULL;
}

/* Unregister a trace_uprobe and probe_event: must be called with uprobe_lock held */
static int unregister_trace_uprobe(struct trace_uprobe *tu)
{
	int ret;

	ret = unregister_uprobe_event(tu);
	if (ret)
		return ret;

	list_del(&tu->list);
	free_trace_uprobe(tu);
	return 0;
}

/* Register a trace_uprobe and probe_event */
static int register_trace_uprobe(struct trace_uprobe *tu)
{
	struct trace_uprobe *old_tu;
	int ret;

	mutex_lock(&uprobe_lock);

	/* register as an event */
	old_tu = find_probe_event(trace_event_name(&tu->tp.call),
				  tu->tp.call.class->system);
	if (old_tu) {
		/* delete old event */
		ret = unregister_trace_uprobe(old_tu);
		if (ret)
			goto end;
	}

	ret = register_uprobe_event(tu);
	if (ret) {
		pr_warn("Failed to register probe event(%d)\n", ret);
		goto end;
	}

	list_add_tail(&tu->list, &uprobe_list);

end:
	mutex_unlock(&uprobe_lock);

	return ret;
}

/*
 * Argument syntax:
 *  - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS]
 *
 *  - Remove uprobe: -:[GRP/]EVENT
 */
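/*
 * For example (path, offset and fetch args below are illustrative only;
 * the group defaults to "uprobes" when GRP is omitted):
 *
 *   echo 'p:myprobe /bin/bash:0x4245c0 %ax $stack0' >> uprobe_events
 *   echo 'r:myretprobe /bin/bash:0x4245c0 $retval' >> uprobe_events
 *   echo '-:myprobe' >> uprobe_events
 */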
static int create_trace_uprobe(int argc, char **argv)
{
	struct trace_uprobe *tu;
	char *arg, *event, *group, *filename;
	char buf[MAX_EVENT_NAME_LEN];
	struct path path;
	unsigned long offset;
	bool is_delete, is_return;
	int i, ret;

	ret = 0;
	is_delete = false;
	is_return = false;
	event = NULL;
	group = NULL;

	/* argc must be >= 1 */
	if (argv[0][0] == '-')
		is_delete = true;
	else if (argv[0][0] == 'r')
		is_return = true;
	else if (argv[0][0] != 'p') {
		pr_info("Probe definition must be started with 'p', 'r' or '-'.\n");
		return -EINVAL;
	}

	if (argv[0][1] == ':') {
		event = &argv[0][2];
		arg = strchr(event, '/');

		if (arg) {
			group = event;
			event = arg + 1;
			event[-1] = '\0';

			if (strlen(group) == 0) {
				pr_info("Group name is not specified\n");
				return -EINVAL;
			}
		}
		if (strlen(event) == 0) {
			pr_info("Event name is not specified\n");
			return -EINVAL;
		}
	}
	if (!group)
		group = UPROBE_EVENT_SYSTEM;

	if (is_delete) {
		int ret;

		if (!event) {
			pr_info("Delete command needs an event name.\n");
			return -EINVAL;
		}
		mutex_lock(&uprobe_lock);
		tu = find_probe_event(event, group);

		if (!tu) {
			mutex_unlock(&uprobe_lock);
			pr_info("Event %s/%s doesn't exist.\n", group, event);
			return -ENOENT;
		}
		/* delete an event */
		ret = unregister_trace_uprobe(tu);
		mutex_unlock(&uprobe_lock);
		return ret;
	}

	if (argc < 2) {
		pr_info("Probe point is not specified.\n");
		return -EINVAL;
	}
	/* Find the last occurrence, in case the path contains ':' too. */
	arg = strrchr(argv[1], ':');
	if (!arg)
		return -EINVAL;

	*arg++ = '\0';
	filename = argv[1];
	ret = kern_path(filename, LOOKUP_FOLLOW, &path);
	if (ret)
		return ret;

	if (!d_is_reg(path.dentry)) {
		ret = -EINVAL;
		goto fail_address_parse;
	}

	ret = kstrtoul(arg, 0, &offset);
	if (ret)
		goto fail_address_parse;

	argc -= 2;
	argv += 2;

	/* setup a probe */
	if (!event) {
		char *tail;
		char *ptr;

		tail = kstrdup(kbasename(filename), GFP_KERNEL);
		if (!tail) {
			ret = -ENOMEM;
			goto fail_address_parse;
		}

		ptr = strpbrk(tail, ".-_");
		if (ptr)
			*ptr = '\0';

		snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_0x%lx", 'p', tail, offset);
		event = buf;
		kfree(tail);
	}

	tu = alloc_trace_uprobe(group, event, argc, is_return);
	if (IS_ERR(tu)) {
		pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu));
		ret = PTR_ERR(tu);
		goto fail_address_parse;
	}
	tu->offset = offset;
	tu->path = path;
	tu->filename = kstrdup(filename, GFP_KERNEL);

	if (!tu->filename) {
		pr_info("Failed to allocate filename.\n");
		ret = -ENOMEM;
		goto error;
	}

	/* parse arguments */
	ret = 0;
	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
		struct probe_arg *parg = &tu->tp.args[i];

		/* Increment count for freeing args in error case */
		tu->tp.nr_args++;

		/* Parse argument name */
		arg = strchr(argv[i], '=');
		if (arg) {
			*arg++ = '\0';
			parg->name = kstrdup(argv[i], GFP_KERNEL);
		} else {
			arg = argv[i];
			/* If argument name is omitted, set "argN" */
			snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
			parg->name = kstrdup(buf, GFP_KERNEL);
		}

		if (!parg->name) {
			pr_info("Failed to allocate argument[%d] name.\n", i);
			ret = -ENOMEM;
			goto error;
		}

		if (!is_good_name(parg->name)) {
			pr_info("Invalid argument[%d] name: %s\n", i, parg->name);
			ret = -EINVAL;
			goto error;
		}

		if (traceprobe_conflict_field_name(parg->name, tu->tp.args, i)) {
			pr_info("Argument[%d] name '%s' conflicts with "
				"another field.\n", i, argv[i]);
			ret = -EINVAL;
			goto error;
		}

		/* Parse fetch argument */
		ret = traceprobe_parse_probe_arg(arg, &tu->tp.size, parg,
						 is_return, false,
						 uprobes_fetch_type_table);
		if (ret) {
			pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
			goto error;
		}
	}

	ret = register_trace_uprobe(tu);
	if (ret)
		goto error;
	return 0;

error:
	free_trace_uprobe(tu);
	return ret;

fail_address_parse:
	path_put(&path);

	pr_info("Failed to parse address or file.\n");

	return ret;
}
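/*
 * Called from probes_open() below when uprobe_events is opened for writing
 * with O_TRUNC; tries to remove every registered probe.
 */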
static int cleanup_all_probes(void)
{
	struct trace_uprobe *tu;
	int ret = 0;

	mutex_lock(&uprobe_lock);
	while (!list_empty(&uprobe_list)) {
		tu = list_entry(uprobe_list.next, struct trace_uprobe, list);
		ret = unregister_trace_uprobe(tu);
		if (ret)
			break;
	}
	mutex_unlock(&uprobe_lock);
	return ret;
}

/* Probes listing interfaces */
static void *probes_seq_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&uprobe_lock);
	return seq_list_start(&uprobe_list, *pos);
}

static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	return seq_list_next(v, &uprobe_list, pos);
}

static void probes_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&uprobe_lock);
}

static int probes_seq_show(struct seq_file *m, void *v)
{
	struct trace_uprobe *tu = v;
	char c = is_ret_probe(tu) ? 'r' : 'p';
	int i;

	seq_printf(m, "%c:%s/%s %s:0x%0*lx", c, tu->tp.call.class->system,
			trace_event_name(&tu->tp.call), tu->filename,
			(int)(sizeof(void *) * 2), tu->offset);

	for (i = 0; i < tu->tp.nr_args; i++)
		seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm);

	seq_putc(m, '\n');
	return 0;
}

static const struct seq_operations probes_seq_op = {
	.start	= probes_seq_start,
	.next	= probes_seq_next,
	.stop	= probes_seq_stop,
	.show	= probes_seq_show
};

static int probes_open(struct inode *inode, struct file *file)
{
	int ret;

	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
		ret = cleanup_all_probes();
		if (ret)
			return ret;
	}

	return seq_open(file, &probes_seq_op);
}

static ssize_t probes_write(struct file *file, const char __user *buffer,
			    size_t count, loff_t *ppos)
{
	return trace_parse_run_command(file, buffer, count, ppos, create_trace_uprobe);
}

static const struct file_operations uprobe_events_ops = {
	.owner		= THIS_MODULE,
	.open		= probes_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
	.write		= probes_write,
};

/* Probes profiling interfaces */
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
	struct trace_uprobe *tu = v;

	seq_printf(m, " %s %-44s %15lu\n", tu->filename,
			trace_event_name(&tu->tp.call), tu->nhit);
	return 0;
}

static const struct seq_operations profile_seq_op = {
	.start	= probes_seq_start,
	.next	= probes_seq_next,
	.stop	= probes_seq_stop,
	.show	= probes_profile_seq_show
};

static int profile_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &profile_seq_op);
}

static const struct file_operations uprobe_profile_ops = {
	.owner		= THIS_MODULE,
	.open		= profile_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

struct uprobe_cpu_buffer {
	struct mutex mutex;
	void *buf;
};
static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer;
static int uprobe_buffer_refcnt;

static int uprobe_buffer_init(void)
{
	int cpu, err_cpu;

	uprobe_cpu_buffer = alloc_percpu(struct uprobe_cpu_buffer);
	if (uprobe_cpu_buffer == NULL)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		struct page *p = alloc_pages_node(cpu_to_node(cpu),
						  GFP_KERNEL, 0);
		if (p == NULL) {
			err_cpu = cpu;
			goto err;
		}
		per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf = page_address(p);
		mutex_init(&per_cpu_ptr(uprobe_cpu_buffer, cpu)->mutex);
	}

	return 0;

err:
	for_each_possible_cpu(cpu) {
		if (cpu == err_cpu)
			break;
		free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf);
	}

	free_percpu(uprobe_cpu_buffer);
	return -ENOMEM;
}
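/*
 * Added note: fetched argument data is staged in the per-CPU, one-page
 * buffers above before it is copied into the ftrace ring buffer or the perf
 * buffer. The buffers are shared by all uprobe events; they are allocated on
 * the first enable and freed on the last disable, with uprobe_buffer_refcnt
 * serialized by event_mutex.
 */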
static int uprobe_buffer_enable(void)
{
	int ret = 0;

	BUG_ON(!mutex_is_locked(&event_mutex));

	if (uprobe_buffer_refcnt++ == 0) {
		ret = uprobe_buffer_init();
		if (ret < 0)
			uprobe_buffer_refcnt--;
	}

	return ret;
}

static void uprobe_buffer_disable(void)
{
	int cpu;

	BUG_ON(!mutex_is_locked(&event_mutex));

	if (--uprobe_buffer_refcnt == 0) {
		for_each_possible_cpu(cpu)
			free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer,
							     cpu)->buf);

		free_percpu(uprobe_cpu_buffer);
		uprobe_cpu_buffer = NULL;
	}
}

static struct uprobe_cpu_buffer *uprobe_buffer_get(void)
{
	struct uprobe_cpu_buffer *ucb;
	int cpu;

	cpu = raw_smp_processor_id();
	ucb = per_cpu_ptr(uprobe_cpu_buffer, cpu);

	/*
	 * Use per-cpu buffers for fastest access, but we might migrate
	 * so the mutex makes sure we have sole access to it.
	 */
	mutex_lock(&ucb->mutex);

	return ucb;
}

static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb)
{
	mutex_unlock(&ucb->mutex);
}

static void __uprobe_trace_func(struct trace_uprobe *tu,
				unsigned long func, struct pt_regs *regs,
				struct uprobe_cpu_buffer *ucb, int dsize,
				struct trace_event_file *trace_file)
{
	struct uprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	void *data;
	int size, esize;
	struct trace_event_call *call = &tu->tp.call;

	WARN_ON(call != trace_file->event_call);

	if (WARN_ON_ONCE(tu->tp.size + dsize > PAGE_SIZE))
		return;

	if (trace_trigger_soft_disabled(trace_file))
		return;

	esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
	size = esize + tu->tp.size + dsize;
	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
						call->event.type, size, 0, 0);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	if (is_ret_probe(tu)) {
		entry->vaddr[0] = func;
		entry->vaddr[1] = instruction_pointer(regs);
		data = DATAOF_TRACE_ENTRY(entry, true);
	} else {
		entry->vaddr[0] = instruction_pointer(regs);
		data = DATAOF_TRACE_ENTRY(entry, false);
	}

	memcpy(data, ucb->buf, tu->tp.size + dsize);

	event_trigger_unlock_commit(trace_file, buffer, event, entry, 0, 0);
}

/* uprobe handler */
static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs,
			     struct uprobe_cpu_buffer *ucb, int dsize)
{
	struct event_file_link *link;

	if (is_ret_probe(tu))
		return 0;

	rcu_read_lock();
	list_for_each_entry_rcu(link, &tu->tp.files, list)
		__uprobe_trace_func(tu, 0, regs, ucb, dsize, link->file);
	rcu_read_unlock();

	return 0;
}

static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
				 struct pt_regs *regs,
				 struct uprobe_cpu_buffer *ucb, int dsize)
{
	struct event_file_link *link;

	rcu_read_lock();
	list_for_each_entry_rcu(link, &tu->tp.files, list)
		__uprobe_trace_func(tu, func, regs, ucb, dsize, link->file);
	rcu_read_unlock();
}
/* Event entry printers */
static enum print_line_t
print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event)
{
	struct uprobe_trace_entry_head *entry;
	struct trace_seq *s = &iter->seq;
	struct trace_uprobe *tu;
	u8 *data;
	int i;

	entry = (struct uprobe_trace_entry_head *)iter->ent;
	tu = container_of(event, struct trace_uprobe, tp.call.event);

	if (is_ret_probe(tu)) {
		trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)",
				 trace_event_name(&tu->tp.call),
				 entry->vaddr[1], entry->vaddr[0]);
		data = DATAOF_TRACE_ENTRY(entry, true);
	} else {
		trace_seq_printf(s, "%s: (0x%lx)",
				 trace_event_name(&tu->tp.call),
				 entry->vaddr[0]);
		data = DATAOF_TRACE_ENTRY(entry, false);
	}

	for (i = 0; i < tu->tp.nr_args; i++) {
		struct probe_arg *parg = &tu->tp.args[i];

		if (!parg->type->print(s, parg->name, data + parg->offset, entry))
			goto out;
	}

	trace_seq_putc(s, '\n');

out:
	return trace_handle_return(s);
}

typedef bool (*filter_func_t)(struct uprobe_consumer *self,
				enum uprobe_filter_ctx ctx,
				struct mm_struct *mm);

static int
probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file,
		   filter_func_t filter)
{
	bool enabled = trace_probe_is_enabled(&tu->tp);
	struct event_file_link *link = NULL;
	int ret;

	if (file) {
		if (tu->tp.flags & TP_FLAG_PROFILE)
			return -EINTR;

		link = kmalloc(sizeof(*link), GFP_KERNEL);
		if (!link)
			return -ENOMEM;

		link->file = file;
		list_add_tail_rcu(&link->list, &tu->tp.files);

		tu->tp.flags |= TP_FLAG_TRACE;
	} else {
		if (tu->tp.flags & TP_FLAG_TRACE)
			return -EINTR;

		tu->tp.flags |= TP_FLAG_PROFILE;
	}

	WARN_ON(!uprobe_filter_is_empty(&tu->filter));

	if (enabled)
		return 0;

	ret = uprobe_buffer_enable();
	if (ret)
		goto err_flags;

	tu->consumer.filter = filter;
	tu->inode = d_real_inode(tu->path.dentry);
	ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
	if (ret)
		goto err_buffer;

	return 0;

err_buffer:
	uprobe_buffer_disable();

err_flags:
	if (file) {
		list_del(&link->list);
		kfree(link);
		tu->tp.flags &= ~TP_FLAG_TRACE;
	} else {
		tu->tp.flags &= ~TP_FLAG_PROFILE;
	}
	return ret;
}

static void
probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file)
{
	if (!trace_probe_is_enabled(&tu->tp))
		return;

	if (file) {
		struct event_file_link *link;

		link = find_event_file_link(&tu->tp, file);
		if (!link)
			return;

		list_del_rcu(&link->list);
		/* synchronize with u{,ret}probe_trace_func */
		synchronize_rcu();
		kfree(link);

		if (!list_empty(&tu->tp.files))
			return;
	}

	WARN_ON(!uprobe_filter_is_empty(&tu->filter));

	uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
	tu->inode = NULL;
	tu->tp.flags &= file ? ~TP_FLAG_TRACE : ~TP_FLAG_PROFILE;

	uprobe_buffer_disable();
}
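/*
 * Added note: the field layout mirrors SIZEOF_TRACE_ENTRY() and
 * DATAOF_TRACE_ENTRY(). An entry probe records the probed instruction
 * pointer in vaddr[0]; a return probe records the probed function address in
 * vaddr[0] and the return address in vaddr[1]. The fetched arguments follow
 * immediately after the header.
 */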
static int uprobe_event_define_fields(struct trace_event_call *event_call)
{
	int ret, i, size;
	struct uprobe_trace_entry_head field;
	struct trace_uprobe *tu = event_call->data;

	if (is_ret_probe(tu)) {
		DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_FUNC, 0);
		DEFINE_FIELD(unsigned long, vaddr[1], FIELD_STRING_RETIP, 0);
		size = SIZEOF_TRACE_ENTRY(true);
	} else {
		DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_IP, 0);
		size = SIZEOF_TRACE_ENTRY(false);
	}
	/* Set argument names as fields */
	for (i = 0; i < tu->tp.nr_args; i++) {
		struct probe_arg *parg = &tu->tp.args[i];

		ret = trace_define_field(event_call, parg->type->fmttype,
					 parg->name, size + parg->offset,
					 parg->type->size, parg->type->is_signed,
					 FILTER_OTHER);

		if (ret)
			return ret;
	}
	return 0;
}

#ifdef CONFIG_PERF_EVENTS
static bool
__uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
{
	struct perf_event *event;

	if (filter->nr_systemwide)
		return true;

	list_for_each_entry(event, &filter->perf_events, hw.tp_list) {
		if (event->hw.target->mm == mm)
			return true;
	}

	return false;
}

static inline bool
uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
{
	return __uprobe_perf_filter(&tu->filter, event->hw.target->mm);
}

static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
{
	bool done;

	write_lock(&tu->filter.rwlock);
	if (event->hw.target) {
		list_del(&event->hw.tp_list);
		done = tu->filter.nr_systemwide ||
			(event->hw.target->flags & PF_EXITING) ||
			uprobe_filter_event(tu, event);
	} else {
		tu->filter.nr_systemwide--;
		done = tu->filter.nr_systemwide;
	}
	write_unlock(&tu->filter.rwlock);

	if (!done)
		return uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);

	return 0;
}
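/*
 * Added note: mirror image of uprobe_perf_close() above. Account the new perf
 * event in the filter and, unless the breakpoint is already guaranteed for
 * another reason ("done"), let uprobe_apply() re-evaluate which mms need it.
 */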
static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
{
	bool done;
	int err;

	write_lock(&tu->filter.rwlock);
	if (event->hw.target) {
		/*
		 * event->parent != NULL means copy_process(), we can avoid
		 * uprobe_apply(). current->mm must be probed and we can rely
		 * on dup_mmap() which preserves the already installed bp's.
		 *
		 * attr.enable_on_exec means that exec/mmap will install the
		 * breakpoints we need.
		 */
		done = tu->filter.nr_systemwide ||
			event->parent || event->attr.enable_on_exec ||
			uprobe_filter_event(tu, event);
		list_add(&event->hw.tp_list, &tu->filter.perf_events);
	} else {
		done = tu->filter.nr_systemwide;
		tu->filter.nr_systemwide++;
	}
	write_unlock(&tu->filter.rwlock);

	err = 0;
	if (!done) {
		err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
		if (err)
			uprobe_perf_close(tu, event);
	}
	return err;
}

static bool uprobe_perf_filter(struct uprobe_consumer *uc,
				enum uprobe_filter_ctx ctx, struct mm_struct *mm)
{
	struct trace_uprobe *tu;
	int ret;

	tu = container_of(uc, struct trace_uprobe, consumer);
	read_lock(&tu->filter.rwlock);
	ret = __uprobe_perf_filter(&tu->filter, mm);
	read_unlock(&tu->filter.rwlock);

	return ret;
}

static void __uprobe_perf_func(struct trace_uprobe *tu,
			       unsigned long func, struct pt_regs *regs,
			       struct uprobe_cpu_buffer *ucb, int dsize)
{
	struct trace_event_call *call = &tu->tp.call;
	struct uprobe_trace_entry_head *entry;
	struct hlist_head *head;
	void *data;
	int size, esize;
	int rctx;

	if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
		return;

	esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));

	size = esize + tu->tp.size + dsize;
	size = ALIGN(size + sizeof(u32), sizeof(u64)) - sizeof(u32);
	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
		return;

	preempt_disable();
	head = this_cpu_ptr(call->perf_events);
	if (hlist_empty(head))
		goto out;

	entry = perf_trace_buf_alloc(size, NULL, &rctx);
	if (!entry)
		goto out;

	if (is_ret_probe(tu)) {
		entry->vaddr[0] = func;
		entry->vaddr[1] = instruction_pointer(regs);
		data = DATAOF_TRACE_ENTRY(entry, true);
	} else {
		entry->vaddr[0] = instruction_pointer(regs);
		data = DATAOF_TRACE_ENTRY(entry, false);
	}

	memcpy(data, ucb->buf, tu->tp.size + dsize);

	if (size - esize > tu->tp.size + dsize) {
		int len = tu->tp.size + dsize;

		memset(data + len, 0, size - esize - len);
	}

	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
			      head, NULL);
out:
	preempt_enable();
}

/* uprobe profile handler */
static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs,
			    struct uprobe_cpu_buffer *ucb, int dsize)
{
	if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
		return UPROBE_HANDLER_REMOVE;

	if (!is_ret_probe(tu))
		__uprobe_perf_func(tu, 0, regs, ucb, dsize);
	return 0;
}

static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
				struct pt_regs *regs,
				struct uprobe_cpu_buffer *ucb, int dsize)
{
	__uprobe_perf_func(tu, func, regs, ucb, dsize);
}
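/*
 * Added note: reports this event's type, binary path and file offset, e.g.
 * for the perf/bpf task-fd-query interface.
 */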
int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
			const char **filename, u64 *probe_offset,
			bool perf_type_tracepoint)
{
	const char *pevent = trace_event_name(event->tp_event);
	const char *group = event->tp_event->class->system;
	struct trace_uprobe *tu;

	if (perf_type_tracepoint)
		tu = find_probe_event(pevent, group);
	else
		tu = event->tp_event->data;
	if (!tu)
		return -EINVAL;

	*fd_type = is_ret_probe(tu) ? BPF_FD_TYPE_URETPROBE
				    : BPF_FD_TYPE_UPROBE;
	*filename = tu->filename;
	*probe_offset = tu->offset;
	return 0;
}
#endif	/* CONFIG_PERF_EVENTS */

static int
trace_uprobe_register(struct trace_event_call *event, enum trace_reg type,
		      void *data)
{
	struct trace_uprobe *tu = event->data;
	struct trace_event_file *file = data;

	switch (type) {
	case TRACE_REG_REGISTER:
		return probe_event_enable(tu, file, NULL);

	case TRACE_REG_UNREGISTER:
		probe_event_disable(tu, file);
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return probe_event_enable(tu, NULL, uprobe_perf_filter);

	case TRACE_REG_PERF_UNREGISTER:
		probe_event_disable(tu, NULL);
		return 0;

	case TRACE_REG_PERF_OPEN:
		return uprobe_perf_open(tu, data);

	case TRACE_REG_PERF_CLOSE:
		return uprobe_perf_close(tu, data);

#endif
	default:
		return 0;
	}
	return 0;
}

static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
{
	struct trace_uprobe *tu;
	struct uprobe_dispatch_data udd;
	struct uprobe_cpu_buffer *ucb;
	int dsize, esize;
	int ret = 0;

	tu = container_of(con, struct trace_uprobe, consumer);
	tu->nhit++;

	udd.tu = tu;
	udd.bp_addr = instruction_pointer(regs);

	current->utask->vaddr = (unsigned long) &udd;

	if (WARN_ON_ONCE(!uprobe_cpu_buffer))
		return 0;

	dsize = __get_data_size(&tu->tp, regs);
	esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));

	ucb = uprobe_buffer_get();
	store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize);

	if (tu->tp.flags & TP_FLAG_TRACE)
		ret |= uprobe_trace_func(tu, regs, ucb, dsize);

#ifdef CONFIG_PERF_EVENTS
	if (tu->tp.flags & TP_FLAG_PROFILE)
		ret |= uprobe_perf_func(tu, regs, ucb, dsize);
#endif
	uprobe_buffer_put(ucb);
	return ret;
}

static int uretprobe_dispatcher(struct uprobe_consumer *con,
				unsigned long func, struct pt_regs *regs)
{
	struct trace_uprobe *tu;
	struct uprobe_dispatch_data udd;
	struct uprobe_cpu_buffer *ucb;
	int dsize, esize;

	tu = container_of(con, struct trace_uprobe, consumer);

	udd.tu = tu;
	udd.bp_addr = func;

	current->utask->vaddr = (unsigned long) &udd;

	if (WARN_ON_ONCE(!uprobe_cpu_buffer))
		return 0;

	dsize = __get_data_size(&tu->tp, regs);
	esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));

	ucb = uprobe_buffer_get();
	store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize);

	if (tu->tp.flags & TP_FLAG_TRACE)
		uretprobe_trace_func(tu, func, regs, ucb, dsize);

#ifdef CONFIG_PERF_EVENTS
	if (tu->tp.flags & TP_FLAG_PROFILE)
		uretprobe_perf_func(tu, func, regs, ucb, dsize);
#endif
	uprobe_buffer_put(ucb);
	return 0;
}

static struct trace_event_functions uprobe_funcs = {
	.trace		= print_uprobe_event
};

static inline void init_trace_event_call(struct trace_uprobe *tu,
					 struct trace_event_call *call)
{
	INIT_LIST_HEAD(&call->class->fields);
	call->event.funcs = &uprobe_funcs;
	call->class->define_fields = uprobe_event_define_fields;

	call->flags = TRACE_EVENT_FL_UPROBE;
	call->class->reg = trace_uprobe_register;
	call->data = tu;
}
static int register_uprobe_event(struct trace_uprobe *tu)
{
	struct trace_event_call *call = &tu->tp.call;
	int ret = 0;

	init_trace_event_call(tu, call);

	if (set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0)
		return -ENOMEM;

	ret = register_trace_event(&call->event);
	if (!ret) {
		kfree(call->print_fmt);
		return -ENODEV;
	}

	ret = trace_add_event_call(call);

	if (ret) {
		pr_info("Failed to register uprobe event: %s\n",
			trace_event_name(call));
		kfree(call->print_fmt);
		unregister_trace_event(&call->event);
	}

	return ret;
}

static int unregister_uprobe_event(struct trace_uprobe *tu)
{
	int ret;

	/* tu->event is unregistered in trace_remove_event_call() */
	ret = trace_remove_event_call(&tu->tp.call);
	if (ret)
		return ret;
	kfree(tu->tp.call.print_fmt);
	tu->tp.call.print_fmt = NULL;
	return 0;
}

#ifdef CONFIG_PERF_EVENTS
struct trace_event_call *
create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)
{
	struct trace_uprobe *tu;
	struct path path;
	int ret;

	ret = kern_path(name, LOOKUP_FOLLOW, &path);
	if (ret)
		return ERR_PTR(ret);

	if (!d_is_reg(path.dentry)) {
		path_put(&path);
		return ERR_PTR(-EINVAL);
	}

	/*
	 * local trace_uprobes are not added to uprobe_list, so they are never
	 * searched in find_probe_event(). Therefore, there is no concern of
	 * duplicated name "DUMMY_EVENT" here.
	 */
	tu = alloc_trace_uprobe(UPROBE_EVENT_SYSTEM, "DUMMY_EVENT", 0,
				is_return);

	if (IS_ERR(tu)) {
		pr_info("Failed to allocate trace_uprobe.(%d)\n",
			(int)PTR_ERR(tu));
		path_put(&path);
		return ERR_CAST(tu);
	}

	tu->offset = offs;
	tu->path = path;
	tu->filename = kstrdup(name, GFP_KERNEL);
	init_trace_event_call(tu, &tu->tp.call);

	if (set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0) {
		ret = -ENOMEM;
		goto error;
	}

	return &tu->tp.call;
error:
	free_trace_uprobe(tu);
	return ERR_PTR(ret);
}

void destroy_local_trace_uprobe(struct trace_event_call *event_call)
{
	struct trace_uprobe *tu;

	tu = container_of(event_call, struct trace_uprobe, tp.call);

	kfree(tu->tp.call.print_fmt);
	tu->tp.call.print_fmt = NULL;

	free_trace_uprobe(tu);
}
#endif	/* CONFIG_PERF_EVENTS */

/* Make a trace interface for controlling probe points */
static __init int init_uprobe_trace(void)
{
	struct dentry *d_tracer;

	d_tracer = tracing_init_dentry();
	if (IS_ERR(d_tracer))
		return 0;

	trace_create_file("uprobe_events", 0644, d_tracer,
				    NULL, &uprobe_events_ops);
	/* Profile interface */
	trace_create_file("uprobe_profile", 0444, d_tracer,
				    NULL, &uprobe_profile_ops);
	return 0;
}

fs_initcall(init_uprobe_trace);