/*
 * Kprobes-based tracing events
 *
 * Created by Masami Hiramatsu <mhiramat@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/module.h>
#include <linux/uaccess.h>

#include "trace_probe.h"

#define KPROBE_EVENT_SYSTEM "kprobes"

/*
 * Kprobe event core functions
 */

/*
 * One dynamically created kprobe/kretprobe event.
 *
 * The same struct serves both probe kinds: a kretprobe event uses the
 * whole embedded 'rp', while a plain kprobe event only uses 'rp.kp'
 * (which handler pointer is set in alloc_trace_probe() decides the kind;
 * see trace_probe_is_return()).
 */
struct trace_probe {
	struct list_head	list;	/* link on probe_list, under probe_lock */
	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
	unsigned long		nhit;	/* hit count, reported via kprobe_profile */
	unsigned int		flags;	/* For TP_FLAG_* */
	const char		*symbol;	/* symbol name (kstrdup'd, owned) */
	struct ftrace_event_class	class;
	struct ftrace_event_call	call;
	struct list_head	files;	/* event_file_link list (RCU-traversed) */
	ssize_t			size;	/* trace entry size */
	unsigned int		nr_args;
	struct probe_arg	args[];	/* flexible array; see SIZEOF_TRACE_PROBE */
};

/* Links one ftrace_event_file ("instance" consumer) to a trace_probe. */
struct event_file_link {
	struct ftrace_event_file	*file;
	struct list_head		list;
};

/* Allocation size of a trace_probe carrying @n probe_args. */
#define SIZEOF_TRACE_PROBE(n)			\
	(offsetof(struct trace_probe, args) +	\
	(sizeof(struct probe_arg) * (n)))


/* True if this event is a kretprobe (return-probe) event. */
static __kprobes bool trace_probe_is_return(struct trace_probe *tp)
{
	return tp->rp.handler != NULL;
}

/* Symbol name for messages; "unknown" when probing a raw address. */
static __kprobes const char *trace_probe_symbol(struct trace_probe *tp)
{
	return tp->symbol ? tp->symbol : "unknown";
}

static __kprobes unsigned long trace_probe_offset(struct trace_probe *tp)
{
	return tp->rp.kp.offset;
}

/* Enabled by at least one consumer (ftrace "trace" or perf "profile"). */
static __kprobes bool trace_probe_is_enabled(struct trace_probe *tp)
{
	return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE));
}

/* The underlying k*probe has been successfully registered. */
static __kprobes bool trace_probe_is_registered(struct trace_probe *tp)
{
	return !!(tp->flags & TP_FLAG_REGISTERED);
}

/* The probed code has gone away (e.g. its module was unloaded). */
static __kprobes bool trace_probe_has_gone(struct trace_probe *tp)
{
	return !!(kprobe_gone(&tp->rp.kp));
}

/* True if the probe's "MOD:SYM" symbol names a symbol inside @mod. */
static __kprobes bool trace_probe_within_module(struct trace_probe *tp,
						struct module *mod)
{
	int len = strlen(mod->name);
	const char *name = trace_probe_symbol(tp);
	return strncmp(mod->name, name, len) == 0 && name[len] == ':';
}

/* True if the probe targets a module symbol ("MOD:SYM" form). */
static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp)
{
	return !!strchr(trace_probe_symbol(tp), ':');
}

static int register_probe_event(struct trace_probe *tp);
static void unregister_probe_event(struct trace_probe *tp);

/* Protects probe_list and each probe's registration state. */
static DEFINE_MUTEX(probe_lock);
static LIST_HEAD(probe_list);

static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
				struct pt_regs *regs);

/*
 * Allocate new trace_probe and initialize it (including kprobes).
 *
 * Either @symbol (+@offs) or @addr identifies the probe point; @symbol is
 * duplicated and owned by the returned object.  Returns ERR_PTR(-EINVAL)
 * for a bad event/group name, ERR_PTR(-ENOMEM) on allocation failure.
 * Free with free_trace_probe().
 */
static struct trace_probe *alloc_trace_probe(const char *group,
					     const char *event,
					     void *addr,
					     const char *symbol,
					     unsigned long offs,
					     int nargs, bool is_return)
{
	struct trace_probe *tp;
	int ret = -ENOMEM;

	tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
	if (!tp)
		return ERR_PTR(ret);

	if (symbol) {
		tp->symbol = kstrdup(symbol, GFP_KERNEL);
		if (!tp->symbol)
			goto error;
		tp->rp.kp.symbol_name = tp->symbol;
		tp->rp.kp.offset = offs;
	} else
		tp->rp.kp.addr = addr;

	/* Installing rp.handler is what makes this a kretprobe event. */
	if (is_return)
		tp->rp.handler = kretprobe_dispatcher;
	else
		tp->rp.kp.pre_handler = kprobe_dispatcher;

	if (!event || !is_good_name(event)) {
		ret = -EINVAL;
		goto error;
	}

	tp->call.class = &tp->class;
	tp->call.name = kstrdup(event, GFP_KERNEL);
	if (!tp->call.name)
		goto error;

	if (!group || !is_good_name(group)) {
		ret = -EINVAL;
		goto error;
	}

	tp->class.system = kstrdup(group, GFP_KERNEL);
	if (!tp->class.system)
		goto error;

	INIT_LIST_HEAD(&tp->list);
	INIT_LIST_HEAD(&tp->files);
	return tp;
error:
	/* kfree(NULL) is a no-op, so partially built state is safe here. */
	kfree(tp->call.name);
	kfree(tp->symbol);
	kfree(tp);
	return ERR_PTR(ret);
}

/* Release a trace_probe and everything it owns (args, names, symbol). */
static void free_trace_probe(struct trace_probe *tp)
{
	int i;

	for (i = 0; i < tp->nr_args; i++)
		traceprobe_free_probe_arg(&tp->args[i]);

	kfree(tp->call.class->system);
	kfree(tp->call.name);
	kfree(tp->symbol);
	kfree(tp);
}

/* Look up a probe by event and group name; caller must hold probe_lock. */
static struct trace_probe *find_trace_probe(const char *event,
					    const char *group)
{
	struct trace_probe *tp;

	list_for_each_entry(tp, &probe_list, list)
		if (strcmp(tp->call.name, event) == 0 &&
		    strcmp(tp->call.class->system, group) == 0)
			return tp;
	return NULL;
}

/*
 * Enable trace_probe
 * if the file is NULL, enable "perf" handler, or enable "trace" handler.
195 */ 196 static int 197 enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) 198 { 199 int ret = 0; 200 201 if (file) { 202 struct event_file_link *link; 203 204 link = kmalloc(sizeof(*link), GFP_KERNEL); 205 if (!link) { 206 ret = -ENOMEM; 207 goto out; 208 } 209 210 link->file = file; 211 list_add_tail_rcu(&link->list, &tp->files); 212 213 tp->flags |= TP_FLAG_TRACE; 214 } else 215 tp->flags |= TP_FLAG_PROFILE; 216 217 if (trace_probe_is_registered(tp) && !trace_probe_has_gone(tp)) { 218 if (trace_probe_is_return(tp)) 219 ret = enable_kretprobe(&tp->rp); 220 else 221 ret = enable_kprobe(&tp->rp.kp); 222 } 223 out: 224 return ret; 225 } 226 227 static struct event_file_link * 228 find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file) 229 { 230 struct event_file_link *link; 231 232 list_for_each_entry(link, &tp->files, list) 233 if (link->file == file) 234 return link; 235 236 return NULL; 237 } 238 239 /* 240 * Disable trace_probe 241 * if the file is NULL, disable "perf" handler, or disable "trace" handler. 242 */ 243 static int 244 disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) 245 { 246 struct event_file_link *link = NULL; 247 int wait = 0; 248 int ret = 0; 249 250 if (file) { 251 link = find_event_file_link(tp, file); 252 if (!link) { 253 ret = -EINVAL; 254 goto out; 255 } 256 257 list_del_rcu(&link->list); 258 wait = 1; 259 if (!list_empty(&tp->files)) 260 goto out; 261 262 tp->flags &= ~TP_FLAG_TRACE; 263 } else 264 tp->flags &= ~TP_FLAG_PROFILE; 265 266 if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) { 267 if (trace_probe_is_return(tp)) 268 disable_kretprobe(&tp->rp); 269 else 270 disable_kprobe(&tp->rp.kp); 271 wait = 1; 272 } 273 out: 274 if (wait) { 275 /* 276 * Synchronize with kprobe_trace_func/kretprobe_trace_func 277 * to ensure disabled (all running handlers are finished). 
278 * This is not only for kfree(), but also the caller, 279 * trace_remove_event_call() supposes it for releasing 280 * event_call related objects, which will be accessed in 281 * the kprobe_trace_func/kretprobe_trace_func. 282 */ 283 synchronize_sched(); 284 kfree(link); /* Ignored if link == NULL */ 285 } 286 287 return ret; 288 } 289 290 /* Internal register function - just handle k*probes and flags */ 291 static int __register_trace_probe(struct trace_probe *tp) 292 { 293 int i, ret; 294 295 if (trace_probe_is_registered(tp)) 296 return -EINVAL; 297 298 for (i = 0; i < tp->nr_args; i++) 299 traceprobe_update_arg(&tp->args[i]); 300 301 /* Set/clear disabled flag according to tp->flag */ 302 if (trace_probe_is_enabled(tp)) 303 tp->rp.kp.flags &= ~KPROBE_FLAG_DISABLED; 304 else 305 tp->rp.kp.flags |= KPROBE_FLAG_DISABLED; 306 307 if (trace_probe_is_return(tp)) 308 ret = register_kretprobe(&tp->rp); 309 else 310 ret = register_kprobe(&tp->rp.kp); 311 312 if (ret == 0) 313 tp->flags |= TP_FLAG_REGISTERED; 314 else { 315 pr_warning("Could not insert probe at %s+%lu: %d\n", 316 trace_probe_symbol(tp), trace_probe_offset(tp), ret); 317 if (ret == -ENOENT && trace_probe_is_on_module(tp)) { 318 pr_warning("This probe might be able to register after" 319 "target module is loaded. 
Continue.\n"); 320 ret = 0; 321 } else if (ret == -EILSEQ) { 322 pr_warning("Probing address(0x%p) is not an " 323 "instruction boundary.\n", 324 tp->rp.kp.addr); 325 ret = -EINVAL; 326 } 327 } 328 329 return ret; 330 } 331 332 /* Internal unregister function - just handle k*probes and flags */ 333 static void __unregister_trace_probe(struct trace_probe *tp) 334 { 335 if (trace_probe_is_registered(tp)) { 336 if (trace_probe_is_return(tp)) 337 unregister_kretprobe(&tp->rp); 338 else 339 unregister_kprobe(&tp->rp.kp); 340 tp->flags &= ~TP_FLAG_REGISTERED; 341 /* Cleanup kprobe for reuse */ 342 if (tp->rp.kp.symbol_name) 343 tp->rp.kp.addr = NULL; 344 } 345 } 346 347 /* Unregister a trace_probe and probe_event: call with locking probe_lock */ 348 static int unregister_trace_probe(struct trace_probe *tp) 349 { 350 /* Enabled event can not be unregistered */ 351 if (trace_probe_is_enabled(tp)) 352 return -EBUSY; 353 354 __unregister_trace_probe(tp); 355 list_del(&tp->list); 356 unregister_probe_event(tp); 357 358 return 0; 359 } 360 361 /* Register a trace_probe and probe_event */ 362 static int register_trace_probe(struct trace_probe *tp) 363 { 364 struct trace_probe *old_tp; 365 int ret; 366 367 mutex_lock(&probe_lock); 368 369 /* Delete old (same name) event if exist */ 370 old_tp = find_trace_probe(tp->call.name, tp->call.class->system); 371 if (old_tp) { 372 ret = unregister_trace_probe(old_tp); 373 if (ret < 0) 374 goto end; 375 free_trace_probe(old_tp); 376 } 377 378 /* Register new event */ 379 ret = register_probe_event(tp); 380 if (ret) { 381 pr_warning("Failed to register probe event(%d)\n", ret); 382 goto end; 383 } 384 385 /* Register k*probe */ 386 ret = __register_trace_probe(tp); 387 if (ret < 0) 388 unregister_probe_event(tp); 389 else 390 list_add_tail(&tp->list, &probe_list); 391 392 end: 393 mutex_unlock(&probe_lock); 394 return ret; 395 } 396 397 /* Module notifier call back, checking event on the module */ 398 static int 
trace_probe_module_callback(struct notifier_block *nb, 399 unsigned long val, void *data) 400 { 401 struct module *mod = data; 402 struct trace_probe *tp; 403 int ret; 404 405 if (val != MODULE_STATE_COMING) 406 return NOTIFY_DONE; 407 408 /* Update probes on coming module */ 409 mutex_lock(&probe_lock); 410 list_for_each_entry(tp, &probe_list, list) { 411 if (trace_probe_within_module(tp, mod)) { 412 /* Don't need to check busy - this should have gone. */ 413 __unregister_trace_probe(tp); 414 ret = __register_trace_probe(tp); 415 if (ret) 416 pr_warning("Failed to re-register probe %s on" 417 "%s: %d\n", 418 tp->call.name, mod->name, ret); 419 } 420 } 421 mutex_unlock(&probe_lock); 422 423 return NOTIFY_DONE; 424 } 425 426 static struct notifier_block trace_probe_module_nb = { 427 .notifier_call = trace_probe_module_callback, 428 .priority = 1 /* Invoked after kprobe module callback */ 429 }; 430 431 static int create_trace_probe(int argc, char **argv) 432 { 433 /* 434 * Argument syntax: 435 * - Add kprobe: p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS] 436 * - Add kretprobe: r[:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS] 437 * Fetch args: 438 * $retval : fetch return value 439 * $stack : fetch stack address 440 * $stackN : fetch Nth of stack (N:0-) 441 * @ADDR : fetch memory at ADDR (ADDR should be in kernel) 442 * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol) 443 * %REG : fetch register REG 444 * Dereferencing memory fetch: 445 * +|-offs(ARG) : fetch memory at ARG +|- offs address. 446 * Alias name of args: 447 * NAME=FETCHARG : set NAME as alias of FETCHARG. 448 * Type of args: 449 * FETCHARG:TYPE : use TYPE instead of unsigned long. 
450 */ 451 struct trace_probe *tp; 452 int i, ret = 0; 453 bool is_return = false, is_delete = false; 454 char *symbol = NULL, *event = NULL, *group = NULL; 455 char *arg; 456 unsigned long offset = 0; 457 void *addr = NULL; 458 char buf[MAX_EVENT_NAME_LEN]; 459 460 /* argc must be >= 1 */ 461 if (argv[0][0] == 'p') 462 is_return = false; 463 else if (argv[0][0] == 'r') 464 is_return = true; 465 else if (argv[0][0] == '-') 466 is_delete = true; 467 else { 468 pr_info("Probe definition must be started with 'p', 'r' or" 469 " '-'.\n"); 470 return -EINVAL; 471 } 472 473 if (argv[0][1] == ':') { 474 event = &argv[0][2]; 475 if (strchr(event, '/')) { 476 group = event; 477 event = strchr(group, '/') + 1; 478 event[-1] = '\0'; 479 if (strlen(group) == 0) { 480 pr_info("Group name is not specified\n"); 481 return -EINVAL; 482 } 483 } 484 if (strlen(event) == 0) { 485 pr_info("Event name is not specified\n"); 486 return -EINVAL; 487 } 488 } 489 if (!group) 490 group = KPROBE_EVENT_SYSTEM; 491 492 if (is_delete) { 493 if (!event) { 494 pr_info("Delete command needs an event name.\n"); 495 return -EINVAL; 496 } 497 mutex_lock(&probe_lock); 498 tp = find_trace_probe(event, group); 499 if (!tp) { 500 mutex_unlock(&probe_lock); 501 pr_info("Event %s/%s doesn't exist.\n", group, event); 502 return -ENOENT; 503 } 504 /* delete an event */ 505 ret = unregister_trace_probe(tp); 506 if (ret == 0) 507 free_trace_probe(tp); 508 mutex_unlock(&probe_lock); 509 return ret; 510 } 511 512 if (argc < 2) { 513 pr_info("Probe point is not specified.\n"); 514 return -EINVAL; 515 } 516 if (isdigit(argv[1][0])) { 517 if (is_return) { 518 pr_info("Return probe point must be a symbol.\n"); 519 return -EINVAL; 520 } 521 /* an address specified */ 522 ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr); 523 if (ret) { 524 pr_info("Failed to parse address.\n"); 525 return ret; 526 } 527 } else { 528 /* a symbol specified */ 529 symbol = argv[1]; 530 /* TODO: support .init module functions */ 531 
ret = traceprobe_split_symbol_offset(symbol, &offset); 532 if (ret) { 533 pr_info("Failed to parse symbol.\n"); 534 return ret; 535 } 536 if (offset && is_return) { 537 pr_info("Return probe must be used without offset.\n"); 538 return -EINVAL; 539 } 540 } 541 argc -= 2; argv += 2; 542 543 /* setup a probe */ 544 if (!event) { 545 /* Make a new event name */ 546 if (symbol) 547 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld", 548 is_return ? 'r' : 'p', symbol, offset); 549 else 550 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p", 551 is_return ? 'r' : 'p', addr); 552 event = buf; 553 } 554 tp = alloc_trace_probe(group, event, addr, symbol, offset, argc, 555 is_return); 556 if (IS_ERR(tp)) { 557 pr_info("Failed to allocate trace_probe.(%d)\n", 558 (int)PTR_ERR(tp)); 559 return PTR_ERR(tp); 560 } 561 562 /* parse arguments */ 563 ret = 0; 564 for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { 565 /* Increment count for freeing args in error case */ 566 tp->nr_args++; 567 568 /* Parse argument name */ 569 arg = strchr(argv[i], '='); 570 if (arg) { 571 *arg++ = '\0'; 572 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL); 573 } else { 574 arg = argv[i]; 575 /* If argument name is omitted, set "argN" */ 576 snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1); 577 tp->args[i].name = kstrdup(buf, GFP_KERNEL); 578 } 579 580 if (!tp->args[i].name) { 581 pr_info("Failed to allocate argument[%d] name.\n", i); 582 ret = -ENOMEM; 583 goto error; 584 } 585 586 if (!is_good_name(tp->args[i].name)) { 587 pr_info("Invalid argument[%d] name: %s\n", 588 i, tp->args[i].name); 589 ret = -EINVAL; 590 goto error; 591 } 592 593 if (traceprobe_conflict_field_name(tp->args[i].name, 594 tp->args, i)) { 595 pr_info("Argument[%d] name '%s' conflicts with " 596 "another field.\n", i, argv[i]); 597 ret = -EINVAL; 598 goto error; 599 } 600 601 /* Parse fetch argument */ 602 ret = traceprobe_parse_probe_arg(arg, &tp->size, &tp->args[i], 603 is_return, true); 604 if (ret) { 605 pr_info("Parse error at 
argument[%d]. (%d)\n", i, ret); 606 goto error; 607 } 608 } 609 610 ret = register_trace_probe(tp); 611 if (ret) 612 goto error; 613 return 0; 614 615 error: 616 free_trace_probe(tp); 617 return ret; 618 } 619 620 static int release_all_trace_probes(void) 621 { 622 struct trace_probe *tp; 623 int ret = 0; 624 625 mutex_lock(&probe_lock); 626 /* Ensure no probe is in use. */ 627 list_for_each_entry(tp, &probe_list, list) 628 if (trace_probe_is_enabled(tp)) { 629 ret = -EBUSY; 630 goto end; 631 } 632 /* TODO: Use batch unregistration */ 633 while (!list_empty(&probe_list)) { 634 tp = list_entry(probe_list.next, struct trace_probe, list); 635 unregister_trace_probe(tp); 636 free_trace_probe(tp); 637 } 638 639 end: 640 mutex_unlock(&probe_lock); 641 642 return ret; 643 } 644 645 /* Probes listing interfaces */ 646 static void *probes_seq_start(struct seq_file *m, loff_t *pos) 647 { 648 mutex_lock(&probe_lock); 649 return seq_list_start(&probe_list, *pos); 650 } 651 652 static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos) 653 { 654 return seq_list_next(v, &probe_list, pos); 655 } 656 657 static void probes_seq_stop(struct seq_file *m, void *v) 658 { 659 mutex_unlock(&probe_lock); 660 } 661 662 static int probes_seq_show(struct seq_file *m, void *v) 663 { 664 struct trace_probe *tp = v; 665 int i; 666 667 seq_printf(m, "%c", trace_probe_is_return(tp) ? 
					'r' : 'p');
	seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);

	if (!tp->symbol)
		seq_printf(m, " 0x%p", tp->rp.kp.addr);
	else if (tp->rp.kp.offset)
		seq_printf(m, " %s+%u", trace_probe_symbol(tp),
			   tp->rp.kp.offset);
	else
		seq_printf(m, " %s", trace_probe_symbol(tp));

	for (i = 0; i < tp->nr_args; i++)
		seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);
	seq_printf(m, "\n");

	return 0;
}

static const struct seq_operations probes_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_seq_show
};

/* Opening with O_TRUNC (e.g. "> kprobe_events") clears all probes first. */
static int probes_open(struct inode *inode, struct file *file)
{
	int ret;

	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
		ret = release_all_trace_probes();
		if (ret < 0)
			return ret;
	}

	return seq_open(file, &probes_seq_op);
}

static ssize_t probes_write(struct file *file, const char __user *buffer,
			    size_t count, loff_t *ppos)
{
	return traceprobe_probes_write(file, buffer, count, ppos,
			create_trace_probe);
}

static const struct file_operations kprobe_events_ops = {
	.owner          = THIS_MODULE,
	.open           = probes_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
	.write		= probes_write,
};

/* Probes profiling interfaces */
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
	struct trace_probe *tp = v;

	/* columns: event name, hit count, missed count */
	seq_printf(m, "  %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
		   tp->rp.kp.nmissed);

	return 0;
}

static const struct seq_operations profile_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_profile_seq_show
};

static int profile_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &profile_seq_op);
}

static const struct file_operations kprobe_profile_ops = {
	.owner          = THIS_MODULE,
	.open           = profile_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
};

/* Sum up total data length for dynamic arrays (strings) */
static __kprobes int __get_data_size(struct trace_probe *tp,
				     struct pt_regs *regs)
{
	int i, ret = 0;
	u32 len;

	for (i = 0; i < tp->nr_args; i++)
		if (unlikely(tp->args[i].fetch_size.fn)) {
			call_fetch(&tp->args[i].fetch_size, regs, &len);
			ret += len;
		}

	return ret;
}

/* Store the value of each argument */
static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp,
				       struct pt_regs *regs,
				       u8 *data, int maxlen)
{
	int i;
	u32 end = tp->size;
	u32 *dl;	/* Data (relative) location */

	for (i = 0; i < tp->nr_args; i++) {
		if (unlikely(tp->args[i].fetch_size.fn)) {
			/*
			 * First, we set the relative location and
			 * maximum data length to *dl
			 */
			dl = (u32 *)(data + tp->args[i].offset);
			*dl = make_data_rloc(maxlen, end - tp->args[i].offset);
			/* Then try to fetch string or dynamic array data */
			call_fetch(&tp->args[i].fetch, regs, dl);
			/* Reduce maximum length */
			end += get_rloc_len(*dl);
			maxlen -= get_rloc_len(*dl);
			/* Trick here, convert data_rloc to data_loc */
			*dl = convert_rloc_to_loc(*dl,
				 ent_size + tp->args[i].offset);
		} else
			/* Just fetching data normally */
			call_fetch(&tp->args[i].fetch, regs,
				   data + tp->args[i].offset);
	}
}

/* Kprobe handler */
/*
 * Record one kprobe hit into @ftrace_file's ring buffer.  Reserves a
 * variable-size entry (fixed head + fetched args + dynamic string data),
 * fills it, and commits unless the event filter discards it.
 */
static __kprobes void
__kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs,
		    struct ftrace_event_file *ftrace_file)
{
	struct kprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, dsize, pc;
	unsigned long irq_flags;
	struct ftrace_event_call *call = &tp->call;

	WARN_ON(call != ftrace_file->event_call);

	/* Soft-disabled files are linked but must not record. */
	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
		return;

	local_save_flags(irq_flags);
	pc = preempt_count();

	dsize = __get_data_size(tp, regs);
	size = sizeof(*entry) + tp->size + dsize;

	event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
						call->event.type,
						size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->ip = (unsigned long)tp->rp.kp.addr;
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

	if (!filter_current_check_discard(buffer, call, entry, event))
		trace_buffer_unlock_commit_regs(buffer, event,
						irq_flags, pc, regs);
}

/* Fan one hit out to every ftrace instance linked on tp->files (RCU). */
static __kprobes void
kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs)
{
	struct event_file_link *link;

	list_for_each_entry_rcu(link, &tp->files, list)
		__kprobe_trace_func(tp, regs, link->file);
}

/* Kretprobe handler */
/* As __kprobe_trace_func, but records both entry address and return site. */
static __kprobes void
__kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri,
		       struct pt_regs *regs,
		       struct ftrace_event_file *ftrace_file)
{
	struct kretprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, pc, dsize;
	unsigned long irq_flags;
	struct ftrace_event_call *call = &tp->call;

	WARN_ON(call != ftrace_file->event_call);

	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
		return;

	local_save_flags(irq_flags);
	pc = preempt_count();

	dsize = __get_data_size(tp, regs);
	size = sizeof(*entry) + tp->size + dsize;

	event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
						call->event.type,
						size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->func = (unsigned long)tp->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

	if (!filter_current_check_discard(buffer, call, entry, event))
		trace_buffer_unlock_commit_regs(buffer, event,
						irq_flags, pc, regs);
}

static __kprobes void
kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri,
		     struct pt_regs *regs)
{
	struct event_file_link *link;

	list_for_each_entry_rcu(link, &tp->files, list)
		__kretprobe_trace_func(tp, ri, regs, link->file);
}

/* Event entry printers */
static enum print_line_t
print_kprobe_event(struct trace_iterator *iter, int flags,
		   struct trace_event *event)
{
	struct kprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	if (!trace_seq_printf(s, "%s: (", tp->call.name))
		goto partial;

	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
		goto partial;

	if (!trace_seq_puts(s, ")"))
		goto partial;

	/* args are laid out right after the fixed head; see store_trace_args */
	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto partial;

	if (!trace_seq_puts(s, "\n"))
		goto partial;

	return TRACE_TYPE_HANDLED;
partial:
	return TRACE_TYPE_PARTIAL_LINE;
}

static enum print_line_t
print_kretprobe_event(struct trace_iterator *iter, int flags,
		      struct trace_event *event)
{
	struct kretprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kretprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	if (!trace_seq_printf(s, "%s: (", tp->call.name))
		goto partial;

	if (!seq_print_ip_sym(s,
			      field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
		goto partial;

	if (!trace_seq_puts(s, " <- "))
		goto partial;

	if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
		goto partial;

	if (!trace_seq_puts(s, ")"))
		goto partial;

	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto partial;

	if (!trace_seq_puts(s, "\n"))
		goto partial;

	return TRACE_TYPE_HANDLED;
partial:
	return TRACE_TYPE_PARTIAL_LINE;
}


/* Register the fixed "ip" field plus one field per fetch argument. */
static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
{
	int ret, i;
	struct kprobe_trace_entry_head field;
	struct trace_probe *tp = (struct trace_probe *)event_call->data;

	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tp->nr_args; i++) {
		ret = trace_define_field(event_call, tp->args[i].type->fmttype,
					 tp->args[i].name,
					 sizeof(field) + tp->args[i].offset,
					 tp->args[i].type->size,
					 tp->args[i].type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}

/* As above, but kretprobe entries carry "func" and "ret_ip". */
static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
{
	int ret, i;
	struct kretprobe_trace_entry_head field;
	struct trace_probe *tp = (struct trace_probe *)event_call->data;

	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tp->nr_args; i++) {
		ret = trace_define_field(event_call, tp->args[i].type->fmttype,
					 tp->args[i].name,
					 sizeof(field) + tp->args[i].offset,
					 tp->args[i].type->size,
					 tp->args[i].type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * Build the event's print_fmt string into @buf.  Called twice: once with
 * len == 0 to size the buffer, then again to write it (two-pass pattern).
 */
static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
{
	int i;
	int pos = 0;

	const char *fmt, *arg;

	if (!trace_probe_is_return(tp)) {
		fmt = "(%lx)";
		arg = "REC->" FIELD_STRING_IP;
	} else {
		fmt = "(%lx <- %lx)";
		arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
	}

	/* When len=0, we just calculate the needed length */
#define LEN_OR_ZERO (len ? len - pos : 0)

	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);

	for (i = 0; i < tp->nr_args; i++) {
		pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
				tp->args[i].name, tp->args[i].type->fmt);
	}

	pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);

	for (i = 0; i < tp->nr_args; i++) {
		if (strcmp(tp->args[i].type->name, "string") == 0)
			pos += snprintf(buf + pos, LEN_OR_ZERO,
					", __get_str(%s)",
					tp->args[i].name);
		else
			pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
					tp->args[i].name);
	}

#undef LEN_OR_ZERO

	/* return the length of print_fmt */
	return pos;
}

/* Allocate and install tp->call.print_fmt; caller frees it on teardown. */
static int set_print_fmt(struct trace_probe *tp)
{
	int len;
	char *print_fmt;

	/* First: called with 0 length to calculate the needed length */
	len = __set_print_fmt(tp, NULL, 0);
	print_fmt = kmalloc(len + 1, GFP_KERNEL);
	if (!print_fmt)
		return -ENOMEM;

	/* Second: actually write the @print_fmt */
	__set_print_fmt(tp, print_fmt, len + 1);
	tp->call.print_fmt = print_fmt;

	return 0;
}

#ifdef CONFIG_PERF_EVENTS

/* Kprobe profile handler */
static __kprobes void
kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)
{
	struct ftrace_event_call *call = &tp->call;
	struct kprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	head = this_cpu_ptr(call->perf_events);
	if (hlist_empty(head))
		return;

	dsize = __get_data_size(tp, regs);
	__size = sizeof(*entry) + tp->size + dsize;
	/* perf buffer entries are u64-aligned after the u32 size header */
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
	if (!entry)
		return;

	entry->ip = (unsigned long)tp->rp.kp.addr;
	/* zero the padding between fetched data and the aligned size */
	memset(&entry[1], 0, dsize);
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
	perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
}

/* Kretprobe profile handler */
static __kprobes void
kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
		    struct pt_regs *regs)
{
	struct ftrace_event_call *call = &tp->call;
	struct kretprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	head = this_cpu_ptr(call->perf_events);
	if (hlist_empty(head))
		return;

	dsize = __get_data_size(tp, regs);
	__size = sizeof(*entry) + tp->size + dsize;
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
	if (!entry)
		return;

	entry->func = (unsigned long)tp->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
	perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
}
#endif	/* CONFIG_PERF_EVENTS */

/*
 * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
 *
 * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
 * lockless, but we can't race with this __init function.
 */
static __kprobes
int kprobe_register(struct ftrace_event_call *event,
		    enum trace_reg type, void *data)
{
	struct trace_probe *tp = (struct trace_probe *)event->data;
	struct ftrace_event_file *file = data;

	switch (type) {
	case TRACE_REG_REGISTER:
		return enable_trace_probe(tp, file);
	case TRACE_REG_UNREGISTER:
		return disable_trace_probe(tp, file);

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return enable_trace_probe(tp, NULL);
	case TRACE_REG_PERF_UNREGISTER:
		return disable_trace_probe(tp, NULL);
	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
		return 0;
#endif
	}
	return 0;
}

/* Entry point installed as kp.pre_handler; fans out to trace and/or perf. */
static __kprobes
int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);

	tp->nhit++;

	if (tp->flags & TP_FLAG_TRACE)
		kprobe_trace_func(tp, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tp->flags & TP_FLAG_PROFILE)
		kprobe_perf_func(tp, regs);
#endif
	return 0;	/* We don't tweek kernel, so just return 0 */
}

/* Entry point installed as rp.handler for kretprobe events. */
static __kprobes
int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);

	tp->nhit++;

	if (tp->flags & TP_FLAG_TRACE)
		kretprobe_trace_func(tp, ri, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tp->flags & TP_FLAG_PROFILE)
		kretprobe_perf_func(tp, ri, regs);
#endif
	return 0;	/* We don't tweek kernel, so just return 0 */
}

static struct trace_event_functions kretprobe_funcs = {
	.trace		= print_kretprobe_event
};

static struct trace_event_functions kprobe_funcs = {
	.trace		= print_kprobe_event
};

/* Wire the probe's ftrace_event_call into the trace event subsystem. */
static int register_probe_event(struct trace_probe *tp)
{
	struct ftrace_event_call *call = &tp->call;
	int ret;

	/* Initialize ftrace_event_call */
	INIT_LIST_HEAD(&call->class->fields);
	if (trace_probe_is_return(tp)) {
		call->event.funcs = &kretprobe_funcs;
		call->class->define_fields = kretprobe_event_define_fields;
	} else {
		call->event.funcs = &kprobe_funcs;
		call->class->define_fields = kprobe_event_define_fields;
	}
	if (set_print_fmt(tp) < 0)
		return -ENOMEM;
	/* register_ftrace_event() returns the event type id; 0 means failure */
	ret = register_ftrace_event(&call->event);
	if (!ret) {
		kfree(call->print_fmt);
		return -ENODEV;
	}
	call->flags = 0;
	call->class->reg = kprobe_register;
	call->data = tp;
	ret = trace_add_event_call(call);
	if (ret) {
		pr_info("Failed to register kprobe event: %s\n", call->name);
		kfree(call->print_fmt);
		unregister_ftrace_event(&call->event);
	}
	return ret;
}

static void unregister_probe_event(struct trace_probe *tp)
{
	/* tp->event is unregistered in trace_remove_event_call() */
	trace_remove_event_call(&tp->call);
	kfree(tp->call.print_fmt);
}

/* Make a debugfs interface for controlling probe points */
static __init int init_kprobe_trace(void)
{
	struct dentry *d_tracer;
	struct dentry *entry;

	if (register_module_notifier(&trace_probe_module_nb))
		return -EINVAL;

	d_tracer = tracing_init_dentry();
	if (!d_tracer)
		return 0;

	entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
				    NULL, &kprobe_events_ops);

	/* Event list interface */
	if (!entry)
		pr_warning("Could not create debugfs "
			   "'kprobe_events' entry\n");

	/* Profile interface */
	entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
				    NULL, &kprobe_profile_ops);

	if (!entry)
		pr_warning("Could not create debugfs "
			   "'kprobe_profile' entry\n");
	return 0;
}
fs_initcall(init_kprobe_trace);


#ifdef CONFIG_FTRACE_STARTUP_TEST

/*
 * The "__used" keeps gcc from removing the function symbol
 * from the kallsyms table.
 */
static __used int kprobe_trace_selftest_target(int a1, int a2, int a3,
					       int a4, int a5, int a6)
{
	return a1 + a2 + a3 + a4 + a5 + a6;
}

/* Find the ftrace_event_file backing @tp's event call in trace array @tr */
static struct ftrace_event_file *
find_trace_probe_file(struct trace_probe *tp, struct trace_array *tr)
{
	struct ftrace_event_file *file;

	list_for_each_entry(file, &tr->events, list)
		if (file->event_call == &tp->call)
			return file;

	return NULL;
}

/*
 * Nobody but us can call enable_trace_probe/disable_trace_probe at this
 * stage, we can do this lockless.
 */
static __init int kprobe_trace_self_tests_init(void)
{
	int ret, warn = 0;
	int (*target)(int, int, int, int, int, int);
	struct trace_probe *tp;
	struct ftrace_event_file *file;

	/* Call the target through a pointer rather than directly */
	target = kprobe_trace_selftest_target;

	pr_info("Testing kprobe tracing: ");

	/* Create and enable an entry probe on the selftest target */
	ret = traceprobe_command("p:testprobe kprobe_trace_selftest_target "
				 "$stack $stack0 +0($stack)",
				 create_trace_probe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on probing function entry.\n");
		warn++;
	} else {
		/* Enable trace point */
		tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tp == NULL)) {
			pr_warn("error on getting new probe.\n");
			warn++;
		} else {
			file = find_trace_probe_file(tp, top_trace_array());
			if (WARN_ON_ONCE(file == NULL)) {
				pr_warn("error on getting probe file.\n");
				warn++;
			} else
				enable_trace_probe(tp, file);
		}
	}

	/* Create and enable a return probe on the same target */
	ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target "
				 "$retval", create_trace_probe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on probing function return.\n");
		warn++;
	} else {
		/* Enable trace point */
		tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tp == NULL)) {
			pr_warn("error on getting 2nd new probe.\n");
			warn++;
		} else {
			file = find_trace_probe_file(tp, top_trace_array());
			if (WARN_ON_ONCE(file == NULL)) {
				pr_warn("error on getting probe file.\n");
				warn++;
			} else
				enable_trace_probe(tp, file);
		}
	}

	if (warn)
		goto end;

	/* Invoke the probed function so both probes fire */
	ret = target(1, 2, 3, 4, 5, 6);

	/* Disable trace points before removing it */
	tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
	if (WARN_ON_ONCE(tp == NULL)) {
		pr_warn("error on getting test probe.\n");
		warn++;
	} else {
		file = find_trace_probe_file(tp, top_trace_array());
		if (WARN_ON_ONCE(file == NULL)) {
			pr_warn("error on getting probe file.\n");
			warn++;
		} else
			disable_trace_probe(tp, file);
	}

	tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
	if (WARN_ON_ONCE(tp == NULL)) {
		pr_warn("error on getting 2nd test probe.\n");
		warn++;
	} else {
		file = find_trace_probe_file(tp, top_trace_array());
		if (WARN_ON_ONCE(file == NULL)) {
			pr_warn("error on getting probe file.\n");
			warn++;
		} else
			disable_trace_probe(tp, file);
	}

	/* Remove both probes through the command interface */
	ret = traceprobe_command("-:testprobe", create_trace_probe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on deleting a probe.\n");
		warn++;
	}

	ret = traceprobe_command("-:testprobe2", create_trace_probe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on deleting a probe.\n");
		warn++;
	}

end:
	release_all_trace_probes();
	if (warn)
		pr_cont("NG: Some tests are failed. Please check them.\n");
	else
		pr_cont("OK\n");
	return 0;
}

late_initcall(kprobe_trace_self_tests_init);

#endif