1 /* 2 * Kprobes-based tracing events 3 * 4 * Created by Masami Hiramatsu <mhiramat@redhat.com> 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 */ 19 20 #include <linux/module.h> 21 #include <linux/uaccess.h> 22 23 #include "trace_probe.h" 24 25 #define KPROBE_EVENT_SYSTEM "kprobes" 26 27 /** 28 * Kprobe event core functions 29 */ 30 struct trace_probe { 31 struct list_head list; 32 struct kretprobe rp; /* Use rp.kp for kprobe use */ 33 unsigned long nhit; 34 unsigned int flags; /* For TP_FLAG_* */ 35 const char *symbol; /* symbol name */ 36 struct ftrace_event_class class; 37 struct ftrace_event_call call; 38 struct list_head files; 39 ssize_t size; /* trace entry size */ 40 unsigned int nr_args; 41 struct probe_arg args[]; 42 }; 43 44 struct event_file_link { 45 struct ftrace_event_file *file; 46 struct list_head list; 47 }; 48 49 #define SIZEOF_TRACE_PROBE(n) \ 50 (offsetof(struct trace_probe, args) + \ 51 (sizeof(struct probe_arg) * (n))) 52 53 54 static __kprobes bool trace_probe_is_return(struct trace_probe *tp) 55 { 56 return tp->rp.handler != NULL; 57 } 58 59 static __kprobes const char *trace_probe_symbol(struct trace_probe *tp) 60 { 61 return tp->symbol ? 
tp->symbol : "unknown"; 62 } 63 64 static __kprobes unsigned long trace_probe_offset(struct trace_probe *tp) 65 { 66 return tp->rp.kp.offset; 67 } 68 69 static __kprobes bool trace_probe_is_enabled(struct trace_probe *tp) 70 { 71 return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE)); 72 } 73 74 static __kprobes bool trace_probe_is_registered(struct trace_probe *tp) 75 { 76 return !!(tp->flags & TP_FLAG_REGISTERED); 77 } 78 79 static __kprobes bool trace_probe_has_gone(struct trace_probe *tp) 80 { 81 return !!(kprobe_gone(&tp->rp.kp)); 82 } 83 84 static __kprobes bool trace_probe_within_module(struct trace_probe *tp, 85 struct module *mod) 86 { 87 int len = strlen(mod->name); 88 const char *name = trace_probe_symbol(tp); 89 return strncmp(mod->name, name, len) == 0 && name[len] == ':'; 90 } 91 92 static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp) 93 { 94 return !!strchr(trace_probe_symbol(tp), ':'); 95 } 96 97 static int register_probe_event(struct trace_probe *tp); 98 static void unregister_probe_event(struct trace_probe *tp); 99 100 static DEFINE_MUTEX(probe_lock); 101 static LIST_HEAD(probe_list); 102 103 static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs); 104 static int kretprobe_dispatcher(struct kretprobe_instance *ri, 105 struct pt_regs *regs); 106 107 /* 108 * Allocate new trace_probe and initialize it (including kprobes). 
109 */ 110 static struct trace_probe *alloc_trace_probe(const char *group, 111 const char *event, 112 void *addr, 113 const char *symbol, 114 unsigned long offs, 115 int nargs, bool is_return) 116 { 117 struct trace_probe *tp; 118 int ret = -ENOMEM; 119 120 tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL); 121 if (!tp) 122 return ERR_PTR(ret); 123 124 if (symbol) { 125 tp->symbol = kstrdup(symbol, GFP_KERNEL); 126 if (!tp->symbol) 127 goto error; 128 tp->rp.kp.symbol_name = tp->symbol; 129 tp->rp.kp.offset = offs; 130 } else 131 tp->rp.kp.addr = addr; 132 133 if (is_return) 134 tp->rp.handler = kretprobe_dispatcher; 135 else 136 tp->rp.kp.pre_handler = kprobe_dispatcher; 137 138 if (!event || !is_good_name(event)) { 139 ret = -EINVAL; 140 goto error; 141 } 142 143 tp->call.class = &tp->class; 144 tp->call.name = kstrdup(event, GFP_KERNEL); 145 if (!tp->call.name) 146 goto error; 147 148 if (!group || !is_good_name(group)) { 149 ret = -EINVAL; 150 goto error; 151 } 152 153 tp->class.system = kstrdup(group, GFP_KERNEL); 154 if (!tp->class.system) 155 goto error; 156 157 INIT_LIST_HEAD(&tp->list); 158 INIT_LIST_HEAD(&tp->files); 159 return tp; 160 error: 161 kfree(tp->call.name); 162 kfree(tp->symbol); 163 kfree(tp); 164 return ERR_PTR(ret); 165 } 166 167 static void free_trace_probe(struct trace_probe *tp) 168 { 169 int i; 170 171 for (i = 0; i < tp->nr_args; i++) 172 traceprobe_free_probe_arg(&tp->args[i]); 173 174 kfree(tp->call.class->system); 175 kfree(tp->call.name); 176 kfree(tp->symbol); 177 kfree(tp); 178 } 179 180 static struct trace_probe *find_trace_probe(const char *event, 181 const char *group) 182 { 183 struct trace_probe *tp; 184 185 list_for_each_entry(tp, &probe_list, list) 186 if (strcmp(tp->call.name, event) == 0 && 187 strcmp(tp->call.class->system, group) == 0) 188 return tp; 189 return NULL; 190 } 191 192 /* 193 * Enable trace_probe 194 * if the file is NULL, enable "perf" handler, or enable "trace" handler. 
195 */ 196 static int 197 enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) 198 { 199 int ret = 0; 200 201 if (file) { 202 struct event_file_link *link; 203 204 link = kmalloc(sizeof(*link), GFP_KERNEL); 205 if (!link) { 206 ret = -ENOMEM; 207 goto out; 208 } 209 210 link->file = file; 211 list_add_tail_rcu(&link->list, &tp->files); 212 213 tp->flags |= TP_FLAG_TRACE; 214 } else 215 tp->flags |= TP_FLAG_PROFILE; 216 217 if (trace_probe_is_registered(tp) && !trace_probe_has_gone(tp)) { 218 if (trace_probe_is_return(tp)) 219 ret = enable_kretprobe(&tp->rp); 220 else 221 ret = enable_kprobe(&tp->rp.kp); 222 } 223 out: 224 return ret; 225 } 226 227 static struct event_file_link * 228 find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file) 229 { 230 struct event_file_link *link; 231 232 list_for_each_entry(link, &tp->files, list) 233 if (link->file == file) 234 return link; 235 236 return NULL; 237 } 238 239 /* 240 * Disable trace_probe 241 * if the file is NULL, disable "perf" handler, or disable "trace" handler. 
242 */ 243 static int 244 disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) 245 { 246 int ret = 0; 247 248 if (file) { 249 struct event_file_link *link; 250 251 link = find_event_file_link(tp, file); 252 if (!link) { 253 ret = -EINVAL; 254 goto out; 255 } 256 257 list_del_rcu(&link->list); 258 /* synchronize with kprobe_trace_func/kretprobe_trace_func */ 259 synchronize_sched(); 260 kfree(link); 261 262 if (!list_empty(&tp->files)) 263 goto out; 264 265 tp->flags &= ~TP_FLAG_TRACE; 266 } else 267 tp->flags &= ~TP_FLAG_PROFILE; 268 269 if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) { 270 if (trace_probe_is_return(tp)) 271 disable_kretprobe(&tp->rp); 272 else 273 disable_kprobe(&tp->rp.kp); 274 } 275 out: 276 return ret; 277 } 278 279 /* Internal register function - just handle k*probes and flags */ 280 static int __register_trace_probe(struct trace_probe *tp) 281 { 282 int i, ret; 283 284 if (trace_probe_is_registered(tp)) 285 return -EINVAL; 286 287 for (i = 0; i < tp->nr_args; i++) 288 traceprobe_update_arg(&tp->args[i]); 289 290 /* Set/clear disabled flag according to tp->flag */ 291 if (trace_probe_is_enabled(tp)) 292 tp->rp.kp.flags &= ~KPROBE_FLAG_DISABLED; 293 else 294 tp->rp.kp.flags |= KPROBE_FLAG_DISABLED; 295 296 if (trace_probe_is_return(tp)) 297 ret = register_kretprobe(&tp->rp); 298 else 299 ret = register_kprobe(&tp->rp.kp); 300 301 if (ret == 0) 302 tp->flags |= TP_FLAG_REGISTERED; 303 else { 304 pr_warning("Could not insert probe at %s+%lu: %d\n", 305 trace_probe_symbol(tp), trace_probe_offset(tp), ret); 306 if (ret == -ENOENT && trace_probe_is_on_module(tp)) { 307 pr_warning("This probe might be able to register after" 308 "target module is loaded. 
Continue.\n"); 309 ret = 0; 310 } else if (ret == -EILSEQ) { 311 pr_warning("Probing address(0x%p) is not an " 312 "instruction boundary.\n", 313 tp->rp.kp.addr); 314 ret = -EINVAL; 315 } 316 } 317 318 return ret; 319 } 320 321 /* Internal unregister function - just handle k*probes and flags */ 322 static void __unregister_trace_probe(struct trace_probe *tp) 323 { 324 if (trace_probe_is_registered(tp)) { 325 if (trace_probe_is_return(tp)) 326 unregister_kretprobe(&tp->rp); 327 else 328 unregister_kprobe(&tp->rp.kp); 329 tp->flags &= ~TP_FLAG_REGISTERED; 330 /* Cleanup kprobe for reuse */ 331 if (tp->rp.kp.symbol_name) 332 tp->rp.kp.addr = NULL; 333 } 334 } 335 336 /* Unregister a trace_probe and probe_event: call with locking probe_lock */ 337 static int unregister_trace_probe(struct trace_probe *tp) 338 { 339 /* Enabled event can not be unregistered */ 340 if (trace_probe_is_enabled(tp)) 341 return -EBUSY; 342 343 __unregister_trace_probe(tp); 344 list_del(&tp->list); 345 unregister_probe_event(tp); 346 347 return 0; 348 } 349 350 /* Register a trace_probe and probe_event */ 351 static int register_trace_probe(struct trace_probe *tp) 352 { 353 struct trace_probe *old_tp; 354 int ret; 355 356 mutex_lock(&probe_lock); 357 358 /* Delete old (same name) event if exist */ 359 old_tp = find_trace_probe(tp->call.name, tp->call.class->system); 360 if (old_tp) { 361 ret = unregister_trace_probe(old_tp); 362 if (ret < 0) 363 goto end; 364 free_trace_probe(old_tp); 365 } 366 367 /* Register new event */ 368 ret = register_probe_event(tp); 369 if (ret) { 370 pr_warning("Failed to register probe event(%d)\n", ret); 371 goto end; 372 } 373 374 /* Register k*probe */ 375 ret = __register_trace_probe(tp); 376 if (ret < 0) 377 unregister_probe_event(tp); 378 else 379 list_add_tail(&tp->list, &probe_list); 380 381 end: 382 mutex_unlock(&probe_lock); 383 return ret; 384 } 385 386 /* Module notifier call back, checking event on the module */ 387 static int 
trace_probe_module_callback(struct notifier_block *nb, 388 unsigned long val, void *data) 389 { 390 struct module *mod = data; 391 struct trace_probe *tp; 392 int ret; 393 394 if (val != MODULE_STATE_COMING) 395 return NOTIFY_DONE; 396 397 /* Update probes on coming module */ 398 mutex_lock(&probe_lock); 399 list_for_each_entry(tp, &probe_list, list) { 400 if (trace_probe_within_module(tp, mod)) { 401 /* Don't need to check busy - this should have gone. */ 402 __unregister_trace_probe(tp); 403 ret = __register_trace_probe(tp); 404 if (ret) 405 pr_warning("Failed to re-register probe %s on" 406 "%s: %d\n", 407 tp->call.name, mod->name, ret); 408 } 409 } 410 mutex_unlock(&probe_lock); 411 412 return NOTIFY_DONE; 413 } 414 415 static struct notifier_block trace_probe_module_nb = { 416 .notifier_call = trace_probe_module_callback, 417 .priority = 1 /* Invoked after kprobe module callback */ 418 }; 419 420 static int create_trace_probe(int argc, char **argv) 421 { 422 /* 423 * Argument syntax: 424 * - Add kprobe: p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS] 425 * - Add kretprobe: r[:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS] 426 * Fetch args: 427 * $retval : fetch return value 428 * $stack : fetch stack address 429 * $stackN : fetch Nth of stack (N:0-) 430 * @ADDR : fetch memory at ADDR (ADDR should be in kernel) 431 * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol) 432 * %REG : fetch register REG 433 * Dereferencing memory fetch: 434 * +|-offs(ARG) : fetch memory at ARG +|- offs address. 435 * Alias name of args: 436 * NAME=FETCHARG : set NAME as alias of FETCHARG. 437 * Type of args: 438 * FETCHARG:TYPE : use TYPE instead of unsigned long. 
439 */ 440 struct trace_probe *tp; 441 int i, ret = 0; 442 bool is_return = false, is_delete = false; 443 char *symbol = NULL, *event = NULL, *group = NULL; 444 char *arg; 445 unsigned long offset = 0; 446 void *addr = NULL; 447 char buf[MAX_EVENT_NAME_LEN]; 448 449 /* argc must be >= 1 */ 450 if (argv[0][0] == 'p') 451 is_return = false; 452 else if (argv[0][0] == 'r') 453 is_return = true; 454 else if (argv[0][0] == '-') 455 is_delete = true; 456 else { 457 pr_info("Probe definition must be started with 'p', 'r' or" 458 " '-'.\n"); 459 return -EINVAL; 460 } 461 462 if (argv[0][1] == ':') { 463 event = &argv[0][2]; 464 if (strchr(event, '/')) { 465 group = event; 466 event = strchr(group, '/') + 1; 467 event[-1] = '\0'; 468 if (strlen(group) == 0) { 469 pr_info("Group name is not specified\n"); 470 return -EINVAL; 471 } 472 } 473 if (strlen(event) == 0) { 474 pr_info("Event name is not specified\n"); 475 return -EINVAL; 476 } 477 } 478 if (!group) 479 group = KPROBE_EVENT_SYSTEM; 480 481 if (is_delete) { 482 if (!event) { 483 pr_info("Delete command needs an event name.\n"); 484 return -EINVAL; 485 } 486 mutex_lock(&probe_lock); 487 tp = find_trace_probe(event, group); 488 if (!tp) { 489 mutex_unlock(&probe_lock); 490 pr_info("Event %s/%s doesn't exist.\n", group, event); 491 return -ENOENT; 492 } 493 /* delete an event */ 494 ret = unregister_trace_probe(tp); 495 if (ret == 0) 496 free_trace_probe(tp); 497 mutex_unlock(&probe_lock); 498 return ret; 499 } 500 501 if (argc < 2) { 502 pr_info("Probe point is not specified.\n"); 503 return -EINVAL; 504 } 505 if (isdigit(argv[1][0])) { 506 if (is_return) { 507 pr_info("Return probe point must be a symbol.\n"); 508 return -EINVAL; 509 } 510 /* an address specified */ 511 ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr); 512 if (ret) { 513 pr_info("Failed to parse address.\n"); 514 return ret; 515 } 516 } else { 517 /* a symbol specified */ 518 symbol = argv[1]; 519 /* TODO: support .init module functions */ 520 
ret = traceprobe_split_symbol_offset(symbol, &offset); 521 if (ret) { 522 pr_info("Failed to parse symbol.\n"); 523 return ret; 524 } 525 if (offset && is_return) { 526 pr_info("Return probe must be used without offset.\n"); 527 return -EINVAL; 528 } 529 } 530 argc -= 2; argv += 2; 531 532 /* setup a probe */ 533 if (!event) { 534 /* Make a new event name */ 535 if (symbol) 536 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld", 537 is_return ? 'r' : 'p', symbol, offset); 538 else 539 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p", 540 is_return ? 'r' : 'p', addr); 541 event = buf; 542 } 543 tp = alloc_trace_probe(group, event, addr, symbol, offset, argc, 544 is_return); 545 if (IS_ERR(tp)) { 546 pr_info("Failed to allocate trace_probe.(%d)\n", 547 (int)PTR_ERR(tp)); 548 return PTR_ERR(tp); 549 } 550 551 /* parse arguments */ 552 ret = 0; 553 for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { 554 /* Increment count for freeing args in error case */ 555 tp->nr_args++; 556 557 /* Parse argument name */ 558 arg = strchr(argv[i], '='); 559 if (arg) { 560 *arg++ = '\0'; 561 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL); 562 } else { 563 arg = argv[i]; 564 /* If argument name is omitted, set "argN" */ 565 snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1); 566 tp->args[i].name = kstrdup(buf, GFP_KERNEL); 567 } 568 569 if (!tp->args[i].name) { 570 pr_info("Failed to allocate argument[%d] name.\n", i); 571 ret = -ENOMEM; 572 goto error; 573 } 574 575 if (!is_good_name(tp->args[i].name)) { 576 pr_info("Invalid argument[%d] name: %s\n", 577 i, tp->args[i].name); 578 ret = -EINVAL; 579 goto error; 580 } 581 582 if (traceprobe_conflict_field_name(tp->args[i].name, 583 tp->args, i)) { 584 pr_info("Argument[%d] name '%s' conflicts with " 585 "another field.\n", i, argv[i]); 586 ret = -EINVAL; 587 goto error; 588 } 589 590 /* Parse fetch argument */ 591 ret = traceprobe_parse_probe_arg(arg, &tp->size, &tp->args[i], 592 is_return, true); 593 if (ret) { 594 pr_info("Parse error at 
argument[%d]. (%d)\n", i, ret); 595 goto error; 596 } 597 } 598 599 ret = register_trace_probe(tp); 600 if (ret) 601 goto error; 602 return 0; 603 604 error: 605 free_trace_probe(tp); 606 return ret; 607 } 608 609 static int release_all_trace_probes(void) 610 { 611 struct trace_probe *tp; 612 int ret = 0; 613 614 mutex_lock(&probe_lock); 615 /* Ensure no probe is in use. */ 616 list_for_each_entry(tp, &probe_list, list) 617 if (trace_probe_is_enabled(tp)) { 618 ret = -EBUSY; 619 goto end; 620 } 621 /* TODO: Use batch unregistration */ 622 while (!list_empty(&probe_list)) { 623 tp = list_entry(probe_list.next, struct trace_probe, list); 624 unregister_trace_probe(tp); 625 free_trace_probe(tp); 626 } 627 628 end: 629 mutex_unlock(&probe_lock); 630 631 return ret; 632 } 633 634 /* Probes listing interfaces */ 635 static void *probes_seq_start(struct seq_file *m, loff_t *pos) 636 { 637 mutex_lock(&probe_lock); 638 return seq_list_start(&probe_list, *pos); 639 } 640 641 static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos) 642 { 643 return seq_list_next(v, &probe_list, pos); 644 } 645 646 static void probes_seq_stop(struct seq_file *m, void *v) 647 { 648 mutex_unlock(&probe_lock); 649 } 650 651 static int probes_seq_show(struct seq_file *m, void *v) 652 { 653 struct trace_probe *tp = v; 654 int i; 655 656 seq_printf(m, "%c", trace_probe_is_return(tp) ? 
'r' : 'p'); 657 seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name); 658 659 if (!tp->symbol) 660 seq_printf(m, " 0x%p", tp->rp.kp.addr); 661 else if (tp->rp.kp.offset) 662 seq_printf(m, " %s+%u", trace_probe_symbol(tp), 663 tp->rp.kp.offset); 664 else 665 seq_printf(m, " %s", trace_probe_symbol(tp)); 666 667 for (i = 0; i < tp->nr_args; i++) 668 seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm); 669 seq_printf(m, "\n"); 670 671 return 0; 672 } 673 674 static const struct seq_operations probes_seq_op = { 675 .start = probes_seq_start, 676 .next = probes_seq_next, 677 .stop = probes_seq_stop, 678 .show = probes_seq_show 679 }; 680 681 static int probes_open(struct inode *inode, struct file *file) 682 { 683 int ret; 684 685 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { 686 ret = release_all_trace_probes(); 687 if (ret < 0) 688 return ret; 689 } 690 691 return seq_open(file, &probes_seq_op); 692 } 693 694 static ssize_t probes_write(struct file *file, const char __user *buffer, 695 size_t count, loff_t *ppos) 696 { 697 return traceprobe_probes_write(file, buffer, count, ppos, 698 create_trace_probe); 699 } 700 701 static const struct file_operations kprobe_events_ops = { 702 .owner = THIS_MODULE, 703 .open = probes_open, 704 .read = seq_read, 705 .llseek = seq_lseek, 706 .release = seq_release, 707 .write = probes_write, 708 }; 709 710 /* Probes profiling interfaces */ 711 static int probes_profile_seq_show(struct seq_file *m, void *v) 712 { 713 struct trace_probe *tp = v; 714 715 seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit, 716 tp->rp.kp.nmissed); 717 718 return 0; 719 } 720 721 static const struct seq_operations profile_seq_op = { 722 .start = probes_seq_start, 723 .next = probes_seq_next, 724 .stop = probes_seq_stop, 725 .show = probes_profile_seq_show 726 }; 727 728 static int profile_open(struct inode *inode, struct file *file) 729 { 730 return seq_open(file, &profile_seq_op); 731 } 732 733 static const 
struct file_operations kprobe_profile_ops = { 734 .owner = THIS_MODULE, 735 .open = profile_open, 736 .read = seq_read, 737 .llseek = seq_lseek, 738 .release = seq_release, 739 }; 740 741 /* Sum up total data length for dynamic arraies (strings) */ 742 static __kprobes int __get_data_size(struct trace_probe *tp, 743 struct pt_regs *regs) 744 { 745 int i, ret = 0; 746 u32 len; 747 748 for (i = 0; i < tp->nr_args; i++) 749 if (unlikely(tp->args[i].fetch_size.fn)) { 750 call_fetch(&tp->args[i].fetch_size, regs, &len); 751 ret += len; 752 } 753 754 return ret; 755 } 756 757 /* Store the value of each argument */ 758 static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp, 759 struct pt_regs *regs, 760 u8 *data, int maxlen) 761 { 762 int i; 763 u32 end = tp->size; 764 u32 *dl; /* Data (relative) location */ 765 766 for (i = 0; i < tp->nr_args; i++) { 767 if (unlikely(tp->args[i].fetch_size.fn)) { 768 /* 769 * First, we set the relative location and 770 * maximum data length to *dl 771 */ 772 dl = (u32 *)(data + tp->args[i].offset); 773 *dl = make_data_rloc(maxlen, end - tp->args[i].offset); 774 /* Then try to fetch string or dynamic array data */ 775 call_fetch(&tp->args[i].fetch, regs, dl); 776 /* Reduce maximum length */ 777 end += get_rloc_len(*dl); 778 maxlen -= get_rloc_len(*dl); 779 /* Trick here, convert data_rloc to data_loc */ 780 *dl = convert_rloc_to_loc(*dl, 781 ent_size + tp->args[i].offset); 782 } else 783 /* Just fetching data normally */ 784 call_fetch(&tp->args[i].fetch, regs, 785 data + tp->args[i].offset); 786 } 787 } 788 789 /* Kprobe handler */ 790 static __kprobes void 791 __kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs, 792 struct ftrace_event_file *ftrace_file) 793 { 794 struct kprobe_trace_entry_head *entry; 795 struct ring_buffer_event *event; 796 struct ring_buffer *buffer; 797 int size, dsize, pc; 798 unsigned long irq_flags; 799 struct ftrace_event_call *call = &tp->call; 800 801 WARN_ON(call != 
ftrace_file->event_call); 802 803 if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags)) 804 return; 805 806 local_save_flags(irq_flags); 807 pc = preempt_count(); 808 809 dsize = __get_data_size(tp, regs); 810 size = sizeof(*entry) + tp->size + dsize; 811 812 event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, 813 call->event.type, 814 size, irq_flags, pc); 815 if (!event) 816 return; 817 818 entry = ring_buffer_event_data(event); 819 entry->ip = (unsigned long)tp->rp.kp.addr; 820 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 821 822 if (!filter_current_check_discard(buffer, call, entry, event)) 823 trace_buffer_unlock_commit_regs(buffer, event, 824 irq_flags, pc, regs); 825 } 826 827 static __kprobes void 828 kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs) 829 { 830 struct event_file_link *link; 831 832 list_for_each_entry_rcu(link, &tp->files, list) 833 __kprobe_trace_func(tp, regs, link->file); 834 } 835 836 /* Kretprobe handler */ 837 static __kprobes void 838 __kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, 839 struct pt_regs *regs, 840 struct ftrace_event_file *ftrace_file) 841 { 842 struct kretprobe_trace_entry_head *entry; 843 struct ring_buffer_event *event; 844 struct ring_buffer *buffer; 845 int size, pc, dsize; 846 unsigned long irq_flags; 847 struct ftrace_event_call *call = &tp->call; 848 849 WARN_ON(call != ftrace_file->event_call); 850 851 if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags)) 852 return; 853 854 local_save_flags(irq_flags); 855 pc = preempt_count(); 856 857 dsize = __get_data_size(tp, regs); 858 size = sizeof(*entry) + tp->size + dsize; 859 860 event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, 861 call->event.type, 862 size, irq_flags, pc); 863 if (!event) 864 return; 865 866 entry = ring_buffer_event_data(event); 867 entry->func = (unsigned long)tp->rp.kp.addr; 868 entry->ret_ip = (unsigned long)ri->ret_addr; 869 
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 870 871 if (!filter_current_check_discard(buffer, call, entry, event)) 872 trace_buffer_unlock_commit_regs(buffer, event, 873 irq_flags, pc, regs); 874 } 875 876 static __kprobes void 877 kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, 878 struct pt_regs *regs) 879 { 880 struct event_file_link *link; 881 882 list_for_each_entry_rcu(link, &tp->files, list) 883 __kretprobe_trace_func(tp, ri, regs, link->file); 884 } 885 886 /* Event entry printers */ 887 static enum print_line_t 888 print_kprobe_event(struct trace_iterator *iter, int flags, 889 struct trace_event *event) 890 { 891 struct kprobe_trace_entry_head *field; 892 struct trace_seq *s = &iter->seq; 893 struct trace_probe *tp; 894 u8 *data; 895 int i; 896 897 field = (struct kprobe_trace_entry_head *)iter->ent; 898 tp = container_of(event, struct trace_probe, call.event); 899 900 if (!trace_seq_printf(s, "%s: (", tp->call.name)) 901 goto partial; 902 903 if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) 904 goto partial; 905 906 if (!trace_seq_puts(s, ")")) 907 goto partial; 908 909 data = (u8 *)&field[1]; 910 for (i = 0; i < tp->nr_args; i++) 911 if (!tp->args[i].type->print(s, tp->args[i].name, 912 data + tp->args[i].offset, field)) 913 goto partial; 914 915 if (!trace_seq_puts(s, "\n")) 916 goto partial; 917 918 return TRACE_TYPE_HANDLED; 919 partial: 920 return TRACE_TYPE_PARTIAL_LINE; 921 } 922 923 static enum print_line_t 924 print_kretprobe_event(struct trace_iterator *iter, int flags, 925 struct trace_event *event) 926 { 927 struct kretprobe_trace_entry_head *field; 928 struct trace_seq *s = &iter->seq; 929 struct trace_probe *tp; 930 u8 *data; 931 int i; 932 933 field = (struct kretprobe_trace_entry_head *)iter->ent; 934 tp = container_of(event, struct trace_probe, call.event); 935 936 if (!trace_seq_printf(s, "%s: (", tp->call.name)) 937 goto partial; 938 939 if (!seq_print_ip_sym(s, 
field->ret_ip, flags | TRACE_ITER_SYM_OFFSET)) 940 goto partial; 941 942 if (!trace_seq_puts(s, " <- ")) 943 goto partial; 944 945 if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET)) 946 goto partial; 947 948 if (!trace_seq_puts(s, ")")) 949 goto partial; 950 951 data = (u8 *)&field[1]; 952 for (i = 0; i < tp->nr_args; i++) 953 if (!tp->args[i].type->print(s, tp->args[i].name, 954 data + tp->args[i].offset, field)) 955 goto partial; 956 957 if (!trace_seq_puts(s, "\n")) 958 goto partial; 959 960 return TRACE_TYPE_HANDLED; 961 partial: 962 return TRACE_TYPE_PARTIAL_LINE; 963 } 964 965 966 static int kprobe_event_define_fields(struct ftrace_event_call *event_call) 967 { 968 int ret, i; 969 struct kprobe_trace_entry_head field; 970 struct trace_probe *tp = (struct trace_probe *)event_call->data; 971 972 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); 973 /* Set argument names as fields */ 974 for (i = 0; i < tp->nr_args; i++) { 975 ret = trace_define_field(event_call, tp->args[i].type->fmttype, 976 tp->args[i].name, 977 sizeof(field) + tp->args[i].offset, 978 tp->args[i].type->size, 979 tp->args[i].type->is_signed, 980 FILTER_OTHER); 981 if (ret) 982 return ret; 983 } 984 return 0; 985 } 986 987 static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) 988 { 989 int ret, i; 990 struct kretprobe_trace_entry_head field; 991 struct trace_probe *tp = (struct trace_probe *)event_call->data; 992 993 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); 994 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); 995 /* Set argument names as fields */ 996 for (i = 0; i < tp->nr_args; i++) { 997 ret = trace_define_field(event_call, tp->args[i].type->fmttype, 998 tp->args[i].name, 999 sizeof(field) + tp->args[i].offset, 1000 tp->args[i].type->size, 1001 tp->args[i].type->is_signed, 1002 FILTER_OTHER); 1003 if (ret) 1004 return ret; 1005 } 1006 return 0; 1007 } 1008 1009 static int __set_print_fmt(struct trace_probe *tp, char 
*buf, int len) 1010 { 1011 int i; 1012 int pos = 0; 1013 1014 const char *fmt, *arg; 1015 1016 if (!trace_probe_is_return(tp)) { 1017 fmt = "(%lx)"; 1018 arg = "REC->" FIELD_STRING_IP; 1019 } else { 1020 fmt = "(%lx <- %lx)"; 1021 arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP; 1022 } 1023 1024 /* When len=0, we just calculate the needed length */ 1025 #define LEN_OR_ZERO (len ? len - pos : 0) 1026 1027 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt); 1028 1029 for (i = 0; i < tp->nr_args; i++) { 1030 pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s", 1031 tp->args[i].name, tp->args[i].type->fmt); 1032 } 1033 1034 pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); 1035 1036 for (i = 0; i < tp->nr_args; i++) { 1037 if (strcmp(tp->args[i].type->name, "string") == 0) 1038 pos += snprintf(buf + pos, LEN_OR_ZERO, 1039 ", __get_str(%s)", 1040 tp->args[i].name); 1041 else 1042 pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", 1043 tp->args[i].name); 1044 } 1045 1046 #undef LEN_OR_ZERO 1047 1048 /* return the length of print_fmt */ 1049 return pos; 1050 } 1051 1052 static int set_print_fmt(struct trace_probe *tp) 1053 { 1054 int len; 1055 char *print_fmt; 1056 1057 /* First: called with 0 length to calculate the needed length */ 1058 len = __set_print_fmt(tp, NULL, 0); 1059 print_fmt = kmalloc(len + 1, GFP_KERNEL); 1060 if (!print_fmt) 1061 return -ENOMEM; 1062 1063 /* Second: actually write the @print_fmt */ 1064 __set_print_fmt(tp, print_fmt, len + 1); 1065 tp->call.print_fmt = print_fmt; 1066 1067 return 0; 1068 } 1069 1070 #ifdef CONFIG_PERF_EVENTS 1071 1072 /* Kprobe profile handler */ 1073 static __kprobes void 1074 kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs) 1075 { 1076 struct ftrace_event_call *call = &tp->call; 1077 struct kprobe_trace_entry_head *entry; 1078 struct hlist_head *head; 1079 int size, __size, dsize; 1080 int rctx; 1081 1082 head = this_cpu_ptr(call->perf_events); 1083 if (hlist_empty(head)) 1084 return; 
1085 1086 dsize = __get_data_size(tp, regs); 1087 __size = sizeof(*entry) + tp->size + dsize; 1088 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1089 size -= sizeof(u32); 1090 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, 1091 "profile buffer not large enough")) 1092 return; 1093 1094 entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); 1095 if (!entry) 1096 return; 1097 1098 entry->ip = (unsigned long)tp->rp.kp.addr; 1099 memset(&entry[1], 0, dsize); 1100 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1101 perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); 1102 } 1103 1104 /* Kretprobe profile handler */ 1105 static __kprobes void 1106 kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, 1107 struct pt_regs *regs) 1108 { 1109 struct ftrace_event_call *call = &tp->call; 1110 struct kretprobe_trace_entry_head *entry; 1111 struct hlist_head *head; 1112 int size, __size, dsize; 1113 int rctx; 1114 1115 head = this_cpu_ptr(call->perf_events); 1116 if (hlist_empty(head)) 1117 return; 1118 1119 dsize = __get_data_size(tp, regs); 1120 __size = sizeof(*entry) + tp->size + dsize; 1121 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1122 size -= sizeof(u32); 1123 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, 1124 "profile buffer not large enough")) 1125 return; 1126 1127 entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); 1128 if (!entry) 1129 return; 1130 1131 entry->func = (unsigned long)tp->rp.kp.addr; 1132 entry->ret_ip = (unsigned long)ri->ret_addr; 1133 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1134 perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); 1135 } 1136 #endif /* CONFIG_PERF_EVENTS */ 1137 1138 /* 1139 * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex. 1140 * 1141 * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe 1142 * lockless, but we can't race with this __init function. 
 */
static __kprobes
int kprobe_register(struct ftrace_event_call *event,
		    enum trace_reg type, void *data)
{
	struct trace_probe *tp = (struct trace_probe *)event->data;
	/*
	 * For TRACE_REG_(UN)REGISTER, @data is the ftrace_event_file the
	 * event is being toggled in; the perf paths pass NULL explicitly.
	 */
	struct ftrace_event_file *file = data;

	switch (type) {
	case TRACE_REG_REGISTER:
		return enable_trace_probe(tp, file);
	case TRACE_REG_UNREGISTER:
		return disable_trace_probe(tp, file);

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return enable_trace_probe(tp, NULL);
	case TRACE_REG_PERF_UNREGISTER:
		return disable_trace_probe(tp, NULL);
	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
		/* Nothing to do for perf open/close/add/del on this event. */
		return 0;
#endif
	}
	return 0;
}

/*
 * Kprobe hit handler: count the hit and dispatch to the ftrace ring
 * buffer and/or perf, depending on which consumers enabled this probe.
 */
static __kprobes
int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);

	tp->nhit++;

	if (tp->flags & TP_FLAG_TRACE)
		kprobe_trace_func(tp, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tp->flags & TP_FLAG_PROFILE)
		kprobe_perf_func(tp, regs);
#endif
	return 0;	/* We don't tweak the kernel, so just return 0 */
}

/* Same as kprobe_dispatcher(), but for function-return (kretprobe) hits. */
static __kprobes
int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);

	tp->nhit++;

	if (tp->flags & TP_FLAG_TRACE)
		kretprobe_trace_func(tp, ri, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tp->flags & TP_FLAG_PROFILE)
		kretprobe_perf_func(tp, ri, regs);
#endif
	return 0;	/* We don't tweak the kernel, so just return 0 */
}

static struct trace_event_functions kretprobe_funcs = {
	.trace = print_kretprobe_event
};

static struct trace_event_functions kprobe_funcs = {
	.trace = print_kprobe_event
};

/*
 * Register @tp's embedded ftrace_event_call with the trace-event core.
 * Returns 0 on success; -ENOMEM, -ENODEV, or trace_add_event_call()'s
 * error code on failure (print_fmt and the ftrace event are unwound on
 * each failure path).
 */
static int register_probe_event(struct trace_probe *tp)
{
	struct ftrace_event_call *call = &tp->call;
	int ret;

	/* Initialize ftrace_event_call */
	INIT_LIST_HEAD(&call->class->fields);
	if (trace_probe_is_return(tp)) {
		call->event.funcs = &kretprobe_funcs;
		call->class->define_fields = kretprobe_event_define_fields;
	} else {
		call->event.funcs = &kprobe_funcs;
		call->class->define_fields = kprobe_event_define_fields;
	}
	if (set_print_fmt(tp) < 0)
		return -ENOMEM;
	/*
	 * register_ftrace_event() returns the assigned event type id;
	 * 0 means registration failed, so free print_fmt and bail out.
	 */
	ret = register_ftrace_event(&call->event);
	if (!ret) {
		kfree(call->print_fmt);
		return -ENODEV;
	}
	call->flags = 0;
	call->class->reg = kprobe_register;
	call->data = tp;
	ret = trace_add_event_call(call);
	if (ret) {
		pr_info("Failed to register kprobe event: %s\n", call->name);
		kfree(call->print_fmt);
		unregister_ftrace_event(&call->event);
	}
	return ret;
}

/* Undo register_probe_event(): remove the event call and free print_fmt. */
static void unregister_probe_event(struct trace_probe *tp)
{
	/* tp->event is unregistered in trace_remove_event_call() */
	trace_remove_event_call(&tp->call);
	kfree(tp->call.print_fmt);
}

/* Make a debugfs interface for controlling probe points */
static __init int init_kprobe_trace(void)
{
	struct dentry *d_tracer;
	struct dentry *entry;

	/* Needed to clean up probes on modules as they are removed. */
	if (register_module_notifier(&trace_probe_module_nb))
		return -EINVAL;

	d_tracer = tracing_init_dentry();
	if (!d_tracer)
		return 0;

	entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
				    NULL, &kprobe_events_ops);

	/* Event list interface */
	if (!entry)
		pr_warning("Could not create debugfs "
			   "'kprobe_events' entry\n");

	/* Profile interface */
	entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
				    NULL, &kprobe_profile_ops);

	if (!entry)
		pr_warning("Could not create debugfs "
			   "'kprobe_profile' entry\n");
	return 0;
}
fs_initcall(init_kprobe_trace);


#ifdef CONFIG_FTRACE_STARTUP_TEST

/*
 * The "__used" keeps gcc from removing the function symbol
 * from the kallsyms table.
 */
static __used int kprobe_trace_selftest_target(int a1, int a2, int a3,
					       int a4, int a5, int a6)
{
	return a1 + a2 + a3 + a4 + a5 + a6;
}

/*
 * Return the ftrace_event_file backing @tp's event in trace array @tr,
 * or NULL if the event is not present in that trace array.
 */
static struct ftrace_event_file *
find_trace_probe_file(struct trace_probe *tp, struct trace_array *tr)
{
	struct ftrace_event_file *file;

	list_for_each_entry(file, &tr->events, list)
		if (file->event_call == &tp->call)
			return file;

	return NULL;
}

/*
 * Boot-time self test: create an entry probe and a return probe on
 * kprobe_trace_selftest_target(), enable them, hit them once, then
 * disable and delete them, counting every failure in @warn.
 *
 * Nobody but us can call enable_trace_probe/disable_trace_probe at this
 * stage, we can do this lockless.
 */
static __init int kprobe_trace_self_tests_init(void)
{
	int ret, warn = 0;
	int (*target)(int, int, int, int, int, int);
	struct trace_probe *tp;
	struct ftrace_event_file *file;

	target = kprobe_trace_selftest_target;

	pr_info("Testing kprobe tracing: ");

	ret = traceprobe_command("p:testprobe kprobe_trace_selftest_target "
				 "$stack $stack0 +0($stack)",
				 create_trace_probe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on probing function entry.\n");
		warn++;
	} else {
		/* Enable trace point */
		tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tp == NULL)) {
			pr_warn("error on getting new probe.\n");
			warn++;
		} else {
			file = find_trace_probe_file(tp, top_trace_array());
			if (WARN_ON_ONCE(file == NULL)) {
				pr_warn("error on getting probe file.\n");
				warn++;
			} else if (WARN_ON_ONCE(enable_trace_probe(tp, file))) {
				/*
				 * The return value used to be ignored; a
				 * failed enable must fail the self test.
				 */
				pr_warn("error on enabling new probe.\n");
				warn++;
			}
		}
	}

	ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target "
				 "$retval", create_trace_probe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on probing function return.\n");
		warn++;
	} else {
		/* Enable trace point */
		tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tp == NULL)) {
			pr_warn("error on getting 2nd new probe.\n");
			warn++;
		} else {
			file = find_trace_probe_file(tp, top_trace_array());
			if (WARN_ON_ONCE(file == NULL)) {
				pr_warn("error on getting probe file.\n");
				warn++;
			} else if (WARN_ON_ONCE(enable_trace_probe(tp, file))) {
				pr_warn("error on enabling 2nd new probe.\n");
				warn++;
			}
		}
	}

	if (warn)
		goto end;

	ret = target(1, 2, 3, 4, 5, 6);

	/* Disable trace points before removing it */
	tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
	if (WARN_ON_ONCE(tp == NULL)) {
		pr_warn("error on getting test probe.\n");
		warn++;
	} else {
		file = find_trace_probe_file(tp, top_trace_array());
		if (WARN_ON_ONCE(file == NULL)) {
			pr_warn("error on getting probe file.\n");
			warn++;
		} else if (WARN_ON_ONCE(disable_trace_probe(tp, file))) {
			/* A failed disable must also fail the self test. */
			pr_warn("error on disabling test probe.\n");
			warn++;
		}
	}

	tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
	if (WARN_ON_ONCE(tp == NULL)) {
		pr_warn("error on getting 2nd test probe.\n");
		warn++;
	} else {
		file = find_trace_probe_file(tp, top_trace_array());
		if (WARN_ON_ONCE(file == NULL)) {
			pr_warn("error on getting probe file.\n");
			warn++;
		} else if (WARN_ON_ONCE(disable_trace_probe(tp, file))) {
			pr_warn("error on disabling 2nd test probe.\n");
			warn++;
		}
	}

	ret = traceprobe_command("-:testprobe", create_trace_probe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on deleting a probe.\n");
		warn++;
	}

	ret = traceprobe_command("-:testprobe2", create_trace_probe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on deleting a probe.\n");
		warn++;
	}

end:
	release_all_trace_probes();
	if (warn)
		pr_cont("NG: Some tests are failed. Please check them.\n");
	else
		pr_cont("OK\n");
	return 0;
}

late_initcall(kprobe_trace_self_tests_init);

#endif	/* CONFIG_FTRACE_STARTUP_TEST */