// SPDX-License-Identifier: GPL-2.0
/*
 * event probes
 *
 * Part of this code was copied from kernel/trace/trace_kprobe.c written by
 * Masami Hiramatsu <mhiramat@kernel.org>
 *
 * Copyright (C) 2021, VMware Inc, Steven Rostedt <rostedt@goodmis.org>
 * Copyright (C) 2021, VMware Inc, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
 *
 */
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/ftrace.h>

#include "trace_dynevent.h"
#include "trace_probe.h"
#include "trace_probe_tmpl.h"

#define EPROBE_EVENT_SYSTEM "eprobes"

struct trace_eprobe {
	/* tracepoint system */
	const char *event_system;

	/* tracepoint event */
	const char *event_name;

	/* filter string for the tracepoint */
	char *filter_str;

	struct trace_event_call *event;

	struct dyn_event devent;
	struct trace_probe tp;
};

struct eprobe_data {
	struct trace_event_file *file;
	struct trace_eprobe *ep;
};

static int __trace_eprobe_create(int argc, const char *argv[]);

static void trace_event_probe_cleanup(struct trace_eprobe *ep)
{
	if (!ep)
		return;
	trace_probe_cleanup(&ep->tp);
	kfree(ep->event_name);
	kfree(ep->event_system);
	if (ep->event)
		trace_event_put_ref(ep->event);
	kfree(ep);
}

static struct trace_eprobe *to_trace_eprobe(struct dyn_event *ev)
{
	return container_of(ev, struct trace_eprobe, devent);
}

static int eprobe_dyn_event_create(const char *raw_command)
{
	return trace_probe_create(raw_command, __trace_eprobe_create);
}

static int eprobe_dyn_event_show(struct seq_file *m, struct dyn_event *ev)
{
	struct trace_eprobe *ep = to_trace_eprobe(ev);
	int i;

	seq_printf(m, "e:%s/%s", trace_probe_group_name(&ep->tp),
		   trace_probe_name(&ep->tp));
	seq_printf(m, " %s.%s", ep->event_system, ep->event_name);

	for (i = 0; i < ep->tp.nr_args; i++)
		seq_printf(m, " %s=%s", ep->tp.args[i].name, ep->tp.args[i].comm);
	seq_putc(m, '\n');

	return 0;
}

static int unregister_trace_eprobe(struct trace_eprobe *ep)
{
	/* If other probes are on the event, just unregister eprobe */
	if (trace_probe_has_sibling(&ep->tp))
		goto unreg;

	/* Enabled event can not be unregistered */
	if (trace_probe_is_enabled(&ep->tp))
		return -EBUSY;

	/* Will fail if probe is being used by ftrace or perf */
	if (trace_probe_unregister_event_call(&ep->tp))
		return -EBUSY;

unreg:
	dyn_event_remove(&ep->devent);
	trace_probe_unlink(&ep->tp);

	return 0;
}

static int eprobe_dyn_event_release(struct dyn_event *ev)
{
	struct trace_eprobe *ep = to_trace_eprobe(ev);
	int ret = unregister_trace_eprobe(ep);

	if (!ret)
		trace_event_probe_cleanup(ep);
	return ret;
}

static bool eprobe_dyn_event_is_busy(struct dyn_event *ev)
{
	struct trace_eprobe *ep = to_trace_eprobe(ev);

	return trace_probe_is_enabled(&ep->tp);
}

static bool eprobe_dyn_event_match(const char *system, const char *event,
				   int argc, const char **argv, struct dyn_event *ev)
{
	struct trace_eprobe *ep = to_trace_eprobe(ev);
	const char *slash;

	/*
	 * We match the following:
	 *  event only			- match all eprobes with event name
	 *  system and event only	- match all system/event probes
	 *  system only			- match all system probes
	 *
	 * The below has the above satisfied with more arguments:
	 *
	 *  attached system/event	- If the arg has the system and event
	 *				  the probe is attached to, match
	 *				  probes with the attachment.
	 *
	 *  If any more args are given, then it requires a full match.
	 */

	/*
	 * If system exists, but this probe is not part of that system
	 * do not match.
	 */
	if (system && strcmp(trace_probe_group_name(&ep->tp), system) != 0)
		return false;

	/* Must match the event name */
	if (event[0] != '\0' && strcmp(trace_probe_name(&ep->tp), event) != 0)
		return false;

	/* No arguments match all */
	if (argc < 1)
		return true;

	/* First argument is the system/event the probe is attached to */

	slash = strchr(argv[0], '/');
	if (!slash)
		slash = strchr(argv[0], '.');
	if (!slash)
		return false;

	if (strncmp(ep->event_system, argv[0], slash - argv[0]))
		return false;
	if (strcmp(ep->event_name, slash + 1))
		return false;

	argc--;
	argv++;

	/* If there are no other args, then match */
	if (argc < 1)
		return true;

	return trace_probe_match_command_args(&ep->tp, argc, argv);
}
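
/*
 * Illustration (editorial, not part of the original file): assuming an
 * eprobe was created with "e:eprobes/myprobe sched.sched_waking pid=$pid",
 * the matching rules above mean that removal commands such as
 *
 *	-:eprobes/myprobe
 *	-:myprobe sched.sched_waking
 *	-:myprobe sched.sched_waking pid=$pid
 *
 * are expected to match it, while "-:myprobe sched.sched_switch" is not,
 * because the attached system/event argument does not agree. This is a
 * sketch of the intended behaviour, not additional functionality.
 */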

static struct dyn_event_operations eprobe_dyn_event_ops = {
	.create = eprobe_dyn_event_create,
	.show = eprobe_dyn_event_show,
	.is_busy = eprobe_dyn_event_is_busy,
	.free = eprobe_dyn_event_release,
	.match = eprobe_dyn_event_match,
};

static struct trace_eprobe *alloc_event_probe(const char *group,
					      const char *this_event,
					      struct trace_event_call *event,
					      int nargs)
{
	struct trace_eprobe *ep;
	const char *event_name;
	const char *sys_name;
	int ret = -ENOMEM;

	if (!event)
		return ERR_PTR(-ENODEV);

	sys_name = event->class->system;
	event_name = trace_event_name(event);

	ep = kzalloc(struct_size(ep, tp.args, nargs), GFP_KERNEL);
	if (!ep) {
		trace_event_put_ref(event);
		goto error;
	}
	ep->event = event;
	ep->event_name = kstrdup(event_name, GFP_KERNEL);
	if (!ep->event_name)
		goto error;
	ep->event_system = kstrdup(sys_name, GFP_KERNEL);
	if (!ep->event_system)
		goto error;

	ret = trace_probe_init(&ep->tp, this_event, group, false);
	if (ret < 0)
		goto error;

	dyn_event_init(&ep->devent, &eprobe_dyn_event_ops);
	return ep;
error:
	trace_event_probe_cleanup(ep);
	return ERR_PTR(ret);
}

static int trace_eprobe_tp_arg_update(struct trace_eprobe *ep, int i)
{
	struct probe_arg *parg = &ep->tp.args[i];
	struct ftrace_event_field *field;
	struct list_head *head;
	int ret = -ENOENT;

	head = trace_get_fields(ep->event);
	list_for_each_entry(field, head, link) {
		if (!strcmp(parg->code->data, field->name)) {
			kfree(parg->code->data);
			parg->code->data = field;
			return 0;
		}
	}

	/*
	 * Argument not found on event. But allow for comm and COMM
	 * to be used to get the current->comm.
	 */
	if (strcmp(parg->code->data, "COMM") == 0 ||
	    strcmp(parg->code->data, "comm") == 0) {
		parg->code->op = FETCH_OP_COMM;
		ret = 0;
	}

	kfree(parg->code->data);
	parg->code->data = NULL;
	return ret;
}
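
/*
 * Editorial note (not from the original file): at parse time a "$field"
 * argument is recorded as a FETCH_OP_TP_ARG instruction whose code->data
 * holds the field name as a string. trace_eprobe_tp_arg_update() above
 * replaces that string with the matching struct ftrace_event_field of the
 * attached event, so that get_event_field() below can read the value
 * straight out of the original event record using the field's offset,
 * size and signedness.
 */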

static int eprobe_event_define_fields(struct trace_event_call *event_call)
{
	struct eprobe_trace_entry_head field;
	struct trace_probe *tp;

	tp = trace_probe_primary_from_call(event_call);
	if (WARN_ON_ONCE(!tp))
		return -ENOENT;

	return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
}

static struct trace_event_fields eprobe_fields_array[] = {
	{ .type = TRACE_FUNCTION_TYPE,
	  .define_fields = eprobe_event_define_fields },
	{}
};

/* Event entry printers */
static enum print_line_t
print_eprobe_event(struct trace_iterator *iter, int flags,
		   struct trace_event *event)
{
	struct eprobe_trace_entry_head *field;
	struct trace_event_call *pevent;
	struct trace_event *probed_event;
	struct trace_seq *s = &iter->seq;
	struct trace_eprobe *ep;
	struct trace_probe *tp;
	unsigned int type;

	field = (struct eprobe_trace_entry_head *)iter->ent;
	tp = trace_probe_primary_from_call(
		container_of(event, struct trace_event_call, event));
	if (WARN_ON_ONCE(!tp))
		goto out;

	ep = container_of(tp, struct trace_eprobe, tp);
	type = ep->event->event.type;

	trace_seq_printf(s, "%s: (", trace_probe_name(tp));

	probed_event = ftrace_find_event(type);
	if (probed_event) {
		pevent = container_of(probed_event, struct trace_event_call, event);
		trace_seq_printf(s, "%s.%s", pevent->class->system,
				 trace_event_name(pevent));
	} else {
		trace_seq_printf(s, "%u", type);
	}

	trace_seq_putc(s, ')');

	if (print_probe_args(s, tp->args, tp->nr_args,
			     (u8 *)&field[1], field) < 0)
		goto out;

	trace_seq_putc(s, '\n');
out:
	return trace_handle_return(s);
}

static unsigned long get_event_field(struct fetch_insn *code, void *rec)
{
	struct ftrace_event_field *field = code->data;
	unsigned long val;
	void *addr;

	addr = rec + field->offset;

	if (is_string_field(field)) {
		switch (field->filter_type) {
		case FILTER_DYN_STRING:
			val = (unsigned long)(rec + (*(unsigned int *)addr & 0xffff));
			break;
		case FILTER_RDYN_STRING:
			val = (unsigned long)(addr + (*(unsigned int *)addr & 0xffff));
			break;
		case FILTER_STATIC_STRING:
			val = (unsigned long)addr;
			break;
		case FILTER_PTR_STRING:
			val = (unsigned long)(*(char *)addr);
			break;
		default:
			WARN_ON_ONCE(1);
			return 0;
		}
		return val;
	}

	switch (field->size) {
	case 1:
		if (field->is_signed)
			val = *(char *)addr;
		else
			val = *(unsigned char *)addr;
		break;
	case 2:
		if (field->is_signed)
			val = *(short *)addr;
		else
			val = *(unsigned short *)addr;
		break;
	case 4:
		if (field->is_signed)
			val = *(int *)addr;
		else
			val = *(unsigned int *)addr;
		break;
	default:
		if (field->is_signed)
			val = *(long *)addr;
		else
			val = *(unsigned long *)addr;
		break;
	}
	return val;
}
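
/*
 * Editorial note (not from the original file): the "& 0xffff" above relies
 * on the usual ftrace __data_loc/__rel_loc encoding, where a 32-bit slot
 * carries the data offset in its low 16 bits and the length in its high
 * 16 bits. As the two additions show, FILTER_DYN_STRING offsets are
 * counted from the start of the record ("rec"), while FILTER_RDYN_STRING
 * offsets are counted from the slot itself ("addr").
 */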

static int get_eprobe_size(struct trace_probe *tp, void *rec)
{
	struct fetch_insn *code;
	struct probe_arg *arg;
	int i, len, ret = 0;

	for (i = 0; i < tp->nr_args; i++) {
		arg = tp->args + i;
		if (arg->dynamic) {
			unsigned long val;

			code = arg->code;
retry:
			switch (code->op) {
			case FETCH_OP_TP_ARG:
				val = get_event_field(code, rec);
				break;
			case FETCH_OP_IMM:
				val = code->immediate;
				break;
			case FETCH_OP_COMM:
				val = (unsigned long)current->comm;
				break;
			case FETCH_OP_DATA:
				val = (unsigned long)code->data;
				break;
			case FETCH_NOP_SYMBOL:	/* Ignore a place holder */
				code++;
				goto retry;
			default:
				continue;
			}
			code++;
			len = process_fetch_insn_bottom(code, val, NULL, NULL);
			if (len > 0)
				ret += len;
		}
	}

	return ret;
}

/* Kprobe specific fetch functions */

/* Note that we don't verify it, since the code does not come from user space */
static int
process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
		   void *base)
{
	unsigned long val;

retry:
	switch (code->op) {
	case FETCH_OP_TP_ARG:
		val = get_event_field(code, rec);
		break;
	case FETCH_OP_IMM:
		val = code->immediate;
		break;
	case FETCH_OP_COMM:
		val = (unsigned long)current->comm;
		break;
	case FETCH_OP_DATA:
		val = (unsigned long)code->data;
		break;
	case FETCH_NOP_SYMBOL:	/* Ignore a place holder */
		code++;
		goto retry;
	default:
		return -EILSEQ;
	}
	code++;
	return process_fetch_insn_bottom(code, val, dest, base);
}
NOKPROBE_SYMBOL(process_fetch_insn)

/* Return the length of string -- including null terminal byte */
static nokprobe_inline int
fetch_store_strlen_user(unsigned long addr)
{
	const void __user *uaddr = (__force const void __user *)addr;

	return strnlen_user_nofault(uaddr, MAX_STRING_SIZE);
}

/* Return the length of string -- including null terminal byte */
static nokprobe_inline int
fetch_store_strlen(unsigned long addr)
{
	int ret, len = 0;
	u8 c;

#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
	if (addr < TASK_SIZE)
		return fetch_store_strlen_user(addr);
#endif

	do {
		ret = copy_from_kernel_nofault(&c, (u8 *)addr + len, 1);
		len++;
	} while (c && ret == 0 && len < MAX_STRING_SIZE);

	return (ret < 0) ? ret : len;
}
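
/*
 * Editorial note (not from the original file): the TASK_SIZE checks above
 * and below depend on CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE. On
 * such architectures user and kernel addresses occupy disjoint ranges, so
 * an address below TASK_SIZE can only be a user pointer and is routed to
 * the *_user helpers; otherwise the kernel-space _nofault accessors are
 * used unconditionally.
 */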

/*
 * Fetch a null-terminated string from user. Caller MUST set *(u32 *)buf
 * with max length and relative data location.
 */
static nokprobe_inline int
fetch_store_string_user(unsigned long addr, void *dest, void *base)
{
	const void __user *uaddr = (__force const void __user *)addr;
	int maxlen = get_loc_len(*(u32 *)dest);
	void *__dest;
	long ret;

	if (unlikely(!maxlen))
		return -ENOMEM;

	__dest = get_loc_data(dest, base);

	ret = strncpy_from_user_nofault(__dest, uaddr, maxlen);
	if (ret >= 0)
		*(u32 *)dest = make_data_loc(ret, __dest - base);

	return ret;
}

/*
 * Fetch a null-terminated string. Caller MUST set *(u32 *)buf with max
 * length and relative data location.
 */
static nokprobe_inline int
fetch_store_string(unsigned long addr, void *dest, void *base)
{
	int maxlen = get_loc_len(*(u32 *)dest);
	void *__dest;
	long ret;

#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
	if ((unsigned long)addr < TASK_SIZE)
		return fetch_store_string_user(addr, dest, base);
#endif

	if (unlikely(!maxlen))
		return -ENOMEM;

	__dest = get_loc_data(dest, base);

	/*
	 * Try to get string again, since the string can be changed while
	 * probing.
	 */
	ret = strncpy_from_kernel_nofault(__dest, (void *)addr, maxlen);
	if (ret >= 0)
		*(u32 *)dest = make_data_loc(ret, __dest - base);

	return ret;
}

static nokprobe_inline int
probe_mem_read_user(void *dest, void *src, size_t size)
{
	const void __user *uaddr = (__force const void __user *)src;

	return copy_from_user_nofault(dest, uaddr, size);
}

static nokprobe_inline int
probe_mem_read(void *dest, void *src, size_t size)
{
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
	if ((unsigned long)src < TASK_SIZE)
		return probe_mem_read_user(dest, src, size);
#endif
	return copy_from_kernel_nofault(dest, src, size);
}
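
/*
 * Editorial note (a sketch, not from the original file): the string
 * fetchers above receive their destination as a u32 data_loc slot that the
 * generic store_trace_args() path pre-fills with the maximum length and
 * the offset of the dynamic area; on success they rewrite it with
 * make_data_loc(actual_length, offset), which is the same encoding that
 * get_event_field() decodes when reading the source event.
 */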

/* eprobe handler */
static inline void
__eprobe_trace_func(struct eprobe_data *edata, void *rec)
{
	struct eprobe_trace_entry_head *entry;
	struct trace_event_call *call = trace_probe_event_call(&edata->ep->tp);
	struct trace_event_buffer fbuffer;
	int dsize;

	if (WARN_ON_ONCE(call != edata->file->event_call))
		return;

	if (trace_trigger_soft_disabled(edata->file))
		return;

	dsize = get_eprobe_size(&edata->ep->tp, rec);

	entry = trace_event_buffer_reserve(&fbuffer, edata->file,
					   sizeof(*entry) + edata->ep->tp.size + dsize);

	if (!entry)
		return;

	entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
	store_trace_args(&entry[1], &edata->ep->tp, rec, sizeof(*entry), dsize);

	trace_event_buffer_commit(&fbuffer);
}

/*
 * The event probe implementation uses event triggers to get access to
 * the event it is attached to, but is not an actual trigger. The below
 * functions are just stubs to fulfill what is needed to use the trigger
 * infrastructure.
 */
static int eprobe_trigger_init(struct event_trigger_data *data)
{
	return 0;
}

static void eprobe_trigger_free(struct event_trigger_data *data)
{

}

static int eprobe_trigger_print(struct seq_file *m,
				struct event_trigger_data *data)
{
	/* Do not print eprobe event triggers */
	return 0;
}

static void eprobe_trigger_func(struct event_trigger_data *data,
				struct trace_buffer *buffer, void *rec,
				struct ring_buffer_event *rbe)
{
	struct eprobe_data *edata = data->private_data;

	__eprobe_trace_func(edata, rec);
}

static struct event_trigger_ops eprobe_trigger_ops = {
	.trigger		= eprobe_trigger_func,
	.print			= eprobe_trigger_print,
	.init			= eprobe_trigger_init,
	.free			= eprobe_trigger_free,
};

static int eprobe_trigger_cmd_parse(struct event_command *cmd_ops,
				    struct trace_event_file *file,
				    char *glob, char *cmd,
				    char *param_and_filter)
{
	return -1;
}

static int eprobe_trigger_reg_func(char *glob,
				   struct event_trigger_data *data,
				   struct trace_event_file *file)
{
	return -1;
}

static void eprobe_trigger_unreg_func(char *glob,
				      struct event_trigger_data *data,
				      struct trace_event_file *file)
{

}

static struct event_trigger_ops *eprobe_trigger_get_ops(char *cmd,
							char *param)
{
	return &eprobe_trigger_ops;
}

static struct event_command event_trigger_cmd = {
	.name			= "eprobe",
	.trigger_type		= ETT_EVENT_EPROBE,
	.flags			= EVENT_CMD_FL_NEEDS_REC,
	.parse			= eprobe_trigger_cmd_parse,
	.reg			= eprobe_trigger_reg_func,
	.unreg			= eprobe_trigger_unreg_func,
	.unreg_all		= NULL,
	.get_trigger_ops	= eprobe_trigger_get_ops,
	.set_filter		= NULL,
};
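
/*
 * Editorial sketch of the runtime path (not from the original file):
 * enabling an eprobe attaches an EVENT_TRIGGER_FL_PROBE trigger, built by
 * new_eprobe_trigger() below, to the trigger list of the event it is
 * attached to. When that event is recorded, the trigger infrastructure
 * calls eprobe_trigger_func() with the original record, which hands it to
 * __eprobe_trace_func(); that in turn sizes the dynamic data, reserves an
 * entry in the eprobe's own event file and copies the requested fields
 * over with store_trace_args().
 */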

static struct event_trigger_data *
new_eprobe_trigger(struct trace_eprobe *ep, struct trace_event_file *file)
{
	struct event_trigger_data *trigger;
	struct event_filter *filter = NULL;
	struct eprobe_data *edata;
	int ret;

	edata = kzalloc(sizeof(*edata), GFP_KERNEL);
	trigger = kzalloc(sizeof(*trigger), GFP_KERNEL);
	if (!trigger || !edata) {
		ret = -ENOMEM;
		goto error;
	}

	trigger->flags = EVENT_TRIGGER_FL_PROBE;
	trigger->count = -1;
	trigger->ops = &eprobe_trigger_ops;

	/*
	 * EVENT PROBE triggers are not registered as commands with
	 * register_event_command(), as they are not controlled by the user
	 * from the trigger file
	 */
	trigger->cmd_ops = &event_trigger_cmd;

	INIT_LIST_HEAD(&trigger->list);

	if (ep->filter_str) {
		ret = create_event_filter(file->tr, file->event_call,
					  ep->filter_str, false, &filter);
		if (ret)
			goto error;
	}
	RCU_INIT_POINTER(trigger->filter, filter);

	edata->file = file;
	edata->ep = ep;
	trigger->private_data = edata;

	return trigger;
error:
	free_event_filter(filter);
	kfree(edata);
	kfree(trigger);
	return ERR_PTR(ret);
}

static int enable_eprobe(struct trace_eprobe *ep,
			 struct trace_event_file *eprobe_file)
{
	struct event_trigger_data *trigger;
	struct trace_event_file *file;
	struct trace_array *tr = eprobe_file->tr;

	file = find_event_file(tr, ep->event_system, ep->event_name);
	if (!file)
		return -ENOENT;
	trigger = new_eprobe_trigger(ep, eprobe_file);
	if (IS_ERR(trigger))
		return PTR_ERR(trigger);

	list_add_tail_rcu(&trigger->list, &file->triggers);

	trace_event_trigger_enable_disable(file, 1);
	update_cond_flag(file);

	return 0;
}

static struct trace_event_functions eprobe_funcs = {
	.trace		= print_eprobe_event
};

static int disable_eprobe(struct trace_eprobe *ep,
			  struct trace_array *tr)
{
	struct event_trigger_data *trigger = NULL, *iter;
	struct trace_event_file *file;
	struct event_filter *filter;
	struct eprobe_data *edata;

	file = find_event_file(tr, ep->event_system, ep->event_name);
	if (!file)
		return -ENOENT;

	list_for_each_entry(iter, &file->triggers, list) {
		if (!(iter->flags & EVENT_TRIGGER_FL_PROBE))
			continue;
		edata = iter->private_data;
		if (edata->ep == ep) {
			trigger = iter;
			break;
		}
	}
	if (!trigger)
		return -ENODEV;

	list_del_rcu(&trigger->list);

	trace_event_trigger_enable_disable(file, 0);
	update_cond_flag(file);

	/* Make sure nothing is using the edata or trigger */
	tracepoint_synchronize_unregister();

	filter = rcu_access_pointer(trigger->filter);

	if (filter)
		free_event_filter(filter);
	kfree(edata);
	kfree(trigger);

	return 0;
}
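
/*
 * Editorial note (not from the original file): the teardown order above
 * matters. The trigger is first unlinked with list_del_rcu() and the event
 * is disabled, then tracepoint_synchronize_unregister() waits for any
 * handler that may still be walking the trigger list on another CPU, and
 * only after that are the filter, edata and trigger structures freed.
 */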

static int enable_trace_eprobe(struct trace_event_call *call,
			       struct trace_event_file *file)
{
	struct trace_probe *pos, *tp;
	struct trace_eprobe *ep;
	bool enabled;
	int ret = 0;

	tp = trace_probe_primary_from_call(call);
	if (WARN_ON_ONCE(!tp))
		return -ENODEV;
	enabled = trace_probe_is_enabled(tp);

	/* This also changes "enabled" state */
	if (file) {
		ret = trace_probe_add_file(tp, file);
		if (ret)
			return ret;
	} else
		trace_probe_set_flag(tp, TP_FLAG_PROFILE);

	if (enabled)
		return 0;

	list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
		ep = container_of(pos, struct trace_eprobe, tp);
		ret = enable_eprobe(ep, file);
		if (ret)
			break;
		enabled = true;
	}

	if (ret) {
		/* Failed to enable one of them. Roll back all */
		if (enabled)
			disable_eprobe(ep, file->tr);
		if (file)
			trace_probe_remove_file(tp, file);
		else
			trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
	}

	return ret;
}

static int disable_trace_eprobe(struct trace_event_call *call,
				struct trace_event_file *file)
{
	struct trace_probe *pos, *tp;
	struct trace_eprobe *ep;

	tp = trace_probe_primary_from_call(call);
	if (WARN_ON_ONCE(!tp))
		return -ENODEV;

	if (file) {
		if (!trace_probe_get_file_link(tp, file))
			return -ENOENT;
		if (!trace_probe_has_single_file(tp))
			goto out;
		trace_probe_clear_flag(tp, TP_FLAG_TRACE);
	} else
		trace_probe_clear_flag(tp, TP_FLAG_PROFILE);

	if (!trace_probe_is_enabled(tp)) {
		list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
			ep = container_of(pos, struct trace_eprobe, tp);
			disable_eprobe(ep, file->tr);
		}
	}

out:
	if (file)
		/*
		 * Synchronization is done in below function. For perf event,
		 * file == NULL and perf_trace_event_unreg() calls
		 * tracepoint_synchronize_unregister() to ensure synchronize
		 * event. We don't need to care about it.
		 */
		trace_probe_remove_file(tp, file);

	return 0;
}

static int eprobe_register(struct trace_event_call *event,
			   enum trace_reg type, void *data)
{
	struct trace_event_file *file = data;

	switch (type) {
	case TRACE_REG_REGISTER:
		return enable_trace_eprobe(event, file);
	case TRACE_REG_UNREGISTER:
		return disable_trace_eprobe(event, file);
#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
	case TRACE_REG_PERF_UNREGISTER:
	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
		return 0;
#endif
	}
	return 0;
}

static inline void init_trace_eprobe_call(struct trace_eprobe *ep)
{
	struct trace_event_call *call = trace_probe_event_call(&ep->tp);

	call->flags = TRACE_EVENT_FL_EPROBE;
	call->event.funcs = &eprobe_funcs;
	call->class->fields_array = eprobe_fields_array;
	call->class->reg = eprobe_register;
}

static struct trace_event_call *
find_and_get_event(const char *system, const char *event_name)
{
	struct trace_event_call *tp_event;
	const char *name;

	list_for_each_entry(tp_event, &ftrace_events, list) {
		/* Skip other probes and ftrace events */
		if (tp_event->flags &
		    (TRACE_EVENT_FL_IGNORE_ENABLE |
		     TRACE_EVENT_FL_KPROBE |
		     TRACE_EVENT_FL_UPROBE |
		     TRACE_EVENT_FL_EPROBE))
			continue;
		if (!tp_event->class->system ||
		    strcmp(system, tp_event->class->system))
			continue;
		name = trace_event_name(tp_event);
		if (!name || strcmp(event_name, name))
			continue;
		if (!trace_event_try_get_ref(tp_event)) {
			return NULL;
			break;
		}
		return tp_event;
		break;
	}
	return NULL;
}

static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[], int i)
{
	unsigned int flags = TPARG_FL_KERNEL | TPARG_FL_TPOINT;
	int ret;

	ret = traceprobe_parse_probe_arg(&ep->tp, i, argv[i], flags);
	if (ret)
		return ret;

	if (ep->tp.args[i].code->op == FETCH_OP_TP_ARG) {
		ret = trace_eprobe_tp_arg_update(ep, i);
		if (ret)
			trace_probe_log_err(0, BAD_ATTACH_ARG);
	}

	/* Handle symbols "@" */
	if (!ret)
		ret = traceprobe_update_arg(&ep->tp.args[i]);

	return ret;
}

static int trace_eprobe_parse_filter(struct trace_eprobe *ep, int argc, const char *argv[])
{
	struct event_filter *dummy;
	int i, ret, len = 0;
	char *p;

	if (argc == 0) {
		trace_probe_log_err(0, NO_EP_FILTER);
		return -EINVAL;
	}

	/* Recover the filter string */
	for (i = 0; i < argc; i++)
		len += strlen(argv[i]) + 1;

	ep->filter_str = kzalloc(len, GFP_KERNEL);
	if (!ep->filter_str)
		return -ENOMEM;

	p = ep->filter_str;
	for (i = 0; i < argc; i++) {
		ret = snprintf(p, len, "%s ", argv[i]);
		if (ret < 0)
			goto error;
		if (ret > len) {
			ret = -E2BIG;
			goto error;
		}
		p += ret;
		len -= ret;
	}
	p[-1] = '\0';

	/*
	 * Ensure the filter string can be parsed correctly. Note, this
	 * filter string is for the original event, not for the eprobe.
	 */
	ret = create_event_filter(top_trace_array(), ep->event, ep->filter_str,
				  true, &dummy);
	free_event_filter(dummy);
	if (ret)
		goto error;

	return 0;
error:
	kfree(ep->filter_str);
	ep->filter_str = NULL;
	return ret;
}

static int __trace_eprobe_create(int argc, const char *argv[])
{
	/*
	 * Argument syntax:
	 *	e[:[GRP/][ENAME]] SYSTEM.EVENT [FETCHARGS] [if FILTER]
	 * Fetch args (no space):
	 *	<name>=$<field>[:TYPE]
	 */
	const char *event = NULL, *group = EPROBE_EVENT_SYSTEM;
	const char *sys_event = NULL, *sys_name = NULL;
	struct trace_event_call *event_call;
	struct trace_eprobe *ep = NULL;
	char buf1[MAX_EVENT_NAME_LEN];
	char buf2[MAX_EVENT_NAME_LEN];
	char gbuf[MAX_EVENT_NAME_LEN];
	int ret = 0, filter_idx = 0;
	int i, filter_cnt;

	if (argc < 2 || argv[0][0] != 'e')
		return -ECANCELED;

	trace_probe_log_init("event_probe", argc, argv);

	event = strchr(&argv[0][1], ':');
	if (event) {
		event++;
		ret = traceprobe_parse_event_name(&event, &group, gbuf,
						  event - argv[0]);
		if (ret)
			goto parse_error;
	}

	trace_probe_log_set_index(1);
	sys_event = argv[1];
	ret = traceprobe_parse_event_name(&sys_event, &sys_name, buf2, 0);
	if (ret || !sys_event || !sys_name) {
		trace_probe_log_err(0, NO_EVENT_INFO);
		goto parse_error;
	}

	if (!event) {
		strscpy(buf1, argv[1], MAX_EVENT_NAME_LEN);
		sanitize_event_name(buf1);
		event = buf1;
	}

	for (i = 2; i < argc; i++) {
		if (!strcmp(argv[i], "if")) {
			filter_idx = i + 1;
			filter_cnt = argc - filter_idx;
			argc = i;
			break;
		}
	}

	mutex_lock(&event_mutex);
	event_call = find_and_get_event(sys_name, sys_event);
	ep = alloc_event_probe(group, event, event_call, argc - 2);
	mutex_unlock(&event_mutex);

	if (IS_ERR(ep)) {
		ret = PTR_ERR(ep);
		if (ret == -ENODEV)
			trace_probe_log_err(0, BAD_ATTACH_EVENT);
		/* This must return -ENOMEM or missing event, else there is a bug */
		WARN_ON_ONCE(ret != -ENOMEM && ret != -ENODEV);
		ep = NULL;
		goto error;
	}

	if (filter_idx) {
		trace_probe_log_set_index(filter_idx);
		ret = trace_eprobe_parse_filter(ep, filter_cnt, argv + filter_idx);
		if (ret)
			goto parse_error;
	} else
		ep->filter_str = NULL;

	argc -= 2; argv += 2;
	/* parse arguments */
	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
		trace_probe_log_set_index(i + 2);
		ret = trace_eprobe_tp_update_arg(ep, argv, i);
		if (ret)
			goto error;
	}
	ret = traceprobe_set_print_fmt(&ep->tp, PROBE_PRINT_EVENT);
	if (ret < 0)
		goto error;
	init_trace_eprobe_call(ep);
	mutex_lock(&event_mutex);
	ret = trace_probe_register_event_call(&ep->tp);
	if (ret) {
		if (ret == -EEXIST) {
			trace_probe_log_set_index(0);
			trace_probe_log_err(0, EVENT_EXIST);
		}
		mutex_unlock(&event_mutex);
		goto error;
	}
	ret = dyn_event_add(&ep->devent, &ep->tp.event->call);
	mutex_unlock(&event_mutex);
	return ret;
parse_error:
	ret = -EINVAL;
error:
	trace_event_probe_cleanup(ep);
	return ret;
}
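
/*
 * Example (editorial sketch, not from the original file; assumes tracefs is
 * mounted at /sys/kernel/tracing and the sched_waking tracepoint exists):
 *
 *	# echo 'e:eprobes/waking sched.sched_waking wpid=$pid if prio < 120' \
 *		>> /sys/kernel/tracing/dynamic_events
 *
 * creates an event "eprobes/waking" that fires whenever sched_waking does,
 * records the original event's "pid" field as "wpid", and applies the
 * trailing filter to the original sched_waking event.
 */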

/*
 * Register dynevent at core_initcall. This allows kernel to setup eprobe
 * events in postcore_initcall without tracefs.
 */
static __init int trace_events_eprobe_init_early(void)
{
	int err = 0;

	err = dyn_event_register(&eprobe_dyn_event_ops);
	if (err)
		pr_warn("Could not register eprobe_dyn_event_ops\n");

	return err;
}
core_initcall(trace_events_eprobe_init_early);