// SPDX-License-Identifier: GPL-2.0
/*
 * fprobe - Simple ftrace probe wrapper for function entry.
 */
#define pr_fmt(fmt) "fprobe: " fmt

#include <linux/err.h>
#include <linux/fprobe.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/sort.h>

#include <asm/fprobe.h>

#include "trace.h"

#define FPROBE_IP_HASH_BITS 8
#define FPROBE_IP_TABLE_SIZE (1 << FPROBE_IP_HASH_BITS)

#define FPROBE_HASH_BITS 6
#define FPROBE_TABLE_SIZE (1 << FPROBE_HASH_BITS)

#define SIZE_IN_LONG(x) (((x) + sizeof(long) - 1) >> (sizeof(long) == 8 ? 3 : 2))
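
/*
 * SIZE_IN_LONG() rounds a byte count up to a number of longs. For example,
 * on a 64-bit kernel SIZE_IN_LONG(12) == (12 + 8 - 1) >> 3 == 2, i.e. 12
 * bytes of entry data occupy two longs on the shadow stack.
 */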

/*
 * fprobe_table: holds 'fprobe_hlist::hlist' for checking whether the fprobe
 * still exists. The key is the address of the fprobe instance.
 * fprobe_ip_table: holds 'fprobe_hlist::array[*]' for searching the fprobe
 * instance related to the function address. The key is the ftrace IP
 * address.
 *
 * When unregistering the fprobe, fprobe_hlist::fp and fprobe_hlist::array[*].fp
 * are set to NULL and the entries are deleted from both hash tables (by
 * hlist_del_rcu). After an RCU grace period, the fprobe_hlist itself is
 * released.
 *
 * fprobe_table and fprobe_ip_table can be accessed by either
 * - normal hlist traversal and RCU add/del while 'fprobe_mutex' is held, or
 * - RCU hlist traversal with preemption disabled.
 */
static struct hlist_head fprobe_table[FPROBE_TABLE_SIZE];
static struct hlist_head fprobe_ip_table[FPROBE_IP_TABLE_SIZE];
static DEFINE_MUTEX(fprobe_mutex);

/*
 * Find the first fprobe in the hlist. It will be iterated twice in the entry
 * handler, once for counting the total required size and a second time for
 * calling back the user handlers.
 * Thus the hlist in the fprobe_table must be sorted and a new probe needs to
 * be added *before* the first fprobe.
 */
static struct fprobe_hlist_node *find_first_fprobe_node(unsigned long ip)
{
	struct fprobe_hlist_node *node;
	struct hlist_head *head;

	head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)];
	hlist_for_each_entry_rcu(node, head, hlist,
				 lockdep_is_held(&fprobe_mutex)) {
		if (node->addr == ip)
			return node;
	}
	return NULL;
}
NOKPROBE_SYMBOL(find_first_fprobe_node);

/* Node insertion and deletion require the fprobe_mutex */
static void insert_fprobe_node(struct fprobe_hlist_node *node)
{
	unsigned long ip = node->addr;
	struct fprobe_hlist_node *next;
	struct hlist_head *head;

	lockdep_assert_held(&fprobe_mutex);

	next = find_first_fprobe_node(ip);
	if (next) {
		hlist_add_before_rcu(&node->hlist, &next->hlist);
		return;
	}
	head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)];
	hlist_add_head_rcu(&node->hlist, head);
}

/* Return true if there are synonyms (other probes on the same address) */
static bool delete_fprobe_node(struct fprobe_hlist_node *node)
{
	lockdep_assert_held(&fprobe_mutex);

	WRITE_ONCE(node->fp, NULL);
	hlist_del_rcu(&node->hlist);
	return !!find_first_fprobe_node(node->addr);
}

/* Check existence of the fprobe */
static bool is_fprobe_still_exist(struct fprobe *fp)
{
	struct hlist_head *head;
	struct fprobe_hlist *fph;

	head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)];
	hlist_for_each_entry_rcu(fph, head, hlist,
				 lockdep_is_held(&fprobe_mutex)) {
		if (fph->fp == fp)
			return true;
	}
	return false;
}
NOKPROBE_SYMBOL(is_fprobe_still_exist);

static int add_fprobe_hash(struct fprobe *fp)
{
	struct fprobe_hlist *fph = fp->hlist_array;
	struct hlist_head *head;

	lockdep_assert_held(&fprobe_mutex);

	if (WARN_ON_ONCE(!fph))
		return -EINVAL;

	if (is_fprobe_still_exist(fp))
		return -EEXIST;

	head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)];
	hlist_add_head_rcu(&fp->hlist_array->hlist, head);
	return 0;
}

static int del_fprobe_hash(struct fprobe *fp)
{
	struct fprobe_hlist *fph = fp->hlist_array;

	lockdep_assert_held(&fprobe_mutex);

	if (WARN_ON_ONCE(!fph))
		return -EINVAL;

	if (!is_fprobe_still_exist(fp))
		return -ENOENT;

	fph->fp = NULL;
	hlist_del_rcu(&fph->hlist);
	return 0;
}

#ifdef ARCH_DEFINE_ENCODE_FPROBE_HEADER

/* The arch should encode fprobe_header info into one unsigned long */
#define FPROBE_HEADER_SIZE_IN_LONG 1

static inline bool write_fprobe_header(unsigned long *stack,
				       struct fprobe *fp, unsigned int size_words)
{
	if (WARN_ON_ONCE(size_words > MAX_FPROBE_DATA_SIZE_WORD ||
			 !arch_fprobe_header_encodable(fp)))
		return false;

	*stack = arch_encode_fprobe_header(fp, size_words);
	return true;
}

static inline void read_fprobe_header(unsigned long *stack,
				      struct fprobe **fp, unsigned int *size_words)
{
	*fp = arch_decode_fprobe_header_fp(*stack);
	*size_words = arch_decode_fprobe_header_size(*stack);
}

#else

/* Generic fprobe_header */
struct __fprobe_header {
	struct fprobe *fp;
	unsigned long size_words;
} __packed;

#define FPROBE_HEADER_SIZE_IN_LONG SIZE_IN_LONG(sizeof(struct __fprobe_header))

static inline bool write_fprobe_header(unsigned long *stack,
				       struct fprobe *fp, unsigned int size_words)
{
	struct __fprobe_header *fph = (struct __fprobe_header *)stack;

	if (WARN_ON_ONCE(size_words > MAX_FPROBE_DATA_SIZE_WORD))
		return false;

	fph->fp = fp;
	fph->size_words = size_words;
	return true;
}

static inline void read_fprobe_header(unsigned long *stack,
				      struct fprobe **fp, unsigned int *size_words)
{
	struct __fprobe_header *fph = (struct __fprobe_header *)stack;

	*fp = fph->fp;
	*size_words = fph->size_words;
}

#endif

/*
 * fprobe shadow stack management:
 * Since fprobe shares a single fgraph_ops, it needs to share the shadow stack
 * entry among the probes on the same function exit. Since a new probe can be
 * registered before a target function returns, we cannot use the hash table
 * to find the corresponding probes. Thus the probe address is stored on the
 * shadow stack together with its entry data size.
 */
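
/*
 * A sketch of the reserved fgraph data area as written by fprobe_entry() and
 * read back by fprobe_return() (illustrative layout; each cell is one long,
 * and a header occupies FPROBE_HEADER_SIZE_IN_LONG longs):
 *
 *	+-----------+----------------+-----------+----------------+------
 *	| header(A) | A's entry data | header(B) | B's entry data | 0 ...
 *	+-----------+----------------+-----------+----------------+------
 *
 * Each header encodes the fprobe pointer and its entry data size in words.
 * Unused tail longs are zeroed so that the exit handler loop stops when it
 * decodes a NULL fprobe pointer.
 */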

static inline int __fprobe_handler(unsigned long ip, unsigned long parent_ip,
				   struct fprobe *fp, struct ftrace_regs *fregs,
				   void *data)
{
	if (!fp->entry_handler)
		return 0;

	return fp->entry_handler(fp, ip, parent_ip, fregs, data);
}

static inline int __fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
					  struct fprobe *fp, struct ftrace_regs *fregs,
					  void *data)
{
	int ret;

	/*
	 * This user handler is shared with other kprobes and is not expected to be
	 * called recursively. So if any other kprobe handler is running, this will
	 * exit as kprobe does. See the section 'Share the callbacks with kprobes'
	 * in Documentation/trace/fprobe.rst for more information.
	 */
	if (unlikely(kprobe_running())) {
		fp->nmissed++;
		return 0;
	}

	kprobe_busy_begin();
	ret = __fprobe_handler(ip, parent_ip, fp, fregs, data);
	kprobe_busy_end();
	return ret;
}

static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
			struct ftrace_regs *fregs)
{
	struct fprobe_hlist_node *node, *first;
	unsigned long *fgraph_data = NULL;
	unsigned long func = trace->func;
	unsigned long ret_ip;
	int reserved_words;
	struct fprobe *fp;
	int used, ret;

	if (WARN_ON_ONCE(!fregs))
		return 0;

	first = node = find_first_fprobe_node(func);
	if (unlikely(!first))
		return 0;

	reserved_words = 0;
	hlist_for_each_entry_from_rcu(node, hlist) {
		if (node->addr != func)
			break;
		fp = READ_ONCE(node->fp);
		if (!fp || !fp->exit_handler)
			continue;
		/*
		 * Since the fprobe can be enabled before the next loop runs,
		 * ignore the fprobe's disabled flag in this loop.
		 */
		reserved_words +=
			FPROBE_HEADER_SIZE_IN_LONG + SIZE_IN_LONG(fp->entry_data_size);
	}
	node = first;
	if (reserved_words) {
		fgraph_data = fgraph_reserve_data(gops->idx, reserved_words * sizeof(long));
		if (unlikely(!fgraph_data)) {
			hlist_for_each_entry_from_rcu(node, hlist) {
				if (node->addr != func)
					break;
				fp = READ_ONCE(node->fp);
				if (fp && !fprobe_disabled(fp))
					fp->nmissed++;
			}
			return 0;
		}
	}

	/*
	 * TODO: recursion detection has been done in the fgraph. Thus we need
	 * to add a callback to increment the missed counter.
	 */
	ret_ip = ftrace_regs_get_return_address(fregs);
	used = 0;
	hlist_for_each_entry_from_rcu(node, hlist) {
		int data_size;
		void *data;

		if (node->addr != func)
			break;
		fp = READ_ONCE(node->fp);
		if (!fp || fprobe_disabled(fp))
			continue;

		data_size = fp->entry_data_size;
		if (data_size && fp->exit_handler)
			data = fgraph_data + used + FPROBE_HEADER_SIZE_IN_LONG;
		else
			data = NULL;

		if (fprobe_shared_with_kprobes(fp))
			ret = __fprobe_kprobe_handler(func, ret_ip, fp, fregs, data);
		else
			ret = __fprobe_handler(func, ret_ip, fp, fregs, data);

		/* If entry_handler returns !0, nmissed is not counted but the exit_handler is skipped. */
		if (!ret && fp->exit_handler) {
			int size_words = SIZE_IN_LONG(data_size);

			if (write_fprobe_header(&fgraph_data[used], fp, size_words))
				used += FPROBE_HEADER_SIZE_IN_LONG + size_words;
		}
	}
	/* Zero the unused tail so that fprobe_return() stops at a NULL fprobe. */
	if (used < reserved_words)
		memset(fgraph_data + used, 0, (reserved_words - used) * sizeof(long));

	/* If any exit_handler is set, data must be used. */
	return used != 0;
}
NOKPROBE_SYMBOL(fprobe_entry);

static void fprobe_return(struct ftrace_graph_ret *trace,
			  struct fgraph_ops *gops,
			  struct ftrace_regs *fregs)
{
	unsigned long *fgraph_data = NULL;
	unsigned long ret_ip;
	struct fprobe *fp;
	int size, curr;
	int size_words;

	fgraph_data = (unsigned long *)fgraph_retrieve_data(gops->idx, &size);
	if (WARN_ON_ONCE(!fgraph_data))
		return;
	size_words = SIZE_IN_LONG(size);
	ret_ip = ftrace_regs_get_instruction_pointer(fregs);

	preempt_disable();

	curr = 0;
	while (size_words > curr) {
		read_fprobe_header(&fgraph_data[curr], &fp, &size);
		if (!fp)
			break;
		curr += FPROBE_HEADER_SIZE_IN_LONG;
		if (is_fprobe_still_exist(fp) && !fprobe_disabled(fp)) {
			if (WARN_ON_ONCE(curr + size > size_words))
				break;
			fp->exit_handler(fp, trace->func, ret_ip, fregs,
					 size ? fgraph_data + curr : NULL);
		}
		curr += size;
	}
	preempt_enable();
}
NOKPROBE_SYMBOL(fprobe_return);

static struct fgraph_ops fprobe_graph_ops = {
	.entryfunc	= fprobe_entry,
	.retfunc	= fprobe_return,
};
static int fprobe_graph_active;

/* Add @addrs to the ftrace filter and register fgraph if needed. */
static int fprobe_graph_add_ips(unsigned long *addrs, int num)
{
	int ret;

	lockdep_assert_held(&fprobe_mutex);

	ret = ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 0, 0);
	if (ret)
		return ret;

	if (!fprobe_graph_active) {
		ret = register_ftrace_graph(&fprobe_graph_ops);
		if (WARN_ON_ONCE(ret)) {
			ftrace_free_filter(&fprobe_graph_ops.ops);
			return ret;
		}
	}
	fprobe_graph_active++;
	return 0;
}

/* Remove @addrs from the ftrace filter and unregister fgraph if possible. */
static void fprobe_graph_remove_ips(unsigned long *addrs, int num)
{
	lockdep_assert_held(&fprobe_mutex);

	fprobe_graph_active--;
	if (!fprobe_graph_active) {
		/* Q: should we unregister it? */
		unregister_ftrace_graph(&fprobe_graph_ops);
		return;
	}

	ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);
}

static int symbols_cmp(const void *a, const void *b)
{
	const char **str_a = (const char **)a;
	const char **str_b = (const char **)b;

	return strcmp(*str_a, *str_b);
}

/* Convert symbols to ftrace location addresses */
static unsigned long *get_ftrace_locations(const char **syms, int num)
{
	unsigned long *addrs;

	/* Convert symbols to symbol address */
	addrs = kcalloc(num, sizeof(*addrs), GFP_KERNEL);
	if (!addrs)
		return ERR_PTR(-ENOMEM);

	/* ftrace_lookup_symbols expects sorted symbols */
	sort(syms, num, sizeof(*syms), symbols_cmp, NULL);

	if (!ftrace_lookup_symbols(syms, num, addrs))
		return addrs;

	kfree(addrs);
	return ERR_PTR(-ENOENT);
}

struct filter_match_data {
	const char *filter;
	const char *notfilter;
	size_t index;
	size_t size;
	unsigned long *addrs;
};

static int filter_match_callback(void *data, const char *name, unsigned long addr)
{
	struct filter_match_data *match = data;

	if (!glob_match(match->filter, name) ||
	    (match->notfilter && glob_match(match->notfilter, name)))
		return 0;

	if (!ftrace_location(addr))
		return 0;

	if (match->addrs)
		match->addrs[match->index] = addr;

	match->index++;
	return match->index == match->size;
}

/*
 * Make an IP list from the filter/no-filter glob patterns.
 * Return the number of matched symbols, or -ENOENT.
 */
static int ip_list_from_filter(const char *filter, const char *notfilter,
			       unsigned long *addrs, size_t size)
{
	struct filter_match_data match = { .filter = filter, .notfilter = notfilter,
		.index = 0, .size = size, .addrs = addrs};
	int ret;

	ret = kallsyms_on_each_symbol(filter_match_callback, &match);
	if (ret < 0)
		return ret;
	ret = module_kallsyms_on_each_symbol(NULL, filter_match_callback, &match);
	if (ret < 0)
		return ret;

	return match.index ?: -ENOENT;
}
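
/*
 * For example (illustrative patterns), ip_list_from_filter("vfs_*", "vfs_d*",
 * NULL, FPROBE_IPS_MAX) returns how many traceable symbols match "vfs_*" but
 * not "vfs_d*"; calling it again with an @addrs array of that size fills in
 * the matched addresses. This two-call pattern is how register_fprobe()
 * below uses it.
 */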

static void fprobe_fail_cleanup(struct fprobe *fp)
{
	kfree(fp->hlist_array);
	fp->hlist_array = NULL;
}

/* Initialize the fprobe data structure. */
static int fprobe_init(struct fprobe *fp, unsigned long *addrs, int num)
{
	struct fprobe_hlist *hlist_array;
	unsigned long addr;
	int size, i;

	if (!fp || !addrs || num <= 0)
		return -EINVAL;

	size = ALIGN(fp->entry_data_size, sizeof(long));
	if (size > MAX_FPROBE_DATA_SIZE)
		return -E2BIG;
	fp->entry_data_size = size;

	hlist_array = kzalloc(struct_size(hlist_array, array, num), GFP_KERNEL);
	if (!hlist_array)
		return -ENOMEM;

	fp->nmissed = 0;

	hlist_array->size = num;
	fp->hlist_array = hlist_array;
	hlist_array->fp = fp;
	for (i = 0; i < num; i++) {
		hlist_array->array[i].fp = fp;
		addr = ftrace_location(addrs[i]);
		if (!addr) {
			fprobe_fail_cleanup(fp);
			return -ENOENT;
		}
		hlist_array->array[i].addr = addr;
	}
	return 0;
}

#define FPROBE_IPS_MAX	INT_MAX

/**
 * register_fprobe() - Register fprobe to ftrace by pattern.
 * @fp: A fprobe data structure to be registered.
 * @filter: A wildcard pattern of probed symbols.
 * @notfilter: A wildcard pattern of NOT probed symbols.
 *
 * Register @fp to ftrace for enabling the probe on the symbols matching
 * @filter. If @notfilter is not NULL, symbols matching @notfilter are not
 * probed.
 *
 * Return 0 if @fp is registered successfully, -errno if not.
 */
int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter)
{
	unsigned long *addrs;
	int ret;

	if (!fp || !filter)
		return -EINVAL;

	ret = ip_list_from_filter(filter, notfilter, NULL, FPROBE_IPS_MAX);
	if (ret < 0)
		return ret;

	addrs = kcalloc(ret, sizeof(unsigned long), GFP_KERNEL);
	if (!addrs)
		return -ENOMEM;
	ret = ip_list_from_filter(filter, notfilter, addrs, ret);
	if (ret > 0)
		ret = register_fprobe_ips(fp, addrs, ret);

	kfree(addrs);
	return ret;
}
EXPORT_SYMBOL_GPL(register_fprobe);
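
/*
 * A minimal usage sketch (not part of this file; the handler names and the
 * probed symbol are illustrative). Returning 0 from the entry handler lets
 * the exit handler run on function return:
 *
 *	static int my_entry(struct fprobe *fp, unsigned long ip,
 *			    unsigned long parent_ip, struct ftrace_regs *fregs,
 *			    void *data)
 *	{
 *		pr_info("entered %pS\n", (void *)ip);
 *		return 0;
 *	}
 *
 *	static void my_exit(struct fprobe *fp, unsigned long ip,
 *			    unsigned long ret_ip, struct ftrace_regs *fregs,
 *			    void *data)
 *	{
 *		pr_info("returning to %pS\n", (void *)ret_ip);
 *	}
 *
 *	static struct fprobe my_fprobe = {
 *		.entry_handler	= my_entry,
 *		.exit_handler	= my_exit,
 *	};
 *
 *	err = register_fprobe(&my_fprobe, "vfs_read", NULL);
 *	...
 *	err = unregister_fprobe(&my_fprobe);
 */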

/**
 * register_fprobe_ips() - Register fprobe to ftrace by address.
 * @fp: A fprobe data structure to be registered.
 * @addrs: An array of target function addresses.
 * @num: The number of entries of @addrs.
 *
 * Register @fp to ftrace for enabling the probe on the addresses given by
 * @addrs. The @addrs must be ftrace location addresses, which may be the
 * symbol address + arch-dependent offset. If you are unsure what this means,
 * please use the other registration functions.
 *
 * Return 0 if @fp is registered successfully, -errno if not.
 */
int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
{
	struct fprobe_hlist *hlist_array;
	int ret, i;

	ret = fprobe_init(fp, addrs, num);
	if (ret)
		return ret;

	mutex_lock(&fprobe_mutex);

	hlist_array = fp->hlist_array;
	ret = fprobe_graph_add_ips(addrs, num);
	if (!ret) {
		add_fprobe_hash(fp);
		for (i = 0; i < hlist_array->size; i++)
			insert_fprobe_node(&hlist_array->array[i]);
	}
	mutex_unlock(&fprobe_mutex);

	if (ret)
		fprobe_fail_cleanup(fp);

	return ret;
}
EXPORT_SYMBOL_GPL(register_fprobe_ips);

/**
 * register_fprobe_syms() - Register fprobe to ftrace by symbols.
 * @fp: A fprobe data structure to be registered.
 * @syms: An array of target symbols.
 * @num: The number of entries of @syms.
 *
 * Register @fp to the symbols given by the @syms array. This will be useful
 * if you are sure the symbols exist in the kernel.
 *
 * Return 0 if @fp is registered successfully, -errno if not.
 */
int register_fprobe_syms(struct fprobe *fp, const char **syms, int num)
{
	unsigned long *addrs;
	int ret;

	if (!fp || !syms || num <= 0)
		return -EINVAL;

	addrs = get_ftrace_locations(syms, num);
	if (IS_ERR(addrs))
		return PTR_ERR(addrs);

	ret = register_fprobe_ips(fp, addrs, num);

	kfree(addrs);

	return ret;
}
EXPORT_SYMBOL_GPL(register_fprobe_syms);
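
/*
 * For example (illustrative symbol names), probing a fixed set of functions:
 *
 *	static const char *syms[] = { "vfs_read", "vfs_write" };
 *
 *	err = register_fprobe_syms(&my_fprobe, syms, ARRAY_SIZE(syms));
 *
 * Note that get_ftrace_locations() sorts @syms in place for the lookup.
 */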

bool fprobe_is_registered(struct fprobe *fp)
{
	if (!fp || !fp->hlist_array)
		return false;
	return true;
}

/**
 * unregister_fprobe() - Unregister fprobe.
 * @fp: A fprobe data structure to be unregistered.
 *
 * Unregister fprobe (and remove ftrace hooks from the function entries).
 *
 * Return 0 if @fp is unregistered successfully, -errno if not.
 */
int unregister_fprobe(struct fprobe *fp)
{
	struct fprobe_hlist *hlist_array;
	unsigned long *addrs = NULL;
	int ret = 0, i, count;

	mutex_lock(&fprobe_mutex);
	if (!fp || !is_fprobe_still_exist(fp)) {
		ret = -EINVAL;
		goto out;
	}

	hlist_array = fp->hlist_array;
	addrs = kcalloc(hlist_array->size, sizeof(unsigned long), GFP_KERNEL);
	if (!addrs) {
		ret = -ENOMEM;	/* TODO: Fallback to one-by-one loop */
		goto out;
	}

	/* Remove non-synonym ips from the table and hash */
	count = 0;
	for (i = 0; i < hlist_array->size; i++) {
		if (!delete_fprobe_node(&hlist_array->array[i]))
			addrs[count++] = hlist_array->array[i].addr;
	}
	del_fprobe_hash(fp);

	if (count)
		fprobe_graph_remove_ips(addrs, count);

	kfree_rcu(hlist_array, rcu);
	fp->hlist_array = NULL;

out:
	mutex_unlock(&fprobe_mutex);

	kfree(addrs);
	return ret;
}
EXPORT_SYMBOL_GPL(unregister_fprobe);