1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * fprobe - Simple ftrace probe wrapper for function entry.
4 */
5 #define pr_fmt(fmt) "fprobe: " fmt
6
7 #include <linux/cleanup.h>
8 #include <linux/err.h>
9 #include <linux/fprobe.h>
10 #include <linux/kallsyms.h>
11 #include <linux/kprobes.h>
12 #include <linux/list.h>
13 #include <linux/mutex.h>
14 #include <linux/rhashtable.h>
15 #include <linux/slab.h>
16 #include <linux/sort.h>
17
18 #include <asm/fprobe.h>
19
20 #include "trace.h"
21
#define FPROBE_IP_HASH_BITS 8
#define FPROBE_IP_TABLE_SIZE (1 << FPROBE_IP_HASH_BITS)

/* fprobe_table has (1 << FPROBE_HASH_BITS) buckets. */
#define FPROBE_HASH_BITS 6
#define FPROBE_TABLE_SIZE (1 << FPROBE_HASH_BITS)

/*
 * Number of 'long' words needed to hold @x bytes, rounded up.
 * @x is parenthesized so that an expression argument (e.g. a conditional)
 * is rounded as a whole instead of being split by operator precedence.
 */
#define SIZE_IN_LONG(x) (((x) + sizeof(long) - 1) >> (sizeof(long) == 8 ? 3 : 2))
29
30 /*
31 * fprobe_table: hold 'fprobe_hlist::hlist' for checking the fprobe still
32 * exists. The key is the address of fprobe instance.
33 * fprobe_ip_table: hold 'fprobe_hlist::array[*]' for searching the fprobe
34 * instance related to the function address. The key is the ftrace IP
35 * address.
36 *
 * When unregistering the fprobe, fprobe_hlist::fp and fprobe_hlist::array[*].fp
 * are set to NULL and the entries are deleted from both hash tables
 * (hlist_del_rcu() for fprobe_table, rhltable_remove() for fprobe_ip_table).
 * After an RCU grace period, the fprobe_hlist itself will be released.
40 *
 * fprobe_table and fprobe_ip_table can be accessed in either of two ways:
 * - Normal traversal and RCU add/del while 'fprobe_mutex' is held.
 * - RCU traversal with preemption disabled or under rcu_read_lock().
44 */
/* Buckets of fprobe_hlist entries, indexed by hash_ptr() of the fprobe address. */
static struct hlist_head fprobe_table[FPROBE_TABLE_SIZE];
/* Hash list table of fprobe_hlist_node entries, keyed by the node's address. */
static struct rhltable fprobe_ip_table;
/* Held while inserting into or deleting from either table above. */
static DEFINE_MUTEX(fprobe_mutex);
/* Forward declaration; the initialized definition appears later in this file. */
static struct fgraph_ops fprobe_graph_ops;
49
/*
 * Key hash for fprobe_ip_table: @data points at the unsigned long key.
 * @len and @seed are not used.
 */
static u32 fprobe_node_hashfn(const void *data, u32 len, u32 seed)
{
	const unsigned long *keyp = data;

	return hash_ptr((void *)*keyp, 32);
}
54
fprobe_node_cmp(struct rhashtable_compare_arg * arg,const void * ptr)55 static int fprobe_node_cmp(struct rhashtable_compare_arg *arg,
56 const void *ptr)
57 {
58 unsigned long key = *(unsigned long *)arg->key;
59 const struct fprobe_hlist_node *n = ptr;
60
61 return n->addr != key;
62 }
63
/*
 * Object hash for fprobe_ip_table: hash the 'addr' member of the
 * fprobe_hlist_node that @data points to. @len and @seed are not used.
 */
static u32 fprobe_node_obj_hashfn(const void *data, u32 len, u32 seed)
{
	unsigned long addr = ((const struct fprobe_hlist_node *)data)->addr;

	return hash_ptr((void *)addr, 32);
}
70
/*
 * Parameters for fprobe_ip_table: fprobe_hlist_node objects are linked
 * through their 'hlist' member and keyed by their 'addr' member, using the
 * hash and compare callbacks defined above.
 */
static const struct rhashtable_params fprobe_rht_params = {
	.head_offset = offsetof(struct fprobe_hlist_node, hlist),
	.key_offset = offsetof(struct fprobe_hlist_node, addr),
	.key_len = sizeof_field(struct fprobe_hlist_node, addr),
	.hashfn = fprobe_node_hashfn,
	.obj_hashfn = fprobe_node_obj_hashfn,
	.obj_cmpfn = fprobe_node_cmp,
	.automatic_shrinking = true,
};
80
81 /* Node insertion and deletion requires the fprobe_mutex */
__insert_fprobe_node(struct fprobe_hlist_node * node,struct fprobe * fp)82 static int __insert_fprobe_node(struct fprobe_hlist_node *node, struct fprobe *fp)
83 {
84 int ret;
85
86 lockdep_assert_held(&fprobe_mutex);
87
88 ret = rhltable_insert(&fprobe_ip_table, &node->hlist, fprobe_rht_params);
89 /* Set the fprobe pointer if insertion was successful. */
90 if (!ret)
91 WRITE_ONCE(node->fp, fp);
92 return ret;
93 }
94
__delete_fprobe_node(struct fprobe_hlist_node * node)95 static void __delete_fprobe_node(struct fprobe_hlist_node *node)
96 {
97 lockdep_assert_held(&fprobe_mutex);
98
99 /* Avoid double deleting and non-inserted nodes */
100 if (READ_ONCE(node->fp) != NULL) {
101 WRITE_ONCE(node->fp, NULL);
102 rhltable_remove(&fprobe_ip_table, &node->hlist,
103 fprobe_rht_params);
104 }
105 }
106
107 /* Check existence of the fprobe */
fprobe_registered(struct fprobe * fp)108 static bool fprobe_registered(struct fprobe *fp)
109 {
110 struct hlist_head *head;
111 struct fprobe_hlist *fph;
112
113 head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)];
114 hlist_for_each_entry_rcu(fph, head, hlist,
115 lockdep_is_held(&fprobe_mutex)) {
116 if (fph->fp == fp)
117 return true;
118 }
119 return false;
120 }
121 NOKPROBE_SYMBOL(fprobe_registered);
122
add_fprobe_hash(struct fprobe * fp)123 static int add_fprobe_hash(struct fprobe *fp)
124 {
125 struct fprobe_hlist *fph = fp->hlist_array;
126 struct hlist_head *head;
127
128 lockdep_assert_held(&fprobe_mutex);
129
130 if (WARN_ON_ONCE(!fph))
131 return -EINVAL;
132
133 head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)];
134 hlist_add_head_rcu(&fp->hlist_array->hlist, head);
135 return 0;
136 }
137
del_fprobe_hash(struct fprobe * fp)138 static int del_fprobe_hash(struct fprobe *fp)
139 {
140 struct fprobe_hlist *fph = fp->hlist_array;
141
142 lockdep_assert_held(&fprobe_mutex);
143
144 if (WARN_ON_ONCE(!fph))
145 return -EINVAL;
146
147 if (!fprobe_registered(fp))
148 return -ENOENT;
149
150 fph->fp = NULL;
151 hlist_del_rcu(&fph->hlist);
152 return 0;
153 }
154
155 #ifdef ARCH_DEFINE_ENCODE_FPROBE_HEADER
156
157 /* The arch should encode fprobe_header info into one unsigned long */
158 #define FPROBE_HEADER_SIZE_IN_LONG 1
159
write_fprobe_header(unsigned long * stack,struct fprobe * fp,unsigned int size_words)160 static inline bool write_fprobe_header(unsigned long *stack,
161 struct fprobe *fp, unsigned int size_words)
162 {
163 if (WARN_ON_ONCE(size_words > MAX_FPROBE_DATA_SIZE_WORD ||
164 !arch_fprobe_header_encodable(fp)))
165 return false;
166
167 *stack = arch_encode_fprobe_header(fp, size_words);
168 return true;
169 }
170
/* Decode the arch-encoded header word at @stack into @fp and @size_words. */
static inline void read_fprobe_header(unsigned long *stack,
				      struct fprobe **fp, unsigned int *size_words)
{
	unsigned long header = *stack;

	*fp = arch_decode_fprobe_header_fp(header);
	*size_words = arch_decode_fprobe_header_size(header);
}
177
178 #else
179
/* Generic fprobe_header */
struct __fprobe_header {
	struct fprobe *fp;		/* fprobe this record belongs to */
	unsigned long size_words;	/* entry data size stored with the record, in longs */
} __packed;

/* Size of the generic header, in 'long' words. */
#define FPROBE_HEADER_SIZE_IN_LONG SIZE_IN_LONG(sizeof(struct __fprobe_header))
187
write_fprobe_header(unsigned long * stack,struct fprobe * fp,unsigned int size_words)188 static inline bool write_fprobe_header(unsigned long *stack,
189 struct fprobe *fp, unsigned int size_words)
190 {
191 struct __fprobe_header *fph = (struct __fprobe_header *)stack;
192
193 if (WARN_ON_ONCE(size_words > MAX_FPROBE_DATA_SIZE_WORD))
194 return false;
195
196 fph->fp = fp;
197 fph->size_words = size_words;
198 return true;
199 }
200
read_fprobe_header(unsigned long * stack,struct fprobe ** fp,unsigned int * size_words)201 static inline void read_fprobe_header(unsigned long *stack,
202 struct fprobe **fp, unsigned int *size_words)
203 {
204 struct __fprobe_header *fph = (struct __fprobe_header *)stack;
205
206 *fp = fph->fp;
207 *size_words = fph->size_words;
208 }
209
210 #endif
211
/*
 * fprobe shadow stack management:
 * Since fprobe shares a single fgraph_ops, the probes on the same function
 * exit must share one shadow stack entry. Because a new probe can be
 * registered while a target function is still running (i.e. before it
 * returns), the hash table cannot be used at return time to find the probes
 * that ran at entry. Thus each probe's address is stored on the shadow stack
 * together with its entry data size.
 */
__fprobe_handler(unsigned long ip,unsigned long parent_ip,struct fprobe * fp,struct ftrace_regs * fregs,void * data)221 static inline int __fprobe_handler(unsigned long ip, unsigned long parent_ip,
222 struct fprobe *fp, struct ftrace_regs *fregs,
223 void *data)
224 {
225 if (!fp->entry_handler)
226 return 0;
227
228 return fp->entry_handler(fp, ip, parent_ip, fregs, data);
229 }
230
__fprobe_kprobe_handler(unsigned long ip,unsigned long parent_ip,struct fprobe * fp,struct ftrace_regs * fregs,void * data)231 static inline int __fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
232 struct fprobe *fp, struct ftrace_regs *fregs,
233 void *data)
234 {
235 int ret;
236 /*
237 * This user handler is shared with other kprobes and is not expected to be
238 * called recursively. So if any other kprobe handler is running, this will
239 * exit as kprobe does. See the section 'Share the callbacks with kprobes'
240 * in Documentation/trace/fprobe.rst for more information.
241 */
242 if (unlikely(kprobe_running())) {
243 fp->nmissed++;
244 return 0;
245 }
246
247 kprobe_busy_begin();
248 ret = __fprobe_handler(ip, parent_ip, fp, fregs, data);
249 kprobe_busy_end();
250 return ret;
251 }
252
/* Callbacks are defined later in this file; declared here for fprobe_graph_ops. */
static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
			       struct ftrace_regs *fregs);
static void fprobe_return(struct ftrace_graph_ret *trace,
			  struct fgraph_ops *gops,
			  struct ftrace_regs *fregs);

/* The fgraph_ops used for fprobes that are handled through fgraph. */
static struct fgraph_ops fprobe_graph_ops = {
	.entryfunc = fprobe_fgraph_entry,
	.retfunc = fprobe_return,
};
/* Number of fgraph fprobe nodes */
static int nr_fgraph_fprobes;
/* Is fprobe_graph_ops registered? */
static bool fprobe_graph_registered;
267
268 /* Add @addrs to the ftrace filter and register fgraph if needed. */
fprobe_graph_add_ips(unsigned long * addrs,int num)269 static int fprobe_graph_add_ips(unsigned long *addrs, int num)
270 {
271 int ret;
272
273 lockdep_assert_held(&fprobe_mutex);
274
275 ret = ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 0, 0);
276 if (ret)
277 return ret;
278
279 if (!fprobe_graph_registered) {
280 ret = register_ftrace_graph(&fprobe_graph_ops);
281 if (WARN_ON_ONCE(ret)) {
282 ftrace_free_filter(&fprobe_graph_ops.ops);
283 return ret;
284 }
285 fprobe_graph_registered = true;
286 }
287 return 0;
288 }
289
__fprobe_graph_unregister(void)290 static void __fprobe_graph_unregister(void)
291 {
292 if (fprobe_graph_registered) {
293 unregister_ftrace_graph(&fprobe_graph_ops);
294 ftrace_free_filter(&fprobe_graph_ops.ops);
295 fprobe_graph_registered = false;
296 }
297 }
298
299 /* Remove @addrs from the ftrace filter and unregister fgraph if possible. */
fprobe_graph_remove_ips(unsigned long * addrs,int num)300 static void fprobe_graph_remove_ips(unsigned long *addrs, int num)
301 {
302 lockdep_assert_held(&fprobe_mutex);
303
304 if (!nr_fgraph_fprobes)
305 __fprobe_graph_unregister();
306 else if (num)
307 ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);
308 }
309
310 #if defined(CONFIG_DYNAMIC_FTRACE_WITH_ARGS) || defined(CONFIG_DYNAMIC_FTRACE_WITH_REGS)
311
/*
 * ftrace_ops callback, this processes fprobes which have only entry_handler.
 * Looks up the nodes registered for @ip in fprobe_ip_table and calls the
 * entry handler of each enabled fprobe that has no exit_handler.
 */
static void fprobe_ftrace_entry(unsigned long ip, unsigned long parent_ip,
				struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
	struct fprobe_hlist_node *node;
	struct rhlist_head *head, *pos;
	struct fprobe *fp;
	int bit;

	/* Bail out if the recursion lock for this context cannot be taken. */
	bit = ftrace_test_recursion_trylock(ip, parent_ip);
	if (bit < 0)
		return;

	/*
	 * ftrace_test_recursion_trylock() disables preemption, but
	 * rhltable_lookup() checks whether rcu_read_lock is held.
	 * So we take rcu_read_lock() here.
	 */
	rcu_read_lock();
	head = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params);

	rhl_for_each_entry_rcu(node, pos, head, hlist) {
		/* Stop at the first node that is registered for another address. */
		if (node->addr != ip)
			break;
		fp = READ_ONCE(node->fp);
		/* Skip deleted nodes, disabled fprobes and fprobes with an exit_handler. */
		if (unlikely(!fp || fprobe_disabled(fp) || fp->exit_handler))
			continue;

		if (fprobe_shared_with_kprobes(fp))
			__fprobe_kprobe_handler(ip, parent_ip, fp, fregs, NULL);
		else
			__fprobe_handler(ip, parent_ip, fp, fregs, NULL);
	}
	rcu_read_unlock();
	ftrace_test_recursion_unlock(bit);
}
NOKPROBE_SYMBOL(fprobe_ftrace_entry);
349
/* ftrace_ops used for fprobes that have no exit_handler. */
static struct ftrace_ops fprobe_ftrace_ops = {
	.func = fprobe_ftrace_entry,
	.flags = FTRACE_OPS_FL_SAVE_ARGS,
};
/* Number of ftrace fprobe nodes */
static int nr_ftrace_fprobes;
/* Is fprobe_ftrace_ops registered? */
static bool fprobe_ftrace_registered;
358
fprobe_ftrace_add_ips(unsigned long * addrs,int num)359 static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
360 {
361 int ret;
362
363 lockdep_assert_held(&fprobe_mutex);
364
365 ret = ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 0, 0);
366 if (ret)
367 return ret;
368
369 if (!fprobe_ftrace_registered) {
370 ret = register_ftrace_function(&fprobe_ftrace_ops);
371 if (ret) {
372 ftrace_free_filter(&fprobe_ftrace_ops);
373 return ret;
374 }
375 fprobe_ftrace_registered = true;
376 }
377 return 0;
378 }
379
__fprobe_ftrace_unregister(void)380 static void __fprobe_ftrace_unregister(void)
381 {
382 if (fprobe_ftrace_registered) {
383 unregister_ftrace_function(&fprobe_ftrace_ops);
384 ftrace_free_filter(&fprobe_ftrace_ops);
385 fprobe_ftrace_registered = false;
386 }
387 }
388
fprobe_ftrace_remove_ips(unsigned long * addrs,int num)389 static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
390 {
391 lockdep_assert_held(&fprobe_mutex);
392
393 if (!nr_ftrace_fprobes)
394 __fprobe_ftrace_unregister();
395 else if (num)
396 ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 1, 0);
397 }
398
fprobe_is_ftrace(struct fprobe * fp)399 static bool fprobe_is_ftrace(struct fprobe *fp)
400 {
401 return !fp->exit_handler;
402 }
403
404 /* Node insertion and deletion requires the fprobe_mutex */
insert_fprobe_node(struct fprobe_hlist_node * node,struct fprobe * fp)405 static int insert_fprobe_node(struct fprobe_hlist_node *node, struct fprobe *fp)
406 {
407 int ret;
408
409 lockdep_assert_held(&fprobe_mutex);
410
411 ret = __insert_fprobe_node(node, fp);
412 if (!ret) {
413 if (fprobe_is_ftrace(fp))
414 nr_ftrace_fprobes++;
415 else
416 nr_fgraph_fprobes++;
417 }
418
419 return ret;
420 }
421
delete_fprobe_node(struct fprobe_hlist_node * node)422 static void delete_fprobe_node(struct fprobe_hlist_node *node)
423 {
424 struct fprobe *fp;
425
426 lockdep_assert_held(&fprobe_mutex);
427
428 fp = READ_ONCE(node->fp);
429 if (fp) {
430 if (fprobe_is_ftrace(fp))
431 nr_ftrace_fprobes--;
432 else
433 nr_fgraph_fprobes--;
434 }
435 __delete_fprobe_node(node);
436 }
437
/*
 * Return true if fprobe_ip_table still holds a live node on @ip whose fprobe
 * is of the requested type: ftrace-handled when @ftrace is true,
 * fgraph-handled otherwise.
 */
static bool fprobe_exists_on_hash(unsigned long ip, bool ftrace)
{
	struct fprobe_hlist_node *node;
	struct rhlist_head *head, *pos;
	struct fprobe *fp;

	guard(rcu)();
	head = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params);
	if (!head)
		return false;

	/* We have to check the same type on the list. */
	rhl_for_each_entry_rcu(node, pos, head, hlist) {
		if (node->addr != ip)
			break;
		fp = READ_ONCE(node->fp);
		if (unlikely(!fp))
			continue;
		if (fprobe_is_ftrace(fp) == ftrace)
			return true;
	}
	return false;
}
463
464 #ifdef CONFIG_MODULES
fprobe_remove_ips(unsigned long * ips,unsigned int cnt)465 static void fprobe_remove_ips(unsigned long *ips, unsigned int cnt)
466 {
467 if (!nr_fgraph_fprobes)
468 __fprobe_graph_unregister();
469 else if (cnt)
470 ftrace_set_filter_ips(&fprobe_graph_ops.ops, ips, cnt, 1, 0);
471
472 if (!nr_ftrace_fprobes)
473 __fprobe_ftrace_unregister();
474 else if (cnt)
475 ftrace_set_filter_ips(&fprobe_ftrace_ops, ips, cnt, 1, 0);
476 }
477 #endif
478 #else
/* Stub for builds without the ftrace-based fprobe path: always fails with -ENOENT. */
static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
{
	return -ENOENT;
}
483
/* Stub for builds without the ftrace-based fprobe path: nothing to remove. */
static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
{
}
487
/* Stub for builds without the ftrace-based fprobe path: no fprobe is ftrace-handled. */
static bool fprobe_is_ftrace(struct fprobe *fp)
{
	return false;
}
492
493 /* Node insertion and deletion requires the fprobe_mutex */
insert_fprobe_node(struct fprobe_hlist_node * node,struct fprobe * fp)494 static int insert_fprobe_node(struct fprobe_hlist_node *node, struct fprobe *fp)
495 {
496 int ret;
497
498 lockdep_assert_held(&fprobe_mutex);
499
500 ret = __insert_fprobe_node(node, fp);
501 if (!ret)
502 nr_fgraph_fprobes++;
503
504 return ret;
505 }
506
delete_fprobe_node(struct fprobe_hlist_node * node)507 static void delete_fprobe_node(struct fprobe_hlist_node *node)
508 {
509 struct fprobe *fp;
510
511 lockdep_assert_held(&fprobe_mutex);
512
513 fp = READ_ONCE(node->fp);
514 if (fp)
515 nr_fgraph_fprobes--;
516 __delete_fprobe_node(node);
517 }
518
/*
 * Return true if fprobe_ip_table still holds a live node on @ip.
 * @ftrace is ignored in this configuration.
 */
static bool fprobe_exists_on_hash(unsigned long ip, bool ftrace __maybe_unused)
{
	struct fprobe_hlist_node *node;
	struct rhlist_head *head, *pos;

	guard(rcu)();
	head = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params);
	if (!head)
		return false;

	/* We only need to check fp is there. */
	rhl_for_each_entry_rcu(node, pos, head, hlist) {
		if (node->addr != ip)
			break;
		if (likely(READ_ONCE(node->fp)))
			return true;
	}
	return false;
}
541
542 #ifdef CONFIG_MODULES
fprobe_remove_ips(unsigned long * ips,unsigned int cnt)543 static void fprobe_remove_ips(unsigned long *ips, unsigned int cnt)
544 {
545 if (!nr_fgraph_fprobes)
546 __fprobe_graph_unregister();
547 else if (cnt)
548 ftrace_set_filter_ips(&fprobe_graph_ops.ops, ips, cnt, 1, 0);
549 }
550 #endif
551 #endif /* !CONFIG_DYNAMIC_FTRACE_WITH_ARGS && !CONFIG_DYNAMIC_FTRACE_WITH_REGS */
552
/*
 * fgraph_ops callback, this processes fprobes which have exit_handler.
 *
 * Works in up to three passes over the nodes registered on trace->func:
 *  1. sum up the shadow stack words needed by all fprobes with an
 *     exit_handler,
 *  2. if reserving that space fails, count a miss on each affected fprobe
 *     and return 0,
 *  3. call each enabled fprobe's entry handler and, when it returns 0 and an
 *     exit_handler is set, write a header so fprobe_return() can find it.
 *
 * Returns non-zero only if at least one header was written.
 */
static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
			       struct ftrace_regs *fregs)
{
	unsigned long *fgraph_data = NULL;
	unsigned long func = trace->func;
	struct fprobe_hlist_node *node;
	struct rhlist_head *head, *pos;
	unsigned long ret_ip;
	int reserved_words;
	struct fprobe *fp;
	int used, ret;

	if (WARN_ON_ONCE(!fregs))
		return 0;

	guard(rcu)();
	head = rhltable_lookup(&fprobe_ip_table, &func, fprobe_rht_params);
	/* Pass 1: compute how many words must be reserved. */
	reserved_words = 0;
	rhl_for_each_entry_rcu(node, pos, head, hlist) {
		if (node->addr != func)
			continue;
		fp = READ_ONCE(node->fp);
		if (!fp || !fp->exit_handler)
			continue;
		/*
		 * Since fprobe can be enabled until the next loop, we ignore the
		 * fprobe's disabled flag in this loop.
		 */
		reserved_words +=
			FPROBE_HEADER_SIZE_IN_LONG + SIZE_IN_LONG(fp->entry_data_size);
	}
	if (reserved_words) {
		fgraph_data = fgraph_reserve_data(gops->idx, reserved_words * sizeof(long));
		if (unlikely(!fgraph_data)) {
			/* Pass 2: reservation failed, account a miss on each enabled fprobe. */
			rhl_for_each_entry_rcu(node, pos, head, hlist) {
				if (node->addr != func)
					continue;
				fp = READ_ONCE(node->fp);
				if (fp && !fprobe_disabled(fp) && !fprobe_is_ftrace(fp))
					fp->nmissed++;
			}
			return 0;
		}
	}

	/*
	 * TODO: recursion detection has been done in the fgraph. Thus we need
	 * to add a callback to increment missed counter.
	 */
	ret_ip = ftrace_regs_get_return_address(fregs);
	/* Pass 3: run entry handlers; 'used' counts the words consumed so far. */
	used = 0;
	rhl_for_each_entry_rcu(node, pos, head, hlist) {
		int data_size;
		void *data;

		if (node->addr != func)
			continue;
		fp = READ_ONCE(node->fp);
		if (unlikely(!fp || fprobe_disabled(fp) || fprobe_is_ftrace(fp)))
			continue;

		/* Entry data, if any, lives right after this fprobe's header. */
		data_size = fp->entry_data_size;
		if (data_size && fp->exit_handler)
			data = fgraph_data + used + FPROBE_HEADER_SIZE_IN_LONG;
		else
			data = NULL;

		if (fprobe_shared_with_kprobes(fp))
			ret = __fprobe_kprobe_handler(func, ret_ip, fp, fregs, data);
		else
			ret = __fprobe_handler(func, ret_ip, fp, fregs, data);

		/* If entry_handler returns !0, nmissed is not counted but skips exit_handler. */
		if (!ret && fp->exit_handler) {
			int size_words = SIZE_IN_LONG(data_size);

			/* Only advance 'used' when the header was really written. */
			if (write_fprobe_header(&fgraph_data[used], fp, size_words))
				used += FPROBE_HEADER_SIZE_IN_LONG + size_words;
		}
	}

	/* If any exit_handler is set, data must be used. */
	return used != 0;
}
NOKPROBE_SYMBOL(fprobe_fgraph_entry);
639
fprobe_return(struct ftrace_graph_ret * trace,struct fgraph_ops * gops,struct ftrace_regs * fregs)640 static void fprobe_return(struct ftrace_graph_ret *trace,
641 struct fgraph_ops *gops,
642 struct ftrace_regs *fregs)
643 {
644 unsigned long *fgraph_data = NULL;
645 unsigned long ret_ip;
646 struct fprobe *fp;
647 int size, curr;
648 int size_words;
649
650 fgraph_data = (unsigned long *)fgraph_retrieve_data(gops->idx, &size);
651 if (WARN_ON_ONCE(!fgraph_data))
652 return;
653 size_words = SIZE_IN_LONG(size);
654 ret_ip = ftrace_regs_get_instruction_pointer(fregs);
655
656 preempt_disable_notrace();
657
658 curr = 0;
659 while (size_words > curr) {
660 read_fprobe_header(&fgraph_data[curr], &fp, &size);
661 if (!fp)
662 break;
663 curr += FPROBE_HEADER_SIZE_IN_LONG;
664 if (fprobe_registered(fp) && !fprobe_disabled(fp)) {
665 if (WARN_ON_ONCE(curr + size > size_words))
666 break;
667 fp->exit_handler(fp, trace->func, ret_ip, fregs,
668 size ? fgraph_data + curr : NULL);
669 }
670 curr += size;
671 }
672 preempt_enable_notrace();
673 }
674 NOKPROBE_SYMBOL(fprobe_return);
675
676 #ifdef CONFIG_MODULES
677
/* Capacity of the address list used while handling a module unload. */
#define FPROBE_IPS_BATCH_INIT 128
/* instruction pointer address list */
struct fprobe_addr_list {
	int index;		/* number of entries currently stored in @addrs */
	int size;		/* capacity of @addrs */
	unsigned long *addrs;	/* storage; NULL if its allocation failed */
};
685
fprobe_remove_node_in_module(struct module * mod,struct fprobe_hlist_node * node,struct fprobe_addr_list * alist)686 static int fprobe_remove_node_in_module(struct module *mod, struct fprobe_hlist_node *node,
687 struct fprobe_addr_list *alist)
688 {
689 lockdep_assert_in_rcu_read_lock();
690
691 if (!within_module(node->addr, mod))
692 return 0;
693
694 delete_fprobe_node(node);
695 /* If no address list is available, we can't track this address. */
696 if (!alist->addrs)
697 return 0;
698 /*
699 * Don't care the type here, because all fprobes on the same
700 * address must be removed eventually.
701 */
702 if (!rhltable_lookup(&fprobe_ip_table, &node->addr, fprobe_rht_params)) {
703 alist->addrs[alist->index++] = node->addr;
704 if (alist->index == alist->size)
705 return -ENOSPC;
706 }
707
708 return 0;
709 }
710
/*
 * Handle module unloading to manage fprobe_ip_table.
 *
 * On MODULE_STATE_GOING, walk fprobe_ip_table, delete every node whose
 * address is inside the module, and remove the collected addresses from the
 * ftrace filters. Other module states are ignored. Always returns
 * NOTIFY_DONE.
 */
static int fprobe_module_callback(struct notifier_block *nb,
				  unsigned long val, void *data)
{
	struct fprobe_addr_list alist = {.size = FPROBE_IPS_BATCH_INIT};
	struct fprobe_hlist_node *node;
	struct rhashtable_iter iter;
	struct module *mod = data;
	bool retry;

	if (val != MODULE_STATE_GOING)
		return NOTIFY_DONE;

	alist.addrs = kcalloc(alist.size, sizeof(*alist.addrs), GFP_KERNEL);
	/*
	 * If failed to alloc memory, ftrace_ops will not be able to remove ips from
	 * hash, but we can still remove nodes from fprobe_ip_table, so we can avoid
	 * the potential wrong callback. So just print a warning here and try to
	 * continue without address list.
	 */
	WARN_ONCE(!alist.addrs,
		  "Failed to allocate memory for fprobe_addr_list, ftrace_ops will not be updated");

	mutex_lock(&fprobe_mutex);
again:
	/* Each round starts with an empty address list. */
	retry = false;
	alist.index = 0;
	rhltable_walk_enter(&fprobe_ip_table, &iter);
	do {
		rhashtable_walk_start(&iter);

		/* A negative return means the address list is full: flush it and retry. */
		while ((node = rhashtable_walk_next(&iter)) && !IS_ERR(node))
			if (fprobe_remove_node_in_module(mod, node, &alist) < 0) {
				retry = true;
				break;
			}

		rhashtable_walk_stop(&iter);
	} while (node == ERR_PTR(-EAGAIN) && !retry);
	rhashtable_walk_exit(&iter);
	/* Remove any ips from hash table(s) */
	fprobe_remove_ips(alist.addrs, alist.index);
	/*
	 * If we break rhashtable walk loop except for -EAGAIN, we need
	 * to restart looping from start for safety. Anyway, this is
	 * not a hotpath.
	 */
	if (retry)
		goto again;

	mutex_unlock(&fprobe_mutex);

	kfree(alist.addrs);

	return NOTIFY_DONE;
}
767
/* Module notifier that routes module state changes to fprobe_module_callback(). */
static struct notifier_block fprobe_module_nb = {
	.notifier_call = fprobe_module_callback,
	.priority = 0,
};

/* Register the module notifier; returns register_module_notifier()'s result. */
static int __init init_fprobe_module(void)
{
	return register_module_notifier(&fprobe_module_nb);
}
early_initcall(init_fprobe_module);
778 #endif
779
/*
 * sort() comparator for an array of C strings: @a and @b each point at a
 * 'const char *' element, and the strings are ordered by strcmp().
 */
static int symbols_cmp(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return strcmp(*lhs, *rhs);
}
787
788 /* Convert ftrace location address from symbols */
get_ftrace_locations(const char ** syms,int num)789 static unsigned long *get_ftrace_locations(const char **syms, int num)
790 {
791 unsigned long *addrs;
792
793 /* Convert symbols to symbol address */
794 addrs = kcalloc(num, sizeof(*addrs), GFP_KERNEL);
795 if (!addrs)
796 return ERR_PTR(-ENOMEM);
797
798 /* ftrace_lookup_symbols expects sorted symbols */
799 sort(syms, num, sizeof(*syms), symbols_cmp, NULL);
800
801 if (!ftrace_lookup_symbols(syms, num, addrs))
802 return addrs;
803
804 kfree(addrs);
805 return ERR_PTR(-ENOENT);
806 }
807
/* State shared by filter_match_callback() across symbol walks. */
struct filter_match_data {
	const char *filter;	/* glob pattern a symbol must match */
	const char *notfilter;	/* optional glob pattern a symbol must not match */
	size_t index;		/* number of symbols matched so far */
	size_t size;		/* walk stops once @index reaches this value */
	unsigned long *addrs;	/* matched addresses; NULL to only count matches */
	struct module **mods;	/* module owning each address (NULL if none), refcount held */
};
816
filter_match_callback(void * data,const char * name,unsigned long addr)817 static int filter_match_callback(void *data, const char *name, unsigned long addr)
818 {
819 struct filter_match_data *match = data;
820
821 if (!glob_match(match->filter, name) ||
822 (match->notfilter && glob_match(match->notfilter, name)))
823 return 0;
824
825 if (!ftrace_location(addr))
826 return 0;
827
828 if (match->addrs) {
829 struct module *mod = __module_text_address(addr);
830
831 if (mod && !try_module_get(mod))
832 return 0;
833
834 match->mods[match->index] = mod;
835 match->addrs[match->index] = addr;
836 }
837 match->index++;
838 return match->index == match->size;
839 }
840
841 /*
842 * Make IP list from the filter/no-filter glob patterns.
843 * Return the number of matched symbols, or errno.
844 * If @addrs == NULL, this just counts the number of matched symbols. If @addrs
845 * is passed with an array, we need to pass the an @mods array of the same size
846 * to increment the module refcount for each symbol.
847 * This means we also need to call `module_put` for each element of @mods after
848 * using the @addrs.
849 */
get_ips_from_filter(const char * filter,const char * notfilter,unsigned long * addrs,struct module ** mods,size_t size)850 static int get_ips_from_filter(const char *filter, const char *notfilter,
851 unsigned long *addrs, struct module **mods,
852 size_t size)
853 {
854 struct filter_match_data match = { .filter = filter, .notfilter = notfilter,
855 .index = 0, .size = size, .addrs = addrs, .mods = mods};
856 int ret;
857
858 if (addrs && !mods)
859 return -EINVAL;
860
861 ret = kallsyms_on_each_symbol(filter_match_callback, &match);
862 if (ret < 0)
863 return ret;
864 if (IS_ENABLED(CONFIG_MODULES)) {
865 ret = module_kallsyms_on_each_symbol(NULL, filter_match_callback, &match);
866 if (ret < 0)
867 return ret;
868 }
869
870 return match.index ?: -ENOENT;
871 }
872
fprobe_fail_cleanup(struct fprobe * fp)873 static void fprobe_fail_cleanup(struct fprobe *fp)
874 {
875 kfree(fp->hlist_array);
876 fp->hlist_array = NULL;
877 }
878
879 /* Initialize the fprobe data structure. */
fprobe_init(struct fprobe * fp,unsigned long * addrs,int num)880 static int fprobe_init(struct fprobe *fp, unsigned long *addrs, int num)
881 {
882 struct fprobe_hlist *hlist_array;
883 unsigned long addr;
884 int size, i;
885
886 if (!fp || !addrs || num <= 0)
887 return -EINVAL;
888
889 size = ALIGN(fp->entry_data_size, sizeof(long));
890 if (size > MAX_FPROBE_DATA_SIZE)
891 return -E2BIG;
892 fp->entry_data_size = size;
893
894 hlist_array = kzalloc_flex(*hlist_array, array, num);
895 if (!hlist_array)
896 return -ENOMEM;
897
898 fp->nmissed = 0;
899
900 hlist_array->size = num;
901 fp->hlist_array = hlist_array;
902 hlist_array->fp = fp;
903 for (i = 0; i < num; i++) {
904 addr = ftrace_location(addrs[i]);
905 if (!addr) {
906 fprobe_fail_cleanup(fp);
907 return -ENOENT;
908 }
909 hlist_array->array[i].addr = addr;
910 }
911 return 0;
912 }
913
/* Upper bound passed as the size when only counting matches. */
#define FPROBE_IPS_MAX INT_MAX

/*
 * Count the symbols matching @filter but not @notfilter, without collecting
 * their addresses. Returns get_ips_from_filter()'s result: the number of
 * matches, or a negative errno.
 */
int fprobe_count_ips_from_filter(const char *filter, const char *notfilter)
{
	return get_ips_from_filter(filter, notfilter, NULL, NULL, FPROBE_IPS_MAX);
}
920
921 /**
922 * register_fprobe() - Register fprobe to ftrace by pattern.
923 * @fp: A fprobe data structure to be registered.
924 * @filter: A wildcard pattern of probed symbols.
925 * @notfilter: A wildcard pattern of NOT probed symbols.
926 *
927 * Register @fp to ftrace for enabling the probe on the symbols matched to @filter.
928 * If @notfilter is not NULL, the symbols matched the @notfilter are not probed.
929 *
930 * Return 0 if @fp is registered successfully, -errno if not.
931 */
register_fprobe(struct fprobe * fp,const char * filter,const char * notfilter)932 int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter)
933 {
934 unsigned long *addrs __free(kfree) = NULL;
935 struct module **mods __free(kfree) = NULL;
936 int ret, num;
937
938 if (!fp || !filter)
939 return -EINVAL;
940
941 num = get_ips_from_filter(filter, notfilter, NULL, NULL, FPROBE_IPS_MAX);
942 if (num < 0)
943 return num;
944
945 addrs = kcalloc(num, sizeof(*addrs), GFP_KERNEL);
946 if (!addrs)
947 return -ENOMEM;
948
949 mods = kzalloc_objs(*mods, num);
950 if (!mods)
951 return -ENOMEM;
952
953 ret = get_ips_from_filter(filter, notfilter, addrs, mods, num);
954 if (ret < 0)
955 return ret;
956
957 ret = register_fprobe_ips(fp, addrs, ret);
958
959 for (int i = 0; i < num; i++) {
960 if (mods[i])
961 module_put(mods[i]);
962 }
963 return ret;
964 }
965 EXPORT_SYMBOL_GPL(register_fprobe);
966
967 static int unregister_fprobe_nolock(struct fprobe *fp);
968
969 /**
970 * register_fprobe_ips() - Register fprobe to ftrace by address.
971 * @fp: A fprobe data structure to be registered.
972 * @addrs: An array of target function address.
973 * @num: The number of entries of @addrs.
974 *
975 * Register @fp to ftrace for enabling the probe on the address given by @addrs.
976 * The @addrs must be the addresses of ftrace location address, which may be
977 * the symbol address + arch-dependent offset.
978 * If you unsure what this mean, please use other registration functions.
979 *
980 * Return 0 if @fp is registered successfully, -errno if not.
981 */
int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
{
	struct fprobe_hlist *hlist_array;
	int ret, i;

	/* fprobe_mutex is held until this function returns. */
	guard(mutex)(&fprobe_mutex);
	if (fprobe_registered(fp))
		return -EEXIST;

	/* Validates the arguments and allocates fp->hlist_array. */
	ret = fprobe_init(fp, addrs, num);
	if (ret)
		return ret;

	/* Hook the addresses through the ops that matches the fprobe type. */
	if (fprobe_is_ftrace(fp))
		ret = fprobe_ftrace_add_ips(addrs, num);
	else
		ret = fprobe_graph_add_ips(addrs, num);
	if (ret) {
		fprobe_fail_cleanup(fp);
		return ret;
	}

	/* Add the fprobe to fprobe_table, then each node to fprobe_ip_table. */
	hlist_array = fp->hlist_array;
	ret = add_fprobe_hash(fp);
	for (i = 0; i < hlist_array->size && !ret; i++)
		ret = insert_fprobe_node(&hlist_array->array[i], fp);

	if (ret) {
		/* Roll back whatever part of the registration was done. */
		unregister_fprobe_nolock(fp);
		/* In error case, wait for clean up safely. */
		synchronize_rcu();
	}

	return ret;
}
EXPORT_SYMBOL_GPL(register_fprobe_ips);
1018
1019 /**
1020 * register_fprobe_syms() - Register fprobe to ftrace by symbols.
1021 * @fp: A fprobe data structure to be registered.
1022 * @syms: An array of target symbols.
1023 * @num: The number of entries of @syms.
1024 *
1025 * Register @fp to the symbols given by @syms array. This will be useful if
1026 * you are sure the symbols exist in the kernel.
1027 *
1028 * Return 0 if @fp is registered successfully, -errno if not.
1029 */
register_fprobe_syms(struct fprobe * fp,const char ** syms,int num)1030 int register_fprobe_syms(struct fprobe *fp, const char **syms, int num)
1031 {
1032 unsigned long *addrs;
1033 int ret;
1034
1035 if (!fp || !syms || num <= 0)
1036 return -EINVAL;
1037
1038 addrs = get_ftrace_locations(syms, num);
1039 if (IS_ERR(addrs))
1040 return PTR_ERR(addrs);
1041
1042 ret = register_fprobe_ips(fp, addrs, num);
1043
1044 kfree(addrs);
1045
1046 return ret;
1047 }
1048 EXPORT_SYMBOL_GPL(register_fprobe_syms);
1049
fprobe_is_registered(struct fprobe * fp)1050 bool fprobe_is_registered(struct fprobe *fp)
1051 {
1052 if (!fp || !fp->hlist_array)
1053 return false;
1054 return true;
1055 }
1056
/*
 * Unregister @fp: delete its nodes from fprobe_ip_table, unlink it from
 * fprobe_table, drop the no-longer-probed addresses from the ftrace filter
 * and free the hlist_array after an RCU grace period. Always returns 0.
 */
static int unregister_fprobe_nolock(struct fprobe *fp)
{
	struct fprobe_hlist *hlist_array = fp->hlist_array;
	unsigned long *addrs = NULL;
	int i, count;

	addrs = kcalloc(hlist_array->size, sizeof(unsigned long), GFP_KERNEL);
	/*
	 * This will remove fprobe_hlist_node from the hash table even if
	 * memory allocation fails. However, ftrace_ops will not be updated.
	 * Anyway, when the last fprobe is unregistered, ftrace_ops is also
	 * unregistered.
	 */
	if (!addrs)
		pr_warn("Failed to allocate working array. ftrace_ops may not sync.\n");

	/* Remove non-synonym ips from table and hash */
	count = 0;
	for (i = 0; i < hlist_array->size; i++) {
		delete_fprobe_node(&hlist_array->array[i]);
		/* Only collect addresses that no other fprobe of the same type uses. */
		if (addrs && !fprobe_exists_on_hash(hlist_array->array[i].addr,
						    fprobe_is_ftrace(fp)))
			addrs[count++] = hlist_array->array[i].addr;
	}
	del_fprobe_hash(fp);

	if (fprobe_is_ftrace(fp))
		fprobe_ftrace_remove_ips(addrs, count);
	else
		fprobe_graph_remove_ips(addrs, count);

	/* The array is freed only after an RCU grace period. */
	kfree_rcu(hlist_array, rcu);
	fp->hlist_array = NULL;
	kfree(addrs);

	return 0;
}
1094
1095 /**
1096 * unregister_fprobe() - Unregister fprobe.
1097 * @fp: A fprobe data structure to be unregistered.
1098 *
1099 * Unregister fprobe (and remove ftrace hooks from the function entries).
1100 *
1101 * Return 0 if @fp is unregistered successfully, -errno if not.
1102 */
unregister_fprobe(struct fprobe * fp)1103 int unregister_fprobe(struct fprobe *fp)
1104 {
1105 guard(mutex)(&fprobe_mutex);
1106 if (!fp || !fprobe_registered(fp))
1107 return -EINVAL;
1108
1109 return unregister_fprobe_nolock(fp);
1110 }
1111 EXPORT_SYMBOL_GPL(unregister_fprobe);
1112
fprobe_initcall(void)1113 static int __init fprobe_initcall(void)
1114 {
1115 rhltable_init(&fprobe_ip_table, &fprobe_rht_params);
1116 return 0;
1117 }
1118 core_initcall(fprobe_initcall);
1119