// SPDX-License-Identifier: GPL-2.0
/*
 * fprobe - Simple ftrace probe wrapper for function entry.
 */
#define pr_fmt(fmt) "fprobe: " fmt

#include <linux/err.h>
#include <linux/fprobe.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/rhashtable.h>
#include <linux/slab.h>
#include <linux/sort.h>

#include <asm/fprobe.h>

#include "trace.h"

#define FPROBE_IP_HASH_BITS 8
#define FPROBE_IP_TABLE_SIZE (1 << FPROBE_IP_HASH_BITS)

#define FPROBE_HASH_BITS 6
#define FPROBE_TABLE_SIZE (1 << FPROBE_HASH_BITS)

#define SIZE_IN_LONG(x) ((x + sizeof(long) - 1) >> (sizeof(long) == 8 ? 3 : 2))
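/*
 * For example, on a 64-bit kernel, SIZE_IN_LONG(12) == (12 + 7) >> 3 == 2:
 * 12 bytes of entry data are rounded up to 2 longs. This is how byte sizes
 * are converted into shadow-stack words below.
 */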

/*
 * fprobe_table: holds 'fprobe_hlist::hlist' for checking whether the fprobe
 * still exists. The key is the address of the fprobe instance.
 * fprobe_ip_table: holds 'fprobe_hlist::array[*]' for searching the fprobe
 * instance related to the function address. The key is the ftrace IP
 * address.
 *
 * When unregistering the fprobe, fprobe_hlist::fp and fprobe_hlist::array[*].fp
 * are set to NULL and the entries are deleted from both hash tables (by
 * hlist_del_rcu). After an RCU grace period, the fprobe_hlist itself is
 * released.
 *
 * fprobe_table and fprobe_ip_table can be accessed from either
 * - Normal hlist traversal and RCU add/del while 'fprobe_mutex' is held.
 * - RCU hlist traversal with preemption disabled.
 */
static struct hlist_head fprobe_table[FPROBE_TABLE_SIZE];
static struct rhltable fprobe_ip_table;
static DEFINE_MUTEX(fprobe_mutex);
static struct fgraph_ops fprobe_graph_ops;

static u32 fprobe_node_hashfn(const void *data, u32 len, u32 seed)
{
	return hash_ptr(*(unsigned long **)data, 32);
}

static int fprobe_node_cmp(struct rhashtable_compare_arg *arg,
			   const void *ptr)
{
	unsigned long key = *(unsigned long *)arg->key;
	const struct fprobe_hlist_node *n = ptr;

	return n->addr != key;
}

static u32 fprobe_node_obj_hashfn(const void *data, u32 len, u32 seed)
{
	const struct fprobe_hlist_node *n = data;

	return hash_ptr((void *)n->addr, 32);
}

static const struct rhashtable_params fprobe_rht_params = {
	.head_offset = offsetof(struct fprobe_hlist_node, hlist),
	.key_offset = offsetof(struct fprobe_hlist_node, addr),
	.key_len = sizeof_field(struct fprobe_hlist_node, addr),
	.hashfn = fprobe_node_hashfn,
	.obj_hashfn = fprobe_node_obj_hashfn,
	.obj_cmpfn = fprobe_node_cmp,
	.automatic_shrinking = true,
};
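
/*
 * Note: fprobe_ip_table is an rhltable, so multiple fprobe_hlist_node
 * entries can share the same function address as their key. Such duplicate
 * entries are called "synonyms" below; an address can only be removed from
 * the ftrace filter once its last synonym is gone.
 */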

/* Node insertion and deletion require the fprobe_mutex */
static int insert_fprobe_node(struct fprobe_hlist_node *node)
{
	lockdep_assert_held(&fprobe_mutex);

	return rhltable_insert(&fprobe_ip_table, &node->hlist, fprobe_rht_params);
}

/* Return true if there are synonyms (other nodes with the same address) */
static bool delete_fprobe_node(struct fprobe_hlist_node *node)
{
	bool ret;

	lockdep_assert_held(&fprobe_mutex);

	/* Avoid double deleting */
	if (READ_ONCE(node->fp) != NULL) {
		WRITE_ONCE(node->fp, NULL);
		rhltable_remove(&fprobe_ip_table, &node->hlist,
				fprobe_rht_params);
	}

	rcu_read_lock();
	ret = !!rhltable_lookup(&fprobe_ip_table, &node->addr,
				fprobe_rht_params);
	rcu_read_unlock();

	return ret;
}

/* Check existence of the fprobe */
static bool is_fprobe_still_exist(struct fprobe *fp)
{
	struct hlist_head *head;
	struct fprobe_hlist *fph;

	head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)];
	hlist_for_each_entry_rcu(fph, head, hlist,
				 lockdep_is_held(&fprobe_mutex)) {
		if (fph->fp == fp)
			return true;
	}
	return false;
}
NOKPROBE_SYMBOL(is_fprobe_still_exist);

static int add_fprobe_hash(struct fprobe *fp)
{
	struct fprobe_hlist *fph = fp->hlist_array;
	struct hlist_head *head;

	lockdep_assert_held(&fprobe_mutex);

	if (WARN_ON_ONCE(!fph))
		return -EINVAL;

	if (is_fprobe_still_exist(fp))
		return -EEXIST;

	head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)];
	hlist_add_head_rcu(&fp->hlist_array->hlist, head);
	return 0;
}

static int del_fprobe_hash(struct fprobe *fp)
{
	struct fprobe_hlist *fph = fp->hlist_array;

	lockdep_assert_held(&fprobe_mutex);

	if (WARN_ON_ONCE(!fph))
		return -EINVAL;

	if (!is_fprobe_still_exist(fp))
		return -ENOENT;

	fph->fp = NULL;
	hlist_del_rcu(&fph->hlist);
	return 0;
}

#ifdef ARCH_DEFINE_ENCODE_FPROBE_HEADER

/* The arch should encode fprobe_header info into one unsigned long */
#define FPROBE_HEADER_SIZE_IN_LONG 1

static inline bool write_fprobe_header(unsigned long *stack,
				       struct fprobe *fp, unsigned int size_words)
{
	if (WARN_ON_ONCE(size_words > MAX_FPROBE_DATA_SIZE_WORD ||
			 !arch_fprobe_header_encodable(fp)))
		return false;

	*stack = arch_encode_fprobe_header(fp, size_words);
	return true;
}

static inline void read_fprobe_header(unsigned long *stack,
				      struct fprobe **fp, unsigned int *size_words)
{
	*fp = arch_decode_fprobe_header_fp(*stack);
	*size_words = arch_decode_fprobe_header_size(*stack);
}

#else

/* Generic fprobe_header */
struct __fprobe_header {
	struct fprobe *fp;
	unsigned long size_words;
} __packed;

#define FPROBE_HEADER_SIZE_IN_LONG SIZE_IN_LONG(sizeof(struct __fprobe_header))

static inline bool write_fprobe_header(unsigned long *stack,
				       struct fprobe *fp, unsigned int size_words)
{
	struct __fprobe_header *fph = (struct __fprobe_header *)stack;

	if (WARN_ON_ONCE(size_words > MAX_FPROBE_DATA_SIZE_WORD))
		return false;

	fph->fp = fp;
	fph->size_words = size_words;
	return true;
}

static inline void read_fprobe_header(unsigned long *stack,
				      struct fprobe **fp, unsigned int *size_words)
{
	struct __fprobe_header *fph = (struct __fprobe_header *)stack;

	*fp = fph->fp;
	*size_words = fph->size_words;
}

#endif
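
/*
 * With the generic header on a 64-bit kernel, sizeof(struct __fprobe_header)
 * is 16 bytes, so FPROBE_HEADER_SIZE_IN_LONG is SIZE_IN_LONG(16) == 2 words
 * per probe, plus SIZE_IN_LONG(entry_data_size) words of entry data.
 */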

/*
 * fprobe shadow stack management:
 * Since all fprobes share a single fgraph_ops, the shadow stack entry must be
 * shared among the probes on the same function exit. Note that because a new
 * probe can be registered before a target function returns, we cannot use the
 * hash table to find the corresponding probes. Thus each probe address is
 * stored on the shadow stack together with its entry data size.
 */
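/*
 * An illustrative sketch (generic header, 64-bit): for two fprobes A
 * (entry_data_size = 8) and B (no entry data) hooked on the same function,
 * the reserved fgraph data words look like:
 *
 *	[ header(A): 2 words ][ A's entry data: 1 word ][ header(B): 2 words ]
 *
 * fprobe_return() walks this array, decoding each header to find the fprobe
 * and the size of the entry data that follows it.
 */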
static inline int __fprobe_handler(unsigned long ip, unsigned long parent_ip,
				   struct fprobe *fp, struct ftrace_regs *fregs,
				   void *data)
{
	if (!fp->entry_handler)
		return 0;

	return fp->entry_handler(fp, ip, parent_ip, fregs, data);
}

static inline int __fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
					  struct fprobe *fp, struct ftrace_regs *fregs,
					  void *data)
{
	int ret;

	/*
	 * This user handler is shared with other kprobes and is not expected to be
	 * called recursively. So if any other kprobe handler is running, this will
	 * exit as a kprobe does. See the section 'Share the callbacks with kprobes'
	 * in Documentation/trace/fprobe.rst for more information.
	 */
	if (unlikely(kprobe_running())) {
		fp->nmissed++;
		return 0;
	}

	kprobe_busy_begin();
	ret = __fprobe_handler(ip, parent_ip, fp, fregs, data);
	kprobe_busy_end();
	return ret;
}

#if defined(CONFIG_DYNAMIC_FTRACE_WITH_ARGS) || defined(CONFIG_DYNAMIC_FTRACE_WITH_REGS)
/* ftrace_ops callback; this processes fprobes which have only an entry_handler. */
static void fprobe_ftrace_entry(unsigned long ip, unsigned long parent_ip,
				struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
	struct fprobe_hlist_node *node;
	struct rhlist_head *head, *pos;
	struct fprobe *fp;
	int bit;

	bit = ftrace_test_recursion_trylock(ip, parent_ip);
	if (bit < 0)
		return;

	/*
	 * ftrace_test_recursion_trylock() disables preemption, but
	 * rhltable_lookup() checks whether rcu_read_lock() is held.
	 * So we take rcu_read_lock() here.
	 */
	rcu_read_lock();
	head = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params);

	rhl_for_each_entry_rcu(node, pos, head, hlist) {
		if (node->addr != ip)
			break;
		fp = READ_ONCE(node->fp);
		if (unlikely(!fp || fprobe_disabled(fp) || fp->exit_handler))
			continue;

		if (fprobe_shared_with_kprobes(fp))
			__fprobe_kprobe_handler(ip, parent_ip, fp, fregs, NULL);
		else
			__fprobe_handler(ip, parent_ip, fp, fregs, NULL);
	}
	rcu_read_unlock();
	ftrace_test_recursion_unlock(bit);
}
NOKPROBE_SYMBOL(fprobe_ftrace_entry);

static struct ftrace_ops fprobe_ftrace_ops = {
	.func = fprobe_ftrace_entry,
	.flags = FTRACE_OPS_FL_SAVE_ARGS,
};
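/* The number of registered fprobes sharing fprobe_ftrace_ops. */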
static int fprobe_ftrace_active;

static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
{
	int ret;

	lockdep_assert_held(&fprobe_mutex);

	ret = ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 0, 0);
	if (ret)
		return ret;

	if (!fprobe_ftrace_active) {
		ret = register_ftrace_function(&fprobe_ftrace_ops);
		if (ret) {
			ftrace_free_filter(&fprobe_ftrace_ops);
			return ret;
		}
	}
	fprobe_ftrace_active++;
	return 0;
}

static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
{
	lockdep_assert_held(&fprobe_mutex);

	fprobe_ftrace_active--;
	if (!fprobe_ftrace_active)
		unregister_ftrace_function(&fprobe_ftrace_ops);
	if (num)
		ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 1, 0);
}

static bool fprobe_is_ftrace(struct fprobe *fp)
{
	return !fp->exit_handler;
}

#ifdef CONFIG_MODULES
static void fprobe_set_ips(unsigned long *ips, unsigned int cnt, int remove,
			   int reset)
{
	ftrace_set_filter_ips(&fprobe_graph_ops.ops, ips, cnt, remove, reset);
	ftrace_set_filter_ips(&fprobe_ftrace_ops, ips, cnt, remove, reset);
}
#endif
#else
static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
{
	return -ENOENT;
}

static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
{
}

static bool fprobe_is_ftrace(struct fprobe *fp)
{
	return false;
}

#ifdef CONFIG_MODULES
static void fprobe_set_ips(unsigned long *ips, unsigned int cnt, int remove,
			   int reset)
{
	ftrace_set_filter_ips(&fprobe_graph_ops.ops, ips, cnt, remove, reset);
}
#endif
#endif /* !CONFIG_DYNAMIC_FTRACE_WITH_ARGS && !CONFIG_DYNAMIC_FTRACE_WITH_REGS */

/* fgraph_ops callback; this processes fprobes which have an exit_handler. */
static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
			       struct ftrace_regs *fregs)
{
	unsigned long *fgraph_data = NULL;
	unsigned long func = trace->func;
	struct fprobe_hlist_node *node;
	struct rhlist_head *head, *pos;
	unsigned long ret_ip;
	int reserved_words;
	struct fprobe *fp;
	int used, ret;

	if (WARN_ON_ONCE(!fregs))
		return 0;

	guard(rcu)();
	head = rhltable_lookup(&fprobe_ip_table, &func, fprobe_rht_params);
	reserved_words = 0;
	rhl_for_each_entry_rcu(node, pos, head, hlist) {
		if (node->addr != func)
			continue;
		fp = READ_ONCE(node->fp);
		if (!fp || !fp->exit_handler)
			continue;
		/*
		 * Since a disabled fprobe can be enabled again before the
		 * handler loop below runs, ignore the fprobe's disabled flag
		 * here and reserve space for it anyway.
		 */
		reserved_words +=
			FPROBE_HEADER_SIZE_IN_LONG + SIZE_IN_LONG(fp->entry_data_size);
	}
	if (reserved_words) {
		fgraph_data = fgraph_reserve_data(gops->idx, reserved_words * sizeof(long));
		if (unlikely(!fgraph_data)) {
			rhl_for_each_entry_rcu(node, pos, head, hlist) {
				if (node->addr != func)
					continue;
				fp = READ_ONCE(node->fp);
				if (fp && !fprobe_disabled(fp) && !fprobe_is_ftrace(fp))
					fp->nmissed++;
			}
			return 0;
		}
	}

	/*
	 * TODO: recursion is detected by fgraph itself, so we need to add a
	 * callback there to increment the missed counter.
	 */
	ret_ip = ftrace_regs_get_return_address(fregs);
	used = 0;
	rhl_for_each_entry_rcu(node, pos, head, hlist) {
		int data_size;
		void *data;

		if (node->addr != func)
			continue;
		fp = READ_ONCE(node->fp);
		if (unlikely(!fp || fprobe_disabled(fp) || fprobe_is_ftrace(fp)))
			continue;

		data_size = fp->entry_data_size;
		if (data_size && fp->exit_handler)
			data = fgraph_data + used + FPROBE_HEADER_SIZE_IN_LONG;
		else
			data = NULL;

		if (fprobe_shared_with_kprobes(fp))
			ret = __fprobe_kprobe_handler(func, ret_ip, fp, fregs, data);
		else
			ret = __fprobe_handler(func, ret_ip, fp, fregs, data);

		/*
		 * If the entry_handler returns non-zero, nmissed is not
		 * incremented but the exit_handler is skipped.
		 */
		if (!ret && fp->exit_handler) {
			int size_words = SIZE_IN_LONG(data_size);

			if (write_fprobe_header(&fgraph_data[used], fp, size_words))
				used += FPROBE_HEADER_SIZE_IN_LONG + size_words;
		}
	}
	/* Clear the unused words so the exit handler loop stops at an empty header. */
	if (used < reserved_words)
		memset(fgraph_data + used, 0, (reserved_words - used) * sizeof(long));

	/* If any exit_handler is set, the reserved data must be used. */
	return used != 0;
}
NOKPROBE_SYMBOL(fprobe_fgraph_entry);

static void fprobe_return(struct ftrace_graph_ret *trace,
			  struct fgraph_ops *gops,
			  struct ftrace_regs *fregs)
{
	unsigned long *fgraph_data = NULL;
	unsigned long ret_ip;
	struct fprobe *fp;
	int size, curr;
	int size_words;

	fgraph_data = (unsigned long *)fgraph_retrieve_data(gops->idx, &size);
	if (WARN_ON_ONCE(!fgraph_data))
		return;
	size_words = SIZE_IN_LONG(size);
	ret_ip = ftrace_regs_get_instruction_pointer(fregs);

	preempt_disable_notrace();

	curr = 0;
	while (size_words > curr) {
		read_fprobe_header(&fgraph_data[curr], &fp, &size);
		if (!fp)
			break;
		curr += FPROBE_HEADER_SIZE_IN_LONG;
		if (is_fprobe_still_exist(fp) && !fprobe_disabled(fp)) {
			if (WARN_ON_ONCE(curr + size > size_words))
				break;
			fp->exit_handler(fp, trace->func, ret_ip, fregs,
					 size ? fgraph_data + curr : NULL);
		}
		curr += size;
	}
	preempt_enable_notrace();
}
NOKPROBE_SYMBOL(fprobe_return);

static struct fgraph_ops fprobe_graph_ops = {
	.entryfunc = fprobe_fgraph_entry,
	.retfunc = fprobe_return,
};
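/* The number of registered fprobes sharing fprobe_graph_ops. */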
static int fprobe_graph_active;

/* Add @addrs to the ftrace filter and register fgraph if needed. */
static int fprobe_graph_add_ips(unsigned long *addrs, int num)
{
	int ret;

	lockdep_assert_held(&fprobe_mutex);

	ret = ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 0, 0);
	if (ret)
		return ret;

	if (!fprobe_graph_active) {
		ret = register_ftrace_graph(&fprobe_graph_ops);
		if (WARN_ON_ONCE(ret)) {
			ftrace_free_filter(&fprobe_graph_ops.ops);
			return ret;
		}
	}
	fprobe_graph_active++;
	return 0;
}

/* Remove @addrs from the ftrace filter and unregister fgraph if possible. */
static void fprobe_graph_remove_ips(unsigned long *addrs, int num)
{
	lockdep_assert_held(&fprobe_mutex);

	fprobe_graph_active--;
	/* Q: should we unregister it ? */
	if (!fprobe_graph_active)
		unregister_ftrace_graph(&fprobe_graph_ops);

	if (num)
		ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);
}

#ifdef CONFIG_MODULES

#define FPROBE_IPS_BATCH_INIT 8
/* instruction pointer address list */
struct fprobe_addr_list {
	int index;
	int size;
	unsigned long *addrs;
};
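
/*
 * The list starts with FPROBE_IPS_BATCH_INIT entries and doubles its
 * capacity eagerly when it becomes full; if that doubling allocation
 * failed, the next fprobe_addr_list_add() returns -ENOSPC.
 */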

static int fprobe_addr_list_add(struct fprobe_addr_list *alist, unsigned long addr)
{
	unsigned long *addrs;

	/* Previously we failed to expand the list. */
	if (alist->index == alist->size)
		return -ENOSPC;

	alist->addrs[alist->index++] = addr;
	if (alist->index < alist->size)
		return 0;

	/* Expand the address list */
	addrs = kcalloc(alist->size * 2, sizeof(*addrs), GFP_KERNEL);
	if (!addrs)
		return -ENOMEM;

	memcpy(addrs, alist->addrs, alist->size * sizeof(*addrs));
	alist->size *= 2;
	kfree(alist->addrs);
	alist->addrs = addrs;

	return 0;
}

static void fprobe_remove_node_in_module(struct module *mod, struct fprobe_hlist_node *node,
					 struct fprobe_addr_list *alist)
{
	if (!within_module(node->addr, mod))
		return;
	if (delete_fprobe_node(node))
		return;
	/*
	 * If we failed to update alist, just continue updating the hlist.
	 * At least the user handler will not be hit anymore.
	 */
	fprobe_addr_list_add(alist, node->addr);
}

/* Handle module unloading to manage fprobe_ip_table. */
static int fprobe_module_callback(struct notifier_block *nb,
				  unsigned long val, void *data)
{
	struct fprobe_addr_list alist = {.size = FPROBE_IPS_BATCH_INIT};
	struct fprobe_hlist_node *node;
	struct rhashtable_iter iter;
	struct module *mod = data;

	if (val != MODULE_STATE_GOING)
		return NOTIFY_DONE;

	alist.addrs = kcalloc(alist.size, sizeof(*alist.addrs), GFP_KERNEL);
	/* If we failed to allocate memory, we cannot remove the ips from the hash. */
	if (!alist.addrs)
		return NOTIFY_DONE;

	mutex_lock(&fprobe_mutex);
	rhltable_walk_enter(&fprobe_ip_table, &iter);
	do {
		rhashtable_walk_start(&iter);

		while ((node = rhashtable_walk_next(&iter)) && !IS_ERR(node))
			fprobe_remove_node_in_module(mod, node, &alist);

		rhashtable_walk_stop(&iter);
	} while (node == ERR_PTR(-EAGAIN));
	rhashtable_walk_exit(&iter);

	if (alist.index > 0)
		fprobe_set_ips(alist.addrs, alist.index, 1, 0);
	mutex_unlock(&fprobe_mutex);

	kfree(alist.addrs);

	return NOTIFY_DONE;
}

static struct notifier_block fprobe_module_nb = {
	.notifier_call = fprobe_module_callback,
	.priority = 0,
};

static int __init init_fprobe_module(void)
{
	return register_module_notifier(&fprobe_module_nb);
}
early_initcall(init_fprobe_module);
#endif

static int symbols_cmp(const void *a, const void *b)
{
	const char **str_a = (const char **) a;
	const char **str_b = (const char **) b;

	return strcmp(*str_a, *str_b);
}

/* Convert symbols to their ftrace location addresses */
static unsigned long *get_ftrace_locations(const char **syms, int num)
{
	unsigned long *addrs;

	addrs = kcalloc(num, sizeof(*addrs), GFP_KERNEL);
	if (!addrs)
		return ERR_PTR(-ENOMEM);

	/* ftrace_lookup_symbols expects sorted symbols */
	sort(syms, num, sizeof(*syms), symbols_cmp, NULL);

	if (!ftrace_lookup_symbols(syms, num, addrs))
		return addrs;

	kfree(addrs);
	return ERR_PTR(-ENOENT);
}

struct filter_match_data {
	const char *filter;
	const char *notfilter;
	size_t index;
	size_t size;
	unsigned long *addrs;
	struct module **mods;
};

static int filter_match_callback(void *data, const char *name, unsigned long addr)
{
	struct filter_match_data *match = data;

	if (!glob_match(match->filter, name) ||
	    (match->notfilter && glob_match(match->notfilter, name)))
		return 0;

	if (!ftrace_location(addr))
		return 0;

	if (match->addrs) {
		struct module *mod = __module_text_address(addr);

		if (mod && !try_module_get(mod))
			return 0;

		match->mods[match->index] = mod;
		match->addrs[match->index] = addr;
	}
	match->index++;
	return match->index == match->size;
}

/*
 * Make the IP list from the filter/no-filter glob patterns.
 * Return the number of matched symbols, or -errno.
 * If @addrs == NULL, this just counts the number of matched symbols. If @addrs
 * is passed with an array, we need to pass an @mods array of the same size
 * to increment the module refcount for each symbol.
 * This means we also need to call `module_put` for each element of @mods after
 * using the @addrs.
 */
static int get_ips_from_filter(const char *filter, const char *notfilter,
			       unsigned long *addrs, struct module **mods,
			       size_t size)
{
	struct filter_match_data match = { .filter = filter, .notfilter = notfilter,
		.index = 0, .size = size, .addrs = addrs, .mods = mods};
	int ret;

	if (addrs && !mods)
		return -EINVAL;

	ret = kallsyms_on_each_symbol(filter_match_callback, &match);
	if (ret < 0)
		return ret;
	if (IS_ENABLED(CONFIG_MODULES)) {
		ret = module_kallsyms_on_each_symbol(NULL, filter_match_callback, &match);
		if (ret < 0)
			return ret;
	}

	return match.index ?: -ENOENT;
}
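
/*
 * For example (patterns are illustrative only), get_ips_from_filter("vfs_*",
 * "vfs_read*", NULL, NULL, FPROBE_IPS_MAX) counts every ftrace-patchable
 * symbol matching "vfs_*" except those matching "vfs_read*", without taking
 * module references.
 */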

static void fprobe_fail_cleanup(struct fprobe *fp)
{
	kfree(fp->hlist_array);
	fp->hlist_array = NULL;
}

/* Initialize the fprobe data structure. */
static int fprobe_init(struct fprobe *fp, unsigned long *addrs, int num)
{
	struct fprobe_hlist *hlist_array;
	unsigned long addr;
	int size, i;

	if (!fp || !addrs || num <= 0)
		return -EINVAL;

	size = ALIGN(fp->entry_data_size, sizeof(long));
	if (size > MAX_FPROBE_DATA_SIZE)
		return -E2BIG;
	fp->entry_data_size = size;

	hlist_array = kzalloc(struct_size(hlist_array, array, num), GFP_KERNEL);
	if (!hlist_array)
		return -ENOMEM;

	fp->nmissed = 0;

	hlist_array->size = num;
	fp->hlist_array = hlist_array;
	hlist_array->fp = fp;
	for (i = 0; i < num; i++) {
		hlist_array->array[i].fp = fp;
		addr = ftrace_location(addrs[i]);
		if (!addr) {
			fprobe_fail_cleanup(fp);
			return -ENOENT;
		}
		hlist_array->array[i].addr = addr;
	}
	return 0;
}

#define FPROBE_IPS_MAX INT_MAX

int fprobe_count_ips_from_filter(const char *filter, const char *notfilter)
{
	return get_ips_from_filter(filter, notfilter, NULL, NULL, FPROBE_IPS_MAX);
}

/**
 * register_fprobe() - Register fprobe to ftrace by pattern.
 * @fp: A fprobe data structure to be registered.
 * @filter: A wildcard pattern of probed symbols.
 * @notfilter: A wildcard pattern of NOT probed symbols.
 *
 * Register @fp to ftrace for enabling the probe on the symbols matching @filter.
 * If @notfilter is not NULL, symbols matching @notfilter are not probed.
 *
 * Return 0 if @fp is registered successfully, -errno if not.
 */
int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter)
{
	unsigned long *addrs __free(kfree) = NULL;
	struct module **mods __free(kfree) = NULL;
	int ret, num;

	if (!fp || !filter)
		return -EINVAL;

	num = get_ips_from_filter(filter, notfilter, NULL, NULL, FPROBE_IPS_MAX);
	if (num < 0)
		return num;

	addrs = kcalloc(num, sizeof(*addrs), GFP_KERNEL);
	if (!addrs)
		return -ENOMEM;

	mods = kcalloc(num, sizeof(*mods), GFP_KERNEL);
	if (!mods)
		return -ENOMEM;

	ret = get_ips_from_filter(filter, notfilter, addrs, mods, num);
	if (ret < 0)
		return ret;

	ret = register_fprobe_ips(fp, addrs, ret);

	for (int i = 0; i < num; i++) {
		if (mods[i])
			module_put(mods[i]);
	}
	return ret;
}
EXPORT_SYMBOL_GPL(register_fprobe);
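
/*
 * Usage sketch (illustrative only; all names here are hypothetical): a
 * minimal entry-only fprobe on kernel_clone(). Since no exit_handler is set,
 * it is dispatched through fprobe_ftrace_entry() above where available.
 *
 *	static int my_entry(struct fprobe *fp, unsigned long ip,
 *			    unsigned long ret_ip, struct ftrace_regs *fregs,
 *			    void *entry_data)
 *	{
 *		pr_info("entered %pS\n", (void *)ip);
 *		return 0;	// non-zero would skip the exit_handler
 *	}
 *
 *	static struct fprobe my_fprobe = {
 *		.entry_handler = my_entry,
 *	};
 *
 *	ret = register_fprobe(&my_fprobe, "kernel_clone", NULL);
 */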

/**
 * register_fprobe_ips() - Register fprobe to ftrace by address.
 * @fp: A fprobe data structure to be registered.
 * @addrs: An array of target function addresses.
 * @num: The number of entries of @addrs.
 *
 * Register @fp to ftrace for enabling the probe on the addresses given by @addrs.
 * The @addrs must be ftrace location addresses, which may be the symbol address
 * plus an arch-dependent offset.
 * If you are unsure what this means, please use the other registration functions.
 *
 * Return 0 if @fp is registered successfully, -errno if not.
 */
int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
{
	struct fprobe_hlist *hlist_array;
	int ret, i;

	ret = fprobe_init(fp, addrs, num);
	if (ret)
		return ret;

	mutex_lock(&fprobe_mutex);

	hlist_array = fp->hlist_array;
	if (fprobe_is_ftrace(fp))
		ret = fprobe_ftrace_add_ips(addrs, num);
	else
		ret = fprobe_graph_add_ips(addrs, num);

	if (!ret) {
		add_fprobe_hash(fp);
		for (i = 0; i < hlist_array->size; i++) {
			ret = insert_fprobe_node(&hlist_array->array[i]);
			if (ret)
				break;
		}
		/* Roll back the nodes inserted so far on error */
		if (ret) {
			for (i--; i >= 0; i--)
				delete_fprobe_node(&hlist_array->array[i]);
		}
	}
	mutex_unlock(&fprobe_mutex);

	if (ret)
		fprobe_fail_cleanup(fp);

	return ret;
}
EXPORT_SYMBOL_GPL(register_fprobe_ips);

/**
 * register_fprobe_syms() - Register fprobe to ftrace by symbols.
 * @fp: A fprobe data structure to be registered.
 * @syms: An array of target symbols.
 * @num: The number of entries of @syms.
 *
 * Register @fp to the symbols given by the @syms array. This will be useful
 * if you are sure the symbols exist in the kernel.
 *
 * Return 0 if @fp is registered successfully, -errno if not.
 */
int register_fprobe_syms(struct fprobe *fp, const char **syms, int num)
{
	unsigned long *addrs;
	int ret;

	if (!fp || !syms || num <= 0)
		return -EINVAL;

	addrs = get_ftrace_locations(syms, num);
	if (IS_ERR(addrs))
		return PTR_ERR(addrs);

	ret = register_fprobe_ips(fp, addrs, num);

	kfree(addrs);

	return ret;
}
EXPORT_SYMBOL_GPL(register_fprobe_syms);
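
/*
 * Usage sketch (illustrative only; my_fprobe is the hypothetical fprobe from
 * the register_fprobe() example above):
 *
 *	static const char *syms[] = {"vfs_read", "vfs_write"};
 *
 *	ret = register_fprobe_syms(&my_fprobe, syms, ARRAY_SIZE(syms));
 */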

bool fprobe_is_registered(struct fprobe *fp)
{
	if (!fp || !fp->hlist_array)
		return false;
	return true;
}

/**
 * unregister_fprobe() - Unregister fprobe.
 * @fp: A fprobe data structure to be unregistered.
 *
 * Unregister fprobe (and remove ftrace hooks from the function entries).
 *
 * Return 0 if @fp is unregistered successfully, -errno if not.
 */
int unregister_fprobe(struct fprobe *fp)
{
	struct fprobe_hlist *hlist_array;
	unsigned long *addrs = NULL;
	int ret = 0, i, count;

	mutex_lock(&fprobe_mutex);
	if (!fp || !is_fprobe_still_exist(fp)) {
		ret = -EINVAL;
		goto out;
	}

	hlist_array = fp->hlist_array;
	addrs = kcalloc(hlist_array->size, sizeof(unsigned long), GFP_KERNEL);
	if (!addrs) {
		ret = -ENOMEM;	/* TODO: Fallback to one-by-one loop */
		goto out;
	}

	/* Remove non-synonym ips from the table and hash */
	count = 0;
	for (i = 0; i < hlist_array->size; i++) {
		if (!delete_fprobe_node(&hlist_array->array[i]))
			addrs[count++] = hlist_array->array[i].addr;
	}
	del_fprobe_hash(fp);

	if (fprobe_is_ftrace(fp))
		fprobe_ftrace_remove_ips(addrs, count);
	else
		fprobe_graph_remove_ips(addrs, count);

	kfree_rcu(hlist_array, rcu);
	fp->hlist_array = NULL;

out:
	mutex_unlock(&fprobe_mutex);

	kfree(addrs);
	return ret;
}
EXPORT_SYMBOL_GPL(unregister_fprobe);

static int __init fprobe_initcall(void)
{
	rhltable_init(&fprobe_ip_table, &fprobe_rht_params);
	return 0;
}
core_initcall(fprobe_initcall);