xref: /linux/kernel/trace/fprobe.c (revision 08ed5c81f6058bd7d6cd28d1750667ad3ceee3d1)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * fprobe - Simple ftrace probe wrapper for function entry.
4  */
5 #define pr_fmt(fmt) "fprobe: " fmt
6 
7 #include <linux/err.h>
8 #include <linux/fprobe.h>
9 #include <linux/kallsyms.h>
10 #include <linux/kprobes.h>
11 #include <linux/list.h>
12 #include <linux/mutex.h>
13 #include <linux/rhashtable.h>
14 #include <linux/slab.h>
15 #include <linux/sort.h>
16 
17 #include <asm/fprobe.h>
18 
19 #include "trace.h"
20 
21 #define FPROBE_IP_HASH_BITS 8
22 #define FPROBE_IP_TABLE_SIZE (1 << FPROBE_IP_HASH_BITS)
23 
24 #define FPROBE_HASH_BITS 6
25 #define FPROBE_TABLE_SIZE (1 << FPROBE_HASH_BITS)
26 
27 #define SIZE_IN_LONG(x) ((x + sizeof(long) - 1) >> (sizeof(long) == 8 ? 3 : 2))
28 
29 /*
30  * fprobe_table: hold 'fprobe_hlist::hlist' for checking the fprobe still
31  *   exists. The key is the address of fprobe instance.
32  * fprobe_ip_table: hold 'fprobe_hlist::array[*]' for searching the fprobe
 *   instance related to the function address. The key is the ftrace IP
34  *   address.
35  *
36  * When unregistering the fprobe, fprobe_hlist::fp and fprobe_hlist::array[*].fp
37  * are set NULL and delete those from both hash tables (by hlist_del_rcu).
38  * After an RCU grace period, the fprobe_hlist itself will be released.
39  *
40  * fprobe_table and fprobe_ip_table can be accessed from either
 *  - Normal hlist traversal and RCU add/del while 'fprobe_mutex' is held.
42  *  - RCU hlist traversal under disabling preempt
43  */
44 static struct hlist_head fprobe_table[FPROBE_TABLE_SIZE];
45 static struct rhltable fprobe_ip_table;
46 static DEFINE_MUTEX(fprobe_mutex);
47 
48 static u32 fprobe_node_hashfn(const void *data, u32 len, u32 seed)
49 {
50 	return hash_ptr(*(unsigned long **)data, 32);
51 }
52 
53 static int fprobe_node_cmp(struct rhashtable_compare_arg *arg,
54 			   const void *ptr)
55 {
56 	unsigned long key = *(unsigned long *)arg->key;
57 	const struct fprobe_hlist_node *n = ptr;
58 
59 	return n->addr != key;
60 }
61 
62 static u32 fprobe_node_obj_hashfn(const void *data, u32 len, u32 seed)
63 {
64 	const struct fprobe_hlist_node *n = data;
65 
66 	return hash_ptr((void *)n->addr, 32);
67 }
68 
/*
 * rhltable parameters: entries are fprobe_hlist_node, keyed by the probed
 * ftrace address ('addr'). Being an rhltable, several nodes may share one
 * address (multiple fprobes on the same function).
 */
static const struct rhashtable_params fprobe_rht_params = {
	.head_offset		= offsetof(struct fprobe_hlist_node, hlist),
	.key_offset		= offsetof(struct fprobe_hlist_node, addr),
	.key_len		= sizeof_field(struct fprobe_hlist_node, addr),
	.hashfn			= fprobe_node_hashfn,
	.obj_hashfn		= fprobe_node_obj_hashfn,
	.obj_cmpfn		= fprobe_node_cmp,
	.automatic_shrinking	= true,
};
78 
79 /* Node insertion and deletion requires the fprobe_mutex */
static int insert_fprobe_node(struct fprobe_hlist_node *node)
{
	lockdep_assert_held(&fprobe_mutex);

	/* Returns 0 on success or a negative rhashtable error code. */
	return rhltable_insert(&fprobe_ip_table, &node->hlist, fprobe_rht_params);
}
86 
/* Return true if there are synonyms (other nodes probing the same address) */
88 static bool delete_fprobe_node(struct fprobe_hlist_node *node)
89 {
90 	lockdep_assert_held(&fprobe_mutex);
91 	bool ret;
92 
93 	/* Avoid double deleting */
94 	if (READ_ONCE(node->fp) != NULL) {
95 		WRITE_ONCE(node->fp, NULL);
96 		rhltable_remove(&fprobe_ip_table, &node->hlist,
97 				fprobe_rht_params);
98 	}
99 
100 	rcu_read_lock();
101 	ret = !!rhltable_lookup(&fprobe_ip_table, &node->addr,
102 				fprobe_rht_params);
103 	rcu_read_unlock();
104 
105 	return ret;
106 }
107 
108 /* Check existence of the fprobe */
static bool is_fprobe_still_exist(struct fprobe *fp)
{
	struct hlist_head *head;
	struct fprobe_hlist *fph;

	/* fprobe_table is keyed by the fprobe instance address itself. */
	head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)];
	hlist_for_each_entry_rcu(fph, head, hlist,
				 lockdep_is_held(&fprobe_mutex)) {
		if (fph->fp == fp)
			return true;
	}
	return false;
}
NOKPROBE_SYMBOL(is_fprobe_still_exist);
123 
124 static int add_fprobe_hash(struct fprobe *fp)
125 {
126 	struct fprobe_hlist *fph = fp->hlist_array;
127 	struct hlist_head *head;
128 
129 	lockdep_assert_held(&fprobe_mutex);
130 
131 	if (WARN_ON_ONCE(!fph))
132 		return -EINVAL;
133 
134 	if (is_fprobe_still_exist(fp))
135 		return -EEXIST;
136 
137 	head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)];
138 	hlist_add_head_rcu(&fp->hlist_array->hlist, head);
139 	return 0;
140 }
141 
static int del_fprobe_hash(struct fprobe *fp)
{
	struct fprobe_hlist *fph = fp->hlist_array;

	lockdep_assert_held(&fprobe_mutex);

	if (WARN_ON_ONCE(!fph))
		return -EINVAL;

	if (!is_fprobe_still_exist(fp))
		return -ENOENT;

	/* Clear ->fp before unlinking so concurrent RCU readers skip it. */
	fph->fp = NULL;
	hlist_del_rcu(&fph->hlist);
	return 0;
}
158 
159 #ifdef ARCH_DEFINE_ENCODE_FPROBE_HEADER
160 
161 /* The arch should encode fprobe_header info into one unsigned long */
162 #define FPROBE_HEADER_SIZE_IN_LONG	1
163 
/*
 * Pack @fp and @size_words into one shadow-stack slot using the arch
 * encoding. Returns false (writing nothing) if the size is too large or
 * @fp cannot be encoded by this arch.
 */
static inline bool write_fprobe_header(unsigned long *stack,
					struct fprobe *fp, unsigned int size_words)
{
	if (WARN_ON_ONCE(size_words > MAX_FPROBE_DATA_SIZE_WORD ||
			 !arch_fprobe_header_encodable(fp)))
		return false;

	*stack = arch_encode_fprobe_header(fp, size_words);
	return true;
}
174 
/* Decode the arch-encoded slot written by write_fprobe_header(). */
static inline void read_fprobe_header(unsigned long *stack,
					struct fprobe **fp, unsigned int *size_words)
{
	*fp = arch_decode_fprobe_header_fp(*stack);
	*size_words = arch_decode_fprobe_header_size(*stack);
}
181 
182 #else
183 
184 /* Generic fprobe_header */
struct __fprobe_header {
	struct fprobe *fp;		/* Owning probe; NULL marks an unused slot. */
	unsigned long size_words;	/* Entry data size, in longs. */
} __packed;
189 
190 #define FPROBE_HEADER_SIZE_IN_LONG	SIZE_IN_LONG(sizeof(struct __fprobe_header))
191 
192 static inline bool write_fprobe_header(unsigned long *stack,
193 					struct fprobe *fp, unsigned int size_words)
194 {
195 	struct __fprobe_header *fph = (struct __fprobe_header *)stack;
196 
197 	if (WARN_ON_ONCE(size_words > MAX_FPROBE_DATA_SIZE_WORD))
198 		return false;
199 
200 	fph->fp = fp;
201 	fph->size_words = size_words;
202 	return true;
203 }
204 
205 static inline void read_fprobe_header(unsigned long *stack,
206 					struct fprobe **fp, unsigned int *size_words)
207 {
208 	struct __fprobe_header *fph = (struct __fprobe_header *)stack;
209 
210 	*fp = fph->fp;
211 	*size_words = fph->size_words;
212 }
213 
214 #endif
215 
216 /*
217  * fprobe shadow stack management:
218  * Since fprobe shares a single fgraph_ops, it needs to share the stack entry
219  * among the probes on the same function exit. Note that a new probe can be
220  * registered before a target function is returning, we can not use the hash
221  * table to find the corresponding probes. Thus the probe address is stored on
222  * the shadow stack with its entry data size.
223  *
224  */
225 static inline int __fprobe_handler(unsigned long ip, unsigned long parent_ip,
226 				   struct fprobe *fp, struct ftrace_regs *fregs,
227 				   void *data)
228 {
229 	if (!fp->entry_handler)
230 		return 0;
231 
232 	return fp->entry_handler(fp, ip, parent_ip, fregs, data);
233 }
234 
static inline int __fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
					  struct fprobe *fp, struct ftrace_regs *fregs,
					  void *data)
{
	int ret;
	/*
	 * This user handler is shared with other kprobes and is not expected to be
	 * called recursively. So if any other kprobe handler is running, this will
	 * exit as kprobe does. See the section 'Share the callbacks with kprobes'
	 * in Documentation/trace/fprobe.rst for more information.
	 */
	if (unlikely(kprobe_running())) {
		fp->nmissed++;
		return 0;
	}

	/* Mark a kprobe as busy around the handler so kprobes see us. */
	kprobe_busy_begin();
	ret = __fprobe_handler(ip, parent_ip, fp, fregs, data);
	kprobe_busy_end();
	return ret;
}
256 
257 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
258 /* ftrace_ops callback, this processes fprobes which have only entry_handler. */
static void fprobe_ftrace_entry(unsigned long ip, unsigned long parent_ip,
	struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
	struct fprobe_hlist_node *node;
	struct rhlist_head *head, *pos;
	struct fprobe *fp;
	int bit;

	bit = ftrace_test_recursion_trylock(ip, parent_ip);
	if (bit < 0)
		return;

	/*
	 * ftrace_test_recursion_trylock() disables preemption, but
	 * rhltable_lookup() checks whether rcu_read_lock() is held.
	 * So we take rcu_read_lock() here.
	 */
	rcu_read_lock();
	head = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params);

	rhl_for_each_entry_rcu(node, pos, head, hlist) {
		/*
		 * NOTE(review): this bails with 'break' on an address mismatch
		 * while fprobe_fgraph_entry() uses 'continue' — confirm whether
		 * the rhlist can interleave entries for other addresses.
		 */
		if (node->addr != ip)
			break;
		fp = READ_ONCE(node->fp);
		/* Skip dead/disabled probes and those with an exit_handler (fgraph path). */
		if (unlikely(!fp || fprobe_disabled(fp) || fp->exit_handler))
			continue;

		if (fprobe_shared_with_kprobes(fp))
			__fprobe_kprobe_handler(ip, parent_ip, fp, fregs, NULL);
		else
			__fprobe_handler(ip, parent_ip, fp, fregs, NULL);
	}
	rcu_read_unlock();
	ftrace_test_recursion_unlock(bit);
}
NOKPROBE_SYMBOL(fprobe_ftrace_entry);
295 
static struct ftrace_ops fprobe_ftrace_ops = {
	.func	= fprobe_ftrace_entry,
	.flags	= FTRACE_OPS_FL_SAVE_REGS,
};
/* Number of users of fprobe_ftrace_ops; protected by fprobe_mutex. */
static int fprobe_ftrace_active;
301 
/* Add @addrs to the fprobe_ftrace_ops filter, registering it for the first user. */
static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
{
	int ret;

	lockdep_assert_held(&fprobe_mutex);

	ret = ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 0, 0);
	if (ret)
		return ret;

	if (!fprobe_ftrace_active) {
		ret = register_ftrace_function(&fprobe_ftrace_ops);
		if (ret) {
			/* No other user exists yet, so dropping every filter is safe. */
			ftrace_free_filter(&fprobe_ftrace_ops);
			return ret;
		}
	}
	fprobe_ftrace_active++;
	return 0;
}
322 
/* Remove @addrs from the filter, unregistering the ops when the last user leaves. */
static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
{
	lockdep_assert_held(&fprobe_mutex);

	fprobe_ftrace_active--;
	if (!fprobe_ftrace_active)
		unregister_ftrace_function(&fprobe_ftrace_ops);
	if (num)
		ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 1, 0);
}
333 
/* Entry-only probes (no exit_handler) use ftrace_ops instead of fgraph. */
static bool fprobe_is_ftrace(struct fprobe *fp)
{
	return !fp->exit_handler;
}
338 #else
/* Without CONFIG_DYNAMIC_FTRACE_WITH_REGS, the direct ftrace path is unavailable. */
static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
{
	return -ENOENT;
}
343 
/* Nothing to remove: the ftrace path is compiled out. */
static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
{
}
347 
/* Every probe takes the fgraph path in this configuration. */
static bool fprobe_is_ftrace(struct fprobe *fp)
{
	return false;
}
352 #endif
353 
354 /* fgraph_ops callback, this processes fprobes which have exit_handler. */
355 static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
356 			       struct ftrace_regs *fregs)
357 {
358 	unsigned long *fgraph_data = NULL;
359 	unsigned long func = trace->func;
360 	struct fprobe_hlist_node *node;
361 	struct rhlist_head *head, *pos;
362 	unsigned long ret_ip;
363 	int reserved_words;
364 	struct fprobe *fp;
365 	int used, ret;
366 
367 	if (WARN_ON_ONCE(!fregs))
368 		return 0;
369 
370 	guard(rcu)();
371 	head = rhltable_lookup(&fprobe_ip_table, &func, fprobe_rht_params);
372 	reserved_words = 0;
373 	rhl_for_each_entry_rcu(node, pos, head, hlist) {
374 		if (node->addr != func)
375 			continue;
376 		fp = READ_ONCE(node->fp);
377 		if (!fp || !fp->exit_handler)
378 			continue;
379 		/*
380 		 * Since fprobe can be enabled until the next loop, we ignore the
381 		 * fprobe's disabled flag in this loop.
382 		 */
383 		reserved_words +=
384 			FPROBE_HEADER_SIZE_IN_LONG + SIZE_IN_LONG(fp->entry_data_size);
385 	}
386 	if (reserved_words) {
387 		fgraph_data = fgraph_reserve_data(gops->idx, reserved_words * sizeof(long));
388 		if (unlikely(!fgraph_data)) {
389 			rhl_for_each_entry_rcu(node, pos, head, hlist) {
390 				if (node->addr != func)
391 					continue;
392 				fp = READ_ONCE(node->fp);
393 				if (fp && !fprobe_disabled(fp) && !fprobe_is_ftrace(fp))
394 					fp->nmissed++;
395 			}
396 			return 0;
397 		}
398 	}
399 
400 	/*
401 	 * TODO: recursion detection has been done in the fgraph. Thus we need
402 	 * to add a callback to increment missed counter.
403 	 */
404 	ret_ip = ftrace_regs_get_return_address(fregs);
405 	used = 0;
406 	rhl_for_each_entry_rcu(node, pos, head, hlist) {
407 		int data_size;
408 		void *data;
409 
410 		if (node->addr != func)
411 			continue;
412 		fp = READ_ONCE(node->fp);
413 		if (unlikely(!fp || fprobe_disabled(fp) || fprobe_is_ftrace(fp)))
414 			continue;
415 
416 		data_size = fp->entry_data_size;
417 		if (data_size && fp->exit_handler)
418 			data = fgraph_data + used + FPROBE_HEADER_SIZE_IN_LONG;
419 		else
420 			data = NULL;
421 
422 		if (fprobe_shared_with_kprobes(fp))
423 			ret = __fprobe_kprobe_handler(func, ret_ip, fp, fregs, data);
424 		else
425 			ret = __fprobe_handler(func, ret_ip, fp, fregs, data);
426 
427 		/* If entry_handler returns !0, nmissed is not counted but skips exit_handler. */
428 		if (!ret && fp->exit_handler) {
429 			int size_words = SIZE_IN_LONG(data_size);
430 
431 			if (write_fprobe_header(&fgraph_data[used], fp, size_words))
432 				used += FPROBE_HEADER_SIZE_IN_LONG + size_words;
433 		}
434 	}
435 	if (used < reserved_words)
436 		memset(fgraph_data + used, 0, reserved_words - used);
437 
438 	/* If any exit_handler is set, data must be used. */
439 	return used != 0;
440 }
441 NOKPROBE_SYMBOL(fprobe_fgraph_entry);
442 
static void fprobe_return(struct ftrace_graph_ret *trace,
			  struct fgraph_ops *gops,
			  struct ftrace_regs *fregs)
{
	unsigned long *fgraph_data = NULL;
	unsigned long ret_ip;
	struct fprobe *fp;
	int size, curr;
	int size_words;

	/* Retrieve the per-call record area reserved by fprobe_fgraph_entry(). */
	fgraph_data = (unsigned long *)fgraph_retrieve_data(gops->idx, &size);
	if (WARN_ON_ONCE(!fgraph_data))
		return;
	size_words = SIZE_IN_LONG(size);
	ret_ip = ftrace_regs_get_instruction_pointer(fregs);

	preempt_disable_notrace();

	/*
	 * Walk the packed [header][entry data] records; a cleared header
	 * (fp == NULL) marks the end of the used region.
	 */
	curr = 0;
	while (size_words > curr) {
		/* 'size' is re-used here as this record's data size in longs. */
		read_fprobe_header(&fgraph_data[curr], &fp, &size);
		if (!fp)
			break;
		curr += FPROBE_HEADER_SIZE_IN_LONG;
		if (is_fprobe_still_exist(fp) && !fprobe_disabled(fp)) {
			if (WARN_ON_ONCE(curr + size > size_words))
				break;
			fp->exit_handler(fp, trace->func, ret_ip, fregs,
					 size ? fgraph_data + curr : NULL);
		}
		curr += size;
	}
	preempt_enable_notrace();
}
NOKPROBE_SYMBOL(fprobe_return);
478 
static struct fgraph_ops fprobe_graph_ops = {
	.entryfunc	= fprobe_fgraph_entry,
	.retfunc	= fprobe_return,
};
/* Number of users of fprobe_graph_ops; protected by fprobe_mutex. */
static int fprobe_graph_active;
484 
485 /* Add @addrs to the ftrace filter and register fgraph if needed. */
static int fprobe_graph_add_ips(unsigned long *addrs, int num)
{
	int ret;

	lockdep_assert_held(&fprobe_mutex);

	ret = ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 0, 0);
	if (ret)
		return ret;

	/* The shared fgraph_ops is registered only for the first user. */
	if (!fprobe_graph_active) {
		ret = register_ftrace_graph(&fprobe_graph_ops);
		if (WARN_ON_ONCE(ret)) {
			ftrace_free_filter(&fprobe_graph_ops.ops);
			return ret;
		}
	}
	fprobe_graph_active++;
	return 0;
}
506 
507 /* Remove @addrs from the ftrace filter and unregister fgraph if possible. */
static void fprobe_graph_remove_ips(unsigned long *addrs, int num)
{
	lockdep_assert_held(&fprobe_mutex);

	fprobe_graph_active--;
	/* Q: should we unregister it ? */
	if (!fprobe_graph_active)
		unregister_ftrace_graph(&fprobe_graph_ops);

	/* @num may be 0 when the caller has no addresses left to drop. */
	if (num)
		ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);
}
520 
521 #ifdef CONFIG_MODULES
522 
523 #define FPROBE_IPS_BATCH_INIT 8
524 /* instruction pointer address list */
struct fprobe_addr_list {
	int index;		/* Next free slot (== number of stored addresses). */
	int size;		/* Current capacity of 'addrs'. */
	unsigned long *addrs;	/* Dynamically grown array of addresses. */
};
530 
531 static int fprobe_addr_list_add(struct fprobe_addr_list *alist, unsigned long addr)
532 {
533 	unsigned long *addrs;
534 
535 	/* Previously we failed to expand the list. */
536 	if (alist->index == alist->size)
537 		return -ENOSPC;
538 
539 	alist->addrs[alist->index++] = addr;
540 	if (alist->index < alist->size)
541 		return 0;
542 
543 	/* Expand the address list */
544 	addrs = kcalloc(alist->size * 2, sizeof(*addrs), GFP_KERNEL);
545 	if (!addrs)
546 		return -ENOMEM;
547 
548 	memcpy(addrs, alist->addrs, alist->size * sizeof(*addrs));
549 	alist->size *= 2;
550 	kfree(alist->addrs);
551 	alist->addrs = addrs;
552 
553 	return 0;
554 }
555 
static void fprobe_remove_node_in_module(struct module *mod, struct fprobe_hlist_node *node,
					 struct fprobe_addr_list *alist)
{
	if (!within_module(node->addr, mod))
		return;
	/* A true return means another node still probes this address. */
	if (delete_fprobe_node(node))
		return;
	/*
	 * If failed to update alist, just continue to update hlist.
	 * Therefore, at least the user handler will not hit anymore.
	 */
	fprobe_addr_list_add(alist, node->addr);
}
569 
570 /* Handle module unloading to manage fprobe_ip_table. */
static int fprobe_module_callback(struct notifier_block *nb,
				  unsigned long val, void *data)
{
	struct fprobe_addr_list alist = {.size = FPROBE_IPS_BATCH_INIT};
	struct fprobe_hlist_node *node;
	struct rhashtable_iter iter;
	struct module *mod = data;

	if (val != MODULE_STATE_GOING)
		return NOTIFY_DONE;

	alist.addrs = kcalloc(alist.size, sizeof(*alist.addrs), GFP_KERNEL);
	/* If failed to alloc memory, we can not remove ips from hash. */
	if (!alist.addrs)
		return NOTIFY_DONE;

	mutex_lock(&fprobe_mutex);
	/* Walk the whole table, restarting when the walker reports -EAGAIN. */
	rhltable_walk_enter(&fprobe_ip_table, &iter);
	do {
		rhashtable_walk_start(&iter);

		while ((node = rhashtable_walk_next(&iter)) && !IS_ERR(node))
			fprobe_remove_node_in_module(mod, node, &alist);

		rhashtable_walk_stop(&iter);
	} while (node == ERR_PTR(-EAGAIN));
	rhashtable_walk_exit(&iter);

	if (alist.index > 0) {
		/* Drop the collected addresses from both ftrace filters. */
		ftrace_set_filter_ips(&fprobe_graph_ops.ops,
				      alist.addrs, alist.index, 1, 0);
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
		ftrace_set_filter_ips(&fprobe_ftrace_ops,
				      alist.addrs, alist.index, 1, 0);
#endif
	}
	mutex_unlock(&fprobe_mutex);

	kfree(alist.addrs);

	return NOTIFY_DONE;
}
613 
/* Notifier that cleans probes out of a module being unloaded. */
static struct notifier_block fprobe_module_nb = {
	.notifier_call = fprobe_module_callback,
	.priority = 0,
};
618 
/* Register the module notifier early so module unloads are never missed. */
static int __init init_fprobe_module(void)
{
	return register_module_notifier(&fprobe_module_nb);
}
early_initcall(init_fprobe_module);
624 #endif
625 
/* sort()-style comparator for an array of symbol-name strings. */
static int symbols_cmp(const void *a, const void *b)
{
	const char * const *sym_a = a;
	const char * const *sym_b = b;

	return strcmp(*sym_a, *sym_b);
}
633 
634 /* Convert ftrace location address from symbols */
635 static unsigned long *get_ftrace_locations(const char **syms, int num)
636 {
637 	unsigned long *addrs;
638 
639 	/* Convert symbols to symbol address */
640 	addrs = kcalloc(num, sizeof(*addrs), GFP_KERNEL);
641 	if (!addrs)
642 		return ERR_PTR(-ENOMEM);
643 
644 	/* ftrace_lookup_symbols expects sorted symbols */
645 	sort(syms, num, sizeof(*syms), symbols_cmp, NULL);
646 
647 	if (!ftrace_lookup_symbols(syms, num, addrs))
648 		return addrs;
649 
650 	kfree(addrs);
651 	return ERR_PTR(-ENOENT);
652 }
653 
struct filter_match_data {
	const char *filter;	/* Glob of symbols to accept. */
	const char *notfilter;	/* Optional glob of symbols to reject. */
	size_t index;		/* Number of matches found so far. */
	size_t size;		/* Capacity of 'addrs'/'mods' (stop when reached). */
	unsigned long *addrs;	/* NULL means count-only mode. */
	struct module **mods;	/* Module refs taken per stored address. */
};
662 
/*
 * kallsyms callback: record (or just count, if 'addrs' is NULL) each symbol
 * matching 'filter' but not 'notfilter'. Returns non-zero to stop the walk
 * once 'size' matches have been collected.
 */
static int filter_match_callback(void *data, const char *name, unsigned long addr)
{
	struct filter_match_data *match = data;

	if (!glob_match(match->filter, name) ||
	    (match->notfilter && glob_match(match->notfilter, name)))
		return 0;

	/* Skip symbols that are not ftrace-patchable. */
	if (!ftrace_location(addr))
		return 0;

	if (match->addrs) {
		/* Pin the owning module so the address stays valid. */
		struct module *mod = __module_text_address(addr);

		if (mod && !try_module_get(mod))
			return 0;

		match->mods[match->index] = mod;
		match->addrs[match->index] = addr;
	}
	match->index++;
	return match->index == match->size;
}
686 
687 /*
688  * Make IP list from the filter/no-filter glob patterns.
689  * Return the number of matched symbols, or errno.
690  * If @addrs == NULL, this just counts the number of matched symbols. If @addrs
691  * is passed with an array, we need to pass the an @mods array of the same size
692  * to increment the module refcount for each symbol.
693  * This means we also need to call `module_put` for each element of @mods after
694  * using the @addrs.
695  */
static int get_ips_from_filter(const char *filter, const char *notfilter,
			       unsigned long *addrs, struct module **mods,
			       size_t size)
{
	struct filter_match_data match = { .filter = filter, .notfilter = notfilter,
		.index = 0, .size = size, .addrs = addrs, .mods = mods};
	int ret;

	/* Collecting addresses requires a mods array to hold the refcounts. */
	if (addrs && !mods)
		return -EINVAL;

	/* Scan core kernel symbols first, then (if configured) module symbols. */
	ret = kallsyms_on_each_symbol(filter_match_callback, &match);
	if (ret < 0)
		return ret;
	if (IS_ENABLED(CONFIG_MODULES)) {
		ret = module_kallsyms_on_each_symbol(NULL, filter_match_callback, &match);
		if (ret < 0)
			return ret;
	}

	return match.index ?: -ENOENT;
}
718 
/* Release the hlist_array when registration fails partway through. */
static void fprobe_fail_cleanup(struct fprobe *fp)
{
	kfree(fp->hlist_array);
	fp->hlist_array = NULL;
}
724 
725 /* Initialize the fprobe data structure. */
static int fprobe_init(struct fprobe *fp, unsigned long *addrs, int num)
{
	struct fprobe_hlist *hlist_array;
	unsigned long addr;
	int size, i;

	if (!fp || !addrs || num <= 0)
		return -EINVAL;

	/* Round the per-entry data size up to a whole number of longs. */
	size = ALIGN(fp->entry_data_size, sizeof(long));
	if (size > MAX_FPROBE_DATA_SIZE)
		return -E2BIG;
	fp->entry_data_size = size;

	hlist_array = kzalloc(struct_size(hlist_array, array, num), GFP_KERNEL);
	if (!hlist_array)
		return -ENOMEM;

	fp->nmissed = 0;

	hlist_array->size = num;
	fp->hlist_array = hlist_array;
	hlist_array->fp = fp;
	for (i = 0; i < num; i++) {
		hlist_array->array[i].fp = fp;
		/* Normalize each address to its exact ftrace patch site. */
		addr = ftrace_location(addrs[i]);
		if (!addr) {
			fprobe_fail_cleanup(fp);
			return -ENOENT;
		}
		hlist_array->array[i].addr = addr;
	}
	return 0;
}
760 
761 #define FPROBE_IPS_MAX	INT_MAX
762 
/* Count the ftrace-able symbols matching @filter (and not @notfilter). */
int fprobe_count_ips_from_filter(const char *filter, const char *notfilter)
{
	return get_ips_from_filter(filter, notfilter, NULL, NULL, FPROBE_IPS_MAX);
}
767 
768 /**
769  * register_fprobe() - Register fprobe to ftrace by pattern.
770  * @fp: A fprobe data structure to be registered.
771  * @filter: A wildcard pattern of probed symbols.
772  * @notfilter: A wildcard pattern of NOT probed symbols.
773  *
774  * Register @fp to ftrace for enabling the probe on the symbols matched to @filter.
775  * If @notfilter is not NULL, the symbols matched the @notfilter are not probed.
776  *
777  * Return 0 if @fp is registered successfully, -errno if not.
778  */
int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter)
{
	unsigned long *addrs __free(kfree) = NULL;
	struct module **mods __free(kfree) = NULL;
	int ret, num;

	if (!fp || !filter)
		return -EINVAL;

	/* First pass: count the matches to size the address/module arrays. */
	num = get_ips_from_filter(filter, notfilter, NULL, NULL, FPROBE_IPS_MAX);
	if (num < 0)
		return num;

	addrs = kcalloc(num, sizeof(*addrs), GFP_KERNEL);
	if (!addrs)
		return -ENOMEM;

	mods = kcalloc(num, sizeof(*mods), GFP_KERNEL);
	if (!mods)
		return -ENOMEM;

	/* Second pass: fill the arrays (may match fewer than 'num' symbols). */
	ret = get_ips_from_filter(filter, notfilter, addrs, mods, num);
	if (ret < 0)
		return ret;

	ret = register_fprobe_ips(fp, addrs, ret);

	/* Drop the module references taken by filter_match_callback(). */
	for (int i = 0; i < num; i++) {
		if (mods[i])
			module_put(mods[i]);
	}
	return ret;
}
EXPORT_SYMBOL_GPL(register_fprobe);
813 
814 /**
815  * register_fprobe_ips() - Register fprobe to ftrace by address.
816  * @fp: A fprobe data structure to be registered.
817  * @addrs: An array of target function address.
818  * @num: The number of entries of @addrs.
819  *
820  * Register @fp to ftrace for enabling the probe on the address given by @addrs.
821  * The @addrs must be the addresses of ftrace location address, which may be
822  * the symbol address + arch-dependent offset.
 * If you are unsure what this means, please use other registration functions.
824  *
825  * Return 0 if @fp is registered successfully, -errno if not.
826  */
int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
{
	struct fprobe_hlist *hlist_array;
	int ret, i;

	ret = fprobe_init(fp, addrs, num);
	if (ret)
		return ret;

	mutex_lock(&fprobe_mutex);

	hlist_array = fp->hlist_array;
	/* Entry-only probes use ftrace directly; others go through fgraph. */
	if (fprobe_is_ftrace(fp))
		ret = fprobe_ftrace_add_ips(addrs, num);
	else
		ret = fprobe_graph_add_ips(addrs, num);

	if (!ret) {
		/*
		 * NOTE(review): add_fprobe_hash() can fail (-EINVAL/-EEXIST)
		 * but its return value is ignored here, and on node-insert
		 * failure the ftrace/fgraph ips are not removed — confirm
		 * these are intentional.
		 */
		add_fprobe_hash(fp);
		for (i = 0; i < hlist_array->size; i++) {
			ret = insert_fprobe_node(&hlist_array->array[i]);
			if (ret)
				break;
		}
		/* fallback on insert error */
		if (ret) {
			for (i--; i >= 0; i--)
				delete_fprobe_node(&hlist_array->array[i]);
		}
	}
	mutex_unlock(&fprobe_mutex);

	if (ret)
		fprobe_fail_cleanup(fp);

	return ret;
}
EXPORT_SYMBOL_GPL(register_fprobe_ips);
865 
866 /**
867  * register_fprobe_syms() - Register fprobe to ftrace by symbols.
868  * @fp: A fprobe data structure to be registered.
869  * @syms: An array of target symbols.
870  * @num: The number of entries of @syms.
871  *
872  * Register @fp to the symbols given by @syms array. This will be useful if
873  * you are sure the symbols exist in the kernel.
874  *
875  * Return 0 if @fp is registered successfully, -errno if not.
876  */
int register_fprobe_syms(struct fprobe *fp, const char **syms, int num)
{
	unsigned long *addrs;
	int ret;

	if (!fp || !syms || num <= 0)
		return -EINVAL;

	/* Note: get_ftrace_locations() sorts @syms in place as a side effect. */
	addrs = get_ftrace_locations(syms, num);
	if (IS_ERR(addrs))
		return PTR_ERR(addrs);

	ret = register_fprobe_ips(fp, addrs, num);

	kfree(addrs);

	return ret;
}
EXPORT_SYMBOL_GPL(register_fprobe_syms);
896 
897 bool fprobe_is_registered(struct fprobe *fp)
898 {
899 	if (!fp || !fp->hlist_array)
900 		return false;
901 	return true;
902 }
903 
904 /**
905  * unregister_fprobe() - Unregister fprobe.
906  * @fp: A fprobe data structure to be unregistered.
907  *
908  * Unregister fprobe (and remove ftrace hooks from the function entries).
909  *
910  * Return 0 if @fp is unregistered successfully, -errno if not.
911  */
int unregister_fprobe(struct fprobe *fp)
{
	struct fprobe_hlist *hlist_array;
	unsigned long *addrs = NULL;
	int ret = 0, i, count;

	mutex_lock(&fprobe_mutex);
	if (!fp || !is_fprobe_still_exist(fp)) {
		ret = -EINVAL;
		goto out;
	}

	hlist_array = fp->hlist_array;
	addrs = kcalloc(hlist_array->size, sizeof(unsigned long), GFP_KERNEL);
	if (!addrs) {
		ret = -ENOMEM;	/* TODO: Fallback to one-by-one loop */
		goto out;
	}

	/*
	 * Remove non-synonym ips from table and hash: only addresses that no
	 * other probe still uses may be dropped from the ftrace filter.
	 */
	count = 0;
	for (i = 0; i < hlist_array->size; i++) {
		if (!delete_fprobe_node(&hlist_array->array[i]))
			addrs[count++] = hlist_array->array[i].addr;
	}
	del_fprobe_hash(fp);

	if (fprobe_is_ftrace(fp))
		fprobe_ftrace_remove_ips(addrs, count);
	else
		fprobe_graph_remove_ips(addrs, count);

	/* RCU readers may still see hlist_array; free after a grace period. */
	kfree_rcu(hlist_array, rcu);
	fp->hlist_array = NULL;

out:
	mutex_unlock(&fprobe_mutex);

	kfree(addrs);
	return ret;
}
EXPORT_SYMBOL_GPL(unregister_fprobe);
954 
955 static int __init fprobe_initcall(void)
956 {
957 	rhltable_init(&fprobe_ip_table, &fprobe_rht_params);
958 	return 0;
959 }
960 core_initcall(fprobe_initcall);
961