// SPDX-License-Identifier: GPL-2.0
/*
 * fprobe - Simple ftrace probe wrapper for function entry.
 */
#define pr_fmt(fmt) "fprobe: " fmt

#include <linux/err.h>
#include <linux/fprobe.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/rhashtable.h>
#include <linux/slab.h>
#include <linux/sort.h>

#include <asm/fprobe.h>

#include "trace.h"

#define FPROBE_IP_HASH_BITS 8
#define FPROBE_IP_TABLE_SIZE (1 << FPROBE_IP_HASH_BITS)

#define FPROBE_HASH_BITS 6
#define FPROBE_TABLE_SIZE (1 << FPROBE_HASH_BITS)

#define SIZE_IN_LONG(x) ((x + sizeof(long) - 1) >> (sizeof(long) == 8 ? 3 : 2))
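/* e.g. on a 64-bit kernel, SIZE_IN_LONG(12) == 2: byte sizes round up to whole longs. */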

/*
 * fprobe_table: holds 'fprobe_hlist::hlist' for checking whether the fprobe
 *   still exists. The key is the address of the fprobe instance.
 * fprobe_ip_table: holds 'fprobe_hlist::array[*]' for searching the fprobe
 *   instance related to the function address. The key is the ftrace IP
 *   address.
 *
 * When unregistering an fprobe, fprobe_hlist::fp and fprobe_hlist::array[*].fp
 * are set to NULL and the entries are deleted from both hash tables (by
 * hlist_del_rcu). After an RCU grace period, the fprobe_hlist itself is
 * released.
 *
 * fprobe_table and fprobe_ip_table can be accessed from either
 *  - Normal hlist traversal and RCU add/del while 'fprobe_mutex' is held.
 *  - RCU hlist traversal with preemption disabled.
 */
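/*
 * The lockless reader side follows the usual RCU pattern (an illustrative
 * sketch; the real lookups live in fprobe_ftrace_entry() and
 * fprobe_fgraph_entry() below):
 *
 *	rcu_read_lock();
 *	head = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params);
 *	rhl_for_each_entry_rcu(node, pos, head, hlist) {
 *		fp = READ_ONCE(node->fp);	(may already be NULL)
 *		...
 *	}
 *	rcu_read_unlock();
 */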
static struct hlist_head fprobe_table[FPROBE_TABLE_SIZE];
static struct rhltable fprobe_ip_table;
static DEFINE_MUTEX(fprobe_mutex);
static struct fgraph_ops fprobe_graph_ops;

static u32 fprobe_node_hashfn(const void *data, u32 len, u32 seed)
{
	return hash_ptr(*(unsigned long **)data, 32);
}

static int fprobe_node_cmp(struct rhashtable_compare_arg *arg,
			   const void *ptr)
{
	unsigned long key = *(unsigned long *)arg->key;
	const struct fprobe_hlist_node *n = ptr;

	return n->addr != key;
}

static u32 fprobe_node_obj_hashfn(const void *data, u32 len, u32 seed)
{
	const struct fprobe_hlist_node *n = data;

	return hash_ptr((void *)n->addr, 32);
}

static const struct rhashtable_params fprobe_rht_params = {
	.head_offset		= offsetof(struct fprobe_hlist_node, hlist),
	.key_offset		= offsetof(struct fprobe_hlist_node, addr),
	.key_len		= sizeof_field(struct fprobe_hlist_node, addr),
	.hashfn			= fprobe_node_hashfn,
	.obj_hashfn		= fprobe_node_obj_hashfn,
	.obj_cmpfn		= fprobe_node_cmp,
	.automatic_shrinking	= true,
};

/* Node insertion and deletion require the fprobe_mutex */
static int insert_fprobe_node(struct fprobe_hlist_node *node)
{
	lockdep_assert_held(&fprobe_mutex);

	return rhltable_insert(&fprobe_ip_table, &node->hlist, fprobe_rht_params);
}

/* Return true if there are synonyms (other nodes with the same address) */
static bool delete_fprobe_node(struct fprobe_hlist_node *node)
{
	bool ret;

	lockdep_assert_held(&fprobe_mutex);

	/* Avoid double deleting */
	if (READ_ONCE(node->fp) != NULL) {
		WRITE_ONCE(node->fp, NULL);
		rhltable_remove(&fprobe_ip_table, &node->hlist,
				fprobe_rht_params);
	}

	rcu_read_lock();
	ret = !!rhltable_lookup(&fprobe_ip_table, &node->addr,
				fprobe_rht_params);
	rcu_read_unlock();

	return ret;
}

/* Check existence of the fprobe */
static bool is_fprobe_still_exist(struct fprobe *fp)
{
	struct hlist_head *head;
	struct fprobe_hlist *fph;

	head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)];
	hlist_for_each_entry_rcu(fph, head, hlist,
				 lockdep_is_held(&fprobe_mutex)) {
		if (fph->fp == fp)
			return true;
	}
	return false;
}
NOKPROBE_SYMBOL(is_fprobe_still_exist);

static int add_fprobe_hash(struct fprobe *fp)
{
	struct fprobe_hlist *fph = fp->hlist_array;
	struct hlist_head *head;

	lockdep_assert_held(&fprobe_mutex);

	if (WARN_ON_ONCE(!fph))
		return -EINVAL;

	if (is_fprobe_still_exist(fp))
		return -EEXIST;

	head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)];
	hlist_add_head_rcu(&fp->hlist_array->hlist, head);
	return 0;
}

static int del_fprobe_hash(struct fprobe *fp)
{
	struct fprobe_hlist *fph = fp->hlist_array;

	lockdep_assert_held(&fprobe_mutex);

	if (WARN_ON_ONCE(!fph))
		return -EINVAL;

	if (!is_fprobe_still_exist(fp))
		return -ENOENT;

	fph->fp = NULL;
	hlist_del_rcu(&fph->hlist);
	return 0;
}

#ifdef ARCH_DEFINE_ENCODE_FPROBE_HEADER

/* The arch should encode fprobe_header info into one unsigned long */
#define FPROBE_HEADER_SIZE_IN_LONG	1

static inline bool write_fprobe_header(unsigned long *stack,
					struct fprobe *fp, unsigned int size_words)
{
	if (WARN_ON_ONCE(size_words > MAX_FPROBE_DATA_SIZE_WORD ||
			 !arch_fprobe_header_encodable(fp)))
		return false;

	*stack = arch_encode_fprobe_header(fp, size_words);
	return true;
}

static inline void read_fprobe_header(unsigned long *stack,
					struct fprobe **fp, unsigned int *size_words)
{
	*fp = arch_decode_fprobe_header_fp(*stack);
	*size_words = arch_decode_fprobe_header_size(*stack);
}

#else

/* Generic fprobe_header */
struct __fprobe_header {
	struct fprobe *fp;
	unsigned long size_words;
} __packed;

#define FPROBE_HEADER_SIZE_IN_LONG	SIZE_IN_LONG(sizeof(struct __fprobe_header))

static inline bool write_fprobe_header(unsigned long *stack,
					struct fprobe *fp, unsigned int size_words)
{
	struct __fprobe_header *fph = (struct __fprobe_header *)stack;

	if (WARN_ON_ONCE(size_words > MAX_FPROBE_DATA_SIZE_WORD))
		return false;

	fph->fp = fp;
	fph->size_words = size_words;
	return true;
}

static inline void read_fprobe_header(unsigned long *stack,
					struct fprobe **fp, unsigned int *size_words)
{
	struct __fprobe_header *fph = (struct __fprobe_header *)stack;

	*fp = fph->fp;
	*size_words = fph->size_words;
}

#endif

/*
 * fprobe shadow stack management:
 * Since all fprobes share a single fgraph_ops, they need to share the stack
 * entries among the probes on the same function exit. Note that because a new
 * probe can be registered while a target function is returning, we cannot use
 * the hash table to find the corresponding probes. Thus the probe address is
 * stored on the shadow stack together with its entry data size.
 */
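/*
 * Illustrative layout of the reserved area for one function exit, with the
 * generic (non arch-encoded) header; all sizes in longs:
 *
 *	[ header { fp, size_words } ][ entry data: size_words longs ]
 *	[ header { fp', size' } ][ ... ][ zeroed tail ]
 *
 * fprobe_return() walks these records until it reads a NULL fp.
 */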
static inline int __fprobe_handler(unsigned long ip, unsigned long parent_ip,
				   struct fprobe *fp, struct ftrace_regs *fregs,
				   void *data)
{
	if (!fp->entry_handler)
		return 0;

	return fp->entry_handler(fp, ip, parent_ip, fregs, data);
}

static inline int __fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
					  struct fprobe *fp, struct ftrace_regs *fregs,
					  void *data)
{
	int ret;

	/*
	 * This user handler is shared with other kprobes and is not expected to
	 * be called recursively. So if any other kprobe handler is running,
	 * this exits as a kprobe does. See the section 'Share the callbacks
	 * with kprobes' in Documentation/trace/fprobe.rst for more information.
	 */
	if (unlikely(kprobe_running())) {
		fp->nmissed++;
		return 0;
	}

	kprobe_busy_begin();
	ret = __fprobe_handler(ip, parent_ip, fp, fregs, data);
	kprobe_busy_end();
	return ret;
}

#if defined(CONFIG_DYNAMIC_FTRACE_WITH_ARGS) || defined(CONFIG_DYNAMIC_FTRACE_WITH_REGS)
/* ftrace_ops callback, this processes fprobes which have only an entry_handler. */
static void fprobe_ftrace_entry(unsigned long ip, unsigned long parent_ip,
	struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
	struct fprobe_hlist_node *node;
	struct rhlist_head *head, *pos;
	struct fprobe *fp;
	int bit;

	bit = ftrace_test_recursion_trylock(ip, parent_ip);
	if (bit < 0)
		return;

	/*
	 * ftrace_test_recursion_trylock() disables preemption, but
	 * rhltable_lookup() checks whether rcu_read_lock() is held.
	 * So we take rcu_read_lock() here.
	 */
	rcu_read_lock();
	head = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params);

	rhl_for_each_entry_rcu(node, pos, head, hlist) {
		if (node->addr != ip)
			break;
		fp = READ_ONCE(node->fp);
		if (unlikely(!fp || fprobe_disabled(fp) || fp->exit_handler))
			continue;

		if (fprobe_shared_with_kprobes(fp))
			__fprobe_kprobe_handler(ip, parent_ip, fp, fregs, NULL);
		else
			__fprobe_handler(ip, parent_ip, fp, fregs, NULL);
	}
	rcu_read_unlock();
	ftrace_test_recursion_unlock(bit);
}
NOKPROBE_SYMBOL(fprobe_ftrace_entry);

static struct ftrace_ops fprobe_ftrace_ops = {
	.func	= fprobe_ftrace_entry,
	.flags	= FTRACE_OPS_FL_SAVE_ARGS,
};
static int fprobe_ftrace_active;

static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
{
	int ret;

	lockdep_assert_held(&fprobe_mutex);

	ret = ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 0, 0);
	if (ret)
		return ret;

	if (!fprobe_ftrace_active) {
		ret = register_ftrace_function(&fprobe_ftrace_ops);
		if (ret) {
			ftrace_free_filter(&fprobe_ftrace_ops);
			return ret;
		}
	}
	fprobe_ftrace_active++;
	return 0;
}

static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
{
	lockdep_assert_held(&fprobe_mutex);

	fprobe_ftrace_active--;
	if (!fprobe_ftrace_active)
		unregister_ftrace_function(&fprobe_ftrace_ops);
	if (num)
		ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 1, 0);
}

static bool fprobe_is_ftrace(struct fprobe *fp)
{
	return !fp->exit_handler;
}

#ifdef CONFIG_MODULES
static void fprobe_set_ips(unsigned long *ips, unsigned int cnt, int remove,
			   int reset)
{
	ftrace_set_filter_ips(&fprobe_graph_ops.ops, ips, cnt, remove, reset);
	ftrace_set_filter_ips(&fprobe_ftrace_ops, ips, cnt, remove, reset);
}
#endif
#else
static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
{
	return -ENOENT;
}

static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
{
}

static bool fprobe_is_ftrace(struct fprobe *fp)
{
	return false;
}

#ifdef CONFIG_MODULES
static void fprobe_set_ips(unsigned long *ips, unsigned int cnt, int remove,
			   int reset)
{
	ftrace_set_filter_ips(&fprobe_graph_ops.ops, ips, cnt, remove, reset);
}
#endif
#endif /* !CONFIG_DYNAMIC_FTRACE_WITH_ARGS && !CONFIG_DYNAMIC_FTRACE_WITH_REGS */

/* fgraph_ops callback, this processes fprobes which have an exit_handler. */
static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
			       struct ftrace_regs *fregs)
{
	unsigned long *fgraph_data = NULL;
	unsigned long func = trace->func;
	struct fprobe_hlist_node *node;
	struct rhlist_head *head, *pos;
	unsigned long ret_ip;
	int reserved_words;
	struct fprobe *fp;
	int used, ret;

	if (WARN_ON_ONCE(!fregs))
		return 0;

	guard(rcu)();
	head = rhltable_lookup(&fprobe_ip_table, &func, fprobe_rht_params);
	reserved_words = 0;
	rhl_for_each_entry_rcu(node, pos, head, hlist) {
		if (node->addr != func)
			continue;
		fp = READ_ONCE(node->fp);
		if (!fp || !fp->exit_handler)
			continue;
		/*
		 * Since the fprobe can be re-enabled before the handler loop
		 * below runs, ignore the fprobe's disabled flag here and
		 * reserve space anyway.
		 */
		reserved_words +=
			FPROBE_HEADER_SIZE_IN_LONG + SIZE_IN_LONG(fp->entry_data_size);
	}
	if (reserved_words) {
		fgraph_data = fgraph_reserve_data(gops->idx, reserved_words * sizeof(long));
		if (unlikely(!fgraph_data)) {
			rhl_for_each_entry_rcu(node, pos, head, hlist) {
				if (node->addr != func)
					continue;
				fp = READ_ONCE(node->fp);
				if (fp && !fprobe_disabled(fp) && !fprobe_is_ftrace(fp))
					fp->nmissed++;
			}
			return 0;
		}
	}

	/*
	 * TODO: recursion detection is already done in fgraph, so we need a
	 * callback there to increment the missed counter.
	 */
	ret_ip = ftrace_regs_get_return_address(fregs);
	used = 0;
	rhl_for_each_entry_rcu(node, pos, head, hlist) {
		int data_size;
		void *data;

		if (node->addr != func)
			continue;
		fp = READ_ONCE(node->fp);
		if (unlikely(!fp || fprobe_disabled(fp) || fprobe_is_ftrace(fp)))
			continue;

		data_size = fp->entry_data_size;
		if (data_size && fp->exit_handler)
			data = fgraph_data + used + FPROBE_HEADER_SIZE_IN_LONG;
		else
			data = NULL;

		if (fprobe_shared_with_kprobes(fp))
			ret = __fprobe_kprobe_handler(func, ret_ip, fp, fregs, data);
		else
			ret = __fprobe_handler(func, ret_ip, fp, fregs, data);

		/*
		 * If entry_handler returns non-zero, nmissed is not counted
		 * but the exit_handler is skipped.
		 */
		if (!ret && fp->exit_handler) {
			int size_words = SIZE_IN_LONG(data_size);

			if (write_fprobe_header(&fgraph_data[used], fp, size_words))
				used += FPROBE_HEADER_SIZE_IN_LONG + size_words;
		}
	}
	/* Zero the unused tail (memset takes bytes) so fprobe_return() stops at a NULL fp. */
	if (used < reserved_words)
		memset(fgraph_data + used, 0,
		       (reserved_words - used) * sizeof(long));

	/* If any exit_handler is set, data must be used. */
	return used != 0;
}
NOKPROBE_SYMBOL(fprobe_fgraph_entry);

static void fprobe_return(struct ftrace_graph_ret *trace,
			  struct fgraph_ops *gops,
			  struct ftrace_regs *fregs)
{
	unsigned long *fgraph_data = NULL;
	unsigned long ret_ip;
	struct fprobe *fp;
	int size, curr;
	int size_words;

	fgraph_data = (unsigned long *)fgraph_retrieve_data(gops->idx, &size);
	if (WARN_ON_ONCE(!fgraph_data))
		return;
	size_words = SIZE_IN_LONG(size);
	ret_ip = ftrace_regs_get_instruction_pointer(fregs);

	preempt_disable_notrace();

	curr = 0;
	while (size_words > curr) {
		read_fprobe_header(&fgraph_data[curr], &fp, &size);
		if (!fp)
			break;
		curr += FPROBE_HEADER_SIZE_IN_LONG;
		if (is_fprobe_still_exist(fp) && !fprobe_disabled(fp)) {
			if (WARN_ON_ONCE(curr + size > size_words))
				break;
			fp->exit_handler(fp, trace->func, ret_ip, fregs,
					 size ? fgraph_data + curr : NULL);
		}
		curr += size;
	}
	preempt_enable_notrace();
}
NOKPROBE_SYMBOL(fprobe_return);

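/*
 * Putting the two callbacks together, a typical user measures function
 * latency through the per-call entry data (an illustrative sketch, not part
 * of this file; the my_* names are hypothetical and the handler signatures
 * follow include/linux/fprobe.h):
 *
 *	struct my_data { u64 ts; };
 *
 *	static int my_entry(struct fprobe *fp, unsigned long ip,
 *			    unsigned long ret_ip, struct ftrace_regs *fregs,
 *			    void *data)
 *	{
 *		((struct my_data *)data)->ts = ktime_get_ns();
 *		return 0;	(non-zero would skip my_exit for this call)
 *	}
 *
 *	static void my_exit(struct fprobe *fp, unsigned long ip,
 *			    unsigned long ret_ip, struct ftrace_regs *fregs,
 *			    void *data)
 *	{
 *		pr_info("latency: %llu ns\n",
 *			ktime_get_ns() - ((struct my_data *)data)->ts);
 *	}
 *
 *	static struct fprobe my_fp = {
 *		.entry_handler	 = my_entry,
 *		.exit_handler	 = my_exit,
 *		.entry_data_size = sizeof(struct my_data),
 *	};
 */
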
static struct fgraph_ops fprobe_graph_ops = {
	.entryfunc	= fprobe_fgraph_entry,
	.retfunc	= fprobe_return,
};
static int fprobe_graph_active;

/* Add @addrs to the ftrace filter and register fgraph if needed. */
static int fprobe_graph_add_ips(unsigned long *addrs, int num)
{
	int ret;

	lockdep_assert_held(&fprobe_mutex);

	ret = ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 0, 0);
	if (ret)
		return ret;

	if (!fprobe_graph_active) {
		ret = register_ftrace_graph(&fprobe_graph_ops);
		if (WARN_ON_ONCE(ret)) {
			ftrace_free_filter(&fprobe_graph_ops.ops);
			return ret;
		}
	}
	fprobe_graph_active++;
	return 0;
}

/* Remove @addrs from the ftrace filter and unregister fgraph if possible. */
static void fprobe_graph_remove_ips(unsigned long *addrs, int num)
{
	lockdep_assert_held(&fprobe_mutex);

	fprobe_graph_active--;
	/* Q: should we unregister it ? */
	if (!fprobe_graph_active)
		unregister_ftrace_graph(&fprobe_graph_ops);

	if (num)
		ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);
}

#ifdef CONFIG_MODULES

#define FPROBE_IPS_BATCH_INIT 8
/* instruction pointer address list */
struct fprobe_addr_list {
	int index;
	int size;
	unsigned long *addrs;
};

static int fprobe_addr_list_add(struct fprobe_addr_list *alist, unsigned long addr)
{
	unsigned long *addrs;

	/* Previously we failed to expand the list. */
	if (alist->index == alist->size)
		return -ENOSPC;

	alist->addrs[alist->index++] = addr;
	if (alist->index < alist->size)
		return 0;

	/* Expand the address list */
	addrs = kcalloc(alist->size * 2, sizeof(*addrs), GFP_KERNEL);
	if (!addrs)
		return -ENOMEM;

	memcpy(addrs, alist->addrs, alist->size * sizeof(*addrs));
	alist->size *= 2;
	kfree(alist->addrs);
	alist->addrs = addrs;

	return 0;
}

static void fprobe_remove_node_in_module(struct module *mod, struct fprobe_hlist_node *node,
					 struct fprobe_addr_list *alist)
{
	if (!within_module(node->addr, mod))
		return;
	if (delete_fprobe_node(node))
		return;
	/*
	 * If we fail to update alist, just continue updating the hlist, so
	 * that at least the user handler will no longer be hit.
	 */
	fprobe_addr_list_add(alist, node->addr);
}

/* Handle module unloading to manage fprobe_ip_table. */
static int fprobe_module_callback(struct notifier_block *nb,
				  unsigned long val, void *data)
{
	struct fprobe_addr_list alist = {.size = FPROBE_IPS_BATCH_INIT};
	struct fprobe_hlist_node *node;
	struct rhashtable_iter iter;
	struct module *mod = data;

	if (val != MODULE_STATE_GOING)
		return NOTIFY_DONE;

	alist.addrs = kcalloc(alist.size, sizeof(*alist.addrs), GFP_KERNEL);
	/* If we fail to allocate memory, we cannot remove the ips from the hash. */
	if (!alist.addrs)
		return NOTIFY_DONE;

	mutex_lock(&fprobe_mutex);
	rhltable_walk_enter(&fprobe_ip_table, &iter);
	do {
		rhashtable_walk_start(&iter);

		while ((node = rhashtable_walk_next(&iter)) && !IS_ERR(node))
			fprobe_remove_node_in_module(mod, node, &alist);

		rhashtable_walk_stop(&iter);
	} while (node == ERR_PTR(-EAGAIN));
	rhashtable_walk_exit(&iter);

	if (alist.index > 0)
		fprobe_set_ips(alist.addrs, alist.index, 1, 0);
	mutex_unlock(&fprobe_mutex);

	kfree(alist.addrs);

	return NOTIFY_DONE;
}

static struct notifier_block fprobe_module_nb = {
	.notifier_call = fprobe_module_callback,
	.priority = 0,
};

static int __init init_fprobe_module(void)
{
	return register_module_notifier(&fprobe_module_nb);
}
early_initcall(init_fprobe_module);
#endif

static int symbols_cmp(const void *a, const void *b)
{
	const char **str_a = (const char **) a;
	const char **str_b = (const char **) b;

	return strcmp(*str_a, *str_b);
}

/* Convert symbols to ftrace location addresses */
static unsigned long *get_ftrace_locations(const char **syms, int num)
{
	unsigned long *addrs;

	/* Convert symbols to symbol address */
	addrs = kcalloc(num, sizeof(*addrs), GFP_KERNEL);
	if (!addrs)
		return ERR_PTR(-ENOMEM);

	/* ftrace_lookup_symbols expects sorted symbols */
	sort(syms, num, sizeof(*syms), symbols_cmp, NULL);

	if (!ftrace_lookup_symbols(syms, num, addrs))
		return addrs;

	kfree(addrs);
	return ERR_PTR(-ENOENT);
}

struct filter_match_data {
	const char *filter;
	const char *notfilter;
	size_t index;
	size_t size;
	unsigned long *addrs;
	struct module **mods;
};

static int filter_match_callback(void *data, const char *name, unsigned long addr)
{
	struct filter_match_data *match = data;

	if (!glob_match(match->filter, name) ||
	    (match->notfilter && glob_match(match->notfilter, name)))
		return 0;

	if (!ftrace_location(addr))
		return 0;

	if (match->addrs) {
		struct module *mod = __module_text_address(addr);

		if (mod && !try_module_get(mod))
			return 0;

		match->mods[match->index] = mod;
		match->addrs[match->index] = addr;
	}
	match->index++;
	return match->index == match->size;
}

/*
 * Make an IP list from the filter/no-filter glob patterns.
 * Return the number of matched symbols, or -errno.
 * If @addrs == NULL, this just counts the number of matched symbols. If @addrs
 * is passed with an array, we also need to pass an @mods array of the same
 * size to increment the module refcount for each symbol.
 * This means we also need to call module_put() for each element of @mods after
 * using the @addrs.
 */
static int get_ips_from_filter(const char *filter, const char *notfilter,
			       unsigned long *addrs, struct module **mods,
			       size_t size)
{
	struct filter_match_data match = { .filter = filter, .notfilter = notfilter,
		.index = 0, .size = size, .addrs = addrs, .mods = mods};
	int ret;

	if (addrs && !mods)
		return -EINVAL;

	ret = kallsyms_on_each_symbol(filter_match_callback, &match);
	if (ret < 0)
		return ret;
	if (IS_ENABLED(CONFIG_MODULES)) {
		ret = module_kallsyms_on_each_symbol(NULL, filter_match_callback, &match);
		if (ret < 0)
			return ret;
	}

	return match.index ?: -ENOENT;
}

static void fprobe_fail_cleanup(struct fprobe *fp)
{
	kfree(fp->hlist_array);
	fp->hlist_array = NULL;
}

/* Initialize the fprobe data structure. */
static int fprobe_init(struct fprobe *fp, unsigned long *addrs, int num)
{
	struct fprobe_hlist *hlist_array;
	unsigned long addr;
	int size, i;

	if (!fp || !addrs || num <= 0)
		return -EINVAL;

	size = ALIGN(fp->entry_data_size, sizeof(long));
	if (size > MAX_FPROBE_DATA_SIZE)
		return -E2BIG;
	fp->entry_data_size = size;

	hlist_array = kzalloc(struct_size(hlist_array, array, num), GFP_KERNEL);
	if (!hlist_array)
		return -ENOMEM;

	fp->nmissed = 0;

	hlist_array->size = num;
	fp->hlist_array = hlist_array;
	hlist_array->fp = fp;
	for (i = 0; i < num; i++) {
		hlist_array->array[i].fp = fp;
		addr = ftrace_location(addrs[i]);
		if (!addr) {
			fprobe_fail_cleanup(fp);
			return -ENOENT;
		}
		hlist_array->array[i].addr = addr;
	}
	return 0;
}

#define FPROBE_IPS_MAX	INT_MAX

int fprobe_count_ips_from_filter(const char *filter, const char *notfilter)
{
	return get_ips_from_filter(filter, notfilter, NULL, NULL, FPROBE_IPS_MAX);
}

/**
 * register_fprobe() - Register fprobe to ftrace by pattern.
 * @fp: A fprobe data structure to be registered.
 * @filter: A wildcard pattern of probed symbols.
 * @notfilter: A wildcard pattern of NOT probed symbols.
 *
 * Register @fp to ftrace for enabling the probe on the symbols matching
 * @filter. If @notfilter is not NULL, symbols matching @notfilter are not
 * probed.
 *
 * Return 0 if @fp is registered successfully, -errno if not.
 */
int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter)
{
	unsigned long *addrs __free(kfree) = NULL;
	struct module **mods __free(kfree) = NULL;
	int ret, num;

	if (!fp || !filter)
		return -EINVAL;

	num = get_ips_from_filter(filter, notfilter, NULL, NULL, FPROBE_IPS_MAX);
	if (num < 0)
		return num;

	addrs = kcalloc(num, sizeof(*addrs), GFP_KERNEL);
	if (!addrs)
		return -ENOMEM;

	mods = kcalloc(num, sizeof(*mods), GFP_KERNEL);
	if (!mods)
		return -ENOMEM;

	ret = get_ips_from_filter(filter, notfilter, addrs, mods, num);
	if (ret < 0)
		return ret;

	ret = register_fprobe_ips(fp, addrs, ret);

	for (int i = 0; i < num; i++) {
		if (mods[i])
			module_put(mods[i]);
	}
	return ret;
}
EXPORT_SYMBOL_GPL(register_fprobe);
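
/*
 * Example (an illustrative sketch, not part of this file): enable my_fp,
 * sketched above fprobe_graph_ops, on every symbol matching "vfs_read*"
 * except "vfs_readv":
 *
 *	ret = register_fprobe(&my_fp, "vfs_read*", "vfs_readv");
 */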

/**
 * register_fprobe_ips() - Register fprobe to ftrace by address.
 * @fp: A fprobe data structure to be registered.
 * @addrs: An array of target function addresses.
 * @num: The number of entries of @addrs.
 *
 * Register @fp to ftrace for enabling the probe on the addresses given by
 * @addrs. The @addrs must be ftrace location addresses, which may be the
 * symbol address plus an arch-dependent offset.
 * If you are unsure what this means, please use the other registration
 * functions.
 *
 * Return 0 if @fp is registered successfully, -errno if not.
 */
int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
{
	struct fprobe_hlist *hlist_array;
	int ret, i;

	ret = fprobe_init(fp, addrs, num);
	if (ret)
		return ret;

	mutex_lock(&fprobe_mutex);

	hlist_array = fp->hlist_array;
	if (fprobe_is_ftrace(fp))
		ret = fprobe_ftrace_add_ips(addrs, num);
	else
		ret = fprobe_graph_add_ips(addrs, num);

	if (!ret) {
		add_fprobe_hash(fp);
		for (i = 0; i < hlist_array->size; i++) {
			ret = insert_fprobe_node(&hlist_array->array[i]);
			if (ret)
				break;
		}
		/* fall back on insert error */
		if (ret) {
			for (i--; i >= 0; i--)
				delete_fprobe_node(&hlist_array->array[i]);
		}
	}
	mutex_unlock(&fprobe_mutex);

	if (ret)
		fprobe_fail_cleanup(fp);

	return ret;
}
EXPORT_SYMBOL_GPL(register_fprobe_ips);
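
/*
 * Illustrative conversion of a symbol to an ftrace location address (a
 * sketch; it assumes a context where kallsyms_lookup_name() is available,
 * which is not the case for ordinary modules):
 *
 *	unsigned long addr = ftrace_location(kallsyms_lookup_name("vfs_read"));
 *
 *	if (addr)
 *		ret = register_fprobe_ips(&my_fp, &addr, 1);
 */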

/**
 * register_fprobe_syms() - Register fprobe to ftrace by symbols.
 * @fp: A fprobe data structure to be registered.
 * @syms: An array of target symbols.
 * @num: The number of entries of @syms.
 *
 * Register @fp to the symbols given by the @syms array. This will be useful
 * if you are sure the symbols exist in the kernel.
 *
 * Return 0 if @fp is registered successfully, -errno if not.
 */
int register_fprobe_syms(struct fprobe *fp, const char **syms, int num)
{
	unsigned long *addrs;
	int ret;

	if (!fp || !syms || num <= 0)
		return -EINVAL;

	addrs = get_ftrace_locations(syms, num);
	if (IS_ERR(addrs))
		return PTR_ERR(addrs);

	ret = register_fprobe_ips(fp, addrs, num);

	kfree(addrs);

	return ret;
}
EXPORT_SYMBOL_GPL(register_fprobe_syms);
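
/*
 * Example (an illustrative sketch): note that the @syms array may be
 * reordered in place, since get_ftrace_locations() sorts it for the lookup:
 *
 *	static const char *syms[] = { "vfs_read", "vfs_write" };
 *
 *	ret = register_fprobe_syms(&my_fp, syms, ARRAY_SIZE(syms));
 */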

bool fprobe_is_registered(struct fprobe *fp)
{
	if (!fp || !fp->hlist_array)
		return false;
	return true;
}

/**
 * unregister_fprobe() - Unregister fprobe.
 * @fp: A fprobe data structure to be unregistered.
 *
 * Unregister fprobe (and remove ftrace hooks from the function entries).
 *
 * Return 0 if @fp is unregistered successfully, -errno if not.
 */
int unregister_fprobe(struct fprobe *fp)
{
	struct fprobe_hlist *hlist_array;
	unsigned long *addrs = NULL;
	int ret = 0, i, count;

	mutex_lock(&fprobe_mutex);
	if (!fp || !is_fprobe_still_exist(fp)) {
		ret = -EINVAL;
		goto out;
	}

	hlist_array = fp->hlist_array;
	addrs = kcalloc(hlist_array->size, sizeof(unsigned long), GFP_KERNEL);
	if (!addrs) {
		ret = -ENOMEM;	/* TODO: Fall back to a one-by-one loop */
		goto out;
	}

	/* Remove non-synonym ips from the table and the hash */
	count = 0;
	for (i = 0; i < hlist_array->size; i++) {
		if (!delete_fprobe_node(&hlist_array->array[i]))
			addrs[count++] = hlist_array->array[i].addr;
	}
	del_fprobe_hash(fp);

	if (fprobe_is_ftrace(fp))
		fprobe_ftrace_remove_ips(addrs, count);
	else
		fprobe_graph_remove_ips(addrs, count);

	kfree_rcu(hlist_array, rcu);
	fp->hlist_array = NULL;

out:
	mutex_unlock(&fprobe_mutex);

	kfree(addrs);
	return ret;
}
EXPORT_SYMBOL_GPL(unregister_fprobe);

static int __init fprobe_initcall(void)
{
	rhltable_init(&fprobe_ip_table, &fprobe_rht_params);
	return 0;
}
core_initcall(fprobe_initcall);
973