xref: /linux/kernel/trace/fprobe.c (revision 665159e246749578d4e4bfe106ee3b74edcdab18)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * fprobe - Simple ftrace probe wrapper for function entry.
4  */
5 #define pr_fmt(fmt) "fprobe: " fmt
6 
7 #include <linux/cleanup.h>
8 #include <linux/err.h>
9 #include <linux/fprobe.h>
10 #include <linux/kallsyms.h>
11 #include <linux/kprobes.h>
12 #include <linux/list.h>
13 #include <linux/mutex.h>
14 #include <linux/rhashtable.h>
15 #include <linux/slab.h>
16 #include <linux/sort.h>
17 
18 #include <asm/fprobe.h>
19 
20 #include "trace.h"
21 
/*
 * Size parameters for an IP-keyed hash table.
 * NOTE(review): no user is visible in this part of the file (the IP table
 * is an rhltable) - possibly a leftover; confirm before removing.
 */
#define FPROBE_IP_HASH_BITS 8
#define FPROBE_IP_TABLE_SIZE (1 << FPROBE_IP_HASH_BITS)

/* fprobe_table has (1 << FPROBE_HASH_BITS) buckets, indexed by hash_ptr(). */
#define FPROBE_HASH_BITS 6
#define FPROBE_TABLE_SIZE (1 << FPROBE_HASH_BITS)
27 
/*
 * Number of 'long' words needed to hold @x bytes, rounded up. The shift
 * count is log2(sizeof(long)): 3 when long is 8 bytes, 2 otherwise.
 * @x is parenthesized so that a low-precedence argument such as "a | b"
 * is rounded as a whole instead of binding to the addition.
 */
#define SIZE_IN_LONG(x) (((x) + sizeof(long) - 1) >> (sizeof(long) == 8 ? 3 : 2))
29 
30 /*
31  * fprobe_table: hold 'fprobe_hlist::hlist' for checking the fprobe still
32  *   exists. The key is the address of fprobe instance.
33  * fprobe_ip_table: hold 'fprobe_hlist::array[*]' for searching the fprobe
34  *   instance related to the function address. The key is the ftrace IP
35  *   address.
36  *
 * When unregistering the fprobe, fprobe_hlist::fp and fprobe_hlist::array[*].fp
 * are set to NULL and the entries are deleted from both hash tables
 * (hlist_del_rcu() for fprobe_table, rhltable_remove() for fprobe_ip_table).
 * After an RCU grace period, the fprobe_hlist itself will be released.
 *
 * fprobe_table and fprobe_ip_table can be accessed by either
 *  - Normal traversal and RCU add/del while 'fprobe_mutex' is held.
 *  - RCU traversal with preemption disabled
44  */
/* Buckets of fprobe_hlist::hlist, selected by hash_ptr() of the fprobe address. */
static struct hlist_head fprobe_table[FPROBE_TABLE_SIZE];
/* Hash-list table of fprobe_hlist_node, keyed by fprobe_hlist_node::addr. */
static struct rhltable fprobe_ip_table;
/* Held while inserting into or deleting from either table. */
static DEFINE_MUTEX(fprobe_mutex);
/* Forward declaration; initialized further down with the fgraph callbacks. */
static struct fgraph_ops fprobe_graph_ops;
49 
/*
 * Hash a lookup key for fprobe_ip_table. @data points at the key, which is
 * read as one pointer-sized value; @len and @seed are not used.
 */
static u32 fprobe_node_hashfn(const void *data, u32 len, u32 seed)
{
	unsigned long *const *keyp = data;

	return hash_ptr(*keyp, 32);
}
54 
fprobe_node_cmp(struct rhashtable_compare_arg * arg,const void * ptr)55 static int fprobe_node_cmp(struct rhashtable_compare_arg *arg,
56 			   const void *ptr)
57 {
58 	unsigned long key = *(unsigned long *)arg->key;
59 	const struct fprobe_hlist_node *n = ptr;
60 
61 	return n->addr != key;
62 }
63 
/*
 * Hash a stored node by its address field, using the same hash_ptr() width
 * as fprobe_node_hashfn(). @len and @seed are not used.
 */
static u32 fprobe_node_obj_hashfn(const void *data, u32 len, u32 seed)
{
	unsigned long addr = ((const struct fprobe_hlist_node *)data)->addr;

	return hash_ptr((void *)addr, 32);
}
70 
/*
 * rhltable parameters for fprobe_ip_table: nodes are linked through
 * fprobe_hlist_node::hlist and keyed by fprobe_hlist_node::addr, with the
 * custom hash/compare helpers defined above.
 */
static const struct rhashtable_params fprobe_rht_params = {
	.head_offset		= offsetof(struct fprobe_hlist_node, hlist),
	.key_offset		= offsetof(struct fprobe_hlist_node, addr),
	.key_len		= sizeof_field(struct fprobe_hlist_node, addr),
	.hashfn			= fprobe_node_hashfn,
	.obj_hashfn		= fprobe_node_obj_hashfn,
	.obj_cmpfn		= fprobe_node_cmp,
	.automatic_shrinking	= true,
};
80 
81 /* Node insertion and deletion requires the fprobe_mutex */
__insert_fprobe_node(struct fprobe_hlist_node * node,struct fprobe * fp)82 static int __insert_fprobe_node(struct fprobe_hlist_node *node, struct fprobe *fp)
83 {
84 	int ret;
85 
86 	lockdep_assert_held(&fprobe_mutex);
87 
88 	ret = rhltable_insert(&fprobe_ip_table, &node->hlist, fprobe_rht_params);
89 	/* Set the fprobe pointer if insertion was successful. */
90 	if (!ret)
91 		WRITE_ONCE(node->fp, fp);
92 	return ret;
93 }
94 
__delete_fprobe_node(struct fprobe_hlist_node * node)95 static void __delete_fprobe_node(struct fprobe_hlist_node *node)
96 {
97 	lockdep_assert_held(&fprobe_mutex);
98 
99 	/* Avoid double deleting and non-inserted nodes */
100 	if (READ_ONCE(node->fp) != NULL) {
101 		WRITE_ONCE(node->fp, NULL);
102 		rhltable_remove(&fprobe_ip_table, &node->hlist,
103 				fprobe_rht_params);
104 	}
105 }
106 
107 /* Check existence of the fprobe */
fprobe_registered(struct fprobe * fp)108 static bool fprobe_registered(struct fprobe *fp)
109 {
110 	struct hlist_head *head;
111 	struct fprobe_hlist *fph;
112 
113 	head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)];
114 	hlist_for_each_entry_rcu(fph, head, hlist,
115 				 lockdep_is_held(&fprobe_mutex)) {
116 		if (fph->fp == fp)
117 			return true;
118 	}
119 	return false;
120 }
121 NOKPROBE_SYMBOL(fprobe_registered);
122 
add_fprobe_hash(struct fprobe * fp)123 static int add_fprobe_hash(struct fprobe *fp)
124 {
125 	struct fprobe_hlist *fph = fp->hlist_array;
126 	struct hlist_head *head;
127 
128 	lockdep_assert_held(&fprobe_mutex);
129 
130 	if (WARN_ON_ONCE(!fph))
131 		return -EINVAL;
132 
133 	head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)];
134 	hlist_add_head_rcu(&fp->hlist_array->hlist, head);
135 	return 0;
136 }
137 
del_fprobe_hash(struct fprobe * fp)138 static int del_fprobe_hash(struct fprobe *fp)
139 {
140 	struct fprobe_hlist *fph = fp->hlist_array;
141 
142 	lockdep_assert_held(&fprobe_mutex);
143 
144 	if (WARN_ON_ONCE(!fph))
145 		return -EINVAL;
146 
147 	if (!fprobe_registered(fp))
148 		return -ENOENT;
149 
150 	fph->fp = NULL;
151 	hlist_del_rcu(&fph->hlist);
152 	return 0;
153 }
154 
155 #ifdef ARCH_DEFINE_ENCODE_FPROBE_HEADER
156 
157 /* The arch should encode fprobe_header info into one unsigned long */
158 #define FPROBE_HEADER_SIZE_IN_LONG	1
159 
write_fprobe_header(unsigned long * stack,struct fprobe * fp,unsigned int size_words)160 static inline bool write_fprobe_header(unsigned long *stack,
161 					struct fprobe *fp, unsigned int size_words)
162 {
163 	if (WARN_ON_ONCE(size_words > MAX_FPROBE_DATA_SIZE_WORD ||
164 			 !arch_fprobe_header_encodable(fp)))
165 		return false;
166 
167 	*stack = arch_encode_fprobe_header(fp, size_words);
168 	return true;
169 }
170 
/* Decode the one-word arch header at @stack into @fp and @size_words. */
static inline void read_fprobe_header(unsigned long *stack,
					struct fprobe **fp, unsigned int *size_words)
{
	unsigned long header = stack[0];

	*fp = arch_decode_fprobe_header_fp(header);
	*size_words = arch_decode_fprobe_header_size(header);
}
177 
178 #else
179 
/*
 * Generic fprobe_header: used when the arch does not define
 * ARCH_DEFINE_ENCODE_FPROBE_HEADER. It stores the fprobe pointer and the
 * size (in long words) of the entry data that follows the header.
 */
struct __fprobe_header {
	struct fprobe *fp;
	unsigned long size_words;
} __packed;

/* Header size expressed in long words. */
#define FPROBE_HEADER_SIZE_IN_LONG	SIZE_IN_LONG(sizeof(struct __fprobe_header))
187 
write_fprobe_header(unsigned long * stack,struct fprobe * fp,unsigned int size_words)188 static inline bool write_fprobe_header(unsigned long *stack,
189 					struct fprobe *fp, unsigned int size_words)
190 {
191 	struct __fprobe_header *fph = (struct __fprobe_header *)stack;
192 
193 	if (WARN_ON_ONCE(size_words > MAX_FPROBE_DATA_SIZE_WORD))
194 		return false;
195 
196 	fph->fp = fp;
197 	fph->size_words = size_words;
198 	return true;
199 }
200 
read_fprobe_header(unsigned long * stack,struct fprobe ** fp,unsigned int * size_words)201 static inline void read_fprobe_header(unsigned long *stack,
202 					struct fprobe **fp, unsigned int *size_words)
203 {
204 	struct __fprobe_header *fph = (struct __fprobe_header *)stack;
205 
206 	*fp = fph->fp;
207 	*size_words = fph->size_words;
208 }
209 
210 #endif
211 
/*
 * fprobe shadow stack management:
 * Since fprobe shares a single fgraph_ops, it needs to share the stack entry
 * among the probes on the same function exit. Note that a new probe can be
 * registered before a target function returns, so we cannot use the hash
 * table to find the corresponding probes. Thus the probe address is stored on
 * the shadow stack with its entry data size.
 */
__fprobe_handler(unsigned long ip,unsigned long parent_ip,struct fprobe * fp,struct ftrace_regs * fregs,void * data)221 static inline int __fprobe_handler(unsigned long ip, unsigned long parent_ip,
222 				   struct fprobe *fp, struct ftrace_regs *fregs,
223 				   void *data)
224 {
225 	if (!fp->entry_handler)
226 		return 0;
227 
228 	return fp->entry_handler(fp, ip, parent_ip, fregs, data);
229 }
230 
__fprobe_kprobe_handler(unsigned long ip,unsigned long parent_ip,struct fprobe * fp,struct ftrace_regs * fregs,void * data)231 static inline int __fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
232 					  struct fprobe *fp, struct ftrace_regs *fregs,
233 					  void *data)
234 {
235 	int ret;
236 	/*
237 	 * This user handler is shared with other kprobes and is not expected to be
238 	 * called recursively. So if any other kprobe handler is running, this will
239 	 * exit as kprobe does. See the section 'Share the callbacks with kprobes'
240 	 * in Documentation/trace/fprobe.rst for more information.
241 	 */
242 	if (unlikely(kprobe_running())) {
243 		fp->nmissed++;
244 		return 0;
245 	}
246 
247 	kprobe_busy_begin();
248 	ret = __fprobe_handler(ip, parent_ip, fp, fregs, data);
249 	kprobe_busy_end();
250 	return ret;
251 }
252 
/* The fgraph callbacks are defined later in this file. */
static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
			       struct ftrace_regs *fregs);
static void fprobe_return(struct ftrace_graph_ret *trace,
			  struct fgraph_ops *gops,
			  struct ftrace_regs *fregs);

/* The single fgraph_ops instance used for the fgraph-based fprobes. */
static struct fgraph_ops fprobe_graph_ops = {
	.entryfunc	= fprobe_fgraph_entry,
	.retfunc	= fprobe_return,
};
/* Number of fgraph fprobe nodes */
static int nr_fgraph_fprobes;
/* Is fprobe_graph_ops registered? */
static bool fprobe_graph_registered;
267 
268 /* Add @addrs to the ftrace filter and register fgraph if needed. */
fprobe_graph_add_ips(unsigned long * addrs,int num)269 static int fprobe_graph_add_ips(unsigned long *addrs, int num)
270 {
271 	int ret;
272 
273 	lockdep_assert_held(&fprobe_mutex);
274 
275 	ret = ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 0, 0);
276 	if (ret)
277 		return ret;
278 
279 	if (!fprobe_graph_registered) {
280 		ret = register_ftrace_graph(&fprobe_graph_ops);
281 		if (WARN_ON_ONCE(ret)) {
282 			ftrace_free_filter(&fprobe_graph_ops.ops);
283 			return ret;
284 		}
285 		fprobe_graph_registered = true;
286 	}
287 	return 0;
288 }
289 
__fprobe_graph_unregister(void)290 static void __fprobe_graph_unregister(void)
291 {
292 	if (fprobe_graph_registered) {
293 		unregister_ftrace_graph(&fprobe_graph_ops);
294 		ftrace_free_filter(&fprobe_graph_ops.ops);
295 		fprobe_graph_registered = false;
296 	}
297 }
298 
299 /* Remove @addrs from the ftrace filter and unregister fgraph if possible. */
fprobe_graph_remove_ips(unsigned long * addrs,int num)300 static void fprobe_graph_remove_ips(unsigned long *addrs, int num)
301 {
302 	lockdep_assert_held(&fprobe_mutex);
303 
304 	if (!nr_fgraph_fprobes)
305 		__fprobe_graph_unregister();
306 	else if (num)
307 		ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);
308 }
309 
310 #if defined(CONFIG_DYNAMIC_FTRACE_WITH_ARGS) || defined(CONFIG_DYNAMIC_FTRACE_WITH_REGS)
311 
312 /* ftrace_ops callback, this processes fprobes which have only entry_handler. */
fprobe_ftrace_entry(unsigned long ip,unsigned long parent_ip,struct ftrace_ops * ops,struct ftrace_regs * fregs)313 static void fprobe_ftrace_entry(unsigned long ip, unsigned long parent_ip,
314 	struct ftrace_ops *ops, struct ftrace_regs *fregs)
315 {
316 	struct fprobe_hlist_node *node;
317 	struct rhlist_head *head, *pos;
318 	struct fprobe *fp;
319 	int bit;
320 
321 	bit = ftrace_test_recursion_trylock(ip, parent_ip);
322 	if (bit < 0)
323 		return;
324 
325 	/*
326 	 * ftrace_test_recursion_trylock() disables preemption, but
327 	 * rhltable_lookup() checks whether rcu_read_lcok is held.
328 	 * So we take rcu_read_lock() here.
329 	 */
330 	rcu_read_lock();
331 	head = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params);
332 
333 	rhl_for_each_entry_rcu(node, pos, head, hlist) {
334 		if (node->addr != ip)
335 			break;
336 		fp = READ_ONCE(node->fp);
337 		if (unlikely(!fp || fprobe_disabled(fp) || fp->exit_handler))
338 			continue;
339 
340 		if (fprobe_shared_with_kprobes(fp))
341 			__fprobe_kprobe_handler(ip, parent_ip, fp, fregs, NULL);
342 		else
343 			__fprobe_handler(ip, parent_ip, fp, fregs, NULL);
344 	}
345 	rcu_read_unlock();
346 	ftrace_test_recursion_unlock(bit);
347 }
348 NOKPROBE_SYMBOL(fprobe_ftrace_entry);
349 
/* The ftrace_ops instance used for fprobes that have no exit_handler. */
static struct ftrace_ops fprobe_ftrace_ops = {
	.func	= fprobe_ftrace_entry,
	.flags	= FTRACE_OPS_FL_SAVE_ARGS,
};
/* Number of ftrace fprobe nodes */
static int nr_ftrace_fprobes;
/* Is fprobe_ftrace_ops registered? */
static bool fprobe_ftrace_registered;
358 
fprobe_ftrace_add_ips(unsigned long * addrs,int num)359 static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
360 {
361 	int ret;
362 
363 	lockdep_assert_held(&fprobe_mutex);
364 
365 	ret = ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 0, 0);
366 	if (ret)
367 		return ret;
368 
369 	if (!fprobe_ftrace_registered) {
370 		ret = register_ftrace_function(&fprobe_ftrace_ops);
371 		if (ret) {
372 			ftrace_free_filter(&fprobe_ftrace_ops);
373 			return ret;
374 		}
375 		fprobe_ftrace_registered = true;
376 	}
377 	return 0;
378 }
379 
__fprobe_ftrace_unregister(void)380 static void __fprobe_ftrace_unregister(void)
381 {
382 	if (fprobe_ftrace_registered) {
383 		unregister_ftrace_function(&fprobe_ftrace_ops);
384 		ftrace_free_filter(&fprobe_ftrace_ops);
385 		fprobe_ftrace_registered = false;
386 	}
387 }
388 
fprobe_ftrace_remove_ips(unsigned long * addrs,int num)389 static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
390 {
391 	lockdep_assert_held(&fprobe_mutex);
392 
393 	if (!nr_ftrace_fprobes)
394 		__fprobe_ftrace_unregister();
395 	else if (num)
396 		ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 1, 0);
397 }
398 
fprobe_is_ftrace(struct fprobe * fp)399 static bool fprobe_is_ftrace(struct fprobe *fp)
400 {
401 	return !fp->exit_handler;
402 }
403 
404 /* Node insertion and deletion requires the fprobe_mutex */
insert_fprobe_node(struct fprobe_hlist_node * node,struct fprobe * fp)405 static int insert_fprobe_node(struct fprobe_hlist_node *node, struct fprobe *fp)
406 {
407 	int ret;
408 
409 	lockdep_assert_held(&fprobe_mutex);
410 
411 	ret = __insert_fprobe_node(node, fp);
412 	if (!ret) {
413 		if (fprobe_is_ftrace(fp))
414 			nr_ftrace_fprobes++;
415 		else
416 			nr_fgraph_fprobes++;
417 	}
418 
419 	return ret;
420 }
421 
delete_fprobe_node(struct fprobe_hlist_node * node)422 static void delete_fprobe_node(struct fprobe_hlist_node *node)
423 {
424 	struct fprobe *fp;
425 
426 	lockdep_assert_held(&fprobe_mutex);
427 
428 	fp = READ_ONCE(node->fp);
429 	if (fp) {
430 		if (fprobe_is_ftrace(fp))
431 			nr_ftrace_fprobes--;
432 		else
433 			nr_fgraph_fprobes--;
434 	}
435 	__delete_fprobe_node(node);
436 }
437 
/*
 * Return true if fprobe_ip_table holds a live node on @ip of the requested
 * type: an entry-only fprobe when @ftrace is true, an fprobe with an
 * exit_handler when it is false.
 */
static bool fprobe_exists_on_hash(unsigned long ip, bool ftrace)
{
	struct fprobe_hlist_node *node;
	struct rhlist_head *list, *cursor;

	guard(rcu)();
	list = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params);
	if (!list)
		return false;

	/* We have to check the same type on the list. */
	rhl_for_each_entry_rcu(node, cursor, list, hlist) {
		struct fprobe *fp;

		if (node->addr != ip)
			break;

		fp = READ_ONCE(node->fp);
		if (unlikely(!fp))
			continue;

		/* Matches when "no exit_handler" agrees with @ftrace. */
		if (ftrace == !fp->exit_handler)
			return true;
	}

	return false;
}
463 
464 #ifdef CONFIG_MODULES
fprobe_remove_ips(unsigned long * ips,unsigned int cnt)465 static void fprobe_remove_ips(unsigned long *ips, unsigned int cnt)
466 {
467 	if (!nr_fgraph_fprobes)
468 		__fprobe_graph_unregister();
469 	else if (cnt)
470 		ftrace_set_filter_ips(&fprobe_graph_ops.ops, ips, cnt, 1, 0);
471 
472 	if (!nr_ftrace_fprobes)
473 		__fprobe_ftrace_unregister();
474 	else if (cnt)
475 		ftrace_set_filter_ips(&fprobe_ftrace_ops, ips, cnt, 1, 0);
476 }
477 #endif
478 #else
/*
 * Fallback used when neither CONFIG_DYNAMIC_FTRACE_WITH_ARGS nor
 * CONFIG_DYNAMIC_FTRACE_WITH_REGS is set: always fails with -ENOENT.
 */
static int fprobe_ftrace_add_ips(unsigned long *addrs, int num)
{
	return -ENOENT;
}
483 
/* Fallback for the same configuration: there is no ftrace filter, so do nothing. */
static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num)
{
}
487 
/* In this configuration no fprobe is of the ftrace type; all use fgraph. */
static bool fprobe_is_ftrace(struct fprobe *fp)
{
	return false;
}
492 
493 /* Node insertion and deletion requires the fprobe_mutex */
insert_fprobe_node(struct fprobe_hlist_node * node,struct fprobe * fp)494 static int insert_fprobe_node(struct fprobe_hlist_node *node, struct fprobe *fp)
495 {
496 	int ret;
497 
498 	lockdep_assert_held(&fprobe_mutex);
499 
500 	ret = __insert_fprobe_node(node, fp);
501 	if (!ret)
502 		nr_fgraph_fprobes++;
503 
504 	return ret;
505 }
506 
delete_fprobe_node(struct fprobe_hlist_node * node)507 static void delete_fprobe_node(struct fprobe_hlist_node *node)
508 {
509 	struct fprobe *fp;
510 
511 	lockdep_assert_held(&fprobe_mutex);
512 
513 	fp = READ_ONCE(node->fp);
514 	if (fp)
515 		nr_fgraph_fprobes--;
516 	__delete_fprobe_node(node);
517 }
518 
/*
 * Return true if fprobe_ip_table holds a live node on @ip.
 * @ftrace is ignored here: only one probe type exists in this configuration.
 */
static bool fprobe_exists_on_hash(unsigned long ip, bool ftrace __maybe_unused)
{
	struct fprobe_hlist_node *node;
	struct rhlist_head *list, *cursor;

	guard(rcu)();
	list = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params);
	if (!list)
		return false;

	/* We only need to check fp is there. */
	rhl_for_each_entry_rcu(node, cursor, list, hlist) {
		if (node->addr != ip)
			break;

		if (likely(READ_ONCE(node->fp)))
			return true;
	}

	return false;
}
541 
542 #ifdef CONFIG_MODULES
fprobe_remove_ips(unsigned long * ips,unsigned int cnt)543 static void fprobe_remove_ips(unsigned long *ips, unsigned int cnt)
544 {
545 	if (!nr_fgraph_fprobes)
546 		__fprobe_graph_unregister();
547 	else if (cnt)
548 		ftrace_set_filter_ips(&fprobe_graph_ops.ops, ips, cnt, 1, 0);
549 }
550 #endif
551 #endif /* !CONFIG_DYNAMIC_FTRACE_WITH_ARGS && !CONFIG_DYNAMIC_FTRACE_WITH_REGS */
552 
/*
 * fgraph_ops callback, this processes fprobes which have exit_handler.
 *
 * Works in up to three passes over the nodes registered on trace->func:
 *  1. sum up the shadow-stack words needed by every fprobe with an
 *     exit_handler (header + entry data),
 *  2. reserve that space; on failure count the hit as missed on each
 *     enabled, non-ftrace fprobe and give up,
 *  3. run the entry handlers and write a header for each fprobe whose
 *     exit_handler has to be called on return.
 *
 * Returns nonzero if at least one header was written, 0 otherwise.
 */
static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
			       struct ftrace_regs *fregs)
{
	unsigned long *fgraph_data = NULL;
	unsigned long func = trace->func;
	struct fprobe_hlist_node *node;
	struct rhlist_head *head, *pos;
	unsigned long ret_ip;
	int reserved_words;
	struct fprobe *fp;
	int used, ret;

	if (WARN_ON_ONCE(!fregs))
		return 0;

	guard(rcu)();
	head = rhltable_lookup(&fprobe_ip_table, &func, fprobe_rht_params);
	/* Pass 1: size the reservation. */
	reserved_words = 0;
	rhl_for_each_entry_rcu(node, pos, head, hlist) {
		if (node->addr != func)
			continue;
		fp = READ_ONCE(node->fp);
		if (!fp || !fp->exit_handler)
			continue;
		/*
		 * Since fprobe can be enabled until the next loop, we ignore the
		 * fprobe's disabled flag in this loop.
		 */
		reserved_words +=
			FPROBE_HEADER_SIZE_IN_LONG + SIZE_IN_LONG(fp->entry_data_size);
	}
	/* Pass 2: reserve the space; nothing to reserve if no exit_handler exists. */
	if (reserved_words) {
		fgraph_data = fgraph_reserve_data(gops->idx, reserved_words * sizeof(long));
		if (unlikely(!fgraph_data)) {
			/* No room on the shadow stack: account the miss and bail out. */
			rhl_for_each_entry_rcu(node, pos, head, hlist) {
				if (node->addr != func)
					continue;
				fp = READ_ONCE(node->fp);
				if (fp && !fprobe_disabled(fp) && !fprobe_is_ftrace(fp))
					fp->nmissed++;
			}
			return 0;
		}
	}

	/*
	 * TODO: recursion detection has been done in the fgraph. Thus we need
	 * to add a callback to increment missed counter.
	 */
	ret_ip = ftrace_regs_get_return_address(fregs);
	/* Pass 3: call the entry handlers; 'used' counts the words written so far. */
	used = 0;
	rhl_for_each_entry_rcu(node, pos, head, hlist) {
		int data_size;
		void *data;

		if (node->addr != func)
			continue;
		fp = READ_ONCE(node->fp);
		if (unlikely(!fp || fprobe_disabled(fp) || fprobe_is_ftrace(fp)))
			continue;

		data_size = fp->entry_data_size;
		/*
		 * The list may have grown since it was sized, so this node
		 * may not fit. Skip it as missed rather than overrun the
		 * reservation.
		 */
		if (fp->exit_handler &&
		    used + FPROBE_HEADER_SIZE_IN_LONG + SIZE_IN_LONG(data_size) > reserved_words) {
			fp->nmissed++;
			continue;
		}
		/* The entry data area, if any, sits right behind this fprobe's header. */
		if (data_size && fp->exit_handler)
			data = fgraph_data + used + FPROBE_HEADER_SIZE_IN_LONG;
		else
			data = NULL;

		if (fprobe_shared_with_kprobes(fp))
			ret = __fprobe_kprobe_handler(func, ret_ip, fp, fregs, data);
		else
			ret = __fprobe_handler(func, ret_ip, fp, fregs, data);

		/* If entry_handler returns !0, nmissed is not counted but skips exit_handler. */
		if (!ret && fp->exit_handler) {
			int size_words = SIZE_IN_LONG(data_size);

			/* Advance only when the header was really written. */
			if (write_fprobe_header(&fgraph_data[used], fp, size_words))
				used += FPROBE_HEADER_SIZE_IN_LONG + size_words;
		}
	}

	/* If any exit_handler is set, data must be used. */
	return used != 0;
}
NOKPROBE_SYMBOL(fprobe_fgraph_entry);
649 
/*
 * fgraph_ops return callback: walks the headers that fprobe_fgraph_entry()
 * stored on the shadow stack and calls the exit_handler of every recorded
 * fprobe that is still registered and not disabled.
 */
static void fprobe_return(struct ftrace_graph_ret *trace,
			  struct fgraph_ops *gops,
			  struct ftrace_regs *fregs)
{
	unsigned long *fgraph_data = NULL;
	unsigned long ret_ip;
	struct fprobe *fp;
	int size, curr;
	int size_words;

	fgraph_data = (unsigned long *)fgraph_retrieve_data(gops->idx, &size);
	if (WARN_ON_ONCE(!fgraph_data))
		return;
	/* 'size' is converted to long words here and reused below per header. */
	size_words = SIZE_IN_LONG(size);
	ret_ip = ftrace_regs_get_instruction_pointer(fregs);

	/* fprobe_registered() does an RCU hlist traversal of fprobe_table. */
	preempt_disable_notrace();

	curr = 0;
	while (size_words > curr) {
		/* After this call 'size' is the entry data size in long words. */
		read_fprobe_header(&fgraph_data[curr], &fp, &size);
		/*
		 * A NULL fp ends the walk.
		 * NOTE(review): presumably the unused tail of the reservation
		 * reads as zero - confirm against fgraph_reserve_data().
		 */
		if (!fp)
			break;
		curr += FPROBE_HEADER_SIZE_IN_LONG;
		/* The fprobe may have been unregistered since the entry was recorded. */
		if (fprobe_registered(fp) && !fprobe_disabled(fp)) {
			if (WARN_ON_ONCE(curr + size > size_words))
				break;
			fp->exit_handler(fp, trace->func, ret_ip, fregs,
					 size ? fgraph_data + curr : NULL);
		}
		/* Skip this fprobe's entry data to reach the next header. */
		curr += size;
	}
	preempt_enable_notrace();
}
NOKPROBE_SYMBOL(fprobe_return);
685 
686 #ifdef CONFIG_MODULES
687 
/* Capacity of the address batch used while handling a module unload. */
#define FPROBE_IPS_BATCH_INIT 128
/* instruction pointer address list */
struct fprobe_addr_list {
	int index;		/* number of entries currently stored in @addrs */
	int size;		/* capacity of @addrs */
	unsigned long *addrs;	/* storage; may be NULL if allocation failed */
};
695 
fprobe_remove_node_in_module(struct module * mod,struct fprobe_hlist_node * node,struct fprobe_addr_list * alist)696 static int fprobe_remove_node_in_module(struct module *mod, struct fprobe_hlist_node *node,
697 					 struct fprobe_addr_list *alist)
698 {
699 	lockdep_assert_in_rcu_read_lock();
700 
701 	if (!within_module(node->addr, mod))
702 		return 0;
703 
704 	delete_fprobe_node(node);
705 	/* If no address list is available, we can't track this address. */
706 	if (!alist->addrs)
707 		return 0;
708 	/*
709 	 * Don't care the type here, because all fprobes on the same
710 	 * address must be removed eventually.
711 	 */
712 	if (!rhltable_lookup(&fprobe_ip_table, &node->addr, fprobe_rht_params)) {
713 		alist->addrs[alist->index++] = node->addr;
714 		if (alist->index == alist->size)
715 			return -ENOSPC;
716 	}
717 
718 	return 0;
719 }
720 
/*
 * Handle module unloading to manage fprobe_ip_table.
 *
 * On MODULE_STATE_GOING, every node whose address lies inside the module is
 * deleted from fprobe_ip_table and the freed addresses are removed from the
 * ftrace filter(s) in batches of at most alist.size. Other module states
 * are ignored. Always returns NOTIFY_DONE.
 */
static int fprobe_module_callback(struct notifier_block *nb,
				  unsigned long val, void *data)
{
	struct fprobe_addr_list alist = {.size = FPROBE_IPS_BATCH_INIT};
	struct fprobe_hlist_node *node;
	struct rhashtable_iter iter;
	struct module *mod = data;
	bool retry;

	if (val != MODULE_STATE_GOING)
		return NOTIFY_DONE;

	alist.addrs = kcalloc(alist.size, sizeof(*alist.addrs), GFP_KERNEL);
	/*
	 * If failed to alloc memory, ftrace_ops will not be able to remove ips from
	 * hash, but we can still remove nodes from fprobe_ip_table, so we can avoid
	 * the potential wrong callback. So just print a warning here and try to
	 * continue without address list.
	 */
	WARN_ONCE(!alist.addrs,
		"Failed to allocate memory for fprobe_addr_list, ftrace_ops will not be updated");

	mutex_lock(&fprobe_mutex);
again:
	/* Each round starts with an empty batch. */
	retry = false;
	alist.index = 0;
	rhltable_walk_enter(&fprobe_ip_table, &iter);
	do {
		rhashtable_walk_start(&iter);

		/* A negative return means the batch is full: stop and flush it. */
		while ((node = rhashtable_walk_next(&iter)) && !IS_ERR(node))
			if (fprobe_remove_node_in_module(mod, node, &alist) < 0) {
				retry = true;
				break;
			}

		rhashtable_walk_stop(&iter);
		/* The walk is resumed for as long as it reports -EAGAIN. */
	} while (node == ERR_PTR(-EAGAIN) && !retry);
	rhashtable_walk_exit(&iter);
	/* Remove any ips from hash table(s) */
	fprobe_remove_ips(alist.addrs, alist.index);
	/*
	 * If we break rhashtable walk loop except for -EAGAIN, we need
	 * to restart looping from start for safety. Anyway, this is
	 * not a hotpath.
	 */
	if (retry)
		goto again;

	mutex_unlock(&fprobe_mutex);

	kfree(alist.addrs);

	return NOTIFY_DONE;
}
777 
/* Module notifier that routes module state changes to fprobe_module_callback(). */
static struct notifier_block fprobe_module_nb = {
	.notifier_call = fprobe_module_callback,
	.priority = 0,
};
782 
/*
 * Register the module notifier as an early initcall.
 * Returns the result of register_module_notifier().
 */
static int __init init_fprobe_module(void)
{
	return register_module_notifier(&fprobe_module_nb);
}
early_initcall(init_fprobe_module);
788 #endif
789 
/*
 * sort() comparator for an array of C strings: @a and @b each point at a
 * 'const char *' element. Returns the strcmp() ordering of the two strings.
 */
static int symbols_cmp(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return strcmp(*lhs, *rhs);
}
797 
798 /* Convert ftrace location address from symbols */
get_ftrace_locations(const char ** syms,int num)799 static unsigned long *get_ftrace_locations(const char **syms, int num)
800 {
801 	unsigned long *addrs;
802 
803 	/* Convert symbols to symbol address */
804 	addrs = kcalloc(num, sizeof(*addrs), GFP_KERNEL);
805 	if (!addrs)
806 		return ERR_PTR(-ENOMEM);
807 
808 	/* ftrace_lookup_symbols expects sorted symbols */
809 	sort(syms, num, sizeof(*syms), symbols_cmp, NULL);
810 
811 	if (!ftrace_lookup_symbols(syms, num, addrs))
812 		return addrs;
813 
814 	kfree(addrs);
815 	return ERR_PTR(-ENOENT);
816 }
817 
/* State shared with filter_match_callback() while walking the symbols. */
struct filter_match_data {
	const char *filter;	/* glob pattern a symbol must match */
	const char *notfilter;	/* optional glob pattern a symbol must not match */
	size_t index;		/* number of matches so far */
	size_t size;		/* the walk is stopped once index reaches this */
	unsigned long *addrs;	/* matched addresses; NULL when only counting */
	struct module **mods;	/* owning module (or NULL) of each stored address */
};
826 
filter_match_callback(void * data,const char * name,unsigned long addr)827 static int filter_match_callback(void *data, const char *name, unsigned long addr)
828 {
829 	struct filter_match_data *match = data;
830 
831 	if (!glob_match(match->filter, name) ||
832 	    (match->notfilter && glob_match(match->notfilter, name)))
833 		return 0;
834 
835 	if (!ftrace_location(addr))
836 		return 0;
837 
838 	if (match->addrs) {
839 		struct module *mod = __module_text_address(addr);
840 
841 		if (mod && !try_module_get(mod))
842 			return 0;
843 
844 		match->mods[match->index] = mod;
845 		match->addrs[match->index] = addr;
846 	}
847 	match->index++;
848 	return match->index == match->size;
849 }
850 
851 /*
852  * Make IP list from the filter/no-filter glob patterns.
853  * Return the number of matched symbols, or errno.
854  * If @addrs == NULL, this just counts the number of matched symbols. If @addrs
855  * is passed with an array, we need to pass the an @mods array of the same size
856  * to increment the module refcount for each symbol.
857  * This means we also need to call `module_put` for each element of @mods after
858  * using the @addrs.
859  */
get_ips_from_filter(const char * filter,const char * notfilter,unsigned long * addrs,struct module ** mods,size_t size)860 static int get_ips_from_filter(const char *filter, const char *notfilter,
861 			       unsigned long *addrs, struct module **mods,
862 			       size_t size)
863 {
864 	struct filter_match_data match = { .filter = filter, .notfilter = notfilter,
865 		.index = 0, .size = size, .addrs = addrs, .mods = mods};
866 	int ret;
867 
868 	if (addrs && !mods)
869 		return -EINVAL;
870 
871 	ret = kallsyms_on_each_symbol(filter_match_callback, &match);
872 	if (ret < 0)
873 		return ret;
874 	if (IS_ENABLED(CONFIG_MODULES)) {
875 		ret = module_kallsyms_on_each_symbol(NULL, filter_match_callback, &match);
876 		if (ret < 0)
877 			return ret;
878 	}
879 
880 	return match.index ?: -ENOENT;
881 }
882 
fprobe_fail_cleanup(struct fprobe * fp)883 static void fprobe_fail_cleanup(struct fprobe *fp)
884 {
885 	kfree(fp->hlist_array);
886 	fp->hlist_array = NULL;
887 }
888 
889 /* Initialize the fprobe data structure. */
fprobe_init(struct fprobe * fp,unsigned long * addrs,int num)890 static int fprobe_init(struct fprobe *fp, unsigned long *addrs, int num)
891 {
892 	struct fprobe_hlist *hlist_array;
893 	unsigned long addr;
894 	int size, i;
895 
896 	if (!fp || !addrs || num <= 0)
897 		return -EINVAL;
898 
899 	size = ALIGN(fp->entry_data_size, sizeof(long));
900 	if (size > MAX_FPROBE_DATA_SIZE)
901 		return -E2BIG;
902 	fp->entry_data_size = size;
903 
904 	hlist_array = kzalloc_flex(*hlist_array, array, num);
905 	if (!hlist_array)
906 		return -ENOMEM;
907 
908 	fp->nmissed = 0;
909 
910 	hlist_array->size = num;
911 	fp->hlist_array = hlist_array;
912 	hlist_array->fp = fp;
913 	for (i = 0; i < num; i++) {
914 		addr = ftrace_location(addrs[i]);
915 		if (!addr) {
916 			fprobe_fail_cleanup(fp);
917 			return -ENOENT;
918 		}
919 		hlist_array->array[i].addr = addr;
920 	}
921 	return 0;
922 }
923 
/* Upper bound on the number of symbols collected by one filter walk. */
#define FPROBE_IPS_MAX	INT_MAX

/*
 * Count the symbols matching @filter but not @notfilter, without collecting
 * their addresses. Returns the count or a negative errno, as returned by
 * get_ips_from_filter().
 */
int fprobe_count_ips_from_filter(const char *filter, const char *notfilter)
{
	return get_ips_from_filter(filter, notfilter, NULL, NULL, FPROBE_IPS_MAX);
}
930 
931 /**
932  * register_fprobe() - Register fprobe to ftrace by pattern.
933  * @fp: A fprobe data structure to be registered.
934  * @filter: A wildcard pattern of probed symbols.
935  * @notfilter: A wildcard pattern of NOT probed symbols.
936  *
937  * Register @fp to ftrace for enabling the probe on the symbols matched to @filter.
938  * If @notfilter is not NULL, the symbols matched the @notfilter are not probed.
939  *
940  * Return 0 if @fp is registered successfully, -errno if not.
941  */
register_fprobe(struct fprobe * fp,const char * filter,const char * notfilter)942 int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter)
943 {
944 	unsigned long *addrs __free(kfree) = NULL;
945 	struct module **mods __free(kfree) = NULL;
946 	int ret, num;
947 
948 	if (!fp || !filter)
949 		return -EINVAL;
950 
951 	num = get_ips_from_filter(filter, notfilter, NULL, NULL, FPROBE_IPS_MAX);
952 	if (num < 0)
953 		return num;
954 
955 	addrs = kcalloc(num, sizeof(*addrs), GFP_KERNEL);
956 	if (!addrs)
957 		return -ENOMEM;
958 
959 	mods = kzalloc_objs(*mods, num);
960 	if (!mods)
961 		return -ENOMEM;
962 
963 	ret = get_ips_from_filter(filter, notfilter, addrs, mods, num);
964 	if (ret < 0)
965 		return ret;
966 
967 	ret = register_fprobe_ips(fp, addrs, ret);
968 
969 	for (int i = 0; i < num; i++) {
970 		if (mods[i])
971 			module_put(mods[i]);
972 	}
973 	return ret;
974 }
975 EXPORT_SYMBOL_GPL(register_fprobe);
976 
977 static int unregister_fprobe_nolock(struct fprobe *fp);
978 
/**
 * register_fprobe_ips() - Register fprobe to ftrace by address.
 * @fp: A fprobe data structure to be registered.
 * @addrs: An array of target function addresses.
 * @num: The number of entries of @addrs.
 *
 * Register @fp to ftrace for enabling the probe on the addresses given by
 * @addrs. The @addrs must be ftrace location addresses, which may be
 * the symbol address + arch-dependent offset.
 * If you are unsure what this means, please use the other registration
 * functions.
 *
 * Return 0 if @fp is registered successfully, -errno if not.
 */
int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
{
	struct fprobe_hlist *hlist_array;
	int ret, i;

	/* The mutex is held until return, on every path. */
	guard(mutex)(&fprobe_mutex);
	if (fprobe_registered(fp))
		return -EEXIST;

	ret = fprobe_init(fp, addrs, num);
	if (ret)
		return ret;

	/* Entry-only fprobes go through ftrace, the others through fgraph. */
	if (fprobe_is_ftrace(fp))
		ret = fprobe_ftrace_add_ips(addrs, num);
	else
		ret = fprobe_graph_add_ips(addrs, num);
	if (ret) {
		fprobe_fail_cleanup(fp);
		return ret;
	}

	hlist_array = fp->hlist_array;
	ret = add_fprobe_hash(fp);
	/* Node insertion stops at the first failure. */
	for (i = 0; i < hlist_array->size && !ret; i++)
		ret = insert_fprobe_node(&hlist_array->array[i], fp);

	if (ret) {
		unregister_fprobe_nolock(fp);
		/* In error case, wait for clean up safely. */
		synchronize_rcu();
	}

	return ret;
}
EXPORT_SYMBOL_GPL(register_fprobe_ips);
1028 
1029 /**
1030  * register_fprobe_syms() - Register fprobe to ftrace by symbols.
1031  * @fp: A fprobe data structure to be registered.
1032  * @syms: An array of target symbols.
1033  * @num: The number of entries of @syms.
1034  *
1035  * Register @fp to the symbols given by @syms array. This will be useful if
1036  * you are sure the symbols exist in the kernel.
1037  *
1038  * Return 0 if @fp is registered successfully, -errno if not.
1039  */
register_fprobe_syms(struct fprobe * fp,const char ** syms,int num)1040 int register_fprobe_syms(struct fprobe *fp, const char **syms, int num)
1041 {
1042 	unsigned long *addrs;
1043 	int ret;
1044 
1045 	if (!fp || !syms || num <= 0)
1046 		return -EINVAL;
1047 
1048 	addrs = get_ftrace_locations(syms, num);
1049 	if (IS_ERR(addrs))
1050 		return PTR_ERR(addrs);
1051 
1052 	ret = register_fprobe_ips(fp, addrs, num);
1053 
1054 	kfree(addrs);
1055 
1056 	return ret;
1057 }
1058 EXPORT_SYMBOL_GPL(register_fprobe_syms);
1059 
fprobe_is_registered(struct fprobe * fp)1060 bool fprobe_is_registered(struct fprobe *fp)
1061 {
1062 	if (!fp || !fp->hlist_array)
1063 		return false;
1064 	return true;
1065 }
1066 
unregister_fprobe_nolock(struct fprobe * fp)1067 static int unregister_fprobe_nolock(struct fprobe *fp)
1068 {
1069 	struct fprobe_hlist *hlist_array = fp->hlist_array;
1070 	unsigned long *addrs = NULL;
1071 	int i, count;
1072 
1073 	addrs = kcalloc(hlist_array->size, sizeof(unsigned long), GFP_KERNEL);
1074 	/*
1075 	 * This will remove fprobe_hash_node from the hash table even if
1076 	 * memory allocation fails. However, ftrace_ops will not be updated.
1077 	 * Anyway, when the last fprobe is unregistered, ftrace_ops is also
1078 	 * unregistered.
1079 	 */
1080 	if (!addrs)
1081 		pr_warn("Failed to allocate working array. ftrace_ops may not sync.\n");
1082 
1083 	/* Remove non-synonim ips from table and hash */
1084 	count = 0;
1085 	for (i = 0; i < hlist_array->size; i++) {
1086 		delete_fprobe_node(&hlist_array->array[i]);
1087 		if (addrs && !fprobe_exists_on_hash(hlist_array->array[i].addr,
1088 						    fprobe_is_ftrace(fp)))
1089 			addrs[count++] = hlist_array->array[i].addr;
1090 	}
1091 	del_fprobe_hash(fp);
1092 
1093 	if (fprobe_is_ftrace(fp))
1094 		fprobe_ftrace_remove_ips(addrs, count);
1095 	else
1096 		fprobe_graph_remove_ips(addrs, count);
1097 
1098 	kfree_rcu(hlist_array, rcu);
1099 	fp->hlist_array = NULL;
1100 	kfree(addrs);
1101 
1102 	return 0;
1103 }
1104 
1105 /**
1106  * unregister_fprobe_async() - Unregister fprobe without RCU GP wait
1107  * @fp: A fprobe data structure to be unregistered.
1108  *
1109  * Unregister fprobe (and remove ftrace hooks from the function entries).
1110  * This function will NOT wait until the fprobe is no longer used.
1111  *
1112  * Return 0 if @fp is unregistered successfully, -errno if not.
1113  */
unregister_fprobe_async(struct fprobe * fp)1114 int unregister_fprobe_async(struct fprobe *fp)
1115 {
1116 	guard(mutex)(&fprobe_mutex);
1117 	if (!fp || !fprobe_registered(fp))
1118 		return -EINVAL;
1119 
1120 	return unregister_fprobe_nolock(fp);
1121 }
1122 
/**
 * unregister_fprobe() - Unregister fprobe with RCU GP wait
 * @fp: A fprobe data structure to be unregistered.
 *
 * Unregister fprobe (and remove ftrace hooks from the function entries).
 * This function will block until the fprobe is no longer used.
 *
 * Return 0 if @fp is unregistered successfully, -errno if not.
 */
int unregister_fprobe(struct fprobe *fp)
{
	int err = unregister_fprobe_async(fp);

	/* Nothing was unregistered, so there is nothing to wait for. */
	if (err)
		return err;

	synchronize_rcu();
	return 0;
}
EXPORT_SYMBOL_GPL(unregister_fprobe);
1141 
fprobe_initcall(void)1142 static int __init fprobe_initcall(void)
1143 {
1144 	rhltable_init(&fprobe_ip_table, &fprobe_rht_params);
1145 	return 0;
1146 }
1147 core_initcall(fprobe_initcall);
1148